Bonjour à tous !
Je débute en OpenCL et j'ai besoin de conseils.
J'ai créé un kernel pour filtrer une image (un filtre moyen) et je souhaite appliquer ce kernel à plusieurs reprises.
Mon kernel ("filter_kernel.cl") :
Ma fonction (qui fait appel à ce kernel) :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Mean (box) filter over an image stored as 3 interleaved 8-bit components
 * per pixel.
 *
 * Each work-item computes one component of one output pixel:
 *   get_global_id(0) -> column, get_global_id(1) -> row,
 *   get_global_id(2) -> component index (0..2).
 *
 * src_image    : input image, width * height * 3 bytes
 * dest_image   : output image, same layout as src_image
 * width        : image width in pixels
 * height       : image height in pixels
 * neighborhood : window radius; the window is
 *                (2 * neighborhood + 1) x (2 * neighborhood + 1) pixels
 *
 * Window coordinates that fall outside the image are clamped to the nearest
 * edge pixel, so the kernel never reads outside src_image.
 */
__kernel void average
(
    __global const unsigned char *src_image,
    __global unsigned char *dest_image,
    const int width,
    const int height,
    const int neighborhood
)
{
    const int idx = (int) get_global_id( 0 );
    const int idy = (int) get_global_id( 1 );
    const int idz = (int) get_global_id( 2 );

    // Ignore work-items outside the image (e.g. a rounded-up global size).
    if( idx >= width || idy >= height || idz >= 3 )
    {
        return;
    }

    const int row_stride = width * 3;
    int sum = 0;

    for( int y = idy - neighborhood; y <= idy + neighborhood; y++ )
    {
        // Clamp the row so that border pixels reuse the nearest valid row.
        const int cy = max( 0, min( y, height - 1 ) );

        for( int x = idx - neighborhood; x <= idx + neighborhood; x++ )
        {
            // Same clamping for the column.
            const int cx = max( 0, min( x, width - 1 ) );

            sum += src_image[ cy * row_stride + cx * 3 + idz ];
        }
    }

    // Number of samples in the window; clamped samples still count, so the
    // divisor is the same for every pixel.
    const int coeff = ( 2 * neighborhood + 1 ) * ( 2 * neighborhood + 1 );

    dest_image[ ( idy * width + idx ) * 3 + idz ] = sum / coeff;
}
Tout se passe bien, mais les étapes d'initialisation du contexte OpenCL et de chargement du kernel prennent trop de temps.
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151 #define MAX_SOURCE_SIZE (0x100000) cl_uint ret_num_platforms; cl_platform_id platform_id; cl_uint ret_num_devices; cl_device_id device_id; cl_context context; cl_command_queue command_queue; cl_program program; cl_kernel kernel; void gpu_compute_average ( const unsigned char *src_image, unsigned char *dst_image, const unsigned int width, const unsigned int height, const unsigned int neighboorhood ) { cl_int err; // -------------------------------------------------- // Create the basic OpenCL run-time environment // -------------------------------------------------- // Get platform information err = clGetPlatformIDs( 1, &platform_id, &ret_num_platforms ); assert( err, "clGetPlatformIDs failure!\n" ); // Get device information err = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices ); assert( err, "clGetDeviceIDs failure!\n" ); // Create an OpenCL context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &err ); assert( err, "clCreateContext failure!\n" ); // Create a command queue command_queue = clCreateCommandQueue( context, device_id, 0, &err ); assert( err, "clCreateCommandQueue failure!\n" ); // -------------------------------------------------- // The Program and the Kernels // -------------------------------------------------- // Load the kernel source code FILE *file = fopen( "filter_kernel.cl", "r" ); if( !file ) { fprintf( stderr, "Failed to load kernel.\n" ); exit( 1 ); } char *source_str = (char*) malloc( MAX_SOURCE_SIZE ); size_t source_size = fread( source_str, 1, MAX_SOURCE_SIZE, file ); fclose( file ); // Create a program from the kernel source program = clCreateProgramWithSource( context, 1, (const char **) &source_str, (const size_t *) &source_size, &err ); assert( err, "clCreateProgramWithSource failure!\n" ); // Build the program err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL ); assert( err, "clBuildProgram failure!\n" ); // Create the OpenCL kernel kernel = clCreateKernel( program, 
"average", &err ); assert( err, "clCreateKernel failure!\n" ); // -------------------------------------------------- // Allocating Memory // -------------------------------------------------- // Create memory buffers on the device // CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR cl_mem src_mem_obj = clCreateBuffer( context, CL_MEM_READ_ONLY, width * height * 3 * sizeof(unsigned char), NULL, &err ); assert( err, "clCreateBuffer failure!\n" ); cl_mem dst_mem_obj = clCreateBuffer( context, CL_MEM_WRITE_ONLY, width * height * 3 * sizeof(unsigned char), NULL, &err ); assert( err, "clCreateBuffer failure!\n" ); // Copy to the memory buffers err = clEnqueueWriteBuffer( command_queue, src_mem_obj, CL_TRUE, 0, width * height * 3 * sizeof(unsigned char), src_image, 0, NULL, NULL ); assert( err, "clEnqueueWriteBuffer failure!\n" ); // -------------------------------------------------- // Launching the Kernel // -------------------------------------------------- // Set the arguments of the kernel err |= clSetKernelArg( kernel, 0, sizeof(cl_mem), (void *) &src_mem_obj ); err |= clSetKernelArg( kernel, 1, sizeof(cl_mem), (void *) &dst_mem_obj ); err |= clSetKernelArg( kernel, 2, sizeof(size_t), (void *) &width ); err |= clSetKernelArg( kernel, 3, sizeof(size_t), (void *) &height); err |= clSetKernelArg( kernel, 4, sizeof(size_t), (void *) &neighboorhood); assert( err, "clSetKernelArg failure!\n" ); // Execute the OpenCL kernel on the list cl_uint work_dim = 3; size_t global_work_size[3]; // clGetDeviceInfo( device_id, CL_DEVICE_ADDRESS_BITS, ... 
global_work_size[0] = width; global_work_size[1] = height; global_work_size[2] = 3; err = clEnqueueNDRangeKernel ( command_queue, kernel, work_dim, NULL, global_work_size, NULL, 0, NULL, NULL ); assert( err, "clEnqueueNDRangeKernel failure!\n" ); // -------------------------------------------------- // Reading back // -------------------------------------------------- // Read the memory buffer on the device to the local variable err = clEnqueueReadBuffer ( command_queue, dst_mem_obj, CL_TRUE, 0, width * height * 3 * sizeof(unsigned char), dst_image, 0, NULL, NULL ); assert( err, "clEnqueueReadBuffer failure!\n" ); // -------------------------------------------------- // Cleaning up // -------------------------------------------------- // Clean err |= clFlush( command_queue ); err |= clFinish( command_queue ); err |= clReleaseKernel( kernel ); err |= clReleaseProgram( program ); err |= clReleaseMemObject( src_mem_obj ); err |= clReleaseMemObject( dst_mem_obj ); err |= clReleaseCommandQueue( command_queue ); err |= clReleaseContext( context ); assert( err, "clRelease failure!\n" ); }
Or, si j'initialise (une seule fois) mon contexte OpenCL et si je charge (une seule fois) mon kernel alors j'obtiens un crash : la fonction "clEnqueueReadBuffer" retourne CL_INVALID_COMMAND_QUEUE
Le code associé :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8 startContext(); startKernel(); while( ... ) { gpu_compute_average(); } stopKernel(); stopContext();
Je dois oublier quelque chose, aussi si vous avez une idée ...
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163 #define MAX_SOURCE_SIZE (0x100000) cl_uint ret_num_platforms; cl_platform_id platform_id; cl_uint ret_num_devices; cl_device_id device_id; cl_context context; cl_command_queue command_queue; cl_program program; cl_kernel kernel; void startContext() { cl_int err; // Get platform information err = clGetPlatformIDs( 1, &platform_id, &ret_num_platforms ); assert( err, "clGetPlatformIDs failure!\n" ); // Get device information err = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices ); assert( err, "clGetDeviceIDs failure!\n" ); // Create an OpenCL context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &err ); assert( err, "clCreateContext failure!\n" ); } void stopContext() { cl_int err = CL_SUCCESS; err |= clFinish( command_queue ); err |= clReleaseCommandQueue( command_queue ); err |= clReleaseContext( context ); assert( err, "clRelease failure!\n" ); } void startKernel() { cl_int err; // Load the kernel source code FILE *file = fopen( "filter_kernel.cl", "r" ); if( !file ) { fprintf( stderr, "Failed to load kernel.\n" ); exit( 1 ); } char *source_str = (char*) malloc( MAX_SOURCE_SIZE ); size_t source_size = fread( source_str, 1, MAX_SOURCE_SIZE, file ); fclose( file ); // Create a program from the kernel source program = clCreateProgramWithSource( context, 1, (const char **) &source_str, (const size_t *) &source_size, &err ); assert( err, "clCreateProgramWithSource failure!\n" ); // Build the program err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL ); assert( err, "clBuildProgram failure!\n" ); // Create the OpenCL kernel kernel = clCreateKernel( program, "average", &err ); assert( err, "clCreateKernel failure!\n" ); // Create a command queue command_queue = clCreateCommandQueue( context, device_id, 0, &err ); assert( err, "clCreateCommandQueue failure!\n" ); } void stopKernel() { cl_int err = CL_SUCCESS; err |= clReleaseKernel( kernel ); err |= clReleaseProgram( program ); assert( err, "clRelease 
failure!\n" ); } void gpu_compute_average ( const unsigned char *src_image, unsigned char *dst_image, const unsigned int width, const unsigned int height, const unsigned int neighboorhood ) { cl_int err; // -------------------------------------------------- // Allocating Memory // -------------------------------------------------- // Create memory buffers on the device // CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR cl_mem src_mem_obj = clCreateBuffer( context, CL_MEM_READ_ONLY, width * height * 3 * sizeof(unsigned char), NULL, &err ); assert( err, "clCreateBuffer failure!\n" ); cl_mem dst_mem_obj = clCreateBuffer( context, CL_MEM_WRITE_ONLY, width * height * 3 * sizeof(unsigned char), NULL, &err ); assert( err, "clCreateBuffer failure!\n" ); // Copy to the memory buffers err = clEnqueueWriteBuffer( command_queue, src_mem_obj, CL_TRUE, 0, width * height * 3 * sizeof(unsigned char), src_image, 0, NULL, NULL ); assert( err, "clEnqueueWriteBuffer failure!\n" ); // -------------------------------------------------- // Launching the Kernel // -------------------------------------------------- // Set the arguments of the kernel err |= clSetKernelArg( kernel, 0, sizeof(cl_mem), (void *) &src_mem_obj ); err |= clSetKernelArg( kernel, 1, sizeof(cl_mem), (void *) &dst_mem_obj ); err |= clSetKernelArg( kernel, 2, sizeof(size_t), (void *) &width ); err |= clSetKernelArg( kernel, 3, sizeof(size_t), (void *) &height); err |= clSetKernelArg( kernel, 4, sizeof(size_t), (void *) &neighboorhood); assert( err, "clSetKernelArg failure!\n" ); // Execute the OpenCL kernel on the list cl_uint work_dim = 3; size_t global_work_size[3]; // clGetDeviceInfo( device_id, CL_DEVICE_ADDRESS_BITS, ... 
global_work_size[0] = width; global_work_size[1] = height; global_work_size[2] = 3; err = clEnqueueNDRangeKernel ( command_queue, kernel, work_dim, NULL, global_work_size, NULL, 0, NULL, NULL ); assert( err, "clEnqueueNDRangeKernel failure!\n" ); // -------------------------------------------------- // Reading back // -------------------------------------------------- // Read the memory buffer on the device to the local variable err = clEnqueueReadBuffer ( command_queue, dst_mem_obj, CL_TRUE, 0, width * height * 3 * sizeof(unsigned char), dst_image, 0, NULL, NULL ); assert( err, "clEnqueueReadBuffer failure!\n" ); // -------------------------------------------------- // Cleaning up // -------------------------------------------------- // Clean err |= clFlush( command_queue ); err |= clReleaseMemObject( src_mem_obj ); err |= clReleaseMemObject( dst_mem_obj ); assert( err, "clRelease failure!\n" ); }
Partager