OpenCL et cmake

**Letinono** · 25/11/2009, 11h18

Bonjour,

J'essaie de compiler une librairie écrite avec openCL en utilisant cmake sous linux 64 bits.

OpenCL est une librairie C permettant d'effectuer des calculs sur GPU.

L'architecture finale de mon projet et la suivante:

OclTestLib
|- src -> oclVectorMul.cpp
|- include -> oclVectorMul.h
|- lib (crée par cmake) -> oclVectorMul.a
|- test
|- src -> test.cpp

J'ai créé un module CMake permettant de trouver les répertoires d'include d'openCL que j'ai appellé FindOpenCL.cmake

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
FIND_PATH(
    	OPENCL_INCLUDE_DIR
    	NAMES CL/cl.h OpenCL/cl.h
)
 
FIND_LIBRARY(
	OPENCL_LIBRARY
    	NAMES OpenCL
)
 
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  OPENCL
  DEFAULT_MSG
  OPENCL_LIBRARY OPENCL_INCLUDE_DIR
  )
 
if(OPENCL_FOUND)
  set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
else(OPENCL_FOUND)
  set(OPENCL_LIBRARIES)
endif(OPENCL_FOUND)
 
mark_as_advanced(
  OPENCL_INCLUDE_DIR
  OPENCL_LIBRARY
  )

J'ai ensuite créé, en me basant sur l'initiation à cmake de Florian Goujeon http://florian-goujeon.developpez.co...ke/initiation/, un fichier CMakeLists.txt permttant de compiler une librairie statique contenant du code OpenCL.

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
cmake_minimum_required(VERSION 2.6 FATAL_ERROR)
project(oclVectorMul)
 
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
 
set(LIBRARY_OUTPUT_PATH lib/${CMAKE_BUILD_TYPE})
 
include(FindOpenCL)
 
# Recherche d'OpenCL
find_package(OpenCL REQUIRED)
include_directories(${OPENCL_INCLUDE_DIR})
 
# Recherche des includes du sdk
include_directories(~/NVIDIA_GPU_Computing_SDK/shared/inc)
link_directories(~/NVIDIA_GPU_Computing_SDK/shared/lib)
include_directories(~/NVIDIA_GPU_Computing_SDK/OpenCL/common/inc)
link_directories(~/NVIDIA_GPU_Computing_SDK/OpenCL/common/lib)
 
# Include de la lib
include_directories(include)
 
file(
	GLOB_RECURSE
	source_files
	src/*
	include/*
)
 
add_library(
	oclVectorMul
	STATIC
	${source_files}	
  )
  
target_link_libraries(
  	oclVectorMul
  	${OPENCL_LIBRARIES}
	libshrutil.a
	liboclUtil.a  )

Ce fichier me permet donc de créer le dossier "lib" dans lequel sera mis la librairie oclVectorMul compilée.

La compilation de la librairie se passe sans soucis.

J'ai ensuite écrit un fichier CMakeLists.txt permettant de compiler un executable utilisant la librairie.

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
cmake_minimum_required(VERSION 2.6)
 
#Configuration du projet
project(test)
set(EXECUTABLE_OUTPUT_PATH bin/${CMAKE_BUILD_TYPE})
 
#Inclusion de la bibliothèque oclVectorMul
include_directories(../include)
link_directories(../lib)
 
#Configuration de l'exécutable
file(
	GLOB_RECURSE
	source_files
	src/*
)
add_executable(
	test
	${source_files}
)
 
#Configuration de l'édition de liens
target_link_libraries(
	test
	oclVectorMul
)

La compilation de l'executable plante lors du linkage avec la librairie. Chaque appel au SDK d'OpenCL provoque une erreur.

Voilà ce que donne la compilation:

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
Scanning dependencies of target test
[100%] Building CXX object CMakeFiles/test.dir/src/test.cpp.o
Linking CXX executable bin/test
../lib/liboclVectorMul.a(oclVectorMul.cpp.o): In function `liboclvecmul::Cleanup(int)':
oclVectorMul.cpp:(.text+0x20): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x80): undefined reference to `clReleaseKernel'
oclVectorMul.cpp:(.text+0x98): undefined reference to `clReleaseProgram'
oclVectorMul.cpp:(.text+0xb0): undefined reference to `clReleaseCommandQueue'
oclVectorMul.cpp:(.text+0xc8): undefined reference to `clReleaseContext'
oclVectorMul.cpp:(.text+0xe0): undefined reference to `clReleaseMemObject'
oclVectorMul.cpp:(.text+0xf8): undefined reference to `clReleaseMemObject'
oclVectorMul.cpp:(.text+0x133): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x14e): undefined reference to `shrLog'
../lib/liboclVectorMul.a(oclVectorMul.cpp.o): In function `liboclvecmul::multiplie(int, char**)':
oclVectorMul.cpp:(.text+0x17c): undefined reference to `shrCheckCmdLineFlag'
oclVectorMul.cpp:(.text+0x18c): undefined reference to `shrSetLogFileName'
oclVectorMul.cpp:(.text+0x1ab): undefined reference to `shrRoundUp'
oclVectorMul.cpp:(.text+0x1fd): undefined reference to `shrFillArray'
oclVectorMul.cpp:(.text+0x21c): undefined reference to `clCreateContextFromType'
oclVectorMul.cpp:(.text+0x251): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x27c): undefined reference to `clGetContextInfo'
oclVectorMul.cpp:(.text+0x2ba): undefined reference to `clGetContextInfo'
oclVectorMul.cpp:(.text+0x2f8): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x322): undefined reference to `clCreateCommandQueue'
oclVectorMul.cpp:(.text+0x357): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x36b): undefined reference to `shrDeltaT'
oclVectorMul.cpp:(.text+0x398): undefined reference to `clCreateBuffer'
oclVectorMul.cpp:(.text+0x3a9): undefined reference to `shrDeltaT'
oclVectorMul.cpp:(.text+0x3e1): undefined reference to `clCreateBuffer'
oclVectorMul.cpp:(.text+0x42a): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x447): undefined reference to `shrFindFilePath'
oclVectorMul.cpp:(.text+0x464): undefined reference to `oclLoadProgSource'
oclVectorMul.cpp:(.text+0x48c): undefined reference to `clCreateProgramWithSource'
oclVectorMul.cpp:(.text+0x4c1): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x4f2): undefined reference to `clBuildProgram'
oclVectorMul.cpp:(.text+0x526): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x546): undefined reference to `clCreateKernel'
oclVectorMul.cpp:(.text+0x57b): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x5a0): undefined reference to `clSetKernelArg'
oclVectorMul.cpp:(.text+0x5c1): undefined reference to `clSetKernelArg'
oclVectorMul.cpp:(.text+0x5ec): undefined reference to `clSetKernelArg'
oclVectorMul.cpp:(.text+0x617): undefined reference to `clSetKernelArg'
oclVectorMul.cpp:(.text+0x655): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x669): undefined reference to `shrDeltaT'
oclVectorMul.cpp:(.text+0x6ab): undefined reference to `clEnqueueNDRangeKernel'
oclVectorMul.cpp:(.text+0x6bb): undefined reference to `shrDeltaT'
oclVectorMul.cpp:(.text+0x6f6): undefined reference to `shrLog'
oclVectorMul.cpp:(.text+0x70a): undefined reference to `shrDeltaT'
oclVectorMul.cpp:(.text+0x758): undefined reference to `clEnqueueReadBuffer'
oclVectorMul.cpp:(.text+0x768): undefined reference to `shrDeltaT'
oclVectorMul.cpp:(.text+0x7a3): undefined reference to `shrLog'
collect2: ld a retourné 1 code d'état d'exécution
make[2]: *** [bin/test] Erreur 1
make[1]: *** [CMakeFiles/test.dir/all] Erreur 2
make: *** [all] Erreur 2

Voilà maintenant les codes sources des 3 fichiers:

oclVectorMul.cpp:

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
//*********************************************************************
//
//	Multiply a vector by a float data
//
//*********************************************************************
 
// common SDK header for standard utilities and system libs 
#include <oclUtils.h>
#include "oclVectorMul.h"
 
namespace liboclvecmul {
 
// Name of the file with the source code for the computation kernel
// *********************************************************************
const char* cSourceFile = "VectorMul.cl";
 
// Host buffers for demo
// *********************************************************************
void *src, *dst;        // Host buffers for OpenCL test
 
// OpenCL Vars
cl_context cxGPUContext;        // OpenCL context
cl_command_queue cqCommandQue;  // OpenCL command que
cl_device_id* cdDevices;        // OpenCL device list    
cl_program cpProgram;           // OpenCL program
cl_kernel ckKernel;             // OpenCL kernel
cl_mem cmDevSrc;               // OpenCL device source buffer
cl_mem cmDevDst;                // OpenCL device destination buffer 
size_t szGlobalWorkSize;        // 1D var for Total # of work items
size_t szLocalWorkSize;		    // 1D var for # of work items in the work group	
size_t szParmDataBytes;			// Byte size of context information
size_t szKernelLength;			// Byte size of kernel code
cl_int ciErr1, ciErr2;			// Error code var
char* cPathAndName = NULL;      // var for full paths to data, src, etc.
char* cSourceCL = NULL;         // Buffer to hold source for compilation 
 
// demo config vars
int iNumElements = 8388608;//16777216;//11444777;	// Length of float arrays to process (odd # for illustration)
float value = 10.0f;
shrBOOL bQuickTest = shrFALSE;  
 
// Forward Declarations
// *********************************************************************
void Cleanup (int iExitCode);
 
// Main function 
// *********************************************************************
int multiplie(int argc, char **argv)
{
    // get command line arg for quick test, if provided
    bQuickTest = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt");
 
    // start logs 
    shrSetLogFileName ("oclVectorMul.txt");
//    shrLog(LOGBOTH, 0.0, "%s Starting...\n\n# of float elements per Array \t= %u\n", argv[0], iNumElements); 
 
    // set and log Global and Local work size dimensions
    szLocalWorkSize = 256;
    szGlobalWorkSize = shrRoundUp((int)szLocalWorkSize, iNumElements);  // rounded up to the nearest multiple of the LocalWorkSize
//    shrLog(LOGBOTH, 0.0, "Global Work Size \t\t= %u\nLocal Work Size \t\t= %u\n# of Work Groups \t\t= %u\n\n", 
//           szGlobalWorkSize, szLocalWorkSize, (szGlobalWorkSize % szLocalWorkSize + szGlobalWorkSize/szLocalWorkSize)); 
 
    // Allocate and initialize host arrays 
    src = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);
    dst = (void *)malloc(sizeof(cl_float) * szGlobalWorkSize);
    shrFillArray((float*)src, iNumElements);
//    shrLog(LOGBOTH, 0.0,  "Allocate and Init Host Mem...\n"); 
 
    // Create the OpenCL context on a GPU device
    cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &ciErr1);
//    shrLog(LOGBOTH, 0.0, "clCreateContextFromType...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clCreateContextFromType, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Get the list of GPU devices associated with context
    ciErr1 = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
    cdDevices = (cl_device_id*)malloc(szParmDataBytes);
    ciErr1 |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
//    shrLog(LOGBOTH, 0.0, "clGetContextInfo...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clGetContextInfo, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Create a command-queue
    cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, &ciErr1);
//    shrLog(LOGBOTH, 0.0, "clCreateCommandQueue...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clCreateCommandQueue, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Allocate the OpenCL source and result buffer memory objects on the device GMEM, and copy the data to the device
shrDeltaT(0);
    cmDevSrc = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * szGlobalWorkSize, src, &ciErr1);
double memCpyHDTime = shrDeltaT(0);
    cmDevDst = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(cl_float) * szGlobalWorkSize, NULL, &ciErr2);
    ciErr1 |= ciErr2;
//    shrLog(LOGBOTH, 0.0, "clCreateBuffer...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clCreateBuffer, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Read the OpenCL kernel in from source file
    cPathAndName = shrFindFilePath(cSourceFile, argv[0]);
    cSourceCL = oclLoadProgSource(cPathAndName, "", &szKernelLength);
//    shrLog(LOGBOTH, 0.0, "oclLoadProgSource (%s)...\n", cSourceFile); 
 
    // Create the program
    cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &szKernelLength, &ciErr1);
//    shrLog(LOGBOTH, 0.0, "clCreateProgramWithSource...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clCreateProgramWithSource, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Build the program
    ciErr1 = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
//    shrLog(LOGBOTH, 0.0, "clBuildProgram...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clBuildProgram, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Create the kernel
    ckKernel = clCreateKernel(cpProgram, "VectorMul", &ciErr1);
//    shrLog(LOGBOTH, 0.0, "clCreateKernel...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clCreateKernel, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Set the Argument values
    ciErr1 = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&cmDevSrc);
    ciErr1 |= clSetKernelArg(ckKernel, 1, sizeof(cl_float), (void*)&value);
    ciErr1 |= clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&cmDevDst);
    ciErr1 |= clSetKernelArg(ckKernel, 3, sizeof(cl_int), (void*)&iNumElements);
//    shrLog(LOGBOTH, 0.0, "clSetKernelArg...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clSetKernelArg, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Launch kernel
shrDeltaT(0);
    ciErr1 = clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
double computeTime = shrDeltaT(0);
//    shrLog(LOGBOTH, 0.0, "clEnqueueNDRangeKernel...\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clEnqueueNDRangeKernel, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
    // Synchronous/blocking read of results, and check accumulated errors
shrDeltaT(0);
    ciErr1 = clEnqueueReadBuffer(cqCommandQue, cmDevDst, CL_TRUE, 0, sizeof(cl_float) * szGlobalWorkSize, dst, 0, NULL, NULL);
double memCpyDHTime = shrDeltaT(0);
//    shrLog(LOGBOTH, 0.0, "clEnqueueReadBuffer...\n\n"); 
    if (ciErr1 != CL_SUCCESS)
    {
        shrLog(LOGBOTH, 0.0, "Error in clEnqueueReadBuffer, near Line %u in file %u", __LINE__, __FILE__);
        Cleanup(EXIT_FAILURE);
    }
 
	printf("\nMultiplication\n\n\t*%d datas\n\t*%d Global Work Size\n\t*%d Local Work Size\n", iNumElements, (int)szGlobalWorkSize, (int)szLocalWorkSize);
	printf("\n=============================================\n");
	printf("Time to copy datas HOST -> DEVICE : %.3f ms\n", (memCpyHDTime*1000));
	printf("Time to compute                   : %.3f ms\n", (computeTime*1000));
	printf("Time to copy datas DEVICE -> HOST : %.3f ms\n", (memCpyDHTime*1000));
	printf("--------------------------------------------\n");
	printf("Total time                        : %.3f ms\n", ((memCpyHDTime+computeTime+memCpyDHTime)*1000));
	printf("=============================================\n\n");
 
	float *srcf, *dstf;
	srcf = (float*)src;
	dstf = (float*)dst;
 
	for (int i=0; i<iNumElements; i++)
	{
		if (srcf[i]*10 != dstf[i])
			printf("Error at indice %d\n", i);
	}
 
    // Cleanup and leave
    Cleanup (EXIT_SUCCESS);
}
 
void Cleanup (int iExitCode)
{
    // Cleanup allocated objects
    shrLog(LOGBOTH, 0.0, "\nStarting Cleanup...\n\n");
    if(cdDevices)free(cdDevices);
    if(cPathAndName)free(cPathAndName);
    if(cSourceCL)free(cSourceCL);
    if(ckKernel)clReleaseKernel(ckKernel);  
    if(cpProgram)clReleaseProgram(cpProgram);
    if(cqCommandQue)clReleaseCommandQueue(cqCommandQue);
    if(cxGPUContext)clReleaseContext(cxGPUContext);
    if(cmDevSrc)clReleaseMemObject(cmDevSrc);
    if(cmDevDst)clReleaseMemObject(cmDevDst);
 
    // Free host memory
    free(src); 
    free (dst);
 
    // finalize logs and leave
    if (bQuickTest)
    {
        shrLog(LOGBOTH | CLOSELOG, 0.0, "oclVectorMul Ending...\n");
    }
    else 
    {
        shrLog(LOGBOTH | CLOSELOG, 0.0, "oclVectorMul Ending...\nPress Enter to Exit\n");
        getchar();
    }
    exit (iExitCode);
}
 
}	// namespace

oclVectorMul.h :

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
7
8
9
10
#ifndef __OCLVECTORMUL__
#define __OCLVECTORMUL__
 
namespace liboclvecmul {
 
int multiplie(int argc, char **argv); 
 
}
 
#endif

test.cpp :

Code :

Sélectionner tout - Visualiser dans une fenêtre à part

1
2
3
4
5
6
#include "oclVectorMul.h"
 
int main(int argc, char **argv)
{
	return liboclvecmul::multiplie(argc,argv);;
}

Pourquoi la compilation plante alors que j'ai bien linké, normalement, le sdk avec la librairie?

Merci pour votre aide!

OpenCL et cmake

Systèmes de compilation

Mode arborescent

Discussions similaires

Partager

Partager