1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
| #include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
/*
 * OpenCL C source of the "testopencl" kernel, handed to
 * clCreateProgramWithSource() and compiled at run time.
 *
 * The kernel counts a 32-bit unsigned value down from 0xffffffff to zero,
 * adding 0.000001 to a single-precision accumulator on every iteration, and
 * finally stores the accumulator in *out. It never looks at a work-item id,
 * so every work-item executes the same loop and writes to the same location.
 *
 * Notes kept from the original author:
 *  - "#pragma OPENCL EXTENSION all : enable" was left out because the
 *    author's card does not support it.
 *  - "double f=0.0;" was replaced by a float because double precision is not
 *    supported on the author's card.
 */
const char kernel_cl[] =
    "__kernel void testopencl(__global float *out)\n"
    "{\n"
    "uint i=0xffffffff;\n"
    "float f=0.0;\n"
    "while(i)\n"
    "{\n"
    "i--;\n"
    "f=f+0.000001;\n"
    "}\n"
    "*out=f;\n"
    "}";
int main()
{
cl_int err;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint platforms, devices;
cl_context_properties properties[]={CL_CONTEXT_PLATFORM,0,0};
cl_context context;
cl_command_queue command_queue ;
cl_program program;
cl_kernel kernel;
cl_mem output_buffer;
cl_float results;
const char* kernel_src=kernel_cl;
size_t len;
size_t maxworkitemsize[3], global_work_size[3], local_work_size[3];//3 pour 3 dimention
char buffer[4096];
//la meme fonction sur le CPU
uint i=0xffffffff;
double f=0.0;//avec double pressision on obtien un resultat correcte
while (i)
{
i--;
f=f+0.000001;
}
printf("Resultat CPU:%f\n",f);
// recuperation platform_id
err=clGetPlatformIDs(1, &platform_id, &platforms);
if (err != CL_SUCCESS)
{
printf("Unable to get Platform ID. err Code=%d\n",err);
exit(1);
}
// recuperation device_id
err=clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1, &device_id, &devices);
if (err != CL_SUCCESS)
{
printf("Unable to get Device ID. err Code=%d\n",err);
exit(1);
}
// des info sur ma carte
err=clGetDeviceInfo(device_id,CL_DEVICE_VERSION,sizeof(buffer),buffer, NULL);
printf("CL_DEVICE_VERSION:%s\n",buffer);
err=clGetDeviceInfo(device_id,CL_DEVICE_EXTENSIONS,sizeof(buffer),buffer, NULL);
printf("CL_DEVICE_EXTENSIONS:%s\n",buffer);
err=clGetDeviceInfo(device_id,CL_DRIVER_VERSION,sizeof(buffer),buffer, NULL);
printf("CL_DRIVER_VERSION:%s\n",buffer);
err=clGetDeviceInfo(device_id,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(global_work_size[0]),global_work_size, NULL);
printf("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:%d\n",global_work_size[0]);
err=clGetDeviceInfo(device_id,CL_DEVICE_ADDRESS_BITS,sizeof(global_work_size[0]),global_work_size, NULL);
printf("CL_DEVICE_ADDRESS_BITS:%d\n",global_work_size[0]);
err=clGetDeviceInfo(device_id,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(len),&len, NULL);
printf("CL_DEVICE_MAX_WORK_GROUP_SIZE:%d\n",len);
err=clGetDeviceInfo(device_id,CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(maxworkitemsize),&maxworkitemsize, NULL);
printf("CL_DEVICE_MAX_WORK_ITEM_SIZES:(%d,%d,%d)\n",maxworkitemsize[0],maxworkitemsize[1],maxworkitemsize[2]);
properties[1]=(cl_context_properties)platform_id;
// creation du contenu
context = clCreateContext(properties, 1, &device_id, NULL, NULL, &err);
if (err != CL_SUCCESS)
{
printf("Unable to create context. Error Code=%d\n",err);
exit(1);
}
// creation command queue
command_queue = clCreateCommandQueue(context,device_id, 0, &err);
if (err != CL_SUCCESS)
{
printf("Unable to create command queue. Error Code=%d\n",err);
exit(1);
}
// creation program
program = clCreateProgramWithSource(context, 1 ,&kernel_src, NULL, &err);
if (err != CL_SUCCESS)
{
printf("Unable to create program object. Error Code=%d\n",err);
exit(1);
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Build failed. Error Code=%d\n", err);
// affichage de log de compillation
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
printf("%s\n",buffer);
exit(1);
}
// creation du kernel
kernel = clCreateKernel(program, "testopencl", &err);
if (err != CL_SUCCESS)
{
printf("Unable to create kernel object. Error Code=%d\n",err);
exit(1);
}
// creation du buffer de sortie
output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float), NULL ,NULL);
// set the kernel arguments
err=clSetKernelArg(kernel, 0, sizeof(cl_mem), &output_buffer);
if (err != CL_SUCCESS)
{
printf("Unable to set kernel arguments. Error Code=%d\n",err);
exit(1);
}
// global & local work size
global_work_size[0]=(512+512+64)*512;//(512+512+64)*512CL_DEVICE_MAX_WORK_ITEM_SIZES(x,y,z)*CL_DEVICE_MAX_WORK_GROUP_SIZE
local_work_size[0]=512;// pour ma carte (512;512;64)
//execution
err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,&global_work_size,&local_work_size, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Unable to enqueue kernel command. Error Code=%d\n",err);
exit(1);
}
// on attend que la commande se termine
clFinish(command_queue);
// lescture du buffer de sortie
err = clEnqueueReadBuffer(command_queue, output_buffer, CL_TRUE, 0, sizeof(float), &results, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
printf("Error enqueuing read buffer command. Error Code=%d\n",err);
//exit(1);
}
printf("Resultat GPU:%f %d\n",results,sizeof(float));
// liberation de memoire
clReleaseMemObject(output_buffer);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
return 0;
} |
Partager