// Wallis-product estimate of pi computed on the GPU (line-number gutter from the original web listing removed).
#include <stdio.h>
#include <stdlib.h>
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
/**
 * CUDA Kernel Device code
 *
 * Writes the i-th Wallis-product factor 4*i*i / (4*i*i - 1) into a[i] for
 * every i in [0, num).
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. Threads
 * whose global index is >= num do nothing, so the grid may be larger than
 * num.
 *
 * a   - pointer to at least num floats, dereferenced on the device; every
 *       element in [0, num) is overwritten.
 * num - number of elements to write.
 *
 * Note: the factor for i = 0 is zero, so a caller forming the product of the
 * factors has to start at i = 1.
 */
__global__ void pi ( float *a, int num)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < num)
    {
        // Evaluate in float rather than int: the integer expression 4*i*i
        // overflows a 32-bit int once i exceeds 23170 (and wraps to exactly 0
        // at i = 32768). For small i both forms give the same value.
        const float fi = (float)i;
        const float t = 4.0f * fi * fi;
        a[i] = t / (t - 1.0f);
    }
}
/**
 * Host driver: estimates pi with the Wallis product.
 *
 * Allocates a host and a device array of num floats, launches the `pi` kernel
 * to fill the device array with the factors 4*i*i / (4*i*i - 1), copies the
 * factors back, prints factors 1..num-1, multiplies them together and prints
 * twice the product.
 *
 * Returns 0 on success. Any failed allocation, copy, kernel launch, device
 * free or device reset prints a message to stderr and exits with
 * EXIT_FAILURE.
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;
    const int num = 50;
    const size_t size = num * sizeof(float);

    // Allocate the host vector
    float *h_a = (float *)malloc(size);
    if (h_a == NULL)
    {
        fprintf(stderr, "Failed to allocate host vector a!\n");
        exit(EXIT_FAILURE);
    }

    // Initialize every element of the host vector (index 0 included, so no
    // uninitialized memory is copied to the device). The kernel overwrites
    // all of these values.
    for (int i = 0; i < num; i++)
    {
        h_a[i] = rand()/(float)RAND_MAX;
    }

    // Allocate the device vector
    float *d_a = NULL;
    err = cudaMalloc((void **)&d_a, size);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device vector a (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Copy the host vector to the device vector
    printf("Copy input data from the host memory to the CUDA device\n");
    err = cudaMemcpy(d_a, h_a, size, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy vector A from host to device (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    printf("test");

    // Fixed block size with a ceil-div grid: a single block of num threads
    // stops launching once num exceeds the per-block thread limit. The
    // kernel's own bounds check masks off the surplus threads.
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (num + threadsPerBlock - 1) / threadsPerBlock;
    pi <<<blocksPerGrid, threadsPerBlock >>>(d_a,num);

    // A kernel launch returns nothing; launch errors must be queried explicitly.
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to launch pi kernel (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Copy the device result vector back to the host result vector. This
    // blocking copy also waits for the kernel to finish.
    printf("Copy output data from the CUDA device to the host memory\n");
    err = cudaMemcpy(h_a, d_a, size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy vector a from device to host (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Release the device vector
    err = cudaFree(d_a);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to free device vector a (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Reset the device
    err = cudaDeviceReset();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to deinitialize the device! error=%s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    // Print the factors; index 0 is skipped because its factor is zero.
    for (int i = 1; i < num; i++)
    {
        printf("%f \n" ,h_a[i]);
    }

    // Wallis product: pi/2 is approximated by the product of the factors
    // for i >= 1, so the estimate of pi is twice that product.
    float f = 1;
    for (int i = 1; i < num; i++)
    {
        f *= h_a[i];
    }
    printf("pi est égal %f",2*f);

    // Free host memory
    free(h_a);
    return 0;
}
// "Partager" -- share-button label left over from the source web page; not part of the program.