//OpenCL C helpers for 4*4 matrix math and a vertex-transform kernel.
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
//Compute the product matA * matB of two 4*4 matrices.
//Operands and result are stored flat in a float16, with element (row, col)
//located at index row * 4 + col.
float16 multMat (float16 matA, float16 matB) {
    float16 product;
    for (int row = 0; row < 4; ++row) {
        const int rowStart = row * 4;
        for (int col = 0; col < 4; ++col) {
            //Accumulate row `row` of matA against column `col` of matB.
            float sum = 0;
            for (int k = 0; k < 4; ++k) {
                sum += matA[rowStart + k] * matB[k * 4 + col];
            }
            product[rowStart + col] = sum;
        }
    }
    return product;
}
//Multiply a 4*4 matrix with a vec4 (result = matA * vecB).
//matA is stored flat in a float16 with element (i, j) at index i * 4 + j, so
//each group of four consecutive components is one matrix row. Component i of
//the result is the dot product of row i with vecB.
//A float16 cannot be subscripted twice (matA[i] is already a scalar), so the
//rows are selected with component swizzles instead.
float4 multVec (float16 matA, float4 vecB) {
    return (float4) (dot(matA.s0123, vecB),
                     dot(matA.s4567, vecB),
                     dot(matA.s89ab, vecB),
                     dot(matA.scdef, vecB));
}
//Transpose a 4*4 matrix.
//matA is stored flat in a float16 with element (i, j) at index i * 4 + j.
//The result holds element (j, i) of matA at index i * 4 + j, which is the
//component permutation 0,4,8,c, 1,5,9,d, 2,6,a,e, 3,7,b,f of the input.
float16 transpose(float16 matA) {
    return matA.s048c159d26ae37bf;
}
//Convert vertex positions from object space to viewport space.
//Each work-item handles one slot `tid`: it reads the position selected by
//indices[tid], applies the instance transform, then the view, projection and
//viewport matrices (with a divide by clip-space w before the viewport step),
//and stores the result in slot `tid` of the position arrays.
//
//  vPosX, vPosY, vPosZ, vPosW : position components; read at indices[tid],
//                               overwritten at tid. x, y and z are stored as
//                               absolute values, w is stored unchanged.
//  vColRed/Blue/Green/Alpha, vTCU, vTCV : not used by this kernel.
//  indices            : for each slot, the index of the position to read.
//  numIndices, baseIndices, baseVertices : not used by this kernel.
//  nbVerticesPerFaces : element 0 is used as the number of slots per instance.
//  transfMatrices     : 16 consecutive floats per instance, in the layout
//                       multVec expects.
//  projMatrix, viewMatrix, viewportMatrix : matrices passed by value.
//  nbVertices         : number of slots to process; work-items beyond it do
//                       nothing.
//
//Non-pointer kernel arguments live in the private address space, so the scalar
//and float16 arguments carry no __global qualifier.
//
//NOTE(review): positions are read at indices[tid] and written at tid in the
//same buffers with no synchronization between work-items, so a work-item may
//read a slot another one has already overwritten. Separate output buffers
//would be needed to rule this out - confirm against the host code.
__kernel void vertexShader(__global float* vPosX, __global float* vPosY, __global float* vPosZ, __global float* vPosW,
                           __global unsigned int* vColRed, __global unsigned int* vColBlue, __global unsigned int* vColGreen, __global unsigned int* vColAlpha, __global int* vTCU, __global int* vTCV,
                           __global unsigned int* indices, unsigned int numIndices, __global unsigned int* baseIndices,
                           __global unsigned int* baseVertices, __global unsigned int* nbVerticesPerFaces, __global float* transfMatrices, float16 projMatrix, float16 viewMatrix, float16 viewportMatrix,
                           int nbVertices) {
    const size_t tid = get_global_id(0);
    //Reject a negative count explicitly: comparing size_t with a negative int
    //would convert it to a huge unsigned value and let every work-item through.
    if (nbVertices <= 0 || tid >= (size_t) nbVertices) {
        return;
    }

    //NOTE(review): the original referenced an undeclared scalar
    //`nbVerticesPerFace`; element 0 of nbVerticesPerFaces is used here on the
    //assumption that the count is the same for every instance - confirm.
    const unsigned int perInstance = nbVerticesPerFaces[0];
    //Fall back to instance 0 rather than dividing by zero.
    const size_t instanceID = (perInstance != 0) ? tid / perInstance : 0;

    //Load the 16 floats starting at transfMatrices[instanceID * 16].
    const float16 transfMatrix = vload16(instanceID, transfMatrices);

    const unsigned int src = indices[tid];
    float4 position = (float4) (vPosX[src], vPosY[src], vPosZ[src], vPosW[src]);

    const float4 worldcoords = multVec(transfMatrix, position);
    const float4 viewcoords = multVec(viewMatrix, worldcoords);
    const float4 clipcoords = multVec(projMatrix, viewcoords);
    //Perspective divide; yields inf/NaN components when clipcoords.w is 0.
    const float4 ndcCoords = clipcoords / clipcoords.w;
    position = multVec(viewportMatrix, ndcCoords);

    //fabs is the floating-point absolute value; abs only accepts integers.
    vPosX[tid] = fabs(position.x);
    vPosY[tid] = fabs(position.y);
    vPosZ[tid] = fabs(position.z);
    vPosW[tid] = position.w;
}