1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
| void evalDerSurfacePointCustom(GLint d, GLint *IndicesU, GLint *IndicesV, GLfloat *u, GLfloat *v, sfloat4 *, std::vector<glm::detail::tvec3<T> > &Res){
assert(d <= 1);
GLint tailleu = degreeU+1;
GLint taillev = degreeV+1;
GLint np = knotVectorU.size()-2-degreeU;
GLint nq = knotVectorV.size()-2-degreeV;
std::vector<std::vector<T> > Bin(d+1,std::vector<T>(d+1,1.0f));
std::vector<std::vector<std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > > > SKL(d+1,std::vector<std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > >(d+1,std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> >(3)));
std::vector<std::vector<std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > > > Aders(d+1,std::vector<std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > >(d+1,std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> >(4)));
{//Compute Aders and Wders
std::vector<std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > > temp(taillev,std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> >(4));
std::vector<sfloat4, Eigen::aligned_allocator<sfloat4> > Nu((d+1)*tailleu);
std::vector<sfloat4, Eigen::aligned_allocator<sfloat4> > Nv((d+1)*taillev);
basisDerFuncCustom(degreeU, d, IndicesU, u, knotVectorU, Nu);
basisDerFuncCustom(degreeV, d, IndicesV, v, knotVectorV, Nv);
GLint du = std::min(d,degreeU);
GLint dv = std::min(d,degreeV);
for(GLint k=0; k<=du; k++){
for(GLint s=0; s<=degreeV; s++){
for(GLint r=0; r<=degreeU; r++){
sfloat4 simd_x = _mm_set_ps(ctrlPointVector[(IndicesU[3]-degreeU+r)*(nq+1)+(IndicesV[3]-degreeV+s)][0], ctrlPointVector[(IndicesU[2]-degreeU+r)*(nq+1)+(IndicesV[2]-degreeV+s)][0], ctrlPointVector[(IndicesU[1]-degreeU+r)*(nq+1)+(IndicesV[1]-degreeV+s)][0], ctrlPointVector[(IndicesU[0]-degreeU+r)*(nq+1)+(IndicesV[0]-degreeV+s)][0]);
sfloat4 simd_y = _mm_set_ps(ctrlPointVector[(IndicesU[3]-degreeU+r)*(nq+1)+(IndicesV[3]-degreeV+s)][1], ctrlPointVector[(IndicesU[2]-degreeU+r)*(nq+1)+(IndicesV[2]-degreeV+s)][1], ctrlPointVector[(IndicesU[1]-degreeU+r)*(nq+1)+(IndicesV[1]-degreeV+s)][1], ctrlPointVector[(IndicesU[0]-degreeU+r)*(nq+1)+(IndicesV[0]-degreeV+s)][1]);
sfloat4 simd_z = _mm_set_ps(ctrlPointVector[(IndicesU[3]-degreeU+r)*(nq+1)+(IndicesV[3]-degreeV+s)][2], ctrlPointVector[(IndicesU[2]-degreeU+r)*(nq+1)+(IndicesV[2]-degreeV+s)][2], ctrlPointVector[(IndicesU[1]-degreeU+r)*(nq+1)+(IndicesV[1]-degreeV+s)][2], ctrlPointVector[(IndicesU[0]-degreeU+r)*(nq+1)+(IndicesV[0]-degreeV+s)][2]);
sfloat4 simd_w = _mm_set_ps(ctrlPointVector[(IndicesU[3]-degreeU+r)*(nq+1)+(IndicesV[3]-degreeV+s)][3], ctrlPointVector[(IndicesU[2]-degreeU+r)*(nq+1)+(IndicesV[2]-degreeV+s)][3], ctrlPointVector[(IndicesU[1]-degreeU+r)*(nq+1)+(IndicesV[1]-degreeV+s)][3], ctrlPointVector[(IndicesU[0]-degreeU+r)*(nq+1)+(IndicesV[0]-degreeV+s)][3]);
temp[s][0] = _mm_add_ps(temp[s][0], _mm_mul_ps(Nu[(k*tailleu)+r], simd_x));
temp[s][1] = _mm_add_ps(temp[s][1], _mm_mul_ps(Nu[(k*tailleu)+r], simd_y));
temp[s][2] = _mm_add_ps(temp[s][2], _mm_mul_ps(Nu[(k*tailleu)+r], simd_z));
temp[s][3] = _mm_add_ps(temp[s][3], _mm_mul_ps(Nu[(k*tailleu)+r], simd_w));
}
}
GLint dd = std::min(d-k,dv);
for(GLint l=0; l<=dd; l++){
for(GLint s=0; s<=degreeV; s++){
Aders[k][l][0] = _mm_add_ps(Aders[k][l][0], _mm_mul_ps(Nv[(l*taillev)+s], temp[s][0]));
Aders[k][l][1] = _mm_add_ps(Aders[k][l][1], _mm_mul_ps(Nv[(l*taillev)+s], temp[s][1]));
Aders[k][l][2] = _mm_add_ps(Aders[k][l][2], _mm_mul_ps(Nv[(l*taillev)+s], temp[s][2]));
Aders[k][l][3] = _mm_add_ps(Aders[k][l][3], _mm_mul_ps(Nv[(l*taillev)+s], temp[s][3]));
}
}
}
}
//Compute nurbs derivates
for(GLint k=0; k<=d; k++){
for(GLint l=0; l<=d-k; l++){
std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > v(3);
v[0] = Aders[k][l][0];
v[1] = Aders[k][l][1];
v[2] = Aders[k][l][2];
for(GLint j=1; j<=l; j++){
v[0] = _mm_sub_ps(v[0], _mm_mul_ps(_mm_set1_ps(Bin[l][j]), _mm_mul_ps(Aders[0][j][3], SKL[k][l-j][0])));
v[1] = _mm_sub_ps(v[1], _mm_mul_ps(_mm_set1_ps(Bin[l][j]), _mm_mul_ps(Aders[0][j][3], SKL[k][l-j][1])));
v[2] = _mm_sub_ps(v[2], _mm_mul_ps(_mm_set1_ps(Bin[l][j]), _mm_mul_ps(Aders[0][j][3], SKL[k][l-j][2])));
}
for(GLint i=1; i<=k; i++){
v[0] = _mm_sub_ps(v[0], _mm_mul_ps(_mm_set1_ps(Bin[k][i]), _mm_mul_ps(Aders[i][0][3], SKL[k-i][l][0])));
v[1] = _mm_sub_ps(v[1], _mm_mul_ps(_mm_set1_ps(Bin[k][i]), _mm_mul_ps(Aders[i][0][3], SKL[k-i][l][1])));
v[2] = _mm_sub_ps(v[2], _mm_mul_ps(_mm_set1_ps(Bin[k][i]), _mm_mul_ps(Aders[i][0][3], SKL[k-i][l][2])));
std::vector<sfloat4,Eigen::aligned_allocator<sfloat4> > v2(3);
for(GLint j=1; j<=l; j++){
v2[0] = _mm_add_ps(v2[0], _mm_mul_ps(_mm_set1_ps(Bin[l][j]), _mm_mul_ps(Aders[i][j][3], SKL[k-i][l-j][0])));
v2[1] = _mm_add_ps(v2[1], _mm_mul_ps(_mm_set1_ps(Bin[l][j]), _mm_mul_ps(Aders[i][j][3], SKL[k-i][l-j][1])));
v2[2] = _mm_add_ps(v2[2], _mm_mul_ps(_mm_set1_ps(Bin[l][j]), _mm_mul_ps(Aders[i][j][3], SKL[k-i][l-j][2])));
}
v[0] = _mm_sub_ps(v[0], _mm_mul_ps(_mm_set1_ps(Bin[k][i]), v2[0]));
v[1] = _mm_sub_ps(v[1], _mm_mul_ps(_mm_set1_ps(Bin[k][i]), v2[1]));
v[2] = _mm_sub_ps(v[2], _mm_mul_ps(_mm_set1_ps(Bin[k][i]), v2[2]));
}
SKL[k][l][0] = _mm_div_ps(v[0], Aders[0][0][3]);
SKL[k][l][1] = _mm_div_ps(v[1], Aders[0][0][3]);
SKL[k][l][2] = _mm_div_ps(v[2], Aders[0][0][3]);
}
}
for(GLint i=0; i<=d; i++){
for(GLint j=0; j<=d; j++){
Res[(i*(d+1)+j)*4] = (glm::detail::tvec3<T>(SKL[i][j][0].m128_f32[0], SKL[i][j][1].m128_f32[0], SKL[i][j][2].m128_f32[0]));
Res[(i*(d+1)+j)*4+1] = (glm::detail::tvec3<T>(SKL[i][j][0].m128_f32[1], SKL[i][j][1].m128_f32[1], SKL[i][j][2].m128_f32[1]));
Res[(i*(d+1)+j)*4+2] = (glm::detail::tvec3<T>(SKL[i][j][0].m128_f32[2], SKL[i][j][1].m128_f32[2], SKL[i][j][2].m128_f32[2]));
Res[(i*(d+1)+j)*4+3] = (glm::detail::tvec3<T>(SKL[i][j][0].m128_f32[3], SKL[i][j][1].m128_f32[3], SKL[i][j][2].m128_f32[3]));
}
}
} |
Partager