/////////////////////////////////////////////////////////////////////////////
// am_cos_ps
// Packed single-precision cosine approximation: all four float lanes of the
// operand are processed at once. Float math is done in SSE registers, the
// integer quadrant/parity work in MMX registers.
//
// Outline of what the instructions below do:
//   1. t = ((x AND _ps_am_inv_sign_mask) + _ps_am_pi_o_2) * _ps_am_2_o_pi
//   2. n = trunc(t) as int32, f = min(t - n, _ps_am_1)
//   3. y = f where (n AND _pi32_1) == 0, otherwise _ps_am_1 - f
//   4. bit 1 of n is shifted up by 30 and OR-ed into y
//   5. result = y * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0)
//
// The constants are defined outside this view. Their names suggest
// 0x7FFFFFFF, pi/2, 2/pi and 1.0f, i.e. cos(x) evaluated as sin(|x| + pi/2)
// with the argument measured in quarter turns - TODO confirm.
//
// The operand is read from xmm0 on entry and the result is left in xmm0;
// this assumes the compiler passes/returns __m128 in xmm0 - TODO confirm.
// Registers written: ecx, xmm0-xmm7, mm0-mm3, mm7.
// Scratch memory: 32 bytes just below (esp rounded down to 16), i.e. below
// the current stack pointer.
// NOTE(review): MMX registers are used and no emms is issued in this
// function; presumably the caller must execute emms before any x87 code -
// confirm against the library's usage notes.
__m128 __declspec(naked) __stdcall am_cos_ps(__m128 x) // any x
{
__asm
{
andps xmm0, _ps_am_inv_sign_mask        // mask the input (presumably clears the sign bit -> |x|)
mov ecx, esp
addps xmm0, _ps_am_pi_o_2               // add the phase offset constant
mulps xmm0, _ps_am_2_o_pi               // scale -> t
and ecx, ~15                            // ecx = esp rounded down to a 16-byte boundary (needed for movaps)
movhlps xmm2, xmm0                      // low half of xmm2 = lanes 2,3 of t
cvttps2pi mm0, xmm0                     // mm0 = trunc(t) for lanes 0,1
movq mm2, mm0
cvttps2pi mm1, xmm2                     // mm1 = trunc(t) for lanes 2,3
pslld mm2, (31 - 1)                     // move bit 1 of n up to bit 31 (lanes 0,1)
movq mm3, mm1
pslld mm3, (31 - 1)                     // same for lanes 2,3
pand mm2, _pi32_0x80000000              // keep only the masked bit(s) (name suggests bit 31)
movq [ecx - 32], mm2                    // scratch[0..7]  = sign bits, lanes 0,1
pand mm3, _pi32_0x80000000
movq [ecx - 32 + 8], mm3                // scratch[8..15] = sign bits, lanes 2,3
cvtpi2ps xmm4, mm1                      // low half of xmm4 = float(n) for lanes 2,3
pand mm1, _pi32_1                       // isolate parity bit of n (lanes 2,3), assuming _pi32_1 == 1
// Macro defined elsewhere; presumably copies the low half of xmm4 into its
// high half so float(n) for lanes 2,3 ends up in lanes 2,3 - TODO confirm.
ASM_MOVE_L2H(xmm4)
pxor mm7, mm7                           // mm7 = 0, comparison operand for pcmpeqd
movaps xmm5, _ps_am_1
cvtpi2ps xmm4, mm0                      // low half of xmm4 = float(n) for lanes 0,1 (high half untouched)
pand mm0, _pi32_1                       // isolate parity bit of n (lanes 0,1)
pcmpeqd mm1, mm7                        // all-ones where the masked bit is 0 (lanes 2,3)
subps xmm0, xmm4                        // f = t - float(n)
movq [ecx - 16 + 8], mm1                // scratch[24..31] = parity mask, lanes 2,3
minps xmm0, xmm5                        // clamp f to at most _ps_am_1
pcmpeqd mm0, mm7                        // all-ones where the masked bit is 0 (lanes 0,1)
subps xmm5, xmm0                        // xmm5 = _ps_am_1 - f
movq [ecx - 16], mm0                    // scratch[16..23] = parity mask, lanes 0,1
movaps xmm6, [ecx - 16]                 // reload the four-lane parity mask into an SSE register
andps xmm0, xmm6                        // keep f where mask is set
andnps xmm6, xmm5                       // keep (_ps_am_1 - f) where mask is clear
movaps xmm3, _ps_sincos_p3
orps xmm0, xmm6                         // y = per-lane select of f / (_ps_am_1 - f)
movaps xmm5, _ps_sincos_p2
movaps xmm1, xmm0                       // xmm1 = y (kept for the final multiply)
mulps xmm0, xmm0                        // xmm0 = y^2
movaps xmm7, [ecx - 32]                 // reload the four-lane sign bits
movaps xmm2, xmm0                       // xmm2 = y^2
mulps xmm0, xmm3                        // p3*y^2
movaps xmm3, _ps_sincos_p1
addps xmm0, xmm5                        // + p2
movaps xmm5, _ps_sincos_p0
mulps xmm0, xmm2                        // * y^2
addps xmm0, xmm3                        // + p1
mulps xmm0, xmm2                        // * y^2
orps xmm1, xmm7                         // OR the sign bits into y
addps xmm0, xmm5                        // + p0
mulps xmm0, xmm1                        // result = signed y * polynomial(y^2)
ret 16                                  // return and release 16 bytes of stack
}
}
/////////////////////////////////////////////////////////////////////////////
// am_cos_eps
// Packed single-precision cosine approximation, four lanes at once, using
// only XMM registers (packed-integer SSE2 instructions replace the MMX work).
//
// Outline of what the instructions below do:
//   1. t = ((x AND _ps_am_inv_sign_mask) + _ps_am_pi_o_2) * _ps_am_2_o_pi
//   2. n = trunc(t) as int32, f = min(t - n, _ps_am_1)
//   3. y = f where (n AND _epi32_1) == 0, otherwise _ps_am_1 - f
//   4. (n AND _epi32_2) is shifted left by 30 and OR-ed into y
//   5. result = y * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0)
//
// The constants are defined outside this view. Their names suggest
// 0x7FFFFFFF, pi/2, 2/pi, 1.0f, 1 and 2 - TODO confirm.
//
// The operand is read from xmm0 on entry and the result is left in xmm0;
// this assumes the compiler passes/returns __m128 in xmm0 - TODO confirm.
// Registers written: xmm0-xmm7. No memory is written.
__m128 __declspec(naked) __stdcall am_cos_eps(__m128 x) // any x
{
__asm
{
andps xmm0, _ps_am_inv_sign_mask        // mask the input (presumably clears the sign bit -> |x|)
addps xmm0, _ps_am_pi_o_2               // add the phase offset constant
mulps xmm0, _ps_am_2_o_pi               // scale -> t
pxor xmm3, xmm3                         // xmm3 = 0, comparison operand for pcmpeqd
movdqa xmm5, _epi32_1
movaps xmm4, _ps_am_1
cvttps2dq xmm2, xmm0                    // n = trunc(t), four int32 lanes
pand xmm5, xmm2                         // n AND _epi32_1 (parity bit, assuming the constant is 1)
pcmpeqd xmm5, xmm3                      // mask: all-ones where that bit is 0
cvtdq2ps xmm6, xmm2                     // float(n)
pand xmm2, _epi32_2                     // isolate the bit selected by _epi32_2
pslld xmm2, (31 - 1)                    // shift left by 30 (bit 1 lands on bit 31, the float sign bit)
subps xmm0, xmm6                        // f = t - float(n)
minps xmm0, xmm4                        // clamp f to at most _ps_am_1
subps xmm4, xmm0                        // xmm4 = _ps_am_1 - f
andps xmm0, xmm5                        // keep f where mask is set
andnps xmm5, xmm4                       // keep (_ps_am_1 - f) where mask is clear
orps xmm0, xmm5                         // y = per-lane select
movaps xmm1, xmm0                       // xmm1 = y (kept for the final multiply)
mulps xmm0, xmm0                        // xmm0 = y^2
orps xmm1, xmm2                         // OR the shifted bit into y
movaps xmm7, xmm0                       // xmm7 = y^2
mulps xmm0, _ps_sincos_p3               // p3*y^2
addps xmm0, _ps_sincos_p2               // + p2
mulps xmm0, xmm7                        // * y^2
addps xmm0, _ps_sincos_p1               // + p1
mulps xmm0, xmm7                        // * y^2
addps xmm0, _ps_sincos_p0               // + p0
mulps xmm0, xmm1                        // result = signed y * polynomial(y^2)
ret 16                                  // return and release 16 bytes of stack
}
}
/////////////////////////////////////////////////////////////////////////////
// am_cos_ss
// Scalar single-precision cosine approximation: only the lowest float lane
// of the operand is computed. The quadrant/parity work is done in the
// general-purpose registers ecx and edx.
//
// Outline of what the instructions below do:
//   1. t = ((x AND _ps_am_inv_sign_mask) + _ps_am_pi_o_2) * _ps_am_2_o_pi
//   2. n = trunc(t) as int32, f = min(t - n, _ps_am_1)
//   3. m = _sincos_masks[n AND 1]; y = ((_ps_am_1 - f) AND m) OR (f AND NOT m)
//   4. (n << 30) AND 0x80000000 (bit 1 of n moved to bit 31) is OR-ed into y
//   5. result = y * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0)
//
// The constants and the _sincos_masks table are defined outside this view.
// The names suggest 0x7FFFFFFF, pi/2, 2/pi and 1.0f, and the table
// presumably holds { 0, 0xFFFFFFFF } so that an even n selects f and an odd
// n selects _ps_am_1 - f - TODO confirm.
//
// The operand is read from xmm0 on entry and the result is left in the low
// lane of xmm0; this assumes the compiler passes/returns __m128 in xmm0 -
// TODO confirm.
// Registers written: ecx, edx, xmm0-xmm7.
// Scratch memory: the dword at [esp - 4], i.e. below the current stack pointer.
__m128 __declspec(naked) __stdcall am_cos_ss(__m128 x)
{
__asm
{
movss xmm1, _ps_am_inv_sign_mask        // scalar load: upper three lanes of xmm1 become 0
andps xmm0, xmm1                        // mask low lane (presumably |x|); upper lanes of xmm0 are zeroed
addss xmm0, _ps_am_pi_o_2               // add the phase offset constant
mulss xmm0, _ps_am_2_o_pi               // scale -> t
cvttss2si ecx, xmm0                     // ecx = n = trunc(t)
movss xmm5, _ps_am_1
mov edx, ecx
shl edx, (31 - 1)                       // move bit 1 of n up to bit 31
cvtsi2ss xmm1, ecx                      // xmm1 = float(n)
and edx, 0x80000000                     // keep only bit 31 (float sign bit position)
and ecx, 0x1                            // ecx = parity of n, used as table index
subss xmm0, xmm1                        // f = t - float(n)
movss xmm6, _sincos_masks[ecx * 4]      // m = 4-byte table entry selected by parity
minss xmm0, xmm5                        // clamp f to at most _ps_am_1
movss xmm1, _ps_sincos_p3
subss xmm5, xmm0                        // xmm5 = _ps_am_1 - f
andps xmm5, xmm6                        // (_ps_am_1 - f) AND m
movss xmm7, _ps_sincos_p2
andnps xmm6, xmm0                       // f AND NOT m
mov [esp - 4], edx                      // spill the sign bit so it can be loaded into an XMM register
orps xmm5, xmm6                         // y = select
movss xmm0, xmm5                        // xmm0 low lane = y (kept for the final multiply)
mulss xmm5, xmm5                        // xmm5 = y^2
movss xmm4, _ps_sincos_p1
movss xmm2, xmm5                        // xmm2 = y^2
mulss xmm5, xmm1                        // p3*y^2
movss xmm1, _ps_sincos_p0
addss xmm5, xmm7                        // + p2
mulss xmm5, xmm2                        // * y^2
movss xmm3, [esp - 4]                   // reload the spilled sign bit
addss xmm5, xmm4                        // + p1
mulss xmm5, xmm2                        // * y^2
orps xmm0, xmm3                         // OR the sign bit into y
addss xmm5, xmm1                        // + p0
mulss xmm0, xmm5                        // result = signed y * polynomial(y^2)
ret 16                                  // return and release 16 bytes of stack
}
}
/////////////////////////////////////////////////////////////////////////////
// am_cos_ess
// Scalar single-precision cosine approximation using only XMM registers:
// the float math uses scalar (ss) instructions, the quadrant/parity work
// uses packed-integer SSE2 instructions. Only the low lane of the result is
// produced by the scalar arithmetic.
//
// Outline of what the instructions below do (low lane):
//   1. t = ((x AND _ps_am_inv_sign_mask) + _ps_am_pi_o_2) * _ps_am_2_o_pi
//   2. n = trunc(t) as int32, f = min(t - n, _ps_am_1)
//   3. y = f where (n AND _epi32_1) == 0, otherwise _ps_am_1 - f
//   4. (n AND _epi32_2) is shifted left by 30 and OR-ed into y
//   5. result = y * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0)
//
// The constants are defined outside this view. Their names suggest
// 0x7FFFFFFF, pi/2, 2/pi, 1.0f, 1 and 2 - TODO confirm.
//
// The operand is read from xmm0 on entry and the result is left in the low
// lane of xmm0; this assumes the compiler passes/returns __m128 in xmm0 -
// TODO confirm.
// Registers written: xmm0-xmm7. No memory is written.
__m128 __declspec(naked) __stdcall am_cos_ess(__m128 x) // any x
{
__asm
{
movss xmm1, _ps_am_inv_sign_mask        // scalar loads: upper three lanes of each register become 0
movss xmm2, _ps_am_pi_o_2
movss xmm3, _ps_am_2_o_pi
andps xmm0, xmm1                        // mask low lane (presumably |x|); upper lanes of xmm0 are zeroed
addss xmm0, xmm2                        // add the phase offset constant
mulss xmm0, xmm3                        // scale -> t
pxor xmm3, xmm3                         // xmm3 = 0, comparison operand for pcmpeqd
movd xmm5, _epi32_1                     // 32-bit load into the low lane, upper lanes zeroed
movss xmm4, _ps_am_1
cvttps2dq xmm2, xmm0                    // n = trunc(t); packed convert, only the low lane matters below
pand xmm5, xmm2                         // n AND _epi32_1 (parity bit, assuming the constant is 1)
movd xmm1, _epi32_2
pcmpeqd xmm5, xmm3                      // mask: all-ones where that bit is 0
cvtdq2ps xmm6, xmm2                     // float(n)
pand xmm2, xmm1                         // isolate the bit selected by _epi32_2
pslld xmm2, (31 - 1)                    // shift left by 30 (bit 1 lands on bit 31, the float sign bit)
subss xmm0, xmm6                        // f = t - float(n)
movss xmm3, _ps_sincos_p3
minss xmm0, xmm4                        // clamp f to at most _ps_am_1
subss xmm4, xmm0                        // xmm4 = _ps_am_1 - f
andps xmm0, xmm5                        // keep f where mask is set
andnps xmm5, xmm4                       // keep (_ps_am_1 - f) where mask is clear
orps xmm0, xmm5                         // y = select
movaps xmm1, xmm0                       // xmm1 = y (kept for the final multiply)
movss xmm4, _ps_sincos_p2
mulss xmm0, xmm0                        // xmm0 = y^2
movss xmm5, _ps_sincos_p1
orps xmm1, xmm2                         // OR the shifted bit into y
movaps xmm7, xmm0                       // xmm7 = y^2
mulss xmm0, xmm3                        // p3*y^2
movss xmm6, _ps_sincos_p0
addss xmm0, xmm4                        // + p2
mulss xmm0, xmm7                        // * y^2
addss xmm0, xmm5                        // + p1
mulss xmm0, xmm7                        // * y^2
addss xmm0, xmm6                        // + p0
mulss xmm0, xmm1                        // result = signed y * polynomial(y^2)
ret 16                                  // return and release 16 bytes of stack
}
// NOTE(review): the trailing '|' on the closing line below looks like a
// page-extraction artifact rather than code - confirm and remove.
} |
Partager