使用SSE指令优化的数学函数(整理)

王朝other·作者佚名  2006-01-09
窄屏简体版  字體: |||超大  

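其中有不少指令我还不熟悉,需要逐条慢慢研究。注意:代码中引用的常量(如 _ps_am_inv_sign_mask、_sincos_masks、_ps_sincos_p0 ~ _ps_sincos_p3、_epi32_1、_epi32_2 等)本文没有给出定义。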

/*
 * _SSE_cos - scalar SSE routine written as MSVC-style inline assembly.
 *
 * Judging by its name, it is meant to approximate cos(x). Every constant it
 * reads (_ps_am_inv_sign_mask, _ps_am_pi_o_2, _ps_am_2_o_pi, _ps_am_1,
 * _sincos_masks, _ps_sincos_p0.._p3) is defined outside this excerpt, so the
 * notes about their values below are assumptions to be confirmed.
 *
 * Steps visible in the code:
 *   1. t = ((x AND inv_sign_mask) + pi_o_2) * 2_o_pi
 *   2. q = trunc(t);  f = min(t - (float)q, one)
 *   3. y = f or (one - f), selected bitwise with _sincos_masks[q & 1]
 *   4. bit 1 of q is shifted to bit 31 and ORed into y as a sign bit
 *   5. result = y * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0)
 *
 * The result is stored back into the parameter x, which is then returned.
 */
float _SSE_cos( float x)

{

float temp;   // 4-byte scratch slot used to move the sign bit from edx into an XMM register

__asm

{

movss xmm0, x                          // xmm0 = x

movss xmm1, _ps_am_inv_sign_mask       // presumably 0x7FFFFFFF (everything but the sign bit) - confirm

andps xmm0, xmm1                       // xmm0 = x AND mask (|x| if the assumption above holds)

addss xmm0, _ps_am_pi_o_2              // add the constant; the name suggests pi/2 - confirm

mulss xmm0, _ps_am_2_o_pi              // t = scaled value; the name suggests a factor of 2/pi - confirm

cvttss2si ecx, xmm0                    // ecx = q = t truncated toward zero

movss xmm5, _ps_am_1                   // xmm5 = "one" constant (presumably 1.0f)

mov edx, ecx                           // edx = copy of q for the sign computation

shl edx, (31 - 1)                      // shift left by 30: bit 1 of q lands in bit 31

cvtsi2ss xmm1, ecx                     // xmm1 = (float)q

and edx, 0x80000000                    // keep only bit 31, i.e. a float sign bit derived from bit 1 of q

and ecx, 0x1                           // ecx = q & 1, used as the table index below

subss xmm0, xmm1                       // xmm0 = f = t - (float)q (fractional part)

movss xmm6, _sincos_masks[ecx * 4]     // load 32-bit table entry number (q & 1)

minss xmm0, xmm5                       // f = min(f, one)

movss xmm1, _ps_sincos_p3              // xmm1 = coefficient p3 (loaded early, used later)

subss xmm5, xmm0                       // xmm5 = one - f

andps xmm5, xmm6                       // keep (one - f) where the mask bits are set

movss xmm7, _ps_sincos_p2              // xmm7 = coefficient p2

andnps xmm6, xmm0                      // xmm6 = (NOT mask) AND f

mov temp, edx                          // spill the sign bit to memory

orps xmm5, xmm6                        // y = bitwise select between (one - f) and f; assumes entries are all-ones/all-zeros - confirm

movss xmm0, xmm5                       // xmm0 = y (kept for the final multiply)

mulss xmm5, xmm5                       // xmm5 = y^2

movss xmm4, _ps_sincos_p1              // xmm4 = coefficient p1

movss xmm2, xmm5                       // xmm2 = y^2 (reused for every polynomial step)

mulss xmm5, xmm1                       // xmm5 = p3 * y^2

movss xmm1, _ps_sincos_p0              // xmm1 = coefficient p0

addss xmm5, xmm7                       // xmm5 = p3*y^2 + p2

mulss xmm5, xmm2                       // xmm5 = (p3*y^2 + p2) * y^2

movss xmm3, temp                       // xmm3 = sign bit reloaded from memory

addss xmm5, xmm4                       // ... + p1

mulss xmm5, xmm2                       // ... * y^2

orps xmm0, xmm3                        // OR the sign bit into y

addss xmm5, xmm1                       // ... + p0 : polynomial value complete

mulss xmm0, xmm5                       // result = signed y * polynomial

movss x, xmm0                          // write the result back into the parameter x

}

return x;

}

/*
 * _SSE2_cos - SSE2 variant written as MSVC-style inline assembly; no
 * general-purpose registers or memory scratch slots are used.
 *
 * Judging by its name, it is meant to approximate cos(x). Every constant it
 * reads (_ps_am_inv_sign_mask, _ps_am_pi_o_2, _ps_am_2_o_pi, _ps_am_1,
 * _epi32_1, _epi32_2, _ps_sincos_p0.._p3) is defined outside this excerpt,
 * so the notes about their values below are assumptions to be confirmed.
 *
 * Steps visible in the code:
 *   1. t = ((x AND inv_sign_mask) + pi_o_2) * 2_o_pi
 *   2. q = trunc(t);  f = min(t - (float)q, one)
 *   3. y = f where (q AND _epi32_1) == 0, otherwise (one - f)
 *   4. (q AND _epi32_2) << 30 is ORed into a copy of y
 *   5. result = (y with those bits) * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0)
 *
 * Some instructions are packed forms, but only lane 0 is stored by the final
 * movss. The result is written back into the parameter x, which is returned.
 */
float _SSE2_cos(float x)

{

__asm

{

movss xmm0, x                          // xmm0 = x

movss xmm1, _ps_am_inv_sign_mask       // presumably 0x7FFFFFFF (everything but the sign bit) - confirm

movss xmm2, _ps_am_pi_o_2              // the name suggests pi/2 - confirm

movss xmm3, _ps_am_2_o_pi              // the name suggests 2/pi - confirm

andps xmm0, xmm1                       // xmm0 = x AND mask (|x| if the assumption above holds)

addss xmm0, xmm2                       // add the pi_o_2 constant

mulss xmm0, xmm3                       // t = scaled value

pxor xmm3, xmm3                        // xmm3 = 0, comparison operand for pcmpeqd below

movd xmm5, _epi32_1                    // 32-bit integer constant, presumably 1 - confirm

movss xmm4, _ps_am_1                   // xmm4 = "one" constant (presumably 1.0f)

cvttps2dq xmm2, xmm0                   // xmm2 = q = t truncated to int32 (packed conversion)

pand xmm5, xmm2                        // xmm5 = q AND _epi32_1

movd xmm1, _epi32_2                    // 32-bit integer constant, presumably 2 - confirm

pcmpeqd xmm5, xmm3                     // xmm5 = all-ones where (q AND _epi32_1) == 0, else zero

cvtdq2ps xmm6, xmm2                    // xmm6 = (float)q

pand xmm2, xmm1                        // xmm2 = q AND _epi32_2

pslld xmm2, (31 - 1)                   // shift left by 30; if _epi32_2 is 2 this puts that bit in bit 31 (sign)

subss xmm0, xmm6                       // xmm0 = f = t - (float)q (fractional part)

movss xmm3, _ps_sincos_p3              // xmm3 = coefficient p3

minss xmm0, xmm4                       // f = min(f, one)

subss xmm4, xmm0                       // xmm4 = one - f

andps xmm0, xmm5                       // keep f where the compare mask is set

andnps xmm5, xmm4                      // xmm5 = (NOT mask) AND (one - f)

orps xmm0, xmm5                        // y = mask ? f : (one - f)

movaps xmm1, xmm0                      // xmm1 = copy of y for the final multiply

movss xmm4, _ps_sincos_p2              // xmm4 = coefficient p2

mulss xmm0, xmm0                       // xmm0 = y^2

movss xmm5, _ps_sincos_p1              // xmm5 = coefficient p1

orps xmm1, xmm2                        // OR the shifted bit(s) into y

movaps xmm7, xmm0                      // xmm7 = y^2 (reused for every polynomial step)

mulss xmm0, xmm3                       // xmm0 = p3 * y^2

movss xmm6, _ps_sincos_p0              // xmm6 = coefficient p0

addss xmm0, xmm4                       // ... + p2

mulss xmm0, xmm7                       // ... * y^2

addss xmm0, xmm5                       // ... + p1

mulss xmm0, xmm7                       // ... * y^2

addss xmm0, xmm6                       // ... + p0 : polynomial value complete

mulss xmm0, xmm1                       // result = polynomial * signed y

movss x, xmm0                          // write the result back into the parameter x

}

return x;

}

float _SSE_Sqrt(float x)

{

float root = 0.f;

_asm

{

sqrtss xmm0, x

movss root, xmm0

}

return root;

}

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
 
© 2005- 王朝網路 版權所有 導航