42 #include <emmintrin.h>
44 #define RETf inline __m128
45 #define RETi inline __m128i
// Broadcast a single float into all four lanes of an SSE register.
inline __m128 sse_set(const float &x)
{
  return _mm_set1_ps(x);
}
// Build an SSE register from four floats.
// Arguments are forwarded unchanged to _mm_set_ps, which takes the highest
// lane first: x ends up in lane 3 and w in lane 0 (the lowest address when
// the register is stored to memory).
inline __m128 sse_set(float x, float y, float z, float w)
{
  return _mm_set_ps(x, y, z, w);
}
// Broadcast a single 32-bit integer into all four lanes of an SSE register.
inline __m128i sse_set(const int &x)
{
  return _mm_set1_epi32(x);
}
// Load four consecutive floats starting at x using the aligned load
// (_mm_load_ps); the address of x must be 16-byte aligned.
inline __m128 sse_ld(const float &x)
{
  const float *src = &x;
  return _mm_load_ps(src);
}
// Load four consecutive floats starting at x using the unaligned load
// (_mm_loadu_ps); no alignment is required of the address of x.
inline __m128 sse_ldu(const float &x)
{
  const float *src = &x;
  return _mm_loadu_ps(src);
}
// Store the four lanes of y to the four floats starting at x using the
// aligned store (_mm_store_ps); the address of x must be 16-byte aligned.
// Returns y unchanged so the call can be chained.
inline __m128 sse_str(float &x, const __m128 y)
{
  float *dst = &x;
  _mm_store_ps(dst, y);
  return y;
}
// Store only the lowest lane of y into x (_mm_store_ss); memory after x is
// not touched. Returns the full vector y unchanged.
inline __m128 sse_str1(float &x, const __m128 y)
{
  float *dst = &x;
  _mm_store_ss(dst, y);
  return y;
}
// Store the four lanes of y to the four floats starting at x using the
// unaligned store (_mm_storeu_ps); no alignment is required of the address.
// Returns y unchanged so the call can be chained.
inline __m128 sse_stru(float &x, const __m128 y)
{
  float *dst = &x;
  _mm_storeu_ps(dst, y);
  return y;
}
// Fill the four floats starting at x with the scalar y: y is broadcast to
// all lanes and written with the aligned store, so the address of x must be
// 16-byte aligned. Returns the broadcast vector.
inline __m128 sse_str(float &x, const float y)
{
  const __m128 filled = _mm_set1_ps(y);
  _mm_store_ps(&x, filled);
  return filled;
}
// Lane-wise addition of four packed 32-bit integers.
inline __m128i sse_add(const __m128i x, const __m128i y)
{
  return _mm_add_epi32(x, y);
}
// Lane-wise addition of four packed floats.
inline __m128 sse_add(const __m128 x, const __m128 y)
{
  return _mm_add_ps(x, y);
}
// Lane-wise sum of three float vectors, evaluated as (x + y) + z.
// The association order is kept fixed because float addition is not
// associative.
inline __m128 sse_add(const __m128 x, const __m128 y, const __m128 z)
{
  const __m128 xy = _mm_add_ps(x, y);
  return _mm_add_ps(xy, z);
}
// Lane-wise sum of four float vectors, evaluated as ((a + b) + c) + d.
// The association order is kept fixed because float addition is not
// associative.
// NOTE(review): d is taken by const reference while a, b, c are by value —
// presumably a workaround for compilers that reject more than three by-value
// __m128 parameters (e.g. 32-bit MSVC, error C2719); confirm before changing.
inline __m128 sse_add(const __m128 a, const __m128 b, const __m128 c, const __m128 &d)
{
  const __m128 ab = _mm_add_ps(a, b);
  const __m128 abc = _mm_add_ps(ab, c);
  return _mm_add_ps(abc, d);
}
// Lane-wise subtraction of packed floats: x - y.
inline __m128 sse_sub(const __m128 x, const __m128 y)
{
  return _mm_sub_ps(x, y);
}
// Lane-wise multiplication of packed floats.
inline __m128 sse_mul(const __m128 x, const __m128 y)
{
  return _mm_mul_ps(x, y);
}
// Multiply every lane of x by the scalar y (y is broadcast to all lanes and
// used as the right-hand operand).
inline __m128 sse_mul(const __m128 x, const float y)
{
  const __m128 scale = _mm_set1_ps(y);
  return _mm_mul_ps(x, scale);
}
// Multiply every lane of y by the scalar x (x is broadcast to all lanes and
// used as the left-hand operand).
inline __m128 sse_mul(const float x, const __m128 y)
{
  const __m128 scale = _mm_set1_ps(x);
  return _mm_mul_ps(scale, y);
}
// In-place lane-wise increment of a register: x += y.
// Returns the updated value of x.
inline __m128 sse_inc(__m128 &x, const __m128 y)
{
  x = _mm_add_ps(x, y);
  return x;
}
// In-place lane-wise increment of four floats in memory: x[0..3] += y.
// Uses the aligned load and store, so the address of x must be 16-byte
// aligned. Returns the vector that was written back.
inline __m128 sse_inc(float &x, const __m128 y)
{
  float *mem = &x;
  const __m128 sum = _mm_add_ps(_mm_load_ps(mem), y);
  _mm_store_ps(mem, sum);
  return sum;
}
// In-place lane-wise decrement of a register: x -= y.
// Returns the updated value of x.
inline __m128 sse_dec(__m128 &x, const __m128 y)
{
  x = _mm_sub_ps(x, y);
  return x;
}
// In-place lane-wise decrement of four floats in memory: x[0..3] -= y.
// Uses the aligned load and store, so the address of x must be 16-byte
// aligned. Returns the vector that was written back.
inline __m128 sse_dec(float &x, const __m128 y)
{
  float *mem = &x;
  const __m128 diff = _mm_sub_ps(_mm_load_ps(mem), y);
  _mm_store_ps(mem, diff);
  return diff;
}
// Lane-wise minimum of two packed-float vectors.
inline __m128 sse_min(const __m128 x, const __m128 y)
{
  return _mm_min_ps(x, y);
}
// Lane-wise approximate reciprocal (1/x) via _mm_rcp_ps. The result is a
// reduced-precision estimate, not an exactly rounded division.
inline __m128 sse_rcp(const __m128 x)
{
  return _mm_rcp_ps(x);
}
// Lane-wise approximate reciprocal square root (1/sqrt(x)) via _mm_rsqrt_ps.
// The result is a reduced-precision estimate.
inline __m128 sse_rcpsqrt(const __m128 x)
{
  return _mm_rsqrt_ps(x);
}
// Bitwise AND of the raw bits of two packed-float vectors.
inline __m128 sse_and(const __m128 x, const __m128 y)
{
  return _mm_and_ps(x, y);
}
// Bitwise AND of two 128-bit integer vectors.
inline __m128i sse_and(const __m128i x, const __m128i y)
{
  return _mm_and_si128(x, y);
}
// Bitwise AND-NOT on packed floats: (~x) & y. Note that it is the FIRST
// operand that is complemented.
inline __m128 sse_andnot(const __m128 x, const __m128 y)
{
  return _mm_andnot_ps(x, y);
}
// Bitwise OR of the raw bits of two packed-float vectors.
inline __m128 sse_or(const __m128 x, const __m128 y)
{
  return _mm_or_ps(x, y);
}
// Bitwise XOR of the raw bits of two packed-float vectors.
inline __m128 sse_xor(const __m128 x, const __m128 y)
{
  return _mm_xor_ps(x, y);
}
// Lane-wise x > y on packed floats. Each result lane is a bit mask: all ones
// where the comparison holds, all zeros otherwise.
inline __m128 sse_cmpgt(const __m128 x, const __m128 y)
{
  return _mm_cmpgt_ps(x, y);
}
// Lane-wise signed x > y on packed 32-bit integers. Each result lane is a
// bit mask: all ones where the comparison holds, all zeros otherwise.
inline __m128i sse_cmpgt(const __m128i x, const __m128i y)
{
  return _mm_cmpgt_epi32(x, y);
}
// Convert four packed signed 32-bit integers to four packed floats.
inline __m128 sse_cvt(const __m128i x)
{
  return _mm_cvtepi32_ps(x);
}
// Convert four packed floats to four packed signed 32-bit integers using
// the truncating conversion (_mm_cvttps_epi32): values round toward zero.
inline __m128i sse_cvt(const __m128 x)
{
  return _mm_cvttps_epi32(x);
}