32 #ifndef _Random123_sse_dot_h__
33 #define _Random123_sse_dot_h__
37 #if R123_USE_X86INTRIN_H
38 #include <x86intrin.h>
40 #if R123_USE_IA32INTRIN_H
41 #include <ia32intrin.h>
43 #if R123_USE_XMMINTRIN_H
44 #include <xmmintrin.h>
46 #if R123_USE_EMMINTRIN_H
47 #include <emmintrin.h>
49 #if R123_USE_SMMINTRIN_H
50 #include <smmintrin.h>
52 #if R123_USE_WMMINTRIN_H
53 #include <wmmintrin.h>
68 unsigned int eax, ebx, ecx, edx;
69 __asm__ __volatile__ (
"cpuid":
"=a" (eax),
"=b" (ebx),
"=c" (ecx),
"=d" (edx) :
73 #elif R123_USE_CPUID_MSVC
77 return (CPUInfo[2]>>25)&1;
80 #warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false"
93 #if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64))
98 R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
105 return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
118 #if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
124 _mm_store_si128(&u.m, si);
127 #elif defined(__llvm__) || defined(__ICC)
129 return (uint64_t)_mm_cvtsi128_si64(si);
136 return (uint64_t)_mm_cvtsi128_si64x(si);
139 #if defined(__GNUC__) && __GNUC__ < 4
141 R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){
150 #if R123_USE_CXX11_UNRESTRICTED_UNIONS
161 #if R123_USE_CXX11_EXPLICIT_CONVERSIONS
// Explicit conversion to bool: reports whatever the private helper
// _bool() returns for this object. Being `explicit`, it only applies in
// contexts that ask for a bool directly (e.g. an `if` condition).
// NOTE(review): appears right after `#if R123_USE_CXX11_EXPLICIT_CONVERSIONS`
// in this listing, so it is presumably compiled only when that macro is set.
165 explicit operator bool() const
{
    const bool truth = _bool();
    return truth;
}
// Conversion to `const void*` used as a truth value: yields the address
// of this object when _bool() is true and a null pointer otherwise.
// NOTE(review): looks like the pre-C++11 alternative to the explicit
// bool conversion above it in the listing -- confirm against the
// preprocessor lines that are not visible here.
169 operator const void*() const
{
    if (_bool()) {
        return this;
    }
    return 0;
}
// Implicit conversion to the raw SSE vector type: hands back a copy of
// the wrapped member `m`.
171 operator __m128i() const
{
    return m;
}
// Truth value of the wrapped vector, computed with _mm_testz_si128.
// The intrinsic returns 1 when the bitwise AND of its two operands is all
// zero; with both operands equal to `m`, that means "m is all zero", so
// the result here is true exactly when `m` has at least one bit set.
// NOTE(review): a second _bool() definition follows in this listing;
// presumably a preprocessor conditional (not visible here) selects one.
175 bool _bool() const
{
    const int is_all_zero = _mm_testz_si128(m, m);
    return is_all_zero == 0;
}
// Truth value of the wrapped vector, computed without _mm_testz_si128.
// Each 32-bit lane of `m` is compared against zero, the four comparison
// results are gathered into a 4-bit mask, and the function returns true
// unless all four bits are set (mask == 0xf), i.e. unless every lane
// compared equal to zero.
// NOTE(review): another _bool() definition precedes this one in the
// listing; presumably a preprocessor conditional (not visible here)
// selects one.
177 bool _bool() const
{
    const __m128i lane_is_zero = _mm_cmpeq_epi32(m, _mm_setzero_si128());
    const int zero_lane_mask = _mm_movemask_ps(_mm_castsi128_ps(lane_is_zero));
    return zero_lane_mask != 0xf;
}
183 __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
184 c = _mm_add_epi64(c, zeroone);
187 __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
188 if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){
189 __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
190 c = _mm_add_epi64(c, onezero);
193 unsigned mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
196 if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){
197 __m128i onezero = _mm_set_epi64x(1,0);
198 c = _mm_add_epi64(c, onezero);
206 __m128i incr128 = _mm_set_epi64x(0, n);
207 c = _mm_add_epi64(c, incr128);
211 if((uint64_t)lo64 < n)
212 c = _mm_add_epi64(c, _mm_set_epi64x(1,0));
219 throw std::runtime_error(
"operator<=(unsigned long long, r123m128i) is unimplemented.");}
225 throw std::runtime_error(
"operator<(r123m128i, r123m128i) is unimplemented.");}
227 throw std::runtime_error(
"operator<=(r123m128i, r123m128i) is unimplemented.");}
229 throw std::runtime_error(
"operator>(r123m128i, r123m128i) is unimplemented.");}
231 throw std::runtime_error(
"operator>=(r123m128i, r123m128i) is unimplemented.");}
234 return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
238 r123m128i LHS; LHS.
m=_mm_set_epi64x(0, lhs);
return LHS == rhs; }
246 _mm_storeu_si128(&u.m, m.
m);
247 return os << u.u64[0] <<
" " << u.u64[1];
252 is >> u64[0] >> u64[1];
253 m.
m = _mm_set_epi64x(u64[1], u64[0]);
262 ret.
m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
static std::ostream & operator<<(std::ostream &os, const r123m128i &m)
Definition: sse.h:241
static bool operator==(const r123m128i &lhs, const r123m128i &rhs)
Definition: sse.h:233
static uint64_t _mm_extract_lo64(__m128i si)
Definition: sse.h:119
static bool operator!=(const r123m128i &lhs, const r123m128i &rhs)
Definition: sse.h:235
static r123m128i & operator++(r123m128i &v)
Definition: sse.h:181
static std::istream & operator>>(std::istream &is, r123m128i &m)
Definition: sse.h:250
static bool operator>(const r123m128i &, const r123m128i &)
Definition: sse.h:228
static bool operator>=(const r123m128i &, const r123m128i &)
Definition: sse.h:230
r123m128i assemble_from_u32< r123m128i >(uint32_t *p32)
Definition: sse.h:260
T assemble_from_u32(uint32_t *p32)
static r123m128i & operator+=(r123m128i &lhs, R123_ULONG_LONG n)
Definition: sse.h:204
static int haveAESNI()
Definition: sse.h:81
static bool operator<=(R123_ULONG_LONG, const r123m128i &)
Definition: sse.h:218
static bool operator<(const r123m128i &, const r123m128i &)
Definition: sse.h:224
r123m128i & operator=(R123_ULONG_LONG n)
Definition: sse.h:160
r123m128i & operator=(const __m128i &rhs)
Definition: sse.h:159
__m128i m
Definition: sse.h:149