00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _Random123_sse_dot_h__
00033 #define _Random123_sse_dot_h__
00034
00035 #if R123_USE_SSE
00036
00037 #if R123_USE_X86INTRIN_H
00038 #include <x86intrin.h>
00039 #endif
00040 #if R123_USE_IA32INTRIN_H
00041 #include <ia32intrin.h>
00042 #endif
00043 #if R123_USE_XMMINTRIN_H
00044 #include <xmmintrin.h>
00045 #endif
00046 #if R123_USE_EMMINTRIN_H
00047 #include <emmintrin.h>
00048 #endif
00049 #if R123_USE_SMMINTRIN_H
00050 #include <smmintrin.h>
00051 #endif
00052 #if R123_USE_WMMINTRIN_H
00053 #include <wmmintrin.h>
00054 #endif
00055 #if R123_USE_INTRIN_H
00056 #include <intrin.h>
00057 #endif
00058 #ifdef __cplusplus
00059 #include <iostream>
00060 #include <limits>
00061 #include <stdexcept>
00062 #endif
00063
00064 #if R123_USE_ASM_GNU
00065
00066
00067 R123_STATIC_INLINE int haveAESNI(){
00068 unsigned int eax, ebx, ecx, edx;
00069 __asm__ __volatile__ ("cpuid": "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) :
00070 "a" (1));
00071 return (ecx>>25) & 1;
00072 }
00073 #elif R123_USE_CPUID_MSVC
00074 R123_STATIC_INLINE int haveAESNI(){
00075 int CPUInfo[4];
00076 __cpuid(CPUInfo, 1);
00077 return (CPUInfo[2]>>25)&1;
00078 }
00079 #else
00080 #warning "No R123_USE_CPUID_XXX method chosen. haveAESNI will always return false"
00081 R123_STATIC_INLINE int haveAESNI(){
00082 return 0;
00083 }
00084 #endif
00085
00086
00087
00088
00089
00090
00091
00092
00093 #if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64))
00094
00095
00096
00097
00098 R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
00099 union{
00100 uint64_t u64;
00101 uint32_t u32[2];
00102 } u1, u0;
00103 u1.u64 = v1;
00104 u0.u64 = v0;
00105 return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
00106 }
00107 #endif
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118 #if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
00119 R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
00120 union{
00121 uint64_t u64[2];
00122 __m128i m;
00123 }u;
00124 _mm_store_si128(&u.m, si);
00125 return u.u64[0];
00126 }
00127 #elif defined(__llvm__) || defined(__ICC)
00128 R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
00129 return (uint64_t)_mm_cvtsi128_si64(si);
00130 }
00131 #else
00132
00133
00134
00135 R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
00136 return (uint64_t)_mm_cvtsi128_si64x(si);
00137 }
00138 #endif
00139 #if defined(__GNUC__) && __GNUC__ < 4
00140
00141 R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){
00142 return (__m128)si;
00143 }
00144 #endif
00145
00146 #ifdef __cplusplus
00147
00148 struct r123m128i{
00149 __m128i m;
00150 #if R123_USE_CXX11_UNRESTRICTED_UNIONS
00151
00152
00153
00154
00155
00156 r123m128i() = default;
00157 r123m128i(__m128i _m): m(_m){}
00158 #endif
00159 r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;}
00160 r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;}
00161 #if R123_USE_CXX11_EXPLICIT_CONVERSIONS
00162
00163
00164
00165 explicit operator bool() const {return _bool();}
00166 #else
00167
00168
00169 operator const void*() const{return _bool()?this:0;}
00170 #endif
00171 operator __m128i() const {return m;}
00172
00173 private:
00174 #if R123_USE_SSE4_1
00175 bool _bool() const{ return !_mm_testz_si128(m,m); }
00176 #else
00177 bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); }
00178 #endif
00179 };
00180
00181 R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){
00182 __m128i& c = v.m;
00183 __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
00184 c = _mm_add_epi64(c, zeroone);
00185
00186 #if R123_USE_SSE4_1
00187 __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
00188 if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){
00189 __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
00190 c = _mm_add_epi64(c, onezero);
00191 }
00192 #else
00193 unsigned mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
00194
00195
00196 if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){
00197 __m128i onezero = _mm_set_epi64x(1,0);
00198 c = _mm_add_epi64(c, onezero);
00199 }
00200 #endif
00201 return v;
00202 }
00203
00204 R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, R123_ULONG_LONG n){
00205 __m128i c = lhs.m;
00206 __m128i incr128 = _mm_set_epi64x(0, n);
00207 c = _mm_add_epi64(c, incr128);
00208
00209
00210 int64_t lo64 = _mm_extract_lo64(c);
00211 if((uint64_t)lo64 < n)
00212 c = _mm_add_epi64(c, _mm_set_epi64x(1,0));
00213 lhs.m = c;
00214 return lhs;
00215 }
00216
00217
00218 R123_STATIC_INLINE bool operator<=(R123_ULONG_LONG, const r123m128i &){
00219 throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");}
00220
00221
00222
00223
00224 R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){
00225 throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");}
00226 R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){
00227 throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");}
00228 R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){
00229 throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");}
00230 R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){
00231 throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");}
00232
00233 R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){
00234 return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
00235 R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){
00236 return !(lhs==rhs);}
00237 R123_STATIC_INLINE bool operator==(R123_ULONG_LONG lhs, const r123m128i &rhs){
00238 r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs); return LHS == rhs; }
00239 R123_STATIC_INLINE bool operator!=(R123_ULONG_LONG lhs, const r123m128i &rhs){
00240 return !(lhs==rhs);}
00241 R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){
00242 union{
00243 uint64_t u64[2];
00244 __m128i m;
00245 }u;
00246 _mm_storeu_si128(&u.m, m.m);
00247 return os << u.u64[0] << " " << u.u64[1];
00248 }
00249
00250 R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){
00251 uint64_t u64[2];
00252 is >> u64[0] >> u64[1];
00253 m.m = _mm_set_epi64x(u64[1], u64[0]);
00254 return is;
00255 }
00256
00257 template<typename T> inline T assemble_from_u32(uint32_t *p32);
00258
00259 template <>
00260 inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){
00261 r123m128i ret;
00262 ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
00263 return ret;
00264 }
00265
00266 #else
00267
/** C counterpart of the C++ r123m128i wrapper: a struct holding one __m128i. */
typedef struct { __m128i m; } r123m128i;
00271
00272 #endif
00273
00274 #else
00275 R123_STATIC_INLINE int haveAESNI(){
00276 return 0;
00277 }
00278 #endif
00279
00280 #endif