00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef __Random123_ars_dot_hpp__
00033 #define __Random123_ars_dot_hpp__
00034
00035 #include "features/compilerfeatures.h"
00036 #include "array.h"
00037
00038 #if R123_USE_AES_NI
00039
/* Default ARS round count.  A build can override it by defining
 * ARS1xm128i_DEFAULT_ROUNDS before this point is reached. */
#if !defined(ARS1xm128i_DEFAULT_ROUNDS)
#define ARS1xm128i_DEFAULT_ROUNDS 7
#endif

/** Exposes the default round count as the enum constant ars1xm128i_rounds. */
enum r123_enum_ars1xm128i {
    ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS
};
00046
00047
00049 typedef struct r123array1xm128i ars1xm128i_ctr_t;
00051 typedef struct r123array1xm128i ars1xm128i_key_t;
00053 typedef struct r123array1xm128i ars1xm128i_ukey_t;
00055 R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
00057 R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
00058 __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B),
00059 R123_64BIT(0x9E3779B97F4A7C15));
00060
00061
00062
00063 __m128i kk = k.v[0].m;
00064 __m128i v = _mm_xor_si128(in.v[0].m, kk);
00065 ars1xm128i_ctr_t ret;
00066 R123_ASSERT(Nrounds<=10);
00067 if( Nrounds>1 ){
00068 kk = _mm_add_epi64(kk, kweyl);
00069 v = _mm_aesenc_si128(v, kk);
00070 }
00071 if( Nrounds>2 ){
00072 kk = _mm_add_epi64(kk, kweyl);
00073 v = _mm_aesenc_si128(v, kk);
00074 }
00075 if( Nrounds>3 ){
00076 kk = _mm_add_epi64(kk, kweyl);
00077 v = _mm_aesenc_si128(v, kk);
00078 }
00079 if( Nrounds>4 ){
00080 kk = _mm_add_epi64(kk, kweyl);
00081 v = _mm_aesenc_si128(v, kk);
00082 }
00083 if( Nrounds>5 ){
00084 kk = _mm_add_epi64(kk, kweyl);
00085 v = _mm_aesenc_si128(v, kk);
00086 }
00087 if( Nrounds>6 ){
00088 kk = _mm_add_epi64(kk, kweyl);
00089 v = _mm_aesenc_si128(v, kk);
00090 }
00091 if( Nrounds>7 ){
00092 kk = _mm_add_epi64(kk, kweyl);
00093 v = _mm_aesenc_si128(v, kk);
00094 }
00095 if( Nrounds>8 ){
00096 kk = _mm_add_epi64(kk, kweyl);
00097 v = _mm_aesenc_si128(v, kk);
00098 }
00099 if( Nrounds>9 ){
00100 kk = _mm_add_epi64(kk, kweyl);
00101 v = _mm_aesenc_si128(v, kk);
00102 }
00103 kk = _mm_add_epi64(kk, kweyl);
00104 v = _mm_aesenclast_si128(v, kk);
00105 ret.v[0].m = v;
00106 return ret;
00107 }
00108
00112 #define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
00113
00115 typedef struct r123array4x32 ars4x32_ctr_t;
00117 typedef struct r123array4x32 ars4x32_key_t;
00119 typedef struct r123array4x32 ars4x32_ukey_t;
00121 enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
00123 R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
00125 R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
00126 ars1xm128i_ctr_t c128;
00127 ars1xm128i_key_t k128;
00128 c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
00129 k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
00130 c128 = ars1xm128i_R(Nrounds, c128, k128);
00131 _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
00132 return c;
00133 }
00134
00138 #define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
00139
00140 #ifdef __cplusplus
00141 namespace r123{
00163 template<unsigned int ROUNDS>
00164 struct ARS1xm128i_R{
00165 typedef ars1xm128i_ctr_t ctr_type;
00166 typedef ars1xm128i_key_t key_type;
00167 typedef ars1xm128i_key_t ukey_type;
00168 static const unsigned int rounds=ROUNDS;
00169 R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00170 return ars1xm128i_R(ROUNDS, ctr, key);
00171 }
00172 };
00173
00178 template<unsigned int ROUNDS>
00179 struct ARS4x32_R{
00180 typedef ars4x32_ctr_t ctr_type;
00181 typedef ars4x32_key_t key_type;
00182 typedef ars4x32_key_t ukey_type;
00183 static const unsigned int rounds=ROUNDS;
00184 R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00185 return ars4x32_R(ROUNDS, ctr, key);
00186 }
00187 };
00196 typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
00197 typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
00198 }
00199
00200 #endif
00201
00202 #endif
00203
00204 #endif