00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef __Random123_ars_dot_hpp__
00033 #define __Random123_ars_dot_hpp__
00034
00035 #include "features/compilerfeatures.h"
00036 #include "array.h"
00037
00038 #if R123_USE_AES_NI
00039
/* Round count used by the default ARS entry points.  A definition of
 * ARS1xm128i_DEFAULT_ROUNDS made before this point takes precedence over
 * the built-in value of 7. */
#if !defined(ARS1xm128i_DEFAULT_ROUNDS)
#define ARS1xm128i_DEFAULT_ROUNDS 7
#endif

/* The default round count exposed as an integer constant. */
enum { ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS };
00046
00047
00048 typedef struct r123array1xm128i ars1xm128i_ctr_t;
00049 typedef struct r123array1xm128i ars1xm128i_key_t;
00050 typedef struct r123array1xm128i ars1xm128i_ukey_t;
00051 R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
00052 R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
00053 __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B),
00054 R123_64BIT(0x9E3779B97F4A7C15));
00055
00056
00057
00058 __m128i kk = k.v[0].m;
00059 __m128i v = _mm_xor_si128(in.v[0].m, kk);
00060 ars1xm128i_ctr_t ret;
00061 R123_ASSERT(Nrounds<=10);
00062 if( Nrounds>1 ){
00063 kk = _mm_add_epi64(kk, kweyl);
00064 v = _mm_aesenc_si128(v, kk);
00065 }
00066 if( Nrounds>2 ){
00067 kk = _mm_add_epi64(kk, kweyl);
00068 v = _mm_aesenc_si128(v, kk);
00069 }
00070 if( Nrounds>3 ){
00071 kk = _mm_add_epi64(kk, kweyl);
00072 v = _mm_aesenc_si128(v, kk);
00073 }
00074 if( Nrounds>4 ){
00075 kk = _mm_add_epi64(kk, kweyl);
00076 v = _mm_aesenc_si128(v, kk);
00077 }
00078 if( Nrounds>5 ){
00079 kk = _mm_add_epi64(kk, kweyl);
00080 v = _mm_aesenc_si128(v, kk);
00081 }
00082 if( Nrounds>6 ){
00083 kk = _mm_add_epi64(kk, kweyl);
00084 v = _mm_aesenc_si128(v, kk);
00085 }
00086 if( Nrounds>7 ){
00087 kk = _mm_add_epi64(kk, kweyl);
00088 v = _mm_aesenc_si128(v, kk);
00089 }
00090 if( Nrounds>8 ){
00091 kk = _mm_add_epi64(kk, kweyl);
00092 v = _mm_aesenc_si128(v, kk);
00093 }
00094 if( Nrounds>9 ){
00095 kk = _mm_add_epi64(kk, kweyl);
00096 v = _mm_aesenc_si128(v, kk);
00097 }
00098 kk = _mm_add_epi64(kk, kweyl);
00099 v = _mm_aesenclast_si128(v, kk);
00100 ret.v[0].m = v;
00101 return ret;
00102 }
00103
/** ARS on one 128-bit lane using the default round count, ars1xm128i_rounds. */
#define ars1xm128i(c, k) ars1xm128i_R(ars1xm128i_rounds, c, k)
00108
00110 typedef struct r123array4x32 ars4x32_ctr_t;
00112 typedef struct r123array4x32 ars4x32_key_t;
00114 typedef struct r123array4x32 ars4x32_ukey_t;
00116 enum {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
00118 R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
00120 R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
00121 ars1xm128i_ctr_t c128;
00122 ars1xm128i_key_t k128;
00123 c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
00124 k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
00125 c128 = ars1xm128i_R(Nrounds, c128, k128);
00126 _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
00127 return c;
00128 }
00129
/** Four-element ARS using the default round count, ars4x32_rounds. */
#define ars4x32(c, k) ars4x32_R(ars4x32_rounds, c, k)
00134
00135 #ifdef __cplusplus
00136 namespace r123{
00158 template<unsigned int ROUNDS>
00159 struct ARS1xm128i_R{
00160 typedef ars1xm128i_ctr_t ctr_type;
00161 typedef ars1xm128i_key_t key_type;
00162 typedef ars1xm128i_key_t ukey_type;
00163 static const unsigned int rounds=ROUNDS;
00164 R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00165 return ars1xm128i_R(ROUNDS, ctr, key);
00166 }
00167 };
00168
00173 template<unsigned int ROUNDS>
00174 struct ARS4x32_R{
00175 typedef ars4x32_ctr_t ctr_type;
00176 typedef ars4x32_key_t key_type;
00177 typedef ars4x32_key_t ukey_type;
00178 static const unsigned int rounds=ROUNDS;
00179 R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
00180 return ars4x32_R(ROUNDS, ctr, key);
00181 }
00182 };
00191 typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
00192 typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
00193 }
00194
00195 #endif
00196
00197 #endif
00198
00199 #endif