00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _threefry_dot_h_
00033 #define _threefry_dot_h_
00034 #include "features/compilerfeatures.h"
00035 #include "array.h"
00036
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
/*
 * Rotation amounts for the 4-word, 64-bit variant (used by the
 * _threefry4x_tpl expansion with W = 64).
 *
 * Naming: R_64x4_<d>_<j>
 *   <d>  round number modulo 8
 *   <j>  0 for the first add/rotate/xor mix of the round, 1 for the second
 */
enum {
    R_64x4_0_0 = 14, R_64x4_0_1 = 16,
    R_64x4_1_0 = 52, R_64x4_1_1 = 57,
    R_64x4_2_0 = 23, R_64x4_2_1 = 40,
    R_64x4_3_0 =  5, R_64x4_3_1 = 37,
    R_64x4_4_0 = 25, R_64x4_4_1 = 33,
    R_64x4_5_0 = 46, R_64x4_5_1 = 12,
    R_64x4_6_0 = 58, R_64x4_6_1 = 22,
    R_64x4_7_0 = 32, R_64x4_7_1 = 32
};
00078
/*
 * Rotation amounts for the 2-word, 64-bit variant (used by the
 * _threefry2x_tpl expansion with W = 64).
 *
 * Naming: R_64x2_<d>_0, where <d> is the round number modulo 8.  The
 * 2-word variant performs a single mix per round, hence only index _0.
 */
enum {
    R_64x2_0_0 = 16,
    R_64x2_1_0 = 42,
    R_64x2_2_0 = 12,
    R_64x2_3_0 = 31,
    R_64x2_4_0 = 16,
    R_64x2_5_0 = 32,
    R_64x2_6_0 = 24,
    R_64x2_7_0 = 21
};
00103
/*
 * Rotation amounts for the 4-word, 32-bit variant (used by the
 * _threefry4x_tpl expansion with W = 32).
 *
 * Naming: R_32x4_<d>_<j>
 *   <d>  round number modulo 8
 *   <j>  0 for the first add/rotate/xor mix of the round, 1 for the second
 */
enum {
    R_32x4_0_0 = 10, R_32x4_0_1 = 26,
    R_32x4_1_0 = 11, R_32x4_1_1 = 21,
    R_32x4_2_0 = 13, R_32x4_2_1 = 27,
    R_32x4_3_0 = 23, R_32x4_3_1 =  5,
    R_32x4_4_0 =  6, R_32x4_4_1 = 20,
    R_32x4_5_0 = 17, R_32x4_5_1 = 11,
    R_32x4_6_0 = 25, R_32x4_6_1 = 10,
    R_32x4_7_0 = 18, R_32x4_7_1 = 20
};
00129
/*
 * Rotation amounts for the 2-word, 32-bit variant (used by the
 * _threefry2x_tpl expansion with W = 32).
 *
 * Naming: R_32x2_<d>_0, where <d> is the round number modulo 8.  The
 * 2-word variant performs a single mix per round, hence only index _0.
 */
enum {
    R_32x2_0_0 = 13,
    R_32x2_1_0 = 15,
    R_32x2_2_0 = 26,
    R_32x2_3_0 =  6,
    R_32x2_4_0 = 17,
    R_32x2_5_0 = 29,
    R_32x2_6_0 = 16,
    R_32x2_7_0 = 24
};
00153
/*
 * Number of words in a counter/key block for the 2-word and 4-word
 * variants.  The generated functions size their key-schedule array as
 * WCNTn + 1 and loop over WCNTn words when loading key and counter.
 */
enum {
    WCNT2 = 2,
    WCNT4 = 4
};
00158 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N));
00159 R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N)
00160 {
00161 return (x << (N & 63)) | (x >> ((64-N) & 63));
00162 }
00163
00164 R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N));
00165 R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N)
00166 {
00167 return (x << (N & 31)) | (x >> ((32-N) & 31));
00168 }
00169
/* Build a 64-bit constant from its high and low 32-bit halves. */
#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32))

/*
 * Initial value of the extra key-schedule word ks[WCNTn]; the generated
 * functions then XOR every key word into it.  One constant per word width,
 * selected by token pasting as SKEIN_KS_PARITY##W.
 */
#define SKEIN_KS_PARITY64 SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
#define SKEIN_KS_PARITY32 0x1BD11BDA
00173
/*
 * Default round counts used by the fixed-round entry points
 * (threefryNxW).  Each may be overridden by defining the macro before
 * this header is included.
 */
#ifndef THREEFRY2x32_DEFAULT_ROUNDS
#define THREEFRY2x32_DEFAULT_ROUNDS 20
#endif

#ifndef THREEFRY2x64_DEFAULT_ROUNDS
#define THREEFRY2x64_DEFAULT_ROUNDS 20
#endif

#ifndef THREEFRY4x32_DEFAULT_ROUNDS
#define THREEFRY4x32_DEFAULT_ROUNDS 20
#endif

#ifndef THREEFRY4x64_DEFAULT_ROUNDS
#define THREEFRY4x64_DEFAULT_ROUNDS 20
#endif
00189
/*
 * Helper macros for _threefry2x_tpl.  They expand inside the body of
 * threefry2x<W>_R and deliberately refer to that function's locals
 * X (working state) and ks (key schedule) and to its parameter Nrounds.
 * Everything stays fully unrolled: each round is guarded by its own
 * "if(Nrounds > d)" so a compile-time-constant Nrounds lets the compiler
 * drop the unused tail.
 */

/* One round: add, rotate by R_<W>x2_<j>_0, xor. */
#define _threefry2x_round(W, j) \
    X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1], R_##W##x2_##j##_0); X.v[1] ^= X.v[0];

/*
 * Key injection performed after rounds d0..d0+3.  The injection number is
 * r = d0/4 + 1; word i receives ks[(r + i) mod 3] and the last word also
 * receives r itself.  All indices are integer constant expressions.
 */
#define _threefry2x_inject(d0) \
    X.v[0] += ks[((d0)/4 + 1) % (WCNT2+1)]; \
    X.v[1] += ks[((d0)/4 + 2) % (WCNT2+1)]; \
    X.v[1] += (d0)/4 + 1;

/* Rounds d0..d0+3 with rotation indices ja..jd, then the key injection. */
#define _threefry2x_quad(W, d0, ja, jb, jc, jd) \
    if(Nrounds>(d0)  ){ _threefry2x_round(W, ja) } \
    if(Nrounds>(d0)+1){ _threefry2x_round(W, jb) } \
    if(Nrounds>(d0)+2){ _threefry2x_round(W, jc) } \
    if(Nrounds>(d0)+3){ _threefry2x_round(W, jd) } \
    if(Nrounds>(d0)+3){ _threefry2x_inject(d0) }

/* Four rounds starting at a round with d0 mod 8 == 0 / d0 mod 8 == 4. */
#define _threefry2x_quad_lo(W, d0) _threefry2x_quad(W, d0, 0, 1, 2, 3)
#define _threefry2x_quad_hi(W, d0) _threefry2x_quad(W, d0, 4, 5, 6, 7)

/*
 * _threefry2x_tpl(W) generates the 2-word API for word width W (32 or 64):
 *
 *   threefry2x<W>_ctr_t, _key_t, _ukey_t   typedefs of struct r123array2x<W>
 *   threefry2x<W>keyinit(uk)               returns uk unchanged
 *   threefry2x<W>_R(Nrounds, in, k)        Nrounds rounds applied to counter
 *                                          `in` under key `k`; at most 32
 *                                          rounds are implemented
 *                                          (R123_ASSERT(Nrounds<=32))
 *   threefry2x<W>_rounds                   enum, the default round count
 *   threefry2x<W>(in, k)                   threefry2x<W>_R with that default
 *
 * The rotation constant used in round d is always R_<W>x2_<d mod 8>_0, so
 * the four-round groups strictly alternate lo (0..3) / hi (4..7) over the
 * whole 32-round range.  Results for Nrounds <= 20 (including the default)
 * are the same as with the historical hand-unrolled body; rounds 20..31
 * now follow the same mod-8 schedule as rounds 0..19.
 */
#define _threefry2x_tpl(W) \
typedef struct r123array2x##W threefry2x##W##_ctr_t; \
typedef struct r123array2x##W threefry2x##W##_key_t; \
typedef struct r123array2x##W threefry2x##W##_ukey_t; \
R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \
R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
R123_CUDA_DEVICE R123_STATIC_INLINE \
threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
    threefry2x##W##_ctr_t X; \
    uint##W##_t ks[WCNT2+1]; \
    int i; \
    R123_ASSERT(Nrounds<=32); \
    /* ks[0..1] = key words, ks[2] = parity constant XOR all key words */ \
    ks[WCNT2] = SKEIN_KS_PARITY##W; \
    for (i=0;i < WCNT2; i++) \
    { \
        ks[i] = k.v[i]; \
        X.v[i] = in.v[i]; \
        ks[WCNT2] ^= k.v[i]; \
    } \
    \
    /* initial key injection (injection number 0: nothing extra added) */ \
    X.v[0] += ks[0]; X.v[1] += ks[1]; \
    \
    _threefry2x_quad_lo(W,  0) \
    _threefry2x_quad_hi(W,  4) \
    _threefry2x_quad_lo(W,  8) \
    _threefry2x_quad_hi(W, 12) \
    _threefry2x_quad_lo(W, 16) \
    _threefry2x_quad_hi(W, 20) \
    _threefry2x_quad_lo(W, 24) \
    _threefry2x_quad_hi(W, 28) \
    return X; \
} \
 \
enum { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS }; \
R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
R123_CUDA_DEVICE R123_STATIC_INLINE \
threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
    return threefry2x##W##_R(threefry2x##W##_rounds, in, k); \
}
00294
00295
/*
 * Helper macros for _threefry4x_tpl.  They expand inside the body of
 * threefry4x<W>_R and deliberately refer to that function's locals
 * X (working state) and ks (key schedule) and to its parameter Nrounds.
 * Everything stays fully unrolled: each round is guarded by its own
 * "if(Nrounds > d)" so a compile-time-constant Nrounds lets the compiler
 * drop the unused tail.
 */

/*
 * One round = two add/rotate/xor mixes.  Word 0 is mixed with word a using
 * R_<W>x4_<j>_0, then word 2 is mixed with word b using R_<W>x4_<j>_1.
 * Even rounds use (a,b) = (1,3); odd rounds use (a,b) = (3,1).
 */
#define _threefry4x_mix(W, j, a, b) \
    X.v[0] += X.v[a]; X.v[a] = RotL_##W(X.v[a], R_##W##x4_##j##_0); X.v[a] ^= X.v[0]; \
    X.v[2] += X.v[b]; X.v[b] = RotL_##W(X.v[b], R_##W##x4_##j##_1); X.v[b] ^= X.v[2];

/*
 * Key injection performed after rounds d0..d0+3.  The injection number is
 * r = d0/4 + 1; word i receives ks[(r + i) mod 5] and the last word also
 * receives r itself.  All indices are integer constant expressions.
 */
#define _threefry4x_inject(d0) \
    X.v[0] += ks[((d0)/4 + 1) % (WCNT4+1)]; \
    X.v[1] += ks[((d0)/4 + 2) % (WCNT4+1)]; \
    X.v[2] += ks[((d0)/4 + 3) % (WCNT4+1)]; \
    X.v[3] += ks[((d0)/4 + 4) % (WCNT4+1)]; \
    X.v[WCNT4-1] += (d0)/4 + 1;

/* Rounds d0..d0+3 with rotation indices ja..jd, then the key injection. */
#define _threefry4x_quad(W, d0, ja, jb, jc, jd) \
    if(Nrounds>(d0)  ){ _threefry4x_mix(W, ja, 1, 3) } \
    if(Nrounds>(d0)+1){ _threefry4x_mix(W, jb, 3, 1) } \
    if(Nrounds>(d0)+2){ _threefry4x_mix(W, jc, 1, 3) } \
    if(Nrounds>(d0)+3){ _threefry4x_mix(W, jd, 3, 1) } \
    if(Nrounds>(d0)+3){ _threefry4x_inject(d0) }

/* Four rounds starting at a round with d0 mod 8 == 0 / d0 mod 8 == 4. */
#define _threefry4x_quad_lo(W, d0) _threefry4x_quad(W, d0, 0, 1, 2, 3)
#define _threefry4x_quad_hi(W, d0) _threefry4x_quad(W, d0, 4, 5, 6, 7)

/*
 * _threefry4x_tpl(W) generates the 4-word API for word width W (32 or 64):
 *
 *   threefry4x<W>_ctr_t, _key_t, _ukey_t   typedefs of struct r123array4x<W>
 *   threefry4x<W>keyinit(uk)               returns uk unchanged
 *   threefry4x<W>_R(Nrounds, in, k)        Nrounds rounds applied to counter
 *                                          `in` under key `k`; at most 72
 *                                          rounds are implemented
 *                                          (R123_ASSERT(Nrounds<=72))
 *   threefry4x<W>_rounds                   enum, the default round count
 *   threefry4x<W>(in, k)                   threefry4x<W>_R with that default
 *
 * The rotation constants used in round d are R_<W>x4_<d mod 8>_{0,1}, so
 * the four-round groups strictly alternate lo (0..3) / hi (4..7).
 */
#define _threefry4x_tpl(W) \
typedef struct r123array4x##W threefry4x##W##_ctr_t; \
typedef struct r123array4x##W threefry4x##W##_key_t; \
typedef struct r123array4x##W threefry4x##W##_ukey_t; \
R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \
R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
R123_CUDA_DEVICE R123_STATIC_INLINE \
threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
    threefry4x##W##_ctr_t X; \
    uint##W##_t ks[WCNT4+1]; \
    int i; \
    R123_ASSERT(Nrounds<=72); \
    /* ks[0..3] = key words, ks[4] = parity constant XOR all key words */ \
    ks[WCNT4] = SKEIN_KS_PARITY##W; \
    for (i=0;i < WCNT4; i++) \
    { \
        ks[i] = k.v[i]; \
        X.v[i] = in.v[i]; \
        ks[WCNT4] ^= k.v[i]; \
    } \
    \
    /* initial key injection (injection number 0: nothing extra added) */ \
    X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
    \
    _threefry4x_quad_lo(W,  0) \
    _threefry4x_quad_hi(W,  4) \
    _threefry4x_quad_lo(W,  8) \
    _threefry4x_quad_hi(W, 12) \
    _threefry4x_quad_lo(W, 16) \
    _threefry4x_quad_hi(W, 20) \
    _threefry4x_quad_lo(W, 24) \
    _threefry4x_quad_hi(W, 28) \
    _threefry4x_quad_lo(W, 32) \
    _threefry4x_quad_hi(W, 36) \
    _threefry4x_quad_lo(W, 40) \
    _threefry4x_quad_hi(W, 44) \
    _threefry4x_quad_lo(W, 48) \
    _threefry4x_quad_hi(W, 52) \
    _threefry4x_quad_lo(W, 56) \
    _threefry4x_quad_hi(W, 60) \
    _threefry4x_quad_lo(W, 64) \
    _threefry4x_quad_hi(W, 68) \
    \
    return X; \
} \
 \
enum { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS }; \
R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
R123_CUDA_DEVICE R123_STATIC_INLINE \
threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
    return threefry4x##W##_R(threefry4x##W##_rounds, in, k); \
}
00724
00726 _threefry2x_tpl(64)
00727 _threefry2x_tpl(32)
00728 _threefry4x_tpl(64)
00729 _threefry4x_tpl(32)
00730
00731
00732
/*
 * Function-like macros with the same names as the default-rounds
 * functions generated above.  From this point on, a call written as
 * threefryNxW(c, k) expands directly to the corresponding _R function
 * with the default round count.
 */
#define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k)
#define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k)
#define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k)
#define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k)
00737
00738 #ifdef __cplusplus
00739
00740 #define _threefryNxWclass_tpl(NxW) \
00741 namespace r123{ \
00742 template<unsigned int R> \
00743 struct Threefry##NxW##_R{ \
00744 typedef threefry##NxW##_ctr_t ctr_type; \
00745 typedef threefry##NxW##_key_t key_type; \
00746 typedef threefry##NxW##_key_t ukey_type; \
00747 static const unsigned int rounds=R; \
00748 inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \
00749 R123_STATIC_ASSERT(R<=72, "threefry is only unrolled up to 72 rounds\n"); \
00750 return threefry##NxW##_R(R, ctr, key); \
00751 } \
00752 }; \
00753 typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW; \
00754 } // namespace r123
00755
00758 _threefryNxWclass_tpl(2x32)
00759 _threefryNxWclass_tpl(4x32)
00760 _threefryNxWclass_tpl(2x64)
00761 _threefryNxWclass_tpl(4x64)
00762
00763
00764
00765
00862 #endif
00863
00864 #endif