Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _r123array_dot_h__
00033 #define _r123array_dot_h__
00034 #include "features/compilerfeatures.h"
00035 #include "features/sse.h"
00036
00037 #ifndef __cplusplus
00038 #define CXXMETHODS(_N, W, T)
00039 #define CXXOVERLOADS(_N, W, T)
00040 #else
00041
00042 #include <stddef.h>
00043 #include <algorithm>
00044 #include <stdexcept>
00045 #include <iterator>
00046 #include <limits>
00047 #include <iostream>
00048
00068 template <typename value_type>
00069 inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
00070 value_type v=0;
00071 for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
00072 v |= ((value_type)(*p32++)) << (32*i);
00073 return v;
00074 }
00075
00076
/*
 * CXXMETHODS(_N, W, T) -- C++ member declarations for struct r123array<_N>x<W>.
 *
 * Meant to be expanded inside the struct body, after the data member
 * `T v[_N]`.  It supplies container-style typedefs and accessors, equality,
 * fill/swap, the multi-word counter increment incr(), and the static seed()
 * factory.  The expansion ends inside a `protected:` section; incr_carefully()
 * is the only protected member.
 *
 * Every line of the macro body, comments included, must end in a backslash:
 * a line without one terminates the definition.
 */
#define CXXMETHODS(_N, W, T) \
    typedef T value_type; \
    typedef T* iterator; \
    typedef const T* const_iterator; \
    typedef value_type& reference; \
    typedef const value_type& const_reference; \
    typedef size_t size_type; \
    typedef ptrdiff_t difference_type; \
    typedef T* pointer; \
    typedef const T* const_pointer; \
    typedef std::reverse_iterator<iterator> reverse_iterator; \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    \
    enum {static_size = _N}; \
    /* Unchecked element access. */ \
    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
    /* Checked element access: an index >= _N is reported via R123_THROW. */ \
    R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    /* Capacity: the size is the compile-time constant _N. */ \
    R123_CUDA_DEVICE size_type size() const { return _N; } \
    R123_CUDA_DEVICE size_type max_size() const { return _N; } \
    R123_CUDA_DEVICE bool empty() const { return _N==0; } \
    /* Iterators are plain pointers into v. */ \
    R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
    R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
    R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
    R123_CUDA_DEVICE reference front(){ return v[0]; } \
    R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
    R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
    /* Element-wise equality, written as a plain loop. */ \
    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
        for (size_t i = 0; i < _N; ++i) \
            if (v[i] != rhs.v[i]) return false; \
        return true; \
    } \
    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
    /* Set every element to val. */ \
    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
    /* Exchange contents with rhs, one element at a time. */ \
    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
        for (size_t i = 0; i < _N; ++i) { \
            T tmp = v[i]; \
            v[i] = rhs.v[i]; \
            rhs.v[i] = tmp; \
        } \
    } \
    /* Add n to the array treated as one multi-word counter with v[0] */ \
    /* as the least-significant word.  Returns *this. */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
        /* If n has bits above the width of T, a single-word add is not */ \
        /* enough.  The inner ?: keeps the shift count legal (0) when T */ \
        /* is as wide as n; in that case the sizeof test is already false. */ \
        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
            return incr_carefully(n); \
        if(n==1){ \
            ++v[0]; \
            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
        }else{ \
            v[0] += n; \
            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
        } \
        /* v[0] wrapped around: ripple a carry of one into the higher */ \
        /* words, stopping at the first word that does not wrap to zero. */ \
        /* The _N==k tests are compile-time constants, and the */ \
        /* v[_N>k?k:0] subscripts keep every index inside the array even */ \
        /* when _N is small. */ \
        ++v[_N>1?1:0]; \
        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
        ++v[_N>2?2:0]; \
        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
        ++v[_N>3?3:0]; \
        for(size_t i=4; i<_N; ++i){ \
            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
            ++v[i]; \
        } \
        return *this; \
    } \
    \
    /* Build an array from a seed sequence.  ss.generate() is asked for */ \
    /* (3+sizeof(value_type))/4 32-bit words per element, and each */ \
    /* element is assembled from its words by assemble_from_u32. */ \
    template <typename SeedSeq> \
    R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
        r123array##_N##x##W ret; \
        const size_t Ngen = _N*((3+sizeof(value_type))/4); \
        uint32_t u32[Ngen]; \
        uint32_t *p32 = &u32[0]; \
        ss.generate(&u32[0], &u32[Ngen]); \
        for(size_t i=0; i<_N; ++i){ \
            ret.v[i] = assemble_from_u32<value_type>(p32); \
            p32 += (3+sizeof(value_type))/4; \
        } \
        return ret; \
    } \
protected: \
    /* Slow path of incr(): n may be too large for one value_type. */ \
    /* vtn holds the amount just added, truncated to value_type; */ \
    /* v[i-1] < vtn after the add means that word wrapped, so one more */ \
    /* is carried into the next word along with the remaining high */ \
    /* bits of n. */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
        value_type vtn; \
        vtn = n; \
        v[0] += n; \
        /* Shift that drops one value_type worth of bits from n; 0 when */ \
        /* value_type is at least as wide as n. */ \
        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
        for(size_t i=1; i<_N; ++i){ \
            if(rshift){ \
                n >>= rshift; \
            }else{ \
                n=0; \
            } \
            if( v[i-1] < vtn ) \
                ++n; \
            if( n==0 ) break; \
            vtn = n; \
            v[i] += n; \
        } \
        return *this; \
    }
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
/**
 * Stream-insertion wrapper for a single array element.
 *
 * Holds a const reference to the value; `os << wrapper` forwards to
 * `os << value`.  The referenced object must outlive the wrapper.
 */
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& t_) : v(t_) {}
    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){
        return os << t.v;
    }
};

/**
 * uint8_t specialization: the value is widened to int before insertion so
 * that it is written as a decimal number (uint8_t is typically a character
 * type, which a stream would otherwise write as a single character).
 */
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& t_) : v(t_) {}
    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){
        return os << static_cast<int>(t.v);
    }
};
00230
/**
 * Stream-extraction wrapper for a single array element.
 *
 * Holds a mutable reference to the destination; `is >> wrapper` forwards to
 * `is >> value`.  The referenced object must outlive the wrapper.
 */
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

/**
 * uint8_t specialization: the element is read as an int, so the text is
 * parsed as a number, and the result is then narrowed to uint8_t.
 *
 * A parsed value outside the uint8_t range is clamped to the nearest bound
 * and failbit is set on the stream, instead of being silently truncated.
 * If the extraction itself fails, the destination receives whatever the
 * stream left in the zero-initialised temporary, clamped the same way.
 */
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        /* Zero-initialised so the value is defined even on library */
        /* versions that leave it untouched when extraction fails. */
        int wide = 0;
        is >> wide;
        const int hi = std::numeric_limits<uint8_t>::max();
        bool out_of_range = false;
        if(wide < 0){
            wide = 0;
            out_of_range = true;
        }else if(wide > hi){
            wide = hi;
            out_of_range = true;
        }
        /* Store first: setstate() may throw if the stream has exceptions enabled. */
        t.v = static_cast<uint8_t>(wide);
        if(out_of_range)
            is.setstate(std::ios_base::failbit);
        return is;
    }
};
00251
/*
 * CXXOVERLOADS(_N, W, T) -- free-function companions of struct
 * r123array<_N>x<W>, expanded at namespace scope after the struct:
 *   - operator<< writes the _N elements separated by single spaces,
 *   - operator>> reads _N elements in order,
 *   - r123::Array<_N>x<W> is a typedef for the struct.
 * Elements go through r123arrayinsertable<T> / r123arrayextractable<T>.
 *
 * Every line of the macro body except the last must end in a backslash.
 */
#define CXXOVERLOADS(_N, W, T) \
    \
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
    os << r123arrayinsertable<T>(a.v[0]); \
    for(size_t i=1; i<_N; ++i) \
        os << ' ' << r123arrayinsertable<T>(a.v[i]); \
    return os; \
} \
    \
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
    for(size_t i=0; i<_N; ++i){ \
        /* The wrapper must be a named lvalue: its operator>> takes it */ \
        /* by non-const reference. */ \
        r123arrayextractable<T> x(a.v[i]); \
        is >> x; \
    } \
    return is; \
} \
    \
namespace r123{ \
    typedef r123array##_N##x##W Array##_N##x##W; \
}
00272
00273 #endif
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
/*
 * _r123array_tpl(_N, W, T) -- define struct r123array<_N>x<W>.
 *
 * The struct holds `T v[_N]`.  CXXMETHODS adds the member functions and
 * CXXOVERLOADS adds the stream operators and the r123:: typedef; both
 * expand to nothing when the file is compiled as C.
 */
#define _r123array_tpl(_N, W, T) \
struct r123array##_N##x##W{ \
    T v[_N]; \
    CXXMETHODS(_N, W, T) \
}; \
    \
CXXOVERLOADS(_N, W, T)
00297
00300 _r123array_tpl(1, 32, uint32_t)
00301 _r123array_tpl(2, 32, uint32_t)
00302 _r123array_tpl(4, 32, uint32_t)
00303 _r123array_tpl(8, 32, uint32_t)
00304
00305 _r123array_tpl(1, 64, uint64_t)
00306 _r123array_tpl(2, 64, uint64_t)
00307 _r123array_tpl(4, 64, uint64_t)
00308
00309 _r123array_tpl(16, 8, uint8_t)
00310
00311 #if R123_USE_SSE
00312 _r123array_tpl(1, m128i, r123m128i)
00313 #endif
00314
00315
00316
00317
00318
/*
 * R123_W(a) -- width in bits of one element of array type `a`, computed as
 * 8 * sizeof(a::v[0]).  `a` must be a type with an array member named v.
 * The null pointer appears only inside sizeof, whose operand is not
 * evaluated, so nothing is dereferenced at run time.
 */
#define R123_W(a) (8*sizeof(((a *)0)->v[0]))
00320
00325 #endif
00326