Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _r123array_dot_h__
00033 #define _r123array_dot_h__
00034 #include "features/compilerfeatures.h"
00035 #include "features/sse.h"
00036
00037 #ifndef __cplusplus
00038 #define CXXMETHODS(_N, W, T)
00039 #define CXXOVERLOADS(_N, W, T)
00040 #else
00041
00042 #include <stddef.h>
00043 #include <algorithm>
00044 #include <stdexcept>
00045 #include <iterator>
00046 #include <limits>
00047 #include <iostream>
00048
/**
 * Build one value_type out of consecutive 32-bit words, least-significant
 * word first.
 *
 * (3+sizeof(value_type))/4 words are read starting at p32, i.e. the size of
 * value_type in bytes divided by four and rounded up.  Word k is converted to
 * value_type and OR-ed into the result at bit offset 32*k.
 *
 * @tparam value_type  result type; it must be initializable from 0 and
 *                     support the |= and << operations used below.
 * @param  p32         first word to read.  The pointer is taken by value, so
 *                     the caller's own pointer is not advanced.
 * @return the assembled value.
 */
template <typename value_type>
inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
    const size_t nwords = (3+sizeof(value_type))/4;
    value_type result=0;
    size_t k=0;
    while(k < nwords){
        result |= ((value_type)(p32[k])) << (32*k);
        ++k;
    }
    return result;
}
00075
00076
/*
 * CXXMETHODS(_N, W, T) expands to the C++ member declarations that are pasted
 * into struct r123array<_N>x<W>, whose data member is `T v[_N]`.
 *
 *   _N  number of elements
 *   W   width tag used only to form the struct name r123array##_N##x##W
 *   T   element type
 *
 * The typedefs and members use the same names as std::array (value_type,
 * iterator, at, size, begin/end, data, front/back, fill, swap, ...).  On top
 * of that the macro adds:
 *
 *   incr(n)            treat v[0..._N-1] as one multi-word unsigned counter
 *                      with v[0] least significant and add n to it
 *   seed(ss)           static factory that fills a new array from
 *                      ss.generate(first, last) output, 32 bits at a time
 *   incr_carefully(n)  protected slow path of incr() for an n that does not
 *                      fit in a single element
 *
 * Every line of the definition, including comment lines, must end in a
 * backslash; a line without one terminates the macro.
 *
 * NOTE(review): the comparisons and increments are deliberately written
 * exactly as `v[i] != rhs.v[i]`, `!!v[0]`, `n<=v[0]`, `v[i-1] < vtn`, etc.
 * T is not necessarily a built-in integer (the file instantiates the struct
 * with r123m128i), so presumably only these operator forms are guaranteed to
 * exist - confirm before rewriting any of them.
 */
#define CXXMETHODS(_N, W, T) \
    typedef T value_type; \
    typedef T* iterator; \
    typedef const T* const_iterator; \
    typedef value_type& reference; \
    typedef const value_type& const_reference; \
    typedef size_t size_type; \
    typedef ptrdiff_t difference_type; \
    typedef T* pointer; \
    typedef const T* const_pointer; \
    typedef std::reverse_iterator<iterator> reverse_iterator; \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    /* Element access: operator[] is unchecked; at() hands a std::out_of_range to R123_THROW when i >= _N. */ \
    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
    R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    /* Capacity: the size is the compile-time constant _N. */ \
    R123_CUDA_DEVICE size_type size() const { return _N; } \
    R123_CUDA_DEVICE size_type max_size() const { return _N; } \
    R123_CUDA_DEVICE bool empty() const { return _N==0; } \
    /* Iterators are plain pointers into v. */ \
    R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
    R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
    R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
    R123_CUDA_DEVICE reference front(){ return v[0]; } \
    R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
    R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
        /* Element-wise comparison with a hand-written loop instead of a std:: algorithm. */ \
        for (size_t i = 0; i < _N; ++i) \
            if (v[i] != rhs.v[i]) return false; \
        return true; \
    } \
    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
    /* Assign val to every element. */ \
    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
        /* Element-wise exchange through a temporary, again without a std:: algorithm. */ \
        for (size_t i = 0; i < _N; ++i) { \
            T tmp = v[i]; \
            v[i] = rhs.v[i]; \
            rhs.v[i] = tmp; \
        } \
    } \
    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
        /* Add n to the counter formed by v[0] (least significant) ... v[_N-1]. \
           First decide whether n has bits above the width of one element.  The \
           inner ?: repeats the sizeof test so that the shift count is 0 rather \
           than >= the width of n when T is at least as wide as n; the whole \
           condition depends only on sizeof values except for the shift of n. */ \
        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
            return incr_carefully(n); \
        if(n==1){ \
            ++v[0]; \
            /* after ++, a non-zero v[0] means there was no wrap-around */ \
            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
        }else{ \
            v[0] += n; \
            /* after += n, v[0] >= n means there was no wrap-around */ \
            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
        } \
        /* v[0] wrapped: propagate a carry of one upwards for as long as the \
           element just incremented wrapped to zero.  The _N==k tests compare \
           against a macro constant; only the !!v[k] tests depend on run-time \
           data.  Subscripts such as v[_N>3?3:0] fall back to index 0 when the \
           element does not exist, so the constant subscript stays inside \
           v[_N] even in statements that are never reached for a small _N. */ \
        ++v[_N>1?1:0]; \
        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
        ++v[_N>2?2:0]; \
        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
        ++v[_N>3?3:0]; \
        for(size_t i=4; i<_N; ++i){ \
            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
            ++v[i]; \
        } \
        return *this; \
    } \
    /* seed(ss): ask ss.generate() for (3+sizeof(value_type))/4 32-bit words \
       per element and assemble each element from its words with \
       assemble_from_u32.  Returns the new array by value. */ \
    template <typename SeedSeq> \
    R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
        r123array##_N##x##W ret; \
        const size_t Ngen = _N*((3+sizeof(value_type))/4); \
        uint32_t u32[Ngen]; \
        uint32_t *p32 = &u32[0]; \
        /* one-past-the-end formed by pointer arithmetic, not by subscripting u32[Ngen] */ \
        ss.generate(p32, p32 + Ngen); \
        for(size_t i=0; i<_N; ++i){ \
            ret.v[i] = assemble_from_u32<value_type>(p32); \
            p32 += (3+sizeof(value_type))/4; \
        } \
        return ret; \
    } \
protected: \
    R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
        /* General multi-word add, used when n may exceed what one value_type \
           can hold.  vtn is the part of n just added to the previous element; \
           v[i-1] < vtn after the addition means that element wrapped. */ \
        value_type vtn; \
        vtn = n; \
        v[0] += n; \
        /* bits per element if n is wider than an element, otherwise 0 */ \
        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
        for(size_t i=1; i<_N; ++i){ \
            /* drop the bits already consumed; rshift==0 means n fit entirely in one element */ \
            if(rshift){ \
                n >>= rshift; \
            }else{ \
                n=0; \
            } \
            if( v[i-1] < vtn ) \
                ++n; \
            /* nothing left to add and no carry: higher elements are unchanged */ \
            if( n==0 ) break; \
            vtn = n; \
            v[i] += n; \
        } \
        return *this; \
    }
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
/**
 * Thin wrapper that holds a const reference to one array element so the
 * element can be written to a std::ostream with <<.  The primary template
 * forwards straight to the element's own operator<<.
 */
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& element) : v(element) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<T>& wrapped){
        return out << wrapped.v;
    }
};

/**
 * uint8_t elements are converted to int before insertion, so the stream
 * prints the numeric value instead of treating the byte as a character.
 */
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& element) : v(element) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<uint8_t>& wrapped){
        const int as_number = (int)wrapped.v;
        return out << as_number;
    }
};
00228
/**
 * Thin wrapper that holds a mutable reference to one array element so the
 * element can be read from a std::istream with >>.  The primary template
 * forwards straight to the element's own operator>>.
 */
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

/**
 * uint8_t elements are read as a number through an int, so the stream does
 * not treat the byte as a single character.
 *
 * The element is written only when the extraction succeeded.  If the stream
 * is already failed or at end-of-file, operator>> does not touch its
 * argument; storing the temporary unconditionally would then copy an
 * indeterminate int into the element.  On failure the element keeps its old
 * value and the stream's fail state is left for the caller to inspect.
 *
 * Values outside 0..255 are not rejected: they are narrowed to uint8_t by
 * the conversion below.
 */
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        int i = 0;
        if(is >> i)
            t.v = static_cast<uint8_t>(i);
        return is;
    }
};
00249
/*
 * CXXOVERLOADS(_N, W, T) expands, at namespace scope, to the free functions
 * and the alias that accompany struct r123array##_N##x##W:
 *
 *   operator<<  writes the _N elements separated by single spaces, each one
 *               through r123arrayinsertable<T>
 *   operator>>  reads _N elements in order, each one through
 *               r123arrayextractable<T>
 *   r123::Array##_N##x##W   typedef for the struct inside namespace r123
 *
 * Both operators are declared inline: this macro is expanded in a header, and
 * a non-inline function definition in a header is defined once per
 * translation unit that includes it, which fails at link time as soon as two
 * translation units include the header.
 */
#define CXXOVERLOADS(_N, W, T) \
    inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
        os << r123arrayinsertable<T>(a.v[0]); \
        for(size_t i=1; i<_N; ++i) \
            os << " " << r123arrayinsertable<T>(a.v[i]); \
        return os; \
    } \
    \
    inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
        for(size_t i=0; i<_N; ++i){ \
            /* the wrapper refers to a.v[i], so extraction writes into the array */ \
            r123arrayextractable<T> x(a.v[i]); \
            is >> x; \
        } \
        return is; \
    } \
    \
    namespace r123{ \
        typedef r123array##_N##x##W Array##_N##x##W; \
    }
00270
00271 #endif
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
/*
 * _r123array_tpl(_N, W, T) defines struct r123array<_N>x<W>: an array `v` of
 * _N elements of type T, followed by whatever CXXMETHODS(_N, W, T) expands
 * to inside the struct and whatever CXXOVERLOADS(_N, W, T) expands to after
 * it.  When this header is compiled as C both helper macros are defined
 * empty, which leaves just the struct with its data member.
 */
#define _r123array_tpl(_N, W, T) \
    struct r123array##_N##x##W{ T v[_N]; CXXMETHODS(_N, W, T) }; \
    CXXOVERLOADS(_N, W, T)
00295
00298 _r123array_tpl(1, 32, uint32_t)
00299 _r123array_tpl(2, 32, uint32_t)
00300 _r123array_tpl(4, 32, uint32_t)
00301 _r123array_tpl(8, 32, uint32_t)
00302
00303 _r123array_tpl(1, 64, uint64_t)
00304 _r123array_tpl(2, 64, uint64_t)
00305 _r123array_tpl(4, 64, uint64_t)
00306
00307 _r123array_tpl(16, 8, uint8_t)
00308
00309 #if R123_USE_SSE
00310 _r123array_tpl(1, m128i, r123m128i)
00311 #endif
00312
00313
00314
00315
00316
/*
 * R123_W(a): width in bits of one element of array type `a`, computed as
 * eight times sizeof of its v[0] member.  The null pointer cast appears only
 * inside sizeof, so it is never evaluated.
 */
#define R123_W(a) \
    (8*sizeof(((a *)0)->v[0]))
00318
00323 #endif
00324