Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _r123array_dot_h__
00033 #define _r123array_dot_h__
00034 #include "features/compilerfeatures.h"
00035 #include "features/sse.h"
00036
00037 #ifndef __cplusplus
00038 #define CXXMETHODS(_N, W, T)
00039 #define CXXOVERLOADS(_N, W, T)
00040 #else
00041
00042 #include <stddef.h>
00043 #include <algorithm>
00044 #include <stdexcept>
00045 #include <iterator>
00046 #include <limits>
00047 #include <iostream>
00048
/**
 * Build one value_type from consecutive 32-bit words, least-significant
 * word first.
 *
 * Reads (3+sizeof(value_type))/4 words starting at p32, i.e. sizeof(value_type)
 * rounded up to whole 32-bit words, and ORs word k into bit position 32*k of
 * the result.
 *
 * @param p32  pointer to at least (3+sizeof(value_type))/4 readable words
 * @return     the assembled value
 */
template <typename value_type>
inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
    const size_t nwords = (3+sizeof(value_type))/4;
    value_type result = 0;
    for(size_t w=0; w<nwords; ++w){
        /* Widen the word to value_type before shifting it into place. */
        result |= static_cast<value_type>(p32[w]) << (32*w);
    }
    return result;
}
00075
00076
/*
 * CXXMETHODS(NELEM, W, T) expands, inside the body of struct
 * r123array<NELEM>x<W>, to the C++ member interface of a fixed-size array of
 * NELEM elements of type T held in the member v[NELEM]:
 *   - std::array-like typedefs, element access, iterators and size queries;
 *   - operator== / operator!=, fill() and swap();
 *   - incr(n): treat v[0..NELEM-1] as one multi-word counter with v[0] the
 *     least-significant word and add n to it;
 *   - assembly_count() / assemble(): load the elements from a buffer of
 *     32-bit words.
 *
 * The expansion ends in a protected: section (holding incr_carefully), so
 * anything declared after the macro invocation is protected.
 *
 * Every line of the definition, comment lines included, must end in a
 * backslash: a line without one silently terminates the macro and leaves the
 * remainder at file scope.
 */
#define CXXMETHODS(NELEM, W, T)                                             \
    /* Container typedefs. */                                               \
    typedef T value_type;                                                   \
    typedef T* iterator;                                                    \
    typedef const T* const_iterator;                                        \
    typedef value_type& reference;                                          \
    typedef const value_type& const_reference;                              \
    typedef size_t size_type;                                               \
    typedef ptrdiff_t difference_type;                                      \
    typedef T* pointer;                                                     \
    typedef const T* const_pointer;                                         \
    typedef std::reverse_iterator<iterator> reverse_iterator;               \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;   \
    /* Element access: operator[] is unchecked, at() checks i < NELEM. */   \
    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];}        \
    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
    R123_CUDA_DEVICE reference at(size_type i){ if(i >= NELEM) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= NELEM) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    /* Size queries: the size is the compile-time constant NELEM. */        \
    R123_CUDA_DEVICE size_type size() const { return NELEM; }               \
    R123_CUDA_DEVICE size_type max_size() const { return NELEM; }           \
    R123_CUDA_DEVICE bool empty() const { return NELEM==0; }                \
    /* Iterators are plain pointers into v. */                              \
    R123_CUDA_DEVICE iterator begin() { return &v[0]; }                     \
    R123_CUDA_DEVICE iterator end() { return &v[NELEM]; }                   \
    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; }         \
    R123_CUDA_DEVICE const_iterator end() const { return &v[NELEM]; }       \
    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; }        \
    R123_CUDA_DEVICE const_iterator cend() const { return &v[NELEM]; }      \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
    R123_CUDA_DEVICE pointer data(){ return &v[0]; }                        \
    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; }            \
    R123_CUDA_DEVICE reference front(){ return v[0]; }                      \
    R123_CUDA_DEVICE const_reference front() const{ return v[0]; }          \
    R123_CUDA_DEVICE reference back(){ return v[NELEM-1]; }                 \
    R123_CUDA_DEVICE const_reference back() const{ return v[NELEM-1]; }     \
    /* Element-wise equality, written as an explicit loop. */               \
    R123_CUDA_DEVICE bool operator==(const r123array##NELEM##x##W& rhs) const{ \
        for (size_t i = 0; i < NELEM; ++i)                                  \
            if (v[i] != rhs.v[i]) return false;                             \
        return true;                                                        \
    }                                                                       \
    R123_CUDA_DEVICE bool operator!=(const r123array##NELEM##x##W& rhs) const{ return !(*this == rhs); } \
    /* fill() assigns val to every element; swap() exchanges element by     \
       element through a temporary, again with explicit loops. */           \
    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < NELEM; ++i) v[i] = val; } \
    R123_CUDA_DEVICE void swap(r123array##NELEM##x##W& rhs){                \
        for (size_t i = 0; i < NELEM; ++i) {                                \
            T tmp = v[i];                                                   \
            v[i] = rhs.v[i];                                                \
            rhs.v[i] = tmp;                                                 \
        }                                                                   \
    }                                                                       \
    /* Add n (default 1) to the multi-word counter and return *this. */     \
    R123_CUDA_DEVICE r123array##NELEM##x##W& incr(R123_ULONG_LONG n=1){     \
        /* If n has bits set above the width of T, take the general         \
           multi-word path.  The inner conditional keeps the shift count    \
           at zero when T is at least as wide as n, a case in which the     \
           left operand of the logical-and already makes the test false. */ \
        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
            return incr_carefully(n);                                       \
        if(n==1){                                                           \
            ++v[0];                                                         \
            if(NELEM==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this;    \
        }else{                                                              \
            v[0] += n;                                                      \
            if(NELEM==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this;   \
        }                                                                   \
        /* Reaching here means v[0] wrapped around, so a carry of one is    \
           rippled upward: each higher element is incremented, stopping as  \
           soon as one does not wrap to zero.  The NELEM==k tests compare   \
           compile-time constants.  Subscripts of the form NELEM>k?k:0      \
           stay in bounds for every NELEM, including array sizes for which  \
           the statement is never reached.  R123_BUILTIN_EXPECT is defined  \
           outside this file; presumably a branch-prediction hint marking   \
           the no-carry outcome as the common one. */                       \
        ++v[NELEM>1?1:0];                                                   \
        if(NELEM==2 || R123_BUILTIN_EXPECT(!!v[NELEM>1?1:0], 1)) return *this; \
        ++v[NELEM>2?2:0];                                                   \
        if(NELEM==3 || R123_BUILTIN_EXPECT(!!v[NELEM>2?2:0], 1)) return *this; \
        ++v[NELEM>3?3:0];                                                   \
        for(size_t i=4; i<NELEM; ++i){                                      \
            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this;            \
            ++v[i];                                                         \
        }                                                                   \
        return *this;                                                       \
    }                                                                       \
    /* Number of 32-bit words assemble() consumes: sizeof(value_type)       \
       rounded up to whole words, times NELEM. */                           \
    R123_CUDA_DEVICE size_t assembly_count() const{ return NELEM*((3+sizeof(value_type))/4); } \
    /* Load every element from consecutive 32-bit words at p32. */          \
    R123_CUDA_DEVICE void assemble(uint32_t *p32){                          \
        for(size_t i=0; i<NELEM; ++i){                                      \
            v[i] = assemble_from_u32<value_type>(p32);                      \
            p32 += (3+sizeof(value_type))/4;                                \
        }                                                                   \
    }                                                                       \
protected:                                                                  \
    /* General multi-word add used by incr() when n does not fit in one     \
       element.  vtn holds what was just added to the previous element, so  \
       v[i-1] < vtn detects that the previous addition wrapped.  Each       \
       round shifts the next element-sized chunk of n into place (or        \
       clears n when T is at least as wide as n), adds one for a carry,     \
       and stops early once nothing is left to add. */                      \
    R123_CUDA_DEVICE r123array##NELEM##x##W& incr_carefully(R123_ULONG_LONG n){ \
        value_type vtn;                                                     \
        vtn = n;                                                            \
        v[0] += n;                                                          \
        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
        for(size_t i=1; i<NELEM; ++i){                                      \
            if(rshift){                                                     \
                n >>= rshift;                                               \
            }else{                                                          \
                n=0;                                                        \
            }                                                               \
            if( v[i-1] < vtn )                                              \
                ++n;                                                        \
            if( n==0 ) break;                                               \
            vtn = n;                                                        \
            v[i] += n;                                                      \
        }                                                                   \
        return *this;                                                       \
    }
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
/**
 * Stream-insertion adaptor: wraps a reference to a T so that inserting the
 * wrapper into a std::ostream inserts the referenced value.
 *
 * Only a reference is stored; the wrapped object must outlive the wrapper.
 */
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& ref) : v(ref) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<T>& item){
        return out << item.v;
    }
};

/**
 * uint8_t flavour of the adaptor: the value is converted to int before
 * insertion, so it is written as a number rather than as a character.
 */
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& ref) : v(ref) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<uint8_t>& item){
        const int as_number = static_cast<int>(item.v);
        return out << as_number;
    }
};
00220
/**
 * Stream-extraction adaptor: wraps a reference to a T so that extracting into
 * the wrapper from a std::istream extracts into the referenced object.
 *
 * Only a reference is stored; the wrapped object must outlive the wrapper.
 */
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

/**
 * uint8_t flavour of the adaptor: the input is parsed as an int, so a byte is
 * read as a number rather than as a single character.
 *
 * Behaviour on bad input:
 *   - if the int extraction itself fails, the stream state is whatever that
 *     extraction left, and the byte receives the low 8 bits of the parsed
 *     int, which starts at 0 so it is never read uninitialized;
 *   - if a number was read but lies outside 0..255, failbit is set on the
 *     stream and the byte is clamped to 0 or 255 instead of silently
 *     wrapping.  setstate() throws if the caller enabled failbit exceptions
 *     on the stream.
 */
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        /* Zero-initialized: pre-C++11 extraction leaves the target untouched
           on failure, which would otherwise be an indeterminate read below. */
        int parsed = 0;
        is >> parsed;
        if(!is.fail()){
            if(parsed > 255){
                is.setstate(std::ios_base::failbit);
                parsed = 255;
            }else if(parsed < 0){
                is.setstate(std::ios_base::failbit);
                parsed = 0;
            }
        }
        t.v = static_cast<uint8_t>(parsed);
        return is;
    }
};
00241
/*
 * CXXOVERLOADS(NELEM, W, T) expands, at namespace scope after the definition
 * of struct r123array<NELEM>x<W>, to:
 *   - operator<< : writes the NELEM elements separated by single spaces, each
 *     through r123arrayinsertable<T>;
 *   - operator>> : reads NELEM elements in order, each through
 *     r123arrayextractable<T>;
 *   - a typedef r123::Array<NELEM>x<W> naming the struct.
 *
 * Both operators are declared inline: they are defined in a header, and
 * without inline every translation unit that includes it would emit its own
 * external definition and the program would fail to link.
 */
#define CXXOVERLOADS(NELEM, W, T)                                       \
                                                                        \
inline std::ostream& operator<<(std::ostream& os, const r123array##NELEM##x##W& a){ \
    os << r123arrayinsertable<T>(a.v[0]);                               \
    for(size_t i=1; i<NELEM; ++i)                                       \
        os << " " << r123arrayinsertable<T>(a.v[i]);                    \
    return os;                                                          \
}                                                                       \
                                                                        \
inline std::istream& operator>>(std::istream& is, r123array##NELEM##x##W& a){ \
    for(size_t i=0; i<NELEM; ++i){                                      \
        r123arrayextractable<T> x(a.v[i]);                              \
        is >> x;                                                        \
    }                                                                   \
    return is;                                                          \
}                                                                       \
                                                                        \
namespace r123{                                                         \
    typedef r123array##NELEM##x##W Array##NELEM##x##W;                  \
}
00262
00263 #endif
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
/*
 * _r123array_tpl(NELEM, W, T) defines the array type r123array<NELEM>x<W>:
 * a struct whose only data member is T v[NELEM].
 *
 * CXXMETHODS contributes the member functions and CXXOVERLOADS the
 * namespace-scope stream operators and r123:: typedef.  When __cplusplus is
 * not defined both helper macros are defined empty, so the expansion is just
 * the plain struct.
 */
#define _r123array_tpl(NELEM, W, T)                 \
    /* the struct itself: storage, then methods */  \
struct r123array##NELEM##x##W{                      \
    T v[NELEM];                                     \
    CXXMETHODS(NELEM, W, T)                         \
};                                                  \
    /* namespace-scope companions of the struct */  \
CXXOVERLOADS(NELEM, W, T)
00287
00290 _r123array_tpl(1, 32, uint32_t)
00291 _r123array_tpl(2, 32, uint32_t)
00292 _r123array_tpl(4, 32, uint32_t)
00293 _r123array_tpl(8, 32, uint32_t)
00294
00295 _r123array_tpl(1, 64, uint64_t)
00296 _r123array_tpl(2, 64, uint64_t)
00297 _r123array_tpl(4, 64, uint64_t)
00298
00299 _r123array_tpl(16, 8, uint8_t)
00300
00301 #if R123_USE_SSE
00302 _r123array_tpl(1, m128i, r123m128i)
00303 #endif
00304
00305
00306
00307
00308
/*
 * R123_W(ArrayType): size of one element of ArrayType's v[] member,
 * multiplied by 8 (i.e. its width in bits where bytes are 8 bits wide).
 * ArrayType must be a type name.  The null pointer appears only inside
 * sizeof, whose operand is not evaluated, so nothing is dereferenced.
 */
#define R123_W(ArrayType) (sizeof(((ArrayType *)0)->v[0]) * 8)
00310
00315 #endif
00316