Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _r123array_dot_h__
00033 #define _r123array_dot_h__
00034 #include "features/compilerfeatures.h"
00035 #include "features/sse.h"
00036
00037 #ifndef __cplusplus
/* Compiled as C (no __cplusplus): both hooks expand to nothing, so
 * _r123array_tpl produces a plain struct holding only the v[] array,
 * with no member functions and no stream operators. */
00038 #define CXXMETHODS(_N, W, T)
00039 #define CXXOVERLOADS(_N, W, T)
00040 #else
00041
00042 #include <stddef.h>
00043 #include <algorithm>
00044 #include <stdexcept>
00045 #include <iterator>
00046 #include <limits>
00047 #include <iostream>
00048
00068 template <typename value_type>
00069 inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
00070 value_type v=0;
00071 for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
00072 v |= ((value_type)(*p32++)) << (32*i);
00073 return v;
00074 }
00075
/* R123_THROW(x): error-reporting hook used by the range/argument checks
 * in CXXMETHODS.  When __CUDACC__ is defined the argument x is discarded
 * and the macro becomes R123_ASSERT(0); otherwise it expands to a plain
 * `throw (x)`.
 * NOTE(review): presumably the CUDA branch exists because device code
 * cannot throw -- confirm against the compiler-features headers. */
00076 #ifdef __CUDACC__
00077
00078 #define R123_THROW(x) R123_ASSERT(0)
00079 #else
00080 #define R123_THROW(x) throw (x)
00081 #endif
00082
00083
/*
 * CXXMETHODS(_N, W, T): member declarations pasted into the body of
 * struct r123array<_N>x<W>, whose only data member is `T v[_N]`.
 *
 *   _N  number of elements in v[]
 *   W   width tag used only to spell the struct name (r123array##_N##x##W)
 *   T   element type
 *
 * The members provide a fixed-size-array style interface (typedefs, element
 * access, iterators, comparison, fill, swap) plus two extras:
 *   incr(n)      treat v[] as one multiword counter, v[0] least significant,
 *                and add n with carry propagation;
 *   assemble(p)  load every element from 32-bit words via assemble_from_u32.
 *
 * Every line of the definition must end in a backslash; comments inside the
 * macro therefore use the block form with a trailing continuation.
 */
#define CXXMETHODS(_N, W, T) \
    typedef T value_type; \
    typedef T* iterator; \
    typedef const T* const_iterator; \
    typedef value_type& reference; \
    typedef const value_type& const_reference; \
    typedef size_t size_type; \
    typedef ptrdiff_t difference_type; \
    typedef T* pointer; \
    typedef const T* const_pointer; \
    typedef std::reverse_iterator<iterator> reverse_iterator; \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    /* Element access: operator[] is unchecked, at() reports i >= _N through R123_THROW. */ \
    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
    R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
    /* Capacity: the size is the compile-time constant _N. */ \
    R123_CUDA_DEVICE size_type size() const { return _N; } \
    R123_CUDA_DEVICE size_type max_size() const { return _N; } \
    R123_CUDA_DEVICE bool empty() const { return _N==0; } \
    /* Iterators are raw pointers into v[]. */ \
    R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
    R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
    R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
    R123_CUDA_DEVICE reference front(){ return v[0]; } \
    R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
    R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
    /* Element-wise equality, written as an explicit loop over v[]. */ \
    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
        for (size_t i = 0; i < _N; ++i) \
            if (v[i] != rhs.v[i]) return false; \
        return true; \
    } \
    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
    /* Assign val to every element. */ \
    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
    /* Exchange contents with rhs, one element at a time. */ \
    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
        for (size_t i = 0; i < _N; ++i) { \
            T tmp = v[i]; \
            v[i] = rhs.v[i]; \
            rhs.v[i] = tmp; \
        } \
    } \
    /* Add n to the array treated as a single multiword counter whose */ \
    /* least-significant word is v[0]; returns *this.  Reports */ \
    /* std::invalid_argument through R123_THROW when n has bits set */ \
    /* above the width of T. */ \
    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
        /* The shift count is itself guarded by the ternary, so */ \
        /* instantiations with sizeof(T) >= sizeof(n) never contain a */ \
        /* shift by the full width of n. */ \
        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
            R123_THROW(std::invalid_argument("arrayNxW::incr: Cannot increment by a value that does not fit in the value_type")); \
        if(n==1){ \
            ++v[0]; \
            /* A nonzero v[0] after ++ means it did not wrap: no carry. */ \
            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
        }else{ \
            v[0] += n; \
            /* n <= v[0] after the add means it did not wrap: no carry. */ \
            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
        } \
        /* Carry propagation.  The _N==k comparisons are compile-time */ \
        /* constants; only the wrapped-to-zero tests depend on data. */ \
        /* Subscripts of the form _N>k?k:0 keep the index inside v[] */ \
        /* even in instantiations where the statement is unreachable */ \
        /* because an earlier _N==k return always fires. */ \
        ++v[_N>1?1:0]; \
        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
        ++v[_N>2?2:0]; \
        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
        ++v[_N>3?3:0]; \
        for(size_t i=4; i<_N; ++i){ \
            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
            ++v[i]; \
        } \
        return *this; \
    } \
    /* Number of 32-bit words assemble() consumes for the whole array. */ \
    R123_CUDA_DEVICE size_t assembly_count() const{ return _N*((3+sizeof(value_type))/4); } \
    /* Load every element from consecutive 32-bit words starting at p32, */ \
    /* advancing (3+sizeof(value_type))/4 words per element. */ \
    R123_CUDA_DEVICE void assemble(uint32_t *p32){ \
        for(size_t i=0; i<_N; ++i){ \
            v[i] = assemble_from_u32<value_type>(p32); \
            p32 += (3+sizeof(value_type))/4; \
        } \
    }
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
/**
 * Stream-insertion adaptor used when printing array elements.
 *
 * Holds a const reference to a value; inserting the adaptor into a
 * std::ostream inserts the referenced value with its own operator<<.
 */
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& source) : v(source) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<T>& item){
        return out << item.v;
    }
};

/**
 * uint8_t specialization: the byte is widened to int before insertion,
 * so the stream receives an integer rather than a character.
 */
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& source) : v(source) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<uint8_t>& item){
        const int widened = static_cast<int>(item.v);
        return out << widened;
    }
};
00206
/**
 * Stream-extraction adaptor used when reading array elements.
 *
 * Holds a mutable reference to a value; extracting into the adaptor
 * extracts into the referenced value with its own operator>>.
 */
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

/**
 * uint8_t specialization: the element is read as a decimal integer
 * (through an int temporary) instead of as a single character.
 *
 * The temporary is zero-initialized so that a stream which is already in
 * a failed state -- in which case the extraction leaves the temporary
 * untouched -- stores a defined 0 rather than an indeterminate value.
 * A parsed value outside 0..255 sets failbit and is clamped to the
 * nearest bound instead of being silently truncated.
 */
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        const int max_byte = std::numeric_limits<uint8_t>::max();
        int parsed = 0;
        is >> parsed;
        if(parsed < 0 || parsed > max_byte){
            // Does not fit in a byte: report it the way the stream reports
            // any other unrepresentable number, and store a clamped value.
            is.setstate(std::ios_base::failbit);
            parsed = (parsed < 0) ? 0 : max_byte;
        }
        t.v = static_cast<uint8_t>(parsed);
        return is;
    }
};
00227
/*
 * CXXOVERLOADS(_N, W, T): free functions and alias that accompany struct
 * r123array<_N>x<W>:
 *   - operator<< writes the _N elements separated by single spaces, each
 *     through r123arrayinsertable<T>;
 *   - operator>> reads the _N elements in order, each through
 *     r123arrayextractable<T>;
 *   - r123::Array<_N>x<W> is a typedef for the struct.
 *
 * Both operators are declared inline: they are defined in a header, and a
 * non-inline definition would be emitted once per including translation
 * unit and collide at link time.
 */
#define CXXOVERLOADS(_N, W, T) \
    \
    inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
        os << r123arrayinsertable<T>(a.v[0]); \
        for(size_t i=1; i<_N; ++i) \
            os << " " << r123arrayinsertable<T>(a.v[i]); \
        return os; \
    } \
    \
    inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
        for(size_t i=0; i<_N; ++i){ \
            r123arrayextractable<T> x(a.v[i]); \
            is >> x; \
        } \
        return is; \
    } \
    \
    namespace r123{ \
        typedef r123array##_N##x##W Array##_N##x##W; \
    }
00248
00249 #endif
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
/* _r123array_tpl(_N, W, T): define struct r123array<_N>x<W> whose data
 * member is `T v[_N]`, paste CXXMETHODS(_N, W, T) into the struct body,
 * and follow the struct with CXXOVERLOADS(_N, W, T).  Both hooks are
 * empty when the header is compiled as C, leaving a plain struct. */
00264 #define _r123array_tpl(_N, W, T) \
00265 \
00266 \
00267 struct r123array##_N##x##W{ \
00268 T v[_N]; \
00269 CXXMETHODS(_N, W, T) \
00270 }; \
00271 \
00272 CXXOVERLOADS(_N, W, T)
00273
/* Concrete array types generated from _r123array_tpl:
 *   r123array1x32, r123array2x32, r123array4x32, r123array8x32  (uint32_t)
 *   r123array1x64, r123array2x64, r123array4x64                 (uint64_t)
 *   r123array16x8                                               (uint8_t)
 *   r123array1xm128i (r123m128i), only when R123_USE_SSE is nonzero. */
00276 _r123array_tpl(1, 32, uint32_t)
00277 _r123array_tpl(2, 32, uint32_t)
00278 _r123array_tpl(4, 32, uint32_t)
00279 _r123array_tpl(8, 32, uint32_t)
00280
00281 _r123array_tpl(1, 64, uint64_t)
00282 _r123array_tpl(2, 64, uint64_t)
00283 _r123array_tpl(4, 64, uint64_t)
00284
00285 _r123array_tpl(16, 8, uint8_t)
00286
00287 #if R123_USE_SSE
00288 _r123array_tpl(1, m128i, r123m128i)
00289 #endif
00290
00291
00292
00293
00294
/* R123_W(a): 8 times the size in bytes of one element of array type `a`,
 * i.e. the element width in bits on platforms with 8-bit bytes.  The null
 * pointer appears only inside sizeof, whose operand is not evaluated. */
00295 #define R123_W(a) (8*sizeof(((a *)0)->v[0]))
00296
00301 #endif
00302