32 #ifndef _r123array_dot_h__
33 #define _r123array_dot_h__
/* Empty expansions of the two helper macros. Non-empty definitions of the
   same names appear further down; presumably a preprocessor conditional that
   is not visible in this listing selects between the two sets -- TODO confirm. */
38 #define CXXMETHODS(_N, W, T)
39 #define CXXOVERLOADS(_N, W, T)
/* Builds one value_type out of consecutive 32-bit words read through p32.
   (The function header and the return statement are not visible in this
   listing; the name assemble_from_u32 is taken from the call site in seed().)

   The loop runs (3+sizeof(value_type))/4 times, i.e. the number of 32-bit
   words needed to cover value_type, rounded up. Each pass reads *p32,
   post-increments the pointer, widens the word to value_type, shifts it left
   by 32*i bits and ORs it into v, so the first word read ends up in the
   lowest 32 bits of the result. */
68 template <
typename value_type>
71 for(
size_t i=0; i<(3+
sizeof(value_type))/4; ++i)
72 v |= ((value_type)(*p32++)) << (32*i);
/* CXXMETHODS(_N, W, T): member declarations that _r123array_tpl pastes into
   the body of struct r123array<_N>x<W>. _N is the element count, T the
   element type; W is only used to spell the struct name. The members operate
   on an array member named v, whose declaration is not part of this macro.
   This first part declares the container-style nested types.
   NOTE(review): `pointer` is used by data() below but its typedef is not
   visible in this listing. */
77 #define CXXMETHODS(_N, W, T) \
78 typedef T value_type; \
79 typedef T* iterator; /* iterators are plain pointers to elements */ \
80 typedef const T* const_iterator; \
81 typedef value_type& reference; \
82 typedef const value_type& const_reference; \
83 typedef size_t size_type; \
84 typedef ptrdiff_t difference_type; \
86 typedef const T* const_pointer; \
87 typedef std::reverse_iterator<iterator> reverse_iterator; \
88 typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
90 enum {static_size = _N}; /* element count exposed as an enumerator */ \
91 R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} /* unchecked element access */ \
92 R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} /* unchecked, read-only */ \
93 R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } /* checked access: i >= _N goes through R123_THROW (defined elsewhere) */ \
94 R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } /* const overload of the checked access */ \
95 R123_CUDA_DEVICE size_type size() const { return _N; } /* always the fixed element count */ \
96 R123_CUDA_DEVICE size_type max_size() const { return _N; } /* same value as size() */ \
97 R123_CUDA_DEVICE bool empty() const { return _N==0; }; /* NOTE(review): the ';' after the body is a redundant empty declaration */ \
98 R123_CUDA_DEVICE iterator begin() { return &v[0]; } /* address of the first element */ \
99 R123_CUDA_DEVICE iterator end() { return &v[_N]; } /* address one past the last element */ \
100 R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
101 R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
102 R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } /* same address as begin(), always const */ \
103 R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
104 R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } /* reverse traversal starts from end() */ \
105 R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
106 R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } /* reverse traversal stops at begin() */ \
107 R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
108 R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
109 R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
110 R123_CUDA_DEVICE pointer data(){ return &v[0]; } /* raw pointer to the first element */ \
111 R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
112 R123_CUDA_DEVICE reference front(){ return v[0]; } /* first element */ \
113 R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
114 R123_CUDA_DEVICE reference back(){ return v[_N-1]; } /* last element, index _N-1 */ \
115 R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
116 R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ /* element-wise comparison against an array of the same type */ \
118 for (size_t i = 0; i < _N; ++i) \
119 if (v[i] != rhs.v[i]) return false; /* first mismatch decides; the rest of the body is not visible in this listing */ \
122 R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } /* negation of operator== */ \
124 R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } /* assigns val to every element */ \
125 R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ /* walks all _N elements; the loop body is not visible in this listing */ \
127 for (size_t i = 0; i < _N; ++i) { \
133 R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ /* returns *this; n defaults to 1; several statements of the body are not visible in this listing */ \
137 if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) /* true when n has bits above the width of T; the inner ?: makes the shift count 0 when T is not narrower than n */ \
138 return incr_carefully(n); /* such n is delegated to incr_carefully */ \
141 if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; /* done if there is only one element or v[0] is nonzero -- presumably checked right after v[0] was incremented, TODO confirm */ \
144 if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; /* done if n<=v[0] -- presumably v[0] has just had n added, so this means no wrap-around, TODO confirm */ \
156 if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; /* index is 1 when _N>1, otherwise 0 */ \
158 if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; /* index is 2 when _N>2, otherwise 0 */ \
160 for(size_t i=4; i<_N; ++i){ /* remaining elements, tested via v[i-1] */ \
161 if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; /* stop as soon as v[i-1] is nonzero; nonzero is hinted as the expected case */ \
168 template <typename SeedSeq> /* SeedSeq only needs a generate(begin, end) member, as used below */ \
169 R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ /* static factory: builds an array from 32-bit words produced by ss */ \
170 r123array##_N##x##W ret; \
171 const size_t Ngen = _N*((3+sizeof(value_type))/4); /* 32-bit words needed: words per element (rounded up) times _N */ \
172 uint32_t u32[Ngen]; \
173 uint32_t *p32 = &u32[0]; /* read cursor into u32 */ \
174 ss.generate(&u32[0], &u32[Ngen]); /* fill the whole buffer in one call */ \
175 for(size_t i=0; i<_N; ++i){ \
176 ret.v[i] = assemble_from_u32<value_type>(p32); /* combine this element's words into one value_type */ \
177 p32 += (3+sizeof(value_type))/4; /* advance the cursor by the words per element */ \
182 R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ /* called by incr() when n has bits above the width of T; most of the body is not visible in this listing */ \
187 const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); /* bit width of value_type when n is wider than it, otherwise 0 */ \
188 for(size_t i=1; i<_N; ++i){ /* visits elements 1.._N-1; element 0 is presumably handled before the loop -- TODO confirm */ \
/* r123arrayinsertable<T>: small wrapper that the array operator<< (see
   CXXOVERLOADS) constructs around each element before inserting it into the
   stream. The constructor initialises the member v from t_. The template
   header, the declaration of v and the body of operator<< are not visible in
   this listing. */
214 struct r123arrayinsertable{
216 r123arrayinsertable(
const T& t_) : v(t_) {}
217 friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){
/* Specialisation for uint8_t elements. The value is cast to int before it is
   inserted, so the stream formats it as an integer; where uint8_t is a
   character type it would otherwise be inserted as a single character. */
223 struct r123arrayinsertable<uint8_t>{
225 r123arrayinsertable(
const uint8_t& t_) : v(t_) {}
226 friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){
227 return os << (int)t.v;
/* r123arrayextractable<T>: counterpart of r123arrayinsertable for input. The
   array operator>> (see CXXOVERLOADS) constructs one around each element.
   The constructor takes the element by non-const reference and initialises
   the member v from it; presumably v is itself a reference so that
   extraction writes into the element -- TODO confirm, the declaration of v
   and the body of operator>> are not visible in this listing. */
232 struct r123arrayextractable{
234 r123arrayextractable(T& t_) : v(t_) {}
235 friend std::istream&
operator>>(std::istream& is, r123arrayextractable<T>& t){
/* Specialisation of the extraction wrapper for uint8_t elements. The body of
   operator>> is not visible in this listing; presumably it reads through a
   wider integer type, mirroring the (int) cast used by
   r123arrayinsertable<uint8_t> -- TODO confirm. */
241 struct r123arrayextractable<uint8_t>{
243 r123arrayextractable(uint8_t& t_) : v(t_) {}
244 friend std::istream&
operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
/* CXXOVERLOADS(_N, W, T): non-member declarations that accompany
   struct r123array<_N>x<W>: stream insertion, stream extraction and a typedef
   giving the struct the alternative name Array<_N>x<W>. Some lines of both
   operators (including their return statements) are not visible in this
   listing. */
252 #define CXXOVERLOADS(_N, W, T) \
254 inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
255 os << r123arrayinsertable<T>(a.v[0]); /* first element, no leading separator */ \
256 for(size_t i=1; i<_N; ++i) \
257 os << " " << r123arrayinsertable<T>(a.v[i]); /* remaining elements, each preceded by one space */ \
261 inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
262 for(size_t i=0; i<_N; ++i){ \
263 r123arrayextractable<T> x(a.v[i]); /* wrapper bound to element i */ \
270 typedef r123array##_N##x##W Array##_N##x##W; /* alternative spelling of the struct name */ \
/* _r123array_tpl(_N, W, T): defines struct r123array<_N>x<W>, expands
   CXXMETHODS inside it and then expands CXXOVERLOADS. The lines declaring
   the data member and closing the struct are not visible in this listing;
   presumably CXXOVERLOADS is expanded after the closing brace, since it
   declares non-member operators -- TODO confirm. */
288 #define _r123array_tpl(_N, W, T) \
291 struct r123array##_N##x##W{ \
293 CXXMETHODS(_N, W, T) \
296 CXXOVERLOADS(_N, W, T)
/* Concrete array types generated from the template macro; by its token
   pasting these are r123array4x32, r123array8x32, r123array1x64,
   r123array2x64, r123array4x64 and r123array16x8. */
302 _r123array_tpl(4, 32, uint32_t)
303 _r123array_tpl(8, 32, uint32_t)
305 _r123array_tpl(1, 64, uint64_t)
306 _r123array_tpl(2, 64, uint64_t)
307 _r123array_tpl(4, 64, uint64_t)
309 _r123array_tpl(16, 8, uint8_t)
/* R123_W(a): width in bits of one element of array type `a`, computed as
   8 * sizeof(a::v[0]). The null-pointer expression is only the operand of
   sizeof, so it is never evaluated. Assumes 8-bit bytes. */
319 #define R123_W(a) (8*sizeof(((a *)0)->v[0]))
_r123array_tpl(1, 32, uint32_t) _r123array_tpl(2
static std::istream & operator>>(std::istream &is, r123m128i &m)
Definition: sse.h:250
T assemble_from_u32(uint32_t *p32)