/search.css" rel="stylesheet" type="text/css"/> /search.js">
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
boxmuller.hpp
Go to the documentation of this file.
1 /*
2 Copyright 2010-2011, D. E. Shaw Research.
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9 * Redistributions of source code must retain the above copyright
10  notice, this list of conditions, and the following disclaimer.
11 
12 * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions, and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16 * Neither the name of D. E. Shaw Research nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32 
33 // This file implements the Box-Muller method for generating gaussian
34 // random variables (GRVs). Box-Muller has the advantage of
35 // deterministically requiring exactly two uniform random variables as
36 // input and producing exactly two GRVs as output, which makes it
37 // especially well-suited to the counter-based generators in
38 // Random123. Other methods (e.g., Ziggurat, polar) require an
39 // indeterminate number of inputs for each output and so require a
40 // 'MicroURNG' to be used with Random123. The down side of Box-Muller
41 // is that it calls sincos, log and sqrt, which may be slow. However,
42 // on GPUs, these functions are remarkably fast, which makes
43 // Box-Muller the fastest GRV generator we know of on GPUs.
44 //
45 // This file exports two structs and one overloaded function,
46 // all in the r123 namespace:
47 // struct r123::float2{ float x,y; }
48 // struct r123::double2{ double x,y; }
49 //
50 // r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1);
51 // r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1);
52 //
53 // float2 and double2 are identical to their synonymous global-
54 // namespace structures in CUDA.
55 //
56 // This file may not be as portable, and has not been tested as
57 // rigorously as other files in the library, e.g., the generators.
58 // Nevertheless, we hope it is useful and we encourage developers to
59 // copy it and modify it for their own use. We invite comments and
60 // improvements.
61 
62 #ifndef _r123_BOXMULLER_HPP__
63 #define _r123_BOXMULLER_HPP__
64 
66 #include <Random123/uniform.hpp>
67 #include <math.h>
68 
69 namespace r123{
70 
71 #if !defined(__CUDACC__)
72 typedef struct { float x, y; } float2;
73 typedef struct { double x, y; } double2;
74 #else
75 typedef ::float2 float2;
76 typedef ::double2 double2;
77 #endif
78 
79 #if !defined(R123_NO_SINCOS) && defined(__APPLE__)
80 /* MacOS X 10.10.5 (2015) doesn't have sincosf */
81 #define R123_NO_SINCOS 1
82 #endif
83 
84 #if R123_NO_SINCOS /* enable this if sincos and sincosf are not in the math library */
85 R123_CUDA_DEVICE R123_STATIC_INLINE void sincosf(float x, float *s, float *c) {
86  *s = sinf(x);
87  *c = cosf(x);
88 }
89 
90 R123_CUDA_DEVICE R123_STATIC_INLINE void sincos(double x, double *s, double *c) {
91  *s = sin(x);
92  *c = cos(x);
93 }
94 #endif /* sincos is not in the math library */
95 
96 #if !defined(CUDART_VERSION) || CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */
97 
98 R123_CUDA_DEVICE R123_STATIC_INLINE void sincospif(float x, float *s, float *c){
99  const float PIf = 3.1415926535897932f;
100  sincosf(PIf*x, s, c);
101 }
102 
103 R123_CUDA_DEVICE R123_STATIC_INLINE void sincospi(double x, double *s, double *c) {
104  const double PI = 3.1415926535897932;
105  sincos(PI*x, s, c);
106 }
107 #endif /* sincospi is not in math lib */
108 
109 /*
110  * take two 32bit unsigned random values and return a float2 with
111  * two random floats in a normal distribution via a Box-Muller transform
112  */
113 R123_CUDA_DEVICE R123_STATIC_INLINE float2 boxmuller(uint32_t u0, uint32_t u1) {
114  float r;
115  float2 f;
116  sincospif(uneg11<float>(u0), &f.x, &f.y);
117  r = sqrtf(-2.f * logf(u01<float>(u1))); // u01 is guaranteed to avoid 0.
118  f.x *= r;
119  f.y *= r;
120  return f;
121 }
122 
123 /*
124  * take two 64bit unsigned random values and return a double2 with
125  * two random doubles in a normal distribution via a Box-Muller transform
126  */
127 R123_CUDA_DEVICE R123_STATIC_INLINE double2 boxmuller(uint64_t u0, uint64_t u1) {
128  double r;
129  double2 f;
130 
131  sincospi(uneg11<double>(u0), &f.x, &f.y);
132  r = sqrt(-2. * log(u01<double>(u1))); // u01 is guaranteed to avoid 0.
133  f.x *= r;
134  f.y *= r;
135  return f;
136 }
137 } // namespace r123
138 
139 #endif /* BOXMULLER_H__ */
static float2 boxmuller(uint32_t u0, uint32_t u1)
Definition: boxmuller.hpp:113
double y
Definition: boxmuller.hpp:73
float y
Definition: boxmuller.hpp:72
double x
Definition: boxmuller.hpp:73
float x
Definition: boxmuller.hpp:72
static void sincospi(double x, double *s, double *c)
Definition: boxmuller.hpp:103
Definition: boxmuller.hpp:72
static void sincospif(float x, float *s, float *c)
Definition: boxmuller.hpp:98
Definition: boxmuller.hpp:73