// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H

namespace Eigen {
namespace internal {

/** \internal
 * \brief Computes the modulo between an array and a scalar.
 */
template <typename Scalar>
struct scalar_mod_op {
  EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {}
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; }
  const Scalar m_divisor;
};
template <typename Scalar>
struct functor_traits<scalar_mod_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };


/** \internal
 * \brief Computes the modulo between 2 arrays.
 */
template <typename Scalar>
struct scalar_mod2_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op);
  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
};
template <typename Scalar>
struct functor_traits<scalar_mod2_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };

template <typename Scalar>
struct scalar_fmod_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op);
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
  operator()(const Scalar& a, const Scalar& b) const {
    return numext::fmod(a, b);
  }
};
template <typename Scalar>
struct functor_traits<scalar_fmod_op<Scalar> > {
  enum { Cost = 13,  // Reciprocal throughput of FPREM on Haswell.
         PacketAccess = false };
};
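
// Illustrative usage of the modulo functors via TensorBase::unaryExpr() /
// binaryExpr(); a hedged sketch, not part of this header's API surface:
//   Eigen::Tensor<int, 1> a(8);
//   a.setRandom();
//   Eigen::Tensor<int, 1> r = a.unaryExpr(
//       Eigen::internal::scalar_mod_op<int>(7));       // a(i) % 7
//   Eigen::Tensor<float, 1> x(8), y(8);
//   x.setRandom();
//   y.setConstant(0.5f);
//   Eigen::Tensor<float, 1> m = x.binaryExpr(
//       y, Eigen::internal::scalar_fmod_op<float>());  // fmod(x(i), y(i))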


/** \internal
 * \brief Template functor to compute the sigmoid of a scalar
 * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
 */
template <typename T>
struct scalar_sigmoid_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
    const T one = T(1);
    return one / (one + numext::exp(-x));
  }

  template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  Packet packetOp(const Packet& x) const {
    const Packet one = pset1<Packet>(T(1));
    return pdiv(one, padd(one, pexp(pnegate(x))));
  }
};

template <typename T>
struct functor_traits<scalar_sigmoid_op<T> > {
  enum {
    Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
    PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
                   packet_traits<T>::HasNegate && packet_traits<T>::HasExp
  };
};
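
// The scalar path above computes 1 / (1 + exp(-x)); the packet path evaluates
// the same expression with one pnegate, pexp, padd and pdiv per packet, which
// is what the Cost estimate in functor_traits approximates. Illustrative
// usage (a sketch, assuming TensorBase::unaryExpr()):
//   Eigen::Tensor<float, 1> x(3), s(3);
//   x.setValues({-1.f, 0.f, 1.f});
//   s = x.unaryExpr(Eigen::internal::scalar_sigmoid_op<float>());
//   // s(1) == 0.5f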


template<typename Reducer, typename Device>
struct reducer_traits {
  enum {
    Cost = 1,
    PacketAccess = false
  };
};

// Standard reduction functors
template <typename T> struct SumReducer
{
  static const bool PacketAccess = packet_traits<T>::HasAdd;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    (*accum) += t;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = padd<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    internal::scalar_cast_op<int, T> conv;
    return conv(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return saccum + predux(vaccum);
  }
};

template <typename T, typename Device>
struct reducer_traits<SumReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = PacketType<T, Device>::HasAdd
  };
};
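
// The reducer protocol consumed by the tensor reduction evaluators: start
// from initialize(), fold each element in with reduce() (or reducePacket()
// on the vectorized path), then call finalize() / finalizeBoth() exactly
// once at the end. A minimal scalar-only sketch:
//   Eigen::internal::SumReducer<float> r;
//   float acc = r.initialize();                         // 0
//   const float data[3] = {1.f, 2.f, 3.f};
//   for (int i = 0; i < 3; ++i) r.reduce(data[i], &acc);
//   float sum = r.finalize(acc);                        // 6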


template <typename T> struct MeanReducer
{
  static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
  static const bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  MeanReducer() : scalarCount_(0), packetCount_(0) { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    (*accum) += t;
    scalarCount_++;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
    (*accum) = padd<Packet>(*accum, p);
    packetCount_++;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    internal::scalar_cast_op<int, T> conv;
    return conv(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum / scalarCount_;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, pset1<Packet>(packetCount_));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
  }

 protected:
  DenseIndex scalarCount_;
  DenseIndex packetCount_;
};

template <typename T, typename Device>
struct reducer_traits<MeanReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = PacketType<T, Device>::HasAdd
  };
};
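
// MeanReducer is the one stateful reducer in this file: reduce() and
// reducePacket() are non-const because they bump the element counts, which is
// why IsStateful is true and each evaluation thread must own its own copy.
// finalizeBoth() recovers the mean from the two partial accumulators as
//   (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * PacketSize);
// e.g. 2 scalars plus one packet of 4 floats with a combined sum of 18
// yields 18 / (2 + 1 * 4) == 3.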


template <typename T> struct MaxReducer
{
  static const bool PacketAccess = packet_traits<T>::HasMax;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t > *accum) { *accum = t; }
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmax<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return Eigen::NumTraits<T>::lowest();
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return numext::maxi(saccum, predux_max(vaccum));
  }
};

template <typename T, typename Device>
struct reducer_traits<MaxReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = PacketType<T, Device>::HasMax
  };
};


template <typename T> struct MinReducer
{
  static const bool PacketAccess = packet_traits<T>::HasMin;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t < *accum) { *accum = t; }
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmin<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return Eigen::NumTraits<T>::highest();
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return numext::mini(saccum, predux_min(vaccum));
  }
};

template <typename T, typename Device>
struct reducer_traits<MinReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = PacketType<T, Device>::HasMin
  };
};


template <typename T> struct ProdReducer
{
  static const bool PacketAccess = packet_traits<T>::HasMul;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    (*accum) *= t;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmul<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    internal::scalar_cast_op<int, T> conv;
    return conv(1);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return saccum * predux_mul(vaccum);
  }
};

template <typename T, typename Device>
struct reducer_traits<ProdReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::MulCost,
    PacketAccess = PacketType<T, Device>::HasMul
  };
};
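
// MaxReducer, MinReducer and ProdReducer follow the same stateless protocol
// as SumReducer; only the combining operation and the identity returned by
// initialize() differ: NumTraits<T>::lowest() for max, NumTraits<T>::highest()
// for min, and 1 for the product.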


struct AndReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
    *accum = *accum && t;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
    return accum;
  }
};

template <typename Device>
struct reducer_traits<AndReducer, Device> {
  enum {
    Cost = 1,
    PacketAccess = false
  };
};


struct OrReducer {
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
    *accum = *accum || t;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
    return false;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
    return accum;
  }
};

template <typename Device>
struct reducer_traits<OrReducer, Device> {
  enum {
    Cost = 1,
    PacketAccess = false
  };
};
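
// AndReducer and OrReducer reduce over bool with the identities true and
// false respectively; they are the reducers behind the boolean reductions
// TensorBase::all() and TensorBase::any().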


// Argmin/Argmax reducers
template <typename T> struct ArgMaxTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
    if (t.second > accum->second) { *accum = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::lowest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};

template <typename T, typename Device>
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = false
  };
};


template <typename T> struct ArgMinTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
    if (t.second < accum->second) { *accum = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::highest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};

template <typename T, typename Device>
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost,
    PacketAccess = false
  };
};
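
// T is expected to be a pair-like type such as the Tuple<Index, Scalar>
// defined in TensorMeta.h (first = index, second = value): the reduction
// keeps the tuple whose value compares best, which is how argmax/argmin
// reductions recover the winning index. A hedged scalar sketch:
//   typedef Eigen::Tuple<Eigen::DenseIndex, float> Pair;
//   Eigen::internal::ArgMaxTupleReducer<Pair> r;
//   Pair best = r.initialize();        // (0, lowest<float>())
//   r.reduce(Pair(3, 7.f), &best);
//   r.reduce(Pair(5, 2.f), &best);     // best remains (3, 7.f)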


// Random number generation
namespace {
#ifdef __CUDA_ARCH__
__device__ int get_random_seed() {
  return clock();
}
#else
static inline int get_random_seed() {
#ifdef _WIN32
  SYSTEMTIME st;
  GetSystemTime(&st);
  return st.wSecond + 1000 * st.wMilliseconds;
#elif defined __APPLE__
  return static_cast<int>(mach_absolute_time());
#else
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return static_cast<int>(ts.tv_nsec);
#endif
}
#endif
}  // namespace
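
// Note that these seeds are derived from sub-second clock fields only
// (milliseconds on Windows, nanoseconds elsewhere, the raw clock on device),
// so they merely de-correlate runs; they are not a source of high-quality
// entropy.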

#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
// We're not compiling a CUDA kernel.
template <typename T> class UniformRandomGenerator {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      srand(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  T operator()(Index) const {
    return random<T>();
  }
  template<typename Index, typename PacketType>
  PacketType packetOp(Index) const {
    const int packetSize = internal::unpacket_traits<PacketType>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = random<T>();
    }
    return internal::pload<PacketType>(values);
  }

 private:
  bool m_deterministic;
};

#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900
template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {
    if (!deterministic) {
      m_generator->seed(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
    m_generator = new std::mt19937();
    m_generator->seed(other(0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }
  ~UniformRandomGenerator() {
    delete m_generator;
  }

  template<typename Index>
  float operator()(Index) const {
    return m_distribution(*m_generator);
  }
  template<typename Index, typename PacketType>
  PacketType packetOp(Index i) const {
    const int packetSize = internal::unpacket_traits<PacketType>::size;
    EIGEN_ALIGN_MAX float values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i);
    }
    return internal::pload<PacketType>(values);
  }

 private:
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
  // Make sure m_deterministic comes first to match the layout of the CPU
  // version of the code.
  bool m_deterministic;
  std::mt19937* m_generator;
  mutable std::uniform_real_distribution<float> m_distribution;
};

template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {
    if (!deterministic) {
      m_generator->seed(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
    m_generator = new std::mt19937();
    m_generator->seed(other(0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }
  ~UniformRandomGenerator() {
    delete m_generator;
  }

  template<typename Index>
  double operator()(Index) const {
    return m_distribution(*m_generator);
  }
  template<typename Index, typename PacketType>
  PacketType packetOp(Index i) const {
    const int packetSize = internal::unpacket_traits<PacketType>::size;
    EIGEN_ALIGN_MAX double values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i);
    }
    return internal::pload<PacketType>(values);
  }

 private:
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
  // Make sure m_deterministic comes first to match the layout of the CPU
  // version of the code.
  bool m_deterministic;
  std::mt19937* m_generator;
  mutable std::uniform_real_distribution<double> m_distribution;
};
#endif

#else

// We're compiling a CUDA kernel.
template <typename T> class UniformRandomGenerator;

template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ float operator()(Index) const {
    return curand_uniform(&m_state);
  }
  template<typename Index, typename PacketType>
  __device__ float4 packetOp(Index) const {
    EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
    return curand_uniform4(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ double operator()(Index) const {
    return curand_uniform_double(&m_state);
  }
  template<typename Index, typename PacketType>
  __device__ double2 packetOp(Index) const {
    EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
    return curand_uniform2_double(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class UniformRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<float> operator()(Index) const {
    float4 vals = curand_uniform4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class UniformRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<double> operator()(Index) const {
    double2 vals = curand_uniform2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

#endif

template <typename Scalar>
struct functor_traits<UniformRandomGenerator<Scalar> > {
  enum {
    // Rough estimate.
    Cost = 100 * NumTraits<Scalar>::MulCost,
    PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
  };
};
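
// Illustrative host-side usage; a sketch assuming the templated
// TensorBase::setRandom<Generator>() overload, which default-constructs the
// generator (and therefore uses the deterministic seed):
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom<Eigen::internal::UniformRandomGenerator<float> >();
// Constructing the generator with deterministic = false instead seeds it
// from get_random_seed(), so each run produces a different sequence.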


#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900)
// We're not compiling a CUDA kernel.
template <typename T> class NormalRandomGenerator {
 public:
  static const bool PacketAccess = true;

  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) {
    if (!deterministic) {
      m_generator->seed(get_random_seed());
    }
  }
  NormalRandomGenerator(const NormalRandomGenerator& other)
      : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) {
    m_generator->seed(other(0) * UINT_MAX);
  }
  ~NormalRandomGenerator() {
    delete m_generator;
  }
  template<typename Index>
  T operator()(Index) const {
    return m_distribution(*m_generator);
  }
  template<typename Index, typename PacketType>
  PacketType packetOp(Index) const {
    const int packetSize = internal::unpacket_traits<PacketType>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = m_distribution(*m_generator);
    }
    return internal::pload<PacketType>(values);
  }

 private:
  // No assignment.
  NormalRandomGenerator& operator = (const NormalRandomGenerator&);

  bool m_deterministic;
  mutable std::normal_distribution<T> m_distribution;
  std::mt19937* m_generator;
};

#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)

// We're compiling a CUDA kernel.
template <typename T> class NormalRandomGenerator;

template <> class NormalRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ float operator()(Index) const {
    return curand_normal(&m_state);
  }
  template<typename Index, typename PacketType>
  __device__ float4 packetOp(Index) const {
    EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
    return curand_normal4(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class NormalRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ double operator()(Index) const {
    return curand_normal_double(&m_state);
  }
  template<typename Index, typename PacketType>
  __device__ double2 packetOp(Index) const {
    EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
    return curand_normal2_double(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class NormalRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<float> operator()(Index) const {
    float4 vals = curand_normal4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class NormalRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<double> operator()(Index) const {
    double2 vals = curand_normal2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

#else

// Fallback for pre-C++11 host compilers: no std::normal_distribution is
// available, so this generator only records the deterministic flag and does
// not provide sampling operators.
template <typename T> class NormalRandomGenerator {
 public:
  static const bool PacketAccess = false;
  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {}

 private:
  bool m_deterministic;
};

#endif

template <typename Scalar>
struct functor_traits<NormalRandomGenerator<Scalar> > {
  enum {
    // Rough estimate.
    Cost = 100 * NumTraits<Scalar>::MulCost,
    PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
  };
};
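
// As with the uniform generator, a hedged usage sketch assuming the
// templated TensorBase::setRandom<Generator>() overload; on the host path
// samples are drawn from N(0, 1):
//   Eigen::Tensor<double, 1> n(1000);
//   n.setRandom<Eigen::internal::NormalRandomGenerator<double> >();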


template <typename T, typename Index, size_t NumDims>
class GaussianGenerator {
 public:
  static const bool PacketAccess = false;

  EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
                                      const array<T, NumDims>& std_devs)
      : m_means(means)
  {
    for (size_t i = 0; i < NumDims; ++i) {
      m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
    }
  }

  T operator()(const array<Index, NumDims>& coordinates) const {
    T tmp = T(0);
    for (size_t i = 0; i < NumDims; ++i) {
      T offset = coordinates[i] - m_means[i];
      tmp += offset * offset / m_two_sigmas[i];
    }
    return numext::exp(-tmp);
  }

 private:
  array<T, NumDims> m_means;
  array<T, NumDims> m_two_sigmas;
};

template <typename T, typename Index, size_t NumDims>
struct functor_traits<GaussianGenerator<T, Index, NumDims> > {
  enum {
    Cost = NumDims * (2 * NumTraits<T>::AddCost + NumTraits<T>::MulCost +
                      functor_traits<scalar_quotient_op<T, T> >::Cost) +
           functor_traits<scalar_exp_op<T> >::Cost,
    PacketAccess = GaussianGenerator<T, Index, NumDims>::PacketAccess
  };
};
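
// GaussianGenerator is meant for coordinate-driven expressions such as
// TensorBase::generate(): at each coordinate x it evaluates
// exp(-sum_i (x_i - mean_i)^2 / (2 * sigma_i^2)), an unnormalized Gaussian
// bump. A hedged sketch:
//   Eigen::array<float, 2> means = {{10.f, 10.f}};
//   Eigen::array<float, 2> stddevs = {{3.f, 3.f}};
//   Eigen::Tensor<float, 2> bump(21, 21);
//   bump = bump.generate(
//       Eigen::internal::GaussianGenerator<float, Eigen::DenseIndex, 2>(
//           means, stddevs));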

} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H