// Eigen 3.2.93
// AssignEvaluator.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
6 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
7 //
8 // This Source Code Form is subject to the terms of the Mozilla
9 // Public License v. 2.0. If a copy of the MPL was not distributed
10 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 
12 #ifndef EIGEN_ASSIGN_EVALUATOR_H
13 #define EIGEN_ASSIGN_EVALUATOR_H
14 
15 namespace Eigen {
16 
17 // This implementation is based on Assign.h
18 
19 namespace internal {
20 
21 /***************************************************************************
22 * Part 1 : the logic deciding a strategy for traversal and unrolling *
23 ***************************************************************************/
24 
25 // copy_using_evaluator_traits is based on assign_traits
26 
// Compile-time analysis of a dense assignment: given the destination and
// source evaluators plus the assignment functor, this trait decides which
// Traversal strategy (default / linear / inner-, linear-, or slice-vectorized)
// and which Unrolling policy to use, and which SIMD packet type to load/store.
27 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
28 struct copy_using_evaluator_traits
29 {
30  typedef typename DstEvaluator::XprType Dst;
31  typedef typename Dst::Scalar DstScalar;
32 
33  enum {
34  DstFlags = DstEvaluator::Flags,
35  SrcFlags = SrcEvaluator::Flags
36  };
37 
38 public:
39  enum {
40  DstAlignment = DstEvaluator::Alignment,
41  SrcAlignment = SrcEvaluator::Alignment,
42  DstHasDirectAccess = DstFlags & DirectAccessBit,
// JointAlignment: the alignment that can be assumed for both sides at once.
43  JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
44  };
45 
46 private:
// InnerSize/InnerMaxSize: length of one inner run of the destination, i.e. a
// column (col-major) or a row (row-major); a vector is a single inner run.
47  enum {
48  InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
49  : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
50  : int(Dst::RowsAtCompileTime),
51  InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
52  : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
53  : int(Dst::MaxRowsAtCompileTime),
54  OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
55  MaxSizeAtCompileTime = Dst::SizeAtCompileTime
56  };
57 
58  // TODO distinguish between linear traversal and inner-traversals
59  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
60  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
61 
62  enum {
63  LinearPacketSize = unpacket_traits<LinearPacketType>::size,
64  InnerPacketSize = unpacket_traits<InnerPacketType>::size
65  };
66 
67 public:
68  enum {
69  LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
70  InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
71  };
72 
73 private:
74  enum {
75  DstIsRowMajor = DstFlags&RowMajorBit,
76  SrcIsRowMajor = SrcFlags&RowMajorBit,
77  StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
// Vectorization requires matching storage orders, packet access on both
// evaluators, and a functor with a packet path.
78  MightVectorize = bool(StorageOrdersAgree)
79  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
80  && bool(functor_traits<AssignFunc>::PacketAccess),
// Whole inner runs can be vectorized only if both the inner size and the
// outer stride are compile-time multiples of the inner packet size.
81  MayInnerVectorize = MightVectorize
82  && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
83  && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
84  && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
85  MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
86  MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
87  && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
88  /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
89  so it's only good for large enough sizes. */
90  MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
91  && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
92  /* slice vectorization can be slow, so we only want it if the slices are big, which is
93  indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
94  in a fixed-size matrix */
95  };
96 
97 public:
// Strategy selection: the ordering of the tests below encodes the preference
// among the candidate traversals.
98  enum {
99  Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
100  : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
101  : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
102  : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
103  : int(MayLinearize) ? int(LinearTraversal)
104  : int(DefaultTraversal),
105  Vectorized = int(Traversal) == InnerVectorizedTraversal
106  || int(Traversal) == LinearVectorizedTraversal
107  || int(Traversal) == SliceVectorizedTraversal
108  };
109 
// The packet type actually used, consistent with the chosen traversal.
110  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
111 
112 private:
113  enum {
114  ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
115  : Vectorized ? InnerPacketSize
116  : 1,
// Unrolling is permitted while the estimated cost (size * per-coefficient
// read cost) stays below EIGEN_UNROLLING_LIMIT scaled by the packet size.
117  UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
118  MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
119  && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
120  MayUnrollInner = int(InnerSize) != Dynamic
121  && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
122  };
123 
124 public:
125  enum {
126  Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
127  ? (
128  int(MayUnrollCompletely) ? int(CompleteUnrolling)
129  : int(MayUnrollInner) ? int(InnerUnrolling)
130  : int(NoUnrolling)
131  )
132  : int(Traversal) == int(LinearVectorizedTraversal)
133  ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
134  ? int(CompleteUnrolling)
135  : int(NoUnrolling) )
136  : int(Traversal) == int(LinearTraversal)
137  ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
138  : int(NoUnrolling) )
139  : int(NoUnrolling)
140  };
141 
// Debug helper: prints every decision made above (enable with EIGEN_DEBUG_ASSIGN).
142 #ifdef EIGEN_DEBUG_ASSIGN
143  static void debug()
144  {
145  std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
146  std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
147  std::cerr.setf(std::ios::hex, std::ios::basefield);
148  std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
149  std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
150  std::cerr.unsetf(std::ios::hex);
151  EIGEN_DEBUG_VAR(DstAlignment)
152  EIGEN_DEBUG_VAR(SrcAlignment)
153  EIGEN_DEBUG_VAR(LinearRequiredAlignment)
154  EIGEN_DEBUG_VAR(InnerRequiredAlignment)
155  EIGEN_DEBUG_VAR(JointAlignment)
156  EIGEN_DEBUG_VAR(InnerSize)
157  EIGEN_DEBUG_VAR(InnerMaxSize)
158  EIGEN_DEBUG_VAR(LinearPacketSize)
159  EIGEN_DEBUG_VAR(InnerPacketSize)
160  EIGEN_DEBUG_VAR(ActualPacketSize)
161  EIGEN_DEBUG_VAR(StorageOrdersAgree)
162  EIGEN_DEBUG_VAR(MightVectorize)
163  EIGEN_DEBUG_VAR(MayLinearize)
164  EIGEN_DEBUG_VAR(MayInnerVectorize)
165  EIGEN_DEBUG_VAR(MayLinearVectorize)
166  EIGEN_DEBUG_VAR(MaySliceVectorize)
167  std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
168  EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
169  EIGEN_DEBUG_VAR(UnrollingLimit)
170  EIGEN_DEBUG_VAR(MayUnrollCompletely)
171  EIGEN_DEBUG_VAR(MayUnrollInner)
172  std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
173  std::cerr << std::endl;
174  }
175 #endif
176 };
177 
178 /***************************************************************************
179 * Part 2 : meta-unrollers
180 ***************************************************************************/
181 
182 /************************
183 *** Default traversal ***
184 ************************/
185 
// Meta-unroller for coefficient-wise copy: Index counts flat coefficients in
// storage order; outer/inner are recovered from it at compile time. The
// recursion terminates at the <Kernel, Stop, Stop> specialization.
186 template<typename Kernel, int Index, int Stop>
187 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
188 {
189  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
190  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
191  typedef typename DstEvaluatorType::XprType DstXprType;
192 
193  enum {
194  outer = Index / DstXprType::InnerSizeAtCompileTime,
195  inner = Index % DstXprType::InnerSizeAtCompileTime
196  };
197 
198  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
199  {
200  kernel.assignCoeffByOuterInner(outer, inner);
201  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
202  }
203 };
204 
// Base case: empty run() terminates the compile-time recursion.
205 template<typename Kernel, int Stop>
206 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
207 {
208  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
209 };
210 
// Unrolls a single inner run only; the outer index is a runtime parameter.
211 template<typename Kernel, int Index_, int Stop>
212 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
213 {
214  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
215  {
216  kernel.assignCoeffByOuterInner(outer, Index_);
217  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
218  }
219 };
220 
221 template<typename Kernel, int Stop>
222 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
223 {
224  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
225 };
226 
227 /***********************
228 *** Linear traversal ***
229 ***********************/
230 
// Fully unrolled copy addressing coefficients by their flat linear index.
231 template<typename Kernel, int Index, int Stop>
232 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
233 {
234  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
235  {
236  kernel.assignCoeff(Index);
237  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
238  }
239 };
240 
241 template<typename Kernel, int Stop>
242 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
243 {
244  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
245 };
246 
247 /**************************
248 *** Inner vectorization ***
249 **************************/
250 
// Fully unrolled packet-wise copy: advances by the packet size at each step,
// using the compile-time alignments computed by the assignment traits.
251 template<typename Kernel, int Index, int Stop>
252 struct copy_using_evaluator_innervec_CompleteUnrolling
253 {
254  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
255  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
256  typedef typename DstEvaluatorType::XprType DstXprType;
257  typedef typename Kernel::PacketType PacketType;
258 
259  enum {
260  outer = Index / DstXprType::InnerSizeAtCompileTime,
261  inner = Index % DstXprType::InnerSizeAtCompileTime,
262  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
263  DstAlignment = Kernel::AssignmentTraits::DstAlignment
264  };
265 
266  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
267  {
268  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
269  enum { NextIndex = Index + unpacket_traits<PacketType>::size };
270  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
271  }
272 };
273 
274 template<typename Kernel, int Stop>
275 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
276 {
277  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
278 };
279 
// Packet-wise unrolling of a single inner run (runtime outer index).
280 template<typename Kernel, int Index_, int Stop>
281 struct copy_using_evaluator_innervec_InnerUnrolling
282 {
283  typedef typename Kernel::PacketType PacketType;
284  enum {
285  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
286  DstAlignment = Kernel::AssignmentTraits::DstAlignment
287  };
288  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
289  {
290  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
291  enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
292  copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
293  }
294 };
295 
296 template<typename Kernel, int Stop>
297 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
298 {
299  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
300 };
301 
302 /***************************************************************************
303 * Part 3 : implementation of all cases
304 ***************************************************************************/
305 
306 // dense_assignment_loop is based on assign_impl
307 
// Primary template: dispatches to a specialization selected by the
// Traversal/Unrolling decisions made in copy_using_evaluator_traits.
308 template<typename Kernel,
309  int Traversal = Kernel::AssignmentTraits::Traversal,
310  int Unrolling = Kernel::AssignmentTraits::Unrolling>
311 struct dense_assignment_loop;
312 
313 /************************
314 *** Default traversal ***
315 ************************/
316 
// Plain double loop over outer/inner indices, one coefficient at a time.
317 template<typename Kernel>
318 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
319 {
320  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
321  {
322  for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
323  for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
324  kernel.assignCoeffByOuterInner(outer, inner);
325  }
326  }
327  }
328 };
329 
// Whole assignment unrolled at compile time, coefficient by coefficient.
330 template<typename Kernel>
331 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
332 {
333  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
334  {
335  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
336  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
337  }
338 };
339 
// Runtime loop over the outer dimension, compile-time-unrolled inner runs.
340 template<typename Kernel>
341 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
342 {
343  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
344  {
345  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
346 
347  const Index outerSize = kernel.outerSize();
348  for(Index outer = 0; outer < outerSize; ++outer)
349  copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
350  }
351 };
352 
353 /***************************
354 *** Linear vectorization ***
355 ***************************/
356 
357 
358 // The goal of unaligned_dense_assignment_loop is simply to factorize the handling
359 // of the non vectorizable beginning and ending parts
360 
// Primary template (IsAligned = true case): no unaligned head/tail to handle.
361 template <bool IsAligned = false>
362 struct unaligned_dense_assignment_loop
363 {
364  // if IsAligned = true, then do nothing
365  template <typename Kernel>
366  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
367 };
368 
// Unaligned case: scalar loop over the half-open range [start, end).
369 template <>
370 struct unaligned_dense_assignment_loop<false>
371 {
372  // MSVC must not inline this functions. If it does, it fails to optimize the
373  // packet access path.
374  // FIXME check which version exhibits this issue
375 #if EIGEN_COMP_MSVC
376  template <typename Kernel>
377  static EIGEN_DONT_INLINE void run(Kernel &kernel,
378  Index start,
379  Index end)
380 #else
381  template <typename Kernel>
382  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
383  Index start,
384  Index end)
385 #endif
386  {
387  for (Index index = start; index < end; ++index)
388  kernel.assignCoeff(index);
389  }
390 };
391 
// Linear vectorized traversal: a scalar head up to the first suitably aligned
// coefficient, a packet-wise middle section, and a scalar tail for the rest.
392 template<typename Kernel>
393 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
394 {
395  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
396  {
397  const Index size = kernel.size();
398  typedef typename Kernel::Scalar Scalar;
399  typedef typename Kernel::PacketType PacketType;
400  enum {
401  requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
402  packetSize = unpacket_traits<PacketType>::size,
403  dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
404  dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
405  : int(Kernel::AssignmentTraits::DstAlignment),
406  srcAlignment = Kernel::AssignmentTraits::JointAlignment
407  };
// If the destination is not statically aligned, locate the first aligned
// coefficient at runtime; everything before it is handled scalar-wise.
408  const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
409  const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
410 
411  unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
412 
413  for(Index index = alignedStart; index < alignedEnd; index += packetSize)
414  kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
415 
416  unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
417  }
418 };
419 
420 template<typename Kernel>
421 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
422 {
423  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
424  {
425  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
426 
427  enum { size = DstXprType::SizeAtCompileTime,
428  packetSize = packet_traits<typename Kernel::Scalar>::size,
429  alignedSize = (size/packetSize)*packetSize };
430 
431  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
432  copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
433  }
434 };
435 
436 /**************************
437 *** Inner vectorization ***
438 **************************/
439 
// Inner vectorized traversal: the traits guarantee the inner size and outer
// stride are multiples of the packet size, so whole packets cover each run.
440 template<typename Kernel>
441 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
442 {
443  typedef typename Kernel::PacketType PacketType;
444  enum {
445  SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
446  DstAlignment = Kernel::AssignmentTraits::DstAlignment
447  };
448  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
449  {
450  const Index innerSize = kernel.innerSize();
451  const Index outerSize = kernel.outerSize();
452  const Index packetSize = unpacket_traits<PacketType>::size;
453  for(Index outer = 0; outer < outerSize; ++outer)
454  for(Index inner = 0; inner < innerSize; inner+=packetSize)
455  kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
456  }
457 };
458 
// Fully unrolled packet-wise copy over the whole expression.
459 template<typename Kernel>
460 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
461 {
462  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
463  {
464  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
465  copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
466  }
467 };
468 
// Runtime outer loop, packet-wise unrolled inner runs.
469 template<typename Kernel>
470 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
471 {
472  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
473  {
474  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
475  const Index outerSize = kernel.outerSize();
476  for(Index outer = 0; outer < outerSize; ++outer)
477  copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
478  }
479 };
480 
481 /***********************
482 *** Linear traversal ***
483 ***********************/
484 
// Single scalar loop over the flat linear index.
485 template<typename Kernel>
486 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
487 {
488  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
489  {
490  const Index size = kernel.size();
491  for(Index i = 0; i < size; ++i)
492  kernel.assignCoeff(i);
493  }
494 };
495 
// Fully unrolled linear (flat-index) copy.
496 template<typename Kernel>
497 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
498 {
499  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
500  {
501  typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
502  copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
503  }
504 };
505 
506 /**************************
507 *** Slice vectorization ***
508 ***************************/
509 
// Slice vectorization: inner runs are not whole multiples of the packet size
// (and alignment may vary from one run to the next), so each inner run gets a
// scalar head, a packet middle, and a scalar tail. The aligned start is
// shifted by (packetSize - outerStride % packetSize) between outer indices.
510 template<typename Kernel>
511 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
512 {
513  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
514  {
515  typedef typename Kernel::Scalar Scalar;
516  typedef typename Kernel::PacketType PacketType;
517  enum {
518  packetSize = unpacket_traits<PacketType>::size,
519  requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
520  alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
521  dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
522  dstAlignment = alignable ? int(requestedAlignment)
523  : int(Kernel::AssignmentTraits::DstAlignment)
524  };
525  const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
526  if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
527  {
528  // the pointer is not aligned on a scalar boundary, so alignment is not possible
529  return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
530  }
531  const Index packetAlignedMask = packetSize - 1;
532  const Index innerSize = kernel.innerSize();
533  const Index outerSize = kernel.outerSize();
534  const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
535  Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
536 
537  for(Index outer = 0; outer < outerSize; ++outer)
538  {
539  const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
540  // do the non-vectorizable part of the assignment
541  for(Index inner = 0; inner<alignedStart ; ++inner)
542  kernel.assignCoeffByOuterInner(outer, inner);
543 
544  // do the vectorizable part of the assignment
545  for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
546  kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
547 
548  // do the non-vectorizable part of the assignment
549  for(Index inner = alignedEnd; inner<innerSize ; ++inner)
550  kernel.assignCoeffByOuterInner(outer, inner);
551 
// shift the aligned start for the next outer index
552  alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
553  }
554  }
555 };
556 
557 /***************************************************************************
558 * Part 4 : Generic dense assignment kernel
559 ***************************************************************************/
560 
561 // This class generalize the assignment of a coefficient (or packet) from one dense evaluator
562 // to another dense writable evaluator.
563 // It is parametrized by the two evaluators, and the actual assignment functor.
564 // This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
565 // One can customize the assignment using this generic dense_assignment_kernel with different
566 // functors, or by completely overloading it, by-passing a functor.
// Kernel bridging the assignment loops above with a pair of evaluators and an
// assignment functor: it exposes sizes, coefficient- and packet-level
// assignment, and the (outer, inner) -> (row, col) index mapping.
567 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
568 class generic_dense_assignment_kernel
569 {
570 protected:
571  typedef typename DstEvaluatorTypeT::XprType DstXprType;
572  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
573 public:
574 
575  typedef DstEvaluatorTypeT DstEvaluatorType;
576  typedef SrcEvaluatorTypeT SrcEvaluatorType;
577  typedef typename DstEvaluatorType::Scalar Scalar;
578  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
579  typedef typename AssignmentTraits::PacketType PacketType;
580 
581 
// Stores references only: all four arguments must outlive the kernel.
582  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
583  : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
584  {
585  #ifdef EIGEN_DEBUG_ASSIGN
586  AssignmentTraits::debug();
587  #endif
588  }
589 
// Sizes and strides are queried on the destination expression (not the evaluator).
590  EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
591  EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
592  EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
593  EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
594  EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
595  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
596 
597  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
598  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
599 
// Assign one coefficient through the functor (which implements =, +=, ...).
601  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
602  {
603  m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
604  }
605 
// Flat linear-index variant.
607  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
608  {
609  m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
610  }
611 
// (outer, inner) variant: translates to (row, col) first.
613  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
614  {
615  Index row = rowIndexByOuterInner(outer, inner);
616  Index col = colIndexByOuterInner(outer, inner);
617  assignCoeff(row, col);
618  }
619 
620 
// Packet-level counterparts; StoreMode/LoadMode carry the dst/src alignments.
621  template<int StoreMode, int LoadMode, typename PacketType>
622  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
623  {
624  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
625  }
626 
627  template<int StoreMode, int LoadMode, typename PacketType>
628  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
629  {
630  m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
631  }
632 
633  template<int StoreMode, int LoadMode, typename PacketType>
634  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
635  {
636  Index row = rowIndexByOuterInner(outer, inner);
637  Index col = colIndexByOuterInner(outer, inner);
638  assignPacket<StoreMode,LoadMode,PacketType>(row, col);
639  }
640 
// Map an (outer, inner) pair to a row index, accounting for vector shapes
// and the destination's storage order.
641  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
642  {
643  typedef typename DstEvaluatorType::ExpressionTraits Traits;
644  return int(Traits::RowsAtCompileTime) == 1 ? 0
645  : int(Traits::ColsAtCompileTime) == 1 ? inner
646  : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
647  : inner;
648  }
649 
// Symmetric mapping for the column index.
650  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
651  {
652  typedef typename DstEvaluatorType::ExpressionTraits Traits;
653  return int(Traits::ColsAtCompileTime) == 1 ? 0
654  : int(Traits::RowsAtCompileTime) == 1 ? inner
655  : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
656  : outer;
657  }
658 
659 protected:
660  DstEvaluatorType& m_dst;
661  const SrcEvaluatorType& m_src;
662  const Functor &m_functor;
663  // TODO find a way to avoid the needs of the original expression
664  DstXprType& m_dstExpr;
665 };
666 
667 /***************************************************************************
668 * Part 5 : Entry point for dense rectangular assignment
669 ***************************************************************************/
670 
// Entry point for dense rectangular assignment: build the two evaluators and
// a generic kernel, then run the traversal selected at compile time by
// dense_assignment_loop.
671 template<typename DstXprType, typename SrcXprType, typename Functor>
672 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
673 {
674  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
675 
676  typedef evaluator<DstXprType> DstEvaluatorType;
677  typedef evaluator<SrcXprType> SrcEvaluatorType;
678 
679  DstEvaluatorType dstEvaluator(dst);
680  SrcEvaluatorType srcEvaluator(src);
681 
682  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
// dst is received as const& for genericity but is written to, hence const_cast_derived().
683  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
684 
685  dense_assignment_loop<Kernel>::run(kernel);
686 }
687 
// Convenience overload defaulting the functor to plain assignment (operator=).
688 template<typename DstXprType, typename SrcXprType>
689 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
690 {
691  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
692 }
693 
694 /***************************************************************************
695 * Part 6 : Generic assignment
696 ***************************************************************************/
697 
698 // Based on the respective shapes of the destination and source,
699 // the class AssignmentKind determine the kind of assignment mechanism.
700 // AssignmentKind must define a Kind typedef.
// Maps a (DstShape, SrcShape) pair to an assignment mechanism tag; every
// specialization must expose a Kind typedef.
701 template<typename DstShape, typename SrcShape> struct AssignmentKind;
702 
703 // Assignment kinds defined in this file:
704 struct Dense2Dense {};
705 struct EigenBase2EigenBase {};
706 
// Fallback is the generic evalTo-based mechanism; dense-to-dense pairs get
// the specialized assignment loops of this file.
707 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
708 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
709 
710 // This is the main assignment class
711 template< typename DstXprType, typename SrcXprType, typename Functor,
712  typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
713  typename EnableIf = void>
714 struct Assignment;
715 
716 
717 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
718 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
719 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
720 // does not has to bother about these annoying details.
721 
// Functor-less overloads: default to plain assignment (operator=).
722 template<typename Dst, typename Src>
723 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
724 void call_assignment(Dst& dst, const Src& src)
725 {
726  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
727 }
// const-Dst overload — presumably for destinations that are write-through
// proxies passed as const (TODO confirm against callers); forwards identically.
728 template<typename Dst, typename Src>
729 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
730 void call_assignment(const Dst& dst, const Src& src)
731 {
732  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
733 }
734 
735 // Deal with "assume-aliasing"
// Source flagged as potentially aliasing the destination: evaluate it into a
// temporary of its plain type first, then assign from the temporary.
736 template<typename Dst, typename Src, typename Func>
737 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
738 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
739 {
740  typename plain_matrix_type<Src>::type tmp(src);
741  call_assignment_no_alias(dst, tmp, func);
742 }
743 
// No assumed aliasing: assign directly, no temporary.
744 template<typename Dst, typename Src, typename Func>
745 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
746 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
747 {
748  call_assignment_no_alias(dst, src, func);
749 }
750 
751 // by-pass "assume-aliasing"
752 // When there is no aliasing, we require that 'dst' has been properly resized
753 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
754 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
755 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
756 {
757  call_assignment_no_alias(dst.expression(), src, func);
758 }
759 
760 
// Core no-aliasing assignment: handles automatic transposition of mismatched
// vectors (row <-> column), resizes the destination if needed, and dispatches
// to the Assignment<> mechanism selected by the operand shapes.
761 template<typename Dst, typename Src, typename Func>
762 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
763 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
764 {
765  enum {
// Transpose when assigning a row-vector to a column-vector or vice versa;
// 1x1 destinations are excluded since no transposition is needed there.
766  NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
767  || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
768  ) && int(Dst::SizeAtCompileTime) != 1
769  };
770 
771  Index dstRows = NeedToTranspose ? src.cols() : src.rows();
772  Index dstCols = NeedToTranspose ? src.rows() : src.cols();
773  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
774  dst.resize(dstRows, dstCols);
775 
776  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
777  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
778  ActualDstType actualDst(dst);
779 
780  // TODO check whether this is the right place to perform these checks:
781  EIGEN_STATIC_ASSERT_LVALUE(Dst)
782  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
783  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
784 
785  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
786 }
// Functor-less overload defaulting to plain assignment.
787 template<typename Dst, typename Src>
788 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
789 void call_assignment_no_alias(Dst& dst, const Src& src)
790 {
791  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
792 }
793 
// Same as call_assignment_no_alias, but without the automatic transposition
// of mismatched vectors.
794 template<typename Dst, typename Src, typename Func>
795 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
796 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
797 {
798  Index dstRows = src.rows();
799  Index dstCols = src.cols();
800  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
801  dst.resize(dstRows, dstCols);
802 
803  // TODO check whether this is the right place to perform these checks:
804  EIGEN_STATIC_ASSERT_LVALUE(Dst)
805  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
806 
807  Assignment<Dst,Src,Func>::run(dst, src, func);
808 }
// Functor-less overload defaulting to plain assignment.
809 template<typename Dst, typename Src>
810 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
811 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
812 {
813  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
814 }
815 
816 // forward declaration
817 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
818 
819 // Generic Dense to Dense assignment
820 // Note that the last template argument "Weak" is needed to make it possible to perform
821 // both partial specialization+SFINAE without ambiguous specialization
822 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
823 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
824 {
825  EIGEN_DEVICE_FUNC
826  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
827  {
828  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
829 
// Runtime aliasing detection, compiled out when EIGEN_NO_DEBUG is defined.
830 #ifndef EIGEN_NO_DEBUG
831  internal::check_for_aliasing(dst, src);
832 #endif
833 
834  call_dense_assignment_loop(dst, src, func);
835  }
836 };
837 
838 // Generic assignment through evalTo.
839 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
840 // Note that the last template argument "Weak" is needed to make it possible to perform
841 // both partial specialization+SFINAE without ambiguous specialization
842 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
843 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
844 {
845  EIGEN_DEVICE_FUNC
// Only plain assignment is supported here: the parameter type pins the
// functor to assign_op, and it is then ignored in favor of src.evalTo(dst).
846  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
847  {
848  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
849  src.evalTo(dst);
850  }
851 };
852 
853 } // namespace internal
854 
855 } // end namespace Eigen
856 
857 #endif // EIGEN_ASSIGN_EVALUATOR_H
// ---- Doxygen cross-reference index (extraction residue, not source code) ----
// const unsigned int DirectAccessBit — Definition: Constants.h:150
// Namespace containing all symbols from the Eigen library. — Definition: Core:271
// Definition: Half.h:502
// const unsigned int RowMajorBit — Definition: Constants.h:61
// EIGEN_DEFAULT_DENSE_INDEX_TYPE Index — The Index type as used for the API. — Definition: XprHelper.h:35
// Definition: Eigen_Colamd.h:50
// const int Dynamic — Definition: Constants.h:21
// const unsigned int ActualPacketAccessBit — Definition: Constants.h:100
// const unsigned int LinearAccessBit — Definition: Constants.h:125