Eigen  3.2.93
NEON/Complex.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_COMPLEX_NEON_H
12 #define EIGEN_COMPLEX_NEON_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 inline uint32x4_t p4ui_CONJ_XOR() {
19  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
20  return vld1q_u32( conj_XOR_DATA );
21 }
22 
23 inline uint32x2_t p2ui_CONJ_XOR() {
24  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
25  return vld1_u32( conj_XOR_DATA );
26 }
27 
28 //---------- float ----------
29 struct Packet2cf
30 {
31  EIGEN_STRONG_INLINE Packet2cf() {}
32  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
33  Packet4f v;
34 };
35 
36 template<> struct packet_traits<std::complex<float> > : default_packet_traits
37 {
38  typedef Packet2cf type;
39  typedef Packet2cf half;
40  enum {
41  Vectorizable = 1,
42  AlignedOnScalar = 1,
43  size = 2,
44  HasHalfPacket = 0,
45 
46  HasAdd = 1,
47  HasSub = 1,
48  HasMul = 1,
49  HasDiv = 1,
50  HasNegate = 1,
51  HasAbs = 0,
52  HasAbs2 = 0,
53  HasMin = 0,
54  HasMax = 0,
55  HasSetLinear = 0
56  };
57 };
58 
59 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
60 
61 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
62 {
63  float32x2_t r64;
64  r64 = vld1_f32((float *)&from);
65 
66  return Packet2cf(vcombine_f32(r64, r64));
67 }
68 
69 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
70 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
71 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
72 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
73 {
74  Packet4ui b = vreinterpretq_u32_f32(a.v);
75  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
76 }
77 
78 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
79 {
80  Packet4f v1, v2;
81 
82  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
83  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
84  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
85  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
86  // Multiply the real a with b
87  v1 = vmulq_f32(v1, b.v);
88  // Multiply the imag a with b
89  v2 = vmulq_f32(v2, b.v);
90  // Conjugate v2
91  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
92  // Swap real/imag elements in v2.
93  v2 = vrev64q_f32(v2);
94  // Add and return the result
95  return Packet2cf(vaddq_f32(v1, v2));
96 }
97 
98 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
99 {
100  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
101 }
102 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
103 {
104  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
105 }
106 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
107 {
108  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
109 }
110 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
111 {
112  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
113 }
114 
115 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
116 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
117 
118 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
119 
120 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
121 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
122 
123 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
124 {
125  Packet4f res = pset1<Packet4f>(0.f);
126  res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
127  res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
128  res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
129  res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
130  return Packet2cf(res);
131 }
132 
133 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
134 {
135  to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
136  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
137 }
138 
139 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
140 
141 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
142 {
143  std::complex<float> EIGEN_ALIGN16 x[2];
144  vst1q_f32((float *)x, a.v);
145  return x[0];
146 }
147 
148 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
149 {
150  float32x2_t a_lo, a_hi;
151  Packet4f a_r128;
152 
153  a_lo = vget_low_f32(a.v);
154  a_hi = vget_high_f32(a.v);
155  a_r128 = vcombine_f32(a_hi, a_lo);
156 
157  return Packet2cf(a_r128);
158 }
159 
160 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
161 {
162  return Packet2cf(vrev64q_f32(a.v));
163 }
164 
165 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
166 {
167  float32x2_t a1, a2;
168  std::complex<float> s;
169 
170  a1 = vget_low_f32(a.v);
171  a2 = vget_high_f32(a.v);
172  a2 = vadd_f32(a1, a2);
173  vst1_f32((float *)&s, a2);
174 
175  return s;
176 }
177 
178 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
179 {
180  Packet4f sum1, sum2, sum;
181 
182  // Add the first two 64-bit float32x2_t of vecs[0]
183  sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
184  sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
185  sum = vaddq_f32(sum1, sum2);
186 
187  return Packet2cf(sum);
188 }
189 
190 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
191 {
192  float32x2_t a1, a2, v1, v2, prod;
193  std::complex<float> s;
194 
195  a1 = vget_low_f32(a.v);
196  a2 = vget_high_f32(a.v);
197  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
198  v1 = vdup_lane_f32(a1, 0);
199  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
200  v2 = vdup_lane_f32(a1, 1);
201  // Multiply the real a with b
202  v1 = vmul_f32(v1, a2);
203  // Multiply the imag a with b
204  v2 = vmul_f32(v2, a2);
205  // Conjugate v2
206  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
207  // Swap real/imag elements in v2.
208  v2 = vrev64_f32(v2);
209  // Add v1, v2
210  prod = vadd_f32(v1, v2);
211 
212  vst1_f32((float *)&s, prod);
213 
214  return s;
215 }
216 
217 template<int Offset>
218 struct palign_impl<Offset,Packet2cf>
219 {
220  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
221  {
222  if (Offset==1)
223  {
224  first.v = vextq_f32(first.v, second.v, 2);
225  }
226  }
227 };
228 
229 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
230 {
231  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
232  { return padd(pmul(x,y),c); }
233 
234  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
235  {
236  return internal::pmul(a, pconj(b));
237  }
238 };
239 
240 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
241 {
242  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
243  { return padd(pmul(x,y),c); }
244 
245  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
246  {
247  return internal::pmul(pconj(a), b);
248  }
249 };
250 
251 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
252 {
253  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
254  { return padd(pmul(x,y),c); }
255 
256  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
257  {
258  return pconj(internal::pmul(a, b));
259  }
260 };
261 
262 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
263 {
264  // TODO optimize it for NEON
265  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
266  Packet4f s, rev_s;
267 
268  // this computes the norm
269  s = vmulq_f32(b.v, b.v);
270  rev_s = vrev64q_f32(s);
271 
272  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
273 }
274 
275 EIGEN_DEVICE_FUNC inline void
276 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
277  Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
278  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
279  kernel.packet[1].v = tmp;
280 }
281 
282 //---------- double ----------
283 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
284 
285 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
286 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
287 
288 struct Packet1cd
289 {
290  EIGEN_STRONG_INLINE Packet1cd() {}
291  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
292  Packet2d v;
293 };
294 
295 template<> struct packet_traits<std::complex<double> > : default_packet_traits
296 {
297  typedef Packet1cd type;
298  typedef Packet1cd half;
299  enum {
300  Vectorizable = 1,
301  AlignedOnScalar = 0,
302  size = 1,
303  HasHalfPacket = 0,
304 
305  HasAdd = 1,
306  HasSub = 1,
307  HasMul = 1,
308  HasDiv = 1,
309  HasNegate = 1,
310  HasAbs = 0,
311  HasAbs2 = 0,
312  HasMin = 0,
313  HasMax = 0,
314  HasSetLinear = 0
315  };
316 };
317 
318 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
319 
320 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
321 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
322 
323 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
324 { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
325 
326 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
327 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
328 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
329 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
330 
331 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
332 {
333  Packet2d v1, v2;
334 
335  // Get the real values of a
336  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
337  // Get the imag values of a
338  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
339  // Multiply the real a with b
340  v1 = vmulq_f64(v1, b.v);
341  // Multiply the imag a with b
342  v2 = vmulq_f64(v2, b.v);
343  // Conjugate v2
344  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
345  // Swap real/imag elements in v2.
346  v2 = preverse<Packet2d>(v2);
347  // Add and return the result
348  return Packet1cd(vaddq_f64(v1, v2));
349 }
350 
351 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
352 {
353  return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
354 }
355 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
356 {
357  return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
358 }
359 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
360 {
361  return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
362 }
363 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
364 {
365  return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
366 }
367 
368 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
369 
370 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
371 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
372 
373 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
374 
375 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
376 {
377  Packet2d res = pset1<Packet2d>(0.0);
378  res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
379  res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
380  return Packet1cd(res);
381 }
382 
383 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
384 {
385  to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
386 }
387 
388 
389 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
390 {
391  std::complex<double> EIGEN_ALIGN16 res;
392  pstore<std::complex<double> >(&res, a);
393 
394  return res;
395 }
396 
397 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
398 
399 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
400 
401 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
402 
403 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
404 
405 template<int Offset>
406 struct palign_impl<Offset,Packet1cd>
407 {
408  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
409  {
410  // FIXME is it sure we never have to align a Packet1cd?
411  // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
412  }
413 };
414 
415 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
416 {
417  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
418  { return padd(pmul(x,y),c); }
419 
420  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
421  {
422  return internal::pmul(a, pconj(b));
423  }
424 };
425 
426 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
427 {
428  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
429  { return padd(pmul(x,y),c); }
430 
431  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
432  {
433  return internal::pmul(pconj(a), b);
434  }
435 };
436 
437 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
438 {
439  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
440  { return padd(pmul(x,y),c); }
441 
442  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
443  {
444  return pconj(internal::pmul(a, b));
445  }
446 };
447 
448 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
449 {
450  // TODO optimize it for NEON
451  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
452  Packet2d s = pmul<Packet2d>(b.v, b.v);
453  Packet2d rev_s = preverse<Packet2d>(s);
454 
455  return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
456 }
457 
458 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
459 {
460  return Packet1cd(preverse(Packet2d(x.v)));
461 }
462 
463 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
464 {
465  Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
466  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
467  kernel.packet[1].v = tmp;
468 }
469 #endif // EIGEN_ARCH_ARM64
470 
471 } // end namespace internal
472 
473 } // end namespace Eigen
474 
475 #endif // EIGEN_COMPLEX_NEON_H
Definition: Constants.h:230
Namespace containing all symbols from the Eigen library.
Definition: Core:271
Definition: Half.h:502
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: XprHelper.h:35
Definition: Eigen_Colamd.h:50