11 #ifndef EIGEN_COMPLEX_NEON_H 12 #define EIGEN_COMPLEX_NEON_H 18 inline uint32x4_t p4ui_CONJ_XOR() {
19 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
20 return vld1q_u32( conj_XOR_DATA );
23 inline uint32x2_t p2ui_CONJ_XOR() {
24 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
25 return vld1_u32( conj_XOR_DATA );
31 EIGEN_STRONG_INLINE Packet2cf() {}
32 EIGEN_STRONG_INLINE
explicit Packet2cf(
const Packet4f& a) : v(a) {}
// Packet traits for std::complex<float>: both `type` and `half` are Packet2cf.
// NOTE(review): the embedded listing numbers jump from 39 to 59 and no braces
// are present, so the remaining members of this struct appear to have been
// dropped by the extraction — restore them from the real header before use.
36 template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
38 typedef Packet2cf type;
39 typedef Packet2cf half;
59 template<>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
enum {size=2, alignment=
Aligned16};
typedef Packet2cf half; };
61 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
64 r64 = vld1_f32((
float *)&from);
66 return Packet2cf(vcombine_f32(r64, r64));
69 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(padd<Packet4f>(a.v,b.v)); }
70 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(psub<Packet4f>(a.v,b.v)); }
71 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(
const Packet2cf& a) {
return Packet2cf(pnegate<Packet4f>(a.v)); }
72 template<> EIGEN_STRONG_INLINE Packet2cf pconj(
const Packet2cf& a)
74 Packet4ui b = vreinterpretq_u32_f32(a.v);
75 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
78 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
83 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
85 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
87 v1 = vmulq_f32(v1, b.v);
89 v2 = vmulq_f32(v2, b.v);
91 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
95 return Packet2cf(vaddq_f32(v1, v2));
98 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
100 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
102 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
104 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
106 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
108 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
110 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
112 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
115 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cf(pload<Packet4f>((
const float*)from)); }
116 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cf(ploadu<Packet4f>((
const float*)from)); }
118 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from) {
return pset1<Packet2cf>(*from); }
120 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
float*)to, from.v); }
121 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
float*)to, from.v); }
123 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from,
Index stride)
125 Packet4f res = pset1<Packet4f>(0.f);
126 res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
127 res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
128 res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
129 res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
130 return Packet2cf(res);
133 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
Index stride)
135 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
136 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
139 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(
const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((
float *)addr); }
141 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
143 std::complex<float> EIGEN_ALIGN16 x[2];
144 vst1q_f32((
float *)x, a.v);
148 template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a)
150 float32x2_t a_lo, a_hi;
153 a_lo = vget_low_f32(a.v);
154 a_hi = vget_high_f32(a.v);
155 a_r128 = vcombine_f32(a_hi, a_lo);
157 return Packet2cf(a_r128);
160 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(
const Packet2cf& a)
162 return Packet2cf(vrev64q_f32(a.v));
165 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
168 std::complex<float> s;
170 a1 = vget_low_f32(a.v);
171 a2 = vget_high_f32(a.v);
172 a2 = vadd_f32(a1, a2);
173 vst1_f32((
float *)&s, a2);
178 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(
const Packet2cf* vecs)
180 Packet4f sum1, sum2, sum;
183 sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
184 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
185 sum = vaddq_f32(sum1, sum2);
187 return Packet2cf(sum);
190 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
192 float32x2_t a1, a2, v1, v2, prod;
193 std::complex<float> s;
195 a1 = vget_low_f32(a.v);
196 a2 = vget_high_f32(a.v);
198 v1 = vdup_lane_f32(a1, 0);
200 v2 = vdup_lane_f32(a1, 1);
202 v1 = vmul_f32(v1, a2);
204 v2 = vmul_f32(v2, a2);
206 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
210 prod = vadd_f32(v1, v2);
212 vst1_f32((
float *)&s, prod);
218 struct palign_impl<Offset,Packet2cf>
220 EIGEN_STRONG_INLINE
static void run(Packet2cf& first,
const Packet2cf& second)
224 first.v = vextq_f32(first.v, second.v, 2);
229 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
231 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 232 {
return padd(pmul(x,y),c); }
234 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 236 return internal::pmul(a, pconj(b));
240 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
242 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 243 {
return padd(pmul(x,y),c); }
245 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 247 return internal::pmul(pconj(a), b);
251 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
253 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const 254 {
return padd(pmul(x,y),c); }
256 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const 258 return pconj(internal::pmul(a, b));
262 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
265 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
269 s = vmulq_f32(b.v, b.v);
270 rev_s = vrev64q_f32(s);
272 return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
275 EIGEN_DEVICE_FUNC
inline void 276 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
277 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
278 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
279 kernel.packet[1].v = tmp;
283 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG 285 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
286 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
290 EIGEN_STRONG_INLINE Packet1cd() {}
291 EIGEN_STRONG_INLINE
explicit Packet1cd(
const Packet2d& a) : v(a) {}
// Packet traits for std::complex<double>: both `type` and `half` are Packet1cd.
// NOTE(review): the embedded listing numbers jump from 298 to 318 and no
// braces are present, so the remaining members of this struct appear to have
// been dropped by the extraction — restore them from the real header before use.
295 template<>
struct packet_traits<
std::complex<double> > : default_packet_traits
297 typedef Packet1cd type;
298 typedef Packet1cd half;
318 template<>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
enum {size=1, alignment=
Aligned16};
typedef Packet1cd half; };
320 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet1cd(pload<Packet2d>((
const double*)from)); }
321 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet1cd(ploadu<Packet2d>((
const double*)from)); }
323 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(
const std::complex<double>& from)
324 {
return ploadu<Packet1cd>(&from); }
326 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(padd<Packet2d>(a.v,b.v)); }
327 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(psub<Packet2d>(a.v,b.v)); }
328 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(
const Packet1cd& a) {
return Packet1cd(pnegate<Packet2d>(a.v)); }
329 template<> EIGEN_STRONG_INLINE Packet1cd pconj(
const Packet1cd& a) {
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
331 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
336 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
338 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
340 v1 = vmulq_f64(v1, b.v);
342 v2 = vmulq_f64(v2, b.v);
344 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
346 v2 = preverse<Packet2d>(v2);
348 return Packet1cd(vaddq_f64(v1, v2));
351 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
353 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
355 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
357 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
359 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
361 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
363 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
365 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
368 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from) {
return pset1<Packet1cd>(*from); }
370 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v); }
371 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, from.v); }
373 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(
const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((
double *)addr); }
375 template<> EIGEN_DEVICE_FUNC
inline Packet1cd pgather<std::complex<double>, Packet1cd>(
const std::complex<double>* from,
Index stride)
377 Packet2d res = pset1<Packet2d>(0.0);
378 res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
379 res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
380 return Packet1cd(res);
383 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,
const Packet1cd& from,
Index stride)
385 to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
389 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
391 std::complex<double> EIGEN_ALIGN16 res;
392 pstore<std::complex<double> >(&res, a);
397 template<> EIGEN_STRONG_INLINE Packet1cd preverse(
const Packet1cd& a) {
return a; }
399 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
401 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(
const Packet1cd* vecs) {
return vecs[0]; }
403 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
406 struct palign_impl<Offset,Packet1cd>
408 static EIGEN_STRONG_INLINE
void run(Packet1cd& ,
const Packet1cd& )
415 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
417 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const 418 {
return padd(pmul(x,y),c); }
420 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const 422 return internal::pmul(a, pconj(b));
426 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
428 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const 429 {
return padd(pmul(x,y),c); }
431 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const 433 return internal::pmul(pconj(a), b);
437 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
439 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const 440 {
return padd(pmul(x,y),c); }
442 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const 444 return pconj(internal::pmul(a, b));
448 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
451 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
452 Packet2d s = pmul<Packet2d>(b.v, b.v);
453 Packet2d rev_s = preverse<Packet2d>(s);
455 return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
458 EIGEN_STRONG_INLINE Packet1cd pcplxflip(
const Packet1cd& x)
460 return Packet1cd(preverse(Packet2d(x.v)));
463 EIGEN_STRONG_INLINE
void ptranspose(PacketBlock<Packet1cd,2>& kernel)
465 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
466 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
467 kernel.packet[1].v = tmp;
#endif // EIGEN_ARCH_ARM64

#endif // EIGEN_COMPLEX_NEON_H

// Cross-reference notes carried over from the generated documentation listing
// (not part of the header itself):
//   Definition: Constants.h:230
//   Namespace containing all symbols from the Eigen library.
//   Definition: Core:271
//   EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
//   The Index type as used for the API.
//   Definition: XprHelper.h:35
//   Definition: Eigen_Colamd.h:50