10 #ifndef EIGEN_COMPLEX32_ALTIVEC_H
11 #define EIGEN_COMPLEX32_ALTIVEC_H
// Bit mask XOR-ed into Packet4f data by pconj<Packet2cf> and pmul<Packet2cf>; built by
// combining p4i_ZERO and p4f_ZERO_ with vec_mergeh.
// NOTE(review): presumably p4f_ZERO_ carries only the sign bit, so the mask flips the sign of
// every second float - confirm against the definition of p4f_ZERO_.
17 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);
// Masks for the Packet1cd kernels (pmul<Packet1cd> XORs with XOR1, pconj<Packet1cd> with XOR2),
// each formed by an 8-byte vec_sld over p2d_ZERO_ and p2l_ZERO.
// NOTE(review): both names are defined twice below with the vec_sld operands swapped.
// Presumably the two pairs belong to alternative preprocessor branches (e.g. an endianness
// switch) that this listing dropped - confirm against the full header.
19 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);
20 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);
22 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);
23 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);
// Constructors of Packet2cf. The enclosing struct header and the declaration of the `v`
// member are not part of this excerpt.
// Default constructor: empty body, no member initializers.
29 EIGEN_STRONG_INLINE Packet2cf() {}
// Wraps an existing Packet4f; `explicit`, so a Packet4f never converts to Packet2cf implicitly.
30 EIGEN_STRONG_INLINE
explicit Packet2cf(
const Packet4f& a) : v(a) {}
// packet_traits specialization for std::complex<float>, derived from default_packet_traits.
// Both the full packet type and the half packet type are Packet2cf.
// NOTE(review): the braces and any further members are missing from this listing.
34 template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
36 typedef Packet2cf type;
37 typedef Packet2cf half;
// Reverse traits for Packet2cf: the scalar type is std::complex<float>, a packet holds two
// scalars, the alignment is Aligned16 and the half packet is Packet2cf itself.
56 template<>
struct unpacket_traits<Packet2cf>
{
  typedef std::complex<float> type;
  typedef Packet2cf           half;
  enum {
    size      = 2,
    alignment = Aligned16
  };
};
// Builds a Packet2cf from a single std::complex<float>.
// Visible steps: load a Packet4f starting at &from - with pload when the address is a multiple
// of 16, with ploadu otherwise - then permute the result with p16uc_PSET64_HI.
// NOTE(review): the braces, the declaration of `res`, the `else` between the two loads and the
// return statement are missing from this listing. Presumably the permute copies the first
// 64 bits (the loaded complex value) into both halves - confirm.
// NOTE(review): a Packet4f load presumably spans 16 bytes starting at &from, i.e. more than the
// one std::complex<float> referenced - confirm that callers tolerate the over-read.
58 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
62 if((ptrdiff_t(&from) % 16) == 0)
63 res.v = pload<Packet4f>((
const float *)&from);
65 res.v = ploadu<Packet4f>((
const float *)&from);
66 res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
// Strided gather: copies from[0] and from[stride] into a local EIGEN_ALIGN16 array of two
// complex values, then loads that array as one vector with vec_ld and wraps it in a Packet2cf.
// NOTE(review): the function braces are missing from this listing.
70 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from, Index stride)
72 std::complex<float> EIGEN_ALIGN16 af[2];
73 af[0] = from[0*stride];
74 af[1] = from[1*stride];
75 return Packet2cf(vec_ld(0, (
const float*)af));
// Strided scatter: the visible part stores from.v with vec_st into a local EIGEN_ALIGN16 array
// of two complex values.
// NOTE(review): the braces and the statements that copy af[] into `to` are missing from this
// listing; presumably af[0] goes to to[0] and af[1] to to[stride] - confirm.
77 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from, Index stride)
79 std::complex<float> EIGEN_ALIGN16 af[2];
80 vec_st(from.v, 0, (
float*)af);
// Adds two Packet2cf values by applying vec_add to their underlying vectors.
86 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet2cf sum(vec_add(a.v, b.v));
  return sum;
}
// Subtracts b from a by applying vec_sub to the underlying vectors.
87 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet2cf difference(vec_sub(a.v, b.v));
  return difference;
}
// Negates a Packet2cf by delegating to pnegate on the underlying vector.
88 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
  Packet2cf negated(pnegate(a.v));
  return negated;
}
// Conjugate of a Packet2cf: XORs the bit pattern of the underlying vector with p4ui_CONJ_XOR.
89 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const Packet4ui flipped = vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR);
  return Packet2cf((Packet4f)flipped);
}
// Product of two Packet2cf values.
// Visible steps: permute `a` with p16uc_PSET32_WODD (v1) and p16uc_PSET32_WEVEN (v2), multiply
// each by b.v using vec_madd with a p4f_ZERO addend, XOR v2 with p4ui_CONJ_XOR, permute v2 with
// p16uc_COMPLEX32_REV, and return v1 + v2.
// NOTE(review): presumably v1 holds re(a)*b and the XOR + permute turn im(a)*b into
// (-im(a)*im(b), im(a)*re(b)); the permute masks are defined outside this excerpt - confirm.
// NOTE(review): the braces and the declarations of v1/v2 are missing from this listing.
91 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
96 v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
98 v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
100 v1 = vec_madd(v1, b.v, p4f_ZERO);
102 v2 = vec_madd(v2, b.v, p4f_ZERO);
103 v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
105 v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
107 return Packet2cf(vec_add(v1, v2));
// Bitwise AND of the underlying vectors of two Packet2cf values.
110 template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet2cf conjunction(vec_and(a.v, b.v));
  return conjunction;
}
// Bitwise OR of the underlying vectors of two Packet2cf values.
111 template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet2cf disjunction(vec_or(a.v, b.v));
  return disjunction;
}
// Bitwise XOR of the underlying vectors of two Packet2cf values.
112 template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet2cf toggled(vec_xor(a.v, b.v));
  return toggled;
}
// Computes a AND (NOT b) bitwise; NOT b is formed as vec_nor(b.v, b.v).
113 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet2cf cleared(vec_and(a.v, vec_nor(b.v, b.v)));
  return cleared;
}
// Loads a Packet2cf from `from` by reinterpreting the memory as floats and calling
// pload<Packet4f>; EIGEN_DEBUG_ALIGNED_LOAD is expanded first.
115 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const float* scalars = (const float*)from;
  return Packet2cf(pload<Packet4f>(scalars));
}
// Loads a Packet2cf from `from` by reinterpreting the memory as floats and calling
// ploadu<Packet4f>; EIGEN_DEBUG_UNALIGNED_LOAD is expanded first.
116 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const float* scalars = (const float*)from;
  return Packet2cf(ploadu<Packet4f>(scalars));
}
// Implemented as pset1<Packet2cf> applied to the value stored at `from`.
// NOTE(review): the function braces are missing from this listing.
118 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from)
120 return pset1<Packet2cf>(*from);
// Stores a Packet2cf to `to` by forwarding the underlying vector to pstore on a float pointer;
// EIGEN_DEBUG_ALIGNED_STORE is expanded first.
123 template<> EIGEN_STRONG_INLINE
void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  float* dst = (float*)to;
  pstore(dst, from.v);
}
// Stores a Packet2cf to `to` by forwarding the underlying vector to pstoreu on a float pointer;
// EIGEN_DEBUG_UNALIGNED_STORE is expanded first.
124 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  float* dst = (float*)to;
  pstoreu(dst, from.v);
}
// Issues a vec_dstt hint for `addr` (viewed as a float pointer) with control word
// DST_CTRL(2,2,32) on channel DST_CHAN.
126 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(const std::complex<float>* addr)
{
  float* hint_addr = (float *)addr;
  vec_dstt(hint_addr, DST_CTRL(2,2,32), DST_CHAN);
}
// Extracts a std::complex<float> from a Packet2cf. The visible part stores a.v with pstore
// into a local EIGEN_ALIGN16 array of two complex values.
// NOTE(review): the braces and the return statement are missing from this listing;
// presumably res[0] is returned - confirm.
128 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
130 std::complex<float> EIGEN_ALIGN16 res[2];
131 pstore((
float *)&res, a.v);
// Returns a.v permuted with p16uc_COMPLEX32_REV2, wrapped in a Packet2cf.
// NOTE(review): presumably the mask swaps the two complex values - confirm against its
// definition. The braces and the declaration of rev_a are missing from this listing.
136 template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a)
139 rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
140 return Packet2cf(rev_a);
// Reduces a Packet2cf to one std::complex<float>.
// Visible steps: b is a.v rotated by 8 bytes (vec_sld of a.v with itself); the result is
// pfirst of Packet2cf(b).
// NOTE(review): the listing skips the line between these two statements (and the braces and
// the declaration of b). Presumably that line adds a.v to b so that the first complex of b is
// the sum of both elements - confirm against the full header.
143 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
146 b = (Packet4f) vec_sld(a.v, a.v, 8);
148 return pfirst(Packet2cf(b));
// Reduction over an array of packets: reads vecs[0] and vecs[1] and returns one Packet2cf.
// Visible steps: b1 and b2 are 8-byte vec_sld combinations of vecs[0].v and vecs[1].v, b2 is
// then rotated by 8 bytes, and Packet2cf(b2) is returned.
// NOTE(review): b1/b2 are assigned twice with the operands swapped; presumably the two pairs
// belong to alternative preprocessor branches dropped by this listing. The statement that
// combines b1 with b2, the declarations and the braces are also missing - confirm against the
// full header.
151 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(
const Packet2cf* vecs)
155 b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
156 b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
158 b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
159 b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
161 b2 = (Packet4f) vec_sld(b2, b2, 8);
164 return Packet2cf(b2);
// Multiplicative reduction of a Packet2cf.
// Visible steps: b is a.v rotated by 8 bytes, and prod = pmul(a, Packet2cf(b)).
// NOTE(review): the braces, the declarations of b/prod and the return statement are missing
// from this listing; presumably pfirst(prod) is returned - confirm.
167 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
171 b = (Packet4f) vec_sld(a.v, a.v, 8);
172 prod = pmul(a, Packet2cf(b));
// palign_impl for Packet2cf: run() overwrites first.v with an 8-byte vec_sld combination of
// first.v and second.v.
// NOTE(review): two assignments with swapped operands are visible; presumably they sit in
// alternative preprocessor branches dropped by this listing. The `template<int Offset>` header,
// any condition on Offset and the braces are missing as well - confirm against the full header.
178 struct palign_impl<Offset,Packet2cf>
180 static EIGEN_STRONG_INLINE
void run(Packet2cf& first,
const Packet2cf& second)
185 first.v = vec_sld(first.v, second.v, 8);
187 first.v = vec_sld(second.v, first.v, 8);
// conj_helper specialization with flags <false,true>: the second operand is conjugated.
// NOTE(review): the struct braces and the braces of pmul are missing from this listing.
193 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
// Multiply-add: this helper's pmul(x, y) plus c.
195 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
196 {
return padd(pmul(x,y),c); }
// Returns a * conj(b), using the namespace-level internal::pmul and pconj.
198 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
200 return internal::pmul(a, pconj(b));
// conj_helper specialization with flags <true,false>: the first operand is conjugated.
// NOTE(review): the struct braces and the braces of pmul are missing from this listing.
204 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
// Multiply-add: this helper's pmul(x, y) plus c.
206 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
207 {
return padd(pmul(x,y),c); }
// Returns conj(a) * b, using the namespace-level internal::pmul and pconj.
209 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
211 return internal::pmul(pconj(a), b);
// conj_helper specialization with flags <true,true>: the product itself is conjugated.
// NOTE(review): the struct braces and the braces of pmul are missing from this listing.
215 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
// Multiply-add: this helper's pmul(x, y) plus c.
217 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
218 {
return padd(pmul(x,y),c); }
// Returns conj(a * b), which equals conj(a) * conj(b), with a single pconj call.
220 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
222 return pconj(internal::pmul(a, b));
// Division of two Packet2cf values.
// res = a * conj(b), obtained from conj_helper<..., false,true>::pmul.
// s   = b.v * b.v element-wise (vec_madd with a p4f_ZERO addend).
// The result is res.v divided element-wise by s + (s permuted with p16uc_COMPLEX32_REV).
// NOTE(review): presumably the permute swaps the re^2 and im^2 lanes so each lane of the
// divisor holds |b|^2 - confirm against the mask definition. Braces are missing from this
// listing.
226 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
229 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
230 Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
231 return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
// Returns x.v permuted with p16uc_COMPLEX32_REV, wrapped in a Packet2cf.
// NOTE(review): presumably the mask swaps the real and imaginary part of each complex value -
// confirm against its definition. Braces are missing from this listing.
234 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(
const Packet2cf& x)
236 return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
// In-place transpose of a block of two Packet2cf values.
// Both output rows are vec_perm combinations of packet[0] and packet[1] (masks
// p16uc_TRANSPOSE64_HI and p16uc_TRANSPOSE64_LO).
// NOTE(review): the function braces are missing from this listing.
239 EIGEN_STRONG_INLINE
void ptranspose(PacketBlock<Packet2cf,2>& kernel)
// The new row 0 is kept in a temporary because packet[0] is still read on the next line.
241 Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
242 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
243 kernel.packet[0].v = tmp;
// Constructors of Packet1cd. The enclosing struct header and the declaration of the `v`
// member are not part of this excerpt.
// Default constructor: empty body, no member initializers.
250 EIGEN_STRONG_INLINE Packet1cd() {}
// Wraps an existing Packet2d; `explicit`, so a Packet2d never converts to Packet1cd implicitly.
251 EIGEN_STRONG_INLINE
explicit Packet1cd(
const Packet2d& a) : v(a) {}
// packet_traits specialization for std::complex<double>, derived from default_packet_traits.
// Both the full packet type and the half packet type are Packet1cd.
// NOTE(review): the braces and any further members are missing from this listing.
255 template<>
struct packet_traits<
std::complex<double> > : default_packet_traits
257 typedef Packet1cd type;
258 typedef Packet1cd half;
// Reverse traits for Packet1cd: the scalar type is std::complex<double>, a packet holds one
// scalar, the alignment is Aligned16 and the half packet is Packet1cd itself.
278 template<>
struct unpacket_traits<Packet1cd>
{
  typedef std::complex<double> type;
  typedef Packet1cd            half;
  enum {
    size      = 1,
    alignment = Aligned16
  };
};
// Loads a Packet1cd from `from` by reinterpreting the memory as doubles and calling
// pload<Packet2d>; EIGEN_DEBUG_ALIGNED_LOAD is expanded first.
280 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const double* scalars = (const double*)from;
  return Packet1cd(pload<Packet2d>(scalars));
}
// Loads a Packet1cd from `from` by reinterpreting the memory as doubles and calling
// ploadu<Packet2d>; EIGEN_DEBUG_UNALIGNED_LOAD is expanded first.
281 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const double* scalars = (const double*)from;
  return Packet1cd(ploadu<Packet2d>(scalars));
}
// Stores a Packet1cd to `to` by forwarding the underlying vector to pstore on a double pointer;
// EIGEN_DEBUG_ALIGNED_STORE is expanded first.
282 template<> EIGEN_STRONG_INLINE
void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  double* dst = (double*)to;
  pstore(dst, from.v);
}
// Stores a Packet1cd to `to` by forwarding the underlying vector to pstoreu on a double pointer;
// EIGEN_DEBUG_UNALIGNED_STORE is expanded first.
283 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  double* dst = (double*)to;
  pstoreu(dst, from.v);
}
// Builds a Packet1cd from one std::complex<double> by loading it with ploadu<Packet1cd>.
285 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
286 {
  const std::complex<double>* src = &from;
  return ploadu<Packet1cd>(src);
}
// Strided gather into a Packet1cd.
//
// unpacket_traits<Packet1cd>::size is 1, so the packet holds exactly one std::complex<double>
// and the stride never selects a second element. The previous implementation nevertheless
// copied from[1*stride] into a temporary before loading only the first 16 bytes of it; that
// extra read touched memory outside the single gathered element (an out-of-bounds read when
// `from` points at the last element of a buffer).
//
// from:   address of the element to gather; it is read with ploadu, so no alignment is required.
// stride: unused for a one-element packet; kept so the specialization matches the primary
//         pgather template.
// Returns the packet containing from[0].
288 template<> EIGEN_DEVICE_FUNC
inline Packet1cd pgather<std::complex<double>, Packet1cd>(
const std::complex<double>* from, Index /*stride*/)
{
  return ploadu<Packet1cd>(from);
}
// Strided scatter of a Packet1cd.
//
// unpacket_traits<Packet1cd>::size is 1, so the packet holds exactly one std::complex<double>
// and only to[0] may be written. The previous implementation stored the packet into a
// two-element temporary and then also executed `to[1*stride] = af[1]`, although the packet
// store fills only af[0]; that overwrote the element at to[stride], which is not part of the
// packet, with a value unrelated to `from`.
//
// to:     destination of the single element; it is written with pstoreu, so no alignment is
//         required.
// from:   packet to store.
// stride: unused for a one-element packet; kept so the specialization matches the primary
//         pscatter template.
295 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,
const Packet1cd& from, Index /*stride*/)
{
  pstoreu<std::complex<double> >(to, from);
}
// Adds two Packet1cd values by applying vec_add to their underlying vectors.
303 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet1cd sum(vec_add(a.v, b.v));
  return sum;
}
// Subtracts b from a by applying vec_sub to the underlying vectors.
304 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet1cd difference(vec_sub(a.v, b.v));
  return difference;
}
// Negates a Packet1cd by delegating to pnegate on a Packet2d copy of the underlying vector.
305 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
{
  Packet1cd negated(pnegate(Packet2d(a.v)));
  return negated;
}
// Conjugate of a Packet1cd: XORs the underlying vector with p2ul_CONJ_XOR2 viewed as a Packet2d.
306 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
{
  const Packet2d sign_mask = (Packet2d)p2ul_CONJ_XOR2;
  return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, sign_mask));
}
// Product of two Packet1cd values.
// Visible steps: a_re / a_im are `a` permuted with p16uc_PSET64_HI / p16uc_PSET64_LO; v1 and v2
// are their element-wise products with b.v (vec_madd with a p2d_ZERO addend); v2 is rotated by
// 8 bytes, which exchanges its two doubles, and XOR-ed with p2ul_CONJ_XOR1; the result is v1 + v2.
// NOTE(review): presumably the permutes broadcast re(a) and im(a) and the XOR negates the lane
// holding im(a)*im(b) - confirm against the mask definitions. Braces are missing from this
// listing.
308 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
310 Packet2d a_re, a_im, v1, v2;
313 a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
315 a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
317 v1 = vec_madd(a_re, b.v, p2d_ZERO);
319 v2 = vec_madd(a_im, b.v, p2d_ZERO);
320 v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
321 v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
323 return Packet1cd(vec_add(v1, v2));
// Bitwise AND of the underlying vectors of two Packet1cd values.
326 template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet1cd conjunction(vec_and(a.v, b.v));
  return conjunction;
}
// Bitwise OR of the underlying vectors of two Packet1cd values.
327 template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet1cd disjunction(vec_or(a.v, b.v));
  return disjunction;
}
// Bitwise XOR of the underlying vectors of two Packet1cd values.
328 template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet1cd toggled(vec_xor(a.v, b.v));
  return toggled;
}
// Computes a AND (NOT b) bitwise; NOT b is formed as vec_nor(b.v, b.v).
329 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet1cd cleared(vec_and(a.v, vec_nor(b.v, b.v)));
  return cleared;
}
// Implemented as pset1<Packet1cd> applied to the value stored at `from`.
// NOTE(review): the function braces are missing from this listing.
331 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from)
333 return pset1<Packet1cd>(*from);
// Issues a vec_dstt hint for `addr` (viewed as a long pointer) with control word
// DST_CTRL(2,2,32) on channel DST_CHAN.
336 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(const std::complex<double>* addr)
{
  long* hint_addr = (long *)addr;
  vec_dstt(hint_addr, DST_CTRL(2,2,32), DST_CHAN);
}
// Extracts a std::complex<double> from a Packet1cd. The visible part stores the packet with
// pstore into a local EIGEN_ALIGN16 array of two complex values.
// NOTE(review): the braces and the return statement are missing from this listing;
// presumably res[0] is returned - confirm.
338 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
340 std::complex<double> EIGEN_ALIGN16 res[2];
341 pstore<std::complex<double> >(res, a);
// Reversing a Packet1cd returns the packet unchanged.
346 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a)
{
  return a;
}
// predux specialization for Packet1cd: takes one packet and returns a std::complex<double>.
// NOTE(review): the body is not part of this listing.
348 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a)
// preduxp specialization for Packet1cd: takes a pointer to packets and returns a Packet1cd.
// NOTE(review): the body is not part of this listing.
353 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(
const Packet1cd* vecs)
// predux_mul specialization for Packet1cd: takes one packet and returns a std::complex<double>.
// NOTE(review): the body is not part of this listing.
358 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a)
// palign_impl for Packet1cd: run() takes both packets as unnamed parameters, so it cannot
// refer to them by name.
// NOTE(review): the `template<int Offset>` header, the body of run() and the braces are
// missing from this listing.
364 struct palign_impl<Offset,Packet1cd>
366 static EIGEN_STRONG_INLINE
void run(Packet1cd& ,
const Packet1cd& )
// conj_helper specialization with flags <false,true>: the second operand is conjugated.
// NOTE(review): the struct braces and the braces of pmul are missing from this listing.
373 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
// Multiply-add: this helper's pmul(x, y) plus c.
375 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
376 {
return padd(pmul(x,y),c); }
// Returns a * conj(b), using the namespace-level internal::pmul and pconj.
378 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
380 return internal::pmul(a, pconj(b));
// conj_helper specialization with flags <true,false>: the first operand is conjugated.
// NOTE(review): the struct braces and the braces of pmul are missing from this listing.
384 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
// Multiply-add: this helper's pmul(x, y) plus c.
386 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
387 {
return padd(pmul(x,y),c); }
// Returns conj(a) * b, using the namespace-level internal::pmul and pconj.
389 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
391 return internal::pmul(pconj(a), b);
// conj_helper specialization with flags <true,true>: the product itself is conjugated.
// NOTE(review): the struct braces and the braces of pmul are missing from this listing.
395 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
// Multiply-add: this helper's pmul(x, y) plus c.
397 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
398 {
return padd(pmul(x,y),c); }
// Returns conj(a * b), which equals conj(a) * conj(b), with a single pconj call.
400 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
402 return pconj(internal::pmul(a, b));
// Division of two Packet1cd values.
// res = a * conj(b), obtained from conj_helper<..., false,true>::pmul.
// s   = b.v * b.v element-wise (vec_madd).
// The result is res.v divided element-wise by s + (s permuted with p16uc_REVERSE64).
// NOTE(review): the vec_madd addend here is p2d_ZERO_, whereas pmul<Packet1cd> uses p2d_ZERO
// for the same purpose - confirm that the difference is intended.
// NOTE(review): presumably the permute exchanges the two doubles so each lane of the divisor
// holds |b|^2 - confirm against the mask definition. Braces are missing from this listing.
406 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
409 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
410 Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
411 return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64))));
// Returns the Packet2d view of x reversed with preverse, wrapped in a Packet1cd.
// NOTE(review): presumably this exchanges the real and imaginary parts - confirm against
// preverse for Packet2d.
// Unlike the Packet2cf variant, this is declared as a plain overload, not as a `template<>`
// specialization. Braces are missing from this listing.
414 EIGEN_STRONG_INLINE Packet1cd pcplxflip(
const Packet1cd& x)
416 return Packet1cd(preverse(Packet2d(x.v)));
// In-place transpose of a block of two Packet1cd values.
// Both output rows are vec_perm combinations of packet[0] and packet[1] (masks
// p16uc_TRANSPOSE64_HI and p16uc_TRANSPOSE64_LO).
// NOTE(review): the function braces are missing from this listing.
419 EIGEN_STRONG_INLINE
void ptranspose(PacketBlock<Packet1cd,2>& kernel)
// The new row 0 is kept in a temporary because packet[0] is still read on the next line.
421 Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
422 kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
423 kernel.packet[0].v = tmp;
430 #endif // EIGEN_COMPLEX32_ALTIVEC_H
Definition: Constants.h:230
Definition: StdDeque.h:58
Definition: Eigen_Colamd.h:54