/*
 * NOTE(review): this span is a garbled extraction of x86 MMX inline-asm
 * (FFmpeg-style CAVS DSP).  Original line breaks were lost and stray
 * source line numbers (40, 41, ...) are fused into the text; interior
 * lines (the function's '{', the __asm__ volatile( opener and the
 * closing ');}' ) are missing from this view.  Code is left
 * byte-identical below; only comments are added.
 *
 * SUMSUB_BA(a, b) -- MMX butterfly on packed 16-bit words:
 *   paddw b,a ; paddw b,b ; psubw a,b
 * leaves a = old_a + old_b and b = old_b - old_a, with no temp register.
 */
40 #define SUMSUB_BA( a, b ) \ 41 "paddw "#b", "#a" \n\t"\ 42 "paddw "#b", "#b" \n\t"\ 43 "psubw "#a", "#b" \n\t" 51 static inline void cavs_idct8_1d(int16_t *
block, uint64_t bias)
/*
 * cavs_idct8_1d: one 1-D pass of the 8-point CAVS inverse transform,
 * four 16-bit columns at a time (rows are 16 bytes apart: offsets
 * 0..112(%0)).  Odd rows (16/48/80/112) are combined first via
 * shift-and-add sequences, then even rows 32/96 (scaled with psllw $2),
 * then rows 0/64 are butterflied, shifted left by 3 and get `bias` (%1)
 * added, before six final SUMSUB_BA butterflies recombine everything.
 * Results are left in mm0..mm7 for the caller's following asm block.
 */
54 "movq 112(%0), %%mm4 \n\t" 55 "movq 16(%0), %%mm5 \n\t" 56 "movq 80(%0), %%mm2 \n\t" 57 "movq 48(%0), %%mm7 \n\t" 58 "movq %%mm4, %%mm0 \n\t" 59 "movq %%mm5, %%mm3 \n\t" 60 "movq %%mm2, %%mm6 \n\t" 61 "movq %%mm7, %%mm1 \n\t" 63 "paddw %%mm4, %%mm4 \n\t" 64 "paddw %%mm3, %%mm3 \n\t" 65 "paddw %%mm6, %%mm6 \n\t" 66 "paddw %%mm1, %%mm1 \n\t" 67 "paddw %%mm4, %%mm0 \n\t" 68 "paddw %%mm3, %%mm5 \n\t" 69 "paddw %%mm6, %%mm2 \n\t" 70 "paddw %%mm1, %%mm7 \n\t" 71 "psubw %%mm4, %%mm5 \n\t" 72 "paddw %%mm6, %%mm7 \n\t" 73 "psubw %%mm2, %%mm1 \n\t" 74 "paddw %%mm0, %%mm3 \n\t" 76 "movq %%mm5, %%mm4 \n\t" 77 "movq %%mm7, %%mm6 \n\t" 78 "movq %%mm3, %%mm0 \n\t" 79 "movq %%mm1, %%mm2 \n\t" 80 SUMSUB_BA( %%mm7, %%mm5 )
81 "paddw %%mm3, %%mm7 \n\t" 82 "paddw %%mm1, %%mm5 \n\t" 83 "paddw %%mm7, %%mm7 \n\t" 84 "paddw %%mm5, %%mm5 \n\t" 85 "paddw %%mm6, %%mm7 \n\t" 86 "paddw %%mm4, %%mm5 \n\t" 88 SUMSUB_BA( %%mm1, %%mm3 )
89 "psubw %%mm1, %%mm4 \n\t" 90 "movq %%mm4, %%mm1 \n\t" 91 "psubw %%mm6, %%mm3 \n\t" 92 "paddw %%mm1, %%mm1 \n\t" 93 "paddw %%mm3, %%mm3 \n\t" 94 "psubw %%mm2, %%mm1 \n\t" 95 "paddw %%mm0, %%mm3 \n\t" 97 "movq 32(%0), %%mm2 \n\t" 98 "movq 96(%0), %%mm6 \n\t" 99 "movq %%mm2, %%mm4 \n\t" 100 "movq %%mm6, %%mm0 \n\t" 101 "psllw $2, %%mm4 \n\t" 102 "psllw $2, %%mm6 \n\t" 103 "paddw %%mm4, %%mm2 \n\t" 104 "paddw %%mm6, %%mm0 \n\t" 105 "paddw %%mm2, %%mm2 \n\t" 106 "paddw %%mm0, %%mm0 \n\t" 107 "psubw %%mm0, %%mm4 \n\t" 108 "paddw %%mm2, %%mm6 \n\t" 110 "movq (%0), %%mm2 \n\t" 111 "movq 64(%0), %%mm0 \n\t" 112 SUMSUB_BA( %%mm0, %%mm2 )
113 "psllw $3, %%mm0 \n\t" 114 "psllw $3, %%mm2 \n\t" 115 "paddw %1, %%mm0 \n\t" 116 "paddw %1, %%mm2 \n\t" 118 SUMSUB_BA( %%mm6, %%mm0 )
119 SUMSUB_BA( %%mm4, %%mm2 )
120 SUMSUB_BA( %%mm7, %%mm6 )
121 SUMSUB_BA( %%mm5, %%mm4 )
122 SUMSUB_BA( %%mm3, %%mm2 )
123 SUMSUB_BA( %%mm1, %%mm0 )
/* asm operands: %0 = block pointer ("r"), %1 = bias ("m").
   NOTE(review): any clobber list is not visible in this view. */
124 :: "
r"(block), "m"(bias)
/*
 * NOTE(review): garbled extraction -- stray line numbers fused in, line
 * breaks lost.  Code is kept byte-identical; comments only.
 *
 * SBUTTERFLY(a,b,t,n,m): interleave the low halves of a/b into a and
 * the high halves into t (punpckl/punpckh, granularity n: wd = words,
 * dq = dwords; m = mov size).
 * TRANSPOSE4: 4x4 transpose of four MMX registers built from four
 * SBUTTERFLY steps; t is the scratch register.
 */
128 #define SBUTTERFLY(a,b,t,n,m)\ 129 "mov" #m " " #a ", " #t " \n\t" \ 130 "punpckl" #n " " #b ", " #a " \n\t" \ 131 "punpckh" #n " " #b ", " #t " \n\t" \ 133 #define TRANSPOSE4(a,b,c,d,t)\ 134 SBUTTERFLY(a,b,t,wd,q) \ 135 SBUTTERFLY(c,d,b,wd,q) \ 136 SBUTTERFLY(a,c,d,dq,q) \ 137 SBUTTERFLY(t,b,c,dq,q) 139 static void cavs_idct8_add_mmx(
uint8_t *dst, int16_t *block,
int stride)
/*
 * cavs_idct8_add_mmx: full 8x8 IDCT and add-to-destination.
 * NOTE(review): the loop scaffolding, local buffer declarations and the
 * __asm__ statement boundaries (original lines ~140-144, 168-177,
 * 194-203) are missing from this view; only the asm string bodies
 * survive.  First pass appears to run cavs_idct8_1d on half the block
 * with rounding constant ff_pw_4 -- TODO confirm against the full file.
 */
145 cavs_idct8_1d(block + 4 * i,
ff_pw_4.
a);
/* Row pass epilogue: normalise by >>3, then transpose the 4x4 word
   quadrants with two TRANSPOSE4 invocations, spilling through (%0). */
148 "psraw $3, %%mm7 \n\t" 149 "psraw $3, %%mm6 \n\t" 150 "psraw $3, %%mm5 \n\t" 151 "psraw $3, %%mm4 \n\t" 152 "psraw $3, %%mm3 \n\t" 153 "psraw $3, %%mm2 \n\t" 154 "psraw $3, %%mm1 \n\t" 155 "psraw $3, %%mm0 \n\t" 156 "movq %%mm7, (%0) \n\t" 157 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
158 "movq %%mm0, 8(%0) \n\t" 159 "movq %%mm6, 24(%0) \n\t" 160 "movq %%mm7, 40(%0) \n\t" 161 "movq %%mm4, 56(%0) \n\t" 162 "movq (%0), %%mm7 \n\t" 163 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
/* Column pass epilogue: normalise by >>7 and store all eight rows at
   offsets 0..112(%0).  The trailing text of this span fuses in the
   start of the qpel section: QPEL_CAVSV1/V2/V3 are the vertical
   quarter-pel interpolation kernels -- V2 is the symmetric half-pel
   case ((C+D)*%5 - B - E + rounder %4, then >>3); V1/V3 are the
   asymmetric quarter-pel cases using the MANGLE(MUL2) constant and a
   >>7 normalisation.  OP() stores or averages the packed result. */
164 "movq %%mm7, (%0) \n\t" 165 "movq %%mm1, 16(%0) \n\t" 166 "movq %%mm0, 32(%0) \n\t" 167 "movq %%mm3, 48(%0) \n\t" 178 "psraw $7, %%mm7 \n\t" 179 "psraw $7, %%mm6 \n\t" 180 "psraw $7, %%mm5 \n\t" 181 "psraw $7, %%mm4 \n\t" 182 "psraw $7, %%mm3 \n\t" 183 "psraw $7, %%mm2 \n\t" 184 "psraw $7, %%mm1 \n\t" 185 "psraw $7, %%mm0 \n\t" 186 "movq %%mm7, (%0) \n\t" 187 "movq %%mm5, 16(%0) \n\t" 188 "movq %%mm3, 32(%0) \n\t" 189 "movq %%mm1, 48(%0) \n\t" 190 "movq %%mm0, 64(%0) \n\t" 191 "movq %%mm2, 80(%0) \n\t" 192 "movq %%mm4, 96(%0) \n\t" 193 "movq %%mm6, 112(%0) \n\t" 204 #if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) 213 #define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \ 214 "movd (%0), "#F" \n\t"\ 215 "movq "#C", %%mm6 \n\t"\ 216 "pmullw %5, %%mm6 \n\t"\ 217 "movq "#D", %%mm7 \n\t"\ 218 "pmullw "MANGLE(MUL2)", %%mm7\n\t"\ 219 "psllw $3, "#E" \n\t"\ 220 "psubw "#E", %%mm6 \n\t"\ 221 "psraw $3, "#E" \n\t"\ 222 "paddw %%mm7, %%mm6 \n\t"\ 223 "paddw "#E", %%mm6 \n\t"\ 224 "paddw "#B", "#B" \n\t"\ 225 "pxor %%mm7, %%mm7 \n\t"\ 227 "punpcklbw %%mm7, "#F" \n\t"\ 228 "psubw "#B", %%mm6 \n\t"\ 229 "psraw $1, "#B" \n\t"\ 230 "psubw "#A", %%mm6 \n\t"\ 231 "paddw %4, %%mm6 \n\t"\ 232 "psraw $7, %%mm6 \n\t"\ 233 "packuswb %%mm6, %%mm6 \n\t"\ 234 OP(%%mm6, (%1), A, d) \ 238 #define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \ 239 "movd (%0), "#F" \n\t"\ 240 "movq "#C", %%mm6 \n\t"\ 241 "paddw "#D", %%mm6 \n\t"\ 242 "pmullw %5, %%mm6 \n\t"\ 244 "punpcklbw %%mm7, "#F" \n\t"\ 245 "psubw "#B", %%mm6 \n\t"\ 246 "psubw "#E", %%mm6 \n\t"\ 247 "paddw %4, %%mm6 \n\t"\ 248 "psraw $3, %%mm6 \n\t"\ 249 "packuswb %%mm6, %%mm6 \n\t"\ 250 OP(%%mm6, (%1), A, d) \ 254 #define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \ 255 "movd (%0), "#F" \n\t"\ 256 "movq "#C", %%mm6 \n\t"\ 257 "pmullw "MANGLE(MUL2)", %%mm6\n\t"\ 258 "movq "#D", %%mm7 \n\t"\ 259 "pmullw %5, %%mm7 \n\t"\ 260 "psllw $3, "#B" \n\t"\ 261 "psubw "#B", %%mm6 \n\t"\ 262 "psraw $3, "#B" \n\t"\ 263 "paddw %%mm7, %%mm6 \n\t"\ 264 "paddw "#B", %%mm6 \n\t"\ 265 
/*
 * NOTE(review): continuation of the garbled qpel section; code kept
 * byte-identical, comments only.
 *
 * Below: tail of QPEL_CAVSV3, then QPEL_CAVSVNUM -- the vertical-filter
 * driver: it zeroes mm7, loads five source rows with movd, zero-extends
 * bytes to words via punpcklbw, then applies VOP eight times per asm
 * block while rotating the mm0..mm5 register window down the column
 * (a second eight-VOP block follows, presumably for h==16 -- TODO
 * confirm, the selection logic is missing from this view).  Operand
 * binding: src pinned to eax ("+a"), dst to ecx ("+c"), srcStride in
 * esi ("S"), with ADD (%4) the rounder and MUL1 (%5) a filter constant.
 *
 * QPEL_CAVS then defines the 8-wide horizontal filter
 *   ((src[0]+src[1])*5 - (src[-1]+src[2]) + 4) >> 3
 * with %5 = ff_pw_5 and %6 = ff_pw_4, plus the 8/16-wide vertical
 * wrappers (v1/v2/v3, the 16-wide ones calling the 8-wide kernel on
 * both halves).  CAVS_MC instantiates the _mc20/_mc01/_mc02/_mc03
 * dispatch entry points used by the function-pointer tables.
 * PUT_OP / AVG_3DNOW_OP / AVG_MMXEXT_OP select a plain store versus
 * pavgusb/pavgb averaging for the put_ and avg_ variants.
 */
"paddw "#E", "#E" \n\t"\ 265 "paddw "#E", "#E" \n\t"\ 266 "pxor %%mm7, %%mm7 \n\t"\ 268 "punpcklbw %%mm7, "#F" \n\t"\ 269 "psubw "#E", %%mm6 \n\t"\ 270 "psraw $1, "#E" \n\t"\ 271 "psubw "#F", %%mm6 \n\t"\ 272 "paddw %4, %%mm6 \n\t"\ 273 "psraw $7, %%mm6 \n\t"\ 274 "packuswb %%mm6, %%mm6 \n\t"\ 275 OP(%%mm6, (%1), A, d) \ 279 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\ 285 "pxor %%mm7, %%mm7 \n\t"\ 286 "movd (%0), %%mm0 \n\t"\ 288 "movd (%0), %%mm1 \n\t"\ 290 "movd (%0), %%mm2 \n\t"\ 292 "movd (%0), %%mm3 \n\t"\ 294 "movd (%0), %%mm4 \n\t"\ 296 "punpcklbw %%mm7, %%mm0 \n\t"\ 297 "punpcklbw %%mm7, %%mm1 \n\t"\ 298 "punpcklbw %%mm7, %%mm2 \n\t"\ 299 "punpcklbw %%mm7, %%mm3 \n\t"\ 300 "punpcklbw %%mm7, %%mm4 \n\t"\ 301 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\ 302 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\ 303 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\ 304 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\ 305 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\ 306 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\ 307 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\ 308 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\ 310 : "+a"(src), "+c"(dst)\ 311 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ 316 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\ 317 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\ 318 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\ 319 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\ 320 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\ 321 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\ 322 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\ 323 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\ 325 : "+a"(src), "+c"(dst)\ 326 : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ 330 src += 4-(h+5)*srcStride;\ 331 dst += 4-h*dstStride;\ 334 #define QPEL_CAVS(OPNAME, OP, MMX)\ 335 static void OPNAME 
## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 338 "pxor %%mm7, %%mm7 \n\t"\ 339 "movq %5, %%mm6 \n\t"\ 341 "movq (%0), %%mm0 \n\t"\ 342 "movq 1(%0), %%mm2 \n\t"\ 343 "movq %%mm0, %%mm1 \n\t"\ 344 "movq %%mm2, %%mm3 \n\t"\ 345 "punpcklbw %%mm7, %%mm0 \n\t"\ 346 "punpckhbw %%mm7, %%mm1 \n\t"\ 347 "punpcklbw %%mm7, %%mm2 \n\t"\ 348 "punpckhbw %%mm7, %%mm3 \n\t"\ 349 "paddw %%mm2, %%mm0 \n\t"\ 350 "paddw %%mm3, %%mm1 \n\t"\ 351 "pmullw %%mm6, %%mm0 \n\t"\ 352 "pmullw %%mm6, %%mm1 \n\t"\ 353 "movq -1(%0), %%mm2 \n\t"\ 354 "movq 2(%0), %%mm4 \n\t"\ 355 "movq %%mm2, %%mm3 \n\t"\ 356 "movq %%mm4, %%mm5 \n\t"\ 357 "punpcklbw %%mm7, %%mm2 \n\t"\ 358 "punpckhbw %%mm7, %%mm3 \n\t"\ 359 "punpcklbw %%mm7, %%mm4 \n\t"\ 360 "punpckhbw %%mm7, %%mm5 \n\t"\ 361 "paddw %%mm4, %%mm2 \n\t"\ 362 "paddw %%mm3, %%mm5 \n\t"\ 363 "psubw %%mm2, %%mm0 \n\t"\ 364 "psubw %%mm5, %%mm1 \n\t"\ 365 "movq %6, %%mm5 \n\t"\ 366 "paddw %%mm5, %%mm0 \n\t"\ 367 "paddw %%mm5, %%mm1 \n\t"\ 368 "psraw $3, %%mm0 \n\t"\ 369 "psraw $3, %%mm1 \n\t"\ 370 "packuswb %%mm1, %%mm0 \n\t"\ 371 OP(%%mm0, (%1),%%mm5, q) \ 376 : "+a"(src), "+c"(dst), "+m"(h)\ 377 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\ 382 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\ 383 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \ 386 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\ 387 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \ 390 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\ 391 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \ 394 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 395 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , 
dstStride, srcStride, 8);\ 397 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 398 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\ 399 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 402 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 403 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\ 405 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 406 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\ 407 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 410 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 411 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\ 413 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 414 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\ 415 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 418 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 419 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ 420 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 423 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\ 424 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 427 #define CAVS_MC(OPNAME, SIZE, MMX) \ 428 static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 430 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\ 433 static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 435 OPNAME 
## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\ 438 static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 440 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\ 443 static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 445 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\ 448 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" 449 #define AVG_3DNOW_OP(a,b,temp, size) \ 450 "mov" #size " " #b ", " #temp " \n\t"\ 451 "pavgusb " #temp ", " #a " \n\t"\ 452 "mov" #size " " #a ", " #b " \n\t" 453 #define AVG_MMXEXT_OP(a, b, temp, size) \ 454 "mov" #size " " #b ", " #temp " \n\t"\ 455 "pavgb " #temp ", " #a " \n\t"\ 456 "mov" #size " " #a ", " #b " \n\t" 461 static void put_cavs_qpel8_mc00_mmx(
uint8_t *dst,
const uint8_t *src,
/*
 * mc00 (no-interpolation) wrappers: put/avg of an 8x8 or 16x16 block.
 * NOTE(review): the bodies and the trailing parameters are missing from
 * this view -- presumably each forwards to ff_{put,avg}_pixels{8,16}_mmx;
 * TODO confirm against the full file.
 */
467 static void avg_cavs_qpel8_mc00_mmx(
uint8_t *dst,
const uint8_t *src,
473 static void put_cavs_qpel16_mc00_mmx(
uint8_t *dst,
const uint8_t *src,
479 static void avg_cavs_qpel16_mc00_mmx(
uint8_t *dst,
const uint8_t *src,
/*
 * DSPFUNC(PFX, IDX, NUM, EXT): fills the qpel function-pointer table
 * slots 2, 4, 8 and 12 of PFX##_cavs_qpel_pixels_tab[IDX] with the
 * mc20 / mc01 / mc02 / mc03 kernels generated above.  The fused tail
 * starts the MMXEXT instantiation block (guarded by
 * HAVE_MMXEXT_INLINE).
 */
498 #define DSPFUNC(PFX, IDX, NUM, EXT) \ 499 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \ 500 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \ 501 c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \ 502 c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT; \ 504 #if HAVE_MMXEXT_INLINE 505 QPEL_CAVS(put_, PUT_OP, mmxext)
/*
 * Template instantiations and init fragments.
 * QPEL_CAVS expands the put/avg qpel kernels for MMXEXT and 3DNow!
 * (selecting pavgb vs. pavgusb for the avg_ variant), each guarded by
 * its HAVE_*_INLINE config macro.  The cavsdsp_init_* calls below are
 * fragments of the dispatcher that wires the kernels into the
 * CAVSDSPContext; the enclosing function and its cpu-flag checks are
 * not visible in this view (presumably ff_cavsdsp_init_x86 -- TODO
 * confirm against the full file).
 */
506 QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
523 #if HAVE_AMD3DNOW_INLINE 524 QPEL_CAVS(put_, PUT_OP, 3dnow)
525 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
548 cavsdsp_init_mmx(c, avctx);
550 #if HAVE_AMD3DNOW_INLINE 552 cavsdsp_init_3dnow(c, avctx);
554 #if HAVE_MMXEXT_INLINE 556 cavsdsp_init_mmxext(c, avctx);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
#define DECLARE_ALIGNED(n, t, v)
Macro definitions for various function/variable attributes.
#define DSPFUNC(PFX, IDX, NUM, EXT)
#define INLINE_MMX(flags)
qpel_mc_func put_cavs_qpel_pixels_tab[2][16]
#define CAVS_MC(OPNAME, SIZE, MMX)
#define INLINE_AMD3DNOW(flags)
main external API structure.
qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void(* cavs_idct8_add)(uint8_t *dst, int16_t *block, int stride)
#define INLINE_MMXEXT(flags)
void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
common internal and external API header
void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)