#define OP_PUT(S,D)
#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"

/* Add the bias/rounder held in %%mm7 and shift both accumulators down. */
#define NORMALIZE_MMX(SHIFT)                    \
     "paddw     %%mm7, %%mm3           \n\t"    \
     "paddw     %%mm7, %%mm4           \n\t"    \
     "psraw     "SHIFT", %%mm3         \n\t"    \
     "psraw     "SHIFT", %%mm4         \n\t"

#define TRANSFER_DO_PACK(OP)                    \
     "packuswb  %%mm4, %%mm3           \n\t"    \
     OP((%2), %%mm3)                            \
     "movq      %%mm3, (%2)            \n\t"

#define TRANSFER_DONT_PACK(OP)                  \
     OP(0(%2), %%mm3)                           \
     OP(8(%2), %%mm4)                           \
     "movq      %%mm3, 0(%2)           \n\t"    \
     "movq      %%mm4, 8(%2)           \n\t"

#define DO_UNPACK(reg)   "punpcklbw %%mm0, " reg "\n\t"
#define DONT_UNPACK(reg)

/* Broadcast the 16-bit rounder to all four words of %%mm7. */
#define LOAD_ROUNDER_MMX(ROUND)                 \
     "movd      "ROUND", %%mm7         \n\t"    \
     "punpcklwd %%mm7, %%mm7           \n\t"    \
     "punpckldq %%mm7, %%mm7           \n\t"

#define SHIFT2_LINE(OFF, R0,R1,R2,R3)           \
    "paddw     %%mm"#R2", %%mm"#R1"    \n\t"    \
    "movd      (%0,%3), %%mm"#R0"      \n\t"    \
    "pmullw    %%mm6, %%mm"#R1"        \n\t"    \
    "punpcklbw %%mm0, %%mm"#R0"        \n\t"    \
    "movd      (%0,%2), %%mm"#R3"      \n\t"    \
    "psubw     %%mm"#R0", %%mm"#R1"    \n\t"    \
    "punpcklbw %%mm0, %%mm"#R3"        \n\t"    \
    "paddw     %%mm7, %%mm"#R1"        \n\t"    \
    "psubw     %%mm"#R3", %%mm"#R1"    \n\t"    \
    "psraw     %4, %%mm"#R1"           \n\t"    \
    "movq      %%mm"#R1", "#OFF"(%1)   \n\t"    \
    "add       %2, %0                  \n\t"

static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
                                       const uint8_t *src, x86_reg stride,
                                       int rnd, int64_t shift)
{
    __asm__ volatile(
        "mov       $3, %%"REG_c"           \n\t"
        LOAD_ROUNDER_MMX("%5")
        "movq      "MANGLE(ff_pw_9)", %%mm6 \n\t"
        "movd      (%0), %%mm2             \n\t"
        "add       %2, %0                  \n\t"
        "movd      (%0), %%mm3             \n\t"
        "punpcklbw %%mm0, %%mm2            \n\t"
        "punpcklbw %%mm0, %%mm3            \n\t"
        SHIFT2_LINE(  0, 1, 2, 3, 4)
        SHIFT2_LINE( 24, 2, 3, 4, 1)
        SHIFT2_LINE( 48, 3, 4, 1, 2)
        SHIFT2_LINE( 72, 4, 1, 2, 3)
        SHIFT2_LINE( 96, 1, 2, 3, 4)
        SHIFT2_LINE(120, 2, 3, 4, 1)
        SHIFT2_LINE(144, 3, 4, 1, 2)
        SHIFT2_LINE(168, 4, 1, 2, 3)
        : "+r"(src), "+r"(dst)
        : "r"(stride), "r"(-2*stride),
          "m"(shift), "m"(rnd), "r"(9*stride-4)
        : "%"REG_c, "memory"
    );
}
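/* For orientation (not part of the original file): a scalar sketch of what the
 * shift2 vertical pass above computes for each column x and output row y,
 * using the VC-1 half-pel bicubic taps (-1, 9, 9, -1):
 *
 *   dst[y][x] = (9*(src[y][x] + src[y+1][x])
 *                - src[y-1][x] - src[y+2][x] + rnd) >> shift;
 *
 * The results stay as 16-bit intermediates for the horizontal pass below.
 */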
#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\
static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
                                             const int16_t *src, int rnd)\
    rnd -= (-1+9+9-1)*1024;\
    LOAD_ROUNDER_MMX("%4")\
    "movq      "MANGLE(ff_pw_128)", %%mm6\n\t"\
    "movq      "MANGLE(ff_pw_9)", %%mm5 \n\t"\
    "movq      2*0+0(%1), %%mm1        \n\t"\
    "movq      2*0+8(%1), %%mm2        \n\t"\
    "movq      2*1+0(%1), %%mm3        \n\t"\
    "movq      2*1+8(%1), %%mm4        \n\t"\
    "paddw     2*3+0(%1), %%mm1        \n\t"\
    "paddw     2*3+8(%1), %%mm2        \n\t"\
    "paddw     2*2+0(%1), %%mm3        \n\t"\
    "paddw     2*2+8(%1), %%mm4        \n\t"\
    "pmullw    %%mm5, %%mm3            \n\t"\
    "pmullw    %%mm5, %%mm4            \n\t"\
    "psubw     %%mm1, %%mm3            \n\t"\
    "psubw     %%mm2, %%mm4            \n\t"\
    "paddw     %%mm6, %%mm3            \n\t"\
    "paddw     %%mm6, %%mm4            \n\t"\
    TRANSFER_DO_PACK(OP)\
    : "+r"(h), "+r" (src), "+r" (dst)\
    : "r"(stride), "m"(rnd)\

VC1_HOR_16b_SHIFT2(OP_PUT, put_)
VC1_HOR_16b_SHIFT2(OP_AVG, avg_)
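/* For reference (not in the original source): per output pixel the shift2
 * horizontal pass amounts to
 *
 *   dst[x] = av_clip_uint8((9*(src[x+1] + src[x+2]) - src[x] - src[x+3]
 *                           + rnd) >> 7);
 *
 * the (-1+9+9-1)*1024 adjustment and the ff_pw_128 bias cancel each other and
 * only keep the intermediate values inside the unsigned range that packuswb
 * saturates to.
 */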
#define VC1_SHIFT2(OP, OPNAME)\
static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
                                     x86_reg stride, int rnd, x86_reg offset)\
    "mov       $8, %%"REG_c"           \n\t"\
    LOAD_ROUNDER_MMX("%5")\
    "movq      "MANGLE(ff_pw_9)", %%mm6\n\t"\
    "movd      0(%0   ), %%mm3         \n\t"\
    "movd      4(%0   ), %%mm4         \n\t"\
    "movd      0(%0,%2), %%mm1         \n\t"\
    "movd      4(%0,%2), %%mm2         \n\t"\
    "punpcklbw %%mm0, %%mm3            \n\t"\
    "punpcklbw %%mm0, %%mm4            \n\t"\
    "punpcklbw %%mm0, %%mm1            \n\t"\
    "punpcklbw %%mm0, %%mm2            \n\t"\
    "paddw     %%mm1, %%mm3            \n\t"\
    "paddw     %%mm2, %%mm4            \n\t"\
    "movd      0(%0,%3), %%mm1         \n\t"\
    "movd      4(%0,%3), %%mm2         \n\t"\
    "pmullw    %%mm6, %%mm3            \n\t"\
    "pmullw    %%mm6, %%mm4            \n\t"\
    "punpcklbw %%mm0, %%mm1            \n\t"\
    "punpcklbw %%mm0, %%mm2            \n\t"\
    "psubw     %%mm1, %%mm3            \n\t"\
    "psubw     %%mm2, %%mm4            \n\t"\
    "movd      0(%0,%2), %%mm1         \n\t"\
    "movd      4(%0,%2), %%mm2         \n\t"\
    "punpcklbw %%mm0, %%mm1            \n\t"\
    "punpcklbw %%mm0, %%mm2            \n\t"\
    "psubw     %%mm1, %%mm3            \n\t"\
    "psubw     %%mm2, %%mm4            \n\t"\
    "packuswb  %%mm4, %%mm3            \n\t"\
    OP((%1), %%mm3)\
    "movq      %%mm3, (%1)             \n\t"\
    "dec       %%"REG_c"               \n\t"\
    : "+r"(src), "+r"(dst)\
    : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
    : "%"REG_c, "memory"\

VC1_SHIFT2(OP_PUT, put_)
VC1_SHIFT2(OP_AVG, avg_)
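/* Note (added for clarity): the same OPNAME##vc1_shift2_mmx body handles both
 * the purely vertical and the purely horizontal half-pel case; the caller in
 * vc1_mspel_mc() below passes offset == stride for the vertical filter and
 * offset == 1 for the horizontal one.
 */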
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)       \
     MOVQ "*0+"A1", %%mm1       \n\t"                           \
     MOVQ "*4+"A1", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "pmullw    "MANGLE(ff_pw_3)", %%mm1\n\t"                   \
     "pmullw    "MANGLE(ff_pw_3)", %%mm2\n\t"                   \
     MOVQ "*0+"A2", %%mm3       \n\t"                           \
     MOVQ "*4+"A2", %%mm4       \n\t"                           \
     UNPACK("%%mm3")                                            \
     UNPACK("%%mm4")                                            \
     "pmullw    %%mm6, %%mm3    \n\t"                           \
     "pmullw    %%mm6, %%mm4    \n\t"                           \
     "psubw     %%mm1, %%mm3    \n\t"                           \
     "psubw     %%mm2, %%mm4    \n\t"                           \
     MOVQ "*0+"A4", %%mm1       \n\t"                           \
     MOVQ "*4+"A4", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "psllw     $2, %%mm1       \n\t"                           \
     "psllw     $2, %%mm2       \n\t"                           \
     "psubw     %%mm1, %%mm3    \n\t"                           \
     "psubw     %%mm2, %%mm4    \n\t"                           \
     MOVQ "*0+"A3", %%mm1       \n\t"                           \
     MOVQ "*4+"A3", %%mm2       \n\t"                           \
     UNPACK("%%mm1")                                            \
     UNPACK("%%mm2")                                            \
     "pmullw    %%mm5, %%mm1    \n\t"                           \
     "pmullw    %%mm5, %%mm2    \n\t"                           \
     "paddw     %%mm1, %%mm3    \n\t"                           \
     "paddw     %%mm2, %%mm4    \n\t"

#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)                    \
static void                                                             \
vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src,      \
                                 x86_reg src_stride,                    \
                                 int rnd, int64_t shift)                \
    LOAD_ROUNDER_MMX("%5")                                              \
    "movq      "MANGLE(ff_pw_53)", %%mm5\n\t"                           \
    "movq      "MANGLE(ff_pw_18)", %%mm6\n\t"                           \
    MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4)            \
    NORMALIZE_MMX("%6")                                                 \
    TRANSFER_DONT_PACK(OP_PUT)                                          \
    "movd      8+"A1", %%mm1    \n\t"                                   \
    DO_UNPACK("%%mm1")                                                  \
    "movq      %%mm1, %%mm3     \n\t"                                   \
    "paddw     %%mm1, %%mm1     \n\t"                                   \
    "paddw     %%mm3, %%mm1     \n\t"                                   \
    "movd      8+"A2", %%mm3    \n\t"                                   \
    DO_UNPACK("%%mm3")                                                  \
    "pmullw    %%mm6, %%mm3     \n\t"                                   \
    "psubw     %%mm1, %%mm3     \n\t"                                   \
    "movd      8+"A3", %%mm1    \n\t"                                   \
    DO_UNPACK("%%mm1")                                                  \
    "pmullw    %%mm5, %%mm1     \n\t"                                   \
    "paddw     %%mm1, %%mm3     \n\t"                                   \
    "movd      8+"A4", %%mm1    \n\t"                                   \
    DO_UNPACK("%%mm1")                                                  \
    "psllw     $2, %%mm1        \n\t"                                   \
    "psubw     %%mm1, %%mm3     \n\t"                                   \
    "paddw     %%mm7, %%mm3     \n\t"                                   \
    "psraw     %6, %%mm3        \n\t"                                   \
    "movq      %%mm3, 16(%2)    \n\t"                                   \
    : "+r"(h), "+r" (src), "+r" (dst)                                   \
    : "r"(src_stride), "r"(3*src_stride),                               \
      "m"(rnd), "m"(shift)                                              \

#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)        \
static void                                                             \
OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,    \
                                       const int16_t *src, int rnd)     \
    rnd -= (-4+58+13-3)*256;                                            \
    LOAD_ROUNDER_MMX("%4")                                              \
    "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                        \
    "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                        \
    MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4)          \
    NORMALIZE_MMX("$7")                                                 \
    "paddw     "MANGLE(ff_pw_128)", %%mm3  \n\t"                        \
    "paddw     "MANGLE(ff_pw_128)", %%mm4  \n\t"                        \
    TRANSFER_DO_PACK(OP)                                                \
    : "+r"(h), "+r" (src), "+r" (dst)                                   \
    : "r"(stride), "m"(rnd)                                             \

#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)             \
static void                                                             \
OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,         \
                              x86_reg stride, int rnd, x86_reg offset)  \
    LOAD_ROUNDER_MMX("%6")                                              \
    "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                        \
    "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                        \
    MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4)            \
    NORMALIZE_MMX("$6")                                                 \
    TRANSFER_DO_PACK(OP)                                                \
    : "+r"(h), "+r" (src), "+r" (dst)                                   \
    : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)                 \

MSPEL_FILTER13_8B     (shift1, "0(%1,%4 )", "0(%1,%3,2)", "0(%1,%3 )", "0(%1 )", OP_PUT, put_)
MSPEL_FILTER13_8B     (shift1, "0(%1,%4 )", "0(%1,%3,2)", "0(%1,%3 )", "0(%1 )", OP_AVG, avg_)
MSPEL_FILTER13_VER_16B(shift1, "0(%1,%4 )", "0(%1,%3,2)", "0(%1,%3 )", "0(%1 )")
MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)", OP_PUT, put_)
MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)", OP_AVG, avg_)

MSPEL_FILTER13_8B     (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )", OP_PUT, put_)
MSPEL_FILTER13_8B     (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )", OP_AVG, avg_)
MSPEL_FILTER13_VER_16B(shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )")
MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)", OP_PUT, put_)
MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)", OP_AVG, avg_)
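/* Added note: with mm5 = ff_pw_53 and mm6 = ff_pw_18, MSPEL_FILTER13_CORE
 * computes 53*A3 + 18*A2 - 3*A1 - 4*A4, i.e. the VC-1 quarter-pel bicubic
 * filter (-4, 53, 18, -3)/64. The shift1 and shift3 instantiations differ only
 * in the order in which the four source lines/columns are fed to the core.
 */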
typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst,
                                               const uint8_t *src,
                                               x86_reg src_stride,
                                               int rnd, int64_t shift);
typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst,
                                               x86_reg dst_stride,
                                               const int16_t *src,
                                               int rnd);
typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src,
                                          x86_reg stride, int rnd,
                                          x86_reg offset);
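/* Added note: the ver_16bits filters write a 16-bit intermediate block (the
 * tmp[] buffer in vc1_mspel_mc() below), the hor_16bits filters consume that
 * buffer and write pixels, and the 8bits filters cover the cases where only
 * one direction needs interpolation.
 */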
#define VC1_MSPEL_MC(OP)\
static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
                               int hmode, int vmode, int rnd)\
    static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
         { NULL, vc1_put_ver_16b_shift1_mmx, vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
    static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
         { NULL, OP ## vc1_hor_16b_shift1_mmx, OP ## vc1_hor_16b_shift2_mmx, OP ## vc1_hor_16b_shift3_mmx };\
    static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\
         { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\
\
    "pxor %%mm0, %%mm0                 \n\t"\
\
    static const int shift_value[] = { 0, 5, 1, 5 };\
    int              shift = (shift_value[hmode]+shift_value[vmode])>>1;\
    DECLARE_ALIGNED(16, int16_t, tmp)[12*8];\
\
    r = (1<<(shift-1)) + rnd-1;\
    vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\
\
    vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);\
\
    vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);\
\
    vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\

VC1_MSPEL_MC(put_)
VC1_MSPEL_MC(avg_)

#define DECLARE_FUNCTION(a, b)                                          \
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst,            \
                                               const uint8_t *src,      \
                                               ptrdiff_t stride,        \
                                               int rnd)                 \
{                                                                       \
     put_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
}                                                                       \
static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst,         \
                                                  const uint8_t *src,   \
                                                  ptrdiff_t stride,     \
                                                  int rnd)              \
{                                                                       \
     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
}

DECLARE_FUNCTION(0, 1)
DECLARE_FUNCTION(0, 2)
DECLARE_FUNCTION(0, 3)

DECLARE_FUNCTION(1, 0)
DECLARE_FUNCTION(1, 1)
DECLARE_FUNCTION(1, 2)
DECLARE_FUNCTION(1, 3)

DECLARE_FUNCTION(2, 0)
DECLARE_FUNCTION(2, 1)
DECLARE_FUNCTION(2, 2)
DECLARE_FUNCTION(2, 3)

DECLARE_FUNCTION(3, 0)
DECLARE_FUNCTION(3, 1)
DECLARE_FUNCTION(3, 2)
DECLARE_FUNCTION(3, 3)
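/* Added note: there is intentionally no DECLARE_FUNCTION(0, 0); the full-pel
 * case needs no interpolation and is served by plain pixel copy/average
 * routines such as put_vc1_mspel_mc00_mmx() below.
 */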
static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movd          %0, %%mm2 \n\t"
        "movd          %1, %%mm3 \n\t"
        "movd          %2, %%mm4 \n\t"
        "movd          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movd       %%mm2, %0    \n\t"
        "movd       %%mm3, %1    \n\t"
        "movd       %%mm4, %2    \n\t"
        "movd       %%mm5, %3    \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
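/* Scalar equivalent of the DC-only block above (added for clarity): after the
 * two scalings, the same saturated DC value is simply added to every pixel of
 * the 4x4 block, e.g.
 *
 *   for (int i = 0; i < 4; i++)
 *       for (int j = 0; j < 4; j++)
 *           dest[i * linesize + j] = av_clip_uint8(dest[i * linesize + j] + dc);
 */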
static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movd          %0, %%mm2 \n\t"
        "movd          %1, %%mm3 \n\t"
        "movd          %2, %%mm4 \n\t"
        "movd          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movd       %%mm2, %0    \n\t"
        "movd       %%mm3, %1    \n\t"
        "movd       %%mm4, %2    \n\t"
        "movd       %%mm5, %3    \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
    dest += 4 * linesize;
    __asm__ volatile(
        "movd          %0, %%mm2 \n\t"
        "movd          %1, %%mm3 \n\t"
        "movd          %2, %%mm4 \n\t"
        "movd          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movd       %%mm2, %0    \n\t"
        "movd       %%mm3, %1    \n\t"
        "movd       %%mm4, %2    \n\t"
        "movd       %%mm5, %3    \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movq          %0, %%mm2 \n\t"
        "movq          %1, %%mm3 \n\t"
        "movq          %2, %%mm4 \n\t"
        "movq          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movq       %%mm2, %0    \n\t"
        "movq       %%mm3, %1    \n\t"
        "movq       %%mm4, %2    \n\t"
        "movq       %%mm5, %3    \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];
    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );
    __asm__ volatile(
        "movq          %0, %%mm2 \n\t"
        "movq          %1, %%mm3 \n\t"
        "movq          %2, %%mm4 \n\t"
        "movq          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movq       %%mm2, %0    \n\t"
        "movq       %%mm3, %1    \n\t"
        "movq       %%mm4, %2    \n\t"
        "movq       %%mm5, %3    \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
    dest += 4 * linesize;
    __asm__ volatile(
        "movq          %0, %%mm2 \n\t"
        "movq          %1, %%mm3 \n\t"
        "movq          %2, %%mm4 \n\t"
        "movq          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movq       %%mm2, %0    \n\t"
        "movq       %%mm3, %1    \n\t"
        "movq       %%mm4, %2    \n\t"
        "movq       %%mm5, %3    \n\t"
        : "+m"(*(uint32_t*)(dest+0*linesize)),
          "+m"(*(uint32_t*)(dest+1*linesize)),
          "+m"(*(uint32_t*)(dest+2*linesize)),
          "+m"(*(uint32_t*)(dest+3*linesize))
    );
}
static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                   ptrdiff_t stride, int rnd)
{
    ff_put_pixels8_mmx(dst, src, stride, 8);
}
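/* Added note: in the full file these routines are wired into
 * dsp->put_vc1_mspel_pixels_tab[] / dsp->avg_vc1_mspel_pixels_tab[] and the
 * vc1_inv_trans_*_dc pointers by ff_vc1dsp_init_mmx() and
 * ff_vc1dsp_init_mmxext(), which are not part of this excerpt.
 */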