// Fixed-point cosine constants: Cn = cos(n*M_PI/16)*sqrt(2)*(1<<14), n = 0..7.
// C4 is rounded down (its formula uses "- 0.5" where the others use "+ 0.5"),
// giving 16383 instead of 16384.
41 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 42 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 43 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 44 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 45 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 46 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 47 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 48 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 51 #define COL_SHIFT 20 // 6 57 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
60 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
87 int16_t *
const temp= (int16_t*)align_tmp;
90 #if 0 //Alternative, simpler variant 92 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 93 "movq " #src0 ", %%mm0 \n\t" \ 94 "movq " #src4 ", %%mm1 \n\t" \ 95 "movq " #src1 ", %%mm2 \n\t" \ 96 "movq " #src5 ", %%mm3 \n\t" \ 97 "movq 16(%2), %%mm4 \n\t" \ 98 "pmaddwd %%mm0, %%mm4 \n\t" \ 99 "movq 24(%2), %%mm5 \n\t" \ 100 "pmaddwd %%mm5, %%mm0 \n\t" \ 101 "movq 32(%2), %%mm5 \n\t" \ 102 "pmaddwd %%mm1, %%mm5 \n\t" \ 103 "movq 40(%2), %%mm6 \n\t" \ 104 "pmaddwd %%mm6, %%mm1 \n\t" \ 105 "movq 48(%2), %%mm7 \n\t" \ 106 "pmaddwd %%mm2, %%mm7 \n\t" \ 107 #rounder ", %%mm4 \n\t"\ 108 "movq %%mm4, %%mm6 \n\t" \ 109 "paddd %%mm5, %%mm4 \n\t" \ 110 "psubd %%mm5, %%mm6 \n\t" \ 111 "movq 56(%2), %%mm5 \n\t" \ 112 "pmaddwd %%mm3, %%mm5 \n\t" \ 113 #rounder ", %%mm0 \n\t"\ 114 "paddd %%mm0, %%mm1 \n\t" \ 115 "paddd %%mm0, %%mm0 \n\t" \ 116 "psubd %%mm1, %%mm0 \n\t" \ 117 "pmaddwd 64(%2), %%mm2 \n\t" \ 118 "paddd %%mm5, %%mm7 \n\t" \ 119 "movq 72(%2), %%mm5 \n\t" \ 120 "pmaddwd %%mm3, %%mm5 \n\t" \ 121 "paddd %%mm4, %%mm7 \n\t" \ 122 "paddd %%mm4, %%mm4 \n\t" \ 123 "psubd %%mm7, %%mm4 \n\t" \ 124 "paddd %%mm2, %%mm5 \n\t" \ 125 "psrad $" #shift ", %%mm7 \n\t"\ 126 "psrad $" #shift ", %%mm4 \n\t"\ 127 "movq %%mm1, %%mm2 \n\t" \ 128 "paddd %%mm5, %%mm1 \n\t" \ 129 "psubd %%mm5, %%mm2 \n\t" \ 130 "psrad $" #shift ", %%mm1 \n\t"\ 131 "psrad $" #shift ", %%mm2 \n\t"\ 132 "packssdw %%mm1, %%mm7 \n\t" \ 133 "packssdw %%mm4, %%mm2 \n\t" \ 134 "movq %%mm7, " #dst " \n\t"\ 135 "movq " #src1 ", %%mm1 \n\t" \ 136 "movq 80(%2), %%mm4 \n\t" \ 137 "movq %%mm2, 24+" #dst " \n\t"\ 138 "pmaddwd %%mm1, %%mm4 \n\t" \ 139 "movq 88(%2), %%mm7 \n\t" \ 140 "pmaddwd 96(%2), %%mm1 \n\t" \ 141 "pmaddwd %%mm3, %%mm7 \n\t" \ 142 "movq %%mm0, %%mm2 \n\t" \ 143 "pmaddwd 104(%2), %%mm3 \n\t" \ 144 "paddd %%mm7, %%mm4 \n\t" \ 145 "paddd %%mm4, %%mm2 \n\t" \ 146 "psubd %%mm4, %%mm0 \n\t" \ 147 "psrad $" #shift ", %%mm2 \n\t"\ 148 "psrad $" #shift ", %%mm0 \n\t"\ 149 "movq %%mm6, %%mm4 \n\t" \ 
150 "paddd %%mm1, %%mm3 \n\t" \ 151 "paddd %%mm3, %%mm6 \n\t" \ 152 "psubd %%mm3, %%mm4 \n\t" \ 153 "psrad $" #shift ", %%mm6 \n\t"\ 154 "packssdw %%mm6, %%mm2 \n\t" \ 155 "movq %%mm2, 8+" #dst " \n\t"\ 156 "psrad $" #shift ", %%mm4 \n\t"\ 157 "packssdw %%mm0, %%mm4 \n\t" \ 158 "movq %%mm4, 16+" #dst " \n\t"\ 160 #define COL_IDCT(src0, src4, src1, src5, dst, shift) \ 161 "movq " #src0 ", %%mm0 \n\t" \ 162 "movq " #src4 ", %%mm1 \n\t" \ 163 "movq " #src1 ", %%mm2 \n\t" \ 164 "movq " #src5 ", %%mm3 \n\t" \ 165 "movq 16(%2), %%mm4 \n\t" \ 166 "pmaddwd %%mm0, %%mm4 \n\t" \ 167 "movq 24(%2), %%mm5 \n\t" \ 168 "pmaddwd %%mm5, %%mm0 \n\t" \ 169 "movq 32(%2), %%mm5 \n\t" \ 170 "pmaddwd %%mm1, %%mm5 \n\t" \ 171 "movq 40(%2), %%mm6 \n\t" \ 172 "pmaddwd %%mm6, %%mm1 \n\t" \ 173 "movq %%mm4, %%mm6 \n\t" \ 174 "movq 48(%2), %%mm7 \n\t" \ 175 "pmaddwd %%mm2, %%mm7 \n\t" \ 176 "paddd %%mm5, %%mm4 \n\t" \ 177 "psubd %%mm5, %%mm6 \n\t" \ 178 "movq %%mm0, %%mm5 \n\t" \ 179 "paddd %%mm1, %%mm0 \n\t" \ 180 "psubd %%mm1, %%mm5 \n\t" \ 181 "movq 56(%2), %%mm1 \n\t" \ 182 "pmaddwd %%mm3, %%mm1 \n\t" \ 183 "pmaddwd 64(%2), %%mm2 \n\t" \ 184 "paddd %%mm1, %%mm7 \n\t" \ 185 "movq 72(%2), %%mm1 \n\t" \ 186 "pmaddwd %%mm3, %%mm1 \n\t" \ 187 "paddd %%mm4, %%mm7 \n\t" \ 188 "paddd %%mm4, %%mm4 \n\t" \ 189 "psubd %%mm7, %%mm4 \n\t" \ 190 "paddd %%mm2, %%mm1 \n\t" \ 191 "psrad $" #shift ", %%mm7 \n\t"\ 192 "psrad $" #shift ", %%mm4 \n\t"\ 193 "movq %%mm0, %%mm2 \n\t" \ 194 "paddd %%mm1, %%mm0 \n\t" \ 195 "psubd %%mm1, %%mm2 \n\t" \ 196 "psrad $" #shift ", %%mm0 \n\t"\ 197 "psrad $" #shift ", %%mm2 \n\t"\ 198 "packssdw %%mm7, %%mm7 \n\t" \ 199 "movd %%mm7, " #dst " \n\t"\ 200 "packssdw %%mm0, %%mm0 \n\t" \ 201 "movd %%mm0, 16+" #dst " \n\t"\ 202 "packssdw %%mm2, %%mm2 \n\t" \ 203 "movd %%mm2, 96+" #dst " \n\t"\ 204 "packssdw %%mm4, %%mm4 \n\t" \ 205 "movd %%mm4, 112+" #dst " \n\t"\ 206 "movq " #src1 ", %%mm0 \n\t" \ 207 "movq 80(%2), %%mm4 \n\t" \ 208 "pmaddwd %%mm0, %%mm4 \n\t" \ 209 "movq 
88(%2), %%mm7 \n\t" \ 210 "pmaddwd 96(%2), %%mm0 \n\t" \ 211 "pmaddwd %%mm3, %%mm7 \n\t" \ 212 "movq %%mm5, %%mm2 \n\t" \ 213 "pmaddwd 104(%2), %%mm3 \n\t" \ 214 "paddd %%mm7, %%mm4 \n\t" \ 215 "paddd %%mm4, %%mm2 \n\t" \ 216 "psubd %%mm4, %%mm5 \n\t" \ 217 "psrad $" #shift ", %%mm2 \n\t"\ 218 "psrad $" #shift ", %%mm5 \n\t"\ 219 "movq %%mm6, %%mm4 \n\t" \ 220 "paddd %%mm0, %%mm3 \n\t" \ 221 "paddd %%mm3, %%mm6 \n\t" \ 222 "psubd %%mm3, %%mm4 \n\t" \ 223 "psrad $" #shift ", %%mm6 \n\t"\ 224 "psrad $" #shift ", %%mm4 \n\t"\ 225 "packssdw %%mm2, %%mm2 \n\t" \ 226 "packssdw %%mm6, %%mm6 \n\t" \ 227 "movd %%mm2, 32+" #dst " \n\t"\ 228 "packssdw %%mm4, %%mm4 \n\t" \ 229 "packssdw %%mm5, %%mm5 \n\t" \ 230 "movd %%mm6, 48+" #dst " \n\t"\ 231 "movd %%mm4, 64+" #dst " \n\t"\ 232 "movd %%mm5, 80+" #dst " \n\t"\ 235 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 236 "movq " #src0 ", %%mm0 \n\t" \ 237 "movq " #src4 ", %%mm1 \n\t" \ 238 "movq " #src1 ", %%mm2 \n\t" \ 239 "movq " #src5 ", %%mm3 \n\t" \ 240 "movq "MANGLE(wm1010)", %%mm4 \n\t"\ 241 "pand %%mm0, %%mm4 \n\t"\ 242 "por %%mm1, %%mm4 \n\t"\ 243 "por %%mm2, %%mm4 \n\t"\ 244 "por %%mm3, %%mm4 \n\t"\ 245 "packssdw %%mm4,%%mm4 \n\t"\ 246 "movd %%mm4, %%eax \n\t"\ 247 "orl %%eax, %%eax \n\t"\ 249 "movq 16(%2), %%mm4 \n\t" \ 250 "pmaddwd %%mm0, %%mm4 \n\t" \ 251 "movq 24(%2), %%mm5 \n\t" \ 252 "pmaddwd %%mm5, %%mm0 \n\t" \ 253 "movq 32(%2), %%mm5 \n\t" \ 254 "pmaddwd %%mm1, %%mm5 \n\t" \ 255 "movq 40(%2), %%mm6 \n\t" \ 256 "pmaddwd %%mm6, %%mm1 \n\t" \ 257 "movq 48(%2), %%mm7 \n\t" \ 258 "pmaddwd %%mm2, %%mm7 \n\t" \ 259 #rounder ", %%mm4 \n\t"\ 260 "movq %%mm4, %%mm6 \n\t" \ 261 "paddd %%mm5, %%mm4 \n\t" \ 262 "psubd %%mm5, %%mm6 \n\t" \ 263 "movq 56(%2), %%mm5 \n\t" \ 264 "pmaddwd %%mm3, %%mm5 \n\t" \ 265 #rounder ", %%mm0 \n\t"\ 266 "paddd %%mm0, %%mm1 \n\t" \ 267 "paddd %%mm0, %%mm0 \n\t" \ 268 "psubd %%mm1, %%mm0 \n\t" \ 269 "pmaddwd 64(%2), %%mm2 \n\t" \ 270 "paddd %%mm5, %%mm7 \n\t" \ 271 
"movq 72(%2), %%mm5 \n\t" \ 272 "pmaddwd %%mm3, %%mm5 \n\t" \ 273 "paddd %%mm4, %%mm7 \n\t" \ 274 "paddd %%mm4, %%mm4 \n\t" \ 275 "psubd %%mm7, %%mm4 \n\t" \ 276 "paddd %%mm2, %%mm5 \n\t" \ 277 "psrad $" #shift ", %%mm7 \n\t"\ 278 "psrad $" #shift ", %%mm4 \n\t"\ 279 "movq %%mm1, %%mm2 \n\t" \ 280 "paddd %%mm5, %%mm1 \n\t" \ 281 "psubd %%mm5, %%mm2 \n\t" \ 282 "psrad $" #shift ", %%mm1 \n\t"\ 283 "psrad $" #shift ", %%mm2 \n\t"\ 284 "packssdw %%mm1, %%mm7 \n\t" \ 285 "packssdw %%mm4, %%mm2 \n\t" \ 286 "movq %%mm7, " #dst " \n\t"\ 287 "movq " #src1 ", %%mm1 \n\t" \ 288 "movq 80(%2), %%mm4 \n\t" \ 289 "movq %%mm2, 24+" #dst " \n\t"\ 290 "pmaddwd %%mm1, %%mm4 \n\t" \ 291 "movq 88(%2), %%mm7 \n\t" \ 292 "pmaddwd 96(%2), %%mm1 \n\t" \ 293 "pmaddwd %%mm3, %%mm7 \n\t" \ 294 "movq %%mm0, %%mm2 \n\t" \ 295 "pmaddwd 104(%2), %%mm3 \n\t" \ 296 "paddd %%mm7, %%mm4 \n\t" \ 297 "paddd %%mm4, %%mm2 \n\t" \ 298 "psubd %%mm4, %%mm0 \n\t" \ 299 "psrad $" #shift ", %%mm2 \n\t"\ 300 "psrad $" #shift ", %%mm0 \n\t"\ 301 "movq %%mm6, %%mm4 \n\t" \ 302 "paddd %%mm1, %%mm3 \n\t" \ 303 "paddd %%mm3, %%mm6 \n\t" \ 304 "psubd %%mm3, %%mm4 \n\t" \ 305 "psrad $" #shift ", %%mm6 \n\t"\ 306 "packssdw %%mm6, %%mm2 \n\t" \ 307 "movq %%mm2, 8+" #dst " \n\t"\ 308 "psrad $" #shift ", %%mm4 \n\t"\ 309 "packssdw %%mm0, %%mm4 \n\t" \ 310 "movq %%mm4, 16+" #dst " \n\t"\ 313 "pslld $16, %%mm0 \n\t"\ 314 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\ 315 "psrad $13, %%mm0 \n\t"\ 316 "packssdw %%mm0, %%mm0 \n\t"\ 317 "movq %%mm0, " #dst " \n\t"\ 318 "movq %%mm0, 8+" #dst " \n\t"\ 319 "movq %%mm0, 16+" #dst " \n\t"\ 320 "movq %%mm0, 24+" #dst " \n\t"\ 325 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
330 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
331 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
332 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
336 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
337 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
338 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
339 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
343 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 344 "movq " #src0 ", %%mm0 \n\t" \ 345 "movq " #src4 ", %%mm1 \n\t" \ 346 "movq " #src1 ", %%mm2 \n\t" \ 347 "movq " #src5 ", %%mm3 \n\t" \ 348 "movq "MANGLE(wm1010)", %%mm4 \n\t"\ 349 "pand %%mm0, %%mm4 \n\t"\ 350 "por %%mm1, %%mm4 \n\t"\ 351 "por %%mm2, %%mm4 \n\t"\ 352 "por %%mm3, %%mm4 \n\t"\ 353 "packssdw %%mm4,%%mm4 \n\t"\ 354 "movd %%mm4, %%eax \n\t"\ 355 "orl %%eax, %%eax \n\t"\ 357 "movq 16(%2), %%mm4 \n\t" \ 358 "pmaddwd %%mm0, %%mm4 \n\t" \ 359 "movq 24(%2), %%mm5 \n\t" \ 360 "pmaddwd %%mm5, %%mm0 \n\t" \ 361 "movq 32(%2), %%mm5 \n\t" \ 362 "pmaddwd %%mm1, %%mm5 \n\t" \ 363 "movq 40(%2), %%mm6 \n\t" \ 364 "pmaddwd %%mm6, %%mm1 \n\t" \ 365 "movq 48(%2), %%mm7 \n\t" \ 366 "pmaddwd %%mm2, %%mm7 \n\t" \ 367 #rounder ", %%mm4 \n\t"\ 368 "movq %%mm4, %%mm6 \n\t" \ 369 "paddd %%mm5, %%mm4 \n\t" \ 370 "psubd %%mm5, %%mm6 \n\t" \ 371 "movq 56(%2), %%mm5 \n\t" \ 372 "pmaddwd %%mm3, %%mm5 \n\t" \ 373 #rounder ", %%mm0 \n\t"\ 374 "paddd %%mm0, %%mm1 \n\t" \ 375 "paddd %%mm0, %%mm0 \n\t" \ 376 "psubd %%mm1, %%mm0 \n\t" \ 377 "pmaddwd 64(%2), %%mm2 \n\t" \ 378 "paddd %%mm5, %%mm7 \n\t" \ 379 "movq 72(%2), %%mm5 \n\t" \ 380 "pmaddwd %%mm3, %%mm5 \n\t" \ 381 "paddd %%mm4, %%mm7 \n\t" \ 382 "paddd %%mm4, %%mm4 \n\t" \ 383 "psubd %%mm7, %%mm4 \n\t" \ 384 "paddd %%mm2, %%mm5 \n\t" \ 385 "psrad $" #shift ", %%mm7 \n\t"\ 386 "psrad $" #shift ", %%mm4 \n\t"\ 387 "movq %%mm1, %%mm2 \n\t" \ 388 "paddd %%mm5, %%mm1 \n\t" \ 389 "psubd %%mm5, %%mm2 \n\t" \ 390 "psrad $" #shift ", %%mm1 \n\t"\ 391 "psrad $" #shift ", %%mm2 \n\t"\ 392 "packssdw %%mm1, %%mm7 \n\t" \ 393 "packssdw %%mm4, %%mm2 \n\t" \ 394 "movq %%mm7, " #dst " \n\t"\ 395 "movq " #src1 ", %%mm1 \n\t" \ 396 "movq 80(%2), %%mm4 \n\t" \ 397 "movq %%mm2, 24+" #dst " \n\t"\ 398 "pmaddwd %%mm1, %%mm4 \n\t" \ 399 "movq 88(%2), %%mm7 \n\t" \ 400 "pmaddwd 96(%2), %%mm1 \n\t" \ 401 "pmaddwd %%mm3, %%mm7 \n\t" \ 402 "movq %%mm0, %%mm2 \n\t" \ 403 "pmaddwd 
104(%2), %%mm3 \n\t" \ 404 "paddd %%mm7, %%mm4 \n\t" \ 405 "paddd %%mm4, %%mm2 \n\t" \ 406 "psubd %%mm4, %%mm0 \n\t" \ 407 "psrad $" #shift ", %%mm2 \n\t"\ 408 "psrad $" #shift ", %%mm0 \n\t"\ 409 "movq %%mm6, %%mm4 \n\t" \ 410 "paddd %%mm1, %%mm3 \n\t" \ 411 "paddd %%mm3, %%mm6 \n\t" \ 412 "psubd %%mm3, %%mm4 \n\t" \ 413 "psrad $" #shift ", %%mm6 \n\t"\ 414 "packssdw %%mm6, %%mm2 \n\t" \ 415 "movq %%mm2, 8+" #dst " \n\t"\ 416 "psrad $" #shift ", %%mm4 \n\t"\ 417 "packssdw %%mm0, %%mm4 \n\t" \ 418 "movq %%mm4, 16+" #dst " \n\t"\ 421 "pslld $16, %%mm0 \n\t"\ 422 "paddd "MANGLE(d40000)", %%mm0 \n\t"\ 423 "psrad $13, %%mm0 \n\t"\ 424 "packssdw %%mm0, %%mm0 \n\t"\ 425 "movq %%mm0, " #dst " \n\t"\ 426 "movq %%mm0, 8+" #dst " \n\t"\ 427 "movq %%mm0, 16+" #dst " \n\t"\ 428 "movq %%mm0, 24+" #dst " \n\t"\ 431 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \ 432 "movq " #src0 ", %%mm0 \n\t" \ 433 "movq " #src4 ", %%mm1 \n\t" \ 434 "movq " #src1 ", %%mm2 \n\t" \ 435 "movq " #src5 ", %%mm3 \n\t" \ 436 "movq %%mm0, %%mm4 \n\t"\ 437 "por %%mm1, %%mm4 \n\t"\ 438 "por %%mm2, %%mm4 \n\t"\ 439 "por %%mm3, %%mm4 \n\t"\ 440 "packssdw %%mm4,%%mm4 \n\t"\ 441 "movd %%mm4, %%eax \n\t"\ 442 "orl %%eax, %%eax \n\t"\ 444 "movq 16(%2), %%mm4 \n\t" \ 445 "pmaddwd %%mm0, %%mm4 \n\t" \ 446 "movq 24(%2), %%mm5 \n\t" \ 447 "pmaddwd %%mm5, %%mm0 \n\t" \ 448 "movq 32(%2), %%mm5 \n\t" \ 449 "pmaddwd %%mm1, %%mm5 \n\t" \ 450 "movq 40(%2), %%mm6 \n\t" \ 451 "pmaddwd %%mm6, %%mm1 \n\t" \ 452 "movq 48(%2), %%mm7 \n\t" \ 453 "pmaddwd %%mm2, %%mm7 \n\t" \ 454 #rounder ", %%mm4 \n\t"\ 455 "movq %%mm4, %%mm6 \n\t" \ 456 "paddd %%mm5, %%mm4 \n\t" \ 457 "psubd %%mm5, %%mm6 \n\t" \ 458 "movq 56(%2), %%mm5 \n\t" \ 459 "pmaddwd %%mm3, %%mm5 \n\t" \ 460 #rounder ", %%mm0 \n\t"\ 461 "paddd %%mm0, %%mm1 \n\t" \ 462 "paddd %%mm0, %%mm0 \n\t" \ 463 "psubd %%mm1, %%mm0 \n\t" \ 464 "pmaddwd 64(%2), %%mm2 \n\t" \ 465 "paddd %%mm5, %%mm7 \n\t" \ 466 "movq 72(%2), %%mm5 \n\t" \ 467 "pmaddwd %%mm3, 
%%mm5 \n\t" \ 468 "paddd %%mm4, %%mm7 \n\t" \ 469 "paddd %%mm4, %%mm4 \n\t" \ 470 "psubd %%mm7, %%mm4 \n\t" \ 471 "paddd %%mm2, %%mm5 \n\t" \ 472 "psrad $" #shift ", %%mm7 \n\t"\ 473 "psrad $" #shift ", %%mm4 \n\t"\ 474 "movq %%mm1, %%mm2 \n\t" \ 475 "paddd %%mm5, %%mm1 \n\t" \ 476 "psubd %%mm5, %%mm2 \n\t" \ 477 "psrad $" #shift ", %%mm1 \n\t"\ 478 "psrad $" #shift ", %%mm2 \n\t"\ 479 "packssdw %%mm1, %%mm7 \n\t" \ 480 "packssdw %%mm4, %%mm2 \n\t" \ 481 "movq %%mm7, " #dst " \n\t"\ 482 "movq " #src1 ", %%mm1 \n\t" \ 483 "movq 80(%2), %%mm4 \n\t" \ 484 "movq %%mm2, 24+" #dst " \n\t"\ 485 "pmaddwd %%mm1, %%mm4 \n\t" \ 486 "movq 88(%2), %%mm7 \n\t" \ 487 "pmaddwd 96(%2), %%mm1 \n\t" \ 488 "pmaddwd %%mm3, %%mm7 \n\t" \ 489 "movq %%mm0, %%mm2 \n\t" \ 490 "pmaddwd 104(%2), %%mm3 \n\t" \ 491 "paddd %%mm7, %%mm4 \n\t" \ 492 "paddd %%mm4, %%mm2 \n\t" \ 493 "psubd %%mm4, %%mm0 \n\t" \ 494 "psrad $" #shift ", %%mm2 \n\t"\ 495 "psrad $" #shift ", %%mm0 \n\t"\ 496 "movq %%mm6, %%mm4 \n\t" \ 497 "paddd %%mm1, %%mm3 \n\t" \ 498 "paddd %%mm3, %%mm6 \n\t" \ 499 "psubd %%mm3, %%mm4 \n\t" \ 500 "psrad $" #shift ", %%mm6 \n\t"\ 501 "packssdw %%mm6, %%mm2 \n\t" \ 502 "movq %%mm2, 8+" #dst " \n\t"\ 503 "psrad $" #shift ", %%mm4 \n\t"\ 504 "packssdw %%mm0, %%mm4 \n\t" \ 505 "movq %%mm4, 16+" #dst " \n\t"\ 507 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 508 "movq " #src0 ", %%mm0 \n\t" \ 509 "movq " #src4 ", %%mm1 \n\t" \ 510 "movq " #src1 ", %%mm2 \n\t" \ 511 "movq " #src5 ", %%mm3 \n\t" \ 512 "movq 16(%2), %%mm4 \n\t" \ 513 "pmaddwd %%mm0, %%mm4 \n\t" \ 514 "movq 24(%2), %%mm5 \n\t" \ 515 "pmaddwd %%mm5, %%mm0 \n\t" \ 516 "movq 32(%2), %%mm5 \n\t" \ 517 "pmaddwd %%mm1, %%mm5 \n\t" \ 518 "movq 40(%2), %%mm6 \n\t" \ 519 "pmaddwd %%mm6, %%mm1 \n\t" \ 520 "movq 48(%2), %%mm7 \n\t" \ 521 "pmaddwd %%mm2, %%mm7 \n\t" \ 522 #rounder ", %%mm4 \n\t"\ 523 "movq %%mm4, %%mm6 \n\t" \ 524 "paddd %%mm5, %%mm4 \n\t" \ 525 "psubd %%mm5, %%mm6 \n\t" \ 526 "movq 56(%2), %%mm5 \n\t" \ 
527 "pmaddwd %%mm3, %%mm5 \n\t" \ 528 #rounder ", %%mm0 \n\t"\ 529 "paddd %%mm0, %%mm1 \n\t" \ 530 "paddd %%mm0, %%mm0 \n\t" \ 531 "psubd %%mm1, %%mm0 \n\t" \ 532 "pmaddwd 64(%2), %%mm2 \n\t" \ 533 "paddd %%mm5, %%mm7 \n\t" \ 534 "movq 72(%2), %%mm5 \n\t" \ 535 "pmaddwd %%mm3, %%mm5 \n\t" \ 536 "paddd %%mm4, %%mm7 \n\t" \ 537 "paddd %%mm4, %%mm4 \n\t" \ 538 "psubd %%mm7, %%mm4 \n\t" \ 539 "paddd %%mm2, %%mm5 \n\t" \ 540 "psrad $" #shift ", %%mm7 \n\t"\ 541 "psrad $" #shift ", %%mm4 \n\t"\ 542 "movq %%mm1, %%mm2 \n\t" \ 543 "paddd %%mm5, %%mm1 \n\t" \ 544 "psubd %%mm5, %%mm2 \n\t" \ 545 "psrad $" #shift ", %%mm1 \n\t"\ 546 "psrad $" #shift ", %%mm2 \n\t"\ 547 "packssdw %%mm1, %%mm7 \n\t" \ 548 "packssdw %%mm4, %%mm2 \n\t" \ 549 "movq %%mm7, " #dst " \n\t"\ 550 "movq " #src1 ", %%mm1 \n\t" \ 551 "movq 80(%2), %%mm4 \n\t" \ 552 "movq %%mm2, 24+" #dst " \n\t"\ 553 "pmaddwd %%mm1, %%mm4 \n\t" \ 554 "movq 88(%2), %%mm7 \n\t" \ 555 "pmaddwd 96(%2), %%mm1 \n\t" \ 556 "pmaddwd %%mm3, %%mm7 \n\t" \ 557 "movq %%mm0, %%mm2 \n\t" \ 558 "pmaddwd 104(%2), %%mm3 \n\t" \ 559 "paddd %%mm7, %%mm4 \n\t" \ 560 "paddd %%mm4, %%mm2 \n\t" \ 561 "psubd %%mm4, %%mm0 \n\t" \ 562 "psrad $" #shift ", %%mm2 \n\t"\ 563 "psrad $" #shift ", %%mm0 \n\t"\ 564 "movq %%mm6, %%mm4 \n\t" \ 565 "paddd %%mm1, %%mm3 \n\t" \ 566 "paddd %%mm3, %%mm6 \n\t" \ 567 "psubd %%mm3, %%mm4 \n\t" \ 568 "psrad $" #shift ", %%mm6 \n\t"\ 569 "packssdw %%mm6, %%mm2 \n\t" \ 570 "movq %%mm2, 8+" #dst " \n\t"\ 571 "psrad $" #shift ", %%mm4 \n\t"\ 572 "packssdw %%mm0, %%mm4 \n\t" \ 573 "movq %%mm4, 16+" #dst " \n\t"\ 576 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
577 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
578 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
579 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads all four
 * source quadwords.
 *  - src0/src4/src1/src5 are loaded into mm0/mm1/mm2/mm3 and multiplied
 *    (pmaddwd) by the coefficient quadwords at 16(%2) .. 104(%2).
 *  - Each result is shifted right arithmetically by `shift`, packed with
 *    signed saturation (packssdw) and written with movd (32 bits per store)
 *    at dst + 0, 16, 32, 48, 64, 80, 96 and 112.
 *  - The stores are issued in the order 0, 16, 96, 112, 32, 48, 64, 80.
 * mm0-mm7 are all used as scratch.
 *
 * It is expanded four times below: sources step through 0/8/16/24(%1)
 * (src4, src1, src5 at +64, +32, +96 from src0), destinations through
 * 0/4/8/12(%0), always with shift 20.
 */
582 #define IDCT(src0, src4, src1, src5, dst, shift) \ 583 "movq " #src0 ", %%mm0 \n\t" \ 584 "movq " #src4 ", %%mm1 \n\t" \ 585 "movq " #src1 ", %%mm2 \n\t" \ 586 "movq " #src5 ", %%mm3 \n\t" \ 587 "movq 16(%2), %%mm4 \n\t" \ 588 "pmaddwd %%mm0, %%mm4 \n\t" \ 589 "movq 24(%2), %%mm5 \n\t" \ 590 "pmaddwd %%mm5, %%mm0 \n\t" \ 591 "movq 32(%2), %%mm5 \n\t" \ 592 "pmaddwd %%mm1, %%mm5 \n\t" \ 593 "movq 40(%2), %%mm6 \n\t" \ 594 "pmaddwd %%mm6, %%mm1 \n\t" \ 595 "movq %%mm4, %%mm6 \n\t" \ 596 "movq 48(%2), %%mm7 \n\t" \ 597 "pmaddwd %%mm2, %%mm7 \n\t" \ 598 "paddd %%mm5, %%mm4 \n\t" \ 599 "psubd %%mm5, %%mm6 \n\t" \ 600 "movq %%mm0, %%mm5 \n\t" \ 601 "paddd %%mm1, %%mm0 \n\t" \ 602 "psubd %%mm1, %%mm5 \n\t" \ 603 "movq 56(%2), %%mm1 \n\t" \ 604 "pmaddwd %%mm3, %%mm1 \n\t" \ 605 "pmaddwd 64(%2), %%mm2 \n\t" \ 606 "paddd %%mm1, %%mm7 \n\t" \ 607 "movq 72(%2), %%mm1 \n\t" \ 608 "pmaddwd %%mm3, %%mm1 \n\t" \ 609 "paddd %%mm4, %%mm7 \n\t" \ 610 "paddd %%mm4, %%mm4 \n\t" \ 611 "psubd %%mm7, %%mm4 \n\t" \ 612 "paddd %%mm2, %%mm1 \n\t" \ 613 "psrad $" #shift ", %%mm7 \n\t"\ 614 "psrad $" #shift ", %%mm4 \n\t"\ 615 "movq %%mm0, %%mm2 \n\t" \ 616 "paddd %%mm1, %%mm0 \n\t" \ 617 "psubd %%mm1, %%mm2 \n\t" \ 618 "psrad $" #shift ", %%mm0 \n\t"\ 619 "psrad $" #shift ", %%mm2 \n\t"\ 620 "packssdw %%mm7, %%mm7 \n\t" \ 621 "movd %%mm7, " #dst " \n\t"\ 622 "packssdw %%mm0, %%mm0 \n\t" \ 623 "movd %%mm0, 16+" #dst " \n\t"\ 624 "packssdw %%mm2, %%mm2 \n\t" \ 625 "movd %%mm2, 96+" #dst " \n\t"\ 626 "packssdw %%mm4, %%mm4 \n\t" \ 627 "movd %%mm4, 112+" #dst " \n\t"\ 628 "movq " #src1 ", %%mm0 \n\t" \ 629 "movq 80(%2), %%mm4 \n\t" \ 630 "pmaddwd %%mm0, %%mm4 \n\t" \ 631 "movq 88(%2), %%mm7 \n\t" \ 632 "pmaddwd 96(%2), %%mm0 \n\t" \ 633 "pmaddwd %%mm3, %%mm7 \n\t" \ 634 "movq %%mm5, %%mm2 \n\t" \ 635 "pmaddwd 104(%2), %%mm3 \n\t" \ 636 "paddd %%mm7, %%mm4 \n\t" \ 637 "paddd %%mm4, %%mm2 \n\t" \ 638 "psubd %%mm4, %%mm5 \n\t" \ 639 "psrad $" #shift ", %%mm2 \n\t"\ 640 "psrad $" #shift ", %%mm5 
\n\t"\ 641 "movq %%mm6, %%mm4 \n\t" \ 642 "paddd %%mm0, %%mm3 \n\t" \ 643 "paddd %%mm3, %%mm6 \n\t" \ 644 "psubd %%mm3, %%mm4 \n\t" \ 645 "psrad $" #shift ", %%mm6 \n\t"\ 646 "psrad $" #shift ", %%mm4 \n\t"\ 647 "packssdw %%mm2, %%mm2 \n\t" \ 648 "packssdw %%mm6, %%mm6 \n\t" \ 649 "movd %%mm2, 32+" #dst " \n\t"\ 650 "packssdw %%mm4, %%mm4 \n\t" \ 651 "packssdw %%mm5, %%mm5 \n\t" \ 652 "movd %%mm6, 48+" #dst " \n\t"\ 653 "movd %%mm4, 64+" #dst " \n\t"\ 654 "movd %%mm5, 80+" #dst " \n\t" 658 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
659 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
660 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
661 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Z_COND_IDCT on the source groups at 64(%0) and 96(%0), writing to 64(%1)
 * and 96(%1) with rounder "paddd (%2)" and shift 11.  The macro ORs its four
 * source quadwords together and tests the result (orl %%eax, %%eax) before
 * running the row transform.  The last argument (6f / 5f) is the macro's `bt`
 * parameter, written as a forward local-label reference.
 * NOTE(review): the instruction that consumes `bt` is not visible in this
 * excerpt -- presumably a conditional jump taken when all inputs are zero;
 * confirm against the full file.
 */
666 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
667 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that never reads src1.
 *  - Only src0, src4 and src5 are loaded (mm0, mm1, mm3); the coefficient
 *    quadwords at 48, 64, 80 and 96(%2), which the full variant multiplies
 *    with src1, are not referenced.
 *  - Results are shifted right by `shift`, packed with packssdw and written
 *    with movd at dst + 0, 16, 96, 112, 32, 48, 64, 80 (in that order).
 * NOTE(review): presumably selected only when the src1 inputs are known to
 * be zero -- the branch logic is not visible here; confirm.
 *
 * Expanded four times below with sources 0/8/16/24(%1), destinations
 * 0/4/8/12(%0) and shift 20; the src1 argument is still passed but ignored.
 */
670 #define IDCT(src0, src4, src1, src5, dst, shift) \ 671 "movq " #src0 ", %%mm0 \n\t" \ 672 "movq " #src4 ", %%mm1 \n\t" \ 673 "movq " #src5 ", %%mm3 \n\t" \ 674 "movq 16(%2), %%mm4 \n\t" \ 675 "pmaddwd %%mm0, %%mm4 \n\t" \ 676 "movq 24(%2), %%mm5 \n\t" \ 677 "pmaddwd %%mm5, %%mm0 \n\t" \ 678 "movq 32(%2), %%mm5 \n\t" \ 679 "pmaddwd %%mm1, %%mm5 \n\t" \ 680 "movq 40(%2), %%mm6 \n\t" \ 681 "pmaddwd %%mm6, %%mm1 \n\t" \ 682 "movq %%mm4, %%mm6 \n\t" \ 683 "paddd %%mm5, %%mm4 \n\t" \ 684 "psubd %%mm5, %%mm6 \n\t" \ 685 "movq %%mm0, %%mm5 \n\t" \ 686 "paddd %%mm1, %%mm0 \n\t" \ 687 "psubd %%mm1, %%mm5 \n\t" \ 688 "movq 56(%2), %%mm1 \n\t" \ 689 "pmaddwd %%mm3, %%mm1 \n\t" \ 690 "movq 72(%2), %%mm7 \n\t" \ 691 "pmaddwd %%mm3, %%mm7 \n\t" \ 692 "paddd %%mm4, %%mm1 \n\t" \ 693 "paddd %%mm4, %%mm4 \n\t" \ 694 "psubd %%mm1, %%mm4 \n\t" \ 695 "psrad $" #shift ", %%mm1 \n\t"\ 696 "psrad $" #shift ", %%mm4 \n\t"\ 697 "movq %%mm0, %%mm2 \n\t" \ 698 "paddd %%mm7, %%mm0 \n\t" \ 699 "psubd %%mm7, %%mm2 \n\t" \ 700 "psrad $" #shift ", %%mm0 \n\t"\ 701 "psrad $" #shift ", %%mm2 \n\t"\ 702 "packssdw %%mm1, %%mm1 \n\t" \ 703 "movd %%mm1, " #dst " \n\t"\ 704 "packssdw %%mm0, %%mm0 \n\t" \ 705 "movd %%mm0, 16+" #dst " \n\t"\ 706 "packssdw %%mm2, %%mm2 \n\t" \ 707 "movd %%mm2, 96+" #dst " \n\t"\ 708 "packssdw %%mm4, %%mm4 \n\t" \ 709 "movd %%mm4, 112+" #dst " \n\t"\ 710 "movq 88(%2), %%mm1 \n\t" \ 711 "pmaddwd %%mm3, %%mm1 \n\t" \ 712 "movq %%mm5, %%mm2 \n\t" \ 713 "pmaddwd 104(%2), %%mm3 \n\t" \ 714 "paddd %%mm1, %%mm2 \n\t" \ 715 "psubd %%mm1, %%mm5 \n\t" \ 716 "psrad $" #shift ", %%mm2 \n\t"\ 717 "psrad $" #shift ", %%mm5 \n\t"\ 718 "movq %%mm6, %%mm1 \n\t" \ 719 "paddd %%mm3, %%mm6 \n\t" \ 720 "psubd %%mm3, %%mm1 \n\t" \ 721 "psrad $" #shift ", %%mm6 \n\t"\ 722 "psrad $" #shift ", %%mm1 \n\t"\ 723 "packssdw %%mm2, %%mm2 \n\t" \ 724 "packssdw %%mm6, %%mm6 \n\t" \ 725 "movd %%mm2, 32+" #dst " \n\t"\ 726 "packssdw %%mm1, %%mm1 \n\t" \ 727 "packssdw %%mm5, %%mm5 \n\t" \ 728 "movd %%mm6, 
48+" #dst " \n\t"\ 729 "movd %%mm1, 64+" #dst " \n\t"\ 730 "movd %%mm5, 80+" #dst " \n\t" 733 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
734 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
735 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
736 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Z_COND_IDCT on the source group at 96(%0), writing to 96(%1) with rounder
 * "paddd (%2)" and shift 11; `bt` is the forward local-label reference 7f.
 * NOTE(review): the instruction that consumes `bt` is not visible in this
 * excerpt -- presumably a conditional jump taken when the inputs are all
 * zero; confirm.
 */
741 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads only src0 and
 * src5 (into mm0 and mm3).
 *  - src0 is multiplied by the coefficients at 16(%2) and 24(%2); src5 by
 *    those at 56, 72, 88 and 104(%2).  src4 and src1 are never loaded.
 *  - Results are shifted right by `shift`, packed with packssdw and written
 *    with movd at dst + 0, 16, 96, 112, 32, 48, 64, 80 (in that order).
 * NOTE(review): presumably selected only when the src4 and src1 inputs are
 * known to be zero -- the branch logic is not visible here; confirm.
 *
 * Expanded four times below with sources 0/8/16/24(%1), destinations
 * 0/4/8/12(%0) and shift 20.
 */
744 #define IDCT(src0, src4, src1, src5, dst, shift) \ 745 "movq " #src0 ", %%mm0 \n\t" \ 746 "movq " #src5 ", %%mm3 \n\t" \ 747 "movq 16(%2), %%mm4 \n\t" \ 748 "pmaddwd %%mm0, %%mm4 \n\t" \ 749 "movq 24(%2), %%mm5 \n\t" \ 750 "pmaddwd %%mm5, %%mm0 \n\t" \ 751 "movq %%mm4, %%mm6 \n\t" \ 752 "movq %%mm0, %%mm5 \n\t" \ 753 "movq 56(%2), %%mm1 \n\t" \ 754 "pmaddwd %%mm3, %%mm1 \n\t" \ 755 "movq 72(%2), %%mm7 \n\t" \ 756 "pmaddwd %%mm3, %%mm7 \n\t" \ 757 "paddd %%mm4, %%mm1 \n\t" \ 758 "paddd %%mm4, %%mm4 \n\t" \ 759 "psubd %%mm1, %%mm4 \n\t" \ 760 "psrad $" #shift ", %%mm1 \n\t"\ 761 "psrad $" #shift ", %%mm4 \n\t"\ 762 "movq %%mm0, %%mm2 \n\t" \ 763 "paddd %%mm7, %%mm0 \n\t" \ 764 "psubd %%mm7, %%mm2 \n\t" \ 765 "psrad $" #shift ", %%mm0 \n\t"\ 766 "psrad $" #shift ", %%mm2 \n\t"\ 767 "packssdw %%mm1, %%mm1 \n\t" \ 768 "movd %%mm1, " #dst " \n\t"\ 769 "packssdw %%mm0, %%mm0 \n\t" \ 770 "movd %%mm0, 16+" #dst " \n\t"\ 771 "packssdw %%mm2, %%mm2 \n\t" \ 772 "movd %%mm2, 96+" #dst " \n\t"\ 773 "packssdw %%mm4, %%mm4 \n\t" \ 774 "movd %%mm4, 112+" #dst " \n\t"\ 775 "movq 88(%2), %%mm1 \n\t" \ 776 "pmaddwd %%mm3, %%mm1 \n\t" \ 777 "movq %%mm5, %%mm2 \n\t" \ 778 "pmaddwd 104(%2), %%mm3 \n\t" \ 779 "paddd %%mm1, %%mm2 \n\t" \ 780 "psubd %%mm1, %%mm5 \n\t" \ 781 "psrad $" #shift ", %%mm2 \n\t"\ 782 "psrad $" #shift ", %%mm5 \n\t"\ 783 "movq %%mm6, %%mm1 \n\t" \ 784 "paddd %%mm3, %%mm6 \n\t" \ 785 "psubd %%mm3, %%mm1 \n\t" \ 786 "psrad $" #shift ", %%mm6 \n\t"\ 787 "psrad $" #shift ", %%mm1 \n\t"\ 788 "packssdw %%mm2, %%mm2 \n\t" \ 789 "packssdw %%mm6, %%mm6 \n\t" \ 790 "movd %%mm2, 32+" #dst " \n\t"\ 791 "packssdw %%mm1, %%mm1 \n\t" \ 792 "packssdw %%mm5, %%mm5 \n\t" \ 793 "movd %%mm6, 48+" #dst " \n\t"\ 794 "movd %%mm1, 64+" #dst " \n\t"\ 795 "movd %%mm5, 80+" #dst " \n\t" 799 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
800 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
801 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
802 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * Z_COND_IDCT on the source group at 96(%0), writing to 96(%1) with rounder
 * "paddd (%2)" and shift 11; `bt` is the forward local-label reference 3f.
 * NOTE(review): the instruction that consumes `bt` is not visible in this
 * excerpt -- presumably a conditional jump taken when the inputs are all
 * zero; confirm.
 */
807 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that never reads src4.
 *  - src0, src1 and src5 are loaded (mm0, mm2, mm3); the coefficient
 *    quadwords at 32(%2) and 40(%2), which the full variant multiplies with
 *    src4, are not referenced.
 *  - src1 is loaded a second time (into mm0) for the second half of the
 *    computation, after mm0's earlier contents have been consumed.
 *  - Results are shifted right by `shift`, packed with packssdw and written
 *    with movd at dst + 0, 16, 96, 112, 32, 48, 64, 80 (in that order).
 * NOTE(review): presumably selected only when the src4 inputs are known to
 * be zero -- the branch logic is not visible here; confirm.
 *
 * Expanded four times below with sources 0/8/16/24(%1), destinations
 * 0/4/8/12(%0) and shift 20.
 */
810 #define IDCT(src0, src4, src1, src5, dst, shift) \ 811 "movq " #src0 ", %%mm0 \n\t" \ 812 "movq " #src1 ", %%mm2 \n\t" \ 813 "movq " #src5 ", %%mm3 \n\t" \ 814 "movq 16(%2), %%mm4 \n\t" \ 815 "pmaddwd %%mm0, %%mm4 \n\t" \ 816 "movq 24(%2), %%mm5 \n\t" \ 817 "pmaddwd %%mm5, %%mm0 \n\t" \ 818 "movq %%mm4, %%mm6 \n\t" \ 819 "movq 48(%2), %%mm7 \n\t" \ 820 "pmaddwd %%mm2, %%mm7 \n\t" \ 821 "movq %%mm0, %%mm5 \n\t" \ 822 "movq 56(%2), %%mm1 \n\t" \ 823 "pmaddwd %%mm3, %%mm1 \n\t" \ 824 "pmaddwd 64(%2), %%mm2 \n\t" \ 825 "paddd %%mm1, %%mm7 \n\t" \ 826 "movq 72(%2), %%mm1 \n\t" \ 827 "pmaddwd %%mm3, %%mm1 \n\t" \ 828 "paddd %%mm4, %%mm7 \n\t" \ 829 "paddd %%mm4, %%mm4 \n\t" \ 830 "psubd %%mm7, %%mm4 \n\t" \ 831 "paddd %%mm2, %%mm1 \n\t" \ 832 "psrad $" #shift ", %%mm7 \n\t"\ 833 "psrad $" #shift ", %%mm4 \n\t"\ 834 "movq %%mm0, %%mm2 \n\t" \ 835 "paddd %%mm1, %%mm0 \n\t" \ 836 "psubd %%mm1, %%mm2 \n\t" \ 837 "psrad $" #shift ", %%mm0 \n\t"\ 838 "psrad $" #shift ", %%mm2 \n\t"\ 839 "packssdw %%mm7, %%mm7 \n\t" \ 840 "movd %%mm7, " #dst " \n\t"\ 841 "packssdw %%mm0, %%mm0 \n\t" \ 842 "movd %%mm0, 16+" #dst " \n\t"\ 843 "packssdw %%mm2, %%mm2 \n\t" \ 844 "movd %%mm2, 96+" #dst " \n\t"\ 845 "packssdw %%mm4, %%mm4 \n\t" \ 846 "movd %%mm4, 112+" #dst " \n\t"\ 847 "movq " #src1 ", %%mm0 \n\t" \ 848 "movq 80(%2), %%mm4 \n\t" \ 849 "pmaddwd %%mm0, %%mm4 \n\t" \ 850 "movq 88(%2), %%mm7 \n\t" \ 851 "pmaddwd 96(%2), %%mm0 \n\t" \ 852 "pmaddwd %%mm3, %%mm7 \n\t" \ 853 "movq %%mm5, %%mm2 \n\t" \ 854 "pmaddwd 104(%2), %%mm3 \n\t" \ 855 "paddd %%mm7, %%mm4 \n\t" \ 856 "paddd %%mm4, %%mm2 \n\t" \ 857 "psubd %%mm4, %%mm5 \n\t" \ 858 "psrad $" #shift ", %%mm2 \n\t"\ 859 "psrad $" #shift ", %%mm5 \n\t"\ 860 "movq %%mm6, %%mm4 \n\t" \ 861 "paddd %%mm0, %%mm3 \n\t" \ 862 "paddd %%mm3, %%mm6 \n\t" \ 863 "psubd %%mm3, %%mm4 \n\t" \ 864 "psrad $" #shift ", %%mm6 \n\t"\ 865 "psrad $" #shift ", %%mm4 \n\t"\ 866 "packssdw %%mm2, %%mm2 \n\t" \ 867 "packssdw %%mm6, %%mm6 \n\t" \ 868 "movd %%mm2, 
32+" #dst " \n\t"\ 869 "packssdw %%mm4, %%mm4 \n\t" \ 870 "packssdw %%mm5, %%mm5 \n\t" \ 871 "movd %%mm6, 48+" #dst " \n\t"\ 872 "movd %%mm4, 64+" #dst " \n\t"\ 873 "movd %%mm5, 80+" #dst " \n\t" 876 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
877 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
878 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
879 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads only src0 and
 * src1 (into mm0 and mm2).
 *  - src0 is multiplied by the coefficients at 16(%2) and 24(%2); src1 by
 *    those at 48, 64, 80 and 96(%2).  src4 and src5 are never loaded.
 *  - Results are shifted right by `shift`, packed with packssdw and written
 *    with movd at dst + 0, 16, 96, 112, 32, 48, 64, 80 (in that order).
 * NOTE(review): presumably selected only when the src4 and src5 inputs are
 * known to be zero -- the branch logic is not visible here; confirm.
 *
 * Expanded four times below with sources 0/8/16/24(%1), destinations
 * 0/4/8/12(%0) and shift 20.
 */
885 #define IDCT(src0, src4, src1, src5, dst, shift) \ 886 "movq " #src0 ", %%mm0 \n\t" \ 887 "movq " #src1 ", %%mm2 \n\t" \ 888 "movq 16(%2), %%mm4 \n\t" \ 889 "pmaddwd %%mm0, %%mm4 \n\t" \ 890 "movq 24(%2), %%mm5 \n\t" \ 891 "pmaddwd %%mm5, %%mm0 \n\t" \ 892 "movq %%mm4, %%mm6 \n\t" \ 893 "movq 48(%2), %%mm7 \n\t" \ 894 "pmaddwd %%mm2, %%mm7 \n\t" \ 895 "movq %%mm0, %%mm5 \n\t" \ 896 "movq 64(%2), %%mm3 \n\t"\ 897 "pmaddwd %%mm2, %%mm3 \n\t" \ 898 "paddd %%mm4, %%mm7 \n\t" \ 899 "paddd %%mm4, %%mm4 \n\t" \ 900 "psubd %%mm7, %%mm4 \n\t" \ 901 "psrad $" #shift ", %%mm7 \n\t"\ 902 "psrad $" #shift ", %%mm4 \n\t"\ 903 "movq %%mm0, %%mm1 \n\t" \ 904 "paddd %%mm3, %%mm0 \n\t" \ 905 "psubd %%mm3, %%mm1 \n\t" \ 906 "psrad $" #shift ", %%mm0 \n\t"\ 907 "psrad $" #shift ", %%mm1 \n\t"\ 908 "packssdw %%mm7, %%mm7 \n\t" \ 909 "movd %%mm7, " #dst " \n\t"\ 910 "packssdw %%mm0, %%mm0 \n\t" \ 911 "movd %%mm0, 16+" #dst " \n\t"\ 912 "packssdw %%mm1, %%mm1 \n\t" \ 913 "movd %%mm1, 96+" #dst " \n\t"\ 914 "packssdw %%mm4, %%mm4 \n\t" \ 915 "movd %%mm4, 112+" #dst " \n\t"\ 916 "movq 80(%2), %%mm4 \n\t" \ 917 "pmaddwd %%mm2, %%mm4 \n\t" \ 918 "pmaddwd 96(%2), %%mm2 \n\t" \ 919 "movq %%mm5, %%mm1 \n\t" \ 920 "paddd %%mm4, %%mm1 \n\t" \ 921 "psubd %%mm4, %%mm5 \n\t" \ 922 "psrad $" #shift ", %%mm1 \n\t"\ 923 "psrad $" #shift ", %%mm5 \n\t"\ 924 "movq %%mm6, %%mm4 \n\t" \ 925 "paddd %%mm2, %%mm6 \n\t" \ 926 "psubd %%mm2, %%mm4 \n\t" \ 927 "psrad $" #shift ", %%mm6 \n\t"\ 928 "psrad $" #shift ", %%mm4 \n\t"\ 929 "packssdw %%mm1, %%mm1 \n\t" \ 930 "packssdw %%mm6, %%mm6 \n\t" \ 931 "movd %%mm1, 32+" #dst " \n\t"\ 932 "packssdw %%mm4, %%mm4 \n\t" \ 933 "packssdw %%mm5, %%mm5 \n\t" \ 934 "movd %%mm6, 48+" #dst " \n\t"\ 935 "movd %%mm4, 64+" #dst " \n\t"\ 936 "movd %%mm5, 80+" #dst " \n\t" 940 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
941 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
942 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
943 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads only src0 and
 * src4, and handles two adjacent source quadwords per expansion.
 *  - src0/src4 go to mm0/mm1 and "8+src0"/"8+src4" to mm2/mm3; both pairs
 *    are multiplied by the coefficients at 16, 24, 32 and 40(%2).
 *    src1 and src5 are never loaded.
 *  - After the shift by `shift`, results from the two quadwords are packed
 *    together (packssdw) and written with movq (64 bits per store).
 *  - Only four distinct packed values are produced; each is stored twice:
 *    dst+0 and dst+112, dst+16 and dst+96, dst+32 and dst+80,
 *    dst+48 and dst+64.
 * NOTE(review): presumably selected only when the src1 and src5 inputs are
 * known to be zero -- the branch logic is not visible here; confirm.
 *
 * Because each expansion covers src0 and 8+src0, it is expanded only twice
 * below: sources 0(%1) and 16(%1), destinations 0(%0) and 8(%0), shift 20.
 */
949 #define IDCT(src0, src4, src1, src5, dst, shift) \ 950 "movq " #src0 ", %%mm0 \n\t" \ 951 "movq " #src4 ", %%mm1 \n\t" \ 952 "movq 16(%2), %%mm4 \n\t" \ 953 "pmaddwd %%mm0, %%mm4 \n\t" \ 954 "movq 24(%2), %%mm5 \n\t" \ 955 "pmaddwd %%mm5, %%mm0 \n\t" \ 956 "movq 32(%2), %%mm5 \n\t" \ 957 "pmaddwd %%mm1, %%mm5 \n\t" \ 958 "movq 40(%2), %%mm6 \n\t" \ 959 "pmaddwd %%mm6, %%mm1 \n\t" \ 960 "movq %%mm4, %%mm6 \n\t" \ 961 "paddd %%mm5, %%mm4 \n\t" \ 962 "psubd %%mm5, %%mm6 \n\t" \ 963 "movq %%mm0, %%mm5 \n\t" \ 964 "paddd %%mm1, %%mm0 \n\t" \ 965 "psubd %%mm1, %%mm5 \n\t" \ 966 "movq 8+" #src0 ", %%mm2 \n\t" \ 967 "movq 8+" #src4 ", %%mm3 \n\t" \ 968 "movq 16(%2), %%mm1 \n\t" \ 969 "pmaddwd %%mm2, %%mm1 \n\t" \ 970 "movq 24(%2), %%mm7 \n\t" \ 971 "pmaddwd %%mm7, %%mm2 \n\t" \ 972 "movq 32(%2), %%mm7 \n\t" \ 973 "pmaddwd %%mm3, %%mm7 \n\t" \ 974 "pmaddwd 40(%2), %%mm3 \n\t" \ 975 "paddd %%mm1, %%mm7 \n\t" \ 976 "paddd %%mm1, %%mm1 \n\t" \ 977 "psubd %%mm7, %%mm1 \n\t" \ 978 "paddd %%mm2, %%mm3 \n\t" \ 979 "paddd %%mm2, %%mm2 \n\t" \ 980 "psubd %%mm3, %%mm2 \n\t" \ 981 "psrad $" #shift ", %%mm4 \n\t"\ 982 "psrad $" #shift ", %%mm7 \n\t"\ 983 "psrad $" #shift ", %%mm3 \n\t"\ 984 "packssdw %%mm7, %%mm4 \n\t" \ 985 "movq %%mm4, " #dst " \n\t"\ 986 "psrad $" #shift ", %%mm0 \n\t"\ 987 "packssdw %%mm3, %%mm0 \n\t" \ 988 "movq %%mm0, 16+" #dst " \n\t"\ 989 "movq %%mm0, 96+" #dst " \n\t"\ 990 "movq %%mm4, 112+" #dst " \n\t"\ 991 "psrad $" #shift ", %%mm5 \n\t"\ 992 "psrad $" #shift ", %%mm6 \n\t"\ 993 "psrad $" #shift ", %%mm2 \n\t"\ 994 "packssdw %%mm2, %%mm5 \n\t" \ 995 "movq %%mm5, 32+" #dst " \n\t"\ 996 "psrad $" #shift ", %%mm1 \n\t"\ 997 "packssdw %%mm1, %%mm6 \n\t" \ 998 "movq %%mm6, 48+" #dst " \n\t"\ 999 "movq %%mm6, 64+" #dst " \n\t"\ 1000 "movq %%mm5, 80+" #dst " \n\t" 1004 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
1006 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * The string fragment that follows carries a ".p2align 4" directive behind a
 * leading '#'.  After it comes another IDCT variant:
 *
 * IDCT(src0, src4, src1, src5, dst, shift): variant that never reads src5.
 *  - src0, src4 and src1 are loaded (mm0, mm1, mm2) and multiplied by the
 *    coefficients at 16, 24, 32, 40, 48, 64, 80 and 96(%2); the quadwords at
 *    56, 72, 88 and 104(%2), which the full variant multiplies with src5,
 *    are not referenced.
 *  - Results are shifted right by `shift`, packed with packssdw and written
 *    with movd at dst + 0, 16, 96, 112, 32, 48, 64, 80 (in that order).
 * NOTE(review): presumably selected only when the src5 inputs are known to
 * be zero -- the branch logic is not visible here; confirm.
 *
 * Expanded four times with sources 0/8/16/24(%1), destinations 0/4/8/12(%0)
 * and shift 20.
 */
1011 "
# .p2align 4 \n\t"\ 1014 #define IDCT(src0, src4, src1, src5, dst, shift) \ 1015 "movq " #src0 ", %%mm0 \n\t" \ 1016 "movq " #src4 ", %%mm1 \n\t" \ 1017 "movq " #src1 ", %%mm2 \n\t" \ 1018 "movq 16(%2), %%mm4 \n\t" \ 1019 "pmaddwd %%mm0, %%mm4 \n\t" \ 1020 "movq 24(%2), %%mm5 \n\t" \ 1021 "pmaddwd %%mm5, %%mm0 \n\t" \ 1022 "movq 32(%2), %%mm5 \n\t" \ 1023 "pmaddwd %%mm1, %%mm5 \n\t" \ 1024 "movq 40(%2), %%mm6 \n\t" \ 1025 "pmaddwd %%mm6, %%mm1 \n\t" \ 1026 "movq %%mm4, %%mm6 \n\t" \ 1027 "movq 48(%2), %%mm7 \n\t" \ 1028 "pmaddwd %%mm2, %%mm7 \n\t" \ 1029 "paddd %%mm5, %%mm4 \n\t" \ 1030 "psubd %%mm5, %%mm6 \n\t" \ 1031 "movq %%mm0, %%mm5 \n\t" \ 1032 "paddd %%mm1, %%mm0 \n\t" \ 1033 "psubd %%mm1, %%mm5 \n\t" \ 1034 "movq 64(%2), %%mm1 \n\t"\ 1035 "pmaddwd %%mm2, %%mm1 \n\t" \ 1036 "paddd %%mm4, %%mm7 \n\t" \ 1037 "paddd %%mm4, %%mm4 \n\t" \ 1038 "psubd %%mm7, %%mm4 \n\t" \ 1039 "psrad $" #shift ", %%mm7 \n\t"\ 1040 "psrad $" #shift ", %%mm4 \n\t"\ 1041 "movq %%mm0, %%mm3 \n\t" \ 1042 "paddd %%mm1, %%mm0 \n\t" \ 1043 "psubd %%mm1, %%mm3 \n\t" \ 1044 "psrad $" #shift ", %%mm0 \n\t"\ 1045 "psrad $" #shift ", %%mm3 \n\t"\ 1046 "packssdw %%mm7, %%mm7 \n\t" \ 1047 "movd %%mm7, " #dst " \n\t"\ 1048 "packssdw %%mm0, %%mm0 \n\t" \ 1049 "movd %%mm0, 16+" #dst " \n\t"\ 1050 "packssdw %%mm3, %%mm3 \n\t" \ 1051 "movd %%mm3, 96+" #dst " \n\t"\ 1052 "packssdw %%mm4, %%mm4 \n\t" \ 1053 "movd %%mm4, 112+" #dst " \n\t"\ 1054 "movq 80(%2), %%mm4 \n\t" \ 1055 "pmaddwd %%mm2, %%mm4 \n\t" \ 1056 "pmaddwd 96(%2), %%mm2 \n\t" \ 1057 "movq %%mm5, %%mm3 \n\t" \ 1058 "paddd %%mm4, %%mm3 \n\t" \ 1059 "psubd %%mm4, %%mm5 \n\t" \ 1060 "psrad $" #shift ", %%mm3 \n\t"\ 1061 "psrad $" #shift ", %%mm5 \n\t"\ 1062 "movq %%mm6, %%mm4 \n\t" \ 1063 "paddd %%mm2, %%mm6 \n\t" \ 1064 "psubd %%mm2, %%mm4 \n\t" \ 1065 "psrad $" #shift ", %%mm6 \n\t"\ 1066 "packssdw %%mm3, %%mm3 \n\t" \ 1067 "movd %%mm3, 32+" #dst " \n\t"\ 1068 "psrad $" #shift ", %%mm4 \n\t"\ 1069 "packssdw %%mm6, %%mm6 \n\t" \ 1070 "movd 
%%mm6, 48+" #dst " \n\t"\ 1071 "packssdw %%mm4, %%mm4 \n\t" \ 1072 "packssdw %%mm5, %%mm5 \n\t" \ 1073 "movd %%mm4, 64+" #dst " \n\t"\ 1074 "movd %%mm5, 80+" #dst " \n\t" 1078 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
1079 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
1080 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
1081 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
/*
 * IDCT(src0, src4, src1, src5, dst, shift): variant that reads only src0,
 * and handles two adjacent source quadwords per expansion.
 *  - src0 goes to mm0 and "8+src0" to mm2; each is multiplied by the
 *    coefficients at 16(%2) and 24(%2).  src4, src1 and src5 are never
 *    loaded.
 *  - After the shift by `shift`, the two 16(%2) products are packed into mm4
 *    and the two 24(%2) products into mm0, and each is stored four times
 *    with movq: mm4 at dst+0, +48, +64, +112; mm0 at dst+16, +32, +80, +96.
 * NOTE(review): "movq 32(%2), %%mm7" loads a value that no later
 * instruction in this macro reads -- looks like a leftover; confirm before
 * removing.
 * NOTE(review): presumably selected only when the src4, src1 and src5
 * inputs are known to be zero -- the branch logic is not visible here;
 * confirm.
 *
 * Expanded twice below: sources 0(%1) and 16(%1), destinations 0(%0) and
 * 8(%0), shift 20.
 */
1088 #define IDCT(src0, src4, src1, src5, dst, shift) \ 1089 "movq " #src0 ", %%mm0 \n\t" \ 1090 "movq 16(%2), %%mm4 \n\t" \ 1091 "pmaddwd %%mm0, %%mm4 \n\t" \ 1092 "movq 24(%2), %%mm5 \n\t" \ 1093 "pmaddwd %%mm5, %%mm0 \n\t" \ 1094 "psrad $" #shift ", %%mm4 \n\t"\ 1095 "psrad $" #shift ", %%mm0 \n\t"\ 1096 "movq 8+" #src0 ", %%mm2 \n\t" \ 1097 "movq 16(%2), %%mm1 \n\t" \ 1098 "pmaddwd %%mm2, %%mm1 \n\t" \ 1099 "movq 24(%2), %%mm7 \n\t" \ 1100 "pmaddwd %%mm7, %%mm2 \n\t" \ 1101 "movq 32(%2), %%mm7 \n\t" \ 1102 "psrad $" #shift ", %%mm1 \n\t"\ 1103 "packssdw %%mm1, %%mm4 \n\t" \ 1104 "movq %%mm4, " #dst " \n\t"\ 1105 "psrad $" #shift ", %%mm2 \n\t"\ 1106 "packssdw %%mm2, %%mm0 \n\t" \ 1107 "movq %%mm0, 16+" #dst " \n\t"\ 1108 "movq %%mm0, 96+" #dst " \n\t"\ 1109 "movq %%mm4, 112+" #dst " \n\t"\ 1110 "movq %%mm0, 32+" #dst " \n\t"\ 1111 "movq %%mm4, 48+" #dst " \n\t"\ 1112 "movq %%mm4, 64+" #dst " \n\t"\ 1113 "movq %%mm0, 80+" #dst " \n\t" 1116 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
1118 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
/*
 * Operand lists of the asm statement: the output list is empty ("::"), and
 * the three register inputs are, in order, %0 = block, %1 = temp and
 * %2 = coeffs.  The templates above read their first-pass input from %0 and
 * write it to %1, then read %1 and write the final result back to %0; %2 is
 * only ever read.
 */
1147 ::
"r" (
block),
"r" (temp),
"r" (coeffs)
memory handling functions
#define DECLARE_ALIGNED(n, t, v)
#define DECLARE_ASM_CONST(n, t, v)
void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)
void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block)
common internal API header
void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block)
void ff_simple_idct_mmx(int16_t *block)
void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size)