/*
 * Packed constants that the inline assembly in this file names as "m"
 * (memory) operands.  Each one is an mmx_t initialised through its .ud
 * member (two values) or its .uw member (four values); mmx_t is declared
 * outside this listing.
 *
 * NOTE(review): this listing has gaps and carries fused line numbers, so
 * the intent of each constant is inferred from its name -- confirm against
 * the complete source.
 *
 * MMX_0080w / MMX_00FFw / MMX_FF00w: the 32-bit patterns 0x00800080,
 * 0x00ff00ff and 0xff00ff00, i.e. 0x0080, 0x00ff and 0xff00 repeated in
 * every 16-bit half.
 */
21 #include "../SDL_internal.h" 23 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES 31 static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
32 static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
33 static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
/* 0x004a (74) in all four words; it is the last-but-two operand (%15) that
 * Color565DitherYV12MMX1X multiplies by with pmullw.  Presumably the
 * luminance scale factor -- TODO confirm. */
35 static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
/* Multipliers listed as operands of ColorRGBDitherYV12MMX1X (MMX_UbluRGB is
 * not in the visible part of that list).  Values: 0x0072 = 114,
 * 0x0059 = 89, and, read as signed 16-bit, 0xffea = -22 and 0xffd2 = -46.
 * Presumably chroma-to-blue/red/green weights -- TODO confirm. */
37 static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
38 static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
39 static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
40 static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
/* Multipliers listed as operands of Color565DitherYV12MMX1X.  Values:
 * 0x0081 = 129, 0x0066 = 102, and, read as signed 16-bit, 0xffe8 = -24 and
 * 0xffcd = -51. */
42 static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
43 static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
44 static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
45 static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
/* Bit masks repeated in every 16-bit word: 0xf800 selects bits 15..11 and
 * 0x07e0 selects bits 10..5.  Presumably the red and green fields of a
 * 5-6-5 pixel -- TODO confirm. */
47 static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
48 static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
/*
 * ColorRGBDitherYV12MMX1X -- MMX inline-assembly routine that reads the
 * lum, cr and cb byte buffers and works on 32-bit (Uint32) output rows
 * taken from out.  Presumably a YV12 to 32-bit RGB conversion, judging by
 * the name -- TODO confirm against the complete source.
 *
 * NOTE(review): this listing is incomplete.  The function braces, the
 * declarations of row1 and x, most of the assembly and the end of the
 * operand list are not visible, so the notes below cover only what is shown.
 *
 * Parameters:
 *   colortab, rgb_2_pix - not referenced in any visible line.
 *   lum  - base of the luminance bytes; y is derived from it below.
 *   cr   - passed to the assembly as a memory operand.
 *   cb   - passed to the assembly in a register.
 *   out  - destination buffer, reinterpreted as Uint32 elements.
 *   rows, cols - plane dimensions; cols*rows bytes separate lum from y.
 *   mod  - extra elements per output row; overwritten below before the
 *          assembly uses it.
 */
79 void ColorRGBDitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
80 unsigned char *lum,
unsigned char *cr,
81 unsigned char *cb,
unsigned char *out,
82 int rows,
int cols,
int mod )
/* y is lum advanced by cols*rows bytes; presumably the end-of-plane
 * sentinel for the loop -- TODO confirm. */
87 unsigned char*
y = lum +cols*rows;
/* row2 starts cols+mod Uint32 elements after out. */
90 row2 = (
Uint32 *)out+cols+mod;
/* mod becomes (2*mod + cols) * 4; the factor 4 presumably converts a
 * Uint32 element count into bytes -- TODO confirm. */
91 mod = (mod+cols+mod)*4;
/*
 * Operand map, assuming the list after the visible ':' is the input list
 * and nothing precedes it (the numbering then matches the visible
 * "(%2,%4)" address, lum indexed by cols):
 *   %0 cr (m)    %1 cb (r)    %2 lum (r)   %3 row1 (r)  %4 cols (r)
 *   %5 row2 (r)  %6 x (m)     %7 y (m)     %8 mod (m)
 *   %9 MMX_0080w   %10 MMX_VgrnRGB  %11 MMX_VredRGB
 *   %12 MMX_FF00w  %13 MMX_00FFw    %14 MMX_UgrnRGB
 * Visible instructions: ebx is stored to 4(%esp) and later reloaded from
 * there, then used as the address for a 32-bit movd load.  paddsw adds
 * signed words with saturation, packuswb narrows words to bytes with
 * unsigned saturation, and the punpck* forms interleave bytes, words or
 * dwords.
 */
93 __asm__ __volatile__ (
99 "movl %%ebx,4(%%esp)\n" 107 "movl 4(%%esp),%%ebx\n" 108 "movd (%%ebx),%%mm1\n" 112 "punpcklbw %%mm7,%%mm1\n" 113 "punpckldq %%mm1,%%mm1\n" 125 "movq (%2,%4),%%mm3\n" 126 "punpckldq %%mm3,%%mm2\n" 135 "paddsw %%mm1, %%mm5\n" 136 "paddsw %%mm1, %%mm6\n" 137 "packuswb %%mm5,%%mm5\n" 138 "packuswb %%mm6,%%mm6\n" 140 "punpcklbw %%mm5,%%mm6\n" 144 "punpcklbw %%mm7,%%mm1\n" 145 "punpckldq %%mm1,%%mm1\n" 158 "paddsw %%mm5, %%mm3\n" 159 "paddsw %%mm5, %%mm7\n" 160 "paddsw %%mm0, %%mm3\n" 161 "paddsw %%mm0, %%mm7\n" 162 "packuswb %%mm3,%%mm3\n" 163 "packuswb %%mm7,%%mm7\n" 164 "punpcklbw %%mm3,%%mm7\n" 169 "paddsw %%mm1, %%mm3\n" 170 "paddsw %%mm1, %%mm5\n" 171 "packuswb %%mm3,%%mm3\n" 172 "packuswb %%mm5,%%mm5\n" 173 "punpcklbw %%mm3,%%mm5\n" 183 "punpcklbw %%mm4,%%mm1\n" 184 "punpcklbw %%mm4,%%mm3\n" 187 "punpcklwd %%mm1,%%mm3\n" 188 "punpckhwd %%mm2,%%mm0\n" 192 "punpcklbw %%mm1,%%mm2\n" 193 "punpcklwd %%mm4,%%mm2\n" 198 "punpcklbw %%mm1,%%mm4\n" 199 "punpckhwd %%mm2,%%mm4\n" 206 "punpckhbw %%mm2,%%mm6\n" 207 "punpckhbw %%mm1,%%mm5\n" 209 "punpcklwd %%mm6,%%mm1\n" 211 "punpckhwd %%mm6,%%mm5\n" 234 :
"m" (cr),
"r"(cb),
"r"(lum),
235 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
236 "m"(MMX_0080w),
"m"(MMX_VgrnRGB),
"m"(MMX_VredRGB),
237 "m"(MMX_FF00w),
"m"(MMX_00FFw),
"m"(MMX_UgrnRGB),
/*
 * Color565DitherYV12MMX1X -- MMX inline-assembly routine that reads the
 * lum, cr and cb byte buffers and works on 16-bit (Uint16) output rows
 * taken from out.  Presumably a YV12 to RGB565 conversion, judging by the
 * name and the MMX_red565 / MMX_grn565 masks -- TODO confirm.
 *
 * NOTE(review): this listing is incomplete.  The function braces, the
 * declarations of row1 and x, many assembly lines and whatever follows the
 * operand list are not visible, so the notes below cover only what is shown.
 *
 * Parameters:
 *   colortab, rgb_2_pix - not referenced in any visible line.
 *   lum  - base of the luminance bytes; y is derived from it below.
 *   cr   - passed to the assembly as a memory operand.
 *   cb   - passed to the assembly in a register.
 *   out  - destination buffer, reinterpreted as Uint16 elements.
 *   rows, cols - plane dimensions; cols*rows bytes separate lum from y.
 *   mod  - extra elements per output row; overwritten below before the
 *          assembly uses it.
 */
242 void Color565DitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
243 unsigned char *lum,
unsigned char *cr,
244 unsigned char *cb,
unsigned char *out,
245 int rows,
int cols,
int mod )
/* y is lum advanced by cols*rows bytes; presumably the end-of-plane
 * sentinel for the loop -- TODO confirm. */
250 unsigned char* y = lum +cols*rows;
/* row2 starts cols+mod Uint16 elements after out. */
253 row2 = (
Uint16 *)out+cols+mod;
/* mod becomes (2*mod + cols) * 2; the factor 2 presumably converts a
 * Uint16 element count into bytes -- TODO confirm. */
254 mod = (mod+cols+mod)*2;
/*
 * Operand map, assuming the list after the visible ':' is the input list
 * and nothing precedes it (the numbering then matches the visible uses
 * below):
 *   %0 cr (m)    %1 cb (r)    %2 lum (r)   %3 row1 (r)  %4 cols (r)
 *   %5 row2 (r)  %6 x (m)     %7 y (m)     %8 mod (m)
 *   %9 MMX_0080w    %10 MMX_Ugrn565  %11 MMX_Ublu5x5
 *   %12 MMX_00FFw   %13 MMX_Vgrn565  %14 MMX_Vred5x5
 *   %15 MMX_Ycoeff  %16 MMX_red565   %17 MMX_grn565
 * Visible instructions: ebx is stored to 4(%esp) and later reloaded from
 * there, then used as the address for a 32-bit movd load.  Bytes are
 * widened to words against the zeroed mm7, and pmullw multiplies by
 * %10/%11 and %13/%14.  movq loads from (%2,%4), i.e. lum + cols, and the
 * loaded registers are multiplied by %15.  packuswb narrows the sums to
 * bytes with unsigned saturation, and 8-byte results are stored at 8(%3)
 * and 8(%5), i.e. at row1 + 8 and row2 + 8.
 */
256 __asm__ __volatile__(
262 "movl %%ebx, 4(%%esp)\n" 269 "pxor %%mm7, %%mm7\n" 271 "movl 4(%%esp), %%ebx\n" 272 "movd (%%ebx), %%mm1\n" 275 "punpcklbw %%mm7, %%mm0\n" 276 "punpcklbw %%mm7, %%mm1\n" 279 "movq %%mm0, %%mm2\n" 280 "movq %%mm1, %%mm3\n" 281 "pmullw %10, %%mm2\n" 283 "pmullw %11, %%mm0\n" 285 "pmullw %13, %%mm3\n" 287 "pmullw %14, %%mm1\n" 289 "pmullw %15, %%mm6\n" 290 "paddw %%mm3, %%mm2\n" 291 "pmullw %15, %%mm7\n" 293 "movq %%mm6, %%mm4\n" 294 "paddw %%mm0, %%mm6\n" 295 "movq %%mm4, %%mm5\n" 296 "paddw %%mm1, %%mm4\n" 297 "paddw %%mm2, %%mm5\n" 299 "movq %%mm7, %%mm3\n" 301 "paddw %%mm0, %%mm7\n" 303 "packuswb %%mm4, %%mm4\n" 304 "packuswb %%mm5, %%mm5\n" 305 "packuswb %%mm6, %%mm6\n" 306 "punpcklbw %%mm4, %%mm4\n" 307 "punpcklbw %%mm5, %%mm5\n" 311 "punpcklbw %%mm6, %%mm6\n" 316 "movq %%mm3, %%mm5\n" 317 "paddw %%mm1, %%mm3\n" 318 "paddw %%mm2, %%mm5\n" 322 "movq (%2, %4), %%mm6\n" 324 "packuswb %%mm3, %%mm3\n" 325 "packuswb %%mm5, %%mm5\n" 326 "packuswb %%mm7, %%mm7\n" 328 "punpcklbw %%mm3, %%mm3\n" 329 "punpcklbw %%mm5, %%mm5\n" 330 "pmullw %15, %%mm6\n" 331 "punpcklbw %%mm7, %%mm7\n" 338 "movq (%2,%4), %%mm7\n" 341 "movq %%mm4, %%mm5\n" 342 "punpcklwd %%mm3, %%mm4\n" 343 "pmullw %15, %%mm7\n" 344 "punpckhwd %%mm3, %%mm5\n" 347 "movq %%mm5, 8(%3)\n" 349 "movq %%mm6, %%mm4\n" 350 "paddw %%mm0, %%mm6\n" 352 "movq %%mm4, %%mm5\n" 353 "paddw %%mm1, %%mm4\n" 354 "paddw %%mm2, %%mm5\n" 356 "movq %%mm7, %%mm3\n" 358 "paddw %%mm0, %%mm7\n" 360 "movq %%mm3, %%mm0\n" 361 "packuswb %%mm4, %%mm4\n" 362 "paddw %%mm1, %%mm3\n" 363 "packuswb %%mm5, %%mm5\n" 364 "paddw %%mm2, %%mm0\n" 365 "packuswb %%mm6, %%mm6\n" 366 "punpcklbw %%mm4, %%mm4\n" 367 "punpcklbw %%mm5, %%mm5\n" 368 "punpcklbw %%mm6, %%mm6\n" 379 "packuswb %%mm3, %%mm3\n" 380 "packuswb %%mm0, %%mm0\n" 381 "packuswb %%mm7, %%mm7\n" 382 "punpcklbw %%mm3, %%mm3\n" 383 "punpcklbw %%mm0, %%mm0\n" 384 "punpcklbw %%mm7, %%mm7\n" 393 "movq %%mm4, %%mm5\n" 395 "punpcklwd %%mm3, %%mm4\n" 396 "punpckhwd %%mm3, %%mm5\n" 399 "movq 
%%mm5, 8(%5)\n" 419 :
"m" (cr),
"r"(cb),
"r"(lum),
420 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
421 "m"(MMX_0080w),
"m"(MMX_Ugrn565),
"m"(MMX_Ublu5x5),
422 "m"(MMX_00FFw),
"m"(MMX_Vgrn565),
"m"(MMX_Vred5x5),
423 "m"(MMX_Ycoeff),
"m"(MMX_red565),
"m"(MMX_grn565)
uint32_t Uint32
An unsigned 32-bit integer type.
GLint GLint GLint GLint GLint x
GLint GLint GLint GLint GLint GLint y
uint16_t Uint16
An unsigned 16-bit integer type.