Libav
swscale_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/x86/asm.h"
25 
/*
 * Instruction selection for this instantiation of the template.
 * The #undefs discard any earlier definitions (presumably because this file
 * is included more than once with different COMPILE_TEMPLATE_* settings --
 * confirm against the including file).
 */
26 #undef REAL_MOVNTQ
27 #undef MOVNTQ
28 #undef PREFETCH
29 
/* PREFETCH: "prefetchnta" when MMXEXT is enabled; otherwise text starting with
 * '#', which GNU as treats as a comment on x86, so nothing is emitted. */
30 #if COMPILE_TEMPLATE_MMXEXT
31 #define PREFETCH "prefetchnta"
32 #else
33 #define PREFETCH " # nop"
34 #endif
35 
/* REAL_MOVNTQ(a,b): 64-bit store of a to b; "movntq" with MMXEXT, plain
 * "movq" otherwise.  Both arguments are stringified with '#'. */
36 #if COMPILE_TEMPLATE_MMXEXT
37 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
38 #else
39 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
40 #endif
/* Extra macro level: arguments are macro-expanded here before REAL_MOVNTQ
 * stringifies them. */
41 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
42 
/*
 * Opens an __asm__ volatile statement and emits the chroma accumulation pass.
 * The statement is left open: further macros append instructions and
 * YSCALEYUV2PACKEDX_END supplies the operand list and the closing ");".
 *
 * REG_a is zeroed once and label 1 marks the top of the outer (per output
 * group) loop that the WRITE* macros jump back to.  The inner loop (label 2)
 * walks a list of 16-byte entries starting at CHR_MMX_FILTER_OFFSET(%0):
 * offset 0 holds a source pointer, offset 8 a coefficient qword; a null
 * pointer ends the list.  For each entry one qword is loaded at
 * pointer + REG_a (U) and one at pointer + %6 + REG_a (V); both are
 * multiplied by the coefficient with pmulhw and added into mm3 (U) and
 * mm4 (V), which start out as the qword at VROUNDER_OFFSET(%0).
 *
 * Registers written: REG_a, REG_d, REG_S, mm0, mm2-mm5.
 */
43 #define YSCALEYUV2PACKEDX_UV \
44  __asm__ volatile(\
45  "xor %%"REG_a", %%"REG_a" \n\t"\
46  ".p2align 4 \n\t"\
47  "nop \n\t"\
48  "1: \n\t"\
49  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
50  "mov (%%"REG_d"), %%"REG_S" \n\t"\
51  "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
52  "movq %%mm3, %%mm4 \n\t"\
53  ".p2align 4 \n\t"\
54  "2: \n\t"\
55  "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
56  "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
57  "add %6, %%"REG_S" \n\t" \
58  "movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
59  "add $16, %%"REG_d" \n\t"\
60  "mov (%%"REG_d"), %%"REG_S" \n\t"\
61  "pmulhw %%mm0, %%mm2 \n\t"\
62  "pmulhw %%mm0, %%mm5 \n\t"\
63  "paddw %%mm2, %%mm3 \n\t"\
64  "paddw %%mm5, %%mm4 \n\t"\
65  "test %%"REG_S", %%"REG_S" \n\t"\
66  " jnz 2b \n\t"\
67 
/*
 * Accumulation pass over two adjacent qwords per source line (used for luma
 * and, by yuv2rgb32_X, for alpha).  Must appear inside an already open asm
 * statement.
 *
 *   offset      string: byte offset (relative to %0) of the entry list
 *   coeff       MMX register that receives each entry's coefficient qword
 *   src1, src2  MMX scratch registers for the two loaded qwords
 *   dst1, dst2  MMX accumulators; both start as the qword at
 *               VROUNDER_OFFSET(%0)
 *
 * The entry list has the same layout as in YSCALEYUV2PACKEDX_UV (pointer at
 * offset 0, coefficient at offset 8, 16-byte stride, null pointer ends it).
 * Data is read at pointer + 2*REG_a and pointer + 2*REG_a + 8.  Local label 2
 * is reused; REG_d and REG_S are overwritten.
 */
68 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
69  "lea "offset"(%0), %%"REG_d" \n\t"\
70  "mov (%%"REG_d"), %%"REG_S" \n\t"\
71  "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
72  "movq "#dst1", "#dst2" \n\t"\
73  ".p2align 4 \n\t"\
74  "2: \n\t"\
75  "movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\
76  "movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\
77  "movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\
78  "add $16, %%"REG_d" \n\t"\
79  "mov (%%"REG_d"), %%"REG_S" \n\t"\
80  "pmulhw "#coeff", "#src1" \n\t"\
81  "pmulhw "#coeff", "#src2" \n\t"\
82  "paddw "#src1", "#dst1" \n\t"\
83  "paddw "#src2", "#dst2" \n\t"\
84  "test %%"REG_S", %%"REG_S" \n\t"\
85  " jnz 2b \n\t"\
86 
/*
 * Chroma pass followed by the luma pass over the list at
 * LUM_MMX_FILTER_OFFSET.  Afterwards mm3/mm4 hold the U/V sums and mm1/mm7
 * the two luma sums; mm0, mm2 and mm5 are scratch.
 */
87 #define YSCALEYUV2PACKEDX \
88  YSCALEYUV2PACKEDX_UV \
89  YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
90 
/*
 * Operand list, clobber list and terminator for an asm statement opened by
 * YSCALEYUV2PACKEDX_UV / YSCALEYUV2PACKEDX_ACCURATE_UV.
 *
 * Input operands (there are no outputs):
 *   %0       &c->redDither, in a register; base for all the *_OFFSET accesses
 *   %1..%3   dummy, placeholders that keep the later operand numbers fixed
 *   %4       dest, in a register
 *   %5       dstW_reg, in memory
 *   %6       uv_off, in memory
 * The using function must therefore have c, dummy, dest, dstW_reg and uv_off
 * in scope.
 *
 * NOTE(review): only REG_a, REG_d and REG_S are declared clobbered; neither
 * "memory" nor the MMX registers are listed even though the asm stores
 * through dest -- confirm this is intentional.
 */
91 #define YSCALEYUV2PACKEDX_END \
92  :: "r" (&c->redDither), \
93  "m" (dummy), "m" (dummy), "m" (dummy),\
94  "r" (dest), "m" (dstW_reg), "m"(uv_off) \
95  : "%"REG_a, "%"REG_d, "%"REG_S \
96  );
97 
/*
 * Higher-precision variant of YSCALEYUV2PACKEDX_UV.  Opens the asm statement,
 * zeroes REG_a and defines outer-loop label 1.
 *
 * Each list entry (stride APCK_SIZE) supplies two source pointers, at offset
 * 0 and at APCK_PTR2, and one coefficient qword at APCK_COEF.  Words from
 * the two source lines are interleaved with punpck{l,h}wd and multiplied by
 * the coefficient qword with pmaddwd, giving 32-bit sums that are accumulated
 * in mm4/mm5 (U) and mm6/mm7 (V).  V data is read from pointer + %6.
 * The list ends when the pointer at the start of the next entry is null; the
 * "test" that checks this is issued early and the jnz after the remaining
 * MMX instructions, which do not modify the flags.
 *
 * After the loop the sums are shifted right by 16, packed to words with
 * signed saturation, offset by the qword at VROUNDER_OFFSET(%0) and stored
 * to U_TEMP(%0) and V_TEMP(%0).  All of mm0-mm7 are overwritten.
 */
98 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
99  __asm__ volatile(\
100  "xor %%"REG_a", %%"REG_a" \n\t"\
101  ".p2align 4 \n\t"\
102  "nop \n\t"\
103  "1: \n\t"\
104  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
105  "mov (%%"REG_d"), %%"REG_S" \n\t"\
106  "pxor %%mm4, %%mm4 \n\t"\
107  "pxor %%mm5, %%mm5 \n\t"\
108  "pxor %%mm6, %%mm6 \n\t"\
109  "pxor %%mm7, %%mm7 \n\t"\
110  ".p2align 4 \n\t"\
111  "2: \n\t"\
112  "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
113  "add %6, %%"REG_S" \n\t" \
114  "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
115  "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
116  "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
117  "movq %%mm0, %%mm3 \n\t"\
118  "punpcklwd %%mm1, %%mm0 \n\t"\
119  "punpckhwd %%mm1, %%mm3 \n\t"\
120  "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
121  "pmaddwd %%mm1, %%mm0 \n\t"\
122  "pmaddwd %%mm1, %%mm3 \n\t"\
123  "paddd %%mm0, %%mm4 \n\t"\
124  "paddd %%mm3, %%mm5 \n\t"\
125  "add %6, %%"REG_S" \n\t" \
126  "movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
127  "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
128  "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
129  "test %%"REG_S", %%"REG_S" \n\t"\
130  "movq %%mm2, %%mm0 \n\t"\
131  "punpcklwd %%mm3, %%mm2 \n\t"\
132  "punpckhwd %%mm3, %%mm0 \n\t"\
133  "pmaddwd %%mm1, %%mm2 \n\t"\
134  "pmaddwd %%mm1, %%mm0 \n\t"\
135  "paddd %%mm2, %%mm6 \n\t"\
136  "paddd %%mm0, %%mm7 \n\t"\
137  " jnz 2b \n\t"\
138  "psrad $16, %%mm4 \n\t"\
139  "psrad $16, %%mm5 \n\t"\
140  "psrad $16, %%mm6 \n\t"\
141  "psrad $16, %%mm7 \n\t"\
142  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
143  "packssdw %%mm5, %%mm4 \n\t"\
144  "packssdw %%mm7, %%mm6 \n\t"\
145  "paddw %%mm0, %%mm4 \n\t"\
146  "paddw %%mm0, %%mm6 \n\t"\
147  "movq %%mm4, "U_TEMP"(%0) \n\t"\
148  "movq %%mm6, "V_TEMP"(%0) \n\t"\
149 
/*
 * Higher-precision accumulation of two adjacent qwords per source line, for
 * the entry list at offset(%0) (offset is a string).  Must appear inside an
 * open asm statement.
 *
 * Entry layout and loop termination match YSCALEYUV2PACKEDX_ACCURATE_UV
 * (pointers at offset 0 and APCK_PTR2, coefficients at APCK_COEF, stride
 * APCK_SIZE); data is read at pointer + 2*REG_a and pointer + 2*REG_a + 8.
 * 32-bit sums are kept in mm1/mm5 for the first qword and mm7/mm6 for the
 * second, then shifted right by 16, packed to words with signed saturation
 * and offset by the qword at VROUNDER_OFFSET(%0).
 *
 * On exit: mm1 and mm7 hold the two results, and mm3/mm4 are reloaded from
 * U_TEMP(%0)/V_TEMP(%0).  All of mm0-mm7, REG_d and REG_S are overwritten.
 */
150 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
151  "lea "offset"(%0), %%"REG_d" \n\t"\
152  "mov (%%"REG_d"), %%"REG_S" \n\t"\
153  "pxor %%mm1, %%mm1 \n\t"\
154  "pxor %%mm5, %%mm5 \n\t"\
155  "pxor %%mm7, %%mm7 \n\t"\
156  "pxor %%mm6, %%mm6 \n\t"\
157  ".p2align 4 \n\t"\
158  "2: \n\t"\
159  "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
160  "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
161  "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
162  "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
163  "movq %%mm0, %%mm3 \n\t"\
164  "punpcklwd %%mm4, %%mm0 \n\t"\
165  "punpckhwd %%mm4, %%mm3 \n\t"\
166  "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
167  "pmaddwd %%mm4, %%mm0 \n\t"\
168  "pmaddwd %%mm4, %%mm3 \n\t"\
169  "paddd %%mm0, %%mm1 \n\t"\
170  "paddd %%mm3, %%mm5 \n\t"\
171  "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
172  "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
173  "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
174  "test %%"REG_S", %%"REG_S" \n\t"\
175  "movq %%mm2, %%mm0 \n\t"\
176  "punpcklwd %%mm3, %%mm2 \n\t"\
177  "punpckhwd %%mm3, %%mm0 \n\t"\
178  "pmaddwd %%mm4, %%mm2 \n\t"\
179  "pmaddwd %%mm4, %%mm0 \n\t"\
180  "paddd %%mm2, %%mm7 \n\t"\
181  "paddd %%mm0, %%mm6 \n\t"\
182  " jnz 2b \n\t"\
183  "psrad $16, %%mm1 \n\t"\
184  "psrad $16, %%mm5 \n\t"\
185  "psrad $16, %%mm7 \n\t"\
186  "psrad $16, %%mm6 \n\t"\
187  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
188  "packssdw %%mm5, %%mm1 \n\t"\
189  "packssdw %%mm6, %%mm7 \n\t"\
190  "paddw %%mm0, %%mm1 \n\t"\
191  "paddw %%mm0, %%mm7 \n\t"\
192  "movq "U_TEMP"(%0), %%mm3 \n\t"\
193  "movq "V_TEMP"(%0), %%mm4 \n\t"\
194 
/*
 * Higher-precision chroma pass followed by the luma pass over the list at
 * LUM_MMX_FILTER_OFFSET.  Leaves luma in mm1/mm7 and chroma in mm3 (U) and
 * mm4 (V), the same registers the non-accurate YSCALEYUV2PACKEDX fills.
 */
195 #define YSCALEYUV2PACKEDX_ACCURATE \
196  YSCALEYUV2PACKEDX_ACCURATE_UV \
197  YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
198 
/*
 * YUV -> RGB conversion on the values left by YSCALEYUV2PACKEDX[_ACCURATE].
 *
 * In:  mm1, mm7 = luma words for the first / second group of four pixels,
 *      mm3 = U words, mm4 = V words (each chroma word is used for two
 *      adjacent pixels, see the punpck*wd duplication below).
 * Out: mm2 = B, mm4 = G, mm5 = R, eight unsigned-saturated bytes each.
 * Offsets and coefficients are read relative to %0; mm0, mm3 and mm6 are
 * scratch.
 */
199 #define YSCALEYUV2RGBX \
200  "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
201  "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
202  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
203  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
204  "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
205  "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
206  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
207  "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
208  "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
209  "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
210  "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
211  "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
212  "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
213  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
214  "paddw %%mm3, %%mm4 \n\t"\
215  "movq %%mm2, %%mm0 \n\t"\
216  "movq %%mm5, %%mm6 \n\t"\
217  "movq %%mm4, %%mm3 \n\t"\
218  "punpcklwd %%mm2, %%mm2 \n\t"\
219  "punpcklwd %%mm5, %%mm5 \n\t"\
220  "punpcklwd %%mm4, %%mm4 \n\t"\
221  "paddw %%mm1, %%mm2 \n\t"\
222  "paddw %%mm1, %%mm5 \n\t"\
223  "paddw %%mm1, %%mm4 \n\t"\
224  "punpckhwd %%mm0, %%mm0 \n\t"\
225  "punpckhwd %%mm6, %%mm6 \n\t"\
226  "punpckhwd %%mm3, %%mm3 \n\t"\
227  "paddw %%mm7, %%mm0 \n\t"\
228  "paddw %%mm7, %%mm6 \n\t"\
229  "paddw %%mm7, %%mm3 \n\t"\
230  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
231  "packuswb %%mm0, %%mm2 \n\t"\
232  "packuswb %%mm6, %%mm5 \n\t"\
233  "packuswb %%mm3, %%mm4 \n\t"\
234 
/*
 * Interleave four 8-byte planes into eight 4-byte pixels, store them and
 * close the outer loop.
 *
 *   dst, index   base register and pixel index; stores go to dst + index*4
 *   dstw         operand compared against index (loop bound)
 *   b, g, r, a   MMX registers holding eight bytes of each component;
 *                b and r are overwritten
 *   q0, q2, q3, t  MMX scratch registers
 *
 * Each stored pixel has the bytes b, g, r, a in increasing address order
 * (the "ARGB" notes below list them most-significant byte first).  After the
 * four stores index is advanced by 8 and control returns to label 1 while
 * index < dstw (unsigned comparison).
 */
235 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
236  "movq "#b", "#q2" \n\t" /* B */\
237  "movq "#r", "#t" \n\t" /* R */\
238  "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
239  "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
240  "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
241  "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
242  "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
243  "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
244  "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
245  "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
246  "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
247  "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
248 \
249  MOVNTQ( q0, (dst, index, 4))\
250  MOVNTQ( b, 8(dst, index, 4))\
251  MOVNTQ( q2, 16(dst, index, 4))\
252  MOVNTQ( q3, 24(dst, index, 4))\
253 \
254  "add $8, "#index" \n\t"\
255  "cmp "#dstw", "#index" \n\t"\
256  " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before REAL_WRITEBGR32
 * stringifies them. */
257 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
258 
/*
 * 32-bit-per-pixel writer from the *_X family (the "_ar" suffix presumably
 * marks the accurate-rounding variant -- confirm).
 *
 * NOTE(review): the listing numbers embedded in this block skip 271-272, 276,
 * 282, 284-285 and 288, so several source lines are absent from this copy
 * (most likely the macro invocations that open the asm statement, perform
 * the scaling/conversion and close it with YSCALEYUV2PACKEDX_END).  The
 * block is not compilable as shown; restore it from the upstream file before
 * building.
 *
 * What the visible lines do:
 *  - declare dummy, dstW_reg and uv_off, the names YSCALEYUV2PACKEDX_END
 *    uses as asm operands;
 *  - alpha path (CONFIG_SWSCALE_ALPHA && c->alpPixBuf): park mm2/mm4/mm5 in
 *    U_TEMP/V_TEMP/Y_TEMP(%0), later reload mm5 from Y_TEMP, shift mm1/mm7
 *    right by 3 and pack them to bytes as the alpha plane, then store with
 *    WRITEBGR32 using b=mm3, g=mm4, r=mm5, a=mm1;
 *  - opaque path: pcmpeqd sets every bit of mm7, which is then used as the
 *    alpha plane with b=mm2, g=mm4, r=mm5.
 * None of the filter/source parameters is referenced directly in the visible
 * lines.
 */
259 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
260  const int16_t **lumSrc, int lumFilterSize,
261  const int16_t *chrFilter, const int16_t **chrUSrc,
262  const int16_t **chrVSrc,
263  int chrFilterSize, const int16_t **alpSrc,
264  uint8_t *dest, int dstW, int dstY)
265 {
266  x86_reg dummy=0;
267  x86_reg dstW_reg = dstW;
268  x86_reg uv_off = c->uv_off_byte;
269 
270  if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
273  "movq %%mm2, "U_TEMP"(%0) \n\t"
274  "movq %%mm4, "V_TEMP"(%0) \n\t"
275  "movq %%mm5, "Y_TEMP"(%0) \n\t"
277  "movq "Y_TEMP"(%0), %%mm5 \n\t"
278  "psraw $3, %%mm1 \n\t"
279  "psraw $3, %%mm7 \n\t"
280  "packuswb %%mm7, %%mm1 \n\t"
281  WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
283  } else {
286  "pcmpeqd %%mm7, %%mm7 \n\t"
287  WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
289  }
290 }
291 
/*
 * 32-bit-per-pixel writer from the *_X family (counterpart of
 * yuv2rgb32_X_ar).
 *
 * NOTE(review): the listing numbers embedded in this block skip 304-305, 311,
 * 313-314 and 317, so several source lines (most likely macro invocations
 * that open and close the asm statement) are absent from this copy; the
 * block is not compilable as shown.
 *
 * What the visible lines do:
 *  - declare dummy, dstW_reg and uv_off, the names YSCALEYUV2PACKEDX_END
 *    uses as asm operands;
 *  - alpha path: accumulate the list at ALP_MMX_FILTER_OFFSET into mm1/mm7
 *    (mm0, mm3, mm6 as scratch), shift right by 3, pack to bytes and use mm1
 *    as the alpha plane in WRITEBGR32 (b=mm2, g=mm4, r=mm5);
 *  - opaque path: pcmpeqd sets every bit of mm7, which is used as alpha.
 */
292 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
293  const int16_t **lumSrc, int lumFilterSize,
294  const int16_t *chrFilter, const int16_t **chrUSrc,
295  const int16_t **chrVSrc,
296  int chrFilterSize, const int16_t **alpSrc,
297  uint8_t *dest, int dstW, int dstY)
298 {
299  x86_reg dummy=0;
300  x86_reg dstW_reg = dstW;
301  x86_reg uv_off = c->uv_off_byte;
302 
303  if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
306  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
307  "psraw $3, %%mm1 \n\t"
308  "psraw $3, %%mm7 \n\t"
309  "packuswb %%mm7, %%mm1 \n\t"
310  WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
312  } else {
315  "pcmpeqd %%mm7, %%mm7 \n\t"
316  WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
318  }
319 }
320 
/*
 * Pack eight pixels into 16-bit words, store them and close the outer loop.
 *
 * In:  mm2 = B, mm4 = G, mm5 = R (eight bytes each), mm7 = 0.
 * B and R are masked with bF8 and G with bFC (constants defined outside this
 * file; presumably 0xF8 / 0xFC replicated in every byte -- confirm).  With
 * those masks each word is (B >> 3) | (G << 3) | (R << 8), i.e. a 5-6-5
 * layout with B in the low bits.
 * Two qwords are stored at dst + index*2; then index is advanced by 8 and
 * control returns to label 1 while index < dstw (unsigned comparison).
 * mm1-mm5 are overwritten.
 */
321 #define REAL_WRITERGB16(dst, dstw, index) \
322  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
323  "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
324  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
325  "psrlq $3, %%mm2 \n\t"\
326 \
327  "movq %%mm2, %%mm1 \n\t"\
328  "movq %%mm4, %%mm3 \n\t"\
329 \
330  "punpcklbw %%mm7, %%mm3 \n\t"\
331  "punpcklbw %%mm5, %%mm2 \n\t"\
332  "punpckhbw %%mm7, %%mm4 \n\t"\
333  "punpckhbw %%mm5, %%mm1 \n\t"\
334 \
335  "psllq $3, %%mm3 \n\t"\
336  "psllq $3, %%mm4 \n\t"\
337 \
338  "por %%mm3, %%mm2 \n\t"\
339  "por %%mm4, %%mm1 \n\t"\
340 \
341  MOVNTQ(%%mm2, (dst, index, 2))\
342  MOVNTQ(%%mm1, 8(dst, index, 2))\
343 \
344  "add $8, "#index" \n\t"\
345  "cmp "#dstw", "#index" \n\t"\
346  " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being
 * stringified. */
347 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
348 
/*
 * 16-bit (5-6-5) writer from the *_X family (the "_ar" suffix presumably
 * marks the accurate-rounding variant -- confirm).
 *
 * NOTE(review): the listing numbers embedded in this block skip 360-361 and
 * 370, so the lines that open the asm statement, run the scaling/conversion
 * macros and close it are absent from this copy; the block is not compilable
 * as shown.
 *
 * Visible steps: declare dummy, dstW_reg and uv_off (the names
 * YSCALEYUV2PACKEDX_END uses as asm operands), clear mm7, optionally add the
 * dither values at BLUE_/GREEN_/RED_DITHER(%0) with unsigned saturation when
 * DITHER1XBPP is defined, then store via WRITERGB16 with dest=%4, bound=%5
 * and REG_a as the pixel index.
 */
349 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
350  const int16_t **lumSrc, int lumFilterSize,
351  const int16_t *chrFilter, const int16_t **chrUSrc,
352  const int16_t **chrVSrc,
353  int chrFilterSize, const int16_t **alpSrc,
354  uint8_t *dest, int dstW, int dstY)
355 {
356  x86_reg dummy=0;
357  x86_reg dstW_reg = dstW;
358  x86_reg uv_off = c->uv_off_byte;
359 
362  "pxor %%mm7, %%mm7 \n\t"
363  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
364 #ifdef DITHER1XBPP
365  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
366  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
367  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
368 #endif
369  WRITERGB16(%4, %5, %%REGa)
371 }
372 
/*
 * 16-bit (5-6-5) writer from the *_X family (counterpart of
 * yuv2rgb565_X_ar).
 *
 * NOTE(review): the listing numbers embedded in this block skip 384-385 and
 * 394, so the lines that open the asm statement, run the scaling/conversion
 * macros and close it are absent from this copy; the block is not compilable
 * as shown.
 *
 * Visible steps: declare dummy, dstW_reg and uv_off (the names
 * YSCALEYUV2PACKEDX_END uses as asm operands), clear mm7, optionally add the
 * dither values at BLUE_/GREEN_/RED_DITHER(%0) with unsigned saturation when
 * DITHER1XBPP is defined, then store via WRITERGB16 with dest=%4, bound=%5
 * and REG_a as the pixel index.
 */
373 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
374  const int16_t **lumSrc, int lumFilterSize,
375  const int16_t *chrFilter, const int16_t **chrUSrc,
376  const int16_t **chrVSrc,
377  int chrFilterSize, const int16_t **alpSrc,
378  uint8_t *dest, int dstW, int dstY)
379 {
380  x86_reg dummy=0;
381  x86_reg dstW_reg = dstW;
382  x86_reg uv_off = c->uv_off_byte;
383 
386  "pxor %%mm7, %%mm7 \n\t"
387  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
388 #ifdef DITHER1XBPP
389  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
390  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
391  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
392 #endif
393  WRITERGB16(%4, %5, %%REGa)
395 }
396 
/*
 * Pack eight pixels into 16-bit words with five bits per component, store
 * them and close the outer loop.
 *
 * In:  mm2 = B, mm4 = G, mm5 = R (eight bytes each), mm7 = 0.
 * All three components are masked with bF8 (defined outside this file;
 * presumably 0xF8 in every byte -- confirm).  With that mask each word is
 * (B >> 3) | (G << 2) | ((R >> 1) << 8), i.e. a 5-5-5 layout with B in the
 * low bits and the top bit clear.
 * Two qwords are stored at dst + index*2; then index is advanced by 8 and
 * control returns to label 1 while index < dstw (unsigned comparison).
 * mm1-mm5 are overwritten.
 */
397 #define REAL_WRITERGB15(dst, dstw, index) \
398  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
399  "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
400  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
401  "psrlq $3, %%mm2 \n\t"\
402  "psrlq $1, %%mm5 \n\t"\
403 \
404  "movq %%mm2, %%mm1 \n\t"\
405  "movq %%mm4, %%mm3 \n\t"\
406 \
407  "punpcklbw %%mm7, %%mm3 \n\t"\
408  "punpcklbw %%mm5, %%mm2 \n\t"\
409  "punpckhbw %%mm7, %%mm4 \n\t"\
410  "punpckhbw %%mm5, %%mm1 \n\t"\
411 \
412  "psllq $2, %%mm3 \n\t"\
413  "psllq $2, %%mm4 \n\t"\
414 \
415  "por %%mm3, %%mm2 \n\t"\
416  "por %%mm4, %%mm1 \n\t"\
417 \
418  MOVNTQ(%%mm2, (dst, index, 2))\
419  MOVNTQ(%%mm1, 8(dst, index, 2))\
420 \
421  "add $8, "#index" \n\t"\
422  "cmp "#dstw", "#index" \n\t"\
423  " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being
 * stringified. */
424 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
425 
/*
 * 16-bit (5-5-5) writer from the *_X family (the "_ar" suffix presumably
 * marks the accurate-rounding variant -- confirm).
 *
 * NOTE(review): the listing numbers embedded in this block skip 437-438 and
 * 447, so the lines that open the asm statement, run the scaling/conversion
 * macros and close it are absent from this copy; the block is not compilable
 * as shown.
 *
 * Visible steps: declare dummy, dstW_reg and uv_off (the names
 * YSCALEYUV2PACKEDX_END uses as asm operands), clear mm7, optionally add the
 * dither values at BLUE_/GREEN_/RED_DITHER(%0) with unsigned saturation when
 * DITHER1XBPP is defined, then store via WRITERGB15 with dest=%4, bound=%5
 * and REG_a as the pixel index.
 */
426 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
427  const int16_t **lumSrc, int lumFilterSize,
428  const int16_t *chrFilter, const int16_t **chrUSrc,
429  const int16_t **chrVSrc,
430  int chrFilterSize, const int16_t **alpSrc,
431  uint8_t *dest, int dstW, int dstY)
432 {
433  x86_reg dummy=0;
434  x86_reg dstW_reg = dstW;
435  x86_reg uv_off = c->uv_off_byte;
436 
439  "pxor %%mm7, %%mm7 \n\t"
440  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
441 #ifdef DITHER1XBPP
442  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
443  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
444  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
445 #endif
446  WRITERGB15(%4, %5, %%REGa)
448 }
449 
/*
 * 16-bit (5-5-5) writer from the *_X family (counterpart of
 * yuv2rgb555_X_ar).
 *
 * NOTE(review): the listing numbers embedded in this block skip 461-462 and
 * 471, so the lines that open the asm statement, run the scaling/conversion
 * macros and close it are absent from this copy; the block is not compilable
 * as shown.
 *
 * Visible steps: declare dummy, dstW_reg and uv_off (the names
 * YSCALEYUV2PACKEDX_END uses as asm operands), clear mm7, optionally add the
 * dither values at BLUE_/GREEN_/RED_DITHER(%0) with unsigned saturation when
 * DITHER1XBPP is defined, then store via WRITERGB15 with dest=%4, bound=%5
 * and REG_a as the pixel index.
 */
450 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
451  const int16_t **lumSrc, int lumFilterSize,
452  const int16_t *chrFilter, const int16_t **chrUSrc,
453  const int16_t **chrVSrc,
454  int chrFilterSize, const int16_t **alpSrc,
455  uint8_t *dest, int dstW, int dstY)
456 {
457  x86_reg dummy=0;
458  x86_reg dstW_reg = dstW;
459  x86_reg uv_off = c->uv_off_byte;
460 
463  "pxor %%mm7, %%mm7 \n\t"
464  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
465 #ifdef DITHER1XBPP
466  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
467  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
468  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
469 #endif
470  WRITERGB15(%4, %5, %%REGa)
472 }
473 
/*
 * Plain-MMX store of eight pixels as 24 packed bytes, followed by the outer
 * loop branch.
 *
 * In:  mm2 = B, mm4 = G, mm5 = R (eight bytes each), mm7 = 0.
 * The planes are first interleaved into four qwords of two 4-byte pixels
 * each (fourth byte zero), then the padding bytes are squeezed out with
 * shifts and ORs so that three qwords can be stored at dst, dst+8 and
 * dst+16.  Unlike the 16/32-bit writers, dst itself is advanced (by 24) and
 * is not indexed; index is advanced by 8 and control returns to label 1
 * while index < dstw (unsigned comparison).
 * All of mm0-mm7 are overwritten.  The byte diagrams in the line comments
 * list bytes most-significant first.
 */
474 #define WRITEBGR24MMX(dst, dstw, index) \
475  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
476  "movq %%mm2, %%mm1 \n\t" /* B */\
477  "movq %%mm5, %%mm6 \n\t" /* R */\
478  "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
479  "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
480  "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
481  "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
482  "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
483  "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
484  "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
485  "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
486  "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
487  "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
488 \
489  "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
490  "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
491  "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
492  "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
493 \
494  "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
495  "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
496  "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
497  "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
498 \
499  "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
500  "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
501  "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
502  "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
503 \
504  "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
505  "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
506  "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
507  "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
508  MOVNTQ(%%mm0, (dst))\
509 \
510  "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
511  "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
512  "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
513  "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
514  MOVNTQ(%%mm6, 8(dst))\
515 \
516  "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
517  "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
518  "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
519  MOVNTQ(%%mm5, 16(dst))\
520 \
521  "add $24, "#dst" \n\t"\
522 \
523  "add $8, "#index" \n\t"\
524  "cmp "#dstw", "#index" \n\t"\
525  " jb 1b \n\t"
526 
/*
 * MMXEXT store of eight pixels as 24 packed bytes, followed by the outer
 * loop branch.  Same interface as WRITEBGR24MMX.
 *
 * In:  mm2 = B, mm4 = G, mm5 = R (eight bytes each).
 * Each of the three output qwords is built by replicating the needed source
 * bytes with pshufw, selecting byte lanes with the masks ff_M24A, ff_M24B
 * and ff_M24C (defined outside this file) and OR-ing the three components
 * together.  mm0 and mm7 are loaded with ff_M24A and ff_M24C, so mm7 need
 * not be zero on entry here; mm4 is shifted right by one byte midway.
 * Stores go to dst, dst+8 and dst+16; dst is advanced by 24, index by 8, and
 * control returns to label 1 while index < dstw (unsigned comparison).
 * mm0, mm1, mm3, mm4, mm6 and mm7 are overwritten.
 */
527 #define WRITEBGR24MMXEXT(dst, dstw, index) \
528  /* mm2=B, %%mm4=G, %%mm5=R; mm7 is overwritten with ff_M24C below */\
529  "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
530  "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
531  "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
532  "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
533  "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
534 \
535  "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
536  "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
537  "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
538 \
539  "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
540  "por %%mm1, %%mm6 \n\t"\
541  "por %%mm3, %%mm6 \n\t"\
542  MOVNTQ(%%mm6, (dst))\
543 \
544  "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
545  "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
546  "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
547  "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
548 \
549  "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
550  "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
551  "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
552 \
553  "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
554  "por %%mm3, %%mm6 \n\t"\
555  MOVNTQ(%%mm6, 8(dst))\
556 \
557  "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
558  "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
559  "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
560 \
561  "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
562  "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
563  "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
564 \
565  "por %%mm1, %%mm3 \n\t"\
566  "por %%mm3, %%mm6 \n\t"\
567  MOVNTQ(%%mm6, 16(dst))\
568 \
569  "add $24, "#dst" \n\t"\
570 \
571  "add $8, "#index" \n\t"\
572  "cmp "#dstw", "#index" \n\t"\
573  " jb 1b \n\t"
574 
/*
 * WRITEBGR24 resolves to the pshufw-based writer when MMXEXT is enabled and
 * to the plain MMX writer otherwise.  Any previous definition is dropped
 * first.
 */
575 #if COMPILE_TEMPLATE_MMXEXT
576 #undef WRITEBGR24
577 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
578 #else
579 #undef WRITEBGR24
580 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
581 #endif
582 
/*
 * 24-bit writer from the *_X family (the "_ar" suffix presumably marks the
 * accurate-rounding variant -- confirm).
 *
 * NOTE(review): the listing numbers embedded in this block skip 594-595, so
 * the lines that open the asm statement and run the scaling/conversion
 * macros are absent from this copy; the block is not compilable as shown.
 *
 * Visible steps: clear mm7, compute REG_c = dest + 3*REG_a as the byte
 * address of the current pixel, then store via WRITEBGR24 with REG_c as the
 * running destination, %5 (dstW_reg) as the bound and REG_a as the index.
 * The operand list is written out here rather than taken from
 * YSCALEYUV2PACKEDX_END because REG_c has to be added to the clobbers;
 * operand numbering is otherwise identical (%0 = &c->redDither, %4 = dest,
 * %5 = dstW_reg, %6 = uv_off).
 */
583 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
584  const int16_t **lumSrc, int lumFilterSize,
585  const int16_t *chrFilter, const int16_t **chrUSrc,
586  const int16_t **chrVSrc,
587  int chrFilterSize, const int16_t **alpSrc,
588  uint8_t *dest, int dstW, int dstY)
589 {
590  x86_reg dummy=0;
591  x86_reg dstW_reg = dstW;
592  x86_reg uv_off = c->uv_off_byte;
593 
596  "pxor %%mm7, %%mm7 \n\t"
597  "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
598  "add %4, %%"REG_c" \n\t"
599  WRITEBGR24(%%REGc, %5, %%REGa)
600  :: "r" (&c->redDither),
601  "m" (dummy), "m" (dummy), "m" (dummy),
602  "r" (dest), "m" (dstW_reg), "m"(uv_off)
603  : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
604  );
605 }
606 
/*
 * 24-bit writer from the *_X family (counterpart of yuv2bgr24_X_ar).
 *
 * NOTE(review): the listing numbers embedded in this block skip 618-619, so
 * the lines that open the asm statement and run the scaling/conversion
 * macros are absent from this copy; the block is not compilable as shown.
 *
 * Visible steps: clear mm7, compute REG_c = dest + 3*REG_a as the byte
 * address of the current pixel, then store via WRITEBGR24 with REG_c as the
 * running destination, %5 (dstW_reg) as the bound and REG_a as the index.
 * The operand list is written out here rather than taken from
 * YSCALEYUV2PACKEDX_END because REG_c has to be added to the clobbers;
 * operand numbering is otherwise identical (%0 = &c->redDither, %4 = dest,
 * %5 = dstW_reg, %6 = uv_off).
 */
607 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
608  const int16_t **lumSrc, int lumFilterSize,
609  const int16_t *chrFilter, const int16_t **chrUSrc,
610  const int16_t **chrVSrc,
611  int chrFilterSize, const int16_t **alpSrc,
612  uint8_t *dest, int dstW, int dstY)
613 {
614  x86_reg dummy=0;
615  x86_reg dstW_reg = dstW;
616  x86_reg uv_off = c->uv_off_byte;
617 
620  "pxor %%mm7, %%mm7 \n\t"
621  "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
622  "add %4, %%"REG_c" \n\t"
623  WRITEBGR24(%%REGc, %5, %%REGa)
624  :: "r" (&c->redDither),
625  "m" (dummy), "m" (dummy), "m" (dummy),
626  "r" (dest), "m" (dstW_reg), "m"(uv_off)
627  : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
628  );
629 }
630 
/*
 * Pack eight pixels as interleaved luma/chroma bytes, store them and close
 * the outer loop.
 *
 * In:  mm1, mm7 = luma words (first / second group of four), mm3 and mm4 =
 *      the two chroma planes as words.  All are narrowed to bytes with
 *      unsigned saturation.
 * mm3 and mm4 are byte-interleaved (mm3's byte first), and the result is
 * interleaved with the eight luma bytes, so memory receives
 * luma, mm3-chroma, luma, mm4-chroma, ...  Two qwords are stored at
 * dst + index*2; index is advanced by 8 and control returns to label 1
 * while index < dstw (unsigned comparison).
 */
631 #define REAL_WRITEYUY2(dst, dstw, index) \
632  "packuswb %%mm3, %%mm3 \n\t"\
633  "packuswb %%mm4, %%mm4 \n\t"\
634  "packuswb %%mm7, %%mm1 \n\t"\
635  "punpcklbw %%mm4, %%mm3 \n\t"\
636  "movq %%mm1, %%mm7 \n\t"\
637  "punpcklbw %%mm3, %%mm1 \n\t"\
638  "punpckhbw %%mm3, %%mm7 \n\t"\
639 \
640  MOVNTQ(%%mm1, (dst, index, 2))\
641  MOVNTQ(%%mm7, 8(dst, index, 2))\
642 \
643  "add $8, "#index" \n\t"\
644  "cmp "#dstw", "#index" \n\t"\
645  " jb 1b \n\t"
/* Extra macro level so that arguments are macro-expanded before being
 * stringified. */
646 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
647 
/*
 * Packed 4:2:2 writer from the *_X family (the "_ar" suffix presumably marks
 * the accurate-rounding variant -- confirm).
 *
 * NOTE(review): the listing numbers embedded in this block skip 659 and 666,
 * so the lines that open the asm statement / run the scaling macro and that
 * close the statement are absent from this copy; the block is not compilable
 * as shown.
 *
 * Visible steps: declare dummy, dstW_reg and uv_off (the names
 * YSCALEYUV2PACKEDX_END uses as asm operands), shift the four word registers
 * mm3, mm4, mm1 and mm7 right by 3 (arithmetic), then store via WRITEYUY2
 * with dest=%4, bound=%5 and REG_a as the pixel index.  No RGB conversion
 * takes place here.
 */
648 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
649  const int16_t **lumSrc, int lumFilterSize,
650  const int16_t *chrFilter, const int16_t **chrUSrc,
651  const int16_t **chrVSrc,
652  int chrFilterSize, const int16_t **alpSrc,
653  uint8_t *dest, int dstW, int dstY)
654 {
655  x86_reg dummy=0;
656  x86_reg dstW_reg = dstW;
657  x86_reg uv_off = c->uv_off_byte;
658 
660  /* mm1, mm7 = luma; mm3, mm4 = chroma (the registers WRITEYUY2 consumes) */
661  "psraw $3, %%mm3 \n\t"
662  "psraw $3, %%mm4 \n\t"
663  "psraw $3, %%mm1 \n\t"
664  "psraw $3, %%mm7 \n\t"
665  WRITEYUY2(%4, %5, %%REGa)
667 }
668 
/*
 * Packed 4:2:2 writer from the *_X family (counterpart of
 * yuv2yuyv422_X_ar).
 *
 * NOTE(review): the listing numbers embedded in this block skip 680 and 687,
 * so the lines that open the asm statement / run the scaling macro and that
 * close the statement are absent from this copy; the block is not compilable
 * as shown.
 *
 * Visible steps: declare dummy, dstW_reg and uv_off (the names
 * YSCALEYUV2PACKEDX_END uses as asm operands), shift the four word registers
 * mm3, mm4, mm1 and mm7 right by 3 (arithmetic), then store via WRITEYUY2
 * with dest=%4, bound=%5 and REG_a as the pixel index.  No RGB conversion
 * takes place here.
 */
669 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
670  const int16_t **lumSrc, int lumFilterSize,
671  const int16_t *chrFilter, const int16_t **chrUSrc,
672  const int16_t **chrVSrc,
673  int chrFilterSize, const int16_t **alpSrc,
674  uint8_t *dest, int dstW, int dstY)
675 {
676  x86_reg dummy=0;
677  x86_reg dstW_reg = dstW;
678  x86_reg uv_off = c->uv_off_byte;
679 
681  /* mm1, mm7 = luma; mm3, mm4 = chroma (the registers WRITEYUY2 consumes) */
682  "psraw $3, %%mm3 \n\t"
683  "psraw $3, %%mm4 \n\t"
684  "psraw $3, %%mm1 \n\t"
685  "psraw $3, %%mm7 \n\t"
686  WRITEYUY2(%4, %5, %%REGa)
688 }
689 
/*
 * Chroma part of the two-line blend: zeroes index, defines outer-loop label
 * 1, blends the two chroma lines addressed by operands %2 and %3 and starts
 * the colour conversion.
 *
 *   index  register used as the byte offset into the chroma lines
 *   c      register operand holding the context base address
 *
 * U is read at (%2/%3 + index); V at the same address plus the value stored
 * at UV_OFF_BYTE(c) (index is temporarily advanced and then restored).
 * The blend is line1>>4 + ((line0 - line1) * coeff >> 16) with the
 * coefficient qword at CHR_MMX_FILTER_OFFSET+8(c).
 * On exit: mm2 = U - U_OFFSET, mm5 = V - V_OFFSET, mm3 and mm4 = those
 * values multiplied (pmulhw) by UG_COEFF / VG_COEFF.  mm0 is scratch.
 */
690 #define REAL_YSCALEYUV2RGB_UV(index, c) \
691  "xor "#index", "#index" \n\t"\
692  ".p2align 4 \n\t"\
693  "1: \n\t"\
694  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
695  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
696  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
697  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+uv_off]*/\
698  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+uv_off]*/\
699  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
700  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
701  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+uv_off] - uvbuf1[eax+uv_off]*/\
702  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
703  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
704  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+uv_off] - uvbuf1[eax+uv_off])uvalpha1>>16*/\
705  "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\
706  "psraw $4, %%mm4 \n\t" /* uvbuf1[eax+uv_off] >>4*/\
707  "paddw %%mm2, %%mm3 \n\t" /* uvbuf1[eax]>>4 + (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
708  "paddw %%mm5, %%mm4 \n\t" /* uvbuf1[eax+uv_off]>>4 + (uvbuf0[eax+uv_off] - uvbuf1[eax+uv_off])uvalpha1>>16*/\
709  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
710  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
711  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
712  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
713  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
714  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
715  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
716 
/*
 * Two-line blend of two adjacent qwords (used for luma and for alpha).
 *
 *   index   register holding the offset; data is read at b + 2*index and
 *           b + 2*index + 8
 *   c       register operand holding the context base address
 *   b1, b2  registers pointing at the two input lines
 *
 * Computes line2>>4 + ((line1 - line2) * coeff >> 16) with the coefficient
 * qword at LUM_MMX_FILTER_OFFSET+8(c).  Results are left in mm1 (first
 * qword) and mm7 (second qword); mm0 and mm6 are scratch.
 */
717 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
718  "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
719  "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
720  "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
721  "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
722  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
723  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
724  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
725  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
726  "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\
727  "psraw $4, %%mm7 \n\t" /* buf1[eax] >>4*/\
728  "paddw %%mm0, %%mm1 \n\t" /* buf1[eax]>>4 + (buf0[eax] - buf1[eax])yalpha1>>16*/\
729  "paddw %%mm6, %%mm7 \n\t" /* buf1[eax]>>4 + (buf0[eax] - buf1[eax])yalpha1>>16*/\
730 
/*
 * Final stage of the colour conversion; same arithmetic as the tail of
 * YSCALEYUV2RGBX but with the context base taken from macro argument c.
 *
 * In:  mm1, mm7 = blended luma; mm2, mm5 = offset-corrected U, V;
 *      mm3, mm4 = the UG/VG products (as left by REAL_YSCALEYUV2RGB_UV).
 * Out: mm2 = B, mm4 = G, mm5 = R, eight unsigned-saturated bytes each.
 * mm0, mm3 and mm6 are scratch.
 */
731 #define REAL_YSCALEYUV2RGB_COEFF(c) \
732  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
733  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
734  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
735  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
736  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
737  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
738  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
739  "paddw %%mm3, %%mm4 \n\t"\
740  "movq %%mm2, %%mm0 \n\t"\
741  "movq %%mm5, %%mm6 \n\t"\
742  "movq %%mm4, %%mm3 \n\t"\
743  "punpcklwd %%mm2, %%mm2 \n\t"\
744  "punpcklwd %%mm5, %%mm5 \n\t"\
745  "punpcklwd %%mm4, %%mm4 \n\t"\
746  "paddw %%mm1, %%mm2 \n\t"\
747  "paddw %%mm1, %%mm5 \n\t"\
748  "paddw %%mm1, %%mm4 \n\t"\
749  "punpckhwd %%mm0, %%mm0 \n\t"\
750  "punpckhwd %%mm6, %%mm6 \n\t"\
751  "punpckhwd %%mm3, %%mm3 \n\t"\
752  "paddw %%mm7, %%mm0 \n\t"\
753  "paddw %%mm7, %%mm6 \n\t"\
754  "paddw %%mm7, %%mm3 \n\t"\
755  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
756  "packuswb %%mm0, %%mm2 \n\t"\
757  "packuswb %%mm6, %%mm5 \n\t"\
758  "packuswb %%mm3, %%mm4 \n\t"\
759 
/* Extra macro level so that arguments are macro-expanded before
 * REAL_YSCALEYUV2RGB_YA stringifies them. */
760 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
761 
/*
 * Complete two-line blend plus colour conversion: chroma blend, luma blend
 * of the lines in operands %0 and %1, then the coefficient stage.  Leaves
 * mm2 = B, mm4 = G, mm5 = R as packed bytes and defines loop label 1.
 */
762 #define YSCALEYUV2RGB(index, c) \
763  REAL_YSCALEYUV2RGB_UV(index, c) \
764  REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
765  REAL_YSCALEYUV2RGB_COEFF(c)
766 
/*
 * Blend two input lines and write 32-bit pixels to dest.
 *
 * buf[0]/buf[1] and ubuf[0]/ubuf[1] are handed to the asm as operands %0-%3
 * (fixed to the c, d, S and D registers); %5 is &c->redDither in the a
 * register and serves as the base for every context access.  vbuf, dstW,
 * yalpha, uvalpha and y are not referenced directly: V data is reached from
 * the ubuf pointers via UV_OFF_BYTE, the blend coefficients are read from
 * the context, and the loop bound is read from 8280(%5) (the sibling *_2
 * functions note that 8280 == DSTW_OFFSET).
 *
 * Alpha path (CONFIG_SWSCALE_ALPHA && c->alpPixBuf):
 *  - x86-64: r8 is the loop index and abuf0/abuf1 are extra register
 *    operands (%6, %7); r8 is the only declared clobber.
 *  - otherwise: the alpha pointers are parked in c->u_temp / c->v_temp,
 *    REG_b is saved to ESP_OFFSET(%5) and reused as the destination pointer,
 *    REG_BP is pushed and reused as the loop index, and %0/%1 are pushed,
 *    pointed at the alpha lines for the blend, then popped.
 * Opaque path: as the non-x86-64 alpha path but with alpha forced to all
 * ones by pcmpeqd.
 *
 * NOTE(review): abuf0/abuf1 are const int16_t * but are stored through a
 * const uint16_t ** cast -- the pointee signedness differs; confirm this is
 * intended.
 */
770 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
771  const int16_t *ubuf[2], const int16_t *vbuf[2],
772  const int16_t *abuf[2], uint8_t *dest,
773  int dstW, int yalpha, int uvalpha, int y)
774 {
775  const int16_t *buf0 = buf[0], *buf1 = buf[1],
776  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
777 
778  if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
779  const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
780 #if ARCH_X86_64
781  __asm__ volatile(
782  YSCALEYUV2RGB(%%r8, %5)
783  YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
784  "psraw $3, %%mm1 \n\t" /* blended alpha >>3 */
785  "psraw $3, %%mm7 \n\t" /* blended alpha >>3 */
786  "packuswb %%mm7, %%mm1 \n\t"
787  WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
788  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
789  "a" (&c->redDither),
790  "r" (abuf0), "r" (abuf1)
791  : "%r8"
792  );
793 #else
794  *(const uint16_t **)(&c->u_temp)=abuf0;
795  *(const uint16_t **)(&c->v_temp)=abuf1;
796  __asm__ volatile(
797  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
798  "mov %4, %%"REG_b" \n\t"
799  "push %%"REG_BP" \n\t"
800  YSCALEYUV2RGB(%%REGBP, %5)
801  "push %0 \n\t"
802  "push %1 \n\t"
803  "mov "U_TEMP"(%5), %0 \n\t"
804  "mov "V_TEMP"(%5), %1 \n\t"
805  YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
806  "psraw $3, %%mm1 \n\t" /* blended alpha >>3 */
807  "psraw $3, %%mm7 \n\t" /* blended alpha >>3 */
808  "packuswb %%mm7, %%mm1 \n\t"
809  "pop %1 \n\t"
810  "pop %0 \n\t"
811  WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
812  "pop %%"REG_BP" \n\t"
813  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
814  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
815  "a" (&c->redDither)
816  );
817 #endif
818  } else {
819  __asm__ volatile(
820  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
821  "mov %4, %%"REG_b" \n\t"
822  "push %%"REG_BP" \n\t"
823  YSCALEYUV2RGB(%%REGBP, %5)
824  "pcmpeqd %%mm7, %%mm7 \n\t"
825  WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
826  "pop %%"REG_BP" \n\t"
827  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
828  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
829  "a" (&c->redDither)
830  );
831  }
832 }
833 
/*
 * Blend two input lines and write 24-bit pixels to dest.
 *
 * buf[0]/buf[1] and ubuf[0]/ubuf[1] are handed to the asm as operands %0-%3
 * (c, d, S and D registers); %4 is dest in memory and %5 is &c->redDither in
 * the a register, the base for every context access.  vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not referenced directly in this function.
 *
 * REG_b is saved to ESP_OFFSET(%5), loaded with dest and advanced by
 * WRITEBGR24 as the running output pointer; REG_BP is pushed and used as the
 * loop index.  Both are restored before the asm statement ends.  mm7 is
 * cleared before the store macro.
 */
834 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
835  const int16_t *ubuf[2], const int16_t *vbuf[2],
836  const int16_t *abuf[2], uint8_t *dest,
837  int dstW, int yalpha, int uvalpha, int y)
838 {
839  const int16_t *buf0 = buf[0], *buf1 = buf[1],
840  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
841 
842  //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
843  __asm__ volatile(
844  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
845  "mov %4, %%"REG_b" \n\t"
846  "push %%"REG_BP" \n\t"
847  YSCALEYUV2RGB(%%REGBP, %5)
848  "pxor %%mm7, %%mm7 \n\t"
849  WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
850  "pop %%"REG_BP" \n\t"
851  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
852  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
853  "a" (&c->redDither)
854  );
855 }
856 
/*
 * Blend two input lines and write 16-bit (5-5-5) pixels to dest.
 *
 * buf[0]/buf[1] and ubuf[0]/ubuf[1] are handed to the asm as operands %0-%3
 * (c, d, S and D registers); %4 is dest in memory and %5 is &c->redDither in
 * the a register, the base for every context access.  vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not referenced directly in this function.
 *
 * REG_b is saved to ESP_OFFSET(%5) and reused as the destination base;
 * REG_BP is pushed and used as the loop index.  Both are restored before the
 * asm statement ends.  When DITHER1XBPP is defined, the dither values at
 * BLUE_/GREEN_/RED_DITHER(%5) are added with unsigned saturation before
 * packing.
 */
857 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
858  const int16_t *ubuf[2], const int16_t *vbuf[2],
859  const int16_t *abuf[2], uint8_t *dest,
860  int dstW, int yalpha, int uvalpha, int y)
861 {
862  const int16_t *buf0 = buf[0], *buf1 = buf[1],
863  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
864 
865  //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
866  __asm__ volatile(
867  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
868  "mov %4, %%"REG_b" \n\t"
869  "push %%"REG_BP" \n\t"
870  YSCALEYUV2RGB(%%REGBP, %5)
871  "pxor %%mm7, %%mm7 \n\t"
872  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
873 #ifdef DITHER1XBPP
874  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
875  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
876  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
877 #endif
878  WRITERGB15(%%REGb, 8280(%5), %%REGBP)
879  "pop %%"REG_BP" \n\t"
880  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
881  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
882  "a" (&c->redDither)
883  );
884 }
885 
/*
 * Blend two input lines and write 16-bit (5-6-5) pixels to dest.
 *
 * buf[0]/buf[1] and ubuf[0]/ubuf[1] are handed to the asm as operands %0-%3
 * (c, d, S and D registers); %4 is dest in memory and %5 is &c->redDither in
 * the a register, the base for every context access.  vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not referenced directly in this function.
 *
 * REG_b is saved to ESP_OFFSET(%5) and reused as the destination base;
 * REG_BP is pushed and used as the loop index.  Both are restored before the
 * asm statement ends.  When DITHER1XBPP is defined, the dither values at
 * BLUE_/GREEN_/RED_DITHER(%5) are added with unsigned saturation before
 * packing.
 */
886 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
887  const int16_t *ubuf[2], const int16_t *vbuf[2],
888  const int16_t *abuf[2], uint8_t *dest,
889  int dstW, int yalpha, int uvalpha, int y)
890 {
891  const int16_t *buf0 = buf[0], *buf1 = buf[1],
892  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
893 
894  //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
895  __asm__ volatile(
896  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
897  "mov %4, %%"REG_b" \n\t"
898  "push %%"REG_BP" \n\t"
899  YSCALEYUV2RGB(%%REGBP, %5)
900  "pxor %%mm7, %%mm7 \n\t"
901  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
902 #ifdef DITHER1XBPP
903  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
904  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
905  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
906 #endif
907  WRITERGB16(%%REGb, 8280(%5), %%REGBP)
908  "pop %%"REG_BP" \n\t"
909  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
910  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
911  "a" (&c->redDither)
912  );
913 }
914 
/*
 * Asm fragment: two-line blend of luma and chroma without colour conversion.
 *
 * index - register used as the loop counter (zeroed here)
 * c     - base register for the context offsets
 * Expects %0/%1 = the two luma lines and %2/%3 = the two chroma lines.
 *
 * Before the loop, the four words at CHR_MMX_FILTER_OFFSET+8 and
 * LUM_MMX_FILTER_OFFSET+8 are shifted right by 3 and stored back, i.e. the
 * context is modified in place on every expansion.
 * NOTE(review): presumably the caller rewrites those words before each call
 * -- confirm.
 *
 * Each pass computes (line1 >> 7) + (((line0 - line1) * coeff) >> 16) and
 * leaves:
 *   mm3 = blended chroma read at index
 *   mm4 = blended chroma read at index + the value stored at UV_OFF_BYTE(c)
 *         (the "+2048" in the per-line comments stands for that offset)
 *   mm1 = blended luma [index .. index+3], mm7 = blended luma [index+4 .. index+7]
 * Label "1:" is opened here; the branch back to it is not part of this macro.
 */
915 #define REAL_YSCALEYUV2PACKED(index, c) \
916  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
917  "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
918  "psraw $3, %%mm0 \n\t"\
919  "psraw $3, %%mm1 \n\t"\
920  "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
921  "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
922  "xor "#index", "#index" \n\t"\
923  ".p2align 4 \n\t"\
924  "1: \n\t"\
925  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
926  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
927  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
928  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
929  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
930  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
931  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
932  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
933  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
934  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
935  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
936  "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
937  "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
938  "paddw %%mm2, %%mm3 \n\t" /* (uvbuf1[eax]>>7) + ((uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16)*/\
939  "paddw %%mm5, %%mm4 \n\t" /* (uvbuf1[eax+2048]>>7) + ((uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16)*/\
940  "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
941  "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
942  "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax+4]*/\
943  "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax+4]*/\
944  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
945  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax+4] - buf1[eax+4]*/\
946  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
947  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax+4] - buf1[eax+4])yalpha1>>16*/\
948  "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
949  "psraw $7, %%mm7 \n\t" /* buf1[eax+4] >>7*/\
950  "paddw %%mm0, %%mm1 \n\t" /* (buf1[eax]>>7) + ((buf0[eax] - buf1[eax])yalpha1>>16)*/\
951  "paddw %%mm6, %%mm7 \n\t" /* (buf1[eax+4]>>7) + ((buf0[eax+4] - buf1[eax+4])yalpha1>>16)*/\
952 
/* Extra level of indirection so that macro arguments are expanded before being stringified. */
953 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
954 
/*
 * Produce one line of packed YUYV 4:2:2 output from two input lines using
 * MMX inline assembly. sws_init_swscale() installs this routine as
 * c->yuv2packed2 when the destination format is AV_PIX_FMT_YUYV422.
 *
 * The blend itself is done by YSCALEYUV2PACKED, which takes its weights from
 * the context (it reads and rewrites the words at CHR_MMX_FILTER_OFFSET+8 and
 * LUM_MMX_FILTER_OFFSET+8); yalpha and uvalpha are not referenced directly in
 * this body, nor are vbuf, abuf, dstW and y.
 *
 * asm operands: %0=buf0, %1=buf1, %2=ubuf0, %3=ubuf1, %4=dest (memory),
 * %5=&c->redDither, the base address for every NAME(%5) context offset.
 */
955 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
956  const int16_t *ubuf[2], const int16_t *vbuf[2],
957  const int16_t *abuf[2], uint8_t *dest,
958  int dstW, int yalpha, int uvalpha, int y)
959 {
960  const int16_t *buf0 = buf[0], *buf1 = buf[1],
961  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
962 
963  //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
964  __asm__ volatile(
 /* REG_b is saved in the context because it is not declared as a clobber */
965  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
 /* REG_b = destination pointer */
966  "mov %4, %%"REG_b" \n\t"
 /* the frame pointer doubles as the loop index inside the macros */
967  "push %%"REG_BP" \n\t"
968  YSCALEYUV2PACKED(%%REGBP, %5)
969  WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
970  "pop %%"REG_BP" \n\t"
971  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
972  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
973  "a" (&c->redDither)
974  );
975 }
976 
/*
 * Asm fragment: YUV -> RGB conversion of a single input line (no vertical
 * blending; only %0 = luma line and %2 = chroma line are read).
 *
 * index - register used as the loop counter (zeroed here)
 * c     - base register for the context offsets (U_OFFSET, V_OFFSET,
 *         Y_OFFSET and the *_COEFF tables)
 *
 * Chroma is read at index and at index + the value stored at UV_OFF_BYTE(c)
 * (the "+2048" in the per-line comments stands for that offset). All samples
 * are shifted right by 4 before the offsets are subtracted.
 *
 * On exit from one pass the packed bytes are in mm2 = B, mm4 = G, mm5 = R;
 * mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 * Label "1:" is opened here; the branch back to it is not part of this macro.
 */
977 #define REAL_YSCALEYUV2RGB1(index, c) \
978  "xor "#index", "#index" \n\t"\
979  ".p2align 4 \n\t"\
980  "1: \n\t"\
981  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
982  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
983  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
984  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
985  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4*/\
986  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4*/\
987  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
988  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
989  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
990  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
991  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
992  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
993  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
994  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
995  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
996  "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
997  "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
998  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
999  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1000  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1001  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1002  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1003  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1004  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1005  "paddw %%mm3, %%mm4 \n\t"\
1006  "movq %%mm2, %%mm0 \n\t"\
1007  "movq %%mm5, %%mm6 \n\t"\
1008  "movq %%mm4, %%mm3 \n\t"\
1009  "punpcklwd %%mm2, %%mm2 \n\t"\
1010  "punpcklwd %%mm5, %%mm5 \n\t"\
1011  "punpcklwd %%mm4, %%mm4 \n\t"\
1012  "paddw %%mm1, %%mm2 \n\t"\
1013  "paddw %%mm1, %%mm5 \n\t"\
1014  "paddw %%mm1, %%mm4 \n\t"\
1015  "punpckhwd %%mm0, %%mm0 \n\t"\
1016  "punpckhwd %%mm6, %%mm6 \n\t"\
1017  "punpckhwd %%mm3, %%mm3 \n\t"\
1018  "paddw %%mm7, %%mm0 \n\t"\
1019  "paddw %%mm7, %%mm6 \n\t"\
1020  "paddw %%mm7, %%mm3 \n\t"\
1021  /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = built from Y1, 2 = built from Y2) */\
1022  "packuswb %%mm0, %%mm2 \n\t"\
1023  "packuswb %%mm6, %%mm5 \n\t"\
1024  "packuswb %%mm3, %%mm4 \n\t"\
1025 
/* Extra level of indirection so that macro arguments are expanded before being stringified. */
1026 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
1027 
/*
 * Asm fragment: same conversion as YSCALEYUV2RGB1, but the chroma is the sum
 * of two lines (%2 and %3) shifted right by 5, i.e. their average at the same
 * scale as the ">> 4" of the single-line variant. Luma still comes from %0
 * only.
 *
 * index - register used as the loop counter (zeroed here)
 * c     - base register for the context offsets
 *
 * The sum uses a 16-bit paddw followed by a logical shift (psrlw); see the
 * FIXME on those lines about possible overflow.
 * On exit from one pass the packed bytes are in mm2 = B, mm4 = G, mm5 = R.
 * Label "1:" is opened here; the branch back to it is not part of this macro.
 */
1028 // do vertical chrominance interpolation
1029 #define REAL_YSCALEYUV2RGB1b(index, c) \
1030  "xor "#index", "#index" \n\t"\
1031  ".p2align 4 \n\t"\
1032  "1: \n\t"\
1033  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1034  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1035  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1036  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1037  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1038  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1039  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1040  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1041  "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
1042  "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
1043  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1044  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1045  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1046  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1047  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1048  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1049  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1050  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1051  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
1052  "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
1053  "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
1054  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1055  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1056  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1057  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1058  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1059  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1060  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1061  "paddw %%mm3, %%mm4 \n\t"\
1062  "movq %%mm2, %%mm0 \n\t"\
1063  "movq %%mm5, %%mm6 \n\t"\
1064  "movq %%mm4, %%mm3 \n\t"\
1065  "punpcklwd %%mm2, %%mm2 \n\t"\
1066  "punpcklwd %%mm5, %%mm5 \n\t"\
1067  "punpcklwd %%mm4, %%mm4 \n\t"\
1068  "paddw %%mm1, %%mm2 \n\t"\
1069  "paddw %%mm1, %%mm5 \n\t"\
1070  "paddw %%mm1, %%mm4 \n\t"\
1071  "punpckhwd %%mm0, %%mm0 \n\t"\
1072  "punpckhwd %%mm6, %%mm6 \n\t"\
1073  "punpckhwd %%mm3, %%mm3 \n\t"\
1074  "paddw %%mm7, %%mm0 \n\t"\
1075  "paddw %%mm7, %%mm6 \n\t"\
1076  "paddw %%mm7, %%mm3 \n\t"\
1077  /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 (1 = built from Y1, 2 = built from Y2) */\
1078  "packuswb %%mm0, %%mm2 \n\t"\
1079  "packuswb %%mm6, %%mm5 \n\t"\
1080  "packuswb %%mm3, %%mm4 \n\t"\
1081 
/* Extra level of indirection so that macro arguments are expanded before being stringified. */
1082 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
1083 
/*
 * Asm fragment: load eight 16-bit alpha samples from operand %1 at
 * index*2 bytes, shift each right by 7 and pack them to unsigned bytes
 * in mm7. mm1 is overwritten as scratch.
 * The callers in this file bind %1 to abuf0 when they use this macro.
 */
1084 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1085  "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
1086  "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
1087  "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
1088  "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
1089  "packuswb %%mm1, %%mm7 \n\t"
/* Extra level of indirection so that macro arguments are expanded before being stringified. */
1090 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1091 
/*
 * Convert a single luma line (buf0) plus chroma to packed 32-bit RGB using
 * MMX inline assembly. sws_init_swscale() installs this routine as
 * c->yuv2packed1 when the destination format is AV_PIX_FMT_RGB32.
 *
 * uvalpha < 2048 : chroma is taken from ubuf[0] only (YSCALEYUV2RGB1).
 * otherwise      : chroma is the average of ubuf[0] and ubuf[1]
 *                  (YSCALEYUV2RGB1b).
 * In each case, when CONFIG_SWSCALE_ALPHA is enabled and c->alpPixBuf is set,
 * the alpha bytes come from abuf0 (bound to operand %1); otherwise mm7 is set
 * to all ones with pcmpeqd and passed to WRITEBGR32 as the alpha register.
 *
 * vbuf, dstW and y are not referenced directly in this body.
 * asm operands: %0=buf0, %1=abuf0 or buf1, %2=ubuf0, %3=ubuf1,
 * %4=dest (memory), %5=&c->redDither (base for the NAME(%5) context offsets).
 */
1095 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
1096  const int16_t *ubuf[2], const int16_t *vbuf[2],
1097  const int16_t *abuf0, uint8_t *dest,
1098  int dstW, int uvalpha, int y)
1099 {
1100  const int16_t *ubuf0 = ubuf[0];
1101  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1102 
1103  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
 /* single chroma line: operand %3 is just a second copy of ubuf[0] */
1104  const int16_t *ubuf1 = ubuf[0];
1105  if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1106  __asm__ volatile(
 /* REG_b is saved in the context, then reused as the destination pointer */
1107  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1108  "mov %4, %%"REG_b" \n\t"
 /* the frame pointer is saved so it can serve as the loop index */
1109  "push %%"REG_BP" \n\t"
1110  YSCALEYUV2RGB1(%%REGBP, %5)
1111  YSCALEYUV2RGB1_ALPHA(%%REGBP)
1112  WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1113  "pop %%"REG_BP" \n\t"
1114  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1115  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1116  "a" (&c->redDither)
1117  );
1118  } else {
1119  __asm__ volatile(
1120  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1121  "mov %4, %%"REG_b" \n\t"
1122  "push %%"REG_BP" \n\t"
1123  YSCALEYUV2RGB1(%%REGBP, %5)
 /* no alpha plane: every bit of mm7 is set */
1124  "pcmpeqd %%mm7, %%mm7 \n\t"
1125  WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1126  "pop %%"REG_BP" \n\t"
1127  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1128  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1129  "a" (&c->redDither)
1130  );
1131  }
1132  } else {
 /* two chroma lines: YSCALEYUV2RGB1b averages operands %2 and %3 */
1133  const int16_t *ubuf1 = ubuf[1];
1134  if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1135  __asm__ volatile(
1136  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1137  "mov %4, %%"REG_b" \n\t"
1138  "push %%"REG_BP" \n\t"
1139  YSCALEYUV2RGB1b(%%REGBP, %5)
1140  YSCALEYUV2RGB1_ALPHA(%%REGBP)
1141  WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1142  "pop %%"REG_BP" \n\t"
1143  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1144  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1145  "a" (&c->redDither)
1146  );
1147  } else {
1148  __asm__ volatile(
1149  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1150  "mov %4, %%"REG_b" \n\t"
1151  "push %%"REG_BP" \n\t"
1152  YSCALEYUV2RGB1b(%%REGBP, %5)
 /* no alpha plane: every bit of mm7 is set */
1153  "pcmpeqd %%mm7, %%mm7 \n\t"
1154  WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1155  "pop %%"REG_BP" \n\t"
1156  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1157  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1158  "a" (&c->redDither)
1159  );
1160  }
1161  }
1162 }
1163 
/*
 * Convert a single luma line (buf0) plus chroma to packed 24-bit BGR using
 * MMX inline assembly. sws_init_swscale() installs this routine as
 * c->yuv2packed1 when the destination format is AV_PIX_FMT_BGR24.
 *
 * uvalpha < 2048 : chroma is taken from ubuf[0] only (YSCALEYUV2RGB1).
 * otherwise      : chroma is the average of ubuf[0] and ubuf[1]
 *                  (YSCALEYUV2RGB1b).
 * mm7 is cleared before WRITEBGR24 in both branches.
 *
 * vbuf, abuf0, dstW and y are not referenced directly in this body.
 * asm operands: %0=buf0, %1=buf1 (same pointer as buf0), %2=ubuf0, %3=ubuf1,
 * %4=dest (memory), %5=&c->redDither (base for the NAME(%5) context offsets).
 */
1164 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
1165  const int16_t *ubuf[2], const int16_t *vbuf[2],
1166  const int16_t *abuf0, uint8_t *dest,
1167  int dstW, int uvalpha, int y)
1168 {
1169  const int16_t *ubuf0 = ubuf[0];
1170  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1171 
1172  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1173  const int16_t *ubuf1 = ubuf[0];
1174  __asm__ volatile(
 /* REG_b is saved in the context, then reused as the destination pointer */
1175  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1176  "mov %4, %%"REG_b" \n\t"
 /* the frame pointer is saved so it can serve as the loop index */
1177  "push %%"REG_BP" \n\t"
1178  YSCALEYUV2RGB1(%%REGBP, %5)
1179  "pxor %%mm7, %%mm7 \n\t"
1180  WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1181  "pop %%"REG_BP" \n\t"
1182  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1183  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1184  "a" (&c->redDither)
1185  );
1186  } else {
1187  const int16_t *ubuf1 = ubuf[1];
1188  __asm__ volatile(
1189  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1190  "mov %4, %%"REG_b" \n\t"
1191  "push %%"REG_BP" \n\t"
1192  YSCALEYUV2RGB1b(%%REGBP, %5)
1193  "pxor %%mm7, %%mm7 \n\t"
1194  WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1195  "pop %%"REG_BP" \n\t"
1196  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1197  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1198  "a" (&c->redDither)
1199  );
1200  }
1201 }
1202 
/*
 * Convert a single luma line (buf0) plus chroma to packed RGB555 using MMX
 * inline assembly. sws_init_swscale() installs this routine as
 * c->yuv2packed1 when the destination format is AV_PIX_FMT_RGB555.
 *
 * uvalpha < 2048 : chroma is taken from ubuf[0] only (YSCALEYUV2RGB1).
 * otherwise      : chroma is the average of ubuf[0] and ubuf[1]
 *                  (YSCALEYUV2RGB1b).
 * When DITHER1XBPP is defined, the values at BLUE_DITHER, GREEN_DITHER and
 * RED_DITHER in the context are added with unsigned saturation to the B, G
 * and R bytes before WRITERGB15.
 *
 * vbuf, abuf0, dstW and y are not referenced directly in this body.
 * asm operands: %0=buf0, %1=buf1 (same pointer as buf0), %2=ubuf0, %3=ubuf1,
 * %4=dest (memory), %5=&c->redDither (base for the NAME(%5) context offsets).
 */
1203 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
1204  const int16_t *ubuf[2], const int16_t *vbuf[2],
1205  const int16_t *abuf0, uint8_t *dest,
1206  int dstW, int uvalpha, int y)
1207 {
1208  const int16_t *ubuf0 = ubuf[0];
1209  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1210 
1211  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1212  const int16_t *ubuf1 = ubuf[0];
1213  __asm__ volatile(
 /* REG_b is saved in the context, then reused as the destination pointer */
1214  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1215  "mov %4, %%"REG_b" \n\t"
 /* the frame pointer is saved so it can serve as the loop index */
1216  "push %%"REG_BP" \n\t"
1217  YSCALEYUV2RGB1(%%REGBP, %5)
1218  "pxor %%mm7, %%mm7 \n\t"
1219  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1220 #ifdef DITHER1XBPP
1221  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1222  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1223  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1224 #endif
1225  WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1226  "pop %%"REG_BP" \n\t"
1227  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1228  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1229  "a" (&c->redDither)
1230  );
1231  } else {
1232  const int16_t *ubuf1 = ubuf[1];
1233  __asm__ volatile(
1234  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1235  "mov %4, %%"REG_b" \n\t"
1236  "push %%"REG_BP" \n\t"
1237  YSCALEYUV2RGB1b(%%REGBP, %5)
1238  "pxor %%mm7, %%mm7 \n\t"
1239  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1240 #ifdef DITHER1XBPP
1241  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1242  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1243  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1244 #endif
1245  WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1246  "pop %%"REG_BP" \n\t"
1247  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1248  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1249  "a" (&c->redDither)
1250  );
1251  }
1252 }
1253 
/*
 * Convert a single luma line (buf0) plus chroma to packed RGB565 using MMX
 * inline assembly. sws_init_swscale() installs this routine as
 * c->yuv2packed1 when the destination format is AV_PIX_FMT_RGB565.
 *
 * uvalpha < 2048 : chroma is taken from ubuf[0] only (YSCALEYUV2RGB1).
 * otherwise      : chroma is the average of ubuf[0] and ubuf[1]
 *                  (YSCALEYUV2RGB1b).
 * When DITHER1XBPP is defined, the values at BLUE_DITHER, GREEN_DITHER and
 * RED_DITHER in the context are added with unsigned saturation to the B, G
 * and R bytes before WRITERGB16.
 *
 * vbuf, abuf0, dstW and y are not referenced directly in this body.
 * asm operands: %0=buf0, %1=buf1 (same pointer as buf0), %2=ubuf0, %3=ubuf1,
 * %4=dest (memory), %5=&c->redDither (base for the NAME(%5) context offsets).
 */
1254 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
1255  const int16_t *ubuf[2], const int16_t *vbuf[2],
1256  const int16_t *abuf0, uint8_t *dest,
1257  int dstW, int uvalpha, int y)
1258 {
1259  const int16_t *ubuf0 = ubuf[0];
1260  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1261 
1262  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1263  const int16_t *ubuf1 = ubuf[0];
1264  __asm__ volatile(
 /* REG_b is saved in the context, then reused as the destination pointer */
1265  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1266  "mov %4, %%"REG_b" \n\t"
 /* the frame pointer is saved so it can serve as the loop index */
1267  "push %%"REG_BP" \n\t"
1268  YSCALEYUV2RGB1(%%REGBP, %5)
1269  "pxor %%mm7, %%mm7 \n\t"
1270  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1271 #ifdef DITHER1XBPP
1272  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1273  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1274  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1275 #endif
1276  WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1277  "pop %%"REG_BP" \n\t"
1278  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1279  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1280  "a" (&c->redDither)
1281  );
1282  } else {
1283  const int16_t *ubuf1 = ubuf[1];
1284  __asm__ volatile(
1285  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1286  "mov %4, %%"REG_b" \n\t"
1287  "push %%"REG_BP" \n\t"
1288  YSCALEYUV2RGB1b(%%REGBP, %5)
1289  "pxor %%mm7, %%mm7 \n\t"
1290  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1291 #ifdef DITHER1XBPP
1292  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1293  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1294  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1295 #endif
1296  WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1297  "pop %%"REG_BP" \n\t"
1298  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1299  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1300  "a" (&c->redDither)
1301  );
1302  }
1303 }
1304 
/*
 * Asm fragment: fetch one input line for packed YUV output without any
 * blending; every sample is only shifted right by 7.
 *
 * index - register used as the loop counter (zeroed here)
 * c     - base register for the UV_OFF_BYTE context offset
 * Reads %0 = luma line and %2 = chroma line.
 *
 * After one pass:
 *   mm3 = chroma read at index
 *   mm4 = chroma read at index + the value stored at UV_OFF_BYTE(c)
 *         (the "+2048" in the per-line comments stands for that offset)
 *   mm1 = luma [index .. index+3], mm7 = luma [index+4 .. index+7]
 * Label "1:" is opened here; the branch back to it is not part of this macro.
 */
1305 #define REAL_YSCALEYUV2PACKED1(index, c) \
1306  "xor "#index", "#index" \n\t"\
1307  ".p2align 4 \n\t"\
1308  "1: \n\t"\
1309  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
1310  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1311  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
1312  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1313  "psraw $7, %%mm3 \n\t" \
1314  "psraw $7, %%mm4 \n\t" \
1315  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1316  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
1317  "psraw $7, %%mm1 \n\t" \
1318  "psraw $7, %%mm7 \n\t" \
1319 
/* Extra level of indirection so that macro arguments are expanded before being stringified. */
1320 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
1321 
/*
 * Asm fragment: like YSCALEYUV2PACKED1, but the chroma is the sum of two
 * lines (%2 and %3) shifted right by 8 with a logical shift, i.e. their
 * average at the same scale as the ">> 7" of the single-line variant.
 * Luma still comes from %0 only and is shifted right by 7.
 *
 * index - register used as the loop counter (zeroed here)
 * c     - base register for the UV_OFF_BYTE context offset
 *
 * After one pass:
 *   mm3 = averaged chroma read at index
 *   mm4 = averaged chroma read at index + the value stored at UV_OFF_BYTE(c)
 *   mm1 = luma [index .. index+3], mm7 = luma [index+4 .. index+7]
 * mm2 and mm5 are overwritten as scratch.
 * Label "1:" is opened here; the branch back to it is not part of this macro.
 */
1322 #define REAL_YSCALEYUV2PACKED1b(index, c) \
1323  "xor "#index", "#index" \n\t"\
1324  ".p2align 4 \n\t"\
1325  "1: \n\t"\
1326  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1327  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1328  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1329  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1330  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1331  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1332  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1333  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1334  "psrlw $8, %%mm3 \n\t" \
1335  "psrlw $8, %%mm4 \n\t" \
1336  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1337  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax+4]*/\
1338  "psraw $7, %%mm1 \n\t" \
1339  "psraw $7, %%mm7 \n\t"
/* Extra level of indirection so that macro arguments are expanded before being stringified. */
1340 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
1341 
/*
 * Write one line of packed YUYV 4:2:2 from a single luma line (buf0) plus
 * chroma using MMX inline assembly. sws_init_swscale() installs this routine
 * as c->yuv2packed1 when the destination format is AV_PIX_FMT_YUYV422.
 *
 * uvalpha < 2048 : chroma is taken from ubuf[0] only (YSCALEYUV2PACKED1).
 * otherwise      : chroma is the average of ubuf[0] and ubuf[1]
 *                  (YSCALEYUV2PACKED1b).
 *
 * vbuf, abuf0, dstW and y are not referenced directly in this body.
 * asm operands: %0=buf0, %1=buf1 (same pointer as buf0), %2=ubuf0, %3=ubuf1,
 * %4=dest (memory), %5=&c->redDither (base for the NAME(%5) context offsets).
 */
1342 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
1343  const int16_t *ubuf[2], const int16_t *vbuf[2],
1344  const int16_t *abuf0, uint8_t *dest,
1345  int dstW, int uvalpha, int y)
1346 {
1347  const int16_t *ubuf0 = ubuf[0];
1348  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1349 
1350  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1351  const int16_t *ubuf1 = ubuf[0];
1352  __asm__ volatile(
 /* REG_b is saved in the context, then reused as the destination pointer */
1353  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1354  "mov %4, %%"REG_b" \n\t"
 /* the frame pointer is saved so it can serve as the loop index */
1355  "push %%"REG_BP" \n\t"
1356  YSCALEYUV2PACKED1(%%REGBP, %5)
1357  WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1358  "pop %%"REG_BP" \n\t"
1359  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1360  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1361  "a" (&c->redDither)
1362  );
1363  } else {
1364  const int16_t *ubuf1 = ubuf[1];
1365  __asm__ volatile(
1366  "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
1367  "mov %4, %%"REG_b" \n\t"
1368  "push %%"REG_BP" \n\t"
1369  YSCALEYUV2PACKED1b(%%REGBP, %5)
1370  WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1371  "pop %%"REG_BP" \n\t"
1372  "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
1373  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1374  "a" (&c->redDither)
1375  );
1376  }
1377 }
1378 
1379 #if COMPILE_TEMPLATE_MMXEXT
/*
 * Horizontal luma scaling that runs the code block pointed to by
 * c->lumMmxextFilterCode. sws_init_swscale() installs this routine as
 * c->hyscale_fast when SWS_FAST_BILINEAR is set and c->canMMXEXTBeUsed is
 * non-zero.
 *
 * dst      - output line of int16_t samples
 * dstWidth - number of output samples; used only by the C loop at the end
 * src      - input line of uint8_t samples
 * srcW     - number of input samples; used only by the C loop at the end
 * xInc     - horizontal step; (i*xInc)>>16 is compared with a source index,
 *            so it appears to be 16.16 fixed point
 *
 * Register setup for the called code: REG_c=src, REG_D=dst,
 * REG_d=c->hLumFilter, REG_b=c->hLumFilterPos, REG_a=0, mm7=0.
 * NOTE(review): the called code is generated elsewhere; presumably it leaves
 * a count in REG_a, since CALL_MMXEXT_FILTER_CODE adds REG_a to the
 * destination pointer after each call -- confirm against the generator.
 *
 * REG_b is saved in and restored from ebxsave under PIC, where it is not
 * listed as a clobber. On x86-64 the word at -8(%rsp) is saved in retsave
 * and restored afterwards, because "call" pushes its return address into
 * that slot below the stack pointer.
 */
1380 static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
1381  int dstWidth, const uint8_t *src,
1382  int srcW, int xInc)
1383 {
1384  int32_t *filterPos = c->hLumFilterPos;
1385  int16_t *filter = c->hLumFilter;
1386  void *mmxextFilterCode = c->lumMmxextFilterCode;
1387  int i;
1388 #if defined(PIC)
1389  uint64_t ebxsave;
1390 #endif
1391 #if ARCH_X86_64
1392  uint64_t retsave;
1393 #endif
1394 
1395  __asm__ volatile(
1396 #if defined(PIC)
1397  "mov %%"REG_b", %5 \n\t"
1398 #if ARCH_X86_64
1399  "mov -8(%%rsp), %%"REG_a" \n\t"
1400  "mov %%"REG_a", %6 \n\t"
1401 #endif
1402 #else
1403 #if ARCH_X86_64
1404  "mov -8(%%rsp), %%"REG_a" \n\t"
1405  "mov %%"REG_a", %5 \n\t"
1406 #endif
1407 #endif
1408  "pxor %%mm7, %%mm7 \n\t"
1409  "mov %0, %%"REG_c" \n\t"
1410  "mov %1, %%"REG_D" \n\t"
1411  "mov %2, %%"REG_d" \n\t"
1412  "mov %3, %%"REG_b" \n\t"
1413  "xor %%"REG_a", %%"REG_a" \n\t" // i
1414  PREFETCH" (%%"REG_c") \n\t"
1415  PREFETCH" 32(%%"REG_c") \n\t"
1416  PREFETCH" 64(%%"REG_c") \n\t"
1417 
 /*
  * One invocation of the filter code: load the first filterPos entry
  * into esi, call the code, advance the source pointer by the filterPos
  * entry found at byte offset REG_a, advance the destination pointer by
  * REG_a, then clear REG_a. The x86-64 variant goes through esi/REG_S so
  * that the 32-bit entry can be added to the 64-bit source pointer.
  */
1418 #if ARCH_X86_64
1419 #define CALL_MMXEXT_FILTER_CODE \
1420  "movl (%%"REG_b"), %%esi \n\t"\
1421  "call *%4 \n\t"\
1422  "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
1423  "add %%"REG_S", %%"REG_c" \n\t"\
1424  "add %%"REG_a", %%"REG_D" \n\t"\
1425  "xor %%"REG_a", %%"REG_a" \n\t"\
1426 
1427 #else
1428 #define CALL_MMXEXT_FILTER_CODE \
1429  "movl (%%"REG_b"), %%esi \n\t"\
1430  "call *%4 \n\t"\
1431  "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
1432  "add %%"REG_a", %%"REG_D" \n\t"\
1433  "xor %%"REG_a", %%"REG_a" \n\t"\
1434 
1435 #endif /* ARCH_X86_64 */
1436 
 /* the luma line is processed with eight invocations */
1437  CALL_MMXEXT_FILTER_CODE
1438  CALL_MMXEXT_FILTER_CODE
1439  CALL_MMXEXT_FILTER_CODE
1440  CALL_MMXEXT_FILTER_CODE
1441  CALL_MMXEXT_FILTER_CODE
1442  CALL_MMXEXT_FILTER_CODE
1443  CALL_MMXEXT_FILTER_CODE
1444  CALL_MMXEXT_FILTER_CODE
1445 
1446 #if defined(PIC)
1447  "mov %5, %%"REG_b" \n\t"
1448 #if ARCH_X86_64
1449  "mov %6, %%"REG_a" \n\t"
1450  "mov %%"REG_a", -8(%%rsp) \n\t"
1451 #endif
1452 #else
1453 #if ARCH_X86_64
1454  "mov %5, %%"REG_a" \n\t"
1455  "mov %%"REG_a", -8(%%rsp) \n\t"
1456 #endif
1457 #endif
1458  :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
1459  "m" (mmxextFilterCode)
1460 #if defined(PIC)
1461  ,"m" (ebxsave)
1462 #endif
1463 #if ARCH_X86_64
1464  ,"m"(retsave)
1465 #endif
1466  : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
1467 #if !defined(PIC)
1468  ,"%"REG_b
1469 #endif
1470  );
1471 
 /*
  * Overwrite every trailing output sample whose source position
  * (i*xInc)>>16 is at or beyond the last input sample with that last
  * sample scaled by 128.
  */
1472  for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
1473  dst[i] = src[srcW-1]*128;
1474 }
1475 
/*
 * Horizontal chroma scaling that runs the code block pointed to by
 * c->chrMmxextFilterCode, first for src1 -> dst1 and then for src2 -> dst2.
 * sws_init_swscale() installs this routine as c->hcscale_fast when
 * SWS_FAST_BILINEAR is set and c->canMMXEXTBeUsed is non-zero.
 *
 * dst1, dst2 - output lines of int16_t samples
 * dstWidth   - number of output samples; used only by the C loop at the end
 * src1, src2 - input lines of uint8_t samples
 * srcW       - number of input samples; used only by the C loop at the end
 * xInc       - horizontal step; (i*xInc)>>16 is compared with a source
 *              index, so it appears to be 16.16 fixed point
 *
 * Register setup for the called code: REG_c=source, REG_D=destination,
 * REG_d=c->hChrFilter, REG_b=c->hChrFilterPos, REG_a=0, mm7=0.
 * CALL_MMXEXT_FILTER_CODE is the macro defined inside hyscale_fast above.
 *
 * REG_b is saved in and restored from ebxsave under PIC, where it is not
 * listed as a clobber. On x86-64 the word at -8(%rsp) is saved in retsave
 * and restored afterwards, because "call" pushes its return address into
 * that slot below the stack pointer.
 * Unlike hyscale_fast, both save slots are declared with
 * DECLARE_ALIGNED(8, ...) here.
 */
1476 static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
1477  int dstWidth, const uint8_t *src1,
1478  const uint8_t *src2, int srcW, int xInc)
1479 {
1480  int32_t *filterPos = c->hChrFilterPos;
1481  int16_t *filter = c->hChrFilter;
1482  void *mmxextFilterCode = c->chrMmxextFilterCode;
1483  int i;
1484 #if defined(PIC)
1485  DECLARE_ALIGNED(8, uint64_t, ebxsave);
1486 #endif
1487 #if ARCH_X86_64
1488  DECLARE_ALIGNED(8, uint64_t, retsave);
1489 #endif
1490 
1491  __asm__ volatile(
1492 #if defined(PIC)
1493  "mov %%"REG_b", %7 \n\t"
1494 #if ARCH_X86_64
1495  "mov -8(%%rsp), %%"REG_a" \n\t"
1496  "mov %%"REG_a", %8 \n\t"
1497 #endif
1498 #else
1499 #if ARCH_X86_64
1500  "mov -8(%%rsp), %%"REG_a" \n\t"
1501  "mov %%"REG_a", %7 \n\t"
1502 #endif
1503 #endif
1504  "pxor %%mm7, %%mm7 \n\t"
1505  "mov %0, %%"REG_c" \n\t"
1506  "mov %1, %%"REG_D" \n\t"
1507  "mov %2, %%"REG_d" \n\t"
1508  "mov %3, %%"REG_b" \n\t"
1509  "xor %%"REG_a", %%"REG_a" \n\t" // i
1510  PREFETCH" (%%"REG_c") \n\t"
1511  PREFETCH" 32(%%"REG_c") \n\t"
1512  PREFETCH" 64(%%"REG_c") \n\t"
1513 
 /* first plane: src1 -> dst1, four invocations */
1514  CALL_MMXEXT_FILTER_CODE
1515  CALL_MMXEXT_FILTER_CODE
1516  CALL_MMXEXT_FILTER_CODE
1517  CALL_MMXEXT_FILTER_CODE
 /* second plane: reload the source and destination, keep filter and filterPos */
1518  "xor %%"REG_a", %%"REG_a" \n\t" // i
1519  "mov %5, %%"REG_c" \n\t" // src
1520  "mov %6, %%"REG_D" \n\t" // buf2
1521  PREFETCH" (%%"REG_c") \n\t"
1522  PREFETCH" 32(%%"REG_c") \n\t"
1523  PREFETCH" 64(%%"REG_c") \n\t"
1524 
1525  CALL_MMXEXT_FILTER_CODE
1526  CALL_MMXEXT_FILTER_CODE
1527  CALL_MMXEXT_FILTER_CODE
1528  CALL_MMXEXT_FILTER_CODE
1529 
1530 #if defined(PIC)
1531  "mov %7, %%"REG_b" \n\t"
1532 #if ARCH_X86_64
1533  "mov %8, %%"REG_a" \n\t"
1534  "mov %%"REG_a", -8(%%rsp) \n\t"
1535 #endif
1536 #else
1537 #if ARCH_X86_64
1538  "mov %7, %%"REG_a" \n\t"
1539  "mov %%"REG_a", -8(%%rsp) \n\t"
1540 #endif
1541 #endif
1542  :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
1543  "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
1544 #if defined(PIC)
1545  ,"m" (ebxsave)
1546 #endif
1547 #if ARCH_X86_64
1548  ,"m"(retsave)
1549 #endif
1550  : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
1551 #if !defined(PIC)
1552  ,"%"REG_b
1553 #endif
1554  );
1555 
 /*
  * Overwrite every trailing output sample whose source position
  * (i*xInc)>>16 is at or beyond the last input sample with that last
  * sample scaled by 128, in both planes.
  */
1556  for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
1557  dst1[i] = src1[srcW-1]*128;
1558  dst2[i] = src2[srcW-1]*128;
1559  }
1560 }
1561 #endif /* COMPILE_TEMPLATE_MMXEXT */
1562 
/*
 * Body of the per-template initialiser that fills in the function pointers
 * of the SwsContext with the routines defined in this file.
 * NOTE(review): the signature line (listing line 1563) is absent from this
 * listing; the cross-reference text at the end of the page gives it as
 * "static av_cold void RENAME() sws_init_swscale(SwsContext *c)" -- confirm
 * against the real source file.
 *
 * Packed-output writers are installed only when the destination is not a
 * 16-bit, 9/10-bit, NV12 or NV21 format, and only when SWS_FULL_CHR_H_INT is
 * not set:
 *   - yuv2packedX: only without SWS_BITEXACT; the *_ar variants are chosen
 *     when SWS_ACCURATE_RND is set.
 *   - yuv2packed1 / yuv2packed2: chosen regardless of SWS_BITEXACT.
 * Destination formats that are not listed leave the pointers untouched.
 *
 * For 8-bit sources with a destination depth of at most 10 bits,
 * hyscale_fast/hcscale_fast are set to the MMXEXT routines when this template
 * is compiled with COMPILE_TEMPLATE_MMXEXT, SWS_FAST_BILINEAR is set and
 * c->canMMXEXTBeUsed is non-zero; in every other case they are set to NULL.
 */
1564 {
1565  enum AVPixelFormat dstFormat = c->dstFormat;
1566 
1567  if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
1568  dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21) {
1569  if (!(c->flags & SWS_BITEXACT)) {
1570  if (c->flags & SWS_ACCURATE_RND) {
1571  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1572  switch (c->dstFormat) {
1573  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
1574  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
1575  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
1576  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
1577  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
1578  default: break;
1579  }
1580  }
1581  } else {
1582  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1583  switch (c->dstFormat) {
1584  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
1585  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
1586  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
1587  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
1588  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
1589  default: break;
1590  }
1591  }
1592  }
1593  }
 /* one-line and two-line writers; these do not depend on SWS_BITEXACT */
1594  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1595  switch (c->dstFormat) {
1596  case AV_PIX_FMT_RGB32:
1597  c->yuv2packed1 = RENAME(yuv2rgb32_1);
1598  c->yuv2packed2 = RENAME(yuv2rgb32_2);
1599  break;
1600  case AV_PIX_FMT_BGR24:
1601  c->yuv2packed1 = RENAME(yuv2bgr24_1);
1602  c->yuv2packed2 = RENAME(yuv2bgr24_2);
1603  break;
1604  case AV_PIX_FMT_RGB555:
1605  c->yuv2packed1 = RENAME(yuv2rgb555_1);
1606  c->yuv2packed2 = RENAME(yuv2rgb555_2);
1607  break;
1608  case AV_PIX_FMT_RGB565:
1609  c->yuv2packed1 = RENAME(yuv2rgb565_1);
1610  c->yuv2packed2 = RENAME(yuv2rgb565_2);
1611  break;
1612  case AV_PIX_FMT_YUYV422:
1613  c->yuv2packed1 = RENAME(yuv2yuyv422_1);
1614  c->yuv2packed2 = RENAME(yuv2yuyv422_2);
1615  break;
1616  default:
1617  break;
1618  }
1619  }
1620  }
1621 
1622  if (c->srcBpc == 8 && c->dstBpc <= 10) {
1623  // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
 /* without COMPILE_TEMPLATE_MMXEXT only the two NULL assignments below are compiled */
1624 #if COMPILE_TEMPLATE_MMXEXT
1625  if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
1626  c->hyscale_fast = RENAME(hyscale_fast);
1627  c->hcscale_fast = RENAME(hcscale_fast);
1628  } else {
1629 #endif /* COMPILE_TEMPLATE_MMXEXT */
1630  c->hyscale_fast = NULL;
1631  c->hcscale_fast = NULL;
1632 #if COMPILE_TEMPLATE_MMXEXT
1633  }
1634 #endif /* COMPILE_TEMPLATE_MMXEXT */
1635  }
1636 }
#define YSCALEYUV2RGB1_ALPHA(index)
#define ALP_MMX_FILTER_OFFSET
#define RENAME(a)
static void RENAME() yuv2rgb32_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define YSCALEYUV2RGBX
#define YSCALEYUV2PACKED1(index, c)
static void RENAME() yuv2yuyv422_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:58
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
#define CONFIG_SWSCALE_ALPHA
Definition: config.h:367
void(* hyscale_fast)(struct SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
Scale one horizontal line of input data using a bilinear filter to produce one line of output data...
int dstY
Last destination vertical line output from last slice.
#define YSCALEYUV2PACKEDX_END
uint8_t
#define av_cold
Definition: attributes.h:66
int x86_reg
Definition: asm.h:70
#define SWS_FULL_CHR_H_INT
Definition: swscale.h:78
#define SWS_FAST_BILINEAR
Definition: swscale.h:57
enum AVPixelFormat dstFormat
Destination pixel format.
#define WRITERGB15(dst, dstw, index)
#define ESP_OFFSET
static void RENAME() yuv2rgb565_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
#define ARCH_X86_64
Definition: config.h:35
static void RENAME() yuv2rgb32_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2rgb565_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define PREFETCH
#define YSCALEYUV2PACKEDX
#define GREEN_DITHER
static void RENAME() yuv2rgb32_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
YV12 to RGB without scaling or interpolating.
#define WRITEBGR24(dst, dstw, index)
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:92
#define YSCALEYUV2RGB1b(index, c)
void(* hcscale_fast)(struct SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
#define YSCALEYUV2RGB_YA(index, c, b1, b2)
#define YSCALEYUV2PACKED(index, c)
static void RENAME() yuv2rgb565_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2bgr24_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define WRITERGB16(dst, dstw, index)
static void filter(MpegAudioContext *s, int ch, const short *samples, int incr)
Definition: mpegaudioenc.c:307
as above, but U and V bytes are swapped
Definition: pixfmt.h:93
#define YSCALEYUV2PACKEDX_ACCURATE
static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt)
static void RENAME() yuv2rgb555_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
#define YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
int32_t
static void RENAME() yuv2yuyv422_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
static av_cold void RENAME() sws_init_swscale(SwsContext *c)
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:68
#define U_TEMP
int dstW
Width of destination luma/alpha planes.
static void RENAME() yuv2yuyv422_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
NULL
Definition: eval.c:55
static void RENAME() yuv2rgb555_X(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:222
#define YSCALEYUV2RGB(index, c)
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:66
#define YSCALEYUV2PACKED1b(index, c)
static void RENAME() yuv2rgb555_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
static void RENAME() yuv2bgr24_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define SWS_ACCURATE_RND
Definition: swscale.h:82
#define V_TEMP
static void RENAME() yuv2yuyv422_X_ar(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, int dstW, int dstY)
#define Y_TEMP
#define SWS_BITEXACT
Definition: swscale.h:83
static void RENAME() yuv2rgb32_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
vertical bilinear scale YV12 to RGB
#define RED_DITHER
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:231
static void RENAME() yuv2rgb565_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:230
static void RENAME() yuv2bgr24_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
#define BLUE_DITHER
static void RENAME() yuv2rgb555_2(SwsContext *c, const int16_t *buf[2], const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf[2], uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
#define WRITEYUY2(dst, dstw, index)
int srcW
Width of source luma/alpha planes.
AVPixelFormat
Pixel format.
Definition: pixfmt.h:63
static void RENAME() yuv2bgr24_1(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], const int16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int y)
#define YSCALEYUV2RGB1(index, c)