Libav
dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36 
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41 
42 #include "dct.h"
43 #include "idctdsp.h"
44 #include "simple_idct.h"
45 #include "aandcttab.h"
46 #include "faandct.h"
47 #include "faanidct.h"
48 #include "dctref.h"
49 
50 struct algo {
51  const char *name;
52  void (*func)(int16_t *block);
54  int cpu_flag;
55  int nonspec;
56 };
57 
58 static const struct algo fdct_tab[4] = {
59  { "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE },
60  { "FAAN", ff_faandct, FF_IDCT_PERM_NONE },
61  { "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE },
62  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
63 };
64 
65 static const struct algo idct_tab[4] = {
66  { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE },
67  { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE },
69  { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE },
70 };
71 
72 #if ARCH_ARM
73 #include "arm/dct-test.c"
74 #elif ARCH_PPC
75 #include "ppc/dct-test.c"
76 #elif ARCH_X86
77 #include "x86/dct-test.c"
78 #else
79 static const struct algo fdct_tab_arch[] = { 0 };
80 static const struct algo idct_tab_arch[] = { 0 };
81 #endif
82 
83 #define AANSCALE_BITS 12
84 
85 #define NB_ITS 20000
86 #define NB_ITS_SPEED 50000
87 
88 DECLARE_ALIGNED(16, static int16_t, block)[64];
89 DECLARE_ALIGNED(8, static int16_t, block1)[64];
90 
91 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
92 {
93  int i, j;
94 
95  memset(block, 0, 64 * sizeof(*block));
96 
97  switch (test) {
98  case 0:
99  for (i = 0; i < 64; i++)
100  block[i] = (av_lfg_get(prng) % 512) - 256;
101  if (is_idct) {
102  ff_ref_fdct(block);
103  for (i = 0; i < 64; i++)
104  block[i] >>= 3;
105  }
106  break;
107  case 1:
108  j = av_lfg_get(prng) % 10 + 1;
109  for (i = 0; i < j; i++)
110  block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
111  break;
112  case 2:
113  block[ 0] = av_lfg_get(prng) % 4096 - 2048;
114  block[63] = (block[0] & 1) ^ 1;
115  break;
116  }
117 }
118 
119 static void permute(int16_t dst[64], const int16_t src[64],
121 {
122  int i;
123 
124 #if ARCH_X86
125  if (permute_x86(dst, src, perm_type))
126  return;
127 #endif
128 
129  switch (perm_type) {
131  for (i = 0; i < 64; i++)
132  dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
133  break;
135  for (i = 0; i < 64; i++)
136  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
137  break;
138  default:
139  for (i = 0; i < 64; i++)
140  dst[i] = src[i];
141  break;
142  }
143 }
144 
145 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
146 {
147  void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
148  int it, i, scale;
149  int err_inf, v;
150  int64_t err2, ti, ti1, it1, err_sum = 0;
151  int64_t sysErr[64], sysErrMax = 0;
152  int maxout = 0;
153  int blockSumErrMax = 0, blockSumErr;
154  AVLFG prng;
155  double omse, ome;
156  int spec_err;
157 
158  av_lfg_init(&prng, 1);
159 
160  err_inf = 0;
161  err2 = 0;
162  for (i = 0; i < 64; i++)
163  sysErr[i] = 0;
164  for (it = 0; it < NB_ITS; it++) {
165  init_block(block1, test, is_idct, &prng);
166  permute(block, block1, dct->perm_type);
167 
168  dct->func(block);
169  emms_c();
170 
171  if (!strcmp(dct->name, "IJG-AAN-INT")) {
172  for (i = 0; i < 64; i++) {
173  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
174  block[i] = (block[i] * scale) >> AANSCALE_BITS;
175  }
176  }
177 
178  ref(block1);
179 
180  blockSumErr = 0;
181  for (i = 0; i < 64; i++) {
182  int err = block[i] - block1[i];
183  err_sum += err;
184  v = abs(err);
185  if (v > err_inf)
186  err_inf = v;
187  err2 += v * v;
188  sysErr[i] += block[i] - block1[i];
189  blockSumErr += v;
190  if (abs(block[i]) > maxout)
191  maxout = abs(block[i]);
192  }
193  if (blockSumErrMax < blockSumErr)
194  blockSumErrMax = blockSumErr;
195  }
196  for (i = 0; i < 64; i++)
197  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
198 
199  for (i = 0; i < 64; i++) {
200  if (i % 8 == 0)
201  printf("\n");
202  printf("%7d ", (int) sysErr[i]);
203  }
204  printf("\n");
205 
206  omse = (double) err2 / NB_ITS / 64;
207  ome = (double) err_sum / NB_ITS / 64;
208 
209  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
210 
211  printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
212  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
213  omse, ome, (double) sysErrMax / NB_ITS,
214  maxout, blockSumErrMax);
215 
216  if (spec_err && !dct->nonspec)
217  return 1;
218 
219  if (!speed)
220  return 0;
221 
222  /* speed test */
223  init_block(block, test, is_idct, &prng);
224  permute(block1, block, dct->perm_type);
225 
226  ti = av_gettime();
227  it1 = 0;
228  do {
229  for (it = 0; it < NB_ITS_SPEED; it++) {
230  memcpy(block, block1, sizeof(block));
231  dct->func(block);
232  }
233  it1 += NB_ITS_SPEED;
234  ti1 = av_gettime() - ti;
235  } while (ti1 < 1000000);
236  emms_c();
237 
238  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
239  (double) it1 * 1000.0 / (double) ti1);
240 
241  return 0;
242 }
243 
246 
247 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
248 {
249  static int init;
250  static double c8[8][8];
251  static double c4[4][4];
252  double block1[64], block2[64], block3[64];
253  double s, sum, v;
254  int i, j, k;
255 
256  if (!init) {
257  init = 1;
258 
259  for (i = 0; i < 8; i++) {
260  sum = 0;
261  for (j = 0; j < 8; j++) {
262  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
263  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
264  sum += c8[i][j] * c8[i][j];
265  }
266  }
267 
268  for (i = 0; i < 4; i++) {
269  sum = 0;
270  for (j = 0; j < 4; j++) {
271  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
272  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
273  sum += c4[i][j] * c4[i][j];
274  }
275  }
276  }
277 
278  /* butterfly */
279  s = 0.5 * sqrt(2.0);
280  for (i = 0; i < 4; i++) {
281  for (j = 0; j < 8; j++) {
282  block1[8 * (2 * i) + j] =
283  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
284  block1[8 * (2 * i + 1) + j] =
285  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
286  }
287  }
288 
289  /* idct8 on lines */
290  for (i = 0; i < 8; i++) {
291  for (j = 0; j < 8; j++) {
292  sum = 0;
293  for (k = 0; k < 8; k++)
294  sum += c8[k][j] * block1[8 * i + k];
295  block2[8 * i + j] = sum;
296  }
297  }
298 
299  /* idct4 */
300  for (i = 0; i < 8; i++) {
301  for (j = 0; j < 4; j++) {
302  /* top */
303  sum = 0;
304  for (k = 0; k < 4; k++)
305  sum += c4[k][j] * block2[8 * (2 * k) + i];
306  block3[8 * (2 * j) + i] = sum;
307 
308  /* bottom */
309  sum = 0;
310  for (k = 0; k < 4; k++)
311  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
312  block3[8 * (2 * j + 1) + i] = sum;
313  }
314  }
315 
316  /* clamp and store the result */
317  for (i = 0; i < 8; i++) {
318  for (j = 0; j < 8; j++) {
319  v = block3[8 * i + j];
320  if (v < 0) v = 0;
321  else if (v > 255) v = 255;
322  dest[i * linesize + j] = (int) rint(v);
323  }
324  }
325 }
326 
327 static void idct248_error(const char *name,
328  void (*idct248_put)(uint8_t *dest, int line_size,
329  int16_t *block),
330  int speed)
331 {
332  int it, i, it1, ti, ti1, err_max, v;
333  AVLFG prng;
334 
335  av_lfg_init(&prng, 1);
336 
337  /* just one test to see if code is correct (precision is less
338  important here) */
339  err_max = 0;
340  for (it = 0; it < NB_ITS; it++) {
341  /* XXX: use forward transform to generate values */
342  for (i = 0; i < 64; i++)
343  block1[i] = av_lfg_get(&prng) % 256 - 128;
344  block1[0] += 1024;
345 
346  for (i = 0; i < 64; i++)
347  block[i] = block1[i];
348  idct248_ref(img_dest1, 8, block);
349 
350  for (i = 0; i < 64; i++)
351  block[i] = block1[i];
352  idct248_put(img_dest, 8, block);
353 
354  for (i = 0; i < 64; i++) {
355  v = abs((int) img_dest[i] - (int) img_dest1[i]);
356  if (v == 255)
357  printf("%d %d\n", img_dest[i], img_dest1[i]);
358  if (v > err_max)
359  err_max = v;
360  }
361  }
362  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
363 
364  if (!speed)
365  return;
366 
367  ti = av_gettime();
368  it1 = 0;
369  do {
370  for (it = 0; it < NB_ITS_SPEED; it++) {
371  for (i = 0; i < 64; i++)
372  block[i] = block1[i];
373  idct248_put(img_dest, 8, block);
374  }
375  it1 += NB_ITS_SPEED;
376  ti1 = av_gettime() - ti;
377  } while (ti1 < 1000000);
378  emms_c();
379 
380  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
381  (double) it1 * 1000.0 / (double) ti1);
382 }
383 
/** Print the command line summary on standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n"
        "-t speed test\n";

    fputs(usage, stdout);
}
394 
395 #if !HAVE_GETOPT
396 #include "compat/getopt.c"
397 #endif
398 
399 int main(int argc, char **argv)
400 {
401  int test_idct = 0, test_248_dct = 0;
402  int c, i;
403  int test = 1;
404  int speed = 0;
405  int err = 0;
406 
407  ff_ref_dct_init();
408 
409  for (;;) {
410  c = getopt(argc, argv, "ih4t");
411  if (c == -1)
412  break;
413  switch (c) {
414  case 'i':
415  test_idct = 1;
416  break;
417  case '4':
418  test_248_dct = 1;
419  break;
420  case 't':
421  speed = 1;
422  break;
423  default:
424  case 'h':
425  help();
426  return 0;
427  }
428  }
429 
430  if (optind < argc)
431  test = atoi(argv[optind]);
432 
433  printf("Libav DCT/IDCT test\n");
434 
435  if (test_248_dct) {
436  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
437  } else {
438  const int cpu_flags = av_get_cpu_flags();
439  if (test_idct) {
440  for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++)
441  err |= dct_error(&idct_tab[i], test, test_idct, speed);
442 
443  for (i = 0; idct_tab_arch[i].name; i++)
444  if (!(~cpu_flags & idct_tab_arch[i].cpu_flag))
445  err |= dct_error(&idct_tab_arch[i], test, test_idct, speed);
446  }
447 #if CONFIG_FDCTDSP
448  else {
449  for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++)
450  err |= dct_error(&fdct_tab[i], test, test_idct, speed);
451 
452  for (i = 0; fdct_tab_arch[i].name; i++)
453  if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag))
454  err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed);
455  }
456 #endif /* CONFIG_FDCTDSP */
457  }
458 
459  if (err)
460  printf("Error: %d.\n", err);
461 
462  return !!err;
463 }
#define FFMAX(a, b)
Definition: common.h:55
Definition: lfg.h:25
idct_permutation_type
Definition: idctdsp.h:35
static const struct algo fdct_tab[4]
Definition: dct-test.c:58
static double rint(double x)
Definition: libm.h:130
void ff_fdct_ifast(int16_t *data)
Definition: jfdctfst.c:208
static uint8_t img_dest[64]
Definition: dct-test.c:244
static int optind
Definition: getopt.c:37
const char * name
Definition: dct-test.c:51
static const struct algo fdct_tab_arch[]
Definition: dct-test.c:79
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:58
int main(int argc, char **argv)
Definition: dct-test.c:399
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
Definition: dct-test.c:69
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
Definition: dctref.c:41
int nonspec
Definition: dct-test.c:55
uint8_t
void ff_faanidct(int16_t block[64])
Definition: faanidct.c:132
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
Definition: lfg.h:38
#define emms_c()
Definition: internal.h:47
int cpu_flag
Definition: dct-test.c:54
const char * name
void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
Definition: simple_idct.c:88
static const struct algo idct_tab[4]
Definition: dct-test.c:65
const uint16_t ff_aanscales[64]
Definition: aandcttab.c:26
static void permute(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
Definition: dct-test.c:119
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:47
#define AANSCALE_BITS
Definition: dct-test.c:83
void(* func)(int16_t *block)
Definition: dct-test.c:52
Definition: dct-test.c:50
int64_t av_gettime(void)
Get the current time in microseconds.
Definition: time.c:37
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
Definition: dct-test.c:247
static void test(const char *pattern, const char *host)
Definition: noproxy-test.c:23
#define FF_ARRAY_ELEMS(a)
Definition: common.h:61
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
#define NB_ITS_SPEED
Definition: dct-test.c:86
void ff_faandct(int16_t *data)
Definition: faandct.c:121
void av_lfg_init(AVLFG *c, unsigned int seed)
Definition: lfg.c:30
static void(WINAPI *cond_broadcast)(pthread_cond_t *cond)
static int getopt(int argc, char *argv[], char *opts)
Definition: getopt.c:41
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT This is a reference implementation...
Definition: dctref.c:59
static int16_t block1[64]
Definition: dct-test.c:89
AAN (Arai Agui Nakajima) (I)DCT tables.
static uint8_t img_dest1[64]
Definition: dct-test.c:245
enum idct_permutation_type perm_type
Definition: dct-test.c:53
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, int line_size, int16_t *block), int speed)
Definition: dct-test.c:327
static av_cold int init(AVCodecParserContext *s)
Definition: h264_parser.c:499
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT This is a reference implementation...
Definition: dctref.c:95
static const struct algo idct_tab_arch[]
Definition: dct-test.c:80
simple idct header.
static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
Definition: dct-test.c:145
#define NB_ITS
Definition: dct-test.c:85
Floating point AAN DCT
#define FFABS(a)
Definition: common.h:52
static void help(void)
Definition: dct-test.c:384
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
Definition: dct-test.c:91
void ff_simple_idct_8(int16_t *block)
static int16_t block[64]
Definition: dct-test.c:88