37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
83 #define AANSCALE_BITS 12
86 #define NB_ITS_SPEED 50000
95 memset(block, 0, 64 *
sizeof(*block));
99 for (i = 0; i < 64; i++)
103 for (i = 0; i < 64; i++)
109 for (i = 0; i < j; i++)
114 block[63] = (block[0] & 1) ^ 1;
119 static void permute(int16_t dst[64],
const int16_t src[64],
131 for (i = 0; i < 64; i++)
132 dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i];
135 for (i = 0; i < 64; i++)
136 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
139 for (i = 0; i < 64; i++)
150 int64_t err2, ti, ti1, it1, err_sum = 0;
151 int64_t sysErr[64], sysErrMax = 0;
153 int blockSumErrMax = 0, blockSumErr;
162 for (i = 0; i < 64; i++)
164 for (it = 0; it <
NB_ITS; it++) {
171 if (!strcmp(dct->
name,
"IJG-AAN-INT")) {
172 for (i = 0; i < 64; i++) {
181 for (i = 0; i < 64; i++) {
188 sysErr[i] +=
block[i] - block1[i];
190 if (abs(
block[i]) > maxout)
191 maxout = abs(
block[i]);
193 if (blockSumErrMax < blockSumErr)
194 blockSumErrMax = blockSumErr;
196 for (i = 0; i < 64; i++)
197 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
199 for (i = 0; i < 64; i++) {
202 printf(
"%7d ", (
int) sysErr[i]);
206 omse = (double) err2 / NB_ITS / 64;
207 ome = (double) err_sum / NB_ITS / 64;
209 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
211 printf(
"%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
212 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
213 omse, ome, (
double) sysErrMax / NB_ITS,
214 maxout, blockSumErrMax);
235 }
while (ti1 < 1000000);
238 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
239 (
double) it1 * 1000.0 / (
double) ti1);
250 static double c8[8][8];
251 static double c4[4][4];
252 double block1[64], block2[64], block3[64];
259 for (i = 0; i < 8; i++) {
261 for (j = 0; j < 8; j++) {
262 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
263 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
264 sum += c8[i][j] * c8[i][j];
268 for (i = 0; i < 4; i++) {
270 for (j = 0; j < 4; j++) {
271 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
272 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
273 sum += c4[i][j] * c4[i][j];
280 for (i = 0; i < 4; i++) {
281 for (j = 0; j < 8; j++) {
282 block1[8 * (2 * i) + j] =
283 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
284 block1[8 * (2 * i + 1) + j] =
285 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
290 for (i = 0; i < 8; i++) {
291 for (j = 0; j < 8; j++) {
293 for (k = 0; k < 8; k++)
294 sum += c8[k][j] * block1[8 * i + k];
295 block2[8 * i + j] = sum;
300 for (i = 0; i < 8; i++) {
301 for (j = 0; j < 4; j++) {
304 for (k = 0; k < 4; k++)
305 sum += c4[k][j] * block2[8 * (2 * k) + i];
306 block3[8 * (2 * j) + i] = sum;
310 for (k = 0; k < 4; k++)
311 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
312 block3[8 * (2 * j + 1) + i] = sum;
317 for (i = 0; i < 8; i++) {
318 for (j = 0; j < 8; j++) {
319 v = block3[8 * i + j];
321 else if (v > 255) v = 255;
322 dest[i * linesize + j] = (int)
rint(v);
328 void (*idct248_put)(
uint8_t *dest,
int line_size,
332 int it, i, it1, ti, ti1, err_max, v;
340 for (it = 0; it <
NB_ITS; it++) {
342 for (i = 0; i < 64; i++)
346 for (i = 0; i < 64; i++)
350 for (i = 0; i < 64; i++)
354 for (i = 0; i < 64; i++) {
362 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
371 for (i = 0; i < 64; i++)
377 }
while (ti1 < 1000000);
380 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
381 (
double) it1 * 1000.0 / (
double) ti1);
386 printf(
"dct-test [-i] [<test-number>]\n"
387 "test-number 0 -> test with random matrixes\n"
388 " 1 -> test with random sparse matrixes\n"
389 " 2 -> do 3. test from mpeg4 std\n"
390 "-i test IDCT implementations\n"
391 "-4 test IDCT248 implementations\n"
399 int main(
int argc,
char **argv)
401 int test_idct = 0, test_248_dct = 0;
410 c =
getopt(argc, argv,
"ih4t");
431 test = atoi(argv[
optind]);
433 printf(
"Libav DCT/IDCT test\n");
441 err |=
dct_error(&idct_tab[i], test, test_idct, speed);
443 for (i = 0; idct_tab_arch[i].
name; i++)
444 if (!(~cpu_flags & idct_tab_arch[i].
cpu_flag))
445 err |=
dct_error(&idct_tab_arch[i], test, test_idct, speed);
450 err |=
dct_error(&fdct_tab[i], test, test_idct, speed);
452 for (i = 0; fdct_tab_arch[i].
name; i++)
453 if (!(~cpu_flags & fdct_tab_arch[i].
cpu_flag))
454 err |=
dct_error(&fdct_tab_arch[i], test, test_idct, speed);
460 printf(
"Error: %d.\n", err);
static const struct algo fdct_tab[4]
static double rint(double x)
void ff_fdct_ifast(int16_t *data)
static uint8_t img_dest[64]
static const struct algo fdct_tab_arch[]
#define DECLARE_ALIGNED(n, t, v)
int main(int argc, char **argv)
static int permute_x86(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
void ff_faanidct(int16_t block[64])
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
static const struct algo idct_tab[4]
const uint16_t ff_aanscales[64]
static void permute(int16_t dst[64], const int16_t src[64], enum idct_permutation_type perm_type)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void(* func)(int16_t *block)
int64_t av_gettime(void)
Get the current time in microseconds.
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
static void test(const char *pattern, const char *host)
#define FF_ARRAY_ELEMS(a)
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
void ff_faandct(int16_t *data)
void av_lfg_init(AVLFG *c, unsigned int seed)
static void(WINAPI *cond_broadcast)(pthread_cond_t *cond)
static int getopt(int argc, char *argv[], char *opts)
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT. This is a reference implementation...
static int16_t block1[64]
AAN (Arai Agui Nakajima) (I)DCT tables.
static uint8_t img_dest1[64]
enum idct_permutation_type perm_type
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, int line_size, int16_t *block), int speed)
static av_cold int init(AVCodecParserContext *s)
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT. This is a reference implementation...
static const struct algo idct_tab_arch[]
static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
void ff_simple_idct_8(int16_t *block)