| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include <string.h> |
| |
|
| | #include "checkasm.h" |
| |
|
| | #include "libavcodec/vc1dsp.h" |
| |
|
| | #include "libavutil/common.h" |
| | #include "libavutil/internal.h" |
| | #include "libavutil/intreadwrite.h" |
| | #include "libavutil/mem_internal.h" |
| |
|
| | #define VC1DSP_TEST(func) { #func, offsetof(VC1DSPContext, func) }, |
| | #define VC1DSP_SIZED_TEST(func, width, height) { #func, offsetof(VC1DSPContext, func), width, height }, |
| |
|
| | typedef struct { |
| | const char *name; |
| | size_t offset; |
| | int width; |
| | int height; |
| | } test; |
| |
|
| | typedef struct matrix { |
| | size_t width; |
| | size_t height; |
| | float d[]; |
| | } matrix; |
| |
|
| | static const matrix T8 = { 8, 8, { |
| | 12, 12, 12, 12, 12, 12, 12, 12, |
| | 16, 15, 9, 4, -4, -9, -15, -16, |
| | 16, 6, -6, -16, -16, -6, 6, 16, |
| | 15, -4, -16, -9, 9, 16, 4, -15, |
| | 12, -12, -12, 12, 12, -12, -12, 12, |
| | 9, -16, 4, 15, -15, -4, 16, -9, |
| | 6, -16, 16, -6, -6, 16, -16, 6, |
| | 4, -9, 15, -16, 16, -15, 9, -4 |
| | } }; |
| |
|
| | static const matrix T4 = { 4, 4, { |
| | 17, 17, 17, 17, |
| | 22, 10, -10, -22, |
| | 17, -17, -17, 17, |
| | 10, -22, 22, -10 |
| | } }; |
| |
|
| | static const matrix T8t = { 8, 8, { |
| | 12, 16, 16, 15, 12, 9, 6, 4, |
| | 12, 15, 6, -4, -12, -16, -16, -9, |
| | 12, 9, -6, -16, -12, 4, 16, 15, |
| | 12, 4, -16, -9, 12, 15, -6, -16, |
| | 12, -4, -16, 9, 12, -15, -6, 16, |
| | 12, -9, -6, 16, -12, -4, 16, -15, |
| | 12, -15, 6, 4, -12, 16, -16, 9, |
| | 12, -16, 16, -15, 12, -9, 6, -4 |
| | } }; |
| |
|
| | static const matrix T4t = { 4, 4, { |
| | 17, 22, 17, 10, |
| | 17, 10, -17, -22, |
| | 17, -10, -17, 22, |
| | 17, -22, 17, -10 |
| | } }; |
| |
|
| | static matrix *new_matrix(size_t width, size_t height) |
| | { |
| | matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float)); |
| | if (out == NULL) { |
| | fprintf(stderr, "Memory allocation failure\n"); |
| | exit(EXIT_FAILURE); |
| | } |
| | out->width = width; |
| | out->height = height; |
| | return out; |
| | } |
| |
|
| | static matrix *multiply(const matrix *a, const matrix *b) |
| | { |
| | matrix *out; |
| | if (a->width != b->height) { |
| | fprintf(stderr, "Incompatible multiplication\n"); |
| | exit(EXIT_FAILURE); |
| | } |
| | out = new_matrix(b->width, a->height); |
| | for (int j = 0; j < out->height; ++j) |
| | for (int i = 0; i < out->width; ++i) { |
| | float sum = 0; |
| | for (int k = 0; k < a->width; ++k) |
| | sum += a->d[j * a->width + k] * b->d[k * b->width + i]; |
| | out->d[j * out->width + i] = sum; |
| | } |
| | return out; |
| | } |
| |
|
| | static void normalise(matrix *a) |
| | { |
| | for (int j = 0; j < a->height; ++j) |
| | for (int i = 0; i < a->width; ++i) { |
| | float *p = a->d + j * a->width + i; |
| | *p *= 64; |
| | if (a->height == 4) |
| | *p /= (const unsigned[]) { 289, 292, 289, 292 } [j]; |
| | else |
| | *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j]; |
| | if (a->width == 4) |
| | *p /= (const unsigned[]) { 289, 292, 289, 292 } [i]; |
| | else |
| | *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i]; |
| | } |
| | } |
| |
|
| | static void divide_and_round_nearest(matrix *a, float by) |
| | { |
| | for (int j = 0; j < a->height; ++j) |
| | for (int i = 0; i < a->width; ++i) { |
| | float *p = a->d + j * a->width + i; |
| | *p = rintf(*p / by); |
| | } |
| | } |
| |
|
| | static void tweak(matrix *a) |
| | { |
| | for (int j = 4; j < a->height; ++j) |
| | for (int i = 0; i < a->width; ++i) { |
| | float *p = a->d + j * a->width + i; |
| | *p += 1; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | #define ATTENUATION 4 |
| |
|
| | static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height) |
| | { |
| | matrix *raw, *tmp, *D, *E, *R; |
| | raw = new_matrix(width, height); |
| | for (int i = 0; i < width * height; ++i) |
| | raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION; |
| | tmp = multiply(height == 8 ? &T8 : &T4, raw); |
| | D = multiply(tmp, width == 8 ? &T8t : &T4t); |
| | normalise(D); |
| | divide_and_round_nearest(D, 1); |
| | for (int i = 0; i < width * height; ++i) { |
| | if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) { |
| | |
| | av_free(raw); |
| | av_free(tmp); |
| | av_free(D); |
| | return generate_inverse_quantized_transform_coefficients(width, height); |
| | } |
| | } |
| | E = multiply(D, width == 8 ? &T8 : &T4); |
| | divide_and_round_nearest(E, 8); |
| | for (int i = 0; i < width * height; ++i) |
| | if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) { |
| | |
| | av_free(raw); |
| | av_free(tmp); |
| | av_free(D); |
| | av_free(E); |
| | return generate_inverse_quantized_transform_coefficients(width, height); |
| | } |
| | R = multiply(height == 8 ? &T8t : &T4t, E); |
| | tweak(R); |
| | divide_and_round_nearest(R, 128); |
| | for (int i = 0; i < width * height; ++i) |
| | if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) { |
| | |
| | av_free(raw); |
| | av_free(tmp); |
| | av_free(D); |
| | av_free(E); |
| | av_free(R); |
| | return generate_inverse_quantized_transform_coefficients(width, height); |
| | } |
| | av_free(raw); |
| | av_free(tmp); |
| | av_free(E); |
| | av_free(R); |
| | return D; |
| | } |
| |
|
| | #define RANDOMIZE_BUFFER16(name, size) \ |
| | do { \ |
| | int i; \ |
| | for (i = 0; i < size; ++i) { \ |
| | uint16_t r = rnd(); \ |
| | AV_WN16A(name##0 + i, r); \ |
| | AV_WN16A(name##1 + i, r); \ |
| | } \ |
| | } while (0) |
| |
|
| | #define RANDOMIZE_BUFFER8(name, size) \ |
| | do { \ |
| | int i; \ |
| | for (i = 0; i < size; ++i) { \ |
| | uint8_t r = rnd(); \ |
| | name##0[i] = r; \ |
| | name##1[i] = r; \ |
| | } \ |
| | } while (0) |
| |
|
| | #define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size) \ |
| | do { \ |
| | uint8_t *p##0 = name##0, *p##1 = name##1; \ |
| | int i = (size); \ |
| | while (i-- > 0) { \ |
| | int x = 0x80 | (rnd() & 0x7F); \ |
| | x >>= rnd() % 9; \ |
| | if (rnd() & 1) \ |
| | x = -x; \ |
| | *p##1++ = *p##0++ = 0x80 + x; \ |
| | } \ |
| | } while (0) |
| |
|
| | static void check_inv_trans_inplace(void) |
| | { |
| | |
| | |
| | |
| | |
| | |
| | LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]); |
| | LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]); |
| |
|
| | VC1DSPContext h; |
| |
|
| | ff_vc1dsp_init(&h); |
| |
|
| | if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) { |
| | matrix *coeffs; |
| | declare_func(void, int16_t *); |
| | RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8); |
| | coeffs = generate_inverse_quantized_transform_coefficients(8, 8); |
| | for (int j = 0; j < 8; ++j) |
| | for (int i = 0; i < 8; ++i) { |
| | int idx = 8 + i * 8 + j; |
| | inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i]; |
| | } |
| | call_ref(inv_trans_in0 + 8); |
| | call_new(inv_trans_in1 + 8); |
| | if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t))) |
| | fail(); |
| | bench_new(inv_trans_in1 + 8); |
| | av_free(coeffs); |
| | } |
| | } |
| |
|
| | static void check_inv_trans_adding(void) |
| | { |
| | |
| | |
| | LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]); |
| | LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]); |
| |
|
| | |
| | |
| | |
| | LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]); |
| | LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]); |
| |
|
| | VC1DSPContext h; |
| |
|
| | const test tests[] = { |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4) |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8) |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4) |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8) |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4) |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8) |
| | VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4) |
| | }; |
| |
|
| | ff_vc1dsp_init(&h); |
| |
|
| | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { |
| | void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset); |
| | if (check_func(func, "vc1dsp.%s", tests[t].name)) { |
| | matrix *coeffs; |
| | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *); |
| | RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8); |
| | RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24); |
| | coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height); |
| | for (int j = 0; j < tests[t].height; ++j) |
| | for (int i = 0; i < tests[t].width; ++i) { |
| | int idx = j * 8 + i; |
| | inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i]; |
| | } |
| | call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0); |
| | call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1); |
| | if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24)) |
| | fail(); |
| | bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8); |
| | av_free(coeffs); |
| | } |
| | } |
| | } |
| |
|
| | static void check_loop_filter(void) |
| | { |
| | |
| | |
| | |
| | |
| | |
| | LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]); |
| | LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]); |
| |
|
| | VC1DSPContext h; |
| |
|
| | const test tests[] = { |
| | VC1DSP_TEST(vc1_v_loop_filter4) |
| | VC1DSP_TEST(vc1_h_loop_filter4) |
| | VC1DSP_TEST(vc1_v_loop_filter8) |
| | VC1DSP_TEST(vc1_h_loop_filter8) |
| | VC1DSP_TEST(vc1_v_loop_filter16) |
| | VC1DSP_TEST(vc1_h_loop_filter16) |
| | }; |
| |
|
| | ff_vc1dsp_init(&h); |
| |
|
| | for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { |
| | void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); |
| | declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int); |
| | if (check_func(func, "vc1dsp.%s", tests[t].name)) { |
| | for (int count = 1000; count > 0; --count) { |
| | int pq = rnd() % 31 + 1; |
| | RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48); |
| | call_ref(filter_buf0 + 4 * 48 + 16, 48, pq); |
| | call_new(filter_buf1 + 4 * 48 + 16, 48, pq); |
| | if (memcmp(filter_buf0, filter_buf1, 24 * 48)) |
| | fail(); |
| | } |
| | } |
| | for (int j = 0; j < 24; ++j) |
| | for (int i = 0; i < 48; ++i) |
| | filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4); |
| | if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name)) |
| | bench_new(filter_buf1 + 4 * 48 + 16, 48, 1); |
| | if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name)) |
| | bench_new(filter_buf1 + 4 * 48 + 16, 48, 31); |
| | } |
| | } |
| |
|
| | #define TEST_UNESCAPE \ |
| | do { \ |
| | for (int count = 100; count > 0; --count) { \ |
| | escaped_offset = rnd() & 7; \ |
| | unescaped_offset = rnd() & 7; \ |
| | escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \ |
| | RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \ |
| | len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \ |
| | len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \ |
| | if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \ |
| | fail(); \ |
| | } \ |
| | } while (0) |
| |
|
| | static void check_unescape(void) |
| | { |
| | |
| | #define LOG2_UNESCAPE_BUF_SIZE 17 |
| | #define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE) |
| | LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]); |
| | LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]); |
| | LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]); |
| | LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]); |
| |
|
| | VC1DSPContext h; |
| |
|
| | ff_vc1dsp_init(&h); |
| |
|
| | if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) { |
| | int len0, len1, escaped_offset, unescaped_offset, escaped_len; |
| | declare_func(int, const uint8_t *, int, uint8_t *); |
| |
|
| | |
| | for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x) |
| | escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); |
| | TEST_UNESCAPE; |
| |
|
| | |
| | RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE); |
| | TEST_UNESCAPE; |
| |
|
| | |
| | for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) { |
| | int gap, gap_msb; |
| | escaped1[x+0] = escaped0[x+0] = 0; |
| | escaped1[x+1] = escaped0[x+1] = 0; |
| | escaped1[x+2] = escaped0[x+2] = 3; |
| | escaped1[x+3] = escaped0[x+3] = rnd() & 3; |
| | gap_msb = 2u << (rnd() % 8); |
| | gap = (rnd() &~ -gap_msb) | gap_msb; |
| | x += gap; |
| | } |
| | TEST_UNESCAPE; |
| |
|
| | |
| | memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE); |
| | memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE); |
| | TEST_UNESCAPE; |
| |
|
| | |
| | bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1); |
| | } |
| | } |
| |
|
| | void checkasm_check_vc1dsp(void) |
| | { |
| | check_inv_trans_inplace(); |
| | check_inv_trans_adding(); |
| | report("inv_trans"); |
| |
|
| | check_loop_filter(); |
| | report("loop_filter"); |
| |
|
| | check_unescape(); |
| | report("unescape_buffer"); |
| | } |
| |
|