#include "unity/unity.h"

#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#include "pcre2_compile.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* External wrapper provided by the module under test */
extern size_t test_parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer);

/*
 * Buffer allocated by run_parse_and_fill() for the test that is currently
 * running. A failed Unity assertion aborts the test body immediately, so a
 * trailing free() inside a test would be skipped; the buffer is owned here
 * instead and released from tearDown().
 */
static uint32_t *tracked_buf = NULL;

/* Frees the tracked buffer (if any) and clears the pointer. */
static void release_tracked_buf(void)
{
    free(tracked_buf); /* free(NULL) is a no-op */
    tracked_buf = NULL;
}

/*
 * Asserts that `actual` holds exactly the given [start, end] pairs.
 *
 * actual          - flat array of uint32_t values, two entries per pair
 * actual_size_u32 - number of uint32_t entries filled in `actual`; must be
 *                   exactly expected_pairs * 2
 * expected        - array of {start, end} pairs, in the expected order
 * expected_pairs  - number of pairs in `expected`
 */
static void assert_ranges_equal(const uint32_t *actual, size_t actual_size_u32,
                                const uint32_t (*expected)[2], size_t expected_pairs)
{
    TEST_ASSERT_EQUAL_UINT32((uint32_t)(expected_pairs * 2), (uint32_t)actual_size_u32);

    if (expected_pairs > 0) {
        /* Fail cleanly instead of dereferencing a NULL result buffer. */
        TEST_ASSERT_NOT_NULL(actual);
    }

    for (size_t i = 0; i < expected_pairs; i++) {
        TEST_ASSERT_EQUAL_UINT32(expected[i][0], actual[2 * i + 0]);
        TEST_ASSERT_EQUAL_UINT32(expected[i][1], actual[2 * i + 1]);
    }
}

void setUp(void)
{
    /* Every test starts without a tracked buffer. */
    tracked_buf = NULL;
}

void tearDown(void)
{
    /* Runs after each test, so the buffer is released even when the test
       body was aborted by a failed assertion. */
    release_tracked_buf();
}

/*
 * Helper to run parse_class twice (query size then fill).
 *
 * The first call passes a NULL buffer to obtain the required number of
 * uint32_t entries; the second call fills a freshly allocated buffer and must
 * report the same size.
 *
 * On return *out_buf is NULL when the reported size is 0, otherwise it points
 * to the filled buffer. The buffer stays owned by this file and is freed in
 * tearDown(); callers must NOT free it. Only one buffer is tracked at a time:
 * calling this helper again releases the previous one.
 *
 * Returns the number of uint32_t entries reported by test_parse_class().
 */
static size_t run_parse_and_fill(uint32_t *tokens, uint32_t options, uint32_t **out_buf)
{
    /* Give the out-parameter a defined value on every path, including the
       ones where an assertion below aborts the test. */
    *out_buf = NULL;
    release_tracked_buf();

    size_t sz = test_parse_class(tokens, options, NULL);
    if (sz == 0) {
        return 0;
    }

    uint32_t *buf = malloc(sz * sizeof *buf);
    TEST_ASSERT_NOT_NULL(buf);
    tracked_buf = buf; /* hand ownership to tearDown() before any assertion can abort */

    /* Poison the buffer so entries the fill pass never writes do not look
       like zero-initialised data. */
    memset(buf, 0xCC, sz * sizeof *buf);

    size_t sz2 = test_parse_class(tokens, options, buf);
    TEST_ASSERT_EQUAL_UINT32((uint32_t)sz, (uint32_t)sz2);

    *out_buf = buf;
    return sz;
}

/* A class containing only the end marker yields no output entries. */
void test_parse_class_empty_returns_zero(void)
{
    uint32_t tokens[] = { META_CLASS_END };

    size_t sz = test_parse_class(tokens, 0 /* options */, NULL);
    TEST_ASSERT_EQUAL_UINT32(0, (uint32_t)sz);

    uint32_t *buf = NULL;
    size_t sz2 = run_parse_and_fill(tokens, 0, &buf);
    TEST_ASSERT_EQUAL_UINT32(0, (uint32_t)sz2);
    TEST_ASSERT_NULL(buf);
}

/* A single literal is reported as a one-character range. */
void test_parse_class_single_literal_A(void)
{
    uint32_t tokens[] = { (uint32_t)'A', META_CLASS_END };
    uint32_t *buf = NULL;

    size_t sz = run_parse_and_fill(tokens, 0, &buf);

    const uint32_t expected[][2] = {
        { (uint32_t)'A', (uint32_t)'A' }
    };
    assert_ranges_equal(buf, sz, expected, 1);
}

/* literal, META_RANGE_LITERAL, literal is reported as one range. */
void test_parse_class_range_literal_a_to_d(void)
{
    uint32_t tokens[] = {
        (uint32_t)'a', META_RANGE_LITERAL, (uint32_t)'d',
        META_CLASS_END
    };
    uint32_t *buf = NULL;

    size_t sz = run_parse_and_fill(tokens, 0, &buf);

    const uint32_t expected[][2] = {
        { (uint32_t)'a', (uint32_t)'d' }
    };
    assert_ranges_equal(buf, sz, expected, 1);
}

/* Literals and ranges are reported in token order. */
void test_parse_class_multiple_mixed_literals_and_ranges(void)
{
    uint32_t tokens[] = {
        (uint32_t)'b',
        (uint32_t)'c',
        (uint32_t)'x', META_RANGE_LITERAL, (uint32_t)'z',
        (uint32_t)'Q',
        META_CLASS_END
    };
    uint32_t *buf = NULL;

    size_t sz = run_parse_and_fill(tokens, 0, &buf);

    const uint32_t expected[][2] = {
        { (uint32_t)'b', (uint32_t)'b' },
        { (uint32_t)'c', (uint32_t)'c' },
        { (uint32_t)'x', (uint32_t)'z' },
        { (uint32_t)'Q', (uint32_t)'Q' },
    };
    assert_ranges_equal(buf, sz, expected, 4);
}

void test_parse_class_bigvalue_single_literal(void)
{
    /* U+20AC EURO SIGN as a big value literal */
    uint32_t tokens[] = { META_BIGVALUE, 0x20AC, META_CLASS_END };
    uint32_t *buf = NULL;

    size_t sz = run_parse_and_fill(tokens, 0, &buf);

    const uint32_t expected[][2] = {
        { 0x20ACu, 0x20ACu }
    };
    assert_ranges_equal(buf, sz, expected, 1);
}

void test_parse_class_range_with_bigvalue_end_a_to_0100(void)
{
    /* 'a' - 0x0100 range, where the end uses META_BIGVALUE encoding */
    uint32_t tokens[] = {
        (uint32_t)'a', META_RANGE_LITERAL, META_BIGVALUE, 0x0100u,
        META_CLASS_END
    };
    uint32_t *buf = NULL;

    size_t sz = run_parse_and_fill(tokens, 0, &buf);

    const uint32_t expected[][2] = {
        { (uint32_t)'a', 0x0100u }
    };
    assert_ranges_equal(buf, sz, expected, 1);
}

void test_parse_class_posix_neg_adds_non_ascii_range(void)
{
    /* POSIX negated class; this test expects the class identifier in the
       second word not to affect the result on this path. */
    uint32_t tokens[] = {
        META_POSIX_NEG, 0, /* second word is class identifier */
        META_CLASS_END
    };
    uint32_t *buf = NULL;

    size_t sz = run_parse_and_fill(tokens, 0, &buf);

    /* Exactly one range is expected, starting at 0x100. */
    TEST_ASSERT_EQUAL_UINT32(2, (uint32_t)sz);
    TEST_ASSERT_NOT_NULL(buf);
    TEST_ASSERT_EQUAL_UINT32(0x100u, buf[0]);

    /* The upper bound is compared with MAX_UTF_CODE_POINT when the included
       headers expose that macro. */
#ifdef MAX_UTF_CODE_POINT
    TEST_ASSERT_EQUAL_UINT32((uint32_t)MAX_UTF_CODE_POINT, buf[1]);
#else
    /* Fallback: at least ensure the end is not below the start. */
    TEST_ASSERT_MESSAGE(buf[1] >= 0x100u, "Upper bound for non-ASCII range must be >= 0x100");
#endif
}

int main(void)
{
    UNITY_BEGIN();

    RUN_TEST(test_parse_class_empty_returns_zero);
    RUN_TEST(test_parse_class_single_literal_A);
    RUN_TEST(test_parse_class_range_literal_a_to_d);
    RUN_TEST(test_parse_class_multiple_mixed_literals_and_ranges);
    RUN_TEST(test_parse_class_bigvalue_single_literal);
    RUN_TEST(test_parse_class_range_with_bigvalue_end_a_to_0100);
    RUN_TEST(test_parse_class_posix_neg_adds_non_ascii_range);

    return UNITY_END();
}