/*
 * pcre2 / tests /tests_pcre2_compile_class_parse_class.c
 * AryaWu's picture
 * Upload folder using huggingface_hub
 * 864071c verified
 */
#include "unity/unity.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#include "pcre2_compile.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* External wrapper provided by the module under test */
extern size_t test_parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer);
/*
 * Check a flat output buffer against a list of expected {start, end} pairs.
 *
 *   actual          - flat array read as start0, end0, start1, end1, ...
 *   actual_size_u32 - number of uint32_t entries filled in actual
 *   expected        - array of two-element {start, end} rows
 *   expected_pairs  - number of rows in expected
 *
 * The entry count (expected_pairs * 2) is asserted first, then each pair is
 * compared element by element.
 */
static void assert_ranges_equal(const uint32_t *actual, size_t actual_size_u32, const uint32_t (*expected)[2], size_t expected_pairs)
{
    const size_t expected_len = expected_pairs * 2;
    size_t pair;

    TEST_ASSERT_EQUAL_UINT32((uint32_t)expected_len, (uint32_t)actual_size_u32);

    for (pair = 0; pair < expected_pairs; ++pair) {
        const uint32_t *got = &actual[pair * 2];
        const uint32_t *want = expected[pair];

        TEST_ASSERT_EQUAL_UINT32(want[0], got[0]); /* range start */
        TEST_ASSERT_EQUAL_UINT32(want[1], got[1]); /* range end */
    }
}
/* Fixture hook named per Unity's setUp convention; this suite has nothing to prepare. */
void setUp(void)
{
}
/* Fixture hook named per Unity's tearDown convention; this suite has nothing to release. */
void tearDown(void)
{
}
/*
 * Helper that runs test_parse_class() twice: once with a NULL buffer to query
 * the required size, then again with an allocated buffer to fill it.
 *
 *   tokens  - token stream handed to test_parse_class()
 *   options - option bits forwarded unchanged to test_parse_class()
 *   out_buf - receives the malloc'd result buffer, or NULL when the reported
 *             size is 0; the caller owns it and releases it with free()
 *
 * Returns the number of uint32_t entries reported by the size query.
 *
 * Asserts that the allocation succeeded and that both passes report the same
 * size. *out_buf is set to NULL before anything else, and the buffer is freed
 * before a failing size assertion, so the caller never sees an unassigned
 * pointer and a failed check does not leak the allocation.
 */
static size_t run_parse_and_fill(uint32_t *tokens, uint32_t options, uint32_t **out_buf)
{
    uint32_t *buf;
    size_t needed;
    size_t filled;

    *out_buf = NULL;

    needed = test_parse_class(tokens, options, NULL);
    if (needed == 0) {
        return 0;
    }

    buf = malloc(needed * sizeof *buf);
    TEST_ASSERT_NOT_NULL(buf);

    /* Non-zero fill pattern; presumably so entries the second pass leaves
       unwritten are easy to spot. */
    memset(buf, 0xCC, needed * sizeof *buf);

    filled = test_parse_class(tokens, options, buf);

    /* Same comparison the assertion below makes: release the buffer first
       when it is going to fail, since ownership has not reached the caller. */
    if ((uint32_t)filled != (uint32_t)needed) {
        free(buf);
        buf = NULL;
    }
    TEST_ASSERT_EQUAL_UINT32((uint32_t)needed, (uint32_t)filled);

    *out_buf = buf;
    return needed;
}
/*
 * A token stream holding only META_CLASS_END must report size 0, both when
 * test_parse_class() is called directly and through run_parse_and_fill(),
 * which must then leave the output pointer NULL.
 */
void test_parse_class_empty_returns_zero(void)
{
    uint32_t class_tokens[] = { META_CLASS_END };
    uint32_t *ranges = NULL;
    size_t direct_size;
    size_t helper_size;

    /* Direct size query with no options and no output buffer. */
    direct_size = test_parse_class(class_tokens, 0, NULL);
    TEST_ASSERT_EQUAL_UINT32(0, (uint32_t)direct_size);

    /* Same input through the two-pass helper. */
    helper_size = run_parse_and_fill(class_tokens, 0, &ranges);
    TEST_ASSERT_EQUAL_UINT32(0, (uint32_t)helper_size);
    TEST_ASSERT_NULL(ranges);
}
/* A lone literal 'A' is expected to come back as the single range {'A', 'A'}. */
void test_parse_class_single_literal_A(void)
{
    uint32_t class_tokens[] = { (uint32_t)'A', META_CLASS_END };
    const uint32_t want[][2] = {
        { (uint32_t)'A', (uint32_t)'A' }
    };
    uint32_t *ranges = NULL;
    size_t count = run_parse_and_fill(class_tokens, 0, &ranges);

    assert_ranges_equal(ranges, count, want, sizeof want / sizeof want[0]);
    free(ranges);
}
/* 'a', META_RANGE_LITERAL, 'd' is expected to produce the single range {'a', 'd'}. */
void test_parse_class_range_literal_a_to_d(void)
{
    uint32_t class_tokens[] = {
        (uint32_t)'a', META_RANGE_LITERAL, (uint32_t)'d',
        META_CLASS_END
    };
    const uint32_t want[][2] = {
        { (uint32_t)'a', (uint32_t)'d' }
    };
    uint32_t *ranges = NULL;
    size_t count = run_parse_and_fill(class_tokens, 0, &ranges);

    assert_ranges_equal(ranges, count, want, sizeof want / sizeof want[0]);
    free(ranges);
}
/*
 * Literals and one literal range mixed in a single class. The expected output
 * keeps the input order: b, c, x-z, Q.
 */
void test_parse_class_multiple_mixed_literals_and_ranges(void)
{
    uint32_t class_tokens[] = {
        (uint32_t)'b',
        (uint32_t)'c',
        (uint32_t)'x', META_RANGE_LITERAL, (uint32_t)'z',
        (uint32_t)'Q',
        META_CLASS_END
    };
    const uint32_t want[][2] = {
        { (uint32_t)'b', (uint32_t)'b' },
        { (uint32_t)'c', (uint32_t)'c' },
        { (uint32_t)'x', (uint32_t)'z' },
        { (uint32_t)'Q', (uint32_t)'Q' }
    };
    uint32_t *ranges = NULL;
    size_t count = run_parse_and_fill(class_tokens, 0, &ranges);

    assert_ranges_equal(ranges, count, want, sizeof want / sizeof want[0]);
    free(ranges);
}
/*
 * A literal supplied as META_BIGVALUE followed by its value word
 * (U+20AC EURO SIGN) is expected to yield the range {0x20AC, 0x20AC}.
 */
void test_parse_class_bigvalue_single_literal(void)
{
    uint32_t class_tokens[] = {
        META_BIGVALUE, 0x20AC,
        META_CLASS_END
    };
    const uint32_t want[][2] = {
        { 0x20ACu, 0x20ACu }
    };
    uint32_t *ranges = NULL;
    size_t count = run_parse_and_fill(class_tokens, 0, &ranges);

    assert_ranges_equal(ranges, count, want, sizeof want / sizeof want[0]);
    free(ranges);
}
/*
 * Range from 'a' to 0x0100 where the upper end is written with the
 * META_BIGVALUE encoding; the expected result is the single range {'a', 0x0100}.
 */
void test_parse_class_range_with_bigvalue_end_a_to_0100(void)
{
    uint32_t class_tokens[] = {
        (uint32_t)'a', META_RANGE_LITERAL, META_BIGVALUE, 0x0100u,
        META_CLASS_END
    };
    const uint32_t want[][2] = {
        { (uint32_t)'a', 0x0100u }
    };
    uint32_t *ranges = NULL;
    size_t count = run_parse_and_fill(class_tokens, 0, &ranges);

    assert_ranges_equal(ranges, count, want, sizeof want / sizeof want[0]);
    free(ranges);
}
/*
 * A negated POSIX class is expected to contribute exactly one range that
 * starts at 0x100. Its upper bound is checked against MAX_UTF_CODE_POINT when
 * that macro is visible, otherwise only against the lower bound.
 */
void test_parse_class_posix_neg_adds_non_ascii_range(void)
{
    /* The word after META_POSIX_NEG is the class identifier; this test
       passes 0 and expects the value to be ignored on this path. */
    uint32_t class_tokens[] = {
        META_POSIX_NEG, 0,
        META_CLASS_END
    };
    uint32_t *ranges = NULL;
    size_t count = run_parse_and_fill(class_tokens, 0, &ranges);

    /* One range means two uint32_t entries. */
    TEST_ASSERT_EQUAL_UINT32(2, (uint32_t)count);
    TEST_ASSERT_NOT_NULL(ranges);

    TEST_ASSERT_EQUAL_UINT32(0x100u, ranges[0]);

#ifdef MAX_UTF_CODE_POINT
    /* The 8-bit build is expected to report the maximum UTF code point. */
    TEST_ASSERT_EQUAL_UINT32((uint32_t)MAX_UTF_CODE_POINT, ranges[1]);
#else
    /* Without the macro, only require that the range is not inverted. */
    TEST_ASSERT_MESSAGE(ranges[1] >= 0x100u, "Upper bound for non-ASCII range must be >= 0x100");
#endif

    free(ranges);
}
/* Test runner: executes every parse_class test in order and returns the
   value produced by UNITY_END() as the process exit status. */
int main(void)
{
    int status;

    UNITY_BEGIN();

    RUN_TEST(test_parse_class_empty_returns_zero);
    RUN_TEST(test_parse_class_single_literal_A);
    RUN_TEST(test_parse_class_range_literal_a_to_d);
    RUN_TEST(test_parse_class_multiple_mixed_literals_and_ranges);
    RUN_TEST(test_parse_class_bigvalue_single_literal);
    RUN_TEST(test_parse_class_range_with_bigvalue_end_a_to_0100);
    RUN_TEST(test_parse_class_posix_neg_adds_non_ascii_range);

    status = UNITY_END();
    return status;
}