/*
 * pcre2/tests/tests_pcre2_compile_class_utf_caseless_extend.c
 * Uploaded by AryaWu using huggingface_hub (revision 864071c, verified).
 */
#include "unity/unity.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
 * The module provides this wrapper that calls the static target function.
 *
 * As exercised by the tests in this file:
 *   - start/end are the bounds of the input character range,
 *   - options carries option bits (the tests always pass OPT_UTF),
 *   - buffer may be NULL; the tests expect the same return value with and
 *     without a buffer, and treat that value as the number of uint32_t
 *     entries forming [start,end] pairs in the output.
 * NOTE(review): the exact contract lives in the module under test; confirm
 * there before relying on anything not asserted below.
 */
extern size_t test_utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options, uint32_t *buffer);
/*
 * Helper: report whether the flat array `buf`, read as consecutive
 * [start,end] pairs, contains the pair [s,e].
 *
 * count_u32 is the number of uint32_t entries (not pairs). An odd count is
 * not a well-formed pair list and yields 0. Only aligned pairs are compared,
 * so a match straddling two neighbouring pairs does not count.
 *
 * Returns 1 when the pair is present, 0 otherwise.
 */
static int has_pair(const uint32_t *buf, size_t count_u32, uint32_t s, uint32_t e)
{
    if ((count_u32 & 1u) != 0) {
        return 0;
    }

    const size_t pair_total = count_u32 / 2;
    for (size_t p = 0; p < pair_total; ++p) {
        const uint32_t *pair = buf + 2 * p;
        if (pair[0] == s && pair[1] == e) {
            return 1;
        }
    }

    return 0;
}
/* Test fixture hook; intentionally empty because no test here needs setup. */
void setUp(void)
{
}
/* Test fixture hook; intentionally empty because no test here needs cleanup. */
void tearDown(void)
{
}
/*
 * PARSE_CLASS_UTF internal option bit is 0x1 in this module; set it to
 * satisfy the assert. Every call to test_utf_caseless_extend() in this file
 * passes this value as its options argument.
 * NOTE(review): the value is duplicated here rather than taken from a header;
 * keep it in sync with the module's own definition.
 */
static const uint32_t OPT_UTF = 0x1u;
/* '5' should contribute only the literal pair ['5','5'] and nothing else. */
void test_utf_caseless_extend_digit_only_single_range(void)
{
    const uint32_t first = (uint32_t)('5');
    const uint32_t last = (uint32_t)('5');
    uint32_t out[4];
    const size_t out_len = sizeof out / sizeof out[0];

    /* Sizing call: no output buffer is supplied. */
    const size_t needed = test_utf_caseless_extend(first, last, OPT_UTF, NULL);
    TEST_ASSERT_EQUAL_UINT32(2u, (uint32_t)needed);

    /* Start from a recognisable fill pattern rather than indeterminate values. */
    size_t slot = 0;
    while (slot < out_len) {
        out[slot] = 0xDEADBEEFu;
        ++slot;
    }

    /* Filling call: same arguments, real buffer. */
    const size_t written = test_utf_caseless_extend(first, last, OPT_UTF, out);
    TEST_ASSERT_EQUAL_UINT32(2u, (uint32_t)written);
    TEST_ASSERT_TRUE(has_pair(out, written, first, last));
}
/*
 * 'a' should add uppercase 'A' as a singleton and then [a,a]:
 * exactly two pairs, i.e. four uint32_t values.
 */
void test_utf_caseless_extend_lowercase_singleton_includes_uppercase(void)
{
    const uint32_t start = (uint32_t)('a');
    const uint32_t end = (uint32_t)('a');

    /* Sizing call with a NULL buffer. */
    size_t n = test_utf_caseless_extend(start, end, OPT_UTF, NULL);
    TEST_ASSERT_EQUAL_UINT32(4u, (uint32_t)n);

    /*
     * The required size is pinned by the assertion above, so a fixed array is
     * sufficient. A heap buffer would leak whenever a later assertion fails,
     * because a failing Unity assertion leaves the test before free() runs.
     */
    uint32_t buf[4] = {0};
    size_t n1 = test_utf_caseless_extend(start, end, OPT_UTF, buf);
    TEST_ASSERT_EQUAL_UINT32(4u, (uint32_t)n1);

    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('A'), (uint32_t)('A')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('a'), (uint32_t)('a')));

    /* No extra pairs expected. */
    TEST_ASSERT_EQUAL_UINT32(2u, (uint32_t)(n1 / 2));
}
/* 'A' should add lowercase 'a' as a singleton and then [A,A]. */
void test_utf_caseless_extend_uppercase_singleton_includes_lowercase(void)
{
    const uint32_t upper = (uint32_t)('A');
    const uint32_t lower = (uint32_t)('a');
    uint32_t out[4] = {0};

    /* Sizing call: no output buffer is supplied. */
    const size_t needed = test_utf_caseless_extend(upper, upper, OPT_UTF, NULL);
    TEST_ASSERT_EQUAL_UINT32(4u, (uint32_t)needed);

    /* Filling call: same arguments, real buffer. */
    const size_t written = test_utf_caseless_extend(upper, upper, OPT_UTF, out);
    TEST_ASSERT_EQUAL_UINT32(4u, (uint32_t)written);

    /* Both the other-case singleton and the literal pair must be present. */
    TEST_ASSERT_TRUE(has_pair(out, written, lower, lower));
    TEST_ASSERT_TRUE(has_pair(out, written, upper, upper));
}
/*
 * For [a-c], expect singles [A],[B],[C] and then range [a,c]:
 * four pairs, i.e. eight uint32_t values.
 */
void test_utf_caseless_extend_ascii_range_yields_upper_singles_and_lower_range(void)
{
    const uint32_t start = (uint32_t)('a');
    const uint32_t end = (uint32_t)('c');

    /* Sizing call with a NULL buffer. */
    size_t n = test_utf_caseless_extend(start, end, OPT_UTF, NULL);
    TEST_ASSERT_EQUAL_UINT32(8u, (uint32_t)n);

    /*
     * The required size is pinned by the assertion above, so a fixed array is
     * sufficient. A heap buffer would leak whenever a later assertion fails,
     * because a failing Unity assertion leaves the test before free() runs.
     */
    uint32_t buf[8] = {0};
    size_t n1 = test_utf_caseless_extend(start, end, OPT_UTF, buf);
    TEST_ASSERT_EQUAL_UINT32(8u, (uint32_t)n1);

    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('A'), (uint32_t)('A')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('B'), (uint32_t)('B')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('C'), (uint32_t)('C')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('a'), (uint32_t)('c')));

    /* Exactly 4 pairs expected. */
    TEST_ASSERT_EQUAL_UINT32(4u, (uint32_t)(n1 / 2));
}
/*
 * A slightly different range, [m-p]: the NULL-buffer call and the buffered
 * call must report the same size, and the buffer must hold the four
 * uppercase singles plus the literal range.
 */
void test_utf_caseless_extend_size_consistency_null_vs_buffer(void)
{
    const uint32_t start = (uint32_t)('m');
    const uint32_t end = (uint32_t)('p');

    size_t n0 = test_utf_caseless_extend(start, end, OPT_UTF, NULL);
    TEST_ASSERT_EQUAL_UINT32(10u, (uint32_t)n0); /* 4 singles + 1 range => 5 pairs => 10 u32s */

    /*
     * The required size is pinned by the assertion above, so a fixed array is
     * sufficient. A heap buffer would leak whenever a later assertion fails,
     * because a failing Unity assertion leaves the test before free() runs.
     */
    uint32_t buf[10] = {0};
    size_t n1 = test_utf_caseless_extend(start, end, OPT_UTF, buf);
    TEST_ASSERT_EQUAL_UINT32((uint32_t)n0, (uint32_t)n1);

    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('M'), (uint32_t)('M')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('N'), (uint32_t)('N')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('O'), (uint32_t)('O')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('P'), (uint32_t)('P')));
    TEST_ASSERT_TRUE(has_pair(buf, n1, (uint32_t)('m'), (uint32_t)('p')));
}
/*
 * The buffered call must not write past the size it reports: sentinel values
 * placed after the expected output have to survive the call unchanged.
 */
void test_utf_caseless_extend_no_overwrite_beyond_returned_size(void)
{
    /* Expected output length for 'a' and the number of guard slots after it. */
    enum { EXPECTED_U32S = 4, SLACK_U32S = 4 };

    const uint32_t start = (uint32_t)('a');
    const uint32_t end = (uint32_t)('a');

    size_t n = test_utf_caseless_extend(start, end, OPT_UTF, NULL);
    TEST_ASSERT_EQUAL_UINT32(EXPECTED_U32S, (uint32_t)n);

    /*
     * The required size is pinned by the assertion above, so a fixed array is
     * sufficient. A heap buffer would leak whenever a later assertion fails,
     * because a failing Unity assertion leaves the test before free() runs.
     */
    uint32_t buf[EXPECTED_U32S + SLACK_U32S];
    const size_t total = sizeof buf / sizeof buf[0];
    for (size_t i = 0; i < total; ++i) {
        buf[i] = 0xDEADBEEFu;
    }

    size_t n1 = test_utf_caseless_extend(start, end, OPT_UTF, buf);
    TEST_ASSERT_EQUAL_UINT32((uint32_t)n, (uint32_t)n1);

    /* Ensure the extra tail remains untouched. */
    for (size_t i = n; i < total; ++i) {
        TEST_ASSERT_EQUAL_UINT32(0xDEADBEEFu, buf[i]);
    }
}
/*
 * Test runner entry point: registers every test case in this file with Unity,
 * in a fixed order, and uses the value of UNITY_END() as the exit status.
 */
int main(void)
{
    UNITY_BEGIN();

    /* Single-character inputs. */
    RUN_TEST(test_utf_caseless_extend_digit_only_single_range);
    RUN_TEST(test_utf_caseless_extend_lowercase_singleton_includes_uppercase);
    RUN_TEST(test_utf_caseless_extend_uppercase_singleton_includes_lowercase);

    /* Multi-character ranges. */
    RUN_TEST(test_utf_caseless_extend_ascii_range_yields_upper_singles_and_lower_range);
    RUN_TEST(test_utf_caseless_extend_size_consistency_null_vs_buffer);

    /* Buffer-bounds behaviour. */
    RUN_TEST(test_utf_caseless_extend_no_overwrite_beyond_returned_size);

    const int outcome = UNITY_END();
    return outcome;
}