/*
 * File: pcre2/tests/tests_pcre2_compile_parse_regex.c
 * Hosting-page residue: AryaWu, "Upload folder using huggingface_hub",
 * commit 864071c (verified).
 */
#include "unity/unity.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#include "pcre2_compile.h"
#include <stdlib.h>
#include <string.h>
/* External wrapper provided in the module under test. Judging by its name and
   signature it exposes the parser's parse_regex() routine to the tests; the
   definition is not in this file - confirm against the module source. */
extern int test_parse_regex(PCRE2_SPTR ptr, uint32_t options, uint32_t xoptions, BOOL *has_lookbehind, compile_block *cb);
/* Compile context shared by the tests: created in setUp(), released and reset
   to NULL in tearDown(), and stored into each compile_block by init_cb(). */
static pcre2_compile_context *g_cctx = NULL;
/*
 * Prepare a compile_block for a direct call to test_parse_regex().
 *
 * The block is zeroed, pointed at the pattern range
 * [pattern, pattern + patlen), given the shared compile context g_cctx, and
 * equipped with two zero-filled heap buffers:
 *   - a workspace of workspace_units code units, and
 *   - a parsed-pattern output vector of parsed_units 32-bit elements.
 * All remaining fields stay zero. Release the buffers with free_cb().
 *
 * If either buffer cannot be allocated, whatever was allocated is freed, both
 * buffer pointers in *cb are left NULL (so free_cb() remains safe), and the
 * running Unity test is failed instead of continuing with NULL buffers.
 */
static void init_cb(compile_block *cb,
                    PCRE2_SPTR pattern,
                    size_t patlen,
                    uint32_t workspace_units,
                    uint32_t parsed_units)
{
  PCRE2_UCHAR *workspace;
  uint32_t *parsed;

  memset(cb, 0, sizeof *cb);
  cb->start_pattern = pattern;
  cb->end_pattern = pattern + patlen;

  /* Provide a compile context for limits */
  cb->cx = g_cctx;

  workspace = calloc(workspace_units, sizeof *workspace);
  parsed = calloc(parsed_units, sizeof *parsed);

  /* calloc() may legitimately return NULL for a zero-sized request, so only
     treat NULL as a failure when a non-zero size was asked for. */
  if ((workspace == NULL && workspace_units != 0) ||
      (parsed == NULL && parsed_units != 0)) {
    free(workspace);
    free(parsed);
    TEST_FAIL_MESSAGE("init_cb: out of memory allocating compile_block buffers");
  }

  /* Workspace for nested constructs */
  cb->start_workspace = workspace;
  cb->workspace_size = workspace_units;

  /* Parsed pattern output area; the end pointer is one past the last element */
  cb->parsed_pattern = parsed;
  cb->parsed_pattern_end = parsed + parsed_units;
}
/*
 * Release the two heap buffers held by *cb (workspace and parsed-pattern
 * vector) and wipe the whole block so no stale pointers remain.
 */
static void free_cb(compile_block *cb)
{
  /* free(NULL) is defined to do nothing, so no NULL guards are required. */
  free(cb->start_workspace);
  free(cb->parsed_pattern);

  memset(cb, 0, sizeof *cb);
}
/*
 * Unity per-test fixture: create the shared compile context with the default
 * memory management (NULL general context). The context's default limits
 * (parens_nest_limit etc.) are left as provided by PCRE2.
 *
 * init_cb() stores this pointer in every compile_block, so a failed creation
 * is reported as a test failure here rather than passing NULL on.
 */
void setUp(void)
{
  g_cctx = pcre2_compile_context_create(NULL);
  TEST_ASSERT_NOT_NULL_MESSAGE(g_cctx, "pcre2_compile_context_create() returned NULL");
}
/*
 * Unity per-test fixture: release the compile context created by setUp(),
 * if there is one, and reset the shared pointer to NULL.
 */
void tearDown(void)
{
  if (g_cctx == NULL) {
    return;
  }

  pcre2_compile_context_free(g_cctx);
  g_cctx = NULL;
}
/* Test 1: Literal-only mode, simple content */
void test_parse_regex_literal_basic(void)
{
const char *pat_cstr = "abc";
PCRE2_SPTR pattern = (PCRE2_SPTR)(const unsigned char *)pat_cstr;
size_t patlen = strlen(pat_cstr);
compile_block cb;
init_cb(&cb, pattern, patlen, /*workspace*/ 1024, /*parsed*/ 64);
BOOL has_lb = FALSE;
uint32_t options = PCRE2_LITERAL;
uint32_t xoptions = 0;
int rc = test_parse_regex(pattern, options, xoptions, &has_lb, &cb);
TEST_ASSERT_EQUAL_INT(0, rc);
TEST_ASSERT_FALSE(has_lb);
/* Verify that parsed buffer starts with the literal characters a, b, c */
TEST_ASSERT_NOT_NULL(cb.parsed_pattern);
TEST_ASSERT_EQUAL_UINT32('a', cb.parsed_pattern[0]);
TEST_ASSERT_EQUAL_UINT32('b', cb.parsed_pattern[1]);
TEST_ASSERT_EQUAL_UINT32('c', cb.parsed_pattern[2]);
free_cb(&cb);
}
/* Test 2: Lookbehind detection sets has_lookbehind = TRUE */
void test_parse_regex_lookbehind_flag(void)
{
const char *pat_cstr = "(?<=a)b";
PCRE2_SPTR pattern = (PCRE2_SPTR)(const unsigned char *)pat_cstr;
size_t patlen = strlen(pat_cstr);
compile_block cb;
init_cb(&cb, pattern, patlen, /*workspace*/ 2048, /*parsed*/ 128);
BOOL has_lb = FALSE;
uint32_t options = 0;
uint32_t xoptions = 0;
int rc = test_parse_regex(pattern, options, xoptions, &has_lb, &cb);
TEST_ASSERT_EQUAL_INT(0, rc);
TEST_ASSERT_TRUE(has_lb);
/* Non-capturing lookbehind should not increase capturing count */
TEST_ASSERT_EQUAL_UINT32(0, cb.bracount);
free_cb(&cb);
}
/*
 * Test 3: an unmatched '(' must make the parser return a non-zero error code
 * and record the error offset in the compile block.
 */
void test_parse_regex_unmatched_paren_error_offset(void)
{
  const char *pat_cstr = "(";
  PCRE2_SPTR pattern = (PCRE2_SPTR)(const unsigned char *)pat_cstr;
  size_t patlen = strlen(pat_cstr);
  compile_block cb;
  BOOL has_lb = FALSE;
  uint32_t options = 0;
  uint32_t xoptions = 0;
  int rc;

  init_cb(&cb, pattern, patlen, /*workspace*/ 512, /*parsed*/ 32);

  rc = test_parse_regex(pattern, options, xoptions, &has_lb, &cb);
  TEST_ASSERT_NOT_EQUAL(0, rc);

  /* Error offset should point just past the '(' (i.e. index 1).
     Unity spells its size_t comparison TEST_ASSERT_EQUAL_size_t. */
  TEST_ASSERT_EQUAL_size_t(1, cb.erroroffset);

  free_cb(&cb);
}
/*
 * Test 4: a pattern consisting of a single trailing backslash must make the
 * parser return a non-zero error code and record the error offset in the
 * compile block.
 */
void test_parse_regex_trailing_backslash_error(void)
{
  const char *pat_cstr = "\\";
  PCRE2_SPTR pattern = (PCRE2_SPTR)(const unsigned char *)pat_cstr;
  size_t patlen = strlen(pat_cstr);
  compile_block cb;
  BOOL has_lb = FALSE;
  uint32_t options = 0;
  uint32_t xoptions = 0;
  int rc;

  init_cb(&cb, pattern, patlen, /*workspace*/ 512, /*parsed*/ 32);

  rc = test_parse_regex(pattern, options, xoptions, &has_lb, &cb);
  TEST_ASSERT_NOT_EQUAL(0, rc);

  /* Error offset should be at end of pattern (1).
     Unity spells its size_t comparison TEST_ASSERT_EQUAL_size_t. */
  TEST_ASSERT_EQUAL_size_t(1, cb.erroroffset);

  free_cb(&cb);
}
/* Test 5: Simple capturing group increments bracount */
void test_parse_regex_capturing_group_count(void)
{
const char *pat_cstr = "(a)";
PCRE2_SPTR pattern = (PCRE2_SPTR)(const unsigned char *)pat_cstr;
size_t patlen = strlen(pat_cstr);
compile_block cb;
init_cb(&cb, pattern, patlen, /*workspace*/ 2048, /*parsed*/ 128);
BOOL has_lb = FALSE;
uint32_t options = 0;
uint32_t xoptions = 0;
int rc = test_parse_regex(pattern, options, xoptions, &has_lb, &cb);
TEST_ASSERT_EQUAL_INT(0, rc);
TEST_ASSERT_FALSE(has_lb);
TEST_ASSERT_EQUAL_UINT32(1, cb.bracount);
free_cb(&cb);
}
/*
 * Test runner entry point: runs every test case in this file under Unity and
 * returns the value produced by UNITY_END() as the process exit status.
 */
int main(void)
{
  int result;

  UNITY_BEGIN();

  /* Successful parses */
  RUN_TEST(test_parse_regex_literal_basic);
  RUN_TEST(test_parse_regex_lookbehind_flag);

  /* Error reporting */
  RUN_TEST(test_parse_regex_unmatched_paren_error_offset);
  RUN_TEST(test_parse_regex_trailing_backslash_error);

  /* Group accounting */
  RUN_TEST(test_parse_regex_capturing_group_count);

  result = UNITY_END();
  return result;
}