/*
 * pcre2/tests/tests_pcre2_compile_class_fold_binary.c
 *
 * Hosting-page residue kept for provenance: "AryaWu's picture";
 * commit "Upload folder using huggingface_hub" (864071c, verified).
 */
#include "unity/unity.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
/* We need internal definitions/macros (eclass_op_info, ECL_*), so include compile header. */
#include "pcre2_compile.h"
/* The module exposes a wrapper around the static function under test. */
extern void test_fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info, PCRE2_SIZE *lengthptr);
/* Helpers */
/* Store `pattern` into each of the first eight entries of oi->bits.classwords. */
static void set_bits_pattern(eclass_op_info *oi, uint32_t pattern)
{
    int remaining = 8;

    while (remaining-- > 0) {
        oi->bits.classwords[remaining] = pattern;
    }
}
/*
 * Fill the first eight entries of oi->bits.classwords with an alternating
 * pattern: even indices receive p0, odd indices receive p1.
 */
static void set_bits_two_patterns(eclass_op_info *oi, uint32_t p0, uint32_t p1)
{
    for (int even = 0; even < 8; even += 2) {
        oi->bits.classwords[even] = p0;
        oi->bits.classwords[even + 1] = p1;
    }
}
/*
 * Assert (via Unity) that each of the first eight entries of
 * oi->bits.classwords equals `pattern`. Entries are checked in ascending
 * index order.
 */
static void expect_bits_eq(const eclass_op_info *oi, uint32_t pattern)
{
    int idx = 0;

    while (idx < 8) {
        TEST_ASSERT_EQUAL_HEX32(pattern, oi->bits.classwords[idx]);
        idx++;
    }
}
/*
 * Assert (via Unity) that the first eight entries of oi->bits.classwords
 * alternate between p0 (even indices) and p1 (odd indices).
 * NOTE(review): no caller is visible in this file; confirm whether it is
 * still needed.
 */
static void expect_bits_specific(const eclass_op_info *oi, uint32_t p0, uint32_t p1)
{
    const uint32_t by_parity[2] = { p0, p1 };

    for (int idx = 0; idx < 8; ++idx) {
        TEST_ASSERT_EQUAL_HEX32(by_parity[idx & 1], oi->bits.classwords[idx]);
    }
}
/* Per-test setup hook; these tests need no shared state, so it does nothing. */
void setUp(void)
{
}
/* Per-test teardown hook; nothing is allocated by the tests, so it does nothing. */
void tearDown(void)
{
}
/* AND: RHS is ANY -> drop RHS; bits &= allones (no change); no code change */
void test_fold_binary_AND_rhs_any_drop(void)
{
PCRE2_UCHAR bufL[8] = {0x11, 0x22, 0x33};
PCRE2_UCHAR bufR[4] = {ECL_ANY};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 3;
lhs.op_single_type = 0; /* compound */
set_bits_pattern(&lhs, 0x0F0F0F0Fu);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_ANY;
set_bits_pattern(&rhs, 0xFFFFFFFFu);
test_fold_binary(ECL_AND, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(3, lhs.length);
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(0x11, bufL[0]);
TEST_ASSERT_EQUAL_HEX8(0x22, bufL[1]);
TEST_ASSERT_EQUAL_HEX8(0x33, bufL[2]);
expect_bits_eq(&lhs, 0x0F0F0F0Fu);
}
/* AND: LHS is ANY -> adopt RHS (memmove), bits &= -> equals RHS bits */
void test_fold_binary_AND_lhs_any_adopt_rhs(void)
{
PCRE2_UCHAR bufL[8] = {ECL_ANY, 0xAA, 0xBB, 0xCC};
PCRE2_UCHAR bufR[8] = {0x10, 0x20, 0x30};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 1;
lhs.op_single_type = ECL_ANY;
set_bits_pattern(&lhs, 0xFFFFFFFFu);
rhs.code_start = bufR;
rhs.length = 3;
rhs.op_single_type = ECL_XCLASS; /* some specific type */
set_bits_pattern(&rhs, 0x12345678u);
test_fold_binary(ECL_AND, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(3, lhs.length);
TEST_ASSERT_EQUAL_INT(ECL_XCLASS, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(0x10, bufL[0]);
TEST_ASSERT_EQUAL_HEX8(0x20, bufL[1]);
TEST_ASSERT_EQUAL_HEX8(0x30, bufL[2]);
expect_bits_eq(&lhs, 0x12345678u);
}
/* AND: RHS is NONE -> result NONE in LHS; bits become zero */
void test_fold_binary_AND_rhs_none_result_none(void)
{
PCRE2_UCHAR bufL[8] = {0x44, 0x55, 0x66};
PCRE2_UCHAR bufR[1] = {ECL_NONE};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 3;
lhs.op_single_type = 0; /* compound */
set_bits_pattern(&lhs, 0x00FF00FFu);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_NONE;
set_bits_pattern(&rhs, 0x00000000u);
test_fold_binary(ECL_AND, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(1, lhs.length);
TEST_ASSERT_EQUAL_INT(ECL_NONE, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(ECL_NONE, bufL[0]);
expect_bits_eq(&lhs, 0x00000000u);
}
/* AND: both compound -> append ECL_AND in RPN, length += rhs.len + 1, bits &= */
void test_fold_binary_AND_both_compound_append(void)
{
PCRE2_UCHAR buf[16] = {10, 11, 20, 21, 22, 0};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = buf;
lhs.length = 2; /* [10, 11] */
lhs.op_single_type = 0; /* compound */
set_bits_two_patterns(&lhs, 0xFFFFFFFFu, 0x00FF00FFu);
rhs.code_start = buf + 2;
rhs.length = 3; /* [20, 21, 22] */
rhs.op_single_type = 0; /* compound */
set_bits_two_patterns(&rhs, 0x0F0F0F0Fu, 0xF0F0F0F0u);
test_fold_binary(ECL_AND, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(6, lhs.length);
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
/* buf[5] should be operator ECL_AND */
TEST_ASSERT_EQUAL_HEX8(ECL_AND, buf[5]);
/* bits &= */
for (int i = 0; i < 8; i++) {
uint32_t a = (i % 2 == 0) ? 0xFFFFFFFFu : 0x00FF00FFu;
uint32_t b = (i % 2 == 0) ? 0x0F0F0F0Fu : 0xF0F0F0F0u;
TEST_ASSERT_EQUAL_HEX32(a & b, lhs.bits.classwords[i]);
}
}
/* OR: RHS is NONE -> drop RHS; no change to code/length; bits |= 0 (no change) */
void test_fold_binary_OR_rhs_none_drop(void)
{
PCRE2_UCHAR bufL[8] = {0xAB, 0xCD};
PCRE2_UCHAR bufR[1] = {ECL_NONE};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 2;
lhs.op_single_type = 0;
set_bits_pattern(&lhs, 0x13579BDFu);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_NONE;
set_bits_pattern(&rhs, 0x00000000u);
test_fold_binary(ECL_OR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(2, lhs.length);
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(0xAB, bufL[0]);
TEST_ASSERT_EQUAL_HEX8(0xCD, bufL[1]);
expect_bits_eq(&lhs, 0x13579BDFu);
}
/* OR: LHS is NONE -> adopt RHS; bits |= rhs */
void test_fold_binary_OR_lhs_none_adopt_rhs(void)
{
PCRE2_UCHAR bufL[8] = {ECL_NONE, 0, 0};
PCRE2_UCHAR bufR[4] = {0xDE, 0xAD, 0xBE};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 1;
lhs.op_single_type = ECL_NONE;
set_bits_pattern(&lhs, 0x00000000u);
rhs.code_start = bufR;
rhs.length = 3;
rhs.op_single_type = ECL_XCLASS;
set_bits_pattern(&rhs, 0xCAFEBABEu);
test_fold_binary(ECL_OR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(3, lhs.length);
TEST_ASSERT_EQUAL_INT(ECL_XCLASS, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(0xDE, bufL[0]);
TEST_ASSERT_EQUAL_HEX8(0xAD, bufL[1]);
TEST_ASSERT_EQUAL_HEX8(0xBE, bufL[2]);
expect_bits_eq(&lhs, 0xCAFEBABEu);
}
/* OR: RHS is ANY -> result ANY in LHS; bits become all ones */
void test_fold_binary_OR_rhs_any_result_any(void)
{
PCRE2_UCHAR bufL[8] = {0x01, 0x02};
PCRE2_UCHAR bufR[1] = {ECL_ANY};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 2;
lhs.op_single_type = 0;
set_bits_pattern(&lhs, 0x00F000F0u);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_ANY;
set_bits_pattern(&rhs, 0xFFFFFFFFu);
test_fold_binary(ECL_OR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(1, lhs.length);
TEST_ASSERT_EQUAL_INT(ECL_ANY, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(ECL_ANY, bufL[0]);
expect_bits_eq(&lhs, 0xFFFFFFFFu);
}
/* OR: both compound -> append ECL_OR */
void test_fold_binary_OR_both_compound_append(void)
{
PCRE2_UCHAR buf[16] = {1, 2, 3, 4, 5, 0};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = buf;
lhs.length = 2;
lhs.op_single_type = 0;
set_bits_pattern(&lhs, 0x0F0F0F0Fu);
rhs.code_start = buf + 2;
rhs.length = 3;
rhs.op_single_type = 0;
set_bits_pattern(&rhs, 0xF0F0F0F0u);
test_fold_binary(ECL_OR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(6, lhs.length);
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(ECL_OR, buf[5]);
expect_bits_eq(&lhs, 0xFFFFFFFFu);
}
/* XOR: RHS is NONE -> drop RHS; bits ^= 0 (no change) */
void test_fold_binary_XOR_rhs_none_drop(void)
{
PCRE2_UCHAR bufL[8] = {0x10, 0x20};
PCRE2_UCHAR bufR[1] = {ECL_NONE};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 2;
lhs.op_single_type = 0;
set_bits_pattern(&lhs, 0xAAAAAAAAu);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_NONE;
set_bits_pattern(&rhs, 0x00000000u);
test_fold_binary(ECL_XOR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(2, lhs.length);
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(0x10, bufL[0]);
TEST_ASSERT_EQUAL_HEX8(0x20, bufL[1]);
expect_bits_eq(&lhs, 0xAAAAAAAAu);
}
/* XOR: LHS is NONE -> adopt RHS; bits ^= rhs */
void test_fold_binary_XOR_lhs_none_adopt_rhs(void)
{
PCRE2_UCHAR bufL[8] = {ECL_NONE, 0, 0};
PCRE2_UCHAR bufR[3] = {0x7A, 0x7B, 0x7C};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 1;
lhs.op_single_type = ECL_NONE;
set_bits_pattern(&lhs, 0x00000000u);
rhs.code_start = bufR;
rhs.length = 3;
rhs.op_single_type = ECL_XCLASS;
set_bits_pattern(&rhs, 0x0F0F0F0Fu);
test_fold_binary(ECL_XOR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(3, lhs.length);
TEST_ASSERT_EQUAL_INT(ECL_XCLASS, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(0x7A, bufL[0]);
TEST_ASSERT_EQUAL_HEX8(0x7B, bufL[1]);
TEST_ASSERT_EQUAL_HEX8(0x7C, bufL[2]);
expect_bits_eq(&lhs, 0x0F0F0F0Fu);
}
/* XOR: RHS is ANY, LHS compound -> fold_negation: append ECL_NOT; bits ^= allones => invert */
void test_fold_binary_XOR_rhs_any_lhs_compound_append_not(void)
{
PCRE2_UCHAR bufL[8] = {0xAA, 0xBB, 0xCC, 0};
PCRE2_UCHAR bufR[1] = {ECL_ANY};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 3;
lhs.op_single_type = 0; /* compound */
set_bits_pattern(&lhs, 0x00FF00FFu);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_ANY;
set_bits_pattern(&rhs, 0xFFFFFFFFu);
test_fold_binary(ECL_XOR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(4, lhs.length);
TEST_ASSERT_EQUAL_HEX8(ECL_NOT, bufL[3]);
/* op_single_type remains compound (0) */
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
/* bits inverted */
expect_bits_eq(&lhs, ~0x00FF00FFu);
}
/* XOR: RHS is ANY, LHS single ANY -> becomes NONE; bits inverted */
void test_fold_binary_XOR_rhs_any_lhs_single_any_toggle(void)
{
PCRE2_UCHAR bufL[2] = {ECL_ANY, 0};
PCRE2_UCHAR bufR[1] = {ECL_ANY};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = bufL;
lhs.length = 1;
lhs.op_single_type = ECL_ANY;
set_bits_pattern(&lhs, 0xAAAAAAAAu);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_ANY;
set_bits_pattern(&rhs, 0xFFFFFFFFu);
test_fold_binary(ECL_XOR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(1, lhs.length);
TEST_ASSERT_EQUAL_INT(ECL_NONE, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(ECL_NONE, bufL[0]);
expect_bits_eq(&lhs, ~0xAAAAAAAAu);
}
/* XOR: both compound -> append ECL_XOR, bits ^= */
void test_fold_binary_XOR_both_compound_append(void)
{
PCRE2_UCHAR buf[16] = {9, 8, 7, 6, 5, 0};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
lhs.code_start = buf;
lhs.length = 2;
lhs.op_single_type = 0;
set_bits_two_patterns(&lhs, 0xAAAAAAAAu, 0x55555555u);
rhs.code_start = buf + 2;
rhs.length = 3;
rhs.op_single_type = 0;
set_bits_two_patterns(&rhs, 0x0F0F0F0Fu, 0xF0F0F0F0u);
test_fold_binary(ECL_XOR, &lhs, &rhs, NULL);
TEST_ASSERT_EQUAL_UINT(6, lhs.length);
TEST_ASSERT_EQUAL_INT(0, lhs.op_single_type);
TEST_ASSERT_EQUAL_HEX8(ECL_XOR, buf[5]);
for (int i = 0; i < 8; i++) {
uint32_t a = (i % 2 == 0) ? 0xAAAAAAAAu : 0x55555555u;
uint32_t b = (i % 2 == 0) ? 0x0F0F0F0Fu : 0xF0F0F0F0u;
TEST_ASSERT_EQUAL_HEX32(a ^ b, lhs.bits.classwords[i]);
}
}
/* lengthptr mode: both compound (AND) -> *lengthptr increments by 1 */
void test_fold_binary_lengthptr_increment_on_compound_AND(void)
{
PCRE2_UCHAR buf[8] = {0};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
PCRE2_SIZE len = 10;
lhs.code_start = buf;
lhs.length = 2;
lhs.op_single_type = 0;
set_bits_pattern(&lhs, 0xFFFFFFFFu);
rhs.code_start = buf + 2;
rhs.length = 3;
rhs.op_single_type = 0;
set_bits_pattern(&rhs, 0xFFFFFFFFu);
test_fold_binary(ECL_AND, &lhs, &rhs, &len);
TEST_ASSERT_EQUAL_UINT(10 + 1, len);
TEST_ASSERT_EQUAL_UINT(2 + 3 + 1, lhs.length);
}
/* lengthptr mode: XOR rhs ANY and lhs compound -> fold_negation increments by 1 */
void test_fold_binary_lengthptr_increment_on_xor_rhs_any_negation(void)
{
PCRE2_UCHAR bufL[8] = {0};
PCRE2_UCHAR bufR[1] = {ECL_ANY};
eclass_op_info lhs = {0};
eclass_op_info rhs = {0};
PCRE2_SIZE len = 100;
lhs.code_start = bufL;
lhs.length = 3;
lhs.op_single_type = 0;
set_bits_pattern(&lhs, 0x12345678u);
rhs.code_start = bufR;
rhs.length = 1;
rhs.op_single_type = ECL_ANY;
set_bits_pattern(&rhs, 0xFFFFFFFFu);
test_fold_binary(ECL_XOR, &lhs, &rhs, &len);
TEST_ASSERT_EQUAL_UINT(100 + 1, len);
TEST_ASSERT_EQUAL_UINT(4, lhs.length);
/* In sizing mode, code bytes are not written/relied on here. */
}
/*
 * Unity test runner: executes every fold_binary test case in a fixed order
 * and returns the value produced by UNITY_END().
 */
int main(void)
{
    int result;

    UNITY_BEGIN();

    /* ECL_AND cases */
    RUN_TEST(test_fold_binary_AND_rhs_any_drop);
    RUN_TEST(test_fold_binary_AND_lhs_any_adopt_rhs);
    RUN_TEST(test_fold_binary_AND_rhs_none_result_none);
    RUN_TEST(test_fold_binary_AND_both_compound_append);

    /* ECL_OR cases */
    RUN_TEST(test_fold_binary_OR_rhs_none_drop);
    RUN_TEST(test_fold_binary_OR_lhs_none_adopt_rhs);
    RUN_TEST(test_fold_binary_OR_rhs_any_result_any);
    RUN_TEST(test_fold_binary_OR_both_compound_append);

    /* ECL_XOR cases */
    RUN_TEST(test_fold_binary_XOR_rhs_none_drop);
    RUN_TEST(test_fold_binary_XOR_lhs_none_adopt_rhs);
    RUN_TEST(test_fold_binary_XOR_rhs_any_lhs_compound_append_not);
    RUN_TEST(test_fold_binary_XOR_rhs_any_lhs_single_any_toggle);
    RUN_TEST(test_fold_binary_XOR_both_compound_append);

    /* Cases that pass a non-NULL lengthptr */
    RUN_TEST(test_fold_binary_lengthptr_increment_on_compound_AND);
    RUN_TEST(test_fold_binary_lengthptr_increment_on_xor_rhs_any_negation);

    result = UNITY_END();
    return result;
}