Spaces:
Sleeping
Sleeping
| """Tests for the math answer verifier.""" | |
| import pytest | |
| from data.verifiers.math_verifier import verify_math_answer | |
class TestExactAndStringMatches:
    """Pairs that are textually identical or differ only by wrappers/whitespace."""

    def test_identical_integer_strings(self):
        """The same integer literal on both sides is accepted."""
        verdict = verify_math_answer("17", "17")
        assert verdict is True

    def test_identical_latex_fractions(self):
        """The same LaTeX fraction on both sides is accepted."""
        verdict = verify_math_answer(r"\frac{1}{2}", r"\frac{1}{2}")
        assert verdict is True

    def test_whitespace_differences(self):
        """Surrounding spaces on the first argument do not prevent a match."""
        verdict = verify_math_answer(" 42 ", "42")
        assert verdict is True

    def test_strips_boxed_wrapper(self):
        """A boxed value matches the bare value."""
        verdict = verify_math_answer(r"\boxed{17}", "17")
        assert verdict is True

    def test_strips_dollar_wrapper(self):
        """A value wrapped in dollar signs matches the bare value."""
        verdict = verify_math_answer("$17$", "17")
        assert verdict is True

    def test_strips_both_wrappers(self):
        """Dollar signs around a boxed fraction still match the bare fraction."""
        verdict = verify_math_answer(
            r"$\boxed{\frac{1}{2}}$",
            r"\frac{1}{2}",
        )
        assert verdict is True
class TestSymbolicEquivalence:
    """Pairs written differently that denote the same mathematical value."""

    def test_fraction_equals_decimal(self):
        """A plain-text fraction matches its decimal form."""
        verdict = verify_math_answer("1/2", "0.5")
        assert verdict is True

    def test_latex_fraction_equals_decimal(self):
        """A LaTeX fraction matches its decimal form."""
        verdict = verify_math_answer(r"\frac{1}{2}", "0.5")
        assert verdict is True

    def test_surd_latex_vs_sympy(self):
        """A LaTeX surd matches the same surd in function-call notation."""
        verdict = verify_math_answer(r"2\sqrt{3}", "2*sqrt(3)")
        assert verdict is True

    def test_integer_vs_float(self):
        """An integer matches the same value written with a decimal point."""
        verdict = verify_math_answer("17", "17.0")
        assert verdict is True

    def test_negative_integer(self):
        """A negative integer matches itself."""
        verdict = verify_math_answer("-3", "-3")
        assert verdict is True

    def test_negative_fraction_vs_decimal(self):
        """A negative LaTeX fraction matches its negative decimal form."""
        verdict = verify_math_answer(r"-\frac{1}{4}", "-0.25")
        assert verdict is True

    def test_algebraically_equal_products(self):
        """An unevaluated product matches its evaluated result."""
        verdict = verify_math_answer("2*3", "6")
        assert verdict is True

    def test_latex_sqrt_over_latex_fraction(self):
        """sqrt(2)/2 and 1/sqrt(2), both in LaTeX, are treated as equal."""
        verdict = verify_math_answer(
            r"\frac{\sqrt{2}}{2}",
            r"\frac{1}{\sqrt{2}}",
        )
        assert verdict is True
class TestNegativeCases:
    """Pairs with genuinely different values, which must be rejected."""

    def test_different_integers(self):
        """Two distinct integers do not match."""
        verdict = verify_math_answer("17", "18")
        assert verdict is False

    def test_different_fractions(self):
        """Two distinct fractions do not match."""
        verdict = verify_math_answer("1/2", "1/3")
        assert verdict is False

    def test_sign_flip(self):
        """A value and its negation do not match."""
        verdict = verify_math_answer("3", "-3")
        assert verdict is False
class TestMalformedInput:
    """Defensive behaviour for malformed, missing, or non-string input."""

    # Without this decorator pytest treats ``bad`` and ``good`` as fixture
    # names, finds none, and errors out before the test body ever runs.
    # NOTE(review): the cases below are representative malformed inputs
    # chosen during review -- confirm them against the verifier's intended
    # contract and extend as needed.
    @pytest.mark.parametrize(
        ("bad", "good"),
        [
            ("", "17"),  # empty string
            ("   ", "17"),  # whitespace only
            (r"\frac{1}{", "0.5"),  # unterminated LaTeX fraction
            (r"\boxed{", "17"),  # unterminated boxed wrapper
            ("((", "17"),  # unbalanced parentheses
        ],
    )
    def test_malformed_returns_false_not_raise(self, bad, good):
        """Malformed input yields ``False`` rather than raising."""
        # Should never raise, should return False for these malformed inputs.
        result = verify_math_answer(bad, good)
        assert result is False

    def test_none_inputs_do_not_crash(self):
        """``None`` on either or both sides yields ``False``."""
        # Pydantic would never pass None, but the verifier must be defensive.
        assert verify_math_answer(None, None) is False  # type: ignore[arg-type]
        assert verify_math_answer(None, "17") is False  # type: ignore[arg-type]
        assert verify_math_answer("17", None) is False  # type: ignore[arg-type]

    def test_non_string_numeric_inputs(self):
        """An ``int`` argument matches the equivalent string."""
        # Defensive: should coerce to str before comparing.
        assert verify_math_answer(17, "17") is True  # type: ignore[arg-type]