File size: 5,917 Bytes
3040767
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""Tests for the code answer verifier."""

import time

import pytest

from data.verifiers.code_verifier import verify_code_answer


# ---------------------------------------------------------------------------
# execute_and_assert (MBPP-style)
# ---------------------------------------------------------------------------


class TestExecuteAndAssert:
    """Exercise the ``execute_and_assert`` route of ``verify_code_answer``.

    Every test hands the verifier a candidate source string plus a metadata
    dict whose ``test_list`` holds assertion statements, then checks the
    boolean verdict.
    """

    def test_correct_solution_passes(self):
        """A correct ``add`` satisfies all assertions, so the verdict is True."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_list": [
                "assert add(1, 2) == 3",
                "assert add(-1, 1) == 0",
                "assert add(0, 0) == 0",
            ],
        }
        code = "def add(a, b):\n    return a + b\n"
        assert verify_code_answer(code, meta) is True

    def test_buggy_solution_fails(self):
        """A failing assertion (subtraction instead of addition) gives False."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_list": ["assert add(1, 2) == 3"],
        }
        code = "def add(a, b):\n    return a - b\n"  # bug
        assert verify_code_answer(code, meta) is False

    def test_syntax_error_returns_false(self):
        """Unparseable candidate code is reported as False."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_list": ["assert add(1, 2) == 3"],
        }
        code = "def add(a, b:\n    return a + b"  # broken syntax
        assert verify_code_answer(code, meta) is False

    def test_runtime_error_returns_false(self):
        """An exception raised by the candidate while asserting gives False."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_list": ["assert boom() == 1"],
        }
        code = "def boom():\n    raise RuntimeError('nope')\n"
        assert verify_code_answer(code, meta) is False

    def test_infinite_loop_times_out(self):
        """A non-terminating candidate yields False within a bounded time."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_list": ["assert spin() == 1"],
        }
        code = "def spin():\n    while True:\n        pass\n"
        start = time.monotonic()
        result = verify_code_answer(code, meta, timeout_seconds=2)
        elapsed = time.monotonic() - start
        assert result is False
        # Must return promptly — the test itself must not hang. The bound is
        # deliberately looser than the 2 s timeout requested above.
        assert elapsed < 6, f"verifier hung for {elapsed:.1f}s"

    def test_missing_test_list_returns_false(self):
        """An empty ``test_list`` gives False even for correct code.

        Note: despite the test name, the key is present but empty here.
        """
        meta = {"verification_type": "execute_and_assert", "test_list": []}
        code = "def add(a, b):\n    return a + b\n"
        assert verify_code_answer(code, meta) is False

    def test_test_imports_are_executed(self):
        """Assertions can use names that only ``test_imports`` brings in."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_imports": ["import math"],
            "test_list": ["assert sqrt2() == math.sqrt(2)"],
        }
        # The candidate imports ``math`` inside the function on purpose: a
        # module-level import could bind ``math`` in the namespace the
        # assertions run in and let this test pass even if ``test_imports``
        # were ignored.
        code = "def sqrt2():\n    import math\n    return math.sqrt(2)\n"
        assert verify_code_answer(code, meta) is True


# ---------------------------------------------------------------------------
# stdin_stdout (APPS-style)
# ---------------------------------------------------------------------------


class TestStdinStdout:
    """Exercise the ``stdin_stdout`` route of ``verify_code_answer``.

    The metadata carries parallel ``inputs`` and ``outputs`` lists; each test
    checks the boolean verdict for a small candidate program.
    """

    @staticmethod
    def _spec(stdin_cases, stdout_cases):
        """Build ``stdin_stdout`` metadata from the given input/output lists."""
        return {
            "verification_type": "stdin_stdout",
            "inputs": stdin_cases,
            "outputs": stdout_cases,
        }

    def test_echo_program_passes(self):
        """A program echoing its stripped stdin matches the expected output."""
        spec = self._spec(["hello\n"], ["hello\n"])
        program = "\n".join(["import sys", "print(sys.stdin.read().strip())", ""])
        verdict = verify_code_answer(program, spec)
        assert verdict is True

    def test_multiple_cases_all_pass(self):
        """A summing program passes when every input/output pair agrees."""
        spec = self._spec(["3\n4\n", "10\n20\n"], ["7\n", "30\n"])
        source_lines = [
            "import sys",
            "nums = [int(x) for x in sys.stdin.read().split()]",
            "print(sum(nums))",
            "",  # yields the trailing newline
        ]
        verdict = verify_code_answer("\n".join(source_lines), spec)
        assert verdict is True

    def test_wrong_output_fails(self):
        """Printing a value other than the expected one gives False."""
        spec = self._spec(["3\n4\n"], ["7\n"])
        program = "import sys\n" "print(99)\n"
        verdict = verify_code_answer(program, spec)
        assert verdict is False

    def test_normalizes_trailing_whitespace(self):
        """Extra blank lines at the end of the expected output are tolerated."""
        # trailing blank lines should be stripped
        expected = ["42\n\n\n"]
        spec = self._spec(["1\n"], expected)
        verdict = verify_code_answer("print(42)\n", spec)
        assert verdict is True

    def test_empty_io_lists_fail(self):
        """With no input/output cases at all, the verdict is False."""
        spec = self._spec([], [])
        verdict = verify_code_answer("print('anything')\n", spec)
        assert verdict is False

    def test_mismatched_io_lengths_fail(self):
        """Two inputs against one expected output gives False."""
        # length mismatch
        spec = self._spec(["1\n", "2\n"], ["1\n"])
        program = "\n".join(["import sys", "print(sys.stdin.read().strip())", ""])
        verdict = verify_code_answer(program, spec)
        assert verdict is False


# ---------------------------------------------------------------------------
# Defensive / routing behavior
# ---------------------------------------------------------------------------


class TestDefensive:
    """Unknown routes and malformed arguments must yield False, not raise."""

    def test_unknown_verification_type_returns_false(self):
        """An unrecognised ``verification_type`` gives False."""
        bogus = {"verification_type": "nonsense", "test_list": []}
        outcome = verify_code_answer("print(1)", bogus)
        assert outcome is False

    def test_non_string_code_returns_false(self):
        """``None``, an int and the empty string are all rejected as code."""
        meta = {
            "verification_type": "execute_and_assert",
            "test_list": ["assert True"],
        }
        # Checked in order; the first failing candidate stops the test.
        for candidate in (None, 123, ""):
            assert verify_code_answer(candidate, meta) is False  # type: ignore[arg-type]

    def test_non_dict_metadata_returns_false(self):
        """Metadata that is ``None`` or a plain string gives False."""
        for bad_meta in (None, "bad"):
            assert verify_code_answer("print(1)", bad_meta) is False  # type: ignore[arg-type]