File size: 3,737 Bytes
0dd7c80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""Subprocess-based test runner — lighter sandbox for trusted eval flows.

Used when Docker isn't available (e.g. CHTC execute nodes). Runs pytest in
a subprocess with a wall-clock timeout. No namespace isolation — appropriate
only for stock-model baselines, NEVER for Week 4 GRPO where model output is
actively adversarial. For that, use SandboxVerifier (Docker) or a future
Apptainer backend.

Returns the same `RunResult` dataclass as `runner.run_code` so the Verifier
interface stays backend-agnostic.
"""

from __future__ import annotations

import os
import re
import subprocess
import sys
import tempfile
import time
from pathlib import Path

from .runner import RunResult

# Default wall-clock limit, in seconds, for the pytest subprocess; used by
# `run_code_subprocess` when the caller does not pass `timeout_s`.
DEFAULT_TIMEOUT_S: float = 10.0


def run_code_subprocess(
    code: str,
    tests: str,
    *,
    timeout_s: float = DEFAULT_TIMEOUT_S,
) -> RunResult:
    """Run `tests` against `code` via subprocess pytest; return a RunResult.

    `code` is written to ``solution.py`` and `tests` to ``test_solution.py``
    inside a fresh temporary directory, and pytest is launched there with the
    current interpreter and a stripped-down environment.

    Args:
        code: Source text of the solution module.
        tests: Source text of the pytest test module.
        timeout_s: Wall-clock limit, in seconds, for the pytest subprocess.

    Returns:
        A RunResult. ``passed`` is True only when pytest exits with code 0,
        at least one test was counted, and every counted test passed. On
        timeout, ``timed_out`` is True and ``stderr`` is ``"timeout"``. If the
        files cannot be written or the subprocess cannot be run, ``error``
        carries a ``"runner error: ..."`` message. ``oom`` is always False
        because this backend does not track memory.
    """
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        work = Path(workdir)

        try:
            # Python reads source files as UTF-8 by default, so write them as
            # UTF-8 explicitly instead of relying on the locale encoding.
            (work / "solution.py").write_text(code, encoding="utf-8")
            (work / "test_solution.py").write_text(tests, encoding="utf-8")

            start = time.monotonic()
            proc = subprocess.run(
                [
                    sys.executable, "-m", "pytest", "-q",
                    "-p", "no:cacheprovider",
                    "test_solution.py",
                ],
                cwd=str(work),
                timeout=timeout_s,
                capture_output=True,
                text=True,
                # Undecodable bytes printed by the code under test must not
                # turn a completed run into a runner error.
                errors="replace",
                check=False,
                env=_minimal_env(),
            )
        except subprocess.TimeoutExpired as exc:
            # The partial output attached to the exception may be bytes even
            # in text mode, so normalise it to str here.
            partial = exc.stdout
            if isinstance(partial, bytes):
                partial = partial.decode("utf-8", errors="replace")
            return RunResult(
                passed=False,
                num_tests_passed=0,
                num_tests_total=0,
                runtime_ms=int(timeout_s * 1000),
                stdout=partial or "",
                stderr="timeout",
                timed_out=True,
                oom=False,
                error=None,
            )
        except Exception as exc:
            # Boundary handler: any failure to stage files or launch pytest is
            # reported through the result object rather than raised.
            return RunResult(
                passed=False, num_tests_passed=0, num_tests_total=0,
                runtime_ms=0, stdout="", stderr="", timed_out=False,
                oom=False, error=f"runner error: {exc}",
            )

        runtime_ms = int((time.monotonic() - start) * 1000)
        n_passed, n_total = _parse_pytest_output(proc.stdout + proc.stderr)
        # Require at least one counted test so an empty or uncollected test
        # file is never reported as a pass.
        passed = proc.returncode == 0 and n_total > 0 and n_passed == n_total
        return RunResult(
            passed=passed,
            num_tests_passed=n_passed,
            num_tests_total=n_total,
            runtime_ms=runtime_ms,
            stdout=proc.stdout,
            stderr=proc.stderr,
            timed_out=False,
            oom=False,
            error=None,
        )


def _parse_pytest_output(output: str) -> tuple[int, int]:
    """Extract (num_passed, num_total) from pytest's summary output."""
    n_passed = 0
    n_failed = 0
    n_error = 0
    if m := re.search(r"(\d+) passed", output):
        n_passed = int(m.group(1))
    if m := re.search(r"(\d+) failed", output):
        n_failed = int(m.group(1))
    if m := re.search(r"(\d+) error", output):
        n_error = int(m.group(1))
    return n_passed, n_passed + n_failed + n_error


def _minimal_env() -> dict[str, str]:
    """Inherit only what pytest needs; strip everything else for cleanliness."""
    keep = ("PATH", "PYTHONPATH", "HOME", "LANG", "LC_ALL", "USER", "LOGNAME")
    out = {"PYTHONDONTWRITEBYTECODE": "1"}
    for k in keep:
        if k in os.environ:
            out[k] = os.environ[k]
    return out