"""Subprocess-based test runner — lighter sandbox for trusted eval flows.
Used when Docker isn't available (e.g. CHTC execute nodes). Runs pytest in
a subprocess with a wall-clock timeout. No namespace isolation — appropriate
only for stock-model baselines, NEVER for Week 4 GRPO where model output is
actively adversarial. For that, use SandboxVerifier (Docker) or a future
Apptainer backend.
Returns the same `RunResult` dataclass as `runner.run_code` so the Verifier
interface stays backend-agnostic.
"""
from __future__ import annotations

import os
import re
import subprocess
import sys
import tempfile
import time
from pathlib import Path

from .runner import RunResult

DEFAULT_TIMEOUT_S: float = 10.0


def run_code_subprocess(
    code: str,
    tests: str,
    *,
    timeout_s: float = DEFAULT_TIMEOUT_S,
) -> RunResult:
    """Run `tests` against `code` via subprocess pytest; return a RunResult."""
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as workdir:
        work = Path(workdir)
        (work / "solution.py").write_text(code)
        (work / "test_solution.py").write_text(tests)
        start = time.monotonic()
        try:
            proc = subprocess.run(
                [
                    sys.executable, "-m", "pytest", "-q",
                    "-p", "no:cacheprovider",
                    "test_solution.py",
                ],
                cwd=str(work),
                timeout=timeout_s,
                capture_output=True,
                text=True,
                check=False,
                env=_minimal_env(),
            )
        except subprocess.TimeoutExpired as exc:
            # Depending on the Python version, TimeoutExpired can carry bytes
            # even when text=True was requested; normalise either way.
            raw = exc.stdout
            partial_stdout = (
                raw.decode("utf-8", errors="replace")
                if isinstance(raw, bytes)
                else (raw or "")
            )
            return RunResult(
                passed=False,
                num_tests_passed=0,
                num_tests_total=0,
                runtime_ms=int(timeout_s * 1000),
                stdout=partial_stdout,
                stderr="timeout",
                timed_out=True,
                oom=False,
                error=None,
            )
        except Exception as exc:
            # Deliberately broad: the eval loop must see a RunResult, never an
            # exception, so any runner-side failure is reported via `error`.
            return RunResult(
                passed=False,
                num_tests_passed=0,
                num_tests_total=0,
                runtime_ms=0,
                stdout="",
                stderr="",
                timed_out=False,
                oom=False,
                error=f"runner error: {exc}",
            )
        runtime_ms = int((time.monotonic() - start) * 1000)
        n_passed, n_total = _parse_pytest_output(proc.stdout + proc.stderr)
        # A run only passes when at least one test was collected and all of
        # them passed; an empty test file must not count as success.
        passed = proc.returncode == 0 and n_total > 0 and n_passed == n_total
        return RunResult(
            passed=passed,
            num_tests_passed=n_passed,
            num_tests_total=n_total,
            runtime_ms=runtime_ms,
            stdout=proc.stdout,
            stderr=proc.stderr,
            timed_out=False,
            oom=False,
            error=None,
        )


def _parse_pytest_output(output: str) -> tuple[int, int]:
    """Extract (num_passed, num_total) from pytest's summary output."""
    n_passed = 0
    n_failed = 0
    n_error = 0
    if m := re.search(r"(\d+) passed", output):
        n_passed = int(m.group(1))
    if m := re.search(r"(\d+) failed", output):
        n_failed = int(m.group(1))
    if m := re.search(r"(\d+) error", output):
        n_error = int(m.group(1))
    return n_passed, n_passed + n_failed + n_error


def _minimal_env() -> dict[str, str]:
    """Inherit only what pytest needs; strip everything else (tokens, proxies,
    credentials) so nothing sensitive leaks into the test process."""
    keep = ("PATH", "PYTHONPATH", "HOME", "LANG", "LC_ALL", "USER", "LOGNAME")
    out = {"PYTHONDONTWRITEBYTECODE": "1"}
    for k in keep:
        if k in os.environ:
            out[k] = os.environ[k]
    return out
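

if __name__ == "__main__":
    # Minimal smoke test, a sketch: push a trivial solution/test pair through
    # the subprocess backend and print the resulting RunResult. The snippets
    # here are illustrative only, not part of any eval set.
    demo_code = "def add(a: int, b: int) -> int:\n    return a + b\n"
    demo_tests = (
        "from solution import add\n\n"
        "def test_add():\n"
        "    assert add(1, 2) == 3\n"
    )
    print(run_code_subprocess(demo_code, demo_tests, timeout_s=30.0))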