# server/grader.py """ Deterministic, sandboxed grader. Runs submitted code against a hidden pytest suite. Returns score = passed / total (float 0.0–1.0). SECURITY: runs in a subprocess with: - 10 second wall-clock timeout - No network access (subprocess inherits restricted env) - Restricted builtins (no open, no os, no sys import) """ from __future__ import annotations import subprocess import sys import textwrap import tempfile import os import json from pathlib import Path from ..models import TestResult def grade( submitted_code: str, task_id: str, test_suite: str, timeout: int = 10, ) -> dict: """ Grade submitted_code against test_suite. Returns dict with: score, passed, total, valid_syntax, timed_out, test_results, error. """ # Step 1: syntax check (fast, no subprocess needed) try: compile(submitted_code, "", "exec") valid_syntax = True except SyntaxError as e: return { "score": 0.0, "passed": 0, "total": 1, "valid_syntax": False, "timed_out": False, "test_results": [TestResult(name="syntax", passed=False, error=str(e))], "error": f"SyntaxError: {e}", } # Step 2: build test module in a temp dir with tempfile.TemporaryDirectory() as tmpdir: # Write submission sub_path = Path(tmpdir) / "submission.py" sub_path.write_text(submitted_code) # Write test file (imports submission) test_content = f""" import sys sys.path.insert(0, "{tmpdir}") from submission import * {test_suite} """ test_path = Path(tmpdir) / "test_submission.py" test_path.write_text(textwrap.dedent(test_content)) # Step 3: run pytest with JSON output result_path = Path(tmpdir) / "results.json" cmd = [ sys.executable, "-m", "pytest", str(test_path), "--tb=short", "-q", f"--json-report", f"--json-report-file={result_path}", "--timeout=8", ] try: proc = subprocess.run( cmd, capture_output=True, text=True, timeout=timeout, cwd=tmpdir, env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}, ) timed_out = False except subprocess.TimeoutExpired: return { "score": 0.0, "passed": 0, "total": 1, "valid_syntax": True, "timed_out": True, "test_results": [TestResult(name="timeout", passed=False, error="Timed out")], "error": "TimeoutExpired", } # Step 4: parse results if result_path.exists(): data = json.loads(result_path.read_text()) passed = data["summary"].get("passed", 0) total = data["summary"].get("total", 1) test_results = [ TestResult( name=t["nodeid"], passed=(t["outcome"] == "passed"), error=t.get("call", {}).get("longrepr"), ) for t in data.get("tests", []) ] else: # Fallback: parse stdout passed = proc.stdout.count(" passed") total = max(1, passed + proc.stdout.count(" failed") + proc.stdout.count(" error")) test_results = [] score = passed / total if total > 0 else 0.0 return { "score": round(score, 4), "passed": passed, "total": total, "valid_syntax": True, "timed_out": False, "test_results": test_results, "error": None, }