Spaces:
Sleeping
Sleeping
# server/grader.py
"""
Deterministic, sandboxed grader. Runs submitted code against a hidden pytest suite.
Returns score = passed / total (float 0.0–1.0).
SECURITY: runs in a subprocess with:
    - 10 second wall-clock timeout
    - NOTE(review): network access is NOT actually blocked and builtins are
      NOT restricted in this module — the subprocess inherits the full parent
      environment (see ``env={**os.environ, ...}`` in ``grade``). Confirm an
      outer sandbox provides those guarantees before relying on them.
"""
from __future__ import annotations

# Standard library
import subprocess
import sys
import textwrap
import tempfile
import os
import json
from pathlib import Path

# Project-local: per-test outcome record returned to callers.
from ..models import TestResult
def grade(
    submitted_code: str,
    task_id: str,
    test_suite: str,
    timeout: int = 10,
) -> dict:
    """
    Grade ``submitted_code`` against ``test_suite`` in a pytest subprocess.

    Args:
        submitted_code: Candidate Python source to evaluate.
        task_id: Task identifier (accepted for interface compatibility; not
            consulted by the grading logic in this function).
        test_suite: Source of a pytest test module; it is appended to a stub
            that star-imports the submission.
        timeout: Wall-clock limit in seconds for the whole pytest run.

    Returns:
        dict with keys: score (passed/total rounded to 4 places), passed,
        total, valid_syntax, timed_out, test_results (list[TestResult]),
        error (str | None).
    """
    import re  # local import: only the stdout-fallback parser needs it

    # Step 1: syntax check (fast, no subprocess needed).
    try:
        compile(submitted_code, "<submission>", "exec")
    except SyntaxError as e:
        return {
            "score": 0.0, "passed": 0, "total": 1,
            "valid_syntax": False, "timed_out": False,
            "test_results": [TestResult(name="syntax", passed=False, error=str(e))],
            "error": f"SyntaxError: {e}",
        }

    # Step 2: build submission + test module in a throwaway directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        sub_path = Path(tmpdir) / "submission.py"
        sub_path.write_text(submitted_code)

        # {tmpdir!r} (repr, not bare interpolation) keeps the generated
        # source valid when the path contains backslashes or quotes.
        test_content = f"""
import sys
sys.path.insert(0, {tmpdir!r})
from submission import *
{test_suite}
"""
        test_path = Path(tmpdir) / "test_submission.py"
        test_path.write_text(textwrap.dedent(test_content))

        # Step 3: run pytest, asking the json-report plugin for structured
        # results. --timeout=8 is the per-test cap; `timeout` caps the whole
        # subprocess from outside.
        result_path = Path(tmpdir) / "results.json"
        cmd = [
            sys.executable, "-m", "pytest",
            str(test_path),
            "--tb=short",
            "-q",
            "--json-report",
            f"--json-report-file={result_path}",
            "--timeout=8",
        ]
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=tmpdir,
                env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"},
            )
        except subprocess.TimeoutExpired:
            return {
                "score": 0.0, "passed": 0, "total": 1,
                "valid_syntax": True, "timed_out": True,
                "test_results": [TestResult(name="timeout", passed=False, error="Timed out")],
                "error": "TimeoutExpired",
            }

        # Step 4: parse results. Prefer the JSON report, but tolerate a
        # missing or corrupt report file (e.g. plugin not installed) by
        # falling through to the stdout parser.
        data = None
        if result_path.exists():
            try:
                data = json.loads(result_path.read_text())
            except (OSError, ValueError):
                data = None

        if data is not None:
            summary = data.get("summary", {})
            passed = summary.get("passed", 0)
            total = summary.get("total", 1)
            test_results = [
                TestResult(
                    name=t["nodeid"],
                    passed=(t["outcome"] == "passed"),
                    error=t.get("call", {}).get("longrepr"),
                )
                for t in data.get("tests", [])
            ]
        else:
            # Fallback: parse pytest's summary line numerically. The previous
            # implementation used `stdout.count(" passed")`, which counts
            # substring occurrences (at most 1 in the summary line), not the
            # number of passing tests.
            def _count(label: str) -> int:
                m = re.search(rf"(\d+) {label}", proc.stdout or "")
                return int(m.group(1)) if m else 0

            passed = _count("passed")
            total = max(1, passed + _count("failed") + _count("error"))
            test_results = []

    score = passed / total if total > 0 else 0.0
    return {
        "score": round(score, 4),
        "passed": passed,
        "total": total,
        "valid_syntax": True,
        "timed_out": False,
        "test_results": test_results,
        "error": None,
    }