File size: 3,750 Bytes
cacd58c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# server/grader.py
"""
Deterministic, sandboxed grader. Runs submitted code against a hidden pytest suite.
Returns score = passed / total (float 0.0–1.0).

SECURITY: runs in a subprocess with:
- 10 second wall-clock timeout
- No network access (subprocess inherits restricted env)
- Restricted builtins (no open, no os, no sys import)
"""
from __future__ import annotations

import json
import os
import re
import subprocess
import sys
import tempfile
import textwrap
from pathlib import Path

from ..models import TestResult


def grade(
    submitted_code: str,
    task_id: str,
    test_suite: str,
    timeout: int = 10,
) -> dict:
    """
    Grade submitted_code against test_suite by running pytest in a subprocess.

    Args:
        submitted_code: Source text of the submission to grade.
        task_id: Task identifier. Currently unused in the grading logic itself;
            kept for interface compatibility with callers.
        test_suite: Body of the hidden pytest module. It may reference any name
            the submission exports (imported via ``from submission import *``).
        timeout: Wall-clock limit in seconds for the whole pytest run.

    Returns:
        dict with keys: score (passed/total, rounded to 4 places), passed,
        total, valid_syntax, timed_out, test_results (list[TestResult]), error.
    """
    # Step 1: syntax check (fast, no subprocess needed).
    try:
        compile(submitted_code, "<submission>", "exec")
    except SyntaxError as e:
        return {
            "score": 0.0, "passed": 0, "total": 1,
            "valid_syntax": False, "timed_out": False,
            "test_results": [TestResult(name="syntax", passed=False, error=str(e))],
            "error": f"SyntaxError: {e}",
        }

    # Step 2: build submission + test module in an isolated temp dir.
    with tempfile.TemporaryDirectory() as tmpdir:
        # Write submission
        sub_path = Path(tmpdir) / "submission.py"
        sub_path.write_text(submitted_code)

        # Write test file (imports submission). {tmpdir!r} embeds the path as a
        # properly escaped Python literal — a bare "{tmpdir}" breaks on Windows,
        # where the path contains backslashes.
        test_content = f"""
import sys
sys.path.insert(0, {tmpdir!r})
from submission import *
{test_suite}
"""
        test_path = Path(tmpdir) / "test_submission.py"
        test_path.write_text(textwrap.dedent(test_content))

        # Step 3: run pytest with machine-readable output
        # (requires the pytest-json-report and pytest-timeout plugins).
        result_path = Path(tmpdir) / "results.json"
        cmd = [
            sys.executable, "-m", "pytest",
            str(test_path),
            "--tb=short",
            "-q",
            "--json-report",
            f"--json-report-file={result_path}",
            "--timeout=8",  # per-test limit, kept below the wall-clock limit
        ]

        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=tmpdir,
                env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"},
            )
        except subprocess.TimeoutExpired:
            return {
                "score": 0.0, "passed": 0, "total": 1,
                "valid_syntax": True, "timed_out": True,
                "test_results": [TestResult(name="timeout", passed=False, error="Timed out")],
                "error": "TimeoutExpired",
            }

        # Step 4: parse results — prefer the JSON report, fall back to stdout.
        if result_path.exists():
            data = json.loads(result_path.read_text())
            passed = data["summary"].get("passed", 0)
            total = data["summary"].get("total", 1)
            test_results = [
                TestResult(
                    name=t["nodeid"],
                    passed=(t["outcome"] == "passed"),
                    error=t.get("call", {}).get("longrepr"),
                )
                for t in data.get("tests", [])
            ]
        else:
            # Fallback: pytest -q ends with a summary line such as
            # "12 passed, 3 failed in 0.21s". Parse the actual numbers —
            # the previous .count(" passed") approach counted substring
            # occurrences, so 12 passing tests scored as 1.
            def _summary_count(outcome: str) -> int:
                m = re.search(rf"(\d+) {outcome}", proc.stdout)
                return int(m.group(1)) if m else 0

            passed = _summary_count("passed")
            total = max(1, passed + _summary_count("failed") + _summary_count("error"))
            test_results = []

        score = passed / total if total > 0 else 0.0
        return {
            "score": round(score, 4),
            "passed": passed,
            "total": total,
            "valid_syntax": True,
            "timed_out": False,
            "test_results": test_results,
            "error": None,
        }