Spaces:
Sleeping
Sleeping
# server/grader.py
"""
Deterministic, sandboxed grader. Runs submitted code against a hidden pytest suite.
Returns score = passed / total (float 0.0–1.0).
SECURITY: runs in a subprocess with:
    - 10 second wall-clock timeout
    - NOTE(review): network access is NOT actually blocked and builtins are
      NOT restricted in this module — the subprocess inherits the full parent
      environment (see ``env={**os.environ, ...}`` in ``grade``). Confirm an
      outer sandbox provides those guarantees before relying on them.
"""
from __future__ import annotations

# Standard library
import subprocess
import sys
import textwrap
import tempfile
import os
import json
from pathlib import Path

# Project-local: per-test outcome record returned to callers.
from ..models import TestResult
def grade(
    submitted_code: str,
    task_id: str,
    test_suite: str,
    timeout: int = 10,
) -> dict:
    """
    Grade ``submitted_code`` against ``test_suite`` in a pytest subprocess.

    Args:
        submitted_code: Candidate Python source to evaluate.
        task_id: Task identifier (accepted for interface compatibility; not
            consulted by the grading logic in this function).
        test_suite: Source of a pytest test module; it is appended to a stub
            that star-imports the submission.
        timeout: Wall-clock limit in seconds for the whole pytest run.

    Returns:
        dict with keys: score (passed/total rounded to 4 places), passed,
        total, valid_syntax, timed_out, test_results (list[TestResult]),
        error (str | None).
    """
    import re  # local import: only the stdout-fallback parser needs it

    # Step 1: syntax check (fast, no subprocess needed).
    try:
        compile(submitted_code, "<submission>", "exec")
    except SyntaxError as e:
        return {
            "score": 0.0, "passed": 0, "total": 1,
            "valid_syntax": False, "timed_out": False,
            "test_results": [TestResult(name="syntax", passed=False, error=str(e))],
            "error": f"SyntaxError: {e}",
        }

    # Step 2: build submission + test module in a throwaway directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        sub_path = Path(tmpdir) / "submission.py"
        sub_path.write_text(submitted_code)

        # {tmpdir!r} (repr, not bare interpolation) keeps the generated
        # source valid when the path contains backslashes or quotes.
        test_content = f"""
import sys
sys.path.insert(0, {tmpdir!r})
from submission import *
{test_suite}
"""
        test_path = Path(tmpdir) / "test_submission.py"
        test_path.write_text(textwrap.dedent(test_content))

        # Step 3: run pytest, asking the json-report plugin for structured
        # results. --timeout=8 is the per-test cap; `timeout` caps the whole
        # subprocess from outside.
        result_path = Path(tmpdir) / "results.json"
        cmd = [
            sys.executable, "-m", "pytest",
            str(test_path),
            "--tb=short",
            "-q",
            "--json-report",
            f"--json-report-file={result_path}",
            "--timeout=8",
        ]
        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=tmpdir,
                env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"},
            )
        except subprocess.TimeoutExpired:
            return {
                "score": 0.0, "passed": 0, "total": 1,
                "valid_syntax": True, "timed_out": True,
                "test_results": [TestResult(name="timeout", passed=False, error="Timed out")],
                "error": "TimeoutExpired",
            }

        # Step 4: parse results. Prefer the JSON report, but tolerate a
        # missing or corrupt report file (e.g. plugin not installed) by
        # falling through to the stdout parser.
        data = None
        if result_path.exists():
            try:
                data = json.loads(result_path.read_text())
            except (OSError, ValueError):
                data = None

        if data is not None:
            summary = data.get("summary", {})
            passed = summary.get("passed", 0)
            total = summary.get("total", 1)
            test_results = [
                TestResult(
                    name=t["nodeid"],
                    passed=(t["outcome"] == "passed"),
                    error=t.get("call", {}).get("longrepr"),
                )
                for t in data.get("tests", [])
            ]
        else:
            # Fallback: parse pytest's summary line numerically. The previous
            # implementation used `stdout.count(" passed")`, which counts
            # substring occurrences (at most 1 in the summary line), not the
            # number of passing tests.
            def _count(label: str) -> int:
                m = re.search(rf"(\d+) {label}", proc.stdout or "")
                return int(m.group(1)) if m else 0

            passed = _count("passed")
            total = max(1, passed + _count("failed") + _count("error"))
            test_results = []

    score = passed / total if total > 0 else 0.0
    return {
        "score": round(score, 4),
        "passed": passed,
        "total": total,
        "valid_syntax": True,
        "timed_out": False,
        "test_results": test_results,
        "error": None,
    }