Spaces:
Sleeping
Sleeping
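"""
Sandboxed code execution for evaluating user-submitted Python solutions.

Security model:
- Runs in a subprocess with a hard timeout (default 5 seconds)
- Restricts dangerous builtins via RestrictedPython-style allowlist
  (NOTE(review): no such allowlist is implemented in this module — TODO)
- No network or file system access from the subprocess
  (NOTE(review): not enforced by this module; it must be provided by the
  deployment environment, e.g. a locked-down container — TODO confirm)
"""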
| import subprocess | |
| import sys | |
| import json | |
| import textwrap | |
| from typing import Any | |
# Canned test cases for well-known problems, keyed by lower-case topic name.
# Every case is a dict naming the function to call ("fn"), the positional
# arguments to pass ("args"), and the value it should return ("expected").
BUILT_IN_TEST_CASES: dict[str, list[dict]] = {
    "two sum": [
        {"fn": "two_sum", "args": [nums, target], "expected": indices}
        for nums, target, indices in (
            ([2, 7, 11, 15], 9, [0, 1]),
            ([3, 2, 4], 6, [1, 2]),
            ([3, 3], 6, [0, 1]),
        )
    ],
    # Deliberately empty: needs linked-list setup, so it is evaluated by the
    # LLM only.
    "reverse linked list": [],
}
# Script executed in the child interpreter. It is filled in with str.format:
#   {user_code}  - the submitted source, pasted verbatim at module level
#   {test_cases} - a Python string literal holding the JSON-encoded test cases
# Literal braces inside the template are therefore doubled.
_RUNNER_TEMPLATE = textwrap.dedent(
    """
    import json, sys

    # --- User code ---
    {user_code}

    # --- Test runner ---
    # Re-import under private names so user code that rebinds json or sys
    # cannot break result reporting.
    import json as _json
    import sys as _sys

    results = []
    # Decode from JSON text rather than pasting it as Python source: JSON's
    # true/false/null are not valid Python names.
    test_cases = _json.loads({test_cases})
    for tc in test_cases:
        fn = globals().get(tc["fn"])
        if fn is None:
            results.append({{"passed": False, "error": "Function not found: " + tc["fn"]}})
            continue
        try:
            out = fn(*tc["args"])
            expected = tc["expected"]
            # JSON has no tuple type, so a returned tuple is compared as a list.
            if isinstance(out, tuple):
                out = list(out)
            if isinstance(out, list) and isinstance(expected, list):
                # Normalize list order for Two Sum-style answers; fall back to
                # plain equality when the elements cannot be ordered.
                try:
                    passed = sorted(out) == sorted(expected)
                except TypeError:
                    passed = out == expected
            else:
                passed = out == expected
            results.append({{"passed": bool(passed), "output": str(out)}})
        except (Exception, SystemExit) as e:
            # Include the exception type: str(e) alone is empty for bare raises.
            results.append({{"passed": False, "error": type(e).__name__ + ": " + str(e)}})

    # The leading newline keeps the JSON on its own final line even when user
    # code printed something without a trailing newline.
    _sys.__stdout__.write("\\n" + _json.dumps(results) + "\\n")
    """
)

_TIMEOUT_SECONDS = 5

# Environment variables forwarded to the child interpreter. Everything else
# in the parent environment (API keys, tokens, ...) is withheld from user code.
_CHILD_ENV_ALLOWLIST = (
    "PATH",
    "SYSTEMROOT",
    "LD_LIBRARY_PATH",
    "TMPDIR",
    "TEMP",
    "TMP",
)


def _failure(total: int, message: str, *, timed_out: bool = False) -> dict[str, Any]:
    """Build the result dict for a run in which no test case passed."""
    return {
        "passed": 0,
        "total": total,
        "pass_rate": 0.0,
        "errors": [message],
        "timed_out": timed_out,
    }


def run_code_safely(user_code: str, test_cases: list[dict]) -> dict[str, Any]:
    """
    Execute `user_code` against `test_cases` in a child Python process.

    The child runs in isolated mode (`-I`) with a minimal environment, no
    stdin, and a hard timeout of `_TIMEOUT_SECONDS`. This keeps parent
    secrets and local modules away from user code, but it is NOT a full
    sandbox: network and file system access are not blocked here.

    Each test case is a dict with keys "fn" (function name), "args"
    (positional arguments) and "expected" (expected return value); the
    cases must be JSON-serializable.

    Returns:
        {
            "passed": int,
            "total": int,
            "pass_rate": float,  # 0.0–1.0
            "errors": list[str],
            "timed_out": bool
        }
    An empty `test_cases` or blank `user_code` yields all-zero results
    without starting a process.
    """
    import os  # local import: only needed to build the child environment

    if not test_cases or not user_code.strip():
        return {"passed": 0, "total": 0, "pass_rate": 0.0, "errors": [], "timed_out": False}

    total = len(test_cases)
    script = _RUNNER_TEMPLATE.format(
        user_code=user_code,
        # repr() of the JSON text is a valid Python string literal, which the
        # runner decodes with json.loads.
        test_cases=repr(json.dumps(test_cases)),
    )
    child_env = {
        name: os.environ[name] for name in _CHILD_ENV_ALLOWLIST if name in os.environ
    }

    try:
        proc = subprocess.run(
            # -I: ignore PYTHON* variables, user site-packages and the current
            # directory; -X utf8: fixed I/O encoding regardless of locale.
            [sys.executable, "-I", "-X", "utf8", "-c", script],
            capture_output=True,
            stdin=subprocess.DEVNULL,  # input() fails fast instead of hanging
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=_TIMEOUT_SECONDS,
            env=child_env,
        )
    except subprocess.TimeoutExpired:
        return _failure(
            total,
            f"Execution timed out (> {_TIMEOUT_SECONDS} seconds)",
            timed_out=True,
        )
    except Exception as e:
        # e.g. OSError when the script exceeds the OS argument-length limit.
        return _failure(total, str(e))

    if proc.returncode != 0:
        # Keep the tail of stderr: a traceback ends with the actual exception.
        return _failure(total, proc.stderr.strip()[-500:])

    # User code may print freely; the runner's JSON is always the last line.
    output_lines = proc.stdout.strip().splitlines()
    if not output_lines:
        return _failure(total, "Runner produced no output")
    try:
        results = json.loads(output_lines[-1])
    except json.JSONDecodeError as e:
        return _failure(total, f"Could not parse runner output: {e}")

    well_formed = (
        isinstance(results, list)
        and len(results) == total
        and all(isinstance(r, dict) for r in results)
    )
    if not well_formed:
        return _failure(total, "Malformed runner output")

    passed = sum(1 for r in results if r.get("passed") is True)
    errors = [
        str(r["error"])
        for r in results
        if r.get("passed") is not True and "error" in r
    ]
    return {
        "passed": passed,
        "total": total,
        "pass_rate": passed / total,
        "errors": errors,
        "timed_out": False,
    }
def get_test_cases_for_topic(topic: str) -> list[dict]:
    """Return built-in test cases for a topic if available, else empty list.

    The topic is lower-cased, hyphens and underscores are treated as spaces,
    and runs of whitespace are collapsed before matching. A library entry
    matches when its key is a substring of the normalized topic or the
    normalized topic is a substring of the key; the first match wins.

    An empty or whitespace-only topic never matches. Without that guard the
    empty string would be a substring of every key and the first library
    entry would be returned.
    """
    if not topic:
        return []
    key = " ".join(topic.lower().replace("-", " ").replace("_", " ").split())
    if not key:
        return []
    for lib_key, cases in BUILT_IN_TEST_CASES.items():
        if lib_key in key or key in lib_key:
            return cases
    return []