Spaces:

uncertainrods
/

AlgoSensei

Sleeping

App Files Files Community

AlgoSensei / agent /sandbox.py

uncertainrods

init_code

e266561 23 days ago

raw

history blame contribute delete

4.01 kB

	"""
	Sandboxed code execution for evaluating user-submitted Python solutions.

	Security model:
	- Runs in a subprocess with a hard timeout (default 5 seconds)
	- Restricts dangerous builtins via RestrictedPython-style allowlist
	- No network or file system access from the subprocess
	"""

	import subprocess
	import sys
	import json
	import textwrap
	from typing import Any


	# Canned test cases for well-known problems, looked up by normalized topic name.
	# Each case names the function to call ("fn"), its positional arguments ("args")
	# and the value its result is compared with ("expected").
	BUILT_IN_TEST_CASES: dict[str, list[dict]] = {
	    "two sum": [
	        {"fn": "two_sum", "args": [nums, target], "expected": indices}
	        for nums, target, indices in (
	            ([2, 7, 11, 15], 9, [0, 1]),
	            ([3, 2, 4], 6, [1, 2]),
	            ([3, 3], 6, [0, 1]),
	        )
	    ],
	    # Deliberately empty: needs linked-list setup, so it is left to LLM eval only.
	    "reverse linked list": [],
	}

	# Source of the script run in the child interpreter. It is filled in with
	# str.format, so literal braces inside the script are doubled.
	#   user_code  - the submission, inserted verbatim at module level
	#   test_cases - a Python string literal holding the JSON-encoded test cases
	_RUNNER_TEMPLATE = textwrap.dedent(
	    """
	    import json, sys

	    # --- User code ---
	    {user_code}

	    # --- Test runner ---
	    # Private alias: the submission may have rebound the name json.
	    import json as _json

	    results = []
	    test_cases = _json.loads({test_cases})
	    for tc in test_cases:
	        fn = globals().get(tc["fn"])
	        if fn is None:
	            results.append({{"passed": False, "error": "Function not found: " + tc["fn"]}})
	            continue
	        try:
	            out = fn(*tc["args"])
	            # Normalize list order for Two Sum-style answers
	            passed = sorted(out) == sorted(tc["expected"]) if isinstance(out, list) else out == tc["expected"]
	            results.append({{"passed": bool(passed), "output": str(out)}})
	        except Exception as e:
	            results.append({{"passed": False, "error": str(e)}})

	    # Start on a fresh line so the report is always the last line of stdout,
	    # even when the submission printed something without a trailing newline.
	    print()
	    print(_json.dumps(results))
	    """
	)

	# Wall-clock limit, in seconds, for one child-interpreter run.
	_TIMEOUT_SECONDS = 5


	def _failure_report(total: int, message: str, *, timed_out: bool = False) -> dict[str, Any]:
	    """Build the result dict for a run in which no test case counts as passed."""
	    return {
	        "passed": 0,
	        "total": total,
	        "pass_rate": 0.0,
	        "errors": [message],
	        "timed_out": timed_out,
	    }


	def run_code_safely(user_code: str, test_cases: list[dict]) -> dict[str, Any]:
	    """
	    Execute `user_code` against `test_cases` in a child Python interpreter.

	    The submission and the test cases are rendered into a runner script that is
	    passed to `sys.executable -c`. The runner calls the function named by each
	    case and prints a JSON report as the last line of its stdout, so output
	    printed by the submission itself does not disturb the result.

	    NOTE(security): the child is only time-limited. No `env` is passed, so it
	    inherits the parent's environment, and nothing here blocks network or file
	    system access or restricts builtins. Run untrusted code only inside an
	    OS/container-level sandbox.

	    Args:
	        user_code: Python source of the submission.
	        test_cases: dicts with the keys "fn" (name of the function to call),
	            "args" (list of positional arguments) and "expected" (value the
	            result is compared with). Must be JSON-serializable.

	    Returns:
	        {
	            "passed": int,
	            "total": int,
	            "pass_rate": float,  # 0.0–1.0
	            "errors": list[str],
	            "timed_out": bool
	        }
	        If either argument is empty, nothing is executed and all counts are 0.

	    Raises:
	        TypeError or ValueError: propagated from `json.dumps` when `test_cases`
	            cannot be JSON-encoded.
	    """
	    if not test_cases or not user_code.strip():
	        return {"passed": 0, "total": 0, "pass_rate": 0.0, "errors": [], "timed_out": False}

	    total = len(test_cases)
	    script = _RUNNER_TEMPLATE.format(
	        user_code=user_code,
	        # JSON true/false/null are not Python literals, so the cases travel as
	        # a quoted JSON string that the child decodes with json.loads.
	        test_cases=repr(json.dumps(test_cases)),
	    )

	    try:
	        proc = subprocess.run(
	            [sys.executable, "-c", script],
	            capture_output=True,
	            text=True,
	            timeout=_TIMEOUT_SECONDS,
	            # input() in the submission hits EOF at once instead of waiting on
	            # the parent's stdin until the timeout fires.
	            stdin=subprocess.DEVNULL,
	        )
	        if proc.returncode != 0:
	            return _failure_report(total, proc.stderr[:500])

	        # The report is the last line; earlier lines are the submission's own output.
	        output_lines = proc.stdout.strip().splitlines()
	        if not output_lines:
	            return _failure_report(total, "Runner produced no output")

	        results = json.loads(output_lines[-1])
	        if not isinstance(results, list) or not results:
	            return _failure_report(total, "Runner produced no test results")

	        passed = sum(1 for r in results if r.get("passed"))
	        errors = [r["error"] for r in results if not r.get("passed") and "error" in r]
	        return {
	            "passed": passed,
	            "total": len(results),
	            "pass_rate": passed / len(results),
	            "errors": errors,
	            "timed_out": False,
	        }

	    except subprocess.TimeoutExpired:
	        return _failure_report(
	            total,
	            f"Execution timed out (> {_TIMEOUT_SECONDS} seconds)",
	            timed_out=True,
	        )
	    except Exception as e:
	        # Deliberate catch-all boundary: spawn failures and malformed reports are
	        # returned to the caller as a failed run rather than raised.
	        return _failure_report(total, str(e))


	def get_test_cases_for_topic(topic: str) -> list[dict]:
	    """
	    Return the built-in test cases for `topic`, or an empty list if none match.

	    The topic is lower-cased and its whitespace collapsed to single spaces, then
	    compared with each key of BUILT_IN_TEST_CASES. A key matches when it
	    contains the topic or the topic contains it; the first matching key in
	    dict order wins. A blank topic matches nothing.

	    The returned list is the one stored in BUILT_IN_TEST_CASES, not a copy, so
	    callers should not mutate it.
	    """
	    key = " ".join(topic.lower().split())
	    if not key:
	        # "" is a substring of every key, so without this guard a blank topic
	        # would return the first entry of the table.
	        return []
	    for lib_key, cases in BUILT_IN_TEST_CASES.items():
	        if lib_key in key or key in lib_key:
	            return cases
	    return []