Spaces:

Aswini-Kumar
/

cross-session-continuity-env

Sleeping

App Files Files Community

cross-session-continuity-env / server /sandbox.py

Aswini-Kumar

upload: server/sandbox.py

3a0b5ed verified about 1 month ago

raw

history blame contribute delete

5.47 kB

	"""
	server/sandbox.py

	Safe code execution sandbox using subprocess + ulimits.

	Resource limits enforced:
	- CPU: 8s hard limit
	- RAM: 256 MB
	- File handles: 20
	- Subprocesses: 10 (prevents fork bombs)
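	- Network: NOT blocked. The child only gets a minimal environment
	  (PATH=/usr/bin:/bin), which hides tools installed elsewhere on PATH
	  but does not prevent socket use.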

	Note: For production training on HF Jobs, upgrade to Docker container
	isolation. subprocess + ulimits is sufficient for dev and demo.
	"""

	import os
	import re
	import subprocess
	import sys
	import tempfile
	from dataclasses import dataclass
	from typing import Dict


	@dataclass
	class TestResult:
	passed: int
	total: int
	compiled: bool
	summary: str


	class Sandbox:
	"""
	Executes pytest in an isolated subprocess with strict resource limits.
	"""

	def __init__(self, timeout: int = 10):
	self.timeout = timeout

	def run_tests(self, files: Dict[str, str], test_code: str) -> TestResult:
	"""
	Write files + test code to a temp directory and run pytest.

	Args:
	files: Dict[filename → source code] — the agent's current file state.
	test_code: pytest test suite source code as a string.

	Returns:
	TestResult with pass/fail counts and compilation flag.
	"""
	with tempfile.TemporaryDirectory() as tmpdir:
	self._write_files(tmpdir, files, test_code)

	try:
	result = subprocess.run(
	[
	"python", "-m", "pytest",
	"test_solution.py",
	"--tb=short", "-q", "--no-header",
	],
	capture_output=True,
	text=True,
	timeout=self.timeout,
	cwd=tmpdir,
	preexec_fn=self._set_limits, # POSIX only
	env={"PATH": "/usr/bin:/bin"}, # no network access
	)
	return self._parse_result(result.stdout, result.returncode)

	except subprocess.TimeoutExpired:
	return TestResult(
	passed=0, total=1, compiled=False,
	summary="Timeout — likely infinite loop or blocking call.",
	)
	except PermissionError:
	# preexec_fn not available on Windows — run without ulimits (dev only)
	result = subprocess.run(
	["python", "-m", "pytest", "test_solution.py",
	"--tb=short", "-q", "--no-header"],
	capture_output=True, text=True,
	timeout=self.timeout, cwd=tmpdir,
	)
	return self._parse_result(result.stdout, result.returncode)
	except Exception as e:
	return TestResult(
	passed=0, total=1, compiled=False,
	summary=f"Sandbox error: {e}",
	)

	# ------------------------------------------------------------------
	# Helpers
	# ------------------------------------------------------------------

	def _write_files(
	self,
	tmpdir: str,
	files: Dict[str, str],
	test_code: str,
	) -> None:
	for filename, content in files.items():
	path = os.path.join(tmpdir, filename)
	os.makedirs(os.path.dirname(path), exist_ok=True)
	with open(path, "w") as f:
	f.write(content)
	with open(os.path.join(tmpdir, "test_solution.py"), "w") as f:
	f.write(test_code)

	@staticmethod
	def _set_limits() -> None:
	"""ulimits — only runs on POSIX (Linux/Mac)."""
	try:
	import resource
	resource.setrlimit(resource.RLIMIT_CPU, (8, 8))
	resource.setrlimit(resource.RLIMIT_AS, (256 * 1024 * 1024,) * 2)
	resource.setrlimit(resource.RLIMIT_NOFILE, (20, 20))
	resource.setrlimit(resource.RLIMIT_NPROC, (10, 10))
	except Exception:
	pass # graceful degradation on Windows

	@staticmethod
	def _parse_result(stdout: str, returncode: int) -> TestResult:
	"""
	Parse pytest -q output like:
	"3 passed, 1 failed in 0.42s"
	"4 passed in 0.18s"
	"ERROR: ..."
	"""
	compiled = "SyntaxError" not in stdout and "ImportError" not in stdout
	passed, total = 0, 0

	for line in stdout.split("\n"):
	line = line.strip()
	if "passed" in line or "failed" in line or "error" in line:
	parts = line.split()
	p, f = 0, 0
	for i, part in enumerate(parts):
	if part == "passed":
	try:
	p = int(parts[i - 1])
	except (IndexError, ValueError):
	pass
	if part in ("failed", "error"):
	try:
	f = int(parts[i - 1])
	except (IndexError, ValueError):
	pass
	total = p + f
	passed = p
	break

	if total == 0 and returncode == 0:
	# pytest found no tests — treat as 0/0
	compiled = True

	return TestResult(
	passed=passed,
	total=max(total, 1),
	compiled=compiled,
	summary=stdout[:500] if stdout else "No output.",
	)