File size: 5,466 Bytes
3a0b5ed | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | """
server/sandbox.py
Safe code execution sandbox using subprocess + ulimits.
Resource limits enforced:
- CPU: 8s hard limit
- RAM: 256 MB
- File handles: 20
- Subprocesses: 10 (prevents fork bombs)
- Network: NOT isolated. The child only gets PATH=/usr/bin:/bin, which limits which tools resolve by name but does not block sockets.
Note: For production training on HF Jobs, upgrade to Docker container
isolation. subprocess + ulimits is sufficient for dev and demo.
"""
import os
import re
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from typing import Dict
@dataclass
class TestResult:
passed: int
total: int
compiled: bool
summary: str
class Sandbox:
"""
Executes pytest in an isolated subprocess with strict resource limits.
"""
def __init__(self, timeout: int = 10):
self.timeout = timeout
def run_tests(self, files: Dict[str, str], test_code: str) -> TestResult:
"""
Write files + test code to a temp directory and run pytest.
Args:
files: Dict[filename → source code] — the agent's current file state.
test_code: pytest test suite source code as a string.
Returns:
TestResult with pass/fail counts and compilation flag.
"""
with tempfile.TemporaryDirectory() as tmpdir:
self._write_files(tmpdir, files, test_code)
try:
result = subprocess.run(
[
"python", "-m", "pytest",
"test_solution.py",
"--tb=short", "-q", "--no-header",
],
capture_output=True,
text=True,
timeout=self.timeout,
cwd=tmpdir,
preexec_fn=self._set_limits, # POSIX only
env={"PATH": "/usr/bin:/bin"}, # no network access
)
return self._parse_result(result.stdout, result.returncode)
except subprocess.TimeoutExpired:
return TestResult(
passed=0, total=1, compiled=False,
summary="Timeout — likely infinite loop or blocking call.",
)
except PermissionError:
# preexec_fn not available on Windows — run without ulimits (dev only)
result = subprocess.run(
["python", "-m", "pytest", "test_solution.py",
"--tb=short", "-q", "--no-header"],
capture_output=True, text=True,
timeout=self.timeout, cwd=tmpdir,
)
return self._parse_result(result.stdout, result.returncode)
except Exception as e:
return TestResult(
passed=0, total=1, compiled=False,
summary=f"Sandbox error: {e}",
)
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
def _write_files(
self,
tmpdir: str,
files: Dict[str, str],
test_code: str,
) -> None:
for filename, content in files.items():
path = os.path.join(tmpdir, filename)
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "w") as f:
f.write(content)
with open(os.path.join(tmpdir, "test_solution.py"), "w") as f:
f.write(test_code)
@staticmethod
def _set_limits() -> None:
"""ulimits — only runs on POSIX (Linux/Mac)."""
try:
import resource
resource.setrlimit(resource.RLIMIT_CPU, (8, 8))
resource.setrlimit(resource.RLIMIT_AS, (256 * 1024 * 1024,) * 2)
resource.setrlimit(resource.RLIMIT_NOFILE, (20, 20))
resource.setrlimit(resource.RLIMIT_NPROC, (10, 10))
except Exception:
pass # graceful degradation on Windows
@staticmethod
def _parse_result(stdout: str, returncode: int) -> TestResult:
"""
Parse pytest -q output like:
"3 passed, 1 failed in 0.42s"
"4 passed in 0.18s"
"ERROR: ..."
"""
compiled = "SyntaxError" not in stdout and "ImportError" not in stdout
passed, total = 0, 0
for line in stdout.split("\n"):
line = line.strip()
if "passed" in line or "failed" in line or "error" in line:
parts = line.split()
p, f = 0, 0
for i, part in enumerate(parts):
if part == "passed":
try:
p = int(parts[i - 1])
except (IndexError, ValueError):
pass
if part in ("failed", "error"):
try:
f = int(parts[i - 1])
except (IndexError, ValueError):
pass
total = p + f
passed = p
break
if total == 0 and returncode == 0:
# pytest found no tests — treat as 0/0
compiled = True
return TestResult(
passed=passed,
total=max(total, 1),
compiled=compiled,
summary=stdout[:500] if stdout else "No output.",
)
|