code-gen-assistant / src /eval /sandbox.py
Rushabh147's picture
Initial deploy to HF Spaces (clean history, LFS for all binaries)
b89e6d6
Raw
History Blame Contribute Delete
1.63 kB
"""Run untrusted generated code in a subprocess with a timeout.
NOTE: this is isolation-by-subprocess + timeout, NOT a real security sandbox.
It protects against hangs and lets us capture tracebacks for the repair loop and
pass@k scoring. For production / public deployment, run inside a container with
no network and dropped privileges (see Dockerfile + README security note).
"""
from __future__ import annotations
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ExecResult:
ok: bool
stdout: str
stderr: str
timed_out: bool = False
@property
def error(self) -> str:
"""Short error summary for feeding back into a repair prompt."""
if self.timed_out:
return "Execution timed out."
return self.stderr.strip().splitlines()[-1] if self.stderr.strip() else ""
def run_code(program: str, timeout: float = 8.0) -> ExecResult:
    """Execute a full Python program string; return pass/fail + captured output.

    The program is written to ``candidate.py`` inside a fresh temporary
    directory and run with the current interpreter (``sys.executable``) in a
    child process. The temporary directory is removed when the call returns.

    Args:
        program: Complete Python source text to execute.
        timeout: Maximum wall-clock seconds the child may run before it is
            killed.

    Returns:
        An ``ExecResult``. ``ok`` is True only when the child exits with
        return code 0. On timeout, ``ok`` is False, ``timed_out`` is True,
        ``stderr`` is the literal string ``"Timeout"`` and ``stdout`` holds
        whatever output was captured before the child was killed (possibly
        empty).
    """
    # Local import: only needed to decode partial output on the timeout path.
    import locale

    def _as_text(captured) -> str:
        """Coerce captured output (None, bytes or str) to str."""
        if not captured:
            return ""
        if isinstance(captured, bytes):
            # TimeoutExpired carries raw bytes even when text=True was
            # requested; decode with the same encoding text mode would use.
            return captured.decode(
                locale.getpreferredencoding(False), errors="replace"
            )
        return captured

    with tempfile.TemporaryDirectory() as d:
        path = Path(d) / "candidate.py"
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly rather than relying on the locale encoding.
        path.write_text(program, encoding="utf-8")
        try:
            proc = subprocess.run(
                [sys.executable, str(path)],
                capture_output=True,
                text=True,
                # Untrusted code may emit arbitrary bytes; never let a decode
                # error in the harness mask the candidate's real result.
                errors="replace",
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            return ExecResult(
                ok=False,
                stdout=_as_text(e.stdout),
                stderr="Timeout",
                timed_out=True,
            )
        return ExecResult(
            ok=proc.returncode == 0,
            stdout=proc.stdout,
            stderr=proc.stderr,
        )