smolcode / engine /sandbox.py
seanpoyner's picture
Upload folder using huggingface_hub
daea45b verified
Raw
History Blame Contribute Delete
5.12 kB
"""Execution sandbox for model-generated code.
This is the agentic core's "hands": it runs code the model writes and reports
back stdout/stderr/exit so the agent can iterate to green.
SECURITY: model-generated code is untrusted. The default here is a *soft*
sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
and output caps. It is adequate for local/laptop use. Before exposing a public
HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an
e2b/Docker microVM); the interface below does not change.
"""
from __future__ import annotations
import os
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
# Default wall-clock limit handed to subprocess.run by the Workspace run_*
# helpers when the caller does not pass its own `timeout`.
DEFAULT_TIMEOUT = 20 # seconds
# Default `limit` for _clip: the maximum number of characters of a captured
# stream (or file content) that is passed back before a truncation note.
MAX_OUTPUT = 20_000 # chars per stream, to keep the LLM context bounded
@dataclass
class RunResult:
    """Outcome of a single command executed inside the workspace."""

    # True when the run counts as a success; the producer in this file sets it
    # from "exit status == 0" and forces it to False on timeout.
    ok: bool
    # Captured standard output (unclipped).
    stdout: str
    # Captured standard error (unclipped).
    stderr: str
    # Process exit status.
    exit_code: int
    # Set when the run was aborted for exceeding its time limit.
    timed_out: bool = False

    def as_tool_payload(self) -> dict:
        """Build the compact dict that is handed to the LLM as the tool result.

        Status fields are copied as-is; both text streams go through ``_clip``
        so a chatty program cannot blow up the context.
        """
        payload = dict(
            ok=self.ok,
            exit_code=self.exit_code,
            timed_out=self.timed_out,
        )
        payload["stdout"] = _clip(self.stdout)
        payload["stderr"] = _clip(self.stderr)
        return payload
def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Cap *s* at *limit* characters.

    Text that already fits is returned unchanged; otherwise the first *limit*
    characters are kept and a trailing note records how many were dropped.
    """
    overflow = len(s) - limit
    if overflow <= 0:
        return s
    head = s[:limit]
    return head + f"\n...[truncated {overflow} chars]"
class Workspace:
    """A scratch directory the agent reads/writes/executes within.

    All file tools are confined to this directory; every path is resolved
    and then checked component-wise against the resolved root, so the model
    cannot escape via `..`, absolute paths, or a sibling directory whose name
    merely starts with the root's name.
    """

    def __init__(self, root: str | None = None) -> None:
        """Bind the workspace to *root*, creating it if necessary.

        Args:
            root: Directory to operate in. When omitted or empty, a fresh
                temporary directory (prefix ``smallcode-``) is created.
        """
        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
        self.root.mkdir(parents=True, exist_ok=True)

    # --- path safety -----------------------------------------------------
    def _resolve(self, rel: str) -> Path:
        """Resolve *rel* against the workspace root and enforce containment.

        Args:
            rel: Path supplied by the model, relative to the workspace.

        Returns:
            The fully resolved path, guaranteed to be the root itself or a
            descendant of it.

        Raises:
            ValueError: If the resolved path lies outside the workspace.
        """
        root = self.root.resolve()
        candidate = (self.root / rel).resolve()
        try:
            # Component-wise check. A plain string-prefix test would accept
            # e.g. "<root>_evil/x" because it shares the leading characters.
            candidate.relative_to(root)
        except ValueError:
            raise ValueError(f"path escapes workspace: {rel!r}") from None
        return candidate

    # --- file ops --------------------------------------------------------
    def write_file(self, path: str, content: str) -> dict:
        """Write *content* to *path* (UTF-8), creating parent directories.

        Returns:
            ``{"ok": True, "path": ..., "bytes": ...}`` where ``bytes`` is the
            UTF-8 encoded size of *content*.

        Raises:
            ValueError: If *path* escapes the workspace.
        """
        target = self._resolve(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the file matches the UTF-8 byte count reported
        # below regardless of the host locale.
        target.write_text(content, encoding="utf-8")
        return {"ok": True, "path": path, "bytes": len(content.encode("utf-8"))}

    def read_file(self, path: str) -> dict:
        """Read *path* as UTF-8 text, clipped via ``_clip``.

        Returns:
            ``{"ok": True, "path": ..., "content": ...}`` on success, or
            ``{"ok": False, "error": ..., "path": ...}`` when the path does
            not exist (``"not found"``) or is not a regular file
            (``"not a file"``).

        Raises:
            ValueError: If *path* escapes the workspace.
        """
        target = self._resolve(path)
        if not target.exists():
            return {"ok": False, "error": "not found", "path": path}
        if not target.is_file():
            # Directories and other non-regular entries cannot be read as text;
            # report that instead of letting the OS error propagate.
            return {"ok": False, "error": "not a file", "path": path}
        # Undecodable bytes become U+FFFD rather than aborting the tool call.
        text = target.read_text(encoding="utf-8", errors="replace")
        return {"ok": True, "path": path, "content": _clip(text)}

    def list_files(self) -> list[str]:
        """Return every regular file under the root, as sorted relative paths."""
        return sorted(
            str(entry.relative_to(self.root))
            for entry in self.root.rglob("*")
            if entry.is_file()
        )

    # --- execution -------------------------------------------------------
    def run_python(self, code: str | None = None, path: str | None = None,
                   timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a Python script with ``python3`` inside the workspace.

        Args:
            code: Source to execute when *path* is not given; it is written
                to ``_snippet.py`` in the workspace first. ``None`` is treated
                as an empty script.
            path: Existing workspace-relative script to run. Takes precedence
                over *code*.
            timeout: Wall-clock limit in seconds.

        Raises:
            ValueError: If *path* escapes the workspace.
        """
        if path:
            script = self._resolve(path)
        else:
            script = self._resolve("_snippet.py")
            # Python source files default to UTF-8, so write the snippet as such.
            script.write_text(code or "", encoding="utf-8")
        return self._run(["python3", str(script)], timeout)

    def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run the workspace's tests: pytest if available, else unittest.

        pytest is tried first. Only when the interpreter reports that the
        ``pytest`` module itself is missing does this fall back to
        ``python3 -m unittest discover``; any other pytest outcome (including
        failures and timeouts) is returned unchanged.
        """
        first = self._run(["python3", "-m", "pytest", "-q"], timeout)
        pytest_missing = (
            not first.ok
            and not first.timed_out
            and "No module named pytest" in first.stderr
        )
        if pytest_missing:
            return self._run(["python3", "-m", "unittest", "discover"], timeout)
        return first

    def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a shell command in the workspace (login shell for full PATH).

        Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
        same way run_python checks Python. Mirrors the Rust agent's run_shell and the
        eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
        """
        return self._run(["bash", "-lc", command], timeout)

    @staticmethod
    def _as_text(stream: bytes | str | None) -> str:
        """Coerce a captured stream (bytes, str, or None) to str without raising."""
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            # Output cut off by a kill may end mid-character; never raise here.
            return stream.decode(errors="replace")
        return stream

    def _run(self, argv: list[str], timeout: int) -> RunResult:
        """Execute *argv* with the workspace as cwd and capture its output.

        Args:
            argv: Command and arguments (no shell interpolation is applied).
            timeout: Wall-clock limit in seconds, enforced by ``subprocess.run``.

        Returns:
            A ``RunResult``. On timeout ``ok`` is False, ``timed_out`` is True,
            ``exit_code`` is 124, ``stdout`` holds whatever was captured, and
            ``stderr`` holds any captured error output followed by a
            ``timed out after Ns`` line.
        """
        # NOTE(review): on timeout subprocess.run kills only the direct child;
        # processes it spawned may outlive it. Consider a process-group kill
        # when hardening this for untrusted public use.
        env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
        try:
            proc = subprocess.run(
                argv,
                cwd=self.root,
                env=env,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            notice = f"timed out after {timeout}s"
            # Keep diagnostics printed before the hang; they are often the
            # only clue the agent has about why the program stalled.
            partial_err = self._as_text(exc.stderr).rstrip()
            return RunResult(
                ok=False,
                stdout=self._as_text(exc.stdout),
                stderr=f"{partial_err}\n{notice}" if partial_err else notice,
                exit_code=124,
                timed_out=True,
            )
        return RunResult(
            ok=proc.returncode == 0,
            stdout=proc.stdout,
            stderr=proc.stderr,
            exit_code=proc.returncode,
        )

    def cleanup(self) -> None:
        """Delete the workspace directory tree, ignoring any errors."""
        shutil.rmtree(self.root, ignore_errors=True)