"""Execution sandbox for model-generated code. This is the agentic core's "hands": it runs code the model writes and reports back stdout/stderr/exit so the agent can iterate to green. SECURITY: model-generated code is untrusted. The default here is a *soft* sandbox — a subprocess with a wall-clock timeout, a scratch working directory, and output caps. It is adequate for local/laptop use. Before exposing a public HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an e2b/Docker microVM); the interface below does not change. """ from __future__ import annotations import os import shutil import subprocess import tempfile from dataclasses import dataclass from pathlib import Path DEFAULT_TIMEOUT = 20 # seconds MAX_OUTPUT = 20_000 # chars per stream, to keep the LLM context bounded @dataclass class RunResult: ok: bool stdout: str stderr: str exit_code: int timed_out: bool = False def as_tool_payload(self) -> dict: """Compact dict handed back to the LLM as the tool result.""" return { "ok": self.ok, "exit_code": self.exit_code, "timed_out": self.timed_out, "stdout": _clip(self.stdout), "stderr": _clip(self.stderr), } def _clip(s: str, limit: int = MAX_OUTPUT) -> str: if len(s) <= limit: return s return s[:limit] + f"\n...[truncated {len(s) - limit} chars]" class Workspace: """A scratch directory the agent reads/writes/executes within. All file tools are confined to this directory; paths are resolved and checked so the model cannot escape via `..` or absolute paths. """ def __init__(self, root: str | None = None) -> None: self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-")) self.root.mkdir(parents=True, exist_ok=True) # --- path safety ----------------------------------------------------- def _resolve(self, rel: str) -> Path: p = (self.root / rel).resolve() if not str(p).startswith(str(self.root.resolve())): raise ValueError(f"path escapes workspace: {rel!r}") return p # --- file ops -------------------------------------------------------- def write_file(self, path: str, content: str) -> dict: p = self._resolve(path) p.parent.mkdir(parents=True, exist_ok=True) p.write_text(content) return {"ok": True, "path": path, "bytes": len(content.encode())} def read_file(self, path: str) -> dict: p = self._resolve(path) if not p.exists(): return {"ok": False, "error": "not found", "path": path} return {"ok": True, "path": path, "content": _clip(p.read_text())} def list_files(self) -> list[str]: return sorted( str(p.relative_to(self.root)) for p in self.root.rglob("*") if p.is_file() ) # --- execution ------------------------------------------------------- def run_python(self, code: str | None = None, path: str | None = None, timeout: int = DEFAULT_TIMEOUT) -> RunResult: if path: target = self._resolve(path) argv = ["python3", str(target)] else: f = self._resolve("_snippet.py") f.write_text(code or "") argv = ["python3", str(f)] return self._run(argv, timeout) def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult: # pytest if available, falling back to unittest discovery. argv = ["python3", "-m", "pytest", "-q"] return self._run(argv, timeout) def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult: """Run a shell command in the workspace (login shell for full PATH). Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the same way run_python checks Python. Mirrors the Rust agent's run_shell and the eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`. """ return self._run(["bash", "-lc", command], timeout) def _run(self, argv: list[str], timeout: int) -> RunResult: env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"} try: proc = subprocess.run( argv, cwd=self.root, env=env, capture_output=True, text=True, timeout=timeout, ) return RunResult( ok=proc.returncode == 0, stdout=proc.stdout, stderr=proc.stderr, exit_code=proc.returncode, ) except subprocess.TimeoutExpired as e: return RunResult( ok=False, stdout=e.stdout.decode() if isinstance(e.stdout, bytes) else (e.stdout or ""), stderr=f"timed out after {timeout}s", exit_code=124, timed_out=True, ) def cleanup(self) -> None: shutil.rmtree(self.root, ignore_errors=True)