Spaces:
Paused
Paused
| """Execution sandbox for model-generated code. | |
| This is the agentic core's "hands": it runs code the model writes and reports | |
| back stdout/stderr/exit so the agent can iterate to green. | |
| SECURITY: model-generated code is untrusted. The default here is a *soft* | |
| sandbox — a subprocess with a wall-clock timeout, a scratch working directory, | |
| and output caps. It is adequate for local/laptop use. Before exposing a public | |
| HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an | |
| e2b/Docker microVM); the interface below does not change. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import shutil | |
| import subprocess | |
| import tempfile | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
# Wall-clock limit, in seconds, used when a run_* call is given no timeout.
DEFAULT_TIMEOUT = 20

# Per-stream character cap, so tool output keeps the LLM context bounded.
MAX_OUTPUT = 20_000
@dataclass
class RunResult:
    """Outcome of one subprocess run inside the workspace.

    Declared as a dataclass so it can be built with keyword arguments
    (``RunResult(ok=..., stdout=..., ...)``), which is how the runner
    constructs it; without the decorator the class has no ``__init__``.
    """

    # True when the process exited with status 0.
    ok: bool
    # Captured standard output (unclipped; clipping happens in the payload).
    stdout: str
    # Captured standard error, or the timeout notice when the run timed out.
    stderr: str
    # Process return code; 124 is used for a timed-out run.
    exit_code: int
    # True when the run was stopped because it exceeded its timeout.
    timed_out: bool = False

    def as_tool_payload(self) -> dict:
        """Return the compact dict handed back to the LLM as the tool result.

        Both output streams are passed through ``_clip`` so the payload
        stays bounded regardless of how much the process printed.
        """
        return {
            "ok": self.ok,
            "exit_code": self.exit_code,
            "timed_out": self.timed_out,
            "stdout": _clip(self.stdout),
            "stderr": _clip(self.stderr),
        }
def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Cap *s* at *limit* characters, noting how much was cut.

    Text that already fits is returned unchanged. Otherwise the first
    *limit* characters are kept and a trailer reporting the number of
    dropped characters is appended on a new line.
    """
    overflow = len(s) - limit
    if overflow > 0:
        return f"{s[:limit]}\n...[truncated {overflow} chars]"
    return s
| class Workspace: | |
| """A scratch directory the agent reads/writes/executes within. | |
| All file tools are confined to this directory; paths are resolved and | |
| checked so the model cannot escape via `..` or absolute paths. | |
| """ | |
| def __init__(self, root: str | None = None) -> None: | |
| self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-")) | |
| self.root.mkdir(parents=True, exist_ok=True) | |
| # --- path safety ----------------------------------------------------- | |
| def _resolve(self, rel: str) -> Path: | |
| p = (self.root / rel).resolve() | |
| if not str(p).startswith(str(self.root.resolve())): | |
| raise ValueError(f"path escapes workspace: {rel!r}") | |
| return p | |
| # --- file ops -------------------------------------------------------- | |
| def write_file(self, path: str, content: str) -> dict: | |
| p = self._resolve(path) | |
| p.parent.mkdir(parents=True, exist_ok=True) | |
| p.write_text(content) | |
| return {"ok": True, "path": path, "bytes": len(content.encode())} | |
| def read_file(self, path: str) -> dict: | |
| p = self._resolve(path) | |
| if not p.exists(): | |
| return {"ok": False, "error": "not found", "path": path} | |
| return {"ok": True, "path": path, "content": _clip(p.read_text())} | |
| def list_files(self) -> list[str]: | |
| return sorted( | |
| str(p.relative_to(self.root)) | |
| for p in self.root.rglob("*") | |
| if p.is_file() | |
| ) | |
| # --- execution ------------------------------------------------------- | |
| def run_python(self, code: str | None = None, path: str | None = None, | |
| timeout: int = DEFAULT_TIMEOUT) -> RunResult: | |
| if path: | |
| target = self._resolve(path) | |
| argv = ["python3", str(target)] | |
| else: | |
| f = self._resolve("_snippet.py") | |
| f.write_text(code or "") | |
| argv = ["python3", str(f)] | |
| return self._run(argv, timeout) | |
| def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult: | |
| # pytest if available, falling back to unittest discovery. | |
| argv = ["python3", "-m", "pytest", "-q"] | |
| return self._run(argv, timeout) | |
| def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult: | |
| """Run a shell command in the workspace (login shell for full PATH). | |
| Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the | |
| same way run_python checks Python. Mirrors the Rust agent's run_shell and the | |
| eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`. | |
| """ | |
| return self._run(["bash", "-lc", command], timeout) | |
| def _run(self, argv: list[str], timeout: int) -> RunResult: | |
| env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"} | |
| try: | |
| proc = subprocess.run( | |
| argv, | |
| cwd=self.root, | |
| env=env, | |
| capture_output=True, | |
| text=True, | |
| timeout=timeout, | |
| ) | |
| return RunResult( | |
| ok=proc.returncode == 0, | |
| stdout=proc.stdout, | |
| stderr=proc.stderr, | |
| exit_code=proc.returncode, | |
| ) | |
| except subprocess.TimeoutExpired as e: | |
| return RunResult( | |
| ok=False, | |
| stdout=e.stdout.decode() if isinstance(e.stdout, bytes) else (e.stdout or ""), | |
| stderr=f"timed out after {timeout}s", | |
| exit_code=124, | |
| timed_out=True, | |
| ) | |
| def cleanup(self) -> None: | |
| shutil.rmtree(self.root, ignore_errors=True) | |