Spaces:
Paused
Paused
File size: 5,115 Bytes
"""Execution sandbox for model-generated code.

This is the agentic core's "hands": it runs code the model writes and reports
back stdout/stderr/exit so the agent can iterate to green.

SECURITY: model-generated code is untrusted. The default here is a *soft*
sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
and output caps. It is adequate for local/laptop use. Before exposing a public
HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an
e2b/Docker microVM); the interface below does not change.
"""
from __future__ import annotations
import os
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
# Wall-clock limit, in seconds, used when a caller does not pass `timeout`.
DEFAULT_TIMEOUT: int = 20

# Character cap applied to each output stream, to keep the LLM context bounded.
MAX_OUTPUT: int = 20_000
@dataclass
class RunResult:
    """Outcome of one command execution: status flags plus captured output."""

    ok: bool            # success flag supplied by whoever builds the result
    stdout: str         # captured standard output (unclipped)
    stderr: str         # captured standard error (unclipped)
    exit_code: int      # process exit status
    timed_out: bool = False  # True when the run was cut off by the timeout

    def as_tool_payload(self) -> dict:
        """Return the compact dict that is handed to the LLM as a tool result.

        The status fields are copied as-is; both output streams are passed
        through `_clip` so the payload stays bounded in size.
        """
        payload: dict = {
            "ok": self.ok,
            "exit_code": self.exit_code,
            "timed_out": self.timed_out,
        }
        # Streams are added last, in this order, to keep the key order stable.
        for stream in ("stdout", "stderr"):
            payload[stream] = _clip(getattr(self, stream))
        return payload
def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Return *s* unchanged when it fits in *limit* chars, else a marked prefix.

    When *s* is longer than *limit*, the first *limit* characters are kept and
    a trailing note records how many characters were dropped.
    """
    overflow = len(s) - limit
    if overflow <= 0:
        return s
    return f"{s[:limit]}\n...[truncated {overflow} chars]"
class Workspace:
    """A scratch directory the agent reads/writes/executes within.

    All file tools are confined to this directory; paths are resolved and
    checked so the model cannot escape via `..`, absolute paths, or symlinks
    that point outside the root.
    """

    def __init__(self, root: str | None = None) -> None:
        """Use *root* as the workspace, or create a fresh temp directory.

        The directory (and any missing parents) is created if it is absent.
        """
        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
        self.root.mkdir(parents=True, exist_ok=True)

    # --- path safety -----------------------------------------------------
    def _resolve(self, rel: str) -> Path:
        """Return the resolved path for *rel*, guaranteed to be inside the root.

        Raises:
            ValueError: if the resolved path lies outside the workspace.
        """
        root = self.root.resolve()
        p = (root / rel).resolve()
        # Compare path components, not string prefixes: a plain startswith()
        # would accept a sibling such as "<root>-evil/x" for root "<root>".
        try:
            p.relative_to(root)
        except ValueError:
            raise ValueError(f"path escapes workspace: {rel!r}") from None
        return p

    # --- file ops --------------------------------------------------------
    def write_file(self, path: str, content: str) -> dict:
        """Write *content* to *path* (workspace-relative), creating parents.

        The file is written as UTF-8 so the reported byte count matches what
        is on disk and so python3 (which decodes source as UTF-8) can run it.

        Returns:
            ``{"ok": True, "path": path, "bytes": <encoded size>}``.

        Raises:
            ValueError: if *path* escapes the workspace.
        """
        p = self._resolve(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content, encoding="utf-8")
        return {"ok": True, "path": path, "bytes": len(content.encode("utf-8"))}

    def read_file(self, path: str) -> dict:
        """Read *path* (workspace-relative) and return its clipped text.

        Returns:
            ``{"ok": True, "path", "content"}`` on success, or
            ``{"ok": False, "error", "path"}`` when the path is missing or is
            not a regular file.

        Raises:
            ValueError: if *path* escapes the workspace.
        """
        p = self._resolve(path)
        if not p.exists():
            return {"ok": False, "error": "not found", "path": path}
        if not p.is_file():
            # e.g. a directory: read_text() would raise instead of reporting.
            return {"ok": False, "error": "not a file", "path": path}
        # errors="replace" keeps binary or mis-encoded files readable as a
        # tool result instead of raising UnicodeDecodeError.
        text = p.read_text(encoding="utf-8", errors="replace")
        return {"ok": True, "path": path, "content": _clip(text)}

    def list_files(self) -> list[str]:
        """Return the sorted workspace-relative paths of all regular files."""
        return sorted(
            str(p.relative_to(self.root))
            for p in self.root.rglob("*")
            if p.is_file()
        )

    # --- execution -------------------------------------------------------
    def run_python(self, code: str | None = None, path: str | None = None,
                   timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a Python file, or an inline snippet, with ``python3``.

        If *path* is given it is executed; otherwise *code* (or an empty
        program when *code* is None) is written to ``_snippet.py`` in the
        workspace and that file is executed.

        Raises:
            ValueError: if *path* escapes the workspace.
        """
        if path:
            target = self._resolve(path)
        else:
            target = self._resolve("_snippet.py")
            target.write_text(code or "", encoding="utf-8")
        return self._run(["python3", str(target)], timeout)

    def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run the workspace's tests: pytest if available, else unittest.

        pytest is tried first. Only when the interpreter reports that the
        pytest module itself is missing does this fall back to
        ``python3 -m unittest discover``; ordinary test failures are returned
        as-is.
        """
        result = self._run(["python3", "-m", "pytest", "-q"], timeout)
        pytest_missing = (
            not result.timed_out
            and result.exit_code != 0
            # Text printed by `python3 -m <module>` for an absent module.
            and "No module named pytest" in result.stderr
        )
        if pytest_missing:
            return self._run(["python3", "-m", "unittest", "discover"], timeout)
        return result

    def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a shell command in the workspace (login shell for full PATH).

        Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
        same way run_python checks Python. Mirrors the Rust agent's run_shell and the
        eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
        """
        return self._run(["bash", "-lc", command], timeout)

    @staticmethod
    def _as_text(stream: bytes | str | None) -> str:
        """Coerce partial output from a timed-out run into ``str``."""
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            # Partial output may end mid-character; never raise on it.
            return stream.decode("utf-8", errors="replace")
        return stream

    def _run(self, argv: list[str], timeout: int) -> RunResult:
        """Execute *argv* in the workspace and capture its result.

        The child inherits the current environment with bytecode writing
        disabled. On timeout the result has ``exit_code=124``,
        ``timed_out=True``, whatever stdout was captured, and any captured
        stderr followed by a ``timed out after …`` notice.
        """
        env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
        # NOTE(review): subprocess.run kills only the direct child on timeout;
        # confirm whether grandchildren of `bash -lc` need a process group.
        try:
            proc = subprocess.run(
                argv,
                cwd=self.root,
                env=env,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            notice = f"timed out after {timeout}s"
            partial_err = self._as_text(e.stderr).rstrip()
            return RunResult(
                ok=False,
                stdout=self._as_text(e.stdout),
                # Keep what the program printed before it was killed.
                stderr=f"{partial_err}\n{notice}" if partial_err else notice,
                exit_code=124,
                timed_out=True,
            )
        return RunResult(
            ok=proc.returncode == 0,
            stdout=proc.stdout,
            stderr=proc.stderr,
            exit_code=proc.returncode,
        )

    def cleanup(self) -> None:
        """Delete the workspace directory tree, ignoring any errors."""
        shutil.rmtree(self.root, ignore_errors=True)
|