Spaces:

seanpoyner
/

smolcode

Paused

App Files Files Community

smolcode / engine /sandbox.py

seanpoyner

Upload folder using huggingface_hub

daea45b verified 14 days ago

Raw

History Blame Contribute Delete

5.12 kB

	"""Execution sandbox for model-generated code.

	This is the agentic core's "hands": it runs code the model writes and reports
	back stdout/stderr/exit so the agent can iterate to green.

	SECURITY: model-generated code is untrusted. The default here is a soft
	sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
	and output caps. It is adequate for local/laptop use. Before exposing a public
	HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an
	e2b/Docker microVM); the interface below does not change.
	"""
	from __future__ import annotations

	import os
	import shutil
	import subprocess
	import tempfile
	from dataclasses import dataclass
	from pathlib import Path

	# Wall-clock limit, in seconds, used when a run_* call passes no timeout.
	DEFAULT_TIMEOUT = 20 # seconds
	# Default clip length used by _clip for each output stream.
	MAX_OUTPUT = 20_000 # chars per stream, to keep the LLM context bounded


	@dataclass
	class RunResult:
	    """Outcome of one command executed inside the workspace."""

	    ok: bool
	    stdout: str
	    stderr: str
	    exit_code: int
	    timed_out: bool = False

	    def as_tool_payload(self) -> dict:
	        """Build the compact dict handed back to the LLM as the tool result.

	        Status fields are copied verbatim; both output streams are passed
	        through `_clip` so the payload stays bounded.
	        """
	        payload = {
	            "ok": self.ok,
	            "exit_code": self.exit_code,
	            "timed_out": self.timed_out,
	        }
	        # Streams go last so the key order matches what callers already see.
	        for stream_name in ("stdout", "stderr"):
	            payload[stream_name] = _clip(getattr(self, stream_name))
	        return payload


	def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
	    """Return *s* unchanged if it fits in *limit* chars, else a truncated copy.

	    A truncated copy keeps the first *limit* characters and appends a marker
	    line stating how many characters were dropped.
	    """
	    overflow = len(s) - limit
	    if overflow > 0:
	        return f"{s[:limit]}\n...[truncated {overflow} chars]"
	    return s


	class Workspace:
	    """A scratch directory the agent reads/writes/executes within.

	    All file tools are confined to this directory; paths are resolved and
	    checked component-wise so the model cannot escape via `..`, absolute
	    paths, or a sibling directory whose name merely starts with the root's.
	    """

	    def __init__(self, root: str | None = None) -> None:
	        """Use *root* as the workspace, or create a fresh temp directory.

	        The directory (and any missing parents) is created if absent.
	        """
	        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
	        self.root.mkdir(parents=True, exist_ok=True)

	    # --- path safety -----------------------------------------------------
	    def _resolve(self, rel: str) -> Path:
	        """Resolve *rel* against the workspace root and confine it there.

	        Raises:
	            ValueError: if the resolved path lies outside the workspace.
	        """
	        base = self.root.resolve()
	        p = (self.root / rel).resolve()
	        # Compare path components, not string prefixes: a plain startswith()
	        # would accept "<root>-evil/x" because it shares the root's prefix.
	        try:
	            p.relative_to(base)
	        except ValueError:
	            raise ValueError(f"path escapes workspace: {rel!r}") from None
	        return p

	    # --- file ops --------------------------------------------------------
	    def write_file(self, path: str, content: str) -> dict:
	        """Write *content* to *path* (workspace-relative), creating parents.

	        Returns a dict with ``ok``, the given ``path`` and the UTF-8 byte size.
	        """
	        p = self._resolve(path)
	        p.parent.mkdir(parents=True, exist_ok=True)
	        # Always UTF-8: python3 reads source as UTF-8 and the reported size
	        # below is computed from the UTF-8 encoding, not the locale's.
	        p.write_text(content, encoding="utf-8")
	        return {"ok": True, "path": path, "bytes": len(content.encode())}

	    def read_file(self, path: str) -> dict:
	        """Return the (clipped) text of *path*, or a not-found result.

	        Directories and other non-regular files are reported as not found
	        instead of raising from the read.
	        """
	        p = self._resolve(path)
	        if not p.is_file():
	            return {"ok": False, "error": "not found", "path": path}
	        # errors="replace" keeps non-UTF-8 files readable instead of raising.
	        text = p.read_text(encoding="utf-8", errors="replace")
	        return {"ok": True, "path": path, "content": _clip(text)}

	    def list_files(self) -> list[str]:
	        """Return the sorted workspace-relative paths of all regular files."""
	        return sorted(
	            str(p.relative_to(self.root))
	            for p in self.root.rglob("*")
	            if p.is_file()
	        )

	    # --- execution -------------------------------------------------------
	    def run_python(self, code: str | None = None, path: str | None = None,
	                   timeout: int = DEFAULT_TIMEOUT) -> RunResult:
	        """Run a workspace file (*path*) or an inline *code* snippet.

	        When *path* is given it takes precedence; otherwise *code* is written
	        to ``_snippet.py`` in the workspace and executed from there.
	        """
	        if path:
	            target = self._resolve(path)
	        else:
	            target = self._resolve("_snippet.py")
	            target.write_text(code or "", encoding="utf-8")
	        return self._run(["python3", str(target)], timeout)

	    def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
	        """Run the workspace's tests: pytest if available, else unittest."""
	        result = self._run(["python3", "-m", "pytest", "-q"], timeout)
	        # "python3 -m pytest" exits 1 with this message when pytest is not
	        # installed; only then fall back to unittest discovery.
	        if result.exit_code == 1 and "No module named pytest" in result.stderr:
	            return self._run(["python3", "-m", "unittest", "discover"], timeout)
	        return result

	    def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
	        """Run a shell command in the workspace (login shell for full PATH).

	        Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
	        same way run_python checks Python. Mirrors the Rust agent's run_shell and the
	        eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
	        """
	        return self._run(["bash", "-lc", command], timeout)

	    @staticmethod
	    def _partial_text(stream: bytes | str | None) -> str:
	        """Normalize output captured before a timeout to ``str``.

	        TimeoutExpired may carry bytes, str or None for a stream.
	        """
	        if isinstance(stream, bytes):
	            return stream.decode(errors="replace")
	        return stream or ""

	    def _run(self, argv: list[str], timeout: int) -> RunResult:
	        """Execute *argv* with cwd set to the workspace and capture its output.

	        Returns a RunResult; on timeout ``exit_code`` is 124, ``timed_out`` is
	        True, and any output captured so far is preserved.
	        """
	        env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
	        try:
	            proc = subprocess.run(
	                argv,
	                cwd=self.root,
	                env=env,
	                capture_output=True,
	                text=True,
	                # Untrusted code may emit undecodable bytes; never raise on them.
	                errors="replace",
	                timeout=timeout,
	            )
	        except subprocess.TimeoutExpired as e:
	            note = f"timed out after {timeout}s"
	            partial_err = self._partial_text(e.stderr)
	            return RunResult(
	                ok=False,
	                stdout=self._partial_text(e.stdout),
	                # Keep whatever stderr was produced; the note always comes last.
	                stderr=f"{partial_err}\n{note}" if partial_err else note,
	                exit_code=124,
	                timed_out=True,
	            )
	        return RunResult(
	            ok=proc.returncode == 0,
	            stdout=proc.stdout,
	            stderr=proc.stderr,
	            exit_code=proc.returncode,
	        )

	    def cleanup(self) -> None:
	        """Delete the workspace directory tree, ignoring removal errors."""
	        shutil.rmtree(self.root, ignore_errors=True)