# Hugging Face Space: seanpoyner/smolcode (status: Paused)
# File size: 5,115 Bytes · revision daea45b

"""Execution sandbox for model-generated code.

This is the agentic core's "hands": it runs code the model writes and reports
back stdout/stderr/exit so the agent can iterate to green.

SECURITY: model-generated code is untrusted. The default here is a *soft*
sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
and output caps. It is adequate for local/laptop use. Before exposing a public
HF Space, wrap `Workspace._run` with a real isolator (nsjail/firejail/bubblewrap,
or an e2b/Docker microVM); the interface below does not change.
"""
from __future__ import annotations

import os
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path

# Default value of the `timeout` parameter on Workspace.run_python / run_tests /
# run_shell; it is forwarded to subprocess.run as the wall-clock limit.
DEFAULT_TIMEOUT = 20  # seconds
# Default `limit` of `_clip`: the number of characters of each output stream (or
# file content) kept before a truncation marker is appended.
MAX_OUTPUT = 20_000   # chars per stream, to keep the LLM context bounded


@dataclass
class RunResult:
    """Outcome of one subprocess run performed by the workspace.

    Attributes:
        ok: Success flag for the run.
        stdout: Captured standard output.
        stderr: Captured standard error (or a diagnostic message).
        exit_code: Exit status reported for the process.
        timed_out: Whether the run was cut short by the timeout; defaults to
            False.
    """

    ok: bool
    stdout: str
    stderr: str
    exit_code: int
    timed_out: bool = False

    def as_tool_payload(self) -> dict:
        """Build the compact dict that is returned to the LLM as the tool result.

        The scalar fields are copied as-is; both output streams are passed
        through `_clip` so the payload stays bounded in size.
        """
        payload: dict = {
            "ok": self.ok,
            "exit_code": self.exit_code,
            "timed_out": self.timed_out,
        }
        # Streams go last, stdout before stderr, each clipped independently.
        for stream in ("stdout", "stderr"):
            payload[stream] = _clip(getattr(self, stream))
        return payload


def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Return *s* cut down to *limit* characters, noting how much was dropped.

    Strings that already fit are returned unchanged. Longer strings keep their
    first *limit* characters and get a trailing marker line stating the number
    of characters removed.
    """
    overflow = len(s) - limit
    if overflow > 0:
        return f"{s[:limit]}\n...[truncated {overflow} chars]"
    return s


class Workspace:
    """A scratch directory the agent reads/writes/executes within.

    All file tools are confined to this directory; paths are resolved and
    checked so the model cannot escape via `..`, absolute paths, or symlinks
    that point outside the root.
    """

    def __init__(self, root: str | None = None) -> None:
        """Create (or adopt) the workspace directory.

        Args:
            root: Directory to use as the workspace. When omitted or empty, a
                fresh temporary directory with the ``smallcode-`` prefix is
                created instead.
        """
        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
        self.root.mkdir(parents=True, exist_ok=True)

    # --- path safety -----------------------------------------------------
    def _resolve(self, rel: str) -> Path:
        """Resolve *rel* against the workspace root and verify containment.

        Args:
            rel: Path supplied by the model, interpreted relative to the root.

        Returns:
            The fully resolved path.

        Raises:
            ValueError: If the resolved path lies outside the workspace.
        """
        base = self.root.resolve()
        p = (self.root / rel).resolve()
        # Compare path components, not string prefixes: with a plain
        # startswith() check, a sibling such as "<root>-evil/x" would be
        # accepted because its string form begins with "<root>".
        try:
            p.relative_to(base)
        except ValueError:
            raise ValueError(f"path escapes workspace: {rel!r}") from None
        return p

    # --- file ops --------------------------------------------------------
    def write_file(self, path: str, content: str) -> dict:
        """Write *content* to *path* inside the workspace, creating parents.

        Returns:
            ``{"ok": True, "path": ..., "bytes": ...}`` where ``bytes`` is the
            UTF-8 encoded size of *content*.

        Raises:
            ValueError: If *path* escapes the workspace.
        """
        p = self._resolve(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 so the file matches the reported byte count and does
        # not depend on the platform's locale encoding.
        p.write_text(content, encoding="utf-8")
        return {"ok": True, "path": path, "bytes": len(content.encode("utf-8"))}

    def read_file(self, path: str) -> dict:
        """Read a workspace file and return its (clipped) text.

        Returns:
            ``{"ok": True, "path": ..., "content": ...}`` on success, or
            ``{"ok": False, "error": ..., "path": ...}`` when the path does
            not exist (``"not found"``) or is not a regular file
            (``"not a file"``).

        Raises:
            ValueError: If *path* escapes the workspace.
        """
        p = self._resolve(path)
        if not p.exists():
            return {"ok": False, "error": "not found", "path": path}
        if not p.is_file():
            # Directories and other non-files would make read_text() raise.
            return {"ok": False, "error": "not a file", "path": path}
        # Undecodable bytes are replaced rather than raising, so a binary
        # artifact in the workspace cannot crash the tool call.
        text = p.read_text(encoding="utf-8", errors="replace")
        return {"ok": True, "path": path, "content": _clip(text)}

    def list_files(self) -> list[str]:
        """Return the sorted root-relative paths of all files in the workspace."""
        return sorted(
            str(p.relative_to(self.root))
            for p in self.root.rglob("*")
            if p.is_file()
        )

    # --- execution -------------------------------------------------------
    def run_python(self, code: str | None = None, path: str | None = None,
                   timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a Python file, or an inline snippet, with ``python3``.

        Args:
            code: Source to execute when *path* is not given. It is written to
                ``_snippet.py`` in the workspace first; ``None`` is treated as
                an empty script.
            path: Workspace-relative script to run; takes precedence over
                *code*.
            timeout: Wall-clock limit in seconds.

        Raises:
            ValueError: If *path* escapes the workspace.
        """
        if path:
            target = self._resolve(path)
        else:
            target = self._resolve("_snippet.py")
            # Python source files are UTF-8 by default; do not rely on locale.
            target.write_text(code or "", encoding="utf-8")
        return self._run(["python3", str(target)], timeout)

    def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run the workspace's tests: pytest if available, else unittest.

        pytest is tried first. Only when the interpreter itself reports that
        the pytest module is missing does this fall back to
        ``python3 -m unittest discover``; real pytest failures are returned
        unchanged.
        """
        result = self._run(["python3", "-m", "pytest", "-q"], timeout)
        pytest_missing = (
            result.exit_code == 1
            and not result.timed_out
            # Message printed by `python -m <mod>` when <mod> is not installed.
            and result.stderr.rstrip().endswith("No module named pytest")
        )
        if pytest_missing:
            return self._run(["python3", "-m", "unittest", "discover"], timeout)
        return result

    def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a shell command in the workspace (login shell for full PATH).

        Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
        same way run_python checks Python. Mirrors the Rust agent's run_shell and the
        eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
        """
        return self._run(["bash", "-lc", command], timeout)

    @staticmethod
    def _as_text(data: bytes | str | None) -> str:
        """Coerce partial subprocess output (bytes, str, or None) to str."""
        if isinstance(data, bytes):
            # Output cut off by a timeout may end mid-character; never raise.
            return data.decode(errors="replace")
        return data or ""

    def _run(self, argv: list[str], timeout: int) -> RunResult:
        """Execute *argv* with the workspace as cwd and capture its output.

        Args:
            argv: Command and arguments (no shell interpretation here).
            timeout: Wall-clock limit in seconds.

        Returns:
            A RunResult. On timeout: ``exit_code=124``, ``timed_out=True``,
            and stderr begins with ``timed out after {timeout}s`` followed by
            any partial stderr. If the process cannot be started at all (for
            example the executable is missing): ``exit_code=127`` with the OS
            error text in stderr.
        """
        env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
        try:
            proc = subprocess.run(
                argv,
                cwd=self.root,
                env=env,
                capture_output=True,
                text=True,
                # Untrusted programs may emit arbitrary bytes; replace instead
                # of letting a UnicodeDecodeError escape.
                errors="replace",
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            note = f"timed out after {timeout}s"
            partial_err = self._as_text(e.stderr)
            return RunResult(
                ok=False,
                stdout=self._as_text(e.stdout),
                # Note first so it survives clipping of a long stderr.
                stderr=f"{note}\n{partial_err}" if partial_err else note,
                exit_code=124,
                timed_out=True,
            )
        except OSError as e:
            # Missing executable, missing cwd, permission denied, ...: report
            # it as a failed run instead of crashing the agent loop.
            return RunResult(
                ok=False,
                stdout="",
                stderr=f"failed to start {argv[0]!r}: {e}",
                exit_code=127,
            )
        return RunResult(
            ok=proc.returncode == 0,
            stdout=proc.stdout,
            stderr=proc.stderr,
            exit_code=proc.returncode,
        )

    def cleanup(self) -> None:
        """Delete the workspace directory tree, ignoring any errors."""
        shutil.rmtree(self.root, ignore_errors=True)