File size: 5,115 Bytes
daea45b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""Execution sandbox for model-generated code.

This is the agentic core's "hands": it runs code the model writes and reports
back stdout/stderr/exit so the agent can iterate to green.

SECURITY: model-generated code is untrusted. The default here is a *soft*
sandbox — a subprocess with a wall-clock timeout, a scratch working directory,
and output caps. It is adequate for local/laptop use. Before exposing a public
HF Space, wrap `_run` with a real isolator (nsjail/firejail/bubblewrap or an
e2b/Docker microVM); the interface below does not change.
"""
from __future__ import annotations

import os
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path

# Default wall-clock limit for the run_* methods when the caller passes no timeout.
DEFAULT_TIMEOUT = 20  # seconds
# Default truncation limit used by _clip for each captured stream / file body.
MAX_OUTPUT = 20_000   # chars per stream, to keep the LLM context bounded


@dataclass
class RunResult:
    """Captured outcome of one subprocess run: streams, exit status, timeout flag."""

    ok: bool            # success flag as decided by whoever built the result
    stdout: str         # captured standard output (unclipped)
    stderr: str         # captured standard error (unclipped)
    exit_code: int      # process exit status
    timed_out: bool = False  # set when the run was cut off by the timeout

    def as_tool_payload(self) -> dict:
        """Build the compact dict handed back to the LLM as the tool result.

        Status fields come first, followed by both output streams, each
        passed through `_clip` so the payload stays bounded in size.
        """
        payload = dict(
            ok=self.ok,
            exit_code=self.exit_code,
            timed_out=self.timed_out,
        )
        payload["stdout"] = _clip(self.stdout)
        payload["stderr"] = _clip(self.stderr)
        return payload


def _clip(s: str, limit: int = MAX_OUTPUT) -> str:
    """Return *s* unchanged if it fits in *limit* chars, else a truncated copy.

    A truncated result keeps the first *limit* characters and appends a
    marker line stating how many characters were dropped.
    """
    overflow = len(s) - limit
    if overflow > 0:
        head = s[:limit]
        return head + f"\n...[truncated {overflow} chars]"
    return s


class Workspace:
    """A scratch directory the agent reads/writes/executes within.

    All file tools are confined to this directory; paths are resolved
    (following symlinks) and checked so the model cannot escape via `..` or
    absolute paths.

    NOTE: the confinement applies to the file tools only. Processes started
    by the run_* methods merely use the workspace as their working directory.
    """

    def __init__(self, root: str | None = None) -> None:
        """Use *root* as the workspace, or create a fresh temp dir if omitted."""
        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="smallcode-"))
        self.root.mkdir(parents=True, exist_ok=True)

    # --- path safety -----------------------------------------------------
    def _resolve(self, rel: str) -> Path:
        """Resolve *rel* against the workspace root and verify containment.

        Returns the resolved absolute path.

        Raises:
            ValueError: if the resolved path lies outside the workspace.
        """
        root = self.root.resolve()
        candidate = (root / rel).resolve()
        # Compare path components, not string prefixes: with a prefix test
        # a sibling such as "<root>-evil/x" would be accepted because its
        # string form starts with the root's string form.
        try:
            candidate.relative_to(root)
        except ValueError:
            raise ValueError(f"path escapes workspace: {rel!r}") from None
        return candidate

    # --- file ops --------------------------------------------------------
    def write_file(self, path: str, content: str) -> dict:
        """Write *content* to *path* (UTF-8), creating parent directories.

        Returns a dict with ``ok``, the given ``path`` and the number of
        ``bytes`` written. Raises ValueError if *path* escapes the workspace.
        """
        p = self._resolve(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        # Write with the same encoding the byte count below is computed in,
        # rather than whatever the locale default happens to be.
        p.write_text(content, encoding="utf-8")
        return {"ok": True, "path": path, "bytes": len(content.encode("utf-8"))}

    def read_file(self, path: str) -> dict:
        """Read *path* as UTF-8 text, clipped to the default output limit.

        Returns ``{"ok": True, "path", "content"}`` on success, or
        ``{"ok": False, "error", "path"}`` when the path is missing or is not
        a regular file. Raises ValueError if *path* escapes the workspace.
        """
        p = self._resolve(path)
        if not p.exists():
            return {"ok": False, "error": "not found", "path": path}
        if not p.is_file():
            # Directories (and other non-files) would make read_text raise.
            return {"ok": False, "error": "not a file", "path": path}
        # errors="replace" keeps binary / non-UTF-8 files from raising.
        text = p.read_text(encoding="utf-8", errors="replace")
        return {"ok": True, "path": path, "content": _clip(text)}

    def list_files(self) -> list[str]:
        """Return the sorted root-relative paths of all files in the workspace."""
        return sorted(
            str(p.relative_to(self.root))
            for p in self.root.rglob("*")
            if p.is_file()
        )

    # --- execution -------------------------------------------------------
    def run_python(self, code: str | None = None, path: str | None = None,
                   timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a workspace script (*path*) or an inline snippet (*code*).

        *path* takes precedence when both are given. Inline code is written
        to ``_snippet.py`` in the workspace before being executed; a missing
        *code* is treated as an empty script.
        """
        if path:
            target = self._resolve(path)
        else:
            target = self._resolve("_snippet.py")
            target.write_text(code or "", encoding="utf-8")
        return self._run(["python3", str(target)], timeout)

    def run_tests(self, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run the workspace's tests: pytest if available, else unittest discovery."""
        result = self._run(["python3", "-m", "pytest", "-q"], timeout)
        # "python3 -m pytest" without pytest installed fails with
        # "<interpreter>: No module named pytest" on stderr; only in that case
        # fall back to the standard library's test discovery.
        if result.exit_code != 0 and "No module named pytest" in result.stderr:
            return self._run(["python3", "-m", "unittest", "discover"], timeout)
        return result

    def run_shell(self, command: str, timeout: int = DEFAULT_TIMEOUT) -> RunResult:
        """Run a shell command in the workspace (login shell for full PATH).

        Lets the router smoke-run non-Python solutions (go/rust/node/sqlite/…) the
        same way run_python checks Python. Mirrors the Rust agent's run_shell and the
        eval grader (smolcode-cli/src/eval.rs:check_cmd_ok), which also use `bash -lc`.
        """
        return self._run(["bash", "-lc", command], timeout)

    @staticmethod
    def _as_text(stream: str | bytes | None) -> str:
        """Coerce a captured stream (str, bytes or None) to str without raising."""
        if isinstance(stream, bytes):
            return stream.decode(errors="replace")
        return stream or ""

    def _run(self, argv: list[str], timeout: int) -> RunResult:
        """Execute *argv* with the workspace as cwd and capture its output.

        Always returns a RunResult: exit code 124 with ``timed_out=True`` when
        the timeout expires, exit code 127 when the program cannot be started.
        """
        env = {**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
        # NOTE(review): on timeout only the direct child is killed; processes
        # it spawned (e.g. through run_shell) may outlive it — confirm whether
        # a process-group kill is needed before relying on the timeout.
        try:
            proc = subprocess.run(
                argv,
                cwd=self.root,
                env=env,
                capture_output=True,
                text=True,
                errors="replace",  # undecodable child output must not raise
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            # Partial output attached to the exception may be bytes even
            # though text=True was requested, hence the coercion.
            note = f"timed out after {timeout}s"
            partial_err = self._as_text(e.stderr).rstrip()
            return RunResult(
                ok=False,
                stdout=self._as_text(e.stdout),
                stderr=f"{partial_err}\n{note}" if partial_err else note,
                exit_code=124,
                timed_out=True,
            )
        except FileNotFoundError as e:
            # Interpreter/shell not on PATH (or the workspace dir is gone):
            # report it to the agent instead of raising out of the tool call.
            return RunResult(
                ok=False,
                stdout="",
                stderr=f"could not start process: {e}",
                exit_code=127,
            )
        return RunResult(
            ok=proc.returncode == 0,
            stdout=proc.stdout,
            stderr=proc.stderr,
            exit_code=proc.returncode,
        )

    def cleanup(self) -> None:
        """Delete the workspace directory tree, ignoring errors."""
        shutil.rmtree(self.root, ignore_errors=True)