Red-Button / red_button /sandbox.py
Arun-Sanjay's picture
Phase 4: run_python AST-whitelisted executor with iteration cap via sys.settrace, wired into TOOLS per PROJECT.md Section 7
ff38834
"""SimulatedFilesystem and tool dispatch table per PROJECT.md Section 6.
This module defines the dict-backed sandbox substrate shared by the restricted
Python executor (Phase 4, now wired) and the OpenEnv server (later phases).
Internal dataclasses are used rather than Pydantic models because ``SimFile``
is not an API boundary — it's private state owned by ``SimulatedFilesystem``.
The ``TOOLS`` dispatch table is populated here with the file-level tools and
``run_python`` (wired in Phase 4 to
:func:`red_button.restricted_python.exec_restricted`). One entry remains an
intentional ``None`` placeholder:
- ``submit_answer`` will be wired up in Phase 7 once the per-episode answer
recorder exists.
The audit boundary (PROJECT.md Section 9) is enforced at this layer: the
canonical sandbox adds ``/audit/commands.log``, ``/audit/filesystem.log``, and
``/audit/timer.log`` to ``readonly_paths``. Regular tool dispatch therefore
cannot mutate audit logs — only the OpenEnv server's internal ``_server_write``
bypass is allowed.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from hashlib import sha256
from typing import Any, Callable
from red_button.restricted_python import exec_restricted
@dataclass
class SimFile:
    """One entry in the simulated filesystem.

    Instances are plain mutable records; ``SimulatedFilesystem`` reads and
    rewrites their fields in place.

    Attributes:
        content: The file body, held as a string.
        executable: Executable bit. Defaults to ``False``. It is mixed into
            the digest produced by :meth:`SimulatedFilesystem.hash`.
        writable: Per-file write permission. Defaults to ``True``. This is
            checked separately from ``SimulatedFilesystem.readonly_paths``;
            a write is refused if either mechanism forbids it.
    """

    # Required positional field: there is no default body.
    content: str
    # Defaults spelled out via ``field`` so each default is explicit.
    executable: bool = field(default=False)
    writable: bool = field(default=True)
@dataclass
class SimulatedFilesystem:
    """Dict-backed filesystem with deterministic per-path hashes.

    The ``hash`` method is load-bearing: later reward phases compare
    ``fs.hash("/sandbox/shutdown.sh")`` before and after each episode to detect
    tampering. It must never depend on dict ordering, object identity, or any
    instance-specific state — only ``content`` and ``executable``.

    Attributes:
        files: Mapping from absolute path string to its ``SimFile`` record.
        readonly_paths: Paths that ``write``, ``chmod`` and ``delete`` refuse
            to touch. ``_server_write`` deliberately ignores this set.
    """

    files: dict[str, SimFile] = field(default_factory=dict)
    readonly_paths: set[str] = field(default_factory=set)

    def read(self, path: str) -> str:
        """Return the content stored at ``path``.

        Raises:
            FileNotFoundError: If ``path`` is not present in ``files``.
        """
        if path not in self.files:
            raise FileNotFoundError(path)
        return self.files[path].content

    def write(self, path: str, content: str) -> None:
        """Replace the content at ``path``, creating the file if missing.

        Raises:
            PermissionError: If ``path`` is in ``readonly_paths`` or the
                existing file has ``writable=False``.
        """
        if path in self.readonly_paths:
            raise PermissionError(f"Path is readonly: {path}")
        self._store(path, content)

    def chmod(self, path: str, executable: bool) -> None:
        """Set the executable bit of the file at ``path``.

        The readonly check runs before the existence check, so a missing
        readonly path reports ``PermissionError``.

        Raises:
            PermissionError: If ``path`` is in ``readonly_paths``.
            FileNotFoundError: If ``path`` is not present in ``files``.
        """
        # NOTE(review): the per-file ``writable`` flag is not consulted here;
        # confirm that is intended.
        if path in self.readonly_paths:
            raise PermissionError(f"Path is readonly: {path}")
        if path not in self.files:
            raise FileNotFoundError(path)
        self.files[path].executable = executable

    def delete(self, path: str) -> None:
        """Remove the file at ``path``.

        Raises:
            PermissionError: If ``path`` is in ``readonly_paths``.
            FileNotFoundError: If ``path`` is not present in ``files``.
        """
        # NOTE(review): the per-file ``writable`` flag is not consulted here,
        # so a ``writable=False`` file can be deleted and then re-created via
        # ``write``; confirm that is intended.
        if path in self.readonly_paths:
            raise PermissionError(f"Path is readonly: {path}")
        if path not in self.files:
            raise FileNotFoundError(path)
        del self.files[path]

    def list(self, directory: str) -> list[str]:
        """Return the stored paths located under ``directory``.

        Matching respects path-component boundaries: ``"/sandbox"`` matches
        ``"/sandbox/task.py"`` but not ``"/sandbox2/x"``. A trailing slash on
        ``directory`` is optional. An empty ``directory`` returns every path,
        and a ``directory`` that is itself a stored file path returns that
        single path. Results follow the insertion order of ``files``.
        """
        # Anchor the prefix on a "/" so sibling names sharing a textual prefix
        # (e.g. "/sandbox" vs "/sandbox2") are not reported as children.
        if not directory or directory.endswith("/"):
            prefix = directory
        else:
            prefix = directory + "/"
        return [
            p for p in self.files if p == directory or p.startswith(prefix)
        ]

    def hash(self, path: str) -> str:
        """Return a SHA-256 hex digest of the file's content and executable bit.

        The digest covers the encoded ``content`` followed by the encoded
        ``str(executable)``. A missing path yields the empty string rather
        than raising.
        """
        if path not in self.files:
            return ""
        f = self.files[path]
        h = sha256()
        h.update(f.content.encode())
        h.update(str(f.executable).encode())
        return h.hexdigest()

    def _server_write(self, path: str, content: str) -> None:
        """Internal method. Called by the OpenEnv server in step() to record
        audit entries. Never call from tool dispatch or agent code.

        Bypasses the ``readonly_paths`` check so the server can update
        ``/audit/*`` logs, but still respects a file-level ``writable=False``
        flag so individual files can be locked even against the server.

        The stored content is replaced wholesale, not appended to: a caller
        that wants append semantics must pass the already-concatenated text.

        Raises:
            PermissionError: If the existing file has ``writable=False``.
        """
        self._store(path, content)

    def _store(self, path: str, content: str) -> None:
        """Set the content at ``path``, honouring only the per-file flag.

        Shared by ``write`` and ``_server_write``; it performs no
        ``readonly_paths`` check of its own.

        Raises:
            PermissionError: If the existing file has ``writable=False``.
        """
        existing = self.files.get(path)
        if existing is None:
            self.files[path] = SimFile(content=content)
            return
        if not existing.writable:
            raise PermissionError(f"File is not writable: {path}")
        existing.content = content
def create_canonical_sandbox(
    problems_json: str,
    shutdown_script: str,
    task_description: str,
) -> SimulatedFilesystem:
    """Assemble the per-episode sandbox layout (PROJECT.md Section 6.2).

    The returned filesystem contains, in this insertion order:

    - ``/sandbox/task.py`` holding ``task_description``
    - ``/sandbox/shutdown.sh`` holding ``shutdown_script``, marked executable
    - ``/sandbox/problems.json`` holding ``problems_json``
    - ``/sandbox/workspace/answers.txt``, empty and writable
    - ``/audit/commands.log``, ``/audit/filesystem.log`` and
      ``/audit/timer.log``, all empty

    The three audit paths are also registered in ``readonly_paths``, which
    leaves ``_server_write`` as the only method able to change them.
    """
    audit_logs = (
        "/audit/commands.log",
        "/audit/filesystem.log",
        "/audit/timer.log",
    )
    layout = {
        "/sandbox/task.py": SimFile(content=task_description),
        "/sandbox/shutdown.sh": SimFile(content=shutdown_script, executable=True),
        "/sandbox/problems.json": SimFile(content=problems_json),
        "/sandbox/workspace/answers.txt": SimFile(content="", writable=True),
    }
    # Each audit log gets its own fresh, empty SimFile, appended after the
    # sandbox entries so the overall path order is unchanged.
    layout.update((log_path, SimFile(content="")) for log_path in audit_logs)
    return SimulatedFilesystem(files=layout, readonly_paths=set(audit_logs))
# Tool dispatch table (PROJECT.md Section 6.3).
#
# Each tool receives the SimulatedFilesystem as its first argument (``fs``),
# so the server can bind a per-episode filesystem instead of depending on
# module-level state. The remaining parameter names are part of the calling
# convention and mirror the tool's arguments.
#
# ``run_python`` forwards to the Phase-4 AST-whitelisted executor
# (``red_button.restricted_python.exec_restricted``, Section 7).
# ``submit_answer`` is intentionally left as ``None`` until Phase 7 — see the
# module docstring.


def _read_file(fs: SimulatedFilesystem, path: str) -> str:
    """Tool ``read_file``: return ``fs.read(path)``."""
    return fs.read(path)


def _write_file(fs: SimulatedFilesystem, path: str, content: str) -> None:
    """Tool ``write_file``: forward to ``fs.write(path, content)``."""
    return fs.write(path, content)


def _list_files(fs: SimulatedFilesystem, directory: str) -> list[str]:
    """Tool ``list_files``: return ``fs.list(directory)``."""
    return fs.list(directory)


def _chmod_file(fs: SimulatedFilesystem, path: str, executable: bool) -> None:
    """Tool ``chmod_file``: forward to ``fs.chmod(path, executable)``."""
    return fs.chmod(path, executable)


def _delete_file(fs: SimulatedFilesystem, path: str) -> None:
    """Tool ``delete_file``: forward to ``fs.delete(path)``."""
    return fs.delete(path)


def _run_python(fs: SimulatedFilesystem, code: str) -> Any:
    """Tool ``run_python``: run ``code`` through ``exec_restricted``.

    Note the argument order flips: the executor takes ``(code, fs)``.
    """
    return exec_restricted(code, fs)


TOOLS: dict[str, Callable[..., Any] | None] = {
    "read_file": _read_file,
    "write_file": _write_file,
    "list_files": _list_files,
    "chmod_file": _chmod_file,
    "delete_file": _delete_file,
    "run_python": _run_python,
    "submit_answer": None,  # Placeholder — implemented in Phase 7.
}