Spaces:
Paused
Paused
File size: 6,662 Bytes
daea45b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | """Coding tools exposed to the LiteForge agent.
Each tool is a Python callable registered via `liteforge.create_tool`. The agent
(running in Rust) decides when to call them; LiteForge invokes the callable with
a single `dict` of arguments and feeds the returned JSON-able dict back to the
model. All file/exec tools are confined to one `Workspace`.
Tool surface (kept deliberately small so a 3B model can use it reliably):
    write_file(path, content) -> create/overwrite a file
    read_file(path)           -> read a file back
    list_files()              -> list workspace files
    run_python(path)          -> execute a file, return stdout/stderr/exit
    run_tests()               -> run pytest in the workspace
    check_app()               -> (web registry only) run the app headlessly, report JS errors
"""
from __future__ import annotations
import liteforge as lf
from . import browsercheck
from .preview import inline_app
from .sandbox import Workspace
from .trace_collector import TraceCollector
def _wrap(name: str, fn, collector: TraceCollector | None):
    """Return `fn`, instrumented to log on `collector` when one is given.

    With no collector the callable is handed back untouched. Otherwise the
    returned wrapper records the call (tool name + args) before invoking `fn`,
    records the result afterwards, and returns that result unchanged. If `fn`
    raises, the exception propagates and no result is recorded.
    """
    if collector is not None:

        def wrapped(args: dict):
            collector.record_tool_call(name, args)
            outcome = fn(args)
            collector.record_tool_result(name, outcome)
            return outcome

        return wrapped

    # Tracing disabled: no wrapper layer at all.
    return fn
# Tool names in the order _tools() returns them. _registry() zips this tuple
# with that list, so the two must stay in the same order; this lets a registry
# select a subset by name without relying on attributes of the opaque lf tool
# object.
_TOOL_ORDER = ("write_file", "read_file", "list_files", "run_python", "run_tests")
# Tools the web builder needs. Static apps are "verified" by rendering, not by
# running Python, so we drop run_python/run_tests: a smaller, less confusing
# surface for a 3B model that should be writing HTML, not spawning processes.
_WEB_TOOLS = ("write_file", "read_file", "list_files")
def _registry(workspace: Workspace, names, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Build a ToolRegistry holding only the tools whose name is in `names`.

    Every tool is built via `_tools` and paired positionally with
    `_TOOL_ORDER`; tools whose paired name is not in `names` are skipped.
    """
    registry = lf.ToolRegistry()
    built = _tools(workspace, collector)
    for tool_name, tool in zip(_TOOL_ORDER, built):
        if tool_name not in names:
            continue
        registry.register(tool)
    return registry
def build_registry(workspace: Workspace, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Return a ToolRegistry containing every coding tool, bound to `workspace`.

    When `collector` is given it is passed through so tool calls are traced.
    """
    return _registry(workspace, names=_TOOL_ORDER, collector=collector)
def build_web_registry(workspace: Workspace, collector: TraceCollector | None = None) -> lf.ToolRegistry:
    """Return the smolbuilder web agent's registry.

    It holds the file tools named in `_WEB_TOOLS` plus the `check_app` tool.
    """
    registry = _registry(workspace, names=_WEB_TOOLS, collector=collector)
    app_checker = _check_app_tool(workspace, collector)
    registry.register(app_checker)
    return registry
def check_app_impl(ws: Workspace, collector: TraceCollector | None, args: dict) -> dict:
    """Run check_app logic (shared by LiteForge tool and Rust python callback).

    Args:
        ws: Workspace whose files make up the app to check.
        collector: Not used by this function; accepted as part of the shared
            call signature.
        args: Tool arguments. check_app takes none, so this is ignored.

    Returns:
        A dict with ``"ok"`` (bool) and ``"errors"`` (list), plus one of:
        ``"note"`` when `browsercheck.check_html` reports ``None`` for ok,
        ``"message"`` on success, or ``"hint"`` when errors were found.
        If the workspace has no ``index.html`` the result is a failure with a
        single explanatory error and nothing is read or rendered.
    """
    # List the workspace once, so the existence check and the read loop below
    # see the same set of files (and the workspace is not listed twice).
    listing = list(ws.list_files())
    if "index.html" not in listing:
        return {"ok": False,
                "errors": ["index.html not found: create it first with write_file."]}

    files = {}
    for rel in listing:
        r = ws.read_file(rel)
        # Files whose read does not report ok are left out of the bundle.
        if r.get("ok"):
            files[rel] = r["content"]

    ok, errors = browsercheck.check_html(inline_app(files))
    if ok is None:
        # The checker could not give a verdict; do not block the agent on it.
        return {"ok": True, "errors": [],
                "note": "runtime check unavailable here; assuming ok"}
    if ok:
        return {"ok": True, "errors": [],
                "message": "The app loads and every button works."}
    return {"ok": False, "errors": errors,
            "hint": "Fix these JavaScript errors in index.html, then call check_app again."}
def _check_app_tool(ws: Workspace, collector: TraceCollector | None = None):
    """Create the `check_app` tool: actually run the built app and report JS errors.

    The tool's callable delegates to `check_app_impl` for `ws` and is passed
    through `_wrap`, so calls are traced when a collector is supplied.
    """
    description = (
        "Run the current web app in a headless browser: load index.html, execute "
        "its JavaScript, click every button, and report any errors. Use this to "
        "verify the app actually works before finishing."
    )
    # check_app takes no arguments.
    schema = {"type": "object", "properties": {}}

    def check_app(args: dict) -> dict:
        return check_app_impl(ws, collector, args)

    handler = _wrap("check_app", check_app, collector)
    return lf.create_tool("check_app", description, schema, handler)
def _tools(ws: Workspace, collector: TraceCollector | None = None) -> list:
    """Build the workspace tools as a list of LiteForge tool objects.

    The list order is write_file, read_file, list_files, run_python,
    run_tests. Each handler takes the single `dict` of tool arguments and is
    passed through `_wrap`, so calls are traced when a collector is supplied.
    """

    def write_file(args: dict) -> dict:
        # Missing "content" writes an empty file; "path" is mandatory.
        return ws.write_file(args["path"], args.get("content", ""))

    def read_file(args: dict) -> dict:
        return ws.read_file(args["path"])

    def list_files(args: dict) -> dict:
        return {"ok": True, "files": ws.list_files()}

    def run_python(args: dict) -> dict:
        return ws.run_python(path=args["path"]).as_tool_payload()

    def run_tests(args: dict) -> dict:
        return ws.run_tests().as_tool_payload()

    # One row per tool: (name, description, JSON schema, handler).
    specs = [
        (
            "write_file",
            "Create or overwrite a file in the workspace with the given text content.",
            {
                "type": "object",
                "properties": {
                    "path": {"type": "string", "description": "Relative path, e.g. main.py"},
                    "content": {"type": "string", "description": "Full file contents"},
                },
                "required": ["path", "content"],
            },
            write_file,
        ),
        (
            "read_file",
            "Read a file from the workspace and return its contents.",
            {
                "type": "object",
                "properties": {"path": {"type": "string"}},
                "required": ["path"],
            },
            read_file,
        ),
        (
            "list_files",
            "List all files currently in the workspace.",
            {"type": "object", "properties": {}},
            list_files,
        ),
        (
            "run_python",
            "Run a Python file in the workspace. Returns stdout, stderr and exit code.",
            {
                "type": "object",
                "properties": {"path": {"type": "string", "description": "File to run, e.g. main.py"}},
                "required": ["path"],
            },
            run_python,
        ),
        (
            "run_tests",
            "Run the test suite (pytest) in the workspace. Returns pass/fail output.",
            {"type": "object", "properties": {}},
            run_tests,
        ),
    ]

    return [
        lf.create_tool(tool_name, description, schema, _wrap(tool_name, handler, collector))
        for tool_name, description, schema, handler in specs
    ]
|