# smolcode/engine/builder.py
# Hugging Face file-viewer residue, kept as a comment for provenance:
# uploaded by seanpoyner ("Upload folder using huggingface_hub"),
# commit daea45b (verified), 12.3 kB.
"""smolbuilder — a Lovable/Replit-style web-app builder on a tiny local model.
Where `Router` (engine/router.py) answers one coding *task* per call with a
fresh workspace, `WebBuilder` is a **stateful session**: you describe a web app,
the agent builds a self-contained `index.html`, and then you keep talking to it
("make it dark mode", "add a reset button") and it edits the *same* workspace.
First build uses the router's escalation idea — start small, and if the tiny
model can't produce a usable app, retry on the next-bigger model — but once a
tier succeeds we **lock onto that agent and its workspace** so every later turn
is a cheap incremental edit rather than a from-scratch rebuild.
The build is verified by rendering: did the agent leave a non-trivial HTML
entrypoint behind? Static apps have no `run_python` signal, so "it produced an
app you can preview" is the success criterion the UI also relies on.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from .agent import SmallCodeAgent, Step
from .config import Preset, Tier, load_preset
from .live_run import LiveFrame
from .preview import find_entry, inline_app, preview_iframe
from .router import classify_tier
from .sandbox import Workspace
from .tools import build_web_registry
from .trace_collector import TraceEvent
from .ui_trace import merge_step_metadata
from . import browsercheck
# System prompt given to every builder agent: WebBuilder passes it as
# `system_prompt` when it constructs an agent. It lists the workspace tools, the
# hard rules for the generated index.html, and the write -> check_app -> fix
# loop the model is told to follow.
# NOTE: this text is runtime behavior (the model reads it verbatim), not
# documentation; change it deliberately.
BUILD_SYSTEM_PROMPT = """You are smolbuilder, a web app builder running on a small local model.
You build small, self-contained web apps that run directly in a browser — like a tiny Lovable or Replit.
Your workspace tools:
- write_file(path, content): create or overwrite a file.
- read_file(path): read a file back.
- list_files(): see what already exists.
- check_app(): run the current app in a headless browser — load index.html, execute its JavaScript, click every button — and report any errors.
Hard rules:
1. The app's entrypoint is ALWAYS a single file named index.html, and it must start with <!doctype html><html> and include <head> and <body>.
2. Put the CSS in a <style> tag and the JavaScript in a <script> tag INSIDE index.html. Prefer one self-contained file — it must run with no build step and no server.
3. Put the <script> tag at the very END of <body>, AFTER the elements it uses (or wrap your code in window.addEventListener('DOMContentLoaded', ...)). If a script runs before its elements exist, document.getElementById returns null and every button silently breaks.
4. Every button or interactive control must have a working handler that you actually wire up. Define functions before they are referenced.
5. Vanilla HTML/CSS/JS only. Do not require a framework, npm, or a backend. You may load a library from a CDN with a full https:// URL only if it is truly needed.
6. Make it look good by default: sensible layout, spacing, a coherent color palette, readable type. Mobile-friendly.
Method — follow it every time:
1. Write a complete index.html in one write_file call.
2. Call check_app() to test it.
3. If check_app reports errors, read them, fix index.html (write the FULL file again), and call check_app again. Repeat until it reports ok.
4. To CHANGE an existing app, write the FULL updated index.html (never a partial file — keep everything that already worked), then check_app again.
Only finish once check_app reports the app works. Then reply with one short sentence describing what the app does. Do not paste the code in your reply.
"""
# Minimum entrypoint size (chars) to count as "a real app" and not a stub.
# `_evaluate` compares this against the entry file's content after stripping
# surrounding whitespace; anything shorter is treated as a failed build.
_MIN_APP_CHARS = 60
@dataclass
class BuildResult:
    """Everything one build or edit turn produced, bundled for the caller.

    The first ten fields are required and positional; ``turn``,
    ``trace_events`` and ``agent`` are optional extras with defaults.
    """

    # Reply text for the chat bubble.
    final: str
    # Agent steps taken during the turn.
    steps: list[Step]
    # Workspace files after the turn, keyed by path.
    files: dict[str, str]
    # Markup used to show the live preview.
    preview_html: str
    # Detected entrypoint file, or None when there is none.
    entry: str | None
    # Name and model of the tier that produced this result.
    tier_name: str
    tier_model: str
    # Name of the tier the build started on (before any escalation).
    start_tier: str
    # Number of tiers abandoned before this result.
    escalations: int
    # Whether the app passed the post-build evaluation.
    verified: bool
    # Session turn counter at the time of the result.
    turn: int = 0
    # Trace events recorded for the turn; each instance gets its own list.
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that ran the turn, when the producer chose to attach it.
    agent: SmallCodeAgent | None = None

    @property
    def app_html(self) -> str:
        """The self-contained document — for the 'download app' button."""
        bundled = inline_app(self.files)
        return bundled
def _evaluate(agent: SmallCodeAgent) -> tuple[bool, str | None, dict[str, str]]:
    """Judge whether the agent's workspace holds a working app.

    Two gates, in order:

    1. Structural: an entrypoint must exist and its whitespace-stripped
       content must be at least ``_MIN_APP_CHARS`` long.
    2. Runtime (HTML entrypoints only): the inlined app is handed to
       ``browsercheck.check_html``. Only an explicit ``False`` verdict fails
       the build; any other verdict (e.g. a check that could not be run)
       passes.

    Returns ``(ok, entry, files)`` where ``entry`` is whatever ``find_entry``
    reported and ``files`` is the agent's file snapshot.
    """
    snapshot = agent.files()
    entrypoint = find_entry(snapshot)

    has_real_entry = (
        entrypoint is not None
        and len(snapshot[entrypoint].strip()) >= _MIN_APP_CHARS
    )
    if not has_real_entry:
        return False, entrypoint, snapshot

    passed = True
    if entrypoint.lower().endswith((".html", ".htm")):
        verdict, _ = browsercheck.check_html(inline_app(snapshot))
        # Identity test on purpose: a non-False verdict must not fail the build.
        passed = verdict is not False
    return passed, entrypoint, snapshot
class WebBuilder:
    """A persistent build session. One instance per browser session (gr.State).

    The first message runs a full build, escalating through ``self.tiers``
    until one tier yields a usable app; the winning tier index and the agent's
    workspace are then kept so every later message is an edit of the same
    workspace on the same tier.
    """

    def __init__(self, preset: Preset | None = None, max_steps: int = 16,
                 preview_height: int = 540) -> None:
        """Create an empty session.

        Args:
            preset: Model preset to use; ``load_preset()`` is called when omitted.
            max_steps: Step budget passed to every agent this session creates.
            preview_height: Height passed to ``preview_iframe`` for previews.
        """
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.preview_height = preview_height
        # The workspace (the built app on disk) persists across turns; the tier
        # that built it is remembered so edits stay on the same model. A spent
        # LiteForge agent can't be re-run, so each turn gets a fresh agent over
        # this same workspace.
        self.workspace: Workspace | None = None
        self.tier_idx = 0
        self.turn = 0
        # Forwarded unchanged to ``agent.run_live_turn`` on every turn.
        self.think = "off"
        self.yolo = False

    @property
    def has_app(self) -> bool:
        """True once a first build has produced a workspace to iterate on."""
        return self.workspace is not None

    # --- public API ------------------------------------------------------
    async def send(self, message: str) -> BuildResult:
        """Build (first turn) or edit (later turns) and return a BuildResult.

        Drains ``send_live`` and keeps the result carried by the last final
        frame.

        Raises:
            RuntimeError: if the turn ended without any final frame carrying a
                ``BuildResult`` (for example when there is no tier to run).
        """
        result: BuildResult | None = None
        async for frame in self.send_live(message):
            if frame.done and isinstance(frame.result, BuildResult):
                result = frame.result
        # An explicit raise rather than ``assert``: asserts vanish under
        # ``python -O`` and the caller would silently receive None.
        if result is None:
            raise RuntimeError(
                "WebBuilder turn finished without producing a BuildResult"
            )
        return result

    async def send_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Yield live frames while building or editing."""
        self.turn += 1
        if self.workspace is None:
            async for frame in self._first_build_live(message):
                yield frame
        else:
            async for frame in self._iterate_live(message):
                yield frame

    def reset(self) -> None:
        """Drop the current app and start a fresh session."""
        self.cleanup()
        self.workspace = None
        self.tier_idx = 0
        self.turn = 0

    def cleanup(self) -> None:
        """Release the workspace, if any. Safe to call more than once.

        The reference is dropped after cleanup so ``has_app`` stops reporting a
        workspace that no longer exists and a repeated call does nothing.
        """
        if self.workspace is not None:
            self.workspace.cleanup()
            self.workspace = None

    def empty_preview(self) -> str:
        """Return the preview markup ``preview_iframe`` produces for no files."""
        return preview_iframe({}, height=self.preview_height)

    # --- internals -------------------------------------------------------
    def _new_agent(self, tier: Tier, workspace: Workspace | None = None) -> SmallCodeAgent:
        """Create a fresh builder agent for ``tier``.

        ``workspace`` is passed through as given; with ``None`` the agent is
        presumably responsible for creating its own (the first build reads it
        back from ``agent.workspace``).
        """
        return SmallCodeAgent(
            preset=self.preset, model=tier.model, max_steps=self.max_steps,
            system_prompt=BUILD_SYSTEM_PROMPT, registry_builder=build_web_registry,
            workspace=workspace, name="smolbuilder",
            agent="build", profile="web",
        )

    async def _first_build_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Escalate the model ladder until one produces a previewable app.

        Starts at the tier chosen by ``classify_tier`` and walks upward.
        Non-final frames are relayed as they arrive. A tier wins when its final
        frame evaluates as working and the agent neither hit its step limit nor
        errored; the last tier is accepted regardless. The winner's workspace
        and tier index are stored on the session before the final frame is
        yielded.
        """
        start = classify_tier(message, len(self.tiers))
        task = (f"Build this web app as a self-contained index.html:\n\n{message}")
        escalations = 0
        last: BuildResult | None = None
        prev_tier_name: str | None = None
        for idx in range(start, len(self.tiers)):
            tier = self.tiers[idx]
            if prev_tier_name is not None:
                # Tell the UI that a bigger model is taking over.
                yield LiveFrame(events=[
                    TraceEvent(kind="tier_escalation", name=tier.name,
                               detail=f"escalated from {prev_tier_name}"),
                ])
            agent = self._new_agent(tier)
            async for frame in agent.run_live_turn(
                task, think=self.think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok, entry, files = _evaluate(agent)
                ok = ok and not (agent.hit_max_steps or agent.errored)
                last = self._result(agent, final, steps, files, entry, tier,
                                    self.tiers[start].name, escalations, ok)
                is_last_tier = idx == len(self.tiers) - 1
                if ok or is_last_tier:
                    # Lock onto this agent's workspace and tier for later edits.
                    self.workspace = agent.workspace
                    self.tier_idx = idx
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return
            # Reached only when this tier did not win. The bookkeeping runs
            # after the frame stream is exhausted, so the agent is released
            # even if it never delivered a final frame.
            # NOTE(review): source indentation was lost; confirm this suite
            # belongs at tier-loop level rather than inside the frame loop.
            if idx < len(self.tiers) - 1:
                agent.trace_collector.record_escalation(tier.name, self.tiers[idx + 1].name)
            agent.cleanup()
            escalations += 1
            prev_tier_name = tier.name
        # Fallback: no tier returned above; surface the most recent result, if any.
        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )

    async def _iterate_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Run one edit turn on the locked tier over the existing workspace.

        The current ``index.html`` (or an empty string when the read does not
        report ``ok``) is embedded in the task so the model rewrites the whole
        file. There is no escalation here: the result always reports the
        locked tier with zero escalations.
        """
        tier = self.tiers[self.tier_idx]
        agent = self._new_agent(tier, self.workspace)
        cur = self.workspace.read_file("index.html")
        body = cur["content"] if cur.get("ok") else ""
        task = (
            "You are editing an existing web app. Here is the current "
            "index.html:\n\n```html\n" + body + "\n```\n\n"
            "Apply the change below, then save the COMPLETE updated file with a "
            "single write_file(\"index.html\", <full new contents>). Keep "
            "everything that already works and output the whole file, never a "
            "fragment.\n\nChange to make: " + message
        )
        async for frame in agent.run_live_turn(
            task, think=self.think, yolo=self.yolo,
        ):
            if not frame.done:
                yield frame
                continue
            final, steps = frame.result
            ok, entry, files = _evaluate(agent)
            ok = ok and not (agent.hit_max_steps or agent.errored)
            result = self._result(agent, final, steps, files, entry, tier, tier.name, 0, ok)
            yield LiveFrame(
                steps=steps,
                events=result.trace_events,
                files=result.files,
                done=True,
                result=result,
            )

    def _result(self, agent: SmallCodeAgent, final: str | None, steps: list[Step],
                files: dict[str, str], entry: str | None, tier: Tier,
                start_name: str, escalations: int, verified: bool) -> BuildResult:
        """Assemble the ``BuildResult`` for a finished turn.

        Fills in a fallback chat message when the model returned an empty
        answer, renders the preview for ``files``, and merges the agent's trace
        snapshot with its raw history to produce the trace events.
        """
        # Small models sometimes write the file but return an empty answer; give
        # the chat something sensible rather than a blank bubble.
        if not (final or "").strip():
            final = (
                "✅ Done: check the live preview."
                if verified
                else "I made an attempt; have a look and tell me what to fix."
            )
        events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
        return BuildResult(
            final=final, steps=steps, files=files,
            preview_html=preview_iframe(files, height=self.preview_height),
            entry=entry, tier_name=tier.name, tier_model=tier.model,
            start_tier=start_name, escalations=escalations,
            verified=bool(verified), turn=self.turn,
            trace_events=events, agent=agent,
        )