Spaces:
Paused
Paused
File size: 12,275 Bytes
"""smolbuilder — a Lovable/Replit-style web-app builder on a tiny local model.
Where `Router` (engine/router.py) answers one coding *task* per call with a
fresh workspace, `WebBuilder` is a **stateful session**: you describe a web app,
the agent builds a self-contained `index.html`, and then you keep talking to it
("make it dark mode", "add a reset button") and it edits the *same* workspace.
First build uses the router's escalation idea — start small, and if the tiny
model can't produce a usable app, retry on the next-bigger model — but once a
tier succeeds we **lock onto that agent and its workspace** so every later turn
is a cheap incremental edit rather than a from-scratch rebuild.
The build is verified by rendering: did the agent leave a non-trivial HTML
entrypoint behind? Static apps have no `run_python` signal, so "it produced an
app you can preview" is the success criterion the UI also relies on.
"""
from __future__ import annotations
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from .agent import SmallCodeAgent, Step
from .config import Preset, Tier, load_preset
from .live_run import LiveFrame
from .preview import find_entry, inline_app, preview_iframe
from .router import classify_tier
from .sandbox import Workspace
from .tools import build_web_registry
from .trace_collector import TraceEvent
from .ui_trace import merge_step_metadata
from . import browsercheck
# System prompt handed to every build agent (see WebBuilder._new_agent). It is
# sent to the model verbatim, so the punctuation must be real em dashes rather
# than mis-decoded bytes.
BUILD_SYSTEM_PROMPT = """You are smolbuilder, a web app builder running on a small local model.
You build small, self-contained web apps that run directly in a browser — like a tiny Lovable or Replit.
Your workspace tools:
- write_file(path, content): create or overwrite a file.
- read_file(path): read a file back.
- list_files(): see what already exists.
- check_app(): run the current app in a headless browser — load index.html, execute its JavaScript, click every button — and report any errors.
Hard rules:
1. The app's entrypoint is ALWAYS a single file named index.html, and it must start with <!doctype html><html> and include <head> and <body>.
2. Put the CSS in a <style> tag and the JavaScript in a <script> tag INSIDE index.html. Prefer one self-contained file — it must run with no build step and no server.
3. Put the <script> tag at the very END of <body>, AFTER the elements it uses (or wrap your code in window.addEventListener('DOMContentLoaded', ...)). If a script runs before its elements exist, document.getElementById returns null and every button silently breaks.
4. Every button or interactive control must have a working handler that you actually wire up. Define functions before they are referenced.
5. Vanilla HTML/CSS/JS only. Do not require a framework, npm, or a backend. You may load a library from a CDN with a full https:// URL only if it is truly needed.
6. Make it look good by default: sensible layout, spacing, a coherent color palette, readable type. Mobile-friendly.
Method — follow it every time:
1. Write a complete index.html in one write_file call.
2. Call check_app() to test it.
3. If check_app reports errors, read them, fix index.html (write the FULL file again), and call check_app again. Repeat until it reports ok.
4. To CHANGE an existing app, write the FULL updated index.html (never a partial file — keep everything that already worked), then check_app again.
Only finish once check_app reports the app works. Then reply with one short sentence describing what the app does. Do not paste the code in your reply.
"""
# Minimum length, in characters, of the whitespace-stripped entrypoint file for
# a build to count as "a real app" and not a stub. `_evaluate` rejects anything
# shorter before attempting the runtime check.
_MIN_APP_CHARS = 60
@dataclass
class BuildResult:
    """Everything one builder turn produced, bundled for the caller.

    Instances are assembled by ``WebBuilder._result`` at the end of a first
    build or an edit turn and travel on the final ``LiveFrame``.
    """

    # --- what the agent said and did ---
    final: str                      # closing chat text (a fallback when the model's reply is blank)
    steps: list[Step]               # steps taken from the agent's final frame
    # --- what it left behind ---
    files: dict[str, str]           # workspace snapshot, path -> content
    preview_html: str               # preview markup rendered from `files`
    entry: str | None               # detected entrypoint path, or None if none was found
    # --- which model did it ---
    tier_name: str                  # name of the tier that ran this turn
    tier_model: str                 # model identifier of that tier
    start_tier: str                 # name of the tier the build started on
    escalations: int                # number of tiers abandoned before this one
    # --- outcome ---
    verified: bool                  # True when the app passed evaluation and the agent ended cleanly
    turn: int = 0                   # session turn counter at the time of the result
    trace_events: list[TraceEvent] = field(default_factory=list)
    agent: SmallCodeAgent | None = None   # the agent that ran the turn, if retained

    @property
    def app_html(self) -> str:
        """The self-contained document, for the 'download app' button."""
        document = inline_app(self.files)
        return document
def _evaluate(agent: SmallCodeAgent) -> tuple[bool, str | None, dict[str, str]]:
    """Judge whether the agent left a working app in its workspace.

    The verdict feeds the verified badge and the escalation decision. Two
    gates are applied in order:

    1. Structural: an entrypoint must exist and, once stripped, be at least
       ``_MIN_APP_CHARS`` characters long.
    2. Runtime: for an ``.html``/``.htm`` entrypoint the inlined app is run
       through ``browsercheck.check_html``. Only an explicit ``False`` counts
       as a failure; an unverifiable check (no Node) does not fail the build.

    Returns ``(ok, entry, files)`` where ``files`` is the workspace snapshot
    the verdict was computed from.
    """
    snapshot = agent.files()
    entrypoint = find_entry(snapshot)

    # Gate 1: a real, non-stub entrypoint must be present.
    has_real_entry = (
        entrypoint is not None
        and len(snapshot[entrypoint].strip()) >= _MIN_APP_CHARS
    )
    if not has_real_entry:
        return False, entrypoint, snapshot

    # Gate 2: HTML entrypoints additionally get the browser check.
    is_html = entrypoint.lower().endswith((".html", ".htm"))
    if is_html:
        verdict, _ = browsercheck.check_html(inline_app(snapshot))
        # Identity test on purpose: any other verdict is not treated as broken.
        if verdict is False:
            return False, entrypoint, snapshot

    return True, entrypoint, snapshot
class WebBuilder:
    """A persistent build session. One instance per browser session (gr.State).

    The first message triggers a build that walks up the preset's tier list
    until a tier yields a verified app (or the list is exhausted). Every later
    message is an edit, run on the tier that was locked in and over the same
    workspace.
    """

    def __init__(self, preset: Preset | None = None, max_steps: int = 16,
                 preview_height: int = 540) -> None:
        """Create an empty session.

        Args:
            preset: Model preset to use; ``load_preset()`` is called when omitted.
            max_steps: Step budget handed to every agent this session creates.
            preview_height: Height passed to ``preview_iframe`` for previews.
        """
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.preview_height = preview_height
        # The workspace (the built app on disk) persists across turns; the tier
        # that built it is remembered so edits stay on the same model. A spent
        # LiteForge agent can't be re-run, so each turn gets a fresh agent over
        # this same workspace.
        self.workspace: Workspace | None = None
        self.tier_idx = 0
        self.turn = 0
        # Forwarded unchanged to agent.run_live_turn on every turn.
        self.think = "off"
        self.yolo = False

    @property
    def has_app(self) -> bool:
        """True once a first build has produced a workspace to iterate on."""
        return self.workspace is not None

    # --- public API ------------------------------------------------------
    async def send(self, message: str) -> BuildResult:
        """Build (first turn) or edit (later turns) and return a BuildResult.

        Drains ``send_live`` and keeps the result carried by the last finished
        frame.

        Raises:
            RuntimeError: if the live stream ended without a finished frame
                carrying a ``BuildResult``.
        """
        result: BuildResult | None = None
        async for frame in self.send_live(message):
            if frame.done and isinstance(frame.result, BuildResult):
                result = frame.result
        # Explicit raise rather than `assert`: asserts vanish under `python -O`,
        # which would let None escape from a method typed to return BuildResult.
        if result is None:
            raise RuntimeError(
                "send_live() finished without yielding a final BuildResult"
            )
        return result

    async def send_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Yield live frames while building or editing.

        Increments the turn counter, then delegates to the first-build path
        when there is no workspace yet and to the edit path otherwise.
        """
        self.turn += 1
        if self.workspace is None:
            async for frame in self._first_build_live(message):
                yield frame
        else:
            async for frame in self._iterate_live(message):
                yield frame

    def reset(self) -> None:
        """Drop the current app and start a fresh session."""
        self.cleanup()
        self.workspace = None
        self.tier_idx = 0
        self.turn = 0

    def cleanup(self) -> None:
        """Clean up the current workspace, if any, without clearing session state."""
        if self.workspace is not None:
            self.workspace.cleanup()

    def empty_preview(self) -> str:
        """Return the preview markup for an app with no files."""
        return preview_iframe({}, height=self.preview_height)

    # --- internals -------------------------------------------------------
    def _new_agent(self, tier: Tier, workspace: Workspace | None = None) -> SmallCodeAgent:
        """Create a fresh build agent on ``tier``'s model.

        ``workspace`` is passed straight through; the first build passes None
        and later adopts ``agent.workspace``, edits pass the session workspace.
        """
        return SmallCodeAgent(
            preset=self.preset, model=tier.model, max_steps=self.max_steps,
            system_prompt=BUILD_SYSTEM_PROMPT, registry_builder=build_web_registry,
            workspace=workspace, name="smolbuilder",
            agent="build", profile="web",
        )

    async def _first_build_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Escalate the model ladder until one produces a previewable app.

        Starts at the tier chosen by ``classify_tier``. A tier is accepted when
        its result verifies, or unconditionally when it is the last tier; the
        session then locks onto that agent's workspace and tier index.
        """
        start = classify_tier(message, len(self.tiers))
        task = f"Build this web app as a self-contained index.html:\n\n{message}"
        escalations = 0
        last: BuildResult | None = None
        prev_tier_name: str | None = None
        for idx in range(start, len(self.tiers)):
            tier = self.tiers[idx]
            if prev_tier_name is not None:
                # Tell the UI we moved up a tier before the new agent starts.
                yield LiveFrame(events=[
                    TraceEvent(kind="tier_escalation", name=tier.name,
                               detail=f"escalated from {prev_tier_name}"),
                ])
            agent = self._new_agent(tier)
            async for frame in agent.run_live_turn(
                task, think=self.think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok, entry, files = _evaluate(agent)
                # A run that hit the step limit or errored is never verified.
                ok = ok and not (agent.hit_max_steps or agent.errored)
                last = self._result(agent, final, steps, files, entry, tier,
                                    self.tiers[start].name, escalations, ok)
                is_last_tier = idx == len(self.tiers) - 1
                if ok or is_last_tier:
                    # Lock the session onto this agent's workspace and tier.
                    self.workspace = agent.workspace
                    self.tier_idx = idx
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return
            # This tier was not accepted: note the hop (if there is a next
            # tier), discard its agent, and move on.
            if idx < len(self.tiers) - 1:
                agent.trace_collector.record_escalation(tier.name, self.tiers[idx + 1].name)
            agent.cleanup()
            escalations += 1
            prev_tier_name = tier.name
        # Reached only when no tier was accepted above; surface the most recent
        # result, if any, so the caller still receives a finished frame.
        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )

    async def _iterate_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Apply an edit request to the existing app on the locked-in tier."""
        tier = self.tiers[self.tier_idx]
        agent = self._new_agent(tier, self.workspace)
        # NOTE(review): reads "index.html" specifically, while _evaluate locates
        # the entrypoint via find_entry; if the read is not ok the prompt below
        # embeds an empty file body.
        cur = self.workspace.read_file("index.html")
        body = cur["content"] if cur.get("ok") else ""
        task = (
            "You are editing an existing web app. Here is the current "
            "index.html:\n\n```html\n" + body + "\n```\n\n"
            "Apply the change below, then save the COMPLETE updated file with a "
            "single write_file(\"index.html\", <full new contents>). Keep "
            "everything that already works and output the whole file, never a "
            "fragment.\n\nChange to make: " + message
        )
        async for frame in agent.run_live_turn(
            task, think=self.think, yolo=self.yolo,
        ):
            if not frame.done:
                yield frame
                continue
            final, steps = frame.result
            ok, entry, files = _evaluate(agent)
            # A run that hit the step limit or errored is never verified.
            ok = ok and not (agent.hit_max_steps or agent.errored)
            # Edits never escalate: start tier == current tier, zero escalations.
            result = self._result(agent, final, steps, files, entry, tier, tier.name, 0, ok)
            yield LiveFrame(
                steps=steps,
                events=result.trace_events,
                files=result.files,
                done=True,
                result=result,
            )

    def _result(self, agent: SmallCodeAgent, final, steps, files, entry, tier, start_name,
                escalations, verified) -> BuildResult:
        """Package one finished turn into a BuildResult.

        Substitutes a fallback chat message when the model's final answer is
        blank, merges the agent's trace snapshot with its raw history, and
        renders the preview from ``files``.
        """
        # Small models sometimes write the file but return an empty answer; give
        # the chat something sensible rather than a blank bubble.
        if not (final or "").strip():
            # "\u2705" is the check-mark emoji, written as an escape so the
            # literal cannot be damaged by a source-encoding round trip.
            final = ("\u2705 Done: check the live preview." if verified
                     else "I made an attempt; have a look and tell me what to fix.")
        events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
        return BuildResult(
            final=final, steps=steps, files=files,
            preview_html=preview_iframe(files, height=self.preview_height),
            entry=entry, tier_name=tier.name, tier_model=tier.model,
            start_tier=start_name, escalations=escalations,
            verified=bool(verified), turn=self.turn,
            trace_events=events, agent=agent,
        )
|