Spaces:
Paused
Paused
| """smolbuilder — a Lovable/Replit-style web-app builder on a tiny local model. | |
| Where `Router` (engine/router.py) answers one coding *task* per call with a | |
| fresh workspace, `WebBuilder` is a **stateful session**: you describe a web app, | |
| the agent builds a self-contained `index.html`, and then you keep talking to it | |
| ("make it dark mode", "add a reset button") and it edits the *same* workspace. | |
| First build uses the router's escalation idea — start small, and if the tiny | |
| model can't produce a usable app, retry on the next-bigger model — but once a | |
| tier succeeds we **lock onto that agent and its workspace** so every later turn | |
| is a cheap incremental edit rather than a from-scratch rebuild. | |
| The build is verified in two steps: did the agent leave a non-trivial HTML | |
| entrypoint behind, and does `browsercheck.check_html` run it without reporting | |
| a failure? Static apps have no `run_python` signal, so "it produced a working | |
| app you can preview" is the success criterion the UI also relies on. | |
| """ | |
| from __future__ import annotations | |
| from collections.abc import AsyncIterator | |
| from dataclasses import dataclass, field | |
| from .agent import SmallCodeAgent, Step | |
| from .config import Preset, Tier, load_preset | |
| from .live_run import LiveFrame | |
| from .preview import find_entry, inline_app, preview_iframe | |
| from .router import classify_tier | |
| from .sandbox import Workspace | |
| from .tools import build_web_registry | |
| from .trace_collector import TraceEvent | |
| from .ui_trace import merge_step_metadata | |
| from . import browsercheck | |
# System prompt passed to every agent created by WebBuilder._new_agent. It
# lists the workspace tools, the hard rules for the index.html entrypoint, and
# the write -> check_app -> fix loop the model is asked to follow. This text is
# sent to the model verbatim, so edit it with care.
BUILD_SYSTEM_PROMPT = """You are smolbuilder, a web app builder running on a small local model.
You build small, self-contained web apps that run directly in a browser — like a tiny Lovable or Replit.
Your workspace tools:
- write_file(path, content): create or overwrite a file.
- read_file(path): read a file back.
- list_files(): see what already exists.
- check_app(): run the current app in a headless browser — load index.html, execute its JavaScript, click every button — and report any errors.
Hard rules:
1. The app's entrypoint is ALWAYS a single file named index.html, and it must start with <!doctype html><html> and include <head> and <body>.
2. Put the CSS in a <style> tag and the JavaScript in a <script> tag INSIDE index.html. Prefer one self-contained file — it must run with no build step and no server.
3. Put the <script> tag at the very END of <body>, AFTER the elements it uses (or wrap your code in window.addEventListener('DOMContentLoaded', ...)). If a script runs before its elements exist, document.getElementById returns null and every button silently breaks.
4. Every button or interactive control must have a working handler that you actually wire up. Define functions before they are referenced.
5. Vanilla HTML/CSS/JS only. Do not require a framework, npm, or a backend. You may load a library from a CDN with a full https:// URL only if it is truly needed.
6. Make it look good by default: sensible layout, spacing, a coherent color palette, readable type. Mobile-friendly.
Method — follow it every time:
1. Write a complete index.html in one write_file call.
2. Call check_app() to test it.
3. If check_app reports errors, read them, fix index.html (write the FULL file again), and call check_app again. Repeat until it reports ok.
4. To CHANGE an existing app, write the FULL updated index.html (never a partial file — keep everything that already worked), then check_app again.
Only finish once check_app reports the app works. Then reply with one short sentence describing what the app does. Do not paste the code in your reply.
"""
# Minimum entrypoint size (chars, measured after stripping surrounding
# whitespace) to count as "a real app" and not a stub. _evaluate rejects any
# entrypoint shorter than this before attempting the runtime check.
_MIN_APP_CHARS = 60
@dataclass
class BuildResult:
    """Outcome of one build or edit turn, as assembled by ``WebBuilder._result``.

    The class must be a dataclass: it is constructed with keyword arguments
    and relies on ``field(default_factory=list)`` for ``trace_events``.
    """

    # Agent's closing reply; WebBuilder substitutes a fallback sentence when
    # the model returned a blank answer.
    final: str
    # Steps reported by the agent's final (done) frame.
    steps: list[Step]
    # Workspace contents at the end of the turn (as returned by agent.files()).
    files: dict[str, str]
    # Markup produced by preview_iframe() for the files above.
    preview_html: str
    # Entrypoint chosen by find_entry(), or None when there is none.
    entry: str | None
    # Name and model of the tier that produced this result.
    tier_name: str
    tier_model: str
    # Name of the tier the build started on (before any escalation).
    start_tier: str
    # Number of tiers abandoned before this result was produced.
    escalations: int
    # True when the app passed evaluation and the agent neither errored nor
    # ran out of steps.
    verified: bool
    # Session turn counter at the time the result was built.
    turn: int = 0
    # Trace events merged with step metadata for the UI.
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that ran the turn, when available.
    agent: SmallCodeAgent | None = None

    def app_html(self) -> str:
        """The self-contained document — for the 'download app' button."""
        return inline_app(self.files)
def _evaluate(agent: SmallCodeAgent) -> tuple[bool, str | None, dict[str, str]]:
    """Judge whether the agent's workspace holds a *working* app.

    The verdict feeds the verified badge and the escalation decision. Two
    gates are applied in order:

    1. Structural: an entrypoint must exist and, once stripped, be at least
       ``_MIN_APP_CHARS`` characters long.
    2. Runtime (HTML entrypoints only): the inlined app is passed to
       ``browsercheck.check_html``. Only an explicit ``False`` verdict counts
       as a failure, so a check that could not be carried out (e.g. no Node)
       does not fail the build.

    Returns ``(ok, entry, files)`` where ``entry`` is the entrypoint found
    (or ``None``) and ``files`` is the workspace snapshot that was judged.
    """
    snapshot = agent.files()
    entrypoint = find_entry(snapshot)

    # Structural gate: no entrypoint, or one too small to be a real app.
    if entrypoint is None:
        return False, entrypoint, snapshot
    if len(snapshot[entrypoint].strip()) < _MIN_APP_CHARS:
        return False, entrypoint, snapshot

    # Runtime gate applies to HTML documents only.
    is_html = entrypoint.lower().endswith((".html", ".htm"))
    if is_html:
        verdict, _unused_errors = browsercheck.check_html(inline_app(snapshot))
        # Identity test on purpose: anything other than False is not a failure.
        if verdict is False:
            return False, entrypoint, snapshot

    return True, entrypoint, snapshot
class WebBuilder:
    """A persistent build session. One instance per browser session (gr.State).

    The first message triggers a from-scratch build that may escalate through
    ``self.tiers``; once a tier is accepted, its workspace and tier index are
    remembered and every later message is handled as an edit of that same
    workspace.
    """

    def __init__(self, preset: Preset | None = None, max_steps: int = 16,
                 preview_height: int = 540) -> None:
        """Create an empty session.

        Args:
            preset: Model preset to use; ``load_preset()`` is called when omitted.
            max_steps: Step budget handed to every agent created by the session.
            preview_height: Height passed to ``preview_iframe`` for previews.
        """
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.preview_height = preview_height
        # The workspace (the built app on disk) persists across turns; the tier
        # that built it is remembered so edits stay on the same model. A spent
        # LiteForge agent can't be re-run, so each turn gets a fresh agent over
        # this same workspace.
        self.workspace: Workspace | None = None
        self.tier_idx = 0
        self.turn = 0
        # Forwarded unchanged to agent.run_live_turn on every turn.
        self.think = "off"
        self.yolo = False

    def has_app(self) -> bool:
        """True once a first build has produced a workspace to iterate on."""
        return self.workspace is not None

    # --- public API ------------------------------------------------------

    async def send(self, message: str) -> BuildResult:
        """Build (first turn) or edit (later turns) and return a BuildResult.

        Drains ``send_live`` and keeps the result carried by the last done
        frame.

        Raises:
            RuntimeError: if the live run ended without a done frame carrying
                a ``BuildResult``.
        """
        result: BuildResult | None = None
        async for frame in self.send_live(message):
            if frame.done and isinstance(frame.result, BuildResult):
                result = frame.result
        # Explicit raise rather than ``assert``: assertions are stripped under
        # ``python -O``, which would let ``None`` escape despite the annotation.
        if result is None:
            raise RuntimeError("build finished without producing a BuildResult")
        return result

    async def send_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Yield live frames while building or editing.

        Increments the turn counter, then delegates to the first-build path
        when no workspace exists yet and to the edit path otherwise.
        """
        self.turn += 1
        if self.workspace is None:
            async for frame in self._first_build_live(message):
                yield frame
        else:
            async for frame in self._iterate_live(message):
                yield frame

    def reset(self) -> None:
        """Drop the current app and start a fresh session."""
        self.cleanup()
        self.workspace = None
        self.tier_idx = 0
        self.turn = 0

    def cleanup(self) -> None:
        """Clean up the current workspace, if any (the reference is kept)."""
        if self.workspace is not None:
            self.workspace.cleanup()

    def empty_preview(self) -> str:
        """Return the preview markup for an empty file set."""
        return preview_iframe({}, height=self.preview_height)

    # --- internals -------------------------------------------------------

    def _new_agent(self, tier: Tier, workspace: Workspace | None = None) -> SmallCodeAgent:
        """Create a fresh web-build agent on ``tier``.

        ``workspace`` is passed through as given; the first build passes none
        and edits pass the session workspace.
        """
        return SmallCodeAgent(
            preset=self.preset, model=tier.model, max_steps=self.max_steps,
            system_prompt=BUILD_SYSTEM_PROMPT, registry_builder=build_web_registry,
            workspace=workspace, name="smolbuilder",
            agent="build", profile="web",
        )

    async def _first_build_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Escalate the model ladder until one produces a previewable app.

        Starts at the tier chosen by ``classify_tier`` and walks upward. A
        tier's result is accepted when it is verified or when it is the last
        tier; acceptance locks the session onto that agent's workspace and
        tier index and ends the generator with a done frame.
        """
        start = classify_tier(message, len(self.tiers))
        task = f"Build this web app as a self-contained index.html:\n\n{message}"
        escalations = 0
        last: BuildResult | None = None
        prev_tier_name: str | None = None
        for idx in range(start, len(self.tiers)):
            tier = self.tiers[idx]
            if prev_tier_name is not None:
                # Tell the UI that we moved up from the previous tier.
                yield LiveFrame(events=[
                    TraceEvent(kind="tier_escalation", name=tier.name,
                               detail=f"escalated from {prev_tier_name}"),
                ])
            agent = self._new_agent(tier)
            async for frame in agent.run_live_turn(
                task, think=self.think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok, entry, files = _evaluate(agent)
                # A run that errored or exhausted its steps is never verified.
                ok = ok and not (agent.hit_max_steps or agent.errored)
                last = self._result(agent, final, steps, files, entry, tier,
                                    self.tiers[start].name, escalations, ok)
                is_last_tier = idx == len(self.tiers) - 1
                if ok or is_last_tier:
                    self.workspace = agent.workspace
                    self.tier_idx = idx
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return
            # Reached only when this tier's turn ended without an accepted
            # result: record the hand-off (if a higher tier exists), discard
            # the agent, and move up the ladder.
            if idx < len(self.tiers) - 1:
                agent.trace_collector.record_escalation(tier.name, self.tiers[idx + 1].name)
            agent.cleanup()
            escalations += 1
            prev_tier_name = tier.name
        # Fallback: the ladder ended without an accepted result, so surface
        # the most recent one, if any.
        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )

    async def _iterate_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Edit the existing app on the locked tier and stream the frames.

        The current ``index.html`` is embedded in the task so the model sees
        the file it must rewrite; an unreadable file is sent as empty text.
        """
        tier = self.tiers[self.tier_idx]
        agent = self._new_agent(tier, self.workspace)
        cur = self.workspace.read_file("index.html")
        body = cur["content"] if cur.get("ok") else ""
        task = (
            "You are editing an existing web app. Here is the current "
            "index.html:\n\n```html\n" + body + "\n```\n\n"
            "Apply the change below, then save the COMPLETE updated file with a "
            "single write_file(\"index.html\", <full new contents>). Keep "
            "everything that already works and output the whole file, never a "
            "fragment.\n\nChange to make: " + message
        )
        async for frame in agent.run_live_turn(
            task, think=self.think, yolo=self.yolo,
        ):
            if not frame.done:
                yield frame
                continue
            final, steps = frame.result
            ok, entry, files = _evaluate(agent)
            ok = ok and not (agent.hit_max_steps or agent.errored)
            # Edits never escalate: start tier == current tier, 0 escalations.
            result = self._result(agent, final, steps, files, entry, tier, tier.name, 0, ok)
            yield LiveFrame(
                steps=steps,
                events=result.trace_events,
                files=result.files,
                done=True,
                result=result,
            )

    def _result(self, agent: SmallCodeAgent, final, steps, files, entry, tier, start_name,
                escalations, verified) -> BuildResult:
        """Package one finished turn into a ``BuildResult``.

        Fills in a fallback reply when the agent's answer is blank, renders
        the preview, and merges the agent's trace snapshot with its raw
        history for the UI.
        """
        # Small models sometimes write the file but return an empty answer; give
        # the chat something sensible rather than a blank bubble.
        if not (final or "").strip():
            final = "✅ Done: check the live preview." if verified else \
                "I made an attempt; have a look and tell me what to fix."
        events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
        return BuildResult(
            final=final, steps=steps, files=files,
            preview_html=preview_iframe(files, height=self.preview_height),
            entry=entry, tier_name=tier.name, tier_model=tier.model,
            start_tier=start_name, escalations=escalations,
            verified=bool(verified), turn=self.turn,
            trace_events=events, agent=agent,
        )