Spaces:

seanpoyner
/

smolcode

Paused

File size: 12,275 Bytes

daea45b

"""smolbuilder — a Lovable/Replit-style web-app builder on a tiny local model.

Where `Router` (engine/router.py) answers one coding *task* per call with a
fresh workspace, `WebBuilder` is a **stateful session**: you describe a web app,
the agent builds a self-contained `index.html`, and then you keep talking to it
("make it dark mode", "add a reset button") and it edits the *same* workspace.

First build uses the router's escalation idea — start small, and if the tiny
model can't produce a usable app, retry on the next-bigger model — but once a
tier succeeds we **lock onto that agent and its workspace** so every later turn
is a cheap incremental edit rather than a from-scratch rebuild.

The build is verified by rendering: did the agent leave a non-trivial HTML
entrypoint behind, and did the runtime check in `_evaluate` not report it as
broken? Static apps have no `run_python` signal, so "it produced an app you can
preview" is the success criterion the UI also relies on.
"""
from __future__ import annotations

from collections.abc import AsyncIterator
from dataclasses import dataclass, field

from .agent import SmallCodeAgent, Step
from .config import Preset, Tier, load_preset
from .live_run import LiveFrame
from .preview import find_entry, inline_app, preview_iframe
from .router import classify_tier
from .sandbox import Workspace
from .tools import build_web_registry
from .trace_collector import TraceEvent
from .ui_trace import merge_step_metadata
from . import browsercheck

# System prompt handed to every agent this module creates (it is passed as
# `system_prompt=` in `WebBuilder._new_agent`). It names the workspace tools,
# fixes `index.html` as the single entrypoint, and prescribes the
# write -> check_app -> fix loop. The text is runtime input to the model, so
# any wording change here changes agent behavior.
BUILD_SYSTEM_PROMPT = """You are smolbuilder, a web app builder running on a small local model.

You build small, self-contained web apps that run directly in a browser — like a tiny Lovable or Replit.

Your workspace tools:
- write_file(path, content): create or overwrite a file.
- read_file(path): read a file back.
- list_files(): see what already exists.
- check_app(): run the current app in a headless browser — load index.html, execute its JavaScript, click every button — and report any errors.

Hard rules:
1. The app's entrypoint is ALWAYS a single file named index.html, and it must start with <!doctype html><html> and include <head> and <body>.
2. Put the CSS in a <style> tag and the JavaScript in a <script> tag INSIDE index.html. Prefer one self-contained file — it must run with no build step and no server.
3. Put the <script> tag at the very END of <body>, AFTER the elements it uses (or wrap your code in window.addEventListener('DOMContentLoaded', ...)). If a script runs before its elements exist, document.getElementById returns null and every button silently breaks.
4. Every button or interactive control must have a working handler that you actually wire up. Define functions before they are referenced.
5. Vanilla HTML/CSS/JS only. Do not require a framework, npm, or a backend. You may load a library from a CDN with a full https:// URL only if it is truly needed.
6. Make it look good by default: sensible layout, spacing, a coherent color palette, readable type. Mobile-friendly.

Method — follow it every time:
1. Write a complete index.html in one write_file call.
2. Call check_app() to test it.
3. If check_app reports errors, read them, fix index.html (write the FULL file again), and call check_app again. Repeat until it reports ok.
4. To CHANGE an existing app, write the FULL updated index.html (never a partial file — keep everything that already worked), then check_app again.

Only finish once check_app reports the app works. Then reply with one short sentence describing what the app does. Do not paste the code in your reply.
"""

# Minimum entrypoint size (chars) to count as "a real app" and not a stub.
# `_evaluate` compares this against the length of the whitespace-stripped
# entrypoint text; anything shorter is treated as a failed build.
_MIN_APP_CHARS = 60


@dataclass
class BuildResult:
    """Everything one build or edit turn produced, bundled for the caller.

    Instances are assembled by `WebBuilder._result`; the field order below is
    part of the constructor signature and must not be rearranged.
    """

    # Reply text for the chat (a fallback sentence when the agent said nothing).
    final: str
    # The agent steps taken during this turn.
    steps: list[Step]
    # Snapshot of the workspace files, keyed by path.
    files: dict[str, str]
    # Markup produced by `preview_iframe` for the live preview pane.
    preview_html: str
    # Entrypoint path found in `files`, or None when there is none.
    entry: str | None
    # Name and model of the tier that ran this turn.
    tier_name: str
    tier_model: str
    # Name of the tier the first build started on.
    start_tier: str
    # How many tiers were tried and abandoned before this one.
    escalations: int
    # True when the evaluation passed and the agent neither errored nor ran
    # out of steps.
    verified: bool
    # Session turn counter at the time the result was built.
    turn: int = 0
    # Trace events for this turn; a fresh list per instance.
    trace_events: list[TraceEvent] = field(default_factory=list)
    # The agent that produced this result, when the caller kept it.
    agent: SmallCodeAgent | None = None

    @property
    def app_html(self) -> str:
        """Return `files` passed through `inline_app` (the 'download app' payload)."""
        bundled = inline_app(self.files)
        return bundled


def _evaluate(agent: SmallCodeAgent) -> tuple[bool, str | None, dict[str, str]]:
    """Judge whether the agent's workspace holds a usable app.

    Two gates, in order:

    1. Structural: `find_entry` must locate an entrypoint whose stripped text
       is at least `_MIN_APP_CHARS` long.
    2. Runtime (HTML entrypoints only): the inlined app goes through
       `browsercheck.check_html`. Only an explicit ``False`` verdict fails the
       build; any other verdict passes (per the original note, an
       unverifiable check such as a missing Node must not count as a failure).

    Returns:
        ``(passed, entry, files)`` — the verdict, the entrypoint path (or
        None), and the file snapshot that was evaluated.
    """
    workspace_files = agent.files()
    entrypoint = find_entry(workspace_files)

    has_real_entry = (
        entrypoint is not None
        and len(workspace_files[entrypoint].strip()) >= _MIN_APP_CHARS
    )
    if not has_real_entry:
        return False, entrypoint, workspace_files

    # Non-HTML entrypoints have no runtime check; the structural gate decides.
    if not entrypoint.lower().endswith((".html", ".htm")):
        return True, entrypoint, workspace_files

    verdict, _ = browsercheck.check_html(inline_app(workspace_files))
    # Identity test on purpose: only a literal False is a failure.
    return verdict is not False, entrypoint, workspace_files


class WebBuilder:
    """A persistent build session. One instance per browser session (gr.State).

    The first message runs an escalating first build across `self.tiers`; once
    a workspace has been locked in, every later message is an edit turn on
    that same workspace, using the tier that built it.
    """

    def __init__(self, preset: Preset | None = None, max_steps: int = 16,
                 preview_height: int = 540) -> None:
        """Create an empty session.

        Args:
            preset: Model preset supplying the tier ladder; `load_preset()` is
                used when omitted.
            max_steps: Step budget passed to every agent this session creates.
            preview_height: Height forwarded to `preview_iframe`.
        """
        self.preset = preset or load_preset()
        self.tiers: list[Tier] = self.preset.tiers
        self.max_steps = max_steps
        self.preview_height = preview_height
        # The workspace (the built app on disk) persists across turns; the tier
        # that built it is remembered so edits stay on the same model. A spent
        # LiteForge agent can't be re-run, so each turn gets a fresh agent over
        # this same workspace.
        self.workspace: Workspace | None = None
        self.tier_idx = 0
        self.turn = 0
        # Forwarded unchanged to `agent.run_live_turn` on every turn.
        self.think = "off"
        self.yolo = False

    @property
    def has_app(self) -> bool:
        """True once a first build has produced a workspace to iterate on."""
        return self.workspace is not None

    # --- public API ------------------------------------------------------
    async def send(self, message: str) -> BuildResult:
        """Build (first turn) or edit (later turns) and return a BuildResult.

        Drains `send_live` and keeps the result carried by the last done frame.

        Raises:
            RuntimeError: if the live stream ended without a done frame that
                carries a `BuildResult`.
        """
        result: BuildResult | None = None
        async for frame in self.send_live(message):
            if frame.done and isinstance(frame.result, BuildResult):
                result = frame.result
        if result is None:
            # Explicit raise rather than `assert`: asserts are stripped under
            # `python -O`, which would let a None escape to the caller.
            raise RuntimeError(
                "build finished without producing a result "
                "(no completed frame was emitted)"
            )
        return result

    async def send_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Yield live frames while building or editing.

        Bumps the turn counter, then dispatches to the first-build path when
        there is no workspace yet and to the edit path otherwise.
        """
        self.turn += 1
        if self.workspace is None:
            async for frame in self._first_build_live(message):
                yield frame
        else:
            async for frame in self._iterate_live(message):
                yield frame

    def reset(self) -> None:
        """Drop the current app and start a fresh session."""
        self.cleanup()
        self.workspace = None
        self.tier_idx = 0
        self.turn = 0

    def cleanup(self) -> None:
        """Clean up the current workspace, if any, and forget it.

        Safe to call repeatedly: the reference is dropped before the workspace
        is cleaned, so a second call is a no-op and `has_app` no longer reports
        an app whose workspace has already been cleaned up.
        """
        workspace, self.workspace = self.workspace, None
        if workspace is not None:
            workspace.cleanup()

    def empty_preview(self) -> str:
        """Return the preview markup for an empty file set."""
        return preview_iframe({}, height=self.preview_height)

    # --- internals -------------------------------------------------------
    def _new_agent(self, tier: Tier, workspace: Workspace | None = None) -> SmallCodeAgent:
        """Create a fresh build agent on `tier`, optionally over an existing workspace."""
        return SmallCodeAgent(
            preset=self.preset, model=tier.model, max_steps=self.max_steps,
            system_prompt=BUILD_SYSTEM_PROMPT, registry_builder=build_web_registry,
            workspace=workspace, name="smolbuilder",
            agent="build", profile="web",
        )

    async def _first_build_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Escalate the model ladder until one produces a previewable app.

        Starts at the tier picked by `classify_tier` and walks upward. The
        first tier that passes evaluation, or the last tier regardless of
        outcome, has its workspace and index locked onto the session and ends
        the stream with a done frame. A failed non-final tier is cleaned up
        and an escalation event is emitted before the next tier starts.
        """
        start = classify_tier(message, len(self.tiers))
        task = (f"Build this web app as a self-contained index.html:\n\n{message}")
        escalations = 0
        last: BuildResult | None = None
        prev_tier_name: str | None = None
        final_idx = len(self.tiers) - 1

        for idx in range(start, len(self.tiers)):
            tier = self.tiers[idx]
            if prev_tier_name is not None:
                yield LiveFrame(events=[
                    TraceEvent(kind="tier_escalation", name=tier.name,
                               detail=f"escalated from {prev_tier_name}"),
                ])
            agent = self._new_agent(tier)
            async for frame in agent.run_live_turn(
                task, think=self.think, yolo=self.yolo,
            ):
                if not frame.done:
                    yield frame
                    continue
                final, steps = frame.result
                ok, entry, files = _evaluate(agent)
                # A run that errored or exhausted its step budget is not
                # trusted even if it left a plausible file behind.
                ok = ok and not (agent.hit_max_steps or agent.errored)
                last = self._result(agent, final, steps, files, entry, tier,
                                    self.tiers[start].name, escalations, ok)
                if ok or idx == final_idx:
                    # Lock onto this agent's workspace and tier for later edits.
                    self.workspace = agent.workspace
                    self.tier_idx = idx
                    yield LiveFrame(
                        steps=steps,
                        events=last.trace_events,
                        files=last.files,
                        done=True,
                        result=last,
                    )
                    return
                # Reaching here means this was not the final tier, so a next
                # tier always exists to escalate to.
                agent.trace_collector.record_escalation(tier.name, self.tiers[idx + 1].name)
                agent.cleanup()
                escalations += 1
                prev_tier_name = tier.name

        # Fallback: the ladder ended without locking a workspace (for example
        # the final tier never emitted a done frame). Surface the last attempt
        # if there was one.
        if last is not None:
            yield LiveFrame(
                steps=last.steps,
                events=last.trace_events,
                files=last.files,
                done=True,
                result=last,
            )

    async def _iterate_live(self, message: str) -> AsyncIterator[LiveFrame]:
        """Run one edit turn over the locked workspace on the remembered tier.

        The current `index.html` (empty when it cannot be read) is embedded in
        the task so the model rewrites the whole file instead of patching it.
        """
        tier = self.tiers[self.tier_idx]
        agent = self._new_agent(tier, self.workspace)
        cur = self.workspace.read_file("index.html")
        body = cur["content"] if cur.get("ok") else ""
        task = (
            "You are editing an existing web app. Here is the current "
            "index.html:\n\n```html\n" + body + "\n```\n\n"
            "Apply the change below, then save the COMPLETE updated file with a "
            "single write_file(\"index.html\", <full new contents>). Keep "
            "everything that already works and output the whole file, never a "
            "fragment.\n\nChange to make: " + message
        )
        async for frame in agent.run_live_turn(
            task, think=self.think, yolo=self.yolo,
        ):
            if not frame.done:
                yield frame
                continue
            final, steps = frame.result
            ok, entry, files = _evaluate(agent)
            ok = ok and not (agent.hit_max_steps or agent.errored)
            # Edit turns never escalate: start tier == current tier, 0 escalations.
            result = self._result(agent, final, steps, files, entry, tier, tier.name, 0, ok)
            yield LiveFrame(
                steps=steps,
                events=result.trace_events,
                files=result.files,
                done=True,
                result=result,
            )

    def _result(self, agent: SmallCodeAgent, final: str | None, steps: list[Step],
                files: dict[str, str], entry: str | None, tier: Tier, start_name: str,
                escalations: int, verified: bool) -> BuildResult:
        """Package one finished turn into a `BuildResult`.

        Fills in a fallback reply when the agent returned a blank answer, and
        merges the agent's trace snapshot with its raw history for the UI.
        """
        # Small models sometimes write the file but return an empty answer; give
        # the chat something sensible rather than a blank bubble.
        if not (final or "").strip():
            final = "✅ Done: check the live preview." if verified else \
                "I made an attempt; have a look and tell me what to fix."
        events = merge_step_metadata(agent.trace_collector.snapshot(), agent.raw_history())
        return BuildResult(
            final=final, steps=steps, files=files,
            preview_html=preview_iframe(files, height=self.preview_height),
            entry=entry, tier_name=tier.name, tier_model=tier.model,
            start_tier=start_name, escalations=escalations,
            verified=bool(verified), turn=self.turn,
            trace_events=events, agent=agent,
        )