"""Live-preview rendering for smolbuilder.
Turns the agent's workspace (a `path -> content` dict of a small static web app)
into a single self-contained HTML document, then into a sandboxed iframe that
Gradio can drop straight into a `gr.HTML`. This is the "Replit/Lovable" preview:
what the tiny model just built, running live in the browser.
Deliberately dependency-free (stdlib only) so it can be unit-tested without
Gradio or the Rust engine, and so the rendering logic stays trivially auditable.
Design choices:
- We inline locally-referenced `<link rel=stylesheet>` and `<script src=...>`
from sibling files, so a model that splits style.css / script.js out of
index.html still previews correctly — but we never touch absolute/CDN URLs.
- The iframe is loaded via `srcdoc=` (not a `data:` URI). A `data:` URL has an
*opaque origin*, where `localStorage`/`sessionStorage` throw `SecurityError` —
so any app that persists state (a notepad, a to-do list) dies on load before it
can wire up its buttons. A `srcdoc` frame inherits the embedder's (Gradio's)
origin, so storage and scripts work the way the model expects.
- SECURITY TRADE-OFF: `sandbox="allow-scripts allow-same-origin ..."` is required
for storage to work, but that combination also lets the framed (model-written)
code reach the parent page. This is acceptable for a *local, single-user*
builder — the framed code is the same user's own request, on a page holding no
one else's secrets. Do NOT reuse this wrapper to embed untrusted third-party
apps on an origin that holds other users' data; the isolation-preserving fix is
to serve the preview from a separate origin (out of scope here).
- The same wrapper (`PREVIEW_SANDBOX`/`_escape_srcdoc`) is reused by the headless
verification check (engine/browsercheck.py) so the agent tests *exactly* what
the user sees.
"""
from __future__ import annotations
import html
import re
# Sandbox tokens for the preview iframe; the verification check reuses the same
# value. `allow-same-origin` lets the srcdoc frame inherit the parent origin so
# web storage works; together with `allow-scripts` it weakens isolation (see the
# module docstring for the trade-off).
PREVIEW_SANDBOX = "allow-scripts allow-same-origin allow-modals allow-popups allow-forms"

# Entrypoint filenames in order of preference (first match wins).
_ENTRY_CANDIDATES = ("index.html", "main.html", "app.html")

# A whole `<link ... rel=stylesheet ...>` tag; the rel value may be unquoted.
_LINK_RE = re.compile(
    r"""<link\b[^>]*?\brel\s*=\s*['"]?stylesheet['"]?[^>]*?>""",
    flags=re.IGNORECASE | re.DOTALL,
)

# An empty `<script src="...">` element; group 1 is the quoted src value.
_SCRIPT_SRC_RE = re.compile(
    r"""<script\b[^>]*?\bsrc\s*=\s*['"]([^'"]+)['"][^>]*?>\s*</script>""",
    flags=re.IGNORECASE | re.DOTALL,
)

# A quoted `href="..."` attribute; group 1 is the URL.
_HREF_RE = re.compile(
    r"""\bhref\s*=\s*['"]([^'"]+)['"]""",
    flags=re.IGNORECASE,
)
def find_entry(files: dict[str, str]) -> str | None:
    """Choose which workspace file to preview as the app's HTML entrypoint.

    Returns the original-cased path of the chosen file, or None when the
    workspace contains no `.html` file at all.
    """
    # Map lowercased path -> real path so the well-known names match regardless
    # of case; a later path with the same lowercase form replaces an earlier one.
    by_lowercase: dict[str, str] = {}
    for path in files:
        by_lowercase[path.lower()] = path

    for name in _ENTRY_CANDIDATES:
        chosen = by_lowercase.get(name)
        if chosen is not None:
            return chosen

    # No well-known name: take any .html file, preferring the shallowest path
    # and breaking ties alphabetically so the result is deterministic.
    html_paths = (path for path in files if path.lower().endswith(".html"))
    return min(
        html_paths,
        key=lambda path: (path.count("/"), path),
        default=None,
    )
def _is_local(url: str) -> bool:
    """Report whether `url` is a relative, same-app reference worth inlining.

    Blank values, `scheme://` and protocol-relative (`//host`) URLs, `http(s):`,
    `data:` and `mailto:` URLs, and pure `#fragment` references are not local.
    """
    candidate = url.strip()
    if candidate == "":
        return False
    external = re.match(
        r"^(?:[a-z]+:)?//|^https?:|^data:|^mailto:|^#", candidate, re.IGNORECASE
    )
    return external is None
def _lookup(files: dict[str, str], ref: str) -> str | None:
    """Resolve a relative href/src against the workspace file map.

    The query string and fragment are dropped, then the path is normalised
    segment by segment: empty and `.` segments are skipped and `..` removes the
    previous segment (a leading `..` is clamped to the workspace root). Names
    that merely start with a dot, such as `.theme.css`, are kept intact.

    Matching is tried from most to least specific:
      1. exact path,
      2. case-insensitive full path,
      3. case-insensitive basename (so `/style.css` or `css/style.css` still
         finds a lone `style.css`); the first such file in `files` order wins.

    Returns the file content, or None when nothing matches or the reference
    has no path component (e.g. `?v=1`).
    """
    path = ref.strip().split("?", 1)[0].split("#", 1)[0]

    segments: list[str] = []
    for segment in path.split("/"):
        if segment in ("", "."):
            continue
        if segment == "..":
            if segments:
                segments.pop()
            continue
        segments.append(segment)
    if not segments:
        return None

    wanted = "/".join(segments)
    if wanted in files:
        return files[wanted]

    wanted_lower = wanted.lower()
    base = segments[-1].lower()
    # Remember the first basename match but keep scanning: a case-insensitive
    # full-path match anywhere in the map must win over it.
    basename_hit: str | None = None
    for candidate, content in files.items():
        if candidate.lower() == wanted_lower:
            return content
        if basename_hit is None and candidate.rsplit("/", 1)[-1].lower() == base:
            basename_hit = content
    return basename_hit
def inline_app(files: dict[str, str]) -> str:
    """Return one self-contained HTML document for the app in `files`.

    The entrypoint chosen by `find_entry` is used as the base document. Every
    `<link rel=stylesheet>` and empty `<script src=...>` whose URL is local and
    resolves to a workspace file is replaced by an inline `<style>` / `<script>`
    holding that file's content. Tags that point at absolute/CDN URLs, or at
    files that are not in the workspace, are left exactly as written.

    If there's no HTML entrypoint, a friendly placeholder page is returned
    instead (e.g. the model has only written notes or a not-yet-web file).
    """
    entry = find_entry(files)
    if entry is None:
        return _placeholder(files)

    # Tokenises `name` / `name="value"` pairs. Quoted values are consumed whole,
    # so text inside a src value (e.g. `app.js?type=x`) is never mistaken for a
    # separate attribute.
    attr_re = re.compile(r"""([^\s"'<>/=]+)(?:\s*=\s*("[^"]*"|'[^']*'))?""")

    def _carry(tag: str, name: str) -> str:
        """Return ` name="value"` copied verbatim from `tag`, or '' if absent/unquoted."""
        for attr in attr_re.finditer(tag):
            if attr.group(1).lower() == name and attr.group(2) is not None:
                return f" {name}={attr.group(2)}"
        return ""

    def _defuse(body: str, element: str) -> str:
        """Rewrite `</element` as `<\\/element` so `body` cannot close its host tag.

        End tags are matched case-insensitively by HTML parsers and do not need
        the trailing `>` to be adjacent, so the guard is case-insensitive and
        keys on the `</name` prefix. `\\/` is still a plain `/` inside JS and
        CSS string literals.
        """
        return re.sub(f"<(/{element})", r"<\\\1", body, flags=re.I)

    def _inline_css(match: re.Match) -> str:
        tag = match.group(0)
        href_m = _HREF_RE.search(tag)
        if not href_m or not _is_local(href_m.group(1)):
            return tag
        css = _lookup(files, href_m.group(1))
        if css is None:
            return tag
        return f"<style>\n{_defuse(css, 'style')}\n</style>"

    def _inline_js(match: re.Match) -> str:
        tag = match.group(0)
        src = match.group(1)
        if not _is_local(src):
            return tag
        js = _lookup(files, src)
        if js is None:
            return tag
        # Keep a quoted `type` (e.g. type="module") so the inlined body is
        # parsed the same way the external file would have been.
        return f"<script{_carry(tag, 'type')}>\n{_defuse(js, 'script')}\n</script>"

    doc = _LINK_RE.sub(_inline_css, files[entry])
    return _SCRIPT_SRC_RE.sub(_inline_js, doc)
def _escape_srcdoc(doc: str) -> str:
    """Escape an HTML document for a double-quoted `srcdoc="..."` attribute.

    Only `&` and `"` are significant inside a double-quoted attribute value, and
    `&` must go first (so the `&` we introduce for `&quot;` isn't re-escaped).
    `<`, `>` and even a literal `</script>` are FINE here — the parser is in
    attribute-value state, not script-data state — so we must NOT touch them
    (html.escape would corrupt the rendered document).

    Returns `doc` with `&` replaced by `&amp;` and `"` by `&quot;`.
    """
    return doc.replace("&", "&amp;").replace('"', "&quot;")
def preview_iframe(files: dict[str, str], *, height: int = 540) -> str:
    """Build the sandboxed `srcdoc` iframe markup for the app, ready for `gr.HTML`.

    `files` is inlined into a single document, escaped for a double-quoted
    attribute and embedded as the frame's `srcdoc`. `height` is the frame
    height in CSS pixels; the width always fills the container.
    """
    document = inline_app(files)
    frame_style = (
        f"width:100%;height:{height}px;border:0;border-radius:12px;"
        "background:#fff;box-shadow:0 1px 0 rgba(0,0,0,.06)"
    )
    attributes = [
        'title="smolbuilder preview"',
        f'style="{frame_style}"',
        f'sandbox="{PREVIEW_SANDBOX}"',
        f'srcdoc="{_escape_srcdoc(document)}"',
    ]
    return "<iframe " + " ".join(attributes) + "></iframe>"
def _placeholder(files: dict[str, str]) -> str:
    """Build the stand-in page shown when the workspace has no HTML entrypoint.

    The page lists every workspace path (sorted and HTML-escaped), or a
    "workspace is empty" note when `files` has no entries.
    """
    items = [f"<li><code>{html.escape(path)}</code></li>" for path in sorted(files)]
    if not items:
        items = ["<li><em>workspace is empty</em></li>"]

    pieces = [
        "<!doctype html><html><head><meta charset='utf-8'>",
        "<style>body{font:15px/1.5 system-ui,sans-serif;color:#475569;",
        "background:#f8fafc;padding:2rem}h2{color:#7c3aed;margin:.2rem 0 1rem}",
        "code{background:#ede9fe;color:#5b21b6;padding:1px 6px;border-radius:6px}",
        "</style></head><body>",
        "<h2>No preview yet</h2>",
        "<p>smolbuilder previews the app's <code>index.html</code>. ",
        "Describe a web app on the left and it'll appear here, live.</p>",
        "<p>Files in the workspace:</p><ul>",
        *items,
        "</ul>",
        "</body></html>",
    ]
    return "".join(pieces)
|