smolcode / engine /browsercheck.py
seanpoyner's picture
Upload folder using huggingface_hub
daea45b verified
Raw
History Blame Contribute Delete
4.23 kB
"""Real-browser verification of model-built web apps, with a jsdom fallback.
The web equivalent of `run_python`, but faithful: it drives a REAL headless
browser (Playwright/Chromium via engine/playwright_runner.py, else Firefox via
Selenium in engine/browser_runner.py — each run as a subprocess) and loads the
app in the exact `srcdoc`/`sandbox` wrapper the live preview uses — so the
agent's verdict matches what the user actually sees. jsdom
(engine/webcheck.py) can't: it has a working localStorage and never applies the
sandbox, so it falsely passes apps that break in a browser (e.g. a notepad on a
`data:` opaque origin).
Same contract as webcheck.check_html — (True, []) / (False, [...]) / (None, [...]).
Fallback chain: real browser -> jsdom -> unverifiable. A browser that's missing,
slow, or crashes returns None internally and falls back rather than failing the
build (a flaky checker must never cause spurious model escalation).
The browser must be installed wherever this runs (rootless conda-forge Firefox —
see DEVELOPING.md); on a minimal image (e.g. the HF Space) it isn't, and we use
jsdom.
"""
from __future__ import annotations
import functools
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from . import webcheck
# Candidate real-browser runner scripts, listed in the order they are probed:
# the Playwright/Chromium runner first (the reachable rootless browser in this
# devcontainer), then the conda-forge Firefox/Selenium runner. The first one
# that launches is cached for the life of the process. Both speak the same JSON
# contract, so callers never need to know which one is in use.
_RUNNERS = [
    Path(__file__).with_name(script)
    for script in ("playwright_runner.py", "browser_runner.py")
]

# Install prefix of the rootless browser. Overridable through the
# SMOLBUILDER_BROWSER_PREFIX environment variable; otherwise `.browser` in the
# directory one level above this package.
_BROWSER_PREFIX = Path(
    os.environ.get(
        "SMOLBUILDER_BROWSER_PREFIX",
        str(Path(__file__).resolve().parents[1] / ".browser"),
    )
)
def _child_env() -> dict:
    """Build the environment handed to the runner subprocess.

    Returns a copy of the current environment in which the browser prefix's
    `lib` directory is placed at the front of LD_LIBRARY_PATH (any existing
    value is kept after it) and SMOLBUILDER_BROWSER_PREFIX is set to the
    resolved prefix. The parent's own environment is not modified.
    """
    child = os.environ.copy()
    search_path = str(_BROWSER_PREFIX / "lib")
    inherited = child.get("LD_LIBRARY_PATH", "")
    if inherited:
        # Prepend so the bundled libraries win over anything already listed.
        search_path = f"{search_path}:{inherited}"
    child["LD_LIBRARY_PATH"] = search_path
    child["SMOLBUILDER_BROWSER_PREFIX"] = str(_BROWSER_PREFIX)
    return child
@functools.lru_cache(maxsize=1)
def _active_runner() -> Path | None:
    """Return the first runner script that exists and produces a verdict.

    Each candidate in `_RUNNERS` is tried in order against a trivial probe
    page with a 45-second budget. The result — including "no runner works"
    (None) — is memoised, because launching a browser is slow and its
    availability is treated as fixed for the life of the process.
    """
    probe_page = "<!doctype html><html><body><button>probe</button></body></html>"
    # Generator keeps the exists() check lazy: it runs just before each probe.
    present = (script for script in _RUNNERS if script.exists())
    for script in present:
        verdict, _errors = _invoke(probe_page, 45, script)
        if verdict is None:
            # Runner could not run (missing browser, timeout, bad output).
            continue
        return script
    return None
def available() -> bool:
    """Report whether some real-browser runner was found to work."""
    runner = _active_runner()
    return runner is not None
def check_html(html: str, timeout: int = 35) -> tuple[bool | None, list[str]]:
    """Check `html` in a real browser, falling back to jsdom, then giving up.

    Returns (True, []) / (False, [...]) when a checker produced a verdict, or
    (None, [...]) when nothing could verify the page. A real-browser run that
    yields no verdict is not treated as a failure; the jsdom checker is tried
    next, with its timeout capped at 20 seconds.
    """
    browser = _active_runner()
    if browser is not None:
        outcome = _invoke(html, timeout, browser)
        if outcome[0] is not None:
            return outcome

    if not webcheck.available():
        return None, ["no runtime checker available (browser + jsdom both missing)"]
    return webcheck.check_html(html, timeout=min(timeout, 20))
def _parse_runner_output(stdout: str | None) -> tuple[bool | None, list[str]]:
    """Decode a runner's stdout into the (ok|None, errors) contract.

    Only the last line of the stripped output is parsed as JSON. Anything that
    is not a JSON object carrying a non-null "ok" counts as "couldn't run".
    """
    lines = (stdout or "").strip().splitlines()
    if not lines:
        return None, []
    try:
        data = json.loads(lines[-1])
    except json.JSONDecodeError:
        return None, []
    # Valid JSON that is not an object (`null`, a bare number, a list) is not a
    # verdict; without this guard `.get` would raise AttributeError.
    if not isinstance(data, dict) or data.get("ok") is None:
        return None, []
    errors = data.get("errors")
    if errors is None:
        errors = []
    elif isinstance(errors, str):
        # A lone message must stay one entry, not be split into characters.
        errors = [errors]
    elif not isinstance(errors, (list, tuple)):
        errors = [str(errors)]
    return bool(data["ok"]), list(errors)


def _invoke(html: str, timeout: int, runner: Path) -> tuple[bool | None, list[str]]:
    """Run a browser runner once against `html`.

    The page is written to a temporary `.html` file, the runner script is
    executed with the current interpreter and that path as its argument, and
    the last line of its stdout is parsed as JSON.

    Args:
        html: Page source to check.
        timeout: Seconds to wait for the runner subprocess.
        runner: Path of the runner script to execute.

    Returns:
        (ok, errors) where ok is True/False for a real verdict, or (None, [])
        when the runner could not produce one: it timed out, could not be
        started, exited with code 3, or printed nothing usable.
    """
    path = None
    try:
        # Explicit UTF-8 so non-ASCII pages are written the same way under any
        # locale. NOTE(review): assumes the runners read the file as UTF-8
        # (or let the browser decode it) — confirm.
        with tempfile.NamedTemporaryFile(
                "w", suffix=".html", delete=False, encoding="utf-8") as f:
            path = f.name
            f.write(html)
        proc = subprocess.run(
            [sys.executable, str(runner), path],
            capture_output=True, text=True, encoding="utf-8", errors="replace",
            timeout=timeout, env=_child_env())
    except (subprocess.TimeoutExpired, OSError):
        # Too slow or could not be launched/written: unverifiable, not a failure.
        return None, []
    finally:
        # delete=False means cleanup is ours, even when the write itself failed.
        if path is not None:
            Path(path).unlink(missing_ok=True)
    if proc.returncode == 3:
        # Exit code 3 is treated as "couldn't run" regardless of any output.
        return None, []
    return _parse_runner_output(proc.stdout)