"""Capture real Objectverse Diary UI assets for the HyperFrames demo video.

This script starts the local Gradio app with explicit mock runtimes, drives a
system Chrome instance through the Chrome DevTools Protocol, and saves
screenshots into ``video/objectverse-diary-demo/assets``.
"""

from __future__ import annotations

import argparse
import json
import os
import shutil
import signal
import socket
import subprocess
import textwrap
import threading
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import IO

PROJECT_ROOT = Path(__file__).resolve().parents[1]
DEFAULT_ASSET_DIR = PROJECT_ROOT / "video" / "objectverse-diary-demo" / "assets"
DEFAULT_TRACE_DIR = PROJECT_ROOT / ".tmp" / "demo-video-traces"
DEFAULT_CHROME_PATHS = [
    Path("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"),
    Path("/Applications/Chromium.app/Contents/MacOS/Chromium"),
]
# Executable names tried on PATH when none of DEFAULT_CHROME_PATHS exists.
CHROME_EXECUTABLE_NAMES = (
    "google-chrome",
    "google-chrome-stable",
    "chromium",
    "chromium-browser",
    "chrome",
)

# Node program executed with ``node -e``. It receives one JSON argument with the
# keys ``appUrl``, ``assetDir``, ``profileDir`` and ``chromePath``.
NODE_CAPTURE_SCRIPT = r"""
const fs = require("fs");
const path = require("path");
const http = require("http");
const { spawn } = require("child_process");

const options = JSON.parse(process.argv[1] || process.argv[2]);
const chromeArgs = [
  "--headless=new",
  "--disable-gpu",
  "--hide-scrollbars",
  "--mute-audio",
  "--no-first-run",
  "--no-default-browser-check",
  "--disable-background-networking",
  "--disable-sync",
  "--disable-extensions",
  "--disable-features=Translate",
  "--remote-debugging-port=0",
  `--user-data-dir=${options.profileDir}`,
  "about:blank",
];

function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// GET `url` and resolve with the parsed JSON body.
function requestJson(url) {
  return new Promise((resolve, reject) => {
    http.get(url, (res) => {
      let body = "";
      res.setEncoding("utf8");
      res.on("data", (chunk) => (body += chunk));
      res.on("end", () => {
        try {
          resolve(JSON.parse(body));
        } catch (error) {
          reject(error);
        }
      });
    }).on("error", reject);
  });
}

// Resolve with an open WebSocket, or reject on the first error event.
async function openWebSocket(url) {
  if (typeof WebSocket === "undefined") {
    throw new Error("Node WebSocket global is unavailable. Node 22+ is required.");
  }
  return new Promise((resolve, reject) => {
    const socket = new WebSocket(url);
    socket.addEventListener("open", () => resolve(socket), { once: true });
    socket.addEventListener("error", (event) => reject(event.error || new Error("WebSocket error")), { once: true });
  });
}

// Minimal CDP client: send(method, params) resolves with the matching reply's
// `result`, rejects on a CDP error or after 15 s without a reply.
function makeClient(socket) {
  let id = 0;
  const pending = new Map();
  socket.addEventListener("message", (event) => {
    const message = JSON.parse(event.data);
    if (message.id && pending.has(message.id)) {
      const { resolve, reject, timer } = pending.get(message.id);
      pending.delete(message.id);
      // Cancel the watchdog; a live timer would keep the Node process
      // running for up to 15 s after the capture has finished.
      clearTimeout(timer);
      if (message.error) reject(new Error(`${message.error.message || "CDP error"} ${message.error.data || ""}`));
      else resolve(message.result || {});
    }
  });
  return {
    send(method, params = {}) {
      const commandId = ++id;
      socket.send(JSON.stringify({ id: commandId, method, params }));
      return new Promise((resolve, reject) => {
        const timer = setTimeout(() => {
          if (pending.has(commandId)) {
            pending.delete(commandId);
            reject(new Error(`Timed out waiting for ${method}`));
          }
        }, 15000);
        pending.set(commandId, { resolve, reject, timer });
      });
    },
    close() {
      // Drop any watchdogs still armed so they cannot delay process exit.
      for (const { timer } of pending.values()) clearTimeout(timer);
      pending.clear();
      socket.close();
    },
  };
}

// Poll `expression` in the page every 250 ms until it evaluates truthy.
// On timeout, throw with a short snapshot of the page for diagnostics.
async function waitForExpression(client, expression, timeoutMs = 20000) {
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    const result = await client.send("Runtime.evaluate", {
      expression,
      returnByValue: true,
      awaitPromise: true,
    });
    if (result.result && result.result.value) return true;
    await sleep(250);
  }
  let debugValue = {};
  try {
    const debug = await client.send("Runtime.evaluate", {
      expression: "(() => ({ href: location.href, readyState: document.readyState, text: document.body ? document.body.innerText.slice(0, 500) : '', html: document.documentElement ? document.documentElement.outerHTML.slice(0, 500) : '' }))()",
      returnByValue: true,
    });
    debugValue = debug.result ? debug.result.value : {};
  } catch (error) {
    debugValue = { debugError: String(error) };
  }
  throw new Error(`Timed out waiting for expression: ${expression}\nCurrent page: ${JSON.stringify(debugValue, null, 2)}`);
}

async function evaluate(client, expression) {
  return client.send("Runtime.evaluate", {
    expression,
    returnByValue: true,
    awaitPromise: true,
  });
}

// Wrap `source` in an IIFE that exposes `root` (the gradio-app shadow root
// when present, otherwise `document`) and `pageText` (body innerText).
function withGradioRoot(source) {
  return `
    (() => {
      const app = document.querySelector('gradio-app');
      const root = app && app.shadowRoot ? app.shadowRoot : document;
      const pageText = document.body ? document.body.innerText : '';
      ${source}
    })()
  `;
}

// Wait 650 ms, then save a PNG of the viewport as `<assetDir>/<name>.png`.
async function screenshot(client, name) {
  await sleep(650);
  const result = await client.send("Page.captureScreenshot", {
    format: "png",
    captureBeyondViewport: false,
    fromSurface: true,
  });
  const output = path.join(options.assetDir, `${name}.png`);
  fs.writeFileSync(output, Buffer.from(result.data, "base64"));
  console.log(output);
}

async function main() {
  fs.mkdirSync(options.assetDir, { recursive: true });
  fs.mkdirSync(options.profileDir, { recursive: true });

  const chrome = spawn(options.chromePath, chromeArgs, { stdio: ["ignore", "ignore", "pipe"] });
  let stderr = "";
  chrome.stderr.on("data", (chunk) => {
    stderr += chunk.toString();
  });

  let client = null;
  // One try/finally around everything after the spawn, so Chrome is killed on
  // every exit path, including failures before the page client exists.
  try {
    // Chrome prints the DevTools endpoint on stderr; wait up to 12 s for it.
    let debuggerUrl = "";
    const start = Date.now();
    while (Date.now() - start < 12000) {
      const match = stderr.match(/DevTools listening on (ws:\/\/[^\s]+)/);
      if (match) {
        debuggerUrl = match[1];
        break;
      }
      await sleep(100);
    }
    if (!debuggerUrl) {
      throw new Error("Could not find Chrome DevTools WebSocket URL.");
    }

    const debugPort = new URL(debuggerUrl).port;
    const browserSocket = await openWebSocket(debuggerUrl);
    const browserClient = makeClient(browserSocket);
    let createdTarget;
    try {
      createdTarget = await browserClient.send("Target.createTarget", {
        url: options.appUrl,
      });
    } finally {
      browserClient.close();
    }
    await sleep(500);

    const targets = await requestJson(`http://127.0.0.1:${debugPort}/json/list`);
    const pageTarget = targets.find((target) => target.id === createdTarget.targetId) || targets.find(
      (target) => target.type === "page" && target.webSocketDebuggerUrl && String(target.url || "").startsWith(options.appUrl)
    );
    if (!pageTarget) {
      throw new Error("Could not find a Chrome page target.");
    }

    const socket = await openWebSocket(pageTarget.webSocketDebuggerUrl);
    client = makeClient(socket);

    await client.send("Page.enable");
    await client.send("Runtime.enable");
    await client.send("Emulation.setDeviceMetricsOverride", {
      width: 1920,
      height: 1080,
      deviceScaleFactor: 1,
      mobile: false,
    });
    await client.send("Page.navigate", { url: options.appUrl });
    await waitForExpression(client, withGradioRoot("return Boolean(root.querySelector('#objectverse-app') || pageText.includes('Objectverse Diary'));"), 60000);
    await sleep(3000);
    await screenshot(client, "01-hero");

    await evaluate(client, withGradioRoot(`
      const button = [...root.querySelectorAll('button')].find((el) => el.textContent.includes('OVD-001'));
      if (!button) throw new Error('Coffee mug example button not found');
      button.click();
      return true;
    `));
    await waitForExpression(client, withGradioRoot("return pageText.includes('CoffeeMug worth is awake');"), 35000);
    await sleep(1300);
    await screenshot(client, "02-intake-awake");

    // Scroll `selector` to the top of the viewport, nudge by `offset` px,
    // then take the screenshot `name`.
    const captureScroll = async (name, selector, offset = -120) => {
      await evaluate(client, withGradioRoot(`
        const el = document.querySelector(${JSON.stringify(selector)});
        const shadowEl = root.querySelector(${JSON.stringify(selector)});
        const target = shadowEl || el;
        if (!target) throw new Error('Missing selector ${selector}');
        target.scrollIntoView({ block: 'start', inline: 'nearest' });
        window.scrollBy(0, ${offset});
        return true;
      `));
      await screenshot(client, name);
    };

    await captureScroll("03-object-file", "#results", -80);
    await captureScroll("04-diary", "#diary-output", -190);
    await captureScroll("05-share-card", "#share-chat", -80);

    await evaluate(client, withGradioRoot(`
      const input = root.querySelector('#chat-panel textarea');
      if (!input) throw new Error('Chat textarea not found');
      input.value = "What have you been hiding on my desk?";
      input.dispatchEvent(new Event('input', { bubbles: true }));
      const button = [...root.querySelectorAll('#chat-panel button')].find((el) => el.textContent.trim() === 'Ask');
      if (!button) throw new Error('Ask button not found');
      button.click();
      return true;
    `));
    await waitForExpression(client, withGradioRoot("return pageText.includes('unlimited office hours');"), 20000);
    // Clear and blur the textarea before the chat screenshot.
    await evaluate(client, withGradioRoot(`
      const input = root.querySelector('#chat-panel textarea');
      if (input) {
        input.value = "";
        input.dispatchEvent(new Event('input', { bubbles: true }));
        input.blur();
      }
      return true;
    `));
    await captureScroll("06-chat", "#chat-panel", -140);

    await evaluate(client, withGradioRoot(`
      const label = [...root.querySelectorAll('button, summary, [role="button"], .label-wrap, .accordion')].find((el) => el.textContent.includes('Developer details'));
      const trigger = label ? (label.closest('button, summary, [role="button"]') || label) : null;
      if (!trigger) throw new Error('Developer details trigger not found');
      const expanded = trigger.getAttribute('aria-expanded');
      if (expanded !== 'true') trigger.click();
      window.scrollTo({ top: document.body.scrollHeight, behavior: 'instant' });
      return true;
    `));
    await waitForExpression(client, withGradioRoot("return pageText.includes('Trace saved') || pageText.includes('sample-01');"), 10000);
    await sleep(1200);
    await screenshot(client, "07-trace");
  } finally {
    if (client) client.close();
    chrome.kill("SIGTERM");
  }
}

main().catch((error) => {
  console.error(error.stack || String(error));
  process.exit(1);
});
"""


def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the capture run."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--asset-dir", type=Path, default=DEFAULT_ASSET_DIR)
    parser.add_argument("--trace-dir", type=Path, default=DEFAULT_TRACE_DIR)
    parser.add_argument("--port", type=int, default=7860)
    parser.add_argument("--chrome-path", type=Path, default=None)
    parser.add_argument("--python", type=Path, default=PROJECT_ROOT / ".venv" / "bin" / "python")
    parser.add_argument(
        "--use-uv",
        action="store_true",
        help="Run the app in a temporary uv environment instead of the project virtualenv.",
    )
    return parser.parse_args()


def find_free_port(preferred: int) -> int:
    """Return the first bindable loopback port in ``[preferred, preferred + 50)``.

    Each candidate is probed by binding a throwaway socket to it. Unlike a
    connect probe, this also rejects ports that are bound but not listening.
    The scan never goes past port 65535.

    Raises:
        RuntimeError: if no candidate port can be bound.
    """
    for port in range(preferred, min(preferred + 50, 65536)):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.bind(("127.0.0.1", port))
            except OSError:
                # In use, or not permitted for this user; try the next one.
                continue
            return port
    raise RuntimeError("Could not find a free local port for Gradio.")


def wait_for_http(url: str, timeout: float = 45.0) -> None:
    """Block until ``url`` answers with any HTTP status below 500.

    The URL is polled every 0.5 s. A 4xx reply counts as "server is up":
    ``urlopen`` reports those as ``HTTPError``, so they are handled explicitly.

    Raises:
        RuntimeError: if the deadline passes; the message includes the last
            error seen.
    """
    # Monotonic clock so wall-clock adjustments cannot stretch or cut the wait.
    deadline = time.monotonic() + timeout
    last_error: Exception | None = None
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=3) as response:
                if response.status < 500:
                    return
        except urllib.error.HTTPError as exc:
            exc.close()
            if exc.code < 500:
                return
            last_error = exc
        except Exception as exc:  # noqa: BLE001 - retain last startup error for diagnostics.
            last_error = exc
        time.sleep(0.5)
    raise RuntimeError(f"Timed out waiting for {url}: {last_error}")


def chrome_path(explicit_path: Path | None) -> Path:
    """Locate a Chrome or Chromium executable.

    Search order: ``explicit_path`` (if given), then ``DEFAULT_CHROME_PATHS``,
    then the names in ``CHROME_EXECUTABLE_NAMES`` looked up on ``PATH``.

    Raises:
        RuntimeError: if no executable is found.
    """
    candidates = [explicit_path] if explicit_path else []
    candidates.extend(DEFAULT_CHROME_PATHS)
    for candidate in candidates:
        if candidate and candidate.exists():
            return candidate
    for name in CHROME_EXECUTABLE_NAMES:
        located = shutil.which(name)
        if located:
            return Path(located)
    raise RuntimeError("Could not find Chrome. Pass --chrome-path with a Chrome or Chromium executable.")


def start_gradio(
    python_path: Path,
    port: int,
    trace_dir: Path,
    *,
    use_uv: bool = False,
) -> subprocess.Popen[str]:
    """Launch ``app.py`` from the project root with mock backends.

    The child inherits the current environment plus the mock-backend, trace
    directory and Gradio host/port variables set below. Its stdout and stderr
    are merged into one text pipe, which the caller must keep draining.
    """
    env = os.environ.copy()
    env.update(
        {
            "OBJECTVERSE_VISION_BACKEND": "mock",
            "OBJECTVERSE_TEXT_BACKEND": "mock",
            "TRACE_OUTPUT_DIR": str(trace_dir),
            "GRADIO_SERVER_NAME": "127.0.0.1",
            "GRADIO_SERVER_PORT": str(port),
        }
    )
    return subprocess.Popen(
        _app_command(python_path, use_uv),
        cwd=PROJECT_ROOT,
        env=env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )


def _app_command(python_path: Path, use_uv: bool) -> list[str]:
    """Build the argv that runs ``app.py``, directly or through ``uv run``."""
    if not use_uv:
        return [str(python_path), "app.py"]
    return [
        "uv",
        "run",
        "--no-project",
        "--with",
        "gradio>=4.44,<6",
        "--with",
        "pydantic>=2.7,<3",
        "--with",
        "spaces>=0.30",
        "--python",
        str(python_path),
        "python",
        "app.py",
    ]


def run_node_capture(app_url: str, asset_dir: Path, chrome: Path) -> None:
    """Run ``NODE_CAPTURE_SCRIPT`` under Node against ``app_url``.

    A fresh Chrome profile directory is used on every run. Node's combined
    output is echoed to stdout.

    Raises:
        RuntimeError: if ``node`` is not on ``PATH`` or the script exits
            with a non-zero status.
    """
    if shutil.which("node") is None:
        raise RuntimeError("Node.js is required for UI capture but `node` was not found on PATH.")

    profile_dir = PROJECT_ROOT / ".tmp" / "demo-video-chrome-profile"
    if profile_dir.exists():
        shutil.rmtree(profile_dir)

    options = {
        "appUrl": app_url,
        "assetDir": str(asset_dir),
        "profileDir": str(profile_dir),
        "chromePath": str(chrome),
    }
    node = subprocess.run(
        ["node", "-e", NODE_CAPTURE_SCRIPT, json.dumps(options)],
        cwd=PROJECT_ROOT,
        text=True,
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(node.stdout, end="")
    if node.returncode != 0:
        raise RuntimeError(f"Node UI capture failed with exit code {node.returncode}.")


def _drain_output(stream: IO[str], sink: list[str]) -> None:
    """Append every line from ``stream`` to ``sink`` until EOF."""
    for line in stream:
        sink.append(line)


def _stop_server(server: subprocess.Popen[str], grace: float = 8.0) -> None:
    """Stop ``server``, escalating SIGINT -> terminate -> kill as needed."""
    if server.poll() is not None:
        return
    server.send_signal(signal.SIGINT)
    try:
        server.wait(timeout=grace)
        return
    except subprocess.TimeoutExpired:
        server.terminate()
    try:
        server.wait(timeout=grace)
    except subprocess.TimeoutExpired:
        server.kill()
        server.wait()


def main() -> int:
    """Start the app, capture all screenshots, and verify they exist.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: for a missing interpreter, ``uv``, Chrome or Node, a
            server that never becomes reachable, a failed capture, or
            missing output files.
    """
    args = parse_args()
    asset_dir = args.asset_dir.resolve()
    trace_dir = args.trace_dir.resolve()
    python_path = args.python.resolve()

    if not args.use_uv and not python_path.exists():
        raise RuntimeError(f"Python interpreter not found: {python_path}")
    if args.use_uv and shutil.which("uv") is None:
        raise RuntimeError("uv is required when --use-uv is set.")

    chrome = chrome_path(args.chrome_path)
    port = find_free_port(args.port)
    app_url = f"http://127.0.0.1:{port}"

    asset_dir.mkdir(parents=True, exist_ok=True)
    trace_dir.mkdir(parents=True, exist_ok=True)

    print(
        textwrap.dedent(
            f"""
            Capturing Objectverse Diary demo assets
              app: {app_url}
              assets: {asset_dir}
              traces: {trace_dir}
              chrome: {chrome}
            """
        ).strip()
    )

    server = start_gradio(python_path, port, trace_dir, use_uv=args.use_uv)
    # Drain the server's pipe while it runs. Otherwise a chatty app fills the
    # OS pipe buffer and blocks, and waiting on the process could deadlock.
    server_output: list[str] = []
    drain: threading.Thread | None = None
    if server.stdout is not None:
        drain = threading.Thread(
            target=_drain_output,
            args=(server.stdout, server_output),
            daemon=True,
        )
        drain.start()
    try:
        wait_for_http(app_url)
        run_node_capture(app_url, asset_dir, chrome)
    finally:
        _stop_server(server)
        if drain is not None:
            # Bounded join: a stray grandchild holding the pipe open must not
            # hang the script.
            drain.join(timeout=5)
        output = "".join(list(server_output))
        if output:
            print(output)

    expected = [
        "01-hero.png",
        "02-intake-awake.png",
        "03-object-file.png",
        "04-diary.png",
        "05-share-card.png",
        "06-chat.png",
        "07-trace.png",
    ]
    missing = [name for name in expected if not (asset_dir / name).exists()]
    if missing:
        raise RuntimeError(f"Missing captured assets: {', '.join(missing)}")

    print("Captured assets:")
    for name in expected:
        print(f"- {asset_dir / name}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())