Spaces:
Running
Running
// Puck's eyes. Capture what's on screen, hand it to the vision brain (Modal),
// which returns wire events that the daemon queues — so the existing poll picks
// them up and Puck reacts, no special handling here. Vision is just another
// event source that happens to come from pixels instead of a hook.
//
// Sim/Space: snapshot the rendered desktop scene (html-to-image). The overlay's
// real-screen path (ScreenCaptureKit via Rust) lands later; same /api/brain/see.
import { toPng } from "html-to-image";

import type { FairyState } from "../engine";
import { moodFor } from "../engine";
import { inTauri, lookAroundNative, type PeekResult, peekNative, type Region } from "./tauri";
// Puck's own surfaces — he observes the desktop, not himself. Reading his own
// feed back through vision would be a hall-of-mirrors (and trains on noise).
// Entries are CSS class names; the snapshot filters in this file skip any
// element whose classList contains one of them.
const PUCK_UI: readonly string[] = [
  "puck", // the sprite
  "comp", // companion (shows his own feed — the worst offender)
  "bubble",
  "toasts",
  "drop", // menu dropdown
  "settings",
  "interrupt-wrap",
  "bloom",
];
/**
 * Sim: render the fake desktop and crop to the small region Puck is peering at
 * (Puck's own UI filtered out). The overlay crops in Rust instead.
 *
 * @param region - Crop rectangle; `x`/`y`/`w`/`h` are used directly as pixel
 *   offsets and sizes into the `#root` render (taken with `pixelRatio: 1`).
 * @returns A JPEG data URL (quality 0.85) of the cropped patch, or `null` when
 *   the rectangle is empty/non-finite, `#root` is missing, no 2D context is
 *   available, or rendering/decoding fails (the failure is logged).
 */
async function snapshotRegion(region: Region): Promise<string | null> {
  const { x, y, w, h } = region;
  // A degenerate rectangle can only produce a blank canvas, and its data URL is
  // still a truthy string — bail out so the caller never uploads an empty image.
  if (![x, y, w, h].every(Number.isFinite) || w <= 0 || h <= 0) return null;

  const root = document.getElementById("root");
  if (!root) return null;

  try {
    // Render the whole scene once, skipping every element tagged with a Puck UI class.
    const full = await toPng(root, {
      pixelRatio: 1,
      filter: (node) => !(node instanceof HTMLElement) || !PUCK_UI.some((c) => node.classList?.contains?.(c)),
    });

    // Decode the PNG so it can be drawn onto a canvas.
    const rendered = new Image();
    rendered.src = full;
    await rendered.decode();

    // Canvas sized to the region; copy just that rectangle to its origin.
    const canvas = document.createElement("canvas");
    canvas.width = w;
    canvas.height = h;
    const ctx = canvas.getContext("2d");
    if (!ctx) return null;
    ctx.drawImage(rendered, x, y, w, h, 0, 0, w, h);

    return canvas.toDataURL("image/jpeg", 0.85);
  } catch (e) {
    console.error("puck: region snapshot failed", e);
    return null;
  }
}
/**
 * Nudge the vision backend awake on load (fire-and-forget). On a hosted Space the
 * cloud 12B scales to zero; pinging now means it's warming before the first peek
 * (~50-95s out). No-op cost locally (a warm /models call).
 *
 * Nothing is awaited and the response is never read; a failed ping is ignored.
 */
export function warmVision(): void {
  const ping = fetch("/api/brain/warm", { method: "POST" });
  // Swallow rejections so a missing/offline backend never surfaces as an unhandled rejection.
  void ping.catch(() => undefined);
}
/**
 * The companion loop's eye: peek at the patch under Puck → `{quip, emotion}` (or null).
 * Overlay: Rust captures the region + the daemon voices it. Sim: crop the fake
 * desktop and POST it. No events, no queue — a line for a bubble + a felt reaction.
 *
 * @param fs - Current fairy state; only `mischief` and the derived mood are sent along.
 * @param region - The patch of screen to look at.
 * @returns The quip and emotion, or `null` when the snapshot fails, the request
 *   fails / times out / is not OK, or the response carries no usable quip.
 */
export async function peekScene(fs: FairyState, region: Region): Promise<PeekResult | null> {
  const fairyState = { mischief: fs.mischief, mood: moodFor(fs) };

  // Overlay: capture and voicing are delegated to the native side.
  if (inTauri()) return peekNative(region, fairyState);

  const image = await snapshotRegion(region);
  if (!image) return null;

  try {
    const res = await fetch("/api/brain/peek", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ image, fairy_state: fairyState }),
      signal: AbortSignal.timeout(70000),
    });
    if (!res.ok) return null;

    // The body is external JSON: narrow it instead of asserting a shape, so a
    // non-string quip/emotion can never leak into the returned PeekResult.
    const body: unknown = await res.json();
    if (typeof body !== "object" || body === null) return null;
    const { quip, emotion } = body as { quip?: unknown; emotion?: unknown };
    if (typeof quip !== "string" || quip === "") return null;

    return { quip, emotion: typeof emotion === "string" ? emotion : "curious" };
  } catch {
    // Offline, timeout, or unparseable body — a peek is best-effort.
    return null;
  }
}
/**
 * Snapshot the SIMULATED desktop (sim/Space) to a PNG data URL, with Puck's own
 * UI filtered out. The overlay's real-screen path lives in `perceive` (it's
 * captured + posted entirely in Rust).
 *
 * @returns The PNG data URL, or `null` when `#root` is missing or rendering
 *   fails (the failure is logged).
 */
async function snapshotScene(): Promise<string | null> {
  const root = document.getElementById("root");
  if (!root) return null;

  // try/await (rather than a `.catch` chain) so a synchronous throw from the
  // renderer is also turned into the documented `null` result.
  try {
    return await toPng(root, {
      pixelRatio: 1, // the VLM needs legible content, not retina detail — keep it small
      filter: (node) => !(node instanceof HTMLElement) || !PUCK_UI.some((c) => node.classList?.contains?.(c)),
    });
  } catch (e) {
    console.error("puck: scene snapshot failed", e);
    return null;
  }
}
/**
 * Look at the screen; perceived events are queued daemon-side (the existing poll
 * delivers them). Returns the count it queued for UI feedback, or **null when vision
 * is unavailable** (no endpoint / offline / timeout) — caller treats null distinctly
 * ("my eyes are shut") from 0 ("nothing worth a fuss"). Null is graceful by design;
 * vision is optional and the sim runs without it.
 *
 * @param fs - Current fairy state; only `mischief` and the derived mood are sent along.
 * @returns Overlay: `0` when the native look was dispatched, `null` when it reported
 *   failure. Sim: the `queued` count from the response (`0` if absent or not a
 *   finite number), or `null` on snapshot/request failure or a non-object body.
 */
export async function perceive(fs: FairyState): Promise<number | null> {
  const fairyState = { mischief: fs.mischief, mood: moodFor(fs) };

  // Overlay: Rust captures the real screen AND posts it (background) — the image
  // never enters the webview. Blank Puck for ~2 frames so he isn't in the shot;
  // lookAroundNative returns right after the capture (NOT after inference), so the
  // blank is brief. Perceived events arrive via the poll. 0 = dispatched, null = failed.
  if (inTauri()) {
    const html = document.documentElement;
    html.classList.add("capturing");
    try {
      // Wait two animation frames so the "capturing" style has been painted.
      // Kept inside the try so the class is always removed again.
      await new Promise<void>((resolve) => {
        requestAnimationFrame(() => requestAnimationFrame(() => resolve()));
      });
      return (await lookAroundNative(fairyState)) ? 0 : null;
    } finally {
      html.classList.remove("capturing");
    }
  }

  const image = await snapshotScene();
  if (!image) return null;

  try {
    const res = await fetch("/api/brain/see", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ image, fairy_state: fairyState }),
      signal: AbortSignal.timeout(70000), // cloud VLM + possible cold start
    });
    if (!res.ok) return null; // 503 = no vision endpoint configured; silent like the brain seam

    // daemon returns { observed, queued, events }; we only surface the queued count.
    // The body is external JSON, so narrow it rather than trusting a cast.
    const data: unknown = await res.json();
    if (typeof data !== "object" || data === null) return null;
    const { queued } = data as { queued?: unknown };
    return typeof queued === "number" && Number.isFinite(queued) ? queued : 0;
  } catch {
    return null; // offline / timeout — vision is optional, the sim runs without it
  }
}