// frontend/js/controller.js — Lightloom · the CORRIDOR controller (LL.controller) // // The world IS the interface. Voice (or typed text) -> LL.api.streamWorld -> each // vivid scene streams into LL.corridor as it is painted, and the camera flies // forward through the world growing ahead of you. A discreet HUD lights up the // tiny model working right now (Voice / Director / Painter / Depth). // // Vanilla ES module. Reuses LL.api, LL.recorder, LL.corridor. import "./api.js"; import "./recorder.js"; import "./stage-scroll.js"; // (subtitles.js is intentionally not used: captions render in #transcript, which // carries aria-live="polite" for screen readers.) const LL = (window.LL = window.LL || {}); const COPY = { warming: { en: "Warming the projector — the first scene takes a few seconds…", es: "Encendiendo el proyector — la primera escena tarda unos segundos…" }, listening: { en: "Listening… speak your story, tap the mic to finish.", es: "Te escucho… cuenta tu historia, toca el micro para terminar." }, speaking: { en: "Listening — your world forms as you speak…", es: "Te escucho — tu mundo se forma mientras hablas…" }, transcribing: { en: "Hearing your words…", es: "Escuchando tus palabras…" }, micDenied: { en: "No mic — type your story instead.", es: "Sin micrófono — escribe tu historia." }, noSpeech: { en: "I didn't catch that. Try again, or type it.", es: "No te escuché. Inténtalo de nuevo, o escríbelo." }, empty: { en: "Say or type a story first.", es: "Di o escribe una historia primero." }, quota: { en: "The world ran out of GPU for today — watch the showcase.", es: "Se acabó la GPU de hoy — mira el showcase." }, building: { en: "Painting your world…", es: "Pintando tu mundo…" }, // transient feedback so a dropped phrase is never indistinguishable from a working one // (was: silent/asr_error/filtered fell through to a no-op and the sticky "building" line lied forever) notHeard: { en: "I didn't catch that — say it again?", es: "No te entendí — ¿otra vez?" }, unclear: { en: "Couldn't hear that clearly — try again.", es: "No se escuchó claro — inténtalo de nuevo." }, filtered: { en: "Let's keep it friendly — try rephrasing.", es: "Mantengámoslo amable — reformula." }, sceneSlip: { en: "That scene slipped — keep going.", es: "Esa escena se escapó — sigue hablando." }, // cycled during the cold-start wait so the ~35s never reads as a frozen hang (see _warmCycle) warmSteps: [ { en: "Warming the projector…", es: "Encendiendo el proyector…" }, { en: "Waking the voice…", es: "Despertando la voz…" }, { en: "The director is reading your words…", es: "El director lee tus palabras…" }, { en: "The painter is mixing light…", es: "El pintor mezcla la luz…" }, ], }; const t = (p, l) => (p && p[l]) || (p && p.en) || ""; const controller = { lang: "en", reducedMotion: false, running: false, _scenes: new Map(), // index -> { image, depth, caption } _count: 0, _statusTimer: null, init() { this._restore(); const canvas = document.getElementById("world-canvas"); if (canvas && LL.scroll) { try { LL.scroll.init(canvas); } catch (e) { console.error("scroll init", e); } // the caption tracks whichever section is under the centre of the scroll — BUT it must not // clobber a just-spoken transcript for a brief pin window, or your words flash and instantly // vanish under the OLDER on-screen section's caption (the "text feels slow/late" complaint). LL.scroll.onFocus = (meta) => { if (Date.now() < (this._tPin || 0)) return; this._transcript(meta && meta.caption); }; } this._initRecorder(); this._wire(); this._applyLang(this.lang); this._startAmbient(); // a pre-rendered scroll flows behind the intro + during warm-up this._loadLedger(); // keep the About param count honest + current from /health this._prewarm(); // load the GPU models DURING the intro so the first phrase is ~instant return this; }, /** Pre-warm the GPU worker (ASR + Director + painter + depth) while the user is still * reading the intro, so their first spoken phrase paints in ~3 s instead of ~40 s. The * showcase covers the load visually. Fire-and-forget; one warm per page. */ _prewarm() { if (this._prewarmed) return; this._prewarmed = true; if (!this._liveSession) this._liveSession = randomHex(); setTimeout(() => { try { LL.api.streamScrollLive(this._liveSession, "__warm__", this.lang, () => {}); } catch (_) {} }, 1200); }, /** Pull the runtime parameter total straight from the ledger (/health) so the About * panel always shows the TRUE count — never a hand-typed number that can drift. */ async _loadLedger() { try { const r = await fetch("/health", { cache: "no-store" }); if (!r.ok) return; const h = await r.json(); const b = (h.params_total || 0) / 1e9; const el = document.getElementById("about-ledger"); if (b > 0 && el) el.textContent = `${b.toFixed(2)}B / 32B parameters · all local · Off the Grid`; } catch (_) {} }, /** Keep a pre-rendered scroll flowing as a living BACKDROP that GUARANTEES the screen * is never black — a gap-filler, not a fixed loop. Every tick it checks how much * painted world is still ahead of the camera (LL.scroll.pendingAhead); only when that * buffer runs low does it extend the strip with a showcase section. So it fills the * cold-start (behind the intro + the ~30s warm-up), and tops up mid-session if the * live generation rate ever dips below the scroll — yet stays SILENT while the user's * real sections flow ahead, so they take over seamlessly with no wipe and no void. * GPU-free (bundled assets). */ // Pick a RANDOM pre-rendered cover variant (a different art style each load) from variants.json, // falling back to the single bundled manifest — so the world looks different from the very first // frame, not only once the live strips arrive. async _pickAmbientManifest() { const load = async (url) => { try { const r = await fetch(url, { cache: "no-store" }); if (r.ok) return (await r.json()).filter((m) => m && m.image); } catch (_) {} return null; }; try { const r = await fetch("/frontend/assets/scroll/variants.json", { cache: "no-store" }); if (r.ok) { const vs = await r.json(); if (Array.isArray(vs) && vs.length) { const v = vs[Math.floor(Math.random() * vs.length)]; const m = await load(`/frontend/assets/scroll/${v}/manifest.json`); if (m && m.length) return m; } } } catch (_) {} return (await load("/frontend/assets/scroll/manifest.json")) || []; }, async _startAmbient() { if (this._ambientOn) return; if (!this._ambientManifest) this._ambientManifest = await this._pickAmbientManifest(); const manifest = this._ambientManifest; if (!manifest.length || !LL.scroll) return; this._ambientOn = true; this._ambientStop = false; let i = 0; const FILL_AHEAD_PX = 1200; // keep at least ~one screen of painted world ahead of the view const tick = () => { // Stop once the user's real world begins: from then on the live sections own the // strip (showcase frames must NOT splice into the narrated world), and the scroll // clamps at the painted frontier so a generation lull pauses — never a black void. if (this._ambientStop || this._realStarted) { this._ambientOn = false; this._ambientTimer = null; return; } let ahead = Infinity; try { ahead = LL.scroll.pendingAhead; } catch (_) {} if (ahead < FILL_AHEAD_PX) { const m = manifest[i++ % manifest.length]; LL.scroll.addSection({ imageUrl: absUrl(m.image), depthUrl: m.depth ? absUrl(m.depth) : null, meta: { caption: "" } }); } this._ambientTimer = setTimeout(tick, 700); }; tick(); }, _wire() { on("mic-btn", "click", () => this._mic()); on("mic-mini", "click", () => this._mic()); on("type-toggle", "click", () => this._toggleComposer()); on("begin-btn", "click", () => { const ta = document.getElementById("story-text"); this.startWorld(ta ? ta.value : ""); }); on("showcase-btn", "click", () => this.playShowcase()); on("stop-btn", "click", () => this.reset()); on("save-btn", "click", () => this._saveWorld()); on("rm-btn", "click", () => this.setReducedMotion(!this.reducedMotion)); on("about-btn", "click", () => this._about(true)); on("about-close", "click", () => this._about(false)); const mq = window.matchMedia && window.matchMedia("(prefers-reduced-motion: reduce)"); if (mq && mq.matches) this.setReducedMotion(true, true); }, _initRecorder() { if (!LL.recorder || typeof LL.recorder.init !== "function") return; LL.recorder.init({ getLang: () => this.lang, onState: (s) => { this._orch("asr", s === "recording"); const mic = document.getElementById("mic-btn"); const mini = document.getElementById("mic-mini"); if (mic) mic.classList.toggle("is-recording", s === "recording"); if (mini) mini.classList.toggle("is-recording", s === "recording"); if (s === "recording") this._beginLive(); // the world starts building AS you speak else this._status(""); }, // LIVE narration: each VAD-cut phrase is painted into the SAME scroll as it's spoken. onSegment: (wavB64) => this._onLiveSegment(wavB64), onTranscript: (text, error) => { const clean = (text || "").trim(); if (clean) this.startWorld(clean); else { const denied = error === "mic_denied" || error === "unsupported"; this._status(t(denied ? COPY.micDenied : COPY.noSpeech, this.lang), 3600); if (denied) this._toggleComposer(true); } }, }); }, _mic() { if (LL.recorder && LL.recorder.supported && typeof LL.recorder.toggle === "function") { LL.recorder.toggle(); } else { this._toggleComposer(true); this._status(t(COPY.micDenied, this.lang), 3000); } }, _toggleComposer(force) { const c = document.getElementById("composer"); if (!c) return; const show = force === true ? true : c.hidden; c.hidden = !show; if (show) { const ta = document.getElementById("story-text"); if (ta) ta.focus(); } }, // ---- the experience: stream a world ---- async startWorld(text) { if (this.running) return; const story = (text || "").trim(); if (!story) { this._status(t(COPY.empty, this.lang), 3000, "warn"); this._toggleComposer(true); return; } if (LL.recorder && LL.recorder.cancel) LL.recorder.cancel(); this.running = true; this._scenes.clear(); this._count = 0; this._realStarted = false; // the ambient keeps flowing until the first real section if (!this._liveSession) this._liveSession = randomHex(); this._liveOn = true; // typed stories now use the SAME live continuous pipeline as voice this._enterWorld(); this._badge("✦ showcase · warming up your world…"); this._warmCycle(true); // Split the story into phrases and paint each through /scroll_live, CONTINUING the last — identical // Director/painter/continuity to the spoken path (no separate, divergent /scroll codepath). const phrases = story.split(/(?<=[.!?…])\s+/).map((s) => s.trim()).filter(Boolean); try { for (const p of (phrases.length ? phrases : [story])) { if (!this._liveOn) break; await LL.api.streamScrollLive(this._liveSession, "", this.lang, (ev) => this._onEvent(ev), p); } } catch (err) { console.error("[controller] typed live failed", err); } finally { this.running = false; this._warmCycle(false); this._orchAllOff(); this._status(""); // leave _liveOn so the user can keep going (speak more / type more) in the SAME world } }, // ---- LIVE narration: build the world AS the user speaks ---- /** Enter the world the instant the mic opens and warm the models, so the first * spoken phrase paints with minimal delay. The ambient gap-filler keeps the screen * full until the user's strips arrive. One session per mic session (continuity). */ _beginLive() { if (this._liveOn) { this._status(t(COPY.speaking, this.lang), 0, "work"); return; } this._liveOn = true; this.running = true; if (!this._liveSession) this._liveSession = randomHex(); this._phraseQ = this._phraseQ || []; this._enterWorld(); // honest cover: the showcase flows during warm-up but is clearly LABELLED, and it is // removed the instant the user's own first strip is painted (see _tryAdd). this._badge("✦ showcase · warming up your world…"); this._status(t(COPY.speaking, this.lang), 0, "work"); // models were already pre-warmed during the intro (_prewarm); if that was skipped, // fire one warm now as a fallback so the first phrase still loads them. if (!this._prewarmed) { try { LL.api.streamScrollLive(this._liveSession, "", this.lang, () => {}); } catch (_) {} } }, /** A freshly spoken phrase (base64 WAV) — queue it; the drainer paints phrases in * spoken order, each one CONTINUING the same panorama. */ _onLiveSegment(wavB64) { if (!this._liveOn || !wavB64) return; if (!this._liveSession) this._liveSession = randomHex(); (this._phraseQ = this._phraseQ || []).push(wavB64); this._drainLive(); }, async _drainLive() { if (this._draining) return; this._draining = true; try { // Loop on the QUEUE, not on _liveOn: if the session ends mid-await the queued phrases must still be // serviced/cleared, otherwise they strand and the drain never resumes. Drop (don't paint) once the // session is gone. while (this._phraseQ && this._phraseQ.length) { // While a keepsake is rendering, the live painter and postprocess share the single GPU slot, so // submitting a phrase now would queue it behind the ~20s render and trip the 25s phrase watchdog // (the words would be silently dropped). BUFFER instead: leave phrases in _phraseQ and drain them // the instant the save finishes (_saveWorld's finish() re-calls _drainLive) -> nothing is lost. if (this._saving) break; // BASELINE drain: paint EVERY spoken phrase as its own strip, in spoken order (the coalescing // experiment merged phrases into one strip and visibly "skipped" what the user said -> reverted). const wav = this._phraseQ.shift(); if (!this._liveOn) continue; // session ended -> drain the rest without painting // instant "is it listening?" feedback: the moment a spoken phrase enters the painter, say so — // the server transcript/section events arrive ~1-2s later, so without this there is dead air. this._status(t(COPY.transcribing, this.lang), 0, "work"); try { await LL.api.streamScrollLive(this._liveSession, wav, this.lang, (ev) => this._onEvent(ev)); } catch (err) { console.error("[controller] live phrase failed", err); } } } finally { this._draining = false; if (!this._phraseQ || !this._phraseQ.length) this._orchAllOff(); } }, // ---- DIRECTOR'S CUT keepsake: save the finished world as a named fly-through MP4 (post-process; the live // world keeps running). Captures _liveSession WITHOUT resetting it, so you can save AND keep speaking. ---- _saveWorld() { if (this._saving) return; // a keepsake is already rendering -> ignore repeat clicks (no double quota burn) const session = this._liveSession; const es = this.lang === "es"; if (!session) { this._status(es ? "Habla primero para crear un mundo" : "Speak first to create a world", 2800); return; } this._saving = true; // _drainLive buffers spoken phrases while this is set (they paint after the render) const token = (this._saveToken = (this._saveToken || 0) + 1); // invalidated by reset() / a newer save const btn = document.getElementById("save-btn"); if (btn) btn.disabled = true; // The single robust clear point: streamPostprocess ALWAYS resolves exactly once (even on a silent // socket close, thanks to its watchdog), so .finally() reliably re-enables Save and drains the phrases // buffered during the render. Token-gated so a stale save that resolves late can't clobber a new world. const finish = () => { if (this._saveToken !== token) return; this._saving = false; if (btn) btn.disabled = false; this._drainLive(); }; this._status(es ? "Creando tu recuerdo…" : "Crafting your keepsake…", 0); LL.api.streamPostprocess(session, this.lang, (ev) => { if (this._saveToken !== token) return; // drop events from a save the user superseded (no pop over a new world) switch (ev && ev.stage) { case "stitched": this._status(es ? "Uniendo tu mundo…" : "Stitching your world…", 0); break; // do NOT write the seer title onto the live #world-title — it shows only inside the keepsake overlay, // so an abandoned/failed save never leaves a stale title on the live world chrome. case "titled": this._status(es ? "Nombrando y filmando…" : "Naming & filming…", 0); break; case "rendering": this._status((es ? "Renderizando… " : "Rendering… ") + Math.round((100 * (ev.frame || 0)) / (ev.total || 1)) + "%", 0); break; case "encoding": this._status(es ? "Codificando la película…" : "Encoding the film…", 0); break; case "ready": this._showKeepsake(ev); break; case "render_timeout": this._status(es ? "El mundo es muy largo para filmar — graba uno más corto" : "That world is too long to film — try a shorter one", 4600); break; case "quota_exceeded": this._status(es ? "Se acabó la GPU de hoy — inténtalo más tarde" : "Out of GPU for today — try later", 4200); break; case "postprocess_error": case "error": this._status(es ? "No se pudo guardar (intenta de nuevo)" : "Couldn't save (try again)", 3400); break; default: break; // seer_skipped etc. -> the MP4 still renders with the fallback title } }).finally(finish); }, /** Show the keepsake overlay with the named fly-through