// frontend/js/api.js — Lightloom "La Sala" · backend transport (LL.api)
//
// The ONLY sanctioned way to reach the GPU pipeline is @gradio/client. Raw
// fetch() against /api/recital BREAKS ZeroGPU quota attribution (the client
// injects the session/quota auth headers fetch cannot), so we never do that
// here. We connect once, submit to a named endpoint, and fan each streamed
// message out to onEvent. See app.py for the validated payload shapes.
//
// Public surface:
//   LL.api.connect()                                   -> Promise (memoized; resolves to the client)
//   LL.api.streamScrollLive(session, audioB64, lang, onEvent, text)
//                                                      -> Promise<{session,done}>
//   LL.api.streamPostprocess(session, lang, onEvent)   -> Promise<{session,done}>
//   LL.api.ask(session, question, lang)                -> Promise<{answer,error?}>
//   LL.api.transcribe(audioB64, lang, onEvent)         -> Promise<{text,error}>
//   LL.api.cancel()                                    -> void (aborts the _streamEndpoint job)
//
// No frameworks, no build step — vanilla ES module.

// PINNED to the exact version currently resolving (2.2.2) and the real dist/index.js entry. The old
// unpinned @latest import meant a future @gradio/client publish could 404/restructure and fail to
// parse at load time -> window.LL.api never assigned -> the world is dead-on-stage with NO error.
// Re-pin deliberately when the Space's gradio SDK is bumped (keep client major in step with gradio).
import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client@2.2.2/dist/index.js";

const LL = (window.LL = window.LL || {});

// Idle budgets for the watchdog (ms without ANY message before the job is cancelled).
// Post-processing gets longer: renders have longer legitimate gaps (cold model load, a
// long encode) than a live phrase, so the live budget would false-cancel.
const LIVE_IDLE_MS = 25000;
const POSTPROCESS_IDLE_MS = 45000;

// Stages after which the post-process stream is considered finished by the controller.
const POSTPROCESS_TERMINAL_STAGES = new Set([
  "ready",
  "postprocess_error",
  "quota_exceeded",
  "render_timeout",
  "error",
]);

// msg.data may arrive as the event object itself OR a 1-element array [event]
// (Gradio wraps single outputs either way depending on version). Normalize so
// the controller always receives a plain event object.
const asEvent = (d) => (Array.isArray(d) ? d[0] : d);

/**
 * Extract the event object from a job message.
 * @param {*} msg a message yielded by iterating a submit() job
 * @returns {?object} the normalized event for a "data" message, else null
 *   (non-"data" messages and non-object payloads are ignored by every caller).
 */
function toEvent(msg) {
  if (!msg || msg.type !== "data") return null;
  const ev = asEvent(msg.data);
  return ev && typeof ev === "object" ? ev : null;
}

/**
 * Wrap a caller-supplied handler so it can never tear down a stream: a throwing
 * handler is logged and swallowed. A missing/non-function handler becomes a no-op.
 * @param {*} onEvent candidate handler
 * @param {string} label inserted into the log line: `[LL.api] ${label} threw`
 * @returns {(event:object)=>void}
 */
function makeEmitter(onEvent, label) {
  if (typeof onEvent !== "function") return () => {};
  return (event) => {
    try {
      onEvent(event);
    } catch (err) {
      console.error(`[LL.api] ${label} threw`, err);
    }
  };
}

/**
 * Drain a job under an IDLE WATCHDOG. A `for await` over the job never resolves if the
 * server closes the socket cleanly mid-job without the iterator signalling done, which
 * would wedge the caller forever. If no message of any type arrives for `idleMs`, the
 * job is cancelled so the loop ends.
 *
 * @param {object} job handle returned by client.submit() (async iterable with .cancel())
 * @param {number} idleMs idle budget in milliseconds, re-armed on every message
 * @param {(event:object)=>void} onData called with each normalized "data" event
 * @returns {Promise<boolean>} true when the watchdog fired (errors raised by the
 *   cancellation itself are suppressed); false when the stream ended on its own.
 * @throws whatever the iteration throws, but only when the watchdog did NOT fire.
 */
async function drainWithWatchdog(job, idleMs, onData) {
  let timedOut = false;
  let watchdog = null;
  const arm = () => {
    clearTimeout(watchdog);
    watchdog = setTimeout(() => {
      timedOut = true;
      try {
        job.cancel();
      } catch (_) {
        // Best-effort: the job may already be finished or torn down.
      }
    }, idleMs);
  };

  arm();
  try {
    for await (const msg of job) {
      arm(); // any message (status included) proves the stream is alive
      const ev = toEvent(msg);
      if (ev) onData(ev);
    }
  } catch (err) {
    if (!timedOut) throw err;
  } finally {
    clearTimeout(watchdog);
  }
  return timedOut;
}

const api = {
  _clientPromise: null, // memoized Client.connect() so we connect at most once
  _job: null, // the live _streamEndpoint submit() job handle (has .cancel())
  _cancelled: false, // true after an explicit cancel() until the next _streamEndpoint starts
  _streamGeneration: 0, // bumped by cancel(); each _streamEndpoint call captures its own value

  /**
   * Connect (once) to the gr.Server that serves this page. Memoized: repeated
   * calls return the same in-flight/resolved promise. A failed connect clears
   * the memo so a later call can retry.
   * @returns {Promise} resolves to the connected client; rejects if connecting fails
   */
  async connect() {
    if (!this._clientPromise) {
      // window.location.origin => same Space that served index.html. This is
      // what carries the ZeroGPU quota identity; do not hardcode a URL.
      this._clientPromise = Client.connect(window.location.origin).catch((err) => {
        this._clientPromise = null; // allow a retry on the next call
        throw err;
      });
    }
    return this._clientPromise;
  },

  // Removed obsolete one-shot endpoints (streamRecital / streamPanorama / streamWorld / streamScroll) —
  // all superseded by streamScrollLive (the continuous-panorama path). The typed path now feeds
  // /scroll_live per sentence, identical Director/painter/continuity to voice.

  /**
   * LIVE narration — the world builds AS YOU SPEAK. Submit ONE spoken phrase segment
   * (base64 WAV); the server transcribes it AND paints a few strips that CONTINUE this
   * `session`'s panorama, streaming transcript/section/depth events. Call it once per
   * VAD-cut phrase; continuity is keyed server-side by `session`. Short-lived per call
   * (it does NOT touch the long-stream _job handle), so phrases can pipeline in order.
   * Pass an empty `audioB64` once on mic-tap to WARM the models ahead of the first word.
   *
   * Never rejects: failures are reported through `onEvent` as
   * `{stage:"error", error:"connect_failed: …"|"stream_failed: …"}` and an idle timeout as
   * `{stage:"section_error", error:"phrase_timeout"}`.
   *
   * @param {string} session session id (falsy is sent as "")
   * @param {string} [audioB64=""] base64 WAV of the phrase
   * @param {string} [lang="en"] language code
   * @param {(event:object)=>void} [onEvent] per-event callback; exceptions are logged, not propagated
   * @param {string} [text=""] typed text for the phrase
   * @returns {Promise<{session:string, done:boolean}>} `session` is the latest id echoed by the
   *   server (or the one passed in); `done` is false only when the connect failed.
   */
  async streamScrollLive(session, audioB64 = "", lang = "en", onEvent, text = "") {
    const emit = makeEmitter(onEvent, "live onEvent");

    let client;
    try {
      client = await this.connect();
    } catch (err) {
      emit({ stage: "error", error: `connect_failed: ${shortErr(err)}` });
      return { session, done: false };
    }

    let currentSession = session;
    let timedOut = false;
    try {
      const job = client.submit("/scroll_live", {
        session: session || "",
        text: text || "",
        audio_b64: audioB64 || "",
        lang: lang || "en",
      });
      timedOut = await drainWithWatchdog(job, LIVE_IDLE_MS, (ev) => {
        if (ev.session) currentSession = ev.session;
        emit(ev);
      });
    } catch (err) {
      emit({ stage: "error", error: `stream_failed: ${shortErr(err)}` });
    }

    // Surfaced as a transient status, not a silent stall.
    if (timedOut) emit({ stage: "section_error", error: "phrase_timeout" });
    return { session: currentSession, done: true };
  },

  /**
   * DIRECTOR'S CUT keepsake. Over the FINISHED session panorama (already on disk under `session`), run the
   * post-process: the MiniCPM-V-4.6 Art Director names it from the pixels, then a depth-parallax fly-through
   * is rendered to world.mp4. Streams {stitched,titled,rendering,encoding,ready} (ready carries .video + the
   * .title). Short-lived per call; does NOT touch the live _job handle. Independent of the live painter.
   *
   * Never rejects, and never strands the UI on the sticky "Crafting…" status: if the stream ends
   * (clean close or a watchdog cancel) without a terminal event, a
   * `{stage:"postprocess_error", error:"postprocess_timeout"|"stream_ended"}` event is synthesized.
   *
   * @param {string} session session id (falsy is sent as "")
   * @param {string} [lang="en"] language code
   * @param {(event:object)=>void} [onEvent] per-event callback; exceptions are logged, not propagated
   * @returns {Promise<{session:string, done:boolean}>} `done` is false only when the connect failed.
   */
  async streamPostprocess(session, lang = "en", onEvent) {
    const emit = makeEmitter(onEvent, "postprocess onEvent");

    let client;
    try {
      client = await this.connect();
    } catch (err) {
      emit({ stage: "postprocess_error", error: `connect_failed: ${shortErr(err)}` });
      return { session, done: false };
    }

    let currentSession = session;
    let timedOut = false;
    let sawTerminal = false;
    try {
      const job = client.submit("/postprocess", { session: session || "", lang: lang || "en" });
      timedOut = await drainWithWatchdog(job, POSTPROCESS_IDLE_MS, (ev) => {
        if (ev.session) currentSession = ev.session;
        if (POSTPROCESS_TERMINAL_STAGES.has(ev.stage)) sawTerminal = true;
        emit(ev);
      });
    } catch (err) {
      emit({ stage: "postprocess_error", error: `stream_failed: ${shortErr(err)}` });
      sawTerminal = true;
    }

    if (!sawTerminal) {
      emit({ stage: "postprocess_error", error: timedOut ? "postprocess_timeout" : "stream_ended" });
    }
    return { session: currentSession, done: true };
  },

  /**
   * "Ask Your World" — MiniCPM-V-4.6 answers a free-form question about the FINISHED world from its pixels.
   * Short (one VLM call). Independent of live image creation. Never rejects.
   *
   * @param {string} session session id (falsy is sent as "")
   * @param {string} question the question (falsy is sent as "")
   * @param {string} [lang="en"] language code
   * @returns {Promise<object>} the LAST event object streamed by the server (expected to carry
   *   `answer`), `{answer:""}` if no event arrived, or `{answer:"", error}` on connect/stream failure.
   */
  async ask(session, question, lang = "en") {
    let client;
    try {
      client = await this.connect();
    } catch (err) {
      return { answer: "", error: `connect_failed: ${shortErr(err)}` };
    }

    try {
      const job = client.submit("/ask", {
        session: session || "",
        question: question || "",
        lang: lang || "en",
      });
      let result = { answer: "" };
      for await (const msg of job) {
        const ev = toEvent(msg);
        if (ev) result = ev; // last event wins
      }
      return result;
    } catch (err) {
      return { answer: "", error: `stream_failed: ${shortErr(err)}` };
    }
  },

  /**
   * Transcribe a base64 WAV (the narrator's mic audio) via Cohere Transcribe.
   * Resolves with {text, error}; never rejects. Used by LL.recorder.
   *
   * @param {string} audioB64 data:audio/wav;base64,... (or bare base64)
   * @param {string} lang ISO code ("en"/"es"/...); default "en"
   * @param {(event:object)=>void} [onEvent] optional per-stage callback; a throwing
   *   callback is logged and does not abort the transcription
   * @returns {Promise<{text:string, error:?string}>} `text` comes from the last
   *   "transcript" event; `error` from an "error"/"quota_exceeded" event or a stream failure
   */
  async transcribe(audioB64, lang = "en", onEvent) {
    const emit = makeEmitter(onEvent, "transcribe onEvent");

    let client;
    try {
      client = await this.connect();
    } catch (err) {
      return { text: "", error: `connect_failed: ${shortErr(err)}` };
    }

    let text = "";
    let error = null;
    try {
      const job = client.submit("/transcribe", { audio_b64: audioB64, lang: lang || "en" });
      for await (const msg of job) {
        const ev = toEvent(msg);
        if (!ev) continue;
        emit(ev);
        if (ev.stage === "transcript") text = ev.text || "";
        if (ev.stage === "error" || ev.stage === "quota_exceeded") error = ev.error || ev.stage;
      }
    } catch (err) {
      error = `stream_failed: ${shortErr(err)}`;
    }
    return { text, error };
  },

  /**
   * Shared streaming core for the @app.api endpoints (recital + panorama). Starting a
   * stream supersedes (cancels) any previous one started through this method.
   *
   * Cancellation is tracked per call with a generation token rather than the shared
   * `_cancelled` flag alone: resetting that flag for a new stream must not "un-cancel"
   * an older stream that is still draining, and a cancel() issued while the connect is
   * pending must prevent the submit instead of leaking a job that nobody can abort.
   *
   * @param {string} endpoint endpoint name passed to client.submit()
   * @param {string} text input text; trimmed, and rejected with an "empty_text" event when blank
   * @param {string} [lang="en"] language code
   * @param {(event:object)=>void} [onEvent] per-event callback; exceptions are logged, not propagated
   * @returns {Promise<{session:?string, done:boolean, cancelled:boolean}>} `done` is true once a
   *   `stage:"done"` event was seen; `cancelled` is true when this call was cancelled or superseded.
   */
  async _streamEndpoint(endpoint, text, lang = "en", onEvent) {
    // A throwing handler must not tear down the whole stream — one bad
    // beat render shouldn't end the film. Log and keep streaming.
    const emit = makeEmitter(onEvent, "onEvent handler");

    // Guard empty input before spending a GPU connect: surface a soft error
    // event the controller can render as a toast, then resolve cleanly.
    const poem = (text || "").trim();
    if (!poem) {
      emit({ stage: "error", error: "empty_text" });
      return { session: null, done: false, cancelled: false };
    }

    // A fresh stream supersedes any previous one.
    this.cancel();
    this._cancelled = false;
    const generation = this._streamGeneration;
    const isCancelled = () => this._streamGeneration !== generation;

    let client;
    try {
      client = await this.connect();
    } catch (err) {
      // Connection failure is terminal for this attempt — report (unless the
      // caller already cancelled) and stop.
      if (!isCancelled()) emit({ stage: "error", error: `connect_failed: ${shortErr(err)}` });
      return { session: null, done: false, cancelled: isCancelled() };
    }

    // Cancelled while connecting: never submit, so no GPU job is started.
    if (isCancelled()) return { session: null, done: false, cancelled: true };

    let session = null;
    let done = false;
    let job = null;
    try {
      // Payload keys MUST be text+lang (the @app.api signatures take text, lang).
      job = client.submit(endpoint, { text: poem, lang: lang || "en" });
      this._job = job;

      // The job is an async iterable of status/data messages. We only act on
      // "data" messages; "status" carries queue/progress info we ignore here.
      for await (const msg of job) {
        if (isCancelled()) break;
        const event = toEvent(msg);
        if (!event) continue;
        if (event.session) session = event.session;
        if (event.stage === "done") done = true;
        emit(event);
      }
    } catch (err) {
      if (!isCancelled()) {
        emit({ stage: "error", error: `stream_failed: ${shortErr(err)}` });
      }
    } finally {
      // Only clear the handle if a newer submit() hasn't already replaced it.
      if (this._job === job) this._job = null;
    }
    return { session, done, cancelled: isCancelled() };
  },

  /**
   * Abort the in-flight _streamEndpoint job (if any). Idempotent and safe to call when
   * nothing is running. Marks the current stream generation cancelled so any
   * already-queued events are dropped by the loop guard in _streamEndpoint.
   * Does not affect streamScrollLive / streamPostprocess / ask / transcribe, which
   * never register their jobs in `_job`.
   * @returns {void}
   */
  cancel() {
    this._cancelled = true;
    this._streamGeneration += 1;
    const job = this._job;
    this._job = null;
    if (job && typeof job.cancel === "function") {
      try {
        job.cancel();
      } catch (err) {
        console.warn("[LL.api] job.cancel() failed", err);
      }
    }
  },
};

// Tiny helper kept local (no extra globals).
/**
 * Reduce any thrown value to a short, loggable string.
 * @param {*} err thrown value (Error, string, object, null, ...)
 * @returns {string} the message (or string form), "unknown" when empty, capped at 160 chars
 */
function shortErr(err) {
  const m = err && (err.message || err.toString());
  return String(m || "unknown").slice(0, 160);
}

LL.api = api;
export default api;