Spaces:
Running on Zero
Running on Zero
| // frontend/js/api.js — Lightloom "La Sala" · backend transport (LL.api) | |
| // | |
| // The ONLY sanctioned way to reach the GPU pipeline is @gradio/client. Raw | |
| // fetch() against /api/recital BREAKS ZeroGPU quota attribution (the client | |
| // injects the session/quota auth headers fetch cannot), so we never do that | |
| // here. We connect once, submit "/recital", and fan each streamed message out | |
| // to onEvent. See app.py @app.api(name="recital", ...) for the validated shape. | |
| // | |
| // Public surface (per the shared contract): | |
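//   LL.api.connect()                                                -> Promise<Client> (memoized)
//   LL.api.streamScrollLive(session, audioB64, lang, onEvent, text) -> Promise<{session,done}>
//   LL.api.streamPostprocess(session, lang, onEvent)                -> Promise<{session,done}>
//   LL.api.ask(session, question, lang)                             -> Promise<{answer,error?}>
//   LL.api.transcribe(audioB64, lang, onEvent)                      -> Promise<{text,error}>
//   LL.api.cancel()                                                 -> void (aborts the live job)
// (The one-shot streamRecital/streamPanorama/streamWorld/streamScroll wrappers were removed.)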
| // | |
| // No frameworks, no build step — vanilla ES module. | |
| // PINNED to the exact version currently resolving (2.2.2) and the real dist/index.js entry. The old | |
| // unpinned @latest import meant a future @gradio/client publish could 404/restructure and fail to | |
| // parse at load time -> window.LL.api never assigned -> the world is dead-on-stage with NO error. | |
| // Re-pin deliberately when the Space's gradio SDK is bumped (keep client major in step with gradio). | |
| import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client@2.2.2/dist/index.js"; | |
// Shared page-wide namespace: reuse window.LL when another script already
// created it, otherwise start from an empty object.
window.LL = window.LL || {};
const LL = window.LL;
// Gradio delivers a single output either bare (the event object itself) or
// wrapped in a one-element array, depending on the client/server version.
// Unwrap the array form so callers always see the event itself; anything that
// is not an array is passed through untouched.
const asEvent = (payload) => {
  if (!Array.isArray(payload)) {
    return payload;
  }
  return payload[0];
};
const api = {
  _clientPromise: null, // memoized Client.connect() so we connect at most once
  _job: null, // the live _streamEndpoint() job handle (has .cancel())
  _cancelled: false, // true from cancel() until the next _streamEndpoint() run begins
  _streamSeq: 0, // bumped by every cancel(); each _streamEndpoint() run remembers its own value

  /**
   * Connect (once) to the server that served this page. Memoized: repeated
   * calls return the same in-flight/resolved promise. A failed connect clears
   * the memo so a later call can retry.
   * @returns {Promise<import("@gradio/client").Client>}
   */
  async connect() {
    if (!this._clientPromise) {
      // window.location.origin => same Space that served index.html. This is
      // what carries the ZeroGPU quota identity; do not hardcode a URL.
      this._clientPromise = Client.connect(window.location.origin).catch((err) => {
        this._clientPromise = null; // allow a retry on the next call
        throw err;
      });
    }
    return this._clientPromise;
  },

  // Removed obsolete one-shot endpoints (streamRecital / streamPanorama / streamWorld / streamScroll) —
  // all superseded by streamScrollLive (the continuous-panorama path). The typed path now feeds
  // /scroll_live per sentence, identical Director/painter/continuity to voice.

  /**
   * LIVE narration — submit ONE phrase segment to "/scroll_live" and forward every
   * streamed data event to `onEvent`. Per the endpoint contract the server transcribes
   * the audio and paints strips that continue this `session`'s panorama; continuity is
   * keyed server-side by `session`. Short-lived per call (it does NOT touch the
   * long-stream `_job` handle), so phrases can pipeline in order. Pass an empty
   * `audioB64` once on mic-tap to warm the models ahead of the first word.
   *
   * Never rejects for transport problems: connect/stream failures are reported as
   * `{stage:"error"}` events and a 25 s idle gap as `{stage:"section_error"}`.
   *
   * @param {string} session  session id ("" to let the server allocate one)
   * @param {string} [audioB64=""] base64 WAV of the phrase (may be empty)
   * @param {string} [lang="en"]   language code
   * @param {(event:object)=>void} [onEvent] per-event callback
   * @param {string} [text=""]     typed text, used instead of/alongside audio
   * @returns {Promise<{session:string, done:boolean}>} `done` is false only when connect failed
   */
  async streamScrollLive(session, audioB64 = "", lang = "en", onEvent, text = "") {
    const emit = typeof onEvent === "function" ? onEvent : () => {};
    let client;
    try {
      client = await this.connect();
    } catch (err) {
      emit({ stage: "error", error: `connect_failed: ${shortErr(err)}` });
      return { session, done: false };
    }
    let timedOut = false;
    try {
      const job = client.submit("/scroll_live", {
        session: session || "",
        text: text || "",
        audio_b64: audioB64 || "",
        lang: lang || "en",
      });
      // IDLE WATCHDOG: a `for await` over the job never resolves if the server closes the socket
      // cleanly mid-job without the iterator signalling done -> the live drain would wedge forever
      // on one scene. If no message arrives for 25 s, cancel the job so the loop ends.
      let watchdog = null;
      const arm = () => {
        if (watchdog) clearTimeout(watchdog);
        watchdog = setTimeout(() => {
          timedOut = true;
          try {
            job.cancel();
          } catch (_) {
            // best effort: the timeout itself is reported below
          }
        }, 25000);
      };
      arm();
      try {
        for await (const msg of job) {
          arm(); // any message (status or data) proves the stream is alive
          if (!msg || msg.type !== "data") continue;
          const ev = asEvent(msg.data);
          if (!ev || typeof ev !== "object") continue;
          if (ev.session) session = ev.session;
          try {
            emit(ev);
          } catch (e) {
            console.error("[LL.api] live onEvent threw", e);
          }
        }
      } finally {
        if (watchdog) clearTimeout(watchdog);
      }
    } catch (err) {
      if (!timedOut) emit({ stage: "error", error: `stream_failed: ${shortErr(err)}` });
    }
    // Surfaced as a transient status, not a silent stall.
    if (timedOut) emit({ stage: "section_error", error: "phrase_timeout" });
    return { session, done: true };
  },

  /**
   * DIRECTOR'S CUT keepsake — run "/postprocess" over the finished panorama stored under
   * `session` and forward its streamed events ({stitched,titled,rendering,encoding,ready};
   * `ready` carries `.video` and `.title`, per the endpoint contract). Short-lived per call;
   * does NOT touch the live `_job` handle.
   *
   * Always ends with a terminal event: if the stream stops (clean close, failure, or the
   * 45 s idle watchdog) before the server sent one, a `postprocess_error` is synthesized.
   *
   * @param {string} session
   * @param {string} [lang="en"]
   * @param {(event:object)=>void} [onEvent]
   * @returns {Promise<{session:string, done:boolean}>} `done` is false only when connect failed
   */
  async streamPostprocess(session, lang = "en", onEvent) {
    const emit = typeof onEvent === "function" ? onEvent : () => {};
    let client;
    try {
      client = await this.connect();
    } catch (err) {
      emit({ stage: "postprocess_error", error: `connect_failed: ${shortErr(err)}` });
      return { session, done: false };
    }
    let timedOut = false;
    let sawTerminal = false;
    const isTerminal = (s) =>
      s === "ready" ||
      s === "postprocess_error" ||
      s === "quota_exceeded" ||
      s === "render_timeout" ||
      s === "error";
    try {
      const job = client.submit("/postprocess", { session: session || "", lang: lang || "en" });
      // IDLE WATCHDOG (mirrors streamScrollLive): a clean mid-stream socket close would otherwise
      // leave the keepsake stuck forever on the sticky "Crafting…" status. 45 s — renders have
      // longer legitimate gaps (cold model load, a long encode) than a live phrase.
      let watchdog = null;
      const arm = () => {
        if (watchdog) clearTimeout(watchdog);
        watchdog = setTimeout(() => {
          timedOut = true;
          try {
            job.cancel();
          } catch (_) {
            // best effort: the timeout itself is reported below
          }
        }, 45000);
      };
      arm();
      try {
        for await (const msg of job) {
          arm();
          if (!msg || msg.type !== "data") continue;
          const ev = asEvent(msg.data);
          if (!ev || typeof ev !== "object") continue;
          if (ev.session) session = ev.session;
          if (isTerminal(ev.stage)) sawTerminal = true;
          try {
            emit(ev);
          } catch (e) {
            console.error("[LL.api] postprocess onEvent threw", e);
          }
        }
      } finally {
        if (watchdog) clearTimeout(watchdog);
      }
    } catch (err) {
      if (!timedOut) {
        emit({ stage: "postprocess_error", error: `stream_failed: ${shortErr(err)}` });
        sawTerminal = true;
      }
    }
    // Never strand the UI on the sticky "Crafting…" status: if the stream ended (clean close or
    // a watchdog cancel) without a terminal event, synthesize one so the controller can clear it.
    if (!sawTerminal) {
      emit({ stage: "postprocess_error", error: timedOut ? "postprocess_timeout" : "stream_ended" });
    }
    return { session, done: true };
  },

  /**
   * "Ask Your World" — submit a free-form question about the finished world to "/ask".
   * Resolves with the LAST data event the endpoint streamed (expected shape
   * `{ answer, error? }`), or `{ answer: "" }` when no data event arrived. Transport
   * failures resolve as `{ answer: "", error }`; this method does not reject for them.
   *
   * @param {string} session
   * @param {string} question
   * @param {string} [lang="en"]
   * @returns {Promise<{answer:string, error?:string}>}
   */
  async ask(session, question, lang = "en") {
    let client;
    try {
      client = await this.connect();
    } catch (err) {
      return { answer: "", error: `connect_failed: ${shortErr(err)}` };
    }
    try {
      const job = client.submit("/ask", {
        session: session || "",
        question: question || "",
        lang: lang || "en",
      });
      let res = { answer: "" };
      for await (const msg of job) {
        if (!msg || msg.type !== "data") continue;
        const ev = asEvent(msg.data);
        if (ev && typeof ev === "object") res = ev;
      }
      return res;
    } catch (err) {
      return { answer: "", error: `stream_failed: ${shortErr(err)}` };
    }
  },

  /**
   * Transcribe a base64 WAV (the narrator's mic audio) via the "/transcribe" endpoint.
   * Resolves with {text, error}: `text` is taken from the last `transcript` event and
   * `error` from an `error`/`quota_exceeded` event or a transport failure.
   *
   * A throwing `onEvent` is logged and ignored (same policy as the other streams), so a
   * UI bug cannot discard an otherwise successful transcript.
   *
   * @param {string} audioB64 data:audio/wav;base64,... (or bare base64)
   * @param {string} lang ISO code ("en"/"es"/...); default "en"
   * @param {(event:object)=>void} [onEvent] optional per-stage callback
   * @returns {Promise<{text:string, error:?string}>}
   */
  async transcribe(audioB64, lang = "en", onEvent) {
    const emit = typeof onEvent === "function" ? onEvent : () => {};
    let client;
    try {
      client = await this.connect();
    } catch (err) {
      return { text: "", error: `connect_failed: ${shortErr(err)}` };
    }
    let text = "";
    let error = null;
    try {
      const job = client.submit("/transcribe", { audio_b64: audioB64, lang: lang || "en" });
      for await (const msg of job) {
        if (!msg || msg.type !== "data") continue;
        const ev = asEvent(msg.data);
        if (!ev || typeof ev !== "object") continue;
        try {
          emit(ev);
        } catch (handlerErr) {
          // Must not abort the stream or be misreported as stream_failed.
          console.error("[LL.api] transcribe onEvent threw", handlerErr);
        }
        if (ev.stage === "transcript") text = ev.text || "";
        if (ev.stage === "error" || ev.stage === "quota_exceeded") error = ev.error || ev.stage;
      }
    } catch (err) {
      error = `stream_failed: ${shortErr(err)}`;
    }
    return { text, error };
  },

  /**
   * Shared streaming core for text-in endpoints: submits `{text, lang}` to `endpoint`,
   * tracks the job in `_job` so cancel() can abort it, and forwards data events.
   * Starting a stream supersedes (cancels) any previous one.
   *
   * Cancellation is tracked per run with a sequence token rather than the shared
   * `_cancelled` flag: a newer run resets that flag, which would otherwise let a
   * superseded run keep emitting late events and report `cancelled: false`.
   *
   * NOTE(review): nothing inside this object calls this method any more (the one-shot
   * wrappers were removed); kept for external callers — confirm before deleting.
   *
   * @param {string} endpoint e.g. "/recital"
   * @param {string} text     input text; blank input yields an `empty_text` error event
   * @param {string} [lang="en"]
   * @param {(event:object)=>void} [onEvent]
   * @returns {Promise<{session:?string, done:boolean, cancelled:boolean}>}
   */
  async _streamEndpoint(endpoint, text, lang = "en", onEvent) {
    const emit = typeof onEvent === "function" ? onEvent : () => {};
    // Guard empty input before spending a GPU connect: surface a soft error
    // event the controller can render as a toast, then resolve cleanly.
    const poem = (text || "").trim();
    if (!poem) {
      emit({ stage: "error", error: "empty_text" });
      return { session: null, done: false, cancelled: false };
    }
    // A fresh stream supersedes any previous one. cancel() bumps _streamSeq, so the
    // value captured here is unique to this run until the next cancel()/stream.
    this.cancel();
    this._cancelled = false;
    const seq = this._streamSeq;
    const isLive = () => this._streamSeq === seq;

    let client;
    try {
      client = await this.connect();
    } catch (err) {
      // A run cancelled/superseded while connecting stays silent.
      if (!isLive()) return { session: null, done: false, cancelled: true };
      // Connection failure is terminal for this attempt — report and stop.
      emit({ stage: "error", error: `connect_failed: ${shortErr(err)}` });
      return { session: null, done: false, cancelled: false };
    }
    // cancel() (or a newer stream) arrived while we were connecting: do not submit
    // a job nobody is waiting for.
    if (!isLive()) return { session: null, done: false, cancelled: true };

    let session = null;
    let done = false;
    let job = null;
    try {
      // Payload keys MUST be text+lang (the @app.api signatures take text, lang).
      job = client.submit(endpoint, { text: poem, lang: lang || "en" });
      this._job = job;
      // The job is an async iterable of status/data messages. We only act on
      // "data" messages; "status" carries queue/progress info we ignore here.
      for await (const msg of job) {
        if (!isLive()) break; // drop events already queued when cancel() ran
        if (!msg || msg.type !== "data") continue;
        const event = asEvent(msg.data);
        if (!event || typeof event !== "object") continue;
        if (event.session) session = event.session;
        if (event.stage === "done") done = true;
        try {
          emit(event);
        } catch (handlerErr) {
          // A throwing handler must not tear down the whole stream — one bad
          // beat render shouldn't end the film. Log and keep streaming.
          console.error("[LL.api] onEvent handler threw", handlerErr);
        }
      }
    } catch (err) {
      if (isLive()) {
        emit({ stage: "error", error: `stream_failed: ${shortErr(err)}` });
      }
    } finally {
      // Only clear the handle if a newer submit() hasn't already replaced it.
      if (this._job === job) this._job = null;
    }
    return { session, done, cancelled: !isLive() };
  },

  /**
   * Abort the in-flight _streamEndpoint() job (if any). Idempotent and safe to call
   * when nothing is running. Bumps the stream sequence so the running stream — even
   * one still waiting on connect() — recognizes it was cancelled and drops any
   * already-queued events.
   */
  cancel() {
    this._cancelled = true;
    this._streamSeq += 1;
    const job = this._job;
    this._job = null;
    if (job && typeof job.cancel === "function") {
      try {
        job.cancel();
      } catch (err) {
        console.warn("[LL.api] job.cancel() failed", err);
      }
    }
  },
};
/**
 * Condense any thrown value into a short (<= 160 chars) message for error events.
 * Kept local (no extra globals).
 *
 * Prefers `err.message`, falls back to `err.toString()`, and yields "unknown" for
 * falsy values or empty text. It is only ever called from catch blocks, so it must
 * never throw itself: values whose stringification fails (null-prototype objects,
 * a throwing `toString`) also map to "unknown".
 *
 * @param {*} err the caught value
 * @returns {string}
 */
function shortErr(err) {
  try {
    const raw = err && (err.message || err.toString());
    return String(raw || "unknown").slice(0, 160);
  } catch (_) {
    return "unknown";
  }
}
// Publish the transport on the shared LL namespace (for classic scripts) and
// as this module's default export (for ES-module importers).
Object.assign(LL, { api });
export default api;