Spaces:
Configuration error
Configuration error
| import { env, pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.2.0"; | |
// --- Transformers.js runtime configuration -------------------------------
// Local model lookup is disabled and the browser cache is enabled.
Object.assign(env, {
  allowLocalModels: false,
  useBrowserCache: true,
});

// Wrap the global fetch so network activity is reported under the "tts" scope.
installFetchTelemetry("tts");

// --- Worker state ----------------------------------------------------------
// Voice names that load() preloads in the background (minus the selected one).
const VOICES = ["F1", "F2", "M1", "M2"];

// Voice embeddings that have finished loading, keyed by voice name.
const voiceEmbeddings = new Map();

// In-flight voice downloads, keyed by voice name, so concurrent requests share one fetch.
const voicePromises = new Map();

// The loaded text-to-speech pipeline, or null until load() has completed.
let synthesizer = null;

// Identifier of the model most recently passed to load().
let modelId = "";

// Incremented on every "cancel" message; queued work stamped with an older value is skipped.
let cancelEpoch = 0;

// Tail of the serial task chain; every incoming request is appended to it.
let queue = Promise.resolve();
/**
 * Wraps the global `fetch` so that every request is reported to the host as a
 * `{ type: "network", ... }` message via `self.postMessage`.
 *
 * Installation happens at most once per global object (guarded by a flag on
 * `globalThis`) and is skipped entirely when no `fetch` is available.
 *
 * Telemetry is best-effort: a failure while posting a telemetry message never
 * changes the outcome of the wrapped request.
 *
 * @param {string} scope - Label copied into every telemetry message.
 * @returns {void}
 */
function installFetchTelemetry(scope) {
  const originalFetch = globalThis.fetch?.bind(globalThis);
  if (!originalFetch || globalThis.__browserSpeakFetchTelemetryInstalled) return;
  globalThis.__browserSpeakFetchTelemetryInstalled = true;

  // Posts one telemetry record. Errors are logged, not propagated, so that a
  // telemetry problem cannot turn a successful fetch into a failed one.
  const report = (details) => {
    try {
      self.postMessage({ type: "network", scope, ...details });
    } catch (telemetryError) {
      console.warn("Fetch telemetry could not be posted.", telemetryError);
    }
  };

  globalThis.fetch = async (input, init) => {
    const startedAt = performance.now();
    const url = fetchUrl(input);
    const method = String(init?.method || input?.method || "GET").toUpperCase();

    let response;
    try {
      response = await originalFetch(input, init);
    } catch (error) {
      report({
        method,
        url,
        status: null,
        ok: false,
        durationMs: performance.now() - startedAt,
        // `error` may be any thrown value, including null/undefined.
        error: error?.message ?? String(error),
      });
      throw error;
    }

    report({
      method,
      url,
      responseUrl: response.url || url,
      status: response.status,
      ok: response.ok,
      durationMs: performance.now() - startedAt,
    });
    return response;
  };
}
/**
 * Extracts a URL string from the first argument of a `fetch` call.
 *
 * @param {*} input - A string, a `URL`, or an object exposing a `url` property.
 * @returns {string} The URL text, or "" when none can be determined.
 */
function fetchUrl(input) {
  if (input instanceof URL) {
    return input.href;
  }
  return typeof input === "string" ? input : (input?.url ?? "");
}
/**
 * Entry point for messages from the host. Work requests are appended to the
 * serial `queue`; "cancel" takes effect immediately by advancing `cancelEpoch`.
 * Unknown message types are ignored.
 */
self.onmessage = (event) => {
  const message = event.data;

  // Appends a task to the serial chain; failures are reported through postError.
  const enqueue = (task) => {
    queue = queue.then(task).catch(postError);
  };

  switch (message.type) {
    case "load":
      enqueue(() => load(message));
      break;
    case "preload-voice":
      enqueue(() => preloadVoice(message));
      break;
    case "speak": {
      // Stamp the request with the epoch at enqueue time so a later cancel skips it.
      const stamped = { ...message, cancelEpoch };
      enqueue(() => speak(stamped));
      break;
    }
    case "synthesize": {
      const stamped = { ...message, cancelEpoch };
      enqueue(() => synthesize(stamped));
      break;
    }
    case "cancel":
      cancelEpoch += 1;
      break;
    default:
      break;
  }
};
/**
 * Loads the text-to-speech pipeline for `model`, fetches the selected voice
 * embedding, optionally runs a short warm-up synthesis, and then announces
 * readiness. The remaining entries of `VOICES` are preloaded in the background.
 *
 * @param {object} options
 * @param {string} options.model - Model identifier handed to `pipeline`.
 * @param {string} options.device - Device option forwarded to `pipeline`.
 * @param {string} [options.voice="F2"] - Voice to load before reporting ready.
 * @param {boolean} [options.warmup=true] - Whether to run a warm-up synthesis.
 * @returns {Promise<void>}
 */
async function load({ model, device, voice = "F2", warmup = true }) {
  // Posts a "warn"-mode status update with a detail line.
  const announce = (message, detail) => {
    self.postMessage({ type: "status", message, mode: "warn", detail });
  };

  // Switching models invalidates everything cached for the previous one.
  modelId = model;
  voiceEmbeddings.clear();
  voicePromises.clear();

  announce("Loading", "Loading Supertonic pipeline.");
  const pipelineOptions = {
    device,
    dtype: "fp32",
    progress_callback: reportProgress("TTS"),
  };
  synthesizer = await pipeline("text-to-speech", model, pipelineOptions);

  announce(`Voice ${voice}`, `Supertonic pipeline loaded; loading ${voice} voice embedding.`);
  const embedding = await ensureVoice(model, voice);

  if (warmup) {
    announce("Warming", "Warming Supertonic with the selected voice.");
    await synthesizer("Ready.", {
      speaker_embeddings: embedding,
      num_inference_steps: 2,
      speed: 1.08,
    });
  }

  self.postMessage({ type: "status", message: "Ready", mode: "ready", detail: "Supertonic is ready." });
  self.postMessage({ type: "ready" });

  // Not awaited: the other voices download in the background.
  const remainingVoices = VOICES.filter((name) => name !== voice);
  preloadVoices(model, remainingVoices);
}
/**
 * Ensures the embedding for `voice` is loaded for the current model and
 * reports the outcome as a "voice-ready" or "voice-error" message.
 *
 * @param {object} request
 * @param {string} [request.voice="F2"] - Voice to load.
 * @param {*} [request.requestId=null] - Echoed back in the reply message.
 * @returns {Promise<void>}
 */
async function preloadVoice({ voice = "F2", requestId = null }) {
  return ensureVoice(modelId, voice)
    .then(() => {
      self.postMessage({ type: "voice-ready", voice, requestId });
    })
    .catch((failure) => {
      const message = failure.message ?? String(failure);
      self.postMessage({ type: "voice-error", voice, requestId, message });
    });
}
/**
 * Starts loading several voice embeddings without waiting for them.
 * Each failure is reported as a "voice-error" message with a null requestId.
 *
 * @param {string} model - Model identifier the voices belong to.
 * @param {Iterable<string>} voices - Voice names to load.
 * @returns {void}
 */
function preloadVoices(model, voices) {
  // Builds the rejection handler for one voice.
  const reportFailureFor = (voice) => (error) => {
    const message = error.message ?? String(error);
    self.postMessage({ type: "voice-error", voice, requestId: null, message });
  };

  for (const name of voices) {
    ensureVoice(model, name).catch(reportFailureFor(name));
  }
}
/**
 * Creates a progress callback that forwards "progress" events to the host as
 * status messages of the form "<label> <percent>%". Events with any other
 * status are ignored; the percentage is omitted when it is not a finite number.
 *
 * @param {string} label - Prefix for the status message.
 * @returns {(progress: object) => void} Callback suitable for `progress_callback`.
 */
function reportProgress(label) {
  return function onProgress({ status, progress }) {
    if (status !== "progress") return;

    let suffix = "";
    if (Number.isFinite(progress)) {
      suffix = ` ${progress.toFixed(0)}%`;
    }
    self.postMessage({ type: "status", message: `${label}${suffix}`, mode: "warn" });
  };
}
/**
 * Synthesizes one queued utterance and posts the result as an "audio" message
 * followed by a "done" message. Nothing is posted when no pipeline is loaded,
 * when the trimmed text is empty, or when a cancel arrived after the request
 * was enqueued (checked before and after each asynchronous step).
 *
 * @param {object} item - The "speak" message, stamped with `cancelEpoch` at enqueue time.
 * @returns {Promise<void>}
 */
async function speak({ text, voice, steps, speed, turnId, sequence, enqueuedAt, cancelEpoch: itemEpoch }) {
  // True once a "cancel" message has advanced the epoch past this item's stamp.
  const wasCancelled = () => itemEpoch !== cancelEpoch;

  if (!synthesizer) return;
  const utterance = text.trim();
  if (!utterance) return;
  if (wasCancelled()) return;

  self.postMessage({ type: "status", message: "Synthesizing", mode: "active" });

  const embedding = await ensureVoice(modelId, voice);
  if (wasCancelled()) return;

  const startedAt = performance.now();
  const result = await synthesizer(utterance, {
    speaker_embeddings: embedding,
    num_inference_steps: steps,
    speed,
  });
  if (wasCancelled()) return;

  const payload = {
    type: "audio",
    turnId,
    sequence,
    enqueuedAt,
    text,
    audio: result.audio,
    sampleRate: result.sampling_rate,
    synthesisMs: performance.now() - startedAt,
  };
  // The audio buffer is listed as a transferable so it is moved rather than copied.
  self.postMessage(payload, [result.audio.buffer]);
  self.postMessage({ type: "done", turnId, sequence });
}
/**
 * Synthesizes text for a loopback request and posts the result as a
 * "synthetic-audio" message followed by a "done" message, both tagged with
 * `requestId`. Nothing is posted when no pipeline is loaded, when the trimmed
 * text is empty, or when a cancel arrived after the request was enqueued.
 *
 * @param {object} item - The "synthesize" message, stamped with `cancelEpoch` at enqueue time.
 * @returns {Promise<void>}
 */
async function synthesize({ text, voice, steps, speed, requestId, cancelEpoch: itemEpoch }) {
  // True once a "cancel" message has advanced the epoch past this item's stamp.
  const wasCancelled = () => itemEpoch !== cancelEpoch;

  if (!synthesizer) return;
  const utterance = text.trim();
  if (!utterance) return;
  if (wasCancelled()) return;

  self.postMessage({ type: "status", message: "Loopback", mode: "active" });

  const embedding = await ensureVoice(modelId, voice);
  if (wasCancelled()) return;

  const startedAt = performance.now();
  const result = await synthesizer(utterance, {
    speaker_embeddings: embedding,
    num_inference_steps: steps,
    speed,
  });
  if (wasCancelled()) return;

  const payload = {
    type: "synthetic-audio",
    requestId,
    audio: result.audio,
    sampleRate: result.sampling_rate,
    synthesisMs: performance.now() - startedAt,
  };
  // The audio buffer is listed as a transferable so it is moved rather than copied.
  self.postMessage(payload, [result.audio.buffer]);
  self.postMessage({ type: "done", requestId });
}
/**
 * Builds the download URL of a voice embedding file on huggingface.co.
 *
 * @param {string} model - Repository identifier, inserted into the path as-is.
 * @param {string} voice - Voice name; ".bin" is appended.
 * @returns {string} URL of `voices/<voice>.bin` on the repository's `main` revision.
 */
function voiceUrl(model, voice) {
  const segments = ["https://huggingface.co", model, "resolve/main/voices", `${voice}.bin`];
  return segments.join("/");
}
/**
 * Downloads one voice embedding and returns it as a `Float32Array`.
 *
 * The embedding is stored in `voiceEmbeddings` only if `model` is still the
 * active model (`modelId`) when the download finishes. Background preloads
 * are not part of the serial queue, so a download started for a previous
 * model can complete after `load()` has switched models and cleared the
 * cache; caching that result would hand the wrong model's voice to later
 * requests.
 *
 * @param {string} model - Model identifier the voice belongs to.
 * @param {string} voice - Voice name.
 * @returns {Promise<Float32Array>} The downloaded embedding.
 * @throws {Error} When the response status is not OK.
 */
async function loadVoice(model, voice) {
  const response = await fetch(voiceUrl(model, voice), { cache: "no-store" });
  if (!response.ok) {
    throw new Error(`Failed to load ${voice} voice embedding: ${response.status}`);
  }

  const embedding = new Float32Array(await response.arrayBuffer());
  // Skip caching when the active model changed while the download was in flight.
  if (model === modelId) {
    voiceEmbeddings.set(voice, embedding);
  }
  return embedding;
}
/**
 * Returns the embedding for `voice`, downloading it if necessary.
 * Concurrent requests for the same voice share a single in-flight download.
 *
 * @param {string} model - Model identifier the voice belongs to.
 * @param {string} voice - Voice name.
 * @returns {Promise<Float32Array>} The cached or freshly loaded embedding.
 * @throws {Error} When `model` is empty, i.e. no model has been loaded yet.
 */
async function ensureVoice(model, voice) {
  if (!model) throw new Error("TTS model has not loaded.");

  const cached = voiceEmbeddings.get(voice);
  if (cached) return cached;

  let pending = voicePromises.get(voice);
  if (!pending) {
    pending = loadVoice(model, voice).finally(() => {
      // Remove only our own entry: `voicePromises` may have been cleared and
      // repopulated with a newer download for the same voice in the meantime.
      if (voicePromises.get(voice) === pending) {
        voicePromises.delete(voice);
      }
    });
    voicePromises.set(voice, pending);
  }
  return pending;
}
/**
 * Reports a failed queue task to the host as an "error" message.
 *
 * Accepts any rejection reason, including `null` and `undefined`. This handler
 * runs inside the queue's `.catch`, so it must not throw itself: doing so would
 * leave the queue rejected and cause the next queued task to be skipped.
 *
 * @param {*} error - The rejection reason.
 * @returns {void}
 */
function postError(error) {
  self.postMessage({ type: "error", message: error?.message ?? String(error) });
}