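// Source: browser-speak / workers / tts-worker.js
// Uploaded by: Mike0021
// Commit: "Add worker network telemetry to browser evidence" (d2ae80e, verified)
// File size: 7.08 kB
// (File-viewer controls captured with the page: Raw, History, Blame, Contribute, Delete.)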
import { env, pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.2.0";
// Transformers.js runtime configuration.
// NOTE(review): presumably this forces models to be fetched remotely and kept
// in the browser cache — confirm against the Transformers.js `env` docs.
env.allowLocalModels = false;
env.useBrowserCache = true;
// Wrap fetch before anything is downloaded so every request is reported.
// (installFetchTelemetry is a hoisted function declaration defined below.)
installFetchTelemetry("tts");
// Pipeline returned by `pipeline(...)` in load(); null until a load succeeds.
let synthesizer = null;
// Model id passed to the most recent load(); used to resolve voice downloads.
let modelId = "";
// Incremented on every "cancel" message; speak/synthesize requests carry the
// value captured when they were enqueued and stop once it no longer matches.
let cancelEpoch = 0;
// Tail of the promise chain onto which incoming requests are appended.
let queue = Promise.resolve();
// voice name -> Float32Array embedding that has finished downloading.
const voiceEmbeddings = new Map();
// voice name -> promise for a download that is still in flight.
const voicePromises = new Map();
// Voice names that load() preloads in the background (minus the selected one).
const VOICES = ["F1", "F2", "M1", "M2"];
/**
 * Replaces `globalThis.fetch` with a wrapper that reports every request to
 * the host page as a `{ type: "network", ... }` message.
 *
 * The wrapper is installed at most once per global scope (guarded by
 * `globalThis.__browserSpeakFetchTelemetryInstalled`) and nothing happens
 * when no `fetch` implementation exists.
 *
 * @param {string} scope - Label copied into every telemetry message.
 * @returns {void}
 */
function installFetchTelemetry(scope) {
  const originalFetch = globalThis.fetch?.bind(globalThis);
  if (!originalFetch || globalThis.__browserSpeakFetchTelemetryInstalled) return;
  globalThis.__browserSpeakFetchTelemetryInstalled = true;

  globalThis.fetch = async (input, init) => {
    const startedAt = performance.now();
    const url = fetchUrl(input);
    const method = String(init?.method || input?.method || "GET").toUpperCase();

    // Only the network call itself is guarded, so a problem while reporting
    // a successful response is never mislabelled as a failed request.
    let response;
    try {
      response = await originalFetch(input, init);
    } catch (error) {
      self.postMessage({
        type: "network",
        scope,
        method,
        url,
        status: null,
        ok: false,
        durationMs: performance.now() - startedAt,
        // Any value can be thrown, including null/undefined; optional
        // chaining keeps the report from throwing and hiding the real error.
        error: error?.message ?? String(error),
      });
      throw error;
    }

    self.postMessage({
      type: "network",
      scope,
      method,
      url,
      // Fall back to the requested URL when the response does not carry one.
      responseUrl: response.url || url,
      status: response.status,
      ok: response.ok,
      durationMs: performance.now() - startedAt,
    });
    return response;
  };
}
/**
 * Extracts a printable URL from the first argument of a `fetch` call.
 *
 * @param {*} input - A string, a `URL`, or any object exposing a `url` property.
 * @returns {string} The URL text, or an empty string when none can be derived.
 */
function fetchUrl(input) {
  if (typeof input === "string") {
    return input;
  }
  return input instanceof URL ? input.href : (input?.url ?? "");
}
// Dispatches messages from the host page. Work items are appended to the
// shared `queue` promise chain; failures are reported through postError so a
// rejected task leaves the chain usable for the next one.
self.onmessage = ({ data }) => {
  const enqueue = (task) => {
    queue = queue.then(task).catch(postError);
  };

  switch (data.type) {
    case "load":
      enqueue(() => load(data));
      break;
    case "preload-voice":
      enqueue(() => preloadVoice(data));
      break;
    case "speak": {
      // Snapshot the epoch at enqueue time so a later "cancel" invalidates it.
      const request = { ...data, cancelEpoch };
      enqueue(() => speak(request));
      break;
    }
    case "synthesize": {
      const request = { ...data, cancelEpoch };
      enqueue(() => synthesize(request));
      break;
    }
    case "cancel":
      // Not queued: takes effect immediately for everything already enqueued.
      cancelEpoch += 1;
      break;
    default:
      // Unknown message types are ignored.
      break;
  }
};
/**
 * Loads the text-to-speech pipeline for `model`, fetches the selected voice,
 * optionally runs a short warm-up synthesis, and then announces readiness.
 * Progress is reported to the host page through "status" messages, followed
 * by a final "ready" message. The remaining voices in `VOICES` are preloaded
 * in the background without being awaited.
 *
 * @param {object} options
 * @param {string} options.model - Model id handed to `pipeline`.
 * @param {*} options.device - Forwarded to `pipeline` as the `device` option.
 * @param {string} [options.voice="F2"] - Voice to load before reporting ready.
 * @param {boolean} [options.warmup=true] - Whether to run the warm-up synthesis.
 * @returns {Promise<void>}
 */
async function load({ model, device, voice = "F2", warmup = true }) {
  const announce = (message, mode, detail) => {
    self.postMessage({ type: "status", message, mode, detail });
  };

  // Record the new model and drop cached embeddings and pending downloads.
  modelId = model;
  voiceEmbeddings.clear();
  voicePromises.clear();

  announce("Loading", "warn", "Loading Supertonic pipeline.");
  const pipelineOptions = {
    device,
    dtype: "fp32",
    progress_callback: reportProgress("TTS"),
  };
  synthesizer = await pipeline("text-to-speech", model, pipelineOptions);

  announce(`Voice ${voice}`, "warn", `Supertonic pipeline loaded; loading ${voice} voice embedding.`);
  const embedding = await ensureVoice(model, voice);

  if (warmup) {
    announce("Warming", "warn", "Warming Supertonic with the selected voice.");
    const warmupOptions = {
      speaker_embeddings: embedding,
      num_inference_steps: 2,
      speed: 1.08,
    };
    await synthesizer("Ready.", warmupOptions);
  }

  announce("Ready", "ready", "Supertonic is ready.");
  self.postMessage({ type: "ready" });

  // Fire-and-forget: the other voices download after readiness is reported.
  const remainingVoices = VOICES.filter((name) => name !== voice);
  preloadVoices(model, remainingVoices);
}
/**
 * Makes sure one voice embedding is available for the current model and
 * reports the outcome to the host page as "voice-ready" or "voice-error".
 * Failures are reported rather than rethrown.
 *
 * @param {object} options
 * @param {string} [options.voice="F2"] - Voice to load.
 * @param {*} [options.requestId=null] - Echoed back in the outcome message.
 * @returns {Promise<void>}
 */
async function preloadVoice({ voice = "F2", requestId = null }) {
  try {
    await ensureVoice(modelId, voice);
    self.postMessage({ type: "voice-ready", voice, requestId });
  } catch (error) {
    self.postMessage({
      type: "voice-error",
      voice,
      requestId,
      // A rejection reason can be null/undefined; optional chaining keeps
      // this handler from throwing while it reports the failure.
      message: error?.message ?? String(error),
    });
  }
}
/**
 * Starts loading several voices without waiting for them. Each failure is
 * reported to the host page as a "voice-error" message with a null requestId;
 * successes are not reported.
 *
 * @param {string} model - Model id whose voices should be fetched.
 * @param {Iterable<string>} voices - Voice names to load.
 * @returns {void}
 */
function preloadVoices(model, voices) {
  for (const voice of voices) {
    ensureVoice(model, voice).catch((error) => {
      self.postMessage({
        type: "voice-error",
        voice,
        requestId: null,
        // Tolerate null/undefined rejection reasons: throwing here would
        // surface as an unhandled rejection, since nothing awaits this chain.
        message: error?.message ?? String(error),
      });
    });
  }
}
/**
 * Builds a progress callback that forwards "progress" events to the host
 * page as "status" messages of the form `<label> <percent>%`. The percentage
 * is omitted when the event carries no finite `progress` number; events with
 * any other status are ignored.
 *
 * @param {string} label - Prefix shown in every status message.
 * @returns {(progress: object) => void}
 */
function reportProgress(label) {
  return function onProgress(progress) {
    if (progress.status !== "progress") {
      return;
    }
    let suffix = "";
    if (Number.isFinite(progress.progress)) {
      suffix = ` ${progress.progress.toFixed(0)}%`;
    }
    self.postMessage({ type: "status", message: `${label}${suffix}`, mode: "warn" });
  };
}
/**
 * Synthesizes one piece of text and posts the result to the host page as an
 * "audio" message (the sample buffer is passed in the transfer list),
 * followed by a "done" message.
 *
 * Nothing is posted when no pipeline is loaded or the text is blank. The
 * request is also abandoned silently whenever its captured epoch no longer
 * matches the current `cancelEpoch`; this is re-checked after every await.
 *
 * @param {object} request
 * @param {string} request.text - Text to speak; trimmed before synthesis.
 * @param {string} request.voice - Voice whose embedding is used.
 * @param {number} request.steps - Forwarded as `num_inference_steps`.
 * @param {number} request.speed - Forwarded as `speed`.
 * @param {*} request.turnId - Echoed in the "audio" and "done" messages.
 * @param {*} request.sequence - Echoed in the "audio" and "done" messages.
 * @param {*} request.enqueuedAt - Echoed in the "audio" message.
 * @param {number} request.cancelEpoch - Epoch captured when the request was queued.
 * @returns {Promise<void>}
 */
async function speak({ text, voice, steps, speed, turnId, sequence, enqueuedAt, cancelEpoch: itemEpoch }) {
  const isCancelled = () => itemEpoch !== cancelEpoch;

  if (!synthesizer) return;
  const phrase = text.trim();
  if (!phrase || isCancelled()) return;

  self.postMessage({ type: "status", message: "Synthesizing", mode: "active" });
  const embedding = await ensureVoice(modelId, voice);
  if (isCancelled()) return;

  const startedAt = performance.now();
  const synthesisOptions = {
    speaker_embeddings: embedding,
    num_inference_steps: steps,
    speed,
  };
  const result = await synthesizer(phrase, synthesisOptions);
  if (isCancelled()) return;

  const audioMessage = {
    type: "audio",
    turnId,
    sequence,
    enqueuedAt,
    // The original, untrimmed text is echoed back.
    text,
    audio: result.audio,
    sampleRate: result.sampling_rate,
    synthesisMs: performance.now() - startedAt,
  };
  self.postMessage(audioMessage, [result.audio.buffer]);
  self.postMessage({ type: "done", turnId, sequence });
}
/**
 * Synthesizes text for a loopback request and posts the result to the host
 * page as a "synthetic-audio" message (the sample buffer is passed in the
 * transfer list), followed by a "done" message carrying the same requestId.
 *
 * Nothing is posted when no pipeline is loaded or the text is blank. The
 * request is also abandoned silently whenever its captured epoch no longer
 * matches the current `cancelEpoch`; this is re-checked after every await.
 *
 * @param {object} request
 * @param {string} request.text - Text to synthesize; trimmed before use.
 * @param {string} request.voice - Voice whose embedding is used.
 * @param {number} request.steps - Forwarded as `num_inference_steps`.
 * @param {number} request.speed - Forwarded as `speed`.
 * @param {*} request.requestId - Echoed in both outgoing messages.
 * @param {number} request.cancelEpoch - Epoch captured when the request was queued.
 * @returns {Promise<void>}
 */
async function synthesize({ text, voice, steps, speed, requestId, cancelEpoch: itemEpoch }) {
  const isCancelled = () => itemEpoch !== cancelEpoch;

  if (!synthesizer) return;
  const phrase = text.trim();
  if (!phrase || isCancelled()) return;

  self.postMessage({ type: "status", message: "Loopback", mode: "active" });
  const embedding = await ensureVoice(modelId, voice);
  if (isCancelled()) return;

  const startedAt = performance.now();
  const synthesisOptions = {
    speaker_embeddings: embedding,
    num_inference_steps: steps,
    speed,
  };
  const result = await synthesizer(phrase, synthesisOptions);
  if (isCancelled()) return;

  const audioMessage = {
    type: "synthetic-audio",
    requestId,
    audio: result.audio,
    sampleRate: result.sampling_rate,
    synthesisMs: performance.now() - startedAt,
  };
  self.postMessage(audioMessage, [result.audio.buffer]);
  self.postMessage({ type: "done", requestId });
}
/**
 * Builds the download URL of a voice embedding file inside a model
 * repository on huggingface.co.
 *
 * @param {string} model - Repository id, interpolated into the path as-is.
 * @param {string} voice - Voice name; `.bin` is appended.
 * @returns {string}
 */
function voiceUrl(model, voice) {
  const repository = `https://huggingface.co/${model}`;
  const fileName = `${voice}.bin`;
  return `${repository}/resolve/main/voices/${fileName}`;
}
/**
 * Downloads one voice embedding and returns it as a Float32Array view over
 * the response bytes. The embedding is also stored in `voiceEmbeddings`, but
 * only while `model` is still the current `modelId`.
 *
 * @param {string} model - Model id whose voice file is fetched.
 * @param {string} voice - Voice name.
 * @returns {Promise<Float32Array>}
 * @throws {Error} When the HTTP response is not OK.
 */
async function loadVoice(model, voice) {
  const response = await fetch(voiceUrl(model, voice), { cache: "no-store" });
  if (!response.ok) {
    throw new Error(`Failed to load ${voice} voice embedding: ${response.status}`);
  }
  const embedding = new Float32Array(await response.arrayBuffer());
  // load() clears the cache when the model changes, but a download started
  // for the previous model can still finish afterwards. Caching it then would
  // hand the old model's embedding to requests made for the new model.
  if (model === modelId) {
    voiceEmbeddings.set(voice, embedding);
  }
  return embedding;
}
/**
 * Returns the embedding for `voice`, downloading it when it is not cached.
 * Concurrent requests for the same voice share one in-flight download.
 *
 * @param {string} model - Model id; must be non-empty.
 * @param {string} voice - Voice name.
 * @returns {Promise<Float32Array>}
 * @throws {Error} When `model` is empty, or when the download fails.
 */
async function ensureVoice(model, voice) {
  if (!model) throw new Error("TTS model has not loaded.");

  const cached = voiceEmbeddings.get(voice);
  if (cached) return cached;

  let pending = voicePromises.get(voice);
  if (!pending) {
    pending = loadVoice(model, voice).finally(() => {
      // Remove only the entry this call registered. load() may have cleared
      // the map and a newer download for the same voice may have been
      // registered since; deleting that one would lose the deduplication.
      if (voicePromises.get(voice) === pending) {
        voicePromises.delete(voice);
      }
    });
    voicePromises.set(voice, pending);
  }
  return pending;
}
/**
 * Reports a failure to the host page as an "error" message.
 *
 * @param {*} error - Any thrown value or rejection reason.
 * @returns {void}
 */
function postError(error) {
  // This runs as the `.catch` handler of the request queue, so it must not
  // throw itself: optional chaining covers null/undefined rejection reasons.
  self.postMessage({ type: "error", message: error?.message ?? String(error) });
}