Spaces:

Mike0021
/

browser-speak

Configuration error

App Files Files Community

browser-speak / workers /tts-worker.js

Mike0021's picture

Add worker network telemetry to browser evidence

d2ae80e verified about 1 month ago

History Blame Contribute Delete

7.08 kB

	import { env, pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.2.0";

	// Module initialisation. Sets two flags on the transformers.js `env` object
	// (exact semantics are defined by the library — see its documentation).
	env.allowLocalModels = false;
	env.useBrowserCache = true;
	// Wrap globalThis.fetch so each request made after this point is reported to
	// the worker's owner with scope "tts". The function is declared further down
	// in this file; function declarations are hoisted, so calling it here works.
	installFetchTelemetry("tts");

	// Text-to-speech pipeline returned by pipeline(); assigned in load().
	// speak() and synthesize() return immediately while this is still null.
	let synthesizer = null;
	// Model id received by load(); used to build voice-embedding URLs.
	let modelId = "";
	// Incremented on every "cancel" message. Each speak/synthesize item captures
	// the value at enqueue time and is abandoned once the two values differ.
	let cancelEpoch = 0;
	// Tail of the promise chain that runs load/preload/speak/synthesize tasks one
	// after another, in arrival order.
	let queue = Promise.resolve();
	// voice name -> Float32Array embedding, filled by loadVoice().
	const voiceEmbeddings = new Map();
	// voice name -> in-flight loadVoice() promise, so concurrent requests for the
	// same voice share one fetch (see ensureVoice()).
	const voicePromises = new Map();
	// Voices that load() preloads in the background (minus the selected one).
	const VOICES = ["F1", "F2", "M1", "M2"];

	/**
	 * Replace `globalThis.fetch` with a wrapper that posts one "network" message
	 * per request (on success and on failure) via `self.postMessage`.
	 *
	 * Does nothing when `fetch` is unavailable or when the wrapper has already
	 * been installed (tracked by `globalThis.__browserSpeakFetchTelemetryInstalled`).
	 * The wrapped fetch resolves or rejects exactly like the original call.
	 *
	 * @param {string} scope - Label copied into every telemetry message.
	 * @returns {void}
	 */
	function installFetchTelemetry(scope) {
	  const originalFetch = globalThis.fetch?.bind(globalThis);
	  if (!originalFetch || globalThis.__browserSpeakFetchTelemetryInstalled) return;
	  globalThis.__browserSpeakFetchTelemetryInstalled = true;

	  // Telemetry is best-effort: a failing postMessage must never change the
	  // outcome of the request being observed, so it is reported and ignored.
	  const report = (entry) => {
	    try {
	      self.postMessage({ type: "network", scope, ...entry });
	    } catch (telemetryError) {
	      console.warn("fetch telemetry failed", telemetryError);
	    }
	  };

	  globalThis.fetch = async (input, init) => {
	    const startedAt = performance.now();
	    const url = fetchUrl(input);
	    // An explicit init.method wins over the method of a Request-like input.
	    const method = String(init?.method || input?.method || "GET").toUpperCase();

	    let response;
	    try {
	      response = await originalFetch(input, init);
	    } catch (error) {
	      report({
	        method,
	        url,
	        status: null,
	        ok: false,
	        durationMs: performance.now() - startedAt,
	        // Rejection reasons are not guaranteed to be Error objects.
	        error: error?.message ?? String(error),
	      });
	      throw error;
	    }

	    // Reported outside the try block so that a telemetry problem cannot be
	    // mistaken for a network failure.
	    report({
	      method,
	      url,
	      responseUrl: response.url || url,
	      status: response.status,
	      ok: response.ok,
	      durationMs: performance.now() - startedAt,
	    });
	    return response;
	  };
	}

	/**
	 * Extract a URL string from a fetch() input for telemetry purposes.
	 *
	 * @param {*} input - A string, a URL instance, or an object exposing `url`.
	 * @returns {string} The string itself, the URL's `href`, the object's `url`,
	 *   or "" when none of those is available.
	 */
	function fetchUrl(input) {
	  const isPlainString = typeof input === "string";
	  if (isPlainString) {
	    return input;
	  }
	  return input instanceof URL ? input.href : (input?.url ?? "");
	}

	// Message dispatcher. Work messages are appended to the serial `queue`;
	// "cancel" only bumps `cancelEpoch`, which invalidates every speak/synthesize
	// item that captured an earlier epoch. Unknown message types are ignored.
	self.onmessage = ({ data: message }) => {
	  // Chain a task and route any rejection to postError so the queue keeps going.
	  const enqueue = (task) => {
	    queue = queue.then(task).catch(postError);
	  };

	  switch (message.type) {
	    case "load":
	      enqueue(() => load(message));
	      break;
	    case "preload-voice":
	      enqueue(() => preloadVoice(message));
	      break;
	    case "speak": {
	      // Capture the epoch at arrival time, not at execution time.
	      const item = { ...message, cancelEpoch };
	      enqueue(() => speak(item));
	      break;
	    }
	    case "synthesize": {
	      const item = { ...message, cancelEpoch };
	      enqueue(() => synthesize(item));
	      break;
	    }
	    case "cancel":
	      cancelEpoch += 1;
	      break;
	    default:
	      break;
	  }
	};

	/**
	 * Load the text-to-speech pipeline for `model`, fetch the selected voice,
	 * optionally run a warm-up synthesis, and announce readiness.
	 *
	 * Module state (`synthesizer`, `modelId`, voice caches) is replaced only after
	 * the new pipeline has loaded, so a failed load leaves the previously loaded
	 * model (if any) intact instead of pairing the old pipeline with the new
	 * model id. Progress is reported through "status" messages; completion is
	 * signalled by a "ready" message. The remaining entries of `VOICES` are then
	 * preloaded in the background without being awaited.
	 *
	 * @param {object} options
	 * @param {string} options.model - Model id handed to pipeline() and used for voice URLs.
	 * @param {string} [options.device] - Forwarded to pipeline() as `device`.
	 * @param {string} [options.voice="F2"] - Voice whose embedding is loaded before "ready".
	 * @param {boolean} [options.warmup=true] - Whether to synthesize "Ready." once before "ready".
	 * @returns {Promise<void>} Rejects when the pipeline, the voice embedding or the warm-up fails.
	 */
	async function load({ model, device, voice = "F2", warmup = true }) {
	  self.postMessage({
	    type: "status",
	    message: "Loading",
	    mode: "warn",
	    detail: "Loading Supertonic pipeline.",
	  });
	  const loadedPipeline = await pipeline("text-to-speech", model, {
	    device,
	    dtype: "fp32",
	    progress_callback: reportProgress("TTS"),
	  });

	  // Commit the new model only now that the pipeline exists. Cached and
	  // in-flight voices belong to the previous model and are discarded.
	  synthesizer = loadedPipeline;
	  modelId = model;
	  voiceEmbeddings.clear();
	  voicePromises.clear();

	  self.postMessage({
	    type: "status",
	    message: `Voice ${voice}`,
	    mode: "warn",
	    detail: `Supertonic pipeline loaded; loading ${voice} voice embedding.`,
	  });
	  const speaker = await ensureVoice(model, voice);
	  if (warmup) {
	    self.postMessage({
	      type: "status",
	      message: "Warming",
	      mode: "warn",
	      detail: "Warming Supertonic with the selected voice.",
	    });
	    // The output of the warm-up run is discarded.
	    await synthesizer("Ready.", {
	      speaker_embeddings: speaker,
	      num_inference_steps: 2,
	      speed: 1.08,
	    });
	  }
	  self.postMessage({ type: "status", message: "Ready", mode: "ready", detail: "Supertonic is ready." });
	  self.postMessage({ type: "ready" });
	  // Fire-and-forget: failures are reported as "voice-error" by preloadVoices().
	  preloadVoices(model, VOICES.filter((candidate) => candidate !== voice));
	}

	/**
	 * Make sure the embedding for `voice` is available for the current model and
	 * report the outcome: "voice-ready" on success, "voice-error" (with the
	 * failure message) otherwise. The returned promise does not reject on a
	 * voice-loading failure.
	 *
	 * @param {object} request
	 * @param {string} [request.voice="F2"] - Voice to load.
	 * @param {*} [request.requestId=null] - Echoed back in the reply.
	 * @returns {Promise<void>}
	 */
	async function preloadVoice({ voice = "F2", requestId = null }) {
	  const announceReady = () => {
	    self.postMessage({ type: "voice-ready", voice, requestId });
	  };
	  const announceFailure = (error) => {
	    self.postMessage({
	      type: "voice-error",
	      voice,
	      requestId,
	      message: error.message ?? String(error),
	    });
	  };
	  // The failure handler is chained after the success handler so that it also
	  // covers an error raised while announcing readiness.
	  await ensureVoice(modelId, voice).then(announceReady).catch(announceFailure);
	}

	/**
	 * Start loading several voices without waiting for any of them. Each failure
	 * is reported as a "voice-error" message with `requestId: null`; successes
	 * are not reported.
	 *
	 * @param {string} model - Model id the voices belong to.
	 * @param {Iterable<string>} voices - Voice names to load.
	 * @returns {void}
	 */
	function preloadVoices(model, voices) {
	  // Build the rejection handler for one voice.
	  const failureReporter = (voice) => (error) => {
	    const message = error.message ?? String(error);
	    self.postMessage({ type: "voice-error", voice, requestId: null, message });
	  };

	  for (const voice of voices) {
	    ensureVoice(model, voice).catch(failureReporter(voice));
	  }
	}

	/**
	 * Build a progress callback that forwards "progress" events as "status"
	 * messages of the form `<label> <percent>%`. The percentage is omitted when
	 * the event carries no finite `progress` number; events with any other
	 * `status` are ignored.
	 *
	 * @param {string} label - Prefix of the status message.
	 * @returns {(progress: {status: string, progress?: number}) => void}
	 */
	function reportProgress(label) {
	  return (event) => {
	    if (event.status !== "progress") {
	      return;
	    }
	    const value = event.progress;
	    let suffix = "";
	    if (Number.isFinite(value)) {
	      suffix = ` ${value.toFixed(0)}%`;
	    }
	    self.postMessage({ type: "status", message: `${label}${suffix}`, mode: "warn" });
	  };
	}

	/**
	 * Synthesize one queued utterance and post it as an "audio" message followed
	 * by a "done" message.
	 *
	 * Returns without posting audio when no pipeline is loaded, when the text is
	 * blank, or when a "cancel" arrived after the item was enqueued (checked
	 * before loading the voice, after loading it, and after synthesis).
	 *
	 * @param {object} item
	 * @param {string} item.text - Text to speak; trimmed for synthesis, echoed back untrimmed.
	 * @param {string} item.voice - Voice name passed to ensureVoice().
	 * @param {number} item.steps - Forwarded as `num_inference_steps`.
	 * @param {number} item.speed - Forwarded as `speed`.
	 * @param {*} item.turnId - Echoed back in "audio" and "done".
	 * @param {*} item.sequence - Echoed back in "audio" and "done".
	 * @param {*} item.enqueuedAt - Echoed back in "audio".
	 * @param {number} item.cancelEpoch - Value of `cancelEpoch` when the item was enqueued.
	 * @returns {Promise<void>}
	 */
	async function speak({ text, voice, steps, speed, turnId, sequence, enqueuedAt, cancelEpoch: itemEpoch }) {
	  if (!synthesizer) return;
	  const trimmed = text.trim();
	  if (!trimmed) return;

	  // True once a "cancel" message has been received since this item was queued.
	  const isCancelled = () => itemEpoch !== cancelEpoch;
	  if (isCancelled()) return;

	  self.postMessage({ type: "status", message: "Synthesizing", mode: "active" });
	  const speaker = await ensureVoice(modelId, voice);
	  if (isCancelled()) return;

	  const startedAt = performance.now();
	  const output = await synthesizer(trimmed, {
	    speaker_embeddings: speaker,
	    num_inference_steps: steps,
	    speed,
	  });
	  if (isCancelled()) return;

	  self.postMessage(
	    {
	      type: "audio",
	      turnId,
	      sequence,
	      enqueuedAt,
	      text,
	      audio: output.audio,
	      sampleRate: output.sampling_rate,
	      synthesisMs: performance.now() - startedAt,
	    },
	    // Transfer the sample buffer instead of copying it.
	    [output.audio.buffer],
	  );
	  self.postMessage({ type: "done", turnId, sequence });
	}

	/**
	 * Synthesize text for a loopback request and post it as a "synthetic-audio"
	 * message followed by a "done" message, both tagged with `requestId`.
	 *
	 * Returns without posting audio when no pipeline is loaded, when the text is
	 * blank, or when a "cancel" arrived after the item was enqueued (checked
	 * before loading the voice, after loading it, and after synthesis).
	 *
	 * @param {object} item
	 * @param {string} item.text - Text to synthesize; trimmed before use.
	 * @param {string} item.voice - Voice name passed to ensureVoice().
	 * @param {number} item.steps - Forwarded as `num_inference_steps`.
	 * @param {number} item.speed - Forwarded as `speed`.
	 * @param {*} item.requestId - Echoed back in "synthetic-audio" and "done".
	 * @param {number} item.cancelEpoch - Value of `cancelEpoch` when the item was enqueued.
	 * @returns {Promise<void>}
	 */
	async function synthesize({ text, voice, steps, speed, requestId, cancelEpoch: itemEpoch }) {
	  if (!synthesizer) return;
	  const trimmed = text.trim();
	  if (!trimmed) return;

	  // True once a "cancel" message has been received since this item was queued.
	  const isCancelled = () => itemEpoch !== cancelEpoch;
	  if (isCancelled()) return;

	  self.postMessage({ type: "status", message: "Loopback", mode: "active" });
	  const speaker = await ensureVoice(modelId, voice);
	  if (isCancelled()) return;

	  const startedAt = performance.now();
	  const output = await synthesizer(trimmed, {
	    speaker_embeddings: speaker,
	    num_inference_steps: steps,
	    speed,
	  });
	  if (isCancelled()) return;

	  self.postMessage(
	    {
	      type: "synthetic-audio",
	      requestId,
	      audio: output.audio,
	      sampleRate: output.sampling_rate,
	      synthesisMs: performance.now() - startedAt,
	    },
	    // Transfer the sample buffer instead of copying it.
	    [output.audio.buffer],
	  );
	  self.postMessage({ type: "done", requestId });
	}

	/**
	 * Build the download URL of a voice embedding file for a model hosted on
	 * huggingface.co (`<model>/resolve/main/voices/<voice>.bin`).
	 *
	 * @param {string} model - Model id, inserted verbatim into the path.
	 * @param {string} voice - Voice name, used as the file's base name.
	 * @returns {string}
	 */
	function voiceUrl(model, voice) {
	  const origin = "https://huggingface.co/";
	  return origin.concat(model, "/resolve/main/voices/", voice, ".bin");
	}

	/**
	 * Download the embedding of `voice` for `model` and return it as a
	 * Float32Array view over the response body.
	 *
	 * The embedding is stored in `voiceEmbeddings` only while `model` is still the
	 * active `modelId`. load() clears that cache when the model changes, and a
	 * download that was started for the previous model must not repopulate it
	 * with an embedding belonging to the wrong model. The caller still receives
	 * the embedding it asked for.
	 *
	 * @param {string} model - Model id the voice belongs to.
	 * @param {string} voice - Voice name.
	 * @returns {Promise<Float32Array>}
	 * @throws {Error} When the HTTP response is not ok.
	 */
	async function loadVoice(model, voice) {
	  const response = await fetch(voiceUrl(model, voice), { cache: "no-store" });
	  if (!response.ok) {
	    throw new Error(`Failed to load ${voice} voice embedding: ${response.status}`);
	  }
	  const embedding = new Float32Array(await response.arrayBuffer());
	  if (model === modelId) {
	    voiceEmbeddings.set(voice, embedding);
	  }
	  return embedding;
	}

	/**
	 * Resolve the embedding for `voice`, using the in-memory cache when possible
	 * and sharing a single in-flight download between concurrent callers.
	 *
	 * @param {string} model - Model id the voice belongs to; must be non-empty.
	 * @param {string} voice - Voice name.
	 * @returns {Promise<Float32Array>}
	 * @throws {Error} "TTS model has not loaded." when `model` is falsy; also
	 *   rejects with whatever loadVoice() rejects with.
	 */
	async function ensureVoice(model, voice) {
	  if (!model) throw new Error("TTS model has not loaded.");

	  const embedding = voiceEmbeddings.get(voice);
	  if (embedding) return embedding;

	  const inFlight = voicePromises.get(voice);
	  if (inFlight) return inFlight;

	  const pending = loadVoice(model, voice).finally(() => {
	    // Remove only our own entry. load() may have cleared the map and a newer
	    // request for the same voice may have registered its own promise since;
	    // deleting that one would trigger a duplicate download.
	    if (voicePromises.get(voice) === pending) {
	      voicePromises.delete(voice);
	    }
	  });
	  voicePromises.set(voice, pending);
	  return pending;
	}

	/**
	 * Report a failed queue task to the worker's owner as an "error" message.
	 *
	 * Used as the queue's rejection handler, so it must not throw itself: a
	 * rejection reason is not guaranteed to be an Error and may even be `null`
	 * or `undefined`, hence the optional access.
	 *
	 * @param {*} error - Rejection reason of the failed task.
	 * @returns {void}
	 */
	function postError(error) {
	  self.postMessage({ type: "error", message: error?.message ?? String(error) });
	}