| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import { fromHub } from 'parakeet.js'; |
| |
|
// Worker-global singleton state: the loaded Parakeet model instance (null
// until loadModel() resolves) and a re-entrancy flag so concurrent 'load'
// messages don't start a second download.
let model = null;
let isLoading = false;
| |
|
| | |
| | |
| | |
/**
 * Load and warm up a Parakeet ASR model inside the worker.
 *
 * All state transitions ('loading' / 'initiate' / 'progress' / 'done' /
 * 'ready' / 'error') are reported to the main thread via self.postMessage;
 * the returned object mirrors the final state for direct awaiters.
 *
 * @param {string} [modelVersion='parakeet-tdt-0.6b-v3'] - Hub model id.
 * @param {{ device?: string }} [options] - `device: 'webgpu'` selects the
 *   webgpu-hybrid backend; anything else falls back to wasm.
 * @returns {Promise<{status: string, message?: string, device?: string, error?: string}>}
 */
async function loadModel(modelVersion = 'parakeet-tdt-0.6b-v3', options = {}) {
  // Duplicate 'load' requests: also notify the main thread, which only
  // listens for messages — returning alone would leave it waiting forever.
  if (isLoading) {
    self.postMessage({ status: 'loading', message: 'Model is already loading...' });
    return { status: 'loading', message: 'Model is already loading...' };
  }

  if (model) {
    self.postMessage({ status: 'ready', message: 'Model already loaded' });
    return { status: 'ready', message: 'Model already loaded' };
  }

  try {
    isLoading = true;

    const backend = options.device === 'webgpu' ? 'webgpu-hybrid' : 'wasm';

    self.postMessage({
      status: 'loading',
      message: `Loading Parakeet ${modelVersion}... (~2.5GB)`,
    });

    console.log('[Worker] Starting model load with backend:', backend);

    // wasm runs fully int8 for memory/speed; webgpu keeps the encoder fp32.
    const quantization = backend === 'wasm'
      ? { encoderQuant: 'int8', decoderQuant: 'int8', preprocessor: 'nemo128' }
      : { encoderQuant: 'fp32', decoderQuant: 'int8', preprocessor: 'nemo128' };

    console.log('[Worker] Calling fromHub...');

    // Track each file's lifecycle so 'initiate' and 'done' are posted exactly
    // once per file, even though progress callbacks repeat after completion.
    const initiatedFiles = new Set();
    const completedFiles = new Set();

    model = await fromHub(modelVersion, {
      backend,
      ...quantization,
      progress: (progressData) => {
        const { loaded, total, file } = progressData;
        const progress = total > 0 ? Math.round((loaded / total) * 100) : 0;

        if (!initiatedFiles.has(file)) {
          initiatedFiles.add(file);
          self.postMessage({
            status: 'initiate',
            file,
            progress: 0,
            total,
          });
        }

        self.postMessage({
          status: 'progress',
          file,
          progress,
          total,
          loaded,
        });

        // Declare a file done only once, and never when total is still 0
        // (0 >= 0 would otherwise mark an unstarted download as finished).
        if (total > 0 && loaded >= total && !completedFiles.has(file)) {
          completedFiles.add(file);
          self.postMessage({
            status: 'done',
            file,
          });
        }
      },
    });
    console.log('[Worker] fromHub completed successfully');

    self.postMessage({
      status: 'loading',
      message: 'Model loaded, warming up...',
    });

    // One second of 16 kHz silence primes kernels so the first real request
    // doesn't pay the compilation/JIT cost.
    const dummyAudio = new Float32Array(16000);
    await model.transcribe(dummyAudio, 16000);

    self.postMessage({
      status: 'ready',
      message: `Parakeet ${modelVersion} ready!`,
      device: backend,
      modelVersion,
    });

    return { status: 'ready', device: backend };
  } catch (error) {
    console.error('Failed to load model:', error);

    self.postMessage({
      status: 'error',
      message: `Failed to load model: ${error.message}`,
      error: error.toString(),
    });

    return { status: 'error', error: error.toString() };
  } finally {
    isLoading = false;
  }
}
| |
|
| | |
| | |
| | |
/**
 * Transcribe a Float32Array of 16 kHz mono audio with the loaded model.
 *
 * @param {Float32Array} audio - PCM samples at 16 kHz.
 * @param {string|null} [language=null] - Passed through into metadata only.
 * @returns {Promise<object>} text, sentence/word timings, and perf metadata.
 * @throws {Error} When the model has not been loaded yet, or on engine failure.
 */
async function transcribe(audio, language = null) {
  if (!model) {
    throw new Error('Model not loaded. Call load() first.');
  }

  try {
    const t0 = performance.now();

    const result = await model.transcribe(audio, 16000, {
      returnTimestamps: true,
      returnConfidences: true,
      temperature: 1.0,
    });

    const elapsedSec = (performance.now() - t0) / 1000;
    const audioSec = audio.length / 16000;
    const words = result.words || [];

    return {
      text: result.utterance_text || '',
      sentences: groupWordsIntoSentences(words),
      words,
      chunks: words,
      metadata: {
        latency: elapsedSec,
        audioDuration: audioSec,
        // Speed factor: >1 means faster than real time.
        rtf: audioSec / elapsedSec,
        language,
        confidence: result.confidence_scores,
        metrics: result.metrics,
      },
    };
  } catch (error) {
    console.error('Transcription error:', error);
    throw error;
  }
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Group flat word timings into sentence objects.
 *
 * A sentence boundary is any word ending in '.', '!' or '?', plus the end of
 * the word list. Each sentence carries the start time of its first word and
 * the end time of its last word.
 *
 * @param {Array<{text: string, start_time?: number, end_time?: number}>|null} words
 * @returns {Array<{text: string, start: number, end: number}>}
 */
function groupWordsIntoSentences(words) {
  if (!words || words.length === 0) {
    return [];
  }

  const sentences = [];
  let currentWords = [];
  // ?? rather than || so a legitimate start_time of 0 is not treated as missing.
  let currentStart = words[0].start_time ?? 0;

  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    currentWords.push(word.text);

    const endsWithTerminalPunctuation = /[.!?]$/.test(word.text);

    if (endsWithTerminalPunctuation || i === words.length - 1) {
      sentences.push({
        text: currentWords.join(' ').trim(),
        start: currentStart,
        // Fall back to start_time only when end_time is absent; a 0 end_time
        // is a real timestamp and must not be discarded (hence ??, not ||).
        end: word.end_time ?? word.start_time ?? 0,
      });

      // Reset the accumulator for the next sentence (if any words remain).
      if (i < words.length - 1) {
        currentWords = [];
        currentStart = words[i + 1].start_time ?? word.end_time ?? 0;
      }
    }
  }

  return sentences;
}
| |
|
| | |
| | |
| | |
/**
 * Worker message dispatcher.
 *
 * Accepts { type, data } envelopes: 'load' starts model loading, 'transcribe'
 * runs inference and posts a 'transcription' message, 'ping' answers 'pong',
 * anything else (and any thrown error) is reported as an 'error' message.
 */
self.onmessage = async (event) => {
  const { type, data } = event.data;

  try {
    if (type === 'load') {
      await loadModel(data?.modelVersion, data?.options || {});
    } else if (type === 'transcribe') {
      const result = await transcribe(data.audio, data.language);
      self.postMessage({ status: 'transcription', result });
    } else if (type === 'ping') {
      self.postMessage({ status: 'pong' });
    } else {
      self.postMessage({
        status: 'error',
        message: `Unknown message type: ${type}`,
      });
    }
  } catch (error) {
    self.postMessage({
      status: 'error',
      message: error.message,
      error: error.toString(),
    });
  }
};
| |
|