/**
 * Web Worker for Parakeet ONNX Model Inference
 *
 * Handles model loading and transcription in a separate thread using parakeet.js
 * https://github.com/ysdede/parakeet.js
 *
 * Incoming messages have the shape `{ type, data }` with `type` one of
 * 'load', 'transcribe' or 'ping'. Replies are posted as `{ status, ... }`
 * objects ('loading', 'initiate', 'progress', 'done', 'ready',
 * 'transcription', 'pong', 'error').
 */

import { fromHub } from 'parakeet.js';

/** Sample rate (Hz) passed to every model.transcribe() call in this worker. */
const SAMPLE_RATE = 16000;

/** Matches a word whose last character is sentence-ending punctuation (. ! ?). */
const SENTENCE_END = /[.!?]$/;

/** The ready-to-use model; stays null until loading AND warm-up have succeeded. */
let model = null;

/** True while a loadModel() call is in flight. */
let isLoading = false;

/**
 * Load the Parakeet model using parakeet.js
 *
 * Posts 'loading', per-file 'initiate' / 'progress' / 'done', and finally
 * 'ready' or 'error' messages to the main thread while it works.
 *
 * @param {string} [modelVersion='parakeet-tdt-0.6b-v3'] - Model id handed to fromHub().
 * @param {object} [options={}] - `options.device === 'webgpu'` selects the
 *   'webgpu-hybrid' backend; anything else selects 'wasm'.
 * @returns {Promise<object>} A status object: `{ status: 'loading' | 'ready' | 'error', ... }`.
 *   Never rejects; failures are reported through the returned object and an
 *   'error' message.
 */
async function loadModel(modelVersion = 'parakeet-tdt-0.6b-v3', options = {}) {
  if (isLoading) {
    return { status: 'loading', message: 'Model is already loading...' };
  }

  if (model) {
    return { status: 'ready', message: 'Model already loaded' };
  }

  try {
    isLoading = true;

    // Use 'webgpu-hybrid' for WebGPU encoder + WASM decoder (best performance)
    // Use 'wasm' for full WASM execution
    const backend = options.device === 'webgpu' ? 'webgpu-hybrid' : 'wasm';

    self.postMessage({
      status: 'loading',
      message: `Loading Parakeet ${modelVersion}... (~2.5GB)`,
    });

    console.log('[Worker] Starting model load with backend:', backend);

    // Load model using parakeet.js fromHub helper
    // webgpu-hybrid: FP32 encoder on WebGPU + INT8 decoder on WASM (optimal)
    // wasm: Both INT8 on WASM (CPU only)
    const quantization =
      backend === 'wasm'
        ? { encoderQuant: 'int8', decoderQuant: 'int8', preprocessor: 'nemo128' } // WASM: both INT8
        : { encoderQuant: 'fp32', decoderQuant: 'int8', preprocessor: 'nemo128' }; // WebGPU-hybrid: FP32 encoder + INT8 decoder

    console.log('[Worker] Calling fromHub...');

    // Files we have already sent 'initiate' / 'done' for, so each is sent once.
    const initiatedFiles = new Set();
    const completedFiles = new Set();

    const markDone = (file) => {
      if (completedFiles.has(file)) {
        return;
      }
      completedFiles.add(file);
      self.postMessage({
        status: 'done',
        file,
      });
    };

    // Keep the model in a local until warm-up succeeds: transcribe() checks the
    // module-level `model`, so publishing it early would let a 'transcribe'
    // message run against a model that is still warming up, and would leave a
    // half-initialized model behind if warm-up throws.
    const loadedModel = await fromHub(modelVersion, {
      backend,
      ...quantization,
      progress: (progressData) => {
        const { loaded, total, file } = progressData;
        const progress = total > 0 ? Math.round((loaded / total) * 100) : 0;

        // Send 'initiate' message for new files
        if (!initiatedFiles.has(file)) {
          initiatedFiles.add(file);
          self.postMessage({
            status: 'initiate',
            file,
            progress: 0,
            total,
          });
        }

        // Send progress update
        self.postMessage({
          status: 'progress',
          file,
          progress,
          total,
          loaded,
        });

        // Send 'done' when complete. Require a known, positive total so that a
        // file of unknown size (0 >= 0) is not reported done on its first event.
        if (total > 0 && loaded >= total) {
          markDone(file);
        }
      },
    });

    console.log('[Worker] fromHub completed successfully');

    // fromHub has resolved, so every file it reported has finished loading;
    // close out any whose size was never known.
    for (const file of initiatedFiles) {
      markDone(file);
    }

    self.postMessage({
      status: 'loading',
      message: 'Model loaded, warming up...',
    });

    // Warm-up inference (recommended by parakeet.js)
    const dummyAudio = new Float32Array(SAMPLE_RATE); // 1 second of silence
    await loadedModel.transcribe(dummyAudio, SAMPLE_RATE);

    // Only now is the model usable by transcribe().
    model = loadedModel;

    self.postMessage({
      status: 'ready',
      message: `Parakeet ${modelVersion} ready!`,
      device: backend,
      modelVersion,
    });

    return { status: 'ready', device: backend };
  } catch (error) {
    console.error('Failed to load model:', error);
    self.postMessage({
      status: 'error',
      message: `Failed to load model: ${error.message}`,
      error: error.toString(),
    });
    return { status: 'error', error: error.toString() };
  } finally {
    isLoading = false;
  }
}

/**
 * Transcribe audio chunk using Parakeet
 *
 * @param {Float32Array} audio - Audio samples; passed to the model with a
 *   sample rate of 16000, and its length is used to compute the duration.
 * @param {?string} [language=null] - Not passed to the model; only echoed back
 *   in `metadata.language`.
 * @returns {Promise<object>} `{ text, sentences, words, chunks, metadata }`,
 *   where `metadata` holds latency (s), audioDuration (s), rtf, language,
 *   confidence and metrics.
 * @throws {Error} If the model has not been loaded, or if the underlying
 *   transcription fails (the original error is logged and rethrown).
 */
async function transcribe(audio, language = null) {
  if (!model) {
    throw new Error('Model not loaded. Call load() first.');
  }

  try {
    const startTime = performance.now();

    // Transcribe with parakeet.js
    const result = await model.transcribe(audio, SAMPLE_RATE, {
      returnTimestamps: true, // Get word-level timestamps
      returnConfidences: true, // Get confidence scores
      temperature: 1.0, // Greedy decoding
    });

    const endTime = performance.now();
    const latency = (endTime - startTime) / 1000; // seconds
    const audioDuration = audio.length / SAMPLE_RATE;
    const rtf = audioDuration / latency; // Speed factor (inverse of traditional RTF)

    // Convert parakeet.js word format to our sentence format
    const words = result.words || [];
    const sentences = groupWordsIntoSentences(words);

    return {
      text: result.utterance_text || '',
      sentences,
      words,
      chunks: words, // For compatibility
      metadata: {
        latency,
        audioDuration,
        rtf,
        language,
        confidence: result.confidence_scores,
        metrics: result.metrics,
      },
    };
  } catch (error) {
    console.error('Transcription error:', error);
    throw error;
  }
}

/**
 * Group words into sentences based on punctuation
 *
 * Note: This is a simplified implementation since parakeet.js provides word-level
 * alignments but not sentence-level. The Python implementation uses model-provided
 * sentence boundaries. We split on sentence-ending punctuation (.!?) to approximate
 * sentence boundaries for the progressive streaming window management.
 *
 * @param {Array<{text: string, start_time?: number, end_time?: number}>} words
 *   Words in order; missing times fall back to neighbouring values or 0.
 * @returns {Array<{text: string, start: number, end: number}>} One entry per
 *   sentence; the trailing words form a final sentence even without
 *   terminal punctuation. Empty or missing input yields [].
 */
function groupWordsIntoSentences(words) {
  if (!words || words.length === 0) {
    return [];
  }

  const sentences = [];
  const lastIndex = words.length - 1;
  let currentWords = [];
  let currentStart = words[0].start_time || 0;

  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    currentWords.push(word.text);

    // Check if this word ends a sentence (only period, question mark, exclamation)
    // Note: We explicitly ignore commas - they don't end sentences
    const endsWithTerminalPunctuation = SENTENCE_END.test(word.text);

    if (endsWithTerminalPunctuation || i === lastIndex) {
      // Create sentence
      sentences.push({
        text: currentWords.join(' ').trim(),
        start: currentStart,
        end: word.end_time || (word.start_time || 0),
      });

      // Start new sentence if there are more words
      if (i < lastIndex) {
        currentWords = [];
        currentStart = words[i + 1].start_time || (word.end_time || 0);
      }
    }
  }

  return sentences;
}

/**
 * Message handler
 *
 * Dispatches on `event.data.type`. Any error thrown while handling a message
 * (including a malformed payload) is reported back as a 'error' status message
 * instead of surfacing as an unhandled rejection.
 */
self.onmessage = async (event) => {
  try {
    // Destructure inside the try so a null/undefined payload produces an
    // 'error' reply (unknown type) rather than an unhandled rejection.
    const { type, data } = event.data ?? {};

    switch (type) {
      case 'load':
        await loadModel(data?.modelVersion, data?.options || {});
        break;

      case 'transcribe': {
        // Braces keep `result` scoped to this case clause.
        const result = await transcribe(data.audio, data.language);
        self.postMessage({
          status: 'transcription',
          result,
        });
        break;
      }

      case 'ping':
        self.postMessage({ status: 'pong' });
        break;

      default:
        self.postMessage({
          status: 'error',
          message: `Unknown message type: ${type}`,
        });
    }
  } catch (error) {
    self.postMessage({
      status: 'error',
      message: error.message,
      error: error.toString(),
    });
  }
};