| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import { fromHub } from 'parakeet.js'; |
| |
|
// Worker-global singleton state: the loaded Parakeet model instance (null
// until loadModel() resolves) and a re-entrancy flag so concurrent 'load'
// messages don't start a second download.
let model = null;
let isLoading = false;
| |
|
| | |
| | |
| | |
/**
 * Load and warm up a Parakeet ASR model inside the worker.
 *
 * All state transitions ('loading' / 'initiate' / 'progress' / 'done' /
 * 'ready' / 'error') are reported to the main thread via self.postMessage;
 * the returned object mirrors the final state for direct awaiters.
 *
 * @param {string} [modelVersion='parakeet-tdt-0.6b-v3'] - Hub model id.
 * @param {{ device?: string }} [options] - `device: 'webgpu'` selects the
 *   webgpu-hybrid backend; anything else falls back to wasm.
 * @returns {Promise<{status: string, message?: string, device?: string, error?: string}>}
 */
async function loadModel(modelVersion = 'parakeet-tdt-0.6b-v3', options = {}) {
  // Duplicate 'load' requests: also notify the main thread, which only
  // listens for messages — returning alone would leave it waiting forever.
  if (isLoading) {
    self.postMessage({ status: 'loading', message: 'Model is already loading...' });
    return { status: 'loading', message: 'Model is already loading...' };
  }

  if (model) {
    self.postMessage({ status: 'ready', message: 'Model already loaded' });
    return { status: 'ready', message: 'Model already loaded' };
  }

  try {
    isLoading = true;

    const backend = options.device === 'webgpu' ? 'webgpu-hybrid' : 'wasm';

    self.postMessage({
      status: 'loading',
      message: `Loading Parakeet ${modelVersion}... (~2.5GB)`,
    });

    console.log('[Worker] Starting model load with backend:', backend);

    // wasm runs fully int8 for memory/speed; webgpu keeps the encoder fp32.
    const quantization = backend === 'wasm'
      ? { encoderQuant: 'int8', decoderQuant: 'int8', preprocessor: 'nemo128' }
      : { encoderQuant: 'fp32', decoderQuant: 'int8', preprocessor: 'nemo128' };

    console.log('[Worker] Calling fromHub...');

    // Track each file's lifecycle so 'initiate' and 'done' are posted exactly
    // once per file, even though progress callbacks repeat after completion.
    const initiatedFiles = new Set();
    const completedFiles = new Set();

    model = await fromHub(modelVersion, {
      backend,
      ...quantization,
      progress: (progressData) => {
        const { loaded, total, file } = progressData;
        const progress = total > 0 ? Math.round((loaded / total) * 100) : 0;

        if (!initiatedFiles.has(file)) {
          initiatedFiles.add(file);
          self.postMessage({
            status: 'initiate',
            file,
            progress: 0,
            total,
          });
        }

        self.postMessage({
          status: 'progress',
          file,
          progress,
          total,
          loaded,
        });

        // Declare a file done only once, and never when total is still 0
        // (0 >= 0 would otherwise mark an unstarted download as finished).
        if (total > 0 && loaded >= total && !completedFiles.has(file)) {
          completedFiles.add(file);
          self.postMessage({
            status: 'done',
            file,
          });
        }
      },
    });
    console.log('[Worker] fromHub completed successfully');

    self.postMessage({
      status: 'loading',
      message: 'Model loaded, warming up...',
    });

    // One second of 16 kHz silence primes kernels so the first real request
    // doesn't pay the compilation/JIT cost.
    const dummyAudio = new Float32Array(16000);
    await model.transcribe(dummyAudio, 16000);

    self.postMessage({
      status: 'ready',
      message: `Parakeet ${modelVersion} ready!`,
      device: backend,
      modelVersion,
    });

    return { status: 'ready', device: backend };
  } catch (error) {
    console.error('Failed to load model:', error);

    self.postMessage({
      status: 'error',
      message: `Failed to load model: ${error.message}`,
      error: error.toString(),
    });

    return { status: 'error', error: error.toString() };
  } finally {
    isLoading = false;
  }
}
| |
|
| | |
| | |
| | |
/**
 * Transcribe a Float32Array of 16 kHz mono audio with the loaded model.
 *
 * @param {Float32Array} audio - PCM samples at 16 kHz.
 * @param {string|null} [language=null] - Passed through into metadata only.
 * @returns {Promise<object>} text, sentence/word timings, and perf metadata.
 * @throws {Error} When the model has not been loaded yet, or on engine failure.
 */
async function transcribe(audio, language = null) {
  if (!model) {
    throw new Error('Model not loaded. Call load() first.');
  }

  try {
    const t0 = performance.now();

    const result = await model.transcribe(audio, 16000, {
      returnTimestamps: true,
      returnConfidences: true,
      temperature: 1.0,
    });

    const elapsedSec = (performance.now() - t0) / 1000;
    const audioSec = audio.length / 16000;
    const words = result.words || [];

    return {
      text: result.utterance_text || '',
      sentences: groupWordsIntoSentences(words),
      words,
      chunks: words,
      metadata: {
        latency: elapsedSec,
        audioDuration: audioSec,
        // Speed factor: >1 means faster than real time.
        rtf: audioSec / elapsedSec,
        language,
        confidence: result.confidence_scores,
        metrics: result.metrics,
      },
    };
  } catch (error) {
    console.error('Transcription error:', error);
    throw error;
  }
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Group flat word timings into sentence objects.
 *
 * A sentence boundary is any word ending in '.', '!' or '?', plus the end of
 * the word list. Each sentence carries the start time of its first word and
 * the end time of its last word.
 *
 * @param {Array<{text: string, start_time?: number, end_time?: number}>|null} words
 * @returns {Array<{text: string, start: number, end: number}>}
 */
function groupWordsIntoSentences(words) {
  if (!words || words.length === 0) {
    return [];
  }

  const sentences = [];
  let currentWords = [];
  // ?? rather than || so a legitimate start_time of 0 is not treated as missing.
  let currentStart = words[0].start_time ?? 0;

  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    currentWords.push(word.text);

    const endsWithTerminalPunctuation = /[.!?]$/.test(word.text);

    if (endsWithTerminalPunctuation || i === words.length - 1) {
      sentences.push({
        text: currentWords.join(' ').trim(),
        start: currentStart,
        // Fall back to start_time only when end_time is absent; a 0 end_time
        // is a real timestamp and must not be discarded (hence ??, not ||).
        end: word.end_time ?? word.start_time ?? 0,
      });

      // Reset the accumulator for the next sentence (if any words remain).
      if (i < words.length - 1) {
        currentWords = [];
        currentStart = words[i + 1].start_time ?? word.end_time ?? 0;
      }
    }
  }

  return sentences;
}
| |
|
| | |
| | |
| | |
/**
 * Worker message dispatcher.
 *
 * Accepts { type, data } envelopes: 'load' starts model loading, 'transcribe'
 * runs inference and posts a 'transcription' message, 'ping' answers 'pong',
 * anything else (and any thrown error) is reported as an 'error' message.
 */
self.onmessage = async (event) => {
  const { type, data } = event.data;

  try {
    if (type === 'load') {
      await loadModel(data?.modelVersion, data?.options || {});
    } else if (type === 'transcribe') {
      const result = await transcribe(data.audio, data.language);
      self.postMessage({ status: 'transcription', result });
    } else if (type === 'ping') {
      self.postMessage({ status: 'pong' });
    } else {
      self.postMessage({
        status: 'error',
        message: `Unknown message type: ${type}`,
      });
    }
  } catch (error) {
    self.postMessage({
      status: 'error',
      message: error.message,
      error: error.toString(),
    });
  }
};
| |
|