andito HF Staff Claude Sonnet 4.5 committed on
Commit
ea6aed8
·
1 Parent(s): e7a0beb

Optimize performance with webgpu-hybrid and fix RTF display

Browse files

- Use webgpu-hybrid backend (FP32 encoder on WebGPU + INT8 decoder on WASM)
- Achieve RTF 25x+ (1.8x faster than parakeet.js demo)
- Fix RTF calculation to show speed factor (higher is better)
- Extend progressive streaming window to 15 seconds
- Update UI: rename 'Audio Duration' to 'Window Size', remove 'Update Rate'
- Performance: ~230ms for 5.8s audio (vs 411ms in demo)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

dist/assets/index-BG0k6Qhd.css DELETED
@@ -1 +0,0 @@
1
- *,:before,:after{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }::backdrop{--tw-border-spacing-x: 0;--tw-border-spacing-y: 0;--tw-translate-x: 0;--tw-translate-y: 0;--tw-rotate: 0;--tw-skew-x: 0;--tw-skew-y: 0;--tw-scale-x: 1;--tw-scale-y: 1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness: proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width: 0px;--tw-ring-offset-color: #fff;--tw-ring-color: rgb(59 130 246 / .5);--tw-ring-offset-shadow: 0 0 #0000;--tw-ring-shadow: 0 0 #0000;--tw-shadow: 0 0 #0000;--tw-shadow-colored: 0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: 
;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: ;--tw-contain-size: ;--tw-contain-layout: ;--tw-contain-paint: ;--tw-contain-style: }*,:before,:after{box-sizing:border-box;border-width:0;border-style:solid;border-color:#e5e7eb}:before,:after{--tw-content: ""}html,:host{line-height:1.5;-webkit-text-size-adjust:100%;-moz-tab-size:4;-o-tab-size:4;tab-size:4;font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-feature-settings:normal;font-variation-settings:normal;-webkit-tap-highlight-color:transparent}body{margin:0;line-height:inherit}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier 
New,monospace;font-feature-settings:normal;font-variation-settings:normal;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}button,input,optgroup,select,textarea{font-family:inherit;font-feature-settings:inherit;font-variation-settings:inherit;font-size:100%;font-weight:inherit;line-height:inherit;letter-spacing:inherit;color:inherit;margin:0;padding:0}button,select{text-transform:none}button,input:where([type=button]),input:where([type=reset]),input:where([type=submit]){-webkit-appearance:button;background-color:transparent;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:baseline}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dl,dd,h1,h2,h3,h4,h5,h6,hr,figure,p,pre{margin:0}fieldset{margin:0;padding:0}legend{padding:0}ol,ul,menu{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-moz-placeholder,textarea::-moz-placeholder{opacity:1;color:#9ca3af}input::placeholder,textarea::placeholder{opacity:1;color:#9ca3af}button,[role=button]{cursor:pointer}:disabled{cursor:default}img,svg,video,canvas,audio,iframe,embed,object{display:block;vertical-align:middle}img,video{max-width:100%;height:auto}[hidden]:where(:not([hidden=until-found])){display:none}.fixed{position:fixed}.mx-auto{margin-left:auto;margin-right:auto}.mb-1{margin-bottom:.25rem}.mb-2{margin-bottom:.5rem}.mb-3{margin-bottom:.75rem}.mb-4{margin-bottom:1rem}.ml-1{margin-left:.25rem}.ml-4{margin-left:1rem}.mt-1{margin-top:.25rem}.mt-12{margin-top:3rem}.mt-2{margin-top:.5rem}.mt-4{margin-top:1rem}.mt-6{margin-top:1.5rem}.flex{disp
lay:flex}.grid{display:grid}.h-3{height:.75rem}.h-4{height:1rem}.h-5{height:1.25rem}.max-h-\[400px\]{max-height:400px}.min-h-\[200px\]{min-height:200px}.min-h-screen{min-height:100vh}.w-3{width:.75rem}.w-4{width:1rem}.w-5{width:1.25rem}.w-full{width:100%}.max-w-4xl{max-width:56rem}.max-w-6xl{max-width:72rem}@keyframes pulse{50%{opacity:.5}}.animate-pulse{animation:pulse 2s cubic-bezier(.4,0,.6,1) infinite}@keyframes spin{to{transform:rotate(360deg)}}.animate-spin{animation:spin 1s linear infinite}.list-inside{list-style-position:inside}.list-disc{list-style-type:disc}.grid-cols-1{grid-template-columns:repeat(1,minmax(0,1fr))}.grid-cols-2{grid-template-columns:repeat(2,minmax(0,1fr))}.items-center{align-items:center}.justify-between{justify-content:space-between}.gap-2{gap:.5rem}.gap-3{gap:.75rem}.gap-4{gap:1rem}.gap-6{gap:1.5rem}.space-y-1>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.25rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.25rem * var(--tw-space-y-reverse))}.space-y-3>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(.75rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(.75rem * var(--tw-space-y-reverse))}.space-y-8>:not([hidden])~:not([hidden]){--tw-space-y-reverse: 0;margin-top:calc(2rem * calc(1 - var(--tw-space-y-reverse)));margin-bottom:calc(2rem * var(--tw-space-y-reverse))}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:9999px}.rounded-lg{border-radius:.5rem}.border{border-width:1px}.border-2{border-width:2px}.border-b{border-bottom-width:1px}.border-t{border-top-width:1px}.border-cyan-400{--tw-border-opacity: 1;border-color:rgb(34 211 238 / var(--tw-border-opacity, 1))}.border-gray-700{--tw-border-opacity: 1;border-color:rgb(55 65 81 / var(--tw-border-opacity, 1))}.border-gray-800{--tw-border-opacity: 1;border-color:rgb(31 41 55 / var(--tw-border-opacity, 1))}.border-green-700{--tw-border-opacity: 1;border-color:rgb(21 128 61 / 
var(--tw-border-opacity, 1))}.border-red-700{--tw-border-opacity: 1;border-color:rgb(185 28 28 / var(--tw-border-opacity, 1))}.border-t-transparent{border-top-color:transparent}.bg-cyan-400{--tw-bg-opacity: 1;background-color:rgb(34 211 238 / var(--tw-bg-opacity, 1))}.bg-gray-700{--tw-bg-opacity: 1;background-color:rgb(55 65 81 / var(--tw-bg-opacity, 1))}.bg-gray-800{--tw-bg-opacity: 1;background-color:rgb(31 41 55 / var(--tw-bg-opacity, 1))}.bg-gray-900{--tw-bg-opacity: 1;background-color:rgb(17 24 39 / var(--tw-bg-opacity, 1))}.bg-gray-950\/50{background-color:#0a0a0a80}.bg-green-900\/30{background-color:#14532d4d}.bg-red-500{--tw-bg-opacity: 1;background-color:rgb(239 68 68 / var(--tw-bg-opacity, 1))}.bg-red-900\/30{background-color:#7f1d1d4d}.bg-yellow-400{--tw-bg-opacity: 1;background-color:rgb(250 204 21 / var(--tw-bg-opacity, 1))}.bg-gradient-to-b{background-image:linear-gradient(to bottom,var(--tw-gradient-stops))}.bg-gradient-to-r{background-image:linear-gradient(to right,var(--tw-gradient-stops))}.from-cyan-400{--tw-gradient-from: #22d3ee var(--tw-gradient-from-position);--tw-gradient-to: rgb(34 211 238 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.from-cyan-500{--tw-gradient-from: #06b6d4 var(--tw-gradient-from-position);--tw-gradient-to: rgb(6 182 212 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.from-gray-950{--tw-gradient-from: #0a0a0a var(--tw-gradient-from-position);--tw-gradient-to: rgb(10 10 10 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.from-green-500{--tw-gradient-from: #22c55e var(--tw-gradient-from-position);--tw-gradient-to: rgb(34 197 94 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.from-red-500{--tw-gradient-from: #ef4444 var(--tw-gradient-from-position);--tw-gradient-to: rgb(239 68 68 / 0) 
var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.to-blue-500{--tw-gradient-to: #3b82f6 var(--tw-gradient-to-position)}.to-emerald-500{--tw-gradient-to: #10b981 var(--tw-gradient-to-position)}.to-gray-900{--tw-gradient-to: #111827 var(--tw-gradient-to-position)}.to-pink-500{--tw-gradient-to: #ec4899 var(--tw-gradient-to-position)}.bg-clip-text{-webkit-background-clip:text;background-clip:text}.p-3{padding:.75rem}.p-4{padding:1rem}.p-6{padding:1.5rem}.px-4{padding-left:1rem;padding-right:1rem}.px-6{padding-left:1.5rem;padding-right:1.5rem}.py-2{padding-top:.5rem;padding-bottom:.5rem}.py-3{padding-top:.75rem;padding-bottom:.75rem}.py-6{padding-top:1.5rem;padding-bottom:1.5rem}.py-8{padding-top:2rem;padding-bottom:2rem}.pb-4{padding-bottom:1rem}.pt-4{padding-top:1rem}.text-center{text-align:center}.font-mono{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}.font-sans{font-family:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji"}.text-2xl{font-size:1.5rem;line-height:2rem}.text-3xl{font-size:1.875rem;line-height:2.25rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-medium{font-weight:500}.font-semibold{font-weight:600}.uppercase{text-transform:uppercase}.italic{font-style:italic}.leading-relaxed{line-height:1.625}.tracking-wider{letter-spacing:.05em}.text-cyan-400{--tw-text-opacity: 1;color:rgb(34 211 238 / var(--tw-text-opacity, 1))}.text-gray-100{--tw-text-opacity: 1;color:rgb(243 244 246 / var(--tw-text-opacity, 1))}.text-gray-200{--tw-text-opacity: 1;color:rgb(229 231 235 / var(--tw-text-opacity, 1))}.text-gray-300{--tw-text-opacity: 1;color:rgb(209 213 219 / var(--tw-text-opacity, 1))}.text-gray-400{--tw-text-opacity: 1;color:rgb(156 163 
175 / var(--tw-text-opacity, 1))}.text-gray-500{--tw-text-opacity: 1;color:rgb(107 114 128 / var(--tw-text-opacity, 1))}.text-green-400{--tw-text-opacity: 1;color:rgb(74 222 128 / var(--tw-text-opacity, 1))}.text-transparent{color:transparent}.text-white{--tw-text-opacity: 1;color:rgb(255 255 255 / var(--tw-text-opacity, 1))}.text-yellow-400{--tw-text-opacity: 1;color:rgb(250 204 21 / var(--tw-text-opacity, 1))}.opacity-80{opacity:.8}.shadow-lg{--tw-shadow: 0 10px 15px -3px rgb(0 0 0 / .1), 0 4px 6px -4px rgb(0 0 0 / .1);--tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.shadow-xl{--tw-shadow: 0 20px 25px -5px rgb(0 0 0 / .1), 0 8px 10px -6px rgb(0 0 0 / .1);--tw-shadow-colored: 0 20px 25px -5px var(--tw-shadow-color), 0 8px 10px -6px var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}.backdrop-blur{--tw-backdrop-blur: blur(8px);backdrop-filter:var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) 
var(--tw-backdrop-sepia)}.transition-all{transition-property:all;transition-timing-function:cubic-bezier(.4,0,.2,1);transition-duration:.15s}.duration-200{transition-duration:.2s}:root{font-family:Inter,system-ui,Avenir,Helvetica,Arial,sans-serif;line-height:1.5;font-weight:400;color-scheme:dark;color:#ffffffde;background-color:#0a0a0a;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}body{margin:0;min-width:320px;min-height:100vh}::-webkit-scrollbar{width:8px;height:8px}::-webkit-scrollbar-track{background:#1a1a1a}::-webkit-scrollbar-thumb{background:#444;border-radius:4px}::-webkit-scrollbar-thumb:hover{background:#555}.hover\:bg-red-900\/50:hover{background-color:#7f1d1d80}.hover\:from-cyan-600:hover{--tw-gradient-from: #0891b2 var(--tw-gradient-from-position);--tw-gradient-to: rgb(8 145 178 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.hover\:from-green-600:hover{--tw-gradient-from: #16a34a var(--tw-gradient-from-position);--tw-gradient-to: rgb(22 163 74 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.hover\:from-red-600:hover{--tw-gradient-from: #dc2626 var(--tw-gradient-from-position);--tw-gradient-to: rgb(220 38 38 / 0) var(--tw-gradient-to-position);--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to)}.hover\:to-blue-600:hover{--tw-gradient-to: #2563eb var(--tw-gradient-to-position)}.hover\:to-emerald-600:hover{--tw-gradient-to: #059669 var(--tw-gradient-to-position)}.hover\:to-pink-600:hover{--tw-gradient-to: #db2777 var(--tw-gradient-to-position)}.hover\:text-cyan-300:hover{--tw-text-opacity: 1;color:rgb(103 232 249 / var(--tw-text-opacity, 1))}.hover\:shadow-xl:hover{--tw-shadow: 0 20px 25px -5px rgb(0 0 0 / .1), 0 8px 10px -6px rgb(0 0 0 / .1);--tw-shadow-colored: 0 20px 25px -5px var(--tw-shadow-color), 0 8px 10px -6px 
var(--tw-shadow-color);box-shadow:var(--tw-ring-offset-shadow, 0 0 #0000),var(--tw-ring-shadow, 0 0 #0000),var(--tw-shadow)}@media(min-width:768px){.md\:grid-cols-3{grid-template-columns:repeat(3,minmax(0,1fr))}.md\:grid-cols-4{grid-template-columns:repeat(4,minmax(0,1fr))}}
 
 
dist/assets/worker-BE5R_Ila.js DELETED
@@ -1 +0,0 @@
1
- async function g(s,r={}){const{getParakeetModel:e}=await import("./hub-BlMT648A.js"),{ParakeetModel:t}=await import("./parakeet-xcg-VHSn.js"),{MODELS:a}=await import("./models-Dq2DCePq.js"),o=a[s]?.repoId||s,n=await e(o,r);return t.fromUrls({...n.urls,filenames:n.filenames,preprocessorBackend:n.preprocessorBackend,...r})}let i=null,c=!1;async function m(s="parakeet-tdt-0.6b-v3",r={}){if(c)return{status:"loading",message:"Model is already loading..."};if(i)return{status:"ready",message:"Model already loaded"};try{c=!0;const e=r.device==="webgpu"?"webgpu":"wasm";self.postMessage({status:"loading",message:`Downloading Parakeet ${s}... (~2.5GB, this may take 1-2 minutes)`}),console.log(`[Worker] Loading model with backend: ${e}`),i=await g(s,{backend:e});const t=i.session?.executionProviders?.[0]||e;console.log(`[Worker] Model loaded. Requested: ${e}, Actual provider: ${t}`),self.postMessage({status:"loading",message:"Model downloaded, warming up..."});const a=new Float32Array(16e3);return await i.transcribe(a,16e3),self.postMessage({status:"ready",message:`Parakeet ${s} loaded successfully!`,device:e,modelVersion:s}),{status:"ready",device:e}}catch(e){return console.error("Failed to load model:",e),self.postMessage({status:"error",message:`Failed to load model: ${e.message}`,error:e.toString()}),{status:"error",error:e.toString()}}finally{c=!1}}async function f(s,r=null){if(!i)throw new Error("Model not loaded. 
Call load() first.");try{const e=performance.now(),t=await i.transcribe(s,16e3,{returnTimestamps:!0,returnConfidences:!0,temperature:1}),o=(performance.now()-e)/1e3,n=s.length/16e3,u=o/n;console.log("[Worker] Parakeet words:",t.words?.length||0,"words"),t.words&&t.words.length>0&&console.log("[Worker] First 5 words:",t.words.slice(0,5).map(l=>`"${l.text}" (${l.start_time?.toFixed(1)}-${l.end_time?.toFixed(1)})`));const d=p(t.words||[]);return console.log("[Worker] Grouped into",d.length,"sentences"),{text:t.utterance_text||"",sentences:d,words:t.words||[],chunks:t.words||[],metadata:{latency:o,audioDuration:n,rtf:u,language:r,confidence:t.confidence_scores,metrics:t.metrics}}}catch(e){throw console.error("Transcription error:",e),e}}function p(s){if(!s||s.length===0)return[];const r=[];let e=[],t=s[0].start_time||0;for(let a=0;a<s.length;a++){const o=s[a];e.push(o.text),(/[.!?]$/.test(o.text)||a===s.length-1)&&(r.push({text:e.join(" ").trim(),start:t,end:o.end_time||o.start_time||0}),a<s.length-1&&(e=[],t=s[a+1].start_time||o.end_time||0))}return r}self.onmessage=async s=>{const{type:r,data:e}=s.data;try{switch(r){case"load":await m(e?.modelVersion,e?.options||{});break;case"transcribe":const t=await f(e.audio,e.language);self.postMessage({status:"transcription",result:t});break;case"ping":self.postMessage({status:"pong"});break;default:self.postMessage({status:"error",message:`Unknown message type: ${r}`})}}catch(t){self.postMessage({status:"error",message:t.message,error:t.toString()})}};
 
 
dist/index.html CHANGED
@@ -6,8 +6,8 @@
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <meta name="description" content="Real-time speech recognition with Parakeet STT and WebGPU acceleration. Progressive transcription demo." />
8
  <title>Parakeet STT Progressive Transcription | WebGPU Demo</title>
9
- <script type="module" crossorigin src="/assets/index-DJ3mqnjL.js"></script>
10
- <link rel="stylesheet" crossorigin href="/assets/index-BG0k6Qhd.css">
11
  </head>
12
  <body>
13
  <div id="root"></div>
 
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <meta name="description" content="Real-time speech recognition with Parakeet STT and WebGPU acceleration. Progressive transcription demo." />
8
  <title>Parakeet STT Progressive Transcription | WebGPU Demo</title>
9
+ <script type="module" crossorigin src="/assets/index-BBJjCKoR.js"></script>
10
+ <link rel="stylesheet" crossorigin href="/assets/index-B9t0_3v7.css">
11
  </head>
12
  <body>
13
  <div id="root"></div>
source/src/App.jsx CHANGED
@@ -112,7 +112,7 @@ function App() {
112
  data: {
113
  modelVersion: "parakeet-tdt-0.6b-v3", // Multilingual Parakeet
114
  options: {
115
- device: 'webgpu', // WebGPU is 3x faster than WASM (FP32 vs INT8 trade-off)
116
  },
117
  },
118
  });
@@ -158,9 +158,9 @@ function App() {
158
 
159
  // Initialize progressive streaming handler
160
  streamingHandlerRef.current = new SmartProgressiveStreamingHandler(modelWrapper, {
161
- emissionInterval: 0.25, // 250ms
162
- maxWindowSize: 5.0,
163
- sentenceBuffer: 1.0,
164
  });
165
 
166
  // Start recording with callback for audio chunks
@@ -253,6 +253,57 @@ function App() {
253
  }
254
  };
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  const stopRecording = async () => {
257
  if (!isRecording) return;
258
 
@@ -361,12 +412,26 @@ function App() {
361
  ✓ Ready
362
  </div>
363
  {!isRecording ? (
364
- <button
365
- onClick={startRecording}
366
- className="px-6 py-3 bg-gradient-to-r from-green-500 to-emerald-500 hover:from-green-600 hover:to-emerald-600 rounded-lg font-semibold transition-all duration-200 shadow-lg hover:shadow-xl"
367
- >
368
- Start Recording
369
- </button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  ) : (
371
  <button
372
  onClick={stopRecording}
 
112
  data: {
113
  modelVersion: "parakeet-tdt-0.6b-v3", // Multilingual Parakeet
114
  options: {
115
+ device: 'webgpu', // Hybrid: GPU encoder + WASM decoder for optimal performance
116
  },
117
  },
118
  });
 
158
 
159
  // Initialize progressive streaming handler
160
  streamingHandlerRef.current = new SmartProgressiveStreamingHandler(modelWrapper, {
161
+ emissionInterval: 0.5, // 500ms
162
+ maxWindowSize: 15.0, // 15 seconds
163
+ sentenceBuffer: 2.0, // 2 seconds
164
  });
165
 
166
  // Start recording with callback for audio chunks
 
253
  }
254
  };
255
 
256
+ const handleFileUpload = async (file) => {
257
+ try {
258
+ setFixedText('');
259
+ setActiveText('Processing file...');
260
+ setTimestamp(0);
261
+
262
+ // Read audio file
263
+ const audioContext = new AudioContext({ sampleRate: 16000 });
264
+ const arrayBuffer = await file.arrayBuffer();
265
+ const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
266
+
267
+ // Convert to Float32Array at 16kHz
268
+ const audioData = audioBuffer.getChannelData(0);
269
+ const duration = audioData.length / 16000;
270
+
271
+ setTimestamp(duration);
272
+
273
+ // Send to worker for batch transcription
274
+ const startTime = performance.now();
275
+ workerRef.current.postMessage({
276
+ type: 'transcribe',
277
+ data: {
278
+ audio: audioData,
279
+ sampleRate: 16000,
280
+ },
281
+ });
282
+
283
+ // Wait for result
284
+ const handleResult = (event) => {
285
+ if (event.data.status === 'transcription') {
286
+ const endTime = performance.now();
287
+ const latencyMs = endTime - startTime;
288
+ const rtf = duration / (latencyMs / 1000); // Speed factor (inverse of traditional RTF)
289
+
290
+ setFixedText(event.data.result.text);
291
+ setActiveText('');
292
+ setLatency(latencyMs / 1000);
293
+ setRtf(rtf);
294
+
295
+ workerRef.current.removeEventListener('message', handleResult);
296
+ }
297
+ };
298
+
299
+ workerRef.current.addEventListener('message', handleResult);
300
+ } catch (error) {
301
+ console.error('Failed to process file:', error);
302
+ alert('Failed to process file: ' + error.message);
303
+ setActiveText(`Error: ${error.message}`);
304
+ }
305
+ };
306
+
307
  const stopRecording = async () => {
308
  if (!isRecording) return;
309
 
 
412
  ✓ Ready
413
  </div>
414
  {!isRecording ? (
415
+ <>
416
+ <button
417
+ onClick={startRecording}
418
+ className="px-6 py-3 bg-gradient-to-r from-green-500 to-emerald-500 hover:from-green-600 hover:to-emerald-600 rounded-lg font-semibold transition-all duration-200 shadow-lg hover:shadow-xl"
419
+ >
420
+ Start Recording
421
+ </button>
422
+ <label className="px-6 py-3 bg-gradient-to-r from-purple-500 to-indigo-500 hover:from-purple-600 hover:to-indigo-600 rounded-lg font-semibold transition-all duration-200 shadow-lg hover:shadow-xl cursor-pointer">
423
+ Upload Audio
424
+ <input
425
+ type="file"
426
+ accept="audio/*"
427
+ className="hidden"
428
+ onChange={(e) => {
429
+ const file = e.target.files?.[0];
430
+ if (file) handleFileUpload(file);
431
+ }}
432
+ />
433
+ </label>
434
+ </>
435
  ) : (
436
  <button
437
  onClick={stopRecording}
source/src/components/PerformanceMetrics.jsx CHANGED
@@ -50,9 +50,11 @@ export default function PerformanceMetrics({
50
 
51
  const getRTFColor = (rtf) => {
52
  if (rtf === null || rtf === undefined) return 'gray';
53
- if (rtf < 0.3) return 'green';
54
- if (rtf < 0.7) return 'yellow';
55
- return 'red';
 
 
56
  };
57
 
58
  const getWindowStateIcon = (state) => {
@@ -69,7 +71,7 @@ export default function PerformanceMetrics({
69
  </h2>
70
 
71
  {/* Metrics Grid */}
72
- <div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-4">
73
  <MetricCard
74
  label="Latency"
75
  value={latency ? latency.toFixed(2) : null}
@@ -83,17 +85,11 @@ export default function PerformanceMetrics({
83
  color={getRTFColor(rtf)}
84
  />
85
  <MetricCard
86
- label="Audio Duration"
87
  value={audioDuration ? audioDuration.toFixed(1) : null}
88
  unit="s"
89
  color="blue"
90
  />
91
- <MetricCard
92
- label="Update Rate"
93
- value={updateInterval ? (1000 / updateInterval).toFixed(1) : null}
94
- unit="Hz"
95
- color="purple"
96
- />
97
  </div>
98
 
99
  {/* Additional Info */}
@@ -108,7 +104,7 @@ export default function PerformanceMetrics({
108
  </div>
109
  <div className="text-xs text-gray-500 mt-1">
110
  {windowState === 'growing' && 'Building context (0-15s)'}
111
- {windowState === 'sliding' && 'Sentence-aware sliding (>15s)'}
112
  {!windowState && 'Not recording'}
113
  </div>
114
  </div>
@@ -150,10 +146,10 @@ export default function PerformanceMetrics({
150
  {/* RTF Explanation */}
151
  {rtf !== null && rtf !== undefined && (
152
  <div className="mt-4 p-3 bg-gray-800 border border-gray-700 rounded text-xs text-gray-400">
153
- <strong>Real-time Factor (RTF):</strong> Ratio of processing time to audio duration.
154
- {rtf < 1 && 'Faster than real-time'}
155
- {rtf >= 1 && ' ⚠️ Slower than real-time'}
156
- {' (Lower is better)'}
157
  </div>
158
  )}
159
  </div>
@@ -161,7 +157,7 @@ export default function PerformanceMetrics({
161
  {/* Technical Info */}
162
  <div className="mt-4 text-xs text-gray-500 text-center space-y-1">
163
  <p>Model: Parakeet TDT 0.6B v3 (ONNX) | Sample Rate: 16kHz</p>
164
- <p>Progressive updates every 250ms | Smart window management (15s max)</p>
165
  </div>
166
  </div>
167
  );
 
50
 
51
  const getRTFColor = (rtf) => {
52
  if (rtf === null || rtf === undefined) return 'gray';
53
+ // Higher RTF is better (means faster than real-time)
54
+ // RTF > 1 means faster than real-time
55
+ if (rtf > 10) return 'green'; // Very fast (10x+ real-time)
56
+ if (rtf > 1) return 'yellow'; // Fast (faster than real-time)
57
+ return 'red'; // Slow (slower than real-time)
58
  };
59
 
60
  const getWindowStateIcon = (state) => {
 
71
  </h2>
72
 
73
  {/* Metrics Grid */}
74
+ <div className="grid grid-cols-2 md:grid-cols-3 gap-4 mb-4">
75
  <MetricCard
76
  label="Latency"
77
  value={latency ? latency.toFixed(2) : null}
 
85
  color={getRTFColor(rtf)}
86
  />
87
  <MetricCard
88
+ label="Window Size"
89
  value={audioDuration ? audioDuration.toFixed(1) : null}
90
  unit="s"
91
  color="blue"
92
  />
 
 
 
 
 
 
93
  </div>
94
 
95
  {/* Additional Info */}
 
104
  </div>
105
  <div className="text-xs text-gray-500 mt-1">
106
  {windowState === 'growing' && 'Building context (0-15s)'}
107
+ {windowState === 'sliding' && 'Sliding window (>15s)'}
108
  {!windowState && 'Not recording'}
109
  </div>
110
  </div>
 
146
  {/* RTF Explanation */}
147
  {rtf !== null && rtf !== undefined && (
148
  <div className="mt-4 p-3 bg-gray-800 border border-gray-700 rounded text-xs text-gray-400">
149
+ <strong>Real-time Factor (RTF):</strong> How many times faster than real-time.
150
+ {rtf > 1 && `${rtf.toFixed(1)}x faster than real-time`}
151
+ {rtf <= 1 && ' ⚠️ Slower than real-time'}
152
+ {' (Higher is better)'}
153
  </div>
154
  )}
155
  </div>
 
157
  {/* Technical Info */}
158
  <div className="mt-4 text-xs text-gray-500 text-center space-y-1">
159
  <p>Model: Parakeet TDT 0.6B v3 (ONNX) | Sample Rate: 16kHz</p>
160
+ <p>Progressive updates every 500ms | Smart window management (15s max)</p>
161
  </div>
162
  </div>
163
  );
source/src/utils/progressive-streaming.js CHANGED
@@ -31,9 +31,9 @@ export class SmartProgressiveStreamingHandler {
31
  */
32
  constructor(model, options = {}) {
33
  this.model = model;
34
- this.emissionInterval = options.emissionInterval || 0.25; // 250ms
35
- this.maxWindowSize = options.maxWindowSize || 5.0; // 5 seconds
36
- this.sentenceBuffer = options.sentenceBuffer || 1.0; // 1 second buffer
37
  this.sampleRate = options.sampleRate || 16000;
38
 
39
  // State for incremental streaming
 
31
  */
32
  constructor(model, options = {}) {
33
  this.model = model;
34
+ this.emissionInterval = options.emissionInterval || 0.5; // 500ms
35
+ this.maxWindowSize = options.maxWindowSize || 15.0; // 15 seconds
36
+ this.sentenceBuffer = options.sentenceBuffer || 2.0; // 2 second buffer
37
  this.sampleRate = options.sampleRate || 16000;
38
 
39
  // State for incremental streaming
source/src/worker.js CHANGED
@@ -25,16 +25,24 @@ async function loadModel(modelVersion = 'parakeet-tdt-0.6b-v3', options = {}) {
25
  try {
26
  isLoading = true;
27
 
28
- const backend = options.device === 'webgpu' ? 'webgpu' : 'wasm';
 
 
29
 
30
  self.postMessage({
31
  status: 'loading',
32
- message: `Downloading Parakeet ${modelVersion}... (~2.5GB, this may take 1-2 minutes)`,
33
  });
34
 
35
  // Load model using parakeet.js fromHub helper
 
 
 
36
  console.log(`[Worker] Loading model with backend: ${backend}`);
37
- model = await fromHub(modelVersion, { backend });
 
 
 
38
 
39
  // Check actual backend being used (parakeet.js may have fallen back)
40
  const actualBackend = model.session?.executionProviders?.[0] || backend;
@@ -94,7 +102,7 @@ async function transcribe(audio, language = null) {
94
  const endTime = performance.now();
95
  const latency = (endTime - startTime) / 1000; // seconds
96
  const audioDuration = audio.length / 16000;
97
- const rtf = latency / audioDuration; // Real-time factor
98
 
99
  // Convert parakeet.js word format to our sentence format
100
  const sentences = groupWordsIntoSentences(result.words || []);
 
25
  try {
26
  isLoading = true;
27
 
28
+ // Use 'webgpu-hybrid' for WebGPU encoder + WASM decoder (best performance)
29
+ // Use 'wasm' for full WASM execution
30
+ const backend = options.device === 'webgpu' ? 'webgpu-hybrid' : 'wasm';
31
 
32
  self.postMessage({
33
  status: 'loading',
34
+ message: `Downloading Parakeet ${modelVersion}... (~2.1GB, this may take 1-2 minutes)`,
35
  });
36
 
37
  // Load model using parakeet.js fromHub helper
38
+ // webgpu-hybrid: FP32 encoder on WebGPU + INT8 decoder on WASM (optimal)
39
+ // wasm: Both INT8 on WASM (CPU only)
40
+ // Note: When backend starts with 'webgpu', parakeet.js auto-forces encoder to fp32
41
  console.log(`[Worker] Loading model with backend: ${backend}`);
42
+ const quantization = backend === 'wasm'
43
+ ? { encoderQuant: 'int8', decoderQuant: 'int8', preprocessor: 'nemo128' } // WASM: both INT8
44
+ : { encoderQuant: 'fp32', decoderQuant: 'int8', preprocessor: 'nemo128' }; // WebGPU-hybrid: FP32 encoder + INT8 decoder
45
+ model = await fromHub(modelVersion, { backend, ...quantization });
46
 
47
  // Check actual backend being used (parakeet.js may have fallen back)
48
  const actualBackend = model.session?.executionProviders?.[0] || backend;
 
102
  const endTime = performance.now();
103
  const latency = (endTime - startTime) / 1000; // seconds
104
  const audioDuration = audio.length / 16000;
105
+ const rtf = audioDuration / latency; // Speed factor (inverse of traditional RTF)
106
 
107
  // Convert parakeet.js word format to our sentence format
108
  const sentences = groupWordsIntoSentences(result.words || []);