diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..05e8cde9acd952be25fdf5b01d8e238b40197c6e --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +VITE_GEMINI_API_KEY="YOUR_API_KEY_HERE" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4d29575de80483b005c29bfcac5061cd2f45313e..595d1abb50554fef678e96546162846f98795c11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,23 +1,64 @@ -# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. +# Dependencies +bower_components/ +node_modules/ +package-lock.json -# dependencies -/node_modules -/.pnp -.pnp.js +# Build and Distribution +build/ +dist/ +dist-ssr/ +*.js.map +*.map +*.tsbuildinfo -# testing -/coverage - -# production -/build - -# misc -.DS_Store +# Environment and Local Config +.env .env.local -.env.development.local -.env.test.local -.env.production.local +.env.*.local +*.local +*.pem +# Logs +*.log +lerna-debug.log* +logs/ npm-debug.log* +pnpm-debug.log* +tsd-debug.log yarn-debug.log* yarn-error.log* + +# Editor and OS +.DS_Store +.idea/ +.vscode/ +!.vscode/extensions.json +*.njsproj +*.ntvs* +*.sln +*.suo +*.sw? 
+ +# Testing and Coverage +.nyc_output/ +.pytest_cache/ +coverage/ +lib-cov/ + +# Runtime and Temporary Files +*.pid +*.pid.lock +*.seed +pids/ + +# Large trace files (Chrome performance traces) +metrics/*.json +!metrics/trace_analysis_summary.json + +# Misc +.grunt/ +*.wim +docs/ +legacy/ +public/models/ +performance-trace.json.gz \ No newline at end of file diff --git a/README.md b/README.md index 7a71e936eed8014213c39ffcc3ddd40f38c9f28e..49d4cb2182928e9c63959c137772c999b6ae0624 100644 --- a/README.md +++ b/README.md @@ -1,81 +1,49 @@ --- title: Keet Streaming -emoji: 🐠 +emoji: 🎙️ colorFrom: indigo -colorTo: red +colorTo: blue sdk: static pinned: false +license: mit +short_description: Real-time browser ASR with parakeet.js + WebGPU app_build_command: npm run build -app_file: build/index.html +app_file: dist/index.html +models: +- istupakov/parakeet-tdt-0.6b-v2-onnx +tags: +- asr +- speech-to-text +- parakeet +- parakeet-js +- onnx +- webgpu +custom_headers: + cross-origin-embedder-policy: require-corp + cross-origin-opener-policy: same-origin + cross-origin-resource-policy: cross-origin --- -# Getting Started with Create React App +# Keet Streaming -This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). +Keet is a real-time, privacy-first transcription app built with SolidJS + Vite and powered by `parakeet.js`. +Inference runs in the browser (WebGPU/WASM) with no backend transcription service. -## Available Scripts +## Run locally -In the project directory, you can run: +```bash +npm install +npm run dev +``` -### `npm start` +## Hugging Face Spaces -Runs the app in the development mode.\ -Open [http://localhost:3000](http://localhost:3000) to view it in your browser. +This Space uses the **static** SDK and builds directly from source: -The page will reload when you make changes.\ -You may also see any lint errors in the console. 
+- Build command: `npm run build` +- Output: `dist/index.html` -### `npm test` +## Notes -Launches the test runner in the interactive watch mode.\ -See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. - -### `npm run build` - -Builds the app for production to the `build` folder.\ -It correctly bundles React in production mode and optimizes the build for the best performance. - -The build is minified and the filenames include the hashes.\ -Your app is ready to be deployed! - -See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. - -### `npm run eject` - -**Note: this is a one-way operation. Once you `eject`, you can't go back!** - -If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. - -Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own. - -You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it. - -## Learn More - -You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). - -To learn React, check out the [React documentation](https://reactjs.org/). 
- -### Code Splitting - -This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting) - -### Analyzing the Bundle Size - -This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size) - -### Making a Progressive Web App - -This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app) - -### Advanced Configuration - -This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration) - -### Deployment - -This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment) - -### `npm run build` fails to minify - -This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify) +- The app requires microphone access in the browser. +- COEP/COOP headers are configured in this README front matter for worker/wasm isolation. diff --git a/index.html b/index.html new file mode 100644 index 0000000000000000000000000000000000000000..126c55250877f3c424e6511cc4b545bb859c5e36 --- /dev/null +++ b/index.html @@ -0,0 +1,44 @@ + + + + + + + + + + + + + Keet - Real-time Transcription + + + + + + + + + + + + +
+ + + + + + diff --git a/package.json b/package.json index 9623fe016ea523afac617d6d68ddc568905ee0fb..c44985b1bb7b338a7bd1c6aea3abc32093147d6c 100644 --- a/package.json +++ b/package.json @@ -1,39 +1,43 @@ { - "name": "react-template", - "version": "0.1.0", - "private": true, - "dependencies": { - "@testing-library/dom": "^10.4.0", - "@testing-library/jest-dom": "^6.6.3", - "@testing-library/react": "^16.3.0", - "@testing-library/user-event": "^13.5.0", - "react": "^19.1.0", - "react-dom": "^19.1.0", - "react-scripts": "5.0.1", - "web-vitals": "^2.1.4" - }, + "name": "keet", + "version": "1.0.0", + "description": "Keet real-time transcription with parakeet.js", + "type": "module", "scripts": { - "start": "react-scripts start", - "build": "react-scripts build", - "test": "react-scripts test", - "eject": "react-scripts eject" + "start": "vite", + "dev": "vite", + "dev:local": "cross-env USE_LOCAL_PARAKEET=true vite", + "build": "vite build", + "build:local": "cross-env USE_LOCAL_PARAKEET=true vite build", + "serve": "vite preview", + "test": "vitest run", + "test:watch": "vitest" }, - "eslintConfig": { - "extends": [ - "react-app", - "react-app/jest" - ] + "license": "MIT", + "devDependencies": { + "@tailwindcss/forms": "^0.5.10", + "@tailwindcss/vite": "^4.1.18", + "@vitest/web-worker": "^4.0.18", + "cross-env": "^7.0.3", + "happy-dom": "^20.5.0", + "postcss": "^8.5.6", + "tailwindcss": "^4.1.11", + "typescript": "^5.7.2", + "vite": "^6.0.0", + "vite-plugin-solid": "^2.11.6", + "vitest": "^4.0.18" }, - "browserslist": { - "production": [ - ">0.2%", - "not dead", - "not op_mini all" - ], - "development": [ - "last 1 chrome version", - "last 1 firefox version", - "last 1 safari version" - ] + "dependencies": { + "@google/generative-ai": "^0.24.1", + "@huggingface/transformers": "^3.6.1", + "@solid-primitives/transition-group": "^1.1.2", + "@thisbeyond/solid-dnd": "^0.7.5", + "material-icons": "^1.13.14", + "onnxruntime-web": "1.24.1", + "parakeet.js": "1.2.1", + 
"solid-js": "^1.9.5", + "uuid": "^11.1.0", + "wink-eng-lite-web-model": "^1.8.1", + "wink-nlp": "^2.4.0" } } diff --git a/public/icons/.keep b/public/icons/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/public/index.html b/public/index.html deleted file mode 100644 index aa069f27cbd9d53394428171c3989fd03db73c76..0000000000000000000000000000000000000000 --- a/public/index.html +++ /dev/null @@ -1,43 +0,0 @@ - - - - - - - - - - - - - React App - - - -
- - - diff --git a/public/logo192.png b/public/logo192.png deleted file mode 100644 index fc44b0a3796c0e0a64c3d858ca038bd4570465d9..0000000000000000000000000000000000000000 Binary files a/public/logo192.png and /dev/null differ diff --git a/public/logo512.png b/public/logo512.png deleted file mode 100644 index a4e47a6545bc15971f8f63fba70e4013df88a664..0000000000000000000000000000000000000000 Binary files a/public/logo512.png and /dev/null differ diff --git a/public/manifest.json b/public/manifest.json index 080d6c77ac21bb2ef88a6992b2b73ad93daaca92..029bfe74282cf94749361f1d5547fda86fdf2310 100644 --- a/public/manifest.json +++ b/public/manifest.json @@ -1,25 +1,15 @@ { - "short_name": "React App", - "name": "Create React App Sample", + "short_name": "Keet", + "name": "Keet - Real-time Transcription", + "start_url": ".", + "display": "standalone", + "theme_color": "#6B705C", + "background_color": "#F9F7F2", "icons": [ { "src": "favicon.ico", "sizes": "64x64 32x32 24x24 16x16", "type": "image/x-icon" - }, - { - "src": "logo192.png", - "type": "image/png", - "sizes": "192x192" - }, - { - "src": "logo512.png", - "type": "image/png", - "sizes": "512x512" } - ], - "start_url": ".", - "display": "standalone", - "theme_color": "#000000", - "background_color": "#ffffff" + ] } diff --git a/public/robots.txt b/public/robots.txt deleted file mode 100644 index e9e57dc4d41b9b46e05112e9f45b7ea6ac0ba15e..0000000000000000000000000000000000000000 --- a/public/robots.txt +++ /dev/null @@ -1,3 +0,0 @@ -# https://www.robotstxt.org/robotstxt.html -User-agent: * -Disallow: diff --git a/public/sw.js b/public/sw.js new file mode 100644 index 0000000000000000000000000000000000000000..537af2f892e31b914bb9b88db50a7806bfba58d0 --- /dev/null +++ b/public/sw.js @@ -0,0 +1,172 @@ +/** + * Keet Service Worker + * Story 3.1: Offline-first caching strategy + * + * Strategy: + * - App shell (HTML, CSS, JS): Cache-first, update in background + * - Model files: Cache-first (large files, rarely change) + 
* - API/Dynamic: Network-first with fallback + * - Cross-origin isolation headers injected for SharedArrayBuffer / WebGPU support + */ + +const CACHE_NAME = 'keet-v2'; +const MODEL_CACHE = 'keet-models-v1'; + +// Base path from SW script URL (works at / and /keet/ on GitHub Pages) +const BASE = (() => { + const path = self.location.pathname; + const i = path.lastIndexOf('/'); + return i >= 0 ? path.slice(0, i + 1) : '/'; +})(); + +// App shell files to pre-cache (base-relative) +const APP_SHELL = [ + BASE, + BASE + 'index.html', + BASE + 'manifest.json', +]; + +// Model file patterns (cached on-demand) +const MODEL_PATTERNS = [ + /\.onnx\.data$/, + /\.onnx$/, + /\.bin$/, + /vocab\.txt$/, + /tokenizer\.json$/, +]; + +/** + * Add Cross-Origin Isolation headers to a response. + * This enables SharedArrayBuffer and WebGPU on static hosts (e.g. GitHub Pages) + * that don't allow custom response headers. + * Equivalent to what coi-serviceworker.js does in the parakeet.js demo. + */ +function addCOIHeaders(response) { + if (response.status === 0) { + return response; + } + const newHeaders = new Headers(response.headers); + newHeaders.set("Cross-Origin-Embedder-Policy", "require-corp"); + newHeaders.set("Cross-Origin-Resource-Policy", "cross-origin"); + newHeaders.set("Cross-Origin-Opener-Policy", "same-origin"); + return new Response(response.body, { + status: response.status, + statusText: response.statusText, + headers: newHeaders, + }); +} + +// Install event - pre-cache app shell +self.addEventListener('install', (event) => { + console.log('[SW] Installing...'); + event.waitUntil( + caches.open(CACHE_NAME) + .then((cache) => { + console.log('[SW] Pre-caching app shell'); + return cache.addAll(APP_SHELL); + }) + .then(() => self.skipWaiting()) + ); +}); + +// Activate event - clean old caches +self.addEventListener('activate', (event) => { + console.log('[SW] Activating...'); + event.waitUntil( + caches.keys() + .then((cacheNames) => { + return Promise.all( + 
cacheNames + .filter((name) => name !== CACHE_NAME && name !== MODEL_CACHE) + .map((name) => { + console.log('[SW] Deleting old cache:', name); + return caches.delete(name); + }) + ); + }) + .then(() => self.clients.claim()) + ); +}); + +// Fetch event - serve from cache or network, inject COI headers +self.addEventListener('fetch', (event) => { + const url = new URL(event.request.url); + + // Skip non-GET requests + if (event.request.method !== 'GET') return; + + // Skip chrome-extension and other non-http(s) requests + if (!url.protocol.startsWith('http')) return; + + // Check if this is a model file + const isModelFile = MODEL_PATTERNS.some((pattern) => pattern.test(url.pathname)); + + if (isModelFile) { + // Model files: Cache-first (they're large and rarely change) + event.respondWith( + caches.open(MODEL_CACHE) + .then((cache) => { + return cache.match(event.request) + .then((cached) => { + if (cached) { + console.log('[SW] Model from cache:', url.pathname); + return cached; + } + console.log('[SW] Fetching model:', url.pathname); + return fetch(event.request) + .then((response) => { + if (response.ok) { + cache.put(event.request, response.clone()); + } + return response; + }); + }); + }) + ); + return; + } + + // App shell: Cache-first with network fallback + COI headers + if (url.origin === self.location.origin) { + event.respondWith( + caches.match(event.request) + .then((cached) => { + const fetchPromise = fetch(event.request) + .then((response) => { + if (response.ok) { + const responseClone = response.clone(); + caches.open(CACHE_NAME) + .then((cache) => cache.put(event.request, responseClone)); + } + return addCOIHeaders(response); + }) + .catch(() => cached); + + return cached ? 
addCOIHeaders(cached) : fetchPromise; + }) + ); + return; + } + + // External resources: Network-first + COI headers + event.respondWith( + fetch(event.request) + .then((response) => addCOIHeaders(response)) + .catch(() => caches.match(event.request)) + ); +}); + +// Message handler for cache management +self.addEventListener('message', (event) => { + if (event.data.type === 'SKIP_WAITING') { + self.skipWaiting(); + } + + if (event.data.type === 'CLEAR_MODEL_CACHE') { + caches.delete(MODEL_CACHE) + .then(() => { + console.log('[SW] Model cache cleared'); + event.ports[0].postMessage({ success: true }); + }); + } +}); diff --git a/public/wasm/ten_vad.js b/public/wasm/ten_vad.js new file mode 100644 index 0000000000000000000000000000000000000000..c5615acd83f9ab1b7705b40e63a37dc3f635b4de --- /dev/null +++ b/public/wasm/ten_vad.js @@ -0,0 +1,30 @@ + +var createVADModule = (() => { + var _scriptDir = import.meta.url; + + return ( +function(createVADModule) { + createVADModule = createVADModule || {}; + + +var a;a||(a=typeof createVADModule !== 'undefined' ? 
createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w; +if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary); +var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[]; +function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". 
Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href; +function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0>>=0;if(2147483648=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e>2], +X=F[c+4>>2];c+=8;for(var G=0;G=q);)++t;if(16g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1=== +b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}}; +(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h, +b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof 
WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})(); +a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)}; +a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)}; +function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0 -
- logo -

- Edit src/App.js and save to reload. -

- - Learn React - -
- - ); -} - -export default App; diff --git a/src/App.test.js b/src/App.test.js deleted file mode 100644 index 1f03afeece5ac28064fa3c73a29215037465f789..0000000000000000000000000000000000000000 --- a/src/App.test.js +++ /dev/null @@ -1,8 +0,0 @@ -import { render, screen } from '@testing-library/react'; -import App from './App'; - -test('renders learn react link', () => { - render(); - const linkElement = screen.getByText(/learn react/i); - expect(linkElement).toBeInTheDocument(); -}); diff --git a/src/App.tsx b/src/App.tsx new file mode 100644 index 0000000000000000000000000000000000000000..e2ea6b54678c2a85d3bcc2caf383cdde74bf0cfb --- /dev/null +++ b/src/App.tsx @@ -0,0 +1,1037 @@ +import { Component, Show, For, createSignal, createEffect, onMount, onCleanup } from 'solid-js'; +import { appStore } from './stores/appStore'; +import { CompactWaveform, ModelLoadingOverlay, DebugPanel, TranscriptionDisplay, SettingsContent } from './components'; +import { getModelDisplayName, MODELS } from './components/ModelLoadingOverlay'; +import { AudioEngine } from './lib/audio'; +import { MelWorkerClient } from './lib/audio/MelWorkerClient'; +import { TranscriptionWorkerClient } from './lib/transcription'; +import { HybridVAD } from './lib/vad'; +import { WindowBuilder } from './lib/transcription/WindowBuilder'; +import { BufferWorkerClient } from './lib/buffer'; +import { TenVADWorkerClient } from './lib/vad/TenVADWorkerClient'; +import type { V4ProcessResult } from './lib/transcription/TranscriptionWorkerClient'; +import type { BufferWorkerConfig, TenVADResult } from './lib/buffer/types'; +import { formatDuration } from './utils/time'; + +// Singleton instances +let audioEngine: AudioEngine | null = null; +export const [audioEngineSignal, setAudioEngineSignal] = createSignal(null); + +let workerClient: TranscriptionWorkerClient | null = null; +let melClient: MelWorkerClient | null = null; +export const [melClientSignal, setMelClientSignal] = createSignal(null); +let 
segmentUnsubscribe: (() => void) | null = null; +let windowUnsubscribe: (() => void) | null = null; +let melChunkUnsubscribe: (() => void) | null = null; +let visualizationUnsubscribe: (() => void) | undefined; +// v4 pipeline instances +let hybridVAD: HybridVAD | null = null; +let bufferClient: BufferWorkerClient | null = null; +let tenVADClient: TenVADWorkerClient | null = null; +let windowBuilder: WindowBuilder | null = null; +let v4TickTimeout: number | undefined; +let v4TickRunning = false; +let v4AudioChunkUnsubscribe: (() => void) | null = null; +let v4MelChunkUnsubscribe: (() => void) | null = null; +let v4InferenceBusy = false; +let v4LastInferenceTime = 0; +// Global sample counter for audio chunks (tracks total samples written to BufferWorker) +let v4GlobalSampleOffset = 0; +// Throttle UI updates from TEN-VAD to at most once per frame +let pendingSileroProb: number | null = null; +let sileroUpdateScheduled = false; +let pendingVadState: { + isSpeech: boolean; + energy: number; + snr: number; + hybridState: string; + sileroProbability?: number; +} | null = null; +let vadUpdateScheduled = false; + +const scheduleSileroUpdate = (prob: number) => { + pendingSileroProb = prob; + if (sileroUpdateScheduled) return; + sileroUpdateScheduled = true; + requestAnimationFrame(() => { + sileroUpdateScheduled = false; + if (pendingSileroProb === null) return; + const currentState = appStore.vadState(); + appStore.setVadState({ + ...currentState, + sileroProbability: pendingSileroProb, + }); + }); +}; + +const scheduleVadStateUpdate = (next: { + isSpeech: boolean; + energy: number; + snr: number; + hybridState: string; + sileroProbability?: number; +}) => { + pendingVadState = next; + if (vadUpdateScheduled) return; + vadUpdateScheduled = true; + requestAnimationFrame(() => { + vadUpdateScheduled = false; + if (!pendingVadState) return; + const currentState = appStore.vadState(); + const sileroProbability = + pendingVadState.sileroProbability !== undefined + ? 
pendingVadState.sileroProbability + : currentState.sileroProbability; + appStore.setVadState({ + ...currentState, + ...pendingVadState, + sileroProbability, + }); + appStore.setIsSpeechDetected(pendingVadState.isSpeech); + pendingVadState = null; + }); +}; + +const Header: Component<{ + onToggleDebug: () => void; +}> = (props) => { + const sessionLabel = () => + appStore.modelState() === 'ready' ? getModelDisplayName(appStore.selectedModelId()) : 'Session'; + return ( +
+
+
+
+ auto_awesome +
+
+

keet

+

{sessionLabel()}

+
+
+
+
+ + +
+
+ ); +}; + +const WIDGET_STORAGE_KEY = 'boncukjs-control-widget-pos'; +const WIDGET_MAX_W = 672; +const WIDGET_MIN_H = 80; + +const App: Component = () => { + const [showModelOverlay, setShowModelOverlay] = createSignal(false); + const [showContextPanel, setShowContextPanel] = createSignal(false); + type SettingsPanelSection = 'full' | 'audio' | 'model'; + const [settingsPanelSection, setSettingsPanelSection] = createSignal('full'); + let panelHoverCloseTimeout: number | undefined; + const [workerReady, setWorkerReady] = createSignal(false); + const [widgetPos, setWidgetPos] = createSignal<{ x: number; y: number } | null>(null); + const [isDragging, setIsDragging] = createSignal(false); + + const isRecording = () => appStore.recordingState() === 'recording'; + const isModelReady = () => appStore.modelState() === 'ready'; + + let dragStart = { x: 0, y: 0 }; + let posStart = { x: 0, y: 0 }; + + const [windowHeight, setWindowHeight] = createSignal(typeof window !== 'undefined' ? window.innerHeight : 600); + const settingsExpandUp = () => { + const pos = widgetPos(); + if (!pos) return true; + return pos.y >= windowHeight() / 2; + }; + + const handleWidgetDragStart = (e: MouseEvent) => { + if ((e.target as HTMLElement).closest('button, select, input')) return; + e.preventDefault(); + const pos = widgetPos(); + if (!pos) return; + setIsDragging(true); + dragStart = { x: e.clientX, y: e.clientY }; + posStart = { ...pos }; + const onMove = (e2: MouseEvent) => { + const dx = e2.clientX - dragStart.x; + const dy = e2.clientY - dragStart.y; + const w = typeof window !== 'undefined' ? window.innerWidth : 800; + const h = typeof window !== 'undefined' ? 
window.innerHeight : 600; + const newX = Math.max(0, Math.min(w - WIDGET_MAX_W, posStart.x + dx)); + const newY = Math.max(0, Math.min(h - WIDGET_MIN_H, posStart.y + dy)); + setWidgetPos({ x: newX, y: newY }); + }; + const onUp = () => { + setIsDragging(false); + window.removeEventListener('mousemove', onMove); + window.removeEventListener('mouseup', onUp); + const p = widgetPos(); + if (p && typeof localStorage !== 'undefined') { + try { + localStorage.setItem(WIDGET_STORAGE_KEY, JSON.stringify(p)); + } catch (_) {} + } + }; + window.addEventListener('mousemove', onMove); + window.addEventListener('mouseup', onUp); + }; + + createEffect(() => { + if (!showContextPanel()) return; + const handler = (e: KeyboardEvent) => { + if (e.key === 'Escape') { + e.preventDefault(); + setShowContextPanel(false); + } + }; + document.addEventListener('keydown', handler); + return () => document.removeEventListener('keydown', handler); + }); + + createEffect(() => { + if (appStore.modelState() === 'ready' && showContextPanel() && settingsPanelSection() === 'model') { + setShowContextPanel(false); + } + }); + + onMount(() => { + const onResize = () => setWindowHeight(window.innerHeight); + window.addEventListener('resize', onResize); + + const stored = + typeof localStorage !== 'undefined' ? 
localStorage.getItem(WIDGET_STORAGE_KEY) : null; + let posRestored = false; + if (stored) { + try { + const parsed = JSON.parse(stored) as { x: number; y: number }; + if (Number.isFinite(parsed.x) && Number.isFinite(parsed.y)) { + setWidgetPos({ x: parsed.x, y: parsed.y }); + posRestored = true; + } + } catch (_) {} + } + if (!posRestored) { + const w = window.innerWidth; + const h = window.innerHeight; + setWidgetPos({ + x: Math.max(0, (w - WIDGET_MAX_W) / 2), + y: h - 140, + }); + } + + workerClient = new TranscriptionWorkerClient(); + + workerClient.onModelProgress = (p) => { + appStore.setModelProgress(p.progress); + appStore.setModelMessage(p.message || ''); + if (p.file) appStore.setModelFile(p.file); + }; + + workerClient.onModelStateChange = (s) => { + appStore.setModelState(s); + }; + + workerClient.onV3Confirmed = (text) => { + appStore.setTranscript(text); + }; + + workerClient.onV3Pending = (text) => { + appStore.setPendingText(text); + }; + + workerClient.onError = (msg) => { + appStore.setErrorMessage(msg); + }; + + appStore.refreshDevices(); + setWorkerReady(true); + + return () => window.removeEventListener('resize', onResize); + }); + + // No longer auto-show blocking model overlay; model selection is in the settings panel. + // createEffect(() => { ... 
setShowModelOverlay(true); }); + + onCleanup(() => { + clearTimeout(panelHoverCloseTimeout); + visualizationUnsubscribe?.(); + cleanupV4Pipeline(); + melClient?.dispose(); + workerClient?.dispose(); + }); + + // ---- v4 pipeline tick: periodic window building + inference ---- + let v4TickCount = 0; + let v4ModelNotReadyLogged = false; + const v4Tick = async () => { + if (!workerClient || !windowBuilder || !audioEngine || !bufferClient || v4InferenceBusy) return; + + // Skip inference if model is not ready (but still allow audio/mel/VAD to process) + if (appStore.modelState() !== 'ready') { + if (!v4ModelNotReadyLogged) { + console.log('[v4Tick] Model not ready yet - audio is being captured and preprocessed'); + v4ModelNotReadyLogged = true; + } + return; + } + // Reset the flag once model becomes ready + if (v4ModelNotReadyLogged) { + console.log('[v4Tick] Model is now ready - starting inference'); + v4ModelNotReadyLogged = false; + // Initialize the v4 service now that model is ready + await workerClient.initV4Service({ debug: false }); + } + + v4TickCount++; + const now = performance.now(); + // Use the store's configurable inference interval (minus a small margin for the tick jitter) + const minInterval = Math.max(200, appStore.v4InferenceIntervalMs() - 100); + if (now - v4LastInferenceTime < minInterval) return; + + // Check if there is speech via the BufferWorker (async query). + // We check both energy and inference VAD layers; either one detecting speech triggers inference. + const cursorSample = windowBuilder.getMatureCursorFrame(); // frame === sample in our pipeline + const currentSample = v4GlobalSampleOffset; + const startSample = cursorSample > 0 ? 
cursorSample : 0; + + let hasSpeech = false; + if (currentSample > startSample) { + // Check energy VAD first (always available, low latency) + const energyResult = await bufferClient.hasSpeech('energyVad', startSample, currentSample, 0.3); + + // When inference VAD is ready, require BOTH energy AND inference to agree + // This prevents false positives from music/noise that has high energy but no speech + if (tenVADClient?.isReady()) { + const inferenceResult = await bufferClient.hasSpeech('inferenceVad', startSample, currentSample, 0.5); + // Require both energy and inference VAD to agree (AND logic) + hasSpeech = energyResult.hasSpeech && inferenceResult.hasSpeech; + } else { + // Fall back to energy-only if inference VAD is not available + hasSpeech = energyResult.hasSpeech; + } + } + + if (v4TickCount <= 5 || v4TickCount % 20 === 0) { + const vadState = appStore.vadState(); + const rb = audioEngine.getRingBuffer(); + const rbFrame = rb.getCurrentFrame(); + const rbBase = rb.getBaseFrameOffset(); + console.log( + `[v4Tick #${v4TickCount}] hasSpeech=${hasSpeech}, vadState=${vadState.hybridState}, ` + + `energy=${vadState.energy.toFixed(4)}, inferenceVAD=${(vadState.sileroProbability || 0).toFixed(2)}, ` + + `samples=[${startSample}:${currentSample}], ` + + `ringBuf=[base=${rbBase}, head=${rbFrame}, avail=${rbFrame - rbBase}]` + ); + } + + // Periodic buffer worker state dump (every 40 ticks) + if (v4TickCount % 40 === 0 && bufferClient) { + try { + const state = await bufferClient.getState(); + const layerSummary = Object.entries(state.layers) + .map(([id, l]) => `${id}:${l.fillCount}/${l.maxEntries}@${l.currentSample}`) + .join(', '); + console.log(`[v4Tick #${v4TickCount}] BufferState: ${layerSummary}`); + } catch (_) { /* ignore state query errors */ } + } + + if (!hasSpeech) { + // Check for silence-based flush using BufferWorker + const silenceDuration = await bufferClient.getSilenceTailDuration('energyVad', 0.3); + if (silenceDuration >= 
appStore.v4SilenceFlushSec()) { + // Flush pending sentence via timeout finalization + try { + const flushResult = await workerClient.v4FinalizeTimeout(); + if (flushResult) { + appStore.setMatureText(flushResult.matureText); + appStore.setImmatureText(flushResult.immatureText); + appStore.setMatureCursorTime(flushResult.matureCursorTime); + appStore.setTranscript(flushResult.fullText); + appStore.appendV4SentenceEntries(flushResult.matureSentences); + appStore.setV4MergerStats({ + sentencesFinalized: flushResult.matureSentenceCount, + cursorUpdates: flushResult.stats?.matureCursorUpdates || 0, + utterancesProcessed: flushResult.stats?.utterancesProcessed || 0, + }); + // Advance window builder cursor + windowBuilder.advanceMatureCursorByTime(flushResult.matureCursorTime); + } + } catch (err) { + console.error('[v4Tick] Flush error:', err); + } + } + return; + } + + // Build window from cursor to current position + const window = windowBuilder.buildWindow(); + if (!window) { + if (v4TickCount <= 10 || v4TickCount % 20 === 0) { + const rb = audioEngine.getRingBuffer(); + const rbHead = rb.getCurrentFrame(); + const rbBase = rb.getBaseFrameOffset(); + console.log( + `[v4Tick #${v4TickCount}] buildWindow=null, ` + + `ringBuf=[base=${rbBase}, head=${rbHead}, avail=${rbHead - rbBase}], ` + + `cursor=${windowBuilder.getMatureCursorFrame()}` + ); + } + return; + } + + console.log(`[v4Tick #${v4TickCount}] Window [${window.startFrame}:${window.endFrame}] ${window.durationSeconds.toFixed(2)}s (initial=${window.isInitial})`); + + v4InferenceBusy = true; + v4LastInferenceTime = now; + + try { + const inferenceStart = performance.now(); + + // Get mel features for the window + let features: { features: Float32Array; T: number; melBins: number } | null = null; + if (melClient) { + features = await melClient.getFeatures(window.startFrame, window.endFrame); + } + + if (!features) { + v4InferenceBusy = false; + return; + } + + // Calculate time offset for absolute timestamps + 
const timeOffset = window.startFrame / 16000; + + // Calculate incremental cache parameters + const cursorFrame = windowBuilder.getMatureCursorFrame(); + const prefixSeconds = cursorFrame > 0 ? (window.startFrame - cursorFrame) / 16000 : 0; + + const result: V4ProcessResult = await workerClient.processV4ChunkWithFeatures({ + features: features.features, + T: features.T, + melBins: features.melBins, + timeOffset, + endTime: window.endFrame / 16000, + segmentId: `v4_${Date.now()}`, + incrementalCache: prefixSeconds > 0 ? { + cacheKey: 'v4-stream', + prefixSeconds, + } : undefined, + }); + + const inferenceMs = performance.now() - inferenceStart; + + // Update UI state + appStore.setMatureText(result.matureText); + appStore.setImmatureText(result.immatureText); + appStore.setTranscript(result.fullText); + appStore.setPendingText(result.immatureText); + appStore.appendV4SentenceEntries(result.matureSentences); + appStore.setInferenceLatency(inferenceMs); + + // Update RTF + const audioDurationMs = window.durationSeconds * 1000; + appStore.setRtf(inferenceMs / audioDurationMs); + + // Advance cursor if merger advanced it + if (result.matureCursorTime > windowBuilder.getMatureCursorTime()) { + appStore.setMatureCursorTime(result.matureCursorTime); + windowBuilder.advanceMatureCursorByTime(result.matureCursorTime); + windowBuilder.markSentenceEnd(Math.round(result.matureCursorTime * 16000)); + } + + // Update stats + appStore.setV4MergerStats({ + sentencesFinalized: result.matureSentenceCount, + cursorUpdates: result.stats?.matureCursorUpdates || 0, + utterancesProcessed: result.stats?.utterancesProcessed || 0, + }); + + // Update buffer metrics + const ring = audioEngine.getRingBuffer(); + appStore.setBufferMetrics({ + fillRatio: ring.getFillCount() / ring.getSize(), + latencyMs: (ring.getFillCount() / 16000) * 1000, + }); + + // Update metrics + if (result.metrics) { + appStore.setSystemMetrics({ + throughput: 0, + modelConfidence: 0, + }); + } + } catch (err: any) { + 
console.error('[v4Tick] Inference error:', err); + } finally { + v4InferenceBusy = false; + } + }; + + // ---- Cleanup v4 pipeline resources ---- + const cleanupV4Pipeline = () => { + v4TickRunning = false; + if (v4TickTimeout) { + clearTimeout(v4TickTimeout); + v4TickTimeout = undefined; + } + if (v4AudioChunkUnsubscribe) { + v4AudioChunkUnsubscribe(); + v4AudioChunkUnsubscribe = null; + } + if (v4MelChunkUnsubscribe) { + v4MelChunkUnsubscribe(); + v4MelChunkUnsubscribe = null; + } + hybridVAD = null; + if (tenVADClient) { + tenVADClient.dispose(); + tenVADClient = null; + } + if (bufferClient) { + bufferClient.dispose(); + bufferClient = null; + } + windowBuilder = null; + v4InferenceBusy = false; + v4LastInferenceTime = 0; + v4GlobalSampleOffset = 0; + }; + + const toggleRecording = async () => { + if (isRecording()) { + // Update UI immediately so the stop button always takes effect even if cleanup throws + visualizationUnsubscribe?.(); + visualizationUnsubscribe = undefined; + appStore.stopRecording(); + appStore.setAudioLevel(0); + appStore.setBarLevels(new Float32Array(0)); + + try { + audioEngine?.stop(); + + if (segmentUnsubscribe) segmentUnsubscribe(); + if (windowUnsubscribe) windowUnsubscribe(); + if (melChunkUnsubscribe) melChunkUnsubscribe(); + cleanupV4Pipeline(); + + if (workerClient) { + const final = await workerClient.finalize(); + let text = ''; + if ('text' in final && typeof final.text === 'string') { + text = final.text; + } else if ('fullText' in final && typeof final.fullText === 'string') { + text = final.fullText; + } + appStore.setTranscript(text); + appStore.setPendingText(''); + } + + melClient?.reset(); + audioEngine?.reset(); + } catch (err) { + console.warn('[App] Error during stop recording cleanup:', err); + } + } else { + try { + if (!audioEngine) { + audioEngine = new AudioEngine({ + sampleRate: 16000, + deviceId: appStore.selectedDeviceId(), + }); + setAudioEngineSignal(audioEngine); + } else { + audioEngine.updateConfig({ 
deviceId: appStore.selectedDeviceId() }); + audioEngine.reset(); + } + + const mode = appStore.transcriptionMode(); + + // v4 mode: Always start audio capture, mel preprocessing, and VAD + // Inference only runs when model is ready (checked in v4Tick) + if (mode === 'v4-utterance') { + // ---- v4: Utterance-based pipeline with BufferWorker + TEN-VAD ---- + + // Initialize merger in worker only if model is ready + if (isModelReady() && workerClient) { + await workerClient.initV4Service({ debug: false }); + } + + // Initialize mel worker (always needed for preprocessing) + if (!melClient) { + melClient = new MelWorkerClient(); + setMelClientSignal(melClient); + } + try { + await melClient.init({ nMels: 128 }); + } catch (e) { + melClient.dispose(); + melClient = null; + setMelClientSignal(null); + } + + // Initialize BufferWorker (centralized multi-layer data store) + bufferClient = new BufferWorkerClient(); + const bufferConfig: BufferWorkerConfig = { + sampleRate: 16000, + layers: { + audio: { hopSamples: 1, entryDimension: 1, maxDurationSec: 120 }, + mel: { hopSamples: 160, entryDimension: 128, maxDurationSec: 120 }, + energyVad: { hopSamples: 1280, entryDimension: 1, maxDurationSec: 120 }, + inferenceVad: { hopSamples: 256, entryDimension: 1, maxDurationSec: 120 }, + }, + }; + await bufferClient.init(bufferConfig); + + // Initialize TEN-VAD worker (inference-based VAD) + tenVADClient = new TenVADWorkerClient(); + tenVADClient.onResult((result: TenVADResult) => { + if (!bufferClient) return; + // Batch-write hop probabilities to inferenceVad (single worker message) + if (result.hopCount > 0) { + const lastProb = result.probabilities[result.hopCount - 1]; + if (bufferClient.writeBatchTransfer) { + bufferClient.writeBatchTransfer('inferenceVad', result.probabilities, result.globalSampleOffset); + } else { + bufferClient.writeBatch('inferenceVad', result.probabilities, result.globalSampleOffset); + } + + // Update UI at most once per frame with the latest probability 
+ scheduleSileroUpdate(lastProb); + } + }); + // TEN-VAD init is non-blocking; falls back gracefully if WASM fails + const wasmPath = `${import.meta.env.BASE_URL}wasm/`; + tenVADClient.init({ hopSize: 256, threshold: 0.5, wasmPath }).catch((err) => { + console.warn('[v4] TEN-VAD init failed, using energy-only:', err); + }); + + // Initialize hybrid VAD for energy-based detection (always runs, fast) + hybridVAD = new HybridVAD({ + sileroThreshold: 0.5, + onsetConfirmations: 2, + offsetConfirmations: 3, + sampleRate: 16000, + }); + // Do NOT init Silero in HybridVAD (TEN-VAD replaces it) + + // NOTE: WindowBuilder is created AFTER audioEngine.start() below, + // because start() may re-create the internal RingBuffer. + + // Reset global sample counter + v4GlobalSampleOffset = 0; + + // Feed audio chunks to mel worker from the main v4 audio handler below + v4MelChunkUnsubscribe = null; + + // Process each audio chunk: energy VAD + write to BufferWorker + forward to TEN-VAD + v4AudioChunkUnsubscribe = audioEngine.onAudioChunk((chunk) => { + if (!hybridVAD || !bufferClient) return; + + const chunkOffset = v4GlobalSampleOffset; + v4GlobalSampleOffset += chunk.length; + + // 1. Run energy VAD (synchronous, fast) and write to BufferWorker + const vadResult = hybridVAD.processEnergyOnly(chunk); + const energyProb = vadResult.isSpeech ? 0.9 : 0.1; + bufferClient.writeScalar('energyVad', energyProb); + + // 2. Forward audio to mel worker (copy, keep chunk for TEN-VAD transfer) + melClient?.pushAudioCopy(chunk); + + // 3. Forward audio to TEN-VAD worker for inference-based VAD (transfer, no copy) + if (tenVADClient?.isReady()) { + tenVADClient.processTransfer(chunk, chunkOffset); + } + + // 4. Update VAD state for UI + const sileroProbability = tenVADClient?.isReady() + ? 
undefined + : (vadResult.sileroProbability || 0); + scheduleVadStateUpdate({ + isSpeech: vadResult.isSpeech, + energy: vadResult.energy, + snr: vadResult.snr || 0, + hybridState: vadResult.state, + ...(sileroProbability !== undefined ? { sileroProbability } : {}), + }); + }); + + // Start adaptive inference tick loop (reads interval from appStore) + // Note: v4Tick internally checks if model is ready before running inference + v4TickRunning = true; + const scheduleNextTick = () => { + if (!v4TickRunning) return; + v4TickTimeout = window.setTimeout(async () => { + if (!v4TickRunning) return; + await v4Tick(); + scheduleNextTick(); + }, appStore.v4InferenceIntervalMs()); + }; + scheduleNextTick(); + + } else if (isModelReady() && workerClient) { + // v3 and v2 modes still require model to be ready + if (mode === 'v3-streaming') { + // ---- v3: Fixed-window token streaming (existing) ---- + const windowDur = appStore.streamingWindow(); + const triggerInt = appStore.triggerInterval(); + const overlapDur = Math.max(1.0, windowDur - triggerInt); + + await workerClient.initV3Service({ + windowDuration: windowDur, + overlapDuration: overlapDur, + sampleRate: 16000, + frameStride: appStore.frameStride(), + }); + + if (!melClient) { + melClient = new MelWorkerClient(); + setMelClientSignal(melClient); + } + try { + await melClient.init({ nMels: 128 }); + } catch (e) { + melClient.dispose(); + melClient = null; + setMelClientSignal(null); + } + + melChunkUnsubscribe = audioEngine.onAudioChunk((chunk) => { + melClient?.pushAudioCopy(chunk); + }); + + windowUnsubscribe = audioEngine.onWindowChunk( + windowDur, + overlapDur, + triggerInt, + async (audio, startTime) => { + if (!workerClient) return; + const start = performance.now(); + + let result; + if (melClient) { + const startSample = Math.round(startTime * 16000); + const endSample = startSample + audio.length; + const melFeatures = await melClient.getFeatures(startSample, endSample); + + if (melFeatures) { + result = await 
workerClient.processV3ChunkWithFeatures( + melFeatures.features, + melFeatures.T, + melFeatures.melBins, + startTime, + overlapDur, + ); + } else { + result = await workerClient.processV3Chunk(audio, startTime); + } + } else { + result = await workerClient.processV3Chunk(audio, startTime); + } + + const duration = performance.now() - start; + const stride = appStore.triggerInterval(); + appStore.setRtf(duration / (stride * 1000)); + appStore.setInferenceLatency(duration); + + if (audioEngine) { + const ring = audioEngine.getRingBuffer(); + appStore.setBufferMetrics({ + fillRatio: ring.getFillCount() / ring.getSize(), + latencyMs: (ring.getFillCount() / 16000) * 1000, + }); + } + + appStore.setMergeInfo({ + lcsLength: result.lcsLength, + anchorValid: result.anchorValid, + chunkCount: result.chunkCount, + anchorTokens: result.anchorTokens + }); + } + ); + } else { + // ---- v2: Per-utterance (existing) ---- + await workerClient.initService({ sampleRate: 16000 }); + segmentUnsubscribe = audioEngine.onSpeechSegment(async (segment) => { + if (workerClient) { + const start = Date.now(); + const samples = audioEngine!.getRingBuffer().read(segment.startFrame, segment.endFrame); + const result = await workerClient.transcribeSegment(samples); + if (result.text) appStore.appendTranscript(result.text + ' '); + appStore.setInferenceLatency(Date.now() - start); + } + }); + } + } + + await audioEngine.start(); + + // Create WindowBuilder AFTER start() so we get the final RingBuffer reference + // (AudioEngine.init() re-creates the RingBuffer internally) + if (mode === 'v4-utterance') { + windowBuilder = new WindowBuilder( + audioEngine.getRingBuffer(), + null, // No VADRingBuffer; hasSpeech now goes through BufferWorker + { + sampleRate: 16000, + minDurationSec: 3.0, + maxDurationSec: 30.0, + minInitialDurationSec: 1.5, + useVadBoundaries: false, // VAD boundaries now managed by BufferWorker + vadSilenceThreshold: 0.3, + debug: true, // Enable debug logging for diagnostics + } + 
); + } + + appStore.startRecording(); + + // Use same 30fps tick (onVisualizationUpdate throttled to 33ms). + // Bar levels from AnalyserNode (native FFT, low CPU) instead of mel worker. + visualizationUnsubscribe = audioEngine.onVisualizationUpdate((_data, metrics) => { + appStore.setAudioLevel(metrics.currentEnergy); + if (appStore.transcriptionMode() !== 'v4-utterance') { + appStore.setIsSpeechDetected(audioEngine?.isSpeechActive() ?? false); + } + appStore.setBarLevels(audioEngine!.getBarLevels()); + }); + } catch (err: any) { + appStore.setErrorMessage(err.message); + } + } + }; + + const loadSelectedModel = async () => { + if (!workerClient) return; + if (appStore.modelState() === 'ready') return; + if (appStore.modelState() === 'loading') return; + setShowContextPanel(true); + try { + await workerClient.initModel(appStore.selectedModelId()); + } catch (e) { + console.error('Failed to load model:', e); + appStore.setModelState('error'); + appStore.setErrorMessage(e instanceof Error ? e.message : String(e)); + } + }; + + const openPanelForAudio = () => { + clearTimeout(panelHoverCloseTimeout); + setSettingsPanelSection('audio'); + setShowContextPanel(true); + }; + const openPanelForModel = () => { + clearTimeout(panelHoverCloseTimeout); + setSettingsPanelSection('model'); + setShowContextPanel(true); + }; + const schedulePanelCloseIfHover = () => { + panelHoverCloseTimeout = window.setTimeout(() => { + if (settingsPanelSection() !== 'full' && appStore.modelState() !== 'loading') { + setShowContextPanel(false); + } + }, 250); + }; + const cancelPanelClose = () => clearTimeout(panelHoverCloseTimeout); + const panelMouseLeave = () => { + if (settingsPanelSection() !== 'full') schedulePanelCloseIfHover(); + }; + + const handleLocalLoad = async (files: FileList) => { + if (!workerClient) return; + setShowContextPanel(true); + try { + await workerClient.initLocalModel(files); + } catch (e) { + console.error('Failed to load local model:', e); + } + }; + + return ( +
+ appStore.setSelectedModelId(id)} + onStart={() => loadSelectedModel()} + onLocalLoad={handleLocalLoad} + onClose={() => setShowModelOverlay(false)} + /> + +
appStore.setShowDebugPanel(!appStore.showDebugPanel())} + /> + +
+
+
+ +
+
+
+ + {/* Draggable floating control widget */} +
+
+ {/* Settings panel: expands up or down depending on bar position vs half screen height */} +
+
+ setShowContextPanel(false)} + onLoadModel={() => loadSelectedModel()} + onLocalLoad={handleLocalLoad} + onOpenDebug={() => appStore.setShowDebugPanel(true)} + onDeviceSelect={(id) => { + if (audioEngine) audioEngine.updateConfig({ deviceId: id }); + }} + audioEngine={audioEngineSignal() ?? undefined} + expandUp={settingsExpandUp} + /> +
+
+ + {/* Control bar: steady, fixed position; never moves when settings open */} + +
+ + {/* Foldable debug panel (bottom drawer) */} + +
+ +
+
+
+ ); +}; + +export default App; + diff --git a/src/assets/css/material-icons.css b/src/assets/css/material-icons.css new file mode 100644 index 0000000000000000000000000000000000000000..0fa058a4b7452cf2e9fba6f611b904ffb837a706 --- /dev/null +++ b/src/assets/css/material-icons.css @@ -0,0 +1,34 @@ +@font-face { + font-family: 'Material Icons'; + font-style: normal; + font-weight: 400; + src: local('Material Icons'), + local('MaterialIcons-Regular'), + url('/fonts/material-icons/material-icons.woff2') format('woff2'), + url('/fonts/material-icons/material-icons.woff') format('woff'); +} + +.material-icons { + font-family: 'Material Icons'; + font-weight: normal; + font-style: normal; + font-size: 24px; /* Preferred icon size */ + display: inline-block; + line-height: 1; + text-transform: none; + letter-spacing: normal; + word-wrap: normal; + white-space: nowrap; + direction: ltr; + + /* Support for all WebKit browsers. */ + -webkit-font-smoothing: antialiased; + /* Support for Safari and Chrome. */ + text-rendering: optimizeLegibility; + + /* Support for Firefox. */ + -moz-osx-font-smoothing: grayscale; + + /* Support for IE. */ + font-feature-settings: 'liga'; +} \ No newline at end of file diff --git a/src/components/BufferVisualizer.tsx b/src/components/BufferVisualizer.tsx new file mode 100644 index 0000000000000000000000000000000000000000..243fa3f8c8529f388aa3230afc48456e097fc9da --- /dev/null +++ b/src/components/BufferVisualizer.tsx @@ -0,0 +1,511 @@ +/** +"""""""""" * Keet - Buffer Visualizer Component + * Canvas-based real-time audio waveform visualization. + * Ported from legacy UI project (Svelte) to SolidJS. 
+ */ + +import { Component, createSignal, onMount, onCleanup, createEffect } from 'solid-js'; +import type { AudioEngine, AudioMetrics } from '../lib/audio'; + +interface BufferVisualizerProps { + /** AudioEngine instance for subscribing to visualization updates */ + audioEngine?: AudioEngine; + /** Height of the canvas in pixels (default: 80) */ + height?: number; + /** Whether to show SNR threshold line (default: true) */ + showThreshold?: boolean; + /** SNR threshold in dB for visualization (default: 6.0) */ + snrThreshold?: number; + /** Whether to show time markers (default: true) */ + showTimeMarkers?: boolean; + /** Whether the visualizer is visible (optimization - reduces frame rate when hidden) */ + visible?: boolean; +} + +export const BufferVisualizer: Component = (props) => { + // Canvas element ref + let canvasRef: HTMLCanvasElement | undefined; + let ctx: CanvasRenderingContext2D | null = null; + let parentRef: HTMLDivElement | undefined; + + // State + const [isDarkSignal, setIsDarkSignal] = createSignal(false); + const [canvasWidth, setCanvasWidth] = createSignal(0); + const [waveformData, setWaveformData] = createSignal(new Float32Array(0)); + const [metrics, setMetrics] = createSignal({ + currentEnergy: 0, + averageEnergy: 0, + peakEnergy: 0, + noiseFloor: 0.01, + currentSNR: 0, + isSpeaking: false, + }); + const [segments, setSegments] = createSignal>([]); + // Track the end time of the current waveform snapshot for strict synchronization + const [bufferEndTime, setBufferEndTime] = createSignal(0); + + const height = () => props.height ?? 80; + const showThreshold = () => props.showThreshold ?? true; + const snrThreshold = () => props.snrThreshold ?? 6.0; + const showTimeMarkers = () => props.showTimeMarkers ?? true; + const visible = () => props.visible ?? 
true; + + let animationFrameId: number | undefined; + let resizeObserver: ResizeObserver | null = null; + let needsRedraw = true; + let lastDrawTime = 0; + const DRAW_INTERVAL_MS = 33; + + // Draw function + const draw = () => { + if (!ctx || !canvasRef) return; + + const width = canvasRef.width; + const canvasHeight = canvasRef.height; + const centerY = canvasHeight / 2; + const data = waveformData(); + const currentMetrics = metrics(); + + // Clear canvas + ctx.clearRect(0, 0, width, canvasHeight); + + // Optimized theme detection (using signal instead of DOM access) + const isDarkMode = isDarkSignal(); + + // Colors (Mechanical Etched Palette) - Cached values + const bgColor = isDarkMode ? '#1e293b' : '#f1f5f9'; + const highlightColor = isDarkMode ? 'rgba(255, 255, 255, 0.05)' : 'rgba(255, 255, 255, 0.8)'; + const shadowColor = isDarkMode ? 'rgba(0, 0, 0, 0.4)' : 'rgba(0, 0, 0, 0.1)'; + const etchColor = isDarkMode ? '#334155' : '#cbd5e1'; + const signalActiveColor = '#3b82f6'; + + // Background + if (ctx) { + ctx.fillStyle = bgColor; + ctx.fillRect(0, 0, width, canvasHeight); + + // Baseline (Etched indent) + ctx.beginPath(); + ctx.strokeStyle = shadowColor; + ctx.lineWidth = 0.5; + ctx.moveTo(0, centerY); + ctx.lineTo(width, centerY); + ctx.stroke(); + + // Draw time markers at the top + if (showTimeMarkers() && props.audioEngine) { + // Use the new textColor and tickColor based on the etched palette + const textColor = isDarkMode ? '#94a3b8' : '#94a3b8'; + const tickColor = isDarkMode ? 
'rgba(255, 255, 255, 0.05)' : 'rgba(0, 0, 0, 0.05)'; + drawTimeMarkers(width, canvasHeight, textColor, tickColor); + } + + // Draw segment boundaries (before waveform so they appear behind) + if (props.audioEngine) { + drawSegments(width, canvasHeight, isDarkMode); + } + + // Draw waveform using legacy UI project logic (Etched Mercury Style) + if (data.length >= 2) { + // Data is already subsampled to ~400 points (min, max pairs) + const numPoints = data.length / 2; + const step = width / numPoints; // Use simple step as points ~ width/2 + + // Helper to draw the full waveform path + // Optimized Waveform Path (Consolidated passes) + ctx.lineCap = 'round'; + + // Helper to draw the full waveform path + const drawPath = (offsetX: number, offsetY: number) => { + if (!ctx) return; + ctx.beginPath(); + for (let i = 0; i < numPoints; i++) { + const x = i * step + offsetX; + // Ensure min/max have at least 1px difference for visibility even when silent + let minVal = data[i * 2]; + let maxVal = data[i * 2 + 1]; + + // Scaled values + let yMin = centerY - (minVal * centerY * 0.9) + offsetY; + let yMax = centerY - (maxVal * centerY * 0.9) + offsetY; + + // Ensure tiny signals are visible (min 1px height) + if (Math.abs(yMax - yMin) < 1) { + yMin = centerY - 0.5 + offsetY; + yMax = centerY + 0.5 + offsetY; + } + + ctx.moveTo(x, yMin); + ctx.lineTo(x, yMax); + } + ctx.stroke(); + }; + + // 1. Highlight Pass (Sharp top-left edge) + ctx.strokeStyle = highlightColor; + ctx.lineWidth = 1.0; + drawPath(-0.5, -0.5); + + // 2. Shadow Pass (Depressed groove) + ctx.strokeStyle = shadowColor; + ctx.lineWidth = 1.2; + drawPath(0.5, 0.5); + + // 3. Main Etch Pass (Base material) - Slate color for contrast + ctx.strokeStyle = etchColor; + ctx.lineWidth = 1.0; + drawPath(0, 0); + + // 4. 
Active signal glow + if (currentMetrics.isSpeaking) { + ctx.globalAlpha = 0.5; + ctx.shadowBlur = 4; + ctx.shadowColor = signalActiveColor; + ctx.strokeStyle = signalActiveColor; + ctx.lineWidth = 1.0; + drawPath(0, 0); + ctx.shadowBlur = 0; + ctx.globalAlpha = 1.0; + } + } + + // Draw adaptive threshold (Etched dashes) + if (showThreshold() && currentMetrics.noiseFloor > 0) { + const snrRatio = Math.pow(10, snrThreshold() / 10); + const adaptiveThreshold = currentMetrics.noiseFloor * snrRatio; + + const drawThresholdLine = (offsetY: number, color: string) => { + if (!ctx) return; + ctx.beginPath(); + ctx.strokeStyle = color; + ctx.lineWidth = 1; + ctx.setLineDash([2, 4]); + const adaptiveYPos = centerY - adaptiveThreshold * centerY + offsetY; + ctx.moveTo(0, adaptiveYPos); ctx.lineTo(width, adaptiveYPos); + const adaptiveYNeg = centerY + adaptiveThreshold * centerY + offsetY; + ctx.moveTo(0, adaptiveYNeg); ctx.lineTo(width, adaptiveYNeg); + ctx.stroke(); + }; + + drawThresholdLine(1, highlightColor); + drawThresholdLine(0, shadowColor); + ctx.setLineDash([]); + + // Label (Etched text) + ctx.fillStyle = isDarkMode ? 'rgba(255, 255, 255, 0.15)' : 'rgba(0, 0, 0, 0.2)'; + ctx.font = '900 9px "JetBrains Mono", monospace'; + const labelY = centerY - adaptiveThreshold * centerY - 8; + ctx.fillText(`THRSH: ${snrThreshold().toFixed(1)}dB`, 10, labelY); + } + + // Draw noise floor level (retained original style for clarity) + if (currentMetrics.noiseFloor > 0) { + const nfColor = isDarkMode ? 
'rgba(74, 222, 128, 0.1)' : 'rgba(34, 197, 94, 0.1)'; + const noiseFloorY = centerY - currentMetrics.noiseFloor * centerY; + const noiseFloorYNeg = centerY + currentMetrics.noiseFloor * centerY; + + ctx.beginPath(); + ctx.strokeStyle = nfColor; + ctx.lineWidth = 1; + ctx.moveTo(0, noiseFloorY); + ctx.lineTo(width, noiseFloorY); + ctx.moveTo(0, noiseFloorYNeg); + ctx.lineTo(width, noiseFloorYNeg); + ctx.stroke(); + } + + // Draw speaking indicator (Neumorphic dot) + if (currentMetrics.isSpeaking) { + const speakingColor = '#22c55e'; + const indicatorX = width - 60; + const indicatorY = 25; + const radius = 6; + + // Glow effect + ctx.shadowBlur = 10; + ctx.shadowColor = speakingColor; + + ctx.beginPath(); + ctx.arc(indicatorX, indicatorY, radius, 0, Math.PI * 2); + ctx.fillStyle = speakingColor; + ctx.fill(); + + ctx.shadowBlur = 0; + + // Pulse ring + const time = performance.now() / 1000; + const rippleRadius = radius + (time % 1) * 10; + const rippleOpacity = 1 - (time % 1); + + ctx.beginPath(); + ctx.arc(indicatorX, indicatorY, rippleRadius, 0, Math.PI * 2); + ctx.strokeStyle = `rgba(34, 197, 94, ${rippleOpacity})`; + ctx.lineWidth = 1.5; + ctx.stroke(); + } + + // SNR meter on the right side - Etched mechanical gauge + if (currentMetrics.currentSNR > 0) { + const meterPadding = 15; + const meterWidth = 6; + const meterX = width - 20; + const meterHeight = canvasHeight - (meterPadding * 2); + + // Meter Housing (Inset) + ctx.fillStyle = shadowColor; + ctx.beginPath(); + ctx.roundRect(meterX, meterPadding, meterWidth, meterHeight, 3); + ctx.fill(); + + ctx.strokeStyle = highlightColor; + ctx.lineWidth = 1; + ctx.stroke(); + + // Gauge Level + const maxSNR = 60; + const cappedSNR = Math.min(maxSNR, currentMetrics.currentSNR); + const fillHeight = (cappedSNR / maxSNR) * meterHeight; + const fillY = (meterPadding + meterHeight) - fillHeight; + + // Glow for the active portion + ctx.shadowBlur = 8; + ctx.shadowColor = currentMetrics.currentSNR >= snrThreshold() ? 
'rgba(34, 197, 94, 0.4)' : 'rgba(96, 165, 250, 0.4)'; + + ctx.fillStyle = currentMetrics.currentSNR >= snrThreshold() ? '#22c55e' : signalActiveColor; + ctx.beginPath(); + ctx.roundRect(meterX, fillY, meterWidth, fillHeight, 3); + ctx.fill(); + + ctx.shadowBlur = 0; + + // Threshold marker notched in + const thresholdMarkerY = (meterPadding + meterHeight) - (Math.min(maxSNR, snrThreshold()) / maxSNR * meterHeight); + ctx.beginPath(); + ctx.strokeStyle = '#ef4444'; + ctx.lineWidth = 2; + ctx.moveTo(meterX - 4, thresholdMarkerY); + ctx.lineTo(meterX + meterWidth + 4, thresholdMarkerY); + ctx.stroke(); + + // Digital Readout + ctx.fillStyle = isDarkMode ? '#f8fafc' : '#1e293b'; + ctx.font = '900 10px "JetBrains Mono", monospace'; + ctx.textAlign = 'right'; + ctx.fillText(`${currentMetrics.currentSNR.toFixed(0)}`, meterX - 8, thresholdMarkerY + 4); + ctx.textAlign = 'left'; + } + } + }; + + // Draw time markers + const drawTimeMarkers = (width: number, canvasHeight: number, textColor: string, tickColor: string) => { + if (!ctx || !props.audioEngine) return; + + const bufferDuration = props.audioEngine.getVisualizationDuration(); + const currentTime = bufferEndTime(); // Use synchronized end time of buffer + const windowStart = currentTime - bufferDuration; + + ctx.fillStyle = textColor; + ctx.font = '10px system-ui, sans-serif'; + + const markerInterval = 5; // Every 5 seconds + const firstMarkerTime = Math.ceil(windowStart / markerInterval) * markerInterval; + + for (let time = firstMarkerTime; time <= currentTime; time += markerInterval) { + const x = ((time - windowStart) / bufferDuration) * width; + + // Draw tick mark + ctx.beginPath(); + ctx.strokeStyle = tickColor; + ctx.moveTo(x, 0); + ctx.lineTo(x, 15); + ctx.stroke(); + + // Draw time label + ctx.fillText(`${time}s`, x + 2, 12); + } + }; + + // Draw segment boundaries + const drawSegments = (width: number, canvasHeight: number, isDarkMode: boolean) => { + const context = ctx; + if (!context || 
!props.audioEngine) return; + + const bufferDuration = props.audioEngine.getVisualizationDuration(); + const currentTime = bufferEndTime(); // Use synchronized end time of buffer + const windowStart = currentTime - bufferDuration; + const segmentList = segments(); + + // Colors for segments + const pendingColor = isDarkMode ? 'rgba(250, 204, 21, 0.15)' : 'rgba(234, 179, 8, 0.15)'; + const processedColor = isDarkMode ? 'rgba(34, 197, 94, 0.15)' : 'rgba(22, 163, 74, 0.15)'; + const pendingBorderColor = isDarkMode ? 'rgba(250, 204, 21, 0.5)' : 'rgba(234, 179, 8, 0.5)'; + const processedBorderColor = isDarkMode ? 'rgba(34, 197, 94, 0.5)' : 'rgba(22, 163, 74, 0.5)'; + + // Log segment count for debugging + // console.log('Drawing segments:', segmentList.length); + + segmentList.forEach(segment => { + // Calculate relative position in visualization window + const relativeStart = segment.startTime - windowStart; + const relativeEnd = segment.endTime - windowStart; + + // Only draw if segment is within visible window + if (relativeEnd > 0 && relativeStart < bufferDuration) { + // Pixel-snap boundaries to prevent anti-aliasing jitter/widening + const startX = Math.floor(Math.max(0, (relativeStart / bufferDuration)) * width); + const endX = Math.ceil(Math.min(1, (relativeEnd / bufferDuration)) * width); + + // Fill segment area - increased opacity for visibility + context.fillStyle = segment.isProcessed ? + (isDarkMode ? 'rgba(34, 197, 94, 0.3)' : 'rgba(22, 163, 74, 0.3)') : + (isDarkMode ? 'rgba(250, 204, 21, 0.3)' : 'rgba(234, 179, 8, 0.3)'); + + context.fillRect(startX, 0, endX - startX, canvasHeight); + + // Draw segment boundaries (snap to pixel + 0.5 for sharp 1px lines) + context.strokeStyle = segment.isProcessed ? 
processedBorderColor : pendingBorderColor; + context.lineWidth = 1; + context.beginPath(); + context.moveTo(startX + 0.5, 0); + context.lineTo(startX + 0.5, canvasHeight); + context.moveTo(endX - 0.5, 0); + context.lineTo(endX - 0.5, canvasHeight); + context.stroke(); + } + }); + }; + + // Animation loop + const drawLoop = () => { + if (!ctx || !canvasRef || canvasRef.width === 0) { + if (visible()) { + animationFrameId = requestAnimationFrame(drawLoop); + } else { + animationFrameId = window.setTimeout(drawLoop, 100) as unknown as number; + } + return; + } + + if (visible()) { + const now = performance.now(); + if (needsRedraw && now - lastDrawTime >= DRAW_INTERVAL_MS) { + lastDrawTime = now; + needsRedraw = false; + draw(); + } + animationFrameId = requestAnimationFrame(drawLoop); + } else { + // When not visible, check less frequently to save CPU + animationFrameId = window.setTimeout(drawLoop, 100) as unknown as number; + } + }; + + // Resize handler + const handleResize = () => { + if (canvasRef && parentRef) { + const newWidth = parentRef.clientWidth; + if (newWidth > 0 && newWidth !== canvasWidth()) { + canvasRef.width = newWidth; + canvasRef.height = height(); + setCanvasWidth(newWidth); + + // Refetch visualization data for new width + if (props.audioEngine && visible()) { + setWaveformData(props.audioEngine.getVisualizationData(newWidth)); + needsRedraw = true; + // Note: can't update bufferEndTime here easily without calling another method on engine, + // but next update loop will catch it. 
+ } + } + } + }; + + // Subscribe to audio engine updates + createEffect(() => { + const engine = props.audioEngine; + if (engine && visible()) { + // Initial data fetch + if (canvasWidth() > 0) { + setWaveformData(engine.getVisualizationData(canvasWidth())); + setBufferEndTime(engine.getCurrentTime()); + } + + // Subscribe to updates + const sub = engine.onVisualizationUpdate((data, newMetrics, endTime) => { + if (visible()) { + setWaveformData(data); + setMetrics(newMetrics); + setBufferEndTime(endTime); + + // Fetch segments for visualization + setSegments(engine.getSegmentsForVisualization()); + needsRedraw = true; + } else { + // Still update metrics even when not visible + setMetrics(newMetrics); + } + }); + + onCleanup(() => sub()); + } + }); + + // Mark for redraw when visibility toggles + createEffect(() => { + if (visible()) { + needsRedraw = true; + } + }); + + onMount(() => { + if (canvasRef) { + ctx = canvasRef.getContext('2d'); + } + + // Setup dark mode observer + setIsDarkSignal(document.documentElement.classList.contains('dark')); + const themeObserver = new MutationObserver(() => { + setIsDarkSignal(document.documentElement.classList.contains('dark')); + }); + themeObserver.observe(document.documentElement, { + attributes: true, + attributeFilter: ['class'], + }); + + onCleanup(() => themeObserver.disconnect()); + + // Setup resize observer + handleResize(); + resizeObserver = new ResizeObserver(handleResize); + if (parentRef) { + resizeObserver.observe(parentRef); + } + + // Start animation loop + animationFrameId = requestAnimationFrame(drawLoop); + }); + + onCleanup(() => { + if (animationFrameId) { + cancelAnimationFrame(animationFrameId); + clearTimeout(animationFrameId); + } + if (resizeObserver) { + resizeObserver.disconnect(); + } + }); + + return ( +
+ +
+ ); +}; + +export default BufferVisualizer; diff --git a/src/components/ContextPanel.tsx b/src/components/ContextPanel.tsx new file mode 100644 index 0000000000000000000000000000000000000000..d4dbe844b243611c4dd35f15c61d7a561c0fdda3 --- /dev/null +++ b/src/components/ContextPanel.tsx @@ -0,0 +1,125 @@ +import { Component, For, Show, createEffect, onCleanup } from 'solid-js'; +import { appStore } from '../stores/appStore'; +import { getModelDisplayName, MODELS } from './ModelLoadingOverlay'; + +interface ContextPanelProps { + isOpen: boolean; + onClose: () => void; + onLoadModel: () => void; + onOpenDebug: () => void; + onDeviceSelect?: (id: string) => void; +} + +export const ContextPanel: Component = (props) => { + createEffect(() => { + if (!props.isOpen) return; + const handler = (e: KeyboardEvent) => { + if (e.key === 'Escape') { + e.preventDefault(); + props.onClose(); + } + }; + document.addEventListener('keydown', handler); + onCleanup(() => document.removeEventListener('keydown', handler)); + }); + + return ( + + + + ); +}; diff --git a/src/components/DebugPanel.tsx b/src/components/DebugPanel.tsx new file mode 100644 index 0000000000000000000000000000000000000000..bed0c1485acf9c2feeb9514ab9bd8b8a46b56380 --- /dev/null +++ b/src/components/DebugPanel.tsx @@ -0,0 +1,397 @@ +import { Component, createMemo, For, Show, createSignal, onCleanup, createEffect } from 'solid-js'; +import { appStore, type TranscriptionMode } from '../stores/appStore'; +import type { AudioEngine } from '../lib/audio/types'; +import type { MelWorkerClient } from '../lib/audio/MelWorkerClient'; +import { LayeredBufferVisualizer } from './LayeredBufferVisualizer'; + +interface DebugPanelProps { + audioEngine?: AudioEngine; + melClient?: MelWorkerClient; +} + +const MODES: { id: TranscriptionMode; label: string; short: string }[] = [ + { id: 'v4-utterance', label: 'Utterance (v4)', short: 'v4' }, + { id: 'v3-streaming', label: 'Streaming (v3)', short: 'v3' }, + { id: 'v2-utterance', 
label: 'Legacy (v2)', short: 'v2' }, +]; + +export const DebugPanel: Component = (props) => { + const isRecording = () => appStore.recordingState() === 'recording'; + const isV4 = () => appStore.transcriptionMode() === 'v4-utterance'; + const isV3 = () => appStore.transcriptionMode() === 'v3-streaming'; + + const [height, setHeight] = createSignal(260); + const [isResizing, setIsResizing] = createSignal(false); + + let startY = 0; + let startHeight = 0; + let scrollContainer: HTMLDivElement | undefined; + + // Auto-scroll to bottom of finalized sentences + createEffect(() => { + appStore.matureText(); // Track dependency + if (scrollContainer) { + scrollContainer.scrollTop = scrollContainer.scrollHeight; + } + }); + + const handleMouseDown = (e: MouseEvent) => { + setIsResizing(true); + startY = e.clientY; + startHeight = height(); + window.addEventListener('mousemove', handleMouseMove); + window.addEventListener('mouseup', handleMouseUp); + }; + + const handleMouseMove = (e: MouseEvent) => { + if (!isResizing()) return; + const delta = startY - e.clientY; + const newHeight = Math.min(Math.max(startHeight + delta, 150), 600); + setHeight(newHeight); + }; + + const handleMouseUp = () => { + setIsResizing(false); + window.removeEventListener('mousemove', handleMouseMove); + window.removeEventListener('mouseup', handleMouseUp); + }; + + onCleanup(() => { + window.removeEventListener('mousemove', handleMouseMove); + window.removeEventListener('mouseup', handleMouseUp); + }); + + const rtfColor = createMemo(() => { + const rtfx = appStore.rtfxAverage(); + if (rtfx === 0) return 'text-[var(--color-earthy-soft-brown)]'; + if (rtfx >= 2) return 'text-[var(--color-earthy-muted-green)] font-bold'; + if (rtfx >= 1) return 'text-[var(--color-earthy-coral)] font-bold'; + return 'text-[var(--color-earthy-coral)] font-bold'; + }); + + return ( +
+ {/* Resize Handle */} +
+ + {/* ---- Column 1: System & Signal (merged indicators) ---- */} +
+
+ System & Signal +
+ {appStore.backend()} +
+ VAD +
+
+ +
+ Mode +
+ + {(mode) => ( + + )} + +
+
+ +
+
+ RTFx + + {appStore.rtfxAverage() > 0 ? Math.round(appStore.rtfxAverage()) : '–'} + +
+
+ Latency + {Math.round(appStore.inferenceLatencyAverage())}ms +
+
+ +
+
+ Buffer + {(appStore.bufferMetrics().fillRatio * 100).toFixed(0)}% +
+
+
+
+
+ + +
+ Merger +
+
+
Sent
+
{appStore.v4MergerStats().sentencesFinalized}
+
+
+
Cursor
+
{appStore.matureCursorTime().toFixed(1)}s
+
+
+
Uttr
+
{appStore.v4MergerStats().utterancesProcessed}
+
+
+
+
+ +
+
+ RMS Energy + appStore.energyThreshold() ? 'text-[var(--color-earthy-muted-green)]' : 'text-[var(--color-earthy-soft-brown)]'}> + {(appStore.audioLevel() * 100).toFixed(1)}% + +
+
+
+
+
+
+ + +
0 ? 'opacity-100' : 'opacity-40'}`}> +
+ VAD Prob + appStore.sileroThreshold() ? 'text-[var(--color-earthy-coral)] font-bold' : 'text-[var(--color-earthy-soft-brown)]'}> + {(appStore.vadState().sileroProbability * 100).toFixed(0)}% + +
+
+
+
appStore.sileroThreshold() ? 'bg-[var(--color-earthy-coral)]' : 'bg-[var(--color-earthy-soft-brown)]'}`} + style={{ width: `${Math.min(100, appStore.vadState().sileroProbability * 100)}%` }} + /> +
+
+
+ SNR + 3 ? 'text-[var(--color-earthy-muted-green)]' : 'text-[var(--color-earthy-soft-brown)]'}`}> + {appStore.vadState().snr.toFixed(1)} dB + +
+ + +
+ +
+
Overlap
+
{appStore.streamingOverlap().toFixed(1)}s
+
+
+
Chunks
+
{appStore.mergeInfo().chunkCount}
+
+
+ +
+
State
+
+ {appStore.vadState().hybridState} +
+
+
+
Windows
+
{appStore.v4MergerStats().utterancesProcessed}
+
+
+
+
+ + {/* ---- Column 2: Live Context (mode-dependent) ---- */} +
+
+ + {isV4() ? 'Transcript State' : isV3() ? 'Stream Sync' : 'Segments'} + + + {/* v3: LCS indicators */} + +
+
+
+ Lock +
+
+ join_inner + Match: {appStore.mergeInfo().lcsLength} +
+
+ + + {/* v4: VAD state indicator */} + +
+
+
+
+ {appStore.vadState().hybridState} +
+
+
0 ? 'opacity-100' : 'opacity-0'}`}> + VAD + 0.5 ? 'text-[var(--color-earthy-coral)]' : 'text-[var(--color-earthy-soft-brown)]'}`}> + {(appStore.vadState().sileroProbability * 100).toFixed(0)}% + +
+
+ +
+ +
+ {/* v4: Mature + Immature text display */} + +
+ {/* Mature (finalized) sentences */} +
+

+ + Finalized Sentences +

+
+ No finalized sentences yet... + }> + {appStore.matureText()} + +
+
+ + {/* Immature (active) sentence */} +
+

+ + Active Sentence +

+
+ Waiting for speech... + }> + {appStore.immatureText()} + + +
+
+ + {/* Pending sentence info */} + 0}> +
+ {appStore.v4MergerStats().sentencesFinalized} sentences finalized + | + Cursor at {appStore.matureCursorTime().toFixed(2)}s + | + {appStore.v4MergerStats().utterancesProcessed} windows processed +
+
+
+
+ + {/* v3: Transition cache + anchors */} + +
+

+ + Transition Cache +

+
+ + {(token) => ( +
0.8 ? '#F9F7F2' : 'rgba(249,247,242,0.6)', + "border-color": `rgba(107, 112, 92, ${Math.max(0.2, token.confidence * 0.4)})`, + "color": token.confidence > 0.8 ? '#3D405B' : '#A5A58D', + "opacity": Math.max(0.5, token.confidence) + }} + title={`Confidence: ${(token.confidence * 100).toFixed(0)}%`} + > + {token.text} +
+ )} +
+ + + {appStore.pendingText()}... + + + + Waiting for speech input... + +
+
+ +
+

+ + Stable Anchors +

+
+ + {(token) => ( + + {token} + + )} + + + No stable anchors locked yet. + +
+
+
+ + {/* v2: basic info */} + +
+ Legacy per-utterance mode. Segments are transcribed individually. +
+
+ + {/* New Layered Buffer Visualizer */} +
+ +
+
+
+
+ ); +}; diff --git a/src/components/EnergyMeter.tsx b/src/components/EnergyMeter.tsx new file mode 100644 index 0000000000000000000000000000000000000000..46c72af525fcfff49eebac24201b7cf998b2d5d1 --- /dev/null +++ b/src/components/EnergyMeter.tsx @@ -0,0 +1,103 @@ +import { Component, createSignal, createEffect, onCleanup } from 'solid-js'; +import { AudioEngine } from '../lib/audio/types'; +import { appStore } from '../stores/appStore'; + +interface EnergyMeterProps { + audioEngine?: AudioEngine; +} + +export const EnergyMeter: Component = (props) => { + const [energy, setEnergy] = createSignal(0); + const [metrics, setMetrics] = createSignal({ noiseFloor: 0, snr: 0, threshold: 0.02, snrThreshold: 3.0 }); + const [isSpeaking, setIsSpeaking] = createSignal(false); + + const updateFromEngine = (engine: AudioEngine) => { + const currentE = engine.getCurrentEnergy(); + const currentM = engine.getSignalMetrics(); + + setEnergy(currentE); + setMetrics(currentM); + // Check if speaking based on SNR threshold (matching VAD logic) + setIsSpeaking(currentM.snr > currentM.snrThreshold || currentE > currentM.threshold); + }; + + createEffect(() => { + const engine = props.audioEngine; + if (!engine) return; + + updateFromEngine(engine); + const unsubscribe = engine.onVisualizationUpdate(() => { + updateFromEngine(engine); + }); + + onCleanup(() => unsubscribe()); + }); + + // Logarithmic scaling for better visualization + const toPercent = (val: number) => { + // e.g. mapping 0.0001 -> 1.0 to 0% -> 100% log scale + // log10(0.0001) = -4, log10(1) = 0 + const minLog = -4; + const maxLog = 0; + const v = Math.max(0.0001, val); + const log = Math.log10(v); + return Math.max(0, Math.min(100, ((log - minLog) / (maxLog - minLog)) * 100)); + }; + + return ( +
+
+

Signal_Analysis

+ {/* Speaking indicator - Neumorphic LED style */} +
+
+ + {isSpeaking() ? 'SPEECH' : 'SILENCE'} + +
+
+ + {/* Energy Bar */} +
+ {/* Energy Fill - color based on speech state */} +
+ + {/* Noise Floor Marker */} +
+ + {/* Energy Threshold Marker */} +
+
+ +
+
+ Noise + {metrics().noiseFloor.toFixed(5)} +
+
+ Energy + {energy().toFixed(4)} +
+
+ SNR_Ratio + metrics().snrThreshold ? 'text-emerald-500' : 'text-amber-500'}`}> + {metrics().snr.toFixed(1)} dB + +
+
+
+ ); +}; diff --git a/src/components/LayeredBufferVisualizer.tsx b/src/components/LayeredBufferVisualizer.tsx new file mode 100644 index 0000000000000000000000000000000000000000..e37641087aeb8b2d75c2b7012764da25a8b5687b --- /dev/null +++ b/src/components/LayeredBufferVisualizer.tsx @@ -0,0 +1,442 @@ +import { Component, onMount, onCleanup, createSignal } from 'solid-js'; +import type { AudioEngine } from '../lib/audio/types'; +import type { MelWorkerClient } from '../lib/audio/MelWorkerClient'; +import { normalizeMelForDisplay } from '../lib/audio/mel-display'; +import { appStore } from '../stores/appStore'; + +interface LayeredBufferVisualizerProps { + audioEngine?: AudioEngine; + melClient?: MelWorkerClient; + height?: number; // Total height + windowDuration?: number; // default 8.0s +} + +const MEL_BINS = 128; // Standard for this app + +// dB scaling is in mel-display.ts (shared with bar visualizer) + +// Pre-computed 256-entry RGB lookup table for mel heatmap (black to red). +// Built once at module load; indexed by Math.round(intensity * 255). +// Colormap: black -> blue -> purple -> green -> yellow -> orange -> red. 
const COLORMAP_LUT = (() => {
  // Each stop is [position in 0..1, R, G, B]; colors are linearly
  // interpolated (per channel, with rounding) between adjacent stops.
  const stops: [number, number, number, number][] = [
    [0, 0, 0, 0],        // black
    [0.12, 0, 0, 180],   // blue
    [0.30, 120, 0, 160], // purple
    [0.48, 0, 180, 80],  // green
    [0.65, 220, 220, 0], // yellow
    [0.82, 255, 140, 0], // orange
    [1, 255, 0, 0],      // red
  ];
  // 256 entries * 3 channels (R, G, B) packed into a Uint8Array
  const lut = new Uint8Array(256 * 3);
  for (let i = 0; i < 256; i++) {
    const intensity = i / 255;
    let r = 0, g = 0, b = 0;
    for (let s = 0; s < stops.length - 1; s++) {
      const [t0, r0, g0, b0] = stops[s];
      const [t1, r1, g1, b1] = stops[s + 1];
      if (intensity >= t0 && intensity <= t1) {
        const t = (intensity - t0) / (t1 - t0);
        r = Math.round(r0 + t * (r1 - r0));
        g = Math.round(g0 + t * (g1 - g0));
        b = Math.round(b0 + t * (b1 - b0));
        break;
      }
    }
    // Defensive guard for intensity at/above the last stop (i == 255); the
    // segment loop already covers this case, so this is a redundant no-op
    // kept for safety against float edge cases.
    if (intensity >= stops[stops.length - 1][0]) {
      const last = stops[stops.length - 1];
      r = last[1]; g = last[2]; b = last[3];
    }
    const base = i * 3;
    lut[base] = r;
    lut[base + 1] = g;
    lut[base + 2] = b;
  }
  return lut;
})();

// FIX: restored the <LayeredBufferVisualizerProps> type argument (stripped
// during extraction); without it `props.windowDuration` etc. do not type-check.
export const LayeredBufferVisualizer: Component<LayeredBufferVisualizerProps> = (props) => {
  let canvasRef: HTMLCanvasElement | undefined;
  let ctx: CanvasRenderingContext2D | null = null;
  let animationFrameId: number;

  // NOTE(review): `||` treats windowDuration=0 as unset; 0 would divide by
  // zero downstream, so this is acceptable, but `??` would be more precise.
  const getWindowDuration = () => props.windowDuration || 8.0;

  // Offscreen canvas for spectrogram caching (scrolling)
  let specCanvas: HTMLCanvasElement | undefined;
  let specCtx: CanvasRenderingContext2D | null = null;

  // State for last fetch to throttle spectrogram updates
  let lastSpecFetchTime = 0;
  const SPEC_FETCH_INTERVAL = 100; // Update spectrogram every 100ms (10fps)
  const DRAW_INTERVAL_MS = 33; // Throttle full redraw to ~30fps
  let lastDrawTime = 0;

  // --- Cached layout dimensions (updated via ResizeObserver, NOT per-frame) ---
  // Avoids getBoundingClientRect() every animation frame which forces synchronous
  // layout reflow and was the #1 perf bottleneck (1.5s layout-shift clusters).
  let cachedPhysicalWidth = 0;
  let cachedPhysicalHeight = 0;
  let cachedDpr = window.devicePixelRatio || 1;
  let resizeObserver: ResizeObserver | null = null;
  let dprMediaQuery: MediaQueryList | null = null;

  /** Recompute physical canvas dimensions from cached logical size + DPR. */
  const updateCanvasDimensions = (logicalW: number, logicalH: number) => {
    cachedDpr = window.devicePixelRatio || 1;
    cachedPhysicalWidth = Math.floor(logicalW * cachedDpr);
    cachedPhysicalHeight = Math.floor(logicalH * cachedDpr);

    // Resize canvases immediately so next frame uses correct size
    if (canvasRef && (canvasRef.width !== cachedPhysicalWidth || canvasRef.height !== cachedPhysicalHeight)) {
      canvasRef.width = cachedPhysicalWidth;
      canvasRef.height = cachedPhysicalHeight;
    }
    if (specCanvas && (specCanvas.width !== cachedPhysicalWidth || specCanvas.height !== cachedPhysicalHeight)) {
      specCanvas.width = cachedPhysicalWidth;
      specCanvas.height = cachedPhysicalHeight;
    }
  };

  // --- Pre-allocated ImageData for spectrogram rendering ---
  // Avoids creating a new ImageData object every spectrogram draw (~10fps),
  // which caused GC pressure from large short-lived allocations.
  let cachedSpecImgData: ImageData | null = null;
  let cachedSpecImgWidth = 0;
  let cachedSpecImgHeight = 0;

  // --- Pre-allocated waveform read buffer ---
  // Avoids allocating a new Float32Array(~128000) every animation frame.
  // Grows only when the required size exceeds current capacity.
  let waveformReadBuf: Float32Array | null = null;

  // Store spectrogram data with its time alignment
  let cachedSpecData: {
    features: Float32Array;
    melBins: number;
    timeSteps: number;
    startTime: number;
    endTime: number;
  } | null = null;

  // Mount: acquire canvas contexts, wire up resize/DPR observers, create the
  // offscreen spectrogram cache, and start the render loop. `loop` is a const
  // declared further down; safe because onMount callbacks run after the
  // component body has executed, so the binding is initialized by then.
  onMount(() => {
    if (canvasRef) {
      ctx = canvasRef.getContext('2d', { alpha: false });

      // Use ResizeObserver to cache dimensions instead of per-frame getBoundingClientRect
      resizeObserver = new ResizeObserver((entries) => {
        for (const entry of entries) {
          // contentRect gives CSS-pixel (logical) dimensions without forcing layout
          const cr = entry.contentRect;
          updateCanvasDimensions(cr.width, cr.height);
        }
      });
      resizeObserver.observe(canvasRef);

      // Watch for DPR changes (browser zoom, display change)
      // A `(resolution: Ndppx)` media query stops matching when DPR changes;
      // each {once:true} listener fires a single time, then re-registers at
      // the new DPR value.
      const setupDprWatch = () => {
        dprMediaQuery = window.matchMedia(`(resolution: ${window.devicePixelRatio}dppx)`);
        const onDprChange = () => {
          if (canvasRef) {
            const rect = canvasRef.getBoundingClientRect(); // one-time on zoom change only
            updateCanvasDimensions(rect.width, rect.height);
          }
          // Re-register for the next change at the new DPR
          setupDprWatch();
        };
        dprMediaQuery.addEventListener('change', onDprChange, { once: true });
      };
      setupDprWatch();

      // Initial dimensions (one-time)
      const rect = canvasRef.getBoundingClientRect();
      updateCanvasDimensions(rect.width, rect.height);
    }

    // Create offscreen canvas
    specCanvas = document.createElement('canvas');
    specCtx = specCanvas.getContext('2d', { alpha: false });

    loop();
  });

  // Dispose: stop the animation loop and the resize observer.
  // cancelAnimationFrame silently ignores ids of frames that already ran.
  // NOTE(review): the pending {once:true} DPR media-query listener is not
  // removed here — harmless single leak per mount, but worth confirming.
  onCleanup(() => {
    cancelAnimationFrame(animationFrameId);
    if (resizeObserver) {
      resizeObserver.disconnect();
      resizeObserver = null;
    }
  });

  // Per-frame render: clear background, then draw the (cached) spectrogram,
  // the waveform, the VAD bar, and the overlay. Throttled to DRAW_INTERVAL_MS;
  // reschedules itself via requestAnimationFrame on every path.
  const loop = (now: number = performance.now()) => {
    if (!ctx || !canvasRef || !props.audioEngine) {
      animationFrameId = requestAnimationFrame(loop);
      return;
    }

    if (now - lastDrawTime < DRAW_INTERVAL_MS) {
      animationFrameId = requestAnimationFrame(loop);
      return;
    }

    lastDrawTime = now;

    // Use cached dimensions (updated by ResizeObserver / DPR watcher)
    const dpr = cachedDpr;
    const width = cachedPhysicalWidth;
    const height = cachedPhysicalHeight;

    if (width === 0 || height === 0) {
      animationFrameId = requestAnimationFrame(loop);
      return;
    }

    // Colors
    const bgColor = '#0f172a';
    ctx.fillStyle = bgColor;
    ctx.fillRect(0, 0, width, height);

    const ringBuffer = props.audioEngine.getRingBuffer();
    const currentTime = ringBuffer.getCurrentTime();
    const duration = getWindowDuration();
    const startTime = currentTime - duration;
    const sampleRate = ringBuffer.sampleRate;

    // Layout:
    // Top 55%: Spectrogram
    // Middle 35%: Waveform
    // Bottom 10%: VAD signal
    const specHeight = Math.floor(height * 0.55);
    const waveHeight = Math.floor(height * 0.35);
    const vadHeight = height - specHeight - waveHeight;
    const waveY = specHeight;
    const vadY = specHeight + waveHeight;

    // 1. Spectrogram (async fetch with stored alignment)
    if (props.melClient && specCtx && specCanvas) {
      if (now - lastSpecFetchTime > SPEC_FETCH_INTERVAL) {
        lastSpecFetchTime = now;

        const fetchStartSample = Math.round(startTime * sampleRate);
        const fetchEndSample = Math.round(currentTime * sampleRate);

        // Request RAW (unnormalized) features for fixed dB scaling.
        // ASR transcription still uses normalized features (default).
        props.melClient.getFeatures(fetchStartSample, fetchEndSample, false).then(features => {
          if (features && specCtx && specCanvas) {
            // Store with time alignment info
            cachedSpecData = {
              features: features.features,
              melBins: features.melBins,
              timeSteps: features.T,
              startTime: startTime,
              endTime: currentTime
            };
            drawSpectrogramToCanvas(specCtx, features.features, features.melBins, features.T, width, specHeight);
          }
        }).catch(() => { }); // best-effort: drop the frame if the worker fails
      }

      // Draw cached spectrogram aligned to current view
      if (cachedSpecData && cachedSpecData.timeSteps > 0) {
        // Calculate offset to align cached data with current time window
        // NOTE(review): assumes cachedDuration ≈ the current view duration so
        // pixels map 1:1 — confirm there is no visible drift between fetches.
        const cachedDuration = cachedSpecData.endTime - cachedSpecData.startTime;
        const timeOffset = startTime - cachedSpecData.startTime;
        const offsetX = Math.floor((timeOffset / cachedDuration) * width);

        // Draw the portion of cached spectrogram that's still visible
        ctx.drawImage(specCanvas, offsetX, 0, width - offsetX, specHeight, 0, 0, width - offsetX, specHeight);
      }
    }

    // 2. Waveform (sync with current time window, zero-allocation read)
    try {
      const startSample = Math.floor(startTime * sampleRate);
      const endSample = Math.floor(currentTime * sampleRate);
      const neededLen = endSample - startSample;

      const baseFrame = ringBuffer.getBaseFrameOffset();
      if (startSample >= baseFrame && neededLen > 0) {
        // Use readInto if available (zero-alloc), fall back to read()
        if (ringBuffer.readInto) {
          // Grow the pre-allocated buffer only when capacity is insufficient
          if (!waveformReadBuf || waveformReadBuf.length < neededLen) {
            waveformReadBuf = new Float32Array(neededLen);
          }
          const written = ringBuffer.readInto(startSample, endSample, waveformReadBuf);
          // Pass a subarray view (no copy) of the exact length
          drawWaveform(ctx, waveformReadBuf.subarray(0, written), width, waveHeight, waveY);
        } else {
          const audioData = ringBuffer.read(startSample, endSample);
          drawWaveform(ctx, audioData, width, waveHeight, waveY);
        }
      }
    } catch (e) {
      // Data likely overwritten or not available
      // (deliberate best-effort: skip the waveform for this frame)
    }

    // 3. VAD Signal Layer
    drawVadLayer(ctx, width, vadHeight, vadY, startTime, duration, dpr);

    // 4. Overlay (time labels, trigger line)
    drawOverlay(ctx, width, height, startTime, duration, dpr);

    animationFrameId = requestAnimationFrame(loop);
  };

  // Render a mel spectrogram into the offscreen cache canvas using
  // nearest-neighbor sampling and the module-level COLORMAP_LUT.
  // Note: the `ctx` parameter shadows the component-level `ctx` — this draws
  // to the offscreen spec canvas, not the visible one.
  const drawSpectrogramToCanvas = (
    ctx: CanvasRenderingContext2D,
    features: Float32Array,
    melBins: number,
    timeSteps: number,
    width: number,
    height: number
  ) => {
    // features layout: [melBins, T] (mel-major, flattened from [mel, time])
    // So features[m * timeSteps + t].

    if (timeSteps === 0) return;

    // Reuse cached ImageData if dimensions match; allocate only on size change
    if (!cachedSpecImgData || cachedSpecImgWidth !== width || cachedSpecImgHeight !== height) {
      cachedSpecImgData = ctx.createImageData(width, height);
      cachedSpecImgWidth = width;
      cachedSpecImgHeight = height;
    }
    const imgData = cachedSpecImgData;
    const data = imgData.data;

    // Scaling factors
    const timeScale = timeSteps / width;
    const freqScale = melBins / height;

    for (let x = 0; x < width; x++) {
      const t = Math.floor(x * timeScale);
      if (t >= timeSteps) break;

      for (let y = 0; y < height; y++) {
        // y=0 is top (high freq), y=height is bottom (low freq).
        const m = Math.floor((height - 1 - y) * freqScale);
        if (m >= melBins) continue;

        const val = features[m * timeSteps + t];
        // normalizeMelForDisplay maps a raw mel value to [0,1] for the LUT.
        const clamped = normalizeMelForDisplay(val);
        const lutIdx = (clamped * 255) | 0;
        const lutBase = lutIdx * 3;

        const idx = (y * width + x) * 4;
        data[idx] = COLORMAP_LUT[lutBase];
        data[idx + 1] = COLORMAP_LUT[lutBase + 1];
        data[idx + 2] = COLORMAP_LUT[lutBase + 2];
        data[idx + 3] = 255; // opaque
      }
    }
    ctx.putImageData(imgData, 0, 0);
  };

  // Use gain 1 so waveform shows true amplitude (float32 in [-1,1] fills half-height).
  // No display amplification; ASR pipeline is unchanged.
+ const WAVEFORM_GAIN = 1; + + const drawWaveform = (ctx: CanvasRenderingContext2D, data: Float32Array, width: number, height: number, offsetY: number) => { + if (data.length === 0) return; + + const step = Math.ceil(data.length / width); + const amp = (height / 2) * WAVEFORM_GAIN; + const centerY = offsetY + height / 2; + + ctx.strokeStyle = '#4ade80'; // Green + ctx.lineWidth = 1; + ctx.beginPath(); + + for (let x = 0; x < width; x++) { + const startIdx = x * step; + const endIdx = Math.min((x + 1) * step, data.length); + + let min = 1; + let max = -1; + let hasData = false; + + for (let i = startIdx; i < endIdx; i += Math.max(1, Math.floor((endIdx - startIdx) / 10))) { + const s = data[i]; + if (s < min) min = s; + if (s > max) max = s; + hasData = true; + } + + if (hasData) { + const yMin = centerY - min * amp; + const yMax = centerY - max * amp; + ctx.moveTo(x, Math.max(offsetY, Math.min(offsetY + height, yMin))); + ctx.lineTo(x, Math.max(offsetY, Math.min(offsetY + height, yMax))); + } + } + ctx.stroke(); + }; + + const drawVadLayer = (ctx: CanvasRenderingContext2D, width: number, height: number, offsetY: number, startTime: number, duration: number, dpr: number) => { + // Draw VAD state as a colored bar + // For now, just show current VAD state as a solid bar (could be enhanced with historical data) + const vadState = appStore.vadState(); + const isSpeech = vadState.isSpeech; + + // Background + ctx.fillStyle = isSpeech ? 'rgba(249, 115, 22, 0.4)' : 'rgba(100, 116, 139, 0.2)'; // Orange when speech, slate when silence + ctx.fillRect(0, offsetY, width, height); + + // If energy-based detection is active, show energy level as a bar + const energyLevel = appStore.audioLevel(); + const energyThreshold = appStore.energyThreshold(); + + if (energyLevel > 0) { + const barWidth = Math.min(width, width * (energyLevel / 0.3)); // Scale to max 30% energy + ctx.fillStyle = energyLevel > energyThreshold ? 
'rgba(249, 115, 22, 0.8)' : 'rgba(74, 222, 128, 0.6)'; + ctx.fillRect(width - barWidth, offsetY, barWidth, height); + } + + // Draw a thin separator line at top + ctx.strokeStyle = 'rgba(148, 163, 184, 0.3)'; + ctx.lineWidth = 1 * dpr; + ctx.beginPath(); + ctx.moveTo(0, offsetY); + ctx.lineTo(width, offsetY); + ctx.stroke(); + + // Label + ctx.fillStyle = isSpeech ? '#fb923c' : '#64748b'; + ctx.font = `${8 * dpr}px monospace`; + ctx.fillText(isSpeech ? 'SPEECH' : 'SILENCE', 4 * dpr, offsetY + height - 2 * dpr); + }; + + const drawOverlay = (ctx: CanvasRenderingContext2D, width: number, height: number, startTime: number, duration: number, dpr: number) => { + // Draw Trigger line (1.5s from right) if in V3 mode + const triggerX = width - (1.5 / duration) * width; + ctx.strokeStyle = 'rgba(255, 255, 0, 0.5)'; + ctx.lineWidth = 1 * dpr; + ctx.beginPath(); + ctx.moveTo(triggerX, 0); + ctx.lineTo(triggerX, height); + ctx.stroke(); + + // Time labels + ctx.fillStyle = '#94a3b8'; + ctx.font = `${10 * dpr}px monospace`; + for (let i = 0; i <= 8; i += 2) { + const t = i; + const x = width - (t / duration) * width; + ctx.fillText(`-${t}s`, x + 3 * dpr, height - 6 * dpr); + } + }; + + return ( +
+ +
+ SPECTROGRAM + WAVEFORM ({getWindowDuration()}s) +
+
+ ); +}; diff --git a/src/components/ModelLoadingOverlay.tsx b/src/components/ModelLoadingOverlay.tsx new file mode 100644 index 0000000000000000000000000000000000000000..de089eb9a301e2a1a2426fba0f56ac4952b76ee1 --- /dev/null +++ b/src/components/ModelLoadingOverlay.tsx @@ -0,0 +1,212 @@ +import { Component, Show, For, createEffect } from 'solid-js'; + +interface ModelLoadingOverlayProps { + isVisible: boolean; + progress: number; + message: string; + file?: string; + backend: 'webgpu' | 'wasm'; + state: 'unloaded' | 'loading' | 'ready' | 'error'; + selectedModelId: string; + onModelSelect: (id: string) => void; + onStart: () => void; + onLocalLoad: (files: FileList) => void; + onClose?: () => void; +} + +export const MODELS = [ + { id: 'parakeet-tdt-0.6b-v2', name: 'Parakeet v2', desc: 'English optimized' }, + { id: 'parakeet-tdt-0.6b-v3', name: 'Parakeet v3', desc: 'Multilingual Streaming' }, +]; + +export function getModelDisplayName(id: string): string { + return (MODELS.find((m) => m.id === id)?.name ?? id) || 'Unknown model'; +} + +export const ModelLoadingOverlay: Component = (props) => { + const progressWidth = () => `${Math.max(0, Math.min(100, props.progress))}%`; + let fileInput: HTMLInputElement | undefined; + + const handleFileChange = (e: Event) => { + const files = (e.target as HTMLInputElement).files; + if (files && files.length > 0) { + props.onLocalLoad(files); + } + }; + + const handleClose = () => props.onClose?.(); + + createEffect(() => { + if (!props.isVisible || !props.onClose) return; + const handler = (e: KeyboardEvent) => { + if (e.key === 'Escape') { + e.preventDefault(); + props.onClose?.(); + } + }; + document.addEventListener('keydown', handler); + return () => document.removeEventListener('keydown', handler); + }); + + return ( + +