Spaces:
Running
Running
feat(space): migrate Hugging Face Space to keet SolidJS app
Browse filesReplace the CRA template with the keet SolidJS/Vite app, workers, and pipeline modules for browser ASR.
Update Space metadata for static dist deployment with COOP/COEP/CORP headers and keep repository content HF-compatible by excluding rejected binary assets.
This view is limited to 50 files because it contains too many changes. See raw diff
- .env.example +1 -0
- .gitignore +57 -16
- README.md +33 -65
- index.html +44 -0
- package.json +37 -33
- public/icons/.keep +0 -0
- public/index.html +0 -43
- public/logo192.png +0 -0
- public/logo512.png +0 -0
- public/manifest.json +7 -17
- public/robots.txt +0 -3
- public/sw.js +172 -0
- public/wasm/ten_vad.js +30 -0
- public/wasm/ten_vad.wasm +3 -0
- src/App.css +0 -38
- src/App.js +0 -25
- src/App.test.js +0 -8
- src/App.tsx +1037 -0
- src/assets/css/material-icons.css +34 -0
- src/components/BufferVisualizer.tsx +511 -0
- src/components/ContextPanel.tsx +125 -0
- src/components/DebugPanel.tsx +397 -0
- src/components/EnergyMeter.tsx +103 -0
- src/components/LayeredBufferVisualizer.tsx +442 -0
- src/components/ModelLoadingOverlay.tsx +212 -0
- src/components/PrivacyBadge.tsx +19 -0
- src/components/SettingsPanel.tsx +225 -0
- src/components/Sidebar.tsx +142 -0
- src/components/StatusBar.tsx +55 -0
- src/components/TranscriptionDisplay.tsx +425 -0
- src/components/Waveform.tsx +95 -0
- src/components/index.ts +12 -0
- src/csp.test.ts +34 -0
- src/index.css +250 -8
- src/index.js +0 -17
- src/index.tsx +18 -0
- src/lib/audio/AudioEngine.ts +1014 -0
- src/lib/audio/AudioSegmentProcessor.test.ts +69 -0
- src/lib/audio/AudioSegmentProcessor.ts +609 -0
- src/lib/audio/MelWorkerClient.ts +182 -0
- src/lib/audio/RingBuffer.test.ts +202 -0
- src/lib/audio/RingBuffer.ts +150 -0
- src/lib/audio/audioParams.ts +207 -0
- src/lib/audio/capture-processor.ts +21 -0
- src/lib/audio/energy-calculation.test.ts +75 -0
- src/lib/audio/index.ts +9 -0
- src/lib/audio/mel-display.ts +19 -0
- src/lib/audio/mel-e2e.test.ts +483 -0
- src/lib/audio/mel-math.test.ts +500 -0
- src/lib/audio/mel-math.ts +275 -0
.env.example
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
VITE_GEMINI_API_KEY="YOUR_API_KEY_HERE"
|
.gitignore
CHANGED
|
@@ -1,23 +1,64 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
/
|
| 5 |
-
/
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
# production
|
| 12 |
-
/build
|
| 13 |
-
|
| 14 |
-
# misc
|
| 15 |
-
.DS_Store
|
| 16 |
.env.local
|
| 17 |
-
.env.
|
| 18 |
-
.
|
| 19 |
-
.
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
npm-debug.log*
|
|
|
|
|
|
|
| 22 |
yarn-debug.log*
|
| 23 |
yarn-error.log*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dependencies
|
| 2 |
+
bower_components/
|
| 3 |
+
node_modules/
|
| 4 |
+
package-lock.json
|
| 5 |
|
| 6 |
+
# Build and Distribution
|
| 7 |
+
build/
|
| 8 |
+
dist/
|
| 9 |
+
dist-ssr/
|
| 10 |
+
*.js.map
|
| 11 |
+
*.map
|
| 12 |
+
*.tsbuildinfo
|
| 13 |
|
| 14 |
+
# Environment and Local Config
|
| 15 |
+
.env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
.env.local
|
| 17 |
+
.env.*.local
|
| 18 |
+
*.local
|
| 19 |
+
*.pem
|
| 20 |
|
| 21 |
+
# Logs
|
| 22 |
+
*.log
|
| 23 |
+
lerna-debug.log*
|
| 24 |
+
logs/
|
| 25 |
npm-debug.log*
|
| 26 |
+
pnpm-debug.log*
|
| 27 |
+
tsd-debug.log
|
| 28 |
yarn-debug.log*
|
| 29 |
yarn-error.log*
|
| 30 |
+
|
| 31 |
+
# Editor and OS
|
| 32 |
+
.DS_Store
|
| 33 |
+
.idea/
|
| 34 |
+
.vscode/
|
| 35 |
+
!.vscode/extensions.json
|
| 36 |
+
*.njsproj
|
| 37 |
+
*.ntvs*
|
| 38 |
+
*.sln
|
| 39 |
+
*.suo
|
| 40 |
+
*.sw?
|
| 41 |
+
|
| 42 |
+
# Testing and Coverage
|
| 43 |
+
.nyc_output/
|
| 44 |
+
.pytest_cache/
|
| 45 |
+
coverage/
|
| 46 |
+
lib-cov/
|
| 47 |
+
|
| 48 |
+
# Runtime and Temporary Files
|
| 49 |
+
*.pid
|
| 50 |
+
*.pid.lock
|
| 51 |
+
*.seed
|
| 52 |
+
pids/
|
| 53 |
+
|
| 54 |
+
# Large trace files (Chrome performance traces)
|
| 55 |
+
metrics/*.json
|
| 56 |
+
!metrics/trace_analysis_summary.json
|
| 57 |
+
|
| 58 |
+
# Misc
|
| 59 |
+
.grunt/
|
| 60 |
+
*.wim
|
| 61 |
+
docs/
|
| 62 |
+
legacy/
|
| 63 |
+
public/models/
|
| 64 |
+
performance-trace.json.gz
|
README.md
CHANGED
|
@@ -1,81 +1,49 @@
|
|
| 1 |
---
|
| 2 |
title: Keet Streaming
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: indigo
|
| 5 |
-
colorTo:
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
app_build_command: npm run build
|
| 9 |
-
app_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
#
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
-
##
|
| 17 |
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
##
|
| 21 |
|
| 22 |
-
|
| 23 |
-
Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
|
| 28 |
-
##
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
### `npm run build`
|
| 34 |
-
|
| 35 |
-
Builds the app for production to the `build` folder.\
|
| 36 |
-
It correctly bundles React in production mode and optimizes the build for the best performance.
|
| 37 |
-
|
| 38 |
-
The build is minified and the filenames include the hashes.\
|
| 39 |
-
Your app is ready to be deployed!
|
| 40 |
-
|
| 41 |
-
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
|
| 42 |
-
|
| 43 |
-
### `npm run eject`
|
| 44 |
-
|
| 45 |
-
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
|
| 46 |
-
|
| 47 |
-
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
|
| 48 |
-
|
| 49 |
-
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
|
| 50 |
-
|
| 51 |
-
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
|
| 52 |
-
|
| 53 |
-
## Learn More
|
| 54 |
-
|
| 55 |
-
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
|
| 56 |
-
|
| 57 |
-
To learn React, check out the [React documentation](https://reactjs.org/).
|
| 58 |
-
|
| 59 |
-
### Code Splitting
|
| 60 |
-
|
| 61 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
|
| 62 |
-
|
| 63 |
-
### Analyzing the Bundle Size
|
| 64 |
-
|
| 65 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
|
| 66 |
-
|
| 67 |
-
### Making a Progressive Web App
|
| 68 |
-
|
| 69 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
|
| 70 |
-
|
| 71 |
-
### Advanced Configuration
|
| 72 |
-
|
| 73 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
|
| 74 |
-
|
| 75 |
-
### Deployment
|
| 76 |
-
|
| 77 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
|
| 78 |
-
|
| 79 |
-
### `npm run build` fails to minify
|
| 80 |
-
|
| 81 |
-
This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
|
|
|
|
| 1 |
---
|
| 2 |
title: Keet Streaming
|
| 3 |
+
emoji: 🎙️
|
| 4 |
colorFrom: indigo
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
short_description: Real-time browser ASR with parakeet.js + WebGPU
|
| 10 |
app_build_command: npm run build
|
| 11 |
+
app_file: dist/index.html
|
| 12 |
+
models:
|
| 13 |
+
- istupakov/parakeet-tdt-0.6b-v2-onnx
|
| 14 |
+
tags:
|
| 15 |
+
- asr
|
| 16 |
+
- speech-to-text
|
| 17 |
+
- parakeet
|
| 18 |
+
- parakeet-js
|
| 19 |
+
- onnx
|
| 20 |
+
- webgpu
|
| 21 |
+
custom_headers:
|
| 22 |
+
cross-origin-embedder-policy: require-corp
|
| 23 |
+
cross-origin-opener-policy: same-origin
|
| 24 |
+
cross-origin-resource-policy: cross-origin
|
| 25 |
---
|
| 26 |
|
| 27 |
+
# Keet Streaming
|
| 28 |
|
| 29 |
+
Keet is a real-time, privacy-first transcription app built with SolidJS + Vite and powered by `parakeet.js`.
|
| 30 |
+
Inference runs in the browser (WebGPU/WASM) with no backend transcription service.
|
| 31 |
|
| 32 |
+
## Run locally
|
| 33 |
|
| 34 |
+
```bash
|
| 35 |
+
npm install
|
| 36 |
+
npm run dev
|
| 37 |
+
```
|
| 38 |
|
| 39 |
+
## Hugging Face Spaces
|
| 40 |
|
| 41 |
+
This Space uses the **static** SDK and builds directly from source:
|
|
|
|
| 42 |
|
| 43 |
+
- Build command: `npm run build`
|
| 44 |
+
- Output: `dist/index.html`
|
| 45 |
|
| 46 |
+
## Notes
|
| 47 |
|
| 48 |
+
- The app requires microphone access in the browser.
|
| 49 |
+
- COEP/COOP headers are configured in this README front matter for worker/wasm isolation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
index.html
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8" />
|
| 6 |
+
<meta http-equiv="Content-Security-Policy"
|
| 7 |
+
content="default-src 'self'; connect-src 'self' blob: https://huggingface.co https://*.huggingface.co https://*.hf.co https://fonts.googleapis.com https://fonts.gstatic.com; font-src 'self' https://fonts.gstatic.com; frame-src 'self'; img-src 'self' data:; object-src 'none'; script-src 'self' 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; worker-src 'self' blob:;" />
|
| 8 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 9 |
+
<meta name="description" content="Keet - Privacy-first real-time transcription. Your audio stays on your device." />
|
| 10 |
+
<meta name="theme-color" content="#6B705C" />
|
| 11 |
+
<meta name="mobile-web-app-capable" content="yes" />
|
| 12 |
+
<meta name="apple-mobile-web-app-capable" content="yes" />
|
| 13 |
+
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
|
| 14 |
+
<title>Keet - Real-time Transcription</title>
|
| 15 |
+
|
| 16 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 17 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 18 |
+
<link
|
| 19 |
+
href="https://fonts.googleapis.com/css2?family=Crimson+Pro:ital,wght@0,400;0,500;0,600;1,400&family=Plus+Jakarta+Sans:wght@300;400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
| 20 |
+
rel="stylesheet">
|
| 21 |
+
<link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap"
|
| 22 |
+
rel="stylesheet" />
|
| 23 |
+
|
| 24 |
+
<!-- PWA Manifest -->
|
| 25 |
+
<link rel="manifest" href="./manifest.json" />
|
| 26 |
+
<link rel="icon" type="image/x-icon" href="./favicon.ico" />
|
| 27 |
+
</head>
|
| 28 |
+
|
| 29 |
+
<body>
|
| 30 |
+
<div id="root"></div>
|
| 31 |
+
<script type="module" src="/src/index.tsx"></script>
|
| 32 |
+
|
| 33 |
+
<script>
|
| 34 |
+
if ('serviceWorker' in navigator && location.hostname !== 'localhost') {
|
| 35 |
+
window.addEventListener('load', () => {
|
| 36 |
+
navigator.serviceWorker.register('./sw.js')
|
| 37 |
+
.then((reg) => console.log('[App] SW registered:', reg.scope))
|
| 38 |
+
.catch((err) => console.warn('[App] SW registration failed:', err));
|
| 39 |
+
});
|
| 40 |
+
}
|
| 41 |
+
</script>
|
| 42 |
+
</body>
|
| 43 |
+
|
| 44 |
+
</html>
|
package.json
CHANGED
|
@@ -1,39 +1,43 @@
|
|
| 1 |
{
|
| 2 |
-
"name": "
|
| 3 |
-
"version": "
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"@testing-library/dom": "^10.4.0",
|
| 7 |
-
"@testing-library/jest-dom": "^6.6.3",
|
| 8 |
-
"@testing-library/react": "^16.3.0",
|
| 9 |
-
"@testing-library/user-event": "^13.5.0",
|
| 10 |
-
"react": "^19.1.0",
|
| 11 |
-
"react-dom": "^19.1.0",
|
| 12 |
-
"react-scripts": "5.0.1",
|
| 13 |
-
"web-vitals": "^2.1.4"
|
| 14 |
-
},
|
| 15 |
"scripts": {
|
| 16 |
-
"start": "
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
},
|
| 21 |
-
"
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
},
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
"
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
| 38 |
}
|
| 39 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"name": "keet",
|
| 3 |
+
"version": "1.0.0",
|
| 4 |
+
"description": "Keet real-time transcription with parakeet.js",
|
| 5 |
+
"type": "module",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"scripts": {
|
| 7 |
+
"start": "vite",
|
| 8 |
+
"dev": "vite",
|
| 9 |
+
"dev:local": "cross-env USE_LOCAL_PARAKEET=true vite",
|
| 10 |
+
"build": "vite build",
|
| 11 |
+
"build:local": "cross-env USE_LOCAL_PARAKEET=true vite build",
|
| 12 |
+
"serve": "vite preview",
|
| 13 |
+
"test": "vitest run",
|
| 14 |
+
"test:watch": "vitest"
|
| 15 |
},
|
| 16 |
+
"license": "MIT",
|
| 17 |
+
"devDependencies": {
|
| 18 |
+
"@tailwindcss/forms": "^0.5.10",
|
| 19 |
+
"@tailwindcss/vite": "^4.1.18",
|
| 20 |
+
"@vitest/web-worker": "^4.0.18",
|
| 21 |
+
"cross-env": "^7.0.3",
|
| 22 |
+
"happy-dom": "^20.5.0",
|
| 23 |
+
"postcss": "^8.5.6",
|
| 24 |
+
"tailwindcss": "^4.1.11",
|
| 25 |
+
"typescript": "^5.7.2",
|
| 26 |
+
"vite": "^6.0.0",
|
| 27 |
+
"vite-plugin-solid": "^2.11.6",
|
| 28 |
+
"vitest": "^4.0.18"
|
| 29 |
},
|
| 30 |
+
"dependencies": {
|
| 31 |
+
"@google/generative-ai": "^0.24.1",
|
| 32 |
+
"@huggingface/transformers": "^3.6.1",
|
| 33 |
+
"@solid-primitives/transition-group": "^1.1.2",
|
| 34 |
+
"@thisbeyond/solid-dnd": "^0.7.5",
|
| 35 |
+
"material-icons": "^1.13.14",
|
| 36 |
+
"onnxruntime-web": "1.24.1",
|
| 37 |
+
"parakeet.js": "1.2.1",
|
| 38 |
+
"solid-js": "^1.9.5",
|
| 39 |
+
"uuid": "^11.1.0",
|
| 40 |
+
"wink-eng-lite-web-model": "^1.8.1",
|
| 41 |
+
"wink-nlp": "^2.4.0"
|
| 42 |
}
|
| 43 |
}
|
public/icons/.keep
ADDED
|
File without changes
|
public/index.html
DELETED
|
@@ -1,43 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html lang="en">
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="utf-8" />
|
| 5 |
-
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
|
| 6 |
-
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 7 |
-
<meta name="theme-color" content="#000000" />
|
| 8 |
-
<meta
|
| 9 |
-
name="description"
|
| 10 |
-
content="Web site created using create-react-app"
|
| 11 |
-
/>
|
| 12 |
-
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
|
| 13 |
-
<!--
|
| 14 |
-
manifest.json provides metadata used when your web app is installed on a
|
| 15 |
-
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
| 16 |
-
-->
|
| 17 |
-
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
|
| 18 |
-
<!--
|
| 19 |
-
Notice the use of %PUBLIC_URL% in the tags above.
|
| 20 |
-
It will be replaced with the URL of the `public` folder during the build.
|
| 21 |
-
Only files inside the `public` folder can be referenced from the HTML.
|
| 22 |
-
|
| 23 |
-
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
|
| 24 |
-
work correctly both with client-side routing and a non-root public URL.
|
| 25 |
-
Learn how to configure a non-root public URL by running `npm run build`.
|
| 26 |
-
-->
|
| 27 |
-
<title>React App</title>
|
| 28 |
-
</head>
|
| 29 |
-
<body>
|
| 30 |
-
<noscript>You need to enable JavaScript to run this app.</noscript>
|
| 31 |
-
<div id="root"></div>
|
| 32 |
-
<!--
|
| 33 |
-
This HTML file is a template.
|
| 34 |
-
If you open it directly in the browser, you will see an empty page.
|
| 35 |
-
|
| 36 |
-
You can add webfonts, meta tags, or analytics to this file.
|
| 37 |
-
The build step will place the bundled scripts into the <body> tag.
|
| 38 |
-
|
| 39 |
-
To begin the development, run `npm start` or `yarn start`.
|
| 40 |
-
To create a production bundle, use `npm run build` or `yarn build`.
|
| 41 |
-
-->
|
| 42 |
-
</body>
|
| 43 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public/logo192.png
DELETED
|
Binary file (5.35 kB)
|
|
|
public/logo512.png
DELETED
|
Binary file (9.66 kB)
|
|
|
public/manifest.json
CHANGED
|
@@ -1,25 +1,15 @@
|
|
| 1 |
{
|
| 2 |
-
"short_name": "
|
| 3 |
-
"name": "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"icons": [
|
| 5 |
{
|
| 6 |
"src": "favicon.ico",
|
| 7 |
"sizes": "64x64 32x32 24x24 16x16",
|
| 8 |
"type": "image/x-icon"
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"src": "logo192.png",
|
| 12 |
-
"type": "image/png",
|
| 13 |
-
"sizes": "192x192"
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"src": "logo512.png",
|
| 17 |
-
"type": "image/png",
|
| 18 |
-
"sizes": "512x512"
|
| 19 |
}
|
| 20 |
-
]
|
| 21 |
-
"start_url": ".",
|
| 22 |
-
"display": "standalone",
|
| 23 |
-
"theme_color": "#000000",
|
| 24 |
-
"background_color": "#ffffff"
|
| 25 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"short_name": "Keet",
|
| 3 |
+
"name": "Keet - Real-time Transcription",
|
| 4 |
+
"start_url": ".",
|
| 5 |
+
"display": "standalone",
|
| 6 |
+
"theme_color": "#6B705C",
|
| 7 |
+
"background_color": "#F9F7F2",
|
| 8 |
"icons": [
|
| 9 |
{
|
| 10 |
"src": "favicon.ico",
|
| 11 |
"sizes": "64x64 32x32 24x24 16x16",
|
| 12 |
"type": "image/x-icon"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
}
|
| 14 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
}
|
public/robots.txt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
# https://www.robotstxt.org/robotstxt.html
|
| 2 |
-
User-agent: *
|
| 3 |
-
Disallow:
|
|
|
|
|
|
|
|
|
|
|
|
public/sw.js
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Keet Service Worker
|
| 3 |
+
* Story 3.1: Offline-first caching strategy
|
| 4 |
+
*
|
| 5 |
+
* Strategy:
|
| 6 |
+
* - App shell (HTML, CSS, JS): Cache-first, update in background
|
| 7 |
+
* - Model files: Cache-first (large files, rarely change)
|
| 8 |
+
* - API/Dynamic: Network-first with fallback
|
| 9 |
+
* - Cross-origin isolation headers injected for SharedArrayBuffer / WebGPU support
|
| 10 |
+
*/
|
| 11 |
+
|
| 12 |
+
const CACHE_NAME = 'keet-v2';
|
| 13 |
+
const MODEL_CACHE = 'keet-models-v1';
|
| 14 |
+
|
| 15 |
+
// Base path from SW script URL (works at / and /keet/ on GitHub Pages)
|
| 16 |
+
const BASE = (() => {
|
| 17 |
+
const path = self.location.pathname;
|
| 18 |
+
const i = path.lastIndexOf('/');
|
| 19 |
+
return i >= 0 ? path.slice(0, i + 1) : '/';
|
| 20 |
+
})();
|
| 21 |
+
|
| 22 |
+
// App shell files to pre-cache (base-relative)
|
| 23 |
+
const APP_SHELL = [
|
| 24 |
+
BASE,
|
| 25 |
+
BASE + 'index.html',
|
| 26 |
+
BASE + 'manifest.json',
|
| 27 |
+
];
|
| 28 |
+
|
| 29 |
+
// Model file patterns (cached on-demand)
|
| 30 |
+
const MODEL_PATTERNS = [
|
| 31 |
+
/\.onnx\.data$/,
|
| 32 |
+
/\.onnx$/,
|
| 33 |
+
/\.bin$/,
|
| 34 |
+
/vocab\.txt$/,
|
| 35 |
+
/tokenizer\.json$/,
|
| 36 |
+
];
|
| 37 |
+
|
| 38 |
+
/**
|
| 39 |
+
* Add Cross-Origin Isolation headers to a response.
|
| 40 |
+
* This enables SharedArrayBuffer and WebGPU on static hosts (e.g. GitHub Pages)
|
| 41 |
+
* that don't allow custom response headers.
|
| 42 |
+
* Equivalent to what coi-serviceworker.js does in the parakeet.js demo.
|
| 43 |
+
*/
|
| 44 |
+
function addCOIHeaders(response) {
|
| 45 |
+
if (response.status === 0) {
|
| 46 |
+
return response;
|
| 47 |
+
}
|
| 48 |
+
const newHeaders = new Headers(response.headers);
|
| 49 |
+
newHeaders.set("Cross-Origin-Embedder-Policy", "require-corp");
|
| 50 |
+
newHeaders.set("Cross-Origin-Resource-Policy", "cross-origin");
|
| 51 |
+
newHeaders.set("Cross-Origin-Opener-Policy", "same-origin");
|
| 52 |
+
return new Response(response.body, {
|
| 53 |
+
status: response.status,
|
| 54 |
+
statusText: response.statusText,
|
| 55 |
+
headers: newHeaders,
|
| 56 |
+
});
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
// Install event - pre-cache app shell
|
| 60 |
+
self.addEventListener('install', (event) => {
|
| 61 |
+
console.log('[SW] Installing...');
|
| 62 |
+
event.waitUntil(
|
| 63 |
+
caches.open(CACHE_NAME)
|
| 64 |
+
.then((cache) => {
|
| 65 |
+
console.log('[SW] Pre-caching app shell');
|
| 66 |
+
return cache.addAll(APP_SHELL);
|
| 67 |
+
})
|
| 68 |
+
.then(() => self.skipWaiting())
|
| 69 |
+
);
|
| 70 |
+
});
|
| 71 |
+
|
| 72 |
+
// Activate event - clean old caches
|
| 73 |
+
self.addEventListener('activate', (event) => {
|
| 74 |
+
console.log('[SW] Activating...');
|
| 75 |
+
event.waitUntil(
|
| 76 |
+
caches.keys()
|
| 77 |
+
.then((cacheNames) => {
|
| 78 |
+
return Promise.all(
|
| 79 |
+
cacheNames
|
| 80 |
+
.filter((name) => name !== CACHE_NAME && name !== MODEL_CACHE)
|
| 81 |
+
.map((name) => {
|
| 82 |
+
console.log('[SW] Deleting old cache:', name);
|
| 83 |
+
return caches.delete(name);
|
| 84 |
+
})
|
| 85 |
+
);
|
| 86 |
+
})
|
| 87 |
+
.then(() => self.clients.claim())
|
| 88 |
+
);
|
| 89 |
+
});
|
| 90 |
+
|
| 91 |
+
// Fetch event - serve from cache or network, inject COI headers
|
| 92 |
+
self.addEventListener('fetch', (event) => {
|
| 93 |
+
const url = new URL(event.request.url);
|
| 94 |
+
|
| 95 |
+
// Skip non-GET requests
|
| 96 |
+
if (event.request.method !== 'GET') return;
|
| 97 |
+
|
| 98 |
+
// Skip chrome-extension and other non-http(s) requests
|
| 99 |
+
if (!url.protocol.startsWith('http')) return;
|
| 100 |
+
|
| 101 |
+
// Check if this is a model file
|
| 102 |
+
const isModelFile = MODEL_PATTERNS.some((pattern) => pattern.test(url.pathname));
|
| 103 |
+
|
| 104 |
+
if (isModelFile) {
|
| 105 |
+
// Model files: Cache-first (they're large and rarely change)
|
| 106 |
+
event.respondWith(
|
| 107 |
+
caches.open(MODEL_CACHE)
|
| 108 |
+
.then((cache) => {
|
| 109 |
+
return cache.match(event.request)
|
| 110 |
+
.then((cached) => {
|
| 111 |
+
if (cached) {
|
| 112 |
+
console.log('[SW] Model from cache:', url.pathname);
|
| 113 |
+
return cached;
|
| 114 |
+
}
|
| 115 |
+
console.log('[SW] Fetching model:', url.pathname);
|
| 116 |
+
return fetch(event.request)
|
| 117 |
+
.then((response) => {
|
| 118 |
+
if (response.ok) {
|
| 119 |
+
cache.put(event.request, response.clone());
|
| 120 |
+
}
|
| 121 |
+
return response;
|
| 122 |
+
});
|
| 123 |
+
});
|
| 124 |
+
})
|
| 125 |
+
);
|
| 126 |
+
return;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
// App shell: Cache-first with network fallback + COI headers
|
| 130 |
+
if (url.origin === self.location.origin) {
|
| 131 |
+
event.respondWith(
|
| 132 |
+
caches.match(event.request)
|
| 133 |
+
.then((cached) => {
|
| 134 |
+
const fetchPromise = fetch(event.request)
|
| 135 |
+
.then((response) => {
|
| 136 |
+
if (response.ok) {
|
| 137 |
+
const responseClone = response.clone();
|
| 138 |
+
caches.open(CACHE_NAME)
|
| 139 |
+
.then((cache) => cache.put(event.request, responseClone));
|
| 140 |
+
}
|
| 141 |
+
return addCOIHeaders(response);
|
| 142 |
+
})
|
| 143 |
+
.catch(() => cached);
|
| 144 |
+
|
| 145 |
+
return cached ? addCOIHeaders(cached) : fetchPromise;
|
| 146 |
+
})
|
| 147 |
+
);
|
| 148 |
+
return;
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
// External resources: Network-first + COI headers
|
| 152 |
+
event.respondWith(
|
| 153 |
+
fetch(event.request)
|
| 154 |
+
.then((response) => addCOIHeaders(response))
|
| 155 |
+
.catch(() => caches.match(event.request))
|
| 156 |
+
);
|
| 157 |
+
});
|
| 158 |
+
|
| 159 |
+
// Message handler for cache management
|
| 160 |
+
self.addEventListener('message', (event) => {
|
| 161 |
+
if (event.data.type === 'SKIP_WAITING') {
|
| 162 |
+
self.skipWaiting();
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
if (event.data.type === 'CLEAR_MODEL_CACHE') {
|
| 166 |
+
caches.delete(MODEL_CACHE)
|
| 167 |
+
.then(() => {
|
| 168 |
+
console.log('[SW] Model cache cleared');
|
| 169 |
+
event.ports[0].postMessage({ success: true });
|
| 170 |
+
});
|
| 171 |
+
}
|
| 172 |
+
});
|
public/wasm/ten_vad.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
var createVADModule = (() => {
|
| 3 |
+
var _scriptDir = import.meta.url;
|
| 4 |
+
|
| 5 |
+
return (
|
| 6 |
+
function(createVADModule) {
|
| 7 |
+
createVADModule = createVADModule || {};
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
var a;a||(a=typeof createVADModule !== 'undefined' ? createVADModule : {});var k,l;a.ready=new Promise(function(b,c){k=b;l=c});var p=Object.assign({},a),r="object"==typeof window,u="function"==typeof importScripts,v="",w;
|
| 11 |
+
if(r||u)u?v=self.location.href:"undefined"!=typeof document&&document.currentScript&&(v=document.currentScript.src),_scriptDir&&(v=_scriptDir),0!==v.indexOf("blob:")?v=v.substr(0,v.replace(/[?#].*/,"").lastIndexOf("/")+1):v="",u&&(w=b=>{var c=new XMLHttpRequest;c.open("GET",b,!1);c.responseType="arraybuffer";c.send(null);return new Uint8Array(c.response)});var aa=a.print||console.log.bind(console),x=a.printErr||console.warn.bind(console);Object.assign(a,p);p=null;var y;a.wasmBinary&&(y=a.wasmBinary);
|
| 12 |
+
var noExitRuntime=a.noExitRuntime||!0;"object"!=typeof WebAssembly&&z("no native wasm support detected");var A,B=!1,C="undefined"!=typeof TextDecoder?new TextDecoder("utf8"):void 0,D,E,F;function J(){var b=A.buffer;D=b;a.HEAP8=new Int8Array(b);a.HEAP16=new Int16Array(b);a.HEAP32=new Int32Array(b);a.HEAPU8=E=new Uint8Array(b);a.HEAPU16=new Uint16Array(b);a.HEAPU32=F=new Uint32Array(b);a.HEAPF32=new Float32Array(b);a.HEAPF64=new Float64Array(b)}var K=[],L=[],M=[];
|
| 13 |
+
function ba(){var b=a.preRun.shift();K.unshift(b)}var N=0,O=null,P=null;function z(b){if(a.onAbort)a.onAbort(b);b="Aborted("+b+")";x(b);B=!0;b=new WebAssembly.RuntimeError(b+". Build with -sASSERTIONS for more info.");l(b);throw b;}function Q(){return R.startsWith("data:application/octet-stream;base64,")}var R;if(a.locateFile){if(R="ten_vad.wasm",!Q()){var S=R;R=a.locateFile?a.locateFile(S,v):v+S}}else R=(new URL("ten_vad.wasm",import.meta.url)).href;
|
| 14 |
+
function T(){var b=R;try{if(b==R&&y)return new Uint8Array(y);if(w)return w(b);throw"both async and sync fetching of the wasm failed";}catch(c){z(c)}}function ca(){return y||!r&&!u||"function"!=typeof fetch?Promise.resolve().then(function(){return T()}):fetch(R,{credentials:"same-origin"}).then(function(b){if(!b.ok)throw"failed to load wasm binary file at '"+R+"'";return b.arrayBuffer()}).catch(function(){return T()})}function U(b){for(;0<b.length;)b.shift()(a)}
|
| 15 |
+
var da=[null,[],[]],ea={a:function(){z("")},f:function(b,c,m){E.copyWithin(b,c,c+m)},c:function(b){var c=E.length;b>>>=0;if(2147483648<b)return!1;for(var m=1;4>=m;m*=2){var h=c*(1+.2/m);h=Math.min(h,b+100663296);var d=Math;h=Math.max(b,h);d=d.min.call(d,2147483648,h+(65536-h%65536)%65536);a:{try{A.grow(d-D.byteLength+65535>>>16);J();var e=1;break a}catch(W){}e=void 0}if(e)return!0}return!1},e:function(){return 52},b:function(){return 70},d:function(b,c,m,h){for(var d=0,e=0;e<m;e++){var W=F[c>>2],
|
| 16 |
+
X=F[c+4>>2];c+=8;for(var G=0;G<X;G++){var f=E[W+G],H=da[b];if(0===f||10===f){f=H;for(var n=0,q=n+NaN,t=n;f[t]&&!(t>=q);)++t;if(16<t-n&&f.buffer&&C)f=C.decode(f.subarray(n,t));else{for(q="";n<t;){var g=f[n++];if(g&128){var I=f[n++]&63;if(192==(g&224))q+=String.fromCharCode((g&31)<<6|I);else{var Y=f[n++]&63;g=224==(g&240)?(g&15)<<12|I<<6|Y:(g&7)<<18|I<<12|Y<<6|f[n++]&63;65536>g?q+=String.fromCharCode(g):(g-=65536,q+=String.fromCharCode(55296|g>>10,56320|g&1023))}}else q+=String.fromCharCode(g)}f=q}(1===
|
| 17 |
+
b?aa:x)(f);H.length=0}else H.push(f)}d+=X}F[h>>2]=d;return 0}};
|
| 18 |
+
(function(){function b(d){a.asm=d.exports;A=a.asm.g;J();L.unshift(a.asm.h);N--;a.monitorRunDependencies&&a.monitorRunDependencies(N);0==N&&(null!==O&&(clearInterval(O),O=null),P&&(d=P,P=null,d()))}function c(d){b(d.instance)}function m(d){return ca().then(function(e){return WebAssembly.instantiate(e,h)}).then(function(e){return e}).then(d,function(e){x("failed to asynchronously prepare wasm: "+e);z(e)})}var h={a:ea};N++;a.monitorRunDependencies&&a.monitorRunDependencies(N);if(a.instantiateWasm)try{return a.instantiateWasm(h,
|
| 19 |
+
b)}catch(d){x("Module.instantiateWasm callback failed with error: "+d),l(d)}(function(){return y||"function"!=typeof WebAssembly.instantiateStreaming||Q()||"function"!=typeof fetch?m(c):fetch(R,{credentials:"same-origin"}).then(function(d){return WebAssembly.instantiateStreaming(d,h).then(c,function(e){x("wasm streaming compile failed: "+e);x("falling back to ArrayBuffer instantiation");return m(c)})})})().catch(l);return{}})();
|
| 20 |
+
a.___wasm_call_ctors=function(){return(a.___wasm_call_ctors=a.asm.h).apply(null,arguments)};a._malloc=function(){return(a._malloc=a.asm.i).apply(null,arguments)};a._free=function(){return(a._free=a.asm.j).apply(null,arguments)};a._ten_vad_create=function(){return(a._ten_vad_create=a.asm.k).apply(null,arguments)};a._ten_vad_process=function(){return(a._ten_vad_process=a.asm.l).apply(null,arguments)};a._ten_vad_destroy=function(){return(a._ten_vad_destroy=a.asm.m).apply(null,arguments)};
|
| 21 |
+
a._ten_vad_get_version=function(){return(a._ten_vad_get_version=a.asm.n).apply(null,arguments)};var V;P=function fa(){V||Z();V||(P=fa)};
|
| 22 |
+
function Z(){function b(){if(!V&&(V=!0,a.calledRun=!0,!B)){U(L);k(a);if(a.onRuntimeInitialized)a.onRuntimeInitialized();if(a.postRun)for("function"==typeof a.postRun&&(a.postRun=[a.postRun]);a.postRun.length;){var c=a.postRun.shift();M.unshift(c)}U(M)}}if(!(0<N)){if(a.preRun)for("function"==typeof a.preRun&&(a.preRun=[a.preRun]);a.preRun.length;)ba();U(K);0<N||(a.setStatus?(a.setStatus("Running..."),setTimeout(function(){setTimeout(function(){a.setStatus("")},1);b()},1)):b())}}
|
| 23 |
+
if(a.preInit)for("function"==typeof a.preInit&&(a.preInit=[a.preInit]);0<a.preInit.length;)a.preInit.pop()();Z();
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
return createVADModule.ready
|
| 27 |
+
}
|
| 28 |
+
);
|
| 29 |
+
})();
|
| 30 |
+
export default createVADModule;
|
public/wasm/ten_vad.wasm
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ec0b9640683987e15a4e54e4ce5642b2447c6e5d82b1be889b5099c75434fc3
|
| 3 |
+
size 283349
|
src/App.css
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
.App {
|
| 2 |
-
text-align: center;
|
| 3 |
-
}
|
| 4 |
-
|
| 5 |
-
.App-logo {
|
| 6 |
-
height: 40vmin;
|
| 7 |
-
pointer-events: none;
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
@media (prefers-reduced-motion: no-preference) {
|
| 11 |
-
.App-logo {
|
| 12 |
-
animation: App-logo-spin infinite 20s linear;
|
| 13 |
-
}
|
| 14 |
-
}
|
| 15 |
-
|
| 16 |
-
.App-header {
|
| 17 |
-
background-color: #282c34;
|
| 18 |
-
min-height: 100vh;
|
| 19 |
-
display: flex;
|
| 20 |
-
flex-direction: column;
|
| 21 |
-
align-items: center;
|
| 22 |
-
justify-content: center;
|
| 23 |
-
font-size: calc(10px + 2vmin);
|
| 24 |
-
color: white;
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
.App-link {
|
| 28 |
-
color: #61dafb;
|
| 29 |
-
}
|
| 30 |
-
|
| 31 |
-
@keyframes App-logo-spin {
|
| 32 |
-
from {
|
| 33 |
-
transform: rotate(0deg);
|
| 34 |
-
}
|
| 35 |
-
to {
|
| 36 |
-
transform: rotate(360deg);
|
| 37 |
-
}
|
| 38 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/App.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
import logo from './logo.svg';
|
| 2 |
-
import './App.css';
|
| 3 |
-
|
| 4 |
-
function App() {
|
| 5 |
-
return (
|
| 6 |
-
<div className="App">
|
| 7 |
-
<header className="App-header">
|
| 8 |
-
<img src={logo} className="App-logo" alt="logo" />
|
| 9 |
-
<p>
|
| 10 |
-
Edit <code>src/App.js</code> and save to reload.
|
| 11 |
-
</p>
|
| 12 |
-
<a
|
| 13 |
-
className="App-link"
|
| 14 |
-
href="https://reactjs.org"
|
| 15 |
-
target="_blank"
|
| 16 |
-
rel="noopener noreferrer"
|
| 17 |
-
>
|
| 18 |
-
Learn React
|
| 19 |
-
</a>
|
| 20 |
-
</header>
|
| 21 |
-
</div>
|
| 22 |
-
);
|
| 23 |
-
}
|
| 24 |
-
|
| 25 |
-
export default App;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/App.test.js
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
import { render, screen } from '@testing-library/react';
|
| 2 |
-
import App from './App';
|
| 3 |
-
|
| 4 |
-
test('renders learn react link', () => {
|
| 5 |
-
render(<App />);
|
| 6 |
-
const linkElement = screen.getByText(/learn react/i);
|
| 7 |
-
expect(linkElement).toBeInTheDocument();
|
| 8 |
-
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/App.tsx
ADDED
|
@@ -0,0 +1,1037 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, Show, For, createSignal, createEffect, onMount, onCleanup } from 'solid-js';
|
| 2 |
+
import { appStore } from './stores/appStore';
|
| 3 |
+
import { CompactWaveform, ModelLoadingOverlay, DebugPanel, TranscriptionDisplay, SettingsContent } from './components';
|
| 4 |
+
import { getModelDisplayName, MODELS } from './components/ModelLoadingOverlay';
|
| 5 |
+
import { AudioEngine } from './lib/audio';
|
| 6 |
+
import { MelWorkerClient } from './lib/audio/MelWorkerClient';
|
| 7 |
+
import { TranscriptionWorkerClient } from './lib/transcription';
|
| 8 |
+
import { HybridVAD } from './lib/vad';
|
| 9 |
+
import { WindowBuilder } from './lib/transcription/WindowBuilder';
|
| 10 |
+
import { BufferWorkerClient } from './lib/buffer';
|
| 11 |
+
import { TenVADWorkerClient } from './lib/vad/TenVADWorkerClient';
|
| 12 |
+
import type { V4ProcessResult } from './lib/transcription/TranscriptionWorkerClient';
|
| 13 |
+
import type { BufferWorkerConfig, TenVADResult } from './lib/buffer/types';
|
| 14 |
+
import { formatDuration } from './utils/time';
|
| 15 |
+
|
| 16 |
+
// Singleton instances
|
| 17 |
+
let audioEngine: AudioEngine | null = null;
|
| 18 |
+
export const [audioEngineSignal, setAudioEngineSignal] = createSignal<AudioEngine | null>(null);
|
| 19 |
+
|
| 20 |
+
let workerClient: TranscriptionWorkerClient | null = null;
|
| 21 |
+
let melClient: MelWorkerClient | null = null;
|
| 22 |
+
export const [melClientSignal, setMelClientSignal] = createSignal<MelWorkerClient | null>(null);
|
| 23 |
+
let segmentUnsubscribe: (() => void) | null = null;
|
| 24 |
+
let windowUnsubscribe: (() => void) | null = null;
|
| 25 |
+
let melChunkUnsubscribe: (() => void) | null = null;
|
| 26 |
+
let visualizationUnsubscribe: (() => void) | undefined;
|
| 27 |
+
// v4 pipeline instances
|
| 28 |
+
let hybridVAD: HybridVAD | null = null;
|
| 29 |
+
let bufferClient: BufferWorkerClient | null = null;
|
| 30 |
+
let tenVADClient: TenVADWorkerClient | null = null;
|
| 31 |
+
let windowBuilder: WindowBuilder | null = null;
|
| 32 |
+
let v4TickTimeout: number | undefined;
|
| 33 |
+
let v4TickRunning = false;
|
| 34 |
+
let v4AudioChunkUnsubscribe: (() => void) | null = null;
|
| 35 |
+
let v4MelChunkUnsubscribe: (() => void) | null = null;
|
| 36 |
+
let v4InferenceBusy = false;
|
| 37 |
+
let v4LastInferenceTime = 0;
|
| 38 |
+
// Global sample counter for audio chunks (tracks total samples written to BufferWorker)
|
| 39 |
+
let v4GlobalSampleOffset = 0;
|
| 40 |
+
// Throttle UI updates from TEN-VAD to at most once per frame
|
| 41 |
+
let pendingSileroProb: number | null = null;
|
| 42 |
+
let sileroUpdateScheduled = false;
|
| 43 |
+
let pendingVadState: {
|
| 44 |
+
isSpeech: boolean;
|
| 45 |
+
energy: number;
|
| 46 |
+
snr: number;
|
| 47 |
+
hybridState: string;
|
| 48 |
+
sileroProbability?: number;
|
| 49 |
+
} | null = null;
|
| 50 |
+
let vadUpdateScheduled = false;
|
| 51 |
+
|
| 52 |
+
const scheduleSileroUpdate = (prob: number) => {
|
| 53 |
+
pendingSileroProb = prob;
|
| 54 |
+
if (sileroUpdateScheduled) return;
|
| 55 |
+
sileroUpdateScheduled = true;
|
| 56 |
+
requestAnimationFrame(() => {
|
| 57 |
+
sileroUpdateScheduled = false;
|
| 58 |
+
if (pendingSileroProb === null) return;
|
| 59 |
+
const currentState = appStore.vadState();
|
| 60 |
+
appStore.setVadState({
|
| 61 |
+
...currentState,
|
| 62 |
+
sileroProbability: pendingSileroProb,
|
| 63 |
+
});
|
| 64 |
+
});
|
| 65 |
+
};
|
| 66 |
+
|
| 67 |
+
const scheduleVadStateUpdate = (next: {
|
| 68 |
+
isSpeech: boolean;
|
| 69 |
+
energy: number;
|
| 70 |
+
snr: number;
|
| 71 |
+
hybridState: string;
|
| 72 |
+
sileroProbability?: number;
|
| 73 |
+
}) => {
|
| 74 |
+
pendingVadState = next;
|
| 75 |
+
if (vadUpdateScheduled) return;
|
| 76 |
+
vadUpdateScheduled = true;
|
| 77 |
+
requestAnimationFrame(() => {
|
| 78 |
+
vadUpdateScheduled = false;
|
| 79 |
+
if (!pendingVadState) return;
|
| 80 |
+
const currentState = appStore.vadState();
|
| 81 |
+
const sileroProbability =
|
| 82 |
+
pendingVadState.sileroProbability !== undefined
|
| 83 |
+
? pendingVadState.sileroProbability
|
| 84 |
+
: currentState.sileroProbability;
|
| 85 |
+
appStore.setVadState({
|
| 86 |
+
...currentState,
|
| 87 |
+
...pendingVadState,
|
| 88 |
+
sileroProbability,
|
| 89 |
+
});
|
| 90 |
+
appStore.setIsSpeechDetected(pendingVadState.isSpeech);
|
| 91 |
+
pendingVadState = null;
|
| 92 |
+
});
|
| 93 |
+
};
|
| 94 |
+
|
| 95 |
+
const Header: Component<{
|
| 96 |
+
onToggleDebug: () => void;
|
| 97 |
+
}> = (props) => {
|
| 98 |
+
const sessionLabel = () =>
|
| 99 |
+
appStore.modelState() === 'ready' ? getModelDisplayName(appStore.selectedModelId()) : 'Session';
|
| 100 |
+
return (
|
| 101 |
+
<header class="h-20 flex items-center justify-between px-8 bg-[var(--color-earthy-bg)]/80 backdrop-blur-sm z-30 shrink-0">
|
| 102 |
+
<div class="flex items-center gap-6">
|
| 103 |
+
<div class="flex items-center gap-3">
|
| 104 |
+
<div class="w-10 h-10 rounded-full bg-[var(--color-earthy-muted-green)] flex items-center justify-center text-white">
|
| 105 |
+
<span class="material-symbols-outlined text-xl">auto_awesome</span>
|
| 106 |
+
</div>
|
| 107 |
+
<div>
|
| 108 |
+
<h1 class="text-lg font-semibold tracking-tight text-[var(--color-earthy-dark-brown)]">keet</h1>
|
| 109 |
+
<p class="text-[10px] uppercase tracking-[0.2em] text-[var(--color-earthy-soft-brown)] font-medium">{sessionLabel()}</p>
|
| 110 |
+
</div>
|
| 111 |
+
</div>
|
| 112 |
+
</div>
|
| 113 |
+
<div class="flex items-center gap-4">
|
| 114 |
+
<button
|
| 115 |
+
type="button"
|
| 116 |
+
onClick={props.onToggleDebug}
|
| 117 |
+
class={`p-2 rounded-full transition-colors ${appStore.showDebugPanel() ? 'bg-[var(--color-earthy-muted-green)] text-white' : 'text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-sage)]/30'}`}
|
| 118 |
+
title={appStore.showDebugPanel() ? 'Hide debug panel' : 'Show debug panel'}
|
| 119 |
+
aria-label="Toggle debug panel"
|
| 120 |
+
>
|
| 121 |
+
<span class="material-symbols-outlined">bug_report</span>
|
| 122 |
+
</button>
|
| 123 |
+
<button
|
| 124 |
+
type="button"
|
| 125 |
+
class="p-2 text-[var(--color-earthy-muted-green)] hover:scale-110 transition-transform"
|
| 126 |
+
aria-label="More options"
|
| 127 |
+
>
|
| 128 |
+
<span class="material-symbols-outlined">more_vert</span>
|
| 129 |
+
</button>
|
| 130 |
+
</div>
|
| 131 |
+
</header>
|
| 132 |
+
);
|
| 133 |
+
};
|
| 134 |
+
|
| 135 |
+
const WIDGET_STORAGE_KEY = 'boncukjs-control-widget-pos';
|
| 136 |
+
const WIDGET_MAX_W = 672;
|
| 137 |
+
const WIDGET_MIN_H = 80;
|
| 138 |
+
|
| 139 |
+
const App: Component = () => {
|
| 140 |
+
const [showModelOverlay, setShowModelOverlay] = createSignal(false);
|
| 141 |
+
const [showContextPanel, setShowContextPanel] = createSignal(false);
|
| 142 |
+
type SettingsPanelSection = 'full' | 'audio' | 'model';
|
| 143 |
+
const [settingsPanelSection, setSettingsPanelSection] = createSignal<SettingsPanelSection>('full');
|
| 144 |
+
let panelHoverCloseTimeout: number | undefined;
|
| 145 |
+
const [workerReady, setWorkerReady] = createSignal(false);
|
| 146 |
+
const [widgetPos, setWidgetPos] = createSignal<{ x: number; y: number } | null>(null);
|
| 147 |
+
const [isDragging, setIsDragging] = createSignal(false);
|
| 148 |
+
|
| 149 |
+
const isRecording = () => appStore.recordingState() === 'recording';
|
| 150 |
+
const isModelReady = () => appStore.modelState() === 'ready';
|
| 151 |
+
|
| 152 |
+
let dragStart = { x: 0, y: 0 };
|
| 153 |
+
let posStart = { x: 0, y: 0 };
|
| 154 |
+
|
| 155 |
+
const [windowHeight, setWindowHeight] = createSignal(typeof window !== 'undefined' ? window.innerHeight : 600);
|
| 156 |
+
const settingsExpandUp = () => {
|
| 157 |
+
const pos = widgetPos();
|
| 158 |
+
if (!pos) return true;
|
| 159 |
+
return pos.y >= windowHeight() / 2;
|
| 160 |
+
};
|
| 161 |
+
|
| 162 |
+
const handleWidgetDragStart = (e: MouseEvent) => {
|
| 163 |
+
if ((e.target as HTMLElement).closest('button, select, input')) return;
|
| 164 |
+
e.preventDefault();
|
| 165 |
+
const pos = widgetPos();
|
| 166 |
+
if (!pos) return;
|
| 167 |
+
setIsDragging(true);
|
| 168 |
+
dragStart = { x: e.clientX, y: e.clientY };
|
| 169 |
+
posStart = { ...pos };
|
| 170 |
+
const onMove = (e2: MouseEvent) => {
|
| 171 |
+
const dx = e2.clientX - dragStart.x;
|
| 172 |
+
const dy = e2.clientY - dragStart.y;
|
| 173 |
+
const w = typeof window !== 'undefined' ? window.innerWidth : 800;
|
| 174 |
+
const h = typeof window !== 'undefined' ? window.innerHeight : 600;
|
| 175 |
+
const newX = Math.max(0, Math.min(w - WIDGET_MAX_W, posStart.x + dx));
|
| 176 |
+
const newY = Math.max(0, Math.min(h - WIDGET_MIN_H, posStart.y + dy));
|
| 177 |
+
setWidgetPos({ x: newX, y: newY });
|
| 178 |
+
};
|
| 179 |
+
const onUp = () => {
|
| 180 |
+
setIsDragging(false);
|
| 181 |
+
window.removeEventListener('mousemove', onMove);
|
| 182 |
+
window.removeEventListener('mouseup', onUp);
|
| 183 |
+
const p = widgetPos();
|
| 184 |
+
if (p && typeof localStorage !== 'undefined') {
|
| 185 |
+
try {
|
| 186 |
+
localStorage.setItem(WIDGET_STORAGE_KEY, JSON.stringify(p));
|
| 187 |
+
} catch (_) {}
|
| 188 |
+
}
|
| 189 |
+
};
|
| 190 |
+
window.addEventListener('mousemove', onMove);
|
| 191 |
+
window.addEventListener('mouseup', onUp);
|
| 192 |
+
};
|
| 193 |
+
|
| 194 |
+
createEffect(() => {
|
| 195 |
+
if (!showContextPanel()) return;
|
| 196 |
+
const handler = (e: KeyboardEvent) => {
|
| 197 |
+
if (e.key === 'Escape') {
|
| 198 |
+
e.preventDefault();
|
| 199 |
+
setShowContextPanel(false);
|
| 200 |
+
}
|
| 201 |
+
};
|
| 202 |
+
document.addEventListener('keydown', handler);
|
| 203 |
+
return () => document.removeEventListener('keydown', handler);
|
| 204 |
+
});
|
| 205 |
+
|
| 206 |
+
createEffect(() => {
|
| 207 |
+
if (appStore.modelState() === 'ready' && showContextPanel() && settingsPanelSection() === 'model') {
|
| 208 |
+
setShowContextPanel(false);
|
| 209 |
+
}
|
| 210 |
+
});
|
| 211 |
+
|
| 212 |
+
onMount(() => {
|
| 213 |
+
const onResize = () => setWindowHeight(window.innerHeight);
|
| 214 |
+
window.addEventListener('resize', onResize);
|
| 215 |
+
|
| 216 |
+
const stored =
|
| 217 |
+
typeof localStorage !== 'undefined' ? localStorage.getItem(WIDGET_STORAGE_KEY) : null;
|
| 218 |
+
let posRestored = false;
|
| 219 |
+
if (stored) {
|
| 220 |
+
try {
|
| 221 |
+
const parsed = JSON.parse(stored) as { x: number; y: number };
|
| 222 |
+
if (Number.isFinite(parsed.x) && Number.isFinite(parsed.y)) {
|
| 223 |
+
setWidgetPos({ x: parsed.x, y: parsed.y });
|
| 224 |
+
posRestored = true;
|
| 225 |
+
}
|
| 226 |
+
} catch (_) {}
|
| 227 |
+
}
|
| 228 |
+
if (!posRestored) {
|
| 229 |
+
const w = window.innerWidth;
|
| 230 |
+
const h = window.innerHeight;
|
| 231 |
+
setWidgetPos({
|
| 232 |
+
x: Math.max(0, (w - WIDGET_MAX_W) / 2),
|
| 233 |
+
y: h - 140,
|
| 234 |
+
});
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
workerClient = new TranscriptionWorkerClient();
|
| 238 |
+
|
| 239 |
+
workerClient.onModelProgress = (p) => {
|
| 240 |
+
appStore.setModelProgress(p.progress);
|
| 241 |
+
appStore.setModelMessage(p.message || '');
|
| 242 |
+
if (p.file) appStore.setModelFile(p.file);
|
| 243 |
+
};
|
| 244 |
+
|
| 245 |
+
workerClient.onModelStateChange = (s) => {
|
| 246 |
+
appStore.setModelState(s);
|
| 247 |
+
};
|
| 248 |
+
|
| 249 |
+
workerClient.onV3Confirmed = (text) => {
|
| 250 |
+
appStore.setTranscript(text);
|
| 251 |
+
};
|
| 252 |
+
|
| 253 |
+
workerClient.onV3Pending = (text) => {
|
| 254 |
+
appStore.setPendingText(text);
|
| 255 |
+
};
|
| 256 |
+
|
| 257 |
+
workerClient.onError = (msg) => {
|
| 258 |
+
appStore.setErrorMessage(msg);
|
| 259 |
+
};
|
| 260 |
+
|
| 261 |
+
appStore.refreshDevices();
|
| 262 |
+
setWorkerReady(true);
|
| 263 |
+
|
| 264 |
+
return () => window.removeEventListener('resize', onResize);
|
| 265 |
+
});
|
| 266 |
+
|
| 267 |
+
// No longer auto-show blocking model overlay; model selection is in the settings panel.
|
| 268 |
+
// createEffect(() => { ... setShowModelOverlay(true); });
|
| 269 |
+
|
| 270 |
+
onCleanup(() => {
|
| 271 |
+
clearTimeout(panelHoverCloseTimeout);
|
| 272 |
+
visualizationUnsubscribe?.();
|
| 273 |
+
cleanupV4Pipeline();
|
| 274 |
+
melClient?.dispose();
|
| 275 |
+
workerClient?.dispose();
|
| 276 |
+
});
|
| 277 |
+
|
| 278 |
+
// ---- v4 pipeline tick: periodic window building + inference ----
|
| 279 |
+
let v4TickCount = 0;
|
| 280 |
+
let v4ModelNotReadyLogged = false;
|
| 281 |
+
const v4Tick = async () => {
|
| 282 |
+
if (!workerClient || !windowBuilder || !audioEngine || !bufferClient || v4InferenceBusy) return;
|
| 283 |
+
|
| 284 |
+
// Skip inference if model is not ready (but still allow audio/mel/VAD to process)
|
| 285 |
+
if (appStore.modelState() !== 'ready') {
|
| 286 |
+
if (!v4ModelNotReadyLogged) {
|
| 287 |
+
console.log('[v4Tick] Model not ready yet - audio is being captured and preprocessed');
|
| 288 |
+
v4ModelNotReadyLogged = true;
|
| 289 |
+
}
|
| 290 |
+
return;
|
| 291 |
+
}
|
| 292 |
+
// Reset the flag once model becomes ready
|
| 293 |
+
if (v4ModelNotReadyLogged) {
|
| 294 |
+
console.log('[v4Tick] Model is now ready - starting inference');
|
| 295 |
+
v4ModelNotReadyLogged = false;
|
| 296 |
+
// Initialize the v4 service now that model is ready
|
| 297 |
+
await workerClient.initV4Service({ debug: false });
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
v4TickCount++;
|
| 301 |
+
const now = performance.now();
|
| 302 |
+
// Use the store's configurable inference interval (minus a small margin for the tick jitter)
|
| 303 |
+
const minInterval = Math.max(200, appStore.v4InferenceIntervalMs() - 100);
|
| 304 |
+
if (now - v4LastInferenceTime < minInterval) return;
|
| 305 |
+
|
| 306 |
+
// Check if there is speech via the BufferWorker (async query).
|
| 307 |
+
// We check both energy and inference VAD layers; either one detecting speech triggers inference.
|
| 308 |
+
const cursorSample = windowBuilder.getMatureCursorFrame(); // frame === sample in our pipeline
|
| 309 |
+
const currentSample = v4GlobalSampleOffset;
|
| 310 |
+
const startSample = cursorSample > 0 ? cursorSample : 0;
|
| 311 |
+
|
| 312 |
+
let hasSpeech = false;
|
| 313 |
+
if (currentSample > startSample) {
|
| 314 |
+
// Check energy VAD first (always available, low latency)
|
| 315 |
+
const energyResult = await bufferClient.hasSpeech('energyVad', startSample, currentSample, 0.3);
|
| 316 |
+
|
| 317 |
+
// When inference VAD is ready, require BOTH energy AND inference to agree
|
| 318 |
+
// This prevents false positives from music/noise that has high energy but no speech
|
| 319 |
+
if (tenVADClient?.isReady()) {
|
| 320 |
+
const inferenceResult = await bufferClient.hasSpeech('inferenceVad', startSample, currentSample, 0.5);
|
| 321 |
+
// Require both energy and inference VAD to agree (AND logic)
|
| 322 |
+
hasSpeech = energyResult.hasSpeech && inferenceResult.hasSpeech;
|
| 323 |
+
} else {
|
| 324 |
+
// Fall back to energy-only if inference VAD is not available
|
| 325 |
+
hasSpeech = energyResult.hasSpeech;
|
| 326 |
+
}
|
| 327 |
+
}
|
| 328 |
+
|
| 329 |
+
if (v4TickCount <= 5 || v4TickCount % 20 === 0) {
|
| 330 |
+
const vadState = appStore.vadState();
|
| 331 |
+
const rb = audioEngine.getRingBuffer();
|
| 332 |
+
const rbFrame = rb.getCurrentFrame();
|
| 333 |
+
const rbBase = rb.getBaseFrameOffset();
|
| 334 |
+
console.log(
|
| 335 |
+
`[v4Tick #${v4TickCount}] hasSpeech=${hasSpeech}, vadState=${vadState.hybridState}, ` +
|
| 336 |
+
`energy=${vadState.energy.toFixed(4)}, inferenceVAD=${(vadState.sileroProbability || 0).toFixed(2)}, ` +
|
| 337 |
+
`samples=[${startSample}:${currentSample}], ` +
|
| 338 |
+
`ringBuf=[base=${rbBase}, head=${rbFrame}, avail=${rbFrame - rbBase}]`
|
| 339 |
+
);
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
// Periodic buffer worker state dump (every 40 ticks)
|
| 343 |
+
if (v4TickCount % 40 === 0 && bufferClient) {
|
| 344 |
+
try {
|
| 345 |
+
const state = await bufferClient.getState();
|
| 346 |
+
const layerSummary = Object.entries(state.layers)
|
| 347 |
+
.map(([id, l]) => `${id}:${l.fillCount}/${l.maxEntries}@${l.currentSample}`)
|
| 348 |
+
.join(', ');
|
| 349 |
+
console.log(`[v4Tick #${v4TickCount}] BufferState: ${layerSummary}`);
|
| 350 |
+
} catch (_) { /* ignore state query errors */ }
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
if (!hasSpeech) {
|
| 354 |
+
// Check for silence-based flush using BufferWorker
|
| 355 |
+
const silenceDuration = await bufferClient.getSilenceTailDuration('energyVad', 0.3);
|
| 356 |
+
if (silenceDuration >= appStore.v4SilenceFlushSec()) {
|
| 357 |
+
// Flush pending sentence via timeout finalization
|
| 358 |
+
try {
|
| 359 |
+
const flushResult = await workerClient.v4FinalizeTimeout();
|
| 360 |
+
if (flushResult) {
|
| 361 |
+
appStore.setMatureText(flushResult.matureText);
|
| 362 |
+
appStore.setImmatureText(flushResult.immatureText);
|
| 363 |
+
appStore.setMatureCursorTime(flushResult.matureCursorTime);
|
| 364 |
+
appStore.setTranscript(flushResult.fullText);
|
| 365 |
+
appStore.appendV4SentenceEntries(flushResult.matureSentences);
|
| 366 |
+
appStore.setV4MergerStats({
|
| 367 |
+
sentencesFinalized: flushResult.matureSentenceCount,
|
| 368 |
+
cursorUpdates: flushResult.stats?.matureCursorUpdates || 0,
|
| 369 |
+
utterancesProcessed: flushResult.stats?.utterancesProcessed || 0,
|
| 370 |
+
});
|
| 371 |
+
// Advance window builder cursor
|
| 372 |
+
windowBuilder.advanceMatureCursorByTime(flushResult.matureCursorTime);
|
| 373 |
+
}
|
| 374 |
+
} catch (err) {
|
| 375 |
+
console.error('[v4Tick] Flush error:', err);
|
| 376 |
+
}
|
| 377 |
+
}
|
| 378 |
+
return;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
// Build window from cursor to current position
|
| 382 |
+
const window = windowBuilder.buildWindow();
|
| 383 |
+
if (!window) {
|
| 384 |
+
if (v4TickCount <= 10 || v4TickCount % 20 === 0) {
|
| 385 |
+
const rb = audioEngine.getRingBuffer();
|
| 386 |
+
const rbHead = rb.getCurrentFrame();
|
| 387 |
+
const rbBase = rb.getBaseFrameOffset();
|
| 388 |
+
console.log(
|
| 389 |
+
`[v4Tick #${v4TickCount}] buildWindow=null, ` +
|
| 390 |
+
`ringBuf=[base=${rbBase}, head=${rbHead}, avail=${rbHead - rbBase}], ` +
|
| 391 |
+
`cursor=${windowBuilder.getMatureCursorFrame()}`
|
| 392 |
+
);
|
| 393 |
+
}
|
| 394 |
+
return;
|
| 395 |
+
}
|
| 396 |
+
|
| 397 |
+
console.log(`[v4Tick #${v4TickCount}] Window [${window.startFrame}:${window.endFrame}] ${window.durationSeconds.toFixed(2)}s (initial=${window.isInitial})`);
|
| 398 |
+
|
| 399 |
+
v4InferenceBusy = true;
|
| 400 |
+
v4LastInferenceTime = now;
|
| 401 |
+
|
| 402 |
+
try {
|
| 403 |
+
const inferenceStart = performance.now();
|
| 404 |
+
|
| 405 |
+
// Get mel features for the window
|
| 406 |
+
let features: { features: Float32Array; T: number; melBins: number } | null = null;
|
| 407 |
+
if (melClient) {
|
| 408 |
+
features = await melClient.getFeatures(window.startFrame, window.endFrame);
|
| 409 |
+
}
|
| 410 |
+
|
| 411 |
+
if (!features) {
|
| 412 |
+
v4InferenceBusy = false;
|
| 413 |
+
return;
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
// Calculate time offset for absolute timestamps
|
| 417 |
+
const timeOffset = window.startFrame / 16000;
|
| 418 |
+
|
| 419 |
+
// Calculate incremental cache parameters
|
| 420 |
+
const cursorFrame = windowBuilder.getMatureCursorFrame();
|
| 421 |
+
const prefixSeconds = cursorFrame > 0 ? (window.startFrame - cursorFrame) / 16000 : 0;
|
| 422 |
+
|
| 423 |
+
const result: V4ProcessResult = await workerClient.processV4ChunkWithFeatures({
|
| 424 |
+
features: features.features,
|
| 425 |
+
T: features.T,
|
| 426 |
+
melBins: features.melBins,
|
| 427 |
+
timeOffset,
|
| 428 |
+
endTime: window.endFrame / 16000,
|
| 429 |
+
segmentId: `v4_${Date.now()}`,
|
| 430 |
+
incrementalCache: prefixSeconds > 0 ? {
|
| 431 |
+
cacheKey: 'v4-stream',
|
| 432 |
+
prefixSeconds,
|
| 433 |
+
} : undefined,
|
| 434 |
+
});
|
| 435 |
+
|
| 436 |
+
const inferenceMs = performance.now() - inferenceStart;
|
| 437 |
+
|
| 438 |
+
// Update UI state
|
| 439 |
+
appStore.setMatureText(result.matureText);
|
| 440 |
+
appStore.setImmatureText(result.immatureText);
|
| 441 |
+
appStore.setTranscript(result.fullText);
|
| 442 |
+
appStore.setPendingText(result.immatureText);
|
| 443 |
+
appStore.appendV4SentenceEntries(result.matureSentences);
|
| 444 |
+
appStore.setInferenceLatency(inferenceMs);
|
| 445 |
+
|
| 446 |
+
// Update RTF
|
| 447 |
+
const audioDurationMs = window.durationSeconds * 1000;
|
| 448 |
+
appStore.setRtf(inferenceMs / audioDurationMs);
|
| 449 |
+
|
| 450 |
+
// Advance cursor if merger advanced it
|
| 451 |
+
if (result.matureCursorTime > windowBuilder.getMatureCursorTime()) {
|
| 452 |
+
appStore.setMatureCursorTime(result.matureCursorTime);
|
| 453 |
+
windowBuilder.advanceMatureCursorByTime(result.matureCursorTime);
|
| 454 |
+
windowBuilder.markSentenceEnd(Math.round(result.matureCursorTime * 16000));
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
// Update stats
|
| 458 |
+
appStore.setV4MergerStats({
|
| 459 |
+
sentencesFinalized: result.matureSentenceCount,
|
| 460 |
+
cursorUpdates: result.stats?.matureCursorUpdates || 0,
|
| 461 |
+
utterancesProcessed: result.stats?.utterancesProcessed || 0,
|
| 462 |
+
});
|
| 463 |
+
|
| 464 |
+
// Update buffer metrics
|
| 465 |
+
const ring = audioEngine.getRingBuffer();
|
| 466 |
+
appStore.setBufferMetrics({
|
| 467 |
+
fillRatio: ring.getFillCount() / ring.getSize(),
|
| 468 |
+
latencyMs: (ring.getFillCount() / 16000) * 1000,
|
| 469 |
+
});
|
| 470 |
+
|
| 471 |
+
// Update metrics
|
| 472 |
+
if (result.metrics) {
|
| 473 |
+
appStore.setSystemMetrics({
|
| 474 |
+
throughput: 0,
|
| 475 |
+
modelConfidence: 0,
|
| 476 |
+
});
|
| 477 |
+
}
|
| 478 |
+
} catch (err: any) {
|
| 479 |
+
console.error('[v4Tick] Inference error:', err);
|
| 480 |
+
} finally {
|
| 481 |
+
v4InferenceBusy = false;
|
| 482 |
+
}
|
| 483 |
+
};
|
| 484 |
+
|
| 485 |
+
// ---- Cleanup v4 pipeline resources ----
|
| 486 |
+
const cleanupV4Pipeline = () => {
|
| 487 |
+
v4TickRunning = false;
|
| 488 |
+
if (v4TickTimeout) {
|
| 489 |
+
clearTimeout(v4TickTimeout);
|
| 490 |
+
v4TickTimeout = undefined;
|
| 491 |
+
}
|
| 492 |
+
if (v4AudioChunkUnsubscribe) {
|
| 493 |
+
v4AudioChunkUnsubscribe();
|
| 494 |
+
v4AudioChunkUnsubscribe = null;
|
| 495 |
+
}
|
| 496 |
+
if (v4MelChunkUnsubscribe) {
|
| 497 |
+
v4MelChunkUnsubscribe();
|
| 498 |
+
v4MelChunkUnsubscribe = null;
|
| 499 |
+
}
|
| 500 |
+
hybridVAD = null;
|
| 501 |
+
if (tenVADClient) {
|
| 502 |
+
tenVADClient.dispose();
|
| 503 |
+
tenVADClient = null;
|
| 504 |
+
}
|
| 505 |
+
if (bufferClient) {
|
| 506 |
+
bufferClient.dispose();
|
| 507 |
+
bufferClient = null;
|
| 508 |
+
}
|
| 509 |
+
windowBuilder = null;
|
| 510 |
+
v4InferenceBusy = false;
|
| 511 |
+
v4LastInferenceTime = 0;
|
| 512 |
+
v4GlobalSampleOffset = 0;
|
| 513 |
+
};
|
| 514 |
+
|
| 515 |
+
const toggleRecording = async () => {
|
| 516 |
+
if (isRecording()) {
|
| 517 |
+
// Update UI immediately so the stop button always takes effect even if cleanup throws
|
| 518 |
+
visualizationUnsubscribe?.();
|
| 519 |
+
visualizationUnsubscribe = undefined;
|
| 520 |
+
appStore.stopRecording();
|
| 521 |
+
appStore.setAudioLevel(0);
|
| 522 |
+
appStore.setBarLevels(new Float32Array(0));
|
| 523 |
+
|
| 524 |
+
try {
|
| 525 |
+
audioEngine?.stop();
|
| 526 |
+
|
| 527 |
+
if (segmentUnsubscribe) segmentUnsubscribe();
|
| 528 |
+
if (windowUnsubscribe) windowUnsubscribe();
|
| 529 |
+
if (melChunkUnsubscribe) melChunkUnsubscribe();
|
| 530 |
+
cleanupV4Pipeline();
|
| 531 |
+
|
| 532 |
+
if (workerClient) {
|
| 533 |
+
const final = await workerClient.finalize();
|
| 534 |
+
let text = '';
|
| 535 |
+
if ('text' in final && typeof final.text === 'string') {
|
| 536 |
+
text = final.text;
|
| 537 |
+
} else if ('fullText' in final && typeof final.fullText === 'string') {
|
| 538 |
+
text = final.fullText;
|
| 539 |
+
}
|
| 540 |
+
appStore.setTranscript(text);
|
| 541 |
+
appStore.setPendingText('');
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
melClient?.reset();
|
| 545 |
+
audioEngine?.reset();
|
| 546 |
+
} catch (err) {
|
| 547 |
+
console.warn('[App] Error during stop recording cleanup:', err);
|
| 548 |
+
}
|
| 549 |
+
} else {
|
| 550 |
+
try {
|
| 551 |
+
if (!audioEngine) {
|
| 552 |
+
audioEngine = new AudioEngine({
|
| 553 |
+
sampleRate: 16000,
|
| 554 |
+
deviceId: appStore.selectedDeviceId(),
|
| 555 |
+
});
|
| 556 |
+
setAudioEngineSignal(audioEngine);
|
| 557 |
+
} else {
|
| 558 |
+
audioEngine.updateConfig({ deviceId: appStore.selectedDeviceId() });
|
| 559 |
+
audioEngine.reset();
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
const mode = appStore.transcriptionMode();
|
| 563 |
+
|
| 564 |
+
// v4 mode: Always start audio capture, mel preprocessing, and VAD
|
| 565 |
+
// Inference only runs when model is ready (checked in v4Tick)
|
| 566 |
+
if (mode === 'v4-utterance') {
|
| 567 |
+
// ---- v4: Utterance-based pipeline with BufferWorker + TEN-VAD ----
|
| 568 |
+
|
| 569 |
+
// Initialize merger in worker only if model is ready
|
| 570 |
+
if (isModelReady() && workerClient) {
|
| 571 |
+
await workerClient.initV4Service({ debug: false });
|
| 572 |
+
}
|
| 573 |
+
|
| 574 |
+
// Initialize mel worker (always needed for preprocessing)
|
| 575 |
+
if (!melClient) {
|
| 576 |
+
melClient = new MelWorkerClient();
|
| 577 |
+
setMelClientSignal(melClient);
|
| 578 |
+
}
|
| 579 |
+
try {
|
| 580 |
+
await melClient.init({ nMels: 128 });
|
| 581 |
+
} catch (e) {
|
| 582 |
+
melClient.dispose();
|
| 583 |
+
melClient = null;
|
| 584 |
+
setMelClientSignal(null);
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
// Initialize BufferWorker (centralized multi-layer data store)
|
| 588 |
+
bufferClient = new BufferWorkerClient();
|
| 589 |
+
const bufferConfig: BufferWorkerConfig = {
|
| 590 |
+
sampleRate: 16000,
|
| 591 |
+
layers: {
|
| 592 |
+
audio: { hopSamples: 1, entryDimension: 1, maxDurationSec: 120 },
|
| 593 |
+
mel: { hopSamples: 160, entryDimension: 128, maxDurationSec: 120 },
|
| 594 |
+
energyVad: { hopSamples: 1280, entryDimension: 1, maxDurationSec: 120 },
|
| 595 |
+
inferenceVad: { hopSamples: 256, entryDimension: 1, maxDurationSec: 120 },
|
| 596 |
+
},
|
| 597 |
+
};
|
| 598 |
+
await bufferClient.init(bufferConfig);
|
| 599 |
+
|
| 600 |
+
// Initialize TEN-VAD worker (inference-based VAD)
|
| 601 |
+
tenVADClient = new TenVADWorkerClient();
|
| 602 |
+
tenVADClient.onResult((result: TenVADResult) => {
|
| 603 |
+
if (!bufferClient) return;
|
| 604 |
+
// Batch-write hop probabilities to inferenceVad (single worker message)
|
| 605 |
+
if (result.hopCount > 0) {
|
| 606 |
+
const lastProb = result.probabilities[result.hopCount - 1];
|
| 607 |
+
if (bufferClient.writeBatchTransfer) {
|
| 608 |
+
bufferClient.writeBatchTransfer('inferenceVad', result.probabilities, result.globalSampleOffset);
|
| 609 |
+
} else {
|
| 610 |
+
bufferClient.writeBatch('inferenceVad', result.probabilities, result.globalSampleOffset);
|
| 611 |
+
}
|
| 612 |
+
|
| 613 |
+
// Update UI at most once per frame with the latest probability
|
| 614 |
+
scheduleSileroUpdate(lastProb);
|
| 615 |
+
}
|
| 616 |
+
});
|
| 617 |
+
// TEN-VAD init is non-blocking; falls back gracefully if WASM fails
|
| 618 |
+
const wasmPath = `${import.meta.env.BASE_URL}wasm/`;
|
| 619 |
+
tenVADClient.init({ hopSize: 256, threshold: 0.5, wasmPath }).catch((err) => {
|
| 620 |
+
console.warn('[v4] TEN-VAD init failed, using energy-only:', err);
|
| 621 |
+
});
|
| 622 |
+
|
| 623 |
+
// Initialize hybrid VAD for energy-based detection (always runs, fast)
|
| 624 |
+
hybridVAD = new HybridVAD({
|
| 625 |
+
sileroThreshold: 0.5,
|
| 626 |
+
onsetConfirmations: 2,
|
| 627 |
+
offsetConfirmations: 3,
|
| 628 |
+
sampleRate: 16000,
|
| 629 |
+
});
|
| 630 |
+
// Do NOT init Silero in HybridVAD (TEN-VAD replaces it)
|
| 631 |
+
|
| 632 |
+
// NOTE: WindowBuilder is created AFTER audioEngine.start() below,
|
| 633 |
+
// because start() may re-create the internal RingBuffer.
|
| 634 |
+
|
| 635 |
+
// Reset global sample counter
|
| 636 |
+
v4GlobalSampleOffset = 0;
|
| 637 |
+
|
| 638 |
+
// Feed audio chunks to mel worker from the main v4 audio handler below
|
| 639 |
+
v4MelChunkUnsubscribe = null;
|
| 640 |
+
|
| 641 |
+
// Process each audio chunk: energy VAD + write to BufferWorker + forward to TEN-VAD
|
| 642 |
+
v4AudioChunkUnsubscribe = audioEngine.onAudioChunk((chunk) => {
|
| 643 |
+
if (!hybridVAD || !bufferClient) return;
|
| 644 |
+
|
| 645 |
+
const chunkOffset = v4GlobalSampleOffset;
|
| 646 |
+
v4GlobalSampleOffset += chunk.length;
|
| 647 |
+
|
| 648 |
+
// 1. Run energy VAD (synchronous, fast) and write to BufferWorker
|
| 649 |
+
const vadResult = hybridVAD.processEnergyOnly(chunk);
|
| 650 |
+
const energyProb = vadResult.isSpeech ? 0.9 : 0.1;
|
| 651 |
+
bufferClient.writeScalar('energyVad', energyProb);
|
| 652 |
+
|
| 653 |
+
// 2. Forward audio to mel worker (copy, keep chunk for TEN-VAD transfer)
|
| 654 |
+
melClient?.pushAudioCopy(chunk);
|
| 655 |
+
|
| 656 |
+
// 3. Forward audio to TEN-VAD worker for inference-based VAD (transfer, no copy)
|
| 657 |
+
if (tenVADClient?.isReady()) {
|
| 658 |
+
tenVADClient.processTransfer(chunk, chunkOffset);
|
| 659 |
+
}
|
| 660 |
+
|
| 661 |
+
// 4. Update VAD state for UI
|
| 662 |
+
const sileroProbability = tenVADClient?.isReady()
|
| 663 |
+
? undefined
|
| 664 |
+
: (vadResult.sileroProbability || 0);
|
| 665 |
+
scheduleVadStateUpdate({
|
| 666 |
+
isSpeech: vadResult.isSpeech,
|
| 667 |
+
energy: vadResult.energy,
|
| 668 |
+
snr: vadResult.snr || 0,
|
| 669 |
+
hybridState: vadResult.state,
|
| 670 |
+
...(sileroProbability !== undefined ? { sileroProbability } : {}),
|
| 671 |
+
});
|
| 672 |
+
});
|
| 673 |
+
|
| 674 |
+
// Start adaptive inference tick loop (reads interval from appStore)
|
| 675 |
+
// Note: v4Tick internally checks if model is ready before running inference
|
| 676 |
+
v4TickRunning = true;
|
| 677 |
+
const scheduleNextTick = () => {
|
| 678 |
+
if (!v4TickRunning) return;
|
| 679 |
+
v4TickTimeout = window.setTimeout(async () => {
|
| 680 |
+
if (!v4TickRunning) return;
|
| 681 |
+
await v4Tick();
|
| 682 |
+
scheduleNextTick();
|
| 683 |
+
}, appStore.v4InferenceIntervalMs());
|
| 684 |
+
};
|
| 685 |
+
scheduleNextTick();
|
| 686 |
+
|
| 687 |
+
} else if (isModelReady() && workerClient) {
|
| 688 |
+
// v3 and v2 modes still require model to be ready
|
| 689 |
+
if (mode === 'v3-streaming') {
|
| 690 |
+
// ---- v3: Fixed-window token streaming (existing) ----
|
| 691 |
+
const windowDur = appStore.streamingWindow();
|
| 692 |
+
const triggerInt = appStore.triggerInterval();
|
| 693 |
+
const overlapDur = Math.max(1.0, windowDur - triggerInt);
|
| 694 |
+
|
| 695 |
+
await workerClient.initV3Service({
|
| 696 |
+
windowDuration: windowDur,
|
| 697 |
+
overlapDuration: overlapDur,
|
| 698 |
+
sampleRate: 16000,
|
| 699 |
+
frameStride: appStore.frameStride(),
|
| 700 |
+
});
|
| 701 |
+
|
| 702 |
+
if (!melClient) {
|
| 703 |
+
melClient = new MelWorkerClient();
|
| 704 |
+
setMelClientSignal(melClient);
|
| 705 |
+
}
|
| 706 |
+
try {
|
| 707 |
+
await melClient.init({ nMels: 128 });
|
| 708 |
+
} catch (e) {
|
| 709 |
+
melClient.dispose();
|
| 710 |
+
melClient = null;
|
| 711 |
+
setMelClientSignal(null);
|
| 712 |
+
}
|
| 713 |
+
|
| 714 |
+
melChunkUnsubscribe = audioEngine.onAudioChunk((chunk) => {
|
| 715 |
+
melClient?.pushAudioCopy(chunk);
|
| 716 |
+
});
|
| 717 |
+
|
| 718 |
+
windowUnsubscribe = audioEngine.onWindowChunk(
|
| 719 |
+
windowDur,
|
| 720 |
+
overlapDur,
|
| 721 |
+
triggerInt,
|
| 722 |
+
async (audio, startTime) => {
|
| 723 |
+
if (!workerClient) return;
|
| 724 |
+
const start = performance.now();
|
| 725 |
+
|
| 726 |
+
let result;
|
| 727 |
+
if (melClient) {
|
| 728 |
+
const startSample = Math.round(startTime * 16000);
|
| 729 |
+
const endSample = startSample + audio.length;
|
| 730 |
+
const melFeatures = await melClient.getFeatures(startSample, endSample);
|
| 731 |
+
|
| 732 |
+
if (melFeatures) {
|
| 733 |
+
result = await workerClient.processV3ChunkWithFeatures(
|
| 734 |
+
melFeatures.features,
|
| 735 |
+
melFeatures.T,
|
| 736 |
+
melFeatures.melBins,
|
| 737 |
+
startTime,
|
| 738 |
+
overlapDur,
|
| 739 |
+
);
|
| 740 |
+
} else {
|
| 741 |
+
result = await workerClient.processV3Chunk(audio, startTime);
|
| 742 |
+
}
|
| 743 |
+
} else {
|
| 744 |
+
result = await workerClient.processV3Chunk(audio, startTime);
|
| 745 |
+
}
|
| 746 |
+
|
| 747 |
+
const duration = performance.now() - start;
|
| 748 |
+
const stride = appStore.triggerInterval();
|
| 749 |
+
appStore.setRtf(duration / (stride * 1000));
|
| 750 |
+
appStore.setInferenceLatency(duration);
|
| 751 |
+
|
| 752 |
+
if (audioEngine) {
|
| 753 |
+
const ring = audioEngine.getRingBuffer();
|
| 754 |
+
appStore.setBufferMetrics({
|
| 755 |
+
fillRatio: ring.getFillCount() / ring.getSize(),
|
| 756 |
+
latencyMs: (ring.getFillCount() / 16000) * 1000,
|
| 757 |
+
});
|
| 758 |
+
}
|
| 759 |
+
|
| 760 |
+
appStore.setMergeInfo({
|
| 761 |
+
lcsLength: result.lcsLength,
|
| 762 |
+
anchorValid: result.anchorValid,
|
| 763 |
+
chunkCount: result.chunkCount,
|
| 764 |
+
anchorTokens: result.anchorTokens
|
| 765 |
+
});
|
| 766 |
+
}
|
| 767 |
+
);
|
| 768 |
+
} else {
|
| 769 |
+
// ---- v2: Per-utterance (existing) ----
|
| 770 |
+
await workerClient.initService({ sampleRate: 16000 });
|
| 771 |
+
segmentUnsubscribe = audioEngine.onSpeechSegment(async (segment) => {
|
| 772 |
+
if (workerClient) {
|
| 773 |
+
const start = Date.now();
|
| 774 |
+
const samples = audioEngine!.getRingBuffer().read(segment.startFrame, segment.endFrame);
|
| 775 |
+
const result = await workerClient.transcribeSegment(samples);
|
| 776 |
+
if (result.text) appStore.appendTranscript(result.text + ' ');
|
| 777 |
+
appStore.setInferenceLatency(Date.now() - start);
|
| 778 |
+
}
|
| 779 |
+
});
|
| 780 |
+
}
|
| 781 |
+
}
|
| 782 |
+
|
| 783 |
+
await audioEngine.start();
|
| 784 |
+
|
| 785 |
+
// Create WindowBuilder AFTER start() so we get the final RingBuffer reference
|
| 786 |
+
// (AudioEngine.init() re-creates the RingBuffer internally)
|
| 787 |
+
if (mode === 'v4-utterance') {
|
| 788 |
+
windowBuilder = new WindowBuilder(
|
| 789 |
+
audioEngine.getRingBuffer(),
|
| 790 |
+
null, // No VADRingBuffer; hasSpeech now goes through BufferWorker
|
| 791 |
+
{
|
| 792 |
+
sampleRate: 16000,
|
| 793 |
+
minDurationSec: 3.0,
|
| 794 |
+
maxDurationSec: 30.0,
|
| 795 |
+
minInitialDurationSec: 1.5,
|
| 796 |
+
useVadBoundaries: false, // VAD boundaries now managed by BufferWorker
|
| 797 |
+
vadSilenceThreshold: 0.3,
|
| 798 |
+
debug: true, // Enable debug logging for diagnostics
|
| 799 |
+
}
|
| 800 |
+
);
|
| 801 |
+
}
|
| 802 |
+
|
| 803 |
+
appStore.startRecording();
|
| 804 |
+
|
| 805 |
+
// Use same 30fps tick (onVisualizationUpdate throttled to 33ms).
|
| 806 |
+
// Bar levels from AnalyserNode (native FFT, low CPU) instead of mel worker.
|
| 807 |
+
visualizationUnsubscribe = audioEngine.onVisualizationUpdate((_data, metrics) => {
|
| 808 |
+
appStore.setAudioLevel(metrics.currentEnergy);
|
| 809 |
+
if (appStore.transcriptionMode() !== 'v4-utterance') {
|
| 810 |
+
appStore.setIsSpeechDetected(audioEngine?.isSpeechActive() ?? false);
|
| 811 |
+
}
|
| 812 |
+
appStore.setBarLevels(audioEngine!.getBarLevels());
|
| 813 |
+
});
|
| 814 |
+
} catch (err: any) {
|
| 815 |
+
appStore.setErrorMessage(err.message);
|
| 816 |
+
}
|
| 817 |
+
}
|
| 818 |
+
};
|
| 819 |
+
|
| 820 |
+
const loadSelectedModel = async () => {
|
| 821 |
+
if (!workerClient) return;
|
| 822 |
+
if (appStore.modelState() === 'ready') return;
|
| 823 |
+
if (appStore.modelState() === 'loading') return;
|
| 824 |
+
setShowContextPanel(true);
|
| 825 |
+
try {
|
| 826 |
+
await workerClient.initModel(appStore.selectedModelId());
|
| 827 |
+
} catch (e) {
|
| 828 |
+
console.error('Failed to load model:', e);
|
| 829 |
+
appStore.setModelState('error');
|
| 830 |
+
appStore.setErrorMessage(e instanceof Error ? e.message : String(e));
|
| 831 |
+
}
|
| 832 |
+
};
|
| 833 |
+
|
| 834 |
+
const openPanelForAudio = () => {
|
| 835 |
+
clearTimeout(panelHoverCloseTimeout);
|
| 836 |
+
setSettingsPanelSection('audio');
|
| 837 |
+
setShowContextPanel(true);
|
| 838 |
+
};
|
| 839 |
+
const openPanelForModel = () => {
|
| 840 |
+
clearTimeout(panelHoverCloseTimeout);
|
| 841 |
+
setSettingsPanelSection('model');
|
| 842 |
+
setShowContextPanel(true);
|
| 843 |
+
};
|
| 844 |
+
const schedulePanelCloseIfHover = () => {
|
| 845 |
+
panelHoverCloseTimeout = window.setTimeout(() => {
|
| 846 |
+
if (settingsPanelSection() !== 'full' && appStore.modelState() !== 'loading') {
|
| 847 |
+
setShowContextPanel(false);
|
| 848 |
+
}
|
| 849 |
+
}, 250);
|
| 850 |
+
};
|
| 851 |
+
// Abort any pending hover-close of the settings panel.
const cancelPanelClose = () => {
  clearTimeout(panelHoverCloseTimeout);
};
|
| 852 |
+
const panelMouseLeave = () => {
|
| 853 |
+
if (settingsPanelSection() !== 'full') schedulePanelCloseIfHover();
|
| 854 |
+
};
|
| 855 |
+
|
| 856 |
+
const handleLocalLoad = async (files: FileList) => {
|
| 857 |
+
if (!workerClient) return;
|
| 858 |
+
setShowContextPanel(true);
|
| 859 |
+
try {
|
| 860 |
+
await workerClient.initLocalModel(files);
|
| 861 |
+
} catch (e) {
|
| 862 |
+
console.error('Failed to load local model:', e);
|
| 863 |
+
}
|
| 864 |
+
};
|
| 865 |
+
|
| 866 |
+
return (
|
| 867 |
+
<div class="h-screen flex flex-col overflow-hidden bg-[var(--color-earthy-bg)] selection:bg-[var(--color-earthy-coral)] selection:text-white">
|
| 868 |
+
<ModelLoadingOverlay
|
| 869 |
+
isVisible={showModelOverlay()}
|
| 870 |
+
state={appStore.modelState()}
|
| 871 |
+
progress={appStore.modelProgress()}
|
| 872 |
+
message={appStore.modelMessage()}
|
| 873 |
+
file={appStore.modelFile()}
|
| 874 |
+
backend={appStore.backend()}
|
| 875 |
+
selectedModelId={appStore.selectedModelId()}
|
| 876 |
+
onModelSelect={(id: string) => appStore.setSelectedModelId(id)}
|
| 877 |
+
onStart={() => loadSelectedModel()}
|
| 878 |
+
onLocalLoad={handleLocalLoad}
|
| 879 |
+
onClose={() => setShowModelOverlay(false)}
|
| 880 |
+
/>
|
| 881 |
+
|
| 882 |
+
<Header
|
| 883 |
+
onToggleDebug={() => appStore.setShowDebugPanel(!appStore.showDebugPanel())}
|
| 884 |
+
/>
|
| 885 |
+
|
| 886 |
+
<div class="flex-1 flex overflow-hidden relative">
|
| 887 |
+
<main class="flex-1 overflow-y-auto custom-scrollbar px-4 sm:px-6 lg:px-10 xl:px-14 2xl:px-20 flex flex-col items-center">
|
| 888 |
+
<div class="w-full max-w-[1680px] py-8 md:py-10 lg:py-12">
|
| 889 |
+
<TranscriptionDisplay
|
| 890 |
+
confirmedText={appStore.transcriptionMode() === 'v4-utterance' ? appStore.matureText() : appStore.transcript()}
|
| 891 |
+
pendingText={appStore.transcriptionMode() === 'v4-utterance' ? appStore.immatureText() : appStore.pendingText()}
|
| 892 |
+
sentenceEntries={appStore.v4SentenceEntries()}
|
| 893 |
+
isV4Mode={appStore.transcriptionMode() === 'v4-utterance'}
|
| 894 |
+
isRecording={isRecording()}
|
| 895 |
+
lcsLength={appStore.mergeInfo().lcsLength}
|
| 896 |
+
anchorValid={appStore.mergeInfo().anchorValid}
|
| 897 |
+
showConfidence={appStore.transcriptionMode() === 'v3-streaming'}
|
| 898 |
+
class="min-h-[56vh]"
|
| 899 |
+
/>
|
| 900 |
+
</div>
|
| 901 |
+
</main>
|
| 902 |
+
</div>
|
| 903 |
+
|
| 904 |
+
{/* Draggable floating control widget */}
|
| 905 |
+
<div
|
| 906 |
+
class={widgetPos() !== null ? 'fixed z-30 w-full max-w-2xl px-6 select-none' : 'absolute bottom-8 left-1/2 -translate-x-1/2 z-30 w-full max-w-2xl px-6'}
|
| 907 |
+
style={widgetPos() ? { left: `${widgetPos()!.x}px`, top: `${widgetPos()!.y}px` } : {}}
|
| 908 |
+
>
|
| 909 |
+
<div class="relative">
|
| 910 |
+
{/* Settings panel: expands up or down depending on bar position vs half screen height */}
|
| 911 |
+
<div
|
| 912 |
+
class="absolute left-0 right-0 overflow-hidden transition-[max-height] duration-300 ease-out border border-[var(--color-earthy-sage)]/30 bg-[var(--color-earthy-bg)]/95 backdrop-blur-sm shadow-lg"
|
| 913 |
+
classList={{
|
| 914 |
+
'max-h-0': !showContextPanel(),
|
| 915 |
+
'max-h-[70vh]': showContextPanel(),
|
| 916 |
+
'bottom-full rounded-t-2xl border-b-0': settingsExpandUp(),
|
| 917 |
+
'top-full rounded-b-2xl border-t-0': !settingsExpandUp(),
|
| 918 |
+
}}
|
| 919 |
+
onMouseEnter={cancelPanelClose}
|
| 920 |
+
onMouseLeave={panelMouseLeave}
|
| 921 |
+
>
|
| 922 |
+
<div class="max-h-[70vh] min-h-0 flex flex-col overflow-y-auto custom-scrollbar">
|
| 923 |
+
<SettingsContent
|
| 924 |
+
section={settingsPanelSection()}
|
| 925 |
+
onClose={() => setShowContextPanel(false)}
|
| 926 |
+
onLoadModel={() => loadSelectedModel()}
|
| 927 |
+
onLocalLoad={handleLocalLoad}
|
| 928 |
+
onOpenDebug={() => appStore.setShowDebugPanel(true)}
|
| 929 |
+
onDeviceSelect={(id) => {
|
| 930 |
+
if (audioEngine) audioEngine.updateConfig({ deviceId: id });
|
| 931 |
+
}}
|
| 932 |
+
audioEngine={audioEngineSignal() ?? undefined}
|
| 933 |
+
expandUp={settingsExpandUp}
|
| 934 |
+
/>
|
| 935 |
+
</div>
|
| 936 |
+
</div>
|
| 937 |
+
|
| 938 |
+
{/* Control bar: steady, fixed position; never moves when settings open */}
|
| 939 |
+
<div
|
| 940 |
+
class="bg-white/90 backdrop-blur-md shadow-lg border border-[var(--color-earthy-sage)]/30 rounded-2xl overflow-hidden"
|
| 941 |
+
onMouseDown={handleWidgetDragStart}
|
| 942 |
+
role="presentation"
|
| 943 |
+
>
|
| 944 |
+
<div class="p-4 flex items-center justify-between gap-6 cursor-grab active:cursor-grabbing">
|
| 945 |
+
<div class="flex items-center gap-2 flex-shrink-0">
|
| 946 |
+
<span class="material-symbols-outlined text-[var(--color-earthy-soft-brown)] text-lg opacity-60" aria-hidden="true">drag_indicator</span>
|
| 947 |
+
<div class="flex flex-col min-w-[60px]">
|
| 948 |
+
<span class="text-[10px] uppercase tracking-wider text-[var(--color-earthy-soft-brown)] font-bold">Rec</span>
|
| 949 |
+
<span class="font-mono text-sm text-[var(--color-earthy-dark-brown)]">{formatDuration(appStore.sessionDuration())}</span>
|
| 950 |
+
</div>
|
| 951 |
+
</div>
|
| 952 |
+
<div class="flex-1 min-w-0 flex flex-col justify-center gap-1">
|
| 953 |
+
<div class="h-8 flex items-center justify-center gap-1 overflow-hidden opacity-80 abstract-wave">
|
| 954 |
+
<CompactWaveform audioLevel={appStore.audioLevel()} barLevels={appStore.barLevels()} isRecording={isRecording()} />
|
| 955 |
+
</div>
|
| 956 |
+
<Show when={appStore.modelState() === 'loading'}>
|
| 957 |
+
<div class="flex items-center gap-2 px-1">
|
| 958 |
+
<div class="flex-1 h-1.5 rounded-full overflow-hidden bg-[var(--color-earthy-sage)]/20">
|
| 959 |
+
<div
|
| 960 |
+
class="h-full bg-[var(--color-earthy-muted-green)] rounded-full transition-all duration-300"
|
| 961 |
+
style={{ width: `${Math.max(0, Math.min(100, appStore.modelProgress()))}%` }}
|
| 962 |
+
/>
|
| 963 |
+
</div>
|
| 964 |
+
<span class="text-[10px] font-mono text-[var(--color-earthy-soft-brown)] tabular-nums">{Math.round(appStore.modelProgress())}%</span>
|
| 965 |
+
</div>
|
| 966 |
+
</Show>
|
| 967 |
+
</div>
|
| 968 |
+
<div class="flex items-center gap-2 flex-shrink-0">
|
| 969 |
+
<button
|
| 970 |
+
type="button"
|
| 971 |
+
onClick={toggleRecording}
|
| 972 |
+
onMouseEnter={openPanelForAudio}
|
| 973 |
+
onMouseLeave={schedulePanelCloseIfHover}
|
| 974 |
+
class={`w-10 h-10 rounded-full flex items-center justify-center transition-colors border ${isRecording() ? 'bg-[var(--color-earthy-coral)] text-white border-[var(--color-earthy-coral)]' : 'text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] border-transparent hover:border-[var(--color-earthy-sage)]/30'}`}
|
| 975 |
+
title={isRecording() ? 'Stop recording' : 'Start recording'}
|
| 976 |
+
>
|
| 977 |
+
<span class="material-symbols-outlined">mic</span>
|
| 978 |
+
</button>
|
| 979 |
+
<button
|
| 980 |
+
type="button"
|
| 981 |
+
onClick={() => loadSelectedModel()}
|
| 982 |
+
onMouseEnter={openPanelForModel}
|
| 983 |
+
onMouseLeave={schedulePanelCloseIfHover}
|
| 984 |
+
disabled={appStore.modelState() === 'loading' || appStore.modelState() === 'ready'}
|
| 985 |
+
class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30 disabled:opacity-40 disabled:cursor-not-allowed relative"
|
| 986 |
+
title={appStore.modelState() === 'ready' ? 'Model loaded' : appStore.modelState() === 'loading' ? 'Loading...' : 'Load model'}
|
| 987 |
+
>
|
| 988 |
+
<Show when={appStore.modelState() === 'loading'} fallback={<span class="material-symbols-outlined">power_settings_new</span>}>
|
| 989 |
+
<span class="material-symbols-outlined load-btn-spin">progress_activity</span>
|
| 990 |
+
</Show>
|
| 991 |
+
</button>
|
| 992 |
+
<button
|
| 993 |
+
type="button"
|
| 994 |
+
onClick={() => { setSettingsPanelSection('full'); setShowContextPanel((v) => !v); }}
|
| 995 |
+
class={`w-10 h-10 rounded-full flex items-center justify-center transition-colors border ${showContextPanel() ? 'bg-[var(--color-earthy-sage)]/30 text-[var(--color-earthy-muted-green)] border-[var(--color-earthy-sage)]/50' : 'text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] border-transparent hover:border-[var(--color-earthy-sage)]/30'}`}
|
| 996 |
+
title="Settings"
|
| 997 |
+
>
|
| 998 |
+
<span class="material-symbols-outlined">tune</span>
|
| 999 |
+
</button>
|
| 1000 |
+
<button
|
| 1001 |
+
type="button"
|
| 1002 |
+
onClick={() => isRecording() && toggleRecording()}
|
| 1003 |
+
disabled={!isRecording()}
|
| 1004 |
+
class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30 disabled:opacity-40 disabled:cursor-not-allowed"
|
| 1005 |
+
title="Pause"
|
| 1006 |
+
>
|
| 1007 |
+
<span class="material-symbols-outlined">pause</span>
|
| 1008 |
+
</button>
|
| 1009 |
+
<button
|
| 1010 |
+
type="button"
|
| 1011 |
+
onClick={() => appStore.copyTranscript()}
|
| 1012 |
+
class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30"
|
| 1013 |
+
title="Copy transcript"
|
| 1014 |
+
>
|
| 1015 |
+
<span class="material-symbols-outlined">content_copy</span>
|
| 1016 |
+
</button>
|
| 1017 |
+
</div>
|
| 1018 |
+
</div>
|
| 1019 |
+
</div>
|
| 1020 |
+
</div>
|
| 1021 |
+
</div>
|
| 1022 |
+
|
| 1023 |
+
{/* Foldable debug panel (bottom drawer) */}
|
| 1024 |
+
<Show when={appStore.showDebugPanel()}>
|
| 1025 |
+
<div class="absolute bottom-0 left-0 right-0 z-20 flex flex-col bg-[var(--color-earthy-bg)] border-t border-[var(--color-earthy-sage)]/30 shadow-[0_-4px_20px_rgba(0,0,0,0.08)] max-h-[70vh] overflow-hidden transition-all">
|
| 1026 |
+
<DebugPanel
|
| 1027 |
+
audioEngine={audioEngineSignal() ?? undefined}
|
| 1028 |
+
melClient={melClientSignal() ?? undefined}
|
| 1029 |
+
/>
|
| 1030 |
+
</div>
|
| 1031 |
+
</Show>
|
| 1032 |
+
</div>
|
| 1033 |
+
);
|
| 1034 |
+
};
|
| 1035 |
+
|
| 1036 |
+
export default App;
|
| 1037 |
+
|
src/assets/css/material-icons.css
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Material Icons font face — served from local assets, with a preference
   for an already-installed system copy to avoid the network fetch. */
@font-face {
  font-family: 'Material Icons';
  font-style: normal;
  font-weight: 400;
  src: local('Material Icons'),
       local('MaterialIcons-Regular'),
       url('/fonts/material-icons/material-icons.woff2') format('woff2'),
       url('/fonts/material-icons/material-icons.woff') format('woff');
}

/* Base class: renders a ligature name (e.g. "mic") as a single icon glyph. */
.material-icons {
  font-family: 'Material Icons';
  font-weight: normal;
  font-style: normal;
  font-size: 24px; /* Preferred icon size */
  display: inline-block;
  line-height: 1;

  /* Keep the ligature text from being reshaped by surrounding styles. */
  text-transform: none;
  letter-spacing: normal;
  word-wrap: normal;
  white-space: nowrap;
  direction: ltr;

  /* Smoother glyph edges in WebKit browsers (Safari/Chrome). */
  -webkit-font-smoothing: antialiased;
  text-rendering: optimizeLegibility;

  /* Smoother glyph edges in Firefox on macOS. */
  -moz-osx-font-smoothing: grayscale;

  /* Enable ligatures so the icon name maps to its glyph (IE/legacy engines). */
  font-feature-settings: 'liga';
}
|
src/components/BufferVisualizer.tsx
ADDED
|
@@ -0,0 +1,511 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
"""""""""" * Keet - Buffer Visualizer Component
|
| 3 |
+
* Canvas-based real-time audio waveform visualization.
|
| 4 |
+
* Ported from legacy UI project (Svelte) to SolidJS.
|
| 5 |
+
*/
|
| 6 |
+
|
| 7 |
+
import { Component, createSignal, onMount, onCleanup, createEffect } from 'solid-js';
|
| 8 |
+
import type { AudioEngine, AudioMetrics } from '../lib/audio';
|
| 9 |
+
|
| 10 |
+
interface BufferVisualizerProps {
|
| 11 |
+
/** AudioEngine instance for subscribing to visualization updates */
|
| 12 |
+
audioEngine?: AudioEngine;
|
| 13 |
+
/** Height of the canvas in pixels (default: 80) */
|
| 14 |
+
height?: number;
|
| 15 |
+
/** Whether to show SNR threshold line (default: true) */
|
| 16 |
+
showThreshold?: boolean;
|
| 17 |
+
/** SNR threshold in dB for visualization (default: 6.0) */
|
| 18 |
+
snrThreshold?: number;
|
| 19 |
+
/** Whether to show time markers (default: true) */
|
| 20 |
+
showTimeMarkers?: boolean;
|
| 21 |
+
/** Whether the visualizer is visible (optimization - reduces frame rate when hidden) */
|
| 22 |
+
visible?: boolean;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
export const BufferVisualizer: Component<BufferVisualizerProps> = (props) => {
|
| 26 |
+
// Canvas element ref
|
| 27 |
+
let canvasRef: HTMLCanvasElement | undefined;
|
| 28 |
+
let ctx: CanvasRenderingContext2D | null = null;
|
| 29 |
+
let parentRef: HTMLDivElement | undefined;
|
| 30 |
+
|
| 31 |
+
// State
|
| 32 |
+
const [isDarkSignal, setIsDarkSignal] = createSignal(false);
|
| 33 |
+
const [canvasWidth, setCanvasWidth] = createSignal(0);
|
| 34 |
+
const [waveformData, setWaveformData] = createSignal<Float32Array>(new Float32Array(0));
|
| 35 |
+
const [metrics, setMetrics] = createSignal<AudioMetrics>({
|
| 36 |
+
currentEnergy: 0,
|
| 37 |
+
averageEnergy: 0,
|
| 38 |
+
peakEnergy: 0,
|
| 39 |
+
noiseFloor: 0.01,
|
| 40 |
+
currentSNR: 0,
|
| 41 |
+
isSpeaking: false,
|
| 42 |
+
});
|
| 43 |
+
const [segments, setSegments] = createSignal<Array<{ startTime: number; endTime: number; isProcessed: boolean }>>([]);
|
| 44 |
+
// Track the end time of the current waveform snapshot for strict synchronization
|
| 45 |
+
const [bufferEndTime, setBufferEndTime] = createSignal(0);
|
| 46 |
+
|
| 47 |
+
const height = () => props.height ?? 80;
|
| 48 |
+
const showThreshold = () => props.showThreshold ?? true;
|
| 49 |
+
const snrThreshold = () => props.snrThreshold ?? 6.0;
|
| 50 |
+
const showTimeMarkers = () => props.showTimeMarkers ?? true;
|
| 51 |
+
const visible = () => props.visible ?? true;
|
| 52 |
+
|
| 53 |
+
let animationFrameId: number | undefined;
|
| 54 |
+
let resizeObserver: ResizeObserver | null = null;
|
| 55 |
+
let needsRedraw = true;
|
| 56 |
+
let lastDrawTime = 0;
|
| 57 |
+
const DRAW_INTERVAL_MS = 33;
|
| 58 |
+
|
| 59 |
+
// Draw function
|
| 60 |
+
const draw = () => {
|
| 61 |
+
if (!ctx || !canvasRef) return;
|
| 62 |
+
|
| 63 |
+
const width = canvasRef.width;
|
| 64 |
+
const canvasHeight = canvasRef.height;
|
| 65 |
+
const centerY = canvasHeight / 2;
|
| 66 |
+
const data = waveformData();
|
| 67 |
+
const currentMetrics = metrics();
|
| 68 |
+
|
| 69 |
+
// Clear canvas
|
| 70 |
+
ctx.clearRect(0, 0, width, canvasHeight);
|
| 71 |
+
|
| 72 |
+
// Optimized theme detection (using signal instead of DOM access)
|
| 73 |
+
const isDarkMode = isDarkSignal();
|
| 74 |
+
|
| 75 |
+
// Colors (Mechanical Etched Palette) - Cached values
|
| 76 |
+
const bgColor = isDarkMode ? '#1e293b' : '#f1f5f9';
|
| 77 |
+
const highlightColor = isDarkMode ? 'rgba(255, 255, 255, 0.05)' : 'rgba(255, 255, 255, 0.8)';
|
| 78 |
+
const shadowColor = isDarkMode ? 'rgba(0, 0, 0, 0.4)' : 'rgba(0, 0, 0, 0.1)';
|
| 79 |
+
const etchColor = isDarkMode ? '#334155' : '#cbd5e1';
|
| 80 |
+
const signalActiveColor = '#3b82f6';
|
| 81 |
+
|
| 82 |
+
// Background
|
| 83 |
+
if (ctx) {
|
| 84 |
+
ctx.fillStyle = bgColor;
|
| 85 |
+
ctx.fillRect(0, 0, width, canvasHeight);
|
| 86 |
+
|
| 87 |
+
// Baseline (Etched indent)
|
| 88 |
+
ctx.beginPath();
|
| 89 |
+
ctx.strokeStyle = shadowColor;
|
| 90 |
+
ctx.lineWidth = 0.5;
|
| 91 |
+
ctx.moveTo(0, centerY);
|
| 92 |
+
ctx.lineTo(width, centerY);
|
| 93 |
+
ctx.stroke();
|
| 94 |
+
|
| 95 |
+
// Draw time markers at the top
|
| 96 |
+
if (showTimeMarkers() && props.audioEngine) {
|
| 97 |
+
// Use the new textColor and tickColor based on the etched palette
|
| 98 |
+
const textColor = isDarkMode ? '#94a3b8' : '#94a3b8';
|
| 99 |
+
const tickColor = isDarkMode ? 'rgba(255, 255, 255, 0.05)' : 'rgba(0, 0, 0, 0.05)';
|
| 100 |
+
drawTimeMarkers(width, canvasHeight, textColor, tickColor);
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
// Draw segment boundaries (before waveform so they appear behind)
|
| 104 |
+
if (props.audioEngine) {
|
| 105 |
+
drawSegments(width, canvasHeight, isDarkMode);
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
// Draw waveform using legacy UI project logic (Etched Mercury Style)
|
| 109 |
+
if (data.length >= 2) {
|
| 110 |
+
// Data is already subsampled to ~400 points (min, max pairs)
|
| 111 |
+
const numPoints = data.length / 2;
|
| 112 |
+
const step = width / numPoints; // Use simple step as points ~ width/2
|
| 113 |
+
|
| 114 |
+
// Helper to draw the full waveform path
|
| 115 |
+
// Optimized Waveform Path (Consolidated passes)
|
| 116 |
+
ctx.lineCap = 'round';
|
| 117 |
+
|
| 118 |
+
// Helper to draw the full waveform path
|
| 119 |
+
const drawPath = (offsetX: number, offsetY: number) => {
|
| 120 |
+
if (!ctx) return;
|
| 121 |
+
ctx.beginPath();
|
| 122 |
+
for (let i = 0; i < numPoints; i++) {
|
| 123 |
+
const x = i * step + offsetX;
|
| 124 |
+
// Ensure min/max have at least 1px difference for visibility even when silent
|
| 125 |
+
let minVal = data[i * 2];
|
| 126 |
+
let maxVal = data[i * 2 + 1];
|
| 127 |
+
|
| 128 |
+
// Scaled values
|
| 129 |
+
let yMin = centerY - (minVal * centerY * 0.9) + offsetY;
|
| 130 |
+
let yMax = centerY - (maxVal * centerY * 0.9) + offsetY;
|
| 131 |
+
|
| 132 |
+
// Ensure tiny signals are visible (min 1px height)
|
| 133 |
+
if (Math.abs(yMax - yMin) < 1) {
|
| 134 |
+
yMin = centerY - 0.5 + offsetY;
|
| 135 |
+
yMax = centerY + 0.5 + offsetY;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
ctx.moveTo(x, yMin);
|
| 139 |
+
ctx.lineTo(x, yMax);
|
| 140 |
+
}
|
| 141 |
+
ctx.stroke();
|
| 142 |
+
};
|
| 143 |
+
|
| 144 |
+
// 1. Highlight Pass (Sharp top-left edge)
|
| 145 |
+
ctx.strokeStyle = highlightColor;
|
| 146 |
+
ctx.lineWidth = 1.0;
|
| 147 |
+
drawPath(-0.5, -0.5);
|
| 148 |
+
|
| 149 |
+
// 2. Shadow Pass (Depressed groove)
|
| 150 |
+
ctx.strokeStyle = shadowColor;
|
| 151 |
+
ctx.lineWidth = 1.2;
|
| 152 |
+
drawPath(0.5, 0.5);
|
| 153 |
+
|
| 154 |
+
// 3. Main Etch Pass (Base material) - Slate color for contrast
|
| 155 |
+
ctx.strokeStyle = etchColor;
|
| 156 |
+
ctx.lineWidth = 1.0;
|
| 157 |
+
drawPath(0, 0);
|
| 158 |
+
|
| 159 |
+
// 4. Active signal glow
|
| 160 |
+
if (currentMetrics.isSpeaking) {
|
| 161 |
+
ctx.globalAlpha = 0.5;
|
| 162 |
+
ctx.shadowBlur = 4;
|
| 163 |
+
ctx.shadowColor = signalActiveColor;
|
| 164 |
+
ctx.strokeStyle = signalActiveColor;
|
| 165 |
+
ctx.lineWidth = 1.0;
|
| 166 |
+
drawPath(0, 0);
|
| 167 |
+
ctx.shadowBlur = 0;
|
| 168 |
+
ctx.globalAlpha = 1.0;
|
| 169 |
+
}
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
// Draw adaptive threshold (Etched dashes)
|
| 173 |
+
if (showThreshold() && currentMetrics.noiseFloor > 0) {
|
| 174 |
+
const snrRatio = Math.pow(10, snrThreshold() / 10);
|
| 175 |
+
const adaptiveThreshold = currentMetrics.noiseFloor * snrRatio;
|
| 176 |
+
|
| 177 |
+
const drawThresholdLine = (offsetY: number, color: string) => {
|
| 178 |
+
if (!ctx) return;
|
| 179 |
+
ctx.beginPath();
|
| 180 |
+
ctx.strokeStyle = color;
|
| 181 |
+
ctx.lineWidth = 1;
|
| 182 |
+
ctx.setLineDash([2, 4]);
|
| 183 |
+
const adaptiveYPos = centerY - adaptiveThreshold * centerY + offsetY;
|
| 184 |
+
ctx.moveTo(0, adaptiveYPos); ctx.lineTo(width, adaptiveYPos);
|
| 185 |
+
const adaptiveYNeg = centerY + adaptiveThreshold * centerY + offsetY;
|
| 186 |
+
ctx.moveTo(0, adaptiveYNeg); ctx.lineTo(width, adaptiveYNeg);
|
| 187 |
+
ctx.stroke();
|
| 188 |
+
};
|
| 189 |
+
|
| 190 |
+
drawThresholdLine(1, highlightColor);
|
| 191 |
+
drawThresholdLine(0, shadowColor);
|
| 192 |
+
ctx.setLineDash([]);
|
| 193 |
+
|
| 194 |
+
// Label (Etched text)
|
| 195 |
+
ctx.fillStyle = isDarkMode ? 'rgba(255, 255, 255, 0.15)' : 'rgba(0, 0, 0, 0.2)';
|
| 196 |
+
ctx.font = '900 9px "JetBrains Mono", monospace';
|
| 197 |
+
const labelY = centerY - adaptiveThreshold * centerY - 8;
|
| 198 |
+
ctx.fillText(`THRSH: ${snrThreshold().toFixed(1)}dB`, 10, labelY);
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
// Draw noise floor level (retained original style for clarity)
|
| 202 |
+
if (currentMetrics.noiseFloor > 0) {
|
| 203 |
+
const nfColor = isDarkMode ? 'rgba(74, 222, 128, 0.1)' : 'rgba(34, 197, 94, 0.1)';
|
| 204 |
+
const noiseFloorY = centerY - currentMetrics.noiseFloor * centerY;
|
| 205 |
+
const noiseFloorYNeg = centerY + currentMetrics.noiseFloor * centerY;
|
| 206 |
+
|
| 207 |
+
ctx.beginPath();
|
| 208 |
+
ctx.strokeStyle = nfColor;
|
| 209 |
+
ctx.lineWidth = 1;
|
| 210 |
+
ctx.moveTo(0, noiseFloorY);
|
| 211 |
+
ctx.lineTo(width, noiseFloorY);
|
| 212 |
+
ctx.moveTo(0, noiseFloorYNeg);
|
| 213 |
+
ctx.lineTo(width, noiseFloorYNeg);
|
| 214 |
+
ctx.stroke();
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
// Draw speaking indicator (Neumorphic dot)
|
| 218 |
+
if (currentMetrics.isSpeaking) {
|
| 219 |
+
const speakingColor = '#22c55e';
|
| 220 |
+
const indicatorX = width - 60;
|
| 221 |
+
const indicatorY = 25;
|
| 222 |
+
const radius = 6;
|
| 223 |
+
|
| 224 |
+
// Glow effect
|
| 225 |
+
ctx.shadowBlur = 10;
|
| 226 |
+
ctx.shadowColor = speakingColor;
|
| 227 |
+
|
| 228 |
+
ctx.beginPath();
|
| 229 |
+
ctx.arc(indicatorX, indicatorY, radius, 0, Math.PI * 2);
|
| 230 |
+
ctx.fillStyle = speakingColor;
|
| 231 |
+
ctx.fill();
|
| 232 |
+
|
| 233 |
+
ctx.shadowBlur = 0;
|
| 234 |
+
|
| 235 |
+
// Pulse ring
|
| 236 |
+
const time = performance.now() / 1000;
|
| 237 |
+
const rippleRadius = radius + (time % 1) * 10;
|
| 238 |
+
const rippleOpacity = 1 - (time % 1);
|
| 239 |
+
|
| 240 |
+
ctx.beginPath();
|
| 241 |
+
ctx.arc(indicatorX, indicatorY, rippleRadius, 0, Math.PI * 2);
|
| 242 |
+
ctx.strokeStyle = `rgba(34, 197, 94, ${rippleOpacity})`;
|
| 243 |
+
ctx.lineWidth = 1.5;
|
| 244 |
+
ctx.stroke();
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
// SNR meter on the right side - Etched mechanical gauge
|
| 248 |
+
if (currentMetrics.currentSNR > 0) {
|
| 249 |
+
const meterPadding = 15;
|
| 250 |
+
const meterWidth = 6;
|
| 251 |
+
const meterX = width - 20;
|
| 252 |
+
const meterHeight = canvasHeight - (meterPadding * 2);
|
| 253 |
+
|
| 254 |
+
// Meter Housing (Inset)
|
| 255 |
+
ctx.fillStyle = shadowColor;
|
| 256 |
+
ctx.beginPath();
|
| 257 |
+
ctx.roundRect(meterX, meterPadding, meterWidth, meterHeight, 3);
|
| 258 |
+
ctx.fill();
|
| 259 |
+
|
| 260 |
+
ctx.strokeStyle = highlightColor;
|
| 261 |
+
ctx.lineWidth = 1;
|
| 262 |
+
ctx.stroke();
|
| 263 |
+
|
| 264 |
+
// Gauge Level
|
| 265 |
+
const maxSNR = 60;
|
| 266 |
+
const cappedSNR = Math.min(maxSNR, currentMetrics.currentSNR);
|
| 267 |
+
const fillHeight = (cappedSNR / maxSNR) * meterHeight;
|
| 268 |
+
const fillY = (meterPadding + meterHeight) - fillHeight;
|
| 269 |
+
|
| 270 |
+
// Glow for the active portion
|
| 271 |
+
ctx.shadowBlur = 8;
|
| 272 |
+
ctx.shadowColor = currentMetrics.currentSNR >= snrThreshold() ? 'rgba(34, 197, 94, 0.4)' : 'rgba(96, 165, 250, 0.4)';
|
| 273 |
+
|
| 274 |
+
ctx.fillStyle = currentMetrics.currentSNR >= snrThreshold() ? '#22c55e' : signalActiveColor;
|
| 275 |
+
ctx.beginPath();
|
| 276 |
+
ctx.roundRect(meterX, fillY, meterWidth, fillHeight, 3);
|
| 277 |
+
ctx.fill();
|
| 278 |
+
|
| 279 |
+
ctx.shadowBlur = 0;
|
| 280 |
+
|
| 281 |
+
// Threshold marker notched in
|
| 282 |
+
const thresholdMarkerY = (meterPadding + meterHeight) - (Math.min(maxSNR, snrThreshold()) / maxSNR * meterHeight);
|
| 283 |
+
ctx.beginPath();
|
| 284 |
+
ctx.strokeStyle = '#ef4444';
|
| 285 |
+
ctx.lineWidth = 2;
|
| 286 |
+
ctx.moveTo(meterX - 4, thresholdMarkerY);
|
| 287 |
+
ctx.lineTo(meterX + meterWidth + 4, thresholdMarkerY);
|
| 288 |
+
ctx.stroke();
|
| 289 |
+
|
| 290 |
+
// Digital Readout
|
| 291 |
+
ctx.fillStyle = isDarkMode ? '#f8fafc' : '#1e293b';
|
| 292 |
+
ctx.font = '900 10px "JetBrains Mono", monospace';
|
| 293 |
+
ctx.textAlign = 'right';
|
| 294 |
+
ctx.fillText(`${currentMetrics.currentSNR.toFixed(0)}`, meterX - 8, thresholdMarkerY + 4);
|
| 295 |
+
ctx.textAlign = 'left';
|
| 296 |
+
}
|
| 297 |
+
}
|
| 298 |
+
};
|
| 299 |
+
|
| 300 |
+
// Draw time markers
|
| 301 |
+
const drawTimeMarkers = (width: number, canvasHeight: number, textColor: string, tickColor: string) => {
|
| 302 |
+
if (!ctx || !props.audioEngine) return;
|
| 303 |
+
|
| 304 |
+
const bufferDuration = props.audioEngine.getVisualizationDuration();
|
| 305 |
+
const currentTime = bufferEndTime(); // Use synchronized end time of buffer
|
| 306 |
+
const windowStart = currentTime - bufferDuration;
|
| 307 |
+
|
| 308 |
+
ctx.fillStyle = textColor;
|
| 309 |
+
ctx.font = '10px system-ui, sans-serif';
|
| 310 |
+
|
| 311 |
+
const markerInterval = 5; // Every 5 seconds
|
| 312 |
+
const firstMarkerTime = Math.ceil(windowStart / markerInterval) * markerInterval;
|
| 313 |
+
|
| 314 |
+
for (let time = firstMarkerTime; time <= currentTime; time += markerInterval) {
|
| 315 |
+
const x = ((time - windowStart) / bufferDuration) * width;
|
| 316 |
+
|
| 317 |
+
// Draw tick mark
|
| 318 |
+
ctx.beginPath();
|
| 319 |
+
ctx.strokeStyle = tickColor;
|
| 320 |
+
ctx.moveTo(x, 0);
|
| 321 |
+
ctx.lineTo(x, 15);
|
| 322 |
+
ctx.stroke();
|
| 323 |
+
|
| 324 |
+
// Draw time label
|
| 325 |
+
ctx.fillText(`${time}s`, x + 2, 12);
|
| 326 |
+
}
|
| 327 |
+
};
|
| 328 |
+
|
| 329 |
+
// Draw segment boundaries
|
| 330 |
+
const drawSegments = (width: number, canvasHeight: number, isDarkMode: boolean) => {
|
| 331 |
+
const context = ctx;
|
| 332 |
+
if (!context || !props.audioEngine) return;
|
| 333 |
+
|
| 334 |
+
const bufferDuration = props.audioEngine.getVisualizationDuration();
|
| 335 |
+
const currentTime = bufferEndTime(); // Use synchronized end time of buffer
|
| 336 |
+
const windowStart = currentTime - bufferDuration;
|
| 337 |
+
const segmentList = segments();
|
| 338 |
+
|
| 339 |
+
// Colors for segments
|
| 340 |
+
const pendingColor = isDarkMode ? 'rgba(250, 204, 21, 0.15)' : 'rgba(234, 179, 8, 0.15)';
|
| 341 |
+
const processedColor = isDarkMode ? 'rgba(34, 197, 94, 0.15)' : 'rgba(22, 163, 74, 0.15)';
|
| 342 |
+
const pendingBorderColor = isDarkMode ? 'rgba(250, 204, 21, 0.5)' : 'rgba(234, 179, 8, 0.5)';
|
| 343 |
+
const processedBorderColor = isDarkMode ? 'rgba(34, 197, 94, 0.5)' : 'rgba(22, 163, 74, 0.5)';
|
| 344 |
+
|
| 345 |
+
// Log segment count for debugging
|
| 346 |
+
// console.log('Drawing segments:', segmentList.length);
|
| 347 |
+
|
| 348 |
+
segmentList.forEach(segment => {
|
| 349 |
+
// Calculate relative position in visualization window
|
| 350 |
+
const relativeStart = segment.startTime - windowStart;
|
| 351 |
+
const relativeEnd = segment.endTime - windowStart;
|
| 352 |
+
|
| 353 |
+
// Only draw if segment is within visible window
|
| 354 |
+
if (relativeEnd > 0 && relativeStart < bufferDuration) {
|
| 355 |
+
// Pixel-snap boundaries to prevent anti-aliasing jitter/widening
|
| 356 |
+
const startX = Math.floor(Math.max(0, (relativeStart / bufferDuration)) * width);
|
| 357 |
+
const endX = Math.ceil(Math.min(1, (relativeEnd / bufferDuration)) * width);
|
| 358 |
+
|
| 359 |
+
// Fill segment area - increased opacity for visibility
|
| 360 |
+
context.fillStyle = segment.isProcessed ?
|
| 361 |
+
(isDarkMode ? 'rgba(34, 197, 94, 0.3)' : 'rgba(22, 163, 74, 0.3)') :
|
| 362 |
+
(isDarkMode ? 'rgba(250, 204, 21, 0.3)' : 'rgba(234, 179, 8, 0.3)');
|
| 363 |
+
|
| 364 |
+
context.fillRect(startX, 0, endX - startX, canvasHeight);
|
| 365 |
+
|
| 366 |
+
// Draw segment boundaries (snap to pixel + 0.5 for sharp 1px lines)
|
| 367 |
+
context.strokeStyle = segment.isProcessed ? processedBorderColor : pendingBorderColor;
|
| 368 |
+
context.lineWidth = 1;
|
| 369 |
+
context.beginPath();
|
| 370 |
+
context.moveTo(startX + 0.5, 0);
|
| 371 |
+
context.lineTo(startX + 0.5, canvasHeight);
|
| 372 |
+
context.moveTo(endX - 0.5, 0);
|
| 373 |
+
context.lineTo(endX - 0.5, canvasHeight);
|
| 374 |
+
context.stroke();
|
| 375 |
+
}
|
| 376 |
+
});
|
| 377 |
+
};
|
| 378 |
+
|
| 379 |
+
// Animation loop
|
| 380 |
+
const drawLoop = () => {
|
| 381 |
+
if (!ctx || !canvasRef || canvasRef.width === 0) {
|
| 382 |
+
if (visible()) {
|
| 383 |
+
animationFrameId = requestAnimationFrame(drawLoop);
|
| 384 |
+
} else {
|
| 385 |
+
animationFrameId = window.setTimeout(drawLoop, 100) as unknown as number;
|
| 386 |
+
}
|
| 387 |
+
return;
|
| 388 |
+
}
|
| 389 |
+
|
| 390 |
+
if (visible()) {
|
| 391 |
+
const now = performance.now();
|
| 392 |
+
if (needsRedraw && now - lastDrawTime >= DRAW_INTERVAL_MS) {
|
| 393 |
+
lastDrawTime = now;
|
| 394 |
+
needsRedraw = false;
|
| 395 |
+
draw();
|
| 396 |
+
}
|
| 397 |
+
animationFrameId = requestAnimationFrame(drawLoop);
|
| 398 |
+
} else {
|
| 399 |
+
// When not visible, check less frequently to save CPU
|
| 400 |
+
animationFrameId = window.setTimeout(drawLoop, 100) as unknown as number;
|
| 401 |
+
}
|
| 402 |
+
};
|
| 403 |
+
|
| 404 |
+
// Resize handler
|
| 405 |
+
const handleResize = () => {
|
| 406 |
+
if (canvasRef && parentRef) {
|
| 407 |
+
const newWidth = parentRef.clientWidth;
|
| 408 |
+
if (newWidth > 0 && newWidth !== canvasWidth()) {
|
| 409 |
+
canvasRef.width = newWidth;
|
| 410 |
+
canvasRef.height = height();
|
| 411 |
+
setCanvasWidth(newWidth);
|
| 412 |
+
|
| 413 |
+
// Refetch visualization data for new width
|
| 414 |
+
if (props.audioEngine && visible()) {
|
| 415 |
+
setWaveformData(props.audioEngine.getVisualizationData(newWidth));
|
| 416 |
+
needsRedraw = true;
|
| 417 |
+
// Note: can't update bufferEndTime here easily without calling another method on engine,
|
| 418 |
+
// but next update loop will catch it.
|
| 419 |
+
}
|
| 420 |
+
}
|
| 421 |
+
}
|
| 422 |
+
};
|
| 423 |
+
|
| 424 |
+
// Subscribe to audio engine updates
|
| 425 |
+
createEffect(() => {
|
| 426 |
+
const engine = props.audioEngine;
|
| 427 |
+
if (engine && visible()) {
|
| 428 |
+
// Initial data fetch
|
| 429 |
+
if (canvasWidth() > 0) {
|
| 430 |
+
setWaveformData(engine.getVisualizationData(canvasWidth()));
|
| 431 |
+
setBufferEndTime(engine.getCurrentTime());
|
| 432 |
+
}
|
| 433 |
+
|
| 434 |
+
// Subscribe to updates
|
| 435 |
+
const sub = engine.onVisualizationUpdate((data, newMetrics, endTime) => {
|
| 436 |
+
if (visible()) {
|
| 437 |
+
setWaveformData(data);
|
| 438 |
+
setMetrics(newMetrics);
|
| 439 |
+
setBufferEndTime(endTime);
|
| 440 |
+
|
| 441 |
+
// Fetch segments for visualization
|
| 442 |
+
setSegments(engine.getSegmentsForVisualization());
|
| 443 |
+
needsRedraw = true;
|
| 444 |
+
} else {
|
| 445 |
+
// Still update metrics even when not visible
|
| 446 |
+
setMetrics(newMetrics);
|
| 447 |
+
}
|
| 448 |
+
});
|
| 449 |
+
|
| 450 |
+
onCleanup(() => sub());
|
| 451 |
+
}
|
| 452 |
+
});
|
| 453 |
+
|
| 454 |
+
// Mark for redraw when visibility toggles
|
| 455 |
+
createEffect(() => {
|
| 456 |
+
if (visible()) {
|
| 457 |
+
needsRedraw = true;
|
| 458 |
+
}
|
| 459 |
+
});
|
| 460 |
+
|
| 461 |
+
onMount(() => {
|
| 462 |
+
if (canvasRef) {
|
| 463 |
+
ctx = canvasRef.getContext('2d');
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
// Setup dark mode observer
|
| 467 |
+
setIsDarkSignal(document.documentElement.classList.contains('dark'));
|
| 468 |
+
const themeObserver = new MutationObserver(() => {
|
| 469 |
+
setIsDarkSignal(document.documentElement.classList.contains('dark'));
|
| 470 |
+
});
|
| 471 |
+
themeObserver.observe(document.documentElement, {
|
| 472 |
+
attributes: true,
|
| 473 |
+
attributeFilter: ['class'],
|
| 474 |
+
});
|
| 475 |
+
|
| 476 |
+
onCleanup(() => themeObserver.disconnect());
|
| 477 |
+
|
| 478 |
+
// Setup resize observer
|
| 479 |
+
handleResize();
|
| 480 |
+
resizeObserver = new ResizeObserver(handleResize);
|
| 481 |
+
if (parentRef) {
|
| 482 |
+
resizeObserver.observe(parentRef);
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
// Start animation loop
|
| 486 |
+
animationFrameId = requestAnimationFrame(drawLoop);
|
| 487 |
+
});
|
| 488 |
+
|
| 489 |
+
onCleanup(() => {
|
| 490 |
+
if (animationFrameId) {
|
| 491 |
+
cancelAnimationFrame(animationFrameId);
|
| 492 |
+
clearTimeout(animationFrameId);
|
| 493 |
+
}
|
| 494 |
+
if (resizeObserver) {
|
| 495 |
+
resizeObserver.disconnect();
|
| 496 |
+
}
|
| 497 |
+
});
|
| 498 |
+
|
| 499 |
+
return (
|
| 500 |
+
<div ref={parentRef} class="w-full relative" style={{ height: `${height()}px` }}>
|
| 501 |
+
<canvas
|
| 502 |
+
ref={canvasRef}
|
| 503 |
+
class="w-full h-full block"
|
| 504 |
+
style={{ 'image-rendering': 'auto' }}
|
| 505 |
+
aria-label="Audio waveform visualization"
|
| 506 |
+
/>
|
| 507 |
+
</div>
|
| 508 |
+
);
|
| 509 |
+
};
|
| 510 |
+
|
| 511 |
+
export default BufferVisualizer;
|
src/components/ContextPanel.tsx
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, For, Show, createEffect, onCleanup } from 'solid-js';
|
| 2 |
+
import { appStore } from '../stores/appStore';
|
| 3 |
+
import { getModelDisplayName, MODELS } from './ModelLoadingOverlay';
|
| 4 |
+
|
| 5 |
+
interface ContextPanelProps {
|
| 6 |
+
isOpen: boolean;
|
| 7 |
+
onClose: () => void;
|
| 8 |
+
onLoadModel: () => void;
|
| 9 |
+
onOpenDebug: () => void;
|
| 10 |
+
onDeviceSelect?: (id: string) => void;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
export const ContextPanel: Component<ContextPanelProps> = (props) => {
|
| 14 |
+
createEffect(() => {
|
| 15 |
+
if (!props.isOpen) return;
|
| 16 |
+
const handler = (e: KeyboardEvent) => {
|
| 17 |
+
if (e.key === 'Escape') {
|
| 18 |
+
e.preventDefault();
|
| 19 |
+
props.onClose();
|
| 20 |
+
}
|
| 21 |
+
};
|
| 22 |
+
document.addEventListener('keydown', handler);
|
| 23 |
+
onCleanup(() => document.removeEventListener('keydown', handler));
|
| 24 |
+
});
|
| 25 |
+
|
| 26 |
+
return (
|
| 27 |
+
<Show when={props.isOpen}>
|
| 28 |
+
<div
|
| 29 |
+
class="fixed inset-0 z-40 flex items-center justify-center bg-[var(--color-earthy-dark-brown)]/30 backdrop-blur-sm"
|
| 30 |
+
role="dialog"
|
| 31 |
+
aria-modal="true"
|
| 32 |
+
aria-label="Context and settings"
|
| 33 |
+
onClick={(e) => e.target === e.currentTarget && props.onClose()}
|
| 34 |
+
>
|
| 35 |
+
<div
|
| 36 |
+
class="w-full max-w-md mx-4 bg-[var(--color-earthy-bg)] rounded-2xl border border-[var(--color-earthy-sage)] shadow-xl overflow-hidden"
|
| 37 |
+
onClick={(e) => e.stopPropagation()}
|
| 38 |
+
>
|
| 39 |
+
<div class="px-6 py-4 border-b border-[var(--color-earthy-sage)]/30 flex items-center justify-between">
|
| 40 |
+
<h2 class="text-lg font-semibold tracking-tight text-[var(--color-earthy-dark-brown)]">Context</h2>
|
| 41 |
+
<button
|
| 42 |
+
type="button"
|
| 43 |
+
onClick={props.onClose}
|
| 44 |
+
class="p-2 rounded-full text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-sage)]/30 transition-colors"
|
| 45 |
+
aria-label="Close"
|
| 46 |
+
>
|
| 47 |
+
<span class="material-symbols-outlined text-xl">close</span>
|
| 48 |
+
</button>
|
| 49 |
+
</div>
|
| 50 |
+
|
| 51 |
+
<div class="p-6 space-y-6">
|
| 52 |
+
<section>
|
| 53 |
+
<h3 class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-3">Model</h3>
|
| 54 |
+
<div class="flex flex-col gap-2">
|
| 55 |
+
<select
|
| 56 |
+
class="w-full text-sm bg-white border border-[var(--color-earthy-sage)] rounded-xl px-3 py-2 text-[var(--color-earthy-dark-brown)] focus:outline-none focus:ring-2 focus:ring-[var(--color-earthy-coral)]/30"
|
| 57 |
+
value={appStore.selectedModelId()}
|
| 58 |
+
onInput={(e) => appStore.setSelectedModelId((e.target as HTMLSelectElement).value)}
|
| 59 |
+
disabled={appStore.modelState() === 'loading'}
|
| 60 |
+
>
|
| 61 |
+
<For each={MODELS}>
|
| 62 |
+
{(m) => <option value={m.id}>{m.name}</option>}
|
| 63 |
+
</For>
|
| 64 |
+
</select>
|
| 65 |
+
<p class="text-xs text-[var(--color-earthy-soft-brown)]">
|
| 66 |
+
{appStore.modelState() === 'ready' ? getModelDisplayName(appStore.selectedModelId()) : appStore.modelState()}
|
| 67 |
+
</p>
|
| 68 |
+
<button
|
| 69 |
+
type="button"
|
| 70 |
+
onClick={props.onLoadModel}
|
| 71 |
+
disabled={appStore.modelState() === 'ready' || appStore.modelState() === 'loading'}
|
| 72 |
+
class="flex items-center gap-2 px-4 py-2 rounded-full border border-[var(--color-earthy-sage)] text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-muted-green)] hover:text-white transition-all text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed"
|
| 73 |
+
>
|
| 74 |
+
<span class="material-symbols-outlined text-lg">power_settings_new</span>
|
| 75 |
+
{appStore.modelState() === 'ready' ? 'Model loaded' : appStore.modelState() === 'loading' ? 'Loading...' : 'Load model'}
|
| 76 |
+
</button>
|
| 77 |
+
</div>
|
| 78 |
+
</section>
|
| 79 |
+
|
| 80 |
+
<section>
|
| 81 |
+
<h3 class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-3">Audio input</h3>
|
| 82 |
+
<select
|
| 83 |
+
class="w-full text-sm bg-white border border-[var(--color-earthy-sage)] rounded-xl px-3 py-2 text-[var(--color-earthy-dark-brown)] focus:outline-none focus:ring-2 focus:ring-[var(--color-earthy-coral)]/30"
|
| 84 |
+
value={appStore.selectedDeviceId()}
|
| 85 |
+
onInput={(e) => {
|
| 86 |
+
const id = (e.target as HTMLSelectElement).value;
|
| 87 |
+
appStore.setSelectedDeviceId(id);
|
| 88 |
+
props.onDeviceSelect?.(id);
|
| 89 |
+
}}
|
| 90 |
+
>
|
| 91 |
+
<For each={appStore.availableDevices()}>
|
| 92 |
+
{(device) => (
|
| 93 |
+
<option value={device.deviceId}>
|
| 94 |
+
{device.label || `Device ${device.deviceId.slice(0, 8)}`}
|
| 95 |
+
</option>
|
| 96 |
+
)}
|
| 97 |
+
</For>
|
| 98 |
+
</select>
|
| 99 |
+
</section>
|
| 100 |
+
|
| 101 |
+
<section>
|
| 102 |
+
<h3 class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-2">Backend</h3>
|
| 103 |
+
<p class="text-sm text-[var(--color-earthy-dark-brown)] font-medium">{appStore.backend().toUpperCase()}</p>
|
| 104 |
+
</section>
|
| 105 |
+
|
| 106 |
+
<div class="pt-2 border-t border-[var(--color-earthy-sage)]/30 flex items-center justify-between">
|
| 107 |
+
<span class="text-xs text-[var(--color-earthy-soft-brown)]">Developer</span>
|
| 108 |
+
<button
|
| 109 |
+
type="button"
|
| 110 |
+
onClick={() => {
|
| 111 |
+
props.onOpenDebug();
|
| 112 |
+
props.onClose();
|
| 113 |
+
}}
|
| 114 |
+
class="flex items-center gap-1.5 px-3 py-1.5 rounded-lg text-xs font-medium text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-sage)]/30 transition-colors"
|
| 115 |
+
>
|
| 116 |
+
<span class="material-symbols-outlined text-base">bug_report</span>
|
| 117 |
+
Open Debug panel
|
| 118 |
+
</button>
|
| 119 |
+
</div>
|
| 120 |
+
</div>
|
| 121 |
+
</div>
|
| 122 |
+
</div>
|
| 123 |
+
</Show>
|
| 124 |
+
);
|
| 125 |
+
};
|
src/components/DebugPanel.tsx
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, createMemo, For, Show, createSignal, onCleanup, createEffect } from 'solid-js';
|
| 2 |
+
import { appStore, type TranscriptionMode } from '../stores/appStore';
|
| 3 |
+
import type { AudioEngine } from '../lib/audio/types';
|
| 4 |
+
import type { MelWorkerClient } from '../lib/audio/MelWorkerClient';
|
| 5 |
+
import { LayeredBufferVisualizer } from './LayeredBufferVisualizer';
|
| 6 |
+
|
| 7 |
+
interface DebugPanelProps {
|
| 8 |
+
audioEngine?: AudioEngine;
|
| 9 |
+
melClient?: MelWorkerClient;
|
| 10 |
+
}
|
| 11 |
+
|
| 12 |
+
const MODES: { id: TranscriptionMode; label: string; short: string }[] = [
|
| 13 |
+
{ id: 'v4-utterance', label: 'Utterance (v4)', short: 'v4' },
|
| 14 |
+
{ id: 'v3-streaming', label: 'Streaming (v3)', short: 'v3' },
|
| 15 |
+
{ id: 'v2-utterance', label: 'Legacy (v2)', short: 'v2' },
|
| 16 |
+
];
|
| 17 |
+
|
| 18 |
+
/**
 * Bottom-docked developer diagnostics panel.
 *
 * Left column: system/signal indicators read from the global `appStore`
 * (backend name, recording LED, VAD flag, mode selector, RTFx, latency,
 * buffer fill, RMS energy meter, and — in v4 mode — Silero VAD probability,
 * SNR and sentence-merger counters).
 * Right column: mode-dependent "live context" (v4 mature/immature transcript
 * state, v3 LCS merge/anchor state, or a v2 legacy notice) plus the
 * LayeredBufferVisualizer fed from `props.audioEngine` / `props.melClient`.
 * The whole panel is resizable by dragging its top edge (150–600 px).
 */
export const DebugPanel: Component<DebugPanelProps> = (props) => {
  // Reactive predicates derived from the global store.
  const isRecording = () => appStore.recordingState() === 'recording';
  const isV4 = () => appStore.transcriptionMode() === 'v4-utterance';
  const isV3 = () => appStore.transcriptionMode() === 'v3-streaming';

  // Panel height in px; user-adjustable via the drag handle below.
  const [height, setHeight] = createSignal(260);
  const [isResizing, setIsResizing] = createSignal(false);

  // Drag anchor state; plain (non-reactive) locals on purpose — only read
  // inside the mousemove handler, never rendered.
  let startY = 0;
  let startHeight = 0;
  let scrollContainer: HTMLDivElement | undefined;

  // Auto-scroll to bottom of finalized sentences
  createEffect(() => {
    appStore.matureText(); // Track dependency
    if (scrollContainer) {
      scrollContainer.scrollTop = scrollContainer.scrollHeight;
    }
  });

  /** Begin a resize drag: record the anchor point and attach window listeners. */
  const handleMouseDown = (e: MouseEvent) => {
    setIsResizing(true);
    startY = e.clientY;
    startHeight = height();
    window.addEventListener('mousemove', handleMouseMove);
    window.addEventListener('mouseup', handleMouseUp);
  };

  /** Convert vertical mouse travel into a new panel height, clamped to 150–600 px. */
  const handleMouseMove = (e: MouseEvent) => {
    if (!isResizing()) return;
    const delta = startY - e.clientY;
    const newHeight = Math.min(Math.max(startHeight + delta, 150), 600);
    setHeight(newHeight);
  };

  /** End the drag and detach the window listeners added in handleMouseDown. */
  const handleMouseUp = () => {
    setIsResizing(false);
    window.removeEventListener('mousemove', handleMouseMove);
    window.removeEventListener('mouseup', handleMouseUp);
  };

  // Safety net: remove the window listeners if the component unmounts mid-drag.
  onCleanup(() => {
    window.removeEventListener('mousemove', handleMouseMove);
    window.removeEventListener('mouseup', handleMouseUp);
  });

  // CSS class for the RTFx readout: muted before any measurement, green when
  // comfortably faster than real time (>= 2x), coral otherwise.
  // NOTE(review): the `>= 1` branch and the final fallback return the same
  // class, so the 1x/`<1x` distinction currently has no visual effect — confirm intent.
  const rtfColor = createMemo(() => {
    const rtfx = appStore.rtfxAverage();
    if (rtfx === 0) return 'text-[var(--color-earthy-soft-brown)]';
    if (rtfx >= 2) return 'text-[var(--color-earthy-muted-green)] font-bold';
    if (rtfx >= 1) return 'text-[var(--color-earthy-coral)] font-bold';
    return 'text-[var(--color-earthy-coral)] font-bold';
  });

  return (
    <div
      class="bg-[var(--color-earthy-bg)] border-t border-[var(--color-earthy-sage)] text-[10px] font-mono text-[var(--color-earthy-dark-brown)] flex overflow-hidden shrink-0 transition-colors duration-300 selection:bg-[var(--color-earthy-coral)]/20 selection:text-[var(--color-earthy-coral)] z-20 relative"
      style={{ height: `${height()}px` }}
    >
      {/* Resize Handle */}
      <div
        class="absolute top-0 left-0 w-full h-1 cursor-ns-resize z-50 hover:bg-[var(--color-earthy-muted-green)]/50 transition-colors bg-transparent"
        onMouseDown={handleMouseDown}
      />

      {/* ---- Column 1: System & Signal (merged indicators) ---- */}
      <div class="w-60 flex flex-col p-3 gap-2.5 border-r border-[var(--color-earthy-sage)] bg-[var(--color-earthy-sage)]/10 overflow-y-auto">
        <div class="flex items-center justify-between pb-2 border-b border-[var(--color-earthy-sage)]">
          <span class="font-bold tracking-wider uppercase text-[var(--color-earthy-soft-brown)]">System & Signal</span>
          <div class="flex items-center gap-2">
            <span class="font-bold text-[var(--color-earthy-soft-brown)] uppercase text-[9px]">{appStore.backend()}</span>
            {/* Recording LED: coral + pulse while recording */}
            <div class={`w-2 h-2 rounded-full border border-[var(--color-earthy-bg)] shadow-sm transition-all duration-300 ${isRecording() ? 'bg-[var(--color-earthy-coral)] animate-pulse' : 'bg-[var(--color-earthy-sage)]'}`} />
            {/* VAD badge: kept in layout (opacity toggles) to avoid reflow */}
            <span class={`font-bold text-white bg-[var(--color-earthy-coral)] px-1.5 py-px rounded text-[9px] transition-opacity duration-100 ${appStore.isSpeechDetected() ? 'opacity-100 animate-pulse' : 'opacity-0'}`}>VAD</span>
          </div>
        </div>

        {/* Transcription-mode selector; locked while recording */}
        <div class="space-y-1.5">
          <span class="font-bold text-[9px] text-[var(--color-earthy-soft-brown)] uppercase tracking-wider">Mode</span>
          <div class="flex gap-1">
            <For each={MODES}>
              {(mode) => (
                <button
                  class={`flex-1 px-1 py-1 rounded text-[9px] font-bold uppercase tracking-wide border transition-all ${appStore.transcriptionMode() === mode.id
                    ? 'bg-[var(--color-earthy-muted-green)] text-white border-[var(--color-earthy-muted-green)] shadow-sm'
                    : 'bg-[var(--color-earthy-bg)] text-[var(--color-earthy-soft-brown)] border-[var(--color-earthy-sage)] hover:border-[var(--color-earthy-soft-brown)] hover:bg-[var(--color-earthy-sage)]/20'
                    } ${isRecording() ? 'opacity-50 cursor-not-allowed' : 'cursor-pointer'}`}
                  onClick={() => {
                    if (!isRecording()) {
                      appStore.setTranscriptionMode(mode.id);
                    }
                  }}
                  disabled={isRecording()}
                  title={isRecording() ? 'Stop recording to change mode' : mode.label}
                >
                  {mode.short}
                </button>
              )}
            </For>
          </div>
        </div>

        {/* Performance readouts: RTFx and average inference latency */}
        <div class="grid grid-cols-2 gap-1.5">
          <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded p-1.5 flex flex-col items-center justify-center">
            <span class="font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-tight text-[8px] mb-0.5">RTFx</span>
            <span class={`text-xs ${rtfColor()}`}>
              {appStore.rtfxAverage() > 0 ? Math.round(appStore.rtfxAverage()) : '–'}
            </span>
          </div>
          <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded p-1.5 flex flex-col items-center justify-center">
            <span class="font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-tight text-[8px] mb-0.5">Latency</span>
            <span class="text-xs font-bold text-[var(--color-earthy-dark-brown)]">{Math.round(appStore.inferenceLatencyAverage())}ms</span>
          </div>
        </div>

        {/* Ring-buffer fill gauge */}
        <div class="space-y-1">
          <div class="flex justify-between font-bold text-[var(--color-earthy-soft-brown)] uppercase px-0.5 text-[9px]">
            <span>Buffer</span>
            <span>{(appStore.bufferMetrics().fillRatio * 100).toFixed(0)}%</span>
          </div>
          <div class="h-1.5 w-full bg-[var(--color-earthy-sage)] rounded-full overflow-hidden">
            <div
              class="h-full bg-[var(--color-earthy-muted-green)] transition-all duration-300 ease-out rounded-full"
              style={{ width: `${(appStore.bufferMetrics().fillRatio * 100).toFixed(0)}%` }}
            />
          </div>
        </div>

        {/* v4 only: sentence-merger counters */}
        <Show when={isV4()}>
          <div class="space-y-1 pt-1 border-t border-[var(--color-earthy-sage)]">
            <span class="font-bold text-[8px] text-[var(--color-earthy-soft-brown)] uppercase tracking-wider">Merger</span>
            <div class="grid grid-cols-3 gap-1">
              <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded px-1 py-0.5 text-center">
                <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase">Sent</div>
                <div class="text-[10px] font-bold text-[var(--color-earthy-dark-brown)]">{appStore.v4MergerStats().sentencesFinalized}</div>
              </div>
              <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded px-1 py-0.5 text-center">
                <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase">Cursor</div>
                <div class="text-[10px] font-bold text-[var(--color-earthy-dark-brown)]">{appStore.matureCursorTime().toFixed(1)}s</div>
              </div>
              <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded px-1 py-0.5 text-center">
                <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase">Uttr</div>
                <div class="text-[10px] font-bold text-[var(--color-earthy-dark-brown)]">{appStore.v4MergerStats().utterancesProcessed}</div>
              </div>
            </div>
          </div>
        </Show>

        {/* RMS energy bar with a threshold marker line */}
        <div class="space-y-1">
          <div class="flex justify-between font-bold text-[var(--color-earthy-soft-brown)] uppercase text-[9px]">
            <span>RMS Energy</span>
            <span class={appStore.audioLevel() > appStore.energyThreshold() ? 'text-[var(--color-earthy-muted-green)]' : 'text-[var(--color-earthy-soft-brown)]'}>
              {(appStore.audioLevel() * 100).toFixed(1)}%
            </span>
          </div>
          <div class="h-2 w-full bg-[var(--color-earthy-sage)] rounded overflow-hidden relative">
            <div class="absolute top-0 bottom-0 w-px bg-[var(--color-earthy-coral)] z-10" style={{ left: `${appStore.energyThreshold() * 100}%` }} title="Energy threshold"></div>
            <div
              class={`h-full transition-all duration-75 ${appStore.isSpeechDetected() ? 'bg-[var(--color-earthy-coral)]' : 'bg-[var(--color-earthy-muted-green)]'}`}
              style={{ width: `${Math.min(100, appStore.audioLevel() * 100)}%` }}
            />
          </div>
        </div>

        {/* v4 only: Silero VAD probability bar and SNR readout */}
        <Show when={isV4()}>
          <div class={`space-y-1 transition-opacity duration-300 ${appStore.vadState().sileroProbability > 0 ? 'opacity-100' : 'opacity-40'}`}>
            <div class="flex justify-between font-bold text-[var(--color-earthy-soft-brown)] uppercase text-[9px]">
              <span>VAD Prob</span>
              <span class={appStore.vadState().sileroProbability > appStore.sileroThreshold() ? 'text-[var(--color-earthy-coral)] font-bold' : 'text-[var(--color-earthy-soft-brown)]'}>
                {(appStore.vadState().sileroProbability * 100).toFixed(0)}%
              </span>
            </div>
            <div class="h-2 w-full bg-[var(--color-earthy-sage)] rounded overflow-hidden relative">
              <div class="absolute top-0 bottom-0 w-px bg-[var(--color-earthy-coral)] z-10" style={{ left: `${appStore.sileroThreshold() * 100}%` }} title="VAD threshold"></div>
              <div
                class={`h-full transition-all duration-75 ${appStore.vadState().sileroProbability > appStore.sileroThreshold() ? 'bg-[var(--color-earthy-coral)]' : 'bg-[var(--color-earthy-soft-brown)]'}`}
                style={{ width: `${Math.min(100, appStore.vadState().sileroProbability * 100)}%` }}
              />
            </div>
          </div>
          {/* NOTE(review): "> 3" dB is a hard-coded display threshold here,
              distinct from the engine's snrThreshold — confirm they should match. */}
          <div class={`flex justify-between items-center bg-[var(--color-earthy-bg)] p-1.5 rounded border border-[var(--color-earthy-sage)] transition-opacity duration-300 ${appStore.vadState().snr !== 0 ? 'opacity-100' : 'opacity-40'}`}>
            <span class="font-bold text-[9px] text-[var(--color-earthy-soft-brown)] uppercase">SNR</span>
            <span class={`font-bold text-[10px] ${appStore.vadState().snr > 3 ? 'text-[var(--color-earthy-muted-green)]' : 'text-[var(--color-earthy-soft-brown)]'}`}>
              {appStore.vadState().snr.toFixed(1)} dB
            </span>
          </div>
        </Show>

        {/* Mode-specific counters at the bottom of column 1 */}
        <div class="grid grid-cols-2 gap-1.5 pt-1 border-t border-[var(--color-earthy-sage)]">
          <Show when={isV3()}>
            <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded p-1.5 text-center">
              <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase mb-px">Overlap</div>
              <div class="text-[10px] font-bold text-[var(--color-earthy-dark-brown)]">{appStore.streamingOverlap().toFixed(1)}s</div>
            </div>
            <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded p-1.5 text-center">
              <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase mb-px">Chunks</div>
              <div class="text-[10px] font-bold text-[var(--color-earthy-dark-brown)]">{appStore.mergeInfo().chunkCount}</div>
            </div>
          </Show>
          <Show when={isV4()}>
            <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded p-1.5 text-center">
              <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase mb-px">State</div>
              <div class={`text-[10px] font-bold whitespace-nowrap w-24 overflow-hidden text-ellipsis mx-auto ${appStore.vadState().isSpeech ? 'text-[var(--color-earthy-coral)]' : 'text-[var(--color-earthy-soft-brown)]'}`}>
                {appStore.vadState().hybridState}
              </div>
            </div>
            <div class="bg-[var(--color-earthy-bg)] border border-[var(--color-earthy-sage)] rounded p-1.5 text-center">
              <div class="text-[7px] font-bold text-[var(--color-earthy-soft-brown)] uppercase mb-px">Windows</div>
              <div class="text-[10px] font-bold text-[var(--color-earthy-dark-brown)]">{appStore.v4MergerStats().utterancesProcessed}</div>
            </div>
          </Show>
        </div>
      </div>

      {/* ---- Column 2: Live Context (mode-dependent) ---- */}
      <div class="flex-1 flex flex-col min-w-0 bg-[var(--color-earthy-bg)]">
        <div class="px-3 py-2 border-b border-[var(--color-earthy-sage)] bg-[var(--color-earthy-sage)]/10 flex items-center justify-between">
          <span class="font-bold tracking-wider uppercase text-[var(--color-earthy-soft-brown)]">
            {isV4() ? 'Transcript State' : isV3() ? 'Stream Sync' : 'Segments'}
          </span>

          {/* v3: LCS indicators */}
          <Show when={isV3()}>
            <div class="flex items-center gap-3">
              <div class="flex items-center gap-1.5 px-2 py-0.5 bg-[var(--color-earthy-bg)] rounded border border-[var(--color-earthy-sage)]">
                <div class={`w-1.5 h-1.5 rounded-full transition-colors duration-300 ${appStore.mergeInfo().anchorValid ? 'bg-[var(--color-earthy-muted-green)]' : 'bg-[var(--color-earthy-coral)]'}`} />
                <span class="font-bold uppercase text-[var(--color-earthy-soft-brown)] tracking-wide">Lock</span>
              </div>
              <div class="flex items-center gap-1.5 px-2 py-0.5 bg-[var(--color-earthy-bg)] rounded border border-[var(--color-earthy-sage)]">
                <span class="material-symbols-outlined text-[14px] text-[var(--color-earthy-soft-brown)]">join_inner</span>
                <span class="font-bold uppercase text-[var(--color-earthy-dark-brown)]">Match: <span class="text-[var(--color-earthy-muted-green)]">{appStore.mergeInfo().lcsLength}</span></span>
              </div>
            </div>
          </Show>

          {/* v4: VAD state indicator */}
          <Show when={isV4()}>
            <div class="flex items-center gap-3">
              <div class="flex items-center gap-1.5 px-2 py-0.5 bg-[var(--color-earthy-bg)] rounded border border-[var(--color-earthy-sage)]">
                <div class={`w-1.5 h-1.5 rounded-full transition-colors duration-300 ${appStore.vadState().isSpeech ? 'bg-[var(--color-earthy-coral)] animate-pulse' : 'bg-[var(--color-earthy-sage)]'}`} />
                <div class="w-24 overflow-hidden text-ellipsis whitespace-nowrap">
                  <span class="font-bold uppercase text-[var(--color-earthy-soft-brown)] tracking-wide">{appStore.vadState().hybridState}</span>
                </div>
              </div>
              <div class={`flex items-center gap-1.5 px-2 py-0.5 bg-[var(--color-earthy-bg)] rounded border border-[var(--color-earthy-sage)] transition-opacity duration-300 ${appStore.vadState().sileroProbability > 0 ? 'opacity-100' : 'opacity-0'}`}>
                <span class="font-bold uppercase text-[var(--color-earthy-soft-brown)] text-[9px]">VAD</span>
                <span class={`font-bold ${appStore.vadState().sileroProbability > 0.5 ? 'text-[var(--color-earthy-coral)]' : 'text-[var(--color-earthy-soft-brown)]'}`}>
                  {(appStore.vadState().sileroProbability * 100).toFixed(0)}%
                </span>
              </div>
            </div>
          </Show>
        </div>

        <div class="flex-1 overflow-y-auto p-3 space-y-4 custom-scrollbar">
          {/* v4: Mature + Immature text display */}
          <Show when={isV4()}>
            <div class="space-y-3">
              {/* Mature (finalized) sentences */}
              <div class="space-y-1.5">
                <h4 class="font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-widest flex items-center gap-2 text-[9px]">
                  <span class="w-1.5 h-1.5 bg-[var(--color-earthy-muted-green)] rounded-full"></span>
                  Finalized Sentences
                </h4>
                {/* ref target for the auto-scroll effect above */}
                <div
                  ref={scrollContainer}
                  class="p-2 border border-[var(--color-earthy-sage)] bg-[var(--color-earthy-muted-green)]/10 rounded h-32 overflow-y-auto resize-y"
                >
                  <Show when={appStore.matureText()} fallback={
                    <span class="text-[var(--color-earthy-soft-brown)] italic text-[10px] opacity-50">No finalized sentences yet...</span>
                  }>
                    <span class="text-[11px] text-[var(--color-earthy-dark-brown)] leading-relaxed">{appStore.matureText()}</span>
                  </Show>
                </div>
              </div>

              {/* Immature (active) sentence */}
              <div class="space-y-1.5">
                <h4 class="font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-widest flex items-center gap-2 text-[9px]">
                  <span class="w-1.5 h-1.5 bg-[var(--color-earthy-coral)] rounded-full animate-pulse"></span>
                  Active Sentence
                </h4>
                <div class="p-2 border border-[var(--color-earthy-coral)]/30 bg-[var(--color-earthy-coral)]/10 rounded min-h-[36px]">
                  <Show when={appStore.immatureText()} fallback={
                    <span class="text-[var(--color-earthy-soft-brown)] italic text-[10px] opacity-50">Waiting for speech...</span>
                  }>
                    <span class="text-[11px] text-[var(--color-earthy-coral)] italic leading-relaxed">{appStore.immatureText()}</span>
                    <span class="inline-block w-0.5 h-3 bg-[var(--color-earthy-coral)] animate-pulse ml-0.5 align-middle"></span>
                  </Show>
                </div>
              </div>

              {/* Pending sentence info */}
              <Show when={appStore.v4MergerStats().sentencesFinalized > 0}>
                <div class="text-[9px] text-[var(--color-earthy-soft-brown)] flex items-center gap-3 pt-1">
                  <span>{appStore.v4MergerStats().sentencesFinalized} sentences finalized</span>
                  <span class="text-[var(--color-earthy-sage)]">|</span>
                  <span>Cursor at {appStore.matureCursorTime().toFixed(2)}s</span>
                  <span class="text-[var(--color-earthy-sage)]">|</span>
                  <span>{appStore.v4MergerStats().utterancesProcessed} windows processed</span>
                </div>
              </Show>
            </div>
          </Show>

          {/* v3: Transition cache + anchors */}
          <Show when={isV3()}>
            <div class="space-y-2">
              <h4 class="font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-widest flex items-center gap-2 text-[9px]">
                <span class="w-1 h-1 bg-[var(--color-earthy-sage)] rounded-full"></span>
                Transition Cache
              </h4>
              {/* Last 24 tokens rendered as confidence-tinted chips */}
              <div class="p-2 border border-[var(--color-earthy-sage)] bg-[var(--color-earthy-sage)]/10 rounded min-h-[48px] flex flex-wrap gap-1.5 content-start">
                <For each={appStore.debugTokens().slice(-24)}>
                  {(token) => (
                    <div
                      class="px-1.5 py-0.5 rounded text-[10px] font-medium border transition-colors"
                      style={{
                        "background-color": token.confidence > 0.8 ? '#F9F7F2' : 'rgba(249,247,242,0.6)',
                        "border-color": `rgba(107, 112, 92, ${Math.max(0.2, token.confidence * 0.4)})`,
                        "color": token.confidence > 0.8 ? '#3D405B' : '#A5A58D',
                        "opacity": Math.max(0.5, token.confidence)
                      }}
                      title={`Confidence: ${(token.confidence * 100).toFixed(0)}%`}
                    >
                      {token.text}
                    </div>
                  )}
                </For>
                <Show when={appStore.pendingText()}>
                  <span class="px-1.5 py-0.5 text-[var(--color-earthy-coral)] font-medium italic border border-dashed border-[var(--color-earthy-coral)]/30 rounded bg-[var(--color-earthy-coral)]/10">
                    {appStore.pendingText()}...
                  </span>
                </Show>
                {/* NOTE(review): "op-50" looks like a typo for "opacity-50" — confirm against the utility set */}
                <Show when={!appStore.debugTokens().length && !appStore.pendingText()}>
                  <span class="text-[var(--color-earthy-soft-brown)] italic text-[10px] w-full text-center py-2 op-50">Waiting for speech input...</span>
                </Show>
              </div>
            </div>

            <div class="space-y-2">
              <h4 class="font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-widest flex items-center gap-2 text-[9px]">
                <span class="w-1 h-1 bg-[var(--color-earthy-muted-green)] rounded-full"></span>
                Stable Anchors
              </h4>
              <div class="flex flex-wrap gap-1">
                <For each={appStore.mergeInfo().anchorTokens || []}>
                  {(token) => (
                    <span class="px-1.5 py-0.5 bg-[var(--color-earthy-muted-green)]/20 text-[var(--color-earthy-muted-green)] border border-[var(--color-earthy-sage)] rounded font-medium">
                      {token}
                    </span>
                  )}
                </For>
                <Show when={!appStore.mergeInfo().anchorTokens?.length}>
                  <span class="text-[var(--color-earthy-soft-brown)] text-[10px] italic px-1 opacity-50">No stable anchors locked yet.</span>
                </Show>
              </div>
            </div>
          </Show>

          {/* v2: basic info */}
          <Show when={!isV3() && !isV4()}>
            <div class="text-[var(--color-earthy-soft-brown)] italic text-center py-4">
              Legacy per-utterance mode. Segments are transcribed individually.
            </div>
          </Show>

          {/* New Layered Buffer Visualizer */}
          <div class="pt-2 border-t border-[var(--color-earthy-sage)]">
            <LayeredBufferVisualizer
              audioEngine={props.audioEngine}
              melClient={props.melClient}
              height={120} // Compact height
              windowDuration={8.0}
            />
          </div>
        </div>
      </div>
    </div>
  );
};
|
src/components/EnergyMeter.tsx
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, createSignal, createEffect, onCleanup } from 'solid-js';
|
| 2 |
+
import { AudioEngine } from '../lib/audio/types';
|
| 3 |
+
import { appStore } from '../stores/appStore';
|
| 4 |
+
|
| 5 |
+
/** Props for the EnergyMeter component. */
interface EnergyMeterProps {
  // Engine to read energy/SNR metrics from; while absent the meter
  // keeps its zeroed initial readings (the update effect bails out).
  audioEngine?: AudioEngine;
}
|
| 8 |
+
|
| 9 |
+
export const EnergyMeter: Component<EnergyMeterProps> = (props) => {
|
| 10 |
+
const [energy, setEnergy] = createSignal(0);
|
| 11 |
+
const [metrics, setMetrics] = createSignal({ noiseFloor: 0, snr: 0, threshold: 0.02, snrThreshold: 3.0 });
|
| 12 |
+
const [isSpeaking, setIsSpeaking] = createSignal(false);
|
| 13 |
+
|
| 14 |
+
const updateFromEngine = (engine: AudioEngine) => {
|
| 15 |
+
const currentE = engine.getCurrentEnergy();
|
| 16 |
+
const currentM = engine.getSignalMetrics();
|
| 17 |
+
|
| 18 |
+
setEnergy(currentE);
|
| 19 |
+
setMetrics(currentM);
|
| 20 |
+
// Check if speaking based on SNR threshold (matching VAD logic)
|
| 21 |
+
setIsSpeaking(currentM.snr > currentM.snrThreshold || currentE > currentM.threshold);
|
| 22 |
+
};
|
| 23 |
+
|
| 24 |
+
createEffect(() => {
|
| 25 |
+
const engine = props.audioEngine;
|
| 26 |
+
if (!engine) return;
|
| 27 |
+
|
| 28 |
+
updateFromEngine(engine);
|
| 29 |
+
const unsubscribe = engine.onVisualizationUpdate(() => {
|
| 30 |
+
updateFromEngine(engine);
|
| 31 |
+
});
|
| 32 |
+
|
| 33 |
+
onCleanup(() => unsubscribe());
|
| 34 |
+
});
|
| 35 |
+
|
| 36 |
+
// Logarithmic scaling for better visualization
|
| 37 |
+
const toPercent = (val: number) => {
|
| 38 |
+
// e.g. mapping 0.0001 -> 1.0 to 0% -> 100% log scale
|
| 39 |
+
// log10(0.0001) = -4, log10(1) = 0
|
| 40 |
+
const minLog = -4;
|
| 41 |
+
const maxLog = 0;
|
| 42 |
+
const v = Math.max(0.0001, val);
|
| 43 |
+
const log = Math.log10(v);
|
| 44 |
+
return Math.max(0, Math.min(100, ((log - minLog) / (maxLog - minLog)) * 100));
|
| 45 |
+
};
|
| 46 |
+
|
| 47 |
+
return (
|
| 48 |
+
<div class="flex flex-col gap-4 p-5 nm-inset rounded-3xl bg-slate-500/5 transition-all">
|
| 49 |
+
<div class="flex items-center justify-between px-1">
|
| 50 |
+
<h3 class="text-[9px] font-black text-slate-400 uppercase tracking-widest leading-none">Signal_Analysis</h3>
|
| 51 |
+
{/* Speaking indicator - Neumorphic LED style */}
|
| 52 |
+
<div class={`flex items-center gap-2 px-3 py-1 rounded-full nm-flat transition-all ${isSpeaking()
|
| 53 |
+
? 'text-emerald-500'
|
| 54 |
+
: 'text-slate-500 opacity-60'
|
| 55 |
+
}`}>
|
| 56 |
+
<div class={`w-1.5 h-1.5 rounded-full ${isSpeaking() ? 'bg-emerald-500 shadow-[0_0_8px_rgba(16,185,129,0.8)]' : 'bg-slate-400'}`} />
|
| 57 |
+
<span class="text-[9px] font-black tracking-widest">
|
| 58 |
+
{isSpeaking() ? 'SPEECH' : 'SILENCE'}
|
| 59 |
+
</span>
|
| 60 |
+
</div>
|
| 61 |
+
</div>
|
| 62 |
+
|
| 63 |
+
{/* Energy Bar */}
|
| 64 |
+
<div class="relative w-full h-3 nm-inset bg-slate-900/10 rounded-full overflow-hidden p-0.5">
|
| 65 |
+
{/* Energy Fill - color based on speech state */}
|
| 66 |
+
<div
|
| 67 |
+
class={`h-full rounded-full transition-all duration-75 ${isSpeaking() ? 'bg-emerald-500 shadow-[0_0_12px_rgba(16,185,129,0.5)]' : 'bg-blue-500 shadow-[0_0_12px_rgba(59,130,246,0.5)]'
|
| 68 |
+
}`}
|
| 69 |
+
style={{ width: `${toPercent(energy())}%` }}
|
| 70 |
+
/>
|
| 71 |
+
|
| 72 |
+
{/* Noise Floor Marker */}
|
| 73 |
+
<div
|
| 74 |
+
class="absolute top-0 bottom-0 w-0.5 bg-amber-500 opacity-50 z-20"
|
| 75 |
+
style={{ left: `${toPercent(metrics().noiseFloor)}%` }}
|
| 76 |
+
/>
|
| 77 |
+
|
| 78 |
+
{/* Energy Threshold Marker */}
|
| 79 |
+
<div
|
| 80 |
+
class="absolute top-0 bottom-0 w-px bg-red-500 z-30"
|
| 81 |
+
style={{ left: `${toPercent(metrics().threshold)}%` }}
|
| 82 |
+
/>
|
| 83 |
+
</div>
|
| 84 |
+
|
| 85 |
+
<div class="grid grid-cols-3 items-center px-1">
|
| 86 |
+
<div class="flex flex-col">
|
| 87 |
+
<span class="text-[8px] font-black text-slate-500 uppercase tracking-tighter">Noise</span>
|
| 88 |
+
<span class="text-[10px] font-bold text-slate-400">{metrics().noiseFloor.toFixed(5)}</span>
|
| 89 |
+
</div>
|
| 90 |
+
<div class="flex flex-col items-center">
|
| 91 |
+
<span class="text-[8px] font-black text-slate-500 uppercase tracking-tighter">Energy</span>
|
| 92 |
+
<span class="text-[10px] font-bold text-slate-400">{energy().toFixed(4)}</span>
|
| 93 |
+
</div>
|
| 94 |
+
<div class="flex flex-col items-end">
|
| 95 |
+
<span class="text-[8px] font-black text-slate-500 uppercase tracking-tighter">SNR_Ratio</span>
|
| 96 |
+
<span class={`text-[10px] font-black ${metrics().snr > metrics().snrThreshold ? 'text-emerald-500' : 'text-amber-500'}`}>
|
| 97 |
+
{metrics().snr.toFixed(1)} dB
|
| 98 |
+
</span>
|
| 99 |
+
</div>
|
| 100 |
+
</div>
|
| 101 |
+
</div>
|
| 102 |
+
);
|
| 103 |
+
};
|
src/components/LayeredBufferVisualizer.tsx
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, onMount, onCleanup, createSignal } from 'solid-js';
|
| 2 |
+
import type { AudioEngine } from '../lib/audio/types';
|
| 3 |
+
import type { MelWorkerClient } from '../lib/audio/MelWorkerClient';
|
| 4 |
+
import { normalizeMelForDisplay } from '../lib/audio/mel-display';
|
| 5 |
+
import { appStore } from '../stores/appStore';
|
| 6 |
+
|
| 7 |
+
/** Props for the layered buffer visualizer (waveform + mel spectrogram canvas). */
interface LayeredBufferVisualizerProps {
  // Source of audio samples/metrics to visualize.
  audioEngine?: AudioEngine;
  // Worker client providing mel-spectrogram frames.
  melClient?: MelWorkerClient;
  height?: number; // Total height
  windowDuration?: number; // default 8.0s
}
|
| 13 |
+
|
| 14 |
+
// Number of mel frequency bins per spectrogram frame.
// NOTE(review): presumably must match the mel worker's configuration — confirm.
const MEL_BINS = 128; // Standard for this app
|
| 15 |
+
|
| 16 |
+
// dB scaling is in mel-display.ts (shared with bar visualizer)
|
| 17 |
+
|
| 18 |
+
// Pre-computed 256-entry RGB lookup table for mel heatmap (black to red).
|
| 19 |
+
// Built once at module load; indexed by Math.round(intensity * 255).
|
| 20 |
+
// Colormap: black -> blue -> purple -> green -> yellow -> orange -> red.
|
| 21 |
+
const COLORMAP_LUT = (() => {
|
| 22 |
+
const stops: [number, number, number, number][] = [
|
| 23 |
+
[0, 0, 0, 0], // black
|
| 24 |
+
[0.12, 0, 0, 180], // blue
|
| 25 |
+
[0.30, 120, 0, 160], // purple
|
| 26 |
+
[0.48, 0, 180, 80], // green
|
| 27 |
+
[0.65, 220, 220, 0], // yellow
|
| 28 |
+
[0.82, 255, 140, 0], // orange
|
| 29 |
+
[1, 255, 0, 0], // red
|
| 30 |
+
];
|
| 31 |
+
// 256 entries * 3 channels (R, G, B) packed into a Uint8Array
|
| 32 |
+
const lut = new Uint8Array(256 * 3);
|
| 33 |
+
for (let i = 0; i < 256; i++) {
|
| 34 |
+
const intensity = i / 255;
|
| 35 |
+
let r = 0, g = 0, b = 0;
|
| 36 |
+
for (let s = 0; s < stops.length - 1; s++) {
|
| 37 |
+
const [t0, r0, g0, b0] = stops[s];
|
| 38 |
+
const [t1, r1, g1, b1] = stops[s + 1];
|
| 39 |
+
if (intensity >= t0 && intensity <= t1) {
|
| 40 |
+
const t = (intensity - t0) / (t1 - t0);
|
| 41 |
+
r = Math.round(r0 + t * (r1 - r0));
|
| 42 |
+
g = Math.round(g0 + t * (g1 - g0));
|
| 43 |
+
b = Math.round(b0 + t * (b1 - b0));
|
| 44 |
+
break;
|
| 45 |
+
}
|
| 46 |
+
}
|
| 47 |
+
if (intensity >= stops[stops.length - 1][0]) {
|
| 48 |
+
const last = stops[stops.length - 1];
|
| 49 |
+
r = last[1]; g = last[2]; b = last[3];
|
| 50 |
+
}
|
| 51 |
+
const base = i * 3;
|
| 52 |
+
lut[base] = r;
|
| 53 |
+
lut[base + 1] = g;
|
| 54 |
+
lut[base + 2] = b;
|
| 55 |
+
}
|
| 56 |
+
return lut;
|
| 57 |
+
})();
|
| 58 |
+
|
| 59 |
+
/**
 * Layered real-time audio visualizer drawing three stacked layers into one
 * canvas: mel spectrogram (top 55%), min/max waveform envelope (middle 35%)
 * and a VAD activity strip (bottom 10%), scrolling over the last
 * `windowDuration` seconds of the engine's ring buffer. Rendering runs on a
 * requestAnimationFrame loop throttled to ~30fps; spectrogram feature
 * fetches are throttled separately to ~10fps.
 */
export const LayeredBufferVisualizer: Component<LayeredBufferVisualizerProps> = (props) => {
  let canvasRef: HTMLCanvasElement | undefined;
  let ctx: CanvasRenderingContext2D | null = null;
  let animationFrameId: number;

  // Visible time window in seconds; falls back to 8s when unset (or 0).
  const getWindowDuration = () => props.windowDuration || 8.0;

  // Offscreen canvas for spectrogram caching (scrolling)
  let specCanvas: HTMLCanvasElement | undefined;
  let specCtx: CanvasRenderingContext2D | null = null;

  // State for last fetch to throttle spectrogram updates
  let lastSpecFetchTime = 0;
  const SPEC_FETCH_INTERVAL = 100; // Update spectrogram every 100ms (10fps)
  const DRAW_INTERVAL_MS = 33; // Throttle full redraw to ~30fps
  let lastDrawTime = 0;

  // --- Cached layout dimensions (updated via ResizeObserver, NOT per-frame) ---
  // Avoids getBoundingClientRect() every animation frame which forces synchronous
  // layout reflow and was the #1 perf bottleneck (1.5s layout-shift clusters).
  let cachedPhysicalWidth = 0;
  let cachedPhysicalHeight = 0;
  let cachedDpr = window.devicePixelRatio || 1;
  let resizeObserver: ResizeObserver | null = null;
  let dprMediaQuery: MediaQueryList | null = null;

  /** Recompute physical canvas dimensions from cached logical size + DPR. */
  const updateCanvasDimensions = (logicalW: number, logicalH: number) => {
    cachedDpr = window.devicePixelRatio || 1;
    cachedPhysicalWidth = Math.floor(logicalW * cachedDpr);
    cachedPhysicalHeight = Math.floor(logicalH * cachedDpr);

    // Resize canvases immediately so next frame uses correct size
    // (assigning width/height also clears any existing canvas contents).
    if (canvasRef && (canvasRef.width !== cachedPhysicalWidth || canvasRef.height !== cachedPhysicalHeight)) {
      canvasRef.width = cachedPhysicalWidth;
      canvasRef.height = cachedPhysicalHeight;
    }
    if (specCanvas && (specCanvas.width !== cachedPhysicalWidth || specCanvas.height !== cachedPhysicalHeight)) {
      specCanvas.width = cachedPhysicalWidth;
      specCanvas.height = cachedPhysicalHeight;
    }
  };

  // --- Pre-allocated ImageData for spectrogram rendering ---
  // Avoids creating a new ImageData object every spectrogram draw (~10fps),
  // which caused GC pressure from large short-lived allocations.
  let cachedSpecImgData: ImageData | null = null;
  let cachedSpecImgWidth = 0;
  let cachedSpecImgHeight = 0;

  // --- Pre-allocated waveform read buffer ---
  // Avoids allocating a new Float32Array(~128000) every animation frame.
  // Grows only when the required size exceeds current capacity.
  let waveformReadBuf: Float32Array | null = null;

  // Store spectrogram data with its time alignment
  let cachedSpecData: {
    features: Float32Array; // flattened [melBins, timeSteps] mel-major features
    melBins: number;
    timeSteps: number;
    startTime: number; // ring-buffer time of the window start, seconds
    endTime: number; // ring-buffer time of the window end, seconds
  } | null = null;

  onMount(() => {
    if (canvasRef) {
      ctx = canvasRef.getContext('2d', { alpha: false });

      // Use ResizeObserver to cache dimensions instead of per-frame getBoundingClientRect
      resizeObserver = new ResizeObserver((entries) => {
        for (const entry of entries) {
          // contentRect gives CSS-pixel (logical) dimensions without forcing layout
          const cr = entry.contentRect;
          updateCanvasDimensions(cr.width, cr.height);
        }
      });
      resizeObserver.observe(canvasRef);

      // Watch for DPR changes (browser zoom, display change). A resolution
      // media query only fires when the DPR crosses the queried value, so
      // each change re-registers a fresh one-shot query at the new DPR.
      const setupDprWatch = () => {
        dprMediaQuery = window.matchMedia(`(resolution: ${window.devicePixelRatio}dppx)`);
        const onDprChange = () => {
          if (canvasRef) {
            const rect = canvasRef.getBoundingClientRect(); // one-time on zoom change only
            updateCanvasDimensions(rect.width, rect.height);
          }
          // Re-register for the next change at the new DPR
          setupDprWatch();
        };
        dprMediaQuery.addEventListener('change', onDprChange, { once: true });
      };
      setupDprWatch();

      // Initial dimensions (one-time)
      const rect = canvasRef.getBoundingClientRect();
      updateCanvasDimensions(rect.width, rect.height);
    }

    // Create offscreen canvas
    specCanvas = document.createElement('canvas');
    specCtx = specCanvas.getContext('2d', { alpha: false });

    // `loop` is a const declared below; this call is safe because onMount
    // fires after the component body has finished executing.
    loop();
  });

  onCleanup(() => {
    cancelAnimationFrame(animationFrameId);
    if (resizeObserver) {
      resizeObserver.disconnect();
      resizeObserver = null;
    }
    // NOTE(review): the pending one-shot 'change' listener on dprMediaQuery
    // is not removed here, and a DPR change after unmount re-registers a new
    // one — consider tracking and removing that listener on cleanup.
  });

  /**
   * Main render loop (requestAnimationFrame). Skips work until the engine
   * exists and the ~30fps draw interval has elapsed, then repaints all
   * three layers plus the overlay for the current time window.
   */
  const loop = (now: number = performance.now()) => {
    if (!ctx || !canvasRef || !props.audioEngine) {
      animationFrameId = requestAnimationFrame(loop);
      return;
    }

    // Throttle: skip this frame if we redrew too recently.
    if (now - lastDrawTime < DRAW_INTERVAL_MS) {
      animationFrameId = requestAnimationFrame(loop);
      return;
    }
    lastDrawTime = now;

    // Use cached dimensions (updated by ResizeObserver / DPR watcher)
    const dpr = cachedDpr;
    const width = cachedPhysicalWidth;
    const height = cachedPhysicalHeight;

    // Not laid out yet (or hidden) — nothing to draw.
    if (width === 0 || height === 0) {
      animationFrameId = requestAnimationFrame(loop);
      return;
    }

    // Colors
    const bgColor = '#0f172a';
    ctx.fillStyle = bgColor;
    ctx.fillRect(0, 0, width, height);

    const ringBuffer = props.audioEngine.getRingBuffer();
    const currentTime = ringBuffer.getCurrentTime();
    const duration = getWindowDuration();
    const startTime = currentTime - duration;
    const sampleRate = ringBuffer.sampleRate;

    // Layout:
    //   Top 55%: Spectrogram
    //   Middle 35%: Waveform
    //   Bottom 10%: VAD signal
    const specHeight = Math.floor(height * 0.55);
    const waveHeight = Math.floor(height * 0.35);
    const vadHeight = height - specHeight - waveHeight;
    const waveY = specHeight;
    const vadY = specHeight + waveHeight;

    // 1. Spectrogram (async fetch with stored alignment)
    if (props.melClient && specCtx && specCanvas) {
      if (now - lastSpecFetchTime > SPEC_FETCH_INTERVAL) {
        lastSpecFetchTime = now;

        const fetchStartSample = Math.round(startTime * sampleRate);
        const fetchEndSample = Math.round(currentTime * sampleRate);

        // Request RAW (unnormalized) features for fixed dB scaling.
        // ASR transcription still uses normalized features (default).
        props.melClient.getFeatures(fetchStartSample, fetchEndSample, false).then(features => {
          if (features && specCtx && specCanvas) {
            // Store with time alignment info
            cachedSpecData = {
              features: features.features,
              melBins: features.melBins,
              timeSteps: features.T,
              startTime: startTime,
              endTime: currentTime
            };
            drawSpectrogramToCanvas(specCtx, features.features, features.melBins, features.T, width, specHeight);
          }
        }).catch(() => { }); // best-effort: unavailable features are simply skipped this cycle
      }

      // Draw cached spectrogram aligned to current view
      if (cachedSpecData && cachedSpecData.timeSteps > 0) {
        // Calculate offset to align cached data with current time window:
        // the cache was rendered for an older window, so shift it left by
        // the elapsed fraction before blitting onto the main canvas.
        const cachedDuration = cachedSpecData.endTime - cachedSpecData.startTime;
        const timeOffset = startTime - cachedSpecData.startTime;
        const offsetX = Math.floor((timeOffset / cachedDuration) * width);

        // Draw the portion of cached spectrogram that's still visible
        ctx.drawImage(specCanvas, offsetX, 0, width - offsetX, specHeight, 0, 0, width - offsetX, specHeight);
      }
    }

    // 2. Waveform (sync with current time window, zero-allocation read)
    try {
      const startSample = Math.floor(startTime * sampleRate);
      const endSample = Math.floor(currentTime * sampleRate);
      const neededLen = endSample - startSample;

      const baseFrame = ringBuffer.getBaseFrameOffset();
      if (startSample >= baseFrame && neededLen > 0) {
        // Use readInto if available (zero-alloc), fall back to read()
        if (ringBuffer.readInto) {
          // Grow the pre-allocated buffer only when capacity is insufficient
          if (!waveformReadBuf || waveformReadBuf.length < neededLen) {
            waveformReadBuf = new Float32Array(neededLen);
          }
          const written = ringBuffer.readInto(startSample, endSample, waveformReadBuf);
          // Pass a subarray view (no copy) of the exact length
          drawWaveform(ctx, waveformReadBuf.subarray(0, written), width, waveHeight, waveY);
        } else {
          const audioData = ringBuffer.read(startSample, endSample);
          drawWaveform(ctx, audioData, width, waveHeight, waveY);
        }
      }
    } catch (e) {
      // Data likely overwritten or not available
    }

    // 3. VAD Signal Layer
    drawVadLayer(ctx, width, vadHeight, vadY, startTime, duration, dpr);

    // 4. Overlay (time labels, trigger line)
    drawOverlay(ctx, width, height, startTime, duration, dpr);

    animationFrameId = requestAnimationFrame(loop);
  };

  /**
   * Rasterize mel features into the given (offscreen) context.
   * Nearest-neighbor sampling maps canvas x -> time step and y -> mel bin,
   * with low frequencies at the bottom; colors come from COLORMAP_LUT.
   */
  const drawSpectrogramToCanvas = (
    ctx: CanvasRenderingContext2D,
    features: Float32Array,
    melBins: number,
    timeSteps: number,
    width: number,
    height: number
  ) => {
    // features layout: [melBins, T] (mel-major, flattened from [mel, time])
    // So features[m * timeSteps + t].

    if (timeSteps === 0) return;

    // Reuse cached ImageData if dimensions match; allocate only on size change
    if (!cachedSpecImgData || cachedSpecImgWidth !== width || cachedSpecImgHeight !== height) {
      cachedSpecImgData = ctx.createImageData(width, height);
      cachedSpecImgWidth = width;
      cachedSpecImgHeight = height;
    }
    const imgData = cachedSpecImgData;
    const data = imgData.data;

    // Scaling factors
    const timeScale = timeSteps / width;
    const freqScale = melBins / height;

    for (let x = 0; x < width; x++) {
      const t = Math.floor(x * timeScale);
      if (t >= timeSteps) break;

      for (let y = 0; y < height; y++) {
        // y=0 is top (high freq), y=height is bottom (low freq).
        const m = Math.floor((height - 1 - y) * freqScale);
        if (m >= melBins) continue;

        const val = features[m * timeSteps + t];
        const clamped = normalizeMelForDisplay(val);
        const lutIdx = (clamped * 255) | 0;
        const lutBase = lutIdx * 3;

        const idx = (y * width + x) * 4;
        data[idx] = COLORMAP_LUT[lutBase];
        data[idx + 1] = COLORMAP_LUT[lutBase + 1];
        data[idx + 2] = COLORMAP_LUT[lutBase + 2];
        data[idx + 3] = 255; // fully opaque
      }
    }
    ctx.putImageData(imgData, 0, 0);
  };

  // Use gain 1 so waveform shows true amplitude (float32 in [-1,1] fills half-height).
  // No display amplification; ASR pipeline is unchanged.
  const WAVEFORM_GAIN = 1;

  /** Min/max envelope waveform: one vertical line per canvas column. */
  const drawWaveform = (ctx: CanvasRenderingContext2D, data: Float32Array, width: number, height: number, offsetY: number) => {
    if (data.length === 0) return;

    const step = Math.ceil(data.length / width);
    const amp = (height / 2) * WAVEFORM_GAIN;
    const centerY = offsetY + height / 2;

    ctx.strokeStyle = '#4ade80'; // Green
    ctx.lineWidth = 1;
    ctx.beginPath();

    for (let x = 0; x < width; x++) {
      const startIdx = x * step;
      const endIdx = Math.min((x + 1) * step, data.length);

      let min = 1;
      let max = -1;
      let hasData = false;

      // Subsample at most ~10 points per column rather than every sample.
      for (let i = startIdx; i < endIdx; i += Math.max(1, Math.floor((endIdx - startIdx) / 10))) {
        const s = data[i];
        if (s < min) min = s;
        if (s > max) max = s;
        hasData = true;
      }

      if (hasData) {
        // Clamp the envelope line into this layer's vertical band.
        const yMin = centerY - min * amp;
        const yMax = centerY - max * amp;
        ctx.moveTo(x, Math.max(offsetY, Math.min(offsetY + height, yMin)));
        ctx.lineTo(x, Math.max(offsetY, Math.min(offsetY + height, yMax)));
      }
    }
    ctx.stroke();
  };

  /** Bottom strip showing the current VAD state and instantaneous energy level. */
  const drawVadLayer = (ctx: CanvasRenderingContext2D, width: number, height: number, offsetY: number, startTime: number, duration: number, dpr: number) => {
    // Draw VAD state as a colored bar
    // For now, just show current VAD state as a solid bar (could be enhanced with historical data)
    // NOTE: startTime/duration are currently unused; kept for signature
    // parity with drawOverlay and future historical rendering.
    const vadState = appStore.vadState();
    const isSpeech = vadState.isSpeech;

    // Background
    ctx.fillStyle = isSpeech ? 'rgba(249, 115, 22, 0.4)' : 'rgba(100, 116, 139, 0.2)'; // Orange when speech, slate when silence
    ctx.fillRect(0, offsetY, width, height);

    // If energy-based detection is active, show energy level as a bar
    const energyLevel = appStore.audioLevel();
    const energyThreshold = appStore.energyThreshold();

    if (energyLevel > 0) {
      const barWidth = Math.min(width, width * (energyLevel / 0.3)); // Scale to max 30% energy
      ctx.fillStyle = energyLevel > energyThreshold ? 'rgba(249, 115, 22, 0.8)' : 'rgba(74, 222, 128, 0.6)';
      ctx.fillRect(width - barWidth, offsetY, barWidth, height);
    }

    // Draw a thin separator line at top
    ctx.strokeStyle = 'rgba(148, 163, 184, 0.3)';
    ctx.lineWidth = 1 * dpr;
    ctx.beginPath();
    ctx.moveTo(0, offsetY);
    ctx.lineTo(width, offsetY);
    ctx.stroke();

    // Label
    ctx.fillStyle = isSpeech ? '#fb923c' : '#64748b';
    ctx.font = `${8 * dpr}px monospace`;
    ctx.fillText(isSpeech ? 'SPEECH' : 'SILENCE', 4 * dpr, offsetY + height - 2 * dpr);
  };

  /** Trigger line and "-Ns" time labels drawn over all layers. */
  const drawOverlay = (ctx: CanvasRenderingContext2D, width: number, height: number, startTime: number, duration: number, dpr: number) => {
    // Draw Trigger line (1.5s from right) if in V3 mode
    const triggerX = width - (1.5 / duration) * width;
    ctx.strokeStyle = 'rgba(255, 255, 0, 0.5)';
    ctx.lineWidth = 1 * dpr;
    ctx.beginPath();
    ctx.moveTo(triggerX, 0);
    ctx.lineTo(triggerX, height);
    ctx.stroke();

    // Time labels
    // NOTE(review): the label range is hard-coded to 0..8s; when
    // windowDuration differs from 8s some labels land outside the visible
    // window — confirm whether labels should derive from `duration`.
    ctx.fillStyle = '#94a3b8';
    ctx.font = `${10 * dpr}px monospace`;
    for (let i = 0; i <= 8; i += 2) {
      const t = i;
      const x = width - (t / duration) * width;
      ctx.fillText(`-${t}s`, x + 3 * dpr, height - 6 * dpr);
    }
  };

  return (
    <div
      class="relative w-full bg-slate-900 rounded border border-slate-700 overflow-hidden shadow-inner"
      style={{ height: `${props.height || 200}px` }}
    >
      <canvas ref={canvasRef} class="w-full h-full block" />
      <div class="absolute top-2 left-2 text-[10px] text-slate-400 pointer-events-none">
        SPECTROGRAM + WAVEFORM ({getWindowDuration()}s)
      </div>
    </div>
  );
};
|
src/components/ModelLoadingOverlay.tsx
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, Show, For, createEffect, onCleanup } from 'solid-js';
|
| 2 |
+
|
| 3 |
+
interface ModelLoadingOverlayProps {
  /** Controls whether the overlay is rendered at all. */
  isVisible: boolean;
  /** Download progress shown as a percentage; clamped to 0-100 for the bar width. */
  progress: number;
  /** Status text shown under the title while loading / on error. */
  message: string;
  /** Name of the file currently being downloaded, when known. */
  file?: string;
  /** Inference backend; rendered in the footer ("GPU Accelerated" vs "WASM Native"). */
  backend: 'webgpu' | 'wasm';
  /** Model lifecycle state driving which pane (selection / progress / retry) is shown. */
  state: 'unloaded' | 'loading' | 'ready' | 'error';
  /** Id of the currently selected entry in MODELS. */
  selectedModelId: string;
  /** Called with a model id when the user picks an engine. */
  onModelSelect: (id: string) => void;
  /** Called to begin loading ("Initialize AI Engine" / "Retry Connection"). */
  onStart: () => void;
  /** Called with the chosen files when loading a model from disk. */
  onLocalLoad: (files: FileList) => void;
  /** Optional dismiss handler; when present the overlay is closable (X button, Escape, backdrop click). */
  onClose?: () => void;
}
|
| 16 |
+
|
| 17 |
+
export const MODELS = [
|
| 18 |
+
{ id: 'parakeet-tdt-0.6b-v2', name: 'Parakeet v2', desc: 'English optimized' },
|
| 19 |
+
{ id: 'parakeet-tdt-0.6b-v3', name: 'Parakeet v3', desc: 'Multilingual Streaming' },
|
| 20 |
+
];
|
| 21 |
+
|
| 22 |
+
export function getModelDisplayName(id: string): string {
|
| 23 |
+
return (MODELS.find((m) => m.id === id)?.name ?? id) || 'Unknown model';
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
export const ModelLoadingOverlay: Component<ModelLoadingOverlayProps> = (props) => {
|
| 27 |
+
const progressWidth = () => `${Math.max(0, Math.min(100, props.progress))}%`;
|
| 28 |
+
let fileInput: HTMLInputElement | undefined;
|
| 29 |
+
|
| 30 |
+
const handleFileChange = (e: Event) => {
|
| 31 |
+
const files = (e.target as HTMLInputElement).files;
|
| 32 |
+
if (files && files.length > 0) {
|
| 33 |
+
props.onLocalLoad(files);
|
| 34 |
+
}
|
| 35 |
+
};
|
| 36 |
+
|
| 37 |
+
const handleClose = () => props.onClose?.();
|
| 38 |
+
|
| 39 |
+
createEffect(() => {
|
| 40 |
+
if (!props.isVisible || !props.onClose) return;
|
| 41 |
+
const handler = (e: KeyboardEvent) => {
|
| 42 |
+
if (e.key === 'Escape') {
|
| 43 |
+
e.preventDefault();
|
| 44 |
+
props.onClose?.();
|
| 45 |
+
}
|
| 46 |
+
};
|
| 47 |
+
document.addEventListener('keydown', handler);
|
| 48 |
+
return () => document.removeEventListener('keydown', handler);
|
| 49 |
+
});
|
| 50 |
+
|
| 51 |
+
return (
|
| 52 |
+
<Show when={props.isVisible}>
|
| 53 |
+
<div
|
| 54 |
+
class="fixed inset-0 z-50 flex items-center justify-center bg-[var(--color-earthy-dark-brown)]/30 backdrop-blur-sm"
|
| 55 |
+
role="dialog"
|
| 56 |
+
aria-modal="true"
|
| 57 |
+
aria-labelledby="model-overlay-title"
|
| 58 |
+
onClick={(e) => e.target === e.currentTarget && handleClose()}
|
| 59 |
+
>
|
| 60 |
+
<input
|
| 61 |
+
type="file"
|
| 62 |
+
multiple
|
| 63 |
+
ref={fileInput}
|
| 64 |
+
class="hidden"
|
| 65 |
+
onChange={handleFileChange}
|
| 66 |
+
/>
|
| 67 |
+
|
| 68 |
+
<div class="w-full max-w-lg mx-4">
|
| 69 |
+
<div class="relative nm-flat rounded-[40px] overflow-hidden transition-all duration-300 animate-in fade-in slide-in-from-bottom-4">
|
| 70 |
+
{/* Close Button - show whenever onClose is provided so user can dismiss in any state */}
|
| 71 |
+
<Show when={props.onClose}>
|
| 72 |
+
<button
|
| 73 |
+
type="button"
|
| 74 |
+
onClick={handleClose}
|
| 75 |
+
class="absolute top-8 right-8 neu-square-btn text-[var(--color-earthy-soft-brown)] hover:text-[var(--color-earthy-coral)] transition-all z-10"
|
| 76 |
+
aria-label="Close"
|
| 77 |
+
>
|
| 78 |
+
<span class="material-symbols-outlined text-xl">close</span>
|
| 79 |
+
</button>
|
| 80 |
+
</Show>
|
| 81 |
+
|
| 82 |
+
{/* Header */}
|
| 83 |
+
<div class="p-10 pb-6 text-center">
|
| 84 |
+
<div class="w-20 h-20 mx-auto mb-8 rounded-[32px] nm-inset flex items-center justify-center">
|
| 85 |
+
<Show
|
| 86 |
+
when={props.state !== 'error'}
|
| 87 |
+
fallback={<span class="material-symbols-outlined text-[var(--color-earthy-coral)] text-4xl">warning</span>}
|
| 88 |
+
>
|
| 89 |
+
<span class={`material-symbols-outlined text-[var(--color-earthy-muted-green)] text-4xl ${props.state === 'loading' ? 'animate-pulse' : ''}`}>
|
| 90 |
+
{props.state === 'loading' ? 'downloading' : 'neurology'}
|
| 91 |
+
</span>
|
| 92 |
+
</Show>
|
| 93 |
+
</div>
|
| 94 |
+
|
| 95 |
+
<h2 id="model-overlay-title" class="text-3xl font-extrabold text-[var(--color-earthy-dark-brown)] tracking-tight">
|
| 96 |
+
{props.state === 'unloaded' ? 'Engine Selection' :
|
| 97 |
+
props.state === 'error' ? 'Loading Failed' : 'Model Installation'}
|
| 98 |
+
</h2>
|
| 99 |
+
|
| 100 |
+
<p class="text-sm text-[var(--color-earthy-soft-brown)] font-medium mt-3 px-10">
|
| 101 |
+
{props.state === 'unloaded' ? 'Select the AI engine for this transcription session.' : props.message}
|
| 102 |
+
</p>
|
| 103 |
+
</div>
|
| 104 |
+
|
| 105 |
+
{/* Content */}
|
| 106 |
+
<div class="px-10 pb-10">
|
| 107 |
+
<Show when={props.state === 'unloaded'}>
|
| 108 |
+
<div class="space-y-4">
|
| 109 |
+
<div class="grid gap-4">
|
| 110 |
+
<For each={MODELS}>
|
| 111 |
+
{(model) => (
|
| 112 |
+
<button
|
| 113 |
+
onClick={() => props.onModelSelect(model.id)}
|
| 114 |
+
class={`flex items-center text-left p-6 rounded-3xl transition-all ${props.selectedModelId === model.id
|
| 115 |
+
? 'nm-inset text-[var(--color-earthy-muted-green)] ring-2 ring-[var(--color-earthy-muted-green)]/20'
|
| 116 |
+
: 'nm-flat text-[var(--color-earthy-dark-brown)] hover:shadow-neu-btn-hover'
|
| 117 |
+
}`}
|
| 118 |
+
>
|
| 119 |
+
<div class={`w-6 h-6 rounded-full nm-inset mr-5 flex flex-none items-center justify-center ${props.selectedModelId === model.id ? 'text-[var(--color-earthy-muted-green)]' : 'text-[var(--color-earthy-sage)]'
|
| 120 |
+
}`}>
|
| 121 |
+
<Show when={props.selectedModelId === model.id}>
|
| 122 |
+
<div class="w-2.5 h-2.5 bg-[var(--color-earthy-muted-green)] rounded-full shadow-[0_0_8px_var(--color-earthy-muted-green)]" />
|
| 123 |
+
</Show>
|
| 124 |
+
</div>
|
| 125 |
+
<div>
|
| 126 |
+
<div class="font-bold text-lg leading-tight">{model.name}</div>
|
| 127 |
+
<div class="text-[10px] font-black opacity-40 uppercase tracking-widest mt-1">{model.desc}</div>
|
| 128 |
+
</div>
|
| 129 |
+
</button>
|
| 130 |
+
)}
|
| 131 |
+
</For>
|
| 132 |
+
|
| 133 |
+
<button
|
| 134 |
+
onClick={() => fileInput?.click()}
|
| 135 |
+
class="flex items-center text-left p-6 rounded-3xl nm-flat opacity-70 hover:opacity-100 transition-all hover:shadow-neu-btn-hover"
|
| 136 |
+
>
|
| 137 |
+
<div class="w-10 h-10 rounded-2xl nm-inset flex items-center justify-center mr-5">
|
| 138 |
+
<span class="material-symbols-outlined text-[var(--color-earthy-soft-brown)] text-xl">file_open</span>
|
| 139 |
+
</div>
|
| 140 |
+
<div>
|
| 141 |
+
<div class="font-bold text-lg leading-tight">Local Model</div>
|
| 142 |
+
<div class="text-[10px] font-black opacity-40 uppercase tracking-widest mt-1">Load from disk</div>
|
| 143 |
+
</div>
|
| 144 |
+
</button>
|
| 145 |
+
</div>
|
| 146 |
+
|
| 147 |
+
<button
|
| 148 |
+
onClick={() => props.onStart()}
|
| 149 |
+
class="w-full mt-6 py-5 bg-[var(--color-earthy-muted-green)] text-white font-extrabold rounded-3xl shadow-xl active:scale-[0.98] transition-all uppercase tracking-widest text-xs"
|
| 150 |
+
>
|
| 151 |
+
Initialize AI Engine
|
| 152 |
+
</button>
|
| 153 |
+
</div>
|
| 154 |
+
</Show>
|
| 155 |
+
|
| 156 |
+
{/* Progress */}
|
| 157 |
+
<Show when={props.state === 'loading'}>
|
| 158 |
+
<div class="mt-4">
|
| 159 |
+
<div class="h-4 nm-inset rounded-full overflow-hidden p-1">
|
| 160 |
+
<div
|
| 161 |
+
class="h-full bg-[var(--color-earthy-muted-green)] rounded-full transition-all duration-300 ease-out shadow-[0_0_12px_var(--color-earthy-muted-green)]"
|
| 162 |
+
style={{ width: progressWidth() }}
|
| 163 |
+
/>
|
| 164 |
+
</div>
|
| 165 |
+
|
| 166 |
+
<div class="flex justify-between items-center mt-6 px-1">
|
| 167 |
+
<div class="flex flex-col">
|
| 168 |
+
<span class="text-[10px] font-black text-[var(--color-earthy-soft-brown)] uppercase tracking-widest leading-none mb-1">Downloaded</span>
|
| 169 |
+
<span class="text-[var(--color-earthy-muted-green)] font-black text-2xl">{props.progress}%</span>
|
| 170 |
+
</div>
|
| 171 |
+
<div class="flex flex-col text-right">
|
| 172 |
+
<span class="text-[10px] font-black text-[var(--color-earthy-soft-brown)] uppercase tracking-widest leading-none mb-1">Active File</span>
|
| 173 |
+
<span class="text-[var(--color-earthy-soft-brown)] font-bold text-[11px] truncate max-w-[200px]">
|
| 174 |
+
{props.file || 'Preparing assets...'}
|
| 175 |
+
</span>
|
| 176 |
+
</div>
|
| 177 |
+
</div>
|
| 178 |
+
</div>
|
| 179 |
+
</Show>
|
| 180 |
+
|
| 181 |
+
<Show when={props.state === 'error'}>
|
| 182 |
+
<div>
|
| 183 |
+
<button
|
| 184 |
+
onClick={() => props.onStart()}
|
| 185 |
+
class="w-full py-5 nm-flat text-[var(--color-earthy-coral)] font-black rounded-3xl shadow-none hover:opacity-90 transition-all"
|
| 186 |
+
>
|
| 187 |
+
Retry Connection
|
| 188 |
+
</button>
|
| 189 |
+
</div>
|
| 190 |
+
</Show>
|
| 191 |
+
</div>
|
| 192 |
+
|
| 193 |
+
{/* Footer */}
|
| 194 |
+
<div class="px-10 py-6 border-t border-[var(--color-earthy-sage)]/30 flex items-center justify-between opacity-80">
|
| 195 |
+
<div class="flex items-center gap-2">
|
| 196 |
+
<span class="material-symbols-outlined text-base text-[var(--color-earthy-soft-brown)]">offline_bolt</span>
|
| 197 |
+
<span class="text-[10px] font-black text-[var(--color-earthy-soft-brown)] uppercase tracking-widest">
|
| 198 |
+
{props.backend === 'webgpu' ? 'GPU Accelerated' : 'WASM Native'}
|
| 199 |
+
</span>
|
| 200 |
+
</div>
|
| 201 |
+
<span class="text-[10px] text-[var(--color-earthy-sage)] font-black tracking-widest">
|
| 202 |
+
PRIVACY SECURED
|
| 203 |
+
</span>
|
| 204 |
+
</div>
|
| 205 |
+
</div>
|
| 206 |
+
</div>
|
| 207 |
+
</div>
|
| 208 |
+
</Show>
|
| 209 |
+
);
|
| 210 |
+
};
|
| 211 |
+
|
| 212 |
+
|
src/components/PrivacyBadge.tsx
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component } from 'solid-js';
|
| 2 |
+
|
| 3 |
+
/**
 * Floating "private & secure" pill pinned near the bottom-right corner.
 * Hovering the pill reveals a tooltip stating that audio and transcription
 * stay on-device. Static markup only — no props or state.
 */
export const PrivacyBadge: Component = () => {
  return (
    <div class="fixed bottom-16 right-8 z-30 group">
      {/* Pill badge; the `group` hover state drives the tooltip below */}
      <div class="nm-flat rounded-full px-5 py-2.5 flex items-center gap-2 cursor-help transition-all hover:scale-105 active:scale-95 group-hover:bg-green-500/5">
        <span class="material-icons-round text-green-500 text-sm shadow-[0_0_8px_rgba(34,197,94,0.4)]">shield</span>
        <span class="text-[10px] font-black text-green-600 dark:text-green-400 tracking-widest uppercase">Private_Secure</span>
      </div>

      {/* Tooltip: fades/slides in on hover, never intercepts pointer events */}
      <div class="absolute bottom-full right-0 mb-6 w-64 p-5 nm-flat rounded-[28px] opacity-0 group-hover:opacity-100 pointer-events-none transition-all transform translate-y-4 group-hover:translate-y-0">
        <h4 class="font-black text-xs mb-2 tracking-tight uppercase text-slate-700 dark:text-slate-200">Local_Vault_Secure</h4>
        <p class="text-[11px] text-slate-500 leading-relaxed font-medium">
          Your audio never leaves this device. All transcription and AI processing happens locally in your browser's WebGPU sandbox.
        </p>
      </div>
    </div>
  );
};
|
src/components/SettingsPanel.tsx
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, For, Show } from 'solid-js';
|
| 2 |
+
import { appStore } from '../stores/appStore';
|
| 3 |
+
import { getModelDisplayName, MODELS } from './ModelLoadingOverlay';
|
| 4 |
+
import type { AudioEngine } from '../lib/audio/types';
|
| 5 |
+
|
| 6 |
+
const formatInterval = (ms: number) => {
|
| 7 |
+
if (ms >= 1000) return `${(ms / 1000).toFixed(1)}s`;
|
| 8 |
+
return `${ms}ms`;
|
| 9 |
+
};
|
| 10 |
+
|
| 11 |
+
/** Which slice of the settings UI to render ('full' shows every section). */
export type SettingsPanelSection = 'full' | 'audio' | 'model';

export interface SettingsContentProps {
  /** When 'audio' or 'model', only that section is shown (e.g. hover on mic or load button). */
  section?: SettingsPanelSection;
  /** Invoked when the panel should be dismissed. */
  onClose: () => void;
  /** Invoked to start loading the currently selected ASR model. */
  onLoadModel: () => void;
  /** Optional handler receiving model files picked from disk. */
  onLocalLoad?: (files: FileList) => void;
  /** Invoked to open the debug view. */
  onOpenDebug: () => void;
  /** Optional handler invoked with a chosen audio input device id — TODO confirm against caller. */
  onDeviceSelect?: (id: string) => void;
  /** Audio engine instance for device/audio settings; may be absent before initialization. */
  audioEngine?: AudioEngine | null;
  /** When true, panel expands upward (bar in lower half); content order is reversed so ASR model stays adjacent to the bar. */
  expandUp?: () => boolean;
}
|
| 25 |
+
|
| 26 |
+
/** Embeddable settings form (e.g. inside floating bar expansion). */
|
| 27 |
+
export const SettingsContent: Component<SettingsContentProps> = (props) => {
|
| 28 |
+
const isV4 = () => appStore.transcriptionMode() === 'v4-utterance';
|
| 29 |
+
const isV3 = () => appStore.transcriptionMode() === 'v3-streaming';
|
| 30 |
+
|
| 31 |
+
const expandUp = () => props.expandUp?.() ?? false;
|
| 32 |
+
const section = () => props.section ?? 'full';
|
| 33 |
+
const showAsr = () => section() === 'full' || section() === 'model';
|
| 34 |
+
const showAudio = () => section() === 'full' || section() === 'audio';
|
| 35 |
+
const showSliders = () => section() === 'full';
|
| 36 |
+
const showDebug = () => section() === 'full';
|
| 37 |
+
|
| 38 |
+
return (
|
| 39 |
+
<div class="flex flex-col min-h-0">
|
| 40 |
+
<div
|
| 41 |
+
class="flex flex-col flex-1 min-h-0 overflow-y-auto p-3 gap-4 custom-scrollbar"
|
| 42 |
+
classList={{ 'flex-col-reverse': expandUp() }}
|
| 43 |
+
>
|
| 44 |
+
<Show when={showAsr()}>
|
| 45 |
+
<section class="space-y-2">
|
| 46 |
+
<h3 class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">ASR model</h3>
|
| 47 |
+
<div class="flex items-center gap-2 flex-wrap">
|
| 48 |
+
<select
|
| 49 |
+
class="flex-1 min-w-0 text-sm bg-transparent border-b border-[var(--color-earthy-sage)]/40 px-0 py-1.5 text-[var(--color-earthy-dark-brown)] focus:outline-none focus:border-[var(--color-earthy-muted-green)]"
|
| 50 |
+
value={appStore.selectedModelId()}
|
| 51 |
+
onInput={(e) => appStore.setSelectedModelId((e.target as HTMLSelectElement).value)}
|
| 52 |
+
disabled={appStore.modelState() === 'loading'}
|
| 53 |
+
>
|
| 54 |
+
<For each={MODELS}>
|
| 55 |
+
{(m) => <option value={m.id}>{m.name}</option>}
|
| 56 |
+
</For>
|
| 57 |
+
</select>
|
| 58 |
+
<button
|
| 59 |
+
type="button"
|
| 60 |
+
onClick={props.onLoadModel}
|
| 61 |
+
disabled={appStore.modelState() === 'ready' || appStore.modelState() === 'loading'}
|
| 62 |
+
class="flex items-center gap-1.5 px-3 py-1.5 text-sm font-medium text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-sage)]/20 transition-colors disabled:opacity-50 disabled:cursor-not-allowed shrink-0"
|
| 63 |
+
>
|
| 64 |
+
<span class="material-symbols-outlined text-lg">power_settings_new</span>
|
| 65 |
+
{appStore.modelState() === 'ready' ? 'Loaded' : appStore.modelState() === 'loading' ? '...' : 'Load'}
|
| 66 |
+
</button>
|
| 67 |
+
<Show when={props.onLocalLoad}>
|
| 68 |
+
<label class="flex items-center gap-1.5 px-3 py-1.5 text-sm font-medium text-[var(--color-earthy-soft-brown)] hover:bg-[var(--color-earthy-sage)]/20 transition-colors cursor-pointer shrink-0">
|
| 69 |
+
<span class="material-symbols-outlined text-lg">folder_open</span>
|
| 70 |
+
Load from file
|
| 71 |
+
<input
|
| 72 |
+
type="file"
|
| 73 |
+
multiple
|
| 74 |
+
class="hidden"
|
| 75 |
+
accept=".onnx,.bin"
|
| 76 |
+
onChange={(e) => {
|
| 77 |
+
const files = e.currentTarget.files;
|
| 78 |
+
if (files && files.length > 0) props.onLocalLoad?.(files);
|
| 79 |
+
e.currentTarget.value = '';
|
| 80 |
+
}}
|
| 81 |
+
/>
|
| 82 |
+
</label>
|
| 83 |
+
</Show>
|
| 84 |
+
</div>
|
| 85 |
+
<p class="text-xs text-[var(--color-earthy-soft-brown)]">
|
| 86 |
+
{appStore.modelState() === 'ready' ? getModelDisplayName(appStore.selectedModelId()) : appStore.modelState()}
|
| 87 |
+
</p>
|
| 88 |
+
<Show when={appStore.modelState() === 'loading'}>
|
| 89 |
+
<div class="space-y-1">
|
| 90 |
+
<div class="flex justify-between text-xs">
|
| 91 |
+
<span>{appStore.modelMessage()}</span>
|
| 92 |
+
<span class="font-mono text-[var(--color-earthy-muted-green)]">{Math.round(appStore.modelProgress())}%</span>
|
| 93 |
+
</div>
|
| 94 |
+
<div class="h-1.5 rounded-full overflow-hidden bg-[var(--color-earthy-sage)]/20">
|
| 95 |
+
<div
|
| 96 |
+
class="h-full bg-[var(--color-earthy-muted-green)] rounded-full transition-all duration-300"
|
| 97 |
+
style={{ width: `${Math.max(0, Math.min(100, appStore.modelProgress()))}%` }}
|
| 98 |
+
/>
|
| 99 |
+
</div>
|
| 100 |
+
</div>
|
| 101 |
+
</Show>
|
| 102 |
+
</section>
|
| 103 |
+
</Show>
|
| 104 |
+
|
| 105 |
+
<Show when={showAudio()}>
|
| 106 |
+
<section class="space-y-2">
|
| 107 |
+
<h3 class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">Audio input</h3>
|
| 108 |
+
<select
|
| 109 |
+
class="w-full text-sm bg-transparent border-b border-[var(--color-earthy-sage)]/40 px-0 py-1.5 text-[var(--color-earthy-dark-brown)] focus:outline-none focus:border-[var(--color-earthy-muted-green)]"
|
| 110 |
+
value={appStore.selectedDeviceId()}
|
| 111 |
+
onInput={(e) => {
|
| 112 |
+
const id = (e.target as HTMLSelectElement).value;
|
| 113 |
+
appStore.setSelectedDeviceId(id);
|
| 114 |
+
props.onDeviceSelect?.(id);
|
| 115 |
+
}}
|
| 116 |
+
>
|
| 117 |
+
<For each={appStore.availableDevices()}>
|
| 118 |
+
{(device) => (
|
| 119 |
+
<option value={device.deviceId}>
|
| 120 |
+
{device.label || `Device ${device.deviceId.slice(0, 8)}`}
|
| 121 |
+
</option>
|
| 122 |
+
)}
|
| 123 |
+
</For>
|
| 124 |
+
</select>
|
| 125 |
+
</section>
|
| 126 |
+
</Show>
|
| 127 |
+
|
| 128 |
+
<Show when={showSliders()}>
|
| 129 |
+
<section class="grid grid-cols-2 gap-x-4 gap-y-3">
|
| 130 |
+
<div class="space-y-1.5 min-w-0">
|
| 131 |
+
<div class="flex justify-between items-center gap-2">
|
| 132 |
+
<span class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">Energy threshold</span>
|
| 133 |
+
<span class="text-sm text-[var(--color-earthy-dark-brown)] tabular-nums shrink-0">{(appStore.energyThreshold() * 100).toFixed(1)}%</span>
|
| 134 |
+
</div>
|
| 135 |
+
<input
|
| 136 |
+
type="range" min="0.005" max="0.3" step="0.005"
|
| 137 |
+
value={appStore.energyThreshold()}
|
| 138 |
+
onInput={(e) => {
|
| 139 |
+
const val = parseFloat(e.currentTarget.value);
|
| 140 |
+
appStore.setEnergyThreshold(val);
|
| 141 |
+
props.audioEngine?.updateConfig({ energyThreshold: val });
|
| 142 |
+
}}
|
| 143 |
+
class="debug-slider w-full h-2 rounded-full appearance-none cursor-pointer bg-[var(--color-earthy-sage)]/30"
|
| 144 |
+
/>
|
| 145 |
+
</div>
|
| 146 |
+
|
| 147 |
+
<Show when={isV4()}>
|
| 148 |
+
<div class="space-y-1.5 min-w-0">
|
| 149 |
+
<div class="flex justify-between items-center gap-2">
|
| 150 |
+
<span class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">VAD threshold</span>
|
| 151 |
+
<span class="text-sm text-[var(--color-earthy-dark-brown)] tabular-nums shrink-0">{(appStore.sileroThreshold() * 100).toFixed(0)}%</span>
|
| 152 |
+
</div>
|
| 153 |
+
<input
|
| 154 |
+
type="range" min="0.1" max="0.9" step="0.05"
|
| 155 |
+
value={appStore.sileroThreshold()}
|
| 156 |
+
onInput={(e) => appStore.setSileroThreshold(parseFloat(e.currentTarget.value))}
|
| 157 |
+
class="debug-slider w-full h-2 rounded-full appearance-none cursor-pointer bg-[var(--color-earthy-sage)]/30"
|
| 158 |
+
/>
|
| 159 |
+
</div>
|
| 160 |
+
<div class="space-y-1.5 min-w-0">
|
| 161 |
+
<div class="flex justify-between items-center gap-2">
|
| 162 |
+
<span class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">Tick interval</span>
|
| 163 |
+
<span class="text-sm text-[var(--color-earthy-dark-brown)] tabular-nums shrink-0">{formatInterval(appStore.v4InferenceIntervalMs())}</span>
|
| 164 |
+
</div>
|
| 165 |
+
<input
|
| 166 |
+
type="range" min="160" max="8000" step="80"
|
| 167 |
+
value={appStore.v4InferenceIntervalMs()}
|
| 168 |
+
onInput={(e) => appStore.setV4InferenceIntervalMs(parseInt(e.currentTarget.value))}
|
| 169 |
+
class="debug-slider w-full h-2 rounded-full appearance-none cursor-pointer bg-[var(--color-earthy-sage)]/30"
|
| 170 |
+
/>
|
| 171 |
+
<div class="flex justify-between text-[9px] text-[var(--color-earthy-soft-brown)]">
|
| 172 |
+
<span>320ms</span>
|
| 173 |
+
<span>8.0s</span>
|
| 174 |
+
</div>
|
| 175 |
+
</div>
|
| 176 |
+
<div class="space-y-1.5 min-w-0">
|
| 177 |
+
<div class="flex justify-between items-center gap-2">
|
| 178 |
+
<span class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">Silence flush</span>
|
| 179 |
+
<span class="text-sm text-[var(--color-earthy-dark-brown)] tabular-nums shrink-0">{appStore.v4SilenceFlushSec().toFixed(1)}s</span>
|
| 180 |
+
</div>
|
| 181 |
+
<input
|
| 182 |
+
type="range" min="0.3" max="5.0" step="0.1"
|
| 183 |
+
value={appStore.v4SilenceFlushSec()}
|
| 184 |
+
onInput={(e) => appStore.setV4SilenceFlushSec(parseFloat(e.currentTarget.value))}
|
| 185 |
+
class="debug-slider w-full h-2 rounded-full appearance-none cursor-pointer bg-[var(--color-earthy-sage)]/30"
|
| 186 |
+
/>
|
| 187 |
+
</div>
|
| 188 |
+
</Show>
|
| 189 |
+
|
| 190 |
+
<Show when={isV3()}>
|
| 191 |
+
<div class="space-y-1.5 min-w-0">
|
| 192 |
+
<div class="flex justify-between items-center gap-2">
|
| 193 |
+
<span class="text-[10px] font-bold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">Window</span>
|
| 194 |
+
<span class="text-sm text-[var(--color-earthy-dark-brown)] tabular-nums shrink-0">{appStore.streamingWindow().toFixed(1)}s</span>
|
| 195 |
+
</div>
|
| 196 |
+
<input
|
| 197 |
+
type="range" min="2.0" max="15.0" step="0.5"
|
| 198 |
+
value={appStore.streamingWindow()}
|
| 199 |
+
onInput={(e) => appStore.setStreamingWindow(parseFloat(e.currentTarget.value))}
|
| 200 |
+
class="debug-slider w-full h-2 rounded-full appearance-none cursor-pointer bg-[var(--color-earthy-sage)]/30"
|
| 201 |
+
/>
|
| 202 |
+
</div>
|
| 203 |
+
</Show>
|
| 204 |
+
</section>
|
| 205 |
+
</Show>
|
| 206 |
+
|
| 207 |
+
<Show when={showDebug()}>
|
| 208 |
+
<div class="pt-2">
|
| 209 |
+
<button
|
| 210 |
+
type="button"
|
| 211 |
+
onClick={() => {
|
| 212 |
+
props.onOpenDebug();
|
| 213 |
+
props.onClose();
|
| 214 |
+
}}
|
| 215 |
+
class="flex items-center gap-2 px-0 py-2 text-sm font-medium text-[var(--color-earthy-muted-green)] hover:opacity-80 transition-opacity w-full"
|
| 216 |
+
>
|
| 217 |
+
<span class="material-symbols-outlined text-lg">bug_report</span>
|
| 218 |
+
Open Debug panel
|
| 219 |
+
</button>
|
| 220 |
+
</div>
|
| 221 |
+
</Show>
|
| 222 |
+
</div>
|
| 223 |
+
</div>
|
| 224 |
+
);
|
| 225 |
+
};
|
src/components/Sidebar.tsx
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, For, Show, createEffect, createSignal, onCleanup } from 'solid-js';
|
| 2 |
+
|
| 3 |
+
interface SidebarProps {
|
| 4 |
+
activeTab: string;
|
| 5 |
+
onTabChange: (tab: string) => void;
|
| 6 |
+
// Recording controls
|
| 7 |
+
isRecording: boolean;
|
| 8 |
+
onToggleRecording: () => void;
|
| 9 |
+
// Model state
|
| 10 |
+
isModelReady: boolean;
|
| 11 |
+
onLoadModel: () => void;
|
| 12 |
+
modelState: string;
|
| 13 |
+
// Device selection
|
| 14 |
+
availableDevices: MediaDeviceInfo[];
|
| 15 |
+
selectedDeviceId: string;
|
| 16 |
+
onDeviceSelect: (id: string) => void;
|
| 17 |
+
// Audio feedback
|
| 18 |
+
audioLevel: number;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
export const Sidebar: Component<SidebarProps> = (props) => {
|
| 22 |
+
const [showDevices, setShowDevices] = createSignal(false);
|
| 23 |
+
let triggerContainerRef: HTMLDivElement | undefined;
|
| 24 |
+
let popoverRef: HTMLDivElement | undefined;
|
| 25 |
+
|
| 26 |
+
createEffect(() => {
|
| 27 |
+
if (!showDevices()) return;
|
| 28 |
+
const onMouseDown = (e: MouseEvent) => {
|
| 29 |
+
const target = e.target as Node;
|
| 30 |
+
if (triggerContainerRef?.contains(target) || popoverRef?.contains(target)) return;
|
| 31 |
+
setShowDevices(false);
|
| 32 |
+
};
|
| 33 |
+
const onKeyDown = (e: KeyboardEvent) => {
|
| 34 |
+
if (e.key === 'Escape') setShowDevices(false);
|
| 35 |
+
};
|
| 36 |
+
document.addEventListener('mousedown', onMouseDown);
|
| 37 |
+
document.addEventListener('keydown', onKeyDown);
|
| 38 |
+
onCleanup(() => {
|
| 39 |
+
document.removeEventListener('mousedown', onMouseDown);
|
| 40 |
+
document.removeEventListener('keydown', onKeyDown);
|
| 41 |
+
});
|
| 42 |
+
});
|
| 43 |
+
|
| 44 |
+
return (
|
| 45 |
+
<aside class="w-20 min-w-[80px] bg-neu-bg flex flex-col items-center py-6 h-full border-r border-sidebar-border/30">
|
| 46 |
+
{/* Power Button - Reflects System Readiness; disabled when model already loaded or loading */}
|
| 47 |
+
<div class="mb-8 relative">
|
| 48 |
+
<button
|
| 49 |
+
onClick={() => !props.isModelReady && props.modelState !== 'loading' && props.onLoadModel()}
|
| 50 |
+
disabled={props.isModelReady || props.modelState === 'loading'}
|
| 51 |
+
class="neu-circle-btn text-slate-600 transition-all active:scale-95 disabled:opacity-70 disabled:cursor-not-allowed disabled:active:scale-100"
|
| 52 |
+
title={props.modelState === 'loading' ? "Loading..." : props.isModelReady ? "Model Loaded" : "Load Model"}
|
| 53 |
+
>
|
| 54 |
+
<span class="material-symbols-outlined text-xl">power_settings_new</span>
|
| 55 |
+
<span class={`status-led ${props.isModelReady ? 'bg-green-500 shadow-[0_0_8px_#22c55e]' : 'bg-slate-300'}`}></span>
|
| 56 |
+
</button>
|
| 57 |
+
</div>
|
| 58 |
+
|
| 59 |
+
<nav class="flex flex-col gap-6 items-center w-full px-2">
|
| 60 |
+
{/* Record Button - Always enabled, recording works even before model is loaded */}
|
| 61 |
+
<button
|
| 62 |
+
onClick={() => props.onToggleRecording()}
|
| 63 |
+
class={`neu-circle-btn transition-all active:scale-95 ${props.isRecording ? 'text-red-500 active' : 'text-slate-500'}`}
|
| 64 |
+
title={props.isRecording ? "Stop Recording" : "Start Recording"}
|
| 65 |
+
>
|
| 66 |
+
<span class="material-symbols-outlined text-xl">mic</span>
|
| 67 |
+
</button>
|
| 68 |
+
|
| 69 |
+
<div class="w-8 h-[1px] bg-slate-300/60 my-2"></div>
|
| 70 |
+
|
| 71 |
+
{/* Model Selection Icon */}
|
| 72 |
+
<button
|
| 73 |
+
onClick={() => props.onLoadModel()}
|
| 74 |
+
class={`neu-square-btn transition-all active:scale-95 ${props.activeTab === 'ai' ? 'active' : 'text-slate-500'}`}
|
| 75 |
+
title="AI Model Selection"
|
| 76 |
+
>
|
| 77 |
+
<span class="material-symbols-outlined text-xl">psychology</span>
|
| 78 |
+
</button>
|
| 79 |
+
|
| 80 |
+
{/* Device Selection Popover Trigger */}
|
| 81 |
+
<div class="relative" ref={(el) => { triggerContainerRef = el; }}>
|
| 82 |
+
<button
|
| 83 |
+
class={`neu-square-btn transition-all active:scale-95 ${showDevices() ? 'active' : 'text-slate-500'}`}
|
| 84 |
+
onClick={() => setShowDevices(!showDevices())}
|
| 85 |
+
title="Audio Input Selection"
|
| 86 |
+
>
|
| 87 |
+
<span class="material-symbols-outlined text-xl">settings_input_composite</span>
|
| 88 |
+
</button>
|
| 89 |
+
|
| 90 |
+
{/* Device Selection Popover */}
|
| 91 |
+
<Show when={showDevices()}>
|
| 92 |
+
<div ref={(el) => { popoverRef = el; }} class="absolute left-full bottom-0 ml-6 w-64 nm-flat rounded-[32px] p-4 z-50 animate-in fade-in slide-in-from-left-2 duration-200">
|
| 93 |
+
<div class="text-[9px] font-black text-slate-400 p-2 uppercase tracking-widest mb-2 border-b border-slate-200">Mechanical_Input</div>
|
| 94 |
+
<div class="flex flex-col gap-1 max-h-64 overflow-y-auto pr-1">
|
| 95 |
+
<For each={props.availableDevices}>
|
| 96 |
+
{(device) => (
|
| 97 |
+
<button
|
| 98 |
+
class={`w-full text-left px-4 py-3 rounded-2xl text-xs transition-all flex items-center gap-3 ${props.selectedDeviceId === device.deviceId
|
| 99 |
+
? 'nm-inset text-primary font-bold'
|
| 100 |
+
: 'text-slate-600 hover:nm-flat'
|
| 101 |
+
}`}
|
| 102 |
+
onClick={() => {
|
| 103 |
+
props.onDeviceSelect(device.deviceId);
|
| 104 |
+
setShowDevices(false);
|
| 105 |
+
}}
|
| 106 |
+
>
|
| 107 |
+
<span class="material-symbols-outlined text-lg opacity-40">mic</span>
|
| 108 |
+
<span class="truncate font-medium">{device.label || `Channel ${device.deviceId.slice(0, 4)}`}</span>
|
| 109 |
+
</button>
|
| 110 |
+
)}
|
| 111 |
+
</For>
|
| 112 |
+
</div>
|
| 113 |
+
</div>
|
| 114 |
+
</Show>
|
| 115 |
+
</div>
|
| 116 |
+
|
| 117 |
+
{/* Placeholder Items matching design */}
|
| 118 |
+
<button class="neu-square-btn text-slate-300 cursor-not-allowed" title="Translation (Pro)">
|
| 119 |
+
<span class="material-symbols-outlined text-xl">translate</span>
|
| 120 |
+
</button>
|
| 121 |
+
|
| 122 |
+
<button class="neu-square-btn text-slate-500" title="Export Transcript" onClick={() => (window as any).appStore?.copyTranscript()}>
|
| 123 |
+
<span class="material-symbols-outlined text-xl">download</span>
|
| 124 |
+
</button>
|
| 125 |
+
</nav>
|
| 126 |
+
|
| 127 |
+
<div class="mt-auto">
|
| 128 |
+
<button
|
| 129 |
+
class={`neu-square-btn transition-all active:scale-95 ${props.activeTab === 'settings' ? 'active' : 'text-slate-500'}`}
|
| 130 |
+
onClick={() => props.onTabChange('settings')}
|
| 131 |
+
title="Settings"
|
| 132 |
+
>
|
| 133 |
+
<span class="material-symbols-outlined text-xl">settings</span>
|
| 134 |
+
</button>
|
| 135 |
+
</div>
|
| 136 |
+
</aside>
|
| 137 |
+
);
|
| 138 |
+
};
|
| 139 |
+
|
| 140 |
+
export default Sidebar;
|
| 141 |
+
|
| 142 |
+
|
src/components/StatusBar.tsx
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, Show } from 'solid-js';
|
| 2 |
+
import { appStore } from '../stores/appStore';
|
| 3 |
+
|
| 4 |
+
export const StatusBar: Component = () => {
|
| 5 |
+
const modelStatusText = () => {
|
| 6 |
+
switch (appStore.modelState()) {
|
| 7 |
+
case 'unloaded': return 'Model not loaded';
|
| 8 |
+
case 'loading': return appStore.modelMessage() || `Loading... ${appStore.modelProgress()}%`;
|
| 9 |
+
case 'ready': return 'Ready';
|
| 10 |
+
case 'error': return 'Error';
|
| 11 |
+
default: return '';
|
| 12 |
+
}
|
| 13 |
+
};
|
| 14 |
+
|
| 15 |
+
const statusDotClass = () => {
|
| 16 |
+
switch (appStore.modelState()) {
|
| 17 |
+
case 'ready': return 'bg-green-500 shadow-[0_0_8px_rgba(34,197,94,0.6)]';
|
| 18 |
+
case 'loading': return 'bg-yellow-500 animate-pulse';
|
| 19 |
+
case 'error': return 'bg-red-500';
|
| 20 |
+
default: return 'bg-gray-400';
|
| 21 |
+
}
|
| 22 |
+
};
|
| 23 |
+
|
| 24 |
+
return (
|
| 25 |
+
<div class="flex-none h-10 nm-inset mx-4 mb-4 rounded-2xl px-6 flex items-center justify-between text-[10px] font-bold text-slate-400 uppercase tracking-widest transition-all duration-300">
|
| 26 |
+
<div class="flex items-center gap-8">
|
| 27 |
+
<div class="flex items-center gap-2">
|
| 28 |
+
<div class={`w-2 h-2 rounded-full ${statusDotClass()}`}></div>
|
| 29 |
+
<span class="text-slate-600 dark:text-slate-300">{modelStatusText()}</span>
|
| 30 |
+
</div>
|
| 31 |
+
|
| 32 |
+
<div class="flex items-center gap-2 opacity-60">
|
| 33 |
+
<span class="material-icons-round text-sm">memory</span>
|
| 34 |
+
<span>BACKEND: <span class="text-blue-500 font-black">{appStore.backend().toUpperCase()}</span></span>
|
| 35 |
+
</div>
|
| 36 |
+
</div>
|
| 37 |
+
|
| 38 |
+
<div class="flex items-center gap-8">
|
| 39 |
+
<div class="flex items-center gap-2 opacity-40 hover:opacity-100 transition-opacity cursor-default">
|
| 40 |
+
<span class="text-[8px] font-black tracking-[0.2em]">BUILD: 20250828.VAD_REFIX</span>
|
| 41 |
+
</div>
|
| 42 |
+
<Show when={appStore.isOfflineReady()}>
|
| 43 |
+
<div class="flex items-center gap-1.5 text-indigo-500 font-black">
|
| 44 |
+
<span class="material-icons-round text-sm">offline_bolt</span>
|
| 45 |
+
<span>100% On-Device</span>
|
| 46 |
+
</div>
|
| 47 |
+
</Show>
|
| 48 |
+
<div class="flex items-center gap-1.5 opacity-80">
|
| 49 |
+
<div class={`w-2 h-2 rounded-full ${appStore.isOnline() ? 'bg-green-500 shadow-[0_0_8px_rgba(34,197,94,0.4)]' : 'bg-red-500 shadow-[0_0_8px_rgba(239,68,68,0.4)]'}`}></div>
|
| 50 |
+
<span>{appStore.isOnline() ? 'SYNC: CONNECTED' : 'SYNC: OFFLINE'}</span>
|
| 51 |
+
</div>
|
| 52 |
+
</div>
|
| 53 |
+
</div>
|
| 54 |
+
);
|
| 55 |
+
};
|
src/components/TranscriptionDisplay.tsx
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Component, For, Show, createEffect, createMemo, createSignal, onCleanup } from 'solid-js';
|
| 2 |
+
import type { V4SentenceEntry } from '../lib/transcription/TranscriptionWorkerClient';
|
| 3 |
+
|
| 4 |
+
export interface TranscriptionDisplayProps {
|
| 5 |
+
confirmedText: string;
|
| 6 |
+
pendingText: string;
|
| 7 |
+
sentenceEntries?: V4SentenceEntry[];
|
| 8 |
+
isV4Mode?: boolean;
|
| 9 |
+
isRecording: boolean;
|
| 10 |
+
lcsLength?: number;
|
| 11 |
+
anchorValid?: boolean;
|
| 12 |
+
showConfidence?: boolean;
|
| 13 |
+
placeholder?: string;
|
| 14 |
+
class?: string;
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
const formatClockTime = (timestamp: number): string => {
|
| 18 |
+
if (!Number.isFinite(timestamp)) return '--:--:--';
|
| 19 |
+
return new Date(timestamp).toLocaleTimeString([], {
|
| 20 |
+
hour12: false,
|
| 21 |
+
hour: '2-digit',
|
| 22 |
+
minute: '2-digit',
|
| 23 |
+
second: '2-digit',
|
| 24 |
+
});
|
| 25 |
+
};
|
| 26 |
+
|
| 27 |
+
const formatAudioTime = (seconds: number): string => {
|
| 28 |
+
if (!Number.isFinite(seconds)) return '0:00.00';
|
| 29 |
+
const totalSeconds = Math.max(0, seconds);
|
| 30 |
+
const minutes = Math.floor(totalSeconds / 60);
|
| 31 |
+
const secondPart = (totalSeconds % 60).toFixed(2).padStart(5, '0');
|
| 32 |
+
return `${minutes}:${secondPart}`;
|
| 33 |
+
};
|
| 34 |
+
|
| 35 |
+
const formatAudioRange = (startTime: number, endTime: number): string =>
|
| 36 |
+
`${formatAudioTime(startTime)} -> ${formatAudioTime(endTime)}`;
|
| 37 |
+
|
| 38 |
+
const MERGED_SPLIT_STORAGE_KEY = 'keet-merged-split-ratio';
|
| 39 |
+
const MIN_MERGED_SPLIT_RATIO = 0.3;
|
| 40 |
+
const MAX_MERGED_SPLIT_RATIO = 0.7;
|
| 41 |
+
|
| 42 |
+
const clampMergedSplitRatio = (ratio: number): number =>
|
| 43 |
+
Math.min(MAX_MERGED_SPLIT_RATIO, Math.max(MIN_MERGED_SPLIT_RATIO, ratio));
|
| 44 |
+
|
| 45 |
+
const getInitialMergedSplitRatio = (): number => {
|
| 46 |
+
if (typeof localStorage === 'undefined') return 0.5;
|
| 47 |
+
try {
|
| 48 |
+
const raw = Number(localStorage.getItem(MERGED_SPLIT_STORAGE_KEY));
|
| 49 |
+
if (Number.isFinite(raw)) return clampMergedSplitRatio(raw);
|
| 50 |
+
} catch (_) {}
|
| 51 |
+
return 0.5;
|
| 52 |
+
};
|
| 53 |
+
|
| 54 |
+
export const TranscriptionDisplay: Component<TranscriptionDisplayProps> = (props) => {
|
| 55 |
+
let liveContainerRef: HTMLDivElement | undefined;
|
| 56 |
+
let mergedContainerRef: HTMLDivElement | undefined;
|
| 57 |
+
let mergedSplitContainerRef: HTMLDivElement | undefined;
|
| 58 |
+
let sentenceListDesktopRef: HTMLDivElement | undefined;
|
| 59 |
+
let sentenceListMobileRef: HTMLDivElement | undefined;
|
| 60 |
+
let scrollScheduled = false;
|
| 61 |
+
const [activeTab, setActiveTab] = createSignal<'live' | 'merged'>('live');
|
| 62 |
+
const [mergedSplitRatio, setMergedSplitRatio] = createSignal(getInitialMergedSplitRatio());
|
| 63 |
+
const [isSplitResizing, setIsSplitResizing] = createSignal(false);
|
| 64 |
+
let splitMouseMoveHandler: ((event: MouseEvent) => void) | null = null;
|
| 65 |
+
let splitMouseUpHandler: (() => void) | null = null;
|
| 66 |
+
|
| 67 |
+
const scrollToBottom = () => {
|
| 68 |
+
if (scrollScheduled) return;
|
| 69 |
+
scrollScheduled = true;
|
| 70 |
+
requestAnimationFrame(() => {
|
| 71 |
+
scrollScheduled = false;
|
| 72 |
+
const activeContainer = activeTab() === 'merged' ? mergedContainerRef : liveContainerRef;
|
| 73 |
+
if (activeContainer) {
|
| 74 |
+
activeContainer.scrollTop = activeContainer.scrollHeight;
|
| 75 |
+
}
|
| 76 |
+
});
|
| 77 |
+
};
|
| 78 |
+
|
| 79 |
+
const getVisibleSentenceListContainer = (): HTMLDivElement | undefined => {
|
| 80 |
+
if (sentenceListDesktopRef && sentenceListDesktopRef.offsetParent !== null) {
|
| 81 |
+
return sentenceListDesktopRef;
|
| 82 |
+
}
|
| 83 |
+
if (sentenceListMobileRef && sentenceListMobileRef.offsetParent !== null) {
|
| 84 |
+
return sentenceListMobileRef;
|
| 85 |
+
}
|
| 86 |
+
return sentenceListDesktopRef ?? sentenceListMobileRef;
|
| 87 |
+
};
|
| 88 |
+
|
| 89 |
+
const scrollSentenceListToBottom = () => {
|
| 90 |
+
requestAnimationFrame(() => {
|
| 91 |
+
const container = getVisibleSentenceListContainer();
|
| 92 |
+
if (!container) return;
|
| 93 |
+
container.scrollTop = container.scrollHeight;
|
| 94 |
+
});
|
| 95 |
+
};
|
| 96 |
+
|
| 97 |
+
const persistMergedSplitRatio = (ratio: number) => {
|
| 98 |
+
if (typeof localStorage === 'undefined') return;
|
| 99 |
+
try {
|
| 100 |
+
localStorage.setItem(MERGED_SPLIT_STORAGE_KEY, String(ratio));
|
| 101 |
+
} catch (_) {}
|
| 102 |
+
};
|
| 103 |
+
|
| 104 |
+
const startSplitResize = (event: MouseEvent) => {
|
| 105 |
+
if (!mergedSplitContainerRef) return;
|
| 106 |
+
event.preventDefault();
|
| 107 |
+
|
| 108 |
+
const rect = mergedSplitContainerRef.getBoundingClientRect();
|
| 109 |
+
if (rect.width <= 0) return;
|
| 110 |
+
|
| 111 |
+
setIsSplitResizing(true);
|
| 112 |
+
document.body.style.cursor = 'col-resize';
|
| 113 |
+
document.body.style.userSelect = 'none';
|
| 114 |
+
|
| 115 |
+
const applyRatioFromClientX = (clientX: number) => {
|
| 116 |
+
const nextRatio = clampMergedSplitRatio((clientX - rect.left) / rect.width);
|
| 117 |
+
setMergedSplitRatio(nextRatio);
|
| 118 |
+
};
|
| 119 |
+
|
| 120 |
+
const onMouseMove = (moveEvent: MouseEvent) => {
|
| 121 |
+
applyRatioFromClientX(moveEvent.clientX);
|
| 122 |
+
};
|
| 123 |
+
|
| 124 |
+
const onMouseUp = () => {
|
| 125 |
+
setIsSplitResizing(false);
|
| 126 |
+
document.body.style.cursor = '';
|
| 127 |
+
document.body.style.userSelect = '';
|
| 128 |
+
persistMergedSplitRatio(mergedSplitRatio());
|
| 129 |
+
window.removeEventListener('mousemove', onMouseMove);
|
| 130 |
+
window.removeEventListener('mouseup', onMouseUp);
|
| 131 |
+
splitMouseMoveHandler = null;
|
| 132 |
+
splitMouseUpHandler = null;
|
| 133 |
+
};
|
| 134 |
+
|
| 135 |
+
splitMouseMoveHandler = onMouseMove;
|
| 136 |
+
splitMouseUpHandler = onMouseUp;
|
| 137 |
+
window.addEventListener('mousemove', onMouseMove);
|
| 138 |
+
window.addEventListener('mouseup', onMouseUp);
|
| 139 |
+
};
|
| 140 |
+
|
| 141 |
+
createEffect(() => {
|
| 142 |
+
if (!props.isV4Mode && activeTab() !== 'live') {
|
| 143 |
+
setActiveTab('live');
|
| 144 |
+
}
|
| 145 |
+
});
|
| 146 |
+
|
| 147 |
+
const hasContent = createMemo(() =>
|
| 148 |
+
(props.confirmedText?.length ?? 0) > 0 || (props.pendingText?.length ?? 0) > 0
|
| 149 |
+
);
|
| 150 |
+
|
| 151 |
+
const finalizedEntries = createMemo(() => props.sentenceEntries ?? []);
|
| 152 |
+
const mergedCount = createMemo(() => finalizedEntries().length + (props.pendingText?.trim() ? 1 : 0));
|
| 153 |
+
const fullTextBody = createMemo(() => {
|
| 154 |
+
const finalized = finalizedEntries()
|
| 155 |
+
.map((entry) => entry.text.trim())
|
| 156 |
+
.filter((text) => text.length > 0)
|
| 157 |
+
.join(' ')
|
| 158 |
+
.trim();
|
| 159 |
+
const live = props.pendingText.trim();
|
| 160 |
+
if (finalized && live) return `${finalized} ${live}`.trim();
|
| 161 |
+
return finalized || live || '';
|
| 162 |
+
});
|
| 163 |
+
|
| 164 |
+
createEffect(() => {
|
| 165 |
+
activeTab();
|
| 166 |
+
props.confirmedText;
|
| 167 |
+
props.pendingText;
|
| 168 |
+
finalizedEntries().length;
|
| 169 |
+
scrollToBottom();
|
| 170 |
+
});
|
| 171 |
+
|
| 172 |
+
createEffect(() => {
|
| 173 |
+
if (!props.isV4Mode || activeTab() !== 'merged') return;
|
| 174 |
+
finalizedEntries().length;
|
| 175 |
+
props.pendingText;
|
| 176 |
+
scrollSentenceListToBottom();
|
| 177 |
+
});
|
| 178 |
+
|
| 179 |
+
onCleanup(() => {
|
| 180 |
+
document.body.style.cursor = '';
|
| 181 |
+
document.body.style.userSelect = '';
|
| 182 |
+
if (splitMouseMoveHandler) {
|
| 183 |
+
window.removeEventListener('mousemove', splitMouseMoveHandler);
|
| 184 |
+
}
|
| 185 |
+
if (splitMouseUpHandler) {
|
| 186 |
+
window.removeEventListener('mouseup', splitMouseUpHandler);
|
| 187 |
+
}
|
| 188 |
+
});
|
| 189 |
+
|
| 190 |
+
const renderFullTextContent = () => (
|
| 191 |
+
<Show when={fullTextBody().length > 0} fallback={
|
| 192 |
+
<p class="text-sm text-[var(--color-earthy-soft-brown)] italic opacity-70">
|
| 193 |
+
Waiting for transcript text...
|
| 194 |
+
</p>
|
| 195 |
+
}>
|
| 196 |
+
<p class="text-sm md:text-base lg:text-[1.05rem] text-[var(--color-earthy-dark-brown)] leading-7">
|
| 197 |
+
{fullTextBody()}
|
| 198 |
+
</p>
|
| 199 |
+
</Show>
|
| 200 |
+
);
|
| 201 |
+
|
| 202 |
+
const renderSentenceListContent = () => (
|
| 203 |
+
<Show when={finalizedEntries().length > 0 || !!props.pendingText.trim()} fallback={
|
| 204 |
+
<div class="flex flex-col items-center justify-center h-full opacity-50 py-6">
|
| 205 |
+
<span class="material-symbols-outlined text-3xl mb-2 text-[var(--color-earthy-soft-brown)]">view_list</span>
|
| 206 |
+
<p class="text-sm text-[var(--color-earthy-soft-brown)] italic">
|
| 207 |
+
No merged conversation entries yet...
|
| 208 |
+
</p>
|
| 209 |
+
</div>
|
| 210 |
+
}>
|
| 211 |
+
<div class="space-y-2">
|
| 212 |
+
<For each={finalizedEntries()}>
|
| 213 |
+
{(entry) => (
|
| 214 |
+
<div class="grid grid-cols-1 sm:grid-cols-[86px_138px_1fr] xl:grid-cols-[94px_150px_1fr] gap-1.5 sm:gap-3 items-baseline px-2 py-2 rounded-lg hover:bg-[var(--color-earthy-sage)]/10 transition-colors">
|
| 215 |
+
<span class="font-mono text-xs text-[var(--color-earthy-soft-brown)]">
|
| 216 |
+
{formatClockTime(entry.emittedAt)}
|
| 217 |
+
</span>
|
| 218 |
+
<span class="font-mono text-xs text-[var(--color-earthy-soft-brown)]">
|
| 219 |
+
[{formatAudioRange(entry.startTime, entry.endTime)}]
|
| 220 |
+
</span>
|
| 221 |
+
<span class="text-sm md:text-base text-[var(--color-earthy-dark-brown)]">
|
| 222 |
+
{entry.text}
|
| 223 |
+
</span>
|
| 224 |
+
</div>
|
| 225 |
+
)}
|
| 226 |
+
</For>
|
| 227 |
+
|
| 228 |
+
<Show when={props.pendingText.trim()}>
|
| 229 |
+
<div class="grid grid-cols-1 sm:grid-cols-[86px_138px_1fr] xl:grid-cols-[94px_150px_1fr] gap-1.5 sm:gap-3 items-baseline px-2 py-2 rounded-lg bg-[var(--color-earthy-muted-green)]/10 border border-[var(--color-earthy-sage)]/40">
|
| 230 |
+
<span class="font-mono text-xs text-[var(--color-earthy-soft-brown)]">
|
| 231 |
+
{formatClockTime(Date.now())}
|
| 232 |
+
</span>
|
| 233 |
+
<span class="text-[10px] font-semibold uppercase tracking-wide text-[var(--color-earthy-coral)]">
|
| 234 |
+
LIVE
|
| 235 |
+
</span>
|
| 236 |
+
<span class="text-sm md:text-base text-[var(--color-earthy-coral)] italic">
|
| 237 |
+
{props.pendingText}
|
| 238 |
+
</span>
|
| 239 |
+
</div>
|
| 240 |
+
</Show>
|
| 241 |
+
</div>
|
| 242 |
+
</Show>
|
| 243 |
+
);
|
| 244 |
+
|
| 245 |
+
return (
|
| 246 |
+
<div class={`flex flex-col h-full bg-transparent ${props.class ?? ''}`}>
|
| 247 |
+
<Show when={props.isV4Mode}>
|
| 248 |
+
<div class="mb-4 flex items-center gap-2">
|
| 249 |
+
<button
|
| 250 |
+
type="button"
|
| 251 |
+
class={`px-3 py-1.5 rounded-lg text-xs font-semibold uppercase tracking-wide border transition-colors ${
|
| 252 |
+
activeTab() === 'live'
|
| 253 |
+
? 'bg-[var(--color-earthy-muted-green)] text-white border-[var(--color-earthy-muted-green)]'
|
| 254 |
+
: 'bg-[var(--color-earthy-bg)] text-[var(--color-earthy-soft-brown)] border-[var(--color-earthy-sage)]/50 hover:border-[var(--color-earthy-soft-brown)]'
|
| 255 |
+
}`}
|
| 256 |
+
onClick={() => setActiveTab('live')}
|
| 257 |
+
>
|
| 258 |
+
Live
|
| 259 |
+
</button>
|
| 260 |
+
<button
|
| 261 |
+
type="button"
|
| 262 |
+
class={`px-3 py-1.5 rounded-lg text-xs font-semibold uppercase tracking-wide border transition-colors flex items-center gap-2 ${
|
| 263 |
+
activeTab() === 'merged'
|
| 264 |
+
? 'bg-[var(--color-earthy-muted-green)] text-white border-[var(--color-earthy-muted-green)]'
|
| 265 |
+
: 'bg-[var(--color-earthy-bg)] text-[var(--color-earthy-soft-brown)] border-[var(--color-earthy-sage)]/50 hover:border-[var(--color-earthy-soft-brown)]'
|
| 266 |
+
}`}
|
| 267 |
+
onClick={() => setActiveTab('merged')}
|
| 268 |
+
>
|
| 269 |
+
<span>Merged</span>
|
| 270 |
+
<span class={`px-1.5 py-0.5 rounded text-[10px] leading-none ${
|
| 271 |
+
activeTab() === 'merged'
|
| 272 |
+
? 'bg-white/20'
|
| 273 |
+
: 'bg-[var(--color-earthy-sage)]/30'
|
| 274 |
+
}`}>
|
| 275 |
+
{mergedCount()}
|
| 276 |
+
</span>
|
| 277 |
+
</button>
|
| 278 |
+
</div>
|
| 279 |
+
</Show>
|
| 280 |
+
|
| 281 |
+
<Show when={props.isV4Mode && activeTab() === 'merged'} fallback={
|
| 282 |
+
<div
|
| 283 |
+
ref={liveContainerRef}
|
| 284 |
+
class="flex-1 overflow-y-auto scroll-smooth"
|
| 285 |
+
>
|
| 286 |
+
<Show
|
| 287 |
+
when={hasContent()}
|
| 288 |
+
fallback={
|
| 289 |
+
<div class="flex flex-col items-center justify-center h-full opacity-50 story-font">
|
| 290 |
+
<span class="material-symbols-outlined text-5xl mb-4 text-[var(--color-earthy-soft-brown)]">graphic_eq</span>
|
| 291 |
+
<p class="text-2xl md:text-3xl leading-[1.6] text-[var(--color-earthy-muted-green)] italic">
|
| 292 |
+
{props.placeholder ?? 'Ready to transcribe...'}
|
| 293 |
+
</p>
|
| 294 |
+
</div>
|
| 295 |
+
}
|
| 296 |
+
>
|
| 297 |
+
<div class="story-font space-y-12 py-4">
|
| 298 |
+
<div class="group">
|
| 299 |
+
<div class="pl-4 border-l-2 border-[var(--color-earthy-coral)]/30 group-hover:border-[var(--color-earthy-coral)]/50 transition-colors duration-300">
|
| 300 |
+
{/* Confirmed text */}
|
| 301 |
+
<p class="text-2xl md:text-3xl leading-[1.6] text-[var(--color-earthy-dark-brown)] font-normal inline">
|
| 302 |
+
{props.confirmedText}
|
| 303 |
+
</p>
|
| 304 |
+
|
| 305 |
+
{/* Pending text */}
|
| 306 |
+
<Show when={props.pendingText}>
|
| 307 |
+
<span class="text-2xl md:text-3xl leading-[1.6] text-[var(--color-earthy-coral)] font-medium italic ml-1 transition-all duration-300">
|
| 308 |
+
{props.pendingText}
|
| 309 |
+
<span class="inline-block w-[3px] h-8 bg-[var(--color-earthy-coral)] align-middle ml-1 opacity-60 animate-pulse" />
|
| 310 |
+
</span>
|
| 311 |
+
</Show>
|
| 312 |
+
</div>
|
| 313 |
+
</div>
|
| 314 |
+
|
| 315 |
+
{/* Listening indicator when idle but recording */}
|
| 316 |
+
<Show when={props.isRecording && !props.pendingText && !props.confirmedText}>
|
| 317 |
+
<div class="flex items-center gap-3">
|
| 318 |
+
<div class="w-2 h-2 rounded-full bg-[var(--color-earthy-coral)] animate-pulse" />
|
| 319 |
+
<span class="text-[10px] font-semibold uppercase tracking-widest text-[var(--color-earthy-soft-brown)]">Listening...</span>
|
| 320 |
+
<div class="flex gap-1">
|
| 321 |
+
<div class="w-1.5 h-1.5 bg-[var(--color-earthy-muted-green)] rounded-full animate-bounce opacity-60" />
|
| 322 |
+
<div class="w-1.5 h-1.5 bg-[var(--color-earthy-muted-green)] rounded-full animate-bounce opacity-80 [animation-delay:0.2s]" />
|
| 323 |
+
<div class="w-1.5 h-1.5 bg-[var(--color-earthy-muted-green)] rounded-full animate-bounce [animation-delay:0.4s]" />
|
| 324 |
+
</div>
|
| 325 |
+
</div>
|
| 326 |
+
</Show>
|
| 327 |
+
</div>
|
| 328 |
+
</Show>
|
| 329 |
+
</div>
|
| 330 |
+
}>
|
| 331 |
+
<div
|
| 332 |
+
ref={mergedContainerRef}
|
| 333 |
+
class="flex-1 overflow-y-auto scroll-smooth"
|
| 334 |
+
>
|
| 335 |
+
<div class="story-font py-2 space-y-4">
|
| 336 |
+
{/* Mobile / tablet stacked layout */}
|
| 337 |
+
<div class="flex flex-col gap-4 lg:hidden">
|
| 338 |
+
<div class="px-3 py-3 rounded-lg border border-[var(--color-earthy-sage)]/40 bg-[var(--color-earthy-sage)]/10 flex flex-col">
|
| 339 |
+
<div class="text-[10px] font-semibold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-2 shrink-0">
|
| 340 |
+
Full Text Body
|
| 341 |
+
</div>
|
| 342 |
+
<div class="max-h-[38vh] overflow-y-auto custom-scrollbar pr-1">
|
| 343 |
+
{renderFullTextContent()}
|
| 344 |
+
</div>
|
| 345 |
+
</div>
|
| 346 |
+
|
| 347 |
+
<div class="px-3 py-3 rounded-lg border border-[var(--color-earthy-sage)]/40 bg-[var(--color-earthy-bg)]/60 flex flex-col">
|
| 348 |
+
<div class="text-[10px] font-semibold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-2 shrink-0">
|
| 349 |
+
Sentence List
|
| 350 |
+
</div>
|
| 351 |
+
<div ref={sentenceListMobileRef} class="max-h-[46vh] overflow-y-auto custom-scrollbar pr-1">
|
| 352 |
+
{renderSentenceListContent()}
|
| 353 |
+
</div>
|
| 354 |
+
</div>
|
| 355 |
+
</div>
|
| 356 |
+
|
| 357 |
+
{/* Desktop adjustable split layout (defaults to 50/50) */}
|
| 358 |
+
<div
|
| 359 |
+
ref={mergedSplitContainerRef}
|
| 360 |
+
class="hidden lg:flex items-stretch h-[min(70vh,760px)]"
|
| 361 |
+
>
|
| 362 |
+
<div
|
| 363 |
+
class="min-w-0 px-3 py-3 rounded-l-lg border border-[var(--color-earthy-sage)]/40 bg-[var(--color-earthy-sage)]/10 flex flex-col"
|
| 364 |
+
style={{ width: `calc(${(mergedSplitRatio() * 100).toFixed(3)}% - 6px)` }}
|
| 365 |
+
>
|
| 366 |
+
<div class="text-[10px] font-semibold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-2 shrink-0">
|
| 367 |
+
Full Text Body
|
| 368 |
+
</div>
|
| 369 |
+
<div class="flex-1 overflow-y-auto custom-scrollbar pr-1">
|
| 370 |
+
{renderFullTextContent()}
|
| 371 |
+
</div>
|
| 372 |
+
</div>
|
| 373 |
+
|
| 374 |
+
<div
|
| 375 |
+
class="w-3 shrink-0 relative cursor-col-resize group touch-none"
|
| 376 |
+
onMouseDown={startSplitResize}
|
| 377 |
+
role="separator"
|
| 378 |
+
aria-orientation="vertical"
|
| 379 |
+
aria-label="Adjust merged split"
|
| 380 |
+
>
|
| 381 |
+
<div class={`absolute inset-y-0 left-1/2 -translate-x-1/2 w-px transition-colors ${
|
| 382 |
+
isSplitResizing()
|
| 383 |
+
? 'bg-[var(--color-earthy-muted-green)]'
|
| 384 |
+
: 'bg-[var(--color-earthy-sage)]/70 group-hover:bg-[var(--color-earthy-soft-brown)]/80'
|
| 385 |
+
}`} />
|
| 386 |
+
<div class={`absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-2.5 h-10 rounded-full border transition-colors ${
|
| 387 |
+
isSplitResizing()
|
| 388 |
+
? 'bg-[var(--color-earthy-sage)] border-[var(--color-earthy-muted-green)]/80'
|
| 389 |
+
: 'bg-[var(--color-earthy-bg)] border-[var(--color-earthy-sage)]/60 group-hover:border-[var(--color-earthy-soft-brown)]/80'
|
| 390 |
+
}`} />
|
| 391 |
+
</div>
|
| 392 |
+
|
| 393 |
+
<div
|
| 394 |
+
class="min-w-0 px-3 py-3 rounded-r-lg border border-[var(--color-earthy-sage)]/40 bg-[var(--color-earthy-bg)]/60 flex flex-col"
|
| 395 |
+
style={{ width: `calc(${((1 - mergedSplitRatio()) * 100).toFixed(3)}% - 6px)` }}
|
| 396 |
+
>
|
| 397 |
+
<div class="text-[10px] font-semibold uppercase tracking-widest text-[var(--color-earthy-soft-brown)] mb-2 shrink-0">
|
| 398 |
+
Sentence List
|
| 399 |
+
</div>
|
| 400 |
+
<div ref={sentenceListDesktopRef} class="flex-1 overflow-y-auto custom-scrollbar pr-1">
|
| 401 |
+
{renderSentenceListContent()}
|
| 402 |
+
</div>
|
| 403 |
+
</div>
|
| 404 |
+
</div>
|
| 405 |
+
</div>
|
| 406 |
+
</div>
|
| 407 |
+
</Show>
|
| 408 |
+
|
| 409 |
+
{/* Merge Stats / Legend (Floating style inside container) */}
|
| 410 |
+
<Show when={props.showConfidence && props.isRecording && (props.lcsLength !== undefined)}>
|
| 411 |
+
<div class="mt-4 flex items-center gap-4 text-[10px] font-bold text-[var(--color-earthy-soft-brown)] uppercase tracking-widest bg-[var(--color-earthy-bg)]/80 backdrop-blur-sm self-start px-4 py-2 rounded-full border border-[var(--color-earthy-sage)]/50">
|
| 412 |
+
<div class="flex items-center gap-1.5">
|
| 413 |
+
<span class={`w-2 h-2 rounded-full ${props.anchorValid ? 'bg-[var(--color-earthy-muted-green)]' : 'bg-[var(--color-earthy-coral)]'}`} />
|
| 414 |
+
<span>LCS: {props.lcsLength}</span>
|
| 415 |
+
</div>
|
| 416 |
+
<div class="w-px h-3 bg-[var(--color-earthy-sage)]" />
|
| 417 |
+
<span class="opacity-60">PTFA Merged</span>
|
| 418 |
+
</div>
|
| 419 |
+
</Show>
|
| 420 |
+
</div>
|
| 421 |
+
);
|
| 422 |
+
};
|
| 423 |
+
|
| 424 |
+
export default TranscriptionDisplay;
|
| 425 |
+
|
src/components/Waveform.tsx
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import { Component, onCleanup, onMount } from 'solid-js';

/** Props for the oscilloscope waveform components in this file. */
interface WaveformProps {
  // NOTE(review): not read by Waveform's render loop in this file —
  // presumably consumed by callers or sibling components; confirm.
  audioLevel: number;
  /** Oscilloscope samples: Float32Array -1..1 from getByteTimeDomainData */
  // NOTE(review): getByteTimeDomainData yields 0–255 bytes, so values here
  // are presumably normalized to -1..1 upstream — confirm at the producer.
  barLevels?: Float32Array;
  // When false, only the background is painted (no trace is drawn).
  isRecording: boolean;
  // NOTE(review): not consumed by the drawing code in this file — confirm
  // against external callers before removing.
  barCount?: number;
}
|
| 11 |
+
/**
|
| 12 |
+
* Oscilloscope-style waveform using AnalyserNode.getByteTimeDomainData (native, fast).
|
| 13 |
+
*/
|
| 14 |
+
export const Waveform: Component<WaveformProps> = (props) => {
|
| 15 |
+
let canvasRef: HTMLCanvasElement | undefined;
|
| 16 |
+
let ctx: CanvasRenderingContext2D | null = null;
|
| 17 |
+
let animationId: number | undefined;
|
| 18 |
+
let resizeObserver: ResizeObserver | null = null;
|
| 19 |
+
|
| 20 |
+
const updateCanvasSize = () => {
|
| 21 |
+
if (!canvasRef?.parentElement) return;
|
| 22 |
+
const rect = canvasRef.parentElement.getBoundingClientRect();
|
| 23 |
+
const dpr = window.devicePixelRatio || 1;
|
| 24 |
+
const w = Math.floor(rect.width * dpr);
|
| 25 |
+
const h = Math.floor(rect.height * dpr);
|
| 26 |
+
if (canvasRef.width !== w || canvasRef.height !== h) {
|
| 27 |
+
canvasRef.width = w;
|
| 28 |
+
canvasRef.height = h;
|
| 29 |
+
}
|
| 30 |
+
};
|
| 31 |
+
|
| 32 |
+
const animate = () => {
|
| 33 |
+
animationId = requestAnimationFrame(animate);
|
| 34 |
+
if (!ctx || !canvasRef) return;
|
| 35 |
+
|
| 36 |
+
const w = canvasRef.width;
|
| 37 |
+
const h = canvasRef.height;
|
| 38 |
+
if (w === 0 || h === 0) return;
|
| 39 |
+
|
| 40 |
+
const samples = props.barLevels;
|
| 41 |
+
const n = samples && samples.length > 0 ? samples.length : 0;
|
| 42 |
+
|
| 43 |
+
const bg = getComputedStyle(canvasRef).getPropertyValue('--color-earthy-bg').trim() || '#faf8f5';
|
| 44 |
+
const color = getComputedStyle(canvasRef).getPropertyValue('--color-primary').trim() || '#14b8a6';
|
| 45 |
+
|
| 46 |
+
ctx.fillStyle = bg;
|
| 47 |
+
ctx.fillRect(0, 0, w, h);
|
| 48 |
+
|
| 49 |
+
if (props.isRecording && samples && n > 0) {
|
| 50 |
+
const centerY = h / 2;
|
| 51 |
+
const amp = (h / 2) * 0.9;
|
| 52 |
+
|
| 53 |
+
ctx.strokeStyle = color;
|
| 54 |
+
ctx.lineWidth = 2;
|
| 55 |
+
ctx.beginPath();
|
| 56 |
+
ctx.moveTo(0, centerY - Math.max(-1, Math.min(1, samples[0])) * amp);
|
| 57 |
+
for (let i = 1; i < n; i++) {
|
| 58 |
+
const x = (i / (n - 1)) * w;
|
| 59 |
+
const y = centerY - Math.max(-1, Math.min(1, samples[i])) * amp;
|
| 60 |
+
ctx.lineTo(x, y);
|
| 61 |
+
}
|
| 62 |
+
ctx.stroke();
|
| 63 |
+
}
|
| 64 |
+
};
|
| 65 |
+
|
| 66 |
+
onMount(() => {
|
| 67 |
+
if (canvasRef) {
|
| 68 |
+
updateCanvasSize();
|
| 69 |
+
ctx = canvasRef.getContext('2d', { alpha: false });
|
| 70 |
+
if (resizeObserver = typeof ResizeObserver !== 'undefined' ? new ResizeObserver(updateCanvasSize) : null) {
|
| 71 |
+
resizeObserver.observe(canvasRef.parentElement ?? canvasRef);
|
| 72 |
+
}
|
| 73 |
+
}
|
| 74 |
+
animationId = requestAnimationFrame(animate);
|
| 75 |
+
});
|
| 76 |
+
|
| 77 |
+
onCleanup(() => {
|
| 78 |
+
cancelAnimationFrame(animationId!);
|
| 79 |
+
resizeObserver?.disconnect();
|
| 80 |
+
});
|
| 81 |
+
|
| 82 |
+
return (
|
| 83 |
+
<div class="h-12 w-full overflow-hidden rounded-md bg-[var(--color-earthy-bg)]">
|
| 84 |
+
<canvas ref={canvasRef} class="w-full h-full block" />
|
| 85 |
+
</div>
|
| 86 |
+
);
|
| 87 |
+
};
|
| 88 |
+
|
| 89 |
+
export const SPECTRUM_BAR_COUNT = 128;
|
| 90 |
+
|
| 91 |
+
export const CompactWaveform: Component<WaveformProps> = (props) => (
|
| 92 |
+
<Waveform {...props} barCount={props.barLevels?.length} />
|
| 93 |
+
);
|
| 94 |
+
|
| 95 |
+
export default Waveform;
|
src/components/index.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Barrel file: re-exports every UI component so callers can write
// `import { X } from './components'`.
// NOTE(review): `export *` evaluates each module in this order; keep the
// order stable in case any component module has import-time side effects.
export * from './LayeredBufferVisualizer';
export * from './Sidebar';
export * from './StatusBar';
export * from './TranscriptionDisplay';
export * from './Waveform';
export * from './ModelLoadingOverlay';
export * from './DebugPanel';
export * from './BufferVisualizer';
export * from './EnergyMeter';
export * from './PrivacyBadge';
export * from './ContextPanel';
export * from './SettingsPanel';
|
src/csp.test.ts
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, it, expect } from 'vitest';
|
| 2 |
+
import { Window } from 'happy-dom';
|
| 3 |
+
import fs from 'fs';
|
| 4 |
+
import path from 'path';
|
| 5 |
+
import { fileURLToPath } from 'url';
|
| 6 |
+
|
| 7 |
+
const __filename = fileURLToPath(import.meta.url);
|
| 8 |
+
const __dirname = path.dirname(__filename);
|
| 9 |
+
|
| 10 |
+
describe('Content Security Policy', () => {
|
| 11 |
+
it('should be present in index.html', () => {
|
| 12 |
+
const htmlPath = path.resolve(__dirname, '../index.html');
|
| 13 |
+
const html = fs.readFileSync(htmlPath, 'utf-8');
|
| 14 |
+
const window = new Window();
|
| 15 |
+
const document = window.document;
|
| 16 |
+
document.write(html);
|
| 17 |
+
|
| 18 |
+
const meta = document.querySelector('meta[http-equiv="Content-Security-Policy"]');
|
| 19 |
+
expect(meta).not.toBeNull();
|
| 20 |
+
|
| 21 |
+
const content = meta?.getAttribute('content') || '';
|
| 22 |
+
|
| 23 |
+
// Check for critical directives
|
| 24 |
+
expect(content).toContain("default-src 'self'");
|
| 25 |
+
expect(content).toContain("script-src 'self'");
|
| 26 |
+
expect(content).toContain("object-src 'none'");
|
| 27 |
+
|
| 28 |
+
// Check specific allowances
|
| 29 |
+
expect(content).toContain('https://huggingface.co');
|
| 30 |
+
expect(content).toContain('https://*.hf.co');
|
| 31 |
+
expect(content).toContain('blob:'); // For workers
|
| 32 |
+
expect(content).toContain("worker-src 'self' blob:");
|
| 33 |
+
});
|
| 34 |
+
});
|
src/index.css
CHANGED
|
@@ -1,13 +1,255 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
-webkit-font-smoothing: antialiased;
|
| 7 |
-moz-osx-font-smoothing: grayscale;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
}
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
}
|
|
|
|
/*
 * Global stylesheet for the keet SolidJS app.
 * Tailwind CSS v4: design tokens live in @theme, custom utilities in @utility.
 * NOTE(review): the Google Fonts @import requires network access and a CSP /
 * COEP setup that permits fonts.googleapis.com — confirm against index.html.
 */
@import url('https://fonts.googleapis.com/css2?family=Crimson+Pro:ital,wght@0,400;0,500;0,600;1,400&family=Plus+Jakarta+Sans:wght@300;400;500;600&family=JetBrains+Mono:wght@400;500&display=swap');
@import "tailwindcss";

@theme {
  /* Earthy palette from reference */
  --color-earthy-bg: #F9F7F2;
  --color-earthy-muted-green: #6B705C;
  --color-earthy-soft-brown: #A5A58D;
  --color-earthy-dark-brown: #3D405B;
  --color-earthy-coral: #E07A5F;
  --color-earthy-sage: #B7B7A4;

  /* Semantic aliases (accent = coral for focus/selection) */
  --color-primary: #6B705C;
  --color-accent: #E07A5F;
  --color-neu-bg: #F9F7F2;
  --color-workspace-bg: #F9F7F2;
  --color-sidebar-border: #B7B7A4;

  --font-display: "Plus Jakarta Sans", sans-serif;
  --font-sans: "Plus Jakarta Sans", sans-serif;
  --font-story: "Crimson Pro", serif;

  /* Softer shadows for earthy theme */
  --shadow-neu-flat: 4px 4px 10px rgba(183, 183, 164, 0.25), -4px -4px 10px rgba(255, 255, 255, 0.8);
  --shadow-neu-pressed: inset 3px 3px 6px rgba(183, 183, 164, 0.2), inset -3px -3px 6px rgba(255, 255, 255, 0.6);
  --shadow-neu-btn: 3px 3px 8px rgba(183, 183, 164, 0.2), -3px -3px 8px rgba(255, 255, 255, 0.7);
  --shadow-neu-btn-hover: 2px 2px 5px rgba(183, 183, 164, 0.2), -2px -2px 5px rgba(255, 255, 255, 0.6);
}

/* Material Symbols - ensure they load and look sharp. */
.material-symbols-outlined {
  font-family: 'Material Symbols Outlined' !important;
  font-weight: normal;
  font-style: normal;
  font-size: 24px;
  line-height: 1;
  letter-spacing: normal;
  text-transform: none;
  display: inline-block;
  white-space: nowrap;
  word-wrap: normal;
  direction: ltr;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
  text-rendering: optimizeLegibility;
  /* 'liga' turns the icon name ("graphic_eq" etc.) into the glyph. */
  font-feature-settings: 'liga';
  font-variation-settings: 'FILL' 0, 'wght' 400, 'GRAD' 0, 'opsz' 24;
}

/* Story/content font class */
.story-font {
  font-family: var(--font-story);
}

/* Custom Utilities (earthy) */
@utility neu-icon-btn {
  background-color: var(--color-neu-bg);
  box-shadow: var(--shadow-neu-btn);
  transition: all 0.2s;
  border: 1px solid var(--color-earthy-sage);

  &:hover {
    box-shadow: var(--shadow-neu-btn-hover);
    transform: translateY(0.5px);
  }

  &:active,
  &.active {
    box-shadow: var(--shadow-neu-pressed);
    color: var(--color-earthy-coral);
  }
}

@utility neu-circle-btn {
  @apply w-12 h-12 rounded-full flex items-center justify-center neu-icon-btn;
}

@utility neu-square-btn {
  @apply w-12 h-12 rounded-xl flex items-center justify-center neu-icon-btn;
}

@utility nm-flat {
  background-color: var(--color-neu-bg);
  box-shadow: var(--shadow-neu-flat);
}

@utility nm-inset {
  background-color: var(--color-neu-bg);
  box-shadow: var(--shadow-neu-pressed);
}

@utility status-led {
  @apply absolute bottom-1 right-1 w-2.5 h-2.5 rounded-full border-2;
  border-color: var(--color-earthy-bg);
}

/* Load model button: spinning indicator while model is loading */
@keyframes load-btn-spin {
  from { transform: rotate(0deg); }
  to { transform: rotate(360deg); }
}
.load-btn-spin {
  animation: load-btn-spin 1s linear infinite;
}

/* Custom scrollbar (earthy-sage) */
::-webkit-scrollbar {
  width: 4px;
  height: 4px;
}

::-webkit-scrollbar-track {
  background: transparent;
}

::-webkit-scrollbar-thumb {
  background: var(--color-earthy-sage);
  border-radius: 10px;
}

::-webkit-scrollbar-thumb:hover {
  background: var(--color-earthy-soft-brown);
}

/* Same styling scoped to elements that opt in via .custom-scrollbar. */
.custom-scrollbar::-webkit-scrollbar {
  width: 4px;
}

.custom-scrollbar::-webkit-scrollbar-track {
  background: transparent;
}

.custom-scrollbar::-webkit-scrollbar-thumb {
  background: var(--color-earthy-sage);
  border-radius: 10px;
}

/* Fade the waveform strip out at both horizontal edges. */
.abstract-wave {
  mask-image: linear-gradient(to right, transparent, black 15%, black 85%, transparent);
}

/* Global resets for the theme */
body {
  background-color: var(--color-earthy-bg);
  font-family: var(--font-sans);
  color: var(--color-earthy-dark-brown);
  -webkit-font-smoothing: antialiased;
  margin: 0;
  padding: 0;
}

code,
pre,
.font-mono {
  font-family: 'JetBrains Mono', monospace;
}

/* Waveform animation */
@keyframes wave {

  0%,
  100% {
    height: 10%;
    opacity: 0.5;
  }

  50% {
    height: 100%;
    opacity: 1;
  }
}

.waveform-bar-anim {
  animation: wave 1.2s ease-in-out infinite;
}

::selection {
  background-color: var(--color-earthy-coral);
  color: white;
}

.transition-glass {
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
}

@keyframes pulse-gentle {

  0%,
  100% {
    opacity: 1;
  }

  50% {
    opacity: 0.5;
  }
}

.animate-pulse-gentle {
  animation: pulse-gentle 2s ease-in-out infinite;
}

/* Debug panel adjustable settings: earthy track + dark handle */
.debug-slider {
  -webkit-appearance: none;
  appearance: none;
  background: transparent;
}

.debug-slider::-webkit-slider-runnable-track {
  height: 8px;
  border-radius: 999px;
  background: var(--color-earthy-sage);
  opacity: 0.4;
}

.debug-slider::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 18px;
  height: 18px;
  border-radius: 50%;
  background: var(--color-earthy-muted-green);
  border: 1px solid var(--color-earthy-dark-brown);
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15);
  /* Center the 18px thumb on the 8px track. */
  margin-top: -5px;
  cursor: pointer;
}

.debug-slider::-moz-range-track {
  height: 8px;
  border-radius: 999px;
  background: var(--color-earthy-sage);
  opacity: 0.4;
}

.debug-slider::-moz-range-thumb {
  width: 18px;
  height: 18px;
  border-radius: 50%;
  background: var(--color-earthy-muted-green);
  border: 1px solid var(--color-earthy-dark-brown);
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15);
  cursor: pointer;
}

/* Settings panel: slide in from the right, no full-screen block */
.settings-panel-slide {
  transform: translateX(100%);
  transition: transform 0.3s ease-out;
  box-shadow: -4px 0 24px rgba(0, 0, 0, 0.08);
}
.settings-panel-slide--open {
  transform: translateX(0);
}
|
src/index.js
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
import React from 'react';
|
| 2 |
-
import ReactDOM from 'react-dom/client';
|
| 3 |
-
import './index.css';
|
| 4 |
-
import App from './App';
|
| 5 |
-
import reportWebVitals from './reportWebVitals';
|
| 6 |
-
|
| 7 |
-
const root = ReactDOM.createRoot(document.getElementById('root'));
|
| 8 |
-
root.render(
|
| 9 |
-
<React.StrictMode>
|
| 10 |
-
<App />
|
| 11 |
-
</React.StrictMode>
|
| 12 |
-
);
|
| 13 |
-
|
| 14 |
-
// If you want to start measuring performance in your app, pass a function
|
| 15 |
-
// to log results (for example: reportWebVitals(console.log))
|
| 16 |
-
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
|
| 17 |
-
reportWebVitals();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/index.tsx
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Keet v2.0 - Entry Point
 *
 * Privacy-first, offline-capable real-time transcription.
 */

/* @refresh reload */
import { render } from 'solid-js/web';
import App from './App';
import './index.css';

// Mount the SolidJS app; fail loudly if the host page is missing #root.
const mountPoint = document.getElementById('root');

if (mountPoint === null) {
  throw new Error('Root element not found');
}

render(() => <App />, mountPoint);
|
src/lib/audio/AudioEngine.ts
ADDED
|
@@ -0,0 +1,1014 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { AudioEngine as IAudioEngine, AudioEngineConfig, AudioSegment, IRingBuffer, AudioMetrics } from './types';
|
| 2 |
+
import { RingBuffer } from './RingBuffer';
|
| 3 |
+
import { AudioSegmentProcessor, ProcessedSegment } from './AudioSegmentProcessor';
|
| 4 |
+
import { resampleLinear } from './utils';
|
| 5 |
+
|
| 6 |
+
/** Duration of the visualization buffer in seconds (rolling window shown in the waveform UI). */
const VISUALIZATION_BUFFER_DURATION = 30;
|
| 8 |
+
|
| 9 |
+
/**
|
| 10 |
+
* AudioEngine implementation for capturing audio, buffering it, and performing VAD.
|
| 11 |
+
* Uses AudioSegmentProcessor for robust speech detection (incl. lookback).
|
| 12 |
+
*/
|
| 13 |
+
export class AudioEngine implements IAudioEngine {
|
| 14 |
+
// Effective engine configuration (constructor defaults merged with caller overrides).
private config: AudioEngineConfig;
// Rolling audio store at the TARGET sample rate; segments are read back from it by frame index.
private ringBuffer: IRingBuffer;
private audioProcessor: AudioSegmentProcessor; // Replaces EnergyVAD
// Preferred input device; null means browser default microphone.
private deviceId: string | null = null;

private audioContext: AudioContext | null = null;
private mediaStream: MediaStream | null = null;
private workletNode: AudioWorkletNode | null = null;
private sourceNode: MediaStreamAudioSourceNode | null = null;

// AnalyserNode for oscilloscope waveform (native getByteTimeDomainData)
private analyserNode: AnalyserNode | null = null;
private analyserSourceNode: MediaStreamAudioSourceNode | null = null;
private analyserGainNode: GainNode | null = null;
// Scratch buffers reused every frame by getBarLevels() to avoid per-call allocation.
private analyserTimeBuffer: Uint8Array | null = null;
private waveformOut: Float32Array | null = null;
private readonly ANALYSER_FFT_SIZE = 256;
private readonly ANALYSER_SMOOTHING = 0.3; // Low = fast oscilloscope response

// Track device vs target sample rates
private deviceSampleRate: number = 48000;
private targetSampleRate: number = 16000;

// Latest SMA-smoothed peak-amplitude energy (see handleAudioChunk).
private currentEnergy: number = 0;

// Subscribers notified with each finalized speech segment.
private segmentCallbacks: Array<(segment: AudioSegment) => void> = [];

// Fixed-window streaming state (v3 token streaming mode)
private windowCallbacks: Array<{
  windowDuration: number;
  overlapDuration: number;
  triggerInterval: number;
  callback: (audio: Float32Array, startTime: number) => void;
  lastWindowEnd: number; // Frame offset of last window end
}> = [];

// Resampled audio chunk callbacks (for mel worker, etc.)
private audioChunkCallbacks: Array<(chunk: Float32Array) => void> = [];

// SMA buffer for energy calculation
private energyHistory: number[] = [];

// Last N energy values for bar visualizer (oldest first when read)
private energyBarHistory: number[] = [];
private readonly BAR_LEVELS_SIZE = 64;

// Visualization Summary Buffer (Low-Res Min/Max pairs)
private visualizationSummary: Float32Array | null = null;
private visualizationSummaryPosition: number = 0;
private readonly VIS_SUMMARY_SIZE = 2000; // 2000 min/max pairs for 30 seconds = 15ms resolution

// Raw visualization buffer (still kept for higher-res requests if needed, but summary is preferred)
private visualizationBuffer: Float32Array | null = null;
private visualizationBufferPosition: number = 0;
private visualizationBufferSize: number = 0;

// Metrics for UI components
private metrics: AudioMetrics = {
  currentEnergy: 0,
  averageEnergy: 0,
  peakEnergy: 0,
  noiseFloor: 0.01,
  currentSNR: 0,
  isSpeaking: false,
};

// Subscribers for visualization updates
private visualizationCallbacks: Array<(data: Float32Array, metrics: AudioMetrics, bufferEndTime: number) => void> = [];
private lastVisualizationNotifyTime: number = 0;
private readonly VISUALIZATION_NOTIFY_INTERVAL_MS = 16; // ~60fps for responsive oscilloscope

// Recent segments for visualization (stores timing info only)
private recentSegments: Array<{ startTime: number; endTime: number; isProcessed: boolean }> = [];
private readonly MAX_SEGMENTS_FOR_VISUALIZATION = 50;
|
| 88 |
+
|
| 89 |
+
/**
 * Build the engine with defaults tuned to match the legacy UI project,
 * overridable per-field via the partial config.
 */
constructor(config: Partial<AudioEngineConfig> = {}) {
  const defaults: AudioEngineConfig = {
    sampleRate: 16000,
    bufferDuration: 120,
    energyThreshold: 0.08, // Match legacy UI project 'medium'
    minSpeechDuration: 240, // Match legacy UI project
    minSilenceDuration: 400, // Match legacy UI project
    maxSegmentDuration: 4.8, // Match legacy UI project

    // Advanced VAD defaults
    lookbackDuration: 0.120,
    speechHangover: 0.16,
    minEnergyIntegral: 22,
    minEnergyPerSecond: 5,
    useAdaptiveEnergyThresholds: true,
    adaptiveEnergyIntegralFactor: 25.0,
    adaptiveEnergyPerSecondFactor: 10.0,
    minAdaptiveEnergyIntegral: 3,
    minAdaptiveEnergyPerSecond: 1,
    maxSilenceWithinSpeech: 0.160,
    endingSpeechTolerance: 0.240,
  };
  this.config = { ...defaults, ...config };

  this.deviceId = this.config.deviceId || null;
  this.targetSampleRate = this.config.sampleRate;

  // RingBuffer operates at TARGET sample rate (16kHz)
  this.ringBuffer = new RingBuffer(this.targetSampleRate, this.config.bufferDuration);

  // Segment processor handles VAD with lookback/hangover semantics.
  this.audioProcessor = new AudioSegmentProcessor({
    sampleRate: this.targetSampleRate,
    energyThreshold: this.config.energyThreshold,
    minSpeechDuration: this.config.minSpeechDuration,
    silenceThreshold: this.config.minSilenceDuration,
    maxSegmentDuration: this.config.maxSegmentDuration,
    lookbackDuration: this.config.lookbackDuration,
    maxSilenceWithinSpeech: this.config.maxSilenceWithinSpeech,
    endingSpeechTolerance: this.config.endingSpeechTolerance,
    snrThreshold: 3.0,
    minSnrThreshold: 1.0,
    noiseFloorAdaptationRate: 0.05,
    fastAdaptationRate: 0.15,
    minBackgroundDuration: 1.0,
    energyRiseThreshold: 0.08
  });

  // Raw visualization window: 30 s of samples at the target rate.
  this.visualizationBufferSize = Math.round(this.targetSampleRate * VISUALIZATION_BUFFER_DURATION);
  this.visualizationBuffer = new Float32Array(this.visualizationBufferSize);
  this.visualizationBufferPosition = 0;

  // Low-res min/max summary: pairs, hence * 2.
  this.visualizationSummary = new Float32Array(this.VIS_SUMMARY_SIZE * 2);
  this.visualizationSummaryPosition = 0;

  console.log('[AudioEngine] Initialized with config:', this.config);
}
|
| 148 |
+
|
| 149 |
+
// True once the 'capture-processor' worklet module has been registered on the current AudioContext.
private isWorkletInitialized = false;
|
| 150 |
+
|
| 151 |
+
async init(): Promise<void> {
|
| 152 |
+
// Request microphone permission with optional deviceId
|
| 153 |
+
try {
|
| 154 |
+
if (this.mediaStream) {
|
| 155 |
+
this.mediaStream.getTracks().forEach(t => t.stop());
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
const constraints: MediaStreamConstraints = {
|
| 159 |
+
audio: {
|
| 160 |
+
deviceId: this.deviceId ? { exact: this.deviceId } : undefined,
|
| 161 |
+
channelCount: 1,
|
| 162 |
+
echoCancellation: false,
|
| 163 |
+
noiseSuppression: false,
|
| 164 |
+
autoGainControl: false,
|
| 165 |
+
},
|
| 166 |
+
};
|
| 167 |
+
|
| 168 |
+
console.log('[AudioEngine] Requesting microphone:', constraints);
|
| 169 |
+
this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
|
| 170 |
+
console.log('[AudioEngine] Microphone stream acquired:', this.mediaStream.id);
|
| 171 |
+
} catch (err) {
|
| 172 |
+
console.error('[AudioEngine] Failed to get media stream:', err);
|
| 173 |
+
throw err;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
const track = this.mediaStream!.getAudioTracks()[0];
|
| 177 |
+
const trackSettings = track?.getSettings?.();
|
| 178 |
+
// Device sample rate (what the mic gives us)
|
| 179 |
+
this.deviceSampleRate = trackSettings?.sampleRate ?? 48000;
|
| 180 |
+
console.log('[AudioEngine] Device sample rate:', this.deviceSampleRate, '-> Target:', this.targetSampleRate);
|
| 181 |
+
|
| 182 |
+
if (this.audioContext && this.audioContext.sampleRate !== this.deviceSampleRate) {
|
| 183 |
+
await this.audioContext.close();
|
| 184 |
+
this.audioContext = null;
|
| 185 |
+
}
|
| 186 |
+
if (!this.audioContext) {
|
| 187 |
+
this.audioContext = new AudioContext({
|
| 188 |
+
sampleRate: this.deviceSampleRate,
|
| 189 |
+
latencyHint: 'interactive',
|
| 190 |
+
});
|
| 191 |
+
console.log('[AudioEngine] Created AudioContext:', this.audioContext.state, 'sampleRate:', this.audioContext.sampleRate);
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
// Re-initialize components with correct rates
|
| 195 |
+
this.ringBuffer = new RingBuffer(this.targetSampleRate, this.config.bufferDuration);
|
| 196 |
+
|
| 197 |
+
// Update processor config
|
| 198 |
+
this.audioProcessor = new AudioSegmentProcessor({
|
| 199 |
+
sampleRate: this.targetSampleRate,
|
| 200 |
+
energyThreshold: this.config.energyThreshold,
|
| 201 |
+
minSpeechDuration: this.config.minSpeechDuration,
|
| 202 |
+
silenceThreshold: this.config.minSilenceDuration,
|
| 203 |
+
maxSegmentDuration: this.config.maxSegmentDuration,
|
| 204 |
+
});
|
| 205 |
+
|
| 206 |
+
if (!this.isWorkletInitialized) {
|
| 207 |
+
const windowDuration = 0.080;
|
| 208 |
+
const processorCode = `
|
| 209 |
+
class CaptureProcessor extends AudioWorkletProcessor {
|
| 210 |
+
constructor(options) {
|
| 211 |
+
super(options);
|
| 212 |
+
const opts = options?.processorOptions || {};
|
| 213 |
+
this.inputSampleRate = opts.inputSampleRate || 16000;
|
| 214 |
+
this.targetSampleRate = opts.targetSampleRate || this.inputSampleRate;
|
| 215 |
+
this.ratio = this.inputSampleRate / this.targetSampleRate;
|
| 216 |
+
this.bufferSize = Math.round(${windowDuration} * this.inputSampleRate);
|
| 217 |
+
this.buffer = new Float32Array(this.bufferSize);
|
| 218 |
+
this.index = 0;
|
| 219 |
+
this._lastLog = 0;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
_emitChunk() {
|
| 223 |
+
let out;
|
| 224 |
+
let maxAbs = 0;
|
| 225 |
+
|
| 226 |
+
if (this.targetSampleRate === this.inputSampleRate) {
|
| 227 |
+
out = new Float32Array(this.bufferSize);
|
| 228 |
+
for (let i = 0; i < this.bufferSize; i++) {
|
| 229 |
+
const v = this.buffer[i];
|
| 230 |
+
out[i] = v;
|
| 231 |
+
const a = v < 0 ? -v : v;
|
| 232 |
+
if (a > maxAbs) maxAbs = a;
|
| 233 |
+
}
|
| 234 |
+
} else {
|
| 235 |
+
const outLength = Math.floor(this.bufferSize / this.ratio);
|
| 236 |
+
out = new Float32Array(outLength);
|
| 237 |
+
for (let i = 0; i < outLength; i++) {
|
| 238 |
+
const srcIndex = i * this.ratio;
|
| 239 |
+
const srcIndexFloor = Math.floor(srcIndex);
|
| 240 |
+
const srcIndexCeil = Math.min(srcIndexFloor + 1, this.bufferSize - 1);
|
| 241 |
+
const t = srcIndex - srcIndexFloor;
|
| 242 |
+
const v = this.buffer[srcIndexFloor] * (1 - t) + this.buffer[srcIndexCeil] * t;
|
| 243 |
+
out[i] = v;
|
| 244 |
+
const a = v < 0 ? -v : v;
|
| 245 |
+
if (a > maxAbs) maxAbs = a;
|
| 246 |
+
}
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
this.port.postMessage(
|
| 250 |
+
{ type: 'audio', samples: out, sampleRate: this.targetSampleRate, maxAbs },
|
| 251 |
+
[out.buffer]
|
| 252 |
+
);
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
process(inputs) {
|
| 256 |
+
const input = inputs[0];
|
| 257 |
+
if (!input || !input[0]) return true;
|
| 258 |
+
|
| 259 |
+
const channelData = input[0];
|
| 260 |
+
|
| 261 |
+
// Buffer the data
|
| 262 |
+
for (let i = 0; i < channelData.length; i++) {
|
| 263 |
+
this.buffer[this.index++] = channelData[i];
|
| 264 |
+
|
| 265 |
+
if (this.index >= this.bufferSize) {
|
| 266 |
+
this._emitChunk();
|
| 267 |
+
this.index = 0;
|
| 268 |
+
|
| 269 |
+
// Debug log every ~5 seconds
|
| 270 |
+
const now = Date.now();
|
| 271 |
+
if (now - this._lastLog > 5000) {
|
| 272 |
+
this.port.postMessage({ type: 'log', message: '[AudioWorklet] Active' });
|
| 273 |
+
this._lastLog = now;
|
| 274 |
+
}
|
| 275 |
+
}
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
return true;
|
| 279 |
+
}
|
| 280 |
+
}
|
| 281 |
+
registerProcessor('capture-processor', CaptureProcessor);
|
| 282 |
+
`;
|
| 283 |
+
const blob = new Blob([processorCode], { type: 'application/javascript' });
|
| 284 |
+
const url = URL.createObjectURL(blob);
|
| 285 |
+
try {
|
| 286 |
+
await this.audioContext.audioWorklet.addModule(url);
|
| 287 |
+
this.isWorkletInitialized = true;
|
| 288 |
+
console.log('[AudioEngine] AudioWorklet module loaded');
|
| 289 |
+
} catch (err) {
|
| 290 |
+
console.error('[AudioEngine] Failed to load worklet:', err);
|
| 291 |
+
if (err instanceof Error && err.name === 'InvalidStateError') {
|
| 292 |
+
// Ignore if already registered
|
| 293 |
+
this.isWorkletInitialized = true;
|
| 294 |
+
}
|
| 295 |
+
}
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
// Re-create worklet node if needed (it might handle dispose differently, but safe to new)
|
| 299 |
+
if (this.workletNode) this.workletNode.disconnect();
|
| 300 |
+
|
| 301 |
+
this.workletNode = new AudioWorkletNode(this.audioContext, 'capture-processor', {
|
| 302 |
+
processorOptions: { inputSampleRate: this.deviceSampleRate, targetSampleRate: this.targetSampleRate },
|
| 303 |
+
});
|
| 304 |
+
this.workletNode.port.onmessage = (event: MessageEvent<any>) => {
|
| 305 |
+
if (event.data?.type === 'audio' && event.data.samples instanceof Float32Array) {
|
| 306 |
+
this.handleAudioChunk(event.data.samples, event.data.maxAbs, event.data.sampleRate);
|
| 307 |
+
} else if (event.data instanceof Float32Array) {
|
| 308 |
+
this.handleAudioChunk(event.data, undefined, this.deviceSampleRate);
|
| 309 |
+
} else if (event.data?.type === 'log') {
|
| 310 |
+
console.log(event.data.message);
|
| 311 |
+
}
|
| 312 |
+
};
|
| 313 |
+
this.workletNode.onprocessorerror = (e) => {
|
| 314 |
+
console.error('[AudioEngine] Worklet processor error:', e);
|
| 315 |
+
};
|
| 316 |
+
|
| 317 |
+
// Reconnect source node
|
| 318 |
+
this.sourceNode?.disconnect();
|
| 319 |
+
this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
|
| 320 |
+
this.sourceNode.connect(this.workletNode);
|
| 321 |
+
|
| 322 |
+
// AnalyserNode branch for lightweight preview bars (native FFT, no mel worker)
|
| 323 |
+
this.disposeAnalyser();
|
| 324 |
+
this.analyserSourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
|
| 325 |
+
this.analyserNode = this.audioContext.createAnalyser();
|
| 326 |
+
this.analyserNode.fftSize = this.ANALYSER_FFT_SIZE;
|
| 327 |
+
this.analyserNode.smoothingTimeConstant = this.ANALYSER_SMOOTHING;
|
| 328 |
+
this.analyserTimeBuffer = new Uint8Array(this.analyserNode.fftSize);
|
| 329 |
+
this.waveformOut = new Float32Array(this.analyserNode.fftSize);
|
| 330 |
+
|
| 331 |
+
this.analyserGainNode = this.audioContext.createGain();
|
| 332 |
+
this.analyserGainNode.gain.value = 0;
|
| 333 |
+
|
| 334 |
+
this.analyserSourceNode.connect(this.analyserNode);
|
| 335 |
+
this.analyserNode.connect(this.analyserGainNode);
|
| 336 |
+
this.analyserGainNode.connect(this.audioContext.destination);
|
| 337 |
+
|
| 338 |
+
// Keep graph alive
|
| 339 |
+
this.workletNode.connect(this.audioContext.destination);
|
| 340 |
+
console.log('[AudioEngine] Graph connected: Source -> Worklet, AnalyserNode for oscilloscope');
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
async start(): Promise<void> {
|
| 344 |
+
if (!this.mediaStream || !this.audioContext || !this.workletNode) {
|
| 345 |
+
await this.init();
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
if (this.audioContext?.state === 'suspended') {
|
| 349 |
+
await this.audioContext.resume();
|
| 350 |
+
}
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
stop(): void {
|
| 354 |
+
if (this.audioContext?.state === 'running') {
|
| 355 |
+
this.audioContext.suspend();
|
| 356 |
+
}
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
/**
|
| 360 |
+
* Reset buffers and VAD state for a new session while keeping the audio graph.
|
| 361 |
+
* Aligns visualization + segment timebase to 0, matching legacy UI project behavior.
|
| 362 |
+
*/
|
| 363 |
+
reset(): void {
|
| 364 |
+
// Reset audio/VAD state
|
| 365 |
+
this.ringBuffer.reset();
|
| 366 |
+
this.audioProcessor.reset();
|
| 367 |
+
this.currentEnergy = 0;
|
| 368 |
+
|
| 369 |
+
// Reset metrics
|
| 370 |
+
this.metrics = {
|
| 371 |
+
currentEnergy: 0,
|
| 372 |
+
averageEnergy: 0,
|
| 373 |
+
peakEnergy: 0,
|
| 374 |
+
noiseFloor: 0.01,
|
| 375 |
+
currentSNR: 0,
|
| 376 |
+
isSpeaking: false,
|
| 377 |
+
};
|
| 378 |
+
|
| 379 |
+
// Clear segment history used by the visualizer
|
| 380 |
+
this.recentSegments = [];
|
| 381 |
+
this.energyBarHistory = [];
|
| 382 |
+
|
| 383 |
+
// Reset visualization buffer
|
| 384 |
+
if (this.visualizationBuffer) {
|
| 385 |
+
this.visualizationBuffer.fill(0);
|
| 386 |
+
}
|
| 387 |
+
this.visualizationBufferPosition = 0;
|
| 388 |
+
|
| 389 |
+
// Reset windowed streaming cursors
|
| 390 |
+
for (const entry of this.windowCallbacks) {
|
| 391 |
+
entry.lastWindowEnd = 0;
|
| 392 |
+
}
|
| 393 |
+
|
| 394 |
+
// Push a blank update so UI clears stale waveform/segments
|
| 395 |
+
this.notifyVisualizationUpdate();
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
/** Latest SMA-smoothed peak-amplitude energy (updated per worklet chunk). */
getCurrentEnergy(): number {
  return this.currentEnergy;
}
|
| 401 |
+
|
| 402 |
+
/** Oscilloscope waveform from AnalyserNode.getByteTimeDomainData (native, fast). Values -1..1. */
|
| 403 |
+
/**
 * Oscilloscope samples in [-1, 1]. Prefers the AnalyserNode's native
 * time-domain bytes; falls back to the recent energy history (values
 * clamped to [0, 1], zero-padded on the right) when no analyser exists.
 * The analyser path reuses a preallocated output buffer.
 */
getBarLevels(): Float32Array {
  const analyser = this.analyserNode;
  const timeBytes = this.analyserTimeBuffer;
  const waveform = this.waveformOut;

  if (analyser && timeBytes && waveform) {
    (analyser as { getByteTimeDomainData(array: Uint8Array): void }).getByteTimeDomainData(timeBytes);
    // Map unsigned bytes (0..255, 128 = silence) onto -1..1.
    timeBytes.forEach((byte, i) => {
      waveform[i] = (byte - 128) / 128;
    });
    return waveform;
  }

  // Fallback: last BAR_LEVELS_SIZE smoothed energies, oldest first.
  const levels = new Float32Array(this.BAR_LEVELS_SIZE);
  const history = this.energyBarHistory;
  const first = Math.max(0, history.length - this.BAR_LEVELS_SIZE);
  for (let slot = 0; slot < this.BAR_LEVELS_SIZE; slot++) {
    const src = first + slot;
    levels[slot] = src < history.length ? Math.min(1, Math.max(0, history[src])) : 0;
  }
  return levels;
}
|
| 420 |
+
|
| 421 |
+
/**
 * Snapshot of signal-quality numbers for the UI, sourced from the segment
 * processor's stats with conservative fallbacks when a stat is absent.
 */
getSignalMetrics(): { noiseFloor: number; snr: number; threshold: number; snrThreshold: number } {
  const { noiseFloor, snr, snrThreshold } = this.audioProcessor.getStats();
  return {
    noiseFloor: noiseFloor ?? 0.0001,
    snr: snr ?? 0,
    threshold: this.config.energyThreshold,
    snrThreshold: snrThreshold ?? 3.0
  };
}
|
| 430 |
+
|
| 431 |
+
/** Whether the segment processor currently considers the input to be speech. */
isSpeechActive(): boolean {
  return this.audioProcessor.getStateInfo().inSpeech;
}
|
| 434 |
+
|
| 435 |
+
/** Expose the target-rate ring buffer (used by consumers to read raw frames). */
getRingBuffer(): IRingBuffer {
  return this.ringBuffer;
}
|
| 438 |
+
|
| 439 |
+
onSpeechSegment(callback: (segment: AudioSegment) => void): () => void {
|
| 440 |
+
this.segmentCallbacks.push(callback);
|
| 441 |
+
return () => {
|
| 442 |
+
this.segmentCallbacks = this.segmentCallbacks.filter((cb) => cb !== callback);
|
| 443 |
+
};
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
/**
|
| 447 |
+
* Subscribe to fixed-window chunks for token streaming mode.
|
| 448 |
+
* Fires every triggerInterval seconds with windowDuration of audio.
|
| 449 |
+
*/
|
| 450 |
+
onWindowChunk(
|
| 451 |
+
windowDuration: number,
|
| 452 |
+
overlapDuration: number,
|
| 453 |
+
triggerInterval: number,
|
| 454 |
+
callback: (audio: Float32Array, startTime: number) => void
|
| 455 |
+
): () => void {
|
| 456 |
+
const entry = {
|
| 457 |
+
windowDuration,
|
| 458 |
+
overlapDuration,
|
| 459 |
+
triggerInterval,
|
| 460 |
+
callback,
|
| 461 |
+
lastWindowEnd: 0, // Will be set on first chunk
|
| 462 |
+
};
|
| 463 |
+
this.windowCallbacks.push(entry);
|
| 464 |
+
|
| 465 |
+
return () => {
|
| 466 |
+
this.windowCallbacks = this.windowCallbacks.filter((e) => e !== entry);
|
| 467 |
+
};
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
/**
|
| 471 |
+
* Subscribe to every resampled audio chunk (16kHz).
|
| 472 |
+
* Used to feed the continuous mel producer worker.
|
| 473 |
+
* Returns an unsubscribe function.
|
| 474 |
+
*/
|
| 475 |
+
onAudioChunk(callback: (chunk: Float32Array) => void): () => void {
|
| 476 |
+
this.audioChunkCallbacks.push(callback);
|
| 477 |
+
return () => {
|
| 478 |
+
this.audioChunkCallbacks = this.audioChunkCallbacks.filter((cb) => cb !== callback);
|
| 479 |
+
};
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
updateConfig(config: Partial<AudioEngineConfig>): void {
|
| 483 |
+
this.config = { ...this.config, ...config };
|
| 484 |
+
|
| 485 |
+
// Update processor config
|
| 486 |
+
if (config.energyThreshold !== undefined) this.audioProcessor.setThreshold(config.energyThreshold);
|
| 487 |
+
if (config.minSpeechDuration !== undefined) this.audioProcessor.setMinSpeechDuration(config.minSpeechDuration);
|
| 488 |
+
if (config.minSilenceDuration !== undefined) this.audioProcessor.setSilenceLength(config.minSilenceDuration);
|
| 489 |
+
if (config.maxSegmentDuration !== undefined) this.audioProcessor.setMaxSegmentDuration(config.maxSegmentDuration);
|
| 490 |
+
|
| 491 |
+
// Advanced VAD updates
|
| 492 |
+
if (config.lookbackDuration !== undefined) this.audioProcessor.setLookbackDuration(config.lookbackDuration);
|
| 493 |
+
if (config.overlapDuration !== undefined) this.audioProcessor.setOverlapDuration(config.overlapDuration);
|
| 494 |
+
if (config.maxSilenceWithinSpeech !== undefined) this.audioProcessor.setMaxSilenceWithinSpeech(config.maxSilenceWithinSpeech);
|
| 495 |
+
if (config.endingSpeechTolerance !== undefined) this.audioProcessor.setEndingSpeechTolerance(config.endingSpeechTolerance);
|
| 496 |
+
|
| 497 |
+
if (config.snrThreshold !== undefined) this.audioProcessor.setSnrThreshold(config.snrThreshold);
|
| 498 |
+
if (config.minSnrThreshold !== undefined) this.audioProcessor.setMinSnrThreshold(config.minSnrThreshold);
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
async setDevice(deviceId: string): Promise<void> {
|
| 502 |
+
this.deviceId = deviceId;
|
| 503 |
+
await this.init();
|
| 504 |
+
|
| 505 |
+
// Reconnect if running
|
| 506 |
+
if (this.audioContext && this.workletNode) {
|
| 507 |
+
this.sourceNode?.disconnect();
|
| 508 |
+
this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream!);
|
| 509 |
+
this.sourceNode.connect(this.workletNode);
|
| 510 |
+
}
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
private disposeAnalyser(): void {
|
| 514 |
+
this.analyserSourceNode?.disconnect();
|
| 515 |
+
this.analyserNode?.disconnect();
|
| 516 |
+
this.analyserGainNode?.disconnect();
|
| 517 |
+
this.analyserSourceNode = null;
|
| 518 |
+
this.analyserNode = null;
|
| 519 |
+
this.analyserGainNode = null;
|
| 520 |
+
this.analyserTimeBuffer = null;
|
| 521 |
+
this.waveformOut = null;
|
| 522 |
+
}
|
| 523 |
+
|
| 524 |
+
dispose(): void {
|
| 525 |
+
this.stop();
|
| 526 |
+
this.disposeAnalyser();
|
| 527 |
+
this.mediaStream?.getTracks().forEach(track => track.stop());
|
| 528 |
+
this.audioContext?.close();
|
| 529 |
+
this.audioContext = null;
|
| 530 |
+
this.mediaStream = null;
|
| 531 |
+
this.workletNode = null;
|
| 532 |
+
this.sourceNode = null;
|
| 533 |
+
}
|
| 534 |
+
|
| 535 |
+
private handleAudioChunk(rawChunk: Float32Array, precomputedMaxAbs?: number, chunkSampleRate?: number): void {
|
| 536 |
+
// 0. Ensure chunk is at target sample rate (resample only if needed)
|
| 537 |
+
const sampleRate = chunkSampleRate ?? this.targetSampleRate;
|
| 538 |
+
const needsResample = sampleRate !== this.targetSampleRate;
|
| 539 |
+
const chunk = needsResample
|
| 540 |
+
? resampleLinear(rawChunk, sampleRate, this.targetSampleRate)
|
| 541 |
+
: rawChunk;
|
| 542 |
+
|
| 543 |
+
// Calculate chunk energy (Peak Amplitude) + SMA for VAD compatibility
|
| 544 |
+
let maxAbs = (!needsResample && precomputedMaxAbs !== undefined) ? precomputedMaxAbs : 0;
|
| 545 |
+
if (precomputedMaxAbs === undefined || needsResample) {
|
| 546 |
+
for (let i = 0; i < chunk.length; i++) {
|
| 547 |
+
const abs = Math.abs(chunk[i]);
|
| 548 |
+
if (abs > maxAbs) maxAbs = abs;
|
| 549 |
+
}
|
| 550 |
+
}
|
| 551 |
+
|
| 552 |
+
// SMA Smoothing (matching legacy UI project logic)
|
| 553 |
+
this.energyHistory.push(maxAbs);
|
| 554 |
+
if (this.energyHistory.length > 6) {
|
| 555 |
+
this.energyHistory.shift();
|
| 556 |
+
}
|
| 557 |
+
const energy = this.energyHistory.reduce((a: number, b: number) => a + b, 0) / this.energyHistory.length;
|
| 558 |
+
|
| 559 |
+
this.currentEnergy = energy;
|
| 560 |
+
this.energyBarHistory.push(energy);
|
| 561 |
+
if (this.energyBarHistory.length > this.BAR_LEVELS_SIZE) {
|
| 562 |
+
this.energyBarHistory.shift();
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
+
// Log when energy crosses threshold if state is close to changing
|
| 566 |
+
const isSpeech = energy > this.config.energyThreshold;
|
| 567 |
+
const wasSpeaking = this.metrics.isSpeaking;
|
| 568 |
+
if (isSpeech !== wasSpeaking) {
|
| 569 |
+
console.debug(`[AudioEngine] Energy threshold crossed: ${energy.toFixed(6)} > ${this.config.energyThreshold} = ${isSpeech}`);
|
| 570 |
+
}
|
| 571 |
+
|
| 572 |
+
// 1. Write to ring buffer before any callbacks can transfer the chunk.
|
| 573 |
+
this.ringBuffer.write(chunk);
|
| 574 |
+
|
| 575 |
+
const endFrame = this.ringBuffer.getCurrentFrame();
|
| 576 |
+
|
| 577 |
+
// 2. Process VAD on resampled audio
|
| 578 |
+
// The processor uses its own internal history for lookback, but we pull full audio from ring buffer later.
|
| 579 |
+
const currentTime = this.ringBuffer.getCurrentTime();
|
| 580 |
+
const segments = this.audioProcessor.processAudioData(chunk, currentTime, energy);
|
| 581 |
+
|
| 582 |
+
// 2.5 Update visualization buffer
|
| 583 |
+
this.updateVisualizationBuffer(chunk);
|
| 584 |
+
|
| 585 |
+
// 2.6 Update metrics
|
| 586 |
+
const stats = this.audioProcessor.getStats();
|
| 587 |
+
const stateInfo = this.audioProcessor.getStateInfo();
|
| 588 |
+
|
| 589 |
+
this.metrics.currentEnergy = energy;
|
| 590 |
+
this.metrics.averageEnergy = this.metrics.averageEnergy * 0.95 + energy * 0.05;
|
| 591 |
+
this.metrics.peakEnergy = Math.max(this.metrics.peakEnergy * 0.99, energy);
|
| 592 |
+
this.metrics.noiseFloor = stats.noiseFloor ?? 0.01;
|
| 593 |
+
this.metrics.currentSNR = stats.snr ?? 0;
|
| 594 |
+
this.metrics.isSpeaking = stateInfo.inSpeech;
|
| 595 |
+
|
| 596 |
+
// Periodic debug log
|
| 597 |
+
if (Math.random() < 0.05) {
|
| 598 |
+
console.debug(`[AudioEngine] Metrics: E=${energy.toFixed(6)}, NF=${this.metrics.noiseFloor.toFixed(6)}, SNR=${this.metrics.currentSNR.toFixed(2)}, Speaking=${this.metrics.isSpeaking}`);
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
// 3. Handle segments
|
| 602 |
+
if (segments.length > 0) {
|
| 603 |
+
for (const seg of segments) {
|
| 604 |
+
// Apply lookback and overlap adjustments matching legacy UI project
|
| 605 |
+
const lookbackDuration = this.config.lookbackDuration ?? 0.120;
|
| 606 |
+
const startTime = Math.max(0, seg.startTime - lookbackDuration);
|
| 607 |
+
|
| 608 |
+
// Calculate the sample positions for audio extraction
|
| 609 |
+
const startFrame = Math.round(startTime * this.targetSampleRate);
|
| 610 |
+
const endFrame = Math.round(seg.endTime * this.targetSampleRate);
|
| 611 |
+
|
| 612 |
+
// Retrieval with padding (hangover)
|
| 613 |
+
const speechHangover = this.config.speechHangover ?? 0.16;
|
| 614 |
+
const paddedEndFrame = Math.min(
|
| 615 |
+
this.ringBuffer.getCurrentFrame(),
|
| 616 |
+
endFrame + Math.round(speechHangover * this.targetSampleRate)
|
| 617 |
+
);
|
| 618 |
+
|
| 619 |
+
try {
|
| 620 |
+
const audioData = this.ringBuffer.read(startFrame, paddedEndFrame);
|
| 621 |
+
|
| 622 |
+
// Calculate precise energy metrics for filtering
|
| 623 |
+
const metrics = this.calculateSegmentEnergyMetrics(audioData, this.targetSampleRate);
|
| 624 |
+
|
| 625 |
+
// Normalize power to 16kHz equivalent
|
| 626 |
+
const normalizedPowerAt16k = metrics.averagePower * 16000;
|
| 627 |
+
const normalizedEnergyIntegralAt16k = normalizedPowerAt16k * metrics.duration;
|
| 628 |
+
|
| 629 |
+
// Adaptive threshold calculation
|
| 630 |
+
let minEnergyIntegralThreshold = this.config.minEnergyIntegral ?? 22;
|
| 631 |
+
let minEnergyPerSecondThreshold = this.config.minEnergyPerSecond ?? 5;
|
| 632 |
+
|
| 633 |
+
if (this.config.useAdaptiveEnergyThresholds) {
|
| 634 |
+
const windowSize = this.config.windowSize ?? Math.round(0.080 * this.targetSampleRate);
|
| 635 |
+
const normalizedNoiseFloor = windowSize > 0 ? this.metrics.noiseFloor / windowSize : 0;
|
| 636 |
+
const noiseFloorAt16k = normalizedNoiseFloor * 16000;
|
| 637 |
+
|
| 638 |
+
const adaptiveMinEnergyIntegral = noiseFloorAt16k * (this.config.adaptiveEnergyIntegralFactor ?? 25.0);
|
| 639 |
+
minEnergyIntegralThreshold = Math.max(this.config.minAdaptiveEnergyIntegral ?? 3, adaptiveMinEnergyIntegral);
|
| 640 |
+
|
| 641 |
+
const adaptiveMinEnergyPerSecond = noiseFloorAt16k * (this.config.adaptiveEnergyPerSecondFactor ?? 10.0);
|
| 642 |
+
minEnergyPerSecondThreshold = Math.max(this.config.minAdaptiveEnergyPerSecond ?? 1, adaptiveMinEnergyPerSecond);
|
| 643 |
+
}
|
| 644 |
+
|
| 645 |
+
const isValidSpeech =
|
| 646 |
+
metrics.duration >= (this.config.minSpeechDuration / 1000) &&
|
| 647 |
+
normalizedPowerAt16k >= minEnergyPerSecondThreshold &&
|
| 648 |
+
normalizedEnergyIntegralAt16k >= minEnergyIntegralThreshold;
|
| 649 |
+
|
| 650 |
+
if (isValidSpeech) {
|
| 651 |
+
const audioSegment: AudioSegment = {
|
| 652 |
+
startFrame: startFrame,
|
| 653 |
+
endFrame: paddedEndFrame,
|
| 654 |
+
duration: metrics.duration,
|
| 655 |
+
averageEnergy: metrics.averagePower,
|
| 656 |
+
timestamp: Date.now(),
|
| 657 |
+
};
|
| 658 |
+
this.notifySegment(audioSegment);
|
| 659 |
+
} else {
|
| 660 |
+
console.log('[AudioEngine] Filtered out noise segment:', {
|
| 661 |
+
duration: metrics.duration,
|
| 662 |
+
power: normalizedPowerAt16k,
|
| 663 |
+
integral: normalizedEnergyIntegralAt16k
|
| 664 |
+
});
|
| 665 |
+
}
|
| 666 |
+
} catch (err) {
|
| 667 |
+
console.warn('[AudioEngine] Failed to extract audio for validation:', err);
|
| 668 |
+
}
|
| 669 |
+
}
|
| 670 |
+
}
|
| 671 |
+
|
| 672 |
+
// 6. Fixed-window streaming (v3 token streaming mode)
|
| 673 |
+
this.processWindowCallbacks(endFrame);
|
| 674 |
+
|
| 675 |
+
// 7. Notify audio chunk subscribers AFTER internal processing.
|
| 676 |
+
// Callbacks may transfer the chunk's buffer; do not use `chunk` after this.
|
| 677 |
+
for (const cb of this.audioChunkCallbacks) {
|
| 678 |
+
cb(chunk);
|
| 679 |
+
}
|
| 680 |
+
|
| 681 |
+
// 8. Notify visualization subscribers
|
| 682 |
+
this.notifyVisualizationUpdate();
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
/**
|
| 686 |
+
* Helper to read audio from ring buffer and calculate energy metrics for a detected segment.
|
| 687 |
+
*/
|
| 688 |
+
private calculateSegmentEnergyMetrics(audioData: Float32Array, sampleRate: number): { averagePower: number; duration: number; numSamples: number } {
|
| 689 |
+
if (!audioData || audioData.length === 0) {
|
| 690 |
+
return { averagePower: 0, duration: 0, numSamples: 0 };
|
| 691 |
+
}
|
| 692 |
+
|
| 693 |
+
const numSamples = audioData.length;
|
| 694 |
+
let sumOfSquares = 0;
|
| 695 |
+
|
| 696 |
+
for (let i = 0; i < numSamples; i++) {
|
| 697 |
+
sumOfSquares += audioData[i] * audioData[i];
|
| 698 |
+
}
|
| 699 |
+
|
| 700 |
+
const duration = numSamples / sampleRate;
|
| 701 |
+
const averagePower = numSamples > 0 ? sumOfSquares / numSamples : 0;
|
| 702 |
+
|
| 703 |
+
return {
|
| 704 |
+
averagePower,
|
| 705 |
+
duration,
|
| 706 |
+
numSamples
|
| 707 |
+
};
|
| 708 |
+
}
|
| 709 |
+
|
| 710 |
+
  /**
   * Process fixed-window callbacks for token streaming mode.
   * Fires when enough audio has accumulated for a new window.
   *
   * @param currentFrame - Absolute frame index of the newest sample in the
   *   ring buffer; used as the right edge of each emitted window.
   */
  private processWindowCallbacks(currentFrame: number): void {
    for (const entry of this.windowCallbacks) {
      const windowFrames = Math.floor(entry.windowDuration * this.targetSampleRate);
      const stepFrames = Math.floor(entry.triggerInterval * this.targetSampleRate);

      // Initialize lastWindowEnd on first call: anchor the cursor at "now"
      // instead of emitting a window over pre-subscription audio.
      if (entry.lastWindowEnd === 0) {
        entry.lastWindowEnd = currentFrame;
        continue;
      }

      // Check if we have enough new audio for the next window
      const framesSinceLastWindow = currentFrame - entry.lastWindowEnd;
      if (framesSinceLastWindow >= stepFrames) {
        // Calculate window boundaries
        const windowEnd = currentFrame;
        const windowStart = windowEnd - windowFrames;

        // Ensure we have enough data in the ring buffer
        // (windowStart may predate the oldest frame still retained).
        const baseOffset = this.ringBuffer.getBaseFrameOffset();
        if (windowStart >= baseOffset) {
          try {
            const audio = this.ringBuffer.read(windowStart, windowEnd);
            const startTime = windowStart / this.targetSampleRate;

            entry.callback(audio, startTime);
            // Cursor advances only after a successful read + callback, so a
            // transient read failure is retried on the next chunk.
            entry.lastWindowEnd = windowEnd;
          } catch (e) {
            console.warn('[AudioEngine] Window read failed:', e);
          }
        }
      }
    }
  }
|
| 748 |
+
|
| 749 |
+
private notifySegment(segment: AudioSegment): void {
|
| 750 |
+
// Track segment for visualization
|
| 751 |
+
this.recentSegments.push({
|
| 752 |
+
startTime: segment.startFrame / this.targetSampleRate,
|
| 753 |
+
endTime: segment.endFrame / this.targetSampleRate,
|
| 754 |
+
isProcessed: false
|
| 755 |
+
});
|
| 756 |
+
|
| 757 |
+
// Limit segments count
|
| 758 |
+
if (this.recentSegments.length > this.MAX_SEGMENTS_FOR_VISUALIZATION) {
|
| 759 |
+
this.recentSegments.shift();
|
| 760 |
+
}
|
| 761 |
+
|
| 762 |
+
this.segmentCallbacks.forEach((cb) => cb(segment));
|
| 763 |
+
}
|
| 764 |
+
|
| 765 |
+
/**
|
| 766 |
+
* Get recent segments for visualization.
|
| 767 |
+
*/
|
| 768 |
+
getSegmentsForVisualization(): Array<{ startTime: number; endTime: number; isProcessed: boolean }> {
|
| 769 |
+
const segments = [...this.recentSegments];
|
| 770 |
+
|
| 771 |
+
// Add pending segment if speech is currently active
|
| 772 |
+
const vadState = this.audioProcessor.getStateInfo();
|
| 773 |
+
if (vadState.inSpeech && vadState.speechStartTime !== null) {
|
| 774 |
+
segments.push({
|
| 775 |
+
startTime: vadState.speechStartTime,
|
| 776 |
+
endTime: this.ringBuffer.getCurrentTime(),
|
| 777 |
+
isProcessed: false // Pending
|
| 778 |
+
});
|
| 779 |
+
}
|
| 780 |
+
|
| 781 |
+
return segments;
|
| 782 |
+
}
|
| 783 |
+
|
| 784 |
+
/**
|
| 785 |
+
* Mark a segment as processed (for visualization color coding).
|
| 786 |
+
*/
|
| 787 |
+
markSegmentProcessed(startTime: number): void {
|
| 788 |
+
const segment = this.recentSegments.find(s => Math.abs(s.startTime - startTime) < 0.1);
|
| 789 |
+
if (segment) {
|
| 790 |
+
segment.isProcessed = true;
|
| 791 |
+
}
|
| 792 |
+
}
|
| 793 |
+
|
| 794 |
+
  /**
   * Update the visualization buffer and summary with new audio data.
   *
   * Maintains two circular structures:
   *  1. A raw sample buffer holding the most recent audio.
   *  2. A low-resolution summary of (min, max) pairs, where each pair covers
   *     bufferLength / VIS_SUMMARY_SIZE raw samples.
   */
  private updateVisualizationBuffer(chunk: Float32Array): void {
    if (!this.visualizationBuffer || !this.visualizationSummary) return;

    const chunkLength = chunk.length;
    const bufferLength = this.visualizationBufferSize;

    // 1. Update raw circular buffer
    if (chunkLength >= bufferLength) {
      // Chunk alone fills the whole buffer: keep only its tail, reset cursor.
      this.visualizationBuffer.set(chunk.subarray(chunkLength - bufferLength));
      this.visualizationBufferPosition = 0;
    } else {
      const endPosition = this.visualizationBufferPosition + chunkLength;
      if (endPosition <= bufferLength) {
        // Fits without wrapping.
        this.visualizationBuffer.set(chunk, this.visualizationBufferPosition);
        this.visualizationBufferPosition = endPosition % bufferLength;
      } else {
        // Wraps: split the chunk at the physical end of the buffer.
        const firstPart = bufferLength - this.visualizationBufferPosition;
        this.visualizationBuffer.set(chunk.subarray(0, firstPart), this.visualizationBufferPosition);
        this.visualizationBuffer.set(chunk.subarray(firstPart), 0);
        this.visualizationBufferPosition = (chunkLength - firstPart) % bufferLength;
      }
    }

    // 2. Update summary buffer (Low-res min/max pairs)
    // Each point in VIS_SUMMARY_SIZE represents bufferLength / VIS_SUMMARY_SIZE samples
    const samplesPerPoint = bufferLength / this.VIS_SUMMARY_SIZE;
    // NOTE(review): Math.round makes the number of summary points advanced per
    // chunk approximate when chunkLength is not a multiple of samplesPerPoint;
    // acceptable for a visualization, but confirm it cannot drift badly.
    const numNewPoints = Math.round(chunkLength / samplesPerPoint);

    for (let i = 0; i < numNewPoints; i++) {
      const start = Math.floor(i * samplesPerPoint);
      const end = Math.min(chunkLength, Math.floor((i + 1) * samplesPerPoint));
      if (start >= end) continue;

      // Min/max over the raw samples this summary point covers.
      let min = chunk[start];
      let max = chunk[start];
      for (let s = start + 1; s < end; s++) {
        const v = chunk[s];
        if (v < min) min = v;
        if (v > max) max = v;
      }

      // Write to circular summary
      const targetIdx = this.visualizationSummaryPosition * 2;
      this.visualizationSummary[targetIdx] = min;
      this.visualizationSummary[targetIdx + 1] = max;
      this.visualizationSummaryPosition = (this.visualizationSummaryPosition + 1) % this.VIS_SUMMARY_SIZE;
    }
  }
|
| 845 |
+
|
| 846 |
+
  /**
   * Get visualization data subsampled to fit the target width.
   * Returns min/max pairs for each pixel to preserve peaks in the waveform.
   * Zero-allocation except for the returned result.
   * @param targetWidth - The desired number of data points (e.g., canvas width).
   * @returns Float32Array containing alternating min/max values, length targetWidth * 2.
   */
  getVisualizationData(targetWidth: number): Float32Array {
    if (!this.visualizationSummary || !targetWidth || targetWidth <= 0) {
      return new Float32Array(0);
    }

    // If targetWidth is close to or less than our summary size, use the summary (MUCH faster)
    if (targetWidth <= this.VIS_SUMMARY_SIZE) {
      const subsampledBuffer = new Float32Array(targetWidth * 2);
      const samplesPerTarget = this.VIS_SUMMARY_SIZE / targetWidth;

      for (let i = 0; i < targetWidth; i++) {
        // Range of summary points collapsed into output pixel i.
        const rangeStart = i * samplesPerTarget;
        const rangeEnd = (i + 1) * samplesPerTarget;

        let minVal = 0;
        let maxVal = 0;
        let first = true; // seed min/max from the first summary point seen

        for (let s = Math.floor(rangeStart); s < Math.floor(rangeEnd); s++) {
          // The summary is circular: offset by visualizationSummaryPosition
          // so points are read in chronological order.
          const idx = ((this.visualizationSummaryPosition + s) % this.VIS_SUMMARY_SIZE) * 2;
          const vMin = this.visualizationSummary[idx];
          const vMax = this.visualizationSummary[idx + 1];

          if (first) {
            minVal = vMin;
            maxVal = vMax;
            first = false;
          } else {
            if (vMin < minVal) minVal = vMin;
            if (vMax > maxVal) maxVal = vMax;
          }
        }

        subsampledBuffer[i * 2] = minVal;
        subsampledBuffer[i * 2 + 1] = maxVal;
      }
      return subsampledBuffer;
    }

    // Caller wants more resolution than the summary holds: fall back to
    // scanning the raw sample buffer.
    return this.getVisualizationDataFromRaw(targetWidth);
  }
|
| 894 |
+
|
| 895 |
+
  /**
   * Subsample the raw circular visualization buffer into targetWidth
   * (min, max) pairs, reading samples in chronological order.
   * Used when targetWidth exceeds the low-resolution summary size.
   */
  private getVisualizationDataFromRaw(targetWidth: number): Float32Array {
    if (!this.visualizationBuffer) return new Float32Array(0);
    const buffer = this.visualizationBuffer;
    const bufferLength = this.visualizationBufferSize;
    const pos = this.visualizationBufferPosition;
    const samplesPerPoint = bufferLength / targetWidth;
    const subsampledBuffer = new Float32Array(targetWidth * 2);

    // Logical index s maps to physical index:
    // if s < wrapS: pos + s
    // else: s - wrapS (which is s - (bufferLength - pos) = s + pos - bufferLength)
    const wrapS = bufferLength - pos;

    for (let i = 0; i < targetWidth; i++) {
      // Logical sample range covered by output pixel i.
      const startS = Math.floor(i * samplesPerPoint);
      const endS = Math.floor((i + 1) * samplesPerPoint);

      let minVal = 0;
      let maxVal = 0;
      let first = true; // seed min/max from the first sample seen

      // Part 1: Before wrap (Logical indices < wrapS)
      // Physical indices: pos + s
      const end1 = (endS < wrapS) ? endS : wrapS;
      if (startS < end1) {
        let p = pos + startS;
        const pEnd = pos + end1;

        if (first && p < pEnd) {
          const val = buffer[p];
          minVal = val;
          maxVal = val;
          first = false;
          p++;
        }

        for (; p < pEnd; p++) {
          const val = buffer[p];
          if (val < minVal) minVal = val;
          else if (val > maxVal) maxVal = val;
        }
      }

      // Part 2: After wrap (Logical indices >= wrapS)
      // Physical indices: s - wrapS
      const start2 = (startS > wrapS) ? startS : wrapS;
      if (start2 < endS) {
        let p = start2 - wrapS;
        const pEnd = endS - wrapS;

        if (first && p < pEnd) {
          const val = buffer[p];
          minVal = val;
          maxVal = val;
          first = false;
          p++;
        }

        for (; p < pEnd; p++) {
          const val = buffer[p];
          if (val < minVal) minVal = val;
          else if (val > maxVal) maxVal = val;
        }
      }

      subsampledBuffer[i * 2] = minVal;
      subsampledBuffer[i * 2 + 1] = maxVal;
    }
    return subsampledBuffer;
  }
|
| 965 |
+
|
| 966 |
+
|
| 967 |
+
/**
|
| 968 |
+
* Get current audio metrics for UI visualization.
|
| 969 |
+
*/
|
| 970 |
+
getMetrics(): AudioMetrics {
|
| 971 |
+
return { ...this.metrics };
|
| 972 |
+
}
|
| 973 |
+
|
| 974 |
+
  /**
   * Get current time in seconds (for waveform time markers).
   * Delegates to the ring buffer's running time counter.
   */
  getCurrentTime(): number {
    return this.ringBuffer.getCurrentTime();
  }
|
| 980 |
+
|
| 981 |
+
  /**
   * Get the visualization buffer duration in seconds.
   * Fixed at the module-level VISUALIZATION_BUFFER_DURATION constant.
   */
  getVisualizationDuration(): number {
    return VISUALIZATION_BUFFER_DURATION;
  }
|
| 987 |
+
|
| 988 |
+
/**
|
| 989 |
+
* Subscribe to visualization updates.
|
| 990 |
+
* Callback is invoked after each audio chunk is processed.
|
| 991 |
+
*/
|
| 992 |
+
onVisualizationUpdate(callback: (data: Float32Array, metrics: AudioMetrics, bufferEndTime: number) => void): () => void {
|
| 993 |
+
this.visualizationCallbacks.push(callback);
|
| 994 |
+
return () => {
|
| 995 |
+
this.visualizationCallbacks = this.visualizationCallbacks.filter((cb) => cb !== callback);
|
| 996 |
+
};
|
| 997 |
+
}
|
| 998 |
+
|
| 999 |
+
/**
|
| 1000 |
+
* Notify visualization subscribers with updated data.
|
| 1001 |
+
* Throttled to ~30fps to avoid UI stuttering.
|
| 1002 |
+
*/
|
| 1003 |
+
private notifyVisualizationUpdate(): void {
|
| 1004 |
+
const now = performance.now();
|
| 1005 |
+
if (now - this.lastVisualizationNotifyTime < this.VISUALIZATION_NOTIFY_INTERVAL_MS) {
|
| 1006 |
+
return;
|
| 1007 |
+
}
|
| 1008 |
+
this.lastVisualizationNotifyTime = now;
|
| 1009 |
+
|
| 1010 |
+
const data = this.getVisualizationData(400); // 400 points is enough for modern displays and saves CPU
|
| 1011 |
+
const bufferEndTime = this.ringBuffer.getCurrentTime();
|
| 1012 |
+
this.visualizationCallbacks.forEach((cb) => cb(data, this.getMetrics(), bufferEndTime));
|
| 1013 |
+
}
|
| 1014 |
+
}
|
src/lib/audio/AudioSegmentProcessor.test.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import { describe, it, expect } from 'vitest';
|
| 3 |
+
import { AudioSegmentProcessor } from './AudioSegmentProcessor';
|
| 4 |
+
|
| 5 |
+
describe('AudioSegmentProcessor', () => {
|
| 6 |
+
it('should initialize without errors', () => {
|
| 7 |
+
const processor = new AudioSegmentProcessor();
|
| 8 |
+
expect(processor).toBeDefined();
|
| 9 |
+
const stats = processor.getStats();
|
| 10 |
+
expect(stats).toBeDefined();
|
| 11 |
+
expect(stats.noiseFloor).toBeGreaterThan(0);
|
| 12 |
+
});
|
| 13 |
+
|
| 14 |
+
it('should process silence without detecting segments', () => {
|
| 15 |
+
const processor = new AudioSegmentProcessor({
|
| 16 |
+
sampleRate: 16000,
|
| 17 |
+
energyThreshold: 0.1
|
| 18 |
+
});
|
| 19 |
+
|
| 20 |
+
// 16000 samples = 1 second
|
| 21 |
+
const silence = new Float32Array(16000).fill(0);
|
| 22 |
+
const energy = 0.0001;
|
| 23 |
+
const currentTime = 1.0;
|
| 24 |
+
|
| 25 |
+
const segments = processor.processAudioData(silence, currentTime, energy);
|
| 26 |
+
|
| 27 |
+
expect(segments).toEqual([]);
|
| 28 |
+
const state = processor.getStateInfo();
|
| 29 |
+
expect(state.inSpeech).toBe(false);
|
| 30 |
+
});
|
| 31 |
+
|
| 32 |
+
it('should process speech and detect segments', () => {
|
| 33 |
+
// This is a simplified test.
|
| 34 |
+
// Real VAD is complex, so we just check state transitions if we force high energy
|
| 35 |
+
const processor = new AudioSegmentProcessor({
|
| 36 |
+
sampleRate: 16000,
|
| 37 |
+
energyThreshold: 0.01
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
+
const speech = new Float32Array(1600).fill(0.5); // 100ms
|
| 41 |
+
const energy = 0.5; // High energy
|
| 42 |
+
|
| 43 |
+
// Process a few chunks to trigger speech detection
|
| 44 |
+
let segments = processor.processAudioData(speech, 1.0, energy);
|
| 45 |
+
|
| 46 |
+
// It might not trigger immediately due to lookback/SNR checks,
|
| 47 |
+
// but let's check internal state or just that it doesn't crash
|
| 48 |
+
|
| 49 |
+
// Force state check
|
| 50 |
+
// processor.processAudioData is complex, so let's just ensure it runs
|
| 51 |
+
expect(Array.isArray(segments)).toBe(true);
|
| 52 |
+
});
|
| 53 |
+
|
| 54 |
+
it('should reset state correctly', () => {
|
| 55 |
+
const processor = new AudioSegmentProcessor();
|
| 56 |
+
|
| 57 |
+
// Simulate some state change
|
| 58 |
+
const chunk = new Float32Array(100).fill(0.1);
|
| 59 |
+
processor.processAudioData(chunk, 1.0, 0.5);
|
| 60 |
+
|
| 61 |
+
processor.reset();
|
| 62 |
+
|
| 63 |
+
const stats = processor.getStats();
|
| 64 |
+
expect(stats.noiseFloor).toBe(0.005); // Default reset value
|
| 65 |
+
const state = processor.getStateInfo();
|
| 66 |
+
expect(state.inSpeech).toBe(false);
|
| 67 |
+
expect(state.speechStartTime).toBeNull();
|
| 68 |
+
});
|
| 69 |
+
});
|
src/lib/audio/AudioSegmentProcessor.ts
ADDED
|
@@ -0,0 +1,609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Keet - Audio Segment Processor
|
| 3 |
+
* Ported from legacy UI project/AudioSegmentProcessor.js
|
| 4 |
+
*
|
| 5 |
+
* Sophisticated VAD-based segment processor with:
|
| 6 |
+
* - Speech onset detection with lookback
|
| 7 |
+
* - Rising energy trend analysis
|
| 8 |
+
* - Adaptive noise floor tracking
|
| 9 |
+
* - SNR-based speech detection
|
| 10 |
+
* - Proactive segment splitting for long utterances
|
| 11 |
+
*/
|
| 12 |
+
|
| 13 |
+
import { defaultAudioParams, windowDuration as DEFAULT_WINDOW_DURATION } from './audioParams';
|
| 14 |
+
|
| 15 |
+
/**
 * Per-chunk metadata retained in the recent-chunk history; used for
 * speech-onset lookback (see processAudioData / findSpeechStart).
 */
interface ChunkInfo {
  time: number;      // processor time (seconds) at which the chunk arrived
  energy: number;    // chunk energy as supplied by the caller
  isSpeech: boolean; // whether energy exceeded energyThreshold at capture time
  snr: number;       // SNR relative to the adaptive noise floor at capture time
}
|
| 22 |
+
|
| 23 |
+
/**
 * Speech/silence statistics for one recorded segment.
 * NOTE(review): the code that populates these lies outside this chunk —
 * field semantics inferred from names; confirm at the populate site.
 */
interface SegmentStats {
  startTime: number;      // segment start (seconds)
  endTime: number;        // segment end (seconds)
  duration: number;       // segment length (seconds) — presumably endTime - startTime
  avgEnergy: number;      // mean energy over the segment — verify at populate site
  energyIntegral: number; // accumulated energy over the segment — verify at populate site
}
|
| 31 |
+
|
| 32 |
+
/** Rolling averages over recorded speech or silence segments. */
interface StatsSummary {
  avgDuration: number;       // average segment duration (seconds)
  avgEnergy: number;         // average per-segment energy
  avgEnergyIntegral: number; // average per-segment energy integral
}
|
| 38 |
+
|
| 39 |
+
/**
 * Snapshot of the processor's current statistics.
 * NOTE(review): getStats() is outside this chunk — field semantics below are
 * inferred from the config fields of the same names; confirm there.
 */
interface CurrentStats {
  silence: StatsSummary;       // rolling stats over silence segments
  speech: StatsSummary;        // rolling stats over speech segments
  noiseFloor: number;          // current adaptive noise-floor estimate
  snr: number;                 // most recent SNR measurement
  snrThreshold: number;        // active SNR threshold for speech detection
  minSnrThreshold: number;     // lower bound on the SNR threshold
  energyRiseThreshold: number; // threshold for rising-energy onset detection
}
|
| 49 |
+
|
| 50 |
+
/**
 * Mutable processor state; rebuilt by reset().
 * Hedged fields are populated outside this chunk — confirm before relying on them.
 */
interface ProcessorState {
  inSpeech: boolean;               // true while inside a detected speech segment
  speechStartTime: number | null;  // start time (s) of the active speech segment, or null
  silenceStartTime: number | null; // start time (s) of the current silence run — TODO confirm usage
  silenceCounter: number;          // consecutive sub-threshold chunks seen while in speech
  recentChunks: ChunkInfo[];       // bounded per-chunk history used for onset lookback
  speechEnergies: number[];        // recent speech-chunk energies — presumably for stats; verify
  silenceEnergies: number[];       // recent silence-chunk energies — presumably for stats; verify
  speechStats: SegmentStats[];     // statistics of completed speech segments
  silenceStats: SegmentStats[];    // statistics of completed silence segments
  currentStats: CurrentStats;      // current statistics snapshot
  segmentCounter: number;          // running count of segments — TODO confirm
  noiseFloor: number;              // adaptive noise-floor estimate (see updateNoiseFloor)
  recentEnergies: number[];        // recent raw energies — presumably for trend analysis; verify
  silenceDuration: number;         // accumulated seconds of continuous silence (processAudioData)
}
|
| 67 |
+
|
| 68 |
+
/**
 * A detected speech segment, expressed in processor time (seconds).
 * Consumers (e.g. AudioEngine) convert these times to frame indices and
 * extract the corresponding audio from the ring buffer.
 */
export interface ProcessedSegment {
  startTime: number; // segment start in seconds
  endTime: number;   // segment end in seconds
  duration: number;  // segment length in seconds
}
|
| 74 |
+
|
| 75 |
+
/**
 * Processor configuration. Defaults come from defaultAudioParams (see the
 * constructor); any field may be overridden at construction time.
 */
export interface AudioSegmentProcessorConfig {
  sampleRate: number;               // audio sample rate in Hz (default 16000)
  windowSize: number;               // analysis window length in samples (derived from windowDuration)
  minSpeechDuration: number;        // minimum speech length to accept — units per defaultAudioParams
  silenceThreshold: number;         // silence length (seconds) that ends a segment
  energyThreshold: number;          // energy above this marks a chunk as speech
  smaLength: number;                // moving-average window length — TODO confirm usage
  lookbackChunks: number;           // chunks of history available for onset lookback
  overlapDuration: number;          // overlap between adjacent segments (seconds) — TODO confirm
  lookbackDuration: number;         // time to extend a segment backwards (seconds)
  maxHistoryLength: number;         // history bound (recentChunks capped at 10x this value)
  noiseFloorAdaptationRate: number; // slow noise-floor adaptation coefficient
  fastAdaptationRate: number;       // fast noise-floor adaptation coefficient
  snrThreshold: number;             // SNR required to confirm speech
  minBackgroundDuration: number;    // minimum background time before adapting — TODO confirm
  minSnrThreshold: number;          // floor for the adaptive SNR threshold
  energyRiseThreshold: number;      // rising-energy criterion for onset detection
  maxSegmentDuration: number;       // proactively split segments longer than this (seconds)
  maxSilenceWithinSpeech: number;   // silence tolerated inside a segment — TODO confirm units
  endingSpeechTolerance: number;    // tolerance when deciding segment end — TODO confirm
  logger?: (message: string, data?: unknown) => void; // optional log sink (default console.log)
}
|
| 98 |
+
|
| 99 |
+
/**
|
| 100 |
+
* AudioSegmentProcessor - Sophisticated VAD with speech onset detection
|
| 101 |
+
*/
|
| 102 |
+
export class AudioSegmentProcessor {
|
| 103 |
+
private options: AudioSegmentProcessorConfig;
|
| 104 |
+
private state!: ProcessorState;
|
| 105 |
+
|
| 106 |
+
constructor(options: Partial<AudioSegmentProcessorConfig> = {}) {
|
| 107 |
+
const sampleRate = options.sampleRate ?? defaultAudioParams.sampleRate ?? 16000;
|
| 108 |
+
|
| 109 |
+
// Calculate window size based on sample rate (80ms window)
|
| 110 |
+
const windowSize = Math.round(DEFAULT_WINDOW_DURATION * sampleRate);
|
| 111 |
+
|
| 112 |
+
this.options = {
|
| 113 |
+
sampleRate,
|
| 114 |
+
|
| 115 |
+
minSpeechDuration: defaultAudioParams.minSpeechDuration,
|
| 116 |
+
silenceThreshold: defaultAudioParams.silenceLength,
|
| 117 |
+
energyThreshold: defaultAudioParams.audioThreshold,
|
| 118 |
+
smaLength: defaultAudioParams.smaLength,
|
| 119 |
+
lookbackChunks: defaultAudioParams.lookbackChunks,
|
| 120 |
+
overlapDuration: defaultAudioParams.overlapDuration,
|
| 121 |
+
lookbackDuration: defaultAudioParams.lookbackDuration,
|
| 122 |
+
maxHistoryLength: defaultAudioParams.maxHistoryLength,
|
| 123 |
+
noiseFloorAdaptationRate: defaultAudioParams.noiseFloorAdaptationRate,
|
| 124 |
+
fastAdaptationRate: defaultAudioParams.fastAdaptationRate,
|
| 125 |
+
snrThreshold: defaultAudioParams.snrThreshold,
|
| 126 |
+
minBackgroundDuration: defaultAudioParams.minBackgroundDuration,
|
| 127 |
+
minSnrThreshold: defaultAudioParams.minSnrThreshold,
|
| 128 |
+
energyRiseThreshold: defaultAudioParams.energyRiseThreshold,
|
| 129 |
+
maxSegmentDuration: defaultAudioParams.maxSegmentDuration,
|
| 130 |
+
maxSilenceWithinSpeech: defaultAudioParams.maxSilenceWithinSpeech,
|
| 131 |
+
endingSpeechTolerance: defaultAudioParams.endingSpeechTolerance,
|
| 132 |
+
logger: console.log,
|
| 133 |
+
...options,
|
| 134 |
+
// Ensure windowSize is recalculated if sampleRate was overridden
|
| 135 |
+
windowSize: Math.round(DEFAULT_WINDOW_DURATION * (options.sampleRate ?? sampleRate))
|
| 136 |
+
};
|
| 137 |
+
|
| 138 |
+
this.log('Initialized AudioSegmentProcessor', {
|
| 139 |
+
sampleRate: this.options.sampleRate,
|
| 140 |
+
windowSize: this.options.windowSize,
|
| 141 |
+
lookbackDuration: this.options.lookbackDuration,
|
| 142 |
+
overlapDuration: this.options.overlapDuration,
|
| 143 |
+
snrThreshold: this.options.snrThreshold,
|
| 144 |
+
minSnrThreshold: this.options.minSnrThreshold
|
| 145 |
+
});
|
| 146 |
+
|
| 147 |
+
this.reset();
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
private log(message: string, data?: unknown): void {
|
| 151 |
+
if (typeof this.options.logger === 'function') {
|
| 152 |
+
this.options.logger(`[AudioSegmentProcessor] ${message}`, data);
|
| 153 |
+
}
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
  /**
   * Process one audio chunk through the energy-based VAD state machine and
   * return any segments completed by this chunk.
   *
   * Responsibilities, in order:
   *  1. Track contiguous silence duration, the adaptive noise floor, and SNR.
   *  2. Maintain a bounded history of recent chunks for onset lookback.
   *  3. Proactively split segments that exceed maxSegmentDuration.
   *  4. Run the silence<->speech transitions: onset uses findSpeechStart()
   *     lookback; offset requires a counted run of silent chunks.
   *
   * @param chunk - Audio samples for this chunk (only its length is used
   *                here; the energy is supplied precomputed by the caller).
   * @param currentTime - Stream time in seconds at this chunk.
   * @param energy - Precomputed chunk energy, compared to energyThreshold.
   * @returns Completed segments detected while processing (possibly empty).
   */
  processAudioData(
    chunk: Float32Array,
    currentTime: number,
    energy: number
  ): ProcessedSegment[] {
    if (!chunk || !chunk.length) return [];

    const segments: ProcessedSegment[] = [];
    // Hard energy gate: a chunk is "speech" iff its energy exceeds the
    // configured threshold. SNR is computed below but does not drive this
    // classification.
    const isSpeech = energy > this.options.energyThreshold;

    // Update silence duration tracking (seconds of contiguous non-speech;
    // reset to 0 by any speech chunk).
    if (!isSpeech) {
      const chunkDurationSec = chunk.length / this.options.sampleRate;
      this.state.silenceDuration += chunkDurationSec;
    } else {
      this.state.silenceDuration = 0;
    }

    // Update noise floor and calculate SNR
    this.updateNoiseFloor(energy, isSpeech);
    const snr = this.calculateSNR(energy);

    // Track recent chunks for lookback
    this.state.recentChunks.push({
      time: currentTime,
      energy,
      isSpeech,
      snr
    });

    // Bound the lookback history at 10x the stats history length.
    if (this.state.recentChunks.length > this.options.maxHistoryLength * 10) {
      this.state.recentChunks.shift();
    }

    // --- Proactive Segment Splitting ---
    // Force-close a segment that has run past maxSegmentDuration and
    // immediately reopen a new one, so long utterances are emitted
    // incrementally instead of growing without bound.
    if (this.state.inSpeech && this.state.speechStartTime !== null) {
      const currentSpeechDuration = currentTime - this.state.speechStartTime;
      if (currentSpeechDuration > this.options.maxSegmentDuration) {
        this.log('Splitting long segment', {
          startTime: this.state.speechStartTime.toFixed(2),
          splitTime: currentTime.toFixed(2),
          duration: currentSpeechDuration.toFixed(2)
        });

        const segment = this.createSegment(this.state.speechStartTime, currentTime);
        if (segment) {
          segments.push(segment);
        }

        // Start new segment immediately
        this.startSpeech(currentTime, energy);
      }
    }

    // --- Speech State Machine ---
    if (!this.state.inSpeech && isSpeech) {
      // Transition: Silence -> Speech.
      // Back-date the segment start via lookback so quiet onsets are kept.
      const realStartIndex = this.findSpeechStart();
      const realStartTime = realStartIndex !== -1
        ? this.state.recentChunks[realStartIndex].time
        : currentTime;

      this.startSpeech(realStartTime, energy);

      this.log('Speech start detected', {
        detectedAt: currentTime.toFixed(2),
        actualStart: realStartTime.toFixed(2),
        lookbackDiff: (currentTime - realStartTime).toFixed(2),
        snr: snr.toFixed(2),
        noiseFloor: this.state.noiseFloor.toFixed(6)
      });
    } else if (this.state.inSpeech && !isSpeech) {
      // Transition: Speech -> potentially Silence
      this.state.silenceCounter++;

      // Number of consecutive silent chunks that equals silenceThreshold
      // seconds, given windowSize samples per chunk.
      const chunksNeeded = Math.ceil(this.options.silenceThreshold / (this.options.windowSize / this.options.sampleRate));

      // Periodic progress log (every 5th silent chunk) to avoid log spam.
      if (this.state.silenceCounter % 5 === 0) {
        this.log('Silence progressing', {
          counter: this.state.silenceCounter,
          needed: chunksNeeded,
          energy: energy.toFixed(6),
          snr: snr.toFixed(2)
        });
      }

      // Implement ending speech tolerance and max silence within speech.
      // NOTE(review): options.endingSpeechTolerance is not read anywhere in
      // this method despite the comment above — confirm whether it was meant
      // to gate this branch.
      const silenceDuration = this.state.silenceCounter * (this.options.windowSize / this.options.sampleRate);
      const isConfirmedSilence = this.state.silenceCounter >= chunksNeeded;

      // Check if we should allow some silence within speech
      if (silenceDuration < this.options.maxSilenceWithinSpeech) {
        // Not yet enough silence to consider it a break; the pause is
        // tolerated and its energy counted as part of the utterance.
        this.state.speechEnergies.push(energy);
      } else if (isConfirmedSilence) {
        // Confirmed silence - end speech segment
        if (this.state.speechStartTime !== null) {
          const speechDuration = currentTime - this.state.speechStartTime;
          const avgEnergy = this.state.speechEnergies.length > 0
            ? this.state.speechEnergies.reduce((a, b) => a + b, 0) / this.state.speechEnergies.length
            : 0;

          // Record per-segment statistics for updateStats() aggregation.
          this.state.speechStats.push({
            startTime: this.state.speechStartTime,
            endTime: currentTime,
            duration: speechDuration,
            avgEnergy,
            energyIntegral: avgEnergy * speechDuration
          });

          if (this.state.speechStats.length > this.options.maxHistoryLength) {
            this.state.speechStats.shift();
          }
        }

        // NOTE(review): options.minSpeechDuration is not enforced here or in
        // createSegment — very short segments are still emitted; confirm.
        const segment = this.createSegment(this.state.speechStartTime!, currentTime);
        if (segment) {
          segments.push(segment);
        }

        this.startSilence(currentTime);
      } else {
        // Accumulate silence energies while deciding
        this.state.silenceEnergies.push(energy);
      }
    } else {
      // Continue in current state
      if (this.state.inSpeech) {
        this.state.speechEnergies.push(energy);
      } else {
        this.state.silenceEnergies.push(energy);
      }
    }

    this.updateStats();

    return segments;
  }
|
| 297 |
+
|
| 298 |
+
/**
|
| 299 |
+
* Update noise floor using adaptive exponential moving average.
|
| 300 |
+
*/
|
| 301 |
+
private updateNoiseFloor(energy: number, isSpeech: boolean): void {
|
| 302 |
+
if (!isSpeech) {
|
| 303 |
+
// Blend between fast and normal adaptation rates based on silence duration
|
| 304 |
+
let adaptationRate = this.options.noiseFloorAdaptationRate;
|
| 305 |
+
|
| 306 |
+
if (this.state.silenceDuration < this.options.minBackgroundDuration) {
|
| 307 |
+
const blendFactor = Math.min(1, this.state.silenceDuration / this.options.minBackgroundDuration);
|
| 308 |
+
adaptationRate = this.options.fastAdaptationRate * (1 - blendFactor) +
|
| 309 |
+
this.options.noiseFloorAdaptationRate * blendFactor;
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
// Exponential moving average for noise floor tracking
|
| 313 |
+
this.state.noiseFloor = this.state.noiseFloor * (1 - adaptationRate) + energy * adaptationRate;
|
| 314 |
+
this.state.noiseFloor = Math.max(0.00001, this.state.noiseFloor);
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
// Track recent energies for analysis
|
| 318 |
+
this.state.recentEnergies.push(energy);
|
| 319 |
+
if (this.state.recentEnergies.length > 50) {
|
| 320 |
+
this.state.recentEnergies.shift();
|
| 321 |
+
}
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
/**
|
| 325 |
+
* Calculate Signal-to-Noise Ratio in dB.
|
| 326 |
+
*/
|
| 327 |
+
private calculateSNR(energy: number): number {
|
| 328 |
+
const noiseFloor = Math.max(0.0001, this.state.noiseFloor);
|
| 329 |
+
return 10 * Math.log10(energy / noiseFloor);
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
/**
|
| 333 |
+
* Start tracking a new speech segment.
|
| 334 |
+
*/
|
| 335 |
+
private startSpeech(time: number, energy: number): void {
|
| 336 |
+
this.state.inSpeech = true;
|
| 337 |
+
this.state.speechStartTime = time;
|
| 338 |
+
this.state.silenceCounter = 0;
|
| 339 |
+
this.state.speechEnergies = [energy];
|
| 340 |
+
this.state.silenceStartTime = null;
|
| 341 |
+
this.state.silenceDuration = 0;
|
| 342 |
+
|
| 343 |
+
const snr = this.calculateSNR(energy);
|
| 344 |
+
this.log('Speech state started', {
|
| 345 |
+
time: time.toFixed(2),
|
| 346 |
+
energy: energy.toFixed(6),
|
| 347 |
+
snr: snr.toFixed(2),
|
| 348 |
+
noiseFloor: this.state.noiseFloor.toFixed(6)
|
| 349 |
+
});
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
/**
|
| 353 |
+
* Transition to silence state.
|
| 354 |
+
*/
|
| 355 |
+
private startSilence(time: number): void {
|
| 356 |
+
this.state.inSpeech = false;
|
| 357 |
+
this.state.silenceStartTime = time;
|
| 358 |
+
this.state.speechStartTime = null;
|
| 359 |
+
this.state.silenceCounter = 0;
|
| 360 |
+
this.state.silenceEnergies = [];
|
| 361 |
+
this.state.silenceDuration = 0.001; // Avoid division by zero
|
| 362 |
+
|
| 363 |
+
this.log('Silence state started', {
|
| 364 |
+
time: time.toFixed(2),
|
| 365 |
+
noiseFloor: this.state.noiseFloor.toFixed(6)
|
| 366 |
+
});
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
  /**
   * Estimate the true onset of the current speech burst by looking back
   * through state.recentChunks.
   *
   * Strategy, in priority order:
   *  1. From the most recent speech-classified chunk, walk backwards looking
   *     for a rising-energy trend (each older->newer step up by at least
   *     energyRiseThreshold); if found, return the earliest rising chunk.
   *  2. Otherwise, return the chunk just after the last one whose SNR fell
   *     below minSnrThreshold (the SNR crossing point).
   *  3. Otherwise, fall back to a fixed 4-chunk lookback.
   *
   * @returns Index into state.recentChunks for the estimated onset
   *          (clamped to a valid index; may be 0).
   */
  private findSpeechStart(): number {
    const chunks = this.state.recentChunks;
    const minSnrThreshold = this.options.minSnrThreshold;

    // Find the most recent speech chunk.
    // NOTE(review): despite the name, firstSpeechIndex holds the LAST
    // speech-classified chunk in the history (the scan runs newest->oldest).
    let firstSpeechIndex = 0;
    for (let i = chunks.length - 1; i >= 0; i--) {
      if (chunks[i].isSpeech) {
        firstSpeechIndex = i;
        break;
      }
    }

    // Look for the earliest point where energy starts rising towards speech
    let earliestRisingIndex = firstSpeechIndex;
    let foundRisingTrend = false;

    for (let i = firstSpeechIndex - 1; i >= 0; i--) {
      // Check for rising energy trend (next chunk is at least
      // (1 + energyRiseThreshold)x this chunk's energy).
      if (i < chunks.length - 1 &&
          chunks[i + 1].energy > chunks[i].energy * (1 + this.options.energyRiseThreshold)) {
        earliestRisingIndex = i;
        foundRisingTrend = true;
      }

      // Stop if SNR drops significantly below threshold
      if (chunks[i].snr < minSnrThreshold / 2) {
        break;
      }

      // Limit lookback to ~500ms (assuming 80ms chunks)
      // NOTE(review): the chunk duration assumption is not enforced here;
      // with a different windowSize the 6-chunk cap covers a different span.
      if (firstSpeechIndex - i > 6) {
        break;
      }
    }

    if (foundRisingTrend) {
      this.log('Found rising energy trend for speech onset', {
        index: earliestRisingIndex,
        time: chunks[earliestRisingIndex].time.toFixed(3),
        energy: chunks[earliestRisingIndex].energy.toFixed(6),
        snr: chunks[earliestRisingIndex].snr.toFixed(2)
      });
      return earliestRisingIndex;
    }

    // Check for SNR crossing: first chunk (scanning backwards) whose SNR is
    // below threshold marks the boundary; the onset is the chunk after it.
    for (let i = firstSpeechIndex; i >= 0; i--) {
      if (chunks[i].snr < minSnrThreshold) {
        return Math.min(chunks.length - 1, i + 1);
      }
    }

    // Default lookback
    return Math.max(0, firstSpeechIndex - 4);
  }
|
| 428 |
+
|
| 429 |
+
/**
|
| 430 |
+
* Create a segment object from start/end times.
|
| 431 |
+
*/
|
| 432 |
+
private createSegment(startTime: number, endTime: number): ProcessedSegment | null {
|
| 433 |
+
const duration = endTime - startTime;
|
| 434 |
+
|
| 435 |
+
if (duration <= 0) {
|
| 436 |
+
this.log('Skipping segment with zero/negative duration');
|
| 437 |
+
return null;
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
return {
|
| 441 |
+
startTime,
|
| 442 |
+
endTime,
|
| 443 |
+
duration
|
| 444 |
+
};
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
/**
|
| 448 |
+
* Update internal statistics.
|
| 449 |
+
*/
|
| 450 |
+
private updateStats(): void {
|
| 451 |
+
const stats: CurrentStats = {
|
| 452 |
+
silence: { avgDuration: 0, avgEnergy: 0, avgEnergyIntegral: 0 },
|
| 453 |
+
speech: { avgDuration: 0, avgEnergy: 0, avgEnergyIntegral: 0 },
|
| 454 |
+
noiseFloor: this.state.noiseFloor,
|
| 455 |
+
snr: this.state.recentChunks.length > 0
|
| 456 |
+
? this.state.recentChunks[this.state.recentChunks.length - 1].snr
|
| 457 |
+
: 0,
|
| 458 |
+
snrThreshold: this.options.snrThreshold,
|
| 459 |
+
minSnrThreshold: this.options.minSnrThreshold,
|
| 460 |
+
energyRiseThreshold: this.options.energyRiseThreshold
|
| 461 |
+
};
|
| 462 |
+
|
| 463 |
+
if (this.state.silenceStats.length > 0) {
|
| 464 |
+
stats.silence = {
|
| 465 |
+
avgDuration: this.average(this.state.silenceStats.map(s => s.duration)),
|
| 466 |
+
avgEnergy: this.average(this.state.silenceStats.map(s => s.avgEnergy)),
|
| 467 |
+
avgEnergyIntegral: this.average(this.state.silenceStats.map(s => s.energyIntegral))
|
| 468 |
+
};
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
if (this.state.speechStats.length > 0) {
|
| 472 |
+
stats.speech = {
|
| 473 |
+
avgDuration: this.average(this.state.speechStats.map(s => s.duration)),
|
| 474 |
+
avgEnergy: this.average(this.state.speechStats.map(s => s.avgEnergy)),
|
| 475 |
+
avgEnergyIntegral: this.average(this.state.speechStats.map(s => s.energyIntegral))
|
| 476 |
+
};
|
| 477 |
+
}
|
| 478 |
+
|
| 479 |
+
this.state.currentStats = stats;
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
private average(arr: number[]): number {
|
| 483 |
+
if (arr.length === 0) return 0;
|
| 484 |
+
return arr.reduce((a, b) => a + b, 0) / arr.length;
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
/**
|
| 488 |
+
* Get current statistics.
|
| 489 |
+
*/
|
| 490 |
+
getStats(): CurrentStats {
|
| 491 |
+
return this.state.currentStats;
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
/**
|
| 495 |
+
* Get current state info for debugging.
|
| 496 |
+
*/
|
| 497 |
+
getStateInfo(): { inSpeech: boolean; noiseFloor: number; snr: number; speechStartTime: number | null } {
|
| 498 |
+
return {
|
| 499 |
+
inSpeech: this.state.inSpeech,
|
| 500 |
+
noiseFloor: this.state.noiseFloor,
|
| 501 |
+
snr: this.state.currentStats.snr,
|
| 502 |
+
speechStartTime: this.state.speechStartTime
|
| 503 |
+
};
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
/**
|
| 507 |
+
* Reset all state.
|
| 508 |
+
*/
|
| 509 |
+
reset(): void {
|
| 510 |
+
this.state = {
|
| 511 |
+
inSpeech: false,
|
| 512 |
+
speechStartTime: null,
|
| 513 |
+
silenceStartTime: null,
|
| 514 |
+
silenceCounter: 0,
|
| 515 |
+
recentChunks: [],
|
| 516 |
+
speechEnergies: [],
|
| 517 |
+
silenceEnergies: [],
|
| 518 |
+
speechStats: [],
|
| 519 |
+
silenceStats: [],
|
| 520 |
+
currentStats: {
|
| 521 |
+
silence: { avgDuration: 0, avgEnergy: 0, avgEnergyIntegral: 0 },
|
| 522 |
+
speech: { avgDuration: 0, avgEnergy: 0, avgEnergyIntegral: 0 },
|
| 523 |
+
noiseFloor: 0.005,
|
| 524 |
+
snr: 0,
|
| 525 |
+
snrThreshold: this.options.snrThreshold,
|
| 526 |
+
minSnrThreshold: this.options.minSnrThreshold,
|
| 527 |
+
energyRiseThreshold: this.options.energyRiseThreshold
|
| 528 |
+
},
|
| 529 |
+
segmentCounter: 0,
|
| 530 |
+
noiseFloor: 0.005,
|
| 531 |
+
recentEnergies: [],
|
| 532 |
+
silenceDuration: 0
|
| 533 |
+
};
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
  // ========================================================================
  // Configuration Setters
  // ========================================================================
  // Each setter overwrites one tuning option at runtime and logs the change.
  // New values take effect on the next processAudioData() call; existing
  // state (noise floor, histories) is not reset.
  // NOTE(review): units are not stated on these setters — durations appear
  // to be seconds and thresholds linear energy / dB; confirm against callers.

  setThreshold(threshold: number): void {
    this.options.energyThreshold = threshold;
    this.log('Updated energy threshold', threshold);
  }

  setSilenceLength(length: number): void {
    this.options.silenceThreshold = length;
    this.log('Updated silence threshold', length);
  }

  setLookbackDuration(duration: number): void {
    this.options.lookbackDuration = duration;
    this.log('Updated lookback duration', duration);
  }

  setOverlapDuration(duration: number): void {
    this.options.overlapDuration = duration;
    this.log('Updated overlap duration', duration);
  }

  setSnrThreshold(threshold: number): void {
    this.options.snrThreshold = threshold;
    this.log('Updated SNR threshold', threshold);
  }

  setMinSnrThreshold(threshold: number): void {
    this.options.minSnrThreshold = threshold;
    this.log('Updated minimum SNR threshold', threshold);
  }

  setNoiseFloorAdaptationRate(rate: number): void {
    this.options.noiseFloorAdaptationRate = rate;
    this.log('Updated noise floor adaptation rate', rate);
  }

  setFastAdaptationRate(rate: number): void {
    this.options.fastAdaptationRate = rate;
    this.log('Updated fast adaptation rate', rate);
  }

  setEnergyRiseThreshold(threshold: number): void {
    this.options.energyRiseThreshold = threshold;
    this.log('Updated energy rise threshold', threshold);
  }

  setMinBackgroundDuration(duration: number): void {
    this.options.minBackgroundDuration = duration;
    this.log('Updated minimum background duration', duration);
  }

  setMaxSegmentDuration(duration: number): void {
    this.options.maxSegmentDuration = duration;
    this.log('Updated maximum segment duration', duration);
  }

  setMinSpeechDuration(duration: number): void {
    this.options.minSpeechDuration = duration;
    this.log('Updated minimum speech duration', duration);
  }

  setMaxSilenceWithinSpeech(duration: number): void {
    this.options.maxSilenceWithinSpeech = duration;
    this.log('Updated max silence within speech', duration);
  }

  setEndingSpeechTolerance(duration: number): void {
    this.options.endingSpeechTolerance = duration;
    this.log('Updated ending speech tolerance', duration);
  }
}
|
src/lib/audio/MelWorkerClient.ts
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Keet - Mel Worker Client
|
| 3 |
+
*
|
| 4 |
+
* Manages the mel producer Web Worker lifecycle and provides a promise-based API.
|
| 5 |
+
*
|
| 6 |
+
* Usage:
|
| 7 |
+
* const melClient = new MelWorkerClient();
|
| 8 |
+
* await melClient.init({ nMels: 128 });
|
| 9 |
+
*
|
| 10 |
+
* // Continuously push audio chunks (fire-and-forget)
|
| 11 |
+
* melClient.pushAudio(chunk);
|
| 12 |
+
*
|
| 13 |
+
* // When inference needs features:
|
| 14 |
+
* const features = await melClient.getFeatures(startSample, endSample);
|
| 15 |
+
* // features = { features: Float32Array, T: number, melBins: number }
|
| 16 |
+
*/
|
| 17 |
+
|
| 18 |
+
export interface MelFeatures {
|
| 19 |
+
features: Float32Array;
|
| 20 |
+
T: number;
|
| 21 |
+
melBins: number;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
export class MelWorkerClient {
|
| 25 |
+
private worker: Worker;
|
| 26 |
+
private messageId = 0;
|
| 27 |
+
private pendingPromises = new Map<number, { resolve: (v: any) => void; reject: (e: any) => void }>();
|
| 28 |
+
private initFailed = false;
|
| 29 |
+
|
| 30 |
+
constructor() {
|
| 31 |
+
// Create worker eagerly in constructor (matching TranscriptionWorkerClient pattern).
|
| 32 |
+
// Vite's worker detection reliably picks up new Worker(new URL(...)) in constructors.
|
| 33 |
+
this.worker = new Worker(new URL('./mel.worker.ts', import.meta.url), {
|
| 34 |
+
type: 'module'
|
| 35 |
+
});
|
| 36 |
+
|
| 37 |
+
this.worker.onmessage = (e: MessageEvent) => {
|
| 38 |
+
this.handleMessage(e);
|
| 39 |
+
};
|
| 40 |
+
|
| 41 |
+
this.worker.onerror = (e: Event) => {
|
| 42 |
+
// Worker load errors fire as plain Event, not ErrorEvent
|
| 43 |
+
const errEvent = e as ErrorEvent;
|
| 44 |
+
const msg = errEvent.message || 'Worker failed to load';
|
| 45 |
+
const loc = errEvent.filename ? ` at ${errEvent.filename}:${errEvent.lineno}:${errEvent.colno}` : '';
|
| 46 |
+
console.error(`[MelWorkerClient] Worker error: ${msg}${loc}`, e);
|
| 47 |
+
this.initFailed = true;
|
| 48 |
+
// Reject all pending promises so callers don't hang
|
| 49 |
+
for (const [, promise] of this.pendingPromises) {
|
| 50 |
+
promise.reject(new Error(`[MelWorkerClient] ${msg}${loc}`));
|
| 51 |
+
}
|
| 52 |
+
this.pendingPromises.clear();
|
| 53 |
+
};
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/**
|
| 57 |
+
* Initialize the mel worker with configuration.
|
| 58 |
+
*/
|
| 59 |
+
async init(config: { nMels?: number } = {}): Promise<void> {
|
| 60 |
+
if (this.initFailed) {
|
| 61 |
+
throw new Error('[MelWorkerClient] Worker failed to load');
|
| 62 |
+
}
|
| 63 |
+
await this.sendRequest('INIT', config);
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
/**
|
| 67 |
+
* Push a resampled audio chunk to the mel worker (fire-and-forget).
|
| 68 |
+
* Call this for every audio chunk from AudioEngine.
|
| 69 |
+
*/
|
| 70 |
+
pushAudio(chunk: Float32Array): void {
|
| 71 |
+
if (this.initFailed) return;
|
| 72 |
+
// Transfer the buffer for zero-copy (caller must not reuse the chunk)
|
| 73 |
+
// If caller needs to keep it, they should slice() first
|
| 74 |
+
this.worker.postMessage(
|
| 75 |
+
{ type: 'PUSH_AUDIO', payload: chunk },
|
| 76 |
+
[chunk.buffer]
|
| 77 |
+
);
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
/**
|
| 81 |
+
* Push audio without transferring ownership (caller keeps the buffer).
|
| 82 |
+
* Slightly less efficient but safe when caller needs the data.
|
| 83 |
+
*/
|
| 84 |
+
pushAudioCopy(chunk: Float32Array): void {
|
| 85 |
+
if (this.initFailed) return;
|
| 86 |
+
const copy = new Float32Array(chunk);
|
| 87 |
+
this.worker.postMessage(
|
| 88 |
+
{ type: 'PUSH_AUDIO', payload: copy },
|
| 89 |
+
[copy.buffer]
|
| 90 |
+
);
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
/**
|
| 94 |
+
* Request mel features for a sample range.
|
| 95 |
+
* Returns null if no frames are available in the range.
|
| 96 |
+
*
|
| 97 |
+
* @param startSample - Start sample index
|
| 98 |
+
* @param endSample - End sample index
|
| 99 |
+
* @param normalize - If true (default), return normalized features for ASR.
|
| 100 |
+
* If false, return raw log-mel values for visualization with fixed dB scaling.
|
| 101 |
+
* See mel.worker.ts for performance notes when using normalize=false.
|
| 102 |
+
*/
|
| 103 |
+
async getFeatures(startSample: number, endSample: number, normalize: boolean = true): Promise<MelFeatures | null> {
|
| 104 |
+
return this.sendRequest('GET_FEATURES', { startSample, endSample, normalize });
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
/**
|
| 108 |
+
* Get the last mel frame (raw log-mel, 128 bins) for equalizer-style display.
|
| 109 |
+
* Averages last 2 frames for smoother bars. Returns null if no frames yet.
|
| 110 |
+
*/
|
| 111 |
+
async getLastMelFrame(): Promise<Float32Array | null> {
|
| 112 |
+
const p = await this.sendRequest('GET_LAST_MEL_FRAME', {});
|
| 113 |
+
return (p && p.melFrame) ? p.melFrame : null;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
/**
|
| 117 |
+
* Get worker status (total samples, computed frames, etc.)
|
| 118 |
+
*/
|
| 119 |
+
async getStatus(): Promise<{
|
| 120 |
+
totalSamples: number;
|
| 121 |
+
computedFrames: number;
|
| 122 |
+
bufferCapacityFrames: number;
|
| 123 |
+
melBins: number;
|
| 124 |
+
}> {
|
| 125 |
+
return this.sendRequest('GET_STATUS', {});
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
/**
|
| 129 |
+
* Reset the mel worker (clear all buffers).
|
| 130 |
+
*/
|
| 131 |
+
async reset(): Promise<void> {
|
| 132 |
+
return this.sendRequest('RESET', {});
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/**
|
| 136 |
+
* Dispose the worker.
|
| 137 |
+
*/
|
| 138 |
+
dispose(): void {
|
| 139 |
+
this.worker.terminate();
|
| 140 |
+
// Reject all pending promises
|
| 141 |
+
for (const [, promise] of this.pendingPromises) {
|
| 142 |
+
promise.reject(new Error('MelWorkerClient disposed'));
|
| 143 |
+
}
|
| 144 |
+
this.pendingPromises.clear();
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
// ─── Internal ────────────────────────────────────────────────────────
|
| 148 |
+
|
| 149 |
+
private handleMessage(e: MessageEvent): void {
|
| 150 |
+
const { type, payload, id } = e.data;
|
| 151 |
+
|
| 152 |
+
if (type === 'ERROR') {
|
| 153 |
+
const pending = this.pendingPromises.get(id);
|
| 154 |
+
if (pending) {
|
| 155 |
+
this.pendingPromises.delete(id);
|
| 156 |
+
pending.reject(new Error(payload));
|
| 157 |
+
}
|
| 158 |
+
return;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
// Match response to request by id
|
| 162 |
+
if (id !== undefined) {
|
| 163 |
+
const pending = this.pendingPromises.get(id);
|
| 164 |
+
if (pending) {
|
| 165 |
+
this.pendingPromises.delete(id);
|
| 166 |
+
pending.resolve(payload);
|
| 167 |
+
}
|
| 168 |
+
}
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
private sendRequest(type: string, payload: any): Promise<any> {
|
| 172 |
+
return new Promise((resolve, reject) => {
|
| 173 |
+
if (this.initFailed) {
|
| 174 |
+
reject(new Error('MelWorkerClient: worker failed to load'));
|
| 175 |
+
return;
|
| 176 |
+
}
|
| 177 |
+
const id = ++this.messageId;
|
| 178 |
+
this.pendingPromises.set(id, { resolve, reject });
|
| 179 |
+
this.worker.postMessage({ type, payload, id });
|
| 180 |
+
});
|
| 181 |
+
}
|
| 182 |
+
}
|
src/lib/audio/RingBuffer.test.ts
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { describe, it, expect, beforeEach } from 'vitest';
|
| 2 |
+
import { RingBuffer } from './RingBuffer';
|
| 3 |
+
|
| 4 |
+
describe('RingBuffer', () => {
|
| 5 |
+
let ringBuffer: RingBuffer;
|
| 6 |
+
const SAMPLE_RATE = 16000;
|
| 7 |
+
const DURATION_SECONDS = 1; // 1 second buffer for easy calculations
|
| 8 |
+
const MAX_FRAMES = SAMPLE_RATE * DURATION_SECONDS;
|
| 9 |
+
|
| 10 |
+
beforeEach(() => {
|
| 11 |
+
ringBuffer = new RingBuffer(SAMPLE_RATE, DURATION_SECONDS);
|
| 12 |
+
});
|
| 13 |
+
|
| 14 |
+
describe('Initialization', () => {
|
| 15 |
+
it('should initialize with correct parameters', () => {
|
| 16 |
+
expect(ringBuffer.sampleRate).toBe(SAMPLE_RATE);
|
| 17 |
+
expect(ringBuffer.maxFrames).toBe(MAX_FRAMES);
|
| 18 |
+
expect(ringBuffer.getSize()).toBe(MAX_FRAMES);
|
| 19 |
+
expect(ringBuffer.getCurrentFrame()).toBe(0);
|
| 20 |
+
expect(ringBuffer.getFillCount()).toBe(0);
|
| 21 |
+
});
|
| 22 |
+
|
| 23 |
+
it('should calculate maxFrames based on duration', () => {
|
| 24 |
+
const rb = new RingBuffer(8000, 0.5);
|
| 25 |
+
expect(rb.maxFrames).toBe(4000);
|
| 26 |
+
});
|
| 27 |
+
});
|
| 28 |
+
|
| 29 |
+
describe('Writing Data', () => {
|
| 30 |
+
it('should write data correctly when buffer is empty', () => {
|
| 31 |
+
const chunk = new Float32Array([1, 2, 3]);
|
| 32 |
+
ringBuffer.write(chunk);
|
| 33 |
+
|
| 34 |
+
expect(ringBuffer.getCurrentFrame()).toBe(3);
|
| 35 |
+
expect(ringBuffer.getFillCount()).toBe(3);
|
| 36 |
+
|
| 37 |
+
const readData = ringBuffer.read(0, 3);
|
| 38 |
+
expect(readData).toEqual(chunk);
|
| 39 |
+
});
|
| 40 |
+
|
| 41 |
+
it('should append data correctly', () => {
|
| 42 |
+
const chunk1 = new Float32Array([1, 2]);
|
| 43 |
+
const chunk2 = new Float32Array([3, 4]);
|
| 44 |
+
|
| 45 |
+
ringBuffer.write(chunk1);
|
| 46 |
+
ringBuffer.write(chunk2);
|
| 47 |
+
|
| 48 |
+
expect(ringBuffer.getCurrentFrame()).toBe(4);
|
| 49 |
+
const readData = ringBuffer.read(0, 4);
|
| 50 |
+
expect(readData).toEqual(new Float32Array([1, 2, 3, 4]));
|
| 51 |
+
});
|
| 52 |
+
|
| 53 |
+
it('should handle wrap-around correctly', () => {
|
| 54 |
+
// Fill buffer almost to the end
|
| 55 |
+
const initialFill = new Float32Array(MAX_FRAMES - 2);
|
| 56 |
+
initialFill.fill(0.5);
|
| 57 |
+
ringBuffer.write(initialFill);
|
| 58 |
+
|
| 59 |
+
// Write a chunk that wraps around
|
| 60 |
+
const chunk = new Float32Array([1, 2, 3, 4]);
|
| 61 |
+
ringBuffer.write(chunk);
|
| 62 |
+
|
| 63 |
+
expect(ringBuffer.getCurrentFrame()).toBe(MAX_FRAMES - 2 + 4);
|
| 64 |
+
|
| 65 |
+
// Read the wrapped chunk
|
| 66 |
+
// Start reading from where we wrote the chunk
|
| 67 |
+
const startFrame = MAX_FRAMES - 2;
|
| 68 |
+
const endFrame = startFrame + 4;
|
| 69 |
+
const readData = ringBuffer.read(startFrame, endFrame);
|
| 70 |
+
|
| 71 |
+
expect(readData).toEqual(chunk);
|
| 72 |
+
});
|
| 73 |
+
|
| 74 |
+
it('should handle chunk larger than buffer size', () => {
|
| 75 |
+
const largeChunk = new Float32Array(MAX_FRAMES + 10);
|
| 76 |
+
for(let i = 0; i < largeChunk.length; i++) {
|
| 77 |
+
largeChunk[i] = i;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
ringBuffer.write(largeChunk);
|
| 81 |
+
|
| 82 |
+
expect(ringBuffer.getCurrentFrame()).toBe(MAX_FRAMES + 10);
|
| 83 |
+
expect(ringBuffer.getFillCount()).toBe(MAX_FRAMES);
|
| 84 |
+
|
| 85 |
+
// Should contain the last MAX_FRAMES of the large chunk
|
| 86 |
+
const expectedData = largeChunk.subarray(10);
|
| 87 |
+
// The buffer now holds frames from 10 to MAX_FRAMES + 10
|
| 88 |
+
const readData = ringBuffer.read(10, MAX_FRAMES + 10);
|
| 89 |
+
|
| 90 |
+
expect(readData).toEqual(expectedData);
|
| 91 |
+
});
|
| 92 |
+
});
|
| 93 |
+
|
| 94 |
+
describe('Reading Data', () => {
|
| 95 |
+
it('should read valid range correctly', () => {
|
| 96 |
+
const chunk = new Float32Array([1, 2, 3, 4, 5]);
|
| 97 |
+
ringBuffer.write(chunk);
|
| 98 |
+
|
| 99 |
+
const readData = ringBuffer.read(1, 4); // indices 1, 2, 3
|
| 100 |
+
expect(readData).toEqual(new Float32Array([2, 3, 4]));
|
| 101 |
+
});
|
| 102 |
+
|
| 103 |
+
it('should return empty array when startFrame >= endFrame', () => {
|
| 104 |
+
const chunk = new Float32Array([1, 2, 3]);
|
| 105 |
+
ringBuffer.write(chunk);
|
| 106 |
+
|
| 107 |
+
expect(ringBuffer.read(1, 1).length).toBe(0);
|
| 108 |
+
expect(ringBuffer.read(2, 1).length).toBe(0);
|
| 109 |
+
});
|
| 110 |
+
|
| 111 |
+
    it('should throw RangeError when startFrame is negative', () => {
      // Absolute frame indices are global offsets; negative is never valid.
      expect(() => ringBuffer.read(-1, 5)).toThrow(RangeError);
    });

    it('should throw RangeError when reading overwritten data', () => {
      // Write more than capacity
      const chunk = new Float32Array(MAX_FRAMES + 10);
      ringBuffer.write(chunk);

      // Oldest available frame is 10
      // Trying to read frame 5 should fail
      expect(() => ringBuffer.read(5, 15)).toThrow(RangeError);
    });

    it('should throw RangeError when reading future data', () => {
      const chunk = new Float32Array([1, 2, 3]);
      ringBuffer.write(chunk);

      // Current frame is 3. Requesting up to 5 should fail.
      expect(() => ringBuffer.read(0, 5)).toThrow(RangeError);
    });

    it('should handle reading across wrap-around point', () => {
      // Fill buffer almost to the end
      const initialFill = new Float32Array(MAX_FRAMES - 2);
      for (let i = 0; i < MAX_FRAMES - 2; i++) initialFill[i] = i;
      ringBuffer.write(initialFill);

      // Write more to wrap around
      const chunk = new Float32Array([100, 101, 102, 103]);
      ringBuffer.write(chunk);

      // Buffer now has:
      // [ ... (MAX_FRAMES-2 items), 100, 101, 102, 103 ] logically
      // Physically:
      // Indices [MAX_FRAMES-2, MAX_FRAMES-1] have [100, 101]
      // Indices [0, 1] have [102, 103]

      // Read across the boundary
      const startFrame = MAX_FRAMES - 3; // One before the new chunk
      const endFrame = MAX_FRAMES + 1; // Into the wrapped part

      const readData = ringBuffer.read(startFrame, endFrame);
      // Expected: [last of initial, 100, 101, 102]
      const expected = new Float32Array([
        initialFill[initialFill.length - 1],
        100, 101, 102
      ]);

      // A contiguous result across the physical wrap point proves the
      // two-part copy in RingBuffer.read stitches correctly.
      expect(readData).toEqual(expected);
    });
|
| 162 |
+
});
|
| 163 |
+
|
| 164 |
+
  describe('Helper Methods', () => {
    it('getCurrentTime should return correct time in seconds', () => {
      // 1 second buffer
      const chunk = new Float32Array(SAMPLE_RATE / 2); // 0.5 seconds
      ringBuffer.write(chunk);

      // getCurrentTime = currentFrame / sampleRate
      expect(ringBuffer.getCurrentTime()).toBe(0.5);
    });

    it('getBaseFrameOffset should return 0 when not full', () => {
      const chunk = new Float32Array(100);
      ringBuffer.write(chunk);
      // Nothing has been overwritten yet, so the oldest frame is still 0.
      expect(ringBuffer.getBaseFrameOffset()).toBe(0);
    });

    it('getBaseFrameOffset should update when overwritten', () => {
      const chunk = new Float32Array(MAX_FRAMES + 50);
      ringBuffer.write(chunk);
      // 50 frames beyond capacity were written, so the 50 oldest are gone.
      expect(ringBuffer.getBaseFrameOffset()).toBe(50);
    });

    it('reset should clear buffer and reset counters', () => {
      const chunk = new Float32Array([1, 2, 3]);
      ringBuffer.write(chunk);

      ringBuffer.reset();

      expect(ringBuffer.getCurrentFrame()).toBe(0);
      expect(ringBuffer.getFillCount()).toBe(0);
      expect(ringBuffer.read(0, 0).length).toBe(0); // Check consistency

      // Verify buffer content is cleared (or at least pointer is reset)
      // Writing new data should start from 0
      const newChunk = new Float32Array([9, 9]);
      ringBuffer.write(newChunk);
      expect(ringBuffer.read(0, 2)).toEqual(newChunk);
    });
  });
|
| 202 |
+
});
|
src/lib/audio/RingBuffer.ts
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { IRingBuffer } from './types';
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* Fixed-size circular buffer for PCM audio samples.
|
| 5 |
+
* Uses global frame offsets for absolute addressing.
|
| 6 |
+
*/
|
| 7 |
+
export class RingBuffer implements IRingBuffer {
|
| 8 |
+
readonly sampleRate: number;
|
| 9 |
+
readonly maxFrames: number;
|
| 10 |
+
private buffer: Float32Array;
|
| 11 |
+
private currentFrame: number = 0; // The next frame to be written (global)
|
| 12 |
+
|
| 13 |
+
constructor(sampleRate: number, durationSeconds: number) {
|
| 14 |
+
this.sampleRate = sampleRate;
|
| 15 |
+
this.maxFrames = Math.floor(sampleRate * durationSeconds);
|
| 16 |
+
this.buffer = new Float32Array(this.maxFrames);
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
/**
|
| 20 |
+
* Append PCM frames to the buffer.
|
| 21 |
+
*/
|
| 22 |
+
write(chunk: Float32Array): void {
|
| 23 |
+
let chunkLength = chunk.length;
|
| 24 |
+
let dataToWrite = chunk;
|
| 25 |
+
|
| 26 |
+
// If chunk is larger than buffer (unlikely but handle it), only take the end
|
| 27 |
+
if (chunkLength > this.maxFrames) {
|
| 28 |
+
const start = chunkLength - this.maxFrames;
|
| 29 |
+
dataToWrite = chunk.subarray(start);
|
| 30 |
+
// Advance frame counter for the skipped part
|
| 31 |
+
this.currentFrame += start;
|
| 32 |
+
// Now we only write maxFrames
|
| 33 |
+
chunkLength = this.maxFrames;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
const writePos = this.currentFrame % this.maxFrames;
|
| 37 |
+
const remainingSpace = this.maxFrames - writePos;
|
| 38 |
+
|
| 39 |
+
if (chunkLength <= remainingSpace) {
|
| 40 |
+
// Single operation
|
| 41 |
+
this.buffer.set(dataToWrite, writePos);
|
| 42 |
+
} else {
|
| 43 |
+
// Wrap around
|
| 44 |
+
this.buffer.set(dataToWrite.subarray(0, remainingSpace), writePos);
|
| 45 |
+
this.buffer.set(dataToWrite.subarray(remainingSpace), 0);
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
this.currentFrame += chunkLength;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
/**
|
| 52 |
+
* Read samples from [startFrame, endFrame).
|
| 53 |
+
* @throws RangeError if data has been overwritten by circular buffer.
|
| 54 |
+
*/
|
| 55 |
+
read(startFrame: number, endFrame: number): Float32Array {
|
| 56 |
+
if (startFrame < 0) throw new RangeError('startFrame must be non-negative');
|
| 57 |
+
if (endFrame <= startFrame) return new Float32Array(0);
|
| 58 |
+
|
| 59 |
+
const baseFrame = this.getBaseFrameOffset();
|
| 60 |
+
if (startFrame < baseFrame) {
|
| 61 |
+
throw new RangeError(
|
| 62 |
+
`Requested frame ${startFrame} has been overwritten. Oldest available: ${baseFrame}`
|
| 63 |
+
);
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
if (endFrame > this.currentFrame) {
|
| 67 |
+
throw new RangeError(
|
| 68 |
+
`Requested frame ${endFrame} is in the future. Latest available: ${this.currentFrame}`
|
| 69 |
+
);
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
const length = endFrame - startFrame;
|
| 73 |
+
const result = new Float32Array(length);
|
| 74 |
+
|
| 75 |
+
const readPos = startFrame % this.maxFrames;
|
| 76 |
+
const remainingAtEnd = this.maxFrames - readPos;
|
| 77 |
+
|
| 78 |
+
if (length <= remainingAtEnd) {
|
| 79 |
+
result.set(this.buffer.subarray(readPos, readPos + length));
|
| 80 |
+
} else {
|
| 81 |
+
result.set(this.buffer.subarray(readPos, this.maxFrames));
|
| 82 |
+
result.set(this.buffer.subarray(0, length - remainingAtEnd), remainingAtEnd);
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
return result;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
/**
|
| 89 |
+
* Read samples from [startFrame, endFrame) into a caller-supplied buffer.
|
| 90 |
+
* Zero-allocation: writes into `dest` starting at offset 0.
|
| 91 |
+
* Returns the number of samples actually written (may be less than
|
| 92 |
+
* dest.length if the requested range is shorter).
|
| 93 |
+
* @throws RangeError if data has been overwritten or is in the future.
|
| 94 |
+
*/
|
| 95 |
+
readInto(startFrame: number, endFrame: number, dest: Float32Array): number {
|
| 96 |
+
if (startFrame < 0) throw new RangeError('startFrame must be non-negative');
|
| 97 |
+
if (endFrame <= startFrame) return 0;
|
| 98 |
+
|
| 99 |
+
const baseFrame = this.getBaseFrameOffset();
|
| 100 |
+
if (startFrame < baseFrame) {
|
| 101 |
+
throw new RangeError(
|
| 102 |
+
`Requested frame ${startFrame} has been overwritten. Oldest available: ${baseFrame}`
|
| 103 |
+
);
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
if (endFrame > this.currentFrame) {
|
| 107 |
+
throw new RangeError(
|
| 108 |
+
`Requested frame ${endFrame} is in the future. Latest available: ${this.currentFrame}`
|
| 109 |
+
);
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
const length = endFrame - startFrame;
|
| 113 |
+
const readPos = startFrame % this.maxFrames;
|
| 114 |
+
const remainingAtEnd = this.maxFrames - readPos;
|
| 115 |
+
|
| 116 |
+
if (length <= remainingAtEnd) {
|
| 117 |
+
dest.set(this.buffer.subarray(readPos, readPos + length));
|
| 118 |
+
} else {
|
| 119 |
+
dest.set(this.buffer.subarray(readPos, this.maxFrames));
|
| 120 |
+
dest.set(this.buffer.subarray(0, length - remainingAtEnd), remainingAtEnd);
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
return length;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
getCurrentFrame(): number {
|
| 127 |
+
return this.currentFrame;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
getFillCount(): number {
|
| 131 |
+
return Math.min(this.currentFrame, this.maxFrames);
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
getSize(): number {
|
| 135 |
+
return this.maxFrames;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
getCurrentTime(): number {
|
| 139 |
+
return this.currentFrame / this.sampleRate;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
getBaseFrameOffset(): number {
|
| 143 |
+
return Math.max(0, this.currentFrame - this.maxFrames);
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
reset(): void {
|
| 147 |
+
this.currentFrame = 0;
|
| 148 |
+
this.buffer.fill(0);
|
| 149 |
+
}
|
| 150 |
+
}
|
src/lib/audio/audioParams.ts
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Keet - Audio Processing Parameters
|
| 3 |
+
* Ported from legacy UI project/config/audioParams.js
|
| 4 |
+
*
|
| 5 |
+
* Contains all parameters for Voice Activity Detection (VAD) and segment processing.
|
| 6 |
+
* Values are sample-rate-aligned to ensure exact integer sample counts.
|
| 7 |
+
*/
|
| 8 |
+
|
| 9 |
+
/** Segmentation preset configuration */
|
| 10 |
+
export interface SegmentationPreset {
|
| 11 |
+
name: string;
|
| 12 |
+
icon: string;
|
| 13 |
+
speechHangover: number;
|
| 14 |
+
audioThreshold: number;
|
| 15 |
+
silenceLength: number;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
/** All audio processing parameters */
|
| 19 |
+
export interface AudioParams {
|
| 20 |
+
// Basic VAD settings
|
| 21 |
+
audioThreshold: number;
|
| 22 |
+
silenceLength: number;
|
| 23 |
+
speechHangover: number;
|
| 24 |
+
|
| 25 |
+
// Advanced VAD settings
|
| 26 |
+
energyScale: number;
|
| 27 |
+
hysteresisRatio: number;
|
| 28 |
+
minSpeechDuration: number;
|
| 29 |
+
maxSilenceWithinSpeech: number;
|
| 30 |
+
endingSpeechTolerance: number;
|
| 31 |
+
endingEnergyThreshold: number;
|
| 32 |
+
minEnergyIntegral: number;
|
| 33 |
+
minEnergyPerSecond: number;
|
| 34 |
+
|
| 35 |
+
// Sample-rate-aligned timing parameters
|
| 36 |
+
windowDuration: number;
|
| 37 |
+
lookbackDuration: number;
|
| 38 |
+
overlapDuration: number;
|
| 39 |
+
|
| 40 |
+
// Buffer durations
|
| 41 |
+
recentAudioDuration: number;
|
| 42 |
+
visualizationDuration: number;
|
| 43 |
+
|
| 44 |
+
// SNR and Noise Floor adaptation settings
|
| 45 |
+
snrThreshold: number;
|
| 46 |
+
minSnrThreshold: number;
|
| 47 |
+
noiseFloorAdaptationRate: number;
|
| 48 |
+
fastAdaptationRate: number;
|
| 49 |
+
minBackgroundDuration: number;
|
| 50 |
+
energyRiseThreshold: number;
|
| 51 |
+
|
| 52 |
+
// Processor-specific parameters
|
| 53 |
+
smaLength: number;
|
| 54 |
+
lookbackChunks: number;
|
| 55 |
+
maxHistoryLength: number;
|
| 56 |
+
maxSegmentDuration: number;
|
| 57 |
+
|
| 58 |
+
// Adaptive energy threshold settings
|
| 59 |
+
useAdaptiveEnergyThresholds: boolean;
|
| 60 |
+
adaptiveEnergyIntegralFactor: number;
|
| 61 |
+
adaptiveEnergyPerSecondFactor: number;
|
| 62 |
+
minAdaptiveEnergyIntegral: number;
|
| 63 |
+
minAdaptiveEnergyPerSecond: number;
|
| 64 |
+
|
| 65 |
+
// Default sample rate
|
| 66 |
+
sampleRate: number;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
/**
|
| 70 |
+
* Segmentation presets for different use cases
|
| 71 |
+
*/
|
| 72 |
+
export const segmentationPresets: Record<'fast' | 'medium' | 'slow', SegmentationPreset> = {
|
| 73 |
+
fast: {
|
| 74 |
+
name: 'Fast (Short Segments)',
|
| 75 |
+
icon: 'bolt',
|
| 76 |
+
speechHangover: 0.08,
|
| 77 |
+
audioThreshold: 0.120, // Higher threshold
|
| 78 |
+
silenceLength: 0.1 // Short silence duration (2 windows)
|
| 79 |
+
},
|
| 80 |
+
medium: {
|
| 81 |
+
name: 'Medium (Balanced)',
|
| 82 |
+
icon: 'av_timer',
|
| 83 |
+
speechHangover: 0.16,
|
| 84 |
+
audioThreshold: 0.08, // Medium threshold
|
| 85 |
+
silenceLength: 0.4 // Medium silence duration (5 windows)
|
| 86 |
+
},
|
| 87 |
+
slow: {
|
| 88 |
+
name: 'Slow (Long Segments)',
|
| 89 |
+
icon: 'hourglass_bottom',
|
| 90 |
+
speechHangover: 0.24,
|
| 91 |
+
audioThreshold: 0.06, // Lower threshold (original default)
|
| 92 |
+
silenceLength: 1.0 // Long silence duration (10 windows)
|
| 93 |
+
}
|
| 94 |
+
};
|
| 95 |
+
|
| 96 |
+
// ============================================================================
|
| 97 |
+
// Default Parameter Values (derived from 'medium' preset)
|
| 98 |
+
// ============================================================================
|
| 99 |
+
|
| 100 |
+
// Basic VAD settings - Derived from 'medium' preset
|
| 101 |
+
export const audioThreshold = segmentationPresets.medium.audioThreshold;
|
| 102 |
+
export const silenceLength = segmentationPresets.medium.silenceLength;
|
| 103 |
+
export const speechHangover = segmentationPresets.medium.speechHangover;
|
| 104 |
+
|
| 105 |
+
// Advanced VAD settings
|
| 106 |
+
export const energyScale = 2.0; // Scaling factor for energy calculation
|
| 107 |
+
export const hysteresisRatio = 1.2; // Hysteresis ratio for threshold comparison
|
| 108 |
+
export const minSpeechDuration = 0.240; // 240ms minimum speech duration (3 * 80ms)
|
| 109 |
+
export const maxSilenceWithinSpeech = 0.160; // 160ms max silence within speech (2 * 80ms)
|
| 110 |
+
export const endingSpeechTolerance = 0.240; // 240ms tolerance for ending speech
|
| 111 |
+
export const endingEnergyThreshold = 0.600; // Threshold multiplier for ending speech detection
|
| 112 |
+
export const minEnergyIntegral = 22; // Minimum energy integral for speech detection
|
| 113 |
+
export const minEnergyPerSecond = 5; // Minimum energy per second for speech detection
|
| 114 |
+
|
| 115 |
+
// Adaptive energy threshold settings
|
| 116 |
+
export const useAdaptiveEnergyThresholds = true;
|
| 117 |
+
export const adaptiveEnergyIntegralFactor = 25.0; // Multiplier for noise floor to get integral threshold
|
| 118 |
+
export const adaptiveEnergyPerSecondFactor = 10.0; // Multiplier for noise floor to get per-second threshold
|
| 119 |
+
export const minAdaptiveEnergyIntegral = 3; // Floor for the adaptive threshold
|
| 120 |
+
export const minAdaptiveEnergyPerSecond = 1; // Floor for the adaptive threshold
|
| 121 |
+
|
| 122 |
+
// Sample-rate-aligned timing parameters
|
| 123 |
+
export const windowDuration = 0.080; // 80ms window - Perfectly divisible by common sample rates
|
| 124 |
+
export const lookbackDuration = 0.120; // 120ms lookback - Perfectly divisible by common sample rates
|
| 125 |
+
export const overlapDuration = 0.080; // 80ms overlap - Perfectly divisible by common sample rates
|
| 126 |
+
|
| 127 |
+
// Buffer durations
|
| 128 |
+
export const recentAudioDuration = 3.0; // 3 seconds of recent audio storage
|
| 129 |
+
export const visualizationDuration = 30.0; // 30 seconds of visualization buffer
|
| 130 |
+
|
| 131 |
+
// SNR and Noise Floor adaptation settings
|
| 132 |
+
export const snrThreshold = 3.0; // SNR threshold in dB for speech detection
|
| 133 |
+
export const minSnrThreshold = 1.0; // Minimum SNR threshold for low energy speech
|
| 134 |
+
export const noiseFloorAdaptationRate = 0.05; // Standard adaptation rate for noise floor (0-1)
|
| 135 |
+
export const fastAdaptationRate = 0.15; // Fast adaptation rate for initial calibration
|
| 136 |
+
export const minBackgroundDuration = 1.0; // Minimum silence duration to be "background" for fast adaptation
|
| 137 |
+
export const energyRiseThreshold = 0.08; // Threshold for detecting rising energy trend
|
| 138 |
+
|
| 139 |
+
// Processor-specific parameters
|
| 140 |
+
export const smaLength = 6; // Length of Simple Moving Average for energy smoothing
|
| 141 |
+
export const lookbackChunks = 3; // Number of chunks to look back for speech start
|
| 142 |
+
export const maxHistoryLength = 20; // Max length for storing speech/silence stats history
|
| 143 |
+
export const maxSegmentDuration = 4.8; // Automatically split segments longer than this (seconds)
|
| 144 |
+
export const sampleRate = 16000; // Default sample rate for Parakeet models
|
| 145 |
+
|
| 146 |
+
/**
|
| 147 |
+
* Get sample counts for different parameters at a given sample rate
|
| 148 |
+
*/
|
| 149 |
+
export function getSampleCounts(sampleRate: number): {
|
| 150 |
+
windowSamples: number;
|
| 151 |
+
lookbackSamples: number;
|
| 152 |
+
overlapSamples: number;
|
| 153 |
+
recentAudioSamples: number;
|
| 154 |
+
visualizationSamples: number;
|
| 155 |
+
minSpeechSamples: number;
|
| 156 |
+
silenceSamples: number;
|
| 157 |
+
} {
|
| 158 |
+
return {
|
| 159 |
+
windowSamples: Math.round(windowDuration * sampleRate),
|
| 160 |
+
lookbackSamples: Math.round(lookbackDuration * sampleRate),
|
| 161 |
+
overlapSamples: Math.round(overlapDuration * sampleRate),
|
| 162 |
+
recentAudioSamples: Math.round(recentAudioDuration * sampleRate),
|
| 163 |
+
visualizationSamples: Math.round(visualizationDuration * sampleRate),
|
| 164 |
+
minSpeechSamples: Math.round(minSpeechDuration * sampleRate),
|
| 165 |
+
silenceSamples: Math.round(silenceLength * sampleRate)
|
| 166 |
+
};
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/**
|
| 170 |
+
* Default audio parameters object
|
| 171 |
+
*/
|
| 172 |
+
export const defaultAudioParams: AudioParams = {
|
| 173 |
+
audioThreshold,
|
| 174 |
+
silenceLength,
|
| 175 |
+
speechHangover,
|
| 176 |
+
energyScale,
|
| 177 |
+
hysteresisRatio,
|
| 178 |
+
minSpeechDuration,
|
| 179 |
+
maxSilenceWithinSpeech,
|
| 180 |
+
endingSpeechTolerance,
|
| 181 |
+
endingEnergyThreshold,
|
| 182 |
+
minEnergyIntegral,
|
| 183 |
+
minEnergyPerSecond,
|
| 184 |
+
windowDuration,
|
| 185 |
+
lookbackDuration,
|
| 186 |
+
overlapDuration,
|
| 187 |
+
recentAudioDuration,
|
| 188 |
+
visualizationDuration,
|
| 189 |
+
snrThreshold,
|
| 190 |
+
minSnrThreshold,
|
| 191 |
+
noiseFloorAdaptationRate,
|
| 192 |
+
fastAdaptationRate,
|
| 193 |
+
minBackgroundDuration,
|
| 194 |
+
energyRiseThreshold,
|
| 195 |
+
smaLength,
|
| 196 |
+
lookbackChunks,
|
| 197 |
+
maxHistoryLength,
|
| 198 |
+
maxSegmentDuration,
|
| 199 |
+
useAdaptiveEnergyThresholds,
|
| 200 |
+
adaptiveEnergyIntegralFactor,
|
| 201 |
+
adaptiveEnergyPerSecondFactor,
|
| 202 |
+
minAdaptiveEnergyIntegral,
|
| 203 |
+
minAdaptiveEnergyPerSecond,
|
| 204 |
+
sampleRate
|
| 205 |
+
};
|
| 206 |
+
|
| 207 |
+
export default defaultAudioParams;
|
src/lib/audio/capture-processor.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Simple AudioWorkletProcessor for capturing raw audio chunks.
|
| 3 |
+
* Minimal logic to keep latency low.
|
| 4 |
+
*/
|
| 5 |
+
class CaptureProcessor extends AudioWorkletProcessor {
|
| 6 |
+
process(inputs: Float32Array[][], _outputs: Float32Array[][]): boolean {
|
| 7 |
+
const input = inputs[0];
|
| 8 |
+
if (!input || input.length === 0) return true;
|
| 9 |
+
|
| 10 |
+
// Use only the first channel (mono)
|
| 11 |
+
const channelData = input[0];
|
| 12 |
+
|
| 13 |
+
// Send audio chunk to the main thread
|
| 14 |
+
// We clone the data to avoid issues with SharedArrayBuffer (if not available)
|
| 15 |
+
this.port.postMessage(channelData);
|
| 16 |
+
|
| 17 |
+
return true;
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
registerProcessor('capture-processor', CaptureProcessor);
|
src/lib/audio/energy-calculation.test.ts
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Unit tests for the VAD energy calculation: Peak Amplitude + 6-sample SMA.
|
| 3 |
+
*
|
| 4 |
+
* Matches the logic in AudioEngine (vad-correction-peak-energy fix).
|
| 5 |
+
* legacy UI project uses peak + 6-sample SMA; RMS was causing all-audio-marked-as-silence.
|
| 6 |
+
*
|
| 7 |
+
* Run: npm test
|
| 8 |
+
*/
|
| 9 |
+
|
| 10 |
+
import { describe, it, expect } from 'vitest';
|
| 11 |
+
|
| 12 |
+
/**
|
| 13 |
+
* Compute chunk energy using Peak Amplitude + 6-sample SMA (mirrors AudioEngine).
|
| 14 |
+
* Each chunk contributes one peak (max absolute sample); history is smoothed over 6 values.
|
| 15 |
+
*/
|
| 16 |
+
function computeEnergyWithPeakSMA(
|
| 17 |
+
chunk: Float32Array,
|
| 18 |
+
energyHistory: number[]
|
| 19 |
+
): { energy: number; newHistory: number[] } {
|
| 20 |
+
let maxAbs = 0;
|
| 21 |
+
for (let i = 0; i < chunk.length; i++) {
|
| 22 |
+
const abs = Math.abs(chunk[i]);
|
| 23 |
+
if (abs > maxAbs) maxAbs = abs;
|
| 24 |
+
}
|
| 25 |
+
const newHistory = [...energyHistory, maxAbs];
|
| 26 |
+
if (newHistory.length > 6) newHistory.shift();
|
| 27 |
+
const energy = newHistory.reduce((a, b) => a + b, 0) / newHistory.length;
|
| 28 |
+
return { energy, newHistory };
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
describe('Energy calculation (Peak + 6-sample SMA)', () => {
|
| 32 |
+
it('should use peak amplitude per chunk (not RMS)', () => {
|
| 33 |
+
const chunk = new Float32Array(100);
|
| 34 |
+
chunk[50] = 0.5;
|
| 35 |
+
const { energy } = computeEnergyWithPeakSMA(chunk, []);
|
| 36 |
+
expect(energy).toBe(0.5);
|
| 37 |
+
});
|
| 38 |
+
|
| 39 |
+
it('should smooth over up to 6 samples', () => {
|
| 40 |
+
const history: number[] = [];
|
| 41 |
+
let h = history;
|
| 42 |
+
for (let i = 0; i < 6; i++) {
|
| 43 |
+
const chunk = new Float32Array(10);
|
| 44 |
+
chunk[0] = 0.1 * (i + 1);
|
| 45 |
+
const out = computeEnergyWithPeakSMA(chunk, h);
|
| 46 |
+
h = out.newHistory;
|
| 47 |
+
}
|
| 48 |
+
const avg = h.reduce((a, b) => a + b, 0) / h.length;
|
| 49 |
+
expect(avg).toBeCloseTo((0.1 + 0.2 + 0.3 + 0.4 + 0.5 + 0.6) / 6, 5);
|
| 50 |
+
});
|
| 51 |
+
|
| 52 |
+
it('should keep only the last 6 peaks in history', () => {
|
| 53 |
+
let h: number[] = [];
|
| 54 |
+
for (let i = 0; i < 10; i++) {
|
| 55 |
+
const chunk = new Float32Array(1);
|
| 56 |
+
chunk[0] = i + 1;
|
| 57 |
+
const out = computeEnergyWithPeakSMA(chunk, h);
|
| 58 |
+
h = out.newHistory;
|
| 59 |
+
}
|
| 60 |
+
expect(h.length).toBe(6);
|
| 61 |
+
expect(h).toEqual([5, 6, 7, 8, 9, 10]);
|
| 62 |
+
});
|
| 63 |
+
|
| 64 |
+
it('should produce higher value for loud chunk than quiet chunk', () => {
|
| 65 |
+
const quiet = new Float32Array(100);
|
| 66 |
+
quiet.fill(0.01);
|
| 67 |
+
const loud = new Float32Array(100);
|
| 68 |
+
loud.fill(0.8);
|
| 69 |
+
const { energy: eQuiet } = computeEnergyWithPeakSMA(quiet, []);
|
| 70 |
+
const { energy: eLoud } = computeEnergyWithPeakSMA(loud, []);
|
| 71 |
+
expect(eLoud).toBeGreaterThan(eQuiet);
|
| 72 |
+
expect(eQuiet).toBeCloseTo(0.01, 5);
|
| 73 |
+
expect(eLoud).toBeCloseTo(0.8, 5);
|
| 74 |
+
});
|
| 75 |
+
});
|
src/lib/audio/index.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Barrel file for the audio library: re-exports the public surface of
// src/lib/audio so consumers can import everything from a single path.
export type { AudioEngineConfig, AudioSegment, IRingBuffer, AudioEngine as IAudioEngine, AudioMetrics } from './types';
export { AudioEngine } from './AudioEngine';
export { RingBuffer } from './RingBuffer';
export { MelWorkerClient, type MelFeatures } from './MelWorkerClient';
// Mel math primitives (FFT, filterbank, framing) used by workers and tests.
export { MEL_CONSTANTS, hzToMel, melToHz, createMelFilterbank, createPaddedHannWindow, precomputeTwiddles, fft, preemphasize, computeMelFrame, normalizeMelFeatures, sampleToFrame } from './mel-math';
// Fixed display scaling shared by the spectrogram and bar visualizers.
export { MEL_DISPLAY_MIN_DB, MEL_DISPLAY_MAX_DB, MEL_DISPLAY_DB_RANGE, normalizeMelForDisplay } from './mel-display';
export { AudioSegmentProcessor, type ProcessedSegment, type AudioSegmentProcessorConfig } from './AudioSegmentProcessor';
export { defaultAudioParams, segmentationPresets, getSampleCounts } from './audioParams';
export type { AudioParams, SegmentationPreset } from './audioParams';
|
src/lib/audio/mel-display.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Shared scaling for raw log-mel display (spectrogram and bar visualizer).
|
| 3 |
+
* Raw mel from the worker is already log(mel_power) - do NOT apply log() again.
|
| 4 |
+
*
|
| 5 |
+
* Typical range: silence ~-11 to -8, speech ~-4 to 0.
|
| 6 |
+
* Fixed scaling avoids "gain hunting" where silence stretches to full brightness.
|
| 7 |
+
*/
|
| 8 |
+
export const MEL_DISPLAY_MIN_DB = -11.0;
|
| 9 |
+
export const MEL_DISPLAY_MAX_DB = 0.0;
|
| 10 |
+
export const MEL_DISPLAY_DB_RANGE = MEL_DISPLAY_MAX_DB - MEL_DISPLAY_MIN_DB;
|
| 11 |
+
|
| 12 |
+
/**
|
| 13 |
+
* Map raw log-mel value to 0..1 for display (same as debug spectrogram).
|
| 14 |
+
* Input is already in log space; no extra log().
|
| 15 |
+
*/
|
| 16 |
+
export function normalizeMelForDisplay(rawLogMel: number): number {
|
| 17 |
+
const normalized = (rawLogMel - MEL_DISPLAY_MIN_DB) / MEL_DISPLAY_DB_RANGE;
|
| 18 |
+
return Math.max(0, Math.min(1, normalized));
|
| 19 |
+
}
|
src/lib/audio/mel-e2e.test.ts
ADDED
|
@@ -0,0 +1,483 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* End-to-End mel spectrogram tests using real audio and ONNX reference data.
|
| 3 |
+
*
|
| 4 |
+
* Tests:
|
| 5 |
+
* 1. Cross-validation against ONNX reference (mel_reference.json from parakeet.js)
|
| 6 |
+
* 2. Real WAV file processing (life_Jim.wav from parakeet.js demo)
|
| 7 |
+
* 3. Mel filterbank accuracy against ONNX reference
|
| 8 |
+
*
|
| 9 |
+
* These tests catch regressions in mel computation that unit tests might miss,
|
| 10 |
+
* such as incorrect normalization, wrong filterbank values, or precision issues.
|
| 11 |
+
*
|
| 12 |
+
* The ONNX reference is generated by parakeet.js's tests/generate_mel_reference.py
|
| 13 |
+
* using the official NeMo ONNX preprocessor as ground truth.
|
| 14 |
+
*
|
| 15 |
+
* Run: npm test
|
| 16 |
+
*/
|
| 17 |
+
|
| 18 |
+
import { readFileSync, existsSync } from 'fs';
|
| 19 |
+
import { join } from 'path';
|
| 20 |
+
import https from 'https';
|
| 21 |
+
import { describe, it, expect, beforeAll } from 'vitest';
|
| 22 |
+
import {
|
| 23 |
+
MEL_CONSTANTS,
|
| 24 |
+
hzToMel,
|
| 25 |
+
melToHz,
|
| 26 |
+
createMelFilterbank,
|
| 27 |
+
createPaddedHannWindow,
|
| 28 |
+
precomputeTwiddles,
|
| 29 |
+
fft,
|
| 30 |
+
preemphasize,
|
| 31 |
+
computeMelFrame,
|
| 32 |
+
normalizeMelFeatures,
|
| 33 |
+
sampleToFrame,
|
| 34 |
+
} from './mel-math';
|
| 35 |
+
import { resampleLinear } from './utils';
|
| 36 |
+
|
| 37 |
+
// ─── Helpers ──────────────────────────────────────────────────────────────
|
| 38 |
+
|
| 39 |
+
/** Decode base64 to Float32Array (matching parakeet.js test format) */
|
| 40 |
+
function base64ToFloat32(b64: string): Float32Array {
|
| 41 |
+
const buf = Buffer.from(b64, 'base64');
|
| 42 |
+
return new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / Float32Array.BYTES_PER_ELEMENT);
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
/** Compute error metrics between two arrays */
|
| 46 |
+
function computeError(actual: Float32Array, expected: Float32Array, validCount?: number) {
|
| 47 |
+
const n = validCount || Math.min(actual.length, expected.length);
|
| 48 |
+
let maxErr = 0;
|
| 49 |
+
let sumErr = 0;
|
| 50 |
+
|
| 51 |
+
for (let i = 0; i < n; i++) {
|
| 52 |
+
const err = Math.abs(actual[i] - expected[i]);
|
| 53 |
+
sumErr += err;
|
| 54 |
+
if (err > maxErr) maxErr = err;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
return {
|
| 58 |
+
maxAbsError: maxErr,
|
| 59 |
+
meanAbsError: sumErr / n,
|
| 60 |
+
n,
|
| 61 |
+
};
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
/** Parse a 16-bit PCM WAV file into Float32Array at the native sample rate */
|
| 65 |
+
function parseWav(buffer: ArrayBuffer): { audio: Float32Array; sampleRate: number; channels: number } {
|
| 66 |
+
const view = new DataView(buffer);
|
| 67 |
+
|
| 68 |
+
// RIFF header
|
| 69 |
+
const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3));
|
| 70 |
+
if (riff !== 'RIFF') throw new Error('Not a valid WAV file: missing RIFF header');
|
| 71 |
+
|
| 72 |
+
const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11));
|
| 73 |
+
if (wave !== 'WAVE') throw new Error('Not a valid WAV file: missing WAVE format');
|
| 74 |
+
|
| 75 |
+
// Find fmt and data chunks
|
| 76 |
+
let offset = 12;
|
| 77 |
+
let sampleRate = 0;
|
| 78 |
+
let channels = 0;
|
| 79 |
+
let bitsPerSample = 0;
|
| 80 |
+
let dataOffset = 0;
|
| 81 |
+
let dataSize = 0;
|
| 82 |
+
|
| 83 |
+
while (offset < buffer.byteLength - 8) {
|
| 84 |
+
const chunkId = String.fromCharCode(
|
| 85 |
+
view.getUint8(offset), view.getUint8(offset + 1),
|
| 86 |
+
view.getUint8(offset + 2), view.getUint8(offset + 3),
|
| 87 |
+
);
|
| 88 |
+
const chunkSize = view.getUint32(offset + 4, true);
|
| 89 |
+
|
| 90 |
+
if (chunkId === 'fmt ') {
|
| 91 |
+
channels = view.getUint16(offset + 10, true);
|
| 92 |
+
sampleRate = view.getUint32(offset + 12, true);
|
| 93 |
+
bitsPerSample = view.getUint16(offset + 22, true);
|
| 94 |
+
} else if (chunkId === 'data') {
|
| 95 |
+
dataOffset = offset + 8;
|
| 96 |
+
dataSize = chunkSize;
|
| 97 |
+
break;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
offset += 8 + chunkSize;
|
| 101 |
+
// Align to even byte boundary
|
| 102 |
+
if (chunkSize % 2 !== 0) offset++;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
if (dataOffset === 0) throw new Error('No data chunk found in WAV file');
|
| 106 |
+
if (bitsPerSample !== 16) throw new Error(`Unsupported bit depth: ${bitsPerSample} (expected 16)`);
|
| 107 |
+
|
| 108 |
+
// Extract PCM samples and convert to Float32 [-1, 1]
|
| 109 |
+
const numSamples = dataSize / (bitsPerSample / 8) / channels;
|
| 110 |
+
const audio = new Float32Array(numSamples);
|
| 111 |
+
|
| 112 |
+
for (let i = 0; i < numSamples; i++) {
|
| 113 |
+
// Read first channel (mono or left channel)
|
| 114 |
+
const sampleOffset = dataOffset + i * channels * 2;
|
| 115 |
+
const sample = view.getInt16(sampleOffset, true);
|
| 116 |
+
audio[i] = sample / 32768.0;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
return { audio, sampleRate, channels };
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
/**
|
| 123 |
+
* Run our full mel pipeline on raw PCM audio.
|
| 124 |
+
* Matches the JsPreprocessor.process() pipeline in parakeet.js/src/mel.js.
|
| 125 |
+
*/
|
| 126 |
+
function fullMelPipeline(audio: Float32Array, nMels: number = 128) {
|
| 127 |
+
const { N_FFT, HOP_LENGTH, PREEMPH } = MEL_CONSTANTS;
|
| 128 |
+
|
| 129 |
+
// 1. Pre-emphasize
|
| 130 |
+
const preemph = preemphasize(audio, 0, PREEMPH);
|
| 131 |
+
|
| 132 |
+
// 2. Compute mel frames
|
| 133 |
+
const numFrames = sampleToFrame(audio.length);
|
| 134 |
+
if (numFrames === 0) return { features: new Float32Array(0), T: 0 };
|
| 135 |
+
|
| 136 |
+
const hannWindow = createPaddedHannWindow();
|
| 137 |
+
const twiddles = precomputeTwiddles(N_FFT);
|
| 138 |
+
const melFilterbank = createMelFilterbank(nMels);
|
| 139 |
+
|
| 140 |
+
// Raw mel buffer [nMels × numFrames], mel-major layout
|
| 141 |
+
const rawMel = new Float32Array(nMels * numFrames);
|
| 142 |
+
for (let t = 0; t < numFrames; t++) {
|
| 143 |
+
const frame = computeMelFrame(preemph, t, hannWindow, twiddles, melFilterbank, nMels);
|
| 144 |
+
for (let m = 0; m < nMels; m++) {
|
| 145 |
+
rawMel[m * numFrames + t] = frame[m];
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
// 3. Normalize
|
| 150 |
+
const features = normalizeMelFeatures(rawMel, nMels, numFrames);
|
| 151 |
+
|
| 152 |
+
return { features, T: numFrames };
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
// ─── Paths ────────────────────────────────────────────────────────────────
|
| 156 |
+
|
| 157 |
+
// parakeet.js is sibling to keet: __dirname = src/lib/audio, 4 levels up = N:\github\ysdede
// NOTE(review): __dirname is a CommonJS global and is undefined under pure ESM —
// presumably the test runner's transform provides it here; confirm against vitest config.
const PARAKEET_ROOT = join(__dirname, '..', '..', '..', '..', 'parakeet.js');
// Optional ONNX-generated reference fixture; tests skip gracefully when absent.
const MEL_REFERENCE_PATH = join(PARAKEET_ROOT, 'tests', 'mel_reference.json');
// Speech sample used by the real-audio suite; downloaded from GitHub if missing locally.
const WAV_LOCAL_PATH = join(PARAKEET_ROOT, 'examples', 'demo', 'public', 'assets', 'life_Jim.wav');
const WAV_GITHUB_URL = 'https://github.com/ysdede/parakeet.js/raw/refs/heads/master/examples/demo/public/assets/life_Jim.wav';
|
| 162 |
+
|
| 163 |
+
// ─── ONNX Reference Cross-Validation ─────────────────────────────────────
|
| 164 |
+
|
| 165 |
+
describe('Cross-validation against ONNX reference', () => {
  // Reference features generated by the official NeMo ONNX preprocessor
  // (see file header). The fixture is optional: every test returns early
  // (effectively skipping) when it has not been generated.
  let reference: any;
  let hasReference = false;

  beforeAll(() => {
    try {
      if (existsSync(MEL_REFERENCE_PATH)) {
        const content = readFileSync(MEL_REFERENCE_PATH, 'utf-8');
        reference = JSON.parse(content);
        hasReference = true;
      }
    } catch {
      // Reference not available — tests will be skipped
    }
  });

  it('should load mel_reference.json from parakeet.js', () => {
    if (!hasReference) {
      console.log(`SKIP: mel_reference.json not found at ${MEL_REFERENCE_PATH}`);
      console.log('Run: cd ../parakeet.js && python tests/generate_mel_reference.py');
      return;
    }
    expect(reference).toBeDefined();
    expect(reference.nMels).toBe(128);
    expect(reference.tests).toBeDefined();
  });

  it('should match ONNX mel filterbank within 1e-5', () => {
    if (!hasReference || !reference.melFilterbank) {
      console.log('SKIP: No filterbank reference');
      return;
    }

    const refFb = base64ToFloat32(reference.melFilterbank.data);
    const refShape = reference.melFilterbank.shape; // [257, 128] — informational; not asserted
    const jsFb = createMelFilterbank(128);

    // Compare (ref is [257,128] row-major, ours is [128,257] row-major),
    // i.e. the reference buffer is the transpose of our layout.
    let maxErr = 0;
    for (let freq = 0; freq < 257; freq++) {
      for (let mel = 0; mel < 128; mel++) {
        const refVal = refFb[freq * 128 + mel];
        const jsVal = jsFb[mel * 257 + freq];
        const err = Math.abs(refVal - jsVal);
        if (err > maxErr) maxErr = err;
      }
    }

    console.log(`Filterbank max error vs ONNX: ${maxErr.toExponential(3)}`);
    expect(maxErr).toBeLessThan(1e-5);
  });

  it('should match ONNX full pipeline for each test signal (max<0.05, mean<0.005)', () => {
    if (!hasReference) {
      console.log('SKIP: No reference data');
      return;
    }

    const nMels = reference.nMels;

    for (const [name, test] of Object.entries(reference.tests) as [string, any][]) {
      const audio = base64ToFloat32(test.audio);
      const refFeatures = base64ToFloat32(test.features);
      const refLen = test.featuresLen;

      // Run our pipeline
      const { features: ourFeatures, T: ourLen } = fullMelPipeline(audio, nMels);

      console.log(`Signal "${name}": ${audio.length} samples (${(audio.length / 16000).toFixed(2)}s), ` +
        `frames: ours=${ourLen}, ref=${refLen}`);

      // Frame count should match
      expect(ourLen).toBe(refLen);

      // Compare valid frames (mel-by-mel); both buffers are mel-major [nMels × T],
      // so row stride is the per-buffer frame count.
      const nFramesOurs = ourFeatures.length / nMels;
      const nFramesRef = refFeatures.length / nMels;

      let maxErr = 0;
      let sumErr = 0;
      let n = 0;

      for (let m = 0; m < nMels; m++) {
        for (let t = 0; t < ourLen; t++) {
          const ourVal = ourFeatures[m * nFramesOurs + t];
          const refVal = refFeatures[m * nFramesRef + t];
          const err = Math.abs(ourVal - refVal);
          sumErr += err;
          if (err > maxErr) maxErr = err;
          n++;
        }
      }

      const meanErr = sumErr / n;
      console.log(` Max error: ${maxErr.toExponential(3)}, Mean error: ${meanErr.toExponential(3)}`);

      // Same thresholds as parakeet.js test_mel.mjs
      expect(maxErr).toBeLessThan(0.05);
      expect(meanErr).toBeLessThan(0.005);
    }
  });
});
|
| 267 |
+
|
| 268 |
+
// ─── Real WAV File Tests ──────────────────────────────────────────────────
|
| 269 |
+
|
| 270 |
+
describe('Real audio: life_Jim.wav', () => {
  let audioData: Float32Array;
  let audioDuration: number;
  // Known transcript of the clip — kept for reference; not asserted by this suite.
  const EXPECTED_TRANSCRIPT = 'it is not life as we know or understand it';

  beforeAll(async () => {
    let wavBuffer: ArrayBuffer;

    if (existsSync(WAV_LOCAL_PATH)) {
      // Read local file (fast, no network dependency)
      const fileBuffer = readFileSync(WAV_LOCAL_PATH);
      // slice() copies exactly the byte range backing this Buffer into a
      // standalone ArrayBuffer.
      wavBuffer = fileBuffer.buffer.slice(
        fileBuffer.byteOffset,
        fileBuffer.byteOffset + fileBuffer.byteLength,
      );
      console.log(`Loaded local WAV: ${WAV_LOCAL_PATH} (${fileBuffer.length} bytes)`);
    } else {
      // Download from GitHub using Node.js https (happy-dom blocks CORS fetch)
      console.log(`Local WAV not found, downloading from ${WAV_GITHUB_URL}`);
      wavBuffer = await new Promise<ArrayBuffer>((resolve, reject) => {
        const download = (url: string, redirects = 0) => {
          if (redirects > 5) return reject(new Error('Too many redirects'));
          https.get(url, (res) => {
            // Follow redirects (GitHub sends 301/302)
            if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
              return download(res.headers.location, redirects + 1);
            }
            if (res.statusCode !== 200) return reject(new Error(`HTTP ${res.statusCode}`));
            const chunks: Buffer[] = [];
            res.on('data', (chunk: Buffer) => chunks.push(chunk));
            res.on('end', () => {
              const buf = Buffer.concat(chunks);
              resolve(buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength));
            });
            res.on('error', reject);
          }).on('error', reject);
        };
        download(WAV_GITHUB_URL);
      });
      console.log(`Downloaded WAV: ${wavBuffer.byteLength} bytes`);
    }

    // Parse WAV
    const { audio, sampleRate, channels } = parseWav(wavBuffer);
    console.log(`Parsed WAV: ${audio.length} samples, ${sampleRate} Hz, ${channels} ch`);

    // Resample to 16kHz if needed
    if (sampleRate !== 16000) {
      audioData = resampleLinear(audio, sampleRate, 16000);
      console.log(`Resampled: ${audio.length} → ${audioData.length} samples (${sampleRate} → 16000 Hz)`);
    } else {
      audioData = audio;
    }

    audioDuration = audioData.length / 16000;
    console.log(`Audio duration: ${audioDuration.toFixed(2)}s`);
  });

  it('should parse the WAV file correctly', () => {
    expect(audioData).toBeInstanceOf(Float32Array);
    expect(audioData.length).toBeGreaterThan(0);
    // life_Jim.wav is about 1.4 seconds of speech
    expect(audioDuration).toBeGreaterThan(0.5);
    expect(audioDuration).toBeLessThan(10);
  });

  it('should have valid PCM values in [-1, 1] range', () => {
    let min = Infinity, max = -Infinity;
    for (let i = 0; i < audioData.length; i++) {
      if (audioData[i] < min) min = audioData[i];
      if (audioData[i] > max) max = audioData[i];
      expect(isFinite(audioData[i])).toBe(true);
    }
    expect(min).toBeGreaterThanOrEqual(-1.0);
    expect(max).toBeLessThanOrEqual(1.0);
    // Should have actual audio content (not silence)
    expect(max - min).toBeGreaterThan(0.01);
    console.log(`Audio range: [${min.toFixed(4)}, ${max.toFixed(4)}]`);
  });

  it('should produce correct number of mel frames', () => {
    const expectedFrames = sampleToFrame(audioData.length);
    expect(expectedFrames).toBeGreaterThan(0);
    console.log(`Expected frames: ${expectedFrames} (${audioDuration.toFixed(2)}s × 100 fps)`);
  });

  it('should produce finite, normalized mel features', () => {
    const { features, T } = fullMelPipeline(audioData, 128);

    expect(T).toBeGreaterThan(0);
    expect(features.length).toBe(128 * T);

    // All values should be finite
    for (let i = 0; i < features.length; i++) {
      expect(isFinite(features[i])).toBe(true);
    }

    // Per-mel-bin: should have ~zero mean (normalized)
    for (let m = 0; m < 128; m++) {
      let sum = 0;
      for (let t = 0; t < T; t++) {
        sum += features[m * T + t];
      }
      const mean = sum / T;
      expect(Math.abs(mean)).toBeLessThan(0.01);
    }
  });

  it('should produce deterministic results', () => {
    const result1 = fullMelPipeline(audioData, 128);
    const result2 = fullMelPipeline(audioData, 128);

    expect(result1.T).toBe(result2.T);
    expect(result1.features.length).toBe(result2.features.length);

    // Bit-for-bit equality: two runs over identical input must agree exactly.
    for (let i = 0; i < result1.features.length; i++) {
      expect(result1.features[i]).toBe(result2.features[i]);
    }
  });

  it('should produce different features for different time windows', () => {
    const { features, T } = fullMelPipeline(audioData, 128);

    // Compare first and second halves — they should differ (it's speech, not silence)
    const halfT = Math.floor(T / 2);
    if (halfT < 2) return; // too short

    let diffCount = 0;
    for (let m = 0; m < 128; m++) {
      const v1 = features[m * T + 0]; // first frame
      const v2 = features[m * T + halfT]; // middle frame
      if (Math.abs(v1 - v2) > 0.01) diffCount++;
    }
    // At least some mel bins should differ between speech regions
    expect(diffCount).toBeGreaterThan(10);
  });

  it('should match mel-worker output for the same audio', async () => {
    // This test validates that our mel-math (used by mel.worker.ts) produces
    // the same features as the full pipeline, ensuring the worker's incremental
    // computation matches batch processing.

    const nMels = 128;
    const { features: batchFeatures, T } = fullMelPipeline(audioData, nMels);

    // Simulate incremental processing (like mel.worker does):
    // Push all audio at once, then extract all frames
    const hannWindow = createPaddedHannWindow();
    const twiddles = precomputeTwiddles(MEL_CONSTANTS.N_FFT);
    const melFilterbank = createMelFilterbank(nMels);

    // Pre-emphasize the full audio (default lastSample/coefficient arguments)
    const preemph = preemphasize(audioData);

    // Compute frames one by one (like worker does incrementally)
    const rawMel = new Float32Array(nMels * T);
    for (let t = 0; t < T; t++) {
      const frame = computeMelFrame(preemph, t, hannWindow, twiddles, melFilterbank, nMels);
      for (let m = 0; m < nMels; m++) {
        rawMel[m * T + t] = frame[m];
      }
    }

    // Normalize (same as getFeatures in worker)
    const incrementalFeatures = normalizeMelFeatures(rawMel, nMels, T);

    // Should be bit-for-bit identical since same code path
    expect(incrementalFeatures.length).toBe(batchFeatures.length);
    for (let i = 0; i < incrementalFeatures.length; i++) {
      expect(incrementalFeatures[i]).toBe(batchFeatures[i]);
    }
  });

  it('should complete mel processing under 100ms for this audio', () => {
    const t0 = performance.now();
    const { features, T } = fullMelPipeline(audioData, 128);
    const elapsed = performance.now() - t0;

    console.log(`Mel pipeline: ${T} frames in ${elapsed.toFixed(1)}ms ` +
      `(${(audioDuration / (elapsed / 1000)).toFixed(1)}x realtime)`);

    // Should be fast enough for real-time use
    expect(elapsed).toBeLessThan(100);
  });
});
|
| 455 |
+
|
| 456 |
+
// ─── WAV Parser Tests ─────────────────────────────────────────────────────
|
| 457 |
+
|
| 458 |
+
describe('WAV parser', () => {
|
| 459 |
+
it('should parse a known WAV file correctly', () => {
|
| 460 |
+
if (!existsSync(WAV_LOCAL_PATH)) {
|
| 461 |
+
console.log('SKIP: WAV file not available locally');
|
| 462 |
+
return;
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
const buffer = readFileSync(WAV_LOCAL_PATH);
|
| 466 |
+
const wavBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
|
| 467 |
+
const { audio, sampleRate, channels } = parseWav(wavBuffer);
|
| 468 |
+
|
| 469 |
+
expect(audio).toBeInstanceOf(Float32Array);
|
| 470 |
+
expect(audio.length).toBeGreaterThan(0);
|
| 471 |
+
expect(sampleRate).toBeGreaterThan(0);
|
| 472 |
+
expect(channels).toBeGreaterThanOrEqual(1);
|
| 473 |
+
|
| 474 |
+
console.log(`WAV: ${audio.length} samples, ${sampleRate} Hz, ${channels} ch, ` +
|
| 475 |
+
`${(audio.length / sampleRate).toFixed(2)}s`);
|
| 476 |
+
});
|
| 477 |
+
|
| 478 |
+
it('should reject non-WAV data', () => {
|
| 479 |
+
const notWav = new ArrayBuffer(44);
|
| 480 |
+
new Uint8Array(notWav).fill(0);
|
| 481 |
+
expect(() => parseWav(notWav)).toThrow();
|
| 482 |
+
});
|
| 483 |
+
});
|
src/lib/audio/mel-math.test.ts
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Unit tests for mel spectrogram computation functions.
|
| 3 |
+
*
|
| 4 |
+
* These tests verify that the mel math functions produce correct results
|
| 5 |
+
* and match the expected behavior of NeMo/parakeet.js mel processing.
|
| 6 |
+
*
|
| 7 |
+
* Run: npm test
|
| 8 |
+
*/
|
| 9 |
+
|
| 10 |
+
import { describe, it, expect } from 'vitest';
|
| 11 |
+
import {
|
| 12 |
+
MEL_CONSTANTS,
|
| 13 |
+
hzToMel,
|
| 14 |
+
melToHz,
|
| 15 |
+
createMelFilterbank,
|
| 16 |
+
createPaddedHannWindow,
|
| 17 |
+
precomputeTwiddles,
|
| 18 |
+
fft,
|
| 19 |
+
preemphasize,
|
| 20 |
+
computeMelFrame,
|
| 21 |
+
normalizeMelFeatures,
|
| 22 |
+
sampleToFrame,
|
| 23 |
+
} from './mel-math';
|
| 24 |
+
|
| 25 |
+
// ─── Constants ────────────────────────────────────────────────────────────
|
| 26 |
+
|
| 27 |
+
describe('MEL_CONSTANTS', () => {
|
| 28 |
+
it('should have correct NeMo-compatible values', () => {
|
| 29 |
+
expect(MEL_CONSTANTS.SAMPLE_RATE).toBe(16000);
|
| 30 |
+
expect(MEL_CONSTANTS.N_FFT).toBe(512);
|
| 31 |
+
expect(MEL_CONSTANTS.WIN_LENGTH).toBe(400);
|
| 32 |
+
expect(MEL_CONSTANTS.HOP_LENGTH).toBe(160);
|
| 33 |
+
expect(MEL_CONSTANTS.PREEMPH).toBe(0.97);
|
| 34 |
+
expect(MEL_CONSTANTS.N_FREQ_BINS).toBe(257);
|
| 35 |
+
expect(MEL_CONSTANTS.DEFAULT_N_MELS).toBe(128);
|
| 36 |
+
});
|
| 37 |
+
});
|
| 38 |
+
|
| 39 |
+
// ─── Mel Scale ────────────────────────────────────────────────────────────
|
| 40 |
+
|
| 41 |
+
describe('hzToMel / melToHz', () => {
|
| 42 |
+
it('should return 0 for 0 Hz', () => {
|
| 43 |
+
expect(hzToMel(0)).toBe(0);
|
| 44 |
+
});
|
| 45 |
+
|
| 46 |
+
it('should return mel in linear region for freq < 1000 Hz', () => {
|
| 47 |
+
// In linear region: mel = freq / (200/3) = freq * 3/200
|
| 48 |
+
const freq = 500;
|
| 49 |
+
const expected = freq / (200 / 3);
|
| 50 |
+
expect(hzToMel(freq)).toBeCloseTo(expected, 5);
|
| 51 |
+
});
|
| 52 |
+
|
| 53 |
+
it('should return mel in log region for freq >= 1000 Hz', () => {
|
| 54 |
+
// At 1000 Hz, mel = 1000 / (200/3) = 15.0
|
| 55 |
+
expect(hzToMel(1000)).toBeCloseTo(15.0, 5);
|
| 56 |
+
// Above 1000 Hz, should be in log region
|
| 57 |
+
expect(hzToMel(2000)).toBeGreaterThan(15.0);
|
| 58 |
+
});
|
| 59 |
+
|
| 60 |
+
it('should be invertible (roundtrip)', () => {
|
| 61 |
+
const freqs = [0, 100, 500, 1000, 2000, 4000, 8000];
|
| 62 |
+
for (const freq of freqs) {
|
| 63 |
+
const mel = hzToMel(freq);
|
| 64 |
+
const recovered = melToHz(mel);
|
| 65 |
+
expect(recovered).toBeCloseTo(freq, 3);
|
| 66 |
+
}
|
| 67 |
+
});
|
| 68 |
+
|
| 69 |
+
it('should be monotonically increasing', () => {
|
| 70 |
+
const freqs = [0, 100, 500, 1000, 2000, 4000, 8000];
|
| 71 |
+
const mels = freqs.map(hzToMel);
|
| 72 |
+
for (let i = 1; i < mels.length; i++) {
|
| 73 |
+
expect(mels[i]).toBeGreaterThan(mels[i - 1]);
|
| 74 |
+
}
|
| 75 |
+
});
|
| 76 |
+
});
|
| 77 |
+
|
| 78 |
+
// ─── Mel Filterbank ───────────────────────────────────────────────────────
|
| 79 |
+
|
| 80 |
+
// Structural checks on the triangular mel filterbank (mel-major layout
// [nMels × N_FREQ_BINS], matching how createMelFilterbank is indexed below).
describe('createMelFilterbank', () => {
  it('should create filterbank with correct dimensions', () => {
    const nMels = 128;
    const fb = createMelFilterbank(nMels);
    expect(fb).toBeInstanceOf(Float32Array);
    expect(fb.length).toBe(nMels * MEL_CONSTANTS.N_FREQ_BINS);
  });

  it('should have non-negative values', () => {
    const fb = createMelFilterbank(128);
    for (let i = 0; i < fb.length; i++) {
      expect(fb[i]).toBeGreaterThanOrEqual(0);
    }
  });

  it('should have non-zero values in each mel bin', () => {
    const nMels = 128;
    const fb = createMelFilterbank(nMels);
    // Every filter row must carry some weight, otherwise that bin is dead.
    for (let m = 0; m < nMels; m++) {
      const offset = m * MEL_CONSTANTS.N_FREQ_BINS;
      let sum = 0;
      for (let k = 0; k < MEL_CONSTANTS.N_FREQ_BINS; k++) {
        sum += fb[offset + k];
      }
      expect(sum).toBeGreaterThan(0);
    }
  });

  it('should create triangular filters (each row is a triangle)', () => {
    const nMels = 64;
    const fb = createMelFilterbank(nMels);
    // Check that each filter has a single peak region (no multiple peaks):
    // the non-zero support must be one contiguous run of frequency bins.
    for (let m = 0; m < nMels; m++) {
      const offset = m * MEL_CONSTANTS.N_FREQ_BINS;
      // Find first and last non-zero
      let firstNonZero = -1;
      let lastNonZero = -1;
      for (let k = 0; k < MEL_CONSTANTS.N_FREQ_BINS; k++) {
        if (fb[offset + k] > 0) {
          if (firstNonZero === -1) firstNonZero = k;
          lastNonZero = k;
        }
      }
      // Should have at least one non-zero bin
      expect(firstNonZero).toBeGreaterThanOrEqual(0);
      // All values between first and last should be > 0 (contiguous support)
      for (let k = firstNonZero; k <= lastNonZero; k++) {
        expect(fb[offset + k]).toBeGreaterThan(0);
      }
    }
  });

  it('should work for different nMels values', () => {
    for (const nMels of [40, 64, 80, 128]) {
      const fb = createMelFilterbank(nMels);
      expect(fb.length).toBe(nMels * MEL_CONSTANTS.N_FREQ_BINS);
    }
  });
});
|
| 139 |
+
|
| 140 |
+
// ─── Hann Window ──────────────────────────────────────────────────────────
|
| 141 |
+
|
| 142 |
+
// A WIN_LENGTH (400) Hann window centered inside an N_FFT (512) buffer,
// zero-padded symmetrically on both sides.
describe('createPaddedHannWindow', () => {
  it('should return a Float64Array of length N_FFT', () => {
    const win = createPaddedHannWindow();
    expect(win).toBeInstanceOf(Float64Array);
    expect(win.length).toBe(MEL_CONSTANTS.N_FFT);
  });

  it('should have zero padding at edges', () => {
    const win = createPaddedHannWindow();
    const padLeft = (MEL_CONSTANTS.N_FFT - MEL_CONSTANTS.WIN_LENGTH) >> 1; // 56
    // Left padding should be zero
    for (let i = 0; i < padLeft; i++) {
      expect(win[i]).toBe(0);
    }
    // Right padding should be zero
    const padRight = padLeft + MEL_CONSTANTS.WIN_LENGTH;
    for (let i = padRight; i < MEL_CONSTANTS.N_FFT; i++) {
      expect(win[i]).toBe(0);
    }
  });

  it('should have symmetric Hann values in the active region', () => {
    const win = createPaddedHannWindow();
    const padLeft = (MEL_CONSTANTS.N_FFT - MEL_CONSTANTS.WIN_LENGTH) >> 1;
    // Hann window should be symmetric: win[i] == win[mirror(i)]
    for (let i = 0; i < MEL_CONSTANTS.WIN_LENGTH; i++) {
      const mirror = MEL_CONSTANTS.WIN_LENGTH - 1 - i;
      expect(win[padLeft + i]).toBeCloseTo(win[padLeft + mirror], 10);
    }
  });

  it('should peak at center with value ~1.0', () => {
    const win = createPaddedHannWindow();
    const padLeft = (MEL_CONSTANTS.N_FFT - MEL_CONSTANTS.WIN_LENGTH) >> 1;
    const center = padLeft + Math.floor(MEL_CONSTANTS.WIN_LENGTH / 2);
    // Center of Hann window should be close to 1.0
    expect(win[center]).toBeCloseTo(1.0, 2);
  });
});
|
| 181 |
+
|
| 182 |
+
// ─── FFT ──────────────────────────────────────────────────────────────────
|
| 183 |
+
|
| 184 |
+
// In-place complex FFT checks against analytically known spectra.
describe('fft', () => {
  it('should handle a DC signal', () => {
    const n = 8;
    const tw = precomputeTwiddles(n);
    const re = new Float64Array([1, 1, 1, 1, 1, 1, 1, 1]);
    const im = new Float64Array(n);
    fft(re, im, n, tw);
    // DC component (re[0]) should be n
    expect(re[0]).toBeCloseTo(n, 5);
    // All other components should be ~0
    for (let i = 1; i < n; i++) {
      expect(re[i]).toBeCloseTo(0, 5);
      expect(im[i]).toBeCloseTo(0, 5);
    }
  });

  it('should handle a single frequency signal', () => {
    const n = 16;
    const tw = precomputeTwiddles(n);
    // Create a sinusoid at bin k=1: cos(2πk/N * n) for n=0..N-1
    const re = new Float64Array(n);
    const im = new Float64Array(n);
    for (let i = 0; i < n; i++) {
      re[i] = Math.cos(2 * Math.PI * i / n);
    }
    fft(re, im, n, tw);
    // Should have energy at bin 1 and bin N-1 (conjugate symmetry),
    // each carrying half the amplitude scaled by N.
    expect(Math.abs(re[1])).toBeCloseTo(n / 2, 3);
    expect(Math.abs(re[n - 1])).toBeCloseTo(n / 2, 3);
    // Other bins should be near zero
    for (let i = 2; i < n - 1; i++) {
      expect(Math.abs(re[i])).toBeLessThan(1e-6);
      expect(Math.abs(im[i])).toBeLessThan(1e-6);
    }
  });

  it('should handle 512-point FFT (actual size used)', () => {
    const n = 512;
    const tw = precomputeTwiddles(n);
    // All zeros — the production N_FFT size must at least run cleanly.
    const re = new Float64Array(n);
    const im = new Float64Array(n);
    fft(re, im, n, tw);
    // All outputs should be zero
    for (let i = 0; i < n; i++) {
      expect(re[i]).toBeCloseTo(0, 10);
      expect(im[i]).toBeCloseTo(0, 10);
    }
  });

  it('should satisfy Parseval\'s theorem (energy conservation)', () => {
    const n = 64;
    const tw = precomputeTwiddles(n);
    // Random-ish (but deterministic) signal
    const re = new Float64Array(n);
    const im = new Float64Array(n);
    for (let i = 0; i < n; i++) {
      re[i] = Math.sin(i * 0.37) + Math.cos(i * 0.83);
    }
    // Time domain energy
    let timeEnergy = 0;
    for (let i = 0; i < n; i++) {
      timeEnergy += re[i] * re[i] + im[i] * im[i];
    }

    fft(re, im, n, tw);

    // Frequency domain energy
    let freqEnergy = 0;
    for (let i = 0; i < n; i++) {
      freqEnergy += re[i] * re[i] + im[i] * im[i];
    }
    // Parseval: sum|x|^2 = (1/N) * sum|X|^2
    expect(freqEnergy / n).toBeCloseTo(timeEnergy, 5);
  });
});
|
| 260 |
+
|
| 261 |
+
// ─── Twiddle Factors ──────────────────────────────────────────────���───────
|
| 262 |
+
|
| 263 |
+
describe('precomputeTwiddles', () => {
|
| 264 |
+
it('should produce cos and sin arrays of half the FFT size', () => {
|
| 265 |
+
const tw = precomputeTwiddles(512);
|
| 266 |
+
expect(tw.cos.length).toBe(256);
|
| 267 |
+
expect(tw.sin.length).toBe(256);
|
| 268 |
+
});
|
| 269 |
+
|
| 270 |
+
it('should start with cos[0]=1, sin[0]=0', () => {
|
| 271 |
+
const tw = precomputeTwiddles(512);
|
| 272 |
+
expect(tw.cos[0]).toBeCloseTo(1.0, 10);
|
| 273 |
+
expect(tw.sin[0]).toBeCloseTo(0.0, 10);
|
| 274 |
+
});
|
| 275 |
+
});
|
| 276 |
+
|
| 277 |
+
// ─── Pre-emphasis ─────────────────────────────────────────────────────────
|
| 278 |
+
|
| 279 |
+
describe('preemphasize', () => {
|
| 280 |
+
it('should apply pre-emphasis filter correctly', () => {
|
| 281 |
+
const chunk = new Float32Array([1.0, 2.0, 3.0, 4.0]);
|
| 282 |
+
const result = preemphasize(chunk, 0, 0.97);
|
| 283 |
+
// out[0] = 1.0 - 0.97 * 0 = 1.0
|
| 284 |
+
expect(result[0]).toBeCloseTo(1.0, 5);
|
| 285 |
+
// out[1] = 2.0 - 0.97 * 1.0 = 1.03
|
| 286 |
+
expect(result[1]).toBeCloseTo(1.03, 5);
|
| 287 |
+
// out[2] = 3.0 - 0.97 * 2.0 = 1.06
|
| 288 |
+
expect(result[2]).toBeCloseTo(1.06, 5);
|
| 289 |
+
// out[3] = 4.0 - 0.97 * 3.0 = 1.09
|
| 290 |
+
expect(result[3]).toBeCloseTo(1.09, 5);
|
| 291 |
+
});
|
| 292 |
+
|
| 293 |
+
it('should use lastSample for continuity across chunks', () => {
|
| 294 |
+
const chunk = new Float32Array([5.0, 6.0]);
|
| 295 |
+
const result = preemphasize(chunk, 4.0, 0.97);
|
| 296 |
+
// out[0] = 5.0 - 0.97 * 4.0 = 1.12
|
| 297 |
+
expect(result[0]).toBeCloseTo(1.12, 5);
|
| 298 |
+
// out[1] = 6.0 - 0.97 * 5.0 = 1.15
|
| 299 |
+
expect(result[1]).toBeCloseTo(1.15, 5);
|
| 300 |
+
});
|
| 301 |
+
|
| 302 |
+
it('should return zeros for constant signal', () => {
|
| 303 |
+
const chunk = new Float32Array([1.0, 1.0, 1.0, 1.0]);
|
| 304 |
+
const result = preemphasize(chunk, 1.0, 0.97);
|
| 305 |
+
// All should be 1 - 0.97 = 0.03
|
| 306 |
+
for (let i = 0; i < result.length; i++) {
|
| 307 |
+
expect(result[i]).toBeCloseTo(0.03, 5);
|
| 308 |
+
}
|
| 309 |
+
});
|
| 310 |
+
});
|
| 311 |
+
|
| 312 |
+
// ─── Mel Frame Computation ────────────────────────────────────────────────
|
| 313 |
+
|
| 314 |
+
// Each test rebuilds the full set of precomputed tables (padded Hann window,
// FFT twiddles, mel filterbank) before extracting frames, mirroring the
// setup the mel worker performs once at startup.
describe('computeMelFrame', () => {
  it('should produce correct number of mel bins', () => {
    const nMels = 128;
    const window = createPaddedHannWindow();
    const tw = precomputeTwiddles(MEL_CONSTANTS.N_FFT);
    const fb = createMelFilterbank(nMels);

    // 1 second of silence
    const audio = new Float32Array(16000);
    const frame = computeMelFrame(audio, 0, window, tw, fb, nMels);

    expect(frame).toBeInstanceOf(Float32Array);
    expect(frame.length).toBe(nMels);
  });

  it('should produce finite values for silence', () => {
    const nMels = 128;
    const window = createPaddedHannWindow();
    const tw = precomputeTwiddles(MEL_CONSTANTS.N_FFT);
    const fb = createMelFilterbank(nMels);

    // All-zero input: the log-zero guard must keep every bin finite
    // (no -Infinity from log(0)).
    const audio = new Float32Array(16000);
    const frame = computeMelFrame(audio, 10, window, tw, fb, nMels);

    for (let i = 0; i < nMels; i++) {
      expect(isFinite(frame[i])).toBe(true);
    }
  });

  it('should produce larger values for louder signal', () => {
    const nMels = 128;
    const window = createPaddedHannWindow();
    const tw = precomputeTwiddles(MEL_CONSTANTS.N_FFT);
    const fb = createMelFilterbank(nMels);

    // Silence
    const silence = new Float32Array(16000);
    const silenceFrame = computeMelFrame(silence, 10, window, tw, fb, nMels);

    // Loud sine wave
    const loud = new Float32Array(16000);
    for (let i = 0; i < 16000; i++) {
      loud[i] = Math.sin(2 * Math.PI * 440 * i / 16000);
    }
    const preemph = preemphasize(loud);
    const loudFrame = computeMelFrame(preemph, 10, window, tw, fb, nMels);

    // At least some mel bins should be larger for the loud signal
    // (only a weak claim on purpose: pre-emphasis suppresses low bins).
    let louderCount = 0;
    for (let i = 0; i < nMels; i++) {
      if (loudFrame[i] > silenceFrame[i]) louderCount++;
    }
    expect(louderCount).toBeGreaterThan(0);
  });
});
|
| 369 |
+
|
| 370 |
+
// ─── Normalization ────────────────────────────────────────────────────────
|
| 371 |
+
|
| 372 |
+
describe('normalizeMelFeatures', () => {
|
| 373 |
+
it('should produce zero-mean per feature', () => {
|
| 374 |
+
const nMels = 4;
|
| 375 |
+
const T = 10;
|
| 376 |
+
const features = new Float32Array(nMels * T);
|
| 377 |
+
// Fill with some values
|
| 378 |
+
for (let m = 0; m < nMels; m++) {
|
| 379 |
+
for (let t = 0; t < T; t++) {
|
| 380 |
+
features[m * T + t] = m * 10 + t;
|
| 381 |
+
}
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
const normalized = normalizeMelFeatures(features, nMels, T);
|
| 385 |
+
|
| 386 |
+
// Each mel bin should have ~zero mean
|
| 387 |
+
for (let m = 0; m < nMels; m++) {
|
| 388 |
+
let sum = 0;
|
| 389 |
+
for (let t = 0; t < T; t++) {
|
| 390 |
+
sum += normalized[m * T + t];
|
| 391 |
+
}
|
| 392 |
+
expect(sum / T).toBeCloseTo(0, 4);
|
| 393 |
+
}
|
| 394 |
+
});
|
| 395 |
+
|
| 396 |
+
it('should produce unit variance per feature', () => {
|
| 397 |
+
const nMels = 4;
|
| 398 |
+
const T = 100;
|
| 399 |
+
const features = new Float32Array(nMels * T);
|
| 400 |
+
// Fill with varying values
|
| 401 |
+
for (let m = 0; m < nMels; m++) {
|
| 402 |
+
for (let t = 0; t < T; t++) {
|
| 403 |
+
features[m * T + t] = Math.sin(t * 0.1 + m);
|
| 404 |
+
}
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
const normalized = normalizeMelFeatures(features, nMels, T);
|
| 408 |
+
|
| 409 |
+
// Each mel bin should have ~unit Bessel-corrected std
|
| 410 |
+
for (let m = 0; m < nMels; m++) {
|
| 411 |
+
let sum = 0;
|
| 412 |
+
for (let t = 0; t < T; t++) {
|
| 413 |
+
sum += normalized[m * T + t];
|
| 414 |
+
}
|
| 415 |
+
const mean = sum / T;
|
| 416 |
+
|
| 417 |
+
let varSum = 0;
|
| 418 |
+
for (let t = 0; t < T; t++) {
|
| 419 |
+
const d = normalized[m * T + t] - mean;
|
| 420 |
+
varSum += d * d;
|
| 421 |
+
}
|
| 422 |
+
const std = Math.sqrt(varSum / (T - 1));
|
| 423 |
+
expect(std).toBeCloseTo(1.0, 1);
|
| 424 |
+
}
|
| 425 |
+
});
|
| 426 |
+
|
| 427 |
+
it('should handle single frame (T=1) gracefully', () => {
|
| 428 |
+
const nMels = 4;
|
| 429 |
+
const T = 1;
|
| 430 |
+
const features = new Float32Array([1, 2, 3, 4]);
|
| 431 |
+
|
| 432 |
+
const normalized = normalizeMelFeatures(features, nMels, T);
|
| 433 |
+
// With T=1, invStd=0, so all should be 0
|
| 434 |
+
for (let i = 0; i < normalized.length; i++) {
|
| 435 |
+
expect(normalized[i]).toBe(0);
|
| 436 |
+
}
|
| 437 |
+
});
|
| 438 |
+
|
| 439 |
+
it('should not modify the original array', () => {
|
| 440 |
+
const features = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]);
|
| 441 |
+
const copy = new Float32Array(features);
|
| 442 |
+
normalizeMelFeatures(features, 2, 4);
|
| 443 |
+
expect(features).toEqual(copy);
|
| 444 |
+
});
|
| 445 |
+
});
|
| 446 |
+
|
| 447 |
+
// ─── sampleToFrame ────────────────────────────────────────────────────────
|
| 448 |
+
|
| 449 |
+
// sampleToFrame is pure integer math: floor(sampleOffset / HOP_LENGTH).
// With HOP_LENGTH = 160 (see MEL_CONSTANTS) one second of 16 kHz audio
// maps to exactly 100 frames.
describe('sampleToFrame', () => {
  it('should convert 0 samples to frame 0', () => {
    expect(sampleToFrame(0)).toBe(0);
  });

  it('should convert HOP_LENGTH samples to frame 1', () => {
    expect(sampleToFrame(MEL_CONSTANTS.HOP_LENGTH)).toBe(1);
  });

  it('should convert 1 second (16000 samples) to 100 frames', () => {
    expect(sampleToFrame(16000)).toBe(100);
  });

  it('should floor partial frames', () => {
    // One sample short of a full hop stays in frame 0; one sample past
    // the hop boundary is frame 1, not frame 2.
    expect(sampleToFrame(MEL_CONSTANTS.HOP_LENGTH - 1)).toBe(0);
    expect(sampleToFrame(MEL_CONSTANTS.HOP_LENGTH + 1)).toBe(1);
  });
});
|
| 467 |
+
|
| 468 |
+
// ─── End-to-End Mel Pipeline ──────────────────────────────────────────────
|
| 469 |
+
|
| 470 |
+
// Sanity checks over the whole preemphasize → computeMelFrame chain plus the
// sample/frame bookkeeping used by the streaming mel worker.
describe('End-to-End Mel Pipeline', () => {
  it('should produce deterministic results for the same input', () => {
    const nMels = 128;
    const window = createPaddedHannWindow();
    const tw = precomputeTwiddles(MEL_CONSTANTS.N_FFT);
    const fb = createMelFilterbank(nMels);

    // Create a repeatable signal
    const audio = new Float32Array(4800); // 300ms
    for (let i = 0; i < audio.length; i++) {
      audio[i] = Math.sin(2 * Math.PI * 440 * i / 16000) * 0.5;
    }
    const preemph = preemphasize(audio);

    // Same inputs must give bit-identical outputs (expect .toBe, not closeTo):
    // the pipeline has no randomness or hidden state.
    const frame1 = computeMelFrame(preemph, 5, window, tw, fb, nMels);
    const frame2 = computeMelFrame(preemph, 5, window, tw, fb, nMels);

    for (let i = 0; i < nMels; i++) {
      expect(frame1[i]).toBe(frame2[i]);
    }
  });

  it('should produce correct number of frames for given audio length', () => {
    // 1 second = 16000 samples → 100 frames
    expect(sampleToFrame(16000)).toBe(100);
    // 5 seconds = 80000 samples → 500 frames
    expect(sampleToFrame(80000)).toBe(500);
    // 7 seconds = 112000 samples → 700 frames
    expect(sampleToFrame(112000)).toBe(700);
  });
});
|
src/lib/audio/mel-math.ts
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Keet - Mel Spectrogram Math
|
| 3 |
+
*
|
| 4 |
+
* Pure computation functions for mel spectrogram feature extraction.
|
| 5 |
+
* Matches NeMo / onnx-asr / parakeet.js mel.js exactly.
|
| 6 |
+
*
|
| 7 |
+
* Designed to be self-contained and reusable:
|
| 8 |
+
* - No external dependencies
|
| 9 |
+
* - All functions are pure (no side effects)
|
| 10 |
+
* - Can be imported by workers, tests, or bundled as a standalone package
|
| 11 |
+
*/
|
| 12 |
+
|
| 13 |
+
// ═══════════════════════════════════════════════════════════════════════════
|
| 14 |
+
// Constants
|
| 15 |
+
// ═══════════════════════════════════════════════════════════════════════════
|
| 16 |
+
|
| 17 |
+
// Shared DSP parameters for the mel front-end. Declared `as const` so every
// value stays a literal type and the object is read-only to consumers.
export const MEL_CONSTANTS = {
  SAMPLE_RATE: 16000, // Hz — the pipeline assumes 16 kHz mono input
  N_FFT: 512,
  WIN_LENGTH: 400, // analysis window: 400 samples = 25 ms at 16 kHz
  HOP_LENGTH: 160, // frame hop: 160 samples = 10 ms at 16 kHz
  PREEMPH: 0.97, // default pre-emphasis coefficient
  LOG_ZERO_GUARD: 2 ** -24, // float(2**-24) ≈ 5.96e-8
  N_FREQ_BINS: (512 >> 1) + 1, // 257
  DEFAULT_N_MELS: 128,
} as const;
|
| 27 |
+
|
| 28 |
+
// Slaney Mel Scale constants
|
| 29 |
+
const F_SP = 200.0 / 3; // ~66.667 Hz spacing in linear region
|
| 30 |
+
const MIN_LOG_HZ = 1000.0;
|
| 31 |
+
const MIN_LOG_MEL = MIN_LOG_HZ / F_SP; // = 15.0
|
| 32 |
+
const LOG_STEP = Math.log(6.4) / 27.0;
|
| 33 |
+
|
| 34 |
+
// ═══════════════════════════════════════════════════════════════════════════
|
| 35 |
+
// Mel Scale Helpers
|
| 36 |
+
// ═══════════════════════════════════════════════════════════════════════════
|
| 37 |
+
|
| 38 |
+
/**
|
| 39 |
+
* Convert frequency in Hz to mel scale (Slaney variant).
|
| 40 |
+
*/
|
| 41 |
+
export function hzToMel(freq: number): number {
|
| 42 |
+
return freq >= MIN_LOG_HZ
|
| 43 |
+
? MIN_LOG_MEL + Math.log(freq / MIN_LOG_HZ) / LOG_STEP
|
| 44 |
+
: freq / F_SP;
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
/**
|
| 48 |
+
* Convert mel scale value back to Hz (Slaney variant).
|
| 49 |
+
*/
|
| 50 |
+
export function melToHz(mel: number): number {
|
| 51 |
+
return mel >= MIN_LOG_MEL
|
| 52 |
+
? MIN_LOG_HZ * Math.exp(LOG_STEP * (mel - MIN_LOG_MEL))
|
| 53 |
+
: mel * F_SP;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/**
|
| 57 |
+
* Create mel filterbank matrix [nMels × N_FREQ_BINS] with Slaney normalization.
|
| 58 |
+
* Returns a flat Float32Array in row-major order.
|
| 59 |
+
*/
|
| 60 |
+
export function createMelFilterbank(nMels: number): Float32Array {
|
| 61 |
+
const { SAMPLE_RATE, N_FREQ_BINS } = MEL_CONSTANTS;
|
| 62 |
+
const fMax = SAMPLE_RATE / 2; // 8000
|
| 63 |
+
|
| 64 |
+
const allFreqs = new Float64Array(N_FREQ_BINS);
|
| 65 |
+
for (let i = 0; i < N_FREQ_BINS; i++) {
|
| 66 |
+
allFreqs[i] = (fMax * i) / (N_FREQ_BINS - 1);
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
const melMin = hzToMel(0);
|
| 70 |
+
const melMax = hzToMel(fMax);
|
| 71 |
+
const nPoints = nMels + 2;
|
| 72 |
+
const fPts = new Float64Array(nPoints);
|
| 73 |
+
for (let i = 0; i < nPoints; i++) {
|
| 74 |
+
fPts[i] = melToHz(melMin + ((melMax - melMin) * i) / (nPoints - 1));
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
const fDiff = new Float64Array(nPoints - 1);
|
| 78 |
+
for (let i = 0; i < nPoints - 1; i++) {
|
| 79 |
+
fDiff[i] = fPts[i + 1] - fPts[i];
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
const fb = new Float32Array(nMels * N_FREQ_BINS);
|
| 83 |
+
for (let m = 0; m < nMels; m++) {
|
| 84 |
+
const enorm = 2.0 / (fPts[m + 2] - fPts[m]); // slaney normalization
|
| 85 |
+
const fbOffset = m * N_FREQ_BINS;
|
| 86 |
+
for (let k = 0; k < N_FREQ_BINS; k++) {
|
| 87 |
+
const downSlope = (allFreqs[k] - fPts[m]) / fDiff[m];
|
| 88 |
+
const upSlope = (fPts[m + 2] - allFreqs[k]) / fDiff[m + 1];
|
| 89 |
+
fb[fbOffset + k] = Math.max(0, Math.min(downSlope, upSlope)) * enorm;
|
| 90 |
+
}
|
| 91 |
+
}
|
| 92 |
+
return fb;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
/**
|
| 96 |
+
* Create a Hann window of length WIN_LENGTH, zero-padded to N_FFT.
|
| 97 |
+
*/
|
| 98 |
+
export function createPaddedHannWindow(): Float64Array {
|
| 99 |
+
const { N_FFT, WIN_LENGTH } = MEL_CONSTANTS;
|
| 100 |
+
const window = new Float64Array(N_FFT);
|
| 101 |
+
const padLeft = (N_FFT - WIN_LENGTH) >> 1; // 56
|
| 102 |
+
for (let n = 0; n < WIN_LENGTH; n++) {
|
| 103 |
+
window[padLeft + n] = 0.5 * (1 - Math.cos((2 * Math.PI * n) / (WIN_LENGTH - 1)));
|
| 104 |
+
}
|
| 105 |
+
return window;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
/**
|
| 109 |
+
* Precompute FFT twiddle factors for a given size N.
|
| 110 |
+
*/
|
| 111 |
+
export function precomputeTwiddles(N: number): { cos: Float64Array; sin: Float64Array } {
|
| 112 |
+
const half = N >> 1;
|
| 113 |
+
const cos = new Float64Array(half);
|
| 114 |
+
const sin = new Float64Array(half);
|
| 115 |
+
for (let i = 0; i < half; i++) {
|
| 116 |
+
const angle = (-2 * Math.PI * i) / N;
|
| 117 |
+
cos[i] = Math.cos(angle);
|
| 118 |
+
sin[i] = Math.sin(angle);
|
| 119 |
+
}
|
| 120 |
+
return { cos, sin };
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/**
|
| 124 |
+
* In-place radix-2 Cooley-Tukey FFT.
|
| 125 |
+
* @param re Real part (modified in-place)
|
| 126 |
+
* @param im Imaginary part (modified in-place)
|
| 127 |
+
* @param n FFT size (must be power of 2)
|
| 128 |
+
* @param tw Precomputed twiddle factors
|
| 129 |
+
*/
|
| 130 |
+
export function fft(re: Float64Array, im: Float64Array, n: number, tw: { cos: Float64Array; sin: Float64Array }): void {
|
| 131 |
+
// Bit-reversal permutation
|
| 132 |
+
for (let i = 1, j = 0; i < n; i++) {
|
| 133 |
+
let bit = n >> 1;
|
| 134 |
+
while (j & bit) { j ^= bit; bit >>= 1; }
|
| 135 |
+
j ^= bit;
|
| 136 |
+
if (i < j) {
|
| 137 |
+
let tmp = re[i]; re[i] = re[j]; re[j] = tmp;
|
| 138 |
+
tmp = im[i]; im[i] = im[j]; im[j] = tmp;
|
| 139 |
+
}
|
| 140 |
+
}
|
| 141 |
+
// Cooley-Tukey butterfly
|
| 142 |
+
for (let size = 2; size <= n; size <<= 1) {
|
| 143 |
+
const half = size >> 1;
|
| 144 |
+
const step = n / size;
|
| 145 |
+
for (let i = 0; i < n; i += size) {
|
| 146 |
+
for (let j = 0; j < half; j++) {
|
| 147 |
+
const idx = j * step;
|
| 148 |
+
const tRe = re[i + j + half] * tw.cos[idx] - im[i + j + half] * tw.sin[idx];
|
| 149 |
+
const tIm = re[i + j + half] * tw.sin[idx] + im[i + j + half] * tw.cos[idx];
|
| 150 |
+
re[i + j + half] = re[i + j] - tRe;
|
| 151 |
+
im[i + j + half] = im[i + j] - tIm;
|
| 152 |
+
re[i + j] += tRe;
|
| 153 |
+
im[i + j] += tIm;
|
| 154 |
+
}
|
| 155 |
+
}
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
/**
|
| 160 |
+
* Apply pre-emphasis filter to audio samples.
|
| 161 |
+
* @param chunk Raw audio chunk
|
| 162 |
+
* @param lastSample Last sample from previous chunk (for continuity)
|
| 163 |
+
* @param coeff Pre-emphasis coefficient (default 0.97)
|
| 164 |
+
* @returns Pre-emphasized samples
|
| 165 |
+
*/
|
| 166 |
+
export function preemphasize(chunk: Float32Array, lastSample: number = 0, coeff: number = MEL_CONSTANTS.PREEMPH): Float32Array {
|
| 167 |
+
const out = new Float32Array(chunk.length);
|
| 168 |
+
out[0] = chunk[0] - coeff * lastSample;
|
| 169 |
+
for (let i = 1; i < chunk.length; i++) {
|
| 170 |
+
out[i] = chunk[i] - coeff * chunk[i - 1];
|
| 171 |
+
}
|
| 172 |
+
return out;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
/**
|
| 176 |
+
* Compute a single mel spectrogram frame from pre-emphasized audio.
|
| 177 |
+
* @param preemphAudio Full pre-emphasized audio buffer
|
| 178 |
+
* @param frameIdx Frame index
|
| 179 |
+
* @param hannWindow Pre-computed Hann window
|
| 180 |
+
* @param twiddles Pre-computed FFT twiddle factors
|
| 181 |
+
* @param melFilterbank Pre-computed mel filterbank
|
| 182 |
+
* @param nMels Number of mel bins
|
| 183 |
+
* @returns Raw (un-normalized) log-mel values for this frame
|
| 184 |
+
*/
|
| 185 |
+
export function computeMelFrame(
|
| 186 |
+
preemphAudio: Float32Array,
|
| 187 |
+
frameIdx: number,
|
| 188 |
+
hannWindow: Float64Array,
|
| 189 |
+
twiddles: { cos: Float64Array; sin: Float64Array },
|
| 190 |
+
melFilterbank: Float32Array,
|
| 191 |
+
nMels: number,
|
| 192 |
+
): Float32Array {
|
| 193 |
+
const { N_FFT, HOP_LENGTH, N_FREQ_BINS, LOG_ZERO_GUARD } = MEL_CONSTANTS;
|
| 194 |
+
const pad = N_FFT >> 1; // 256
|
| 195 |
+
const frameStart = frameIdx * HOP_LENGTH - pad;
|
| 196 |
+
const preemphLen = preemphAudio.length;
|
| 197 |
+
|
| 198 |
+
// Window the frame
|
| 199 |
+
const fftRe = new Float64Array(N_FFT);
|
| 200 |
+
const fftIm = new Float64Array(N_FFT);
|
| 201 |
+
for (let k = 0; k < N_FFT; k++) {
|
| 202 |
+
const idx = frameStart + k;
|
| 203 |
+
const sample = (idx >= 0 && idx < preemphLen) ? preemphAudio[idx] : 0;
|
| 204 |
+
fftRe[k] = sample * hannWindow[k];
|
| 205 |
+
fftIm[k] = 0;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
// FFT
|
| 209 |
+
fft(fftRe, fftIm, N_FFT, twiddles);
|
| 210 |
+
|
| 211 |
+
// Power spectrum
|
| 212 |
+
const power = new Float32Array(N_FREQ_BINS);
|
| 213 |
+
for (let k = 0; k < N_FREQ_BINS; k++) {
|
| 214 |
+
power[k] = fftRe[k] * fftRe[k] + fftIm[k] * fftIm[k];
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
// Mel filterbank multiply + log
|
| 218 |
+
const melFrame = new Float32Array(nMels);
|
| 219 |
+
for (let m = 0; m < nMels; m++) {
|
| 220 |
+
let melVal = 0;
|
| 221 |
+
const fbOff = m * N_FREQ_BINS;
|
| 222 |
+
for (let k = 0; k < N_FREQ_BINS; k++) {
|
| 223 |
+
melVal += power[k] * melFilterbank[fbOff + k];
|
| 224 |
+
}
|
| 225 |
+
melFrame[m] = Math.log(melVal + LOG_ZERO_GUARD);
|
| 226 |
+
}
|
| 227 |
+
return melFrame;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
/**
|
| 231 |
+
* Normalize mel features per-feature with Bessel-corrected variance.
|
| 232 |
+
* @param features Flat array [nMels × T], mel-major layout
|
| 233 |
+
* @param nMels Number of mel bins
|
| 234 |
+
* @param T Number of time frames
|
| 235 |
+
* @returns Normalized features (new array)
|
| 236 |
+
*/
|
| 237 |
+
export function normalizeMelFeatures(features: Float32Array, nMels: number, T: number): Float32Array {
|
| 238 |
+
const out = new Float32Array(features.length);
|
| 239 |
+
|
| 240 |
+
for (let m = 0; m < nMels; m++) {
|
| 241 |
+
const base = m * T;
|
| 242 |
+
|
| 243 |
+
// Copy and compute mean
|
| 244 |
+
let sum = 0;
|
| 245 |
+
for (let t = 0; t < T; t++) {
|
| 246 |
+
out[base + t] = features[base + t];
|
| 247 |
+
sum += features[base + t];
|
| 248 |
+
}
|
| 249 |
+
const mean = sum / T;
|
| 250 |
+
|
| 251 |
+
// Variance
|
| 252 |
+
let varSum = 0;
|
| 253 |
+
for (let t = 0; t < T; t++) {
|
| 254 |
+
const d = out[base + t] - mean;
|
| 255 |
+
varSum += d * d;
|
| 256 |
+
}
|
| 257 |
+
const invStd = T > 1
|
| 258 |
+
? 1.0 / (Math.sqrt(varSum / (T - 1)) + 1e-5)
|
| 259 |
+
: 0;
|
| 260 |
+
|
| 261 |
+
// Normalize
|
| 262 |
+
for (let t = 0; t < T; t++) {
|
| 263 |
+
out[base + t] = (out[base + t] - mean) * invStd;
|
| 264 |
+
}
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
return out;
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
/**
|
| 271 |
+
* Convert sample offset to frame index.
|
| 272 |
+
*/
|
| 273 |
+
export function sampleToFrame(sampleOffset: number): number {
|
| 274 |
+
return Math.floor(sampleOffset / MEL_CONSTANTS.HOP_LENGTH);
|
| 275 |
+
}
|