andito (HF Staff), Claude Sonnet 4.5 committed on
Commit
5151e54
·
1 Parent(s): 0f739b8

Add microphone selector, audio level meter, and fix Chrome compatibility

Browse files

- Add microphone device selector with auto-selection of default device
- Add real-time audio level meter during recording
- Fix stack overflow error in worker.js for large audio buffers
- Reorganize UI to combine controls in one panel
- Disable echo cancellation/noise suppression for Chrome compatibility
- Add audio resampling from native rate to 16kHz
- Include microphone test page for debugging

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

source/index.html ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <meta name="description" content="Real-time speech recognition with Parakeet STT and WebGPU acceleration. Progressive transcription demo." />
8
+ <title>Parakeet STT Progressive Transcription | WebGPU Demo</title>
9
+ </head>
10
+ <body>
11
+ <div id="root"></div>
12
+ <script type="module" src="/src/main.jsx"></script>
13
+ </body>
14
+ </html>
source/microphone-test.html ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Microphone Test</title>
7
+ <style>
8
+ body {
9
+ font-family: monospace;
10
+ padding: 20px;
11
+ max-width: 800px;
12
+ margin: 0 auto;
13
+ }
14
+ button {
15
+ padding: 10px 20px;
16
+ font-size: 16px;
17
+ margin: 10px 0;
18
+ }
19
+ #log {
20
+ background: #000;
21
+ color: #0f0;
22
+ padding: 10px;
23
+ font-size: 12px;
24
+ height: 400px;
25
+ overflow-y: scroll;
26
+ margin-top: 20px;
27
+ }
28
+ #meter {
29
+ width: 100%;
30
+ height: 40px;
31
+ background: #222;
32
+ margin-top: 10px;
33
+ position: relative;
34
+ }
35
+ #meter-bar {
36
+ height: 100%;
37
+ background: linear-gradient(to right, green, yellow, red);
38
+ width: 0%;
39
+ transition: width 0.05s;
40
+ }
41
+ </style>
42
+ </head>
43
+ <body>
44
+ <h1>Microphone Test</h1>
45
+ <p>This tests if your microphone is working with Web Audio API</p>
46
+
47
+ <label for="deviceSelect">Select Microphone:</label>
48
+ <select id="deviceSelect" style="width: 100%; padding: 8px; margin: 10px 0; background: #222; color: #0f0; border: 1px solid #0f0; font-family: monospace;">
49
+ <option value="">Default Microphone</option>
50
+ </select>
51
+
52
+ <button id="start">Start Microphone Test</button>
53
+ <button id="stop" disabled>Stop Test</button>
54
+
55
+ <div id="meter">
56
+ <div id="meter-bar"></div>
57
+ </div>
58
+
59
+ <div id="log"></div>
60
+
61
+ <script>
62
+ const logEl = document.getElementById('log');
63
+ const meterBar = document.getElementById('meter-bar');
64
+ const startBtn = document.getElementById('start');
65
+ const stopBtn = document.getElementById('stop');
66
+ const deviceSelect = document.getElementById('deviceSelect');
67
+
68
+ let audioContext = null;
69
+ let source = null;
70
+ let analyser = null;
71
+ let processor = null;
72
+ let animationId = null;
73
+
74
+ // Enumerate devices on page load
75
+ async function loadDevices() {
76
+ try {
77
+ const devices = await navigator.mediaDevices.enumerateDevices();
78
+ const audioInputs = devices.filter(d => d.kind === 'audioinput');
79
+
80
+ deviceSelect.innerHTML = '<option value="">Default Microphone</option>';
81
+ audioInputs.forEach(device => {
82
+ const option = document.createElement('option');
83
+ option.value = device.deviceId;
84
+ option.textContent = device.label || `Microphone ${device.deviceId.slice(0, 8)}...`;
85
+ deviceSelect.appendChild(option);
86
+ });
87
+
88
+ console.log('Available devices:', audioInputs);
89
+ } catch (error) {
90
+ console.error('Failed to enumerate devices:', error);
91
+ }
92
+ }
93
+
94
+ loadDevices();
95
+
96
+ function log(msg) {
97
+ const line = document.createElement('div');
98
+ line.textContent = `[${new Date().toLocaleTimeString()}] ${msg}`;
99
+ logEl.appendChild(line);
100
+ logEl.scrollTop = logEl.scrollHeight;
101
+ console.log(msg);
102
+ }
103
+
104
+ async function startTest() {
105
+ try {
106
+ const selectedDeviceId = deviceSelect.value;
107
+ log(`Requesting microphone access... ${selectedDeviceId ? `(Device: ${deviceSelect.options[deviceSelect.selectedIndex].text})` : '(Default)'}`);
108
+
109
+ const audioConstraints = {
110
+ channelCount: 1,
111
+ echoCancellation: false,
112
+ noiseSuppression: false,
113
+ autoGainControl: false,
114
+ };
115
+
116
+ if (selectedDeviceId) {
117
+ audioConstraints.deviceId = { exact: selectedDeviceId };
118
+ }
119
+
120
+ const stream = await navigator.mediaDevices.getUserMedia({
121
+ audio: audioConstraints
122
+ });
123
+
124
+ log('✓ Microphone access granted');
125
+
126
+ // Refresh device list now that we have permission
127
+ await loadDevices();
128
+
129
+ const tracks = stream.getAudioTracks();
130
+ log(`Stream has ${tracks.length} audio tracks`);
131
+ if (tracks.length > 0) {
132
+ const track = tracks[0];
133
+ const settings = track.getSettings();
134
+ log(`Track: ${track.label}`);
135
+ log(`Settings: ${JSON.stringify(settings, null, 2)}`);
136
+ log(`Enabled: ${track.enabled}, Muted: ${track.muted}, State: ${track.readyState}`);
137
+ }
138
+
139
+ audioContext = new AudioContext();
140
+ log(`AudioContext created: ${audioContext.sampleRate}Hz, state: ${audioContext.state}`);
141
+
142
+ if (audioContext.state === 'suspended') {
143
+ await audioContext.resume();
144
+ log(`AudioContext resumed to: ${audioContext.state}`);
145
+ }
146
+
147
+ source = audioContext.createMediaStreamSource(stream);
148
+ log('MediaStreamSource created');
149
+
150
+ // Test 1: AnalyserNode
151
+ analyser = audioContext.createAnalyser();
152
+ analyser.fftSize = 2048;
153
+ source.connect(analyser);
154
+ log('AnalyserNode connected');
155
+
156
+ const dataArray = new Uint8Array(analyser.frequencyBinCount);
157
+
158
+ function checkAnalyser() {
159
+ analyser.getByteTimeDomainData(dataArray);
160
+ let sum = 0;
161
+ let max = 0;
162
+ for (let i = 0; i < dataArray.length; i++) {
163
+ const val = Math.abs(dataArray[i] - 128);
164
+ sum += val;
165
+ max = Math.max(max, val);
166
+ }
167
+ const avg = sum / dataArray.length;
168
+ const percent = (max / 128) * 100;
169
+ meterBar.style.width = percent + '%';
170
+
171
+ log(`Analyser - Avg: ${avg.toFixed(2)}, Max: ${max}, Samples: ${dataArray.length}`);
172
+
173
+ if (avg < 0.1) {
174
+ log('⚠️ WARNING: Audio level is 0 or very quiet!');
175
+ }
176
+ }
177
+
178
+ setTimeout(checkAnalyser, 500);
179
+ setTimeout(checkAnalyser, 1000);
180
+ setTimeout(checkAnalyser, 2000);
181
+
182
+ // Test 2: ScriptProcessorNode
183
+ processor = audioContext.createScriptProcessor(4096, 1, 1);
184
+ source.connect(processor);
185
+ processor.connect(audioContext.destination);
186
+
187
+ let chunkCount = 0;
188
+ processor.onaudioprocess = (event) => {
189
+ const inputData = event.inputBuffer.getChannelData(0);
190
+ const max = Math.max(...Array.from(inputData).map(Math.abs));
191
+ const avg = Array.from(inputData).reduce((sum, val) => sum + Math.abs(val), 0) / inputData.length;
192
+
193
+ chunkCount++;
194
+ if (chunkCount % 10 === 0) {
195
+ log(`ScriptProcessor - Chunk ${chunkCount}, Avg: ${avg.toFixed(6)}, Max: ${max.toFixed(6)}, Length: ${inputData.length}`);
196
+
197
+ if (max < 0.0001) {
198
+ log('⚠️ WARNING: ScriptProcessor getting all zeros!');
199
+ }
200
+ }
201
+
202
+ // Update meter
203
+ const percent = (max * 100);
204
+ meterBar.style.width = Math.min(100, percent) + '%';
205
+ };
206
+
207
+ log('ScriptProcessorNode connected and listening...');
208
+ log('✓ Test running - speak into your microphone!');
209
+
210
+ startBtn.disabled = true;
211
+ stopBtn.disabled = false;
212
+ deviceSelect.disabled = true;
213
+
214
+ } catch (error) {
215
+ log(`❌ ERROR: ${error.message}`);
216
+ console.error(error);
217
+ }
218
+ }
219
+
220
+ function stopTest() {
221
+ if (processor) processor.disconnect();
222
+ if (analyser) analyser.disconnect();
223
+ if (source) source.disconnect();
224
+ if (audioContext) audioContext.close();
225
+ if (animationId) cancelAnimationFrame(animationId);
226
+
227
+ log('Test stopped');
228
+ startBtn.disabled = false;
229
+ stopBtn.disabled = true;
230
+ deviceSelect.disabled = false;
231
+ meterBar.style.width = '0%';
232
+ }
233
+
234
+ startBtn.addEventListener('click', startTest);
235
+ stopBtn.addEventListener('click', stopTest);
236
+ </script>
237
+ </body>
238
+ </html>
source/src/App.jsx CHANGED
@@ -19,11 +19,16 @@ function App() {
19
  const [modelMessage, setModelMessage] = useState('');
20
  const [device, setDevice] = useState(null);
21
 
 
 
 
 
22
  // Recording state
23
  const [isRecording, setIsRecording] = useState(false);
24
  const [fixedText, setFixedText] = useState('');
25
  const [activeText, setActiveText] = useState('');
26
  const [timestamp, setTimestamp] = useState(0);
 
27
 
28
  // Performance metrics
29
  const [latency, setLatency] = useState(null);
@@ -38,6 +43,29 @@ function App() {
38
  const streamingHandlerRef = useRef(null);
39
  const progressiveIntervalRef = useRef(null);
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  // Initialize worker
42
  useEffect(() => {
43
  workerRef.current = new Worker(WorkerUrl, { type: 'module' });
@@ -138,12 +166,21 @@ function App() {
138
  // Start recording with callback for audio chunks
139
  recorderRef.current = new AudioRecorder((audioChunk) => {
140
  // Append PCM audio chunk directly (Float32Array)
141
- console.log('Audio chunk received:', audioChunk.length, 'samples (~' + (audioChunk.length / 16000 * 1000).toFixed(0) + 'ms)');
 
 
 
 
 
 
 
 
 
142
  audioProcessorRef.current.appendChunk(audioChunk);
143
  console.log('Total buffer:', audioProcessorRef.current.getBuffer().length, 'samples');
144
  });
145
 
146
- await recorderRef.current.start();
147
  setIsRecording(true);
148
 
149
  // Start progressive transcription updates
@@ -246,12 +283,46 @@ function App() {
246
 
247
  {/* Main Content */}
248
  <main className="max-w-6xl mx-auto px-6 py-8 space-y-8">
249
- {/* Model Status */}
250
  <div className="bg-gray-900 rounded-lg border border-gray-700 p-6">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  <div className="flex items-center justify-between">
252
  <div>
253
- <h2 className="text-lg font-semibold mb-2">Model Status</h2>
254
- <p className="text-sm text-gray-400">{modelMessage || 'Ready to load model'}</p>
255
  </div>
256
  <div>
257
  {modelStatus === 'not_loaded' && (
 
19
  const [modelMessage, setModelMessage] = useState('');
20
  const [device, setDevice] = useState(null);
21
 
22
+ // Microphone device selection
23
+ const [audioDevices, setAudioDevices] = useState([]);
24
+ const [selectedDeviceId, setSelectedDeviceId] = useState(null);
25
+
26
  // Recording state
27
  const [isRecording, setIsRecording] = useState(false);
28
  const [fixedText, setFixedText] = useState('');
29
  const [activeText, setActiveText] = useState('');
30
  const [timestamp, setTimestamp] = useState(0);
31
+ const [audioLevel, setAudioLevel] = useState(0);
32
 
33
  // Performance metrics
34
  const [latency, setLatency] = useState(null);
 
43
  const streamingHandlerRef = useRef(null);
44
  const progressiveIntervalRef = useRef(null);
45
 
46
+ // Enumerate audio input devices
47
+ useEffect(() => {
48
+ async function getDevices() {
49
+ try {
50
+ const devices = await navigator.mediaDevices.enumerateDevices();
51
+ const audioInputs = devices.filter(device => device.kind === 'audioinput');
52
+ setAudioDevices(audioInputs);
53
+
54
+ // Auto-select the default device (the entry whose deviceId is exactly 'default')
55
+ const defaultDevice = audioInputs.find(d => d.deviceId === 'default');
56
+ if (defaultDevice && !selectedDeviceId) {
57
+ setSelectedDeviceId(defaultDevice.deviceId);
58
+ console.log('[App] Auto-selected default device:', defaultDevice.label);
59
+ }
60
+
61
+ console.log('[App] Available audio devices:', audioInputs.map(d => `${d.label || 'Unnamed'} (${d.deviceId.slice(0, 8)}...)`));
62
+ } catch (error) {
63
+ console.error('[App] Failed to enumerate devices:', error);
64
+ }
65
+ }
66
+ getDevices();
67
+ }, []);
68
+
69
  // Initialize worker
70
  useEffect(() => {
71
  workerRef.current = new Worker(WorkerUrl, { type: 'module' });
 
166
  // Start recording with callback for audio chunks
167
  recorderRef.current = new AudioRecorder((audioChunk) => {
168
  // Append PCM audio chunk directly (Float32Array)
169
+ const maxAmp = Math.max(...Array.from(audioChunk).map(Math.abs));
170
+ console.log('Audio chunk received:', audioChunk.length, 'samples (~' + (audioChunk.length / 16000 * 1000).toFixed(0) + 'ms), max amplitude:', maxAmp.toFixed(4));
171
+
172
+ // Update audio level meter (scale to 0-100%)
173
+ setAudioLevel(Math.min(100, maxAmp * 300)); // Scale up for visibility
174
+
175
+ if (maxAmp < 0.001) {
176
+ console.warn('⚠️ Very quiet audio - if using AirPods, they may need time to activate. Try speaking louder or tapping the mic.');
177
+ }
178
+
179
  audioProcessorRef.current.appendChunk(audioChunk);
180
  console.log('Total buffer:', audioProcessorRef.current.getBuffer().length, 'samples');
181
  });
182
 
183
+ await recorderRef.current.start(selectedDeviceId);
184
  setIsRecording(true);
185
 
186
  // Start progressive transcription updates
 
283
 
284
  {/* Main Content */}
285
  <main className="max-w-6xl mx-auto px-6 py-8 space-y-8">
286
+ {/* Controls */}
287
  <div className="bg-gray-900 rounded-lg border border-gray-700 p-6">
288
+ <h2 className="text-lg font-semibold mb-4">Controls</h2>
289
+
290
+ {/* Microphone Selection */}
291
+ <div className="mb-4">
292
+ <label className="block text-sm font-medium text-gray-400 mb-2">Microphone</label>
293
+ <select
294
+ value={selectedDeviceId || ''}
295
+ onChange={(e) => setSelectedDeviceId(e.target.value)}
296
+ className="w-full bg-gray-800 border border-gray-600 rounded px-4 py-2 text-white"
297
+ disabled={isRecording}
298
+ >
299
+ {audioDevices.length === 0 && <option value="">Loading devices...</option>}
300
+ {audioDevices.map((device) => (
301
+ <option key={device.deviceId} value={device.deviceId}>
302
+ {device.label || `Microphone ${device.deviceId.slice(0, 8)}...`}
303
+ </option>
304
+ ))}
305
+ </select>
306
+ </div>
307
+
308
+ {/* Audio Level Meter */}
309
+ {isRecording && (
310
+ <div className="mb-4">
311
+ <label className="block text-sm font-medium text-gray-400 mb-2">Audio Level</label>
312
+ <div className="w-full h-3 bg-gray-800 rounded-full overflow-hidden">
313
+ <div
314
+ className="h-full bg-gradient-to-r from-green-500 via-yellow-500 to-red-500 transition-all duration-75"
315
+ style={{ width: `${audioLevel}%` }}
316
+ ></div>
317
+ </div>
318
+ </div>
319
+ )}
320
+
321
+ {/* Model Status and Actions */}
322
  <div className="flex items-center justify-between">
323
  <div>
324
+ <h3 className="text-sm font-medium text-gray-400">Model Status</h3>
325
+ <p className="text-sm text-gray-300 mt-1">{modelMessage || 'Ready to load model'}</p>
326
  </div>
327
  <div>
328
  {modelStatus === 'not_loaded' && (
source/src/utils/audio.js CHANGED
@@ -17,29 +17,78 @@ export class AudioRecorder {
17
  this.audioChunks = [];
18
  }
19
 
20
- async start() {
21
  /**
22
  * Start recording audio from microphone using Web Audio API
 
23
  */
24
  try {
25
  // Request microphone access
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  this.stream = await navigator.mediaDevices.getUserMedia({
27
- audio: {
28
- channelCount: 1,
29
- sampleRate: WHISPER_SAMPLING_RATE,
30
- echoCancellation: true,
31
- noiseSuppression: true,
32
- }
33
  });
34
 
35
- // Create AudioContext with 16kHz sample rate
36
- this.audioContext = new AudioContext({ sampleRate: WHISPER_SAMPLING_RATE });
 
 
 
 
 
 
 
 
 
37
 
38
  // Create source from stream
39
  this.source = this.audioContext.createMediaStreamSource(this.stream);
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  // Create ScriptProcessorNode (deprecated but works everywhere)
42
- // 4096 samples = ~256ms at 16kHz
43
  const bufferSize = 4096;
44
  this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
45
 
@@ -47,13 +96,20 @@ export class AudioRecorder {
47
  if (!this.isRecording) return;
48
 
49
  const inputData = event.inputBuffer.getChannelData(0);
50
- // Copy the data (important! buffer is reused)
51
- const audioChunk = new Float32Array(inputData);
52
 
53
- this.audioChunks.push(audioChunk);
 
 
 
 
 
 
 
 
 
54
 
55
  if (this.onDataAvailable) {
56
- this.onDataAvailable(audioChunk);
57
  }
58
  };
59
 
@@ -71,6 +127,32 @@ export class AudioRecorder {
71
  }
72
  }
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  requestData() {
75
  /**
76
  * No-op for ScriptProcessor (data comes automatically)
 
17
  this.audioChunks = [];
18
  }
19
 
20
+ async start(deviceId = null) {
21
  /**
22
  * Start recording audio from microphone using Web Audio API
23
+ * @param {string} deviceId - Optional specific device ID to use
24
  */
25
  try {
26
  // Request microphone access
27
+ // Note: Disable echo cancellation and noise suppression in Chrome
28
+ // as they can conflict with cross-origin isolation headers
29
+ // Chrome + AirPods known issue: AirPods may send silence initially due to power saving.
30
+ // Recommendation: Use built-in/wired microphone for best results in Chrome.
31
+ const audioConstraints = {
32
+ channelCount: 1,
33
+ echoCancellation: false,
34
+ noiseSuppression: false,
35
+ autoGainControl: false,
36
+ };
37
+
38
+ // If specific device requested, add deviceId constraint
39
+ if (deviceId) {
40
+ audioConstraints.deviceId = { exact: deviceId };
41
+ }
42
+
43
  this.stream = await navigator.mediaDevices.getUserMedia({
44
+ audio: audioConstraints
 
 
 
 
 
45
  });
46
 
47
+ // Create AudioContext at native sample rate (browser will choose optimal rate)
48
+ this.audioContext = new AudioContext();
49
+ const nativeSampleRate = this.audioContext.sampleRate;
50
+ console.log(`[Audio] Native sample rate: ${nativeSampleRate}Hz, target: ${WHISPER_SAMPLING_RATE}Hz`);
51
+ console.log(`[Audio] AudioContext state: ${this.audioContext.state}`);
52
+
53
+ // Resume AudioContext if suspended (required by some browsers)
54
+ if (this.audioContext.state === 'suspended') {
55
+ await this.audioContext.resume();
56
+ console.log(`[Audio] AudioContext resumed to: ${this.audioContext.state}`);
57
+ }
58
 
59
  // Create source from stream
60
  this.source = this.audioContext.createMediaStreamSource(this.stream);
61
 
62
+ // Debug: Check stream and track status
63
+ const audioTracks = this.stream.getAudioTracks();
64
+ console.log(`[Audio] MediaStreamSource created, stream active: ${this.stream.active}, tracks: ${audioTracks.length}`);
65
+ if (audioTracks.length > 0) {
66
+ const track = audioTracks[0];
67
+ console.log(`[Audio] Track settings:`, track.getSettings());
68
+ console.log(`[Audio] Track enabled: ${track.enabled}, muted: ${track.muted}, readyState: ${track.readyState}`);
69
+ }
70
+
71
+ // Create AnalyserNode to verify microphone input
72
+ const analyser = this.audioContext.createAnalyser();
73
+ analyser.fftSize = 2048;
74
+ const bufferLength = analyser.frequencyBinCount;
75
+ const dataArray = new Uint8Array(bufferLength);
76
+
77
+ this.source.connect(analyser);
78
+
79
+ // Test microphone input with analyser
80
+ setTimeout(() => {
81
+ analyser.getByteTimeDomainData(dataArray);
82
+ let sum = 0;
83
+ for (let i = 0; i < bufferLength; i++) {
84
+ sum += Math.abs(dataArray[i] - 128);
85
+ }
86
+ const avgLevel = sum / bufferLength;
87
+ console.log(`[Audio] Analyser test - Average level: ${avgLevel.toFixed(2)} (should be >0 if mic working)`);
88
+ }, 500);
89
+
90
  // Create ScriptProcessorNode (deprecated but works everywhere)
91
+ // Use larger buffer at native rate
92
  const bufferSize = 4096;
93
  this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
94
 
 
96
  if (!this.isRecording) return;
97
 
98
  const inputData = event.inputBuffer.getChannelData(0);
 
 
99
 
100
+ // Debug: Check raw input levels BEFORE resampling
101
+ const rawMax = Math.max(...Array.from(inputData).map(Math.abs));
102
+ if (this.audioChunks.length % 10 === 0) { // Log every 10th chunk
103
+ console.log(`[Audio] Raw input max amplitude: ${rawMax.toFixed(4)} (${inputData.length} samples at ${nativeSampleRate}Hz)`);
104
+ }
105
+
106
+ // Resample from native rate to 16kHz
107
+ const resampled = this.resample(inputData, nativeSampleRate, WHISPER_SAMPLING_RATE);
108
+
109
+ this.audioChunks.push(resampled);
110
 
111
  if (this.onDataAvailable) {
112
+ this.onDataAvailable(resampled);
113
  }
114
  };
115
 
 
127
  }
128
  }
129
 
130
+ resample(audioData, sourceSampleRate, targetSampleRate) {
131
+ /**
132
+ * Simple linear interpolation resampler
133
+ * Converts audio from sourceSampleRate to targetSampleRate
134
+ */
135
+ if (sourceSampleRate === targetSampleRate) {
136
+ return new Float32Array(audioData);
137
+ }
138
+
139
+ const ratio = sourceSampleRate / targetSampleRate;
140
+ const newLength = Math.round(audioData.length / ratio);
141
+ const result = new Float32Array(newLength);
142
+
143
+ for (let i = 0; i < newLength; i++) {
144
+ const srcIndex = i * ratio;
145
+ const srcIndexFloor = Math.floor(srcIndex);
146
+ const srcIndexCeil = Math.min(srcIndexFloor + 1, audioData.length - 1);
147
+ const t = srcIndex - srcIndexFloor;
148
+
149
+ // Linear interpolation
150
+ result[i] = audioData[srcIndexFloor] * (1 - t) + audioData[srcIndexCeil] * t;
151
+ }
152
+
153
+ return result;
154
+ }
155
+
156
  requestData() {
157
  /**
158
  * No-op for ScriptProcessor (data comes automatically)
source/src/worker.js CHANGED
@@ -83,6 +83,21 @@ async function transcribe(audio, language = null) {
83
  try {
84
  const startTime = performance.now();
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  // Transcribe with parakeet.js
87
  const result = await model.transcribe(audio, 16000, {
88
  returnTimestamps: true, // Get word-level timestamps
@@ -95,6 +110,10 @@ async function transcribe(audio, language = null) {
95
  const audioDuration = audio.length / 16000;
96
  const rtf = latency / audioDuration; // Real-time factor
97
 
 
 
 
 
98
  // Convert parakeet.js word format to our sentence format
99
  console.log('[Worker] Parakeet words:', result.words?.length || 0, 'words');
100
  if (result.words && result.words.length > 0) {
 
83
  try {
84
  const startTime = performance.now();
85
 
86
+ // Debug: check audio levels with a plain loop (spreading a large buffer into Math.max would overflow the call stack)
87
+ let maxAmplitude = 0;
88
+ let avgAmplitude = 0;
89
+ for (let i = 0; i < audio.length; i++) {
90
+ const abs = Math.abs(audio[i]);
91
+ if (abs > maxAmplitude) maxAmplitude = abs;
92
+ avgAmplitude += abs;
93
+ }
94
+ avgAmplitude /= audio.length;
95
+ console.log('[Worker] Audio stats - Max:', maxAmplitude.toFixed(4), 'Avg:', avgAmplitude.toFixed(4), 'Length:', audio.length);
96
+
97
+ if (maxAmplitude < 0.01) {
98
+ console.warn('[Worker] WARNING: Audio is very quiet! Microphone may not be working.');
99
+ }
100
+
101
  // Transcribe with parakeet.js
102
  const result = await model.transcribe(audio, 16000, {
103
  returnTimestamps: true, // Get word-level timestamps
 
110
  const audioDuration = audio.length / 16000;
111
  const rtf = latency / audioDuration; // Real-time factor
112
 
113
+ // Debug: log full result to see what parakeet.js returns
114
+ console.log('[Worker] Full parakeet result:', result);
115
+ console.log('[Worker] utterance_text:', result.utterance_text);
116
+
117
  // Convert parakeet.js word format to our sentence format
118
  console.log('[Worker] Parakeet words:', result.words?.length || 0, 'words');
119
  if (result.words && result.words.length > 0) {