Add microphone selector, audio level meter, and fix Chrome compatibility
Browse files
- Add microphone device selector with auto-selection of default device
- Add real-time audio level meter during recording
- Fix stack overflow error in worker.js for large audio buffers
- Reorganize UI to combine controls in one panel
- Disable echo cancellation/noise suppression for Chrome compatibility
- Add audio resampling from native rate to 16kHz
- Include microphone test page for debugging
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
- source/index.html +14 -0
- source/microphone-test.html +238 -0
- source/src/App.jsx +76 -5
- source/src/utils/audio.js +96 -14
- source/src/worker.js +19 -0
source/index.html
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 7 |
+
<meta name="description" content="Real-time speech recognition with Parakeet STT and WebGPU acceleration. Progressive transcription demo." />
|
| 8 |
+
<title>Parakeet STT Progressive Transcription | WebGPU Demo</title>
|
| 9 |
+
</head>
|
| 10 |
+
<body>
|
| 11 |
+
<div id="root"></div>
|
| 12 |
+
<script type="module" src="/src/main.jsx"></script>
|
| 13 |
+
</body>
|
| 14 |
+
</html>
|
source/microphone-test.html
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Microphone Test</title>
|
| 7 |
+
<style>
|
| 8 |
+
body {
|
| 9 |
+
font-family: monospace;
|
| 10 |
+
padding: 20px;
|
| 11 |
+
max-width: 800px;
|
| 12 |
+
margin: 0 auto;
|
| 13 |
+
}
|
| 14 |
+
button {
|
| 15 |
+
padding: 10px 20px;
|
| 16 |
+
font-size: 16px;
|
| 17 |
+
margin: 10px 0;
|
| 18 |
+
}
|
| 19 |
+
#log {
|
| 20 |
+
background: #000;
|
| 21 |
+
color: #0f0;
|
| 22 |
+
padding: 10px;
|
| 23 |
+
font-size: 12px;
|
| 24 |
+
height: 400px;
|
| 25 |
+
overflow-y: scroll;
|
| 26 |
+
margin-top: 20px;
|
| 27 |
+
}
|
| 28 |
+
#meter {
|
| 29 |
+
width: 100%;
|
| 30 |
+
height: 40px;
|
| 31 |
+
background: #222;
|
| 32 |
+
margin-top: 10px;
|
| 33 |
+
position: relative;
|
| 34 |
+
}
|
| 35 |
+
#meter-bar {
|
| 36 |
+
height: 100%;
|
| 37 |
+
background: linear-gradient(to right, green, yellow, red);
|
| 38 |
+
width: 0%;
|
| 39 |
+
transition: width 0.05s;
|
| 40 |
+
}
|
| 41 |
+
</style>
|
| 42 |
+
</head>
|
| 43 |
+
<body>
|
| 44 |
+
<h1>Microphone Test</h1>
|
| 45 |
+
<p>This tests if your microphone is working with Web Audio API</p>
|
| 46 |
+
|
| 47 |
+
<label for="deviceSelect">Select Microphone:</label>
|
| 48 |
+
<select id="deviceSelect" style="width: 100%; padding: 8px; margin: 10px 0; background: #222; color: #0f0; border: 1px solid #0f0; font-family: monospace;">
|
| 49 |
+
<option value="">Default Microphone</option>
|
| 50 |
+
</select>
|
| 51 |
+
|
| 52 |
+
<button id="start">Start Microphone Test</button>
|
| 53 |
+
<button id="stop" disabled>Stop Test</button>
|
| 54 |
+
|
| 55 |
+
<div id="meter">
|
| 56 |
+
<div id="meter-bar"></div>
|
| 57 |
+
</div>
|
| 58 |
+
|
| 59 |
+
<div id="log"></div>
|
| 60 |
+
|
| 61 |
+
<script>
|
| 62 |
+
const logEl = document.getElementById('log');
|
| 63 |
+
const meterBar = document.getElementById('meter-bar');
|
| 64 |
+
const startBtn = document.getElementById('start');
|
| 65 |
+
const stopBtn = document.getElementById('stop');
|
| 66 |
+
const deviceSelect = document.getElementById('deviceSelect');
|
| 67 |
+
|
| 68 |
+
let audioContext = null;
|
| 69 |
+
let source = null;
|
| 70 |
+
let analyser = null;
|
| 71 |
+
let processor = null;
|
| 72 |
+
let animationId = null;
|
| 73 |
+
|
| 74 |
+
// Enumerate devices on page load
|
| 75 |
+
/**
 * Fill the microphone dropdown with the available audio input devices.
 *
 * The list is rebuilt from scratch on every call, so the option that was
 * selected beforehand is restored afterwards (when that device is still
 * present). Without this, refreshing the list would silently reset the
 * dropdown to "Default Microphone".
 *
 * Enumeration failures are reported on the console; the dropdown is left
 * as it was.
 */
async function loadDevices() {
  const previousSelection = deviceSelect.value;

  try {
    const devices = await navigator.mediaDevices.enumerateDevices();
    const audioInputs = devices.filter((d) => d.kind === 'audioinput');

    // Static markup only, so innerHTML is safe here; device labels go
    // through textContent below.
    deviceSelect.innerHTML = '<option value="">Default Microphone</option>';
    for (const device of audioInputs) {
      const option = document.createElement('option');
      option.value = device.deviceId;
      // Labels are empty until the page has microphone permission.
      option.textContent = device.label || `Microphone ${device.deviceId.slice(0, 8)}...`;
      deviceSelect.appendChild(option);
    }

    // Restore the earlier choice if that device is still listed.
    const stillListed = Array.from(deviceSelect.options).some(
      (option) => option.value === previousSelection,
    );
    if (stillListed) {
      deviceSelect.value = previousSelection;
    }

    console.log('Available devices:', audioInputs);
  } catch (error) {
    console.error('Failed to enumerate devices:', error);
  }
}
|
| 93 |
+
|
| 94 |
+
loadDevices();
|
| 95 |
+
|
| 96 |
+
/**
 * Append a timestamped entry to the on-page log and mirror the raw message
 * to the browser console.
 *
 * @param {string} msg - Text to record.
 */
function log(msg) {
  const entry = document.createElement('div');
  const stamp = new Date().toLocaleTimeString();
  entry.textContent = `[${stamp}] ${msg}`;

  logEl.appendChild(entry);
  // Keep the newest entry scrolled into view.
  logEl.scrollTop = logEl.scrollHeight;

  console.log(msg);
}
|
| 103 |
+
|
| 104 |
+
/**
 * Run the microphone diagnostic.
 *
 * Requests the selected (or default) input device, logs the track settings,
 * then probes the signal two ways: three one-shot AnalyserNode readings and
 * a continuous ScriptProcessorNode tap that also drives the level meter.
 *
 * Failures are written to the on-page log. Anything acquired before the
 * failure (capture stream, AudioContext) is released again so the
 * microphone does not stay open.
 */
async function startTest() {
  // Kept locally so the failure path can release whatever was acquired.
  let stream = null;
  let context = null;

  // Disable right away: the permission prompt can stay open for a while and
  // a second click would otherwise start a parallel capture.
  startBtn.disabled = true;

  try {
    const selectedDeviceId = deviceSelect.value;
    const deviceNote = selectedDeviceId
      ? `(Device: ${deviceSelect.options[deviceSelect.selectedIndex].text})`
      : '(Default)';
    log(`Requesting microphone access... ${deviceNote}`);

    // Browser-side processing is switched off so the raw signal is measured.
    const audioConstraints = {
      channelCount: 1,
      echoCancellation: false,
      noiseSuppression: false,
      autoGainControl: false,
    };

    if (selectedDeviceId) {
      audioConstraints.deviceId = { exact: selectedDeviceId };
    }

    stream = await navigator.mediaDevices.getUserMedia({
      audio: audioConstraints,
    });

    log('✓ Microphone access granted');

    // Refresh device list now that we have permission
    await loadDevices();

    const tracks = stream.getAudioTracks();
    log(`Stream has ${tracks.length} audio tracks`);
    if (tracks.length > 0) {
      const track = tracks[0];
      const settings = track.getSettings();
      log(`Track: ${track.label}`);
      log(`Settings: ${JSON.stringify(settings, null, 2)}`);
      log(`Enabled: ${track.enabled}, Muted: ${track.muted}, State: ${track.readyState}`);
    }

    context = new AudioContext();
    audioContext = context;
    log(`AudioContext created: ${context.sampleRate}Hz, state: ${context.state}`);

    if (context.state === 'suspended') {
      await context.resume();
      log(`AudioContext resumed to: ${context.state}`);
    }

    source = context.createMediaStreamSource(stream);
    log('MediaStreamSource created');

    // Test 1: AnalyserNode
    const analyserNode = context.createAnalyser();
    analyserNode.fftSize = 2048;
    source.connect(analyserNode);
    analyser = analyserNode;
    log('AnalyserNode connected');

    const dataArray = new Uint8Array(analyserNode.frequencyBinCount);

    const checkAnalyser = () => {
      // The probes are delayed; skip them once the test has been stopped so
      // they do not log or move the meter afterwards.
      if (context.state === 'closed') return;

      analyserNode.getByteTimeDomainData(dataArray);
      let sum = 0;
      let max = 0;
      for (let i = 0; i < dataArray.length; i++) {
        // Byte samples are centred on 128.
        const val = Math.abs(dataArray[i] - 128);
        sum += val;
        max = Math.max(max, val);
      }
      const avg = sum / dataArray.length;
      const percent = (max / 128) * 100;
      meterBar.style.width = `${percent}%`;

      log(`Analyser - Avg: ${avg.toFixed(2)}, Max: ${max}, Samples: ${dataArray.length}`);

      if (avg < 0.1) {
        log('⚠️ WARNING: Audio level is 0 or very quiet!');
      }
    };

    setTimeout(checkAnalyser, 500);
    setTimeout(checkAnalyser, 1000);
    setTimeout(checkAnalyser, 2000);

    // Test 2: ScriptProcessorNode
    processor = context.createScriptProcessor(4096, 1, 1);
    source.connect(processor);
    processor.connect(context.destination);

    let chunkCount = 0;
    processor.onaudioprocess = (event) => {
      const inputData = event.inputBuffer.getChannelData(0);

      // Single pass, no temporary arrays: this runs for every audio buffer.
      let max = 0;
      let sum = 0;
      for (let i = 0; i < inputData.length; i++) {
        const magnitude = Math.abs(inputData[i]);
        sum += magnitude;
        if (magnitude > max) max = magnitude;
      }
      const avg = sum / inputData.length;

      chunkCount++;
      if (chunkCount % 10 === 0) {
        log(`ScriptProcessor - Chunk ${chunkCount}, Avg: ${avg.toFixed(6)}, Max: ${max.toFixed(6)}, Length: ${inputData.length}`);

        if (max < 0.0001) {
          log('⚠️ WARNING: ScriptProcessor getting all zeros!');
        }
      }

      // Update meter
      const percent = max * 100;
      meterBar.style.width = `${Math.min(100, percent)}%`;
    };

    log('ScriptProcessorNode connected and listening...');
    log('✓ Test running - speak into your microphone!');

    startBtn.disabled = true;
    stopBtn.disabled = false;
    deviceSelect.disabled = true;
  } catch (error) {
    log(`❌ ERROR: ${error.message}`);
    console.error(error);

    // Release anything acquired before the failure so the microphone is
    // not left open.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    if (context && context.state !== 'closed') {
      context.close().catch((closeError) => console.error(closeError));
    }
    audioContext = null;
    source = null;
    analyser = null;
    processor = null;

    startBtn.disabled = false;
  }
}
|
| 219 |
+
|
| 220 |
+
/**
 * Stop the running microphone test and release its audio resources.
 *
 * Tears down the audio graph, stops the capture tracks, closes the
 * AudioContext and puts the buttons, dropdown and meter back into their
 * idle state.
 */
function stopTest() {
  if (processor) {
    // Detach the handler first so no further meter/log updates arrive.
    processor.onaudioprocess = null;
    processor.disconnect();
  }
  if (analyser) analyser.disconnect();
  if (source) {
    // Stop the underlying capture tracks: disconnecting nodes and closing
    // the context does not end the capture, so the microphone would stay
    // open otherwise.
    source.mediaStream.getTracks().forEach((track) => track.stop());
    source.disconnect();
  }
  // close() rejects on an already-closed context; guard and handle it
  // instead of leaving the promise floating.
  if (audioContext && audioContext.state !== 'closed') {
    audioContext.close().catch((error) => console.error(error));
  }
  if (animationId) cancelAnimationFrame(animationId);

  log('Test stopped');
  startBtn.disabled = false;
  stopBtn.disabled = true;
  deviceSelect.disabled = false;
  meterBar.style.width = '0%';
}
|
| 233 |
+
|
| 234 |
+
startBtn.addEventListener('click', startTest);
|
| 235 |
+
stopBtn.addEventListener('click', stopTest);
|
| 236 |
+
</script>
|
| 237 |
+
</body>
|
| 238 |
+
</html>
|
source/src/App.jsx
CHANGED
|
@@ -19,11 +19,16 @@ function App() {
|
|
| 19 |
const [modelMessage, setModelMessage] = useState('');
|
| 20 |
const [device, setDevice] = useState(null);
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
// Recording state
|
| 23 |
const [isRecording, setIsRecording] = useState(false);
|
| 24 |
const [fixedText, setFixedText] = useState('');
|
| 25 |
const [activeText, setActiveText] = useState('');
|
| 26 |
const [timestamp, setTimestamp] = useState(0);
|
|
|
|
| 27 |
|
| 28 |
// Performance metrics
|
| 29 |
const [latency, setLatency] = useState(null);
|
|
@@ -38,6 +43,29 @@ function App() {
|
|
| 38 |
const streamingHandlerRef = useRef(null);
|
| 39 |
const progressiveIntervalRef = useRef(null);
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
// Initialize worker
|
| 42 |
useEffect(() => {
|
| 43 |
workerRef.current = new Worker(WorkerUrl, { type: 'module' });
|
|
@@ -138,12 +166,21 @@ function App() {
|
|
| 138 |
// Start recording with callback for audio chunks
|
| 139 |
recorderRef.current = new AudioRecorder((audioChunk) => {
|
| 140 |
// Append PCM audio chunk directly (Float32Array)
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
audioProcessorRef.current.appendChunk(audioChunk);
|
| 143 |
console.log('Total buffer:', audioProcessorRef.current.getBuffer().length, 'samples');
|
| 144 |
});
|
| 145 |
|
| 146 |
-
await recorderRef.current.start();
|
| 147 |
setIsRecording(true);
|
| 148 |
|
| 149 |
// Start progressive transcription updates
|
|
@@ -246,12 +283,46 @@ function App() {
|
|
| 246 |
|
| 247 |
{/* Main Content */}
|
| 248 |
<main className="max-w-6xl mx-auto px-6 py-8 space-y-8">
|
| 249 |
-
{/*
|
| 250 |
<div className="bg-gray-900 rounded-lg border border-gray-700 p-6">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
<div className="flex items-center justify-between">
|
| 252 |
<div>
|
| 253 |
-
<
|
| 254 |
-
<p className="text-sm text-gray-
|
| 255 |
</div>
|
| 256 |
<div>
|
| 257 |
{modelStatus === 'not_loaded' && (
|
|
|
|
| 19 |
const [modelMessage, setModelMessage] = useState('');
|
| 20 |
const [device, setDevice] = useState(null);
|
| 21 |
|
| 22 |
+
// Microphone device selection
|
| 23 |
+
const [audioDevices, setAudioDevices] = useState([]);
|
| 24 |
+
const [selectedDeviceId, setSelectedDeviceId] = useState(null);
|
| 25 |
+
|
| 26 |
// Recording state
|
| 27 |
const [isRecording, setIsRecording] = useState(false);
|
| 28 |
const [fixedText, setFixedText] = useState('');
|
| 29 |
const [activeText, setActiveText] = useState('');
|
| 30 |
const [timestamp, setTimestamp] = useState(0);
|
| 31 |
+
const [audioLevel, setAudioLevel] = useState(0);
|
| 32 |
|
| 33 |
// Performance metrics
|
| 34 |
const [latency, setLatency] = useState(null);
|
|
|
|
| 43 |
const streamingHandlerRef = useRef(null);
|
| 44 |
const progressiveIntervalRef = useRef(null);
|
| 45 |
|
| 46 |
+
// Enumerate audio input devices and keep the list in sync with the hardware
useEffect(() => {
  // Set by the cleanup so a late enumeration result is not written into an
  // unmounted component.
  let cancelled = false;
  // Auto-selection happens at most once per mount.
  let hasAutoSelected = false;

  async function getDevices() {
    try {
      const devices = await navigator.mediaDevices.enumerateDevices();
      if (cancelled) return;

      const audioInputs = devices.filter((device) => device.kind === 'audioinput');
      setAudioDevices(audioInputs);

      // Prefer the entry whose deviceId is exactly 'default'. When no such
      // entry is listed, fall back to the first device so the state matches
      // what the <select> displays. Entries with an empty deviceId are
      // ignored.
      const preferred = audioInputs.find((d) => d.deviceId === 'default') ?? audioInputs[0];
      if (preferred?.deviceId && !hasAutoSelected) {
        hasAutoSelected = true;
        // Functional update: never overwrite a device that is already chosen.
        setSelectedDeviceId((current) => current || preferred.deviceId);
        console.log('[App] Auto-selected default device:', preferred.label);
      }

      console.log('[App] Available audio devices:', audioInputs.map(d => `${d.label || 'Unnamed'} (${d.deviceId.slice(0, 8)}...)`));
    } catch (error) {
      console.error('[App] Failed to enumerate devices:', error);
    }
  }

  getDevices();

  // Refresh the list when a microphone is plugged in or removed.
  const mediaDevices = navigator.mediaDevices;
  mediaDevices?.addEventListener?.('devicechange', getDevices);

  return () => {
    cancelled = true;
    mediaDevices?.removeEventListener?.('devicechange', getDevices);
  };
}, []);
|
| 68 |
+
|
| 69 |
// Initialize worker
|
| 70 |
useEffect(() => {
|
| 71 |
workerRef.current = new Worker(WorkerUrl, { type: 'module' });
|
|
|
|
| 166 |
// Start recording with callback for audio chunks
|
| 167 |
recorderRef.current = new AudioRecorder((audioChunk) => {
|
| 168 |
// Append PCM audio chunk directly (Float32Array)
|
| 169 |
+
const maxAmp = Math.max(...Array.from(audioChunk).map(Math.abs));
|
| 170 |
+
console.log('Audio chunk received:', audioChunk.length, 'samples (~' + (audioChunk.length / 16000 * 1000).toFixed(0) + 'ms), max amplitude:', maxAmp.toFixed(4));
|
| 171 |
+
|
| 172 |
+
// Update audio level meter (scale to 0-100%)
|
| 173 |
+
setAudioLevel(Math.min(100, maxAmp * 300)); // Scale up for visibility
|
| 174 |
+
|
| 175 |
+
if (maxAmp < 0.001) {
|
| 176 |
+
console.warn('⚠️ Very quiet audio - if using AirPods, they may need time to activate. Try speaking louder or tapping the mic.');
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
audioProcessorRef.current.appendChunk(audioChunk);
|
| 180 |
console.log('Total buffer:', audioProcessorRef.current.getBuffer().length, 'samples');
|
| 181 |
});
|
| 182 |
|
| 183 |
+
await recorderRef.current.start(selectedDeviceId);
|
| 184 |
setIsRecording(true);
|
| 185 |
|
| 186 |
// Start progressive transcription updates
|
|
|
|
| 283 |
|
| 284 |
{/* Main Content */}
|
| 285 |
<main className="max-w-6xl mx-auto px-6 py-8 space-y-8">
|
| 286 |
+
{/* Controls */}
|
| 287 |
<div className="bg-gray-900 rounded-lg border border-gray-700 p-6">
|
| 288 |
+
<h2 className="text-lg font-semibold mb-4">Controls</h2>
|
| 289 |
+
|
| 290 |
+
{/* Microphone Selection */}
|
| 291 |
+
<div className="mb-4">
|
| 292 |
+
<label className="block text-sm font-medium text-gray-400 mb-2">Microphone</label>
|
| 293 |
+
<select
|
| 294 |
+
value={selectedDeviceId || ''}
|
| 295 |
+
onChange={(e) => setSelectedDeviceId(e.target.value)}
|
| 296 |
+
className="w-full bg-gray-800 border border-gray-600 rounded px-4 py-2 text-white"
|
| 297 |
+
disabled={isRecording}
|
| 298 |
+
>
|
| 299 |
+
{audioDevices.length === 0 && <option value="">Loading devices...</option>}
|
| 300 |
+
{audioDevices.map((device) => (
|
| 301 |
+
<option key={device.deviceId} value={device.deviceId}>
|
| 302 |
+
{device.label || `Microphone ${device.deviceId.slice(0, 8)}...`}
|
| 303 |
+
</option>
|
| 304 |
+
))}
|
| 305 |
+
</select>
|
| 306 |
+
</div>
|
| 307 |
+
|
| 308 |
+
{/* Audio Level Meter */}
|
| 309 |
+
{isRecording && (
|
| 310 |
+
<div className="mb-4">
|
| 311 |
+
<label className="block text-sm font-medium text-gray-400 mb-2">Audio Level</label>
|
| 312 |
+
<div className="w-full h-3 bg-gray-800 rounded-full overflow-hidden">
|
| 313 |
+
<div
|
| 314 |
+
className="h-full bg-gradient-to-r from-green-500 via-yellow-500 to-red-500 transition-all duration-75"
|
| 315 |
+
style={{ width: `${audioLevel}%` }}
|
| 316 |
+
></div>
|
| 317 |
+
</div>
|
| 318 |
+
</div>
|
| 319 |
+
)}
|
| 320 |
+
|
| 321 |
+
{/* Model Status and Actions */}
|
| 322 |
<div className="flex items-center justify-between">
|
| 323 |
<div>
|
| 324 |
+
<h3 className="text-sm font-medium text-gray-400">Model Status</h3>
|
| 325 |
+
<p className="text-sm text-gray-300 mt-1">{modelMessage || 'Ready to load model'}</p>
|
| 326 |
</div>
|
| 327 |
<div>
|
| 328 |
{modelStatus === 'not_loaded' && (
|
source/src/utils/audio.js
CHANGED
|
@@ -17,29 +17,78 @@ export class AudioRecorder {
|
|
| 17 |
this.audioChunks = [];
|
| 18 |
}
|
| 19 |
|
| 20 |
-
async start() {
|
| 21 |
/**
|
| 22 |
* Start recording audio from microphone using Web Audio API
|
|
|
|
| 23 |
*/
|
| 24 |
try {
|
| 25 |
// Request microphone access
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
this.stream = await navigator.mediaDevices.getUserMedia({
|
| 27 |
-
audio:
|
| 28 |
-
channelCount: 1,
|
| 29 |
-
sampleRate: WHISPER_SAMPLING_RATE,
|
| 30 |
-
echoCancellation: true,
|
| 31 |
-
noiseSuppression: true,
|
| 32 |
-
}
|
| 33 |
});
|
| 34 |
|
| 35 |
-
// Create AudioContext
|
| 36 |
-
this.audioContext = new AudioContext(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
// Create source from stream
|
| 39 |
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
// Create ScriptProcessorNode (deprecated but works everywhere)
|
| 42 |
-
//
|
| 43 |
const bufferSize = 4096;
|
| 44 |
this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
|
| 45 |
|
|
@@ -47,13 +96,20 @@ export class AudioRecorder {
|
|
| 47 |
if (!this.isRecording) return;
|
| 48 |
|
| 49 |
const inputData = event.inputBuffer.getChannelData(0);
|
| 50 |
-
// Copy the data (important! buffer is reused)
|
| 51 |
-
const audioChunk = new Float32Array(inputData);
|
| 52 |
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
if (this.onDataAvailable) {
|
| 56 |
-
this.onDataAvailable(
|
| 57 |
}
|
| 58 |
};
|
| 59 |
|
|
@@ -71,6 +127,32 @@ export class AudioRecorder {
|
|
| 71 |
}
|
| 72 |
}
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
requestData() {
|
| 75 |
/**
|
| 76 |
* No-op for ScriptProcessor (data comes automatically)
|
|
|
|
| 17 |
this.audioChunks = [];
|
| 18 |
}
|
| 19 |
|
| 20 |
+
async start(deviceId = null) {
|
| 21 |
/**
|
| 22 |
* Start recording audio from microphone using Web Audio API
|
| 23 |
+
* @param {string} deviceId - Optional specific device ID to use
|
| 24 |
*/
|
| 25 |
try {
|
| 26 |
// Request microphone access
|
| 27 |
+
// Note: Disable echo cancellation and noise suppression in Chrome
|
| 28 |
+
// as they can conflict with cross-origin isolation headers
|
| 29 |
+
// Chrome + AirPods known issue: AirPods may send silence initially due to power saving.
|
| 30 |
+
// Recommendation: Use built-in/wired microphone for best results in Chrome.
|
| 31 |
+
const audioConstraints = {
|
| 32 |
+
channelCount: 1,
|
| 33 |
+
echoCancellation: false,
|
| 34 |
+
noiseSuppression: false,
|
| 35 |
+
autoGainControl: false,
|
| 36 |
+
};
|
| 37 |
+
|
| 38 |
+
// If specific device requested, add deviceId constraint
|
| 39 |
+
if (deviceId) {
|
| 40 |
+
audioConstraints.deviceId = { exact: deviceId };
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
this.stream = await navigator.mediaDevices.getUserMedia({
|
| 44 |
+
audio: audioConstraints
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
});
|
| 46 |
|
| 47 |
+
// Create AudioContext at native sample rate (browser will choose optimal rate)
|
| 48 |
+
this.audioContext = new AudioContext();
|
| 49 |
+
const nativeSampleRate = this.audioContext.sampleRate;
|
| 50 |
+
console.log(`[Audio] Native sample rate: ${nativeSampleRate}Hz, target: ${WHISPER_SAMPLING_RATE}Hz`);
|
| 51 |
+
console.log(`[Audio] AudioContext state: ${this.audioContext.state}`);
|
| 52 |
+
|
| 53 |
+
// Resume AudioContext if suspended (required by some browsers)
|
| 54 |
+
if (this.audioContext.state === 'suspended') {
|
| 55 |
+
await this.audioContext.resume();
|
| 56 |
+
console.log(`[Audio] AudioContext resumed to: ${this.audioContext.state}`);
|
| 57 |
+
}
|
| 58 |
|
| 59 |
// Create source from stream
|
| 60 |
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
| 61 |
|
| 62 |
+
// Debug: Check stream and track status
|
| 63 |
+
const audioTracks = this.stream.getAudioTracks();
|
| 64 |
+
console.log(`[Audio] MediaStreamSource created, stream active: ${this.stream.active}, tracks: ${audioTracks.length}`);
|
| 65 |
+
if (audioTracks.length > 0) {
|
| 66 |
+
const track = audioTracks[0];
|
| 67 |
+
console.log(`[Audio] Track settings:`, track.getSettings());
|
| 68 |
+
console.log(`[Audio] Track enabled: ${track.enabled}, muted: ${track.muted}, readyState: ${track.readyState}`);
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
// Create AnalyserNode to verify microphone input
|
| 72 |
+
const analyser = this.audioContext.createAnalyser();
|
| 73 |
+
analyser.fftSize = 2048;
|
| 74 |
+
const bufferLength = analyser.frequencyBinCount;
|
| 75 |
+
const dataArray = new Uint8Array(bufferLength);
|
| 76 |
+
|
| 77 |
+
this.source.connect(analyser);
|
| 78 |
+
|
| 79 |
+
// Test microphone input with analyser
|
| 80 |
+
setTimeout(() => {
|
| 81 |
+
analyser.getByteTimeDomainData(dataArray);
|
| 82 |
+
let sum = 0;
|
| 83 |
+
for (let i = 0; i < bufferLength; i++) {
|
| 84 |
+
sum += Math.abs(dataArray[i] - 128);
|
| 85 |
+
}
|
| 86 |
+
const avgLevel = sum / bufferLength;
|
| 87 |
+
console.log(`[Audio] Analyser test - Average level: ${avgLevel.toFixed(2)} (should be >0 if mic working)`);
|
| 88 |
+
}, 500);
|
| 89 |
+
|
| 90 |
// Create ScriptProcessorNode (deprecated but works everywhere)
|
| 91 |
+
// Use larger buffer at native rate
|
| 92 |
const bufferSize = 4096;
|
| 93 |
this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
|
| 94 |
|
|
|
|
| 96 |
if (!this.isRecording) return;
|
| 97 |
|
| 98 |
const inputData = event.inputBuffer.getChannelData(0);
|
|
|
|
|
|
|
| 99 |
|
| 100 |
+
// Debug: Check raw input levels BEFORE resampling
|
| 101 |
+
const rawMax = Math.max(...Array.from(inputData).map(Math.abs));
|
| 102 |
+
if (this.audioChunks.length % 10 === 0) { // Log every 10th chunk
|
| 103 |
+
console.log(`[Audio] Raw input max amplitude: ${rawMax.toFixed(4)} (${inputData.length} samples at ${nativeSampleRate}Hz)`);
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
// Resample from native rate to 16kHz
|
| 107 |
+
const resampled = this.resample(inputData, nativeSampleRate, WHISPER_SAMPLING_RATE);
|
| 108 |
+
|
| 109 |
+
this.audioChunks.push(resampled);
|
| 110 |
|
| 111 |
if (this.onDataAvailable) {
|
| 112 |
+
this.onDataAvailable(resampled);
|
| 113 |
}
|
| 114 |
};
|
| 115 |
|
|
|
|
| 127 |
}
|
| 128 |
}
|
| 129 |
|
| 130 |
+
resample(audioData, sourceSampleRate, targetSampleRate) {
|
| 131 |
+
/**
|
| 132 |
+
* Simple linear interpolation resampler
|
| 133 |
+
* Converts audio from sourceSampleRate to targetSampleRate
|
| 134 |
+
*/
|
| 135 |
+
if (sourceSampleRate === targetSampleRate) {
|
| 136 |
+
return new Float32Array(audioData);
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
const ratio = sourceSampleRate / targetSampleRate;
|
| 140 |
+
const newLength = Math.round(audioData.length / ratio);
|
| 141 |
+
const result = new Float32Array(newLength);
|
| 142 |
+
|
| 143 |
+
for (let i = 0; i < newLength; i++) {
|
| 144 |
+
const srcIndex = i * ratio;
|
| 145 |
+
const srcIndexFloor = Math.floor(srcIndex);
|
| 146 |
+
const srcIndexCeil = Math.min(srcIndexFloor + 1, audioData.length - 1);
|
| 147 |
+
const t = srcIndex - srcIndexFloor;
|
| 148 |
+
|
| 149 |
+
// Linear interpolation
|
| 150 |
+
result[i] = audioData[srcIndexFloor] * (1 - t) + audioData[srcIndexCeil] * t;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
return result;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
requestData() {
|
| 157 |
/**
|
| 158 |
* No-op for ScriptProcessor (data comes automatically)
|
source/src/worker.js
CHANGED
|
@@ -83,6 +83,21 @@ async function transcribe(audio, language = null) {
|
|
| 83 |
try {
|
| 84 |
const startTime = performance.now();
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
// Transcribe with parakeet.js
|
| 87 |
const result = await model.transcribe(audio, 16000, {
|
| 88 |
returnTimestamps: true, // Get word-level timestamps
|
|
@@ -95,6 +110,10 @@ async function transcribe(audio, language = null) {
|
|
| 95 |
const audioDuration = audio.length / 16000;
|
| 96 |
const rtf = latency / audioDuration; // Real-time factor
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
// Convert parakeet.js word format to our sentence format
|
| 99 |
console.log('[Worker] Parakeet words:', result.words?.length || 0, 'words');
|
| 100 |
if (result.words && result.words.length > 0) {
|
|
|
|
| 83 |
try {
|
| 84 |
const startTime = performance.now();
|
| 85 |
|
| 86 |
+
// Debug: Check audio levels (iterate with a plain loop rather than spreading into Math.max, to avoid stack overflow with large arrays)
|
| 87 |
+
let maxAmplitude = 0;
|
| 88 |
+
let avgAmplitude = 0;
|
| 89 |
+
for (let i = 0; i < audio.length; i++) {
|
| 90 |
+
const abs = Math.abs(audio[i]);
|
| 91 |
+
if (abs > maxAmplitude) maxAmplitude = abs;
|
| 92 |
+
avgAmplitude += abs;
|
| 93 |
+
}
|
| 94 |
+
avgAmplitude /= audio.length;
|
| 95 |
+
console.log('[Worker] Audio stats - Max:', maxAmplitude.toFixed(4), 'Avg:', avgAmplitude.toFixed(4), 'Length:', audio.length);
|
| 96 |
+
|
| 97 |
+
if (maxAmplitude < 0.01) {
|
| 98 |
+
console.warn('[Worker] WARNING: Audio is very quiet! Microphone may not be working.');
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
// Transcribe with parakeet.js
|
| 102 |
const result = await model.transcribe(audio, 16000, {
|
| 103 |
returnTimestamps: true, // Get word-level timestamps
|
|
|
|
| 110 |
const audioDuration = audio.length / 16000;
|
| 111 |
const rtf = latency / audioDuration; // Real-time factor
|
| 112 |
|
| 113 |
+
// Debug: log full result to see what parakeet.js returns
|
| 114 |
+
console.log('[Worker] Full parakeet result:', result);
|
| 115 |
+
console.log('[Worker] utterance_text:', result.utterance_text);
|
| 116 |
+
|
| 117 |
// Convert parakeet.js word format to our sentence format
|
| 118 |
console.log('[Worker] Parakeet words:', result.words?.length || 0, 'words');
|
| 119 |
if (result.words && result.words.length > 0) {
|