/**
 * Audio capture and processing utilities.
 *
 * Uses the Web Audio API with ScriptProcessorNode for real-time PCM audio
 * capture, resampled to the 16 kHz mono format expected by Whisper-style
 * speech models.
 */

const WHISPER_SAMPLING_RATE = 16000;

export class AudioRecorder {
  /**
   * @param {(chunk: Float32Array) => void} onDataAvailable - Invoked with each
   *   resampled (16 kHz mono) PCM chunk as it is captured. May be null.
   */
  constructor(onDataAvailable) {
    this.onDataAvailable = onDataAvailable;
    this.audioContext = null;
    this.stream = null;
    this.source = null;
    this.processor = null;
    this.isRecording = false;
    this.audioChunks = [];
  }

  /**
   * Start recording audio from the microphone using the Web Audio API.
   *
   * @param {string|null} deviceId - Optional specific input device ID to use.
   * @returns {Promise<boolean>} Resolves true once recording has started.
   * @throws Re-throws any getUserMedia / AudioContext failure, after releasing
   *   every partially-acquired resource (stream, context, nodes).
   */
  async start(deviceId = null) {
    try {
      // Request raw mono audio. Echo cancellation and noise suppression are
      // disabled because Chrome's processing can conflict with cross-origin
      // isolation headers.
      const audioConstraints = {
        channelCount: 1,
        echoCancellation: false,
        noiseSuppression: false,
        autoGainControl: false,
      };
      if (deviceId) {
        audioConstraints.deviceId = { exact: deviceId };
      }

      this.stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });

      // Create the AudioContext at the browser's native sample rate; we
      // resample to 16 kHz ourselves in the processing callback.
      this.audioContext = new AudioContext();
      const nativeSampleRate = this.audioContext.sampleRate;

      // Some browsers start contexts suspended until a user gesture.
      if (this.audioContext.state === 'suspended') {
        await this.audioContext.resume();
      }

      this.source = this.audioContext.createMediaStreamSource(this.stream);

      // ScriptProcessorNode is deprecated but works everywhere; use a large
      // buffer since we process at the native rate.
      const bufferSize = 4096;
      this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);

      this.processor.onaudioprocess = (event) => {
        if (!this.isRecording) return;
        const inputData = event.inputBuffer.getChannelData(0);
        // Resample from the native rate down to 16 kHz.
        const resampled = this.resample(inputData, nativeSampleRate, WHISPER_SAMPLING_RATE);
        this.audioChunks.push(resampled);
        this.onDataAvailable?.(resampled);
      };

      // Connect: source -> processor -> destination.
      this.source.connect(this.processor);
      this.processor.connect(this.audioContext.destination);

      this.isRecording = true;
      return true;
    } catch (error) {
      // Release any partially-acquired resources; otherwise a failure after
      // getUserMedia leaves the mic indicator on with no active recorder.
      this.cleanup();
      console.error('Failed to start recording:', error);
      throw error;
    }
  }

  /**
   * Resample mono PCM audio with simple linear interpolation.
   *
   * @param {Float32Array} audioData - Source samples.
   * @param {number} sourceSampleRate - Rate of the input, in Hz.
   * @param {number} targetSampleRate - Desired output rate, in Hz.
   * @returns {Float32Array} Resampled samples (always a fresh array, even when
   *   the rates match).
   */
  resample(audioData, sourceSampleRate, targetSampleRate) {
    if (sourceSampleRate === targetSampleRate) {
      return new Float32Array(audioData);
    }
    const ratio = sourceSampleRate / targetSampleRate;
    const newLength = Math.round(audioData.length / ratio);
    const result = new Float32Array(newLength);
    for (let i = 0; i < newLength; i++) {
      const srcIndex = i * ratio;
      const srcIndexFloor = Math.floor(srcIndex);
      const srcIndexCeil = Math.min(srcIndexFloor + 1, audioData.length - 1);
      const t = srcIndex - srcIndexFloor;
      // Linear interpolation between the two nearest source samples.
      result[i] = audioData[srcIndexFloor] * (1 - t) + audioData[srcIndexCeil] * t;
    }
    return result;
  }

  /**
   * No-op kept for API compatibility: ScriptProcessorNode emits data
   * automatically via onaudioprocess, so there is nothing to request.
   */
  requestData() {
    // Data is emitted automatically via onaudioprocess.
  }

  /**
   * Stop recording and return the complete captured audio.
   *
   * @returns {Promise<Float32Array>} All captured audio, 16 kHz mono.
   */
  async stop() {
    this.isRecording = false;

    // Disconnect the processing graph before draining the chunks.
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }

    // Concatenate all chunks into one contiguous buffer.
    const totalLength = this.audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
    const completeAudio = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of this.audioChunks) {
      completeAudio.set(chunk, offset);
      offset += chunk.length;
    }

    this.cleanup();
    return completeAudio;
  }

  /**
   * Release all resources: audio nodes, media stream tracks, the
   * AudioContext, and any buffered chunks. Safe to call more than once.
   */
  cleanup() {
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }
    if (this.audioContext && this.audioContext.state !== 'closed') {
      // close() is async; failures here are non-actionable best-effort cleanup.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }
    this.audioChunks = [];
    this.isRecording = false;
  }
}

export class AudioProcessor {
  /**
   * Accumulates audio chunks into a single buffer for real-time transcription.
   *
   * @param {number} sampleRate - Sample rate of appended audio, in Hz.
   *   Defaults to the Whisper rate (16 kHz).
   */
  constructor(sampleRate = WHISPER_SAMPLING_RATE) {
    this.sampleRate = sampleRate;
    this.audioBuffer = new Float32Array(0);
  }

  /**
   * Append a new audio chunk to the end of the buffer.
   *
   * @param {Float32Array} chunk - Samples to append.
   */
  appendChunk(chunk) {
    const newBuffer = new Float32Array(this.audioBuffer.length + chunk.length);
    newBuffer.set(this.audioBuffer);
    newBuffer.set(chunk, this.audioBuffer.length);
    this.audioBuffer = newBuffer;
  }

  /**
   * @returns {Float32Array} The current audio buffer (not a copy).
   */
  getBuffer() {
    return this.audioBuffer;
  }

  /**
   * @returns {number} Current buffer duration in seconds.
   */
  getDuration() {
    return this.audioBuffer.length / this.sampleRate;
  }

  /**
   * Clear the audio buffer.
   */
  reset() {
    this.audioBuffer = new Float32Array(0);
  }

  /**
   * Trim the buffer to at most `maxDuration` seconds, keeping the most
   * recent samples.
   *
   * @param {number} maxDuration - Maximum duration to keep, in seconds.
   */
  trimToSize(maxDuration) {
    const maxSamples = Math.floor(maxDuration * this.sampleRate);
    if (this.audioBuffer.length > maxSamples) {
      this.audioBuffer = this.audioBuffer.slice(-maxSamples);
    }
  }
}

export { WHISPER_SAMPLING_RATE };