// Commit e74c555 — andito (HF Staff): Add cross-origin headers and optimize progressive streaming
/**
* Audio capture and processing utilities
*
* Uses Web Audio API with ScriptProcessorNode for real-time PCM audio capture
*/
// Target sample rate (Hz) expected by Whisper models; all captured audio is resampled to this.
const WHISPER_SAMPLING_RATE = 16000;
export class AudioRecorder {
  /**
   * Captures microphone audio via the Web Audio API and emits 16 kHz PCM
   * chunks (Float32Array) as they arrive.
   *
   * Uses ScriptProcessorNode (deprecated, but universally supported).
   *
   * @param {?function(Float32Array): void} onDataAvailable - Invoked with each
   *   resampled chunk during recording; may be null/undefined.
   */
  constructor(onDataAvailable) {
    this.onDataAvailable = onDataAvailable;
    this.audioContext = null;
    this.stream = null;
    this.source = null;
    this.processor = null;
    this.isRecording = false;
    this.audioChunks = [];
  }

  /**
   * Start recording audio from the microphone.
   *
   * @param {?string} deviceId - Optional specific input device ID to use.
   * @returns {Promise<boolean>} Resolves true once capture has started.
   * @throws Rethrows getUserMedia / AudioContext failures after releasing
   *   any partially acquired resources.
   */
  async start(deviceId = null) {
    try {
      // Echo cancellation / noise suppression / AGC are disabled: per the
      // original note, they can conflict with cross-origin isolation headers
      // in Chrome.
      const audioConstraints = {
        channelCount: 1,
        echoCancellation: false,
        noiseSuppression: false,
        autoGainControl: false,
      };
      if (deviceId) {
        audioConstraints.deviceId = { exact: deviceId };
      }
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: audioConstraints,
      });
      // Create AudioContext at the browser's native sample rate; we resample
      // each buffer to WHISPER_SAMPLING_RATE ourselves.
      this.audioContext = new AudioContext();
      const nativeSampleRate = this.audioContext.sampleRate;
      // Some browsers start the context suspended until resumed.
      if (this.audioContext.state === 'suspended') {
        await this.audioContext.resume();
      }
      this.source = this.audioContext.createMediaStreamSource(this.stream);
      // ScriptProcessorNode: 4096-frame buffer, mono in, mono out.
      const bufferSize = 4096;
      this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
      this.processor.onaudioprocess = (event) => {
        if (!this.isRecording) return;
        const inputData = event.inputBuffer.getChannelData(0);
        // Resample from the native rate down to 16 kHz for Whisper.
        const resampled = this.resample(inputData, nativeSampleRate, WHISPER_SAMPLING_RATE);
        this.audioChunks.push(resampled);
        this.onDataAvailable?.(resampled);
      };
      // Connect: source -> processor -> destination (processor needs a sink
      // to fire onaudioprocess in most browsers).
      this.source.connect(this.processor);
      this.processor.connect(this.audioContext.destination);
      this.isRecording = true;
      return true;
    } catch (error) {
      // FIX: release the mic stream / AudioContext if setup failed partway
      // through, otherwise the microphone stays held open.
      this.cleanup();
      console.error('Failed to start recording:', error);
      throw error;
    }
  }

  /**
   * Linear-interpolation resampler.
   *
   * @param {Float32Array|number[]} audioData - Source samples.
   * @param {number} sourceSampleRate - Rate of audioData, in Hz.
   * @param {number} targetSampleRate - Desired output rate, in Hz.
   * @returns {Float32Array} New array at the target rate (always a copy).
   */
  resample(audioData, sourceSampleRate, targetSampleRate) {
    if (sourceSampleRate === targetSampleRate) {
      return new Float32Array(audioData);
    }
    const ratio = sourceSampleRate / targetSampleRate;
    const newLength = Math.round(audioData.length / ratio);
    const result = new Float32Array(newLength);
    for (let i = 0; i < newLength; i++) {
      const srcIndex = i * ratio;
      const srcIndexFloor = Math.floor(srcIndex);
      // Clamp the upper neighbour so the last sample never reads out of range.
      const srcIndexCeil = Math.min(srcIndexFloor + 1, audioData.length - 1);
      const t = srcIndex - srcIndexFloor;
      result[i] = audioData[srcIndexFloor] * (1 - t) + audioData[srcIndexCeil] * t;
    }
    return result;
  }

  /**
   * No-op: ScriptProcessorNode pushes data automatically via onaudioprocess.
   */
  requestData() {
    // Data is emitted automatically via onaudioprocess.
  }

  /**
   * Stop recording and return the complete capture.
   *
   * @returns {Promise<Float32Array>} All recorded audio, concatenated, at
   *   WHISPER_SAMPLING_RATE.
   */
  async stop() {
    // Plain async method — no need to wrap synchronous work in new Promise().
    this.isRecording = false;
    // Disconnect the processing graph.
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }
    // Concatenate all captured chunks into one contiguous buffer.
    let totalLength = 0;
    for (const chunk of this.audioChunks) {
      totalLength += chunk.length;
    }
    const completeAudio = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of this.audioChunks) {
      completeAudio.set(chunk, offset);
      offset += chunk.length;
    }
    // Release the stream / context and clear state (also empties audioChunks).
    this.cleanup();
    return completeAudio;
  }

  /**
   * Release the microphone stream and AudioContext, and reset state.
   * Safe to call repeatedly or when nothing was acquired.
   */
  cleanup() {
    if (this.stream) {
      this.stream.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }
    if (this.audioContext && this.audioContext.state !== 'closed') {
      // close() returns a Promise; swallow rejection so cleanup stays
      // synchronous and never throws during teardown.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }
    this.audioChunks = [];
    this.isRecording = false;
  }
}
export class AudioProcessor {
  /**
   * Accumulates fixed-rate PCM chunks into one growing Float32Array for
   * real-time transcription.
   *
   * @param {number} sampleRate - Samples per second of the incoming audio
   *   (defaults to the Whisper rate).
   */
  constructor(sampleRate = WHISPER_SAMPLING_RATE) {
    this.sampleRate = sampleRate;
    this.audioBuffer = new Float32Array(0);
  }

  /**
   * Append a chunk of samples to the end of the buffer.
   * @param {Float32Array} chunk - Samples to append.
   */
  appendChunk(chunk) {
    const combined = new Float32Array(this.audioBuffer.length + chunk.length);
    combined.set(this.audioBuffer, 0);
    combined.set(chunk, this.audioBuffer.length);
    this.audioBuffer = combined;
  }

  /** @returns {Float32Array} The accumulated samples. */
  getBuffer() {
    return this.audioBuffer;
  }

  /** @returns {number} Buffered audio length in seconds. */
  getDuration() {
    return this.audioBuffer.length / this.sampleRate;
  }

  /** Discard all buffered audio. */
  reset() {
    this.audioBuffer = new Float32Array(0);
  }

  /**
   * Keep only the most recent maxDuration seconds of audio.
   * @param {number} maxDuration - Maximum duration to retain, in seconds.
   */
  trimToSize(maxDuration) {
    const keepSamples = Math.floor(maxDuration * this.sampleRate);
    if (this.audioBuffer.length > keepSamples) {
      this.audioBuffer = this.audioBuffer.slice(-keepSamples);
    }
  }
}
export { WHISPER_SAMPLING_RATE };