// Spaces: andito / parakeet-v3-streaming
// Status: Running
// File size: 6,086 Bytes

/**
 * Audio capture and processing utilities
 *
 * Uses Web Audio API with ScriptProcessorNode for real-time PCM audio capture
 */

// Target sample rate in Hz: AudioRecorder resamples captured audio to this
// rate, and AudioProcessor uses it as its default sample rate.
const WHISPER_SAMPLING_RATE = 16000;

/**
 * Records microphone audio through the Web Audio API.
 *
 * Audio is pulled from a ScriptProcessorNode, resampled to
 * WHISPER_SAMPLING_RATE with linear interpolation, stored internally and
 * optionally forwarded chunk-by-chunk to a callback.
 */
export class AudioRecorder {
  /**
   * @param {function(Float32Array): void} [onDataAvailable] - Invoked with
   *   every resampled chunk while recording is active. May be omitted.
   */
  constructor(onDataAvailable) {
    this.onDataAvailable = onDataAvailable;
    this.audioContext = null;
    this.stream = null;
    this.source = null;
    this.processor = null;
    this.isRecording = false;
    this.audioChunks = [];
  }

  /**
   * Start recording audio from the microphone using the Web Audio API.
   *
   * If any setup step fails, everything acquired so far (microphone tracks,
   * AudioContext, nodes) is released before the error is rethrown, so a
   * failed start never leaves the microphone open.
   *
   * @param {string|null} [deviceId=null] - Optional specific device ID to use
   * @returns {Promise<boolean>} Resolves to true once recording has started
   * @throws Rethrows whatever error the failing setup step raised
   */
  async start(deviceId = null) {
    try {
      // Request microphone access
      // Note: Disable echo cancellation and noise suppression in Chrome
      // as they can conflict with cross-origin isolation headers
      const audioConstraints = {
        channelCount: 1,
        echoCancellation: false,
        noiseSuppression: false,
        autoGainControl: false,
      };

      // If specific device requested, add deviceId constraint
      if (deviceId) {
        audioConstraints.deviceId = { exact: deviceId };
      }

      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: audioConstraints,
      });

      // Create AudioContext at native sample rate (browser will choose optimal rate)
      this.audioContext = new AudioContext();
      const nativeSampleRate = this.audioContext.sampleRate;

      // Resume AudioContext if suspended (required by some browsers)
      if (this.audioContext.state === 'suspended') {
        await this.audioContext.resume();
      }

      // Create source from stream
      this.source = this.audioContext.createMediaStreamSource(this.stream);

      // Create ScriptProcessorNode (deprecated but works everywhere)
      // Use larger buffer at native rate
      const bufferSize = 4096;
      this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);

      this.processor.onaudioprocess = (event) => {
        if (!this.isRecording) return;

        const inputData = event.inputBuffer.getChannelData(0);

        // Resample from native rate to 16kHz. Each buffer is resampled
        // independently; no interpolation state is carried across buffers.
        const resampled = this.resample(inputData, nativeSampleRate, WHISPER_SAMPLING_RATE);

        this.audioChunks.push(resampled);

        if (this.onDataAvailable) {
          this.onDataAvailable(resampled);
        }
      };

      // Connect: source -> processor -> destination
      this.source.connect(this.processor);
      this.processor.connect(this.audioContext.destination);

      this.isRecording = true;

      return true;
    } catch (error) {
      console.error('Failed to start recording:', error);

      // Release anything acquired before the failure. A cleanup problem must
      // not mask the original error, so it is only logged.
      try {
        this.cleanup();
      } catch (cleanupError) {
        console.error('Failed to clean up after start error:', cleanupError);
      }

      throw error;
    }
  }

  /**
   * Simple linear interpolation resampler.
   * Converts audio from sourceSampleRate to targetSampleRate. No low-pass
   * filtering is applied before downsampling.
   *
   * @param {Float32Array} audioData - Input samples
   * @param {number} sourceSampleRate - Sample rate of audioData in Hz
   * @param {number} targetSampleRate - Desired output sample rate in Hz
   * @returns {Float32Array} A new array (a copy when the rates are equal)
   */
  resample(audioData, sourceSampleRate, targetSampleRate) {
    if (sourceSampleRate === targetSampleRate) {
      return new Float32Array(audioData);
    }

    const ratio = sourceSampleRate / targetSampleRate;
    const newLength = Math.round(audioData.length / ratio);
    const result = new Float32Array(newLength);

    for (let i = 0; i < newLength; i++) {
      const srcIndex = i * ratio;
      const srcIndexFloor = Math.floor(srcIndex);
      // Clamp so the last output sample never reads past the input
      const srcIndexCeil = Math.min(srcIndexFloor + 1, audioData.length - 1);
      const t = srcIndex - srcIndexFloor;

      // Linear interpolation
      result[i] = audioData[srcIndexFloor] * (1 - t) + audioData[srcIndexCeil] * t;
    }

    return result;
  }

  /**
   * No-op for ScriptProcessor: data is emitted automatically via
   * onaudioprocess.
   */
  requestData() {
    // Data is emitted automatically via onaudioprocess
  }

  /**
   * Stop recording and return the complete audio.
   *
   * @returns {Promise<Float32Array>} All recorded chunks concatenated in
   *   order. The internal chunk list is cleared afterwards.
   */
  async stop() {
    this.isRecording = false;

    // Concatenate all chunks before cleanup() discards them
    let totalLength = 0;
    for (const chunk of this.audioChunks) {
      totalLength += chunk.length;
    }

    const completeAudio = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of this.audioChunks) {
      completeAudio.set(chunk, offset);
      offset += chunk.length;
    }

    // Disconnects the nodes and releases the stream and context
    this.cleanup();

    return completeAudio;
  }

  /**
   * Clean up resources: disconnect the audio nodes, stop the microphone
   * tracks, close the AudioContext and discard any recorded chunks.
   * Safe to call when nothing has been started.
   */
  cleanup() {
    this.isRecording = false;

    if (this.processor) {
      // Detach the handler so the closure no longer references this recorder
      this.processor.onaudioprocess = null;
      this.processor.disconnect();
      this.processor = null;
    }

    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }

    if (this.stream) {
      this.stream.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }

    if (this.audioContext) {
      const context = this.audioContext;
      // Drop the reference even when the context is already closed
      this.audioContext = null;

      if (context.state !== 'closed') {
        // close() returns a promise; handle rejection instead of leaving it floating
        const closing = context.close();
        if (closing && typeof closing.catch === 'function') {
          closing.catch((error) => {
            console.error('Failed to close AudioContext:', error);
          });
        }
      }
    }

    this.audioChunks = [];
  }
}

/**
 * Accumulates audio chunks into one contiguous Float32Array for real-time
 * transcription.
 */
export class AudioProcessor {
  /**
   * @param {number} [sampleRate=WHISPER_SAMPLING_RATE] - Sample rate in Hz
   *   used to convert between sample counts and seconds
   */
  constructor(sampleRate = WHISPER_SAMPLING_RATE) {
    this.sampleRate = sampleRate;
    this.audioBuffer = new Float32Array(0);
  }

  /**
   * Append a new audio chunk to the end of the buffer.
   *
   * @param {Float32Array|number[]} chunk - Samples to append
   */
  appendChunk(chunk) {
    // Nothing to add: skip the reallocation and copy entirely
    if (chunk.length === 0) {
      return;
    }

    const newBuffer = new Float32Array(this.audioBuffer.length + chunk.length);
    newBuffer.set(this.audioBuffer);
    newBuffer.set(chunk, this.audioBuffer.length);
    this.audioBuffer = newBuffer;
  }

  /**
   * Get the current audio buffer.
   *
   * @returns {Float32Array} The internal buffer (not a copy)
   */
  getBuffer() {
    return this.audioBuffer;
  }

  /**
   * Get the current buffer duration.
   *
   * @returns {number} Buffer length divided by the sample rate, in seconds
   */
  getDuration() {
    return this.audioBuffer.length / this.sampleRate;
  }

  /**
   * Clear the audio buffer.
   */
  reset() {
    this.audioBuffer = new Float32Array(0);
  }

  /**
   * Trim the buffer to a maximum duration, keeping the most recent samples.
   *
   * @param {number} maxDuration - Maximum duration to keep, in seconds.
   *   Zero or negative values empty the buffer.
   */
  trimToSize(maxDuration) {
    // Clamp to zero so a negative duration cannot turn into a front-trim
    const maxSamples = Math.max(0, Math.floor(maxDuration * this.sampleRate));
    const currentLength = this.audioBuffer.length;

    if (currentLength > maxSamples) {
      // Use an explicit start index: slice(-0) is slice(0) and would keep
      // the whole buffer when maxSamples is 0
      this.audioBuffer = this.audioBuffer.slice(currentLength - maxSamples);
    }
  }
}

// Expose the sampling-rate constant as a named export of this module.
export { WHISPER_SAMPLING_RATE };