Spaces:
Sleeping
Sleeping
| /** | |
| * Audio Manager for TASTE Voice Bot | |
| * Handles audio recording, playback, and processing | |
| */ | |
| import { CONFIG } from './config.js'; | |
export class AudioManager {
  /**
   * Initialize audio manager state.
   * Real audio resources (AudioContext, microphone stream) are created
   * lazily in initialize() so the constructor never triggers permission
   * prompts or autoplay-policy errors.
   */
  constructor() {
    this.audioContext = null;
    this.mediaStream = null;
    this.processor = null;
    this.analyser = null; // For visualizing microphone input
    this.dataArray = null; // For storing frequency/time domain data
    this.playbackAnalyser = null; // For visualizing agent audio playback
    this.playbackDataArray = null; // For storing playback waveform data
    this.currentSource = null; // Currently playing AudioBufferSourceNode (so stopPlayback can actually stop it)
    this.isRecording = false;
    this.isPlaying = false;
    this.sampleRate = 16000;
    this.channels = 1;
    this.volume = 0.7;
    this.audioWorkletModuleLoaded = false; // Track if AudioWorklet module is loaded
    // Audio playback queue (FIFO) - pull-based manual control
    this.audioQueue = [];
    this.isProcessingQueue = false;
    this.minBufferSize = CONFIG.minBufferSize || 2; // Minimum chunks before starting playback
    this.queueMaxSize = CONFIG.queueMaxSize || 30; // Max queue size
    this.queueTimeoutMs = CONFIG.queueTimeoutMs || 5000; // Timeout for old chunks
    this.callbacks = {
      onAudioChunk: null,
      onVoiceDetected: null,
      onRecordingComplete: null,
      onChunkPlayed: null, // Called after each chunk finishes playing (for pull-based)
      onTextDisplay: null // Called when text should be displayed (synchronized with audio)
    };
    this.stats = {
      state: 'idle',
      isRecording: false,
      totalChunksRecorded: 0,
      voiceDetectionRate: 0
    };
  }

  /**
   * Set event callbacks (merged into existing callbacks, unknown keys kept).
   * @param {Object} callbacks - Callback functions
   */
  setCallbacks(callbacks) {
    Object.assign(this.callbacks, callbacks);
  }

  /**
   * Initialize audio context, load the AudioWorklet module and request
   * microphone access.
   * @returns {Promise<boolean>} Initialization success status
   */
  async initialize() {
    try {
      // If AudioContext exists but is closed, recreate it
      if (this.audioContext && this.audioContext.state === 'closed') {
        console.log('Previous AudioContext was closed, creating new one');
        this.audioContext = null;
        this.audioWorkletModuleLoaded = false; // Reset module flag
      }
      // Create audio context if it doesn't exist
      if (!this.audioContext) {
        // Create audio context - let browser use optimal sample rate
        // Browser will handle resampling between 16kHz (mic) and 24kHz (playback)
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
        console.log('Created new AudioContext, sample rate:', this.audioContext.sampleRate);
      } else {
        console.log('Reusing existing AudioContext');
      }
      // Browsers create contexts in 'suspended' state until a user gesture
      // (autoplay policy); resume here so recording/playback can proceed.
      if (this.audioContext.state === 'suspended') {
        await this.audioContext.resume();
        console.log('AudioContext resumed from suspended state');
      }
      // Load AudioWorklet module for exact chunk sizing (only once)
      if (!this.audioWorkletModuleLoaded) {
        try {
          await this.audioContext.audioWorklet.addModule('./js/audio-processor.js');
          this.audioWorkletModuleLoaded = true;
          console.log('AudioWorklet processor loaded successfully');
        } catch (error) {
          console.error('Error loading AudioWorklet:', error);
          throw error;
        }
      } else {
        console.log('AudioWorklet module already loaded, skipping');
      }
      // Request microphone access (always request new stream)
      if (this.mediaStream) {
        // Stop existing tracks
        this.mediaStream.getTracks().forEach(track => track.stop());
      }
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });
      console.log('Audio initialized successfully');
      this.stats.state = 'ready';
      return true;
    } catch (error) {
      console.error('Error initializing audio:', error);
      this.stats.state = 'error';
      return false;
    }
  }

  /**
   * Start audio recording.
   * Builds the graph: mic source -> analyser -> AudioWorklet processor
   * -> destination; the worklet posts fixed-size PCM chunks back here.
   * @param {string} sessionId - Session ID for recording (currently unused;
   *   kept for interface compatibility with callers)
   * @returns {Promise<boolean>} Start recording success status
   */
  async startRecording(sessionId) {
    if (!this.audioContext || !this.mediaStream) {
      console.error('Audio not initialized');
      return false;
    }
    if (this.isRecording) {
      console.warn('Already recording');
      return false;
    }
    try {
      // Create audio source from microphone stream
      const source = this.audioContext.createMediaStreamSource(this.mediaStream);
      // Create analyser for waveform visualization
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 2048;
      const bufferLength = this.analyser.frequencyBinCount;
      this.dataArray = new Uint8Array(bufferLength);
      // Use AudioWorklet for exact chunk sizing (600ms = 9600 samples at 16kHz)
      this.processor = new AudioWorkletNode(this.audioContext, 'audio-chunk-processor');
      // Listen for audio chunks from the worklet
      this.processor.port.onmessage = (event) => {
        if (!this.isRecording) return;
        if (event.data.type === 'audio-chunk') {
          // Create audio chunk
          const chunk = {
            data: event.data.data,
            timestampMs: Date.now(),
            sampleRate: CONFIG.micSampleRate,
            channels: 1
          };
          this.stats.totalChunksRecorded++;
          // Send chunk via callback
          if (this.callbacks.onAudioChunk) {
            this.callbacks.onAudioChunk(chunk);
          }
        }
      };
      // Connect nodes: source -> analyser -> processor -> destination
      source.connect(this.analyser);
      this.analyser.connect(this.processor);
      this.processor.connect(this.audioContext.destination);
      this.isRecording = true;
      this.stats.isRecording = true;
      this.stats.state = 'recording';
      console.log(`Recording started with AudioWorklet (${CONFIG.chunkDurationMs}ms chunks = ${CONFIG.micSampleRate * CONFIG.chunkDurationMs / 1000} samples)`);
      return true;
    } catch (error) {
      console.error('Error starting recording:', error);
      return false;
    }
  }

  /**
   * Stop audio recording and tear down the worklet node.
   * @returns {Array} Array of recorded audio chunks (always empty: chunks
   *   are streamed out through onAudioChunk, not accumulated here)
   */
  stopRecording() {
    if (!this.isRecording) {
      return [];
    }
    this.isRecording = false;
    this.stats.isRecording = false;
    this.stats.state = 'ready';
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    console.log('Recording stopped');
    if (this.callbacks.onRecordingComplete) {
      this.callbacks.onRecordingComplete();
    }
    return [];
  }

  /**
   * Start streaming audio (real-time processing).
   * Thin alias for startRecording().
   * @returns {Promise<boolean>} Start streaming success status
   */
  async startStreaming() {
    return this.startRecording();
  }

  /**
   * Stop streaming audio.
   * @returns {boolean} Stop streaming success status
   */
  stopStreaming() {
    this.stopRecording();
    return true;
  }

  /**
   * Queue audio chunk for playback (pull-based manual control)
   * Adds chunk to buffer but does NOT auto-start playback
   * Call startPlayback() manually when buffer reaches minimum size
   *
   * @param {ArrayBuffer} audioData - Audio data as Int16Array buffer
   * @param {number} sampleRate - Sample rate of the audio
   * @param {string} responseText - Text to display when playing
   * @param {boolean} finalize - Whether this is the last audio chunk
   * @returns {number} Current queue size after adding
   */
  queueAudio(audioData, sampleRate = 16000, responseText = '', finalize = false) {
    // Check queue size limit
    if (this.audioQueue.length >= this.queueMaxSize) {
      console.warn(`Audio queue full (${this.audioQueue.length}/${this.queueMaxSize}), dropping oldest chunk`);
      this.audioQueue.shift(); // Remove oldest chunk
    }
    // Add chunk to queue with timestamp, text, and finalize flag
    this.audioQueue.push({
      audioData: audioData,
      sampleRate: sampleRate,
      responseText: responseText,
      finalize: finalize,
      timestamp: Date.now()
    });
    console.log(`Audio chunk queued (${this.audioQueue.length} chunks, min: ${this.minBufferSize}), text: "${responseText}", finalize: ${finalize}`);
    return this.audioQueue.length;
  }

  /**
   * Start playback from queue (manual control)
   * Call this when buffer reaches minimum size
   * @returns {boolean} True if playback started
   */
  startPlayback() {
    if (!this.isProcessingQueue && this.audioQueue.length > 0) {
      console.log(`[Playback] Starting with ${this.audioQueue.length} chunks`);
      this.processQueue();
      return true;
    }
    return false;
  }

  /**
   * Process audio queue (FIFO) - Pull-based
   * Plays chunks continuously until queue is empty
   * Calls onChunkPlayed callback after each chunk for request management
   */
  async processQueue() {
    if (this.isProcessingQueue) {
      return; // Already processing
    }
    this.isProcessingQueue = true;
    console.log('[Queue] Started processing');
    // Play all chunks until queue is empty
    while (this.audioQueue.length > 0) {
      const chunk = this.audioQueue.shift(); // FIFO: get first chunk
      // Check if chunk is too old (timeout)
      const age = Date.now() - chunk.timestamp;
      if (age > this.queueTimeoutMs) {
        console.warn(`[Queue] Dropping old audio chunk (age: ${age}ms)`);
        continue;
      }
      // Play this chunk and wait for it to finish
      // Pass the text callback to be triggered when audio actually starts
      console.log(`[Queue] Playing chunk (${this.audioQueue.length} remaining), text: "${chunk.responseText || ''}"`);
      await this.playAudio(chunk.audioData, chunk.sampleRate, chunk.responseText);
      // Notify that chunk finished playing (trigger request logic)
      if (this.callbacks.onChunkPlayed) {
        this.callbacks.onChunkPlayed(this.audioQueue.length);
      }
    }
    this.isProcessingQueue = false;
    console.log('[Queue] Queue empty, stopped processing');
  }

  /**
   * Clear audio queue and reset queue state
   * Should be called when session ends
   */
  clearQueue() {
    const hadChunks = this.audioQueue.length;
    this.audioQueue = [];
    this.isProcessingQueue = false;
    if (hadChunks > 0) {
      console.log(`[Queue] Cleared ${hadChunks} pending chunks`);
    }
  }

  /**
   * Get current queue size (for buffer monitoring)
   * @returns {number} Number of chunks in queue
   */
  getQueueSize() {
    return this.audioQueue.length;
  }

  /**
   * Check if should request more chunks (buffer below minimum)
   * @returns {boolean} True if should request more chunks
   */
  shouldRequestMore() {
    return this.audioQueue.length < this.minBufferSize;
  }

  /**
   * Check if buffer is ready to start playback
   * Special case: If there's only 1 chunk and it's marked as finalize, ready to play
   * Otherwise: Needs at least minBufferSize chunks
   * @returns {boolean} True if buffer is ready for playback
   */
  isBufferReady() {
    // Special case: single finalized chunk can be played immediately
    if (this.audioQueue.length === 1 && this.audioQueue[0].finalize) {
      console.log('[Buffer] Ready: Single finalized chunk detected');
      return true;
    }
    // Normal case: need minimum buffer size
    return this.audioQueue.length >= this.minBufferSize;
  }

  /**
   * Play audio data (direct playback)
   * @param {ArrayBuffer} audioData - Audio data to play (Int16 PCM)
   * @param {number} sampleRate - Sample rate of audio
   * @param {string} responseText - Optional text to display when audio starts
   * @returns {Promise<boolean>} Resolves true when playback finishes,
   *   false on error
   */
  async playAudio(audioData, sampleRate = 16000, responseText = '') {
    if (!this.audioContext) {
      console.error('Audio context not initialized');
      return false;
    }
    try {
      console.log('Playing audio - received sample rate:', sampleRate, 'buffer size:', audioData.byteLength);
      // Convert Int16Array to Float32Array
      const int16Array = new Int16Array(audioData);
      // Compute sample range with a loop: spreading a large typed array
      // into Math.min/Math.max overflows the engine's argument limit.
      let minSample = 0;
      let maxSample = 0;
      if (int16Array.length > 0) {
        minSample = int16Array[0];
        maxSample = int16Array[0];
        for (let i = 1; i < int16Array.length; i++) {
          const s = int16Array[i];
          if (s < minSample) minSample = s;
          if (s > maxSample) maxSample = s;
        }
      }
      console.log('Int16Array created - length:', int16Array.length, 'sample range:', minSample, 'to', maxSample);
      const float32Array = new Float32Array(int16Array.length);
      for (let i = 0; i < int16Array.length; i++) {
        // Asymmetric normalization: negative range is 0x8000 wide,
        // positive range 0x7FFF wide, so both map exactly onto [-1, 1].
        float32Array[i] = int16Array[i] / (int16Array[i] < 0 ? 0x8000 : 0x7FFF);
      }
      console.log('Converted to float32, samples:', float32Array.length);
      // Create audio buffer with the RECEIVED sample rate (not context sample rate)
      // The browser will automatically resample if needed
      const audioBuffer = this.audioContext.createBuffer(1, float32Array.length, sampleRate);
      audioBuffer.getChannelData(0).set(float32Array);
      console.log('Audio buffer created - duration:', audioBuffer.duration, 's');
      // Create playback analyser if not exists
      if (!this.playbackAnalyser) {
        this.playbackAnalyser = this.audioContext.createAnalyser();
        this.playbackAnalyser.fftSize = 2048;
        this.playbackDataArray = new Uint8Array(this.playbackAnalyser.frequencyBinCount);
      }
      // Create buffer source
      const source = this.audioContext.createBufferSource();
      source.buffer = audioBuffer;
      // Create gain node for volume control
      const gainNode = this.audioContext.createGain();
      gainNode.gain.value = this.volume;
      // Connect: source -> gain -> analyser -> destination
      source.connect(gainNode);
      gainNode.connect(this.playbackAnalyser);
      this.playbackAnalyser.connect(this.audioContext.destination);
      // Play
      source.start(0);
      this.isPlaying = true;
      // Remember the active source so stopPlayback() can interrupt it
      this.currentSource = source;
      // Display text with delay to compensate for audio system latency
      // Web Audio API doesn't provide "actual start" event, so we estimate the delay
      if (responseText && this.callbacks.onTextDisplay) {
        // Adjusted to 250ms to better match actual audio playback start
        setTimeout(() => {
          this.callbacks.onTextDisplay(responseText);
        }, 250);
      }
      return new Promise((resolve) => {
        // onended fires both on natural completion and after source.stop()
        source.onended = () => {
          this.isPlaying = false;
          if (this.currentSource === source) {
            this.currentSource = null;
          }
          console.log('Audio playback finished');
          resolve(true);
        };
      });
    } catch (error) {
      console.error('Error playing audio:', error);
      console.error('Error details:', error.message);
      return false;
    }
  }

  /**
   * Stop current audio playback.
   * Actually stops the active AudioBufferSourceNode (previously this only
   * flipped the isPlaying flag and audio kept playing).
   * @returns {boolean} Stop playback success status
   */
  stopPlayback() {
    if (this.currentSource) {
      try {
        this.currentSource.stop();
      } catch (e) {
        // stop() throws InvalidStateError if the source already ended; harmless
      }
      this.currentSource = null;
    }
    this.isPlaying = false;
    return true;
  }

  /**
   * Set playback volume
   * @param {number} volume - Volume level (clamped to 0.0 .. 1.0)
   */
  setPlaybackVolume(volume) {
    this.volume = Math.max(0, Math.min(1, volume));
  }

  /**
   * Create audio visualizer
   * @param {HTMLCanvasElement} canvas - Canvas element for visualization
   */
  createVisualizer(canvas) {
    // Simple placeholder - can be expanded later
    console.log('Visualizer created for canvas:', canvas);
  }

  /**
   * Get waveform data for visualization (microphone input)
   * @returns {Uint8Array|null} Time domain data for waveform
   */
  getWaveformData() {
    if (!this.analyser || !this.dataArray) {
      return null;
    }
    this.analyser.getByteTimeDomainData(this.dataArray);
    return this.dataArray;
  }

  /**
   * Get playback waveform data for visualization (agent audio)
   * @returns {Uint8Array|null} Time domain data for playback waveform
   */
  getPlaybackWaveformData() {
    if (!this.playbackAnalyser || !this.playbackDataArray) {
      return null;
    }
    this.playbackAnalyser.getByteTimeDomainData(this.playbackDataArray);
    return this.playbackDataArray;
  }

  /**
   * Get audio manager statistics
   * @returns {Object} Statistics object
   */
  getStats() {
    return {
      state: this.stats.state,
      isRecording: this.stats.isRecording,
      totalChunksRecorded: this.stats.totalChunksRecorded,
      voiceDetectionRate: this.stats.voiceDetectionRate,
      sampleRate: this.audioContext?.sampleRate || 0,
      channels: this.channels
    };
  }

  /**
   * Clean up audio resources: stop recording and playback, drop queued
   * chunks, disconnect nodes, stop media tracks, and close the context.
   */
  cleanup() {
    console.log('Starting audio cleanup...');
    this.stopRecording();
    // Interrupt any in-flight playback and drop pending queued chunks so a
    // later session doesn't replay stale audio
    this.stopPlayback();
    this.clearQueue();
    if (this.processor) {
      try {
        this.processor.disconnect();
        this.processor.port.onmessage = null; // Clear message handler
      } catch (e) {
        console.warn('Error disconnecting processor:', e);
      }
      this.processor = null;
    }
    // Disconnect and cleanup playback analyser
    if (this.playbackAnalyser) {
      try {
        this.playbackAnalyser.disconnect();
      } catch (e) {
        console.warn('Error disconnecting playback analyser:', e);
      }
      this.playbackAnalyser = null;
    }
    this.playbackDataArray = null;
    // Disconnect and cleanup recording analyser
    if (this.analyser) {
      try {
        this.analyser.disconnect();
      } catch (e) {
        console.warn('Error disconnecting analyser:', e);
      }
      this.analyser = null;
    }
    this.dataArray = null;
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => {
        track.stop();
        console.log('Stopped media track:', track.kind);
      });
      this.mediaStream = null;
    }
    if (this.audioContext && this.audioContext.state !== 'closed') {
      this.audioContext.close()
        .then(() => console.log('AudioContext closed successfully'))
        .catch(e => console.warn('Error closing AudioContext:', e));
      this.audioContext = null;
      this.audioWorkletModuleLoaded = false; // Reset flag when context is closed
    }
    this.isRecording = false;
    this.isPlaying = false;
    this.stats.state = 'idle';
    console.log('Audio resources cleaned up');
  }
}
| /** | |
| * Audio Chunk class for structured audio data | |
| */ | |
/**
 * Structured container for one chunk of recorded audio.
 */
export class AudioChunk {
  /**
   * Build an audio chunk from named parameters.
   * @param {Object} params - Audio chunk parameters
   * @param {ArrayBuffer} params.data - Raw audio payload
   * @param {number} params.timestampMs - Capture timestamp (ms since epoch)
   * @param {number} [params.sampleRate=16000] - Samples per second
   * @param {number} [params.channels=1] - Channel count
   * @param {number} params.durationMs - Chunk length in milliseconds
   * @param {boolean} [params.isVoice=false] - Voice-activity flag
   * @param {string} params.chunkId - Unique identifier for this chunk
   */
  constructor({ data, timestampMs, sampleRate = 16000, channels = 1, durationMs, isVoice = false, chunkId }) {
    Object.assign(this, { data, timestampMs, sampleRate, channels, durationMs, isVoice, chunkId });
  }
}