// ChatTASTE-Voice-Bot — frontend/js/audio-manager.js
// (added in commit a445583 "add frontend files" by YC-Chen)
/**
* Audio Manager for TASTE Voice Bot
* Handles audio recording, playback, and processing
*/
import { CONFIG } from './config.js';
export class AudioManager {
  /**
   * Manages microphone capture (via an AudioWorklet for exact chunk
   * sizing), pull-based queued playback of agent audio, and waveform
   * visualization data for both the mic input and agent playback.
   */
  constructor() {
    // Core Web Audio objects
    this.audioContext = null;
    this.mediaStream = null;
    this.sourceNode = null; // Mic MediaStreamAudioSourceNode, tracked so repeated start/stop cycles don't leak graph nodes
    this.processor = null; // AudioWorkletNode that emits fixed-size audio chunks
    this.analyser = null; // For visualizing microphone input
    this.dataArray = null; // For storing frequency/time domain data
    this.playbackAnalyser = null; // For visualizing agent audio playback
    this.playbackDataArray = null; // For storing playback waveform data
    this.currentSource = null; // Currently playing AudioBufferSourceNode, so stopPlayback() can actually stop it

    // State flags
    this.isRecording = false;
    this.isPlaying = false;
    this.sampleRate = 16000;
    this.channels = 1;
    this.volume = 0.7;
    this.audioWorkletModuleLoaded = false; // Track if AudioWorklet module is loaded

    // Audio playback queue (FIFO) - pull-based manual control.
    // `??` (not `||`) so an explicit 0 in CONFIG is respected.
    this.audioQueue = [];
    this.isProcessingQueue = false;
    this.minBufferSize = CONFIG.minBufferSize ?? 2; // Minimum chunks before starting playback
    this.queueMaxSize = CONFIG.queueMaxSize ?? 30; // Max queue size
    this.queueTimeoutMs = CONFIG.queueTimeoutMs ?? 5000; // Timeout for old chunks

    this.callbacks = {
      onAudioChunk: null,
      onVoiceDetected: null,
      onRecordingComplete: null,
      onChunkPlayed: null, // Called after each chunk finishes playing (for pull-based)
      onTextDisplay: null // Called when text should be displayed (synchronized with audio)
    };

    this.stats = {
      state: 'idle',
      isRecording: false,
      totalChunksRecorded: 0,
      voiceDetectionRate: 0
    };
  }

  /**
   * Set event callbacks (merged into the existing callback map).
   * @param {Object} callbacks - Callback functions
   */
  setCallbacks(callbacks) {
    Object.assign(this.callbacks, callbacks);
  }

  /**
   * Initialize audio context, load the AudioWorklet module, and request
   * microphone access. Safe to call again after cleanup().
   * @returns {Promise<boolean>} Initialization success status
   */
  async initialize() {
    try {
      // If AudioContext exists but is closed, recreate it
      if (this.audioContext && this.audioContext.state === 'closed') {
        console.log('Previous AudioContext was closed, creating new one');
        this.audioContext = null;
        this.audioWorkletModuleLoaded = false; // Reset module flag
      }

      // Create audio context if it doesn't exist
      if (!this.audioContext) {
        // Create audio context - let browser use optimal sample rate.
        // Browser will handle resampling between 16kHz (mic) and 24kHz (playback).
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
        console.log('Created new AudioContext, sample rate:', this.audioContext.sampleRate);
      } else {
        console.log('Reusing existing AudioContext');
      }

      // Load AudioWorklet module for exact chunk sizing (only once per context)
      if (!this.audioWorkletModuleLoaded) {
        try {
          await this.audioContext.audioWorklet.addModule('./js/audio-processor.js');
          this.audioWorkletModuleLoaded = true;
          console.log('AudioWorklet processor loaded successfully');
        } catch (error) {
          console.error('Error loading AudioWorklet:', error);
          throw error;
        }
      } else {
        console.log('AudioWorklet module already loaded, skipping');
      }

      // Request microphone access (always request a fresh stream)
      if (this.mediaStream) {
        // Stop existing tracks before replacing the stream
        this.mediaStream.getTracks().forEach(track => track.stop());
      }
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });

      console.log('Audio initialized successfully');
      this.stats.state = 'ready';
      return true;
    } catch (error) {
      console.error('Error initializing audio:', error);
      this.stats.state = 'error';
      return false;
    }
  }

  /**
   * Start audio recording. Builds the graph
   * mic source -> analyser -> worklet processor -> destination.
   * @param {string} sessionId - Session ID for recording (currently unused
   *   by the worklet; kept for API symmetry with the backend protocol)
   * @returns {Promise<boolean>} Start recording success status
   */
  async startRecording(sessionId) {
    if (!this.audioContext || !this.mediaStream) {
      console.error('Audio not initialized');
      return false;
    }
    if (this.isRecording) {
      console.warn('Already recording');
      return false;
    }
    try {
      // Fix: disconnect any previous mic source so repeated start/stop
      // cycles don't accumulate live nodes in the audio graph.
      if (this.sourceNode) {
        try {
          this.sourceNode.disconnect();
        } catch (e) {
          // Ignore - node may already be disconnected
        }
      }

      // Create audio source from microphone stream
      this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);

      // Create analyser for waveform visualization
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 2048;
      this.dataArray = new Uint8Array(this.analyser.frequencyBinCount);

      // Use AudioWorklet for exact chunk sizing (600ms = 9600 samples at 16kHz)
      this.processor = new AudioWorkletNode(this.audioContext, 'audio-chunk-processor');

      // Listen for audio chunks from the worklet
      this.processor.port.onmessage = (event) => {
        if (!this.isRecording) return;
        if (event.data.type === 'audio-chunk') {
          // Create audio chunk
          const chunk = {
            data: event.data.data,
            timestampMs: Date.now(),
            sampleRate: CONFIG.micSampleRate,
            channels: 1
          };
          this.stats.totalChunksRecorded++;
          // Send chunk via callback
          if (this.callbacks.onAudioChunk) {
            this.callbacks.onAudioChunk(chunk);
          }
        }
      };

      // Connect nodes: source -> analyser -> processor -> destination
      this.sourceNode.connect(this.analyser);
      this.analyser.connect(this.processor);
      this.processor.connect(this.audioContext.destination);

      this.isRecording = true;
      this.stats.isRecording = true;
      this.stats.state = 'recording';
      console.log(`Recording started with AudioWorklet (${CONFIG.chunkDurationMs}ms chunks = ${CONFIG.micSampleRate * CONFIG.chunkDurationMs / 1000} samples)`);
      return true;
    } catch (error) {
      console.error('Error starting recording:', error);
      return false;
    }
  }

  /**
   * Stop audio recording and disconnect the worklet processor.
   * The analyser stays connected so waveform data remains available.
   * @returns {Array} Array of recorded audio chunks (always empty; chunks
   *   are streamed via onAudioChunk, not accumulated)
   */
  stopRecording() {
    if (!this.isRecording) {
      return [];
    }
    this.isRecording = false;
    this.stats.isRecording = false;
    this.stats.state = 'ready';
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    console.log('Recording stopped');
    if (this.callbacks.onRecordingComplete) {
      this.callbacks.onRecordingComplete();
    }
    return [];
  }

  /**
   * Start streaming audio (real-time processing).
   * @param {string} sessionId - Session ID forwarded to startRecording
   * @returns {Promise<boolean>} Start streaming success status
   */
  async startStreaming(sessionId) {
    // Fix: forward the session id instead of silently dropping it
    return this.startRecording(sessionId);
  }

  /**
   * Stop streaming audio.
   * @returns {boolean} Stop streaming success status
   */
  stopStreaming() {
    this.stopRecording();
    return true;
  }

  /**
   * Queue audio chunk for playback (pull-based manual control).
   * Adds chunk to buffer but does NOT auto-start playback.
   * Call startPlayback() manually when buffer reaches minimum size.
   *
   * @param {ArrayBuffer} audioData - Audio data as Int16Array buffer
   * @param {number} sampleRate - Sample rate of the audio
   * @param {string} responseText - Text to display when playing
   * @param {boolean} finalize - Whether this is the last audio chunk
   * @returns {number} Current queue size after adding
   */
  queueAudio(audioData, sampleRate = 16000, responseText = '', finalize = false) {
    // Check queue size limit
    if (this.audioQueue.length >= this.queueMaxSize) {
      console.warn(`Audio queue full (${this.audioQueue.length}/${this.queueMaxSize}), dropping oldest chunk`);
      this.audioQueue.shift(); // Remove oldest chunk
    }
    // Add chunk to queue with timestamp, text, and finalize flag
    this.audioQueue.push({
      audioData: audioData,
      sampleRate: sampleRate,
      responseText: responseText,
      finalize: finalize,
      timestamp: Date.now()
    });
    console.log(`Audio chunk queued (${this.audioQueue.length} chunks, min: ${this.minBufferSize}), text: "${responseText}", finalize: ${finalize}`);
    return this.audioQueue.length;
  }

  /**
   * Start playback from queue (manual control).
   * Call this when buffer reaches minimum size.
   * @returns {boolean} True if playback started
   */
  startPlayback() {
    if (!this.isProcessingQueue && this.audioQueue.length > 0) {
      console.log(`[Playback] Starting with ${this.audioQueue.length} chunks`);
      this.processQueue();
      return true;
    }
    return false;
  }

  /**
   * Process audio queue (FIFO) - pull-based.
   * Plays chunks continuously until the queue is empty or clearQueue()
   * aborts the loop. Calls onChunkPlayed after each chunk so the caller
   * can request more audio.
   */
  async processQueue() {
    if (this.isProcessingQueue) {
      return; // Already processing
    }
    this.isProcessingQueue = true;
    console.log('[Queue] Started processing');

    // Fix: also observe isProcessingQueue so a concurrent clearQueue()
    // aborts this loop instead of racing with a second processQueue().
    while (this.isProcessingQueue && this.audioQueue.length > 0) {
      const chunk = this.audioQueue.shift(); // FIFO: get first chunk

      // Check if chunk is too old (timeout)
      const age = Date.now() - chunk.timestamp;
      if (age > this.queueTimeoutMs) {
        console.warn(`[Queue] Dropping old audio chunk (age: ${age}ms)`);
        continue;
      }

      // Play this chunk and wait for it to finish.
      // Text display is triggered inside playAudio when audio starts.
      console.log(`[Queue] Playing chunk (${this.audioQueue.length} remaining), text: "${chunk.responseText || ''}"`);
      await this.playAudio(chunk.audioData, chunk.sampleRate, chunk.responseText);

      // Notify that chunk finished playing (trigger request logic)
      if (this.callbacks.onChunkPlayed) {
        this.callbacks.onChunkPlayed(this.audioQueue.length);
      }
    }

    this.isProcessingQueue = false;
    console.log('[Queue] Queue empty, stopped processing');
  }

  /**
   * Clear audio queue and reset queue state.
   * Should be called when session ends.
   */
  clearQueue() {
    const hadChunks = this.audioQueue.length;
    this.audioQueue = [];
    this.isProcessingQueue = false; // Aborts a running processQueue() loop
    if (hadChunks > 0) {
      console.log(`[Queue] Cleared ${hadChunks} pending chunks`);
    }
  }

  /**
   * Get current queue size (for buffer monitoring).
   * @returns {number} Number of chunks in queue
   */
  getQueueSize() {
    return this.audioQueue.length;
  }

  /**
   * Check if more chunks should be requested (buffer below minimum).
   * @returns {boolean} True if should request more chunks
   */
  shouldRequestMore() {
    return this.audioQueue.length < this.minBufferSize;
  }

  /**
   * Check if buffer is ready to start playback.
   * Special case: a single chunk marked finalize is ready immediately.
   * Otherwise: needs at least minBufferSize chunks.
   * @returns {boolean} True if buffer is ready for playback
   */
  isBufferReady() {
    // Special case: single finalized chunk can be played immediately
    if (this.audioQueue.length === 1 && this.audioQueue[0].finalize) {
      console.log('[Buffer] Ready: Single finalized chunk detected');
      return true;
    }
    // Normal case: need minimum buffer size
    return this.audioQueue.length >= this.minBufferSize;
  }

  /**
   * Play audio data (direct playback). Resolves when the chunk finishes
   * (or is stopped via stopPlayback()).
   * @param {ArrayBuffer} audioData - Audio data to play (16-bit PCM)
   * @param {number} sampleRate - Sample rate of audio
   * @param {string} responseText - Optional text to display when audio starts
   * @returns {Promise<boolean>} Play success status
   */
  async playAudio(audioData, sampleRate = 16000, responseText = '') {
    if (!this.audioContext) {
      console.error('Audio context not initialized');
      return false;
    }
    try {
      console.log('Playing audio - received sample rate:', sampleRate, 'buffer size:', audioData.byteLength);

      // Convert Int16Array to Float32Array.
      // Fix: compute the sample range with a loop — spreading a large
      // TypedArray into Math.min/Math.max can exceed the engine's
      // argument limit and throw a RangeError.
      const int16Array = new Int16Array(audioData);
      let minSample = 0;
      let maxSample = 0;
      const float32Array = new Float32Array(int16Array.length);
      for (let i = 0; i < int16Array.length; i++) {
        const s = int16Array[i];
        if (s < minSample) minSample = s;
        if (s > maxSample) maxSample = s;
        // Asymmetric normalization: int16 range is [-0x8000, 0x7FFF]
        float32Array[i] = s / (s < 0 ? 0x8000 : 0x7FFF);
      }
      console.log('Int16Array created - length:', int16Array.length, 'sample range:', minSample, 'to', maxSample);
      console.log('Converted to float32, samples:', float32Array.length);

      // Create audio buffer with the RECEIVED sample rate (not context sample rate).
      // The browser will automatically resample if needed.
      const audioBuffer = this.audioContext.createBuffer(1, float32Array.length, sampleRate);
      audioBuffer.getChannelData(0).set(float32Array);
      console.log('Audio buffer created - duration:', audioBuffer.duration, 's');

      // Create playback analyser if not exists
      if (!this.playbackAnalyser) {
        this.playbackAnalyser = this.audioContext.createAnalyser();
        this.playbackAnalyser.fftSize = 2048;
        this.playbackDataArray = new Uint8Array(this.playbackAnalyser.frequencyBinCount);
      }

      // Create buffer source
      const source = this.audioContext.createBufferSource();
      source.buffer = audioBuffer;

      // Create gain node for volume control
      const gainNode = this.audioContext.createGain();
      gainNode.gain.value = this.volume;

      // Connect: source -> gain -> analyser -> destination
      source.connect(gainNode);
      gainNode.connect(this.playbackAnalyser);
      this.playbackAnalyser.connect(this.audioContext.destination);

      // Play, tracking the source so stopPlayback() can interrupt it
      source.start(0);
      this.currentSource = source;
      this.isPlaying = true;

      // Display text with delay to compensate for audio system latency.
      // Web Audio API doesn't provide an "actual start" event, so we estimate the delay.
      if (responseText && this.callbacks.onTextDisplay) {
        // Adjusted to 250ms to better match actual audio playback start
        setTimeout(() => {
          this.callbacks.onTextDisplay(responseText);
        }, 250);
      }

      return new Promise((resolve) => {
        source.onended = () => {
          if (this.currentSource === source) {
            this.currentSource = null;
          }
          this.isPlaying = false;
          console.log('Audio playback finished');
          resolve(true);
        };
      });
    } catch (error) {
      console.error('Error playing audio:', error);
      console.error('Error details:', error.message);
      return false;
    }
  }

  /**
   * Stop current audio playback.
   * Fix: actually stops the playing source node (the original only
   * flipped the isPlaying flag while audio kept playing).
   * @returns {boolean} Stop playback success status
   */
  stopPlayback() {
    if (this.currentSource) {
      try {
        // stop() fires onended, resolving any pending playAudio promise
        this.currentSource.stop();
      } catch (e) {
        // stop() throws if the source already ended; safe to ignore
      }
      this.currentSource = null;
    }
    this.isPlaying = false;
    return true;
  }

  /**
   * Set playback volume (applies to subsequently played chunks).
   * @param {number} volume - Volume level, clamped to [0.0, 1.0]
   */
  setPlaybackVolume(volume) {
    this.volume = Math.max(0, Math.min(1, volume));
  }

  /**
   * Create audio visualizer.
   * @param {HTMLCanvasElement} canvas - Canvas element for visualization
   */
  createVisualizer(canvas) {
    // Simple placeholder - can be expanded later
    console.log('Visualizer created for canvas:', canvas);
  }

  /**
   * Get waveform data for visualization (microphone input).
   * @returns {Uint8Array|null} Time domain data for waveform
   */
  getWaveformData() {
    if (!this.analyser || !this.dataArray) {
      return null;
    }
    this.analyser.getByteTimeDomainData(this.dataArray);
    return this.dataArray;
  }

  /**
   * Get playback waveform data for visualization (agent audio).
   * @returns {Uint8Array|null} Time domain data for playback waveform
   */
  getPlaybackWaveformData() {
    if (!this.playbackAnalyser || !this.playbackDataArray) {
      return null;
    }
    this.playbackAnalyser.getByteTimeDomainData(this.playbackDataArray);
    return this.playbackDataArray;
  }

  /**
   * Get audio manager statistics.
   * @returns {Object} Statistics object
   */
  getStats() {
    return {
      state: this.stats.state,
      isRecording: this.stats.isRecording,
      totalChunksRecorded: this.stats.totalChunksRecorded,
      voiceDetectionRate: this.stats.voiceDetectionRate,
      sampleRate: this.audioContext?.sampleRate || 0,
      channels: this.channels
    };
  }

  /**
   * Clean up all audio resources: recording, playback, queue, analysers,
   * media tracks, and the AudioContext itself.
   */
  cleanup() {
    console.log('Starting audio cleanup...');
    this.stopRecording();
    // Fix: also stop any in-flight playback and drop queued chunks
    this.stopPlayback();
    this.clearQueue();

    if (this.processor) {
      try {
        this.processor.disconnect();
        this.processor.port.onmessage = null; // Clear message handler
      } catch (e) {
        console.warn('Error disconnecting processor:', e);
      }
      this.processor = null;
    }

    // Fix: disconnect the mic source node (was previously leaked)
    if (this.sourceNode) {
      try {
        this.sourceNode.disconnect();
      } catch (e) {
        console.warn('Error disconnecting source node:', e);
      }
      this.sourceNode = null;
    }

    // Disconnect and cleanup playback analyser
    if (this.playbackAnalyser) {
      try {
        this.playbackAnalyser.disconnect();
      } catch (e) {
        console.warn('Error disconnecting playback analyser:', e);
      }
      this.playbackAnalyser = null;
    }
    this.playbackDataArray = null;

    // Disconnect and cleanup recording analyser
    if (this.analyser) {
      try {
        this.analyser.disconnect();
      } catch (e) {
        console.warn('Error disconnecting analyser:', e);
      }
      this.analyser = null;
    }
    this.dataArray = null;

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => {
        track.stop();
        console.log('Stopped media track:', track.kind);
      });
      this.mediaStream = null;
    }

    if (this.audioContext && this.audioContext.state !== 'closed') {
      this.audioContext.close()
        .then(() => console.log('AudioContext closed successfully'))
        .catch(e => console.warn('Error closing AudioContext:', e));
      this.audioContext = null;
      this.audioWorkletModuleLoaded = false; // Reset flag when context is closed
    }

    this.isRecording = false;
    this.isPlaying = false;
    this.stats.state = 'idle';
    console.log('Audio resources cleaned up');
  }
}
/**
* Audio Chunk class for structured audio data
*/
export class AudioChunk {
  /**
   * Immutable-style value object describing one chunk of captured audio.
   *
   * @param {Object} params - Audio chunk parameters
   * @param {ArrayBuffer} params.data - Raw audio data
   * @param {number} params.timestampMs - Capture timestamp in milliseconds
   * @param {number} [params.sampleRate=16000] - Sample rate in Hz
   * @param {number} [params.channels=1] - Number of audio channels
   * @param {number} params.durationMs - Chunk duration in milliseconds
   * @param {boolean} [params.isVoice=false] - Whether the chunk contains voice
   * @param {string} params.chunkId - Unique chunk identifier
   */
  constructor({ data, timestampMs, sampleRate = 16000, channels = 1, durationMs, isVoice = false, chunkId }) {
    // Copy every field onto the instance in one shot (property shorthand)
    Object.assign(this, {
      data,
      timestampMs,
      sampleRate,
      channels,
      durationMs,
      isVoice,
      chunkId
    });
  }
}