// ChatTASTE-Voice-Bot — frontend/js/audio-manager.js
// (added in commit a445583 "add frontend files" by YC-Chen)
/**
* Audio Manager for TASTE Voice Bot
* Handles audio recording, playback, and processing
*/
import { CONFIG } from './config.js';
export class AudioManager {
  /**
   * Manages microphone capture (via an AudioWorklet for exact chunk
   * sizing), pull-based queued playback of agent audio, and waveform
   * visualization data for both the mic input and agent playback.
   */
  constructor() {
    // Core Web Audio objects
    this.audioContext = null;
    this.mediaStream = null;
    this.sourceNode = null; // Mic MediaStreamAudioSourceNode, tracked so repeated start/stop cycles don't leak graph nodes
    this.processor = null; // AudioWorkletNode that emits fixed-size audio chunks
    this.analyser = null; // For visualizing microphone input
    this.dataArray = null; // For storing frequency/time domain data
    this.playbackAnalyser = null; // For visualizing agent audio playback
    this.playbackDataArray = null; // For storing playback waveform data
    this.currentSource = null; // Currently playing AudioBufferSourceNode, so stopPlayback() can actually stop it

    // State flags
    this.isRecording = false;
    this.isPlaying = false;
    this.sampleRate = 16000;
    this.channels = 1;
    this.volume = 0.7;
    this.audioWorkletModuleLoaded = false; // Track if AudioWorklet module is loaded

    // Audio playback queue (FIFO) - pull-based manual control.
    // `??` (not `||`) so an explicit 0 in CONFIG is respected.
    this.audioQueue = [];
    this.isProcessingQueue = false;
    this.minBufferSize = CONFIG.minBufferSize ?? 2; // Minimum chunks before starting playback
    this.queueMaxSize = CONFIG.queueMaxSize ?? 30; // Max queue size
    this.queueTimeoutMs = CONFIG.queueTimeoutMs ?? 5000; // Timeout for old chunks

    this.callbacks = {
      onAudioChunk: null,
      onVoiceDetected: null,
      onRecordingComplete: null,
      onChunkPlayed: null, // Called after each chunk finishes playing (for pull-based)
      onTextDisplay: null // Called when text should be displayed (synchronized with audio)
    };

    this.stats = {
      state: 'idle',
      isRecording: false,
      totalChunksRecorded: 0,
      voiceDetectionRate: 0
    };
  }

  /**
   * Set event callbacks (merged into the existing callback map).
   * @param {Object} callbacks - Callback functions
   */
  setCallbacks(callbacks) {
    Object.assign(this.callbacks, callbacks);
  }

  /**
   * Initialize audio context, load the AudioWorklet module, and request
   * microphone access. Safe to call again after cleanup().
   * @returns {Promise<boolean>} Initialization success status
   */
  async initialize() {
    try {
      // If AudioContext exists but is closed, recreate it
      if (this.audioContext && this.audioContext.state === 'closed') {
        console.log('Previous AudioContext was closed, creating new one');
        this.audioContext = null;
        this.audioWorkletModuleLoaded = false; // Reset module flag
      }

      // Create audio context if it doesn't exist
      if (!this.audioContext) {
        // Create audio context - let browser use optimal sample rate.
        // Browser will handle resampling between 16kHz (mic) and 24kHz (playback).
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
        console.log('Created new AudioContext, sample rate:', this.audioContext.sampleRate);
      } else {
        console.log('Reusing existing AudioContext');
      }

      // Load AudioWorklet module for exact chunk sizing (only once per context)
      if (!this.audioWorkletModuleLoaded) {
        try {
          await this.audioContext.audioWorklet.addModule('./js/audio-processor.js');
          this.audioWorkletModuleLoaded = true;
          console.log('AudioWorklet processor loaded successfully');
        } catch (error) {
          console.error('Error loading AudioWorklet:', error);
          throw error;
        }
      } else {
        console.log('AudioWorklet module already loaded, skipping');
      }

      // Request microphone access (always request a fresh stream)
      if (this.mediaStream) {
        // Stop existing tracks before replacing the stream
        this.mediaStream.getTracks().forEach(track => track.stop());
      }
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });

      console.log('Audio initialized successfully');
      this.stats.state = 'ready';
      return true;
    } catch (error) {
      console.error('Error initializing audio:', error);
      this.stats.state = 'error';
      return false;
    }
  }

  /**
   * Start audio recording. Builds the graph
   * mic source -> analyser -> worklet processor -> destination.
   * @param {string} sessionId - Session ID for recording (currently unused
   *   by the worklet; kept for API symmetry with the backend protocol)
   * @returns {Promise<boolean>} Start recording success status
   */
  async startRecording(sessionId) {
    if (!this.audioContext || !this.mediaStream) {
      console.error('Audio not initialized');
      return false;
    }
    if (this.isRecording) {
      console.warn('Already recording');
      return false;
    }
    try {
      // Fix: disconnect any previous mic source so repeated start/stop
      // cycles don't accumulate live nodes in the audio graph.
      if (this.sourceNode) {
        try {
          this.sourceNode.disconnect();
        } catch (e) {
          // Ignore - node may already be disconnected
        }
      }

      // Create audio source from microphone stream
      this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);

      // Create analyser for waveform visualization
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 2048;
      this.dataArray = new Uint8Array(this.analyser.frequencyBinCount);

      // Use AudioWorklet for exact chunk sizing (600ms = 9600 samples at 16kHz)
      this.processor = new AudioWorkletNode(this.audioContext, 'audio-chunk-processor');

      // Listen for audio chunks from the worklet
      this.processor.port.onmessage = (event) => {
        if (!this.isRecording) return;
        if (event.data.type === 'audio-chunk') {
          // Create audio chunk
          const chunk = {
            data: event.data.data,
            timestampMs: Date.now(),
            sampleRate: CONFIG.micSampleRate,
            channels: 1
          };
          this.stats.totalChunksRecorded++;
          // Send chunk via callback
          if (this.callbacks.onAudioChunk) {
            this.callbacks.onAudioChunk(chunk);
          }
        }
      };

      // Connect nodes: source -> analyser -> processor -> destination
      this.sourceNode.connect(this.analyser);
      this.analyser.connect(this.processor);
      this.processor.connect(this.audioContext.destination);

      this.isRecording = true;
      this.stats.isRecording = true;
      this.stats.state = 'recording';
      console.log(`Recording started with AudioWorklet (${CONFIG.chunkDurationMs}ms chunks = ${CONFIG.micSampleRate * CONFIG.chunkDurationMs / 1000} samples)`);
      return true;
    } catch (error) {
      console.error('Error starting recording:', error);
      return false;
    }
  }

  /**
   * Stop audio recording and disconnect the worklet processor.
   * The analyser stays connected so waveform data remains available.
   * @returns {Array} Array of recorded audio chunks (always empty; chunks
   *   are streamed via onAudioChunk, not accumulated)
   */
  stopRecording() {
    if (!this.isRecording) {
      return [];
    }
    this.isRecording = false;
    this.stats.isRecording = false;
    this.stats.state = 'ready';
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }
    console.log('Recording stopped');
    if (this.callbacks.onRecordingComplete) {
      this.callbacks.onRecordingComplete();
    }
    return [];
  }

  /**
   * Start streaming audio (real-time processing).
   * @param {string} sessionId - Session ID forwarded to startRecording
   * @returns {Promise<boolean>} Start streaming success status
   */
  async startStreaming(sessionId) {
    // Fix: forward the session id instead of silently dropping it
    return this.startRecording(sessionId);
  }

  /**
   * Stop streaming audio.
   * @returns {boolean} Stop streaming success status
   */
  stopStreaming() {
    this.stopRecording();
    return true;
  }

  /**
   * Queue audio chunk for playback (pull-based manual control).
   * Adds chunk to buffer but does NOT auto-start playback.
   * Call startPlayback() manually when buffer reaches minimum size.
   *
   * @param {ArrayBuffer} audioData - Audio data as Int16Array buffer
   * @param {number} sampleRate - Sample rate of the audio
   * @param {string} responseText - Text to display when playing
   * @param {boolean} finalize - Whether this is the last audio chunk
   * @returns {number} Current queue size after adding
   */
  queueAudio(audioData, sampleRate = 16000, responseText = '', finalize = false) {
    // Check queue size limit
    if (this.audioQueue.length >= this.queueMaxSize) {
      console.warn(`Audio queue full (${this.audioQueue.length}/${this.queueMaxSize}), dropping oldest chunk`);
      this.audioQueue.shift(); // Remove oldest chunk
    }
    // Add chunk to queue with timestamp, text, and finalize flag
    this.audioQueue.push({
      audioData: audioData,
      sampleRate: sampleRate,
      responseText: responseText,
      finalize: finalize,
      timestamp: Date.now()
    });
    console.log(`Audio chunk queued (${this.audioQueue.length} chunks, min: ${this.minBufferSize}), text: "${responseText}", finalize: ${finalize}`);
    return this.audioQueue.length;
  }

  /**
   * Start playback from queue (manual control).
   * Call this when buffer reaches minimum size.
   * @returns {boolean} True if playback started
   */
  startPlayback() {
    if (!this.isProcessingQueue && this.audioQueue.length > 0) {
      console.log(`[Playback] Starting with ${this.audioQueue.length} chunks`);
      this.processQueue();
      return true;
    }
    return false;
  }

  /**
   * Process audio queue (FIFO) - pull-based.
   * Plays chunks continuously until the queue is empty or clearQueue()
   * aborts the loop. Calls onChunkPlayed after each chunk so the caller
   * can request more audio.
   */
  async processQueue() {
    if (this.isProcessingQueue) {
      return; // Already processing
    }
    this.isProcessingQueue = true;
    console.log('[Queue] Started processing');

    // Fix: also observe isProcessingQueue so a concurrent clearQueue()
    // aborts this loop instead of racing with a second processQueue().
    while (this.isProcessingQueue && this.audioQueue.length > 0) {
      const chunk = this.audioQueue.shift(); // FIFO: get first chunk

      // Check if chunk is too old (timeout)
      const age = Date.now() - chunk.timestamp;
      if (age > this.queueTimeoutMs) {
        console.warn(`[Queue] Dropping old audio chunk (age: ${age}ms)`);
        continue;
      }

      // Play this chunk and wait for it to finish.
      // Text display is triggered inside playAudio when audio starts.
      console.log(`[Queue] Playing chunk (${this.audioQueue.length} remaining), text: "${chunk.responseText || ''}"`);
      await this.playAudio(chunk.audioData, chunk.sampleRate, chunk.responseText);

      // Notify that chunk finished playing (trigger request logic)
      if (this.callbacks.onChunkPlayed) {
        this.callbacks.onChunkPlayed(this.audioQueue.length);
      }
    }

    this.isProcessingQueue = false;
    console.log('[Queue] Queue empty, stopped processing');
  }

  /**
   * Clear audio queue and reset queue state.
   * Should be called when session ends.
   */
  clearQueue() {
    const hadChunks = this.audioQueue.length;
    this.audioQueue = [];
    this.isProcessingQueue = false; // Aborts a running processQueue() loop
    if (hadChunks > 0) {
      console.log(`[Queue] Cleared ${hadChunks} pending chunks`);
    }
  }

  /**
   * Get current queue size (for buffer monitoring).
   * @returns {number} Number of chunks in queue
   */
  getQueueSize() {
    return this.audioQueue.length;
  }

  /**
   * Check if more chunks should be requested (buffer below minimum).
   * @returns {boolean} True if should request more chunks
   */
  shouldRequestMore() {
    return this.audioQueue.length < this.minBufferSize;
  }

  /**
   * Check if buffer is ready to start playback.
   * Special case: a single chunk marked finalize is ready immediately.
   * Otherwise: needs at least minBufferSize chunks.
   * @returns {boolean} True if buffer is ready for playback
   */
  isBufferReady() {
    // Special case: single finalized chunk can be played immediately
    if (this.audioQueue.length === 1 && this.audioQueue[0].finalize) {
      console.log('[Buffer] Ready: Single finalized chunk detected');
      return true;
    }
    // Normal case: need minimum buffer size
    return this.audioQueue.length >= this.minBufferSize;
  }

  /**
   * Play audio data (direct playback). Resolves when the chunk finishes
   * (or is stopped via stopPlayback()).
   * @param {ArrayBuffer} audioData - Audio data to play (16-bit PCM)
   * @param {number} sampleRate - Sample rate of audio
   * @param {string} responseText - Optional text to display when audio starts
   * @returns {Promise<boolean>} Play success status
   */
  async playAudio(audioData, sampleRate = 16000, responseText = '') {
    if (!this.audioContext) {
      console.error('Audio context not initialized');
      return false;
    }
    try {
      console.log('Playing audio - received sample rate:', sampleRate, 'buffer size:', audioData.byteLength);

      // Convert Int16Array to Float32Array.
      // Fix: compute the sample range with a loop — spreading a large
      // TypedArray into Math.min/Math.max can exceed the engine's
      // argument limit and throw a RangeError.
      const int16Array = new Int16Array(audioData);
      let minSample = 0;
      let maxSample = 0;
      const float32Array = new Float32Array(int16Array.length);
      for (let i = 0; i < int16Array.length; i++) {
        const s = int16Array[i];
        if (s < minSample) minSample = s;
        if (s > maxSample) maxSample = s;
        // Asymmetric normalization: int16 range is [-0x8000, 0x7FFF]
        float32Array[i] = s / (s < 0 ? 0x8000 : 0x7FFF);
      }
      console.log('Int16Array created - length:', int16Array.length, 'sample range:', minSample, 'to', maxSample);
      console.log('Converted to float32, samples:', float32Array.length);

      // Create audio buffer with the RECEIVED sample rate (not context sample rate).
      // The browser will automatically resample if needed.
      const audioBuffer = this.audioContext.createBuffer(1, float32Array.length, sampleRate);
      audioBuffer.getChannelData(0).set(float32Array);
      console.log('Audio buffer created - duration:', audioBuffer.duration, 's');

      // Create playback analyser if not exists
      if (!this.playbackAnalyser) {
        this.playbackAnalyser = this.audioContext.createAnalyser();
        this.playbackAnalyser.fftSize = 2048;
        this.playbackDataArray = new Uint8Array(this.playbackAnalyser.frequencyBinCount);
      }

      // Create buffer source
      const source = this.audioContext.createBufferSource();
      source.buffer = audioBuffer;

      // Create gain node for volume control
      const gainNode = this.audioContext.createGain();
      gainNode.gain.value = this.volume;

      // Connect: source -> gain -> analyser -> destination
      source.connect(gainNode);
      gainNode.connect(this.playbackAnalyser);
      this.playbackAnalyser.connect(this.audioContext.destination);

      // Play, tracking the source so stopPlayback() can interrupt it
      source.start(0);
      this.currentSource = source;
      this.isPlaying = true;

      // Display text with delay to compensate for audio system latency.
      // Web Audio API doesn't provide an "actual start" event, so we estimate the delay.
      if (responseText && this.callbacks.onTextDisplay) {
        // Adjusted to 250ms to better match actual audio playback start
        setTimeout(() => {
          this.callbacks.onTextDisplay(responseText);
        }, 250);
      }

      return new Promise((resolve) => {
        source.onended = () => {
          if (this.currentSource === source) {
            this.currentSource = null;
          }
          this.isPlaying = false;
          console.log('Audio playback finished');
          resolve(true);
        };
      });
    } catch (error) {
      console.error('Error playing audio:', error);
      console.error('Error details:', error.message);
      return false;
    }
  }

  /**
   * Stop current audio playback.
   * Fix: actually stops the playing source node (the original only
   * flipped the isPlaying flag while audio kept playing).
   * @returns {boolean} Stop playback success status
   */
  stopPlayback() {
    if (this.currentSource) {
      try {
        // stop() fires onended, resolving any pending playAudio promise
        this.currentSource.stop();
      } catch (e) {
        // stop() throws if the source already ended; safe to ignore
      }
      this.currentSource = null;
    }
    this.isPlaying = false;
    return true;
  }

  /**
   * Set playback volume (applies to subsequently played chunks).
   * @param {number} volume - Volume level, clamped to [0.0, 1.0]
   */
  setPlaybackVolume(volume) {
    this.volume = Math.max(0, Math.min(1, volume));
  }

  /**
   * Create audio visualizer.
   * @param {HTMLCanvasElement} canvas - Canvas element for visualization
   */
  createVisualizer(canvas) {
    // Simple placeholder - can be expanded later
    console.log('Visualizer created for canvas:', canvas);
  }

  /**
   * Get waveform data for visualization (microphone input).
   * @returns {Uint8Array|null} Time domain data for waveform
   */
  getWaveformData() {
    if (!this.analyser || !this.dataArray) {
      return null;
    }
    this.analyser.getByteTimeDomainData(this.dataArray);
    return this.dataArray;
  }

  /**
   * Get playback waveform data for visualization (agent audio).
   * @returns {Uint8Array|null} Time domain data for playback waveform
   */
  getPlaybackWaveformData() {
    if (!this.playbackAnalyser || !this.playbackDataArray) {
      return null;
    }
    this.playbackAnalyser.getByteTimeDomainData(this.playbackDataArray);
    return this.playbackDataArray;
  }

  /**
   * Get audio manager statistics.
   * @returns {Object} Statistics object
   */
  getStats() {
    return {
      state: this.stats.state,
      isRecording: this.stats.isRecording,
      totalChunksRecorded: this.stats.totalChunksRecorded,
      voiceDetectionRate: this.stats.voiceDetectionRate,
      sampleRate: this.audioContext?.sampleRate || 0,
      channels: this.channels
    };
  }

  /**
   * Clean up all audio resources: recording, playback, queue, analysers,
   * media tracks, and the AudioContext itself.
   */
  cleanup() {
    console.log('Starting audio cleanup...');
    this.stopRecording();
    // Fix: also stop any in-flight playback and drop queued chunks
    this.stopPlayback();
    this.clearQueue();

    if (this.processor) {
      try {
        this.processor.disconnect();
        this.processor.port.onmessage = null; // Clear message handler
      } catch (e) {
        console.warn('Error disconnecting processor:', e);
      }
      this.processor = null;
    }

    // Fix: disconnect the mic source node (was previously leaked)
    if (this.sourceNode) {
      try {
        this.sourceNode.disconnect();
      } catch (e) {
        console.warn('Error disconnecting source node:', e);
      }
      this.sourceNode = null;
    }

    // Disconnect and cleanup playback analyser
    if (this.playbackAnalyser) {
      try {
        this.playbackAnalyser.disconnect();
      } catch (e) {
        console.warn('Error disconnecting playback analyser:', e);
      }
      this.playbackAnalyser = null;
    }
    this.playbackDataArray = null;

    // Disconnect and cleanup recording analyser
    if (this.analyser) {
      try {
        this.analyser.disconnect();
      } catch (e) {
        console.warn('Error disconnecting analyser:', e);
      }
      this.analyser = null;
    }
    this.dataArray = null;

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(track => {
        track.stop();
        console.log('Stopped media track:', track.kind);
      });
      this.mediaStream = null;
    }

    if (this.audioContext && this.audioContext.state !== 'closed') {
      this.audioContext.close()
        .then(() => console.log('AudioContext closed successfully'))
        .catch(e => console.warn('Error closing AudioContext:', e));
      this.audioContext = null;
      this.audioWorkletModuleLoaded = false; // Reset flag when context is closed
    }

    this.isRecording = false;
    this.isPlaying = false;
    this.stats.state = 'idle';
    console.log('Audio resources cleaned up');
  }
}
/**
* Audio Chunk class for structured audio data
*/
export class AudioChunk {
  /**
   * Immutable-style value object describing one chunk of captured audio.
   *
   * @param {Object} params - Audio chunk parameters
   * @param {ArrayBuffer} params.data - Raw audio data
   * @param {number} params.timestampMs - Capture timestamp in milliseconds
   * @param {number} [params.sampleRate=16000] - Sample rate in Hz
   * @param {number} [params.channels=1] - Number of audio channels
   * @param {number} params.durationMs - Chunk duration in milliseconds
   * @param {boolean} [params.isVoice=false] - Whether the chunk contains voice
   * @param {string} params.chunkId - Unique chunk identifier
   */
  constructor({ data, timestampMs, sampleRate = 16000, channels = 1, durationMs, isVoice = false, chunkId }) {
    // Copy every field onto the instance in one shot (property shorthand)
    Object.assign(this, {
      data,
      timestampMs,
      sampleRate,
      channels,
      durationMs,
      isVoice,
      chunkId
    });
  }
}