Spaces:
Runtime error
Runtime error
| /** | |
| * WebSocket TTS Streaming Client | |
| * | |
| * Because apparently HTTP requests are so 2023. | |
| * Now we need real-time streaming for everything. | |
| */ | |
| class WebSocketTTSClient { | |
| constructor(options = {}) { | |
| this.socketUrl = options.socketUrl || window.location.origin; | |
| this.socket = null; | |
| this.activeRequests = new Map(); | |
| this.reconnectAttempts = 0; | |
| this.maxReconnectAttempts = options.maxReconnectAttempts || 5; | |
| this.reconnectDelay = options.reconnectDelay || 1000; | |
| this.debug = options.debug || false; | |
| // Audio context for seamless playback | |
| this.audioContext = null; | |
| this.audioQueue = new Map(); // request_id -> audio chunks | |
| // Event handlers | |
| this.onConnect = options.onConnect || (() => {}); | |
| this.onDisconnect = options.onDisconnect || (() => {}); | |
| this.onError = options.onError || ((error) => console.error('WebSocket error:', error)); | |
| // Initialize | |
| this.connect(); | |
| } | |
| connect() { | |
| if (this.socket && this.socket.connected) { | |
| this.log('Already connected'); | |
| return; | |
| } | |
| this.log('Connecting to WebSocket server...'); | |
| // Initialize Socket.IO connection | |
| this.socket = io(this.socketUrl, { | |
| transports: ['websocket', 'polling'], | |
| reconnection: true, | |
| reconnectionAttempts: this.maxReconnectAttempts, | |
| reconnectionDelay: this.reconnectDelay | |
| }); | |
| // Set up event handlers | |
| this.setupEventHandlers(); | |
| } | |
| setupEventHandlers() { | |
| // Connection events | |
| this.socket.on('connect', () => { | |
| this.log('Connected to WebSocket server'); | |
| this.reconnectAttempts = 0; | |
| this.onConnect(); | |
| }); | |
| this.socket.on('disconnect', (reason) => { | |
| this.log('Disconnected from WebSocket server:', reason); | |
| this.onDisconnect(reason); | |
| }); | |
| this.socket.on('connect_error', (error) => { | |
| this.log('Connection error:', error); | |
| this.reconnectAttempts++; | |
| this.onError({ | |
| type: 'connection_error', | |
| message: error.message, | |
| attempts: this.reconnectAttempts | |
| }); | |
| }); | |
| // TTS streaming events | |
| this.socket.on('connected', (data) => { | |
| this.log('Session established:', data.session_id); | |
| }); | |
| this.socket.on('stream_started', (data) => { | |
| this.log('Stream started:', data.request_id); | |
| const request = this.activeRequests.get(data.request_id); | |
| if (request && request.onStart) { | |
| request.onStart(data); | |
| } | |
| }); | |
| this.socket.on('audio_chunk', (data) => { | |
| this.handleAudioChunk(data); | |
| }); | |
| this.socket.on('stream_progress', (data) => { | |
| this.handleProgress(data); | |
| }); | |
| this.socket.on('stream_complete', (data) => { | |
| this.handleStreamComplete(data); | |
| }); | |
| this.socket.on('stream_error', (data) => { | |
| this.handleStreamError(data); | |
| }); | |
| } | |
| /** | |
| * Generate speech with real-time streaming | |
| */ | |
| generateSpeech(text, options = {}) { | |
| return new Promise((resolve, reject) => { | |
| if (!this.socket || !this.socket.connected) { | |
| reject(new Error('WebSocket not connected')); | |
| return; | |
| } | |
| const requestId = this.generateRequestId(); | |
| const audioChunks = []; | |
| // Store request info | |
| this.activeRequests.set(requestId, { | |
| resolve, | |
| reject, | |
| audioChunks, | |
| options, | |
| startTime: Date.now(), | |
| onStart: options.onStart, | |
| onProgress: options.onProgress, | |
| onChunk: options.onChunk, | |
| onComplete: options.onComplete, | |
| onError: options.onError | |
| }); | |
| // Initialize audio queue for this request | |
| this.audioQueue.set(requestId, []); | |
| // Emit generation request | |
| this.socket.emit('generate_stream', { | |
| request_id: requestId, | |
| text: text, | |
| voice: options.voice || 'alloy', | |
| format: options.format || 'mp3', | |
| chunk_size: options.chunkSize || 1024 | |
| }); | |
| this.log('Requested speech generation:', requestId); | |
| }); | |
| } | |
| handleAudioChunk(data) { | |
| const request = this.activeRequests.get(data.request_id); | |
| if (!request) { | |
| this.log('Received chunk for unknown request:', data.request_id); | |
| return; | |
| } | |
| // Convert hex string back to binary | |
| const audioData = this.hexToArrayBuffer(data.audio_data); | |
| // Store chunk | |
| request.audioChunks.push({ | |
| index: data.chunk_index, | |
| data: audioData, | |
| duration: data.duration, | |
| format: data.format | |
| }); | |
| // Add to audio queue for streaming playback | |
| const queue = this.audioQueue.get(data.request_id); | |
| if (queue) { | |
| queue.push(audioData); | |
| } | |
| // Call chunk handler if provided | |
| if (request.onChunk) { | |
| request.onChunk({ | |
| chunkIndex: data.chunk_index, | |
| totalChunks: data.total_chunks, | |
| audioData: audioData, | |
| duration: data.duration, | |
| text: data.chunk_text | |
| }); | |
| } | |
| this.log(`Received chunk ${data.chunk_index + 1}/${data.total_chunks} for request ${data.request_id}`); | |
| } | |
| handleProgress(data) { | |
| const request = this.activeRequests.get(data.request_id); | |
| if (request && request.onProgress) { | |
| request.onProgress({ | |
| progress: data.progress, | |
| chunksCompleted: data.chunks_completed, | |
| totalChunks: data.total_chunks, | |
| status: data.status | |
| }); | |
| } | |
| } | |
| handleStreamComplete(data) { | |
| const request = this.activeRequests.get(data.request_id); | |
| if (!request) { | |
| this.log('Completion for unknown request:', data.request_id); | |
| return; | |
| } | |
| // Sort chunks by index | |
| request.audioChunks.sort((a, b) => a.index - b.index); | |
| // Combine all audio chunks | |
| const combinedAudio = this.combineAudioChunks(request.audioChunks); | |
| const result = { | |
| requestId: data.request_id, | |
| audioData: combinedAudio, | |
| chunks: request.audioChunks, | |
| duration: request.audioChunks.reduce((sum, chunk) => sum + chunk.duration, 0), | |
| generationTime: Date.now() - request.startTime, | |
| format: request.audioChunks[0]?.format || 'mp3' | |
| }; | |
| // Call complete handler | |
| if (request.onComplete) { | |
| request.onComplete(result); | |
| } | |
| // Resolve promise | |
| request.resolve(result); | |
| // Cleanup | |
| this.activeRequests.delete(data.request_id); | |
| this.audioQueue.delete(data.request_id); | |
| this.log('Stream completed:', data.request_id); | |
| } | |
| handleStreamError(data) { | |
| const request = this.activeRequests.get(data.request_id); | |
| if (!request) { | |
| this.log('Error for unknown request:', data.request_id); | |
| return; | |
| } | |
| const error = new Error(data.error); | |
| error.requestId = data.request_id; | |
| error.timestamp = data.timestamp; | |
| // Call error handler | |
| if (request.onError) { | |
| request.onError(error); | |
| } | |
| // Reject promise | |
| request.reject(error); | |
| // Cleanup | |
| this.activeRequests.delete(data.request_id); | |
| this.audioQueue.delete(data.request_id); | |
| this.log('Stream error:', data.request_id, data.error); | |
| } | |
| /** | |
| * Cancel an active stream | |
| */ | |
| cancelStream(requestId) { | |
| if (!this.socket || !this.socket.connected) { | |
| throw new Error('WebSocket not connected'); | |
| } | |
| this.socket.emit('cancel_stream', { request_id: requestId }); | |
| // Clean up local state | |
| const request = this.activeRequests.get(requestId); | |
| if (request) { | |
| request.reject(new Error('Stream cancelled by user')); | |
| this.activeRequests.delete(requestId); | |
| this.audioQueue.delete(requestId); | |
| } | |
| } | |
| /** | |
| * Combine audio chunks into a single buffer | |
| */ | |
| combineAudioChunks(chunks) { | |
| if (chunks.length === 0) return new ArrayBuffer(0); | |
| // Calculate total size | |
| const totalSize = chunks.reduce((sum, chunk) => sum + chunk.data.byteLength, 0); | |
| // Create combined buffer | |
| const combined = new ArrayBuffer(totalSize); | |
| const view = new Uint8Array(combined); | |
| let offset = 0; | |
| for (const chunk of chunks) { | |
| view.set(new Uint8Array(chunk.data), offset); | |
| offset += chunk.data.byteLength; | |
| } | |
| return combined; | |
| } | |
| /** | |
| * Play audio directly (experimental streaming playback) | |
| */ | |
| async playAudioStream(requestId) { | |
| if (!this.audioContext) { | |
| this.audioContext = new (window.AudioContext || window.webkitAudioContext)(); | |
| } | |
| const queue = this.audioQueue.get(requestId); | |
| if (!queue) { | |
| throw new Error('No audio queue found for request'); | |
| } | |
| // This is a simplified version - real implementation would need | |
| // proper audio decoding and buffering for seamless playback | |
| this.log('Streaming audio playback not fully implemented yet'); | |
| } | |
| /** | |
| * Utility functions | |
| */ | |
| hexToArrayBuffer(hex) { | |
| const bytes = new Uint8Array(hex.length / 2); | |
| for (let i = 0; i < hex.length; i += 2) { | |
| bytes[i / 2] = parseInt(hex.substr(i, 2), 16); | |
| } | |
| return bytes.buffer; | |
| } | |
| generateRequestId() { | |
| return `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; | |
| } | |
| log(...args) { | |
| if (this.debug) { | |
| console.log('[WebSocketTTS]', ...args); | |
| } | |
| } | |
| /** | |
| * Get connection status | |
| */ | |
| isConnected() { | |
| return this.socket && this.socket.connected; | |
| } | |
| /** | |
| * Disconnect from server | |
| */ | |
| disconnect() { | |
| if (this.socket) { | |
| this.socket.disconnect(); | |
| this.socket = null; | |
| } | |
| // Clear all active requests | |
| for (const [requestId, request] of this.activeRequests) { | |
| request.reject(new Error('Client disconnected')); | |
| } | |
| this.activeRequests.clear(); | |
| this.audioQueue.clear(); | |
| } | |
| } | |
| // Export for use | |
| window.WebSocketTTSClient = WebSocketTTSClient; |