ttsfm / static /js /websocket-tts.js
NitinBot001's picture
Upload 20 files
bf90fc9 verified
/**
* WebSocket TTS Streaming Client
*
* Because apparently HTTP requests are so 2023.
* Now we need real-time streaming for everything.
*/
class WebSocketTTSClient {
constructor(options = {}) {
this.socketUrl = options.socketUrl || window.location.origin;
this.socket = null;
this.activeRequests = new Map();
this.reconnectAttempts = 0;
this.maxReconnectAttempts = options.maxReconnectAttempts || 5;
this.reconnectDelay = options.reconnectDelay || 1000;
this.debug = options.debug || false;
// Audio context for seamless playback
this.audioContext = null;
this.audioQueue = new Map(); // request_id -> audio chunks
// Event handlers
this.onConnect = options.onConnect || (() => {});
this.onDisconnect = options.onDisconnect || (() => {});
this.onError = options.onError || ((error) => console.error('WebSocket error:', error));
// Initialize
this.connect();
}
connect() {
if (this.socket && this.socket.connected) {
this.log('Already connected');
return;
}
this.log('Connecting to WebSocket server...');
// Initialize Socket.IO connection
this.socket = io(this.socketUrl, {
transports: ['websocket', 'polling'],
reconnection: true,
reconnectionAttempts: this.maxReconnectAttempts,
reconnectionDelay: this.reconnectDelay
});
// Set up event handlers
this.setupEventHandlers();
}
setupEventHandlers() {
// Connection events
this.socket.on('connect', () => {
this.log('Connected to WebSocket server');
this.reconnectAttempts = 0;
this.onConnect();
});
this.socket.on('disconnect', (reason) => {
this.log('Disconnected from WebSocket server:', reason);
this.onDisconnect(reason);
});
this.socket.on('connect_error', (error) => {
this.log('Connection error:', error);
this.reconnectAttempts++;
this.onError({
type: 'connection_error',
message: error.message,
attempts: this.reconnectAttempts
});
});
// TTS streaming events
this.socket.on('connected', (data) => {
this.log('Session established:', data.session_id);
});
this.socket.on('stream_started', (data) => {
this.log('Stream started:', data.request_id);
const request = this.activeRequests.get(data.request_id);
if (request && request.onStart) {
request.onStart(data);
}
});
this.socket.on('audio_chunk', (data) => {
this.handleAudioChunk(data);
});
this.socket.on('stream_progress', (data) => {
this.handleProgress(data);
});
this.socket.on('stream_complete', (data) => {
this.handleStreamComplete(data);
});
this.socket.on('stream_error', (data) => {
this.handleStreamError(data);
});
}
/**
* Generate speech with real-time streaming
*/
generateSpeech(text, options = {}) {
return new Promise((resolve, reject) => {
if (!this.socket || !this.socket.connected) {
reject(new Error('WebSocket not connected'));
return;
}
const requestId = this.generateRequestId();
const audioChunks = [];
// Store request info
this.activeRequests.set(requestId, {
resolve,
reject,
audioChunks,
options,
startTime: Date.now(),
onStart: options.onStart,
onProgress: options.onProgress,
onChunk: options.onChunk,
onComplete: options.onComplete,
onError: options.onError
});
// Initialize audio queue for this request
this.audioQueue.set(requestId, []);
// Emit generation request
this.socket.emit('generate_stream', {
request_id: requestId,
text: text,
voice: options.voice || 'alloy',
format: options.format || 'mp3',
chunk_size: options.chunkSize || 1024
});
this.log('Requested speech generation:', requestId);
});
}
handleAudioChunk(data) {
const request = this.activeRequests.get(data.request_id);
if (!request) {
this.log('Received chunk for unknown request:', data.request_id);
return;
}
// Convert hex string back to binary
const audioData = this.hexToArrayBuffer(data.audio_data);
// Store chunk
request.audioChunks.push({
index: data.chunk_index,
data: audioData,
duration: data.duration,
format: data.format
});
// Add to audio queue for streaming playback
const queue = this.audioQueue.get(data.request_id);
if (queue) {
queue.push(audioData);
}
// Call chunk handler if provided
if (request.onChunk) {
request.onChunk({
chunkIndex: data.chunk_index,
totalChunks: data.total_chunks,
audioData: audioData,
duration: data.duration,
text: data.chunk_text
});
}
this.log(`Received chunk ${data.chunk_index + 1}/${data.total_chunks} for request ${data.request_id}`);
}
handleProgress(data) {
const request = this.activeRequests.get(data.request_id);
if (request && request.onProgress) {
request.onProgress({
progress: data.progress,
chunksCompleted: data.chunks_completed,
totalChunks: data.total_chunks,
status: data.status
});
}
}
handleStreamComplete(data) {
const request = this.activeRequests.get(data.request_id);
if (!request) {
this.log('Completion for unknown request:', data.request_id);
return;
}
// Sort chunks by index
request.audioChunks.sort((a, b) => a.index - b.index);
// Combine all audio chunks
const combinedAudio = this.combineAudioChunks(request.audioChunks);
const result = {
requestId: data.request_id,
audioData: combinedAudio,
chunks: request.audioChunks,
duration: request.audioChunks.reduce((sum, chunk) => sum + chunk.duration, 0),
generationTime: Date.now() - request.startTime,
format: request.audioChunks[0]?.format || 'mp3'
};
// Call complete handler
if (request.onComplete) {
request.onComplete(result);
}
// Resolve promise
request.resolve(result);
// Cleanup
this.activeRequests.delete(data.request_id);
this.audioQueue.delete(data.request_id);
this.log('Stream completed:', data.request_id);
}
handleStreamError(data) {
const request = this.activeRequests.get(data.request_id);
if (!request) {
this.log('Error for unknown request:', data.request_id);
return;
}
const error = new Error(data.error);
error.requestId = data.request_id;
error.timestamp = data.timestamp;
// Call error handler
if (request.onError) {
request.onError(error);
}
// Reject promise
request.reject(error);
// Cleanup
this.activeRequests.delete(data.request_id);
this.audioQueue.delete(data.request_id);
this.log('Stream error:', data.request_id, data.error);
}
/**
* Cancel an active stream
*/
cancelStream(requestId) {
if (!this.socket || !this.socket.connected) {
throw new Error('WebSocket not connected');
}
this.socket.emit('cancel_stream', { request_id: requestId });
// Clean up local state
const request = this.activeRequests.get(requestId);
if (request) {
request.reject(new Error('Stream cancelled by user'));
this.activeRequests.delete(requestId);
this.audioQueue.delete(requestId);
}
}
/**
* Combine audio chunks into a single buffer
*/
combineAudioChunks(chunks) {
if (chunks.length === 0) return new ArrayBuffer(0);
// Calculate total size
const totalSize = chunks.reduce((sum, chunk) => sum + chunk.data.byteLength, 0);
// Create combined buffer
const combined = new ArrayBuffer(totalSize);
const view = new Uint8Array(combined);
let offset = 0;
for (const chunk of chunks) {
view.set(new Uint8Array(chunk.data), offset);
offset += chunk.data.byteLength;
}
return combined;
}
/**
* Play audio directly (experimental streaming playback)
*/
async playAudioStream(requestId) {
if (!this.audioContext) {
this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
}
const queue = this.audioQueue.get(requestId);
if (!queue) {
throw new Error('No audio queue found for request');
}
// This is a simplified version - real implementation would need
// proper audio decoding and buffering for seamless playback
this.log('Streaming audio playback not fully implemented yet');
}
/**
* Utility functions
*/
hexToArrayBuffer(hex) {
const bytes = new Uint8Array(hex.length / 2);
for (let i = 0; i < hex.length; i += 2) {
bytes[i / 2] = parseInt(hex.substr(i, 2), 16);
}
return bytes.buffer;
}
generateRequestId() {
return `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
}
log(...args) {
if (this.debug) {
console.log('[WebSocketTTS]', ...args);
}
}
/**
* Get connection status
*/
isConnected() {
return this.socket && this.socket.connected;
}
/**
* Disconnect from server
*/
disconnect() {
if (this.socket) {
this.socket.disconnect();
this.socket = null;
}
// Clear all active requests
for (const [requestId, request] of this.activeRequests) {
request.reject(new Error('Client disconnected'));
}
this.activeRequests.clear();
this.audioQueue.clear();
}
}
// Export for use
window.WebSocketTTSClient = WebSocketTTSClient;