/**
 * Avatar Stream SDK
 *
 * SDK for real-time, synchronized audio and video streaming.
 * Receives JPEG frames and PCM audio chunks over a binary WebSocket.
 *
 * @example
 * ```typescript
 * const avatar = new AvatarStreamSDK({
 *   wsUrl: 'ws://localhost:8080/ws',
 *   canvas: document.getElementById('canvas') as HTMLCanvasElement,
 *   onStatusChange: (status) => console.log('Status:', status),
 *   onMetricsUpdate: (metrics) => console.log('Metrics:', metrics),
 * });
 *
 * await avatar.connect();
 * await avatar.generate('Hello world!', 'tara');
 * ```
 */

// ============================================================================
// Types
// ============================================================================

export interface AvatarStreamConfig {
  /** WebSocket URL of the server */
  wsUrl: string;
  /** Canvas the frames are rendered onto */
  canvas: HTMLCanvasElement;
  /** Target rendering FPS (default: 25) */
  targetFps?: number;
  /** Audio sample rate (default: 24000) */
  audioSampleRate?: number;
  /** Minimum number of frames buffered before playback starts (default: 10) */
  minFramesBuffer?: number;
  /** Minimum number of audio chunks before playback starts (default: 2) */
  minAudioChunks?: number;
  /** Called whenever the status changes */
  onStatusChange?: (status: StreamStatus) => void;
  /** Called whenever the metrics are updated */
  onMetricsUpdate?: (metrics: StreamMetrics) => void;
  /** Called when an error occurs */
  onError?: (error: Error) => void;
  /** Called when the stream finishes */
  onComplete?: (summary: StreamSummary) => void;
  /** Called when the first frame is received */
  onFirstFrame?: (latencyMs: number) => void;
  /** Called when playback starts */
  onPlaybackStart?: (latencyMs: number) => void;
}

export type StreamStatus =
  | 'disconnected'
  | 'connecting'
  | 'connected'
  | 'generating'
  | 'buffering'
  | 'playing'
  | 'completed'
  | 'error';

export interface StreamMetrics {
  /** Frames received from the server */
  framesReceived: number;
  /** Frames rendered onto the canvas */
  framesRendered: number;
  /** Frames in the buffer waiting to be rendered */
  bufferSize: number;
  /** Total bytes received */
  bytesReceived: number;
  /** Video duration in seconds (frames / fps) */
  videoDuration: number;
  /** Audio duration in seconds (samples / sampleRate) */
  audioDuration: number;
  /** Difference between audio and video (audioDuration - videoDuration) */
  syncDiff: number;
  /** Current rendering FPS */
  currentFps: number;
  /** Bandwidth in KB/s */
  bandwidthKBps: number;
  /** Audio chunks received */
  audioChunksReceived: number;
  /** Latency until the first frame (ms) */
  firstFrameLatencyMs: number | null;
  /** Latency until playback start (ms) */
  playbackLatencyMs: number | null;
}

export interface StreamSummary {
  /** Total frames rendered */
  totalFrames: number;
  /** Total duration in seconds */
  durationSeconds: number;
  /** Total bytes transferred */
  totalBytes: number;
  /** Video duration */
  videoDuration: number;
  /** Audio duration */
  audioDuration: number;
  /** Final synchronization difference */
  finalSyncDiff: number;
}

interface FrameData {
  url: string;
  index: number;
}

/** Result of decoding one binary WebSocket message. */
type BinaryMessage =
  | { kind: 'frame'; frameIndex: number; data: Uint8Array }
  | { kind: 'audio'; sampleRate: number; data: Uint8Array }
  | { kind: 'audioChunk'; chunkIndex: number; isLast: boolean; data: Uint8Array }
  | { kind: 'unknown'; msgType: number }
  | { kind: 'invalid'; reason: string };

// ============================================================================
// Constants
// ============================================================================

/** Binary message types (must match the server) */
const MSG_FRAME = 0x01;
const MSG_AUDIO = 0x02;
const MSG_AUDIO_CHUNK = 0x03;

/** Header size of MSG_FRAME / MSG_AUDIO: type(1) + uint32(4) + dataSize(4). */
const FRAME_HEADER_SIZE = 9;
/** Header size of MSG_AUDIO_CHUNK: type(1) + index(4) + isLast(1) + dataSize(4). */
const AUDIO_CHUNK_HEADER_SIZE = 10;

// ============================================================================
// Helpers
// ============================================================================

/**
 * Decodes a binary message, validating every length before reading so that a
 * truncated or empty message yields `{ kind: 'invalid' }` instead of throwing.
 *
 * @param buffer Raw message as delivered by the WebSocket.
 * @returns The decoded message; `unknown` for unrecognized type bytes.
 */
function parseBinaryMessage(buffer: ArrayBuffer): BinaryMessage {
  if (buffer.byteLength < 1) {
    return { kind: 'invalid', reason: 'empty message' };
  }

  const view = new DataView(buffer);
  const msgType = view.getUint8(0);

  if (msgType === MSG_FRAME || msgType === MSG_AUDIO) {
    if (buffer.byteLength < FRAME_HEADER_SIZE) {
      return { kind: 'invalid', reason: 'truncated header' };
    }
    const field = view.getUint32(1, true);
    const dataSize = view.getUint32(5, true);
    if (FRAME_HEADER_SIZE + dataSize > buffer.byteLength) {
      return { kind: 'invalid', reason: 'payload size exceeds message length' };
    }
    const data = new Uint8Array(buffer, FRAME_HEADER_SIZE, dataSize);
    return msgType === MSG_FRAME
      ? { kind: 'frame', frameIndex: field, data }
      : { kind: 'audio', sampleRate: field, data };
  }

  if (msgType === MSG_AUDIO_CHUNK) {
    if (buffer.byteLength < AUDIO_CHUNK_HEADER_SIZE) {
      return { kind: 'invalid', reason: 'truncated header' };
    }
    const chunkIndex = view.getUint32(1, true);
    const isLast = view.getUint8(5) === 1;
    const dataSize = view.getUint32(6, true);
    if (AUDIO_CHUNK_HEADER_SIZE + dataSize > buffer.byteLength) {
      return { kind: 'invalid', reason: 'payload size exceeds message length' };
    }
    const data = new Uint8Array(buffer, AUDIO_CHUNK_HEADER_SIZE, dataSize);
    return { kind: 'audioChunk', chunkIndex, isLast, data };
  }

  return { kind: 'unknown', msgType };
}

/**
 * Converts little-endian signed 16-bit PCM bytes to float samples in [-1, 1).
 *
 * A DataView is used so the input needs no 2-byte alignment and the result
 * does not depend on host endianness. A trailing odd byte is ignored.
 *
 * @param data Raw PCM bytes.
 * @returns One float per complete 16-bit sample (possibly empty).
 */
function pcm16ToFloat32(data: Uint8Array): Float32Array {
  const sampleCount = data.byteLength >> 1;
  const view = new DataView(data.buffer, data.byteOffset, sampleCount * 2);
  const samples = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = view.getInt16(i * 2, true) / 32768;
  }
  return samples;
}

// ============================================================================
// SDK Class
// ============================================================================

export class AvatarStreamSDK {
  // Configuration
  private readonly config: Required<AvatarStreamConfig>;
  private readonly ctx: CanvasRenderingContext2D;

  // WebSocket
  private ws: WebSocket | null = null;
  private connectPromise: Promise<void> | null = null;

  // State
  private status: StreamStatus = 'disconnected';
  private startTime = 0;
  private streamDone = false;
  private playbackStarted = false;

  // Frame management
  private allFrames: FrameData[] = [];
  private frameQueue: FrameData[] = [];
  private renderedFrames = 0;
  private renderInterval: number | null = null;
  private lastRenderTime = 0;
  private lastFrameIntervalMs = 0;
  private totalBytes = 0;

  // Audio management
  private audioContext: AudioContext | null = null;
  private audioChunks: Uint8Array[] = [];
  private audioChunksComplete = false;
  private nextAudioTime = 0;
  private audioScheduledChunks = 0;
  private totalAudioSamples = 0;
  private readonly activeSources = new Set<AudioBufferSourceNode>();

  // Metrics
  private firstFrameLatencyMs: number | null = null;
  private playbackLatencyMs: number | null = null;

  // Heartbeat
  private heartbeatInterval: number | null = null;

  /**
   * @param config SDK configuration; `wsUrl` and `canvas` are required.
   * @throws Error if `wsUrl` or `canvas` is missing, or the canvas has no 2D context.
   */
  constructor(config: AvatarStreamConfig) {
    // Validate required config
    if (!config.wsUrl) throw new Error('wsUrl is required');
    if (!config.canvas) throw new Error('canvas is required');

    const ctx = config.canvas.getContext('2d');
    if (!ctx) throw new Error('Could not get 2D context from canvas');
    this.ctx = ctx;

    // Apply defaults
    this.config = {
      wsUrl: config.wsUrl,
      canvas: config.canvas,
      targetFps: config.targetFps ?? 25,
      audioSampleRate: config.audioSampleRate ?? 24000,
      minFramesBuffer: config.minFramesBuffer ?? 10,
      minAudioChunks: config.minAudioChunks ?? 2,
      onStatusChange: config.onStatusChange ?? (() => {}),
      onMetricsUpdate: config.onMetricsUpdate ?? (() => {}),
      onError: config.onError ?? (() => {}),
      onComplete: config.onComplete ?? (() => {}),
      onFirstFrame: config.onFirstFrame ?? (() => {}),
      onPlaybackStart: config.onPlaybackStart ?? (() => {}),
    };
  }

  // ==========================================================================
  // Public API
  // ==========================================================================

  /**
   * Connects to the WebSocket server.
   *
   * Resolves immediately if already connected; concurrent calls share the
   * same pending connection instead of opening a second socket.
   *
   * @returns A promise that resolves once the socket is open and rejects if
   *   the connection fails or closes before opening.
   */
  async connect(): Promise<void> {
    if (this.ws?.readyState === WebSocket.OPEN) {
      return;
    }
    if (this.connectPromise) {
      return this.connectPromise;
    }

    const pending = new Promise<void>((resolve, reject) => {
      this.setStatus('connecting');

      const socket = new WebSocket(this.config.wsUrl);
      socket.binaryType = 'arraybuffer';
      this.ws = socket;

      socket.onopen = () => {
        if (this.ws !== socket) return;
        this.connectPromise = null;
        this.setStatus('connected');
        this.startHeartbeat();
        resolve();
      };

      socket.onmessage = (event) => {
        // Ignore late messages from a socket that was replaced or disconnected.
        if (this.ws !== socket) return;

        if (event.data instanceof ArrayBuffer) {
          this.handleBinaryMessage(event.data);
        } else {
          try {
            const data: unknown = JSON.parse(event.data);
            this.handleJsonMessage(data);
          } catch (e) {
            this.config.onError(new Error(`Invalid JSON: ${e}`));
          }
        }
      };

      socket.onclose = () => {
        // No-op if the promise already settled; otherwise the socket closed
        // before it ever opened and the caller must not wait forever.
        reject(new Error('WebSocket closed before the connection was established'));

        // A stale socket must not touch the state of its replacement.
        if (this.ws !== socket) return;
        this.connectPromise = null;
        this.setStatus('disconnected');
        this.stopHeartbeat();
      };

      socket.onerror = () => {
        const error = new Error('WebSocket connection failed');
        if (this.ws === socket) {
          this.connectPromise = null;
          this.config.onError(error);
        }
        reject(error);
      };
    });

    this.connectPromise = pending;
    return pending;
  }

  /**
   * Disconnects from the server, stopping any generation in progress.
   */
  disconnect(): void {
    const socket = this.ws;

    if (socket?.readyState === WebSocket.OPEN) {
      socket.send(JSON.stringify({ action: 'stop' }));
    }
    this.cleanup();
    this.stopHeartbeat();

    // Clear the reference first so the socket's own handlers treat it as stale.
    this.ws = null;
    this.connectPromise = null;
    if (socket) {
      socket.close();
    }

    this.setStatus('disconnected');
  }

  /**
   * Generates an avatar speaking the given text.
   *
   * @param text Text to speak; must not be blank.
   * @param voice Voice identifier sent to the server.
   * @throws Error if the socket is not open or `text` is blank.
   */
  async generate(text: string, voice: string = 'tara'): Promise<void> {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new Error('WebSocket not connected. Call connect() first.');
    }

    if (!text.trim()) {
      throw new Error('Text cannot be empty');
    }

    // Reset state
    this.resetState();
    this.startTime = Date.now();
    this.setStatus('generating');

    // Send request
    this.ws.send(JSON.stringify({
      action: 'generate',
      text: text.trim(),
      voice,
    }));
  }

  /**
   * Stops the current generation, including any audio already scheduled.
   */
  stop(): void {
    const isOpen = this.ws?.readyState === WebSocket.OPEN;
    if (isOpen) {
      this.ws?.send(JSON.stringify({ action: 'stop' }));
    }

    this.cleanup();
    // Only report 'connected' when the socket really is open.
    this.setStatus(isOpen ? 'connected' : 'disconnected');
  }

  /**
   * Returns the current status.
   */
  getStatus(): StreamStatus {
    return this.status;
  }

  /**
   * Returns the current metrics.
   */
  getMetrics(): StreamMetrics {
    // startTime is 0 until generate() runs; avoid measuring against the epoch.
    const elapsed = this.startTime > 0 ? (Date.now() - this.startTime) / 1000 : 0;
    const videoDuration = this.allFrames.length / this.config.targetFps;
    const audioDuration = this.totalAudioSamples / this.config.audioSampleRate;

    return {
      framesReceived: this.allFrames.length,
      framesRendered: this.renderedFrames,
      bufferSize: this.frameQueue.length,
      bytesReceived: this.totalBytes,
      videoDuration,
      audioDuration,
      syncDiff: audioDuration - videoDuration,
      // Derived from the measured gap between the last two rendered frames.
      currentFps: this.lastFrameIntervalMs > 0 ? 1000 / this.lastFrameIntervalMs : 0,
      bandwidthKBps: elapsed > 0 ? (this.totalBytes / 1024 / elapsed) : 0,
      audioChunksReceived: this.audioChunks.length,
      firstFrameLatencyMs: this.firstFrameLatencyMs,
      playbackLatencyMs: this.playbackLatencyMs,
    };
  }

  /**
   * Returns whether the socket is open.
   */
  isConnected(): boolean {
    return this.ws?.readyState === WebSocket.OPEN;
  }

  /**
   * Returns whether playback has started and the stream is not yet done.
   */
  isPlaying(): boolean {
    return this.playbackStarted && !this.streamDone;
  }

  // ==========================================================================
  // Private Methods - Message Handling
  // ==========================================================================

  /** Decodes one binary message and dispatches it to the matching handler. */
  private handleBinaryMessage(buffer: ArrayBuffer): void {
    this.totalBytes += buffer.byteLength;

    const message = parseBinaryMessage(buffer);
    switch (message.kind) {
      case 'frame':
        this.handleFrame(message.data, message.frameIndex);
        break;
      case 'audio':
        this.handleAudioComplete(message.data, message.sampleRate);
        break;
      case 'audioChunk':
        this.handleAudioChunk(message.data, message.chunkIndex, message.isLast);
        break;
      case 'invalid':
        this.config.onError(new Error(`Invalid binary message: ${message.reason}`));
        break;
      case 'unknown':
        // Unrecognized message types are ignored.
        break;
    }

    this.emitMetrics();
  }

  /** Handles a parsed JSON control message from the server. */
  private handleJsonMessage(data: unknown): void {
    if (typeof data !== 'object' || data === null) return;
    const message = data as { type?: unknown; message?: unknown };

    switch (message.type) {
      case 'first_frame':
        // Server-side first frame latency
        break;

      case 'done':
        this.streamDone = true;
        this.tryStartStreamingPlayback();
        break;

      case 'status':
        // Status message from server
        break;

      case 'error':
        this.config.onError(
          new Error(typeof message.message === 'string' ? message.message : 'Unknown server error')
        );
        this.cleanup();
        this.setStatus('error');
        break;
    }
  }

  // ==========================================================================
  // Private Methods - Frame Handling
  // ==========================================================================

  /** Stores a received JPEG frame and queues it if playback is running. */
  private handleFrame(frameData: Uint8Array, frameIndex: number): void {
    const blob = new Blob([frameData], { type: 'image/jpeg' });
    const url = URL.createObjectURL(blob);

    this.allFrames.push({ url, index: frameIndex });

    if (this.playbackStarted) {
      this.frameQueue.push({ url, index: frameIndex });
    }

    if (this.allFrames.length === 1) {
      this.firstFrameLatencyMs = Date.now() - this.startTime;
      this.config.onFirstFrame(this.firstFrameLatencyMs);
      this.setStatus('buffering');
    }

    this.tryStartStreamingPlayback();
  }

  /** Starts the fixed-rate render timer (no-op if already running). */
  private startRendering(): void {
    if (this.renderInterval !== null) return;

    this.lastRenderTime = performance.now();
    const frameInterval = 1000 / this.config.targetFps;

    this.renderInterval = window.setInterval(() => {
      this.renderNextFrame();
    }, frameInterval);
  }

  /** Draws the next queued frame, or finishes playback when the stream is drained. */
  private renderNextFrame(): void {
    const frame = this.frameQueue.shift();
    if (!frame) {
      if (this.streamDone) {
        this.finishPlayback();
      }
      return;
    }

    const img = new Image();

    img.onload = () => {
      const canvas = this.config.canvas;
      if (canvas.width !== img.width || canvas.height !== img.height) {
        canvas.width = img.width;
        canvas.height = img.height;
      }

      this.ctx.drawImage(img, 0, 0);
      URL.revokeObjectURL(frame.url);
      this.renderedFrames++;
      this.emitMetrics();
    };

    // A frame that fails to decode must still release its blob URL.
    img.onerror = () => {
      URL.revokeObjectURL(frame.url);
    };

    img.src = frame.url;

    const now = performance.now();
    this.lastFrameIntervalMs = now - this.lastRenderTime;
    this.lastRenderTime = now;
  }

  // ==========================================================================
  // Private Methods - Audio Handling
  // ==========================================================================

  /**
   * Returns the shared AudioContext, creating it at `sampleRate` if needed and
   * asking it to resume when the browser created it suspended.
   */
  private ensureAudioContext(sampleRate: number): AudioContext {
    if (!this.audioContext) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- legacy Safari prefix
      const Ctor: typeof AudioContext = window.AudioContext || (window as any).webkitAudioContext;
      this.audioContext = new Ctor({ sampleRate });
    }

    if (this.audioContext.state === 'suspended') {
      this.audioContext.resume().catch((e: unknown) => {
        this.config.onError(new Error(`Audio processing error: ${e}`));
      });
    }

    return this.audioContext;
  }

  /** Prepares the audio clock for a new chunked stream. */
  private initAudioContext(): void {
    const context = this.ensureAudioContext(this.config.audioSampleRate);
    this.nextAudioTime = context.currentTime + 0.05;
    this.audioScheduledChunks = 0;
  }

  /** Remembers a started source so it can be stopped by cleanup(). */
  private trackSource(source: AudioBufferSourceNode): void {
    this.activeSources.add(source);
    source.onended = () => {
      this.activeSources.delete(source);
    };
  }

  /** Stops and releases every audio source that is playing or scheduled. */
  private stopAudio(): void {
    for (const source of this.activeSources) {
      source.onended = null;
      try {
        source.stop();
      } catch {
        // Source already finished; nothing to stop.
      }
      source.disconnect();
    }
    this.activeSources.clear();
  }

  /** Handles one streamed PCM chunk; `isLast` marks the end of the audio. */
  private handleAudioChunk(chunkData: Uint8Array, chunkIndex: number, isLast: boolean): void {
    if (this.audioChunks.length === 0 && chunkData.length > 0) {
      this.initAudioContext();
    }

    if (chunkData.length > 0) {
      this.audioChunks.push(chunkData);
      this.scheduleAudioChunk(chunkData);
    }

    if (isLast) {
      this.audioChunksComplete = true;
    }

    this.tryStartStreamingPlayback();
  }

  /** Converts a PCM chunk and schedules it right after the previous one. */
  private scheduleAudioChunk(chunkData: Uint8Array): void {
    const context = this.audioContext;
    if (!context) return;

    try {
      const floatSamples = pcm16ToFloat32(chunkData);
      // createBuffer rejects zero-length buffers; nothing to play anyway.
      if (floatSamples.length === 0) return;

      this.totalAudioSamples += floatSamples.length;

      // Create audio buffer
      const chunkBuffer = context.createBuffer(
        1,
        floatSamples.length,
        this.config.audioSampleRate
      );
      chunkBuffer.getChannelData(0).set(floatSamples);

      // Create and schedule source
      const source = context.createBufferSource();
      source.buffer = chunkBuffer;
      source.connect(context.destination);

      // If the schedule fell behind real time, restart slightly in the future.
      if (this.nextAudioTime < context.currentTime) {
        this.nextAudioTime = context.currentTime + 0.01;
      }

      source.start(this.nextAudioTime);
      this.trackSource(source);

      this.nextAudioTime += floatSamples.length / this.config.audioSampleRate;
      this.audioScheduledChunks++;
    } catch (e) {
      this.config.onError(new Error(`Audio processing error: ${e}`));
    }
  }

  /** Fallback for complete-audio mode: plays the whole track in one buffer. */
  private handleAudioComplete(audioData: Uint8Array, sampleRate: number): void {
    try {
      const context = this.ensureAudioContext(sampleRate);
      const floatSamples = pcm16ToFloat32(audioData);

      this.totalAudioSamples = floatSamples.length;
      this.audioChunksComplete = true;

      if (floatSamples.length > 0) {
        const audioBuffer = context.createBuffer(1, floatSamples.length, sampleRate);
        audioBuffer.getChannelData(0).set(floatSamples);

        const source = context.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(context.destination);
        source.start();
        this.trackSource(source);
      }

      this.tryStartStreamingPlayback();
    } catch (e) {
      this.config.onError(new Error(`Audio processing error: ${e}`));
    }
  }

  // ==========================================================================
  // Private Methods - Playback Control
  // ==========================================================================

  /**
   * Decides whether enough data is available to start rendering.
   *
   * Once the stream is done nothing more will arrive, so any received frame
   * is playable even below the configured thresholds. Audio counts as ready
   * when enough chunks were scheduled or the full audio has been received.
   */
  private isReadyForPlayback(): boolean {
    if (this.allFrames.length === 0) return false;
    if (this.streamDone) return true;
    if (this.allFrames.length < this.config.minFramesBuffer) return false;

    return (
      this.audioChunksComplete ||
      this.audioScheduledChunks >= this.config.minAudioChunks
    );
  }

  /** Starts playback once, as soon as the buffers are ready. */
  private tryStartStreamingPlayback(): void {
    if (this.playbackStarted) return;
    if (!this.isReadyForPlayback()) return;

    // Ready to start
    this.playbackStarted = true;
    this.playbackLatencyMs = Date.now() - this.startTime;
    this.config.onPlaybackStart(this.playbackLatencyMs);

    this.frameQueue = [...this.allFrames];
    this.startRendering();
    this.setStatus('playing');
  }

  /** Stops the render timer and reports the final summary. */
  private finishPlayback(): void {
    if (this.renderInterval !== null) {
      clearInterval(this.renderInterval);
      this.renderInterval = null;
    }

    const elapsed = (Date.now() - this.startTime) / 1000;
    const videoDuration = this.allFrames.length / this.config.targetFps;
    const audioDuration = this.totalAudioSamples / this.config.audioSampleRate;

    const summary: StreamSummary = {
      totalFrames: this.renderedFrames,
      durationSeconds: elapsed,
      totalBytes: this.totalBytes,
      videoDuration,
      audioDuration,
      finalSyncDiff: audioDuration - videoDuration,
    };

    this.config.onComplete(summary);
    this.setStatus('completed');
  }

  // ==========================================================================
  // Private Methods - Utilities
  // ==========================================================================

  /** Records the new status and notifies the listener. */
  private setStatus(status: StreamStatus): void {
    this.status = status;
    this.config.onStatusChange(status);
  }

  /** Pushes a fresh metrics snapshot to the listener. */
  private emitMetrics(): void {
    this.config.onMetricsUpdate(this.getMetrics());
  }

  /** Releases resources of the previous stream and zeroes all per-stream state. */
  private resetState(): void {
    this.cleanup();

    this.allFrames = [];
    this.frameQueue = [];
    this.renderedFrames = 0;
    this.totalBytes = 0;
    this.startTime = 0;
    this.lastFrameIntervalMs = 0;
    this.streamDone = false;
    this.playbackStarted = false;

    this.audioChunks = [];
    this.audioChunksComplete = false;
    this.nextAudioTime = 0;
    this.audioScheduledChunks = 0;
    this.totalAudioSamples = 0;

    this.firstFrameLatencyMs = null;
    this.playbackLatencyMs = null;
  }

  /** Stops rendering and audio, and releases every blob URL still held. */
  private cleanup(): void {
    if (this.renderInterval !== null) {
      clearInterval(this.renderInterval);
      this.renderInterval = null;
    }

    this.stopAudio();

    // Release blob URLs (revoking an already-revoked URL is harmless).
    this.frameQueue.forEach(f => URL.revokeObjectURL(f.url));
    this.allFrames.forEach(f => URL.revokeObjectURL(f.url));
  }

  /** Starts the 30-second ping timer, replacing any previous one. */
  private startHeartbeat(): void {
    this.stopHeartbeat();

    this.heartbeatInterval = window.setInterval(() => {
      if (this.ws?.readyState === WebSocket.OPEN) {
        this.ws.send(JSON.stringify({ action: 'ping' }));
      }
    }, 30000);
  }

  /** Stops the ping timer if it is running. */
  private stopHeartbeat(): void {
    if (this.heartbeatInterval !== null) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }
}

// Export default
export default AvatarStreamSDK;