| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
/**
 * Options accepted by the `AvatarStreamSDK` constructor.
 *
 * Only `wsUrl` and `canvas` are required; every other field falls back to the
 * default noted on it (missing callbacks default to no-ops).
 */
export interface AvatarStreamConfig {
  /** URL of the WebSocket endpoint that streams frames and audio. */
  wsUrl: string;

  /** Canvas the decoded video frames are drawn onto (a 2D context is requested from it). */
  canvas: HTMLCanvasElement;

  /** Frame rate used to pace rendering and to convert frame counts into seconds. Default: 25. */
  targetFps?: number;

  /** Sample rate, in Hz, assumed for streamed 16-bit PCM audio chunks. Default: 24000. */
  audioSampleRate?: number;

  /** Number of frames that must be received before playback may start. Default: 10. */
  minFramesBuffer?: number;

  /** Number of audio chunks that must be scheduled before playback may start. Default: 2. */
  minAudioChunks?: number;

  /** Invoked every time the SDK sets its status. */
  onStatusChange?: (status: StreamStatus) => void;

  /** Invoked with a fresh metrics snapshot after each binary message and each rendered frame. */
  onMetricsUpdate?: (metrics: StreamMetrics) => void;

  /** Invoked for connection failures, unparsable messages, server-reported errors and audio errors. */
  onError?: (error: Error) => void;

  /** Invoked once when the stream has ended and the frame queue has drained. */
  onComplete?: (summary: StreamSummary) => void;

  /** Invoked when the first frame arrives, with the milliseconds elapsed since the request was sent. */
  onFirstFrame?: (latencyMs: number) => void;

  /** Invoked when playback begins, with the milliseconds elapsed since the request was sent. */
  onPlaybackStart?: (latencyMs: number) => void;
}
|
|
/**
 * Lifecycle states reported through `getStatus()` and `onStatusChange`.
 *
 * - `disconnected` — no open socket (initial state, and after the socket closes)
 * - `connecting`   — a socket has been created but is not open yet
 * - `connected`    — socket open, no generation in progress
 * - `generating`   — a generate request was sent, no frame received yet
 * - `buffering`    — first frame received, playback thresholds not met yet
 * - `playing`      — frames are being rendered
 * - `completed`    — stream ended and every queued frame was consumed
 * - `error`        — the server reported an error for the current request
 */
export type StreamStatus =
  | 'disconnected'
  | 'connecting'
  | 'connected'
  | 'generating'
  | 'buffering'
  | 'playing'
  | 'completed'
  | 'error';
|
|
/** Point-in-time statistics for the current (or most recent) stream. */
export interface StreamMetrics {
  /** Number of video frames received so far. */
  framesReceived: number;

  /** Number of frames that have been drawn onto the canvas. */
  framesRendered: number;

  /** Number of frames currently queued for rendering. */
  bufferSize: number;

  /** Total size, in bytes, of the binary messages received. */
  bytesReceived: number;

  /** Seconds of video received, derived from the frame count and the target frame rate. */
  videoDuration: number;

  /** Seconds of audio received, derived from the sample count and the audio sample rate. */
  audioDuration: number;

  /** `audioDuration - videoDuration`, in seconds; positive when more audio than video has arrived. */
  syncDiff: number;

  /** Estimated render rate in frames per second; 0 until rendering has started. */
  currentFps: number;

  /** Average download rate, in KiB per second, since the request was sent. */
  bandwidthKBps: number;

  /** Number of non-empty streamed audio chunks received. */
  audioChunksReceived: number;

  /** Milliseconds from request to first frame, or `null` if no frame has arrived yet. */
  firstFrameLatencyMs: number | null;

  /** Milliseconds from request to playback start, or `null` if playback has not started. */
  playbackLatencyMs: number | null;
}
|
|
/** Final report handed to `onComplete` once playback of a stream has finished. */
export interface StreamSummary {
  /** Number of frames that were drawn onto the canvas. */
  totalFrames: number;

  /** Wall-clock seconds between sending the request and finishing playback. */
  durationSeconds: number;

  /** Total size, in bytes, of the binary messages received. */
  totalBytes: number;

  /** Seconds of video received, derived from the frame count and the target frame rate. */
  videoDuration: number;

  /** Seconds of audio received, derived from the sample count and the audio sample rate. */
  audioDuration: number;

  /** `audioDuration - videoDuration`, in seconds, at the end of the stream. */
  finalSyncDiff: number;
}
|
|
/** A received video frame waiting to be rendered. */
interface FrameData {
  /** Object URL of the frame's image blob; must be revoked once the frame is no longer needed. */
  url: string;

  /** Frame index as carried in the binary frame message. */
  index: number;
}
|
|
| |
| |
| |
|
|
| |
// Type tags carried in the first byte of every binary WebSocket message.
// Multi-byte header fields are little-endian.

/** Video frame: `[type:u8][frameIndex:u32][size:u32][image bytes]`. */
const MSG_FRAME = 0x01;

/** Complete audio track: `[type:u8][sampleRate:u32][size:u32][16-bit PCM bytes]`. */
const MSG_AUDIO = 0x02;

/** Streamed audio chunk: `[type:u8][chunkIndex:u32][isLast:u8][size:u32][16-bit PCM bytes]`. */
const MSG_AUDIO_CHUNK = 0x03;
|
|
| |
| |
| |
|
|
/**
 * Browser client for a streaming talking-avatar service.
 *
 * The SDK opens a WebSocket, sends a `generate` request, and receives binary
 * messages carrying video frames and 16-bit PCM audio. Frames are drawn onto
 * the configured canvas at `targetFps`; audio is played through Web Audio.
 *
 * Typical usage: `await sdk.connect()`, then `await sdk.generate(text)`, and
 * finally `sdk.disconnect()` when the SDK is no longer needed.
 *
 * Binary messages and `done` notifications are only processed while a
 * generation started by `generate()` is active; anything arriving after
 * `stop()`, `disconnect()`, completion or a server error is ignored.
 *
 * NOTE(review): streamed audio chunks are scheduled as soon as they arrive,
 * which can be before video playback starts. Confirm whether audio should be
 * held back until the frame buffer threshold is reached.
 */
export class AvatarStreamSDK {
  /** Delay between keep-alive pings, in milliseconds. */
  private static readonly HEARTBEAT_INTERVAL_MS = 30000;

  private readonly config: Required<AvatarStreamConfig>;
  private readonly ctx: CanvasRenderingContext2D;

  // --- Connection ---------------------------------------------------------
  private ws: WebSocket | null = null;
  /** In-flight connection attempt, shared by concurrent `connect()` callers. */
  private pendingConnect: Promise<void> | null = null;
  /** Rejects `pendingConnect`; used when `disconnect()` aborts a connection attempt. */
  private rejectPendingConnect: ((reason: Error) => void) | null = null;
  private heartbeatInterval: number | null = null;

  // --- Session state ------------------------------------------------------
  private status: StreamStatus = 'disconnected';
  private startTime = 0;
  /** True from `generate()` until the stream completes, fails or is stopped. */
  private sessionActive = false;
  /** Bumped whenever playback is torn down so stale image callbacks can detect it. */
  private sessionId = 0;
  private streamDone = false;
  private playbackStarted = false;

  // --- Video --------------------------------------------------------------
  private allFrames: FrameData[] = [];
  private frameQueue: FrameData[] = [];
  private renderedFrames = 0;
  private renderInterval: number | null = null;
  private lastRenderTime = 0;
  private measuredFps = 0;
  private totalBytes = 0;

  // --- Audio --------------------------------------------------------------
  private audioContext: AudioContext | null = null;
  /** Sources that have been started and have not ended yet, so `stop()` can silence them. */
  private readonly activeSources = new Set<AudioBufferSourceNode>();
  private audioChunkCount = 0;
  private audioChunksComplete = false;
  private nextAudioTime = 0;
  private audioScheduledChunks = 0;
  private totalAudioSamples = 0;
  /** Sample rate used to convert `totalAudioSamples` into seconds. */
  private playbackSampleRate: number;

  // --- Latency ------------------------------------------------------------
  private firstFrameLatencyMs: number | null = null;
  private playbackLatencyMs: number | null = null;

  /**
   * @param config Connection target, output canvas, tuning values and callbacks.
   * @throws Error if `wsUrl` or `canvas` is missing, if the canvas cannot
   *   provide a 2D context, or if `targetFps` / `audioSampleRate` is not a
   *   positive finite number.
   */
  constructor(config: AvatarStreamConfig) {
    if (!config.wsUrl) throw new Error('wsUrl is required');
    if (!config.canvas) throw new Error('canvas is required');

    const ctx = config.canvas.getContext('2d');
    if (!ctx) throw new Error('Could not get 2D context from canvas');
    this.ctx = ctx;

    const targetFps = config.targetFps ?? 25;
    const audioSampleRate = config.audioSampleRate ?? 24000;
    // Both values are used as divisors; zero, negative or NaN would break timing.
    if (!Number.isFinite(targetFps) || targetFps <= 0) {
      throw new Error('targetFps must be a positive number');
    }
    if (!Number.isFinite(audioSampleRate) || audioSampleRate <= 0) {
      throw new Error('audioSampleRate must be a positive number');
    }

    this.config = {
      wsUrl: config.wsUrl,
      canvas: config.canvas,
      targetFps,
      audioSampleRate,
      minFramesBuffer: config.minFramesBuffer ?? 10,
      minAudioChunks: config.minAudioChunks ?? 2,
      onStatusChange: config.onStatusChange ?? (() => {}),
      onMetricsUpdate: config.onMetricsUpdate ?? (() => {}),
      onError: config.onError ?? (() => {}),
      onComplete: config.onComplete ?? (() => {}),
      onFirstFrame: config.onFirstFrame ?? (() => {}),
      onPlaybackStart: config.onPlaybackStart ?? (() => {}),
    };
    this.playbackSampleRate = audioSampleRate;
  }

  // ==========================================================================
  // Public API
  // ==========================================================================

  /**
   * Opens the WebSocket connection.
   *
   * Resolves immediately if the socket is already open. Concurrent calls made
   * while a connection attempt is in flight share that attempt instead of
   * opening a second socket.
   *
   * @returns A promise that resolves once the socket is open.
   * @throws Error (as a rejection) if the connection fails, closes before it
   *   opens, or is aborted by `disconnect()`.
   */
  async connect(): Promise<void> {
    if (this.ws?.readyState === WebSocket.OPEN) {
      return;
    }
    if (this.pendingConnect) {
      return this.pendingConnect;
    }

    const attempt = new Promise<void>((resolve, reject) => {
      this.rejectPendingConnect = reject;
      this.setStatus('connecting');

      const ws = new WebSocket(this.config.wsUrl);
      ws.binaryType = 'arraybuffer';
      this.ws = ws;

      // Every handler ignores events from a socket that has since been replaced
      // or dropped by disconnect().
      ws.onopen = () => {
        if (this.ws !== ws) return;
        this.setStatus('connected');
        this.startHeartbeat();
        resolve();
      };

      ws.onmessage = (event: MessageEvent) => {
        if (this.ws !== ws) return;
        if (event.data instanceof ArrayBuffer) {
          this.handleBinaryMessage(event.data);
          return;
        }
        let parsed: unknown;
        try {
          parsed = JSON.parse(event.data);
        } catch (e: unknown) {
          this.config.onError(new Error(`Invalid JSON: ${String(e)}`));
          return;
        }
        this.handleJsonMessage(parsed);
      };

      ws.onclose = () => {
        if (this.ws !== ws) return;
        this.stopHeartbeat();
        this.setStatus('disconnected');
        // No-op when the promise has already settled.
        reject(new Error('WebSocket closed before the connection was established'));
      };

      ws.onerror = () => {
        if (this.ws !== ws) return;
        const error = new Error('WebSocket connection failed');
        this.config.onError(error);
        reject(error);
      };
    });

    const forget = (): void => {
      if (this.pendingConnect === attempt) {
        this.pendingConnect = null;
        this.rejectPendingConnect = null;
      }
    };
    this.pendingConnect = attempt;
    attempt.then(forget, forget);
    return attempt;
  }

  /**
   * Stops any playback, closes the socket and releases the audio context.
   * Emits a single `disconnected` status. A pending `connect()` is rejected.
   */
  disconnect(): void {
    const ws = this.ws;
    // Detach first so socket events fired by close() are ignored.
    this.ws = null;
    this.stopHeartbeat();

    if (ws) {
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify({ action: 'stop' }));
      }
      ws.close();
    }

    const rejectPending = this.rejectPendingConnect;
    this.pendingConnect = null;
    this.rejectPendingConnect = null;
    rejectPending?.(new Error('Disconnected before the connection was established'));

    this.haltPlayback();

    if (this.audioContext) {
      // Browsers cap the number of live AudioContexts, so release ours.
      void this.audioContext.close().catch(() => undefined);
      this.audioContext = null;
    }

    this.setStatus('disconnected');
  }

  /**
   * Requests generation of an avatar stream for `text`.
   *
   * Any previous stream is discarded first. The returned promise resolves as
   * soon as the request has been sent; progress is reported via callbacks.
   *
   * @param text Text to speak; surrounding whitespace is trimmed.
   * @param voice Voice identifier forwarded to the server.
   * @throws Error (as a rejection) if the socket is not open or `text` is blank.
   */
  async generate(text: string, voice: string = 'tara'): Promise<void> {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new Error('WebSocket not connected. Call connect() first.');
    }

    const trimmed = text.trim();
    if (!trimmed) {
      throw new Error('Text cannot be empty');
    }

    this.resetState();
    this.startTime = Date.now();
    this.sessionActive = true;
    this.setStatus('generating');

    this.ws.send(JSON.stringify({
      action: 'generate',
      text: trimmed,
      voice,
    }));
  }

  /**
   * Aborts the current stream: tells the server to stop (if connected), halts
   * video rendering and silences audio that was already scheduled.
   *
   * The status becomes `connected` if the socket is still open, otherwise
   * `disconnected` (a connection attempt in progress keeps `connecting`).
   */
  stop(): void {
    const connected = this.isConnected();
    if (connected) {
      this.ws?.send(JSON.stringify({ action: 'stop' }));
    }

    this.haltPlayback();

    if (connected) {
      this.setStatus('connected');
    } else if (this.status !== 'connecting') {
      this.setStatus('disconnected');
    }
  }

  /** @returns The most recently set status. */
  getStatus(): StreamStatus {
    return this.status;
  }

  /** @returns A snapshot of the counters for the current or most recent stream. */
  getMetrics(): StreamMetrics {
    // Before the first generate() there is no start time to measure from.
    const elapsed = this.startTime > 0 ? (Date.now() - this.startTime) / 1000 : 0;
    const videoDuration = this.allFrames.length / this.config.targetFps;
    const audioDuration = this.totalAudioSamples / this.playbackSampleRate;

    return {
      framesReceived: this.allFrames.length,
      framesRendered: this.renderedFrames,
      bufferSize: this.frameQueue.length,
      bytesReceived: this.totalBytes,
      videoDuration,
      audioDuration,
      syncDiff: audioDuration - videoDuration,
      currentFps: this.renderInterval !== null ? this.measuredFps : 0,
      bandwidthKBps: elapsed > 0 ? this.totalBytes / 1024 / elapsed : 0,
      audioChunksReceived: this.audioChunkCount,
      firstFrameLatencyMs: this.firstFrameLatencyMs,
      playbackLatencyMs: this.playbackLatencyMs,
    };
  }

  /** @returns True when the WebSocket is open. */
  isConnected(): boolean {
    return this.ws?.readyState === WebSocket.OPEN;
  }

  /**
   * @returns True while frames are being rendered. This stays true after the
   *   server has signalled the end of the stream until the queue has drained.
   */
  isPlaying(): boolean {
    return this.playbackStarted && this.renderInterval !== null;
  }

  // ==========================================================================
  // Message handling
  // ==========================================================================

  /**
   * Returns the length-prefixed payload whose u32 size field sits at
   * `sizeOffset`, or `null` when the message is too short for its header or
   * for the size it declares.
   */
  private static readPayload(view: DataView, sizeOffset: number): Uint8Array | null {
    const payloadOffset = sizeOffset + 4;
    if (view.byteLength < payloadOffset) return null;

    const size = view.getUint32(sizeOffset, true);
    if (size > view.byteLength - payloadOffset) return null;

    return new Uint8Array(view.buffer, view.byteOffset + payloadOffset, size);
  }

  /** Converts little-endian 16-bit PCM bytes to floats in [-1, 1); a trailing odd byte is ignored. */
  private static pcm16ToFloat32(bytes: Uint8Array): Float32Array {
    const sampleCount = Math.floor(bytes.byteLength / 2);
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const floats = new Float32Array(sampleCount);
    for (let i = 0; i < sampleCount; i++) {
      floats[i] = view.getInt16(i * 2, true) / 32768;
    }
    return floats;
  }

  /** Decodes one binary message and dispatches it by its type tag; malformed messages go to `onError`. */
  private handleBinaryMessage(buffer: ArrayBuffer): void {
    if (!this.sessionActive) return;

    this.totalBytes += buffer.byteLength;

    if (buffer.byteLength === 0) {
      this.reportMalformed('empty');
      return;
    }

    const view = new DataView(buffer);
    const msgType = view.getUint8(0);

    if (msgType === MSG_FRAME) {
      const payload = AvatarStreamSDK.readPayload(view, 5);
      if (payload) {
        this.handleFrame(payload, view.getUint32(1, true));
      } else {
        this.reportMalformed('frame');
      }
    } else if (msgType === MSG_AUDIO) {
      const payload = AvatarStreamSDK.readPayload(view, 5);
      if (payload) {
        this.handleAudioComplete(payload, view.getUint32(1, true));
      } else {
        this.reportMalformed('audio');
      }
    } else if (msgType === MSG_AUDIO_CHUNK) {
      const payload = AvatarStreamSDK.readPayload(view, 6);
      if (payload) {
        this.handleAudioChunk(payload, view.getUint32(1, true), view.getUint8(5) === 1);
      } else {
        this.reportMalformed('audio chunk');
      }
    }
    // Unknown type tags are ignored.

    this.emitMetrics();
  }

  /** Reports a truncated or inconsistent binary message through `onError`. */
  private reportMalformed(kind: string): void {
    this.config.onError(new Error(`Malformed binary message (${kind})`));
  }

  /** Handles a parsed JSON control message from the server. */
  private handleJsonMessage(data: unknown): void {
    if (typeof data !== 'object' || data === null) return;
    const message = data as { type?: unknown; message?: unknown };

    switch (message.type) {
      case 'first_frame':
      case 'status':
        // Informational only; nothing to do.
        break;

      case 'done':
        if (!this.sessionActive) break;
        this.streamDone = true;
        if (!this.playbackStarted && this.allFrames.length === 0) {
          // Nothing to play: finish right away instead of waiting forever.
          this.finishPlayback();
        } else {
          this.tryStartStreamingPlayback();
        }
        break;

      case 'error':
        this.config.onError(
          new Error(message.message === undefined ? 'Unknown server error' : String(message.message)),
        );
        this.haltPlayback();
        this.setStatus('error');
        break;
    }
  }

  // ==========================================================================
  // Video
  // ==========================================================================

  /** Wraps a received JPEG frame in an object URL and queues it for rendering. */
  private handleFrame(frameData: Uint8Array, frameIndex: number): void {
    const blob = new Blob([frameData], { type: 'image/jpeg' });
    const frame: FrameData = { url: URL.createObjectURL(blob), index: frameIndex };

    this.allFrames.push(frame);
    if (this.playbackStarted) {
      this.frameQueue.push(frame);
    }

    if (this.allFrames.length === 1) {
      this.firstFrameLatencyMs = Date.now() - this.startTime;
      this.config.onFirstFrame(this.firstFrameLatencyMs);
      this.setStatus('buffering');
    }

    this.tryStartStreamingPlayback();
  }

  /** Starts the fixed-rate render timer if it is not already running. */
  private startRendering(): void {
    if (this.renderInterval !== null) return;

    this.lastRenderTime = performance.now();
    this.measuredFps = 0;

    this.renderInterval = window.setInterval(() => {
      this.renderNextFrame();
    }, 1000 / this.config.targetFps);
  }

  /** Draws the next queued frame, or finishes playback when the stream is done and the queue is empty. */
  private renderNextFrame(): void {
    const frame = this.frameQueue.shift();
    if (!frame) {
      if (this.streamDone) {
        this.finishPlayback();
      }
      return;
    }

    const session = this.sessionId;
    const canvas = this.config.canvas;
    const img = new Image();

    img.onload = () => {
      // Playback was torn down while the image was decoding: release and skip.
      if (session !== this.sessionId) {
        URL.revokeObjectURL(frame.url);
        return;
      }
      if (canvas.width !== img.width || canvas.height !== img.height) {
        canvas.width = img.width;
        canvas.height = img.height;
      }
      this.ctx.drawImage(img, 0, 0);
      URL.revokeObjectURL(frame.url);
      this.renderedFrames++;
      this.emitMetrics();
    };
    img.onerror = () => {
      // A frame that fails to decode is skipped, but its URL must still be released.
      URL.revokeObjectURL(frame.url);
    };
    img.src = frame.url;

    const now = performance.now();
    const sinceLast = now - this.lastRenderTime;
    if (sinceLast > 0) {
      this.measuredFps = 1000 / sinceLast;
    }
    this.lastRenderTime = now;
  }

  // ==========================================================================
  // Audio
  // ==========================================================================

  /** Returns the shared audio context, creating it at `sampleRate` and resuming it if suspended. */
  private ensureAudioContext(sampleRate: number): AudioContext {
    let context = this.audioContext;
    if (!context) {
      const AudioContextCtor: typeof AudioContext =
        window.AudioContext ||
        (window as unknown as { webkitAudioContext: typeof AudioContext }).webkitAudioContext;
      context = new AudioContextCtor({ sampleRate });
      this.audioContext = context;
    }
    if (context.state === 'suspended') {
      // Autoplay policies can leave a new context suspended; ask the browser to start it.
      void context.resume().catch(() => undefined);
    }
    return context;
  }

  /** Starts `buffer` on the context output at time `when` and tracks the source until it ends. */
  private playBuffer(context: AudioContext, buffer: AudioBuffer, when?: number): void {
    const source = context.createBufferSource();
    source.buffer = buffer;
    source.connect(context.destination);
    source.onended = () => {
      this.activeSources.delete(source);
    };
    source.start(when);
    this.activeSources.add(source);
  }

  /** Stops and forgets every audio source that is still scheduled or playing. */
  private stopAudio(): void {
    for (const source of this.activeSources) {
      source.onended = null;
      try {
        source.stop();
      } catch {
        // The source had already finished; nothing to stop.
      }
      source.disconnect();
    }
    this.activeSources.clear();
  }

  /** Handles one streamed audio chunk; non-empty chunks are scheduled for playback immediately. */
  private handleAudioChunk(chunkData: Uint8Array, chunkIndex: number, isLast: boolean): void {
    void chunkIndex; // carried by the protocol but not needed: chunks are played in arrival order

    if (chunkData.length > 0) {
      if (this.audioChunkCount === 0) {
        try {
          const context = this.ensureAudioContext(this.config.audioSampleRate);
          // Small lead-in so the first chunk is not scheduled in the past.
          this.nextAudioTime = context.currentTime + 0.05;
          this.audioScheduledChunks = 0;
        } catch (e: unknown) {
          this.config.onError(new Error(`Audio processing error: ${String(e)}`));
        }
      }
      this.audioChunkCount++;
      this.scheduleAudioChunk(chunkData);
    }

    if (isLast) {
      this.audioChunksComplete = true;
    }

    this.tryStartStreamingPlayback();
  }

  /** Converts a PCM chunk to an audio buffer and schedules it directly after the previous chunk. */
  private scheduleAudioChunk(chunkData: Uint8Array): void {
    const context = this.audioContext;
    if (!context) return;

    try {
      const samples = AvatarStreamSDK.pcm16ToFloat32(chunkData);
      if (samples.length === 0) return;

      this.totalAudioSamples += samples.length;

      const sampleRate = this.config.audioSampleRate;
      const chunkBuffer = context.createBuffer(1, samples.length, sampleRate);
      chunkBuffer.getChannelData(0).set(samples);

      // If the network fell behind, restart slightly in the future instead of in the past.
      if (this.nextAudioTime < context.currentTime) {
        this.nextAudioTime = context.currentTime + 0.01;
      }

      this.playBuffer(context, chunkBuffer, this.nextAudioTime);

      this.nextAudioTime += samples.length / sampleRate;
      this.audioScheduledChunks++;
    } catch (e: unknown) {
      this.config.onError(new Error(`Audio processing error: ${String(e)}`));
    }
  }

  /** Handles a complete (non-streamed) audio track: plays it immediately and marks audio as complete. */
  private handleAudioComplete(audioData: Uint8Array, sampleRate: number): void {
    try {
      const context = this.ensureAudioContext(sampleRate);
      const samples = AvatarStreamSDK.pcm16ToFloat32(audioData);

      this.totalAudioSamples = samples.length;
      if (sampleRate > 0) {
        // Durations must be computed with the rate this track was encoded at.
        this.playbackSampleRate = sampleRate;
      }
      this.audioChunksComplete = true;

      const audioBuffer = context.createBuffer(1, samples.length, sampleRate);
      audioBuffer.getChannelData(0).set(samples);
      this.playBuffer(context, audioBuffer);

      this.tryStartStreamingPlayback();
    } catch (e: unknown) {
      this.config.onError(new Error(`Audio processing error: ${String(e)}`));
    }
  }

  // ==========================================================================
  // Playback control
  // ==========================================================================

  /**
   * Starts rendering once enough data has been buffered.
   *
   * The frame threshold is waived when the stream has ended with at least one
   * frame, and the audio threshold is waived when all audio has arrived or the
   * stream has ended, so short streams and full-audio streams still play.
   */
  private tryStartStreamingPlayback(): void {
    if (!this.sessionActive || this.playbackStarted) return;

    const frameCount = this.allFrames.length;
    const framesReady =
      frameCount >= this.config.minFramesBuffer || (this.streamDone && frameCount > 0);
    if (!framesReady) return;

    const audioReady =
      this.audioScheduledChunks >= this.config.minAudioChunks ||
      this.audioChunksComplete ||
      this.streamDone;
    if (!audioReady) return;

    this.playbackStarted = true;
    this.playbackLatencyMs = Date.now() - this.startTime;
    this.config.onPlaybackStart(this.playbackLatencyMs);

    this.frameQueue = [...this.allFrames];
    this.startRendering();
    this.setStatus('playing');
  }

  /** Stops the render timer, reports the summary through `onComplete` and sets status `completed`. */
  private finishPlayback(): void {
    this.stopRenderLoop();
    this.sessionActive = false;

    const elapsed = this.startTime > 0 ? (Date.now() - this.startTime) / 1000 : 0;
    const videoDuration = this.allFrames.length / this.config.targetFps;
    const audioDuration = this.totalAudioSamples / this.playbackSampleRate;

    const summary: StreamSummary = {
      totalFrames: this.renderedFrames,
      durationSeconds: elapsed,
      totalBytes: this.totalBytes,
      videoDuration,
      audioDuration,
      finalSyncDiff: audioDuration - videoDuration,
    };

    this.config.onComplete(summary);
    this.setStatus('completed');
  }

  // ==========================================================================
  // Internals
  // ==========================================================================

  /** Records the new status and notifies `onStatusChange`. */
  private setStatus(status: StreamStatus): void {
    this.status = status;
    this.config.onStatusChange(status);
  }

  /** Pushes a metrics snapshot to `onMetricsUpdate`. */
  private emitMetrics(): void {
    this.config.onMetricsUpdate(this.getMetrics());
  }

  /** Tears down any current playback and zeroes every per-stream counter. */
  private resetState(): void {
    this.haltPlayback();

    this.allFrames = [];
    this.renderedFrames = 0;
    this.totalBytes = 0;
    this.startTime = 0;
    this.streamDone = false;
    this.lastRenderTime = 0;
    this.measuredFps = 0;

    this.audioChunkCount = 0;
    this.audioChunksComplete = false;
    this.audioScheduledChunks = 0;
    this.totalAudioSamples = 0;
    this.playbackSampleRate = this.config.audioSampleRate;

    this.firstFrameLatencyMs = null;
    this.playbackLatencyMs = null;
  }

  /** Clears the render timer if it is running. */
  private stopRenderLoop(): void {
    if (this.renderInterval !== null) {
      clearInterval(this.renderInterval);
      this.renderInterval = null;
    }
  }

  /**
   * Ends the active session without completing it: stops rendering and audio,
   * releases every frame URL and makes later stream messages be ignored.
   * Received-frame counts are kept so metrics stay readable afterwards.
   */
  private haltPlayback(): void {
    this.stopRenderLoop();
    this.stopAudio();

    // Queued frames are a subset of allFrames, so one pass releases every URL.
    for (const frame of this.allFrames) {
      URL.revokeObjectURL(frame.url);
    }
    this.frameQueue = [];

    this.sessionActive = false;
    this.playbackStarted = false;
    this.nextAudioTime = 0;
    this.sessionId++;
  }

  /** Starts the keep-alive ping timer, replacing any timer that is already running. */
  private startHeartbeat(): void {
    this.stopHeartbeat();
    this.heartbeatInterval = window.setInterval(() => {
      if (this.ws?.readyState === WebSocket.OPEN) {
        this.ws.send(JSON.stringify({ action: 'ping' }));
      }
    }, AvatarStreamSDK.HEARTBEAT_INTERVAL_MS);
  }

  /** Stops the keep-alive ping timer if it is running. */
  private stopHeartbeat(): void {
    if (this.heartbeatInterval !== null) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }
}
|
|
| |
// Default export kept alongside the named export for consumers that import the SDK as a default.
export default AvatarStreamSDK;
|
|