| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import { SyncEngine, SyncConfig, SyncResult, SyncDebugInfo } from './SyncEngine'; |
|
|
| |
| |
| |
|
|
/**
 * Options accepted by the `AvatarStreamSDKSync` constructor.
 *
 * Only `wsUrl` and `canvas` are mandatory; the constructor fills in every
 * other field with the default noted below.
 */
export interface SyncedStreamConfig {
  /** URL the WebSocket connection is opened against. */
  wsUrl: string;

  /** Canvas the received frames are drawn onto (a 2D context is requested from it). */
  canvas: HTMLCanvasElement;

  /** Frame rate used for playback timing and duration metrics. Default: 25. */
  targetFps?: number;

  /** Sample rate, in Hz, used to build audio buffers and compute audio duration. Default: 24000. */
  audioSampleRate?: number;

  /** When true, buffered playback runs `SyncEngine.calculateSync` before starting. Default: true. */
  enableSmartSync?: boolean;

  /**
   * When true, nothing is played until the server reports completion; when
   * false, audio chunks and frames are played while they arrive. Default: false.
   */
  waitForComplete?: boolean;

  /** Forwarded to the sync engine as its `audioThreshold`. Default: 0.02. */
  audioThreshold?: number;

  /** Forwarded to the sync engine as its `videoThreshold`. Default: 0.05. */
  videoThreshold?: number;

  /** Invoked every time the client moves to a new status. */
  onStatusChange?: (status: SyncedStreamStatus) => void;
  /** Invoked after each binary message and each rendered frame with fresh metrics. */
  onMetricsUpdate?: (metrics: SyncedStreamMetrics) => void;
  /** Receives the debug information emitted by the sync engine. */
  onSyncUpdate?: (sync: SyncDebugInfo) => void;
  /** Invoked for connection, protocol, server and synchronisation errors. */
  onError?: (error: Error) => void;
  /** Invoked once playback has finished, with a summary of the run. */
  onComplete?: (summary: SyncedStreamSummary) => void;
}
|
|
/**
 * Lifecycle states reported through `onStatusChange`.
 *
 * `disconnected` → `connecting` → `connected` describe the socket;
 * `receiving`, `analyzing`, `syncing`, `playing` and `completed` describe one
 * generation; `error` is entered when the server or the sync step fails.
 */
export type SyncedStreamStatus =
  | 'disconnected'
  | 'connecting'
  | 'connected'
  | 'receiving'
  | 'analyzing'
  | 'syncing'
  | 'playing'
  | 'completed'
  | 'error';
|
|
/** Snapshot of stream progress, as produced by `AvatarStreamSDKSync.getMetrics()`. */
export interface SyncedStreamMetrics {
  /** Number of frames received so far. */
  framesReceived: number;
  /** Index of the next frame to render, i.e. how many render steps have run. */
  framesRendered: number;
  /** Number of audio chunks currently stored. */
  audioChunksReceived: number;
  /** Total number of 16-bit audio samples received. */
  totalAudioSamples: number;
  /** `framesReceived / targetFps`, in milliseconds. */
  videoDurationMs: number;
  /** `totalAudioSamples / audioSampleRate`, in milliseconds. */
  audioDurationMs: number;
  /** The configured target frame rate. */
  originalFps: number;
  /** Frame rate chosen by the sync engine, or the target rate when no sync result exists. */
  adjustedFps: number;
  /** `audioDurationMs - videoDurationMs`. */
  syncDiffMs: number;
  /** Sync-engine debug value `audioStartMs`; null until a sync result exists. */
  audioSpeechStartMs: number | null;
  /** Sync-engine debug value `audioEndMs`; null until a sync result exists. */
  audioSpeechEndMs: number | null;
  /** Sync-engine debug value `videoStartFrame`; null until a sync result exists. */
  videoSpeechStartFrame: number | null;
  /** Sync-engine debug value `videoEndFrame`; null until a sync result exists. */
  videoSpeechEndFrame: number | null;
}
|
|
/** Final report handed to `onComplete` when playback finishes. */
export interface SyncedStreamSummary {
  /** Number of render steps performed. */
  totalFrames: number;
  /** Wall-clock time between the `generate()` call and the end of playback. */
  totalDurationMs: number;
  /** The configured target frame rate. */
  originalFps: number;
  /** Frame rate chosen by the sync engine, or the target rate when no sync result exists. */
  adjustedFps: number;
  /** Strategy name reported by the sync engine; `'normal'` when no sync result exists. */
  syncStrategy: string;
  /** Speech duration reported by the sync engine, or the whole audio duration without one. */
  audioSpeechDurationMs: number;
  /** Speech frame count (or all received frames) converted to milliseconds at the target rate. */
  videoSpeechDurationMs: number;
  /** Audio duration minus video duration at the end of playback. */
  finalSyncDiffMs: number;
}
|
|
| |
| |
| |
|
|
// Message-type tags carried in byte 0 of every binary WebSocket message.

/** Video frame: u32 frame index @1, u32 payload size @5, image bytes @9 (little-endian). */
const MSG_FRAME = 0x01;
/** Complete audio track: u32 sample rate @1, u32 payload size @5, PCM bytes @9 (little-endian). */
const MSG_AUDIO = 0x02;
/** Audio chunk: u32 chunk index @1, u8 last-chunk flag @5, u32 payload size @6, PCM bytes @10. */
const MSG_AUDIO_CHUNK = 0x03;
|
|
| |
| |
| |
|
|
/**
 * Browser client that receives an avatar stream over a WebSocket — JPEG
 * frames plus 16-bit PCM audio — and plays it back on a canvas.
 *
 * Two playback paths exist:
 * - streaming (`waitForComplete: false`): audio chunks are scheduled as they
 *   arrive and frames are drawn at `targetFps` once 10 frames and 2 audio
 *   chunks have been buffered;
 * - buffered: everything is collected until the server sends `done`, is
 *   optionally analysed by `SyncEngine.calculateSync`, and is then played
 *   back using the resulting delays, frame rate and frame map.
 */
export class AvatarStreamSDKSync {
  /** Frames that must be buffered before streaming playback starts. */
  private static readonly MIN_BUFFERED_FRAMES = 10;
  /** Audio chunks that must be scheduled before streaming playback starts. */
  private static readonly MIN_SCHEDULED_AUDIO_CHUNKS = 2;
  /** Interval between keep-alive pings. */
  private static readonly HEARTBEAT_INTERVAL_MS = 30000;

  private readonly config: Required<SyncedStreamConfig>;
  private readonly ctx: CanvasRenderingContext2D;
  private readonly syncEngine: SyncEngine;

  // --- connection ---
  private ws: WebSocket | null = null;
  /** Promise of the connection attempt currently in flight, if any. */
  private connectPromise: Promise<void> | null = null;

  // --- lifecycle ---
  private status: SyncedStreamStatus = 'disconnected';
  private startTime = 0;
  /** True between `stop()`/`disconnect()` and the next `generate()`: late stream data is dropped. */
  private ignoreIncoming = false;
  /** Bumped by every `cleanup()`; async work started under an older value must not start playback. */
  private playbackEpoch = 0;

  // --- received data ---
  private frameBlobs: Blob[] = [];
  private frameImages: HTMLImageElement[] = [];
  /** Object URLs backing `frameImages`; revoked when the images are discarded. */
  private frameImageUrls: string[] = [];
  private audioChunks: Uint8Array[] = [];
  private allAudioSamples: Float32Array | null = null;
  private totalAudioSamples = 0;

  // --- playback ---
  private isPlaying = false;
  private currentFrameIndex = 0;
  private renderInterval: number | null = null;
  /** Pending timeout that delays the start of buffered video playback. */
  private videoStartTimer: number | null = null;
  private audioContext: AudioContext | null = null;
  private audioSource: AudioBufferSourceNode | null = null;
  /** Chunk sources scheduled by the streaming path that have not finished playing. */
  private streamingSources = new Set<AudioBufferSourceNode>();

  // --- sync ---
  private syncResult: SyncResult | null = null;

  // --- streaming mode ---
  private streamingStarted = false;
  /** True once the server reported `done` while streaming playback was in progress. */
  private streamEnded = false;
  private nextAudioTime = 0;
  private scheduledAudioChunks = 0;

  // --- keep-alive ---
  private heartbeatInterval: number | null = null;

  /**
   * @param config Connection target, output canvas, tuning values and callbacks.
   * @throws Error when `wsUrl` or `canvas` is missing, or the canvas has no 2D context.
   */
  constructor(config: SyncedStreamConfig) {
    if (!config.wsUrl) throw new Error('wsUrl is required');
    if (!config.canvas) throw new Error('canvas is required');

    const ctx = config.canvas.getContext('2d');
    if (!ctx) throw new Error('Could not get 2D context from canvas');
    this.ctx = ctx;

    this.config = {
      wsUrl: config.wsUrl,
      canvas: config.canvas,
      targetFps: config.targetFps ?? 25,
      audioSampleRate: config.audioSampleRate ?? 24000,
      enableSmartSync: config.enableSmartSync ?? true,
      waitForComplete: config.waitForComplete ?? false,
      audioThreshold: config.audioThreshold ?? 0.02,
      videoThreshold: config.videoThreshold ?? 0.05,
      onStatusChange: config.onStatusChange ?? (() => {}),
      onMetricsUpdate: config.onMetricsUpdate ?? (() => {}),
      onSyncUpdate: config.onSyncUpdate ?? (() => {}),
      onError: config.onError ?? (() => {}),
      onComplete: config.onComplete ?? (() => {}),
    };

    this.syncEngine = new SyncEngine({
      audioSampleRate: this.config.audioSampleRate,
      videoFps: this.config.targetFps,
      audioThreshold: this.config.audioThreshold,
      videoThreshold: this.config.videoThreshold,
      onDebug: (info) => this.config.onSyncUpdate(info),
    });
  }

  // ==========================================================================
  // Public API
  // ==========================================================================

  /**
   * Opens the WebSocket and starts the keep-alive ping.
   *
   * Resolves immediately when the socket is already open and returns the
   * pending attempt when one is in flight, so repeated calls never stack up
   * several sockets.
   *
   * @returns A promise that resolves once the socket is open and rejects with
   *          `Error('WebSocket connection failed')` when it reports an error.
   */
  async connect(): Promise<void> {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) return;
    if (this.ws && this.ws.readyState === WebSocket.CONNECTING && this.connectPromise) {
      return this.connectPromise;
    }

    const attempt = new Promise<void>((resolve, reject) => {
      this.setStatus('connecting');

      const socket = new WebSocket(this.config.wsUrl);
      socket.binaryType = 'arraybuffer';
      this.ws = socket;

      socket.onopen = () => {
        this.setStatus('connected');
        this.startHeartbeat();
        resolve();
      };

      socket.onmessage = (event: MessageEvent) => {
        // A socket that was replaced or disconnected must not feed the player.
        if (this.ws !== socket) return;

        if (event.data instanceof ArrayBuffer) {
          this.handleBinaryMessage(event.data);
          return;
        }

        let parsed: unknown;
        try {
          parsed = JSON.parse(event.data);
        } catch (e) {
          this.config.onError(new Error(`Invalid JSON: ${e}`));
          return;
        }
        this.handleJsonMessage(parsed);
      };

      socket.onclose = () => {
        // Ignore the close event of a socket that is no longer the current
        // one; otherwise it would stop the heartbeat and overwrite the status
        // of a newer connection.
        if (this.ws !== socket) return;
        this.stopHeartbeat();
        this.setStatus('disconnected');
      };

      socket.onerror = () => {
        const error = new Error('WebSocket connection failed');
        this.config.onError(error);
        reject(error); // no-op when the promise has already been settled
      };
    });

    this.connectPromise = attempt;
    return attempt;
  }

  /**
   * Stops playback, closes the socket, releases decoded frames and the audio
   * context, and reports `'disconnected'` exactly once.
   */
  disconnect(): void {
    this.notifyServerStop();
    this.ignoreIncoming = true;
    this.cleanup();
    this.releaseFrameImages();
    this.stopHeartbeat();

    if (this.ws) {
      const socket = this.ws;
      this.ws = null; // makes the socket's own close event a stale one
      socket.close();
    }
    this.connectPromise = null;

    if (this.audioContext) {
      const audioCtx = this.audioContext;
      this.audioContext = null;
      // Browsers cap the number of live audio contexts, so release ours.
      void audioCtx.close().catch(() => {
        /* already closed */
      });
    }

    this.setStatus('disconnected');
  }

  /**
   * Asks the server to generate a new stream, discarding any previous one.
   *
   * @param text  Text sent to the server (trimmed before sending).
   * @param voice Voice identifier sent to the server.
   * @throws Error (as a rejected promise) when the socket is not open.
   */
  async generate(text: string, voice: string = 'tara'): Promise<void> {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new Error('WebSocket not connected');
    }

    this.resetState();
    this.startTime = Date.now();
    this.setStatus('receiving');

    this.ws.send(
      JSON.stringify({
        action: 'generate',
        text: text.trim(),
        voice,
      }),
    );
  }

  /**
   * Stops the current generation and playback.
   *
   * Tells the server to stop (when connected), cancels every timer and audio
   * source, and drops stream data that arrives afterwards until the next
   * `generate()`. The status becomes `'connected'` while the socket is open
   * and `'disconnected'` otherwise.
   */
  stop(): void {
    this.notifyServerStop();
    this.ignoreIncoming = true;
    this.cleanup();
    this.setStatus(this.isSocketOpen() ? 'connected' : 'disconnected');
  }

  /** @returns A snapshot of reception, rendering and synchronisation figures. */
  getMetrics(): SyncedStreamMetrics {
    const videoDurationMs = (this.frameBlobs.length / this.config.targetFps) * 1000;
    const audioDurationMs = (this.totalAudioSamples / this.config.audioSampleRate) * 1000;

    return {
      framesReceived: this.frameBlobs.length,
      framesRendered: this.currentFrameIndex,
      audioChunksReceived: this.audioChunks.length,
      totalAudioSamples: this.totalAudioSamples,
      videoDurationMs,
      audioDurationMs,
      originalFps: this.config.targetFps,
      adjustedFps: this.syncResult?.adjustedFps ?? this.config.targetFps,
      syncDiffMs: audioDurationMs - videoDurationMs,
      audioSpeechStartMs: this.syncResult?.debug.audioStartMs ?? null,
      audioSpeechEndMs: this.syncResult?.debug.audioEndMs ?? null,
      videoSpeechStartFrame: this.syncResult?.debug.videoStartFrame ?? null,
      videoSpeechEndFrame: this.syncResult?.debug.videoEndFrame ?? null,
    };
  }

  // ==========================================================================
  // Message handling
  // ==========================================================================

  /**
   * Decodes one binary message and dispatches it by the type tag in byte 0.
   *
   * All multi-byte header fields are little-endian:
   * - frame:       u32 index @1, u32 size @5, payload @9
   * - audio chunk: u32 index @1, u8 last flag @5, u32 size @6, payload @10
   * - full audio:  u32 sample rate @1, u32 size @5, payload @9
   *
   * Empty or truncated messages are reported through `onError` instead of
   * throwing out of the socket's message handler. Unknown tags are ignored.
   */
  private handleBinaryMessage(buffer: ArrayBuffer): void {
    if (this.ignoreIncoming) return;

    if (buffer.byteLength < 1) {
      this.reportMalformed('empty message');
      return;
    }

    const view = new DataView(buffer);
    const msgType = view.getUint8(0);

    if (msgType === MSG_FRAME) {
      const frameData = AvatarStreamSDKSync.readPayload(buffer, view, 5);
      if (!frameData) {
        this.reportMalformed('truncated frame');
        return;
      }
      this.handleFrame(frameData, view.getUint32(1, true));
    } else if (msgType === MSG_AUDIO_CHUNK) {
      const chunkData = AvatarStreamSDKSync.readPayload(buffer, view, 6);
      if (!chunkData) {
        this.reportMalformed('truncated audio chunk');
        return;
      }
      this.handleAudioChunk(chunkData, view.getUint32(1, true), view.getUint8(5) === 1);
    } else if (msgType === MSG_AUDIO) {
      const audioData = AvatarStreamSDKSync.readPayload(buffer, view, 5);
      if (!audioData) {
        this.reportMalformed('truncated audio');
        return;
      }
      this.handleFullAudio(audioData, view.getUint32(1, true));
    }

    this.emitMetrics();
  }

  /**
   * Returns the payload whose u32 little-endian length sits at `lengthOffset`
   * and whose bytes follow it, or null when the message is too short to
   * contain the header or the announced number of bytes.
   */
  private static readPayload(
    buffer: ArrayBuffer,
    view: DataView,
    lengthOffset: number,
  ): Uint8Array | null {
    const dataOffset = lengthOffset + 4;
    if (buffer.byteLength < dataOffset) return null;

    const size = view.getUint32(lengthOffset, true);
    if (size > buffer.byteLength - dataOffset) return null;

    return new Uint8Array(buffer, dataOffset, size);
  }

  /** Reports a binary message that could not be decoded. */
  private reportMalformed(reason: string): void {
    this.config.onError(new Error(`Malformed binary message: ${reason}`));
  }

  /**
   * Handles a parsed JSON control message: `done` ends the stream, `error`
   * is forwarded to `onError` and moves the client to the `'error'` status.
   * Anything else is ignored.
   */
  private handleJsonMessage(data: unknown): void {
    if (typeof data !== 'object' || data === null) return;
    const message = data as { type?: unknown; message?: unknown };

    if (message.type === 'done') {
      if (this.ignoreIncoming) return; // the generation was stopped locally
      void this.onStreamComplete();
    } else if (message.type === 'error') {
      const detail = typeof message.message === 'string' ? message.message : 'Unknown server error';
      this.config.onError(new Error(detail));
      this.setStatus('error');
    }
  }

  /** Stores a received frame and, in streaming mode, checks whether playback can start. */
  private handleFrame(frameData: Uint8Array, _frameIndex: number): void {
    this.frameBlobs.push(new Blob([frameData], { type: 'image/jpeg' }));

    if (!this.config.waitForComplete && !this.streamingStarted) {
      this.tryStartStreaming();
    }
  }

  /**
   * Stores a received audio chunk, counts its samples and, in streaming mode,
   * schedules it for playback. Empty chunks are ignored.
   */
  private handleAudioChunk(chunkData: Uint8Array, _chunkIndex: number, _isLast: boolean): void {
    if (chunkData.length === 0) return;

    // Copy: `chunkData` is a view onto the message buffer.
    this.audioChunks.push(new Uint8Array(chunkData));

    // Two bytes per sample; a dangling odd byte is not a complete sample.
    this.totalAudioSamples += chunkData.length >> 1;

    if (!this.config.waitForComplete) {
      this.scheduleAudioChunk(chunkData);
    }
  }

  /**
   * Replaces everything received so far with a complete audio track.
   * NOTE(review): the sample rate sent by the server is not used; playback
   * always assumes `config.audioSampleRate` — confirm both sides agree.
   */
  private handleFullAudio(audioData: Uint8Array, _sampleRate: number): void {
    this.audioChunks = [audioData];
    this.totalAudioSamples = audioData.length >> 1;
  }

  /**
   * Converts little-endian signed 16-bit PCM bytes to floats in [-1, 1).
   * A trailing odd byte is ignored. Reading through a `DataView` avoids both
   * the alignment copy and the `RangeError` that `Int16Array` raises for odd
   * lengths.
   */
  private static pcm16ToFloat32(bytes: Uint8Array): Float32Array {
    const sampleCount = bytes.byteLength >> 1;
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const samples = new Float32Array(sampleCount);

    for (let i = 0; i < sampleCount; i++) {
      samples[i] = view.getInt16(i * 2, true) / 32768;
    }
    return samples;
  }

  // ==========================================================================
  // Streaming playback (waitForComplete === false)
  // ==========================================================================

  /** Starts streaming playback once enough frames and audio chunks are buffered. */
  private tryStartStreaming(): void {
    if (this.streamingStarted) return;

    const enoughFrames = this.frameBlobs.length >= AvatarStreamSDKSync.MIN_BUFFERED_FRAMES;
    const enoughAudio =
      this.scheduledAudioChunks >= AvatarStreamSDKSync.MIN_SCHEDULED_AUDIO_CHUNKS;

    if (enoughFrames && enoughAudio) {
      this.streamingStarted = true;
      this.setStatus('playing');
      this.startStreamingPlayback();
    }
  }

  /**
   * Returns the shared audio context, creating it on first use and asking the
   * browser to resume it when autoplay policy left it suspended.
   */
  private ensureAudioContext(): AudioContext {
    if (!this.audioContext) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- vendor-prefixed fallback
      const AudioContextCtor = window.AudioContext || (window as any).webkitAudioContext;
      this.audioContext = new AudioContextCtor({
        sampleRate: this.config.audioSampleRate,
      });
    }

    const audioCtx = this.audioContext as AudioContext;
    if (audioCtx.state === 'suspended') {
      void audioCtx.resume().catch(() => {
        /* stays suspended until the page receives a user gesture */
      });
    }
    return audioCtx;
  }

  /**
   * Queues one PCM chunk directly after the previously queued one.
   * Failures are logged and the chunk is skipped, so one bad chunk does not
   * stop the stream.
   */
  private scheduleAudioChunk(chunkData: Uint8Array): void {
    const isFirstUse = this.audioContext === null;
    const audioCtx = this.ensureAudioContext();
    if (isFirstUse) {
      // Small lead so the first chunk is not scheduled in the past.
      this.nextAudioTime = audioCtx.currentTime + 0.05;
    }

    try {
      const floatSamples = AvatarStreamSDKSync.pcm16ToFloat32(chunkData);
      if (floatSamples.length === 0) return; // createBuffer rejects empty buffers

      const chunkBuffer = audioCtx.createBuffer(
        1,
        floatSamples.length,
        this.config.audioSampleRate,
      );
      chunkBuffer.getChannelData(0).set(floatSamples);

      const source = audioCtx.createBufferSource();
      source.buffer = chunkBuffer;
      source.connect(audioCtx.destination);

      // Track the source so stop()/generate() can silence queued audio.
      this.streamingSources.add(source);
      source.onended = () => {
        this.streamingSources.delete(source);
      };

      // After an underrun, restart slightly ahead of "now".
      if (this.nextAudioTime < audioCtx.currentTime) {
        this.nextAudioTime = audioCtx.currentTime + 0.01;
      }

      source.start(this.nextAudioTime);
      this.nextAudioTime += floatSamples.length / this.config.audioSampleRate;
      this.scheduledAudioChunks++;
    } catch (e) {
      console.error('Error scheduling audio:', e);
      return;
    }

    this.tryStartStreaming();
  }

  /** Starts the timer that draws one buffered frame per `targetFps` tick. */
  private startStreamingPlayback(): void {
    const frameInterval = 1000 / this.config.targetFps;

    this.renderInterval = window.setInterval(() => {
      this.renderNextStreamingFrame();
    }, frameInterval);
  }

  /**
   * Draws the next buffered frame, if one is available.
   *
   * When the buffer is drained and the server has already reported `done`,
   * playback is finished; otherwise the tick is skipped while more frames
   * arrive.
   */
  private renderNextStreamingFrame(): void {
    if (this.currentFrameIndex >= this.frameBlobs.length) {
      if (this.streamEnded) this.finishPlayback();
      return;
    }

    const blob = this.frameBlobs[this.currentFrameIndex];
    const url = URL.createObjectURL(blob);
    const img = new Image();

    img.onload = () => {
      this.drawFrame(img);
      URL.revokeObjectURL(url);
    };
    // An undecodable frame is skipped, but its URL must still be released.
    img.onerror = () => {
      URL.revokeObjectURL(url);
    };

    img.src = url;
    this.currentFrameIndex++;
    this.emitMetrics();
  }

  /** Draws an image at the canvas origin, resizing the canvas when the width differs. */
  private drawFrame(img: HTMLImageElement): void {
    const canvas = this.config.canvas;
    if (canvas.width !== img.width) {
      canvas.width = img.width;
      canvas.height = img.height;
    }
    this.ctx.drawImage(img, 0, 0);
  }

  // ==========================================================================
  // Buffered, synchronised playback
  // ==========================================================================

  /**
   * Reacts to the server's `done` message.
   *
   * - Streaming playback running: remember that no more data is coming and
   *   let the render timer drain the buffered frames before finishing.
   * - Audio already scheduled but too little data to have started streaming:
   *   play the buffered frames now; re-playing the audio through the buffered
   *   path would make it audible twice.
   * - Otherwise: decode everything, optionally run the sync engine, then play.
   */
  private async onStreamComplete(): Promise<void> {
    if (this.streamingStarted) {
      this.streamEnded = true;
      if (this.currentFrameIndex >= this.frameBlobs.length) {
        this.finishPlayback();
      }
      return;
    }

    if (this.scheduledAudioChunks > 0) {
      this.streamingStarted = true;
      this.streamEnded = true;
      this.setStatus('playing');
      if (this.frameBlobs.length === 0) {
        this.finishPlayback();
      } else {
        this.startStreamingPlayback();
      }
      return;
    }

    const epoch = this.playbackEpoch;
    this.setStatus('analyzing');

    try {
      await this.prepareFullAudio();
      await this.prepareFrameImages();
      if (epoch !== this.playbackEpoch) return; // stopped or restarted meanwhile

      if (this.config.enableSmartSync && this.allAudioSamples && this.frameImages.length > 0) {
        this.setStatus('syncing');

        const result = await this.syncEngine.calculateSync(
          this.allAudioSamples,
          this.frameImages,
          (this.totalAudioSamples / this.config.audioSampleRate) * 1000,
        );
        if (epoch !== this.playbackEpoch) return;

        this.syncResult = result;
        console.log('Sync result:', this.syncResult.debug);
      }

      this.setStatus('playing');
      await this.startSyncedPlayback();
    } catch (e) {
      if (epoch !== this.playbackEpoch) return; // failure of an abandoned run
      console.error('Sync error:', e);
      this.config.onError(e instanceof Error ? e : new Error(String(e)));
      this.setStatus('error');
    }
  }

  /** Joins all stored audio chunks and converts them to float samples. */
  private async prepareFullAudio(): Promise<void> {
    if (this.audioChunks.length === 0) return;

    const totalLength = this.audioChunks.reduce((sum, chunk) => sum + chunk.length, 0);
    const combined = new Uint8Array(totalLength);

    let offset = 0;
    for (const chunk of this.audioChunks) {
      combined.set(chunk, offset);
      offset += chunk.length;
    }

    this.allAudioSamples = AvatarStreamSDKSync.pcm16ToFloat32(combined);
  }

  /**
   * Decodes every received frame into an image, preserving frame order.
   *
   * The object URLs stay alive for as long as the images are in use and are
   * revoked when the images are discarded.
   *
   * @throws Error when a frame cannot be decoded (previously this never settled).
   */
  private async prepareFrameImages(): Promise<void> {
    const epoch = this.playbackEpoch;
    this.releaseFrameImages();

    const urls = this.frameBlobs.map((blob) => URL.createObjectURL(blob));
    let images: HTMLImageElement[];
    try {
      images = await Promise.all(urls.map((url, index) => this.loadImage(url, index)));
    } catch (e) {
      urls.forEach((url) => URL.revokeObjectURL(url));
      throw e;
    }

    if (epoch !== this.playbackEpoch) {
      // The run was stopped while decoding; do not keep its resources.
      urls.forEach((url) => URL.revokeObjectURL(url));
      return;
    }

    this.frameImages = images;
    this.frameImageUrls = urls;
  }

  /** Loads one image from an object URL. */
  private loadImage(url: string, frameIndex: number): Promise<HTMLImageElement> {
    return new Promise<HTMLImageElement>((resolve, reject) => {
      const img = new Image();
      img.onload = () => resolve(img);
      img.onerror = () => reject(new Error(`Failed to decode frame ${frameIndex}`));
      img.src = url;
    });
  }

  /** Drops the decoded frames and revokes the object URLs backing them. */
  private releaseFrameImages(): void {
    for (const url of this.frameImageUrls) {
      URL.revokeObjectURL(url);
    }
    this.frameImageUrls = [];
    this.frameImages = [];
  }

  /**
   * Starts buffered playback: audio after `audioDelayMs`, video after
   * `videoDelayMs` (both 0 without a sync result).
   *
   * Video is played even when no audio was received, so playback always
   * reaches `'completed'` instead of staying in `'playing'`.
   */
  private async startSyncedPlayback(): Promise<void> {
    const audioDelay = this.syncResult?.audioDelayMs ?? 0;
    const videoDelay = this.syncResult?.videoDelayMs ?? 0;

    if (this.allAudioSamples && this.allAudioSamples.length > 0) {
      const audioCtx = this.ensureAudioContext();

      const audioBuffer = audioCtx.createBuffer(
        1,
        this.allAudioSamples.length,
        this.config.audioSampleRate,
      );
      audioBuffer.getChannelData(0).set(this.allAudioSamples);

      const source = audioCtx.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(audioCtx.destination);
      this.audioSource = source;

      source.start(audioCtx.currentTime + audioDelay / 1000);
    }

    // Kept in a field so stop() can cancel a start that has not happened yet.
    this.videoStartTimer = window.setTimeout(() => {
      this.videoStartTimer = null;
      this.startSyncedVideoPlayback();
    }, videoDelay);
  }

  /**
   * Runs the buffered render loop at the sync engine's adjusted frame rate,
   * or at `targetFps` when that rate is missing or not a positive number.
   *
   * While the step index is inside the sync engine's frame map, the map picks
   * the frame to draw; past the map, frames are drawn by index until the
   * decoded frames run out.
   */
  private startSyncedVideoPlayback(): void {
    const adjustedFps = this.syncResult?.adjustedFps;
    const fps =
      adjustedFps !== undefined && Number.isFinite(adjustedFps) && adjustedFps > 0
        ? adjustedFps
        : this.config.targetFps;
    const frameInterval = 1000 / fps;
    const frameMap = this.syncResult?.frameMap;

    this.currentFrameIndex = 0;
    this.isPlaying = true;

    this.renderInterval = window.setInterval(() => {
      if (!this.isPlaying) {
        this.finishPlayback();
        return;
      }

      const step = this.currentFrameIndex;
      let frameToRender: number;
      if (frameMap && step < frameMap.length) {
        frameToRender = frameMap[step];
      } else if (step < this.frameImages.length) {
        frameToRender = step;
      } else {
        this.finishPlayback();
        return;
      }

      // A mapped index may point past the decoded frames; skip drawing then.
      if (frameToRender < this.frameImages.length) {
        this.drawFrame(this.frameImages[frameToRender]);
      }

      this.currentFrameIndex++;
      this.emitMetrics();
    }, frameInterval);
  }

  /** Stops the render timer, reports the summary and enters `'completed'`. */
  private finishPlayback(): void {
    this.isPlaying = false;

    if (this.renderInterval !== null) {
      clearInterval(this.renderInterval);
      this.renderInterval = null;
    }

    const metrics = this.getMetrics();
    const speechFrames = this.syncResult?.debug.videoSpeechFrames ?? this.frameBlobs.length;

    const summary: SyncedStreamSummary = {
      totalFrames: this.currentFrameIndex,
      totalDurationMs: Date.now() - this.startTime,
      originalFps: this.config.targetFps,
      adjustedFps: this.syncResult?.adjustedFps ?? this.config.targetFps,
      syncStrategy: this.syncResult?.debug.syncStrategy ?? 'normal',
      audioSpeechDurationMs: this.syncResult?.debug.audioSpeechDuration ?? metrics.audioDurationMs,
      videoSpeechDurationMs: (speechFrames / this.config.targetFps) * 1000,
      finalSyncDiffMs: metrics.syncDiffMs,
    };

    this.config.onComplete(summary);
    this.setStatus('completed');
  }

  // ==========================================================================
  // Helpers
  // ==========================================================================

  /** Records the new status and notifies the listener. */
  private setStatus(status: SyncedStreamStatus): void {
    this.status = status;
    this.config.onStatusChange(status);
  }

  /** Pushes a fresh metrics snapshot to the listener. */
  private emitMetrics(): void {
    this.config.onMetricsUpdate(this.getMetrics());
  }

  /** True while the current socket is open. */
  private isSocketOpen(): boolean {
    return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
  }

  /** Sends the `stop` action when the socket is open. */
  private notifyServerStop(): void {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify({ action: 'stop' }));
    }
  }

  /** Stops playback and clears everything collected for the previous generation. */
  private resetState(): void {
    this.cleanup();
    this.releaseFrameImages();
    this.frameBlobs = [];
    this.audioChunks = [];
    this.allAudioSamples = null;
    this.totalAudioSamples = 0;
    this.currentFrameIndex = 0;
    this.syncResult = null;
    this.streamingStarted = false;
    this.streamEnded = false;
    this.nextAudioTime = 0;
    this.scheduledAudioChunks = 0;
    this.ignoreIncoming = false;
  }

  /**
   * Halts playback: invalidates in-flight analysis, cancels the render timer
   * and the delayed video start, and stops every audio source.
   */
  private cleanup(): void {
    this.isPlaying = false;
    this.playbackEpoch++;

    if (this.renderInterval !== null) {
      clearInterval(this.renderInterval);
      this.renderInterval = null;
    }

    if (this.videoStartTimer !== null) {
      clearTimeout(this.videoStartTimer);
      this.videoStartTimer = null;
    }

    if (this.audioSource) {
      try {
        this.audioSource.stop();
      } catch {
        /* stop() throws when the source never started; nothing to undo */
      }
      this.audioSource = null;
    }

    for (const source of this.streamingSources) {
      try {
        source.stop();
      } catch {
        /* already finished */
      }
    }
    this.streamingSources.clear();
  }

  /** (Re)starts the keep-alive ping; any previous timer is cleared first. */
  private startHeartbeat(): void {
    this.stopHeartbeat();
    this.heartbeatInterval = window.setInterval(() => {
      if (this.ws && this.ws.readyState === WebSocket.OPEN) {
        this.ws.send(JSON.stringify({ action: 'ping' }));
      }
    }, AvatarStreamSDKSync.HEARTBEAT_INTERVAL_MS);
  }

  /** Stops the keep-alive ping, if it is running. */
  private stopHeartbeat(): void {
    if (this.heartbeatInterval !== null) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }
}

export default AvatarStreamSDKSync;
|
|