| <!DOCTYPE html> |
| <html lang="pt-BR"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Avatar - Streaming Progressivo</title> |
| <style> |
| * { margin: 0; padding: 0; box-sizing: border-box; } |
| body { font-family: system-ui, sans-serif; background: #0a0a1a; color: #fff; min-height: 100vh; padding: 20px; } |
| .container { max-width: 900px; margin: 0 auto; } |
| .status { text-align: center; padding: 8px; margin-bottom: 15px; border-radius: 8px; font-size: 13px; background: rgba(255,255,255,0.1); } |
| .status.ok { background: rgba(0,255,100,0.2); color: #0f0; } |
| .status.busy { background: rgba(255,200,0,0.2); color: #fc0; } |
| .video-box { background: #000; border-radius: 10px; overflow: hidden; margin-bottom: 20px; aspect-ratio: 16/9; position: relative; } |
| video, canvas { width: 100%; height: 100%; object-fit: contain; position: absolute; top: 0; left: 0; } |
| #idleVideo { z-index: 1; } |
| #talkCanvas { z-index: 2; display: none; } |
| .controls { display: flex; gap: 10px; margin-bottom: 20px; flex-wrap: wrap; } |
| textarea { flex: 1; min-width: 200px; padding: 12px; border: 1px solid #333; border-radius: 8px; background: #1a1a2e; color: #fff; font-size: 14px; resize: none; height: 50px; } |
| select { padding: 12px; border: 1px solid #333; border-radius: 8px; background: #1a1a2e; color: #fff; font-size: 14px; } |
| button { padding: 12px 24px; border: none; border-radius: 8px; font-size: 14px; font-weight: bold; cursor: pointer; } |
| button:disabled { opacity: 0.5; cursor: not-allowed; } |
| .btn-go { background: #00ff88; color: #000; } |
| .btn-stop { background: #ff4444; color: #fff; } |
| .metrics { display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 8px; padding: 12px; background: #1a1a2e; border-radius: 8px; font-size: 12px; } |
| .metric { display: flex; justify-content: space-between; } |
| .val { color: #00ff88; font-family: monospace; } |
| .progress { height: 4px; background: #333; border-radius: 2px; margin-top: 10px; overflow: hidden; } |
| .progress-bar { height: 100%; background: #00ff88; width: 0%; transition: width 0.1s; } |
| </style> |
| </head> |
| <body> |
| <div class="container"> |
| <div class="status" id="status">Carregando...</div> |
| <div class="video-box"> |
| <video id="idleVideo" playsinline muted loop></video> |
| <canvas id="talkCanvas"></canvas> |
| </div> |
| <div class="controls"> |
| <textarea id="text">Hello! I am testing the avatar streaming.</textarea> |
| <select id="voice"> |
| <option value="tara">Tara</option> |
| <option value="leah">Leah</option> |
| <option value="jess">Jess</option> |
| <option value="leo">Leo</option> |
| <option value="dan">Dan</option> |
| </select> |
| <button class="btn-go" id="btnGo">Gerar</button> |
| <button class="btn-stop" id="btnStop" disabled>Parar</button> |
| </div> |
| <div class="progress"><div class="progress-bar" id="progress"></div></div> |
| <div class="metrics"> |
| <div class="metric"><span>TTFB:</span><span class="val" id="mTtfb">--</span></div> |
| <div class="metric"><span>Frames:</span><span class="val" id="mFrames">--</span></div> |
| <div class="metric"><span>FPS:</span><span class="val" id="mFps">--</span></div> |
| <div class="metric"><span>Audio:</span><span class="val" id="mAudio">--</span></div> |
| </div> |
| </div> |
| <script> |
// ---- DOM handles ---------------------------------------------------------
// Looping idle clip; sits underneath the canvas while nothing is spoken.
const idleVideo = document.getElementById('idleVideo');
// Canvas the streamed frames are painted on, plus its 2D context.
const talkCanvas = document.getElementById('talkCanvas');
const ctx = talkCanvas.getContext('2d');
// Status banner and progress bar.
const status = document.getElementById('status');
const progress = document.getElementById('progress');
// "Gerar" / "Parar" buttons.
const btnGo = document.getElementById('btnGo');
const btnStop = document.getElementById('btnStop');

// ---- Timing constants ----------------------------------------------------
// Nominal playback rate and the matching per-frame duration in milliseconds.
const TARGET_FPS = 25;
const FRAME_DURATION = 1000 / TARGET_FPS;

// ---- Connection ----------------------------------------------------------
// WebSocket created by connect(); null until the first attempt.
let ws = null;

// ---- Frame buffer --------------------------------------------------------
// Decoded frame images, stored at the index the server sent with each frame.
let frames = [];
// Number of frames decoded so far for the current stream.
let frameCount = 0;
// Frame count reported by the 'done' message (later clamped by playback).
let totalFrames = 0;
// Index of the frame most recently drawn; -1 when nothing has been drawn.
let lastRenderedFrame = -1;

// ---- Playback flags and clocks ---------------------------------------------
let isPlaying = false;
let isBuffering = false;
// performance.now() reading taken when playback started.
let playbackStartTime = 0;
// Handle returned by requestAnimationFrame for the render loop.
let animationId = null;
// Date.now() reading taken when "Gerar" was clicked.
let startTime = 0;
// end_video_time_ms from the 'done' message; stopPlayback() seeks the idle
// clip to this position.
let endVideoTimeMs = 0;

// ---- Web Audio objects ---------------------------------------------------
let audioContext = null;
let audioBuffer = null;
let audioSource = null;
| |
| |
// Banner text shown while the idle clip is waiting for a user click.
const AUTOPLAY_PROMPT = 'Clique na tela para iniciar';

// Load the idle clip and start it as soon as it can play.
idleVideo.src = 'idle.mp4';

idleVideo.oncanplay = () => {
    // play() returns a promise that rejects when playback cannot start
    // (e.g. autoplay is blocked); in that case wait for one click on the page.
    idleVideo.play().catch(() => {
        setStatus(AUTOPLAY_PROMPT, 'busy');
        document.body.onclick = () => {
            // One-shot handler: detach before retrying.
            document.body.onclick = null;
            idleVideo.play().catch((err) => {
                // Do not leave the retry as an unhandled rejection.
                console.warn('Idle video could not be started:', err);
            });
        };
    });
};

idleVideo.onplay = () => {
    if (!ws || ws.readyState !== WebSocket.OPEN) {
        setStatus('Conectando...', 'busy');
    } else if (status.textContent === AUTOPLAY_PROMPT) {
        // The socket is already open, so nothing else would ever replace the
        // click prompt. Only touch the banner if it still shows that prompt,
        // so a newer message (e.g. "Gerando...") is never overwritten.
        setStatus('Pronto', 'ok');
    }
};
| |
/**
 * Show a message in the status banner and set its modifier class.
 *
 * @param {string} txt - Text to display.
 * @param {string} [cls] - Optional modifier class appended after "status".
 */
function setStatus(txt, cls) {
    const modifier = cls ? cls : '';
    status.className = `status ${modifier}`;
    status.textContent = txt;
}
| |
/**
 * Write a value into one of the metric cells.
 *
 * @param {string} id - Element id of the metric cell.
 * @param {*} val - Value assigned to the cell's textContent.
 */
function setMetric(id, val) {
    const cell = document.getElementById(id);
    cell.textContent = val;
}
| |
/**
 * Put every metric cell back to the "--" placeholder and empty the
 * progress bar.
 */
function resetMetrics() {
    for (const metricId of ['mTtfb', 'mFrames', 'mFps', 'mAudio']) {
        setMetric(metricId, '--');
    }
    progress.style.width = '0%';
}
| |
/**
 * Open the WebSocket to `/ws` on the current host and install its handlers.
 *
 * Does nothing when a socket is already open or still connecting. When the
 * socket closes, a reconnect is scheduled after 3 seconds.
 *
 * Incoming messages are JSON objects dispatched on `msg.type`:
 * `status`, `stream_start`, `frame`, `audio`, `done`, `error`.
 * Anything that is not valid JSON is logged and ignored.
 */
function connect() {
    // A socket that is still connecting will open (or close) on its own;
    // creating a second one would duplicate every incoming message.
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) {
        return;
    }

    // Pages served over https may only open secure WebSockets.
    const scheme = location.protocol === 'https:' ? 'wss://' : 'ws://';
    ws = new WebSocket(scheme + location.host + '/ws');

    ws.onopen = () => setStatus('Pronto', 'ok');

    ws.onclose = () => {
        // A generation that was in flight cannot complete on this socket.
        // Unlock the buttons unless audio is already playing locally, in
        // which case the stop button must stay usable.
        if (!isPlaying) {
            isBuffering = false;
            setButtons(false);
        }
        setStatus('Desconectado');
        setTimeout(connect, 3000);
    };

    ws.onerror = () => setStatus('Erro de conexao');

    ws.onmessage = (e) => {
        let msg;
        try {
            msg = JSON.parse(e.data);
        } catch (err) {
            console.warn('Ignoring non-JSON WebSocket message:', err);
            return;
        }
        if (!msg || typeof msg !== 'object') {
            return;
        }
        console.log('MSG:', msg.type);

        switch (msg.type) {
            case 'status':
                setStatus(msg.message, 'busy');
                break;

            case 'stream_start':
                setMetric('mTtfb', msg.ttfb_ms + 'ms');
                setStatus('Recebendo frames...', 'busy');
                startBuffering();
                break;

            case 'frame':
                addFrame(msg.frame, msg.index);
                break;

            case 'audio': {
                const seconds = msg.duration_ms / 1000;
                setMetric('mAudio', seconds.toFixed(2) + 's');

                // Effective frame rate = frames available / audio length.
                // Skip the division for a zero-length clip.
                const availableFrames = totalFrames || frameCount;
                setMetric('mFps', seconds > 0 ? (availableFrames / seconds).toFixed(1) : '--');

                // Handles its own errors internally.
                startSyncedPlayback(msg.audio, msg.duration_ms);
                break;
            }

            case 'done':
                totalFrames = msg.frames;
                setMetric('mFrames', msg.frames);
                endVideoTimeMs = msg.end_video_time_ms || 0;
                console.log(`Done: ${msg.frames} frames, end_video_time: ${endVideoTimeMs}ms`);
                break;

            case 'error':
                // stopPlayback() resets the banner to "Pronto", so it has to
                // run before the error text is shown.
                stopPlayback();
                setStatus('Erro: ' + msg.message);
                setButtons(false);
                break;
        }
    };
}
| |
/**
 * Decode one base64 JPEG frame and store it in `frames` at `index`.
 *
 * Decoding is asynchronous. If `frames` has been replaced by the time the
 * image finishes loading (a new stream started or playback was stopped),
 * the image is discarded so it does not leak into the next stream.
 *
 * @param {string} base64Frame - JPEG bytes encoded as base64, without the data-URL prefix.
 * @param {number} index - Position of the frame within the stream.
 */
function addFrame(base64Frame, index) {
    // Remember which buffer this frame belongs to.
    const ownerBuffer = frames;
    const img = new Image();

    img.onload = () => {
        if (frames !== ownerBuffer) {
            // Stale frame from a stream that has since been reset.
            return;
        }

        frames[index] = img;
        frameCount++;

        // The first frame defines the canvas resolution.
        if (index === 0) {
            talkCanvas.width = img.width;
            talkCanvas.height = img.height;
        }

        if (isBuffering && !isPlaying) {
            setStatus(`Buffering: ${frameCount} frames...`, 'busy');
        }
    };

    img.onerror = () => {
        // renderLoop() falls back to an earlier frame when one is missing,
        // so a failed decode only needs to be reported.
        console.warn(`Frame ${index} could not be decoded; skipping it`);
    };

    img.src = 'data:image/jpeg;base64,' + base64Frame;
}
| |
/**
 * Prepare for a new incoming stream: discard frames and counters left over
 * from the previous one and mark the player as buffering (not yet playing).
 */
function startBuffering() {
    // Forget everything the previous stream left behind.
    frames = [];
    frameCount = 0;
    totalFrames = 0;
    lastRenderedFrame = -1;

    // Frames are being collected; playback has not started.
    isPlaying = false;
    isBuffering = true;
}
| |
| |
// Display time per frame in milliseconds, read by renderLoop();
// startSyncedPlayback() sets it back to FRAME_DURATION for every clip.
let dynamicFrameDuration = FRAME_DURATION;

// Audio length in milliseconds as passed to the latest
// startSyncedPlayback() call.
let audioDurationMs = 0;
| |
/**
 * Decode a base64 payload of 16-bit PCM samples into floats in [-1, 1).
 *
 * A payload that starts with "RIFF" is treated as a WAV file with a 44-byte
 * header, which is skipped (not parsed). A trailing odd byte is ignored.
 *
 * @param {string} base64Audio
 * @returns {Float32Array}
 */
function decodePcm16(base64Audio) {
    const binaryString = atob(base64Audio);
    const bytes = new Uint8Array(binaryString.length);
    for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i);
    }

    let pcmOffset = 0;
    if (bytes.length > 44 &&
        bytes[0] === 0x52 && bytes[1] === 0x49 &&
        bytes[2] === 0x46 && bytes[3] === 0x46) {
        console.log('WAV header detected, skipping 44 bytes');
        pcmOffset = 44;
    }

    // Int16Array throws a RangeError when the remaining byte count is odd,
    // so the number of whole samples is passed explicitly.
    const sampleCount = Math.floor((bytes.length - pcmOffset) / 2);
    const pcmData = new Int16Array(bytes.buffer, pcmOffset, sampleCount);
    return Float32Array.from(pcmData, (sample) => sample / 32768.0);
}

/**
 * Apply a quadratic fade-in and a linear fade-out in place so the clip does
 * not start or end with a click. Ramps are clamped to the clip length.
 *
 * @param {Float32Array} samples
 */
function applyEdgeFades(samples) {
    const FADE_IN_SAMPLES = 1200;  // 50 ms at 24 kHz
    const FADE_OUT_SAMPLES = 720;  // 30 ms at 24 kHz

    const fadeInCount = Math.min(FADE_IN_SAMPLES, samples.length);
    for (let i = 0; i < fadeInCount; i++) {
        const t = i / FADE_IN_SAMPLES;
        samples[i] *= t * t;
    }

    const fadeOutCount = Math.min(FADE_OUT_SAMPLES, samples.length);
    const fadeOutStart = samples.length - fadeOutCount;
    for (let i = 0; i < fadeOutCount; i++) {
        samples[fadeOutStart + i] *= (fadeOutCount - i) / fadeOutCount;
    }
}

/**
 * Start audio playback and the frame render loop together.
 *
 * Decodes the audio, replaces any clip that is still playing, clamps
 * `totalFrames` to what the audio length allows, shows the canvas and
 * starts `renderLoop()`. Failures are caught: playback is stopped and the
 * error is shown in the status banner.
 *
 * @param {string} base64Audio - Base64 16-bit mono PCM (optionally WAV-wrapped), played at 24 kHz.
 * @param {number} durationMs - Audio length in milliseconds.
 * @returns {Promise<void>}
 */
async function startSyncedPlayback(base64Audio, durationMs) {
    setStatus('Reproduzindo...', 'ok');

    try {
        if (!audioContext) {
            audioContext = new (window.AudioContext || window.webkitAudioContext)();
        }
        if (audioContext.state === 'suspended') {
            await audioContext.resume();
        }

        const floatData = decodePcm16(base64Audio);
        if (floatData.length === 0) {
            throw new Error('audio vazio');
        }
        applyEdgeFades(floatData);

        audioBuffer = audioContext.createBuffer(1, floatData.length, 24000);
        audioBuffer.getChannelData(0).set(floatData);

        // Replace, rather than overlap, a clip that is still playing, and
        // make sure only one render loop is ever scheduled.
        if (audioSource) {
            try {
                audioSource.stop();
            } catch (e) {
                // stop() throws if the node was never started; nothing to undo.
            }
        }
        if (animationId) {
            cancelAnimationFrame(animationId);
            animationId = null;
        }

        const source = audioContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(audioContext.destination);
        source.onended = () => {
            // Only clear the global if it still refers to this node; a late
            // "ended" from an older clip must not cancel the current one.
            if (audioSource === source) {
                audioSource = null;
            }
        };
        audioSource = source;

        audioDurationMs = durationMs;
        dynamicFrameDuration = FRAME_DURATION;

        // Never show more frames than fit in the audio at the nominal rate.
        const framesNeeded = Math.floor(durationMs / FRAME_DURATION);
        const numFrames = totalFrames || frameCount;
        const framesToUse = Math.min(framesNeeded, numFrames);

        console.log(`Audio: ${durationMs}ms, Frames disponiveis: ${numFrames}, Frames a usar: ${framesToUse} (${(1000/dynamicFrameDuration).toFixed(1)}fps)`);

        totalFrames = framesToUse;

        // Paint the first frame before revealing the canvas so the switch
        // from the idle clip does not flash an empty canvas.
        if (frames[0]) {
            ctx.drawImage(frames[0], 0, 0);
            lastRenderedFrame = 0;
        }
        talkCanvas.style.display = 'block';

        isPlaying = true;
        isBuffering = false;
        playbackStartTime = performance.now();

        source.start(0);
        console.log('Playback sincronizado iniciado:', frameCount, 'frames,', durationMs, 'ms audio');

        renderLoop();
    } catch (err) {
        console.error('Erro ao iniciar playback:', err);
        // stopPlayback() resets the banner to "Pronto", so it has to run
        // before the error text is shown.
        stopPlayback();
        setStatus('Erro: ' + err.message);
    }
}
| |
/**
 * One step of the animation loop: draw the frame that matches the time
 * elapsed since playback started, update the progress bar, and schedule
 * the next step. Stops playback once the audio source is gone.
 */
function renderLoop() {
    if (!isPlaying) {
        return;
    }

    // The audio source is cleared when the clip ends; that ends the video too.
    if (!audioSource) {
        stopPlayback();
        return;
    }

    const elapsedMs = performance.now() - playbackStartTime;
    const wantedIndex = Math.floor(elapsedMs / dynamicFrameDuration);
    const frameLimit = totalFrames || frameCount;

    const needsDraw = wantedIndex < frameLimit && wantedIndex !== lastRenderedFrame;
    if (needsDraw) {
        // Use the wanted frame if it has been decoded, otherwise walk back
        // to the nearest earlier frame that has.
        let image = frames[wantedIndex];
        let probe = wantedIndex - 1;
        while (!image && probe >= 0) {
            image = frames[probe];
            probe -= 1;
        }

        if (image) {
            ctx.drawImage(image, 0, 0);
            lastRenderedFrame = wantedIndex;
        }
    }

    if (frameLimit > 0) {
        const shownIndex = Math.min(wantedIndex, frameLimit);
        progress.style.width = (shownIndex / frameLimit * 100) + '%';
    }

    animationId = requestAnimationFrame(renderLoop);
}
| |
/**
 * Stop audio and frame rendering, hide and clear the canvas, re-align the
 * idle clip with the position reported by the server, and return the UI to
 * its ready state.
 */
function stopPlayback() {
    isBuffering = false;
    isPlaying = false;

    // Cancel the pending animation step, if any.
    if (animationId) {
        cancelAnimationFrame(animationId);
        animationId = null;
    }

    // Silence the audio; stop() may throw, which is ignored.
    if (audioSource) {
        try {
            audioSource.stop();
        } catch (e) {}
        audioSource = null;
    }

    // Reveal the idle clip again by hiding the canvas layer above it.
    talkCanvas.style.display = 'none';
    ctx.clearRect(0, 0, talkCanvas.width, talkCanvas.height);

    // Jump the idle clip to where the streamed frames ended, wrapped around
    // the clip length.
    const canResync = endVideoTimeMs > 0 && idleVideo.duration > 0;
    if (canResync) {
        const targetTime = (endVideoTimeMs / 1000) % idleVideo.duration;
        console.log(`Idle video sync: seeking to ${targetTime.toFixed(2)}s (endVideoTimeMs=${endVideoTimeMs})`);

        // Use fastSeek where the browser offers it.
        if (idleVideo.fastSeek) {
            idleVideo.fastSeek(targetTime);
        } else {
            idleVideo.currentTime = targetTime;
        }

        idleVideo.play().catch(() => {});

        // The position is consumed; do not reuse it for the next stop.
        endVideoTimeMs = 0;
    }

    frames = [];
    lastRenderedFrame = -1;
    setStatus('Pronto', 'ok');
    setButtons(false);
}
| |
/**
 * Toggle the two action buttons: while generating, "Gerar" is disabled and
 * "Parar" is enabled; otherwise the reverse.
 *
 * @param {boolean} generating
 */
function setButtons(generating) {
    btnStop.disabled = !generating;
    btnGo.disabled = generating;
}
| |
// "Gerar": send the text and voice to the server together with the current
// position of the idle clip.
btnGo.onclick = () => {
    const text = document.getElementById('text').value.trim();
    if (!text) return;
    if (!ws || ws.readyState !== WebSocket.OPEN) {
        setStatus('Nao conectado');
        return;
    }

    // Create/resume the AudioContext inside the click handler. The audio
    // itself arrives later in a WebSocket callback, and browsers with strict
    // autoplay policies only allow a context to start from a user gesture.
    try {
        if (!audioContext) {
            audioContext = new (window.AudioContext || window.webkitAudioContext)();
        }
        if (audioContext.state === 'suspended') {
            audioContext.resume().catch((err) => {
                console.warn('AudioContext could not be resumed:', err);
            });
        }
    } catch (err) {
        // Generation still proceeds; playback reports its own error later.
        console.warn('AudioContext unavailable:', err);
    }

    resetMetrics();
    setButtons(true);
    setStatus('Gerando...', 'busy');
    startTime = Date.now();

    const idleVideoTimeMs = Math.floor(idleVideo.currentTime * 1000);
    console.log(`Idle video time: ${idleVideoTimeMs}ms`);

    ws.send(JSON.stringify({
        action: 'generate',
        text: text,
        voice: document.getElementById('voice').value,
        idle_video_time_ms: idleVideoTimeMs
    }));
};

// "Parar": stop local playback and return to the idle clip.
btnStop.onclick = () => {
    stopPlayback();
};

// Open the WebSocket as soon as the script runs.
connect();
| </script> |
| </body> |
| </html> |
|
|