speech2speech-interface / interface /index_realtime.html
marcosremar2's picture
Add real-time streaming avatar interface
14b6b3e
Raw
History Blame Contribute Delete
17.8 kB
<!DOCTYPE html>
<!--
Avatar - Realtime Streaming
WORKING VERSION - 2024-12-27
Renders JPEG frames on a canvas and plays audio through the Web Audio API.
No ffmpeg/WebM involved, for lower latency.
Metrics displayed:
- First-frame latency
- Frames received vs. rendered
- Frame buffer size
- Actual FPS
-->
<html lang="pt-BR">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Avatar - Realtime Streaming</title>
<style>
/* Global reset: strip default spacing and size every element by its border box. */
* { margin: 0; padding: 0; box-sizing: border-box; }
/* Page shell: dark gradient background with white text. */
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: linear-gradient(135deg, #0a0a1a 0%, #1a1a3a 100%);
color: #fff;
min-height: 100vh;
padding: 20px;
}
.container { max-width: 1200px; margin: 0 auto; }
h1 { text-align: center; margin-bottom: 20px; color: #00d4ff; }
/* Status pills (WebSocket / stream / FPS). The script replaces the whole class
   attribute with "status-item <state>", so the state modifiers below are
   mutually exclusive. */
.status-bar {
display: flex; gap: 20px; justify-content: center; margin-bottom: 20px;
flex-wrap: wrap;
}
.status-item {
padding: 10px 20px; border-radius: 20px;
background: rgba(255,255,255,0.1);
font-size: 14px;
}
.status-item.online { background: rgba(0,255,100,0.2); color: #0f0; }
.status-item.offline { background: rgba(255,0,0,0.2); color: #f00; }
.status-item.streaming { background: rgba(0,200,255,0.2); color: #0cf; }
/* Two flexible panels (video and controls) that wrap when the row is too narrow. */
.main-content { display: flex; gap: 20px; flex-wrap: wrap; }
.video-section {
flex: 1; min-width: 400px;
background: rgba(0,0,0,0.3); border-radius: 15px; padding: 20px;
}
/* Fixed 16:9 stage; also the positioning context for the overlay. */
.canvas-container {
width: 100%; aspect-ratio: 16/9;
background: #000; border-radius: 10px; overflow: hidden;
position: relative;
}
/* The canvas bitmap size is set by the script from the decoded frame; this
   only controls how that bitmap is fitted into the stage. */
#avatar-canvas {
width: 100%; height: 100%;
object-fit: contain;
}
/* Message layer covering the stage ("Aguardando..." / "Gerando..."). */
.canvas-overlay {
position: absolute; top: 0; left: 0; right: 0; bottom: 0;
display: flex; align-items: center; justify-content: center;
background: rgba(0,0,0,0.7);
font-size: 18px; color: #aaa;
pointer-events: none;
transition: opacity 0.3s;
}
/* Fades the overlay out; pointer-events is already none on the base rule. */
.canvas-overlay.hidden { opacity: 0; pointer-events: none; }
.control-section {
flex: 1; min-width: 300px;
background: rgba(0,0,0,0.3); border-radius: 15px; padding: 20px;
}
/* Form controls. */
.input-group { margin-bottom: 15px; }
label { display: block; margin-bottom: 5px; color: #aaa; }
textarea {
width: 100%; height: 100px; padding: 10px;
border: 1px solid #333; border-radius: 8px;
background: rgba(255,255,255,0.05); color: #fff;
resize: vertical; font-size: 14px;
}
select, button {
width: 100%; padding: 12px; margin-top: 10px;
border: none; border-radius: 8px; cursor: pointer;
}
select { background: rgba(255,255,255,0.1); color: #fff; }
.btn-primary {
background: linear-gradient(135deg, #00d4ff, #0066ff);
color: #fff; font-weight: bold; font-size: 16px;
}
.btn-danger {
background: linear-gradient(135deg, #ff4444, #cc0000);
color: #fff; font-weight: bold; font-size: 16px;
}
/* :disabled is declared after :hover so a disabled button stays dimmed while hovered. */
button:hover { opacity: 0.9; }
button:disabled { opacity: 0.5; cursor: not-allowed; }
/* Metrics panel. */
.metrics {
margin-top: 20px; padding: 15px;
background: rgba(255,255,255,0.05); border-radius: 8px;
}
.metrics h4 { margin-bottom: 10px; color: #00d4ff; }
.metric-row { display: flex; justify-content: space-between; padding: 5px 0; }
.metric-value { color: #00d4ff; font-weight: bold; }
/* Scrollable log console; the log-<type> classes are chosen by the script's log() helper. */
.log {
margin-top: 20px; padding: 10px;
background: #000; border-radius: 8px;
font-family: monospace; font-size: 11px;
max-height: 120px; overflow-y: auto;
}
.log-entry { padding: 2px 0; border-bottom: 1px solid #222; }
.log-time { color: #666; }
.log-msg { color: #0f0; }
.log-error { color: #f00; }
.log-status { color: #fc0; }
</style>
</head>
<body>
<!-- Page layout: status bar on top, then the avatar stage next to the control panel.
     Element ids are looked up by the inline script and must stay unchanged. -->
<div class="container">
  <h1>Avatar - Realtime Streaming</h1>
  <!-- Connection and stream state are announced to assistive technology; the FPS
       pill is rewritten on every rendered frame, so it is deliberately not a live region. -->
  <div class="status-bar">
    <div class="status-item" id="ws-status" role="status">WebSocket: --</div>
    <div class="status-item" id="stream-status" role="status">Stream: Idle</div>
    <div class="status-item" id="fps-status">FPS: --</div>
  </div>
  <div class="main-content">
    <div class="video-section">
      <h3>Avatar Stream</h3>
      <div class="canvas-container">
        <!-- A canvas has no intrinsic accessible name, so one is supplied here. -->
        <canvas id="avatar-canvas" role="img" aria-label="Avatar Stream"></canvas>
        <div class="canvas-overlay" id="overlay">
          Aguardando...
        </div>
      </div>
    </div>
    <div class="control-section">
      <h3>Controles</h3>
      <div class="input-group">
        <!-- "for" ties each label to its control so clicking the label focuses it
             and screen readers announce it. -->
        <label for="text-input">Texto:</label>
        <textarea id="text-input" placeholder="Digite o texto...">Hello! I am a real-time streaming avatar.</textarea>
      </div>
      <div class="input-group">
        <label for="voice-select">Voz:</label>
        <select id="voice-select">
          <option value="tara">Tara (Female)</option>
          <option value="leah">Leah (Female)</option>
          <option value="jess">Jess (Female)</option>
          <option value="leo">Leo (Male)</option>
          <option value="dan">Dan (Male)</option>
        </select>
      </div>
      <!-- type="button": a bare <button> defaults to submit if it ever ends up inside a form. -->
      <button type="button" id="generate-btn" class="btn-primary" onclick="generate()">
        Gerar
      </button>
      <button type="button" id="stop-btn" class="btn-danger" onclick="stop()" disabled>
        Parar
      </button>
      <div class="metrics">
        <h4>Metricas</h4>
        <div class="metric-row">
          <span>Latencia 1o frame:</span>
          <span class="metric-value" id="latency">--</span>
        </div>
        <div class="metric-row">
          <span>Frames recebidos:</span>
          <span class="metric-value" id="frames">0</span>
        </div>
        <div class="metric-row">
          <span>Frames renderizados:</span>
          <span class="metric-value" id="rendered">0</span>
        </div>
        <div class="metric-row">
          <span>Buffer:</span>
          <span class="metric-value" id="buffer">0</span>
        </div>
      </div>
      <!-- role="log" makes newly added entries a polite live region. -->
      <div class="log" id="log" role="log"></div>
    </div>
  </div>
</div>
<script>
// WebSocket endpoint on the host that served this page. The scheme mirrors the
// page's own, because browsers refuse an insecure ws:// connection from a page
// loaded over https:// (mixed content).
const WS_URL = (window.location.protocol === "https:" ? "wss://" : "ws://") + window.location.host + "/ws";
// Playback rate used to pace the render timer, and the matching tick period in ms.
const TARGET_FPS = 25;
const FRAME_INTERVAL = 1000 / TARGET_FPS;
// Current socket (null until the first connect) and whether a generation is in progress.
let ws = null;
let isStreaming = false;
// Wall-clock time of the last generate() call; written there, not read in this script.
let startTime = null;
// Frame buffer and rendering
let frameQueue = [];       // base64 JPEG payloads waiting to be drawn, oldest first
let renderedFrames = 0;    // frames actually drawn for the current generation
let renderInterval = null; // id of the render timer, or null when it is not running
let lastRenderTime = 0;    // performance.now() of the previous render tick, for the FPS readout
// Audio
let audioContext = null;   // created lazily when the first audio message arrives
let audioBuffer = null;    // most recently decoded clip
let audioSource = null;    // source node currently playing, if any
let audioStartTime = null; // never assigned in this script
// Canvas
const canvas = document.getElementById("avatar-canvas");
const ctx = canvas.getContext("2d");
const overlay = document.getElementById("overlay");
// One Image object is reused to decode every frame.
const frameImage = new Image();
/**
 * Prepends a timestamped entry to the on-page log and keeps at most 30 entries.
 *
 * The entry is assembled from DOM nodes and the message is assigned through
 * textContent, so any markup inside `msg` (server-supplied status/error text,
 * the user's own input) is shown literally instead of being parsed as HTML.
 * Building nodes also avoids re-parsing the whole log on every call.
 *
 * @param {*} msg - Text to display; converted with String().
 * @param {string} [type="msg"] - Suffix of the CSS class put on the message
 *   (the stylesheet defines log-msg, log-error and log-status).
 */
function log(msg, type = "msg") {
  const logDiv = document.getElementById("log");

  const timeSpan = document.createElement("span");
  timeSpan.className = "log-time";
  timeSpan.textContent = new Date().toLocaleTimeString();

  const msgSpan = document.createElement("span");
  msgSpan.className = "log-" + type;
  msgSpan.textContent = String(msg);

  const entry = document.createElement("div");
  entry.className = "log-entry";
  entry.append(timeSpan, " ", msgSpan);

  // Newest entry first; drop the oldest ones from the bottom.
  logDiv.prepend(entry);
  while (logDiv.children.length > 30) logDiv.removeChild(logDiv.lastChild);
}
/**
 * Rewrites one status pill.
 *
 * @param {string} id - Element id of the pill.
 * @param {string} status - State modifier appended after "status-item"; may be empty.
 * @param {string} text - Label to show.
 */
function updateStatus(id, status, text) {
  const pill = document.getElementById(id);
  // The class attribute is replaced wholesale, which drops any previous state modifier.
  pill.className = `status-item ${status}`;
  pill.textContent = text;
}
/**
 * Opens the WebSocket to WS_URL and installs its handlers.
 *
 * Does nothing when a socket is already open or still connecting, so repeated
 * calls cannot create parallel connections. When the socket closes, a
 * reconnect is scheduled after 3 seconds.
 */
function connectWebSocket() {
  if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return;
  updateStatus("ws-status", "", "WebSocket: Conectando...");
  ws = new WebSocket(WS_URL);
  ws.onopen = () => {
    updateStatus("ws-status", "online", "WebSocket: Conectado");
    log("Conectado", "msg");
  };
  ws.onmessage = (event) => {
    try {
      const data = JSON.parse(event.data);
      handleMessage(data);
    } catch (e) {
      // Covers malformed JSON as well as errors thrown by a message handler.
      log("Erro: " + e, "error");
    }
  };
  ws.onclose = () => {
    updateStatus("ws-status", "offline", "WebSocket: Desconectado");
    // No "done" or "error" message can arrive on a closed socket, so a stream
    // still in progress would leave the Gerar button disabled. Release the UI.
    if (isStreaming) {
      log("Conexao perdida durante o stream", "error");
      stopStream();
    }
    setTimeout(connectWebSocket, 3000);
  };
  ws.onerror = () => log("Erro WebSocket", "error");
}
/**
 * Routes one parsed server message to its handler based on `data.type`.
 * Messages whose type has no entry below are ignored.
 *
 * @param {object} data - Parsed JSON message; must carry a `type` field.
 */
function handleMessage(data) {
  // A Map is used so lookups compare keys without string coercion and never
  // hit inherited object properties.
  const routes = new Map([
    ["frame", () => {
      handleFrame(data);
    }],
    ["audio", () => {
      handleAudio(data);
    }],
    ["first_frame", () => {
      const latencyText = data.latency_ms + "ms";
      document.getElementById("latency").textContent = latencyText;
      log(`Primeiro frame: ${data.latency_ms}ms`, "status");
    }],
    ["done", () => {
      // Rendering and audio are left running so buffered frames can finish playing.
      log(`Concluido: ${data.total_frames} frames`, "msg");
      updateStatus("stream-status", "online", "Stream: Concluido");
      isStreaming = false;
      document.getElementById("generate-btn").disabled = false;
      document.getElementById("stop-btn").disabled = true;
    }],
    ["status", () => {
      log(data.message, "status");
    }],
    ["error", () => {
      log("Erro: " + data.message, "error");
      stopStream();
    }],
  ]);
  const route = routes.get(data.type);
  if (route !== undefined) {
    route();
  }
}
/**
 * Queues one incoming frame for rendering and updates the counters.
 *
 * Frames that arrive while no generation is active (for example ones still in
 * flight after the user pressed Parar, or after an error) are dropped;
 * otherwise they would restart the render timer while the UI reports the
 * stream as idle.
 *
 * @param {object} data - "frame" message; `data.frame` is passed on to the
 *   renderer, which treats it as base64 JPEG data.
 */
function handleFrame(data) {
  if (!isStreaming) return;
  // Add the frame to the buffer
  frameQueue.push(data.frame);
  // The received-frames counter lives in the DOM; generate() resets it to "0".
  const framesEl = document.getElementById("frames");
  const received = parseInt(framesEl.textContent, 10) + 1;
  framesEl.textContent = received;
  document.getElementById("buffer").textContent = frameQueue.length;
  // Hide the overlay on the first frame
  if (received === 1) {
    overlay.classList.add("hidden");
    updateStatus("stream-status", "streaming", "Stream: Ativo");
  }
  // Start rendering if it has not started yet (the queue is non-empty after the push)
  if (!renderInterval) {
    startRendering();
  }
}
/**
 * Starts the render timer, which calls renderNextFrame every FRAME_INTERVAL
 * milliseconds. Has no effect if the timer is already running.
 */
function startRendering() {
  if (!renderInterval) {
    // Baseline for the first FPS measurement.
    lastRenderTime = performance.now();
    renderInterval = setInterval(renderNextFrame, FRAME_INTERVAL);
  }
}
/**
 * Render-timer tick: takes the oldest queued frame, decodes it and draws it.
 *
 * With an empty queue the last drawn frame stays on screen. If the stream has
 * also ended, the timer is stopped here; it would otherwise keep firing after
 * the final frame until the next generation.
 */
function renderNextFrame() {
  if (frameQueue.length === 0) {
    // Buffer empty - keep the last frame on screen
    document.getElementById("buffer").textContent = "0";
    if (!isStreaming && renderInterval) {
      clearInterval(renderInterval);
      renderInterval = null;
    }
    return;
  }
  const frameB64 = frameQueue.shift();
  document.getElementById("buffer").textContent = frameQueue.length;
  // Decode and render. The single shared Image is reused, so assigning a new
  // src before the previous decode finishes replaces that frame; this is why
  // the rendered count can trail the received count.
  frameImage.onload = () => {
    // Resize the canvas bitmap to the frame size (normally only on the first frame)
    if (canvas.width !== frameImage.width || canvas.height !== frameImage.height) {
      canvas.width = frameImage.width;
      canvas.height = frameImage.height;
    }
    ctx.drawImage(frameImage, 0, 0);
    renderedFrames++;
    document.getElementById("rendered").textContent = renderedFrames;
  };
  // Report undecodable frames instead of dropping them silently.
  frameImage.onerror = () => log("Erro ao decodificar frame", "error");
  frameImage.src = "data:image/jpeg;base64," + frameB64;
  // Measure the actual FPS from the time between ticks
  const now = performance.now();
  const elapsed = now - lastRenderTime;
  lastRenderTime = now;
  // Skip the readout for a zero interval, which would display "Infinity".
  if (elapsed > 0) {
    updateStatus("fps-status", "", `FPS: ${(1000 / elapsed).toFixed(0)}`);
  }
}
/**
 * Returns the sample bytes carried by an audio payload.
 *
 * A payload that does not start with "RIFF" is returned unchanged (treated as
 * headerless PCM). For RIFF/WAVE data the chunk list is walked to find the
 * "data" chunk, because the header is only 44 bytes long when "fmt " is the
 * sole chunk before it; extra chunks (LIST, fact, ...) shift the samples.
 *
 * @param {ArrayBuffer} audioData - Decoded payload bytes.
 * @returns {ArrayBuffer} Bytes of the sample data.
 */
function extractPcmBytes(audioData) {
  const total = audioData.byteLength;
  const view = new DataView(audioData);
  const tagAt = (offset) => String.fromCharCode(
    view.getUint8(offset),
    view.getUint8(offset + 1),
    view.getUint8(offset + 2),
    view.getUint8(offset + 3)
  );
  if (total < 4 || tagAt(0) !== "RIFF") return audioData;
  // Chunks start after "RIFF" <size> "WAVE": a 4-byte id, a 4-byte little-endian size, then the payload.
  let offset = 12;
  while (offset + 8 <= total) {
    const size = view.getUint32(offset + 4, true);
    const start = offset + 8;
    if (tagAt(offset) === "data") {
      // A size of 0 or one that overruns the buffer is treated as "until the
      // end" - presumably a placeholder written by a streaming encoder.
      const declaredEnd = start + size;
      const end = size === 0 || declaredEnd > total ? total : declaredEnd;
      return audioData.slice(start, end);
    }
    // Chunk payloads are padded to an even length.
    offset = start + size + (size & 1);
  }
  // No "data" chunk found: fall back to the conventional 44-byte header.
  return audioData.slice(44);
}

/**
 * Decodes an "audio" message and plays it, replacing any clip still playing.
 *
 * Failures are logged to the page and the console rather than thrown.
 *
 * @param {object} data - Message fields read here: `audio` (base64 payload,
 *   WAV or headerless PCM, interpreted as 16-bit mono samples), `sample_rate`
 *   (defaults to 24000 when missing) and `duration_ms` (only logged).
 * @returns {Promise<void>}
 */
async function handleAudio(data) {
  try {
    const sampleRate = data.sample_rate || 24000;
    // Create the AudioContext on first use
    if (!audioContext) {
      audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: sampleRate
      });
    }
    const pcmData = extractPcmBytes(base64ToArrayBuffer(data.audio));
    // Convert int16 PCM to float32. The view length is given explicitly so a
    // stray trailing byte is ignored instead of making Int16Array throw.
    const samples = new Int16Array(pcmData, 0, pcmData.byteLength >> 1);
    const floatSamples = new Float32Array(samples.length);
    for (let i = 0; i < samples.length; i++) {
      floatSamples[i] = samples[i] / 32768;
    }
    // Build the audio buffer
    audioBuffer = audioContext.createBuffer(1, floatSamples.length, sampleRate);
    audioBuffer.getChannelData(0).set(floatSamples);
    // Play it, stopping any clip that is still running
    if (audioSource) {
      audioSource.stop();
    }
    audioSource = audioContext.createBufferSource();
    audioSource.buffer = audioBuffer;
    audioSource.connect(audioContext.destination);
    audioSource.start();
    log(`Audio: ${data.duration_ms}ms`, "status");
  } catch (e) {
    log("Erro audio: " + e, "error");
    console.error("Audio error:", e);
  }
}
/**
 * Decodes a base64 string into the bytes it encodes.
 *
 * @param {string} base64 - Base64 text accepted by atob().
 * @returns {ArrayBuffer} Buffer holding the decoded bytes.
 */
function base64ToArrayBuffer(base64) {
  // atob yields one character per byte (code units 0-255).
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0)).buffer;
}
/**
 * Click handler for "Gerar": validates the input, resets the previous run and
 * asks the server to generate a stream for the entered text and chosen voice.
 * Logs an error and returns early when the text is empty or the socket is not open.
 */
function generate() {
  const byId = (id) => document.getElementById(id);
  const text = byId("text-input").value.trim();
  const voice = byId("voice-select").value;
  if (text === "") {
    log("Digite um texto", "error");
    return;
  }
  const socketOpen = ws && ws.readyState === WebSocket.OPEN;
  if (!socketOpen) {
    log("WebSocket desconectado", "error");
    return;
  }
  // Tear down whatever is still playing, then clear per-run state and counters.
  stopStream();
  frameQueue = [];
  renderedFrames = 0;
  startTime = Date.now();
  for (const counterId of ["frames", "rendered", "buffer"]) {
    byId(counterId).textContent = "0";
  }
  byId("latency").textContent = "--";
  overlay.textContent = "Gerando...";
  overlay.classList.remove("hidden");
  // Switch the UI into streaming mode before the request goes out.
  isStreaming = true;
  byId("generate-btn").disabled = true;
  byId("stop-btn").disabled = false;
  updateStatus("stream-status", "", "Stream: Iniciando...");
  log(`Enviando: ${text.slice(0, 40)}...`, "status");
  const request = { action: "generate", text, voice };
  ws.send(JSON.stringify(request));
}
/**
 * Click handler for "Parar": sends a stop request when the socket is open,
 * then halts local playback in either case.
 */
function stop() {
  const socketOpen = ws && ws.readyState === WebSocket.OPEN;
  if (socketOpen) {
    const request = JSON.stringify({ action: "stop" });
    ws.send(request);
  }
  stopStream();
}
/**
 * Halts local playback and returns the controls to their idle state: stops the
 * render timer and the current audio source, re-enables "Gerar", disables
 * "Parar" and resets the stream status pill. Sends nothing to the server.
 */
function stopStream() {
  isStreaming = false;
  if (renderInterval) {
    clearInterval(renderInterval);
    renderInterval = null;
  }
  if (audioSource) {
    const playing = audioSource;
    audioSource = null;
    // Best effort: an error from stop() is deliberately ignored.
    try { playing.stop(); } catch (e) {}
  }
  const disabledById = { "generate-btn": false, "stop-btn": true };
  for (const [id, disabled] of Object.entries(disabledById)) {
    document.getElementById(id).disabled = disabled;
  }
  updateStatus("stream-status", "", "Stream: Idle");
}
// Heartbeat: every 30 seconds, send a ping while the socket is open.
setInterval(function heartbeat() {
  const socketOpen = ws && ws.readyState === WebSocket.OPEN;
  if (!socketOpen) return;
  ws.send(JSON.stringify({ action: "ping" }));
}, 30 * 1000);
// Startup: open the first connection as soon as the script runs.
connectWebSocket();
</script>
</body>
</html>