marcosremar2 Claude Opus 4.5 committed on
Commit
0e82ee5
·
1 Parent(s): 4f5ba94

Improve idle video sync and smooth transitions

Browse files

- Add idle_video_time_ms tracking for seamless transitions
- Sync idle video position when speech ends (end_video_time_ms)
- Pre-render first frame before showing canvas (eliminates flash)
- Use indexed array for O(1) frame access
- Add generate_complete action proxy for Wav2Lip
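- Disable server-side crossfade: speech frames are forwarded unblended and the idle/speech transition is handled by the client (the crossfade helpers remain in server_streaming.py)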

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

interface/index_streaming.html CHANGED
@@ -66,9 +66,9 @@ const btnGo = document.getElementById('btnGo');
66
  const btnStop = document.getElementById('btnStop');
67
 
68
  let ws = null;
69
- let frameQueue = [];
70
  let isPlaying = false;
71
- let isBuffering = false; // NOVO: acumulando frames antes de tocar
72
  let audioContext = null;
73
  let audioBuffer = null;
74
  let audioSource = null;
@@ -77,6 +77,8 @@ let frameCount = 0;
77
  let totalFrames = 0;
78
  let playbackStartTime = 0;
79
  let animationId = null;
 
 
80
 
81
  // Configuracao
82
  const TARGET_FPS = 25;
@@ -149,14 +151,18 @@ function connect() {
149
  case 'audio':
150
  // MUDANCA: Audio chegou - AGORA iniciar playback sincronizado!
151
  setMetric('mAudio', (msg.duration_ms / 1000).toFixed(2) + 's');
 
 
 
152
  startSyncedPlayback(msg.audio, msg.duration_ms);
153
  break;
154
 
155
  case 'done':
156
  totalFrames = msg.frames;
157
  setMetric('mFrames', msg.frames);
158
- const elapsed = msg.elapsed_ms / 1000;
159
- setMetric('mFps', (msg.frames / elapsed).toFixed(1));
 
160
  break;
161
 
162
  case 'error':
@@ -169,14 +175,15 @@ function connect() {
169
  }
170
 
171
  function addFrame(base64Frame, index) {
172
- // Decodificar frame e adicionar na fila
173
  const img = new Image();
174
  img.onload = () => {
175
- frameQueue.push({ img, index, loaded: true });
 
176
  frameCount++;
177
 
178
  // Ajustar tamanho do canvas no primeiro frame
179
- if (frameCount === 1) {
180
  talkCanvas.width = img.width;
181
  talkCanvas.height = img.height;
182
  }
@@ -193,14 +200,19 @@ function startBuffering() {
193
  // Preparar para receber frames, mas NAO iniciar playback
194
  isBuffering = true;
195
  isPlaying = false;
196
- frameQueue = [];
197
  frameCount = 0;
198
  totalFrames = 0;
 
199
 
200
- // Mostrar canvas (mas ainda sem renderizar)
201
- talkCanvas.style.display = 'block';
202
  }
203
 
 
 
 
 
204
  async function startSyncedPlayback(base64Audio, durationMs) {
205
  // Audio chegou - iniciar playback sincronizado de video + audio
206
  setStatus('Reproduzindo...', 'ok');
@@ -240,14 +252,37 @@ async function startSyncedPlayback(base64Audio, durationMs) {
240
 
241
  audioSource.onended = () => {
242
  audioSource = null;
243
- // Dar tempo para ultimo frame antes de parar
244
- setTimeout(() => {
245
- if (!audioSource) {
246
- stopPlayback();
247
- }
248
- }, 200);
249
  };
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  // INICIAR TUDO SINCRONIZADO: audio + video ao mesmo tempo!
252
  isPlaying = true;
253
  isBuffering = false;
@@ -271,27 +306,40 @@ function renderLoop() {
271
  if (!isPlaying) return;
272
 
273
  const elapsed = performance.now() - playbackStartTime;
274
- const targetFrame = Math.floor(elapsed / FRAME_DURATION);
275
-
276
- // Encontrar e renderizar o frame correspondente ao tempo
277
- const frame = frameQueue.find(f => f.index === targetFrame);
278
 
279
- if (frame && frame.loaded) {
280
- ctx.drawImage(frame.img, 0, 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
- // Limpar frames antigos para liberar memoria
283
- frameQueue = frameQueue.filter(f => f.index >= targetFrame - 1);
 
 
284
  }
285
 
286
  // Atualizar progresso visual
287
- const total = totalFrames || frameCount;
288
  if (total > 0) {
289
  const displayedFrame = Math.min(targetFrame, total);
290
  progress.style.width = (displayedFrame / total * 100) + '%';
291
  }
292
 
293
  // Continuar enquanto tiver audio ou frames
294
- if (audioSource || targetFrame < (totalFrames || frameCount)) {
295
  animationId = requestAnimationFrame(renderLoop);
296
  } else {
297
  stopPlayback();
@@ -314,11 +362,23 @@ function stopPlayback() {
314
  audioSource = null;
315
  }
316
 
 
 
 
 
 
 
 
 
 
 
 
317
  // Esconder canvas, mostrar idle
318
  talkCanvas.style.display = 'none';
319
  ctx.clearRect(0, 0, talkCanvas.width, talkCanvas.height);
320
 
321
- frameQueue = [];
 
322
  setStatus('Pronto', 'ok');
323
  setButtons(false);
324
  }
@@ -341,10 +401,15 @@ btnGo.onclick = () => {
341
  setStatus('Gerando...', 'busy');
342
  startTime = Date.now();
343
 
 
 
 
 
344
  ws.send(JSON.stringify({
345
  action: 'generate',
346
  text: text,
347
- voice: document.getElementById('voice').value
 
348
  }));
349
  };
350
 
 
66
  const btnStop = document.getElementById('btnStop');
67
 
68
  let ws = null;
69
+ let frames = []; // Array indexado para acesso O(1)
70
  let isPlaying = false;
71
+ let isBuffering = false;
72
  let audioContext = null;
73
  let audioBuffer = null;
74
  let audioSource = null;
 
77
  let totalFrames = 0;
78
  let playbackStartTime = 0;
79
  let animationId = null;
80
+ let lastRenderedFrame = -1; // Para evitar re-renderizar mesmo frame
81
+ let endVideoTimeMs = 0; // Tempo do idle video onde a fala termina
82
 
83
  // Configuracao
84
  const TARGET_FPS = 25;
 
151
  case 'audio':
152
  // MUDANCA: Audio chegou - AGORA iniciar playback sincronizado!
153
  setMetric('mAudio', (msg.duration_ms / 1000).toFixed(2) + 's');
154
+ // Calcular FPS real baseado na duracao do audio
155
+ const realFps = (totalFrames || frameCount) / (msg.duration_ms / 1000);
156
+ setMetric('mFps', realFps.toFixed(1));
157
  startSyncedPlayback(msg.audio, msg.duration_ms);
158
  break;
159
 
160
  case 'done':
161
  totalFrames = msg.frames;
162
  setMetric('mFrames', msg.frames);
163
+ // Salvar end_video_time_ms para sincronizar idle quando fala terminar
164
+ endVideoTimeMs = msg.end_video_time_ms || 0;
165
+ console.log(`Done: ${msg.frames} frames, end_video_time: ${endVideoTimeMs}ms`);
166
  break;
167
 
168
  case 'error':
 
175
  }
176
 
177
  function addFrame(base64Frame, index) {
178
+ // Decodificar frame e adicionar no array indexado
179
  const img = new Image();
180
  img.onload = () => {
181
+ // Armazenar no indice correto para acesso O(1)
182
+ frames[index] = img;
183
  frameCount++;
184
 
185
  // Ajustar tamanho do canvas no primeiro frame
186
+ if (index === 0) {
187
  talkCanvas.width = img.width;
188
  talkCanvas.height = img.height;
189
  }
 
200
  // Preparar para receber frames, mas NAO iniciar playback
201
  isBuffering = true;
202
  isPlaying = false;
203
+ frames = []; // Reset array indexado
204
  frameCount = 0;
205
  totalFrames = 0;
206
+ lastRenderedFrame = -1;
207
 
208
+ // NAO mostrar canvas ainda - so quando primeiro frame estiver pronto
209
+ // talkCanvas.style.display = 'block';
210
  }
211
 
212
+ // Duracao real do audio (usado para sincronizar frames)
213
+ let audioDurationMs = 0;
214
+ let dynamicFrameDuration = FRAME_DURATION;
215
+
216
  async function startSyncedPlayback(base64Audio, durationMs) {
217
  // Audio chegou - iniciar playback sincronizado de video + audio
218
  setStatus('Reproduzindo...', 'ok');
 
252
 
253
  audioSource.onended = () => {
254
  audioSource = null;
255
+ // Transicao imediata quando audio termina
256
+ stopPlayback();
 
 
 
 
257
  };
258
 
259
+ // Calcular quantos frames usar baseado na duracao do audio
260
+ // Manter 25fps fixo e usar apenas os frames necessarios
261
+ audioDurationMs = durationMs;
262
+ dynamicFrameDuration = FRAME_DURATION; // Sempre 40ms (25fps)
263
+
264
+ // Calcular quantos frames cabem na duracao do audio
265
+ const framesNeeded = Math.floor(durationMs / FRAME_DURATION);
266
+ const numFrames = totalFrames || frameCount;
267
+
268
+ // Limitar ao numero de frames disponiveis ou necessarios (o menor)
269
+ const framesToUse = Math.min(framesNeeded, numFrames);
270
+
271
+ console.log(`Audio: ${durationMs}ms, Frames disponiveis: ${numFrames}, Frames a usar: ${framesToUse} (${(1000/dynamicFrameDuration).toFixed(1)}fps)`);
272
+
273
+ // Atualizar totalFrames para usar apenas os necessarios
274
+ totalFrames = framesToUse;
275
+
276
+ // PRE-RENDERIZAR primeiro frame ANTES de mostrar canvas
277
+ // Isso evita o "tec" de um frame em branco
278
+ if (frames[0]) {
279
+ ctx.drawImage(frames[0], 0, 0);
280
+ lastRenderedFrame = 0;
281
+ }
282
+
283
+ // Agora mostrar o canvas (ja com o primeiro frame renderizado)
284
+ talkCanvas.style.display = 'block';
285
+
286
  // INICIAR TUDO SINCRONIZADO: audio + video ao mesmo tempo!
287
  isPlaying = true;
288
  isBuffering = false;
 
306
  if (!isPlaying) return;
307
 
308
  const elapsed = performance.now() - playbackStartTime;
309
+ // Usar duracao dinamica para sincronizar com audio
310
+ const targetFrame = Math.floor(elapsed / dynamicFrameDuration);
311
+ const total = totalFrames || frameCount;
 
312
 
313
+ // So renderizar se for um frame diferente do anterior
314
+ if (targetFrame !== lastRenderedFrame && targetFrame < total) {
315
+ // Acesso O(1) ao frame pelo indice
316
+ let frameToRender = frames[targetFrame];
317
+
318
+ // Se frame ainda nao chegou, usar o ultimo frame disponivel
319
+ if (!frameToRender) {
320
+ // Procurar frame mais proximo anterior
321
+ for (let i = targetFrame - 1; i >= 0; i--) {
322
+ if (frames[i]) {
323
+ frameToRender = frames[i];
324
+ break;
325
+ }
326
+ }
327
+ }
328
 
329
+ if (frameToRender) {
330
+ ctx.drawImage(frameToRender, 0, 0);
331
+ lastRenderedFrame = targetFrame;
332
+ }
333
  }
334
 
335
  // Atualizar progresso visual
 
336
  if (total > 0) {
337
  const displayedFrame = Math.min(targetFrame, total);
338
  progress.style.width = (displayedFrame / total * 100) + '%';
339
  }
340
 
341
  // Continuar enquanto tiver audio ou frames
342
+ if (audioSource || targetFrame < total) {
343
  animationId = requestAnimationFrame(renderLoop);
344
  } else {
345
  stopPlayback();
 
362
  audioSource = null;
363
  }
364
 
365
+ // Sincronizar idle video para o tempo correto (onde a fala terminou)
366
+ if (endVideoTimeMs > 0) {
367
+ const targetTime = endVideoTimeMs / 1000;
368
+ // Garantir que o tempo esta dentro da duracao do video
369
+ if (idleVideo.duration > 0) {
370
+ idleVideo.currentTime = targetTime % idleVideo.duration;
371
+ console.log(`Idle video sync: ${targetTime.toFixed(2)}s`);
372
+ }
373
+ endVideoTimeMs = 0; // Reset para proxima vez
374
+ }
375
+
376
  // Esconder canvas, mostrar idle
377
  talkCanvas.style.display = 'none';
378
  ctx.clearRect(0, 0, talkCanvas.width, talkCanvas.height);
379
 
380
+ frames = [];
381
+ lastRenderedFrame = -1;
382
  setStatus('Pronto', 'ok');
383
  setButtons(false);
384
  }
 
401
  setStatus('Gerando...', 'busy');
402
  startTime = Date.now();
403
 
404
+ // Capturar o tempo atual do video idle para sincronizacao
405
+ const idleVideoTimeMs = Math.floor(idleVideo.currentTime * 1000);
406
+ console.log(`Idle video time: ${idleVideoTimeMs}ms`);
407
+
408
  ws.send(JSON.stringify({
409
  action: 'generate',
410
  text: text,
411
+ voice: document.getElementById('voice').value,
412
+ idle_video_time_ms: idleVideoTimeMs // Enviar para servidor sincronizar frames
413
  }));
414
  };
415
 
interface/server_streaming.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Interface Server - Streaming Progressivo
3
- Envia frames JPEG + audio conforme recebe do Wav2Lip
4
  """
5
  from aiohttp import web
6
  import aiohttp
@@ -9,18 +9,91 @@ import json
9
  import base64
10
  import os
11
  import time
 
 
12
 
13
  WAV2LIP_WS = os.getenv("WAV2LIP_WS", "ws://localhost:8082/ws")
14
  PORT = int(os.getenv("PORT", "8000"))
 
 
 
 
15
 
16
  routes = web.RouteTableDef()
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  @routes.get("/ws")
19
  async def websocket_handler(request):
20
  ws = web.WebSocketResponse()
21
  await ws.prepare(request)
22
  print("Cliente conectado")
23
 
 
 
 
24
  try:
25
  async for msg in ws:
26
  if msg.type == aiohttp.WSMsgType.TEXT:
@@ -30,12 +103,13 @@ async def websocket_handler(request):
30
  if action == "generate":
31
  text = data.get("text", "").strip()
32
  voice = data.get("voice", "tara")
 
33
 
34
  if not text:
35
  await ws.send_json({"type": "error", "message": "Text required"})
36
  continue
37
 
38
- print(f"Gerando: {text[:50]}...")
39
  start_time = time.time()
40
 
41
  try:
@@ -45,15 +119,18 @@ async def websocket_handler(request):
45
  timeout=aiohttp.ClientWSTimeout(ws_close=120)
46
  )
47
 
48
- # Usar action: generate que faz streaming de frames
49
  await wav2lip_ws.send_json({
50
  "action": "generate",
51
  "text": text,
52
- "voice": voice
 
53
  })
54
 
55
- frame_count = 0
56
- first_frame_sent = False
 
 
 
57
 
58
  async for w2l_msg in wav2lip_ws:
59
  if w2l_msg.type == aiohttp.WSMsgType.TEXT:
@@ -63,45 +140,19 @@ async def websocket_handler(request):
63
  if msg_type == "status":
64
  await ws.send_json(w2l_data)
65
 
66
- elif msg_type == "first_chunk":
67
- latency = w2l_data.get("latency_ms", 0)
68
- await ws.send_json({
69
- "type": "first_chunk",
70
- "latency_ms": latency
71
- })
72
-
73
  elif msg_type == "frame":
74
- # Repassar frame diretamente
75
- frame_count += 1
76
- if not first_frame_sent:
77
- first_frame_sent = True
78
- ttfb = int((time.time() - start_time) * 1000)
79
- await ws.send_json({
80
- "type": "stream_start",
81
- "ttfb_ms": ttfb
82
- })
83
-
84
- await ws.send_json({
85
- "type": "frame",
86
- "frame": w2l_data.get("frame"),
87
- "index": frame_count - 1
88
- })
89
 
90
  elif msg_type == "full_audio":
91
- # Enviar audio completo
92
- await ws.send_json({
93
- "type": "audio",
94
- "audio": w2l_data.get("audio"),
95
- "duration_ms": w2l_data.get("duration_ms", 0)
96
- })
97
 
98
  elif msg_type == "done":
99
- elapsed = int((time.time() - start_time) * 1000)
100
- await ws.send_json({
101
- "type": "done",
102
- "frames": frame_count,
103
- "elapsed_ms": elapsed
104
- })
105
  break
106
 
107
  elif msg_type == "error":
@@ -113,8 +164,96 @@ async def websocket_handler(request):
113
 
114
  await wav2lip_ws.close()
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  except Exception as e:
117
  print(f"Erro: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  await ws.send_json({"type": "error", "message": str(e)})
119
 
120
  elif action == "ping":
@@ -128,16 +267,9 @@ async def websocket_handler(request):
128
  return ws
129
 
130
 
131
- @routes.get("/health")
132
- async def health(request):
133
- return web.json_response({"status": "ok", "mode": "streaming"})
134
-
135
-
136
  @routes.get("/")
137
  async def index(request):
138
- return web.FileResponse(
139
- os.path.join(os.path.dirname(__file__), "index_streaming.html")
140
- )
141
 
142
 
143
  @routes.get("/{filename}")
@@ -154,9 +286,14 @@ app.add_routes(routes)
154
 
155
  if __name__ == "__main__":
156
  print("=" * 50)
157
- print("Streaming Server - Progressive Frame Delivery")
 
 
 
158
  print("=" * 50)
159
- print(f"Porta: {PORT}")
160
- print(f"Wav2Lip: {WAV2LIP_WS}")
 
 
161
  print("=" * 50)
162
  web.run_app(app, host="0.0.0.0", port=PORT)
 
1
  """
2
+ Interface Server - Streaming com Crossfade Suave
3
+ Faz transicao suave entre idle e fala usando blending de frames
4
  """
5
  from aiohttp import web
6
  import aiohttp
 
9
  import base64
10
  import os
11
  import time
12
+ import cv2
13
+ import numpy as np
14
 
15
  WAV2LIP_WS = os.getenv("WAV2LIP_WS", "ws://localhost:8082/ws")
16
  PORT = int(os.getenv("PORT", "8000"))
17
+ IDLE_VIDEO = os.path.join(os.path.dirname(__file__), "idle.mp4")
18
+
19
+ # Configuracao de crossfade
20
+ CROSSFADE_FRAMES = 5 # Numero de frames para transicao (200ms @ 25fps)
21
 
22
  routes = web.RouteTableDef()
23
 
24
+ # Cache de frames idle
25
+ idle_frames = []
26
+ idle_frame_count = 0
27
+
28
+
29
+ def load_idle_frames():
30
+ """Carrega frames do idle.mp4"""
31
+ global idle_frames, idle_frame_count
32
+
33
+ if idle_frames:
34
+ return
35
+
36
+ if not os.path.exists(IDLE_VIDEO):
37
+ print(f"[AVISO] Idle video nao encontrado: {IDLE_VIDEO}")
38
+ return
39
+
40
+ print(f"Carregando idle frames de {IDLE_VIDEO}...")
41
+ cap = cv2.VideoCapture(IDLE_VIDEO)
42
+
43
+ while True:
44
+ ret, frame = cap.read()
45
+ if not ret:
46
+ break
47
+ # Manter em BGR para processamento, converter para JPEG depois
48
+ idle_frames.append(frame)
49
+
50
+ cap.release()
51
+ idle_frame_count = len(idle_frames)
52
+ print(f"Carregados {idle_frame_count} frames idle")
53
+
54
+
55
+ def frame_to_jpeg_base64(frame, quality=85):
56
+ """Converte frame numpy para JPEG base64"""
57
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
58
+ _, buffer = cv2.imencode('.jpg', frame, encode_param)
59
+ return base64.b64encode(buffer).decode('utf-8')
60
+
61
+
62
+ def jpeg_base64_to_frame(b64_data):
63
+ """Converte JPEG base64 para frame numpy"""
64
+ jpeg_data = base64.b64decode(b64_data)
65
+ nparr = np.frombuffer(jpeg_data, np.uint8)
66
+ return cv2.imdecode(nparr, cv2.IMREAD_COLOR)
67
+
68
+
69
+ def blend_frames(frame1, frame2, alpha):
70
+ """Blend entre dois frames. alpha=0 -> frame1, alpha=1 -> frame2"""
71
+ # Garantir que ambos frames tem o mesmo tamanho
72
+ if frame1.shape != frame2.shape:
73
+ frame2 = cv2.resize(frame2, (frame1.shape[1], frame1.shape[0]))
74
+
75
+ return cv2.addWeighted(frame1, 1 - alpha, frame2, alpha, 0)
76
+
77
+
78
+ def create_crossfade_frames(from_frame, to_frame, num_frames):
79
+ """Cria frames de transicao suave entre dois frames"""
80
+ frames = []
81
+ for i in range(num_frames):
82
+ alpha = (i + 1) / (num_frames + 1) # 0.16, 0.33, 0.5, 0.66, 0.83 para 5 frames
83
+ blended = blend_frames(from_frame, to_frame, alpha)
84
+ frames.append(blended)
85
+ return frames
86
+
87
+
88
  @routes.get("/ws")
89
  async def websocket_handler(request):
90
  ws = web.WebSocketResponse()
91
  await ws.prepare(request)
92
  print("Cliente conectado")
93
 
94
+ # Posicao atual no idle loop (para continuidade)
95
+ idle_position = 0
96
+
97
  try:
98
  async for msg in ws:
99
  if msg.type == aiohttp.WSMsgType.TEXT:
 
103
  if action == "generate":
104
  text = data.get("text", "").strip()
105
  voice = data.get("voice", "tara")
106
+ idle_video_time_ms = data.get("idle_video_time_ms", 0)
107
 
108
  if not text:
109
  await ws.send_json({"type": "error", "message": "Text required"})
110
  continue
111
 
112
+ print(f"Gerando: {text[:50]}... (idle_time: {idle_video_time_ms}ms)")
113
  start_time = time.time()
114
 
115
  try:
 
119
  timeout=aiohttp.ClientWSTimeout(ws_close=120)
120
  )
121
 
 
122
  await wav2lip_ws.send_json({
123
  "action": "generate",
124
  "text": text,
125
+ "voice": voice,
126
+ "idle_video_time_ms": idle_video_time_ms
127
  })
128
 
129
+ # Coletar todos os frames
130
+ speaking_frames = []
131
+ audio_data = None
132
+ audio_duration = 0
133
+ end_video_time_ms = 0
134
 
135
  async for w2l_msg in wav2lip_ws:
136
  if w2l_msg.type == aiohttp.WSMsgType.TEXT:
 
140
  if msg_type == "status":
141
  await ws.send_json(w2l_data)
142
 
 
 
 
 
 
 
 
143
  elif msg_type == "frame":
144
+ frame_b64 = w2l_data.get("frame", "")
145
+ if frame_b64:
146
+ frame = jpeg_base64_to_frame(frame_b64)
147
+ speaking_frames.append(frame)
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  elif msg_type == "full_audio":
150
+ audio_data = w2l_data.get("audio", "")
151
+ audio_duration = w2l_data.get("duration_ms", 0)
 
 
 
 
152
 
153
  elif msg_type == "done":
154
+ # Capturar end_video_time_ms para sincronizar idle
155
+ end_video_time_ms = w2l_data.get("end_video_time_ms", 0)
 
 
 
 
156
  break
157
 
158
  elif msg_type == "error":
 
164
 
165
  await wav2lip_ws.close()
166
 
167
+ # Enviar frames SEM crossfade - transicao e feita no cliente
168
+ if speaking_frames:
169
+ # Atualizar posicao do idle para continuidade apos fala
170
+ if idle_frames:
171
+ idle_position = (idle_position + len(speaking_frames)) % idle_frame_count
172
+
173
+ # Enviar stream_start
174
+ ttfb = int((time.time() - start_time) * 1000)
175
+ await ws.send_json({"type": "stream_start", "ttfb_ms": ttfb})
176
+
177
+ # Enviar apenas os frames de fala (sem crossfade)
178
+ for idx, frame in enumerate(speaking_frames):
179
+ frame_b64 = frame_to_jpeg_base64(frame)
180
+ await ws.send_json({
181
+ "type": "frame",
182
+ "frame": frame_b64,
183
+ "index": idx
184
+ })
185
+
186
+ # Enviar audio
187
+ if audio_data:
188
+ await ws.send_json({
189
+ "type": "audio",
190
+ "audio": audio_data,
191
+ "duration_ms": audio_duration
192
+ })
193
+
194
+ # Enviar done com end_video_time_ms para sincronizar idle
195
+ elapsed = int((time.time() - start_time) * 1000)
196
+ await ws.send_json({
197
+ "type": "done",
198
+ "frames": len(speaking_frames),
199
+ "elapsed_ms": elapsed,
200
+ "end_video_time_ms": end_video_time_ms
201
+ })
202
+
203
+ print(f"Enviados {len(speaking_frames)} frames de fala (sem crossfade)")
204
+
205
  except Exception as e:
206
  print(f"Erro: {e}")
207
+ import traceback
208
+ traceback.print_exc()
209
+ await ws.send_json({"type": "error", "message": str(e)})
210
+
211
+ elif action == "generate_complete":
212
+ # Proxy para generate_complete do Wav2Lip
213
+ text = data.get("text", "").strip()
214
+ voice = data.get("voice", "tara")
215
+ idle_before_frames = data.get("idle_before_frames", 0)
216
+ idle_after_frames = data.get("idle_after_frames", 0)
217
+ crossfade_frames = data.get("crossfade_frames", 0)
218
+ jpeg_quality = data.get("jpeg_quality", 95)
219
+
220
+ if not text:
221
+ await ws.send_json({"type": "error", "message": "Text required"})
222
+ continue
223
+
224
+ print(f"Generate Complete: {text[:50]}...")
225
+
226
+ try:
227
+ async with aiohttp.ClientSession() as session:
228
+ wav2lip_ws = await session.ws_connect(
229
+ WAV2LIP_WS,
230
+ timeout=aiohttp.ClientWSTimeout(ws_close=120)
231
+ )
232
+
233
+ await wav2lip_ws.send_json({
234
+ "action": "generate_complete",
235
+ "text": text,
236
+ "voice": voice,
237
+ "idle_before_frames": idle_before_frames,
238
+ "idle_after_frames": idle_after_frames,
239
+ "crossfade_frames": crossfade_frames,
240
+ "jpeg_quality": jpeg_quality
241
+ })
242
+
243
+ # Repassar todas as mensagens
244
+ async for w2l_msg in wav2lip_ws:
245
+ if w2l_msg.type == aiohttp.WSMsgType.TEXT:
246
+ await ws.send_str(w2l_msg.data)
247
+ w2l_data = json.loads(w2l_msg.data)
248
+ if w2l_data.get("type") in ("done", "error"):
249
+ break
250
+ elif w2l_msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
251
+ break
252
+
253
+ await wav2lip_ws.close()
254
+
255
+ except Exception as e:
256
+ print(f"Erro generate_complete: {e}")
257
  await ws.send_json({"type": "error", "message": str(e)})
258
 
259
  elif action == "ping":
 
267
  return ws
268
 
269
 
 
 
 
 
 
270
  @routes.get("/")
271
  async def index(request):
272
+ return web.FileResponse(os.path.join(os.path.dirname(__file__), "index_streaming.html"))
 
 
273
 
274
 
275
  @routes.get("/{filename}")
 
286
 
287
  if __name__ == "__main__":
288
  print("=" * 50)
289
+ print("Streaming Server com Crossfade - Porta", PORT)
290
+ print("Wav2Lip:", WAV2LIP_WS)
291
+ print("Idle Video:", IDLE_VIDEO)
292
+ print("Crossfade: DESABILITADO (transicao no cliente)")
293
  print("=" * 50)
294
+
295
+ # Carregar idle frames
296
+ load_idle_frames()
297
+
298
  print("=" * 50)
299
  web.run_app(app, host="0.0.0.0", port=PORT)