marcosremar2
/

speech2speech-interface

Model card Files Files and versions

speech2speech-interface / interface /test_client.py

marcosremar2's picture

Add real-time streaming avatar interface

14b6b3e 6 months ago

History Blame Contribute Delete

3.36 kB

	"""
	Cliente de teste para gerar vídeo via WebSocket e salvar localmente.
	"""
	import asyncio
	import aiohttp
	import json
	import base64
	import sys

	async def generate_video(
	    text: str,
	    voice: str = "tara",
	    output_file: str = "output.webm",
	    ws_url: str = "ws://localhost:8080/ws",
	):
	    """Request a video over WebSocket and save the received WebM data.

	    Connects to ``ws_url``, sends a ``generate`` request carrying ``text``
	    and ``voice``, prints progress for every JSON text message received and,
	    once the exchange ends, writes the collected WebM bytes to
	    ``output_file``.

	    Args:
	        text: Text sent in the ``"text"`` field of the request.
	        voice: Value sent in the ``"voice"`` field of the request.
	        output_file: Path the received WebM bytes are written to.
	        ws_url: WebSocket endpoint to connect to.

	    Returns:
	        ``output_file`` if any WebM data was received and written,
	        otherwise ``None``.
	    """
	    print(f"Conectando a {ws_url}...")

	    # NOTE(review): the scraped source read ``max_msg_size=5010241024``,
	    # which looks like ``50*1024*1024`` with the ``*`` operators stripped by
	    # the page rendering. Restored to 50 MiB here - confirm against the repo.
	    max_msg_size = 50 * 1024 * 1024

	    # Every "webm_chunk" payload is appended here. The previous code
	    # re-assigned the variable per message, so only the last chunk was saved.
	    # Assumes chunks are sequential segments of one stream - TODO confirm
	    # against the server implementation.
	    webm_data = bytearray()

	    timeout = aiohttp.ClientTimeout(total=300)  # 5 minutes
	    async with aiohttp.ClientSession(timeout=timeout) as session:
	        async with session.ws_connect(
	            ws_url,
	            heartbeat=30,
	            receive_timeout=300,
	            max_msg_size=max_msg_size,
	        ) as ws:
	            print("Conectado!")

	            # Send the generation request.
	            request = {
	                "action": "generate",
	                "text": text,
	                "voice": voice,
	            }
	            print(f"Enviando: {text[:50]}...")
	            await ws.send_json(request)

	            # Consume server messages until "done", "error" or the
	            # connection closes.
	            async for msg in ws:
	                if msg.type == aiohttp.WSMsgType.TEXT:
	                    data = json.loads(msg.data)
	                    msg_type = data.get("type", "")

	                    if msg_type == "status":
	                        print(f" Status: {data.get('message')}")

	                    elif msg_type == "first_frame":
	                        print(f" Primeiro frame: {data.get('latency_ms')}ms")

	                    elif msg_type == "webm_chunk":
	                        print(" Recebendo WebM chunk...")
	                        raw_data = data.get("data", "")
	                        print(f" Base64 length: {len(raw_data)}")
	                        chunk_data = base64.b64decode(raw_data)
	                        size = data.get("size", 0)
	                        print(f" WebM chunk recebido: {len(chunk_data)} bytes (esperado: {size})")
	                        webm_data.extend(chunk_data)

	                    elif msg_type == "done":
	                        print(" Concluído!")
	                        print(f" Frames: {data.get('total_frames')}")
	                        print(f" Duração: {data.get('total_duration_ms')}ms")
	                        print(f" Tempo total: {data.get('elapsed_ms')}ms")
	                        break

	                    elif msg_type == "error":
	                        print(f" ERRO: {data.get('message')}")
	                        break

	                elif msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
	                    print("Conexão fechada")
	                    break

	    # Save whatever was collected; an empty buffer means no video arrived.
	    if webm_data:
	        with open(output_file, "wb") as f:
	            f.write(webm_data)
	        print(f"\nVídeo salvo: {output_file} ({len(webm_data)} bytes)")
	        return output_file

	    print("\nNenhum vídeo recebido!")
	    return None


	if __name__ == "__main__":
	    # Positional CLI arguments, in order: text, voice, output path.
	    # Anything not supplied on the command line falls back to its default;
	    # arguments beyond the third are ignored.
	    cli_values = sys.argv[1:4]
	    fallbacks = (
	        "Hello! I am a real-time streaming avatar powered by AI.",
	        "tara",
	        "output.webm",
	    )
	    text, voice, output = (*cli_values, *fallbacks[len(cli_values):])

	    result = asyncio.run(generate_video(text, voice, output))
	    if result:
	        # Only suggest playback when a file was actually written.
	        print(f"\nPara reproduzir: ffplay {result}")