File size: 4,694 Bytes
8235c8e 0de89f9 8235c8e 175993c 8235c8e 175993c 8235c8e 175993c 8235c8e 0de89f9 175993c 8235c8e 0de89f9 8235c8e 175993c 8235c8e 175993c 8235c8e 175993c 8235c8e 175993c 8235c8e 0de89f9 8235c8e 175993c 0de89f9 175993c 0de89f9 175993c 0de89f9 175993c 0de89f9 175993c 8235c8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import asyncio
import base64
import functools
import os
from typing import Optional

import edge_tts
import httpx
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from groq import Groq
from pydantic import BaseModel
app = FastAPI(title="Anima - AI Avatar Chat")

# CORS so the browser frontend can call this API.
# Allowed origins are read from the comma-separated ALLOWED_ORIGINS environment
# variable; when it is unset or empty, every origin ("*") is allowed.
_allowed_origins = [
    origin.strip()
    for origin in os.environ.get("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    # A wildcard origin must not be combined with credentialed requests, so
    # credentials are only enabled when an explicit origin list is configured.
    allow_credentials="*" not in _allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client (API key taken from the GROQ_API_KEY environment variable).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Settings
# Edge-TTS voice used for every spoken reply.
VOICE = "pt-BR-FranciscaNeural"
# Base URL of the SadTalker service; overridable through the environment.
SADTALKER_API = os.environ.get("SADTALKER_API", "https://madras1-sadtalker-api.hf.space")
class ChatRequest(BaseModel):
    """Request body accepted by the POST /chat endpoint."""

    # The user's new message.
    message: str
    # Earlier conversation turns; chat() appends them to the LLM prompt as-is.
    history: list = []
    # Base64-encoded avatar image.
    avatar_image: Optional[str] = None
    # When True, a video is generated with SadTalker.
    use_video: bool = False
class ChatResponse(BaseModel):
    """Response body returned by the POST /chat endpoint."""

    # Text of the assistant's reply.
    text: str
    # Base64-encoded speech audio for the reply, when available.
    audio_base64: Optional[str] = None
    # Base64-encoded video, when one was requested and generated.
    video_base64: Optional[str] = None
@app.get("/health")
async def health():
    """Return a fixed "ok" status together with the configured SadTalker URL."""
    return dict(status="ok", sadtalker_api=SADTALKER_API)
def _build_messages(message: str, history: list) -> list:
    """Return the LLM prompt: system persona, prior turns, then the new user turn."""
    return [
        {"role": "system", "content": "Você é Anima, uma assistente virtual amigável e prestativa. Responda de forma natural e concisa em português brasileiro."},
        *history,
        {"role": "user", "content": message},
    ]


async def _synthesize_speech(text: str) -> str:
    """Stream *text* through Edge-TTS and return the audio as a base64 string."""
    communicate = edge_tts.Communicate(text, VOICE)
    # Collect the audio chunks and join once rather than growing a bytes
    # object with repeated concatenation.
    audio_chunks = [
        chunk["data"]
        async for chunk in communicate.stream()
        if chunk["type"] == "audio"
    ]
    return base64.b64encode(b"".join(audio_chunks)).decode("utf-8")


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Answer a chat message with text, speech audio and, optionally, a video.

    Steps: query the Groq LLM, synthesize the reply with Edge-TTS, and, when
    ``use_video`` is set and an avatar image was supplied, ask SadTalker for a
    video. A video failure is logged and the reply is returned without video.

    Args:
        request: The incoming message, prior history and avatar/video options.

    Returns:
        A ChatResponse holding the reply text plus base64 audio/video when
        they could be produced.

    Raises:
        HTTPException: Status 500 with the error text when the LLM call, the
            speech synthesis or the response construction fails.
    """
    try:
        # 1. Ask the LLM (Groq). The call is made without ``await`` on a
        # synchronous client, so it is pushed to the default executor to keep
        # the event loop free for other requests while it runs.
        loop = asyncio.get_running_loop()
        completion = await loop.run_in_executor(
            None,
            functools.partial(
                client.chat.completions.create,
                model="llama-3.1-8b-instant",
                messages=_build_messages(request.message, request.history),
                temperature=0.7,
                max_tokens=500,
            ),
        )
        # Normalise a missing content to "" so the response model (text: str)
        # still validates instead of failing with an opaque error.
        response_text = completion.choices[0].message.content or ""

        # 2. Generate audio with Edge-TTS; nothing to speak for an empty reply.
        audio_base64 = None
        if response_text.strip():
            audio_base64 = await _synthesize_speech(response_text)

        # 3. If a video was requested and an image (and audio) exist, call SadTalker.
        video_base64 = None
        if request.use_video and request.avatar_image and audio_base64:
            try:
                video_base64 = await generate_video(request.avatar_image, audio_base64)
            except Exception as e:
                # Best effort: carry on without video if generation fails.
                print(f"SadTalker error: {e}")

        return ChatResponse(
            text=response_text,
            audio_base64=audio_base64,
            video_base64=video_base64,
        )
    except Exception as e:
        print(f"Chat error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
async def generate_video(image_base64: str, audio_base64: str) -> str:
    """Call the SadTalker API to generate a video.

    Posts the image and audio as data URLs to ``{SADTALKER_API}/run/predict``
    and decodes the first element of the returned ``data`` list.

    Args:
        image_base64: Base64 image, sent with an ``image/png`` data-URL prefix.
        audio_base64: Base64 audio, sent with an ``audio/mp3`` data-URL prefix.

    Returns:
        The generated video as a base64 string.

    Raises:
        RuntimeError: If the API or the file download answers with a non-200
            status, or the response contains no usable video.
    """
    async with httpx.AsyncClient(timeout=180.0) as http_client:
        # Gradio API endpoint.
        response = await http_client.post(
            f"{SADTALKER_API}/run/predict",
            json={
                "data": [
                    {"path": None, "data": f"data:image/png;base64,{image_base64}"},
                    {"path": None, "data": f"data:audio/mp3;base64,{audio_base64}"},
                ]
            },
        )
        if response.status_code != 200:
            raise RuntimeError(f"SadTalker API error: {response.status_code} - {response.text}")

        result = response.json()
        # Tolerate a missing, null or empty "data" list instead of crashing
        # with IndexError/TypeError while indexing it.
        outputs = result.get("data") if isinstance(result, dict) else None
        video_data = outputs[0] if isinstance(outputs, list) and outputs else None

        # The result may be embedded base64 or a file path.
        if isinstance(video_data, dict):
            embedded = video_data.get("data")
            if isinstance(embedded, str) and embedded:
                # Data URL -> keep the part after the comma; a string without
                # a comma is returned unchanged.
                _, comma, payload = embedded.partition(",")
                return payload if comma else embedded
        elif isinstance(video_data, str) and video_data:
            if video_data.startswith("data:"):
                _, comma, payload = video_data.partition(",")
                if comma:
                    return payload
                # A "data:" string without a comma carries no payload; fall
                # through to the error below.
            else:
                # It is a path, so the file has to be downloaded.
                video_response = await http_client.get(f"{SADTALKER_API}/file={video_data}")
                # Without this check an error page would be encoded and
                # returned as if it were the video.
                if video_response.status_code != 200:
                    raise RuntimeError(
                        f"SadTalker file download error: {video_response.status_code}"
                    )
                return base64.b64encode(video_response.content).decode("utf-8")

    raise RuntimeError("No video in response")
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    # NOTE(review): 7860 is presumably the port the hosting platform expects; confirm.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
|