# Anima — app.py (Hugging Face Space by Madras1, upload commit 0de89f9)
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
import edge_tts
import httpx
import base64
import os
from groq import Groq
# FastAPI application instance; the title appears in the auto-generated docs.
app = FastAPI(title="Anima - AI Avatar Chat")

# CORS: allow any frontend origin to call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client for LLM chat completions; reads GROQ_API_KEY from the environment.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Configuration:
# - VOICE: Brazilian Portuguese neural voice used by Edge-TTS.
# - SADTALKER_API: base URL of the SadTalker Space used for video generation
#   (overridable via the SADTALKER_API environment variable).
VOICE = "pt-BR-FranciscaNeural"
SADTALKER_API = os.environ.get("SADTALKER_API", "https://madras1-sadtalker-api.hf.space")
class ChatRequest(BaseModel):
    """Incoming payload for POST /chat."""

    message: str  # the user's new chat message
    history: list = []  # prior conversation turns, appended verbatim to the LLM messages
    avatar_image: Optional[str] = None  # base64-encoded avatar image (no data-URL prefix)
    use_video: bool = False  # if True (and avatar_image given), generate video via SadTalker
class ChatResponse(BaseModel):
    """Response payload for POST /chat."""

    text: str  # the LLM's reply text
    audio_base64: Optional[str] = None  # synthesized speech audio, base64-encoded
    video_base64: Optional[str] = None  # generated video, base64-encoded; None if skipped or failed
@app.get("/health")
async def health():
    """Liveness probe; also reports the configured SadTalker endpoint."""
    return dict(status="ok", sadtalker_api=SADTALKER_API)
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Chat pipeline: LLM reply -> TTS audio -> optional SadTalker video.

    Returns a ChatResponse with the reply text, base64-encoded speech audio,
    and — when request.use_video is set and an avatar image was supplied —
    a base64-encoded video. Video generation is best-effort: if it fails,
    the response is returned with video_base64=None.

    Raises:
        HTTPException: status 500 if the LLM or TTS stage fails.
    """
    try:
        # 1. LLM call (Groq): system prompt + prior history + new user turn.
        #    Built as one literal with unpacking instead of a manual append loop.
        messages = [
            {"role": "system", "content": "Você é Anima, uma assistente virtual amigável e prestativa. Responda de forma natural e concisa em português brasileiro."},
            *request.history,
            {"role": "user", "content": request.message},
        ]
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
        )
        response_text = completion.choices[0].message.content

        # 2. Speech synthesis with Edge-TTS: concatenate streamed audio chunks.
        communicate = edge_tts.Communicate(response_text, VOICE)
        audio_data = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data += chunk["data"]
        audio_base64 = base64.b64encode(audio_data).decode("utf-8")

        # 3. Optional talking-head video. Deliberately best-effort: a SadTalker
        #    failure is logged and the request still succeeds audio-only.
        video_base64 = None
        if request.use_video and request.avatar_image:
            try:
                video_base64 = await generate_video(request.avatar_image, audio_base64)
            except Exception as e:
                print(f"SadTalker error: {e}")

        return ChatResponse(
            text=response_text,
            audio_base64=audio_base64,
            video_base64=video_base64,
        )
    except Exception as e:
        print(f"Chat error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
async def generate_video(image_base64: str, audio_base64: str) -> str:
    """Call the SadTalker Space to render a talking-head video.

    Args:
        image_base64: avatar image as raw base64 (no data-URL prefix).
        audio_base64: speech audio as raw base64 (no data-URL prefix).

    Returns:
        The generated video as raw base64.

    Raises:
        Exception: on a non-200 HTTP status or when no video is returned.
    """
    async with httpx.AsyncClient(timeout=180.0) as http_client:
        # Gradio's /run/predict endpoint takes positional inputs in "data",
        # each as a data-URL wrapped file payload.
        response = await http_client.post(
            f"{SADTALKER_API}/run/predict",
            json={
                "data": [
                    {"path": None, "data": f"data:image/png;base64,{image_base64}"},
                    {"path": None, "data": f"data:audio/mp3;base64,{audio_base64}"}
                ]
            }
        )
        if response.status_code != 200:
            raise Exception(f"SadTalker API error: {response.status_code} - {response.text}")
        result = response.json()
        # The first output may be a dict wrapping a data URL, a bare data URL
        # string, or a server-side file path. Guard against a missing/empty
        # "data" list: the previous `result.get("data", [None])[0]` raised
        # IndexError on `[]` (and TypeError on null) instead of the intended
        # "No video in response" error below.
        outputs = result.get("data") or []
        video_data = outputs[0] if outputs else None
        if video_data:
            if isinstance(video_data, dict) and "data" in video_data:
                # Dict-wrapped base64, possibly carrying a "data:*;base64," prefix.
                payload = video_data["data"]
                return payload.split(",")[1] if "," in payload else payload
            elif isinstance(video_data, str) and video_data.startswith("data:"):
                return video_data.split(",")[1]
            elif isinstance(video_data, str):
                # Server-side path: download the file via Gradio's /file= route.
                video_response = await http_client.get(f"{SADTALKER_API}/file={video_data}")
                return base64.b64encode(video_response.content).decode("utf-8")
        raise Exception("No video in response")
if __name__ == "__main__":
    # Serve the app with uvicorn on all interfaces, port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)