"""Anima - AI avatar chat backend.

Per /chat request pipeline:
  1. Groq LLM generates the reply text.
  2. Edge-TTS renders the reply as MP3 audio.
  3. Optionally, a SadTalker Gradio API animates an avatar image
     with that audio into a talking-head video.
"""

import base64
import logging
import os
from typing import Optional

import edge_tts
import httpx
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from groq import Groq
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

app = FastAPI(title="Anima - AI Avatar Chat")

# CORS wide open so any frontend origin can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client. NOTE(review): if GROQ_API_KEY is unset this is created with
# api_key=None and requests will fail at call time, not at startup.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Configuration
VOICE = "pt-BR-FranciscaNeural"  # Edge-TTS Brazilian Portuguese voice
SADTALKER_API = os.environ.get(
    "SADTALKER_API", "https://madras1-sadtalker-api.hf.space"
)


class ChatRequest(BaseModel):
    """One incoming chat turn from the frontend."""

    message: str
    # Prior conversation turns, passed straight through to the LLM
    # (presumably {"role": ..., "content": ...} dicts — shape not validated here).
    history: list = Field(default_factory=list)
    # Base64-encoded avatar image (no data-URL prefix).
    avatar_image: Optional[str] = None
    # When True and avatar_image is present, also render a SadTalker video.
    use_video: bool = False


class ChatResponse(BaseModel):
    """Reply payload: text always; audio/video as base64 when available."""

    text: str
    audio_base64: Optional[str] = None
    video_base64: Optional[str] = None


@app.get("/health")
async def health():
    """Liveness probe; also reports the configured SadTalker endpoint."""
    return {"status": "ok", "sadtalker_api": SADTALKER_API}


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Run the full LLM -> TTS -> (optional) video pipeline for one message.

    Video generation failures are logged and swallowed (best-effort); any
    other failure becomes an HTTP 500 with the error message as detail.
    """
    try:
        # 1. Call the LLM (Groq).
        messages = [
            {"role": "system", "content": "Você é Anima, uma assistente virtual amigável e prestativa. Responda de forma natural e concisa em português brasileiro."}
        ]
        messages.extend(request.history)
        messages.append({"role": "user", "content": request.message})

        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
        )
        response_text = completion.choices[0].message.content

        # 2. Synthesize audio with Edge-TTS. Collect chunks in a list and
        # join once — repeated `bytes +=` is quadratic.
        communicate = edge_tts.Communicate(response_text, VOICE)
        audio_chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_chunks.append(chunk["data"])
        audio_base64 = base64.b64encode(b"".join(audio_chunks)).decode("utf-8")

        # 3. If a video was requested and an avatar image is present,
        # call SadTalker; on failure, continue with audio only.
        video_base64 = None
        if request.use_video and request.avatar_image:
            try:
                video_base64 = await generate_video(request.avatar_image, audio_base64)
            except Exception as e:
                logger.exception("SadTalker error: %s", e)

        return ChatResponse(
            text=response_text,
            audio_base64=audio_base64,
            video_base64=video_base64,
        )
    except Exception as e:
        logger.exception("Chat error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


async def generate_video(image_base64: str, audio_base64: str) -> str:
    """Call the SadTalker Gradio API and return the resulting video as base64.

    The Gradio response's first ``data`` element may be a dict with a data
    URL, a bare data-URL string, or a server-side file path (which is then
    downloaded). Raises on HTTP errors or when no video is returned.
    """
    async with httpx.AsyncClient(timeout=180.0) as http_client:
        # Gradio REST endpoint; inputs are sent as data URLs.
        response = await http_client.post(
            f"{SADTALKER_API}/run/predict",
            json={
                "data": [
                    {"path": None, "data": f"data:image/png;base64,{image_base64}"},
                    {"path": None, "data": f"data:audio/mp3;base64,{audio_base64}"},
                ]
            },
        )
        if response.status_code != 200:
            raise Exception(f"SadTalker API error: {response.status_code} - {response.text}")

        result = response.json()
        # Guard against a missing OR empty "data" list (the original
        # `.get("data", [None])[0]` raised IndexError on `"data": []`).
        data_items = result.get("data") or [None]
        video_data = data_items[0]
        if video_data:
            if isinstance(video_data, dict) and "data" in video_data:
                # Base64 payload, possibly prefixed as a data URL.
                payload = video_data["data"]
                return payload.split(",")[1] if "," in payload else payload
            elif isinstance(video_data, str) and video_data.startswith("data:"):
                return video_data.split(",")[1]
            elif isinstance(video_data, str):
                # Server-side path: download the file from the Gradio host.
                video_response = await http_client.get(f"{SADTALKER_API}/file={video_data}")
                return base64.b64encode(video_response.content).decode("utf-8")

        raise Exception("No video in response")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)