File size: 4,694 Bytes
8235c8e
 
 
0de89f9
8235c8e
175993c
8235c8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175993c
8235c8e
175993c
8235c8e
 
 
 
0de89f9
175993c
8235c8e
 
 
0de89f9
 
8235c8e
 
 
175993c
8235c8e
 
 
 
175993c
8235c8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175993c
8235c8e
 
 
 
 
 
 
 
 
175993c
 
 
 
 
 
 
 
 
 
 
 
 
 
8235c8e
 
0de89f9
8235c8e
 
175993c
 
0de89f9
 
 
 
175993c
 
0de89f9
 
175993c
 
 
 
 
0de89f9
175993c
 
 
 
 
 
0de89f9
 
 
 
 
 
 
 
 
 
175993c
 
 
8235c8e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
import edge_tts
import httpx
import base64
import os
from groq import Groq

app = FastAPI(title="Anima - AI Avatar Chat")

# CORS: allow any browser frontend to call this API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# maximally permissive — confirm this is intentional before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client; api_key is None if GROQ_API_KEY is unset (calls will then fail).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Configuration
VOICE = "pt-BR-FranciscaNeural"  # Edge-TTS Brazilian Portuguese neural voice
SADTALKER_API = os.environ.get("SADTALKER_API", "https://madras1-sadtalker-api.hf.space")

class ChatRequest(BaseModel):
    """Request body for POST /chat."""
    message: str  # the user's new message
    history: list = []  # prior turns as {"role": ..., "content": ...} dicts (pydantic copies the default per-instance)
    avatar_image: Optional[str] = None  # base64-encoded avatar image (no data-URL prefix)
    use_video: bool = False   # if True, also generate a talking-head video via SadTalker

class ChatResponse(BaseModel):
    """Response body for POST /chat."""
    text: str  # assistant reply text
    audio_base64: Optional[str] = None  # base64-encoded MP3 of the reply
    video_base64: Optional[str] = None  # base64-encoded video, or None if not requested / generation failed

@app.get("/health")
async def health():
    """Liveness probe; also reports which SadTalker endpoint is configured."""
    payload = {"status": "ok", "sadtalker_api": SADTALKER_API}
    return payload

@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Handle one chat turn: LLM reply -> TTS audio -> optional avatar video.

    Pipeline:
      1. Send system prompt + history + new message to Groq and take the reply.
      2. Synthesize the reply as MP3 audio with Edge-TTS.
      3. If requested and an avatar image was supplied, render a talking-head
         video via SadTalker (best-effort: failures fall back to audio-only).

    Returns a ChatResponse with the reply text, base64 audio, and optional
    base64 video. Any non-video failure is surfaced as HTTP 500.
    """
    try:
        # 1. Build the message list and call the LLM (Groq).
        messages = [
            {"role": "system", "content": "Você é Anima, uma assistente virtual amigável e prestativa. Responda de forma natural e concisa em português brasileiro."}
        ]
        messages.extend(request.history)
        messages.append({"role": "user", "content": request.message})

        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
        )
        response_text = completion.choices[0].message.content

        # 2. Synthesize speech with Edge-TTS. Collect chunks and join once
        # instead of repeated bytes += (which is quadratic in the worst case).
        communicate = edge_tts.Communicate(response_text, VOICE)
        audio_chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_chunks.append(chunk["data"])
        audio_base64 = base64.b64encode(b"".join(audio_chunks)).decode("utf-8")

        # 3. Optionally render the avatar video with SadTalker.
        video_base64 = None
        if request.use_video and request.avatar_image:
            try:
                video_base64 = await generate_video(request.avatar_image, audio_base64)
            except Exception as e:
                # Best-effort: keep the audio-only response if video fails.
                print(f"SadTalker error: {e}")

        return ChatResponse(
            text=response_text,
            audio_base64=audio_base64,
            video_base64=video_base64
        )

    except Exception as e:
        # Top-level boundary: log and convert to an HTTP error for the client.
        print(f"Chat error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

async def generate_video(image_base64: str, audio_base64: str) -> str:
    """Call the SadTalker Gradio API and return the resulting video as base64.

    Args:
        image_base64: avatar image, base64 without a data-URL prefix.
        audio_base64: MP3 audio, base64 without a data-URL prefix.

    Returns:
        The generated video as a bare base64 string.

    Raises:
        Exception: on a non-200 response or when no video comes back.
    """
    async with httpx.AsyncClient(timeout=180.0) as http_client:
        # Gradio's generic inference endpoint; inputs are data URLs.
        response = await http_client.post(
            f"{SADTALKER_API}/run/predict",
            json={
                "data": [
                    {"path": None, "data": f"data:image/png;base64,{image_base64}"},
                    {"path": None, "data": f"data:audio/mp3;base64,{audio_base64}"}
                ]
            }
        )

        if response.status_code != 200:
            raise Exception(f"SadTalker API error: {response.status_code} - {response.text}")

        result = response.json()

        # The result may be a dict with a data URL, a bare data URL string, or
        # a server-side file path. Guard against a present-but-empty "data"
        # list (plain .get(default)[0] would raise IndexError on []).
        video_data = (result.get("data") or [None])[0]

        if video_data:
            if isinstance(video_data, dict) and "data" in video_data:
                # data-URL inside a dict: strip the "data:...;base64," prefix if present
                return video_data["data"].split(",")[1] if "," in video_data["data"] else video_data["data"]
            elif isinstance(video_data, str) and video_data.startswith("data:"):
                return video_data.split(",")[1]
            elif isinstance(video_data, str):
                # Server-side path: download the file, then re-encode as base64.
                video_response = await http_client.get(f"{SADTALKER_API}/file={video_data}")
                return base64.b64encode(video_response.content).decode("utf-8")

        raise Exception("No video in response")

if __name__ == "__main__":
    # Launch the ASGI server directly (port 7860 is the Hugging Face Spaces default).
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)