Madras1 committed on
Commit
175993c
·
verified ·
1 Parent(s): 899afa1

Upload 4 files

Browse files
Files changed (1) hide show
  1. app.py +56 -8
app.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import edge_tts
5
- import asyncio
6
  import base64
7
  import os
8
  from groq import Groq
@@ -21,25 +21,29 @@ app.add_middleware(
21
  # Groq client
22
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
23
 
24
- # Voz do Edge-TTS (português brasileiro feminino)
25
  VOICE = "pt-BR-FranciscaNeural"
 
26
 
27
  class ChatRequest(BaseModel):
28
  message: str
29
  history: list = []
 
 
30
 
31
  class ChatResponse(BaseModel):
32
  text: str
33
- audio_base64: str
 
34
 
35
  @app.get("/health")
36
  async def health():
37
- return {"status": "ok"}
38
 
39
  @app.post("/chat", response_model=ChatResponse)
40
  async def chat(request: ChatRequest):
41
  try:
42
- # Monta o histórico para o LLM
43
  messages = [
44
  {"role": "system", "content": "Você é Anima, uma assistente virtual amigável e prestativa. Responda de forma natural e concisa em português brasileiro."}
45
  ]
@@ -49,7 +53,6 @@ async def chat(request: ChatRequest):
49
 
50
  messages.append({"role": "user", "content": request.message})
51
 
52
- # Chama o Groq
53
  completion = client.chat.completions.create(
54
  model="llama-3.1-8b-instant",
55
  messages=messages,
@@ -59,7 +62,7 @@ async def chat(request: ChatRequest):
59
 
60
  response_text = completion.choices[0].message.content
61
 
62
- # Gera áudio com Edge-TTS
63
  communicate = edge_tts.Communicate(response_text, VOICE)
64
  audio_data = b""
65
 
@@ -69,11 +72,56 @@ async def chat(request: ChatRequest):
69
 
70
  audio_base64 = base64.b64encode(audio_data).decode("utf-8")
71
 
72
- return ChatResponse(text=response_text, audio_base64=audio_base64)
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  except Exception as e:
75
  raise HTTPException(status_code=500, detail=str(e))
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if __name__ == "__main__":
78
  import uvicorn
79
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  import edge_tts
5
+ import httpx
6
  import base64
7
  import os
8
  from groq import Groq
 
21
  # Groq client
22
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
23
 
24
+ # Configurações
25
  VOICE = "pt-BR-FranciscaNeural"
26
+ SADTALKER_API = os.environ.get("SADTALKER_API", "https://madras1-sadtalker-api.hf.space")
27
 
28
  class ChatRequest(BaseModel):
29
  message: str
30
  history: list = []
31
+ avatar_image: str = None # Base64 da imagem do avatar
32
+ use_video: bool = False # Se True, gera vídeo com SadTalker
33
 
34
  class ChatResponse(BaseModel):
35
  text: str
36
+ audio_base64: str = None
37
+ video_base64: str = None # Novo: vídeo do SadTalker
38
 
39
  @app.get("/health")
40
  async def health():
41
+ return {"status": "ok", "sadtalker_api": SADTALKER_API}
42
 
43
  @app.post("/chat", response_model=ChatResponse)
44
  async def chat(request: ChatRequest):
45
  try:
46
+ # 1. Chama o LLM (Groq)
47
  messages = [
48
  {"role": "system", "content": "Você é Anima, uma assistente virtual amigável e prestativa. Responda de forma natural e concisa em português brasileiro."}
49
  ]
 
53
 
54
  messages.append({"role": "user", "content": request.message})
55
 
 
56
  completion = client.chat.completions.create(
57
  model="llama-3.1-8b-instant",
58
  messages=messages,
 
62
 
63
  response_text = completion.choices[0].message.content
64
 
65
+ # 2. Gera áudio com Edge-TTS
66
  communicate = edge_tts.Communicate(response_text, VOICE)
67
  audio_data = b""
68
 
 
72
 
73
  audio_base64 = base64.b64encode(audio_data).decode("utf-8")
74
 
75
+ # 3. Se pediu vídeo e tem imagem, chama SadTalker
76
+ video_base64 = None
77
+ if request.use_video and request.avatar_image:
78
+ try:
79
+ video_base64 = await generate_video(request.avatar_image, audio_base64)
80
+ except Exception as e:
81
+ print(f"SadTalker error: {e}")
82
+ # Continua sem vídeo se falhar
83
+
84
+ return ChatResponse(
85
+ text=response_text,
86
+ audio_base64=audio_base64,
87
+ video_base64=video_base64
88
+ )
89
 
90
  except Exception as e:
91
  raise HTTPException(status_code=500, detail=str(e))
92
 
93
def _video_ref_from_result(result):
    """Return the first prediction output as a non-empty string, or None.

    Tolerates a missing, null, empty or non-list ``data`` field instead of
    raising ``IndexError``/``TypeError`` while indexing it.
    """
    outputs = result.get("data") if isinstance(result, dict) else None
    if not isinstance(outputs, (list, tuple)) or not outputs:
        return None

    first = outputs[0]
    if isinstance(first, dict):
        # NOTE(review): some Gradio versions appear to wrap file outputs in a
        # dict (inline "data" or a server-side "name"/"path") -- confirm
        # against the deployed SadTalker Space.
        first = first.get("data") or first.get("name") or first.get("path")

    if isinstance(first, str) and first:
        return first
    return None


async def generate_video(image_base64: str, audio_base64: str) -> str:
    """Call the SadTalker API to generate a talking-avatar video.

    Posts the image and audio as data URIs to ``{SADTALKER_API}/api/predict``.
    The first output is either an inline ``data:`` URI, whose payload is
    returned directly, or a file reference, which is downloaded from
    ``{SADTALKER_API}/file=<ref>`` and base64-encoded.

    Args:
        image_base64: Base64 avatar image; sent with an ``image/png`` prefix.
        audio_base64: Base64 speech audio; sent with an ``audio/mp3`` prefix.

    Returns:
        The video as a base64 string without any ``data:`` prefix.

    Raises:
        RuntimeError: If the prediction or the file download does not answer
            with HTTP 200, or if the response carries no usable video.
    """
    payload = {
        "data": [
            f"data:image/png;base64,{image_base64}",
            f"data:audio/mp3;base64,{audio_base64}",
        ]
    }

    # Named ``http`` so the module-level Groq ``client`` is not shadowed.
    async with httpx.AsyncClient(timeout=180.0) as http:
        # Gradio API endpoint
        response = await http.post(f"{SADTALKER_API}/api/predict", json=payload)
        if response.status_code != 200:
            raise RuntimeError(f"SadTalker API error: {response.status_code}")

        video_ref = _video_ref_from_result(response.json())
        if video_ref is None:
            raise RuntimeError("No video in response")

        if video_ref.startswith("data:"):
            # Inline result: keep only what follows the "data:...;base64," header.
            _, _, encoded = video_ref.partition(",")
            if not encoded:
                raise RuntimeError("No video in response")
            return encoded

        # Otherwise the result is a server-side path that must be downloaded.
        video_response = await http.get(f"{SADTALKER_API}/file={video_ref}")
        if video_response.status_code != 200:
            # Without this check an error page body would be returned as "video".
            raise RuntimeError(
                f"SadTalker file download error: {video_response.status_code}"
            )
        return base64.b64encode(video_response.content).decode("utf-8")
124
+
125
  if __name__ == "__main__":
126
  import uvicorn
127
  uvicorn.run(app, host="0.0.0.0", port=7860)