Madras1 commited on
Commit
19ea065
·
verified ·
1 Parent(s): 6ab3259

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +83 -11
  2. app.py +236 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,11 +1,83 @@
1
- ---
2
- title: OpenAda
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 3D Avatar Chat - Backend
2
+
3
+ Backend para o sistema de chat com avatar 3D falante.
4
+
5
+ ## Tech Stack
6
+ - **FastAPI** - API REST
7
+ - **OpenRouter** - LLM para respostas
8
+ - **Edge TTS** - Síntese de voz
9
+
10
+ ## Executar Localmente
11
+
12
+ ```bash
13
+ # Instalar dependências
14
+ pip install -r requirements.txt
15
+
16
+ # Configurar variável de ambiente
17
+ export OPENROUTER_API_KEY="sua-chave-aqui"
18
+
19
+ # Rodar servidor
20
+ python app.py
21
+ # ou
22
+ uvicorn app:app --reload --port 7860
23
+ ```
24
+
25
+ ## Deploy no HuggingFace Spaces
26
+
27
+ 1. Criar novo Space (SDK: Docker ou Gradio)
28
+ 2. Adicionar secret `OPENROUTER_API_KEY` nas configurações
29
+ 3. Push do código
30
+
31
+ ### Dockerfile (se usar Docker SDK)
32
+
33
+ ```dockerfile
34
+ FROM python:3.11-slim
35
+
36
+ WORKDIR /app
37
+ COPY requirements.txt .
38
+ RUN pip install --no-cache-dir -r requirements.txt
39
+
40
+ COPY app.py .
41
+
42
+ EXPOSE 7860
43
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
44
+ ```
45
+
46
+ ## Endpoints
47
+
48
+ ### GET /
49
+ Health check básico.
50
+
51
+ ### GET /health
52
+ Status de saúde da API.
53
+
54
+ ### POST /chat
55
+ Processa mensagem e retorna resposta com áudio.
56
+
57
+ **Request:**
58
+ ```json
59
+ {
60
+ "message": "Olá, como você está?",
61
+ "history": []
62
+ }
63
+ ```
64
+
65
+ **Response:**
66
+ ```json
67
+ {
68
+ "text": "Olá! Estou muito bem, obrigada por perguntar!",
69
+ "audio_base64": "//uQxAAAAAANIAAAAAExBTUUzLjEwMFVV...",
70
+ "visemes": [
71
+ {"time": 0.0, "viseme": "O", "duration": 0.065},
72
+ {"time": 0.065, "viseme": "L", "duration": 0.065},
73
+ ...
74
+ ],
75
+ "duration": 3.2
76
+ }
77
+ ```
78
+
79
+ ## Configuração
80
+
81
+ Variáveis de ambiente:
82
+ - `OPENROUTER_API_KEY` - Chave da API OpenRouter (obrigatório)
83
+ - `OPENROUTER_MODEL` - Modelo a usar (default: `google/gemini-2.0-flash-001`)
app.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
3D Avatar Chat Backend
FastAPI + OpenRouter + Edge TTS
Deploy: HuggingFace Spaces
"""

import os
import asyncio  # NOTE(review): appears unused in this file — candidate for removal
import base64
import httpx
import re
from io import BytesIO
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import edge_tts

# ============================================================================
# Configuration
# ============================================================================

# OpenRouter credentials and model come from the environment. The key has no
# usable default ("" is falsy), so /chat answers 500 until it is configured —
# the check happens per-request, not at startup.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "google/gemini-2.0-flash-001")
# Brazilian-Portuguese neural voice used for every TTS response.
TTS_VOICE = "pt-BR-FranciscaNeural"
# Speech-rate offset passed to edge-tts; "+0%" means no speed adjustment.
TTS_RATE = "+0%"

# System prompt for the chatbot (Portuguese: friendly assistant, short
# conversational replies of at most 2-3 sentences).
SYSTEM_PROMPT = """Você é uma assistente virtual amigável e prestativa.
Responda de forma natural, concisa e conversacional.
Mantenha respostas curtas (máximo 2-3 frases) para manter a conversa fluida.
"""
# ============================================================================
# Phoneme to Viseme Mapping
# ============================================================================

# Letter → mouth-shape lookup for Portuguese text. Characters missing from
# the table (digits, most symbols) fall back to 'X' (closed/neutral mouth).
VISEME_MAP = {
    # Vowels
    'a': 'A', 'á': 'A', 'à': 'A', 'ã': 'A', 'â': 'A',
    'e': 'E', 'é': 'E', 'ê': 'E',
    'i': 'I', 'í': 'I',
    'o': 'O', 'ó': 'O', 'ô': 'O', 'õ': 'O',
    'u': 'U', 'ú': 'U',
    # Consonants
    'm': 'M', 'b': 'M', 'p': 'M',
    'f': 'F', 'v': 'F',
    'l': 'L', 'n': 'L', 't': 'L', 'd': 'L',
    's': 'S', 'z': 'S', 'c': 'S', 'ç': 'S',
    'r': 'R', 'x': 'S', 'j': 'S', 'g': 'L', 'q': 'L', 'k': 'L',
    'h': 'X',  # Silent
    ' ': 'X',  # Silence
}

# Average duration per character in seconds (approximate for pt-BR)
CHAR_DURATION = 0.065


def text_to_visemes(text: str) -> list[dict]:
    """
    Build a lip-sync timeline for *text*.

    Returns a list of ``{'time': float, 'viseme': str, 'duration': float}``
    entries ordered by start time. Punctuation (.,!?;:) becomes a fixed
    0.15 s closed-mouth pause, consecutive identical mouth shapes are merged
    into a single longer entry, and a 0.2 s closing silence is always
    appended at the end.
    """
    timeline: list[dict] = []
    clock = 0.0

    for ch in text.lower():
        # Punctuation: emit a pause entry directly (never merged backwards).
        if ch in '.,!?;:':
            timeline.append({'time': clock, 'viseme': 'X', 'duration': 0.15})
            clock += 0.15
            continue

        shape = VISEME_MAP.get(ch, 'X')

        if timeline and timeline[-1]['viseme'] == shape:
            # Same mouth shape as the previous entry: stretch it instead of
            # emitting a duplicate frame.
            timeline[-1]['duration'] += CHAR_DURATION
        else:
            timeline.append({
                'time': clock,
                'viseme': shape,
                'duration': CHAR_DURATION,
            })

        clock += CHAR_DURATION

    # Close the mouth once speech ends.
    timeline.append({'time': clock, 'viseme': 'X', 'duration': 0.2})

    return timeline
# ============================================================================
# FastAPI App
# ============================================================================

app = FastAPI(title="3D Avatar Chat API")

# CORS for frontend — intentionally wide open: any origin, method, and header
# is accepted so the static frontend can be hosted anywhere.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class ChatRequest(BaseModel):
    # Latest user utterance to answer.
    message: str
    # Prior turns as {"role": ..., "content": ...} dicts; defaults to empty.
    # (Pydantic deep-copies field defaults, so the mutable [] is safe here.)
    history: list[dict] = []


class ChatResponse(BaseModel):
    # Raw LLM reply text (before markdown stripping).
    text: str
    # TTS audio of the reply, base64-encoded.
    audio_base64: str
    # Lip-sync timeline: {"time", "viseme", "duration"} entries.
    visemes: list[dict]
    # Estimated playback length in seconds (sum of viseme durations).
    duration: float
@app.get("/")
async def root():
    """Liveness probe: confirms the API is up and identifies the service."""
    payload = {"status": "ok", "message": "3D Avatar Chat API"}
    return payload
@app.get("/health")
async def health():
    """Health-check endpoint for deployment-platform polling."""
    report = {"status": "healthy"}
    return report
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """
    Process chat message:
    1. Send to OpenRouter for response
    2. Generate audio with Edge TTS
    3. Analyze phonemes for lip sync
    4. Return everything

    Raises HTTPException(500) when the API key is missing, the OpenRouter
    call fails, or TTS synthesis fails.
    """

    # Fail fast with an explicit error instead of an opaque 401 upstream.
    if not OPENROUTER_API_KEY:
        raise HTTPException(status_code=500, detail="OPENROUTER_API_KEY not configured")

    # Build messages for OpenRouter
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Add history (last 10 messages) — keeps the prompt bounded; malformed
    # entries degrade to role="user" / content="" rather than crashing.
    for msg in request.history[-10:]:
        messages.append({
            "role": msg.get("role", "user"),
            "content": msg.get("content", "")
        })

    # Add current message
    messages.append({"role": "user", "content": request.message})

    # Call OpenRouter (OpenAI-compatible chat-completions endpoint).
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": OPENROUTER_MODEL,
                    "messages": messages,
                    # Short replies by design: the system prompt also caps
                    # responses at 2-3 sentences for conversational flow.
                    "max_tokens": 150,
                    "temperature": 0.7,
                }
            )
            response.raise_for_status()
            data = response.json()

            # NOTE(review): a response missing "choices" would raise KeyError
            # here and surface as an unhandled 500, not the message below.
            bot_text = data["choices"][0]["message"]["content"].strip()

    except httpx.HTTPError as e:
        raise HTTPException(status_code=500, detail=f"OpenRouter error: {str(e)}")

    # Clean text for TTS: strip markdown emphasis markers and [label](url)
    # links. NOTE(review): emojis are NOT removed by these two regexes.
    clean_text = re.sub(r'[*_`~]', '', bot_text)
    clean_text = re.sub(r'\[.*?\]\(.*?\)', '', clean_text)

    # Generate audio with Edge TTS, collecting the streamed chunks in memory.
    try:
        communicate = edge_tts.Communicate(clean_text, TTS_VOICE, rate=TTS_RATE)
        audio_buffer = BytesIO()

        # The stream interleaves audio and metadata chunks; keep audio only.
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_buffer.write(chunk["data"])

        audio_buffer.seek(0)
        audio_base64 = base64.b64encode(audio_buffer.read()).decode('utf-8')

    except Exception as e:
        # Broad catch is deliberate: any edge-tts failure (including an empty
        # clean_text after markdown stripping) becomes a clean 500.
        raise HTTPException(status_code=500, detail=f"TTS error: {str(e)}")

    # Generate viseme timeline from the cleaned text (character heuristic).
    visemes = text_to_visemes(clean_text)

    # Calculate total duration. NOTE(review): this is the heuristic timeline
    # length, not the real audio length — the two can drift apart.
    duration = sum(v['duration'] for v in visemes)

    return ChatResponse(
        text=bot_text,
        audio_base64=audio_base64,
        visemes=visemes,
        duration=duration
    )
# ============================================================================
# Run locally
# ============================================================================

if __name__ == "__main__":
    # Dev entry point; deployed containers run uvicorn via the Docker CMD
    # instead. Port 7860 is the HuggingFace Spaces convention.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.34.0
3
+ edge-tts==6.1.12
4
+ httpx==0.28.1
5
+ python-multipart==0.0.20