Madras1 committed on
Commit
0f0b357
·
verified ·
1 Parent(s): 75421ec

Upload 5 files

Browse files
Files changed (4) hide show
  1. Dockerfile +8 -3
  2. app.py +349 -266
  3. memory.py +128 -0
  4. requirements.txt +2 -0
Dockerfile CHANGED
@@ -3,25 +3,30 @@ FROM python:3.11-slim
3
  # Set working directory
4
  WORKDIR /app
5
 
6
- # Install system dependencies for edge-tts
7
  RUN apt-get update && apt-get install -y \
8
  --no-install-recommends \
 
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
  # Copy requirements first for better caching
12
  COPY requirements.txt .
13
 
14
- # Install Python dependencies
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
 
 
 
17
  # Copy application code
18
  COPY app.py .
 
19
 
20
  # Expose port (HuggingFace uses 7860)
21
  EXPOSE 7860
22
 
23
  # Health check
24
- HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
25
  CMD python -c "import httpx; httpx.get('http://localhost:7860/health')" || exit 1
26
 
27
  # Run the application
 
3
  # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install system dependencies
7
  RUN apt-get update && apt-get install -y \
8
  --no-install-recommends \
9
+ build-essential \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Copy requirements first for better caching
13
  COPY requirements.txt .
14
 
15
+ # Install Python dependencies (pode demorar devido ao sentence-transformers)
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
18
+ # Pre-download the embedding model during build
19
+ RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
20
+
21
  # Copy application code
22
  COPY app.py .
23
+ COPY memory.py .
24
 
25
  # Expose port (HuggingFace uses 7860)
26
  EXPOSE 7860
27
 
28
  # Health check
29
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
30
  CMD python -c "import httpx; httpx.get('http://localhost:7860/health')" || exit 1
31
 
32
  # Run the application
app.py CHANGED
@@ -1,266 +1,349 @@
1
- """
2
- 3D Avatar Chat Backend (v2 - Fixed)
3
- FastAPI + OpenRouter + Edge TTS
4
- Deploy: HuggingFace Spaces
5
- """
6
-
7
- import os
8
- import base64
9
- import re
10
- from io import BytesIO
11
- from fastapi import FastAPI, HTTPException
12
- from fastapi.middleware.cors import CORSMiddleware
13
- from pydantic import BaseModel
14
- import edge_tts
15
- import httpx
16
-
17
- # ============================================================================
18
- # Configuration
19
- # ============================================================================
20
-
21
- OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
22
- OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "nousresearch/hermes-3-llama-3.1-405b:free")
23
- TTS_VOICE = "pt-BR-FranciscaNeural"
24
- TTS_RATE = "+0%"
25
-
26
- # System prompt for the chatbot
27
- SYSTEM_PROMPT = """Você é Ada, uma assistente virtual amigável e prestativa.
28
- Responda de forma natural, concisa e conversacional em português brasileiro.
29
- Mantenha respostas curtas (máximo 2-3 frases) para manter a conversa fluida.
30
- Seja simpática e use tom casual."""
31
-
32
- # ============================================================================
33
- # Phoneme to Viseme Mapping
34
- # ============================================================================
35
-
36
- VISEME_MAP = {
37
- 'a': 'A', 'á': 'A', 'à': 'A', 'ã': 'A', 'â': 'A',
38
- 'e': 'E', 'é': 'E', 'ê': 'E',
39
- 'i': 'I', 'í': 'I',
40
- 'o': 'O', 'ó': 'O', 'ô': 'O', 'õ': 'O',
41
- 'u': 'U', 'ú': 'U',
42
- 'm': 'M', 'b': 'M', 'p': 'M',
43
- 'f': 'F', 'v': 'F',
44
- 'l': 'L', 'n': 'L', 't': 'L', 'd': 'L',
45
- 's': 'S', 'z': 'S', 'c': 'S', 'ç': 'S',
46
- 'r': 'R', 'x': 'S', 'j': 'S', 'g': 'L', 'q': 'L', 'k': 'L',
47
- 'h': 'X', ' ': 'X',
48
- }
49
-
50
- CHAR_DURATION = 0.065
51
-
52
-
53
- def text_to_visemes(text: str) -> list[dict]:
54
- """Convert text to a timeline of visemes."""
55
- visemes = []
56
- current_time = 0.0
57
- text_lower = text.lower()
58
-
59
- i = 0
60
- while i < len(text_lower):
61
- char = text_lower[i]
62
-
63
- if char in '.,!?;:':
64
- visemes.append({
65
- 'time': current_time,
66
- 'viseme': 'X',
67
- 'duration': 0.15
68
- })
69
- current_time += 0.15
70
- i += 1
71
- continue
72
-
73
- viseme = VISEME_MAP.get(char, 'X')
74
-
75
- if visemes and visemes[-1]['viseme'] == viseme:
76
- visemes[-1]['duration'] += CHAR_DURATION
77
- else:
78
- visemes.append({
79
- 'time': current_time,
80
- 'viseme': viseme,
81
- 'duration': CHAR_DURATION
82
- })
83
-
84
- current_time += CHAR_DURATION
85
- i += 1
86
-
87
- visemes.append({
88
- 'time': current_time,
89
- 'viseme': 'X',
90
- 'duration': 0.2
91
- })
92
-
93
- return visemes
94
-
95
-
96
- # ============================================================================
97
- # FastAPI App
98
- # ============================================================================
99
-
100
- app = FastAPI(title="3D Avatar Chat API")
101
-
102
- app.add_middleware(
103
- CORSMiddleware,
104
- allow_origins=["*"],
105
- allow_credentials=True,
106
- allow_methods=["*"],
107
- allow_headers=["*"],
108
- )
109
-
110
-
111
- class ChatRequest(BaseModel):
112
- message: str
113
- history: list[dict] = []
114
-
115
-
116
- class ChatResponse(BaseModel):
117
- text: str
118
- audio_base64: str
119
- visemes: list[dict]
120
- duration: float
121
-
122
-
123
- @app.get("/")
124
- async def root():
125
- return {"status": "ok", "message": "3D Avatar Chat API v2"}
126
-
127
-
128
- @app.get("/health")
129
- async def health():
130
- has_key = bool(OPENROUTER_API_KEY)
131
- return {"status": "healthy", "has_api_key": has_key, "model": OPENROUTER_MODEL}
132
-
133
-
134
- @app.post("/chat", response_model=ChatResponse)
135
- async def chat(request: ChatRequest):
136
- """Process chat message and return response with audio."""
137
-
138
- # Validar API key
139
- if not OPENROUTER_API_KEY:
140
- raise HTTPException(
141
- status_code=500,
142
- detail="OPENROUTER_API_KEY não configurada. Configure nas secrets do Space."
143
- )
144
-
145
- # Validar mensagem
146
- if not request.message or not request.message.strip():
147
- raise HTTPException(status_code=400, detail="Mensagem vazia")
148
-
149
- # Build messages
150
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
151
-
152
- for msg in request.history[-10:]:
153
- role = msg.get("role", "user")
154
- content = msg.get("content", "")
155
- if role in ["user", "assistant"] and content:
156
- messages.append({"role": role, "content": content})
157
-
158
- messages.append({"role": "user", "content": request.message})
159
-
160
- # Call OpenRouter
161
- bot_text = ""
162
- try:
163
- async with httpx.AsyncClient(timeout=30.0) as client:
164
- response = await client.post(
165
- "https://openrouter.ai/api/v1/chat/completions",
166
- headers={
167
- "Authorization": f"Bearer {OPENROUTER_API_KEY}",
168
- "Content-Type": "application/json",
169
- "HTTP-Referer": "https://huggingface.co/spaces",
170
- "X-Title": "OpenAda Avatar Chat"
171
- },
172
- json={
173
- "model": OPENROUTER_MODEL,
174
- "messages": messages,
175
- "max_tokens": 200,
176
- "temperature": 0.7,
177
- }
178
- )
179
-
180
- # Log para debug
181
- print(f"OpenRouter status: {response.status_code}")
182
-
183
- if response.status_code != 200:
184
- error_text = response.text
185
- print(f"OpenRouter error: {error_text}")
186
- raise HTTPException(
187
- status_code=500,
188
- detail=f"OpenRouter retornou {response.status_code}: {error_text[:200]}"
189
- )
190
-
191
- data = response.json()
192
- print(f"OpenRouter response: {data}")
193
-
194
- # Extrair texto da resposta
195
- if "choices" in data and len(data["choices"]) > 0:
196
- choice = data["choices"][0]
197
- if "message" in choice and "content" in choice["message"]:
198
- bot_text = choice["message"]["content"]
199
- elif "text" in choice:
200
- bot_text = choice["text"]
201
-
202
- # Fallback se não encontrou texto
203
- if not bot_text:
204
- print(f"Não encontrou texto na resposta: {data}")
205
- bot_text = "Desculpe, não consegui processar sua mensagem."
206
-
207
- except httpx.TimeoutException:
208
- raise HTTPException(status_code=504, detail="Timeout ao conectar com OpenRouter")
209
- except httpx.HTTPError as e:
210
- print(f"HTTP Error: {e}")
211
- raise HTTPException(status_code=500, detail=f"Erro de conexão: {str(e)}")
212
- except Exception as e:
213
- print(f"Unexpected error: {e}")
214
- raise HTTPException(status_code=500, detail=f"Erro inesperado: {str(e)}")
215
-
216
- # Limpar texto
217
- bot_text = bot_text.strip()
218
- if not bot_text:
219
- bot_text = "Hmm, não entendi. Pode reformular?"
220
-
221
- # Limpar para TTS
222
- clean_text = re.sub(r'[*_`~#]', '', bot_text)
223
- clean_text = re.sub(r'\[.*?\]\(.*?\)', '', clean_text)
224
- clean_text = re.sub(r'<[^>]+>', '', clean_text)
225
- clean_text = clean_text.strip()
226
-
227
- if not clean_text:
228
- clean_text = bot_text
229
-
230
- # Generate audio
231
- audio_base64 = ""
232
- try:
233
- communicate = edge_tts.Communicate(clean_text, TTS_VOICE, rate=TTS_RATE)
234
- audio_buffer = BytesIO()
235
-
236
- async for chunk in communicate.stream():
237
- if chunk["type"] == "audio":
238
- audio_buffer.write(chunk["data"])
239
-
240
- audio_buffer.seek(0)
241
- audio_data = audio_buffer.read()
242
-
243
- if len(audio_data) > 0:
244
- audio_base64 = base64.b64encode(audio_data).decode('utf-8')
245
- else:
246
- print("TTS retornou áudio vazio")
247
-
248
- except Exception as e:
249
- print(f"TTS error: {e}")
250
- # Continua sem áudio
251
-
252
- # Generate visemes
253
- visemes = text_to_visemes(clean_text)
254
- duration = sum(v['duration'] for v in visemes)
255
-
256
- return ChatResponse(
257
- text=bot_text,
258
- audio_base64=audio_base64,
259
- visemes=visemes,
260
- duration=duration
261
- )
262
-
263
-
264
- if __name__ == "__main__":
265
- import uvicorn
266
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 3D Avatar Chat Backend (v3 - With Vector Memory)
3
+ FastAPI + OpenRouter + Edge TTS + ChromaDB
4
+ Deploy: HuggingFace Spaces
5
+ """
6
+
7
+ import os
8
+ import base64
9
+ import re
10
+ from io import BytesIO
11
+ from fastapi import FastAPI, HTTPException
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from pydantic import BaseModel
14
+ import edge_tts
15
+ import httpx
16
+
17
+ # Importar módulo de memória (lazy load para não atrasar startup)
18
+ memory_module = None
19
+
20
+ def get_memory():
21
+ """Lazy load memory module."""
22
+ global memory_module
23
+ if memory_module is None:
24
+ try:
25
+ from memory import get_memory as _get_memory
26
+ memory_module = _get_memory()
27
+ except Exception as e:
28
+ print(f"Erro ao carregar memória: {e}")
29
+ return None
30
+ return memory_module
31
+
32
+ # ============================================================================
33
+ # Configuration
34
+ # ============================================================================
35
+
36
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
37
+ OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "google/gemini-2.0-flash-001")
38
+ TTS_VOICE = "pt-BR-FranciscaNeural"
39
+ TTS_RATE = "+0%"
40
+
41
+ # System prompt for the chatbot
42
+ SYSTEM_PROMPT = """Você é Ada, uma assistente virtual amigável e prestativa.
43
+ Responda de forma natural, concisa e conversacional em português brasileiro.
44
+ Mantenha respostas curtas (máximo 2-3 frases) para manter a conversa fluida.
45
+ Seja simpática e use tom casual.
46
+
47
+ Se houver contexto de conversas anteriores fornecido, use-o para dar respostas mais personalizadas e lembrar de informações que o usuário compartilhou."""
48
+
49
+ # ============================================================================
50
+ # Phoneme to Viseme Mapping
51
+ # ============================================================================
52
+
53
+ VISEME_MAP = {
54
+ 'a': 'A', 'á': 'A', 'à': 'A', 'ã': 'A', 'â': 'A',
55
+ 'e': 'E', 'é': 'E', 'ê': 'E',
56
+ 'i': 'I', 'í': 'I',
57
+ 'o': 'O', 'ó': 'O', 'ô': 'O', 'õ': 'O',
58
+ 'u': 'U', 'ú': 'U',
59
+ 'm': 'M', 'b': 'M', 'p': 'M',
60
+ 'f': 'F', 'v': 'F',
61
+ 'l': 'L', 'n': 'L', 't': 'L', 'd': 'L',
62
+ 's': 'S', 'z': 'S', 'c': 'S', 'ç': 'S',
63
+ 'r': 'R', 'x': 'S', 'j': 'S', 'g': 'L', 'q': 'L', 'k': 'L',
64
+ 'h': 'X', ' ': 'X',
65
+ }
66
+
67
+ CHAR_DURATION = 0.065
68
+
69
+
70
+ def text_to_visemes(text: str) -> list[dict]:
71
+ """Convert text to a timeline of visemes."""
72
+ visemes = []
73
+ current_time = 0.0
74
+ text_lower = text.lower()
75
+
76
+ i = 0
77
+ while i < len(text_lower):
78
+ char = text_lower[i]
79
+
80
+ if char in '.,!?;:':
81
+ visemes.append({
82
+ 'time': current_time,
83
+ 'viseme': 'X',
84
+ 'duration': 0.15
85
+ })
86
+ current_time += 0.15
87
+ i += 1
88
+ continue
89
+
90
+ viseme = VISEME_MAP.get(char, 'X')
91
+
92
+ if visemes and visemes[-1]['viseme'] == viseme:
93
+ visemes[-1]['duration'] += CHAR_DURATION
94
+ else:
95
+ visemes.append({
96
+ 'time': current_time,
97
+ 'viseme': viseme,
98
+ 'duration': CHAR_DURATION
99
+ })
100
+
101
+ current_time += CHAR_DURATION
102
+ i += 1
103
+
104
+ visemes.append({
105
+ 'time': current_time,
106
+ 'viseme': 'X',
107
+ 'duration': 0.2
108
+ })
109
+
110
+ return visemes
111
+
112
+
113
+ # ============================================================================
114
+ # FastAPI App
115
+ # ============================================================================
116
+
117
+ app = FastAPI(title="3D Avatar Chat API")
118
+
119
+ app.add_middleware(
120
+ CORSMiddleware,
121
+ allow_origins=["*"],
122
+ allow_credentials=True,
123
+ allow_methods=["*"],
124
+ allow_headers=["*"],
125
+ )
126
+
127
+
128
+ class ChatRequest(BaseModel):
129
+ message: str
130
+ history: list[dict] = []
131
+
132
+
133
+ class ChatResponse(BaseModel):
134
+ text: str
135
+ audio_base64: str
136
+ visemes: list[dict]
137
+ duration: float
138
+ memory_context: list[str] = [] # Contexto recuperado
139
+
140
+
141
+ @app.get("/")
142
+ async def root():
143
+ return {"status": "ok", "message": "3D Avatar Chat API v3 (with memory)"}
144
+
145
+
146
+ @app.get("/health")
147
+ async def health():
148
+ has_key = bool(OPENROUTER_API_KEY)
149
+ memory = get_memory()
150
+ memory_stats = memory.get_stats() if memory else {"error": "not loaded"}
151
+ return {
152
+ "status": "healthy",
153
+ "has_api_key": has_key,
154
+ "model": OPENROUTER_MODEL,
155
+ "memory": memory_stats
156
+ }
157
+
158
+
159
+ @app.get("/memory/stats")
160
+ async def memory_stats():
161
+ """Get memory statistics."""
162
+ memory = get_memory()
163
+ if not memory:
164
+ return {"error": "Memory not initialized"}
165
+ return memory.get_stats()
166
+
167
+
168
+ @app.delete("/memory/clear")
169
+ async def clear_memory():
170
+ """Clear all memories."""
171
+ memory = get_memory()
172
+ if not memory:
173
+ return {"error": "Memory not initialized"}
174
+ memory.clear_memories()
175
+ return {"status": "cleared"}
176
+
177
+
178
+ @app.post("/chat", response_model=ChatResponse)
179
+ async def chat(request: ChatRequest):
180
+ """Process chat message and return response with audio."""
181
+
182
+ # Validar API key
183
+ if not OPENROUTER_API_KEY:
184
+ raise HTTPException(
185
+ status_code=500,
186
+ detail="OPENROUTER_API_KEY não configurada. Configure nas secrets do Space."
187
+ )
188
+
189
+ # Validar mensagem
190
+ if not request.message or not request.message.strip():
191
+ raise HTTPException(status_code=400, detail="Mensagem vazia")
192
+
193
+ # =========================================================================
194
+ # Buscar contexto na memória vetorial
195
+ # =========================================================================
196
+ memory_context = []
197
+ memory = get_memory()
198
+
199
+ if memory:
200
+ try:
201
+ relevant_memories = memory.search_memories(request.message, k=3)
202
+ for mem in relevant_memories:
203
+ if mem['score'] > 0.3: # Só usar se relevância > 30%
204
+ memory_context.append(
205
+ f"[Conversa anterior] {mem['user_message']} {mem['bot_response']}"
206
+ )
207
+ print(f"Memórias relevantes encontradas: {len(memory_context)}")
208
+ except Exception as e:
209
+ print(f"Erro ao buscar memória: {e}")
210
+
211
+ # =========================================================================
212
+ # Build messages
213
+ # =========================================================================
214
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
215
+
216
+ # Adicionar contexto de memória se houver
217
+ if memory_context:
218
+ context_text = "\n\n**Contexto de conversas anteriores:**\n" + "\n".join(memory_context)
219
+ messages.append({
220
+ "role": "system",
221
+ "content": f"Informações relevantes de conversas anteriores:\n{context_text}"
222
+ })
223
+
224
+ # Histórico recente
225
+ for msg in request.history[-10:]:
226
+ role = msg.get("role", "user")
227
+ content = msg.get("content", "")
228
+ if role in ["user", "assistant"] and content:
229
+ messages.append({"role": role, "content": content})
230
+
231
+ messages.append({"role": "user", "content": request.message})
232
+
233
+ # =========================================================================
234
+ # Call OpenRouter
235
+ # =========================================================================
236
+ bot_text = ""
237
+ try:
238
+ async with httpx.AsyncClient(timeout=30.0) as client:
239
+ response = await client.post(
240
+ "https://openrouter.ai/api/v1/chat/completions",
241
+ headers={
242
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
243
+ "Content-Type": "application/json",
244
+ "HTTP-Referer": "https://huggingface.co/spaces",
245
+ "X-Title": "OpenAda Avatar Chat"
246
+ },
247
+ json={
248
+ "model": OPENROUTER_MODEL,
249
+ "messages": messages,
250
+ "max_tokens": 200,
251
+ "temperature": 0.7,
252
+ }
253
+ )
254
+
255
+ print(f"OpenRouter status: {response.status_code}")
256
+
257
+ if response.status_code != 200:
258
+ error_text = response.text
259
+ print(f"OpenRouter error: {error_text}")
260
+ raise HTTPException(
261
+ status_code=500,
262
+ detail=f"OpenRouter retornou {response.status_code}: {error_text[:200]}"
263
+ )
264
+
265
+ data = response.json()
266
+ print(f"OpenRouter response received")
267
+
268
+ # Extrair texto da resposta
269
+ if "choices" in data and len(data["choices"]) > 0:
270
+ choice = data["choices"][0]
271
+ if "message" in choice and "content" in choice["message"]:
272
+ bot_text = choice["message"]["content"]
273
+ elif "text" in choice:
274
+ bot_text = choice["text"]
275
+
276
+ if not bot_text:
277
+ print(f"Não encontrou texto na resposta: {data}")
278
+ bot_text = "Desculpe, não consegui processar sua mensagem."
279
+
280
+ except httpx.TimeoutException:
281
+ raise HTTPException(status_code=504, detail="Timeout ao conectar com OpenRouter")
282
+ except httpx.HTTPError as e:
283
+ print(f"HTTP Error: {e}")
284
+ raise HTTPException(status_code=500, detail=f"Erro de conexão: {str(e)}")
285
+ except Exception as e:
286
+ print(f"Unexpected error: {e}")
287
+ raise HTTPException(status_code=500, detail=f"Erro inesperado: {str(e)}")
288
+
289
+ # Limpar texto
290
+ bot_text = bot_text.strip()
291
+ if not bot_text:
292
+ bot_text = "Hmm, não entendi. Pode reformular?"
293
+
294
+ # =========================================================================
295
+ # Salvar na memória
296
+ # =========================================================================
297
+ if memory:
298
+ try:
299
+ memory.add_memory(request.message, bot_text)
300
+ except Exception as e:
301
+ print(f"Erro ao salvar memória: {e}")
302
+
303
+ # =========================================================================
304
+ # Generate TTS
305
+ # =========================================================================
306
+ clean_text = re.sub(r'[*_`~#]', '', bot_text)
307
+ clean_text = re.sub(r'\[.*?\]\(.*?\)', '', clean_text)
308
+ clean_text = re.sub(r'<[^>]+>', '', clean_text)
309
+ clean_text = clean_text.strip()
310
+
311
+ if not clean_text:
312
+ clean_text = bot_text
313
+
314
+ audio_base64 = ""
315
+ try:
316
+ communicate = edge_tts.Communicate(clean_text, TTS_VOICE, rate=TTS_RATE)
317
+ audio_buffer = BytesIO()
318
+
319
+ async for chunk in communicate.stream():
320
+ if chunk["type"] == "audio":
321
+ audio_buffer.write(chunk["data"])
322
+
323
+ audio_buffer.seek(0)
324
+ audio_data = audio_buffer.read()
325
+
326
+ if len(audio_data) > 0:
327
+ audio_base64 = base64.b64encode(audio_data).decode('utf-8')
328
+ else:
329
+ print("TTS retornou áudio vazio")
330
+
331
+ except Exception as e:
332
+ print(f"TTS error: {e}")
333
+
334
+ # Generate visemes
335
+ visemes = text_to_visemes(clean_text)
336
+ duration = sum(v['duration'] for v in visemes)
337
+
338
+ return ChatResponse(
339
+ text=bot_text,
340
+ audio_base64=audio_base64,
341
+ visemes=visemes,
342
+ duration=duration,
343
+ memory_context=memory_context
344
+ )
345
+
346
+
347
+ if __name__ == "__main__":
348
+ import uvicorn
349
+ uvicorn.run(app, host="0.0.0.0", port=7860)
memory.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vector Memory Module
3
+ Memória de longo prazo usando ChromaDB + Sentence Transformers
4
+ """
5
+
6
+ import chromadb
7
+ from sentence_transformers import SentenceTransformer
8
+ from datetime import datetime
9
+ import hashlib
10
+
11
+ # Usar modelo leve para embeddings
12
+ EMBEDDING_MODEL = "all-MiniLM-L6-v2" # ~80MB, rápido
13
+
14
+ # Singleton para evitar recarregar
15
+ _memory_instance = None
16
+
17
+
18
+ def get_memory():
19
+ """Get or create memory instance."""
20
+ global _memory_instance
21
+ if _memory_instance is None:
22
+ _memory_instance = VectorMemory()
23
+ return _memory_instance
24
+
25
+
26
+ class VectorMemory:
27
+ def __init__(self):
28
+ print("Inicializando memória vetorial...")
29
+
30
+ # Modelo de embeddings
31
+ self.model = SentenceTransformer(EMBEDDING_MODEL)
32
+ print(f"Modelo carregado: {EMBEDDING_MODEL}")
33
+
34
+ # ChromaDB em memória (persiste enquanto o servidor estiver rodando)
35
+ self.client = chromadb.Client()
36
+ self.collection = self.client.get_or_create_collection(
37
+ name="chat_memory",
38
+ metadata={"hnsw:space": "cosine"}
39
+ )
40
+
41
+ print(f"Memória pronta. {self.collection.count()} memórias existentes.")
42
+
43
+ def _generate_id(self, text: str) -> str:
44
+ """Generate unique ID for a memory."""
45
+ timestamp = datetime.now().isoformat()
46
+ content = f"{timestamp}:{text}"
47
+ return hashlib.md5(content.encode()).hexdigest()
48
+
49
+ def add_memory(self, user_message: str, bot_response: str):
50
+ """
51
+ Add a conversation exchange to memory.
52
+ Stores the combined context for better retrieval.
53
+ """
54
+ # Combinar mensagem e resposta para contexto completo
55
+ combined = f"Usuário: {user_message}\nAssistente: {bot_response}"
56
+
57
+ # Gerar embedding
58
+ embedding = self.model.encode(combined).tolist()
59
+
60
+ # Gerar ID único
61
+ doc_id = self._generate_id(combined)
62
+
63
+ # Metadados
64
+ metadata = {
65
+ "user_message": user_message[:500], # Truncar se muito longo
66
+ "bot_response": bot_response[:500],
67
+ "timestamp": datetime.now().isoformat()
68
+ }
69
+
70
+ # Adicionar ao banco
71
+ self.collection.add(
72
+ ids=[doc_id],
73
+ embeddings=[embedding],
74
+ documents=[combined],
75
+ metadatas=[metadata]
76
+ )
77
+
78
+ print(f"Memória adicionada. Total: {self.collection.count()}")
79
+
80
+ def search_memories(self, query: str, k: int = 3) -> list[dict]:
81
+ """
82
+ Search for relevant memories based on the query.
83
+ Returns list of {text, user_message, bot_response, score}
84
+ """
85
+ if self.collection.count() == 0:
86
+ return []
87
+
88
+ # Gerar embedding da query
89
+ query_embedding = self.model.encode(query).tolist()
90
+
91
+ # Buscar similares
92
+ results = self.collection.query(
93
+ query_embeddings=[query_embedding],
94
+ n_results=min(k, self.collection.count())
95
+ )
96
+
97
+ memories = []
98
+ if results and results['documents'] and results['documents'][0]:
99
+ for i, doc in enumerate(results['documents'][0]):
100
+ metadata = results['metadatas'][0][i] if results['metadatas'] else {}
101
+ distance = results['distances'][0][i] if results['distances'] else 0
102
+
103
+ memories.append({
104
+ "text": doc,
105
+ "user_message": metadata.get("user_message", ""),
106
+ "bot_response": metadata.get("bot_response", ""),
107
+ "score": 1 - distance, # Converter distância em similaridade
108
+ "timestamp": metadata.get("timestamp", "")
109
+ })
110
+
111
+ return memories
112
+
113
+ def clear_memories(self):
114
+ """Clear all memories."""
115
+ # Recriar collection
116
+ self.client.delete_collection("chat_memory")
117
+ self.collection = self.client.get_or_create_collection(
118
+ name="chat_memory",
119
+ metadata={"hnsw:space": "cosine"}
120
+ )
121
+ print("Memórias limpas.")
122
+
123
+ def get_stats(self) -> dict:
124
+ """Get memory statistics."""
125
+ return {
126
+ "total_memories": self.collection.count(),
127
+ "model": EMBEDDING_MODEL
128
+ }
requirements.txt CHANGED
@@ -3,3 +3,5 @@ uvicorn[standard]==0.34.0
3
  edge-tts==6.1.12
4
  httpx==0.28.1
5
  python-multipart==0.0.20
 
 
 
3
  edge-tts==6.1.12
4
  httpx==0.28.1
5
  python-multipart==0.0.20
6
+ chromadb==0.4.22
7
+ sentence-transformers==2.2.2