"""
Vector Memory Module

Long-term conversational memory built on ChromaDB + Sentence Transformers.
"""

import hashlib
from datetime import datetime

import chromadb
from sentence_transformers import SentenceTransformer

# Lightweight embedding model (~80MB, fast).
EMBEDDING_MODEL = "all-MiniLM-L6-v2"

# Module-level singleton so the embedding model is not reloaded on every call.
_memory_instance = None


def get_memory():
    """Return the shared VectorMemory instance, creating it on first use."""
    global _memory_instance
    if _memory_instance is None:
        _memory_instance = VectorMemory()
    return _memory_instance


class VectorMemory:
    """Store chat exchanges as embeddings and retrieve them by similarity.

    Each exchange (user message + assistant response) is embedded with the
    model named by ``EMBEDDING_MODEL`` and stored in a ChromaDB collection
    configured for cosine distance.
    """

    def __init__(self):
        """Load the embedding model and open the ``chat_memory`` collection."""
        print("Inicializando memória vetorial...")

        # Embedding model.
        self.model = SentenceTransformer(EMBEDDING_MODEL)
        print(f"Modelo carregado: {EMBEDDING_MODEL}")

        # In-memory ChromaDB: contents last only while the server process
        # is running.
        self.client = chromadb.Client()
        self.collection = self._open_collection()

        print(f"Memória pronta. {self.collection.count()} memórias existentes.")

    def _open_collection(self):
        """Get or create the ``chat_memory`` collection (cosine distance)."""
        # Single place for the collection name/settings so that __init__ and
        # clear_memories always agree.
        return self.client.get_or_create_collection(
            name="chat_memory",
            metadata={"hnsw:space": "cosine"},
        )

    def _generate_id(self, text: str) -> str:
        """Return an MD5 hex digest of the current timestamp plus ``text``.

        The timestamp makes the ID differ between calls even when the same
        text is stored more than once.
        """
        timestamp = datetime.now().isoformat()
        content = f"{timestamp}:{text}"
        return hashlib.md5(content.encode()).hexdigest()

    def add_memory(self, user_message: str, bot_response: str):
        """Add one conversation exchange to memory.

        The user message and the assistant response are combined into a
        single document so that retrieval sees the full context.

        Args:
            user_message: Text sent by the user.
            bot_response: Text the assistant answered with.
        """
        # Combine message and response into one document.
        combined = f"Usuário: {user_message}\nAssistente: {bot_response}"

        # Embed the combined document.
        embedding = self.model.encode(combined).tolist()

        # Unique ID for this entry.
        doc_id = self._generate_id(combined)

        # Metadata keeps a truncated copy (500 chars) of each side, while the
        # full text lives in the stored document itself.
        metadata = {
            "user_message": user_message[:500],
            "bot_response": bot_response[:500],
            "timestamp": datetime.now().isoformat(),
        }

        self.collection.add(
            ids=[doc_id],
            embeddings=[embedding],
            documents=[combined],
            metadatas=[metadata],
        )

        print(f"Memória adicionada. Total: {self.collection.count()}")

    def search_memories(self, query: str, k: int = 3) -> list[dict]:
        """Search for the stored exchanges most similar to ``query``.

        Args:
            query: Text to search for.
            k: Maximum number of memories to return. Values <= 0 yield an
                empty list instead of being forwarded to ChromaDB.

        Returns:
            A list of dicts with the keys ``text``, ``user_message``,
            ``bot_response``, ``score`` (1 - distance) and ``timestamp``.
            The list is empty when nothing is stored or ``k`` is not
            positive.
        """
        # Read the size once: it is needed both for the emptiness check and
        # to clamp n_results, and a single read keeps the two consistent.
        total = self.collection.count()
        if total == 0 or k <= 0:
            return []

        # Embed the query.
        query_embedding = self.model.encode(query).tolist()

        # Never ask for more results than there are stored entries.
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=min(k, total),
        )

        if not results or not results["documents"] or not results["documents"][0]:
            return []

        # Only one query embedding was sent, so only the first result row
        # is relevant.
        documents = results["documents"][0]
        metadatas = results["metadatas"][0] if results["metadatas"] else None
        distances = results["distances"][0] if results["distances"] else None

        memories = []
        for i, doc in enumerate(documents):
            # Tolerate a missing metadata entry rather than failing on .get.
            metadata = (metadatas[i] if metadatas else None) or {}
            distance = distances[i] if distances else 0
            memories.append({
                "text": doc,
                "user_message": metadata.get("user_message", ""),
                "bot_response": metadata.get("bot_response", ""),
                "score": 1 - distance,  # Convert distance into similarity.
                "timestamp": metadata.get("timestamp", ""),
            })

        return memories

    def clear_memories(self):
        """Delete every stored memory by dropping and recreating the collection."""
        self.client.delete_collection("chat_memory")
        self.collection = self._open_collection()
        print("Memórias limpas.")

    def get_stats(self) -> dict:
        """Return the number of stored memories and the embedding model name."""
        return {
            "total_memories": self.collection.count(),
            "model": EMBEDDING_MODEL,
        }