|
|
"""
|
|
|
Vector Memory Module
|
|
|
Mem贸ria de longo prazo usando ChromaDB + Sentence Transformers
|
|
|
"""
|
|
|
|
|
|
import chromadb
|
|
|
from sentence_transformers import SentenceTransformer
|
|
|
from datetime import datetime
|
|
|
import hashlib
|
|
|
|
|
|
|
|
|
# Sentence Transformers model used to embed all stored and queried text.
# Must stay constant across runs, or old embeddings become incomparable.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"


# Module-level singleton holder; populated lazily by get_memory().
_memory_instance = None
|
|
|
|
|
|
|
|
|
def get_memory():
    """Return the shared VectorMemory singleton, building it on first use."""
    global _memory_instance
    if _memory_instance is not None:
        return _memory_instance
    _memory_instance = VectorMemory()
    return _memory_instance
|
|
|
|
|
|
|
|
|
class VectorMemory:
    """Long-term conversational memory backed by ChromaDB + Sentence Transformers.

    Stores each user/assistant exchange as one embedded document in an
    in-process Chroma collection and retrieves the most similar past
    exchanges via cosine similarity.
    """

    def __init__(self):
        # NOTE: runtime strings below had mojibake (UTF-8 decoded through a
        # CJK codec, e.g. "mem贸ria"); restored to correct Portuguese.
        print("Inicializando memória vetorial...")

        # Load the embedding model once; reused by add_memory/search_memories.
        self.model = SentenceTransformer(EMBEDDING_MODEL)
        print(f"Modelo carregado: {EMBEDDING_MODEL}")

        # Ephemeral in-process client — memories do not persist across
        # processes unless a persistent client is configured elsewhere.
        self.client = chromadb.Client()
        self.collection = self.client.get_or_create_collection(
            name="chat_memory",
            metadata={"hnsw:space": "cosine"}  # cosine distance for the HNSW index
        )

        print(f"Memória pronta. {self.collection.count()} memórias existentes.")

    def _generate_id(self, text: str) -> str:
        """Generate a unique document ID from the current timestamp and *text*.

        md5 is acceptable here: the digest is only an identifier, not
        security-sensitive. The microsecond-resolution timestamp makes
        collisions between distinct calls effectively impossible.
        """
        timestamp = datetime.now().isoformat()
        content = f"{timestamp}:{text}"
        return hashlib.md5(content.encode()).hexdigest()

    def add_memory(self, user_message: str, bot_response: str) -> None:
        """Add one conversation exchange to memory.

        The user message and bot response are embedded together as a single
        document so retrieval sees the full exchange context.
        """
        combined = f"Usuário: {user_message}\nAssistente: {bot_response}"

        embedding = self.model.encode(combined).tolist()
        doc_id = self._generate_id(combined)

        # Truncate stored copies to keep metadata small; the full text
        # lives in the document itself.
        metadata = {
            "user_message": user_message[:500],
            "bot_response": bot_response[:500],
            "timestamp": datetime.now().isoformat()
        }

        self.collection.add(
            ids=[doc_id],
            embeddings=[embedding],
            documents=[combined],
            metadatas=[metadata]
        )

        print(f"Memória adicionada. Total: {self.collection.count()}")

    def search_memories(self, query: str, k: int = 3) -> list[dict]:
        """Search for the *k* memories most relevant to *query*.

        Returns a list of dicts with keys: text, user_message, bot_response,
        score (1 - cosine distance; higher is more similar), timestamp.
        Returns an empty list when no memories are stored.
        """
        if self.collection.count() == 0:
            return []

        query_embedding = self.model.encode(query).tolist()

        # Never request more results than there are stored documents.
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=min(k, self.collection.count())
        )

        memories = []
        # Chroma returns per-query lists; index [0] is our single query.
        if results and results['documents'] and results['documents'][0]:
            for i, doc in enumerate(results['documents'][0]):
                metadata = results['metadatas'][0][i] if results['metadatas'] else {}
                distance = results['distances'][0][i] if results['distances'] else 0

                memories.append({
                    "text": doc,
                    "user_message": metadata.get("user_message", ""),
                    "bot_response": metadata.get("bot_response", ""),
                    "score": 1 - distance,  # cosine distance -> similarity
                    "timestamp": metadata.get("timestamp", "")
                })

        return memories

    def clear_memories(self) -> None:
        """Delete all memories and recreate an empty collection."""
        self.client.delete_collection("chat_memory")
        self.collection = self.client.get_or_create_collection(
            name="chat_memory",
            metadata={"hnsw:space": "cosine"}
        )
        print("Memórias limpas.")

    def get_stats(self) -> dict:
        """Return memory statistics: total stored memories and model name."""
        return {
            "total_memories": self.collection.count(),
            "model": EMBEDDING_MODEL
        }
|
|
|
|