Legal-Chatbot / core /memory.py
msi
first commit
f7b069f
from pymongo import MongoClient
from datetime import datetime, timezone
from core.config import MONGO_DATABASE_HOST
import uuid
import json
import re
from langchain_mongodb import MongoDBChatMessageHistory
def normalize_message(msg):
    """Normalize a stored chat message for LangChain/Streamlit.

    Two input layouts are recognised:

    * nested: ``{"type": ..., "data": {"content": ...}}`` -- ``type``
      defaults to ``"human"`` when it is absent;
    * flat: ``{"type": ..., "content": ...}``.

    Args:
        msg: The decoded message. Anything that is not a dict is rejected.

    Returns:
        A new ``{"type": ..., "content": ...}`` dict, or ``None`` when
        ``msg`` matches neither layout.
    """
    if not isinstance(msg, dict):
        return None

    data = msg.get("data")
    # Only a mapping can carry the nested payload. On a str, ``in`` would be
    # a substring test and the subsequent lookup would raise TypeError; on
    # None the ``in`` test itself would raise.
    if isinstance(data, dict) and "content" in data:
        return {"type": msg.get("type", "human"), "content": data["content"]}

    if "type" in msg and "content" in msg:
        return {"type": msg["type"], "content": msg["content"]}

    return None
# French function words that are ignored when building a session title.
STOP_WORDS = {
    "je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles",
    "le", "la", "les", "un", "une", "des", "de", "du", "et", "en", "à",
    "pour", "comment", "quoi", "où", "qui", "que", "dans"
}

# Fragments left behind once an elision is split at its apostrophe
# ("l'huile" -> "l" + "huile"). Kept separate so STOP_WORDS keeps its value.
_ELIDED_FORMS = frozenset({"l", "d", "j", "qu", "n", "s", "c", "m", "t"})

# Matches every character outside a-z, A-Z, the À-ÿ range, digits and
# whitespace. Compiled once at import time.
_TITLE_SEPARATOR_RE = re.compile(r"[^a-zA-ZÀ-ÿ0-9\s]")

_DEFAULT_SESSION_TITLE = "Nouvelle session"
_MAX_TITLE_WORDS = 5


def generate_session_title(first_message: str) -> str:
    """Build a short session title from the first message of a session.

    The message is lower-cased, punctuation is turned into word separators,
    stop words and elided articles are dropped, and the first
    ``_MAX_TITLE_WORDS`` remaining words are capitalized and joined with
    single spaces.

    Args:
        first_message: Text of the first message.

    Returns:
        The generated title, or ``"Nouvelle session"`` when the input is not
        a string or contains no keyword.
    """
    if not isinstance(first_message, str):
        return _DEFAULT_SESSION_TITLE

    # Substitute a space rather than deleting: deleting glues the two sides
    # of an apostrophe or hyphen together ("l'huile" -> "lhuile").
    cleaned = _TITLE_SEPARATOR_RE.sub(" ", first_message.lower())
    keywords = [
        word
        for word in cleaned.split()
        if word not in STOP_WORDS and word not in _ELIDED_FORMS
    ]
    if not keywords:
        return _DEFAULT_SESSION_TITLE

    return " ".join(word.capitalize() for word in keywords[:_MAX_TITLE_WORDS])
def load_all_sessions():
    """Load every stored chat session from MongoDB, newest first.

    Reads all documents of ``douane_db.chat_history``. Each document's
    ``History`` field is expected to be a JSON string holding either a list
    of messages or a single message object. Messages are passed through
    ``normalize_message`` and the ones it rejects are dropped.

    Documents that share a ``SessionId`` are merged into one session instead
    of overwriting each other: histories are concatenated in cursor order,
    the earliest ``created_at`` wins, and the first non-placeholder title is
    kept.
    NOTE(review): ``MongoDBChatMessageHistory`` (see ``get_session_history``)
    targets this same collection and appears to insert one document per
    message -- confirm against the library version in use.

    Returns:
        dict: ``{session_id: {"title", "history", "created_at"}}`` ordered by
        ``created_at`` descending. Documents without any usable message are
        skipped.
    """
    placeholder_title = "Session sans titre"
    # Sort key of last resort, so one odd document cannot break the ordering.
    oldest = datetime.min.replace(tzinfo=timezone.utc)
    all_sessions = {}

    # The context manager closes the client once the cursor has been drained.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]

        for doc in collection.find():
            session_id = doc.get("SessionId") or str(doc.get("_id"))

            try:
                raw_messages = json.loads(doc.get("History", "[]"))
                if isinstance(raw_messages, dict):
                    raw_messages = [raw_messages]
                # Normalize each message once, then drop the rejected ones.
                normalized = (normalize_message(m) for m in raw_messages)
                messages = [m for m in normalized if m]
            except (TypeError, ValueError):
                # Missing, non-string or malformed History: treat as empty.
                messages = []
            if not messages:
                continue

            created_at = doc.get("created_at")
            if not isinstance(created_at, datetime):
                # ObjectId ids expose their creation time; other id types
                # do not have this attribute.
                created_at = getattr(doc.get("_id"), "generation_time", None)
            if not isinstance(created_at, datetime):
                created_at = oldest
            elif created_at.tzinfo is None:
                # Make naive values aware so they compare with aware ones.
                created_at = created_at.replace(tzinfo=timezone.utc)

            title = doc.get("title", placeholder_title)
            entry = all_sessions.get(session_id)
            if entry is None:
                all_sessions[session_id] = {
                    "title": title,
                    "history": messages,
                    "created_at": created_at,
                }
                continue

            entry["history"].extend(messages)
            entry["created_at"] = min(entry["created_at"], created_at)
            if entry["title"] == placeholder_title:
                entry["title"] = title

    return dict(
        sorted(
            all_sessions.items(),
            key=lambda item: item[1]["created_at"],
            reverse=True,
        )
    )
def start_new_session(session_state: dict) -> str:
    """Create a new in-memory session; nothing is written to MongoDB.

    Args:
        session_state: Mutable mapping holding the UI state. Its
            ``"session_id"`` entry is set to the new id and a fresh entry is
            added under ``"sessions"``, which is created if missing.

    Returns:
        The new session id, of the form ``"session_<uuid4>"``.
    """
    session_id = f"session_{uuid.uuid4()}"
    session_state["session_id"] = session_id

    # Create the registry on first use so a fresh state does not raise
    # KeyError.
    if "sessions" not in session_state:
        session_state["sessions"] = {}

    session_state["sessions"][session_id] = {
        "history": [],
        "created_at": datetime.now(timezone.utc),
        "title": "Nouvelle session",
    }
    return session_id
def add_message_to_session(session_id: str, message: dict):
    """Append a message to a session stored in MongoDB.

    The message is first reduced to ``{"type", "content"}`` with
    ``normalize_message``; messages it rejects are ignored. If no document
    exists for ``session_id`` yet, one is inserted with a title generated
    from this first message. Otherwise the message is appended to the
    JSON-encoded ``History`` list of the existing document.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.
        message: Message in the nested (``data.content``) or flat
            (``type``/``content``) layout.

    Returns:
        None.
    """
    msg = normalize_message(message)
    if msg is None:
        return

    # The context manager closes the client on every exit path.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        session = collection.find_one({"SessionId": session_id})

        if session is None:
            content = msg["content"]
            # Title generation works on text; keep the default otherwise.
            if isinstance(content, str):
                title = generate_session_title(content)
            else:
                title = "Nouvelle session"
            collection.insert_one({
                "SessionId": session_id,
                "title": title,
                "History": json.dumps([msg]),
                "created_at": datetime.now(timezone.utc)
            })
            return

        try:
            history = json.loads(session.get("History", "[]"))
        except (TypeError, ValueError):
            # Missing, non-string or malformed History: start from scratch.
            history = []
        if isinstance(history, dict):
            # A single stored message object becomes a one-element list.
            history = [history]
        elif not isinstance(history, list):
            history = []

        history.append(msg)
        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"History": json.dumps(history)}}
        )
def rename_session(session_id: str, new_title: str):
    """Manually rename a session.

    Sets the ``title`` field of the ``douane_db.chat_history`` document whose
    ``SessionId`` equals ``session_id``.

    Args:
        session_id: Identifier of the session to rename.
        new_title: Title to store.

    Returns:
        None.
    """
    # The context manager closes the client instead of leaking one per call.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"title": new_title}},
        )
def update_session_title(session_id: str):
    """Replace a missing or generic session title with a generated one.

    If the stored title is empty, ``"Nouvelle session"`` or
    ``"Session sans titre"``, a new title is generated from the first message
    of the stored history and written back. Nothing happens when the session
    does not exist, already has a specific title, or has no usable first
    message.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.

    Returns:
        None.
    """
    generic_titles = ("", "Nouvelle session", "Session sans titre")

    # The context manager closes the client on every exit path.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        session = collection.find_one({"SessionId": session_id})
        if not session:
            return

        # ``or ""`` also covers a title stored as null.
        title = session.get("title") or ""
        if str(title).strip() not in generic_titles:
            return

        try:
            history = json.loads(session.get("History", "[]"))
        except (TypeError, ValueError):
            return
        if isinstance(history, dict):
            history = [history]
        if not isinstance(history, list) or not history:
            return

        first = history[0] if isinstance(history[0], dict) else {}
        # Accept both layouts handled elsewhere in this module: nested
        # ``data.content`` and flat ``content``.
        data = first.get("data")
        if isinstance(data, dict) and "content" in data:
            first_message = data["content"]
        else:
            first_message = first.get("content", "")

        if isinstance(first_message, str) and first_message:
            new_title = generate_session_title(first_message)
            collection.update_one(
                {"SessionId": session_id},
                {"$set": {"title": new_title}}
            )
def get_messages_for_session(session_id: str):
    """Return the normalized messages stored in MongoDB for a session.

    Every document whose ``SessionId`` equals ``session_id`` is read and
    their histories are concatenated in cursor order. With a single document
    per session this is the same result as reading that document alone.
    NOTE(review): ``MongoDBChatMessageHistory`` appears to store one document
    per message in this collection, which is why all matching documents are
    read -- confirm against the library version in use.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.

    Returns:
        list: ``{"type", "content"}`` dicts; empty when the session has not
        been created yet or holds no usable message.
    """
    messages = []

    # The context manager closes the client once the cursor has been drained.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]

        for doc in collection.find({"SessionId": session_id}):
            try:
                raw_history = json.loads(doc.get("History", "[]"))
            except (TypeError, ValueError):
                # Missing, non-string or malformed History: skip the document.
                continue

            if isinstance(raw_history, dict):
                raw_history = [raw_history]
            elif not isinstance(raw_history, list):
                continue

            # Normalize each entry once and keep only the accepted ones.
            normalized = (normalize_message(raw) for raw in raw_history)
            messages.extend(m for m in normalized if m)

    return messages
def get_session_history(session_id: str) -> MongoDBChatMessageHistory:
    """Build the LangChain message history bound to ``session_id``.

    The history object is configured with ``MONGO_DATABASE_HOST`` as
    connection string and targets the ``chat_history`` collection of the
    ``douane_db`` database.

    Args:
        session_id: Identifier of the session whose history is wanted.

    Returns:
        A ``MongoDBChatMessageHistory`` instance for that session.
    """
    # NOTE(review): this is the collection add_message_to_session writes to;
    # confirm that both writers use a compatible document layout.
    history = MongoDBChatMessageHistory(
        session_id=session_id,
        connection_string=MONGO_DATABASE_HOST,
        database_name="douane_db",
        collection_name="chat_history",
    )
    return history