"""Chat-session persistence helpers backed by a MongoDB collection.

Each stored document carries a ``SessionId``, a ``History`` field holding a
JSON-encoded message (or list of messages), and optionally ``title`` and
``created_at``.
"""

import json
import re
import uuid
from contextlib import contextmanager
from datetime import datetime, timezone

from langchain_mongodb import MongoDBChatMessageHistory
from pymongo import MongoClient

from core.config import MONGO_DATABASE_HOST

_DATABASE_NAME = "douane_db"
_COLLECTION_NAME = "chat_history"

# Placeholder titles; a session carrying one of these may be auto-renamed.
_DEFAULT_TITLE = "Nouvelle session"
_UNTITLED = "Session sans titre"
_GENERIC_TITLES = frozenset({"", _DEFAULT_TITLE, _UNTITLED})

# Everything that is not a letter (incl. Latin-1 accented), digit or whitespace.
_NON_WORD_CHARS = re.compile(r"[^a-zA-ZÀ-ÿ0-9\s]")

STOP_WORDS = {
    "je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles",
    "le", "la", "les", "un", "une", "des", "de", "du", "et", "en", "à",
    "pour", "comment", "quoi", "où", "qui", "que", "dans"
}


@contextmanager
def _chat_collection():
    """Yield the chat-history collection and close the client afterwards.

    The client is closed in ``finally`` so that connections are released
    even when the caller returns early or raises.
    """
    client = MongoClient(MONGO_DATABASE_HOST)
    try:
        yield client[_DATABASE_NAME][_COLLECTION_NAME]
    finally:
        client.close()


def _decode_history(raw):
    """Return the stored ``History`` value as a list of raw messages.

    Accepts a JSON string (or bytes), an already-decoded dict or list, and
    returns ``[]`` for anything that cannot be interpreted as a history.
    A single message object is wrapped in a one-element list.
    """
    if isinstance(raw, (str, bytes, bytearray)):
        try:
            raw = json.loads(raw)
        except (TypeError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            return []
    if isinstance(raw, dict):
        return [raw]
    if isinstance(raw, list):
        return raw
    return []


def _normalize_all(raw_messages):
    """Normalize every message once, dropping the ones that are unusable."""
    return [m for m in map(normalize_message, raw_messages) if m is not None]


def normalize_message(msg):
    """Normalize a stored message for LangChain/Streamlit.

    Two input shapes are recognised:

    * nested: ``{"type": ..., "data": {"content": ...}}`` (``type`` defaults
      to ``"human"`` when absent);
    * flat: ``{"type": ..., "content": ...}``.

    Returns:
        A ``{"type", "content"}`` dict, or ``None`` when ``msg`` is not a
        dict or matches neither shape.
    """
    if not isinstance(msg, dict):
        return None

    data = msg.get("data")
    # Only a mapping can carry nested content; anything else falls through
    # to the flat format instead of raising.
    if isinstance(data, dict) and "content" in data:
        return {"type": msg.get("type", "human"), "content": data["content"]}

    if "type" in msg and "content" in msg:
        return {"type": msg["type"], "content": msg["content"]}

    return None


def generate_session_title(first_message: str) -> str:
    """Build a short title from the first message of a session.

    The text is lower-cased, stripped of punctuation, filtered against
    ``STOP_WORDS``, and the first five remaining words are capitalized.

    Returns:
        The generated title, or ``"Nouvelle session"`` when no keyword is
        left (or when ``first_message`` is not a string).
    """
    if not isinstance(first_message, str):
        return _DEFAULT_TITLE

    cleaned = _NON_WORD_CHARS.sub("", first_message.lower())
    keywords = [word for word in cleaned.split() if word not in STOP_WORDS]
    if not keywords:
        return _DEFAULT_TITLE

    return " ".join(word.capitalize() for word in keywords[:5])


def load_all_sessions():
    """Load every non-empty session stored in MongoDB.

    Returns:
        A dict mapping session id to ``{"title", "history", "created_at"}``,
        ordered from newest to oldest ``created_at``. Documents whose
        history yields no usable message are skipped.
    """
    all_sessions = {}

    with _chat_collection() as collection:
        for doc in collection.find():
            session_id = doc.get("SessionId") or str(doc.get("_id"))

            messages = _normalize_all(_decode_history(doc.get("History", "[]")))
            if not messages:
                continue

            # Fall back to the ObjectId timestamp when no explicit date exists.
            created_at = doc.get("created_at") or doc["_id"].generation_time
            if created_at and created_at.tzinfo is None:
                # Make naive datetimes comparable with aware ones when sorting.
                created_at = created_at.replace(tzinfo=timezone.utc)

            # NOTE(review): documents sharing a SessionId overwrite each other
            # here (last one wins) - confirm one document per session is
            # the intended storage model.
            all_sessions[session_id] = {
                "title": doc.get("title", "Session sans titre"),
                "history": messages,
                "created_at": created_at,
            }

    return dict(
        sorted(
            all_sessions.items(),
            key=lambda item: item[1]["created_at"],
            reverse=True,
        )
    )


def start_new_session(session_state: dict) -> str:
    """Create a new in-memory session; nothing is written to MongoDB yet.

    Args:
        session_state: Mutable mapping holding the UI state. Its
            ``"session_id"`` entry is set and a fresh entry is added under
            ``"sessions"`` (created if missing).

    Returns:
        The newly generated session id.
    """
    session_id = f"session_{uuid.uuid4()}"
    session_state["session_id"] = session_id

    if "sessions" not in session_state:
        session_state["sessions"] = {}
    session_state["sessions"][session_id] = {
        "history": [],
        "created_at": datetime.now(timezone.utc),
        "title": "Nouvelle session",
    }
    return session_id


def add_message_to_session(session_id: str, message: dict):
    """Append a message to a session stored in MongoDB.

    If no document exists for ``session_id`` yet, one is created with a
    title generated from this first message. Messages that cannot be
    normalized (see ``normalize_message``) are silently ignored.
    """
    msg = normalize_message(message)
    if msg is None:
        return

    with _chat_collection() as collection:
        session = collection.find_one({"SessionId": session_id})

        if session:
            # NOTE(review): read-modify-write; concurrent writers to the same
            # session could lose a message.
            history = _decode_history(session.get("History", "[]"))
            history.append(msg)
            collection.update_one(
                {"SessionId": session_id},
                {"$set": {"History": json.dumps(history)}},
            )
        else:
            collection.insert_one({
                "SessionId": session_id,
                "title": generate_session_title(msg["content"]),
                "History": json.dumps([msg]),
                "created_at": datetime.now(timezone.utc),
            })


def rename_session(session_id: str, new_title: str):
    """Manually set the title of a session."""
    with _chat_collection() as collection:
        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"title": new_title}},
        )


def update_session_title(session_id: str):
    """Replace a missing or generic title with one derived from the history.

    Nothing happens when the session does not exist, already has a
    non-generic title, or has no first message with usable content.
    """
    with _chat_collection() as collection:
        session = collection.find_one({"SessionId": session_id})
        if not session:
            return

        title = session.get("title", "")
        # A non-string title (e.g. null) is treated like a missing one.
        if isinstance(title, str) and title.strip() not in _GENERIC_TITLES:
            return

        history = _decode_history(session.get("History", "[]"))
        first = history[0] if history else None
        if not isinstance(first, dict):
            return

        # Prefer top-level content; fall back to the nested "data" format so
        # that both stored message shapes can yield a title.
        first_message = first.get("content", "")
        if not first_message:
            normalized = normalize_message(first)
            first_message = normalized["content"] if normalized else ""
        if not first_message:
            return

        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"title": generate_session_title(first_message)}},
        )


def get_messages_for_session(session_id: str):
    """Return the normalized messages of a session.

    Returns:
        A list of ``{"type", "content"}`` dicts; empty when the session has
        not been persisted yet or its history cannot be decoded.
    """
    with _chat_collection() as collection:
        doc = collection.find_one({"SessionId": session_id})

    if not doc:
        return []
    return _normalize_all(_decode_history(doc.get("History", "[]")))


def get_session_history(session_id: str) -> MongoDBChatMessageHistory:
    """Return a LangChain message history bound to the same collection."""
    return MongoDBChatMessageHistory(
        connection_string=MONGO_DATABASE_HOST,
        session_id=session_id,
        database_name="douane_db",
        collection_name="chat_history"
    )