Spaces:
Sleeping
Sleeping
| from pymongo import MongoClient | |
| from datetime import datetime, timezone | |
| from core.config import MONGO_DATABASE_HOST | |
| import uuid | |
| import json | |
| import re | |
| from langchain_mongodb import MongoDBChatMessageHistory | |
def normalize_message(msg):
    """Convert a stored chat message into a flat ``{"type", "content"}`` dict.

    Two input shapes are recognised:

    * nested: ``{"type": ..., "data": {"content": ...}}`` -- ``type`` falls
      back to ``"human"`` when it is absent;
    * flat: ``{"type": ..., "content": ...}``.

    Args:
        msg: The raw message, normally a dict decoded from JSON.

    Returns:
        A new ``{"type": ..., "content": ...}`` dict, or ``None`` when ``msg``
        is not a dict or matches neither shape.
    """
    if not isinstance(msg, dict):
        return None

    payload = msg.get("data")
    # Only treat "data" as the nested payload when it really is a mapping:
    # with a string the membership test becomes a substring search (and the
    # subsequent indexing raises TypeError), and with None it raises outright.
    if isinstance(payload, dict) and "content" in payload:
        return {"type": msg.get("type", "human"), "content": payload["content"]}

    if "type" in msg and "content" in msg:
        return {"type": msg["type"], "content": msg["content"]}

    return None
# French function words that are ignored when deriving a session title.
STOP_WORDS = {
    "je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles",
    "le", "la", "les", "un", "une", "des", "de", "du", "et", "en", "à",
    "pour", "comment", "quoi", "où", "qui", "que", "dans",
}

# Elided particle attached to the following word by a straight or curly
# apostrophe, e.g. the "l'" of "l'huile" or the "qu'" of "qu'il".
_ELISION_RE = re.compile(r"\b(?:[cdjlmnst]|qu)['’]")
# Every character that is not kept in a title: anything outside ASCII
# letters/digits, the À-ÿ range and whitespace.
_NON_TITLE_CHARS_RE = re.compile(r"[^a-zA-ZÀ-ÿ0-9\s]")


def generate_session_title(first_message: str) -> str:
    """Derive a short title from the first message of a session.

    The message is lower-cased, elided particles ("l'", "d'", "qu'", ...) are
    removed, remaining punctuation is stripped, stop words are discarded and
    the first five remaining words are capitalised and joined with spaces.

    Args:
        first_message: Text of the first message of the session.

    Returns:
        The generated title, or ``"Nouvelle session"`` when the input is not
        a string or contains no keyword.
    """
    if not isinstance(first_message, str):
        return "Nouvelle session"

    # Drop elisions *before* stripping punctuation; otherwise removing the
    # apostrophe glues the article onto the noun ("l'huile" -> "lhuile").
    text = _ELISION_RE.sub("", first_message.lower())
    text = _NON_TITLE_CHARS_RE.sub("", text)

    keywords = [word for word in text.split() if word not in STOP_WORDS]
    if not keywords:
        return "Nouvelle session"

    # At most five keywords keep the title short.
    return " ".join(word.capitalize() for word in keywords[:5])
def load_all_sessions():
    """Load every stored session that has at least one usable message.

    Reads all documents of the ``chat_history`` collection in ``douane_db``.
    A document's ``History`` field is a JSON string holding either a list of
    messages or a single message dict; each message is passed through
    ``normalize_message`` and unusable ones are dropped. Documents whose
    history cannot be decoded, or that yield no usable message, are skipped.

    Documents sharing the same session id are merged into one entry: their
    messages are concatenated in cursor order, the earliest timestamp is
    kept, and the most recent explicit ``title`` wins.

    Returns:
        A dict mapping session id to ``{"title", "history", "created_at"}``,
        ordered from the most recently created session to the oldest.
    """
    all_sessions = {}

    # The context manager closes the client (and its connections) on exit,
    # instead of leaking a new client on every call.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]

        for doc in collection.find():
            session_id = doc.get("SessionId") or str(doc.get("_id"))

            try:
                raw_messages = json.loads(doc.get("History", "[]"))
                if isinstance(raw_messages, dict):
                    raw_messages = [raw_messages]
                # Normalise each message once and keep only the usable ones.
                messages = [
                    normalized
                    for normalized in map(normalize_message, raw_messages)
                    if normalized
                ]
            except (TypeError, ValueError):
                # Missing, non-JSON or non-iterable history: best effort,
                # treat the document as empty.
                messages = []

            if not messages:
                continue

            # Fall back to the timestamp embedded in the ObjectId.
            created_at = doc.get("created_at") or doc["_id"].generation_time
            if created_at and created_at.tzinfo is None:
                # Naive datetimes are taken as UTC so they can be compared
                # with aware ones when sorting.
                created_at = created_at.replace(tzinfo=timezone.utc)

            session = all_sessions.get(session_id)
            if session is None:
                all_sessions[session_id] = {
                    "title": doc.get("title", "Session sans titre"),
                    "history": messages,
                    "created_at": created_at,
                }
                continue

            # Several documents can carry the same SessionId (e.g. one
            # document per message); merge them instead of letting the last
            # one overwrite the others.
            session["history"].extend(messages)
            session["created_at"] = min(session["created_at"], created_at)
            if "title" in doc:
                session["title"] = doc["title"]

    return dict(
        sorted(
            all_sessions.items(),
            key=lambda item: item[1]["created_at"],
            reverse=True,
        )
    )
def start_new_session(session_state: dict) -> str:
    """Register a fresh, empty session in ``session_state``.

    The session only exists in memory: this function performs no database
    access. The new id becomes the current ``session_state["session_id"]``
    and an empty record is stored under ``session_state["sessions"]``.

    Args:
        session_state: Mutable mapping that already contains a ``"sessions"``
            mapping.

    Returns:
        The newly generated session id (``"session_<uuid4>"``).
    """
    new_id = "session_" + str(uuid.uuid4())
    session_state["session_id"] = new_id

    blank_session = {
        "history": [],
        "created_at": datetime.now(timezone.utc),
        "title": "Nouvelle session",
    }
    session_state["sessions"][new_id] = blank_session
    return new_id
def add_message_to_session(session_id: str, message: dict):
    """Append a message to a session stored in MongoDB.

    The message is first flattened with ``normalize_message``; messages that
    match neither supported shape are ignored. If a document with this
    ``SessionId`` exists, the message is appended to its JSON-encoded
    ``History``. Otherwise a new document is inserted with a title generated
    from the message content and the current UTC time as ``created_at``.

    Args:
        session_id: Identifier of the session to append to.
        message: Message in nested (``{"type", "data": {"content"}}``) or flat
            (``{"type", "content"}``) form.

    Returns:
        None.
    """
    msg = normalize_message(message)
    if msg is None:
        return

    # The context manager closes the client (and its connections) on exit.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        session = collection.find_one({"SessionId": session_id})

        if session is None:
            # First message of the session: create the document.
            collection.insert_one({
                "SessionId": session_id,
                "title": generate_session_title(msg["content"]),
                "History": json.dumps([msg]),
                "created_at": datetime.now(timezone.utc),
            })
            return

        try:
            history = json.loads(session.get("History", "[]"))
        except (TypeError, ValueError):
            # Missing or corrupt history: start again from an empty list.
            history = []

        if isinstance(history, dict):
            # A single stored message is promoted to a one-element list.
            history = [history]
        elif not isinstance(history, list):
            history = []

        history.append(msg)
        # NOTE: read-modify-write, not atomic -- two concurrent writers on the
        # same session can overwrite each other's message.
        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"History": json.dumps(history)}},
        )
def rename_session(session_id: str, new_title: str):
    """Set the title of a stored session.

    Updates the ``title`` field of the first ``chat_history`` document whose
    ``SessionId`` equals ``session_id``; nothing happens if none matches.

    Args:
        session_id: Identifier of the session to rename.
        new_title: Title to store.

    Returns:
        None.
    """
    # The context manager closes the client (and its connections) on exit.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"title": new_title}},
        )
def update_session_title(session_id: str):
    """Replace a missing or generic session title with a generated one.

    Only acts when the stored title is empty, ``"Nouvelle session"`` or
    ``"Session sans titre"``. The new title is generated from the content of
    the first message of the stored history; if there is no usable first
    message the document is left unchanged.

    Args:
        session_id: Identifier of the session to update.

    Returns:
        None.
    """
    # The context manager closes the client (and its connections) on exit.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        session = collection.find_one({"SessionId": session_id})
        if not session:
            return

        # A stored null title is treated like a missing one.
        title = session.get("title") or ""
        if str(title).strip() not in ("", "Nouvelle session", "Session sans titre"):
            return

        try:
            history = json.loads(session.get("History", "[]"))
        except (TypeError, ValueError):
            # Missing or corrupt history: nothing to derive a title from.
            return

        if isinstance(history, dict):
            history = [history]
        if not isinstance(history, list) or not history:
            return

        first_entry = history[0]
        # Accept the nested {"data": {"content": ...}} shape as well as the
        # flat one, like the rest of this module does.
        normalized = normalize_message(first_entry)
        if normalized is not None:
            first_message = normalized["content"]
        elif isinstance(first_entry, dict):
            first_message = first_entry.get("content", "")
        else:
            first_message = ""

        if not isinstance(first_message, str) or not first_message:
            return

        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"title": generate_session_title(first_message)}},
        )
def get_messages_for_session(session_id: str):
    """Return the normalised messages stored for a session.

    Looks up the first ``chat_history`` document whose ``SessionId`` equals
    ``session_id`` and decodes its JSON ``History`` field, which may hold a
    list of messages or a single message dict.

    Args:
        session_id: Identifier of the session to read.

    Returns:
        A list of ``{"type", "content"}`` dicts. The list is empty when the
        session does not exist yet, when its history cannot be decoded, or
        when no stored message is usable.
    """
    # The context manager closes the client (and its connections) on exit.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        doc = client["douane_db"]["chat_history"].find_one(
            {"SessionId": session_id}
        )

    if not doc:
        return []

    try:
        raw_history = json.loads(doc.get("History", "[]"))
    except (TypeError, ValueError):
        # Missing or corrupt history is treated as empty.
        raw_history = []

    if isinstance(raw_history, dict):
        raw_history = [raw_history]
    elif not isinstance(raw_history, list):
        # Any other JSON value (number, string, null) carries no messages.
        raw_history = []

    # Normalise each message once and keep only the usable ones.
    return [
        normalized
        for normalized in map(normalize_message, raw_history)
        if normalized
    ]
def get_session_history(session_id: str) -> MongoDBChatMessageHistory:
    """Build a ``MongoDBChatMessageHistory`` for the given session.

    The history object is configured with the module's MongoDB connection
    string and targets the ``chat_history`` collection of ``douane_db``.

    Args:
        session_id: Identifier of the session whose history is wanted.

    Returns:
        A new ``MongoDBChatMessageHistory`` instance bound to ``session_id``.
    """
    history_settings = {
        "connection_string": MONGO_DATABASE_HOST,
        "session_id": session_id,
        "database_name": "douane_db",
        "collection_name": "chat_history",
    }
    return MongoDBChatMessageHistory(**history_settings)