Spaces:
Sleeping
Sleeping
File size: 6,571 Bytes
f7b069f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
from pymongo import MongoClient
from datetime import datetime, timezone
from core.config import MONGO_DATABASE_HOST
import uuid
import json
import re
from langchain_mongodb import MongoDBChatMessageHistory
def normalize_message(msg):
    """Normalize a stored chat message into a flat ``{"type", "content"}`` dict.

    Two input shapes are recognized:

    * nested: ``{"type": ..., "data": {"content": ...}}`` -- ``type``
      defaults to ``"human"`` when absent;
    * flat: ``{"type": ..., "content": ...}``.

    Args:
        msg: The raw message, normally a dict decoded from a stored history.

    Returns:
        A new ``{"type": ..., "content": ...}`` dict, or ``None`` when
        ``msg`` is not a dict or matches neither shape.
    """
    if not isinstance(msg, dict):
        return None

    data = msg.get("data")
    # Only treat "data" as a payload when it is a mapping. On a str the
    # ``in`` test is a substring match and the lookup that follows raises
    # TypeError; on None the ``in`` test itself raises.
    if isinstance(data, dict) and "content" in data:
        return {"type": msg.get("type", "human"), "content": data["content"]}

    if "type" in msg and "content" in msg:
        return {"type": msg["type"], "content": msg["content"]}

    return None
# French function words that are dropped when a session title is derived
# from a message.
STOP_WORDS = {
    # personal pronouns
    "je", "tu", "il", "elle", "on",
    "nous", "vous", "ils", "elles",
    # articles
    "le", "la", "les", "un", "une", "des", "du",
    # prepositions and conjunctions
    "de", "à", "en", "dans", "pour", "et",
    # interrogatives and relatives
    "comment", "quoi", "où", "qui", "que",
}
def generate_session_title(first_message: str) -> str:
    """Build a short session title from the first message of a conversation.

    The message is lower-cased; every character that is not an ASCII
    letter, a character in the ``À``-``ÿ`` range, a digit or whitespace is
    removed; words listed in ``STOP_WORDS`` are dropped; and at most the
    first five remaining words are capitalized and joined with spaces.

    Args:
        first_message: Text of the first message of the session.

    Returns:
        The generated title, or ``"Nouvelle session"`` when the input is
        not a string or when no keyword is left after filtering.
    """
    # Callers hand over stored message content as-is, so it is not
    # guaranteed to be text; fall back to the default title instead of
    # failing on ``.lower()``.
    if not isinstance(first_message, str):
        return "Nouvelle session"

    cleaned = re.sub(r"[^a-zA-ZÀ-ÿ0-9\s]", "", first_message.lower())
    keywords = [word for word in cleaned.split() if word not in STOP_WORDS]
    if not keywords:
        return "Nouvelle session"

    return " ".join(word.capitalize() for word in keywords[:5])
def load_all_sessions():
    """Load every stored session that contains at least one readable message.

    Reads all documents of the ``chat_history`` collection in the
    ``douane_db`` database. For each document the ``History`` field is
    decoded as JSON (a list of messages, or a single message dict) and each
    entry is passed through ``normalize_message``. Documents whose history
    cannot be decoded, or that yield no message, are skipped.

    The session id is the document's ``SessionId``, falling back to the
    string form of ``_id``. When several documents resolve to the same id,
    the one iterated last replaces the earlier entry.

    Returns:
        A dict mapping session id to ``{"title", "history", "created_at"}``,
        ordered from newest to oldest ``created_at``.
    """
    # Sort key for sessions without a usable timestamp, so that sorting
    # never compares None against a datetime.
    oldest = datetime.min.replace(tzinfo=timezone.utc)
    sessions = {}

    # The context manager closes the client when done; the cursor has to
    # be consumed inside the block.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        for doc in collection.find():
            try:
                raw_messages = json.loads(doc.get("History", "[]"))
                if isinstance(raw_messages, dict):
                    raw_messages = [raw_messages]
                normalized = (normalize_message(m) for m in raw_messages)
                messages = [m for m in normalized if m]
            except (TypeError, ValueError):
                # Undecodable or oddly shaped history: treat as empty.
                continue
            if not messages:
                continue

            # Prefer the explicit timestamp; otherwise use the creation
            # time carried by the _id when it exposes one.
            created_at = doc.get("created_at") or getattr(
                doc.get("_id"), "generation_time", None
            )
            if not isinstance(created_at, datetime):
                created_at = None
            elif created_at.tzinfo is None:
                # Make naive timestamps comparable with aware ones.
                created_at = created_at.replace(tzinfo=timezone.utc)

            session_id = doc.get("SessionId") or str(doc.get("_id"))
            sessions[session_id] = {
                "title": doc.get("title", "Session sans titre"),
                "history": messages,
                "created_at": created_at,
            }

    newest_first = sorted(
        sessions.items(),
        key=lambda item: item[1]["created_at"] or oldest,
        reverse=True,
    )
    return dict(newest_first)
def start_new_session(session_state: dict) -> str:
    """Register a new, empty session in ``session_state`` and make it current.

    Nothing is written to MongoDB here; the session only exists in memory.

    Args:
        session_state: Mutable mapping holding the UI state. Its
            ``"session_id"`` entry is set to the new id, and an entry for
            the new session is added under ``"sessions"``.

    Returns:
        The new session id, of the form ``session_<uuid4>``.
    """
    session_id = f"session_{uuid.uuid4()}"
    # Create the container on first use so a fresh state does not fail
    # with KeyError after "session_id" has already been written.
    sessions = session_state.setdefault("sessions", {})
    sessions[session_id] = {
        "history": [],
        "created_at": datetime.now(timezone.utc),
        "title": "Nouvelle session",
    }
    session_state["session_id"] = session_id
    return session_id
def add_message_to_session(session_id: str, message: dict):
    """Append a message to a session stored in MongoDB.

    The message is first flattened with ``normalize_message``; input that
    matches neither recognized shape is ignored. If a document with this
    ``SessionId`` exists, the message is appended to its JSON-encoded
    ``History``. Otherwise a new document is inserted with the message as
    its only history entry, a title generated from the message content,
    and the current UTC time as ``created_at``.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.
        message: Message in nested (``data.content``) or flat
            (``type`` / ``content``) form.
    """
    msg = normalize_message(message)
    if msg is None:
        return

    # The context manager closes the client once the write is done.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        session = collection.find_one({"SessionId": session_id})

        if session is None:
            content = msg["content"]
            # Only text can be turned into a title; anything else keeps
            # the default one.
            if isinstance(content, str):
                title = generate_session_title(content)
            else:
                title = "Nouvelle session"
            collection.insert_one({
                "SessionId": session_id,
                "title": title,
                "History": json.dumps([msg]),
                "created_at": datetime.now(timezone.utc),
            })
            return

        try:
            history = json.loads(session.get("History", "[]"))
        except (TypeError, ValueError):
            # Unreadable stored history: start over from an empty list.
            history = []
        if isinstance(history, dict):
            history = [history]
        elif not isinstance(history, list):
            history = []

        history.append(msg)
        # NOTE(review): this is a read-modify-write of the whole history;
        # two writers appending to the same session at once could overwrite
        # each other -- confirm whether that can happen in this app.
        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"History": json.dumps(history)}},
        )
def rename_session(session_id: str, new_title: str):
    """Set a manually chosen title on a stored session.

    Updates the ``title`` field of the ``chat_history`` document whose
    ``SessionId`` equals ``session_id``. No upsert is requested, so nothing
    is written when no such document exists.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.
        new_title: Title to store.
    """
    # The context manager closes the client once the update is done.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        client["douane_db"]["chat_history"].update_one(
            {"SessionId": session_id},
            {"$set": {"title": new_title}},
        )
def update_session_title(session_id: str):
    """Replace a missing or generic session title with a generated one.

    Does nothing when the session does not exist, when its title is
    anything other than empty, ``"Nouvelle session"`` or
    ``"Session sans titre"``, or when no text can be read from the first
    entry of its history. Otherwise the title is regenerated from that
    first message with ``generate_session_title`` and stored.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.
    """
    # The context manager closes the client when the function returns.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        collection = client["douane_db"]["chat_history"]
        session = collection.find_one({"SessionId": session_id})
        if not session:
            return

        # A stored null title counts as "no title" instead of failing on
        # ``.strip()``.
        current_title = str(session.get("title") or "").strip()
        if current_title not in ("", "Nouvelle session", "Session sans titre"):
            return

        try:
            history = json.loads(session.get("History", "[]"))
        except (TypeError, ValueError):
            return
        if isinstance(history, dict):
            history = [history]
        if not isinstance(history, list) or not history:
            return

        first_entry = history[0]
        # Accept the nested (data.content) shape as well as the flat one;
        # keep the plain "content" lookup for entries that carry no "type".
        normalized = normalize_message(first_entry)
        if normalized is not None:
            first_message = normalized["content"]
        elif isinstance(first_entry, dict):
            first_message = first_entry.get("content", "")
        else:
            first_message = ""
        if not first_message or not isinstance(first_message, str):
            return

        collection.update_one(
            {"SessionId": session_id},
            {"$set": {"title": generate_session_title(first_message)}},
        )
def get_messages_for_session(session_id: str):
    """Return the normalized messages stored for a session.

    Looks up the ``chat_history`` document whose ``SessionId`` equals
    ``session_id``, decodes its JSON ``History`` field (a list of messages
    or a single message dict) and passes each entry through
    ``normalize_message``, dropping entries it does not recognize.

    Args:
        session_id: Value of the ``SessionId`` field identifying the session.

    Returns:
        A list of ``{"type", "content"}`` dicts; empty when the session has
        not been stored yet or its history cannot be decoded as a list.
    """
    # The document is fetched in full, so the client can be closed before
    # the history is decoded.
    with MongoClient(MONGO_DATABASE_HOST) as client:
        doc = client["douane_db"]["chat_history"].find_one(
            {"SessionId": session_id}
        )
    if not doc:
        return []

    try:
        raw_history = json.loads(doc.get("History", "[]"))
    except (TypeError, ValueError):
        return []
    if isinstance(raw_history, dict):
        raw_history = [raw_history]
    elif not isinstance(raw_history, list):
        return []

    # Normalize each entry once and keep only the recognized ones.
    normalized = (normalize_message(entry) for entry in raw_history)
    return [message for message in normalized if message]
def get_session_history(session_id: str) -> MongoDBChatMessageHistory:
    """Return the LangChain MongoDB message history for ``session_id``.

    The history object is configured with the ``MONGO_DATABASE_HOST``
    connection string and targets the ``chat_history`` collection of the
    ``douane_db`` database.
    """
    history_store = MongoDBChatMessageHistory(
        session_id=session_id,
        connection_string=MONGO_DATABASE_HOST,
        collection_name="chat_history",
        database_name="douane_db",
    )
    return history_store
|