# Assistant-Web-Educatif/backend/services/question_handler.py
# Author: hamba-ho
# Commit 122e1ff: Socratic Mode, Comparative Reading, PDF OCR Alert
from sqlalchemy.orm import Session
from services.vector_store import VectorStore
from services.llm_service import generate_response, generate_quiz
from services.cache_manager import CacheManager
from models.document import Document
class QuestionHandler:
    """Answer questions and generate quizzes from a session's document chunks.

    Chunks are retrieved through ``VectorStore``, text is generated by the
    ``llm_service`` helpers, and complete answers are stored in
    ``CacheManager`` so a repeated question skips retrieval and generation.
    """

    # Number of chunks retrieved to answer a single question.
    ANSWER_CHUNK_COUNT = 5
    # Size of the candidate pool retrieved for a quiz.
    QUIZ_CANDIDATE_COUNT = 30
    # Number of candidates randomly drawn from the pool and sent to the
    # quiz generator.
    QUIZ_SAMPLE_SIZE = 5

    def __init__(self):
        """Create the vector-store client and the answer cache."""
        self.vector_store = VectorStore()
        self.cache = CacheManager()

    @staticmethod
    def _first_result_list(search_results, key):
        """Return the first inner list stored under *key*, or ``[]``.

        The search result is read as a mapping whose values are lists of
        lists; only the first inner list is used. A missing result, a
        missing key, or an empty value all yield an empty list.
        """
        if not search_results:
            return []
        outer = search_results.get(key)
        if not outer:
            return []
        return outer[0] or []

    @staticmethod
    def _build_context(documents, metadatas):
        """Join the chunks into one prompt context, each labelled with its source.

        Every chunk is prefixed with its file name and page so the model can
        tell the sources apart. A chunk whose metadata entry is missing or
        ``None`` is labelled with the placeholder name and page.
        """
        parts = []
        for index, doc_text in enumerate(documents):
            meta = (metadatas[index] if index < len(metadatas) else None) or {}
            filename = meta.get("filename", "Document inconnu")
            page = meta.get("page", "?")
            parts.append(
                f"--- SOURCE : {filename} (Page {page}) ---\nCONTENU : {doc_text}"
            )
        return "\n\n".join(parts)

    @staticmethod
    def _collect_sources(metadatas, db):
        """Return the de-duplicated list of sources behind the chunks.

        Each entry is ``{"document", "page", "doc_id"}``. The document title
        comes from the ``Document`` row when *db* is available and the id is
        found there; otherwise the file name stored in the chunk metadata is
        used. Entries are unique per ``(title, page)`` and keep the order in
        which they first appear.
        """
        entries = [meta or {} for meta in metadatas]

        doc_ids = {
            meta.get("document_id")
            for meta in entries
            if meta.get("document_id") is not None
        }
        db_titles = {}
        if doc_ids and db:
            # One query for all referenced documents instead of one per chunk.
            rows = db.query(Document).filter(Document.id.in_(doc_ids)).all()
            db_titles = {row.id: row.file_name for row in rows}

        sources = []
        seen_sources = set()
        for meta in entries:
            doc_id = meta.get("document_id")
            title = db_titles.get(doc_id, meta.get("filename", "Document inconnu"))
            page = meta.get("page")
            key = (title, page)
            if key in seen_sources:
                continue
            seen_sources.add(key)
            sources.append({"document": title, "page": page, "doc_id": doc_id})
        return sources

    def get_answer(self, question: str, session_id: str, db: Session, socratic_mode: bool = False):
        """Answer *question* from the chunks retrieved for *session_id*.

        Args:
            question: The user's question.
            session_id: Session whose chunks are searched.
            db: Database session used to resolve document titles; when falsy,
                titles fall back to the file name in the chunk metadata.
            socratic_mode: Forwarded to ``generate_response``; also part of
                the cache key, so both modes are cached independently.

        Returns:
            A dict with ``question``, ``answer``, ``sources`` and ``cached``.
            ``cached`` is ``True`` only when the answer came from the cache.
            When no chunk is found, a fixed apology message is returned with
            an empty source list, and nothing is cached.
        """
        cache_key = f"{session_id}::{question}::{socratic_mode}"
        cached_answer = self.cache.get(cache_key)
        if cached_answer:
            return {**cached_answer, "cached": True}

        search_results = self.vector_store.find_similar_chunks(
            question=question,
            session_id=session_id,
            n_results=self.ANSWER_CHUNK_COUNT,
        )
        documents = self._first_result_list(search_results, "documents")
        if not documents:
            return {
                "question": question,
                "answer": "Désolé, je n'ai trouvé aucune information pertinente dans les documents fournis. Assurez-vous qu'un document a bien été chargé pour cette session.",
                "sources": [],
                "cached": False,
            }

        metadatas = self._first_result_list(search_results, "metadatas")

        # Send the source-labelled context to the model. The labels were
        # previously built and then dropped in favour of the raw chunk text.
        full_context = self._build_context(documents, metadatas)
        answer = generate_response(question, full_context, socratic_mode)

        final_response = {
            "question": question,
            "answer": answer,
            "sources": self._collect_sources(metadatas, db),
        }
        self.cache.set(cache_key, final_response)
        return {**final_response, "cached": False}

    def get_quiz(self, session_id: str, db: Session):
        """Generate a quiz from a random sample of the session's chunks.

        Args:
            session_id: Session whose chunks are searched.
            db: Accepted for signature parity with ``get_answer``; not used
                by this method.

        Returns:
            ``{"quiz": <value returned by generate_quiz>}``, or a quiz with
            an empty question list when the session has no chunks.
        """
        import random

        search_results = self.vector_store.find_similar_chunks(
            question="concepts importants et points cles",
            session_id=session_id,
            n_results=self.QUIZ_CANDIDATE_COUNT,
        )
        documents = self._first_result_list(search_results, "documents")
        if not documents:
            return {"quiz": '{"questions": []}'}

        # Draw without replacement and without reordering the list owned by
        # the vector-store result.
        sample_size = min(self.QUIZ_SAMPLE_SIZE, len(documents))
        sampled_content = random.sample(documents, sample_size)

        full_context = "\n\n".join(sampled_content)
        return {"quiz": generate_quiz(full_context)}