Spaces:

hamba-ho
/

Assistant-Web-Educatif

Sleeping

App Files Community

Assistant-Web-Educatif / backend / services / question_handler.py

hamba-ho

Socratic Mode, Comparative Reading, PDF OCR Alert

122e1ff 24 days ago

raw

history blame contribute delete

3.6 kB

	from sqlalchemy.orm import Session
	from services.vector_store import VectorStore
	from services.llm_service import generate_response, generate_quiz
	from services.cache_manager import CacheManager
	from models.document import Document

	class QuestionHandler:
	    """Answer questions and build quizzes from a session's indexed chunks.

	    The handler looks up chunks through ``self.vector_store``, hands the
	    retrieved text to the LLM helpers (``generate_response`` /
	    ``generate_quiz``) and memoises answers through ``self.cache``.

	    Search results are read as a mapping whose ``"documents"`` and
	    ``"metadatas"`` entries are lists of batches; only the first batch
	    (index 0) of each is used.
	    """

	    # Number of chunks requested from the vector store for one answer.
	    _ANSWER_CHUNKS = 5
	    # Number of candidate chunks requested when building a quiz.
	    _QUIZ_CANDIDATES = 30
	    # Number of candidates actually sent to the quiz generator.
	    _QUIZ_SAMPLE_SIZE = 5

	    def __init__(self):
	        self.vector_store = VectorStore()
	        self.cache = CacheManager()

	    @staticmethod
	    def _first_batch(search_results, key):
	        """Return the first batch stored under *key*, or ``[]`` if absent/empty."""
	        if not search_results:
	            return []
	        batches = search_results.get(key)
	        if not batches:
	            return []
	        return batches[0] or []

	    @staticmethod
	    def _align_metadatas(documents, metadatas):
	        """Return exactly one metadata dict per document.

	        Missing or ``None`` entries are replaced by ``{}`` so later ``.get``
	        calls cannot fail.
	        NOTE(review): presumably the store can return ``None`` for a chunk
	        saved without metadata -- confirm against VectorStore.
	        """
	        aligned = [meta or {} for meta in metadatas[:len(documents)]]
	        aligned.extend({} for _ in range(len(documents) - len(aligned)))
	        return aligned

	    @staticmethod
	    def _build_context(documents, metadatas):
	        """Join the chunks into one prompt context, each under a source header."""
	        labelled_parts = []
	        for doc_text, meta in zip(documents, metadatas):
	            filename = meta.get("filename", "Document inconnu")
	            page = meta.get("page", "?")
	            labelled_parts.append(
	                f"--- SOURCE : {filename} (Page {page}) ---\nCONTENU : {doc_text}"
	            )
	        return "\n\n".join(labelled_parts)

	    @staticmethod
	    def _collect_sources(metadatas, db):
	        """Build the de-duplicated source list shown alongside an answer.

	        Titles come from the ``Document`` table when *db* is available and
	        the chunk carries a ``document_id``; otherwise the chunk's own
	        ``filename`` metadata is used. Entries are unique per
	        ``(title, page)`` and keep first-seen order.
	        """
	        doc_ids = {
	            meta.get("document_id")
	            for meta in metadatas
	            if meta.get("document_id") is not None
	        }
	        db_titles = {}
	        if doc_ids and db:
	            # in_() is documented as taking a list of values, so hand it one.
	            db_docs = db.query(Document).filter(Document.id.in_(list(doc_ids))).all()
	            db_titles = {doc.id: doc.file_name for doc in db_docs}

	        sources = []
	        seen_sources = set()
	        for meta in metadatas:
	            doc_id = meta.get("document_id")
	            final_title = db_titles.get(doc_id, meta.get("filename", "Document inconnu"))
	            page = meta.get("page")
	            key = (final_title, page)
	            if key in seen_sources:
	                continue
	            seen_sources.add(key)
	            sources.append({"document": final_title, "page": page, "doc_id": doc_id})
	        return sources

	    def get_answer(self, question: str, session_id: str, db: "Session | None", socratic_mode: bool = False):
	        """Answer *question* from the chunks indexed for *session_id*.

	        Args:
	            question: The user's question; also used as the search query.
	            session_id: Session whose chunks are searched.
	            db: SQLAlchemy session used to resolve document titles. A falsy
	                value skips the lookup and falls back to chunk metadata.
	            socratic_mode: Forwarded unchanged to ``generate_response``; it
	                is also part of the cache key.

	        Returns:
	            A dict with ``question``, ``answer``, ``sources`` and ``cached``.
	            ``cached`` is True only when the answer came from the cache.
	            The "nothing found" fallback is never cached.
	        """
	        cache_key = f"{session_id}::{question}::{socratic_mode}"
	        cached_answer = self.cache.get(cache_key)
	        if cached_answer:
	            return {**cached_answer, "cached": True}

	        search_results = self.vector_store.find_similar_chunks(
	            question=question,
	            session_id=session_id,
	            n_results=self._ANSWER_CHUNKS,
	        )

	        documents_content = self._first_batch(search_results, "documents")
	        if not documents_content:
	            return {
	                "question": question,
	                "answer": "Désolé, je n'ai trouvé aucune information pertinente dans les documents fournis. Assurez-vous qu'un document a bien été chargé pour cette session.",
	                "sources": [],
	                "cached": False,
	            }

	        metadatas = self._align_metadatas(
	            documents_content, self._first_batch(search_results, "metadatas")
	        )

	        # Send the source-labelled context, not the bare chunk text, so the
	        # model can tell which file and page each passage comes from.
	        full_context = self._build_context(documents_content, metadatas)
	        answer = generate_response(question, full_context, socratic_mode)

	        final_response = {
	            "question": question,
	            "answer": answer,
	            "sources": self._collect_sources(metadatas, db),
	        }
	        self.cache.set(cache_key, final_response)
	        return {**final_response, "cached": False}

	    def get_quiz(self, session_id: str, db: "Session | None"):
	        """Generate a quiz from a random sample of the session's chunks.

	        Args:
	            session_id: Session whose chunks are searched.
	            db: Unused by this method; kept so the signature is unchanged.

	        Returns:
	            ``{"quiz": <value returned by generate_quiz>}``, or a quiz whose
	            JSON string holds an empty ``questions`` list when the search
	            returns no chunks.
	        """
	        import random

	        search_results = self.vector_store.find_similar_chunks(
	            question="concepts importants et points cles",
	            session_id=session_id,
	            n_results=self._QUIZ_CANDIDATES,
	        )

	        documents_content = self._first_batch(search_results, "documents")
	        if not documents_content:
	            return {"quiz": '{"questions": []}'}

	        # random.sample picks without replacement and, unlike shuffling in
	        # place, leaves the list owned by the search result untouched.
	        sample_size = min(self._QUIZ_SAMPLE_SIZE, len(documents_content))
	        sampled_content = random.sample(documents_content, sample_size)
	        full_context = "\n\n".join(sampled_content)

	        return {"quiz": generate_quiz(full_context)}