File size: 3,602 Bytes
c2f46ba
 
46ea92c
c2f46ba
fab35c8
ee966d6
7826da3
 
 
c2f46ba
7826da3
122e1ff
 
fab35c8
83b4232
 
fab35c8
 
 
 
 
 
 
c2f46ba
fab35c8
c2f46ba
fab35c8
c2f46ba
 
 
 
ee966d6
 
fab35c8
ee966d6
 
 
 
 
 
 
122e1ff
 
 
c2f46ba
35b525b
ee966d6
fab35c8
 
 
 
 
 
 
ee966d6
c2f46ba
ee966d6
fab35c8
 
 
 
010c6c2
c2f46ba
83b4232
7826da3
 
35b525b
83b4232
fab35c8
46ea92c
 
 
327232f
 
46ea92c
327232f
46ea92c
327232f
46ea92c
 
 
 
 
 
327232f
 
 
46ea92c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from sqlalchemy.orm import Session
from services.vector_store import VectorStore
from services.llm_service import generate_response, generate_quiz
from services.cache_manager import CacheManager
from models.document import Document

class QuestionHandler:
    """Answer questions and build quizzes from a session's retrieved chunks.

    Chunks are fetched through ``VectorStore.find_similar_chunks``; answers
    are memoised in ``CacheManager`` under a key combining the session id,
    the question and the socratic flag.
    """

    # Retrieval sizes and fixed strings, named so they are tuned in one place.
    _ANSWER_CHUNK_COUNT = 5
    _QUIZ_CANDIDATE_COUNT = 30
    _QUIZ_SAMPLE_SIZE = 5
    _QUIZ_SEED_QUERY = "concepts importants et points cles"
    _UNKNOWN_DOCUMENT = "Document inconnu"
    _NO_RESULT_ANSWER = (
        "Désolé, je n'ai trouvé aucune information pertinente dans les documents fournis. "
        "Assurez-vous qu'un document a bien été chargé pour cette session."
    )
    _EMPTY_QUIZ = '{"questions": []}'

    def __init__(self):
        self.vector_store = VectorStore()
        self.cache = CacheManager()

    @staticmethod
    def _first_batch(search_results, field):
        """Return the first inner list stored under *field*, or ``[]``.

        The search result is indexed as ``result[field][0]`` by this class;
        a missing result, a missing/empty field or an empty first entry all
        collapse to an empty list instead of raising.
        """
        if not search_results:
            return []
        batches = search_results.get(field)
        if not batches:
            return []
        return batches[0] or []

    @staticmethod
    def _normalize_metadatas(metadatas, count):
        """Return one metadata dict per chunk, padding with empty dicts.

        ``None`` entries become ``{}`` and the list is padded up to *count*
        so that every document chunk can be paired with a metadata mapping.
        """
        normalized = [meta or {} for meta in metadatas]
        normalized.extend({} for _ in range(count - len(normalized)))
        return normalized

    @staticmethod
    def _build_context(documents, metadatas):
        """Join the chunks into one prompt context, each labelled with its source."""
        unknown = QuestionHandler._UNKNOWN_DOCUMENT
        parts = [
            f"--- SOURCE : {meta.get('filename', unknown)} "
            f"(Page {meta.get('page', '?')}) ---\nCONTENU : {doc_text}"
            for doc_text, meta in zip(documents, metadatas)
        ]
        return "\n\n".join(parts)

    @staticmethod
    def _lookup_titles(metadatas, db):
        """Map document ids found in *metadatas* to the file names stored in the DB.

        Returns an empty mapping when no id is present or no session is given.
        """
        doc_ids = {
            meta.get("document_id")
            for meta in metadatas
            if meta.get("document_id") is not None
        }
        if not (doc_ids and db):
            return {}
        db_docs = db.query(Document).filter(Document.id.in_(doc_ids)).all()
        return {doc.id: doc.file_name for doc in db_docs}

    @staticmethod
    def _collect_sources(metadatas, db_titles):
        """Return the distinct (document, page) sources in first-seen order.

        The DB title wins over the filename recorded in the chunk metadata.
        """
        sources = []
        seen_sources = set()
        for meta in metadatas:
            doc_id = meta.get("document_id")
            final_title = db_titles.get(
                doc_id, meta.get("filename", QuestionHandler._UNKNOWN_DOCUMENT)
            )
            page = meta.get("page")
            key = (final_title, page)
            if key in seen_sources:
                continue
            seen_sources.add(key)
            sources.append({"document": final_title, "page": page, "doc_id": doc_id})
        return sources

    @staticmethod
    def _sample_chunks(chunks, sample_size):
        """Pick up to *sample_size* chunks at random without mutating *chunks*."""
        import random

        return random.sample(list(chunks), min(sample_size, len(chunks)))

    def get_answer(self, question: str, session_id: str, db: Session, socratic_mode: bool = False):
        """Answer *question* from the chunks retrieved for *session_id*.

        Args:
            question: The user's question; also used as the retrieval query.
            session_id: Session whose chunks are searched.
            db: Session used to resolve document ids to stored file names;
                when falsy the filename from the chunk metadata is used.
            socratic_mode: Forwarded to ``generate_response`` and part of the
                cache key, so both modes are cached separately.

        Returns:
            A dict with ``question``, ``answer``, ``sources`` and ``cached``.
            When nothing is retrieved, a fixed apology answer with no sources
            is returned and nothing is cached.
        """
        cache_key = f"{session_id}::{question}::{socratic_mode}"
        cached_answer = self.cache.get(cache_key)
        if cached_answer:
            return {**cached_answer, "cached": True}

        search_results = self.vector_store.find_similar_chunks(
            question=question,
            session_id=session_id,
            n_results=self._ANSWER_CHUNK_COUNT,
        )

        documents_content = self._first_batch(search_results, "documents")
        if not documents_content:
            return {
                "question": question,
                "answer": self._NO_RESULT_ANSWER,
                "sources": [],
                "cached": False,
            }

        metadatas = self._normalize_metadatas(
            self._first_batch(search_results, "metadatas"),
            len(documents_content),
        )

        # Send the source-labelled context (previously built but discarded) so
        # the model can see which file/page each chunk comes from.
        full_context = self._build_context(documents_content, metadatas)
        answer = generate_response(question, full_context, socratic_mode)

        sources = self._collect_sources(metadatas, self._lookup_titles(metadatas, db))

        final_response = {
            "question": question,
            "answer": answer,
            "sources": sources,
        }
        self.cache.set(cache_key, final_response)
        return {**final_response, "cached": False}

    def get_quiz(self, session_id: str, db: Session):
        """Generate a quiz from a random sample of the session's chunks.

        Args:
            session_id: Session whose chunks are searched.
            db: Unused here; kept so the signature stays unchanged.

        Returns:
            ``{"quiz": <value returned by generate_quiz>}``, or a quiz holding
            an empty question list (as a JSON string) when nothing is found.
        """
        search_results = self.vector_store.find_similar_chunks(
            question=self._QUIZ_SEED_QUERY,
            session_id=session_id,
            n_results=self._QUIZ_CANDIDATE_COUNT,
        )

        documents_content = self._first_batch(search_results, "documents")
        if not documents_content:
            return {"quiz": self._EMPTY_QUIZ}

        # Sample instead of shuffling in place: the list belongs to the search
        # result and should not be reordered behind the vector store's back.
        sampled_content = self._sample_chunks(documents_content, self._QUIZ_SAMPLE_SIZE)
        full_context = "\n\n".join(sampled_content)

        return {"quiz": generate_quiz(full_context)}