File size: 1,456 Bytes
de84956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
STEP 5: RETRIEVER
-----------------
Create a LangChain retriever from a FAISS VectorStore.

The retriever is used later in the RAG step:
- retriever.invoke(query)
  (the older retriever.get_relevant_documents(query) is deprecated)
"""

from langchain_community.vectorstores import FAISS

# number of chunks to retrieve for each question
RETRIEVER_K = 4

def get_retriever(vectorstore: FAISS, k: int = RETRIEVER_K):
    """
    Build a retriever on top of a FAISS VectorStore.

    Progress messages are printed to stdout before and after the
    retriever is created.

    Args:
        vectorstore: The FAISS vector store to wrap.
        k: Value passed as ``search_kwargs["k"]``, i.e. how many chunks
            to fetch per query. Defaults to ``RETRIEVER_K``.

    Returns:
        Whatever ``vectorstore.as_retriever(...)`` returns for the given
        search settings.
    """
    print(f">>> Creating retriever with k={k} ...")
    search_options = {"k": k}
    faiss_retriever = vectorstore.as_retriever(search_kwargs=search_options)
    print(">>> Retriever ready.\n")
    return faiss_retriever

if __name__ == "__main__":
    # Manual smoke test of the pipeline:
    # load -> split -> FAISS -> retriever.invoke()
    from load_documents import load_documents
    from split_documents import split_documents
    from vectorstore import build_vectorstore

    print("=== TEST: retriever.get_relevant_documents ===\n")

    # Chain the three pipeline steps, then wrap the store in a retriever.
    store = build_vectorstore(split_documents(load_documents()))
    demo_retriever = get_retriever(store, k=4)

    # Sample question (German) used to exercise the retriever.
    question = "Wie lange habe ich Zeit, eine Prüfungsleistung zu wiederholen?"
    print("Test query:")
    print(" ", question, "\n")

    hits = demo_retriever.invoke(question)

    print(f"Retriever returned {len(hits)} documents.")
    for rank, hit in enumerate(hits, start=1):
        print(f"\n=== DOC {rank} ===")
        # Show only the first 400 characters of each retrieved chunk.
        preview = hit.page_content[:400]
        print(preview, "...")
        print("Metadata:", hit.metadata)