chatbot / retriever.py
Nguyen5's picture
commit
de84956
"""
STEP 5: RETRIEVER
-----------------
Create a LangChain retriever from a FAISS vector store.
The retriever is used in the later RAG step:
- retriever.invoke(query)
"""
from langchain_community.vectorstores import FAISS
# Number of chunks retrieved for each question; used as the default `k`
# of get_retriever().
RETRIEVER_K = 4
def get_retriever(vectorstore: FAISS, k: int = RETRIEVER_K):
    """Create a retriever from a FAISS vector store.

    Progress messages are printed to stdout before and after the
    retriever is created.

    Args:
        vectorstore: Store whose ``as_retriever`` method builds the retriever.
        k: Value forwarded as the ``"k"`` entry of ``search_kwargs``
            (number of chunks fetched per query). Defaults to ``RETRIEVER_K``.

    Returns:
        The object returned by ``vectorstore.as_retriever``.
    """
    print(f">>> Creating retriever with k={k} ...")
    search_options = {"k": k}
    built = vectorstore.as_retriever(search_kwargs=search_options)
    print(">>> Retriever ready.\n")
    return built
if __name__ == "__main__":
    # Manual smoke test: load -> split -> FAISS -> query the retriever.
    # The pipeline helpers are imported inside the guard, so they are only
    # loaded when this file is executed as a script.
    from load_documents import load_documents
    from split_documents import split_documents
    from vectorstore import build_vectorstore

    print("=== TEST: retriever.get_relevant_documents ===\n")

    # Build the vector store from freshly loaded and chunked documents.
    store = build_vectorstore(split_documents(load_documents()))
    doc_retriever = get_retriever(store, k=4)

    question = "Wie lange habe ich Zeit, eine Prüfungsleistung zu wiederholen?"
    print("Test query:")
    print(" ", question, "\n")

    hits = doc_retriever.invoke(question)
    print(f"Retriever returned {len(hits)} documents.")

    # Show the first 400 characters and the metadata of every hit.
    for position, hit in enumerate(hits, 1):
        print(f"\n=== DOC {position} ===")
        print(hit.page_content[:400], "...")
        print("Metadata:", hit.metadata)