""" BƯỚC 5: RETRIEVER ----------------- Tạo LangChain Retriever từ FAISS VectorStore. Retriever sẽ dùng trong bước RAG sau này: - retriever.get_relevant_documents(query) """ from langchain_community.vectorstores import FAISS # số chunk sẽ lấy cho mỗi câu hỏi RETRIEVER_K = 4 def get_retriever(vectorstore: FAISS, k: int = RETRIEVER_K): """ Tạo retriever từ FAISS VectorStore. """ print(f">>> Creating retriever with k={k} ...") retriever = vectorstore.as_retriever(search_kwargs={"k": k}) print(">>> Retriever ready.\n") return retriever if __name__ == "__main__": # Test: load -> split -> FAISS -> retriever.get_relevant_documents() from load_documents import load_documents from split_documents import split_documents from vectorstore import build_vectorstore print("=== TEST: retriever.get_relevant_documents ===\n") docs = load_documents() chunks = split_documents(docs) vs = build_vectorstore(chunks) retriever = get_retriever(vs, k=4) query = "Wie lange habe ich Zeit, eine Prüfungsleistung zu wiederholen?" print("Test query:") print(" ", query, "\n") retrieved_docs = retriever.invoke(query) print(f"Retriever returned {len(retrieved_docs)} documents.") for i, d in enumerate(retrieved_docs, start=1): print(f"\n=== DOC {i} ===") print(d.page_content[:400], "...") print("Metadata:", d.metadata)