|
|
""" |
|
|
STEP 5: RETRIEVER
-----------------
Create a LangChain retriever from a FAISS vector store.

The retriever is used in the later RAG step:
    - retriever.invoke(query)
      (older LangChain releases: retriever.get_relevant_documents(query))
|
|
""" |
|
|
|
|
|
from langchain_community.vectorstores import FAISS |
|
|
|
|
|
|
|
|
# Default ``k`` that get_retriever forwards to the vector store's search_kwargs.
RETRIEVER_K = 4


def get_retriever(vectorstore: FAISS, k: int = RETRIEVER_K):
    """Build a retriever on top of a FAISS vector store.

    Progress messages are printed to stdout before and after the retriever
    is created.

    Args:
        vectorstore: Vector store whose ``as_retriever`` method is called.
        k: Value forwarded as ``search_kwargs["k"]`` (presumably the number
            of documents returned per query -- confirm against the
            LangChain docs). Defaults to ``RETRIEVER_K``.

    Returns:
        The object returned by ``vectorstore.as_retriever``.
    """
    print(f">>> Creating retriever with k={k} ...")

    search_options = {"k": k}
    built = vectorstore.as_retriever(search_kwargs=search_options)

    print(">>> Retriever ready.\n")
    return built
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: build the pipeline end to end, send one query
    # through the retriever and print what comes back.

    # The pipeline helpers are imported only under this guard, so importing
    # this module as a library does not require them.
    from load_documents import load_documents
    from split_documents import split_documents
    from vectorstore import build_vectorstore

    # Longest page_content prefix printed per retrieved document.
    preview_chars = 400

    # The banner names the method that is actually called below.
    print("=== TEST: retriever.invoke ===\n")

    docs = load_documents()
    chunks = split_documents(docs)
    vs = build_vectorstore(chunks)
    # Reuse the module constant instead of repeating the literal 4, so the
    # smoke test follows any change to the default.
    retriever = get_retriever(vs, k=RETRIEVER_K)

    query = "Wie lange habe ich Zeit, eine Prüfungsleistung zu wiederholen?"
    print("Test query:")
    print(" ", query, "\n")

    retrieved_docs = retriever.invoke(query)

    print(f"Retriever returned {len(retrieved_docs)} documents.")
    for i, d in enumerate(retrieved_docs, start=1):
        print(f"\n=== DOC {i} ===")
        text = d.page_content
        # Only show the ellipsis when the preview really cut the text off.
        if len(text) > preview_chars:
            print(text[:preview_chars], "...")
        else:
            print(text)
        print("Metadata:", d.metadata)
|
|
|