# cs-ai-sakura-dev/src/tests/document_retriever_test.py
# lifedebugger's picture
# Deploy files from GitHub repository
# 05269f9
from src.internal.rag import LangChainRetriever
async def test_document_retriever():
    """Example usage of LangChainRetriever: ingest files, then run one query.

    Builds a ``LangChainRetriever`` with the settings hard-coded below, feeds
    it every path in ``file_paths`` via ``add_document_from_file`` and prints
    the outcome of each, then issues a single ``retrieve`` call with ``k=5``
    and prints, for every returned document, its score, the first 200
    characters of its content, and its metadata.

    The function makes no assertions; all results are only printed, so a
    failed ingestion is reported on stdout rather than failing the test.
    """
    print(" ===== Testing document retriever ==== ")

    # Initialize retriever
    retriever = LangChainRetriever(
        embedding_model="text-embedding-3-small",
        vectorstore_type="chroma",
        vectorstore_path="./my_vectorstore",
        use_hybrid_search=True,
        chunk_size=1000,
        chunk_overlap=200,
    )

    # Add documents from files
    # NOTE(review): these are relative paths -- presumably resolved against
    # the current working directory; confirm how the retriever opens them.
    file_paths = [
        "../documents/file.pdf",
    ]
    for file_path in file_paths:
        ingest_result = await retriever.add_document_from_file(file_path)
        if ingest_result.success:
            metadata = ingest_result.document_metadata
            print(f"Successfully processed: {metadata.file_name}")
            print(f"Chunks created: {metadata.chunk_count}")
        else:
            print(f"Failed to process: {ingest_result.error_message}")

    # Query documents
    query = "Recurrent neural network (RNN) is"
    retrieval = await retriever.retrieve(query, k=5)

    print(f"\nQuery: {retrieval.query}")
    print(f"Found {len(retrieval.documents)} relevant documents")
    print(f"Retrieval time: {retrieval.retrieval_time:.2f}s")

    # Scores are looked up by the document's index, so ``scores`` is expected
    # to be parallel to ``documents``.
    for i, doc in enumerate(retrieval.documents):
        print(f"\nDocument {i+1}:")
        print(f"Score: {retrieval.scores[i]:.3f}")
        print(f"Content: {doc.page_content[:200]}...")
        print(f"Metadata: {doc.metadata}")

    print(" ===== Testing document retriever DONE ==== ")