Spaces:
Configuration error
Configuration error
File size: 1,514 Bytes
from src.internal.rag import LangChainRetriever
async def test_document_retriever() -> None:
    """Example usage of LangChainRetriever.

    Constructs a retriever (``vectorstore_type="chroma"``, hybrid search
    enabled), feeds every path in ``file_paths`` through
    ``add_document_from_file`` and prints the per-file outcome, then issues a
    single ``retrieve`` call with ``k=5`` and prints the query, the number of
    documents returned, the retrieval time, and each document's score,
    content preview and metadata.

    Nothing is asserted or returned; all results are reported via ``print``.
    """
    print(" ===== Testing document retriever ==== ")

    # Initialize retriever
    retriever = LangChainRetriever(
        embedding_model="text-embedding-3-small",
        vectorstore_type="chroma",
        vectorstore_path="./my_vectorstore",
        use_hybrid_search=True,
        chunk_size=1000,
        chunk_overlap=200,
    )

    # Add documents from files
    file_paths = [
        "../documents/file.pdf",
    ]

    for file_path in file_paths:
        ingest_result = await retriever.add_document_from_file(file_path)
        if ingest_result.success:
            metadata = ingest_result.document_metadata
            print(f"Successfully processed: {metadata.file_name}")
            print(f"Chunks created: {metadata.chunk_count}")
        else:
            # A failed ingest is reported but does not abort the run.
            print(f"Failed to process: {ingest_result.error_message}")

    # Query documents
    query = "Recurrent neural network (RNN) is"
    retrieval = await retriever.retrieve(query, k=5)

    print(f"\nQuery: {retrieval.query}")
    print(f"Found {len(retrieval.documents)} relevant documents")
    print(f"Retrieval time: {retrieval.retrieval_time:.2f}s")

    # Scores are looked up by position, so documents and scores are assumed
    # to be parallel sequences -- TODO confirm against LangChainRetriever.
    for i, doc in enumerate(retrieval.documents):
        print(f"\nDocument {i+1}:")
        print(f"Score: {retrieval.scores[i]:.3f}")
        # Only the first 200 characters of the content are shown.
        print(f"Content: {doc.page_content[:200]}...")
        print(f"Metadata: {doc.metadata}")

    print(" ===== Testing document retriever DONE ==== ")