from src.internal.rag import LangChainRetriever


async def test_document_retriever() -> None:
    """Run an example ingestion + query round trip through LangChainRetriever.

    The coroutine:

    1. Builds a ``LangChainRetriever`` with the hard-coded settings below.
    2. Awaits ``add_document_from_file`` for each path in ``file_paths`` and
       prints either the processed file name and chunk count, or the
       reported error message.
    3. Awaits ``retrieve`` for a fixed query with ``k=5`` and prints the
       query, the number of documents returned, the retrieval time, and for
       each document its score, the first 200 characters of its content,
       and its metadata.

    Nothing is asserted and nothing is returned; all results are written to
    stdout. Being a coroutine, it must be awaited or driven by an event
    loop to actually run.
    """
    print(" ===== Testing document retriever ==== ")

    # Initialize retriever
    retriever = LangChainRetriever(
        embedding_model="text-embedding-3-small",
        vectorstore_type="chroma",
        vectorstore_path="./my_vectorstore",
        use_hybrid_search=True,
        chunk_size=1000,
        chunk_overlap=200,
    )

    # Add documents from files.
    # NOTE(review): these are relative paths; presumably they are resolved
    # against the process working directory -- confirm against the retriever.
    file_paths = [
        "../documents/file.pdf",
    ]

    for file_path in file_paths:
        ingest_result = await retriever.add_document_from_file(file_path)
        if ingest_result.success:
            print(f"Successfully processed: {ingest_result.document_metadata.file_name}")
            print(f"Chunks created: {ingest_result.document_metadata.chunk_count}")
        else:
            # A failed file is reported and skipped; the remaining files and
            # the query below still run.
            print(f"Failed to process: {ingest_result.error_message}")

    # Query documents
    query = "Recurrent neural network (RNN) is"
    retrieval = await retriever.retrieve(query, k=5)

    print(f"\nQuery: {retrieval.query}")
    print(f"Found {len(retrieval.documents)} relevant documents")
    print(f"Retrieval time: {retrieval.retrieval_time:.2f}s")

    # Scores are looked up by the document's position, so ``scores`` is
    # expected to be index-aligned with ``documents``.
    for i, doc in enumerate(retrieval.documents):
        print(f"\nDocument {i+1}:")
        print(f"Score: {retrieval.scores[i]:.3f}")
        print(f"Content: {doc.page_content[:200]}...")
        print(f"Metadata: {doc.metadata}")

    print(" ===== Testing document retriever DONE ==== ")