"""Tests for the Base-RAG, Hier-RAG and RAGManager retrieval entry points."""

import shutil
import tempfile

import pytest

from core.retrieval import BaseRAG, HierarchicalRAG, RAGManager
from core.index import VectorStore
from core.utils import Chunk, generate_id


@pytest.fixture
def persist_dir():
    """Yield a fresh temporary directory path and delete it after the test.

    ``tempfile.mkdtemp()`` leaves deletion to the caller, so the directory is
    removed here at teardown instead of being left behind on every run.
    """
    path = tempfile.mkdtemp()
    yield path
    # Best-effort cleanup: a failure to delete must not turn a passing test
    # into an error at teardown.
    shutil.rmtree(path, ignore_errors=True)


def _make_chunk(content, metadata):
    """Build a ``Chunk`` with freshly generated document and chunk ids."""
    return Chunk(
        doc_id=generate_id(),
        chunk_id=generate_id(),
        content=content,
        metadata=metadata,
    )


class TestBaseRAG:
    def test_retrieve(self, persist_dir):
        """Test Base-RAG retrieval"""
        vector_store = VectorStore(persist_directory=persist_dir)

        # Add test documents
        chunks = [
            _make_chunk(
                "This is a test document about emergency procedures",
                {"level1": "Clinical", "level2": "Emergency"},
            ),
        ]
        vector_store.add_documents("test_collection", chunks)

        rag = BaseRAG(vector_store, "test_collection")
        result = rag.retrieve("emergency procedures", k=1)

        assert result is not None
        assert hasattr(result, "content")
        assert hasattr(result, "sources")
        assert hasattr(result, "latency")
        assert len(result.sources) > 0


class TestHierarchicalRAG:
    def test_build_filters(self, persist_dir):
        """Test hierarchical filter building"""
        vector_store = VectorStore(persist_directory=persist_dir)
        rag = HierarchicalRAG(vector_store)

        filters = rag._build_filters("Clinical", "Emergency", None, "Report")

        assert filters["level1"] == "Clinical"
        assert filters["level2"] == "Emergency"
        assert filters["doc_type"] == "Report"
        # The third positional argument was None, so no level3 key is expected.
        assert "level3" not in filters

    def test_build_filters_none(self, persist_dir):
        """Test filter building with None values"""
        vector_store = VectorStore(persist_directory=persist_dir)
        rag = HierarchicalRAG(vector_store)

        filters = rag._build_filters(None, None, None, None)

        assert filters is None

    def test_retrieve_with_filters(self, persist_dir):
        """Test Hier-RAG retrieval with metadata filters"""
        vector_store = VectorStore(persist_directory=persist_dir)

        # Add test documents: one matching the filters below, one that does not.
        chunks = [
            _make_chunk(
                "Clinical emergency document",
                {"level1": "Clinical", "level2": "Emergency", "doc_type": "Report"},
            ),
            _make_chunk(
                "Administrative document",
                {"level1": "Administrative", "level2": "Billing", "doc_type": "Policy"},
            ),
        ]
        vector_store.add_documents("test_collection", chunks)

        rag = HierarchicalRAG(vector_store, "test_collection")
        result = rag.retrieve(
            "emergency",
            k=1,
            level1="Clinical",
            level2="Emergency",
            doc_type="Report",
        )

        assert result is not None
        assert len(result.sources) > 0


class TestRAGManager:
    def test_compare_retrieval(self, persist_dir):
        """Test comparison of Base-RAG vs Hier-RAG"""
        rag_manager = RAGManager(persist_directory=persist_dir)

        # Add test documents through a second store opened on the same
        # directory as the manager.
        vector_store = VectorStore(persist_directory=persist_dir)
        chunks = [
            _make_chunk(
                "Test document about clinical procedures",
                {"level1": "Clinical", "level2": "Emergency"},
            ),
        ]
        vector_store.add_documents("documents", chunks)

        base_result, hier_result = rag_manager.compare_retrieval(
            "clinical procedures",
            k=1,
            level1="Clinical",
        )

        assert base_result is not None
        assert hier_result is not None
        assert hasattr(base_result, "latency")
        assert hasattr(hier_result, "latency")