# Spaces: Sleeping
import shutil
import tempfile

import pytest

from core.index import VectorStore
from core.retrieval import BaseRAG, HierarchicalRAG, RAGManager
from core.utils import Chunk, generate_id
class TestBaseRAG:
    """Tests for ``BaseRAG.retrieve`` against a throwaway vector store."""

    def test_retrieve(self):
        """Test Base-RAG retrieval.

        Indexes a single chunk into ``test_collection`` and checks that the
        retrieval result exposes ``content``, ``sources`` and ``latency`` and
        returns at least one source.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)

            # Add test documents
            chunks = [
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="This is a test document about emergency procedures",
                    metadata={"level1": "Clinical", "level2": "Emergency"},
                ),
            ]
            vector_store.add_documents("test_collection", chunks)

            rag = BaseRAG(vector_store, "test_collection")
            result = rag.retrieve("emergency procedures", k=1)

            assert result is not None
            assert hasattr(result, "content")
            assert hasattr(result, "sources")
            assert hasattr(result, "latency")
            assert len(result.sources) > 0
        finally:
            # mkdtemp() never removes the directory itself. Cleanup is
            # best-effort: the store may still hold files open, and a failed
            # removal must not mask the real test outcome.
            shutil.rmtree(persist_dir, ignore_errors=True)
class TestHierarchicalRAG:
    """Tests for ``HierarchicalRAG`` filter construction and filtered retrieval."""

    def test_build_filters(self):
        """Test hierarchical filter building.

        Arguments passed as ``None`` (here the third, expected under the
        ``level3`` key) must be left out of the resulting filter mapping.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)
            rag = HierarchicalRAG(vector_store)

            filters = rag._build_filters("Clinical", "Emergency", None, "Report")

            assert filters["level1"] == "Clinical"
            assert filters["level2"] == "Emergency"
            assert filters["doc_type"] == "Report"
            assert "level3" not in filters
        finally:
            # mkdtemp() leaves deletion to the caller; remove best-effort so a
            # cleanup failure cannot mask the test result.
            shutil.rmtree(persist_dir, ignore_errors=True)

    def test_build_filters_none(self):
        """Test filter building with None values.

        When every argument is ``None`` the builder is expected to return
        ``None`` rather than an empty mapping.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)
            rag = HierarchicalRAG(vector_store)

            filters = rag._build_filters(None, None, None, None)

            assert filters is None
        finally:
            # mkdtemp() leaves deletion to the caller; remove best-effort.
            shutil.rmtree(persist_dir, ignore_errors=True)

    def test_retrieve_with_filters(self):
        """Test Hier-RAG retrieval with metadata filters.

        Indexes one clinical and one administrative chunk, then retrieves with
        filters that match only the clinical one and checks that at least one
        source comes back.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)

            # Add test documents
            chunks = [
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="Clinical emergency document",
                    metadata={
                        "level1": "Clinical",
                        "level2": "Emergency",
                        "doc_type": "Report",
                    },
                ),
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="Administrative document",
                    metadata={
                        "level1": "Administrative",
                        "level2": "Billing",
                        "doc_type": "Policy",
                    },
                ),
            ]
            vector_store.add_documents("test_collection", chunks)

            rag = HierarchicalRAG(vector_store, "test_collection")
            result = rag.retrieve(
                "emergency",
                k=1,
                level1="Clinical",
                level2="Emergency",
                doc_type="Report",
            )

            assert result is not None
            assert len(result.sources) > 0
        finally:
            # mkdtemp() leaves deletion to the caller; remove best-effort.
            shutil.rmtree(persist_dir, ignore_errors=True)
class TestRAGManager:
    """Tests for ``RAGManager`` side-by-side retrieval comparison."""

    def test_compare_retrieval(self):
        """Test comparison of Base-RAG vs Hier-RAG.

        Creates a manager over a temporary persist directory, indexes one
        chunk into the ``documents`` collection through a separate
        ``VectorStore`` on the same directory, and checks that both results
        come back and expose ``latency``.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            rag_manager = RAGManager(persist_directory=persist_dir)

            # Add test documents
            # NOTE(review): this is a second store opened on the manager's
            # directory; presumably the manager reads the same "documents"
            # collection -- confirm against RAGManager.
            vector_store = VectorStore(persist_directory=persist_dir)
            chunks = [
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="Test document about clinical procedures",
                    metadata={"level1": "Clinical", "level2": "Emergency"},
                ),
            ]
            vector_store.add_documents("documents", chunks)

            base_result, hier_result = rag_manager.compare_retrieval(
                "clinical procedures",
                k=1,
                level1="Clinical",
            )

            assert base_result is not None
            assert hier_result is not None
            assert hasattr(base_result, "latency")
            assert hasattr(hier_result, "latency")
        finally:
            # mkdtemp() never removes the directory itself. Cleanup is
            # best-effort: the stores may still hold files open, and a failed
            # removal must not mask the real test outcome.
            shutil.rmtree(persist_dir, ignore_errors=True)