# rag-evaluation-system/tests/test_retrieval.py
# soft.engineer
# init project
# e71fabd
import shutil
import tempfile

import pytest

from core.index import VectorStore
from core.retrieval import BaseRAG, HierarchicalRAG, RAGManager
from core.utils import Chunk, generate_id
class TestBaseRAG:
    """Tests for ``BaseRAG.retrieve`` against a throwaway vector store."""

    def test_retrieve(self):
        """Index one chunk, query for it, and check the shape of the result.

        The result must expose ``content``, ``sources`` and ``latency``
        attributes and must contain at least one source.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)

            # Add test documents
            chunks = [
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="This is a test document about emergency procedures",
                    metadata={"level1": "Clinical", "level2": "Emergency"},
                )
            ]
            vector_store.add_documents("test_collection", chunks)

            rag = BaseRAG(vector_store, "test_collection")
            result = rag.retrieve("emergency procedures", k=1)

            assert result is not None
            assert hasattr(result, "content")
            assert hasattr(result, "sources")
            assert hasattr(result, "latency")
            assert len(result.sources) > 0
        finally:
            # mkdtemp() leaves deletion to the caller, so remove the directory
            # even when an assertion fails. ignore_errors keeps the cleanup
            # best-effort: a store that still holds files open must not turn
            # a passing test into a failing one.
            shutil.rmtree(persist_dir, ignore_errors=True)
class TestHierarchicalRAG:
    """Tests for ``HierarchicalRAG`` filter construction and filtered retrieval."""

    def test_build_filters(self):
        """Check that supplied values become filter entries and ``None`` is skipped.

        Presumably the third positional argument of ``_build_filters`` is the
        ``level3`` value; the test only establishes that passing ``None``
        there yields no ``"level3"`` key.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)
            rag = HierarchicalRAG(vector_store)

            filters = rag._build_filters("Clinical", "Emergency", None, "Report")

            assert filters["level1"] == "Clinical"
            assert filters["level2"] == "Emergency"
            assert filters["doc_type"] == "Report"
            assert "level3" not in filters
        finally:
            # mkdtemp() leaves deletion to the caller; clean up best-effort.
            shutil.rmtree(persist_dir, ignore_errors=True)

    def test_build_filters_none(self):
        """Check that ``_build_filters`` returns ``None`` when every argument is ``None``."""
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)
            rag = HierarchicalRAG(vector_store)

            filters = rag._build_filters(None, None, None, None)

            assert filters is None
        finally:
            # mkdtemp() leaves deletion to the caller; clean up best-effort.
            shutil.rmtree(persist_dir, ignore_errors=True)

    def test_retrieve_with_filters(self):
        """Index two differently-tagged chunks and retrieve with metadata filters.

        The query is restricted to ``level1="Clinical"``, ``level2="Emergency"``
        and ``doc_type="Report"``; the result must be non-``None`` and carry at
        least one source.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            vector_store = VectorStore(persist_directory=persist_dir)

            # Add test documents
            chunks = [
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="Clinical emergency document",
                    metadata={"level1": "Clinical", "level2": "Emergency", "doc_type": "Report"},
                ),
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="Administrative document",
                    metadata={"level1": "Administrative", "level2": "Billing", "doc_type": "Policy"},
                ),
            ]
            vector_store.add_documents("test_collection", chunks)

            rag = HierarchicalRAG(vector_store, "test_collection")
            result = rag.retrieve(
                "emergency",
                k=1,
                level1="Clinical",
                level2="Emergency",
                doc_type="Report",
            )

            assert result is not None
            assert len(result.sources) > 0
        finally:
            # mkdtemp() leaves deletion to the caller; clean up best-effort so
            # a store that still holds files open cannot fail the test.
            shutil.rmtree(persist_dir, ignore_errors=True)
class TestRAGManager:
    """Tests for ``RAGManager.compare_retrieval``."""

    def test_compare_retrieval(self):
        """Run one query through ``compare_retrieval`` and check both results.

        Both returned results must be non-``None`` and expose a ``latency``
        attribute.
        """
        persist_dir = tempfile.mkdtemp()
        try:
            rag_manager = RAGManager(persist_directory=persist_dir)

            # Add test documents through a separate store on the same directory.
            # NOTE(review): this assumes RAGManager reads the "documents"
            # collection from persist_dir -- confirm against RAGManager.
            vector_store = VectorStore(persist_directory=persist_dir)
            chunks = [
                Chunk(
                    doc_id=generate_id(),
                    chunk_id=generate_id(),
                    content="Test document about clinical procedures",
                    metadata={"level1": "Clinical", "level2": "Emergency"},
                )
            ]
            vector_store.add_documents("documents", chunks)

            base_result, hier_result = rag_manager.compare_retrieval(
                "clinical procedures",
                k=1,
                level1="Clinical",
            )

            assert base_result is not None
            assert hier_result is not None
            assert hasattr(base_result, "latency")
            assert hasattr(hier_result, "latency")
        finally:
            # mkdtemp() leaves deletion to the caller, so remove the directory
            # even when an assertion fails. ignore_errors keeps the cleanup
            # best-effort in case the manager or store still holds files open.
            shutil.rmtree(persist_dir, ignore_errors=True)