Ragcore / tests /test_retrieval.py
NinjainPJs's picture
Initial deploy: RagCore RAG system with hybrid search and Gradio UI
a34068e
from app.core.retriever import HybridRetriever
def test_rrf_fusion_basic():
dense = [
{"chunk_id": "a", "text": "doc a", "score": 0.9, "metadata": {}},
{"chunk_id": "b", "text": "doc b", "score": 0.8, "metadata": {}},
]
sparse = [
{"chunk_id": "b", "text": "doc b", "score": 5.0, "metadata": {}},
{"chunk_id": "c", "text": "doc c", "score": 4.0, "metadata": {}},
]
fused = HybridRetriever.rrf_fuse([dense, sparse])
ids = [item["chunk_id"] for item in fused]
# "b" appears in both lists so should rank highest
assert ids[0] == "b"
assert len(fused) == 3
def test_rrf_fusion_empty():
fused = HybridRetriever.rrf_fuse([[], []])
assert fused == []
def test_rrf_fusion_single_list():
results = [
{"chunk_id": "x", "text": "x", "score": 1.0, "metadata": {}},
]
fused = HybridRetriever.rrf_fuse([results])
assert len(fused) == 1
assert fused[0]["chunk_id"] == "x"
def test_rrf_fusion_with_weights():
dense = [
{"chunk_id": "a", "text": "a", "score": 0.9, "metadata": {}},
]
sparse = [
{"chunk_id": "b", "text": "b", "score": 5.0, "metadata": {}},
]
fused = HybridRetriever.rrf_fuse([dense, sparse], weights=[1.0, 0.0])
# With weight 0 on sparse, only dense matters
assert fused[0]["chunk_id"] == "a"
def test_apply_filters():
results = [
{"chunk_id": "1", "text": "t", "score": 1, "metadata": {"doc_type": "pdf", "source": "a.pdf", "tags": []}},
{"chunk_id": "2", "text": "t", "score": 1, "metadata": {"doc_type": "html", "source": "b.html", "tags": []}},
]
from app.models.schemas import SearchFilters
filters = SearchFilters(doc_type="pdf")
filtered = HybridRetriever._apply_filters(results, filters)
assert len(filtered) == 1
assert filtered[0]["chunk_id"] == "1"