# hierarchical-rag-eval / tests / test_index.py
# hh786's picture
# Deployment of Hierarchical RAG system
# c54dcef
"""Tests for vector indexing."""
import pytest
import tempfile
import shutil
from pathlib import Path
from core.index import EmbeddingModel, VectorStore, IndexManager
def test_embedding_model():
    """Check the shape of the embeddings produced for a small batch.

    Two sentences are passed to ``EmbeddingModel.embed_texts``; the result
    must have 2 along its first axis and the model's ``embedding_dim`` along
    its second axis.
    """
    sentences = ["This is a test.", "Another test sentence."]
    encoder = EmbeddingModel()

    vectors = encoder.embed_texts(sentences)

    # First axis: one entry per input sentence.
    row_count = vectors.shape[0]
    assert row_count == 2

    # Second axis: must agree with the dimension the model itself reports.
    column_count = vectors.shape[1]
    assert column_count == encoder.embedding_dim
def test_vector_store():
    """Exercise an add-then-search round trip on a throwaway vector store.

    Two chunks are added to a ``VectorStore`` persisted in a temporary
    directory. ``add_documents`` is expected to return 2, and a search
    asking for 2 results is expected to return exactly 2 hits.
    """
    documents = [
        {
            "text": "Test document one",
            "metadata": {"chunk_id": "test_1", "level1": "domain1"},
        },
        {
            "text": "Test document two",
            "metadata": {"chunk_id": "test_2", "level1": "domain2"},
        },
    ]

    # The temporary directory keeps persisted index data out of the repo
    # and is removed when the block exits.
    with tempfile.TemporaryDirectory() as storage_path:
        vector_store = VectorStore(
            persist_directory=storage_path,
            collection_name="test_collection",
        )

        # Indexing: the return value must match the number of chunks passed.
        added_count = vector_store.add_documents(documents)
        assert added_count == 2

        # Querying: both stored chunks should come back for n_results=2.
        hits = vector_store.search("test document", n_results=2)
        assert len(hits) == 2
def test_hierarchical_search():
    """Test that a ``level1`` filter restricts hierarchical search results.

    One "Clinical Care" chunk and one "Administrative" chunk are indexed,
    then ``search_with_hierarchy`` is called with ``level1="Clinical Care"``.
    At least one hit must be returned, and every returned hit must carry the
    requested ``level1`` value.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        store = VectorStore(collection_name="test_hier", persist_directory=temp_dir)

        chunks = [
            {
                "text": "Clinical care document",
                "metadata": {
                    "chunk_id": "c1",
                    "level1": "Clinical Care",
                    "level2": "Patient Records",
                },
            },
            {
                "text": "Administrative document",
                "metadata": {
                    "chunk_id": "c2",
                    "level1": "Administrative",
                    "level2": "Policies",
                },
            },
        ]
        store.add_documents(chunks)

        # Search with filter
        results = store.search_with_hierarchy(
            query="document",
            n_results=5,
            level1="Clinical Care",
        )

        assert len(results) >= 1

        # n_results (5) exceeds the corpus size (2), so a search that ignored
        # the filter could return both chunks. Checking only results[0] would
        # still pass whenever the clinical chunk ranks first, so inspect
        # every hit instead.
        leaked = [
            hit["metadata"]["level1"]
            for hit in results
            if hit["metadata"]["level1"] != "Clinical Care"
        ]
        assert not leaked, f"level1 filter leaked results from: {leaked}"
def test_index_manager():
    """Verify that IndexManager indexes chunks and lists the collection.

    A single chunk is indexed into ``"test_collection"`` through an
    ``IndexManager`` persisted in a temporary directory. The returned stats
    must report ``chunks_added == 1`` and the collection name must appear in
    ``list_collections()``.
    """
    collection = "test_collection"
    sample_chunks = [
        {
            "text": "Sample text",
            "metadata": {"chunk_id": "s1", "level1": "test"},
        },
    ]

    with tempfile.TemporaryDirectory() as storage_path:
        index_manager = IndexManager(persist_directory=storage_path)

        # Index the fixture and check the reported count.
        report = index_manager.index_documents(sample_chunks, collection)
        assert report["chunks_added"] == 1

        # The target collection must be listed after indexing.
        known_collections = index_manager.list_collections()
        assert collection in known_collections