# rag-evaluation-system/tests/test_index.py
# soft.engineer
# init project
# e71fabd
import os
import shutil
import tempfile

import pytest

from core.index import VectorStore
from core.utils import Chunk, generate_id
class TestVectorStore:
    """Tests for ``VectorStore`` run against a throwaway persistence directory.

    Each test method gets its own temporary directory and its own
    ``VectorStore`` instance (see ``setup_method``), and the directory is
    removed again afterwards (see ``teardown_method``), so test runs do not
    leave files behind.
    """

    def setup_method(self):
        """Create a fresh temporary directory and a store that persists to it."""
        self.persist_dir = tempfile.mkdtemp()
        try:
            self.vector_store = VectorStore(persist_directory=self.persist_dir)
        except Exception:
            # The teardown hook may not run when setup fails, so remove the
            # directory here before re-raising the original error.
            shutil.rmtree(self.persist_dir, ignore_errors=True)
            raise

    def teardown_method(self):
        """Remove the temporary persistence directory created for the test."""
        # Best effort: ignore_errors keeps a cleanup problem from failing a
        # test whose assertions already passed.
        shutil.rmtree(self.persist_dir, ignore_errors=True)

    @staticmethod
    def _make_chunk(content, metadata):
        """Build a ``Chunk`` with freshly generated document and chunk ids."""
        return Chunk(
            doc_id=generate_id(),
            chunk_id=generate_id(),
            content=content,
            metadata=metadata,
        )

    def test_create_collection(self):
        """Test collection creation"""
        collection = self.vector_store.create_collection("test_collection")

        assert collection is not None
        assert collection.name == "test_collection"

    def test_embed_text(self):
        """Test text embedding generation"""
        text = "This is a test sentence"

        embedding = self.vector_store.embed_text(text)

        assert isinstance(embedding, list)
        assert len(embedding) > 0
        assert all(isinstance(x, float) for x in embedding)

    def test_add_documents(self):
        """Test adding documents to vector store"""
        chunks = [
            self._make_chunk(
                "Test document content 1",
                {"level1": "Clinical", "doc_type": "Report"},
            ),
            self._make_chunk(
                "Test document content 2",
                {"level1": "Administrative", "doc_type": "Policy"},
            ),
        ]

        self.vector_store.add_documents("test_collection", chunks)

        stats = self.vector_store.get_collection_stats("test_collection")
        assert stats["document_count"] == 2

    def test_search_without_filters(self):
        """Test vector search without metadata filters"""
        # Add test documents
        chunks = [
            self._make_chunk(
                "Emergency procedures for cardiac arrest",
                {"level1": "Clinical", "level2": "Emergency"},
            ),
        ]
        self.vector_store.add_documents("test_collection", chunks)

        # Search
        results = self.vector_store.search(
            "test_collection", "cardiac emergency", k=1
        )

        assert len(results) > 0
        assert "content" in results[0]
        assert "metadata" in results[0]
        assert "score" in results[0]

    def test_search_with_filters(self):
        """Test vector search with metadata filters"""
        # Add test documents with different metadata
        chunks = [
            self._make_chunk(
                "Clinical document",
                {"level1": "Clinical", "level2": "Emergency"},
            ),
            self._make_chunk(
                "Administrative document",
                {"level1": "Administrative", "level2": "Billing"},
            ),
        ]
        self.vector_store.add_documents("test_collection", chunks)

        # Search with filter
        filters = {"level1": "Clinical"}
        results = self.vector_store.search(
            "test_collection", "document", filters=filters, k=2
        )

        assert len(results) > 0
        # All results should match the filter
        for result in results:
            assert result["metadata"]["level1"] == "Clinical"

    def test_get_collection_stats(self):
        """Test collection statistics retrieval"""
        stats = self.vector_store.get_collection_stats("new_collection")

        assert "document_count" in stats
        assert "collection_name" in stats
        assert stats["collection_name"] == "new_collection"