Spaces:
Sleeping
Sleeping
| """Tests for vector indexing.""" | |
| import pytest | |
| import tempfile | |
| import shutil | |
| from pathlib import Path | |
| from core.index import EmbeddingModel, VectorStore, IndexManager | |
def test_embedding_model():
    """Embed two sentences and verify the shape of the returned embeddings."""
    sentences = ["This is a test.", "Another test sentence."]
    encoder = EmbeddingModel()

    vectors = encoder.embed_texts(sentences)
    shape = vectors.shape

    # First dimension: one entry per input sentence.
    assert shape[0] == 2
    # Second dimension: must agree with the size the model itself reports.
    assert shape[1] == encoder.embedding_dim
def test_vector_store():
    """Add two chunks to a temporary VectorStore and search them back."""
    documents = [
        {
            "text": "Test document one",
            "metadata": {"chunk_id": "test_1", "level1": "domain1"},
        },
        {
            "text": "Test document two",
            "metadata": {"chunk_id": "test_2", "level1": "domain2"},
        },
    ]

    with tempfile.TemporaryDirectory() as workdir:
        vector_store = VectorStore(
            persist_directory=workdir,
            collection_name="test_collection",
        )

        # The value returned by add_documents must match the number of
        # chunks handed to it.
        added_count = vector_store.add_documents(documents)
        assert added_count == 2

        # Asking for two results over the two stored chunks must yield two.
        hits = vector_store.search("test document", n_results=2)
        assert len(hits) == 2
def test_hierarchical_search():
    """Test hierarchical filtering in search.

    Two chunks with different ``level1`` values are stored. Searching with
    ``level1="Clinical Care"`` must return at least one hit, and every
    returned hit must carry that ``level1`` value in its metadata.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        store = VectorStore(collection_name="test_hier", persist_directory=temp_dir)
        chunks = [
            {
                "text": "Clinical care document",
                "metadata": {
                    "chunk_id": "c1",
                    "level1": "Clinical Care",
                    "level2": "Patient Records",
                },
            },
            {
                "text": "Administrative document",
                "metadata": {
                    "chunk_id": "c2",
                    "level1": "Administrative",
                    "level2": "Policies",
                },
            },
        ]
        store.add_documents(chunks)

        # Search with filter. n_results is larger than the number of stored
        # chunks, so a filter that was ignored could let the non-matching
        # chunk through as well.
        results = store.search_with_hierarchy(
            query="document",
            n_results=5,
            level1="Clinical Care",
        )
        assert len(results) >= 1

        # Check every hit rather than only the top one: a leaked
        # "Administrative" chunk ranked second would otherwise go unnoticed.
        for hit in results:
            assert hit["metadata"]["level1"] == "Clinical Care"
def test_index_manager():
    """Index one chunk through IndexManager and check the reported results."""
    sample_chunks = [
        {"text": "Sample text", "metadata": {"chunk_id": "s1", "level1": "test"}},
    ]
    collection = "test_collection"

    with tempfile.TemporaryDirectory() as workdir:
        index_manager = IndexManager(persist_directory=workdir)

        report = index_manager.index_documents(sample_chunks, collection)

        # The stats mapping must report exactly the one chunk that was passed.
        assert report["chunks_added"] == 1
        # The collection that was indexed into must now be listed.
        known_collections = index_manager.list_collections()
        assert collection in known_collections