Spaces:
Sleeping
Sleeping
| import pytest | |
| import tempfile | |
| import os | |
| from core.index import VectorStore | |
| from core.utils import Chunk, generate_id | |
class TestVectorStore:
    """Tests for ``VectorStore``: collections, embeddings, indexing, search, stats.

    Every test receives pytest's ``tmp_path`` fixture and persists its store
    there. pytest creates a fresh directory per test and manages its removal,
    whereas directories made with ``tempfile.mkdtemp()`` are never deleted
    unless the caller does so explicitly.
    """

    @staticmethod
    def _make_store(tmp_path):
        """Return a ``VectorStore`` persisting under this test's temp directory."""
        # str() keeps the argument a plain string, as tempfile.mkdtemp() returned.
        return VectorStore(persist_directory=str(tmp_path))

    @staticmethod
    def _chunk(content, metadata):
        """Build a ``Chunk`` with freshly generated document and chunk ids."""
        return Chunk(
            doc_id=generate_id(),
            chunk_id=generate_id(),
            content=content,
            metadata=metadata,
        )

    def test_create_collection(self, tmp_path):
        """Creating a collection returns an object carrying the requested name."""
        vector_store = self._make_store(tmp_path)

        collection = vector_store.create_collection("test_collection")

        assert collection is not None
        assert collection.name == "test_collection"

    def test_embed_text(self, tmp_path):
        """Embedding a sentence yields a non-empty list of floats."""
        vector_store = self._make_store(tmp_path)

        embedding = vector_store.embed_text("This is a test sentence")

        assert isinstance(embedding, list)
        assert len(embedding) > 0
        assert all(isinstance(x, float) for x in embedding)

    def test_add_documents(self, tmp_path):
        """Adding two chunks is reflected in the collection's document count."""
        vector_store = self._make_store(tmp_path)
        chunks = [
            self._chunk(
                "Test document content 1",
                {"level1": "Clinical", "doc_type": "Report"},
            ),
            self._chunk(
                "Test document content 2",
                {"level1": "Administrative", "doc_type": "Policy"},
            ),
        ]

        vector_store.add_documents("test_collection", chunks)

        stats = vector_store.get_collection_stats("test_collection")
        assert stats["document_count"] == 2

    def test_search_without_filters(self, tmp_path):
        """An unfiltered search returns hits exposing content, metadata and score."""
        vector_store = self._make_store(tmp_path)
        chunks = [
            self._chunk(
                "Emergency procedures for cardiac arrest",
                {"level1": "Clinical", "level2": "Emergency"},
            ),
        ]
        vector_store.add_documents("test_collection", chunks)

        results = vector_store.search("test_collection", "cardiac emergency", k=1)

        assert len(results) > 0
        assert "content" in results[0]
        assert "metadata" in results[0]
        assert "score" in results[0]

    def test_search_with_filters(self, tmp_path):
        """A metadata-filtered search only returns hits matching the filter."""
        vector_store = self._make_store(tmp_path)
        # Two chunks that differ in "level1" so the filter has something to exclude.
        chunks = [
            self._chunk(
                "Clinical document",
                {"level1": "Clinical", "level2": "Emergency"},
            ),
            self._chunk(
                "Administrative document",
                {"level1": "Administrative", "level2": "Billing"},
            ),
        ]
        vector_store.add_documents("test_collection", chunks)

        filters = {"level1": "Clinical"}
        results = vector_store.search(
            "test_collection", "document", filters=filters, k=2
        )

        assert len(results) > 0
        # Every returned hit must satisfy the filter, not just the first one.
        for result in results:
            assert result["metadata"]["level1"] == "Clinical"

    def test_get_collection_stats(self, tmp_path):
        """Collection stats expose the document count and echo the collection name."""
        vector_store = self._make_store(tmp_path)

        stats = vector_store.get_collection_stats("new_collection")

        assert "document_count" in stats
        assert "collection_name" in stats
        assert stats["collection_name"] == "new_collection"