Spaces:
Running
Running
| """ | |
| tests/test_search.py — C1.3 | |
| Tests for SearchIndex, HybridSearch, and synonym expansion. | |
| """ | |
| import sys | |
| import os | |
| import pytest | |
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) | |
| # --------------------------------------------------------------------------- | |
| # SearchIndex unit tests | |
| # --------------------------------------------------------------------------- | |
class TestSearchIndex:
    """Unit tests for ``SearchIndex``: indexing, querying, removal, and dirty tracking."""

    def _make_obs(self, obs_id, title, narrative="", concepts=None, obs_type="other"):
        """Build a minimal observation dict suitable for ``SearchIndex.add``.

        Only ``obs_id`` and ``title`` are required; the remaining fields get
        neutral defaults so each test states just the text it cares about.
        """
        return {
            "id": obs_id,
            "sessionId": "sess_test",
            "title": title,
            "narrative": narrative,
            "concepts": concepts or [],
            "files": [],
            "type": obs_type,
        }

    def test_add_and_exact_match_returns_rank_one(self):
        """The only indexed document is the top hit for a query built from its title."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("obs_001", "authentication middleware refactor"))

        results = idx.search("authentication middleware")

        assert len(results) > 0
        assert results[0]["obsId"] == "obs_001"

    def test_prefix_matching(self):
        """A query that is a prefix of an indexed word still finds the document."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("obs_002", "authentication token validation"))

        results = idx.search("authen")

        assert any(r["obsId"] == "obs_002" for r in results)

    def test_synonym_expansion_db_conn(self):
        """'db conn' should find document indexed with 'database connection'."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(
            self._make_obs(
                "obs_003",
                "database connection pooling setup",
                "configure database connection pool",
            )
        )

        results = idx.search("db conn")

        assert any(r["obsId"] == "obs_003" for r in results)

    def test_remove_document(self):
        """A removed document no longer appears in search results."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("obs_004", "deploy kubernetes service mesh"))

        # Precondition: the document must be findable before removal, otherwise
        # the post-removal assertion would pass even if indexing were broken.
        assert any(r["obsId"] == "obs_004" for r in idx.search("kubernetes"))

        idx.remove("obs_004")

        results = idx.search("kubernetes")
        assert not any(r["obsId"] == "obs_004" for r in results)

    def test_empty_index_returns_empty(self):
        """Searching an index with no documents yields an empty list."""
        from search import SearchIndex

        idx = SearchIndex()

        assert idx.search("anything") == []

    def test_multiple_docs_rank_order(self):
        """With several documents indexed, the one about the query topic ranks first."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("obs_a", "authentication login system", "user authentication flow"))
        idx.add(self._make_obs("obs_b", "database migration script", "run database migration"))
        idx.add(self._make_obs("obs_c", "deployment pipeline CI", "CI CD pipeline deployment"))

        results = idx.search("authentication")

        # Guard the index access so an empty result reports a clear assertion
        # failure instead of an IndexError.
        assert len(results) > 0, "expected at least one hit for 'authentication'"
        assert results[0]["obsId"] == "obs_a"

    def test_size_property(self):
        """``size`` tracks the number of indexed documents across add and remove."""
        from search import SearchIndex

        idx = SearchIndex()
        assert idx.size == 0

        idx.add(self._make_obs("x1", "title one"))
        idx.add(self._make_obs("x2", "title two"))
        assert idx.size == 2

        idx.remove("x1")
        assert idx.size == 1

    def test_clear(self):
        """``clear`` empties the index and leaves nothing searchable."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("x1", "something"))

        # Precondition: the add must have registered, so that the zero size
        # below is attributable to clear() rather than to a no-op add().
        assert idx.size == 1

        idx.clear()

        assert idx.size == 0
        assert idx.search("something") == []

    def test_dirty_flag_set_on_add(self):
        """A fresh index is clean; adding a document marks it dirty."""
        from search import SearchIndex

        idx = SearchIndex()
        assert idx._dirty is False

        idx.add(self._make_obs("x1", "test dirty flag"))

        assert idx._dirty is True

    def test_dirty_flag_set_on_remove(self):
        """Removing a document marks the index dirty again."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("x1", "test remove dirty"))
        idx._dirty = False  # reset manually so only remove() can set it

        idx.remove("x1")

        assert idx._dirty is True

    def test_dirty_flag_reset_after_restore(self):
        """An index populated via ``restore_from_data`` starts out clean."""
        from search import SearchIndex

        idx = SearchIndex()
        idx.add(self._make_obs("x1", "test restore"))
        data = idx.serialize_data()

        idx2 = SearchIndex()
        idx2.restore_from_data(data)

        assert idx2._dirty is False

    def test_has_method(self):
        """``has`` reports membership before add, after add, and after remove."""
        from search import SearchIndex

        idx = SearchIndex()
        obs = self._make_obs("obs_has", "has method test")
        assert not idx.has("obs_has")

        idx.add(obs)
        assert idx.has("obs_has")

        idx.remove("obs_has")
        assert not idx.has("obs_has")
| # --------------------------------------------------------------------------- | |
| # VectorIndex unit tests | |
| # --------------------------------------------------------------------------- | |
class TestVectorIndex:
    """Dirty-flag behaviour of ``VectorIndex`` across add, remove, and restore."""

    def test_dirty_flag_on_add(self):
        """A new vector index is clean and becomes dirty once a vector is added."""
        from search import VectorIndex

        index = VectorIndex()
        assert index._dirty is False

        index.add("v1", "sess", [0.1, 0.2, 0.3])

        assert index._dirty is True

    def test_dirty_flag_on_remove(self):
        """Removing a vector sets the dirty flag after it was manually cleared."""
        from search import VectorIndex

        index = VectorIndex()
        index.add("v1", "sess", [0.1, 0.2, 0.3])
        index._dirty = False  # cleared by hand so that only remove() can set it

        index.remove("v1")

        assert index._dirty is True

    def test_dirty_flag_reset_after_restore(self):
        """An index loaded through ``restore_from_data`` is not dirty."""
        from search import VectorIndex

        source = VectorIndex()
        source.add("v1", "sess", [0.1, 0.2, 0.3])
        snapshot = source.serialize_data()

        restored = VectorIndex()
        restored.restore_from_data(snapshot)

        assert restored._dirty is False
| # --------------------------------------------------------------------------- | |
| # HybridSearch in BM25-only mode | |
| # --------------------------------------------------------------------------- | |
class TestHybridSearchBM25Only:
    """Tests for ``HybridSearch`` when it is built without an embedding provider."""

    def _make_obs(self, obs_id, title, narrative=""):
        """Build a minimal observation dict with fixed session, concepts, files, and type."""
        return {
            "id": obs_id,
            "sessionId": "sess_hybrid",
            "title": title,
            "narrative": narrative,
            "concepts": [],
            "files": [],
            "type": "other",
        }

    def test_hybrid_bm25_only_returns_same_results_as_search_index(self):
        """The top hit from ``HybridSearch`` matches the top hit from the bare index."""
        from search import SearchIndex, VectorIndex, HybridSearch

        bm25 = SearchIndex()
        vector = VectorIndex()
        docs = [
            self._make_obs("h1", "authentication middleware implementation"),
            self._make_obs("h2", "database migration scripts"),
            self._make_obs("h3", "deployment kubernetes configuration"),
        ]
        for d in docs:
            bm25.add(d)

        # HybridSearch with no embedding provider — BM25 only
        hybrid = HybridSearch(bm25, vector, None, None)

        bm25_direct = bm25.search("authentication", 10)
        hybrid_results = hybrid.search("authentication", 10)
        bm25_ids = [r["obsId"] for r in bm25_direct]
        hybrid_ids = [r["obsId"] for r in hybrid_results]

        # Check both result lists are non-empty before indexing them, so a
        # backend that returns nothing fails with a message naming it rather
        # than with an IndexError.
        assert bm25_ids, "SearchIndex returned no results for 'authentication'"
        assert hybrid_ids, "HybridSearch returned no results for 'authentication'"

        # The same document should appear at the top in both
        assert bm25_ids[0] == hybrid_ids[0]

    def test_hybrid_returns_empty_for_no_matches(self):
        """A query against an empty index yields an empty list from ``HybridSearch``."""
        from search import SearchIndex, VectorIndex, HybridSearch

        bm25 = SearchIndex()
        hybrid = HybridSearch(bm25, VectorIndex(), None, None)

        assert hybrid.search("zzznomatch", 10) == []
| # --------------------------------------------------------------------------- | |
| # Serialization round-trip | |
| # --------------------------------------------------------------------------- | |
class TestSearchIndexSerialization:
    """Round-trip test for ``SearchIndex.serialize_data`` / ``restore_from_data``."""

    def test_roundtrip_preserves_search_results(self):
        """A document indexed before serialization is still found after restore."""
        from search import SearchIndex

        observation = {
            "id": "rt_001",
            "sessionId": "sess_rt",
            "title": "serialization round trip test",
            "narrative": "verify that index survives serialize/restore",
            "concepts": ["test"],
            "files": [],
            "type": "other",
        }
        original = SearchIndex()
        original.add(observation)
        snapshot = original.serialize_data()

        # Load the snapshot into a brand-new index and query that one only.
        restored = SearchIndex()
        restored.restore_from_data(snapshot)
        hits = restored.search("serialization round trip")

        found_ids = [hit["obsId"] for hit in hits]
        assert "rt_001" in found_ids