""" Tests for multi-agent query analysis, filter building, metadata loading, and resolution of ambiguous references. Requires: QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY in environment / .env Run: python -m pytest tests/test_agent_intelligence.py -v """ import os import sys import json import pytest from pathlib import Path from unittest.mock import MagicMock, patch from dataclasses import asdict sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from src.agents.base_multi_agent_chatbot import QueryContext, BaseMultiAgentChatbot # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _load_env(): env_file = Path(__file__).resolve().parent.parent / ".env" if env_file.exists(): for line in env_file.read_text().splitlines(): line = line.strip() if line and not line.startswith("#") and "=" in line: k, v = line.split("=", 1) os.environ.setdefault(k.strip(), v.strip().strip('"').strip("'")) _load_env() # --------------------------------------------------------------------------- # Unit tests – QueryContext (no network, no LLM) # --------------------------------------------------------------------------- class TestQueryContext: def test_all_year_passthrough(self): ctx = QueryContext(has_year=True, extracted_year="ALL") assert ctx.extracted_year == "ALL" def test_all_source_passthrough(self): ctx = QueryContext(has_source=True, extracted_source="ALL") assert ctx.extracted_source == "ALL" def test_all_district_passthrough(self): ctx = QueryContext(has_district=True, extracted_district="ALL") assert ctx.extracted_district == "ALL" def test_single_district_title_case(self): """[unit] A lowercase district name passed to QueryContext is normalised to title case in __post_init__.""" ctx = QueryContext(has_district=True, extracted_district="gulu") assert ctx.extracted_district == "Gulu" def test_multi_district_title_case(self): ctx = QueryContext( 
has_district=True, extracted_district=["gulu", "pader", "lira"] ) assert ctx.extracted_district == ["Gulu", "Pader", "Lira"] def test_single_source_title_case(self): ctx = QueryContext(has_source=True, extracted_source="hospital") assert ctx.extracted_source == "Hospital" def test_none_stays_none(self): ctx = QueryContext() assert ctx.extracted_district is None assert ctx.extracted_source is None assert ctx.extracted_year is None def test_resolution_notes_default(self): ctx = QueryContext() assert ctx.resolution_notes is None assert ctx.needs_metadata_lookup is False def test_resolution_notes_set(self): ctx = QueryContext( resolution_notes="Resolved 'biggest' to top 5 districts by doc count.", needs_metadata_lookup=False, ) assert "biggest" in ctx.resolution_notes # --------------------------------------------------------------------------- # Unit tests – _build_filters (no network, no LLM) # --------------------------------------------------------------------------- class _StubChatbot(BaseMultiAgentChatbot): """Concrete stub that satisfies abstract methods for unit tests.""" def __init__(self): pass def _perform_retrieval(self, query, filters): return MagicMock(sources=[], answer="") def _generate_conversational_response(self, *a, **kw): return "" def _generate_conversational_response_without_docs(self, *a, **kw): return "" class TestBuildFilters: """Test filter building logic using a stub chatbot.""" @pytest.fixture def mock_bot(self): bot = _StubChatbot() # Whitelist must cover every district referenced by tests in this class, # otherwise _validate_filter_values (correctly) strips unknown values. 
bot.district_whitelist = [ "Gulu", "Pader", "Kampala", "Bushenyi", "Jinja", "Amuru", "Kalungu", "Buikwe", "Mbale", ] bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"] bot.source_whitelist = [ "Consolidated", "Hospital", "Local Government", "Ministry, Department and Agency", "Project", "Value for Money", ] return bot def test_no_filters(self, mock_bot): ctx = QueryContext() filters, anchored = mock_bot._build_filters(ctx) assert filters == {} assert anchored == set() def test_year_filter(self, mock_bot): ctx = QueryContext(has_year=True, extracted_year="2023") filters, anchored = mock_bot._build_filters(ctx) assert filters == {"year": ["2023"]} assert "year" in anchored def test_year_all_skips_filter(self, mock_bot): ctx = QueryContext(has_year=True, extracted_year="ALL") filters, _ = mock_bot._build_filters(ctx) assert "year" not in filters def test_district_all_skips_filter(self, mock_bot): ctx = QueryContext(has_district=True, extracted_district="ALL") filters, _ = mock_bot._build_filters(ctx) assert "district" not in filters def test_source_all_skips_filter(self, mock_bot): ctx = QueryContext(has_source=True, extracted_source="ALL") filters, _ = mock_bot._build_filters(ctx) assert "sources" not in filters def test_multi_year_filter(self, mock_bot): ctx = QueryContext(has_year=True, extracted_year=["2022", "2023"]) filters, anchored = mock_bot._build_filters(ctx) assert filters == {"year": ["2022", "2023"]} assert "year" in anchored def test_single_district_filter(self, mock_bot): ctx = QueryContext(has_district=True, extracted_district="Gulu") filters, anchored = mock_bot._build_filters(ctx) assert "district" in filters assert "Gulu" in filters["district"] assert "district" in anchored def test_multi_district_filter(self, mock_bot): ctx = QueryContext( has_district=True, extracted_district=["Gulu", "Pader"] ) filters, _ = mock_bot._build_filters(ctx) assert set(filters["district"]) == {"Gulu", "Pader"} def test_source_filter(self, mock_bot): 
ctx = QueryContext(has_source=True, extracted_source="Hospital") filters, anchored = mock_bot._build_filters(ctx) assert filters == {"sources": ["Hospital"]} assert "sources" in anchored def test_llm_extraction_overrides_stale_ui(self, mock_bot): """When LLM extracts a DIFFERENT year than sidebar, LLM wins (user changed context).""" ctx = QueryContext( has_year=True, extracted_year="2020", ui_filters={"years": ["2024"]}, ) filters, anchored = mock_bot._build_filters(ctx) assert filters["year"] == ["2020"] assert "year" in anchored def test_filename_filter_short_circuits(self, mock_bot): ctx = QueryContext( has_year=True, extracted_year="2023", ui_filters={"filenames": ["report.pdf"]}, ) filters, anchored = mock_bot._build_filters(ctx) assert filters == {"filenames": ["report.pdf"]} assert "year" not in filters assert "filenames" in anchored def test_district_drops_auto_inferred_source(self, mock_bot): """[unit, regression] When district is present and source was NOT explicitly mentioned, source should be dropped. Regression: locks in the district-priority rule (without this, adding source=Local Government on top of a district query excludes VFM / Project audits that also cover the district). 
""" ctx = QueryContext( has_year=True, extracted_year="2024", has_source=False, extracted_source="Local Government", has_district=True, extracted_district="Gulu", ) filters, anchored = mock_bot._build_filters(ctx) assert filters["year"] == ["2024"] assert "sources" not in filters assert "Gulu" in filters["district"] assert "district" in anchored def test_district_keeps_explicit_source(self, mock_bot): """When district is present but source WAS explicitly mentioned, both are kept.""" ctx = QueryContext( has_year=True, extracted_year="2024", has_source=True, extracted_source="Local Government", has_district=True, extracted_district="Gulu", ) filters, anchored = mock_bot._build_filters(ctx) assert filters["year"] == ["2024"] assert filters["sources"] == ["Local Government"] assert "Gulu" in filters["district"] assert "sources" in anchored assert "district" in anchored def test_anchored_keys_from_ui(self, mock_bot): """UI sidebar selections are always anchored.""" ctx = QueryContext( ui_filters={"sources": ["Hospital"], "years": ["2024"]}, ) filters, anchored = mock_bot._build_filters(ctx) assert "sources" in anchored assert "year" in anchored def test_llm_overrides_stale_sidebar_district(self, mock_bot): """When user changes district in conversation, LLM extraction overrides stale sidebar.""" ctx = QueryContext( has_district=True, extracted_district=["Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"], has_year=True, extracted_year="2023", ui_filters={"districts": ["Jinja"], "years": ["2023"]}, ) filters, anchored = mock_bot._build_filters(ctx) assert set(filters["district"]) == {"Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"} assert "Jinja" not in filters["district"] assert "district" in anchored def test_llm_same_as_sidebar_uses_sidebar(self, mock_bot): """When LLM extraction matches sidebar, sidebar wins (no override).""" ctx = QueryContext( has_district=True, extracted_district="Gulu", ui_filters={"districts": ["Gulu"]}, ) filters, anchored = 
mock_bot._build_filters(ctx) assert filters["district"] == ["Gulu"] assert "district" in anchored def test_llm_overrides_stale_sidebar_year(self, mock_bot): """When user mentions new years, LLM extraction overrides sidebar.""" ctx = QueryContext( has_year=True, extracted_year=["2023", "2025"], ui_filters={"years": ["2023"]}, ) filters, anchored = mock_bot._build_filters(ctx) assert set(filters["year"]) == {"2023", "2025"} assert "year" in anchored def test_no_has_flag_sidebar_wins(self, mock_bot): """When LLM did NOT detect a filter dimension, sidebar stays.""" ctx = QueryContext( has_district=False, extracted_district=None, ui_filters={"districts": ["Jinja"]}, ) filters, anchored = mock_bot._build_filters(ctx) assert filters["district"] == ["Jinja"] assert "district" in anchored # --------------------------------------------------------------------------- # Unit tests – UGANDA_REGIONS # --------------------------------------------------------------------------- class TestUgandaRegions: def test_regions_exist(self): assert hasattr(BaseMultiAgentChatbot, "UGANDA_REGIONS") regions = BaseMultiAgentChatbot.UGANDA_REGIONS assert "Northern" in regions assert "Eastern" in regions assert "Western" in regions assert "Central" in regions assert "Karamoja" in regions def test_gulu_is_northern(self): assert "Gulu" in BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"] def test_kampala_is_central(self): assert "Kampala" in BaseMultiAgentChatbot.UGANDA_REGIONS["Central"] def test_moroto_is_karamoja(self): assert "Moroto" in BaseMultiAgentChatbot.UGANDA_REGIONS["Karamoja"] def test_no_duplicate_across_regions(self): all_dists = [] for dists in BaseMultiAgentChatbot.UGANDA_REGIONS.values(): all_dists.extend(dists) assert len(all_dists) == len(set(all_dists)), "Duplicate district in UGANDA_REGIONS" # --------------------------------------------------------------------------- # Integration tests – require network + Qdrant + OpenAI # 
# ---------------------------------------------------------------------------

def _skip_if_no_env():
    """Skip the calling test when a live-service credential is missing.

    QDRANT_URL, QDRANT_API_KEY and OPENAI_API_KEY are checked in that order;
    the first one that is unset or empty is named in the skip reason.
    """
    required = ("QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY")
    missing = next((name for name in required if not os.environ.get(name)), None)
    if missing is not None:
        pytest.skip(f"{missing} not set")


@pytest.fixture(scope="module")
def chatbot():
    """Module-scoped ``MultiAgentRAGChatbot`` instance; skipped when the
    required environment variables are not set."""
    _skip_if_no_env()
    # Imported only after the credential check has passed.
    from src.agents.multi_agent_chatbot import MultiAgentRAGChatbot

    return MultiAgentRAGChatbot()


@pytest.mark.live_qdrant
class TestMetadataLoading:
    """Checks on the metadata attributes exposed by the live chatbot."""

    @pytest.mark.smoke
    def test_db_metadata_context_populated(self, chatbot):
        """[integration, smoke] Booting the chatbot must produce a non-empty
        live-metadata context string from Qdrant. Smoke because failure here
        means the whole Qdrant integration is broken."""
        metadata_context = chatbot.db_metadata_context
        assert metadata_context is not None
        assert len(metadata_context) > 100

    def test_year_whitelist_from_qdrant(self, chatbot):
        """2020, 2024 and 2025 are all present in the year whitelist."""
        known_years = chatbot.year_whitelist
        for expected in ("2020", "2024", "2025"):
            assert expected in known_years

    def test_district_doc_counts_populated(self, chatbot):
        """More than 50 districts have a document count."""
        district_total = len(chatbot.district_doc_counts)
        assert district_total > 50

    def test_latest_data_year(self, chatbot):
        """The latest data year is reported as "2025"."""
        latest = chatbot.latest_data_year
        assert latest == "2025"

    def test_regions_in_context(self, chatbot):
        """The metadata context mentions the Northern and Central regions."""
        metadata_context = chatbot.db_metadata_context
        assert all(region in metadata_context for region in ("Northern", "Central"))


@pytest.mark.live_llm
@pytest.mark.live_qdrant
class TestQueryAnalysisLLM:
    """
    Integration tests that call the real LLM (gpt-4.1) for query analysis.
    Each test validates a specific capability of the analysis prompt.

    Marked ``live_llm`` — automatically skipped when OpenAI quota is
    unavailable (see ``tests/conftest.py``).
    """

    @pytest.mark.smoke
    def test_all_years_extraction(self, chatbot):
        """[integration, smoke, quality] LLM correctly interprets the phrase
        'for all years' as the sentinel 'ALL' (not as a year list).
        Quality: depends on the model understanding our prompt contract."""
        question = "What are the main audit findings for all years?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.extracted_year == "ALL"

    def test_greeting_triggers_follow_up(self, chatbot):
        """A bare greeting asks for a follow-up and supplies the question."""
        analysis = chatbot._analyze_query_context("hello", [], {})
        assert analysis.needs_follow_up is True
        assert analysis.follow_up_question is not None

    def test_last_n_years(self, chatbot):
        """'last 3 years' needs no follow-up; when specific years are
        extracted there must be at least two of them."""
        question = "Revenue performance in the last 3 years"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False

        extracted = analysis.extracted_year
        # Nothing further to check when no year was extracted or the
        # "ALL" sentinel was returned.
        if not extracted or extracted == "ALL":
            return
        year_list = extracted if isinstance(extracted, list) else [extracted]
        assert len(year_list) >= 2

    def test_explicit_district(self, chatbot):
        """A query naming Gulu is analysed as a district query containing Gulu."""
        question = "What issues were found in Gulu?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.has_district is True

        extracted = analysis.extracted_district
        # A scalar is compared via its string form; list items are used as-is.
        candidates = extracted if isinstance(extracted, list) else [str(extracted)]
        assert any("Gulu" in name for name in candidates)

    def test_source_alias_ministries(self, chatbot):
        """'ministries' is analysed as a source whose name contains 'Ministry'."""
        question = "What are the audit findings for ministries?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.has_source is True

        extracted = analysis.extracted_source
        candidates = extracted if isinstance(extracted, list) else [str(extracted)]
        assert any("Ministry" in name for name in candidates)

    def test_biggest_districts_resolution(self, chatbot):
        """'biggest districts' is analysed as a district query; resolution
        notes, when present, must mention one of the ranking keywords."""
        question = "Audit findings for the biggest districts"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.has_district is True

        if not analysis.resolution_notes:
            return
        notes = analysis.resolution_notes.lower()
        assert any(word in notes for word in ("biggest", "top", "most", "document"))

    def test_northern_uganda_resolution(self, chatbot):
        """When a district list is extracted for 'northern Uganda', at least
        two entries must belong to the Northern region."""
        question = "Revenue issues in northern Uganda for all years"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False

        districts = analysis.extracted_district
        if not isinstance(districts, list):
            return
        northern_lower = {
            name.lower() for name in BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
        }
        matches = [name for name in districts if name.lower() in northern_lower]
        assert len(matches) >= 2, f"Expected Northern districts, got {districts}"

    def test_substantive_question_no_follow_up(self, chatbot):
        """A substantive question does not trigger a follow-up."""
        question = "What are the top challenges in budget allocation?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False


@pytest.mark.live_qdrant
class TestFilterQueryExecution:
    """Integration tests that verify Qdrant filter queries work with real data."""

    def test_year_filter_built_correctly(self, chatbot):
        """A single year yields exactly a one-element ``year`` filter, anchored."""
        context = QueryContext(has_year=True, extracted_year="2024")
        built, pinned = chatbot._build_filters(context)
        assert built == {"year": ["2024"]}
        assert "year" in pinned

    def test_district_filter_built_correctly(self, chatbot):
        """A single district lands in the ``district`` filter and is anchored."""
        context = QueryContext(has_district=True, extracted_district="Gulu")
        built, pinned = chatbot._build_filters(context)
        assert "district" in built
        assert "Gulu" in built["district"]
        assert "district" in pinned

    def test_all_year_produces_no_filter(self, chatbot):
        """Year "ALL" produces no ``year`` filter."""
        context = QueryContext(has_year=True, extracted_year="ALL")
        built, _ = chatbot._build_filters(context)
        assert "year" not in built

    def test_source_filter_built_correctly(self, chatbot):
        """A single source yields exactly a one-element ``sources`` filter, anchored."""
        context = QueryContext(has_source=True, extracted_source="Hospital")
        built, pinned = chatbot._build_filters(context)
        assert built == {"sources": ["Hospital"]}
        assert "sources" in pinned

    def test_district_drops_auto_source_live(self, chatbot):
        """Integration: district without explicit source should drop auto-inferred source."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=False,
            extracted_source="Local Government",
        )
        built, pinned = chatbot._build_filters(context)
        assert "district" in built
        assert "sources" not in built
        assert "district" in pinned

    @pytest.mark.xfail(reason="Pipeline reranker returns 0 docs in test context — pre-existing issue")
    def test_unfiltered_retrieval_returns_results(self, chatbot):
        """Retrieval without filters is expected to return at least one source."""
        outcome = chatbot._perform_retrieval("audit findings and recommendations", {})
        assert len(outcome.sources) > 0


# ---------------------------------------------------------------------------
# Integration tests – prevalidation (requires Qdrant)
# ---------------------------------------------------------------------------

@pytest.mark.live_qdrant
class TestPrevalidation:
    """Test the _prevalidate_filters mechanism against real Qdrant data."""

    def test_valid_combo_is_ok(self, chatbot):
        """A filter combo that exists should return ok=True."""
        report = chatbot._prevalidate_filters({"year": ["2024"]}, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_gulu_2023_gap(self, chatbot):
        """Gulu + 2023 should be detected as a data gap."""
        requested = {"district": ["Gulu"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, {"district", "year"})
        assert report["ok"] is False
        assert report["suggestion"] is not None
        assert len(report["gap_dimensions"]) > 0

    def test_jinja_2023_exists(self, chatbot):
        """Jinja + 2023 should have data."""
        requested = {"district": ["Jinja"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_nonexistent_year(self, chatbot):
        """A year with no data should fail individual dim check."""
        report = chatbot._prevalidate_filters({"year": ["1999"]}, {"year"})
        assert report["ok"] is False
        gap_values = (str(gap.get("value", "")) for gap in report["gap_dimensions"])
        assert any("1999" in value for value in gap_values)

    def test_empty_filters_ok(self, chatbot):
        """No filters should always be ok."""
        report = chatbot._prevalidate_filters({}, set())
        assert report["ok"] is True
# ---------------------------------------------------------------------------
# Unit tests – post-relaxation relevance check (no network)
# ---------------------------------------------------------------------------

class TestPostRelaxationRelevanceCheck:
    """Unit tests for ``_post_relaxation_relevance_check`` using mock documents."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with small district, year and source whitelists."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Jinja"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        bot.source_whitelist = ["Local Government", "Hospital"]
        return bot

    def test_relevant_docs(self, mock_bot):
        """A document from the anchored district is reported as relevant."""
        docs = [MagicMock(metadata={"district": "Gulu", "year": "2023"})]
        result = mock_bot._post_relaxation_relevance_check(
            docs, {"district"}, {"district": ["Gulu"]}
        )
        assert result["relevant"] is True

    def test_irrelevant_docs(self, mock_bot):
        """Documents only from other districts are reported as not relevant,
        and the details mention the requested district."""
        docs = [
            MagicMock(metadata={"district": "Hoima", "year": "2023"}),
            MagicMock(metadata={"district": "Kumi", "year": "2023"}),
        ]
        result = mock_bot._post_relaxation_relevance_check(
            docs, {"district"}, {"district": ["Gulu"]}
        )
        assert result["relevant"] is False
        assert "Gulu" in result["details"]

    def test_no_anchored_keys(self, mock_bot):
        """With an empty anchored set, a mismatching document is still relevant."""
        docs = [MagicMock(metadata={"district": "Hoima"})]
        result = mock_bot._post_relaxation_relevance_check(
            docs, set(), {"district": ["Gulu"]}
        )
        assert result["relevant"] is True


# ---------------------------------------------------------------------------
# Unit tests – district priority over source (no network)
# ---------------------------------------------------------------------------

class TestDistrictSourcePriority:
    """How ``_build_filters`` treats a source when a district is also present."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with district, source and year whitelists assigned."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Jinja", "Kampala"]
        bot.source_whitelist = ["Local Government", "Hospital"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        return bot

    def test_district_with_auto_source_drops_source(self, mock_bot):
        """With ``has_source=False``, the extracted source is left out of the
        filters while the district is kept and anchored."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            has_source=False, extracted_source="Local Government",
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "sources" not in filters
        assert "district" in anchored

    def test_district_with_explicit_source_keeps_both(self, mock_bot):
        """With ``has_source=True``, both district and source are filtered
        on and anchored."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            has_source=True, extracted_source="Local Government",
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "sources" in filters
        assert "district" in anchored
        assert "sources" in anchored

    def test_district_with_ui_source_keeps_both(self, mock_bot):
        """A source chosen through ``ui_filters`` is kept alongside the
        district and is anchored."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            ui_filters={"sources": ["Hospital"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "sources" in filters
        assert "sources" in anchored


# ---------------------------------------------------------------------------
# Unit tests – source name normalization (no network)
# ---------------------------------------------------------------------------

class TestSourceNormalization:
    """Source names from the context are matched against the source whitelist."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot whose source whitelist holds the canonical spellings."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu"]
        bot.source_whitelist = [
            "Ministry, Department and Agency",
            "Hospital",
            "Local Government",
            "Consolidated",
        ]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        return bot

    def test_case_mismatch_normalized(self, mock_bot):
        """LLM returns 'And' but Qdrant has 'and' — should be corrected."""
        ctx = QueryContext(
            has_source=True,
            extracted_source="Ministry, Department And Agency",
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert filters["sources"] == ["Ministry, Department and Agency"]

    def test_already_correct_stays(self, mock_bot):
        """A source already spelled as in the whitelist is unchanged."""
        ctx = QueryContext(
            has_source=True,
            extracted_source="Hospital",
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert filters["sources"] == ["Hospital"]

    def test_unknown_source_dropped_by_validation(self, mock_bot):
        """Unknown source values are stripped by _validate_filter_values.

        Previously this test asserted that an unknown source "passes through"
        as-is; that behaviour was changed when _validate_filter_values was
        added to guard against invalid Qdrant filter values. The current
        (correct) behaviour: unknown values are removed; if all values for a
        dimension are unknown, the entire filter dimension is dropped.
        """
        ctx = QueryContext(
            has_source=True,
            extracted_source="something new",
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert "sources" not in filters


# ---------------------------------------------------------------------------
# Unit tests – resolver agent extensions (no network)
# ---------------------------------------------------------------------------

class TestResolverAgentExtensions:
    """The resolver agent answers metadata-shaped questions without LLM.

    These tests use a stub vectorstore so we can verify the resolver
    dispatches correctly and produces the expected payload shapes.
    """

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with whitelists, cached count/coverage tables and a
        mock vectorstore whose client ``count()`` always reports 42."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Lira", "Mbale", "Pader", "Jinja"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        bot.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]
        bot.district_doc_counts = {
            "Gulu": 50, "Lira": 30, "Mbale": 80, "Pader": 10, "Jinja": 65,
        }
        bot.year_doc_counts = {
            "2020": 100, "2021": 120, "2022": 180, "2023": 200, "2024": 90,
        }
        bot.source_doc_counts = {
            "Local Government": 500,
            "Ministry, Department and Agency": 300,
            "Hospital": 80,
        }
        bot.source_year_coverage = {
            "Local Government": {"2020": 100, "2022": 200, "2024": 90},
        }
        bot.district_year_coverage = {
            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
            "Lira": {"2021": 12, "2023": 18},
            "Mbale": {"2020": 30, "2024": 50},
        }
        bot.district_source_coverage = {
            "Gulu": {"Local Government": 40, "Hospital": 10},
        }
        # Stub vectorstore with a count() that returns deterministic values
        client = MagicMock()
        client.count = MagicMock(return_value=MagicMock(count=42))
        vs = MagicMock(_client=client, collection_name="test-collection")
        bot._get_vectorstore = lambda: vs
        return bot

    def _make_state(self, query: str, ctx_kwargs: dict = None):
        """Build the state dict passed to ``_resolver_agent``.

        ``ctx_kwargs`` are forwarded to ``QueryContext``; ``None`` means no
        arguments. The state starts with no resolution attempted.
        """
        ctx = QueryContext(**(ctx_kwargs or {}))
        return {
            "current_query": query,
            "query_context": ctx,
            "agent_logs": [],
            "resolution_attempted": False,
            "resolution_result": None,
        }

    def test_top_districts(self, mock_bot):
        """[unit] Resolver dispatches on 'biggest' and returns the pre-cached
        district_doc_counts sorted descending. Mocked vectorstore — verifies
        our logic, not Qdrant."""
        state = self._make_state("biggest districts overall")
        out = mock_bot._resolver_agent(state)
        assert "top_districts" in out["resolution_result"]
        top = out["resolution_result"]["top_districts"]
        assert top[0]["district"] == "Mbale"
        assert top[0]["doc_count"] == 80

    def test_bottom_districts(self, mock_bot):
        """'smallest districts' yields ``bottom_districts`` headed by Pader,
        the district with the lowest count in the fixture."""
        state = self._make_state("smallest districts in the corpus")
        out = mock_bot._resolver_agent(state)
        bottom = out["resolution_result"]["bottom_districts"]
        assert bottom[0]["district"] == "Pader"

    def test_top_sources(self, mock_bot):
        """'largest source category' yields ``top_sources`` headed by
        'Local Government', the source with the highest count in the fixture."""
        state = self._make_state("largest source category")
        out = mock_bot._resolver_agent(state)
        assert "top_sources" in out["resolution_result"]
        assert out["resolution_result"]["top_sources"][0]["source"] == "Local Government"

    def test_top_years(self, mock_bot):
        """'most documented year' yields ``top_years`` headed by 2023, the
        year with the highest count in the fixture."""
        state = self._make_state("most documented year")
        out = mock_bot._resolver_agent(state)
        assert "top_years" in out["resolution_result"]
        assert out["resolution_result"]["top_years"][0]["year"] == "2023"

    def test_per_district_live_count(self, mock_bot):
        """A district query reports ``district_counts`` equal to the stubbed
        ``count()`` value (42) for that district."""
        state = self._make_state(
            "audit findings in Gulu",
            ctx_kwargs={"has_district": True, "extracted_district": "Gulu"},
        )
        out = mock_bot._resolver_agent(state)
        assert out["resolution_result"]["district_counts"] == {"Gulu": 42}

    def test_combination_district_year(self, mock_bot):
        """District plus year yields a ``district+year`` combination entry
        carrying both values and the stubbed count."""
        state = self._make_state(
            "Gulu 2022",
            ctx_kwargs={
                "has_district": True, "extracted_district": "Gulu",
                "has_year": True, "extracted_year": "2022",
            },
        )
        out = mock_bot._resolver_agent(state)
        combo = out["resolution_result"]["combination_counts"]
        assert "district+year" in combo
        assert combo["district+year"][0] == {"a": "Gulu", "b": "2022", "doc_count": 42}

    def test_date_range_overall(self, mock_bot):
        """'latest reports across the corpus' yields an overall date range of
        2020–2024, the span of years present in the fixture's year counts."""
        state = self._make_state("latest reports across the corpus")
        out = mock_bot._resolver_agent(state)
        dr = out["resolution_result"]["date_range"]
        assert dr["overall"] == {"min_year": "2020", "max_year": "2024"}

    def test_latest_year_for_district(self, mock_bot):
        """'latest' with a district yields that district's most recent year
        in the fixture's district/year coverage (2023 for Gulu)."""
        state = self._make_state(
            "give me whatever the latest you have on Gulu",
            ctx_kwargs={"has_district": True, "extracted_district": "Gulu"},
        )
        out = mock_bot._resolver_agent(state)
        assert out["resolution_result"]["latest_year_for_district"] == {"Gulu": "2023"}

    def test_earliest_year_for_district(self, mock_bot):
        """'oldest' with a district yields that district's earliest year in
        the fixture's district/year coverage (2020 for Mbale)."""
        state = self._make_state(
            "oldest record for Mbale",
            ctx_kwargs={"has_district": True, "extracted_district": "Mbale"},
        )
        out = mock_bot._resolver_agent(state)
        assert out["resolution_result"]["earliest_year_for_district"] == {"Mbale": "2020"}

    def test_coverage_report(self, mock_bot):
        """'what do you have on Gulu?' yields a per-district coverage entry
        with the fixture's total, years and sources for Gulu."""
        state = self._make_state(
            "what do you have on Gulu?",
            ctx_kwargs={"has_district": True, "extracted_district": "Gulu"},
        )
        out = mock_bot._resolver_agent(state)
        cov = out["resolution_result"]["coverage"]["per_district"]["Gulu"]
        assert cov["total_docs"] == 50
        assert set(cov["years"]) == {"2020", "2022", "2023"}
        assert set(cov["sources"]) == {"Local Government", "Hospital"}

    def test_no_vectorstore_safe(self, mock_bot):
        """When ``_get_vectorstore`` returns ``None`` the resolver still
        returns, with an empty ``resolution_result``."""
        mock_bot._get_vectorstore = lambda: None
        state = self._make_state("biggest districts")
        out = mock_bot._resolver_agent(state)
        # Should not crash; resolution_result is an empty dict
        assert out["resolution_result"] == {}


class TestResolverPostResolutionIntegration:
    """Verify the main_agent post-resolution pass injects resolver outputs
    back into context for the downstream RAG path.
    """

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with whitelists and mostly empty count/coverage
        tables; only Nwoya has district/year coverage."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Lira", "Nwoya"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024"]
        bot.source_whitelist = ["Local Government", "Hospital"]
        bot.district_doc_counts = {"Gulu": 10, "Lira": 20}
        bot.year_doc_counts = {}
        bot.source_doc_counts = {}
        bot.source_year_coverage = {}
        bot.district_year_coverage = {"Nwoya": {"2020": 5, "2022": 8}}
        bot.district_source_coverage = {}
        return bot

    def test_latest_year_for_district_injects_year(self, mock_bot):
        """Given a state that already carries a ``latest_year_for_district``
        result, ``_main_agent`` sets the year on the query context, clears
        ``needs_metadata_lookup`` and records a resolution note."""
        ctx = QueryContext(
            has_district=True,
            extracted_district="Nwoya",
            needs_metadata_lookup=True,
        )
        # The resolver step is not run here; its result is supplied directly.
        state = {
            "current_query": "give me whatever the latest you have on Nwoya",
            "query_context": ctx,
            "agent_logs": [],
            "resolution_attempted": True,
            "resolution_result": {
                "latest_year_for_district": {"Nwoya": "2022"},
            },
            "final_response": None,
        }
        out = mock_bot._main_agent(state)
        # Year should have been injected from the resolver lookup
        assert out["query_context"].extracted_year == "2022"
        assert out["query_context"].has_year is True
        assert out["query_context"].needs_metadata_lookup is False
        assert "latest available year" in (out["query_context"].resolution_notes or "")


# ---------------------------------------------------------------------------
# Multi-turn conversation simulations for the resolver agent
# ---------------------------------------------------------------------------

class TestResolverMultiTurnFlow:
    """End-to-end simulations of multi-turn conversations that exercise the
    resolver agent + main-agent post-resolution handoff.

    These tests bypass the LLM (``_analyze_query_context``) so they run
    without any API quota. Each turn's ``query_context`` is supplied
    directly, mimicking what the LLM would have produced. The test then
    drives ``_resolver_agent`` + ``_main_agent`` and asserts the state
    after each step.
    """

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with whitelists, cached count/coverage tables and a
        mock vectorstore whose client ``count()`` always reports 7."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Nwoya", "Gulu", "Lira", "Mbale"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024"]
        bot.source_whitelist = ["Local Government", "Hospital", "Ministry, Department and Agency"]
        bot.district_doc_counts = {"Nwoya": 8, "Gulu": 50, "Lira": 30, "Mbale": 80}
        bot.year_doc_counts = {"2020": 100, "2021": 80, "2022": 60, "2023": 40}
        bot.source_doc_counts = {"Local Government": 200, "Ministry, Department and Agency": 150, "Hospital": 30}
        bot.source_year_coverage = {
            "Local Government": {"2020": 50, "2021": 40, "2022": 30, "2023": 20},
        }
        bot.district_year_coverage = {
            "Nwoya": {"2020": 3, "2022": 5},
            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
            "Mbale": {"2020": 30, "2024": 50},
        }
        bot.district_source_coverage = {
            "Nwoya": {"Local Government": 8},
            "Gulu": {"Local Government": 40, "Hospital": 10},
        }
        # Deterministic count() stub for live combination queries
        client = MagicMock()
        client.count = MagicMock(return_value=MagicMock(count=7))
        vs = MagicMock(_client=client, collection_name="test-collection")
        bot._get_vectorstore = lambda: vs
        return bot

    def _state(self, query: str, **ctx_kwargs):
        """Build a minimal state dict for a single turn."""
        return {
            "current_query": query,
            "query_context": QueryContext(**ctx_kwargs),
            "agent_logs": [],
            "resolution_attempted": False,
            "resolution_result": None,
            "final_response": None,
        }

    # ----- Scenario 1: empty result for X 2024 → user asks "latest for X" ---
    @pytest.mark.smoke
    def test_empty_combo_then_latest_for_district(self, mock_bot):
        """[unit, smoke] Multi-turn flow simulation: empty result for Nwoya
        2024 → user asks 'latest for Nwoya' → resolver computes max(year) =
        2022 → main_agent injects year=2022 → ready for RAG. Mocked
        vectorstore + supplied QueryContext; verifies our new
        resolver+main_agent handoff without an LLM call.

        T1: "audit findings for Nwoya in 2024" → 0 docs (Nwoya has no data
            for 2024; pre-validation would catch it upstream).
        T2: "okay, give me whatever the latest you have on Nwoya" → resolver
            computes max(year for Nwoya) = 2022 → injects year=2022 → RAG
            would now retrieve for Nwoya 2022.
        """
        # Only T2 is executed below; T1 is context described in the docstring.
        # --- Turn 2: user asks for "latest" with district=Nwoya, year unset ---
        state = self._state(
            "give me whatever is the latest you have on Nwoya",
            has_district=True,
            extracted_district="Nwoya",
            needs_metadata_lookup=True,
        )
        # Step A — resolver runs first
        state = mock_bot._resolver_agent(state)
        assert state["resolution_attempted"] is True
        assert "latest_year_for_district" in state["resolution_result"]
        assert state["resolution_result"]["latest_year_for_district"] == {"Nwoya": "2022"}
        # Step B — main_agent post-resolution pass injects the year
        state = mock_bot._main_agent(state)
        ctx = state["query_context"]
        assert ctx.extracted_year == "2022"
        assert ctx.has_year is True
        assert ctx.needs_metadata_lookup is False
        # The resolution note explains what happened so the LLM can cite it
        assert "Nwoya" in (ctx.resolution_notes or "")

    # ----- Scenario 2: "biggest districts" → carries forward to follow-up ---
    def test_top_districts_then_followup_keeps_them(self, mock_bot):
        """Simulates:

        T1: "what are the audit issues for the biggest districts?" → resolver
            returns top 5 by doc count → main_agent injects them as
            extracted_district → RAG retrieves accordingly.
        T2: "now focus only on 2023" → LLM carries forward the districts from
            T1 (the LLM rule that EXPANDS or PRESERVES past filters; here we
            simulate the carry-forward by re-using the same district list
            with year added).
        """
        # --- T1 ---
        t1 = self._state(
            "what are the audit issues for the biggest districts?",
            needs_metadata_lookup=True,
        )
        t1 = mock_bot._resolver_agent(t1)
        assert "top_districts" in t1["resolution_result"]
        t1 = mock_bot._main_agent(t1)
        ctx1 = t1["query_context"]
        assert ctx1.has_district is True
        # The fixture holds only four districts, listed here by descending doc count.
        assert ctx1.extracted_district == ["Mbale", "Gulu", "Lira", "Nwoya"]
        assert ctx1.needs_metadata_lookup is False
        # --- T2: carry-forward simulated (this is what _analyze_query_context
        #     would do based on previous_filters). Verify the resolver isn't
        #     needed for this turn and the filter is preserved.
        t2 = self._state(
            "now focus only on 2023",
            has_district=True,
            extracted_district=ctx1.extracted_district,
            has_year=True,
            extracted_year="2023",
            needs_metadata_lookup=False,
        )
        # No resolver call this turn; just build filters directly
        filters, anchored = mock_bot._build_filters(t2["query_context"])
        assert set(filters["district"]) == {"Mbale", "Gulu", "Lira", "Nwoya"}
        assert filters["year"] == ["2023"]
        assert "year" in anchored
        # District should also be anchored since it was carried forward from
        # an LLM extraction in T1 (has_district=True)
        assert "district" in anchored

    # ----- Scenario 3: date-range question → resolver answers without LLM ---
    def test_date_range_for_source(self, mock_bot):
        """Simulates a single-turn metadata question: 'What years do you have
        for Local Government?' → resolver populates date_range.per_source
        with min/max years. This is the kind of question we want to answer
        purely from cached aggregates — no LLM, no RAG retrieval.
""" state = self._state( "what is the earliest year you have for Local Government?", has_source=True, extracted_source="Local Government", ) state = mock_bot._resolver_agent(state) dr = state["resolution_result"]["date_range"] assert "per_source" in dr assert dr["per_source"]["Local Government"] == { "min_year": "2020", "max_year": "2023" } # The overall range should also be present assert dr["overall"] == {"min_year": "2020", "max_year": "2023"} # ----- Scenario 4: coverage question multi-step -------------------------- def test_coverage_then_year_specific(self, mock_bot): """T1: "what do you have on Gulu?" → coverage report. T2: simulated follow-up uses one of the years from the coverage report; verifies the system can chain. """ t1 = self._state( "what do you have on Gulu?", has_district=True, extracted_district="Gulu", ) t1 = mock_bot._resolver_agent(t1) cov = t1["resolution_result"]["coverage"]["per_district"]["Gulu"] assert cov["total_docs"] == 50 assert "2023" in cov["years"] # T2 — the user picks one of the surfaced years and asks a # substantive question. Filter building should succeed cleanly. t2_ctx = QueryContext( has_district=True, extracted_district="Gulu", has_year=True, extracted_year="2023", ) filters, anchored = mock_bot._build_filters(t2_ctx) assert filters["year"] == ["2023"] assert filters["district"] == ["Gulu"] assert {"year", "district"}.issubset(anchored)