Spaces:
Running
Running
| """ | |
| Tests for multi-agent query analysis, filter building, metadata loading, | |
| and resolution of ambiguous references. | |
| Requires: QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY in environment / .env | |
| Run: python -m pytest tests/test_agent_intelligence.py -v | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import pytest | |
| from pathlib import Path | |
| from unittest.mock import MagicMock, patch | |
| from dataclasses import asdict | |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) | |
| from src.agents.base_multi_agent_chatbot import QueryContext, BaseMultiAgentChatbot | |
| # --------------------------------------------------------------------------- | |
| # Helpers | |
| # --------------------------------------------------------------------------- | |
def _load_env(env_file=None):
    """Populate ``os.environ`` from a dotenv-style file without overriding.

    Lines that are blank, start with ``#``, or contain no ``=`` are ignored.
    Every other line is split on its first ``=``; the name is stripped of
    surrounding whitespace and the value is stripped of whitespace and of
    surrounding double and single quotes. Variables that are already set in
    the environment keep their existing value.

    Args:
        env_file: Path of the file to read. When omitted, ``.env`` in the
            directory above the one containing this module is used.

    Returns:
        None. A missing file is silently ignored.
    """
    if env_file is None:
        env_file = Path(__file__).resolve().parent.parent / ".env"
    env_file = Path(env_file)
    if not env_file.exists():
        return

    # Decode explicitly so parsing does not depend on the platform locale.
    for raw_line in env_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        name, _, value = line.partition("=")
        name = name.strip()
        if not name:
            # A line such as "=value" has no variable name to export.
            continue
        os.environ.setdefault(name, value.strip().strip('"').strip("'"))


_load_env()
# ---------------------------------------------------------------------------
# Unit tests — QueryContext (no network, no LLM)
# ---------------------------------------------------------------------------
class TestQueryContext:
    """Checks on the values QueryContext exposes after construction."""

    def test_all_year_passthrough(self):
        """The "ALL" year sentinel is stored unchanged."""
        context = QueryContext(has_year=True, extracted_year="ALL")
        assert context.extracted_year == "ALL"

    def test_all_source_passthrough(self):
        """The "ALL" source sentinel is stored unchanged."""
        context = QueryContext(has_source=True, extracted_source="ALL")
        assert context.extracted_source == "ALL"

    def test_all_district_passthrough(self):
        """The "ALL" district sentinel is stored unchanged."""
        context = QueryContext(has_district=True, extracted_district="ALL")
        assert context.extracted_district == "ALL"

    def test_single_district_title_case(self):
        """[unit] A lowercase district name passed to QueryContext is
        normalised to title case in __post_init__."""
        context = QueryContext(has_district=True, extracted_district="gulu")
        assert context.extracted_district == "Gulu"

    def test_multi_district_title_case(self):
        """Every entry of a lowercase district list comes back title-cased."""
        lowercase_names = ["gulu", "pader", "lira"]
        context = QueryContext(
            has_district=True, extracted_district=lowercase_names
        )
        assert context.extracted_district == ["Gulu", "Pader", "Lira"]

    def test_single_source_title_case(self):
        """A lowercase source name comes back title-cased."""
        context = QueryContext(has_source=True, extracted_source="hospital")
        assert context.extracted_source == "Hospital"

    def test_none_stays_none(self):
        """A default-constructed context reports None for all three fields."""
        context = QueryContext()
        assert context.extracted_district is None
        assert context.extracted_source is None
        assert context.extracted_year is None

    def test_resolution_notes_default(self):
        """Resolution notes default to None and the lookup flag to False."""
        context = QueryContext()
        assert context.resolution_notes is None
        assert context.needs_metadata_lookup is False

    def test_resolution_notes_set(self):
        """Resolution notes supplied at construction are readable afterwards."""
        context = QueryContext(
            resolution_notes="Resolved 'biggest' to top 5 districts by doc count.",
            needs_metadata_lookup=False,
        )
        assert "biggest" in context.resolution_notes
# ---------------------------------------------------------------------------
# Unit tests — _build_filters (no network, no LLM)
# ---------------------------------------------------------------------------
class _StubChatbot(BaseMultiAgentChatbot):
    """Concrete stub that satisfies abstract methods for unit tests.

    The parent initialiser is intentionally not invoked; each test assigns
    only the attributes it needs onto the instance.
    """

    def __init__(self):
        """Create the stub without running any parent set-up."""

    def _perform_retrieval(self, query, filters):
        """Return a mock retrieval result with no sources and an empty answer."""
        empty_result = MagicMock(sources=[], answer="")
        return empty_result

    def _generate_conversational_response(self, *args, **kwargs):
        """Ignore all arguments and return an empty response string."""
        return ""

    def _generate_conversational_response_without_docs(self, *args, **kwargs):
        """Ignore all arguments and return an empty response string."""
        return ""
class TestBuildFilters:
    """Test filter building logic using a stub chatbot."""

    # Every test below requests ``mock_bot`` by name, so it has to be
    # registered with pytest as a fixture.
    @pytest.fixture
    def mock_bot(self):
        """Provide a stub chatbot with district, year and source whitelists."""
        bot = _StubChatbot()
        # Whitelist must cover every district referenced by tests in this class,
        # otherwise _validate_filter_values (correctly) strips unknown values.
        bot.district_whitelist = [
            "Gulu", "Pader", "Kampala", "Bushenyi", "Jinja",
            "Amuru", "Kalungu", "Buikwe", "Mbale",
        ]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        bot.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]
        return bot

    def test_no_filters(self, mock_bot):
        """An empty context yields no filters and no anchored keys."""
        ctx = QueryContext()
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters == {}
        assert anchored == set()

    def test_year_filter(self, mock_bot):
        """A single extracted year becomes a one-element anchored year filter."""
        ctx = QueryContext(has_year=True, extracted_year="2023")
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters == {"year": ["2023"]}
        assert "year" in anchored

    def test_year_all_skips_filter(self, mock_bot):
        """The "ALL" year sentinel produces no year filter."""
        ctx = QueryContext(has_year=True, extracted_year="ALL")
        filters, _ = mock_bot._build_filters(ctx)
        assert "year" not in filters

    def test_district_all_skips_filter(self, mock_bot):
        """The "ALL" district sentinel produces no district filter."""
        ctx = QueryContext(has_district=True, extracted_district="ALL")
        filters, _ = mock_bot._build_filters(ctx)
        assert "district" not in filters

    def test_source_all_skips_filter(self, mock_bot):
        """The "ALL" source sentinel produces no sources filter."""
        ctx = QueryContext(has_source=True, extracted_source="ALL")
        filters, _ = mock_bot._build_filters(ctx)
        assert "sources" not in filters

    def test_multi_year_filter(self, mock_bot):
        """A list of extracted years is carried into the year filter."""
        ctx = QueryContext(has_year=True, extracted_year=["2022", "2023"])
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters == {"year": ["2022", "2023"]}
        assert "year" in anchored

    def test_single_district_filter(self, mock_bot):
        """A single extracted district appears in an anchored district filter."""
        ctx = QueryContext(has_district=True, extracted_district="Gulu")
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "Gulu" in filters["district"]
        assert "district" in anchored

    def test_multi_district_filter(self, mock_bot):
        """All extracted districts appear in the district filter."""
        ctx = QueryContext(
            has_district=True, extracted_district=["Gulu", "Pader"]
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert set(filters["district"]) == {"Gulu", "Pader"}

    def test_source_filter(self, mock_bot):
        """A single extracted source becomes an anchored sources filter."""
        ctx = QueryContext(has_source=True, extracted_source="Hospital")
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters == {"sources": ["Hospital"]}
        assert "sources" in anchored

    def test_llm_extraction_overrides_stale_ui(self, mock_bot):
        """When LLM extracts a DIFFERENT year than sidebar, LLM wins (user changed context)."""
        ctx = QueryContext(
            has_year=True,
            extracted_year="2020",
            ui_filters={"years": ["2024"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters["year"] == ["2020"]
        assert "year" in anchored

    def test_filename_filter_short_circuits(self, mock_bot):
        """A sidebar filename selection is the only filter that is emitted."""
        ctx = QueryContext(
            has_year=True,
            extracted_year="2023",
            ui_filters={"filenames": ["report.pdf"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters == {"filenames": ["report.pdf"]}
        assert "year" not in filters
        assert "filenames" in anchored

    def test_district_drops_auto_inferred_source(self, mock_bot):
        """[unit, regression] When district is present and source was
        NOT explicitly mentioned, source should be dropped.

        Regression: locks in the district-priority rule (without this,
        adding source=Local Government on top of a district query
        excludes VFM / Project audits that also cover the district).
        """
        ctx = QueryContext(
            has_year=True,
            extracted_year="2024",
            has_source=False,
            extracted_source="Local Government",
            has_district=True,
            extracted_district="Gulu",
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters["year"] == ["2024"]
        assert "sources" not in filters
        assert "Gulu" in filters["district"]
        assert "district" in anchored

    def test_district_keeps_explicit_source(self, mock_bot):
        """When district is present but source WAS explicitly mentioned, both are kept."""
        ctx = QueryContext(
            has_year=True,
            extracted_year="2024",
            has_source=True,
            extracted_source="Local Government",
            has_district=True,
            extracted_district="Gulu",
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters["year"] == ["2024"]
        assert filters["sources"] == ["Local Government"]
        assert "Gulu" in filters["district"]
        assert "sources" in anchored
        assert "district" in anchored

    def test_anchored_keys_from_ui(self, mock_bot):
        """UI sidebar selections are always anchored."""
        ctx = QueryContext(
            ui_filters={"sources": ["Hospital"], "years": ["2024"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "sources" in anchored
        assert "year" in anchored

    def test_llm_overrides_stale_sidebar_district(self, mock_bot):
        """When user changes district in conversation, LLM extraction overrides stale sidebar."""
        ctx = QueryContext(
            has_district=True,
            extracted_district=["Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"],
            has_year=True,
            extracted_year="2023",
            ui_filters={"districts": ["Jinja"], "years": ["2023"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert set(filters["district"]) == {"Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"}
        assert "Jinja" not in filters["district"]
        assert "district" in anchored

    def test_llm_same_as_sidebar_uses_sidebar(self, mock_bot):
        """When LLM extraction matches sidebar, sidebar wins (no override)."""
        ctx = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            ui_filters={"districts": ["Gulu"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters["district"] == ["Gulu"]
        assert "district" in anchored

    def test_llm_overrides_stale_sidebar_year(self, mock_bot):
        """When user mentions new years, LLM extraction overrides sidebar."""
        ctx = QueryContext(
            has_year=True,
            extracted_year=["2023", "2025"],
            ui_filters={"years": ["2023"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert set(filters["year"]) == {"2023", "2025"}
        assert "year" in anchored

    def test_no_has_flag_sidebar_wins(self, mock_bot):
        """When LLM did NOT detect a filter dimension, sidebar stays."""
        ctx = QueryContext(
            has_district=False,
            extracted_district=None,
            ui_filters={"districts": ["Jinja"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert filters["district"] == ["Jinja"]
        assert "district" in anchored
# ---------------------------------------------------------------------------
# Unit tests — UGANDA_REGIONS
# ---------------------------------------------------------------------------
class TestUgandaRegions:
    """Checks on the BaseMultiAgentChatbot.UGANDA_REGIONS mapping."""

    def test_regions_exist(self):
        """The mapping exists and contains the five expected region names."""
        assert hasattr(BaseMultiAgentChatbot, "UGANDA_REGIONS")
        region_map = BaseMultiAgentChatbot.UGANDA_REGIONS
        for region_name in ("Northern", "Eastern", "Western", "Central", "Karamoja"):
            assert region_name in region_map

    def test_gulu_is_northern(self):
        """Gulu is listed under the Northern region."""
        northern = BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
        assert "Gulu" in northern

    def test_kampala_is_central(self):
        """Kampala is listed under the Central region."""
        central = BaseMultiAgentChatbot.UGANDA_REGIONS["Central"]
        assert "Kampala" in central

    def test_moroto_is_karamoja(self):
        """Moroto is listed under the Karamoja region."""
        karamoja = BaseMultiAgentChatbot.UGANDA_REGIONS["Karamoja"]
        assert "Moroto" in karamoja

    def test_no_duplicate_across_regions(self):
        """No district name appears more than once across all regions."""
        flattened = [
            district
            for members in BaseMultiAgentChatbot.UGANDA_REGIONS.values()
            for district in members
        ]
        assert len(flattened) == len(set(flattened)), "Duplicate district in UGANDA_REGIONS"
# ---------------------------------------------------------------------------
# Integration tests — require network + Qdrant + OpenAI
# ---------------------------------------------------------------------------
def _skip_if_no_env():
    """Skip the calling test if a required environment variable is missing.

    Checks QDRANT_URL, QDRANT_API_KEY and OPENAI_API_KEY in that order and
    reports the first one that is unset or empty.
    """
    required = ("QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY")
    first_missing = next(
        (name for name in required if not os.environ.get(name)), None
    )
    if first_missing is not None:
        pytest.skip(f"{first_missing} not set")
# Integration test classes request ``chatbot`` by name, so it must be a
# registered fixture. Module scope builds the live chatbot once per module
# instead of once per test.
@pytest.fixture(scope="module")
def chatbot():
    """Provide a MultiAgentRAGChatbot, skipping when credentials are absent.

    Returns:
        A newly constructed ``MultiAgentRAGChatbot``.
    """
    _skip_if_no_env()
    # Imported lazily so the unit tests in this module can be collected and
    # run without importing the full chatbot implementation.
    from src.agents.multi_agent_chatbot import MultiAgentRAGChatbot

    return MultiAgentRAGChatbot()
class TestMetadataLoading:
    """Integration checks on metadata attributes of the live chatbot."""

    def test_db_metadata_context_populated(self, chatbot):
        """[integration, smoke] Booting the chatbot must produce a
        non-empty live-metadata context string from Qdrant. Smoke
        because failure here means the whole Qdrant integration is
        broken."""
        metadata_context = chatbot.db_metadata_context
        assert metadata_context is not None
        assert len(metadata_context) > 100

    def test_year_whitelist_from_qdrant(self, chatbot):
        """The year whitelist contains 2020, 2024 and 2025."""
        known_years = chatbot.year_whitelist
        assert "2020" in known_years
        assert "2024" in known_years
        assert "2025" in known_years

    def test_district_doc_counts_populated(self, chatbot):
        """More than fifty districts have a document count."""
        assert len(chatbot.district_doc_counts) > 50

    def test_latest_data_year(self, chatbot):
        """The latest data year is reported as 2025."""
        assert chatbot.latest_data_year == "2025"

    def test_regions_in_context(self, chatbot):
        """The metadata context text mentions the Northern and Central regions."""
        metadata_context = chatbot.db_metadata_context
        assert "Northern" in metadata_context
        assert "Central" in metadata_context
# The class docstring promises the ``live_llm`` marker; apply it so the
# marker-based skip it describes can actually select these tests.
@pytest.mark.live_llm
class TestQueryAnalysisLLM:
    """
    Integration tests that call the real LLM (gpt-4.1) for query analysis.
    Each test validates a specific capability of the analysis prompt.

    Marked ``live_llm`` — automatically skipped when OpenAI quota is
    unavailable (see ``tests/conftest.py``).
    """

    def test_all_years_extraction(self, chatbot):
        """[integration, smoke, quality] LLM correctly interprets the
        phrase 'for all years' as the sentinel 'ALL' (not as a year
        list). Quality: depends on the model understanding our prompt
        contract."""
        ctx = chatbot._analyze_query_context(
            "What are the main audit findings for all years?", [], {}
        )
        assert ctx.needs_follow_up is False
        assert ctx.extracted_year == "ALL"

    def test_greeting_triggers_follow_up(self, chatbot):
        """A bare greeting is answered with a follow-up question."""
        ctx = chatbot._analyze_query_context("hello", [], {})
        assert ctx.needs_follow_up is True
        assert ctx.follow_up_question is not None

    def test_last_n_years(self, chatbot):
        """'Last 3 years' needs no follow-up and, when years are listed,
        resolves to at least two of them."""
        ctx = chatbot._analyze_query_context(
            "Revenue performance in the last 3 years", [], {}
        )
        assert ctx.needs_follow_up is False
        # Only checked when the model returned concrete years; None and the
        # "ALL" sentinel are accepted without further assertions.
        if ctx.extracted_year and ctx.extracted_year != "ALL":
            years = ctx.extracted_year if isinstance(ctx.extracted_year, list) else [ctx.extracted_year]
            assert len(years) >= 2

    def test_explicit_district(self, chatbot):
        """A district named in the query is extracted as a district."""
        ctx = chatbot._analyze_query_context(
            "What issues were found in Gulu?", [], {}
        )
        assert ctx.needs_follow_up is False
        assert ctx.has_district is True
        district = ctx.extracted_district
        if isinstance(district, list):
            assert any("Gulu" in d for d in district)
        else:
            assert "Gulu" in str(district)

    def test_source_alias_ministries(self, chatbot):
        """The word 'ministries' maps to a source containing 'Ministry'."""
        ctx = chatbot._analyze_query_context(
            "What are the audit findings for ministries?", [], {}
        )
        assert ctx.needs_follow_up is False
        assert ctx.has_source is True
        src = ctx.extracted_source
        if isinstance(src, list):
            assert any("Ministry" in s for s in src)
        else:
            assert "Ministry" in str(src)

    def test_biggest_districts_resolution(self, chatbot):
        """'Biggest districts' is resolved to districts without a follow-up."""
        ctx = chatbot._analyze_query_context(
            "Audit findings for the biggest districts", [], {}
        )
        assert ctx.needs_follow_up is False
        assert ctx.has_district is True
        # Resolution notes are optional; when present they must mention how
        # "biggest" was interpreted.
        if ctx.resolution_notes:
            notes = ctx.resolution_notes.lower()
            assert any(
                keyword in notes
                for keyword in ("biggest", "top", "most", "document")
            )

    def test_northern_uganda_resolution(self, chatbot):
        """'Northern Uganda' expands to at least two Northern districts
        when the model returns a district list."""
        ctx = chatbot._analyze_query_context(
            "Revenue issues in northern Uganda for all years", [], {}
        )
        assert ctx.needs_follow_up is False
        districts = ctx.extracted_district
        # NOTE(review): a non-list result passes without any district check.
        if isinstance(districts, list):
            northern = BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
            northern_lower = {d.lower() for d in northern}
            found = [d for d in districts if d.lower() in northern_lower]
            assert len(found) >= 2, f"Expected Northern districts, got {districts}"

    def test_substantive_question_no_follow_up(self, chatbot):
        """A substantive question without filters needs no follow-up."""
        ctx = chatbot._analyze_query_context(
            "What are the top challenges in budget allocation?", [], {}
        )
        assert ctx.needs_follow_up is False
class TestFilterQueryExecution:
    """Integration tests that verify Qdrant filter queries work with real data."""

    def test_year_filter_built_correctly(self, chatbot):
        """A single year yields exactly one anchored year filter."""
        context = QueryContext(has_year=True, extracted_year="2024")
        built, anchored_keys = chatbot._build_filters(context)
        assert built == {"year": ["2024"]}
        assert "year" in anchored_keys

    def test_district_filter_built_correctly(self, chatbot):
        """A single district yields an anchored district filter containing it."""
        context = QueryContext(has_district=True, extracted_district="Gulu")
        built, anchored_keys = chatbot._build_filters(context)
        assert "district" in built
        assert "Gulu" in built["district"]
        assert "district" in anchored_keys

    def test_all_year_produces_no_filter(self, chatbot):
        """The "ALL" year sentinel yields no year filter."""
        context = QueryContext(has_year=True, extracted_year="ALL")
        built, _unused = chatbot._build_filters(context)
        assert "year" not in built

    def test_source_filter_built_correctly(self, chatbot):
        """A single source yields exactly one anchored sources filter."""
        context = QueryContext(has_source=True, extracted_source="Hospital")
        built, anchored_keys = chatbot._build_filters(context)
        assert built == {"sources": ["Hospital"]}
        assert "sources" in anchored_keys

    def test_district_drops_auto_source_live(self, chatbot):
        """Integration: district without explicit source should drop auto-inferred source."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=False,
            extracted_source="Local Government",
        )
        built, anchored_keys = chatbot._build_filters(context)
        assert "district" in built
        assert "sources" not in built
        assert "district" in anchored_keys

    def test_unfiltered_retrieval_returns_results(self, chatbot):
        """Retrieval with an empty filter dict returns at least one source."""
        outcome = chatbot._perform_retrieval("audit findings and recommendations", {})
        assert len(outcome.sources) > 0
# ---------------------------------------------------------------------------
# Integration tests — prevalidation (requires Qdrant)
# ---------------------------------------------------------------------------
class TestPrevalidation:
    """Test the _prevalidate_filters mechanism against real Qdrant data."""

    def test_valid_combo_is_ok(self, chatbot):
        """A filter combo that exists should return ok=True."""
        report = chatbot._prevalidate_filters({"year": ["2024"]}, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_gulu_2023_gap(self, chatbot):
        """Gulu + 2023 should be detected as a data gap."""
        requested = {"district": ["Gulu"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, {"district", "year"})
        assert report["ok"] is False
        assert report["suggestion"] is not None
        assert len(report["gap_dimensions"]) > 0

    def test_jinja_2023_exists(self, chatbot):
        """Jinja + 2023 should have data."""
        requested = {"district": ["Jinja"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_nonexistent_year(self, chatbot):
        """A year with no data should fail individual dim check."""
        report = chatbot._prevalidate_filters({"year": ["1999"]}, {"year"})
        assert report["ok"] is False
        assert any("1999" in str(gap.get("value", "")) for gap in report["gap_dimensions"])

    def test_empty_filters_ok(self, chatbot):
        """No filters should always be ok."""
        report = chatbot._prevalidate_filters({}, set())
        assert report["ok"] is True
# ---------------------------------------------------------------------------
# Unit tests — post-relaxation relevance check (no network)
# ---------------------------------------------------------------------------
class TestPostRelaxationRelevanceCheck:
    """Unit tests for _post_relaxation_relevance_check on a stub chatbot."""

    # Every test below requests ``mock_bot`` by name, so it has to be
    # registered with pytest as a fixture.
    @pytest.fixture
    def mock_bot(self):
        """Provide a stub chatbot with small district, year and source whitelists."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Jinja"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        bot.source_whitelist = ["Local Government", "Hospital"]
        return bot

    def test_relevant_docs(self, mock_bot):
        """A document from the anchored district is reported as relevant."""
        docs = [MagicMock(metadata={"district": "Gulu", "year": "2023"})]
        result = mock_bot._post_relaxation_relevance_check(
            docs, {"district"}, {"district": ["Gulu"]}
        )
        assert result["relevant"] is True

    def test_irrelevant_docs(self, mock_bot):
        """Documents from other districts are reported as not relevant,
        and the details mention the requested district."""
        docs = [
            MagicMock(metadata={"district": "Hoima", "year": "2023"}),
            MagicMock(metadata={"district": "Kumi", "year": "2023"}),
        ]
        result = mock_bot._post_relaxation_relevance_check(
            docs, {"district"}, {"district": ["Gulu"]}
        )
        assert result["relevant"] is False
        assert "Gulu" in result["details"]

    def test_no_anchored_keys(self, mock_bot):
        """With no anchored keys, a mismatching document is still relevant."""
        docs = [MagicMock(metadata={"district": "Hoima"})]
        result = mock_bot._post_relaxation_relevance_check(
            docs, set(), {"district": ["Gulu"]}
        )
        assert result["relevant"] is True
# ---------------------------------------------------------------------------
# Unit tests — district priority over source (no network)
# ---------------------------------------------------------------------------
class TestDistrictSourcePriority:
    """Unit tests for how _build_filters combines district and source."""

    # Every test below requests ``mock_bot`` by name, so it has to be
    # registered with pytest as a fixture.
    @pytest.fixture
    def mock_bot(self):
        """Provide a stub chatbot with district, source and year whitelists."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Jinja", "Kampala"]
        bot.source_whitelist = ["Local Government", "Hospital"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        return bot

    def test_district_with_auto_source_drops_source(self, mock_bot):
        """A source with has_source=False is dropped when a district is present."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            has_source=False, extracted_source="Local Government",
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "sources" not in filters
        assert "district" in anchored

    def test_district_with_explicit_source_keeps_both(self, mock_bot):
        """A source with has_source=True is kept alongside the district."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            has_source=True, extracted_source="Local Government",
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "sources" in filters
        assert "district" in anchored
        assert "sources" in anchored

    def test_district_with_ui_source_keeps_both(self, mock_bot):
        """A sidebar source selection is kept and anchored alongside the district."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            ui_filters={"sources": ["Hospital"]},
        )
        filters, anchored = mock_bot._build_filters(ctx)
        assert "district" in filters
        assert "sources" in filters
        assert "sources" in anchored
# ---------------------------------------------------------------------------
# Unit tests — source name normalization (no network)
# ---------------------------------------------------------------------------
class TestSourceNormalization:
    """Unit tests for how _build_filters normalises source names."""

    # Every test below requests ``mock_bot`` by name, so it has to be
    # registered with pytest as a fixture.
    @pytest.fixture
    def mock_bot(self):
        """Provide a stub chatbot with district, source and year whitelists."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu"]
        bot.source_whitelist = [
            "Ministry, Department and Agency", "Hospital",
            "Local Government", "Consolidated",
        ]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        return bot

    def test_case_mismatch_normalized(self, mock_bot):
        """LLM returns 'And' but Qdrant has 'and' — should be corrected."""
        ctx = QueryContext(
            has_source=True,
            extracted_source="Ministry, Department And Agency",
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert filters["sources"] == ["Ministry, Department and Agency"]

    def test_already_correct_stays(self, mock_bot):
        """A source that already matches the whitelist is left unchanged."""
        ctx = QueryContext(
            has_source=True,
            extracted_source="Hospital",
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert filters["sources"] == ["Hospital"]

    def test_unknown_source_dropped_by_validation(self, mock_bot):
        """Unknown source values are stripped by _validate_filter_values.

        Previously this test asserted that an unknown source "passes through"
        as-is; that behaviour was changed when _validate_filter_values was
        added to guard against invalid Qdrant filter values. The current
        (correct) behaviour: unknown values are removed; if all values for
        a dimension are unknown, the entire filter dimension is dropped.
        """
        ctx = QueryContext(
            has_source=True,
            extracted_source="something new",
        )
        filters, _ = mock_bot._build_filters(ctx)
        assert "sources" not in filters
# ---------------------------------------------------------------------------
# Unit tests — resolver agent extensions (no network)
# ---------------------------------------------------------------------------
class TestResolverAgentExtensions:
    """The resolver agent answers metadata-shaped questions without LLM.

    These tests use a stub vectorstore so we can verify the resolver
    dispatches correctly and produces the expected payload shapes.
    """

    # Every test below requests ``mock_bot`` by name, so it has to be
    # registered with pytest as a fixture.
    @pytest.fixture
    def mock_bot(self):
        """Provide a stub chatbot with whitelists, cached counts and coverage
        tables, and a mocked vectorstore."""
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Lira", "Mbale", "Pader", "Jinja"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024", "2025"]
        bot.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]
        bot.district_doc_counts = {
            "Gulu": 50, "Lira": 30, "Mbale": 80, "Pader": 10, "Jinja": 65,
        }
        bot.year_doc_counts = {
            "2020": 100, "2021": 120, "2022": 180, "2023": 200, "2024": 90,
        }
        bot.source_doc_counts = {
            "Local Government": 500,
            "Ministry, Department and Agency": 300,
            "Hospital": 80,
        }
        bot.source_year_coverage = {
            "Local Government": {"2020": 100, "2022": 200, "2024": 90},
        }
        bot.district_year_coverage = {
            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
            "Lira": {"2021": 12, "2023": 18},
            "Mbale": {"2020": 30, "2024": 50},
        }
        bot.district_source_coverage = {
            "Gulu": {"Local Government": 40, "Hospital": 10},
        }
        # Stub vectorstore with a count() that returns deterministic values
        client = MagicMock()
        client.count = MagicMock(return_value=MagicMock(count=42))
        vs = MagicMock(_client=client, collection_name="test-collection")
        bot._get_vectorstore = lambda: vs
        return bot

    def _make_state(self, query: str, ctx_kwargs: "dict | None" = None):
        """Build the state dict passed to the resolver agent.

        Args:
            query: Text stored under ``current_query``.
            ctx_kwargs: Keyword arguments for ``QueryContext``; ``None`` is
                treated as no arguments.

        Returns:
            A dict with the query, a new ``QueryContext``, an empty agent
            log, and resolution fields set to "not attempted".
        """
        ctx = QueryContext(**(ctx_kwargs or {}))
        return {
            "current_query": query,
            "query_context": ctx,
            "agent_logs": [],
            "resolution_attempted": False,
            "resolution_result": None,
        }

    def test_top_districts(self, mock_bot):
        """[unit] Resolver dispatches on 'biggest' and returns the
        pre-cached district_doc_counts sorted descending. Mocked
        vectorstore — verifies our logic, not Qdrant."""
        state = self._make_state("biggest districts overall")
        out = mock_bot._resolver_agent(state)
        assert "top_districts" in out["resolution_result"]
        top = out["resolution_result"]["top_districts"]
        assert top[0]["district"] == "Mbale"
        assert top[0]["doc_count"] == 80

    def test_bottom_districts(self, mock_bot):
        """'Smallest districts' puts the lowest-count district first."""
        state = self._make_state("smallest districts in the corpus")
        out = mock_bot._resolver_agent(state)
        bottom = out["resolution_result"]["bottom_districts"]
        assert bottom[0]["district"] == "Pader"

    def test_top_sources(self, mock_bot):
        """'Largest source category' puts the highest-count source first."""
        state = self._make_state("largest source category")
        out = mock_bot._resolver_agent(state)
        assert "top_sources" in out["resolution_result"]
        assert out["resolution_result"]["top_sources"][0]["source"] == "Local Government"

    def test_top_years(self, mock_bot):
        """'Most documented year' puts the highest-count year first."""
        state = self._make_state("most documented year")
        out = mock_bot._resolver_agent(state)
        assert "top_years" in out["resolution_result"]
        assert out["resolution_result"]["top_years"][0]["year"] == "2023"

    def test_per_district_live_count(self, mock_bot):
        """A district query reports the mocked vectorstore count for it."""
        state = self._make_state(
            "audit findings in Gulu",
            ctx_kwargs={"has_district": True, "extracted_district": "Gulu"},
        )
        out = mock_bot._resolver_agent(state)
        assert out["resolution_result"]["district_counts"] == {"Gulu": 42}

    def test_combination_district_year(self, mock_bot):
        """District plus year yields a 'district+year' combination count."""
        state = self._make_state(
            "Gulu 2022",
            ctx_kwargs={
                "has_district": True, "extracted_district": "Gulu",
                "has_year": True, "extracted_year": "2022",
            },
        )
        out = mock_bot._resolver_agent(state)
        combo = out["resolution_result"]["combination_counts"]
        assert "district+year" in combo
        assert combo["district+year"][0] == {"a": "Gulu", "b": "2022", "doc_count": 42}

    def test_date_range_overall(self, mock_bot):
        """'Latest reports' yields the overall min and max year."""
        state = self._make_state("latest reports across the corpus")
        out = mock_bot._resolver_agent(state)
        dr = out["resolution_result"]["date_range"]
        assert dr["overall"] == {"min_year": "2020", "max_year": "2024"}

    def test_latest_year_for_district(self, mock_bot):
        """'Latest' plus a district yields that district's most recent year."""
        state = self._make_state(
            "give me whatever the latest you have on Gulu",
            ctx_kwargs={"has_district": True, "extracted_district": "Gulu"},
        )
        out = mock_bot._resolver_agent(state)
        assert out["resolution_result"]["latest_year_for_district"] == {"Gulu": "2023"}

    def test_earliest_year_for_district(self, mock_bot):
        """'Oldest' plus a district yields that district's earliest year."""
        state = self._make_state(
            "oldest record for Mbale",
            ctx_kwargs={"has_district": True, "extracted_district": "Mbale"},
        )
        out = mock_bot._resolver_agent(state)
        assert out["resolution_result"]["earliest_year_for_district"] == {"Mbale": "2020"}

    def test_coverage_report(self, mock_bot):
        """'What do you have on X' yields totals, years and sources for X."""
        state = self._make_state(
            "what do you have on Gulu?",
            ctx_kwargs={"has_district": True, "extracted_district": "Gulu"},
        )
        out = mock_bot._resolver_agent(state)
        cov = out["resolution_result"]["coverage"]["per_district"]["Gulu"]
        assert cov["total_docs"] == 50
        assert set(cov["years"]) == {"2020", "2022", "2023"}
        assert set(cov["sources"]) == {"Local Government", "Hospital"}

    def test_no_vectorstore_safe(self, mock_bot):
        """A missing vectorstore yields an empty result instead of an error."""
        mock_bot._get_vectorstore = lambda: None
        state = self._make_state("biggest districts")
        out = mock_bot._resolver_agent(state)
        # Should not crash; resolution_result is an empty dict
        assert out["resolution_result"] == {}
class TestResolverPostResolutionIntegration:
    """Verify the main_agent post-resolution pass injects resolver outputs
    back into context for the downstream RAG path.
    """

    @pytest.fixture
    def mock_bot(self):
        """Return a ``_StubChatbot`` with small, hand-written metadata caches.

        Only ``district_year_coverage`` holds data for Nwoya (2020 and 2022);
        the other per-year / per-source caches are intentionally left empty.

        Registered as a pytest fixture so that test methods in this class
        can request it by naming a ``mock_bot`` parameter.
        """
        bot = _StubChatbot()
        bot.district_whitelist = ["Gulu", "Lira", "Nwoya"]
        bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024"]
        bot.source_whitelist = ["Local Government", "Hospital"]
        bot.district_doc_counts = {"Gulu": 10, "Lira": 20}
        bot.year_doc_counts = {}
        bot.source_doc_counts = {}
        bot.source_year_coverage = {}
        bot.district_year_coverage = {"Nwoya": {"2020": 5, "2022": 8}}
        bot.district_source_coverage = {}
        return bot

    def test_latest_year_for_district_injects_year(self, mock_bot):
        """A pre-resolved latest year must be copied into the query context.

        The state is built as if the resolver had already run
        (``resolution_attempted`` is True) and had found 2022 as the latest
        year for Nwoya. After ``_main_agent`` the context must carry that
        year, have ``needs_metadata_lookup`` cleared, and contain a
        resolution note mentioning the "latest available year".
        """
        ctx = QueryContext(
            has_district=True,
            extracted_district="Nwoya",
            needs_metadata_lookup=True,
        )
        state = {
            "current_query": "give me whatever the latest you have on Nwoya",
            "query_context": ctx,
            "agent_logs": [],
            "resolution_attempted": True,
            "resolution_result": {
                "latest_year_for_district": {"Nwoya": "2022"},
            },
            "final_response": None,
        }
        out = mock_bot._main_agent(state)
        result_ctx = out["query_context"]
        # Year should have been injected from the resolver lookup
        assert result_ctx.extracted_year == "2022"
        assert result_ctx.has_year is True
        assert result_ctx.needs_metadata_lookup is False
        # resolution_notes may be None, hence the "or ''" guard
        assert "latest available year" in (result_ctx.resolution_notes or "")
| # --------------------------------------------------------------------------- | |
| # Multi-turn conversation simulations for the resolver agent | |
| # --------------------------------------------------------------------------- | |
| class TestResolverMultiTurnFlow: | |
| """End-to-end simulations of multi-turn conversations that exercise | |
| the resolver agent + main-agent post-resolution handoff. | |
| These tests bypass the LLM (``_analyze_query_context``) so they run | |
| without any API quota. Each turn's ``query_context`` is supplied | |
| directly, mimicking what the LLM would have produced. The test then | |
| drives ``_resolver_agent`` + ``_main_agent`` and asserts the state | |
| after each step. | |
| """ | |
@pytest.fixture
def mock_bot(self):
    """Return a ``_StubChatbot`` seeded with whitelists and coverage caches.

    Registered as a pytest fixture so that the test methods of the
    enclosing class can request it through their ``mock_bot`` parameter.

    ``_get_vectorstore`` is replaced by a callable returning a ``MagicMock``
    vectorstore whose client ``count()`` always reports 7, which keeps
    combination-count lookups deterministic.
    """
    bot = _StubChatbot()
    bot.district_whitelist = ["Nwoya", "Gulu", "Lira", "Mbale"]
    bot.year_whitelist = ["2020", "2021", "2022", "2023", "2024"]
    bot.source_whitelist = [
        "Local Government",
        "Hospital",
        "Ministry, Department and Agency",
    ]
    bot.district_doc_counts = {"Nwoya": 8, "Gulu": 50, "Lira": 30, "Mbale": 80}
    bot.year_doc_counts = {"2020": 100, "2021": 80, "2022": 60, "2023": 40}
    bot.source_doc_counts = {
        "Local Government": 200,
        "Ministry, Department and Agency": 150,
        "Hospital": 30,
    }
    bot.source_year_coverage = {
        "Local Government": {"2020": 50, "2021": 40, "2022": 30, "2023": 20},
    }
    bot.district_year_coverage = {
        "Nwoya": {"2020": 3, "2022": 5},
        "Gulu": {"2020": 10, "2022": 25, "2023": 15},
        "Mbale": {"2020": 30, "2024": 50},
    }
    bot.district_source_coverage = {
        "Nwoya": {"Local Government": 8},
        "Gulu": {"Local Government": 40, "Hospital": 10},
    }
    # Deterministic count() stub for live combination queries
    client = MagicMock()
    client.count = MagicMock(return_value=MagicMock(count=7))
    vs = MagicMock(_client=client, collection_name="test-collection")
    bot._get_vectorstore = lambda: vs
    return bot
def _state(self, query: str, **ctx_kwargs):
    """Assemble the minimal state dict for one conversation turn.

    ``ctx_kwargs`` are forwarded unchanged to ``QueryContext``. The state
    starts with no agent logs, no resolution attempt and no response.
    """
    context = QueryContext(**ctx_kwargs)
    turn_state = dict(
        current_query=query,
        query_context=context,
        agent_logs=[],
        resolution_attempted=False,
        resolution_result=None,
        final_response=None,
    )
    return turn_state
| # ----- Scenario 1: empty result for X 2024 -> user asks "latest for X" --- | |
def test_empty_combo_then_latest_for_district(self, mock_bot):
    """[unit, smoke] Multi-turn flow simulation for "latest for Nwoya".

    Scenario being simulated:

    T1: "audit findings for Nwoya in 2024" -> 0 docs (Nwoya has no data
        for 2024; pre-validation would catch it upstream). This turn is
        not executed here.
    T2: "okay, give me whatever the latest you have on Nwoya" -> the
        resolver computes max(year for Nwoya) = 2022 -> main_agent
        injects year=2022 -> RAG would now retrieve for Nwoya 2022.

    Uses the mocked vectorstore and a hand-supplied QueryContext, so the
    resolver + main_agent handoff is checked without any LLM call.
    """
    # --- Turn 2: user asks for "latest" with district=Nwoya, year unset ---
    turn = self._state(
        "give me whatever is the latest you have on Nwoya",
        has_district=True,
        extracted_district="Nwoya",
        needs_metadata_lookup=True,
    )

    # Step A: the resolver runs first
    turn = mock_bot._resolver_agent(turn)
    assert turn["resolution_attempted"] is True
    assert "latest_year_for_district" in turn["resolution_result"]
    assert turn["resolution_result"]["latest_year_for_district"] == {"Nwoya": "2022"}

    # Step B: the main_agent post-resolution pass injects the year
    turn = mock_bot._main_agent(turn)
    resolved_ctx = turn["query_context"]
    assert resolved_ctx.extracted_year == "2022"
    assert resolved_ctx.has_year is True
    assert resolved_ctx.needs_metadata_lookup is False
    # The resolution note explains what happened so the LLM can cite it
    assert "Nwoya" in (resolved_ctx.resolution_notes or "")
| # ----- Scenario 2: "biggest districts" -> carries forward to follow-up --- | |
def test_top_districts_then_followup_keeps_them(self, mock_bot):
    """Two-turn flow: resolver-chosen districts survive a follow-up turn.

    T1: "what are the audit issues for the biggest districts?" -> the
        resolver returns the top districts by doc count -> main_agent
        injects them as ``extracted_district``.
    T2: "now focus only on 2023" -> the carry-forward that the LLM would
        normally perform is simulated by re-using T1's district list
        with a year added.
    """
    # --- T1: resolver, then the main_agent post-resolution pass ---
    first_turn = self._state(
        "what are the audit issues for the biggest districts?",
        needs_metadata_lookup=True,
    )
    first_turn = mock_bot._resolver_agent(first_turn)
    assert "top_districts" in first_turn["resolution_result"]

    first_turn = mock_bot._main_agent(first_turn)
    first_ctx = first_turn["query_context"]
    assert first_ctx.has_district is True
    assert first_ctx.extracted_district == ["Mbale", "Gulu", "Lira", "Nwoya"]
    assert first_ctx.needs_metadata_lookup is False

    # --- T2: carry-forward simulated (this is what _analyze_query_context
    # would do based on previous_filters). Verify the resolver isn't
    # needed for this turn and the filter is preserved.
    followup_kwargs = dict(
        has_district=True,
        extracted_district=first_ctx.extracted_district,
        has_year=True,
        extracted_year="2023",
        needs_metadata_lookup=False,
    )
    second_turn = self._state("now focus only on 2023", **followup_kwargs)

    # No resolver call this turn; just build filters directly
    filters, anchored = mock_bot._build_filters(second_turn["query_context"])
    assert set(filters["district"]) == {"Mbale", "Gulu", "Lira", "Nwoya"}
    assert filters["year"] == ["2023"]
    assert "year" in anchored
    # District should also be anchored since it was carried forward from
    # an LLM extraction in T1 (has_district=True)
    assert "district" in anchored
| # ----- Scenario 3: date-range question -> resolver answers without LLM --- | |
def test_date_range_for_source(self, mock_bot):
    """Single-turn metadata question about the years held for a source.

    'What years do you have for Local Government?' -> the resolver
    populates ``date_range.per_source`` with min/max years.

    This is the kind of question meant to be answered purely from cached
    aggregates, with no LLM call and no RAG retrieval.
    """
    turn = self._state(
        "what is the earliest year you have for Local Government?",
        has_source=True,
        extracted_source="Local Government",
    )
    turn = mock_bot._resolver_agent(turn)
    date_range = turn["resolution_result"]["date_range"]

    assert "per_source" in date_range
    expected_source_span = {"min_year": "2020", "max_year": "2023"}
    assert date_range["per_source"]["Local Government"] == expected_source_span
    # The overall range should also be present
    assert date_range["overall"] == {"min_year": "2020", "max_year": "2023"}
| # ----- Scenario 4: coverage question multi-step -------------------------- | |
def test_coverage_then_year_specific(self, mock_bot):
    """Coverage report first, then a follow-up narrowed to one year.

    T1: "what do you have on Gulu?" -> coverage report.
    T2: the simulated follow-up picks one of the years surfaced by the
        coverage report; verifies the two steps can be chained.
    """
    first_turn = self._state(
        "what do you have on Gulu?",
        has_district=True,
        extracted_district="Gulu",
    )
    first_turn = mock_bot._resolver_agent(first_turn)
    per_district = first_turn["resolution_result"]["coverage"]["per_district"]
    gulu = per_district["Gulu"]
    assert gulu["total_docs"] == 50
    assert "2023" in gulu["years"]

    # T2: the user picks one of the surfaced years and asks a
    # substantive question. Filter building should succeed cleanly.
    followup_ctx = QueryContext(
        has_district=True,
        extracted_district="Gulu",
        has_year=True,
        extracted_year="2023",
    )
    filters, anchored = mock_bot._build_filters(followup_ctx)
    assert filters["year"] == ["2023"]
    assert filters["district"] == ["Gulu"]
    assert {"year", "district"}.issubset(anchored)