audit_assistant / tests /test_agent_intelligence.py
akryldigital's picture
add unit, integration and smoke tests with their conftest.py
b1ebd9a verified
"""
Tests for multi-agent query analysis, filter building, metadata loading,
and resolution of ambiguous references.
Requires: QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY in environment / .env
Run: python -m pytest tests/test_agent_intelligence.py -v
"""
import os
import sys
import json
import pytest
from pathlib import Path
from unittest.mock import MagicMock, patch
from dataclasses import asdict
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.agents.base_multi_agent_chatbot import QueryContext, BaseMultiAgentChatbot
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _load_env(env_file=None):
    """Populate ``os.environ`` from a ``KEY=VALUE`` style ``.env`` file.

    Args:
        env_file: Optional path of the file to read. When omitted, the
            ``.env`` file in the parent of the directory containing this
            module is used.

    Returns:
        None. Values are applied with ``os.environ.setdefault``, so variables
        that already exist in the environment are never overwritten.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored, and
    surrounding double/single quotes are stripped from values. A missing
    file is a silent no-op.
    """
    if env_file is None:
        env_file = Path(__file__).resolve().parent.parent / ".env"
    env_file = Path(env_file)
    if not env_file.exists():
        return
    # Decode explicitly as UTF-8 so parsing does not depend on the locale.
    for raw_line in env_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if not key:
            # A "=value" line has no variable name; an empty name is not a
            # usable environment key, so skip it instead of failing at import.
            continue
        os.environ.setdefault(key, value.strip().strip('"').strip("'"))
_load_env()
# ---------------------------------------------------------------------------
# Unit tests – QueryContext (no network, no LLM)
# ---------------------------------------------------------------------------
class TestQueryContext:
    """Construction-time behaviour of ``QueryContext``; no network or LLM involved."""

    def test_all_year_passthrough(self):
        """The ``"ALL"`` sentinel for year is stored unchanged."""
        context = QueryContext(extracted_year="ALL", has_year=True)
        assert context.extracted_year == "ALL"

    def test_all_source_passthrough(self):
        """The ``"ALL"`` sentinel for source is stored unchanged."""
        context = QueryContext(extracted_source="ALL", has_source=True)
        assert context.extracted_source == "ALL"

    def test_all_district_passthrough(self):
        """The ``"ALL"`` sentinel for district is stored unchanged."""
        context = QueryContext(extracted_district="ALL", has_district=True)
        assert context.extracted_district == "ALL"

    def test_single_district_title_case(self):
        """[unit] A lowercase district name comes back title-cased."""
        context = QueryContext(extracted_district="gulu", has_district=True)
        assert context.extracted_district == "Gulu"

    def test_multi_district_title_case(self):
        """Every entry of a lowercase district list comes back title-cased."""
        lowercase_names = ["gulu", "pader", "lira"]
        context = QueryContext(has_district=True, extracted_district=lowercase_names)
        assert context.extracted_district == ["Gulu", "Pader", "Lira"]

    def test_single_source_title_case(self):
        """A lowercase source name comes back title-cased."""
        context = QueryContext(extracted_source="hospital", has_source=True)
        assert context.extracted_source == "Hospital"

    def test_none_stays_none(self):
        """With no arguments, all three extracted fields are ``None``."""
        empty = QueryContext()
        assert empty.extracted_district is None
        assert empty.extracted_source is None
        assert empty.extracted_year is None

    def test_resolution_notes_default(self):
        """Resolution bookkeeping defaults: no notes, no metadata lookup."""
        empty = QueryContext()
        assert empty.resolution_notes is None
        assert empty.needs_metadata_lookup is False

    def test_resolution_notes_set(self):
        """Resolution notes given at construction are readable afterwards."""
        note = "Resolved 'biggest' to top 5 districts by doc count."
        context = QueryContext(needs_metadata_lookup=False, resolution_notes=note)
        assert "biggest" in context.resolution_notes
# ---------------------------------------------------------------------------
# Unit tests – _build_filters (no network, no LLM)
# ---------------------------------------------------------------------------
class _StubChatbot(BaseMultiAgentChatbot):
    """Minimal concrete ``BaseMultiAgentChatbot`` used by the unit tests.

    It overrides the retrieval and response-generation hooks with trivial
    implementations so the class can be instantiated without any backend.
    """

    def __init__(self):
        """Intentionally do nothing; tests assign the attributes they need."""

    def _perform_retrieval(self, query, filters):
        """Return a mock result carrying no sources and an empty answer."""
        empty_result = MagicMock(sources=[], answer="")
        return empty_result

    def _generate_conversational_response(self, *args, **kwargs):
        """Return an empty response regardless of the arguments."""
        return ""

    def _generate_conversational_response_without_docs(self, *args, **kwargs):
        """Return an empty response regardless of the arguments."""
        return ""
class TestBuildFilters:
    """Exercise ``_build_filters`` on a stub chatbot (no network, no LLM)."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot preloaded with district, year and source whitelists."""
        stub = _StubChatbot()
        # Every district referenced by a test in this class must be listed
        # here; _validate_filter_values (correctly) strips unknown values.
        stub.district_whitelist = [
            "Gulu", "Pader", "Kampala", "Bushenyi", "Jinja",
            "Amuru", "Kalungu", "Buikwe", "Mbale",
        ]
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]
        return stub

    def test_no_filters(self, mock_bot):
        """An empty context yields no filters and no anchored keys."""
        built, anchored_keys = mock_bot._build_filters(QueryContext())
        assert built == {}
        assert anchored_keys == set()

    def test_year_filter(self, mock_bot):
        """A single extracted year becomes a one-element, anchored filter."""
        context = QueryContext(extracted_year="2023", has_year=True)
        built, anchored_keys = mock_bot._build_filters(context)
        assert built == {"year": ["2023"]}
        assert "year" in anchored_keys

    def test_year_all_skips_filter(self, mock_bot):
        """Year ``"ALL"`` produces no ``year`` filter."""
        context = QueryContext(extracted_year="ALL", has_year=True)
        built, _ = mock_bot._build_filters(context)
        assert "year" not in built

    def test_district_all_skips_filter(self, mock_bot):
        """District ``"ALL"`` produces no ``district`` filter."""
        context = QueryContext(extracted_district="ALL", has_district=True)
        built, _ = mock_bot._build_filters(context)
        assert "district" not in built

    def test_source_all_skips_filter(self, mock_bot):
        """Source ``"ALL"`` produces no ``sources`` filter."""
        context = QueryContext(extracted_source="ALL", has_source=True)
        built, _ = mock_bot._build_filters(context)
        assert "sources" not in built

    def test_multi_year_filter(self, mock_bot):
        """A list of years is passed through as the ``year`` filter."""
        context = QueryContext(extracted_year=["2022", "2023"], has_year=True)
        built, anchored_keys = mock_bot._build_filters(context)
        assert built == {"year": ["2022", "2023"]}
        assert "year" in anchored_keys

    def test_single_district_filter(self, mock_bot):
        """A single district ends up inside an anchored ``district`` filter."""
        context = QueryContext(extracted_district="Gulu", has_district=True)
        built, anchored_keys = mock_bot._build_filters(context)
        assert "district" in built
        assert "Gulu" in built["district"]
        assert "district" in anchored_keys

    def test_multi_district_filter(self, mock_bot):
        """Several districts are all present in the ``district`` filter."""
        context = QueryContext(
            extracted_district=["Gulu", "Pader"],
            has_district=True,
        )
        built, _ = mock_bot._build_filters(context)
        assert set(built["district"]) == {"Gulu", "Pader"}

    def test_source_filter(self, mock_bot):
        """A single source becomes a one-element, anchored ``sources`` filter."""
        context = QueryContext(extracted_source="Hospital", has_source=True)
        built, anchored_keys = mock_bot._build_filters(context)
        assert built == {"sources": ["Hospital"]}
        assert "sources" in anchored_keys

    def test_llm_extraction_overrides_stale_ui(self, mock_bot):
        """An LLM-extracted year that differs from the sidebar year wins."""
        sidebar = {"years": ["2024"]}
        context = QueryContext(
            ui_filters=sidebar,
            has_year=True,
            extracted_year="2020",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert built["year"] == ["2020"]
        assert "year" in anchored_keys

    def test_filename_filter_short_circuits(self, mock_bot):
        """A sidebar filename selection is the only filter that is emitted."""
        sidebar = {"filenames": ["report.pdf"]}
        context = QueryContext(
            ui_filters=sidebar,
            has_year=True,
            extracted_year="2023",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert built == {"filenames": ["report.pdf"]}
        assert "year" not in built
        assert "filenames" in anchored_keys

    def test_district_drops_auto_inferred_source(self, mock_bot):
        """[unit, regression] District present + source not explicitly
        mentioned (``has_source=False``) means the source is dropped.

        Regression: pins the district-priority rule. Per the original note,
        adding source=Local Government on top of a district query would
        exclude VFM / Project audits that also cover the district.
        """
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=False,
            extracted_source="Local Government",
            has_year=True,
            extracted_year="2024",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert built["year"] == ["2024"]
        assert "sources" not in built
        assert "Gulu" in built["district"]
        assert "district" in anchored_keys

    def test_district_keeps_explicit_source(self, mock_bot):
        """District present + source explicitly mentioned keeps both filters."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=True,
            extracted_source="Local Government",
            has_year=True,
            extracted_year="2024",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert built["year"] == ["2024"]
        assert built["sources"] == ["Local Government"]
        assert "Gulu" in built["district"]
        assert "sources" in anchored_keys
        assert "district" in anchored_keys

    def test_anchored_keys_from_ui(self, mock_bot):
        """Sidebar selections are reported as anchored keys."""
        sidebar = {"sources": ["Hospital"], "years": ["2024"]}
        _, anchored_keys = mock_bot._build_filters(QueryContext(ui_filters=sidebar))
        assert "sources" in anchored_keys
        assert "year" in anchored_keys

    def test_llm_overrides_stale_sidebar_district(self, mock_bot):
        """Districts extracted from the conversation replace a stale sidebar district."""
        new_districts = ["Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"]
        context = QueryContext(
            ui_filters={"districts": ["Jinja"], "years": ["2023"]},
            has_year=True,
            extracted_year="2023",
            has_district=True,
            extracted_district=new_districts,
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert set(built["district"]) == {"Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"}
        assert "Jinja" not in built["district"]
        assert "district" in anchored_keys

    def test_llm_same_as_sidebar_uses_sidebar(self, mock_bot):
        """When extraction and sidebar agree, the single shared district is used."""
        context = QueryContext(
            ui_filters={"districts": ["Gulu"]},
            has_district=True,
            extracted_district="Gulu",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert built["district"] == ["Gulu"]
        assert "district" in anchored_keys

    def test_llm_overrides_stale_sidebar_year(self, mock_bot):
        """Newly mentioned years replace the sidebar year selection."""
        context = QueryContext(
            ui_filters={"years": ["2023"]},
            has_year=True,
            extracted_year=["2023", "2025"],
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert set(built["year"]) == {"2023", "2025"}
        assert "year" in anchored_keys

    def test_no_has_flag_sidebar_wins(self, mock_bot):
        """With no detected district, the sidebar district is kept."""
        context = QueryContext(
            ui_filters={"districts": ["Jinja"]},
            has_district=False,
            extracted_district=None,
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert built["district"] == ["Jinja"]
        assert "district" in anchored_keys
# ---------------------------------------------------------------------------
# Unit tests – UGANDA_REGIONS
# ---------------------------------------------------------------------------
class TestUgandaRegions:
    """Sanity checks on the ``BaseMultiAgentChatbot.UGANDA_REGIONS`` table."""

    def test_regions_exist(self):
        """The attribute exists and contains the five expected region keys."""
        assert hasattr(BaseMultiAgentChatbot, "UGANDA_REGIONS")
        region_table = BaseMultiAgentChatbot.UGANDA_REGIONS
        for region_name in ("Northern", "Eastern", "Western", "Central", "Karamoja"):
            assert region_name in region_table

    def test_gulu_is_northern(self):
        """Gulu is listed under the Northern region."""
        northern = BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
        assert "Gulu" in northern

    def test_kampala_is_central(self):
        """Kampala is listed under the Central region."""
        central = BaseMultiAgentChatbot.UGANDA_REGIONS["Central"]
        assert "Kampala" in central

    def test_moroto_is_karamoja(self):
        """Moroto is listed under the Karamoja region."""
        karamoja = BaseMultiAgentChatbot.UGANDA_REGIONS["Karamoja"]
        assert "Moroto" in karamoja

    def test_no_duplicate_across_regions(self):
        """No district name appears more than once across all regions."""
        flattened = [
            district
            for members in BaseMultiAgentChatbot.UGANDA_REGIONS.values()
            for district in members
        ]
        assert len(flattened) == len(set(flattened)), "Duplicate district in UGANDA_REGIONS"
# ---------------------------------------------------------------------------
# Integration tests – require network + Qdrant + OpenAI
# ---------------------------------------------------------------------------
def _skip_if_no_env():
    """Skip the calling test when a required credential variable is unset or empty.

    QDRANT_URL, QDRANT_API_KEY and OPENAI_API_KEY are checked in that order;
    the skip message names the first one that is missing.
    """
    required = ("QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY")
    missing = next((name for name in required if not os.environ.get(name)), None)
    if missing is not None:
        pytest.skip(f"{missing} not set")
@pytest.fixture(scope="module")
def chatbot():
    """Module-scoped ``MultiAgentRAGChatbot`` shared by the integration tests.

    The requesting tests are skipped when any required credential variable
    is missing (see ``_skip_if_no_env``).
    """
    _skip_if_no_env()
    # The import is only reached once the credential check has passed.
    from src.agents.multi_agent_chatbot import MultiAgentRAGChatbot

    return MultiAgentRAGChatbot()
@pytest.mark.live_qdrant
class TestMetadataLoading:
    """Checks on the metadata attributes exposed by the live ``chatbot`` fixture."""

    @pytest.mark.smoke
    def test_db_metadata_context_populated(self, chatbot):
        """[integration, smoke] The booted chatbot exposes a metadata context
        string longer than 100 characters. Treated as smoke because, per the
        original note, a failure here means the whole Qdrant integration is
        broken."""
        metadata_context = chatbot.db_metadata_context
        assert metadata_context is not None
        assert len(metadata_context) > 100

    def test_year_whitelist_from_qdrant(self, chatbot):
        """The year whitelist contains 2020, 2024 and 2025."""
        for expected_year in ("2020", "2024", "2025"):
            assert expected_year in chatbot.year_whitelist

    def test_district_doc_counts_populated(self, chatbot):
        """More than 50 districts have a document count."""
        assert len(chatbot.district_doc_counts) > 50

    def test_latest_data_year(self, chatbot):
        """The latest data year is reported as 2025."""
        assert chatbot.latest_data_year == "2025"

    def test_regions_in_context(self, chatbot):
        """The metadata context string mentions the Northern and Central regions."""
        for region_name in ("Northern", "Central"):
            assert region_name in chatbot.db_metadata_context
@pytest.mark.live_llm
@pytest.mark.live_qdrant
class TestQueryAnalysisLLM:
    """
    Integration tests that run ``_analyze_query_context`` on the live chatbot
    (the original note names gpt-4.1 as the model). Each test checks one
    capability of the analysis prompt.

    Marked ``live_llm``; per the original note these are skipped
    automatically when OpenAI quota is unavailable (see ``tests/conftest.py``).
    """

    @staticmethod
    def _mentions(value, needle):
        """Return whether ``needle`` occurs in ``value``.

        A list is searched item by item; anything else is searched through
        its ``str()`` form.
        """
        if isinstance(value, list):
            return any(needle in item for item in value)
        return needle in str(value)

    @pytest.mark.smoke
    def test_all_years_extraction(self, chatbot):
        """[integration, smoke, quality] The phrase 'for all years' is
        extracted as the sentinel 'ALL' rather than as a list of years.
        Quality: depends on the model following the prompt contract."""
        analysis = chatbot._analyze_query_context(
            "What are the main audit findings for all years?", [], {}
        )
        assert analysis.needs_follow_up is False
        assert analysis.extracted_year == "ALL"

    def test_greeting_triggers_follow_up(self, chatbot):
        """A bare greeting asks for a follow-up and supplies a question."""
        analysis = chatbot._analyze_query_context("hello", [], {})
        assert analysis.needs_follow_up is True
        assert analysis.follow_up_question is not None

    def test_last_n_years(self, chatbot):
        """'last 3 years' needs no follow-up; a concrete extraction has >= 2 years."""
        analysis = chatbot._analyze_query_context(
            "Revenue performance in the last 3 years", [], {}
        )
        assert analysis.needs_follow_up is False
        # Only checked when the model returned concrete years (not empty, not "ALL").
        if analysis.extracted_year and analysis.extracted_year != "ALL":
            year_values = analysis.extracted_year
            if not isinstance(year_values, list):
                year_values = [year_values]
            assert len(year_values) >= 2

    def test_explicit_district(self, chatbot):
        """A district named in the query is detected and extracted."""
        analysis = chatbot._analyze_query_context(
            "What issues were found in Gulu?", [], {}
        )
        assert analysis.needs_follow_up is False
        assert analysis.has_district is True
        assert self._mentions(analysis.extracted_district, "Gulu")

    def test_source_alias_ministries(self, chatbot):
        """The alias 'ministries' resolves to a source containing 'Ministry'."""
        analysis = chatbot._analyze_query_context(
            "What are the audit findings for ministries?", [], {}
        )
        assert analysis.needs_follow_up is False
        assert analysis.has_source is True
        assert self._mentions(analysis.extracted_source, "Ministry")

    def test_biggest_districts_resolution(self, chatbot):
        """'biggest districts' is treated as a district query; any resolution
        note must mention how the ranking was resolved."""
        analysis = chatbot._analyze_query_context(
            "Audit findings for the biggest districts", [], {}
        )
        assert analysis.needs_follow_up is False
        assert analysis.has_district is True
        notes = analysis.resolution_notes
        if notes:
            lowered = notes.lower()
            assert any(
                keyword in lowered
                for keyword in ("biggest", "top", "most", "document")
            )

    def test_northern_uganda_resolution(self, chatbot):
        """A district list returned for 'northern Uganda' has >= 2 Northern districts."""
        analysis = chatbot._analyze_query_context(
            "Revenue issues in northern Uganda for all years", [], {}
        )
        assert analysis.needs_follow_up is False
        extracted = analysis.extracted_district
        # Only checked when the model returned a list of districts.
        if isinstance(extracted, list):
            northern_lower = {
                name.lower()
                for name in BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
            }
            matches = [name for name in extracted if name.lower() in northern_lower]
            assert len(matches) >= 2, f"Expected Northern districts, got {extracted}"

    def test_substantive_question_no_follow_up(self, chatbot):
        """A substantive question without filters does not ask for a follow-up."""
        analysis = chatbot._analyze_query_context(
            "What are the top challenges in budget allocation?", [], {}
        )
        assert analysis.needs_follow_up is False
@pytest.mark.live_qdrant
class TestFilterQueryExecution:
    """Filter-building and retrieval checks run against the live ``chatbot`` fixture."""

    def test_year_filter_built_correctly(self, chatbot):
        """An explicit year yields exactly one anchored ``year`` filter."""
        ctx = QueryContext(has_year=True, extracted_year="2024")
        filters, anchored = chatbot._build_filters(ctx)
        assert filters == {"year": ["2024"]}
        assert "year" in anchored

    def test_district_filter_built_correctly(self, chatbot):
        """An explicit district ends up in an anchored ``district`` filter."""
        ctx = QueryContext(has_district=True, extracted_district="Gulu")
        filters, anchored = chatbot._build_filters(ctx)
        assert "district" in filters
        assert "Gulu" in filters["district"]
        assert "district" in anchored

    def test_all_year_produces_no_filter(self, chatbot):
        """Year ``"ALL"`` produces no ``year`` filter."""
        ctx = QueryContext(has_year=True, extracted_year="ALL")
        filters, _ = chatbot._build_filters(ctx)
        assert "year" not in filters

    def test_source_filter_built_correctly(self, chatbot):
        """An explicit source yields exactly one anchored ``sources`` filter."""
        ctx = QueryContext(has_source=True, extracted_source="Hospital")
        filters, anchored = chatbot._build_filters(ctx)
        assert filters == {"sources": ["Hospital"]}
        assert "sources" in anchored

    def test_district_drops_auto_source_live(self, chatbot):
        """Integration: district without explicit source should drop auto-inferred source."""
        ctx = QueryContext(
            has_district=True, extracted_district="Gulu",
            has_source=False, extracted_source="Local Government",
        )
        filters, anchored = chatbot._build_filters(ctx)
        assert "district" in filters
        assert "sources" not in filters
        assert "district" in anchored

    # The reason text is shown in pytest reports; the mis-encoded dash it
    # used to contain has been restored to a real em dash.
    @pytest.mark.xfail(
        reason="Pipeline reranker returns 0 docs in test context — pre-existing issue"
    )
    def test_unfiltered_retrieval_returns_results(self, chatbot):
        """Unfiltered retrieval should return at least one source (expected to fail)."""
        result = chatbot._perform_retrieval("audit findings and recommendations", {})
        assert len(result.sources) > 0
# ---------------------------------------------------------------------------
# Integration tests – prevalidation (requires Qdrant)
# ---------------------------------------------------------------------------
@pytest.mark.live_qdrant
class TestPrevalidation:
    """Run ``_prevalidate_filters`` on the live ``chatbot`` fixture."""

    def test_valid_combo_is_ok(self, chatbot):
        """A year that has data reports ``ok`` with a positive total count."""
        report = chatbot._prevalidate_filters({"year": ["2024"]}, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_gulu_2023_gap(self, chatbot):
        """Gulu + 2023 is expected to be reported as a data gap."""
        requested = {"district": ["Gulu"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, {"district", "year"})
        assert report["ok"] is False
        assert report["suggestion"] is not None
        assert len(report["gap_dimensions"]) > 0

    def test_jinja_2023_exists(self, chatbot):
        """Jinja + 2023 is expected to have data."""
        requested = {"district": ["Jinja"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_nonexistent_year(self, chatbot):
        """A year without data fails, and 1999 shows up among the gap dimensions."""
        report = chatbot._prevalidate_filters({"year": ["1999"]}, {"year"})
        assert report["ok"] is False
        gap_values = [str(gap.get("value", "")) for gap in report["gap_dimensions"]]
        assert any("1999" in value for value in gap_values)

    def test_empty_filters_ok(self, chatbot):
        """An empty filter set is reported as ok."""
        report = chatbot._prevalidate_filters({}, set())
        assert report["ok"] is True
# ---------------------------------------------------------------------------
# Unit tests – post-relaxation relevance check (no network)
# ---------------------------------------------------------------------------
class TestPostRelaxationRelevanceCheck:
    """Exercise ``_post_relaxation_relevance_check`` on a stub chatbot (no network)."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with small district/year/source whitelists."""
        stub = _StubChatbot()
        stub.source_whitelist = ["Local Government", "Hospital"]
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.district_whitelist = ["Gulu", "Jinja"]
        return stub

    def test_relevant_docs(self, mock_bot):
        """A document from the anchored district counts as relevant."""
        gulu_doc = MagicMock(metadata={"district": "Gulu", "year": "2023"})
        verdict = mock_bot._post_relaxation_relevance_check(
            [gulu_doc], {"district"}, {"district": ["Gulu"]}
        )
        assert verdict["relevant"] is True

    def test_irrelevant_docs(self, mock_bot):
        """Documents from other districts are flagged, and the details name Gulu."""
        other_docs = [
            MagicMock(metadata={"district": name, "year": "2023"})
            for name in ("Hoima", "Kumi")
        ]
        verdict = mock_bot._post_relaxation_relevance_check(
            other_docs, {"district"}, {"district": ["Gulu"]}
        )
        assert verdict["relevant"] is False
        assert "Gulu" in verdict["details"]

    def test_no_anchored_keys(self, mock_bot):
        """With no anchored keys, any document counts as relevant."""
        hoima_doc = MagicMock(metadata={"district": "Hoima"})
        verdict = mock_bot._post_relaxation_relevance_check(
            [hoima_doc], set(), {"district": ["Gulu"]}
        )
        assert verdict["relevant"] is True
# ---------------------------------------------------------------------------
# Unit tests – district priority over source (no network)
# ---------------------------------------------------------------------------
class TestDistrictSourcePriority:
    """How ``_build_filters`` treats a source when a district is present (no network)."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with small district/source/year whitelists."""
        stub = _StubChatbot()
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.source_whitelist = ["Local Government", "Hospital"]
        stub.district_whitelist = ["Gulu", "Jinja", "Kampala"]
        return stub

    def test_district_with_auto_source_drops_source(self, mock_bot):
        """A source carried with ``has_source=False`` is dropped next to a district."""
        context = QueryContext(
            has_source=False,
            extracted_source="Local Government",
            has_district=True,
            extracted_district="Gulu",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert "district" in built
        assert "sources" not in built
        assert "district" in anchored_keys

    def test_district_with_explicit_source_keeps_both(self, mock_bot):
        """An explicitly mentioned source is kept and anchored next to a district."""
        context = QueryContext(
            has_source=True,
            extracted_source="Local Government",
            has_district=True,
            extracted_district="Gulu",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert "district" in built
        assert "sources" in built
        assert "district" in anchored_keys
        assert "sources" in anchored_keys

    def test_district_with_ui_source_keeps_both(self, mock_bot):
        """A sidebar-selected source is kept and anchored next to a district."""
        context = QueryContext(
            ui_filters={"sources": ["Hospital"]},
            has_district=True,
            extracted_district="Gulu",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert "district" in built
        assert "sources" in built
        assert "sources" in anchored_keys
# ---------------------------------------------------------------------------
# Unit tests – source name normalization (no network)
# ---------------------------------------------------------------------------
class TestSourceNormalization:
    """How ``_build_filters`` normalises or drops source names (no network)."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot whose source whitelist uses a lowercase 'and'."""
        stub = _StubChatbot()
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.district_whitelist = ["Gulu"]
        stub.source_whitelist = [
            "Ministry, Department and Agency", "Hospital",
            "Local Government", "Consolidated",
        ]
        return stub

    def test_case_mismatch_normalized(self, mock_bot):
        """A source spelled with 'And' is corrected to the whitelist's 'and'."""
        context = QueryContext(
            extracted_source="Ministry, Department And Agency",
            has_source=True,
        )
        built, _ = mock_bot._build_filters(context)
        assert built["sources"] == ["Ministry, Department and Agency"]

    def test_already_correct_stays(self, mock_bot):
        """A source that already matches the whitelist is left as is."""
        context = QueryContext(extracted_source="Hospital", has_source=True)
        built, _ = mock_bot._build_filters(context)
        assert built["sources"] == ["Hospital"]

    def test_unknown_source_dropped_by_validation(self, mock_bot):
        """A source that is not in the whitelist produces no ``sources`` filter.

        History from the original note: this test once asserted that an
        unknown source passes through as-is. That changed when
        _validate_filter_values was added to guard against invalid Qdrant
        filter values. Unknown values are now removed, and a dimension whose
        values are all unknown is dropped entirely.
        """
        context = QueryContext(extracted_source="something new", has_source=True)
        built, _ = mock_bot._build_filters(context)
        assert "sources" not in built
# ---------------------------------------------------------------------------
# Unit tests – resolver agent extensions (no network)
# ---------------------------------------------------------------------------
class TestResolverAgentExtensions:
    """Unit tests for ``_resolver_agent`` on a stub chatbot (no network).

    Per the original note, the resolver answers metadata-shaped questions
    without an LLM. A stub vectorstore is used so the tests can check how
    the resolver dispatches and what payload shapes it produces.
    """

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with canned counts/coverage and a vectorstore whose
        ``count()`` always reports 42."""
        stub = _StubChatbot()
        stub.district_whitelist = ["Gulu", "Lira", "Mbale", "Pader", "Jinja"]
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]
        stub.district_doc_counts = {
            "Gulu": 50, "Lira": 30, "Mbale": 80, "Pader": 10, "Jinja": 65,
        }
        stub.year_doc_counts = {
            "2020": 100, "2021": 120, "2022": 180, "2023": 200, "2024": 90,
        }
        stub.source_doc_counts = {
            "Local Government": 500,
            "Ministry, Department and Agency": 300,
            "Hospital": 80,
        }
        stub.source_year_coverage = {
            "Local Government": {"2020": 100, "2022": 200, "2024": 90},
        }
        stub.district_year_coverage = {
            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
            "Lira": {"2021": 12, "2023": 18},
            "Mbale": {"2020": 30, "2024": 50},
        }
        stub.district_source_coverage = {
            "Gulu": {"Local Government": 40, "Hospital": 10},
        }
        # Vectorstore stand-in: count() returns the same value on every call.
        fake_client = MagicMock()
        fake_client.count = MagicMock(return_value=MagicMock(count=42))
        fake_store = MagicMock(_client=fake_client, collection_name="test-collection")
        stub._get_vectorstore = lambda: fake_store
        return stub

    def _make_state(self, query: str, ctx_kwargs: dict = None):
        """Build a fresh single-turn state dict for ``query``.

        ``ctx_kwargs`` are forwarded to ``QueryContext``; when omitted or
        empty, a default context is used.
        """
        context = QueryContext(**ctx_kwargs) if ctx_kwargs else QueryContext()
        return {
            "current_query": query,
            "query_context": context,
            "agent_logs": [],
            "resolution_attempted": False,
            "resolution_result": None,
        }

    def test_top_districts(self, mock_bot):
        """[unit] 'biggest' makes the resolver return the cached
        district_doc_counts with the largest district first. The vectorstore
        is mocked, so this checks our logic rather than Qdrant."""
        result = mock_bot._resolver_agent(self._make_state("biggest districts overall"))
        assert "top_districts" in result["resolution_result"]
        ranked = result["resolution_result"]["top_districts"]
        assert ranked[0]["district"] == "Mbale"
        assert ranked[0]["doc_count"] == 80

    def test_bottom_districts(self, mock_bot):
        """'smallest' puts the district with the fewest documents first."""
        result = mock_bot._resolver_agent(
            self._make_state("smallest districts in the corpus")
        )
        ranked = result["resolution_result"]["bottom_districts"]
        assert ranked[0]["district"] == "Pader"

    def test_top_sources(self, mock_bot):
        """'largest source category' ranks Local Government first."""
        result = mock_bot._resolver_agent(self._make_state("largest source category"))
        resolution = result["resolution_result"]
        assert "top_sources" in resolution
        assert resolution["top_sources"][0]["source"] == "Local Government"

    def test_top_years(self, mock_bot):
        """'most documented year' ranks 2023 first."""
        result = mock_bot._resolver_agent(self._make_state("most documented year"))
        resolution = result["resolution_result"]
        assert "top_years" in resolution
        assert resolution["top_years"][0]["year"] == "2023"

    def test_per_district_live_count(self, mock_bot):
        """A district query reports the vectorstore count for that district."""
        district_ctx = {"has_district": True, "extracted_district": "Gulu"}
        state = self._make_state("audit findings in Gulu", ctx_kwargs=district_ctx)
        result = mock_bot._resolver_agent(state)
        assert result["resolution_result"]["district_counts"] == {"Gulu": 42}

    def test_combination_district_year(self, mock_bot):
        """District + year produces a 'district+year' combination count."""
        combo_ctx = {
            "has_district": True, "extracted_district": "Gulu",
            "has_year": True, "extracted_year": "2022",
        }
        result = mock_bot._resolver_agent(
            self._make_state("Gulu 2022", ctx_kwargs=combo_ctx)
        )
        combinations = result["resolution_result"]["combination_counts"]
        assert "district+year" in combinations
        first_entry = combinations["district+year"][0]
        assert first_entry == {"a": "Gulu", "b": "2022", "doc_count": 42}

    def test_date_range_overall(self, mock_bot):
        """'latest reports' yields the overall min/max year of the corpus."""
        result = mock_bot._resolver_agent(
            self._make_state("latest reports across the corpus")
        )
        date_range = result["resolution_result"]["date_range"]
        assert date_range["overall"] == {"min_year": "2020", "max_year": "2024"}

    def test_latest_year_for_district(self, mock_bot):
        """'latest' for Gulu resolves to its most recent covered year."""
        district_ctx = {"has_district": True, "extracted_district": "Gulu"}
        state = self._make_state(
            "give me whatever the latest you have on Gulu",
            ctx_kwargs=district_ctx,
        )
        result = mock_bot._resolver_agent(state)
        assert result["resolution_result"]["latest_year_for_district"] == {"Gulu": "2023"}

    def test_earliest_year_for_district(self, mock_bot):
        """'oldest' for Mbale resolves to its earliest covered year."""
        district_ctx = {"has_district": True, "extracted_district": "Mbale"}
        state = self._make_state("oldest record for Mbale", ctx_kwargs=district_ctx)
        result = mock_bot._resolver_agent(state)
        assert result["resolution_result"]["earliest_year_for_district"] == {"Mbale": "2020"}

    def test_coverage_report(self, mock_bot):
        """'what do you have on Gulu?' returns totals, years and sources for Gulu."""
        district_ctx = {"has_district": True, "extracted_district": "Gulu"}
        state = self._make_state("what do you have on Gulu?", ctx_kwargs=district_ctx)
        result = mock_bot._resolver_agent(state)
        gulu_coverage = result["resolution_result"]["coverage"]["per_district"]["Gulu"]
        assert gulu_coverage["total_docs"] == 50
        assert set(gulu_coverage["years"]) == {"2020", "2022", "2023"}
        assert set(gulu_coverage["sources"]) == {"Local Government", "Hospital"}

    def test_no_vectorstore_safe(self, mock_bot):
        """Without a vectorstore the resolver returns an empty result dict."""
        mock_bot._get_vectorstore = lambda: None
        result = mock_bot._resolver_agent(self._make_state("biggest districts"))
        # Must not raise; the resolution result is simply empty.
        assert result["resolution_result"] == {}
class TestResolverPostResolutionIntegration:
    """Check that ``_main_agent`` feeds resolver output back into the query
    context after a resolution pass, for the downstream RAG path.
    """

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with minimal whitelists and coverage for Nwoya only."""
        stub = _StubChatbot()
        stub.district_whitelist = ["Gulu", "Lira", "Nwoya"]
        stub.year_whitelist = [str(year) for year in range(2020, 2025)]
        stub.source_whitelist = ["Local Government", "Hospital"]
        stub.district_doc_counts = {"Gulu": 10, "Lira": 20}
        stub.district_year_coverage = {"Nwoya": {"2020": 5, "2022": 8}}
        stub.year_doc_counts = {}
        stub.source_doc_counts = {}
        stub.source_year_coverage = {}
        stub.district_source_coverage = {}
        return stub

    def test_latest_year_for_district_injects_year(self, mock_bot):
        """A resolved latest year is written into the context and the
        metadata-lookup flag is cleared."""
        pending_context = QueryContext(
            needs_metadata_lookup=True,
            has_district=True,
            extracted_district="Nwoya",
        )
        resolved = {"latest_year_for_district": {"Nwoya": "2022"}}
        state = {
            "current_query": "give me whatever the latest you have on Nwoya",
            "query_context": pending_context,
            "agent_logs": [],
            "resolution_attempted": True,
            "resolution_result": resolved,
            "final_response": None,
        }
        updated_context = mock_bot._main_agent(state)["query_context"]
        # The year comes from the resolver lookup, not from the query itself.
        assert updated_context.extracted_year == "2022"
        assert updated_context.has_year is True
        assert updated_context.needs_metadata_lookup is False
        assert "latest available year" in (updated_context.resolution_notes or "")
# ---------------------------------------------------------------------------
# Multi-turn conversation simulations for the resolver agent
# ---------------------------------------------------------------------------
class TestResolverMultiTurnFlow:
"""End-to-end simulations of multi-turn conversations that exercise
the resolver agent + main-agent post-resolution handoff.
These tests bypass the LLM (``_analyze_query_context``) so they run
without any API quota. Each turn's ``query_context`` is supplied
directly, mimicking what the LLM would have produced. The test then
drives ``_resolver_agent`` + ``_main_agent`` and asserts the state
after each step.
"""
@pytest.fixture
def mock_bot(self):
    """Stub chatbot with canned counts/coverage and a vectorstore whose
    ``count()`` always reports 7."""
    stub = _StubChatbot()
    stub.district_whitelist = ["Nwoya", "Gulu", "Lira", "Mbale"]
    stub.year_whitelist = [str(year) for year in range(2020, 2025)]
    stub.source_whitelist = [
        "Local Government",
        "Hospital",
        "Ministry, Department and Agency",
    ]
    stub.district_doc_counts = {"Nwoya": 8, "Gulu": 50, "Lira": 30, "Mbale": 80}
    stub.year_doc_counts = {"2020": 100, "2021": 80, "2022": 60, "2023": 40}
    stub.source_doc_counts = {
        "Local Government": 200,
        "Ministry, Department and Agency": 150,
        "Hospital": 30,
    }
    stub.source_year_coverage = {
        "Local Government": {"2020": 50, "2021": 40, "2022": 30, "2023": 20},
    }
    stub.district_year_coverage = {
        "Nwoya": {"2020": 3, "2022": 5},
        "Gulu": {"2020": 10, "2022": 25, "2023": 15},
        "Mbale": {"2020": 30, "2024": 50},
    }
    stub.district_source_coverage = {
        "Nwoya": {"Local Government": 8},
        "Gulu": {"Local Government": 40, "Hospital": 10},
    }
    # count() stand-in for live combination queries: same value on every call.
    fake_client = MagicMock()
    fake_client.count = MagicMock(return_value=MagicMock(count=7))
    fake_store = MagicMock(_client=fake_client, collection_name="test-collection")
    stub._get_vectorstore = lambda: fake_store
    return stub
def _state(self, query: str, **ctx_kwargs):
    """Return a fresh single-turn state dict; ``ctx_kwargs`` go to ``QueryContext``."""
    turn_context = QueryContext(**ctx_kwargs)
    state = {
        "current_query": query,
        "query_context": turn_context,
        "agent_logs": [],
        "resolution_attempted": False,
        "resolution_result": None,
        "final_response": None,
    }
    return state
# ----- Scenario 1: empty result for X 2024 → user asks "latest for X" ---
@pytest.mark.smoke
def test_empty_combo_then_latest_for_district(self, mock_bot):
    """[unit, smoke] Multi-turn simulation of the resolver -> main_agent
    handoff, with a mocked vectorstore and a hand-built QueryContext (no
    LLM call).

    T1: "audit findings for Nwoya in 2024" -> 0 docs (the fixture has no
        2024 coverage for Nwoya; per the original note, pre-validation
        would catch this upstream). T1 is not executed here.
    T2: "okay, give me whatever the latest you have on Nwoya" -> the
        resolver reports 2022 as Nwoya's latest year -> main_agent
        injects year=2022, so RAG would retrieve for Nwoya 2022.
    """
    # --- Turn 2: "latest" request with district=Nwoya and no year set ---
    turn = self._state(
        "give me whatever is the latest you have on Nwoya",
        needs_metadata_lookup=True,
        has_district=True,
        extracted_district="Nwoya",
    )

    # Step A: the resolver runs first.
    turn = mock_bot._resolver_agent(turn)
    assert turn["resolution_attempted"] is True
    resolved = turn["resolution_result"]
    assert "latest_year_for_district" in resolved
    assert resolved["latest_year_for_district"] == {"Nwoya": "2022"}

    # Step B: the main agent's post-resolution pass injects the year.
    turn = mock_bot._main_agent(turn)
    final_context = turn["query_context"]
    assert final_context.extracted_year == "2022"
    assert final_context.has_year is True
    assert final_context.needs_metadata_lookup is False
    # The resolution note must name the district so the answer can cite it.
    assert "Nwoya" in (final_context.resolution_notes or "")
# ----- Scenario 2: "biggest districts" β†’ carries forward to follow-up ---
def test_top_districts_then_followup_keeps_them(self, mock_bot):
"""Simulates:
T1: "what are the audit issues for the biggest districts?" β†’
resolver returns top 5 by doc count β†’ main_agent injects
them as extracted_district β†’ RAG retrieves accordingly.
T2: "now focus only on 2023" β†’ LLM carries forward the
districts from T1 (the LLM rule that EXPANDS or PRESERVES
past filters; here we simulate the carry-forward by
re-using the same district list with year added).
"""
# --- T1 ---
t1 = self._state(
"what are the audit issues for the biggest districts?",
needs_metadata_lookup=True,
)
t1 = mock_bot._resolver_agent(t1)
assert "top_districts" in t1["resolution_result"]
t1 = mock_bot._main_agent(t1)
ctx1 = t1["query_context"]
assert ctx1.has_district is True
assert ctx1.extracted_district == ["Mbale", "Gulu", "Lira", "Nwoya"]
assert ctx1.needs_metadata_lookup is False
# --- T2: carry-forward simulated (this is what _analyze_query_context
# would do based on previous_filters). Verify the resolver isn't
# needed for this turn and the filter is preserved.
t2 = self._state(
"now focus only on 2023",
has_district=True,
extracted_district=ctx1.extracted_district,
has_year=True,
extracted_year="2023",
needs_metadata_lookup=False,
)
# No resolver call this turn; just build filters directly
filters, anchored = mock_bot._build_filters(t2["query_context"])
assert set(filters["district"]) == {"Mbale", "Gulu", "Lira", "Nwoya"}
assert filters["year"] == ["2023"]
assert "year" in anchored
# District should also be anchored since it was carried forward from
# an LLM extraction in T1 (has_district=True)
assert "district" in anchored
# ----- Scenario 3: date-range question β†’ resolver answers without LLM ---
def test_date_range_for_source(self, mock_bot):
"""Simulates a single-turn metadata question:
'What years do you have for Local Government?' β†’
resolver populates date_range.per_source with min/max years.
This is the kind of question we want to answer purely from
cached aggregates β€” no LLM, no RAG retrieval.
"""
state = self._state(
"what is the earliest year you have for Local Government?",
has_source=True,
extracted_source="Local Government",
)
state = mock_bot._resolver_agent(state)
dr = state["resolution_result"]["date_range"]
assert "per_source" in dr
assert dr["per_source"]["Local Government"] == {
"min_year": "2020", "max_year": "2023"
}
# The overall range should also be present
assert dr["overall"] == {"min_year": "2020", "max_year": "2023"}
# ----- Scenario 4: coverage question multi-step --------------------------
def test_coverage_then_year_specific(self, mock_bot):
"""T1: "what do you have on Gulu?" β†’ coverage report.
T2: simulated follow-up uses one of the years from the coverage
report; verifies the system can chain.
"""
t1 = self._state(
"what do you have on Gulu?",
has_district=True,
extracted_district="Gulu",
)
t1 = mock_bot._resolver_agent(t1)
cov = t1["resolution_result"]["coverage"]["per_district"]["Gulu"]
assert cov["total_docs"] == 50
assert "2023" in cov["years"]
# T2 β€” the user picks one of the surfaced years and asks a
# substantive question. Filter building should succeed cleanly.
t2_ctx = QueryContext(
has_district=True, extracted_district="Gulu",
has_year=True, extracted_year="2023",
)
filters, anchored = mock_bot._build_filters(t2_ctx)
assert filters["year"] == ["2023"]
assert filters["district"] == ["Gulu"]
assert {"year", "district"}.issubset(anchored)