Spaces:

akryldigital
/

audit_assistant

Running

App Files Files Community

audit_assistant / tests /test_agent_intelligence.py

akryldigital

add unit, integration and smoke tests with their conftest.py

b1ebd9a verified 10 days ago

raw

history blame contribute delete

44 kB

	"""
	Tests for multi-agent query analysis, filter building, metadata loading,
	and resolution of ambiguous references.

	Requires: QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY in environment / .env
	Run: python -m pytest tests/test_agent_intelligence.py -v
	"""

	import os
	import sys
	import json
	import pytest
	from pathlib import Path
	from unittest.mock import MagicMock, patch
	from dataclasses import asdict

	sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

	from src.agents.base_multi_agent_chatbot import QueryContext, BaseMultiAgentChatbot


	# ---------------------------------------------------------------------------
	# Helpers
	# ---------------------------------------------------------------------------

	def _load_env():
	    """Seed ``os.environ`` from the ``.env`` file in the parent of this module's directory.

	    Blank lines, lines starting with ``#`` and lines without ``=`` are
	    ignored. Keys and values are whitespace-stripped, values additionally
	    lose surrounding double and single quotes, and variables that are
	    already set in the environment are left untouched.
	    """
	    dotenv_path = Path(__file__).resolve().parent.parent / ".env"
	    if not dotenv_path.exists():
	        return
	    for raw_line in dotenv_path.read_text().splitlines():
	        entry = raw_line.strip()
	        if not entry or entry.startswith("#") or "=" not in entry:
	            continue
	        name, _, value = entry.partition("=")
	        # setdefault: a value already present in the environment wins over .env.
	        os.environ.setdefault(name.strip(), value.strip().strip('"').strip("'"))


	# Runs at import time so values from .env are in os.environ before any
	# environment-variable check in this module executes.
	_load_env()


	# ---------------------------------------------------------------------------
	# Unit tests – QueryContext (no network, no LLM)
	# ---------------------------------------------------------------------------


	class TestQueryContext:
	    """Construction-time expectations for ``QueryContext`` (no network, no LLM)."""

	    def test_all_year_passthrough(self):
	        """The ``"ALL"`` year value is expected to be stored as given."""
	        context = QueryContext(has_year=True, extracted_year="ALL")
	        assert context.extracted_year == "ALL"

	    def test_all_source_passthrough(self):
	        """The ``"ALL"`` source value is expected to be stored as given."""
	        context = QueryContext(has_source=True, extracted_source="ALL")
	        assert context.extracted_source == "ALL"

	    def test_all_district_passthrough(self):
	        """The ``"ALL"`` district value is expected to be stored as given."""
	        context = QueryContext(has_district=True, extracted_district="ALL")
	        assert context.extracted_district == "ALL"

	    def test_single_district_title_case(self):
	        """[unit] A lowercase district name passed to QueryContext is
	        normalised to title case in __post_init__."""
	        context = QueryContext(has_district=True, extracted_district="gulu")
	        assert context.extracted_district == "Gulu"

	    def test_multi_district_title_case(self):
	        """Every entry of a lowercase district list is expected in title case."""
	        lowercase_names = ["gulu", "pader", "lira"]
	        context = QueryContext(
	            has_district=True,
	            extracted_district=lowercase_names,
	        )
	        assert context.extracted_district == ["Gulu", "Pader", "Lira"]

	    def test_single_source_title_case(self):
	        """A lowercase source name is expected to come back in title case."""
	        context = QueryContext(has_source=True, extracted_source="hospital")
	        assert context.extracted_source == "Hospital"

	    def test_none_stays_none(self):
	        """A default-constructed context leaves all extracted values as None."""
	        context = QueryContext()
	        for field_name in ("extracted_district", "extracted_source", "extracted_year"):
	            assert getattr(context, field_name) is None

	    def test_resolution_notes_default(self):
	        """Resolution notes default to None and metadata lookup to False."""
	        context = QueryContext()
	        assert context.resolution_notes is None
	        assert context.needs_metadata_lookup is False

	    def test_resolution_notes_set(self):
	        """Resolution notes supplied at construction are readable afterwards."""
	        note_text = "Resolved 'biggest' to top 5 districts by doc count."
	        context = QueryContext(
	            resolution_notes=note_text,
	            needs_metadata_lookup=False,
	        )
	        assert "biggest" in context.resolution_notes


	# ---------------------------------------------------------------------------
	# Unit tests – _build_filters (no network, no LLM)
	# ---------------------------------------------------------------------------


	class _StubChatbot(BaseMultiAgentChatbot):
	    """Concrete stub that satisfies abstract methods for unit tests.

	    ``__init__`` does not call the base initialiser, so each test fixture
	    must assign the attributes (whitelists, doc counts, ...) it relies on.
	    """

	    def __init__(self):
	        # Intentionally empty: no base-class setup is performed.
	        pass

	    def _perform_retrieval(self, query, filters):
	        """Return a mock retrieval result with no sources and an empty answer."""
	        return MagicMock(sources=[], answer="")

	    def _generate_conversational_response(self, a, *kw, **kwargs):
	        """Return an empty response for any positional or keyword arguments.

	        ``**kwargs`` is accepted in addition to ``*kw`` so that a caller
	        passing arguments by keyword does not hit a ``TypeError`` in the stub.
	        """
	        return ""

	    def _generate_conversational_response_without_docs(self, a, *kw, **kwargs):
	        """Return an empty response for any positional or keyword arguments.

	        ``**kwargs`` is accepted in addition to ``*kw`` so that a caller
	        passing arguments by keyword does not hit a ``TypeError`` in the stub.
	        """
	        return ""


	class TestBuildFilters:
	    """Test filter building logic using a stub chatbot."""

	    @pytest.fixture
	    def mock_bot(self):
	        """Provide a stub chatbot carrying the whitelists used by this class."""
	        stub = _StubChatbot()
	        # Whitelist must cover every district referenced by tests in this class,
	        # otherwise _validate_filter_values (correctly) strips unknown values.
	        stub.district_whitelist = [
	            "Gulu", "Pader", "Kampala", "Bushenyi", "Jinja",
	            "Amuru", "Kalungu", "Buikwe", "Mbale",
	        ]
	        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
	        stub.source_whitelist = [
	            "Consolidated", "Hospital", "Local Government",
	            "Ministry, Department and Agency", "Project", "Value for Money",
	        ]
	        return stub

	    def test_no_filters(self, mock_bot):
	        """A default context is expected to give no filters and no anchors."""
	        filters, anchored = mock_bot._build_filters(QueryContext())
	        assert filters == {}
	        assert anchored == set()

	    def test_year_filter(self, mock_bot):
	        """A single extracted year becomes a one-element, anchored year filter."""
	        context = QueryContext(has_year=True, extracted_year="2023")
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters == {"year": ["2023"]}
	        assert "year" in anchored

	    def test_year_all_skips_filter(self, mock_bot):
	        """Year ``"ALL"`` is expected to produce no year filter."""
	        context = QueryContext(has_year=True, extracted_year="ALL")
	        filters, _ = mock_bot._build_filters(context)
	        assert "year" not in filters

	    def test_district_all_skips_filter(self, mock_bot):
	        """District ``"ALL"`` is expected to produce no district filter."""
	        context = QueryContext(has_district=True, extracted_district="ALL")
	        filters, _ = mock_bot._build_filters(context)
	        assert "district" not in filters

	    def test_source_all_skips_filter(self, mock_bot):
	        """Source ``"ALL"`` is expected to produce no sources filter."""
	        context = QueryContext(has_source=True, extracted_source="ALL")
	        filters, _ = mock_bot._build_filters(context)
	        assert "sources" not in filters

	    def test_multi_year_filter(self, mock_bot):
	        """A list of extracted years is carried into the year filter as-is."""
	        context = QueryContext(has_year=True, extracted_year=["2022", "2023"])
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters == {"year": ["2022", "2023"]}
	        assert "year" in anchored

	    def test_single_district_filter(self, mock_bot):
	        """A single extracted district appears in an anchored district filter."""
	        context = QueryContext(has_district=True, extracted_district="Gulu")
	        filters, anchored = mock_bot._build_filters(context)
	        assert "district" in filters
	        assert "Gulu" in filters["district"]
	        assert "district" in anchored

	    def test_multi_district_filter(self, mock_bot):
	        """Several extracted districts all appear in the district filter."""
	        context = QueryContext(
	            has_district=True,
	            extracted_district=["Gulu", "Pader"],
	        )
	        filters, _ = mock_bot._build_filters(context)
	        assert set(filters["district"]) == {"Gulu", "Pader"}

	    def test_source_filter(self, mock_bot):
	        """A single extracted source becomes an anchored sources filter."""
	        context = QueryContext(has_source=True, extracted_source="Hospital")
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters == {"sources": ["Hospital"]}
	        assert "sources" in anchored

	    def test_llm_extraction_overrides_stale_ui(self, mock_bot):
	        """When LLM extracts a DIFFERENT year than sidebar, LLM wins (user changed context)."""
	        sidebar = {"years": ["2024"]}
	        context = QueryContext(
	            has_year=True,
	            extracted_year="2020",
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters["year"] == ["2020"]
	        assert "year" in anchored

	    def test_filename_filter_short_circuits(self, mock_bot):
	        """A sidebar filename selection is expected to be the only filter returned."""
	        sidebar = {"filenames": ["report.pdf"]}
	        context = QueryContext(
	            has_year=True,
	            extracted_year="2023",
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters == {"filenames": ["report.pdf"]}
	        assert "year" not in filters
	        assert "filenames" in anchored

	    def test_district_drops_auto_inferred_source(self, mock_bot):
	        """[unit, regression] When district is present and source was
	        NOT explicitly mentioned, source should be dropped.

	        Regression: locks in the district-priority rule (without this,
	        adding source=Local Government on top of a district query
	        excludes VFM / Project audits that also cover the district).
	        """
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            has_source=False,
	            extracted_source="Local Government",
	            has_year=True,
	            extracted_year="2024",
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters["year"] == ["2024"]
	        assert "sources" not in filters
	        assert "Gulu" in filters["district"]
	        assert "district" in anchored

	    def test_district_keeps_explicit_source(self, mock_bot):
	        """When district is present but source WAS explicitly mentioned, both are kept."""
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            has_source=True,
	            extracted_source="Local Government",
	            has_year=True,
	            extracted_year="2024",
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters["year"] == ["2024"]
	        assert filters["sources"] == ["Local Government"]
	        assert "Gulu" in filters["district"]
	        assert "sources" in anchored
	        assert "district" in anchored

	    def test_anchored_keys_from_ui(self, mock_bot):
	        """UI sidebar selections are always anchored."""
	        sidebar = {"sources": ["Hospital"], "years": ["2024"]}
	        context = QueryContext(ui_filters=sidebar)
	        _, anchored = mock_bot._build_filters(context)
	        assert "sources" in anchored
	        assert "year" in anchored

	    def test_llm_overrides_stale_sidebar_district(self, mock_bot):
	        """When user changes district in conversation, LLM extraction overrides stale sidebar."""
	        sidebar = {"districts": ["Jinja"], "years": ["2023"]}
	        context = QueryContext(
	            has_district=True,
	            extracted_district=["Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"],
	            has_year=True,
	            extracted_year="2023",
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        expected = {"Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"}
	        assert set(filters["district"]) == expected
	        assert "Jinja" not in filters["district"]
	        assert "district" in anchored

	    def test_llm_same_as_sidebar_uses_sidebar(self, mock_bot):
	        """When LLM extraction matches sidebar, sidebar wins (no override)."""
	        sidebar = {"districts": ["Gulu"]}
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters["district"] == ["Gulu"]
	        assert "district" in anchored

	    def test_llm_overrides_stale_sidebar_year(self, mock_bot):
	        """When user mentions new years, LLM extraction overrides sidebar."""
	        sidebar = {"years": ["2023"]}
	        context = QueryContext(
	            has_year=True,
	            extracted_year=["2023", "2025"],
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert set(filters["year"]) == {"2023", "2025"}
	        assert "year" in anchored

	    def test_no_has_flag_sidebar_wins(self, mock_bot):
	        """When LLM did NOT detect a filter dimension, sidebar stays."""
	        sidebar = {"districts": ["Jinja"]}
	        context = QueryContext(
	            has_district=False,
	            extracted_district=None,
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert filters["district"] == ["Jinja"]
	        assert "district" in anchored


	# ---------------------------------------------------------------------------
	# Unit tests – UGANDA_REGIONS
	# ---------------------------------------------------------------------------


	class TestUgandaRegions:
	    """Checks on the ``UGANDA_REGIONS`` mapping of ``BaseMultiAgentChatbot``."""

	    def test_regions_exist(self):
	        """The mapping exists and contains the five expected region names."""
	        assert hasattr(BaseMultiAgentChatbot, "UGANDA_REGIONS")
	        region_map = BaseMultiAgentChatbot.UGANDA_REGIONS
	        for region_name in ("Northern", "Eastern", "Western", "Central", "Karamoja"):
	            assert region_name in region_map

	    def test_gulu_is_northern(self):
	        """Gulu is listed under the Northern region."""
	        northern = BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
	        assert "Gulu" in northern

	    def test_kampala_is_central(self):
	        """Kampala is listed under the Central region."""
	        central = BaseMultiAgentChatbot.UGANDA_REGIONS["Central"]
	        assert "Kampala" in central

	    def test_moroto_is_karamoja(self):
	        """Moroto is listed under the Karamoja region."""
	        karamoja = BaseMultiAgentChatbot.UGANDA_REGIONS["Karamoja"]
	        assert "Moroto" in karamoja

	    def test_no_duplicate_across_regions(self):
	        """No district name occurs more than once across all regions."""
	        flattened = [
	            district
	            for members in BaseMultiAgentChatbot.UGANDA_REGIONS.values()
	            for district in members
	        ]
	        assert len(flattened) == len(set(flattened)), "Duplicate district in UGANDA_REGIONS"


	# ---------------------------------------------------------------------------
	# Integration tests – require network + Qdrant + OpenAI
	# ---------------------------------------------------------------------------


	def _skip_if_no_env():
	    """Skip the calling test when a required environment variable is unset or empty.

	    The variables are checked in the order QDRANT_URL, QDRANT_API_KEY,
	    OPENAI_API_KEY; the skip message names the first one that is missing.
	    """
	    required = ("QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY")
	    first_missing = next(
	        (name for name in required if not os.environ.get(name)),
	        None,
	    )
	    if first_missing is not None:
	        pytest.skip(f"{first_missing} not set")


	@pytest.fixture(scope="module")
	def chatbot():
	    """Module-scoped ``MultiAgentRAGChatbot`` instance for the integration tests.

	    The environment check runs first, so the chatbot module is only
	    imported when the required variables are present.
	    """
	    _skip_if_no_env()
	    # Imported here rather than at module top, after the skip check above.
	    from src.agents.multi_agent_chatbot import MultiAgentRAGChatbot

	    return MultiAgentRAGChatbot()


	@pytest.mark.live_qdrant
	class TestMetadataLoading:
	    """Checks on the metadata attributes exposed by the ``chatbot`` fixture."""

	    @pytest.mark.smoke
	    def test_db_metadata_context_populated(self, chatbot):
	        """[integration, smoke] Booting the chatbot must produce a
	        non-empty live-metadata context string from Qdrant. Smoke
	        because failure here means the whole Qdrant integration is
	        broken."""
	        metadata_text = chatbot.db_metadata_context
	        assert metadata_text is not None
	        assert len(metadata_text) > 100

	    def test_year_whitelist_from_qdrant(self, chatbot):
	        """The year whitelist contains 2020, 2024 and 2025."""
	        for expected_year in ("2020", "2024", "2025"):
	            assert expected_year in chatbot.year_whitelist

	    def test_district_doc_counts_populated(self, chatbot):
	        """More than 50 districts have a document count."""
	        assert len(chatbot.district_doc_counts) > 50

	    def test_latest_data_year(self, chatbot):
	        """The latest data year reported by the chatbot is "2025"."""
	        assert chatbot.latest_data_year == "2025"

	    def test_regions_in_context(self, chatbot):
	        """The metadata context text mentions the Northern and Central regions."""
	        metadata_text = chatbot.db_metadata_context
	        for region_name in ("Northern", "Central"):
	            assert region_name in metadata_text


	@pytest.mark.live_llm
	@pytest.mark.live_qdrant
	class TestQueryAnalysisLLM:
	    """
	    Integration tests that call the real LLM (gpt-4.1) for query analysis.
	    Each test validates a specific capability of the analysis prompt.

	    Marked ``live_llm`` — automatically skipped when OpenAI quota is
	    unavailable (see ``tests/conftest.py``).
	    """

	    @pytest.mark.smoke
	    def test_all_years_extraction(self, chatbot):
	        """[integration, smoke, quality] LLM correctly interprets the
	        phrase 'for all years' as the sentinel 'ALL' (not as a year
	        list). Quality: depends on the model understanding our prompt
	        contract."""
	        question = "What are the main audit findings for all years?"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False
	        assert context.extracted_year == "ALL"

	    def test_greeting_triggers_follow_up(self, chatbot):
	        """A bare greeting is expected to request a follow-up question."""
	        context = chatbot._analyze_query_context("hello", [], {})
	        assert context.needs_follow_up is True
	        assert context.follow_up_question is not None

	    def test_last_n_years(self, chatbot):
	        """'last 3 years' needs no follow-up; a concrete year result has >= 2 years."""
	        question = "Revenue performance in the last 3 years"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False
	        extracted = context.extracted_year
	        # The length check only applies when specific years were extracted.
	        if not extracted or extracted == "ALL":
	            return
	        years = extracted if isinstance(extracted, list) else [extracted]
	        assert len(years) >= 2

	    def test_explicit_district(self, chatbot):
	        """A question naming Gulu yields a district extraction containing 'Gulu'."""
	        question = "What issues were found in Gulu?"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False
	        assert context.has_district is True
	        extracted = context.extracted_district
	        # A non-list value is checked through its string form.
	        candidates = extracted if isinstance(extracted, list) else [str(extracted)]
	        assert any("Gulu" in candidate for candidate in candidates)

	    def test_source_alias_ministries(self, chatbot):
	        """'ministries' yields a source extraction containing 'Ministry'."""
	        question = "What are the audit findings for ministries?"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False
	        assert context.has_source is True
	        extracted = context.extracted_source
	        # A non-list value is checked through its string form.
	        candidates = extracted if isinstance(extracted, list) else [str(extracted)]
	        assert any("Ministry" in candidate for candidate in candidates)

	    def test_biggest_districts_resolution(self, chatbot):
	        """'biggest districts' sets has_district; notes, if any, mention the ranking."""
	        question = "Audit findings for the biggest districts"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False
	        assert context.has_district is True
	        notes = context.resolution_notes
	        if notes:
	            lowered = notes.lower()
	            keywords = ("biggest", "top", "most", "document")
	            assert any(keyword in lowered for keyword in keywords)

	    def test_northern_uganda_resolution(self, chatbot):
	        """A list result for 'northern Uganda' has at least two Northern districts."""
	        question = "Revenue issues in northern Uganda for all years"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False
	        districts = context.extracted_district
	        # The membership check only applies to a list result.
	        if isinstance(districts, list):
	            northern_names = {
	                name.lower()
	                for name in BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
	            }
	            matches = [name for name in districts if name.lower() in northern_names]
	            assert len(matches) >= 2, f"Expected Northern districts, got {districts}"

	    def test_substantive_question_no_follow_up(self, chatbot):
	        """A substantive question without filters still needs no follow-up."""
	        question = "What are the top challenges in budget allocation?"
	        context = chatbot._analyze_query_context(question, [], {})
	        assert context.needs_follow_up is False


	@pytest.mark.live_qdrant
	class TestFilterQueryExecution:
	    """Integration tests that verify Qdrant filter queries work with real data."""

	    def test_year_filter_built_correctly(self, chatbot):
	        """Year 2024 yields exactly an anchored year filter."""
	        context = QueryContext(has_year=True, extracted_year="2024")
	        filters, anchored = chatbot._build_filters(context)
	        assert filters == {"year": ["2024"]}
	        assert "year" in anchored

	    def test_district_filter_built_correctly(self, chatbot):
	        """District Gulu yields an anchored district filter containing it."""
	        context = QueryContext(has_district=True, extracted_district="Gulu")
	        filters, anchored = chatbot._build_filters(context)
	        assert "district" in filters
	        assert "Gulu" in filters["district"]
	        assert "district" in anchored

	    def test_all_year_produces_no_filter(self, chatbot):
	        """Year ``"ALL"`` yields no year filter."""
	        context = QueryContext(has_year=True, extracted_year="ALL")
	        filters, _ = chatbot._build_filters(context)
	        assert "year" not in filters

	    def test_source_filter_built_correctly(self, chatbot):
	        """Source Hospital yields exactly an anchored sources filter."""
	        context = QueryContext(has_source=True, extracted_source="Hospital")
	        filters, anchored = chatbot._build_filters(context)
	        assert filters == {"sources": ["Hospital"]}
	        assert "sources" in anchored

	    def test_district_drops_auto_source_live(self, chatbot):
	        """Integration: district without explicit source should drop auto-inferred source."""
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            has_source=False,
	            extracted_source="Local Government",
	        )
	        filters, anchored = chatbot._build_filters(context)
	        assert "district" in filters
	        assert "sources" not in filters
	        assert "district" in anchored

	    @pytest.mark.xfail(reason="Pipeline reranker returns 0 docs in test context — pre-existing issue")
	    def test_unfiltered_retrieval_returns_results(self, chatbot):
	        """Retrieval with an empty filter dict should return at least one source."""
	        query_text = "audit findings and recommendations"
	        retrieval = chatbot._perform_retrieval(query_text, {})
	        assert len(retrieval.sources) > 0


	# ---------------------------------------------------------------------------
	# Integration tests – prevalidation (requires Qdrant)
	# ---------------------------------------------------------------------------


	@pytest.mark.live_qdrant
	class TestPrevalidation:
	    """Test the _prevalidate_filters mechanism against real Qdrant data."""

	    def test_valid_combo_is_ok(self, chatbot):
	        """A filter combo that exists should return ok=True."""
	        report = chatbot._prevalidate_filters({"year": ["2024"]}, set())
	        assert report["ok"] is True
	        assert report["total_count"] > 0

	    def test_gulu_2023_gap(self, chatbot):
	        """Gulu + 2023 should be detected as a data gap."""
	        requested = {"district": ["Gulu"], "year": ["2023"]}
	        report = chatbot._prevalidate_filters(requested, {"district", "year"})
	        assert report["ok"] is False
	        assert report["suggestion"] is not None
	        assert len(report["gap_dimensions"]) > 0

	    def test_jinja_2023_exists(self, chatbot):
	        """Jinja + 2023 should have data."""
	        requested = {"district": ["Jinja"], "year": ["2023"]}
	        report = chatbot._prevalidate_filters(requested, set())
	        assert report["ok"] is True
	        assert report["total_count"] > 0

	    def test_nonexistent_year(self, chatbot):
	        """A year with no data should fail individual dim check."""
	        report = chatbot._prevalidate_filters({"year": ["1999"]}, {"year"})
	        assert report["ok"] is False
	        gap_values = [str(gap.get("value", "")) for gap in report["gap_dimensions"]]
	        assert any("1999" in value for value in gap_values)

	    def test_empty_filters_ok(self, chatbot):
	        """No filters should always be ok."""
	        report = chatbot._prevalidate_filters({}, set())
	        assert report["ok"] is True


	# ---------------------------------------------------------------------------
	# Unit tests – post-relaxation relevance check (no network)
	# ---------------------------------------------------------------------------


	class TestPostRelaxationRelevanceCheck:
	    """Unit tests for ``_post_relaxation_relevance_check`` on mock documents."""

	    @pytest.fixture
	    def mock_bot(self):
	        """Stub chatbot with small district, year and source whitelists."""
	        stub = _StubChatbot()
	        stub.source_whitelist = ["Local Government", "Hospital"]
	        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
	        stub.district_whitelist = ["Gulu", "Jinja"]
	        return stub

	    def test_relevant_docs(self, mock_bot):
	        """A document from the anchored district is reported as relevant."""
	        documents = [MagicMock(metadata={"district": "Gulu", "year": "2023"})]
	        outcome = mock_bot._post_relaxation_relevance_check(
	            documents,
	            {"district"},
	            {"district": ["Gulu"]},
	        )
	        assert outcome["relevant"] is True

	    def test_irrelevant_docs(self, mock_bot):
	        """Documents from other districts are not relevant; details name Gulu."""
	        documents = [
	            MagicMock(metadata={"district": district_name, "year": "2023"})
	            for district_name in ("Hoima", "Kumi")
	        ]
	        outcome = mock_bot._post_relaxation_relevance_check(
	            documents,
	            {"district"},
	            {"district": ["Gulu"]},
	        )
	        assert outcome["relevant"] is False
	        assert "Gulu" in outcome["details"]

	    def test_no_anchored_keys(self, mock_bot):
	        """With no anchored keys, a mismatching document still counts as relevant."""
	        documents = [MagicMock(metadata={"district": "Hoima"})]
	        outcome = mock_bot._post_relaxation_relevance_check(
	            documents,
	            set(),
	            {"district": ["Gulu"]},
	        )
	        assert outcome["relevant"] is True


	# ---------------------------------------------------------------------------
	# Unit tests – district priority over source (no network)
	# ---------------------------------------------------------------------------


	class TestDistrictSourcePriority:
	    """Unit tests for how ``_build_filters`` combines district and source."""

	    @pytest.fixture
	    def mock_bot(self):
	        """Stub chatbot with the whitelists needed by the tests in this class."""
	        stub = _StubChatbot()
	        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
	        stub.source_whitelist = ["Local Government", "Hospital"]
	        stub.district_whitelist = ["Gulu", "Jinja", "Kampala"]
	        return stub

	    def test_district_with_auto_source_drops_source(self, mock_bot):
	        """With has_source=False, the source value is left out of the filters."""
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            has_source=False,
	            extracted_source="Local Government",
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert "district" in filters
	        assert "sources" not in filters
	        assert "district" in anchored

	    def test_district_with_explicit_source_keeps_both(self, mock_bot):
	        """With has_source=True, district and source are both kept and anchored."""
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            has_source=True,
	            extracted_source="Local Government",
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        for key in ("district", "sources"):
	            assert key in filters
	        for key in ("district", "sources"):
	            assert key in anchored

	    def test_district_with_ui_source_keeps_both(self, mock_bot):
	        """A sidebar source is kept and anchored alongside an extracted district."""
	        sidebar = {"sources": ["Hospital"]}
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Gulu",
	            ui_filters=sidebar,
	        )
	        filters, anchored = mock_bot._build_filters(context)
	        assert "district" in filters
	        assert "sources" in filters
	        assert "sources" in anchored


	# ---------------------------------------------------------------------------
	# Unit tests – source name normalization (no network)
	# ---------------------------------------------------------------------------


	class TestSourceNormalization:
	    """Unit tests for source-name handling in ``_build_filters``."""

	    @pytest.fixture
	    def mock_bot(self):
	        """Stub chatbot whose source whitelist includes the lowercase-'and' name."""
	        stub = _StubChatbot()
	        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
	        stub.source_whitelist = [
	            "Ministry, Department and Agency", "Hospital",
	            "Local Government", "Consolidated",
	        ]
	        stub.district_whitelist = ["Gulu"]
	        return stub

	    def test_case_mismatch_normalized(self, mock_bot):
	        """LLM returns 'And' but Qdrant has 'and' — should be corrected."""
	        context = QueryContext(
	            has_source=True,
	            extracted_source="Ministry, Department And Agency",
	        )
	        filters, _ = mock_bot._build_filters(context)
	        assert filters["sources"] == ["Ministry, Department and Agency"]

	    def test_already_correct_stays(self, mock_bot):
	        """A source that already matches the whitelist is returned unchanged."""
	        context = QueryContext(has_source=True, extracted_source="Hospital")
	        filters, _ = mock_bot._build_filters(context)
	        assert filters["sources"] == ["Hospital"]

	    def test_unknown_source_dropped_by_validation(self, mock_bot):
	        """Unknown source values are stripped by _validate_filter_values.

	        Previously this test asserted that an unknown source "passes through"
	        as-is; that behaviour was changed when _validate_filter_values was
	        added to guard against invalid Qdrant filter values. The current
	        (correct) behaviour: unknown values are removed; if all values for
	        a dimension are unknown, the entire filter dimension is dropped.
	        """
	        context = QueryContext(has_source=True, extracted_source="something new")
	        filters, _ = mock_bot._build_filters(context)
	        assert "sources" not in filters


	# ---------------------------------------------------------------------------
	# Unit tests – resolver agent extensions (no network)
	# ---------------------------------------------------------------------------


	class TestResolverAgentExtensions:
	    """The resolver agent answers metadata-shaped questions without LLM.

	    These tests use a stub vectorstore so we can verify the resolver
	    dispatches correctly and produces the expected payload shapes.
	    """

	    @pytest.fixture
	    def mock_bot(self):
	        """Stub chatbot with fixed doc counts, coverage maps and a mock vectorstore."""
	        stub = _StubChatbot()

	        # Whitelists
	        stub.district_whitelist = ["Gulu", "Lira", "Mbale", "Pader", "Jinja"]
	        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
	        stub.source_whitelist = [
	            "Consolidated", "Hospital", "Local Government",
	            "Ministry, Department and Agency", "Project", "Value for Money",
	        ]

	        # Per-dimension document counts
	        stub.district_doc_counts = {
	            "Gulu": 50,
	            "Lira": 30,
	            "Mbale": 80,
	            "Pader": 10,
	            "Jinja": 65,
	        }
	        stub.year_doc_counts = {
	            "2020": 100,
	            "2021": 120,
	            "2022": 180,
	            "2023": 200,
	            "2024": 90,
	        }
	        stub.source_doc_counts = {
	            "Local Government": 500,
	            "Ministry, Department and Agency": 300,
	            "Hospital": 80,
	        }

	        # Cross-dimension coverage maps
	        stub.source_year_coverage = {
	            "Local Government": {"2020": 100, "2022": 200, "2024": 90},
	        }
	        stub.district_year_coverage = {
	            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
	            "Lira": {"2021": 12, "2023": 18},
	            "Mbale": {"2020": 30, "2024": 50},
	        }
	        stub.district_source_coverage = {
	            "Gulu": {"Local Government": 40, "Hospital": 10},
	        }

	        # Stub vectorstore with a count() that returns deterministic values
	        stub_client = MagicMock()
	        stub_client.count = MagicMock(return_value=MagicMock(count=42))
	        vectorstore = MagicMock(_client=stub_client, collection_name="test-collection")
	        stub._get_vectorstore = lambda: vectorstore

	        return stub

	    def _make_state(self, query: str, ctx_kwargs: dict = None):
	        """Build the state dict passed to ``_resolver_agent`` for one query.

	        ``ctx_kwargs`` (optional) is forwarded as keyword arguments to
	        ``QueryContext``; ``None`` means a default context.
	        """
	        context = QueryContext(**(ctx_kwargs or {}))
	        return dict(
	            current_query=query,
	            query_context=context,
	            agent_logs=[],
	            resolution_attempted=False,
	            resolution_result=None,
	        )

	    def test_top_districts(self, mock_bot):
	        """[unit] Resolver dispatches on 'biggest' and returns the
	        pre-cached district_doc_counts sorted descending. Mocked
	        vectorstore — verifies our logic, not Qdrant."""
	        result = mock_bot._resolver_agent(
	            self._make_state("biggest districts overall")
	        )["resolution_result"]
	        assert "top_districts" in result
	        leader = result["top_districts"][0]
	        assert leader["district"] == "Mbale"
	        assert leader["doc_count"] == 80

	    def test_bottom_districts(self, mock_bot):
	        """'smallest districts' yields bottom_districts led by Pader."""
	        result = mock_bot._resolver_agent(
	            self._make_state("smallest districts in the corpus")
	        )["resolution_result"]
	        assert result["bottom_districts"][0]["district"] == "Pader"

	    def test_top_sources(self, mock_bot):
	        """'largest source category' yields top_sources led by Local Government."""
	        result = mock_bot._resolver_agent(
	            self._make_state("largest source category")
	        )["resolution_result"]
	        assert "top_sources" in result
	        assert result["top_sources"][0]["source"] == "Local Government"

	    def test_top_years(self, mock_bot):
	        """'most documented year' yields top_years led by 2023."""
	        result = mock_bot._resolver_agent(
	            self._make_state("most documented year")
	        )["resolution_result"]
	        assert "top_years" in result
	        assert result["top_years"][0]["year"] == "2023"

	    def test_per_district_live_count(self, mock_bot):
	        """A district query reports the mocked count() value for that district."""
	        gulu_only = {"has_district": True, "extracted_district": "Gulu"}
	        state = self._make_state("audit findings in Gulu", ctx_kwargs=gulu_only)
	        result = mock_bot._resolver_agent(state)["resolution_result"]
	        assert result["district_counts"] == {"Gulu": 42}

	    def test_combination_district_year(self, mock_bot):
	        """District plus year yields a 'district+year' combination count entry."""
	        gulu_2022 = {
	            "has_district": True,
	            "extracted_district": "Gulu",
	            "has_year": True,
	            "extracted_year": "2022",
	        }
	        state = self._make_state("Gulu 2022", ctx_kwargs=gulu_2022)
	        combos = mock_bot._resolver_agent(state)["resolution_result"]["combination_counts"]
	        assert "district+year" in combos
	        expected_first = {"a": "Gulu", "b": "2022", "doc_count": 42}
	        assert combos["district+year"][0] == expected_first

	    def test_date_range_overall(self, mock_bot):
	        """'latest reports' yields an overall date range of 2020 to 2024."""
	        state = self._make_state("latest reports across the corpus")
	        date_range = mock_bot._resolver_agent(state)["resolution_result"]["date_range"]
	        assert date_range["overall"] == {"min_year": "2020", "max_year": "2024"}

	    def test_latest_year_for_district(self, mock_bot):
	        """'latest' for Gulu resolves to 2023."""
	        gulu_only = {"has_district": True, "extracted_district": "Gulu"}
	        state = self._make_state(
	            "give me whatever the latest you have on Gulu",
	            ctx_kwargs=gulu_only,
	        )
	        result = mock_bot._resolver_agent(state)["resolution_result"]
	        assert result["latest_year_for_district"] == {"Gulu": "2023"}

	    def test_earliest_year_for_district(self, mock_bot):
	        """'oldest' for Mbale resolves to 2020."""
	        mbale_only = {"has_district": True, "extracted_district": "Mbale"}
	        state = self._make_state("oldest record for Mbale", ctx_kwargs=mbale_only)
	        result = mock_bot._resolver_agent(state)["resolution_result"]
	        assert result["earliest_year_for_district"] == {"Mbale": "2020"}

	    def test_coverage_report(self, mock_bot):
	        """'what do you have on Gulu?' yields Gulu's total, years and sources."""
	        gulu_only = {"has_district": True, "extracted_district": "Gulu"}
	        state = self._make_state("what do you have on Gulu?", ctx_kwargs=gulu_only)
	        result = mock_bot._resolver_agent(state)["resolution_result"]
	        gulu_coverage = result["coverage"]["per_district"]["Gulu"]
	        assert gulu_coverage["total_docs"] == 50
	        assert set(gulu_coverage["years"]) == {"2020", "2022", "2023"}
	        assert set(gulu_coverage["sources"]) == {"Local Government", "Hospital"}

	    def test_no_vectorstore_safe(self, mock_bot):
	        """With no vectorstore available the resolver returns an empty result."""
	        mock_bot._get_vectorstore = lambda: None
	        state = self._make_state("biggest districts")
	        outcome = mock_bot._resolver_agent(state)
	        # Should not crash; resolution_result is an empty dict
	        assert outcome["resolution_result"] == {}


	class TestResolverPostResolutionIntegration:
	    """Verify the main_agent post-resolution pass injects resolver outputs
	    back into context for the downstream RAG path.
	    """

	    @pytest.fixture
	    def mock_bot(self):
	        """Stub chatbot with minimal whitelists, counts and coverage maps."""
	        stub = _StubChatbot()

	        stub.district_whitelist = ["Gulu", "Lira", "Nwoya"]
	        stub.year_whitelist = [str(year) for year in range(2020, 2025)]
	        stub.source_whitelist = ["Local Government", "Hospital"]

	        stub.district_doc_counts = {"Gulu": 10, "Lira": 20}
	        stub.year_doc_counts = {}
	        stub.source_doc_counts = {}

	        stub.district_year_coverage = {"Nwoya": {"2020": 5, "2022": 8}}
	        stub.district_source_coverage = {}
	        stub.source_year_coverage = {}
	        return stub

	    def test_latest_year_for_district_injects_year(self, mock_bot):
	        """A resolved latest year for Nwoya ends up on the query context."""
	        context = QueryContext(
	            has_district=True,
	            extracted_district="Nwoya",
	            needs_metadata_lookup=True,
	        )
	        resolver_payload = {"latest_year_for_district": {"Nwoya": "2022"}}
	        state = {
	            "current_query": "give me whatever the latest you have on Nwoya",
	            "query_context": context,
	            "agent_logs": [],
	            "resolution_attempted": True,
	            "resolution_result": resolver_payload,
	            "final_response": None,
	        }
	        updated = mock_bot._main_agent(state)["query_context"]
	        # Year should have been injected from the resolver lookup
	        assert updated.extracted_year == "2022"
	        assert updated.has_year is True
	        assert updated.needs_metadata_lookup is False
	        assert "latest available year" in (updated.resolution_notes or "")


	# ---------------------------------------------------------------------------
	# Multi-turn conversation simulations for the resolver agent
	# ---------------------------------------------------------------------------


	class TestResolverMultiTurnFlow:
	"""End-to-end simulations of multi-turn conversations that exercise
	the resolver agent + main-agent post-resolution handoff.

	These tests bypass the LLM (``_analyze_query_context``) so they run
	without any API quota. Each turn's ``query_context`` is supplied
	directly, mimicking what the LLM would have produced. The test then
	drives ``_resolver_agent`` + ``_main_agent`` and asserts the state
	after each step.
	"""

	@pytest.fixture
	def mock_bot(self):
	    """Stub chatbot with fixed metadata and a mock vectorstore whose count() gives 7."""
	    stub = _StubChatbot()

	    # Whitelists
	    stub.district_whitelist = ["Nwoya", "Gulu", "Lira", "Mbale"]
	    stub.year_whitelist = [str(year) for year in range(2020, 2025)]
	    stub.source_whitelist = [
	        "Local Government",
	        "Hospital",
	        "Ministry, Department and Agency",
	    ]

	    # Per-dimension document counts
	    stub.district_doc_counts = {"Nwoya": 8, "Gulu": 50, "Lira": 30, "Mbale": 80}
	    stub.year_doc_counts = {"2020": 100, "2021": 80, "2022": 60, "2023": 40}
	    stub.source_doc_counts = {
	        "Local Government": 200,
	        "Ministry, Department and Agency": 150,
	        "Hospital": 30,
	    }

	    # Cross-dimension coverage maps
	    stub.source_year_coverage = {
	        "Local Government": {"2020": 50, "2021": 40, "2022": 30, "2023": 20},
	    }
	    stub.district_year_coverage = {
	        "Nwoya": {"2020": 3, "2022": 5},
	        "Gulu": {"2020": 10, "2022": 25, "2023": 15},
	        "Mbale": {"2020": 30, "2024": 50},
	    }
	    stub.district_source_coverage = {
	        "Nwoya": {"Local Government": 8},
	        "Gulu": {"Local Government": 40, "Hospital": 10},
	    }

	    # Deterministic count() stub for live combination queries
	    stub_client = MagicMock()
	    stub_client.count = MagicMock(return_value=MagicMock(count=7))
	    vectorstore = MagicMock(_client=stub_client, collection_name="test-collection")
	    stub._get_vectorstore = lambda: vectorstore
	    return stub

	def _state(self, query: str, **ctx_kwargs):
	"""Build a minimal state dict for a single turn."""
	return {
	"current_query": query,
	"query_context": QueryContext(**ctx_kwargs),
	"agent_logs": [],
	"resolution_attempted": False,
	"resolution_result": None,
	"final_response": None,
	}

	# ----- Scenario 1: empty result for X 2024 → user asks "latest for X" ---

	@pytest.mark.smoke
	def test_empty_combo_then_latest_for_district(self, mock_bot):
	"""[unit, smoke] Multi-turn flow simulation: empty result for
	Nwoya 2024 → user asks 'latest for Nwoya' → resolver computes
	max(year) = 2022 → main_agent injects year=2022 → ready for
	RAG. Mocked vectorstore + supplied QueryContext; verifies our
	new resolver+main_agent handoff without an LLM call.

	T1: "audit findings for Nwoya in 2024" → 0 docs (Nwoya has no
	data for 2024; pre-validation would catch it upstream).
	T2: "okay, give me whatever the latest you have on Nwoya" →
	resolver computes max(year for Nwoya) = 2022 → injects
	year=2022 → RAG would now retrieve for Nwoya 2022.
	"""

	# --- Turn 2: user asks for "latest" with district=Nwoya, year unset ---
	state = self._state(
	"give me whatever is the latest you have on Nwoya",
	has_district=True,
	extracted_district="Nwoya",
	needs_metadata_lookup=True,
	)

	# Step A — resolver runs first
	state = mock_bot._resolver_agent(state)
	assert state["resolution_attempted"] is True
	assert "latest_year_for_district" in state["resolution_result"]
	assert state["resolution_result"]["latest_year_for_district"] == {"Nwoya": "2022"}

	# Step B — main_agent post-resolution pass injects the year
	state = mock_bot._main_agent(state)
	ctx = state["query_context"]
	assert ctx.extracted_year == "2022"
	assert ctx.has_year is True
	assert ctx.needs_metadata_lookup is False
	# The resolution note explains what happened so the LLM can cite it
	assert "Nwoya" in (ctx.resolution_notes or "")

	# ----- Scenario 2: "biggest districts" → carries forward to follow-up ---

	def test_top_districts_then_followup_keeps_them(self, mock_bot):
	"""Simulates:

	T1: "what are the audit issues for the biggest districts?" →
	resolver returns top 5 by doc count → main_agent injects
	them as extracted_district → RAG retrieves accordingly.
	T2: "now focus only on 2023" → LLM carries forward the
	districts from T1 (the LLM rule that EXPANDS or PRESERVES
	past filters; here we simulate the carry-forward by
	re-using the same district list with year added).
	"""

	# --- T1 ---
	t1 = self._state(
	"what are the audit issues for the biggest districts?",
	needs_metadata_lookup=True,
	)
	t1 = mock_bot._resolver_agent(t1)
	assert "top_districts" in t1["resolution_result"]

	t1 = mock_bot._main_agent(t1)
	ctx1 = t1["query_context"]
	assert ctx1.has_district is True
	assert ctx1.extracted_district == ["Mbale", "Gulu", "Lira", "Nwoya"]
	assert ctx1.needs_metadata_lookup is False

	# --- T2: carry-forward simulated (this is what _analyze_query_context
	# would do based on previous_filters). Verify the resolver isn't
	# needed for this turn and the filter is preserved.
	t2 = self._state(
	"now focus only on 2023",
	has_district=True,
	extracted_district=ctx1.extracted_district,
	has_year=True,
	extracted_year="2023",
	needs_metadata_lookup=False,
	)
	# No resolver call this turn; just build filters directly
	filters, anchored = mock_bot._build_filters(t2["query_context"])
	assert set(filters["district"]) == {"Mbale", "Gulu", "Lira", "Nwoya"}
	assert filters["year"] == ["2023"]
	assert "year" in anchored
	# District should also be anchored since it was carried forward from
	# an LLM extraction in T1 (has_district=True)
	assert "district" in anchored

	# ----- Scenario 3: date-range question → resolver answers without LLM ---

	def test_date_range_for_source(self, mock_bot):
	"""Simulates a single-turn metadata question:

	'What years do you have for Local Government?' →
	resolver populates date_range.per_source with min/max years.

	This is the kind of question we want to answer purely from
	cached aggregates — no LLM, no RAG retrieval.
	"""
	state = self._state(
	"what is the earliest year you have for Local Government?",
	has_source=True,
	extracted_source="Local Government",
	)
	state = mock_bot._resolver_agent(state)
	dr = state["resolution_result"]["date_range"]
	assert "per_source" in dr
	assert dr["per_source"]["Local Government"] == {
	"min_year": "2020", "max_year": "2023"
	}
	# The overall range should also be present
	assert dr["overall"] == {"min_year": "2020", "max_year": "2023"}

	# ----- Scenario 4: coverage question multi-step --------------------------

	def test_coverage_then_year_specific(self, mock_bot):
	"""T1: "what do you have on Gulu?" → coverage report.
	T2: simulated follow-up uses one of the years from the coverage
	report; verifies the system can chain.
	"""
	t1 = self._state(
	"what do you have on Gulu?",
	has_district=True,
	extracted_district="Gulu",
	)
	t1 = mock_bot._resolver_agent(t1)
	cov = t1["resolution_result"]["coverage"]["per_district"]["Gulu"]
	assert cov["total_docs"] == 50
	assert "2023" in cov["years"]

	# T2 — the user picks one of the surfaced years and asks a
	# substantive question. Filter building should succeed cleanly.
	t2_ctx = QueryContext(
	has_district=True, extracted_district="Gulu",
	has_year=True, extracted_year="2023",
	)
	filters, anchored = mock_bot._build_filters(t2_ctx)
	assert filters["year"] == ["2023"]
	assert filters["district"] == ["Gulu"]
	assert {"year", "district"}.issubset(anchored)