"""
Tests for multi-agent query analysis, filter building, metadata loading,
and resolution of ambiguous references.

Requires: QDRANT_URL, QDRANT_API_KEY, OPENAI_API_KEY in environment / .env
Run:  python -m pytest tests/test_agent_intelligence.py -v
"""

import os
import sys
import json
import pytest
from pathlib import Path
from unittest.mock import MagicMock, patch
from dataclasses import asdict

# Prepend the grandparent directory of this file (two levels above the file
# itself) to sys.path so the ``src`` package imported below can be resolved.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from src.agents.base_multi_agent_chatbot import QueryContext, BaseMultiAgentChatbot


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _load_env(env_file=None):
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Args:
        env_file: Path of the file to read. When omitted, ``.env`` in the
            parent of the directory containing this file is used.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Keys and values are whitespace-stripped and surrounding single or
    double quotes are removed from values. Variables that already exist in the
    environment are left untouched. A missing file is silently ignored.
    """
    if env_file is None:
        env_file = Path(__file__).resolve().parent.parent / ".env"
    env_file = Path(env_file)
    if not env_file.exists():
        return

    # Decode explicitly as UTF-8 instead of relying on the locale default.
    for raw_line in env_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if not key:
            # A line such as "=value" has no variable name; an empty key is
            # rejected by os.environ and would abort the module import.
            continue
        os.environ.setdefault(key, value.strip().strip('"').strip("'"))


_load_env()


# ---------------------------------------------------------------------------
# Unit tests – QueryContext (no network, no LLM)
# ---------------------------------------------------------------------------


class TestQueryContext:
    """Construction-time behaviour of ``QueryContext`` (no network, no LLM)."""

    def test_all_year_passthrough(self):
        """The 'ALL' sentinel given as year is stored unchanged."""
        assert QueryContext(has_year=True, extracted_year="ALL").extracted_year == "ALL"

    def test_all_source_passthrough(self):
        """The 'ALL' sentinel given as source is stored unchanged."""
        assert QueryContext(has_source=True, extracted_source="ALL").extracted_source == "ALL"

    def test_all_district_passthrough(self):
        """The 'ALL' sentinel given as district is stored unchanged."""
        assert (
            QueryContext(has_district=True, extracted_district="ALL").extracted_district
            == "ALL"
        )

    def test_single_district_title_case(self):
        """[unit] A lowercase district name comes back title-cased."""
        context = QueryContext(has_district=True, extracted_district="gulu")
        normalised = context.extracted_district
        assert normalised == "Gulu"

    def test_multi_district_title_case(self):
        """Every entry of a lowercase district list comes back title-cased."""
        lowercase_names = ["gulu", "pader", "lira"]
        context = QueryContext(has_district=True, extracted_district=lowercase_names)
        assert context.extracted_district == ["Gulu", "Pader", "Lira"]

    def test_single_source_title_case(self):
        """A lowercase source name comes back title-cased."""
        context = QueryContext(has_source=True, extracted_source="hospital")
        normalised = context.extracted_source
        assert normalised == "Hospital"

    def test_none_stays_none(self):
        """A default-constructed context leaves all extracted_* fields as None."""
        blank = QueryContext()
        for field_name in ("extracted_district", "extracted_source", "extracted_year"):
            assert getattr(blank, field_name) is None

    def test_resolution_notes_default(self):
        """Resolution notes default to None and no metadata lookup is requested."""
        blank = QueryContext()
        assert blank.resolution_notes is None
        assert blank.needs_metadata_lookup is False

    def test_resolution_notes_set(self):
        """Resolution notes passed to the constructor are readable afterwards."""
        note_text = "Resolved 'biggest' to top 5 districts by doc count."
        context = QueryContext(resolution_notes=note_text, needs_metadata_lookup=False)
        assert "biggest" in context.resolution_notes


# ---------------------------------------------------------------------------
# Unit tests – _build_filters (no network, no LLM)
# ---------------------------------------------------------------------------


class _StubChatbot(BaseMultiAgentChatbot):
    """Bare concrete subclass of ``BaseMultiAgentChatbot`` for unit tests.

    It overrides the methods a concrete chatbot has to provide with trivial
    implementations, and its initialiser does nothing (the base initialiser is
    not called), so each test assigns only the attributes it needs.
    """

    def __init__(self):
        """Do nothing; in particular, do not run the base-class initialiser."""

    def _perform_retrieval(self, query, filters):
        """Return a mock result exposing an empty ``sources`` list and ``answer``."""
        empty_result = MagicMock(sources=[], answer="")
        return empty_result

    def _generate_conversational_response(self, *a, **kw):
        """Return an empty response regardless of the arguments."""
        return ""

    def _generate_conversational_response_without_docs(self, *a, **kw):
        """Return an empty response regardless of the arguments."""
        return ""


class TestBuildFilters:
    """Unit tests for ``_build_filters`` run against a whitelisted stub chatbot."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot whose whitelists cover the values used in this class."""
        stub = _StubChatbot()
        # Whitelist must cover every district referenced by tests in this class,
        # otherwise _validate_filter_values (correctly) strips unknown values.
        stub.district_whitelist = [
            "Gulu", "Pader", "Kampala", "Bushenyi", "Jinja",
            "Amuru", "Kalungu", "Buikwe", "Mbale",
        ]
        stub.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        return stub

    @staticmethod
    def _build(bot, **context_fields):
        """Create a ``QueryContext`` from the keyword arguments and return
        whatever ``bot._build_filters`` produces for it."""
        return bot._build_filters(QueryContext(**context_fields))

    def test_no_filters(self, mock_bot):
        """An empty context yields no filters and no anchored keys."""
        built, anchored_keys = self._build(mock_bot)
        assert built == {}
        assert anchored_keys == set()

    def test_year_filter(self, mock_bot):
        """A single extracted year becomes a one-element, anchored year filter."""
        built, anchored_keys = self._build(mock_bot, has_year=True, extracted_year="2023")
        assert built == {"year": ["2023"]}
        assert "year" in anchored_keys

    def test_year_all_skips_filter(self, mock_bot):
        """The 'ALL' year sentinel produces no year filter."""
        built, _ = self._build(mock_bot, has_year=True, extracted_year="ALL")
        assert "year" not in built

    def test_district_all_skips_filter(self, mock_bot):
        """The 'ALL' district sentinel produces no district filter."""
        built, _ = self._build(mock_bot, has_district=True, extracted_district="ALL")
        assert "district" not in built

    def test_source_all_skips_filter(self, mock_bot):
        """The 'ALL' source sentinel produces no sources filter."""
        built, _ = self._build(mock_bot, has_source=True, extracted_source="ALL")
        assert "sources" not in built

    def test_multi_year_filter(self, mock_bot):
        """A list of years is carried into the year filter as given."""
        built, anchored_keys = self._build(
            mock_bot, has_year=True, extracted_year=["2022", "2023"]
        )
        assert built == {"year": ["2022", "2023"]}
        assert "year" in anchored_keys

    def test_single_district_filter(self, mock_bot):
        """A single extracted district appears in an anchored district filter."""
        built, anchored_keys = self._build(
            mock_bot, has_district=True, extracted_district="Gulu"
        )
        assert "district" in built
        assert "Gulu" in built["district"]
        assert "district" in anchored_keys

    def test_multi_district_filter(self, mock_bot):
        """A list of districts ends up, order-insensitively, in the filter."""
        built, _ = self._build(
            mock_bot, has_district=True, extracted_district=["Gulu", "Pader"]
        )
        assert set(built["district"]) == {"Gulu", "Pader"}

    def test_source_filter(self, mock_bot):
        """A single extracted source becomes an anchored sources filter."""
        built, anchored_keys = self._build(
            mock_bot, has_source=True, extracted_source="Hospital"
        )
        assert built == {"sources": ["Hospital"]}
        assert "sources" in anchored_keys

    def test_llm_extraction_overrides_stale_ui(self, mock_bot):
        """When LLM extracts a DIFFERENT year than sidebar, LLM wins (user changed context)."""
        built, anchored_keys = self._build(
            mock_bot,
            has_year=True,
            extracted_year="2020",
            ui_filters={"years": ["2024"]},
        )
        assert built["year"] == ["2020"]
        assert "year" in anchored_keys

    def test_filename_filter_short_circuits(self, mock_bot):
        """A sidebar filename selection is the only filter returned, even
        though a year was also extracted."""
        built, anchored_keys = self._build(
            mock_bot,
            has_year=True,
            extracted_year="2023",
            ui_filters={"filenames": ["report.pdf"]},
        )
        assert built == {"filenames": ["report.pdf"]}
        assert "year" not in built
        assert "filenames" in anchored_keys

    def test_district_drops_auto_inferred_source(self, mock_bot):
        """[unit, regression] When district is present and source was
        NOT explicitly mentioned, source should be dropped.

        Regression: locks in the district-priority rule (without this,
        adding source=Local Government on top of a district query
        excludes VFM / Project audits that also cover the district).
        """
        built, anchored_keys = self._build(
            mock_bot,
            has_year=True,
            extracted_year="2024",
            has_source=False,
            extracted_source="Local Government",
            has_district=True,
            extracted_district="Gulu",
        )
        assert built["year"] == ["2024"]
        assert "sources" not in built
        assert "Gulu" in built["district"]
        assert "district" in anchored_keys

    def test_district_keeps_explicit_source(self, mock_bot):
        """When district is present but source WAS explicitly mentioned, both are kept."""
        built, anchored_keys = self._build(
            mock_bot,
            has_year=True,
            extracted_year="2024",
            has_source=True,
            extracted_source="Local Government",
            has_district=True,
            extracted_district="Gulu",
        )
        assert built["year"] == ["2024"]
        assert built["sources"] == ["Local Government"]
        assert "Gulu" in built["district"]
        assert "sources" in anchored_keys
        assert "district" in anchored_keys

    def test_anchored_keys_from_ui(self, mock_bot):
        """UI sidebar selections are always anchored."""
        sidebar = {"sources": ["Hospital"], "years": ["2024"]}
        _, anchored_keys = self._build(mock_bot, ui_filters=sidebar)
        assert "sources" in anchored_keys
        assert "year" in anchored_keys

    def test_llm_overrides_stale_sidebar_district(self, mock_bot):
        """When user changes district in conversation, LLM extraction overrides stale sidebar."""
        expected = {"Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"}
        built, anchored_keys = self._build(
            mock_bot,
            has_district=True,
            extracted_district=["Bushenyi", "Amuru", "Kalungu", "Buikwe", "Mbale"],
            has_year=True,
            extracted_year="2023",
            ui_filters={"districts": ["Jinja"], "years": ["2023"]},
        )
        assert set(built["district"]) == expected
        assert "Jinja" not in built["district"]
        assert "district" in anchored_keys

    def test_llm_same_as_sidebar_uses_sidebar(self, mock_bot):
        """When LLM extraction matches sidebar, sidebar wins (no override)."""
        built, anchored_keys = self._build(
            mock_bot,
            has_district=True,
            extracted_district="Gulu",
            ui_filters={"districts": ["Gulu"]},
        )
        assert built["district"] == ["Gulu"]
        assert "district" in anchored_keys

    def test_llm_overrides_stale_sidebar_year(self, mock_bot):
        """When user mentions new years, LLM extraction overrides sidebar."""
        built, anchored_keys = self._build(
            mock_bot,
            has_year=True,
            extracted_year=["2023", "2025"],
            ui_filters={"years": ["2023"]},
        )
        assert set(built["year"]) == {"2023", "2025"}
        assert "year" in anchored_keys

    def test_no_has_flag_sidebar_wins(self, mock_bot):
        """When LLM did NOT detect a filter dimension, sidebar stays."""
        built, anchored_keys = self._build(
            mock_bot,
            has_district=False,
            extracted_district=None,
            ui_filters={"districts": ["Jinja"]},
        )
        assert built["district"] == ["Jinja"]
        assert "district" in anchored_keys


# ---------------------------------------------------------------------------
# Unit tests – UGANDA_REGIONS
# ---------------------------------------------------------------------------


class TestUgandaRegions:
    """Sanity checks on the ``BaseMultiAgentChatbot.UGANDA_REGIONS`` mapping
    of region name to a collection of district names."""

    def test_regions_exist(self):
        """The mapping exists and contains the five expected region keys."""
        assert hasattr(BaseMultiAgentChatbot, "UGANDA_REGIONS")
        regions = BaseMultiAgentChatbot.UGANDA_REGIONS
        expected = ("Northern", "Eastern", "Western", "Central", "Karamoja")
        # Collect every missing key so a failure reports all of them at once
        # rather than stopping at the first.
        missing = [name for name in expected if name not in regions]
        assert not missing, f"Missing regions in UGANDA_REGIONS: {missing}"

    def test_gulu_is_northern(self):
        """Gulu is listed under the Northern region."""
        assert "Gulu" in BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]

    def test_kampala_is_central(self):
        """Kampala is listed under the Central region."""
        assert "Kampala" in BaseMultiAgentChatbot.UGANDA_REGIONS["Central"]

    def test_moroto_is_karamoja(self):
        """Moroto is listed under the Karamoja region."""
        assert "Moroto" in BaseMultiAgentChatbot.UGANDA_REGIONS["Karamoja"]

    def test_no_duplicate_across_regions(self):
        """No district name occurs more than once across all regions."""
        from collections import Counter

        occurrences = Counter(
            district
            for districts in BaseMultiAgentChatbot.UGANDA_REGIONS.values()
            for district in districts
        )
        # Name the offending districts so a failure is actionable.
        duplicates = sorted(name for name, seen in occurrences.items() if seen > 1)
        assert not duplicates, f"Duplicate district in UGANDA_REGIONS: {duplicates}"


# ---------------------------------------------------------------------------
# Integration tests – require network + Qdrant + OpenAI
# ---------------------------------------------------------------------------


def _skip_if_no_env():
    """Skip the calling test when a required environment variable is absent.

    QDRANT_URL, QDRANT_API_KEY and OPENAI_API_KEY are checked in that order;
    the skip reason names the first one that is unset or empty.
    """
    required = ("QDRANT_URL", "QDRANT_API_KEY", "OPENAI_API_KEY")
    first_missing = next(
        (name for name in required if not os.environ.get(name)), None
    )
    if first_missing is not None:
        pytest.skip(f"{first_missing} not set")


@pytest.fixture(scope="module")
def chatbot():
    """Module-scoped ``MultiAgentRAGChatbot`` instance for integration tests.

    The fixture skips when any required environment variable is missing.
    """
    _skip_if_no_env()
    # The chatbot module is imported only after the skip check has passed.
    from src.agents.multi_agent_chatbot import MultiAgentRAGChatbot

    return MultiAgentRAGChatbot()


@pytest.mark.live_qdrant
class TestMetadataLoading:
    """Checks on the metadata attributes exposed by the live chatbot fixture."""

    @pytest.mark.smoke
    def test_db_metadata_context_populated(self, chatbot):
        """[integration, smoke] Booting the chatbot must produce a
        non-empty live-metadata context string from Qdrant. Smoke
        because failure here means the whole Qdrant integration is
        broken."""
        context_text = chatbot.db_metadata_context
        assert context_text is not None
        assert len(context_text) > 100

    def test_year_whitelist_from_qdrant(self, chatbot):
        """The year whitelist contains 2020, 2024 and 2025."""
        for expected_year in ("2020", "2024", "2025"):
            assert expected_year in chatbot.year_whitelist

    def test_district_doc_counts_populated(self, chatbot):
        """More than 50 districts have a document count."""
        district_total = len(chatbot.district_doc_counts)
        assert district_total > 50

    def test_latest_data_year(self, chatbot):
        """The latest data year is reported as '2025'."""
        assert chatbot.latest_data_year == "2025"

    def test_regions_in_context(self, chatbot):
        """The metadata context text mentions the Northern and Central regions."""
        for region_name in ("Northern", "Central"):
            assert region_name in chatbot.db_metadata_context


@pytest.mark.live_llm
@pytest.mark.live_qdrant
class TestQueryAnalysisLLM:
    """
    Integration tests that call the real LLM (gpt-4.1) for query analysis.
    Each test validates a specific capability of the analysis prompt.

    Marked ``live_llm`` — automatically skipped when OpenAI quota is
    unavailable (see ``tests/conftest.py``).
    """

    @pytest.mark.smoke
    def test_all_years_extraction(self, chatbot):
        """[integration, smoke, quality] LLM correctly interprets the
        phrase 'for all years' as the sentinel 'ALL' (not as a year
        list). Quality: depends on the model understanding our prompt
        contract."""
        question = "What are the main audit findings for all years?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.extracted_year == "ALL"

    def test_greeting_triggers_follow_up(self, chatbot):
        """A bare greeting asks for a follow-up and supplies the question."""
        analysis = chatbot._analyze_query_context("hello", [], {})
        assert analysis.needs_follow_up is True
        assert analysis.follow_up_question is not None

    def test_last_n_years(self, chatbot):
        """'last 3 years' needs no follow-up.

        The year-count check (at least two years) only runs when a year value
        other than 'ALL' was extracted; otherwise nothing further is asserted.
        """
        question = "Revenue performance in the last 3 years"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        if not analysis.extracted_year or analysis.extracted_year == "ALL":
            return
        if isinstance(analysis.extracted_year, list):
            years = analysis.extracted_year
        else:
            years = [analysis.extracted_year]
        assert len(years) >= 2

    def test_explicit_district(self, chatbot):
        """A query naming Gulu is analysed as a district query containing 'Gulu'."""
        question = "What issues were found in Gulu?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.has_district is True
        extracted = analysis.extracted_district
        if not isinstance(extracted, list):
            assert "Gulu" in str(extracted)
        else:
            assert any("Gulu" in name for name in extracted)

    def test_source_alias_ministries(self, chatbot):
        """The word 'ministries' resolves to a source containing 'Ministry'."""
        question = "What are the audit findings for ministries?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.has_source is True
        extracted = analysis.extracted_source
        if not isinstance(extracted, list):
            assert "Ministry" in str(extracted)
        else:
            assert any("Ministry" in name for name in extracted)

    def test_biggest_districts_resolution(self, chatbot):
        """'biggest districts' is analysed as a district query.

        When resolution notes are present they must mention one of the words
        'biggest', 'top', 'most' or 'document' (case-insensitive).
        """
        question = "Audit findings for the biggest districts"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        assert analysis.has_district is True
        if analysis.resolution_notes:
            notes_lower = analysis.resolution_notes.lower()
            keywords = ("biggest", "top", "most", "document")
            assert any(word in notes_lower for word in keywords)

    def test_northern_uganda_resolution(self, chatbot):
        """'northern Uganda' needs no follow-up.

        When a district list was extracted, at least two entries must belong
        to the Northern region (compared case-insensitively); with any other
        value nothing further is asserted.
        """
        question = "Revenue issues in northern Uganda for all years"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False
        districts = analysis.extracted_district
        if not isinstance(districts, list):
            return
        northern_lower = {
            name.lower() for name in BaseMultiAgentChatbot.UGANDA_REGIONS["Northern"]
        }
        found = [name for name in districts if name.lower() in northern_lower]
        assert len(found) >= 2, f"Expected Northern districts, got {districts}"

    def test_substantive_question_no_follow_up(self, chatbot):
        """A substantive question without filters does not ask for a follow-up."""
        question = "What are the top challenges in budget allocation?"
        analysis = chatbot._analyze_query_context(question, [], {})
        assert analysis.needs_follow_up is False


@pytest.mark.live_qdrant
class TestFilterQueryExecution:
    """Filter building on the live chatbot fixture, plus one unfiltered
    retrieval check."""

    def test_year_filter_built_correctly(self, chatbot):
        """A single year becomes an anchored one-element year filter."""
        context = QueryContext(has_year=True, extracted_year="2024")
        built, anchored_keys = chatbot._build_filters(context)
        assert built == {"year": ["2024"]}
        assert "year" in anchored_keys

    def test_district_filter_built_correctly(self, chatbot):
        """A single district appears in an anchored district filter."""
        context = QueryContext(has_district=True, extracted_district="Gulu")
        built, anchored_keys = chatbot._build_filters(context)
        assert "district" in built
        assert "Gulu" in built["district"]
        assert "district" in anchored_keys

    def test_all_year_produces_no_filter(self, chatbot):
        """The 'ALL' year sentinel produces no year filter."""
        context = QueryContext(has_year=True, extracted_year="ALL")
        built, _ = chatbot._build_filters(context)
        assert "year" not in built

    def test_source_filter_built_correctly(self, chatbot):
        """A single source becomes an anchored sources filter."""
        context = QueryContext(has_source=True, extracted_source="Hospital")
        built, anchored_keys = chatbot._build_filters(context)
        assert built == {"sources": ["Hospital"]}
        assert "sources" in anchored_keys

    def test_district_drops_auto_source_live(self, chatbot):
        """Integration: district without explicit source should drop auto-inferred source."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=False,
            extracted_source="Local Government",
        )
        built, anchored_keys = chatbot._build_filters(context)
        assert "district" in built
        assert "sources" not in built
        assert "district" in anchored_keys

    @pytest.mark.xfail(reason="Pipeline reranker returns 0 docs in test context — pre-existing issue")
    def test_unfiltered_retrieval_returns_results(self, chatbot):
        """Retrieval with an empty filter dict returns at least one source."""
        query_text = "audit findings and recommendations"
        retrieved = chatbot._perform_retrieval(query_text, {})
        assert len(retrieved.sources) > 0


# ---------------------------------------------------------------------------
# Integration tests – prevalidation (requires Qdrant)
# ---------------------------------------------------------------------------


@pytest.mark.live_qdrant
class TestPrevalidation:
    """Test the _prevalidate_filters mechanism against real Qdrant data."""

    def test_valid_combo_is_ok(self, chatbot):
        """A filter combo that exists should return ok=True."""
        report = chatbot._prevalidate_filters({"year": ["2024"]}, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_gulu_2023_gap(self, chatbot):
        """Gulu + 2023 should be detected as a data gap."""
        requested = {"district": ["Gulu"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, {"district", "year"})
        assert report["ok"] is False
        assert report["suggestion"] is not None
        assert len(report["gap_dimensions"]) > 0

    def test_jinja_2023_exists(self, chatbot):
        """Jinja + 2023 should have data."""
        requested = {"district": ["Jinja"], "year": ["2023"]}
        report = chatbot._prevalidate_filters(requested, set())
        assert report["ok"] is True
        assert report["total_count"] > 0

    def test_nonexistent_year(self, chatbot):
        """A year with no data should fail individual dim check."""
        report = chatbot._prevalidate_filters({"year": ["1999"]}, {"year"})
        assert report["ok"] is False
        gap_values = [str(gap.get("value", "")) for gap in report["gap_dimensions"]]
        assert any("1999" in value for value in gap_values)

    def test_empty_filters_ok(self, chatbot):
        """No filters should always be ok."""
        report = chatbot._prevalidate_filters({}, set())
        assert report["ok"] is True


# ---------------------------------------------------------------------------
# Unit tests – post-relaxation relevance check (no network)
# ---------------------------------------------------------------------------


class TestPostRelaxationRelevanceCheck:
    """Unit tests for ``_post_relaxation_relevance_check`` using mock documents."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with small district, year and source whitelists."""
        stub = _StubChatbot()
        stub.source_whitelist = ["Local Government", "Hospital"]
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.district_whitelist = ["Gulu", "Jinja"]
        return stub

    def test_relevant_docs(self, mock_bot):
        """A document from the anchored district is reported as relevant."""
        gulu_doc = MagicMock(metadata={"district": "Gulu", "year": "2023"})
        verdict = mock_bot._post_relaxation_relevance_check(
            [gulu_doc], {"district"}, {"district": ["Gulu"]}
        )
        assert verdict["relevant"] is True

    def test_irrelevant_docs(self, mock_bot):
        """Documents only from other districts are reported as not relevant,
        and the details mention the requested district."""
        other_docs = [
            MagicMock(metadata={"district": other, "year": "2023"})
            for other in ("Hoima", "Kumi")
        ]
        verdict = mock_bot._post_relaxation_relevance_check(
            other_docs, {"district"}, {"district": ["Gulu"]}
        )
        assert verdict["relevant"] is False
        assert "Gulu" in verdict["details"]

    def test_no_anchored_keys(self, mock_bot):
        """With no anchored keys, a document from another district still
        counts as relevant."""
        hoima_doc = MagicMock(metadata={"district": "Hoima"})
        verdict = mock_bot._post_relaxation_relevance_check(
            [hoima_doc], set(), {"district": ["Gulu"]}
        )
        assert verdict["relevant"] is True


# ---------------------------------------------------------------------------
# Unit tests – district priority over source (no network)
# ---------------------------------------------------------------------------


class TestDistrictSourcePriority:
    """Unit tests for how ``_build_filters`` combines a district with a source."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with small district, source and year whitelists."""
        stub = _StubChatbot()
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.source_whitelist = ["Local Government", "Hospital"]
        stub.district_whitelist = ["Gulu", "Jinja", "Kampala"]
        return stub

    def test_district_with_auto_source_drops_source(self, mock_bot):
        """A source value with ``has_source=False`` is dropped when a district
        is present."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=False,
            extracted_source="Local Government",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert "district" in built
        assert "sources" not in built
        assert "district" in anchored_keys

    def test_district_with_explicit_source_keeps_both(self, mock_bot):
        """A source value with ``has_source=True`` is kept and anchored
        alongside the district."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_source=True,
            extracted_source="Local Government",
        )
        built, anchored_keys = mock_bot._build_filters(context)
        for dimension in ("district", "sources"):
            assert dimension in built
        for dimension in ("district", "sources"):
            assert dimension in anchored_keys

    def test_district_with_ui_source_keeps_both(self, mock_bot):
        """A source chosen in the UI filters is kept and anchored alongside
        the district."""
        context = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            ui_filters={"sources": ["Hospital"]},
        )
        built, anchored_keys = mock_bot._build_filters(context)
        assert "district" in built
        assert "sources" in built
        assert "sources" in anchored_keys


# ---------------------------------------------------------------------------
# Unit tests – source name normalization (no network)
# ---------------------------------------------------------------------------


class TestSourceNormalization:
    """Unit tests for how ``_build_filters`` treats source names relative to
    the source whitelist."""

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot with a four-entry source whitelist."""
        stub = _StubChatbot()
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.source_whitelist = [
            "Ministry, Department and Agency", "Hospital",
            "Local Government", "Consolidated",
        ]
        stub.district_whitelist = ["Gulu"]
        return stub

    def test_case_mismatch_normalized(self, mock_bot):
        """LLM returns 'And' but Qdrant has 'and' — should be corrected."""
        llm_spelling = "Ministry, Department And Agency"
        context = QueryContext(has_source=True, extracted_source=llm_spelling)
        built, _ = mock_bot._build_filters(context)
        assert built["sources"] == ["Ministry, Department and Agency"]

    def test_already_correct_stays(self, mock_bot):
        """A source that already matches the whitelist is passed through as is."""
        context = QueryContext(has_source=True, extracted_source="Hospital")
        built, _ = mock_bot._build_filters(context)
        assert built["sources"] == ["Hospital"]

    def test_unknown_source_dropped_by_validation(self, mock_bot):
        """Unknown source values are stripped by _validate_filter_values.

        Previously this test asserted that an unknown source "passes through"
        as-is; that behaviour was changed when _validate_filter_values was
        added to guard against invalid Qdrant filter values. The current
        (correct) behaviour: unknown values are removed; if all values for
        a dimension are unknown, the entire filter dimension is dropped.
        """
        context = QueryContext(has_source=True, extracted_source="something new")
        built, _ = mock_bot._build_filters(context)
        assert "sources" not in built


# ---------------------------------------------------------------------------
# Unit tests – resolver agent extensions (no network)
# ---------------------------------------------------------------------------


class TestResolverAgentExtensions:
    """The resolver agent answers metadata-shaped questions without LLM.

    These tests use a stub vectorstore so we can verify the resolver
    dispatches correctly and produces the expected payload shapes.
    """

    @pytest.fixture
    def mock_bot(self):
        """Stub chatbot preloaded with whitelists, document counts, coverage
        tables and a mock vectorstore whose client ``count()`` result carries
        ``count == 42``."""
        stub = _StubChatbot()

        # Whitelists
        stub.district_whitelist = ["Gulu", "Lira", "Mbale", "Pader", "Jinja"]
        stub.year_whitelist = [str(year) for year in range(2020, 2026)]
        stub.source_whitelist = [
            "Consolidated", "Hospital", "Local Government",
            "Ministry, Department and Agency", "Project", "Value for Money",
        ]

        # Per-dimension document counts
        stub.district_doc_counts = {
            "Gulu": 50, "Lira": 30, "Mbale": 80, "Pader": 10, "Jinja": 65,
        }
        stub.year_doc_counts = {
            "2020": 100, "2021": 120, "2022": 180, "2023": 200, "2024": 90,
        }
        stub.source_doc_counts = {
            "Local Government": 500,
            "Ministry, Department and Agency": 300,
            "Hospital": 80,
        }

        # Cross-dimension coverage tables
        stub.source_year_coverage = {
            "Local Government": {"2020": 100, "2022": 200, "2024": 90},
        }
        stub.district_year_coverage = {
            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
            "Lira": {"2021": 12, "2023": 18},
            "Mbale": {"2020": 30, "2024": 50},
        }
        stub.district_source_coverage = {
            "Gulu": {"Local Government": 40, "Hospital": 10},
        }

        # Stub vectorstore with a count() that returns deterministic values
        count_result = MagicMock(count=42)
        fake_client = MagicMock()
        fake_client.count = MagicMock(return_value=count_result)
        fake_store = MagicMock(_client=fake_client, collection_name="test-collection")
        stub._get_vectorstore = lambda: fake_store

        return stub

    def _make_state(self, query: str, ctx_kwargs: dict = None):
        """Return a fresh agent-state dict for *query*.

        The ``QueryContext`` is built from *ctx_kwargs* (default constructor
        when omitted or empty); no resolution has been attempted yet.
        """
        context = QueryContext(**ctx_kwargs) if ctx_kwargs else QueryContext()
        return dict(
            current_query=query,
            query_context=context,
            agent_logs=[],
            resolution_attempted=False,
            resolution_result=None,
        )

    def test_top_districts(self, mock_bot):
        """[unit] Resolver dispatches on 'biggest' and returns the
        pre-cached district_doc_counts sorted descending. Mocked
        vectorstore — verifies our logic, not Qdrant."""
        result = mock_bot._resolver_agent(
            self._make_state("biggest districts overall")
        )["resolution_result"]
        assert "top_districts" in result
        leader = result["top_districts"][0]
        assert leader["district"] == "Mbale"
        assert leader["doc_count"] == 80

    def test_bottom_districts(self, mock_bot):
        """'smallest districts' puts Pader, the lowest fixture count, first."""
        result = mock_bot._resolver_agent(
            self._make_state("smallest districts in the corpus")
        )["resolution_result"]
        assert result["bottom_districts"][0]["district"] == "Pader"

    def test_top_sources(self, mock_bot):
        """'largest source category' puts Local Government, the highest
        fixture source count, first."""
        result = mock_bot._resolver_agent(
            self._make_state("largest source category")
        )["resolution_result"]
        assert "top_sources" in result
        assert result["top_sources"][0]["source"] == "Local Government"

    def test_top_years(self, mock_bot):
        """'most documented year' puts 2023, the highest fixture year count,
        first."""
        result = mock_bot._resolver_agent(
            self._make_state("most documented year")
        )["resolution_result"]
        assert "top_years" in result
        assert result["top_years"][0]["year"] == "2023"

    def test_per_district_live_count(self, mock_bot):
        """A district query reports the stubbed vectorstore count for that
        district."""
        state = self._make_state(
            "audit findings in Gulu",
            ctx_kwargs=dict(has_district=True, extracted_district="Gulu"),
        )
        result = mock_bot._resolver_agent(state)["resolution_result"]
        assert result["district_counts"] == {"Gulu": 42}

    def test_combination_district_year(self, mock_bot):
        """District plus year yields a 'district+year' combination entry
        carrying the stubbed count."""
        state = self._make_state(
            "Gulu 2022",
            ctx_kwargs=dict(
                has_district=True,
                extracted_district="Gulu",
                has_year=True,
                extracted_year="2022",
            ),
        )
        result = mock_bot._resolver_agent(state)["resolution_result"]
        combinations = result["combination_counts"]
        assert "district+year" in combinations
        first_entry = combinations["district+year"][0]
        assert first_entry == {"a": "Gulu", "b": "2022", "doc_count": 42}

    def test_date_range_overall(self, mock_bot):
        """'latest reports' reports an overall range of 2020 to 2024."""
        result = mock_bot._resolver_agent(
            self._make_state("latest reports across the corpus")
        )["resolution_result"]
        overall = result["date_range"]["overall"]
        assert overall == {"min_year": "2020", "max_year": "2024"}

    def test_latest_year_for_district(self, mock_bot):
        """'latest' for Gulu resolves to 2023, the highest year in the
        fixture's Gulu coverage."""
        state = self._make_state(
            "give me whatever the latest you have on Gulu",
            ctx_kwargs=dict(has_district=True, extracted_district="Gulu"),
        )
        result = mock_bot._resolver_agent(state)["resolution_result"]
        assert result["latest_year_for_district"] == {"Gulu": "2023"}

    def test_earliest_year_for_district(self, mock_bot):
        """'oldest' for Mbale resolves to 2020, the lowest year in the
        fixture's Mbale coverage."""
        state = self._make_state(
            "oldest record for Mbale",
            ctx_kwargs=dict(has_district=True, extracted_district="Mbale"),
        )
        result = mock_bot._resolver_agent(state)["resolution_result"]
        assert result["earliest_year_for_district"] == {"Mbale": "2020"}

    def test_coverage_report(self, mock_bot):
        """'what do you have on Gulu?' yields a per-district coverage entry
        with the fixture's total, years and sources for Gulu."""
        state = self._make_state(
            "what do you have on Gulu?",
            ctx_kwargs=dict(has_district=True, extracted_district="Gulu"),
        )
        result = mock_bot._resolver_agent(state)["resolution_result"]
        gulu_coverage = result["coverage"]["per_district"]["Gulu"]
        assert gulu_coverage["total_docs"] == 50
        assert set(gulu_coverage["years"]) == {"2020", "2022", "2023"}
        assert set(gulu_coverage["sources"]) == {"Local Government", "Hospital"}

    def test_no_vectorstore_safe(self, mock_bot):
        """With no vectorstore available the resolver returns an empty result."""
        mock_bot._get_vectorstore = lambda: None
        outcome = mock_bot._resolver_agent(self._make_state("biggest districts"))
        # Should not crash; resolution_result is an empty dict
        assert outcome["resolution_result"] == {}


class TestResolverPostResolutionIntegration:
    """Check that ``_main_agent``, given a state in which resolution has
    already been attempted, writes the resolver's output back into the
    query context.
    """

    @pytest.fixture
    def mock_bot(self):
        """Return a ``_StubChatbot`` whose whitelists and coverage tables
        are set by hand; only Nwoya has per-year coverage."""
        stub = _StubChatbot()
        preset = {
            "district_whitelist": ["Gulu", "Lira", "Nwoya"],
            "year_whitelist": ["2020", "2021", "2022", "2023", "2024"],
            "source_whitelist": ["Local Government", "Hospital"],
            "district_doc_counts": {"Gulu": 10, "Lira": 20},
            "year_doc_counts": {},
            "source_doc_counts": {},
            "source_year_coverage": {},
            "district_year_coverage": {"Nwoya": {"2020": 5, "2022": 8}},
            "district_source_coverage": {},
        }
        for attr_name, attr_value in preset.items():
            setattr(stub, attr_name, attr_value)
        return stub

    def test_latest_year_for_district_injects_year(self, mock_bot):
        """A resolved latest year for Nwoya ends up as the context's year,
        the lookup flag is cleared, and a resolution note is recorded."""
        pre_resolved_state = {
            "current_query": "give me whatever the latest you have on Nwoya",
            "query_context": QueryContext(
                has_district=True,
                extracted_district="Nwoya",
                needs_metadata_lookup=True,
            ),
            "agent_logs": [],
            # The resolver step is simulated: its result is supplied directly.
            "resolution_attempted": True,
            "resolution_result": {
                "latest_year_for_district": {"Nwoya": "2022"},
            },
            "final_response": None,
        }
        updated_ctx = mock_bot._main_agent(pre_resolved_state)["query_context"]
        # The year comes from the supplied resolver result, not from the query.
        assert updated_ctx.extracted_year == "2022"
        assert updated_ctx.has_year is True
        assert updated_ctx.needs_metadata_lookup is False
        assert "latest available year" in (updated_ctx.resolution_notes or "")


# ---------------------------------------------------------------------------
# Multi-turn conversation simulations for the resolver agent
# ---------------------------------------------------------------------------


class TestResolverMultiTurnFlow:
    """End-to-end simulations of multi-turn conversations that exercise
    the resolver agent + main-agent post-resolution handoff.

    These tests bypass the LLM (``_analyze_query_context``) so they run
    without any API quota. Each turn's ``query_context`` is supplied
    directly, mimicking what the LLM would have produced. The test then
    drives ``_resolver_agent`` + ``_main_agent`` and asserts the state
    after each step.
    """

    @pytest.fixture
    def mock_bot(self):
        """Return a ``_StubChatbot`` preloaded with whitelists, document
        counts and coverage tables, plus a mocked vectorstore whose
        client ``count()`` always reports 7."""
        all_years = ["2020", "2021", "2022", "2023", "2024"]

        stub = _StubChatbot()
        stub.district_whitelist = ["Nwoya", "Gulu", "Lira", "Mbale"]
        stub.year_whitelist = all_years
        stub.source_whitelist = [
            "Local Government",
            "Hospital",
            "Ministry, Department and Agency",
        ]
        stub.district_doc_counts = {
            "Nwoya": 8,
            "Gulu": 50,
            "Lira": 30,
            "Mbale": 80,
        }
        stub.year_doc_counts = {
            "2020": 100,
            "2021": 80,
            "2022": 60,
            "2023": 40,
        }
        stub.source_doc_counts = {
            "Local Government": 200,
            "Ministry, Department and Agency": 150,
            "Hospital": 30,
        }
        stub.source_year_coverage = {
            "Local Government": {
                "2020": 50,
                "2021": 40,
                "2022": 30,
                "2023": 20,
            },
        }
        stub.district_year_coverage = {
            "Nwoya": {"2020": 3, "2022": 5},
            "Gulu": {"2020": 10, "2022": 25, "2023": 15},
            "Mbale": {"2020": 30, "2024": 50},
        }
        stub.district_source_coverage = {
            "Nwoya": {"Local Government": 8},
            "Gulu": {"Local Government": 40, "Hospital": 10},
        }

        # Fixed count() reply so any count lookup is deterministic.
        count_reply = MagicMock(count=7)
        fake_client = MagicMock()
        fake_client.count = MagicMock(return_value=count_reply)
        fake_store = MagicMock(_client=fake_client, collection_name="test-collection")
        stub._get_vectorstore = lambda: fake_store
        return stub

    def _state(self, query: str, **ctx_kwargs):
        """Return a fresh single-turn state dict.

        ``query`` becomes ``current_query``; ``ctx_kwargs`` are forwarded
        to ``QueryContext``. The remaining fields start unresolved: no
        logs, resolution not attempted, no result, no final response.
        """
        context = QueryContext(**ctx_kwargs)
        return dict(
            current_query=query,
            query_context=context,
            agent_logs=[],
            resolution_attempted=False,
            resolution_result=None,
            final_response=None,
        )

    # ----- Scenario 1: empty result for X 2024 → user asks "latest for X" ---

    @pytest.mark.smoke
    def test_empty_combo_then_latest_for_district(self, mock_bot):
        """[unit, smoke] "Latest for Nwoya" follow-up: the resolver reports
        2022 as Nwoya's latest year and ``_main_agent`` injects it into
        the query context.

        Conversation being modelled:

          T1: "audit findings for Nwoya in 2024" comes back empty (the
              fixture has no 2024 entry for Nwoya). This turn is
              background only; it is NOT executed by this test.
          T2: "give me whatever is the latest you have on Nwoya" is the
              only turn driven here. Its QueryContext is supplied
              directly (district=Nwoya, no year, metadata lookup
              requested).
        """
        turn = self._state(
            "give me whatever is the latest you have on Nwoya",
            has_district=True,
            extracted_district="Nwoya",
            needs_metadata_lookup=True,
        )

        # Resolver pass: must flag the attempt and report Nwoya -> 2022.
        turn = mock_bot._resolver_agent(turn)
        assert turn["resolution_attempted"] is True
        resolution = turn["resolution_result"]
        assert "latest_year_for_district" in resolution
        assert resolution["latest_year_for_district"] == {"Nwoya": "2022"}

        # Main-agent pass: the resolved year is written into the context
        # and the lookup flag is cleared.
        turn = mock_bot._main_agent(turn)
        resolved_ctx = turn["query_context"]
        assert resolved_ctx.extracted_year == "2022"
        assert resolved_ctx.has_year is True
        assert resolved_ctx.needs_metadata_lookup is False
        # The note must name the district it was resolved for.
        assert "Nwoya" in (resolved_ctx.resolution_notes or "")

    # ----- Scenario 2: "biggest districts" → carries forward to follow-up ---

    def test_top_districts_then_followup_keeps_them(self, mock_bot):
        """Two-turn flow: resolved "biggest districts" survive a follow-up
        that adds a year.

          T1: "what are the audit issues for the biggest districts?" —
              the resolver yields ``top_districts`` and ``_main_agent``
              stores them as ``extracted_district``.
          T2: "now focus only on 2023" — the carry-forward is simulated
              by building a new context that reuses T1's district list
              with year 2023 added; only ``_build_filters`` is exercised
              for this turn.
        """
        # Expected order matches descending district_doc_counts in the fixture.
        expected_ranking = ["Mbale", "Gulu", "Lira", "Nwoya"]

        # ----- T1: resolve, then let the main agent inject the districts.
        first_turn = self._state(
            "what are the audit issues for the biggest districts?",
            needs_metadata_lookup=True,
        )
        first_turn = mock_bot._resolver_agent(first_turn)
        assert "top_districts" in first_turn["resolution_result"]

        first_turn = mock_bot._main_agent(first_turn)
        first_ctx = first_turn["query_context"]
        assert first_ctx.has_district is True
        assert first_ctx.extracted_district == expected_ranking
        assert first_ctx.needs_metadata_lookup is False

        # ----- T2: context supplied as if the districts had been carried
        # forward; the resolver is deliberately not invoked.
        carried_forward = dict(
            has_district=True,
            extracted_district=first_ctx.extracted_district,
            has_year=True,
            extracted_year="2023",
            needs_metadata_lookup=False,
        )
        second_turn = self._state("now focus only on 2023", **carried_forward)
        built_filters, anchored_fields = mock_bot._build_filters(
            second_turn["query_context"]
        )
        assert set(built_filters["district"]) == {"Mbale", "Gulu", "Lira", "Nwoya"}
        assert built_filters["year"] == ["2023"]
        assert "year" in anchored_fields
        # has_district=True on the T2 context, so district is expected to
        # be anchored as well.
        assert "district" in anchored_fields

    # ----- Scenario 3: date-range question → resolver answers without LLM ---

    def test_date_range_for_source(self, mock_bot):
        """Single-turn metadata question about one source.

        For "what is the earliest year you have for Local Government?"
        the resolver's ``date_range`` must contain a ``per_source`` entry
        for Local Government and an ``overall`` entry, both spanning
        2020-2023. Only ``_resolver_agent`` is invoked.
        """
        source_name = "Local Government"
        state = self._state(
            "what is the earliest year you have for Local Government?",
            has_source=True,
            extracted_source=source_name,
        )
        resolved = mock_bot._resolver_agent(state)
        date_range = resolved["resolution_result"]["date_range"]

        assert "per_source" in date_range
        assert date_range["per_source"][source_name] == {
            "min_year": "2020", "max_year": "2023"
        }
        # The corpus-wide range is reported alongside the per-source one.
        assert date_range["overall"] == {"min_year": "2020", "max_year": "2023"}

    # ----- Scenario 4: coverage question multi-step --------------------------

    def test_coverage_then_year_specific(self, mock_bot):
        """Two-step flow: coverage report for Gulu, then a year-specific
        follow-up.

          T1: "what do you have on Gulu?" — the resolver's per-district
              coverage must report 50 docs and include 2023.
          T2: a context for Gulu + 2023 (a year surfaced in T1) is passed
              to ``_build_filters``; both fields must be filtered and
              anchored.
        """
        # ----- T1: coverage lookup through the resolver.
        coverage_turn = mock_bot._resolver_agent(
            self._state(
                "what do you have on Gulu?",
                has_district=True,
                extracted_district="Gulu",
            )
        )
        per_district = coverage_turn["resolution_result"]["coverage"]["per_district"]
        gulu_coverage = per_district["Gulu"]
        assert gulu_coverage["total_docs"] == 50
        assert "2023" in gulu_coverage["years"]

        # ----- T2: follow-up context built by hand with one surfaced year.
        followup_ctx = QueryContext(
            has_district=True,
            extracted_district="Gulu",
            has_year=True,
            extracted_year="2023",
        )
        filters, anchored = mock_bot._build_filters(followup_ctx)
        assert filters["year"] == ["2023"]
        assert filters["district"] == ["Gulu"]
        assert {"year", "district"}.issubset(anchored)