"""Validated structured-output tests — fully offline, instructor + litellm faked.

No network and no real credentials.  Three layers are covered:

  * ``build_output_model`` is pure Pydantic: it constrains ``kind`` to the
    allowed grant and requires ``text`` (+ extra fields).
  * ``LiteLLMProvider.complete_structured`` wraps a faked
    ``instructor.from_litellm`` client and reads tokens + cost from the raw
    completion, mirroring ``complete``.
  * ``ManifestAgent`` takes the structured path when the provider offers
    ``complete_structured`` (validated payload, no ``_raw_fallback``) and the
    tolerant-parser path with the deterministic stub.
"""

from __future__ import annotations

import sys
import types
from dataclasses import dataclass, field

import pytest
from pydantic import BaseModel, ValidationError

from src.agents.base import ManifestAgent
from src.core.manifest import AgentManifest
from src.core.projections import StageProjection
from src.core.structured import AgentOutputError, build_output_model
from src.models.litellm_provider import LiteLLMProvider
from src.models.router import ModelRouter


# ── build_output_model (pure Pydantic) ─────────────────────────────────────────


class TestBuildOutputModel:
    """``build_output_model`` returns a Pydantic model gated on the allowed kinds."""

    def test_accepts_valid_kind(self):
        """A kind taken from the allowed list validates and keeps its text."""
        output_cls = build_output_model(["agent.spoke", "judge.verdict"])
        instance = output_cls(kind="agent.spoke", text="I collect echoes.")
        assert instance.kind == "agent.spoke"
        assert instance.text == "I collect echoes."

    def test_rejects_kind_not_in_allowed(self):
        """A kind outside the allowed list fails validation."""
        output_cls = build_output_model(["agent.spoke"])
        with pytest.raises(ValidationError):
            output_cls(kind="not.real", text="oops")

    def test_single_kind_still_constrains(self):
        """A one-element allowed list accepts that kind and nothing else."""
        output_cls = build_output_model(["world.observed"])
        accepted = output_cls(kind="world.observed", text="A booth opens.")
        assert accepted.kind == "world.observed"
        with pytest.raises(ValidationError):
            output_cls(kind="judge.verdict", text="x")

    def test_extra_fields_required(self):
        """Each declared extra field becomes a required attribute."""
        output_cls = build_output_model(["agent.spoke"], ["emotion"])
        complete = output_cls(kind="agent.spoke", text="hi", emotion="puzzled")
        assert complete.emotion == "puzzled"
        with pytest.raises(ValidationError):
            # No ``emotion`` supplied, so validation must fail.
            output_cls(kind="agent.spoke", text="hi")

    def test_text_required(self):
        """Omitting ``text`` fails validation."""
        output_cls = build_output_model(["agent.spoke"])
        with pytest.raises(ValidationError):
            output_cls(kind="agent.spoke")

    def test_empty_allowed_kinds_raises(self):
        """An empty allowed list is rejected with ``AgentOutputError``."""
        with pytest.raises(AgentOutputError):
            build_output_model([])

    def test_is_subclass_of_basemodel(self):
        """The generated class derives from Pydantic's ``BaseModel``."""
        output_cls = build_output_model(["agent.spoke"])
        assert issubclass(output_cls, BaseModel)


# ── fake instructor + litellm for the provider ──────────────────────────────────


@dataclass
class _FakeUsage:
    """Token-usage record attached to the fake raw completion.

    The defaults are fixed, non-zero counts (11 prompt + 7 completion = 18 total)
    that the provider tests in this module assert against.
    """

    prompt_tokens: int = 11
    completion_tokens: int = 7
    total_tokens: int = 18


class _FakeRawCompletion:
    """Fake of the raw completion Instructor hands back next to the parsed model.

    Exposes a default ``_FakeUsage`` as ``usage`` and a ``_hidden_params`` dict that
    contains a ``response_cost`` entry only when *hidden_cost* is supplied.
    """

    def __init__(self, *, hidden_cost: float | None = None) -> None:
        params: dict = {}
        if hidden_cost is not None:
            params["response_cost"] = hidden_cost
        self._hidden_params = params
        self.usage = _FakeUsage()


class _FakeInstructorClient:
    """Minimal double for the client that ``instructor.from_litellm`` returns.

    *hidden_cost* is forwarded to the fake raw completion.  *raise_exc*, when set,
    is raised by every call.  *record*, when set, is a dict that receives the
    keyword arguments of each call plus the requested ``response_model``.
    """

    def __init__(self, *, hidden_cost=None, raise_exc=None, record=None) -> None:
        self._record = record
        self._raise = raise_exc
        self._hidden_cost = hidden_cost

    def create_with_completion(self, *, response_model, **kwargs):
        """Return ``(parsed_instance, raw_completion)`` or raise the configured error."""
        sink = self._record
        if sink is not None:
            # Recording happens before any failure so tests can inspect the call.
            sink.update(kwargs)
            sink["response_model"] = response_model

        failure = self._raise
        if failure is not None:
            raise failure

        # Instructor returns a validated instance of the requested model; build one
        # from the first value listed in the model's ``kind`` annotation.
        kind_annotation = response_model.model_fields["kind"].annotation
        first_kind = kind_annotation.__args__[0]
        parsed = response_model(kind=first_kind, text="a mossy booth")
        raw = _FakeRawCompletion(hidden_cost=self._hidden_cost)
        return parsed, raw


def _install_fakes(monkeypatch, *, client, from_litellm_kw: dict | None = None) -> None:
    """Inject fake ``instructor`` (from_litellm -> client) and ``litellm`` modules.

    *from_litellm_kw*, when given, records the kwargs ``complete_structured`` passes to
    ``instructor.from_litellm`` (e.g. the chosen ``mode``) for assertion.
    """
    fake_litellm = types.ModuleType("litellm")
    fake_litellm.completion = lambda **kw: None

    def _completion_cost(completion_response=None, **_kw):
        return 0.0

    fake_litellm.completion_cost = _completion_cost

    def _from_litellm(completion, **kw):
        if from_litellm_kw is not None:
            from_litellm_kw.update(kw)
        return client

    fake_instructor = types.ModuleType("instructor")
    fake_instructor.from_litellm = _from_litellm
    # Mode is an enum on the real package; a name->value stand-in is enough for the
    # provider's ``getattr(instructor.Mode, structured_mode.upper())`` resolution.
    fake_instructor.Mode = types.SimpleNamespace(JSON_SCHEMA="json_schema", JSON="json", TOOLS="tools")

    monkeypatch.setitem(sys.modules, "litellm", fake_litellm)
    monkeypatch.setitem(sys.modules, "instructor", fake_instructor)


# ── provider.complete_structured ────────────────────────────────────────────────


class TestCompleteStructured:
    """``LiteLLMProvider.complete_structured`` exercised against the faked client."""

    def test_returns_validated_model_and_captures_usage(self, monkeypatch):
        """The parsed model is returned and the token counts land in ``last_usage``."""
        _install_fakes(monkeypatch, client=_FakeInstructorClient())
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        output_cls = build_output_model(["world.observed"])
        parsed = provider.complete_structured("scene-whisperer", "grow the wood", output_cls)
        assert isinstance(parsed, output_cls)
        assert parsed.kind == "world.observed"
        expected_tokens = {"prompt_tokens": 11, "completion_tokens": 7, "total_tokens": 18}
        for key, count in expected_tokens.items():
            assert provider.last_usage[key] == count

    def test_captures_cost_from_hidden_params(self, monkeypatch):
        """A ``response_cost`` in the raw completion's hidden params becomes the cost."""
        fake_client = _FakeInstructorClient(hidden_cost=0.05)
        _install_fakes(monkeypatch, client=fake_client)
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        output_cls = build_output_model(["agent.spoke"])
        provider.complete_structured("echo", "drop a pebble", output_cls)
        assert provider.last_usage["cost_usd"] == pytest.approx(0.05)
        assert provider.last_cost == pytest.approx(0.05)

    def test_passes_response_model_and_retries(self, monkeypatch):
        """The call carries the response model, retry budget, model id and messages."""
        captured: dict = {}
        _install_fakes(monkeypatch, client=_FakeInstructorClient(record=captured))
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1", max_retries=4)
        output_cls = build_output_model(["world.observed"])
        provider.complete_structured("seedkeeper", "observe", output_cls)
        assert captured["response_model"] is output_cls
        assert captured["max_retries"] == 4
        assert captured["model"] == "openai/m"
        message_roles = [message["role"] for message in captured["messages"]]
        assert message_roles == ["system", "user"]

    def test_defaults_to_guided_json_schema_mode(self, monkeypatch):
        """With no override, ``from_litellm`` is asked for the JSON-schema mode."""
        # Guided decoding, not tool calling: a model with no tool-call parser (e.g. MiniCPM)
        # still validates instead of 400ing. The mode rides on from_litellm, not the call.
        factory_kwargs: dict = {}
        _install_fakes(monkeypatch, client=_FakeInstructorClient(), from_litellm_kw=factory_kwargs)
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        output_cls = build_output_model(["agent.spoke"])
        provider.complete_structured("echo", "x", output_cls)
        assert factory_kwargs["mode"] == "json_schema"

    def test_structured_mode_override_is_honored(self, monkeypatch):
        """An explicit ``structured_mode`` is resolved and handed to ``from_litellm``."""
        factory_kwargs: dict = {}
        _install_fakes(monkeypatch, client=_FakeInstructorClient(), from_litellm_kw=factory_kwargs)
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1", structured_mode="tools")
        output_cls = build_output_model(["agent.spoke"])
        provider.complete_structured("echo", "x", output_cls)
        assert factory_kwargs["mode"] == "tools"

    def test_error_zeroes_usage_and_reraises(self, monkeypatch):
        """A client failure propagates and leaves the usage record zeroed."""
        failing_client = _FakeInstructorClient(raise_exc=RuntimeError("boom"))
        _install_fakes(monkeypatch, client=failing_client)
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        output_cls = build_output_model(["agent.spoke"])
        with pytest.raises(RuntimeError):
            provider.complete_structured("echo", "x", output_cls)
        assert provider.last_usage["total_tokens"] == 0
        assert provider.last_usage["cost_usd"] == 0.0


# ── ManifestAgent path selection ────────────────────────────────────────────────


class _Agent(ManifestAgent):
    # Minimal concrete agent used by the path-selection tests in this module.
    # ``may_emit`` holds a single kind, and those tests expect ``world.observed``
    # from every path; ``model_profile`` ("tiny") is the profile name the
    # deterministic-stub test looks up on the router.
    manifest = AgentManifest(
        name="scene-whisperer",
        persona="You grow the wood in one strange sentence.",
        may_emit=["world.observed"],
        model_profile="tiny",
    )


@dataclass
class _StructuredProvider:
    """Stand-in live provider exposing ``complete_structured``.

    ``last_usage`` defaults to a small fixed usage record, built fresh for every
    instance so tests cannot leak state into one another.  A ``last_usage`` passed
    to the constructor is kept as given rather than being overwritten.
    ``seen_model`` holds the response model from the most recent
    ``complete_structured`` call (``None`` until the first call).
    """

    last_usage: dict = field(
        default_factory=lambda: {
            "prompt_tokens": 5,
            "completion_tokens": 3,
            "total_tokens": 8,
            "cost_usd": 0.002,
        }
    )
    seen_model: object = None

    def complete_structured(self, role, prompt, response_model):
        """Record *response_model* and return a fixed, valid instance of it."""
        self.seen_model = response_model
        return response_model(kind="world.observed", text="A booth opens in a root.")

    def complete(self, role, prompt):  # pragma: no cover - must not be reached
        """Fail loudly: the structured path must never fall through to here."""
        raise AssertionError("structured path must not call complete()")


@dataclass
class _FixedRouter:
    """Router double that hands back one preset provider for every profile."""

    provider: object

    def for_profile(self, profile):
        """Return the preset provider; *profile* is accepted but not consulted."""
        return self.provider


class TestManifestAgentStructuredPath:
    """Which completion path ``ManifestAgent.act`` takes for a given provider."""

    def test_uses_structured_path_when_available(self):
        """A provider with ``complete_structured`` yields a validated payload."""
        structured = _StructuredProvider()
        agent = _Agent(_FixedRouter(structured))
        event = agent.act("r", 1, StageProjection(seed="moss"), ())
        assert event.kind == "world.observed"
        assert event.payload["text"] == "A booth opens in a root."
        # The validated path never wraps prose, so no fallback marker is present.
        assert "_raw_fallback" not in event.payload
        # Cost/tokens flowed through from the provider for the Governor.
        usage = agent.last_usage
        assert usage["cost_usd"] == pytest.approx(0.002)
        assert usage["total_tokens"] == 8
        # The constructed model was constrained to the manifest's may_emit.
        kind_annotation = structured.seen_model.model_fields["kind"].annotation
        assert kind_annotation.__args__ == ("world.observed",)

    def test_deterministic_stub_uses_parser_path(self):
        """The offline stub has no structured method, so the parser path runs."""
        # Offline router yields the stub, which has no complete_structured: the
        # tolerant parser runs and (for prose) marks the fallback.
        agent = _Agent(ModelRouter(offline=True))
        stub = agent.router.for_profile("tiny")
        assert not hasattr(stub, "complete_structured")
        event = agent.act("r", 1, StageProjection(seed="moss"), ())
        assert event.kind == "world.observed"  # coerced to the only allowed kind
        assert event.payload.get("_raw_fallback") is True
        assert agent.last_usage["total_tokens"] > 0

    def test_structured_failure_falls_back_to_parser(self):
        """A raising structured call degrades to ``complete()`` plus the parser."""

        # If the live structured call raises, the agent still produces an event
        # via the parser path rather than dropping the turn.
        class _FailingProvider:
            def __init__(self):
                self.calls = []
                self.last_usage = {"total_tokens": 0, "cost_usd": 0.0}

            def complete_structured(self, role, prompt, response_model):
                raise RuntimeError("validation exhausted")

            def complete(self, role, prompt):
                self.calls.append(prompt)
                fallback_usage = {
                    "prompt_tokens": 4,
                    "completion_tokens": 2,
                    "total_tokens": 6,
                    "cost_usd": 0.0,
                }
                self.last_usage = fallback_usage
                return '{"kind": "world.observed", "text": "fallback line"}'

        failing = _FailingProvider()
        agent = _Agent(_FixedRouter(failing))
        event = agent.act("r", 1, StageProjection(), ())
        assert event.kind == "world.observed"
        assert event.payload["text"] == "fallback line"
        assert failing.calls, "fallback should call complete()"
        assert agent.last_usage["total_tokens"] == 6