"""LLM provider interface with a deterministic offline stub.

Every LLM call in the app goes through :class:`LLMProvider`. In production you set
``ANTHROPIC_API_KEY`` or ``OPENAI_API_KEY`` and a real model answers. With no key set
(CI, tests, local demo) the :class:`StubProvider` produces a grounded answer from the
retrieved context using simple extractive logic — no network, fully reproducible.

The stub is intentionally not a toy: it composes a real answer out of the retrieved
policy/catalog snippets so the end-to-end product (citations, refund decisions,
escalation) is demonstrable with zero paid keys.
"""

from __future__ import annotations

import logging
import re
from typing import Protocol, Sequence

from .config import Settings, get_settings


class LLMProvider(Protocol):
    """Minimal surface the app depends on. Easy to swap or mock.

    Declared as a :class:`typing.Protocol`, so a provider only needs a matching
    ``name`` attribute and ``answer`` method; it does not have to inherit from
    this class.
    """

    # Short identifier of the provider; the implementations in this module use
    # "stub", "anthropic" and "openai".
    name: str

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Answer ``question`` grounded in ``context`` snippets.

        Args:
            question: The question to answer, as plain text.
            context: Text snippets the answer should be based on.

        Returns:
            The reply text.
        """
        ...


# Sentence boundary: a run of whitespace that directly follows ".", "!" or "?".
# The look-behind keeps the punctuation attached to the sentence it ends.
_SENT_SPLIT = re.compile(r"(?<=[.!?])\s+")
# A word is a run of letters/digits in any script. ``[^\W_]`` is ``\w`` minus the
# underscore, so on lowercased ASCII text it matches exactly what ``[a-z0-9]+``
# would, while accented or non-Latin words (e.g. "café") stay in one piece
# instead of being cut at the first non-ASCII letter.
_WORD = re.compile(r"[^\W_]+")


def _tokens(text: str) -> list[str]:
    """Return the lowercased word tokens of ``text`` in order of appearance.

    Punctuation, whitespace and underscores act as separators and are dropped.
    An empty or punctuation-only string yields an empty list.
    """
    return _WORD.findall(text.lower())


class StubProvider:
    """Deterministic, offline, network-free provider.

    Strategy: rank the sentences inside the retrieved context by lexical overlap
    with the question and stitch the best ones into a concise grounded reply. This
    keeps answers faithful to the knowledge base (no hallucination) which is exactly
    what you want from a support agent.
    """

    name = "stub"

    # Reply used when there is nothing in the context to ground an answer on.
    _NO_CONTEXT_REPLY = (
        "I couldn't find that in our policies or catalog, so I'm escalating "
        "this to a human teammate."
    )
    # Sentences with fewer tokens than this are treated as headings/fragments.
    _MIN_SENTENCE_TOKENS = 4
    # Upper bound on how many sentences are stitched into one reply.
    _MAX_SENTENCES = 2

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Build an extractive reply to ``question`` from ``context``.

        Args:
            question: The customer's question.
            context: Retrieved text blocks, most relevant first.

        Returns:
            Up to two context sentences sharing the most terms with the question,
            joined by a space. If no sentence qualifies or none shares a term, the
            first non-blank context block is returned verbatim (stripped). If the
            context is empty or contains only blank blocks, a fixed escalation
            message is returned, so the reply is never an empty string.
        """
        blocks = [block.strip() for block in (context or ())]
        # First block with actual text; used whenever no sentence can be ranked.
        # Picking the first *non-blank* block (rather than blindly ``context[0]``)
        # is what prevents an empty reply when a leading block is whitespace.
        fallback = next((block for block in blocks if block), None)
        if fallback is None:
            return self._NO_CONTEXT_REPLY

        q_terms = set(_tokens(question))

        # Flatten context into candidate sentences. Each is scored by how many query
        # terms it covers (overlap). Trivially short fragments (e.g. a bare section
        # heading like "Shipping.") are skipped so they can't win on a single keyword
        # while carrying no actual answer.
        candidates: list[tuple[int, int, int, str]] = []
        for src_idx, block in enumerate(blocks):
            for sent in _SENT_SPLIT.split(block):
                sent = sent.strip()
                sent_tokens = _tokens(sent)
                if len(sent_tokens) < self._MIN_SENTENCE_TOKENS:
                    continue
                overlap = len(q_terms & set(sent_tokens))
                # ``-src_idx`` so that, under a descending sort, earlier blocks win.
                candidates.append((overlap, len(sent_tokens), -src_idx, sent))

        # Highest overlap first; then longer (more informative); then earlier source.
        # The sort is stable, so full ties keep their order of appearance and the
        # result stays deterministic.
        candidates.sort(key=lambda c: (c[0], c[1], c[2]), reverse=True)
        if not candidates or candidates[0][0] == 0:
            # No usable sentence or no lexical match: surface the most relevant
            # retrieved block rather than fabricate. The orchestrator's confidence
            # will be low and it will likely escalate.
            return fallback

        chosen: list[str] = []
        for overlap, _length, _neg_src, sent in candidates:
            if overlap == 0 or len(chosen) >= self._MAX_SENTENCES:
                break
            if sent not in chosen:  # the same sentence may appear in several blocks
                chosen.append(sent)
        return " ".join(chosen)


class AnthropicProvider:
    """Provider that answers through the Anthropic Messages API."""

    name = "anthropic"

    # Instruction sent as the ``system`` field of every request.
    _SYSTEM_PROMPT = (
        "You are an e-commerce support agent. "
        "Answer ONLY from the provided context. "
        "If the answer is not in the context, say you don't know. "
        "Be concise and cite the relevant policy heading or product name."
    )

    def __init__(self, settings: Settings):
        """Create the SDK client from ``settings`` (API key and model name)."""
        # The SDK is imported here, not at module level, so it remains an optional
        # dependency: the offline path works without it being installed.
        import anthropic  # type: ignore

        self._client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
        self._model = settings.anthropic_model

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Ask the configured model to answer ``question`` from ``context``.

        Each context snippet is labelled ``[doc N]`` (1-based) and sent together
        with the question in a single user message. The text blocks of the
        response are concatenated and stripped; non-text blocks are ignored.
        """
        labelled = [f"[doc {n}]\n{snippet}" for n, snippet in enumerate(context, start=1)]
        corpus = "\n\n".join(labelled)
        user_turn = {
            "role": "user",
            "content": f"Context:\n{corpus}\n\nCustomer question: {question}",
        }
        response = self._client.messages.create(
            model=self._model,
            max_tokens=400,
            system=self._SYSTEM_PROMPT,
            messages=[user_turn],
        )
        text_parts = [part.text for part in response.content if part.type == "text"]
        return "".join(text_parts).strip()


class OpenAIProvider:
    """Provider that answers through the OpenAI Chat Completions API."""

    name = "openai"

    # Instruction sent as the system message of every request.
    _SYSTEM_PROMPT = (
        "You are an e-commerce support agent. "
        "Answer ONLY from the provided context. "
        "If the answer is not in the context, say you don't know. "
        "Be concise and cite the relevant policy heading or product name."
    )

    def __init__(self, settings: Settings):
        """Create the SDK client from ``settings`` (API key and model name)."""
        # Imported on construction only, so the package is needed solely when
        # this provider is actually used.
        from openai import OpenAI  # type: ignore

        self._client = OpenAI(api_key=settings.openai_api_key)
        self._model = settings.openai_model

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Ask the configured model to answer ``question`` from ``context``.

        Each context snippet is labelled ``[doc N]`` (1-based) and sent together
        with the question as the user message, after the system instruction.
        Returns the first choice's content, stripped; a missing or empty content
        yields an empty string.
        """
        labelled = [f"[doc {n}]\n{snippet}" for n, snippet in enumerate(context, start=1)]
        corpus = "\n\n".join(labelled)
        conversation = [
            {"role": "system", "content": self._SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"Context:\n{corpus}\n\nCustomer question: {question}",
            },
        ]
        completion = self._client.chat.completions.create(
            model=self._model,
            max_tokens=400,
            messages=conversation,
        )
        content = completion.choices[0].message.content
        return content.strip() if content else ""


class SafeProvider:
    """Wrap a real provider so any runtime failure degrades to the offline stub.

    A bad API key, rate limit, or network blip on a live call should never 500 a
    support request — we fall back to a grounded extractive answer instead. The
    reported ``name`` is the wrapped provider's, even for replies that came from
    the fallback; every fallback is therefore logged as a warning so the
    degradation is visible to operators rather than silent.
    """

    _log = logging.getLogger(__name__)

    def __init__(self, inner: LLMProvider):
        """Wrap ``inner`` and prepare a :class:`StubProvider` as the fallback."""
        self._inner = inner
        self._fallback = StubProvider()
        self.name = inner.name

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Return the wrapped provider's answer, or the stub's if that fails.

        The fallback is used when the wrapped provider raises any ``Exception``
        or returns an empty answer. Both cases are logged at WARNING level (the
        former with its traceback); neither is re-raised.
        """
        try:
            text = self._inner.answer(question, context)
        except Exception:
            # Deliberately broad: whatever the SDK raises, the customer still
            # gets an answer. The traceback is kept in the log for diagnosis.
            self._log.warning(
                "LLM provider %r failed; answering with the offline stub",
                self.name,
                exc_info=True,
            )
            return self._fallback.answer(question, context)

        if text:
            return text
        self._log.warning(
            "LLM provider %r returned an empty answer; answering with the offline stub",
            self.name,
        )
        return self._fallback.answer(question, context)


def get_provider(settings: Settings | None = None) -> LLMProvider:
    """Resolve a provider from settings.

    ``auto`` (default) uses a real provider only when its key is present, otherwise
    the offline stub; Anthropic is tried before OpenAI. Explicit values
    (``anthropic``/``openai``/``stub``) are honored, but ``anthropic``/``openai``
    still require their API key to be set. Any failure to construct a real provider
    degrades gracefully to the stub so the app never hard-crashes on a missing
    optional dependency, and any failure at call time is caught by
    :class:`SafeProvider`.

    Every degradation that is not the normal "no key configured" case of ``auto``
    is logged as a warning, so a typo in the provider name, a missing key for an
    explicitly requested provider, or a missing SDK does not go unnoticed.

    Args:
        settings: Settings to resolve from; ``get_settings()`` is used when omitted.

    Returns:
        A real provider wrapped in :class:`SafeProvider`, or a :class:`StubProvider`.
    """
    settings = settings or get_settings()
    choice = settings.llm_provider.lower()
    log = logging.getLogger(__name__)

    def _try(builder) -> LLMProvider | None:
        """Build and wrap a real provider; return ``None`` if construction fails."""
        try:
            return SafeProvider(builder(settings))
        except Exception:
            # Typically the optional SDK is not installed. Keep the traceback so
            # the reason for running on the stub can be found in the logs.
            log.warning(
                "Could not construct %s; falling back",
                getattr(builder, "__name__", builder),
                exc_info=True,
            )
            return None

    if choice == "stub":
        return StubProvider()
    if choice == "anthropic" and settings.anthropic_api_key:
        return _try(AnthropicProvider) or StubProvider()
    if choice == "openai" and settings.openai_api_key:
        return _try(OpenAIProvider) or StubProvider()
    if choice == "auto":
        if settings.anthropic_api_key:
            p = _try(AnthropicProvider)
            if p is not None:
                return p
        if settings.openai_api_key:
            p = _try(OpenAIProvider)
            if p is not None:
                return p
    else:
        # Reached for an explicit provider without its key, or an unknown value.
        log.warning(
            "LLM provider %r is unknown or has no API key configured; "
            "using the offline stub",
            choice,
        )
    return StubProvider()