"""LLM client protocol — provider-agnostic interface.

Spec: docs/Specs.md §8.2, docs/06-AILayer.md §3.1.
Today's only implementation is `engine/llm/gemini.py`.
"""

from __future__ import annotations

from dataclasses import dataclass
from enum import StrEnum
from typing import TYPE_CHECKING, Literal, Protocol

if TYPE_CHECKING:
    from pydantic import BaseModel


class Role(StrEnum):
    """LLM call role — drives model selection and prompt scaffolding."""

    REASONER = "reasoner"
    SUMMARIZER = "summarizer"


@dataclass(frozen=True)
class Message:
    """One immutable turn of a chat-style prompt.

    Instances are frozen: fields cannot be reassigned after construction,
    and instances are hashable and compare by value.
    """

    # Speaker of this turn. The Literal is a static-typing constraint only;
    # the dataclass does not validate it at runtime.
    role: Literal["system", "user", "assistant"]
    # Text of the turn.
    content: str


@dataclass
class LLMResponse:
    """Result record for one LLM call.

    `parsed` is populated when a response_schema was provided for the call;
    otherwise it keeps its default of None.
    """

    # Text returned by the model.
    raw_text: str
    # Token counts for the call (prompt side / completion side).
    input_tokens: int
    output_tokens: int
    # Identifier of the model that served the call.
    model: str
    # Call duration in milliseconds (per the field name).
    latency_ms: int
    # Cost of the call in US dollars (per the field name).
    cost_usd: float
    # Schema-validated payload; None when no response_schema was requested.
    parsed: BaseModel | None = None


class LLMClient(Protocol):
    """Structural interface that every LLM provider implementation satisfies."""

    async def complete(  # noqa: PLR0913 — keyword-only contract from Specs §8.2
        self,
        *,
        role: Role,
        messages: list[Message],
        response_schema: type[BaseModel] | None = None,
        max_tokens: int,
        temperature: float = 0.0,
        timeout_ms: int,
        correlation_id: str,
        thinking_budget: int | None = None,
    ) -> LLMResponse:
        """Run a single LLM call and return its outcome.

        Every argument is keyword-only.

        Args:
            role: Call role; drives model selection and prompt scaffolding.
            messages: The conversation turns to send.
            response_schema: Optional pydantic model class. When provided,
                `LLMResponse.parsed` is populated.
            max_tokens: Token limit for the call. Gemini-2.5 internal
                "thinking" tokens count against this limit.
            temperature: Sampling temperature; defaults to 0.0.
            timeout_ms: Timeout for the call, in milliseconds (per the name).
            correlation_id: Caller-supplied identifier for the call —
                presumably used for tracing/logging; confirm against the
                implementation.
            thinking_budget: Budget for Gemini-2.5 "thinking" tokens. Set 0
                to disable (Flash summarization, simple structured
                extraction). Leave None for the default behavior (Reasoner
                verdicts benefit from thinking).

        Returns:
            The `LLMResponse` describing the call.
        """
        ...