"""Provider Protocols and shared request/response models.

The codebase has two orthogonal capabilities:
- LLMProvider: chat-completion / SQL generation. All three slots implement it.
- EmbeddingProvider: vector embeddings. Only Mistral implements it for now —
  schema-RAG and few-shot retrieval are pinned to mistral-embed.
"""

from __future__ import annotations

from typing import Protocol, runtime_checkable

from pydantic import BaseModel, Field


class GenerateRequest(BaseModel):
    """Input payload passed to ``LLMProvider.generate``.

    ``prompt`` is the only required field; all other fields have defaults.
    """

    prompt: str = Field(...)
    # Presumably the system message; ``None`` when none is supplied.
    system: str | None = Field(default=None)
    temperature: float = Field(default=0.0)
    max_tokens: int = Field(default=2048)
    json_mode: bool = Field(default=False)
    """If True and the provider supports it, the API is asked to restrict
    its output to a JSON object (OpenAI/Groq response_format=json_object).
    Mistral codestral's chat endpoint does NOT support response_format
    server-side, so for it the request is simply sent and parsing stays
    the caller's job. Turn this ON for Groq/GitHub-Models: it dramatically
    cuts the "model wrapped JSON in prose" failure rate, which cost 60% of
    valid pred_sql in the n=50 Groq smoke run (2026-05-12)."""


class GenerateResponse(BaseModel):
    """Result handed back by ``LLMProvider.generate``.

    ``text`` and ``model`` are required. The token counts and latency
    default to zero when they are not supplied.
    """

    text: str = Field(...)
    model: str = Field(...)
    input_tokens: int = Field(default=0)
    output_tokens: int = Field(default=0)
    # Presumably milliseconds, going by the field name.
    latency_ms: float = Field(default=0.0)


class EmbedRequest(BaseModel):
    """Input payload passed to ``EmbeddingProvider.embed``."""

    # ``...`` marks the field as required; ``min_length=1`` makes an
    # empty list fail validation.
    texts: list[str] = Field(..., min_length=1)


class EmbedResponse(BaseModel):
    """Result handed back by ``EmbeddingProvider.embed``.

    Both fields are required.
    """

    # Presumably one vector per input text, in request order — confirm
    # against the implementing provider.
    vectors: list[list[float]] = Field(...)
    model: str = Field(...)


class ProviderError(RuntimeError):
    """Signals a failed provider call.

    Used for any non-network failure reason that we choose to surface.
    """


@runtime_checkable
class LLMProvider(Protocol):
    """Structural interface for chat-completion / SQL-generation backends.

    Any object exposing ``name``, ``model`` and ``generate`` satisfies it;
    no inheritance is needed. Because the protocol is runtime-checkable,
    ``isinstance`` works, but it only verifies that the members exist, not
    their types or signatures.
    """

    # Presumably identifiers for the provider and the model it calls —
    # confirm against the concrete implementations.
    name: str
    model: str

    def generate(self, req: GenerateRequest) -> GenerateResponse:
        """Run one generation for ``req`` and return the provider's response."""


@runtime_checkable
class EmbeddingProvider(Protocol):
    """Structural interface for vector-embedding backends.

    Any object exposing ``name``, ``embed_model`` and ``embed`` satisfies
    it; no inheritance is needed. Because the protocol is
    runtime-checkable, ``isinstance`` works, but it only verifies that the
    members exist, not their types or signatures.
    """

    # Presumably identifiers for the provider and its embedding model —
    # confirm against the concrete implementations.
    name: str
    embed_model: str

    def embed(self, req: EmbedRequest) -> EmbedResponse:
        """Embed the texts carried by ``req`` and return the provider's response."""