"""Provider Protocols and shared request/response models. The codebase has two orthogonal capabilities: - LLMProvider: chat-completion / SQL generation. All three slots implement it. - EmbeddingProvider: vector embeddings. Only Mistral implements it for now — schema-RAG and few-shot retrieval are pinned to mistral-embed. """ from __future__ import annotations from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field class GenerateRequest(BaseModel): prompt: str system: str | None = None temperature: float = 0.0 max_tokens: int = 2048 json_mode: bool = False """When True and the provider supports it, ask the API to constrain output to a JSON object (OpenAI/Groq response_format=json_object). Mistral codestral's chat endpoint does NOT support response_format server-side, so we just send the request — the caller still owns parsing. Set ON for Groq/GitHub-Models to dramatically reduce the "model wrapped JSON in prose" failure rate that costs us 60% of valid pred_sql in the n=50 Groq smoke (2026-05-12).""" class GenerateResponse(BaseModel): text: str model: str input_tokens: int = 0 output_tokens: int = 0 latency_ms: float = 0.0 class EmbedRequest(BaseModel): texts: list[str] = Field(min_length=1) class EmbedResponse(BaseModel): vectors: list[list[float]] model: str class ProviderError(RuntimeError): """Raised when a provider call fails for any non-network reason we surface.""" @runtime_checkable class LLMProvider(Protocol): name: str model: str def generate(self, req: GenerateRequest) -> GenerateResponse: ... @runtime_checkable class EmbeddingProvider(Protocol): name: str embed_model: str def embed(self, req: EmbedRequest) -> EmbedResponse: ...