"""Inference protocols. Mock and Real backends implement these so the game
layer never knows which is wired up.

The `generate_messages` form is what smolagents calls into (via the adapter
in `game/smol_adapter.py`). The simpler `generate` form is kept for cheap
free-text classification (e.g. board sentiment) where chat formatting is
unnecessary overhead.
"""

from __future__ import annotations

from typing import Protocol


class LLMBackend(Protocol):
    """Structural interface for text-generation backends.

    Any object exposing `model_id`, `generate` and `generate_messages` with
    these signatures satisfies the protocol; no inheritance is required.
    """

    # Identifier of the model this backend is serving.
    model_id: str

    def generate(self, system_prompt: str, user_prompt: str) -> str:
        """Single-turn completion from a system prompt plus a user prompt.

        This is the lightweight path, used for sentiment classification.
        Returns the generated text.
        """
        ...

    def generate_messages(
        self,
        messages: list[dict],
        stop_sequences: list[str] | None = None,
        max_tokens: int = 1024,
        grammar: str | None = None,
    ) -> str:
        """Multi-turn (chat) completion; returns the generated text.

        `messages` uses the OpenAI layout:
        `[{"role": "system|user|assistant", "content": str}]`.

        `stop_sequences` and `max_tokens` are optional generation controls
        (defaults: `None` and `1024`).

        `grammar` is an optional llama.cpp GBNF grammar string. Only the real
        backend honours it (the mock ignores it); it exists to restrict
        CodeAgent output from small models to valid Python code blocks.
        """
        ...


class ImageBackend(Protocol):
    """Structural interface for image-generation backends."""

    def generate(self, prompt: str) -> bytes:
        """Produce an image for `prompt` and return it as PNG-encoded bytes."""
        ...