"""Inference protocols. Mock and Real backends implement these so the game layer never knows which is wired up. The `generate_messages` form is what smolagents calls into (via the adapter in `game/smol_adapter.py`). The simpler `generate` form is kept for cheap free-text classification (e.g. board sentiment) where chat formatting is unnecessary overhead. """ from __future__ import annotations from typing import Protocol class LLMBackend(Protocol): model_id: str def generate(self, system_prompt: str, user_prompt: str) -> str: """One-shot text completion. Used for sentiment classification.""" ... def generate_messages( self, messages: list[dict], stop_sequences: list[str] | None = None, max_tokens: int = 1024, grammar: str | None = None, ) -> str: """Chat-style completion. `messages` follows the OpenAI shape: `[{"role": "system|user|assistant", "content": str}]`. `grammar` is a llama.cpp GBNF grammar string (real backend only — mock ignores it). Used to constrain CodeAgent output to valid Python code blocks for small models. """ ... class ImageBackend(Protocol): def generate(self, prompt: str) -> bytes: """Return PNG image bytes.""" ...