"""Shared Agent protocol.

Every agent that proposes candidate solutions — the base proposer, the
SFT-tuned model, the GRPO-tuned model — implements this interface. Downstream
code (benchmarks, reward evaluation, training rollouts) depends only on the
protocol, never on a concrete backend.
"""
from __future__ import annotations

from typing import Protocol
class Agent(Protocol):
    """Generates candidate solutions for natural-language / signature-style prompts.

    This is a structural (duck-typed) interface: any object that provides
    compatible ``generate`` and ``close`` methods satisfies it, without
    having to inherit from this class.
    """

    def generate(
        self,
        prompts: list[str],
        *,
        n: int = 1,
        temperature: float = 0.2,
        top_p: float = 0.95,
        max_tokens: int = 1024,
    ) -> list[list[str]]:
        """For each prompt, return `n` candidate completions.

        Args:
            prompts: The prompts to generate candidate solutions for.
            n: Number of candidates to return per prompt.
            temperature: Generation setting passed by keyword; presumably the
                sampling temperature (interpretation is up to the backend).
            top_p: Generation setting passed by keyword; presumably the
                nucleus-sampling probability mass (backend-defined).
            max_tokens: Generation setting passed by keyword; presumably the
                per-candidate completion length cap (backend-defined).

        Returns:
            A list of length `len(prompts)`; each element is a list of
            length `n` containing the post-extracted Python source of each
            candidate solution (no markdown fences, no commentary).
        """
        ...

    def close(self) -> None:
        """Release any model / GPU resources. Idempotent."""
        ...