Spaces:
Running on Zero
Running on Zero
"""Declarative configuration schemas — config as validatable data.

These Pydantic models are the contract for the 'easily configurable' surface.
Every knob — which agents exist, what they emit, which model tier they use, the
scenario goal, the cast that participates, tool grants, budgets — is expressed as
data that:

* round-trips to/from YAML files under ``config/`` (the on-disk surface), and
* can equally be produced by a UI form or an LLM and checked with one call.

That last property is the point: ``validate_world`` / ``validate_agent`` /
``validate_scenario`` turn an arbitrary dict into a typed, cross-checked object or
a precise error. So "let an agent build the configuration from a prompt" reduces
to "emit JSON, validate it, run it." See ADR-0011.

The agent schema itself is :class:`AgentManifest` (``src/core/manifest.py``) — we
reuse it here rather than duplicating, so the four stable contracts stay singular.
"""
| from __future__ import annotations | |
| from typing import Literal | |
| from pydantic import BaseModel, ConfigDict, Field, model_validator | |
| from src.core.manifest import AgentManifest | |
# ── model profiles ─────────────────────────────────────────────────────────────
class ModelProfileConfig(BaseModel):
    """Concrete binding for one logical model profile.

    Unknown keys are rejected (``extra="forbid"``). ``protected_namespaces=()``
    switches off Pydantic's ``model_`` prefix guard for this class.
    """

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    model: str
    base_url: str | None = None
    """OpenAI-compatible endpoint URL (ends in /v1). Env-templatable in YAML via
    ``${MODAL_LLM_BASE_URL}`` so the Modal workspace is never hard-coded."""
    api_key: str | None = None
    """Key for the endpoint (env-templatable, e.g. ``${MODAL_LLM_KEY}``). vLLM
    accepts any token unless the server enforces one."""
    temperature: float = 0.8
    max_tokens: int = 256

    # Without this decorator Pydantic never calls the method, so an empty
    # ``base_url`` / ``api_key`` would leak through as "" instead of None.
    @model_validator(mode="after")
    def _blank_to_none(self) -> "ModelProfileConfig":
        """Normalise empty ``base_url`` / ``api_key`` values to ``None``.

        Runs after field validation and returns the same instance.
        """
        # An unset ``${VAR}`` template expands to "" (see registry._expand_env);
        # normalise empty bindings back to None so the live transport omits them.
        if not self.base_url:
            self.base_url = None
        if not self.api_key:
            self.api_key = None
        return self
class ModelsConfig(BaseModel):
    """Model-layer settings: the offline/live switch plus per-profile bindings.

    Both fields are optional; unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    offline: bool | None = None
    """True = always use the deterministic stub; False = always live; None = auto
    (live when credentials are present, stub otherwise)."""
    profiles: dict[str, ModelProfileConfig] = Field(default_factory=dict)
    """Concrete binding per logical profile (tiny/fast/balanced/strong)."""
# ── budgets ────────────────────────────────────────────────────────────────────
class GovernorConfig(BaseModel):
    """Budget limits for a run; every field has a default.

    Unknown keys are rejected (``extra="forbid"``).

    NOTE(review): the per-field notes below are read off the field names only —
    the code that enforces these limits is not in this module; confirm there.
    """

    model_config = ConfigDict(extra="forbid")

    # Presumably the cap on turns in a single run — TODO confirm.
    max_turns: int = 100
    # Presumably the cap on model calls within one turn — TODO confirm.
    max_calls_per_turn: int = 8
    # Presumably the cap on model calls across the whole run — TODO confirm.
    max_total_calls: int = 500
    # Optional; None presumably means "no token cap" — TODO confirm.
    max_total_tokens: int | None = None
    # Optional; None presumably means "no spend cap" — TODO confirm.
    hourly_budget_usd: float | None = None
# ── competition ────────────────────────────────────────────────────────────────
class CompetitionConfig(BaseModel):
    """Declares whether — and how — a scenario produces a winner (ADR-0029).

    A scenario can be a ``versus`` contest — between named ``teams`` (asymmetric
    sides, e.g. The Steeped's spy vs herd) or between ``symmetric_seats`` (identical
    seats that differ only by model, e.g. Debate Duel / Beat Battle — the comparison
    that makes the model-leaderboard meaningful) — a ``judged`` pick where the
    judge's verdict *is* the result, or ``none`` (the default) where nobody wins.

    ``winner`` downstream carries either an agent name or a team label, so the team
    labels here must stay distinct from agent names — that cross-cast check lives in
    :meth:`WorldConfig._check_cast_references`, while the rules a competition can
    enforce on its own (team shape, disjointness, seat count) live in the validator
    below.
    """

    model_config = ConfigDict(extra="forbid")

    kind: Literal["versus", "judged", "none"] = "none"
    """How a winner is derived — ``versus`` (team or seat contest), ``judged`` (the
    judge's pick is the answer), or ``none`` (no winner; the default and the absent
    block)."""
    teams: dict[str, list[str]] | None = None
    """Team label → member agent names. Permitted only when ``kind == 'versus'``."""
    symmetric_seats: list[str] | None = None
    """Cast members occupying *identical* seats that differ only by which model fills
    them — the "which model argues better" comparison. ``versus`` only; needs ≥2
    entries. An alternative to ``teams`` (a versus scenario declares one or the other)."""

    # The decorator is what registers this method with Pydantic; without it the
    # rules below are never applied and any teams/seats combination validates.
    @model_validator(mode="after")
    def _check_teams(self) -> "CompetitionConfig":
        """Enforce the rules a competition block can check on its own.

        * ``teams`` / ``symmetric_seats`` are rejected unless ``kind`` is ``versus``.
        * ``versus`` needs at least one of ``teams`` or ``symmetric_seats``.
        * ``symmetric_seats`` needs at least two entries.
        * ``teams`` must be non-empty, every team must have members, and no agent
          may sit on more than one team.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: when any rule above is violated (Pydantic surfaces it as a
                validation error).
        """
        if self.kind != "versus":
            if self.teams is not None:
                raise ValueError(f"competition.teams is only allowed when kind is 'versus' (got kind={self.kind!r})")
            if self.symmetric_seats is not None:
                raise ValueError(
                    f"competition.symmetric_seats is only allowed when kind is 'versus' (got kind={self.kind!r})"
                )
            return self

        # kind == "versus": the contest is described by teams OR symmetric_seats.
        # NOTE(review): supplying both is currently accepted even though the field
        # docs describe them as alternatives — confirm whether that is intended.
        if self.teams is None and self.symmetric_seats is None:
            raise ValueError("competition.kind 'versus' requires either a 'teams' mapping or 'symmetric_seats'")
        if self.symmetric_seats is not None and len(self.symmetric_seats) < 2:
            raise ValueError("competition.symmetric_seats needs ≥2 entries to be a contest")

        if self.teams is not None:
            if not self.teams:
                raise ValueError("competition.kind 'versus' requires a non-empty 'teams' mapping")
            empty = [label for label, members in self.teams.items() if not members]
            if empty:
                raise ValueError(f"competition.teams has empty member lists for teams: {sorted(empty)}")

            # Map each agent to the team it was last seen on; an agent that turns
            # up under a different label is on more than one team.
            seen: dict[str, str] = {}
            overlap: set[str] = set()
            for label, members in self.teams.items():
                for member in members:
                    if member in seen and seen[member] != label:
                        overlap.add(member)
                    seen[member] = label
            if overlap:
                raise ValueError(
                    f"competition.teams must be mutually disjoint; agents on more than one team: {sorted(overlap)}"
                )
        return self
# ── scenario ───────────────────────────────────────────────────────────────────
class ScenarioConfig(BaseModel):
    """One scenario definition: identity, goal, seeds, cast and optional overrides.

    ``name`` and ``default_seed`` are the only required fields; everything else has
    a default. Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required identifier; used in WorldConfig's validation error messages.
    name: str
    title: str = ""
    goal: str = ""
    """The shared objective handed to the whole cast (rendered into every prompt)."""
    # Required seed string for the scenario.
    default_seed: str
    example_seeds: list[str] = Field(default_factory=list)
    cast: list[str] = Field(default_factory=list)
    """Agent names that participate, resolved against the agent registry.
    Selecting who participates is just editing this list."""
    genesis_text: str | None = None
    # Optional budget block for this scenario.
    # NOTE(review): presumably overrides the world-level governor — confirm.
    governor: GovernorConfig | None = None
    competition: CompetitionConfig | None = None
    """Optional winner contract (ADR-0029); absent == ``none`` (no winner). The
    authoring checklist (``tests/test_scenario_contract.py``) requires an explicit
    block on every *shipped* scenario, but the schema stays permissive so a partial
    world from the Lab/an LLM still validates."""
# ── the whole world ────────────────────────────────────────────────────────────
class WorldConfig(BaseModel):
    """A complete, self-contained, validatable description of a runnable world.

    A UI or an LLM can emit one of these (agents + scenarios + models + budgets
    inline) and ``validate_world`` confirms it is coherent before anything runs —
    including that every scenario's cast references a defined agent.
    """

    model_config = ConfigDict(extra="forbid")

    models: ModelsConfig = Field(default_factory=ModelsConfig)
    governor: GovernorConfig = Field(default_factory=GovernorConfig)
    agents: list[AgentManifest] = Field(default_factory=list)
    scenarios: list[ScenarioConfig] = Field(default_factory=list)

    # The decorator registers this method with Pydantic; without it the
    # cross-reference checks below never run during ``model_validate``.
    @model_validator(mode="after")
    def _check_cast_references(self) -> "WorldConfig":
        """Cross-check every scenario against the agents defined in this world.

        For each scenario:

        * every ``cast`` entry must name a defined agent;
        * every competition team member and symmetric seat must be in the cast;
        * no competition team label may equal a defined agent name.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: on the first scenario that breaks a rule (Pydantic surfaces
                it as a validation error).
        """
        defined = {a.name for a in self.agents}
        for scenario in self.scenarios:
            missing = [name for name in scenario.cast if name not in defined]
            if missing:
                raise ValueError(
                    f"scenario {scenario.name!r} references undefined agents: {missing}. "
                    f"Defined agents: {sorted(defined)}"
                )

            competition = scenario.competition
            if competition is None:
                continue
            teams = competition.teams or {}
            cast = set(scenario.cast)

            # Every team member AND every symmetric seat must be in this scenario's
            # cast (ADR-0029 §1).
            competitors = {member for roster in teams.values() for member in roster}
            competitors.update(competition.symmetric_seats or [])
            off_cast = sorted(m for m in competitors if m not in cast)
            if off_cast:
                raise ValueError(
                    f"scenario {scenario.name!r} competition members not in its cast: {off_cast}. "
                    f"Cast: {sorted(cast)}"
                )

            # A team label must not collide with any agent name, or the winner union
            # (agent name OR team label) becomes ambiguous (ADR-0029 §1).
            collisions = sorted(label for label in teams if label in defined)
            if collisions:
                raise ValueError(
                    f"scenario {scenario.name!r} competition team labels collide with agent names: {collisions}. "
                    "Team labels must be distinct from agent names to keep the winner unambiguous."
                )
        return self
# ── validation entrypoints (the 'configure from a prompt' surface) ─────────────
def validate_agent(data: dict) -> AgentManifest:
    """Check a single agent definition and return it as a typed manifest.

    Args:
        data: Raw mapping, e.g. UI form output or an LLM-proposed agent.

    Returns:
        The ``AgentManifest`` built from ``data``.

    Raises:
        pydantic.ValidationError: if ``data`` does not satisfy the schema.
    """
    manifest = AgentManifest.model_validate(data)
    return manifest
def validate_scenario(data: dict) -> ScenarioConfig:
    """Check a single scenario definition and return it as a typed config.

    Args:
        data: Raw mapping describing one scenario.

    Returns:
        The ``ScenarioConfig`` built from ``data``.

    Raises:
        pydantic.ValidationError: if ``data`` does not satisfy the schema.
    """
    scenario = ScenarioConfig.model_validate(data)
    return scenario
def validate_world(data: dict) -> WorldConfig:
    """Check a whole world (agents + scenarios + models + budgets) in one call.

    Args:
        data: Raw mapping describing the complete world.

    Returns:
        The ``WorldConfig`` built from ``data``.

    Raises:
        pydantic.ValidationError: if ``data`` does not satisfy the schema.
    """
    world = WorldConfig.model_validate(data)
    return world