File size: 11,270 Bytes
"""Declarative configuration schemas — config as validatable data.

These Pydantic models are the contract for the 'easily configurable' surface.
Every knob — which agents exist, what they emit, which model tier they use, the
scenario goal, the cast that participates, tool grants, budgets — is expressed as
data that:

* round-trips to/from YAML files under ``config/`` (the on-disk surface), and
* can equally be produced by a UI form or an LLM and checked with one call.

That last property is the point: ``validate_world`` / ``validate_agent`` /
``validate_scenario`` turn an arbitrary dict into a typed, cross-checked object or
a precise error. So "let an agent build the configuration from a prompt" reduces
to "emit JSON, validate it, run it." See ADR-0011.

The agent schema itself is :class:`AgentManifest` (``src/core/manifest.py``) — we
reuse it here rather than duplicating, so the four stable contracts stay singular.
"""
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field, model_validator
from src.core.manifest import AgentManifest
# ── model profiles ──────────────────────────────────────────────────────────────
class ModelProfileConfig(BaseModel):
    # Concrete binding for one logical model profile: the model identifier, an
    # optional endpoint/key pair, and sampling defaults. Unknown keys are rejected
    # (``extra="forbid"``). ``protected_namespaces=()`` clears pydantic's protected
    # prefixes — presumably so the ``model`` field raises no warning; confirm.
    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    model: str
    base_url: str | None = None
    """OpenAI-compatible endpoint URL (ends in /v1). Env-templatable in YAML via
    ``${MODAL_LLM_BASE_URL}`` so the Modal workspace is never hard-coded."""
    api_key: str | None = None
    """Key for the endpoint (env-templatable, e.g. ``${MODAL_LLM_KEY}``). vLLM
    accepts any token unless the server enforces one."""
    temperature: float = 0.8
    max_tokens: int = 256

    @model_validator(mode="after")
    def _blank_to_none(self) -> "ModelProfileConfig":
        """Normalise empty-string endpoint bindings back to ``None``.

        An unset ``${VAR}`` template expands to "" (see registry._expand_env), and
        the live transport should omit such bindings rather than send a blank.

        Returns:
            The same instance, with ``base_url`` / ``api_key`` set to ``None``
            wherever they were the empty string.
        """
        # Assign only when the value really is "". Assigning a field on a pydantic
        # model records it in ``model_fields_set``, so rewriting an untouched
        # ``None`` default would make an omitted field look explicitly provided
        # (and surface it in ``exclude_unset`` dumps when round-tripping to YAML).
        if self.base_url == "":
            self.base_url = None
        if self.api_key == "":
            self.api_key = None
        return self
class ModelsConfig(BaseModel):
    # Model-selection settings: a stub/live switch plus the per-profile bindings.
    # Unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    offline: bool | None = None
    """Tri-state transport switch: ``True`` always uses the deterministic stub,
    ``False`` always goes live, and ``None`` decides automatically (live when
    credentials are present, stub otherwise)."""

    profiles: dict[str, ModelProfileConfig] = Field(default_factory=dict)
    """Maps each logical profile (tiny/fast/balanced/strong) to its concrete binding."""
# ── budgets ─────────────────────────────────────────────────────────────────────
class GovernorConfig(BaseModel):
    # Budget limits for a run. Every field has a default, so an empty mapping
    # validates; unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    # Count-based caps. How they are enforced is not visible in this module.
    max_turns: int = 100
    max_calls_per_turn: int = 8
    max_total_calls: int = 500

    # Optional caps. NOTE(review): ``None`` presumably means "no limit" — confirm
    # against the governor implementation.
    max_total_tokens: int | None = None
    hourly_budget_usd: float | None = None
# ── competition ─────────────────────────────────────────────────────────────────
class CompetitionConfig(BaseModel):
    """Declares whether — and how — a scenario produces a winner (ADR-0029).

    A scenario can be a ``versus`` contest — between named ``teams`` (asymmetric
    sides, e.g. The Steeped's spy vs herd) or between ``symmetric_seats`` (identical
    seats that differ only by model, e.g. Debate Duel / Beat Battle — the comparison
    that makes the model-leaderboard meaningful) — a ``judged`` pick where the
    judge's verdict *is* the result, or ``none`` (the default) where nobody wins.

    ``winner`` downstream carries either an agent name or a team label, so the team
    labels here must stay distinct from agent names — that cross-cast check lives in
    :meth:`WorldConfig._check_cast_references`, while the rules a competition can
    enforce on its own (team shape, disjointness, seat count) live in the validator
    below.
    """

    model_config = ConfigDict(extra="forbid")

    kind: Literal["versus", "judged", "none"] = "none"
    """How a winner is derived — ``versus`` (team or seat contest), ``judged`` (the
    judge's pick is the answer), or ``none`` (no winner; the default and the absent
    block)."""

    teams: dict[str, list[str]] | None = None
    """Team label → member agent names. Permitted only when ``kind == 'versus'``."""

    symmetric_seats: list[str] | None = None
    """Cast members occupying *identical* seats that differ only by which model fills
    them — the "which model argues better" comparison. ``versus`` only; needs ≥2
    entries, each listed once. An alternative to ``teams`` (a versus scenario
    declares one or the other)."""

    @model_validator(mode="after")
    def _check_teams(self) -> "CompetitionConfig":
        """Enforce the rules a competition block can verify without the cast.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if ``teams`` or ``symmetric_seats`` is given while ``kind``
                is not ``'versus'``; if a ``versus`` block declares neither; if
                ``symmetric_seats`` has fewer than two entries or repeats one; if
                ``teams`` is empty, has a team with no members, or places one
                agent on more than one team.
        """
        if self.kind != "versus":
            if self.teams is not None:
                raise ValueError(f"competition.teams is only allowed when kind is 'versus' (got kind={self.kind!r})")
            if self.symmetric_seats is not None:
                raise ValueError(
                    f"competition.symmetric_seats is only allowed when kind is 'versus' (got kind={self.kind!r})"
                )
            return self

        # kind == "versus": the contest is described by teams OR symmetric_seats.
        if self.teams is None and self.symmetric_seats is None:
            raise ValueError("competition.kind 'versus' requires either a 'teams' mapping or 'symmetric_seats'")

        if self.symmetric_seats is not None:
            if len(self.symmetric_seats) < 2:
                raise ValueError("competition.symmetric_seats needs ≥2 entries to be a contest")
            # The same cast member listed twice would pass the length check yet
            # leave fewer distinct contestants than entries, so reject repeats.
            seen_seats: set[str] = set()
            repeated: set[str] = set()
            for seat in self.symmetric_seats:
                if seat in seen_seats:
                    repeated.add(seat)
                seen_seats.add(seat)
            if repeated:
                raise ValueError(
                    f"competition.symmetric_seats must not list the same cast member twice: {sorted(repeated)}"
                )

        if self.teams is not None:
            if not self.teams:
                raise ValueError("competition.kind 'versus' requires a non-empty 'teams' mapping")
            empty = [label for label, members in self.teams.items() if not members]
            if empty:
                raise ValueError(f"competition.teams has empty member lists for teams: {sorted(empty)}")
            # Remember the team each agent was last seen on; meeting the agent
            # again under a different label means the teams overlap.
            seen: dict[str, str] = {}
            overlap: set[str] = set()
            for label, members in self.teams.items():
                for member in members:
                    if member in seen and seen[member] != label:
                        overlap.add(member)
                    seen[member] = label
            if overlap:
                raise ValueError(
                    f"competition.teams must be mutually disjoint; agents on more than one team: {sorted(overlap)}"
                )
        return self
# ── scenario ────────────────────────────────────────────────────────────────────
class ScenarioConfig(BaseModel):
    # One scenario definition. ``name`` and ``default_seed`` are the only required
    # fields; unknown keys are rejected.
    model_config = ConfigDict(extra="forbid")

    name: str
    title: str = ""

    goal: str = ""
    """Shared objective given to the entire cast (it is rendered into every prompt)."""

    default_seed: str
    example_seeds: list[str] = Field(default_factory=list)

    cast: list[str] = Field(default_factory=list)
    """Names of the participating agents, resolved against the agent registry.
    Changing who takes part means editing this list and nothing else."""

    genesis_text: str | None = None
    governor: GovernorConfig | None = None

    competition: CompetitionConfig | None = None
    """Optional winner contract (ADR-0029); leaving it out is the same as ``none``
    (no winner). The authoring checklist (``tests/test_scenario_contract.py``)
    demands an explicit block on every *shipped* scenario, yet the schema itself
    stays permissive so that a partial world from the Lab/an LLM still validates."""
# ── the whole world ─────────────────────────────────────────────────────────────
class WorldConfig(BaseModel):
    """A complete, self-contained, validatable description of a runnable world.

    A UI or an LLM can emit one of these (agents + scenarios + models + budgets
    inline) and ``validate_world`` confirms it is coherent before anything runs —
    including that every scenario's cast references a defined agent.
    """

    model_config = ConfigDict(extra="forbid")

    models: ModelsConfig = Field(default_factory=ModelsConfig)
    governor: GovernorConfig = Field(default_factory=GovernorConfig)
    agents: list[AgentManifest] = Field(default_factory=list)
    scenarios: list[ScenarioConfig] = Field(default_factory=list)

    @model_validator(mode="after")
    def _check_cast_references(self) -> "WorldConfig":
        """Cross-check each scenario against the agents defined in this world.

        Per scenario, in order: every cast name must be a defined agent; when a
        competition block is present, every team member and symmetric seat must be
        in that scenario's cast, and no team label may equal an agent name.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: on the first scenario that breaks one of these rules.
        """
        known_agents = {agent.name for agent in self.agents}

        for scenario in self.scenarios:
            # Keep cast order (and repeats) in the report, hence a list not a set.
            unknown = [name for name in scenario.cast if name not in known_agents]
            if unknown:
                raise ValueError(
                    f"scenario {scenario.name!r} references undefined agents: {unknown}. "
                    f"Defined agents: {sorted(known_agents)}"
                )

            contest = scenario.competition
            if contest is not None:
                cast_names = set(scenario.cast)
                team_map = contest.teams or {}

                # Every team member AND every symmetric seat must be in this
                # scenario's cast (ADR-0029 §1).
                participants: set[str] = set(contest.symmetric_seats or [])
                for roster in team_map.values():
                    participants.update(roster)
                strays = sorted(participants - cast_names)
                if strays:
                    raise ValueError(
                        f"scenario {scenario.name!r} competition members not in its cast: {strays}. "
                        f"Cast: {sorted(cast_names)}"
                    )

                # A team label must not collide with any agent name, or the winner
                # union (agent name OR team label) becomes ambiguous (ADR-0029 §1).
                clashes = sorted(known_agents.intersection(team_map))
                if clashes:
                    raise ValueError(
                        f"scenario {scenario.name!r} competition team labels collide with agent names: {clashes}. "
                        "Team labels must be distinct from agent names to keep the winner unambiguous."
                    )

        return self
# ── validation entrypoints (the 'configure from a prompt' surface) ──────────────
def validate_agent(data: dict) -> AgentManifest:
    """Check a single agent definition (for instance UI form output or an agent
    proposed by an LLM) and return it as an :class:`AgentManifest`.

    Delegates to ``AgentManifest.model_validate``; whatever that raises on bad
    input propagates to the caller.
    """
    manifest = AgentManifest.model_validate(data)
    return manifest
def validate_scenario(data: dict) -> ScenarioConfig:
    """Check a single scenario definition and return it as a :class:`ScenarioConfig`.

    Delegates to ``ScenarioConfig.model_validate``; whatever that raises on bad
    input propagates to the caller.
    """
    scenario = ScenarioConfig.model_validate(data)
    return scenario
def validate_world(data: dict) -> WorldConfig:
    """Check an entire world (agents + scenarios + models + budgets) in one call
    and return it as a :class:`WorldConfig`.

    Delegates to ``WorldConfig.model_validate``; whatever that raises on bad
    input propagates to the caller.
    """
    world = WorldConfig.model_validate(data)
    return world
|