multi-agent-lab / src /core /config.py
agharsallah
Merge remote-tracking branch 'origin/main' into worktree-scenarios
1c7b5c9
Raw
History Blame Contribute Delete
11.3 kB
"""Declarative configuration schemas — config as validatable data.
These Pydantic models are the contract for the 'easily configurable' surface.
Every knob — which agents exist, what they emit, which model tier they use, the
scenario goal, the cast that participates, tool grants, budgets — is expressed as
data that:
* round-trips to/from YAML files under ``config/`` (the on-disk surface), and
* can equally be produced by a UI form or an LLM and checked with one call.
That last property is the point: ``validate_world`` / ``validate_agent`` /
``validate_scenario`` turn an arbitrary dict into a typed, cross-checked object or
a precise error. So "let an agent build the configuration from a prompt" reduces
to "emit JSON, validate it, run it." See ADR-0011.
The agent schema itself is :class:`AgentManifest` (``src/core/manifest.py``) — we
reuse it here rather than duplicating, so the four stable contracts stay singular.
"""
from __future__ import annotations
from typing import Literal
from pydantic import BaseModel, ConfigDict, Field, model_validator
from src.core.manifest import AgentManifest
# ── model profiles ─────────────────────────────────────────────────────────────
class ModelProfileConfig(BaseModel):
    """Concrete binding for one logical model profile.

    Carries the model identifier, an optional endpoint and key for reaching it,
    and the sampling defaults. Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    model: str
    base_url: str | None = None
    """URL of the OpenAI-compatible endpoint (it ends in /v1). In YAML it can be
    templated from the environment, e.g. ``${MODAL_LLM_BASE_URL}``, which keeps
    the Modal workspace out of the checked-in config."""
    api_key: str | None = None
    """Token for the endpoint; also env-templatable (e.g. ``${MODAL_LLM_KEY}``).
    vLLM takes any token unless the server is configured to enforce one."""
    temperature: float = 0.8
    max_tokens: int = 256

    @model_validator(mode="after")
    def _blank_to_none(self) -> "ModelProfileConfig":
        """Collapse falsy ``base_url`` / ``api_key`` values to ``None``.

        Returns:
            This same instance, after normalisation.
        """
        # A ``${VAR}`` template whose variable is unset expands to "" (see
        # registry._expand_env). Mapping every empty binding back to None lets
        # the live transport leave it out.
        for attr in ("base_url", "api_key"):
            if not getattr(self, attr):
                setattr(self, attr, None)
        return self
class ModelsConfig(BaseModel):
    """Model-layer settings: the stub/live switch plus the per-profile bindings.

    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    offline: bool | None = None
    """Tri-state switch. ``True`` always selects the deterministic stub, ``False``
    always goes live, and ``None`` means auto: live if credentials are present,
    otherwise the stub."""
    profiles: dict[str, ModelProfileConfig] = Field(default_factory=dict)
    """Maps each logical profile name (tiny/fast/balanced/strong) to its concrete
    binding. Defaults to an empty mapping."""
# ── budgets ─────────────────────────────────────────────────────────────────────
class GovernorConfig(BaseModel):
    """Budget limits for a run.

    This class only declares the limits and their defaults; whatever enforces
    them lives outside this module. Unknown keys are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # NOTE(review): the per-field meanings below are read off the field names;
    # confirm against the component that consumes this config.
    max_turns: int = 100  # presumably the cap on turns in one run
    max_calls_per_turn: int = 8  # presumably the cap on calls within one turn
    max_total_calls: int = 500  # presumably the cap on calls across the run
    max_total_tokens: int | None = None  # unset by default
    hourly_budget_usd: float | None = None  # unset by default
# ── competition ──────────────────────────────────────────────────────────────────
class CompetitionConfig(BaseModel):
    """Declares whether — and how — a scenario produces a winner (ADR-0029).

    A scenario can be a ``versus`` contest — between named ``teams`` (asymmetric
    sides, e.g. The Steeped's spy vs herd) or between ``symmetric_seats`` (identical
    seats that differ only by model, e.g. Debate Duel / Beat Battle — the comparison
    that makes the model-leaderboard meaningful) — a ``judged`` pick where the
    judge's verdict *is* the result, or ``none`` (the default) where nobody wins.

    ``winner`` downstream carries either an agent name or a team label, so the team
    labels here must stay distinct from agent names — that cross-cast check lives in
    :meth:`WorldConfig._check_cast_references`, while the rules a competition can
    enforce on its own (team shape, disjointness, seat count) live in the validator
    below.
    """

    model_config = ConfigDict(extra="forbid")

    kind: Literal["versus", "judged", "none"] = "none"
    """How a winner is derived — ``versus`` (team or seat contest), ``judged`` (the
    judge's pick is the answer), or ``none`` (no winner; the default and the absent
    block)."""
    teams: dict[str, list[str]] | None = None
    """Team label → member agent names. Permitted only when ``kind == 'versus'``."""
    symmetric_seats: list[str] | None = None
    """Cast members occupying *identical* seats that differ only by which model fills
    them — the "which model argues better" comparison. ``versus`` only; needs ≥2
    entries, none repeated. An alternative to ``teams`` (a versus scenario declares
    one or the other)."""

    @model_validator(mode="after")
    def _check_teams(self) -> "CompetitionConfig":
        """Enforce the rules a competition block can check without the cast.

        Returns:
            This same instance when every rule holds.

        Raises:
            ValueError: If ``teams`` or ``symmetric_seats`` is given for a
                non-``versus`` kind; if a ``versus`` block gives neither; if
                ``symmetric_seats`` has fewer than two entries or repeats one;
                if ``teams`` is empty, has a team with no members, or places an
                agent on more than one team.
        """
        if self.kind != "versus":
            if self.teams is not None:
                raise ValueError(f"competition.teams is only allowed when kind is 'versus' (got kind={self.kind!r})")
            if self.symmetric_seats is not None:
                raise ValueError(
                    f"competition.symmetric_seats is only allowed when kind is 'versus' (got kind={self.kind!r})"
                )
            return self

        # kind == "versus": the contest is described by teams OR symmetric_seats.
        if self.teams is None and self.symmetric_seats is None:
            raise ValueError("competition.kind 'versus' requires either a 'teams' mapping or 'symmetric_seats'")

        if self.symmetric_seats is not None:
            if len(self.symmetric_seats) < 2:
                raise ValueError("competition.symmetric_seats needs ≥2 entries to be a contest")
            # A raw length check alone lets ["a", "a"] through: one participant
            # listed twice is not a contest, and the world-level cast check folds
            # seats into a set, so the repeat would otherwise go unnoticed.
            taken: set[str] = set()
            repeated: set[str] = set()
            for seat in self.symmetric_seats:
                if seat in taken:
                    repeated.add(seat)
                taken.add(seat)
            if repeated:
                raise ValueError(
                    f"competition.symmetric_seats must not list the same cast member twice: {sorted(repeated)}"
                )

        if self.teams is not None:
            if not self.teams:
                raise ValueError("competition.kind 'versus' requires a non-empty 'teams' mapping")
            empty = [label for label, members in self.teams.items() if not members]
            if empty:
                raise ValueError(f"competition.teams has empty member lists for teams: {sorted(empty)}")
            # Remember the first team each agent appears on; seeing the agent
            # again under a different label means the teams overlap. A repeat
            # inside the same team is tolerated.
            owner: dict[str, str] = {}
            overlap: set[str] = set()
            for label, members in self.teams.items():
                for member in members:
                    if owner.setdefault(member, label) != label:
                        overlap.add(member)
            if overlap:
                raise ValueError(
                    f"competition.teams must be mutually disjoint; agents on more than one team: {sorted(overlap)}"
                )
        return self
# ── scenario ─────────────────────────────────────────────────────────────────────
class ScenarioConfig(BaseModel):
    """One scenario: its identity, goal, seeds, cast, and optional overrides.

    Only ``name`` and ``default_seed`` are required. Unknown keys are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    name: str
    title: str = ""
    goal: str = ""
    """Shared objective given to the entire cast; it is rendered into every
    prompt."""
    default_seed: str
    example_seeds: list[str] = Field(default_factory=list)
    cast: list[str] = Field(default_factory=list)
    """Names of the participating agents, resolved against the agent registry.
    Changing who takes part means editing this list and nothing else."""
    genesis_text: str | None = None
    governor: GovernorConfig | None = None
    competition: CompetitionConfig | None = None
    """Winner contract (ADR-0029), optional; leaving it out is the same as
    ``none`` (no winner). Every *shipped* scenario must carry an explicit block
    per the authoring checklist (``tests/test_scenario_contract.py``), yet the
    schema itself stays permissive so that a partial world coming from the Lab
    or an LLM still validates."""
# ── the whole world ──────────────────────────────────────────────────────────────
class WorldConfig(BaseModel):
    """A complete, self-contained, validatable description of a runnable world.

    A UI or an LLM can emit one of these (agents + scenarios + models + budgets
    inline) and ``validate_world`` confirms it is coherent before anything runs —
    including that every scenario's cast references a defined agent.
    """

    model_config = ConfigDict(extra="forbid")

    models: ModelsConfig = Field(default_factory=ModelsConfig)
    governor: GovernorConfig = Field(default_factory=GovernorConfig)
    agents: list[AgentManifest] = Field(default_factory=list)
    scenarios: list[ScenarioConfig] = Field(default_factory=list)

    @model_validator(mode="after")
    def _check_cast_references(self) -> "WorldConfig":
        """Cross-check each scenario against the agents defined in this world.

        Scenarios are checked in order and the first violation is raised.

        Returns:
            This same instance when every scenario is coherent.

        Raises:
            ValueError: If a scenario's cast names an agent that is not defined,
                if a competition member (team member or symmetric seat) is not
                in that scenario's cast, or if a team label equals the name of
                a defined agent.
        """
        known_agents = {agent.name for agent in self.agents}

        for scenario in self.scenarios:
            unknown = [member for member in scenario.cast if member not in known_agents]
            if unknown:
                raise ValueError(
                    f"scenario {scenario.name!r} references undefined agents: {unknown}. "
                    f"Defined agents: {sorted(known_agents)}"
                )

            contest = scenario.competition
            if contest is None:
                # No winner contract on this scenario: nothing further to check.
                continue

            teams = contest.teams or {}
            cast_names = set(scenario.cast)

            # Team members and symmetric seats alike have to come from this
            # scenario's own cast (ADR-0029 §1).
            participants: set[str] = set()
            for roster in teams.values():
                participants.update(roster)
            participants.update(contest.symmetric_seats or [])
            strays = sorted(participants - cast_names)
            if strays:
                raise ValueError(
                    f"scenario {scenario.name!r} competition members not in its cast: {strays}. "
                    f"Cast: {sorted(cast_names)}"
                )

            # The winner is either an agent name or a team label; a label that
            # equals some agent's name would make that union ambiguous
            # (ADR-0029 §1).
            clashes = sorted(set(teams) & known_agents)
            if clashes:
                raise ValueError(
                    f"scenario {scenario.name!r} competition team labels collide with agent names: {clashes}. "
                    f"Team labels must be distinct from agent names to keep the winner unambiguous."
                )

        return self
# ── validation entrypoints (the 'configure from a prompt' surface) ───────────────
def validate_agent(data: dict) -> AgentManifest:
    """Validate a single agent definition.

    Args:
        data: Raw agent definition, e.g. the output of a UI form or an agent
            proposed by an LLM.

    Returns:
        The :class:`AgentManifest` built by ``AgentManifest.model_validate``.
        Any error that call raises propagates unchanged.
    """
    manifest = AgentManifest.model_validate(data)
    return manifest
def validate_scenario(data: dict) -> ScenarioConfig:
    """Validate a single scenario definition.

    Args:
        data: Raw scenario definition.

    Returns:
        The :class:`ScenarioConfig` built by ``ScenarioConfig.model_validate``.
        Any error that call raises propagates unchanged.
    """
    scenario = ScenarioConfig.model_validate(data)
    return scenario
def validate_world(data: dict) -> WorldConfig:
    """Validate an entire world in one call.

    Args:
        data: Raw world definition (agents + scenarios + models + budgets).

    Returns:
        The :class:`WorldConfig` built by ``WorldConfig.model_validate``. Any
        error that call raises propagates unchanged.
    """
    world = WorldConfig.model_validate(data)
    return world