# Spaces:
#
# build-small-hackathon
# /
#
# multi-agent-lab
#
# Sleeping
#
# File size: 11,270 Bytes

"""Declarative configuration schemas — config as validatable data.

These Pydantic models are the contract for the 'easily configurable' surface.
Every knob — which agents exist, what they emit, which model tier they use, the
scenario goal, the cast that participates, tool grants, budgets — is expressed as
data that:

  * round-trips to/from YAML files under ``config/`` (the on-disk surface), and
  * can equally be produced by a UI form or an LLM and checked with one call.

That last property is the point: ``validate_world`` / ``validate_agent`` /
``validate_scenario`` turn an arbitrary dict into a typed, cross-checked object or
a precise error.  So "let an agent build the configuration from a prompt" reduces
to "emit JSON, validate it, run it."  See ADR-0011.

The agent schema itself is :class:`AgentManifest` (``src/core/manifest.py``) — we
reuse it here rather than duplicating, so the four stable contracts stay singular.
"""

from __future__ import annotations

from typing import Literal

from pydantic import BaseModel, ConfigDict, Field, model_validator

from src.core.manifest import AgentManifest

# ── model profiles ─────────────────────────────────────────────────────────────


class ModelProfileConfig(BaseModel):
    """Binding of one logical model profile to a concrete model and endpoint.

    Unknown keys are rejected (``extra="forbid"``).  After field validation,
    falsy ``base_url`` / ``api_key`` values are replaced with ``None``.
    """

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    model: str
    base_url: str | None = None
    """URL of an OpenAI-compatible endpoint (ends in /v1).  In YAML it can be
    env-templated as ``${MODAL_LLM_BASE_URL}`` so the Modal workspace is never
    hard-coded."""

    api_key: str | None = None
    """Endpoint key (env-templatable, e.g. ``${MODAL_LLM_KEY}``).  vLLM accepts
    any token unless the server enforces one."""

    temperature: float = 0.8
    max_tokens: int = 256

    @model_validator(mode="after")
    def _blank_to_none(self) -> "ModelProfileConfig":
        """Collapse empty endpoint bindings to ``None`` and return the model."""
        # An unset ``${VAR}`` template expands to "" (see registry._expand_env);
        # mapping every falsy binding back to None lets the live transport
        # omit it.
        for binding in ("base_url", "api_key"):
            if not getattr(self, binding):
                setattr(self, binding, None)
        return self


class ModelsConfig(BaseModel):
    """Model section of a world: the offline switch plus per-profile bindings.

    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    offline: bool | None = None
    """Tri-state switch: ``True`` always uses the deterministic stub, ``False``
    always goes live, and ``None`` means auto (live when credentials are present,
    stub otherwise)."""

    profiles: dict[str, ModelProfileConfig] = Field(default_factory=dict)
    """Logical profile name (tiny/fast/balanced/strong) → its concrete binding."""


# ── budgets ─────────────────────────────────────────────────────────────────────


class GovernorConfig(BaseModel):
    """Budget limits for a run, expressed as plain validated data.

    This model only declares and type-checks the values; nothing in this class
    enforces them.  Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # NOTE(review): the precise meaning of each limit (e.g. what counts as a
    # "call") is defined by whatever consumes this config — confirm there.
    max_turns: int = 100
    max_calls_per_turn: int = 8
    max_total_calls: int = 500

    # ``None`` leaves these two unset; presumably "no cap" — verify with the
    # consumer of this config.
    max_total_tokens: int | None = None
    hourly_budget_usd: float | None = None


# ── competition ──────────────────────────────────────────────────────────────────

class CompetitionConfig(BaseModel):
    """Declares whether — and how — a scenario produces a winner (ADR-0029).

    A scenario can be a ``versus`` contest — between named ``teams`` (asymmetric
    sides, e.g. The Steeped's spy vs herd) or between ``symmetric_seats`` (identical
    seats that differ only by model, e.g. Debate Duel / Beat Battle — the comparison
    that makes the model-leaderboard meaningful) — a ``judged`` pick where the
    judge's verdict *is* the result, or ``none`` (the default) where nobody wins.
    ``winner`` downstream carries either an agent name or a team label, so the team
    labels here must stay distinct from agent names — that cross-cast check lives in
    :meth:`WorldConfig._check_cast_references`, while the rules a competition can
    enforce on its own (team shape, disjointness, seat count and seat uniqueness)
    live in the validator below.
    """

    model_config = ConfigDict(extra="forbid")

    kind: Literal["versus", "judged", "none"] = "none"
    """How a winner is derived — ``versus`` (team or seat contest), ``judged`` (the
    judge's pick is the answer), or ``none`` (no winner; the default and the absent
    block)."""

    teams: dict[str, list[str]] | None = None
    """Team label → member agent names.  Permitted only when ``kind == 'versus'``."""

    symmetric_seats: list[str] | None = None
    """Cast members occupying *identical* seats that differ only by which model fills
    them — the "which model argues better" comparison.  ``versus`` only; needs ≥2
    distinct entries.  An alternative to ``teams`` (a versus scenario declares one or
    the other)."""

    @model_validator(mode="after")
    def _check_teams(self) -> "CompetitionConfig":
        """Enforce every rule that can be checked without seeing the cast.

        Returns:
            The validated model itself, unchanged.

        Raises:
            ValueError: if ``teams`` or ``symmetric_seats`` is given for a
                non-``versus`` kind; if a ``versus`` competition declares neither;
                if ``symmetric_seats`` has fewer than two entries or repeats a
                cast member; if ``teams`` is empty, contains a team with no
                members, or places one agent on more than one team.
        """
        if self.kind != "versus":
            # Only a versus contest has sides; reject stray side declarations.
            if self.teams is not None:
                raise ValueError(f"competition.teams is only allowed when kind is 'versus' (got kind={self.kind!r})")
            if self.symmetric_seats is not None:
                raise ValueError(
                    f"competition.symmetric_seats is only allowed when kind is 'versus' (got kind={self.kind!r})"
                )
            return self

        # kind == "versus": the contest is described by teams OR symmetric_seats.
        if self.teams is None and self.symmetric_seats is None:
            raise ValueError("competition.kind 'versus' requires either a 'teams' mapping or 'symmetric_seats'")

        if self.symmetric_seats is not None:
            if len(self.symmetric_seats) < 2:
                raise ValueError("competition.symmetric_seats needs ≥2 entries to be a contest")
            # A repeated name is still a single seat: without this check
            # ["a", "a"] would pass the length test above while describing a
            # one-sided "contest".
            seated: set[str] = set()
            repeated: set[str] = set()
            for seat in self.symmetric_seats:
                if seat in seated:
                    repeated.add(seat)
                seated.add(seat)
            if repeated:
                raise ValueError(
                    f"competition.symmetric_seats must name distinct cast members; repeated: {sorted(repeated)}"
                )

        if self.teams is not None:
            if not self.teams:
                raise ValueError("competition.kind 'versus' requires a non-empty 'teams' mapping")
            empty = [label for label, members in self.teams.items() if not members]
            if empty:
                raise ValueError(f"competition.teams has empty member lists for teams: {sorted(empty)}")
            # Remember the team each agent was last seen on; seeing the agent
            # again under a different label means the teams overlap.  A name
            # repeated within one team is not treated as an overlap.
            seen: dict[str, str] = {}
            overlap: set[str] = set()
            for label, members in self.teams.items():
                for member in members:
                    if member in seen and seen[member] != label:
                        overlap.add(member)
                    seen[member] = label
            if overlap:
                raise ValueError(
                    f"competition.teams must be mutually disjoint; agents on more than one team: {sorted(overlap)}"
                )
        return self


# ── scenario ─────────────────────────────────────────────────────────────────────


class ScenarioConfig(BaseModel):
    """One scenario: its goal, seeds, participating cast and optional overrides.

    Only ``name`` and ``default_seed`` are required; every other field has a
    default.  Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    name: str
    title: str = ""
    goal: str = ""
    """Shared objective given to the whole cast (rendered into every prompt)."""

    default_seed: str
    example_seeds: list[str] = Field(default_factory=list)

    cast: list[str] = Field(default_factory=list)
    """Names of the participating agents, resolved against the agent registry.
    Changing who takes part is just a matter of editing this list."""

    genesis_text: str | None = None
    governor: GovernorConfig | None = None

    competition: CompetitionConfig | None = None
    """Optional winner contract (ADR-0029); leaving it out is equivalent to ``none``
    (no winner).  The authoring checklist (``tests/test_scenario_contract.py``)
    demands an explicit block on every *shipped* scenario, but the schema itself
    stays permissive so that a partial world from the Lab/an LLM still validates."""


# ── the whole world ──────────────────────────────────────────────────────────────


class WorldConfig(BaseModel):
    """Self-contained, validatable description of a runnable world.

    Agents, scenarios, models and budgets are all carried inline, so a UI or an
    LLM can emit a single object and have ``validate_world`` confirm it is
    coherent before anything runs — in particular that each scenario's cast only
    names agents that are defined here.
    """

    model_config = ConfigDict(extra="forbid")

    models: ModelsConfig = Field(default_factory=ModelsConfig)
    governor: GovernorConfig = Field(default_factory=GovernorConfig)
    agents: list[AgentManifest] = Field(default_factory=list)
    scenarios: list[ScenarioConfig] = Field(default_factory=list)

    @model_validator(mode="after")
    def _check_cast_references(self) -> "WorldConfig":
        """Cross-check every scenario against the agents defined in this world.

        Raises:
            ValueError: on the first scenario whose cast names an undefined
                agent, whose competition names someone outside its cast, or
                whose team labels clash with an agent name.
        """
        known_agents = {agent.name for agent in self.agents}
        for scenario in self.scenarios:
            unknown = [name for name in scenario.cast if name not in known_agents]
            if unknown:
                raise ValueError(
                    f"scenario {scenario.name!r} references undefined agents: {unknown}. "
                    f"Defined agents: {sorted(known_agents)}"
                )

            contest = scenario.competition
            if contest is None:
                # No winner contract, so there is nothing further to cross-check.
                continue

            teams = contest.teams or {}
            cast_names = set(scenario.cast)

            # Team members and symmetric seats alike have to come from this
            # scenario's cast (ADR-0029 §1).
            contestants = set(contest.symmetric_seats or [])
            for roster in teams.values():
                contestants.update(roster)
            strays = sorted(contestants - cast_names)
            if strays:
                raise ValueError(
                    f"scenario {scenario.name!r} competition members not in its cast: {strays}. "
                    f"Cast: {sorted(cast_names)}"
                )

            # The winner is either an agent name or a team label; a label that
            # matches an agent name would make that union ambiguous (ADR-0029 §1).
            clashes = sorted(known_agents.intersection(teams))
            if clashes:
                raise ValueError(
                    f"scenario {scenario.name!r} competition team labels collide with agent names: {clashes}. "
                    f"Team labels must be distinct from agent names to keep the winner unambiguous."
                )
        return self


# ── validation entrypoints (the 'configure from a prompt' surface) ───────────────


def validate_agent(data: dict) -> AgentManifest:
    """Turn a raw agent mapping (UI form output, LLM proposal, …) into a manifest.

    Validation is delegated to ``AgentManifest.model_validate``; whatever that
    raises for invalid input propagates to the caller.
    """
    manifest = AgentManifest.model_validate(data)
    return manifest


def validate_scenario(data: dict) -> ScenarioConfig:
    """Turn a raw scenario mapping into a validated :class:`ScenarioConfig`.

    Only the scenario's own rules are checked here; the cross-checks against
    defined agents run as part of :class:`WorldConfig` validation.
    """
    scenario = ScenarioConfig.model_validate(data)
    return scenario


def validate_world(data: dict) -> WorldConfig:
    """Validate a complete world — agents, scenarios, models, budgets — in one call.

    Returns the typed :class:`WorldConfig`; errors raised by
    ``WorldConfig.model_validate`` propagate to the caller.
    """
    world = WorldConfig.model_validate(data)
    return world