Spaces:

build-small-hackathon
/

multi-agent-lab

Running on Zero

agharsallah

Merge remote-tracking branch 'origin/main' into worktree-scenarios

1c7b5c9 16 days ago

11.3 kB

	"""Declarative configuration schemas — config as validatable data.

	These Pydantic models are the contract for the 'easily configurable' surface.
	Every knob — which agents exist, what they emit, which model tier they use, the
	scenario goal, the cast that participates, tool grants, budgets — is expressed as
	data that:

	* round-trips to/from YAML files under ``config/`` (the on-disk surface), and
	* can equally be produced by a UI form or an LLM and checked with one call.

	That last property is the point: ``validate_world`` / ``validate_agent`` /
	``validate_scenario`` turn an arbitrary dict into a typed, cross-checked object or
	a precise error. So "let an agent build the configuration from a prompt" reduces
	to "emit JSON, validate it, run it." See ADR-0011.

	The agent schema itself is :class:`AgentManifest` (``src/core/manifest.py``) — we
	reuse it here rather than duplicating, so the four stable contracts stay singular.
	"""

	from __future__ import annotations

	from typing import Literal

	from pydantic import BaseModel, ConfigDict, Field, model_validator

	from src.core.manifest import AgentManifest

	# ── model profiles ─────────────────────────────────────────────────────────────


	class ModelProfileConfig(BaseModel):
	    """Binding of one logical model profile to a concrete model and endpoint.

	    Unknown keys are rejected (``extra="forbid"``). ``protected_namespaces=()``
	    switches off Pydantic's protected-namespace check for field names.
	    """

	    model_config = ConfigDict(extra="forbid", protected_namespaces=())

	    model: str
	    base_url: str | None = None
	    """OpenAI-compatible endpoint URL (ends in /v1). Env-templatable in YAML via
	    ``${MODAL_LLM_BASE_URL}`` so the Modal workspace is never hard-coded."""

	    api_key: str | None = None
	    """Key for the endpoint (env-templatable, e.g. ``${MODAL_LLM_KEY}``). vLLM
	    accepts any token unless the server enforces one."""

	    temperature: float = 0.8
	    max_tokens: int = 256

	    @model_validator(mode="after")
	    def _blank_to_none(self) -> "ModelProfileConfig":
	        """Collapse empty-string ``base_url`` / ``api_key`` bindings to ``None``.

	        Returns:
	            The same instance, with ``""`` bindings replaced by ``None``.
	        """
	        # An unset ``${VAR}`` template expands to "" (see registry._expand_env);
	        # normalise empty bindings back to None so the live transport omits them.
	        #
	        # Only a literal "" is rewritten. Assigning to a field that is already
	        # None would still add it to ``model_fields_set``, so a profile that
	        # never mentioned the field would look explicitly configured and
	        # ``model_dump(exclude_unset=True)`` would emit spurious nulls.
	        if self.base_url == "":
	            self.base_url = None
	        if self.api_key == "":
	            self.api_key = None
	        return self


	class ModelsConfig(BaseModel):
	    """Model-layer settings: the stub/live switch and the per-profile bindings.

	    Unknown keys are rejected (``extra="forbid"``).
	    """

	    model_config = ConfigDict(extra="forbid")

	    offline: bool | None = Field(default=None)
	    """Tri-state transport switch: ``True`` always uses the deterministic stub,
	    ``False`` always goes live, and ``None`` decides automatically (live when
	    credentials are present, stub otherwise)."""

	    profiles: dict[str, ModelProfileConfig] = Field(default_factory=dict)
	    """Logical profile name (tiny/fast/balanced/strong) → its concrete binding."""


	# ── budgets ─────────────────────────────────────────────────────────────────────


	class GovernorConfig(BaseModel):
	    """Run budgets: turn, call, token and spend limits.

	    Unknown keys are rejected (``extra="forbid"``).
	    """

	    # NOTE(review): the two optional limits default to None — presumably
	    # "no cap"; confirm against the governor implementation.
	    model_config = ConfigDict(extra="forbid")

	    max_turns: int = Field(default=100)
	    max_calls_per_turn: int = Field(default=8)
	    max_total_calls: int = Field(default=500)
	    max_total_tokens: int | None = Field(default=None)
	    hourly_budget_usd: float | None = Field(default=None)


	# ── competition ──────────────────────────────────────────────────────────────────

	class CompetitionConfig(BaseModel):
	    """Declares whether — and how — a scenario produces a winner (ADR-0029).

	    A scenario can be a ``versus`` contest — between named ``teams`` (asymmetric
	    sides, e.g. The Steeped's spy vs herd) or between ``symmetric_seats`` (identical
	    seats that differ only by model, e.g. Debate Duel / Beat Battle — the comparison
	    that makes the model-leaderboard meaningful) — a ``judged`` pick where the
	    judge's verdict is the result, or ``none`` (the default) where nobody wins.
	    ``winner`` downstream carries either an agent name or a team label, so the team
	    labels here must stay distinct from agent names — that cross-cast check lives in
	    :meth:`WorldConfig._check_cast_references`, while the rules a competition can
	    enforce on its own (team shape, disjointness, seat count and distinctness) live
	    in the validator below.
	    """

	    model_config = ConfigDict(extra="forbid")

	    kind: Literal["versus", "judged", "none"] = "none"
	    """How a winner is derived — ``versus`` (team or seat contest), ``judged`` (the
	    judge's pick is the answer), or ``none`` (no winner; the default and the absent
	    block)."""

	    teams: dict[str, list[str]] | None = None
	    """Team label → member agent names. Permitted only when ``kind == 'versus'``."""

	    symmetric_seats: list[str] | None = None
	    """Cast members occupying identical seats that differ only by which model fills
	    them — the "which model argues better" comparison. ``versus`` only; needs ≥2
	    distinct entries. An alternative to ``teams`` (a versus scenario declares one or
	    the other)."""

	    @model_validator(mode="after")
	    def _check_teams(self) -> "CompetitionConfig":
	        """Enforce the rules a competition block can verify without the cast.

	        Returns:
	            The validated instance, unchanged.

	        Raises:
	            ValueError: if ``teams`` or ``symmetric_seats`` is given for a
	                non-``versus`` kind; if a ``versus`` block has neither; if
	                ``symmetric_seats`` has fewer than two entries or repeats a name;
	                if ``teams`` is empty, contains an empty team, or places one
	                agent on more than one team.
	        """
	        if self.kind != "versus":
	            if self.teams is not None:
	                raise ValueError(f"competition.teams is only allowed when kind is 'versus' (got kind={self.kind!r})")
	            if self.symmetric_seats is not None:
	                raise ValueError(
	                    f"competition.symmetric_seats is only allowed when kind is 'versus' (got kind={self.kind!r})"
	                )
	            return self

	        # kind == "versus": the contest is described by teams OR symmetric_seats.
	        if self.teams is None and self.symmetric_seats is None:
	            raise ValueError("competition.kind 'versus' requires either a 'teams' mapping or 'symmetric_seats'")

	        if self.symmetric_seats is not None:
	            if len(self.symmetric_seats) < 2:
	                raise ValueError("competition.symmetric_seats needs ≥2 entries to be a contest")
	            # The same name twice is still a single cast member: ["a", "a"]
	            # satisfies the length check without being a contest, and the
	            # winner (an agent name) could not tell the seats apart.
	            taken: set[str] = set()
	            repeated: set[str] = set()
	            for seat in self.symmetric_seats:
	                if seat in taken:
	                    repeated.add(seat)
	                taken.add(seat)
	            if repeated:
	                raise ValueError(
	                    f"competition.symmetric_seats must name distinct cast members; repeated: {sorted(repeated)}"
	                )

	        if self.teams is not None:
	            if not self.teams:
	                raise ValueError("competition.kind 'versus' requires a non-empty 'teams' mapping")
	            empty = [label for label, members in self.teams.items() if not members]
	            if empty:
	                raise ValueError(f"competition.teams has empty member lists for teams: {sorted(empty)}")
	            # Remember the team each agent was last seen on; meeting the agent
	            # again under a different label means the teams overlap. A name
	            # repeated inside one team is tolerated.
	            seen: dict[str, str] = {}
	            overlap: set[str] = set()
	            for label, members in self.teams.items():
	                for member in members:
	                    if member in seen and seen[member] != label:
	                        overlap.add(member)
	                    seen[member] = label
	            if overlap:
	                raise ValueError(
	                    f"competition.teams must be mutually disjoint; agents on more than one team: {sorted(overlap)}"
	                )
	        return self


	# ── scenario ─────────────────────────────────────────────────────────────────────


	class ScenarioConfig(BaseModel):
	    """One scenario: identity, goal, seeds, cast, and optional budget/winner blocks.

	    Unknown keys are rejected (``extra="forbid"``). Only ``name`` and
	    ``default_seed`` are required; everything else has a default.
	    """

	    model_config = ConfigDict(extra="forbid")

	    name: str
	    title: str = Field(default="")
	    goal: str = Field(default="")
	    """Objective shared by the entire cast; it is rendered into every prompt."""

	    default_seed: str
	    example_seeds: list[str] = Field(default_factory=list)

	    cast: list[str] = Field(default_factory=list)
	    """Names of the participating agents, resolved against the agent registry.
	    Changing who takes part means editing this list and nothing else."""

	    genesis_text: str | None = Field(default=None)
	    # NOTE(review): presumably a per-scenario override of the world-level
	    # governor when set — confirm against the loader.
	    governor: GovernorConfig | None = Field(default=None)

	    competition: CompetitionConfig | None = Field(default=None)
	    """Optional winner contract (ADR-0029); leaving it out is equivalent to
	    ``none`` (no winner). Shipped scenarios must carry an explicit block — the
	    authoring checklist in ``tests/test_scenario_contract.py`` enforces that —
	    but the schema itself stays permissive so that a partial world coming from
	    the Lab or an LLM still validates."""


	# ── the whole world ──────────────────────────────────────────────────────────────


	class WorldConfig(BaseModel):
	    """Everything needed to describe a runnable world, in one validatable object.

	    Agents, scenarios, model bindings and budgets are all carried inline, so a
	    UI or an LLM can emit a single document and have ``validate_world`` confirm
	    it is coherent before anything runs — in particular that each scenario's
	    cast only names agents that are actually defined.
	    """

	    model_config = ConfigDict(extra="forbid")

	    models: ModelsConfig = Field(default_factory=ModelsConfig)
	    governor: GovernorConfig = Field(default_factory=GovernorConfig)
	    agents: list[AgentManifest] = Field(default_factory=list)
	    scenarios: list[ScenarioConfig] = Field(default_factory=list)

	    @model_validator(mode="after")
	    def _check_cast_references(self) -> "WorldConfig":
	        """Cross-check every scenario against the world's agent definitions.

	        Scenarios are checked in order and the first violation raises.

	        Returns:
	            The validated instance, unchanged.

	        Raises:
	            ValueError: if a scenario's cast names an undefined agent, if a
	                competition member (team member or symmetric seat) is not in
	                that scenario's cast, or if a team label equals an agent name.
	        """
	        known_agents = {agent.name for agent in self.agents}

	        for scn in self.scenarios:
	            # Kept as a list so the message preserves cast order.
	            unknown = [entry for entry in scn.cast if entry not in known_agents]
	            if unknown:
	                raise ValueError(
	                    f"scenario {scn.name!r} references undefined agents: {unknown}. "
	                    f"Defined agents: {sorted(known_agents)}"
	                )

	            contest = scn.competition
	            if contest is None:
	                continue

	            team_map = contest.teams or {}
	            cast_names = set(scn.cast)

	            # Every team member AND every symmetric seat must be in this
	            # scenario's cast (ADR-0029 §1).
	            contestants = set(contest.symmetric_seats or [])
	            for roster in team_map.values():
	                contestants.update(roster)
	            strays = sorted(contestants - cast_names)
	            if strays:
	                raise ValueError(
	                    f"scenario {scn.name!r} competition members not in its cast: {strays}. "
	                    f"Cast: {sorted(cast_names)}"
	                )

	            # A team label must not collide with any agent name, or the winner
	            # union (agent name OR team label) becomes ambiguous (ADR-0029 §1).
	            clashes = sorted(known_agents.intersection(team_map))
	            if clashes:
	                raise ValueError(
	                    f"scenario {scn.name!r} competition team labels collide with agent names: {clashes}. "
	                    "Team labels must be distinct from agent names to keep the winner unambiguous."
	                )

	        return self


	# ── validation entrypoints (the 'configure from a prompt' surface) ───────────────


	def validate_agent(data: dict) -> AgentManifest:
	    """Turn a raw mapping (UI form output, an LLM-proposed agent, …) into an agent.

	    Args:
	        data: The untyped agent definition.

	    Returns:
	        Whatever ``AgentManifest.model_validate`` produces for ``data``; any
	        validation error it raises propagates to the caller.
	    """
	    manifest = AgentManifest.model_validate(data)
	    return manifest


	def validate_scenario(data: dict) -> ScenarioConfig:
	    """Turn a raw mapping into a validated :class:`ScenarioConfig`.

	    Args:
	        data: The untyped scenario definition.

	    Returns:
	        Whatever ``ScenarioConfig.model_validate`` produces for ``data``; any
	        validation error it raises propagates to the caller.
	    """
	    scenario = ScenarioConfig.model_validate(data)
	    return scenario


	def validate_world(data: dict) -> WorldConfig:
	    """Check an entire world (agents + scenarios + models + budgets) in one call.

	    Args:
	        data: The untyped world definition.

	    Returns:
	        Whatever ``WorldConfig.model_validate`` produces for ``data``; any
	        validation error it raises propagates to the caller.
	    """
	    world = WorldConfig.model_validate(data)
	    return world