"""Competition handlers — turning a judge's ruling into a machine-readable winner. The arena contract (ADR-0029) says a winner is *data*, not prose: a ``judge.verdict`` carries ``payload["winner"]`` naming a cast member (or team), so the leaderboard can attribute the win to a model. Responsibilities split cleanly: * **The model + the engine handle the live path.** A judge manifest lists ``winner`` in ``output_extra_fields`` (a well-known typed field, ``src/core/structured.py``), and :class:`~src.agents.base.ManifestAgent` validates that name against the injected ``cast_names`` — one corrective re-ask, then ``no_contest`` if the model still won't name a real player. * **This handler keeps the OFFLINE demo watchable.** The deterministic stub never emits a ``winner`` (the field is optional, ADR-0029), so without help an offline judged run would crown no one. :class:`JudgedCompetition` fills an *empty* winner by reading the name out of the verdict prose, then falling back to the most active competitor — deterministic, so offline runs are reproducible. It defers entirely when the engine already forfeited the round (``no_contest``). A judge with a known ground truth (The Steeped's :class:`~src.agents.handlers.SpyHost`, Twenty Sprouts' :class:`~src.agents.twenty_sprouts.SproutJudge`) computes the winner in code instead — ``SproutJudge`` subclasses this and overrides :meth:`decide_winner`. The best-practice split the roadmap calls for: AI is load-bearing for judgment, code is load-bearing for bookkeeping. """ from __future__ import annotations from src.agents.base import ManifestAgent from src.core.events import Event from src.core.projections import StageProjection from src.core.registry import register_handler # Kinds a *competitor* emits — used to find who is eligible to win. A judge emits # ``judge.verdict`` and is excluded; the scene narrator emits ``world.observed`` (and at # genesis its actor is the *scenario name*, not a player), so that is excluded too. 
# Candidates are thus only the minds that actually spoke, derived from the run's events.
_COMPETITOR_KINDS = frozenset({"agent.spoke", "agent.thought", "oracle.spoke"})


@register_handler("judged-competition")
class JudgedCompetition(ManifestAgent):
    """A judge that fills an empty offline ``winner`` so the stub demo still crowns one.

    The generic turn (and the engine's live validation) handle a model-named winner.
    This handler runs *after* that: when the verdict carries no winner — the offline
    stub, which never emits the field — it derives one from the verdict prose, then
    from the most active competitor. It honours a model-named winner that is already a
    real competitor, and defers when the engine forfeited the round (``no_contest``).
    Deterministic, so offline runs are reproducible.
    """

    def act(
        self,
        run_id: str,
        turn: int,
        projection: StageProjection,
        recent_events: tuple[Event, ...],
    ) -> Event | None:
        """Run the generic judge turn, then attribute a winner on the resulting verdict.

        Returns ``None`` when the judge abstains (see :meth:`_abstains`) or when the
        generic turn itself produced no event; otherwise returns the event from the
        generic turn, with ``payload["winner"]`` set (and ``payload["no_contest"]``
        removed) whenever :meth:`decide_winner` names someone.
        """
        # A reactive judge (one that subscribes to the events that might end the game)
        # abstains until either its win condition is met or the show reaches its finale —
        # otherwise its first firing would rule on turn 1 and end the show (the "first
        # verdict ends the show" trap). Returns None to abstain (no event, no budget cost).
        if self._abstains(turn, recent_events):
            return None

        event = super().act(run_id, turn, projection, recent_events)
        if event is None:
            # Defensive: this method's contract is ``Event | None``. If the generic turn
            # yields nothing there is no verdict to stamp, so pass that through instead
            # of dereferencing ``event.payload`` below.
            return None

        candidates = self._candidates(recent_events)
        if not candidates:
            return event  # nothing to attribute — leave the verdict as prose only

        winner = self.decide_winner(event, candidates, recent_events)
        if winner:
            event.payload["winner"] = winner
            # We crowned someone — clear any forfeit the engine stamped (a ground-truth
            # judge overrides no-contest; a repaired offline winner supersedes the empty).
            event.payload.pop("no_contest", None)
        return event

    # ── early termination (override ``has_early_winner`` to end a game the moment it's won) ──

    def _abstains(self, turn: int, recent_events: tuple[Event, ...]) -> bool:
        """Should this judge decline to rule on *this* invocation?

        The default judge (no subscriptions) never abstains — it fires once, on its
        scheduled tick or a forced curtain call, exactly as before. A *reactive* judge
        (one with ``subscribes_to`` set, woken by every spoken line) instead waits: it
        rules only when the game is already decided (:meth:`has_early_winner`), when its
        scheduled finale tick lands, or when forced (``_forced``, set by
        ``Conductor.force_verdict`` for the curtain call / budget end). Either way it
        never rules twice — once a verdict is on the ledger, every later invocation
        abstains.
        """
        if any(e.kind == "judge.verdict" for e in recent_events):
            return True  # already ruled — never emit a second verdict
        if not self.manifest.subscribes_to:
            return False  # a plain tick/forced judge always rules when invoked
        if getattr(self, "_forced", False):
            return False  # the curtain call must produce a ruling
        tick = self.manifest.schedule.tick_every
        if tick and turn % tick == 0:
            return False  # the scheduled finale — rule now (timeout)
        return not self.has_early_winner(recent_events)  # reactive: rule only once it's won

    def has_early_winner(self, recent_events: tuple[Event, ...]) -> bool:
        """True when the game is already decided and the judge should rule *now*.

        Default ``False``: a generic judged scenario has no code-known early win, so it
        rules only at its finale. A ground-truth scenario overrides this (Twenty Sprouts'
        :class:`~src.agents.twenty_sprouts.SproutJudge` returns True the moment the
        secret word is guessed) so the show ends on the win instead of running to the
        timeout.
        """
        return False

    # ── decision (override for ground-truth judges) ──────────────────────────────

    def decide_winner(
        self,
        event: Event,
        candidates: list[str],
        recent_events: tuple[Event, ...],
    ) -> str | None:
        """Return the winning cast name (or team label).

        Honours the model's ``winner`` when it already names a real competitor or team;
        otherwise repairs it deterministically (prose mention → most-active fallback).
        Defers when the engine forfeited the round (``no_contest``) — a live model that
        refused to name a real player keeps its forfeit; only the offline empty-winner
        path (no forfeit) is repaired. Ground-truth judges override this and ignore
        ``no_contest`` so their code-decided winner always lands.
        """
        if event.payload.get("no_contest"):
            return None
        named = (event.payload.get("winner") or "").strip()
        if named and (named in candidates or named in self._team_labels()):
            return named
        return self._winner_from_prose(event, candidates) or self._most_active(
            candidates, recent_events
        )

    # ── helpers ──────────────────────────────────────────────────────────────────

    def _team_labels(self) -> set[str]:
        """Valid team labels for this scenario (empty for judged / symmetric-seat duels)."""
        return set((getattr(self.competition, "teams", None) or {}).keys())

    def _candidates(self, recent_events: tuple[Event, ...]) -> list[str]:
        """On-stage competitors: actors who actually spoke, minus this judge.

        Insertion-ordered (first appearance) so the fallback is stable and readable.
        """
        # dict keys keep first-insertion order, giving an order-preserving de-dup.
        return list(
            dict.fromkeys(
                e.actor
                for e in recent_events
                if e.kind in _COMPETITOR_KINDS and e.actor and e.actor != self.name
            )
        )

    @staticmethod
    def _winner_from_prose(event: Event, candidates: list[str]) -> str | None:
        """Find the candidate the verdict text names, by full-slug substring match.

        Matches on the exact cast slug (``debater-a``) — and the hyphen→space variant
        (``debater a``) a live model might write — so it distinguishes symmetric seats
        that share a stem (``debater-a`` vs ``debater-b``) instead of matching the stem.
        When several are named, the earliest-mentioned wins (the one the judge leads
        with); a name's position is the earliest hit of *either* spelling. When two
        matches start at the same position — one slug is a prefix of another
        (``player-1`` inside ``player-10``) — the longer match wins, since the shorter
        one is only a fragment of it. Remaining exact ties keep candidate order.
        Returns ``None`` when the prose names no competitor.
        """
        text = (event.payload.get("text") or "").lower()
        best: str | None = None
        best_key: tuple[int, int] | None = None  # (position, -match length); smaller wins
        for name in candidates:
            slug = name.lower()
            for needle in (slug, slug.replace("-", " ")):
                if not needle:
                    continue  # an empty needle "matches" at 0 and would crown nobody real
                pos = text.find(needle)
                if pos < 0:
                    continue
                key = (pos, -len(needle))
                # Strict ``<`` so an exact tie leaves the earlier candidate in place.
                if best_key is None or key < best_key:
                    best, best_key = name, key
        return best

    @staticmethod
    def _most_active(candidates: list[str], recent_events: tuple[Event, ...]) -> str:
        """Deterministic fallback: the competitor who spoke most (ties → cast order).

        Raises ``ValueError`` (from ``max``) when ``candidates`` is empty; ``act`` never
        calls it in that case.
        """
        counts = dict.fromkeys(candidates, 0)
        for e in recent_events:
            if e.kind in _COMPETITOR_KINDS and e.actor in counts:
                counts[e.actor] += 1
        # ``max`` returns the first maximal item it encounters, so iterating in cast
        # order already resolves ties toward the earliest candidate.
        return max(candidates, key=counts.__getitem__)