Spaces:
Running
Running
| """Unified policy interface for the OpenRA-Bench eval stack. | |
| Every actor that can drive a side of a scenario — an LLM agent, a human | |
| labeler, a scripted reference policy — implements the same contract: | |
| controller.act(observation, Command) -> list[Command] | |
| This is the keystone of the human-labeling machine and the 1v1 | |
| adversarial harness: one harness, interchangeable policy backends. | |
| `run_level` / `run_episode` drive a single Controller; a 1v1 match | |
| drives two, one per side, each fed its own side-specific observation. | |
| Back-compat is non-negotiable: the historical policy shape was a bare | |
| callable ``agent_fn(render_state, Command) -> [Command]`` and ~190 test | |
| files still pass one. `as_controller()` adapts any such callable (or a | |
| `ModelAgent` bound method) into a Controller, so every existing scripted | |
| policy and test keeps working unchanged — the eval loop simply coerces | |
| its policy argument through `as_controller()` before stepping. | |
| Design notes | |
| ------------ | |
| * `act` keeps `Command` as an explicit parameter rather than binding it | |
| at construction. `Command` is the pyo3 `openra_train.Command` factory | |
| handle, only available once an env exists; threading it per-call keeps | |
| Controllers constructible without an engine (cheap to unit-test) and | |
| is byte-identical to the legacy `agent_fn` signature. | |
| * `reset(ctx)` is the per-episode lifecycle hook. Scripted policies | |
| ignore it; the model agent re-arms history; a human controller would | |
| reset its click queue. The 1v1 harness calls it once per side with a | |
| `side`-stamped `EpisodeContext`. | |
| * `history` / `stats` are the optional introspection surface the | |
| playback writer reads. `BaseController` provides empty defaults so a | |
| caller can read them unconditionally. | |
| """ | |
| from __future__ import annotations | |
| from dataclasses import dataclass, field | |
| from typing import Any, Callable, Protocol, runtime_checkable | |
# Type alias for a bare legacy policy callable:
#     (render_state, Command) -> [Command]
# This is the historical ``agent_fn`` shape described in the module
# docstring.
PolicyFn = Callable[[dict, Any], list]
@dataclass
class EpisodeContext:
    """What a Controller is told once, at episode start (`reset`).

    A scenario eval populates `pack_id` / `level` / `seed` / `objective`;
    a 1v1 match additionally stamps `side` so the two Controllers know
    which colour they are driving.

    This must be a dataclass: the fields below are declared with defaults
    (including ``field(default_factory=dict)``), and only the generated
    ``__init__`` turns them into keyword arguments and gives every
    instance its own fresh `extra` dict.
    """

    pack_id: str = ""
    level: str = ""
    seed: int = 0
    side: str = "agent"  # "agent" | "enemy" — which side this drives
    objective: str = ""
    max_turns: int = 0  # NOTE(review): presumably 0 means "unset" — confirm
    # Free-form per-episode payload; a new dict per instance so contexts
    # never share mutable state.
    extra: dict = field(default_factory=dict)
@runtime_checkable
class Controller(Protocol):
    """A policy that observes the world and emits engine Commands.

    Structural — anything exposing `name`, `reset`, and `act` satisfies
    it; `ModelAgent` does so without importing this module.

    The protocol is runtime-checkable so ``isinstance(obj, Controller)``
    works instead of raising ``TypeError``. That check only verifies the
    three members exist; it does not inspect signatures or callability.
    """

    name: str

    def reset(self, ctx: "EpisodeContext") -> None:
        """Per-episode lifecycle hook, called with the episode context."""
        ...

    def act(self, observation: dict, Command: Any) -> list:
        """Return the list of Commands to issue for `observation`."""
        ...
def is_controller(obj: Any) -> bool:
    """Report whether `obj` can be used directly as a Controller.

    The check is structural: `obj` must expose both an `act` and a
    `reset` attribute, and each must be callable. A bare policy function
    is itself callable but carries neither attribute, so it is rejected
    and left for the adapter path to wrap.
    """
    # `act` is probed first, then `reset`; stop at the first miss.
    for required in ("act", "reset"):
        if not callable(getattr(obj, required, None)):
            return False
    return True
class BaseController:
    """Shared base for concrete policies.

    Supplies a default `name`, a do-nothing `reset`, and empty
    `history` / `stats` containers so callers can read the introspection
    surface unconditionally. Subclasses are expected to override `act`.
    """

    name: str = "controller"

    def __init__(self, name: str | None = None) -> None:
        """Create the empty introspection containers and set the name.

        A falsy `name` (``None`` or ``""``) leaves the class-level
        default in place.
        """
        self.history: list[dict] = []
        self.stats: dict[str, Any] = {}
        if name:
            self.name = name

    def reset(self, ctx: EpisodeContext) -> None:  # noqa: D401
        """Episode-start hook; the base implementation does nothing."""
        return None

    def act(self, observation: dict, Command: Any) -> list:
        """Abstract: always raises `NotImplementedError` in the base."""
        cls_name = type(self).__name__
        raise NotImplementedError(f"{cls_name} must implement act()")
class FunctionController(BaseController):
    """Controller wrapper around a plain policy callable.

    Bridges the legacy ``agent_fn(render_state, Command) -> [Command]``
    shape onto the Controller contract. If the callable is a bound
    method, the instance it is bound to (its ``__self__``) is stored as
    `source`; for anything without ``__self__``, `source` is ``None``.
    """

    def __init__(self, fn: PolicyFn, name: str | None = None) -> None:
        """Wrap `fn`.

        The controller name is the explicit `name` if truthy, else the
        callable's ``__name__`` if it has a truthy one, else ``"fn"``.
        """
        label = name or getattr(fn, "__name__", None) or "fn"
        super().__init__(label)
        self.source: Any = getattr(fn, "__self__", None)
        self._fn = fn

    def act(self, observation: dict, Command: Any) -> list:
        """Delegate to the wrapped callable and return its result."""
        commands = self._fn(observation, Command)
        return commands
def as_controller(policy: Any, name: str | None = None) -> Controller:
    """Turn a policy-shaped value into a Controller.

    Resolution order:

    * a value that already passes `is_controller` is handed back
      untouched, so repeated coercion is harmless (`name` is not
      applied in this case);
    * any other callable is wrapped in a `FunctionController`, with
      `name` forwarded to it.

    Raises `TypeError` when `policy` is neither.
    """
    if is_controller(policy):
        return policy

    if not callable(policy):
        kind = type(policy).__name__
        raise TypeError(
            f"cannot coerce {kind} into a Controller: "
            "expected a Controller, a ModelAgent, or an "
            "agent_fn(render_state, Command) -> [Command] callable"
        )

    return FunctionController(policy, name)
def introspection_source(controller: Controller) -> Any:
    """Pick the object whose `history` / `stats` playback should read.

    Returns `controller.source` when that attribute exists and is not
    ``None``; in every other case returns `controller` itself.
    """
    wrapped = getattr(controller, "source", None)
    if wrapped is None:
        return controller
    return wrapped