Spaces:

qpluslab
/

OpenRA-Bench

Running

yxc20098 committed on May 21

Commit

c68e036

1 Parent(s): 248d766

Phase 1: unified Controller interface for the eval stack

Introduce openra_bench/controller.py — the keystone for the
human-labeling machine and the 1v1 adversarial harness. Every policy
backend (LLM agent, human labeler, scripted reference) now implements
one contract:

controller.act(observation, Command) -> [Command]

- Controller protocol + BaseController + FunctionController.
- as_controller() coerces a bare agent_fn, a ModelAgent bound method,
or an existing Controller — so all ~190 legacy test files that pass
a bare function keep working unchanged.
- EpisodeContext carries per-episode info (pack/level/seed/side) to
reset(); the 'side' field makes the interface 1v1-ready.
- run_level / run_episode drive any Controller; introspection_source()
recovers the underlying object's history/stats for playback.
- ModelAgent now satisfies the contract directly (name/reset/act).

tests/test_controller.py: 13 tests — coercion, idempotency, bound-method
source recovery, abstract act(), ModelAgent conformance, and an
end-to-end run_level smoke proving a bare fn and its Controller wrapper
produce byte-identical EpisodeResults.

Files changed (4) hide show

openra_bench/agent.py +18 -0
openra_bench/controller.py +162 -0
openra_bench/eval_core.py +40 -6
tests/test_controller.py +256 -0

openra_bench/agent.py CHANGED Viewed

@@ -486,6 +486,11 @@ class ModelAgent:
             )
         self.history: list[dict] = [{"role": "system", "content": sys_content}]
         self.stats = {"turns": 0, "tool_calls": 0, "empty_replies": 0}
     def _user_message(self, render_state: dict) -> dict:
         # Briefing = vendored training briefing_v2 (one unit/line,
@@ -614,3 +619,16 @@ class ModelAgent:
                 {"role": "tool", "tool_call_id": f"c{i}", "content": "ok"}
             )
         return cmds

             )
         self.history: list[dict] = [{"role": "system", "content": sys_content}]
         self.stats = {"turns": 0, "tool_calls": 0, "empty_replies": 0}
+        # Controller contract (openra_bench/controller.py): a ModelAgent
+        # IS a Controller — it exposes `name`, `reset`, `act` so the
+        # eval loop, the 1v1 harness, and the human-labeling harness can
+        # all drive it interchangeably with any other policy backend.
+        self.name = getattr(cfg, "model", None) or "model"
     def _user_message(self, render_state: dict) -> dict:
         # Briefing = vendored training briefing_v2 (one unit/line,
                 {"role": "tool", "tool_call_id": f"c{i}", "content": "ok"}
             )
         return cmds
+    # ── Controller contract ──────────────────────────────────────────
+    def act(self, observation: dict, Command: Any) -> list:
+        """Controller-contract entry point: run one turn via `agent_fn`.
+        Forwards `observation` and `Command` to `self.agent_fn` and
+        returns its result unchanged, so a ModelAgent instance can be
+        handed to `run_level` / the 1v1 harness wherever a bare
+        `agent_fn` callable is accepted."""
+        commands = self.agent_fn(observation, Command)
+        return commands
+    def reset(self, ctx: Any = None) -> None:
+        """Per-episode hook required by the Controller contract; no-op.
+        `ctx` is accepted and ignored. A ModelAgent is built once per
+        episode and seeds its chat history in `__init__`, so there is
+        nothing to re-arm; the method exists only so the agent
+        structurally satisfies the Controller protocol.
+        NOTE(review): because nothing is cleared here, an instance that
+        is reused across episodes keeps its accumulated history/stats."""
+        return None

openra_bench/controller.py ADDED Viewed

	@@ -0,0 +1,162 @@

+"""Unified policy interface for the OpenRA-Bench eval stack.
+Every actor that can drive a side of a scenario — an LLM agent, a human
+labeler, a scripted reference policy — implements the same contract:
+    controller.act(observation, Command) -> list[Command]
+This is the keystone of the human-labeling machine and the 1v1
+adversarial harness: one harness, interchangeable policy backends.
+`run_level` / `run_episode` drive a single Controller; a 1v1 match
+drives two, one per side, each fed its own side-specific observation.
+Back-compat is non-negotiable: the historical policy shape was a bare
+callable ``agent_fn(render_state, Command) -> [Command]`` and ~190 test
+files still pass one. `as_controller()` adapts any such callable (or a
+`ModelAgent` bound method) into a Controller, so every existing scripted
+policy and test keeps working unchanged — the eval loop simply coerces
+its policy argument through `as_controller()` before stepping.
+Design notes
+------------
+* `act` keeps `Command` as an explicit parameter rather than binding it
+  at construction. `Command` is the pyo3 `openra_train.Command` factory
+  handle, only available once an env exists; threading it per-call keeps
+  Controllers constructible without an engine (cheap to unit-test) and
+  is byte-identical to the legacy `agent_fn` signature.
+* `reset(ctx)` is the per-episode lifecycle hook. Scripted policies
+  ignore it; `ModelAgent.reset` is currently also a no-op, because a
+  ModelAgent is constructed fresh per episode and seeds its history in
+  `__init__`; a human controller would reset its click queue. The 1v1
+  harness calls it once per side with a `side`-stamped `EpisodeContext`.
+* `history` / `stats` are the optional introspection surface the
+  playback writer reads. `BaseController` provides empty defaults so a
+  caller can read them unconditionally.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Callable, Protocol, runtime_checkable
+# A bare legacy policy: (render_state, Command) -> [Command].
+PolicyFn = Callable[[dict, Any], list]
+@dataclass
+class EpisodeContext:
+    """What a Controller is told once, at episode start (`reset`).
+    A scenario eval populates `pack_id` / `level` / `seed` / `objective`;
+    a 1v1 match additionally stamps `side` so the two Controllers know
+    which colour they are driving.
+    Every field has a default, so `EpisodeContext()` is valid and a
+    caller fills in only what it knows."""
+    # Identifier of the scenario pack being run ("" when not supplied).
+    pack_id: str = ""
+    # Level name within the pack ("" when not supplied).
+    level: str = ""
+    # Seed for the episode.
+    seed: int = 0
+    side: str = "agent"  # "agent" | "enemy" — which side this drives
+    # Free-text objective for the episode ("" when not supplied).
+    objective: str = ""
+    # Turn budget for the episode; 0 is the "not supplied" default.
+    max_turns: int = 0
+    # Open-ended extras; default_factory gives each instance its own dict.
+    extra: dict = field(default_factory=dict)
+@runtime_checkable
+class Controller(Protocol):
+    """A policy that observes the world and emits engine Commands.
+    Structural — anything exposing `name`, `reset`, and `act` satisfies
+    it; `ModelAgent` does so without importing this module.
+    Because the protocol is `runtime_checkable`, `isinstance(obj,
+    Controller)` works, but it only verifies that the three members
+    exist — not their signatures or types."""
+    # Label identifying the policy.
+    name: str
+    # Per-episode lifecycle hook; receives that episode's context.
+    def reset(self, ctx: "EpisodeContext") -> None: ...
+    # One decision step: takes the observation dict and the `Command`
+    # handle, returns a list of commands.
+    def act(self, observation: dict, Command: Any) -> list: ...
+def is_controller(obj: Any) -> bool:
+    """Return True when `obj` has callable `act` AND `reset` attributes.
+    This is a duck-typed test, not `isinstance(obj, Controller)`: it
+    requires the two members to be *callable*, which is what rejects a
+    bare policy function (callable itself, but it has neither
+    attribute). Unlike the protocol's isinstance check it does not look
+    at `name`, so an object lacking `name` can still pass."""
+    if not callable(getattr(obj, "act", None)):
+        return False
+    return callable(getattr(obj, "reset", None))
+class BaseController:
+    """Shared base for concrete policies: subclass it and override `act`.
+    Supplies a class-level default `name`, a do-nothing `reset`, and
+    empty `history` / `stats` containers so callers can read the
+    introspection surface on any subclass without checking first."""
+    name: str = "controller"
+    def __init__(self, name: str | None = None) -> None:
+        # Fresh containers per instance — never shared between controllers.
+        self.history: list[dict] = []
+        self.stats: dict[str, Any] = {}
+        # A falsy name (None or "") leaves the class-level default visible.
+        if name:
+            self.name = name
+    def reset(self, ctx: EpisodeContext) -> None:  # noqa: D401
+        """Per-episode lifecycle hook; the base implementation ignores
+        `ctx` and does nothing."""
+        return None
+    def act(self, observation: dict, Command: Any) -> list:
+        """Always raises NotImplementedError naming the concrete class;
+        subclasses must override it."""
+        cls_name = type(self).__name__
+        raise NotImplementedError(f"{cls_name} must implement act()")
+class FunctionController(BaseController):
+    """Controller wrapper around a bare
+    ``agent_fn(render_state, Command) -> [Command]`` callable — the
+    back-compat bridge for scripted policies.
+    The controller's name is the explicit `name`, else the callable's
+    ``__name__``, else ``"fn"``. If the callable is a bound method
+    (e.g. ``ModelAgent.agent_fn``) its ``__self__`` is stored as
+    `source`, so playback can reach that object's `history` / `stats`;
+    for a plain function `source` is None. `reset` is the inherited
+    no-op and is not forwarded to `source`."""
+    def __init__(
+        self, fn: PolicyFn, name: str | None = None
+    ) -> None:
+        resolved_name = name or getattr(fn, "__name__", None) or "fn"
+        super().__init__(resolved_name)
+        self.source: Any = getattr(fn, "__self__", None)
+        self._fn = fn
+    def act(self, observation: dict, Command: Any) -> list:
+        """Call the wrapped callable and return its result unchanged."""
+        policy = self._fn
+        return policy(observation, Command)
+def as_controller(policy: Any, name: str | None = None) -> Controller:
+    """Turn any policy-shaped value into a Controller.
+    Resolution order:
+      * `policy` already passes `is_controller` — it is returned as the
+        very same object (so the call is idempotent) and `name` is
+        ignored;
+      * `policy` is any other callable (bare `agent_fn` or a bound
+        method) — it is wrapped in a `FunctionController`, which
+        receives `name` and keeps a bound method's `__self__` on
+        `.source`.
+    Raises `TypeError` when `policy` is neither."""
+    if is_controller(policy):
+        return policy
+    if not callable(policy):
+        kind = type(policy).__name__
+        raise TypeError(
+            f"cannot coerce {kind} into a Controller: "
+            "expected a Controller, a ModelAgent, or an "
+            "agent_fn(render_state, Command) -> [Command] callable"
+        )
+    return FunctionController(policy, name)
+def introspection_source(controller: Controller) -> Any:
+    """Pick the object whose `history` / `stats` playback should read.
+    Returns `controller.source` when that attribute exists and is not
+    None (a `FunctionController` around a bound method); in every other
+    case returns `controller` itself."""
+    wrapped = getattr(controller, "source", None)
+    if wrapped is None:
+        return controller
+    return wrapped

openra_bench/eval_core.py CHANGED Viewed

@@ -20,11 +20,20 @@ from typing import Any, Callable
 import yaml
 from openra_rl_training.training.rust_env_pool import RustEnvPool
 from .rust_adapter import EpisodeSignals, RustObsAdapter
 from .scenarios.schema import CompiledLevel
 from .scenarios.win_conditions import WinContext, evaluate
 AgentFn = Callable[[dict, Any], list]
 def _scenario_to_tmp_yaml(compiled: CompiledLevel) -> str:
@@ -136,11 +145,14 @@ def scripted_explore_agent(render_state: dict, Command: Any) -> list:
 def run_episode(
     scenario_path: str,
-    agent_fn: AgentFn = scripted_explore_agent,
     max_turns: int = 40,
     seed: int = 0,
     pool: RustEnvPool | None = None,
 ) -> EpisodeResult:
     owns_pool = pool is None
     if pool is None:
         pool = RustEnvPool(size=1, scenario_path=scenario_path)
@@ -149,12 +161,16 @@ def run_episode(
         adapter = RustObsAdapter()
         obs = env.reset(seed=seed)
         adapter.observe(obs)
         trace: list[dict] = []
         turns = 0
         issued = warned = 0
         for turns in range(1, max_turns + 1):
             rs = adapter.render_state()
-            cmds = agent_fn(rs, env.Command) or [env.Command.observe()]
             obs, _reward, done, info = env.step(cmds)
             adapter.observe(obs, done=done)
             issued += len(cmds)
@@ -188,13 +204,17 @@ def run_episode(
 def run_level(
     compiled: CompiledLevel,
-    agent_fn: AgentFn = scripted_explore_agent,
     seed: int = 0,
     playback=None,
 ) -> EpisodeResult:
     """Run one scenario-pack level, scoring against its declarative
     win/fail conditions (checked every turn). Outcome maps to the
     `reward_outcome` convention: win=1.0, draw=0.5, loss=0.0.
     """
     if not compiled.map_supported:
         raise RuntimeError(
@@ -207,6 +227,19 @@ def run_level(
     try:
         adapter = RustObsAdapter()
         adapter.observe(env.reset(seed=seed))
         trace: list[dict] = []
         outcome = "draw"
         turns = 0
@@ -242,7 +275,7 @@ def run_level(
         forbidden = {str(t).lower() for t in (compiled.forbidden_tools or [])}
         for turns in range(1, compiled.max_turns + 1):
             rs = adapter.render_state()
-            cmds = agent_fn(rs, env.Command) or [env.Command.observe()]
             for _cmd in cmds:
                 _tn = _cmd_tool_name(_cmd)
                 if _tn:
@@ -364,8 +397,9 @@ def run_level(
         )
         if playback is not None:
             # Dump the full model⇄env transcript when the agent is a
-            # ModelAgent (bound-method closure exposes the instance).
-            agent_obj = getattr(agent_fn, "__self__", None)
             hist = getattr(agent_obj, "history", None)
             if isinstance(hist, list):
                 playback.write_messages(hist)

 import yaml
 from openra_rl_training.training.rust_env_pool import RustEnvPool
+from .controller import (
+    Controller,
+    EpisodeContext,
+    as_controller,
+    introspection_source,
+)
 from .rust_adapter import EpisodeSignals, RustObsAdapter
 from .scenarios.schema import CompiledLevel
 from .scenarios.win_conditions import WinContext, evaluate
+# A policy is either a bare `agent_fn(render_state, Command) -> [Command]`
+# callable (the legacy shape, still accepted everywhere) or a Controller.
 AgentFn = Callable[[dict, Any], list]
+Policy = "AgentFn | Controller"
 def _scenario_to_tmp_yaml(compiled: CompiledLevel) -> str:
 def run_episode(
     scenario_path: str,
+    agent_fn: "AgentFn | Controller" = scripted_explore_agent,
     max_turns: int = 40,
     seed: int = 0,
     pool: RustEnvPool | None = None,
 ) -> EpisodeResult:
+    """Run a scenario for a fixed number of turns. `agent_fn` may be a
+    bare `agent_fn(render_state, Command) -> [Command]` callable or any
+    `Controller`; it is coerced through `as_controller()`."""
     owns_pool = pool is None
     if pool is None:
         pool = RustEnvPool(size=1, scenario_path=scenario_path)
         adapter = RustObsAdapter()
         obs = env.reset(seed=seed)
         adapter.observe(obs)
+        controller = as_controller(agent_fn)
+        controller.reset(
+            EpisodeContext(seed=seed, max_turns=max_turns)
+        )
         trace: list[dict] = []
         turns = 0
         issued = warned = 0
         for turns in range(1, max_turns + 1):
             rs = adapter.render_state()
+            cmds = controller.act(rs, env.Command) or [env.Command.observe()]
             obs, _reward, done, info = env.step(cmds)
             adapter.observe(obs, done=done)
             issued += len(cmds)
 def run_level(
     compiled: CompiledLevel,
+    agent_fn: "AgentFn | Controller" = scripted_explore_agent,
     seed: int = 0,
     playback=None,
 ) -> EpisodeResult:
     """Run one scenario-pack level, scoring against its declarative
     win/fail conditions (checked every turn). Outcome maps to the
     `reward_outcome` convention: win=1.0, draw=0.5, loss=0.0.
+    `agent_fn` may be a bare `agent_fn(render_state, Command) ->
+    [Command]` callable, a `ModelAgent` bound method, or any
+    `Controller`; it is coerced through `as_controller()`.
     """
     if not compiled.map_supported:
         raise RuntimeError(
     try:
         adapter = RustObsAdapter()
         adapter.observe(env.reset(seed=seed))
+        # Coerce the policy through the unified Controller contract:
+        # a bare agent_fn, a ModelAgent bound method, or a Controller
+        # all resolve to a Controller the loop drives identically.
+        controller = as_controller(agent_fn)
+        controller.reset(
+            EpisodeContext(
+                pack_id=compiled.pack_id,
+                level=compiled.level,
+                seed=seed,
+                objective=compiled.scenario.description or "",
+                max_turns=compiled.max_turns,
+            )
+        )
         trace: list[dict] = []
         outcome = "draw"
         turns = 0
         forbidden = {str(t).lower() for t in (compiled.forbidden_tools or [])}
         for turns in range(1, compiled.max_turns + 1):
             rs = adapter.render_state()
+            cmds = controller.act(rs, env.Command) or [env.Command.observe()]
             for _cmd in cmds:
                 _tn = _cmd_tool_name(_cmd)
                 if _tn:
         )
         if playback is not None:
             # Dump the full model⇄env transcript when the agent is a
+            # ModelAgent — the Controller layer surfaces the underlying
+            # instance (bound-method __self__ or the Controller itself).
+            agent_obj = introspection_source(controller)
             hist = getattr(agent_obj, "history", None)
             if isinstance(hist, list):
                 playback.write_messages(hist)

tests/test_controller.py ADDED Viewed

	@@ -0,0 +1,256 @@

+"""Phase 1 — the unified Controller contract.
+`openra_bench/controller.py` is the keystone of the human-labeling
+machine and the 1v1 adversarial harness: LLM agents, human labelers and
+scripted reference policies all implement one interface,
+    controller.act(observation, Command) -> [Command]
+and `run_level` / `run_episode` drive any of them. This file pins:
+* the coercion layer (`as_controller`) — a bare `agent_fn`, a bound
+  method, and an existing Controller all resolve correctly, so the ~190
+  legacy test files that pass a bare function keep working;
+* the introspection surface (`history` / `stats`) the playback writer
+  reads survives the coercion;
+* `ModelAgent` structurally satisfies the contract;
+* an end-to-end `run_level` smoke: the SAME scripted policy produces a
+  byte-identical outcome whether passed as a bare function or wrapped
+  in a Controller.
+"""
+from __future__ import annotations
+import pytest
+from openra_bench.controller import (
+    BaseController,
+    Controller,
+    EpisodeContext,
+    FunctionController,
+    as_controller,
+    introspection_source,
+    is_controller,
+)
+# ── Coercion (no engine needed) ─────────────────────────────────────
+def _bare_policy(render_state, Command):
+    """Legacy-shape agent_fn used by the coercion tests. Ignores
+    `Command` and returns one ("OBSERVE", id(render_state)) tuple."""
+    marker = ("OBSERVE", id(render_state))
+    return [marker]
+def test_as_controller_wraps_a_bare_function():
+    """A plain function is wrapped in a FunctionController that
+    delegates to it and takes its name from the function."""
+    wrapped = as_controller(_bare_policy)
+    assert is_controller(wrapped)
+    assert isinstance(wrapped, FunctionController)
+    # act() must hand back exactly what the wrapped function returns.
+    result = wrapped.act({"k": 1}, Command=None)
+    first = result[0]
+    assert first[0] == "OBSERVE"
+    # With no explicit name, the function's __name__ is used.
+    assert wrapped.name == "_bare_policy"
+def test_as_controller_is_idempotent_on_a_controller():
+    """Coercing an existing Controller returns the same object."""
+    original = as_controller(_bare_policy)
+    again = as_controller(original)
+    assert again is original, "coercing a Controller must return it unchanged"
+def test_as_controller_rejects_non_callable():
+    """Values that are neither Controllers nor callables raise TypeError."""
+    for not_a_policy in (42, "not a policy"):
+        with pytest.raises(TypeError):
+            as_controller(not_a_policy)
+def test_as_controller_named_override():
+    """An explicit `name` wins over the function's __name__."""
+    wrapped = as_controller(_bare_policy, name="custom")
+    assert wrapped.name == "custom"
+def test_is_controller_discriminates():
+    """Bare callables are rejected; a wrapped one is accepted."""
+    # Functions and lambdas are callable but carry no act/reset.
+    assert not is_controller(_bare_policy)
+    assert not is_controller(lambda rs, C: [])
+    # The FunctionController produced by coercion does qualify.
+    wrapped = as_controller(_bare_policy)
+    assert is_controller(wrapped)
+def test_base_controller_act_is_abstract():
+    """BaseController stores its name, starts with empty introspection
+    containers, and refuses to act until `act` is overridden."""
+    base = BaseController(name="x")
+    assert base.name == "x"
+    assert base.history == [] and base.stats == {}
+    with pytest.raises(NotImplementedError):
+        base.act({}, Command=None)
+def test_episode_context_defaults():
+    """EpisodeContext defaults are as documented and keyword overrides
+    are stored verbatim."""
+    default_ctx = EpisodeContext()
+    assert default_ctx.side == "agent"
+    assert default_ctx.seed == 0 and default_ctx.max_turns == 0
+    assert default_ctx.extra == {}
+    custom_ctx = EpisodeContext(
+        pack_id="p", level="hard", side="enemy", seed=3
+    )
+    observed = (
+        custom_ctx.pack_id, custom_ctx.level, custom_ctx.side, custom_ctx.seed
+    )
+    assert observed == ("p", "hard", "enemy", 3)
+# ── Bound-method source recovery (the playback path) ────────────────
+class _FakeAgent:
+    """Test double for ModelAgent: carries `history` / `stats` and a
+    bound `agent_fn` that counts its own calls in `stats["turns"]`."""
+    def __init__(self):
+        self.stats = {"turns": 0}
+        self.history = [{"role": "system", "content": "hi"}]
+    def agent_fn(self, render_state, Command):
+        """Bump the turn counter and return [("ACT", <new count>)]."""
+        turn = self.stats["turns"] + 1
+        self.stats["turns"] = turn
+        return [("ACT", turn)]
+def test_bound_method_source_is_recovered_for_playback():
+    """Wrapping a bound method keeps its instance reachable, and act()
+    still drives that instance."""
+    fake = _FakeAgent()
+    wrapped = as_controller(fake.agent_fn)
+    assert is_controller(wrapped)
+    # Playback needs the original instance to dump history/stats.
+    assert wrapped.source is fake
+    assert introspection_source(wrapped) is fake
+    wrapped.act({}, None)
+    assert fake.stats["turns"] == 1
+def test_introspection_source_falls_back_to_controller():
+    """With a plain function there is no __self__, so the Controller
+    itself is the introspection source."""
+    wrapped = as_controller(_bare_policy)
+    assert wrapped.source is None
+    assert introspection_source(wrapped) is wrapped
+# ── Subclassing BaseController ──────────────────────────────────────
+class _CountingController(BaseController):
+    """BaseController subclass that counts `reset` and `act` calls and
+    remembers the most recent EpisodeContext."""
+    def __init__(self):
+        super().__init__(name="counter")
+        self.reset_calls = 0
+        self.turns = 0
+    def reset(self, ctx: EpisodeContext) -> None:
+        """Record the context and count the call."""
+        self.last_ctx = ctx
+        self.reset_calls += 1
+    def act(self, observation, Command):
+        """Count the call and return [("TURN", <call number>)]."""
+        self.turns += 1
+        return [("TURN", self.turns)]
+def test_subclassed_controller_satisfies_contract():
+    """A BaseController subclass passes both the duck-typed and the
+    runtime_checkable checks, and its hooks behave as overridden."""
+    counter = _CountingController()
+    assert is_controller(counter)
+    assert isinstance(counter, Controller)  # runtime_checkable structural
+    ctx = EpisodeContext(pack_id="p", side="enemy")
+    counter.reset(ctx)
+    assert counter.reset_calls == 1 and counter.last_ctx.side == "enemy"
+    assert counter.act({}, None) == [("TURN", 1)]
+    assert as_controller(counter) is counter
+# ── ModelAgent structurally conforms ────────────────────────────────
+def test_model_agent_class_exposes_controller_contract():
+    """ModelAgent exposes callable `act`, `reset` and `agent_fn` on the
+    class itself."""
+    # Checked on the class: building an instance needs a provider, and
+    # the contract here is only that the methods exist.
+    from openra_bench.agent import ModelAgent
+    for member in ("act", "reset", "agent_fn"):
+        attr = getattr(ModelAgent, member, None)
+        assert callable(attr), (
+            f"ModelAgent must expose {member}() for the Controller contract"
+        )
+# ── End-to-end: bare fn vs Controller produce identical runs ────────
+def _require_engine():
+    """Skip the *calling test* when the Rust env wheel is not installed.
+    This is called inside each end-to-end test instead of at module
+    level: a module-level `pytest.importorskip` skips the whole module
+    at collection time, which would also skip the engine-free coercion
+    tests defined earlier in this file."""
+    pytest.importorskip("openra_train", reason="Rust env wheel not installed")
+    pytest.importorskip(
+        "openra_rl_training", reason="Rust env wheel not installed"
+    )
+def _stall(render_state, Command):
+    """Scripted policy that only ever issues a single observe command."""
+    return [Command.observe()]
+def _smallest_easy_pack():
+    """Return the compiled easy level with the fewest turns among the
+    active, map-supported packs, or None when no pack qualifies —
+    keeps the end-to-end smoke fast and deterministic."""
+    from openra_bench.scenarios import load_pack
+    from openra_bench.scenarios.loader import PACKS_DIR, compile_level
+    best = None
+    for f in sorted(PACKS_DIR.glob("*.yaml")):
+        # Underscore-prefixed files and TEMPLATE* are not real packs.
+        if f.name.startswith(("_", "TEMPLATE")):
+            continue
+        try:
+            pack = load_pack(f)
+            if pack.meta.status != "active" or "easy" not in pack.levels:
+                continue
+            c = compile_level(pack, "easy")
+        except Exception:  # noqa: BLE001
+            # Deliberately best-effort: a pack that fails to load or
+            # compile is simply not a candidate for the smoke test.
+            continue
+        if not c.map_supported:
+            continue
+        if best is None or c.max_turns < best.max_turns:
+            best = c
+    return best
+def test_run_level_identical_for_bare_fn_and_controller():
+    """The SAME scripted policy must yield the same outcome, turn count,
+    action count and final game tick whether passed as a bare agent_fn
+    or wrapped in a Controller — proof the coercion layer is
+    transparent."""
+    # Must run before importing eval_core, which imports the engine.
+    _require_engine()
+    from openra_bench.eval_core import run_level
+    compiled = _smallest_easy_pack()
+    assert compiled is not None, "no runnable active pack found"
+    r_fn = run_level(compiled, _stall, seed=1)
+    r_ctrl = run_level(compiled, as_controller(_stall, name="stall"), seed=1)
+    assert r_fn.outcome == r_ctrl.outcome
+    assert r_fn.turns == r_ctrl.turns
+    assert r_fn.actions_issued == r_ctrl.actions_issued
+    assert r_fn.signals.game_tick == r_ctrl.signals.game_tick
+def test_run_level_drives_a_subclassed_controller():
+    """A BaseController subclass — the shape HumanController and the
+    scripted-bot wrapper will take — runs end-to-end and its per-episode
+    `reset` hook fires with a populated EpisodeContext."""
+    # Must run before importing eval_core, which imports the engine.
+    _require_engine()
+    from openra_bench.eval_core import run_level
+    compiled = _smallest_easy_pack()
+    assert compiled is not None
+    class _StallController(BaseController):
+        def __init__(self):
+            super().__init__(name="stall-ctrl")
+            self.acts = 0
+            self.ctx = None
+        def reset(self, ctx: EpisodeContext) -> None:
+            self.ctx = ctx
+        def act(self, observation, Command):
+            self.acts += 1
+            return [Command.observe()]
+    ctrl = _StallController()
+    res = run_level(compiled, ctrl, seed=1)
+    assert res.outcome in ("win", "loss", "draw")
+    assert ctrl.acts >= 1, "act() must have been called"
+    assert ctrl.ctx is not None and ctrl.ctx.pack_id == compiled.pack_id
+    assert ctrl.ctx.level == "easy" and ctrl.ctx.seed == 1