Spaces:

qpluslab
/

OpenRA-Bench

Running

yxc20098 committed on May 21

Commit

aab84a0

1 Parent(s): c044b07

Phase 3: 1v1 full-macro adversarial harness

Drives TWO Controllers (Phase 1 contract) — any LLM / human / scripted
mix — over one shared episode via the engine's new step_1v1 two-player
command channel (OpenRA-Rust commit on branch engine-1v1-channel).

openra_bench/one_v_one.py:
- run_1v1(scenario_path, agent_controller, enemy_controller) — each
side issues commands into the same engine frame and is fed its OWN
fog-of-war observation (via step_1v1 / enemy_observation), so an LLM
driving the enemy sees exactly the fogged view its opponent sees.
- Full macro game: economy, production, tech, combat all in play; the
match ends on base elimination or the turn cap, winner decided by
elimination or an economy tiebreak at the deadline.
- The two controllers are interchangeable: model-vs-model,
model-vs-bot, or a HumanController on either side.

tests/test_one_v_one.py: 4 tests — step_1v1 exposure, end-to-end match
termination with a winner, perspective correctness (each side sees its
own actors as own units, disjoint id sets — no perspective leak), and
both sides' commands reaching the engine.

The engine change (build_orders player-parameterization, viewer-
parameterized observation_for, step_1v1, enemy_observation) was
verified non-regressing: the full 8056-test bench suite passes green
against the rebuilt wheel.

Files changed (2) hide show

openra_bench/one_v_one.py +186 -0
tests/test_one_v_one.py +157 -0

openra_bench/one_v_one.py ADDED Viewed

	@@ -0,0 +1,186 @@

+"""Phase 3 — the 1v1 full-macro adversarial harness.
+Drives TWO Controllers (Phase 1 contract) — any mix of LLM / human /
+scripted — over ONE shared episode. Each side is fed its own
+fog-of-war observation and issues commands into the same engine frame
+via the `step_1v1` two-player command channel (engine change shipped
+alongside this module). Full macro game: economy, production, tech and
+combat all in play; the episode ends when one base falls or the turn
+cap is hit.
+`step_1v1` builds each side's orders independently (scoped to that
+player's unit ownership) and applies them into the SAME first frame, so
+neither side moves "first". It returns each player's observation from
+its own shroud — so an LLM driving the enemy sees exactly the fogged
+view its opponent sees, not a god's-eye board.
+Usage:
+    from openra_bench.one_v_one import run_1v1
+    result = run_1v1(scenario_path, agent_controller, enemy_controller)
+    print(result.winner, result.turns)
+The two controllers are interchangeable: pass two `ModelAgent`s for
+model-vs-model, a `ModelAgent` + a scripted Controller for
+agent-vs-bot, or a `HumanController` on either side.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+from .controller import EpisodeContext, as_controller
+from .rust_adapter import RustObsAdapter
+@dataclass
+class OneVOneResult:
+    """Outcome of a 1v1 match.
+    `winner` is from the agent side's frame of reference: "agent",
+    "enemy", or "draw". `reason` records why the match ended."""
+    # Which side won, named from the agent side's point of view.
+    winner: str  # "agent" | "enemy" | "draw"
+    # Human-readable explanation of how the winner was decided
+    # (elimination, mutual elimination, or the economy tie-break).
+    reason: str
+    # Number of decision turns played; 0 if the turn loop never ran.
+    turns: int
+    # Engine game tick as reported by the agent-side observation adapter
+    # when the match ended.
+    ticks: int
+    # Controller names; `run_1v1` falls back to "agent" / "enemy" when a
+    # controller has no `name` attribute.
+    agent_name: str
+    enemy_name: str
+    # One dict per turn and per side with keys "turn", "tick" and
+    # "n_cmds" (how many commands that side submitted on that turn).
+    agent_trace: list[dict] = field(default_factory=list)
+    enemy_trace: list[dict] = field(default_factory=list)
+def _alive(render_state: dict) -> bool:
+    """Report whether a side still has anything left on the board.
+    A side counts as alive while its render state lists at least one
+    entry under `units_summary` or `own_buildings`; a missing key or a
+    `None` / empty value counts as "nothing there"."""
+    for key in ("units_summary", "own_buildings"):
+        if render_state.get(key):
+            return True
+    return False
+def _economy_value(render_state: dict) -> int:
+    """Return cash plus stored resources, the deadline tie-break metric.
+    Each of the two entries is coerced with `int()`; a missing, `None`
+    or otherwise falsy entry contributes 0."""
+    total = 0
+    for key in ("cash", "resources"):
+        total += int(render_state.get(key, 0) or 0)
+    return total
+def _decide_winner(a_rs: dict, e_rs: dict) -> tuple[str, str]:
+    """Return `(winner, reason)` from both sides' final render states.
+    Elimination of exactly one side is decisive. If neither side is left
+    the match is a draw; if both are still standing the economy value
+    breaks the tie."""
+    agent_alive = _alive(a_rs)
+    enemy_alive = _alive(e_rs)
+    if agent_alive and not enemy_alive:
+        return "agent", "enemy base eliminated"
+    if enemy_alive and not agent_alive:
+        return "enemy", "agent base eliminated"
+    if not agent_alive and not enemy_alive:
+        return "draw", "mutual elimination"
+    # Both standing — deadline / turn cap. Tie-break on economy.
+    av, ev = _economy_value(a_rs), _economy_value(e_rs)
+    if av > ev:
+        return "agent", "deadline — agent ahead on economy"
+    if ev > av:
+        return "enemy", "deadline — enemy ahead on economy"
+    return "draw", "deadline — even"
+def run_1v1(
+    scenario_path: str,
+    agent_controller: Any,
+    enemy_controller: Any,
+    seed: int = 0,
+    max_turns: int = 200,
+) -> OneVOneResult:
+    """Run one full-macro 1v1 match and return the result.
+    `agent_controller` / `enemy_controller` are each a Controller, a
+    `ModelAgent`, or a bare `agent_fn` callable — coerced through
+    `as_controller()`. The scenario should leave the enemy side
+    externally controlled (no `enemy.bot_type`); if it declares an
+    engine bot, that bot co-drives the enemy actors alongside this
+    harness's enemy controller.
+    Args:
+        scenario_path: scenario file handed to the env pool.
+        agent_controller: drives the agent side.
+        enemy_controller: drives the enemy side.
+        seed: passed to both controllers' `reset()` and to `env.reset()`.
+        max_turns: cap on decision turns. With `max_turns <= 0` no turn
+            is played and the winner is decided from the tick-0
+            observations.
+    Returns:
+        A `OneVOneResult` with the winner, the reason, turn / tick
+        counts and one trace entry per side per turn.
+    Raises:
+        RuntimeError: the underlying env is missing or has no
+            `step_1v1` (the engine wheel needs rebuilding).
+    """
+    from openra_rl_training.training.rust_env_pool import RustEnvPool
+    agent = as_controller(agent_controller)
+    enemy = as_controller(enemy_controller)
+    pool = RustEnvPool(size=1, scenario_path=scenario_path)
+    # Acquire INSIDE the try so a failing acquire() still shuts the pool
+    # down instead of leaking it.
+    env = None
+    try:
+        env = pool.acquire()
+        agent.reset(
+            EpisodeContext(seed=seed, side="agent", max_turns=max_turns)
+        )
+        enemy.reset(
+            EpisodeContext(seed=seed, side="enemy", max_turns=max_turns)
+        )
+        agent_ad = RustObsAdapter()
+        enemy_ad = RustObsAdapter()
+        Command = env.Command
+        # Seed each side's first fog-of-war observation WITHOUT stepping:
+        # reset() gives the agent's; enemy_observation() gives the
+        # enemy's at the same tick-0 state. (A two-player idle bootstrap
+        # step would waste a whole decision turn — fatal on a combat map
+        # where forces start in contact.) NOTE: the pool rebuilds the
+        # underlying `_env` on reset(), so the raw env must be fetched
+        # AFTER reset() — fetching it earlier captures a stale env.
+        agent_ad.observe(env.reset(seed=seed))
+        raw = getattr(env, "_env", None)
+        if raw is None or not hasattr(raw, "step_1v1"):
+            raise RuntimeError(
+                "engine wheel lacks step_1v1 — rebuild the wheel "
+                "(maturin develop --release) to run 1v1 matches"
+            )
+        enemy_ad.observe(raw.enemy_observation())
+        agent_trace: list[dict] = []
+        enemy_trace: list[dict] = []
+        # Stays 0 when max_turns <= 0 and the loop body never runs.
+        turns = 0
+        for turns in range(1, max_turns + 1):
+            a_rs = agent_ad.render_state()
+            e_rs = enemy_ad.render_state()
+            # An empty / None command list is replaced by one observe()
+            # so each side always submits something for the frame.
+            a_cmds = agent.act(a_rs, Command) or [Command.observe()]
+            e_cmds = enemy.act(e_rs, Command) or [Command.observe()]
+            a_obs, e_obs, done, _info = raw.step_1v1(a_cmds, e_cmds)
+            agent_ad.observe(a_obs, done=done)
+            enemy_ad.observe(e_obs, done=done)
+            agent_trace.append(
+                {
+                    "turn": turns,
+                    "tick": agent_ad.signals.game_tick,
+                    "n_cmds": len(a_cmds),
+                }
+            )
+            enemy_trace.append(
+                {
+                    "turn": turns,
+                    "tick": enemy_ad.signals.game_tick,
+                    "n_cmds": len(e_cmds),
+                }
+            )
+            if done:
+                break
+        # Decide the winner from the final boards.
+        winner, reason = _decide_winner(
+            agent_ad.render_state(), enemy_ad.render_state()
+        )
+        return OneVOneResult(
+            winner=winner,
+            reason=reason,
+            turns=turns,
+            ticks=agent_ad.signals.game_tick,
+            agent_name=getattr(agent, "name", "agent"),
+            enemy_name=getattr(enemy, "name", "enemy"),
+            agent_trace=agent_trace,
+            enemy_trace=enemy_trace,
+        )
+    finally:
+        # Nested so shutdown() still runs if release() itself raises.
+        try:
+            if env is not None:
+                pool.release(env)
+        finally:
+            pool.shutdown()

tests/test_one_v_one.py ADDED Viewed

	@@ -0,0 +1,157 @@

+"""Phase 3 — the 1v1 full-macro adversarial harness.
+`openra_bench/one_v_one.py` drives two Controllers over one shared
+episode via the engine's `step_1v1` two-player command channel. This
+file pins:
+* the engine `step_1v1` itself — both players' commands apply into the
+  same frame, each side gets its OWN fog-of-war observation;
+* `run_1v1` end-to-end — the match terminates and decides a winner;
+* perspective correctness — the enemy controller sees the enemy's
+  actors as its own units, distinct from the agent's view;
+* both controllers' commands actually reach the engine.
+"""
+from __future__ import annotations
+import pytest
+from openra_bench.controller import BaseController, EpisodeContext
+pytest.importorskip("openra_train", reason="Rust env wheel not installed")
+pytest.importorskip(
+    "openra_rl_training", reason="Rust env wheel not installed"
+)
+def test_engine_exposes_step_1v1():
+    """Guard: the installed engine wheel must expose `step_1v1`, the
+    two-player command channel the 1v1 harness depends on."""
+    from openra_train import OpenRAEnv
+    has_channel = hasattr(OpenRAEnv, "step_1v1")
+    assert has_channel, (
+        "engine wheel lacks step_1v1 — rebuild with maturin develop"
+    )
+def _combat_scenario_path() -> str:
+    """Find the first usable combat pack and compile it to a temp
+    scenario YAML the Rust env can load.
+    Packs matching `combat-*.yaml` are tried in sorted order. A pack is
+    usable when it is active, has an "easy" level, compiles without
+    raising, and reports `map_supported`. Raises `RuntimeError` when no
+    pack qualifies."""
+    from openra_bench.eval_core import _scenario_to_tmp_yaml
+    from openra_bench.scenarios import load_pack
+    from openra_bench.scenarios.loader import PACKS_DIR, compile_level
+    for pack_file in sorted(PACKS_DIR.glob("combat-*.yaml")):
+        try:
+            pack = load_pack(pack_file)
+            eligible = (
+                pack.meta.status == "active" and "easy" in pack.levels
+            )
+            compiled = compile_level(pack, "easy") if eligible else None
+        except Exception:  # noqa: BLE001
+            # A pack that fails to load or compile is skipped, not fatal.
+            continue
+        if eligible and compiled.map_supported:
+            return _scenario_to_tmp_yaml(compiled)
+    raise RuntimeError("no runnable combat pack found")
+def _stall(render_state, Command):
+    """Scripted do-nothing controller: ignore the observation and issue a
+    single observe() command every turn."""
+    idle_order = Command.observe()
+    return [idle_order]
+def test_run_1v1_terminates_with_a_winner():
+    """Two scripted controllers play a full match; it ends within the
+    turn cap and the harness decides a winner from the final boards."""
+    from openra_bench.one_v_one import OneVOneResult, run_1v1
+    path = _combat_scenario_path()
+    max_turns = 60
+    res = run_1v1(path, _stall, _stall, seed=1, max_turns=max_turns)
+    assert isinstance(res, OneVOneResult)
+    assert res.winner in ("agent", "enemy", "draw")
+    assert res.reason
+    # With a positive cap the harness plays at least one turn and never
+    # more than the cap (a bare `>= 0` check could not fail).
+    assert 1 <= res.turns <= max_turns
+    assert res.ticks >= 0
+    # The harness appends exactly one trace entry per side per turn.
+    assert len(res.agent_trace) == res.turns
+    assert len(res.enemy_trace) == res.turns
+class _Recorder(BaseController):
+    """Test double that remembers the EpisodeContext it was reset with
+    and the first render state it was asked to act on, and counts how
+    many times `act` was called. It always answers with one observe()."""
+    def __init__(self, name: str):
+        super().__init__(name=name)
+        self.act_calls = 0
+        self.ctx: EpisodeContext | None = None
+        self.first_rs: dict | None = None
+    def reset(self, ctx: EpisodeContext) -> None:
+        self.ctx = ctx
+    def act(self, observation, Command):
+        self.act_calls += 1
+        # Only the first non-None observation is kept.
+        if self.first_rs is None:
+            self.first_rs = observation
+        return [Command.observe()]
+def test_each_side_gets_its_own_perspective():
+    """The agent and enemy controllers are driven with side-stamped
+    EpisodeContexts and fed fog-of-war observations whose own-unit id
+    sets do not overlap."""
+    from openra_bench.one_v_one import run_1v1
+    def _own_ids(render_state) -> set:
+        # Tolerate a missing or None render state / units list, and skip
+        # units without an id: a None collected on both sides would make
+        # the sets overlap and report a false "perspective leak".
+        units = (render_state or {}).get("units_summary") or []
+        return {u["id"] for u in units if u.get("id") is not None}
+    path = _combat_scenario_path()
+    agent_rec = _Recorder("agent-side")
+    enemy_rec = _Recorder("enemy-side")
+    run_1v1(path, agent_rec, enemy_rec, seed=1, max_turns=20)
+    # reset() stamped each side correctly.
+    assert agent_rec.ctx is not None and agent_rec.ctx.side == "agent"
+    assert enemy_rec.ctx is not None and enemy_rec.ctx.side == "enemy"
+    # Both controllers were actually driven.
+    assert agent_rec.act_calls >= 1 and enemy_rec.act_calls >= 1
+    # Each saw its own board — the agent's own units are the enemy's
+    # opponents and vice versa, so no id may be "own" to both players.
+    a_units = _own_ids(agent_rec.first_rs)
+    e_units = _own_ids(enemy_rec.first_rs)
+    # At least one side must field units; the id sets must be disjoint.
+    assert a_units or e_units, "neither side had any units"
+    assert a_units.isdisjoint(e_units), (
+        "agent and enemy observations share an own-unit id — "
+        "perspective leak"
+    )
+def test_both_controllers_commands_reach_the_engine():
+    """Both sides run a controller that orders every own unit to move;
+    the match must advance real ticks and record commands for each
+    side."""
+    from openra_bench.one_v_one import run_1v1
+    path = _combat_scenario_path()
+    def _wander(render_state, Command):
+        # Send every own unit that has an id toward the map centre.
+        own_units = render_state.get("units_summary", []) or []
+        orders = [
+            Command.move_units(
+                [str(unit.get("id"))], target_x=40, target_y=20
+            )
+            for unit in own_units
+            if unit.get("id") is not None
+        ]
+        # With no units to move, fall back to a single observe().
+        return orders or [Command.observe()]
+    res = run_1v1(path, _wander, _wander, seed=2, max_turns=40)
+    assert res.winner in ("agent", "enemy", "draw")
+    # The match advanced real ticks (the engine ran, not a no-op).
+    assert res.ticks > 0
+    # Each side's trace holds a turn with at least one command. Because
+    # of the observe() fallbacks every recorded turn has n_cmds >= 1, so
+    # this mainly confirms that both traces were populated.
+    for trace in (res.agent_trace, res.enemy_trace):
+        assert any(t["n_cmds"] >= 1 for t in trace)