yxc20098 committed on
Commit
aab84a0
·
1 Parent(s): c044b07

Phase 3: 1v1 full-macro adversarial harness

Browse files

Drives TWO Controllers (Phase 1 contract) — any LLM / human / scripted
mix — over one shared episode via the engine's new step_1v1 two-player
command channel (OpenRA-Rust commit on branch engine-1v1-channel).

openra_bench/one_v_one.py:
- run_1v1(scenario_path, agent_controller, enemy_controller) — each
side issues commands into the same engine frame and is fed its OWN
fog-of-war observation (via step_1v1 / enemy_observation), so an LLM
driving the enemy sees exactly the fogged view its opponent sees.
- Full macro game: economy, production, tech, combat all in play; the
match ends on base elimination or the turn cap, winner decided by
elimination or an economy tiebreak at the deadline.
- The two controllers are interchangeable: model-vs-model,
model-vs-bot, or a HumanController on either side.

tests/test_one_v_one.py: 4 tests — step_1v1 exposure, end-to-end match
termination with a winner, perspective correctness (each side sees its
own actors as own units, disjoint id sets — no perspective leak), and
both sides' commands reaching the engine.

The engine change (build_orders player-parameterization, viewer-
parameterized observation_for, step_1v1, enemy_observation) was
verified non-regressing: the full 8056-test bench suite passes green
against the rebuilt wheel.

Files changed (2) hide show
  1. openra_bench/one_v_one.py +186 -0
  2. tests/test_one_v_one.py +157 -0
openra_bench/one_v_one.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Phase 3 — the 1v1 full-macro adversarial harness.
2
+
3
+ Drives TWO Controllers (Phase 1 contract) — any mix of LLM / human /
4
+ scripted — over ONE shared episode. Each side is fed its own
5
+ fog-of-war observation and issues commands into the same engine frame
6
+ via the `step_1v1` two-player command channel (engine change shipped
7
+ alongside this module). Full macro game: economy, production, tech and
8
+ combat all in play; the episode ends when one base falls or the turn
9
+ cap is hit.
10
+
11
+ `step_1v1` builds each side's orders independently (scoped to that
12
+ player's unit ownership) and applies them into the SAME first frame, so
13
+ neither side moves "first". It returns each player's observation from
14
+ its own shroud — so an LLM driving the enemy sees exactly the fogged
15
+ view its opponent sees, not a god's-eye board.
16
+
17
+ Usage:
18
+
19
+ from openra_bench.one_v_one import run_1v1
20
+ result = run_1v1(scenario_path, agent_controller, enemy_controller)
21
+ print(result.winner, result.turns)
22
+
23
+ The two controllers are interchangeable: pass two `ModelAgent`s for
24
+ model-vs-model, a `ModelAgent` + a scripted Controller for
25
+ agent-vs-bot, or a `HumanController` on either side.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ from dataclasses import dataclass, field
31
+ from typing import Any
32
+
33
+ from .controller import EpisodeContext, as_controller
34
+ from .rust_adapter import RustObsAdapter
35
+
36
+
37
@dataclass
class OneVOneResult:
    """Final record of a single 1v1 match.

    The outcome is expressed from the agent side's point of view:
    `winner` is one of "agent", "enemy" or "draw", and `reason` says why
    the match ended. Both trace lists default to fresh empty lists.
    """

    # Which side won: "agent" | "enemy" | "draw".
    winner: str
    # Human-readable explanation of how the match was decided.
    reason: str
    # Number of decision turns played.
    turns: int
    # Engine game tick at the end of the match.
    ticks: int
    # Display names of the two controllers.
    agent_name: str
    enemy_name: str
    # Per-turn records for each side.
    agent_trace: list[dict] = field(default_factory=list)
    enemy_trace: list[dict] = field(default_factory=list)
52
+
53
+
54
+ def _alive(render_state: dict) -> bool:
55
+ """A side is still in the game if it has any unit or any building."""
56
+ units = render_state.get("units_summary") or []
57
+ buildings = render_state.get("own_buildings") or []
58
+ return bool(units) or bool(buildings)
59
+
60
+
61
+ def _economy_value(render_state: dict) -> int:
62
+ """Cash + stored resources — the deadline tie-break metric."""
63
+ return int(render_state.get("cash", 0) or 0) + int(
64
+ render_state.get("resources", 0) or 0
65
+ )
66
+
67
+
68
def _decide_winner(agent_state: dict, enemy_state: dict) -> tuple[str, str]:
    """Return `(winner, reason)` from both sides' final render states.

    Elimination takes precedence; when both sides are still standing the
    match is settled on the economy tie-break.
    """
    agent_alive = _alive(agent_state)
    enemy_alive = _alive(enemy_state)
    if agent_alive and not enemy_alive:
        return "agent", "enemy base eliminated"
    if enemy_alive and not agent_alive:
        return "enemy", "agent base eliminated"
    if not agent_alive and not enemy_alive:
        return "draw", "mutual elimination"

    # Both standing — deadline / turn cap. Tie-break on economy.
    agent_value = _economy_value(agent_state)
    enemy_value = _economy_value(enemy_state)
    if agent_value > enemy_value:
        return "agent", "deadline — agent ahead on economy"
    if enemy_value > agent_value:
        return "enemy", "deadline — enemy ahead on economy"
    return "draw", "deadline — even"


def run_1v1(
    scenario_path: str,
    agent_controller: Any,
    enemy_controller: Any,
    seed: int = 0,
    max_turns: int = 200,
) -> OneVOneResult:
    """Run one full-macro 1v1 match and return the result.

    `agent_controller` / `enemy_controller` are each a Controller, a
    `ModelAgent`, or a bare `agent_fn` callable — coerced through
    `as_controller()`. The scenario should leave the enemy side
    externally controlled (no `enemy.bot_type`); if it declares an
    engine bot, that bot co-drives the enemy actors alongside this
    harness's enemy controller.

    Args:
        scenario_path: Path of the scenario handed to the env pool.
        agent_controller: Controller (or coercible) driving the agent side.
        enemy_controller: Controller (or coercible) driving the enemy side.
        seed: Seed passed to both controllers' reset and to `env.reset`.
        max_turns: Upper bound on the number of decision turns played.

    Returns:
        A `OneVOneResult` whose winner is decided by elimination or, when
        both sides still stand, by the economy tie-break.

    Raises:
        RuntimeError: If the underlying engine env does not expose
            `step_1v1`.
    """
    from openra_rl_training.training.rust_env_pool import RustEnvPool

    agent = as_controller(agent_controller)
    enemy = as_controller(enemy_controller)

    pool = RustEnvPool(size=1, scenario_path=scenario_path)
    # Two nested try/finally blocks: the pool is shut down even when
    # acquire() or release() raises, and release() is only attempted for
    # an env that was actually acquired.
    try:
        env = pool.acquire()
        try:
            agent.reset(
                EpisodeContext(seed=seed, side="agent", max_turns=max_turns)
            )
            enemy.reset(
                EpisodeContext(seed=seed, side="enemy", max_turns=max_turns)
            )

            agent_ad = RustObsAdapter()
            enemy_ad = RustObsAdapter()
            Command = env.Command

            # Seed each side's first fog-of-war observation WITHOUT
            # stepping: reset() gives the agent's; enemy_observation()
            # gives the enemy's at the same tick-0 state. (A two-player
            # idle bootstrap step would waste a whole decision turn —
            # fatal on a combat map where forces start in contact.)
            # NOTE: the pool rebuilds the underlying `_env` on reset(),
            # so the raw env must be fetched AFTER reset() — fetching it
            # earlier captures a stale env.
            agent_ad.observe(env.reset(seed=seed))
            raw = getattr(env, "_env", None)
            if raw is None or not hasattr(raw, "step_1v1"):
                raise RuntimeError(
                    "engine wheel lacks step_1v1 — rebuild the wheel "
                    "(maturin develop --release) to run 1v1 matches"
                )
            enemy_ad.observe(raw.enemy_observation())

            agent_trace: list[dict] = []
            enemy_trace: list[dict] = []
            # Stays 0 when max_turns <= 0 and the loop body never runs.
            turns = 0

            for turns in range(1, max_turns + 1):
                a_rs = agent_ad.render_state()
                e_rs = enemy_ad.render_state()

                # A controller returning nothing still takes its turn:
                # fall back to a single observe command.
                a_cmds = agent.act(a_rs, Command) or [Command.observe()]
                e_cmds = enemy.act(e_rs, Command) or [Command.observe()]
                a_obs, e_obs, done, _info = raw.step_1v1(a_cmds, e_cmds)
                agent_ad.observe(a_obs, done=done)
                enemy_ad.observe(e_obs, done=done)

                agent_trace.append(
                    {
                        "turn": turns,
                        "tick": agent_ad.signals.game_tick,
                        "n_cmds": len(a_cmds),
                    }
                )
                enemy_trace.append(
                    {
                        "turn": turns,
                        "tick": enemy_ad.signals.game_tick,
                        "n_cmds": len(e_cmds),
                    }
                )
                if done:
                    break

            # Decide the winner from the final boards.
            winner, reason = _decide_winner(
                agent_ad.render_state(), enemy_ad.render_state()
            )

            return OneVOneResult(
                winner=winner,
                reason=reason,
                turns=turns,
                ticks=agent_ad.signals.game_tick,
                agent_name=getattr(agent, "name", "agent"),
                enemy_name=getattr(enemy, "name", "enemy"),
                agent_trace=agent_trace,
                enemy_trace=enemy_trace,
            )
        finally:
            pool.release(env)
    finally:
        pool.shutdown()
tests/test_one_v_one.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Phase 3 — the 1v1 full-macro adversarial harness.
2
+
3
+ `openra_bench/one_v_one.py` drives two Controllers over one shared
4
+ episode via the engine's `step_1v1` two-player command channel. This
5
+ file pins:
6
+
7
+ * the engine `step_1v1` itself — both players' commands apply into the
8
+ same frame, each side gets its OWN fog-of-war observation;
9
+ * `run_1v1` end-to-end — the match terminates and decides a winner;
10
+ * perspective correctness — the enemy controller sees the enemy's
11
+ actors as its own units, distinct from the agent's view;
12
+ * both controllers' commands actually reach the engine.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import pytest
18
+
19
+ from openra_bench.controller import BaseController, EpisodeContext
20
+
21
+ pytest.importorskip("openra_train", reason="Rust env wheel not installed")
22
+ pytest.importorskip(
23
+ "openra_rl_training", reason="Rust env wheel not installed"
24
+ )
25
+
26
+
27
def test_engine_exposes_step_1v1():
    """Check that the installed engine wheel ships the two-player channel."""
    from openra_train import OpenRAEnv

    has_channel = hasattr(OpenRAEnv, "step_1v1")
    assert has_channel, (
        "engine wheel lacks step_1v1 — rebuild with maturin develop"
    )
34
+
35
+
36
def _combat_scenario_path() -> str:
    """Return a temp scenario YAML compiled from the first usable combat pack.

    Walks the `combat-*.yaml` packs in sorted order and picks the first
    one that is active, has an "easy" level, compiles, and reports its
    map as supported. Packs that fail to load or compile are skipped.

    Raises:
        RuntimeError: If no pack satisfies all of the above.
    """
    from openra_bench.eval_core import _scenario_to_tmp_yaml
    from openra_bench.scenarios import load_pack
    from openra_bench.scenarios.loader import PACKS_DIR, compile_level

    for pack_file in sorted(PACKS_DIR.glob("combat-*.yaml")):
        try:
            pack = load_pack(pack_file)
            if not (pack.meta.status == "active" and "easy" in pack.levels):
                continue
            level = compile_level(pack, "easy")
        except Exception:  # noqa: BLE001
            # Best-effort scan: a broken pack just moves on to the next.
            continue
        if not level.map_supported:
            continue
        return _scenario_to_tmp_yaml(level)
    raise RuntimeError("no runnable combat pack found")
54
+
55
+
56
+ def _stall(render_state, Command):
57
+ return [Command.observe()]
58
+
59
+
60
def test_run_1v1_terminates_with_a_winner():
    """A match between two idle scripted controllers ends and yields a
    well-formed result with a decided outcome."""
    from openra_bench.one_v_one import OneVOneResult, run_1v1

    scenario = _combat_scenario_path()
    result = run_1v1(scenario, _stall, _stall, seed=1, max_turns=60)

    assert isinstance(result, OneVOneResult)
    assert result.winner in ("agent", "enemy", "draw")
    assert result.reason
    assert result.turns >= 0
    assert result.ticks >= 0
    # The two sides' per-turn traces have the same length.
    n_agent_turns = len(result.agent_trace)
    n_enemy_turns = len(result.enemy_trace)
    assert n_agent_turns == n_enemy_turns
75
+
76
+
77
class _Recorder(BaseController):
    """Test double that remembers what the harness handed it.

    Stores the EpisodeContext from `reset()`, the first render state
    passed to `act()`, and how many times `act()` was called. Every turn
    it answers with a single observe command.
    """

    def __init__(self, name: str):
        super().__init__(name=name)
        self.first_rs: dict | None = None
        self.ctx: EpisodeContext | None = None
        self.act_calls = 0

    def reset(self, ctx: EpisodeContext) -> None:
        # Keep the context so the test can inspect the stamped side.
        self.ctx = ctx

    def act(self, observation, Command):
        self.act_calls += 1
        # Only the earliest observation is kept; later ones are ignored.
        if self.first_rs is None:
            self.first_rs = observation
        return [Command.observe()]
95
+
96
+
97
def test_each_side_gets_its_own_perspective():
    """Each controller is reset with its own side stamp and is shown its
    own units — no actor is reported as "own" to both players."""
    from openra_bench.one_v_one import run_1v1

    def _own_unit_ids(recorder):
        # Ids listed as own units in the first render state the recorder saw.
        first_state = recorder.first_rs or {}
        return {unit.get("id") for unit in first_state.get("units_summary", [])}

    scenario = _combat_scenario_path()
    agent_rec = _Recorder("agent-side")
    enemy_rec = _Recorder("enemy-side")
    run_1v1(scenario, agent_rec, enemy_rec, seed=1, max_turns=20)

    # reset() stamped each side correctly.
    assert agent_rec.ctx is not None
    assert agent_rec.ctx.side == "agent"
    assert enemy_rec.ctx is not None
    assert enemy_rec.ctx.side == "enemy"

    # Both controllers were actually driven.
    assert agent_rec.act_calls >= 1
    assert enemy_rec.act_calls >= 1

    # Each side saw its own board: at least one side must field units,
    # and the two own-unit id sets must not overlap.
    a_units = _own_unit_ids(agent_rec)
    e_units = _own_unit_ids(enemy_rec)
    assert a_units or e_units, "neither side had any units"
    assert not (a_units & e_units), (
        "agent and enemy observations share an own-unit id — "
        "perspective leak"
    )
131
+
132
+
133
def test_both_controllers_commands_reach_the_engine():
    """Two move-issuing controllers play a match; the engine advances and
    each side's trace records issued commands."""
    from openra_bench.one_v_one import run_1v1

    scenario = _combat_scenario_path()

    def _wander(render_state, Command):
        # Send every own unit that has an id toward the fixed cell (40, 20).
        own_ids = (
            unit.get("id")
            for unit in render_state.get("units_summary", []) or []
        )
        orders = [
            Command.move_units([str(uid)], target_x=40, target_y=20)
            for uid in own_ids
            if uid is not None
        ]
        return orders or [Command.observe()]

    result = run_1v1(scenario, _wander, _wander, seed=2, max_turns=40)
    assert result.winner in ("agent", "enemy", "draw")
    # The match advanced real ticks (the engine ran, not a no-op).
    assert result.ticks > 0
    # Commands were issued on at least one recorded turn per side.
    for trace in (result.agent_trace, result.enemy_trace):
        assert any(entry["n_cmds"] >= 1 for entry in trace)