File size: 11,991 Bytes
c0ad5c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfed54
c0ad5c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
"""combat-harass-aggro-commit โ€” AGGRO variant of the harvester-harass
triple.

The bar: intended focus-defender-then-harv WINS on every level and
every hard seed (1-4); STALL (only observe), RETREAT-ONLY (drive
raiders back west), and ATTACK-HARV-IGNORE-DEFENDER (rush the harvs
while standing in the 3tnk defender's range) all LOSE on every level
and every hard seed โ€” with one documented exception: EASY allows
attack-harv-only to squeak by (forgiving bare-skill tier with 4
raiders and a kill bar of 3). Non-win is a real reachable timeout
LOSS via the `after_ticks` fail clause.

Validation is scripted (no model / network).
"""
from __future__ import annotations

from pathlib import Path

import pytest

pytest.importorskip("openra_rl_training", reason="Rust env wheel not installed")
from openra_bench.scenarios import load_pack
from openra_bench.scenarios.loader import compile_level
from openra_bench.scenarios.win_conditions import WinContext, evaluate

# Directory holding the scenario pack YAML files, resolved relative to
# this test file: two directory levels up, then openra_bench/scenarios/packs.
PACKS = Path(__file__).parent.parent / "openra_bench" / "scenarios" / "packs"
# The single pack file loaded by the tests below.
PACK_PATH = PACKS / "combat-harass-aggro-commit.yaml"


# ── unit-level predicate / metadata checks (no engine) ──────────────


def test_pack_compiles_and_meta_fields_populated():
    """Pack loads with populated metadata and every tier compiles.

    Verifies the pack id and capability, that the two descriptive meta
    fields are non-empty, that ``benchmark_anchor`` is a list of exactly
    four entries which together mention each expected keyword
    (case-insensitive substring match), and that easy / medium / hard
    each compile with a supported map plus a win and a fail condition.
    """
    pack = load_pack(PACK_PATH)
    meta = pack.meta

    assert meta.id == "combat-harass-aggro-commit"
    assert meta.capability == "action"
    assert meta.real_world_meaning, "real_world_meaning required"
    assert meta.robotics_analogue, "robotics_analogue required"

    anchors = meta.benchmark_anchor
    assert isinstance(anchors, list) and len(anchors) == 4, (
        f"benchmark_anchor must list all 4 anchors, got {anchors!r}"
    )

    # Keywords are searched in the lower-cased, space-joined anchor text.
    anchor_text = " ".join(anchors).lower()
    for keyword in ("sc2", "attrition", "aggro", "guerrilla"):
        assert keyword in anchor_text, f"missing anchor keyword: {keyword}"

    for tier in ("easy", "medium", "hard"):
        compiled = compile_level(pack, tier)
        assert compiled.map_supported
        assert compiled.win_condition is not None
        assert compiled.fail_condition is not None


def _ctx(*, units=(), tick=1000, kills=0, lost=0):
    """Build a WinContext from hand-picked values for predicate checks.

    ``units`` is copied into the ``units_summary`` list of the render
    state; ``tick``, ``kills`` and ``lost`` populate ``game_tick``,
    ``units_killed`` and ``units_lost`` on the signals object. Every
    other signal field is zero or an empty container.
    """
    from types import SimpleNamespace

    signal_fields = {
        "game_tick": tick,
        "units_killed": kills,
        "units_lost": lost,
        "cash": 0,
        "resources": 0,
        "own_buildings": [],
        "own_building_types": set(),
        "enemies_seen_ids": set(),
        "enemy_buildings_seen_ids": set(),
    }
    signals = SimpleNamespace(**signal_fields)
    render_state = {"units_summary": list(units)}
    return WinContext(signals=signals, render_state=render_state)


def _alive(n):
    """Return ``n`` synthetic 2tnk unit dicts at cell (10, 20).

    Ids are the strings "1000", "1001", ... in order.
    """
    template = {"cell_x": 10, "cell_y": 20, "type": "2tnk"}
    return [{**template, "id": str(unit_id)} for unit_id in range(1000, 1000 + n)]


def test_easy_predicates():
    """Easy tier: 3 kills with a survivor wins; a wipe or timeout fails."""
    level = compile_level(load_pack(PACK_PATH), "easy")
    win, fail = level.win_condition, level.fail_condition

    # Intended: kills 3, 1 raider alive, in time → WIN
    intended = _ctx(units=_alive(1), tick=2500, kills=3)
    assert evaluate(win, intended)

    # Kill bar unmet (only 2 kills) → not a win
    short_of_bar = _ctx(units=_alive(2), tick=2500, kills=2)
    assert not evaluate(win, short_of_bar)

    # All raiders dead → fail (own_units_gte:1 trips via fail clause)
    wiped = _ctx(units=[], tick=2500, kills=3)
    assert evaluate(fail, wiped)

    # Timeout with bar unmet → fail (after_ticks 4501)
    timed_out = _ctx(units=_alive(2), tick=4502, kills=2)
    assert evaluate(fail, timed_out)


def test_medium_predicates():
    """Medium tier: 4 kills with a survivor wins; a wipe or timeout fails."""
    level = compile_level(load_pack(PACK_PATH), "medium")
    win, fail = level.win_condition, level.fail_condition

    # Intended: kills 4 (1 def + 3 harv), 1 raider alive → WIN
    intended = _ctx(units=_alive(1), tick=2500, kills=4)
    assert evaluate(win, intended)

    # Bar unmet (only 3 kills — partial sweep) → not a win
    partial_sweep = _ctx(units=_alive(1), tick=2500, kills=3)
    assert not evaluate(win, partial_sweep)

    # Force wipe → fail
    wiped = _ctx(units=[], tick=2500, kills=4)
    assert evaluate(fail, wiped)

    # Timeout with bar unmet → fail
    timed_out = _ctx(units=_alive(2), tick=4502, kills=3)
    assert evaluate(fail, timed_out)


def test_hard_predicates():
    """Hard tier: 6 kills with a survivor wins; a wipe or timeout fails."""
    level = compile_level(load_pack(PACK_PATH), "hard")
    win, fail = level.win_condition, level.fail_condition

    # Intended: kills 6 (2 def + 4 harv), 1 raider alive → WIN
    intended = _ctx(units=_alive(1), tick=3500, kills=6)
    assert evaluate(win, intended)

    # Bar unmet → not a win
    short_of_bar = _ctx(units=_alive(2), tick=3500, kills=5)
    assert not evaluate(win, short_of_bar)

    # Force wipe → fail
    wiped = _ctx(units=[], tick=3500, kills=6)
    assert evaluate(fail, wiped)

    # Timeout → fail
    timed_out = _ctx(units=_alive(1), tick=4502, kills=5)
    assert evaluate(fail, timed_out)


def test_timeout_reachable_inside_max_turns():
    """No draw degeneracy: after_ticks 4501 ≤ 93 + 90·(max_turns-1).

    For each tier the last tick is computed from ``max_turns`` with the
    formula above and must be at least 4501 (the fail deadline) and
    4500 (the win window).
    """
    pack = load_pack(PACK_PATH)
    for tier in ("easy", "medium", "hard"):
        compiled = compile_level(pack, tier)
        last_tick = 93 + 90 * (compiled.max_turns - 1)
        assert last_tick >= 4501, (
            f"{tier}: after_ticks 4501 > max reachable tick {last_tick} "
            f"(max_turns={compiled.max_turns}); deadline never bites"
        )
        assert last_tick >= 4500, f"{tier}: within_ticks 4500 > max tick {last_tick}"


def test_hard_has_two_spawn_point_groups():
    """Hard-tier curation: ≥2 distinct agent spawn_point groups so the
    seed round-robins the west-edge corridor (north / south). Engine-
    roundtrip is asserted by tests/test_hard_tier.py."""
    c = compile_level(load_pack(PACK_PATH), "hard")
    # Collect the distinct spawn_point values of agent-owned actors; an
    # actor without a spawn_point is counted as group 0.
    groups = {
        (a.spawn_point if a.spawn_point is not None else 0)
        for a in c.scenario.actors
        if a.owner == "agent"
    }
    # Failure message previously carried mis-decoded bytes in place of "≥".
    assert len(groups) >= 2, f"hard needs ≥2 spawn_point groups, got {groups}"


def test_defender_is_3tnk_and_no_bot():
    """Enemy side declares no bot and fields a 3tnk plus harvs on every tier.

    The defender must be a 3tnk (heavier than 2tnk so a head-on 1-vs-1
    trade loses); with no bot declared, retreat is safe but scoreless
    because the defender holds its post.
    """
    pack = load_pack(PACK_PATH)

    # The enemy config is only read when the pack base is a plain dict.
    enemy_cfg = pack.base.get("enemy") if isinstance(pack.base, dict) else None
    assert enemy_cfg is not None

    # Either key may carry the bot name; a missing / falsy value means none.
    bot = enemy_cfg.get("bot_type") or enemy_cfg.get("bot") or ""
    assert bot == "", f"expected no bot (stationary defender), got {bot!r}"

    for lvl in ("easy", "medium", "hard"):
        compiled = compile_level(pack, lvl)
        enemy_types = [
            actor.type for actor in compiled.scenario.actors if actor.owner == "enemy"
        ]
        assert "3tnk" in enemy_types, f"{lvl}: defender must be 3tnk"
        assert "harv" in enemy_types, f"{lvl}: payload must include harv"


# ── engine-driven scripted policies ─────────────────────────────────


def _own_ids(rs):
    """Return the ids of all units in ``rs["units_summary"]`` as strings.

    A missing or falsy ``units_summary`` yields an empty list.
    """
    own_units = rs.get("units_summary", []) or []
    return [str(unit["id"]) for unit in own_units]


def _enemies_of_type(rs, want_types):
    """Return the ``enemy_summary`` entries whose type is in ``want_types``.

    An entry's type is read from ``"type"``, falling back to
    ``"actor_type"``, and lower-cased before the membership test. A
    missing or falsy ``enemy_summary`` yields an empty list.
    """

    def _kind(enemy):
        return (enemy.get("type") or enemy.get("actor_type") or "").lower()

    seen = rs.get("enemy_summary") or []
    return [enemy for enemy in seen if _kind(enemy) in want_types]


def _stall(rs, Command):
    """Stall policy: issue a single observe command and nothing else.

    ``rs`` is ignored. With no engagement from either side the kill bar
    stays unmet, so the run ends as an after_ticks LOSS.
    """
    idle = Command.observe()
    return [idle]


def _retreat(rs, Command):
    """Retreat policy: order every own unit, one command each, to cell (6, 20).

    That cell is the west spawn, so there is no engagement and no kills,
    which ends as an after_ticks LOSS. The defender is non-mobile, so
    retreat is SAFE but scoreless (this is the discriminating LOSS: a
    model that picks survival over commitment fails the aggro bar).
    Falls back to a single observe when there are no own units.
    """
    orders = [Command.move_units([raider], 6, 20) for raider in _own_ids(rs)]
    if orders:
        return orders
    return [Command.observe()]


def _attack_harv_only(rs, Command):
    """Rush policy: attack-move every own unit, one command each, to (80, 20).

    This rushes the harvester cluster while ignoring the defender. With
    a heavier 3tnk in range, the raiders take fire on the approach and
    while engaging the soft targets — the kill ratio collapses before
    the bar is met, giving a LOSS on medium/hard. Easy is a forgiving
    tier where this play may squeak by. Falls back to a single observe
    when there are no own units.
    """
    orders = [Command.attack_move([raider], 80, 20) for raider in _own_ids(rs)]
    if orders:
        return orders
    return [Command.observe()]


def _intended(rs, Command):
    """Intended policy: focus the defender 3tnk first, then the harvs.

    While any 3tnk is visible, ALL raiders attack the one closest to the
    raiders' centroid; once none is visible they attack the first
    visible harv; with neither in sight each raider attack-moves east
    to (80, 20). This is the aggro doctrine — commit and trade
    favourably on the high-value target before mopping up the payload.
    Returns a single observe when there are no own units.
    """
    raiders = _own_ids(rs)
    if not raiders:
        return [Command.observe()]

    defenders = _enemies_of_type(rs, {"3tnk"})
    if defenders:
        own_units = rs.get("units_summary", [])
        centre_x = sum(unit["cell_x"] for unit in own_units) / len(own_units)
        centre_y = sum(unit["cell_y"] for unit in own_units) / len(own_units)

        def _sq_dist(enemy):
            dx = enemy["cell_x"] - centre_x
            dy = enemy["cell_y"] - centre_y
            return dx ** 2 + dy ** 2

        # min() returns the first of equally-near defenders.
        nearest_id = min(defenders, key=_sq_dist).get("id")
        if nearest_id is not None:
            return [Command.attack_unit(raiders, str(nearest_id))]

    # No targetable defender: go for the first listed harvester.
    harvs = _enemies_of_type(rs, {"harv"})
    if harvs:
        harv_id = harvs[0].get("id")
        if harv_id is not None:
            return [Command.attack_unit(raiders, str(harv_id))]

    # No defenders / harvs in sight — attack-move east into the cluster.
    return [Command.attack_move([raider], 80, 20) for raider in raiders]


@pytest.mark.parametrize("level", ["easy", "medium", "hard"])
@pytest.mark.parametrize("seed", [1, 2, 3, 4])
def test_intended_focus_defender_wins(level, seed):
    """The intended focus-defender-then-harv policy wins on every tier and seed.

    Skipped when ``openra_train`` is not importable.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    compiled = compile_level(load_pack(PACK_PATH), level)
    result = run_level(compiled, _intended, seed=seed)
    assert result.outcome == "win", (
        f"{level} seed={seed}: intended focus-defender-then-harv should "
        f"WIN, got {result.outcome} after {result.turns} turns "
        f"(kills={result.signals.units_killed}, losses={result.signals.units_lost})"
    )


@pytest.mark.parametrize("level", ["easy", "medium", "hard"])
@pytest.mark.parametrize("seed", [1, 2, 3, 4])
def test_stall_loses(level, seed):
    """The observe-only stall policy must end as a LOSS on every tier and seed.

    Skipped when ``openra_train`` is not importable.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    c = compile_level(load_pack(PACK_PATH), level)
    r = run_level(c, _stall, seed=seed)
    # Failure message previously carried mis-decoded bytes in place of "→".
    assert r.outcome == "loss", (
        f"{level} seed={seed}: stall must be a real timeout LOSS "
        f"(no engagement → kill bar unmet), got {r.outcome}"
    )


@pytest.mark.parametrize("level", ["easy", "medium", "hard"])
@pytest.mark.parametrize("seed", [1, 2, 3, 4])
def test_retreat_only_loses(level, seed):
    """Pure retreat (drive all raiders back west) must LOSE on every
    tier — the AGGRO doctrine specifically penalises survival-only
    play. The defender holds its post (no bot), so retreat is SAFE
    but scoreless → after_ticks LOSS.

    Skipped when ``openra_train`` is not importable.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    c = compile_level(load_pack(PACK_PATH), level)
    r = run_level(c, _retreat, seed=seed)
    # Failure message previously carried mis-decoded bytes in place of "→".
    assert r.outcome == "loss", (
        f"{level} seed={seed}: retreat-only must LOSE (no kills → bar "
        f"unmet), got {r.outcome} (kills={r.signals.units_killed})"
    )


@pytest.mark.parametrize("level", ["medium", "hard"])
@pytest.mark.parametrize("seed", [1, 2, 3, 4])
def test_attack_harv_only_loses(level, seed):
    """Rushing the harvs while ignoring the defender must LOSE on medium and hard.

    The 3tnk picks off the raiders while they engage the soft targets.
    Easy is deliberately not parametrized: it is the bare-skill tier
    (4 raiders + kill bar 3 is forgiving enough for this brute play to
    squeak by; documented in the pack's design comment, and matching
    the SCENARIO_REVIEW_CHECKLIST.md note that inert anti-cheat teeth
    are acceptable on easy).

    Skipped when ``openra_train`` is not importable.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    compiled = compile_level(load_pack(PACK_PATH), level)
    result = run_level(compiled, _attack_harv_only, seed=seed)
    assert result.outcome == "loss", (
        f"{level} seed={seed}: attack-harv-only must LOSE (defender "
        f"picks off raiders), got {result.outcome} "
        f"(kills={result.signals.units_killed}, losses={result.signals.units_lost})"
    )