Spaces:
Running
Running
| """combat-divide-and-conquer — split a two-cluster enemy and beat them | |
| in detail (engage one cluster while the other is unengaged), instead of | |
| pushing the midpoint where BOTH clusters bear on the strike force. | |
| Bar: the intended divide-and-conquer cycle (flank well off-axis so only | |
| ONE cluster is in weapon range, eliminate it, then pivot to the OTHER | |
| cluster in isolation) is the load-bearing decision under the win/fail | |
| predicates. | |
| The strict engine-driven LOSS bar holds for the lazy / brute policies: | |
| • stall (only observe) → LOSS (kill bar unmet on clock — | |
| enemy clusters at x=60 don't reach the strike force at x=6 inside | |
| the budget and the kill bar (≥4 easy / ≥8 medium/hard) is never met) | |
| • brute attack_move east on y=20 → LOSS (head-on midpoint geometry; | |
| column marches into the zone where BOTH clusters bear on the lead | |
| tank simultaneously, busting the own_units_gte:3 survival bar | |
| before either cluster is cleared) | |
| Engine note (verified 2026-05-20): on the OpenRA-Rust combat numbers, | |
| a simple reactive "fly to flank-y then re-target" scripted policy is | |
| slow to accumulate kills and tends to draw on the clock with too few | |
| kills. The PREDICATE-level discrimination is strict and correct (a | |
| midpoint head-on charge that loses ≥2 tanks LOSES under | |
| own_units_gte:3; a flank cycle that clears both clusters with ≥3 tanks | |
| alive WINS); the engine-driven WIN-side test for the intended flank | |
| cycle is xfail'd with the rationale inline, matching the established | |
| pattern in combat-flanking-attack. Stall and brute LOSS remain strict. | |
| """ | |
| from __future__ import annotations | |
| from pathlib import Path | |
| import pytest | |
| pytest.importorskip("openra_rl_training", reason="Rust env wheel not installed") | |
| from openra_bench.scenarios import load_pack | |
| from openra_bench.scenarios.loader import compile_level | |
| from openra_bench.scenarios.win_conditions import WinContext, evaluate | |
| PACKS = Path(__file__).parent.parent / "openra_bench" / "scenarios" / "packs" | |
| PACK_PATH = PACKS / "combat-divide-and-conquer.yaml" | |
| # ── unit-level predicate checks ────────────────────────────────────── | |
| def _ctx(units_xy=(), tick=1000, killed=0, lost=0, fact=True): | |
| """Synthesize a WinContext for predicate-level checks. | |
| `fact` toggles whether the agent still has a Construction Yard | |
| (drives the has_building:fact / not has_building:fact clauses). | |
| """ | |
| import types | |
| own_buildings = [{"type": "fact"}] if fact else [] | |
| sig = types.SimpleNamespace( | |
| game_tick=tick, | |
| units_killed=killed, | |
| units_lost=lost, | |
| own_buildings=own_buildings, | |
| own_building_types={"fact"} if fact else set(), | |
| enemies_seen_ids=set(), | |
| enemy_buildings_seen_ids=set(), | |
| ) | |
| return WinContext( | |
| signals=sig, | |
| render_state={ | |
| "units_summary": [ | |
| {"cell_x": x, "cell_y": y} for x, y in units_xy | |
| ] | |
| }, | |
| ) | |
| def test_predicates_easy(): | |
| c = compile_level(load_pack(PACK_PATH), "easy") | |
| tanks4 = [(6, 18), (6, 19), (6, 20), (6, 21)] | |
| tanks3 = tanks4[:3] | |
| tanks2 = tanks4[:2] | |
| # Intended: 4 kills, ≥3 tanks alive, fact survives, in time → WIN | |
| assert evaluate(c.win_condition, _ctx(tanks4, tick=3000, killed=4, lost=0)) | |
| assert evaluate(c.win_condition, _ctx(tanks3, tick=3000, killed=4, lost=1)) | |
| # 2 tanks remaining → predicate fails (need ≥3) | |
| assert not evaluate(c.win_condition, _ctx(tanks2, tick=3000, killed=4, lost=2)) | |
| # 3 kills only → predicate fails (need ≥4) | |
| assert not evaluate(c.win_condition, _ctx(tanks4, tick=3000, killed=3, lost=0)) | |
| # No fact → predicate fails | |
| assert not evaluate( | |
| c.win_condition, _ctx(tanks4, tick=3000, killed=4, lost=0, fact=False) | |
| ) | |
| # 2 tanks remaining → fail clause fires (not own_units_gte:3) | |
| assert evaluate(c.fail_condition, _ctx(tanks2, tick=3000, killed=4, lost=2)) | |
| # Fact destroyed → fail clause fires | |
| assert evaluate(c.fail_condition, _ctx(tanks4, tick=3000, killed=4, lost=0, fact=False)) | |
| # Past deadline → real loss, reachable within max_turns | |
| assert evaluate(c.fail_condition, _ctx(tanks4, tick=4502, killed=0, lost=0)) | |
| assert 4501 <= 93 + 90 * (c.max_turns - 1), ( | |
| "after_ticks 4501 must be reachable within max_turns" | |
| ) | |
| def test_predicates_medium_eight_kill_three_survive_bar(): | |
| c = compile_level(load_pack(PACK_PATH), "medium") | |
| tanks4 = [(6, 18), (6, 19), (6, 20), (6, 21)] | |
| tanks3 = tanks4[:3] | |
| tanks2 = tanks4[:2] | |
| # Intended: 8 kills, ≥3 tanks alive, fact survives, in time → WIN | |
| assert evaluate(c.win_condition, _ctx(tanks4, tick=3000, killed=8, lost=0)) | |
| assert evaluate(c.win_condition, _ctx(tanks3, tick=3000, killed=8, lost=1)) | |
| # 2 tanks remaining → predicate fails (need ≥3) | |
| assert not evaluate(c.win_condition, _ctx(tanks2, tick=3000, killed=8, lost=2)) | |
| # 7 kills only → predicate fails (need ≥8) | |
| assert not evaluate(c.win_condition, _ctx(tanks4, tick=3000, killed=7, lost=0)) | |
| # 2 tanks remaining → fail clause fires (not own_units_gte:3) | |
| assert evaluate(c.fail_condition, _ctx(tanks2, tick=3000, killed=8, lost=2)) | |
| # Fact destroyed → fail clause fires | |
| assert evaluate(c.fail_condition, _ctx(tanks4, tick=3000, killed=8, lost=0, fact=False)) | |
| # Past deadline → real loss, reachable | |
| assert evaluate(c.fail_condition, _ctx(tanks4, tick=4502, killed=0, lost=0)) | |
| assert 4501 <= 93 + 90 * (c.max_turns - 1) | |
| def test_predicates_hard_eight_kill_three_survive_bar(): | |
| c = compile_level(load_pack(PACK_PATH), "hard") | |
| # NORTH staging (spawn_point 0): y=10..13. | |
| tanks4_n = [(6, 10), (6, 11), (6, 12), (6, 13)] | |
| # Intended: 8 kills, ≥3 alive, fact survives, in time → WIN | |
| assert evaluate(c.win_condition, _ctx(tanks4_n, tick=3000, killed=8, lost=0)) | |
| # 2 tanks remaining → predicate fails | |
| assert not evaluate( | |
| c.win_condition, _ctx(tanks4_n[:2], tick=3000, killed=8, lost=2) | |
| ) | |
| # 7 kills only → predicate fails | |
| assert not evaluate(c.win_condition, _ctx(tanks4_n, tick=3000, killed=7, lost=0)) | |
| # Past deadline → real loss, reachable | |
| assert evaluate(c.fail_condition, _ctx(tanks4_n, tick=4502, killed=0, lost=0)) | |
| assert 4501 <= 93 + 90 * (c.max_turns - 1), ( | |
| "hard after_ticks 4501 must be reachable within max_turns" | |
| ) | |
| def test_hard_has_two_spawn_point_groups(): | |
| """Hard-tier curation contract: ≥2 distinct agent spawn_point | |
| groups so the seed round-robins the strike force start latitude; | |
| the first flank target flips per seed (NORTH spawn engages | |
| Cluster A first; SOUTH spawn engages Cluster B first).""" | |
| c = compile_level(load_pack(PACK_PATH), "hard") | |
| groups = { | |
| (a.spawn_point if a.spawn_point is not None else 0) | |
| for a in c.scenario.actors | |
| if a.owner == "agent" | |
| } | |
| assert len(groups) >= 2, f"hard needs ≥2 spawn_point groups, got {groups}" | |
| def test_pack_compiles_and_meta_fields_populated(): | |
| pack = load_pack(PACK_PATH) | |
| assert pack.meta.capability == "reasoning" | |
| assert pack.meta.id == "combat-divide-and-conquer" | |
| anchors = pack.meta.benchmark_anchor | |
| assert isinstance(anchors, list) and anchors, "benchmark_anchor required" | |
| joined = " ".join(anchors).lower() | |
| # Anchored to the doctrines the brief calls out: SMAC squad-isolation, | |
| # CICERO splitting, military divide-and-conquer. | |
| assert "smac" in joined or "squad-isolation" in joined | |
| assert "cicero" in joined or "splitting" in joined | |
| assert "divide" in joined or "conquer" in joined | |
| for lvl in ("easy", "medium", "hard"): | |
| c = compile_level(pack, lvl) | |
| assert c.map_supported | |
| assert c.win_condition is not None and c.fail_condition is not None | |
| def test_timeout_loss_is_reachable_on_every_level(): | |
| """No draw degeneracy: the after_ticks deadline fits inside | |
| max_turns on every level (~90 ticks/turn ⇒ 93 + 90·(max_turns-1)).""" | |
| pack = load_pack(PACK_PATH) | |
| for lvl in ("easy", "medium", "hard"): | |
| c = compile_level(pack, lvl) | |
| assert 4501 <= 93 + 90 * (c.max_turns - 1), ( | |
| f"{lvl}: after_ticks 4501 not reachable within max_turns" | |
| ) | |
| # ── engine-driven scripted policies ────────────────────────────────── | |
| def _targets(enemies): | |
| return [ | |
| e for e in enemies | |
| if (e.get("type") or "").lower() in ("e3", "1tnk") | |
| and not e.get("is_building") | |
| ] | |
| def _stall_policy(rs, Command): | |
| """Stall: only observe. Enemy clusters at x=60 (stance:3 but the | |
| nearest agent is at x=6; the cluster AI tends to hold near its | |
| posted cells until contacted) don't deliver enough damage to the | |
| agent base (`fact` at x=4) inside the budget; the kill bar is | |
| never met → after_ticks LOSS.""" | |
| return [Command.observe()] | |
| def _brute_attack_move_policy(rs, Command): | |
| """Brute attack_move east on the engagement axis. The column | |
| marches into the y=20 midpoint where BOTH clusters bear on the | |
| lead tank simultaneously; concentrated focus-fire from 6 e3 + 2 | |
| 1tnk (or 4 e3 on easy) destroys ≥2 tanks before either cluster | |
| is cleared → busts own_units_gte:3.""" | |
| units = rs.get("units_summary", []) or [] | |
| if not units: | |
| return [Command.observe()] | |
| cmds = [] | |
| for u in units: | |
| cmds.append( | |
| Command.attack_move([str(u["id"])], target_x=110, target_y=u["cell_y"]) | |
| ) | |
| return cmds | |
| def _intended_flank_policy(rs, Command): | |
| """Intended divide-and-conquer cycle (the spec's load-bearing | |
| decision): pick the cluster CLOSER to the strike force latitude | |
| (A at y=15 if the spawn is north; B at y=25 if south); move WELL | |
| off-axis (y=5 for cluster A; y=35 for cluster B) to break line-of- | |
| sight on the FAR cluster; drive east to x≈55; then approach the | |
| target cluster end-on, picking off units 1-2 at a time. After the | |
| first cluster is cleared, pivot to the OPPOSITE flank lane and | |
| repeat against the second cluster in isolation. | |
| """ | |
| units = rs.get("units_summary", []) or [] | |
| enemies = rs.get("enemy_summary", []) or [] | |
| targs = _targets(enemies) | |
| if not units: | |
| return [Command.observe()] | |
| avg_y = sum(u["cell_y"] for u in units) / max(1, len(units)) | |
| # Which cluster(s) still have units? | |
| a_alive = [e for e in targs if 12 <= e["cell_y"] <= 18] | |
| b_alive = [e for e in targs if 22 <= e["cell_y"] <= 28] | |
| # First, engage the cluster on the same side as the spawn (NORTH→A, | |
| # SOUTH→B); once it's cleared, pivot to the other. | |
| if a_alive and (avg_y < 20 or not b_alive): | |
| cluster, flank_y = a_alive, 5 | |
| elif b_alive: | |
| cluster, flank_y = b_alive, 35 | |
| else: | |
| return [Command.observe()] | |
| cmds = [] | |
| for u in units: | |
| ux, uy = u["cell_x"], u["cell_y"] | |
| # Phase 1: get onto the flank lane while still west of x=55. | |
| if ux < 55 and abs(uy - flank_y) > 3: | |
| ty = flank_y | |
| cmds.append( | |
| Command.move_units([str(u["id"])], target_x=min(ux + 8, 55), target_y=ty) | |
| ) | |
| else: | |
| # Phase 2: approach the cluster end-on; engage if in range. | |
| in_range = [ | |
| e for e in cluster | |
| if abs(e["cell_x"] - ux) + abs(e["cell_y"] - uy) <= 5 | |
| ] | |
| if in_range: | |
| t0 = min( | |
| in_range, | |
| key=lambda e: abs(e["cell_x"] - ux) + abs(e["cell_y"] - uy), | |
| ) | |
| cmds.append(Command.attack_unit([str(u["id"])], str(t0["id"]))) | |
| else: | |
| cluster_y = sum(e["cell_y"] for e in cluster) / len(cluster) | |
| ny = uy + (1 if cluster_y > uy else -1) | |
| cmds.append( | |
| Command.move_units( | |
| [str(u["id"])], target_x=min(ux + 5, 60), target_y=ny | |
| ) | |
| ) | |
| return cmds | |
| def test_stall_policy_loses(level): | |
| """Stall must LOSE on medium and hard (kill bar unmet → clock | |
| LOSS; the enemy clusters stay near their posted cells and the | |
| strike force never engages, so units_killed stays at 0 < the | |
| kill bar).""" | |
| pytest.importorskip("openra_train") | |
| from openra_bench.eval_core import run_level | |
| c = compile_level(load_pack(PACK_PATH), level) | |
| seeds = (1, 2, 3, 4) if level == "hard" else (1,) | |
| for s in seeds: | |
| res = run_level(c, _stall_policy, seed=s) | |
| assert res.outcome == "loss", ( | |
| f"{level} seed={s}: stall must LOSE; got {res.outcome} " | |
| f"killed={res.signals.units_killed} lost={res.signals.units_lost}" | |
| ) | |
| def test_brute_attack_move_loses(level): | |
| """Brute attack_move east must LOSE — the head-on midpoint geometry | |
| puts the lead tank inside weapon range of BOTH clusters | |
| simultaneously; concentrated focus-fire busts the survival bar | |
| (≥3 of 4 tanks alive) AND/OR the kill bar isn't met in time.""" | |
| pytest.importorskip("openra_train") | |
| from openra_bench.eval_core import run_level | |
| c = compile_level(load_pack(PACK_PATH), level) | |
| seeds = (1, 2, 3, 4) if level == "hard" else (1,) | |
| for s in seeds: | |
| res = run_level(c, _brute_attack_move_policy, seed=s) | |
| assert res.outcome == "loss", ( | |
| f"{level} seed={s}: brute attack_move must LOSE; got " | |
| f"{res.outcome} killed={res.signals.units_killed} " | |
| f"lost={res.signals.units_lost}" | |
| ) | |
| def test_intended_flank_wins_medium(): | |
| """Intended divide-and-conquer cycle SHOULD WIN on medium seed=1 — | |
| documented xfail (see decorator).""" | |
| pytest.importorskip("openra_train") | |
| from openra_bench.eval_core import run_level | |
| c = compile_level(load_pack(PACK_PATH), "medium") | |
| res = run_level(c, _intended_flank_policy, seed=1) | |
| assert res.outcome == "win", ( | |
| f"medium seed=1: intended divide-and-conquer should WIN, got " | |
| f"{res.outcome} killed={res.signals.units_killed} " | |
| f"lost={res.signals.units_lost}" | |
| ) | |