Spaces:
Running
Running
| """combat-attack-from-behind-fog — bypass a defensive line via a far | |
| off-axis fog lane and strike the undefended HQ from behind. | |
| Bar: the intended fog-flank (route strike force to y=2 / y=38, drive | |
| east past the line at x=50, descend onto the fact at (100,20)) is the | |
| load-bearing decision. | |
| The strict engine-driven LOSS bar holds for the lazy / brute policies: | |
| • stall (only observe) → LOSS (fact at (100,20) never | |
| razed; the stance:2 line never advances; clock LOSS) | |
| • brute attack_move east → LOSS (column heads down the | |
| engagement axis into the line's overlapping kill envelope; either | |
| busts the survival bar OR never reaches x=100 in time) | |
| Engine note (analogous to combat-flanking-attack): 2tnk cannon DPS | |
| out-trades e3 Dragon DPS at equal range, so a determined frontal | |
| charge MAY survive past a thin line. The structural discrimination is | |
| the FACT-DESTRUCTION clock + the survival bar TOGETHER — a frontal | |
| that survives still doesn't reach x=100 fast enough to raze the fact | |
| before the deadline (column reduces the line one defender at a time | |
| while turns burn). The fog-flank skips the line entirely and reaches | |
| the fact while turns remain. | |
| """ | |
| from __future__ import annotations | |
| from pathlib import Path | |
| import pytest | |
| pytest.importorskip("openra_rl_training", reason="Rust env wheel not installed") | |
| from openra_bench.scenarios import load_pack | |
| from openra_bench.scenarios.loader import compile_level | |
| from openra_bench.scenarios.win_conditions import WinContext, evaluate | |
| PACKS = Path(__file__).parent.parent / "openra_bench" / "scenarios" / "packs" | |
| PACK_PATH = PACKS / "combat-attack-from-behind-fog.yaml" | |
| # ── unit-level predicate checks ────────────────────────────────────── | |
def _ctx(
    units_xy=(),
    tick=1000,
    killed=0,
    lost=0,
    destroyed_records=(),
):
    """Synthesize a WinContext for predicate-level checks.

    Args:
        units_xy: iterable of (x, y) cells; each pair becomes one
            ``{"cell_x": x, "cell_y": y}`` entry in
            ``render_state["units_summary"]``.
        tick: value stored as ``signals.game_tick``.
        killed: value stored as ``signals.units_killed``.
        lost: value stored as ``signals.units_lost``.
        destroyed_records: iterable of (type, x, y) for buildings the agent
            has destroyed (used by enemy_key_buildings_destroyed_in_region).
            Any iterable is accepted, including one-shot generators.

    Returns:
        A WinContext whose ``signals`` is a SimpleNamespace carrying the
        fields set below and whose ``render_state`` holds only
        ``units_summary``.
    """
    import types

    # Materialize once: the records feed both the list field and the count
    # field, and calling len() on a generator would raise TypeError.
    records = list(destroyed_records)
    sig = types.SimpleNamespace(
        game_tick=tick,
        units_killed=killed,
        units_lost=lost,
        own_buildings=[],
        own_building_types=set(),
        enemies_seen_ids=set(),
        enemy_buildings_seen_ids=set(),
        enemy_buildings_destroyed_records=records,
        enemy_buildings_destroyed_types={},
        enemy_buildings_destroyed=len(records),
    )
    return WinContext(
        signals=sig,
        render_state={
            "units_summary": [
                {"cell_x": x, "cell_y": y} for x, y in units_xy
            ]
        },
    )
def test_predicates_easy():
    """Predicate-level checks for the easy tier's win / fail conditions.

    Uses synthetic contexts only (no engine run): fact razed with enough
    survivors wins, no fact razed does not, a lone survivor or a blown
    deadline trips the fail condition, and a fact razed at the sentinel
    location (125,4) does not count toward the win.
    """
    level = compile_level(load_pack(PACK_PATH), "easy")
    squad = [(100, 20), (100, 21), (100, 19), (100, 22)]
    pair = squad[:2]
    lone = squad[:1]
    hq_down = [("fact", 100, 20)]

    # Intended outcome: objective fact razed, at least two tanks left, in time.
    for survivors in (squad, pair):
        assert evaluate(
            level.win_condition,
            _ctx(survivors, tick=3000, destroyed_records=hq_down),
        )

    # Nothing razed: the win predicate must stay false.
    assert not evaluate(
        level.win_condition, _ctx(squad, tick=3000, destroyed_records=())
    )

    # A single surviving tank trips the fail condition.
    assert evaluate(
        level.fail_condition, _ctx(lone, tick=3000, destroyed_records=hq_down)
    )

    # One tick past the 5401 deadline is a loss, and that deadline must be
    # reachable inside the level's turn budget.
    assert evaluate(
        level.fail_condition, _ctx(squad, tick=5402, destroyed_records=())
    )
    assert 5401 <= 93 + 90 * (level.max_turns - 1), (
        "after_ticks 5401 must be reachable within max_turns"
    )

    # A fact razed at the sentinel cell (125,4) must not satisfy the win;
    # the objective is the fact at (100,20).
    decoy_down = [("fact", 125, 4)]
    assert not evaluate(
        level.win_condition,
        _ctx(squad, tick=3000, destroyed_records=decoy_down),
    )
def test_predicates_medium_fact_and_survival_bar():
    """Predicate-level checks for the medium tier.

    Covers the fact-razed win, the two-survivor bar on both the win and
    fail side, the 4501-tick deadline, and the sentinel fact at (125,4).
    """
    level = compile_level(load_pack(PACK_PATH), "medium")
    squad = [(100, 20), (100, 21), (100, 19), (100, 22)]
    pair = squad[:2]
    lone = squad[:1]
    hq_down = [("fact", 100, 20)]

    # Fact razed with four or two survivors, in time: win.
    for survivors in (squad, pair):
        assert evaluate(
            level.win_condition,
            _ctx(survivors, tick=3000, destroyed_records=hq_down),
        )

    # One survivor: the win predicate is false and the fail predicate fires.
    lone_ctx_kwargs = {"tick": 3000, "destroyed_records": hq_down}
    assert not evaluate(level.win_condition, _ctx(lone, **lone_ctx_kwargs))
    assert evaluate(level.fail_condition, _ctx(lone, **lone_ctx_kwargs))

    # Past the deadline is a loss, and the deadline fits the turn budget.
    assert evaluate(
        level.fail_condition, _ctx(squad, tick=4502, destroyed_records=())
    )
    assert 4501 <= 93 + 90 * (level.max_turns - 1)

    # Razing only the sentinel fact at (125,4) does not win.
    assert not evaluate(
        level.win_condition,
        _ctx(squad, tick=3000, destroyed_records=[("fact", 125, 4)]),
    )
def test_predicates_hard_fact_and_survival_bar():
    """Predicate-level checks for the hard tier.

    Covers the fact-razed win, the single-survivor rejection, and the
    4501-tick deadline together with its reachability inside max_turns.
    """
    level = compile_level(load_pack(PACK_PATH), "hard")
    squad = [(100, 20), (100, 21), (100, 19), (100, 22)]
    hq_down = [("fact", 100, 20)]

    # Fact razed, full squad alive, in time: win.
    assert evaluate(
        level.win_condition, _ctx(squad, tick=3000, destroyed_records=hq_down)
    )

    # Only one tank left: the win predicate must be false.
    assert not evaluate(
        level.win_condition,
        _ctx(squad[:1], tick=3000, destroyed_records=hq_down),
    )

    # Past the deadline with nothing razed: loss.
    assert evaluate(
        level.fail_condition, _ctx(squad, tick=4502, destroyed_records=())
    )
    assert 4501 <= 93 + 90 * (level.max_turns - 1), (
        "hard after_ticks 4501 must be reachable within max_turns"
    )
def test_hard_has_two_spawn_point_groups():
    """Hard-tier curation contract: the agent-owned actors must span at
    least two distinct spawn_point groups, so that the strike force's
    start latitude (and with it the fog-flank direction) can vary by seed.

    Actors whose spawn_point is None are counted as group 0.
    """
    c = compile_level(load_pack(PACK_PATH), "hard")
    groups = set()
    for actor in c.scenario.actors:
        if actor.owner != "agent":
            continue
        groups.add(actor.spawn_point if actor.spawn_point is not None else 0)
    assert len(groups) >= 2, f"hard needs ≥2 spawn_point groups, got {groups}"
def test_pack_compiles_and_meta_fields_populated():
    """The pack loads, carries the expected meta fields, and every tier
    compiles with a supported map and both a win and a fail condition."""
    pack = load_pack(PACK_PATH)
    meta = pack.meta
    assert meta.capability == "reasoning"
    assert meta.id == "combat-attack-from-behind-fog"

    anchors = meta.benchmark_anchor
    assert isinstance(anchors, list) and anchors, "benchmark_anchor required"

    # The anchors must mention the doctrines the brief calls out: an SC2 or
    # military source, and a hidden / surprise / fog theme.
    anchor_text = " ".join(anchors).lower()
    assert any(word in anchor_text for word in ("sc2", "military"))
    assert any(word in anchor_text for word in ("hidden", "surprise", "fog"))

    for tier in ("easy", "medium", "hard"):
        compiled = compile_level(pack, tier)
        assert compiled.map_supported
        assert (
            compiled.win_condition is not None
            and compiled.fail_condition is not None
        )
def test_timeout_loss_is_reachable_on_every_level():
    """No draw degeneracy: each tier's after_ticks deadline must fit inside
    the tick budget implied by max_turns (~90 ticks/turn, i.e.
    93 + 90·(max_turns-1))."""
    pack = load_pack(PACK_PATH)
    deadlines = (("easy", 5401), ("medium", 4501), ("hard", 4501))
    for lvl, deadline in deadlines:
        compiled = compile_level(pack, lvl)
        tick_budget = 93 + 90 * (compiled.max_turns - 1)
        assert deadline <= tick_budget, (
            f"{lvl}: after_ticks {deadline} not reachable within max_turns"
        )
def test_objective_fact_is_undefended():
    """The objective fact at (100,20) is the scenario's "soft rear" and
    must be unguarded; otherwise the scenario tests force concentration
    rather than surprise attack.

    On every tier, no enemy-owned actor other than a ``fact`` may sit
    within Manhattan distance 8 of (100,20).
    """
    pack = load_pack(PACK_PATH)
    fact_x, fact_y = 100, 20
    for lvl in ("easy", "medium", "hard"):
        compiled = compile_level(pack, lvl)
        guards = []
        for actor in compiled.scenario.actors:
            if actor.owner != "enemy" or actor.type == "fact":
                continue
            dist = abs(actor.position[0] - fact_x) + abs(actor.position[1] - fact_y)
            if dist <= 8:
                guards.append(actor)
        assert not guards, (
            f"{lvl}: objective fact at (100,20) must be undefended; "
            f"found nearby enemies: {[(g.type, g.position) for g in guards]}"
        )
def test_line_is_west_facing_at_x_50():
    """Structural check on the defensive line at x=50, y=15..25.

    For each tier, collects the enemy e3 / pbox actors in that column,
    requires every e3 among them to be stance 2, and then requires the
    line size to grow strictly from easy to medium to hard.
    """
    pack = load_pack(PACK_PATH)
    sizes = {}
    for lvl in ("easy", "medium", "hard"):
        compiled = compile_level(pack, lvl)
        line = []
        for actor in compiled.scenario.actors:
            on_line = (
                actor.owner == "enemy"
                and actor.position[0] == 50
                and 15 <= actor.position[1] <= 25
                and actor.type in ("e3", "pbox")
            )
            if on_line:
                line.append(actor)

        # stance:2 (Defend = auto-fire in range, no chase) keeps the line
        # POSTED for the frontal-vs-flank discrimination. Only e3 entries
        # are stance-checked here; pbox entries are counted but not checked.
        for defender in line:
            if defender.type != "e3":
                continue
            assert defender.stance == 2, (
                f"{lvl}: line e3 must be stance:2 (got {defender.stance})"
            )
        sizes[lvl] = len(line)

    assert sizes["easy"] < sizes["medium"] < sizes["hard"], (
        f"line size must grow per tier: {sizes}"
    )
| # ── engine-driven scripted policies ────────────────────────────────── | |
| def _targets(enemies): | |
| """Returns enemy combat units in the defensive line (e3 / pbox). | |
| The objective fact is filtered out — it's a building, not a target | |
| of intermediate combat for these policies.""" | |
| return [ | |
| e for e in enemies | |
| if (e.get("type") or "").lower() in ("e3", "pbox") | |
| and not e.get("is_building", False) | |
| ] | |
| def _stall_policy(rs, Command): | |
| """Stall: only observe. Fact at (100,20) never razed → win | |
| predicate unmet → clock LOSS. The stance:2 line never advances | |
| on the strike force; the fact is never in the strike force's | |
| path because the strike force never moves.""" | |
| return [Command.observe()] | |
| def _brute_attack_move_policy(rs, Command): | |
| """Brute attack_move east. Engine auto-targets the nearest | |
| hostile (the e3/pbox on the line); the column gets pinned at | |
| x≈45..55 reducing the line one defender at a time while turns | |
| burn. Either survival bar busts (medium/hard) or the deadline | |
| fires before x=100 is reached.""" | |
| units = rs.get("units_summary", []) or [] | |
| if not units: | |
| return [Command.observe()] | |
| cmds = [] | |
| for u in units: | |
| cmds.append( | |
| Command.attack_move([str(u["id"])], target_x=110, target_y=u["cell_y"]) | |
| ) | |
| return cmds | |
def _frontal_charge_policy(rs, Command):
    """Frontal policy: advance east on the engagement axis and attack the
    nearest line defender once one is visible.

    Per unit:
      * if any line defender (as selected by ``_targets``) is visible and
        the unit is at x >= 40, attack the defender with the smallest
        Manhattan distance to the unit;
      * otherwise move along the unit's own row to ``min(48, x + 12)``.

    With no own units the policy just observes.
    """
    squad = rs.get("units_summary", []) or []
    visible = rs.get("enemy_summary", []) or []
    defenders = _targets(visible)
    if not squad:
        return [Command.observe()]

    def _order_for(tank):
        # Build the single order for one tank.
        tx, ty = tank["cell_x"], tank["cell_y"]
        if defenders and tx >= 40:
            nearest = min(
                defenders,
                key=lambda d: abs(d["cell_x"] - tx) + abs(d["cell_y"] - ty),
            )
            return Command.attack_unit([str(tank["id"])], str(nearest["id"]))
        return Command.move_units(
            [str(tank["id"])], target_x=min(48, tx + 12), target_y=ty
        )

    return [_order_for(tank) for tank in squad]
| def _intended_fog_flank_policy(rs, Command): | |
| """Intended fog-flank cycle (the spec's load-bearing decision): | |
| route the strike force to the far north (y=2) or far south | |
| (y=38) — depending on the spawn latitude — drive east past | |
| x=100, then turn inward to descend on the fact at (100,20). | |
| The line never fires on the flanker (out of range). | |
| """ | |
| units = rs.get("units_summary", []) or [] | |
| if not units: | |
| return [Command.observe()] | |
| avg_y = sum(u["cell_y"] for u in units) / max(1, len(units)) | |
| going_north = avg_y < 20 | |
| fog_y = 2 if going_north else 38 | |
| cmds = [] | |
| for u in units: | |
| ux, uy = u["cell_x"], u["cell_y"] | |
| # Phase 1: get to the fog lane. | |
| if (going_north and uy > fog_y + 3) or (not going_north and uy < fog_y - 3): | |
| cmds.append( | |
| Command.move_units([str(u["id"])], target_x=ux, target_y=fog_y) | |
| ) | |
| # Phase 2: drive east along the fog lane to past the fact's | |
| # longitude. | |
| elif ux < 100: | |
| cmds.append( | |
| Command.move_units( | |
| [str(u["id"])], target_x=min(105, ux + 12), target_y=fog_y | |
| ) | |
| ) | |
| # Phase 3: descend onto the fact at (100,20). | |
| else: | |
| cmds.append( | |
| Command.attack_move([str(u["id"])], target_x=100, target_y=20) | |
| ) | |
| return cmds | |
def test_stall_policy_loses(level):
    """Stall must LOSE on medium and hard (fact at (100,20) never
    razed; the stance:2 line never advances → after_ticks LOSS).

    Runs seeds 1-4 on hard and seed 1 on any other level.

    NOTE(review): ``level`` has no parametrize decorator or fixture in
    this view of the file — confirm one supplies it.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    compiled = compile_level(load_pack(PACK_PATH), level)
    seed_set = (1,) if level != "hard" else (1, 2, 3, 4)
    for seed in seed_set:
        result = run_level(compiled, _stall_policy, seed=seed)
        assert result.outcome == "loss", (
            f"{level} seed={seed}: stall must LOSE; got {result.outcome} "
            f"killed={result.signals.units_killed} lost={result.signals.units_lost}"
        )
def test_brute_attack_move_loses(level):
    """Brute attack_move east must LOSE — the column gets pinned at the
    line, reducing defenders one at a time while turns burn; the fact at
    (100,20) is never razed in time AND/OR the survival bar busts.

    Runs seeds 1-4 on hard and seed 1 on any other level.

    NOTE(review): ``level`` has no parametrize decorator or fixture in
    this view of the file — confirm one supplies it.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    compiled = compile_level(load_pack(PACK_PATH), level)
    seed_set = (1,) if level != "hard" else (1, 2, 3, 4)
    for seed in seed_set:
        result = run_level(compiled, _brute_attack_move_policy, seed=seed)
        assert result.outcome == "loss", (
            f"{level} seed={seed}: brute attack_move must LOSE; got "
            f"{result.outcome} killed={result.signals.units_killed} "
            f"lost={result.signals.units_lost}"
        )
def test_frontal_charge_loses_medium():
    """Frontal head-on charge on medium SHOULD LOSE on seed=1.

    The original docstring calls this a documented xfail and points at a
    decorator. NOTE(review): no decorator is visible in this view of the
    file — confirm the xfail marker is present. The stall / brute LOSS
    bars are the strict ones.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    compiled = compile_level(load_pack(PACK_PATH), "medium")
    result = run_level(compiled, _frontal_charge_policy, seed=1)
    assert result.outcome == "loss", (
        f"medium seed=1: frontal-charge expected LOSS, got {result.outcome} "
        f"killed={result.signals.units_killed} lost={result.signals.units_lost}"
    )
def test_intended_fog_flank_wins_medium():
    """Intended fog-flank cycle SHOULD WIN on medium seed=1.

    The original docstring calls this a documented xfail and points at a
    decorator. NOTE(review): no decorator is visible in this view of the
    file — confirm the xfail marker is present.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    compiled = compile_level(load_pack(PACK_PATH), "medium")
    result = run_level(compiled, _intended_fog_flank_policy, seed=1)
    assert result.outcome == "win", (
        f"medium seed=1: intended fog-flank should WIN, got {result.outcome} "
        f"killed={result.signals.units_killed} lost={result.signals.units_lost}"
    )