Spaces:
Running
Running
| """Art-of-War long-horizon family: delayed terminal credit. | |
| Asserts the *ordering* property that makes these long-horizon (not the | |
| agent behaviour, which is the model's job): arriving at the objective | |
| early must NOT win when a prerequisite hold (after_ticks) is unmet — | |
| the enabling phase is unrewarded, credit lands only at the end. Plus | |
| every level of each pack in FAMILY compiles, and each pack's easy | |
| level runs on the live engine. | |
| """ | |
| from __future__ import annotations | |
| from pathlib import Path | |
| import pytest | |
| pytest.importorskip("openra_rl_training", reason="Rust env wheel not installed") | |
| from openra_bench.scenarios import load_pack | |
| from openra_bench.scenarios.loader import compile_level | |
| from openra_bench.scenarios.win_conditions import WinContext, evaluate | |
# Directory containing the scenario pack YAML files, located relative to
# this module: <parent of this file's directory>/openra_bench/scenarios/packs.
PACKS = Path(__file__).parent.parent.joinpath(
    "openra_bench", "scenarios", "packs"
)

# Pack ids of the active long-horizon family; each id is the stem of a
# "<id>.yaml" file under PACKS.
#
# artofwar-decoy-sacrifice is deliberately absent: it was archived (see
# openra_bench/scenarios/packs/_archive/), and the sister packs cover the
# decoy/sacrifice idiom in the active set.
FAMILY = [
    "artofwar-indirect-approach",
    "artofwar-lure-the-tiger",
    "artofwar-sequenced-citadel",
]
def _ctx(units_xy, tick, lost=0):
    """Build a one-shot WinContext for a single predicate evaluation.

    ``units_xy`` is an iterable of ``(x, y)`` pairs; each pair becomes one
    ``{"cell_x": x, "cell_y": y}`` entry in
    ``render_state["units_summary"]``.  ``tick`` and ``lost`` are exposed
    as ``signals.game_tick`` and ``signals.units_lost`` on a brand-new
    throwaway object, so no state is shared between calls.
    """
    signal_attrs = {"game_tick": tick, "units_lost": lost}
    # An ad-hoc class named "S" whose class attributes act as the signals.
    signals = type("S", (), signal_attrs)()

    summary = []
    for x, y in units_xy:
        summary.append({"cell_x": x, "cell_y": y})

    return WinContext(
        signals=signals,
        render_state={"units_summary": summary},
    )
def _seq_ctx(sig, units_xy):
    """Build a WinContext around a caller-owned, persistent signals object.

    Unlike a context with fresh signals per call, ``sig`` is passed
    through unchanged, so anything the predicate stores on it (e.g.
    waypoint_sequence's latch) carries across successive calls, like in
    a real episode.  ``units_xy`` is an iterable of ``(x, y)`` pairs,
    each turned into a ``{"cell_x": x, "cell_y": y}`` entry of
    ``render_state["units_summary"]``.
    """
    summary = []
    for x, y in units_xy:
        summary.append({"cell_x": x, "cell_y": y})
    render_state = {"units_summary": summary}
    return WinContext(signals=sig, render_state=render_state)
def test_sequenced_citadel_enforces_ordered_chain_then_timed_strike():
    """Sequenced citadel (easy): ordered A→B→C chain, hold, then deadline.

    Per the redesign, the win is waypoint_sequence(A→B→C, ordered and
    latched) AND a prerequisite hold (after_ticks) AND a reachable
    deadline.  This test checks that:

    * a beeline to C that skips A and B never wins;
    * arriving in order but before the hold does not count;
    * arriving in order inside the tick band wins;
    * arriving past the deadline does not win;
    * the timeout is a real, reachable LOSS (no draw degeneracy).
    """
    import types

    level = compile_level(
        load_pack(PACKS / "artofwar-sequenced-citadel.yaml"), "easy"
    )
    A, B, Cc = (35, 20), (70, 12), (110, 20)

    def win_with_force_at(signals, cell):
        # Evaluate the win predicate with three units standing on `cell`,
        # reusing `signals` so the sequence latch persists across calls.
        return evaluate(level.win_condition, _seq_ctx(signals, [cell] * 3))

    # Beeline straight to C, skipping A and B: the ordered latch never
    # advances, so repeated evaluations never win regardless of the tick.
    shortcut = types.SimpleNamespace(
        game_tick=1500, units_lost=0, seq_progress={}
    )
    for _ in range(4):
        assert win_with_force_at(shortcut, Cc) is False

    # Visit A then B in order (results ignored; the calls only advance
    # the latch), then arrive at C at three different ticks.
    ordered = types.SimpleNamespace(
        game_tick=300, units_lost=0, seq_progress={}
    )
    win_with_force_at(ordered, A)  # latch A
    win_with_force_at(ordered, B)  # latch B

    ordered.game_tick = 800  # chain done, but before the hold: no credit
    assert win_with_force_at(ordered, Cc) is False

    ordered.game_tick = 1800  # inside [after_ticks, within_ticks]: win
    assert win_with_force_at(ordered, Cc) is True

    ordered.game_tick = 999999  # past the deadline: no win
    assert win_with_force_at(ordered, Cc) is False

    # The timeout is a real LOSS and is reachable within max_turns
    # (last tick ≈ 93 + 90·(max_turns-1), i.e. ~90 ticks per turn).
    timeout_tick = 2301
    expired = types.SimpleNamespace(
        game_tick=timeout_tick, units_lost=0, seq_progress={}
    )
    assert evaluate(
        level.fail_condition, _seq_ctx(expired, [(6, 20)] * 3)
    ) is True
    assert timeout_tick <= 93 + 90 * (level.max_turns - 1)
def test_indirect_hard_requires_zero_loss_whole_force_arrival():
    """Indirect approach (hard): the whole force arrives, with no losses.

    Redesigned pack: the objective is far east at (112, 20); every
    surviving unit (≥3) must be there, with ZERO losses, within budget.
    """
    level = compile_level(
        load_pack(PACKS / "artofwar-indirect-approach.yaml"), "hard"
    )
    objective = (112, 20)
    whole_force = [objective] * 3

    def wins(units_xy, tick, lost):
        # Evaluate the hard-rung win predicate on a fresh context.
        return evaluate(level.win_condition, _ctx(units_xy, tick, lost=lost))

    # Everyone at the objective, nothing lost, in time: win.
    assert wins(whole_force, 4000, 0) is True

    # Charging the lethal short lane (any loss at all) fails the hard rung.
    assert wins(whole_force, 4000, 1) is False

    # A unit left behind (not all in region) fails all_units_in_region.
    straggler = [objective, objective, (40, 20)]
    assert wins(straggler, 4000, 0) is False

    # Past the deadline (within_ticks 5000 per the pack): no win.
    assert wins(whole_force, 999999, 0) is False
def test_indirect_easy_short_lane_loss_fails_and_timeout_loses():
    """Indirect approach (easy): loss cap of 1, and a reachable timeout.

    The timeout fail must be reachable within max_turns (no draw
    degeneracy), i.e. the fail tick <= 93 + 90*(max_turns-1).
    """
    level = compile_level(
        load_pack(PACKS / "artofwar-indirect-approach.yaml"), "easy"
    )
    arrived = [(112, 20)] * 3

    # Zero or one loss is within the cap: still a win at tick 1500.
    for losses in (0, 1):
        assert evaluate(
            level.win_condition, _ctx(arrived, 1500, lost=losses)
        ) is True

    # Losing more than one (head-on charge) blocks the win and trips the
    # fail clause.
    assert evaluate(
        level.win_condition, _ctx(arrived, 1500, lost=2)
    ) is False
    assert evaluate(
        level.fail_condition, _ctx(arrived, 1500, lost=2)
    ) is True

    # The timeout is a real LOSS, and it is reachable within max_turns.
    timeout_tick = 4001
    assert evaluate(
        level.fail_condition, _ctx([(6, 20)] * 3, timeout_tick, lost=0)
    ) is True
    assert timeout_tick <= 93 + 90 * (level.max_turns - 1)
| # test_decoy_hard_loss_cap_allows_bait_not_army was removed when | |
| # artofwar-decoy-sacrifice was archived (file moved to _archive/ and | |
| # marked status: quarantine). | |
def test_lure_the_tiger_win_requires_preserved_main_body_in_budget():
    """Lure the tiger: win needs the main body, a loss cap and the deadline.

    Redesigned (no-cheat, #4-idiom): the objective is far east at
    (112, 20).  The win is THREE units in the region (the main body, not
    one touring unit) AND a loss cap (only the two bait jeeps may be
    spent, so a survivable head-on grab that burns armour cannot win)
    AND the deadline.  The lure is physically required (the guard wall
    is lethal head-on) and the predicate refuses every lazy/greedy
    substitute.

    Each tuple below is (level, loss cap, win deadline in ticks).
    """
    # The pack and the arrival formation do not vary per level, so they
    # are built once instead of on every iteration.
    pack = load_pack(PACKS / "artofwar-lure-the-tiger.yaml")
    at_obj = [(112, 20)] * 3

    for lvl, cap, wt in (
        ("easy", 2, 5800),
        ("medium", 2, 6600),
        ("hard", 2, 7400),
    ):
        c = compile_level(pack, lvl)

        # Intended play: 3 tanks landed, only the 2 bait jeeps spent: win.
        # Pinned with `is True` to match the other assertions in this
        # module rather than accepting any truthy value.
        assert evaluate(
            c.win_condition, _ctx(at_obj, wt - 100, lost=cap)
        ) is True

        # One touring unit reaching the region does NOT win (n=3 split,
        # not reach_region ≥1, which was the old cheat).
        assert evaluate(
            c.win_condition, _ctx([(112, 20)], wt - 100, lost=0)
        ) is False

        # A survivable head-on grab that burned armour (lost > cap)
        # cannot win AND trips the fail clause.
        assert evaluate(
            c.win_condition, _ctx(at_obj, wt - 100, lost=cap + 1)
        ) is False
        assert evaluate(
            c.fail_condition, _ctx(at_obj, wt - 100, lost=cap + 1)
        ) is True

        # Past the deadline: no win.  The timeout is a real LOSS and is
        # reachable within max_turns (no draw degeneracy).
        assert evaluate(
            c.win_condition, _ctx(at_obj, 10 ** 7, lost=0)
        ) is False
        assert evaluate(
            c.fail_condition, _ctx([(6, 20)] * 5, wt + 1, lost=0)
        ) is True
        assert (wt + 1) <= 93 + 90 * (c.max_turns - 1)
def test_lure_the_tiger_hard_seed_varied_two_spawn_groups():
    """Lure the tiger (hard): two spawn groups, every actor in bounds.

    Hard-tier contract: ≥2 spawn_point groups give a seed-varied start
    (the lure line can't be memorised); this test pins the groups to
    exactly {0, 1}.  Every actor must also sit inside x 2..126 and
    y 2..38 (rush-hour map; the old pack had off-map y=40/32, which
    panicked the engine and has since been fixed).
    """
    level = compile_level(
        load_pack(PACKS / "artofwar-lure-the-tiger.yaml"), "hard"
    )

    # Collect the distinct spawn groups, ignoring actors without one.
    spawn_groups = set()
    for actor in level.scenario.actors:
        if actor.spawn_point is not None:
            spawn_groups.add(actor.spawn_point)
    assert spawn_groups == {0, 1}

    # Bounds check; a failure reports the offending actor's type and cell.
    for actor in level.scenario.actors:
        col, row = actor.position
        col_ok = 2 <= col <= 126
        row_ok = 2 <= row <= 38
        assert col_ok and row_ok, (actor.type, actor.position)
# Without this parametrization `pid` has no source: pytest would look for
# a fixture named "pid" and error out, and FAMILY would go unused.
@pytest.mark.parametrize("pid", FAMILY)
def test_artofwar_pack_compiles_and_runs(pid):
    """Every level of pack ``pid`` compiles; its easy level runs.

    ``pid`` is a pack id from FAMILY, i.e. the stem of a YAML file under
    PACKS.  The test is skipped when ``openra_train`` is not importable.

    For each of easy/medium/hard the compiled level must declare the
    "reasoning" capability, report a supported map, and carry a
    real_world_meaning longer than 10 characters.  The easy level is then
    run once (seed 1) with a policy that only observes; any of
    win/draw/loss is accepted as long as at least one turn was played.
    """
    pytest.importorskip("openra_train")
    from openra_bench.eval_core import run_level

    pack = load_pack(PACKS / f"{pid}.yaml")
    for lvl in ("easy", "medium", "hard"):
        cc = compile_level(pack, lvl)
        assert cc.meta.capability == "reasoning" and cc.map_supported
        assert len(cc.meta.real_world_meaning) > 10

    # Smoke-run a freshly compiled easy level with an observe-only policy.
    res = run_level(
        compile_level(pack, "easy"),
        lambda rs, C: [C.observe()],
        seed=1,
    )
    assert res.outcome in {"win", "draw", "loss"} and res.turns >= 1