"""build-sequence-tech-fastest pack — full no-cheat validation on Rust.

Wave-7 REASONING — cost-optimal build-order planning. The agent must
reach the war factory (`weap`) on the SHORTEST prerequisite chain:

    powr → proc → weap

Any detour (build a barracks/tent first, or a redundant power plant,
or an early infantry queue) overruns the tight tick budget and loses.
The chain is enforced by the Wave-2 `then:` happened-before composite;
the deadline (`within_ticks`) is the cost-optimality teeth — slack is
tuned so the OPTIMAL plan fits and the tent-detour plan does NOT.

Bar (CLAUDE.md): the intended cost-optimal policy WINS on every
(level, seed); stall and the tent-first wrong-path policy LOSE on
every (level, seed). Real LOSS not DRAW — `fail after_ticks:T+1`
inside max_turns is the bite.

Scenario shape:
  - rush-hour-arena, allies vs soviet (bot disabled).
  - easy:   T=3000, max_turns=40 — generous (4-turn buffer).
  - medium: T=2800, max_turns=35 — tight (≈2-turn buffer).
  - hard:   T=2800, max_turns=35 — same tight T + ≥2 spawn_point
            groups (NORTH y=14 / SOUTH y=26 base, round-robined).

Measured optimal timing (seed 1, scripted intended policy):
    powr completes ≈ tick  273 (turn 3)
    proc completes ≈ tick 1263 (turn 14)
    weap completes ≈ tick 2613 (turn 29)

Measured tent-first wrong-path timing:
    weap completes ≈ tick 3063 (turn 34) — beyond every level's T.
"""

from __future__ import annotations

import pytest

# Skip the whole module when the engine wheels are absent; these calls must
# run before the project imports below.
pytest.importorskip("openra_train", reason="Rust env wheel not installed")
pytest.importorskip("openra_rl_training", reason="Rust env wheel not installed")

from openra_bench.eval_core import run_level  # noqa: E402
from openra_bench.scenarios import load_pack  # noqa: E402
from openra_bench.scenarios.loader import PACKS_DIR, compile_level  # noqa: E402

PACK = PACKS_DIR / "build-sequence-tech-fastest.yaml"
LEVELS = ("easy", "medium", "hard")
SEEDS = (1, 2, 3, 4)

# Placement offset (dx, dy) of each structure relative to the agent's `fact`
# cell. Placing relative to the observed fact keeps the scripted policies
# valid whichever spawn group the hard tier picks.
_PLACEMENT_OFFSETS = {
    "powr": (4, 0),
    "tent": (4, 3),
    "proc": (6, 3),
    "weap": (8, 0),
}


# ── Policies ──────────────────────────────────────────────────────


def _stall_policy():
    """Do nothing — must LOSE on the clock on every level/seed."""

    def pol(obs, Cmd):
        return [Cmd.observe()]

    return pol


def _build_order_policy(order):
    """Return a policy that builds the structures in ``order``, one at a time.

    Each turn the policy targets the first structure of ``order`` that has
    not yet been seen in ``own_buildings``. It issues ``Cmd.build`` unless
    the target is already listed in ``production``, and issues
    ``Cmd.place_building`` at the target's offset from the first observed
    ``fact`` whenever a ``fact`` is visible. When it has nothing to issue it
    returns a single ``Cmd.observe()``.

    A structure counts as reached once it has appeared in any observation;
    the flag is never cleared afterwards.
    """
    reached = dict.fromkeys(order, False)

    def pol(obs, Cmd):
        buildings = obs.get("own_buildings", []) or []
        owned_types = {b["type"] for b in buildings}
        in_production = obs.get("production", []) or []

        for name in order:
            if name in owned_types:
                reached[name] = True

        target = next((name for name in order if not reached[name]), None)
        if target is None:
            # Whole chain observed — nothing left to do.
            return [Cmd.observe()]

        cmds = []
        if target not in in_production:
            cmds.append(Cmd.build(target))

        yards = [b for b in buildings if b["type"] == "fact"]
        if yards:
            dx, dy = _PLACEMENT_OFFSETS[target]
            cmds.append(
                Cmd.place_building(
                    target, yards[0]["cell_x"] + dx, yards[0]["cell_y"] + dy
                )
            )

        return cmds or [Cmd.observe()]

    return pol


def _intended_policy():
    """Cost-optimal play: build powr → proc → weap, each one placed
    relative to the agent's actual fact (so the policy generalises across
    the hard-tier spawn variation). This is the policy the pack is solvable
    by — must WIN on every (level, seed)."""
    return _build_order_policy(("powr", "proc", "weap"))


def _tent_first_policy():
    """Wrong cost-non-optimal play: powr → tent → proc → weap.

    The tent is not on the prerequisite chain for weap (only proc is); it
    bloats the BOM by 500 credits and ~5 turns. Must LOSE on the clock on
    every level/seed."""
    return _build_order_policy(("powr", "tent", "proc", "weap"))


def _collect_values(node, key):
    """Return every value stored under ``key`` anywhere inside ``node``.

    ``node`` is walked recursively through dicts and lists. A dict's own
    ``key`` entry is recorded before its values are descended into; any
    other node type is ignored.
    """
    found = []
    if isinstance(node, dict):
        if key in node:
            found.append(node[key])
        for child in node.values():
            found.extend(_collect_values(child, key))
    elif isinstance(node, list):
        for child in node:
            found.extend(_collect_values(child, key))
    return found


# ── Pack-shape tests (cheap; do not run the engine) ──────────────


def test_pack_compiles_with_three_levels():
    """The pack loads, carries the expected id/capability, and defines
    exactly the three difficulty levels."""
    pack = load_pack(PACK)
    assert pack.meta.id == "build-sequence-tech-fastest"
    assert pack.meta.capability == "reasoning"
    assert set(pack.levels) == set(LEVELS)


def test_meta_benchmark_anchor_set():
    """Required by the seed taxonomy: PlanBench cost-optimal + BOM
    manufacturing critical-path planning."""
    pack = load_pack(PACK)
    anchors = pack.meta.benchmark_anchor or []
    assert any("PlanBench" in a for a in anchors), anchors
    assert any("BOM" in a for a in anchors), anchors


def test_hard_tier_has_seed_driven_spawn_groups():
    """Hard must define ≥2 agent spawn_point groups so seed varies the
    start base (tests/test_hard_tier.py::UPGRADED contract)."""
    c = compile_level(load_pack(PACK), "hard")
    sp = {a.spawn_point for a in c.scenario.actors if a.owner == "agent"}
    assert len(sp) >= 2, f"hard needs ≥2 spawn groups, got {sp}"


def test_every_level_has_fail_condition():
    """No silent draws — every level must be able to emit a LOSS."""
    pack = load_pack(PACK)
    for lvl in LEVELS:
        c = compile_level(pack, lvl)
        assert c.fail_condition is not None, f"{lvl} missing fail_condition"


def test_then_composite_used_in_win():
    """Confirms the 3-step build-order chain is wired through to the
    compiled win condition — the load-bearing teeth of this pack."""
    pack = load_pack(PACK)
    for lvl in LEVELS:
        win = compile_level(pack, lvl).win_condition.model_dump(exclude_none=True)
        inner = win.get("all_of") or []
        chains = [cl for cl in inner if "then" in cl]
        assert chains, f"{lvl} win missing then-chain: {win}"
        for cl in chains:
            clauses = (cl["then"] or {}).get("clauses") or []
            assert len(clauses) == 3, (
                f"{lvl} then-chain must be powr→proc→weap (3 clauses); "
                f"got {clauses}"
            )
            # And in the exact engine-enforced prereq order.
            built = [clause.get("has_building") for clause in clauses]
            assert built == ["powr", "proc", "weap"], (
                f"{lvl} then-chain order must be powr→proc→weap; got {built}"
            )


def test_tick_budget_aligned_with_max_turns():
    """within_ticks must be reachable inside max_turns.

    Engine advances ~90 ticks/turn → reachable max = 93 + 90·(N-1)."""
    pack = load_pack(PACK)
    for lvl in LEVELS:
        max_turns = pack.levels[lvl].max_turns
        reachable = 93 + 90 * (max_turns - 1)
        win = compile_level(pack, lvl).win_condition.model_dump(exclude_none=True)
        wts = _collect_values(win, "within_ticks")
        assert wts, f"{lvl} has no within_ticks leaf (no clock teeth)"
        for wt in wts:
            assert wt <= reachable, (
                f"{lvl} within_ticks={wt} > reachable={reachable} "
                f"(max_turns={max_turns}) — deadline never bites ⇒ draw"
            )


# ── Engine-bound tests (parameterised over seeds 1..4) ────────────


@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("level", LEVELS)
def test_intended_cost_optimal_policy_wins(level, seed):
    """The intended cost-optimal play (powr → proc → weap) must WIN on
    every (level, seed). This is the load-bearing test that the pack is
    solvable inside the budget by the advertised capability."""
    c = compile_level(load_pack(PACK), level)
    res = run_level(c, _intended_policy(), seed=seed)
    tp = getattr(res.signals, "then_progress", {}) or {}
    assert res.outcome == "win", (
        f"intended cost-optimal must WIN on {level} s={seed}; "
        f"got {res.outcome} (tick={res.signals.game_tick}, "
        f"then_progress={tp}, "
        f"own_buildings={res.signals.own_building_types})"
    )


@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("level", LEVELS)
def test_stall_loses(level, seed):
    """A do-nothing policy must LOSE on every (level, seed).

    The fail_condition's after_ticks clause bites at the budget; never a
    draw."""
    c = compile_level(load_pack(PACK), level)
    res = run_level(c, _stall_policy(), seed=seed)
    assert res.outcome == "loss", (
        f"stall must LOSE on {level} s={seed}; got {res.outcome} "
        f"(tick={res.signals.game_tick})"
    )


@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("level", LEVELS)
def test_tent_first_wrong_path_loses(level, seed):
    """The cost-non-optimal tent-first play must LOSE on every
    (level, seed). The tent detour adds ~500 credits + ~5 turns, pushing
    weap completion to ~tick 3063 — beyond every level's deadline. The
    capability being measured is COST-OPTIMAL planning; a 'some plan that
    arrives' policy must not win."""
    c = compile_level(load_pack(PACK), level)
    res = run_level(c, _tent_first_policy(), seed=seed)
    tp = getattr(res.signals, "then_progress", {}) or {}
    assert res.outcome == "loss", (
        f"tent-first wrong-path must LOSE on {level} s={seed}; got "
        f"{res.outcome} (tick={res.signals.game_tick}, "
        f"then_progress={tp}, own_buildings={res.signals.own_building_types})"
    )


@pytest.mark.parametrize("seed", SEEDS)
def test_hard_seeds_produce_distinct_starts(seed):
    """Per-seed smoke test for the hard tier's spawn contract.

    For each seed, a do-nothing probe must lose and the first observation
    must already contain the agent's ``fact``. This test does not compare
    positions between seeds; cross-seed distinctness is asserted by
    ``test_hard_spawns_round_robin_across_seeds``. The spawn-variation
    contract is also enforced by tests/test_hard_tier.py."""
    c = compile_level(load_pack(PACK), "hard")
    captured = {"first_obs": None}

    def probe(obs, Cmd):
        # Snapshot only the very first observation's buildings.
        if captured["first_obs"] is None:
            captured["first_obs"] = list(obs.get("own_buildings", []) or [])
        return [Cmd.observe()]

    res = run_level(c, probe, seed=seed)
    assert res.outcome == "loss"  # stall must lose
    facts = [
        (b["cell_x"], b["cell_y"])
        for b in (captured["first_obs"] or [])
        if b["type"] == "fact"
    ]
    assert facts, f"no fact observed at turn 0 for seed={seed}"


def test_hard_spawns_round_robin_across_seeds():
    """Two seeds (1 and 2) must place the agent's fact at DIFFERENT cells
    — proves the spawn_point round-robin is active, not degenerate."""
    c = compile_level(load_pack(PACK), "hard")

    def probe():
        """Build a do-nothing policy that records the first fact cell it
        sees in ``pol.captured["fact_pos"]``."""
        captured = {}

        def pol(obs, Cmd):
            if "fact_pos" not in captured:
                bs = obs.get("own_buildings", []) or []
                facts = [
                    (b["cell_x"], b["cell_y"]) for b in bs if b["type"] == "fact"
                ]
                if facts:
                    captured["fact_pos"] = facts[0]
            return [Cmd.observe()]

        pol.captured = captured
        return pol

    p1 = probe()
    run_level(c, p1, seed=1)
    p2 = probe()
    run_level(c, p2, seed=2)

    pos1 = p1.captured.get("fact_pos")
    pos2 = p2.captured.get("fact_pos")
    assert pos1 and pos2, f"missing fact obs: s1={pos1} s2={pos2}"
    assert pos1 != pos2, (
        f"hard spawn round-robin is degenerate: seed 1 and 2 both "
        f"started at {pos1}"
    )