OpenRA-Bench / tests /test_build_sequence_tech_cheapest.py
yxc20098's picture
feat(scenario): build-sequence-tech-cheapest — cost-minimal tech path (PlanBench cost-optimal anchor)
7cc5e9b
Raw
History Blame Contribute Delete
15.5 kB
"""build-sequence-tech-cheapest pack — full no-cheat validation on Rust.
Wave-11 REASONING โ€” cost-MINIMAL build-order planning. Sibling of
build-sequence-tech-fastest (the time-optimal axis); here the binding
constraint is MONEY. The agent must reach the war factory (`weap`) on
the ONLY affordable prerequisite chain:
    powr → proc → weap
There is NO ore on the map and NO harvester income — the starting cash
is the entire, non-replenishing budget, tuned to the cost of the
minimal path (powr $300 + proc $1400 + weap $2000 = $3700) plus only
a sliver of slack ($20–$50, see "Scenario shape" below). Any
detour (build a barracks/tent or a pillbox first) bloats the bill of
materials, exhausts the fixed budget, and weap can then never be
funded — the `then:` chain never completes and the episode times out.
The clock budget is GENEROUS: a policy loses by being WASTEFUL, not
by being slow.
Bar (CLAUDE.md): the intended cost-minimal policy WINS on every
(level, seed); stall and the wasteful-spend policies LOSE on every
(level, seed). Real LOSS, not DRAW — `fail after_ticks:T+1` inside
max_turns is the bite.
Scenario shape:
  - rush-hour-arena, allies vs soviet (bot disabled).
  - easy:   budget $3750, T=3200, max_turns=40 — 50-credit slack.
  - medium: budget $3720, T=3200, max_turns=40 — 20-credit slack.
  - hard:   budget $3720, T=3200, max_turns=40 — same tight budget
            + ≥2 spawn_point groups (NORTH y=14 / SOUTH y=26 base,
            round-robined by seed).
Measured (seed 1, scripted policies):
  intended powr→proc→weap completes ≈ tick 2613 (well under T=3200)
  tent-first wasteful: cash hits $0 ≈ tick 2703, weap stuck in queue
  forever (no income ⇒ no recovery) ⇒ after_ticks LOSS at T+1.
"""
from __future__ import annotations
import pytest
pytest.importorskip("openra_train", reason="Rust env wheel not installed")
pytest.importorskip("openra_rl_training", reason="Rust env wheel not installed")
from openra_bench.eval_core import run_level
from openra_bench.scenarios import load_pack
from openra_bench.scenarios.loader import PACKS_DIR, compile_level
# Path of the YAML pack under test, resolved inside the loader's packs directory.
PACK = PACKS_DIR / "build-sequence-tech-cheapest.yaml"
# Difficulty tiers that the per-level tests below iterate / parametrise over.
LEVELS = ("easy", "medium", "hard")
# Seeds that the engine-bound tests below are parametrised over.
SEEDS = (1, 2, 3, 4)
# ── Policies ──────────────────────────────────────────────────────
def _stall_policy():
    """Build the do-nothing baseline policy.

    The returned callable ignores the observation entirely and answers
    every turn with a single ``Cmd.observe()``. The pack expects this
    baseline to LOSE on the clock on every level/seed.
    """
    def idle(obs, Cmd):
        # Never queue or place anything; just keep observing.
        noop = Cmd.observe()
        return [noop]

    return idle
def _build_order_policy(steps):
    """Return a scripted policy that walks ``steps`` strictly in order.

    ``steps`` is a sequence of ``(building_type, dx, dy)`` triples. On
    every turn the policy works on the first step whose building has not
    been observed yet:

    * it queues the building unless its type already appears in the
      observation's ``production`` entry, and
    * if a ``fact`` is visible, it asks to place the building at the
      first fact's ``(cell_x + dx, cell_y + dy)``.

    A step counts as done from the first turn its building type shows up
    in ``own_buildings`` and stays done afterwards (sticky milestone),
    even if the building later disappears from the observation. When
    there is nothing to issue the policy returns ``[Cmd.observe()]``.
    """
    steps = tuple(steps)
    tracked = {name for name, _, _ in steps}
    reached = set()  # building types seen at least once; never shrinks

    def pol(obs, Cmd):
        buildings = obs.get("own_buildings", []) or []
        seen_types = {b["type"] for b in buildings}
        queue = obs.get("production", []) or []
        reached.update(seen_types & tracked)
        yards = [b for b in buildings if b["type"] == "fact"]

        cmds = []
        pending = next((step for step in steps if step[0] not in reached), None)
        if pending is not None:
            name, dx, dy = pending
            if name not in queue:
                cmds.append(Cmd.build(name))
            if yards:
                # Offsets are relative to the first observed fact so the
                # script follows whichever base the seed selected.
                anchor = yards[0]
                cmds.append(Cmd.place_building(
                    name, anchor["cell_x"] + dx, anchor["cell_y"] + dy
                ))
        return cmds or [Cmd.observe()]

    return pol


def _intended_policy():
    """Cost-minimal play: build powr → proc → weap, each placed
    relative to the agent's actual fact (so the policy generalises
    across the hard-tier spawn variation). This is the policy the
    pack is solvable by — must WIN on every (level, seed)."""
    return _build_order_policy((
        ("powr", 4, 0),
        ("proc", 6, 3),
        ("weap", 8, 0),
    ))


def _wasteful_policy(extra: str):
    """Cost-non-minimal play: powr → <extra> → proc → weap, where
    <extra> ('tent' $500 or 'pbox' $600) is NOT on weap's prerequisite
    chain. The detour bloats the bill of materials past the fixed
    budget, so weap can never be funded — cash hits $0 mid-queue and,
    with no ore/income, never recovers. Must LOSE on every
    (level, seed). The capability measured is COST-MINIMAL planning;
    a 'some plan that arrives' policy must not win.

    Apart from the inserted ``extra`` step, the build order and
    placement offsets are the same as in ``_intended_policy``.
    """
    return _build_order_policy((
        ("powr", 4, 0),
        (extra, 4, 3),
        ("proc", 6, 3),
        ("weap", 8, 0),
    ))
# ── Pack-shape tests (cheap; do not run the engine) ──────────────
def test_pack_compiles_with_three_levels():
    """The pack loads, carries the expected id/capability metadata and
    defines exactly the easy/medium/hard tiers."""
    loaded = load_pack(PACK)
    meta = loaded.meta
    assert meta.id == "build-sequence-tech-cheapest"
    assert meta.capability == "reasoning"
    tier_names = set(loaded.levels)
    assert tier_names == {"easy", "medium", "hard"}
def test_meta_benchmark_anchor_set():
    """Required by the seed taxonomy: PlanBench cost-optimal +
    BOM cost minimization + budget-constrained planning.

    Each keyword must occur as a substring of at least one anchor.
    """
    anchors = load_pack(PACK).meta.benchmark_anchor or []
    for keyword in ("PlanBench", "BOM", "budget"):
        matched = any(keyword in anchor for anchor in anchors)
        assert matched, anchors
def test_budget_is_near_minimal_path_cost():
    """The whole pack hinges on starting_cash being tuned to the
    minimal-path cost (powr 300 + proc 1400 + weap 2000 = 3700) with
    near-zero slack — enough to fund the minimal chain, never enough
    to also afford a non-load-bearing structure."""
    pack = load_pack(PACK)
    # Bill of materials of the minimal chain; the sum is the 3700 quoted
    # in the failure message below.
    chain_cost = {"powr": 300, "proc": 1400, "weap": 2000}
    minimal = sum(chain_cost.values())
    max_slack = 100
    for lvl in LEVELS:
        cash = pack.levels[lvl].starting_cash
        assert minimal <= cash <= minimal + max_slack, (
            f"{lvl} starting_cash={cash} not near-minimal (3700 + ≤100 "
            f"slack); a wasteful detour must overrun the budget"
        )
def test_no_ore_patches_placed():
    """The budget must be the entire, non-replenishing money supply:
    no `mine` actors ⇒ no harvester income ⇒ a wasteful spend can
    never be recovered no matter how generous the clock is."""
    # Load once and reuse for every tier, as the other pack-shape tests do.
    pack = load_pack(PACK)
    for lvl in LEVELS:
        compiled = compile_level(pack, lvl)
        mines = [a for a in compiled.scenario.actors if a.type == "mine"]
        assert not mines, (
            f"{lvl} has ore patches {mines} — income would "
            "let a wasteful policy recover; budget must be fixed"
        )
def test_hard_tier_has_seed_driven_spawn_groups():
    """Hard must define ≥2 agent spawn_point groups so seed varies
    the start base (tests/test_hard_tier.py::UPGRADED contract)."""
    compiled = compile_level(load_pack(PACK), "hard")
    # NOTE(review): every distinct spawn_point value counts as a group,
    # including whatever value an ungrouped agent actor carries — confirm
    # against the scenario schema that this cannot inflate the count.
    groups = {
        actor.spawn_point
        for actor in compiled.scenario.actors
        if actor.owner == "agent"
    }
    assert len(groups) >= 2, f"hard needs ≥2 spawn groups, got {groups}"
def test_every_level_has_fail_condition():
    """No silent draws — every compiled level must carry a
    fail_condition so that it is able to emit a LOSS."""
    loaded = load_pack(PACK)
    for tier in LEVELS:
        fail = compile_level(loaded, tier).fail_condition
        assert fail is not None, f"{tier} missing fail_condition"
def test_then_composite_used_in_win():
    """Confirms the 3-step build-order chain is wired through to the
    compiled win condition — the load-bearing teeth of this pack.

    For every tier the win condition's ``all_of`` list must contain at
    least one ``then`` clause, and every such clause must hold exactly
    three sub-clauses whose ``has_building`` values are powr, proc, weap
    in that order.
    """
    expected_order = ("powr", "proc", "weap")
    # Load once and reuse for every tier, as the other pack-shape tests do.
    pack = load_pack(PACK)
    for lvl in LEVELS:
        compiled = compile_level(pack, lvl)
        win = compiled.win_condition.model_dump(exclude_none=True)
        inner = win.get("all_of") or []
        chains = [cl for cl in inner if "then" in cl]
        assert chains, f"{lvl} win missing then-chain: {win}"
        for cl in chains:
            clauses = (cl["then"] or {}).get("clauses") or []
            assert len(clauses) == 3, (
                f"{lvl} then-chain must be powr→proc→weap (3 clauses); "
                f"got {clauses}"
            )
            # And in the exact engine-enforced prereq order.
            for position, (clause, building) in enumerate(
                zip(clauses, expected_order)
            ):
                assert clause.get("has_building") == building, (
                    f"{lvl} then-chain clause {position} must be "
                    f"has_building={building}; got {clause}"
                )
def test_tick_budget_aligned_with_max_turns():
    """within_ticks must be reachable inside max_turns. Engine
    advances ~90 ticks/turn → reachable max = 93 + 90·(N-1).

    Every ``within_ticks`` value found anywhere inside the compiled win
    condition is checked against that bound, and each tier must have at
    least one such value.
    """
    def _collect(node, key, out):
        # Depth-first walk over nested dicts/lists, appending every value
        # stored under ``key`` to ``out``. Defined once, outside the
        # per-level loop.
        if isinstance(node, dict):
            if key in node:
                out.append(node[key])
            for v in node.values():
                _collect(v, key, out)
        elif isinstance(node, list):
            for v in node:
                _collect(v, key, out)

    pack = load_pack(PACK)
    for lvl in LEVELS:
        max_turns = pack.levels[lvl].max_turns
        reachable = 93 + 90 * (max_turns - 1)
        win = compile_level(pack, lvl).win_condition.model_dump(exclude_none=True)
        wts = []
        _collect(win, "within_ticks", wts)
        assert wts, f"{lvl} has no within_ticks leaf (no clock teeth)"
        for wt in wts:
            assert wt <= reachable, (
                f"{lvl} within_ticks={wt} > reachable={reachable} "
                f"(max_turns={max_turns}) — deadline never bites ⇒ draw"
            )
# ── Engine-bound tests (parameterised over seeds 1..4) ────────────
@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("level", LEVELS)
def test_intended_cost_minimal_policy_wins(level, seed):
    """Running the intended cost-minimal script (powr → proc → weap)
    must yield a WIN for every (level, seed) pair — the load-bearing
    check that the pack is solvable inside the budget by the advertised
    capability."""
    compiled = compile_level(load_pack(PACK), level)
    policy = _intended_policy()
    res = run_level(compiled, policy, seed=seed)
    # then_progress may be absent or empty on the signals object; it is
    # only used to enrich the failure message.
    tp = getattr(res.signals, "then_progress", {}) or {}
    assert res.outcome == "win", (
        f"intended cost-minimal must WIN on {level} s={seed}; "
        f"got {res.outcome} (tick={res.signals.game_tick}, "
        f"then_progress={tp}, "
        f"own_buildings={res.signals.own_building_types})"
    )
@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("level", LEVELS)
def test_stall_loses(level, seed):
    """The do-nothing baseline must end in a LOSS for every
    (level, seed): the fail_condition's after_ticks clause is expected
    to bite at the budget, so the outcome is never a draw."""
    compiled = compile_level(load_pack(PACK), level)
    policy = _stall_policy()
    res = run_level(compiled, policy, seed=seed)
    assert res.outcome == "loss", (
        f"stall must LOSE on {level} s={seed}; got {res.outcome} "
        f"(tick={res.signals.game_tick})"
    )
@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("level", LEVELS)
@pytest.mark.parametrize("extra", ("tent", "pbox"))
def test_wasteful_spend_loses(level, seed, extra):
    """The cost-non-minimal wasteful play (powr → <extra> → proc →
    weap) must LOSE on every (level, seed). The <extra> detour
    ('tent' $500 / 'pbox' $600) bloats the bill of materials past the
    fixed budget; weap can never be funded (cash hits $0 mid-queue,
    no income ⇒ no recovery) and the `then:` chain never completes.
    The capability measured is COST-MINIMAL planning."""
    c = compile_level(load_pack(PACK), level)
    res = run_level(c, _wasteful_policy(extra), seed=seed)
    # then_progress may be absent or empty; it only enriches the message.
    tp = getattr(res.signals, "then_progress", {}) or {}
    assert res.outcome == "loss", (
        f"wasteful {extra}-first must LOSE on {level} s={seed}; got "
        f"{res.outcome} (tick={res.signals.game_tick}, "
        f"then_progress={tp}, own_buildings={res.signals.own_building_types})"
    )
    # weap must NOT have been built — the budget could not fund it.
    assert "weap" not in (res.signals.own_building_types or []), (
        f"wasteful {extra}-first built weap on {level} s={seed} — the "
        f"budget trap leaked (own_buildings={res.signals.own_building_types})"
    )
@pytest.mark.parametrize("seed", SEEDS)
def test_hard_seeds_produce_distinct_starts(seed):
    """Per-seed smoke test for the hard-tier spawn contract.

    For each seed, a do-nothing probe records the ``own_buildings`` list
    of the first observation. The run must end in a loss (the probe
    stalls) and that first observation must contain at least one
    ``fact``. This test sees one seed at a time, so it cannot compare
    positions across seeds; that comparison is made by
    ``test_hard_spawns_round_robin_across_seeds``.
    """
    c = compile_level(load_pack(PACK), "hard")
    captured = {"first_obs": None}

    def probe(obs, Cmd):
        # Snapshot only the very first observation's buildings.
        if captured["first_obs"] is None:
            captured["first_obs"] = list(obs.get("own_buildings", []) or [])
        return [Cmd.observe()]

    res = run_level(c, probe, seed=seed)
    # The probe never builds anything, so it must lose like the stall policy.
    assert res.outcome == "loss", (
        f"stalling probe must LOSE on hard s={seed}; got {res.outcome}"
    )
    facts = [
        (b["cell_x"], b["cell_y"])
        for b in (captured["first_obs"] or [])
        if b["type"] == "fact"
    ]
    assert facts, f"no fact observed at turn 0 for seed={seed}"
def test_hard_spawns_round_robin_across_seeds():
    """Seeds 1 and 2 on the hard tier must start the agent's fact at
    DIFFERENT cells — evidence that the spawn_point round-robin is
    active rather than degenerate."""
    compiled = compile_level(load_pack(PACK), "hard")

    def first_fact_cell(seed):
        # Run a do-nothing policy and remember the cell of the first
        # fact that appears in any observation (None if none ever does).
        seen = []

        def pol(obs, Cmd):
            if not seen:
                buildings = obs.get("own_buildings", []) or []
                cells = [
                    (b["cell_x"], b["cell_y"])
                    for b in buildings
                    if b["type"] == "fact"
                ]
                if cells:
                    seen.append(cells[0])
            return [Cmd.observe()]

        run_level(compiled, pol, seed=seed)
        return seen[0] if seen else None

    pos1 = first_fact_cell(1)
    pos2 = first_fact_cell(2)
    assert pos1 and pos2, f"missing fact obs: s1={pos1} s2={pos2}"
    assert pos1 != pos2, (
        f"hard spawn round-robin is degenerate: seed 1 and 2 both "
        f"started at {pos1}"
    )