OpenRA-Bench / tests /test_combat_target_priority_highvalue.py
yxc20098's picture
fix(scenario): combat-target-priority-highvalue โ€” recalibrate after engine movement fixes
248d766
Raw
History Blame Contribute Delete
8.99 kB
"""combat-target-priority-highvalue pack โ€” full no-cheat validation.
Wave-11 ACTION pack: threat-weighted target prioritization (SC2
focus-fire target priority / military target prioritization anchor).
A 4-tank squad faces a mixed enemy cluster — a screen of cheap rifle
chaff (e1) backed by THREE high-threat anti-armour rocket soldiers
(e3). The squad must FOCUS THE ROCKET SOLDIERS FIRST: silence the
rocket soldiers fast, then mop up the chaff. Killing the chaff first
leaves all three rocket soldiers pouring anti-armour fire into the
squad through the entire mop-up.
Win = units_killed_gte:K AND own_units_gte:3 AND within_ticks:2700
AND building_count_gte:fact:1 (a non-combat anchor: the squad must
still own its construction yard). Fail floors are aligned to the win
floor so every non-timeout outcome is a real WIN or LOSS — no
dead-zone DRAW.
Recalibrated after the engine movement fixes (moving units take fire
en route; attack_unit on out-of-sight targets paths normally; no
sprint-invincibility). Finding: with the post-fix combat model the
squad takes roughly the same tank losses regardless of fine target
order (concentrating fire bunches the stack's return-fire exposure
too) — the survival floor is own_units_gte:3 (a perfect focus
engagement still loses ~1 tank closing the distance), NOT the old
zero-loss own_units_gte:4. The chaff-vs-threat discrimination is
restored on medium + hard via a BIGGER cluster (12 e1 + 3 e3 = 15,
kill bar 15): a chaff-first play cannot clear all 15 in budget while
the three e3 attrit it, so it busts the kill bar AND the floor.
Bar (per CLAUDE.md), verified deterministic across seeds 1-4 on
every level:
• stall (observe only) → LOSS every level/seed (after_ticks).
• brute attack_move → LOSS every level/seed (drives into the
  cluster, bleeds 2 tanks, fails the kill bar).
• kill-chaff-first → LOSS on MEDIUM + HARD (the 15-strong
  cluster cannot be cleared chaff-first in budget; the e3 attrit
  the squad). EASY is the forgiving bare-skill tier — its smaller
  12-unit cluster lets a chaff-first play still finish, so the
  load-bearing chaff-first LOSS is medium + hard.
• focus-threats-first → WIN every level/seed.
"""
from __future__ import annotations
import pytest
pytest.importorskip("openra_train", reason="Rust env wheel not installed")
from openra_bench.eval_core import run_level
from openra_bench.scenarios import load_pack
from openra_bench.scenarios.loader import PACKS_DIR, compile_level
# Scenario pack under test, resolved against the loader's pack directory.
PACK = PACKS_DIR / "combat-target-priority-highvalue.yaml"
# Difficulty tiers the parametrized tests compile and run.
LEVELS = ("easy", "medium", "hard")
# Seeds every policy is exercised with on each level.
SEEDS = (1, 2, 3, 4)
# Enemy cluster advance target — the chaff column, mid-y. Used as the
# (x, y) attack_move destination when no enemy is visible yet.
_ADV = (66, 20)
# ── helpers ──────────────────────────────────────────────────────────
def _tank_ids(obs):
    """Return the ids of the agent's tanks (unit type ``2tnk``) as strings.

    Args:
        obs: Observation mapping for the current turn. Its
            ``"units_summary"`` entry is scanned; a missing or ``None``
            summary is treated as empty.

    Returns:
        ``str(unit["id"])`` for every unit whose ``"type"`` equals
        ``"2tnk"`` (compared case-insensitively), in summary order.

    Raises:
        KeyError: if a matching unit has no ``"id"`` key.
    """
    return [
        str(u["id"])
        for u in (obs.get("units_summary") or [])
        # Normalise through str()/lower() so a missing or oddly-cased
        # type never raises and still compares cleanly.
        if str(u.get("type", "")).lower() == "2tnk"
    ]
def _enemy_units(obs):
    """Return ``(id, type)`` for every visible enemy infantry target.

    Only entries of ``obs["enemy_summary"]`` whose type is ``e1`` (rifle
    chaff) or ``e3`` (rocket soldier) are reported; the type is compared
    case-insensitively and returned lower-cased.

    Args:
        obs: Observation mapping for the current turn. A missing or
            ``None`` ``"enemy_summary"`` is treated as empty.

    Returns:
        A list of ``(str(id), type)`` tuples in summary order. Entries
        without an ``"id"`` are skipped: they cannot be targeted, and
        stringifying the missing value would yield the bogus id ``"None"``.
    """
    units = []
    for enemy in obs.get("enemy_summary") or []:
        enemy_id = enemy.get("id")
        # Compare against None explicitly so a legitimate id of 0 is kept.
        if enemy_id is None:
            continue
        kind = str(enemy.get("type", "")).lower()
        if kind in ("e1", "e3"):
            units.append((str(enemy_id), kind))
    return units
# ── policies ─────────────────────────────────────────────────────────
def _stall_policy():
    """Build the do-nothing policy.

    The stall tests expect it to end in a LOSS: the kill bar is never
    met, so the after_ticks fail clause fires.

    Returns:
        A ``pol(obs, Cmd)`` callable that ignores the observation and
        returns ``[Cmd.observe()]`` on every call.
    """
    def pol(obs, Cmd):
        return [Cmd.observe()]

    return pol
def _brute_policy():
    """Build the brute-force policy: attack_move onto the cluster centroid.

    The squad drives INTO the cluster instead of picking targets; the
    brute tests expect this to end in a LOSS.

    Returns:
        A ``pol(obs, Cmd)`` callable. With no tanks left it observes;
        otherwise it attack_moves every tank to the integer centroid of
        the visible e1/e3 units, or to ``_ADV`` when none are visible.
    """
    def pol(obs, Cmd):
        squad = _tank_ids(obs)
        if not squad:
            return [Cmd.observe()]

        # Collect the cell coordinates of every visible e1/e3 in one pass.
        xs, ys = [], []
        for enemy in obs.get("enemy_summary") or []:
            if str(enemy.get("type", "")).lower() not in ("e1", "e3"):
                continue
            xs.append(enemy["cell_x"])
            ys.append(enemy["cell_y"])

        if xs:
            # Floor-divided mean of the visible cluster.
            goal_x, goal_y = sum(xs) // len(xs), sum(ys) // len(ys)
        else:
            # Nothing in view yet: advance on the fixed staging target.
            goal_x, goal_y = _ADV[0], _ADV[1]
        return [Cmd.attack_move(squad, goal_x, goal_y)]

    return pol
def _focus_policy(first: str):
    """Build a focus-fire policy that targets the ``first`` unit type first.

    ``first='e1'`` is the kill-chaff-first trap; ``first='e3'`` is the
    intended threat-first focus play.

    Args:
        first: Enemy type (``"e1"`` or ``"e3"``) to attack before any other.

    Returns:
        A ``pol(obs, Cmd)`` callable. With no tanks left it observes; with
        no enemy in view it attack_moves to ``_ADV``; otherwise all tanks
        attack_unit the first visible enemy of type ``first``, falling
        back to the first visible enemy of any other type.
    """
    def pol(obs, Cmd):
        squad = _tank_ids(obs)
        if not squad:
            return [Cmd.observe()]

        visible = _enemy_units(obs)
        if not visible:
            # No enemy in view: close to contact range.
            return [Cmd.attack_move(squad, _ADV[0], _ADV[1])]

        preferred = next(
            (unit_id for unit_id, kind in visible if kind == first),
            None,
        )
        # With no preferred-type unit in view, every visible unit is a
        # "rest" unit, so the fallback is simply the first one listed.
        target = visible[0][0] if preferred is None else preferred
        return [Cmd.attack_unit(squad, target)]

    return pol
# ── tests ────────────────────────────────────────────────────────────
@pytest.mark.parametrize("level", LEVELS)
@pytest.mark.parametrize("seed", SEEDS)
def test_stall_loses(level, seed):
    """An observe-only policy must hit the deadline as a LOSS, not a draw."""
    compiled = compile_level(load_pack(PACK), level)
    result = run_level(compiled, _stall_policy(), seed=seed)
    outcome = result.outcome
    assert outcome == "loss", (
        f"{level}/seed{seed}: stall must LOSE, got {outcome}"
    )
@pytest.mark.parametrize("level", LEVELS)
@pytest.mark.parametrize("seed", SEEDS)
def test_brute_attack_move_loses(level, seed):
    """A brute attack_move onto the cluster must be a real LOSS, not a draw.

    Per the pack notes, the squad drives into the cluster while the
    rocket soldiers keep firing, bleeds tanks and misses the kill bar.
    """
    compiled = compile_level(load_pack(PACK), level)
    result = run_level(compiled, _brute_policy(), seed=seed)
    outcome = result.outcome
    assert outcome == "loss", (
        f"{level}/seed{seed}: brute must LOSE, got {outcome}"
    )
@pytest.mark.parametrize("level", ["medium", "hard"])
@pytest.mark.parametrize("seed", SEEDS)
def test_kill_chaff_first_loses(level, seed):
    """Attacking the cheap e1 chaff first must be a real LOSS on medium/hard.

    The three rocket soldiers keep firing through the whole mop-up; on
    the 15-strong medium/hard cluster the squad cannot clear all 15 in
    budget and is attrited below the survival floor. EASY is excluded:
    its smaller 12-unit cluster is the forgiving bare-skill tier where
    a chaff-first play can still finish.
    """
    compiled = compile_level(load_pack(PACK), level)
    chaff_first = _focus_policy("e1")
    result = run_level(compiled, chaff_first, seed=seed)
    outcome = result.outcome
    assert outcome == "loss", (
        f"{level}/seed{seed}: kill-chaff-first must LOSE, got {outcome}"
    )
@pytest.mark.parametrize("level", LEVELS)
@pytest.mark.parametrize("seed", SEEDS)
def test_focus_threats_first_wins(level, seed):
    """The intended play must WIN on every level and seed.

    The intended capability is concentrating all four tanks on the
    rocket soldiers FIRST, then mopping up the chaff.
    """
    compiled = compile_level(load_pack(PACK), level)
    threats_first = _focus_policy("e3")
    result = run_level(compiled, threats_first, seed=seed)
    outcome = result.outcome
    assert outcome == "win", (
        f"{level}/seed{seed}: focus-threats-first must WIN, got {outcome}"
    )
def test_hard_agent_spawn_axis_has_two_groups():
    """The hard tier must define ≥2 agent-side spawn_point groups.

    These groups form the seed-driven staging-corridor axis; the strike
    force and its construction yard are duplicated under each group.
    Only the number of distinct groups is asserted here.
    """
    c = compile_level(load_pack(PACK), "hard")
    # Distinct spawn_point values across agent-owned actors; actors with
    # no spawn_point belong to no group and are ignored.
    agent_sps = {
        a.spawn_point
        for a in c.scenario.actors
        if a.owner == "agent" and a.spawn_point is not None
    }
    assert len(agent_sps) >= 2, (
        f"hard needs ≥2 agent spawn_point groups, got {sorted(agent_sps)}"
    )
def test_tick_budget_alignment():
    """Deadlines must be reachable inside max_turns so they actually bite.

    For every level, the win condition's ``within_ticks`` and the fail
    condition's ``after_ticks`` must both exist and be no greater than
    the last reachable tick, ``93 + 90·(max_turns - 1)``.
    """
    for level in LEVELS:
        c = compile_level(load_pack(PACK), level)
        # Last tick reachable within the level's turn budget.
        max_tick = 93 + 90 * (c.max_turns - 1)

        win_clauses = c.win_condition.all_of or []
        within = next(
            (cl["within_ticks"] for cl in win_clauses if "within_ticks" in cl),
            None,
        )
        # Check presence separately so a missing clause is not reported
        # as "within_ticks None not reachable".
        assert within is not None, (
            f"{level}: win condition has no within_ticks clause"
        )
        assert within <= max_tick, (
            f"{level}: within_ticks {within} not reachable by {max_tick}"
        )

        fail_clauses = c.fail_condition.any_of or []
        after = next(
            (cl["after_ticks"] for cl in fail_clauses if "after_ticks" in cl),
            None,
        )
        assert after is not None, (
            f"{level}: fail condition has no after_ticks clause"
        )
        assert after <= max_tick, (
            f"{level}: after_ticks {after} not reachable by {max_tick}"
        )