Spaces:

qpluslab
/

OpenRA-Bench

Running

App Files Files Community

OpenRA-Bench / tests /test_combat_target_priority_highvalue.py

yxc20098

fix(scenario): combat-target-priority-highvalue — recalibrate after engine movement fixes

248d766 about 1 month ago

Raw

History Blame Contribute Delete

8.99 kB

	"""combat-target-priority-highvalue pack — full no-cheat validation.

	Wave-11 ACTION pack: threat-weighted target prioritization (SC2
	focus-fire target priority / military target prioritization anchor).
	A 4-tank squad faces a mixed enemy cluster — a screen of cheap rifle
	chaff (e1) backed by THREE high-threat anti-armour rocket soldiers
	(e3). The squad must FOCUS THE ROCKET SOLDIERS FIRST: silence the
	rocket soldiers fast, then mop up the chaff. Killing the chaff first
	leaves all three rocket soldiers pouring anti-armour fire into the
	squad through the entire mop-up.

	Win = units_killed_gte:K AND own_units_gte:3 AND within_ticks:2700
	AND building_count_gte:fact:1 (a non-combat anchor: the squad must
	still own its construction yard). Fail floors are aligned to the win
	floor so every non-timeout outcome is a real WIN or LOSS — no
	dead-zone DRAW.

	Recalibrated after the engine movement fixes (moving units take fire
	en route; attack_unit on out-of-sight targets paths normally; no
	sprint-invincibility). Finding: with the post-fix combat model the
	squad takes ~the same tank losses regardless of fine target order
	(concentrating fire bunches the stack's return-fire exposure too) —
	the survival floor is own_units_gte:3 (a perfect focus engagement
	still loses ~1 tank closing the distance), NOT the old zero-loss
	own_units_gte:4. The chaff-vs-threat discrimination is restored on
	medium + hard via a BIGGER cluster (12 e1 + 3 e3 = 15, kill bar 15):
	a chaff-first play cannot clear all 15 in budget while the three e3
	attrit it, so it busts the kill bar AND the floor.

	Bar (per CLAUDE.md), verified deterministic across seeds 1-4 on
	every level:
	• stall (observe only) → LOSS every level/seed (after_ticks).
	• brute attack_move → LOSS every level/seed (drives into the
	cluster, bleeds 2 tanks, fails the kill bar).
	• kill-chaff-first → LOSS on MEDIUM + HARD (the 15-strong
	cluster cannot be cleared chaff-first in budget; the e3 attrit
	the squad). EASY is the forgiving bare-skill tier — its smaller
	12-unit cluster lets a chaff-first play still finish, so the
	load-bearing chaff-first LOSS is medium + hard.
	• focus-threats-first → WIN every level/seed.
	"""

	from __future__ import annotations

	import pytest

	pytest.importorskip("openra_train", reason="Rust env wheel not installed")

	from openra_bench.eval_core import run_level
	from openra_bench.scenarios import load_pack
	from openra_bench.scenarios.loader import PACKS_DIR, compile_level

	# Scenario pack under test: the YAML file resolved inside PACKS_DIR.
	PACK = PACKS_DIR / "combat-target-priority-highvalue.yaml"
	# Difficulty tiers and RNG seeds that the parametrized tests sweep over.
	LEVELS = ("easy", "medium", "hard")
	SEEDS = (1, 2, 3, 4)
	# enemy cluster advance target — the chaff column, mid-y.
	# Used as the (x, y) attack_move fallback when no enemy is visible yet.
	_ADV = (66, 20)


	# ── helpers ───────────────────────────────────────────────────────


	def _tank_ids(obs):
	return [
	str(u["id"])
	for u in (obs.get("units_summary") or [])
	if str(u.get("type", "")).lower() == "2tnk"
	]


	def _enemy_units(obs):
	"""(id, type) for every visible enemy combat unit."""
	out = []
	for e in (obs.get("enemy_summary") or []):
	t = str(e.get("type", "")).lower()
	if t in ("e1", "e3"):
	out.append((str(e.get("id")), t))
	return out


	# ── policies ──────────────────────────────────────────────────────


	def _stall_policy():
	"""Do nothing — the kill bar is never met, after_ticks LOSS."""
	def pol(obs, Cmd):
	return [Cmd.observe()]
	return pol


	def _brute_policy():
	    """Build the brute policy: attack_move onto the cluster centroid.

	    The squad drives INTO the cluster and is enveloped; it is expected
	    to bleed two tanks and fail to clear the kill bar before the
	    deadline ⇒ LOSS.
	    """
	    def pol(obs, Cmd):
	        tanks = _tank_ids(obs)
	        if not tanks:
	            return [Cmd.observe()]

	        hostiles = []
	        for enemy in obs.get("enemy_summary") or []:
	            if str(enemy.get("type", "")).lower() in ("e1", "e3"):
	                hostiles.append(enemy)

	        if not hostiles:
	            # Nothing visible yet — advance on the fixed fallback cell.
	            return [Cmd.attack_move(tanks, _ADV[0], _ADV[1])]

	        # Integer (floor) centroid of the visible e1/e3 units.
	        count = len(hostiles)
	        centre_x = sum(enemy["cell_x"] for enemy in hostiles) // count
	        centre_y = sum(enemy["cell_y"] for enemy in hostiles) // count
	        return [Cmd.attack_move(tanks, centre_x, centre_y)]

	    return pol


	def _focus_policy(first: str):
	    """Build a focus-fire policy that targets unit type `first` first.

	    All tanks attack_unit the first visible enemy of type `first`; when
	    none is visible they take the first visible enemy of any other
	    type. first='e1' is the kill-chaff-first trap; first='e3' is the
	    intended threat-first focus play.
	    """
	    def pol(obs, Cmd):
	        tanks = _tank_ids(obs)
	        if not tanks:
	            return [Cmd.observe()]

	        # Stable sort: units of the preferred type move to the front,
	        # and observation order is kept inside each group.
	        ordered = sorted(_enemy_units(obs), key=lambda unit: unit[1] != first)
	        if ordered:
	            target_id = ordered[0][0]
	            return [Cmd.attack_unit(tanks, target_id)]

	        # no enemy in view — close to contact range.
	        return [Cmd.attack_move(tanks, _ADV[0], _ADV[1])]

	    return pol


	# ── tests ─────────────────────────────────────────────────────────


	@pytest.mark.parametrize("level", LEVELS)
	@pytest.mark.parametrize("seed", SEEDS)
	def test_stall_loses(level, seed):
	    """The observe-only policy must end in a LOSS, never a draw."""
	    pack = load_pack(PACK)
	    compiled = compile_level(pack, level)
	    result = run_level(compiled, _stall_policy(), seed=seed)
	    outcome = result.outcome
	    assert outcome == "loss", (
	        f"{level}/seed{seed}: stall must LOSE, got {outcome}"
	    )


	@pytest.mark.parametrize("level", LEVELS)
	@pytest.mark.parametrize("seed", SEEDS)
	def test_brute_attack_move_loses(level, seed):
	    """The brute attack-move policy must end in a real LOSS, not a draw.

	    Attack-moving into the cluster is expected to leave the rocket
	    soldiers firing throughout the engagement.
	    """
	    pack = load_pack(PACK)
	    compiled = compile_level(pack, level)
	    result = run_level(compiled, _brute_policy(), seed=seed)
	    outcome = result.outcome
	    assert outcome == "loss", (
	        f"{level}/seed{seed}: brute must LOSE, got {outcome}"
	    )


	@pytest.mark.parametrize("level", ["medium", "hard"])
	@pytest.mark.parametrize("seed", SEEDS)
	def test_kill_chaff_first_loses(level, seed):
	    """Targeting the e1 chaff first must LOSE on medium and hard.

	    With the rocket soldiers left alive through the mop-up, the squad
	    is expected to miss the kill bar on the 15-strong cluster and drop
	    below the survival floor. EASY is deliberately not parametrized
	    here: it is the forgiving tier whose smaller 12-unit cluster lets a
	    chaff-first play still finish.
	    """
	    pack = load_pack(PACK)
	    compiled = compile_level(pack, level)
	    chaff_first = _focus_policy("e1")
	    result = run_level(compiled, chaff_first, seed=seed)
	    outcome = result.outcome
	    assert outcome == "loss", (
	        f"{level}/seed{seed}: kill-chaff-first must LOSE, got {outcome}"
	    )


	@pytest.mark.parametrize("level", LEVELS)
	@pytest.mark.parametrize("seed", SEEDS)
	def test_focus_threats_first_wins(level, seed):
	    """Focusing the rocket soldiers (e3) first must WIN on every level
	    and seed — this is the capability the pack is meant to reward."""
	    pack = load_pack(PACK)
	    compiled = compile_level(pack, level)
	    threats_first = _focus_policy("e3")
	    result = run_level(compiled, threats_first, seed=seed)
	    outcome = result.outcome
	    assert outcome == "win", (
	        f"{level}/seed{seed}: focus-threats-first must WIN, got {outcome}"
	    )


	def test_hard_agent_spawn_axis_has_two_groups():
	    """The hard tier must expose at least two agent-side spawn_point
	    groups (the seed-driven staging-corridor axis).

	    Collects the distinct non-None ``spawn_point`` values of all actors
	    owned by ``"agent"`` in the compiled hard scenario.
	    """
	    compiled = compile_level(load_pack(PACK), "hard")
	    agent_sps = set()
	    for actor in compiled.scenario.actors:
	        if actor.owner == "agent" and actor.spawn_point is not None:
	            agent_sps.add(actor.spawn_point)
	    group_count = len(agent_sps)
	    assert group_count >= 2, (
	        f"hard needs ≥2 agent spawn_point groups, got {sorted(agent_sps)}"
	    )


	def test_tick_budget_alignment():
	    """within_ticks / after_ticks must be reachable inside max_turns
	    (tick ≤ 93 + 90·(max_turns-1)) so the deadline actually bites.

	    For every level, the first win clause carrying ``within_ticks`` and
	    the first fail clause carrying ``after_ticks`` must both exist and
	    must not exceed that tick ceiling.
	    """
	    def first_value(clauses, key):
	        # Value of `key` from the first clause that has it, else None.
	        return next((clause[key] for clause in clauses if key in clause), None)

	    for level in LEVELS:
	        compiled = compile_level(load_pack(PACK), level)
	        max_tick = 93 + 90 * (compiled.max_turns - 1)

	        within = first_value(compiled.win_condition.all_of or [], "within_ticks")
	        assert within is not None and within <= max_tick, (
	            f"{level}: within_ticks {within} not reachable by {max_tick}"
	        )

	        after = first_value(compiled.fail_condition.any_of or [], "after_ticks")
	        assert after is not None and after <= max_tick, (
	            f"{level}: after_ticks {after} not reachable by {max_tick}"
	        )