Spaces:

qpluslab
/

OpenRA-Bench

Running

App Files Files Community

OpenRA-Bench / openra_bench /human_study.py

yxc20098

Paper-experiment prep: human-study harness + pass^k + paper plan

07dfe2e about 1 month ago

Raw

History Blame Contribute Delete

4.61 kB

	"""Human-baseline study — the apples-to-apples human reference.

	External tournament replays cannot be scored on this benchmark: a real
	RTS game is a different engine, continuous-time, a different action
	space, and not one of the constructed tasks. The ONLY comparable human
	data comes through the Play tab — same engine, scenario, observation,
	tool API, win predicate, and `Playback` format the models use.

	This module defines a fixed, stratified 24-pack subset (one pack per
	scenario family, difficulty spread 8 easy / 8 medium / 8 hard) and the
	three conditions a player plays each under:

	* `vision-fog` — the canonical fogged minimap (normal play).
	* `vision-clear` — no fog (engine `reveal_map`): the fog-penalty pair.
	* `handoff-bad` — the player inherits a losing position (a `stall`
	prefix of `HANDOFF_K` turns), then plays on — the
	recovery / freeze comparison.

	A per-player counterbalanced playlist walks all 24 x 3 = 72 cells.
	"""

	from __future__ import annotations

	import random
	from typing import Any

	# The pinned human-study subset: 24 (pack, level) pairs, one
	# representative pack per scenario family. Entries are laid out in
	# easy / medium / hard triads, so each difficulty appears 8 times.
	# Stratification is by family, not by the lopsided `meta.capability`
	# tag. This list pins the human baseline, and its order also feeds the
	# seeded shuffle in `study_playlist` — do NOT add, remove, or reorder
	# entries.
	STUDY_SUBSET: list[tuple[str, str]] = [
	    ("combat-pincer-coordination", "easy"),
	    ("econ-overflow-to-silos", "medium"),
	    ("def-pre-position-mobile-reserve", "hard"),

	    ("build-rally-point-management", "easy"),
	    ("scout-detect-incoming-army", "medium"),
	    ("lh-opening-to-defense-to-counter", "hard"),

	    ("proc-only-defend-no-attack", "easy"),
	    ("mfb-supply-line-link-between-bases", "medium"),
	    ("rob-objective-shift-with-or-clause", "hard"),

	    ("coord-mutual-support", "easy"),
	    ("tp-rush-objective-very-fast", "medium"),
	    ("mcv-deploy-relocate-under-pressure", "hard"),

	    ("artofwar-lure-the-tiger", "easy"),
	    ("economy-harvest-timebox", "medium"),
	    ("perception-frontier-reading", "hard"),

	    ("strategy-trilemma", "easy"),
	    ("tech-production-planning", "medium"),
	    ("expansion-balanced-2-base-defended", "hard"),

	    ("mid-economy-under-fire", "easy"),
	    ("strict-sequence", "medium"),
	    ("action-sequenced-execution", "hard"),

	    ("adv-rps-counter-pick", "easy"),
	    ("coordination-staggered-window", "medium"),
	    ("maint-sell-and-recoup-cash", "hard"),
	]

	# The three play conditions each subset cell is run under, in their
	# canonical order.
	STUDY_CONDITIONS: tuple[str, ...] = (
	    "vision-fog",
	    "vision-clear",
	    "handoff-bad",
	)

	# How many observe-only (stall) turns are played on the player's
	# behalf before they take over in the `handoff-bad` condition.
	HANDOFF_K = 3

	# Which fog_mode the scenario is compiled under for each condition.
	# `handoff-bad` uses the same fogged view as `vision-fog`; only the
	# starting position differs.
	_CONDITION_FOG = {
	    "vision-fog": "vision",
	    "vision-clear": "vision-clear",
	    "handoff-bad": "vision",
	}


	def study_playlist(player_seed: int = 0) -> list[tuple[str, str, str]]:
	    """Return one player's full study playlist.

	    Every `(pack, level)` entry of `STUDY_SUBSET` is crossed with every
	    condition in `STUDY_CONDITIONS` (24 x 3 = 72 cells as currently
	    defined). The cells are then shuffled in place by a
	    `random.Random` seeded with `player_seed`, so a given seed always
	    reproduces the same order while different players can be given
	    different orders — keeping condition ordering from being
	    confounded across players.

	    Returns a new list of `(pack, level, condition)` tuples.
	    """
	    playlist: list[tuple[str, str, str]] = []
	    # Pack-major, condition-minor: the pre-shuffle order must stay
	    # fixed for the seeded shuffle to be reproducible.
	    for pack, level in STUDY_SUBSET:
	        for condition in STUDY_CONDITIONS:
	            playlist.append((pack, level, condition))

	    rng = random.Random(player_seed)
	    rng.shuffle(playlist)
	    return playlist


	def open_study_session(
	    pack: str,
	    level: str,
	    condition: str,
	    player: str,
	    seed: int = 1,
	    playback_root: Any = None,
	):
	    """Open a recorded `InteractiveSession` for a single study cell.

	    The session is created via `InteractiveSession.from_pack` with
	    recording enabled and the fog_mode that `_CONDITION_FOG` assigns to
	    `condition`. Runs are persisted in the standard `Playback` format
	    (apples-to-apples with model runs).

	    For `handoff-bad`, up to `HANDOFF_K` empty (observe-only) turns are
	    submitted before the session is returned, stopping early if the
	    session reports `done`. The player therefore takes over a game
	    that has already been stalled, like the model handoff ablation.

	    Args:
	        pack: Pack name handed to `InteractiveSession.from_pack`.
	        level: Difficulty level of the pack.
	        condition: One of `STUDY_CONDITIONS`.
	        player: Player identifier forwarded to the session.
	        seed: Seed forwarded to the session.
	        playback_root: Where recordings go. When `None`, defaults to
	            `playback/human_study/<condition>` so the condition can be
	            recovered from the path.

	    Returns:
	        The opened session, already advanced past the stall prefix in
	        the `handoff-bad` condition.

	    Raises:
	        ValueError: If `condition` is not in `STUDY_CONDITIONS`.
	    """
	    from pathlib import Path

	    from .human_labeling import InteractiveSession

	    if condition not in STUDY_CONDITIONS:
	        raise ValueError(f"unknown study condition {condition!r}")

	    root = (
	        Path("playback/human_study") / condition
	        if playback_root is None
	        else playback_root
	    )
	    fog_mode = _CONDITION_FOG[condition]

	    session = InteractiveSession.from_pack(
	        pack,
	        level,
	        seed,
	        record=True,
	        playback_root=root,
	        player=player,
	        fog_mode=fog_mode,
	    )
	    if condition != "handoff-bad":
	        return session

	    # Stall prefix: burn empty turns so the player inherits the
	    # resulting board, unless the game ends first.
	    stalls_left = HANDOFF_K
	    while stalls_left > 0 and not session.done:
	        session.submit_turn([])
	        stalls_left -= 1
	    return session