OpenRA-Bench / openra_bench /human_study.py
yxc20098's picture
Paper-experiment prep: human-study harness + pass^k + paper plan
07dfe2e
Raw
History Blame Contribute Delete
4.61 kB
"""Human-baseline study — the apples-to-apples human reference.
External tournament replays cannot be scored on this benchmark: a real
RTS game is a different engine, continuous-time, a different action
space, and not one of the constructed tasks. The ONLY comparable human
data comes through the Play tab — same engine, scenario, observation,
tool API, win predicate, and `Playback` format the models use.
This module defines a fixed, stratified 24-pack subset (one pack per
scenario family, difficulty spread 8 easy / 8 medium / 8 hard) and the
three conditions a player plays each under:
* `vision-fog` — the canonical fogged minimap (normal play).
* `vision-clear` — no fog (engine `reveal_map`): the fog-penalty pair.
* `handoff-bad` — the player inherits a losing position (a `stall`
  prefix of `HANDOFF_K` turns), then plays on — the
  recovery / freeze comparison.
A per-player counterbalanced playlist walks all 24 x 3 = 72 cells.
"""
from __future__ import annotations
import random
from typing import Any
# Frozen subset — one representative pack per scenario family, levels
# spread across difficulty. Stratified by family (not by the lopsided
# `meta.capability` tag). Keep this list STABLE — it pins the baseline.
STUDY_SUBSET: list[tuple[str, str]] = [
    # (pack, level) pairs; the levels cycle easy -> medium -> hard, so
    # the 24 entries split evenly into 8 per difficulty.
    ("combat-pincer-coordination", "easy"),
    ("econ-overflow-to-silos", "medium"),
    ("def-pre-position-mobile-reserve", "hard"),

    ("build-rally-point-management", "easy"),
    ("scout-detect-incoming-army", "medium"),
    ("lh-opening-to-defense-to-counter", "hard"),

    ("proc-only-defend-no-attack", "easy"),
    ("mfb-supply-line-link-between-bases", "medium"),
    ("rob-objective-shift-with-or-clause", "hard"),

    ("coord-mutual-support", "easy"),
    ("tp-rush-objective-very-fast", "medium"),
    ("mcv-deploy-relocate-under-pressure", "hard"),

    ("artofwar-lure-the-tiger", "easy"),
    ("economy-harvest-timebox", "medium"),
    ("perception-frontier-reading", "hard"),

    ("strategy-trilemma", "easy"),
    ("tech-production-planning", "medium"),
    ("expansion-balanced-2-base-defended", "hard"),

    ("mid-economy-under-fire", "easy"),
    ("strict-sequence", "medium"),
    ("action-sequenced-execution", "hard"),

    ("adv-rps-counter-pick", "easy"),
    ("coordination-staggered-window", "medium"),
    ("maint-sell-and-recoup-cash", "hard"),
]
# The conditions every (pack, level) cell of the study is played under.
STUDY_CONDITIONS: tuple[str, ...] = (
    "vision-fog",
    "vision-clear",
    "handoff-bad",
)

# Length of the stall prefix for `handoff-bad`: the number of
# observe-only turns already played when the player takes over.
HANDOFF_K = 3

# Condition name -> fog_mode the scenario is compiled under.
_CONDITION_FOG = {
    "vision-fog": "vision",
    "vision-clear": "vision-clear",
    "handoff-bad": "vision",  # same fog mode as `vision-fog`
}
def study_playlist(player_seed: int = 0) -> list[tuple[str, str, str]]:
    """Build one player's playlist of `(pack, level, condition)` cells.

    Each `(pack, level)` entry of `STUDY_SUBSET` is combined with each
    condition in `STUDY_CONDITIONS`, and the resulting list is shuffled
    with a private `random.Random(player_seed)`. A given seed therefore
    always reproduces the same order, while per-player seeds keep the
    condition ordering from being confounded across players.
    """
    playlist = []
    for pack, level in STUDY_SUBSET:
        # Conditions stay in declaration order within a pack before the
        # shuffle, so the pre-shuffle list is fully deterministic.
        playlist.extend((pack, level, cond) for cond in STUDY_CONDITIONS)

    rng = random.Random(player_seed)
    rng.shuffle(playlist)
    return playlist
def open_study_session(
    pack: str,
    level: str,
    condition: str,
    player: str,
    seed: int = 1,
    playback_root: Any = None,
):
    """Start a recorded `InteractiveSession` for a single study cell.

    The session is created through `InteractiveSession.from_pack` with
    recording enabled and the fog mode that `_CONDITION_FOG` maps to
    `condition`. Runs are meant to persist in the same `Playback`
    format as model runs, so human and model data stay comparable.

    For `handoff-bad`, up to `HANDOFF_K` empty (observe-only) turns are
    submitted before the session is returned, so the player inherits
    the board left by that stall prefix. The prefix stops early if the
    session already reports `done`.

    Args:
        pack: Scenario pack name, forwarded to `from_pack`.
        level: Difficulty level, forwarded to `from_pack`.
        condition: One of `STUDY_CONDITIONS`.
        player: Player identifier, forwarded to `from_pack`.
        seed: Seed forwarded to `from_pack`.
        playback_root: Where the recording goes. When None, defaults to
            `playback/human_study/<condition>` so the condition is
            recoverable from the path.

    Returns:
        The opened session, already advanced past the stall prefix for
        `handoff-bad`.

    Raises:
        ValueError: If `condition` is not in `STUDY_CONDITIONS`.
    """
    from pathlib import Path

    from .human_labeling import InteractiveSession

    if condition not in STUDY_CONDITIONS:
        raise ValueError(f"unknown study condition {condition!r}")

    # Only None triggers the default; any other value is passed through.
    root = (
        Path("playback/human_study") / condition
        if playback_root is None
        else playback_root
    )

    session = InteractiveSession.from_pack(
        pack,
        level,
        seed,
        record=True,
        playback_root=root,
        player=player,
        fog_mode=_CONDITION_FOG[condition],
    )

    if condition != "handoff-bad":
        return session

    # Stall prefix: submit empty turns until HANDOFF_K are spent or the
    # game ends, whichever comes first.
    stalls_left = HANDOFF_K
    while stalls_left > 0 and not session.done:
        session.submit_turn([])
        stalls_left -= 1
    return session