"""Human-baseline study — the apples-to-apple human reference. External tournament replays cannot be scored on this benchmark: a real RTS game is a different engine, continuous-time, a different action space, and not one of the constructed tasks. The ONLY comparable human data comes through the Play tab — same engine, scenario, observation, tool API, win predicate, and `Playback` format the models use. This module defines a fixed, stratified 24-pack subset (one pack per scenario family, difficulty spread 8 easy / 8 medium / 8 hard) and the three conditions a player plays each under: * `vision-fog` — the canonical fogged minimap (normal play). * `vision-clear` — no fog (engine `reveal_map`): the fog-penalty pair. * `handoff-bad` — the player inherits a losing position (a `stall` prefix of `HANDOFF_K` turns), then plays on — the recovery / freeze comparison. A per-player counterbalanced playlist walks all 24 x 3 = 72 cells. """ from __future__ import annotations import random from typing import Any # Frozen subset — one representative pack per scenario family, levels # spread across difficulty. Stratified by family (not by the lopsided # `meta.capability` tag). Keep this list STABLE — it pins the baseline. 
# Each entry is a (pack, level) pair; the levels cycle
# easy -> medium -> hard, giving 8 packs at each difficulty.
STUDY_SUBSET: list[tuple[str, str]] = [
    ("combat-pincer-coordination", "easy"),
    ("econ-overflow-to-silos", "medium"),
    ("def-pre-position-mobile-reserve", "hard"),
    ("build-rally-point-management", "easy"),
    ("scout-detect-incoming-army", "medium"),
    ("lh-opening-to-defense-to-counter", "hard"),
    ("proc-only-defend-no-attack", "easy"),
    ("mfb-supply-line-link-between-bases", "medium"),
    ("rob-objective-shift-with-or-clause", "hard"),
    ("coord-mutual-support", "easy"),
    ("tp-rush-objective-very-fast", "medium"),
    ("mcv-deploy-relocate-under-pressure", "hard"),
    ("artofwar-lure-the-tiger", "easy"),
    ("economy-harvest-timebox", "medium"),
    ("perception-frontier-reading", "hard"),
    ("strategy-trilemma", "easy"),
    ("tech-production-planning", "medium"),
    ("expansion-balanced-2-base-defended", "hard"),
    ("mid-economy-under-fire", "easy"),
    ("strict-sequence", "medium"),
    ("action-sequenced-execution", "hard"),
    ("adv-rps-counter-pick", "easy"),
    ("coordination-staggered-window", "medium"),
    ("maint-sell-and-recoup-cash", "hard"),
]

# The conditions every (pack, level) cell is played under, in canonical order.
STUDY_CONDITIONS: tuple[str, ...] = (
    "vision-fog",
    "vision-clear",
    "handoff-bad",
)

# Number of observe-only (stall) turns already played when the player takes
# over in the `handoff-bad` condition.
HANDOFF_K = 3

# The fog_mode each condition compiles the scenario under.
_CONDITION_FOG = {
    "vision-fog": "vision",
    "vision-clear": "vision-clear",
    "handoff-bad": "vision",
}


def study_playlist(player_seed: int = 0) -> list[tuple[str, str, str]]:
    """Return every (pack, level, condition) study cell in a seeded order.

    The cells are enumerated pack-major (each `STUDY_SUBSET` entry crossed
    with each of `STUDY_CONDITIONS`) and then shuffled in place with a
    `random.Random` seeded by `player_seed`. The same seed therefore always
    produces the same playlist, while different players see the conditions
    in different orders.
    """
    playlist: list[tuple[str, str, str]] = []
    for pack, level in STUDY_SUBSET:
        for condition in STUDY_CONDITIONS:
            playlist.append((pack, level, condition))

    rng = random.Random(player_seed)
    rng.shuffle(playlist)
    return playlist


def open_study_session(
    pack: str,
    level: str,
    condition: str,
    player: str,
    seed: int = 1,
    playback_root: Any = None,
):
    """Create the recorded `InteractiveSession` for a single study cell.

    The session is built with `InteractiveSession.from_pack`, recording
    enabled, under the fog mode that `_CONDITION_FOG` maps `condition` to.
    When `playback_root` is None the run is stored under
    `playback/human_study/<condition>`, so the condition can be read back
    from the path.

    For `handoff-bad`, up to `HANDOFF_K` empty (observe-only) turns are
    submitted before the session is returned, so the player takes over a
    position that has already been stalled. The prefix stops early if the
    session reports `done`.

    Raises:
        ValueError: if `condition` is not one of `STUDY_CONDITIONS`.
    """
    from pathlib import Path

    from .human_labeling import InteractiveSession

    if condition not in STUDY_CONDITIONS:
        raise ValueError(f"unknown study condition {condition!r}")

    root = playback_root
    if root is None:
        root = Path("playback/human_study") / condition

    session = InteractiveSession.from_pack(
        pack,
        level,
        seed,
        record=True,
        playback_root=root,
        player=player,
        fog_mode=_CONDITION_FOG[condition],
    )

    # Only the handoff condition gets a stall prefix.
    if condition != "handoff-bad":
        return session

    # Burn the stall turns with empty submissions; never submit to a session
    # that has already finished.
    stalls_left = HANDOFF_K
    while stalls_left > 0 and not session.done:
        session.submit_turn([])
        stalls_left -= 1
    return session