Spaces:
Running
Running
"""Human-baseline study — the apples-to-apples human reference.

External tournament replays cannot be scored on this benchmark: a real
RTS game is a different engine, continuous-time, a different action
space, and not one of the constructed tasks. The ONLY comparable human
data comes through the Play tab — same engine, scenario, observation,
tool API, win predicate, and `Playback` format the models use.

This module defines a fixed, stratified 24-pack subset (one pack per
scenario family, difficulty spread 8 easy / 8 medium / 8 hard) and the
three conditions a player plays each under:

  * `vision-fog`   — the canonical fogged minimap (normal play).
  * `vision-clear` — no fog (engine `reveal_map`): the fog-penalty pair.
  * `handoff-bad`  — the player inherits a losing position (a `stall`
                     prefix of `HANDOFF_K` turns), then plays on — the
                     recovery / freeze comparison.

A per-player counterbalanced playlist walks all 24 x 3 = 72 cells.
"""
| from __future__ import annotations | |
| import random | |
| from typing import Any | |
# Frozen subset — one representative pack per scenario family, levels
# spread across difficulty. Stratified by family (not by the lopsided
# `meta.capability` tag). Keep this list STABLE — it pins the baseline.
#
# Each entry is a `(pack, level)` pair. The 24 entries split evenly into
# 8 "easy", 8 "medium" and 8 "hard" levels.
STUDY_SUBSET: list[tuple[str, str]] = [
    ("combat-pincer-coordination", "easy"),
    ("econ-overflow-to-silos", "medium"),
    ("def-pre-position-mobile-reserve", "hard"),
    ("build-rally-point-management", "easy"),
    ("scout-detect-incoming-army", "medium"),
    ("lh-opening-to-defense-to-counter", "hard"),
    ("proc-only-defend-no-attack", "easy"),
    ("mfb-supply-line-link-between-bases", "medium"),
    ("rob-objective-shift-with-or-clause", "hard"),
    ("coord-mutual-support", "easy"),
    ("tp-rush-objective-very-fast", "medium"),
    ("mcv-deploy-relocate-under-pressure", "hard"),
    ("artofwar-lure-the-tiger", "easy"),
    ("economy-harvest-timebox", "medium"),
    ("perception-frontier-reading", "hard"),
    ("strategy-trilemma", "easy"),
    ("tech-production-planning", "medium"),
    ("expansion-balanced-2-base-defended", "hard"),
    ("mid-economy-under-fire", "easy"),
    ("strict-sequence", "medium"),
    ("action-sequenced-execution", "hard"),
    ("adv-rps-counter-pick", "easy"),
    ("coordination-staggered-window", "medium"),
    ("maint-sell-and-recoup-cash", "hard"),
]
# The conditions a study cell can be played under. `study_playlist`
# pairs every `STUDY_SUBSET` entry with each of these, and
# `open_study_session` rejects any condition not listed here.
STUDY_CONDITIONS: tuple[str, ...] = ("vision-fog", "vision-clear", "handoff-bad")

# Prefix length for the `handoff-bad` condition — the player inherits a
# game already `HANDOFF_K` observe-only (stall) turns deep.
HANDOFF_K = 3

# fog_mode each condition compiles the scenario under. `handoff-bad`
# uses the same "vision" mode as `vision-fog`; only `vision-clear`
# differs.
_CONDITION_FOG = {
    "vision-fog": "vision",
    "vision-clear": "vision-clear",
    "handoff-bad": "vision",
}
def study_playlist(player_seed: int = 0) -> list[tuple[str, str, str]]:
    """Build the full study playlist for one player.

    Pairs every `(pack, level)` entry of `STUDY_SUBSET` with every
    condition in `STUDY_CONDITIONS` (24 x 3 = 72 cells), then shuffles
    the cells with a `random.Random` seeded by `player_seed`. The same
    seed therefore always produces the same order, while different
    seeds vary the condition ordering between players.

    Returns a list of `(pack, level, condition)` tuples.
    """
    playlist: list[tuple[str, str, str]] = []
    # Pack-major, condition-minor enumeration: the pre-shuffle order has
    # to stay fixed, otherwise a given seed would map to another playlist.
    for pack, level in STUDY_SUBSET:
        for cond in STUDY_CONDITIONS:
            playlist.append((pack, level, cond))

    rng = random.Random(player_seed)
    rng.shuffle(playlist)
    return playlist
def open_study_session(
    pack: str,
    level: str,
    condition: str,
    player: str,
    seed: int = 1,
    playback_root: Any = None,
):
    """Create the `InteractiveSession` for a single study cell.

    The session is built with `InteractiveSession.from_pack` for
    `(pack, level, seed)`, with recording enabled and the fog mode that
    `_CONDITION_FOG` assigns to `condition`.

    When `playback_root` is None the run is stored under
    `playback/human_study/<condition>`, so the condition can be read
    back from the path. A caller-supplied root is used unchanged.

    For `handoff-bad`, up to `HANDOFF_K` empty turns are submitted
    before the session is returned, so the player takes over a game
    that has already been stalled. The prefix stops early if the
    session reports `done`.

    Raises:
        ValueError: if `condition` is not one of `STUDY_CONDITIONS`.
    """
    from pathlib import Path

    from .human_labeling import InteractiveSession

    if condition not in STUDY_CONDITIONS:
        raise ValueError(f"unknown study condition {condition!r}")

    root = (
        playback_root
        if playback_root is not None
        else Path("playback/human_study") / condition
    )
    session = InteractiveSession.from_pack(
        pack,
        level,
        seed,
        record=True,
        playback_root=root,
        player=player,
        fog_mode=_CONDITION_FOG[condition],
    )

    if condition != "handoff-bad":
        return session

    # Stall prefix — the player inherits the resulting losing board.
    stalled = 0
    while stalled < HANDOFF_K and not session.done:
        session.submit_turn([])
        stalled += 1
    return session