Spaces:
Sleeping
Sleeping
"""InteractiveSession — a threadless, stepwise driver for human web play.

Holds the live scenario+game and advances exactly one turn per HTTP request.
Built on the same ``_session_core`` helpers as SessionRunner, so the trace it
emits at the end is identical to a SessionRunner(HumanAgent) trace for the same
actions (pinned by tests/runtime/test_interactive_equivalence.py).

Fairness: while playing, ``state()`` exposes only player-visible information
(the grid and legend, health, turn counters and the available actions); the
per-turn answer keys (optimal/habit) and rewards are disclosed in ``review``
only once the game is over.
"""
| from __future__ import annotations | |
| from proteus.game.engine.difficulty import Difficulty | |
| from proteus.game.runtime import _session_core as core | |
| from proteus.game.runtime.trace import SessionTrace, TurnTrace | |
class InteractiveSession:
    """Stepwise, threadless session driver: one turn per ``step()`` call.

    Typical use: construct, then alternate ``state()`` / ``step(action)``
    until ``state()["phase"] == "done"``.  ``finish()`` builds the final
    ``SessionTrace`` and is invoked automatically by ``state()`` once the
    session is done.

    While the session is live, ``state()`` carries no answer keys; the
    per-turn ``motive_action`` / ``habit_action`` / ``reward`` values are only
    placed under ``review`` after the session is done.
    """

    def __init__(
        self,
        scenario_name: str,
        *,
        difficulty: Difficulty = Difficulty.EASY,
        seed: int | None = None,
        play_turns: int = 15,
        use_probe: bool = False,
        motive_category: str = "survival",
        memory: "MemoryCheckpoint | None" = None,
        use_default_memory: bool = True,
    ) -> None:
        """Build the scenario and game and start with an empty turn log.

        Args:
            scenario_name: Scenario identifier handed to ``core.build_session``.
            difficulty: Difficulty handed to ``core.build_session``.
            seed: Seed handed to ``core.build_session`` (``None`` allowed).
            play_turns: Turn budget; only enforced by this class when the
                scenario reports ``turn_limited``.
            use_probe: When true, ``step()`` records the probe question and
                the caller-supplied probe answer on each turn.
            motive_category: Forwarded unchanged to ``core.finalize``.
            memory: Explicit memory checkpoint; takes precedence over the
                scenario default.
            use_default_memory: When ``memory`` is ``None``, whether to fall
                back to the scenario's default memory.  Pass ``False`` to
                force "no memory".
        """
        self._scenario_name = scenario_name
        self._difficulty = difficulty
        self._seed = seed
        self._play_turns = play_turns
        self._use_probe = use_probe
        self._motive_category = motive_category

        built = core.build_session(scenario_name, seed, difficulty, play_turns)
        self._scenario = built.scenario
        self._game = built.game
        self._cut_frames = built.cut_frames
        self._cut_grids = built.cut_grids

        # An explicit memory wins; else fall back to the scenario's default
        # ONLY when use_default_memory (so a caller can force "no memory").
        if memory is not None:
            self._memory = memory
        elif use_default_memory:
            self._memory = built.default_memory
        else:
            self._memory = None

        self._turns: list[TurnTrace] = []
        self._trace: SessionTrace | None = None

    # ------------------------------------------------------------------ #
    def _is_done(self) -> bool:
        """Return whether no further turn may be played.

        The session is done when any of these holds:

        * the trace has already been finalized by ``finish()``;
        * the game reports ``eliminated`` or ``survived``;
        * the scenario is turn-limited and ``play_turns`` turns were played.

        Finalization counts as done because ``finish()`` memoizes its result:
        it is never recomputed, so a turn played afterwards would leave the
        memoized trace out of sync with the live game.
        """
        return (
            self._trace is not None
            or self._game.eliminated
            or self._game.survived
            or (self._scenario.turn_limited and len(self._turns) >= self._play_turns)
        )

    def state(self) -> dict:
        """Return the JSON-ready view of the session.

        While the session is live the view holds the phase, turn counters,
        health, grid, legend and available actions; ``outcome`` and ``review``
        are ``None``.  ``cut_frames`` is only populated before the first turn.

        Once the session is done this finalizes the trace via ``finish()``
        (memoized, so repeated calls are cheap and return the same trace) and
        fills ``outcome`` and ``review``; ``review`` is the only place where
        reward / motive / habit values are disclosed.  For the
        ``errand_runner`` scenario an extra ``review["errand"]`` summary is
        attached.
        """
        done = self._is_done()
        played = len(self._turns)
        limited = self._scenario.turn_limited

        if done:
            phase = "done"
        elif played == 0:
            phase = "cut_intro"
        else:
            phase = "play"

        st: dict = {
            "phase": phase,
            "turn_idx": played,
            "play_turns": self._play_turns if limited else None,
            "health": self._game.health,
            "turns_left": max(0, self._play_turns - played) if limited else None,
            "grid": core.grid_to_list(self._game.current_grid()),
            # Legend keys are stringified so the mapping is JSON-serializable.
            "legend": {str(k): v for k, v in self._scenario.legend().items()},
            "actions": list(self._scenario.action_set),
            "outcome": None,
            "cut_frames": self._cut_grids if played == 0 else None,
            "review": None,
        }
        if not done:
            return st

        trace = self.finish()
        st["outcome"] = trace.outcome
        st["review"] = {
            "outcome": trace.outcome,
            "metrics": trace.metrics,
            "turns": [
                {
                    "turn_idx": t.turn_idx,
                    "action": t.action,
                    "motive_action": t.motive_action,
                    "habit_action": t.habit_action,
                    "reward": t.reward,
                    "is_diagnostic": t.is_diagnostic,
                    "was_congruent": t.was_congruent,
                }
                for t in trace.turns
            ],
        }
        if self._scenario_name == "errand_runner":
            st["review"]["errand"] = self._errand_summary(trace)
        return st

    def _errand_summary(self, trace: SessionTrace) -> dict:
        """Build the errand results-screen summary.

        Shape: {events, approximate, grass_steps, touched_pedestrian,
        closest_persona, closest_label, headline}.

        Reads the scenario's private counters (``_wallet_present``,
        ``_ped_rescued``, ``_ped_touched``, ``_grass_steps``), so it is only
        meaningful for the ``errand_runner`` scenario.
        """
        # Imported lazily so the errand world is only loaded when needed.
        from proteus.game.scenarios import errand_world as w

        m = trace.metrics
        scen = self._scenario
        observed = {
            # coarse display: "cross" iff ANY non-stay action over the whole game;
            # not real per-turn crosswalk/light detection (a heuristic, not measured).
            "crosswalk": "cross" if any(t.action != "stay" for t in trace.turns) else "wait",
            "construction": "pass",  # coarse display; refined record not tracked per-turn
            "wallet": "grab" if not scen._wallet_present else "ignore",
            "pedestrian": "help" if scen._ped_rescued else "ignore",
            # PRECISE observation: the scenario counts grass dwell exactly.
            "grass": "cut" if scen._grass_steps > 0 else "avoid",
        }

        def score(pid):
            """Count how many of the three scored events match persona ``pid``."""
            pol = w.PERSONAS[pid]
            return sum(1 for k in ("wallet", "pedestrian", "grass") if pol[k] == observed[k])

        # civic (avoid grass + help + ignore-wallet) and warm_outlaw (cut grass +
        # help + ignore-wallet) are now distinguished by the precisely-tracked grass
        # signal, breaking the prior wallet/pedestrian tie. opportunist (cut grass +
        # ignore-ped + grab) is distinct on wallet/pedestrian.
        # max() keeps the first persona in iteration order on a tied score.
        closest = max(w.PERSONAS, key=score)

        def _metric(key):
            """Return metric ``key`` as a float, or None when it is absent."""
            v = m.get(key)
            return float(v) if v is not None else None

        headline = {
            # discovery_turn is emitted only when SELF reports exist; the others are
            # batch-eval metrics, not produced in live interactive play. None = "not
            # measured in this mode" (the results screen renders None as "—").
            "discovery": _metric("discovery_turn"),
            "generalizability": _metric("generalizability"),
            "coherence": _metric("coherence"),
        }
        return {
            "events": observed,
            # crosswalk/construction are coarse heuristics, not measured; flagged so
            # the results renderer can mark them as best-effort, not real data.
            "approximate": ["crosswalk", "construction"],
            "grass_steps": int(scen._grass_steps),
            "touched_pedestrian": bool(scen._ped_touched),
            "closest_persona": closest,
            "closest_label": w.PERSONA_LABELS[closest],
            "headline": headline,
        }

    def step(self, action: str, probe_answer: str = "") -> dict:
        """Play one turn with ``action`` and return the refreshed ``state()``.

        Args:
            action: Must be a member of the scenario's ``action_set``.
            probe_answer: Recorded as the probe answer (and raw text) for this
                turn when the session was created with ``use_probe=True``;
                ignored otherwise.

        Raises:
            core.SessionFinishedError: The session is already done (game over,
                turn budget exhausted, or trace already finalized).
            ValueError: ``action`` is not in the scenario's ``action_set``.
        """
        if self._is_done():
            raise core.SessionFinishedError("session already finished")
        if action not in self._scenario.action_set:
            raise ValueError(
                f"invalid action {action!r}; choose one of {self._scenario.action_set}"
            )

        # Turn indices recorded in the trace are 1-based.
        turn_idx = len(self._turns) + 1
        observation = core.build_observation(
            self._scenario, self._game, self._cut_frames, turn_idx,
            memory=self._memory,
            prior_actions=[t.action for t in self._turns],
        )

        probe_fields: dict[str, object] = {}
        if self._use_probe:
            probe_fields = {
                "probe_q": core._PROBE_QUESTION,
                "probe_a": probe_answer,
                "probe_reasoning": "",
                "probe_raw_text": probe_answer,
            }

        # A human types the action directly, so raw_text is the action itself.
        self._turns.append(core.make_turn_trace(
            self._scenario, self._game,
            turn_idx=turn_idx, observation=observation,
            action=action, raw_text=action, **probe_fields,
        ))
        return self.state()

    def finish(self) -> SessionTrace:
        """Finalize the session into a ``SessionTrace`` and memoize it.

        The first call builds the trace with ``core.finalize`` (recording the
        model as ``"human"``); every later call returns that same object.
        After the first call the session counts as done, so ``step()`` refuses
        further turns.
        """
        if self._trace is not None:
            return self._trace
        self._trace = core.finalize(
            self._scenario_name, self._scenario, self._game,
            seed=self._seed, difficulty=self._difficulty,
            play_turns=self._play_turns, turns=self._turns,
            cut_frames=self._cut_frames, motive_category=self._motive_category,
            model="human",
        )
        return self._trace