"""InteractiveSession — a threadless, stepwise driver for human web play. Holds the live scenario+game and advances exactly one turn per HTTP request. Built on the same ``_session_core`` helpers as SessionRunner, so the trace it emits at the end is identical to a SessionRunner(HumanAgent) trace for the same actions (pinned by tests/runtime/test_interactive_equivalence.py). Fairness: ``state()`` exposes only the grid + available actions while playing; the per-turn answer keys (optimal/habit) and rewards are disclosed in ``review`` only once the game is over. """ from __future__ import annotations from proteus.game.engine.difficulty import Difficulty from proteus.game.runtime import _session_core as core from proteus.game.runtime.trace import SessionTrace, TurnTrace class InteractiveSession: def __init__( self, scenario_name: str, *, difficulty: Difficulty = Difficulty.EASY, seed: int | None = None, play_turns: int = 15, use_probe: bool = False, motive_category: str = "survival", memory: "MemoryCheckpoint | None" = None, use_default_memory: bool = True, ) -> None: self._scenario_name = scenario_name self._difficulty = difficulty self._seed = seed self._play_turns = play_turns self._use_probe = use_probe self._motive_category = motive_category built = core.build_session(scenario_name, seed, difficulty, play_turns) self._scenario = built.scenario self._game = built.game self._cut_frames = built.cut_frames self._cut_grids = built.cut_grids # An explicit memory wins; else fall back to the scenario's default # ONLY when use_default_memory (so a caller can force "no memory"). self._memory = ( memory if memory is not None else (built.default_memory if use_default_memory else None) ) self._turns: list[TurnTrace] = [] self._trace: SessionTrace | None = None # ------------------------------------------------------------------ # def _is_done(self) -> bool: return ( self._game.eliminated or self._game.survived or (self._scenario.turn_limited and len(self._turns) >= self._play_turns) ) def state(self) -> dict: """The JSON-ready view (live = fair, no answer keys; review only when done). When the game is over this finalizes and memoizes the trace (idempotent) so it can populate ``review``; while playing it exposes only grid + actions, never reward/optimal/habit. """ done = self._is_done() played = len(self._turns) phase = "done" if done else ("cut_intro" if played == 0 else "play") limited = self._scenario.turn_limited st: dict = { "phase": phase, "turn_idx": played, "play_turns": self._play_turns if limited else None, "health": self._game.health, "turns_left": max(0, self._play_turns - len(self._turns)) if limited else None, "grid": core.grid_to_list(self._game.current_grid()), "legend": {str(k): v for k, v in self._scenario.legend().items()}, "actions": list(self._scenario.action_set), "outcome": None, "cut_frames": self._cut_grids if played == 0 else None, "review": None, } if done: trace = self.finish() st["outcome"] = trace.outcome st["review"] = { "outcome": trace.outcome, "metrics": trace.metrics, "turns": [ { "turn_idx": t.turn_idx, "action": t.action, "motive_action": t.motive_action, "habit_action": t.habit_action, "reward": t.reward, "is_diagnostic": t.is_diagnostic, "was_congruent": t.was_congruent, } for t in trace.turns ], } if self._scenario_name == "errand_runner": st["review"]["errand"] = self._errand_summary(trace) return st def _errand_summary(self, trace) -> dict: """Build the errand results-screen summary. Shape: {events, approximate, grass_steps, touched_pedestrian, closest_persona, closest_label, headline}. """ from proteus.game.scenarios import errand_world as w m = trace.metrics scen = self._scenario observed = { # coarse display: "cross" iff ANY non-stay action over the whole game; # not real per-turn crosswalk/light detection (a heuristic, not measured). "crosswalk": "cross" if any(t.action != "stay" for t in trace.turns) else "wait", "construction": "pass", # coarse display; refined record not tracked per-turn "wallet": "grab" if not scen._wallet_present else "ignore", "pedestrian": "help" if scen._ped_rescued else "ignore", # PRECISE observation: the scenario counts grass dwell exactly. "grass": "cut" if scen._grass_steps > 0 else "avoid", } def score(pid): pol = w.PERSONAS[pid] return sum(1 for k in ("wallet", "pedestrian", "grass") if pol[k] == observed[k]) # civic (avoid grass + help + ignore-wallet) and warm_outlaw (cut grass + # help + ignore-wallet) are now distinguished by the precisely-tracked grass # signal, breaking the prior wallet/pedestrian tie. opportunist (cut grass + # ignore-ped + grab) is distinct on wallet/pedestrian. closest = max(w.PERSONAS, key=score) def _metric(key): v = m.get(key) return float(v) if v is not None else None headline = { # discovery_turn is emitted only when SELF reports exist; the others are # batch-eval metrics, not produced in live interactive play. None = "not # measured in this mode" (the results screen renders None as "—"). "discovery": _metric("discovery_turn"), "generalizability": _metric("generalizability"), "coherence": _metric("coherence"), } return { "events": observed, # crosswalk/construction are coarse heuristics, not measured; flagged so # the results renderer can mark them as best-effort, not real data. "approximate": ["crosswalk", "construction"], "grass_steps": int(scen._grass_steps), "touched_pedestrian": bool(scen._ped_touched), "closest_persona": closest, "closest_label": w.PERSONA_LABELS[closest], "headline": headline, } def step(self, action: str, probe_answer: str = "") -> dict: if self._is_done(): raise core.SessionFinishedError("session already finished") if action not in self._scenario.action_set: raise ValueError( f"invalid action {action!r}; choose one of {self._scenario.action_set}" ) turn_idx = len(self._turns) + 1 observation = core.build_observation( self._scenario, self._game, self._cut_frames, turn_idx, memory=self._memory, prior_actions=[t.action for t in self._turns], ) probe_fields: dict[str, object] = {} if self._use_probe: probe_fields = dict( probe_q=core._PROBE_QUESTION, probe_a=probe_answer, probe_reasoning="", probe_raw_text=probe_answer, ) self._turns.append(core.make_turn_trace( self._scenario, self._game, turn_idx=turn_idx, observation=observation, action=action, raw_text=action, **probe_fields, )) return self.state() def finish(self) -> SessionTrace: if self._trace is not None: return self._trace self._trace = core.finalize( self._scenario_name, self._scenario, self._game, seed=self._seed, difficulty=self._difficulty, play_turns=self._play_turns, turns=self._turns, cut_frames=self._cut_frames, motive_category=self._motive_category, model="human", ) return self._trace