# AgentnessBench / proteus/game/runtime/interactive.py
# Last commit bb1f1e7 (irregular6612):
#   feat(errand): no move limit — ends only on reaching the house (analysis)
#   or zero health
"""InteractiveSession — a threadless, stepwise driver for human web play.
Holds the live scenario+game and advances exactly one turn per HTTP request.
Built on the same ``_session_core`` helpers as SessionRunner, so the trace it
emits at the end is identical to a SessionRunner(HumanAgent) trace for the same
actions (pinned by tests/runtime/test_interactive_equivalence.py).
Fairness: ``state()`` exposes only the grid + available actions while playing;
the per-turn answer keys (optimal/habit) and rewards are disclosed in ``review``
only once the game is over.
"""
from __future__ import annotations
from proteus.game.engine.difficulty import Difficulty
from proteus.game.runtime import _session_core as core
from proteus.game.runtime.trace import SessionTrace, TurnTrace
class InteractiveSession:
    """Stepwise driver that plays one scenario, one turn per ``step()`` call.

    The session owns the live scenario and game built by
    ``core.build_session`` and accumulates one ``TurnTrace`` per accepted
    action. ``state()`` returns a JSON-ready snapshot; once the game is over
    the snapshot also carries a ``review`` built from the finalized trace.
    """

    def __init__(
        self,
        scenario_name: str,
        *,
        difficulty: Difficulty = Difficulty.EASY,
        seed: int | None = None,
        play_turns: int = 15,
        use_probe: bool = False,
        motive_category: str = "survival",
        memory: "MemoryCheckpoint | None" = None,
        use_default_memory: bool = True,
    ) -> None:
        """Build the scenario/game pair and reset the turn log.

        Args:
            scenario_name: Scenario identifier handed to ``core.build_session``
                and later to ``core.finalize``.
            difficulty: Difficulty forwarded to the builder and the finalizer.
            seed: Seed forwarded to the builder and the finalizer.
            play_turns: Turn budget. It only ends the session when the
                scenario reports ``turn_limited``.
            use_probe: When true, every recorded turn also stores the probe
                question and the ``probe_answer`` given to ``step()``.
            motive_category: Forwarded to ``core.finalize``.
            memory: Explicit memory passed to ``core.build_observation`` on
                every turn. When given it always wins.
            use_default_memory: When ``memory`` is ``None``, fall back to the
                builder's ``default_memory`` only if this flag is true.
        """
        self._scenario_name = scenario_name
        self._difficulty = difficulty
        self._seed = seed
        self._play_turns = play_turns
        self._use_probe = use_probe
        self._motive_category = motive_category

        built = core.build_session(scenario_name, seed, difficulty, play_turns)
        self._scenario = built.scenario
        self._game = built.game
        self._cut_frames = built.cut_frames
        self._cut_grids = built.cut_grids

        # An explicit memory wins; else fall back to the scenario's default
        # ONLY when use_default_memory (so a caller can force "no memory").
        self._memory = (
            memory if memory is not None
            else (built.default_memory if use_default_memory else None)
        )

        self._turns: list[TurnTrace] = []
        # Memoized result of finish(); step() clears it so it can never lag
        # behind the turn log.
        self._trace: SessionTrace | None = None

    # ------------------------------------------------------------------ #
    def _is_done(self) -> bool:
        """Return True once no further turn may be played.

        The session is over when the game reports ``eliminated`` or
        ``survived``, or, for turn-limited scenarios only, when the number of
        recorded turns has reached ``play_turns``.
        """
        return (
            self._game.eliminated
            or self._game.survived
            or (self._scenario.turn_limited and len(self._turns) >= self._play_turns)
        )

    def state(self) -> dict:
        """The JSON-ready view (live = fair, no answer keys; review only when done).

        When the game is over this finalizes and memoizes the trace (idempotent)
        so it can populate ``review``; while playing it exposes only grid +
        actions, never reward/optimal/habit.

        Returns:
            A dict with the keys ``phase``, ``turn_idx``, ``play_turns``,
            ``health``, ``turns_left``, ``grid``, ``legend``, ``actions``,
            ``outcome``, ``cut_frames`` and ``review``. ``play_turns`` and
            ``turns_left`` are ``None`` for scenarios that are not
            turn-limited; ``cut_frames`` is only populated before the first
            turn; ``outcome`` and ``review`` are only populated when done.
        """
        done = self._is_done()
        played = len(self._turns)
        phase = "done" if done else ("cut_intro" if played == 0 else "play")
        limited = self._scenario.turn_limited
        st: dict = {
            "phase": phase,
            "turn_idx": played,
            "play_turns": self._play_turns if limited else None,
            "health": self._game.health,
            "turns_left": max(0, self._play_turns - played) if limited else None,
            "grid": core.grid_to_list(self._game.current_grid()),
            "legend": {str(k): v for k, v in self._scenario.legend().items()},
            "actions": list(self._scenario.action_set),
            "outcome": None,
            "cut_frames": self._cut_grids if played == 0 else None,
            "review": None,
        }
        if done:
            trace = self.finish()
            st["outcome"] = trace.outcome
            st["review"] = {
                "outcome": trace.outcome,
                "metrics": trace.metrics,
                "turns": [
                    {
                        "turn_idx": t.turn_idx,
                        "action": t.action,
                        "motive_action": t.motive_action,
                        "habit_action": t.habit_action,
                        "reward": t.reward,
                        "is_diagnostic": t.is_diagnostic,
                        "was_congruent": t.was_congruent,
                    }
                    for t in trace.turns
                ],
            }
            # Only the errand scenario has the extra results-screen summary.
            if self._scenario_name == "errand_runner":
                st["review"]["errand"] = self._errand_summary(trace)
        return st

    def _errand_summary(self, trace: SessionTrace) -> dict:
        """Build the errand results-screen summary.

        Shape: {events, approximate, grass_steps, touched_pedestrian,
        closest_persona, closest_label, headline}.

        Args:
            trace: The finalized trace; its ``turns`` and ``metrics`` are read.

        Returns:
            The summary dict described above. ``headline`` values are floats,
            or ``None`` when the metric is absent from ``trace.metrics``.
        """
        # Imported lazily, as in the original; only errand sessions need it.
        from proteus.game.scenarios import errand_world as w

        m = trace.metrics
        scen = self._scenario
        observed = {
            # coarse display: "cross" iff ANY non-stay action over the whole game;
            # not real per-turn crosswalk/light detection (a heuristic, not measured).
            "crosswalk": "cross" if any(t.action != "stay" for t in trace.turns) else "wait",
            "construction": "pass",  # coarse display; refined record not tracked per-turn
            "wallet": "grab" if not scen._wallet_present else "ignore",
            "pedestrian": "help" if scen._ped_rescued else "ignore",
            # PRECISE observation: the scenario counts grass dwell exactly.
            "grass": "cut" if scen._grass_steps > 0 else "avoid",
        }

        def score(pid):
            # Count agreements on the three tracked signals only; the coarse
            # crosswalk/construction entries are deliberately left out.
            pol = w.PERSONAS[pid]
            return sum(1 for k in ("wallet", "pedestrian", "grass") if pol[k] == observed[k])

        # civic (avoid grass + help + ignore-wallet) and warm_outlaw (cut grass +
        # help + ignore-wallet) are now distinguished by the precisely-tracked grass
        # signal, breaking the prior wallet/pedestrian tie. opportunist (cut grass +
        # ignore-ped + grab) is distinct on wallet/pedestrian.
        # On a score tie, max() keeps the first persona in iteration order.
        closest = max(w.PERSONAS, key=score)

        def _metric(key):
            # Missing (or None) metrics stay None instead of being coerced.
            v = m.get(key)
            return float(v) if v is not None else None

        headline = {
            # discovery_turn is emitted only when SELF reports exist; the others are
            # batch-eval metrics, not produced in live interactive play. None = "not
            # measured in this mode" (the results screen renders None as "—").
            "discovery": _metric("discovery_turn"),
            "generalizability": _metric("generalizability"),
            "coherence": _metric("coherence"),
        }
        return {
            "events": observed,
            # crosswalk/construction are coarse heuristics, not measured; flagged so
            # the results renderer can mark them as best-effort, not real data.
            "approximate": ["crosswalk", "construction"],
            "grass_steps": int(scen._grass_steps),
            "touched_pedestrian": bool(scen._ped_touched),
            "closest_persona": closest,
            "closest_label": w.PERSONA_LABELS[closest],
            "headline": headline,
        }

    def step(self, action: str, probe_answer: str = "") -> dict:
        """Play exactly one turn and return the resulting ``state()``.

        Args:
            action: The chosen action; must be a member of the scenario's
                ``action_set``.
            probe_answer: Recorded on the turn only when the session was
                created with ``use_probe=True``; otherwise ignored.

        Returns:
            The snapshot produced by ``state()`` after the turn is recorded.

        Raises:
            core.SessionFinishedError: If the session is already over.
            ValueError: If ``action`` is not in the scenario's action set.
        """
        if self._is_done():
            raise core.SessionFinishedError("session already finished")
        if action not in self._scenario.action_set:
            raise ValueError(
                f"invalid action {action!r}; choose one of {self._scenario.action_set}"
            )

        turn_idx = len(self._turns) + 1  # turn indices are 1-based
        observation = core.build_observation(
            self._scenario, self._game, self._cut_frames, turn_idx,
            memory=self._memory,
            prior_actions=[t.action for t in self._turns],
        )

        probe_fields: dict[str, object] = {}
        if self._use_probe:
            probe_fields = dict(
                probe_q=core._PROBE_QUESTION,
                probe_a=probe_answer,
                probe_reasoning="",
                probe_raw_text=probe_answer,
            )

        # A trace memoized by an early finish() would otherwise go stale the
        # moment another turn is recorded, and state()/finish() would keep
        # returning it. Drop the memo so the next finish() rebuilds it.
        # NOTE(review): assumes core.finalize is safe to call again — confirm.
        self._trace = None

        # The game itself is advanced inside the core helper (presumably
        # make_turn_trace); this method only records the resulting turn.
        self._turns.append(core.make_turn_trace(
            self._scenario, self._game,
            turn_idx=turn_idx, observation=observation,
            action=action, raw_text=action, **probe_fields,
        ))
        return self.state()

    def finish(self) -> SessionTrace:
        """Finalize the session and return its trace.

        The result is memoized: repeated calls return the same object until
        another turn is played, at which point ``step()`` clears the memo and
        the next call rebuilds the trace from the full turn log. The trace is
        always finalized with ``model="human"``.
        """
        if self._trace is not None:
            return self._trace
        self._trace = core.finalize(
            self._scenario_name, self._scenario, self._game,
            seed=self._seed, difficulty=self._difficulty,
            play_turns=self._play_turns, turns=self._turns,
            cut_frames=self._cut_frames, motive_category=self._motive_category,
            model="human",
        )
        return self._trace