Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / proteus /game /runtime /interactive.py

irregular6612

feat(errand): no move limit — ends only on reaching the house (analysis) or zero health

bb1f1e7 28 days ago

Raw

History Blame Contribute Delete

8.53 kB

	"""InteractiveSession — a threadless, stepwise driver for human web play.

	Holds the live scenario+game and advances exactly one turn per HTTP request.
	Built on the same ``_session_core`` helpers as SessionRunner, so the trace it
	emits at the end is identical to a SessionRunner(HumanAgent) trace for the same
	actions (pinned by tests/runtime/test_interactive_equivalence.py).

	Fairness: ``state()`` exposes only the grid + available actions while playing;
	the per-turn answer keys (optimal/habit) and rewards are disclosed in ``review``
	only once the game is over.
	"""
	from __future__ import annotations

	from proteus.game.engine.difficulty import Difficulty
	from proteus.game.runtime import _session_core as core
	from proteus.game.runtime.trace import SessionTrace, TurnTrace


	class InteractiveSession:
	    """Stepwise driver around one scenario + game, advanced one turn per call.

	    The scenario, game and cut frames come from ``core.build_session``. Each
	    ``step()`` records exactly one ``TurnTrace``; ``state()`` returns the
	    JSON-ready view; ``finish()`` finalizes and memoizes the ``SessionTrace``.

	    While the session is live, ``state()`` carries no reward/optimal/habit
	    data; those appear under ``"review"`` only once the session is done.
	    """

	    def __init__(
	        self,
	        scenario_name: str,
	        *,
	        difficulty: Difficulty = Difficulty.EASY,
	        seed: int | None = None,
	        play_turns: int = 15,
	        use_probe: bool = False,
	        motive_category: str = "survival",
	        memory: "MemoryCheckpoint | None" = None,
	        use_default_memory: bool = True,
	    ) -> None:
	        """Build the scenario/game and start with an empty turn list.

	        Args:
	            scenario_name: Forwarded to ``core.build_session`` and
	                ``core.finalize``; also selects the errand summary when it
	                equals ``"errand_runner"``.
	            difficulty: Forwarded to ``core.build_session`` / ``core.finalize``.
	            seed: Forwarded to ``core.build_session`` / ``core.finalize``.
	            play_turns: Turn cap; only enforced when the scenario reports
	                ``turn_limited``.
	            use_probe: When true, every ``step()`` records the probe
	                question and the caller-supplied probe answer on the turn.
	            motive_category: Forwarded to ``core.finalize``.
	            memory: Explicit memory passed to ``core.build_observation``;
	                takes precedence over the scenario default.
	            use_default_memory: When ``memory`` is ``None``, use the
	                scenario's default memory if true, otherwise no memory.
	        """
	        self._scenario_name = scenario_name
	        self._difficulty = difficulty
	        self._seed = seed
	        self._play_turns = play_turns
	        self._use_probe = use_probe
	        self._motive_category = motive_category

	        built = core.build_session(scenario_name, seed, difficulty, play_turns)
	        self._scenario = built.scenario
	        self._game = built.game
	        self._cut_frames = built.cut_frames
	        self._cut_grids = built.cut_grids
	        # An explicit memory wins; else fall back to the scenario's default
	        # ONLY when use_default_memory (so a caller can force "no memory").
	        if memory is not None:
	            self._memory = memory
	        elif use_default_memory:
	            self._memory = built.default_memory
	        else:
	            self._memory = None
	        self._turns: list[TurnTrace] = []
	        self._trace: SessionTrace | None = None

	    # ------------------------------------------------------------------ #
	    def _is_done(self) -> bool:
	        """Return True when no further turn may be played.

	        The session is done when the trace has already been finalized, the
	        game reports ``eliminated`` or ``survived``, or (turn-limited
	        scenarios only) ``play_turns`` turns have been recorded.
	        """
	        # A finalized trace is memoized and never rebuilt, so turns recorded
	        # after ``finish()`` would be missing from its outcome/metrics.
	        # Treat finalization as terminal to keep the trace authoritative.
	        if self._trace is not None:
	            return True
	        if self._game.eliminated or self._game.survived:
	            return True
	        return bool(
	            self._scenario.turn_limited and len(self._turns) >= self._play_turns
	        )

	    def state(self) -> dict:
	        """Return the JSON-ready view of the session.

	        While playing, only the grid, legend, actions and counters are
	        exposed; ``outcome`` and ``review`` stay ``None``. Once the session
	        is done, the trace is finalized (memoized, so repeated calls are
	        cheap) and ``outcome`` / ``review`` are populated from it.
	        """
	        done = self._is_done()
	        played = len(self._turns)
	        if done:
	            phase = "done"
	        elif played == 0:
	            phase = "cut_intro"
	        else:
	            phase = "play"
	        limited = self._scenario.turn_limited
	        st: dict = {
	            "phase": phase,
	            "turn_idx": played,
	            "play_turns": self._play_turns if limited else None,
	            "health": self._game.health,
	            "turns_left": max(0, self._play_turns - played) if limited else None,
	            "grid": core.grid_to_list(self._game.current_grid()),
	            "legend": {str(k): v for k, v in self._scenario.legend().items()},
	            "actions": list(self._scenario.action_set),
	            "outcome": None,
	            # Cut frames are only sent before the first turn is played.
	            "cut_frames": self._cut_grids if played == 0 else None,
	            "review": None,
	        }
	        if done:
	            trace = self.finish()
	            st["outcome"] = trace.outcome
	            st["review"] = self._review(trace)
	        return st

	    def _review(self, trace: SessionTrace) -> dict:
	        """Build the post-game ``review`` payload from a finalized trace.

	        Contains the outcome, the metrics, one dict per turn (including the
	        answer keys and reward), and, for the ``errand_runner`` scenario,
	        the errand summary under ``"errand"``.
	        """
	        review: dict = {
	            "outcome": trace.outcome,
	            "metrics": trace.metrics,
	            "turns": [
	                {
	                    "turn_idx": t.turn_idx,
	                    "action": t.action,
	                    "motive_action": t.motive_action,
	                    "habit_action": t.habit_action,
	                    "reward": t.reward,
	                    "is_diagnostic": t.is_diagnostic,
	                    "was_congruent": t.was_congruent,
	                }
	                for t in trace.turns
	            ],
	        }
	        if self._scenario_name == "errand_runner":
	            review["errand"] = self._errand_summary(trace)
	        return review

	    def _errand_summary(self, trace: SessionTrace) -> dict:
	        """Build the errand results-screen summary.

	        Shape: {events, approximate, grass_steps, touched_pedestrian,
	        closest_persona, closest_label, headline}.
	        """
	        from proteus.game.scenarios import errand_world as w

	        m = trace.metrics
	        scen = self._scenario
	        observed = {
	            # coarse display: "cross" iff ANY non-stay action over the whole game;
	            # not real per-turn crosswalk/light detection (a heuristic, not measured).
	            "crosswalk": "cross" if any(t.action != "stay" for t in trace.turns) else "wait",
	            "construction": "pass",  # coarse display; refined record not tracked per-turn
	            "wallet": "grab" if not scen._wallet_present else "ignore",
	            "pedestrian": "help" if scen._ped_rescued else "ignore",
	            # PRECISE observation: the scenario counts grass dwell exactly.
	            "grass": "cut" if scen._grass_steps > 0 else "avoid",
	        }

	        def score(pid):
	            # Number of scored events on which the persona's policy matches
	            # what was observed; crosswalk/construction are not scored.
	            pol = w.PERSONAS[pid]
	            return sum(1 for k in ("wallet", "pedestrian", "grass") if pol[k] == observed[k])

	        # civic (avoid grass + help + ignore-wallet) and warm_outlaw (cut grass +
	        # help + ignore-wallet) are now distinguished by the precisely-tracked grass
	        # signal, breaking the prior wallet/pedestrian tie. opportunist (cut grass +
	        # ignore-ped + grab) is distinct on wallet/pedestrian.
	        closest = max(w.PERSONAS, key=score)

	        def _metric(key):
	            # Missing (or None) metrics stay None rather than becoming 0.0.
	            v = m.get(key)
	            return float(v) if v is not None else None

	        headline = {
	            # discovery_turn is emitted only when SELF reports exist; the others are
	            # batch-eval metrics, not produced in live interactive play. None = "not
	            # measured in this mode" (the results screen renders None as "—").
	            "discovery": _metric("discovery_turn"),
	            "generalizability": _metric("generalizability"),
	            "coherence": _metric("coherence"),
	        }
	        return {
	            "events": observed,
	            # crosswalk/construction are coarse heuristics, not measured; flagged so
	            # the results renderer can mark them as best-effort, not real data.
	            "approximate": ["crosswalk", "construction"],
	            "grass_steps": int(scen._grass_steps),
	            "touched_pedestrian": bool(scen._ped_touched),
	            "closest_persona": closest,
	            "closest_label": w.PERSONA_LABELS[closest],
	            "headline": headline,
	        }

	    def step(self, action: str, probe_answer: str = "") -> dict:
	        """Play one turn with ``action`` and return the new ``state()``.

	        Args:
	            action: Must be a member of the scenario's ``action_set``.
	            probe_answer: Recorded on the turn only when the session was
	                created with ``use_probe=True``; ignored otherwise.

	        Raises:
	            core.SessionFinishedError: The session is already done
	                (game over, turn cap reached, or trace finalized).
	            ValueError: ``action`` is not in the scenario's ``action_set``.
	        """
	        if self._is_done():
	            raise core.SessionFinishedError("session already finished")
	        if action not in self._scenario.action_set:
	            raise ValueError(
	                f"invalid action {action!r}; choose one of {self._scenario.action_set}"
	            )
	        turn_idx = len(self._turns) + 1  # turn indices are 1-based
	        observation = core.build_observation(
	            self._scenario, self._game, self._cut_frames, turn_idx,
	            memory=self._memory,
	            prior_actions=[t.action for t in self._turns],
	        )
	        probe_fields: dict[str, object] = {}
	        if self._use_probe:
	            probe_fields = dict(
	                probe_q=core._PROBE_QUESTION,
	                probe_a=probe_answer,
	                probe_reasoning="",
	                probe_raw_text=probe_answer,
	            )
	        turn = core.make_turn_trace(
	            self._scenario, self._game,
	            turn_idx=turn_idx, observation=observation,
	            action=action, raw_text=action, **probe_fields,
	        )
	        self._turns.append(turn)
	        return self.state()

	    def finish(self) -> SessionTrace:
	        """Finalize the session and return its trace (memoized).

	        The first call builds the trace via ``core.finalize`` with
	        ``model="human"``; later calls return the same object. Once the
	        trace exists the session counts as done, so ``step()`` refuses
	        further turns.
	        """
	        if self._trace is None:
	            self._trace = core.finalize(
	                self._scenario_name, self._scenario, self._game,
	                seed=self._seed, difficulty=self._difficulty,
	                play_turns=self._play_turns, turns=self._turns,
	                cut_frames=self._cut_frames, motive_category=self._motive_category,
	                model="human",
	            )
	        return self._trace