Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / proteus /game /runtime /_session_core.py

irregular6612

feat(discovery): source available actions from scenario.action_set (interact reaches the agent)

11cd1de 30 days ago

Raw

History Blame Contribute Delete

13.4 kB

	"""Shared session-core helpers.

	The per-session logic (build + Cut replay, one-turn TurnTrace construction,
	finalize -> scored SessionTrace) lives here so both SessionRunner (agent-driven,
	batch) and InteractiveSession (HTTP-driven, stepwise) produce identical traces
	from the same actions. This is a pure extraction of what used to be inline in
	SessionRunner -- no behavior change (pinned by the existing suite). The one
	intentional deviation: ``finalize`` raises ``SessionNotFinishedError`` where the
	original used a bare ``assert`` (which vanishes under ``python -O``); the
	terminal condition checked is identical.
	"""
	from __future__ import annotations

	import random
	from dataclasses import dataclass

	import numpy as np

	from proteus.game.engine.ascii_view import frame_to_ascii, legend_text
	from proteus.game.engine.difficulty import Difficulty
	from proteus.game.engine.grid import MotiveGridGame
	from proteus.game.scenarios.base import Scenario, get_scenario
	from proteus.game.runtime.memory import MemoryCheckpoint, render_memory_block
	from proteus.game.metrics.metrics import compute_metrics
	from proteus.game.metrics.persona import (
	PersonaWeights,
	pressure as persona_pressure,
	reference_actions,
	reward_rw,
	)
	from proteus.game.metrics.rollout import optimal_rollout
	from proteus.game.runtime.trace import SessionTrace, TurnTrace
	from proteus.game.agents.parsing import extract_self_belief

	_ACTIONS = ["up", "down", "left", "right", "stay"]
	_DIRECTIONS = {"up", "down", "left", "right"}
	# Predator-evasion-scoped for the current slice; spec defers scenario generalization.
	# A future multi-scenario runner should source this from the Scenario.
	_PROBE_QUESTION = (
	"Where is the predator, and which direction should you move to stay safe?"
	)
	_HANDOVER_FRAMING = (
	"\n\nYou have been watching the game so far. From now on YOU control the "
	"focal agent. Continue playing to survive."
	)


	class SessionFinishedError(RuntimeError):
	"""Raised when stepping a session that has already reached a terminal state."""


	class SessionNotFinishedError(RuntimeError):
	"""Raised when finalizing a session that is not yet terminal / budget-done."""


	@dataclass
	class BuiltSession:
	"""The product of build_session: the live scenario+game and the captured Cut.

	cut_frames are ASCII (for the trace + the agent observation); cut_grids are
	the same frames as integer palette grids (for the web color animation).
	"""

	scenario: Scenario
	game: MotiveGridGame
	cut_frames: list[str]
	cut_grids: list[list[list[int]]]
	default_memory: MemoryCheckpoint \| None = None


	def grid_to_list(grid: np.ndarray) -> list[list[int]]:
	"""A (h, w) palette array -> JSON-serializable list[list[int]]."""
	return [[int(v) for v in row] for row in grid]


	def render_ascii(scenario: Scenario, game: MotiveGridGame) -> str:
	"""Render the live grid for the trace/observation via the scenario's hook."""
	return scenario.render_frame(game)


	def build_session(
	scenario_name: str,
	seed: int \| None,
	difficulty: Difficulty,
	play_turns: int,
	) -> BuiltSession:
	"""Build the scenario+game and replay the scripted Cut pre-roll.

	Behaviour-identical to SessionRunner._build_and_replay_cut, plus it also
	captures the per-frame integer palette grids for the web color animation.
	"""
	scenario = get_scenario(scenario_name)()
	rng = random.Random(seed)
	cut_length = scenario.cut_length(difficulty)
	game = MotiveGridGame(
	scenario, rng, difficulty, max_steps=cut_length + play_turns,
	)
	cut_frames = [render_ascii(scenario, game)]
	cut_grids = [grid_to_list(game.current_grid())]
	for _ in range(cut_length):
	action = scenario.cut_focal_policy(game)
	game.apply_motive_action(action)
	scenario.record_focal_move(action)
	cut_frames.append(render_ascii(scenario, game))
	cut_grids.append(grid_to_list(game.current_grid()))
	# The Cut pre-roll must not end the game; if it does, the scenario's
	# cut_focal_policy is buggy and any resulting trace would be corrupt.
	if game.eliminated or game.survived:
	raise RuntimeError(
	f"Game terminated during Cut replay of '{scenario_name}'. "
	"cut_focal_policy must not trigger elimination or survival."
	)
	default_memory = scenario.default_memory(seed, difficulty)
	return BuiltSession(scenario, game, cut_frames, cut_grids, default_memory)


	def build_observation(
	scenario: Scenario,
	game: MotiveGridGame,
	cut_frames: list[str],
	turn_idx: int,
	memory: MemoryCheckpoint \| None = None,
	prior_actions: list[str] \| None = None,
	) -> str:
	"""The self-contained, auto-regressive observation the agent sees this turn.

	Each turn the agent is called statelessly, so the observation must carry the
	full context the model needs to continue its OWN trajectory:

	* the handover ``memory`` (the prior episode / persona demonstration), shown
	EVERY turn so the model never loses it after turn 1;
	* the scripted ``cut_frames`` pre-roll (the lead-up before it took control);
	* ``prior_actions`` — the moves the model has already committed THIS run, so
	it plays auto-regressively (it can see and maintain its own line of play);
	* the current grid (``"Now:"``).

	``turn_idx`` is retained for the call signature; at turn 1 there are no
	prior_actions and the current grid is the handover state, so the observation
	matches the historical turn-1 layout.
	"""
	legend = scenario.legend()
	parts: list[str] = []
	if memory is not None:
	parts.append(render_memory_block(memory))
	parts.append("NOW — this run so far:")
	if cut_frames:
	for i, frame in enumerate(cut_frames[:-1], start=1):
	parts.append(f"Cut {i}:")
	parts.append(frame)
	if prior_actions:
	parts.append(
	"Your moves so far this run (most recent last): "
	+ ", ".join(prior_actions)
	)
	parts.append("Now:")
	parts.append(render_ascii(scenario, game))
	parts.append(legend_text(legend))
	parts.append(f"Available actions: [{', '.join(scenario.action_set)}]")
	return "\n".join(parts)


	def apply_action(scenario: Scenario, game: MotiveGridGame, action: str) -> bool:
	"""Apply the action; return True if a directional move was blocked.

	Behaviour-identical to SessionRunner._apply.
	"""
	focal = game.focal_sprite
	pre = (focal.x, focal.y) if focal else None
	game.apply_motive_action(action)
	scenario.record_focal_move(action)
	moved = game.focal_sprite
	post = (moved.x, moved.y) if moved else None
	return action in _DIRECTIONS and post == pre


	def make_turn_trace(
	scenario: Scenario,
	game: MotiveGridGame,
	*,
	turn_idx: int,
	observation: str,
	action: str,
	reasoning: str = "",
	raw_text: str = "",
	input_tokens: int = 0,
	output_tokens: int = 0,
	thinking_tokens: int = 0,
	probe_q: str = "",
	probe_a: str = "",
	probe_reasoning: str = "",
	probe_raw_text: str = "",
	probe_input_tokens: int = 0,
	probe_output_tokens: int = 0,
	probe_thinking_tokens: int = 0,
	persona: PersonaWeights \| None = None,
	) -> TurnTrace:
	"""Compute pre-move answer keys + positions, apply, score, build a TurnTrace.

	Behaviour-identical to the SessionRunner play-loop body: pre-move answer
	keys/positions are read BEFORE applying the action, the move is applied,
	then step_reward is scored against the pre-move positions. When a CP8
	persona is supplied, the reference action set / reference reward / pressure
	are read pre-move and the model's own R_w + regret post-move (the model
	never sees the weights — only the public reference set + scalars are stored).
	"""
	# Pre-move answer keys + positions.
	optimal = scenario.optimal_action(game)
	habit = scenario.habit_action(game)
	focal = game.focal_sprite
	predator = game.predator_sprite
	focal_pos = (focal.x, focal.y) if focal else (-1, -1)
	predator_pos = (predator.x, predator.y) if predator else (-1, -1)
	pre_bfs = scenario.safety_distance(game)
	# CP8 persona: reference action set + reference reward + pressure are read
	# from the PRE-move state (the model never sees the weights).
	ref_acts = ref_reward = turn_pressure = None
	if persona is not None:
	ref_acts = reference_actions(persona, scenario, game)
	ref_reward = reward_rw(
	persona, scenario, game, focal_pos, predator_pos, ref_acts[0],
	)
	turn_pressure = persona_pressure(scenario, game)

	blocked = apply_action(scenario, game, action)
	reward = scenario.step_reward(
	game, action, blocked,
	focal_before=focal_pos, predator_before=predator_pos,
	)
	# CP8: post-move positions + pre/post BFS + chase-corrected delta.
	post_focal = game.focal_sprite
	post_predator = game.predator_sprite
	post_focal_pos = (post_focal.x, post_focal.y) if post_focal else None
	post_predator_pos = (
	(post_predator.x, post_predator.y) if post_predator else None
	)
	post_bfs = scenario.safety_distance(game)
	agent_distance_delta = scenario.agent_distance_delta(
	game, focal_pos, predator_pos
	)
	# CP8 persona: the model's own R_w (BFS geometry is static, so the pre-move
	# positions + actual blocked status fully determine it) and its regret.
	model_reward = reward_regret = None
	if persona is not None:
	model_reward = reward_rw(
	persona, scenario, game, focal_pos, predator_pos, action,
	blocked=blocked,
	)
	reward_regret = ref_reward - model_reward
	# Find-your-body discovery: parse the optional SELF: report from raw_text
	# and score it against the scenario's (hidden) true body index. No-op for
	# non-discovery scenarios (discovery_candidates() == 0).
	n_candidates = scenario.discovery_candidates()
	self_belief = extract_self_belief(raw_text, n_candidates) if n_candidates else None
	true_index = scenario.discovery_true_index()
	self_correct = (
	(self_belief == true_index)
	if (self_belief is not None and true_index is not None)
	else None
	)
	return TurnTrace(
	turn_idx=turn_idx,
	observation=observation,
	probe_q=probe_q,
	probe_a=probe_a,
	probe_reasoning=probe_reasoning,
	probe_raw_text=probe_raw_text,
	probe_input_tokens=probe_input_tokens,
	probe_output_tokens=probe_output_tokens,
	probe_thinking_tokens=probe_thinking_tokens,
	reasoning=reasoning,
	raw_text=raw_text,
	action=action,
	motive_action=optimal,
	habit_action=habit,
	is_diagnostic=(optimal != habit),
	was_congruent=(action == optimal),
	reward=reward,
	focal_pos=focal_pos,
	predator_pos=predator_pos,
	input_tokens=input_tokens,
	output_tokens=output_tokens,
	thinking_tokens=thinking_tokens,
	post_focal_pos=post_focal_pos,
	post_predator_pos=post_predator_pos,
	pre_bfs_distance=pre_bfs,
	post_bfs_distance=post_bfs,
	agent_distance_delta=agent_distance_delta,
	reference_actions=ref_acts,
	reference_reward=ref_reward,
	model_reward=model_reward,
	reward_regret=reward_regret,
	pressure=turn_pressure,
	self_belief=self_belief,
	self_correct=self_correct,
	)


	def finalize(
	scenario_name: str,
	scenario: Scenario,
	game: MotiveGridGame,
	*,
	seed: int \| None,
	difficulty: Difficulty,
	play_turns: int,
	turns: list[TurnTrace],
	cut_frames: list[str],
	motive_category: str,
	model: str,
	memory_ref: str \| None = None,
	persona: PersonaWeights \| None = None,
	) -> SessionTrace:
	"""Score the played turns and assemble the SessionTrace.

	Behaviour-identical to the SessionRunner finalize block: requires a terminal
	state or budget exhaustion, then scores against the optimal rollout. The CP7
	``memory_ref`` and CP8 ``persona`` (its public id) are recorded on the trace.
	"""
	if not (game.eliminated or game.survived or len(turns) == play_turns):
	raise SessionNotFinishedError(
	"finalize called before a terminal state or budget exhaustion."
	)
	outcome = "eliminated" if game.eliminated else "survived"
	rollout = optimal_rollout(scenario_name, seed, difficulty, len(turns))
	realized_final_safety = scenario.safety_distance(game)
	metrics = compute_metrics(
	turns,
	played_turns=len(turns),
	play_turns=play_turns,
	outcome=outcome,
	optimal_focal_positions=rollout.focal_positions,
	realized_final_safety=realized_final_safety,
	optimal_final_safety=rollout.final_safety_distance,
	max_bfs_distance=scenario.max_bfs_distance(game),
	)
	return SessionTrace(
	scenario=scenario_name,
	motive_category=motive_category,
	seed=seed,
	difficulty=difficulty.value,
	model=model,
	cut_frames=list(cut_frames),
	turns=turns,
	outcome=outcome,
	metrics=metrics,
	memory_ref=memory_ref,
	horizon=play_turns,
	persona_weight_id=(
	persona.persona_weight_id if persona else None
	),
	)