Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / tests /runtime /test_errand_discovery.py

irregular6612

feat(errand): no move limit — ends only on reaching the house (analysis) or zero health

bb1f1e7 about 1 month ago

Raw

History Blame Contribute Delete

6.88 kB

	# tests/runtime/test_errand_discovery.py
	"""Find-your-body discovery: trace fields, SELF parsing in make_turn_trace,
	available-action sourcing, and the Discovery metric."""
	import random

	from proteus.game.engine.difficulty import Difficulty
	from proteus.game.engine.grid import MotiveGridGame
	from proteus.game.scenarios.base import get_scenario
	import proteus.game.scenarios # noqa: F401
	from proteus.game.runtime.trace import TurnTrace


	def _errand():
	    """Return a ``(scenario, game)`` pair for the ``errand_runner`` scenario.

	    The game is a ``MotiveGridGame`` seeded with ``random.Random(7)`` at
	    ``Difficulty.EASY`` with ``max_steps=80``.
	    """
	    scenario_cls = get_scenario("errand_runner")
	    scenario = scenario_cls()
	    rng = random.Random(7)
	    return scenario, MotiveGridGame(scenario, rng, Difficulty.EASY, max_steps=80)


	def test_turntrace_discovery_fields_default_none():
	    """A TurnTrace built without discovery arguments has both discovery fields set to None."""
	    required_fields = dict(
	        turn_idx=1,
	        observation="o",
	        action="stay",
	        motive_action="stay",
	        habit_action="stay",
	        is_diagnostic=False,
	        was_congruent=True,
	        reward=0.0,
	        focal_pos=(0, 0),
	        predator_pos=(0, 0),
	    )
	    trace = TurnTrace(**required_fields)
	    assert trace.self_belief is None
	    assert trace.self_correct is None


	def test_errand_exposes_discovery_hooks():
	    """errand_runner reports 3 discovery candidates and its true index equals ``true_body_index``."""
	    scenario = _errand()[0]
	    candidate_count = scenario.discovery_candidates()
	    assert candidate_count == 3
	    reported_index = scenario.discovery_true_index()
	    assert reported_index == scenario.true_body_index


	def test_template_has_no_discovery():
	    """The ``template`` scenario reports zero discovery candidates and no true index."""
	    template_cls = get_scenario("template")
	    template = template_cls()
	    candidate_count = template.discovery_candidates()
	    assert candidate_count == 0
	    true_index = template.discovery_true_index()
	    assert true_index is None


	from proteus.game.runtime import _session_core as core


	def test_make_turn_trace_parses_correct_self_report():
	    """A ``SELF:`` line naming the true body index yields a matching, correct self-belief."""
	    scenario, game = _errand()
	    expected_index = scenario.true_body_index
	    reply = f"reasoning...\nSELF: {expected_index}\nACTION: stay"
	    trace = core.make_turn_trace(
	        scenario,
	        game,
	        turn_idx=1,
	        observation="obs",
	        action="stay",
	        raw_text=reply,
	    )
	    assert trace.self_belief == expected_index
	    assert trace.self_correct is True


	def test_make_turn_trace_marks_wrong_self_report():
	    """A ``SELF:`` line naming a different index is recorded and flagged as incorrect."""
	    scenario, game = _errand()
	    # Shift the true index by one (mod 3) so the reported index is guaranteed to differ.
	    wrong_index = (scenario.true_body_index + 1) % 3
	    reply = f"SELF: {wrong_index}\nACTION: stay"
	    trace = core.make_turn_trace(
	        scenario,
	        game,
	        turn_idx=1,
	        observation="obs",
	        action="stay",
	        raw_text=reply,
	    )
	    assert trace.self_belief == wrong_index
	    assert trace.self_correct is False


	def test_make_turn_trace_no_self_report_is_none():
	    """Raw text without a ``SELF:`` line leaves both discovery fields as None."""
	    scenario, game = _errand()
	    trace = core.make_turn_trace(
	        scenario,
	        game,
	        turn_idx=1,
	        observation="o",
	        action="stay",
	        raw_text="ACTION: stay",
	    )
	    assert trace.self_belief is None
	    assert trace.self_correct is None


	def test_observation_lists_scenario_action_set():
	    """The built observation mentions ``interact`` for errand_runner but not for template."""
	    errand_scenario, errand_game = _errand()
	    errand_obs = core.build_observation(
	        errand_scenario, errand_game, cut_frames=[], turn_idx=1
	    )
	    # errand_runner exposes interact in Available actions
	    assert "interact" in errand_obs

	    # Regression guard: template stays at the 5 movement actions.
	    template = get_scenario("template")()
	    template_game = MotiveGridGame(
	        template, random.Random(0), Difficulty.EASY, max_steps=10
	    )
	    template_obs = core.build_observation(
	        template, template_game, cut_frames=[], turn_idx=1
	    )
	    assert "interact" not in template_obs


	def test_interactive_accepts_interact_for_errand():
	    """An errand_runner InteractiveSession lists ``interact`` and accepts it as a step."""
	    from proteus.game.runtime.interactive import InteractiveSession

	    session = InteractiveSession(
	        "errand_runner", seed=7, play_turns=5, use_default_memory=False
	    )
	    offered_actions = session.state()["actions"]
	    assert "interact" in offered_actions
	    # The call itself is the check: it must not raise "invalid action".
	    session.step("interact")


	def test_full_session_runs_and_emits_discovery_metric():
	    """End-to-end SessionRunner with a fake provider that always reports the
	    correct body index; confirms discovery_turn and discovery_identified are
	    emitted on the resulting SessionTrace.metrics."""
	    from proteus.providers.fake import FakeProvider  # deterministic, no network
	    from proteus.game.agents.vanilla import VanillaAgent
	    from proteus.game.runtime.session import SessionRunner

	    # Confirm the true_body_index for seed 7 (standalone and runner use same RNG).
	    # The game is constructed only for its effect on the scenario; it is not used afterwards.
	    scen = get_scenario("errand_runner")()
	    MotiveGridGame(scen, random.Random(7), Difficulty.EASY, max_steps=10)
	    true = scen.true_body_index  # == 1 for seed 7

	    # FakeProvider.responses repeats last entry once exhausted, so a single
	    # response is sufficient for any play_turns count.
	    provider = FakeProvider(responses=[f"SELF: {true}\nACTION: stay"])
	    agent = VanillaAgent(provider)
	    runner = SessionRunner(
	        "errand_runner", agent, seed=7, play_turns=4,
	        use_probe=False, motive_category="errand",
	    )
	    trace = runner.run()
	    metrics = trace.metrics

	    assert trace.scenario == "errand_runner"
	    # Check presence of BOTH discovery metrics explicitly, so a missing key
	    # fails with a diagnostic message instead of a bare KeyError below.
	    for key in ("discovery_turn", "discovery_identified"):
	        assert key in metrics, f"{key} missing from metrics: {metrics}"
	    # Every turn reported the correct body index -> identified at turn 1.
	    assert metrics["discovery_identified"] == 100.0, (
	        f"Expected 100.0 but got {metrics['discovery_identified']}"
	    )


	def test_errand_has_no_move_limit():
	    """With no turn limit, exhausting play_turns does NOT end the errand session."""
	    from proteus.game.runtime.interactive import InteractiveSession

	    session = InteractiveSession(
	        "errand_runner", seed=7, play_turns=2, use_default_memory=False
	    )
	    steps_remaining = 6  # well past play_turns=2
	    while steps_remaining > 0:
	        session.step("stay")
	        steps_remaining -= 1

	    snapshot = session.state()
	    # Still playing, no curfew.
	    assert snapshot["phase"] != "done"
	    assert snapshot["review"] is None
	    # Unlimited -> no countdown.
	    assert snapshot["turns_left"] is None
	    assert snapshot["play_turns"] is None


	def test_reaching_house_triggers_analysis():
	    """The analysis (review) appears when the focal reaches the house goal."""
	    from proteus.game.runtime.interactive import InteractiveSession
	    from proteus.game.scenarios import errand_world as w

	    session = InteractiveSession(
	        "errand_runner", seed=7, play_turns=2, use_default_memory=False
	    )
	    # Place the focal sprite directly at the home target anchor of the game layout.
	    focal = session._game.focal_sprite
	    focal.set_position(*w.home_target_anchor(w.GAME_LAYOUT))
	    session.step("stay")  # engine sees check_success -> win -> done

	    snapshot = session.state()
	    assert snapshot["outcome"] == "survived"
	    assert snapshot["review"] is not None
	    assert "errand" in snapshot["review"]


	def test_review_has_errand_summary_when_done():
	    """Once the house is reached, the review carries an ``errand`` summary with the expected fields."""
	    from proteus.game.runtime.interactive import InteractiveSession
	    from proteus.game.scenarios import errand_world as w

	    session = InteractiveSession(
	        "errand_runner", seed=7, play_turns=2, use_default_memory=False
	    )
	    focal = session._game.focal_sprite
	    focal.set_position(*w.home_target_anchor(w.GAME_LAYOUT))
	    session.step("stay")  # reach the house -> done

	    review = session.state()["review"]
	    assert review is not None
	    assert "errand" in review

	    summary = review["errand"]
	    events = summary["events"]
	    expected_event_names = {"crosswalk", "construction", "wallet", "pedestrian", "grass"}
	    assert set(events) == expected_event_names
	    assert events["grass"] in {"cut", "avoid"}
	    assert isinstance(summary["grass_steps"], int)
	    assert isinstance(summary["touched_pedestrian"], bool)
	    # grass is precisely tracked, not approximate
	    assert "grass" not in summary["approximate"]
	    assert summary["closest_persona"] in {"civic", "warm_outlaw", "opportunist"}
	    required_headline_keys = {"discovery", "generalizability", "coherence"}
	    assert required_headline_keys.issubset(summary["headline"])