Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / tests /runtime /test_session.py

irregular6612

test(template): restore generic eliminated-outcome + blocked step_reward coverage

1debdd3 24 days ago

Raw

History Blame Contribute Delete

3.71 kB

	from proteus.providers import FakeProvider
	from proteus.game.agents import VanillaAgent
	from proteus.game.runtime.session import SessionRunner
	from proteus.game.runtime.trace import SessionTrace


	def _agent(responses):
	    """Return a VanillaAgent driven by a FakeProvider scripted with *responses*."""
	    scripted_provider = FakeProvider(responses=responses)
	    return VanillaAgent(scripted_provider)


	def test_optimal_player_survives_and_scores_full_motive_reading():
	    """An always-"up" agent is motive-congruent on the first turn (seed 42).

	    NOTE(review): despite the name, only the first turn's congruence and the
	    presence of the "motive_reading_accuracy" metric key are asserted; neither
	    the final outcome nor the metric's value is checked here.
	    """
	    # On "template" the motive-congruent escape at the start is "up" (the open
	    # column away from the far-east predator), so a constant "up" policy stays
	    # congruent while the runner scores each turn against the live answer key.
	    always_up = _agent(["ACTION: up"])  # FakeProvider repeats the last response
	    trace = SessionRunner(
	        "template",
	        always_up,
	        seed=42,
	        play_turns=10,
	        use_probe=False,
	    ).run()
	    # Trace-level shape checks.
	    assert isinstance(trace, SessionTrace)
	    assert trace.scenario == "template"
	    assert trace.cut_frames  # Cut history captured
	    assert len(trace.turns) >= 1
	    # First played turn: the answer key and the chosen action both say "up".
	    opening_turn = trace.turns[0]
	    assert opening_turn.motive_action == "up"
	    assert opening_turn.action == "up"
	    assert opening_turn.was_congruent is True
	    assert "motive_reading_accuracy" in trace.metrics


	def test_probe_recorded_when_enabled():
	    """With use_probe=True the first turn records a probe question and answer."""
	    scripted_reply = "the predator is to my east; I should go up\nACTION: up"
	    probing_agent = _agent([scripted_reply])
	    trace = SessionRunner(
	        "template",
	        probing_agent,
	        seed=42,
	        play_turns=3,
	        use_probe=True,
	    ).run()
	    opening_turn = trace.turns[0]
	    assert opening_turn.probe_q  # a question was asked
	    assert opening_turn.probe_a  # an answer was recorded


	def test_session_is_deterministic_for_same_inputs():
	    """Two runs with the same scripted agent and seed produce the same trace.

	    Compares the per-turn focal positions, the session outcome and the
	    metrics of two independently constructed runs.
	    """
	    def _run_once():
	        # A fresh agent and runner per call so no state is shared between runs.
	        return SessionRunner(
	            "template", _agent(["ACTION: up"]), seed=42,
	            play_turns=5, use_probe=False,
	        ).run()

	    t1 = _run_once()
	    t2 = _run_once()
	    # Guard against a vacuous pass: two empty trajectories would compare equal.
	    assert t1.turns
	    # Same scripted agent + same seed -> identical realized trajectory.
	    assert [t.focal_pos for t in t1.turns] == [t.focal_pos for t in t2.turns]
	    assert t1.outcome == t2.outcome
	    assert t1.metrics == t2.metrics


	def test_short_budget_yields_survived_outcome():
	    """A one-turn budget ends the session with the "survived" outcome."""
	    # The step budget runs out (with no capture) right after the single played
	    # turn, so the engine fires `survived`.
	    trace = SessionRunner(
	        "template",
	        _agent(["ACTION: up"]),
	        seed=42,
	        play_turns=1,
	        use_probe=False,
	    ).run()
	    assert trace.outcome == "survived"
	    # The last recorded turn carries the survival reward.
	    assert trace.turns[-1].reward == 50.0  # _REWARD_SURVIVED


	def test_eliminated_outcome_is_explicit_and_terminal():
	    """An agent that always moves "right" ends the session as "eliminated"."""
	    # Generic property: the engine can drive a focal into capture, and the
	    # resulting outcome is the explicit, terminal "eliminated". On template the
	    # predator waits far to the east, so always charging "right" walks into it.
	    # The session must stop on capture, before the budget is spent, and pay the
	    # capture penalty.
	    turn_budget = 40
	    charging_agent = _agent(["ACTION: right"])
	    runner = SessionRunner(
	        "template",
	        charging_agent,
	        seed=0,
	        play_turns=turn_budget,
	        use_probe=False,
	    )
	    trace = runner.run()
	    assert trace.outcome == "eliminated"
	    # Terminal: the run stopped on elimination rather than exhausting the budget.
	    assert len(trace.turns) < turn_budget
	    assert trace.turns[-1].reward == -50.0  # _REWARD_CAPTURED


	def test_cut_frames_count_matches_cut_length_plus_one():
	    """The trace holds one cut frame per pre-roll step plus the initial frame."""
	    from proteus.game.engine.difficulty import Difficulty
	    from proteus.game.scenarios.base import get_scenario

	    runner = SessionRunner(
	        "template",
	        _agent(["ACTION: up"]),
	        seed=42,
	        play_turns=5,
	        use_probe=False,
	    )
	    trace = runner.run()
	    # Derive the expectation from the scenario itself rather than hardcoding it:
	    # the initial frame plus one frame per Cut pre-roll step.
	    # NOTE(review): presumably the runner defaults to Difficulty.EASY -- confirm.
	    scenario = get_scenario("template")()
	    pre_roll_steps = scenario.cut_length(Difficulty.EASY)
	    assert len(trace.cut_frames) == pre_roll_steps + 1