AgentnessBench / tests /runtime /test_session.py
irregular6612's picture
test(template): restore generic eliminated-outcome + blocked step_reward coverage
1debdd3
Raw
History Blame Contribute Delete
3.71 kB
from proteus.providers import FakeProvider
from proteus.game.agents import VanillaAgent
from proteus.game.runtime.session import SessionRunner
from proteus.game.runtime.trace import SessionTrace
def _agent(responses):
    """Return a VanillaAgent driven by a FakeProvider scripted with *responses*."""
    scripted_provider = FakeProvider(responses=responses)
    return VanillaAgent(scripted_provider)
def test_optimal_player_survives_and_scores_full_motive_reading():
    """An always-"up" agent is congruent on the first turn and gets scored."""
    # On the opening turn the motive-congruent escape is "up": the open column
    # leading away from the far-east predator. The runner scores every turn
    # against the live optimal answer key, so a constant "up" stays congruent.
    # A single scripted reply suffices: FakeProvider repeats the last response.
    always_up = _agent(["ACTION: up"])
    trace = SessionRunner(
        "template",
        always_up,
        seed=42,
        play_turns=10,
        use_probe=False,
    ).run()

    # Shape of the returned trace.
    assert isinstance(trace, SessionTrace)
    assert trace.scenario == "template"
    assert trace.cut_frames  # Cut history captured
    assert len(trace.turns) >= 1

    # The opening turn must match the answer key exactly.
    opening_turn = trace.turns[0]
    assert opening_turn.motive_action == "up"
    assert opening_turn.action == "up"
    assert opening_turn.was_congruent is True

    assert "motive_reading_accuracy" in trace.metrics
def test_probe_recorded_when_enabled():
    """With use_probe=True the first turn carries a probe question and answer."""
    scripted_reply = (
        "the predator is to my east; I should go up\nACTION: up"
    )
    trace = SessionRunner(
        "template",
        _agent([scripted_reply]),
        seed=42,
        play_turns=3,
        use_probe=True,
    ).run()

    first_turn = trace.turns[0]
    assert first_turn.probe_q  # a question was asked
    assert first_turn.probe_a  # an answer was recorded
def test_session_is_deterministic_for_same_inputs():
    """Two runs with the same script and seed yield the same path and metrics."""

    def _scripted_run():
        # A fresh agent per run, so no provider state is shared between runs.
        return SessionRunner(
            "template",
            _agent(["ACTION: up"]),
            seed=42,
            play_turns=5,
            use_probe=False,
        ).run()

    first_run = _scripted_run()
    second_run = _scripted_run()

    # Same scripted agent + same seed -> identical realized trajectory.
    first_path = [turn.focal_pos for turn in first_run.turns]
    second_path = [turn.focal_pos for turn in second_run.turns]
    assert first_path == second_path
    assert first_run.metrics == second_run.metrics
def test_short_budget_yields_survived_outcome():
    """A one-turn budget ends the session with the `survived` outcome."""
    # The budget is so small that the step count runs out (with no capture)
    # immediately after the played turns, at which point the engine fires
    # `survived`.
    runner = SessionRunner(
        "template",
        _agent(["ACTION: up"]),
        seed=42,
        play_turns=1,
        use_probe=False,
    )
    trace = runner.run()

    assert trace.outcome == "survived"
    final_turn = trace.turns[-1]
    assert final_turn.reward == 50.0  # _REWARD_SURVIVED
def test_eliminated_outcome_is_explicit_and_terminal():
    """Charging into the predator ends the run early as "eliminated"."""
    # Generic property under test: the engine can drive a focal into capture,
    # and the resulting outcome is the explicit, terminal "eliminated".
    # On template the predator waits far to the east, so an agent that always
    # charges "right" walks into it. The session must stop on capture, before
    # the budget is spent, and pay the capture penalty.
    turn_budget = 40
    charging_agent = _agent(["ACTION: right"])
    runner = SessionRunner(
        "template",
        charging_agent,
        seed=0,
        play_turns=turn_budget,
        use_probe=False,
    )
    trace = runner.run()

    assert trace.outcome == "eliminated"
    # Terminal: the run stopped on elimination rather than exhausting the
    # budget.
    assert len(trace.turns) < turn_budget
    last_turn = trace.turns[-1]
    assert last_turn.reward == -50.0  # _REWARD_CAPTURED
def test_cut_frames_count_matches_cut_length_plus_one():
    """The trace holds one initial frame plus one frame per Cut pre-roll step."""
    from proteus.game.engine.difficulty import Difficulty
    from proteus.game.scenarios.base import get_scenario

    runner = SessionRunner(
        "template",
        _agent(["ACTION: up"]),
        seed=42,
        play_turns=5,
        use_probe=False,
    )
    trace = runner.run()

    # Derive the expectation from the scenario itself instead of hardcoding:
    # the initial frame + one frame per Cut pre-roll step.
    scenario_cls = get_scenario("template")
    pre_roll_steps = scenario_cls().cut_length(Difficulty.EASY)
    assert len(trace.cut_frames) == pre_roll_steps + 1