AgentnessBench / tests /runtime /test_interactive_session.py
irregular6612's picture
refactor(scenario): delete predator_evade; template is the canonical scenario
93cd78f
Raw
History Blame Contribute Delete
2.32 kB
"""Unit tests for InteractiveSession (HTTP-driven, stepwise play)."""
from __future__ import annotations
import pytest
import proteus.game.scenarios # noqa: F401
from proteus.game.engine.difficulty import Difficulty
from proteus.game.runtime._session_core import SessionFinishedError
from proteus.game.runtime.interactive import InteractiveSession
def _new(play_turns=10):
    """Build the InteractiveSession used by the tests in this module.

    The session always targets the "template" scenario at easy difficulty
    with seed 42 and the probe switched off; only the play-turn budget
    is configurable.
    """
    session_kwargs = {
        "difficulty": Difficulty.EASY,
        "seed": 42,
        "play_turns": play_turns,
        "use_probe": False,
    }
    return InteractiveSession("template", **session_kwargs)
def test_initial_state_is_cut_intro_with_int_grid_and_no_answer_keys():
    """A freshly built session starts in the cut intro and hides answer keys."""
    state = _new().state()

    assert state["phase"] == "cut_intro"
    assert state["turn_idx"] == 0
    assert state["outcome"] is None
    assert state["review"] is None

    # The grid has to be a list whose first cell is a plain int (JSON-ready).
    grid = state["grid"]
    assert isinstance(grid, list)
    assert isinstance(grid[0][0], int)

    # The initial state carries at least one cut animation frame.
    frames = state["cut_frames"]
    assert frames is not None
    assert len(frames) >= 1

    # Fairness: "reward" and "motive_action" must not be top-level keys, and
    # "habit" must not occur anywhere in the stringified state.
    # NOTE(review): only "habit" is searched through nested values; the other
    # two are top-level key checks only -- confirm that asymmetry is intended.
    stringified = str(state)
    assert "reward" not in state
    assert "motive_action" not in state
    assert "habit" not in stringified
def test_step_advances_turn_and_drops_cut_frames():
    """The first step enters the play phase, bumps the turn, clears frames."""
    session = _new()
    after_step = session.step("up")

    assert after_step["phase"] == "play"
    assert after_step["turn_idx"] == 1
    # Cut frames are no longer included once a step has been taken.
    assert after_step["cut_frames"] is None
def test_invalid_action_rejected():
    """Stepping with the action "northwest" must raise ValueError."""
    session = _new()
    with pytest.raises(ValueError):
        session.step("northwest")
def test_play_to_budget_then_review_and_finish():
    """Play out a 3-turn budget, then check the review payload and finish()."""
    session = _new(play_turns=3)

    # Step "up" at most three times, stopping early once an outcome is set.
    turns_taken = 0
    while turns_taken < 3 and session.state()["outcome"] is None:
        session.step("up")
        turns_taken += 1

    final_state = session.state()
    assert final_state["phase"] == "done"
    assert final_state["outcome"] in ("survived", "eliminated")

    # The review payload is expected to be populated once the session is done.
    review = final_state["review"]
    assert review is not None
    assert "metrics" in review
    assert "turns" in review

    trace = session.finish()
    assert trace.model == "human"
    assert trace.scenario == "template"

    # finish() is memoized: a second call hands back the very same object.
    assert session.finish() is trace
def test_step_after_done_raises():
    """A step taken after the budget is spent raises SessionFinishedError."""
    session = _new(play_turns=1)

    # With play_turns=1 this single step uses up the whole budget.
    session.step("up")

    with pytest.raises(SessionFinishedError):
        session.step("up")