Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / tests / runtime / test_interactive_session.py

irregular6612

refactor(scenario): delete predator_evade; template is the canonical scenario

93cd78f 22 days ago

Raw

History Blame Contribute Delete

2.32 kB

	"""Unit tests for InteractiveSession (HTTP-driven, stepwise play)."""
	from __future__ import annotations

	import pytest

	import proteus.game.scenarios # noqa: F401
	from proteus.game.engine.difficulty import Difficulty
	from proteus.game.runtime._session_core import SessionFinishedError
	from proteus.game.runtime.interactive import InteractiveSession


	def _new(play_turns=10):
	    """Create the InteractiveSession shared by the tests in this module.

	    The session always targets the "template" scenario at EASY difficulty
	    with seed 42 and the probe switched off; only the play-turn budget is
	    configurable.

	    Args:
	        play_turns: Value forwarded as the session's ``play_turns`` argument.

	    Returns:
	        A newly constructed ``InteractiveSession``.
	    """
	    session_options = {
	        "difficulty": Difficulty.EASY,
	        "seed": 42,
	        "play_turns": play_turns,
	        "use_probe": False,
	    }
	    return InteractiveSession("template", **session_options)


	def test_initial_state_is_cut_intro_with_int_grid_and_no_answer_keys():
	    """A fresh session starts in the intro phase and hides answer data."""
	    session = _new()
	    snapshot = session.state()

	    # Nothing has been played yet, so there is no outcome and no review.
	    assert snapshot["phase"] == "cut_intro"
	    assert snapshot["turn_idx"] == 0
	    assert snapshot["outcome"] is None
	    assert snapshot["review"] is None

	    # The grid must be a JSON-ready matrix of ints.
	    grid = snapshot["grid"]
	    assert isinstance(grid, list)
	    assert isinstance(grid[0][0], int)

	    # Cut animation frames are present on the first state.
	    frames = snapshot["cut_frames"]
	    assert frames is not None
	    assert len(frames) >= 1

	    # Fairness: "reward" and "motive_action" must not be top-level keys,
	    # and "habit" must not occur anywhere in the state's string rendering.
	    rendered = str(snapshot)
	    assert "reward" not in snapshot
	    assert "motive_action" not in snapshot
	    assert "habit" not in rendered


	def test_step_advances_turn_and_drops_cut_frames():
	    """The first step enters the play phase and clears the cut frames."""
	    session = _new()
	    after_first_move = session.step("up")

	    assert after_first_move["phase"] == "play"
	    assert after_first_move["turn_idx"] == 1
	    # Cut frames are no longer supplied once a step has been taken.
	    assert after_first_move["cut_frames"] is None


	def test_invalid_action_rejected():
	    """Stepping with the action "northwest" must raise ValueError."""
	    session = _new()
	    unsupported_action = "northwest"

	    with pytest.raises(ValueError):
	        session.step(unsupported_action)


	def test_play_to_budget_then_review_and_finish():
	    """Playing out the turn budget ends the session and exposes the review."""
	    budget = 3
	    session = _new(play_turns=budget)

	    # Step at most `budget` times, stopping early once an outcome is set.
	    moves_made = 0
	    while moves_made < budget and session.state()["outcome"] is None:
	        session.step("up")
	        moves_made += 1

	    final_state = session.state()
	    assert final_state["phase"] == "done"
	    assert final_state["outcome"] in ("survived", "eliminated")

	    # The review is disclosed only when the session is done.
	    review = final_state["review"]
	    assert review is not None
	    assert "metrics" in review
	    assert "turns" in review

	    trace = session.finish()
	    assert trace.model == "human"
	    assert trace.scenario == "template"

	    # finish() is memoized: a repeated call returns the very same object.
	    assert session.finish() is trace


	def test_step_after_done_raises():
	    """Stepping a session that is already done raises SessionFinishedError."""
	    session = _new(play_turns=1)

	    # With play_turns=1 this single step exhausts the budget -> done.
	    session.step("up")

	    with pytest.raises(SessionFinishedError):
	        session.step("up")