Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / tests /runtime /test_template_memory.py

irregular6612

refactor(scenario): rename pack_evade -> template

d4716c0 24 days ago

Raw

History Blame Contribute Delete

2.38 kB

	"""template's handover memory is generated by a HIDDEN PERSONA weight vector
	(a reference-policy self-play episode), not hand-written — per the agentness
	design (docs/agentness_game_design_from_paper.md §3-4). The raw weights never
	leak; only the public persona_weight_id + the behaviour trajectory are stored."""
	from __future__ import annotations

	import proteus.game.scenarios # noqa: F401
	from proteus.game.agents.human import HumanAgent
	from proteus.game.engine.difficulty import Difficulty
	from proteus.game.scenarios.base import get_scenario
	from proteus.game.runtime.memory import MemoryCheckpoint
	from proteus.game.runtime.session import SessionRunner


	def test_default_memory_is_persona_generated():
	    """Inspect the default "template" checkpoint built for seed 42 on EASY.

	    The checkpoint must carry only the public persona id, hold at least one
	    memory turn, keep "risk_cost" out of the transparent prompt, and store
	    compact per-turn frames that name the walls.
	    """
	    scenario_cls = get_scenario("template")
	    checkpoint = scenario_cls().default_memory(42, Difficulty.EASY)

	    assert isinstance(checkpoint, MemoryCheckpoint)
	    assert checkpoint.scenario == "template"

	    # The persona weight vector stays hidden; only its public id is stored.
	    assert checkpoint.persona_weight_id == "risk_averse"
	    assert checkpoint.model == "persona:risk_averse"

	    # The memory is the trajectory the reference policy actually played.
	    assert len(checkpoint.memory_turns) >= 1

	    # Raw weights must not show up in the participant-visible brief.
	    assert "risk_cost" not in checkpoint.transparent_prompt

	    # Frames are compact (prose coords + a wall-rectangle list), never the
	    # 4096-char ASCII map of the 64x64 field.
	    assert not any(
	        len(turn.frame_ascii) >= 600 for turn in checkpoint.memory_turns
	    )

	    # EASY has walls, so every per-turn observation must name them.
	    assert not any(
	        "Walls (blocked rectangles" not in turn.frame_ascii
	        for turn in checkpoint.memory_turns
	    )


	def test_default_memory_is_deterministic():
	    """Same seed and difficulty must yield checkpoints with equal dumps."""
	    scenario = get_scenario("template")()
	    # Build both checkpoints before dumping either one.
	    checkpoints = [
	        scenario.default_memory(42, Difficulty.EASY) for _ in range(2)
	    ]
	    first_dump, second_dump = (ckpt.model_dump() for ckpt in checkpoints)
	    assert first_dump == second_dump


	def test_template_session_shows_persona_memory_by_default():
	    """Run a short scripted session; turn 1's observation shows "MEMORY"."""
	    # Scripted input: five "stay" moves, handed out one per prompt.
	    scripted_moves = iter(["stay"] * 5)

	    def next_move(_p):
	        return next(scripted_moves)

	    def discard_output(_t):
	        return None

	    agent = HumanAgent(input_fn=next_move, output_fn=discard_output)
	    runner = SessionRunner(
	        "template",
	        agent,
	        difficulty=Difficulty.EASY,
	        seed=42,
	        play_turns=3,
	        use_probe=False,
	    )
	    trace = runner.run()

	    # The turn-1 observation carries the (persona-generated) memory block.
	    first_observation = trace.turns[0].observation
	    assert "MEMORY" in first_observation