AgentnessBench / tests /cli /test_cli_memory.py
irregular6612's picture
refactor(scenario): delete predator_evade; template is the canonical scenario
93cd78f
Raw
History Blame Contribute Delete
2.27 kB
from proteus.cli import main
from proteus.game.runtime.memory import load_checkpoint
def test_memory_subcommand_writes_loadable_checkpoint(tmp_path):
    """Check that ``memory`` exits 0 and writes a checkpoint that loads back."""
    checkpoint_path = tmp_path / "ck.json"
    argv = [
        "memory", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--memory-turns", "4",
        "--out", str(checkpoint_path),
    ]

    exit_code = main(argv)
    assert exit_code == 0

    checkpoint = load_checkpoint(checkpoint_path)
    # The checkpoint reports the model as "demo" for the "fake:demo" spec.
    assert checkpoint.model == "demo"
    assert checkpoint.scenario == "template"
    # At least one turn is stored, and no more than the four requested.
    turn_count = len(checkpoint.memory_turns)
    assert 1 <= turn_count <= 4
def test_memory_subcommand_unknown_model_exits_2(tmp_path, capsys):
    """Check that an unknown model provider yields exit code 2 and is named on stderr."""
    argv = [
        "memory", "--scenario", "template", "--model", "bogusprovider:x",
        "--difficulty", "easy", "--seed", "42", "--out", str(tmp_path / "c.json"),
    ]

    exit_code = main(argv)
    assert exit_code == 2

    # The error text must mention the offending provider name.
    captured = capsys.readouterr()
    assert "bogusprovider" in captured.err
from proteus.game.runtime import read_traces
def test_run_memory_generate_sets_memory_ref(tmp_path):
    """Check that ``run --memory generate`` sets ``memory_ref`` on the first trace."""
    trace_path = tmp_path / "t.jsonl"
    memory_root = tmp_path / "mem"
    argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--memory", "generate", "--memory-turns", "4",
        "--memory-root", str(memory_root), "--out", str(trace_path),
    ]

    exit_code = main(argv)
    assert exit_code == 0

    first_trace = read_traces(trace_path)[0]
    assert first_trace.memory_ref is not None
    # the turn-1 observation carries the memory block
    first_observation = first_trace.turns[0].observation
    assert "MEMORY" in first_observation
def test_run_memory_latest_missing_exits_2(tmp_path, capsys):
    """Check that ``run --memory latest`` exits 2 and mentions memory on stderr.

    Nothing in this test writes under the memory root before the run.
    """
    trace_path = tmp_path / "t.jsonl"
    memory_root = tmp_path / "mem"
    argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--memory", "latest",
        "--memory-root", str(memory_root), "--out", str(trace_path),
    ]

    exit_code = main(argv)
    assert exit_code == 2

    # Case-insensitive match on the error output.
    stderr_text = capsys.readouterr().err
    assert "memory" in stderr_text.lower()
def test_run_memory_none_is_default(tmp_path):
    """Check that ``run`` without ``--memory`` leaves ``memory_ref`` as ``None``."""
    trace_path = tmp_path / "t.jsonl"
    argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--out", str(trace_path),
    ]

    exit_code = main(argv)
    assert exit_code == 0

    first_trace = read_traces(trace_path)[0]
    assert first_trace.memory_ref is None