from proteus.cli import main
from proteus.game.runtime import read_traces
def test_run_with_persona_records_id_and_metrics(tmp_path):
    """A ``run`` with ``--persona risk_averse`` tags and scores the trace.

    Checks that the CLI returns 0, that the first trace read back carries
    the persona id, that ``action_agreement`` is among its metrics, and that
    the string ``risk_cost`` is absent from the trace's JSON dump.
    """
    trace_path = tmp_path / "p.jsonl"
    argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--no-gif", "--persona", "risk_averse",
        "--out", str(trace_path),
    ]
    exit_code = main(argv)
    assert exit_code == 0

    recorded = read_traces(str(trace_path))
    first_trace = next(iter(recorded))
    assert first_trace.persona_weight_id == "risk_averse"
    assert "action_agreement" in first_trace.metrics

    # The hidden weights must never reach the serialized trace.
    serialized = first_trace.model_dump_json()
    assert "risk_cost" not in serialized
def test_run_without_persona_has_no_persona_metrics(tmp_path):
    """A ``run`` without ``--persona`` leaves the trace free of persona data.

    Checks that the CLI returns 0, that the first trace's
    ``persona_weight_id`` is ``None``, and that ``action_agreement`` is not
    among its metrics.
    """
    trace_path = tmp_path / "np.jsonl"
    argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--no-gif",
        "--out", str(trace_path),
    ]
    exit_code = main(argv)
    assert exit_code == 0

    recorded = read_traces(str(trace_path))
    first_trace = next(iter(recorded))
    assert first_trace.persona_weight_id is None
    assert "action_agreement" not in first_trace.metrics
def test_run_unknown_persona_errors(tmp_path):
    """A ``run`` given the persona name ``nope`` returns exit code 2."""
    trace_path = tmp_path / "bad.jsonl"
    argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--no-gif", "--persona", "nope",
        "--out", str(trace_path),
    ]
    exit_code = main(argv)
    assert exit_code == 2
def test_memory_with_persona_tags_checkpoint(tmp_path):
    """A ``memory`` run with ``--persona risk_averse`` tags the checkpoint.

    Checks that the CLI returns 0, that the loaded checkpoint carries the
    persona id, and that the string ``risk_cost`` is absent from the
    checkpoint's JSON dump.
    """
    from proteus.game.runtime.memory import load_checkpoint

    checkpoint_path = tmp_path / "mem.json"
    argv = [
        "memory", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--memory-turns", "5",
        "--persona", "risk_averse",
        "--out", str(checkpoint_path),
    ]
    exit_code = main(argv)
    assert exit_code == 0

    checkpoint = load_checkpoint(str(checkpoint_path))
    assert checkpoint.persona_weight_id == "risk_averse"
    serialized = checkpoint.model_dump_json()
    assert "risk_cost" not in serialized
def test_memory_unknown_persona_errors(tmp_path):
    """A ``memory`` run given the persona name ``nope`` returns exit code 2."""
    checkpoint_path = tmp_path / "bad.json"
    argv = [
        "memory", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--memory-turns", "5",
        "--persona", "nope",
        "--out", str(checkpoint_path),
    ]
    exit_code = main(argv)
    assert exit_code == 2
def test_persona_memory_then_scored_run(tmp_path):
    """Build a persona demonstration memory, then do a scored run against it.

    Stage 4 acceptance: a persona demonstration memory plus a scored run that
    measures whether the model continues that persona (same hidden weights).
    Both CLI invocations must return 0; the resulting trace must carry the
    persona id, reference the memory file path, and include the
    ``action_agreement`` metric.
    """
    memory_path = tmp_path / "demo.json"
    memory_argv = [
        "memory", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--memory-turns", "5",
        "--persona", "risk_averse",
        "--out", str(memory_path),
    ]
    memory_exit = main(memory_argv)
    assert memory_exit == 0

    trace_path = tmp_path / "scored.jsonl"
    run_argv = [
        "run", "--scenario", "template", "--model", "fake:demo",
        "--difficulty", "easy", "--seed", "42", "--play-turns", "3",
        "--no-probe", "--no-gif", "--memory", str(memory_path),
        "--persona", "risk_averse",
        "--out", str(trace_path),
    ]
    run_exit = main(run_argv)
    assert run_exit == 0

    recorded = read_traces(str(trace_path))
    first_trace = next(iter(recorded))
    assert first_trace.persona_weight_id == "risk_averse"
    assert first_trace.memory_ref == str(memory_path)
    assert "action_agreement" in first_trace.metrics
|