from proteus.cli import main
from proteus.game.runtime import read_traces

# Flags shared by every CLI invocation in this module; kept in one place so
# the individual tests only spell out what actually differs between them.
_COMMON_ARGS = (
    "--scenario", "template",
    "--model", "fake:demo",
    "--difficulty", "easy",
    "--seed", "42",
)


def _run(out, *, persona=None, memory=None):
    """Invoke the ``run`` subcommand and return the exit code from ``main``.

    Args:
        out: Destination path for the trace file (passed as ``--out``).
        persona: Value for ``--persona``; the flag is omitted when ``None``.
        memory: Path for ``--memory``; the flag is omitted when ``None``.
    """
    argv = ["run", *_COMMON_ARGS, "--play-turns", "3", "--no-probe", "--no-gif"]
    if memory is not None:
        argv += ["--memory", str(memory)]
    if persona is not None:
        argv += ["--persona", persona]
    argv += ["--out", str(out)]
    return main(argv)


def _memory(out, persona):
    """Invoke the ``memory`` subcommand and return the exit code from ``main``.

    Args:
        out: Destination path for the checkpoint file (passed as ``--out``).
        persona: Value for ``--persona``.
    """
    return main([
        "memory",
        *_COMMON_ARGS,
        "--memory-turns", "5",
        "--persona", persona,
        "--out", str(out),
    ])


def _first_trace(path):
    """Return the first trace read from *path*, failing clearly if there is none."""
    trace = next(iter(read_traces(str(path))), None)
    # A bare ``next()`` would surface an empty file as an opaque StopIteration.
    assert trace is not None, f"no traces were written to {path}"
    return trace


def test_run_with_persona_records_id_and_metrics(tmp_path):
    """A persona run records the persona id and the ``action_agreement`` metric."""
    out = tmp_path / "p.jsonl"
    assert _run(out, persona="risk_averse") == 0

    trace = _first_trace(out)
    assert trace.persona_weight_id == "risk_averse"
    assert "action_agreement" in trace.metrics
    # the hidden weights never reach the serialized trace
    assert "risk_cost" not in trace.model_dump_json()


def test_run_without_persona_has_no_persona_metrics(tmp_path):
    """Without ``--persona`` the trace has no persona id and no agreement metric."""
    out = tmp_path / "np.jsonl"
    assert _run(out) == 0

    trace = _first_trace(out)
    assert trace.persona_weight_id is None
    assert "action_agreement" not in trace.metrics


def test_run_unknown_persona_errors(tmp_path):
    """``run`` with an unknown persona name returns exit code 2."""
    out = tmp_path / "bad.jsonl"
    assert _run(out, persona="nope") == 2


def test_memory_with_persona_tags_checkpoint(tmp_path):
    """A persona memory checkpoint carries the persona id but not ``risk_cost``."""
    from proteus.game.runtime.memory import load_checkpoint

    out = tmp_path / "mem.json"
    assert _memory(out, "risk_averse") == 0

    ck = load_checkpoint(str(out))
    assert ck.persona_weight_id == "risk_averse"
    assert "risk_cost" not in ck.model_dump_json()


def test_memory_unknown_persona_errors(tmp_path):
    """``memory`` with an unknown persona name returns exit code 2."""
    out = tmp_path / "bad.json"
    assert _memory(out, "nope") == 2


def test_persona_memory_then_scored_run(tmp_path):
    """A persona memory file can be fed into a scored run with the same persona."""
    # Stage 4 acceptance: a persona demonstration memory + a scored run that
    # measures whether the model continues that persona (same hidden weights).
    mem = tmp_path / "demo.json"
    assert _memory(mem, "risk_averse") == 0

    out = tmp_path / "scored.jsonl"
    assert _run(out, persona="risk_averse", memory=mem) == 0

    trace = _first_trace(out)
    assert trace.persona_weight_id == "risk_averse"
    assert trace.memory_ref == str(mem)
    assert "action_agreement" in trace.metrics