Spaces:
Sleeping
Sleeping
| from proteus.cli import main | |
| from proteus.game.runtime import read_traces | |
def test_run_with_persona_records_id_and_metrics(tmp_path):
    """A `run` given `--persona risk_averse` tags the trace and scores it.

    Checks that the first trace written to the output file carries the
    persona id and an ``action_agreement`` metric, and that the string
    ``risk_cost`` does not appear in the trace's JSON dump.
    """
    trace_path = tmp_path / "p.jsonl"
    argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--persona", "risk_averse",
        "--out", str(trace_path),
    ]

    exit_code = main(argv)
    assert exit_code == 0

    # Only the first recorded trace is inspected.
    recorded = iter(read_traces(str(trace_path)))
    first_trace = next(recorded)

    assert first_trace.persona_weight_id == "risk_averse"
    assert "action_agreement" in first_trace.metrics

    # the hidden weights never reach the serialized trace
    serialized = first_trace.model_dump_json()
    assert "risk_cost" not in serialized
def test_run_without_persona_has_no_persona_metrics(tmp_path):
    """A `run` without `--persona` leaves the trace untagged and unscored.

    Checks that the first trace has ``persona_weight_id`` set to ``None``
    and that ``action_agreement`` is absent from its metrics.
    """
    trace_path = tmp_path / "np.jsonl"
    argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--out", str(trace_path),
    ]

    exit_code = main(argv)
    assert exit_code == 0

    # Only the first recorded trace is inspected.
    recorded = iter(read_traces(str(trace_path)))
    first_trace = next(recorded)

    assert first_trace.persona_weight_id is None
    assert "action_agreement" not in first_trace.metrics
def test_run_unknown_persona_errors(tmp_path):
    """A `run` given the persona name ``nope`` makes `main` return 2."""
    trace_path = tmp_path / "bad.jsonl"
    argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--persona", "nope",
        "--out", str(trace_path),
    ]

    exit_code = main(argv)

    assert exit_code == 2
def test_memory_with_persona_tags_checkpoint(tmp_path):
    """A `memory` run given `--persona risk_averse` tags the checkpoint.

    Checks that the loaded checkpoint carries the persona id and that the
    string ``risk_cost`` does not appear in the checkpoint's JSON dump.
    """
    from proteus.game.runtime.memory import load_checkpoint

    checkpoint_path = tmp_path / "mem.json"
    argv = [
        "memory",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--memory-turns", "5",
        "--persona", "risk_averse",
        "--out", str(checkpoint_path),
    ]

    exit_code = main(argv)
    assert exit_code == 0

    checkpoint = load_checkpoint(str(checkpoint_path))
    assert checkpoint.persona_weight_id == "risk_averse"

    serialized = checkpoint.model_dump_json()
    assert "risk_cost" not in serialized
def test_memory_unknown_persona_errors(tmp_path):
    """A `memory` run given the persona name ``nope`` makes `main` return 2."""
    checkpoint_path = tmp_path / "bad.json"
    argv = [
        "memory",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--memory-turns", "5",
        "--persona", "nope",
        "--out", str(checkpoint_path),
    ]

    exit_code = main(argv)

    assert exit_code == 2
def test_persona_memory_then_scored_run(tmp_path):
    """Build a persona memory, then do a scored run that consumes it.

    Checks that the first trace of the scored run carries the persona id,
    references the memory file by its path string, and includes an
    ``action_agreement`` metric.
    """
    # Stage 4 acceptance: a persona demonstration memory + a scored run that
    # measures whether the model continues that persona (same hidden weights).
    memory_path = tmp_path / "demo.json"
    memory_argv = [
        "memory",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--memory-turns", "5",
        "--persona", "risk_averse",
        "--out", str(memory_path),
    ]
    memory_exit = main(memory_argv)
    assert memory_exit == 0

    scored_path = tmp_path / "scored.jsonl"
    run_argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--memory", str(memory_path),
        "--persona", "risk_averse",
        "--out", str(scored_path),
    ]
    run_exit = main(run_argv)
    assert run_exit == 0

    # Only the first recorded trace is inspected.
    recorded = iter(read_traces(str(scored_path)))
    first_trace = next(recorded)

    assert first_trace.persona_weight_id == "risk_averse"
    assert first_trace.memory_ref == str(memory_path)
    assert "action_agreement" in first_trace.metrics