File size: 3,300 Bytes
f0cff65
426093b
f0cff65
 
 
 
 
93cd78f
f0cff65
 
 
 
 
 
 
 
 
 
 
 
 
 
93cd78f
f0cff65
 
 
 
 
 
 
 
 
 
 
 
93cd78f
f0cff65
 
 
 
19a19d8
 
 
426093b
19a19d8
 
93cd78f
19a19d8
 
 
 
 
 
 
 
 
 
 
 
93cd78f
19a19d8
 
 
 
 
 
 
 
 
 
 
93cd78f
19a19d8
 
 
 
 
 
 
93cd78f
19a19d8
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from proteus.cli import main
from proteus.game.runtime import read_traces


def test_run_with_persona_records_id_and_metrics(tmp_path):
    """A ``run`` with ``--persona risk_averse`` tags and scores the trace.

    Verifies that the CLI returns 0, that the first trace read back from the
    output file carries the persona id and an ``action_agreement`` metric, and
    that the string ``risk_cost`` is absent from the trace's JSON dump.
    """
    trace_path = tmp_path / "p.jsonl"
    argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--persona", "risk_averse",
        "--out", str(trace_path),
    ]
    exit_code = main(argv)
    assert exit_code == 0

    recorded = read_traces(str(trace_path))
    first = next(iter(recorded))
    assert first.persona_weight_id == "risk_averse"
    assert "action_agreement" in first.metrics
    # Hidden persona weights must stay out of the serialized trace.
    serialized = first.model_dump_json()
    assert "risk_cost" not in serialized


def test_run_without_persona_has_no_persona_metrics(tmp_path):
    """A ``run`` without ``--persona`` leaves the trace untagged and unscored.

    Verifies that the CLI returns 0, that the first trace has
    ``persona_weight_id`` set to ``None``, and that its metrics contain no
    ``action_agreement`` entry.
    """
    trace_path = tmp_path / "np.jsonl"
    argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--out", str(trace_path),
    ]
    exit_code = main(argv)
    assert exit_code == 0

    recorded = read_traces(str(trace_path))
    first = next(iter(recorded))
    assert first.persona_weight_id is None
    assert "action_agreement" not in first.metrics


def test_run_unknown_persona_errors(tmp_path):
    """A ``run`` given the persona name ``nope`` must return exit code 2."""
    trace_path = tmp_path / "bad.jsonl"
    argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--persona", "nope",
        "--out", str(trace_path),
    ]
    exit_code = main(argv)
    assert exit_code == 2


def test_memory_with_persona_tags_checkpoint(tmp_path):
    """A ``memory`` run with ``--persona risk_averse`` tags the checkpoint.

    Verifies that the CLI returns 0, that the checkpoint loaded from the
    output file carries the persona id, and that the string ``risk_cost`` is
    absent from the checkpoint's JSON dump.
    """
    from proteus.game.runtime.memory import load_checkpoint

    checkpoint_path = tmp_path / "mem.json"
    argv = [
        "memory",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--memory-turns", "5",
        "--persona", "risk_averse",
        "--out", str(checkpoint_path),
    ]
    exit_code = main(argv)
    assert exit_code == 0

    checkpoint = load_checkpoint(str(checkpoint_path))
    assert checkpoint.persona_weight_id == "risk_averse"
    serialized = checkpoint.model_dump_json()
    assert "risk_cost" not in serialized


def test_memory_unknown_persona_errors(tmp_path):
    """A ``memory`` run given the persona name ``nope`` must return exit code 2."""
    checkpoint_path = tmp_path / "bad.json"
    argv = [
        "memory",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--memory-turns", "5",
        "--persona", "nope",
        "--out", str(checkpoint_path),
    ]
    exit_code = main(argv)
    assert exit_code == 2


def test_persona_memory_then_scored_run(tmp_path):
    """Stage 4 acceptance: persona demonstration memory, then a scored run.

    First writes a persona demonstration memory with the ``memory`` command,
    then performs a ``run`` that loads it via ``--memory`` under the same
    persona, measuring whether the model continues that persona (same hidden
    weights). The resulting trace must carry the persona id, reference the
    memory file path, and include an ``action_agreement`` metric.
    """
    memory_path = tmp_path / "demo.json"
    memory_argv = [
        "memory",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--memory-turns", "5",
        "--persona", "risk_averse",
        "--out", str(memory_path),
    ]
    memory_exit = main(memory_argv)
    assert memory_exit == 0

    trace_path = tmp_path / "scored.jsonl"
    run_argv = [
        "run",
        "--scenario", "template",
        "--model", "fake:demo",
        "--difficulty", "easy",
        "--seed", "42",
        "--play-turns", "3",
        "--no-probe",
        "--no-gif",
        "--memory", str(memory_path),
        "--persona", "risk_averse",
        "--out", str(trace_path),
    ]
    run_exit = main(run_argv)
    assert run_exit == 0

    recorded = read_traces(str(trace_path))
    first = next(iter(recorded))
    assert first.persona_weight_id == "risk_averse"
    assert first.memory_ref == str(memory_path)
    assert "action_agreement" in first.metrics