File size: 5,766 Bytes
2d83c9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93cd78f
2d83c9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93cd78f
2d83c9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93cd78f
2d83c9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""proteus.cli.parser — top-level argument parser construction."""

from __future__ import annotations

import argparse

from proteus.game.engine.difficulty import Difficulty
from proteus.providers import available_providers
from proteus.game.metrics.persona import available_personas

from proteus.cli.commands.run import _cmd_run
from proteus.cli.commands.play import _cmd_play
from proteus.cli.commands.memory import _cmd_memory
from proteus.cli.commands.list_scenarios import _cmd_list_scenarios
from proteus.cli.commands.replay import _cmd_replay
from proteus.cli.commands.compare import _cmd_compare


def _add_scenario(cmd: argparse.ArgumentParser) -> None:
    """Add the shared ``--scenario`` option (default ``"template"``)."""
    cmd.add_argument("--scenario", default="template")


def _add_model(cmd: argparse.ArgumentParser) -> None:
    """Add the required ``--model`` provider spec option.

    The help text lists whatever ``available_providers()`` returns at the
    time the parser is built.
    """
    cmd.add_argument(
        "--model",
        required=True,
        help=(
            "provider spec '<name>:<model>'. Providers: "
            f"{', '.join(available_providers())}. Use 'fake:<name>' for offline."
        ),
    )


def _add_difficulty_and_seed(cmd: argparse.ArgumentParser) -> None:
    """Add ``--difficulty`` (restricted to ``Difficulty`` values) and ``--seed``."""
    cmd.add_argument(
        "--difficulty", default="easy", choices=[d.value for d in Difficulty]
    )
    cmd.add_argument("--seed", type=int, default=None)


def _add_play_turns(cmd: argparse.ArgumentParser) -> None:
    """Add the integer ``--play-turns`` option (default 15)."""
    cmd.add_argument("--play-turns", type=int, default=15, dest="play_turns")


def _add_no_gif(cmd: argparse.ArgumentParser) -> None:
    """Add the ``--no-gif`` flag that turns off automatic GIF rendering."""
    cmd.add_argument(
        "--no-gif",
        action="store_true",
        dest="no_gif",
        help="do not auto-render a GIF of the played game",
    )


def _add_memory_turns(cmd: argparse.ArgumentParser) -> None:
    """Add the integer ``--memory-turns`` option (default 10)."""
    cmd.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")


def _configure_run(run: argparse.ArgumentParser) -> None:
    """Declare the options of the ``run`` subcommand and bind ``_cmd_run``."""
    # Declaration order is kept stable because argparse prints options in the
    # order they were added.
    _add_scenario(run)
    _add_model(run)
    _add_difficulty_and_seed(run)
    _add_play_turns(run)
    run.add_argument("--no-probe", action="store_true", dest="no_probe")
    run.add_argument("--out", required=True, help="JSONL file to append the trace to")
    _add_no_gif(run)
    run.add_argument(
        "--persona",
        default=None,
        help=(
            "score persona maintenance against a hidden reference policy. "
            f"Built-ins: {', '.join(available_personas())}. "
            "Only the public id is recorded; the weights never enter the prompt."
        ),
    )
    run.add_argument(
        "--memory",
        default="none",
        help="memory pre-roll: none (default) | generate | latest | <checkpoint path>",
    )
    _add_memory_turns(run)
    run.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for generate/latest checkpoints",
    )
    run.set_defaults(func=_cmd_run)


def _configure_play(play: argparse.ArgumentParser) -> None:
    """Declare the options of the ``play`` subcommand and bind ``_cmd_play``."""
    _add_scenario(play)
    _add_difficulty_and_seed(play)
    _add_play_turns(play)
    # Unlike ``run`` (which has an opt-out ``--no-probe``), the probe is
    # opt-in here.
    play.add_argument(
        "--probe",
        action="store_true",
        help="also ask the per-turn comprehension probe (default: off for humans)",
    )
    play.add_argument(
        "--out", default=None, help="optional JSONL file to append the human trace to"
    )
    _add_no_gif(play)
    play.set_defaults(func=_cmd_play)


def _configure_memory(memory: argparse.ArgumentParser) -> None:
    """Declare the options of the ``memory`` subcommand and bind ``_cmd_memory``."""
    _add_scenario(memory)
    _add_model(memory)
    _add_difficulty_and_seed(memory)
    _add_memory_turns(memory)
    memory.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for auto-named checkpoints (when --out is omitted)",
    )
    memory.add_argument(
        "--out", default=None, help="explicit checkpoint path (overrides --memory-root)"
    )
    memory.add_argument(
        "--persona",
        default=None,
        help=(
            "generate a persona demonstration (the hidden reference policy plays, "
            f"not the model). Built-ins: {', '.join(available_personas())}. Only "
            "the public id is stored; the weights never enter the checkpoint."
        ),
    )
    memory.set_defaults(func=_cmd_memory)


def _configure_replay(replay: argparse.ArgumentParser) -> None:
    """Declare the options of the ``replay`` subcommand and bind ``_cmd_replay``."""
    replay.add_argument("trace_file", help="path to a .jsonl trace file")
    replay.add_argument(
        "--visual", action="store_true", help="truecolor terminal replay"
    )
    replay.add_argument(
        "--png",
        default=None,
        metavar="DIR",
        help="also write per-frame PNGs to DIR (needs the 'viz' extra)",
    )
    replay.add_argument("--fps", type=float, default=4.0, help="replay frames/sec")
    replay.set_defaults(func=_cmd_replay)


def _configure_compare(compare: argparse.ArgumentParser) -> None:
    """Declare the options of the ``compare`` subcommand and bind ``_cmd_compare``."""
    compare.add_argument(
        "trace_files", nargs="+", help="one or more .jsonl trace files"
    )
    compare.add_argument(
        "--out", default=None, help="optional JSON file to write the aggregate summary to"
    )
    compare.set_defaults(func=_cmd_compare)


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser.

    A subcommand is mandatory; its name is stored as ``command`` on the parsed
    namespace.  Every subcommand also stores its handler as the ``func``
    default.

    Returns:
        The ``proteus`` parser with the ``run``, ``play``, ``memory``,
        ``list-scenarios``, ``replay`` and ``compare`` subcommands registered,
        in that order.
    """
    parser = argparse.ArgumentParser(
        prog="proteus",
        description="PROTEUS — a grid arena for measuring LLM motive-reading.",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    _configure_run(
        sub.add_parser("run", help="run one session and append its trace")
    )
    _configure_play(
        sub.add_parser("play", help="play a session as a human via stdin")
    )
    _configure_memory(
        sub.add_parser(
            "memory", help="generate + save an LLM memory pre-roll checkpoint"
        )
    )

    # ``list-scenarios`` takes no options of its own, so it is wired up inline.
    listing = sub.add_parser("list-scenarios", help="list registered scenarios")
    listing.set_defaults(func=_cmd_list_scenarios)

    _configure_replay(sub.add_parser("replay", help="print a saved trace"))
    _configure_compare(
        sub.add_parser(
            "compare",
            help="aggregate traces by (model, difficulty) for baseline comparison",
        )
    )

    return parser