"""proteus.cli.parser — top-level argument parser construction."""

from __future__ import annotations

import argparse

from proteus.game.engine.difficulty import Difficulty
from proteus.providers import available_providers
from proteus.game.metrics.persona import available_personas
from proteus.cli.commands.run import _cmd_run
from proteus.cli.commands.play import _cmd_play
from proteus.cli.commands.memory import _cmd_memory
from proteus.cli.commands.list_scenarios import _cmd_list_scenarios
from proteus.cli.commands.replay import _cmd_replay
from proteus.cli.commands.compare import _cmd_compare


# ---------------------------------------------------------------------------
# Options shared by more than one sub-command.
#
# Each helper registers its option(s) with exactly the same flags, defaults
# and help text everywhere, so the sub-commands cannot drift apart.  Helpers
# are deliberately tiny so every sub-command can keep its own option order
# (the order of add_argument calls is the order shown by ``--help``).
# ---------------------------------------------------------------------------


def _builtin_personas() -> str:
    """Return the names from ``available_personas()`` joined with ', '."""
    return ", ".join(available_personas())


def _add_scenario_option(cmd: argparse.ArgumentParser) -> None:
    """Register ``--scenario`` (default ``"template"``)."""
    cmd.add_argument("--scenario", default="template")


def _add_model_option(cmd: argparse.ArgumentParser) -> None:
    """Register the required ``--model`` option."""
    # NOTE(review): "provider spec ':'" and "'fake:'" read as if angle-bracket
    # placeholders were lost from this help text — confirm the intended
    # wording.  The literal is kept exactly as found.
    cmd.add_argument(
        "--model",
        required=True,
        help=(
            "provider spec ':'. Providers: "
            f"{', '.join(available_providers())}. Use 'fake:' for offline."
        ),
    )


def _add_difficulty_and_seed_options(cmd: argparse.ArgumentParser) -> None:
    """Register ``--difficulty`` (one of the ``Difficulty`` values) and ``--seed``."""
    cmd.add_argument(
        "--difficulty", default="easy", choices=[d.value for d in Difficulty]
    )
    cmd.add_argument("--seed", type=int, default=None)


def _add_play_turns_option(cmd: argparse.ArgumentParser) -> None:
    """Register ``--play-turns`` (int, default 15, stored as ``play_turns``)."""
    cmd.add_argument("--play-turns", type=int, default=15, dest="play_turns")


def _add_no_gif_option(cmd: argparse.ArgumentParser) -> None:
    """Register the ``--no-gif`` flag (stored as ``no_gif``)."""
    cmd.add_argument(
        "--no-gif",
        action="store_true",
        dest="no_gif",
        help="do not auto-render a GIF of the played game",
    )


def _add_memory_turns_option(cmd: argparse.ArgumentParser) -> None:
    """Register ``--memory-turns`` (int, default 10, stored as ``memory_turns``)."""
    cmd.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")


# ---------------------------------------------------------------------------
# One helper per sub-command.  Each one creates the sub-parser, registers its
# options and binds the handler under the ``func`` default.
# ---------------------------------------------------------------------------


def _add_run_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``run`` sub-command, handled by ``_cmd_run``."""
    run = sub.add_parser("run", help="run one session and append its trace")
    _add_scenario_option(run)
    _add_model_option(run)
    _add_difficulty_and_seed_options(run)
    _add_play_turns_option(run)
    run.add_argument("--no-probe", action="store_true", dest="no_probe")
    run.add_argument(
        "--out", required=True, help="JSONL file to append the trace to"
    )
    _add_no_gif_option(run)
    run.add_argument(
        "--persona",
        default=None,
        help=(
            "score persona maintenance against a hidden reference policy. "
            f"Built-ins: {_builtin_personas()}. "
            "Only the public id is recorded; the weights never enter the prompt."
        ),
    )
    # NOTE(review): this help text ends in "latest | " with nothing after the
    # bar — a fourth alternative appears to be missing; confirm.  The literal
    # is kept exactly as found.
    run.add_argument(
        "--memory",
        default="none",
        help="memory pre-roll: none (default) | generate | latest | ",
    )
    _add_memory_turns_option(run)
    run.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for generate/latest checkpoints",
    )
    run.set_defaults(func=_cmd_run)


def _add_play_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``play`` sub-command, handled by ``_cmd_play``."""
    play = sub.add_parser("play", help="play a session as a human via stdin")
    _add_scenario_option(play)
    _add_difficulty_and_seed_options(play)
    _add_play_turns_option(play)
    play.add_argument(
        "--probe",
        action="store_true",
        help="also ask the per-turn comprehension probe (default: off for humans)",
    )
    play.add_argument(
        "--out",
        default=None,
        help="optional JSONL file to append the human trace to",
    )
    _add_no_gif_option(play)
    play.set_defaults(func=_cmd_play)


def _add_memory_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``memory`` sub-command, handled by ``_cmd_memory``."""
    memory = sub.add_parser(
        "memory", help="generate + save an LLM memory pre-roll checkpoint"
    )
    _add_scenario_option(memory)
    _add_model_option(memory)
    _add_difficulty_and_seed_options(memory)
    _add_memory_turns_option(memory)
    memory.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for auto-named checkpoints (when --out is omitted)",
    )
    memory.add_argument(
        "--out",
        default=None,
        help="explicit checkpoint path (overrides --memory-root)",
    )
    memory.add_argument(
        "--persona",
        default=None,
        help=(
            "generate a persona demonstration (the hidden reference policy plays, "
            f"not the model). Built-ins: {_builtin_personas()}. Only "
            "the public id is stored; the weights never enter the checkpoint."
        ),
    )
    memory.set_defaults(func=_cmd_memory)


def _add_list_scenarios_command(sub: argparse._SubParsersAction) -> None:
    """Register the option-less ``list-scenarios`` sub-command."""
    listing = sub.add_parser("list-scenarios", help="list registered scenarios")
    listing.set_defaults(func=_cmd_list_scenarios)


def _add_replay_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``replay`` sub-command, handled by ``_cmd_replay``."""
    replay = sub.add_parser("replay", help="print a saved trace")
    replay.add_argument("trace_file", help="path to a .jsonl trace file")
    replay.add_argument(
        "--visual", action="store_true", help="truecolor terminal replay"
    )
    replay.add_argument(
        "--png",
        default=None,
        metavar="DIR",
        help="also write per-frame PNGs to DIR (needs the 'viz' extra)",
    )
    replay.add_argument(
        "--fps", type=float, default=4.0, help="replay frames/sec"
    )
    replay.set_defaults(func=_cmd_replay)


def _add_compare_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``compare`` sub-command, handled by ``_cmd_compare``."""
    compare = sub.add_parser(
        "compare",
        help="aggregate traces by (model, difficulty) for baseline comparison",
    )
    compare.add_argument(
        "trace_files", nargs="+", help="one or more .jsonl trace files"
    )
    compare.add_argument(
        "--out",
        default=None,
        help="optional JSON file to write the aggregate summary to",
    )
    compare.set_defaults(func=_cmd_compare)


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser.

    The parser requires exactly one sub-command, whose name is stored in
    ``args.command``.  Every sub-command stores its handler in ``args.func``
    via ``set_defaults``.

    Returns:
        The configured ``proteus`` parser with the ``run``, ``play``,
        ``memory``, ``list-scenarios``, ``replay`` and ``compare``
        sub-commands registered, in that order.
    """
    parser = argparse.ArgumentParser(
        prog="proteus",
        description="PROTEUS — a grid arena for measuring LLM motive-reading.",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # Registration order is the order the sub-commands appear in ``--help``.
    _add_run_command(sub)
    _add_play_command(sub)
    _add_memory_command(sub)
    _add_list_scenarios_command(sub)
    _add_replay_command(sub)
    _add_compare_command(sub)
    return parser