Spaces:
Sleeping
Sleeping
| """proteus.cli.parser — top-level argument parser construction.""" | |
| from __future__ import annotations | |
| import argparse | |
| from proteus.game.engine.difficulty import Difficulty | |
| from proteus.providers import available_providers | |
| from proteus.game.metrics.persona import available_personas | |
| from proteus.cli.commands.run import _cmd_run | |
| from proteus.cli.commands.play import _cmd_play | |
| from proteus.cli.commands.memory import _cmd_memory | |
| from proteus.cli.commands.list_scenarios import _cmd_list_scenarios | |
| from proteus.cli.commands.replay import _cmd_replay | |
| from proteus.cli.commands.compare import _cmd_compare | |
def _model_help() -> str:
    """Return the ``--model`` help text, listing the available providers."""
    return (
        "provider spec '<name>:<model>'. Providers: "
        f"{', '.join(available_providers())}. Use 'fake:<name>' for offline."
    )


def _add_session_args(cmd: argparse.ArgumentParser, *, with_model: bool) -> None:
    """Register the options shared by the ``run``, ``play`` and ``memory`` commands.

    Adds ``--scenario``, then (only when *with_model* is true) the required
    ``--model``, then ``--difficulty`` and ``--seed``. The order is fixed so
    that each command's ``--help`` lists these options in the same sequence.
    """
    cmd.add_argument("--scenario", default="template")
    if with_model:
        cmd.add_argument("--model", required=True, help=_model_help())
    cmd.add_argument(
        "--difficulty", default="easy", choices=[d.value for d in Difficulty]
    )
    cmd.add_argument("--seed", type=int, default=None)


def _add_run_parser(sub: argparse._SubParsersAction) -> None:
    """Register the ``run`` subcommand and bind it to ``_cmd_run``."""
    run = sub.add_parser("run", help="run one session and append its trace")
    _add_session_args(run, with_model=True)
    run.add_argument("--play-turns", type=int, default=15, dest="play_turns")
    run.add_argument("--no-probe", action="store_true", dest="no_probe")
    run.add_argument("--out", required=True, help="JSONL file to append the trace to")
    run.add_argument(
        "--no-gif", action="store_true", dest="no_gif",
        help="do not auto-render a GIF of the played game",
    )
    run.add_argument(
        "--persona", default=None,
        help=(
            "score persona maintenance against a hidden reference policy. "
            f"Built-ins: {', '.join(available_personas())}. "
            "Only the public id is recorded; the weights never enter the prompt."
        ),
    )
    run.add_argument(
        "--memory", default="none",
        help="memory pre-roll: none (default) | generate | latest | <checkpoint path>",
    )
    run.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")
    run.add_argument(
        "--memory-root", default="runs/memory", dest="memory_root",
        help="root dir for generate/latest checkpoints",
    )
    run.set_defaults(func=_cmd_run)


def _add_play_parser(sub: argparse._SubParsersAction) -> None:
    """Register the ``play`` subcommand and bind it to ``_cmd_play``."""
    play = sub.add_parser("play", help="play a session as a human via stdin")
    # No --model here: this command is described as human play via stdin.
    _add_session_args(play, with_model=False)
    play.add_argument("--play-turns", type=int, default=15, dest="play_turns")
    play.add_argument(
        "--probe",
        action="store_true",
        help="also ask the per-turn comprehension probe (default: off for humans)",
    )
    play.add_argument(
        "--out", default=None, help="optional JSONL file to append the human trace to"
    )
    play.add_argument(
        "--no-gif", action="store_true", dest="no_gif",
        help="do not auto-render a GIF of the played game",
    )
    play.set_defaults(func=_cmd_play)


def _add_memory_parser(sub: argparse._SubParsersAction) -> None:
    """Register the ``memory`` subcommand and bind it to ``_cmd_memory``."""
    memory = sub.add_parser(
        "memory", help="generate + save an LLM memory pre-roll checkpoint"
    )
    _add_session_args(memory, with_model=True)
    memory.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")
    memory.add_argument(
        "--memory-root", default="runs/memory", dest="memory_root",
        help="root dir for auto-named checkpoints (when --out is omitted)",
    )
    memory.add_argument(
        "--out", default=None, help="explicit checkpoint path (overrides --memory-root)"
    )
    memory.add_argument(
        "--persona", default=None,
        help=(
            "generate a persona demonstration (the hidden reference policy plays, "
            f"not the model). Built-ins: {', '.join(available_personas())}. Only "
            "the public id is stored; the weights never enter the checkpoint."
        ),
    )
    memory.set_defaults(func=_cmd_memory)


def _add_list_scenarios_parser(sub: argparse._SubParsersAction) -> None:
    """Register the argument-less ``list-scenarios`` subcommand."""
    listing = sub.add_parser("list-scenarios", help="list registered scenarios")
    listing.set_defaults(func=_cmd_list_scenarios)


def _add_replay_parser(sub: argparse._SubParsersAction) -> None:
    """Register the ``replay`` subcommand and bind it to ``_cmd_replay``."""
    replay = sub.add_parser("replay", help="print a saved trace")
    replay.add_argument("trace_file", help="path to a .jsonl trace file")
    replay.add_argument(
        "--visual", action="store_true", help="truecolor terminal replay"
    )
    replay.add_argument(
        "--png", default=None, metavar="DIR",
        help="also write per-frame PNGs to DIR (needs the 'viz' extra)",
    )
    replay.add_argument("--fps", type=float, default=4.0, help="replay frames/sec")
    replay.set_defaults(func=_cmd_replay)


def _add_compare_parser(sub: argparse._SubParsersAction) -> None:
    """Register the ``compare`` subcommand and bind it to ``_cmd_compare``."""
    compare = sub.add_parser(
        "compare", help="aggregate traces by (model, difficulty) for baseline comparison"
    )
    compare.add_argument("trace_files", nargs="+", help="one or more .jsonl trace files")
    compare.add_argument(
        "--out", default=None, help="optional JSON file to write the aggregate summary to"
    )
    compare.set_defaults(func=_cmd_compare)


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser.

    The parser requires exactly one subcommand, stored under ``command``:
    ``run``, ``play``, ``memory``, ``list-scenarios``, ``replay`` or
    ``compare``. Each subcommand stores its handler callable as the ``func``
    default on the parsed namespace.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog="proteus",
        description="PROTEUS — a grid arena for measuring LLM motive-reading.",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # Registration order determines the order subcommands appear in --help.
    for register in (
        _add_run_parser,
        _add_play_parser,
        _add_memory_parser,
        _add_list_scenarios_parser,
        _add_replay_parser,
        _add_compare_parser,
    ):
        register(sub)

    return parser