AgentnessBench / proteus /cli /parser.py
irregular6612's picture
refactor(scenario): delete predator_evade; template is the canonical scenario
93cd78f
Raw
History Blame Contribute Delete
5.77 kB
"""proteus.cli.parser — top-level argument parser construction."""
from __future__ import annotations
import argparse
from proteus.game.engine.difficulty import Difficulty
from proteus.providers import available_providers
from proteus.game.metrics.persona import available_personas
from proteus.cli.commands.run import _cmd_run
from proteus.cli.commands.play import _cmd_play
from proteus.cli.commands.memory import _cmd_memory
from proteus.cli.commands.list_scenarios import _cmd_list_scenarios
from proteus.cli.commands.replay import _cmd_replay
from proteus.cli.commands.compare import _cmd_compare
def _add_session_args(target: argparse.ArgumentParser, *, with_model: bool) -> None:
    """Add the leading options shared by the ``run``, ``play`` and ``memory`` subcommands.

    Registers, in this order: ``--scenario``, optionally ``--model``,
    ``--difficulty`` and ``--seed``. The order is kept fixed because argparse
    lists options in ``--help`` in registration order.

    Args:
        target: the subcommand parser to extend.
        with_model: when true, also register the required ``--model`` option
            between ``--scenario`` and ``--difficulty``.
    """
    target.add_argument("--scenario", default="template")
    if with_model:
        target.add_argument(
            "--model",
            required=True,
            help=(
                "provider spec '<name>:<model>'. Providers: "
                f"{', '.join(available_providers())}. Use 'fake:<name>' for offline."
            ),
        )
    target.add_argument(
        "--difficulty",
        default="easy",
        # Accepted values are the ``value`` of every ``Difficulty`` member.
        choices=[d.value for d in Difficulty],
    )
    target.add_argument("--seed", type=int, default=None)


def _add_play_turns_arg(target: argparse.ArgumentParser) -> None:
    """Add ``--play-turns`` (int, default 15, stored as ``play_turns``)."""
    target.add_argument("--play-turns", type=int, default=15, dest="play_turns")


def _add_memory_turns_arg(target: argparse.ArgumentParser) -> None:
    """Add ``--memory-turns`` (int, default 10, stored as ``memory_turns``)."""
    target.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")


def _add_no_gif_arg(target: argparse.ArgumentParser) -> None:
    """Add the ``--no-gif`` flag (stored as ``no_gif``, false unless given)."""
    target.add_argument(
        "--no-gif",
        action="store_true",
        dest="no_gif",
        help="do not auto-render a GIF of the played game",
    )


def _register_run(sub: argparse._SubParsersAction) -> None:
    """Register the ``run`` subcommand and bind ``_cmd_run`` as its ``func`` default."""
    run = sub.add_parser("run", help="run one session and append its trace")
    _add_session_args(run, with_model=True)
    _add_play_turns_arg(run)
    run.add_argument("--no-probe", action="store_true", dest="no_probe")
    # Unlike ``play`` and ``memory``, ``run`` requires an explicit output file.
    run.add_argument("--out", required=True, help="JSONL file to append the trace to")
    _add_no_gif_arg(run)
    run.add_argument(
        "--persona",
        default=None,
        help=(
            "score persona maintenance against a hidden reference policy. "
            f"Built-ins: {', '.join(available_personas())}. "
            "Only the public id is recorded; the weights never enter the prompt."
        ),
    )
    run.add_argument(
        "--memory",
        default="none",
        help="memory pre-roll: none (default) | generate | latest | <checkpoint path>",
    )
    _add_memory_turns_arg(run)
    run.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for generate/latest checkpoints",
    )
    run.set_defaults(func=_cmd_run)


def _register_play(sub: argparse._SubParsersAction) -> None:
    """Register the ``play`` subcommand and bind ``_cmd_play`` as its ``func`` default."""
    play = sub.add_parser("play", help="play a session as a human via stdin")
    _add_session_args(play, with_model=False)
    _add_play_turns_arg(play)
    # Opt-in here (``--probe``), whereas ``run`` exposes the opt-out ``--no-probe``.
    play.add_argument(
        "--probe",
        action="store_true",
        help="also ask the per-turn comprehension probe (default: off for humans)",
    )
    play.add_argument(
        "--out", default=None, help="optional JSONL file to append the human trace to"
    )
    _add_no_gif_arg(play)
    play.set_defaults(func=_cmd_play)


def _register_memory(sub: argparse._SubParsersAction) -> None:
    """Register the ``memory`` subcommand and bind ``_cmd_memory`` as its ``func`` default."""
    memory = sub.add_parser(
        "memory", help="generate + save an LLM memory pre-roll checkpoint"
    )
    _add_session_args(memory, with_model=True)
    _add_memory_turns_arg(memory)
    memory.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for auto-named checkpoints (when --out is omitted)",
    )
    memory.add_argument(
        "--out", default=None, help="explicit checkpoint path (overrides --memory-root)"
    )
    memory.add_argument(
        "--persona",
        default=None,
        help=(
            "generate a persona demonstration (the hidden reference policy plays, "
            f"not the model). Built-ins: {', '.join(available_personas())}. Only "
            "the public id is stored; the weights never enter the checkpoint."
        ),
    )
    memory.set_defaults(func=_cmd_memory)


def _register_list_scenarios(sub: argparse._SubParsersAction) -> None:
    """Register the argument-less ``list-scenarios`` subcommand."""
    listing = sub.add_parser("list-scenarios", help="list registered scenarios")
    listing.set_defaults(func=_cmd_list_scenarios)


def _register_replay(sub: argparse._SubParsersAction) -> None:
    """Register the ``replay`` subcommand and bind ``_cmd_replay`` as its ``func`` default."""
    replay = sub.add_parser("replay", help="print a saved trace")
    replay.add_argument("trace_file", help="path to a .jsonl trace file")
    replay.add_argument(
        "--visual", action="store_true", help="truecolor terminal replay"
    )
    replay.add_argument(
        "--png",
        default=None,
        metavar="DIR",
        help="also write per-frame PNGs to DIR (needs the 'viz' extra)",
    )
    replay.add_argument("--fps", type=float, default=4.0, help="replay frames/sec")
    replay.set_defaults(func=_cmd_replay)


def _register_compare(sub: argparse._SubParsersAction) -> None:
    """Register the ``compare`` subcommand and bind ``_cmd_compare`` as its ``func`` default."""
    compare = sub.add_parser(
        "compare", help="aggregate traces by (model, difficulty) for baseline comparison"
    )
    compare.add_argument("trace_files", nargs="+", help="one or more .jsonl trace files")
    compare.add_argument(
        "--out", default=None, help="optional JSON file to write the aggregate summary to"
    )
    compare.set_defaults(func=_cmd_compare)


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser.

    The parser requires exactly one subcommand, whose name is stored on the
    parsed namespace as ``command``. Every subcommand also sets a ``func``
    default pointing at its ``_cmd_*`` handler (presumably invoked by the
    caller as ``args.func(args)`` — confirm against the entry point).

    Returns:
        The configured ``proteus`` parser with the ``run``, ``play``,
        ``memory``, ``list-scenarios``, ``replay`` and ``compare`` subcommands.
    """
    parser = argparse.ArgumentParser(
        prog="proteus",
        description="PROTEUS — a grid arena for measuring LLM motive-reading.",
    )
    sub = parser.add_subparsers(dest="command", required=True)
    # Registration order determines the order shown in ``proteus --help``.
    registrars = (
        _register_run,
        _register_play,
        _register_memory,
        _register_list_scenarios,
        _register_replay,
        _register_compare,
    )
    for register in registrars:
        register(sub)
    return parser