Spaces:

irregular6612
/

AgentnessBench

Sleeping

App Files Files Community

AgentnessBench / proteus /cli /parser.py

irregular6612

refactor(scenario): delete predator_evade; template is the canonical scenario

93cd78f 24 days ago

Raw

History Blame Contribute Delete

5.77 kB

	"""proteus.cli.parser — top-level argument parser construction."""

	from __future__ import annotations

	import argparse

	from proteus.game.engine.difficulty import Difficulty
	from proteus.providers import available_providers
	from proteus.game.metrics.persona import available_personas

	from proteus.cli.commands.run import _cmd_run
	from proteus.cli.commands.play import _cmd_play
	from proteus.cli.commands.memory import _cmd_memory
	from proteus.cli.commands.list_scenarios import _cmd_list_scenarios
	from proteus.cli.commands.replay import _cmd_replay
	from proteus.cli.commands.compare import _cmd_compare


	def _difficulty_choices() -> list:
	    """Return the accepted ``--difficulty`` values, one per ``Difficulty`` member."""
	    return [level.value for level in Difficulty]


	def _model_help() -> str:
	    """Return the ``--model`` help text, listing the currently available providers."""
	    return (
	        "provider spec '<name>:<model>'. Providers: "
	        f"{', '.join(available_providers())}. Use 'fake:<name>' for offline."
	    )


	def _add_session_options(
	    command: argparse.ArgumentParser, *, with_model: bool
	) -> None:
	    """Add the options shared by ``run``, ``play`` and ``memory``.

	    Adds ``--scenario``, optionally ``--model``, then ``--difficulty`` and
	    ``--seed``, in that order so the generated ``--help`` listing keeps the
	    same option order for every subcommand.

	    Args:
	        command: the subcommand parser to extend.
	        with_model: when true, also add the required ``--model`` option.
	    """
	    command.add_argument("--scenario", default="template")
	    if with_model:
	        command.add_argument("--model", required=True, help=_model_help())
	    command.add_argument(
	        "--difficulty", default="easy", choices=_difficulty_choices()
	    )
	    command.add_argument("--seed", type=int, default=None)


	def _add_run_command(sub) -> None:
	    """Register the ``run`` subcommand on the subparsers action ``sub``."""
	    run = sub.add_parser("run", help="run one session and append its trace")
	    _add_session_options(run, with_model=True)
	    run.add_argument("--play-turns", type=int, default=15, dest="play_turns")
	    run.add_argument("--no-probe", action="store_true", dest="no_probe")
	    run.add_argument(
	        "--out", required=True, help="JSONL file to append the trace to"
	    )
	    run.add_argument(
	        "--no-gif", action="store_true", dest="no_gif",
	        help="do not auto-render a GIF of the played game",
	    )
	    run.add_argument(
	        "--persona", default=None,
	        help=(
	            "score persona maintenance against a hidden reference policy. "
	            f"Built-ins: {', '.join(available_personas())}. "
	            "Only the public id is recorded; the weights never enter the prompt."
	        ),
	    )
	    run.add_argument(
	        "--memory", default="none",
	        # Plain "|" separators: a backslash before "|" is not a valid escape
	        # in a normal string literal, so it would be shown verbatim in --help
	        # (and triggers a SyntaxWarning on recent Python versions).
	        help="memory pre-roll: none (default) | generate | latest | <checkpoint path>",
	    )
	    run.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")
	    run.add_argument(
	        "--memory-root", default="runs/memory", dest="memory_root",
	        help="root dir for generate/latest checkpoints",
	    )
	    run.set_defaults(func=_cmd_run)


	def _add_play_command(sub) -> None:
	    """Register the ``play`` subcommand on the subparsers action ``sub``."""
	    play = sub.add_parser("play", help="play a session as a human via stdin")
	    _add_session_options(play, with_model=False)
	    play.add_argument("--play-turns", type=int, default=15, dest="play_turns")
	    play.add_argument(
	        "--probe",
	        action="store_true",
	        help="also ask the per-turn comprehension probe (default: off for humans)",
	    )
	    play.add_argument(
	        "--out", default=None, help="optional JSONL file to append the human trace to"
	    )
	    play.add_argument(
	        "--no-gif", action="store_true", dest="no_gif",
	        help="do not auto-render a GIF of the played game",
	    )
	    play.set_defaults(func=_cmd_play)


	def _add_memory_command(sub) -> None:
	    """Register the ``memory`` subcommand on the subparsers action ``sub``."""
	    memory = sub.add_parser(
	        "memory", help="generate + save an LLM memory pre-roll checkpoint"
	    )
	    _add_session_options(memory, with_model=True)
	    memory.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")
	    memory.add_argument(
	        "--memory-root", default="runs/memory", dest="memory_root",
	        help="root dir for auto-named checkpoints (when --out is omitted)",
	    )
	    memory.add_argument(
	        "--out", default=None, help="explicit checkpoint path (overrides --memory-root)"
	    )
	    memory.add_argument(
	        "--persona", default=None,
	        help=(
	            "generate a persona demonstration (the hidden reference policy plays, "
	            f"not the model). Built-ins: {', '.join(available_personas())}. Only "
	            "the public id is stored; the weights never enter the checkpoint."
	        ),
	    )
	    memory.set_defaults(func=_cmd_memory)


	def _add_list_scenarios_command(sub) -> None:
	    """Register the ``list-scenarios`` subcommand on the subparsers action ``sub``."""
	    listing = sub.add_parser("list-scenarios", help="list registered scenarios")
	    listing.set_defaults(func=_cmd_list_scenarios)


	def _add_replay_command(sub) -> None:
	    """Register the ``replay`` subcommand on the subparsers action ``sub``."""
	    replay = sub.add_parser("replay", help="print a saved trace")
	    replay.add_argument("trace_file", help="path to a .jsonl trace file")
	    replay.add_argument(
	        "--visual", action="store_true", help="truecolor terminal replay"
	    )
	    replay.add_argument(
	        "--png", default=None, metavar="DIR",
	        help="also write per-frame PNGs to DIR (needs the 'viz' extra)",
	    )
	    replay.add_argument("--fps", type=float, default=4.0, help="replay frames/sec")
	    replay.set_defaults(func=_cmd_replay)


	def _add_compare_command(sub) -> None:
	    """Register the ``compare`` subcommand on the subparsers action ``sub``."""
	    compare = sub.add_parser(
	        "compare", help="aggregate traces by (model, difficulty) for baseline comparison"
	    )
	    compare.add_argument(
	        "trace_files", nargs="+", help="one or more .jsonl trace files"
	    )
	    compare.add_argument(
	        "--out", default=None, help="optional JSON file to write the aggregate summary to"
	    )
	    compare.set_defaults(func=_cmd_compare)


	def build_parser() -> argparse.ArgumentParser:
	    """Build the top-level ``proteus`` argument parser.

	    Registers the ``run``, ``play``, ``memory``, ``list-scenarios``,
	    ``replay`` and ``compare`` subcommands. Choosing a subcommand is
	    mandatory; its name is stored as ``command`` on the parsed namespace and
	    its handler callable as ``func``.

	    Returns:
	        The configured ``argparse.ArgumentParser``.
	    """
	    parser = argparse.ArgumentParser(
	        prog="proteus",
	        description="PROTEUS — a grid arena for measuring LLM motive-reading.",
	    )
	    sub = parser.add_subparsers(dest="command", required=True)

	    # Registration order is the order subcommands appear in ``--help``.
	    _add_run_command(sub)
	    _add_play_command(sub)
	    _add_memory_command(sub)
	    _add_list_scenarios_command(sub)
	    _add_replay_command(sub)
	    _add_compare_command(sub)

	    return parser