Spaces:

irregular6612
/

AgentnessBench

Sleeping

File size: 5,766 Bytes

"""proteus.cli.parser — top-level argument parser construction."""

from __future__ import annotations

import argparse

from proteus.game.engine.difficulty import Difficulty
from proteus.providers import available_providers
from proteus.game.metrics.persona import available_personas

from proteus.cli.commands.run import _cmd_run
from proteus.cli.commands.play import _cmd_play
from proteus.cli.commands.memory import _cmd_memory
from proteus.cli.commands.list_scenarios import _cmd_list_scenarios
from proteus.cli.commands.replay import _cmd_replay
from proteus.cli.commands.compare import _cmd_compare


def _add_session_arguments(
    target: argparse.ArgumentParser, *, with_model: bool
) -> None:
    """Add the options shared by the ``run``, ``play`` and ``memory`` commands.

    Options are registered in a fixed order (``--scenario``, optionally
    ``--model``, ``--difficulty``, ``--seed``) so each command's ``--help``
    lists them in the same sequence.

    Args:
        target: The subcommand parser to extend.
        with_model: When true, also register the required ``--model`` option.
    """
    target.add_argument("--scenario", default="template")
    if with_model:
        target.add_argument(
            "--model",
            required=True,
            help=(
                "provider spec '<name>:<model>'. Providers: "
                f"{', '.join(available_providers())}. Use 'fake:<name>' for offline."
            ),
        )
    target.add_argument(
        "--difficulty", default="easy", choices=[d.value for d in Difficulty]
    )
    target.add_argument("--seed", type=int, default=None)


def _add_play_turns_argument(target: argparse.ArgumentParser) -> None:
    """Add the ``--play-turns`` option (int, default 15)."""
    target.add_argument("--play-turns", type=int, default=15, dest="play_turns")


def _add_no_gif_argument(target: argparse.ArgumentParser) -> None:
    """Add the ``--no-gif`` flag that disables automatic GIF rendering."""
    target.add_argument(
        "--no-gif",
        action="store_true",
        dest="no_gif",
        help="do not auto-render a GIF of the played game",
    )


def _add_memory_turns_argument(target: argparse.ArgumentParser) -> None:
    """Add the ``--memory-turns`` option (int, default 10)."""
    target.add_argument("--memory-turns", type=int, default=10, dest="memory_turns")


def _add_run_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``run`` subcommand, dispatched to ``_cmd_run``."""
    run = sub.add_parser("run", help="run one session and append its trace")
    _add_session_arguments(run, with_model=True)
    _add_play_turns_argument(run)
    run.add_argument("--no-probe", action="store_true", dest="no_probe")
    run.add_argument("--out", required=True, help="JSONL file to append the trace to")
    _add_no_gif_argument(run)
    run.add_argument(
        "--persona",
        default=None,
        help=(
            "score persona maintenance against a hidden reference policy. "
            f"Built-ins: {', '.join(available_personas())}. "
            "Only the public id is recorded; the weights never enter the prompt."
        ),
    )
    run.add_argument(
        "--memory",
        default="none",
        help="memory pre-roll: none (default) | generate | latest | <checkpoint path>",
    )
    _add_memory_turns_argument(run)
    run.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for generate/latest checkpoints",
    )
    run.set_defaults(func=_cmd_run)


def _add_play_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``play`` subcommand, dispatched to ``_cmd_play``."""
    play = sub.add_parser("play", help="play a session as a human via stdin")
    # No --model here: the session is driven from stdin.
    _add_session_arguments(play, with_model=False)
    _add_play_turns_argument(play)
    play.add_argument(
        "--probe",
        action="store_true",
        help="also ask the per-turn comprehension probe (default: off for humans)",
    )
    play.add_argument(
        "--out", default=None, help="optional JSONL file to append the human trace to"
    )
    _add_no_gif_argument(play)
    play.set_defaults(func=_cmd_play)


def _add_memory_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``memory`` subcommand, dispatched to ``_cmd_memory``."""
    memory = sub.add_parser(
        "memory", help="generate + save an LLM memory pre-roll checkpoint"
    )
    _add_session_arguments(memory, with_model=True)
    _add_memory_turns_argument(memory)
    memory.add_argument(
        "--memory-root",
        default="runs/memory",
        dest="memory_root",
        help="root dir for auto-named checkpoints (when --out is omitted)",
    )
    memory.add_argument(
        "--out", default=None, help="explicit checkpoint path (overrides --memory-root)"
    )
    memory.add_argument(
        "--persona",
        default=None,
        help=(
            "generate a persona demonstration (the hidden reference policy plays, "
            f"not the model). Built-ins: {', '.join(available_personas())}. Only "
            "the public id is stored; the weights never enter the checkpoint."
        ),
    )
    memory.set_defaults(func=_cmd_memory)


def _add_list_scenarios_command(sub: argparse._SubParsersAction) -> None:
    """Register ``list-scenarios``, dispatched to ``_cmd_list_scenarios``."""
    listing = sub.add_parser("list-scenarios", help="list registered scenarios")
    listing.set_defaults(func=_cmd_list_scenarios)


def _add_replay_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``replay`` subcommand, dispatched to ``_cmd_replay``."""
    replay = sub.add_parser("replay", help="print a saved trace")
    replay.add_argument("trace_file", help="path to a .jsonl trace file")
    replay.add_argument(
        "--visual", action="store_true", help="truecolor terminal replay"
    )
    replay.add_argument(
        "--png",
        default=None,
        metavar="DIR",
        help="also write per-frame PNGs to DIR (needs the 'viz' extra)",
    )
    replay.add_argument("--fps", type=float, default=4.0, help="replay frames/sec")
    replay.set_defaults(func=_cmd_replay)


def _add_compare_command(sub: argparse._SubParsersAction) -> None:
    """Register the ``compare`` subcommand, dispatched to ``_cmd_compare``."""
    compare = sub.add_parser(
        "compare", help="aggregate traces by (model, difficulty) for baseline comparison"
    )
    compare.add_argument("trace_files", nargs="+", help="one or more .jsonl trace files")
    compare.add_argument(
        "--out", default=None, help="optional JSON file to write the aggregate summary to"
    )
    compare.set_defaults(func=_cmd_compare)


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser.

    A subcommand is mandatory; its name is stored in ``command`` on the parsed
    namespace. Each subcommand also sets a ``func`` default pointing at its
    handler, so callers can dispatch via ``args.func``.

    Returns:
        The configured ``proteus`` parser with the ``run``, ``play``,
        ``memory``, ``list-scenarios``, ``replay`` and ``compare`` subcommands.
    """
    parser = argparse.ArgumentParser(
        prog="proteus",
        description="PROTEUS — a grid arena for measuring LLM motive-reading.",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # Registration order is the order subcommands appear in ``--help``.
    for register in (
        _add_run_command,
        _add_play_command,
        _add_memory_command,
        _add_list_scenarios_command,
        _add_replay_command,
        _add_compare_command,
    ):
        register(sub)

    return parser