File size: 4,345 Bytes
88b9f90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa7a04b
88b9f90
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
"""
Create a vLLM-ready model directory for Qwen3TerminatorForCausalLM.

Downloads the base Qwen3-8B config and weights from HuggingFace (if not
already cached), then creates a model directory with:
  - config.json        (Qwen3-8B base config + terminator fields)
  - tokenizer files    (symlinked from HF cache)
  - model weights      (symlinked from HF cache)

Usage:
    # Default: uses ./terminator.pt checkpoint, creates ./model_dir
    python setup_model_dir.py

    # Custom paths and settings:
    python setup_model_dir.py \\
        --checkpoint /path/to/terminator.pt \\
        --output-dir /path/to/model_dir \\
        --threshold 0.5
"""

import argparse
import os
import sys
from pathlib import Path

from huggingface_hub import snapshot_download
from transformers import AutoConfig


def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser (module docstring is reused as the help epilog)."""
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--base-model", default="Qwen/Qwen3-8B",
        help="HuggingFace model ID for the base model (default: Qwen/Qwen3-8B).",
    )
    # NOTE: argparse applies `type=Path` to string defaults as well, so these
    # resolve to Path objects even when the flag is omitted.
    parser.add_argument(
        "--checkpoint", type=Path, default="./terminator.pt",
        help="Path to trained terminator .pt checkpoint (default: ./terminator.pt).",
    )
    parser.add_argument(
        "--output-dir", type=Path, default="./model_dir",
        help="Destination directory (default: ./model_dir; created if missing).",
    )
    parser.add_argument(
        "--threshold", type=float, default=0.7,
        help="Terminator firing threshold (default 0.7).",
    )
    parser.add_argument(
        "--window-size", type=int, default=10,
        help="Sliding window size for majority vote (default 10).",
    )
    parser.add_argument(
        "--exit-message", type=str,
        default="\nI've run out of thinking tokens. I need to commit to a final answer.",
        help="Message forced when terminator fires (default: standard exit message). "
             "Set to empty string to disable.",
    )
    parser.add_argument(
        "--no-download", action="store_true",
        help="Fail if the base model is not already cached locally "
             "(by default, downloads from HuggingFace if needed).",
    )
    parser.add_argument(
        "--force", action="store_true",
        help="Overwrite files in existing output directory.",
    )
    return parser


def _write_patched_config(
    base_model: str,
    checkpoint: Path,
    out_dir: Path,
    threshold: float,
    window_size: int,
    exit_message: str,
) -> None:
    """Load the base model's config, inject terminator fields, save to out_dir."""
    print(f"Loading config for {base_model} from HF cache...")
    config = AutoConfig.from_pretrained(base_model)

    config.architectures = ["Qwen3TerminatorForCausalLM"]
    config.terminator_checkpoint_path = str(checkpoint)
    config.terminator_threshold = threshold
    config.terminator_window_size = window_size
    config.terminator_exit_message = exit_message

    # Remove auto_map if present from an older span-predictor config
    if hasattr(config, "auto_map"):
        del config.auto_map

    config.save_pretrained(out_dir)
    print(f"  Wrote config.json -> {out_dir / 'config.json'}")


def _link_base_files(base_model: str, out_dir: Path, *, allow_download: bool,
                     force: bool) -> int:
    """Symlink weight/tokenizer files from the HF cache snapshot into out_dir.

    Returns the number of files linked. Skips config.json (we wrote our own)
    and, unless ``force`` is set, any destination that already exists.
    """
    print(f"Locating {base_model} in HF cache...")
    base_dir = Path(snapshot_download(base_model, local_files_only=not allow_download))
    print(f"  Found: {base_dir}")

    linked = 0
    for src in sorted(base_dir.iterdir()):
        if src.name == "config.json":
            continue  # we already wrote our own

        dst = out_dir / src.name
        # is_symlink() catches broken symlinks, for which exists() is False —
        # without it a dangling link would make symlink_to() raise.
        if dst.exists() or dst.is_symlink():
            if force:
                dst.unlink()
            else:
                continue

        dst.symlink_to(src)
        print(f"  Linked {src.name}")
        linked += 1
    return linked


def main():
    """Assemble a vLLM-ready model directory (see module docstring).

    Exits with status 1 if the terminator checkpoint file does not exist.
    """
    args = _build_parser().parse_args()

    checkpoint = args.checkpoint.resolve()
    out_dir = args.output_dir.resolve()

    if not checkpoint.is_file():
        print(f"ERROR: checkpoint not found: {checkpoint}", file=sys.stderr)
        sys.exit(1)

    out_dir.mkdir(parents=True, exist_ok=True)

    # --- Build patched config.json ---
    _write_patched_config(
        args.base_model, checkpoint, out_dir,
        args.threshold, args.window_size, args.exit_message,
    )

    # --- Symlink weights and tokenizer files from HF cache ---
    linked = _link_base_files(
        args.base_model, out_dir,
        allow_download=not args.no_download, force=args.force,
    )

    print(f"\nDone. Linked {linked} files into {out_dir}")
    print("\nTo start the server:")
    print("  ./start_server.sh")
    print("\nOr manually:")
    print(f"  VLLM_MODEL={out_dir} REASONING_PARSER=qwen3 python serve.py")

# Script entry point: `python setup_model_dir.py [options]`.
if __name__ == "__main__":
    main()