File size: 5,719 Bytes

1e1d0ce

from __future__ import annotations

import argparse
from pathlib import Path

from .converter import BuildOptions, build_bundle
from .profiles import parse_profiles
from .runtime import Qwen3AneRerankRuntime


def _add_common_build_args(parser: argparse.ArgumentParser) -> None:
    """Attach the bundle-build options to ``parser``.

    Registers ``--profiles``, ``--target``, ``--compile-mlmodelc`` (with its
    ``--no-compile-mlmodelc`` negation) and ``--system-prompt``, in that order.

    Args:
        parser: Parser (or sub-parser) receiving the options; modified in place.
    """
    default_system_prompt = (
        "Judge whether the Document meets the requirements based on the Query and the "
        'Instruct provided. Note that the answer can only be "yes" or "no".'
    )

    # One (flag, keyword-arguments) pair per option, listed in registration order.
    option_table = (
        (
            "--profiles",
            {
                "type": str,
                "default": None,
                "help": "Shape profiles as comma list BxS (e.g. 1x128,4x128)",
            },
        ),
        (
            "--target",
            {
                "type": str,
                "default": "macOS14",
                "choices": ["macOS14", "macOS15", "iOS17", "iOS18"],
                "help": "Core ML minimum deployment target",
            },
        ),
        (
            "--compile-mlmodelc",
            {
                "action": argparse.BooleanOptionalAction,
                "default": True,
                "help": "Compile .mlpackage into .mlmodelc with coremlcompiler",
            },
        ),
        (
            "--system-prompt",
            {
                "default": default_system_prompt,
                "help": "System prompt used in reranker prompt template",
            },
        ),
    )

    for flag, settings in option_table:
        parser.add_argument(flag, **settings)


def cmd_convert(args: argparse.Namespace) -> None:
    """Handle the ``convert`` sub-command.

    Builds a bundle from the parsed CLI options and prints a summary of the
    resulting manifest (bundle location, model details and one line per
    profile) to standard output.

    Args:
        args: Parsed arguments; reads ``profiles``, ``model_dir``,
            ``bundle_dir``, ``compile_mlmodelc``, ``target`` and
            ``system_prompt``.
    """
    shape_profiles = parse_profiles(args.profiles)
    manifest = build_bundle(
        BuildOptions(
            model_dir=Path(args.model_dir),
            bundle_dir=Path(args.bundle_dir),
            profiles=shape_profiles,
            compile_mlmodelc=bool(args.compile_mlmodelc),
            minimum_deployment_target=args.target,
            system_prompt=args.system_prompt,
        )
    )

    resolved_bundle = Path(args.bundle_dir).resolve()
    print(f"Built bundle at: {resolved_bundle}")

    # Simple "label: value" lines taken straight from manifest attributes.
    for label, attribute in (
        ("Model", "model_name"),
        ("Task", "task"),
        ("Hidden size", "hidden_size"),
    ):
        print(f"{label}: {getattr(manifest, attribute)}")
    print(f"Token ids yes/no: {manifest.yes_token_id}/{manifest.no_token_id}")

    print("Profiles:")
    for profile in manifest.profiles:
        shape_part = (
            f"  - {profile.profile_id}: batch={profile.batch_size}, seq={profile.seq_len}, "
        )
        # Show the compiled model path when set, otherwise the package path.
        location_part = f"model={profile.compiled_path or profile.package_path}"
        print(shape_part + location_part)


def cmd_serve(args: argparse.Namespace) -> None:
    """Handle the ``serve`` sub-command.

    If ``manifest.json`` is absent from the bundle directory, the bundle is
    first built from ``--model-dir`` (when auto-build is enabled). A runtime is
    then created for the bundle and handed to the API app, which is served
    with uvicorn.

    Args:
        args: Parsed arguments for the ``serve`` sub-command.

    Raises:
        SystemExit: If the manifest is missing and either auto-build is
            disabled or no model directory was supplied.
    """
    bundle_dir = Path(args.bundle_dir)
    bundle_missing = not (bundle_dir / "manifest.json").exists()

    # Guard clauses: refuse to continue when the bundle cannot be produced.
    if bundle_missing and not args.auto_build:
        raise SystemExit(
            f"Bundle not found at {bundle_dir}. Run convert first or pass --auto-build --model-dir."
        )
    if bundle_missing and not args.model_dir:
        raise SystemExit("--model-dir is required when --auto-build is enabled")

    if bundle_missing:
        shape_profiles = parse_profiles(args.profiles)
        build_options = BuildOptions(
            model_dir=Path(args.model_dir),
            bundle_dir=bundle_dir,
            profiles=shape_profiles,
            compile_mlmodelc=bool(args.compile_mlmodelc),
            minimum_deployment_target=args.target,
            system_prompt=args.system_prompt,
        )
        print("Bundle not found; building from source model...")
        build_bundle(build_options)

    runtime = Qwen3AneRerankRuntime(bundle_dir=bundle_dir, compute_units=args.compute_units)

    # Imported here rather than at module level, so only ``serve`` pulls in
    # the API module and uvicorn.
    from .api import create_app
    import uvicorn

    uvicorn.run(
        create_app(runtime=runtime, default_model_id=args.model_id),
        host=args.host,
        port=args.port,
        log_level=args.log_level,
    )


def build_parser() -> argparse.ArgumentParser:
    """Create the top-level CLI parser with its ``convert`` and ``serve`` sub-commands.

    Each sub-parser records its handler through ``set_defaults(func=...)`` so
    the caller can dispatch with ``args.func(args)``. A sub-command is
    mandatory.

    Returns:
        The fully configured argument parser.
    """
    parser = argparse.ArgumentParser(
        prog="qwen3-ane-rerank",
        description="Convert Qwen3-Reranker model to Core ML ANE bundle and serve /v1/rerank endpoint.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # --- convert -----------------------------------------------------------
    convert_parser = subparsers.add_parser(
        "convert",
        help="Convert local HF Qwen3-Reranker model into ANE-ready Core ML profile bundle",
    )
    convert_parser.add_argument("--model-dir", required=True, help="Path to source HF model directory")
    convert_parser.add_argument(
        "--bundle-dir",
        required=True,
        help="Output bundle directory (manifest + packages + tokenizer)",
    )
    _add_common_build_args(convert_parser)
    convert_parser.set_defaults(func=cmd_convert)

    # --- serve -------------------------------------------------------------
    serve_parser = subparsers.add_parser(
        "serve",
        help="Run /v1/rerank endpoint backed by Core ML ANE profiles",
    )
    serve_parser.add_argument(
        "--bundle-dir",
        required=True,
        help="Bundle directory created by convert",
    )
    serve_parser.add_argument(
        "--model-dir",
        default=None,
        help="Source HF model directory (required if --auto-build and bundle missing)",
    )
    serve_parser.add_argument(
        "--auto-build",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Auto-build bundle from --model-dir when manifest is missing",
    )
    # The build options are also accepted here because ``serve`` may have to
    # build the bundle itself when auto-build is enabled.
    _add_common_build_args(serve_parser)
    serve_parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host address the HTTP server binds to",
    )
    serve_parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="TCP port the HTTP server listens on",
    )
    serve_parser.add_argument(
        "--compute-units",
        default="cpu_and_ne",
        choices=["cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"],
        help="Core ML compute units preference",
    )
    serve_parser.add_argument(
        "--model-id",
        default="qwen3-reranker-0.6b-ane",
        help="Model id returned in API responses",
    )
    serve_parser.add_argument(
        "--log-level",
        default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
        help="Log level passed to the uvicorn server",
    )
    serve_parser.set_defaults(func=cmd_serve)

    return parser


def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and run the selected sub-command.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing zero-argument callers behave as before. Passing an
            explicit list allows the CLI to be invoked programmatically.
    """
    args = build_parser().parse_args(argv)
    # Each sub-parser stored its handler under ``func`` via set_defaults().
    args.func(args)


# Run the CLI only when this module is executed as a script, not when imported.
if __name__ == "__main__":
    main()