| from __future__ import annotations | |
| import argparse | |
| from pathlib import Path | |
| from .converter import BuildOptions, build_bundle | |
| from .profiles import parse_profiles | |
| from .runtime import Qwen3AneRerankRuntime | |
| def _add_common_build_args(parser: argparse.ArgumentParser) -> None: | |
| parser.add_argument( | |
| "--profiles", | |
| type=str, | |
| default=None, | |
| help="Shape profiles as comma list BxS (e.g. 1x128,4x128)", | |
| ) | |
| parser.add_argument( | |
| "--target", | |
| type=str, | |
| default="macOS14", | |
| choices=["macOS14", "macOS15", "iOS17", "iOS18"], | |
| help="Core ML minimum deployment target", | |
| ) | |
| parser.add_argument( | |
| "--compile-mlmodelc", | |
| action=argparse.BooleanOptionalAction, | |
| default=True, | |
| help="Compile .mlpackage into .mlmodelc with coremlcompiler", | |
| ) | |
| parser.add_argument( | |
| "--system-prompt", | |
| default=( | |
| "Judge whether the Document meets the requirements based on the Query and the " | |
| 'Instruct provided. Note that the answer can only be "yes" or "no".' | |
| ), | |
| help="System prompt used in reranker prompt template", | |
| ) | |
def cmd_convert(args: argparse.Namespace) -> None:
    """Handle the ``convert`` subcommand: build a Core ML bundle and print a summary.

    Reads --model-dir/--bundle-dir plus the common build flags from *args*,
    runs the conversion, then reports the manifest contents on stdout.
    """
    shape_profiles = parse_profiles(args.profiles)
    build_options = BuildOptions(
        model_dir=Path(args.model_dir),
        bundle_dir=Path(args.bundle_dir),
        profiles=shape_profiles,
        compile_mlmodelc=bool(args.compile_mlmodelc),
        minimum_deployment_target=args.target,
        system_prompt=args.system_prompt,
    )
    manifest = build_bundle(build_options)
    summary = [
        f"Built bundle at: {Path(args.bundle_dir).resolve()}",
        f"Model: {manifest.model_name}",
        f"Task: {manifest.task}",
        f"Hidden size: {manifest.hidden_size}",
        f"Token ids yes/no: {manifest.yes_token_id}/{manifest.no_token_id}",
        "Profiles:",
    ]
    for line in summary:
        print(line)
    for entry in manifest.profiles:
        print(
            f" - {entry.profile_id}: batch={entry.batch_size}, seq={entry.seq_len}, "
            f"model={entry.compiled_path or entry.package_path}"
        )
def cmd_serve(args: argparse.Namespace) -> None:
    """Handle the ``serve`` subcommand: run the /v1/rerank HTTP endpoint.

    If the bundle manifest is missing, either exit with an error or — when
    --auto-build is enabled and --model-dir was supplied — build the bundle
    from the source model before starting the server.
    """
    bundle_dir = Path(args.bundle_dir)
    if not (bundle_dir / "manifest.json").exists():
        # No prebuilt bundle present: refuse unless the caller opted into auto-build.
        if not args.auto_build:
            raise SystemExit(
                f"Bundle not found at {bundle_dir}. Run convert first or pass --auto-build --model-dir."
            )
        if not args.model_dir:
            raise SystemExit("--model-dir is required when --auto-build is enabled")
        shape_profiles = parse_profiles(args.profiles)
        build_options = BuildOptions(
            model_dir=Path(args.model_dir),
            bundle_dir=bundle_dir,
            profiles=shape_profiles,
            compile_mlmodelc=bool(args.compile_mlmodelc),
            minimum_deployment_target=args.target,
            system_prompt=args.system_prompt,
        )
        print("Bundle not found; building from source model...")
        build_bundle(build_options)

    runtime = Qwen3AneRerankRuntime(bundle_dir=bundle_dir, compute_units=args.compute_units)

    # Imported here rather than at module top — presumably to keep the server
    # dependencies optional for convert-only usage; confirm before hoisting.
    import uvicorn

    from .api import create_app

    app = create_app(runtime=runtime, default_model_id=args.model_id)
    uvicorn.run(app, host=args.host, port=args.port, log_level=args.log_level)
def build_parser() -> argparse.ArgumentParser:
    """Assemble the top-level CLI parser with its ``convert`` and ``serve`` subcommands.

    Each subcommand stores its handler on the namespace via
    ``set_defaults(func=...)`` so the entry point can dispatch generically.
    """
    root = argparse.ArgumentParser(
        prog="qwen3-ane-rerank",
        description="Convert Qwen3-Reranker model to Core ML ANE bundle and serve /v1/rerank endpoint.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # --- convert -------------------------------------------------------------
    convert = commands.add_parser(
        "convert",
        help="Convert local HF Qwen3-Reranker model into ANE-ready Core ML profile bundle",
    )
    convert.add_argument("--model-dir", required=True, help="Path to source HF model directory")
    convert.add_argument(
        "--bundle-dir",
        required=True,
        help="Output bundle directory (manifest + packages + tokenizer)",
    )
    _add_common_build_args(convert)
    convert.set_defaults(func=cmd_convert)

    # --- serve ---------------------------------------------------------------
    serve = commands.add_parser(
        "serve",
        help="Run /v1/rerank endpoint backed by Core ML ANE profiles",
    )
    serve.add_argument(
        "--bundle-dir",
        required=True,
        help="Bundle directory created by convert",
    )
    serve.add_argument(
        "--model-dir",
        default=None,
        help="Source HF model directory (required if --auto-build and bundle missing)",
    )
    serve.add_argument(
        "--auto-build",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Auto-build bundle from --model-dir when manifest is missing",
    )
    _add_common_build_args(serve)
    serve.add_argument("--host", default="127.0.0.1")
    serve.add_argument("--port", type=int, default=8000)
    serve.add_argument(
        "--compute-units",
        default="cpu_and_ne",
        choices=["cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"],
        help="Core ML compute units preference",
    )
    serve.add_argument(
        "--model-id",
        default="qwen3-reranker-0.6b-ane",
        help="Model id returned in API responses",
    )
    serve.add_argument(
        "--log-level",
        default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
    )
    serve.set_defaults(func=cmd_serve)
    return root
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: parse arguments and dispatch to the chosen subcommand.

    Args:
        argv: Optional explicit argument list (useful for testing). When
            ``None``, argparse falls back to ``sys.argv[1:]`` as before, so
            existing callers are unaffected.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    # Each subcommand registered its handler via set_defaults(func=...).
    args.func(args)


if __name__ == "__main__":
    main()