# Provenance (scraped model-page header, converted to comments so the file parses):
#   tooktang — Initial release: Qwen3-Reranker-4B CoreML ANE-optimized bundle + service
#   commit 1e1d0ce (verified)
from __future__ import annotations
import argparse
from pathlib import Path
from .converter import BuildOptions, build_bundle
from .profiles import parse_profiles
from .runtime import Qwen3AneRerankRuntime
def _add_common_build_args(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--profiles",
type=str,
default=None,
help="Shape profiles as comma list BxS (e.g. 1x128,4x128)",
)
parser.add_argument(
"--target",
type=str,
default="macOS14",
choices=["macOS14", "macOS15", "iOS17", "iOS18"],
help="Core ML minimum deployment target",
)
parser.add_argument(
"--compile-mlmodelc",
action=argparse.BooleanOptionalAction,
default=True,
help="Compile .mlpackage into .mlmodelc with coremlcompiler",
)
parser.add_argument(
"--system-prompt",
default=(
"Judge whether the Document meets the requirements based on the Query and the "
'Instruct provided. Note that the answer can only be "yes" or "no".'
),
help="System prompt used in reranker prompt template",
)
def cmd_convert(args: argparse.Namespace) -> None:
    """Build a Core ML ANE bundle from a local HF model dir and print a summary.

    Reads the parsed CLI namespace produced by the ``convert`` subparser, runs
    the converter, then prints the resulting manifest's key fields.
    """
    build_options = BuildOptions(
        model_dir=Path(args.model_dir),
        bundle_dir=Path(args.bundle_dir),
        profiles=parse_profiles(args.profiles),
        compile_mlmodelc=bool(args.compile_mlmodelc),
        minimum_deployment_target=args.target,
        system_prompt=args.system_prompt,
    )
    manifest = build_bundle(build_options)

    print(f"Built bundle at: {Path(args.bundle_dir).resolve()}")
    print(f"Model: {manifest.model_name}")
    print(f"Task: {manifest.task}")
    print(f"Hidden size: {manifest.hidden_size}")
    print(f"Token ids yes/no: {manifest.yes_token_id}/{manifest.no_token_id}")
    print("Profiles:")
    for profile in manifest.profiles:
        # Prefer the compiled .mlmodelc path when present, else the .mlpackage.
        model_ref = profile.compiled_path or profile.package_path
        print(
            f" - {profile.profile_id}: batch={profile.batch_size}, seq={profile.seq_len}, "
            f"model={model_ref}"
        )
def cmd_serve(args: argparse.Namespace) -> None:
    """Serve the /v1/rerank endpoint, auto-building the bundle if it is missing.

    Raises SystemExit when no manifest exists and either --auto-build is off or
    no --model-dir was given to build from.
    """
    bundle_dir = Path(args.bundle_dir)
    if not (bundle_dir / "manifest.json").exists():
        # No prebuilt bundle on disk: either refuse, or build one from --model-dir.
        if not args.auto_build:
            raise SystemExit(
                f"Bundle not found at {bundle_dir}. Run convert first or pass --auto-build --model-dir."
            )
        if not args.model_dir:
            raise SystemExit("--model-dir is required when --auto-build is enabled")
        build_options = BuildOptions(
            model_dir=Path(args.model_dir),
            bundle_dir=bundle_dir,
            profiles=parse_profiles(args.profiles),
            compile_mlmodelc=bool(args.compile_mlmodelc),
            minimum_deployment_target=args.target,
            system_prompt=args.system_prompt,
        )
        print("Bundle not found; building from source model...")
        build_bundle(build_options)

    runtime = Qwen3AneRerankRuntime(bundle_dir=bundle_dir, compute_units=args.compute_units)

    # Imported lazily so conversion-only usage does not require the serving stack.
    from .api import create_app
    import uvicorn

    app = create_app(runtime=runtime, default_model_id=args.model_id)
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level=args.log_level,
    )
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level CLI parser with the ``convert`` and ``serve`` subcommands.

    Each subcommand stores its handler on ``func`` via ``set_defaults`` so
    ``main`` can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(
        prog="qwen3-ane-rerank",
        description="Convert Qwen3-Reranker model to Core ML ANE bundle and serve /v1/rerank endpoint.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # --- convert -----------------------------------------------------------
    convert = commands.add_parser(
        "convert",
        help="Convert local HF Qwen3-Reranker model into ANE-ready Core ML profile bundle",
    )
    convert.add_argument("--model-dir", required=True, help="Path to source HF model directory")
    convert.add_argument(
        "--bundle-dir",
        required=True,
        help="Output bundle directory (manifest + packages + tokenizer)",
    )
    _add_common_build_args(convert)
    convert.set_defaults(func=cmd_convert)

    # --- serve -------------------------------------------------------------
    serve = commands.add_parser(
        "serve",
        help="Run /v1/rerank endpoint backed by Core ML ANE profiles",
    )
    serve.add_argument(
        "--bundle-dir",
        required=True,
        help="Bundle directory created by convert",
    )
    serve.add_argument(
        "--model-dir",
        default=None,
        help="Source HF model directory (required if --auto-build and bundle missing)",
    )
    serve.add_argument(
        "--auto-build",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Auto-build bundle from --model-dir when manifest is missing",
    )
    _add_common_build_args(serve)
    serve.add_argument("--host", default="127.0.0.1")
    serve.add_argument("--port", type=int, default=8000)
    serve.add_argument(
        "--compute-units",
        default="cpu_and_ne",
        choices=["cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"],
        help="Core ML compute units preference",
    )
    # NOTE(review): default id says "0.6b" while the release header mentions a 4B
    # bundle — confirm which checkpoint this package actually ships.
    serve.add_argument(
        "--model-id",
        default="qwen3-reranker-0.6b-ane",
        help="Model id returned in API responses",
    )
    serve.add_argument(
        "--log-level",
        default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
    )
    serve.set_defaults(func=cmd_serve)

    return root
def main() -> None:
    """CLI entry point: parse ``sys.argv`` and dispatch to the chosen subcommand."""
    namespace = build_parser().parse_args()
    # Each subparser installs its handler via set_defaults(func=...).
    namespace.func(namespace)


if __name__ == "__main__":
    main()