from __future__ import annotations
import argparse
from pathlib import Path
from .converter import BuildOptions, build_bundle
from .profiles import parse_profiles
from .runtime import Qwen3AneRerankRuntime
def _add_common_build_args(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--profiles",
type=str,
default=None,
help="Shape profiles as comma list BxS (e.g. 1x128,4x128)",
)
parser.add_argument(
"--target",
type=str,
default="macOS14",
choices=["macOS14", "macOS15", "iOS17", "iOS18"],
help="Core ML minimum deployment target",
)
parser.add_argument(
"--compile-mlmodelc",
action=argparse.BooleanOptionalAction,
default=True,
help="Compile .mlpackage into .mlmodelc with coremlcompiler",
)
parser.add_argument(
"--system-prompt",
default=(
"Judge whether the Document meets the requirements based on the Query and the "
'Instruct provided. Note that the answer can only be "yes" or "no".'
),
help="System prompt used in reranker prompt template",
)
def cmd_convert(args: argparse.Namespace) -> None:
    """Handle the ``convert`` subcommand: build a Core ML bundle and summarize it."""
    build_options = BuildOptions(
        model_dir=Path(args.model_dir),
        bundle_dir=Path(args.bundle_dir),
        profiles=parse_profiles(args.profiles),
        compile_mlmodelc=bool(args.compile_mlmodelc),
        minimum_deployment_target=args.target,
        system_prompt=args.system_prompt,
    )
    manifest = build_bundle(build_options)

    # Human-readable summary of what was produced.
    resolved_bundle = Path(args.bundle_dir).resolve()
    print(f"Built bundle at: {resolved_bundle}")
    print(f"Model: {manifest.model_name}")
    print(f"Task: {manifest.task}")
    print(f"Hidden size: {manifest.hidden_size}")
    print(f"Token ids yes/no: {manifest.yes_token_id}/{manifest.no_token_id}")
    print("Profiles:")
    for profile in manifest.profiles:
        # Prefer the compiled .mlmodelc path; fall back to the .mlpackage.
        model_path = profile.compiled_path or profile.package_path
        print(
            f"  - {profile.profile_id}: batch={profile.batch_size}, seq={profile.seq_len}, "
            f"model={model_path}"
        )
def cmd_serve(args: argparse.Namespace) -> None:
    """Handle the ``serve`` subcommand: run the /v1/rerank HTTP endpoint.

    If the bundle manifest is missing and --auto-build is enabled, the bundle
    is built from --model-dir before the server starts; otherwise a missing
    bundle is a fatal error.
    """
    bundle_dir = Path(args.bundle_dir)
    if not (bundle_dir / "manifest.json").exists():
        # Guard clauses: refuse to continue unless we are allowed (and able) to build.
        if not args.auto_build:
            raise SystemExit(
                f"Bundle not found at {bundle_dir}. Run convert first or pass --auto-build --model-dir."
            )
        if not args.model_dir:
            raise SystemExit("--model-dir is required when --auto-build is enabled")
        build_options = BuildOptions(
            model_dir=Path(args.model_dir),
            bundle_dir=bundle_dir,
            profiles=parse_profiles(args.profiles),
            compile_mlmodelc=bool(args.compile_mlmodelc),
            minimum_deployment_target=args.target,
            system_prompt=args.system_prompt,
        )
        print("Bundle not found; building from source model...")
        build_bundle(build_options)

    runtime = Qwen3AneRerankRuntime(bundle_dir=bundle_dir, compute_units=args.compute_units)

    # Imported lazily so convert-only usage does not require the server stack.
    from .api import create_app
    import uvicorn

    uvicorn.run(
        create_app(runtime=runtime, default_model_id=args.model_id),
        host=args.host,
        port=args.port,
        log_level=args.log_level,
    )
def build_parser() -> argparse.ArgumentParser:
    """Construct the top-level CLI parser with ``convert`` and ``serve`` subcommands."""
    root = argparse.ArgumentParser(
        prog="qwen3-ane-rerank",
        description="Convert Qwen3-Reranker model to Core ML ANE bundle and serve /v1/rerank endpoint.",
    )
    sub = root.add_subparsers(dest="command", required=True)

    # --- convert -----------------------------------------------------------
    convert = sub.add_parser(
        "convert",
        help="Convert local HF Qwen3-Reranker model into ANE-ready Core ML profile bundle",
    )
    convert.add_argument("--model-dir", required=True, help="Path to source HF model directory")
    convert.add_argument(
        "--bundle-dir",
        required=True,
        help="Output bundle directory (manifest + packages + tokenizer)",
    )
    _add_common_build_args(convert)
    convert.set_defaults(func=cmd_convert)

    # --- serve -------------------------------------------------------------
    serve = sub.add_parser(
        "serve",
        help="Run /v1/rerank endpoint backed by Core ML ANE profiles",
    )
    serve.add_argument(
        "--bundle-dir",
        required=True,
        help="Bundle directory created by convert",
    )
    serve.add_argument(
        "--model-dir",
        default=None,
        help="Source HF model directory (required if --auto-build and bundle missing)",
    )
    serve.add_argument(
        "--auto-build",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Auto-build bundle from --model-dir when manifest is missing",
    )
    _add_common_build_args(serve)
    serve.add_argument("--host", default="127.0.0.1")
    serve.add_argument("--port", type=int, default=8000)
    serve.add_argument(
        "--compute-units",
        default="cpu_and_ne",
        choices=["cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"],
        help="Core ML compute units preference",
    )
    serve.add_argument(
        "--model-id",
        default="qwen3-reranker-0.6b-ane",
        help="Model id returned in API responses",
    )
    serve.add_argument(
        "--log-level",
        default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
    )
    serve.set_defaults(func=cmd_serve)

    return root
def main() -> None:
    """CLI entry point: parse arguments and dispatch to the chosen subcommand."""
    cli = build_parser()
    namespace = cli.parse_args()
    # Each subparser installs its handler via set_defaults(func=...).
    namespace.func(namespace)


if __name__ == "__main__":
    main()