# Provenance (scraped model-page header, converted to comments so the file parses):
#   tooktang — Initial release: Qwen3-Reranker-4B CoreML ANE-optimized bundle + service
#   commit 1e1d0ce (verified)
from __future__ import annotations
import argparse
from pathlib import Path
from .converter import BuildOptions, build_bundle
from .profiles import parse_profiles
from .runtime import Qwen3AneRerankRuntime
def _add_common_build_args(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--profiles",
type=str,
default=None,
help="Shape profiles as comma list BxS (e.g. 1x128,4x128)",
)
parser.add_argument(
"--target",
type=str,
default="macOS14",
choices=["macOS14", "macOS15", "iOS17", "iOS18"],
help="Core ML minimum deployment target",
)
parser.add_argument(
"--compile-mlmodelc",
action=argparse.BooleanOptionalAction,
default=True,
help="Compile .mlpackage into .mlmodelc with coremlcompiler",
)
parser.add_argument(
"--system-prompt",
default=(
"Judge whether the Document meets the requirements based on the Query and the "
'Instruct provided. Note that the answer can only be "yes" or "no".'
),
help="System prompt used in reranker prompt template",
)
def cmd_convert(args: argparse.Namespace) -> None:
    """Build a Core ML ANE bundle from a local HF model dir and print a summary.

    Reads the parsed CLI namespace produced by the ``convert`` subparser, runs
    the converter, then prints the resulting manifest's key fields.
    """
    build_options = BuildOptions(
        model_dir=Path(args.model_dir),
        bundle_dir=Path(args.bundle_dir),
        profiles=parse_profiles(args.profiles),
        compile_mlmodelc=bool(args.compile_mlmodelc),
        minimum_deployment_target=args.target,
        system_prompt=args.system_prompt,
    )
    manifest = build_bundle(build_options)

    print(f"Built bundle at: {Path(args.bundle_dir).resolve()}")
    print(f"Model: {manifest.model_name}")
    print(f"Task: {manifest.task}")
    print(f"Hidden size: {manifest.hidden_size}")
    print(f"Token ids yes/no: {manifest.yes_token_id}/{manifest.no_token_id}")
    print("Profiles:")
    for profile in manifest.profiles:
        # Prefer the compiled .mlmodelc path when present, else the .mlpackage.
        model_ref = profile.compiled_path or profile.package_path
        print(
            f" - {profile.profile_id}: batch={profile.batch_size}, seq={profile.seq_len}, "
            f"model={model_ref}"
        )
def cmd_serve(args: argparse.Namespace) -> None:
    """Serve the /v1/rerank endpoint, auto-building the bundle if it is missing.

    Raises SystemExit when no manifest exists and either --auto-build is off or
    no --model-dir was given to build from.
    """
    bundle_dir = Path(args.bundle_dir)
    if not (bundle_dir / "manifest.json").exists():
        # No prebuilt bundle on disk: either refuse, or build one from --model-dir.
        if not args.auto_build:
            raise SystemExit(
                f"Bundle not found at {bundle_dir}. Run convert first or pass --auto-build --model-dir."
            )
        if not args.model_dir:
            raise SystemExit("--model-dir is required when --auto-build is enabled")
        build_options = BuildOptions(
            model_dir=Path(args.model_dir),
            bundle_dir=bundle_dir,
            profiles=parse_profiles(args.profiles),
            compile_mlmodelc=bool(args.compile_mlmodelc),
            minimum_deployment_target=args.target,
            system_prompt=args.system_prompt,
        )
        print("Bundle not found; building from source model...")
        build_bundle(build_options)

    runtime = Qwen3AneRerankRuntime(bundle_dir=bundle_dir, compute_units=args.compute_units)

    # Imported lazily so conversion-only usage does not require the serving stack.
    from .api import create_app
    import uvicorn

    app = create_app(runtime=runtime, default_model_id=args.model_id)
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level=args.log_level,
    )
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level CLI parser with the ``convert`` and ``serve`` subcommands.

    Each subcommand stores its handler on ``func`` via ``set_defaults`` so
    ``main`` can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(
        prog="qwen3-ane-rerank",
        description="Convert Qwen3-Reranker model to Core ML ANE bundle and serve /v1/rerank endpoint.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # --- convert -----------------------------------------------------------
    convert = commands.add_parser(
        "convert",
        help="Convert local HF Qwen3-Reranker model into ANE-ready Core ML profile bundle",
    )
    convert.add_argument("--model-dir", required=True, help="Path to source HF model directory")
    convert.add_argument(
        "--bundle-dir",
        required=True,
        help="Output bundle directory (manifest + packages + tokenizer)",
    )
    _add_common_build_args(convert)
    convert.set_defaults(func=cmd_convert)

    # --- serve -------------------------------------------------------------
    serve = commands.add_parser(
        "serve",
        help="Run /v1/rerank endpoint backed by Core ML ANE profiles",
    )
    serve.add_argument(
        "--bundle-dir",
        required=True,
        help="Bundle directory created by convert",
    )
    serve.add_argument(
        "--model-dir",
        default=None,
        help="Source HF model directory (required if --auto-build and bundle missing)",
    )
    serve.add_argument(
        "--auto-build",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Auto-build bundle from --model-dir when manifest is missing",
    )
    _add_common_build_args(serve)
    serve.add_argument("--host", default="127.0.0.1")
    serve.add_argument("--port", type=int, default=8000)
    serve.add_argument(
        "--compute-units",
        default="cpu_and_ne",
        choices=["cpu_and_ne", "all", "cpu_only", "cpu_and_gpu"],
        help="Core ML compute units preference",
    )
    # NOTE(review): default id says "0.6b" while the release header mentions a 4B
    # bundle — confirm which checkpoint this package actually ships.
    serve.add_argument(
        "--model-id",
        default="qwen3-reranker-0.6b-ane",
        help="Model id returned in API responses",
    )
    serve.add_argument(
        "--log-level",
        default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
    )
    serve.set_defaults(func=cmd_serve)

    return root
def main() -> None:
    """CLI entry point: parse ``sys.argv`` and dispatch to the chosen subcommand."""
    namespace = build_parser().parse_args()
    # Each subparser installs its handler via set_defaults(func=...).
    namespace.func(namespace)


if __name__ == "__main__":
    main()