"""Export a HuggingFace-format MiniCPM model to the RKLLM format.

Thin command-line wrapper around the RKLLM toolkit: loads a local
HuggingFace model directory, builds it for a Rockchip NPU target, and
writes a ``.rkllm`` artifact.
"""

import argparse
import os
from pathlib import Path

from rkllm.api import RKLLM


def export_rkllm(
    model_dir: Path,
    output_path: Path,
    target_platform: str,
    num_npu_core: int,
    optimization_level: int,
) -> None:
    """Load *model_dir*, build for *target_platform*, and export to *output_path*.

    Args:
        model_dir: HuggingFace-format model directory to convert.
        output_path: Destination ``.rkllm`` file; parent dirs are created.
        target_platform: Rockchip target (e.g. ``rk3588``).
        num_npu_core: Number of NPU cores to build for.
        optimization_level: RKLLM optimization level passed to ``build``.

    Raises:
        FileNotFoundError: If *model_dir* is not an existing directory.
        RuntimeError: If any RKLLM stage (load/build/export) returns a
            non-zero code.
    """
    # Fail fast with a clear error instead of the opaque non-zero ret code
    # that load_huggingface would produce for a missing/typo'd path.
    if not model_dir.is_dir():
        raise FileNotFoundError(f"Model directory not found: {model_dir}")

    llm = RKLLM()

    ret = llm.load_huggingface(model=str(model_dir), model_lora=None, device="cpu")
    if ret != 0:
        raise RuntimeError(f"load_huggingface failed for {model_dir}, ret={ret}")

    # do_quantization=False: the quantized_* arguments are still supplied
    # because the build API expects them, but this is a non-quantized export.
    ret = llm.build(
        do_quantization=False,
        optimization_level=optimization_level,
        quantized_dtype="w8a8",
        quantized_algorithm="normal",
        target_platform=target_platform,
        num_npu_core=num_npu_core,
        extra_qparams=None,
    )
    if ret != 0:
        raise RuntimeError(f"RKLLM build failed for {model_dir}, ret={ret}")

    # Ensure the destination directory exists before RKLLM tries to write.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    ret = llm.export_rkllm(str(output_path))
    if ret != 0:
        raise RuntimeError(f"export_rkllm failed for {output_path}, ret={ret}")


def main() -> None:
    """Parse CLI arguments, configure optional HF caches, and run the export."""
    parser = argparse.ArgumentParser(
        description="Export a HuggingFace-format MiniCPM model to RKLLM."
    )
    parser.add_argument("--model-dir", required=True, help="Input HuggingFace model directory.")
    parser.add_argument("--output", required=True, help="Output .rkllm path.")
    parser.add_argument("--target-platform", default="rk3588", help="RK target platform.")
    parser.add_argument("--num-npu-core", type=int, default=1, help="NPU cores for RKLLM build.")
    parser.add_argument("--optimization-level", type=int, default=1, help="RKLLM optimization level.")
    parser.add_argument("--hf-home", default=None, help="Optional writable Hugging Face cache root.")
    args = parser.parse_args()

    if args.hf_home:
        # NOTE(review): Hugging Face libraries commonly read these variables
        # at import time, and `rkllm.api` was already imported at module load.
        # Setting them here only takes effect if the HF stack is imported
        # lazily inside load_huggingface — confirm against the rkllm toolkit.
        hf_root = Path(args.hf_home).resolve()
        os.environ["HF_HOME"] = str(hf_root)
        os.environ["HUGGINGFACE_HUB_CACHE"] = str(hf_root / "hub")
        os.environ["TRANSFORMERS_CACHE"] = str(hf_root / "transformers")

    export_rkllm(
        model_dir=Path(args.model_dir),
        output_path=Path(args.output),
        target_platform=args.target_platform,
        num_npu_core=args.num_npu_core,
        optimization_level=args.optimization_level,
    )
    print(f"Saved: {args.output}")


if __name__ == "__main__":
    main()