| import argparse | |
| import os | |
| from pathlib import Path | |
| from rkllm.api import RKLLM | |
def export_rkllm(
    model_dir: Path,
    output_path: Path,
    target_platform: str,
    num_npu_core: int,
    optimization_level: int,
) -> None:
    """Convert a HuggingFace-format model directory into a .rkllm artifact.

    Runs the three RKLLM toolkit stages in order: load the HF model on CPU,
    build for the Rockchip target, and export the resulting .rkllm file.

    Args:
        model_dir: Directory containing the HuggingFace-format model.
        output_path: Destination path for the exported .rkllm file.
        target_platform: Rockchip target platform string (e.g. "rk3588").
        num_npu_core: Number of NPU cores the build should target.
        optimization_level: Optimization level forwarded to RKLLM build().

    Raises:
        FileNotFoundError: If model_dir does not exist or is not a directory.
        RuntimeError: If any RKLLM stage (load/build/export) returns nonzero.
    """
    # Fail fast with a clear message instead of the opaque nonzero ret code
    # load_huggingface would produce for a missing input directory.
    if not model_dir.is_dir():
        raise FileNotFoundError(f"Model directory not found: {model_dir}")

    llm = RKLLM()
    ret = llm.load_huggingface(model=str(model_dir), model_lora=None, device="cpu")
    if ret != 0:
        raise RuntimeError(f"load_huggingface failed for {model_dir}, ret={ret}")

    # do_quantization=False: the quantized_* arguments are still passed but
    # presumably ignored for an unquantized build — TODO confirm against the
    # RKLLM toolkit docs.
    ret = llm.build(
        do_quantization=False,
        optimization_level=optimization_level,
        quantized_dtype="w8a8",
        quantized_algorithm="normal",
        target_platform=target_platform,
        num_npu_core=num_npu_core,
        extra_qparams=None,
    )
    if ret != 0:
        raise RuntimeError(f"RKLLM build failed for {model_dir}, ret={ret}")

    # Ensure the destination directory exists before the SDK writes the file.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    ret = llm.export_rkllm(str(output_path))
    if ret != 0:
        raise RuntimeError(f"export_rkllm failed for {output_path}, ret={ret}")
def _configure_hf_cache(hf_home: str) -> None:
    """Point the Hugging Face cache environment variables at a writable root."""
    root = Path(hf_home).resolve()
    os.environ["HF_HOME"] = str(root)
    os.environ["HUGGINGFACE_HUB_CACHE"] = str(root / "hub")
    os.environ["TRANSFORMERS_CACHE"] = str(root / "transformers")


def main():
    """CLI entry point: parse arguments, set up caches, and run the export."""
    parser = argparse.ArgumentParser(description="Export a HuggingFace-format MiniCPM model to RKLLM.")
    parser.add_argument("--model-dir", required=True, help="Input HuggingFace model directory.")
    parser.add_argument("--output", required=True, help="Output .rkllm path.")
    parser.add_argument("--target-platform", default="rk3588", help="RK target platform.")
    parser.add_argument("--num-npu-core", type=int, default=1, help="NPU cores for RKLLM build.")
    parser.add_argument("--optimization-level", type=int, default=1, help="RKLLM optimization level.")
    parser.add_argument("--hf-home", default=None, help="Optional writable Hugging Face cache root.")
    args = parser.parse_args()

    # Redirect HF caches only when the caller asked for it.
    if args.hf_home:
        _configure_hf_cache(args.hf_home)

    export_rkllm(
        model_dir=Path(args.model_dir),
        output_path=Path(args.output),
        target_platform=args.target_platform,
        num_npu_core=args.num_npu_core,
        optimization_level=args.optimization_level,
    )
    print(f"Saved: {args.output}")
# Script entry guard: run the exporter only when executed directly,
# so the module can be imported without side effects.
if __name__ == "__main__":
    main()