# VoxCPM1.5-RKNN2 / convert / scripts / export_rkllm.py
# happyme531's picture
# Upload 68 files
# e11f7fb verified
import argparse
import os
from pathlib import Path
from rkllm.api import RKLLM
def export_rkllm(
    model_dir: Path,
    output_path: Path,
    target_platform: str,
    num_npu_core: int,
    optimization_level: int,
):
    """Convert a HuggingFace-format model directory into an ``.rkllm`` file.

    Drives the RKLLM toolkit through its load, build and export calls in
    that order, stopping at the first call that reports a non-zero status.

    Args:
        model_dir: Path handed to ``RKLLM.load_huggingface`` as the model.
        output_path: Destination file; missing parent directories are
            created just before exporting.
        target_platform: Forwarded unchanged to ``RKLLM.build``.
        num_npu_core: Forwarded unchanged to ``RKLLM.build``.
        optimization_level: Forwarded unchanged to ``RKLLM.build``.

    Raises:
        RuntimeError: If loading, building or exporting returns a non-zero
            status code.
    """
    converter = RKLLM()

    # Stage 1: load the weights on the CPU, without any LoRA adapter.
    status = converter.load_huggingface(
        model=str(model_dir),
        model_lora=None,
        device="cpu",
    )
    if status != 0:
        raise RuntimeError(f"load_huggingface failed for {model_dir}, ret={status}")

    # Stage 2: build for the requested target.
    # NOTE(review): quantization is switched off, yet a dtype and an algorithm
    # are still supplied -- presumably ignored by the toolkit; confirm.
    build_options = dict(
        do_quantization=False,
        optimization_level=optimization_level,
        quantized_dtype="w8a8",
        quantized_algorithm="normal",
        target_platform=target_platform,
        num_npu_core=num_npu_core,
        extra_qparams=None,
    )
    status = converter.build(**build_options)
    if status != 0:
        raise RuntimeError(f"RKLLM build failed for {model_dir}, ret={status}")

    # Stage 3: make sure the destination directory exists, then write the file.
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    status = converter.export_rkllm(str(output_path))
    if status != 0:
        raise RuntimeError(f"export_rkllm failed for {output_path}, ret={status}")
def main(argv=None):
    """Parse command-line options and export the model to RKLLM format.

    Args:
        argv: Optional list of argument strings to parse in place of the
            process command line. ``None`` (the default) makes ``argparse``
            read ``sys.argv[1:]``, which is the behaviour of a plain
            ``main()`` call.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid or missing options.
        RuntimeError: Propagated from ``export_rkllm`` when a conversion
            step fails.
    """
    parser = argparse.ArgumentParser(description="Export a HuggingFace-format MiniCPM model to RKLLM.")
    parser.add_argument("--model-dir", required=True, help="Input HuggingFace model directory.")
    parser.add_argument("--output", required=True, help="Output .rkllm path.")
    parser.add_argument("--target-platform", default="rk3588", help="RK target platform.")
    parser.add_argument("--num-npu-core", type=int, default=1, help="NPU cores for RKLLM build.")
    parser.add_argument("--optimization-level", type=int, default=1, help="RKLLM optimization level.")
    parser.add_argument("--hf-home", default=None, help="Optional writable Hugging Face cache root.")
    args = parser.parse_args(argv)

    if args.hf_home:
        # NOTE(review): these variables are set after this file's module-level
        # `rkllm` import has already run. If that import pulls in
        # huggingface_hub/transformers, their cache paths may already have
        # been resolved and this override would come too late -- confirm.
        hf_home = Path(args.hf_home).resolve()
        hub_cache = str(hf_home / "hub")
        os.environ["HF_HOME"] = str(hf_home)
        # HF_HUB_CACHE is the current variable name and wins over the legacy
        # HUGGINGFACE_HUB_CACHE, so both are set; otherwise an inherited
        # HF_HUB_CACHE would silently defeat --hf-home.
        os.environ["HF_HUB_CACHE"] = hub_cache
        os.environ["HUGGINGFACE_HUB_CACHE"] = hub_cache
        # Legacy variable, kept for older transformers releases.
        os.environ["TRANSFORMERS_CACHE"] = str(hf_home / "transformers")

    export_rkllm(
        model_dir=Path(args.model_dir),
        output_path=Path(args.output),
        target_platform=args.target_platform,
        num_npu_core=args.num_npu_core,
        optimization_level=args.optimization_level,
    )
    print(f"Saved: {args.output}")
# Run the exporter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()