File size: 2,303 Bytes
e11f7fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import argparse
import os
from pathlib import Path

from rkllm.api import RKLLM


def export_rkllm(
    model_dir: Path,
    output_path: Path,
    target_platform: str,
    num_npu_core: int,
    optimization_level: int,
):
    """Load a HuggingFace model, build it with RKLLM and write the result.

    The three RKLLM stages (``load_huggingface``, ``build``, ``export_rkllm``)
    are run in order; each returns a status code and any non-zero value
    aborts the export.

    Args:
        model_dir: Directory handed to ``RKLLM.load_huggingface`` as ``model``.
        output_path: Destination file; missing parent directories are created
            just before the export step.
        target_platform: Forwarded to ``RKLLM.build`` unchanged.
        num_npu_core: Forwarded to ``RKLLM.build`` unchanged.
        optimization_level: Forwarded to ``RKLLM.build`` unchanged.

    Raises:
        RuntimeError: If any of the three RKLLM calls returns a non-zero code.
    """
    converter = RKLLM()

    # Stage 1: load the checkpoint on CPU, without a LoRA adapter.
    load_status = converter.load_huggingface(
        model=str(model_dir),
        model_lora=None,
        device="cpu",
    )
    if load_status != 0:
        raise RuntimeError(f"load_huggingface failed for {model_dir}, ret={load_status}")

    # Stage 2: build. Quantization is switched off here; the dtype/algorithm
    # values are still passed along.
    # NOTE(review): presumably they are ignored while do_quantization is
    # False -- confirm against the RKLLM toolkit documentation.
    build_options = {
        "do_quantization": False,
        "optimization_level": optimization_level,
        "quantized_dtype": "w8a8",
        "quantized_algorithm": "normal",
        "target_platform": target_platform,
        "num_npu_core": num_npu_core,
        "extra_qparams": None,
    }
    build_status = converter.build(**build_options)
    if build_status != 0:
        raise RuntimeError(f"RKLLM build failed for {model_dir}, ret={build_status}")

    # Stage 3: make sure the target directory exists, then write the file.
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    export_status = converter.export_rkllm(str(output_path))
    if export_status != 0:
        raise RuntimeError(f"export_rkllm failed for {output_path}, ret={export_status}")


def main():
    """Command-line entry point: parse options and run the RKLLM export.

    ``--model-dir`` and ``--output`` are required. ``--target-platform``,
    ``--num-npu-core`` and ``--optimization-level`` default to ``rk3588``,
    ``1`` and ``1``. When ``--hf-home`` is given, the Hugging Face cache
    environment variables are pointed at that (resolved) directory before
    the export starts. The output path is printed once the export returns.
    """
    cli = argparse.ArgumentParser(description="Export a HuggingFace-format MiniCPM model to RKLLM.")
    cli.add_argument("--model-dir", required=True, help="Input HuggingFace model directory.")
    cli.add_argument("--output", required=True, help="Output .rkllm path.")
    cli.add_argument("--target-platform", default="rk3588", help="RK target platform.")
    cli.add_argument("--num-npu-core", type=int, default=1, help="NPU cores for RKLLM build.")
    cli.add_argument("--optimization-level", type=int, default=1, help="RKLLM optimization level.")
    cli.add_argument("--hf-home", default=None, help="Optional writable Hugging Face cache root.")
    opts = cli.parse_args()

    if opts.hf_home:
        # Redirect every Hugging Face cache location under one root.
        # NOTE(review): this runs after the module-level `rkllm.api` import;
        # if that import already loaded huggingface_hub/transformers, their
        # cache paths may have been fixed at import time -- confirm that
        # these variables still take effect.
        cache_root = Path(opts.hf_home).resolve()
        os.environ.update(
            {
                "HF_HOME": str(cache_root),
                "HUGGINGFACE_HUB_CACHE": str(cache_root / "hub"),
                "TRANSFORMERS_CACHE": str(cache_root / "transformers"),
            }
        )

    source_dir = Path(opts.model_dir)
    destination = Path(opts.output)
    export_rkllm(
        model_dir=source_dir,
        output_path=destination,
        target_platform=opts.target_platform,
        num_npu_core=opts.num_npu_core,
        optimization_level=opts.optimization_level,
    )
    print(f"Saved: {opts.output}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()