Spaces:
Configuration error
Configuration error
| #!/usr/bin/env python3 | |
| """Convert openbmb/MiniCPM5-1B to a Transformers.js q4 ONNX repo.""" | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import logging | |
| import os | |
| import shutil | |
| import subprocess | |
| import sys | |
| import tarfile | |
| import tempfile | |
| import urllib.request | |
| from dataclasses import asdict | |
| from pathlib import Path | |
| from huggingface_hub import HfApi, create_repo | |
| from optimum.exporters.onnx import main_export | |
| from transformers import AutoConfig | |
# Hub repo id exported when --source-model is not given on the command line.
SOURCE_MODEL = "openbmb/MiniCPM5-1B"

# transformers.js release whose source tarball is downloaded; the tag is part
# of both the download URL and the name of the directory the archive unpacks to.
TRANSFORMERS_JS_TAG = "3.8.1"
TRANSFORMERS_JS_TARBALL = (
    "https://github.com/huggingface/transformers.js/archive/refs/tags/"
    f"{TRANSFORMERS_JS_TAG}.tar.gz"
)
| def run(cmd: list[str], cwd: Path | None = None) -> None: | |
| print("+", " ".join(cmd), flush=True) | |
| subprocess.run(cmd, cwd=cwd, check=True) | |
def download_transformers_js(work_dir: Path) -> Path:
    """Download and unpack the pinned transformers.js source tarball.

    The archive at ``TRANSFORMERS_JS_TARBALL`` is saved as
    ``work_dir / "transformers.js.tar.gz"`` and extracted into ``work_dir``.

    Args:
        work_dir: Existing directory that receives the archive and its contents.

    Returns:
        ``work_dir / f"transformers.js-{TRANSFORMERS_JS_TAG}"``, the directory
        the archive is expected to unpack into.

    Raises:
        FileNotFoundError: If that directory is missing after extraction.
    """
    archive_path = work_dir / "transformers.js.tar.gz"
    urllib.request.urlretrieve(TRANSFORMERS_JS_TARBALL, archive_path)

    with tarfile.open(archive_path) as archive:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: the "data" filter rejects
            # members that would land outside work_dir (absolute paths, "..",
            # links pointing elsewhere) and special files.
            archive.extractall(work_dir, filter="data")
        else:
            # Interpreter predates tarfile extraction filters.
            archive.extractall(work_dir)

    extracted_dir = work_dir / f"transformers.js-{TRANSFORMERS_JS_TAG}"
    if not extracted_dir.is_dir():
        # Fail here rather than later with an opaque import error.
        raise FileNotFoundError(
            f"Expected {extracted_dir} after extracting {archive_path}"
        )
    return extracted_dir
def patch_config(config_path: Path, dtype: str, q4_external_chunks: int) -> None:
    """Record Transformers.js loading hints inside a model ``config.json``.

    The file is rewritten in place with a ``"transformers.js_config"`` section
    (created when absent) whose ``dtype`` is set to *dtype*.

    Args:
        config_path: Path of the JSON config file to update.
        dtype: Value stored under ``transformers.js_config.dtype``.
        q4_external_chunks: When non-zero, stored as the
            ``use_external_data_format`` entry for ``model_q4.onnx``; when
            zero, any existing ``use_external_data_format`` entry is removed.
    """
    # JSON text is UTF-8; be explicit so the result does not depend on the
    # platform's locale encoding.
    config = json.loads(config_path.read_text(encoding="utf-8"))

    tjs_config = config.setdefault("transformers.js_config", {})
    tjs_config["dtype"] = dtype
    if q4_external_chunks:
        tjs_config["use_external_data_format"] = {
            "model_q4.onnx": q4_external_chunks,
        }
    else:
        tjs_config.pop("use_external_data_format", None)

    config_path.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8")
def write_readme(output_dir: Path, source_model: str, target_repo: str) -> None:
    """Write the model card (``README.md``) for the exported repo.

    Args:
        output_dir: Existing directory that receives ``README.md``.
        source_model: Repo id of the original model; used for the
            ``base_model`` metadata field and in the description.
        target_repo: Repo id shown in the JavaScript usage example. When empty,
            a placeholder repo id is shown instead.
    """
    # An empty target_repo (the CLI default) would otherwise render a usage
    # snippet that tries to load the model id "".
    repo_id = target_repo or "your-username/your-model-repo"
    readme = f"""---
license: apache-2.0
library_name: transformers.js
pipeline_tag: text-generation
base_model: {source_model}
tags:
- transformers.js
- onnx
- onnxruntime-web
- llama
- minicpm5
- text-generation
- browser
- webgpu
---
# MiniCPM5-1B ONNX Web
Transformers.js q4 ONNX export of `{source_model}` for browser text generation.
## Files
- `onnx/model_q4.onnx`: ONNX Runtime 4-bit MatMul quantized decoder with KV cache.
- `config.json`: includes `transformers.js_config.dtype = "q4"` so Transformers.js loads the q4 artifact by default.
- tokenizer and generation config files copied from the source model export.
## Usage
```js
import {{ pipeline }} from "@huggingface/transformers";
const generator = await pipeline("text-generation", "{repo_id}", {{
dtype: "q4",
device: "webgpu",
}});
const output = await generator("Briefly introduce yourself.", {{
max_new_tokens: 64,
temperature: 0.2,
do_sample: true,
}});
console.log(output[0].generated_text);
```
If WebGPU is unavailable, use `device: "wasm"` in the browser.
"""
    # Markdown on the Hub is UTF-8; do not depend on the locale encoding.
    (output_dir / "README.md").write_text(readme, encoding="utf-8")
def _resolve_export_device(requested: str) -> str:
    """Map the ``--device`` choice to a concrete device name for the exporter."""
    if requested != "auto":
        return requested
    try:
        import torch

        return "cuda" if torch.cuda.is_available() else "cpu"
    except Exception:
        # Deliberate best-effort: if torch cannot be imported or queried,
        # fall back to a CPU export instead of aborting.
        return "cpu"


def _checked_output_dir(output_dir: str, model_dir: Path) -> Path | None:
    """Resolve ``--output-dir``; return ``None`` when no copy is needed.

    Raises:
        ValueError: If the output directory contains the export directory or
            lies inside it. Replacing or filling such a directory would
            delete or recurse into the export itself.
    """
    if not output_dir:
        return None
    final_dir = Path(output_dir).resolve()
    export_dir = model_dir.resolve()
    if final_dir == export_dir:
        # The export already lives there; removing and re-copying it would
        # destroy the only copy.
        return None
    if final_dir in export_dir.parents or export_dir in final_dir.parents:
        raise ValueError(
            f"--output-dir {final_dir} overlaps the export directory {export_dir}; "
            "choose a location outside of it"
        )
    return final_dir


def convert(args: argparse.Namespace) -> Path:
    """Export the source model to ONNX, quantize it to q4 and package the result.

    Steps, in order: download the transformers.js sources, export the model
    with ``main_export`` (task ``text-generation-with-past``), run the
    transformers.js ``quantize`` script in ``q4`` mode, patch ``config.json``,
    write ``README.md``, optionally copy everything to ``args.output_dir`` and
    optionally upload the folder to ``args.target_repo``.

    Args:
        args: Parsed options; reads ``work_dir``, ``source_model``, ``device``,
            ``opset``, ``export_dtype``, ``block_size``, ``output_dir``,
            ``target_repo`` and ``private``.

    Returns:
        The directory holding the final files: the resolved ``output_dir``
        when one was given, otherwise the export directory in the work dir.

    Raises:
        ValueError: If ``output_dir`` overlaps the export directory.
        FileNotFoundError: If quantization did not produce
            ``onnx/model_q4.onnx``.
    """
    work_dir = Path(args.work_dir or tempfile.mkdtemp(prefix="minicpm5-tjs-")).resolve()
    work_dir.mkdir(parents=True, exist_ok=True)
    print(f"Working directory: {work_dir}", flush=True)

    model_dir = work_dir / "export" / args.source_model
    # Validate the destination before the download and export, so a bad
    # --output-dir fails immediately instead of after the long-running steps.
    final_dir = _checked_output_dir(args.output_dir, model_dir)

    transformers_js_dir = download_transformers_js(work_dir)
    # The quantizer is imported from the unpacked transformers.js sources, so
    # their root has to be on sys.path before the import below.
    sys.path.insert(0, str(transformers_js_dir))
    from scripts.quantize import QuantizationArguments, quantize

    logging.getLogger("onnxruntime.quantization.matmul_4bits_quantizer").setLevel(logging.WARNING)

    model_dir.mkdir(parents=True, exist_ok=True)

    device = _resolve_export_device(args.device)
    print(f"Export device: {device}", flush=True)

    config = AutoConfig.from_pretrained(args.source_model)
    config_summary = {
        "model_type": config.model_type,
        "architectures": getattr(config, "architectures", None),
        "hidden_size": getattr(config, "hidden_size", None),
        "num_hidden_layers": getattr(config, "num_hidden_layers", None),
        "torch_dtype": str(getattr(config, "torch_dtype", None)),
    }
    print("Source config:", json.dumps(config_summary, indent=2), flush=True)

    main_export(
        model_name_or_path=args.source_model,
        output=model_dir,
        task="text-generation-with-past",
        opset=args.opset,
        device=device,
        dtype=args.export_dtype,
        do_validation=False,
        trust_remote_code=False,
        library_name="transformers",
        slim=False,
    )

    onnx_dir = model_dir / "onnx"
    onnx_dir.mkdir(exist_ok=True)
    quant_args = QuantizationArguments(
        modes=["q4"],
        per_channel=False,
        reduce_range=False,
        block_size=args.block_size,
        is_symmetric=True,
        accuracy_level=None,
        op_block_list=None,
    )
    quantize(str(model_dir), str(onnx_dir), quant_args)
    (model_dir / "quantize_config.json").write_text(json.dumps(asdict(quant_args), indent=2) + "\n")

    # Drop top-level *.onnx* files (presumably the unquantized export and its
    # external data); the glob is non-recursive, so onnx/ is left alone.
    for path in model_dir.glob("*.onnx*"):
        path.unlink()

    q4_model = onnx_dir / "model_q4.onnx"
    if not q4_model.exists():
        raise FileNotFoundError(f"Missing expected quantized model: {q4_model}")

    # One external-data chunk when a model_q4.onnx_data sidecar exists, else none.
    q4_external_chunks = 1 if (onnx_dir / "model_q4.onnx_data").exists() else 0
    patch_config(model_dir / "config.json", "q4", q4_external_chunks)
    write_readme(model_dir, args.source_model, args.target_repo)

    if final_dir is not None:
        # Replace any previous contents of the output directory wholesale.
        if final_dir.exists():
            shutil.rmtree(final_dir)
        shutil.copytree(model_dir, final_dir)
        model_dir = final_dir

    print("Final files:", flush=True)
    for file in sorted(p.relative_to(model_dir).as_posix() for p in model_dir.rglob("*") if p.is_file()):
        print(file, flush=True)

    if args.target_repo:
        # HF_TOKEN wins when set; otherwise True is passed, presumably so
        # huggingface_hub uses the locally stored login token (confirm
        # against the huggingface_hub documentation).
        token = os.environ.get("HF_TOKEN") or True
        create_repo(args.target_repo, repo_type="model", private=args.private, exist_ok=True, token=token)
        api = HfApi(token=token)
        api.upload_folder(
            repo_id=args.target_repo,
            repo_type="model",
            folder_path=str(model_dir),
            commit_message=f"Add q4 Transformers.js export of {args.source_model}",
        )
        print(f"Uploaded to https://huggingface.co/{args.target_repo}", flush=True)

    return model_dir
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the converter's command-line options.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point relies
            on; passing a list allows programmatic use.

    Returns:
        Namespace with ``source_model``, ``target_repo``, ``output_dir``,
        ``work_dir``, ``device``, ``export_dtype``, ``opset``, ``block_size``
        and ``private``.
    """
    parser = argparse.ArgumentParser(
        description="Export a Hugging Face text-generation model to a Transformers.js q4 ONNX repo.",
    )
    parser.add_argument(
        "--source-model",
        default=SOURCE_MODEL,
        help="Model to export (default: %(default)s).",
    )
    parser.add_argument(
        "--target-repo",
        default="",
        help="Hub repo id to upload the result to; leave empty to skip the upload.",
    )
    parser.add_argument(
        "--output-dir",
        default="",
        help="Copy the finished export to this directory, replacing it if it exists.",
    )
    parser.add_argument(
        "--work-dir",
        default="",
        help="Scratch directory; a temporary directory is created when empty.",
    )
    parser.add_argument(
        "--device",
        default="auto",
        choices=["auto", "cpu", "cuda"],
        help="Export device; 'auto' uses cuda when torch reports it available, else cpu.",
    )
    parser.add_argument(
        "--export-dtype",
        default="fp32",
        choices=["fp32", "fp16"],
        help="dtype passed to the ONNX exporter (default: %(default)s).",
    )
    parser.add_argument(
        "--opset",
        type=int,
        default=18,
        help="ONNX opset passed to the exporter (default: %(default)s).",
    )
    parser.add_argument(
        "--block-size",
        type=int,
        default=32,
        help="Block size passed to the q4 quantizer (default: %(default)s).",
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Create the target repo as private.",
    )
    return parser.parse_args(argv)
# Script entry point: parse the command-line options, then run the conversion.
if __name__ == "__main__":
    cli_args = parse_args()
    convert(cli_args)