pii-ner-model / export_model.py
xiaoxiae's picture
add export_model.py
3db92b8 verified
Raw
History Blame Contribute Delete
1.86 kB
"""Offline recipe: export a HF token-classification model to ONNX + dynamic INT8.
Needs torch + optimum (not a runtime dep). Produces the artifacts vendored next to this
file (quantized .onnx + tokenizer + config), which the agent loads with onnxruntime only
— no torch. Validated for akdeniz27/bert-base-turkish-cased-ner (see README.md).
"""
import argparse
import os
import shutil
import tempfile

from optimum.onnxruntime import ORTModelForTokenClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from transformers import AutoTokenizer
def main() -> None:
    """Export + dynamic-INT8-quantize the given HF model into ``--out``.

    Command-line arguments (both required):
        --model  HF model id to export, e.g. akdeniz27/bert-base-turkish-cased-ner.
        --out    Output directory; created if missing. Receives the quantized
                 ONNX model, the tokenizer files and ``config.json``.

    The intermediate FP32 ONNX export lives in a private temporary directory
    that is removed when the export finishes *or fails*. It never collides
    with (or deletes) an existing ``<out>-fp32`` directory, and a trailing
    slash on ``--out`` cannot place it inside the output directory.

    Raises:
        SystemExit: from argparse when a required argument is missing.
        Any exception raised by optimum / transformers during download,
        export, quantization or saving is propagated unchanged.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", required=True, help="HF model id, e.g. akdeniz27/bert-base-turkish-cased-ner")
    ap.add_argument("--out", required=True, help="output dir for the quantized model + tokenizer")
    args = ap.parse_args()

    os.makedirs(args.out, exist_ok=True)

    # The context manager guarantees the (large) FP32 export is cleaned up even
    # if any step below raises, and gives it a unique path of its own.
    with tempfile.TemporaryDirectory(prefix="onnx-fp32-") as fp32_dir:
        # Export FP32 ONNX, then dynamic-INT8 quantize into `out`.
        ORTModelForTokenClassification.from_pretrained(args.model, export=True).save_pretrained(fp32_dir)
        quantizer = ORTQuantizer.from_pretrained(fp32_dir)
        # is_static=False -> dynamic quantization (no calibration dataset needed).
        qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
        quantizer.quantize(save_dir=args.out, quantization_config=qconfig)

        # Tokenizer (tokenizer.json for the Rust runtime) + config.json (id2label).
        AutoTokenizer.from_pretrained(args.model).save_pretrained(args.out)

        # Only fall back to the FP32 export's config when the steps above did
        # not already leave a config.json in the output directory.
        config_dst = os.path.join(args.out, "config.json")
        if not os.path.exists(config_dst):
            shutil.copy(os.path.join(fp32_dir, "config.json"), config_dst)

    print(f"Exported INT8 ONNX + tokenizer to {args.out}")


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()