# Export a dynamically quantized (qint8, arm64) ONNX model for KURE-v1.
# Tested under macOS on Apple Silicon (M1).
from sentence_transformers import SentenceTransformer, export_dynamic_quantized_onnx_model

model_id = "nlpai-lab/KURE-v1"
output_dir = "models/KURE-v1"  # local directory that receives the exported files

# Load the model with the ONNX backend on CPU; this converts the PyTorch
# weights to ONNX if no ONNX export exists yet.
onnx_model = SentenceTransformer(
    model_id,
    backend="onnx",
    model_kwargs={"provider": "CPUExecutionProvider"},
)

# Dynamically quantize for arm64 and save the result; this writes
# models/KURE-v1/onnx/model_qint8_arm64.onnx.
export_dynamic_quantized_onnx_model(
    onnx_model,
    quantization_config="arm64",
    model_name_or_path=output_dir,
)

# To load the quantized model afterwards:
# onnx_model = SentenceTransformer(
#     model_id,
#     backend="onnx",
#     model_kwargs={
#         "file_name": "onnx/model_qint8_arm64.onnx",
#         "provider": "CPUExecutionProvider",
#     },
# )

You can then find the quantized ONNX model, named `model_qint8_arm64.onnx`, in `models/KURE-v1/onnx/`.

How to use this PR:

# Clone the model repo; git-lfs must be installed beforehand so the
# LFS-tracked model weights are fetched correctly.
git clone https://huggingface.co/nlpai-lab/KURE-v1
cd KURE-v1
# Fetch the Hugging Face pull-request ref #5 into a local branch named
# pr/5, then switch to it.
git fetch origin refs/pr/5:pr/5
git checkout pr/5
wkpark changed pull request status to open
Ready to merge
This branch is ready to get merged automatically.

Sign up or log in to comment