onnx arm64
#5
by
wkpark
- opened
# Run on macOS (Apple M1)
from sentence_transformers import SentenceTransformer, export_dynamic_quantized_onnx_model
# Hugging Face Hub id of the model to load and quantize.
model_id = "nlpai-lab/KURE-v1"

# Optional (disabled): save a local copy of the model first.
# output_dir = "models/KURE-v1"  # locally saved out_dir.
# onnx_model = SentenceTransformer(model_id)
# onnx_model.save(output_dir)

# Load the model with the ONNX backend, using the CPU execution provider.
onnx_model = SentenceTransformer(
    model_id,
    backend="onnx",
    model_kwargs={"provider": "CPUExecutionProvider"},
)

# Alternative (disabled): load the quantized arm64 ONNX file directly.
# onnx_model = SentenceTransformer(
#     model_id,
#     backend="onnx",
#     model_kwargs={
#         "file_name": "onnx/model_qint8_arm64.onnx",
#         "provider": "CPUExecutionProvider",
#     },
# )

# Export a dynamically quantized ONNX model with the "arm64" configuration,
# targeting the local "models/KURE-v1" path.
export_dynamic_quantized_onnx_model(
    onnx_model,
    model_name_or_path="models/KURE-v1",
    quantization_config="arm64",
)
This produces a quantized ONNX model named model_qint8_arm64.onnx in the models/KURE-v1/onnx/ directory.
How to use this PR:
# Clone the model repository; git-lfs must already be installed.
git clone https://huggingface.co/nlpai-lab/KURE-v1
cd KURE-v1
# Fetch the remote ref refs/pr/5 into a local branch named pr/5, then switch to it.
git fetch origin refs/pr/5:pr/5
git checkout pr/5
wkpark
changed pull request status to
open