"""
Quantize text model to INT8.
Produces two variants:
- textual_int8.onnx — full dynamic quantization (incl. Conv → ConvInteger), max speed.
- textual_int8_no_conv.onnx — quantization without Conv (no ConvInteger), for runtimes like ONNX Runtime in Rust.
"""
import logging

# Silence ONNX Runtime warnings emitted during quantization.
logging.getLogger("onnxruntime").setLevel(logging.ERROR)
logging.getLogger().setLevel(logging.ERROR)
from onnxruntime.quantization import quantize_dynamic, QuantType
# Full quantization (default: all types from IntegerOpsRegistry, incl. Conv)
quantize_dynamic(
    model_input="textual.onnx",
    model_output="textual_int8.onnx",
    weight_type=QuantType.QInt8,
)
print("✅ textual_int8.onnx created")
# Variant without Conv — no ConvInteger, so it loads in runtimes such as the Rust ort crate.
OP_TYPES_NO_CONV = ["MatMul", "Attention", "Gather", "Transpose", "EmbedLayerNormalization"]
quantize_dynamic(
    model_input="textual.onnx",
    model_output="textual_int8_no_conv.onnx",
    weight_type=QuantType.QInt8,
    op_types_to_quantize=OP_TYPES_NO_CONV,
)
print("✅ textual_int8_no_conv.onnx created")