File size: 1,074 Bytes
9d21083
 
 
 
 
 
 
 
 
 
23f21f0
 
9d21083
23f21f0
 
 
 
 
9d21083
23f21f0
9d21083
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
Quantize visual model to INT8.
Produces two variants:
- visual_int8.onnx — full dynamic quantization (incl. Conv → ConvInteger), max speed.
- visual_int8_no_conv.onnx — quantization without Conv (no ConvInteger), for runtimes like ONNX Runtime in Rust.
"""
import logging
# Raise the threshold to ERROR on the "onnxruntime" logger and then on the
# root logger, so lower-severity records (e.g. warnings) routed through them
# are dropped. This runs before onnxruntime is imported below.
for _logger_name in ("onnxruntime", None):
    # logging.getLogger(None) returns the root logger.
    logging.getLogger(_logger_name).setLevel(logging.ERROR)

from onnxruntime.quantization import quantize_dynamic, QuantType

# Arguments shared by both quantization runs: the same source model and
# signed 8-bit weights.
_SHARED_QUANT_ARGS = {
    "model_input": "visual.onnx",
    "weight_type": QuantType.QInt8,
}

# Variant 1 - full quantization. No op_types_to_quantize is passed, so the
# library's default op set applies (per the original note: all types from
# IntegerOpsRegistry, incl. Conv).
quantize_dynamic(model_output="visual_int8.onnx", **_SHARED_QUANT_ARGS)
print("✅ visual_int8.onnx created")

# Variant 2 - Conv is left out of the op list, so no ConvInteger nodes are
# produced; intended for runtimes such as ort in Rust.
OP_TYPES_NO_CONV = ["MatMul", "Attention", "Gather", "Transpose", "EmbedLayerNormalization"]
quantize_dynamic(
    model_output="visual_int8_no_conv.onnx",
    op_types_to_quantize=OP_TYPES_NO_CONV,
    **_SHARED_QUANT_ARGS,
)
print("✅ visual_int8_no_conv.onnx created")