RAG / quantize_.py
bakhil-aissa's picture
Upload 12 files
61fc96a verified
import argparse
import os
import tempfile

from onnxruntime.quantization import quantize_dynamic, QuantType
from onnxruntime.quantization.preprocess import quant_pre_process
# Pipeline: (1) clean the graph with quant_pre_process, (2) apply dynamic quantization.
def main():
    """Pre-process an ONNX model and write a dynamically quantized copy.

    Command line::

        python3 quantize_.py MODEL_PATH OUTPUT_PATH [--per_channel] [--quint8]

    The model at ``MODEL_PATH`` is first run through ``quant_pre_process``
    and the intermediate result is then passed to ``quantize_dynamic``,
    which writes the final model to ``OUTPUT_PATH``.

    The intermediate model lives in a private temporary directory that is
    removed when this function exits, including when pre-processing or
    quantization raises.
    """
    parser = argparse.ArgumentParser(description="ONNX Quantization")
    parser.add_argument('model_path', type=str, help='Path of the ONNX model to quantize')
    parser.add_argument('output_path', type=str, help='Path the quantized model is written to')
    parser.add_argument('--per_channel', action='store_true', help='Use per-channel quantization')
    # NOTE: despite its name, --quint8 selects the signed QInt8 weight type;
    # the flag is kept as-is so existing invocations keep working.
    parser.add_argument('--quint8', action='store_true', help='Use QInt8 (signed); default is QUInt8')
    args = parser.parse_args()

    # Weights are unsigned (QUInt8) unless --quint8 is given.
    # NOTE(review): the original comment stated that signed integers are
    # better for OpenVINO - confirm before changing the default.
    weight_type = QuantType.QInt8 if args.quint8 else QuantType.QUInt8

    # Keep the intermediate file out of the current working directory: a fixed
    # "processed.onnx" there could overwrite (and then delete) a user's file,
    # or collide with model_path/output_path. The scratch directory is created
    # beside the output so the intermediate model lands on a filesystem that
    # must have room for a model anyway.
    output_dir = os.path.dirname(os.path.abspath(args.output_path))
    with tempfile.TemporaryDirectory(prefix="onnx_quant_", dir=output_dir) as work_dir:
        temp_file = os.path.join(work_dir, "processed.onnx")

        # 1. Clean the graph first.
        # Skip symbolic shape inference (often fails on detection/vision
        # models with dynamic shapes).
        quant_pre_process(args.model_path, temp_file, skip_symbolic_shape=True)

        # 2. Dynamically quantize the cleaned graph.
        quantize_dynamic(
            model_input=temp_file,
            model_output=args.output_path,
            weight_type=weight_type,
            per_channel=args.per_channel,
        )
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()