"""Dynamically quantize an ONNX model from the command line.

Usage:
    python3 quantize_.py MODEL_PATH OUTPUT_PATH [--per_channel] [--quint8]
"""
import argparse
import os
import tempfile

from onnxruntime.quantization import QuantType, quantize_dynamic
from onnxruntime.quantization.preprocess import quant_pre_process


def main(argv=None):
    """Pre-process an ONNX model and write a dynamically quantized copy.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default), argparse reads ``sys.argv[1:]``.

    The intermediate pre-processed model is written into a private temporary
    directory that is removed when this function exits, whether the
    quantization succeeded or raised.
    """
    parser = argparse.ArgumentParser(description="ONNX Quantization")
    parser.add_argument('model_path', type=str)
    parser.add_argument('output_path', type=str)
    parser.add_argument('--per_channel', action='store_true',
                        help='Use per-channel quantization')
    # NOTE: despite its name, this flag selects the SIGNED QInt8 weight type;
    # without it the weights are quantized as unsigned QUInt8. The flag name is
    # kept unchanged so existing command lines keep working.
    parser.add_argument('--quint8', action='store_true',
                        help='Use QInt8 (signed); default is QUInt8')
    args = parser.parse_args(argv)

    weight_type = QuantType.QInt8 if args.quint8 else QuantType.QUInt8

    # Use a private temporary directory rather than a fixed file name in the
    # current working directory: that would overwrite (and then delete) a
    # user's own "processed.onnx", and would be left behind if a step failed.
    with tempfile.TemporaryDirectory(prefix="onnx_quant_") as work_dir:
        preprocessed_path = os.path.join(work_dir, "processed.onnx")

        # 1. Clean the graph first.
        # Skip symbolic shape inference (often fails on detection/vision
        # models with dynamic shapes).
        quant_pre_process(args.model_path, preprocessed_path,
                          skip_symbolic_shape=True)

        # 2. Quantize the weights of the pre-processed model.
        quantize_dynamic(
            model_input=preprocessed_path,
            model_output=args.output_path,
            weight_type=weight_type,
            per_channel=args.per_channel,
        )


if __name__ == "__main__":
    main()