| |
| |
| |
| |
| |
| |
| |
| from datetime import datetime |
| import glob |
| from hydra.core.hydra_config import HydraConfig |
| from omegaconf import DictConfig |
| import logging |
| import numpy as np |
| import onnx |
| import onnxruntime |
| from onnx import version_converter, checker |
| from onnxruntime import quantization |
| from onnxruntime.quantization import (CalibrationDataReader, CalibrationMethod, |
| QuantFormat, QuantType, quantize_static) |
| import os |
| from typing import List |
|
|
| from .quant_utils import get_weights_activations_quant_type, get_calibration_method |
|
|
|
|
def _update_opset(input_model, target_opset, export_dir):
    """
    Updates the opset of an ONNX model and saves the converted copy.

    Args:
        input_model: path of the input .onnx model file.
        target_opset: the opset version the model should be upgraded to.
        export_dir: directory where the converted model is saved.

    Returns:
        Path of the converted model, or the original ``input_model`` path when
        the model already uses an opset >= ``target_opset``. Returns ``None``
        if onnxruntime fails to load the converted model.

    Raises:
        TypeError: if ``input_model`` does not end with ``.onnx``.
    """
    # Minimum IR version required for each opset so that onnxruntime accepts
    # the model (per the ONNX versioning table). Opsets above 21 are not
    # listed; for those the converter's own IR version is kept as-is.
    ir_version_dict = {21: 10,
                       20: 9,
                       19: 9,
                       18: 8,
                       17: 8,
                       16: 8,
                       15: 8
                       }

    if not str(input_model).endswith('.onnx'):
        raise TypeError("Error! The model file must be of onnx format!")
    model = onnx.load(input_model)

    # Nothing to do when the model is already at (or beyond) the target opset.
    current_opset = model.opset_import[0].version
    if current_opset >= target_opset:
        print(f"[INFO] : The model is already using opset {current_opset} >= {target_opset}")
        return input_model

    converted_model = version_converter.convert_version(model, target_opset)

    # The version converter does not always set a compatible IR version, so
    # force it when the target opset is present in the lookup table.
    # Using .get() avoids a KeyError for opsets newer than the table (>= 22),
    # which the original `target_opset >= 15` guard did not prevent.
    print(f"[INFO] : Model current IR version: {converted_model.ir_version}")
    expected_ir = ir_version_dict.get(target_opset)
    if expected_ir is not None and converted_model.ir_version != expected_ir:
        converted_model.ir_version = expected_ir
        print(f"[INFO] : Update model IR version to {converted_model.ir_version} for compatibility with target opset "
              f"{target_opset}")

    # Validation problems are reported but not fatal: the runtime load check
    # below is the authoritative gate.
    try:
        checker.check_model(converted_model)
    except checker.ValidationError as e:
        print(f"[ERROR] : The model is invalid. {e}")

    # Save as <export_dir>/<original name>_opset<N>.onnx
    base_name = os.path.splitext(os.path.basename(input_model))[0]
    opset_model = f'{export_dir}/{base_name}_opset{target_opset}.onnx'
    onnx.save(converted_model, opset_model)

    # Sanity check: verify onnxruntime can load the converted model.
    session = onnxruntime.InferenceSession(opset_model)
    try:
        session.get_inputs()
    except Exception as e:
        print(f"[ERROR] : An error occurred while loading the modified model: {e}")
        return None

    print(f"[INFO] : The model has been converted to opset {target_opset}, IR {converted_model.ir_version} and saved "
          f"at the same location.")
    return opset_model
|
|
| def _preprocess_random_images(height: int, width: int, channel: int, size_limit=10): |
| """ |
| Loads a batch of images and preprocess them |
| parameter height: image height in pixels |
| parameter width: image width in pixels |
| parameter size_limit: number of images to load. Default is 100 |
| return: list of matrices characterizing multiple images |
| """ |
| unconcatenated_batch_data = [] |
| for i in range(size_limit): |
| random_vals = np.random.uniform(0, 1, channel*height*width).astype('float32') |
| random_image = random_vals.reshape(1, channel, height, width) |
| unconcatenated_batch_data.append(random_image) |
| batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0) |
| print(f'[INFO] : random dataset with {size_limit} random images is prepared!') |
| return batch_data |
|
|
class ImageDataReader(CalibrationDataReader):
    '''
    Calibration data reader used during ONNX static quantization.

    Construction parameters:
        quantization_samples: numpy array holding the calibration dataset,
            or None to fall back to randomly generated images.
        model_path: path of the model to be quantized; used to read the
            expected input shape and input tensor name.
    '''
    def __init__(self,
                 quantization_samples,
                 model_path: str):
        # Query the model once for its input layout and input tensor name.
        session = onnxruntime.InferenceSession(model_path, None)
        model_input = session.get_inputs()[0]
        (_, channel, height, width) = model_input.shape

        # Fall back to random images when no calibration samples are given;
        # otherwise add the per-sample batch axis expected by the model.
        if quantization_samples is None:
            self.nhwc_data_list = _preprocess_random_images(height,
                                                            width,
                                                            channel)
        else:
            self.nhwc_data_list = np.expand_dims(quantization_samples, axis=1)

        self.input_name = model_input.name
        self.datasize = len(self.nhwc_data_list)

        # Iterator over feed dicts; created lazily on the first get_next().
        self.enum_data = None

    def get_next(self):
        # Build the feed iterator on first use (or after rewind()).
        if self.enum_data is None:
            self.enum_data = (
                {self.input_name: sample} for sample in self.nhwc_data_list
            )
        # Returns None once the calibration samples are exhausted.
        return next(self.enum_data, None)

    def rewind(self):
        # Reset so the next get_next() restarts from the first sample.
        self.enum_data = None
|
|
|
def quantize_onnx(configs: DictConfig, model_path=None, quantization_samples=None, model: object = None, extra_options: dict = None):
    """
    Quantizes an ONNX model using onnx-runtime.

    Args:
        configs (DictConfig): Configuration dictionary containing quantization and model settings.
        model_path (str): Path of the float ONNX model to quantize.
        quantization_samples: Calibration/representative dataset as a numpy array (optional).
        model (object, optional): Unused; kept only for backward compatibility
            with existing callers (it is overwritten before being read).
        extra_options (dict, optional): Extra options forwarded to the ONNX quantizer.

    Returns:
        onnxruntime.InferenceSession: Quantized model session with a model_path
        attribute, or None if the quantized model cannot be loaded.

    Raises:
        TypeError: if configs.quantization.granularity is neither
            'per_tensor' nor 'per_channel'.
    """
    granularity = configs.quantization.granularity.lower()
    target_opset = configs.quantization.target_opset
    output_dir = HydraConfig.get().runtime.output_dir
    export_dir = configs.quantization.export_dir

    # Resolve the export directory under Hydra's run output directory.
    export_dir = output_dir + (f'/{export_dir}' if export_dir else '')
    os.makedirs(export_dir, exist_ok=True)

    # Tag embedded in the output filename: per-tensor vs per-channel QDQ.
    if granularity == 'per_tensor':
        quant_tag = 'quant_qdq_pt'
    elif granularity == 'per_channel':
        quant_tag = 'quant_qdq_pc'
    else:
        raise TypeError('Not a valid quantization_type!\n',
                        'Only supported options for quantization_type are per_channel, or per_tensor!')

    print(f'[INFO] : Quantizing model : {model_path}')

    # Upgrade the model to the target opset (no-op if already high enough).
    opset_model = _update_opset(input_model=model_path, target_opset=target_opset, export_dir=output_dir)

    # Data reader feeding the calibration samples to the quantizer.
    print('[INFO] : Prepare the data reader for the representative dataset...')
    dr = ImageDataReader(quantization_samples=quantization_samples, model_path=opset_model)
    print('[INFO] : the data reader is ready')

    # Pre-process (optimize, skip symbolic shape inference) before quantizing.
    opset_base = os.path.splitext(os.path.basename(opset_model))[0]
    infer_model = os.path.join(export_dir, f'{opset_base}_infer.onnx')
    quantization.quant_pre_process(input_model_path=opset_model,
                                   output_model_path=infer_model,
                                   skip_optimization=False,
                                   skip_symbolic_shape=True)

    # Resolve weight/activation types, calibration method and node filters
    # from the configuration.
    weight_type, activ_type = get_weights_activations_quant_type(cfg=configs)
    calibration_method = get_calibration_method(cfg=configs)
    if configs.quantization.onnx_quant_parameters:
        op_types_to_quantize = configs.quantization.onnx_quant_parameters.op_types_to_quantize
        nodes_to_quantize = configs.quantization.onnx_quant_parameters.nodes_to_quantize
        nodes_to_exclude = configs.quantization.onnx_quant_parameters.nodes_to_exclude
    else:
        op_types_to_quantize = None
        nodes_to_quantize = None
        nodes_to_exclude = None

    quant_model = os.path.join(export_dir, f'{opset_base}_{quant_tag}.onnx')
    print(f'[INFO] : Quantizing the model {os.path.basename(model_path)}, please wait...')

    quantize_static(
        infer_model,
        quant_model,
        dr,
        op_types_to_quantize=op_types_to_quantize,
        calibrate_method=calibration_method,
        quant_format=QuantFormat.QDQ,
        per_channel=granularity == 'per_channel',
        weight_type=weight_type,
        activation_type=activ_type,
        nodes_to_quantize=nodes_to_quantize,
        nodes_to_exclude=nodes_to_exclude,
        extra_options=extra_options)

    # Sanity check: verify onnxruntime can load the quantized model.
    model = onnxruntime.InferenceSession(quant_model)
    try:
        model.get_inputs()
    except Exception as e:
        print(f"[ERROR] : An error occurred while quantizing the model: {e}")
        return None
    print("Quantized model path:", quant_model)
    setattr(model, 'model_path', quant_model)
    return model
|
|