Add script used to convert ONNX to fp32/fp16/int8/mixed engine
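Example invocation (file names are placeholders; the positional arguments are the calibration video, the input ONNX model, and the output engine path, matching the script's argparse setup below):

    python onnx_to_engine.py calib_video.mp4 rtmo-s.onnx rtmo-s.engine

Calibration data is cached next to the ONNX file ("<onnx_path>-calib.cache"), so subsequent builds can skip calibration.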
onnx_to_engine.py  +145 -0

onnx_to_engine.py (ADDED)
@@ -0,0 +1,145 @@
#!/usr/bin/env python3
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
This script demonstrates how to use the Calibrator API provided by Polygraphy
to calibrate and build a TensorRT engine (FP32/FP16/INT8 mixed precision) from an ONNX model.
"""
import numpy as np
from polygraphy.backend.trt import Calibrator, CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner, save_engine, load_plugins, Profile
from termcolor import cprint
# load the custom mmdeploy TensorRT ops plugin before the network is parsed and built
load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])
import cv2
import argparse

PREVIEW_CALIBRATOR_OUTPUT = True

def calib_data_from_video():

    # image preprocessing taken from rtmlib
    def preprocess(img: np.ndarray):
        """Do preprocessing for model inference (pad and resize).

        Args:
            img (np.ndarray): Input image in HWC format.

        Returns:
            tuple:
            - padded_img (np.ndarray): Preprocessed (resized and padded) image.
            - ratio (float): Resize ratio applied to the original image.
        """
        if len(img.shape) == 3:
            padded_img = np.ones(
                (MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1], 3),
                dtype=np.uint8) * 114
        else:
            padded_img = np.ones(MODEL_INPUT_SIZE, dtype=np.uint8) * 114

        ratio = min(MODEL_INPUT_SIZE[0] / img.shape[0],
                    MODEL_INPUT_SIZE[1] / img.shape[1])
        resized_img = cv2.resize(
            img,
            (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
            interpolation=cv2.INTER_LINEAR,
        ).astype(np.uint8)
        padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
        padded_img[:padded_shape[0], :padded_shape[1]] = resized_img

        return padded_img, ratio

    cap = cv2.VideoCapture(filename=VIDEO_PATH)
    while cap.isOpened():

        success, frame = cap.read()
        if success:
            img, ratio = preprocess(frame)                     # pad & resize to MODEL_INPUT_SIZE
            img = img.transpose(2, 0, 1)                       # HWC -> CHW
            img = np.ascontiguousarray(img, dtype=np.float32)  # to float32
            img = img[None, :, :, :]                           # add batch dim -> (1, 3, H, W)

            # Yield a dictionary mapping the input name of your model to the generated data
            yield {"input": img}
        else:
            break

    cap.release()

def main(onnx_path, engine_path, batch_size):

    # We can provide a path or file-like object if we want to cache calibration data.
    # This lets us avoid running calibration the next time we build the engine.
    #
    # TIP: You can use this calibrator with TensorRT APIs directly (e.g. config.int8_calibrator).
    # You don't have to use it with Polygraphy loaders if you don't want to.
    calibrator = Calibrator(data_loader=calib_data_from_video(), cache=f"{onnx_path}-calib.cache")

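    # (Not used here.) As the TIP above notes, the same Calibrator object can also be
    # handed to the raw TensorRT builder API -- a rough sketch, assuming a
    # tensorrt.IBuilderConfig instance named `config`:
    #
    #     import tensorrt as trt
    #     config.set_flag(trt.BuilderFlag.INT8)
    #     config.int8_calibrator = calibrator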

    profiles = [
        # Dynamic-batch profile: TensorRT optimizes for batch 4 but accepts batches 1-9.
        # For the lowest-latency engine, set min == opt == max instead.
        Profile().add("input",
                      min=(1, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1]),
                      opt=(4, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1]),
                      max=(9, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1])),
    ]

    # We must enable int8 mode in addition to providing the calibrator. With tf32, fp16
    # and int8 all enabled, TensorRT is free to pick the fastest precision per layer,
    # which yields a mixed-precision engine.
    build_engine = EngineFromNetwork(
        NetworkFromOnnxPath(f"{onnx_path}"), config=CreateConfig(
            use_dla=False,
            tf32=True,
            fp16=True,
            int8=True,
            obey_precision_constraints=False,
            sparse_weights=True,
            calibrator=calibrator,
            profiles=profiles,
            max_workspace_size=2 * 1024 * 1024 * 1024,  # 2 GiB
            allow_gpu_fallback=True
        )
    )

    # save_engine() triggers the deferred build: TensorRT parses the network, runs INT8
    # calibration with the video frames above, builds the engine, and serializes it to disk.
    save_engine(build_engine, f'{engine_path}')

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Convert an ONNX model to a TensorRT engine, calibrating INT8 from a video.")
    parser.add_argument("video_path", type=str, help="Path to the video file used to calibrate the INT8 engine")
    parser.add_argument("onnx_path", type=str, help="Path to the input ONNX model file")
    parser.add_argument("engine_path", type=str, help="Path where the exported TensorRT engine will be written")
    parser.add_argument("--batch_size", type=int, default=1, help="Input batch size (currently unused; the optimization profile fixes the batch range)")
    args = parser.parse_args()
    VIDEO_PATH = args.video_path
    MODEL_INPUT_SIZE = (416, 416) if 'rtmo-t' in args.onnx_path else (640, 640)

    if PREVIEW_CALIBRATOR_OUTPUT:
        cprint('You are previewing the video used to calibrate the TensorRT INT8 engine ...', 'yellow')
        for output_dict in calib_data_from_video():
            if output_dict:
                image = output_dict['input']  # preprocessed frame, shape (1, 3, H, W)
                image_to_show = image.squeeze(0).transpose(1, 2, 0) / 255.0  # drop batch dim, CHW -> HWC, scale to [0, 1] for display
                cv2.imshow(VIDEO_PATH, image_to_show)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # exit loop if 'q' is pressed
                    break
        cv2.destroyAllWindows()  # close all OpenCV windows

    main(args.onnx_path, args.engine_path, args.batch_size)
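The script imports TrtRunner but only saves the engine. A minimal smoke-test sketch for the resulting engine with Polygraphy (assumes the engine was built by this script, so the input tensor is named "input"; the 640x640 shape is the script's default, and rtmo-s.engine is a placeholder path):

import numpy as np
from polygraphy.backend.common import BytesFromPath
from polygraphy.backend.trt import EngineFromBytes, TrtRunner, load_plugins

# The custom-op plugin also has to be loaded before the engine is deserialized.
load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])

load_engine = EngineFromBytes(BytesFromPath("rtmo-s.engine"))
with TrtRunner(load_engine) as runner:
    dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # batch 1 fits the 1-9 profile
    outputs = runner.infer(feed_dict={"input": dummy})
    print({name: arr.shape for name, arr in outputs.items()})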