Add TensorRT engine support for RTMO
Browse files
- rtmo_demo.py +5 -5
- rtmo_gpu.py +69 -36
rtmo_demo.py
CHANGED
|
@@ -12,18 +12,18 @@ if __name__ == "__main__":
|
|
| 12 |
# Set up argument parsing
|
| 13 |
parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
|
| 14 |
parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
|
| 15 |
-
parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
|
| 16 |
parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
|
| 17 |
|
| 18 |
# Parse the command-line arguments
|
| 19 |
args = parser.parse_args()
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
# Only Tiny Model has (416,416) as input model
|
| 24 |
-
model_input_size = (416,416) if 'rtmo-t' in
|
| 25 |
|
| 26 |
-
body = RTMO_GPU(
|
| 27 |
model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
|
| 28 |
|
| 29 |
for mp4_path in Path(args.path).glob('*'):
|
|
@@ -55,5 +55,5 @@ if __name__ == "__main__":
|
|
| 55 |
kpt_thr=0.3,
|
| 56 |
line_width=2)
|
| 57 |
img_show = cv2.resize(img_show, (788, 525))
|
| 58 |
-
cv2.imshow(f'{
|
| 59 |
cv2.waitKey(10)
|
|
|
|
| 12 |
# Set up argument parsing
|
| 13 |
parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
|
| 14 |
parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
|
| 15 |
+
parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX (or engine) model file (required)')
|
| 16 |
parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
|
| 17 |
|
| 18 |
# Parse the command-line arguments
|
| 19 |
args = parser.parse_args()
|
| 20 |
|
| 21 |
+
model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
|
| 22 |
|
| 23 |
# Only Tiny Model has (416,416) as input model
|
| 24 |
+
model_input_size = (416,416) if 'rtmo-t' in model.lower() and not args.yolo_nas_pose else (640,640)
|
| 25 |
|
| 26 |
+
body = RTMO_GPU(model=model,
|
| 27 |
model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
|
| 28 |
|
| 29 |
for mp4_path in Path(args.path).glob('*'):
|
|
|
|
| 55 |
kpt_thr=0.3,
|
| 56 |
line_width=2)
|
| 57 |
img_show = cv2.resize(img_show, (788, 525))
|
| 58 |
+
cv2.imshow(f'{model}', img_show)
|
| 59 |
cv2.waitKey(10)
|
rtmo_gpu.py
CHANGED
|
@@ -334,32 +334,48 @@ class RTMO_GPU(object):
|
|
| 334 |
Returns:
|
| 335 |
outputs (np.ndarray): Output of RTMPose model.
|
| 336 |
"""
|
|
|
|
| 337 |
# build input to (1, 3, H, W)
|
| 338 |
img = img.transpose(2, 0, 1)
|
| 339 |
img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
|
| 340 |
input = img[None, :, :, :]
|
| 341 |
|
| 342 |
-
|
| 343 |
-
io_binding = self.session.io_binding()
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
|
| 348 |
-
io_binding.bind_output(name='dets')
|
| 349 |
-
io_binding.bind_output(name='keypoints')
|
| 350 |
-
else:
|
| 351 |
-
# NAS Pose, flat format
|
| 352 |
-
io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
|
| 353 |
-
io_binding.bind_output(name='graph2_flat_predictions')
|
| 354 |
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
-
# Retrieve the outputs from the IO Binding object
|
| 359 |
-
outputs = [output.numpy() for output in io_binding.get_outputs()]
|
| 360 |
-
|
| 361 |
return outputs
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
def __call__(self, image: np.ndarray):
|
| 364 |
image, ratio = self.preprocess(image)
|
| 365 |
|
|
@@ -371,33 +387,50 @@ class RTMO_GPU(object):
|
|
| 371 |
return keypoints, scores
|
| 372 |
|
| 373 |
def __init__(self,
|
| 374 |
-
|
| 375 |
model_input_size: tuple = (640, 640),
|
| 376 |
mean: tuple = None,
|
| 377 |
std: tuple = None,
|
| 378 |
device: str = 'cuda',
|
| 379 |
is_yolo_nas_pose = False):
|
| 380 |
-
|
| 381 |
-
if not os.path.exists(
|
| 382 |
# If the file does not exist, raise FileNotFoundError
|
| 383 |
-
raise FileNotFoundError(f"The specified ONNX model file was not found: {
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
self.
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
|
| 400 |
-
self.onnx_model = onnx_model
|
| 401 |
self.model_input_size = model_input_size
|
| 402 |
self.mean = mean
|
| 403 |
self.std = std
|
|
|
|
| 334 |
Returns:
|
| 335 |
outputs (np.ndarray): Output of RTMPose model.
|
| 336 |
"""
|
| 337 |
+
|
| 338 |
# build input to (1, 3, H, W)
|
| 339 |
img = img.transpose(2, 0, 1)
|
| 340 |
img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
|
| 341 |
input = img[None, :, :, :]
|
| 342 |
|
| 343 |
+
if self.model_format == 'onnx':
|
|
|
|
| 344 |
|
| 345 |
+
# Create an IO Binding object
|
| 346 |
+
io_binding = self.session.io_binding()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
+
if not self.is_yolo_nas_pose:
|
| 349 |
+
# RTMO
|
| 350 |
+
io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
|
| 351 |
+
io_binding.bind_output(name='dets')
|
| 352 |
+
io_binding.bind_output(name='keypoints')
|
| 353 |
+
else:
|
| 354 |
+
# NAS Pose, flat format
|
| 355 |
+
io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
|
| 356 |
+
io_binding.bind_output(name='graph2_flat_predictions')
|
| 357 |
+
|
| 358 |
+
# Run inference with IO Binding
|
| 359 |
+
self.session.run_with_iobinding(io_binding)
|
| 360 |
+
|
| 361 |
+
# Retrieve the outputs from the IO Binding object
|
| 362 |
+
outputs = [output.numpy() for output in io_binding.get_outputs()]
|
| 363 |
+
|
| 364 |
+
else: # 'engine'
|
| 365 |
+
|
| 366 |
+
if not self.session.is_active:
|
| 367 |
+
self.session.activate()
|
| 368 |
+
|
| 369 |
+
outputs = self.session.infer(feed_dict={'input': input}, check_inputs=False)
|
| 370 |
+
outputs = [output for output in outputs.values()]
|
| 371 |
|
|
|
|
|
|
|
|
|
|
| 372 |
return outputs
|
| 373 |
|
| 374 |
+
def __enter__(self):
    """Enter a ``with`` block and hand back this instance unchanged."""
    return self

def __exit__(self, exc_type=None, exc_value=None, traceback=None):
    """Deactivate the TensorRT session when leaving a ``with`` block.

    The context-manager protocol always calls ``__exit__`` with three
    positional arguments describing the exception (or ``None`` three
    times on a clean exit). They default to ``None`` so that an explicit
    zero-argument call ``obj.__exit__()`` keeps working.

    Only the ``'engine'`` model format is handled: its session is
    deactivated if it is currently active. Nothing is done for any other
    model format.

    Args:
        exc_type: Exception class raised inside the ``with`` body, if any.
        exc_value: Exception instance raised inside the ``with`` body.
        traceback: Traceback associated with that exception.

    Returns:
        bool: Always ``False`` so an exception raised inside the ``with``
        body is never suppressed.
    """
    if self.model_format == 'engine' and self.session.is_active:
        self.session.deactivate()
    # A falsy return value tells Python to re-raise any pending exception.
    return False
|
| 378 |
+
|
| 379 |
def __call__(self, image: np.ndarray):
|
| 380 |
image, ratio = self.preprocess(image)
|
| 381 |
|
|
|
|
| 387 |
return keypoints, scores
|
| 388 |
|
| 389 |
def __init__(self,
|
| 390 |
+
model: str = None,
|
| 391 |
model_input_size: tuple = (640, 640),
|
| 392 |
mean: tuple = None,
|
| 393 |
std: tuple = None,
|
| 394 |
device: str = 'cuda',
|
| 395 |
is_yolo_nas_pose = False):
|
| 396 |
+
|
| 397 |
+
if not os.path.exists(model):
|
| 398 |
# If the file does not exist, raise FileNotFoundError
|
| 399 |
+
raise FileNotFoundError(f"The specified ONNX model file was not found: {model}")
|
| 400 |
+
|
| 401 |
+
self.model = model
|
| 402 |
+
if model.endswith('.onnx'):
|
| 403 |
+
self.model_format = 'onnx'
|
| 404 |
+
elif model.endswith('.engine'):
|
| 405 |
+
self.model_format = 'engine'
|
| 406 |
+
from polygraphy.backend.common import BytesFromPath
|
| 407 |
+
from polygraphy.backend.trt import EngineFromBytes, TrtRunner, load_plugins
|
| 408 |
+
load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])
|
| 409 |
+
else:
|
| 410 |
+
raise TypeError("Your model is neither ONNX nor Engine !")
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
if self.model_format == 'onnx':
|
| 414 |
+
|
| 415 |
+
providers = {'cpu': 'CPUExecutionProvider',
|
| 416 |
+
'cuda': [
|
| 417 |
+
#('TensorrtExecutionProvider', {
|
| 418 |
+
# 'trt_fp16_enable':True,
|
| 419 |
+
# 'trt_engine_cache_enable':True,
|
| 420 |
+
# 'trt_engine_cache_path':'cache'}),
|
| 421 |
+
('CUDAExecutionProvider', {
|
| 422 |
+
'cudnn_conv_algo_search': 'DEFAULT',
|
| 423 |
+
'cudnn_conv_use_max_workspace': True
|
| 424 |
+
}),
|
| 425 |
+
'CPUExecutionProvider']}
|
| 426 |
+
|
| 427 |
+
self.session = ort.InferenceSession(path_or_bytes=model,
|
| 428 |
+
providers=providers[device])
|
| 429 |
+
|
| 430 |
+
else: # 'engine'
|
| 431 |
+
engine = EngineFromBytes(BytesFromPath(model))
|
| 432 |
+
self.session = TrtRunner(engine)
|
| 433 |
|
|
|
|
| 434 |
self.model_input_size = model_input_size
|
| 435 |
self.mean = mean
|
| 436 |
self.std = std
|