Luigi committed on
Commit
43a489d
·
1 Parent(s): e1ce29b

Optimization for CUDAExecutionProvider

Browse files

- Use IO binding and enable cuDNN max workspace to optimize RTMO inference
- Eliminate the dependency on rtmlib

Files changed (2) hide show
  1. rtmo_demo.py +1 -3
  2. rtmo_gpu.py +369 -44
rtmo_demo.py CHANGED
@@ -2,11 +2,10 @@
2
 
3
  import time
4
  import cv2
5
- from rtmlib import draw_skeleton
6
  from pathlib import Path
7
  import argparse
8
  import os
9
- from rtmo_gpu import RTMO_GPU
10
 
11
  if __name__ == "__main__":
12
 
@@ -52,7 +51,6 @@ if __name__ == "__main__":
52
  img_show = draw_skeleton(img_show,
53
  keypoints,
54
  scores,
55
- openpose_skeleton=False,
56
  kpt_thr=0.3,
57
  line_width=2)
58
  img_show = cv2.resize(img_show, (788, 525))
 
2
 
3
  import time
4
  import cv2
 
5
  from pathlib import Path
6
  import argparse
7
  import os
8
+ from rtmo_gpu import RTMO_GPU, draw_skeleton
9
 
10
  if __name__ == "__main__":
11
 
 
51
  img_show = draw_skeleton(img_show,
52
  keypoints,
53
  scores,
 
54
  kpt_thr=0.3,
55
  line_width=2)
56
  img_show = cv2.resize(img_show, (788, 525))
rtmo_gpu.py CHANGED
@@ -1,55 +1,380 @@
1
  import os
2
- from rtmlib import RTMO
 
 
 
3
 
4
- class RTMO_GPU(RTMO):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def __init__(self,
7
  onnx_model: str = None,
8
  model_input_size: tuple = (640, 640),
9
  mean: tuple = None,
10
  std: tuple = None,
11
- to_openpose: bool = False,
12
- backend: str = 'onnxruntime',
13
  device: str = 'cuda'):
14
 
15
- if backend == 'onnxruntime':
16
-
17
- if not os.path.exists(onnx_model):
18
- from rtmlib.tools.file import download_checkpoint
19
- onnx_model = download_checkpoint(onnx_model)
20
-
21
- import onnxruntime as ort
22
-
23
- providers = {'cpu': 'CPUExecutionProvider',
24
- 'cuda': [
25
- ('CUDAExecutionProvider', {
26
- 'device_id': 0,
27
- 'arena_extend_strategy': 'kNextPowerOfTwo',
28
- 'gpu_mem_limit': 2 * 1024 * 1024 * 1024,
29
- 'cudnn_conv_algo_search': 'DEFAULT',
30
- 'do_copy_in_default_stream': True,
31
- 'enable_cuda_graph': False
32
- }),
33
- 'CPUExecutionProvider']}
34
-
35
- self.session = ort.InferenceSession(path_or_bytes=onnx_model,
36
- providers=providers[device])
37
-
38
- print(f'load {onnx_model} with {backend} backend')
39
-
40
- self.onnx_model = onnx_model
41
- self.model_input_size = model_input_size
42
- self.mean = mean
43
- self.std = std
44
- self.backend = backend
45
- self.device = device
46
- self.to_openpose = to_openpose
47
 
48
- else:
49
- super().__init__(onnx_model,
50
- model_input_size,
51
- mean,
52
- std,
53
- to_openpose,
54
- backend,
55
- device)
 
1
  import os
2
+ import numpy as np
3
+ from typing import List, Tuple
4
+ import onnxruntime as ort
5
+ import cv2
6
 
7
# COCO 17-keypoint skeleton metadata, copied from
# https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/skeleton/coco17.py
# Keypoint colors: blue [51, 153, 255] for the face, green [0, 255, 0] for
# left-side limbs, orange [255, 128, 0] for right-side limbs.
coco17 = dict(
    name='coco17',
    keypoint_info={
        0: dict(name='nose', id=0, color=[51, 153, 255], swap=''),
        1: dict(name='left_eye', id=1, color=[51, 153, 255], swap='right_eye'),
        2: dict(name='right_eye', id=2, color=[51, 153, 255], swap='left_eye'),
        3: dict(name='left_ear', id=3, color=[51, 153, 255], swap='right_ear'),
        4: dict(name='right_ear', id=4, color=[51, 153, 255], swap='left_ear'),
        5: dict(name='left_shoulder', id=5, color=[0, 255, 0],
                swap='right_shoulder'),
        6: dict(name='right_shoulder', id=6, color=[255, 128, 0],
                swap='left_shoulder'),
        7: dict(name='left_elbow', id=7, color=[0, 255, 0],
                swap='right_elbow'),
        8: dict(name='right_elbow', id=8, color=[255, 128, 0],
                swap='left_elbow'),
        9: dict(name='left_wrist', id=9, color=[0, 255, 0],
                swap='right_wrist'),
        10: dict(name='right_wrist', id=10, color=[255, 128, 0],
                 swap='left_wrist'),
        11: dict(name='left_hip', id=11, color=[0, 255, 0], swap='right_hip'),
        12: dict(name='right_hip', id=12, color=[255, 128, 0],
                 swap='left_hip'),
        13: dict(name='left_knee', id=13, color=[0, 255, 0],
                 swap='right_knee'),
        14: dict(name='right_knee', id=14, color=[255, 128, 0],
                 swap='left_knee'),
        15: dict(name='left_ankle', id=15, color=[0, 255, 0],
                 swap='right_ankle'),
        16: dict(name='right_ankle', id=16, color=[255, 128, 0],
                 swap='left_ankle'),
    },
    skeleton_info={
        0: dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
        1: dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
        2: dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
        3: dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
        4: dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
        5: dict(link=('left_shoulder', 'left_hip'), id=5,
                color=[51, 153, 255]),
        6: dict(link=('right_shoulder', 'right_hip'), id=6,
                color=[51, 153, 255]),
        7: dict(link=('left_shoulder', 'right_shoulder'), id=7,
                color=[51, 153, 255]),
        8: dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
        9: dict(link=('right_shoulder', 'right_elbow'), id=9,
                color=[255, 128, 0]),
        10: dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
        11: dict(link=('right_elbow', 'right_wrist'), id=11,
                 color=[255, 128, 0]),
        12: dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
        13: dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
        14: dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
        15: dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
        16: dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
        17: dict(link=('left_ear', 'left_shoulder'), id=17,
                 color=[51, 153, 255]),
        18: dict(link=('right_ear', 'right_shoulder'), id=18,
                 color=[51, 153, 255]),
    })
167
 
168
# functions from https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/draw.py#L71
def draw_mmpose(img,
                keypoints,
                scores,
                keypoint_info,
                skeleton_info,
                kpt_thr=0.5,
                radius=2,
                line_width=2):
    """Draw one pose instance (keypoint circles + skeleton lines) on ``img``.

    Args:
        img (np.ndarray): Image to draw on; returned after drawing.
        keypoints (np.ndarray): (K, 2) keypoint coordinates for one instance.
        scores: Length-K iterable of per-keypoint confidence scores.
        keypoint_info (dict): Per-keypoint metadata (name/id/color).
        skeleton_info (dict): Per-link metadata (link endpoint names/color).
        kpt_thr (float): Keypoints (and links) below this score are skipped.
        radius (int): Circle radius in pixels.
        line_width (int): Line thickness in pixels.

    Returns:
        np.ndarray: The image with the instance drawn.
    """
    assert len(keypoints.shape) == 2

    visible = [score >= kpt_thr for score in scores]

    # Draw visible keypoints while also building the name -> index map that
    # the skeleton links below are expressed in.
    name_to_id = {}
    for idx, info in keypoint_info.items():
        name_to_id[info['name']] = info['id']
        if visible[idx]:
            point = keypoints[idx]
            img = cv2.circle(img, (int(point[0]), int(point[1])),
                             int(radius), tuple(info['color']), -1)

    # Draw a link only when both of its endpoints passed the threshold.
    for link_info in skeleton_info.values():
        first_name, second_name = link_info['link']
        first, second = name_to_id[first_name], name_to_id[second_name]
        if visible[first] and visible[second]:
            start = keypoints[first]
            end = keypoints[second]
            img = cv2.line(img, (int(start[0]), int(start[1])),
                           (int(end[0]), int(end[1])),
                           link_info['color'],
                           thickness=line_width)

    return img
207
+
208
# with simplification to use onnxruntime only
def draw_skeleton(img,
                  keypoints,
                  scores,
                  kpt_thr=0.5,
                  radius=2,
                  line_width=2):
    """Draw the COCO-17 skeleton for every pose instance onto ``img``.

    Args:
        img (np.ndarray): Image to draw on; returned after drawing.
        keypoints (np.ndarray): (N, K, 2) batched or (K, 2) single-instance
            keypoint coordinates.
        scores (np.ndarray): (N, K) or (K,) per-keypoint scores.
        kpt_thr (float): Minimum score for a keypoint/link to be drawn.
        radius (int): Keypoint circle radius in pixels.
        line_width (int): Skeleton line thickness in pixels.

    Returns:
        np.ndarray: The image with all instances drawn.

    Raises:
        NotImplementedError: If the keypoint count is not 17 (COCO layout).
    """
    # shape[-2] is the keypoint axis for both (N, K, 2) and (K, 2) inputs;
    # the original read shape[1], which for a single (K, 2) instance is 2 and
    # made the single-instance path unreachable.
    num_keypoints = keypoints.shape[-2]
    if num_keypoints != 17:
        raise NotImplementedError(
            f'only the 17-keypoint COCO skeleton is supported, '
            f'got {num_keypoints} keypoints')

    # Direct reference instead of the original eval() on the skeleton name.
    keypoint_info = coco17['keypoint_info']
    skeleton_info = coco17['skeleton_info']

    # Promote a single instance to a batch of one.
    if len(keypoints.shape) == 2:
        keypoints = keypoints[None, :, :]
        # scores for one instance is 1-D; the original indexed it with
        # [None, :, :], which raised IndexError.
        scores = scores[None, :]

    for instance_kpts, instance_scores in zip(keypoints, scores):
        img = draw_mmpose(img, instance_kpts, instance_scores, keypoint_info,
                          skeleton_info, kpt_thr, radius, line_width)
    return img
238
+
239
class RTMO_GPU(object):
    """ONNX Runtime wrapper for the RTMO pose-estimation model.

    Runs inference through onnxruntime with IO binding; when
    ``device='cuda'`` it prefers the CUDAExecutionProvider (with cuDNN max
    workspace enabled) and falls back to CPU.
    """

    def __init__(self,
                 onnx_model: str = None,
                 model_input_size: tuple = (640, 640),
                 mean: tuple = None,
                 std: tuple = None,
                 device: str = 'cuda'):
        """Create the onnxruntime inference session.

        Args:
            onnx_model (str): Path to the ONNX model file (or a checkpoint
                name to download when the path does not exist).
            model_input_size (tuple): (height, width) expected by the model.
            mean (tuple): Optional per-channel mean for input normalization.
            std (tuple): Optional per-channel std for input normalization.
            device (str): 'cuda' or 'cpu'.
        """
        if not os.path.exists(onnx_model):
            # Lazy import: rtmlib is only needed to download a missing
            # checkpoint; a local model file avoids the dependency entirely.
            from rtmlib.tools.file import download_checkpoint
            onnx_model = download_checkpoint(onnx_model)

        providers = {'cpu': 'CPUExecutionProvider',
                     'cuda': [
                         ('CUDAExecutionProvider', {
                             'cudnn_conv_algo_search': 'DEFAULT',
                             # Let cuDNN use as much workspace as it needs
                             # when choosing convolution algorithms
                             # (favors speed over GPU memory).
                             'cudnn_conv_use_max_workspace': True
                         }),
                         'CPUExecutionProvider']}

        self.session = ort.InferenceSession(path_or_bytes=onnx_model,
                                            providers=providers[device])

        self.onnx_model = onnx_model
        self.model_input_size = model_input_size
        # Convert normalization stats to arrays once here; the original
        # re-wrapped (and re-assigned) them on every preprocess() call.
        self.mean = np.array(mean) if mean is not None else None
        self.std = np.array(std) if std is not None else None
        self.device = device

    def preprocess(self, img: np.ndarray):
        """Letterbox-resize (and optionally normalize) one input image.

        Args:
            img (np.ndarray): Input image, HxW or HxWx3.

        Returns:
            tuple:
                - padded_img (np.ndarray): Image resized with preserved
                  aspect ratio and padded with value 114 up to
                  ``model_input_size``.
                - ratio (float): Resize ratio applied; used later to map
                  model outputs back to original-image coordinates.
        """
        if len(img.shape) == 3:
            padded_img = np.ones(
                (self.model_input_size[0], self.model_input_size[1], 3),
                dtype=np.uint8) * 114
        else:
            padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114

        ratio = min(self.model_input_size[0] / img.shape[0],
                    self.model_input_size[1] / img.shape[1])
        resized_img = cv2.resize(
            img,
            (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
            interpolation=cv2.INTER_LINEAR,
        ).astype(np.uint8)
        padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
        padded_img[:padded_shape[0], :padded_shape[1]] = resized_img

        # Normalize only when stats were supplied (mean/std default to None).
        if self.mean is not None:
            padded_img = (padded_img - self.mean) / self.std

        return padded_img, ratio

    def postprocess(
        self,
        outputs: List[np.ndarray],
        ratio: float = 1.,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Decode RTMO model outputs into keypoints and scores.

        Args:
            outputs (List[np.ndarray]): [dets, keypoints] arrays; dets has
                shape (1, N, 5) holding xyxy + score (the ONNX graph already
                contains NMS), keypoints has shape (1, N, K, 3) holding
                xy + score.
            ratio (float): Resize ratio returned by ``preprocess``.

        Returns:
            tuple:
                - keypoints (np.ndarray): (M, K, 2) keypoints, in
                  original-image coordinates, for instances whose detection
                  score exceeds 0.3.
                - scores (np.ndarray): (M, K) per-keypoint scores.
        """
        det_outputs, pose_outputs = outputs

        # onnx contains nms module, so detections arrive pre-filtered.
        final_scores = det_outputs[0, :, 4]
        # Non-mutating division: the original did `final_boxes /= ratio`,
        # silently modifying the caller's output buffer in place.
        final_boxes = det_outputs[0, :, :4] / ratio
        keep = final_scores > 0.3

        # Decode pose outputs and map back to original-image coordinates.
        keypoints = pose_outputs[0, :, :, :2] / ratio
        scores = pose_outputs[0, :, :, 2]

        keypoints = keypoints[keep]
        scores = scores[keep]

        return keypoints, scores

    def inference(self, img: np.ndarray):
        """Run the ONNX session on one preprocessed image via IO binding.

        Args:
            img (np.ndarray): Preprocessed HxWx3 image from ``preprocess``.

        Returns:
            outputs (List[np.ndarray]): Raw [dets, keypoints] model outputs.
        """
        # Build the (1, 3, H, W) float32 input tensor.
        img = img.transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        batch = img[None, :, :, :]  # renamed from `input` (shadowed builtin)

        # Bind input and output buffers up front so onnxruntime can avoid
        # the extra copies that a plain session.run() would perform.
        io_binding = self.session.io_binding()
        io_binding.bind_input(name='input',
                              device_type='cpu',
                              device_id=0,
                              element_type=np.float32,
                              shape=batch.shape,
                              buffer_ptr=batch.ctypes.data)
        io_binding.bind_output(name='dets')
        io_binding.bind_output(name='keypoints')

        # Run inference with IO binding.
        self.session.run_with_iobinding(io_binding)

        # Retrieve the outputs from the IO binding object.
        return [output.numpy() for output in io_binding.get_outputs()]

    def __call__(self, image: np.ndarray):
        """Full pipeline: preprocess -> inference -> postprocess.

        Args:
            image (np.ndarray): Input image of arbitrary size.

        Returns:
            tuple: (keypoints, scores) as produced by ``postprocess``.
        """
        image, ratio = self.preprocess(image)
        outputs = self.inference(image)
        return self.postprocess(outputs, ratio)
 
 
 
 
 
 
 
 
 
 
 
 
380