pesi
/

rtmo

Object Detection

ONNX

Pose Estimation

Model card Files Files and versions

xet

Community

Luigi committed on Mar 8, 2024

Commit

1f0f5d8

1 Parent(s): 9133777

Add option --yolo_nas_pose, used to read YOLO NAS Pose model instead of RTMO

Browse files

Files changed (2) hide show

rtmo_demo.py +3 -2
rtmo_gpu.py +43 -22

rtmo_demo.py CHANGED Viewed

@@ -13,6 +13,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
     parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
     parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
     # Parse the command-line arguments
     args = parser.parse_args()
@@ -20,10 +21,10 @@ if __name__ == "__main__":
     onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
     # Only Tiny Model has (416,416) as input model
-    model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() else (640,640)
     body = RTMO_GPU(onnx_model=onnx_model,
-                    model_input_size=model_input_size)
     for mp4_path in Path(args.path).glob('*'):

     parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
     parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
     parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
+    parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
     # Parse the command-line arguments
     args = parser.parse_args()
     onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
     # Only Tiny Model has (416,416) as input model
+    model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() and not args.yolo_nas_pose else (640,640)
     body = RTMO_GPU(onnx_model=onnx_model,
+                    model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
     for mp4_path in Path(args.path).glob('*'):

rtmo_gpu.py CHANGED Viewed

@@ -291,22 +291,36 @@ class RTMO_GPU(object):
             - final_boxes (np.ndarray): Final bounding boxes.
             - final_scores (np.ndarray): Final scores.
         """
-        det_outputs, pose_outputs = outputs
-        # onnx contains nms module
-        pack_dets = (det_outputs[0, :, :4], det_outputs[0, :, 4])
-        final_boxes, final_scores = pack_dets
-        final_boxes /= ratio
-        isscore = final_scores > 0.3
-        isbbox = [i for i in isscore]
-        # final_boxes = final_boxes[isbbox]
-        # decode pose outputs
-        keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
-        keypoints = keypoints / ratio
-        keypoints = keypoints[isbbox]
-        scores = scores[isbbox]
         return keypoints, scores
@@ -321,16 +335,21 @@ class RTMO_GPU(object):
             """
             # build input to (1, 3, H, W)
             img = img.transpose(2, 0, 1)
-            img = np.ascontiguousarray(img, dtype=np.float32)
             input = img[None, :, :, :]
             # Create an IO Binding object
             io_binding = self.session.io_binding()
-            # Bind the model inputs and outputs to the IO Binding object
-            io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
-            io_binding.bind_output(name='dets')
-            io_binding.bind_output(name='keypoints')
             # Run inference with IO Binding
             self.session.run_with_iobinding(io_binding)
@@ -355,7 +374,8 @@ class RTMO_GPU(object):
                  model_input_size: tuple = (640, 640),
                  mean: tuple = None,
                  std: tuple = None,
-                 device: str = 'cuda'):
         if not os.path.exists(onnx_model):
             # If the file does not exist, raise FileNotFoundError
@@ -381,6 +401,7 @@ class RTMO_GPU(object):
         self.mean = mean
         self.std = std
         self.device = device
 class RTMO_GPU_Batch(RTMO_GPU):
     def preprocess_batch(self, imgs: List[np.ndarray]) -> Tuple[np.ndarray, List[float]]:

             - final_boxes (np.ndarray): Final bounding boxes.
             - final_scores (np.ndarray): Final scores.
         """
+        if not self.is_yolo_nas_pose:
+            # RTMO
+            det_outputs, pose_outputs = outputs
+            # onnx contains nms module
+            pack_dets = (det_outputs[0, :, :4], det_outputs[0, :, 4])
+            final_boxes, final_scores = pack_dets
+            final_boxes /= ratio
+            isscore = final_scores > 0.3
+            isbbox = [i for i in isscore]
+            # final_boxes = final_boxes[isbbox]
+            # decode pose outputs
+            keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
+            keypoints = keypoints / ratio
+            keypoints = keypoints[isbbox]
+            scores = scores[isbbox]
+        else:
+            # NAS Pose
+            flat_predictions = outputs[0]
+            if flat_predictions.shape[0] > 0: # at least one person found
+                mask = flat_predictions[:, 0] == 0
+                pred_bboxes = flat_predictions[mask, 1:5]
+                pred_joints = flat_predictions[mask, 6:].reshape((len(pred_bboxes), -1, 3))
+                keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
+                keypoints = keypoints / ratio
+            else: # no detection
+                keypoints, scores = np.zeros((0, 17, 2)), np.zeros((0, 17))
         return keypoints, scores
             """
             # build input to (1, 3, H, W)
             img = img.transpose(2, 0, 1)
+            img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
             input = img[None, :, :, :]
             # Create an IO Binding object
             io_binding = self.session.io_binding()
+            if not self.is_yolo_nas_pose:
+                # RTMO
+                io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
+                io_binding.bind_output(name='dets')
+                io_binding.bind_output(name='keypoints')
+            else:
+                # NAS Pose, flat format
+                io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
+                io_binding.bind_output(name='graph2_flat_predictions')
             # Run inference with IO Binding
             self.session.run_with_iobinding(io_binding)
                  model_input_size: tuple = (640, 640),
                  mean: tuple = None,
                  std: tuple = None,
+                 device: str = 'cuda',
+                 is_yolo_nas_pose = False):
         if not os.path.exists(onnx_model):
             # If the file does not exist, raise FileNotFoundError
         self.mean = mean
         self.std = std
         self.device = device
+        self.is_yolo_nas_pose = is_yolo_nas_pose
 class RTMO_GPU_Batch(RTMO_GPU):
     def preprocess_batch(self, imgs: List[np.ndarray]) -> Tuple[np.ndarray, List[float]]: