pesi
/

rtmo

Object Detection

ONNX

Pose Estimation

Model card Files and versions

xet

Community

Luigi committed on May 17, 2024

Commit

0bf1eb7

1 Parent(s): 846e714

Show bounding box on screen too

Browse files

Files changed (3) hide show

rtmo_demo.py +3 -2
rtmo_demo_batch.py +7 -3
rtmo_gpu.py +27 -17

rtmo_demo.py CHANGED Viewed

@@ -5,7 +5,7 @@ import cv2
 from pathlib import Path
 import argparse
 import os
-from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
 if __name__ == "__main__":
@@ -36,7 +36,7 @@ if __name__ == "__main__":
             if not success:
                 break
-            frame_out, keypoints, scores = body(frame)
             if keypoints is not None:
                 if frame_idx % args.batch_size == 0 and frame_idx:
@@ -56,6 +56,7 @@ if __name__ == "__main__":
                                         scores,
                                         kpt_thr=0.3,
                                         line_width=2)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{model}', img_show)

 from pathlib import Path
 import argparse
 import os
+from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen, draw_bbox
 if __name__ == "__main__":
             if not success:
                 break
+            frame_out, bboxes, keypoints, scores = body(frame)
             if keypoints is not None:
                 if frame_idx % args.batch_size == 0 and frame_idx:
                                         scores,
                                         kpt_thr=0.3,
                                         line_width=2)
+                img_show = draw_bbox(img_show, bboxes)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{model}', img_show)

rtmo_demo_batch.py CHANGED Viewed

@@ -4,7 +4,7 @@ import time
 import cv2
 from pathlib import Path
 import argparse
-from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen  # Ensure to import RTMO_GPU_Batch
 def process_video(video_path, body_estimator, batch_size=4):
     cap = cv2.VideoCapture(video_path)
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
-            batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
@@ -32,8 +32,10 @@ def process_video(video_path, body_estimator, batch_size=4):
             for i, keypoints in enumerate(batch_keypoints):
                 scores = batch_scores[i]
                 frame = batch_frames[i]
                 img_show = frame.copy()
                 img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{video_path}', img_show)
@@ -52,12 +54,14 @@ def process_video(video_path, body_estimator, batch_size=4):
             # Option 2: Duplicate the last frame
             batch_frames.append(batch_frames[-1])
-        batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         for i, keypoints in enumerate(batch_keypoints):
             scores = batch_scores[i]
             frame = batch_frames[i]
             img_show = frame.copy()
             img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.imshow(f'{video_path}', img_show)
             #cv2.waitKey(10)

 import cv2
 from pathlib import Path
 import argparse
+from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen, draw_bbox # Ensure to import RTMO_GPU_Batch
 def process_video(video_path, body_estimator, batch_size=4):
     cap = cv2.VideoCapture(video_path)
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
+            batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
             for i, keypoints in enumerate(batch_keypoints):
                 scores = batch_scores[i]
                 frame = batch_frames[i]
+                bboxes = batch_bboxes[i]
                 img_show = frame.copy()
                 img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
+                img_show = draw_bbox(img_show, bboxes)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{video_path}', img_show)
             # Option 2: Duplicate the last frame
             batch_frames.append(batch_frames[-1])
+        batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         for i, keypoints in enumerate(batch_keypoints):
             scores = batch_scores[i]
             frame = batch_frames[i]
+            bboxes = batch_bboxes[i]
             img_show = frame.copy()
             img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
+            img_show = draw_bbox(img_show, bboxes)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.imshow(f'{video_path}', img_show)
             #cv2.waitKey(10)

rtmo_gpu.py CHANGED Viewed

@@ -207,6 +207,12 @@ def draw_mmpose(img,
     return img
 # with simplification to use onnxruntime only
 def draw_skeleton(img,
                   keypoints,
@@ -339,7 +345,7 @@ class RTMO_GPU(object):
             final_boxes /= ratio
             isscore = final_scores > 0.3
             isbbox = [i for i in isscore]
-            # final_boxes = final_boxes[isbbox]
             # decode pose outputs
             keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
@@ -352,14 +358,15 @@ class RTMO_GPU(object):
             flat_predictions = outputs[0]
             if flat_predictions.shape[0] > 0: # at least one person found
                 mask = flat_predictions[:, 0] == 0
-                pred_bboxes = flat_predictions[mask, 1:5]
-                pred_joints = flat_predictions[mask, 6:].reshape((len(pred_bboxes), -1, 3))
                 keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
                 keypoints = keypoints / ratio
             else: # no detection
-                keypoints, scores = np.zeros((0, 17, 2)), np.zeros((0, 17))
-        return keypoints, scores
     def inference(self, img: np.ndarray):
             """Inference model.
@@ -418,9 +425,9 @@ class RTMO_GPU(object):
             outputs = self.inference(image)
-            keypoints, scores = self.postprocess(outputs, ratio)
-            return keypoints, scores
     def __init__(self,
                  model: str = None,
@@ -561,38 +568,41 @@ class RTMO_GPU_Batch(RTMO_GPU):
         """
         batch_keypoints = []
         batch_scores = []
         b_dets, b_keypoints = outputs
         for i, ratio in enumerate(ratios):
             output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i],axis=0)]
-            keypoints, scores = super().postprocess(output, ratio)
             batch_keypoints.append(keypoints)
             batch_scores.append(scores)
-        return batch_keypoints, batch_scores
     def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
-        keypoints, scores = self.postprocess_batch(outputs, ratios)
-        return keypoints, scores
     def __call__(self, image: np.array):
         self.buffer.append(image)
         self.in_queue.put(image)
         if len(self.buffer) == self.batch_size:
-            b_keypoints, b_scores = self.__batch_call__(self.buffer)
-            for keypoints, scores in zip(b_keypoints, b_scores):
-                self.out_queue.put((keypoints, scores))
             self.buffer = []
-        frame, keypoints, scores = None, None, None
         if not self.out_queue.empty():
-            keypoints, scores = self.out_queue.get()
             frame = self.in_queue.get()
-        return frame, keypoints, scores
     def __init__(self,

     return img
+def draw_bbox(img, bboxes, color=(0, 255, 0)):
+    for bbox in bboxes:
+        img = cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
+                            (int(bbox[2]), int(bbox[3])), color, 2)
+    return img
 # with simplification to use onnxruntime only
 def draw_skeleton(img,
                   keypoints,
             final_boxes /= ratio
             isscore = final_scores > 0.3
             isbbox = [i for i in isscore]
+            final_boxes = final_boxes[isbbox]
             # decode pose outputs
             keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
             flat_predictions = outputs[0]
             if flat_predictions.shape[0] > 0: # at least one person found
                 mask = flat_predictions[:, 0] == 0
+                final_boxes = flat_predictions[mask, 1:5]
+                pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))
                 keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
                 keypoints = keypoints / ratio
+                final_boxes = final_boxes / ratio
             else: # no detection
+                final_boxes, keypoints, scores = np.zeros((0, 4)),np.zeros((0, 17, 2)), np.zeros((0, 17))
+        return final_boxes, keypoints, scores
     def inference(self, img: np.ndarray):
             """Inference model.
             outputs = self.inference(image)
+            bboxes, keypoints, scores = self.postprocess(outputs, ratio)
+            return bboxes, keypoints, scores
     def __init__(self,
                  model: str = None,
         """
         batch_keypoints = []
         batch_scores = []
+        batch_bboxes = []
         b_dets, b_keypoints = outputs
         for i, ratio in enumerate(ratios):
             output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i],axis=0)]
+            bboxes, keypoints, scores = super().postprocess(output, ratio)
             batch_keypoints.append(keypoints)
             batch_scores.append(scores)
+            batch_bboxes.append(bboxes)
+        return batch_bboxes, batch_keypoints, batch_scores
     def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
+        bboxes, keypoints, scores = self.postprocess_batch(outputs, ratios)
+        return bboxes, keypoints, scores
     def __call__(self, image: np.array):
         self.buffer.append(image)
         self.in_queue.put(image)
         if len(self.buffer) == self.batch_size:
+            b_bboxes, b_keypoints, b_scores = self.__batch_call__(self.buffer)
+            for i, (keypoints, scores) in enumerate(zip(b_keypoints, b_scores)):
+                bboxes = b_bboxes[i]
+                self.out_queue.put((bboxes, keypoints, scores))
             self.buffer = []
+        frame, bboxes, keypoints, scores = None, None, None, None
         if not self.out_queue.empty():
+            bboxes, keypoints, scores = self.out_queue.get()
             frame = self.in_queue.get()
+        return frame, bboxes, keypoints, scores
     def __init__(self,