Luigi
committed on
Commit
·
0bf1eb7
1
Parent(s):
846e714
Show bounding box on screen too
Browse files- rtmo_demo.py +3 -2
- rtmo_demo_batch.py +7 -3
- rtmo_gpu.py +27 -17
rtmo_demo.py
CHANGED
|
@@ -5,7 +5,7 @@ import cv2
|
|
| 5 |
from pathlib import Path
|
| 6 |
import argparse
|
| 7 |
import os
|
| 8 |
-
from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
|
| 9 |
|
| 10 |
if __name__ == "__main__":
|
| 11 |
|
|
@@ -36,7 +36,7 @@ if __name__ == "__main__":
|
|
| 36 |
if not success:
|
| 37 |
break
|
| 38 |
|
| 39 |
-
frame_out, keypoints, scores = body(frame)
|
| 40 |
|
| 41 |
if keypoints is not None:
|
| 42 |
if frame_idx % args.batch_size == 0 and frame_idx:
|
|
@@ -56,6 +56,7 @@ if __name__ == "__main__":
|
|
| 56 |
scores,
|
| 57 |
kpt_thr=0.3,
|
| 58 |
line_width=2)
|
|
|
|
| 59 |
img_show = resize_to_fit_screen(img_show, 720, 480)
|
| 60 |
cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
|
| 61 |
cv2.imshow(f'{model}', img_show)
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
import argparse
|
| 7 |
import os
|
| 8 |
+
from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen, draw_bbox
|
| 9 |
|
| 10 |
if __name__ == "__main__":
|
| 11 |
|
|
|
|
| 36 |
if not success:
|
| 37 |
break
|
| 38 |
|
| 39 |
+
frame_out, bboxes, keypoints, scores = body(frame)
|
| 40 |
|
| 41 |
if keypoints is not None:
|
| 42 |
if frame_idx % args.batch_size == 0 and frame_idx:
|
|
|
|
| 56 |
scores,
|
| 57 |
kpt_thr=0.3,
|
| 58 |
line_width=2)
|
| 59 |
+
img_show = draw_bbox(img_show, bboxes)
|
| 60 |
img_show = resize_to_fit_screen(img_show, 720, 480)
|
| 61 |
cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
|
| 62 |
cv2.imshow(f'{model}', img_show)
|
rtmo_demo_batch.py
CHANGED
|
@@ -4,7 +4,7 @@ import time
|
|
| 4 |
import cv2
|
| 5 |
from pathlib import Path
|
| 6 |
import argparse
|
| 7 |
-
from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
|
| 8 |
|
| 9 |
def process_video(video_path, body_estimator, batch_size=4):
|
| 10 |
cap = cv2.VideoCapture(video_path)
|
|
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
|
|
| 24 |
# Process the batch when it's full
|
| 25 |
if len(batch_frames) == batch_size:
|
| 26 |
s = time.time()
|
| 27 |
-
batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
|
| 28 |
det_time = time.time() - s
|
| 29 |
fps = round(batch_size / det_time, 1)
|
| 30 |
print(f'Batch det: {fps} FPS')
|
|
@@ -32,8 +32,10 @@ def process_video(video_path, body_estimator, batch_size=4):
|
|
| 32 |
for i, keypoints in enumerate(batch_keypoints):
|
| 33 |
scores = batch_scores[i]
|
| 34 |
frame = batch_frames[i]
|
|
|
|
| 35 |
img_show = frame.copy()
|
| 36 |
img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
|
|
|
|
| 37 |
img_show = resize_to_fit_screen(img_show, 720, 480)
|
| 38 |
cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
|
| 39 |
cv2.imshow(f'{video_path}', img_show)
|
|
@@ -52,12 +54,14 @@ def process_video(video_path, body_estimator, batch_size=4):
|
|
| 52 |
|
| 53 |
# Option 2: Duplicate the last frame
|
| 54 |
batch_frames.append(batch_frames[-1])
|
| 55 |
-
batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
|
| 56 |
for i, keypoints in enumerate(batch_keypoints):
|
| 57 |
scores = batch_scores[i]
|
| 58 |
frame = batch_frames[i]
|
|
|
|
| 59 |
img_show = frame.copy()
|
| 60 |
img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
|
|
|
|
| 61 |
img_show = resize_to_fit_screen(img_show, 720, 480)
|
| 62 |
cv2.imshow(f'{video_path}', img_show)
|
| 63 |
#cv2.waitKey(10)
|
|
|
|
| 4 |
import cv2
|
| 5 |
from pathlib import Path
|
| 6 |
import argparse
|
| 7 |
+
from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen, draw_bbox # Ensure to import RTMO_GPU_Batch
|
| 8 |
|
| 9 |
def process_video(video_path, body_estimator, batch_size=4):
|
| 10 |
cap = cv2.VideoCapture(video_path)
|
|
|
|
| 24 |
# Process the batch when it's full
|
| 25 |
if len(batch_frames) == batch_size:
|
| 26 |
s = time.time()
|
| 27 |
+
batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
|
| 28 |
det_time = time.time() - s
|
| 29 |
fps = round(batch_size / det_time, 1)
|
| 30 |
print(f'Batch det: {fps} FPS')
|
|
|
|
| 32 |
for i, keypoints in enumerate(batch_keypoints):
|
| 33 |
scores = batch_scores[i]
|
| 34 |
frame = batch_frames[i]
|
| 35 |
+
bboxes = batch_bboxes[i]
|
| 36 |
img_show = frame.copy()
|
| 37 |
img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
|
| 38 |
+
img_show = draw_bbox(img_show, bboxes)
|
| 39 |
img_show = resize_to_fit_screen(img_show, 720, 480)
|
| 40 |
cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
|
| 41 |
cv2.imshow(f'{video_path}', img_show)
|
|
|
|
| 54 |
|
| 55 |
# Option 2: Duplicate the last frame
|
| 56 |
batch_frames.append(batch_frames[-1])
|
| 57 |
+
batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
|
| 58 |
for i, keypoints in enumerate(batch_keypoints):
|
| 59 |
scores = batch_scores[i]
|
| 60 |
frame = batch_frames[i]
|
| 61 |
+
bboxes = batch_bboxes[i]
|
| 62 |
img_show = frame.copy()
|
| 63 |
img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
|
| 64 |
+
img_show = draw_bbox(img_show, bboxes)
|
| 65 |
img_show = resize_to_fit_screen(img_show, 720, 480)
|
| 66 |
cv2.imshow(f'{video_path}', img_show)
|
| 67 |
#cv2.waitKey(10)
|
rtmo_gpu.py
CHANGED
|
@@ -207,6 +207,12 @@ def draw_mmpose(img,
|
|
| 207 |
|
| 208 |
return img
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
# with simplification to use onnxruntime only
|
| 211 |
def draw_skeleton(img,
|
| 212 |
keypoints,
|
|
@@ -339,7 +345,7 @@ class RTMO_GPU(object):
|
|
| 339 |
final_boxes /= ratio
|
| 340 |
isscore = final_scores > 0.3
|
| 341 |
isbbox = [i for i in isscore]
|
| 342 |
-
|
| 343 |
|
| 344 |
# decode pose outputs
|
| 345 |
keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
|
|
@@ -352,14 +358,15 @@ class RTMO_GPU(object):
|
|
| 352 |
flat_predictions = outputs[0]
|
| 353 |
if flat_predictions.shape[0] > 0: # at least one person found
|
| 354 |
mask = flat_predictions[:, 0] == 0
|
| 355 |
-
|
| 356 |
-
pred_joints = flat_predictions[mask, 6:].reshape((len(
|
| 357 |
keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
|
| 358 |
keypoints = keypoints / ratio
|
|
|
|
| 359 |
else: # no detection
|
| 360 |
-
keypoints, scores = np.zeros((0, 17, 2)), np.zeros((0, 17))
|
| 361 |
|
| 362 |
-
return keypoints, scores
|
| 363 |
|
| 364 |
def inference(self, img: np.ndarray):
|
| 365 |
"""Inference model.
|
|
@@ -418,9 +425,9 @@ class RTMO_GPU(object):
|
|
| 418 |
|
| 419 |
outputs = self.inference(image)
|
| 420 |
|
| 421 |
-
keypoints, scores = self.postprocess(outputs, ratio)
|
| 422 |
|
| 423 |
-
return keypoints, scores
|
| 424 |
|
| 425 |
def __init__(self,
|
| 426 |
model: str = None,
|
|
@@ -561,38 +568,41 @@ class RTMO_GPU_Batch(RTMO_GPU):
|
|
| 561 |
"""
|
| 562 |
batch_keypoints = []
|
| 563 |
batch_scores = []
|
|
|
|
| 564 |
|
| 565 |
b_dets, b_keypoints = outputs
|
| 566 |
for i, ratio in enumerate(ratios):
|
| 567 |
output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i],axis=0)]
|
| 568 |
-
keypoints, scores = super().postprocess(output, ratio)
|
| 569 |
batch_keypoints.append(keypoints)
|
| 570 |
batch_scores.append(scores)
|
|
|
|
| 571 |
|
| 572 |
-
return batch_keypoints, batch_scores
|
| 573 |
|
| 574 |
def __batch_call__(self, images: List[np.ndarray]):
|
| 575 |
batch_img, ratios = self.preprocess_batch(images)
|
| 576 |
outputs = self.inference(batch_img)
|
| 577 |
-
keypoints, scores = self.postprocess_batch(outputs, ratios)
|
| 578 |
-
return keypoints, scores
|
| 579 |
|
| 580 |
def __call__(self, image: np.array):
|
| 581 |
self.buffer.append(image)
|
| 582 |
self.in_queue.put(image)
|
| 583 |
|
| 584 |
if len(self.buffer) == self.batch_size:
|
| 585 |
-
b_keypoints, b_scores = self.__batch_call__(self.buffer)
|
| 586 |
-
for keypoints, scores in zip(b_keypoints, b_scores):
|
| 587 |
-
|
|
|
|
| 588 |
self.buffer = []
|
| 589 |
|
| 590 |
-
frame, keypoints, scores = None, None, None
|
| 591 |
if not self.out_queue.empty():
|
| 592 |
-
keypoints, scores = self.out_queue.get()
|
| 593 |
frame = self.in_queue.get()
|
| 594 |
|
| 595 |
-
return frame, keypoints, scores
|
| 596 |
|
| 597 |
|
| 598 |
def __init__(self,
|
|
|
|
| 207 |
|
| 208 |
return img
|
| 209 |
|
| 210 |
+
def draw_bbox(img, bboxes, color=(0, 255, 0), thickness=2):
    """Draw one rectangle per bounding box on ``img`` and return the image.

    Args:
        img: Image handed to ``cv2.rectangle`` for every box.
        bboxes: Iterable of boxes. For each box, entries 0-1 are used as one
            corner and entries 2-3 as the opposite corner, each truncated
            with ``int()``. ``None`` is accepted and treated as "no boxes",
            because the batched estimator returns ``None`` for the boxes
            while no result is available yet.
        color: Colour tuple forwarded unchanged to ``cv2.rectangle``.
        thickness: Line thickness forwarded to ``cv2.rectangle``
            (defaults to 2, the value that used to be hard-coded).

    Returns:
        The result of the last ``cv2.rectangle`` call, or ``img`` untouched
        when there is nothing to draw.
    """
    # Nothing to draw: avoid iterating over None (would raise TypeError).
    if bboxes is None:
        return img

    for bbox in bboxes:
        top_left = (int(bbox[0]), int(bbox[1]))
        bottom_right = (int(bbox[2]), int(bbox[3]))
        img = cv2.rectangle(img, top_left, bottom_right, color, thickness)
    return img
|
| 215 |
+
|
| 216 |
# with simplification to use onnxruntime only
|
| 217 |
def draw_skeleton(img,
|
| 218 |
keypoints,
|
|
|
|
| 345 |
final_boxes /= ratio
|
| 346 |
isscore = final_scores > 0.3
|
| 347 |
isbbox = [i for i in isscore]
|
| 348 |
+
final_boxes = final_boxes[isbbox]
|
| 349 |
|
| 350 |
# decode pose outputs
|
| 351 |
keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
|
|
|
|
| 358 |
flat_predictions = outputs[0]
|
| 359 |
if flat_predictions.shape[0] > 0: # at least one person found
|
| 360 |
mask = flat_predictions[:, 0] == 0
|
| 361 |
+
final_boxes = flat_predictions[mask, 1:5]
|
| 362 |
+
pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))
|
| 363 |
keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
|
| 364 |
keypoints = keypoints / ratio
|
| 365 |
+
final_boxes = final_boxes / ratio
|
| 366 |
else: # no detection
|
| 367 |
+
final_boxes, keypoints, scores = np.zeros((0, 4)),np.zeros((0, 17, 2)), np.zeros((0, 17))
|
| 368 |
|
| 369 |
+
return final_boxes, keypoints, scores
|
| 370 |
|
| 371 |
def inference(self, img: np.ndarray):
|
| 372 |
"""Inference model.
|
|
|
|
| 425 |
|
| 426 |
outputs = self.inference(image)
|
| 427 |
|
| 428 |
+
bboxes, keypoints, scores = self.postprocess(outputs, ratio)
|
| 429 |
|
| 430 |
+
return bboxes, keypoints, scores
|
| 431 |
|
| 432 |
def __init__(self,
|
| 433 |
model: str = None,
|
|
|
|
| 568 |
"""
|
| 569 |
batch_keypoints = []
|
| 570 |
batch_scores = []
|
| 571 |
+
batch_bboxes = []
|
| 572 |
|
| 573 |
b_dets, b_keypoints = outputs
|
| 574 |
for i, ratio in enumerate(ratios):
|
| 575 |
output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i],axis=0)]
|
| 576 |
+
bboxes, keypoints, scores = super().postprocess(output, ratio)
|
| 577 |
batch_keypoints.append(keypoints)
|
| 578 |
batch_scores.append(scores)
|
| 579 |
+
batch_bboxes.append(bboxes)
|
| 580 |
|
| 581 |
+
return batch_bboxes, batch_keypoints, batch_scores
|
| 582 |
|
| 583 |
def __batch_call__(self, images: List[np.ndarray]):
|
| 584 |
batch_img, ratios = self.preprocess_batch(images)
|
| 585 |
outputs = self.inference(batch_img)
|
| 586 |
+
bboxes, keypoints, scores = self.postprocess_batch(outputs, ratios)
|
| 587 |
+
return bboxes, keypoints, scores
|
| 588 |
|
| 589 |
def __call__(self, image: np.array):
|
| 590 |
self.buffer.append(image)
|
| 591 |
self.in_queue.put(image)
|
| 592 |
|
| 593 |
if len(self.buffer) == self.batch_size:
|
| 594 |
+
b_bboxes, b_keypoints, b_scores = self.__batch_call__(self.buffer)
|
| 595 |
+
for i, (keypoints, scores) in enumerate(zip(b_keypoints, b_scores)):
|
| 596 |
+
bboxes = b_bboxes[i]
|
| 597 |
+
self.out_queue.put((bboxes, keypoints, scores))
|
| 598 |
self.buffer = []
|
| 599 |
|
| 600 |
+
frame, bboxes, keypoints, scores = None, None, None, None
|
| 601 |
if not self.out_queue.empty():
|
| 602 |
+
bboxes, keypoints, scores = self.out_queue.get()
|
| 603 |
frame = self.in_queue.get()
|
| 604 |
|
| 605 |
+
return frame, bboxes, keypoints, scores
|
| 606 |
|
| 607 |
|
| 608 |
def __init__(self,
|