Add --yolo_nas_pose option to load a YOLO NAS Pose model instead of an RTMO model
Browse files
- rtmo_demo.py +3 -2
- rtmo_gpu.py +43 -22
rtmo_demo.py
CHANGED
|
@@ -13,6 +13,7 @@ if __name__ == "__main__":
|
|
| 13 |
parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
|
| 14 |
parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
|
| 15 |
parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
|
|
|
|
| 16 |
|
| 17 |
# Parse the command-line arguments
|
| 18 |
args = parser.parse_args()
|
|
@@ -20,10 +21,10 @@ if __name__ == "__main__":
|
|
| 20 |
onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
|
| 21 |
|
| 22 |
# Only Tiny Model has (416,416) as input model
|
| 23 |
-
model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() else (640,640)
|
| 24 |
|
| 25 |
body = RTMO_GPU(onnx_model=onnx_model,
|
| 26 |
-
model_input_size=model_input_size)
|
| 27 |
|
| 28 |
for mp4_path in Path(args.path).glob('*'):
|
| 29 |
|
|
|
|
| 13 |
parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
|
| 14 |
parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
|
| 15 |
parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
|
| 16 |
+
parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
|
| 17 |
|
| 18 |
# Parse the command-line arguments
|
| 19 |
args = parser.parse_args()
|
|
|
|
| 21 |
onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
|
| 22 |
|
| 23 |
# Only Tiny Model has (416,416) as input model
|
| 24 |
+
model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() and not args.yolo_nas_pose else (640,640)
|
| 25 |
|
| 26 |
body = RTMO_GPU(onnx_model=onnx_model,
|
| 27 |
+
model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
|
| 28 |
|
| 29 |
for mp4_path in Path(args.path).glob('*'):
|
| 30 |
|
rtmo_gpu.py
CHANGED
|
@@ -291,22 +291,36 @@ class RTMO_GPU(object):
|
|
| 291 |
- final_boxes (np.ndarray): Final bounding boxes.
|
| 292 |
- final_scores (np.ndarray): Final scores.
|
| 293 |
"""
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
return keypoints, scores
|
| 312 |
|
|
@@ -321,16 +335,21 @@ class RTMO_GPU(object):
|
|
| 321 |
"""
|
| 322 |
# build input to (1, 3, H, W)
|
| 323 |
img = img.transpose(2, 0, 1)
|
| 324 |
-
img = np.ascontiguousarray(img, dtype=np.float32)
|
| 325 |
input = img[None, :, :, :]
|
| 326 |
|
| 327 |
# Create an IO Binding object
|
| 328 |
io_binding = self.session.io_binding()
|
| 329 |
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
# Run inference with IO Binding
|
| 336 |
self.session.run_with_iobinding(io_binding)
|
|
@@ -355,7 +374,8 @@ class RTMO_GPU(object):
|
|
| 355 |
model_input_size: tuple = (640, 640),
|
| 356 |
mean: tuple = None,
|
| 357 |
std: tuple = None,
|
| 358 |
-
device: str = 'cuda'
|
|
|
|
| 359 |
|
| 360 |
if not os.path.exists(onnx_model):
|
| 361 |
# If the file does not exist, raise FileNotFoundError
|
|
@@ -381,6 +401,7 @@ class RTMO_GPU(object):
|
|
| 381 |
self.mean = mean
|
| 382 |
self.std = std
|
| 383 |
self.device = device
|
|
|
|
| 384 |
|
| 385 |
class RTMO_GPU_Batch(RTMO_GPU):
|
| 386 |
def preprocess_batch(self, imgs: List[np.ndarray]) -> Tuple[np.ndarray, List[float]]:
|
|
|
|
| 291 |
- final_boxes (np.ndarray): Final bounding boxes.
|
| 292 |
- final_scores (np.ndarray): Final scores.
|
| 293 |
"""
|
| 294 |
+
|
| 295 |
+
if not self.is_yolo_nas_pose:
|
| 296 |
+
# RTMO
|
| 297 |
+
det_outputs, pose_outputs = outputs
|
| 298 |
+
|
| 299 |
+
# onnx contains nms module
|
| 300 |
+
pack_dets = (det_outputs[0, :, :4], det_outputs[0, :, 4])
|
| 301 |
+
final_boxes, final_scores = pack_dets
|
| 302 |
+
final_boxes /= ratio
|
| 303 |
+
isscore = final_scores > 0.3
|
| 304 |
+
isbbox = [i for i in isscore]
|
| 305 |
+
# final_boxes = final_boxes[isbbox]
|
| 306 |
+
|
| 307 |
+
# decode pose outputs
|
| 308 |
+
keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
|
| 309 |
+
keypoints = keypoints / ratio
|
| 310 |
+
|
| 311 |
+
keypoints = keypoints[isbbox]
|
| 312 |
+
scores = scores[isbbox]
|
| 313 |
+
else:
|
| 314 |
+
# NAS Pose
|
| 315 |
+
flat_predictions = outputs[0]
|
| 316 |
+
if flat_predictions.shape[0] > 0: # at least one person found
|
| 317 |
+
mask = flat_predictions[:, 0] == 0
|
| 318 |
+
pred_bboxes = flat_predictions[mask, 1:5]
|
| 319 |
+
pred_joints = flat_predictions[mask, 6:].reshape((len(pred_bboxes), -1, 3))
|
| 320 |
+
keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
|
| 321 |
+
keypoints = keypoints / ratio
|
| 322 |
+
else: # no detection
|
| 323 |
+
keypoints, scores = np.zeros((0, 17, 2)), np.zeros((0, 17))
|
| 324 |
|
| 325 |
return keypoints, scores
|
| 326 |
|
|
|
|
| 335 |
"""
|
| 336 |
# build input to (1, 3, H, W)
|
| 337 |
img = img.transpose(2, 0, 1)
|
| 338 |
+
img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
|
| 339 |
input = img[None, :, :, :]
|
| 340 |
|
| 341 |
# Create an IO Binding object
|
| 342 |
io_binding = self.session.io_binding()
|
| 343 |
|
| 344 |
+
if not self.is_yolo_nas_pose:
|
| 345 |
+
# RTMO
|
| 346 |
+
io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
|
| 347 |
+
io_binding.bind_output(name='dets')
|
| 348 |
+
io_binding.bind_output(name='keypoints')
|
| 349 |
+
else:
|
| 350 |
+
# NAS Pose, flat format
|
| 351 |
+
io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
|
| 352 |
+
io_binding.bind_output(name='graph2_flat_predictions')
|
| 353 |
|
| 354 |
# Run inference with IO Binding
|
| 355 |
self.session.run_with_iobinding(io_binding)
|
|
|
|
| 374 |
model_input_size: tuple = (640, 640),
|
| 375 |
mean: tuple = None,
|
| 376 |
std: tuple = None,
|
| 377 |
+
device: str = 'cuda',
|
| 378 |
+
is_yolo_nas_pose = False):
|
| 379 |
|
| 380 |
if not os.path.exists(onnx_model):
|
| 381 |
# If the file does not exist, raise FileNotFoundError
|
|
|
|
| 401 |
self.mean = mean
|
| 402 |
self.std = std
|
| 403 |
self.device = device
|
| 404 |
+
self.is_yolo_nas_pose = is_yolo_nas_pose
|
| 405 |
|
| 406 |
class RTMO_GPU_Batch(RTMO_GPU):
|
| 407 |
def preprocess_batch(self, imgs: List[np.ndarray]) -> Tuple[np.ndarray, List[float]]:
|