# rtmo / rtmo_gpu.py

# Optimization for CUDAExecutionProvider

# 43a489d almost 2 years ago

# 13.4 kB

	import os
	import numpy as np
	from typing import List, Tuple
	import onnxruntime as ort
	import cv2

	# dictionary from https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/skeleton/coco17.py
	# COCO 17-keypoint skeleton description.
	#
	# The nested dictionaries are generated from two compact tables so that each
	# keypoint / limb is a single row:
	#   * keypoint rows are (name, color, name of the mirrored keypoint)
	#   * limb rows are (first keypoint name, second keypoint name, color)
	# The row position is the integer id used both as dictionary key and as the
	# 'id' field.  Colors are materialised as fresh lists and links as tuples.
	coco17 = {
	    'name': 'coco17',
	    'keypoint_info': {
	        idx: {'name': kpt_name, 'id': idx, 'color': list(color), 'swap': mirror}
	        for idx, (kpt_name, color, mirror) in enumerate([
	            ('nose', (51, 153, 255), ''),
	            ('left_eye', (51, 153, 255), 'right_eye'),
	            ('right_eye', (51, 153, 255), 'left_eye'),
	            ('left_ear', (51, 153, 255), 'right_ear'),
	            ('right_ear', (51, 153, 255), 'left_ear'),
	            ('left_shoulder', (0, 255, 0), 'right_shoulder'),
	            ('right_shoulder', (255, 128, 0), 'left_shoulder'),
	            ('left_elbow', (0, 255, 0), 'right_elbow'),
	            ('right_elbow', (255, 128, 0), 'left_elbow'),
	            ('left_wrist', (0, 255, 0), 'right_wrist'),
	            ('right_wrist', (255, 128, 0), 'left_wrist'),
	            ('left_hip', (0, 255, 0), 'right_hip'),
	            ('right_hip', (255, 128, 0), 'left_hip'),
	            ('left_knee', (0, 255, 0), 'right_knee'),
	            ('right_knee', (255, 128, 0), 'left_knee'),
	            ('left_ankle', (0, 255, 0), 'right_ankle'),
	            ('right_ankle', (255, 128, 0), 'left_ankle'),
	        ])
	    },
	    'skeleton_info': {
	        idx: {'link': (first, second), 'id': idx, 'color': list(color)}
	        for idx, (first, second, color) in enumerate([
	            ('left_ankle', 'left_knee', (0, 255, 0)),
	            ('left_knee', 'left_hip', (0, 255, 0)),
	            ('right_ankle', 'right_knee', (255, 128, 0)),
	            ('right_knee', 'right_hip', (255, 128, 0)),
	            ('left_hip', 'right_hip', (51, 153, 255)),
	            ('left_shoulder', 'left_hip', (51, 153, 255)),
	            ('right_shoulder', 'right_hip', (51, 153, 255)),
	            ('left_shoulder', 'right_shoulder', (51, 153, 255)),
	            ('left_shoulder', 'left_elbow', (0, 255, 0)),
	            ('right_shoulder', 'right_elbow', (255, 128, 0)),
	            ('left_elbow', 'left_wrist', (0, 255, 0)),
	            ('right_elbow', 'right_wrist', (255, 128, 0)),
	            ('left_eye', 'right_eye', (51, 153, 255)),
	            ('nose', 'left_eye', (51, 153, 255)),
	            ('nose', 'right_eye', (51, 153, 255)),
	            ('left_eye', 'left_ear', (51, 153, 255)),
	            ('right_eye', 'right_ear', (51, 153, 255)),
	            ('left_ear', 'left_shoulder', (51, 153, 255)),
	            ('right_ear', 'right_shoulder', (51, 153, 255)),
	        ])
	    },
	}

	# functions from https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/draw.py#L71
	def draw_mmpose(img,
	                keypoints,
	                scores,
	                keypoint_info,
	                skeleton_info,
	                kpt_thr=0.5,
	                radius=2,
	                line_width=2):
	    """Draw the keypoints and limbs of a single instance onto ``img``.

	    Args:
	        img: Image handed to ``cv2.circle`` / ``cv2.line``.
	        keypoints: 2-D array; row ``i`` holds the x, y position of keypoint
	            ``i`` in its first two columns.
	        scores: Per-keypoint scores, iterated in keypoint order.
	        keypoint_info: Mapping of keypoint index to a dict with ``'name'``,
	            ``'id'`` and ``'color'`` entries.
	        skeleton_info: Mapping whose values are dicts with a ``'link'``
	            (pair of keypoint names) and a ``'color'`` entry.
	        kpt_thr: Keypoints scoring below this value are not drawn.
	        radius: Radius of the filled keypoint circles.
	        line_width: Thickness of the limb lines.

	    Returns:
	        The image returned by the last drawing call, or ``img`` itself when
	        nothing was drawn.
	    """
	    assert len(keypoints.shape) == 2

	    visible = [score >= kpt_thr for score in scores]

	    # Filled while drawing the dots; the limb pass resolves names through it.
	    name_to_id = {}
	    for idx, info in keypoint_info.items():
	        dot_color = tuple(info['color'])
	        name_to_id[info['name']] = info['id']

	        point = keypoints[idx]
	        if not visible[idx]:
	            continue
	        center = (int(point[0]), int(point[1]))
	        # thickness -1 asks OpenCV for a filled circle
	        img = cv2.circle(img, center, int(radius), dot_color, -1)

	    for limb in skeleton_info.values():
	        joints = limb['link']
	        start, end = name_to_id[joints[0]], name_to_id[joints[1]]

	        # A limb is drawn only when both of its end points are visible.
	        if not (visible[start] and visible[end]):
	            continue
	        limb_color = limb['color']
	        tail = keypoints[start]
	        head = keypoints[end]
	        img = cv2.line(img,
	                       (int(tail[0]), int(tail[1])),
	                       (int(head[0]), int(head[1])),
	                       limb_color,
	                       thickness=line_width)

	    return img

	# with simplification to use onnxruntime only
	def draw_skeleton(img,
	                  keypoints,
	                  scores,
	                  kpt_thr=0.5,
	                  radius=2,
	                  line_width=2):
	    """Draw the skeleton of every instance in ``keypoints`` onto ``img``.

	    Args:
	        img: Image to draw on; it is forwarded to ``draw_mmpose``.
	        keypoints: Array of keypoint positions, either for a batch of
	            instances (instances, keypoints, coords) or for a single
	            instance (keypoints, coords).
	        scores: Keypoint scores matching ``keypoints`` without the trailing
	            coordinate axis.
	        kpt_thr: Score threshold forwarded to ``draw_mmpose``.
	        radius: Keypoint circle radius forwarded to ``draw_mmpose``.
	        line_width: Limb line thickness forwarded to ``draw_mmpose``.

	    Returns:
	        The image after all instances have been drawn.

	    Raises:
	        NotImplementedError: If the number of keypoints per instance is not
	            17 (only the ``coco17`` layout is available).
	    """
	    # Promote a single instance to a batch of one BEFORE looking at the
	    # keypoint count; otherwise a (17, 2) array would be read as "2 keypoints"
	    # and rejected.  Ellipsis indexing also copes with the 1-D scores that
	    # accompany a single instance.
	    if len(keypoints.shape) == 2:
	        keypoints = keypoints[None, ...]
	        scores = scores[None, ...]

	    num_keypoints = keypoints.shape[1]
	    if num_keypoints != 17:
	        raise NotImplementedError(
	            f'no skeleton definition for {num_keypoints} keypoints; '
	            'only coco17 is supported')

	    # Direct reference to the module-level table (no eval needed).
	    keypoint_info = coco17['keypoint_info']
	    skeleton_info = coco17['skeleton_info']

	    for i in range(keypoints.shape[0]):
	        img = draw_mmpose(img, keypoints[i], scores[i], keypoint_info,
	                          skeleton_info, kpt_thr, radius, line_width)
	    return img

	class RTMO_GPU(object):
	    """RTMO pose estimation through an onnxruntime session with IO binding.

	    Calling an instance on an image runs ``preprocess`` -> ``inference`` ->
	    ``postprocess`` and returns the keypoints and keypoint scores of the
	    detections that pass the score threshold.
	    """

	    def __init__(self,
	                 onnx_model: str = None,
	                 model_input_size: tuple = (640, 640),
	                 mean: tuple = None,
	                 std: tuple = None,
	                 device: str = 'cuda'):
	        """Create the onnxruntime session.

	        Args:
	            onnx_model: Path of the ONNX file.  If the path does not exist it
	                is passed to rtmlib's ``download_checkpoint`` and the value
	                returned by that call is used instead.
	            model_input_size: (height, width) of the padded model input.
	            mean: Optional value subtracted from the padded image.
	            std: Divisor applied together with ``mean``; it must be provided
	                whenever ``mean`` is.
	            device: ``'cuda'`` or ``'cpu'``; selects the execution providers.

	        Raises:
	            KeyError: If ``device`` is neither ``'cuda'`` nor ``'cpu'``.
	        """
	        if not os.path.exists(onnx_model):
	            # Imported lazily: rtmlib is only required for this fallback.
	            from rtmlib.tools.file import download_checkpoint
	            onnx_model = download_checkpoint(onnx_model)

	        # onnxruntime expects a *sequence* of providers.  The CPU entry used
	        # to be a bare string, which is not a list of provider names.
	        providers = {
	            'cpu': ['CPUExecutionProvider'],
	            'cuda': [
	                ('CUDAExecutionProvider', {
	                    'cudnn_conv_algo_search': 'DEFAULT',
	                    'cudnn_conv_use_max_workspace': True,
	                }),
	                'CPUExecutionProvider',
	            ],
	        }

	        self.session = ort.InferenceSession(path_or_bytes=onnx_model,
	                                            providers=providers[device])

	        self.onnx_model = onnx_model
	        self.model_input_size = model_input_size
	        self.mean = mean
	        self.std = std
	        self.device = device

	    def preprocess(self, img: np.ndarray):
	        """Resize ``img`` with its aspect ratio kept and pad it to input size.

	        The resized image is written into the top-left corner of a canvas
	        filled with the value 114.  When ``self.mean`` is set the canvas is
	        normalized as ``(canvas - mean) / std``.

	        Args:
	            img (np.ndarray): Input image, 2-D or 3-D (a 3-D image gets a
	                three-channel canvas).

	        Returns:
	            tuple:
	            - padded_img (np.ndarray): Padded (and optionally normalized)
	              image.
	            - ratio (float): Resize factor that was applied to ``img``.
	        """
	        input_h, input_w = self.model_input_size[0], self.model_input_size[1]
	        if len(img.shape) == 3:
	            padded_img = np.full((input_h, input_w, 3), 114, dtype=np.uint8)
	        else:
	            padded_img = np.full(self.model_input_size, 114, dtype=np.uint8)

	        # Largest scale at which the image still fits inside the canvas.
	        ratio = min(input_h / img.shape[0], input_w / img.shape[1])
	        new_h, new_w = int(img.shape[0] * ratio), int(img.shape[1] * ratio)
	        resized_img = cv2.resize(
	            img,
	            (new_w, new_h),  # cv2.resize takes (width, height)
	            interpolation=cv2.INTER_LINEAR,
	        ).astype(np.uint8)
	        padded_img[:new_h, :new_w] = resized_img

	        # Normalize with local arrays so the configured attributes are not
	        # overwritten on every call.
	        if self.mean is not None:
	            mean = np.asarray(self.mean)
	            std = np.asarray(self.std)
	            padded_img = (padded_img - mean) / std

	        return padded_img, ratio

	    def postprocess(
	        self,
	        outputs: List[np.ndarray],
	        ratio: float = 1.,
	        score_thr: float = 0.3,
	    ) -> Tuple[np.ndarray, np.ndarray]:
	        """Select confident detections and map their keypoints back.

	        Args:
	            outputs (List[np.ndarray]): The two model outputs, detections
	                first and poses second.  Column 4 of the first batch item of
	                the detections is used as detection score; the last axis of
	                the poses holds x, y and keypoint score.
	            ratio (float): Resize factor returned by ``preprocess``.
	            score_thr (float): Detections scoring at or below this value are
	                dropped.

	        Returns:
	            tuple:
	            - keypoints (np.ndarray): x, y positions of the kept detections
	              divided by ``ratio``.
	            - scores (np.ndarray): Keypoint scores of the kept detections.
	        """
	        det_outputs, pose_outputs = outputs

	        # The exported model already contains NMS, so only a score filter is
	        # applied here.  The boxes themselves are not returned, hence they are
	        # not rescaled (and the caller's array is left untouched).
	        keep = det_outputs[0, :, 4] > score_thr

	        # decode pose outputs
	        keypoints = pose_outputs[0, :, :, :2] / ratio
	        scores = pose_outputs[0, :, :, 2]

	        return keypoints[keep], scores[keep]

	    def inference(self, img: np.ndarray):
	        """Run the session on one preprocessed image.

	        Args:
	            img (np.ndarray): Preprocessed image with the channel axis last.

	        Returns:
	            list: The bound outputs ``'dets'`` and ``'keypoints'`` converted
	            to numpy arrays, in binding order.
	        """
	        # build input to (1, 3, H, W)
	        chw = np.ascontiguousarray(img.transpose(2, 0, 1), dtype=np.float32)
	        input_tensor = chw[None, :, :, :]

	        io_binding = self.session.io_binding()

	        # The input is bound straight to the numpy buffer, so `input_tensor`
	        # has to stay alive until the run has finished.
	        io_binding.bind_input(name='input',
	                              device_type='cpu',
	                              device_id=0,
	                              element_type=np.float32,
	                              shape=input_tensor.shape,
	                              buffer_ptr=input_tensor.ctypes.data)
	        # Outputs are bound by name only; no buffer is supplied for them.
	        io_binding.bind_output(name='dets')
	        io_binding.bind_output(name='keypoints')

	        self.session.run_with_iobinding(io_binding)

	        return [output.numpy() for output in io_binding.get_outputs()]

	    def __call__(self, image: np.ndarray):
	        """Run the full pipeline on ``image``.

	        Returns:
	            tuple: ``(keypoints, scores)`` as produced by ``postprocess``.
	        """
	        image, ratio = self.preprocess(image)
	        outputs = self.inference(image)
	        keypoints, scores = self.postprocess(outputs, ratio)
	        return keypoints, scores