Spaces:

algoryn
/

validation

Sleeping

App Files Files Community

validation/src/gesturedetection/onnx_models.py

tommulder

Deploy gesture detection & validation API

95db528 3 months ago

raw

history blame contribute delete

6.01 kB

	from abc import ABC

	import cv2
	import numpy as np
	import onnxruntime as ort


	class OnnxModel(ABC):
	    """
	    Base wrapper around an ONNX Runtime inference session.

	    On construction it picks execution providers, opens the session for
	    ``model_path`` and records a printable summary of the model's inputs
	    and outputs. Subclasses reuse ``preprocess`` and ``self.sess``.
	    """

	    def __init__(self, model_path, image_size):
	        """
	        Open the model and cache its metadata
	        Parameters
	        ----------
	        model_path : str
	            Path handed to ``ort.InferenceSession``
	        image_size : tuple
	            Target size handed to ``cv2.resize`` in ``preprocess``
	        """
	        self.model_path = model_path
	        self.image_size = image_size
	        # Normalisation constants applied per channel in ``preprocess``.
	        self.mean = np.array([127, 127, 127], dtype=np.float32)
	        self.std = np.array([128, 128, 128], dtype=np.float32)
	        options, prov_opts, providers = self.get_onnx_provider()
	        self.sess = ort.InferenceSession(
	            model_path, sess_options=options, providers=providers, provider_options=prov_opts
	        )
	        self._get_input_output()

	    def preprocess(self, frame):
	        """
	        Preprocess frame
	        Parameters
	        ----------
	        frame : np.ndarray
	            Frame to preprocess (converted with ``cv2.COLOR_BGR2RGB``)
	        Returns
	        -------
	        np.ndarray
	            Resized, normalised frame moved to channel-first layout with
	            a leading batch axis of length 1
	        """
	        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	        image = cv2.resize(image, self.image_size)
	        image = (image - self.mean) / self.std
	        # HWC -> CHW, then add the batch axis.
	        image = np.transpose(image, [2, 0, 1])
	        image = np.expand_dims(image, axis=0)
	        return image

	    @staticmethod
	    def _describe_nodes(nodes):
	        """Format session input/output nodes as one indented line each."""
	        return "".join(
	            f"\n {index}: {node.name}"
	            f" Shape: ({','.join(map(str, node.shape))})"
	            f" Dtype: {node.type}"
	            for index, node in enumerate(nodes)
	        )

	    def _get_input_output(self):
	        """Store printable summaries of the session inputs and outputs."""
	        self.inputs = self._describe_nodes(self.sess.get_inputs())
	        self.outputs = self._describe_nodes(self.sess.get_outputs())

	    @staticmethod
	    def _select_providers(device):
	        """
	        Choose execution providers for a device string
	        Parameters
	        ----------
	        device : str
	            Value reported by ``ort.get_device()``
	        Returns
	        -------
	        providers : list
	            Provider names, accelerator first and CPU as the fallback
	        prov_opts : list
	            One options dict per entry of ``providers``
	        """
	        # The accelerator is listed before the CPU fallback, and every
	        # provider gets its own options dict so both lists stay aligned.
	        # NOTE(review): this relies on ONNX Runtime treating ``providers``
	        # as decreasing precedence and requiring equal-length lists.
	        if "DML" in device:
	            return ["DmlExecutionProvider", "CPUExecutionProvider"], [{"device_id": 0}, {}]

	        if "GPU" in device:
	            cuda_options = {
	                "device_id": 0,
	                "arena_extend_strategy": "kNextPowerOfTwo",
	                "gpu_mem_limit": 2 * 1024 * 1024 * 1024,
	                "cudnn_conv_algo_search": "EXHAUSTIVE",
	                "do_copy_in_default_stream": True,
	            }
	            return ["CUDAExecutionProvider", "CPUExecutionProvider"], [cuda_options, {}]

	        return ["CPUExecutionProvider"], [{}]

	    @staticmethod
	    def get_onnx_provider():
	        """
	        Get onnx provider
	        Returns
	        -------
	        options : onnxruntime.SessionOptions
	            Session options
	        prov_opts : list
	            Provider options, one dict per provider
	        providers : list
	            List of providers in order of preference
	        """
	        options = ort.SessionOptions()
	        # NOTE(review): memory patterns off + sequential execution is
	        # presumably set for the DirectML provider - confirm in its docs.
	        options.enable_mem_pattern = False
	        options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

	        # Query the device once and reuse the answer.
	        device = ort.get_device()
	        print("Using ONNX Runtime", device)

	        providers, prov_opts = OnnxModel._select_providers(device)
	        return options, prov_opts, providers

	    def __repr__(self):
	        meta = self.sess.get_modelmeta()
	        return (
	            f"Providers: {self.sess.get_providers()}\n"
	            f"Model: {meta.description}\n"
	            f"Version: {meta.version}\n"
	            f"Inputs: {self.inputs}\n"
	            f"Outputs: {self.outputs}"
	        )

	class HandDetection(OnnxModel):
	    """
	    Hand detector: runs the ONNX model on a frame and returns boxes
	    scaled to the frame's pixel size together with the model's scores.
	    """

	    def __init__(self, model_path, image_size=(320, 240)):
	        """
	        Parameters
	        ----------
	        model_path : str
	            Path to the detection ONNX model
	        image_size : tuple
	            Size the frame is resized to before inference
	        """
	        # The base initialiser already stores ``image_size`` and opens
	        # ``self.sess`` with the selected providers and session options.
	        # Opening a second session here would load the model twice and
	        # throw that configuration away.
	        super().__init__(model_path, image_size)
	        self.input_name = self.sess.get_inputs()[0].name
	        self.output_names = [output.name for output in self.sess.get_outputs()]

	    def __call__(self, frame):
	        """
	        Detect hands in a frame
	        Parameters
	        ----------
	        frame : np.ndarray
	            Frame to run detection on
	        Returns
	        -------
	        boxes : np.ndarray
	            Boxes scaled by the frame width/height and cast to int32
	        probs : np.ndarray
	            Third output of the model, returned unchanged
	        """
	        input_tensor = self.preprocess(frame)
	        # The model yields three outputs; the middle one is not used here.
	        boxes, _, probs = self.sess.run(self.output_names, {self.input_name: input_tensor})
	        height, width = frame.shape[:2]
	        # NOTE(review): scaling by the frame size suggests the model emits
	        # coordinates relative to the frame - confirm against the model.
	        boxes[:, 0] *= width
	        boxes[:, 1] *= height
	        boxes[:, 2] *= width
	        boxes[:, 3] *= height
	        return boxes.astype(np.int32), probs


	class HandClassification(OnnxModel):
	    """
	    Gesture classifier: crops each detected box out of the image, runs
	    the crops through the ONNX model as one batch and returns the
	    arg-max label per box.
	    """

	    def __init__(self, model_path, image_size=(128, 128)):
	        """
	        Parameters
	        ----------
	        model_path : str
	            Path to the classification ONNX model
	        image_size : tuple
	            Size each crop is resized to before inference
	        """
	        super().__init__(model_path, image_size)

	    @staticmethod
	    def get_square(box, image):
	        """
	        Get square box
	        Parameters
	        ----------
	        box : np.ndarray
	            Box coordinates (x1, y1, x2, y2)
	        image : np.ndarray
	            Image for shape
	        Returns
	        -------
	        tuple
	            (x0, y0, x1, y1) grown along the shorter side to match the
	            longer one, then clamped to the image bounds
	        """
	        height, width, _ = image.shape
	        x0, y0, x1, y1 = box
	        w, h = x1 - x0, y1 - y0
	        # Extend the shorter side symmetrically so the box becomes square.
	        if h < w:
	            y0 = y0 - int((w - h) / 2)
	            y1 = y0 + w
	        if h > w:
	            x0 = x0 - int((h - w) / 2)
	            x1 = x0 + h
	        # Clamping can make the box non-square again near the borders.
	        x0 = max(0, x0)
	        y0 = max(0, y0)
	        x1 = min(width - 1, x1)
	        y1 = min(height - 1, y1)
	        return x0, y0, x1, y1

	    def get_crops(self, frame, bboxes):
	        """
	        Get crops from frame
	        Parameters
	        ----------
	        frame : np.ndarray
	            Frame to crop from bboxes
	        bboxes : np.ndarray
	            Bounding boxes

	        Returns
	        -------
	        crops : list
	            One cropped view of the frame per bounding box, in order
	        """
	        squares = (self.get_square(bbox, frame) for bbox in bboxes)
	        return [frame[y0:y1, x0:x1] for x0, y0, x1, y1 in squares]

	    def __call__(self, image, bboxes):
	        """
	        Get predictions from model
	        Parameters
	        ----------
	        image : np.ndarray
	            Image to predict
	        bboxes : np.ndarray
	            Bounding boxes

	        Returns
	        -------
	        predictions : np.ndarray
	            Arg-max label per bounding box; empty when no boxes are given
	        """
	        crops = self.get_crops(image, bboxes)
	        if not crops:
	            # np.concatenate raises on an empty list, so "no detections"
	            # is answered with an empty label array instead of crashing.
	            return np.empty((0,), dtype=np.intp)

	        batch = np.concatenate([self.preprocess(crop) for crop in crops], axis=0)
	        input_name = self.sess.get_inputs()[0].name
	        outputs = self.sess.run(None, {input_name: batch})[0]
	        return np.argmax(outputs, axis=1)