Spaces:
Sleeping
Sleeping
| from abc import ABC | |
| import cv2 | |
| import numpy as np | |
| import onnxruntime as ort | |
| class OnnxModel(ABC): | |
| def __init__(self, model_path, image_size): | |
| self.model_path = model_path | |
| self.image_size = image_size | |
| self.mean = np.array([127, 127, 127], dtype=np.float32) | |
| self.std = np.array([128, 128, 128], dtype=np.float32) | |
| options, prov_opts, providers = self.get_onnx_provider() | |
| self.sess = ort.InferenceSession( | |
| model_path, sess_options=options, providers=providers, provider_options=prov_opts | |
| ) | |
| self._get_input_output() | |
| def preprocess(self, frame): | |
| """ | |
| Preprocess frame | |
| Parameters | |
| ---------- | |
| frame : np.ndarray | |
| Frame to preprocess | |
| Returns | |
| ------- | |
| np.ndarray | |
| Preprocessed frame | |
| """ | |
| image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | |
| image = cv2.resize(image, self.image_size) | |
| image = (image - self.mean) / self.std | |
| image = np.transpose(image, [2, 0, 1]) | |
| image = np.expand_dims(image, axis=0) | |
| return image | |
| def _get_input_output(self): | |
| inputs = self.sess.get_inputs() | |
| self.inputs = "".join( | |
| [ | |
| f"\n {i}: {input.name}" f" Shape: ({','.join(map(str, input.shape))})" f" Dtype: {input.type}" | |
| for i, input in enumerate(inputs) | |
| ] | |
| ) | |
| outputs = self.sess.get_outputs() | |
| self.outputs = "".join( | |
| [ | |
| f"\n {i}: {output.name}" f" Shape: ({','.join(map(str, output.shape))})" f" Dtype: {output.type}" | |
| for i, output in enumerate(outputs) | |
| ] | |
| ) | |
| def get_onnx_provider(): | |
| """ | |
| Get onnx provider | |
| Returns | |
| ------- | |
| options : onnxruntime.SessionOptions | |
| Session options | |
| prov_opts : dict | |
| Provider options | |
| providers : list | |
| List of providers | |
| """ | |
| providers = ["CPUExecutionProvider"] | |
| options = ort.SessionOptions() | |
| options.enable_mem_pattern = False | |
| options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL | |
| prov_opts = [] | |
| print("Using ONNX Runtime", ort.get_device()) | |
| if "DML" in ort.get_device(): | |
| prov_opts = [{"device_id": 0}] | |
| providers.append("DmlExecutionProvider") | |
| elif "GPU" in ort.get_device(): | |
| prov_opts = [ | |
| { | |
| "device_id": 0, | |
| "arena_extend_strategy": "kNextPowerOfTwo", | |
| "gpu_mem_limit": 2 * 1024 * 1024 * 1024, | |
| "cudnn_conv_algo_search": "EXHAUSTIVE", | |
| "do_copy_in_default_stream": True, | |
| } | |
| ] | |
| providers.append("CUDAExecutionProvider") | |
| return options, prov_opts, providers | |
| def __repr__(self): | |
| return ( | |
| f"Providers: {self.sess.get_providers()}\n" | |
| f"Model: {self.sess.get_modelmeta().description}\n" | |
| f"Version: {self.sess.get_modelmeta().version}\n" | |
| f"Inputs: {self.inputs}\n" | |
| f"Outputs: {self.outputs}" | |
| ) | |
| class HandDetection(OnnxModel): | |
| def __init__(self, model_path, image_size=(320, 240)): | |
| super().__init__(model_path, image_size) | |
| self.image_size = image_size | |
| self.sess = ort.InferenceSession(model_path) | |
| self.input_name = self.sess.get_inputs()[0].name | |
| self.output_names = [output.name for output in self.sess.get_outputs()] | |
| def __call__(self, frame): | |
| input_tensor = self.preprocess(frame) | |
| boxes, _, probs = self.sess.run(self.output_names, {self.input_name: input_tensor}) | |
| width, height = frame.shape[1], frame.shape[0] | |
| boxes[:, 0] *= width | |
| boxes[:, 1] *= height | |
| boxes[:, 2] *= width | |
| boxes[:, 3] *= height | |
| return boxes.astype(np.int32), probs | |
| class HandClassification(OnnxModel): | |
| def __init__(self, model_path, image_size=(128, 128)): | |
| super().__init__(model_path, image_size) | |
| def get_square(box, image): | |
| """ | |
| Get square box | |
| Parameters | |
| ---------- | |
| box : np.ndarray | |
| Box coordinates (x1, y1, x2, y2) | |
| image : np.ndarray | |
| Image for shape | |
| """ | |
| height, width, _ = image.shape | |
| x0, y0, x1, y1 = box | |
| w, h = x1 - x0, y1 - y0 | |
| if h < w: | |
| y0 = y0 - int((w - h) / 2) | |
| y1 = y0 + w | |
| if h > w: | |
| x0 = x0 - int((h - w) / 2) | |
| x1 = x0 + h | |
| x0 = max(0, x0) | |
| y0 = max(0, y0) | |
| x1 = min(width - 1, x1) | |
| y1 = min(height - 1, y1) | |
| return x0, y0, x1, y1 | |
| def get_crops(self, frame, bboxes): | |
| """ | |
| Get crops from frame | |
| Parameters | |
| ---------- | |
| frame : np.ndarray | |
| Frame to crop from bboxes | |
| bboxes : np.ndarray | |
| Bounding boxes | |
| Returns | |
| ------- | |
| crops : np.ndarray | |
| Crops from frame | |
| """ | |
| crops = [] | |
| for bbox in bboxes: | |
| bbox = self.get_square(bbox, frame) | |
| crop = frame[bbox[1] : bbox[3], bbox[0] : bbox[2]] | |
| crops.append(crop) | |
| return crops | |
| def __call__(self, image, bboxes): | |
| """ | |
| Get predictions from model | |
| Parameters | |
| ---------- | |
| image : np.ndarray | |
| Image to predict | |
| bboxes : np.ndarray | |
| Bounding boxes | |
| Returns | |
| ------- | |
| predictions : np.ndarray | |
| Predictions from model | |
| """ | |
| crops = self.get_crops(image, bboxes) | |
| crops = [self.preprocess(crop) for crop in crops] | |
| input_name = self.sess.get_inputs()[0].name | |
| outputs = self.sess.run(None, {input_name: np.concatenate(crops, axis=0)})[0] | |
| labels = np.argmax(outputs, axis=1) | |
| return labels | |