# NOTE: the original import order is kept on purpose; torch is imported before
# onnxruntime, and native-library load order can matter for these packages.
import torch
import cv2
import os
import numpy as np
import onnxruntime


def area_of(left_top, right_bottom) -> torch.Tensor:
    """Compute the areas of rectangles given two corners.

    Args:
        left_top (N, 2): left top corner.
        right_bottom (N, 2): right bottom corner.

    Returns:
        area (N): return the area.
    """
    # Negative extents (right_bottom left of / above left_top) are clamped to
    # zero so such rectangles contribute an area of 0 instead of a bogus value.
    hw = torch.clamp(right_bottom - left_top, min=0.0)
    return hw[..., 0] * hw[..., 1]


def iou_of(boxes0, boxes1, eps=1e-5):
    """Return intersection-over-union (Jaccard index) of boxes.

    Args:
        boxes0 (N, 4): ground truth boxes.
        boxes1 (N or 1, 4): predicted boxes.
        eps: a small number to avoid 0 as denominator.

    Returns:
        iou (N): IoU values.
    """
    overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])

    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)


def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Run hard non-maximum suppression on scored boxes.

    Args:
        box_scores (N, 5): boxes in corner-form and probabilities.
        iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If k <= 0, keep all the results.
        candidate_size: only consider the candidates with the highest scores.

    Returns:
        (K, 5) tensor: the rows of ``box_scores`` that were kept, in
        descending score order.
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    picked = []
    _, indexes = scores.sort(descending=True)
    indexes = indexes[:candidate_size]
    while len(indexes) > 0:
        # The best remaining candidate is always kept.
        current = indexes[0]
        picked.append(current.item())
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        indexes = indexes[1:]
        rest_boxes = boxes[indexes, :]
        iou = iou_of(
            rest_boxes,
            current_box.unsqueeze(0),
        )
        # Drop every remaining candidate that overlaps the kept box too much.
        indexes = indexes[iou <= iou_threshold]

    return box_scores[picked, :]


class Resize:
    """Resize an image to a fixed size with ``cv2.resize``.

    ``size`` is forwarded to ``cv2.resize`` as its ``dsize`` argument, which
    OpenCV interprets as ``(width, height)``.
    """

    def __init__(self, size=(300, 300)):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        image = cv2.resize(image, (self.size[0], self.size[1]))
        return image, boxes, labels


class SubtractMeans:
    """Convert an image to float32 and subtract a mean from it."""

    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        # astype() returns a fresh float32 array, so the in-place subtraction
        # below never touches the caller's image and the result is already
        # float32 (no second conversion needed).
        image = image.astype(np.float32)
        image -= self.mean
        return image, boxes, labels


class ToTensor:
    """Convert an (H, W, C) numpy image into a (C, H, W) float32 tensor."""

    def __call__(self, cvimage, boxes=None, labels=None):
        tensor = torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1)
        return tensor, boxes, labels


class Compose:
    """Composes several augmentations together.

    Each transform is called as ``t(img, boxes, labels)`` and must return the
    same three-tuple, which is fed to the next transform.

    Args:
        transforms (List[Transform]): list of transforms to compose.

    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        for t in self.transforms:
            img, boxes, labels = t(img, boxes, labels)
        return img, boxes, labels


class PredictionTransform:
    """Inference-time preprocessing: resize, subtract mean, divide by std,
    then convert to a (C, H, W) tensor.

    Args:
        size: target size handed to ``Resize``.
        mean: value(s) subtracted from the resized image.
        std: value the mean-subtracted image is divided by.
    """

    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            Resize(size),
            SubtractMeans(mean),
            # Scaling step, written with the same 3-argument signature as the
            # other transforms so Compose can chain it.
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor()
        ])

    def __call__(self, image):
        image, _, _ = self.transform(image)
        return image


class Config:
    # Size passed to Resize (and from there to cv2.resize as dsize).
    image_size = [320, 240]
    # Per-channel mean subtracted from the input image.
    image_mean_test = np.array([127, 127, 127])
    # Divisor applied after mean subtraction.
    image_std = 128.0


class Predictor:
    """
    Face detection with pretrained model.
    Reference:
        - https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB

    Args:
        onnx_dir: directory that contains ``version-RFB-320.onnx``.
        device: ``'cuda'`` to prefer the CUDA execution provider; any other
            value runs on the CPU execution provider only.
        device_id: CUDA device id, used only when ``device == 'cuda'``.
        iou_threshold: IoU threshold used by ``hard_nms``.
        filter_threshold: default probability threshold used by ``__call__``
            when no ``prob_threshold`` is given.
        candidate_size: maximum number of candidates considered by
            ``hard_nms`` per class.

    Raises:
        AssertionError: if the ONNX model file does not exist.
    """

    def __init__(
        self,
        onnx_dir,
        device='cpu',
        device_id=0,
        iou_threshold=0.3,
        filter_threshold=0.01,
        candidate_size=200,
    ):
        onnx_file_name = os.path.join(onnx_dir, 'version-RFB-320.onnx')
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the AssertionError type is kept so
        # existing callers see the same exception as before.
        if not os.path.exists(onnx_file_name):
            raise AssertionError(
                '%s does not exist. Please check if it has been downloaded accurately.'
                % onnx_file_name
            )
        self.ort_net = self.create_net(onnx_file_name, device, device_id)
        self.transform = PredictionTransform(
            Config.image_size, Config.image_mean_test, Config.image_std)
        self.iou_threshold = iou_threshold
        self.filter_threshold = filter_threshold
        self.candidate_size = candidate_size
        self.device = device

    def __call__(self, image, top_k=-1, prob_threshold=None):
        """Detect boxes in a single image.

        Args:
            image: (H, W, C) numpy image.
            top_k: keep at most ``top_k`` boxes per class; ``<= 0`` keeps all.
            prob_threshold: minimum probability for a box to be considered.
                ``None`` falls back to ``self.filter_threshold``.

        Returns:
            A tuple ``(boxes, labels, probs)``:
            boxes is (K, 4) scaled by the input image's width/height,
            labels is (K,) int64 class indexes, probs is (K,) probabilities.
            When nothing passes the threshold, K is 0.
        """
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0).numpy()

        # net inference
        inputs = {self.ort_net.get_inputs()[0].name: images}
        scores, boxes = self.ort_net.run(None, inputs)
        # Only the first item of the batch is used (the batch has size 1).
        boxes = torch.from_numpy(boxes[0])
        scores = torch.from_numpy(scores[0])

        # `is None` (not truthiness) so an explicit threshold of 0.0 is honored.
        if prob_threshold is None:
            prob_threshold = self.filter_threshold

        picked_box_probs = []
        picked_labels = []
        # Class index 0 is skipped — presumably the background class; confirm
        # against the model definition.
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = hard_nms(box_probs, self.iou_threshold, top_k, self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))

        if not picked_box_probs:
            # Empty results keep the same shapes/dtypes as the non-empty path
            # so callers can index columns or concatenate without special cases.
            return (
                torch.empty((0, 4)),
                torch.empty((0,), dtype=torch.long),
                torch.empty((0,)),
            )

        picked_box_probs = torch.cat(picked_box_probs)
        # Scale x coordinates by width and y coordinates by height of the
        # original image (assumes the model emits normalized corner-form
        # boxes — TODO confirm against the model).
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]

    def create_net(self, onnx_file_name, device='cpu', device_id=0):
        """Create the onnxruntime inference session for the model file.

        Args:
            onnx_file_name: path to the ONNX model.
            device: ``'cuda'`` puts the CUDA execution provider ahead of the
                CPU provider; anything else uses the CPU provider only.
            device_id: CUDA device id passed to the CUDA provider.

        Returns:
            The created ``onnxruntime.InferenceSession``.
        """
        options = onnxruntime.SessionOptions()
        # set op_num_threads
        options.intra_op_num_threads = 8
        options.inter_op_num_threads = 8
        # set providers
        providers = ['CPUExecutionProvider']
        if device == 'cuda':
            # CUDA goes first so it is preferred; CPU stays as the fallback.
            providers.insert(0, ('CUDAExecutionProvider', {'device_id': device_id}))
        ort_session = onnxruntime.InferenceSession(onnx_file_name, options, providers=providers)
        return ort_session


if __name__ == '__main__':
    # device_id is passed as an int, matching the constructor's default type.
    predictor_det = Predictor('pretrained_models', 'cuda', 0)
    image_input = np.random.randn(1920, 1080, 3).astype('float32')
    bboxes, _, probs = predictor_det(image_input, top_k=10, prob_threshold=0.9)