import colorsys
import os
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont

from nets.yolo import YoloBody
from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input,
                         resize_image, show_config)
from utils.utils_bbox import DecodeBox, DecodeBoxNP


def _build_class_colors(num_classes):
    """Return one (R, G, B) integer tuple per class, with hues evenly spaced in HSV."""
    colors = []
    for index in range(num_classes):
        red, green, blue = colorsys.hsv_to_rgb(index / num_classes, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))
    return colors


def _text_size(draw, text, font):
    """Return the (width, height) extent of ``text`` drawn at the origin with ``font``.

    ``ImageDraw.textsize`` no longer exists in Pillow >= 10, so ``textbbox`` is
    preferred when available.  The right/bottom edges of the box anchored at
    (0, 0) are returned so that a background rectangle of this size, placed at
    the text origin, covers the rendered glyphs.
    """
    if hasattr(draw, "textbbox"):
        _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right, bottom
    return draw.textsize(text, font)


def _clip_box(box, image):
    """Floor a (top, left, bottom, right) box to int32 and clamp it to ``image``'s bounds."""
    top, left, bottom, right = box
    top = max(0, np.floor(top).astype('int32'))
    left = max(0, np.floor(left).astype('int32'))
    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
    right = min(image.size[0], np.floor(right).astype('int32'))
    return top, left, bottom, right


def _draw_detections(image, class_names, colors, input_shape, top_label, top_conf, top_boxes):
    """Draw every detection (box outline plus "<class> <score>" label) onto ``image`` in place.

    Args:
        image: PIL image to draw on.
        class_names: sequence mapping class index -> display name.
        colors: sequence mapping class index -> RGB tuple.
        input_shape: network input shape; only its mean is used to scale the
            outline thickness.
        top_label: per-detection class indices.
        top_conf: per-detection scores.
        top_boxes: per-detection (top, left, bottom, right) boxes.

    Returns:
        The same ``image`` object, for convenience.
    """
    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=int(np.floor(3e-2 * image.size[1] + 0.5)))
    thickness = int(max((image.size[0] + image.size[1]) // np.mean(input_shape), 1))
    draw = ImageDraw.Draw(image)

    for index, class_id in enumerate(top_label):
        class_id = int(class_id)
        predicted_class = class_names[class_id]
        score = top_conf[index]
        top, left, bottom, right = _clip_box(top_boxes[index], image)

        label = '{} {:.2f}'.format(predicted_class, score)
        label_size = _text_size(draw, label, font)
        # The label is logged as UTF-8 bytes, matching the historical console output.
        print(label.encode('utf-8'), top, left, bottom, right)

        # Put the label above the box when it fits, otherwise just inside the top edge.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is emulated with nested one-pixel rectangles.
        for inset in range(thickness):
            # Stop once the inset rectangle would be inverted (very small boxes).
            if right - inset < left + inset or bottom - inset < top + inset:
                break
            draw.rectangle([left + inset, top + inset, right - inset, bottom - inset],
                           outline=colors[class_id])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=colors[class_id])
        draw.text(tuple(int(v) for v in text_origin), label, fill=(0, 0, 0), font=font)

    del draw
    return image


class YOLO(object):
    """PyTorch inference wrapper: loads a ``YoloBody`` and runs detection on PIL images."""

    _defaults = {
        # Weights file passed to torch.load in generate().
        "model_path"        : 'model_data/rtts.pth',
        # Text file read by get_classes().
        "classes_path"      : 'model_data/rtts_classes.txt',
        # Anchor file read by get_anchors(); anchors_mask is forwarded to
        # YoloBody and DecodeBox.
        "anchors_path"      : 'model_data/yolo_anchors.txt',
        "anchors_mask"      : [[3,4,5], [1,2,3]],
        # Forwarded unchanged to YoloBody.
        "backbone"          : 'tiny',
        "phi"               : 0,
        # Indexed as [0], [1] throughout; the resize call passes ([1], [0]),
        # so presumably [height, width] -- confirm against utils.resize_image.
        "input_shape"       : [416, 416],
        # Passed to non_max_suppression as conf_thres / nms_thres.
        "confidence"        : 0.5,
        "nms_iou"           : 0.3,
        # Forwarded to resize_image and non_max_suppression.
        "letterbox_image"   : False,
        # When True the network and its inputs are moved to CUDA.
        "cuda"              : False,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the class-level default for ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Build the detector; any keyword overrides the matching ``_defaults`` entry.

        Overrides are stored in a per-instance copy of ``_defaults`` so that one
        instance's settings do not leak into the class defaults (and therefore
        into later instances or ``get_defaults``).
        """
        self._defaults = dict(type(self)._defaults)
        self._defaults.update(kwargs)
        self.__dict__.update(self._defaults)

        self.class_names, self.num_classes = get_classes(self.classes_path)
        self.anchors, self.num_anchors = get_anchors(self.anchors_path)
        self.bbox_util = DecodeBox(self.anchors, self.num_classes,
                                   (self.input_shape[0], self.input_shape[1]),
                                   self.anchors_mask)

        self.colors = _build_class_colors(self.num_classes)
        self.generate()

        show_config(**self._defaults)

    def generate(self, onnx=False):
        """Instantiate the network, load weights from ``model_path`` and switch to eval mode.

        Args:
            onnx: when True the network is left unwrapped on its loaded device
                (no ``DataParallel`` / ``.cuda()``), as needed for export.
        """
        self.net = YoloBody(self.anchors_mask, self.num_classes, self.phi, self.backbone)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
        self.net = self.net.eval()
        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        if not onnx:
            if self.cuda:
                self.net = nn.DataParallel(self.net)
                self.net = self.net.cuda()

    def _preprocess(self, image):
        """Return ``(rgb_image, batch)`` where ``batch`` is the float32 network input.

        The resized image is normalised by ``preprocess_input``, transposed with
        axes (2, 0, 1) and given a leading batch axis.
        """
        image = cvtColor(image)
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]),
                                  self.letterbox_image)
        image_data = np.expand_dims(
            np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
        return image, image_data

    def _to_tensor(self, image_data):
        """Wrap ``image_data`` as a torch tensor, moved to CUDA when ``self.cuda`` is set."""
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        return images

    def _detect(self, images, image_shape):
        """Run the network, decode its outputs and apply non-max suppression."""
        outputs = self.net(images)
        outputs = self.bbox_util.decode_box(outputs)
        return self.bbox_util.non_max_suppression(
            torch.cat(outputs, 1), self.num_classes, self.input_shape,
            image_shape, self.letterbox_image,
            conf_thres=self.confidence, nms_thres=self.nms_iou)

    def detect_image(self, image, crop = False, count = False):
        """Detect objects in ``image`` and return the image with detections drawn on it.

        Args:
            image: input PIL image.
            crop: when True, each detection is also saved as
                ``img_crop/crop_<i>.png``.
            count: when True, per-class detection counts are printed.

        Returns:
            The RGB-converted image, annotated in place; returned without
            annotations when nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image, image_data = self._preprocess(image)

        with torch.no_grad():
            images = self._to_tensor(image_data)
            results = self._detect(images, image_shape)

        if results[0] is None:
            return image

        # Columns used: 0-3 box, 4 and 5 multiplied into the score, 6 class index.
        top_label = np.array(results[0][:, 6], dtype = 'int32')
        top_conf = results[0][:, 4] * results[0][:, 5]
        top_boxes = results[0][:, :4]

        if count:
            print("top_label:", top_label)
            classes_nums = np.zeros([self.num_classes])
            for class_id in range(self.num_classes):
                num = np.sum(top_label == class_id)
                if num > 0:
                    print(self.class_names[class_id], " : ", num)
                classes_nums[class_id] = num
            print("classes_nums:", classes_nums)

        if crop:
            dir_save_path = "img_crop"
            os.makedirs(dir_save_path, exist_ok=True)
            # Crops are taken before drawing so they do not contain the overlays.
            for index, box in enumerate(top_boxes):
                top, left, bottom, right = _clip_box(box, image)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path, "crop_" + str(index) + ".png"),
                                quality=95, subsampling=0)
                print("save crop_" + str(index) + ".png to " + dir_save_path)

        return _draw_detections(image, self.class_names, self.colors, self.input_shape,
                                top_label, top_conf, top_boxes)

    def get_FPS(self, image, test_interval):
        """Return the mean seconds per forward + decode + NMS pass over ``test_interval`` runs.

        One untimed pass is executed first; image preprocessing is not timed.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image, image_data = self._preprocess(image)

        with torch.no_grad():
            images = self._to_tensor(image_data)
            # Untimed warm-up pass.
            self._detect(images, image_shape)

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                self._detect(images, image_shape)
        t2 = time.time()

        tact_time = (t2 - t1) / test_interval
        return tact_time

    def detect_heatmap(self, image, heatmap_save_path):
        """Overlay a heatmap of channel-4 sigmoid activations on ``image`` and save it.

        The figure is written to ``heatmap_save_path`` and then shown with
        ``plt.show()``.
        """
        import matplotlib.pyplot as plt

        def sigmoid(x):
            return 1.0 / (1.0 + np.exp(-x))

        image, image_data = self._preprocess(image)

        with torch.no_grad():
            images = self._to_tensor(image_data)
            outputs = self.net(images)

        plt.clf()
        plt.imshow(image, alpha=1)
        plt.axis('off')
        mask = np.zeros((image.size[1], image.size[0]))
        for sub_output in outputs:
            sub_output = sub_output.cpu().numpy()
            b, c, h, w = np.shape(sub_output)
            # NOTE(review): the literal 3 presumably is the number of anchors per
            # scale (every anchors_mask entry has 3) -- confirm before changing masks.
            sub_output = np.transpose(np.reshape(sub_output, [b, 3, -1, h, w]),
                                      [0, 3, 4, 1, 2])[0]
            score = np.max(sigmoid(sub_output[..., 4]), -1)
            score = cv2.resize(score, (image.size[0], image.size[1]))
            normed_score = (score * 255).astype('uint8')
            # Keep the strongest response seen across all output scales.
            mask = np.maximum(mask, normed_score)

        plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")

        plt.axis('off')
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches = -0.1)
        print("Save to the " + heatmap_save_path)
        plt.show()

    def convert_to_onnx(self, simplify, model_path):
        """Export the network to ONNX at ``model_path``, optionally simplified with onnxsim.

        The network is rebuilt with ``generate(onnx=True)`` and traced with a
        zero tensor of shape (1, 3, *input_shape) on the CPU.
        """
        import onnx
        self.generate(onnx=True)

        im = torch.zeros(1, 3, *self.input_shape).to('cpu')
        input_layer_names = ["images"]
        output_layer_names = ["output"]

        print(f'Starting export with onnx {onnx.__version__}.')
        torch.onnx.export(self.net,
                          im,
                          f = model_path,
                          verbose = False,
                          opset_version = 12,
                          training = torch.onnx.TrainingMode.EVAL,
                          do_constant_folding = True,
                          input_names = input_layer_names,
                          output_names = output_layer_names,
                          dynamic_axes = None)

        # Validate the exported graph before (optionally) simplifying it.
        model_onnx = onnx.load(model_path)
        onnx.checker.check_model(model_onnx)

        if simplify:
            import onnxsim
            print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
            model_onnx, check = onnxsim.simplify(
                model_onnx,
                dynamic_input_shape=False,
                input_shapes=None)
            assert check, 'assert check failed'
            onnx.save(model_onnx, model_path)

        print('Onnx model save as {}'.format(model_path))

    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write detections for ``image`` to ``<map_out_path>/detection-results/<image_id>.txt``.

        One line per detection: ``<class> <score[:6]> <left> <top> <right> <bottom>``.
        Detections whose class is not in ``class_names`` are skipped.  The file
        is created (empty) even when nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image, image_data = self._preprocess(image)

        with torch.no_grad():
            images = self._to_tensor(image_data)
            results = self._detect(images, image_shape)

        result_path = os.path.join(map_out_path, "detection-results/"+image_id+".txt")
        # The context manager closes the file on every path, including the
        # early return for "no detections".
        with open(result_path, "w") as f:
            if results[0] is None:
                return

            top_label = np.array(results[0][:, 6], dtype = 'int32')
            top_conf = results[0][:, 4] * results[0][:, 5]
            top_boxes = results[0][:, :4]

            for index, class_id in enumerate(top_label):
                predicted_class = self.class_names[int(class_id)]
                if predicted_class not in class_names:
                    continue
                score = str(top_conf[index])
                top, left, bottom, right = top_boxes[index]
                f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6],
                                                 str(int(left)), str(int(top)),
                                                 str(int(right)), str(int(bottom))))
        return


class YOLO_ONNX(object):
    """ONNX Runtime inference wrapper mirroring ``YOLO.detect_image`` with NumPy decoding."""

    _defaults = {
        # Model file handed to onnxruntime.InferenceSession.
        "onnx_path"         : 'model_data/models.onnx',
        "classes_path"      : 'model_data/rtts_classes.txt',
        "anchors_path"      : 'model_data/yolo_anchors.txt',
        "anchors_mask"      : [[3, 4, 5], [1, 2, 3]],
        "input_shape"       : [416, 416],
        # Passed to non_max_suppression as conf_thres / nms_thres.
        "confidence"        : 0.5,
        "nms_iou"           : 0.3,
        # Forwarded to resize_image and non_max_suppression.
        "letterbox_image"   : True
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the class-level default for ``n``, or an error string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Create the ONNX Runtime session; keywords override ``_defaults`` entries.

        Overrides live in a per-instance copy of ``_defaults`` so the class-level
        defaults are never mutated.
        """
        self._defaults = dict(type(self)._defaults)
        self._defaults.update(kwargs)
        self.__dict__.update(self._defaults)

        import onnxruntime
        self.onnx_session = onnxruntime.InferenceSession(self.onnx_path)
        self.input_name = self.get_input_name()
        self.output_name = self.get_output_name()

        self.class_names, self.num_classes = self.get_classes(self.classes_path)
        self.anchors, self.num_anchors = self.get_anchors(self.anchors_path)
        self.bbox_util = DecodeBoxNP(self.anchors, self.num_classes,
                                     (self.input_shape[0], self.input_shape[1]),
                                     self.anchors_mask)

        self.colors = _build_class_colors(self.num_classes)

        show_config(**self._defaults)

    def get_classes(self, classes_path):
        """Read one class name per line from ``classes_path``; return ``(names, count)``."""
        with open(classes_path, encoding='utf-8') as f:
            class_names = [line.strip() for line in f.readlines()]
        return class_names, len(class_names)

    def get_anchors(self, anchors_path):
        """Read comma-separated anchor values from the first line of ``anchors_path``.

        Returns:
            ``(anchors, count)`` where ``anchors`` is reshaped to (-1, 2).
        """
        with open(anchors_path, encoding='utf-8') as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        anchors = np.array(anchors).reshape(-1, 2)
        return anchors, len(anchors)

    def get_input_name(self):
        """Return the names of all session inputs."""
        return [node.name for node in self.onnx_session.get_inputs()]

    def get_output_name(self):
        """Return the names of all session outputs."""
        return [node.name for node in self.onnx_session.get_outputs()]

    def get_input_feed(self, image_tensor):
        """Map every session input name to ``image_tensor``."""
        return {name: image_tensor for name in self.input_name}

    def resize_image(self, image, size, letterbox_image, mode='PIL'):
        """Resize ``image`` to ``size``, optionally letterboxing with grey (128) padding.

        Args:
            image: PIL image (``mode='PIL'``) or anything ``np.array`` accepts.
            size: target size.  In ``'PIL'`` mode it is unpacked as
                (width, height).  In any other mode it is indexed as
                (height, width) -- the letterbox arithmetic divides ``size[0]``
                by the row count -- and a bare int means a square.
            letterbox_image: keep the aspect ratio and pad, instead of stretching.
            mode: ``'PIL'`` for Pillow resizing, anything else for OpenCV.

        Returns:
            A PIL image in ``'PIL'`` mode, otherwise a NumPy array.
        """
        if mode == 'PIL':
            iw, ih = image.size
            w, h = size

            if letterbox_image:
                scale = min(w/iw, h/ih)
                nw = int(iw*scale)
                nh = int(ih*scale)

                image = image.resize((nw,nh), Image.BICUBIC)
                new_image = Image.new('RGB', size, (128,128,128))
                # Centre the scaled image on the grey canvas.
                new_image.paste(image, ((w-nw)//2, (h-nh)//2))
            else:
                new_image = image.resize((w, h), Image.BICUBIC)
            return new_image

        image = np.array(image)
        if isinstance(size, int):
            size = (size, size)

        if letterbox_image:
            shape = np.shape(image)[:2]
            r = min(size[0] / shape[0], size[1] / shape[1])

            # cv2.resize takes (width, height).
            new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
            dw, dh = size[1] - new_unpad[0], size[0] - new_unpad[1]

            # Split the padding between the two sides; the +/-0.1 rounding sends
            # an odd leftover pixel to the bottom/right.
            dw /= 2
            dh /= 2

            image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
            top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
            left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

            # Pad to the target size; without this step no result was produced
            # for this branch at all.
            new_image = cv2.copyMakeBorder(image, top, bottom, left, right,
                                           cv2.BORDER_CONSTANT, value=(128, 128, 128))
        else:
            # Same (height, width) convention as the letterbox branch above.
            new_image = cv2.resize(image, (size[1], size[0]))

        return new_image

    def detect_image(self, image):
        """Detect objects in ``image`` via ONNX Runtime and return the annotated image.

        Returns:
            The RGB-converted PIL image with detections drawn in place; returned
            without annotations when nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        image = cvtColor(image)

        # Use the configured letterbox flag (the same one NMS receives below) and
        # pass the size in the same ([1], [0]) order as YOLO's preprocessing.
        image_data = self.resize_image(image, (self.input_shape[1], self.input_shape[0]),
                                       self.letterbox_image)
        image_data = np.expand_dims(
            np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        input_feed = self.get_input_feed(image_data)
        outputs = self.onnx_session.run(output_names=self.output_name, input_feed=input_feed)

        # Feature-map sizes are input_shape / 2**(i + 4), listed coarsest first.
        feature_map_shape = [[int(j / (2 ** (i + 4))) for j in self.input_shape]
                             for i in range(len(self.anchors_mask))][::-1]
        for i in range(len(self.anchors_mask)):
            outputs[i] = np.reshape(
                outputs[i],
                (1, len(self.anchors_mask[i]) * (5 + self.num_classes),
                 feature_map_shape[i][0], feature_map_shape[i][1]))

        outputs = self.bbox_util.decode_box(outputs)
        results = self.bbox_util.non_max_suppression(
            np.concatenate(outputs, 1), self.num_classes, self.input_shape,
            image_shape, self.letterbox_image,
            conf_thres = self.confidence, nms_thres = self.nms_iou)

        if results[0] is None:
            return image

        top_label = np.array(results[0][:, 6], dtype = 'int32')
        top_conf = results[0][:, 4] * results[0][:, 5]
        top_boxes = results[0][:, :4]

        return _draw_detections(image, self.class_names, self.colors, self.input_shape,
                                top_label, top_conf, top_boxes)