Spaces:
Runtime error
Runtime error
| import colorsys | |
| import os | |
| import time | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import cv2 | |
| from PIL import ImageDraw, ImageFont, Image | |
| from nets.yolo import YoloBody | |
| from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input, | |
| resize_image, show_config) | |
| from utils.utils_bbox import DecodeBox, DecodeBoxNP | |
| class YOLO(object): | |
| _defaults = { | |
| "model_path" : 'model_data/rtts.pth', | |
| "classes_path" : 'model_data/rtts_classes.txt', | |
| "anchors_path" : 'model_data/yolo_anchors.txt', | |
| "anchors_mask" : [[3,4,5], [1,2,3]], | |
| "backbone" : 'tiny', | |
| "phi" : 0, | |
| "input_shape" : [416, 416], | |
| "confidence" : 0.5, | |
| "nms_iou" : 0.3, | |
| "letterbox_image" : False, | |
| "cuda" : False, | |
| } | |
| def get_defaults(cls, n): | |
| if n in cls._defaults: | |
| return cls._defaults[n] | |
| else: | |
| return "Unrecognized attribute name '" + n + "'" | |
| def __init__(self, **kwargs): | |
| self.__dict__.update(self._defaults) | |
| for name, value in kwargs.items(): | |
| setattr(self, name, value) | |
| self._defaults[name] = value | |
| self.class_names, self.num_classes = get_classes(self.classes_path) | |
| self.anchors, self.num_anchors = get_anchors(self.anchors_path) | |
| self.bbox_util = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask) | |
| hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)] | |
| self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) | |
| self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors)) | |
| self.generate() | |
| show_config(**self._defaults) | |
| def generate(self, onnx=False): | |
| self.net = YoloBody(self.anchors_mask, self.num_classes, self.phi, self.backbone) | |
| device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | |
| self.net.load_state_dict(torch.load(self.model_path, map_location=device)) | |
| self.net = self.net.eval() | |
| print('{} model, anchors, and classes loaded.'.format(self.model_path)) | |
| if not onnx: | |
| if self.cuda: | |
| self.net = nn.DataParallel(self.net) | |
| self.net = self.net.cuda() | |
| def detect_image(self, image, crop = False, count = False): | |
| image_shape = np.array(np.shape(image)[0:2]) | |
| image = cvtColor(image) | |
| image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) | |
| image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | |
| with torch.no_grad(): | |
| images = torch.from_numpy(image_data) | |
| if self.cuda: | |
| images = images.cuda() | |
| outputs = self.net(images) | |
| outputs = self.bbox_util.decode_box(outputs) | |
| results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | |
| image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) | |
| if results[0] is None: | |
| return image | |
| top_label = np.array(results[0][:, 6], dtype = 'int32') | |
| top_conf = results[0][:, 4] * results[0][:, 5] | |
| top_boxes = results[0][:, :4] | |
| font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) | |
| thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) | |
| if count: | |
| print("top_label:", top_label) | |
| classes_nums = np.zeros([self.num_classes]) | |
| for i in range(self.num_classes): | |
| num = np.sum(top_label == i) | |
| if num > 0: | |
| print(self.class_names[i], " : ", num) | |
| classes_nums[i] = num | |
| print("classes_nums:", classes_nums) | |
| if crop: | |
| for i, c in list(enumerate(top_label)): | |
| top, left, bottom, right = top_boxes[i] | |
| top = max(0, np.floor(top).astype('int32')) | |
| left = max(0, np.floor(left).astype('int32')) | |
| bottom = min(image.size[1], np.floor(bottom).astype('int32')) | |
| right = min(image.size[0], np.floor(right).astype('int32')) | |
| dir_save_path = "img_crop" | |
| if not os.path.exists(dir_save_path): | |
| os.makedirs(dir_save_path) | |
| crop_image = image.crop([left, top, right, bottom]) | |
| crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0) | |
| print("save crop_" + str(i) + ".png to " + dir_save_path) | |
| for i, c in list(enumerate(top_label)): | |
| predicted_class = self.class_names[int(c)] | |
| box = top_boxes[i] | |
| score = top_conf[i] | |
| top, left, bottom, right = box | |
| top = max(0, np.floor(top).astype('int32')) | |
| left = max(0, np.floor(left).astype('int32')) | |
| bottom = min(image.size[1], np.floor(bottom).astype('int32')) | |
| right = min(image.size[0], np.floor(right).astype('int32')) | |
| label = '{} {:.2f}'.format(predicted_class, score) | |
| draw = ImageDraw.Draw(image) | |
| label_size = draw.textsize(label, font) | |
| label = label.encode('utf-8') | |
| print(label, top, left, bottom, right) | |
| if top - label_size[1] >= 0: | |
| text_origin = np.array([left, top - label_size[1]]) | |
| else: | |
| text_origin = np.array([left, top + 1]) | |
| for i in range(thickness): | |
| draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) | |
| draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) | |
| draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) | |
| del draw | |
| return image | |
| def get_FPS(self, image, test_interval): | |
| image_shape = np.array(np.shape(image)[0:2]) | |
| image = cvtColor(image) | |
| image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) | |
| image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | |
| with torch.no_grad(): | |
| images = torch.from_numpy(image_data) | |
| if self.cuda: | |
| images = images.cuda() | |
| outputs = self.net(images) | |
| outputs = self.bbox_util.decode_box(outputs) | |
| results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | |
| image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou) | |
| t1 = time.time() | |
| for _ in range(test_interval): | |
| with torch.no_grad(): | |
| outputs = self.net(images) | |
| outputs = self.bbox_util.decode_box(outputs) | |
| results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | |
| image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou) | |
| t2 = time.time() | |
| tact_time = (t2 - t1) / test_interval | |
| return tact_time | |
| def detect_heatmap(self, image, heatmap_save_path): | |
| import cv2 | |
| import matplotlib.pyplot as plt | |
| def sigmoid(x): | |
| y = 1.0 / (1.0 + np.exp(-x)) | |
| return y | |
| image = cvtColor(image) | |
| image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) | |
| image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | |
| with torch.no_grad(): | |
| images = torch.from_numpy(image_data) | |
| if self.cuda: | |
| images = images.cuda() | |
| outputs = self.net(images) | |
| plt.clf() | |
| plt.imshow(image, alpha=1) | |
| plt.axis('off') | |
| mask = np.zeros((image.size[1], image.size[0])) | |
| for sub_output in outputs: | |
| sub_output = sub_output.cpu().numpy() | |
| b, c, h, w = np.shape(sub_output) | |
| sub_output = np.transpose(np.reshape(sub_output, [b, 3, -1, h, w]), [0, 3, 4, 1, 2])[0] | |
| score = np.max(sigmoid(sub_output[..., 4]), -1) | |
| score = cv2.resize(score, (image.size[0], image.size[1])) | |
| normed_score = (score * 255).astype('uint8') | |
| mask = np.maximum(mask, normed_score) | |
| plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet") | |
| plt.axis('off') | |
| plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0) | |
| plt.margins(0, 0) | |
| plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches = -0.1) | |
| print("Save to the " + heatmap_save_path) | |
| plt.show() | |
| def convert_to_onnx(self, simplify, model_path): | |
| import onnx | |
| self.generate(onnx=True) | |
| im = torch.zeros(1, 3, *self.input_shape).to('cpu') | |
| input_layer_names = ["images"] | |
| output_layer_names = ["output"] | |
| print(f'Starting export with onnx {onnx.__version__}.') | |
| torch.onnx.export(self.net, | |
| im, | |
| f = model_path, | |
| verbose = False, | |
| opset_version = 12, | |
| training = torch.onnx.TrainingMode.EVAL, | |
| do_constant_folding = True, | |
| input_names = input_layer_names, | |
| output_names = output_layer_names, | |
| dynamic_axes = None) | |
| model_onnx = onnx.load(model_path) | |
| onnx.checker.check_model(model_onnx) | |
| if simplify: | |
| import onnxsim | |
| print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.') | |
| model_onnx, check = onnxsim.simplify( | |
| model_onnx, | |
| dynamic_input_shape=False, | |
| input_shapes=None) | |
| assert check, 'assert check failed' | |
| onnx.save(model_onnx, model_path) | |
| print('Onnx model save as {}'.format(model_path)) | |
| def get_map_txt(self, image_id, image, class_names, map_out_path): | |
| f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w") | |
| image_shape = np.array(np.shape(image)[0:2]) | |
| image = cvtColor(image) | |
| image_data = resize_image(image, (self.input_shape[1],self.input_shape[0]), self.letterbox_image) | |
| image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | |
| with torch.no_grad(): | |
| images = torch.from_numpy(image_data) | |
| if self.cuda: | |
| images = images.cuda() | |
| outputs = self.net(images) | |
| outputs = self.bbox_util.decode_box(outputs) | |
| results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | |
| image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) | |
| if results[0] is None: | |
| return | |
| top_label = np.array(results[0][:, 6], dtype = 'int32') | |
| top_conf = results[0][:, 4] * results[0][:, 5] | |
| top_boxes = results[0][:, :4] | |
| for i, c in list(enumerate(top_label)): | |
| predicted_class = self.class_names[int(c)] | |
| box = top_boxes[i] | |
| score = str(top_conf[i]) | |
| top, left, bottom, right = box | |
| if predicted_class not in class_names: | |
| continue | |
| f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) | |
| f.close() | |
| return | |
| class YOLO_ONNX(object): | |
| _defaults = { | |
| "onnx_path" : 'model_data/models.onnx', | |
| "classes_path" : 'model_data/rtts_classes.txt', | |
| "anchors_path" : 'model_data/yolo_anchors.txt', | |
| "anchors_mask" : [[3, 4, 5], [1, 2, 3]], | |
| "input_shape" : [416, 416], | |
| "confidence" : 0.5, | |
| "nms_iou" : 0.3, | |
| "letterbox_image" : True | |
| } | |
| def get_defaults(cls, n): | |
| if n in cls._defaults: | |
| return cls._defaults[n] | |
| else: | |
| return "Unrecognized attribute name '" + n + "'" | |
| def __init__(self, **kwargs): | |
| self.__dict__.update(self._defaults) | |
| for name, value in kwargs.items(): | |
| setattr(self, name, value) | |
| self._defaults[name] = value | |
| import onnxruntime | |
| self.onnx_session = onnxruntime.InferenceSession(self.onnx_path) | |
| self.input_name = self.get_input_name() | |
| self.output_name = self.get_output_name() | |
| self.class_names, self.num_classes = self.get_classes(self.classes_path) | |
| self.anchors, self.num_anchors = self.get_anchors(self.anchors_path) | |
| self.bbox_util = DecodeBoxNP(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask) | |
| hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)] | |
| self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) | |
| self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors)) | |
| show_config(**self._defaults) | |
| def get_classes(self, classes_path): | |
| with open(classes_path, encoding='utf-8') as f: | |
| class_names = f.readlines() | |
| class_names = [c.strip() for c in class_names] | |
| return class_names, len(class_names) | |
| def get_anchors(self, anchors_path): | |
| '''loads the anchors from a file''' | |
| with open(anchors_path, encoding='utf-8') as f: | |
| anchors = f.readline() | |
| anchors = [float(x) for x in anchors.split(',')] | |
| anchors = np.array(anchors).reshape(-1, 2) | |
| return anchors, len(anchors) | |
| def get_input_name(self): | |
| input_name=[] | |
| for node in self.onnx_session.get_inputs(): | |
| input_name.append(node.name) | |
| return input_name | |
| def get_output_name(self): | |
| output_name=[] | |
| for node in self.onnx_session.get_outputs(): | |
| output_name.append(node.name) | |
| return output_name | |
| def get_input_feed(self,image_tensor): | |
| input_feed={} | |
| for name in self.input_name: | |
| input_feed[name]=image_tensor | |
| return input_feed | |
| def resize_image(self, image, size, letterbox_image, mode='PIL'): | |
| if mode == 'PIL': | |
| iw, ih = image.size | |
| w, h = size | |
| if letterbox_image: | |
| scale = min(w/iw, h/ih) | |
| nw = int(iw*scale) | |
| nh = int(ih*scale) | |
| image = image.resize((nw,nh), Image.BICUBIC) | |
| new_image = Image.new('RGB', size, (128,128,128)) | |
| new_image.paste(image, ((w-nw)//2, (h-nh)//2)) | |
| else: | |
| new_image = image.resize((w, h), Image.BICUBIC) | |
| else: | |
| image = np.array(image) | |
| if letterbox_image: | |
| shape = np.shape(image)[:2] | |
| if isinstance(size, int): | |
| size = (size, size) | |
| r = min(size[0] / shape[0], size[1] / shape[1]) | |
| new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) | |
| dw, dh = size[1] - new_unpad[0], size[0] - new_unpad[1] | |
| dw /= 2 | |
| dh /= 2 | |
| image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR) | |
| top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) | |
| left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) | |
| else: | |
| new_image = cv2.resize(image, (w, h)) | |
| return new_image | |
| def detect_image(self, image): | |
| image_shape = np.array(np.shape(image)[0:2]) | |
| image = cvtColor(image) | |
| image_data = self.resize_image(image, self.input_shape, True) | |
| image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | |
| input_feed = self.get_input_feed(image_data) | |
| outputs = self.onnx_session.run(output_names=self.output_name, input_feed=input_feed) | |
| feature_map_shape = [[int(j / (2 ** (i + 4))) for j in self.input_shape] for i in range(len(self.anchors_mask))][::-1] | |
| for i in range(len(self.anchors_mask)): | |
| outputs[i] = np.reshape(outputs[i], (1, len(self.anchors_mask[i]) * (5 + self.num_classes), feature_map_shape[i][0], feature_map_shape[i][1])) | |
| outputs = self.bbox_util.decode_box(outputs) | |
| results = self.bbox_util.non_max_suppression(np.concatenate(outputs, 1), self.num_classes, self.input_shape, | |
| image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou) | |
| if results[0] is None: | |
| return image | |
| top_label = np.array(results[0][:, 6], dtype = 'int32') | |
| top_conf = results[0][:, 4] * results[0][:, 5] | |
| top_boxes = results[0][:, :4] | |
| font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) | |
| thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) | |
| for i, c in list(enumerate(top_label)): | |
| predicted_class = self.class_names[int(c)] | |
| box = top_boxes[i] | |
| score = top_conf[i] | |
| top, left, bottom, right = box | |
| top = max(0, np.floor(top).astype('int32')) | |
| left = max(0, np.floor(left).astype('int32')) | |
| bottom = min(image.size[1], np.floor(bottom).astype('int32')) | |
| right = min(image.size[0], np.floor(right).astype('int32')) | |
| label = '{} {:.2f}'.format(predicted_class, score) | |
| draw = ImageDraw.Draw(image) | |
| label_size = draw.textsize(label, font) | |
| label = label.encode('utf-8') | |
| print(label, top, left, bottom, right) | |
| if top - label_size[1] >= 0: | |
| text_origin = np.array([left, top - label_size[1]]) | |
| else: | |
| text_origin = np.array([left, top + 1]) | |
| for i in range(thickness): | |
| draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c]) | |
| draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) | |
| draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) | |
| del draw | |
| return image |