Spaces:

PolarisFTL
/

MASFNet

Runtime error

App Files Files Community

MASFNet / yolo.py

PolarisFTL

Update yolo.py

08f663c verified 8 months ago

raw

history blame contribute delete

19.8 kB

	import colorsys
	import os
	import time

	import numpy as np
	import torch
	import torch.nn as nn
	import cv2
	from PIL import ImageDraw, ImageFont, Image

	from nets.yolo import YoloBody
	from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input,
	resize_image, show_config)
	from utils.utils_bbox import DecodeBox, DecodeBoxNP


	class YOLO(object):
	    """PyTorch detector wrapper: builds ``YoloBody``, loads weights and runs detection.

	    Configuration starts from ``_defaults``; every keyword argument passed to the
	    constructor overrides (or adds to) those values for that instance only.
	    """

	    # Class-level default configuration. Treated as read-only: per-instance
	    # overrides are stored on the instance and never written back here.
	    _defaults = {
	        "model_path" : 'model_data/rtts.pth',
	        "classes_path" : 'model_data/rtts_classes.txt',
	        "anchors_path" : 'model_data/yolo_anchors.txt',
	        "anchors_mask" : [[3,4,5], [1,2,3]],
	        "backbone" : 'tiny',
	        "phi" : 0,
	        "input_shape" : [416, 416],
	        "confidence" : 0.5,
	        "nms_iou" : 0.3,
	        "letterbox_image" : False,
	        "cuda" : False,
	    }

	    @classmethod
	    def get_defaults(cls, n):
	        """Return the class-level default for key *n*.

	        For an unknown key a message string (not an exception) is returned,
	        which is the contract existing callers rely on.
	        """
	        if n in cls._defaults:
	            return cls._defaults[n]
	        else:
	            return "Unrecognized attribute name '" + n + "'"

	    @classmethod
	    def _merged_config(cls, overrides):
	        """Return a new dict holding the class defaults updated with *overrides*."""
	        config = dict(cls._defaults)
	        config.update(overrides)
	        return config

	    def __init__(self, **kwargs):
	        """Apply configuration, load class names/anchors, build the box decoder and the network.

	        Args:
	            **kwargs: values overriding (or extending) the keys of ``_defaults``;
	                each one becomes an instance attribute.
	        """
	        # Shadow the class attribute with a per-instance dict. The previous code
	        # assigned overrides into the shared class dict, so one instance's
	        # settings leaked into every instance created afterwards.
	        self._defaults = self._merged_config(kwargs)
	        for name, value in self._defaults.items():
	            setattr(self, name, value)

	        self.class_names, self.num_classes = get_classes(self.classes_path)
	        self.anchors, self.num_anchors = get_anchors(self.anchors_path)
	        self.bbox_util = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask)

	        # One RGB colour per class, evenly spaced around the HSV hue circle.
	        hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
	        self.colors = [
	            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
	            for hsv in hsv_tuples
	        ]
	        self.generate()

	        show_config(**self._defaults)

	    def generate(self, onnx=False):
	        """Build the network, load the weights from ``self.model_path`` and switch to eval mode.

	        Args:
	            onnx: when true the network is left on the CPU and is not wrapped in
	                ``nn.DataParallel``, regardless of ``self.cuda``.
	        """
	        self.net = YoloBody(self.anchors_mask, self.num_classes, self.phi, self.backbone)
	        use_gpu = self.cuda and not onnx
	        # Map the checkpoint to the device that will actually be used, instead of
	        # touching CUDA whenever a GPU merely happens to be present.
	        device = torch.device('cuda' if use_gpu and torch.cuda.is_available() else 'cpu')
	        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
	        self.net = self.net.eval()
	        print('{} model, anchors, and classes loaded.'.format(self.model_path))
	        if use_gpu:
	            self.net = nn.DataParallel(self.net)
	            self.net = self.net.cuda()

	    def _preprocess(self, image):
	        """Return ``(converted_image, image_shape, image_data)`` for *image*.

	        ``image_shape`` is the first two entries of ``np.shape(image)``;
	        ``image_data`` is the resized, normalised array transposed with
	        ``(2, 0, 1)`` and given a leading axis of length one.
	        """
	        image_shape = np.array(np.shape(image)[0:2])
	        image = cvtColor(image)
	        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
	        image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
	        return image, image_shape, image_data

	    def _to_tensor(self, image_data):
	        """Wrap *image_data* in a tensor, moved to the GPU when ``self.cuda`` is set."""
	        images = torch.from_numpy(image_data)
	        if self.cuda:
	            images = images.cuda()
	        return images

	    def _forward_nms(self, images, image_shape):
	        """Run the network, decode its outputs and return the non-max-suppression results."""
	        with torch.no_grad():
	            outputs = self.net(images)
	            outputs = self.bbox_util.decode_box(outputs)
	            return self.bbox_util.non_max_suppression(
	                torch.cat(outputs, 1), self.num_classes, self.input_shape,
	                image_shape, self.letterbox_image,
	                conf_thres=self.confidence, nms_thres=self.nms_iou)

	    @staticmethod
	    def _split_results(detections):
	        """Split a detection array into ``(labels, scores, boxes)``.

	        Column 6 is used as the class index, columns 4 and 5 are multiplied into
	        the score and columns 0-3 are the box.
	        """
	        top_label = np.array(detections[:, 6], dtype='int32')
	        top_conf = detections[:, 4] * detections[:, 5]
	        top_boxes = detections[:, :4]
	        return top_label, top_conf, top_boxes

	    @staticmethod
	    def _clip_box(box, image):
	        """Floor *box* (unpacked as top, left, bottom, right) and clamp it to *image*; returns ints."""
	        top, left, bottom, right = box
	        top = int(max(0, np.floor(top)))
	        left = int(max(0, np.floor(left)))
	        bottom = int(min(image.size[1], np.floor(bottom)))
	        right = int(min(image.size[0], np.floor(right)))
	        return top, left, bottom, right

	    @staticmethod
	    def _text_size(draw, text, font):
	        """Return ``(width, height)`` of *text* measured from the drawing origin.

	        ``ImageDraw.textsize`` was removed in Pillow 10, so ``textbbox`` is
	        preferred and ``textsize`` is only used when ``textbbox`` is missing.
	        """
	        if hasattr(draw, 'textbbox'):
	            _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
	            return int(np.ceil(right)), int(np.ceil(bottom))
	        return tuple(draw.textsize(text, font))

	    def _draw_boxes(self, image, top_label, top_conf, top_boxes, font, thickness):
	        """Draw every box with its ``class score`` label onto *image* in place."""
	        draw = ImageDraw.Draw(image)
	        for index, c in enumerate(top_label):
	            predicted_class = self.class_names[int(c)]
	            score = top_conf[index]
	            top, left, bottom, right = self._clip_box(top_boxes[index], image)

	            label = '{} {:.2f}'.format(predicted_class, score)
	            label_w, label_h = self._text_size(draw, label, font)
	            print(label.encode('utf-8'), top, left, bottom, right)

	            # Put the label above the box when there is room, otherwise just inside it.
	            if top - label_h >= 0:
	                text_origin = (left, top - label_h)
	            else:
	                text_origin = (left, top + 1)

	            color = self.colors[int(c)]
	            for inset in range(thickness):
	                # Stop before the outline inverts; recent Pillow raises on x1 < x0 / y1 < y0.
	                if right - inset < left + inset or bottom - inset < top + inset:
	                    break
	                draw.rectangle([left + inset, top + inset, right - inset, bottom - inset], outline=color)
	            draw.rectangle([text_origin, (text_origin[0] + label_w, text_origin[1] + label_h)], fill=color)
	            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
	        del draw

	    def detect_image(self, image, crop = False, count = False):
	        """Detect objects in *image* and return the image with boxes and labels drawn.

	        Args:
	            image: input image; ``.size`` and ``.crop`` are used on the converted
	                image, so a PIL image is presumably expected.
	            crop: when true, every detection is saved as ``img_crop/crop_<i>.png``.
	            count: when true, per-class detection counts are printed.

	        Returns:
	            The converted image, unannotated when nothing was detected.
	        """
	        image, image_shape, image_data = self._preprocess(image)
	        results = self._forward_nms(self._to_tensor(image_data), image_shape)

	        if results[0] is None:
	            return image

	        top_label, top_conf, top_boxes = self._split_results(results[0])
	        font = ImageFont.truetype(font='model_data/simhei.ttf', size=int(np.floor(3e-2 * image.size[1] + 0.5)))
	        thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))

	        if count:
	            print("top_label:", top_label)
	            classes_nums = np.zeros([self.num_classes])
	            for i in range(self.num_classes):
	                num = np.sum(top_label == i)
	                if num > 0:
	                    print(self.class_names[i], " : ", num)
	                classes_nums[i] = num
	            print("classes_nums:", classes_nums)

	        if crop:
	            dir_save_path = "img_crop"
	            # Created once up front instead of being re-checked for every box.
	            os.makedirs(dir_save_path, exist_ok=True)
	            for i in range(len(top_label)):
	                top, left, bottom, right = self._clip_box(top_boxes[i], image)
	                crop_image = image.crop([left, top, right, bottom])
	                crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
	                print("save crop_" + str(i) + ".png to " + dir_save_path)

	        self._draw_boxes(image, top_label, top_conf, top_boxes, font, thickness)
	        return image

	    def get_FPS(self, image, test_interval):
	        """Return the mean seconds per inference + NMS pass over *test_interval* passes.

	        One untimed pass is executed before the timed loop.

	        Raises:
	            ZeroDivisionError: if *test_interval* is 0.
	        """
	        image, image_shape, image_data = self._preprocess(image)
	        images = self._to_tensor(image_data)
	        self._forward_nms(images, image_shape)

	        t1 = time.time()
	        for _ in range(test_interval):
	            self._forward_nms(images, image_shape)
	        t2 = time.time()

	        tact_time = (t2 - t1) / test_interval
	        return tact_time

	    def detect_heatmap(self, image, heatmap_save_path):
	        """Overlay a score heat map on *image*, save it to *heatmap_save_path* and show it.

	        For each network output, channel 4 of every anchor group is passed
	        through a sigmoid and the maximum over the groups is kept; the maps of
	        all outputs are combined with an element-wise maximum.
	        """
	        import matplotlib.pyplot as plt

	        def sigmoid(x):
	            return 1.0 / (1.0 + np.exp(-x))

	        image, _, image_data = self._preprocess(image)
	        with torch.no_grad():
	            outputs = self.net(self._to_tensor(image_data))

	        plt.clf()
	        plt.imshow(image, alpha=1)
	        plt.axis('off')
	        mask = np.zeros((image.size[1], image.size[0]))
	        for sub_output in outputs:
	            sub_output = sub_output.cpu().numpy()
	            b, c, h, w = np.shape(sub_output)
	            # Split the channel axis into 3 groups (assumes three anchors per
	            # output, as in the default anchors_mask) and take the first batch item.
	            sub_output = np.transpose(np.reshape(sub_output, [b, 3, -1, h, w]), [0, 3, 4, 1, 2])[0]
	            score = np.max(sigmoid(sub_output[..., 4]), -1)
	            score = cv2.resize(score, (image.size[0], image.size[1]))
	            normed_score = (score * 255).astype('uint8')
	            mask = np.maximum(mask, normed_score)

	        plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet")

	        plt.axis('off')
	        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
	        plt.margins(0, 0)
	        plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches = -0.1)
	        print("Save to the " + heatmap_save_path)
	        plt.show()

	    def convert_to_onnx(self, simplify, model_path):
	        """Export the network to an ONNX file at *model_path*.

	        Args:
	            simplify: when true, run ``onnxsim.simplify`` on the exported model and
	                overwrite *model_path* with the result.
	            model_path: destination path of the ONNX file.
	        """
	        import onnx
	        # Rebuild the network in ONNX mode so it stays on the CPU and unwrapped.
	        self.generate(onnx=True)
	        im = torch.zeros(1, 3, *self.input_shape).to('cpu')
	        input_layer_names = ["images"]
	        output_layer_names = ["output"]

	        print(f'Starting export with onnx {onnx.__version__}.')
	        torch.onnx.export(self.net,
	                          im,
	                          f = model_path,
	                          verbose = False,
	                          opset_version = 12,
	                          training = torch.onnx.TrainingMode.EVAL,
	                          do_constant_folding = True,
	                          input_names = input_layer_names,
	                          output_names = output_layer_names,
	                          dynamic_axes = None)

	        model_onnx = onnx.load(model_path)
	        onnx.checker.check_model(model_onnx)
	        if simplify:
	            import onnxsim
	            print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
	            model_onnx, check = onnxsim.simplify(
	                model_onnx,
	                dynamic_input_shape=False,
	                input_shapes=None)
	            assert check, 'assert check failed'
	            onnx.save(model_onnx, model_path)

	        print('Onnx model save as {}'.format(model_path))

	    def get_map_txt(self, image_id, image, class_names, map_out_path):
	        """Write the detections for *image* to ``<map_out_path>/detection-results/<image_id>.txt``.

	        Each line is ``class score left top right bottom``. Detections whose class
	        is not in *class_names* are skipped. The file is created even when nothing
	        is detected (it is then empty).
	        """
	        result_path = os.path.join(map_out_path, "detection-results/"+image_id+".txt")
	        # The context manager closes the file on the early return too; the
	        # previous code leaked the handle whenever there were no detections.
	        with open(result_path, "w") as f:
	            image, image_shape, image_data = self._preprocess(image)
	            results = self._forward_nms(self._to_tensor(image_data), image_shape)

	            if results[0] is None:
	                return

	            top_label, top_conf, top_boxes = self._split_results(results[0])

	            for i, c in enumerate(top_label):
	                predicted_class = self.class_names[int(c)]
	                if predicted_class not in class_names:
	                    continue

	                top, left, bottom, right = top_boxes[i]
	                score = str(top_conf[i])
	                f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
	        return

	class YOLO_ONNX(object):
	    """Detector wrapper that runs an exported ONNX model through ``onnxruntime``.

	    Configuration starts from ``_defaults``; every keyword argument passed to the
	    constructor overrides (or adds to) those values for that instance only.
	    """

	    # Class-level default configuration. Treated as read-only: per-instance
	    # overrides are stored on the instance and never written back here.
	    _defaults = {
	        "onnx_path" : 'model_data/models.onnx',
	        "classes_path" : 'model_data/rtts_classes.txt',
	        "anchors_path" : 'model_data/yolo_anchors.txt',
	        "anchors_mask" : [[3, 4, 5], [1, 2, 3]],
	        "input_shape" : [416, 416],
	        "confidence" : 0.5,
	        "nms_iou" : 0.3,
	        "letterbox_image" : True
	    }

	    @classmethod
	    def get_defaults(cls, n):
	        """Return the class-level default for key *n*.

	        For an unknown key a message string (not an exception) is returned,
	        which is the contract existing callers rely on.
	        """
	        if n in cls._defaults:
	            return cls._defaults[n]
	        else:
	            return "Unrecognized attribute name '" + n + "'"

	    @classmethod
	    def _merged_config(cls, overrides):
	        """Return a new dict holding the class defaults updated with *overrides*."""
	        config = dict(cls._defaults)
	        config.update(overrides)
	        return config

	    def __init__(self, **kwargs):
	        """Apply configuration, open the ONNX session and load class names and anchors.

	        Args:
	            **kwargs: values overriding (or extending) the keys of ``_defaults``;
	                each one becomes an instance attribute.
	        """
	        # Shadow the class attribute with a per-instance dict. The previous code
	        # assigned overrides into the shared class dict, so one instance's
	        # settings leaked into every instance created afterwards.
	        self._defaults = self._merged_config(kwargs)
	        for name, value in self._defaults.items():
	            setattr(self, name, value)

	        import onnxruntime
	        self.onnx_session = onnxruntime.InferenceSession(self.onnx_path)
	        self.input_name = self.get_input_name()
	        self.output_name = self.get_output_name()

	        self.class_names, self.num_classes = self.get_classes(self.classes_path)
	        self.anchors, self.num_anchors = self.get_anchors(self.anchors_path)
	        self.bbox_util = DecodeBoxNP(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask)

	        # One RGB colour per class, evenly spaced around the HSV hue circle.
	        hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
	        self.colors = [
	            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
	            for hsv in hsv_tuples
	        ]

	        show_config(**self._defaults)

	    def get_classes(self, classes_path):
	        """Read one class name per line from *classes_path*; return ``(names, count)``."""
	        with open(classes_path, encoding='utf-8') as f:
	            class_names = [line.strip() for line in f.readlines()]
	        return class_names, len(class_names)

	    def get_anchors(self, anchors_path):
	        """Load anchors from the first line of *anchors_path*.

	        The line is a comma-separated list of numbers that is reshaped into rows
	        of two values. Returns ``(anchors, number_of_rows)``.
	        """
	        with open(anchors_path, encoding='utf-8') as f:
	            first_line = f.readline()
	        values = [float(x) for x in first_line.split(',')]
	        anchors = np.array(values).reshape(-1, 2)
	        return anchors, len(anchors)

	    def get_input_name(self):
	        """Return the names of all inputs of the ONNX session."""
	        return [node.name for node in self.onnx_session.get_inputs()]

	    def get_output_name(self):
	        """Return the names of all outputs of the ONNX session."""
	        return [node.name for node in self.onnx_session.get_outputs()]

	    def get_input_feed(self, image_tensor):
	        """Return a feed dict mapping every session input name to *image_tensor*."""
	        return {name: image_tensor for name in self.input_name}

	    def resize_image(self, image, size, letterbox_image, mode='PIL'):
	        """Resize *image* to *size*, optionally letterboxing with grey (128) padding.

	        Args:
	            image: a PIL image when ``mode == 'PIL'``; otherwise anything
	                ``np.array`` can convert to an image array.
	            size: in PIL mode it is unpacked as ``(w, h)``. In the other mode it is
	                indexed as ``(height, width)`` (compared against
	                ``np.shape(image)[:2]``); an int means a square.
	            letterbox_image: keep the aspect ratio and pad when true; otherwise
	                stretch to the target size.
	            mode: ``'PIL'`` to use Pillow, any other value to use OpenCV.

	        Returns:
	            The resized image (PIL image in PIL mode, NumPy array otherwise).
	        """
	        if mode == 'PIL':
	            iw, ih = image.size
	            w, h = size

	            if letterbox_image:
	                scale = min(w/iw, h/ih)
	                nw = int(iw*scale)
	                nh = int(ih*scale)

	                image = image.resize((nw,nh), Image.BICUBIC)
	                new_image = Image.new('RGB', (w, h), (128,128,128))
	                new_image.paste(image, ((w-nw)//2, (h-nh)//2))
	            else:
	                new_image = image.resize((w, h), Image.BICUBIC)
	        else:
	            image = np.array(image)
	            if isinstance(size, int):
	                size = (size, size)

	            if letterbox_image:
	                shape = np.shape(image)[:2]
	                r = min(size[0] / shape[0], size[1] / shape[1])

	                # cv2 sizes are (width, height), hence the swapped indices.
	                new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
	                dw, dh = size[1] - new_unpad[0], size[0] - new_unpad[1]

	                # Split the padding between the two sides.
	                dw /= 2
	                dh /= 2

	                image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
	                top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
	                left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
	                # This step was missing: the computed padding was never applied and
	                # new_image stayed unassigned, so the branch raised UnboundLocalError.
	                new_image = cv2.copyMakeBorder(image, top, bottom, left, right,
	                                               cv2.BORDER_CONSTANT, value=(128, 128, 128))
	            else:
	                # Previously referenced w/h, which are undefined in this branch.
	                new_image = cv2.resize(image, (size[1], size[0]))

	        return new_image

	    @staticmethod
	    def _clip_box(box, image):
	        """Floor *box* (unpacked as top, left, bottom, right) and clamp it to *image*; returns ints."""
	        top, left, bottom, right = box
	        top = int(max(0, np.floor(top)))
	        left = int(max(0, np.floor(left)))
	        bottom = int(min(image.size[1], np.floor(bottom)))
	        right = int(min(image.size[0], np.floor(right)))
	        return top, left, bottom, right

	    @staticmethod
	    def _text_size(draw, text, font):
	        """Return ``(width, height)`` of *text* measured from the drawing origin.

	        ``ImageDraw.textsize`` was removed in Pillow 10, so ``textbbox`` is
	        preferred and ``textsize`` is only used when ``textbbox`` is missing.
	        """
	        if hasattr(draw, 'textbbox'):
	            _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
	            return int(np.ceil(right)), int(np.ceil(bottom))
	        return tuple(draw.textsize(text, font))

	    def detect_image(self, image):
	        """Detect objects in *image* and return the image with boxes and labels drawn.

	        Returns the converted image unannotated when nothing was detected.
	        """
	        image_shape = np.array(np.shape(image)[0:2])
	        image = cvtColor(image)

	        # resize_image unpacks size as (w, h) while input_shape is indexed
	        # height-first elsewhere, and NMS below is told self.letterbox_image, so the
	        # resize must use the same flag rather than a hard-coded True.
	        image_data = self.resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
	        image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

	        input_feed = self.get_input_feed(image_data)
	        outputs = self.onnx_session.run(output_names=self.output_name, input_feed=input_feed)

	        # Grid sizes: input_shape divided by 2**(i + 4) per head, then reversed.
	        feature_map_shape = [[int(j / (2 ** (i + 4))) for j in self.input_shape] for i in range(len(self.anchors_mask))][::-1]
	        for i in range(len(self.anchors_mask)):
	            outputs[i] = np.reshape(outputs[i], (1, len(self.anchors_mask[i]) * (5 + self.num_classes), feature_map_shape[i][0], feature_map_shape[i][1]))

	        outputs = self.bbox_util.decode_box(outputs)
	        results = self.bbox_util.non_max_suppression(np.concatenate(outputs, 1), self.num_classes, self.input_shape,
	                    image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)

	        if results[0] is None:
	            return image

	        # Column 6: class index; columns 4 * 5: score; columns 0-3: box.
	        top_label = np.array(results[0][:, 6], dtype = 'int32')
	        top_conf = results[0][:, 4] * results[0][:, 5]
	        top_boxes = results[0][:, :4]

	        font = ImageFont.truetype(font='model_data/simhei.ttf', size=int(np.floor(3e-2 * image.size[1] + 0.5)))
	        thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))

	        draw = ImageDraw.Draw(image)
	        for index, c in enumerate(top_label):
	            predicted_class = self.class_names[int(c)]
	            score = top_conf[index]
	            top, left, bottom, right = self._clip_box(top_boxes[index], image)

	            label = '{} {:.2f}'.format(predicted_class, score)
	            label_w, label_h = self._text_size(draw, label, font)
	            print(label.encode('utf-8'), top, left, bottom, right)

	            # Put the label above the box when there is room, otherwise just inside it.
	            if top - label_h >= 0:
	                text_origin = (left, top - label_h)
	            else:
	                text_origin = (left, top + 1)

	            color = self.colors[int(c)]
	            for inset in range(thickness):
	                # Stop before the outline inverts; recent Pillow raises on x1 < x0 / y1 < y0.
	                if right - inset < left + inset or bottom - inset < top + inset:
	                    break
	                draw.rectangle([left + inset, top + inset, right - inset, bottom - inset], outline=color)
	            draw.rectangle([text_origin, (text_origin[0] + label_w, text_origin[1] + label_h)], fill=color)
	            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
	        del draw

	        return image