Voidljc

Your commit message

aa24fe8 about 1 year ago

14.1 kB

	import cv2
	import numpy as np
	import torch
	from PIL import Image
	from torch.utils.data.dataset import Dataset

	from utils.utils import cvtColor, preprocess_input


	class SSDDataset(Dataset):
	def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, train, overlap_threshold = 0.5):
	super(SSDDataset, self).__init__()
	self.annotation_lines = annotation_lines
	self.length = len(self.annotation_lines)

	self.input_shape = input_shape
	self.anchors = anchors
	self.num_anchors = len(anchors)
	self.batch_size = batch_size
	self.num_classes = num_classes
	self.train = train
	self.overlap_threshold = overlap_threshold

	def __len__(self):
	return self.length

	def __getitem__(self, index):
	index = index % self.length
	#---------------------------------------------------#
	# 训练时进行数据的随机增强
	# 验证时不进行数据的随机增强
	#---------------------------------------------------#
	image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
	image_data = np.transpose(preprocess_input(np.array(image, dtype = np.float32)), (2, 0, 1))
	if len(box)!=0:
	boxes = np.array(box[:,:4] , dtype=np.float32)
	# 进行归一化，调整到0-1之间
	boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
	boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
	# 对真实框的种类进行one hot处理
	one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
	box = np.concatenate([boxes, one_hot_label], axis=-1)
	box = self.assign_boxes(box)

	return np.array(image_data, np.float32), np.array(box, np.float32)

	def rand(self, a=0, b=1):
	return np.random.rand()*(b-a) + a

	def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
	line = annotation_line.split()
	#------------------------------#
	# 读取图像并转换成RGB图像
	#------------------------------#
	image = Image.open(line[0])
	image = cvtColor(image)
	#------------------------------#
	# 获得图像的高宽与目标高宽
	#------------------------------#
	iw, ih = image.size
	h, w = input_shape
	#------------------------------#
	# 获得预测框
	#------------------------------#
	box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

	if not random:
	scale = min(w/iw, h/ih)
	nw = int(iw*scale)
	nh = int(ih*scale)
	dx = (w-nw)//2
	dy = (h-nh)//2

	#---------------------------------#
	# 将图像多余的部分加上灰条
	#---------------------------------#
	image = image.resize((nw,nh), Image.BICUBIC)
	new_image = Image.new('RGB', (w,h), (128,128,128))
	new_image.paste(image, (dx, dy))
	image_data = np.array(new_image, np.float32)

	#---------------------------------#
	# 对真实框进行调整
	#---------------------------------#
	if len(box)>0:
	np.random.shuffle(box)
	box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
	box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
	box[:, 0:2][box[:, 0:2]<0] = 0
	box[:, 2][box[:, 2]>w] = w
	box[:, 3][box[:, 3]>h] = h
	box_w = box[:, 2] - box[:, 0]
	box_h = box[:, 3] - box[:, 1]
	box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box

	return image_data, box

	#------------------------------------------#
	# 对图像进行缩放并且进行长和宽的扭曲
	#------------------------------------------#
	new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
	scale = self.rand(.25, 2)
	if new_ar < 1:
	nh = int(scale*h)
	nw = int(nh*new_ar)
	else:
	nw = int(scale*w)
	nh = int(nw/new_ar)
	image = image.resize((nw,nh), Image.BICUBIC)

	#------------------------------------------#
	# 将图像多余的部分加上灰条
	#------------------------------------------#
	dx = int(self.rand(0, w-nw))
	dy = int(self.rand(0, h-nh))
	new_image = Image.new('RGB', (w,h), (128,128,128))
	new_image.paste(image, (dx, dy))
	image = new_image

	#------------------------------------------#
	# 翻转图像
	#------------------------------------------#
	flip = self.rand()<.5
	if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

	image_data = np.array(image, np.uint8)
	#---------------------------------#
	# 对图像进行色域变换
	# 计算色域变换的参数
	#---------------------------------#
	r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
	#---------------------------------#
	# 将图像转到HSV上
	#---------------------------------#
	hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
	dtype = image_data.dtype
	#---------------------------------#
	# 应用变换
	#---------------------------------#
	x = np.arange(0, 256, dtype=r.dtype)
	lut_hue = ((x * r[0]) % 180).astype(dtype)
	lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
	lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

	image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
	image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

	#---------------------------------#
	# 对真实框进行调整
	#---------------------------------#
	if len(box)>0:
	np.random.shuffle(box)
	box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
	box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
	if flip: box[:, [0,2]] = w - box[:, [2,0]]
	box[:, 0:2][box[:, 0:2]<0] = 0
	box[:, 2][box[:, 2]>w] = w
	box[:, 3][box[:, 3]>h] = h
	box_w = box[:, 2] - box[:, 0]
	box_h = box[:, 3] - box[:, 1]
	box = box[np.logical_and(box_w>1, box_h>1)]

	return image_data, box

	def iou(self, box):
	#---------------------------------------------#
	# 计算出每个真实框与所有的先验框的iou
	# 判断真实框与先验框的重合情况
	#---------------------------------------------#
	inter_upleft = np.maximum(self.anchors[:, :2], box[:2])
	inter_botright = np.minimum(self.anchors[:, 2:4], box[2:])

	inter_wh = inter_botright - inter_upleft
	inter_wh = np.maximum(inter_wh, 0)
	inter = inter_wh[:, 0] * inter_wh[:, 1]
	#---------------------------------------------#
	# 真实框的面积
	#---------------------------------------------#
	area_true = (box[2] - box[0]) * (box[3] - box[1])
	#---------------------------------------------#
	# 先验框的面积
	#---------------------------------------------#
	area_gt = (self.anchors[:, 2] - self.anchors[:, 0])*(self.anchors[:, 3] - self.anchors[:, 1])
	#---------------------------------------------#
	# 计算iou
	#---------------------------------------------#
	union = area_true + area_gt - inter

	iou = inter / union
	return iou

	def encode_box(self, box, return_iou=True, variances = [0.1, 0.1, 0.2, 0.2]):
	#---------------------------------------------#
	# 计算当前真实框和先验框的重合情况
	# iou [self.num_anchors]
	# encoded_box [self.num_anchors, 5]
	#---------------------------------------------#
	iou = self.iou(box)
	encoded_box = np.zeros((self.num_anchors, 4 + return_iou))

	#---------------------------------------------#
	# 找到每一个真实框，重合程度较高的先验框
	# 真实框可以由这个先验框来负责预测
	#---------------------------------------------#
	assign_mask = iou > self.overlap_threshold

	#---------------------------------------------#
	# 如果没有一个先验框重合度大于self.overlap_threshold
	# 则选择重合度最大的为正样本
	#---------------------------------------------#
	if not assign_mask.any():
	assign_mask[iou.argmax()] = True

	#---------------------------------------------#
	# 利用iou进行赋值
	#---------------------------------------------#
	if return_iou:
	encoded_box[:, -1][assign_mask] = iou[assign_mask]

	#---------------------------------------------#
	# 找到对应的先验框
	#---------------------------------------------#
	assigned_anchors = self.anchors[assign_mask]

	#---------------------------------------------#
	# 逆向编码，将真实框转化为ssd预测结果的格式
	# 先计算真实框的中心与长宽
	#---------------------------------------------#
	box_center = 0.5 * (box[:2] + box[2:])
	box_wh = box[2:] - box[:2]
	#---------------------------------------------#
	# 再计算重合度较高的先验框的中心与长宽
	#---------------------------------------------#
	assigned_anchors_center = (assigned_anchors[:, 0:2] + assigned_anchors[:, 2:4]) * 0.5
	assigned_anchors_wh = (assigned_anchors[:, 2:4] - assigned_anchors[:, 0:2])

	#------------------------------------------------#
	# 逆向求取ssd应该有的预测结果
	# 先求取中心的预测结果，再求取宽高的预测结果
	# 存在改变数量级的参数，默认为[0.1,0.1,0.2,0.2]
	#------------------------------------------------#
	encoded_box[:, :2][assign_mask] = box_center - assigned_anchors_center
	encoded_box[:, :2][assign_mask] /= assigned_anchors_wh
	encoded_box[:, :2][assign_mask] /= np.array(variances)[:2]

	encoded_box[:, 2:4][assign_mask] = np.log(box_wh / assigned_anchors_wh)
	encoded_box[:, 2:4][assign_mask] /= np.array(variances)[2:4]
	return encoded_box.ravel()

	def assign_boxes(self, boxes):
	#---------------------------------------------------#
	# assignment分为3个部分
	# :4 的内容为网络应该有的回归预测结果
	# 4:-1 的内容为先验框所对应的种类，默认为背景
	# -1 的内容为当前先验框是否包含目标
	#---------------------------------------------------#
	assignment = np.zeros((self.num_anchors, 4 + self.num_classes + 1))
	assignment[:, 4] = 1.0
	if len(boxes) == 0:
	return assignment

	# 对每一个真实框都进行iou计算
	encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
	#---------------------------------------------------#
	# 在reshape后，获得的encoded_boxes的shape为：
	# [num_true_box, num_anchors, 4 + 1]
	# 4是编码后的结果，1为iou
	#---------------------------------------------------#
	encoded_boxes = encoded_boxes.reshape(-1, self.num_anchors, 5)

	#---------------------------------------------------#
	# [num_anchors]求取每一个先验框重合度最大的真实框
	#---------------------------------------------------#
	best_iou = encoded_boxes[:, :, -1].max(axis=0)
	best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
	best_iou_mask = best_iou > 0
	best_iou_idx = best_iou_idx[best_iou_mask]

	#---------------------------------------------------#
	# 计算一共有多少先验框满足需求
	#---------------------------------------------------#
	assign_num = len(best_iou_idx)

	# 将编码后的真实框取出
	encoded_boxes = encoded_boxes[:, best_iou_mask, :]
	#---------------------------------------------------#
	# 编码后的真实框的赋值
	#---------------------------------------------------#
	assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, np.arange(assign_num), :4]
	#----------------------------------------------------------#
	# 4代表为背景的概率，设定为0，因为这些先验框有对应的物体
	#----------------------------------------------------------#
	assignment[:, 4][best_iou_mask] = 0
	assignment[:, 5:-1][best_iou_mask] = boxes[best_iou_idx, 4:]
	#----------------------------------------------------------#
	# -1表示先验框是否有对应的物体
	#----------------------------------------------------------#
	assignment[:, -1][best_iou_mask] = 1
	# 通过assign_boxes我们就获得了，输入进来的这张图片，应该有的预测结果是什么样子的
	return assignment

	# DataLoader中collate_fn使用
	def ssd_dataset_collate(batch):
	images = []
	bboxes = []
	for img, box in batch:
	images.append(img)
	bboxes.append(box)
	images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
	bboxes = torch.from_numpy(np.array(bboxes)).type(torch.FloatTensor)
	return images, bboxes