Voidljc
Your commit message
aa24fe8
Raw
History Blame Contribute Delete
14.1 kB
import cv2
import numpy as np
import torch
from PIL import Image
from torch.utils.data.dataset import Dataset
from utils.utils import cvtColor, preprocess_input
class SSDDataset(Dataset):
def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, train, overlap_threshold = 0.5):
super(SSDDataset, self).__init__()
self.annotation_lines = annotation_lines
self.length = len(self.annotation_lines)
self.input_shape = input_shape
self.anchors = anchors
self.num_anchors = len(anchors)
self.batch_size = batch_size
self.num_classes = num_classes
self.train = train
self.overlap_threshold = overlap_threshold
def __len__(self):
return self.length
def __getitem__(self, index):
index = index % self.length
#---------------------------------------------------#
# 训练时进行数据的随机增强
# 验证时不进行数据的随机增强
#---------------------------------------------------#
image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
image_data = np.transpose(preprocess_input(np.array(image, dtype = np.float32)), (2, 0, 1))
if len(box)!=0:
boxes = np.array(box[:,:4] , dtype=np.float32)
# 进行归一化,调整到0-1之间
boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
# 对真实框的种类进行one hot处理
one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
box = np.concatenate([boxes, one_hot_label], axis=-1)
box = self.assign_boxes(box)
return np.array(image_data, np.float32), np.array(box, np.float32)
def rand(self, a=0, b=1):
return np.random.rand()*(b-a) + a
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
line = annotation_line.split()
#------------------------------#
# 读取图像并转换成RGB图像
#------------------------------#
image = Image.open(line[0])
image = cvtColor(image)
#------------------------------#
# 获得图像的高宽与目标高宽
#------------------------------#
iw, ih = image.size
h, w = input_shape
#------------------------------#
# 获得预测框
#------------------------------#
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
#---------------------------------#
# 将图像多余的部分加上灰条
#---------------------------------#
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
#---------------------------------#
# 对真实框进行调整
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
return image_data, box
#------------------------------------------#
# 对图像进行缩放并且进行长和宽的扭曲
#------------------------------------------#
new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.25, 2)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC)
#------------------------------------------#
# 将图像多余的部分加上灰条
#------------------------------------------#
dx = int(self.rand(0, w-nw))
dy = int(self.rand(0, h-nh))
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image
#------------------------------------------#
# 翻转图像
#------------------------------------------#
flip = self.rand()<.5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
image_data = np.array(image, np.uint8)
#---------------------------------#
# 对图像进行色域变换
# 计算色域变换的参数
#---------------------------------#
r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
#---------------------------------#
# 将图像转到HSV上
#---------------------------------#
hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
dtype = image_data.dtype
#---------------------------------#
# 应用变换
#---------------------------------#
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
#---------------------------------#
# 对真实框进行调整
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
return image_data, box
def iou(self, box):
#---------------------------------------------#
# 计算出每个真实框与所有的先验框的iou
# 判断真实框与先验框的重合情况
#---------------------------------------------#
inter_upleft = np.maximum(self.anchors[:, :2], box[:2])
inter_botright = np.minimum(self.anchors[:, 2:4], box[2:])
inter_wh = inter_botright - inter_upleft
inter_wh = np.maximum(inter_wh, 0)
inter = inter_wh[:, 0] * inter_wh[:, 1]
#---------------------------------------------#
# 真实框的面积
#---------------------------------------------#
area_true = (box[2] - box[0]) * (box[3] - box[1])
#---------------------------------------------#
# 先验框的面积
#---------------------------------------------#
area_gt = (self.anchors[:, 2] - self.anchors[:, 0])*(self.anchors[:, 3] - self.anchors[:, 1])
#---------------------------------------------#
# 计算iou
#---------------------------------------------#
union = area_true + area_gt - inter
iou = inter / union
return iou
def encode_box(self, box, return_iou=True, variances = [0.1, 0.1, 0.2, 0.2]):
#---------------------------------------------#
# 计算当前真实框和先验框的重合情况
# iou [self.num_anchors]
# encoded_box [self.num_anchors, 5]
#---------------------------------------------#
iou = self.iou(box)
encoded_box = np.zeros((self.num_anchors, 4 + return_iou))
#---------------------------------------------#
# 找到每一个真实框,重合程度较高的先验框
# 真实框可以由这个先验框来负责预测
#---------------------------------------------#
assign_mask = iou > self.overlap_threshold
#---------------------------------------------#
# 如果没有一个先验框重合度大于self.overlap_threshold
# 则选择重合度最大的为正样本
#---------------------------------------------#
if not assign_mask.any():
assign_mask[iou.argmax()] = True
#---------------------------------------------#
# 利用iou进行赋值
#---------------------------------------------#
if return_iou:
encoded_box[:, -1][assign_mask] = iou[assign_mask]
#---------------------------------------------#
# 找到对应的先验框
#---------------------------------------------#
assigned_anchors = self.anchors[assign_mask]
#---------------------------------------------#
# 逆向编码,将真实框转化为ssd预测结果的格式
# 先计算真实框的中心与长宽
#---------------------------------------------#
box_center = 0.5 * (box[:2] + box[2:])
box_wh = box[2:] - box[:2]
#---------------------------------------------#
# 再计算重合度较高的先验框的中心与长宽
#---------------------------------------------#
assigned_anchors_center = (assigned_anchors[:, 0:2] + assigned_anchors[:, 2:4]) * 0.5
assigned_anchors_wh = (assigned_anchors[:, 2:4] - assigned_anchors[:, 0:2])
#------------------------------------------------#
# 逆向求取ssd应该有的预测结果
# 先求取中心的预测结果,再求取宽高的预测结果
# 存在改变数量级的参数,默认为[0.1,0.1,0.2,0.2]
#------------------------------------------------#
encoded_box[:, :2][assign_mask] = box_center - assigned_anchors_center
encoded_box[:, :2][assign_mask] /= assigned_anchors_wh
encoded_box[:, :2][assign_mask] /= np.array(variances)[:2]
encoded_box[:, 2:4][assign_mask] = np.log(box_wh / assigned_anchors_wh)
encoded_box[:, 2:4][assign_mask] /= np.array(variances)[2:4]
return encoded_box.ravel()
def assign_boxes(self, boxes):
#---------------------------------------------------#
# assignment分为3个部分
# :4 的内容为网络应该有的回归预测结果
# 4:-1 的内容为先验框所对应的种类,默认为背景
# -1 的内容为当前先验框是否包含目标
#---------------------------------------------------#
assignment = np.zeros((self.num_anchors, 4 + self.num_classes + 1))
assignment[:, 4] = 1.0
if len(boxes) == 0:
return assignment
# 对每一个真实框都进行iou计算
encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
#---------------------------------------------------#
# 在reshape后,获得的encoded_boxes的shape为:
# [num_true_box, num_anchors, 4 + 1]
# 4是编码后的结果,1为iou
#---------------------------------------------------#
encoded_boxes = encoded_boxes.reshape(-1, self.num_anchors, 5)
#---------------------------------------------------#
# [num_anchors]求取每一个先验框重合度最大的真实框
#---------------------------------------------------#
best_iou = encoded_boxes[:, :, -1].max(axis=0)
best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
best_iou_mask = best_iou > 0
best_iou_idx = best_iou_idx[best_iou_mask]
#---------------------------------------------------#
# 计算一共有多少先验框满足需求
#---------------------------------------------------#
assign_num = len(best_iou_idx)
# 将编码后的真实框取出
encoded_boxes = encoded_boxes[:, best_iou_mask, :]
#---------------------------------------------------#
# 编码后的真实框的赋值
#---------------------------------------------------#
assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, np.arange(assign_num), :4]
#----------------------------------------------------------#
# 4代表为背景的概率,设定为0,因为这些先验框有对应的物体
#----------------------------------------------------------#
assignment[:, 4][best_iou_mask] = 0
assignment[:, 5:-1][best_iou_mask] = boxes[best_iou_idx, 4:]
#----------------------------------------------------------#
# -1表示先验框是否有对应的物体
#----------------------------------------------------------#
assignment[:, -1][best_iou_mask] = 1
# 通过assign_boxes我们就获得了,输入进来的这张图片,应该有的预测结果是什么样子的
return assignment
# DataLoader中collate_fn使用
def ssd_dataset_collate(batch):
images = []
bboxes = []
for img, box in batch:
images.append(img)
bboxes.append(box)
images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
bboxes = torch.from_numpy(np.array(bboxes)).type(torch.FloatTensor)
return images, bboxes