File size: 6,785 Bytes
aa24fe8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | import numpy as np
import torch
from torch import nn
from torchvision.ops import nms
class BBoxUtility(object):
    """Decode raw SSD predictions into per-image detection arrays.

    The class turns (location offsets, class logits) into boxes by decoding
    the offsets against the anchors, filtering by score, running per-class
    non-maximum suppression and finally mapping the normalized boxes back to
    the coordinates of the original image.
    """

    def __init__(self, num_classes):
        """Store the number of classes.

        Args:
            num_classes: Total number of class channels in the confidence
                output. ``decode_box`` iterates over channels
                ``1 .. num_classes - 1`` and skips channel 0.
        """
        self.num_classes = num_classes

    def ssd_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
        """Map normalized center/size boxes to corner boxes in image units.

        Args:
            box_xy: Array ``(..., 2)`` of box centers as ``(x, y)``,
                normalized relative to the network input.
            box_wh: Array ``(..., 2)`` of box sizes as ``(w, h)``, normalized
                relative to the network input.
            input_shape: Two-element network input size. It is multiplied
                against the ``(y, x)``-ordered boxes, so element 0 is the
                vertical size.
            image_shape: Two-element original image size, same ordering as
                ``input_shape``.
            letterbox_image: When truthy, undo the padding introduced by an
                aspect-preserving resize before scaling to image units.

        Returns:
            Array ``(..., 4)`` with columns ``(y_min, x_min, y_max, x_max)``
            scaled by ``image_shape``.

        Neither ``box_xy`` nor ``box_wh`` is modified.
        """
        # Put the y axis first so the boxes can be multiplied directly by the
        # (vertical, horizontal) ordered image shape.
        box_yx = np.asarray(box_xy)[..., ::-1]
        box_hw = np.asarray(box_wh)[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        if letterbox_image:
            # new_shape is the size of the resized image inside the padded
            # network input; offset is the normalized padding between the
            # top-left corner of the input and the valid image region.
            new_shape = np.round(image_shape * np.min(input_shape / image_shape))
            offset = (input_shape - new_shape) / 2. / input_shape
            scale = input_shape / new_shape

            box_yx = (box_yx - offset) * scale
            # Out-of-place on purpose: box_hw is a view of the caller's
            # array, so ``*=`` would overwrite the caller's data.
            box_hw = box_hw * scale

        half_hw = box_hw / 2.
        box_mins = box_yx - half_hw
        box_maxes = box_yx + half_hw
        boxes = np.concatenate([box_mins, box_maxes], axis=-1)
        # boxes is freshly allocated by concatenate, so scaling in place is safe.
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def decode_boxes(self, mbox_loc, anchors, variances):
        """Decode regression offsets against anchors into corner boxes.

        Args:
            mbox_loc: Tensor ``(num_anchors, 4)`` of predicted offsets
                ``(dx, dy, dw, dh)``.
            anchors: Tensor ``(num_anchors, 4)`` of anchors as
                ``(x_min, y_min, x_max, y_max)``.
            variances: Two-element sequence; ``variances[0]`` scales the
                center offsets and ``variances[1]`` scales the size offsets.

        Returns:
            Tensor ``(num_anchors, 4)`` of ``(x_min, y_min, x_max, y_max)``
            clamped to the range ``[0, 1]``.
        """
        # Anchor width/height and center.
        anchor_width = anchors[:, 2] - anchors[:, 0]
        anchor_height = anchors[:, 3] - anchors[:, 1]
        anchor_center_x = 0.5 * (anchors[:, 2] + anchors[:, 0])
        anchor_center_y = 0.5 * (anchors[:, 3] + anchors[:, 1])

        # Predicted center: offset from the anchor center, in anchor-size units.
        center_x = mbox_loc[:, 0] * anchor_width * variances[0] + anchor_center_x
        center_y = mbox_loc[:, 1] * anchor_height * variances[0] + anchor_center_y

        # Predicted size: exponential scaling of the anchor size.
        width = torch.exp(mbox_loc[:, 2] * variances[1]) * anchor_width
        height = torch.exp(mbox_loc[:, 3] * variances[1]) * anchor_height

        # Top-left and bottom-right corners, stacked column-wise.
        decode_bbox = torch.stack(
            (
                center_x - 0.5 * width,
                center_y - 0.5 * height,
                center_x + 0.5 * width,
                center_y + 0.5 * height,
            ),
            dim=-1,
        )
        # Keep the boxes inside the [0, 1] range.
        return decode_bbox.clamp(min=0.0, max=1.0)

    def decode_box(self, predictions, anchors, image_shape, input_shape, letterbox_image,
                   variances=(0.1, 0.2), nms_iou=0.3, confidence=0.5):
        """Turn raw network outputs into final detections for each image.

        Args:
            predictions: Sequence whose element 0 holds the location
                regression output and element 1 the class logits. Both are
                indexed by image first; the logits are indexed as
                ``[image, anchor, class]``.
            anchors: Anchor tensor passed to ``decode_boxes``.
            image_shape: Original image size, forwarded to
                ``ssd_correct_boxes``.
            input_shape: Network input size, forwarded to
                ``ssd_correct_boxes``.
            letterbox_image: Whether the input was letterboxed, forwarded to
                ``ssd_correct_boxes``.
            variances: Two-element sequence of decoding variances.
            nms_iou: IoU threshold handed to ``nms``.
            confidence: Minimum softmax score for a box to be considered.

        Returns:
            A list with one entry per image. An entry is an empty list when
            nothing passed the threshold, otherwise a NumPy array whose rows
            are ``(box[0..3], label, score)``. The box is the output of
            ``ssd_correct_boxes`` and ``label`` is the class channel minus one.
        """
        # Element 0: regression output; element 1: class logits -> probabilities.
        mbox_loc = predictions[0]
        mbox_conf = nn.Softmax(-1)(predictions[1])

        results = []
        # One iteration per image in the batch.
        for i in range(len(mbox_loc)):
            # Decode the anchors with the regression output of this image.
            decode_bbox = self.decode_boxes(mbox_loc[i], anchors, variances)

            per_class = []
            # Channel 0 is skipped (presumably the background class).
            for c in range(1, self.num_classes):
                # Scores of every anchor for this class, thresholded.
                c_confs = mbox_conf[i, :, c]
                c_confs_m = c_confs > confidence
                confs_to_process = c_confs[c_confs_m]
                if len(confs_to_process) == 0:
                    continue

                boxes_to_process = decode_bbox[c_confs_m]
                keep = nms(boxes_to_process, confs_to_process, nms_iou)

                # Keep only the boxes that survived non-maximum suppression.
                good_boxes = boxes_to_process[keep]
                confs = confs_to_process[keep][:, None]
                # Create the labels on the same device as the scores; a bare
                # ``.cuda()`` would use the default GPU and break torch.cat
                # when the model runs on a different device.
                labels = torch.full((len(keep), 1), float(c - 1), device=confs.device)

                # Stack box coordinates, label and score into one row each.
                c_pred = torch.cat((good_boxes, labels, confs), dim=1).cpu().numpy()
                per_class.append(c_pred)

            if per_class:
                detections = np.concatenate(per_class, axis=0)
                # Convert corner boxes to center/size, then map them back to
                # the original image.
                box_xy = (detections[:, 0:2] + detections[:, 2:4]) / 2
                box_wh = detections[:, 2:4] - detections[:, 0:2]
                detections[:, :4] = self.ssd_correct_boxes(
                    box_xy, box_wh, input_shape, image_shape, letterbox_image
                )
                results.append(detections)
            else:
                results.append([])

        return results
|