File size: 5,412 Bytes
3dcfb26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# -*- coding: utf-8 -*-
"""
Custom loss function definitions.
"""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils.utils import *
class IoULoss(nn.Module):
    """
    Criterion computing a soft Intersection-over-Union (IoU) loss between a
    predicted segmentation mask (given as logits) and its ground truth.

    Rahman, M.A. and Wang, Y.:
    Optimizing Intersection-Over-Union in Deep Neural Networks for
    Image Segmentation. International Symposium on Visual Computing (2016)
    http://www.cs.umanitoba.ca/~ywang/papers/isvc16.pdf

    Args:
        size_average (bool): if True, divide the loss by the batch size.
    """
    def __init__(self, size_average=True):
        super().__init__()
        self.size_average = size_average

    def forward(self, input, target):
        # torch.sigmoid replaces the deprecated F.sigmoid (removed in
        # modern PyTorch); numerically identical.
        input = torch.sigmoid(input)
        # Soft intersection/union accumulated over ALL elements of the
        # batch, not per-sample.
        intersection = (input * target).sum()
        union = ((input + target) - (input * target)).sum()
        iou = intersection / union
        # NOTE(review): subtracting the scalar IoU from the batch size is
        # what the original code does; with size_average this yields
        # 1 - iou/batch rather than 1 - iou. Preserved for compatibility.
        iou_dual = input.size(0) - iou
        if self.size_average:
            iou_dual = iou_dual / input.size(0)
        return iou_dual
def yolo_loss(input, target, gi, gj, best_n_list, w_coord=5.):
    """
    YOLO-style detection loss: coordinate regression + confidence.

    Args:
        input: (batch, n_anchors, 5, grid, grid) raw predictions; channels
               are (tx, ty, tw, th, conf).
        target: tensor of the same layout holding regression targets.
        gi, gj: per-sample grid column/row of the responsible cell.
        best_n_list: per-sample index of the responsible anchor.
        w_coord: weight applied to the summed coordinate losses.

    Returns:
        Scalar tensor: w_coord * (MSE over x, y, w, h) + cross-entropy on
        the flattened confidence map.
    """
    # reduction='mean' replaces the deprecated size_average=True kwarg.
    mseloss = torch.nn.MSELoss(reduction='mean')
    celoss = torch.nn.CrossEntropyLoss(reduction='mean')
    batch = input.size(0)

    # Allocate on input's device instead of hard-coding .cuda(), so the
    # loss also works on CPU; Variable is obsolete and dropped.
    pred_bbox = torch.zeros(batch, 4, device=input.device)
    gt_bbox = torch.zeros(batch, 4, device=input.device)
    for ii in range(batch):
        # x/y offsets go through a sigmoid; w/h stay raw (log-space targets).
        pred_bbox[ii, 0:2] = torch.sigmoid(input[ii, best_n_list[ii], 0:2, gj[ii], gi[ii]])
        pred_bbox[ii, 2:4] = input[ii, best_n_list[ii], 2:4, gj[ii], gi[ii]]
        gt_bbox[ii, :] = target[ii, best_n_list[ii], :4, gj[ii], gi[ii]]
    loss_x = mseloss(pred_bbox[:, 0], gt_bbox[:, 0])
    loss_y = mseloss(pred_bbox[:, 1], gt_bbox[:, 1])
    loss_w = mseloss(pred_bbox[:, 2], gt_bbox[:, 2])
    loss_h = mseloss(pred_bbox[:, 3], gt_bbox[:, 3])

    # Confidence: flatten the conf channel over anchors and cells, then
    # treat the gt argmax cell as the class index for cross-entropy.
    pred_conf = input[:, :, 4, :, :].contiguous().view(batch, -1)
    gt_conf = target[:, :, 4, :, :].contiguous().view(batch, -1)
    loss_conf = celoss(pred_conf, gt_conf.max(1)[1])
    return (loss_x + loss_y + loss_w + loss_h) * w_coord + loss_conf
def build_target(raw_coord, anchors, args):
    """
    Convert corner-format ground-truth boxes into YOLO regression targets.

    Args:
        raw_coord: (batch, 4) tensor of boxes as (x1, y1, x2, y2) in pixels
                   of the network input image.
        anchors: list of (w, h) anchor sizes defined at args.anchor_imsize.
        args: namespace providing size (input image side), gsize (grid
              stride) and anchor_imsize.

    Returns:
        bbox: (batch, n_anchors, 5, grid, grid) target tensor holding
              (tx, ty, tw, th, 1) at each sample's responsible cell/anchor.
        best_gi, best_gj: per-sample grid column/row indices (0-dim tensors).
        best_n_list: per-sample index (int) of the best-matching anchor.
    """
    # Work on raw_coord's device rather than hard-coding .cuda(), so this
    # also runs on CPU-only machines; Variable is obsolete and dropped.
    device = raw_coord.device
    batch, grid = raw_coord.size(0), args.size // args.gsize

    # Corner boxes -> (cx, cy, w, h) normalized to the input image, then
    # scaled to grid units.
    coord = torch.zeros(raw_coord.size(0), raw_coord.size(1), device=device)
    coord[:, 0] = (raw_coord[:, 0] + raw_coord[:, 2]) / (2 * args.size)  # center x
    coord[:, 1] = (raw_coord[:, 1] + raw_coord[:, 3]) / (2 * args.size)  # center y
    coord[:, 2] = (raw_coord[:, 2] - raw_coord[:, 0]) / (args.size)      # width
    coord[:, 3] = (raw_coord[:, 3] - raw_coord[:, 1]) / (args.size)      # height
    coord = coord * grid

    bbox = torch.zeros(coord.size(0), len(anchors), 5, grid, grid, device=device)

    # Anchors are specified at args.anchor_imsize resolution; rescale them
    # to grid units once (loop-invariant, hoisted out of the batch loop).
    scaled_anchors = [(x[0] / (args.anchor_imsize / grid),
                       x[1] / (args.anchor_imsize / grid)) for x in anchors]
    anchor_shapes = torch.FloatTensor(
        np.concatenate((np.zeros((len(scaled_anchors), 2)),
                        np.array(scaled_anchors)), 1))

    best_n_list, best_gi, best_gj = [], [], []
    for ii in range(batch):
        gi = coord[ii, 0].long()        # grid column containing the center
        gj = coord[ii, 1].long()        # grid row containing the center
        tx = coord[ii, 0] - gi.float()  # sub-cell x offset in [0, 1)
        ty = coord[ii, 1] - gj.float()  # sub-cell y offset in [0, 1)
        gw = coord[ii, 2]
        gh = coord[ii, 3]

        # Match the gt box shape (anchored at the origin) against each
        # anchor by IoU; float() detaches safely even for CUDA tensors.
        gt_box = torch.FloatTensor(
            np.array([0, 0, float(gw), float(gh)], dtype=np.float32)).unsqueeze(0)  # [1,4]
        anch_ious = list(bbox_iou(gt_box, anchor_shapes, x1y1x2y2=False))
        best_n = np.argmax(np.array(anch_ious))

        # Width/height targets are log-ratios w.r.t. the chosen anchor.
        tw = torch.log(gw / scaled_anchors[best_n][0] + 1e-16)
        th = torch.log(gh / scaled_anchors[best_n][1] + 1e-16)
        # Confidence target is 1 at the responsible cell; ones_like keeps
        # the scalar on the same device as tx.
        bbox[ii, best_n, :, gj, gi] = torch.stack(
            [tx, ty, tw, th, torch.ones_like(tx)])
        best_n_list.append(int(best_n))
        best_gi.append(gi)
        best_gj.append(gj)

    return bbox, best_gi, best_gj, best_n_list
def adjust_learning_rate(args, optimizer, i_iter):
    """Step-decay the learning rate at the milestones listed in args.steps.

    When i_iter equals the k-th milestone (0-based), the first parameter
    group's lr becomes args.lr * args.power ** (k + 1); the second and third
    groups, if present, get one tenth of that. Other iterations are no-ops.
    """
    if i_iter not in args.steps:
        return
    decay_exp = args.steps.index(i_iter) + 1
    new_lr = args.lr * args.power ** decay_exp
    groups = optimizer.param_groups
    groups[0]['lr'] = new_lr
    # Only groups 1 and 2 are touched, mirroring the original cascade.
    for extra_group in groups[1:3]:
        extra_group['lr'] = new_lr / 10
def cem_loss(co_energy):
    """Negative-log co-energy loss, summed over all elements.

    A small epsilon keeps the log finite when co_energy contains zeros.
    """
    eps = 1e-6
    return -torch.log(co_energy + eps).sum()
class FocalLoss(nn.Module):
    """
    Focal loss for binary classification (Lin et al., "Focal Loss for Dense
    Object Detection", https://arxiv.org/abs/1708.02002).

    Args:
        alpha: balancing weight applied to every element's loss.
        gamma: focusing exponent; larger values down-weight easy examples.
        logits: if True, `inputs` are raw logits; otherwise probabilities.
        reduce: if True return the mean of the per-element losses,
                otherwise their sum.
    """
    def __init__(self, alpha=0.25, gamma=2, logits=True, reduce=False):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        # reduction='none' keeps the per-element loss; the old reduce=False
        # keyword is deprecated and removed in modern PyTorch.
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        # pt is the model's probability of the true class; (1 - pt)**gamma
        # down-weights well-classified (easy) examples.
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss
        if self.reduce:
            return torch.mean(F_loss)
        else:
            return torch.sum(F_loss)