# NOTE: removed non-Python web-scrape artifacts that preceded this file
# ("stevfoy's picture", "extra", commit hash "985c437") — they broke parsing.
import math
import torch
import torch.nn as nn
from .utils import to_cpu
# This new loss function is based on https://github.com/ultralytics/yolov3/blob/master/utils/loss.py
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
    """Return the IoU of box1 (shape 4 or 4xn) against box2 (shape nx4).

    With one of GIoU/DIoU/CIoU set, the corresponding generalized metric is
    returned instead of plain IoU. When several flags are set, DIoU takes
    precedence over CIoU, and either takes precedence over GIoU (this mirrors
    the original branch order).
    """
    box2 = box2.T
    if x1y1x2y2:
        # Inputs are already corner coordinates (x1, y1, x2, y2).
        b1_x1, b1_y1, b1_x2, b1_y2 = box1
        b2_x1, b2_y1, b2_x2, b2_y2 = box2
    else:
        # Inputs are (cx, cy, w, h); convert to corner coordinates.
        half_w1, half_h1 = box1[2] / 2, box1[3] / 2
        half_w2, half_h2 = box2[2] / 2, box2[3] / 2
        b1_x1, b1_x2 = box1[0] - half_w1, box1[0] + half_w1
        b1_y1, b1_y2 = box1[1] - half_h1, box1[1] + half_h1
        b2_x1, b2_x2 = box2[0] - half_w2, box2[0] + half_w2
        b2_y1, b2_y2 = box2[1] - half_h2, box2[1] + half_h2

    # Overlap area, clamped so disjoint boxes contribute zero.
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Box sizes; eps on the heights guards the w/h atan ratios used by CIoU.
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps
    iou = inter / union

    if not (GIoU or DIoU or CIoU):
        return iou  # plain IoU

    # Width/height of the smallest enclosing (convex) box.
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if DIoU or CIoU:  # Distance / Complete IoU, https://arxiv.org/abs/1911.08287v1
        c2 = cw ** 2 + ch ** 2 + eps  # squared diagonal of the enclosing box
        # Squared distance between the two box centers.
        rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2
                + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4
        if DIoU:
            return iou - rho2 / c2
        # CIoU adds an aspect-ratio consistency penalty on top of DIoU.
        # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
        v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
        with torch.no_grad():
            alpha = v / ((1 + eps) - iou + v)
        return iou - (rho2 / c2 + v * alpha)

    # GIoU, https://arxiv.org/pdf/1902.09630.pdf
    c_area = cw * ch + eps  # enclosing box area
    return iou - (c_area - union) / c_area
def compute_loss(predictions, targets, model):
    """Compute the total YOLO training loss (box + objectness + class).

    Args:
        predictions: list with one tensor per yolo layer, indexed below as
            [img_in_batch, anchor, grid_j, grid_i, (x, y, w, h, obj, cls...)]
            — this layout is what the fancy indexing relies on; confirm
            against the model's output format.
        targets: tensor of target rows (image_id, class, x, y, w, h);
            see build_targets() for the expected row layout.
        model: network object; only used here to reach its yolo layers via
            build_targets().

    Returns:
        (loss, components): the scalar total loss (still attached to the
        graph) and a detached CPU tensor holding (lbox, lobj, lcls, loss).
    """
    # Check which device was used
    device = targets.device
    # Add placeholder varables for the different losses
    lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
    # Build yolo targets
    tcls, tbox, indices, anchors = build_targets(predictions, targets, model) # targets
    # Define different loss functions classification
    BCEcls = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
    BCEobj = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
    # Calculate losses for each yolo layer
    for layer_index, layer_predictions in enumerate(predictions):
        # Get image ids, anchors, grid index i and j for each target in the current yolo layer
        b, anchor, grid_j, grid_i = indices[layer_index]
        # Build empty object target tensor with the same shape as the object prediction
        tobj = torch.zeros_like(layer_predictions[..., 0], device=device) # target obj
        # Get the number of targets for this layer.
        # Each target is a label box with some scaling and the association of an anchor box.
        # Label boxes may be associated to 0 or multiple anchors. So they are multiple times or not at all in the targets.
        num_targets = b.shape[0]
        # Check if there are targets for this batch
        if num_targets:
            # Load the corresponding values from the predictions for each of the targets
            ps = layer_predictions[b, anchor, grid_j, grid_i]
            # Regression of the box
            # Apply sigmoid to xy offset predictions in each cell that has a target
            pxy = ps[:, :2].sigmoid()
            # Apply exponent to wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
            pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
            # Build box out of xy and wh
            pbox = torch.cat((pxy, pwh), 1)
            # Calculate CIoU or GIoU for each target with the predicted box for its cell + anchor
            # (pbox.T is 4xn; tbox entries are (x_offset, y_offset, w, h), hence x1y1x2y2=False)
            iou = bbox_iou(pbox.T, tbox[layer_index], x1y1x2y2=False, CIoU=True)
            # We want to minimize our loss so we and the best possible IoU is 1 so we take 1 - IoU and reduce it with a mean
            lbox += (1.0 - iou).mean() # iou loss
            # Classification of the objectness
            # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
            # detach() keeps the objectness target out of the box-regression graph
            tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype) # Use cells with iou > 0 as object targets
            # Classification of the class
            # Check if we need to do a classification (number of classes > 1)
            if ps.size(1) - 5 > 1:
                # Hot one class encoding
                t = torch.zeros_like(ps[:, 5:], device=device) # targets
                t[range(num_targets), tcls[layer_index]] = 1
                # Use the tensor to calculate the BCE loss
                lcls += BCEcls(ps[:, 5:], t) # BCE
        # Classification of the objectness the sequel
        # Calculate the BCE loss between the on the fly generated target and the network prediction
        lobj += BCEobj(layer_predictions[..., 4], tobj) # obj loss
    # Fixed weighting of the three loss components (hyperparameters)
    lbox *= 0.05
    lobj *= 1.0
    lcls *= 0.5
    # Merge losses
    loss = lbox + lobj + lcls
    return loss, to_cpu(torch.cat((lbox, lobj, lcls, loss)))
def build_targets(p, targets, model):
    # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
    """Assign each label box to yolo layers, anchors and grid cells.

    Args:
        p: list of per-layer prediction tensors, used only for their grid
            shape (p[i].shape[2:4] = grid_j, grid_i).
        targets: tensor of rows (image_id, class, x, y, w, h); x, y, w, h are
            presumably normalized to [0, 1] since they are scaled by the grid
            size below — confirm against the dataloader.
        model: must expose model.yolo_layers with .anchors and .stride.

    Returns:
        (tcls, tbox, indices, anch) — per-layer lists of target class ids,
        target boxes as (x_offset, y_offset, w, h) in grid-cell units,
        (image, anchor, grid_j, grid_i) index tuples, and matched anchors.
    """
    na, nt = 3, targets.shape[0]  # number of anchors, targets #TODO: na is hard-coded to 3
    tcls, tbox, indices, anch = [], [], [], []
    gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
    # Make a tensor that iterates 0-2 for 3 anchors and repeat that as many times as we have target boxes
    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
    # Copy target boxes anchor size times and append an anchor index to each copy the anchor index is also expressed by the new first dimension
    targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)
    for i, yolo_layer in enumerate(model.yolo_layers):
        # Scale anchors by the yolo grid cell size so that an anchor with the size of the cell would result in 1
        anchors = yolo_layer.anchors / yolo_layer.stride
        # Add the number of yolo cells in this layer the gain tensor
        # The gain tensor matches the collums of our targets (img id, class, x, y, w, h, anchor id)
        gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
        # Scale targets by the number of yolo layer cells, they are now in the yolo cell coordinate system
        t = targets * gain
        # Check if we have targets
        if nt:
            # Calculate ration between anchor and target box for both width and height
            r = t[:, :, 4:6] / anchors[:, None]
            # Select the ratios that have the highest divergence in any axis and check if the ratio is less than 4
            j = torch.max(r, 1. / r).max(2)[0] < 4  # compare #TODO: the threshold 4 is hard-coded
            # Only use targets that have the correct ratios for their anchors
            # That means we only keep ones that have a matching anchor and we loose the anchor dimension
            # The anchor id is still saved in the 7th value of each target
            t = t[j]
        else:
            t = targets[0]
        # Extract image id in batch and class id
        b, c = t[:, :2].long().T
        # We isolate the target cell associations.
        # x, y, w, h are allready in the cell coordinate system meaning an x = 1.2 would be 1.2 times cellwidth
        gxy = t[:, 2:4]
        gwh = t[:, 4:6]  # grid wh
        # Cast to int to get an cell index e.g. 1.2 gets associated to cell 1
        gij = gxy.long()
        # Isolate x and y index dimensions
        gi, gj = gij.T  # grid xy indices
        # Convert anchor indexes to int
        a = t[:, 6].long()
        # Add target tensors for this yolo layer to the output lists
        # Add to index list and limit index range to prevent out of bounds
        # NOTE: clamp_ is in-place and gi/gj are views into gij, so this also
        # clamps the gij values used for the cell offsets appended just below.
        indices.append((b, a, gj.clamp_(0, gain[3].long() - 1), gi.clamp_(0, gain[2].long() - 1)))
        # Add to target box list and convert box coordinates from global grid coordinates to local offsets in the grid cell
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
        # Add correct anchor for each target to the list
        anch.append(anchors[a])
        # Add class for each target to the list
        tcls.append(c)
    return tcls, tbox, indices, anch