| | import torch |
| | import numpy as np |
| |
|
| |
|
def point_form(boxes):
    """Turn center-size boxes into corner (point-form) boxes.

    Args:
        boxes: (tensor) boxes laid out as (cx, cy, w, h), one box per row.
    Return:
        (tensor) the same boxes laid out as (xmin, ymin, xmax, ymax).
    """
    centers = boxes[:, :2]
    half_sizes = boxes[:, 2:] / 2
    top_left = centers - half_sizes
    bottom_right = centers + half_sizes
    return torch.cat((top_left, bottom_right), 1)
| |
|
| |
|
def center_size(boxes):
    """Convert point-form boxes to center-size (cx, cy, w, h) form.

    Args:
        boxes: (tensor) boxes laid out as (xmin, ymin, xmax, ymax),
            one box per row.
    Return:
        (tensor) the same boxes laid out as (cx, cy, w, h).
    """
    top_left = boxes[:, :2]
    bottom_right = boxes[:, 2:]
    # torch.cat takes a *sequence* of tensors followed by the dim; the two
    # halves must therefore be wrapped in a single tuple.
    return torch.cat(((bottom_right + top_left) / 2,
                      bottom_right - top_left), 1)
| |
|
| |
|
def intersect(box_a, box_b):
    """Pairwise intersection area between two sets of point-form boxes.

    Both inputs are viewed as [A,B,2] through broadcasting:
        [A,2] -> [A,1,2]
        [B,2] -> [1,B,2]
    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    # Lower-right corner of the overlap: the smaller of the two max corners.
    lower_right = torch.min(box_a[:, 2:].unsqueeze(1),
                            box_b[:, 2:].unsqueeze(0))
    # Upper-left corner of the overlap: the larger of the two min corners.
    upper_left = torch.max(box_a[:, :2].unsqueeze(1),
                           box_b[:, :2].unsqueeze(0))
    # Negative extents mean the boxes do not overlap along that axis.
    extent = torch.clamp(lower_right - upper_left, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
| |
|
| |
|
def jaccard(box_a, box_b):
    """Pairwise jaccard overlap (intersection over union) of two box sets.

    For every pair the value is
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    overlap = intersect(box_a, box_b)
    wh_a = box_a[:, 2:] - box_a[:, :2]
    wh_b = box_b[:, 2:] - box_b[:, :2]
    # Areas are laid out along rows (a) and columns (b) of the overlap grid.
    area_a = (wh_a[:, 0] * wh_a[:, 1]).unsqueeze(1).expand_as(overlap)
    area_b = (wh_b[:, 0] * wh_b[:, 1]).unsqueeze(0).expand_as(overlap)
    return overlap / (area_a + area_b - overlap)
| |
|
| |
|
def matrix_iou(a, b):
    """
    Pairwise IoU between the boxes in a and the boxes in b (numpy version,
    used for data augmentation). Boxes are rows of (x1, y1, x2, y2).
    """
    top_left = np.maximum(a[:, None, :2], b[:, :2])
    bottom_right = np.minimum(a[:, None, 2:], b[:, 2:])

    # Pairs whose corners do not form a proper box contribute zero area.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter_area = np.prod(bottom_right - top_left, axis=2) * has_overlap
    a_area = np.prod(a[:, 2:] - a[:, :2], axis=1)
    b_area = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union_area = a_area[:, None] + b_area - inter_area
    return inter_area / union_area
| |
|
| |
|
def matrix_iof(a, b):
    """
    Pairwise intersection area divided by the area of the box from a,
    with that area floored at 1 (numpy version, used for data augmentation).
    Boxes are rows of (x1, y1, x2, y2).
    """
    top_left = np.maximum(a[:, None, :2], b[:, :2])
    bottom_right = np.minimum(a[:, None, 2:], b[:, 2:])

    # Pairs whose corners do not form a proper box contribute zero area.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter_area = np.prod(bottom_right - top_left, axis=2) * has_overlap
    a_area = np.prod(a[:, 2:] - a[:, :2], axis=1)
    denominator = np.maximum(a_area[:, None], 1)
    return inter_area / denominator
| |
|
| |
|
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx,
          min_gt_overlap=0.2):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, and write the resulting targets into
    loc_t[idx] and conf_t[idx].

    Args:
        threshold: (float) The overlap threshold used when matching boxes.
        truths: (tensor) Ground truth boxes in point form,
            Shape: [num_obj, 4].
        priors: (tensor) Prior boxes in center-size form,
            Shape: [num_priors, 4].
        variances: (list[float]) Variances of priorboxes, passed to encode().
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched labels for conf preds.
        idx: (int) current batch index
        min_gt_overlap: (float) Minimum overlap a ground truth must reach
            with its best prior to count as valid. Defaults to 0.2.
    Return:
        None. loc_t[idx] and conf_t[idx] are modified in place; both are set
        to 0 when no ground truth reaches min_gt_overlap.
    """
    # Overlap grid, Shape: [num_obj, num_priors].
    overlaps = jaccard(
        truths,
        point_form(priors)
    )

    # Best prior for each ground truth, Shape: [num_obj, 1].
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)

    # Ground truths whose best prior does not overlap enough are ignored.
    valid_gt_idx = best_prior_overlap[:, 0] >= min_gt_overlap
    best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
    if best_prior_idx_filter.shape[0] <= 0:
        loc_t[idx] = 0
        conf_t[idx] = 0
        return

    # Best ground truth for each prior, Shape: [1, num_priors].
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
    best_truth_idx.squeeze_(0)
    best_truth_overlap.squeeze_(0)
    best_prior_idx.squeeze_(1)
    best_prior_idx_filter.squeeze_(1)
    # Force the best prior of every valid ground truth above any threshold.
    best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2)

    # Make each ground truth's best prior point back at that ground truth.
    # NOTE(review): this iterates over all ground truths, not only the ones
    # that passed min_gt_overlap - confirm this is intended.
    for j in range(best_prior_idx.size(0)):
        best_truth_idx[best_prior_idx[j]] = j
    matches = truths[best_truth_idx]          # Shape: [num_priors, 4]
    conf = labels[best_truth_idx]             # Shape: [num_priors]
    # Priors below the threshold get label 0 (presumably background).
    conf[best_truth_overlap < threshold] = 0
    loc = encode(matches, priors, variances)
    loc_t[idx] = loc
    conf_t[idx] = conf
| |
|
| |
|
def encode(matched, priors, variances):
    """Encode matched ground truth boxes as offsets relative to their priors,
    scaled by the priorbox variances.

    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Distance between the ground truth center and the prior center,
    # normalized by the prior size and the first variance.
    center_offset = (matched[:, :2] + matched[:, 2:]) / 2 - prior_centers
    center_offset = center_offset / (variances[0] * prior_sizes)

    # Log of the size ratio, normalized by the second variance.
    size_ratio = (matched[:, 2:] - matched[:, :2]) / prior_sizes
    size_offset = torch.log(size_ratio) / variances[1]

    return torch.cat([center_offset, size_offset], 1)
| |
|
| |
|
| | |
def decode(loc, priors, variances):
    """Turn location predictions back into point-form boxes by undoing the
    offset encoding applied at train time.

    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions as (xmin, ymin, xmax, ymax)
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    centers = prior_centers + loc[:, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * torch.exp(loc[:, 2:] * variances[1])

    # Center-size -> corners: min corner first, max corner derived from it.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), 1)
| |
|
| |
|
def log_sum_exp(x):
    """Compute log(sum(exp(x))) along dim 1, keeping that dim.

    The global maximum of x is subtracted before exponentiating and added
    back afterwards. Used to determine the unaveraged confidence loss across
    all examples in a batch.

    Args:
        x (Variable(tensor)): conf_preds from conf layers
    """
    peak = x.data.max()
    exp_sum = torch.exp(x - peak).sum(1, keepdim=True)
    return torch.log(exp_sum) + peak
| |
|
| |
|
| | |
| | |
| | |
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.

    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        (keep, count): keep is a long tensor of size scores.size(0) whose
        first `count` entries are the indices of the kept boxes with respect
        to num_priors, in order of decreasing score. For empty input the
        result is (keep, 0), so callers can always unpack two values.
    """
    keep = torch.zeros(scores.size(0)).long()
    if boxes.numel() == 0:
        return keep, 0

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    # Ascending sort: the best remaining candidate is always the last entry.
    _, idx = scores.sort(0)
    idx = idx[-top_k:]

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of the current highest-scoring box
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # drop the box that was just kept

        # Corners of the overlap between box i and every remaining box.
        xx1 = torch.clamp(x1[idx], min=x1[i])
        yy1 = torch.clamp(y1[idx], min=y1[i])
        xx2 = torch.clamp(x2[idx], max=x2[i])
        yy2 = torch.clamp(y2[idx], max=y2[i])

        # Negative extents mean no overlap along that axis.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        union = (area[idx] - inter) + area[i]
        iou = inter / union

        # Only boxes that do not overlap box i too much stay as candidates.
        idx = idx[iou.le(overlap)]
    return keep, count
| |
|
| |
|
| |
|