import math
import time

import cv2
import numpy as np
import pkg_resources as pkg
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    # scales img(bs,3,y,x) by ratio constrained to gs-multiple
    if ratio == 1.0:
        return img
    else:
        h, w = img.shape[2:]
        s = (int(h * ratio), int(w * ratio))  # new size
        img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
        if not same_shape:  # pad/crop img
            h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w))
        return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
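
# --- Illustrative usage sketch (not part of the original file) ---
# Downscale a dummy batch by 0.5 and pad back to a multiple of the grid size gs.
# The tensor sizes below are arbitrary example values.
def _demo_scale_img():
    imgs = torch.zeros(2, 3, 256, 416)
    out = scale_img(imgs, ratio=0.5, gs=32)
    print(out.shape)  # torch.Size([2, 3, 128, 224]): 208 -> padded up to 224 (7 * 32)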

def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          groups=conv.groups,
                          bias=True).requires_grad_(False).to(conv.weight.device)

    # prepare filters
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # prepare spatial bias
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv
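
# --- Illustrative usage sketch (not part of the original file) ---
# Fuse a Conv2d + BatchNorm2d pair and check that, in eval mode, the fused layer
# reproduces the two-layer output. Layer sizes and BN statistics are made up.
def _demo_fuse_conv_and_bn():
    conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
    bn = nn.BatchNorm2d(8)
    bn.running_mean.uniform_(-1, 1)      # give BN non-trivial statistics
    bn.running_var.uniform_(0.5, 2.0)
    nn.init.uniform_(bn.weight, 0.5, 1.5)
    nn.init.uniform_(bn.bias, -0.5, 0.5)
    conv.eval()
    bn.eval()                            # eval mode: BN uses running statistics
    x = torch.randn(1, 3, 32, 32)
    fused = fuse_conv_and_bn(conv, bn)
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # True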

def check_anchor_order(m):
    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
    a = m.anchors.prod(-1).view(-1)  # anchor area
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da.sign() != ds.sign():  # anchor and stride orders disagree
        m.anchors[:] = m.anchors.flip(0)  # reverse anchor order

def initialize_weights(model):
    for m in model.modules():
        t = type(m)
        if t is nn.Conv2d:
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif t is nn.BatchNorm2d:
            m.eps = 1e-3
            m.momentum = 0.03
        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True

def make_divisible(x, divisor):
    # Returns x rounded up to the nearest multiple of divisor
    if isinstance(divisor, torch.Tensor):
        divisor = int(divisor.max())  # to int
    return math.ceil(x / divisor) * divisor
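
# --- Illustrative usage sketch (not part of the original file) ---
# Channel counts are typically rounded up to a hardware-friendly multiple.
def _demo_make_divisible():
    print(make_divisible(97, 32))  # 128
    print(make_divisible(64, 32))  # 64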

def intersect_dicts(da, db, exclude=()):
    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}

def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False):
    # Check version vs. required version
    current, minimum = (pkg.parse_version(x) for x in (current, minimum))
    result = (current == minimum) if pinned else (current >= minimum)  # bool
    if hard:  # assert min requirements met
        assert result, f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed'
    else:
        return result

class Colors:
    # Ultralytics color palette https://ultralytics.com/
    def __init__(self):
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
               '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb('#' + c) for c in hex]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))

def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
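
# --- Illustrative usage sketch (not part of the original file) ---
# Pairwise IoU between two small sets of xyxy boxes; the result is an NxM matrix.
def _demo_box_iou():
    a = torch.tensor([[0., 0., 10., 10.]])
    b = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])
    print(box_iou(a, b))  # tensor([[1.0000, 0.1429]]) -> 25 / (100 + 100 - 25)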

def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """
    if isinstance(prediction, np.ndarray):
        prediction = torch.from_numpy(prediction)

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
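
# --- Illustrative usage sketch (not part of the original file) ---
# Run NMS on a random prediction tensor shaped (batch, boxes, 5 + num_classes),
# i.e. [cx, cy, w, h, obj_conf, cls_conf...]; 80 classes mimics a COCO-style head.
def _demo_non_max_suppression():
    torch.manual_seed(0)
    pred = torch.rand(1, 100, 85)              # 1 image, 100 candidate boxes
    pred[..., :2] *= 640                       # box centers in pixels
    pred[..., 2:4] = pred[..., 2:4] * 100 + 2  # box widths/heights in pixels
    dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    print(dets[0].shape)                       # (n, 6): x1, y1, x2, y2, conf, cls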

def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y

DEFAULT_LANG_LIST = ['eng', 'ja']


def draw_bbox(pred, img, lang_list=None):
    # Draw detection boxes on a copy of img; the last column of each pred row is the
    # class index, used both to pick the language label and the box color
    if lang_list is None:
        lang_list = DEFAULT_LANG_LIST
    lw = max(round(sum(img.shape) / 2 * 0.003), 2)  # line width
    pred = pred.astype(np.int32)
    colors = Colors()
    img = np.copy(img)
    for ii, obj in enumerate(pred):
        p1, p2 = (obj[0], obj[1]), (obj[2], obj[3])
        label = lang_list[obj[-1]] + str(ii + 1)
        cv2.rectangle(img, p1, p2, colors(obj[-1], bgr=True), lw, lineType=cv2.LINE_AA)
        t_w, t_h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=lw)[0]
        cv2.putText(img, label, (p1[0], p1[1] + t_h + 2), 0, lw / 3, colors(obj[-1], bgr=True), max(lw - 1, 1), cv2.LINE_AA)
    return img
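
# --- Illustrative usage sketch (not part of the original file) ---
# Draw two made-up detections in non_max_suppression's output format
# [x1, y1, x2, y2, conf, cls] on a blank canvas; draw_bbox only reads the
# box corners and the class index. The output filename is hypothetical.
if __name__ == '__main__':
    canvas = np.zeros((256, 256, 3), dtype=np.uint8)
    dets = np.array([[20, 20, 120, 120, 0.9, 0],    # class 0 -> 'eng'
                     [60, 80, 200, 220, 0.8, 1]])   # class 1 -> 'ja'
    annotated = draw_bbox(dets, canvas)
    cv2.imwrite('bbox_demo.png', annotated)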