MRaCL

File size: 13,284 Bytes

3dcfb26

# -*- coding: utf-8 -*-

"""
Generic Image Transform utillities.
"""

import cv2
import random, math
import numpy as np
from collections.abc import Iterable
from torch import rand

import torch.nn.functional as F
from torch.autograd import Variable


class ResizePad:
    """
    Resize and pad an image to given size.
    """

    def __init__(self, size):
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))

        self.h, self.w = size

    def __call__(self, img):
        h, w = img.shape[:2]
        scale = min(self.h / h, self.w / w)
        resized_h = int(np.round(h * scale))
        resized_w = int(np.round(w * scale))
        pad_h = int(np.floor(self.h - resized_h) / 2)
        pad_w = int(np.floor(self.w - resized_w) / 2)

        resized_img = cv2.resize(img, (resized_w, resized_h))

        # if img.ndim > 2:
        if img.ndim > 2:
            new_img = np.zeros(
                (self.h, self.w, img.shape[-1]), dtype=resized_img.dtype)
        else:
            resized_img = np.expand_dims(resized_img, -1)
            new_img = np.zeros((self.h, self.w, 1), dtype=resized_img.dtype)
        new_img[pad_h: pad_h + resized_h,
                pad_w: pad_w + resized_w, ...] = resized_img
        return new_img


class CropResize:
    """Remove padding and resize image to its original size."""

    def __call__(self, img, size):
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        im_h, im_w = img.data.shape[:2]
        input_h, input_w = size
        scale = max(input_h / im_h, input_w / im_w)
        # scale = torch.Tensor([[input_h / im_h, input_w / im_w]]).max()
        resized_h = int(np.round(im_h * scale))
        # resized_h = torch.round(im_h * scale)
        resized_w = int(np.round(im_w * scale))
        # resized_w = torch.round(im_w * scale)
        crop_h = int(np.floor(resized_h - input_h) / 2)
        # crop_h = torch.floor(resized_h - input_h) // 2
        crop_w = int(np.floor(resized_w - input_w) / 2)
        # crop_w = torch.floor(resized_w - input_w) // 2
        # resized_img = cv2.resize(img, (resized_w, resized_h))
        resized_img = F.upsample(
            img.unsqueeze(0).unsqueeze(0), size=(resized_h, resized_w),
            mode='bilinear')

        resized_img = resized_img.squeeze().unsqueeze(0)

        return resized_img[0, crop_h: crop_h + input_h,
                           crop_w: crop_w + input_w]


class ResizeImage:
    """Resize the largest of the sides of the image to a given size"""
    def __init__(self, size):
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))

        self.size = size

    def __call__(self, img):
        im_h, im_w = img.shape[-2:]
        scale = min(self.size / im_h, self.size / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        out = F.upsample(
            Variable(img).unsqueeze(0), size=(resized_h, resized_w),
            mode='bilinear').squeeze().data
        return out


class ResizeAnnotation:
    """Resize the largest of the sides of the annotation to a given size"""
    def __init__(self, size):
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))

        self.size = size

    def __call__(self, img):
        im_h, im_w = img.shape[-2:]
        scale = min(self.size / im_h, self.size / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        out = F.upsample(
            Variable(img).unsqueeze(0).unsqueeze(0),
            size=(resized_h, resized_w),
            mode='bilinear').squeeze().data
        return out


class ToNumpy:
    """Transform an torch.*Tensor to an numpy ndarray."""

    def __call__(self, x):
        return x.numpy()

def letterbox(img, mask, height, color=(123.7, 116.3, 103.5)):  # resize a rectangular image to a padded square
    shape = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(shape)  # ratio  = old / new
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))
    dw = (height - new_shape[0]) / 2  # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded square
    if mask is not None:
        mask = cv2.resize(mask, new_shape, interpolation=cv2.INTER_NEAREST)  # resized, no border
        mask = cv2.copyMakeBorder(mask, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)  # padded square
    return img, mask, ratio, dw, dh


def random_affine(img, mask, targets, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(123.7, 116.3, 103.5), all_bbox=None):
    border = 0  # width of added border (optional)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue
    if mask is not None:
        maskw = cv2.warpPerspective(mask, M, dsize=(height, height), flags=cv2.INTER_NEAREST,
                                  borderValue=0)  # BGR order borderValue
    else:
        maskw = None

    # Return warped points also
    if type(targets)==type([1]):
        targetlist=[]
        for bbox in targets:
            targetlist.append(wrap_points(bbox, M, height, a))
        return imw, maskw, targetlist, M
    elif all_bbox is not None:
        targets = wrap_points(targets, M, height, a)
        for ii in range(all_bbox.shape[0]):
            all_bbox[ii,:] = wrap_points(all_bbox[ii,:], M, height, a)
        return imw, maskw, targets, all_bbox, M
    elif targets is not None:   ## previous main
        targets = wrap_points(targets, M, height, a)
        return imw, maskw, targets, M
    else:
        return imw

def wrap_points(targets, M, height, a):
    # n = targets.shape[0]
    # points = targets[:, 1:5].copy()
    points = targets.copy()
    # area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
    area0 = (points[2] - points[0]) * (points[3] - points[1])

    # warp points
    xy = np.ones((4, 3))
    xy[:, :2] = points[[0, 1, 2, 3, 0, 3, 2, 1]].reshape(4, 2)  # x1y1, x2y2, x1y2, x2y1
    xy = (xy @ M.T)[:, :2].reshape(1, 8)

    # create new boxes
    x = xy[:, [0, 2, 4, 6]]
    y = xy[:, [1, 3, 5, 7]]
    xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, 1).T

    # apply angle-based reduction
    radians = a * math.pi / 180
    reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
    x = (xy[:, 2] + xy[:, 0]) / 2
    y = (xy[:, 3] + xy[:, 1]) / 2
    w = (xy[:, 2] - xy[:, 0]) * reduction
    h = (xy[:, 3] - xy[:, 1]) * reduction
    xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, 1).T

    # reject warped points outside of image
    np.clip(xy, 0, height, out=xy)
    w = xy[:, 2] - xy[:, 0]
    h = xy[:, 3] - xy[:, 1]
    area = w * h
    ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
    i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

    ## print(targets, xy)
    ## [ 56  36 108 210] [[ 47.80464857  15.6096533  106.30993434 196.71267693]]
    # targets = targets[i]
    # targets[:, 1:5] = xy[i]
    targets = xy[0]
    return targets


def random_crop(img, seg, pad, h, w):
    if random.random() < 0.5:
        return img, seg

    img = cv2.copyMakeBorder(img, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=(123.7, 116.3, 103.5))
    seg = cv2.copyMakeBorder(seg, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=(0, 0, 0))

    Left = random.randint(0, pad * 2)
    Top = random.randint(0, pad * 2)

    seg_pixel = seg.sum()

    for _ in range(100):
        if seg[Top: Top + h, Left: Left + w].sum() / seg_pixel > 0.95 and seg[Top: Top + h, Left: Left + w].sum() > 0:
            img = img[Top: Top + h, Left: Left + w, :]
            seg = seg[Top: Top + h, Left: Left + w]

            return img, seg

        Left = random.randint(0, pad * 2)
        Top = random.randint(0, pad * 2)

    return img, seg


def random_copy(img, seg, phrase, bbox):
    if 'left' in phrase or 'right' in phrase or \
       'center' in phrase or 'middle' in phrase or \
       'front' in phrase or 'back' in phrase:
        return img, seg, phrase, bbox

    if random.random() < 0.75:
        return img, seg, phrase, bbox

    h, w = img.shape[0], img.shape[1]

    # x1, y1, x2, y2 = w, h, 0, 0
    # for j in range(h):
    #     for i in range(w):
    #         if seg[j, i] > 0:
    #             if i < x1: x1 = i
    #             if j < y1: y1 = j
    #             if i > x2: x2 = i
    #             if j > y2: y2 = j
    # x2 = x2 + 1
    # y2 = y2 + 1

    # contours, hierarchy = cv2.findContours(seg.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # c = max(contours, key = cv2.contourArea)
    x, y, bboxw, bboxh = cv2.boundingRect(seg.astype(np.uint8))
    x1 = x
    y1 = y
    x2 = x + bboxw 
    y2 = y + bboxh 

    if x1 - (x2 - x1) < 0 or w - (x2 - x1) < x2:
        return img, seg, phrase, bbox

    # tmp = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # color_mask = np.array([0, 255, 0], dtype=np.uint8)
    # mask = seg.astype(np.bool)
    # tmp[mask] = tmp[mask] * 0.5 + color_mask * 0.5
    # cv2.imwrite('./{}.png'.format(phrase.replace(' ', '_')), tmp)

    if random.random() < 0.5:
        new_x1 = random.randint(0, x1 - (x2 - x1))
        phrase += ' on left'
    else:
        new_x1 = random.randint(x2, w - (x2 - x1))
        phrase += ' on right'

    new_x2 = new_x1 + (x2 - x1)

    delta_y = random.randint((y1 - y2), y2 - y1)
    
    while y2 + delta_y > h or y1 + delta_y < 0:
        delta_y = random.randint((y1 - y2), y2 - y1)

    new_y1 = y1 + delta_y
    new_y2 = y2 + delta_y

    new_seg = np.zeros_like(seg)
    new_seg[new_y1: new_y2, new_x1: new_x2] = seg[y1: y2, x1: x2]

    # tmp = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # color_mask = np.array([0, 255, 0], dtype=np.uint8)
    # mask = new_seg.astype(np.bool)
    # tmp[mask] = tmp[mask] * 0.5 + color_mask * 0.5
    # cv2.imwrite('./{}.png'.format(phrase.replace(' ', '_')), tmp)

    img[new_seg.astype(np.bool)] = img[seg.astype(np.bool)]
    # bbox = [new_x1, new_y1, new_x2 - 1, new_y2 - 1]
    seg = new_seg

    # tmp = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # color_mask = np.array([0, 255, 0], dtype=np.uint8)
    # mask = seg.astype(np.bool)
    # tmp[mask] = tmp[mask] * 0.5 + color_mask * 0.5
    # cv2.imwrite('./{}.png'.format(phrase.replace(' ', '_')), tmp)

    # exit()

    return img, seg, phrase, bbox
        

def random_erase(img, seg):
    if random.random() < 0.5:
        return img, seg

    x, y, bboxw, bboxh = cv2.boundingRect(seg.astype(np.uint8))

    area = bboxw * bboxh * 0.5

    for attempt in range(100):
        target_area = random.uniform(0.02, 0.4)
        aspect_ratio = random.uniform(0.3, 1/0.3)

        h = int(round(math.sqrt(target_area * aspect_ratio)))
        w = int(round(math.sqrt(target_area / aspect_ratio)))

        if w < bboxw and h < bboxh:
            x1 = random.randint(0, bboxw - w)
            y1 = random.randint(0, bboxh - h)

            new_seg = seg.copy()
            new_seg[y+y1: y+y1+h, x+x1: x+x1+w] = 0

            if new_seg.sum() / seg.sum() > 0.75:
                continue

            seg[y+y1: y+y1+h, x+x1: x+x1+w] = 0

            img[y+y1: y+y1+h, x+x1: x+x1+w, 0] = 123.7
            img[y+y1: y+y1+h, x+x1: x+x1+w, 1] = 116.3
            img[y+y1: y+y1+h, x+x1: x+x1+w, 2] = 103.5

            # tmp = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # color_mask = np.array([0, 255, 0], dtype=np.uint8)
            # mask = seg.astype(np.bool)
            # tmp[mask] = tmp[mask] * 0.5 + color_mask * 0.5
            # cv2.imwrite('./erase.png', tmp)

            return img, seg

    return img, seg