# Spaces: hanquansanren / DvD
#
# Running on Zero
#
# File size: 9,549 Bytes
#
# 05fb4ab

import math
import random

import numpy as np
import torch
import torch.nn.functional as F


def TensorToArray(tensor, type):
    """Convert a torch tensor to a numpy array of dtype ``type``, moving channels last.

    The tensor is moved to the CPU, detached and converted to numpy. A
    channels-first layout is detected heuristically by comparing axis sizes:

    * rank 4 is treated as B x C x H x W and becomes B x H x W x C when the
      last axis is larger than the second one;
    * rank 3 is treated as C x H x W and becomes H x W x C when the last axis
      is larger than the first one.

    Any other rank (e.g. a plain H x W map) is converted without reordering
    its axes instead of failing on a missing axis.

    Args:
        tensor: torch tensor to convert.
        type: target dtype, passed to ``numpy.ndarray.astype``.

    Returns:
        numpy.ndarray holding the tensor data cast to ``type``.
    """
    array = tensor.cpu().detach().numpy()
    if array.ndim == 4:
        # B x C x H x W -> B x H x W x C
        if array.shape[3] > array.shape[1]:
            array = np.transpose(array, (0, 2, 3, 1))
    elif array.ndim == 3:
        # C x H x W -> H x W x C
        if array.shape[2] > array.shape[0]:
            array = np.transpose(array, (1, 2, 0))
    return array.astype(type)


class ToTensor(object):
    """Turn an H x W x C image (or H x W, which gets a singleton channel) into a C x H x W tensor.

    uint8 input is cast to float and divided by 255. Any other dtype is returned with its dtype
    unchanged, after asserting that its maximum value does not exceed 1.
    """

    def __call__(self, img, *args, **kwargs):
        pixels = img if isinstance(img, np.ndarray) else np.array(img)

        # give single-channel H x W images an explicit trailing channel axis
        if len(pixels.shape) == 2:
            pixels = pixels[:, :, None]

        # HWC -> CHW, then wrap as a tensor
        chw = torch.from_numpy(np.transpose(pixels, (2, 0, 1)))

        if not isinstance(chw, torch.ByteTensor):
            # non-uint8 data is expected to already lie within [0, 1]
            assert torch.max(chw).le(1.0)
            return chw
        return chw.float().div(255)


# class ToTensor of torchvision also normalised to 0 1
class ArrayToTensor(object):
    """Turn an H x W x C array into a C x H x W tensor without rescaling its values.

    With ``get_float=True`` (the default) the result is cast to float; otherwise the dtype obtained
    from numpy is kept.
    """

    def __init__(self, get_float=True):
        self.get_float = get_float

    def __call__(self, array):
        hwc = array if isinstance(array, np.ndarray) else np.array(array)
        # HWC -> CHW, then wrap as a tensor
        tensor = torch.from_numpy(np.transpose(hwc, (2, 0, 1)))
        # values are only cast here, never normalised to [0, 1]
        return tensor.float() if self.get_float else tensor


class PILToNumpy(object):
    """Convert the input (presumably a PIL image, going by the class name) to a numpy.ndarray
    by passing it through ``np.array``."""

    def __init__(self, get_float=True):
        # stored for interface compatibility; __call__ does not consult it
        self.get_float = get_float

    def __call__(self, array):
        return np.array(array)


class ScaleToZeroOne(object):
    """Cast a tensor to float and divide it by 255."""

    def __call__(self, array):
        as_float = array.float()
        return as_float.div(255)


class RandomColorWarp(object):
    """Randomly perturb contrast, gamma and brightness of a numpy H x W x 3 image.

    Values are treated as lying in [0, 255]; the result is clipped to that range and returned as uint8.
    ``contrast_change`` and ``gamma`` are (low, high) sampling ranges; ``std_range`` and ``mean_range``
    are symmetric half-widths for the multiplicative and additive brightness terms.
    """

    def __init__(self, mean_range=0, std_range=0, gamma=[0.7, 1.2], contrast_change=[0.5, 1.5]):
        self.mean_range = mean_range
        self.std_range = std_range
        self.gamma = gamma
        self.contrast_change = contrast_change

    def __call__(self, image):
        # 1) contrast: stretch or shrink deviations from the mean taken over the first two axes
        contrast_low, contrast_high = self.contrast_change[0], self.contrast_change[1]
        factor = random.uniform(contrast_low, contrast_high)
        channel_mean = np.mean(image, axis=(0, 1))
        warped = np.clip(channel_mean + (image - channel_mean) * factor, 0, 255)

        # 2) gamma, applied on values normalised to [0, 1]
        exponent = random.uniform(self.gamma[0], self.gamma[1])
        warped = (warped / 255.0) ** exponent * 255.0

        # 3) multiplicative brightness change, one factor for the whole image
        gain = 1 + random.uniform(-self.std_range, self.std_range)
        warped *= gain

        # 4) additive brightness change
        offset = random.uniform(-self.mean_range, self.mean_range)
        warped += offset

        return np.clip(warped, 0, 255).astype(np.uint8)


class RGBtoBGR(object):
    """Swap the first and third channel of an H x W x C numpy array (RGB -> BGR)."""

    def __call__(self, array):
        assert isinstance(array, np.ndarray)
        # index the last axis with the reversed channel order; this yields a new array
        bgr_order = [2, 1, 0]
        return array[..., bgr_order]


# Flow transform here
class ResizeFlow(object):
    """Resize a flow field of shape 2 x H x W to a given size and rescale its values accordingly.

    Args:
        size: target size. A single number gives a square ``(size, size)`` output; a
            ``(height, width)`` tuple or list is used as given.
    """

    def __init__(self, size):
        if isinstance(size, list):
            # a list such as [h, w] is a size pair, not a scalar to be duplicated
            size = tuple(size)
        elif not isinstance(size, tuple):
            size = (size, size)
        self.size = size

    def __call__(self, tensor):
        """Return the bilinearly resized flow, shape 2 x size[0] x size[1].

        Channel 0 is multiplied by the width ratio and channel 1 by the height ratio, so
        displacements stay expressed in pixels of the resized grid (presumably channel 0 is
        the horizontal and channel 1 the vertical component).
        """
        assert(len(tensor.shape) == 3 and tensor.shape[0] == 2)
        _, h_original, w_original = tensor.shape
        target_h, target_w = self.size
        # interpolate expects a batch axis; it is removed again before returning
        resized = F.interpolate(tensor.unsqueeze(0), self.size, mode='bilinear', align_corners=False)
        resized[:, 0, :, :] *= float(target_w) / float(w_original)
        resized[:, 1, :, :] *= float(target_h) / float(h_original)
        return resized.squeeze(0)


class Blur(torch.nn.Module):
    """Blur an image tensor with a separable Gaussian kernel of fixed sigma.

    Args:
        sigma: standard deviation of the Gaussian. A single number is used for both axes; a pair
            is read as (sigma along height, sigma along width).

    The kernel half-width per axis is ``ceil(2 * sigma)`` and the convolution is zero-padded by
    that amount, so the spatial size of the image is preserved.
    """

    def __init__(self, sigma):
        super().__init__()
        if isinstance(sigma, (float, int)):
            sigma = (sigma, sigma)
        self.sigma = sigma
        self.filter_size = [math.ceil(2 * s) for s in self.sigma]
        x_coord = [torch.arange(-sz, sz + 1, dtype=torch.float32) for sz in self.filter_size]
        self.filter = [torch.exp(-(x ** 2) / (2 * s ** 2)) for x, s in zip(x_coord, self.sigma)]
        # normalised 1-D kernels, shaped as a column (height axis) and a row (width axis)
        self.filter[0] = self.filter[0].view(1, 1, -1, 1) / self.filter[0].sum()
        self.filter[1] = self.filter[1].view(1, 1, 1, -1) / self.filter[1].sum()

    def forward(self, image):
        """Blur ``image`` and return a tensor of shape (N, H, W).

        ``image`` must be a tensor whose last two axes are H and W (e.g. C x H x W or
        B x C x H x W). All leading axes are flattened into N and every plane is blurred
        independently.

        Raises:
            NotImplementedError: if ``image`` is not a torch tensor.
        """
        if not torch.is_tensor(image):
            raise NotImplementedError

        # take H and W from the trailing axes so that 3-D inputs work as well as 4-D ones
        height, width = image.shape[-2:]
        # reshape (rather than view) also accepts non-contiguous inputs
        planes = image.reshape(-1, 1, height, width)
        # the kernels are plain attributes, so they do not follow the module across devices
        vertical = self.filter[0].to(image.device)
        horizontal = self.filter[1].to(image.device)

        blurred = F.conv2d(planes, vertical, padding=(self.filter_size[0], 0))
        blurred = F.conv2d(blurred, horizontal, padding=(0, self.filter_size[1]))
        return blurred.view(-1, height, width)

def to_tensor(pic):
    """Convert a ``numpy.ndarray`` image to a channels-first tensor.

    This function does not support torchscript.
    See :class:`~torchvision.transforms.ToTensor` for more details.

    Layout handling:

    * H x W arrays get a leading singleton channel (1 x H x W).
    * 3-D arrays are transposed from H x W x C to C x H x W, and 4-D arrays from B x H x W x C
      to B x C x H x W, unless they already look channels-first. An array is only taken as
      channels-first when its channel axis candidate (axis 0, or axis 1 for 4-D) has size 1 or 3
      while its last axis does not; ambiguous shapes are transposed.

    uint8 data is converted to the default float dtype and divided by 255; other dtypes are
    returned unchanged.

    Args:
        pic (numpy.ndarray): Image to be converted to tensor.
    Returns:
        Tensor: Converted image.
    Raises:
        TypeError: if ``pic`` is not a numpy.ndarray.
    """
    if not isinstance(pic, np.ndarray):
        raise TypeError('pic should be ndarray. Got {}'.format(type(pic)))

    default_float_dtype = torch.get_default_dtype()
    channel_counts = (1, 3)

    if pic.ndim == 2:
        pic = pic[None, :, :]
    elif pic.ndim == 3:
        channels_first = pic.shape[0] in channel_counts and pic.shape[-1] not in channel_counts
        if not channels_first:
            pic = pic.transpose((2, 0, 1))
    elif pic.ndim == 4:
        channels_first = pic.shape[1] in channel_counts and pic.shape[-1] not in channel_counts
        if not channels_first:
            pic = pic.transpose((0, 3, 1, 2))

    img = torch.from_numpy(pic).contiguous()
    # backward compatibility: byte images are rescaled to [0, 1]
    if img.dtype == torch.uint8:
        return img.to(dtype=default_float_dtype).div(255)
    return img


class RandomBlur(torch.nn.Module):
    """Blur an image, with a given probability, using a Gaussian kernel with a randomly drawn sigma.

    The image must be 3-dimensional: a torch tensor (C x H x W, or H x W x C) or a numpy array.

    Args:
        sigma: (min, max) range from which the standard deviation is drawn uniformly.
        kernel_size: (min, max) integer range for the kernel half-width; even draws are bumped to
            the next odd number. The kernel has ``2 * half_width + 1`` taps.
        probability: chance in [0, 1] that a given call blurs its input.
    """

    def __init__(self, sigma=(0.2, 2.0), kernel_size=(3, 7), probability=0.1):
        super().__init__()
        self.probability = probability
        self.sigma = sigma
        self.kernel_size = kernel_size

    def get_params(self, sigma_min: float, sigma_max: float):
        """Draw the random blur parameters.

        Args:
            sigma_min (float): Minimum standard deviation that can be chosen for blurring kernel.
            sigma_max (float): Maximum standard deviation that can be chosen for blurring kernel.

        Returns:
            tuple: ``(sigma, kernel_size)`` with the drawn standard deviation (float) and the drawn,
            odd kernel half-width (int).
        """
        kernel_size = random.randint(self.kernel_size[0], self.kernel_size[1])
        if kernel_size % 2 == 0:
            kernel_size += 1
        return torch.empty(1).uniform_(sigma_min, sigma_max).item(), kernel_size

    def forward(self, image, do_blur=True):
        """Return ``image`` blurred with probability ``self.probability``, else unchanged.

        Tensor inputs come back as tensors in their original layout. Numpy inputs are converted
        with ``to_tensor`` and come back as H x W x C numpy arrays (floating results are scaled
        by 255 and cast to uint8).

        NOTE(review): ``do_blur`` is accepted but not consulted; confirm whether callers expect
        it to disable blurring.

        Raises:
            NotImplementedError: if ``image`` is neither a numpy array nor a torch tensor.
        """
        # only apply blur with a probability
        if random.random() >= self.probability:
            return image

        # sample random sigma and kernel half-width, then build the normalised 1-D Gaussian
        sigma, half_width = self.get_params(self.sigma[0], self.sigma[1])
        offsets = torch.arange(-half_width, half_width + 1, dtype=torch.float32)
        kernel = torch.exp(-(offsets ** 2) / (2 * sigma ** 2))
        kernel = kernel / kernel.sum()

        to_numpy = isinstance(image, np.ndarray)
        if to_numpy:
            image = to_tensor(image)  # C, H, W and in range [0, 1]
        if not torch.is_tensor(image):
            raise NotImplementedError

        # Tensor inputs may be channels-last; a leading axis of size 1 or 3 is read as channels.
        # Arrays converted by to_tensor are already channels-first.
        channels_last = (not to_numpy) and image.shape[0] not in (1, 3)
        if channels_last:
            image = image.permute(2, 0, 1)

        kernel = kernel.to(image.device)
        if image.is_floating_point():
            kernel = kernel.to(image.dtype)

        height, width = image.shape[-2:]
        # reshape (not view): the permuted tensor is not contiguous
        planes = image.reshape(-1, 1, height, width)
        blurred = F.conv2d(planes, kernel.view(1, 1, -1, 1), padding=(half_width, 0))
        blurred = F.conv2d(blurred, kernel.view(1, 1, 1, -1), padding=(0, half_width))
        img_blur = blurred.view(-1, height, width)

        if to_numpy:
            if img_blur.is_floating_point():
                img_blur = img_blur.mul(255).byte()
            return np.transpose(img_blur.cpu().numpy(), (1, 2, 0))
        if channels_last:
            # hand the result back in the layout it arrived in, like the no-blur path does
            img_blur = img_blur.permute(1, 2, 0)
        return img_blur