import math
import numbers
import warnings
from enum import Enum

import numpy as np
from PIL import Image

import torch
from torch import Tensor
from typing import List, Tuple, Any, Optional

try:
    import accimage
except ImportError:
    accimage = None

from . import functional_pil as F_pil
from . import functional_tensor as F_t


class InterpolationMode(Enum):
    """Interpolation modes.

    Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``,
    ``box``, ``hamming``, and ``lanczos``.
    """
    NEAREST = "nearest"
    BILINEAR = "bilinear"
    BICUBIC = "bicubic"
    # For PIL compatibility
    BOX = "box"
    HAMMING = "hamming"
    LANCZOS = "lanczos"


# TODO: Once torchscript supports Enums with staticmethod
# this can be put into InterpolationMode as a staticmethod
def _interpolation_modes_from_int(i: int) -> InterpolationMode:
    inverse_modes_mapping = {
        0: InterpolationMode.NEAREST,
        2: InterpolationMode.BILINEAR,
        3: InterpolationMode.BICUBIC,
        4: InterpolationMode.BOX,
        5: InterpolationMode.HAMMING,
        1: InterpolationMode.LANCZOS,
    }
    return inverse_modes_mapping[i]


pil_modes_mapping = {
    InterpolationMode.NEAREST: 0,
    InterpolationMode.BILINEAR: 2,
    InterpolationMode.BICUBIC: 3,
    InterpolationMode.BOX: 4,
    InterpolationMode.HAMMING: 5,
    InterpolationMode.LANCZOS: 1,
}

_is_pil_image = F_pil._is_pil_image


def _get_image_size(img: Tensor) -> List[int]:
    """Returns image size as [w, h]."""
    if isinstance(img, torch.Tensor):
        return F_t._get_image_size(img)

    return F_pil._get_image_size(img)


def _get_image_num_channels(img: Tensor) -> int:
    """Returns the number of image channels."""
    if isinstance(img, torch.Tensor):
        return F_t._get_image_num_channels(img)

    return F_pil._get_image_num_channels(img)


def _is_numpy(img: Any) -> bool:
    return isinstance(img, np.ndarray)


def _is_numpy_image(img: Any) -> bool:
    return img.ndim in {2, 3}


def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
    This function does not support torchscript.

    See :class:`~torchvision.transforms.ToTensor` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    if not (F_pil._is_pil_image(pic) or _is_numpy(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    if _is_numpy(pic) and not _is_numpy_image(pic):
        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))

    default_float_dtype = torch.get_default_dtype()

    if isinstance(pic, np.ndarray):
        # handle numpy array
        if pic.ndim == 2:
            pic = pic[:, :, None]

        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
        # backward compatibility
        if isinstance(img, torch.ByteTensor):
            return img.to(dtype=default_float_dtype).div(255)
        else:
            return img

    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic).to(dtype=default_float_dtype)

    # handle PIL Image
    mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32}
    img = torch.from_numpy(
        np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True)
    )

    if pic.mode == '1':
        img = 255 * img
    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
    # put it from HWC to CHW format
    img = img.permute((2, 0, 1)).contiguous()
    if isinstance(img, torch.ByteTensor):
        return img.to(dtype=default_float_dtype).div(255)
    else:
        return img

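# Illustrative usage sketch (an addition, not part of the original module): a
# uint8 HWC numpy array becomes a float CHW tensor scaled to [0, 1].
#
#     >>> arr = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
#     >>> t = to_tensor(arr)
#     >>> t.shape, t.dtype
#     (torch.Size([3, 32, 32]), torch.float32)
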
def pil_to_tensor(pic):
    """Convert a ``PIL Image`` to a tensor of the same type.
    This function does not support torchscript.

    See :class:`~torchvision.transforms.PILToTensor` for more details.

    Args:
        pic (PIL Image): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    if not F_pil._is_pil_image(pic):
        raise TypeError('pic should be PIL Image. Got {}'.format(type(pic)))

    if accimage is not None and isinstance(pic, accimage.Image):
        # accimage format is always uint8 internally, so always return uint8 here
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.uint8)
        pic.copyto(nppic)
        return torch.as_tensor(nppic)

    # handle PIL Image
    img = torch.as_tensor(np.asarray(pic))
    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
    # put it from HWC to CHW format
    img = img.permute((2, 0, 1))
    return img


def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
    """Convert a tensor image to the given ``dtype`` and scale the values accordingly.
    This function does not support PIL Image.

    Args:
        image (torch.Tensor): Image to be converted
        dtype (torch.dtype): Desired data type of the output

    Returns:
        Tensor: Converted image

    .. note::

        When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
        If converted back and forth, this mismatch has no effect.

    Raises:
        RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
            well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
            overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
            of the integer ``dtype``.
    """
    if not isinstance(image, torch.Tensor):
        raise TypeError('Input img should be Tensor Image')

    return F_t.convert_image_dtype(image, dtype)

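# Illustrative sketch (an addition, not part of the original module): uint8 to
# float rescales [0, 255] to [0.0, 1.0].
#
#     >>> x = torch.tensor([[[0, 255]]], dtype=torch.uint8)
#     >>> convert_image_dtype(x, torch.float32)
#     tensor([[[0., 1.]]])
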
def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.

    See :class:`~torchvision.transforms.ToPILImage` for more details.

    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes

    Returns:
        PIL Image: Image converted to PIL Image.
    """
    if not (isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))

    elif isinstance(pic, torch.Tensor):
        if pic.ndimension() not in {2, 3}:
            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))

        elif pic.ndimension() == 2:
            # if 2D image, add channel dimension (CHW)
            pic = pic.unsqueeze(0)

        # check number of channels
        if pic.shape[-3] > 4:
            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-3]))

    elif isinstance(pic, np.ndarray):
        if pic.ndim not in {2, 3}:
            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))

        elif pic.ndim == 2:
            # if 2D image, add channel dimension (HWC)
            pic = np.expand_dims(pic, 2)

        # check number of channels
        if pic.shape[-1] > 4:
            raise ValueError('pic should not have > 4 channels. Got {} channels.'.format(pic.shape[-1]))

    npimg = pic
    if isinstance(pic, torch.Tensor):
        if pic.is_floating_point() and mode != 'F':
            pic = pic.mul(255).byte()
        npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, '
                        'not {}'.format(type(npimg)))

    if npimg.shape[2] == 1:
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        elif npimg.dtype == np.int16:
            expected_mode = 'I;16'
        elif npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
                             .format(mode, npimg.dtype, expected_mode))
        mode = expected_mode
    elif npimg.shape[2] == 2:
        permitted_2_channel_modes = ['LA']
        if mode is not None and mode not in permitted_2_channel_modes:
            raise ValueError("Only modes {} are supported for 2-channel inputs".format(permitted_2_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'LA'

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4-channel inputs".format(permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'

    else:
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3-channel inputs".format(permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)


def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor:
    """Normalize a float tensor image with mean and standard deviation.
    This transform does not support PIL Image.

    .. note::
        This transform acts out of place by default, i.e., it does not mutate the input tensor.

    See :class:`~torchvision.transforms.Normalize` for more details.

    Args:
        tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool, optional): Bool to make this operation inplace.

    Returns:
        Tensor: Normalized Tensor image.
    """
    if not isinstance(tensor, torch.Tensor):
        raise TypeError('Input tensor should be a torch tensor. Got {}.'.format(type(tensor)))

    if not tensor.is_floating_point():
        raise TypeError('Input tensor should be a float tensor. Got {}.'.format(tensor.dtype))

    if tensor.ndim < 3:
        raise ValueError('Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = '
                         '{}.'.format(tensor.size()))

    if not inplace:
        tensor = tensor.clone()

    dtype = tensor.dtype
    mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
    std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
    if (std == 0).any():
        raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
    if mean.ndim == 1:
        mean = mean.view(-1, 1, 1)
    if std.ndim == 1:
        std = std.view(-1, 1, 1)
    tensor.sub_(mean).div_(std)
    return tensor

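# Illustrative sketch (an addition, not part of the original module):
# normalizing with the ImageNet statistics commonly used with torchvision
# models.
#
#     >>> x = torch.rand(3, 224, 224)  # float image in [0, 1]
#     >>> y = normalize(x, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     >>> y.shape
#     torch.Size([3, 224, 224])
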
def resize(img: Tensor, size: List[int], interpolation: InterpolationMode = InterpolationMode.BILINEAR,
           max_size: Optional[int] = None, antialias: Optional[bool] = None) -> Tensor:
    r"""Resize the input image to the given size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    .. warning::
        The output image might be different depending on its type: when downsampling, the interpolation of PIL images
        and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
        in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
        types. See also below the ``antialias`` parameter, which can help make the output of PIL images and tensors
        closer.

    Args:
        img (PIL Image or Tensor): Image to be resized.
        size (sequence or int): Desired output size. If size is a sequence like
            (h, w), the output size will be matched to this. If size is an int,
            the smaller edge of the image will be matched to this number maintaining
            the aspect ratio, i.e., if height > width, then the image will be rescaled to
            :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.

            .. note::
                In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`.
            Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
        max_size (int, optional): The maximum allowed for the longer edge of
            the resized image: if the longer edge of the image is greater
            than ``max_size`` after being resized according to ``size``, then
            the image is resized again so that the longer edge is equal to
            ``max_size``. As a result, ``size`` might be overruled, i.e. the
            smaller edge may be shorter than ``size``. This is only supported
            if ``size`` is an int (or a sequence of length 1 in torchscript
            mode).
        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
            ``InterpolationMode.BILINEAR`` mode only. This can help make the output for PIL images and tensors
            closer.

            .. warning::
                There is no autodiff support for the ``antialias=True`` option with input ``img`` as Tensor.

    Returns:
        PIL Image or Tensor: Resized image.
    """
    # Backward compatibility with integer value
    if isinstance(interpolation, int):
        warnings.warn(
            "Argument interpolation should be of type InterpolationMode instead of int. "
            "Please, use InterpolationMode enum."
        )
        interpolation = _interpolation_modes_from_int(interpolation)

    if not isinstance(interpolation, InterpolationMode):
        raise TypeError("Argument interpolation should be an InterpolationMode")

    if not isinstance(img, torch.Tensor):
        if antialias is not None and not antialias:
            warnings.warn(
                "Anti-alias option is always applied for PIL Image input. Argument antialias is ignored."
            )
        pil_interpolation = pil_modes_mapping[interpolation]
        return F_pil.resize(img, size=size, interpolation=pil_interpolation, max_size=max_size)

    return F_t.resize(img, size=size, interpolation=interpolation.value, max_size=max_size, antialias=antialias)

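# Illustrative sketch (an addition, not part of the original module): with an
# int-style size the smaller edge is matched and aspect ratio is kept;
# max_size then caps the longer edge.
#
#     >>> x = torch.rand(3, 300, 600)
#     >>> resize(x, [150]).shape                # smaller edge -> 150
#     torch.Size([3, 150, 300])
#     >>> resize(x, [150], max_size=200).shape  # longer edge capped at 200
#     torch.Size([3, 100, 200])
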
def scale(*args, **kwargs):
    warnings.warn("The use of the transforms.Scale transform is deprecated, "
                  "please use transforms.Resize instead.")
    return resize(*args, **kwargs)


def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
    r"""Pad the given image on all sides with the given "pad" value.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric,
    at most 3 leading dimensions for mode edge,
    and an arbitrary number of leading dimensions for mode constant.

    Args:
        img (PIL Image or Tensor): Image to be padded.
        padding (int or sequence): Padding on each border. If a single int is provided this
            is used to pad all borders. If a sequence of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a sequence of length 4 is provided
            this is the padding for the left, top, right and bottom borders respectively.

            .. note::
                In torchscript mode padding as single int is not supported, use a sequence of
                length 1: ``[padding, ]``.
        fill (number or str or tuple): Pixel fill value for constant fill. Default is 0.
            If a tuple of length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant.
            Only number is supported for torch Tensor.
            Only int or str or tuple value is supported for PIL Image.
        padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
            Default is constant.

            - constant: pads with a constant value, this value is specified with fill

            - edge: pads with the last value at the edge of the image.
              If the input is a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2

            - reflect: pads with reflection of image without repeating the last value on the edge.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
              will result in [3, 2, 1, 2, 3, 4, 3, 2]

            - symmetric: pads with reflection of image repeating the last value on the edge.
              For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
              will result in [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        PIL Image or Tensor: Padded image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode)

    return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode)

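# Illustrative sketch (an addition, not part of the original module):
# symmetric padding on a tiny tensor, mirroring the 1-D example from the
# docstring above (left/right padding of 2, no top/bottom padding).
#
#     >>> x = torch.tensor([[[1., 2., 3., 4.]]])
#     >>> pad(x, [2, 0], padding_mode="symmetric")
#     tensor([[[2., 1., 1., 2., 3., 4., 4., 3.]]])
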
def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
    """Crop the given image at specified location and output size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
    If image size is smaller than output size along any edge, image is padded with 0 and then cropped.

    Args:
        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.

    Returns:
        PIL Image or Tensor: Cropped image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.crop(img, top, left, height, width)

    return F_t.crop(img, top, left, height, width)


def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
    """Crops the given image at the center.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
    If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.

    Args:
        img (PIL Image or Tensor): Image to be cropped.
        output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int,
            it is used for both directions.

    Returns:
        PIL Image or Tensor: Cropped image.
    """
    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
        output_size = (output_size[0], output_size[0])

    image_width, image_height = _get_image_size(img)
    crop_height, crop_width = output_size

    if crop_width > image_width or crop_height > image_height:
        padding_ltrb = [
            (crop_width - image_width) // 2 if crop_width > image_width else 0,
            (crop_height - image_height) // 2 if crop_height > image_height else 0,
            (crop_width - image_width + 1) // 2 if crop_width > image_width else 0,
            (crop_height - image_height + 1) // 2 if crop_height > image_height else 0,
        ]
        img = pad(img, padding_ltrb, fill=0)  # PIL uses fill value 0
        image_width, image_height = _get_image_size(img)
        if crop_width == image_width and crop_height == image_height:
            return img

    crop_top = int(round((image_height - crop_height) / 2.))
    crop_left = int(round((image_width - crop_width) / 2.))
    return crop(img, crop_top, crop_left, crop_height, crop_width)

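# Illustrative sketch (an addition, not part of the original module): a single
# value yields a square crop; crops larger than the image are zero-padded
# first, per the docstring.
#
#     >>> x = torch.rand(3, 100, 100)
#     >>> center_crop(x, [50]).shape
#     torch.Size([3, 50, 50])
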
def resized_crop(
        img: Tensor, top: int, left: int, height: int, width: int, size: List[int],
        interpolation: InterpolationMode = InterpolationMode.BILINEAR
) -> Tensor:
    """Crop the given image and resize it to the desired size.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.

    Args:
        img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
        top (int): Vertical component of the top left corner of the crop box.
        left (int): Horizontal component of the top left corner of the crop box.
        height (int): Height of the crop box.
        width (int): Width of the crop box.
        size (sequence or int): Desired output size. Same semantics as ``resize``.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`.
            Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
            ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.

    Returns:
        PIL Image or Tensor: Cropped image.
    """
    img = crop(img, top, left, height, width)
    img = resize(img, size, interpolation)
    return img


def hflip(img: Tensor) -> Tensor:
    """Horizontally flip the given image.

    Args:
        img (PIL Image or Tensor): Image to be flipped. If img
            is a Tensor, it is expected to be in [..., H, W] format,
            where ... means it can have an arbitrary number of leading
            dimensions.

    Returns:
        PIL Image or Tensor: Horizontally flipped image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.hflip(img)

    return F_t.hflip(img)


def _get_perspective_coeffs(
        startpoints: List[List[int]], endpoints: List[List[int]]
) -> List[float]:
    """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.

    In Perspective Transform each pixel (x, y) in the original image gets transformed as,
    (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )

    Args:
        startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
        endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.

    Returns:
        octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
    """
    a_matrix = torch.zeros(2 * len(startpoints), 8, dtype=torch.float)

    for i, (p1, p2) in enumerate(zip(endpoints, startpoints)):
        a_matrix[2 * i, :] = torch.tensor([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
        a_matrix[2 * i + 1, :] = torch.tensor([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])

    b_matrix = torch.tensor(startpoints, dtype=torch.float).view(8)
    res = torch.linalg.lstsq(a_matrix, b_matrix, driver='gels').solution

    output: List[float] = res.tolist()
    return output


def perspective(
        img: Tensor,
        startpoints: List[List[int]],
        endpoints: List[List[int]],
        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
        fill: Optional[List[float]] = None
) -> Tensor:
    """Perform perspective transform of the given image.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        img (PIL Image or Tensor): Image to be transformed.
        startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
        endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
            ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands.

            .. note::
                In torchscript mode single int/float value is not supported, please use a sequence
                of length 1: ``[value, ]``.

    Returns:
        PIL Image or Tensor: transformed Image.
    """
    coeffs = _get_perspective_coeffs(startpoints, endpoints)

    # Backward compatibility with integer value
    if isinstance(interpolation, int):
        warnings.warn(
            "Argument interpolation should be of type InterpolationMode instead of int. "
            "Please, use InterpolationMode enum."
        )
        interpolation = _interpolation_modes_from_int(interpolation)

    if not isinstance(interpolation, InterpolationMode):
        raise TypeError("Argument interpolation should be an InterpolationMode")

    if not isinstance(img, torch.Tensor):
        pil_interpolation = pil_modes_mapping[interpolation]
        return F_pil.perspective(img, coeffs, interpolation=pil_interpolation, fill=fill)

    return F_t.perspective(img, coeffs, interpolation=interpolation.value, fill=fill)


def vflip(img: Tensor) -> Tensor:
    """Vertically flip the given image.

    Args:
        img (PIL Image or Tensor): Image to be flipped. If img
            is a Tensor, it is expected to be in [..., H, W] format,
            where ... means it can have an arbitrary number of leading
            dimensions.

    Returns:
        PIL Image or Tensor: Vertically flipped image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.vflip(img)

    return F_t.vflip(img)


def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
    """Crop the given image into four corners and the central crop.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        img (PIL Image or Tensor): Image to be cropped.
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).

    Returns:
        tuple: tuple (tl, tr, bl, br, center)
        Corresponding top left, top right, bottom left, bottom right and center crop.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    elif isinstance(size, (tuple, list)) and len(size) == 1:
        size = (size[0], size[0])

    if len(size) != 2:
        raise ValueError("Please provide only two dimensions (h, w) for size.")

    image_width, image_height = _get_image_size(img)
    crop_height, crop_width = size
    if crop_width > image_width or crop_height > image_height:
        msg = "Requested crop size {} is bigger than input size {}"
        raise ValueError(msg.format(size, (image_height, image_width)))

    tl = crop(img, 0, 0, crop_height, crop_width)
    tr = crop(img, 0, image_width - crop_width, crop_height, crop_width)
    bl = crop(img, image_height - crop_height, 0, crop_height, crop_width)
    br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width)

    center = center_crop(img, [crop_height, crop_width])

    return tl, tr, bl, br, center

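# Illustrative sketch (an addition, not part of the original module):
# five_crop returns the four corner crops plus the center crop.
#
#     >>> crops = five_crop(torch.rand(3, 64, 64), [32])
#     >>> len(crops), crops[0].shape
#     (5, torch.Size([3, 32, 32]))
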
def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]:
    """Generate ten cropped images from the given image.
    Crop the given image into four corners and the central crop plus the
    flipped version of these (horizontal flipping is used by default).
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    .. Note::
        This transform returns a tuple of images and there may be a
        mismatch in the number of inputs and targets your ``Dataset`` returns.

    Args:
        img (PIL Image or Tensor): Image to be cropped.
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
        vertical_flip (bool): Use vertical flipping instead of horizontal

    Returns:
        tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
        Corresponding top left, top right, bottom left, bottom right and
        center crop and same for the flipped image.
    """
    if isinstance(size, numbers.Number):
        size = (int(size), int(size))
    elif isinstance(size, (tuple, list)) and len(size) == 1:
        size = (size[0], size[0])

    if len(size) != 2:
        raise ValueError("Please provide only two dimensions (h, w) for size.")

    first_five = five_crop(img, size)

    if vertical_flip:
        img = vflip(img)
    else:
        img = hflip(img)

    second_five = five_crop(img, size)
    return first_five + second_five


def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
    """Adjust brightness of an image.

    Args:
        img (PIL Image or Tensor): Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL Image or Tensor: Brightness adjusted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_brightness(img, brightness_factor)

    return F_t.adjust_brightness(img, brightness_factor)


def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
    """Adjust contrast of an image.

    Args:
        img (PIL Image or Tensor): Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL Image or Tensor: Contrast adjusted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_contrast(img, contrast_factor)

    return F_t.adjust_contrast(img, contrast_factor)


def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
    """Adjust color saturation of an image.

    Args:
        img (PIL Image or Tensor): Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
        saturation_factor (float): How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL Image or Tensor: Saturation adjusted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_saturation(img, saturation_factor)

    return F_t.adjust_saturation(img, saturation_factor)


def adjust_hue(img: Tensor, hue_factor: float) -> Tensor:
    """Adjust hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to the original image mode.

    `hue_factor` is the amount of shift in H channel and must be in the
    interval `[-0.5, 0.5]`.

    See `Hue`_ for more details.

    .. _Hue: https://en.wikipedia.org/wiki/Hue

    Args:
        img (PIL Image or Tensor): Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, modes "1", "L", "I", "F" and modes with transparency
            (alpha channel) are not supported.
        hue_factor (float): How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL Image or Tensor: Hue adjusted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_hue(img, hue_factor)

    return F_t.adjust_hue(img, hue_factor)


def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
    r"""Perform gamma correction on an image.

    Also known as Power Law Transform. Intensities in RGB mode are adjusted
    based on the following equation:

    .. math::
        I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}

    See `Gamma Correction`_ for more details.

    .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction

    Args:
        img (PIL Image or Tensor): PIL Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, modes with transparency (alpha channel) are not supported.
        gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
            gamma larger than 1 makes the shadows darker,
            while gamma smaller than 1 makes dark regions lighter.
        gain (float): The constant multiplier.

    Returns:
        PIL Image or Tensor: Gamma correction adjusted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_gamma(img, gamma, gain)

    return F_t.adjust_gamma(img, gamma, gain)

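# Illustrative sketch (an addition, not part of the original module): for a
# float tensor the equation operates on the [0, 1] scale, so gamma > 1
# darkens mid-tones and gamma < 1 brightens them.
#
#     >>> x = torch.full((3, 2, 2), 0.5)
#     >>> adjust_gamma(x, gamma=2.0)[0, 0, 0]  # 0.5 ** 2 = 0.25
#     tensor(0.2500)
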
def _get_inverse_affine_matrix(
        center: List[float], angle: float, translate: List[float], scale: float, shear: List[float]
) -> List[float]:
    # Helper method to compute the inverse matrix for an affine transformation

    # As it is explained in PIL.Image.rotate
    # We need to compute the INVERSE of the affine transformation matrix: M = T * C * RSS * C^-1
    # where T is the translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
    #       C is the translation matrix to keep the center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
    #       RSS is the rotation with scale and shear matrix
    #       RSS(a, s, (sx, sy)) =
    #       = R(a) * S(s) * SHy(sy) * SHx(sx)
    #       = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ]
    #         [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ]
    #         [ 0                    , 0                                        , 1 ]
    #
    # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
    # SHx(s) = [1, -tan(s)] and SHy(s) = [1      , 0]
    #          [0, 1      ]             [-tan(s), 1]
    #
    # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1

    rot = math.radians(angle)
    sx, sy = [math.radians(s) for s in shear]

    cx, cy = center
    tx, ty = translate

    # RSS without scaling
    a = math.cos(rot - sy) / math.cos(sy)
    b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot)
    c = math.sin(rot - sy) / math.cos(sy)
    d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot)

    # Inverted rotation matrix with scale and shear
    # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
    matrix = [d, -b, 0.0, -c, a, 0.0]
    matrix = [x / scale for x in matrix]

    # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
    matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty)
    matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty)

    # Apply center translation: C * RSS^-1 * C^-1 * T^-1
    matrix[2] += cx
    matrix[5] += cy

    return matrix


def rotate(
        img: Tensor, angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST,
        expand: bool = False, center: Optional[List[int]] = None,
        fill: Optional[List[float]] = None, resample: Optional[int] = None
) -> Tensor:
    """Rotate the image by angle.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        img (PIL Image or Tensor): image to be rotated.
        angle (number): rotation angle value in degrees, counter-clockwise.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
        expand (bool, optional): Optional expansion flag.
            If true, expands the output image to make it large enough to hold the entire rotated image.
            If false or omitted, make the output image the same size as the input image.
            Note that the expand flag assumes rotation around the center and no translation.
        center (sequence, optional): Optional center of rotation. Origin is the upper left corner.
            Default is the center of the image.
        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands.

            .. note::
                In torchscript mode single int/float value is not supported, please use a sequence
                of length 1: ``[value, ]``.
        resample (int, optional): deprecated argument and will be removed since v0.10.0.
            Please use the ``interpolation`` parameter instead.

    Returns:
        PIL Image or Tensor: Rotated image.

    .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
    """
    if resample is not None:
        warnings.warn(
            "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
        )
        interpolation = _interpolation_modes_from_int(resample)

    # Backward compatibility with integer value
    if isinstance(interpolation, int):
        warnings.warn(
            "Argument interpolation should be of type InterpolationMode instead of int. "
            "Please, use InterpolationMode enum."
        )
        interpolation = _interpolation_modes_from_int(interpolation)

    if not isinstance(angle, (int, float)):
        raise TypeError("Argument angle should be int or float")

    if center is not None and not isinstance(center, (list, tuple)):
        raise TypeError("Argument center should be a sequence")

    if not isinstance(interpolation, InterpolationMode):
        raise TypeError("Argument interpolation should be an InterpolationMode")

    if not isinstance(img, torch.Tensor):
        pil_interpolation = pil_modes_mapping[interpolation]
        return F_pil.rotate(img, angle=angle, interpolation=pil_interpolation, expand=expand, center=center, fill=fill)

    center_f = [0.0, 0.0]
    if center is not None:
        img_size = _get_image_size(img)
        # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
        center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, img_size)]

    # due to current incoherence of rotation angle direction between affine and rotate implementations
    # we need to set -angle.
    matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
    return F_t.rotate(img, matrix=matrix, interpolation=interpolation.value, expand=expand, fill=fill)

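# Illustrative sketch (an addition, not part of the original module): rotating
# with expand=True grows the canvas so the whole rotated image fits.
#
#     >>> x = torch.rand(3, 100, 200)
#     >>> rotate(x, 90, expand=True).shape
#     torch.Size([3, 200, 100])
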
def affine(
        img: Tensor, angle: float, translate: List[int], scale: float, shear: List[float],
        interpolation: InterpolationMode = InterpolationMode.NEAREST, fill: Optional[List[float]] = None,
        resample: Optional[int] = None, fillcolor: Optional[List[float]] = None
) -> Tensor:
    """Apply affine transformation on the image keeping image center invariant.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        img (PIL Image or Tensor): image to transform.
        angle (number): rotation angle in degrees between -180 and 180, clockwise direction.
        translate (sequence of integers): horizontal and vertical translations (post-rotation translation)
        scale (float): overall scale
        shear (float or sequence): shear angle value in degrees between -180 and 180, clockwise direction.
            If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while
            the second value corresponds to a shear parallel to the y axis.
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
            If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
            For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
        fill (sequence or number, optional): Pixel fill value for the area outside the transformed
            image. If given a number, the value is used for all bands.

            .. note::
                In torchscript mode single int/float value is not supported, please use a sequence
                of length 1: ``[value, ]``.
        fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
            Please use the ``fill`` parameter instead.
        resample (int, optional): deprecated argument and will be removed since v0.10.0.
            Please use the ``interpolation`` parameter instead.

    Returns:
        PIL Image or Tensor: Transformed image.
    """
    if resample is not None:
        warnings.warn(
            "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
        )
        interpolation = _interpolation_modes_from_int(resample)

    # Backward compatibility with integer value
    if isinstance(interpolation, int):
        warnings.warn(
            "Argument interpolation should be of type InterpolationMode instead of int. "
            "Please, use InterpolationMode enum."
        )
        interpolation = _interpolation_modes_from_int(interpolation)

    if fillcolor is not None:
        warnings.warn(
            "Argument fillcolor is deprecated and will be removed since v0.10.0. Please, use fill instead"
        )
        fill = fillcolor

    if not isinstance(angle, (int, float)):
        raise TypeError("Argument angle should be int or float")

    if not isinstance(translate, (list, tuple)):
        raise TypeError("Argument translate should be a sequence")

    if len(translate) != 2:
        raise ValueError("Argument translate should be a sequence of length 2")

    if scale <= 0.0:
        raise ValueError("Argument scale should be positive")

    if not isinstance(shear, (numbers.Number, list, tuple)):
        raise TypeError("Shear should be either a single value or a sequence of two values")

    if not isinstance(interpolation, InterpolationMode):
        raise TypeError("Argument interpolation should be an InterpolationMode")

    if isinstance(angle, int):
        angle = float(angle)

    if isinstance(translate, tuple):
        translate = list(translate)

    if isinstance(shear, numbers.Number):
        shear = [shear, 0.0]

    if isinstance(shear, tuple):
        shear = list(shear)

    if len(shear) == 1:
        shear = [shear[0], shear[0]]

    if len(shear) != 2:
        raise ValueError("Shear should be a sequence containing two values. Got {}".format(shear))

    img_size = _get_image_size(img)
    if not isinstance(img, torch.Tensor):
        # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5)
        # it is visually better to estimate the center without the 0.5 offset,
        # otherwise an image rotated by 90 degrees is shifted vs the output of torch.rot90 or F_t.affine
        center = [img_size[0] * 0.5, img_size[1] * 0.5]
        matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)
        pil_interpolation = pil_modes_mapping[interpolation]
        return F_pil.affine(img, matrix=matrix, interpolation=pil_interpolation, fill=fill)

    translate_f = [1.0 * t for t in translate]
    matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, translate_f, scale, shear)
    return F_t.affine(img, matrix=matrix, interpolation=interpolation.value, fill=fill)


def to_grayscale(img, num_output_channels=1):
    """Convert a PIL image of any mode (RGB, HSV, LAB, etc.) to a grayscale version of the image.
    This transform does not support torch Tensor.

    Args:
        img (PIL Image): PIL Image to be converted to grayscale.
        num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1.

    Returns:
        PIL Image: Grayscale version of the image.

        - if num_output_channels = 1 : returned image is single channel
        - if num_output_channels = 3 : returned image is 3 channel with r = g = b
    """
    if isinstance(img, Image.Image):
        return F_pil.to_grayscale(img, num_output_channels)

    raise TypeError("Input should be PIL Image")


def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
    """Convert an RGB image to a grayscale version of the image.
    If the image is torch Tensor, it is expected
    to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions.

    Note:
        Please note that this method supports only RGB images as input. For inputs in other color spaces,
        please consider using :meth:`~torchvision.transforms.functional.to_grayscale` with PIL Image.

    Args:
        img (PIL Image or Tensor): RGB Image to be converted to grayscale.
        num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.

    Returns:
        PIL Image or Tensor: Grayscale version of the image.

        - if num_output_channels = 1 : returned image is single channel
        - if num_output_channels = 3 : returned image is 3 channel with r = g = b
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.to_grayscale(img, num_output_channels)

    return F_t.rgb_to_grayscale(img, num_output_channels)


def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor:
    """Erase the input Tensor Image with given value.
    This transform does not support PIL Image.

    Args:
        img (Tensor Image): Tensor image of size (C, H, W) to be erased
        i (int): i in (i, j), i.e. the vertical coordinate of the upper left corner.
        j (int): j in (i, j), i.e. the horizontal coordinate of the upper left corner.
        h (int): Height of the erased region.
        w (int): Width of the erased region.
        v: Erasing value.
        inplace (bool, optional): For in-place operations. By default is set False.

    Returns:
        Tensor Image: Erased image.
    """
    if not isinstance(img, torch.Tensor):
        raise TypeError('img should be Tensor Image. Got {}'.format(type(img)))

    if not inplace:
        img = img.clone()

    img[..., i:i + h, j:j + w] = v
    return img

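# Illustrative sketch (an addition, not part of the original module): zeroing
# out an 8x8 patch whose upper left corner is at (2, 2).
#
#     >>> x = torch.ones(3, 16, 16)
#     >>> y = erase(x, i=2, j=2, h=8, w=8, v=torch.tensor(0.))
#     >>> float(y[:, 2:10, 2:10].sum())
#     0.0
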
def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor:
    """Performs Gaussian blurring on the image by the given kernel.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        img (PIL Image or Tensor): Image to be blurred
        kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers
            like ``(kx, ky)`` or a single integer for square kernels.

            .. note::
                In torchscript mode kernel_size as single int is not supported, use a sequence of
                length 1: ``[ksize, ]``.
        sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a
            sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the
            same sigma in both X/Y directions. If None, then it is computed using
            ``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``.
            Default, None.

            .. note::
                In torchscript mode sigma as single float is
                not supported, use a sequence of length 1: ``[sigma, ]``.

    Returns:
        PIL Image or Tensor: Gaussian Blurred version of the image.
    """
    if not isinstance(kernel_size, (int, list, tuple)):
        raise TypeError('kernel_size should be int or a sequence of integers. Got {}'.format(type(kernel_size)))
    if isinstance(kernel_size, int):
        kernel_size = [kernel_size, kernel_size]
    if len(kernel_size) != 2:
        raise ValueError('If kernel_size is a sequence its length should be 2. Got {}'.format(len(kernel_size)))
    for ksize in kernel_size:
        if ksize % 2 == 0 or ksize < 0:
            raise ValueError('kernel_size should have odd and positive integers. Got {}'.format(kernel_size))

    if sigma is None:
        sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size]

    if sigma is not None and not isinstance(sigma, (int, float, list, tuple)):
        raise TypeError('sigma should be either float or sequence of floats. Got {}'.format(type(sigma)))
    if isinstance(sigma, (int, float)):
        sigma = [float(sigma), float(sigma)]
    if isinstance(sigma, (list, tuple)) and len(sigma) == 1:
        sigma = [sigma[0], sigma[0]]
    if len(sigma) != 2:
        raise ValueError('If sigma is a sequence, its length should be 2. Got {}'.format(len(sigma)))
    for s in sigma:
        if s <= 0.:
            raise ValueError('sigma should have positive values. Got {}'.format(sigma))

    t_img = img
    if not isinstance(img, torch.Tensor):
        if not F_pil._is_pil_image(img):
            raise TypeError('img should be PIL Image or Tensor. Got {}'.format(type(img)))

        t_img = to_tensor(img)

    output = F_t.gaussian_blur(t_img, kernel_size, sigma)

    if not isinstance(img, torch.Tensor):
        output = to_pil_image(output)
    return output

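# Illustrative sketch (an addition, not part of the original module): blurring
# with a 5x5 kernel. The default sigma is then 5 * 0.15 + 0.35 = 1.1, which
# matches the docstring formula 0.3 * ((5 - 1) * 0.5 - 1) + 0.8 = 1.1.
#
#     >>> x = torch.rand(3, 32, 32)
#     >>> gaussian_blur(x, [5, 5]).shape
#     torch.Size([3, 32, 32])
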
def invert(img: Tensor) -> Tensor:
    """Invert the colors of an RGB/grayscale image.

    Args:
        img (PIL Image or Tensor): Image to have its colors inverted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".

    Returns:
        PIL Image or Tensor: Color inverted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.invert(img)

    return F_t.invert(img)


def posterize(img: Tensor, bits: int) -> Tensor:
    """Posterize an image by reducing the number of bits for each color channel.

    Args:
        img (PIL Image or Tensor): Image to have its colors posterized.
            If img is torch Tensor, it should be of type torch.uint8 and
            it is expected to be in [..., 1 or 3, H, W] format, where ... means
            it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".
        bits (int): The number of bits to keep for each channel (0-8).

    Returns:
        PIL Image or Tensor: Posterized image.
    """
    if not (0 <= bits <= 8):
        raise ValueError('The number of bits should be between 0 and 8. Got {}'.format(bits))

    if not isinstance(img, torch.Tensor):
        return F_pil.posterize(img, bits)

    return F_t.posterize(img, bits)

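# Illustrative sketch (an addition, not part of the original module): keeping
# the top 2 bits of a uint8 channel masks values to multiples of 64.
#
#     >>> x = torch.tensor([[[70, 200]]], dtype=torch.uint8)
#     >>> posterize(x, 2)
#     tensor([[[ 64, 192]]], dtype=torch.uint8)
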
def solarize(img: Tensor, threshold: float) -> Tensor:
    """Solarize an RGB/grayscale image by inverting all pixel values above a threshold.

    Args:
        img (PIL Image or Tensor): Image to have its colors inverted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".
        threshold (float): All pixels equal or above this value are inverted.

    Returns:
        PIL Image or Tensor: Solarized image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.solarize(img, threshold)

    return F_t.solarize(img, threshold)


def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor:
    """Adjust the sharpness of an image.

    Args:
        img (PIL Image or Tensor): Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
        sharpness_factor (float): How much to adjust the sharpness. Can be
            any non negative number. 0 gives a blurred image, 1 gives the
            original image while 2 increases the sharpness by a factor of 2.

    Returns:
        PIL Image or Tensor: Sharpness adjusted image.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_sharpness(img, sharpness_factor)

    return F_t.adjust_sharpness(img, sharpness_factor)


def autocontrast(img: Tensor) -> Tensor:
    """Maximize contrast of an image by remapping its
    pixels per channel so that the lowest becomes black and the lightest
    becomes white.

    Args:
        img (PIL Image or Tensor): Image on which autocontrast is applied.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".

    Returns:
        PIL Image or Tensor: An image that was autocontrasted.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.autocontrast(img)

    return F_t.autocontrast(img)


def equalize(img: Tensor) -> Tensor:
    """Equalize the histogram of an image by applying
    a non-linear mapping to the input in order to create a uniform
    distribution of grayscale values in the output.

    Args:
        img (PIL Image or Tensor): Image on which equalize is applied.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            The tensor dtype must be ``torch.uint8`` and values are expected to be in ``[0, 255]``.
            If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".

    Returns:
        PIL Image or Tensor: An image that was equalized.
    """
    if not isinstance(img, torch.Tensor):
        return F_pil.equalize(img)

    return F_t.equalize(img)