Spaces:
Running
Running
| """This module contains transforms for videos.""" | |
| import numbers | |
| import random | |
| import numpy as np | |
| from torchvision.transforms import RandomResizedCrop | |
| from . import functional_video as F | |
# Public API of this module: the names exported by ``from <module> import *``.
# ``ResizeVideo`` is listed as well because it is a public transform class
# defined in this file alongside the others.
__all__ = [
    "ResizeVideo",
    "RandomResizedCropVideo",
    "CenterCropVideo",
    "NormalizeVideo",
    "ToTensorVideo",
    "RandomHorizontalFlipVideo",
]
class ResizeVideo:
    """
    Resize a video clip by delegating to ``F.resize``

    Args:
        size: target size, stored unchanged and forwarded to ``F.resize``
        interpolation_mode (str): interpolation mode forwarded to ``F.resize``.
            Default value is "bilinear"
    """

    def __init__(self, size, interpolation_mode="bilinear"):
        self.size = size
        self.interpolation_mode = interpolation_mode

    def __call__(self, clip):
        """
        Args:
            clip: video clip to be resized
                (presumably a (C, T, H, W) tensor like the other transforms
                in this module - confirm against ``F.resize``)
        Returns:
            whatever ``F.resize`` returns for the clip, the configured size
            and the configured interpolation mode
        """
        return F.resize(clip, self.size, self.interpolation_mode)

    def __repr__(self):
        # Same "<ClassName>(field=value, ...)" layout as the other transforms.
        return (
            self.__class__.__name__
            + f"(size={self.size}, interpolation_mode={self.interpolation_mode})"
        )
class RandomResizedCropVideo(RandomResizedCrop):
    """
    Resize a video clip and then cut a randomly positioned square window
    out of it.

    The clip is first resized to ``size``; afterwards a ``crop`` x ``crop``
    window is taken from the last two dimensions at a random position.
    The ``scale`` / ``ratio`` attributes of the base class play no role in
    this transform.

    Args:
        size (int or 2-sequence): size the clip is resized to before cropping.
            A tuple/list is read as (height, width); any other value is used
            for both sides
        crop (int): side length of the square window that is cut out
        interpolation_mode (str): interpolation mode forwarded to ``F.resize``.
            Default value is "bilinear"
    """

    def __init__(
        self,
        size,
        crop,
        interpolation_mode="bilinear",
    ):
        # Lists are accepted as well: previously ``[h, w]`` fell through to the
        # scalar branch and became ``([h, w], [h, w])``.
        if isinstance(size, (tuple, list)):
            assert len(size) == 2, "size should be tuple (height, width)"
            size = tuple(size)
        else:
            size = (size, size)

        # Run the base-class initialiser so that the state it is responsible
        # for exists on the instance (it was skipped before).
        super().__init__(size)

        # Assigned after the base initialiser so these values are the ones
        # that end up on the instance.
        self.size = size
        self.interpolation_mode = interpolation_mode
        self.crop = crop

    @staticmethod
    def _random_offset(extent, window):
        """Return a random start index so ``window`` fits inside ``extent``.

        Returns 0 when the window is as large as, or larger than, the extent.
        """
        slack = extent - window
        if slack <= 0:
            return 0
        # ``np.random.randint`` excludes its upper bound; the +1 keeps the last
        # valid offset (window flush with the far edge) reachable.
        return np.random.randint(slack + 1)

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: randomly cropped/resized video clip.
                size is (C, T, crop, crop); a dimension of the resized clip
                that is smaller than ``crop`` is returned uncropped
        """
        clip = F.resize(clip, self.size, self.interpolation_mode)
        top = self._random_offset(clip.shape[2], self.crop)
        left = self._random_offset(clip.shape[3], self.crop)
        return clip[..., top : top + self.crop, left : left + self.crop]

    def __repr__(self):
        # Only report attributes this class actually sets and uses; the old
        # version read ``scale`` / ``ratio``, which were never assigned.
        return (
            self.__class__.__name__
            + f"(size={self.size}, crop={self.crop}, "
            + f"interpolation_mode={self.interpolation_mode})"
        )
class CenterCropVideo:
    """
    Cut the central region out of a video clip
    Args:
        crop_size: a single number (truncated with ``int`` and used for both
            sides) or any other value, which is kept unchanged and handed to
            ``F.center_crop``
    """

    def __init__(self, crop_size):
        if isinstance(crop_size, numbers.Number):
            # A lone number describes a square window.
            side = int(crop_size)
            crop_size = (side, side)
        self.crop_size = crop_size

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        Returns:
            torch.tensor: central cropping of video clip. Size is
            (C, T, crop_size, crop_size)
        """
        cropped = F.center_crop(clip, self.crop_size)
        return cropped

    def __repr__(self):
        params = f"(crop_size={self.crop_size})"
        return type(self).__name__ + params
class NormalizeVideo:
    """
    Normalize a video clip: subtract the mean and divide by the standard
    deviation
    Args:
        mean (3-tuple): pixel RGB mean
        std (3-tuple): pixel RGB standard deviation
        inplace (boolean): whether to normalize in place. Default value is
            False
    """

    def __init__(self, mean, std, inplace=False):
        self.mean, self.std, self.inplace = mean, std, inplace

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
        Returns:
            the result of ``F.normalize`` for the clip and the stored
            mean / std / inplace settings
        """
        normalized = F.normalize(clip, self.mean, self.std, self.inplace)
        return normalized

    def __repr__(self):
        params = f"(mean={self.mean}, std={self.std}, inplace={self.inplace})"
        return type(self).__name__ + params
class ToTensorVideo:
    """Convert tensor data type from uint8 to float, divide value by 255.0 and
    permute the dimensions of clip tensor."""

    def __init__(self):
        """Nothing to configure: the transform takes no parameters."""

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
        Return:
            clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
        """
        converted = F.to_tensor(clip)
        return converted

    def __repr__(self):
        return type(self).__name__
class RandomHorizontalFlipVideo:
    """
    Flip the video clip along the horizontal direction with a given probability
    Args:
        p (float): probability of the clip being flipped. Default value is 0.5
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, clip):
        """
        Args:
            clip (torch.tensor): Size is (C, T, H, W)
        Return:
            clip (torch.tensor): Size is (C, T, H, W)
        """
        # One draw per call decides whether this clip gets flipped.
        should_flip = random.random() < self.p
        if not should_flip:
            return clip
        return F.hflip(clip)

    def __repr__(self):
        params = f"(p={self.p})"
        return type(self).__name__ + params