| |
| |
| import torch |
|
|
|
|
| def _is_tensor_video_clip(clip): |
| if not torch.is_tensor(clip): |
| raise TypeError("clip should be Tesnor. Got %s" % type(clip)) |
|
|
| if not clip.ndimension() == 4: |
| raise ValueError("clip should be 4D. Got %dD" % clip.dim()) |
|
|
| return True |
|
|
|
|
def crop(clip, i, j, h, w):
    """
    Crop the spatial dimensions of a video clip.

    Args:
        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        i (int): row coordinate of the upper-left corner of the crop.
        j (int): column coordinate of the upper-left corner of the crop.
        h (int): Height of the cropped region.
        w (int): Width of the cropped region.
    Returns:
        cropped clip (torch.tensor): Size is (C, T, h, w)
    Raises:
        ValueError: if clip is not a 4D tensor.
    """
    # Explicit check instead of `assert`: asserts are stripped under `-O`,
    # and a ValueError matches _is_tensor_video_clip's error style.
    if len(clip.size()) != 4:
        raise ValueError("clip should be a 4D tensor. Got %dD" % clip.dim())
    return clip[..., i:i + h, j:j + w]
|
|
|
|
def resize(clip, target_size, interpolation_mode):
    """
    Resize the spatial dimensions of a video clip.

    Args:
        clip (torch.tensor): Video clip. Size is (C, T, H, W)
        target_size (tuple(int, int)): output (height, width)
        interpolation_mode (str): mode passed to torch.nn.functional.interpolate,
            e.g. "bilinear" or "nearest"
    Returns:
        resized clip (torch.tensor): Size is (C, T, target_height, target_width)
    """
    assert len(target_size) == 2, "target size should be tuple (height, width)"
    interpolate = torch.nn.functional.interpolate
    return interpolate(clip, size=target_size, mode=interpolation_mode)
|
|
|
|
def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"):
    """
    Crop a spatial region out of the video clip, then resize it.

    Args:
        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        i (int): row coordinate of the upper-left corner of the crop.
        j (int): column coordinate of the upper-left corner of the crop.
        h (int): Height of the cropped region.
        w (int): Width of the cropped region.
        size (tuple(int, int)): height and width of resized clip
        interpolation_mode (str): mode for torch.nn.functional.interpolate
    Returns:
        clip (torch.tensor): Resized and cropped clip. Size is (C, T, H, W)
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    cropped = crop(clip, i, j, h, w)
    return resize(cropped, size, interpolation_mode)
|
|
|
|
def center_crop(clip, crop_size):
    """
    Crop the video clip around its spatial center.

    Args:
        clip (torch.tensor): Video clip. Size is (C, T, H, W)
        crop_size (tuple(int, int)): desired (height, width) of the crop
    Returns:
        cropped clip (torch.tensor): Size is (C, T, crop_height, crop_width)
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    th, tw = crop_size
    h, w = clip.size(-2), clip.size(-1)
    assert h >= th and w >= tw, "height and width must be no smaller than crop_size"

    # Upper-left corner that centers the crop (rounds to nearest pixel).
    top = int(round((h - th) / 2.0))
    left = int(round((w - tw) / 2.0))
    return crop(clip, top, left, th, tw)
|
|
def corner_crop(clip, crop_size, i, j):
    """
    Crop a region of ``crop_size`` whose upper-left corner is at (i, j).

    Args:
        clip (torch.tensor): Video clip. Size is (C, T, H, W)
        crop_size (tuple(int, int)): desired (height, width) of the crop
        i (int): row coordinate of the upper-left corner.
        j (int): column coordinate of the upper-left corner.
    Returns:
        cropped clip (torch.tensor): Size is (C, T, crop_height, crop_width)
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4d torch tensor"
    th, tw = crop_size
    h, w = clip.size(-2), clip.size(-1)
    assert h >= th and w >= tw, "height and width must be no smaller than crop_size"
    return crop(clip, i, j, th, tw)
|
|
|
|
def to_tensor(clip):
    """
    Convert a uint8 video clip to float in [0, 1] and reorder its
    dimensions from (T, H, W, C) to (C, T, H, W).

    Args:
        clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
    Return:
        clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
    """
    _is_tensor_video_clip(clip)
    if clip.dtype != torch.uint8:
        raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
    # Channels-last -> channels-first, then scale to [0, 1].
    return clip.permute(3, 0, 1, 2).float() / 255.0
|
|
|
|
def normalize(clip, mean, std, inplace=False):
    """
    Normalize a video clip channel-wise: (clip - mean) / std.

    Args:
        clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
        mean (tuple): pixel RGB mean. Size is (3)
        std (tuple): pixel standard deviation. Size is (3)
        inplace (bool): when False, work on a clone so the input is untouched
    Returns:
        normalized clip (torch.tensor): Size is (C, T, H, W)
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    if not inplace:
        clip = clip.clone()
    mean_t = torch.as_tensor(mean, dtype=clip.dtype, device=clip.device)
    std_t = torch.as_tensor(std, dtype=clip.dtype, device=clip.device)
    num_channels = clip.size(0)
    if num_channels == 3:
        # Broadcast the per-channel stats over (T, H, W).
        clip.sub_(mean_t[:, None, None, None])
        clip.div_(std_t[:, None, None, None])
    elif num_channels == 1:
        # Grayscale clip: collapse the per-channel stats to scalars.
        clip.sub_(mean_t.mean()).div_(std_t.mean())
    else:
        raise NotImplementedError()
    return clip
|
|
|
|
def hflip(clip):
    """
    Flip a video clip horizontally (along the width axis).

    Args:
        clip (torch.tensor): Video clip to be flipped. Size is (C, T, H, W)
    Returns:
        flipped clip (torch.tensor): Size is (C, T, H, W)
    """
    assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
    return torch.flip(clip, [-1])
|
|