Spaces:
Sleeping
Sleeping
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # This source code is licensed under the license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import numpy as np | |
| import torch | |
| from torch.nn import functional as F | |
| from torchvision.transforms.functional import resize, to_pil_image # type: ignore | |
| from typing import List | |
| from copy import deepcopy | |
| from typing import Tuple | |
class ResizeLongestSide:
    """
    Resizes images so that their longest side equals 'target_length', and
    provides matching methods for rescaling point coordinates and boxes.
    Supports both numpy arrays and torch tensors, plus a `preprocess` step
    that normalizes pixel values and pads to a square.
    """

    def __init__(self, target_length: int,
                 pixel_mean: List[float] = [123.675, 116.28, 103.53],
                 pixel_std: List[float] = [58.395, 57.12, 57.375],) -> None:
        """
        Args:
            target_length: Length, in pixels, of the longest side after resizing.
            pixel_mean: Per-channel mean subtracted in `preprocess`.
            pixel_std: Per-channel standard deviation divided out in `preprocess`.
        """
        self.target_length = target_length
        # The list defaults are only read (copied into tensors), never mutated.
        # Stored as (C, 1, 1) so they broadcast over the trailing H and W axes.
        self.pixel_mean = torch.Tensor(pixel_mean).view(-1, 1, 1)
        self.pixel_std = torch.Tensor(pixel_std).view(-1, 1, 1)

    def apply_image(self, image: np.ndarray) -> np.ndarray:
        """
        Resize a numpy image so its longest side equals `target_length`.

        Expects a numpy array with shape HxWxC in uint8 format.
        """
        target_size = self.get_preprocess_shape(
            image.shape[0], image.shape[1], self.target_length
        )
        return np.array(resize(to_pil_image(image), target_size))

    def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
        """
        Rescale (x, y) coordinates from the original image to the resized one.

        Expects a numpy array of length 2 in the final dimension. Requires the
        original image size in (H, W) format.

        Returns:
            A new float array; the input array is left untouched.
        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(old_h, old_w, self.target_length)
        # astype() returns a fresh array by default, so the caller's data is
        # never modified by the in-place scaling below.
        coords = coords.astype(float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords

    def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
        """
        Rescale boxes from the original image to the resized one.

        Expects a numpy array shape Bx4. Requires the original image size
        in (H, W) format.
        """
        # Each box is treated as two (x, y) corner points.
        boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
        """
        Resize a torch image so its longest side equals `target_length`.

        Expects float images with shape BxCxHxW; CxHxW and HxW inputs are
        also accepted and returned with the same number of dimensions. This
        transformation may not exactly match apply_image. apply_image is
        the transformation expected by the model.
        """
        target_size = self.get_preprocess_shape(
            image.shape[-2], image.shape[-1], self.target_length
        )
        # Bilinear interpolation needs a 4-D (B, C, H, W) tensor: temporarily
        # add the missing leading dimensions and strip them again afterwards.
        missing_dims = max(4 - image.dim(), 0)
        for _ in range(missing_dims):
            image = image.unsqueeze(0)
        resized = F.interpolate(
            image, target_size, mode="bilinear", align_corners=False, antialias=True
        )
        for _ in range(missing_dims):
            resized = resized.squeeze(0)
        return resized

    def preprocess(self, x: torch.Tensor) -> torch.Tensor:
        """
        Normalize pixel values and pad to a square input.

        2-D (HxW) inputs are padded only; inputs with more dimensions are
        first normalized with `pixel_mean` / `pixel_std`. Padding is added on
        the bottom and right so both spatial sides equal `target_length`.
        """
        # Normalize colors (skipped for 2-D inputs, which have no channel axis).
        if x.dim() != 2:
            device = x.device
            x = (x - self.pixel_mean.to(device)) / self.pixel_std.to(device)

        # Pad
        h, w = x.shape[-2:]
        padh = self.target_length - h
        padw = self.target_length - w
        return F.pad(x, (0, padw, 0, padh))

    def apply_coords_torch(
        self, coords: torch.Tensor, original_size: Tuple[int, ...]
    ) -> torch.Tensor:
        """
        Rescale (x, y) coordinates from the original image to the resized one.

        Expects a torch tensor with length 2 in the last dimension. Requires the
        original image size in (H, W) format.

        Returns:
            A new float tensor; the input tensor is left untouched.
        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(old_h, old_w, self.target_length)
        # .to() may hand back the very same tensor when the dtype already
        # matches, so copy first to keep the in-place scaling off the input.
        coords = deepcopy(coords).to(torch.float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords

    def apply_boxes_torch(
        self, boxes: torch.Tensor, original_size: Tuple[int, ...]
    ) -> torch.Tensor:
        """
        Rescale boxes from the original image to the resized one.

        Expects a torch tensor with shape Bx4. Requires the original image
        size in (H, W) format.
        """
        # Each box is treated as two (x, y) corner points.
        boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    @staticmethod
    def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
        """
        Compute the output size given input size and target long side length.

        Declared static because it takes no `self`; the other methods call it
        through the instance as `self.get_preprocess_shape(h, w, length)`.

        Returns:
            (new_h, new_w), each rounded half-up to the nearest integer.
        """
        scale = long_side_length * 1.0 / max(oldh, oldw)
        newh, neww = oldh * scale, oldw * scale
        # int(v + 0.5) rounds half-up for the non-negative sizes used here.
        neww = int(neww + 0.5)
        newh = int(newh + 0.5)
        return (newh, neww)