Spaces:
Build error
Build error
| # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # SPDX-License-Identifier: Apache-2.0 | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from typing import Any, Optional | |
| import torch | |
| import torchvision.transforms.functional as transforms_F | |
| from PIL import Image | |
| def obtain_image_size(data_dict: dict, input_keys: list) -> tuple[int, int]: | |
| r"""Function for obtaining the image size from the data dict. | |
| Args: | |
| data_dict (dict): Input data dict | |
| input_keys (list): List of input keys | |
| Returns: | |
| width (int): Width of the input image | |
| height (int): Height of the input image | |
| """ | |
| data1 = data_dict[input_keys[0]] | |
| if isinstance(data1, Image.Image): | |
| width, height = data1.size | |
| elif isinstance(data1, torch.Tensor): | |
| height, width = data1.size()[-2:] | |
| else: | |
| raise ValueError("data to random crop should be PIL Image or tensor") | |
| return width, height | |
| class Augmentor: | |
| def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: | |
| r"""Base augmentor class | |
| Args: | |
| input_keys (list): List of input keys | |
| output_keys (list): List of output keys | |
| args (dict): Arguments associated with the augmentation | |
| """ | |
| self.input_keys = input_keys | |
| self.output_keys = output_keys | |
| self.args = args | |
| def __call__(self, *args: Any, **kwds: Any) -> Any: | |
| raise ValueError("Augmentor not implemented") | |
| class ResizeSmallestSideAspectPreserving(Augmentor): | |
| def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: | |
| super().__init__(input_keys, output_keys, args) | |
| def __call__(self, data_dict: dict) -> dict: | |
| r"""Performs aspect-ratio preserving resizing. | |
| Image is resized to the dimension which has the smaller ratio of (size / target_size). | |
| First we compute (w_img / w_target) and (h_img / h_target) and resize the image | |
| to the dimension that has the smaller of these ratios. | |
| Args: | |
| data_dict (dict): Input data dict | |
| Returns: | |
| data_dict (dict): Output dict where images are resized | |
| """ | |
| if self.output_keys is None: | |
| self.output_keys = self.input_keys | |
| assert self.args is not None, "Please specify args in augmentations" | |
| img_w, img_h = self.args["img_w"], self.args["img_h"] | |
| orig_w, orig_h = obtain_image_size(data_dict, self.input_keys) | |
| scaling_ratio = max((img_w / orig_w), (img_h / orig_h)) | |
| target_size = (int(scaling_ratio * orig_h + 0.5), int(scaling_ratio * orig_w + 0.5)) | |
| assert ( | |
| target_size[0] >= img_h and target_size[1] >= img_w | |
| ), f"Resize error. orig {(orig_w, orig_h)} desire {(img_w, img_h)} compute {target_size}" | |
| for inp_key, out_key in zip(self.input_keys, self.output_keys): | |
| data_dict[out_key] = transforms_F.resize( | |
| data_dict[inp_key], | |
| size=target_size, # type: ignore | |
| interpolation=getattr(self.args, "interpolation", transforms_F.InterpolationMode.BICUBIC), | |
| antialias=True, | |
| ) | |
| if out_key != inp_key: | |
| del data_dict[inp_key] | |
| return data_dict | |
| class CenterCrop(Augmentor): | |
| def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: | |
| super().__init__(input_keys, output_keys, args) | |
| def __call__(self, data_dict: dict) -> dict: | |
| r"""Performs center crop. | |
| Args: | |
| data_dict (dict): Input data dict | |
| Returns: | |
| data_dict (dict): Output dict where images are center cropped. | |
| We also save the cropping parameters in the aug_params dict | |
| so that it will be used by other transforms. | |
| """ | |
| assert ( | |
| (self.args is not None) and ("img_w" in self.args) and ("img_h" in self.args) | |
| ), "Please specify size in args" | |
| img_w, img_h = self.args["img_w"], self.args["img_h"] | |
| orig_w, orig_h = obtain_image_size(data_dict, self.input_keys) | |
| for key in self.input_keys: | |
| data_dict[key] = transforms_F.center_crop(data_dict[key], [img_h, img_w]) | |
| # We also add the aug params we use. This will be useful for other transforms | |
| crop_x0 = (orig_w - img_w) // 2 | |
| crop_y0 = (orig_h - img_h) // 2 | |
| cropping_params = { | |
| "resize_w": orig_w, | |
| "resize_h": orig_h, | |
| "crop_x0": crop_x0, | |
| "crop_y0": crop_y0, | |
| "crop_w": img_w, | |
| "crop_h": img_h, | |
| } | |
| if "aug_params" not in data_dict: | |
| data_dict["aug_params"] = dict() | |
| data_dict["aug_params"]["cropping"] = cropping_params | |
| data_dict["padding_mask"] = torch.zeros((1, cropping_params["crop_h"], cropping_params["crop_w"])) | |
| return data_dict | |
| class Normalize(Augmentor): | |
| def __init__(self, input_keys: list, output_keys: Optional[list] = None, args: Optional[dict] = None) -> None: | |
| super().__init__(input_keys, output_keys, args) | |
| def __call__(self, data_dict: dict) -> dict: | |
| r"""Performs data normalization. | |
| Args: | |
| data_dict (dict): Input data dict | |
| Returns: | |
| data_dict (dict): Output dict where images are center cropped. | |
| """ | |
| assert self.args is not None, "Please specify args" | |
| mean = self.args["mean"] | |
| std = self.args["std"] | |
| for key in self.input_keys: | |
| if isinstance(data_dict[key], torch.Tensor): | |
| data_dict[key] = data_dict[key].to(dtype=torch.get_default_dtype()).div(255) | |
| else: | |
| data_dict[key] = transforms_F.to_tensor(data_dict[key]) # division by 255 is applied in to_tensor() | |
| data_dict[key] = transforms_F.normalize(tensor=data_dict[key], mean=mean, std=std) | |
| return data_dict | |