File size: 6,004 Bytes
17c6d62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# coding=utf-8
# Copyright 2023 The Intel Labs Team Authors, The Microsoft Research Team Authors and HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from typing import Dict, List, Optional, Union
import numpy as np
from transformers.testing_utils import require_torch, require_vision
from transformers.utils import is_vision_available
from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
if is_vision_available():
from PIL import Image
from transformers import BridgeTowerImageProcessor
class BridgeTowerImageProcessingTester:
def __init__(
self,
parent,
do_resize: bool = True,
size: Dict[str, int] = None,
size_divisor: int = 32,
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: bool = True,
do_center_crop: bool = True,
image_mean: Optional[Union[float, List[float]]] = [0.48145466, 0.4578275, 0.40821073],
image_std: Optional[Union[float, List[float]]] = [0.26862954, 0.26130258, 0.27577711],
do_pad: bool = True,
batch_size=7,
min_resolution=30,
max_resolution=400,
num_channels=3,
):
self.parent = parent
self.do_resize = do_resize
self.size = size if size is not None else {"shortest_edge": 288}
self.size_divisor = size_divisor
self.do_rescale = do_rescale
self.rescale_factor = rescale_factor
self.do_normalize = do_normalize
self.do_center_crop = do_center_crop
self.image_mean = image_mean
self.image_std = image_std
self.do_pad = do_pad
self.batch_size = batch_size
self.num_channels = num_channels
self.min_resolution = min_resolution
self.max_resolution = max_resolution
def prepare_image_processor_dict(self):
return {
"image_mean": self.image_mean,
"image_std": self.image_std,
"do_normalize": self.do_normalize,
"do_resize": self.do_resize,
"size": self.size,
"size_divisor": self.size_divisor,
}
def get_expected_values(self, image_inputs, batched=False):
"""
This function computes the expected height and width when providing images to BridgeTowerImageProcessor,
assuming do_resize is set to True with a scalar size and size_divisor.
"""
if not batched:
size = self.size["shortest_edge"]
image = image_inputs[0]
if isinstance(image, Image.Image):
w, h = image.size
elif isinstance(image, np.ndarray):
h, w = image.shape[0], image.shape[1]
else:
h, w = image.shape[1], image.shape[2]
scale = size / min(w, h)
if h < w:
newh, neww = size, scale * w
else:
newh, neww = scale * h, size
max_size = int((1333 / 800) * size)
if max(newh, neww) > max_size:
scale = max_size / max(newh, neww)
newh = newh * scale
neww = neww * scale
newh, neww = int(newh + 0.5), int(neww + 0.5)
expected_height, expected_width = (
newh // self.size_divisor * self.size_divisor,
neww // self.size_divisor * self.size_divisor,
)
else:
expected_values = []
for image in image_inputs:
expected_height, expected_width = self.get_expected_values([image])
expected_values.append((expected_height, expected_width))
expected_height = max(expected_values, key=lambda item: item[0])[0]
expected_width = max(expected_values, key=lambda item: item[1])[1]
return expected_height, expected_width
def expected_output_image_shape(self, images):
height, width = self.get_expected_values(images, batched=True)
return self.num_channels, height, width
def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False):
return prepare_image_inputs(
batch_size=self.batch_size,
num_channels=self.num_channels,
min_resolution=self.min_resolution,
max_resolution=self.max_resolution,
equal_resolution=equal_resolution,
numpify=numpify,
torchify=torchify,
)
@require_torch
@require_vision
class BridgeTowerImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
image_processing_class = BridgeTowerImageProcessor if is_vision_available() else None
def setUp(self):
super().setUp()
self.image_processor_tester = BridgeTowerImageProcessingTester(self)
@property
def image_processor_dict(self):
return self.image_processor_tester.prepare_image_processor_dict()
def test_image_processor_properties(self):
image_processing = self.image_processing_class(**self.image_processor_dict)
self.assertTrue(hasattr(image_processing, "image_mean"))
self.assertTrue(hasattr(image_processing, "image_std"))
self.assertTrue(hasattr(image_processing, "do_normalize"))
self.assertTrue(hasattr(image_processing, "do_resize"))
self.assertTrue(hasattr(image_processing, "size"))
self.assertTrue(hasattr(image_processing, "size_divisor"))
|