"""Image transforms for preprocessing."""
from typing import Literal, Optional, Tuple, Union
import cv2
import numpy as np
from PIL import Image, ImageOps
def _default(val, default_val):
"""Return val if not None, else default_val (or call it if callable)."""
if val is not None:
return val
return default_val() if callable(default_val) else default_val
class AspectPreserveResize:
    """
    Resize images while preserving aspect ratio.

    One scale factor is derived from the per-axis ratios
    ``target_width / width`` and ``target_height / height`` and applied to
    both axes.

    Args:
        target_size: Target (width, height)
        mode: Resize mode
            - "fit": Use the smaller per-axis ratio, so the result fits
              within the target (may be smaller on one axis)
            - "exceed": Use the larger per-axis ratio, so the result covers
              the target (may be larger on one axis)
            - "short": Uses the larger per-axis ratio (same factor as "exceed")
            - "long": Uses the smaller per-axis ratio (same factor as "fit")
        backend: "pil" or "opencv"
    """

    # Added before truncating a scaled dimension so that floating-point
    # round-off (e.g. 512 / 49 * 49 == 511.99999999999994) does not leave the
    # limiting side one pixel short of the target.
    _SIZE_EPSILON = 1e-9

    def __init__(
        self,
        target_size: Tuple[int, int],
        mode: Literal["short", "long", "fit", "exceed"] = "fit",
        backend: Literal["pil", "opencv"] = "pil",
    ):
        self.target_size = target_size
        self.mode = mode
        self.backend = backend

    def _get_or_infer_scale_factor(
        self, width: int, height: int, allow_upsampling: bool = True
    ) -> float:
        """Compute the uniform scale factor for an image of the given size.

        Args:
            width: Source image width in pixels.
            height: Source image height in pixels.
            allow_upsampling: When False, a factor above 1.0 is replaced by 1.0.

        Returns:
            The factor to multiply both image dimensions by.

        Raises:
            ValueError: If ``self.mode`` is not one of the supported modes.
        """
        target_width, target_height = self.target_size
        scale_factor_width = target_width / width
        scale_factor_height = target_height / height

        if self.mode in ("long", "fit"):
            scale_factor = min(scale_factor_width, scale_factor_height)
        elif self.mode in ("short", "exceed"):
            scale_factor = max(scale_factor_width, scale_factor_height)
        else:
            raise ValueError("Invalid mode. It should be 'short', 'long', 'fit', or 'exceed'.")

        if not allow_upsampling and scale_factor > 1.0:
            return 1.0
        return scale_factor

    @classmethod
    def _scaled_size(cls, width: int, height: int, scale_factor: float) -> Tuple[int, int]:
        """Return the integer (width, height) obtained by scaling both sides.

        Sizes are truncated toward zero after adding a tiny tolerance that
        absorbs floating-point round-off, and are clamped to at least one
        pixel so extreme aspect ratios never yield an empty image.
        """
        new_width = max(1, int(scale_factor * width + cls._SIZE_EPSILON))
        new_height = max(1, int(scale_factor * height + cls._SIZE_EPSILON))
        return new_width, new_height

    def _resize_image_pil(
        self, img: Image.Image, scale_factor: float, interpolation: Optional[int] = None
    ) -> Image.Image:
        """Resize a PIL image by ``scale_factor``.

        The input object itself is returned when ``scale_factor`` is exactly
        1.0. ``interpolation`` defaults to ``Image.LANCZOS``.
        """
        if scale_factor == 1.0:
            return img
        width, height = img.size
        new_size = self._scaled_size(width, height, scale_factor)
        interpolation = _default(interpolation, Image.LANCZOS)
        return img.resize(new_size, interpolation)

    def _resize_image_opencv(
        self, img: np.ndarray, scale_factor: float, interpolation: Optional[int] = None
    ) -> np.ndarray:
        """Resize an array image by ``scale_factor`` using OpenCV.

        The input object itself is returned when ``scale_factor`` is exactly
        1.0. ``interpolation`` defaults to ``cv2.INTER_LANCZOS4`` when
        enlarging and ``cv2.INTER_AREA`` when shrinking.
        """
        if scale_factor == 1.0:
            return img
        # The first two axes of the array are taken as (height, width).
        height, width = img.shape[:2]
        new_size = self._scaled_size(width, height, scale_factor)
        interpolation = _default(
            interpolation, cv2.INTER_LANCZOS4 if scale_factor > 1 else cv2.INTER_AREA
        )
        return cv2.resize(img, new_size, interpolation=interpolation)

    def __call__(
        self,
        img: Union[Image.Image, np.ndarray],
        allow_upsampling: bool = True,
        interpolation: Optional[int] = None,
    ) -> Union[Image.Image, np.ndarray]:
        """Resize ``img`` according to the configured target size and mode.

        Args:
            img: A PIL image for the "pil" backend, an array for "opencv".
            allow_upsampling: When False the image is never enlarged.
            interpolation: Backend-specific interpolation flag; ``None``
                selects the backend default.

        Returns:
            The resized image, or ``img`` itself when the scale factor is 1.0.

        Raises:
            ValueError: If the configured backend or mode is not supported.
        """
        if self.backend == "pil":
            width, height = img.size
            resize = self._resize_image_pil
        elif self.backend == "opencv":
            height, width = img.shape[:2]
            resize = self._resize_image_opencv
        else:
            raise ValueError(
                f"Invalid backend {self.backend!r}. It should be 'pil' or 'opencv'."
            )
        scale_factor = self._get_or_infer_scale_factor(width, height, allow_upsampling)
        return resize(img, scale_factor, interpolation=interpolation)
class PadToShape:
    """
    Pad images to a target shape with symmetric padding.

    Padding is split evenly between opposite sides; when the amount is odd
    the extra pixel goes to the right / bottom. Images already at least as
    large as the target along an axis receive no padding on that axis.

    Args:
        target_size: Target (width, height)
        fill_value: Padding color (int for grayscale, tuple for RGB). An int
            is stored as a 3-tuple repeating that value.
        backend: "pil" or "opencv"
    """

    def __init__(
        self,
        target_size: Tuple[int, int],
        fill_value: Union[int, tuple] = 0,
        backend: Literal["pil", "opencv"] = "opencv",
    ) -> None:
        self.target_width, self.target_height = target_size
        self.backend = backend
        if isinstance(fill_value, int):
            self.fill_value = (fill_value,) * 3
        else:
            self.fill_value = fill_value
        # (left, top, right, bottom) of the last call made with
        # mem_padding=True; consumed by unpad().
        self.padding_mem: Optional[Tuple[int, int, int, int]] = None

    @staticmethod
    def _calculate_needed_padding(
        width: int, height: int, target_width: int, target_height: int
    ) -> Tuple[int, int, int, int]:
        """Return (left, top, right, bottom) padding needed to reach the target.

        Negative differences are clamped to zero, so an oversized image is
        never cropped here.
        """
        total_width_padding = max(target_width - width, 0)
        total_height_padding = max(target_height - height, 0)
        pad_left = total_width_padding // 2
        pad_top = total_height_padding // 2
        # Any odd remainder lands on the right / bottom edge.
        pad_right = total_width_padding - pad_left
        pad_bottom = total_height_padding - pad_top
        return pad_left, pad_top, pad_right, pad_bottom

    def _pil_fill(self, img: Image.Image):
        """Pick the fill colour to hand to PIL for ``img``.

        Single-band PIL modes expect a scalar colour, so a uniform tuple
        (such as the 3-tuple an int ``fill_value`` is expanded to) is
        collapsed back to its single value for them. Palette ("P") images
        are excluded so that a tuple keeps its meaning as a colour rather
        than becoming a palette index.
        """
        fill = self.fill_value
        if (
            isinstance(fill, tuple)
            and len(set(fill)) == 1
            and img.mode != "P"
            and len(img.getbands()) == 1
        ):
            return fill[0]
        return fill

    def _pad_image_pil(self, img: Image.Image, padding: Tuple[int, int, int, int]) -> Image.Image:
        """Pad a PIL image; ``padding`` is (left, top, right, bottom)."""
        return ImageOps.expand(img, border=padding, fill=self._pil_fill(img))

    def _pad_image_opencv(self, img: np.ndarray, padding: Tuple[int, int, int, int]) -> np.ndarray:
        """Pad an array image with a constant border via OpenCV."""
        pad_left, pad_top, pad_right, pad_bottom = padding
        # copyMakeBorder takes its sides in (top, bottom, left, right) order.
        return cv2.copyMakeBorder(
            img, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=self.fill_value
        )

    def unpad(self, img: Union[Image.Image, np.ndarray]) -> Union[Image.Image, np.ndarray]:
        """Remove padding using stored padding dimensions.

        The input type, not the configured backend, decides how the image is
        cropped: PIL images are cropped with ``Image.crop``; anything else is
        sliced along its first two axes.

        Raises:
            ValueError: If no padding has been remembered yet (no previous
                call with ``mem_padding=True``).
        """
        if self.padding_mem is None:
            raise ValueError("Padding memory is not set.")
        pad_left, pad_top, pad_right, pad_bottom = self.padding_mem
        # ndarray is tested first so the array path never needs to consult PIL.
        if not isinstance(img, np.ndarray) and isinstance(img, Image.Image):
            return img.crop((pad_left, pad_top, img.width - pad_right, img.height - pad_bottom))
        return img[pad_top : img.shape[0] - pad_bottom, pad_left : img.shape[1] - pad_right]

    def __call__(
        self,
        img: Union[Image.Image, np.ndarray],
        mem_padding: bool = False,
    ) -> Union[Image.Image, np.ndarray]:
        """Pad ``img`` up to the target size.

        Args:
            img: A PIL image for the "pil" backend; any other backend value
                treats ``img`` as an array and pads it with OpenCV.
            mem_padding: When True, remember the applied padding on the
                instance so that ``unpad`` can undo it later.

        Returns:
            The padded image.
        """
        use_pil = self.backend == "pil"
        if use_pil:
            width, height = img.size
        else:
            height, width = img.shape[:2]
        padding = self._calculate_needed_padding(width, height, self.target_width, self.target_height)
        if mem_padding:
            self.padding_mem = padding
        if use_pil:
            return self._pad_image_pil(img, padding)
        return self._pad_image_opencv(img, padding)
class ResizePad:
    """
    Aspect-preserving resize followed by symmetric padding.

    Combines AspectPreserveResize and PadToShape to resize images to fit
    within target dimensions while preserving aspect ratio, then pads
    to reach exact target size.

    Args:
        target_image_size: Target (width, height)
        backend: "pil" or "opencv"
        fill_value: Padding color forwarded to PadToShape (defaults to 0)
    """

    def __init__(
        self,
        target_image_size: Tuple[int, int],
        backend: Literal["pil", "opencv"] = "opencv",
        fill_value: Union[int, tuple] = 0,
    ) -> None:
        self.resize_fn = AspectPreserveResize(target_size=target_image_size, mode="fit", backend=backend)
        self.pad_fn = PadToShape(target_image_size, fill_value=fill_value, backend=backend)

    def unpad(self, img: Union[Image.Image, np.ndarray]) -> Union[Image.Image, np.ndarray]:
        """Strip the padding remembered from the last ``mem_padding=True`` call.

        Only the padding is removed; the resize step is not reversed, so the
        result has the resized (pre-padding) dimensions.
        """
        return self.pad_fn.unpad(img)

    def __call__(
        self,
        img: Union[Image.Image, np.ndarray],
        mem_padding: bool = False,
        interpolation: Optional[int] = None,
        allow_upsampling: bool = True,
    ) -> Union[Image.Image, np.ndarray]:
        """Resize ``img`` to fit the target, then pad it to the exact target size.

        Args:
            img: Image in the representation expected by the configured backend.
            mem_padding: When True, remember the applied padding for ``unpad``.
            interpolation: Backend-specific interpolation flag for the resize
                step; ``None`` selects the backend default.
            allow_upsampling: When False, images smaller than the target are
                not enlarged before padding.

        Returns:
            The resized and padded image.
        """
        img = self.resize_fn(img, allow_upsampling=allow_upsampling, interpolation=interpolation)
        return self.pad_fn(img, mem_padding=mem_padding)
|