| from transformers.image_processing_utils import BaseImageProcessor, BatchFeature |
| from transformers.utils import TensorType |
| import numpy as np |
| from PIL import Image |
| import torch |
|
|
class PrivateDetectorImageProcessor(BaseImageProcessor):
    """Letterbox images onto a square canvas and scale pixels to about [-1, 1).

    Each image is resized so that its longer side equals ``size`` (aspect
    ratio preserved), centred on a mid-grey ``size`` x ``size`` RGB canvas,
    mapped with ``(pixel - 128) / 128`` and returned channel-first as float32.
    """

    model_input_names = ["pixel_values"]

    def __init__(self, size=480, **kwargs):
        """Create the processor.

        Args:
            size: Side length, in pixels, of the square output.
            **kwargs: Forwarded to ``BaseImageProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self.size = size

    def preprocess(self, images, return_tensors=None, **kwargs):
        """Convert one image or a batch of images into model inputs.

        Args:
            images: A single image or a list/tuple of images. Each image may
                be a ``PIL.Image.Image``, a ``numpy.ndarray`` or a
                ``torch.Tensor``.
            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry holds one
            ``(3, size, size)`` float32 array per input image.

        Raises:
            ValueError: If an image is of an unsupported type or rank.
        """
        # Tuples are batches too; wrapping one as a single "image" would fail
        # later with a misleading "Unsupported image type".
        if not isinstance(images, (list, tuple)):
            images = [images]

        pixel_values = [self._letterbox(self._to_pil(image)) for image in images]
        return BatchFeature(
            data={"pixel_values": pixel_values}, tensor_type=return_tensors
        )

    @staticmethod
    def _to_pil(image):
        """Return ``image`` as an RGB ``PIL.Image.Image``."""
        if isinstance(image, Image.Image):
            # Convert before resizing so every mode (palette, grayscale,
            # RGBA, ...) goes through the same bilinear RGB pipeline.
            return image if image.mode == "RGB" else image.convert("RGB")

        if isinstance(image, torch.Tensor):
            # ``Tensor.numpy()`` refuses tensors that require grad or that
            # live on a non-CPU device.
            image = image.detach().cpu().numpy()
        if not isinstance(image, np.ndarray):
            raise ValueError("Unsupported image type")

        # Channel-first (3, H, W) -> channel-last (H, W, 3). Only meaningful
        # for 3-D arrays; a 2-D grayscale array of height 3 is left alone.
        if image.ndim == 3 and image.shape[0] == 3:
            image = image.transpose(1, 2, 0)
        # ``Image.fromarray`` rejects (H, W, 1); drop the singleton channel.
        if image.ndim == 3 and image.shape[2] == 1:
            image = image[:, :, 0]
        if image.ndim not in (2, 3):
            raise ValueError(
                f"Unsupported image array with {image.ndim} dimensions"
            )

        # NOTE: values are cast straight to uint8, so floating-point inputs
        # are taken to be in the 0..255 range already.
        return Image.fromarray(image.astype(np.uint8)).convert("RGB")

    def _letterbox(self, image):
        """Resize ``image`` to fit, pad to a square, normalise, go channel-first."""
        side = self.size
        width, height = image.size
        scale = side / max(width, height)
        # Clamp to 1 px: for extreme aspect ratios the short side can round
        # to 0, which ``Image.resize`` rejects.
        new_w = max(1, int(round(width * scale)))
        new_h = max(1, int(round(height * scale)))

        resized = image.resize((new_w, new_h), Image.Resampling.BILINEAR)

        # Centre the resized image on a mid-grey canvas; grey (128) maps to
        # exactly 0.0 after normalisation.
        canvas = Image.new("RGB", (side, side), (128, 128, 128))
        canvas.paste(resized, ((side - new_w) // 2, (side - new_h) // 2))

        pixels = (np.array(canvas, dtype=np.float32) - 128.0) / 128.0
        # (H, W, C) -> (C, H, W)
        return np.transpose(pixels, (2, 0, 1))
|
|