private_detector_hf / image_processing_private_detector.py
derenrich's picture
Upload folder using huggingface_hub
1ffeffa verified
Raw
History Blame Contribute Delete
2.38 kB
from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
from transformers.utils import TensorType
import numpy as np
from PIL import Image
import torch
class PrivateDetectorImageProcessor(BaseImageProcessor):
    """Image processor producing ``pixel_values`` for the Private Detector model.

    Every input image is:

    1. converted to an RGB PIL image,
    2. resized with bilinear resampling so its longer side equals ``size``
       (aspect ratio preserved),
    3. centred on a ``size`` x ``size`` canvas filled with gray (128),
    4. mapped to float32 via ``(pixel - 128) / 128`` and laid out
       channels-first as ``(3, size, size)``.
    """

    model_input_names = ["pixel_values"]

    def __init__(self, size=480, **kwargs):
        """Store the square output edge length ``size`` (in pixels)."""
        super().__init__(**kwargs)
        self.size = size

    @staticmethod
    def _to_pil(img):
        """Convert a PIL image, numpy array or torch tensor to an RGB PIL image.

        Raises:
            ValueError: if ``img`` is none of the supported types.
        """
        if not isinstance(img, Image.Image):
            if isinstance(img, torch.Tensor):
                # detach()/cpu() so tensors that require grad or live on an
                # accelerator can be converted; plain .numpy() raises for those.
                img = img.detach().cpu().numpy()
            if not isinstance(img, np.ndarray):
                raise ValueError("Unsupported image type")
            # Channels-first (3, H, W) -> channels-last (H, W, 3). The rank
            # check keeps 2-D grayscale arrays with three rows from crashing.
            # NOTE(review): a channels-last image that is exactly 3 px tall is
            # indistinguishable from channels-first here and gets transposed.
            if img.ndim == 3 and img.shape[0] == 3:
                img = img.transpose(1, 2, 0)
            # Image.fromarray rejects a trailing singleton channel axis.
            if img.ndim == 3 and img.shape[-1] == 1:
                img = img[..., 0]
            # NOTE(review): values are cast to uint8 as-is, so float inputs are
            # assumed to already be in the 0..255 range — confirm with callers.
            img = Image.fromarray(img.astype(np.uint8))
        # Convert before resizing: Pillow resamples palette / 1-bit images with
        # NEAREST regardless of the requested filter.
        if img.mode != "RGB":
            img = img.convert("RGB")
        return img

    def _letterbox(self, img):
        """Resize ``img`` to fit inside ``size`` x ``size`` and pad with gray.

        Returns a float32 array of shape ``(3, size, size)`` with values in
        ``[-1.0, 127 / 128]``.

        Raises:
            ValueError: if the image has zero width or height.
        """
        size = self.size
        w, h = img.size
        if w == 0 or h == 0:
            raise ValueError("Cannot preprocess an image with zero width or height")

        # 1. Resize preserving aspect ratio so the longer side equals `size`.
        scale = size / max(w, h)
        # Clamp to 1 px: extreme aspect ratios would otherwise round the short
        # side down to 0, which Image.resize rejects.
        new_w = max(1, int(round(w * scale)))
        new_h = max(1, int(round(h * scale)))
        # Bilinear resampling, intended to mirror the TF bilinear resize.
        # NOTE(review): Pillow's filter may antialias when downscaling —
        # confirm parity with the reference pipeline.
        resized = img.resize((new_w, new_h), Image.Resampling.BILINEAR)

        # 2. Centre on a gray (128) canvas; odd padding puts the extra pixel
        #    on the right / bottom.
        left = (size - new_w) // 2
        top = (size - new_h) // 2
        canvas = Image.new("RGB", (size, size), (128, 128, 128))
        canvas.paste(resized, (left, top))

        # 3. Normalise: 0 -> -1.0, 128 -> 0.0, 255 -> 127/128.
        arr = (np.array(canvas, dtype=np.float32) - 128.0) / 128.0
        # 4. Channels first: (3, H, W).
        return np.transpose(arr, (2, 0, 1))

    def preprocess(self, images, return_tensors=None, **kwargs):
        """Preprocess one image or a batch of images.

        Args:
            images: a single image (PIL image, numpy array or torch tensor),
                a list/tuple of such images, or a 4-D array/tensor whose first
                axis indexes the batch.
            return_tensors: forwarded to ``BatchFeature`` as ``tensor_type``.
            **kwargs: accepted for interface compatibility; unused.

        Returns:
            A ``BatchFeature`` whose ``"pixel_values"`` entry holds one
            ``(3, size, size)`` float32 array per input image.

        Raises:
            ValueError: if an image has an unsupported type or an empty size.
        """
        if isinstance(images, (np.ndarray, torch.Tensor)) and images.ndim == 4:
            # Batched array/tensor: split along the leading batch axis.
            images = list(images)
        elif isinstance(images, tuple):
            images = list(images)
        elif not isinstance(images, list):
            images = [images]

        processed_images = [self._letterbox(self._to_pil(img)) for img in images]
        data = {"pixel_values": processed_images}
        return BatchFeature(data=data, tensor_type=return_tensors)