private_detector_hf / image_processing_private_detector.py

Upload folder using huggingface_hub

1ffeffa verified 16 days ago

2.38 kB

	from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
	from transformers.utils import TensorType
	import numpy as np
	from PIL import Image
	import torch

	class PrivateDetectorImageProcessor(BaseImageProcessor):
	    """Letterbox images onto a gray square canvas and scale them for the model.

	    Each input image is resized (aspect ratio preserved) so that its longest
	    side equals ``size``, centred on a ``size`` x ``size`` canvas filled with
	    the value 128, mapped with ``(x - 128) / 128`` and returned channels-first.
	    """

	    model_input_names = ["pixel_values"]

	    def __init__(self, size=480, **kwargs):
	        """Store the target canvas side length.

	        Args:
	            size: Side length, in pixels, of the square output canvas.
	            **kwargs: Forwarded unchanged to ``BaseImageProcessor.__init__``.
	        """
	        super().__init__(**kwargs)
	        self.size = size

	    @staticmethod
	    def _to_pil(img):
	        """Convert a PIL image, numpy array or torch tensor to a PIL image.

	        Raises:
	            ValueError: If ``img`` is none of the supported types.
	        """
	        if isinstance(img, Image.Image):
	            return img

	        if isinstance(img, torch.Tensor):
	            # A bare ``.numpy()`` raises for CUDA tensors and for tensors that
	            # require grad, so detach and move to host memory first.
	            img = img.detach().cpu().numpy()

	        if not isinstance(img, np.ndarray):
	            raise ValueError("Unsupported image type")

	        # Treat the array as channels-first only when it is 3-D, has 3 leading
	        # channels and does not already end in a 3-channel axis. This keeps
	        # 2-D (grayscale) arrays and H x W x 3 arrays with H == 3 intact.
	        # A (3, H, 3) array is inherently ambiguous and is read as
	        # channels-last.
	        if img.ndim == 3 and img.shape[0] == 3 and img.shape[-1] != 3:
	            img = img.transpose(1, 2, 0)

	        # NOTE(review): values are cast straight to uint8, so float inputs are
	        # presumably expected in the 0-255 range, not 0-1 -- confirm with callers.
	        return Image.fromarray(img.astype(np.uint8))

	    def _letterbox(self, img):
	        """Resize, pad and normalise one PIL image into a (3, size, size) array.

	        Raises:
	            ValueError: If the image has zero width or height.
	        """
	        # Convert before resizing: Pillow forces nearest-neighbour resampling
	        # for palette and 1-bit images, and the canvas below is RGB anyway.
	        if img.mode != "RGB":
	            img = img.convert("RGB")

	        width, height = img.size
	        if width == 0 or height == 0:
	            raise ValueError("Cannot preprocess an image with zero width or height")

	        # 1. Resize preserving aspect ratio to fit inside size x size.
	        scale = self.size / max(width, height)
	        # Clamp to 1 px so extreme aspect ratios do not round a side to 0,
	        # which ``Image.resize`` rejects.
	        new_w = max(1, int(round(width * scale)))
	        new_h = max(1, int(round(height * scale)))

	        # Bilinear interpolation (intended to match the TF bilinear resize).
	        resized = img.resize((new_w, new_h), Image.Resampling.BILINEAR)

	        # 2. Centre on a gray (128) canvas; any odd leftover pixel goes to the
	        #    right / bottom edge because of the floor division.
	        left = (self.size - new_w) // 2
	        top = (self.size - new_h) // 2
	        canvas = Image.new("RGB", (self.size, self.size), (128, 128, 128))
	        canvas.paste(resized, (left, top))

	        # 3. Map 0..255 to roughly [-1, 1): 0 -> -1.0, 128 -> 0.0, 255 -> ~0.992.
	        arr = (np.array(canvas, dtype=np.float32) - 128.0) / 128.0

	        # Channels first: (3, H, W).
	        return np.transpose(arr, (2, 0, 1))

	    def preprocess(self, images, return_tensors=None, **kwargs):
	        """Preprocess one image or a batch of images.

	        Args:
	            images: A single image or a list/tuple of images. Each image may be
	                a ``PIL.Image.Image``, a ``numpy.ndarray`` or a ``torch.Tensor``.
	            return_tensors: Passed to ``BatchFeature`` as ``tensor_type``.
	            **kwargs: Accepted for interface compatibility; unused here.

	        Returns:
	            A ``BatchFeature`` whose ``"pixel_values"`` entry holds one
	            ``float32`` array of shape ``(3, size, size)`` per input image.

	        Raises:
	            ValueError: If an image has an unsupported type or is empty.
	        """
	        if not isinstance(images, (list, tuple)):
	            images = [images]

	        pixel_values = [self._letterbox(self._to_pil(img)) for img in images]

	        data = {"pixel_values": pixel_values}
	        return BatchFeature(data=data, tensor_type=return_tensors)