File size: 2,491 Bytes
e72ee82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
"""
Standalone FontClassifierImageProcessor for HuggingFace Hub deployment.
"""
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoImageProcessor
def pad_to_square(image):
"""
Shared utility function to pad image to square while preserving aspect ratio.
Works with both PIL Images and numpy arrays.
"""
if isinstance(image, Image.Image):
w, h = image.size
max_size = max(w, h)
pad_w = (max_size - w) // 2
pad_h = (max_size - h) // 2
padding = (pad_w, pad_h, max_size - w - pad_w, max_size - h - pad_h)
return T.Pad(padding, fill=0)(image)
elif isinstance(image, np.ndarray):
# Convert numpy array to PIL, process, then back
if image.ndim == 3 and image.shape[2] == 3: # RGB
pil_img = Image.fromarray(image.astype(np.uint8))
padded_pil = pad_to_square(pil_img) # Recursive call with PIL image
return np.array(padded_pil)
return image
class FontClassifierImageProcessor(AutoImageProcessor):
"""
Custom image processor that includes pad_to_square transformation.
This ensures that Inference Endpoints will apply the same preprocessing as training.
"""
model_input_names = ["pixel_values"]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Store the original preprocess method
self._original_preprocess = super().preprocess
def preprocess(self, images, **kwargs):
"""Override preprocess to include pad_to_square"""
# Handle single image or list of images
if isinstance(images, (Image.Image, np.ndarray)):
images = [images]
single_image = True
else:
single_image = False
# Apply pad_to_square to each image using shared utility
padded_images = [pad_to_square(img) for img in images]
# Call original preprocess with padded images
result = self._original_preprocess(padded_images, **kwargs)
# If single image was passed, ensure we return the format expected
if single_image and isinstance(result, dict) and 'pixel_values' in result:
# Keep batch dimension for consistency
pass
return result
# Register the custom processor class
AutoImageProcessor.register("FontClassifierImageProcessor", FontClassifierImageProcessor) |