font_classifier_v4 / font_classifier_processor.py
dchen0's picture
Add merged model + custom processor with pad_to_square
e72ee82 verified
"""
Standalone FontClassifierImageProcessor for HuggingFace Hub deployment.
"""
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoImageProcessor
def pad_to_square(image):
    """
    Pad an image with zeros so that its width and height become equal.

    The original content is centred; when the amount of padding along an axis
    is odd, the extra pixel goes to the right/bottom edge. The content itself
    is never resized, so its aspect ratio is preserved.

    Args:
        image: A PIL image, or a numpy array laid out as (H, W) or as
            channels-last (H, W, C) with C in {1, 3, 4}.

    Returns:
        The padded image, of the same kind as the input (PIL in, PIL out;
        array in, array out with the input dtype). Any other input, including
        arrays in a layout not listed above, is returned unchanged.
    """
    if isinstance(image, np.ndarray):
        if image.ndim == 2:
            # Grayscale: only the two spatial axes exist.
            channel_pad = ()
        elif image.ndim == 3 and image.shape[2] in (1, 3, 4):
            # Channels-last: never pad the channel axis.
            channel_pad = ((0, 0),)
        else:
            # Unknown layout: leave untouched rather than guess the axes.
            return image

        height, width = image.shape[:2]
        max_size = max(height, width)
        pad_top = (max_size - height) // 2
        pad_left = (max_size - width) // 2
        pad_width = (
            (pad_top, max_size - height - pad_top),
            (pad_left, max_size - width - pad_left),
        ) + channel_pad
        # Pad directly in numpy so dtype and value range survive; a round-trip
        # through a uint8 PIL image would truncate float data.
        return np.pad(image, pad_width, mode="constant", constant_values=0)

    if isinstance(image, Image.Image):
        w, h = image.size
        max_size = max(w, h)
        pad_w = (max_size - w) // 2
        pad_h = (max_size - h) // 2
        # torchvision padding order is (left, top, right, bottom).
        padding = (pad_w, pad_h, max_size - w - pad_w, max_size - h - pad_h)
        return T.Pad(padding, fill=0)(image)

    return image
class FontClassifierImageProcessor(AutoImageProcessor):
    """
    Image processor that pads every input to a square before preprocessing.

    Each image is passed through `pad_to_square`, and the padded images are
    then handed to the parent class's `preprocess`. The intent is for
    inference (e.g. Inference Endpoints) to apply the same preprocessing as
    training.

    NOTE(review): `AutoImageProcessor` is normally used as a factory via
    `AutoImageProcessor.from_pretrained(...)`. Confirm that it can be
    instantiated directly and that it provides a `preprocess` method; if not,
    this class should derive from the concrete image processor used in
    training instead.
    """

    model_input_names = ["pixel_values"]

    # No custom __init__: construction is inherited unchanged, and the parent
    # `preprocess` is resolved at call time instead of being cached on the
    # instance as a bound method.

    def preprocess(self, images, **kwargs):
        """
        Pad the input image(s) to square, then run the parent `preprocess`.

        Args:
            images: A single PIL image, a single numpy image, a 4-D numpy
                array (treated as a batch and split along its first axis),
                or an iterable of images.
            **kwargs: Forwarded untouched to the parent `preprocess`.

        Returns:
            Whatever the parent `preprocess` returns for the list of padded
            images; the result is passed through unmodified.
        """
        if isinstance(images, Image.Image):
            images = [images]
        elif isinstance(images, np.ndarray):
            # A 4-D array is a stack of images; wrapping it whole would skip
            # padding because pad_to_square only handles single images.
            images = list(images) if images.ndim == 4 else [images]

        padded_images = [pad_to_square(img) for img in images]
        return super().preprocess(padded_images, **kwargs)
# Register the custom processor class with AutoImageProcessor when this module
# is imported, under the string key "FontClassifierImageProcessor".
# NOTE(review): the transformers documentation describes the first argument of
# `AutoImageProcessor.register` as a config class; a plain string is passed
# here. Confirm this is the intended usage for the transformers version
# deployed alongside this file.
AutoImageProcessor.register("FontClassifierImageProcessor", FontClassifierImageProcessor)