Add merged model + custom processor with pad_to_square

Browse files

Files changed (3) hide show

font_classifier_processor.py +67 -0
model.safetensors +1 -1
preprocessor_config.json +5 -2

font_classifier_processor.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""
+Standalone FontClassifierImageProcessor for HuggingFace Hub deployment.
+"""
+import numpy as np
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from transformers import AutoImageProcessor
+from transformers import BitImageProcessor
+def pad_to_square(image):
+    """
+    Shared utility function to pad image to square while preserving aspect ratio.
+    Works with both PIL Images and numpy arrays.
+    """
+    if isinstance(image, Image.Image):
+        w, h = image.size
+        max_size = max(w, h)
+        pad_w = (max_size - w) // 2
+        pad_h = (max_size - h) // 2
+        padding = (pad_w, pad_h, max_size - w - pad_w, max_size - h - pad_h)
+        return T.Pad(padding, fill=0)(image)
+    elif isinstance(image, np.ndarray):
+        # Convert numpy array to PIL, process, then back
+        if image.ndim == 3 and image.shape[2] == 3:  # RGB
+            pil_img = Image.fromarray(image.astype(np.uint8))
+            padded_pil = pad_to_square(pil_img)  # Recursive call with PIL image
+            return np.array(padded_pil)
+    return image
+class FontClassifierImageProcessor(AutoImageProcessor):
+    """
+    Custom image processor that includes pad_to_square transformation.
+    This ensures that Inference Endpoints will apply the same preprocessing as training.
+    """
+    model_input_names = ["pixel_values"]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Store the original preprocess method
+        self._original_preprocess = super().preprocess
+    def preprocess(self, images, **kwargs):
+        """Override preprocess to include pad_to_square"""
+        # Handle single image or list of images
+        if isinstance(images, (Image.Image, np.ndarray)):
+            images = [images]
+            single_image = True
+        else:
+            single_image = False
+        # Apply pad_to_square to each image using shared utility
+        padded_images = [pad_to_square(img) for img in images]
+        # Call original preprocess with padded images
+        result = self._original_preprocess(padded_images, **kwargs)
+        # If single image was passed, ensure we return the format expected
+        if single_image and isinstance(result, dict) and 'pixel_values' in result:
+            # Keep batch dimension for consistency
+            pass
+        return result
+# Register the custom processor class
+AutoImageProcessor.register("FontClassifierImageProcessor", FontClassifierImageProcessor)

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c316e9ff382101ad0e9f5ea5b36573d34ac699c2cd98e54f16bb2acdef344b07
 size 348769976

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cd7bb6aa8492746ab58c79cc0a667d0654be31e1d50270140d1027e3523e0cb
 size 348769976

preprocessor_config.json CHANGED Viewed

@@ -13,7 +13,7 @@
     0.456,
     0.406
   ],
-  "image_processor_type": "BitImageProcessor",
   "image_std": [
     0.229,
     0.224,
@@ -23,5 +23,8 @@
   "rescale_factor": 0.00392156862745098,
   "size": {
     "shortest_edge": 256
   }
-}

     0.456,
     0.406
   ],
+  "image_processor_type": "FontClassifierImageProcessor",
   "image_std": [
     0.229,
     0.224,
   "rescale_factor": 0.00392156862745098,
   "size": {
     "shortest_edge": 256
+  },
+  "auto_map": {
+    "AutoImageProcessor": "font_classifier_processor.FontClassifierImageProcessor"
   }
+}