Add merged model + custom processor with pad_to_square
Browse files- font_classifier_processor.py +67 -0
- model.safetensors +1 -1
- preprocessor_config.json +5 -2
font_classifier_processor.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Standalone FontClassifierImageProcessor for HuggingFace Hub deployment.
|
| 3 |
+
"""
|
| 4 |
+
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoImageProcessor, BitImageProcessor
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def pad_to_square(image):
    """Pad an image with zeros so that its width and height become equal.

    The original content stays centred; when the missing amount is odd, the
    extra row/column goes to the bottom/right edge.

    Args:
        image: A PIL image, or a NumPy array shaped ``(H, W)`` or
            ``(H, W, 3)``.

    Returns:
        A square image of the same kind as the input (PIL in, PIL out; array
        in, array out with the input dtype preserved). Any other input,
        including arrays with a different shape, is returned unchanged.
    """
    if isinstance(image, np.ndarray):
        # Only arrays whose first two axes are treated as (H, W) are padded;
        # every other shape passes through unchanged.
        is_gray = image.ndim == 2
        is_rgb = image.ndim == 3 and image.shape[2] == 3
        if not (is_gray or is_rgb):
            return image

        h, w = image.shape[:2]
        side = max(h, w)
        top = (side - h) // 2
        left = (side - w) // 2
        pad_width = [(top, side - h - top), (left, side - w - left)]
        if is_rgb:
            pad_width.append((0, 0))  # never pad the channel axis

        # Padding directly in NumPy keeps the dtype, so float images are not
        # truncated by a uint8 round trip through PIL.
        return np.pad(image, pad_width, mode="constant", constant_values=0)

    if isinstance(image, Image.Image):
        w, h = image.size
        side = max(w, h)
        left = (side - w) // 2
        top = (side - h) // 2
        # torchvision's Pad takes a 4-tuple as (left, top, right, bottom).
        padding = (left, top, side - w - left, side - h - top)
        return T.Pad(padding, fill=0)(image)

    return image
|
| 30 |
+
|
| 31 |
+
class FontClassifierImageProcessor(BitImageProcessor):
    """Image processor that pads every input to a square before preprocessing.

    Each image is passed through ``pad_to_square`` and the padded batch is
    then handed to the parent class's ``preprocess``, so inference goes
    through the same padding step regardless of who calls the processor.

    NOTE(review): the base class must be a concrete image processor.
    ``AutoImageProcessor`` is a factory: it cannot be instantiated and has no
    ``preprocess`` method to delegate to. ``BitImageProcessor`` was picked
    because it accepts the ``shortest_edge`` size / rescale / normalize
    settings used by this checkpoint's ``preprocessor_config.json`` —
    confirm it is the processor the model was trained with.
    """

    model_input_names = ["pixel_values"]

    def preprocess(self, images, **kwargs):
        """Pad each image to a square, then run the parent preprocessing.

        Args:
            images: A single PIL image or NumPy array, or an iterable of
                images.
            **kwargs: Forwarded unchanged to the parent ``preprocess``.

        Returns:
            Whatever the parent ``preprocess`` returns for the padded images;
            a single input is still processed as a batch of one.
        """
        # Wrap a lone image so the padding step always iterates over a batch.
        if isinstance(images, (Image.Image, np.ndarray)):
            images = [images]

        padded_images = [pad_to_square(img) for img in images]

        # Delegate through super() at call time; keeping a bound method on the
        # instance would put a non-serializable value into ``__dict__``.
        return super().preprocess(padded_images, **kwargs)


# Register the class as custom code for AutoImageProcessor.
# ``AutoImageProcessor.register`` expects a config class as its first argument,
# not a name string, so it is not the right hook here.
FontClassifierImageProcessor.register_for_auto_class("AutoImageProcessor")
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 348769976
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd7bb6aa8492746ab58c79cc0a667d0654be31e1d50270140d1027e3523e0cb
|
| 3 |
size 348769976
|
preprocessor_config.json
CHANGED
|
@@ -13,7 +13,7 @@
|
|
| 13 |
0.456,
|
| 14 |
0.406
|
| 15 |
],
|
| 16 |
-
"image_processor_type": "
|
| 17 |
"image_std": [
|
| 18 |
0.229,
|
| 19 |
0.224,
|
|
@@ -23,5 +23,8 @@
|
|
| 23 |
"rescale_factor": 0.00392156862745098,
|
| 24 |
"size": {
|
| 25 |
"shortest_edge": 256
|
|
|
|
|
|
|
|
|
|
| 26 |
}
|
| 27 |
-
}
|
|
|
|
| 13 |
0.456,
|
| 14 |
0.406
|
| 15 |
],
|
| 16 |
+
"image_processor_type": "FontClassifierImageProcessor",
|
| 17 |
"image_std": [
|
| 18 |
0.229,
|
| 19 |
0.224,
|
|
|
|
| 23 |
"rescale_factor": 0.00392156862745098,
|
| 24 |
"size": {
|
| 25 |
"shortest_edge": 256
|
| 26 |
+
},
|
| 27 |
+
"auto_map": {
|
| 28 |
+
"AutoImageProcessor": "font_classifier_processor.FontClassifierImageProcessor"
|
| 29 |
}
|
| 30 |
+
}
|