dchen0 committed on
Commit
e72ee82
·
verified ·
1 Parent(s): 111a675

Add merged model + custom processor with pad_to_square

Browse files
font_classifier_processor.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Standalone FontClassifierImageProcessor for HuggingFace Hub deployment.
3
+ """
4
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoImageProcessor, BitImageProcessor
9
+
10
+
11
def pad_to_square(image):
    """
    Pad an image with zeros so that its width and height become equal.

    The original content is centred; when the required padding is odd, the
    extra pixel goes to the right/bottom edge. Nothing is resized, so the
    aspect ratio of the content is preserved.

    Args:
        image: A PIL ``Image.Image``, or a numpy array laid out as ``(H, W)``
            or ``(H, W, C)`` with ``C`` in ``{1, 3, 4}``.

    Returns:
        The padded image, of the same type as the input (numpy arrays keep
        their dtype). Inputs of any other type or array layout are returned
        unchanged.
    """
    if isinstance(image, np.ndarray):
        is_gray = image.ndim == 2
        is_channels_last = image.ndim == 3 and image.shape[2] in (1, 3, 4)
        if not (is_gray or is_channels_last):
            # Layout is ambiguous (e.g. channels-first or batched): padding
            # the wrong axes would corrupt the data, so leave it untouched.
            return image

        h, w = image.shape[:2]
        side = max(h, w)
        top = (side - h) // 2
        left = (side - w) // 2
        pad_width = [(top, side - h - top), (left, side - w - left)]
        # Never pad the channel axis, when there is one.
        pad_width += [(0, 0)] * (image.ndim - 2)
        # Padding the array directly (instead of round-tripping through a
        # uint8 PIL image) keeps the dtype, so float images in [0, 1] are not
        # truncated to zeros.
        return np.pad(image, pad_width, mode="constant", constant_values=0)

    if isinstance(image, Image.Image):
        w, h = image.size
        side = max(w, h)
        left = (side - w) // 2
        top = (side - h) // 2
        # T.Pad takes (left, top, right, bottom).
        padding = (left, top, side - w - left, side - h - top)
        return T.Pad(padding, fill=0)(image)

    return image
30
+
31
class FontClassifierImageProcessor(BitImageProcessor):
    """
    Image processor that pads every image to a square before the standard
    BiT preprocessing runs.

    This keeps inference-time preprocessing (e.g. on Inference Endpoints) in
    line with training, where images were padded to square first.

    The base class is ``BitImageProcessor`` rather than ``AutoImageProcessor``:
    the Auto class is only a factory (it refuses direct instantiation and
    defines no ``preprocess``), so it cannot be subclassed to build a working
    processor.
    """

    model_input_names = ["pixel_values"]

    def preprocess(self, images, **kwargs):
        """
        Pad each image to a square, then delegate to the parent ``preprocess``.

        Args:
            images: A single PIL image or numpy array, or an iterable of
                images.
            **kwargs: Forwarded unchanged to the parent ``preprocess``.

        Returns:
            Whatever the parent ``preprocess`` returns for the padded images;
            a single input image is still passed on as a one-element batch.
        """
        # A lone PIL image / numpy array is wrapped so that it is padded as
        # one image instead of being iterated over.
        if isinstance(images, (Image.Image, np.ndarray)):
            images = [images]

        padded_images = [pad_to_square(img) for img in images]

        # Call the parent implementation directly. Keeping a bound method on
        # the instance would put a non-config object into the instance
        # attributes.
        return super().preprocess(padded_images, **kwargs)


# Mark the class as custom code served through AutoImageProcessor, matching
# the "auto_map" entry in preprocessor_config.json.
FontClassifierImageProcessor.register_for_auto_class("AutoImageProcessor")
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c316e9ff382101ad0e9f5ea5b36573d34ac699c2cd98e54f16bb2acdef344b07
3
  size 348769976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd7bb6aa8492746ab58c79cc0a667d0654be31e1d50270140d1027e3523e0cb
3
  size 348769976
preprocessor_config.json CHANGED
@@ -13,7 +13,7 @@
13
  0.456,
14
  0.406
15
  ],
16
- "image_processor_type": "BitImageProcessor",
17
  "image_std": [
18
  0.229,
19
  0.224,
@@ -23,5 +23,8 @@
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
  "shortest_edge": 256
 
 
 
26
  }
27
- }
 
13
  0.456,
14
  0.406
15
  ],
16
+ "image_processor_type": "FontClassifierImageProcessor",
17
  "image_std": [
18
  0.229,
19
  0.224,
 
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
  "shortest_edge": 256
26
+ },
27
+ "auto_map": {
28
+ "AutoImageProcessor": "font_classifier_processor.FontClassifierImageProcessor"
29
  }
30
+ }