Commit 5cb2a8d
Parent(s): 6398828

initial json for preprocessor

Files changed:
- preprocessor_config.json +10 -0
- processor_config.json +0 -4
- processor_dfine.py +62 -0
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
+{
+    "image_processor_type": "DFineProcessor",
+    "size": 640,
+    "do_resize": true,
+    "do_pad": true,
+    "return_tensor": "pt",
+    "return_ratio": true,
+    "return_padding": true,
+    "return_orig_size": true
+}
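
The `from_pretrained` stub added in `processor_dfine.py` below does not yet consume this file. A minimal sketch of how it could, assuming the config sits in a local directory as `preprocessor_config.json`; the json/os loading logic here is an assumption, not part of this commit:

import json, os

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    # Assumed behavior: read `size` from preprocessor_config.json in the
    # given directory, falling back to the 640 default when absent.
    config_path = os.path.join(pretrained_model_name_or_path, "preprocessor_config.json")
    size = 640
    if os.path.isfile(config_path):
        with open(config_path) as f:
            size = json.load(f).get("size", 640)
    return cls(size=size)
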
processor_config.json DELETED
@@ -1,4 +0,0 @@
-{
-    "processor_class": "DFineProcessor"
-}
-
processor_dfine.py ADDED
@@ -0,0 +1,62 @@
+from transformers import ProcessorMixin
+from PIL import Image
+import torchvision.transforms as T
+import torch
+import numpy as np
+
+class DFineProcessor(ProcessorMixin):
+    def __init__(self, size=640):
+        self.size = size
+
+    def resize_with_aspect_ratio(self, image):
+        original_width, original_height = image.size
+        ratio = min(self.size / original_width, self.size / original_height)
+        new_width = int(original_width * ratio)
+        new_height = int(original_height * ratio)
+        image = image.resize((new_width, new_height), Image.BILINEAR)
+
+        new_image = Image.new("RGB", (self.size, self.size))
+        pad_w = (self.size - new_width) // 2
+        pad_h = (self.size - new_height) // 2
+        new_image.paste(image, (pad_w, pad_h))
+
+        return new_image, ratio, pad_w, pad_h
+
+    def __call__(self, images):
+        if not isinstance(images, (list, tuple)):
+            images = [images]
+
+        tensors, orig_sizes, ratios, pad_ws, pad_hs = [], [], [], [], []
+
+        for image in images:
+            if isinstance(image, np.ndarray):
+                image = Image.fromarray(image)
+            elif not isinstance(image, Image.Image):
+                raise ValueError("Input must be PIL.Image, numpy.ndarray, or list of them.")
+
+            resized, ratio, pad_w, pad_h = self.resize_with_aspect_ratio(image)
+            tensor = T.ToTensor()(resized)
+
+            tensors.append(tensor)
+            orig_sizes.append([resized.size[1], resized.size[0]])
+            ratios.append(ratio)
+            pad_ws.append(pad_w)
+            pad_hs.append(pad_h)
+
+        batch_tensor = torch.stack(tensors)
+        return {
+            "images": batch_tensor,
+            "orig_target_sizes": torch.tensor(orig_sizes),
+            "ratio": torch.tensor(ratios),
+            "pad_w": torch.tensor(pad_ws),
+            "pad_h": torch.tensor(pad_hs),
+        }
+
+    def save_pretrained(self, save_directory):
+        # Optional: save size or metadata here if needed
+        pass
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        # Optionally load metadata like `size` from processor_config.json
+        return cls()
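
A minimal usage sketch of the processor above, including undoing the letterboxing on model outputs; the image path and predicted boxes are hypothetical, and the (x1, y1, x2, y2) box format in the padded 640x640 frame is an assumption, not something this commit defines:

from PIL import Image
import torch
from processor_dfine import DFineProcessor

processor = DFineProcessor(size=640)
image = Image.open("example.jpg").convert("RGB")  # hypothetical input
batch = processor(image)
# batch["images"] is a (1, 3, 640, 640) float tensor scaled to [0, 1].

# Map boxes from the padded frame back to original-image coordinates by
# reversing the scale-then-pad applied in resize_with_aspect_ratio.
boxes = torch.tensor([[100.0, 120.0, 300.0, 340.0]])  # hypothetical predictions
ratio = batch["ratio"][0]
pad_w, pad_h = batch["pad_w"][0], batch["pad_h"][0]
boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / ratio
boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / ratio

Note that `orig_target_sizes` as written holds the padded canvas size (`resized` is always self.size by self.size), not the pre-resize dimensions, so recovering original coordinates relies on `ratio`, `pad_w`, and `pad_h` as above.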