from transformers import BitImageProcessor as BaseProcessor
import numpy as np
from transformers import AutoImageProcessor
from transformers.image_utils import PILImageResampling
class CustomDinov2Processor(BaseProcessor):
    """Image processor for DINOv2 that skips double-rescaling of already
    normalized inputs, forces RGB conversion, and snaps resize targets down
    to multiples of the 14-pixel ViT patch size.
    """

    model_type = "dinov2"

    # DINOv2's ViT backbone uses 14x14 patches; spatial dims must be multiples of this.
    PATCH_SIZE = 14

    def preprocess(self, images, **kwargs):
        """Preprocess images, disabling the 1/255 rescale when pixel values
        already lie in [0, 1].

        Args:
            images: A single image or a list of images (PIL / numpy / tensor).
            **kwargs: Forwarded to the base class ``preprocess``.
        """
        # Probe the first image to decide whether values are already normalized.
        # Guard against an empty list: fall through to the "unknown" default
        # instead of raising IndexError here (the base class reports it properly).
        test_img = images[0] if isinstance(images, list) and images else images
        if hasattr(test_img, "getextrema"):  # PIL image
            extrema = test_img.getextrema()
            # Multi-band (e.g. RGB) images return a tuple of (min, max) pairs.
            if isinstance(extrema[0], tuple):
                max_val = max(band[1] for band in extrema)
            else:
                max_val = extrema[1]
        elif hasattr(test_img, "max"):  # numpy array / torch tensor
            # Cast to float: .max() on numpy/torch returns an array/tensor scalar.
            max_val = float(test_img.max())
        else:
            # Unknown container: assume standard 0-255 pixel values.
            max_val = 255
        # Values already in [0, 1] -> do not rescale again by 1/255.
        kwargs["do_rescale"] = max_val > 1.0
        # Always convert to RGB so grayscale inputs get 3 channels.
        kwargs["do_convert_rgb"] = True
        return super().preprocess(images, **kwargs)

    def resize(self, image: np.ndarray, size=None, resample=PILImageResampling.BILINEAR, **kwargs) -> np.ndarray:
        """Resize to the nearest (floor) multiple of 14 in each dimension.

        The ``size`` argument is intentionally ignored: the target is derived
        from the input's own height/width, approximately preserving aspect
        ratio while satisfying the patch-size constraint.
        """
        h, w = image.shape[:2]
        # Floor to a multiple of 14, but never below one full patch:
        # (h // 14) * 14 would be 0 for images smaller than 14 px,
        # which would request an invalid zero-sized resize.
        new_h = max((h // self.PATCH_SIZE) * self.PATCH_SIZE, self.PATCH_SIZE)
        new_w = max((w // self.PATCH_SIZE) * self.PATCH_SIZE, self.PATCH_SIZE)
        return super().resize(
            image,
            size={"height": new_h, "width": new_w},
            resample=resample,
            **kwargs,
        )
# Register the class with the Auto API so that save_pretrained / push_to_hub
# serialize this custom processor code and AutoImageProcessor can resolve it
# (with trust_remote_code) when loading from the Hub.
CustomDinov2Processor.register_for_auto_class("AutoImageProcessor")