# File size: 1,972 Bytes (revision bc34bcf)
"""FineViT image processor wrapper."""
from __future__ import annotations
from transformers import AutoImageProcessor
from transformers.image_processing_utils import BaseImageProcessor
class FineViTImageProcessor(BaseImageProcessor):
    """Image processor that delegates all work to a lazily loaded backbone processor.

    The backbone processor is created with ``AutoImageProcessor.from_pretrained``
    the first time it is needed, and its ``size`` / ``crop_size`` settings are
    overridden so that it targets ``image_size``.

    Args:
        backbone_model_name: Identifier handed to
            ``AutoImageProcessor.from_pretrained`` to build the backbone processor.
        image_size: Target edge length; coerced with ``int()``.
        **kwargs: Forwarded unchanged to ``BaseImageProcessor.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        backbone_model_name: str = "facebook/dinov2-with-registers-base",
        image_size: int = 224,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.backbone_model_name = backbone_model_name
        self.image_size = int(image_size)
        # Populated on first access of `backbone_processor`; removed again in `to_dict`.
        self._backbone_processor = None

    @property
    def backbone_processor(self):
        """Return the backbone processor, loading and resizing it on first access."""
        if self._backbone_processor is None:
            processor = AutoImageProcessor.from_pretrained(self.backbone_model_name)
            self._set_square_size(processor)
            self._backbone_processor = processor
        return self._backbone_processor

    @property
    def image_mean(self):
        """Return the backbone's ``image_mean``.

        Falls back to ``[0.485, 0.456, 0.406]`` (presumably the ImageNet
        statistics) only when the backbone processor has no such attribute.
        Accessing this property loads the backbone processor if necessary.
        """
        return getattr(self.backbone_processor, "image_mean", [0.485, 0.456, 0.406])

    @property
    def image_std(self):
        """Return the backbone's ``image_std``.

        Falls back to ``[0.229, 0.224, 0.225]`` (presumably the ImageNet
        statistics) only when the backbone processor has no such attribute.
        Accessing this property loads the backbone processor if necessary.
        """
        return getattr(self.backbone_processor, "image_std", [0.229, 0.224, 0.225])

    def _set_square_size(self, processor) -> None:
        """Override ``processor.size`` / ``processor.crop_size`` with ``image_size``.

        A ``size`` dict that already uses the ``"shortest_edge"`` key keeps that
        form; any other ``size`` value is replaced by a ``height``/``width``
        dict. Attributes the processor does not have are left alone.
        """
        edge = self.image_size
        if hasattr(processor, "size"):
            current = processor.size
            if isinstance(current, dict) and "shortest_edge" in current:
                processor.size = {"shortest_edge": edge}
            else:
                processor.size = {"height": edge, "width": edge}
        if hasattr(processor, "crop_size"):
            # Separate dict object so mutating `size` later cannot silently
            # change `crop_size` (and vice versa).
            processor.crop_size = {"height": edge, "width": edge}

    def preprocess(self, images, **kwargs):
        """Run the backbone processor on ``images``.

        Defined explicitly because this class only overrode ``__call__`` before,
        so a direct ``preprocess(...)`` call fell through to the base-class
        hook instead of reaching the backbone.

        Args:
            images: Passed to the backbone processor as ``images=``.
            **kwargs: Forwarded unchanged to the backbone processor.

        Returns:
            Whatever the backbone processor returns.
        """
        return self.backbone_processor(images=images, **kwargs)

    def __call__(self, images, **kwargs):
        """Process ``images``; equivalent to ``preprocess(images, **kwargs)``."""
        return self.preprocess(images, **kwargs)

    def to_dict(self):
        """Return the serializable config without the cached backbone processor."""
        output = super().to_dict()
        # The backbone is rebuilt from `backbone_model_name`, so the cached
        # instance is dropped from the serialized output.
        output.pop("_backbone_processor", None)
        return output
# Module-level side effect: register the class under the "AutoImageProcessor"
# auto class (see the transformers custom-code docs for what this enables).
FineViTImageProcessor.register_for_auto_class("AutoImageProcessor")
|