finevit-base / image_processor_finevit.py
toilaluan's picture
Upload folder using huggingface_hub
bc34bcf verified
"""FineViT image processor wrapper."""
from __future__ import annotations
from transformers import AutoImageProcessor
from transformers.image_processing_utils import BaseImageProcessor
class FineViTImageProcessor(BaseImageProcessor):
    """Image processor that delegates all preprocessing to a backbone's processor.

    The wrapped processor is loaded lazily with
    ``AutoImageProcessor.from_pretrained(backbone_model_name)`` the first time
    it is needed and is then cached on the instance. Right after loading, its
    ``size`` / ``crop_size`` settings are overridden so they follow
    ``image_size``.

    Args:
        backbone_model_name: Identifier handed to
            ``AutoImageProcessor.from_pretrained`` to load the wrapped processor.
        image_size: Target edge length; coerced with ``int()``.
        **kwargs: Forwarded unchanged to ``BaseImageProcessor.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        backbone_model_name: str = "facebook/dinov2-with-registers-base",
        image_size: int = 224,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.backbone_model_name = backbone_model_name
        self.image_size = int(image_size)
        # Populated on first access of ``backbone_processor``; constructing this
        # object therefore never loads the backbone processor.
        self._backbone_processor = None

    @property
    def backbone_processor(self):
        """Return the wrapped processor, loading and configuring it on first use."""
        if self._backbone_processor is None:
            processor = AutoImageProcessor.from_pretrained(self.backbone_model_name)
            self._set_square_size(processor)
            self._backbone_processor = processor
        return self._backbone_processor

    @property
    def image_mean(self):
        """Return the backbone processor's ``image_mean``.

        Falls back to ``[0.485, 0.456, 0.406]`` when the backbone processor has
        no such attribute or when the attribute is ``None``.
        """
        # A plain ``getattr(obj, name, default)`` would hand back ``None`` when
        # the attribute exists but is unset, so the ``None`` case is checked
        # explicitly.
        mean = getattr(self.backbone_processor, "image_mean", None)
        return mean if mean is not None else [0.485, 0.456, 0.406]

    @property
    def image_std(self):
        """Return the backbone processor's ``image_std``.

        Falls back to ``[0.229, 0.224, 0.225]`` when the backbone processor has
        no such attribute or when the attribute is ``None``.
        """
        std = getattr(self.backbone_processor, "image_std", None)
        return std if std is not None else [0.229, 0.224, 0.225]

    def _set_square_size(self, processor) -> None:
        """Point ``processor``'s resize/crop settings at ``self.image_size``.

        * If ``processor`` has a ``size`` attribute that is a dict containing
          ``"shortest_edge"``, that form is kept and only its value is replaced.
        * Any other existing ``size`` attribute is replaced by a square
          ``{"height": ..., "width": ...}`` dict.
        * An existing ``crop_size`` attribute is always replaced by the square
          dict.

        Attributes the processor does not have are not created.
        """
        square = {"height": self.image_size, "width": self.image_size}

        if hasattr(processor, "size"):
            current = processor.size
            if isinstance(current, dict) and "shortest_edge" in current:
                processor.size = {"shortest_edge": self.image_size}
            else:
                processor.size = square

        if hasattr(processor, "crop_size"):
            processor.crop_size = square

    def preprocess(self, images, **kwargs):
        """Preprocess ``images``; equivalent to calling the processor directly.

        ``BaseImageProcessor.preprocess`` is not implemented by the base class,
        so without this override only ``__call__`` would be usable on this
        wrapper.
        """
        return self(images, **kwargs)

    def __call__(self, images, **kwargs):
        """Run the backbone processor on ``images``, forwarding ``kwargs`` as-is."""
        return self.backbone_processor(images=images, **kwargs)

    def to_dict(self) -> dict:
        """Serialize the configuration, leaving out the cached backbone processor."""
        output = super().to_dict()
        # The cached processor is runtime state, not configuration; it is
        # re-created from ``backbone_model_name`` when needed.
        output.pop("_backbone_processor", None)
        return output
# Register this class under the "AutoImageProcessor" auto class at import time.
# NOTE(review): presumably this is what lets the hub checkpoint resolve to this
# custom class via AutoImageProcessor with remote code enabled — confirm
# against the transformers custom-code documentation.
FineViTImageProcessor.register_for_auto_class("AutoImageProcessor")