"""FineViT image processor wrapper.""" from __future__ import annotations from transformers import AutoImageProcessor from transformers.image_processing_utils import BaseImageProcessor class FineViTImageProcessor(BaseImageProcessor): model_input_names = ["pixel_values"] def __init__( self, backbone_model_name: str = "facebook/dinov2-with-registers-base", image_size: int = 224, **kwargs, ): super().__init__(**kwargs) self.backbone_model_name = backbone_model_name self.image_size = int(image_size) self._backbone_processor = None @property def backbone_processor(self): if self._backbone_processor is None: processor = AutoImageProcessor.from_pretrained(self.backbone_model_name) self._set_square_size(processor) self._backbone_processor = processor return self._backbone_processor @property def image_mean(self): return getattr(self.backbone_processor, "image_mean", [0.485, 0.456, 0.406]) @property def image_std(self): return getattr(self.backbone_processor, "image_std", [0.229, 0.224, 0.225]) def _set_square_size(self, processor) -> None: size = {"height": self.image_size, "width": self.image_size} if hasattr(processor, "size"): current = getattr(processor, "size") if isinstance(current, dict) and "shortest_edge" in current: processor.size = {"shortest_edge": self.image_size} else: processor.size = size if hasattr(processor, "crop_size"): processor.crop_size = size def __call__(self, images, **kwargs): return self.backbone_processor(images=images, **kwargs) def to_dict(self): output = super().to_dict() output.pop("_backbone_processor", None) return output FineViTImageProcessor.register_for_auto_class("AutoImageProcessor")