| """FineViT image processor wrapper.""" |
|
|
| from __future__ import annotations |
|
|
| from transformers import AutoImageProcessor |
| from transformers.image_processing_utils import BaseImageProcessor |
|
|
|
|
class FineViTImageProcessor(BaseImageProcessor):
    """Image processor that delegates all work to a backbone's processor.

    The backbone processor is created on first use with
    ``AutoImageProcessor.from_pretrained(backbone_model_name)``; its ``size``
    and ``crop_size`` settings are then overridden from ``image_size``.

    Args:
        backbone_model_name: Identifier passed to
            ``AutoImageProcessor.from_pretrained``.
        image_size: Target edge length; coerced with ``int()``.
        **kwargs: Forwarded unchanged to ``BaseImageProcessor.__init__``.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        backbone_model_name: str = "facebook/dinov2-with-registers-base",
        image_size: int = 224,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.backbone_model_name = backbone_model_name
        self.image_size = int(image_size)
        # Filled in lazily by the ``backbone_processor`` property.
        self._backbone_processor = None

    @property
    def backbone_processor(self):
        """Return the backbone processor, loading and resizing it on first access."""
        if self._backbone_processor is None:
            processor = AutoImageProcessor.from_pretrained(self.backbone_model_name)
            self._set_square_size(processor)
            self._backbone_processor = processor
        return self._backbone_processor

    @property
    def image_mean(self):
        """Return the backbone's ``image_mean``.

        Falls back to ``[0.485, 0.456, 0.406]`` only when the backbone
        processor has no such attribute. Accessing this property loads the
        backbone processor if it has not been loaded yet.
        """
        return getattr(self.backbone_processor, "image_mean", [0.485, 0.456, 0.406])

    @property
    def image_std(self):
        """Return the backbone's ``image_std``.

        Falls back to ``[0.229, 0.224, 0.225]`` only when the backbone
        processor has no such attribute. Accessing this property loads the
        backbone processor if it has not been loaded yet.
        """
        return getattr(self.backbone_processor, "image_std", [0.229, 0.224, 0.225])

    def _set_square_size(self, processor) -> None:
        """Override ``processor``'s size settings in place from ``image_size``.

        A ``size`` dict keyed by ``"shortest_edge"`` keeps that form; any
        other existing ``size`` becomes a ``height``/``width`` dict. An
        existing ``crop_size`` is always set to the ``height``/``width`` dict.
        Attributes the processor does not have are not created.
        """
        size = {"height": self.image_size, "width": self.image_size}
        if hasattr(processor, "size"):
            current = processor.size
            if isinstance(current, dict) and "shortest_edge" in current:
                processor.size = {"shortest_edge": self.image_size}
            else:
                processor.size = size
        if hasattr(processor, "crop_size"):
            processor.crop_size = size

    def preprocess(self, images, **kwargs):
        """Run the backbone processor on ``images``.

        Defined so that callers using the ``preprocess`` entry point get the
        same result as calling the instance.
        NOTE(review): the inherited ``preprocess`` is expected to raise
        ``NotImplementedError`` when not overridden -- confirm against the
        installed transformers version.

        Args:
            images: Input passed to the backbone processor as ``images=``.
            **kwargs: Forwarded unchanged to the backbone processor.

        Returns:
            Whatever the backbone processor returns.
        """
        return self.backbone_processor(images=images, **kwargs)

    def __call__(self, images, **kwargs):
        """Process ``images``; equivalent to :meth:`preprocess`."""
        return self.preprocess(images, **kwargs)

    def to_dict(self):
        """Return the parent's ``to_dict()`` output without the cached backbone."""
        output = super().to_dict()
        # The loaded backbone processor is a runtime cache, not configuration.
        output.pop("_backbone_processor", None)
        return output
|
|
|
|
# Register the wrapper under the "AutoImageProcessor" auto class at import time.
FineViTImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|