File size: 2,343 Bytes
# coding=utf-8
from transformers.utils import TensorType, is_vision_available, logging
from .image_processing_movqgan import MoVQImageProcessor
logger = logging.get_logger(__name__)
class DualViTokImageProcessor(MoVQImageProcessor):
    r"""
    Constructs a DualViTok image processor that dynamically resizes images based on the original images.

    This image processor is based on MoVQImageProcessor with spatial_factor of 16. The only thing this subclass
    changes is the default value of `spatial_factor`; every other argument is forwarded unchanged to
    `MoVQImageProcessor.__init__`.

    NOTE(review): the defaults listed below for arguments other than `spatial_factor` are the ones documented for
    the parent class; confirm against `MoVQImageProcessor` if they matter.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use when resizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
        image_std (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Standard deviation to use if normalizing the image. This is a float or list of floats for each channel
            in the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        min_pixels (`int`, *optional*, defaults to `512 * 512`):
            The min pixels of the image to resize the image.
        max_pixels (`int`, *optional*, defaults to `1024 * 1024`):
            The max pixels of the image to resize the image.
        spatial_factor (`int`, *optional*, defaults to 16):
            The spatial downsample factor the image will be downsampled in feature extracting phase.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        *args,
        spatial_factor: int = 16,
        **kwargs,
    ) -> None:
        """
        Initialize the processor, defaulting `spatial_factor` to 16.

        Args:
            *args: Positional arguments passed through to `MoVQImageProcessor.__init__`.
            spatial_factor (`int`, *optional*, defaults to 16): Keyword-only spatial downsample factor, forwarded
                to the parent as `spatial_factor`.
            **kwargs: Remaining keyword arguments passed through to `MoVQImageProcessor.__init__`.
        """
        # spatial_factor is keyword-only here, so it is always handed to the parent by name.
        super().__init__(*args, spatial_factor=spatial_factor, **kwargs)
|