# coding=utf-8
from transformers.utils import TensorType, is_vision_available, logging

from .image_processing_movqgan import MoVQImageProcessor


logger = logging.get_logger(__name__)


class DualViTokImageProcessor(MoVQImageProcessor):
    r"""
    Constructs a DualViTok image processor that dynamically resizes images based on the original images.

    This image processor is based on MoVQImageProcessor with spatial_factor of 16.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use when resizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
        image_std (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Standard deviation to use if normalizing the image. This is a float or list of floats for each channel
            in the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        min_pixels (`int`, *optional*, defaults to `512 * 512`):
            The min pixels of the image to resize the image.
        max_pixels (`int`, *optional*, defaults to `1024 * 1024`):
            The max pixels of the image to resize the image.
        spatial_factor (`int`, *optional*, defaults to 16):
            The spatial downsampling factor applied to the image during the feature extraction phase.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        *args,
        spatial_factor: int = 16,
        **kwargs,
    ) -> None:
        # Everything is forwarded to MoVQImageProcessor unchanged; this subclass only
        # supplies its own default of 16 for `spatial_factor`.
        super().__init__(*args, spatial_factor=spatial_factor, **kwargs)