# coding=utf-8

from transformers.utils import TensorType, is_vision_available, logging

from .image_processing_movqgan import MoVQImageProcessor

logger = logging.get_logger(__name__)


class DualViTokImageProcessor(MoVQImageProcessor):
    r"""
    Constructs a DualViTok image processor that dynamically resizes images based on the original images.
    This image processor is based on MoVQImageProcessor with spatial_factor of 16.

    Every argument other than `spatial_factor` is forwarded unchanged to `MoVQImageProcessor`; the defaults
    listed for those arguments are the ones expected from the parent class.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use when resizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
        image_std (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        min_pixels (`int`, *optional*, defaults to `512 * 512`):
            The minimum number of pixels the image may have after resizing.
        max_pixels (`int`, *optional*, defaults to `1024 * 1024`):
            The maximum number of pixels the image may have after resizing.
        spatial_factor (`int`, *optional*, defaults to 16):
            The spatial downsample factor by which the image will be downsampled in the feature extracting phase.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        *args,
        spatial_factor: int = 16,
        **kwargs,
    ) -> None:
        """Initialize the parent processor, overriding only the `spatial_factor` default.

        Args:
            *args: Positional arguments passed through to `MoVQImageProcessor.__init__`.
            spatial_factor: Spatial downsample factor; keyword-only, defaults to 16.
            **kwargs: Keyword arguments passed through to `MoVQImageProcessor.__init__`.
        """
        # `spatial_factor` is always passed by keyword so this class's default
        # (16) is what the parent receives when the caller does not set it.
        super().__init__(*args, spatial_factor=spatial_factor, **kwargs)