Diffusers
PyTorch
custom_code
dualvitok / image_processing_dualvitok.py
huangrh9's picture
Upload folder using huggingface_hub
5ca5652 verified
# coding=utf-8
from transformers.utils import TensorType, is_vision_available, logging
from .image_processing_movqgan import MoVQImageProcessor
# Module-level logger, named after this module.
logger = logging.get_logger(__name__)
class DualViTokImageProcessor(MoVQImageProcessor):
    r"""
    Constructs a DualViTok image processor that dynamically resizes images based on the original images.

    This image processor is a `MoVQImageProcessor` whose `spatial_factor` defaults to 16. All other positional
    and keyword arguments are forwarded unchanged to `MoVQImageProcessor.__init__`.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use when resizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Mean to use if normalizing the image. This is a float or list of floats for each channel in the
            image.
        image_std (`float` or `List[float]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            Standard deviation to use if normalizing the image. This is a float or list of floats for each
            channel in the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        min_pixels (`int`, *optional*, defaults to `512 * 512`):
            The minimum number of pixels the image may have after resizing.
        max_pixels (`int`, *optional*, defaults to `1024 * 1024`):
            The maximum number of pixels the image may have after resizing.
        spatial_factor (`int`, *optional*, defaults to 16):
            The spatial downsampling factor applied to the image during the feature-extraction phase.
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        *args,
        spatial_factor: int = 16,
        **kwargs,
    ) -> None:
        """
        Initialize the processor, overriding only the default of `spatial_factor`.

        Args:
            *args: Positional arguments passed through to `MoVQImageProcessor.__init__`.
            spatial_factor (`int`, *optional*, defaults to 16): Keyword-only spatial downsampling factor,
                passed to the parent as `spatial_factor`.
            **kwargs: Keyword arguments passed through to `MoVQImageProcessor.__init__`.
        """
        # `spatial_factor` is keyword-only here so that it never collides with the
        # parent's positional parameters carried in `*args`.
        super().__init__(*args, spatial_factor=spatial_factor, **kwargs)