ILLUME-MLLM
/

dualvitok

Model card Files Files and versions

dualvitok / image_processing_dualvitok.py

huangrh9's picture

Upload folder using huggingface_hub

5ca5652 verified 9 months ago

history blame contribute delete

2.34 kB

	# coding=utf-8

	from transformers.utils import TensorType, is_vision_available, logging

	from .image_processing_movqgan import MoVQImageProcessor

	logger = logging.get_logger(__name__)


	class DualViTokImageProcessor(MoVQImageProcessor):
	    r"""
	    Constructs a DualViTok image processor that dynamically resizes images based on the original images.
	    This image processor is based on MoVQImageProcessor with spatial_factor of 16.

	    Args:
	        do_resize (`bool`, optional, defaults to `True`):
	            Whether to resize the image's (height, width) dimensions.
	        resample (`PILImageResampling`, optional, defaults to `Resampling.BICUBIC`):
	            Resampling filter to use when resizing the image.
	        do_rescale (`bool`, optional, defaults to `True`):
	            Whether to rescale the image by the specified scale `rescale_factor`.
	        rescale_factor (`int` or `float`, optional, defaults to `1/255`):
	            Scale factor to use if rescaling the image.
	        do_normalize (`bool`, optional, defaults to `True`):
	            Whether to normalize the image.
	        image_mean (`float` or `List[float]`, optional, defaults to `[0.5, 0.5, 0.5]`):
	            Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
	        image_std (`float` or `List[float]`, optional, defaults to `[0.5, 0.5, 0.5]`):
	            Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
	        do_convert_rgb (`bool`, optional, defaults to `True`):
	            Whether to convert the image to RGB.
	        min_pixels (`int`, optional, defaults to `512 * 512`):
	            The min pixels of the image to resize the image.
	        max_pixels (`int`, optional, defaults to `1024 * 1024`):
	            The max pixels of the image to resize the image.
	        spatial_factor (`int`, optional, defaults to `16`):
	            The spatial downsample factor the image will be downsampled in feature extracting phase.
	    """

	    model_input_names = ["pixel_values"]

	    def __init__(
	        self,
	        *args,
	        spatial_factor: int = 16,
	        **kwargs,
	    ) -> None:
	        """Initialize the processor, overriding only the default `spatial_factor`.

	        Args:
	            *args: Positional arguments forwarded unchanged to `MoVQImageProcessor.__init__`.
	            spatial_factor: Spatial downsample factor passed to the parent as a keyword
	                argument; defaults to 16 for this subclass.
	            **kwargs: Keyword arguments forwarded unchanged to `MoVQImageProcessor.__init__`.
	        """
	        # Unpack with `*args` / `**kwargs`. Passing `args` bare would hand the parent a
	        # single tuple as its first positional argument, and `*kwargs` would spread the
	        # dict's *keys* as positional arguments, silently discarding the values.
	        super().__init__(*args, spatial_factor=spatial_factor, **kwargs)