Z-Image-Turbo / VideoX-Fun /videox_fun /models /flux2_image_processor.py

yongqiang

initialize this repo

ba96580 20 days ago

5.14 kB

	# Modified from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/flux2/image_processor.py
	# Copyright 2025 The Black Forest Labs Team and The HuggingFace Team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import math
	from typing import Tuple

	import PIL.Image

	from diffusers.configuration_utils import register_to_config
	from diffusers.image_processor import VaeImageProcessor


	class Flux2ImageProcessor(VaeImageProcessor):
	    r"""
	    Image processor to preprocess the reference (character) image for the Flux2 model.

	    On top of the configuration forwarded to `VaeImageProcessor`, this class provides helpers to validate a
	    reference image (`check_image_input`), rescale it to a target area (`_resize_to_target_area`) and
	    center crop it (`_resize_and_crop`).

	    Args:
	        do_resize (`bool`, optional, defaults to `True`):
	            Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. Can accept
	            `height` and `width` arguments from [`image_processor.VaeImageProcessor.preprocess`] method.
	        vae_scale_factor (`int`, optional, defaults to `16`):
	            VAE (spatial) scale factor. If `do_resize` is `True`, the image is automatically resized to multiples of
	            this factor.
	        vae_latent_channels (`int`, optional, defaults to `32`):
	            VAE latent channels.
	        do_normalize (`bool`, optional, defaults to `True`):
	            Whether to normalize the image to [-1,1].
	        do_convert_rgb (`bool`, optional, defaults to `True`):
	            Whether to convert the images to RGB format.
	    """

	    @register_to_config
	    def __init__(
	        self,
	        do_resize: bool = True,
	        vae_scale_factor: int = 16,
	        vae_latent_channels: int = 32,
	        do_normalize: bool = True,
	        do_convert_rgb: bool = True,
	    ):
	        # All options are forwarded unchanged; only the defaults differ from the parent signature.
	        super().__init__(
	            do_resize=do_resize,
	            vae_scale_factor=vae_scale_factor,
	            vae_latent_channels=vae_latent_channels,
	            do_normalize=do_normalize,
	            do_convert_rgb=do_convert_rgb,
	        )

	    @staticmethod
	    def check_image_input(
	        image: PIL.Image.Image, max_aspect_ratio: int = 8, min_side_length: int = 64, max_area: int = 1024 * 1024
	    ) -> PIL.Image.Image:
	        """
	        Check that an image meets the minimum size and aspect ratio requirements.

	        Args:
	            image: PIL Image to validate.
	            max_aspect_ratio: Maximum allowed aspect ratio (width/height or height/width).
	            min_side_length: Minimum pixels required for both width and height.
	            max_area: Accepted for signature compatibility but NOT enforced: this method never rejects an
	                image for being too large. Use `_resize_to_target_area` to bring an image to a given area.

	        Returns:
	            The input image, unchanged, if it is valid.

	        Raises:
	            ValueError: If `image` is not a `PIL.Image.Image`, if either side is shorter than
	                `min_side_length`, or if the aspect ratio exceeds `max_aspect_ratio`.
	        """
	        if not isinstance(image, PIL.Image.Image):
	            raise ValueError(f"Image must be a PIL.Image.Image, got {type(image)}")

	        width, height = image.size

	        # Check minimum dimensions
	        if width < min_side_length or height < min_side_length:
	            raise ValueError(
	                f"Image too small: {width}×{height}. Both dimensions must be at least {min_side_length}px"
	            )

	        # Check aspect ratio; taking the max makes the test orientation-independent
	        aspect_ratio = max(width / height, height / width)
	        if aspect_ratio > max_aspect_ratio:
	            raise ValueError(
	                f"Aspect ratio too extreme: {width}×{height} (ratio: {aspect_ratio:.1f}:1). "
	                f"Maximum allowed ratio is {max_aspect_ratio}:1"
	            )

	        return image

	    @staticmethod
	    def _resize_to_target_area(image: PIL.Image.Image, target_area: int = 1024 * 1024) -> PIL.Image.Image:
	        """
	        Rescale an image so that its area is approximately `target_area`, preserving the aspect ratio.

	        Args:
	            image: PIL Image to resize.
	            target_area: Desired area in pixels².

	        Returns:
	            A new image resized with Lanczos resampling. Each side is truncated to a whole number of
	            pixels and is at least 1 pixel long.
	        """
	        image_width, image_height = image.size

	        # Area grows with the square of a uniform scale factor, hence the square root.
	        scale = math.sqrt(target_area / (image_width * image_height))
	        # Truncation can reach 0 for very elongated images; keep at least one pixel per side
	        # so the resize request stays valid.
	        width = max(1, int(image_width * scale))
	        height = max(1, int(image_height * scale))

	        return image.resize((width, height), PIL.Image.Resampling.LANCZOS)

	    def _resize_and_crop(
	        self,
	        image: PIL.Image.Image,
	        width: int,
	        height: int,
	    ) -> PIL.Image.Image:
	        r"""
	        Center crop the image to the specified width and height.

	        Despite its name, this method performs no resizing: it only extracts a `width` x `height` box centered
	        on the image.

	        Args:
	            image (`PIL.Image.Image`):
	                The image to crop.
	            width (`int`):
	                The width of the crop box.
	            height (`int`):
	                The height of the crop box.

	        Returns:
	            `PIL.Image.Image`:
	                The center-cropped image.
	        """
	        image_width, image_height = image.size

	        # Split the surplus evenly between both sides. If the requested size exceeds the image size the
	        # offsets become negative and the box extends past the image bounds; that case is left to
	        # `PIL.Image.Image.crop` to handle.
	        left = (image_width - width) // 2
	        top = (image_height - height) // 2
	        right = left + width
	        bottom = top + height

	        return image.crop((left, top, right, bottom))