Spaces:
Running on Zero
Running on Zero
| # Copyright 2025 The JoyImage Team and The HuggingFace Team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import math | |
| from typing import Tuple | |
| from PIL import Image | |
| from ...configuration_utils import register_to_config | |
| from ...image_processor import VaeImageProcessor | |
# Candidate output resolutions, keyed by base size. Each entry is a
# (height, width) pair -- the order in which `find_best_bucket` reads them --
# and every side is a multiple of 64 between 512 and 2048. Rows are grouped by
# height, from the narrowest-tallest... to the widest-shortest shapes.
# NOTE: entry order is significant. `find_best_bucket` selects with `min`,
# which returns the first entry when two candidates are equally close.
# fmt: off
BUCKETS = {
    1024: [
        (512, 1792), (512, 1856), (512, 1920), (512, 1984), (512, 2048),
        (576, 1600), (576, 1664), (576, 1728), (576, 1792),
        (640, 1472), (640, 1536), (640, 1600),
        (704, 1344), (704, 1408), (704, 1472),
        (768, 1216), (768, 1280), (768, 1344),
        (832, 1152), (832, 1216),
        (896, 1088), (896, 1152),
        (960, 1024), (960, 1088),
        (1024, 960), (1024, 1024),
        (1088, 896), (1088, 960),
        (1152, 832), (1152, 896),
        (1216, 768), (1216, 832),
        (1280, 768),
        (1344, 704), (1344, 768),
        (1408, 704),
        (1472, 640), (1472, 704),
        (1536, 640),
        (1600, 576), (1600, 640),
        (1664, 576),
        (1728, 576),
        (1792, 512), (1792, 576),
        (1856, 512),
        (1920, 512),
        (1984, 512),
        (2048, 512),
    ],
}
# fmt: on
def find_best_bucket(height: int, width: int, basesize: int) -> Tuple[int, int]:
    """Return the (h, w) bucket whose aspect ratio is closest to height/width.

    Args:
        height: Source height; must be positive.
        width: Source width; must be positive.
        basesize: Key into ``BUCKETS`` selecting the list of candidates.

    Returns:
        The ``(h, w)`` entry of ``BUCKETS[basesize]`` that minimises
        ``abs(h / w - height / width)``. When several entries are equally
        close, the one listed first in the table is returned.

    Raises:
        ValueError: If ``height`` or ``width`` is not positive.
        KeyError: If ``basesize`` has no entry in ``BUCKETS``.
    """
    # Reject degenerate sizes up front: width == 0 would otherwise surface as a
    # bare ZeroDivisionError, and zero/negative values give a meaningless ratio.
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got height={height}, width={width}."
        )

    try:
        candidates = BUCKETS[basesize]
    except KeyError:
        # Same exception type as a plain lookup, but say what is supported.
        raise KeyError(
            f"Unsupported basesize {basesize!r}; available: {sorted(BUCKETS)}."
        ) from None

    target_ratio = height / width
    return min(candidates, key=lambda hw: abs(hw[0] / hw[1] - target_ratio))
class JoyImageEditImageProcessor(VaeImageProcessor):
    """
    Image processor for the JoyImage Edit pipeline.

    Handles bucket-based resolution selection and resize-center-crop preprocessing.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image.
        vae_scale_factor (`int`, *optional*, defaults to `8`):
            VAE spatial scale factor.
        basesize (`int`, *optional*, defaults to `1024`):
            Base resolution for bucket generation. Must be a key of `BUCKETS`.
        resample (`str`, *optional*, defaults to `bilinear`):
            Resampling filter for resizing.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image to [-1,1].
        do_binarize (`bool`, *optional*, defaults to `False`):
            Whether to binarize the image to 0/1.
        do_convert_rgb (`bool`, *optional*, defaults to `False`):
            Whether to convert the images to RGB format.
        do_convert_grayscale (`bool`, *optional*, defaults to `False`):
            Whether to convert the images to grayscale format.
    """

    @register_to_config
    def __init__(
        self,
        do_resize: bool = True,
        vae_scale_factor: int = 8,
        basesize: int = 1024,
        resample: str = "bilinear",
        do_normalize: bool = True,
        do_binarize: bool = False,
        do_convert_rgb: bool = False,
        do_convert_grayscale: bool = False,
    ):
        # `register_to_config` records the arguments above on `self.config`.
        # Without it they would be silently dropped and the
        # `self.config.basesize` lookups below would have nothing to read.
        super().__init__()

    def get_default_height_width(
        self,
        image: Image.Image | list[Image.Image] | None,
        height: int | None = None,
        width: int | None = None,
    ) -> Tuple[int, int]:
        """
        Pick the output bucket for an image or an explicit size.

        Args:
            image: A PIL image, a list of PIL images (only the first one is
                inspected), or `None`.
            height: Requested height. Only used when `width` is also given.
            width: Requested width. Only used when `height` is also given.

        Returns:
            The `(height, width)` bucket returned by `find_best_bucket` for
            `self.config.basesize`.
        """
        if height is not None and width is not None:
            # An explicit size takes precedence over the image's own size.
            src_w, src_h = width, height
        elif image is None:
            # Nothing to measure: fall back to a square of the base size.
            src_w, src_h = self.config.basesize, self.config.basesize
        elif isinstance(image, list):
            src_w, src_h = image[0].size
        else:
            # PIL reports size as (width, height).
            src_w, src_h = image.size
        return find_best_bucket(src_h, src_w, self.config.basesize)

    def resize_center_crop(
        self,
        img: Image.Image | list[Image.Image],
        target_size: Tuple[int, int],
    ):
        """
        Scale image to cover target_size, then center-crop.

        Args:
            img: Input PIL image or list of PIL images.
            target_size: (height, width) to crop to.

        Returns:
            Resized and center-cropped PIL image(s), matching the input type.
        """
        if isinstance(img, list):
            return [self.resize_center_crop(i, target_size) for i in img]

        w, h = img.size
        bh, bw = target_size

        # Use the larger of the two ratios so both sides reach the target
        # ("cover" semantics); the overflow on the other axis is cropped away.
        scale = max(bh / h, bw / w)
        # Round up so the resized image is not smaller than the crop window.
        resize_h = math.ceil(h * scale)
        resize_w = math.ceil(w * scale)

        # NOTE: the filter is fixed to bilinear here; the `resample` setting is
        # not consulted by this method.
        img = img.resize((resize_w, resize_h), Image.BILINEAR)

        # Split the overflow evenly between both sides of each axis.
        left = (resize_w - bw) // 2
        top = (resize_h - bh) // 2
        img = img.crop((left, top, left + bw, top + bh))
        return img