File size: 4,950 Bytes
b8c861f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Copyright 2025 The JoyImage Team and The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from typing import Tuple

from PIL import Image

from ...configuration_utils import register_to_config
from ...image_processor import VaeImageProcessor


# fmt: off
# Candidate output resolutions, keyed by base size. Each entry is a
# (height, width) pair; every dimension in the table is a multiple of 64
# between 512 and 2048. Only the 1024 base size is defined here, so any other
# key raises KeyError on lookup.
# Entry order is significant: `find_best_bucket` selects with `min()`, which
# returns the earliest entry when two buckets are equally close.
BUCKETS = {
    1024: [
        (512, 1792), (512, 1856), (512, 1920), (512, 1984), (512, 2048),
        (576, 1600), (576, 1664), (576, 1728), (576, 1792),
        (640, 1472), (640, 1536), (640, 1600),
        (704, 1344), (704, 1408), (704, 1472),
        (768, 1216), (768, 1280), (768, 1344),
        (832, 1152), (832, 1216),
        (896, 1088), (896, 1152),
        (960, 1024), (960, 1088),
        (1024, 960), (1024, 1024),
        (1088, 896), (1088, 960),
        (1152, 832), (1152, 896),
        (1216, 768), (1216, 832),
        (1280, 768),
        (1344, 704), (1344, 768),
        (1408, 704),
        (1472, 640), (1472, 704),
        (1536, 640),
        (1600, 576), (1600, 640),
        (1664, 576),
        (1728, 576),
        (1792, 512), (1792, 576),
        (1856, 512),
        (1920, 512),
        (1984, 512),
        (2048, 512),
    ],
}
# fmt: on


def find_best_bucket(height: int, width: int, basesize: int) -> Tuple[int, int]:
    """
    Pick the bucket whose aspect ratio best matches a ``height`` x ``width`` image.

    Args:
        height: Source height.
        width: Source width. A value of zero raises ``ZeroDivisionError``.
        basesize: Key into ``BUCKETS`` selecting the candidate list. An unknown
            key raises ``KeyError``.

    Returns:
        The ``(h, w)`` candidate that minimises ``abs(h / w - height / width)``.
        When several candidates are equally close, the earliest one in the
        list is returned.
    """
    wanted = height / width

    def ratio_gap(bucket: Tuple[int, int]) -> float:
        # Absolute distance between this bucket's h/w ratio and the requested one.
        return abs(bucket[0] / bucket[1] - wanted)

    candidates = BUCKETS[basesize]
    return min(candidates, key=ratio_gap)


class JoyImageEditImageProcessor(VaeImageProcessor):
    """
    Image processor for the JoyImage Edit pipeline.

    Handles bucket-based resolution selection and resize-center-crop preprocessing.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image.
        vae_scale_factor (`int`, *optional*, defaults to `8`):
            VAE spatial scale factor.
        basesize (`int`, *optional*, defaults to `1024`):
            Base resolution used to select the bucket list. Must be a key of `BUCKETS`; only `1024` is defined in
            this module.
        resample (`str`, *optional*, defaults to `bilinear`):
            Resampling filter for resizing. `resize_center_crop` accepts `nearest`, `linear`, `bilinear`, `bicubic`
            and `lanczos`.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image to [-1,1].
        do_binarize (`bool`, *optional*, defaults to `False`):
            Whether to binarize the image to 0/1.
        do_convert_rgb (`bool`, *optional*, defaults to `False`):
            Whether to convert the images to RGB format.
        do_convert_grayscale (`bool`, *optional*, defaults to `False`):
            Whether to convert the images to grayscale format.
    """

    # Maps each accepted `resample` name to the PIL filter used by `resize_center_crop`.
    _RESAMPLE_FILTERS = {
        "nearest": Image.NEAREST,
        "linear": Image.BILINEAR,
        "bilinear": Image.BILINEAR,
        "bicubic": Image.BICUBIC,
        "lanczos": Image.LANCZOS,
    }

    @register_to_config
    def __init__(
        self,
        do_resize: bool = True,
        vae_scale_factor: int = 8,
        basesize: int = 1024,
        resample: str = "bilinear",
        do_normalize: bool = True,
        do_binarize: bool = False,
        do_convert_rgb: bool = False,
        do_convert_grayscale: bool = False,
    ):
        # Forward the shared settings so the base processor is initialised with
        # the caller's values instead of being called with no arguments.
        # `basesize` is specific to this subclass and is not forwarded.
        # NOTE(review): assumes VaeImageProcessor.__init__ accepts these keyword
        # names and would otherwise apply its own defaults - confirm against
        # the base class.
        super().__init__(
            do_resize=do_resize,
            vae_scale_factor=vae_scale_factor,
            resample=resample,
            do_normalize=do_normalize,
            do_binarize=do_binarize,
            do_convert_rgb=do_convert_rgb,
            do_convert_grayscale=do_convert_grayscale,
        )

    def get_default_height_width(
        self,
        image: Image.Image | list[Image.Image] | None,
        height: int | None = None,
        width: int | None = None,
    ) -> Tuple[int, int]:
        """
        Choose the output bucket for an image or an explicit size request.

        The source size is taken from, in order of precedence:

        1. `height` and `width`, when BOTH are given (a single one on its own is ignored);
        2. `(basesize, basesize)` when `image` is `None`;
        3. the first element's `.size` when `image` is a list;
        4. `image.size` otherwise.

        Args:
            image: PIL image, list of PIL images, or `None`.
            height: Requested height, used only together with `width`.
            width: Requested width, used only together with `height`.

        Returns:
            The `(height, width)` bucket from `BUCKETS[self.config.basesize]` whose aspect ratio is closest to the
            source size.
        """
        if height is not None and width is not None:
            src_w, src_h = width, height
        elif image is None:
            src_w, src_h = self.config.basesize, self.config.basesize
        elif isinstance(image, list):
            # Only the first image decides the bucket for the whole list.
            src_w, src_h = image[0].size
        else:
            # PIL reports size as (width, height).
            src_w, src_h = image.size

        return find_best_bucket(src_h, src_w, self.config.basesize)

    def resize_center_crop(
        self,
        img,
        target_size: Tuple[int, int],
    ):
        """
        Scale image to cover target_size, then center-crop.

        The resize uses the filter named by `self.config.resample` rather than a fixed one.

        Args:
            img: Input PIL image or list of PIL images.
            target_size: (height, width) to crop to.

        Returns:
            Resized and center-cropped PIL image(s), matching the input type.

        Raises:
            ValueError: If `self.config.resample` is not one of the supported filter names.
        """
        if isinstance(img, list):
            return [self.resize_center_crop(i, target_size) for i in img]

        resample_name = self.config.resample
        try:
            pil_filter = self._RESAMPLE_FILTERS[resample_name]
        except KeyError:
            supported = ", ".join(sorted(self._RESAMPLE_FILTERS))
            raise ValueError(
                f"Unsupported resample filter {resample_name!r}; expected one of: {supported}."
            ) from None

        w, h = img.size
        bh, bw = target_size
        # Take the larger ratio so the scaled image covers the target in both
        # dimensions; the excess is removed by the crop below.
        scale = max(bh / h, bw / w)
        # Round up so the scaled image is never smaller than the target.
        resize_h = math.ceil(h * scale)
        resize_w = math.ceil(w * scale)
        img = img.resize((resize_w, resize_h), pil_filter)
        left = (resize_w - bw) // 2
        top = (resize_h - bh) // 2
        img = img.crop((left, top, left + bw, top + bh))
        return img