ovi054 committed on
Commit
8a79fb3
·
verified ·
1 Parent(s): c86dd60

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -0
app.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import math
3
+ import numpy as np
4
+ import random
5
+ import torch
6
+ import spaces
7
+
8
+ from PIL import Image
9
+ from diffusers import QwenImageEditPlusPipeline
10
+ from typing import Optional, Tuple
11
+
12
# Upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the base editing pipeline first, then move it to the chosen device.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2511",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# LoRA adapters, loaded in order as (adapter_name, repo_id, weight_name):
#   - "lightning": few-step LoRA for fast inference
#   - "color":     color grade transfer LoRA
for _adapter_name, _repo_id, _weight_name in (
    (
        "lightning",
        "lightx2v/Qwen-Image-Edit-2511-Lightning",
        "Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
    ),
    (
        "color",
        "ovi054/QIE-2511-Color-Grade-Transfer-LoRA",
        "QIE-2511-Color-Grade-Transfer-LoRA.safetensors",
    ),
):
    pipe.load_lora_weights(
        _repo_id,
        weight_name=_weight_name,
        adapter_name=_adapter_name,
    )

# Activate both adapters together at full strength.
pipe.set_adapters(["lightning", "color"], adapter_weights=[1.0, 1.0])
38
+
39
+
40
# Target pixel area (~1 MP) for generation. Must stay equal to the
# VAE_IMAGE_SIZE constant in diffusers' pipeline_qwenimage_edit_plus.py.
_VAE_IMAGE_SIZE = 1024 * 1024


def calculate_vae_gen_size(image: "Image.Image") -> Tuple[int, int]:
    """
    Return the (gen_w, gen_h) generation size for ``image``.

    The result has the aspect ratio of ``image``, an area of roughly
    ``_VAE_IMAGE_SIZE`` pixels, and both sides rounded to the nearest
    multiple of 32 (which also satisfies a divisible-by-16 requirement).

    Why this size: the pipeline resizes every input image to VAE_IMAGE_SIZE
    (~1 MP) before VAE-encoding it, using
        vae_width, vae_height = calculate_dimensions(VAE_IMAGE_SIZE, w / h)
    and builds img_shapes (used for 2-D RoPE) from both the output size and
    those conditioning sizes. If the two differ, the RoPE coordinate systems
    are misaligned and the output shows a large pixel shift. Generating at
    the same 1 MP-equivalent size keeps output tokens and Image 1
    conditioning tokens in one coordinate system. This mirrors ComfyUI's
    ImageScaleToTotalPixels at megapixels=1.0.

    Args:
        image: Any object exposing a PIL-style ``size`` of (width, height).

    Returns:
        ``(gen_w, gen_h)`` as ints, each a positive multiple of 32.

    Raises:
        ValueError: If the image has a non-positive side, or if its aspect
            ratio is so extreme that one side would round down to 0 pixels.
    """
    width, height = image.size
    if width <= 0 or height <= 0:
        # Previously surfaced as a bare ZeroDivisionError.
        raise ValueError(
            f"Image must have positive dimensions, got {width}x{height}."
        )

    ratio = width / height
    gen_w = math.sqrt(_VAE_IMAGE_SIZE * ratio)
    gen_h = gen_w / ratio

    # Same rounding rule as the pipeline: nearest multiple of 32.
    gen_w = round(gen_w / 32) * 32
    gen_h = round(gen_h / 32) * 32

    if gen_w <= 0 or gen_h <= 0:
        # A 0-pixel side cannot be generated; fail here with a clear message
        # instead of handing an invalid size to the pipeline.
        raise ValueError(
            f"Aspect ratio of a {width}x{height} image is too extreme to "
            "derive a valid generation size."
        )

    return int(gen_w), int(gen_h)
69
+
70
+
71
+
72
def update_dimensions_on_upload(
    image: Optional["Image.Image"],
) -> Optional["Image.Image"]:
    """
    Normalize an uploaded image's size for the pipeline.

    Caps the longest side at 1328 px and snaps both sides down to multiples
    of 16 (the pipeline requires divisibility by vae_scale_factor * 2 =
    8 * 2 = 16). The scale factor never exceeds 1.0, so images are not
    enlarged, with one exception: a side that would snap to 0 px is clamped
    up to 16 px so the resize target is always valid.

    Args:
        image: The uploaded PIL image, or ``None`` when the input is empty.

    Returns:
        ``None`` if ``image`` is ``None``; the same object if it already has
        the target size; otherwise a LANCZOS-resized copy.
    """
    if image is None:
        return image

    MAX_SIDE = 1328
    MULTIPLE = 16  # vae_scale_factor * 2

    original_width, original_height = image.size
    scale = min(MAX_SIDE / original_width, MAX_SIDE / original_height, 1.0)

    # Floor to a multiple of 16, but never below one full multiple: a side
    # under 16 px (tiny or very elongated images) would otherwise become 0.
    new_width = max(MULTIPLE, (int(original_width * scale) // MULTIPLE) * MULTIPLE)
    new_height = max(MULTIPLE, (int(original_height * scale) // MULTIPLE) * MULTIPLE)

    if (new_width, new_height) == (original_width, original_height):
        return image

    return image.resize((new_width, new_height), Image.LANCZOS)
94
+
95
+
96
@spaces.GPU
def infer(
    source_image: Optional[Image.Image] = None,
    reference_image: Optional[Image.Image] = None,
    seed: int = 0,
    randomize_seed: bool = True,
    true_guidance_scale: float = 1.0,
    num_inference_steps: int = 4,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[Image.Image, int]:
    """
    Transfer color grading from a reference image onto a source image.

    Args:
        source_image: Image 1, the content to preserve.
        reference_image: Image 2, the image whose color grade is transferred.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` for this run.
        true_guidance_scale: Passed to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        ``(result_image, seed)`` where ``seed`` is the value actually used.

    Raises:
        gr.Error: If either image is missing.
    """
    if source_image is None:
        raise gr.Error("Please upload a source image (Image 1).")
    if reference_image is None:
        raise gr.Error("Please upload a color grade reference image (Image 2).")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI/API numeric inputs may arrive as floats; the generator seed and the
    # step count must be integers.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)
    generator = torch.Generator(device=device).manual_seed(seed)

    src_img = source_image.convert("RGB")
    ref_img = reference_image.convert("RGB")

    # Generate at the 1MP-equivalent of Image 1's aspect ratio.
    # The pipeline internally scales ALL input images to VAE_IMAGE_SIZE (~1MP)
    # before VAE-encoding them as conditioning latents. img_shapes (for 2-D
    # RoPE) combines the output size (height/width) with those conditioning
    # sizes. If they differ, the RoPE coordinate systems are misaligned and
    # the output shows a huge pixel shift. Using the same 1MP formula as the
    # pipeline eliminates the mismatch.
    # (ComfyUI achieves this via ImageScaleToTotalPixels at megapixels=1.0.)
    gen_w, gen_h = calculate_vae_gen_size(src_img)

    # NOTE(review): the pipeline only applies true CFG when a negative prompt
    # is supplied; without one, true_cfg_scale > 1 is ignored. Supply a blank
    # negative prompt only when guidance is requested, so the default
    # (scale 1.0) path is unchanged. Confirm against the diffusers docs.
    negative_prompt = " " if true_guidance_scale > 1.0 else None

    result = pipe(
        image=[src_img, ref_img],
        prompt="Transfer ONLY the color grading from Image 2 onto Image 1",
        negative_prompt=negative_prompt,
        height=gen_h,
        width=gen_w,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    # The result is returned at the generation size (not resized back to the
    # source image's original dimensions).
    return result, seed
149
+
150
+
151
+ # --- UI ---
152
# Page-level styling: center the main column and the examples, keep the
# progress text readable in dark mode, and give image inputs a minimum height.
css = """
#col-container { max-width: 1000px; margin: 0 auto; }
.dark .progress-text { color: white !important }
#examples { max-width: 1000px; margin: 0 auto; }
.image-container { min-height: 300px; }
"""

# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened listing; confirm it against the deployed layout.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎨 Color Grade Transfer - Qwen Image Edit + LoRA")
        gr.Markdown(
            "\n"
            "Transfer color grading and tones from a reference image onto your source image ✨\n"
            "Using my [ovi054/Color-Grade-Transfer-LoRA](https://huggingface.co/ovi054/QIE-2511-Color-Grade-Transfer-LoRA) and 4 step inference\n"
        )

        with gr.Row():
            # Left column: the two image inputs, the run button and settings.
            with gr.Column():
                with gr.Row():
                    source_image = gr.Image(
                        label="Image 1 (Source — content to preserve)",
                        type="pil",
                        elem_classes="image-container",
                    )
                    reference_image = gr.Image(
                        label="Image 2 (Color Grade Reference)",
                        type="pil",
                        elem_classes="image-container",
                    )

                run_btn = gr.Button(
                    "🎨 Transfer Color Grade",
                    variant="primary",
                    size="lg",
                )

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0
                    )
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(
                        label="True Guidance Scale",
                        minimum=1.0,
                        maximum=10.0,
                        step=0.1,
                        value=1.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=1,
                        maximum=40,
                        step=1,
                        value=4,
                    )

            # Right column: the generated image.
            with gr.Column():
                result = gr.Image(label="Color Graded Output", interactive=False)

        # Example pairs run through infer with its default settings; results
        # are cached lazily.
        gr.Examples(
            examples=[
                ["images/image1.jpg", "images/image2.jpeg"],
                ["images/image2.jpeg", "images/image1.jpg"],
            ],
            inputs=[source_image, reference_image],
            outputs=[result, seed],
            fn=infer,
            cache_examples=True,
            cache_mode="lazy",
            elem_id="examples",
        )

    # Component lists in the same order as infer's parameters / return values.
    inputs = [
        source_image,
        reference_image,
        seed,
        randomize_seed,
        true_guidance_scale,
        num_inference_steps,
    ]
    outputs = [result, seed]

    run_btn.click(fn=infer, inputs=inputs, outputs=outputs)

    # Normalize each uploaded image in place (size cap + multiple-of-16 snap).
    for _image_input in (source_image, reference_image):
        _image_input.upload(
            fn=update_dimensions_on_upload,
            inputs=[_image_input],
            outputs=[_image_input],
        )

demo.launch(
    mcp_server=True,
    theme=gr.themes.Citrus(),
    css=css,
    footer_links=["api", "gradio", "settings"],
)