Spaces:
Running on Zero
Running on Zero
| import gradio as gr | |
| import math | |
| import numpy as np | |
| import random | |
| import torch | |
| import spaces | |
| from PIL import Image | |
| from diffusers import QwenImageEditPlusPipeline | |
| from typing import Optional, Tuple | |
# Upper bound for seeds: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# --- Model Loading ---
# Weights are loaded in bfloat16; the pipeline runs on CUDA when available,
# otherwise on CPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2511",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# LoRA adapters as (repository, weight file, adapter name), loaded in this order:
#   1. the 4-step Lightning LoRA for fast inference
#   2. the color grade transfer LoRA
_LORA_ADAPTERS = (
    (
        "lightx2v/Qwen-Image-Edit-2511-Lightning",
        "Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
        "lightning",
    ),
    (
        "ovi054/QIE-2511-Color-Grade-Transfer-LoRA",
        "QIE-2511-Color-Grade-Transfer-LoRA.safetensors",
        "color",
    ),
)
for _repo_id, _weight_file, _adapter in _LORA_ADAPTERS:
    pipe.load_lora_weights(_repo_id, weight_name=_weight_file, adapter_name=_adapter)

# Activate both adapters together at full strength.
pipe.set_adapters(["lightning", "color"], adapter_weights=[1.0, 1.0])
# VAE_IMAGE_SIZE must match the pipeline constant (pipeline_qwenimage_edit_plus.py line 67)
_VAE_IMAGE_SIZE = 1024 * 1024


def calculate_vae_gen_size(
    image: "Image.Image",
    target_area: int = _VAE_IMAGE_SIZE,
) -> Tuple[int, int]:
    """
    Return (gen_w, gen_h) matching the pipeline's internal VAE conditioning
    scale for this image.

    The pipeline always resizes every input image to VAE_IMAGE_SIZE (~1MP) before
    VAE-encoding it into image_latents, using:
        vae_width, vae_height = calculate_dimensions(VAE_IMAGE_SIZE, w / h)
    img_shapes (used for 2-D RoPE) is built from BOTH the output size (height/width)
    AND the conditioning sizes (vae_width, vae_height). When they differ, the RoPE
    coordinate systems are misaligned → huge pixel shift.
    Passing gen_h/gen_w = the same 1MP-equivalent makes the output tokens and Image 1
    conditioning tokens share an identical coordinate system → no shift.
    This is exactly what ComfyUI’s ImageScaleToTotalPixels (megapixels=1.0) achieves.

    Args:
        image: Any object exposing a PIL-style ``size`` attribute ``(width, height)``.
        target_area: Target pixel count of the generated image. Defaults to the
            pipeline's ~1MP conditioning area.

    Returns:
        ``(gen_w, gen_h)``, both multiples of 32 and never smaller than 32.

    Raises:
        ValueError: If the image has a zero-sized side or ``target_area`` is not
            positive.
    """
    width, height = image.size
    if width <= 0 or height <= 0:
        raise ValueError(f"Image has an empty dimension: {width}x{height}")
    if target_area <= 0:
        raise ValueError(f"target_area must be positive, got {target_area}")

    ratio = width / height
    gen_w = math.sqrt(target_area * ratio)
    gen_h = gen_w / ratio

    # The pipeline rounds to multiples of 32 (which also satisfies the ÷16
    # divisibility requirement). For extreme aspect ratios that rounding would
    # yield 0, which is not a usable size, so clamp each side to one 32 px step.
    gen_w = max(32, round(gen_w / 32) * 32)
    gen_h = max(32, round(gen_h / 32) * 32)
    return gen_w, gen_h
def update_dimensions_on_upload(image: Optional["Image.Image"]) -> Optional["Image.Image"]:
    """
    Cap the longest side to 1328px and snap both sides down to multiples of 16.

    The pipeline requires divisibility by vae_scale_factor * 2 = 8 * 2 = 16.
    Images are never upscaled, with one exception: a side that would snap to
    0 (shorter than 16px after scaling) is set to 16 so the resize stays valid.

    Args:
        image: The uploaded image, or None when the input was cleared.

    Returns:
        None if ``image`` is None; the same object if it already satisfies the
        constraints; otherwise a LANCZOS-resized copy.
    """
    if image is None:
        return None

    max_side = 1328
    multiple = 16  # vae_scale_factor * 2

    original_width, original_height = image.size
    width, height = original_width, original_height

    longest = max(width, height)
    if longest > max_side:
        # Integer arithmetic keeps the longest side at exactly max_side; a float
        # scale factor can land just below it and snap down a full step.
        width = width * max_side // longest
        height = height * max_side // longest

    # Snap down to the grid, but never below one grid step.
    new_width = max(multiple, width // multiple * multiple)
    new_height = max(multiple, height // multiple * multiple)

    if (new_width, new_height) == (original_width, original_height):
        return image
    return image.resize((new_width, new_height), Image.LANCZOS)
# ZeroGPU Spaces only attach a GPU while a function decorated with
# @spaces.GPU is running.
@spaces.GPU
def infer(
    source_image: Optional[Image.Image] = None,
    reference_image: Optional[Image.Image] = None,
    seed: int = 0,
    randomize_seed: bool = True,
    true_guidance_scale: float = 1.0,
    num_inference_steps: int = 4,
    progress=gr.Progress(track_tqdm=True),
) -> Tuple[Tuple[Image.Image, Image.Image], int]:
    """
    Transfer color grading from a reference image onto a source image.

    Args:
        source_image: Image 1, whose content is preserved.
        reference_image: Image 2, whose color grade is transferred.
        seed: Seed used when ``randomize_seed`` is False.
        randomize_seed: When True, draw a fresh seed in ``[0, MAX_SEED]``.
        true_guidance_scale: Passed to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        ``((source_rgb, result), seed)``: the image pair for the comparison
        slider and the seed that was actually used.

    Raises:
        gr.Error: If either image is missing.
    """
    if source_image is None:
        raise gr.Error("Please upload a source image (Image 1).")
    if reference_image is None:
        raise gr.Error("Please upload a color grade reference image (Image 2).")

    # Slider values may arrive as floats; the generator and the step count
    # both need plain ints.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    num_inference_steps = int(num_inference_steps)
    generator = torch.Generator(device=device).manual_seed(seed)

    src_img = source_image.convert("RGB")
    ref_img = reference_image.convert("RGB")

    # Generate at the 1MP-equivalent of Image 1’s aspect ratio.
    # The pipeline internally scales ALL input images to VAE_IMAGE_SIZE (~1MP) before
    # VAE-encoding them as conditioning latents. img_shapes (for 2-D RoPE) combines
    # the output size (height/width) with those conditioning sizes. If they differ,
    # the RoPE coordinate systems are misaligned → huge pixel shift.
    # Using the same 1MP formula as the pipeline eliminates the mismatch.
    # (ComfyUI achieves this via ImageScaleToTotalPixels at megapixels=1.0.)
    gen_w, gen_h = calculate_vae_gen_size(src_img)

    result = pipe(
        image=[src_img, ref_img],
        prompt="Transfer ONLY the color grading from Image 2 onto Image 1",
        # True CFG is only applied by the pipeline when a negative prompt is
        # supplied; without one a guidance scale above 1 is ignored.
        negative_prompt=" " if true_guidance_scale > 1 else None,
        height=gen_h,
        width=gen_w,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    # NOTE: the result is returned at the generation resolution (gen_w x gen_h);
    # it is not resized back to the source image's dimensions.
    return (src_img, result), seed
# --- UI ---
# Page-level CSS: centers the main column and the examples block at 1000px and
# gives the image inputs a minimum height.
css = '''
#col-container { max-width: 1000px; margin: 0 auto; }
.dark .progress-text { color: white !important }
#examples { max-width: 1000px; margin: 0 auto; }
.image-container { min-height: 300px; }
'''

# NOTE(review): the nesting below was reconstructed from a copy that had lost its
# indentation; confirm the placement of gr.Examples against the intended layout.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎨 Color Grade Transfer - Qwen Image Edit + LoRA")
        gr.Markdown("""
        Transfer color grading and tones from a reference image onto your source image ✨
        Using my [ovi054/Color-Grade-Transfer-LoRA](https://huggingface.co/ovi054/QIE-2511-Color-Grade-Transfer-LoRA) and 4 step inference
        """)

        with gr.Row():
            # Left column: the two image inputs, the run button and the settings.
            with gr.Column():
                with gr.Row():
                    source_image = gr.Image(label="Image 1 (Source — content to preserve)", type="pil", elem_classes="image-container")
                    reference_image = gr.Image(label="Image 2 (Color Grade Reference)", type="pil", elem_classes="image-container")

                run_btn = gr.Button("🎨 Transfer Color Grade", variant="primary", size="lg")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)

            # Right column: before/after comparison slider.
            with gr.Column():
                result = gr.ImageSlider(label="Color Graded Output", interactive=False)

    # Example pairs run through infer() and are cached lazily (on first use).
    gr.Examples(
        examples=[
            ["images/image1.jpg", "images/image2.jpeg"],
            ["images/image2.jpeg", "images/image1.jpg"],
        ],
        inputs=[source_image, reference_image],
        outputs=[result, seed],
        fn=infer,
        cache_examples=True,
        cache_mode="lazy",
        elem_id="examples",
    )

    # Component order here must follow infer()'s positional parameters.
    inputs = [source_image, reference_image, seed, randomize_seed, true_guidance_scale, num_inference_steps]
    outputs = [result, seed]
    run_btn.click(fn=infer, inputs=inputs, outputs=outputs)

    # Normalize each uploaded image in place as soon as it arrives.
    for image_input in (source_image, reference_image):
        image_input.upload(
            fn=update_dimensions_on_upload,
            inputs=[image_input],
            outputs=[image_input],
        )

demo.launch(mcp_server=True, theme=gr.themes.Citrus(), css=css, footer_links=["api", "gradio", "settings"])