# Hugging Face Space: prithivMLmods/Qwen-Image-Diffusion
# Status: Running on Zero
# File size: 11,789 Bytes

import os
import random
from collections.abc import Iterable

import gradio as gr
import numpy as np
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes

from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

# Steel-blue palette, lightest (c50) to darkest (c950). It is attached to the
# gradio `colors` module so it can be referenced like a built-in hue
# (`colors.steel_blue`) by the theme class defined below.
_STEEL_BLUE_SHADES = {
    "c50": "#EBF3F8",
    "c100": "#D3E5F0",
    "c200": "#A8CCE1",
    "c300": "#7DB3D2",
    "c400": "#529AC3",
    "c500": "#4682B4",
    "c600": "#3E72A0",
    "c700": "#36638C",
    "c800": "#2E5378",
    "c900": "#264364",
    "c950": "#1E3450",
}
colors.steel_blue = colors.Color(name="steel_blue", **_STEEL_BLUE_SHADES)

class SteelBlueTheme(Soft):
    """Gradio theme derived from ``Soft`` with gray surfaces and steel-blue accents.

    The constructor forwards its hue, text-size and font choices to
    ``Soft.__init__`` and then overrides a set of theme variables
    (backgrounds, button gradients, slider colour, block borders/shadows).

    Note: the ``font``/``font_mono`` annotations reference ``Iterable``, which
    must be imported at module level; parameter annotations are evaluated when
    the ``def`` statement runs, so a missing import fails at import time.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.steel_blue,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        """Build the theme.

        Args:
            primary_hue: Hue used for backgrounds and secondary buttons.
            secondary_hue: Accent hue used for primary buttons and sliders.
            neutral_hue: Neutral hue forwarded to ``Soft``.
            text_size: Base text size forwarded to ``Soft``.
            font: Font (or fallback chain) for regular text.
            font_mono: Font (or fallback chain) for monospace text.
        """
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        # Values starting with "*" refer to other theme variables
        # (e.g. "*primary_50" is the c50 shade of the primary hue).
        self.set(
            # Page and panel backgrounds.
            background_fill_primary="*primary_50",
            background_fill_primary_dark="*primary_900",
            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
            # Primary (accent) buttons.
            button_primary_text_color="white",
            button_primary_text_color_hover="white",
            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
            # Secondary buttons.
            button_secondary_text_color="black",
            button_secondary_text_color_hover="white",
            button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
            button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
            button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
            button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
            # Sliders, blocks and miscellaneous accents.
            slider_color="*secondary_500",
            slider_color_dark="*secondary_600",
            block_title_text_weight="600",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
            button_primary_shadow="*shadow_drop_lg",
            button_large_padding="11px",
            color_accent_soft="*primary_100",
            block_label_background_fill="*primary_200",
        )

# Single shared theme instance, handed to gr.Blocks further down.
steel_blue_theme = SteelBlueTheme()

# --- Constants and Setup ---
# Largest value a signed 32-bit integer can hold; used as the seed upper bound.
MAX_SEED = np.iinfo(np.int32).max
# Precision used when loading the model weights.
dtype = torch.bfloat16
# Run on the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# --- Model Loading ---
# The transformer comes from a separate repository and replaces the one that
# would otherwise be loaded with the base edit pipeline.
rapid_transformer = QwenImageTransformer2DModel.from_pretrained(
    "linoyts/Qwen-Image-Edit-Rapid-AIO",
    subfolder='transformer',
    torch_dtype=dtype,
    device_map='cuda',
)
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    transformer=rapid_transformer,
    torch_dtype=dtype,
).to(device)

# LoRA adapters as (repository, weight file, adapter name). Every adapter gets
# a unique name so that a single one can be activated per request.
_LORA_SOURCES = (
    (
        "dx8152/Qwen-Image-Edit-2509-Light_restoration",
        "移除光影.safetensors",
        "light_restoration",
    ),
    (
        "dx8152/Qwen-Edit-2509-Multiple-angles",
        "镜头转换.safetensors",
        "multiple_angles",
    ),
    (
        "autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
        "Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
        "photo_to_anime",
    ),
    (
        "dx8152/Qwen-Image-Edit-2509-Relight",
        "Qwen-Edit-Relight.safetensors",
        "relight",
    ),
)
for _repo_id, _weight_file, _adapter in _LORA_SOURCES:
    pipe.load_lora_weights(_repo_id, weight_name=_weight_file, adapter_name=_adapter)

# Apply optimizations
# Re-tag the transformer with the local model class before swapping in the
# FA3 attention processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# Two blank 1024x1024 images and a placeholder prompt serve as sample inputs
# for the optimization helper.
_warmup_images = [Image.new("RGB", (1024, 1024)) for _ in range(2)]
optimize_pipeline_(pipe, image=_warmup_images, prompt="prompt")



# --- Inference Logic ---
@spaces.GPU
def infer(
    image,
    prompt,
    lora_adapter,
    seed=0,
    randomize_seed=True,
    true_guidance_scale=1.0,
    num_inference_steps=4,
    height=1024,
    width=1024,
    progress=gr.Progress(track_tqdm=True)
):
    """Edit an image with the selected LoRA adapter and a text prompt.

    Args:
        image: The input image (PIL); it is converted to RGB before editing.
        prompt: Text instruction describing the desired edit.
        lora_adapter: Editing tool label: "Shadow/Light Restoration",
            "Multiple Angles", "Photo to Anime" or "Advanced Relighting".
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` for this call.
        true_guidance_scale: Value passed to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        height: Output height in pixels.
        width: Output width in pixels.
        progress: Gradio progress tracker (mirrors tqdm progress bars).

    Returns:
        A tuple ``(result, seed)``: the first generated image and the seed
        that was actually used.

    Raises:
        gr.Error: If no image was provided or the adapter label is unknown.
    """
    if image is None:
        raise gr.Error("Please upload an image to get started.")

    # Map the UI label to the adapter name registered at load time.
    adapter_by_label = {
        "Shadow/Light Restoration": "light_restoration",
        "Multiple Angles": "multiple_angles",
        "Photo to Anime": "photo_to_anime",
        "Advanced Relighting": "relight",
    }
    adapter = adapter_by_label.get(lora_adapter)
    if adapter is None:
        # Without this guard the adapter left active by the previous request
        # would silently be reused.
        raise gr.Error(f"Unknown editing tool: {lora_adapter!r}")
    pipe.set_adapters([adapter], adapter_weights=[1.0])

    # Slider values may arrive as floats; manual_seed and the pipeline's
    # size/step arguments need real integers.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    result = pipe(
        image=image.convert("RGB"),
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=float(true_guidance_scale),
        num_images_per_prompt=1,
    ).images[0]

    return result, seed

# --- UI Helper Functions ---
def update_dimensions_on_upload(image):
    """Suggest output dimensions that follow the uploaded image's aspect ratio.

    The longer side is set to 1024 and the shorter side is scaled
    proportionally, rounded down to a multiple of 8 and never allowed to drop
    below 256 (the minimum of the width/height sliders), so very wide or very
    tall images still produce a valid slider value.

    Args:
        image: An object exposing ``size`` as ``(width, height)``, or ``None``.

    Returns:
        A ``(width, height)`` tuple; ``(1024, 1024)`` when there is no image
        or its size is degenerate.
    """
    long_side = 1024
    min_side = 256
    multiple = 8

    if image is None:
        return long_side, long_side

    original_width, original_height = image.size
    if original_width <= 0 or original_height <= 0:
        # A zero-sized image has no aspect ratio; fall back to the default.
        return long_side, long_side

    def _scaled_short_side(short, long):
        # Integer arithmetic avoids float truncation errors (e.g. 767.99 -> 767).
        scaled = long_side * short // long
        # Floor to a multiple of 8 for model compatibility, then clamp.
        return max(min_side, (scaled // multiple) * multiple)

    if original_width > original_height:
        return long_side, _scaled_short_side(original_height, original_width)
    return _scaled_short_side(original_width, original_height), long_side

def update_prompt_on_adapter_change(adapter_name):
    """Return the starter prompt for the chosen editing tool.

    Unknown tool labels yield an empty string.
    """
    starter_prompts = dict([
        ("Shadow/Light Restoration", "Remove shadows and relight the image using soft lighting."),
        ("Multiple Angles", "A photo of the scene from a top-down view."),
        ("Photo to Anime", "Transform into anime, masterpiece, best quality."),
        ("Advanced Relighting", "Relight the image using soft, diffused lighting that simulates sunlight filtering through curtains."),
    ])
    try:
        return starter_prompts[adapter_name]
    except KeyError:
        # No suggestion registered for this label.
        return ""

# --- Gradio UI ---
# Custom stylesheet passed to gr.Blocks(css=...) below. The "#col-container"
# and "#examples" selectors match the elem_id values assigned in the layout.
css = '''
#col-container { 
    max-width: 960px; 
    margin: 0 auto; 
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
}
.dark .progress-text { color: white !important }
#examples { max-width: 960px; margin: 0 auto; }
.gradio-container {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
}
.gr-button-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 12px 24px !important;
    font-weight: 600 !important;
}
.gr-box {
    border-radius: 16px !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}
'''

with gr.Blocks(theme=steel_blue_theme, css=css) as demo:
    # Plain data used by the widgets below, declared up front so the layout
    # code stays compact. Component creation order (and so the rendered
    # layout) is unchanged.
    tool_labels = [
        "Shadow/Light Restoration",
        "Multiple Angles",
        "Photo to Anime",
        "Advanced Relighting",
    ]
    # Each example row is [image path, prompt, editing tool].
    example_rows = [
        [
            "examples/example1.png",
            "A photo of the scene from a low angle shot.",
            "Multiple Angles",
        ],
        [
            "examples/example2.png",
            "Remove shadows and relight the image using soft lighting.",
            "Shadow/Light Restoration",
        ],
        [
            "examples/example3.png",
            "Transform into anime, masterpiece, best quality, girl with cherry blossoms.",
            "Photo to Anime",
        ],
        [
            "examples/example4.png",
            "Relight the image using soft, diffused lighting that simulates sunlight filtering through curtains.",
            "Advanced Relighting",
        ],
    ]

    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Qwen Image Edit - Fast LoRA")
        with gr.Row():
            # Left column: inputs and settings.
            with gr.Column(scale=1):
                image = gr.Image(type="pil", label="Upload Image", height=450)

                lora_adapter = gr.Dropdown(
                    choices=tool_labels,
                    value="Shadow/Light Restoration",
                    label="Choose an Editing Tool",
                )

                prompt = gr.Textbox(
                    lines=2,
                    label="Prompt",
                    value="Remove shadows and relight the image using soft lighting.",
                )

                run_btn = gr.Button("Generate", variant="primary", size="lg")

                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
                    randomize_seed = gr.Checkbox(value=True, label="Randomize Seed")
                    true_guidance_scale = gr.Slider(
                        minimum=1.0, maximum=10.0, step=0.1, value=1.0, label="Guidance Scale"
                    )
                    num_inference_steps = gr.Slider(
                        minimum=1, maximum=40, step=1, value=4, label="Inference Steps"
                    )
                    height = gr.Slider(minimum=256, maximum=2048, step=8, value=1024, label="Height")
                    width = gr.Slider(minimum=256, maximum=2048, step=8, value=1024, label="Width")

            # Right column: the generated image.
            with gr.Column(scale=1):
                result = gr.Image(label="Output Image", interactive=False, height=500, format="png")

        gr.Examples(
            elem_id="examples",
            examples=example_rows,
            inputs=[image, prompt, lora_adapter],
            outputs=[result, seed],
            fn=infer,
            cache_examples=False,
        )

    # --- Event Handlers ---
    # Generate: run inference with every control as input; the seed slider is
    # also an output so it shows the seed that was used.
    infer_inputs = [
        image,
        prompt,
        lora_adapter,
        seed,
        randomize_seed,
        true_guidance_scale,
        num_inference_steps,
        height,
        width,
    ]
    run_btn.click(fn=infer, inputs=infer_inputs, outputs=[result, seed])

    # Uploading an image updates the width/height sliders.
    image.upload(fn=update_dimensions_on_upload, inputs=[image], outputs=[width, height])

    # Switching the tool replaces the prompt with that tool's suggestion.
    lora_adapter.change(fn=update_prompt_on_adapter_change, inputs=[lora_adapter], outputs=[prompt])

demo.launch(mcp_server=True, ssr_mode=False, show_error=True)