# --------------------------------------------------------------
# Qwen‑Image‑Edit‑2509 LoRA Demo – fixed‑aspect‑ratio version
# --------------------------------------------------------------
import os
import random
import numpy as np
import torch
import gradio as gr
import spaces
from PIL import Image
from typing import Iterable
# -------------------- THEME ---------------------------------
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes
# add a custom colour
colors.steel_blue = colors.Color(
name="steel_blue",
c50="#EBF3F8",
c100="#D3E5F0",
c200="#A8CCE1",
c300="#7DB3D2",
c400="#529AC3",
c500="#4682B4",
c600="#3E72A0",
c700="#36638C",
c800="#2E5378",
c900="#264364",
c950="#1E3450",
)
class SteelBlueTheme(Soft):
def __init__(
self,
*,
primary_hue: colors.Color | str = colors.gray,
secondary_hue: colors.Color | str = colors.steel_blue,
neutral_hue: colors.Color | str = colors.slate,
text_size: sizes.Size | str = sizes.text_lg,
font: fonts.Font | str | Iterable[fonts.Font | str] = (
fonts.GoogleFont("Outfit"),
"Arial",
"sans-serif",
),
font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
fonts.GoogleFont("IBM Plex Mono"),
"ui-monospace",
"monospace",
),
):
super().__init__(
primary_hue=primary_hue,
secondary_hue=secondary_hue,
neutral_hue=neutral_hue,
text_size=text_size,
font=font,
font_mono=font_mono,
)
super().set(
background_fill_primary="*primary_50",
background_fill_primary_dark="*primary_900",
body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
button_primary_text_color="white",
button_primary_text_color_hover="white",
button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
button_secondary_text_color="black",
button_secondary_text_color_hover="white",
button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
slider_color="*secondary_500",
slider_color_dark="*secondary_600",
block_title_text_weight="600",
block_border_width="3px",
block_shadow="*shadow_drop_lg",
button_primary_shadow="*shadow_drop_lg",
button_large_padding="11px",
color_accent_soft="*primary_100",
block_label_background_fill="*primary_200",
)
steel_blue_theme = SteelBlueTheme()
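# Passed to gr.Blocks() below – Gradio applies a theme/css on Blocks, not on launch().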
# --------------------------------------------------------------
# Device & diagnostics
# --------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("CUDA_VISIBLE_DEVICES =", os.getenv("CUDA_VISIBLE_DEVICES"))
print("torch.__version__ =", torch.__version__)
print("torch.version.cuda =", torch.version.cuda)
print("cuda available? :", torch.cuda.is_available())
print("cuda device count :", torch.cuda.device_count())
if torch.cuda.is_available():
print("current device :", torch.cuda.current_device())
print("device name :", torch.cuda.get_device_name(torch.cuda.current_device()))
print("Using device:", device)
# --------------------------------------------------------------
# Load the Qwen‑Image‑Edit model + LoRA adapters
# --------------------------------------------------------------
from diffusers import FlowMatchEulerDiscreteScheduler
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
dtype = torch.bfloat16
pipe = QwenImageEditPlusPipeline.from_pretrained(
"Qwen/Qwen-Image-Edit-2509",
transformer=QwenImageTransformer2DModel.from_pretrained(
"linoyts/Qwen-Image-Edit-Rapid-AIO",
subfolder="transformer",
torch_dtype=dtype,
device_map="cuda",
),
torch_dtype=dtype,
).to(device)
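# Note: device_map="cuda" above assumes a CUDA GPU (plus `accelerate`) is
# available; on a CPU-only machine this transformer load will fail.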
# ---- LoRA adapters -------------------------------------------------
pipe.load_lora_weights(
"autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
weight_name="Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
adapter_name="anime",
)
pipe.load_lora_weights(
"dx8152/Qwen-Edit-2509-Multiple-angles",
weight_name="镜头转换.safetensors",
adapter_name="multiple-angles",
)
pipe.load_lora_weights(
"dx8152/Qwen-Image-Edit-2509-Light_restoration",
weight_name="移除光影.safetensors",
adapter_name="light-restoration",
)
pipe.load_lora_weights(
"dx8152/Qwen-Image-Edit-2509-Relight",
weight_name="Qwen-Edit-Relight.safetensors",
adapter_name="relight",
)
pipe.load_lora_weights(
"dx8152/Qwen-Edit-2509-Multi-Angle-Lighting",
weight_name="多角度灯光-251116.safetensors",
adapter_name="multi-angle-lighting",
)
pipe.load_lora_weights(
"tlennon-ie/qwen-edit-skin",
weight_name="qwen-edit-skin_1.1_000002750.safetensors",
adapter_name="edit-skin",
)
pipe.load_lora_weights(
"lovis93/next-scene-qwen-image-lora-2509",
weight_name="next-scene_lora-v2-3000.safetensors",
adapter_name="next-scene",
)
pipe.load_lora_weights(
"vafipas663/Qwen-Edit-2509-Upscale-LoRA",
weight_name="qwen-edit-enhance_64-v3_000001000.safetensors",
adapter_name="upscale-image",
)
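# Optional sanity check – diffusers' LoRA base mixin exposes the loaded
# adapters per component (uncomment to verify all eight registered):
# print(pipe.get_list_adapters())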
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# --------------------------------------------------------------
# Small memory / speed tweaks (no quality loss)
# --------------------------------------------------------------
if torch.cuda.is_available():
# split attention into smaller chunks → less peak memory
pipe.enable_attention_slicing()
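    # Caution: if xFormers is installed, the call below replaces the pipeline's
    # attention processors – including the FA3 processor set above. Verify on
    # your diffusers version which backend you actually want to win.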
# try the fast xFormers kernel if it is installed
try:
pipe.enable_xformers_memory_efficient_attention()
except Exception as e:
print("xFormers not available:", e)
# The safety‑checker is not needed for this demo → disable it
pipe.safety_checker = None
# --------------------------------------------------------------
# Helper – keep aspect ratio, pad to a size accepted by the model
# --------------------------------------------------------------
MAX_SIDE = 1024 # longest side we allow (model limit)
DIVISIBLE_BY = 8 # all dimensions must be a multiple of 8
def _make_multiple(x: int, base: int = DIVISIBLE_BY) -> int:
    """Round *up* to the nearest multiple of ``base`` (pad, never crop).

    Rounding down would make the canvas smaller than the resized image and
    produce negative crop offsets.
    """
    return ((x + base - 1) // base) * base
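# Quick check of the rounding helper (plain arithmetic):
#   _make_multiple(1020) -> 1024, _make_multiple(1024) -> 1024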
def prepare_image_for_pipe(pil_img: Image.Image):
"""
1️⃣ Resize the longer side to ``MAX_SIDE`` while preserving aspect‑ratio.
2️⃣ Pad the resized image (black) so both dimensions become multiples of 8.
3️⃣ Return the padded image **and** the crop‑box that lets us recover the original
aspect‑ratio after generation.
"""
w, h = pil_img.size
if max(w, h) > MAX_SIDE:
if w >= h: # wide image
new_w = MAX_SIDE
new_h = int(h * MAX_SIDE / w)
else: # tall image
new_h = MAX_SIDE
new_w = int(w * MAX_SIDE / h)
else:
new_w, new_h = w, h
    resized = pil_img.resize((new_w, new_h), Image.LANCZOS)
    pad_w = _make_multiple(new_w)
    pad_h = _make_multiple(new_h)
    # Paste onto a black canvas rather than ImageOps.pad(), which rescales the
    # image to fit the target box and would throw the crop box off.
    padded = Image.new("RGB", (pad_w, pad_h), (0, 0, 0))
    left = (pad_w - new_w) // 2
    top = (pad_h - new_h) // 2
    padded.paste(resized, (left, top))
    crop_box = (left, top, left + new_w, top + new_h)
    return padded, crop_box, (new_w, new_h)  # padded img, crop box, size after resize
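# Worked example (plain arithmetic, no model assumptions): a 4000x3000 upload
# is resized to 1024x768 – both multiples of 8, so no padding and a full-frame
# crop box of (0, 0, 1024, 768). A 1023x770 upload is left unresized and
# padded to 1024x776, giving a centred crop box of (0, 3, 1023, 773).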
def crop_back_to_original(gen_img: Image.Image, crop_box, final_size):
"""
1️⃣ Crop the generation to the region that corresponds to the *resized*
original picture.
2️⃣ Resize that crop back to the exact dimensions the user uploaded.
"""
cropped = gen_img.crop(crop_box)
return cropped.resize(final_size, Image.LANCZOS)
# --------------------------------------------------------------
# Inference function (GPU‑bound)
# --------------------------------------------------------------
MAX_SEED = np.iinfo(np.int32).max
# Reduce the reservation time – 15 seconds is plenty for a 13‑second run.
@spaces.GPU(duration=15)
def infer(
input_image,
prompt,
lora_adapter,
seed,
randomize_seed,
guidance_scale,
steps,
progress=gr.Progress(track_tqdm=True),
):
"""Run a single edit – returns the edited image with the original aspect‑ratio."""
if input_image is None:
raise gr.Error("Please upload an image to edit.")
# ---------- LoRA ----------
adapter_map = {
"Photo-to-Anime": ["anime"],
"Multiple-Angles": ["multiple-angles"],
"Light-Restoration": ["light-restoration"],
"Relight": ["relight"],
"Multi-Angle-Lighting":["multi-angle-lighting"],
"Edit-Skin": ["edit-skin"],
"Next-Scene": ["next-scene"],
"Upscale-Image": ["upscale-image"],
}
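    # Any dropdown value missing from the map falls back to an empty adapter list.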
    adapters = adapter_map.get(lora_adapter, [])
    pipe.set_adapters(adapters, adapter_weights=[1.0] * len(adapters))
# ---------- Seed ----------
if randomize_seed:
seed = random.randint(0, MAX_SEED)
generator = torch.Generator(device=device).manual_seed(seed)
# ---------- Prompt ----------
negative_prompt = (
"worst quality, low quality, bad anatomy, bad hands, text, error, "
"missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, "
"signature, watermark, username, blurry"
)
# ---------- Image ----------
original = input_image.convert("RGB")
padded, crop_box, _ = prepare_image_for_pipe(original)
# ---------- Diffusion (no grad tracking) ----------
with torch.no_grad():
result = pipe(
image=padded,
prompt=prompt,
negative_prompt=negative_prompt,
height=padded.height,
width=padded.width,
num_inference_steps=steps,
generator=generator,
true_cfg_scale=guidance_scale,
).images[0]
# ---------- Recover original aspect‑ratio ----------
final = crop_back_to_original(result, crop_box, original.size)
    # free cached GPU memory for the next request
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
return final, seed
# --------------------------------------------------------------
# Example helper (deterministic quick run)
# --------------------------------------------------------------
@spaces.GPU(duration=15)
def infer_example(input_image, prompt, lora_adapter):
"""Runs a quick example – 4 steps, guidance 1.0, random seed."""
return infer(
input_image,
prompt,
lora_adapter,
seed=0,
randomize_seed=True,
guidance_scale=1.0,
steps=4,
)
# --------------------------------------------------------------
# UI
# --------------------------------------------------------------
css = """
#col-container {margin: 0 auto; max-width: 960px;}
#main-title h1 {font-size: 2.1em !important;}
"""
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
with gr.Column(elem_id="col-container"):
gr.Markdown("# **Qwen‑Image‑Edit‑2509 LoRAs – Fixed Aspect Ratio**",
elem_id="main-title")
gr.Markdown(
"Edit images with a variety of LoRA adapters while preserving the "
"original aspect‑ratio (no unexpected cropping)."
)
with gr.Row(equal_height=True):
# ---------- left column ----------
with gr.Column():
input_image = gr.Image(
label="Upload Image",
type="pil",
height=290,
)
prompt = gr.Textbox(
label="Edit Prompt",
placeholder="e.g. transform into anime…",
)
run_button = gr.Button("Edit Image", variant="primary")
# ---------- right column ----------
with gr.Column():
output_image = gr.Image(
label="Output Image",
interactive=False,
format="png",
height=353,
)
lora_adapter = gr.Dropdown(
label="Choose Editing Style",
choices=[
"Photo-to-Anime", "Multiple-Angles", "Light-Restoration",
"Multi-Angle-Lighting", "Upscale-Image", "Relight",
"Next-Scene", "Edit-Skin",
],
value="Photo-to-Anime",
)
with gr.Accordion("Advanced Settings", open=False):
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0,
)
randomize_seed = gr.Checkbox(
label="Randomize Seed",
value=True,
)
guidance_scale = gr.Slider(
label="Guidance Scale",
minimum=1.0,
maximum=10.0,
step=0.1,
value=1.0,
)
steps = gr.Slider(
label="Inference Steps",
minimum=1,
maximum=50,
step=1,
value=4,
)
# ---------- examples ----------
gr.Examples(
examples=[
["examples/1.jpg", "Transform into anime.", "Photo-to-Anime"],
["examples/5.jpg", "Remove shadows and relight the image using soft lighting.", "Light-Restoration"],
["examples/4.jpg", "Use a subtle golden‑hour filter with smooth light diffusion.", "Relight"],
["examples/2.jpeg", "Rotate the camera 45 degrees to the left.", "Multiple-Angles"],
["examples/7.jpg", "Light source from the Right Rear", "Multi-Angle-Lighting"],
["examples/10.jpeg", "Upscale the image.", "Upscale-Image"],
["examples/7.jpg", "Light source from the Below", "Multi-Angle-Lighting"],
["examples/2.jpeg", "Switch the camera to a top‑down right corner view.", "Multiple-Angles"],
["examples/9.jpg", "The camera moves slightly forward as sunlight breaks through the clouds, casting a soft glow around the character's silhouette in the mist. Realistic cinematic style, atmospheric depth.", "Next-Scene"],
["examples/8.jpg", "Make the subjects skin details more prominent and natural.", "Edit-Skin"],
["examples/6.jpg", "Switch the camera to a bottom‑up view.", "Multiple-Angles"],
["examples/6.jpg", "Rotate the camera 180 degrees upside down.", "Multiple-Angles"],
["examples/4.jpg", "Rotate the camera 45 degrees to the right.", "Multiple-Angles"],
["examples/4.jpg", "Switch the camera to a top‑down view.", "Multiple-Angles"],
["examples/4.jpg", "Switch the camera to a wide‑angle lens.", "Multiple-Angles"],
],
inputs=[input_image, prompt, lora_adapter],
outputs=[output_image, seed],
fn=infer_example,
cache_examples=False,
label="Examples",
)
# ---------- button ----------
run_button.click(
fn=infer,
inputs=[
input_image,
prompt,
lora_adapter,
seed,
randomize_seed,
guidance_scale,
steps,
],
outputs=[output_image, seed],
)
if __name__ == "__main__":
    demo.queue(max_size=30).launch(
        mcp_server=True,
        ssr_mode=False,
        show_error=True,
    )