Spaces:

prithivMLmods
/

Qwen-Image-Diffusion

Running on Zero

App Files Files Community

Qwen-Image-Diffusion / app.py

prithivMLmods

Update app.py

affd5b9 verified about 1 month ago

raw

history blame

11 kB

	import os
	import gradio as gr
	import numpy as np
	import spaces
	import torch
	import random
	from PIL import Image
	from typing import Iterable

	# --- Gradio Theme ---
	from gradio.themes import Soft
	from gradio.themes.utils import colors, fonts, sizes

	# Custom "steel_blue" palette. It is attached to gradio's `colors` module so
	# the theme class can refer to it as `colors.steel_blue`, alongside the
	# built-in hues.
	_STEEL_BLUE_SHADES = {
	    "c50": "#EBF3F8",
	    "c100": "#D3E5F0",
	    "c200": "#A8CCE1",
	    "c300": "#7DB3D2",
	    "c400": "#529AC3",
	    "c500": "#4682B4",
	    "c600": "#3E72A0",
	    "c700": "#36638C",
	    "c800": "#2E5378",
	    "c900": "#264364",
	    "c950": "#1E3450",
	}
	colors.steel_blue = colors.Color(name="steel_blue", **_STEEL_BLUE_SHADES)

	class SteelBlueTheme(Soft):
	    """Gradio theme derived from ``Soft`` with gray primary and steel-blue accents.

	    All constructor arguments are keyword-only and are forwarded unchanged to
	    ``Soft.__init__``. Afterwards a fixed set of theme variables (backgrounds,
	    primary button, slider, block styling) is overridden through ``set``.
	    """

	    def __init__(
	        self,
	        *,
	        primary_hue: colors.Color | str = colors.gray,
	        secondary_hue: colors.Color | str = colors.steel_blue,
	        neutral_hue: colors.Color | str = colors.slate,
	        text_size: sizes.Size | str = sizes.text_lg,
	        font: fonts.Font | str | Iterable[fonts.Font | str] = (
	            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
	        ),
	        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
	            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
	        ),
	    ):
	        super().__init__(
	            primary_hue=primary_hue,
	            secondary_hue=secondary_hue,
	            neutral_hue=neutral_hue,
	            text_size=text_size,
	            font=font,
	            font_mono=font_mono,
	        )
	        # A leading "*" marks a reference to another theme variable. The
	        # prefix is required inside gradient strings as well; a bare
	        # "primary_200" is not a valid CSS color.
	        super().set(
	            background_fill_primary="*primary_50",
	            background_fill_primary_dark="*primary_900",
	            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
	            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
	            button_primary_text_color="white",
	            button_primary_text_color_hover="white",
	            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
	            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
	            slider_color="*secondary_500",
	            slider_color_dark="*secondary_600",
	            block_title_text_weight="600",
	            block_border_width="3px",
	            block_shadow="*shadow_drop_lg",
	        )


	# Single theme instance handed to `gr.Blocks` below.
	steel_blue_theme = SteelBlueTheme()

	# --- Model Loading ---
	import warnings

	from diffusers import FlowMatchEulerDiscreteScheduler
	from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
	from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
	from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

	try:
	    # `optimization` is a project-local module that cannot be seen from this
	    # file. It is treated as optional so that its absence skips the
	    # optimization step instead of aborting startup with a NameError.
	    from optimization import optimize_pipeline_
	except ImportError:
	    optimize_pipeline_ = None

	# Upper bound for the seed slider and for randomly drawn seeds.
	MAX_SEED = np.iinfo(np.int32).max

	dtype = torch.bfloat16
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Base edit pipeline with its transformer replaced by the "Rapid-AIO" weights.
	# NOTE(review): the transformer is loaded with device_map='cuda' regardless of
	# `device` -- confirm whether a CPU fallback is actually intended.
	pipe = QwenImageEditPlusPipeline.from_pretrained(
	    "Qwen/Qwen-Image-Edit-2509",
	    transformer=QwenImageTransformer2DModel.from_pretrained(
	        "linoyts/Qwen-Image-Edit-Rapid-AIO",
	        subfolder='transformer',
	        torch_dtype=dtype,
	        device_map='cuda'
	    ),
	    torch_dtype=dtype
	).to(device)

	# (repository, weight file, adapter name) for every LoRA registered on the
	# pipeline; `infer` later activates one of them by adapter name.
	LORA_ADAPTERS = (
	    (
	        "autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
	        "Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
	        "anime",
	    ),
	    (
	        "dx8152/Qwen-Edit-2509-Multiple-angles",
	        "镜头转换.safetensors",
	        "multiple-angles",
	    ),
	    (
	        "dx8152/Qwen-Image-Edit-2509-Light_restoration",
	        "移除光影.safetensors",
	        "light-restoration",
	    ),
	    (
	        "dx8152/Qwen-Image-Edit-2509-Relight",
	        "Qwen-Edit-Relight.safetensors",
	        "relight",
	    ),
	)
	for _repo_id, _weight_name, _adapter_name in LORA_ADAPTERS:
	    pipe.load_lora_weights(_repo_id, weight_name=_weight_name, adapter_name=_adapter_name)

	# Rebind the transformer to the local model class before installing the
	# custom attention processor.
	# NOTE(review): presumably needed because loading leaves a different class on
	# the instance -- confirm.
	pipe.transformer.__class__ = QwenImageTransformer2DModel
	pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

	if optimize_pipeline_ is not None:
	    # Run the optimizer once with two blank 1024x1024 images and a dummy prompt.
	    optimize_pipeline_(
	        pipe,
	        image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
	        prompt="prompt",
	    )
	else:
	    warnings.warn(
	        "Module 'optimization' could not be imported; skipping pipeline optimization.",
	        RuntimeWarning,
	    )

	# --- Helper Function for Aspect Ratio (Corrected) ---
	def update_dimensions_on_upload(image):
	    """Compute the output size to use for an uploaded image.

	    The image is scaled down (never up) so that its longer side is at most
	    1024 px while keeping the aspect ratio. Each side is then floored to a
	    multiple of 8 and clamped to at least 8, so that a very small or very
	    elongated image can never produce a zero-sized dimension.

	    Args:
	        image: Object exposing ``size`` as ``(width, height)`` (e.g. a PIL
	            image), or ``None`` when nothing is uploaded.

	    Returns:
	        ``(width, height)``; ``(1024, 1024)`` when ``image`` is ``None``.
	    """
	    max_dim = 1024
	    multiple = 8

	    if image is None:
	        return max_dim, max_dim  # Default for no image

	    original_width, original_height = image.size

	    if original_width > max_dim or original_height > max_dim:
	        # Larger than the limit: pin the longer side and scale the other.
	        if original_width > original_height:
	            new_width = max_dim
	            new_height = int(max_dim * original_height / original_width)
	        else:
	            new_height = max_dim
	            new_width = int(max_dim * original_width / original_height)
	    else:
	        # Already within the limit: keep the original dimensions.
	        new_width = original_width
	        new_height = original_height

	    def _snap(value):
	        # Floor to a multiple of 8 (model compatibility) but never below 8.
	        return max(multiple, (value // multiple) * multiple)

	    return _snap(new_width), _snap(new_height)


	# --- Main Inference Function ---
	@spaces.GPU
	def infer(
	    input_image,
	    prompt,
	    lora_adapter,
	    seed,
	    randomize_seed,
	    guidance_scale,
	    steps,
	    width,
	    height,
	    progress=gr.Progress(track_tqdm=True)
	):
	    """Run one edit of ``input_image`` with the selected LoRA adapter.

	    Args:
	        input_image: Image to edit; must provide ``convert("RGB")``.
	        prompt: Edit instruction passed to the pipeline.
	        lora_adapter: UI label of the editing style. Unrecognized labels
	            leave the currently active adapter unchanged.
	        seed: Seed for the generator; replaced when ``randomize_seed`` is set.
	        randomize_seed: If truthy, draw a fresh seed in ``[0, MAX_SEED]``.
	        guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
	        steps: Number of inference steps.
	        width: Output width passed to the pipeline.
	        height: Output height passed to the pipeline.
	        progress: Gradio progress tracker (tqdm tracking enabled).

	    Returns:
	        ``(image, seed)``: the first generated image and the seed used.

	    Raises:
	        gr.Error: If ``input_image`` is ``None``.
	    """
	    if input_image is None:
	        raise gr.Error("Please upload an image to edit.")

	    # Map the dropdown label to the adapter name registered at load time.
	    adapter_by_label = {
	        "Photo-to-Anime": "anime",
	        "Multiple-Angles": "multiple-angles",
	        "Light-Restoration": "light-restoration",
	        "Relight": "relight",
	    }
	    adapter_name = adapter_by_label.get(lora_adapter)
	    if adapter_name is not None:
	        pipe.set_adapters([adapter_name], adapter_weights=[1.0])

	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)

	    rng = torch.Generator(device=device).manual_seed(seed)

	    negative_prompt = "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"

	    source_rgb = input_image.convert("RGB")
	    output = pipe(
	        image=source_rgb,
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        height=height,
	        width=width,
	        num_inference_steps=steps,
	        generator=rng,
	        true_cfg_scale=guidance_scale,
	        num_images_per_prompt=1,
	    )

	    return output.images[0], seed

	# --- Wrapper for Examples ---
	@spaces.GPU
	def infer_example(input_image, prompt, lora_adapter):
	    """Run `infer` for a gallery example using fixed settings.

	    The example image is converted to RGB, its output size is derived with
	    `update_dimensions_on_upload`, and `infer` is called with a randomized
	    seed, guidance scale 1.0 and 4 steps.

	    Returns:
	        The ``(image, seed)`` pair produced by `infer`.
	    """
	    rgb_image = input_image.convert("RGB")
	    example_width, example_height = update_dimensions_on_upload(rgb_image)

	    # Fixed defaults for example runs.
	    placeholder_seed = 0       # ignored because the seed is randomized
	    use_random_seed = True
	    example_guidance = 1.0
	    example_steps = 4

	    return infer(
	        rgb_image,
	        prompt,
	        lora_adapter,
	        placeholder_seed,
	        use_random_seed,
	        example_guidance,
	        example_steps,
	        example_width,
	        example_height,
	    )

	# --- UI Layout ---
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 960px;
	}
	#main-title h1 {font-size: 2.1em !important;}
	"""

	# Labels offered in the style dropdown; `infer` maps them to adapter names.
	editing_styles = ["Photo-to-Anime", "Multiple-Angles", "Light-Restoration", "Relight"]

	# Each row: [image path, prompt, editing style].
	example_rows = [
	    ["examples/1.jpg", "Transform into anime.", "Photo-to-Anime"],
	    ["examples/4.jpg", "Remove shadows and relight the image using soft lighting.", "Light-Restoration"],
	    ["examples/5.jpg", "Relight the image using soft, diffused lighting that simulates sunlight filtering through curtains.", "Relight"],
	    ["examples/2.jpeg", "Move the camera left.", "Multiple-Angles"],
	    ["examples/2.jpeg", "Move the camera right.", "Multiple-Angles"],
	    ["examples/2.jpeg", "Move the camera down.", "Multiple-Angles"],
	    ["examples/2.jpeg", "Rotate the camera 45 degrees to the left.", "Multiple-Angles"],
	    ["examples/3.jpg", "Rotate the camera 45 degrees to the right.", "Multiple-Angles"],
	    ["examples/3.jpg", "Switch the camera to a top-down view.", "Multiple-Angles"],
	    ["examples/3.jpg", "Switch the camera to a wide-angle lens.", "Multiple-Angles"],
	    ["examples/3.jpg", "Switch the camera to a close-up lens.", "Multiple-Angles"],
	]

	# NOTE(review): the nesting of the layout below is reconstructed from the
	# statement order -- confirm it matches the intended layout.
	with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
	    with gr.Column(elem_id="col-container"):
	        gr.Markdown("# Qwen-Image-Edit-2509-LoRAs-Fast", elem_id="main-title")
	        gr.Markdown("Perform diverse image edits using specialized LoRA adapters for the Qwen-Image-Edit model.")

	        with gr.Row(equal_height=True):
	            # Left column: inputs and settings.
	            with gr.Column():
	                input_image = gr.Image(label="Upload Image", type="pil")

	                lora_adapter = gr.Dropdown(
	                    label="Choose Editing Style",
	                    choices=editing_styles,
	                    value="Photo-to-Anime"
	                )

	                prompt = gr.Text(
	                    label="Edit Prompt",
	                    show_label=True,
	                    placeholder="e.g., transform into anime",
	                )

	                run_button = gr.Button("Run", variant="primary")

	                with gr.Accordion("⚙️ Advanced Settings", open=False):
	                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
	                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
	                    guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
	                    steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=4)
	                    # Hidden sliders that carry the output size computed on upload.
	                    height = gr.Slider(label="Height", minimum=256, maximum=1024, step=8, value=1024, visible=False)
	                    width = gr.Slider(label="Width", minimum=256, maximum=1024, step=8, value=1024, visible=False)

	            # Right column: result.
	            with gr.Column():
	                output_image = gr.Image(label="Output Image", interactive=False, format="png", height=290)

	        gr.Examples(
	            examples=example_rows,
	            inputs=[input_image, prompt, lora_adapter],
	            outputs=[output_image, seed],
	            fn=infer_example,
	            cache_examples=False,
	            label="Examples"
	        )

	    # --- Event Handlers ---
	    # The "Run" button performs the edit and writes the seed used back to the slider.
	    run_button.click(
	        fn=infer,
	        inputs=[input_image, prompt, lora_adapter, seed, randomize_seed, guidance_scale, steps, width, height],
	        outputs=[output_image, seed]
	    )

	    # Uploading an image refreshes the hidden width/height sliders.
	    input_image.upload(
	        fn=update_dimensions_on_upload,
	        inputs=[input_image],
	        outputs=[width, height]
	    )

	demo.launch()