import os
import uuid
import numpy as np
import random
import tempfile
import zipfile
import spaces
import torch
import gradio as gr
from PIL import Image
from diffusers import QwenImageLayeredPipeline
from pptx import Presentation
LOG_DIR = "/tmp/local"
MAX_SEED = np.iinfo(np.int32).max
# Optional HF login (works in Spaces if you set an HF token as the secret env var "hf")
from huggingface_hub import login

hf_token = os.environ.get("hf")
if hf_token:
    login(token=hf_token)
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
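
# Load the layered-decomposition pipeline once at startup so every request
# reuses the same weights instead of reloading the model per call.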
pipeline = QwenImageLayeredPipeline.from_pretrained(
"Qwen/Qwen-Image-Layered", torch_dtype=dtype
).to(device)
def ensure_dirname(path: str):
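    """Create `path` (including parents) if it does not already exist."""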
if path and not os.path.exists(path):
os.makedirs(path, exist_ok=True)
def random_str(length=8):
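    """Return a short random hex string (default 8 chars), handy for unique filenames."""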
return uuid.uuid4().hex[:length]
def imagelist_to_pptx(img_files):
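    """Build a one-slide PPTX sized to the first image and stack every image on it full-bleed.

    All pictures are placed at the first image's dimensions, so the inputs are
    assumed to share one size. Returns the path to a temporary .pptx file.
    """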
with Image.open(img_files[0]) as img:
img_width_px, img_height_px = img.size
def px_to_emu(px, dpi=96):
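        """Convert pixels to EMU (English Metric Units, 914400 per inch), assuming `dpi` pixels per inch."""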
inch = px / dpi
emu = inch * 914400
return int(emu)
prs = Presentation()
prs.slide_width = px_to_emu(img_width_px)
prs.slide_height = px_to_emu(img_height_px)
slide = prs.slides.add_slide(prs.slide_layouts[6])
left = top = 0
for img_path in img_files:
slide.shapes.add_picture(
img_path,
left,
top,
width=px_to_emu(img_width_px),
height=px_to_emu(img_height_px),
)
with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
prs.save(tmp.name)
return tmp.name
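
# Usage sketch (file names are hypothetical):
#     pptx_path = imagelist_to_pptx(["layer_1.png", "layer_2.png"])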
def _clamp_int(x, default: int, lo: int, hi: int) -> int:
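    """Coerce `x` to an int (falling back to `default`), then clamp it to [lo, hi]."""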
try:
v = int(x)
except Exception:
v = default
return max(lo, min(hi, v))
# Dynamic duration callable: must accept the same args as infer(). It returns seconds.
def get_duration(
input_image,
seed=777,
randomize_seed=False,
prompt=None,
neg_prompt=" ",
true_guidance_scale=4.0,
num_inference_steps=50,
layer=4,
cfg_norm=True,
use_en_prompt=True,
resolution=640,
gpu_duration=1000, # <-- NEW
):
# Allow user override via UI (text field), but keep it sane
return _clamp_int(gpu_duration, default=1000, lo=20, hi=1500)
@spaces.GPU(duration=get_duration)
def infer(
input_image,
seed=777,
randomize_seed=False,
prompt=None,
neg_prompt=" ",
true_guidance_scale=4.0,
num_inference_steps=50,
layer=4,
cfg_norm=True,
use_en_prompt=True,
resolution=640,
gpu_duration=1000, # <-- NEW (must match get_duration signature)
):
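    """Decompose `input_image` into transparent layers with Qwen-Image-Layered.

    Returns (layer images for the gallery, path to a stacked-slide PPTX,
    path to a ZIP of per-layer PNGs).
    """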
# Seed
if randomize_seed:
seed = random.randint(0, MAX_SEED)
# Normalize resolution input
resolution = _clamp_int(resolution, default=640, lo=640, hi=1024)
if resolution not in (640, 1024):
resolution = 640
# Normalize image input
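    # (round-trip through RGB to flatten any source alpha, then re-attach an opaque alpha channel)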
if isinstance(input_image, list):
input_image = input_image[0]
if isinstance(input_image, str):
pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
elif isinstance(input_image, Image.Image):
pil_image = input_image.convert("RGB").convert("RGBA")
elif isinstance(input_image, np.ndarray):
pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
else:
raise ValueError(f"Unsupported input_image type: {type(input_image)}")
gen_device = "cuda" if torch.cuda.is_available() else "cpu"
inputs = {
"image": pil_image,
"generator": torch.Generator(device=gen_device).manual_seed(seed),
"true_cfg_scale": true_guidance_scale,
"prompt": prompt,
"negative_prompt": neg_prompt,
"num_inference_steps": num_inference_steps,
"num_images_per_prompt": 1,
"layers": layer,
"resolution": resolution, # 640 or 1024
"cfg_normalize": cfg_norm,
"use_en_prompt": use_en_prompt,
}
print("INFER INPUTS:", inputs)
print("REQUESTED GPU DURATION:", gpu_duration)
with torch.inference_mode():
out = pipeline(**inputs)
output_images = out.images[0] # list of PIL images (layers)
# Prepare gallery + export files
gallery_out = []
temp_files = []
for img in output_images:
gallery_out.append(img)
        # Use a with-block so the temp file handle is closed after writing
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            img.save(tmp.name)
        temp_files.append(tmp.name)
pptx_path = imagelist_to_pptx(temp_files)
with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
for i, img_path in enumerate(temp_files):
zipf.write(img_path, f"layer_{i+1}.png")
zip_path = tmpzip.name
return gallery_out, pptx_path, zip_path
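
# Minimal local sketch (bypasses the UI; the path below is one of the bundled
# example assets listed further down):
#     layers, pptx_path, zip_path = infer("assets/test_images/1.png", randomize_seed=True)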
ensure_dirname(LOG_DIR)
examples = [
"assets/test_images/1.png",
"assets/test_images/2.png",
"assets/test_images/3.png",
"assets/test_images/4.png",
"assets/test_images/5.png",
"assets/test_images/6.png",
"assets/test_images/7.png",
"assets/test_images/8.png",
"assets/test_images/9.png",
"assets/test_images/10.png",
"assets/test_images/11.png",
"assets/test_images/12.png",
"assets/test_images/13.png",
]
with gr.Blocks() as demo:
with gr.Column(elem_id="col-container"):
gr.HTML(
'<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
)
gr.Markdown(
"""
            The text prompt should describe the overall content of the input image, including elements that may be partially occluded (e.g., you can specify text hidden behind a foreground object). It is not designed to explicitly control the semantic content of individual layers.
"""
)
with gr.Row():
with gr.Column(scale=1):
input_image = gr.Image(label="Input Image", image_mode="RGBA")
with gr.Accordion("Advanced Settings", open=False):
prompt = gr.Textbox(
label="Prompt (Optional)",
placeholder="Please enter the prompt to descibe the image. (Optional)",
value="",
lines=2,
)
neg_prompt = gr.Textbox(
label="Negative Prompt (Optional)",
placeholder="Please enter the negative prompt",
value=" ",
lines=2,
)
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0,
)
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
true_guidance_scale = gr.Slider(
label="True guidance scale",
minimum=1.0,
maximum=10.0,
step=0.1,
value=4.0,
)
num_inference_steps = gr.Slider(
label="Number of inference steps",
minimum=1,
maximum=50,
step=1,
value=50,
)
layer = gr.Slider(
label="Layers",
minimum=2,
maximum=10,
step=1,
value=4,
)
resolution = gr.Radio(
label="Processing resolution",
choices=[640, 1024],
value=640,
)
                    cfg_norm = gr.Checkbox(
                        label="Enable CFG normalization", value=True
                    )
                    use_en_prompt = gr.Checkbox(
                        label="Auto-caption in English when no prompt is given (uncheck for Chinese)",
                        value=True,
                    )
# NEW: text field for GPU duration override (seconds)
gpu_duration = gr.Textbox(
label="GPU duration override (seconds, 20..1500)",
value="1000",
lines=1,
placeholder="e.g. 60, 120, 300, 1000, 1500",
)
run_button = gr.Button("Decompose!", variant="primary")
with gr.Column(scale=2):
gallery = gr.Gallery(label="Layers", columns=4, rows=1, format="png")
with gr.Row():
export_file = gr.File(label="Download PPTX")
export_zip_file = gr.File(label="Download ZIP")
gr.Examples(
examples=examples,
inputs=[input_image],
outputs=[gallery, export_file, export_zip_file],
fn=infer,
examples_per_page=14,
cache_examples=False,
run_on_click=True,
)
run_button.click(
fn=infer,
inputs=[
input_image,
seed,
randomize_seed,
prompt,
neg_prompt,
true_guidance_scale,
num_inference_steps,
layer,
cfg_norm,
use_en_prompt,
resolution,
gpu_duration, # <-- NEW
],
outputs=[gallery, export_file, export_zip_file],
)
if __name__ == "__main__":
demo.launch()