Qwen-Image-Layered-1024

Running on Zero

App Files Files Community

hexware committed on Dec 22, 2025

Commit

893f414

verified ·

1 Parent(s): 33759f1

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -100

app.py CHANGED Viewed

@@ -3,33 +3,40 @@ import uuid
 import numpy as np
 import random
 import tempfile
 import spaces
-import zipfile
-from PIL import Image
-from diffusers import QwenImageLayeredPipeline
 import torch
-from pptx import Presentation
 import gradio as gr
 LOG_DIR = "/tmp/local"
 MAX_SEED = np.iinfo(np.int32).max
 from huggingface_hub import login
-login(token=os.environ.get('hf'))
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-pipeline = QwenImageLayeredPipeline.from_pretrained("Qwen/Qwen-Image-Layered", torch_dtype=dtype).to(device)
-# pipeline.set_progress_bar_config(disable=None)
 def ensure_dirname(path: str):
     if path and not os.path.exists(path):
         os.makedirs(path, exist_ok=True)
 def random_str(length=8):
     return uuid.uuid4().hex[:length]
 def imagelist_to_pptx(img_files):
     with Image.open(img_files[0]) as img:
         img_width_px, img_height_px = img.size
@@ -47,49 +54,49 @@ def imagelist_to_pptx(img_files):
     left = top = 0
     for img_path in img_files:
-        slide.shapes.add_picture(img_path, left, top, width=px_to_emu(img_width_px), height=px_to_emu(img_height_px))
     with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
         prs.save(tmp.name)
         return tmp.name
-def export_gallery(images):
-    # images: list of image file paths
-    images = [e[0] for e in images]
-    pptx_path = imagelist_to_pptx(images)
-    return pptx_path
-def export_gallery_zip(images):
-    # images: list of tuples (file_path, caption)
-    images = [e[0] for e in images]
-    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp:
-        with zipfile.ZipFile(tmp.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            for i, img_path in enumerate(images):
-                # Get the file extension from original file
-                ext = os.path.splitext(img_path)[1] or '.png'
-                # Add each image to the zip with a numbered filename
-                zipf.write(img_path, f"layer_{i+1}{ext}")
-        return tmp.name
-@spaces.GPU(duration=180)
-def infer(input_image,
-          seed=777,
-          randomize_seed=False,
-          prompt=None,
-          neg_prompt=" ",
-          true_guidance_scale=4.0,
-          num_inference_steps=50,
-          layer=4,
-          cfg_norm=True,
-          use_en_prompt=True):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     if isinstance(input_image, list):
         input_image = input_image[0]
     if isinstance(input_image, str):
         pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
     elif isinstance(input_image, Image.Image):
@@ -97,80 +104,89 @@ def infer(input_image,
     elif isinstance(input_image, np.ndarray):
         pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
     else:
-        raise ValueError("Unsupported input_image type: %s" % type(input_image))
     inputs = {
         "image": pil_image,
-        "generator": torch.Generator(device='cuda').manual_seed(seed),
         "true_cfg_scale": true_guidance_scale,
         "prompt": prompt,
         "negative_prompt": neg_prompt,
         "num_inference_steps": num_inference_steps,
         "num_images_per_prompt": 1,
         "layers": layer,
-        "resolution": 640,      # Using different bucket (640, 1024) to determine the resolution. For this version, 640 is recommended
-        "cfg_normalize": cfg_norm,  # Whether enable cfg normalization.
-        "use_en_prompt": use_en_prompt,
     }
-    print(inputs)
     with torch.inference_mode():
-        output = pipeline(**inputs)
-        output_images = output.images[0]
-    output = []
     temp_files = []
-    for i, image in enumerate(output_images):
-        output.append(image)
-        # Save to temp file for export
         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
-        image.save(tmp.name)
         temp_files.append(tmp.name)
-    # Generate PPTX
     pptx_path = imagelist_to_pptx(temp_files)
-    # Generate ZIP
-    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp:
-        with zipfile.ZipFile(tmp.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for i, img_path in enumerate(temp_files):
                 zipf.write(img_path, f"layer_{i+1}.png")
-        zip_path = tmp.name
-    return output, pptx_path, zip_path
 ensure_dirname(LOG_DIR)
-examples = [
-            "assets/test_images/1.png",
-            "assets/test_images/2.png",
-            "assets/test_images/3.png",
-            "assets/test_images/4.png",
-            "assets/test_images/5.png",
-            "assets/test_images/6.png",
-            "assets/test_images/7.png",
-            "assets/test_images/8.png",
-            "assets/test_images/9.png",
-            "assets/test_images/10.png",
-            "assets/test_images/11.png",
-            "assets/test_images/12.png",
-            "assets/test_images/13.png",
-            ]
 with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML('<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">')
-        gr.Markdown("""
-                    The text prompt is intended to describe the overall content of the input image—including elements that may be partially occluded (e.g., you may specify the text hidden behind a foreground object). It is not designed to control the semantic content of individual layers explicitly.
-                    """)
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(label="Input Image", image_mode="RGBA")
                 with gr.Accordion("Advanced Settings", open=False):
                     prompt = gr.Textbox(
                         label="Prompt (Optional)",
-                        placeholder="Please enter the prompt to descibe the image. （Optional）",
                         value="",
                         lines=2,
                     )
@@ -180,7 +196,7 @@ with gr.Blocks() as demo:
                         value=" ",
                         lines=2,
                     )
                     seed = gr.Slider(
                         label="Seed",
                         minimum=0,
@@ -189,13 +205,13 @@ with gr.Blocks() as demo:
                         value=0,
                     )
                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                     true_guidance_scale = gr.Slider(
                         label="True guidance scale",
                         minimum=1.0,
                         maximum=10.0,
                         step=0.1,
-                        value=4.0
                     )
                     num_inference_steps = gr.Slider(
@@ -214,9 +230,20 @@ with gr.Blocks() as demo:
                         value=4,
                     )
-                    cfg_norm = gr.Checkbox(label="Whether enable CFG normalization", value=True)
-                    use_en_prompt = gr.Checkbox(label="Automatic caption language if no prompt provided, True for EN, False for ZH", value=True)
                 run_button = gr.Button("Decompose!", variant="primary")
             with gr.Column(scale=2):
@@ -225,13 +252,14 @@ with gr.Blocks() as demo:
                     export_file = gr.File(label="Download PPTX")
                     export_zip_file = gr.File(label="Download ZIP")
-    gr.Examples(examples=examples,
-                inputs=[input_image],
-                outputs=[gallery, export_file, export_zip_file],
-                fn=infer,
-                examples_per_page=14,
-                cache_examples=False,
-                run_on_click=True
     )
     run_button.click(
@@ -247,7 +275,8 @@ with gr.Blocks() as demo:
             layer,
             cfg_norm,
             use_en_prompt,
-        ],
         outputs=[gallery, export_file, export_zip_file],
     )

 import numpy as np
 import random
 import tempfile
+import zipfile
 import spaces
 import torch
 import gradio as gr
+from PIL import Image
+from diffusers import QwenImageLayeredPipeline
+from pptx import Presentation
 LOG_DIR = "/tmp/local"
 MAX_SEED = np.iinfo(np.int32).max
+# Optional HF login (works in Spaces if you set HF token as secret env var "hf")
 from huggingface_hub import login
+login(token=os.environ.get("hf"))
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+pipeline = QwenImageLayeredPipeline.from_pretrained(
+    "Qwen/Qwen-Image-Layered", torch_dtype=dtype
+).to(device)
 def ensure_dirname(path: str):
     if path and not os.path.exists(path):
         os.makedirs(path, exist_ok=True)
 def random_str(length=8):
     return uuid.uuid4().hex[:length]
 def imagelist_to_pptx(img_files):
     with Image.open(img_files[0]) as img:
         img_width_px, img_height_px = img.size
     left = top = 0
     for img_path in img_files:
+        slide.shapes.add_picture(
+            img_path,
+            left,
+            top,
+            width=px_to_emu(img_width_px),
+            height=px_to_emu(img_height_px),
+        )
     with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
         prs.save(tmp.name)
         return tmp.name
+@spaces.GPU(duration=1500)
+def infer(
+    input_image,
+    seed=777,
+    randomize_seed=False,
+    prompt=None,
+    neg_prompt=" ",
+    true_guidance_scale=4.0,
+    num_inference_steps=50,
+    layer=4,
+    cfg_norm=True,
+    use_en_prompt=True,
+    resolution=640,
+):
+    # Seed
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
+    # Normalize resolution input
+    try:
+        resolution = int(resolution)
+    except Exception:
+        resolution = 640
+    if resolution not in (640, 1024):
+        resolution = 640
+    # Normalize image input
     if isinstance(input_image, list):
         input_image = input_image[0]
     if isinstance(input_image, str):
         pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
     elif isinstance(input_image, Image.Image):
     elif isinstance(input_image, np.ndarray):
         pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
     else:
+        raise ValueError(f"Unsupported input_image type: {type(input_image)}")
+    gen_device = "cuda" if torch.cuda.is_available() else "cpu"
     inputs = {
         "image": pil_image,
+        "generator": torch.Generator(device=gen_device).manual_seed(seed),
         "true_cfg_scale": true_guidance_scale,
         "prompt": prompt,
         "negative_prompt": neg_prompt,
         "num_inference_steps": num_inference_steps,
         "num_images_per_prompt": 1,
         "layers": layer,
+        "resolution": resolution,  # <-- 640 or 1024
+        "cfg_normalize": cfg_norm,
+        "use_en_prompt": use_en_prompt,
     }
+    print("INFER INPUTS:", inputs)
     with torch.inference_mode():
+        out = pipeline(**inputs)
+        output_images = out.images[0]  # list of PIL images (layers)
+    # Prepare gallery + export files
+    gallery_out = []
     temp_files = []
+    for img in output_images:
+        gallery_out.append(img)
         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+        img.save(tmp.name)
         temp_files.append(tmp.name)
     pptx_path = imagelist_to_pptx(temp_files)
+    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
+        with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
             for i, img_path in enumerate(temp_files):
                 zipf.write(img_path, f"layer_{i+1}.png")
+        zip_path = tmpzip.name
+    return gallery_out, pptx_path, zip_path
 ensure_dirname(LOG_DIR)
+examples = [
+    "assets/test_images/1.png",
+    "assets/test_images/2.png",
+    "assets/test_images/3.png",
+    "assets/test_images/4.png",
+    "assets/test_images/5.png",
+    "assets/test_images/6.png",
+    "assets/test_images/7.png",
+    "assets/test_images/8.png",
+    "assets/test_images/9.png",
+    "assets/test_images/10.png",
+    "assets/test_images/11.png",
+    "assets/test_images/12.png",
+    "assets/test_images/13.png",
+]
 with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
+        gr.HTML(
+            '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
+            'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
+        )
+        gr.Markdown(
+            """
+The text prompt is intended to describe the overall content of the input image—including elements that may be partially occluded (e.g., you may specify the text hidden behind a foreground object). It is not designed to control the semantic content of individual layers explicitly.
+            """
+        )
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(label="Input Image", image_mode="RGBA")
                 with gr.Accordion("Advanced Settings", open=False):
                     prompt = gr.Textbox(
                         label="Prompt (Optional)",
+                        placeholder="Please enter the prompt to descibe the image. (Optional)",
                         value="",
                         lines=2,
                     )
                         value=" ",
                         lines=2,
                     )
                     seed = gr.Slider(
                         label="Seed",
                         minimum=0,
                         value=0,
                     )
                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                     true_guidance_scale = gr.Slider(
                         label="True guidance scale",
                         minimum=1.0,
                         maximum=10.0,
                         step=0.1,
+                        value=4.0,
                     )
                     num_inference_steps = gr.Slider(
                         value=4,
                     )
+                    resolution = gr.Radio(
+                        label="Processing resolution",
+                        choices=[640, 1024],
+                        value=640,
+                    )
+                    cfg_norm = gr.Checkbox(
+                        label="Whether enable CFG normalization", value=True
+                    )
+                    use_en_prompt = gr.Checkbox(
+                        label="Automatic caption language if no prompt provided, True for EN, False for ZH",
+                        value=True,
+                    )
                 run_button = gr.Button("Decompose!", variant="primary")
             with gr.Column(scale=2):
                     export_file = gr.File(label="Download PPTX")
                     export_zip_file = gr.File(label="Download ZIP")
+    gr.Examples(
+        examples=examples,
+        inputs=[input_image],
+        outputs=[gallery, export_file, export_zip_file],
+        fn=infer,
+        examples_per_page=14,
+        cache_examples=False,
+        run_on_click=True,
     )
     run_button.click(
             layer,
             cfg_norm,
             use_en_prompt,
+            resolution,  # <-- NEW
+        ],
         outputs=[gallery, export_file, export_zip_file],
     )