Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,6 @@ MAX_SEED = np.iinfo(np.int32).max
|
|
| 18 |
|
| 19 |
# Optional HF login (works in Spaces if you set HF token as secret env var "hf")
|
| 20 |
from huggingface_hub import login
|
| 21 |
-
|
| 22 |
login(token=os.environ.get("hf"))
|
| 23 |
|
| 24 |
dtype = torch.bfloat16
|
|
@@ -89,12 +88,46 @@ def get_duration(
|
|
| 89 |
cfg_norm=True,
|
| 90 |
use_en_prompt=True,
|
| 91 |
resolution=640,
|
| 92 |
-
gpu_duration=1000,
|
|
|
|
|
|
|
|
|
|
| 93 |
):
|
| 94 |
-
# Allow user override via UI (text field), but keep it sane
|
| 95 |
return _clamp_int(gpu_duration, default=1000, lo=20, hi=1500)
|
| 96 |
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
@spaces.GPU(duration=get_duration)
|
| 99 |
def infer(
|
| 100 |
input_image,
|
|
@@ -108,7 +141,10 @@ def infer(
|
|
| 108 |
cfg_norm=True,
|
| 109 |
use_en_prompt=True,
|
| 110 |
resolution=640,
|
| 111 |
-
gpu_duration=1000,
|
|
|
|
|
|
|
|
|
|
| 112 |
):
|
| 113 |
# Seed
|
| 114 |
if randomize_seed:
|
|
@@ -120,30 +156,22 @@ def infer(
|
|
| 120 |
resolution = 640
|
| 121 |
|
| 122 |
# Normalize image input
|
| 123 |
-
|
| 124 |
-
input_image = input_image[0]
|
| 125 |
-
|
| 126 |
-
if isinstance(input_image, str):
|
| 127 |
-
pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
|
| 128 |
-
elif isinstance(input_image, Image.Image):
|
| 129 |
-
pil_image = input_image.convert("RGB").convert("RGBA")
|
| 130 |
-
elif isinstance(input_image, np.ndarray):
|
| 131 |
-
pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
|
| 132 |
-
else:
|
| 133 |
-
raise ValueError(f"Unsupported input_image type: {type(input_image)}")
|
| 134 |
|
| 135 |
gen_device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 136 |
|
|
|
|
| 137 |
inputs = {
|
| 138 |
"image": pil_image,
|
| 139 |
-
"generator":
|
| 140 |
"true_cfg_scale": true_guidance_scale,
|
| 141 |
"prompt": prompt,
|
| 142 |
"negative_prompt": neg_prompt,
|
| 143 |
"num_inference_steps": num_inference_steps,
|
| 144 |
"num_images_per_prompt": 1,
|
| 145 |
"layers": layer,
|
| 146 |
-
"resolution": resolution,
|
| 147 |
"cfg_normalize": cfg_norm,
|
| 148 |
"use_en_prompt": use_en_prompt,
|
| 149 |
}
|
|
@@ -153,27 +181,46 @@ def infer(
|
|
| 153 |
|
| 154 |
with torch.inference_mode():
|
| 155 |
out = pipeline(**inputs)
|
| 156 |
-
|
| 157 |
|
| 158 |
-
#
|
| 159 |
-
|
| 160 |
-
temp_files = []
|
| 161 |
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
temp_files.append(tmp.name)
|
| 167 |
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
|
| 179 |
ensure_dirname(LOG_DIR)
|
|
@@ -271,7 +318,6 @@ The text prompt is intended to describe the overall content of the input image
|
|
| 271 |
value=True,
|
| 272 |
)
|
| 273 |
|
| 274 |
-
# NEW: text field for GPU duration override (seconds)
|
| 275 |
gpu_duration = gr.Textbox(
|
| 276 |
label="GPU duration override (seconds, 20..1500)",
|
| 277 |
value="1000",
|
|
@@ -279,6 +325,26 @@ The text prompt is intended to describe the overall content of the input image
|
|
| 279 |
placeholder="e.g. 60, 120, 300, 1000, 1500",
|
| 280 |
)
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
run_button = gr.Button("Decompose!", variant="primary")
|
| 283 |
|
| 284 |
with gr.Column(scale=2):
|
|
@@ -287,10 +353,23 @@ The text prompt is intended to describe the overall content of the input image
|
|
| 287 |
export_file = gr.File(label="Download PPTX")
|
| 288 |
export_zip_file = gr.File(label="Download ZIP")
|
| 289 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
gr.Examples(
|
| 291 |
examples=examples,
|
| 292 |
inputs=[input_image],
|
| 293 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
fn=infer,
|
| 295 |
examples_per_page=14,
|
| 296 |
cache_examples=False,
|
|
@@ -311,9 +390,19 @@ The text prompt is intended to describe the overall content of the input image
|
|
| 311 |
cfg_norm,
|
| 312 |
use_en_prompt,
|
| 313 |
resolution,
|
| 314 |
-
gpu_duration,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
],
|
| 316 |
-
outputs=[gallery, export_file, export_zip_file],
|
| 317 |
)
|
| 318 |
|
| 319 |
if __name__ == "__main__":
|
|
|
|
| 18 |
|
| 19 |
# Optional HF login (works in Spaces if you set HF token as secret env var "hf")
|
| 20 |
from huggingface_hub import login
|
|
|
|
| 21 |
login(token=os.environ.get("hf"))
|
| 22 |
|
| 23 |
dtype = torch.bfloat16
|
|
|
|
| 88 |
cfg_norm=True,
|
| 89 |
use_en_prompt=True,
|
| 90 |
resolution=640,
|
| 91 |
+
gpu_duration=1000,
|
| 92 |
+
refine_enabled=False,
|
| 93 |
+
refine_layer_index=1,
|
| 94 |
+
refine_sub_layers=3,
|
| 95 |
):
|
|
|
|
| 96 |
return _clamp_int(gpu_duration, default=1000, lo=20, hi=1500)
|
| 97 |
|
| 98 |
|
| 99 |
+
def _normalize_input_image(input_image):
    """Coerce a Gradio image input into an RGBA PIL image.

    Accepts a file path, a PIL image, a numpy array, or a list whose first
    element is any of those (Gradio gallery components deliver lists).

    Args:
        input_image: str path, PIL.Image.Image, np.ndarray, or a list
            containing one of those as its first element.

    Returns:
        PIL.Image.Image in RGBA mode. The RGB->RGBA round trip drops any
        pre-existing alpha channel before adding a fresh, fully opaque one.

    Raises:
        ValueError: if the list is empty or the input type is unsupported.
    """
    if isinstance(input_image, list):
        if not input_image:
            # Previously this crashed with an opaque IndexError on [0];
            # surface an explicit, actionable error instead.
            raise ValueError("input_image list is empty")
        input_image = input_image[0]

    if isinstance(input_image, str):
        # Strings are treated as filesystem paths.
        return Image.open(input_image).convert("RGB").convert("RGBA")
    if isinstance(input_image, Image.Image):
        return input_image.convert("RGB").convert("RGBA")
    if isinstance(input_image, np.ndarray):
        return Image.fromarray(input_image).convert("RGB").convert("RGBA")

    raise ValueError(f"Unsupported input_image type: {type(input_image)}")
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def _export_images_to_pptx_and_zip(pil_images, zip_prefix="layer"):
    """Save images to temp PNG files, then bundle them into a PPTX and a ZIP.

    Args:
        pil_images: iterable of PIL images to export.
        zip_prefix: base name for entries inside the ZIP archive; entries
            are named "{zip_prefix}_{i}.png" with a 1-based index.

    Returns:
        (pptx_path, zip_path): filesystem paths of the generated files.
        The temp PNGs and both archives are intentionally left on disk
        (delete=False) so Gradio can serve them after this call returns.
    """
    temp_files = []
    for img in pil_images:
        # delete=False keeps the file alive for imagelist_to_pptx / zipping;
        # the `with` closes the OS handle immediately so we no longer leak
        # one file descriptor per exported image.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            png_path = tmp.name
        img.save(png_path)
        temp_files.append(png_path)

    pptx_path = imagelist_to_pptx(temp_files)

    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
        with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i, img_path in enumerate(temp_files, start=1):
                zipf.write(img_path, f"{zip_prefix}_{i}.png")
        zip_path = tmpzip.name

    return pptx_path, zip_path
|
| 129 |
+
|
| 130 |
+
|
| 131 |
@spaces.GPU(duration=get_duration)
|
| 132 |
def infer(
|
| 133 |
input_image,
|
|
|
|
| 141 |
cfg_norm=True,
|
| 142 |
use_en_prompt=True,
|
| 143 |
resolution=640,
|
| 144 |
+
gpu_duration=1000,
|
| 145 |
+
refine_enabled=False,
|
| 146 |
+
refine_layer_index=1, # 1-based for UI convenience
|
| 147 |
+
refine_sub_layers=3,
|
| 148 |
):
|
| 149 |
# Seed
|
| 150 |
if randomize_seed:
|
|
|
|
| 156 |
resolution = 640
|
| 157 |
|
| 158 |
# Normalize image input
|
| 159 |
+
pil_image = _normalize_input_image(input_image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
gen_device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 162 |
+
generator = torch.Generator(device=gen_device).manual_seed(seed)
|
| 163 |
|
| 164 |
+
# First pass inputs
|
| 165 |
inputs = {
|
| 166 |
"image": pil_image,
|
| 167 |
+
"generator": generator,
|
| 168 |
"true_cfg_scale": true_guidance_scale,
|
| 169 |
"prompt": prompt,
|
| 170 |
"negative_prompt": neg_prompt,
|
| 171 |
"num_inference_steps": num_inference_steps,
|
| 172 |
"num_images_per_prompt": 1,
|
| 173 |
"layers": layer,
|
| 174 |
+
"resolution": resolution,
|
| 175 |
"cfg_normalize": cfg_norm,
|
| 176 |
"use_en_prompt": use_en_prompt,
|
| 177 |
}
|
|
|
|
| 181 |
|
| 182 |
with torch.inference_mode():
|
| 183 |
out = pipeline(**inputs)
|
| 184 |
+
output_layers = out.images[0] # list[PIL.Image]
|
| 185 |
|
| 186 |
+
# Export first pass
|
| 187 |
+
pptx_path, zip_path = _export_images_to_pptx_and_zip(output_layers, zip_prefix="layer")
|
|
|
|
| 188 |
|
| 189 |
+
# Optional: Recursive (refine one layer into sub-layers) — no separate steps/resolution/cfg
|
| 190 |
+
refined_gallery = []
|
| 191 |
+
refined_pptx = None
|
| 192 |
+
refined_zip = None
|
|
|
|
| 193 |
|
| 194 |
+
if refine_enabled and len(output_layers) > 0:
|
| 195 |
+
idx0 = _clamp_int(refine_layer_index, default=1, lo=1, hi=len(output_layers)) - 1
|
| 196 |
+
refine_sub_layers = _clamp_int(refine_sub_layers, default=3, lo=2, hi=10)
|
| 197 |
|
| 198 |
+
selected_layer = output_layers[idx0].convert("RGBA")
|
| 199 |
+
|
| 200 |
+
refined_inputs = dict(inputs) # reuse same params
|
| 201 |
+
refined_inputs["image"] = selected_layer
|
| 202 |
+
refined_inputs["layers"] = refine_sub_layers
|
| 203 |
|
| 204 |
+
print("REFINE ENABLED:", True)
|
| 205 |
+
print("REFINE LAYER INDEX (1-based):", idx0 + 1)
|
| 206 |
+
print("REFINE SUB-LAYERS:", refine_sub_layers)
|
| 207 |
+
print("REFINED INPUTS:", {k: v for k, v in refined_inputs.items() if k != "image"})
|
| 208 |
+
|
| 209 |
+
with torch.inference_mode():
|
| 210 |
+
refined_out = pipeline(**refined_inputs)
|
| 211 |
+
sub_layers = refined_out.images[0]
|
| 212 |
+
|
| 213 |
+
refined_gallery = sub_layers
|
| 214 |
+
refined_pptx, refined_zip = _export_images_to_pptx_and_zip(sub_layers, zip_prefix=f"sub_layer_{idx0+1}")
|
| 215 |
+
|
| 216 |
+
return (
|
| 217 |
+
output_layers,
|
| 218 |
+
pptx_path,
|
| 219 |
+
zip_path,
|
| 220 |
+
refined_gallery,
|
| 221 |
+
refined_pptx,
|
| 222 |
+
refined_zip,
|
| 223 |
+
)
|
| 224 |
|
| 225 |
|
| 226 |
ensure_dirname(LOG_DIR)
|
|
|
|
| 318 |
value=True,
|
| 319 |
)
|
| 320 |
|
|
|
|
| 321 |
gpu_duration = gr.Textbox(
|
| 322 |
label="GPU duration override (seconds, 20..1500)",
|
| 323 |
value="1000",
|
|
|
|
| 325 |
placeholder="e.g. 60, 120, 300, 1000, 1500",
|
| 326 |
)
|
| 327 |
|
| 328 |
+
gr.Markdown("### Advanced: Recursive decomposition")
|
| 329 |
+
refine_enabled = gr.Checkbox(
|
| 330 |
+
label="Refine one layer into sub-layers",
|
| 331 |
+
value=False,
|
| 332 |
+
)
|
| 333 |
+
refine_layer_index = gr.Slider(
|
| 334 |
+
label="Refine layer index (1-based)",
|
| 335 |
+
minimum=1,
|
| 336 |
+
maximum=10,
|
| 337 |
+
step=1,
|
| 338 |
+
value=1,
|
| 339 |
+
)
|
| 340 |
+
refine_sub_layers = gr.Slider(
|
| 341 |
+
label="Sub-layers (for refined layer)",
|
| 342 |
+
minimum=2,
|
| 343 |
+
maximum=10,
|
| 344 |
+
step=1,
|
| 345 |
+
value=3,
|
| 346 |
+
)
|
| 347 |
+
|
| 348 |
run_button = gr.Button("Decompose!", variant="primary")
|
| 349 |
|
| 350 |
with gr.Column(scale=2):
|
|
|
|
| 353 |
export_file = gr.File(label="Download PPTX")
|
| 354 |
export_zip_file = gr.File(label="Download ZIP")
|
| 355 |
|
| 356 |
+
gr.Markdown("### Refined sub-layers")
|
| 357 |
+
refined_gallery = gr.Gallery(label="Sub-layers", columns=4, rows=1, format="png")
|
| 358 |
+
with gr.Row():
|
| 359 |
+
refined_export_file = gr.File(label="Download refined PPTX")
|
| 360 |
+
refined_export_zip_file = gr.File(label="Download refined ZIP")
|
| 361 |
+
|
| 362 |
gr.Examples(
|
| 363 |
examples=examples,
|
| 364 |
inputs=[input_image],
|
| 365 |
+
outputs=[
|
| 366 |
+
gallery,
|
| 367 |
+
export_file,
|
| 368 |
+
export_zip_file,
|
| 369 |
+
refined_gallery,
|
| 370 |
+
refined_export_file,
|
| 371 |
+
refined_export_zip_file,
|
| 372 |
+
],
|
| 373 |
fn=infer,
|
| 374 |
examples_per_page=14,
|
| 375 |
cache_examples=False,
|
|
|
|
| 390 |
cfg_norm,
|
| 391 |
use_en_prompt,
|
| 392 |
resolution,
|
| 393 |
+
gpu_duration,
|
| 394 |
+
refine_enabled,
|
| 395 |
+
refine_layer_index,
|
| 396 |
+
refine_sub_layers,
|
| 397 |
+
],
|
| 398 |
+
outputs=[
|
| 399 |
+
gallery,
|
| 400 |
+
export_file,
|
| 401 |
+
export_zip_file,
|
| 402 |
+
refined_gallery,
|
| 403 |
+
refined_export_file,
|
| 404 |
+
refined_export_zip_file,
|
| 405 |
],
|
|
|
|
| 406 |
)
|
| 407 |
|
| 408 |
if __name__ == "__main__":
|