hexware committed
Commit a6bc145 · verified · 1 Parent(s): eeb4923

Update app.py

Files changed (1)
  1. app.py +630 -332
app.py CHANGED
@@ -1,96 +1,63 @@
import os
import uuid
import random
import tempfile
import zipfile
- import numpy as np

import spaces
import torch
import gradio as gr
-
from PIL import Image
- from pptx import Presentation
from diffusers import QwenImageLayeredPipeline

LOG_DIR = "/tmp/local"
MAX_SEED = np.iinfo(np.int32).max

- # Reduce allocator fragmentation (new name; old PYTORCH_CUDA_ALLOC_CONF is deprecated)
- os.environ.setdefault("PYTORCH_ALLOC_CONF", "expandable_segments:True")
-
# Optional HF login (Spaces secret env var "hf")
from huggingface_hub import login
- login(token=os.environ.get("hf"))
-
- # ----------------------------
- # Device / dtype (memory-safe)
- # ----------------------------
- has_cuda = torch.cuda.is_available()
- device = "cuda" if has_cuda else ("mps" if torch.backends.mps.is_available() else "cpu")

- # fp16 is typically best for VRAM; CPU uses fp32
- torch_dtype = torch.float16 if device in ("cuda", "mps") else torch.float32

# ----------------------------
- # Load pipeline (avoid CPU RAM spikes)
# ----------------------------
- pipeline = QwenImageLayeredPipeline.from_pretrained(
-     "Qwen/Qwen-Image-Layered",
-     torch_dtype=torch_dtype,
-     low_cpu_mem_usage=True,
- )
-
- # Memory helpers (guarded)
- if hasattr(pipeline, "enable_attention_slicing"):
-     pipeline.enable_attention_slicing()
-
- # This pipeline may NOT expose enable_vae_slicing(), so guard both ways
- if hasattr(pipeline, "enable_vae_slicing"):
-     pipeline.enable_vae_slicing()
- elif hasattr(pipeline, "vae") and hasattr(pipeline.vae, "enable_slicing"):
-     pipeline.vae.enable_slicing()
-
- if device == "cuda":
-     # Best for Spaces: keep CPU RAM lower and avoid huge peak VRAM at startup
-     # (requires accelerate, usually present in Spaces)
-     try:
-         pipeline.enable_model_cpu_offload()
-     except Exception:
-         pipeline.to("cuda")
- elif device == "mps":
-     pipeline.to("mps")
- else:
-     pipeline.to("cpu")
-
-
def ensure_dirname(path: str):
    if path and not os.path.exists(path):
        os.makedirs(path, exist_ok=True)


def imagelist_to_pptx(img_files):
    with Image.open(img_files[0]) as img:
-         img_width_px, img_height_px = img.size
-
-     def px_to_emu(px, dpi=96):
-         inch = px / dpi
-         return int(inch * 914400)

    prs = Presentation()
-     prs.slide_width = px_to_emu(img_width_px)
-     prs.slide_height = px_to_emu(img_height_px)

    slide = prs.slides.add_slide(prs.slide_layouts[6])
    left = top = 0

    for img_path in img_files:
        slide.shapes.add_picture(
            img_path,
            left,
            top,
-             width=px_to_emu(img_width_px),
-             height=px_to_emu(img_height_px),
        )

    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
@@ -98,7 +65,15 @@ def imagelist_to_pptx(img_files):
        return tmp.name


- def _clamp_int(x, default: int, lo: int, hi: int) -> int:
    try:
        v = int(x)
    except Exception:
@@ -106,139 +81,389 @@ def _clamp_int(x, default: int, lo: int, hi: int) -> int:
    return max(lo, min(hi, v))


- def _safe_open_rgba(img_like):
-     if isinstance(img_like, list):
-         img_like = img_like[0]
-     if isinstance(img_like, str):
-         return Image.open(img_like).convert("RGB").convert("RGBA")
-     if isinstance(img_like, Image.Image):
-         return img_like.convert("RGB").convert("RGBA")
-     if isinstance(img_like, np.ndarray):
-         return Image.fromarray(img_like).convert("RGB").convert("RGBA")
-     raise ValueError(f"Unsupported input_image type: {type(img_like)}")


- def _update_refine_index_ui(n_layers: int, current_idx: int | None = None):
-     n_layers = max(1, int(n_layers))
-     if current_idx is None:
-         current_idx = 1
-     current_idx = max(1, min(int(current_idx), n_layers))
-     return gr.update(minimum=1, maximum=n_layers, value=current_idx)


- # Dynamic duration callable: must accept the same args as decompose() and refine()
- def get_duration(
-     input_image,
-     seed=0,
-     randomize_seed=True,
-     prompt="",
-     neg_prompt=" ",
-     true_guidance_scale=4.0,
-     num_inference_steps=50,
-     layer=7,
-     cfg_norm=True,
-     use_en_prompt=True,
-     resolution=1024,
-     gpu_duration="1000",
-     refine_layer_index=1,
-     refine_sub_layers=3,
- ):
-     return _clamp_int(gpu_duration, default=1000, lo=20, hi=1500)


- @spaces.GPU(duration=get_duration)
- def decompose(
-     input_image,
-     seed=0,
-     randomize_seed=True,
-     prompt="",
-     neg_prompt=" ",
-     true_guidance_scale=4.0,
-     num_inference_steps=50,
-     layer=7,
-     cfg_norm=True,
-     use_en_prompt=True,
-     resolution=1024,
-     gpu_duration="1000",
-     refine_layer_index=1,  # passed in (so we can clamp it nicely)
-     refine_sub_layers=3,   # unused here, but kept for duration signature parity
- ):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)

-     resolution = _clamp_int(resolution, default=1024, lo=640, hi=1024)
-     if resolution not in (640, 1024):
-         resolution = 1024

-     pil_image = _safe_open_rgba(input_image)

-     # Generator on CPU works well with CPU offload too
-     gen = torch.Generator(device="cpu").manual_seed(seed)

    inputs = {
-         "image": pil_image,
        "generator": gen,
        "true_cfg_scale": float(true_guidance_scale),
-         "prompt": prompt if prompt else None,
        "negative_prompt": neg_prompt,
        "num_inference_steps": int(num_inference_steps),
        "num_images_per_prompt": 1,
-         "layers": int(layer),
        "resolution": int(resolution),
        "cfg_normalize": bool(cfg_norm),
        "use_en_prompt": bool(use_en_prompt),
    }

-     print("DECOMPOSE INPUTS:", {k: v for k, v in inputs.items() if k != "image"})
-     print("REQUESTED GPU DURATION:", gpu_duration)
-
    with torch.inference_mode():
-         out = pipeline(**inputs)
-     output_images = out.images[0]  # list[PIL.Image]

-     # Save layers for exports + for the refine stage
-     layer_paths = []
-     gallery_out = []

-     for img in output_images:
-         gallery_out.append(img)
        tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
        img.save(tmp.name)
        layer_paths.append(tmp.name)

    pptx_path = imagelist_to_pptx(layer_paths)

-     with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
-         with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
-             for i, p in enumerate(layer_paths):
-                 zipf.write(p, f"layer_{i+1}.png")
-         zip_path = tmpzip.name

-     # Reset refined outputs on a new decompose
-     refined_gallery = []
-     refined_pptx = None
-     refined_zip = None

-     # For neatness: clamp the current refine index into the new [1..N] range
-     refine_index_update = _update_refine_index_ui(len(layer_paths), refine_layer_index)

    return (
-         gallery_out,
-         pptx_path,
-         zip_path,
-         layer_paths,  # gr.State
-         refined_gallery,
-         refined_pptx,
-         refined_zip,
-         refine_index_update,  # update refine slider bounds/value
    )


@spaces.GPU(duration=get_duration)
- def refine_selected_layer(
-     layer_paths,
-     refine_layer_index=1,
-     refine_sub_layers=3,
    seed=0,
    randomize_seed=True,
    prompt="",
@@ -248,70 +473,79 @@ def refine_selected_layer(
    cfg_norm=True,
    use_en_prompt=True,
    resolution=1024,
-     gpu_duration="1000",
):
-     if not layer_paths:
-         return [], None, None

-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)

-     # Clamp the index into the existing layers
-     n = len(layer_paths)
-     idx = _clamp_int(refine_layer_index, default=1, lo=1, hi=n) - 1

-     sub_layers = _clamp_int(refine_sub_layers, default=3, lo=2, hi=10)

-     resolution = _clamp_int(resolution, default=1024, lo=640, hi=1024)
-     if resolution not in (640, 1024):
-         resolution = 1024

-     selected_path = layer_paths[idx]
-     selected_layer_img = Image.open(selected_path).convert("RGBA")

-     gen = torch.Generator(device="cpu").manual_seed(seed)

-     inputs = {
-         "image": selected_layer_img,
-         "generator": gen,
-         "true_cfg_scale": float(true_guidance_scale),
-         "prompt": prompt if prompt else None,
-         "negative_prompt": neg_prompt,
-         "num_inference_steps": int(num_inference_steps),
-         "num_images_per_prompt": 1,
-         "layers": int(sub_layers),      # <-- the key parameter for recursive decomposition
-         "resolution": int(resolution),  # same resolution (no separate options for refine)
-         "cfg_normalize": bool(cfg_norm),
-         "use_en_prompt": bool(use_en_prompt),
    }

-     print("REFINE INPUTS:", {k: v for k, v in inputs.items() if k != "image"})
-     print("REQUESTED GPU DURATION:", gpu_duration)
-     print(f"REFINE: base layer index={idx+1}/{n}, sub_layers={sub_layers}")
-
-     with torch.inference_mode():
-         out = pipeline(**inputs)
-     refined_images = out.images[0]
-
-     refined_paths = []
-     refined_gallery = []
-     for img in refined_images:
-         refined_gallery.append(img)
-         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
-         img.save(tmp.name)
-         refined_paths.append(tmp.name)

-     refined_pptx = imagelist_to_pptx(refined_paths)

-     with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
-         with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
-             for i, p in enumerate(refined_paths):
-                 zipf.write(p, f"sub_layer_{i+1}.png")
-         refined_zip = tmpzip.name

-     return refined_gallery, refined_pptx, refined_zip


ensure_dirname(LOG_DIR)
examples = [f"assets/test_images/{i}.png" for i in range(1, 14)]
@@ -321,169 +555,233 @@ with gr.Blocks() as demo:
        '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
        'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
    )
    gr.Markdown(
        """
-         The text prompt is intended to describe the overall content of the input image, including elements that may be partially occluded.
-         It is not designed to control the semantic content of individual layers explicitly.
        """
    )

-     # State to store layer PNG paths from the last Decompose
-     layer_paths_state = gr.State([])

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", image_mode="RGBA")

-             with gr.Accordion("Advanced Settings", open=False):
-                 prompt = gr.Textbox(
-                     label="Prompt (Optional)",
-                     placeholder="Please enter the prompt to describe the image (optional)",
-                     value="",
-                     lines=2,
-                 )
-                 neg_prompt = gr.Textbox(
-                     label="Negative Prompt (Optional)",
-                     placeholder="Please enter the negative prompt",
-                     value=" ",
-                     lines=2,
-                 )

                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

-                 true_guidance_scale = gr.Slider(
-                     label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=4.0
-                 )
-
-                 num_inference_steps = gr.Slider(
-                     label="Number of inference steps", minimum=1, maximum=100, step=1, value=50
-                 )

-                 layer = gr.Slider(label="Layers", minimum=2, maximum=10, step=1, value=7)

-                 # default 1024, as requested earlier
-                 resolution = gr.Radio(
-                     label="Processing resolution",
-                     choices=[640, 1024],
-                     value=1024,
-                 )

-                 cfg_norm = gr.Checkbox(label="Whether to enable CFG normalization", value=True)
-                 use_en_prompt = gr.Checkbox(
-                     label="Automatic caption language if no prompt provided, True for EN, False for ZH",
-                     value=True,
-                 )

                gpu_duration = gr.Textbox(
                    label="GPU duration override (seconds, 20..1500)",
                    value="1000",
                    lines=1,
-                     placeholder="e.g. 60, 120, 300, 1000, 1500",
                )

-             decompose_btn = gr.Button("Decompose!", variant="primary")

-             with gr.Accordion("Refine layer (Recursive Decomposition)", open=False):
-                 refine_layer_index = gr.Slider(
-                     label="Refine layer index (1 = first layer)",
-                     minimum=1,
-                     maximum=7,
-                     step=1,
-                     value=1,
-                 )
-                 refine_sub_layers = gr.Slider(
-                     label="Sub-layers (how many to split the selected layer into)",
-                     minimum=2,
-                     maximum=10,
-                     step=1,
-                     value=3,
-                 )
-                 refine_btn = gr.Button("Refine selected layer", variant="secondary")

        with gr.Column(scale=2):
-             gallery = gr.Gallery(label="Layers", columns=4, rows=1, format="png")
-             with gr.Row():
-                 export_file = gr.File(label="Download PPTX")
-                 export_zip_file = gr.File(label="Download ZIP")

-             gr.Markdown("### Refined (sub-layers)")
-             refined_gallery = gr.Gallery(label="Sub-layers", columns=4, rows=1, format="png")
            with gr.Row():
-                 refined_export_file = gr.File(label="Download refined PPTX")
-                 refined_export_zip_file = gr.File(label="Download refined ZIP")
-
-     # Examples run Decompose
-     gr.Examples(
-         examples=examples,
-         inputs=[input_image],
-         outputs=[
-             gallery,
-             export_file,
-             export_zip_file,
-             layer_paths_state,
-             refined_gallery,
-             refined_export_file,
-             refined_export_zip_file,
-             refine_layer_index,  # update slider bounds/value
-         ],
-         fn=decompose,
-         examples_per_page=14,
-         cache_examples=False,
-         run_on_click=True,
-     )

-     # Decompose button
-     decompose_btn.click(
-         fn=decompose,
-         inputs=[
-             input_image,
-             seed,
-             randomize_seed,
-             prompt,
-             neg_prompt,
-             true_guidance_scale,
-             num_inference_steps,
-             layer,
-             cfg_norm,
-             use_en_prompt,
-             resolution,
-             gpu_duration,
-             refine_layer_index,  # so we can clamp nicely after a new decomposition
-             refine_sub_layers,   # for duration signature parity
-         ],
-         outputs=[
-             gallery,
-             export_file,
-             export_zip_file,
-             layer_paths_state,
-             refined_gallery,
-             refined_export_file,
-             refined_export_zip_file,
-             refine_layer_index,  # update slider bounds/value
-         ],
-     )

-     # Refine button
-     refine_btn.click(
-         fn=refine_selected_layer,
-         inputs=[
-             layer_paths_state,
-             refine_layer_index,
-             refine_sub_layers,
-             seed,
-             randomize_seed,
-             prompt,
-             neg_prompt,
-             true_guidance_scale,
-             num_inference_steps,
-             cfg_norm,
-             use_en_prompt,
-             resolution,
-             gpu_duration,
-         ],
-         outputs=[refined_gallery, refined_export_file, refined_export_zip_file],
-     )

if __name__ == "__main__":
-     demo.launch()
app.py (updated)

import os
import uuid
+ import numpy as np
import random
import tempfile
import zipfile
+ import threading
+ from datetime import datetime

import spaces
import torch
import gradio as gr
from PIL import Image
+
from diffusers import QwenImageLayeredPipeline
+ from pptx import Presentation
+

LOG_DIR = "/tmp/local"
MAX_SEED = np.iinfo(np.int32).max

# Optional HF login (Spaces secret env var "hf")
from huggingface_hub import login
+ _HF_TOKEN = os.environ.get("hf")
+ if _HF_TOKEN:
+     login(token=_HF_TOKEN)

# ----------------------------
+ # Helpers
# ----------------------------
def ensure_dirname(path: str):
    if path and not os.path.exists(path):
        os.makedirs(path, exist_ok=True)


+ def px_to_emu(px, dpi=96):
+     inch = px / dpi
+     return int(inch * 914400)
+
+
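Quick check of the pixel-to-EMU conversion (PowerPoint uses 914,400 EMU per inch, i.e. 9,525 EMU per pixel at the assumed 96 dpi):

>>> px_to_emu(1024)   # 1024 px / 96 dpi = 10.666… in × 914,400 EMU/in
9753600
>>> px_to_emu(640)
6096000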
def imagelist_to_pptx(img_files):
    with Image.open(img_files[0]) as img:
+         w, h = img.size

    prs = Presentation()
+     prs.slide_width = px_to_emu(w)
+     prs.slide_height = px_to_emu(h)

    slide = prs.slides.add_slide(prs.slide_layouts[6])
    left = top = 0

+     # Stack all images on top of each other (layers)
    for img_path in img_files:
        slide.shapes.add_picture(
            img_path,
            left,
            top,
+             width=px_to_emu(w),
+             height=px_to_emu(h),
        )

    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
(unchanged line 64 elided)
        return tmp.name


+ def make_zip(paths, prefix="layer"):
+     with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
+         with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as z:
+             for i, p in enumerate(paths):
+                 z.write(p, f"{prefix}_{i+1}.png")
+     return tmpzip.name
+
+
+ def clamp_int(x, default: int, lo: int, hi: int) -> int:
    try:
        v = int(x)
    except Exception:
(unchanged line 80 elided)
    return max(lo, min(hi, v))


+ def norm_resolution(x):
+     v = clamp_int(x, default=1024, lo=640, hi=1024)
+     return v if v in (640, 1024) else 1024


+ def load_rgba(path: str) -> Image.Image:
+     return Image.open(path).convert("RGBA")


+ def labels_for_layers(n: int):
+     return [f"Layer {i}" for i in range(1, n + 1)]


+ def parse_layer_label(label: str, default_idx0: int = 0) -> int:
+     # "Layer 3" -> 2
+     if not label:
+         return default_idx0
+     try:
+         num = int(label.strip().split()[-1])
+         return max(0, num - 1)
+     except Exception:
+         return default_idx0
+
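For reference, the label round-trip is 1-based in the UI and 0-based internally; anything non-numeric falls back to the default:

>>> parse_layer_label("Layer 3")
2
>>> parse_layer_label("not a label")
0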
+ def now_str():
+     return datetime.utcnow().strftime("%H:%M:%S")
+
+
+ def short_id():
+     return uuid.uuid4().hex[:8]
+
+
+ def find_node(history, node_id: str):
+     for n in history:
+         if n["id"] == node_id:
+             return n
+     return None
+
+
+ def compute_depth_and_path(history, node_id: str):
+     n = find_node(history, node_id)
+     if not n:
+         return 0, []
+     depth = 0
+     path = [n["title"]]
+     cur = n
+     while cur.get("parent_id"):
+         parent = find_node(history, cur["parent_id"])
+         if not parent:
+             break
+         depth += 1
+         path.append(parent["title"])
+         cur = parent
+     path.reverse()
+     return depth, path
+
+
+ def history_choices(history):
+     # Pretty dropdown labels with indentation + id
+     choices = []
+     for n in history:
+         depth = n.get("depth", 0)
+         indent = " " * depth + ("↳ " if depth > 0 else "")
+         choices.append((f"{indent}{n['title']} [{n['id']}]", n["id"]))
+     return choices
+
+
+ def render_breadcrumb(path_list):
+     if not path_list:
+         return "—"
+     return " → ".join(path_list)
+
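To make the node model concrete, a minimal illustration (hypothetical ids and titles) of a root plus one refined child, and what the helpers above return for it:

h = [
    {"id": "aaaa1111", "parent_id": None, "title": "Decompose (7 layers) @ 1024", "depth": 0},
    {"id": "bbbb2222", "parent_id": "aaaa1111", "title": "Refine L2 → 3 sub @ 1024", "depth": 1},
]
compute_depth_and_path(h, "bbbb2222")
# -> (1, ["Decompose (7 layers) @ 1024", "Refine L2 → 3 sub @ 1024"])
history_choices(h)[1][0]
# -> " ↳ Refine L2 → 3 sub @ 1024 [bbbb2222]"
render_breadcrumb(["A", "B"])
# -> "A → B"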
+ # ----------------------------
+ # ZeroGPU-friendly pipeline (lazy init)
+ # ----------------------------
+ PIPELINE = None
+ PIPELINE_LOCK = threading.Lock()
+
+ TORCH_DTYPE = torch.float16  # important for RAM/VRAM
+ MODEL_ID = "Qwen/Qwen-Image-Layered"
+
+
+ def get_pipeline():
+     """
+     Load ONLY inside GPU functions.
+     This avoids a cold-start CPU-only load that can blow 30GB.
+     """
+     global PIPELINE
+     if PIPELINE is not None:
+         return PIPELINE
+
+     with PIPELINE_LOCK:
+         if PIPELINE is not None:
+             return PIPELINE
+
+         pipe = QwenImageLayeredPipeline.from_pretrained(
+             MODEL_ID,
+             torch_dtype=TORCH_DTYPE,
+             low_cpu_mem_usage=True,
+         )
+
+         # memory helpers (guarded)
+         if hasattr(pipe, "enable_attention_slicing"):
+             pipe.enable_attention_slicing()
+         if hasattr(pipe, "enable_vae_slicing"):
+             pipe.enable_vae_slicing()
+         elif hasattr(pipe, "vae") and hasattr(pipe.vae, "enable_slicing"):
+             pipe.vae.enable_slicing()
+
+         PIPELINE = pipe
+     return PIPELINE


+ def ensure_device_strategy(pipe):
+     # Prefer CPU offload on CUDA to keep peak VRAM lower
+     if torch.cuda.is_available() and hasattr(pipe, "enable_model_cpu_offload"):
+         pipe.enable_model_cpu_offload()
+     elif torch.cuda.is_available():
+         try:
+             pipe.to("cuda")
+         except Exception:
+             pass
+
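Note on the lazy-init pattern: get_pipeline() is only called from inside the @spaces.GPU functions below, so on ZeroGPU the weights load in the worker on the first request rather than at import time, and PIPELINE_LOCK stops concurrent queue workers from loading the model twice. ensure_device_strategy() is re-applied on every call, presumably because ZeroGPU attaches the GPU per request:

# Call order inside each GPU function (see do_decompose / do_refine below):
#   pipe = get_pipeline()          # cached after the first call
#   ensure_device_strategy(pipe)   # re-applied per request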

+ # ----------------------------
+ # Dynamic GPU duration (ZeroGPU)
+ # ----------------------------
+ def get_duration(*args, **kwargs):
+     return clamp_int(kwargs.get("gpu_duration", 1000), default=1000, lo=20, hi=1500)
+
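One caveat worth flagging: ZeroGPU invokes this duration callable with the same arguments as the decorated function, and Gradio passes event inputs positionally, so kwargs.get("gpu_duration") can come up empty and silently fall back to the 1000 s default. A positional-safe sketch (hypothetical helper, assuming that calling convention):

import inspect

def duration_for(fn):
    # Resolve gpu_duration whether it arrives positionally or by keyword.
    sig = inspect.signature(fn)
    def _duration(*args, **kwargs):
        bound = sig.bind_partial(*args, **kwargs)
        return clamp_int(bound.arguments.get("gpu_duration", 1000), default=1000, lo=20, hi=1500)
    return _duration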

+ # ----------------------------
+ # Node creation utilities
+ # ----------------------------
+ def add_node(history, parent_id, title, layer_paths, pptx_path, zip_path, meta: dict):
+     node_id = short_id()
+     node = {
+         "id": node_id,
+         "parent_id": parent_id,      # None for root
+         "title": title,              # shown in history
+         "layer_paths": layer_paths,  # list[str]
+         "pptx_path": pptx_path,      # str
+         "zip_path": zip_path,        # str
+         "n_layers": len(layer_paths),
+         "created_at": now_str(),
+         "meta": meta or {},
+         "depth": 0,
+         "path": [],
+     }
+     history = list(history) if history else []
+     history.append(node)
+
+     # update depth/path for all nodes (simple; history is small)
+     for n in history:
+         d, p = compute_depth_and_path(history, n["id"])
+         n["depth"] = d
+         n["path"] = p
+
+     return history, node_id
+

+ def node_to_ui(history, node_id):
+     """
+     Convert a node -> UI outputs (gallery/strip, exports, dropdown choices, preview, breadcrumb).
+     """
+     node = find_node(history, node_id)
+     if not node:
+         empty = []
+         return (
+             empty, empty,
+             None, None,
+             gr.update(choices=[], value=None),
+             0,
+             None,
+             "**Node path:** —",
+         )
+
+     paths = node["layer_paths"]
+     images = [load_rgba(p) for p in paths]  # small N <= 10
+     strip = images
+
+     labels = labels_for_layers(len(paths))
+     dd = gr.update(choices=labels, value=(labels[0] if labels else None))
+
+     selected_idx0 = 0
+     preview = load_rgba(paths[0]) if paths else None
+
+     breadcrumb = f"**Node path:** {render_breadcrumb(node.get('path', []))}"
+
+     return (
+         images, strip,
+         node["pptx_path"], node["zip_path"],
+         dd,
+         selected_idx0,
+         preview,
+         breadcrumb,
+     )
+

+ # ----------------------------
+ # Selection handlers
+ # ----------------------------
+ def on_layer_dropdown_change(layer_label, current_layer_paths):
+     if not current_layer_paths:
+         return 0, None
+     idx0 = parse_layer_label(layer_label, 0)
+     idx0 = max(0, min(idx0, len(current_layer_paths) - 1))
+     return idx0, load_rgba(current_layer_paths[idx0])
+
+
+ def on_gallery_select(current_layer_paths, evt: gr.SelectData):
+     if not current_layer_paths:
+         return 0, None, gr.update()
+     idx = evt.index
+     if isinstance(idx, (tuple, list)):
+         idx0 = int(idx[-1])
+     else:
+         idx0 = int(idx)
+     idx0 = max(0, min(idx0, len(current_layer_paths) - 1))
+     label = f"Layer {idx0 + 1}"
+     return idx0, load_rgba(current_layer_paths[idx0]), gr.update(value=label)
+

+ def on_history_select(history, node_id):
+     if not node_id:
+         return (
+             gr.update(choices=[], value=None),
+             [],  # current_layer_paths_state
+             [], [], None, None,
+             gr.update(choices=[], value=None),
+             0,
+             None,
+             "**Node path:** —",
+         )

+     node = find_node(history, node_id)
+     if not node:
+         raise gr.Error("History node not found (state mismatch).")

+     # Build the UI for the selected node
+     images, strip, pptx_path, zip_path, layer_dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)
+
+     return (
+         layer_dd,
+         node["layer_paths"],  # current_layer_paths_state
+         images,
+         strip,
+         pptx_path,
+         zip_path,
+         layer_dd,
+         sel_idx0,
+         preview,
+         breadcrumb,
+     )
+

+ # ----------------------------
+ # Inference core
+ # ----------------------------
+ def run_pipeline_decompose(pipe, image_rgba, seed, randomize_seed, prompt, neg_prompt,
+                            true_guidance_scale, num_inference_steps, layers, cfg_norm,
+                            use_en_prompt, resolution):
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+
+     gen_device = "cuda" if torch.cuda.is_available() else "cpu"
+     gen = torch.Generator(device=gen_device).manual_seed(int(seed))

    inputs = {
+         "image": image_rgba,
        "generator": gen,
        "true_cfg_scale": float(true_guidance_scale),
+         "prompt": prompt if prompt is not None else "",
        "negative_prompt": neg_prompt,
        "num_inference_steps": int(num_inference_steps),
        "num_images_per_prompt": 1,
+         "layers": int(layers),
        "resolution": int(resolution),
        "cfg_normalize": bool(cfg_norm),
        "use_en_prompt": bool(use_en_prompt),
    }

    with torch.inference_mode():
+         out = pipe(**inputs)
+     return out.images[0]  # list[PIL]

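Reproducibility note: the generator is now created on CUDA when a GPU is visible (the old code pinned it to the CPU), so a fixed seed only reproduces results on the same device class; and when randomize_seed is set, the drawn seed is not returned to the caller, so a run cannot be replayed later.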
 
+ # ----------------------------
+ # GPU functions
+ # ----------------------------
+ @spaces.GPU(duration=get_duration)
+ def do_decompose(
+     input_image,
+     seed=0,
+     randomize_seed=True,
+     prompt="",
+     neg_prompt=" ",
+     true_guidance_scale=4.0,
+     num_inference_steps=50,
+     layers=7,
+     cfg_norm=True,
+     use_en_prompt=True,
+     resolution=1024,
+     gpu_duration=1000,
+     history=None,
+ ):
+     if isinstance(input_image, list):
+         input_image = input_image[0]
+
+     if isinstance(input_image, str):
+         pil_image = Image.open(input_image).convert("RGBA")
+     elif isinstance(input_image, Image.Image):
+         pil_image = input_image.convert("RGBA")
+     elif isinstance(input_image, np.ndarray):
+         pil_image = Image.fromarray(input_image).convert("RGBA")
+     else:
+         raise ValueError(f"Unsupported input_image type: {type(input_image)}")
+
+     resolution = norm_resolution(resolution)
+     layers = clamp_int(layers, default=7, lo=2, hi=10)
+
+     pipe = get_pipeline()
+     ensure_device_strategy(pipe)
+
+     imgs = run_pipeline_decompose(
+         pipe, pil_image, seed, randomize_seed, prompt, neg_prompt,
+         true_guidance_scale, num_inference_steps, layers, cfg_norm, use_en_prompt, resolution
+     )
+
+     # Save images to temp files
+     layer_paths = []
+     gallery = []
+     for img in imgs:
+         gallery.append(img)
        tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
        img.save(tmp.name)
        layer_paths.append(tmp.name)

    pptx_path = imagelist_to_pptx(layer_paths)
+     zip_path = make_zip(layer_paths, prefix="layer")

+     meta = {
+         "kind": "decompose",
+         "resolution": resolution,
+         "layers": layers,
+         "steps": int(num_inference_steps),
+     }
+     title = f"Decompose ({len(layer_paths)} layers) @ {resolution}"
+
+     history = history or []
+     history, node_id = add_node(history, parent_id=None, title=title,
+                                 layer_paths=layer_paths, pptx_path=pptx_path, zip_path=zip_path, meta=meta)

+     # Update the history dropdown
+     hist_dd = gr.update(choices=history_choices(history), value=node_id)

+     # Set the current node UI
+     images, strip, pptx, zipp, layer_dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)

    return (
+         history,
+         node_id,
+         hist_dd,
+         layer_paths,  # current_layer_paths_state
+         images,
+         strip,
+         pptx,
+         zipp,
+         layer_dd,
+         sel_idx0,
+         preview,
+         breadcrumb,
    )

 
@spaces.GPU(duration=get_duration)
+ def do_refine(
+     history,
+     current_node_id,
+     current_layer_paths,
+     selected_layer_idx0,
+     sub_layers=3,
    seed=0,
    randomize_seed=True,
    prompt="",
(unchanged lines 470-472 elided)
    cfg_norm=True,
    use_en_prompt=True,
    resolution=1024,
+     gpu_duration=1000,
):
+     if not history or not current_node_id:
+         raise gr.Error("Run Decompose first (create a root node).")
+     if not current_layer_paths:
+         raise gr.Error("No layers in the current node (state).")

+     parent = find_node(history, current_node_id)
+     if not parent:
+         raise gr.Error("Current node not found in history.")

+     resolution = norm_resolution(resolution)
+     sub_layers = clamp_int(sub_layers, default=3, lo=2, hi=10)

+     idx0 = clamp_int(selected_layer_idx0, default=0, lo=0, hi=len(current_layer_paths) - 1)
+     selected_img = load_rgba(current_layer_paths[idx0])

+     pipe = get_pipeline()
+     ensure_device_strategy(pipe)

+     imgs = run_pipeline_decompose(
+         pipe, selected_img, seed, randomize_seed, prompt, neg_prompt,
+         true_guidance_scale, num_inference_steps, sub_layers, cfg_norm, use_en_prompt, resolution
+     )

+     # Save images to temp files
+     layer_paths = []
+     gallery = []
+     for img in imgs:
+         gallery.append(img)
+         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
+         img.save(tmp.name)
+         layer_paths.append(tmp.name)

+     pptx_path = imagelist_to_pptx(layer_paths)
+     zip_path = make_zip(layer_paths, prefix="refined")
+
+     meta = {
+         "kind": "refine",
+         "resolution": resolution,
+         "sub_layers": sub_layers,
+         "steps": int(num_inference_steps),
+         "refined_from": {"node_id": current_node_id, "layer_index": idx0},
    }
+     title = f"Refine L{idx0+1} → {len(layer_paths)} sub @ {resolution}"

+     history, node_id = add_node(history, parent_id=current_node_id, title=title,
+                                 layer_paths=layer_paths, pptx_path=pptx_path, zip_path=zip_path, meta=meta)

+     # Update the history dropdown and set the current node to the new child
+     hist_dd = gr.update(choices=history_choices(history), value=node_id)

+     images, strip, pptx, zipp, layer_dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)

+     return (
+         history,
+         node_id,
+         hist_dd,
+         layer_paths,  # current_layer_paths_state
+         images,
+         strip,
+         pptx,
+         zipp,
+         layer_dd,
+         sel_idx0,
+         preview,
+         breadcrumb,
+     )


+ # ----------------------------
+ # UI
+ # ----------------------------
ensure_dirname(LOG_DIR)
examples = [f"assets/test_images/{i}.png" for i in range(1, 14)]

(unchanged lines 552-554 elided)
        '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
        'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
    )
+
    gr.Markdown(
        """
+         The prompt describes the image as a whole (including partially occluded elements).
+         Refine recursively decomposes the selected layer of the current node (nodes are saved to History).
        """
    )

+     # States
+     history_state = gr.State([])
+     current_node_id_state = gr.State(None)
+     current_layer_paths_state = gr.State([])
+     selected_layer_idx0_state = gr.State(0)
 

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(label="Input Image", image_mode="RGBA")

+             with gr.Accordion("Settings", open=False):
+                 prompt = gr.Textbox(label="Prompt (Optional)", value="", lines=2)
+                 neg_prompt = gr.Textbox(label="Negative Prompt (Optional)", value=" ", lines=2)

                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

+                 true_guidance_scale = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=4.0)
+                 num_inference_steps = gr.Slider(label="Steps", minimum=1, maximum=100, step=1, value=50)

+                 layers = gr.Slider(label="Layers (Decompose)", minimum=2, maximum=10, step=1, value=7)
+                 sub_layers = gr.Slider(label="Sub-layers (Refine)", minimum=2, maximum=10, step=1, value=3)

+                 resolution = gr.Radio(label="Resolution", choices=[640, 1024], value=1024)

+                 cfg_norm = gr.Checkbox(label="CFG normalize", value=True)
+                 use_en_prompt = gr.Checkbox(label="Auto caption language (EN=True / ZH=False)", value=True)

                gpu_duration = gr.Textbox(
                    label="GPU duration override (seconds, 20..1500)",
                    value="1000",
                    lines=1,
                )

+             decompose_btn = gr.Button("Decompose (new root node)", variant="primary")
+             refine_btn = gr.Button("Refine selected layer (create child node)", variant="secondary")

+             gr.Markdown("### History (nodes)")
+             history_dd = gr.Dropdown(label="Select node", choices=[], value=None, interactive=True)
+
+             breadcrumb_md = gr.Markdown("**Node path:** —")
+
+             gr.Markdown("### Layer selection (current node)")
+             layer_dd = gr.Dropdown(label="Select layer", choices=[], value=None, interactive=True)
+             layer_preview = gr.Image(label="Selected layer preview", image_mode="RGBA", interactive=False)
 

        with gr.Column(scale=2):
+             current_gallery = gr.Gallery(label="Current node layers (click to select)", columns=4, rows=1, format="png")
+             current_strip = gr.Gallery(label="Layer strip (1 row)", columns=8, rows=1, format="png", height=120)

            with gr.Row():
+                 export_pptx = gr.File(label="Download PPTX (selected node)")
+                 export_zip = gr.File(label="Download ZIP (selected node)")
+
+     # Examples run -> Decompose
+     gr.Examples(
+         examples=examples,
+         inputs=[input_image],
+         outputs=[
+             history_state, current_node_id_state, history_dd,
+             current_layer_paths_state, current_gallery, current_strip,
+             export_pptx, export_zip,
+             layer_dd, selected_layer_idx0_state, layer_preview,
+             breadcrumb_md,
+         ],
+         fn=do_decompose,
+         examples_per_page=14,
+         cache_examples=False,
+         run_on_click=True,
+     )
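Note that gr.Examples wires only input_image, so run_on_click calls do_decompose with every other parameter at its Python default, including history=None; clicking an example therefore starts a fresh history tree rather than appending to the current one.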
 
 
 

+     # Decompose button
+     decompose_btn.click(
+         fn=do_decompose,
+         inputs=[
+             input_image,
+             seed,
+             randomize_seed,
+             prompt,
+             neg_prompt,
+             true_guidance_scale,
+             num_inference_steps,
+             layers,
+             cfg_norm,
+             use_en_prompt,
+             resolution,
+             gpu_duration,
+             history_state,
+         ],
+         outputs=[
+             history_state,
+             current_node_id_state,
+             history_dd,
+             current_layer_paths_state,
+             current_gallery,
+             current_strip,
+             export_pptx,
+             export_zip,
+             layer_dd,
+             selected_layer_idx0_state,
+             layer_preview,
+             breadcrumb_md,
+         ],
+     )

+     # Refine button
+     refine_btn.click(
+         fn=do_refine,
+         inputs=[
+             history_state,
+             current_node_id_state,
+             current_layer_paths_state,
+             selected_layer_idx0_state,
+             sub_layers,
+             seed,
+             randomize_seed,
+             prompt,
+             neg_prompt,
+             true_guidance_scale,
+             num_inference_steps,
+             cfg_norm,
+             use_en_prompt,
+             resolution,
+             gpu_duration,
+         ],
+         outputs=[
+             history_state,
+             current_node_id_state,
+             history_dd,
+             current_layer_paths_state,
+             current_gallery,
+             current_strip,
+             export_pptx,
+             export_zip,
+             layer_dd,
+             selected_layer_idx0_state,
+             layer_preview,
+             breadcrumb_md,
+         ],
+     )
+
+     # History selection -> load any node
+     def _history_change(history, node_id):
+         # returns:
+         #   layer_dd update,
+         #   current_layer_paths_state,
+         #   current_gallery,
+         #   current_strip,
+         #   export_pptx,
+         #   export_zip,
+         #   layer_dd,
+         #   selected_layer_idx0_state,
+         #   layer_preview,
+         #   breadcrumb
+         node = find_node(history, node_id)
+         if not node:
+             return (
+                 gr.update(choices=[], value=None),
+                 [],
+                 [], [],
+                 None, None,
+                 gr.update(choices=[], value=None),
+                 0,
+                 None,
+                 "**Node path:** —",
+             )
+         images, strip, pptx, zipp, dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)
+         return (
+             dd,
+             node["layer_paths"],
+             images,
+             strip,
+             pptx,
+             zipp,
+             dd,
+             sel_idx0,
+             preview,
+             breadcrumb,
+         )
+
+     history_dd.change(
+         fn=_history_change,
+         inputs=[history_state, history_dd],
+         outputs=[
+             layer_dd,
+             current_layer_paths_state,
+             current_gallery,
+             current_strip,
+             export_pptx,
+             export_zip,
+             layer_dd,
+             selected_layer_idx0_state,
+             layer_preview,
+             breadcrumb_md,
+         ],
+     )
+
+     # Layer dropdown -> preview
+     layer_dd.change(
+         fn=on_layer_dropdown_change,
+         inputs=[layer_dd, current_layer_paths_state],
+         outputs=[selected_layer_idx0_state, layer_preview],
+     )
+
+     # Click on gallery/strip -> select layer
+     current_gallery.select(
+         fn=on_gallery_select,
+         inputs=[current_layer_paths_state],
+         outputs=[selected_layer_idx0_state, layer_preview, layer_dd],
+     )
+     current_strip.select(
+         fn=on_gallery_select,
+         inputs=[current_layer_paths_state],
+         outputs=[selected_layer_idx0_state, layer_preview, layer_dd],
+     )

if __name__ == "__main__":
+     demo.queue()
+     try:
+         demo.launch(ssr_mode=False)
+     except TypeError:
+         demo.launch()
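The try/except keeps launch working across Gradio versions: ssr_mode is a newer launch() option, and older releases raise TypeError on the unknown keyword, at which point the plain demo.launch() path runs instead.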