hexware committed on
Commit f05bac3 · verified · 1 Parent(s): a6bc145

Update app.py

Files changed (1):
  1. app.py +705 -667

app.py CHANGED
@@ -1,77 +1,151 @@
  import os
  import uuid
- import numpy as np
  import random
  import tempfile
  import zipfile
  import threading
- from datetime import datetime

  import spaces
  import torch
  import gradio as gr
  from PIL import Image
-
- from diffusers import QwenImageLayeredPipeline
  from pptx import Presentation

-
- LOG_DIR = "/tmp/local"

  MAX_SEED = np.iinfo(np.int32).max

- # Optional HF login (Spaces secret env var "hf")
- from huggingface_hub import login
- _HF_TOKEN = os.environ.get("hf")
- if _HF_TOKEN:
-     login(token=_HF_TOKEN)


- # ----------------------------
- # Helpers
- # ----------------------------
- def ensure_dirname(path: str):
-     if path and not os.path.exists(path):
-         os.makedirs(path, exist_ok=True)


- def px_to_emu(px, dpi=96):
-     inch = px / dpi
-     return int(inch * 914400)


- def imagelist_to_pptx(img_files):
-     with Image.open(img_files[0]) as img:
-         w, h = img.size

-     prs = Presentation()
-     prs.slide_width = px_to_emu(w)
-     prs.slide_height = px_to_emu(h)

-     slide = prs.slides.add_slide(prs.slide_layouts[6])
-     left = top = 0
-
-     # Stack all images on top of each other (layers)
-     for img_path in img_files:
-         slide.shapes.add_picture(
-             img_path,
-             left,
-             top,
-             width=px_to_emu(w),
-             height=px_to_emu(h),
          )

-     with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
-         prs.save(tmp.name)
-         return tmp.name
-
-
- def make_zip(paths, prefix="layer"):
-     with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
-         with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as z:
-             for i, p in enumerate(paths):
-                 z.write(p, f"{prefix}_{i+1}.png")
-         return tmpzip.name
-

  def clamp_int(x, default: int, lo: int, hi: int) -> int:
      try:
@@ -80,708 +154,672 @@ def clamp_int(x, default: int, lo: int, hi: int) -> int:
          v = default
      return max(lo, min(hi, v))


- def norm_resolution(x):
-     v = clamp_int(x, default=1024, lo=640, hi=1024)
-     return v if v in (640, 1024) else 1024
-
-
- def load_rgba(path: str) -> Image.Image:
-     return Image.open(path).convert("RGBA")
-
-
- def labels_for_layers(n: int):
-     return [f"Layer {i}" for i in range(1, n + 1)]
-
-
- def parse_layer_label(label: str, default_idx0: int = 0) -> int:
-     # "Layer 3" -> 2
-     if not label:
-         return default_idx0
-     try:
-         num = int(label.strip().split()[-1])
-         return max(0, num - 1)
-     except Exception:
-         return default_idx0
-
-
- def now_str():
-     return datetime.utcnow().strftime("%H:%M:%S")
-
-
- def short_id():
-     return uuid.uuid4().hex[:8]
-
-
- def find_node(history, node_id: str):
-     for n in history:
-         if n["id"] == node_id:
-             return n
-     return None


- def compute_depth_and_path(history, node_id: str):
-     n = find_node(history, node_id)
-     if not n:
-         return 0, []
-     depth = 0
-     path = [n["title"]]
-     cur = n
-     while cur.get("parent_id"):
-         parent = find_node(history, cur["parent_id"])
-         if not parent:
-             break
-         depth += 1
-         path.append(parent["title"])
-         cur = parent
-     path.reverse()
-     return depth, path


- def history_choices(history):
-     # Pretty dropdown labels with indentation + id
-     choices = []
-     for n in history:
-         depth = n.get("depth", 0)
-         indent = " " * depth + ("↳ " if depth > 0 else "")
-         choices.append((f"{indent}{n['title']} [{n['id']}]", n["id"]))
-     return choices


- def render_breadcrumb(path_list):
-     if not path_list:
-         return "—"
-     return " → ".join(path_list)

- # ----------------------------
- # ZeroGPU-friendly pipeline (lazy init)
- # ----------------------------
- PIPELINE = None
- PIPELINE_LOCK = threading.Lock()

- TORCH_DTYPE = torch.float16  # important for RAM/VRAM
- MODEL_ID = "Qwen/Qwen-Image-Layered"


- def get_pipeline():
-     """
-     Load ONLY inside GPU functions.
-     This avoids a cold-start CPU-only load that can blow past 30GB.
-     """
-     global PIPELINE
-     if PIPELINE is not None:
-         return PIPELINE

-     with PIPELINE_LOCK:
-         if PIPELINE is not None:
-             return PIPELINE

-         pipe = QwenImageLayeredPipeline.from_pretrained(
-             MODEL_ID,
-             torch_dtype=TORCH_DTYPE,
-             low_cpu_mem_usage=True,
-         )

-         # memory helpers (guarded)
-         if hasattr(pipe, "enable_attention_slicing"):
-             pipe.enable_attention_slicing()
-         if hasattr(pipe, "enable_vae_slicing"):
-             pipe.enable_vae_slicing()
-         elif hasattr(pipe, "vae") and hasattr(pipe.vae, "enable_slicing"):
-             pipe.vae.enable_slicing()

-         PIPELINE = pipe
-         return PIPELINE


- def ensure_device_strategy(pipe):
-     # Prefer CPU offload on CUDA to keep peak VRAM lower
-     if torch.cuda.is_available() and hasattr(pipe, "enable_model_cpu_offload"):
-         pipe.enable_model_cpu_offload()
-     elif torch.cuda.is_available():
-         try:
-             pipe.to("cuda")
-         except Exception:
-             pass


- # ----------------------------
- # Dynamic GPU duration (ZeroGPU)
- # ----------------------------
- def get_duration(*args, **kwargs):
-     return clamp_int(kwargs.get("gpu_duration", 1000), default=1000, lo=20, hi=1500)


- # ----------------------------
- # Node creation utilities
- # ----------------------------
- def add_node(history, parent_id, title, layer_paths, pptx_path, zip_path, meta: dict):
-     node_id = short_id()
-     node = {
          "id": node_id,
-         "parent_id": parent_id,      # None for root
-         "title": title,              # shown in history
-         "layer_paths": layer_paths,  # list[str]
-         "pptx_path": pptx_path,      # str
-         "zip_path": zip_path,        # str
-         "n_layers": len(layer_paths),
-         "created_at": now_str(),
-         "meta": meta or {},
-         "depth": 0,
-         "path": [],
      }
-     history = list(history) if history else []
-     history.append(node)
-
-     # update depth/path for all nodes (simple, history small)
-     for n in history:
-         d, p = compute_depth_and_path(history, n["id"])
-         n["depth"] = d
-         n["path"] = p
-
-     return history, node_id


- def node_to_ui(history, node_id):
-     """
-     Convert node -> UI outputs (gallery/strip, exports, dropdown choices, preview, breadcrumb).
-     """
-     node = find_node(history, node_id)
-     if not node:
-         empty = []
-         return (
-             empty, empty,
-             None, None,
-             gr.update(choices=[], value=None),
-             0,
-             None,
-             f"**Node path:** —",
-         )

-     paths = node["layer_paths"]
-     images = [load_rgba(p) for p in paths]  # small N <= 10
-     strip = images

-     labels = labels_for_layers(len(paths))
-     dd = gr.update(choices=labels, value=(labels[0] if labels else None))

-     selected_idx0 = 0
-     preview = load_rgba(paths[0]) if paths else None

-     breadcrumb = f"**Node path:** {render_breadcrumb(node.get('path', []))}"

      return (
-         images, strip,
-         node["pptx_path"], node["zip_path"],
-         dd,
-         selected_idx0,
-         preview,
-         breadcrumb,
      )

- # ----------------------------
- # Selection handlers
- # ----------------------------
- def on_layer_dropdown_change(layer_label, current_layer_paths):
-     if not current_layer_paths:
-         return 0, None
-     idx0 = parse_layer_label(layer_label, 0)
-     idx0 = max(0, min(idx0, len(current_layer_paths) - 1))
-     return idx0, load_rgba(current_layer_paths[idx0])
-
-
- def on_gallery_select(current_layer_paths, evt: gr.SelectData):
-     if not current_layer_paths:
-         return 0, None, gr.update()
-     idx = evt.index
-     if isinstance(idx, (tuple, list)):
-         idx0 = int(idx[-1])
-     else:
-         idx0 = int(idx)
-     idx0 = max(0, min(idx0, len(current_layer_paths) - 1))
-     label = f"Layer {idx0 + 1}"
-     return idx0, load_rgba(current_layer_paths[idx0]), gr.update(value=label)
-
-
- def on_history_select(history, node_id):
-     if not node_id:
          return (
              gr.update(choices=[], value=None),
-             [],  # current_layer_paths_state
-             [], [], None, None,
-             gr.update(choices=[], value=None),
-             0,
-             None,
-             "**Node path:** —",
          )

-     node = find_node(history, node_id)
-     if not node:
-         raise gr.Error("History node not found (state mismatch).")
-
-     # Build UI for selected node
-     images, strip, pptx_path, zip_path, layer_dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)

      return (
-         layer_dd,
-         node["layer_paths"],  # current_layer_paths_state
-         images,
-         strip,
-         pptx_path,
-         zip_path,
-         layer_dd,
-         sel_idx0,
-         preview,
-         breadcrumb,
      )


- # ----------------------------
- # Inference core
- # ----------------------------
- def run_pipeline_decompose(pipe, image_rgba, seed, randomize_seed, prompt, neg_prompt,
-                            true_guidance_scale, num_inference_steps, layers, cfg_norm,
-                            use_en_prompt, resolution):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-
-     gen_device = "cuda" if torch.cuda.is_available() else "cpu"
-     gen = torch.Generator(device=gen_device).manual_seed(int(seed))
-
-     inputs = {
-         "image": image_rgba,
-         "generator": gen,
-         "true_cfg_scale": float(true_guidance_scale),
-         "prompt": prompt if prompt is not None else "",
-         "negative_prompt": neg_prompt,
-         "num_inference_steps": int(num_inference_steps),
-         "num_images_per_prompt": 1,
-         "layers": int(layers),
-         "resolution": int(resolution),
-         "cfg_normalize": bool(cfg_norm),
-         "use_en_prompt": bool(use_en_prompt),
-     }
-
-     with torch.inference_mode():
-         out = pipe(**inputs)
-     return out.images[0]  # list[PIL]
-

- # ----------------------------
- # GPU functions
- # ----------------------------
- @spaces.GPU(duration=get_duration)
- def do_decompose(
-     input_image,
-     seed=0,
-     randomize_seed=True,
-     prompt="",
-     neg_prompt=" ",
-     true_guidance_scale=4.0,
-     num_inference_steps=50,
-     layers=7,
-     cfg_norm=True,
-     use_en_prompt=True,
-     resolution=1024,
-     gpu_duration=1000,
-     history=None,
  ):
-     if isinstance(input_image, list):
-         input_image = input_image[0]
-
-     if isinstance(input_image, str):
-         pil_image = Image.open(input_image).convert("RGBA")
-     elif isinstance(input_image, Image.Image):
-         pil_image = input_image.convert("RGBA")
-     elif isinstance(input_image, np.ndarray):
-         pil_image = Image.fromarray(input_image).convert("RGBA")
-     else:
-         raise ValueError(f"Unsupported input_image type: {type(input_image)}")
-
-     resolution = norm_resolution(resolution)
-     layers = clamp_int(layers, default=7, lo=2, hi=10)

-     pipe = get_pipeline()
-     ensure_device_strategy(pipe)

-     imgs = run_pipeline_decompose(
-         pipe, pil_image, seed, randomize_seed, prompt, neg_prompt,
-         true_guidance_scale, num_inference_steps, layers, cfg_norm, use_en_prompt, resolution
      )

-     # Save images to temp
-     layer_paths = []
-     gallery = []
-     for img in imgs:
-         gallery.append(img)
-         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
-         img.save(tmp.name)
-         layer_paths.append(tmp.name)
-
-     pptx_path = imagelist_to_pptx(layer_paths)
-     zip_path = make_zip(layer_paths, prefix="layer")
-
-     meta = {
-         "kind": "decompose",
-         "resolution": resolution,
-         "layers": layers,
-         "steps": int(num_inference_steps),
-     }
-     title = f"Decompose ({len(layer_paths)} layers) @ {resolution}"

-     history = history or []
-     history, node_id = add_node(history, parent_id=None, title=title,
-                                 layer_paths=layer_paths, pptx_path=pptx_path, zip_path=zip_path, meta=meta)
-
-     # Update history dropdown
-     hist_dd = gr.update(choices=history_choices(history), value=node_id)

-     # Set current node UI
-     images, strip, pptx, zipp, layer_dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)

      return (
-         history,
-         node_id,
-         hist_dd,
-         layer_paths,  # current_layer_paths_state
-         images,
-         strip,
-         pptx,
-         zipp,
-         layer_dd,
-         sel_idx0,
-         preview,
-         breadcrumb,
      )

- @spaces.GPU(duration=get_duration)
- def do_refine(
-     history,
-     current_node_id,
-     current_layer_paths,
-     selected_layer_idx0,
-     sub_layers=3,
-     seed=0,
-     randomize_seed=True,
-     prompt="",
-     neg_prompt=" ",
-     true_guidance_scale=4.0,
-     num_inference_steps=50,
-     cfg_norm=True,
-     use_en_prompt=True,
-     resolution=1024,
-     gpu_duration=1000,
- ):
-     if not history or not current_node_id:
-         raise gr.Error("Run Decompose first (create a root node).")
-     if not current_layer_paths:
-         raise gr.Error("The current node has no layers (state).")
-
-     parent = find_node(history, current_node_id)
-     if not parent:
-         raise gr.Error("Current node not found in history.")
-
-     resolution = norm_resolution(resolution)
-     sub_layers = clamp_int(sub_layers, default=3, lo=2, hi=10)
-
-     idx0 = clamp_int(selected_layer_idx0, default=0, lo=0, hi=len(current_layer_paths) - 1)
-     selected_img = load_rgba(current_layer_paths[idx0])

-     pipe = get_pipeline()
-     ensure_device_strategy(pipe)

-     imgs = run_pipeline_decompose(
-         pipe, selected_img, seed, randomize_seed, prompt, neg_prompt,
-         true_guidance_scale, num_inference_steps, sub_layers, cfg_norm, use_en_prompt, resolution
      )
-
-     # Save images to temp
-     layer_paths = []
-     gallery = []
-     for img in imgs:
-         gallery.append(img)
-         tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
-         img.save(tmp.name)
-         layer_paths.append(tmp.name)
-
-     pptx_path = imagelist_to_pptx(layer_paths)
-     zip_path = make_zip(layer_paths, prefix="refined")
-
-     meta = {
-         "kind": "refine",
-         "resolution": resolution,
-         "sub_layers": sub_layers,
-         "steps": int(num_inference_steps),
-         "refined_from": {"node_id": current_node_id, "layer_index": idx0},
-     }
-     title = f"Refine L{idx0+1} → {len(layer_paths)} sub @ {resolution}"
-
-     history, node_id = add_node(history, parent_id=current_node_id, title=title,
-                                 layer_paths=layer_paths, pptx_path=pptx_path, zip_path=zip_path, meta=meta)
-
-     # Update history dropdown and set current node to the new child
-     hist_dd = gr.update(choices=history_choices(history), value=node_id)
-
-     images, strip, pptx, zipp, layer_dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)
-
-     return (
-         history,
-         node_id,
-         hist_dd,
-         layer_paths,  # current_layer_paths_state
-         images,
-         strip,
-         pptx,
-         zipp,
-         layer_dd,
-         sel_idx0,
-         preview,
-         breadcrumb,
      )


- # ----------------------------
- # UI
- # ----------------------------
- ensure_dirname(LOG_DIR)
- examples = [f"assets/test_images/{i}.png" for i in range(1, 14)]

- with gr.Blocks() as demo:
-     with gr.Column(elem_id="col-container"):
-         gr.HTML(
-             '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
-             'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
-         )

-         gr.Markdown(
-             """
-             The prompt describes the image as a whole (including partially occluded elements).
-             Refine recursively decomposes the selected layer of the current node (nodes are saved to History).
-             """
-         )

-         # States
-         history_state = gr.State([])
-         current_node_id_state = gr.State(None)
-         current_layer_paths_state = gr.State([])
-         selected_layer_idx0_state = gr.State(0)

-         with gr.Row():
-             with gr.Column(scale=1):
-                 input_image = gr.Image(label="Input Image", image_mode="RGBA")

-                 with gr.Accordion("Settings", open=False):
-                     prompt = gr.Textbox(label="Prompt (Optional)", value="", lines=2)
-                     neg_prompt = gr.Textbox(label="Negative Prompt (Optional)", value=" ", lines=2)

-                     seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

-                     true_guidance_scale = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=4.0)
-                     num_inference_steps = gr.Slider(label="Steps", minimum=1, maximum=100, step=1, value=50)

-                     layers = gr.Slider(label="Layers (Decompose)", minimum=2, maximum=10, step=1, value=7)
-                     sub_layers = gr.Slider(label="Sub-layers (Refine)", minimum=2, maximum=10, step=1, value=3)

-                     resolution = gr.Radio(label="Resolution", choices=[640, 1024], value=1024)

-                     cfg_norm = gr.Checkbox(label="CFG normalize", value=True)
-                     use_en_prompt = gr.Checkbox(label="Auto caption language (EN=True / ZH=False)", value=True)

-                     gpu_duration = gr.Textbox(
-                         label="GPU duration override (seconds, 20..1500)",
-                         value="1000",
-                         lines=1,
                      )

-                 decompose_btn = gr.Button("Decompose (new root node)", variant="primary")
-                 refine_btn = gr.Button("Refine selected layer (create child node)", variant="secondary")
-
-                 gr.Markdown("### History (nodes)")
-                 history_dd = gr.Dropdown(label="Select node", choices=[], value=None, interactive=True)

-                 breadcrumb_md = gr.Markdown("**Node path:** ")

-                 gr.Markdown("### Layer selection (current node)")
-                 layer_dd = gr.Dropdown(label="Select layer", choices=[], value=None, interactive=True)
-                 layer_preview = gr.Image(label="Selected layer preview", image_mode="RGBA", interactive=False)

-             with gr.Column(scale=2):
-                 current_gallery = gr.Gallery(label="Current node layers (click to select)", columns=4, rows=1, format="png")
-                 current_strip = gr.Gallery(label="Layer strip (1 row)", columns=8, rows=1, format="png", height=120)

-                 with gr.Row():
-                     export_pptx = gr.File(label="Download PPTX (selected node)")
-                     export_zip = gr.File(label="Download ZIP (selected node)")
-
-         # Examples run -> Decompose
-         gr.Examples(
-             examples=examples,
-             inputs=[input_image],
-             outputs=[
-                 history_state, current_node_id_state, history_dd,
-                 current_layer_paths_state, current_gallery, current_strip,
-                 export_pptx, export_zip,
-                 layer_dd, selected_layer_idx0_state, layer_preview,
-                 breadcrumb_md,
-             ],
-             fn=do_decompose,
-             examples_per_page=14,
-             cache_examples=False,
-             run_on_click=True,
-         )

-         # Decompose button
-         decompose_btn.click(
-             fn=do_decompose,
-             inputs=[
-                 input_image,
-                 seed,
-                 randomize_seed,
-                 prompt,
-                 neg_prompt,
-                 true_guidance_scale,
-                 num_inference_steps,
-                 layers,
-                 cfg_norm,
-                 use_en_prompt,
-                 resolution,
-                 gpu_duration,
-                 history_state,
-             ],
-             outputs=[
-                 history_state,
-                 current_node_id_state,
-                 history_dd,
-                 current_layer_paths_state,
-                 current_gallery,
-                 current_strip,
-                 export_pptx,
-                 export_zip,
-                 layer_dd,
-                 selected_layer_idx0_state,
-                 layer_preview,
-                 breadcrumb_md,
-             ],
-         )

-         # Refine button
-         refine_btn.click(
-             fn=do_refine,
-             inputs=[
-                 history_state,
-                 current_node_id_state,
-                 current_layer_paths_state,
-                 selected_layer_idx0_state,
-                 sub_layers,
-                 seed,
-                 randomize_seed,
-                 prompt,
-                 neg_prompt,
-                 true_guidance_scale,
-                 num_inference_steps,
-                 cfg_norm,
-                 use_en_prompt,
-                 resolution,
-                 gpu_duration,
-             ],
-             outputs=[
-                 history_state,
-                 current_node_id_state,
-                 history_dd,
-                 current_layer_paths_state,
-                 current_gallery,
-                 current_strip,
-                 export_pptx,
-                 export_zip,
-                 layer_dd,
-                 selected_layer_idx0_state,
-                 layer_preview,
-                 breadcrumb_md,
-             ],
-         )

-         # History selection -> load any node
-         def _history_change(history, node_id):
-             # returns:
-             #   layer_dd_update,
-             #   current_layer_paths_state,
-             #   current_gallery,
-             #   current_strip,
-             #   export_pptx,
-             #   export_zip,
-             #   layer_dd,
-             #   selected_layer_idx0_state,
-             #   layer_preview,
-             #   breadcrumb
-             node = find_node(history, node_id)
-             if not node:
-                 return (
-                     gr.update(choices=[], value=None),
-                     [],
-                     [], [],
-                     None, None,
-                     gr.update(choices=[], value=None),
-                     0,
-                     None,
-                     "**Node path:** —",
-                 )
-             images, strip, pptx, zipp, dd, sel_idx0, preview, breadcrumb = node_to_ui(history, node_id)
-             return (
-                 dd,
-                 node["layer_paths"],
-                 images,
-                 strip,
-                 pptx,
-                 zipp,
-                 dd,
-                 sel_idx0,
-                 preview,
-                 breadcrumb,
-             )

-         history_dd.change(
-             fn=_history_change,
-             inputs=[history_state, history_dd],
-             outputs=[
-                 layer_dd,
-                 current_layer_paths_state,
-                 current_gallery,
-                 current_strip,
-                 export_pptx,
-                 export_zip,
-                 layer_dd,
-                 selected_layer_idx0_state,
-                 layer_preview,
-                 breadcrumb_md,
-             ],
-         )

-         # Layer dropdown -> preview
-         layer_dd.change(
-             fn=on_layer_dropdown_change,
-             inputs=[layer_dd, current_layer_paths_state],
-             outputs=[selected_layer_idx0_state, layer_preview],
-         )

-         # Click on gallery/strip -> select layer
-         current_gallery.select(
-             fn=on_gallery_select,
-             inputs=[current_layer_paths_state],
-             outputs=[selected_layer_idx0_state, layer_preview, layer_dd],
-         )
-         current_strip.select(
-             fn=on_gallery_select,
-             inputs=[current_layer_paths_state],
-             outputs=[selected_layer_idx0_state, layer_preview, layer_dd],
-         )

  if __name__ == "__main__":
-     demo.queue()
-     try:
-         demo.launch(ssr_mode=False)
-     except TypeError:
-         demo.launch()

+ # app.py — Qwen/Qwen-Image-Layered (ZeroGPU-friendly) with:
+ # - Decompose
+ # - Refine selected layer (recursive / hierarchical)
+ # - History "nodes" (tree-like lineage via parent)
+ # - Switch to any node
+ # - Export ZIP/PPTX for selected node
+ # - Layer selection UX: dropdown + 1-row thumbnail gallery (Photoshop-like)
+ # - Auto-open "Refined layers" accordion after refine
+
  import os
  import uuid
+ import json
+ import time
  import random
+ import shutil
  import tempfile
  import zipfile
  import threading
+ from typing import Dict, Any, List, Tuple, Optional

+ import numpy as np
  import spaces
  import torch
  import gradio as gr
  from PIL import Image
  from pptx import Presentation
+ from huggingface_hub import login
+ from diffusers import QwenImageLayeredPipeline

+ # -------------------------
+ # Config
+ # -------------------------
+ APP_TMP_ROOT = "/tmp/qwen_image_layered"
  MAX_SEED = np.iinfo(np.int32).max

+ # (Optional) HF token as Spaces secret env var "hf"
+ HF_TOKEN = os.environ.get("hf")
+ if HF_TOKEN:
+     try:
+         login(token=HF_TOKEN)
+     except Exception as e:
+         print("HF login failed (continuing):", e)

+ # Reduce allocator fragmentation (new name; the old PYTORCH_CUDA_ALLOC_CONF is deprecated)
+ os.environ.setdefault("PYTORCH_ALLOC_CONF", "expandable_segments:True")

+ SESSION_ID = uuid.uuid4().hex[:10]
+ SESSION_DIR = os.path.join(APP_TMP_ROOT, f"session_{SESSION_ID}")
+ os.makedirs(SESSION_DIR, exist_ok=True)

+ # -------------------------
+ # Pipeline (lazy + cached)
+ # -------------------------
+ _PIPELINE = None
+ _PIPELINE_LOCK = threading.Lock()

+ def _has_cuda() -> bool:
+     return torch.cuda.is_available()

+ def get_pipeline() -> QwenImageLayeredPipeline:
+     """
+     Lazy-load the pipeline the first time we actually run on GPU (ZeroGPU attaches the GPU only inside @spaces.GPU).
+     The pipeline stays cached for the lifetime of the Space process.
+     """
+     global _PIPELINE
+     if _PIPELINE is not None:
+         return _PIPELINE

+     with _PIPELINE_LOCK:
+         if _PIPELINE is not None:
+             return _PIPELINE

+         cuda = _has_cuda()
+         # Prefer bf16 on NVIDIA GPUs; fall back to fp32 on CPU (some GPUs/drivers might not like bf16)
+         preferred_dtype = torch.bfloat16 if cuda else torch.float32

+         load_kwargs = dict(
+             torch_dtype=preferred_dtype,
+             low_cpu_mem_usage=True,
          )

+         # Some diffusers builds/pipelines accept device_map; some don't.
+         # Try to use it to reduce peak RAM when possible.
+         try:
+             _PIPELINE = QwenImageLayeredPipeline.from_pretrained(
+                 "Qwen/Qwen-Image-Layered",
+                 **load_kwargs,
+                 device_map="auto",
+             )
+             print("Loaded pipeline with device_map=auto")
+         except TypeError:
+             _PIPELINE = QwenImageLayeredPipeline.from_pretrained(
+                 "Qwen/Qwen-Image-Layered",
+                 **load_kwargs,
+             )
+             print("Loaded pipeline without device_map")
+
+         # Memory savers (guarded)
+         if hasattr(_PIPELINE, "enable_attention_slicing"):
+             try:
+                 _PIPELINE.enable_attention_slicing()
+             except Exception:
+                 pass
+
+         # VAE slicing exists on some pipelines; it may be missing here -> guard
+         if hasattr(_PIPELINE, "enable_vae_slicing"):
+             try:
+                 _PIPELINE.enable_vae_slicing()
+             except Exception:
+                 pass
+         elif hasattr(_PIPELINE, "vae") and hasattr(_PIPELINE.vae, "enable_slicing"):
+             try:
+                 _PIPELINE.vae.enable_slicing()
+             except Exception:
+                 pass
+
+         # Place pipeline
+         if cuda:
+             # If enable_model_cpu_offload exists, it helps VRAM usage a lot
+             if hasattr(_PIPELINE, "enable_model_cpu_offload"):
+                 try:
+                     _PIPELINE.enable_model_cpu_offload()
+                     print("Enabled model CPU offload")
+                 except Exception as e:
+                     print("CPU offload failed, falling back to .to('cuda'):", e)
+                     _PIPELINE.to("cuda")
+             else:
+                 _PIPELINE.to("cuda")
+         else:
+             _PIPELINE.to("cpu")
+
+         return _PIPELINE


+ # -------------------------
+ # Helpers: filesystem nodes
+ # -------------------------
+ def _now_id() -> str:
+     return time.strftime("%H%M%S") + "_" + uuid.uuid4().hex[:6]
+
+ def ensure_dir(path: str):
+     os.makedirs(path, exist_ok=True)
+
+ def sanitize_prompt(x):
+     if x is None:
+         return None
+     s = str(x).strip()
+     return s if s else None

  def clamp_int(x, default: int, lo: int, hi: int) -> int:
      try:
          v = default
      return max(lo, min(hi, v))

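A few illustrative values for the two input sanitizers above (hypothetical calls, not part of the commit):

    sanitize_prompt("  a cat  ")                    # -> "a cat"
    sanitize_prompt("   ")                          # -> None (blank prompt -> auto-caption path)
    clamp_int("300", default=1000, lo=20, hi=1500)  # -> 300
    clamp_int(None, default=1000, lo=20, hi=1500)   # -> 1000 (unparsable -> default)
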
+ def px_to_emu(px, dpi=96):
+     inch = px / dpi
+     return int(inch * 914400)
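A quick sanity check of the pixel-to-EMU conversion (PowerPoint uses 914,400 EMU per inch, so 96 px at 96 dpi is exactly one inch); these values are worked examples, not from the commit:

    assert px_to_emu(96) == 914400      # 96 px == 1 in
    assert px_to_emu(1024) == 9753600   # 1024 / 96 in * 914400
    # A higher dpi maps the same pixel count to a physically smaller slide:
    assert px_to_emu(1024, dpi=192) == 4876800
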

+ def imagelist_to_pptx(img_paths: List[str]) -> str:
+     # Slide size = first image size
+     with Image.open(img_paths[0]) as img:
+         w, h = img.size

+     prs = Presentation()
+     prs.slide_width = px_to_emu(w)
+     prs.slide_height = px_to_emu(h)
+     slide = prs.slides.add_slide(prs.slide_layouts[6])

+     for p in img_paths:
+         slide.shapes.add_picture(p, 0, 0, width=px_to_emu(w), height=px_to_emu(h))
+
+     out = tempfile.NamedTemporaryFile(suffix=".pptx", delete=False)
+     out.close()
+     prs.save(out.name)
+     return out.name
+
+ def imagelist_to_zip(img_paths: List[str]) -> str:
+     out = tempfile.NamedTemporaryFile(suffix=".zip", delete=False)
+     out.close()
+     with zipfile.ZipFile(out.name, "w", zipfile.ZIP_DEFLATED) as zf:
+         for i, p in enumerate(img_paths):
+             zf.write(p, f"layer_{i+1}.png")
+     return out.name
+
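A usage sketch for the two exporters (paths are hypothetical; any set of same-size PNGs works):

    layer_paths = ["/tmp/layer_01.png", "/tmp/layer_02.png"]  # hypothetical inputs
    pptx_file = imagelist_to_pptx(layer_paths)  # one slide, all layers stacked at (0, 0)
    zip_file = imagelist_to_zip(layer_paths)    # archive entries layer_1.png, layer_2.png
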
+ def write_images_to_node_dir(node_dir: str, images: List[Image.Image]) -> List[str]:
+     ensure_dir(node_dir)
+     paths = []
+     for i, im in enumerate(images):
+         p = os.path.join(node_dir, f"layer_{i+1:02d}.png")
+         im.save(p)
+         paths.append(p)
+     return paths
+
+ def layer_choices_from_paths(paths: List[str]) -> List[Tuple[str, int]]:
+     # return list of (label, index)
+     out = []
+     for i in range(len(paths)):
+         out.append((f"Layer {i+1}", i))
+     return out
+
+ def pretty_history_list(nodes: Dict[str, Any], selected_id: Optional[str]) -> str:
+     # Simple readable text block
+     # Show newest first
+     items = sorted(nodes.values(), key=lambda n: n.get("created_at", 0), reverse=True)
+     lines = []
+     for n in items[:50]:
+         mark = "👉 " if n["id"] == selected_id else " "
+         parent = n.get("parent_id")
+         parent_txt = f" (parent: {parent})" if parent else ""
+         lines.append(f"{mark}{n['id']} — {n['name']} [{len(n['images'])} imgs]{parent_txt}")
+     if not lines:
+         return "_No history yet. Click **Decompose** to create the first node._"
+     return "```\n" + "\n".join(lines) + "\n```"
+
+
+ # -------------------------
+ # GPU duration control (ZeroGPU)
+ # -------------------------
+ def get_duration(
+     *args,
+     gpu_duration=1000,
+     **kwargs
+ ):
+     return clamp_int(gpu_duration, default=1000, lo=20, hi=1500)
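Illustrative values for the duration clamp (the Textbox override can never push the ZeroGPU reservation outside the 20..1500 s window):

    get_duration(gpu_duration="90")    # -> 90
    get_duration(gpu_duration="2000")  # -> 1500 (clamped to hi)
    get_duration(gpu_duration="oops")  # -> 1000 (non-numeric falls back to default)
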

+ # -------------------------
+ # Core GPU runs
+ # -------------------------
+ @spaces.GPU(duration=get_duration)
+ def run_decompose_gpu(
+     input_image,
+     seed=0,
+     prompt=None,
+     neg_prompt=" ",
+     true_guidance_scale=4.0,
+     num_inference_steps=50,
+     layers=7,
+     cfg_norm=True,
+     use_en_prompt=True,
+     resolution=1024,
+     gpu_duration=1000,
+ ):
+     pipe = get_pipeline()

+     # Normalize
+     resolution = int(resolution) if str(resolution).isdigit() else 1024
+     resolution = 1024 if resolution == 1024 else 640
+     prompt = sanitize_prompt(prompt)
+     neg_prompt = str(neg_prompt) if neg_prompt is not None else " "

+     # Image
+     if isinstance(input_image, list):
+         input_image = input_image[0]
+     if isinstance(input_image, str):
+         img = Image.open(input_image).convert("RGB").convert("RGBA")
+     elif isinstance(input_image, Image.Image):
+         img = input_image.convert("RGB").convert("RGBA")
+     elif isinstance(input_image, np.ndarray):
+         img = Image.fromarray(input_image).convert("RGB").convert("RGBA")
+     else:
+         raise ValueError(f"Unsupported input_image type: {type(input_image)}")

+     # Generator
+     gen_device = "cuda" if torch.cuda.is_available() else "cpu"
+     generator = torch.Generator(device=gen_device).manual_seed(int(seed))
+
+     inputs = dict(
+         image=img,
+         generator=generator,
+         true_cfg_scale=float(true_guidance_scale),
+         prompt=prompt,
+         negative_prompt=neg_prompt,
+         num_inference_steps=int(num_inference_steps),
+         num_images_per_prompt=1,
+         layers=int(layers),
+         resolution=int(resolution),
+         cfg_normalize=bool(cfg_norm),
+         use_en_prompt=bool(use_en_prompt),
+     )

+     print("DECOMPOSE INPUTS:", {k: (v if k != "image" else "<PIL>") for k, v in inputs.items()})

+     with torch.inference_mode():
+         out = pipe(**inputs)
+         imgs = out.images[0]  # list[PIL]

+     return imgs


+ @spaces.GPU(duration=get_duration)
+ def run_refine_gpu(
+     selected_layer_path: str,
+     seed=0,
+     prompt=None,
+     neg_prompt=" ",
+     true_guidance_scale=4.0,
+     num_inference_steps=50,
+     sub_layers=3,
+     cfg_norm=True,
+     use_en_prompt=True,
+     resolution=1024,
+     gpu_duration=1000,
+ ):
+     pipe = get_pipeline()

+     resolution = int(resolution) if str(resolution).isdigit() else 1024
+     resolution = 1024 if resolution == 1024 else 640
+     prompt = sanitize_prompt(prompt)
+     neg_prompt = str(neg_prompt) if neg_prompt is not None else " "

+     img = Image.open(selected_layer_path).convert("RGB").convert("RGBA")

+     gen_device = "cuda" if torch.cuda.is_available() else "cpu"
+     generator = torch.Generator(device=gen_device).manual_seed(int(seed))
+
+     inputs = dict(
+         image=img,
+         generator=generator,
+         true_cfg_scale=float(true_guidance_scale),
+         prompt=prompt,
+         negative_prompt=neg_prompt,
+         num_inference_steps=int(num_inference_steps),
+         num_images_per_prompt=1,
+         layers=int(sub_layers),
+         resolution=int(resolution),
+         cfg_normalize=bool(cfg_norm),
+         use_en_prompt=bool(use_en_prompt),
+     )

+     print("REFINE INPUTS:", {k: (v if k != "image" else "<PIL>") for k, v in inputs.items()})

+     with torch.inference_mode():
+         out = pipe(**inputs)
+         imgs = out.images[0]

+     return imgs


+ # -------------------------
+ # App state structure
+ # -------------------------
+ def new_state() -> Dict[str, Any]:
+     return {
+         "nodes": {},         # node_id -> node dict
+         "current_id": None,  # selected node id
+         "root_dir": SESSION_DIR,
+     }

+ def add_node(
+     state: Dict[str, Any],
+     name: str,
+     image_paths: List[str],
+     parent_id: Optional[str] = None,
+ ) -> Dict[str, Any]:
+     node_id = _now_id()
+     state["nodes"][node_id] = {
          "id": node_id,
+         "name": name,
+         "parent_id": parent_id,
+         "images": image_paths,
+         "created_at": time.time(),
      }
+     state["current_id"] = node_id
+     return state
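After one Decompose followed by one Refine, the state has roughly this shape (ids, paths, and timestamps are illustrative, not real output):

    {
        "nodes": {
            "142305_a1b2c3": {"id": "142305_a1b2c3", "name": "Decompose (7 layers)",
                              "parent_id": None, "images": ["/tmp/.../layer_01.png", "..."],
                              "created_at": 1700000000.0},
            "142344_d4e5f6": {"id": "142344_d4e5f6", "name": "Refine Layer 2 (3 sub-layers)",
                              "parent_id": "142305_a1b2c3", "images": ["..."],
                              "created_at": 1700000039.0},
        },
        "current_id": "142344_d4e5f6",  # add_node always makes the new node current
        "root_dir": SESSION_DIR,
    }
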
+ def get_current_node(state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+     cid = state.get("current_id")
+     if not cid:
+         return None
+     return state["nodes"].get(cid)
+
+ def get_node(state: Dict[str, Any], node_id: str) -> Optional[Dict[str, Any]]:
+     return state["nodes"].get(node_id)
+
+ def history_dropdown_choices(state: Dict[str, Any]) -> List[Tuple[str, str]]:
+     # label, value=node_id
+     items = sorted(state["nodes"].values(), key=lambda n: n.get("created_at", 0), reverse=True)
+     return [(f"{n['name']} — {n['id']}", n["id"]) for n in items]
+
+ def update_layer_ui_from_node(node: Dict[str, Any]):
+     paths = node["images"]
+     dd_choices = [label for label, _ in layer_choices_from_paths(paths)]
+     # Dropdown values are labels, but we need the label->index mapping; the index is stored separately via State.
+     return paths, dd_choices
+
+
+ # -------------------------
+ # Gradio callbacks (non-GPU)
+ # -------------------------
+ def on_decompose_click(
+     state: Dict[str, Any],
+     input_image,
+     seed,
+     randomize_seed,
+     prompt,
+     neg_prompt,
+     true_guidance_scale,
+     num_inference_steps,
+     layers,
+     cfg_norm,
+     use_en_prompt,
+     resolution,
+     gpu_duration,
+ ):
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)

+     # GPU run
+     imgs = run_decompose_gpu(
+         input_image=input_image,
+         seed=int(seed),
+         prompt=prompt,
+         neg_prompt=neg_prompt,
+         true_guidance_scale=true_guidance_scale,
+         num_inference_steps=num_inference_steps,
+         layers=layers,
+         cfg_norm=cfg_norm,
+         use_en_prompt=use_en_prompt,
+         resolution=resolution,
+         gpu_duration=gpu_duration,
+     )

+     # Save node
+     node_dir = os.path.join(state["root_dir"], "nodes", f"node_{_now_id()}_decompose")
+     ensure_dir(node_dir)
+     paths = write_images_to_node_dir(node_dir, imgs)

+     state = add_node(
+         state=state,
+         name=f"Decompose ({len(paths)} layers)",
+         image_paths=paths,
+         parent_id=None,
+     )

+     node = get_current_node(state)
+     # Prepare UI
+     layer_paths = node["images"]
+     dd_labels = [f"Layer {i+1}" for i in range(len(layer_paths))]

+     # Default selection = first layer
+     selected_idx = 0
+     selected_label = dd_labels[selected_idx]
+     selected_path = layer_paths[selected_idx]

      return (
+         state,
+         gr.update(value=state["current_id"], choices=history_dropdown_choices(state)),  # history dropdown
+         gr.update(value=pretty_history_list(state["nodes"], state["current_id"])),      # history text
+         layer_paths,                                         # main gallery
+         gr.update(choices=dd_labels, value=selected_label),  # refine dropdown
+         layer_paths,                                         # refine thumb gallery
+         gr.update(value=selected_path),                      # refine preview
+         gr.update(open=False),                               # refined accordion closed (fresh start)
+         [],                                                  # refined gallery cleared
      )

+ def _label_to_index(label: str) -> int:
+     try:
+         # "Layer 3" -> 2
+         n = int(label.strip().split()[-1])
+         return max(0, n - 1)
+     except Exception:
+         return 0
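The label parsing is deliberately forgiving; illustrative behavior:

    _label_to_index("Layer 3")    # -> 2
    _label_to_index(" layer 10")  # -> 9 (last whitespace-separated token wins)
    _label_to_index("garbage")    # -> 0 (any parse failure selects the first layer)
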
+ def on_history_change(state: Dict[str, Any], node_id: str):
+     node = get_node(state, node_id) if node_id else None
+     if not node:
          return (
+             state,
+             gr.update(value=pretty_history_list(state["nodes"], state.get("current_id"))),
              gr.update(choices=[], value=None),
+             [],
+             gr.update(value=None),
+             [],
+             gr.update(open=False),
+             [],
          )

+     state["current_id"] = node_id
+     paths = node["images"]
+     dd_labels = [f"Layer {i+1}" for i in range(len(paths))]
+     selected_idx = 0
+     selected_label = dd_labels[selected_idx] if dd_labels else None
+     selected_path = paths[selected_idx] if paths else None

      return (
+         state,
+         gr.update(value=pretty_history_list(state["nodes"], state["current_id"])),
+         gr.update(choices=dd_labels, value=selected_label),
+         paths,
+         gr.update(value=selected_path),
+         paths,
+         gr.update(open=False),
+         [],
      )

+ def on_refine_thumb_select(state: Dict[str, Any], evt: gr.SelectData):
+     node = get_current_node(state)
+     if not node:
+         return gr.update(), gr.update()

+     idx = int(evt.index) if evt and evt.index is not None else 0
+     idx = max(0, min(idx, len(node["images"]) - 1))
+     label = f"Layer {idx+1}"
+     path = node["images"][idx]
+     return gr.update(value=label), gr.update(value=path)

+ def on_refine_dropdown_change(state: Dict[str, Any], label: str):
+     node = get_current_node(state)
+     if not node:
+         return gr.update(value=None)
+     idx = _label_to_index(label)
+     idx = max(0, min(idx, len(node["images"]) - 1))
+     return gr.update(value=node["images"][idx])
+
+ def on_refine_click(
+     state: Dict[str, Any],
+     refine_layer_label: str,
+     seed,
+     randomize_seed,
+     prompt,
+     neg_prompt,
+     true_guidance_scale,
+     num_inference_steps,
+     sub_layers,
+     cfg_norm,
+     use_en_prompt,
+     resolution,
+     gpu_duration,
  ):
+     node = get_current_node(state)
+     if not node:
+         raise gr.Error("No active node. Run Decompose first.")

+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)

+     idx = _label_to_index(refine_layer_label)
+     idx = max(0, min(idx, len(node["images"]) - 1))
+     selected_path = node["images"][idx]
+
+     # GPU run refine
+     imgs = run_refine_gpu(
+         selected_layer_path=selected_path,
+         seed=int(seed),
+         prompt=prompt,
+         neg_prompt=neg_prompt,
+         true_guidance_scale=true_guidance_scale,
+         num_inference_steps=num_inference_steps,
+         sub_layers=sub_layers,
+         cfg_norm=cfg_norm,
+         use_en_prompt=use_en_prompt,
+         resolution=resolution,
+         gpu_duration=gpu_duration,
      )

+     # Save new node (child of current)
+     node_dir = os.path.join(state["root_dir"], "nodes", f"node_{_now_id()}_refine")
+     ensure_dir(node_dir)
+     paths = write_images_to_node_dir(node_dir, imgs)

+     state = add_node(
+         state=state,
+         name=f"Refine {refine_layer_label} ({len(paths)} sub-layers)",
+         image_paths=paths,
+         parent_id=node["id"],
+     )

+     # The refined node becomes current (so "Refine again" is just clicking Refine again)
+     new_node = get_current_node(state)
+     new_paths = new_node["images"]
+     dd_labels = [f"Layer {i+1}" for i in range(len(new_paths))]
+     selected_idx2 = 0
+     selected_label2 = dd_labels[selected_idx2] if dd_labels else None
+     selected_path2 = new_paths[selected_idx2] if new_paths else None

      return (
+         state,
+         gr.update(value=state["current_id"], choices=history_dropdown_choices(state)),
+         gr.update(value=pretty_history_list(state["nodes"], state["current_id"])),
+         new_paths,                                            # main gallery becomes the refined node
+         gr.update(choices=dd_labels, value=selected_label2),
+         new_paths,                                            # refine thumbnails now reflect the refined node
+         gr.update(value=selected_path2),
+         gr.update(open=True),                                 # auto-open refined accordion
+         new_paths,                                            # refined gallery shows the refined node too
      )

+ def on_export_click(state: Dict[str, Any], node_id: str):
+     node = get_node(state, node_id) if node_id else get_current_node(state)
+     if not node:
+         raise gr.Error("No node selected to export.")
+     paths = node["images"]
+     if not paths:
+         raise gr.Error("Selected node has no images.")
+     pptx = imagelist_to_pptx(paths)
+     zipp = imagelist_to_zip(paths)
+     return pptx, zipp


+ # -------------------------
+ # UI
+ # -------------------------
+ with gr.Blocks() as demo:
+     state = gr.State(new_state())  # gr.State must be created inside the Blocks context
+     gr.HTML(
+         '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
+         'alt="Qwen-Image-Layered Logo" width="520" style="display:block;margin:0 auto;">'
      )
+     gr.Markdown(
+         "Decompose → choose a layer → Refine → build a recursive history tree. Export any node to ZIP/PPTX."
      )

+     with gr.Row():
+         with gr.Column(scale=1):
+             input_image = gr.Image(label="Input Image", image_mode="RGBA")

+             with gr.Accordion("Settings", open=False):
+                 prompt = gr.Textbox(
+                     label="Prompt (Optional)",
+                     placeholder="Describe the image (overall content, including occluded parts).",
+                     value="",
+                     lines=2,
+                 )
+                 neg_prompt = gr.Textbox(
+                     label="Negative Prompt (Optional)",
+                     value=" ",
+                     lines=2,
+                 )

+                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+                 randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

+                 true_guidance_scale = gr.Slider(
+                     label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=4.0
+                 )
+                 num_inference_steps = gr.Slider(
+                     label="Number of inference steps", minimum=1, maximum=100, step=1, value=50
+                 )

+                 resolution = gr.Radio(
+                     label="Processing resolution",
+                     choices=[640, 1024],
+                     value=1024,  # default 1024
+                 )

+                 cfg_norm = gr.Checkbox(label="Whether to enable CFG normalization", value=True)
+                 use_en_prompt = gr.Checkbox(
+                     label="Automatic caption language if no prompt (True=EN, False=ZH)",
+                     value=True,
+                 )

+                 gpu_duration = gr.Textbox(
+                     label="GPU duration override (seconds, 20..1500)",
+                     value="1000",
+                     lines=1,
+                     placeholder="e.g. 120 / 300 / 1000 / 1500",
+                 )

+             with gr.Row():
+                 layers = gr.Slider(label="Layers (Decompose)", minimum=2, maximum=10, step=1, value=7)

+             decompose_btn = gr.Button("Decompose", variant="primary")

+             gr.Markdown("### History")
+             history_dd = gr.Dropdown(
+                 label="Select node",
+                 choices=[],
+                 value=None,
+                 interactive=True,
+             )
+             history_text = gr.Markdown("_No history yet._")

+             with gr.Row():
+                 export_pptx = gr.File(label="Download PPTX")
+                 export_zip = gr.File(label="Download ZIP")
+                 export_btn = gr.Button("Export selected node")

+         with gr.Column(scale=2):
+             gr.Markdown("### Layers (Current node)")
+             main_gallery = gr.Gallery(label="", columns=4, rows=2, height=420, show_label=False)

+             with gr.Accordion("Refine", open=True):
+                 gr.Markdown("Pick a layer like in Photoshop (thumbnails) or via the dropdown, then refine it.")
+                 with gr.Row():
+                     refine_layer_dd = gr.Dropdown(
+                         label="Refine layer",
+                         choices=[],
+                         value=None,
+                         interactive=True,
+                     )
+                     sub_layers = gr.Slider(
+                         label="Sub-layers (Refine)",
+                         minimum=2, maximum=10, step=1, value=3,
                      )

+                 # Photoshop-like 1-row thumbnail strip
+                 refine_thumb_gallery = gr.Gallery(
+                     label="Layer thumbnails (click to select)",
+                     columns=8,
+                     rows=1,
+                     height=120,
+                 )

+                 refine_preview = gr.Image(label="Selected layer preview", image_mode="RGBA")

+                 refine_btn = gr.Button("Refine selected layer", variant="secondary")

+             refined_acc = gr.Accordion("Refined layers", open=False)
+             with refined_acc:
+                 refined_gallery = gr.Gallery(label="", columns=4, rows=1, height=220, show_label=False)

+     # Examples (optional)
+     examples = [f"assets/test_images/{i}.png" for i in range(1, 14)]
+     gr.Examples(
+         examples=examples,
+         inputs=[input_image],
+         fn=None,
+         cache_examples=False,
+         label="Examples",
+     )

+     # -------------------------
+     # Wiring
+     # -------------------------
+     decompose_btn.click(
+         fn=on_decompose_click,
+         inputs=[
+             state,
+             input_image,
+             seed,
+             randomize_seed,
+             prompt,
+             neg_prompt,
+             true_guidance_scale,
+             num_inference_steps,
+             layers,
+             cfg_norm,
+             use_en_prompt,
+             resolution,
+             gpu_duration,
+         ],
+         outputs=[
+             state,
+             history_dd,
+             history_text,
+             main_gallery,
+             refine_layer_dd,
+             refine_thumb_gallery,
+             refine_preview,
+             refined_acc,
+             refined_gallery,
+         ],
+     )

+     history_dd.change(
+         fn=on_history_change,
+         inputs=[state, history_dd],
+         outputs=[
+             state,
+             history_text,
+             refine_layer_dd,
+             main_gallery,
+             refine_preview,
+             refine_thumb_gallery,
+             refined_acc,
+             refined_gallery,
+         ],
+     )

+     refine_thumb_gallery.select(
+         fn=on_refine_thumb_select,
+         inputs=[state],
+         outputs=[refine_layer_dd, refine_preview],
+     )

+     refine_layer_dd.change(
+         fn=on_refine_dropdown_change,
+         inputs=[state, refine_layer_dd],
+         outputs=[refine_preview],
+     )

+     refine_btn.click(
+         fn=on_refine_click,
+         inputs=[
+             state,
+             refine_layer_dd,
+             seed,
+             randomize_seed,
+             prompt,
+             neg_prompt,
+             true_guidance_scale,
+             num_inference_steps,
+             sub_layers,
+             cfg_norm,
+             use_en_prompt,
+             resolution,
+             gpu_duration,
+         ],
+         outputs=[
+             state,
+             history_dd,
+             history_text,
+             main_gallery,
+             refine_layer_dd,
+             refine_thumb_gallery,
+             refine_preview,
+             refined_acc,
+             refined_gallery,
+         ],
+     )

+     export_btn.click(
+         fn=on_export_click,
+         inputs=[state, history_dd],
+         outputs=[export_pptx, export_zip],
+     )

  if __name__ == "__main__":
+     # ssr_mode=False helps avoid those asyncio "Invalid file descriptor" messages in some envs
+     demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
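
A minimal headless sketch of the round-trip the UI wires up (the image path is hypothetical; it calls the same callbacks directly, bypassing Gradio events):

    s = new_state()
    outs = on_decompose_click(s, "input.png", 0, False, "", " ", 4.0, 50, 7, True, True, 1024, "120")
    s = outs[0]  # updated state; current node is the decompose root
    outs = on_refine_click(s, "Layer 1", 0, False, "", " ", 4.0, 50, 3, True, True, 1024, "120")
    pptx_path, zip_path = on_export_click(outs[0], outs[0]["current_id"])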