hexware committed on
Commit
893f414
·
verified ·
1 Parent(s): 33759f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -100
app.py CHANGED
@@ -3,33 +3,40 @@ import uuid
3
  import numpy as np
4
  import random
5
  import tempfile
 
 
6
  import spaces
7
- import zipfile
8
- from PIL import Image
9
- from diffusers import QwenImageLayeredPipeline
10
  import torch
11
- from pptx import Presentation
12
  import gradio as gr
13
 
 
 
 
14
 
15
  LOG_DIR = "/tmp/local"
16
  MAX_SEED = np.iinfo(np.int32).max
17
 
 
18
  from huggingface_hub import login
19
- login(token=os.environ.get('hf'))
20
 
21
  dtype = torch.bfloat16
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
- pipeline = QwenImageLayeredPipeline.from_pretrained("Qwen/Qwen-Image-Layered", torch_dtype=dtype).to(device)
24
- # pipeline.set_progress_bar_config(disable=None)
 
 
 
25
 
26
  def ensure_dirname(path: str):
27
  if path and not os.path.exists(path):
28
  os.makedirs(path, exist_ok=True)
29
 
 
30
  def random_str(length=8):
31
  return uuid.uuid4().hex[:length]
32
 
 
33
  def imagelist_to_pptx(img_files):
34
  with Image.open(img_files[0]) as img:
35
  img_width_px, img_height_px = img.size
@@ -47,49 +54,49 @@ def imagelist_to_pptx(img_files):
47
 
48
  left = top = 0
49
  for img_path in img_files:
50
- slide.shapes.add_picture(img_path, left, top, width=px_to_emu(img_width_px), height=px_to_emu(img_height_px))
 
 
 
 
 
 
51
 
52
  with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
53
  prs.save(tmp.name)
54
  return tmp.name
55
 
56
- def export_gallery(images):
57
- # images: list of image file paths
58
- images = [e[0] for e in images]
59
- pptx_path = imagelist_to_pptx(images)
60
- return pptx_path
61
-
62
- def export_gallery_zip(images):
63
- # images: list of tuples (file_path, caption)
64
- images = [e[0] for e in images]
65
-
66
- with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp:
67
- with zipfile.ZipFile(tmp.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
68
- for i, img_path in enumerate(images):
69
- # Get the file extension from original file
70
- ext = os.path.splitext(img_path)[1] or '.png'
71
- # Add each image to the zip with a numbered filename
72
- zipf.write(img_path, f"layer_{i+1}{ext}")
73
- return tmp.name
74
 
75
- @spaces.GPU(duration=180)
76
- def infer(input_image,
77
- seed=777,
78
- randomize_seed=False,
79
- prompt=None,
80
- neg_prompt=" ",
81
- true_guidance_scale=4.0,
82
- num_inference_steps=50,
83
- layer=4,
84
- cfg_norm=True,
85
- use_en_prompt=True):
86
-
 
 
 
87
  if randomize_seed:
88
  seed = random.randint(0, MAX_SEED)
89
-
 
 
 
 
 
 
 
 
 
90
  if isinstance(input_image, list):
91
  input_image = input_image[0]
92
-
93
  if isinstance(input_image, str):
94
  pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
95
  elif isinstance(input_image, Image.Image):
@@ -97,80 +104,89 @@ def infer(input_image,
97
  elif isinstance(input_image, np.ndarray):
98
  pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
99
  else:
100
- raise ValueError("Unsupported input_image type: %s" % type(input_image))
101
-
 
 
102
  inputs = {
103
  "image": pil_image,
104
- "generator": torch.Generator(device='cuda').manual_seed(seed),
105
  "true_cfg_scale": true_guidance_scale,
106
  "prompt": prompt,
107
  "negative_prompt": neg_prompt,
108
  "num_inference_steps": num_inference_steps,
109
  "num_images_per_prompt": 1,
110
  "layers": layer,
111
- "resolution": 640, # Using different bucket (640, 1024) to determine the resolution. For this version, 640 is recommended
112
- "cfg_normalize": cfg_norm, # Whether enable cfg normalization.
113
- "use_en_prompt": use_en_prompt,
114
  }
115
- print(inputs)
 
 
116
  with torch.inference_mode():
117
- output = pipeline(**inputs)
118
- output_images = output.images[0]
119
-
120
- output = []
 
121
  temp_files = []
122
- for i, image in enumerate(output_images):
123
- output.append(image)
124
- # Save to temp file for export
125
  tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
126
- image.save(tmp.name)
127
  temp_files.append(tmp.name)
128
-
129
- # Generate PPTX
130
  pptx_path = imagelist_to_pptx(temp_files)
131
-
132
- # Generate ZIP
133
- with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp:
134
- with zipfile.ZipFile(tmp.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
135
  for i, img_path in enumerate(temp_files):
136
  zipf.write(img_path, f"layer_{i+1}.png")
137
- zip_path = tmp.name
138
-
139
- return output, pptx_path, zip_path
 
140
 
141
  ensure_dirname(LOG_DIR)
142
- examples = [
143
- "assets/test_images/1.png",
144
- "assets/test_images/2.png",
145
- "assets/test_images/3.png",
146
- "assets/test_images/4.png",
147
- "assets/test_images/5.png",
148
- "assets/test_images/6.png",
149
- "assets/test_images/7.png",
150
- "assets/test_images/8.png",
151
- "assets/test_images/9.png",
152
- "assets/test_images/10.png",
153
- "assets/test_images/11.png",
154
- "assets/test_images/12.png",
155
- "assets/test_images/13.png",
156
- ]
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  with gr.Blocks() as demo:
160
  with gr.Column(elem_id="col-container"):
161
- gr.HTML('<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">')
162
- gr.Markdown("""
163
- The text prompt is intended to describe the overall content of the input image—including elements that may be partially occluded (e.g., you may specify the text hidden behind a foreground object). It is not designed to control the semantic content of individual layers explicitly.
164
- """)
 
 
 
 
 
 
165
  with gr.Row():
166
  with gr.Column(scale=1):
167
  input_image = gr.Image(label="Input Image", image_mode="RGBA")
168
-
169
-
170
  with gr.Accordion("Advanced Settings", open=False):
171
  prompt = gr.Textbox(
172
  label="Prompt (Optional)",
173
- placeholder="Please enter the prompt to descibe the image. Optional",
174
  value="",
175
  lines=2,
176
  )
@@ -180,7 +196,7 @@ with gr.Blocks() as demo:
180
  value=" ",
181
  lines=2,
182
  )
183
-
184
  seed = gr.Slider(
185
  label="Seed",
186
  minimum=0,
@@ -189,13 +205,13 @@ with gr.Blocks() as demo:
189
  value=0,
190
  )
191
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
192
-
193
  true_guidance_scale = gr.Slider(
194
  label="True guidance scale",
195
  minimum=1.0,
196
  maximum=10.0,
197
  step=0.1,
198
- value=4.0
199
  )
200
 
201
  num_inference_steps = gr.Slider(
@@ -214,9 +230,20 @@ with gr.Blocks() as demo:
214
  value=4,
215
  )
216
 
217
- cfg_norm = gr.Checkbox(label="Whether enable CFG normalization", value=True)
218
- use_en_prompt = gr.Checkbox(label="Automatic caption language if no prompt provided, True for EN, False for ZH", value=True)
219
-
 
 
 
 
 
 
 
 
 
 
 
220
  run_button = gr.Button("Decompose!", variant="primary")
221
 
222
  with gr.Column(scale=2):
@@ -225,13 +252,14 @@ with gr.Blocks() as demo:
225
  export_file = gr.File(label="Download PPTX")
226
  export_zip_file = gr.File(label="Download ZIP")
227
 
228
- gr.Examples(examples=examples,
229
- inputs=[input_image],
230
- outputs=[gallery, export_file, export_zip_file],
231
- fn=infer,
232
- examples_per_page=14,
233
- cache_examples=False,
234
- run_on_click=True
 
235
  )
236
 
237
  run_button.click(
@@ -247,7 +275,8 @@ with gr.Blocks() as demo:
247
  layer,
248
  cfg_norm,
249
  use_en_prompt,
250
- ],
 
251
  outputs=[gallery, export_file, export_zip_file],
252
  )
253
 
 
3
  import numpy as np
4
  import random
5
  import tempfile
6
+ import zipfile
7
+
8
  import spaces
 
 
 
9
  import torch
 
10
  import gradio as gr
11
 
12
+ from PIL import Image
13
+ from diffusers import QwenImageLayeredPipeline
14
+ from pptx import Presentation
15
 
16
  LOG_DIR = "/tmp/local"
17
  MAX_SEED = np.iinfo(np.int32).max
18
 
19
+ # Optional HF login (works in Spaces if you set HF token as secret env var "hf")
20
  from huggingface_hub import login
21
+ login(token=os.environ.get("hf"))
22
 
23
  dtype = torch.bfloat16
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
25
+
26
+ pipeline = QwenImageLayeredPipeline.from_pretrained(
27
+ "Qwen/Qwen-Image-Layered", torch_dtype=dtype
28
+ ).to(device)
29
+
30
 
31
  def ensure_dirname(path: str):
32
  if path and not os.path.exists(path):
33
  os.makedirs(path, exist_ok=True)
34
 
35
+
36
  def random_str(length=8):
37
  return uuid.uuid4().hex[:length]
38
 
39
+
40
  def imagelist_to_pptx(img_files):
41
  with Image.open(img_files[0]) as img:
42
  img_width_px, img_height_px = img.size
 
54
 
55
  left = top = 0
56
  for img_path in img_files:
57
+ slide.shapes.add_picture(
58
+ img_path,
59
+ left,
60
+ top,
61
+ width=px_to_emu(img_width_px),
62
+ height=px_to_emu(img_height_px),
63
+ )
64
 
65
  with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
66
  prs.save(tmp.name)
67
  return tmp.name
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ @spaces.GPU(duration=1500)
71
+ def infer(
72
+ input_image,
73
+ seed=777,
74
+ randomize_seed=False,
75
+ prompt=None,
76
+ neg_prompt=" ",
77
+ true_guidance_scale=4.0,
78
+ num_inference_steps=50,
79
+ layer=4,
80
+ cfg_norm=True,
81
+ use_en_prompt=True,
82
+ resolution=640,
83
+ ):
84
+ # Seed
85
  if randomize_seed:
86
  seed = random.randint(0, MAX_SEED)
87
+
88
+ # Normalize resolution input
89
+ try:
90
+ resolution = int(resolution)
91
+ except Exception:
92
+ resolution = 640
93
+ if resolution not in (640, 1024):
94
+ resolution = 640
95
+
96
+ # Normalize image input
97
  if isinstance(input_image, list):
98
  input_image = input_image[0]
99
+
100
  if isinstance(input_image, str):
101
  pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
102
  elif isinstance(input_image, Image.Image):
 
104
  elif isinstance(input_image, np.ndarray):
105
  pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
106
  else:
107
+ raise ValueError(f"Unsupported input_image type: {type(input_image)}")
108
+
109
+ gen_device = "cuda" if torch.cuda.is_available() else "cpu"
110
+
111
  inputs = {
112
  "image": pil_image,
113
+ "generator": torch.Generator(device=gen_device).manual_seed(seed),
114
  "true_cfg_scale": true_guidance_scale,
115
  "prompt": prompt,
116
  "negative_prompt": neg_prompt,
117
  "num_inference_steps": num_inference_steps,
118
  "num_images_per_prompt": 1,
119
  "layers": layer,
120
+ "resolution": resolution, # <-- 640 or 1024
121
+ "cfg_normalize": cfg_norm,
122
+ "use_en_prompt": use_en_prompt,
123
  }
124
+
125
+ print("INFER INPUTS:", inputs)
126
+
127
  with torch.inference_mode():
128
+ out = pipeline(**inputs)
129
+ output_images = out.images[0] # list of PIL images (layers)
130
+
131
+ # Prepare gallery + export files
132
+ gallery_out = []
133
  temp_files = []
134
+
135
+ for img in output_images:
136
+ gallery_out.append(img)
137
  tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
138
+ img.save(tmp.name)
139
  temp_files.append(tmp.name)
140
+
 
141
  pptx_path = imagelist_to_pptx(temp_files)
142
+
143
+ with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
144
+ with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
 
145
  for i, img_path in enumerate(temp_files):
146
  zipf.write(img_path, f"layer_{i+1}.png")
147
+ zip_path = tmpzip.name
148
+
149
+ return gallery_out, pptx_path, zip_path
150
+
151
 
152
  ensure_dirname(LOG_DIR)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ examples = [
155
+ "assets/test_images/1.png",
156
+ "assets/test_images/2.png",
157
+ "assets/test_images/3.png",
158
+ "assets/test_images/4.png",
159
+ "assets/test_images/5.png",
160
+ "assets/test_images/6.png",
161
+ "assets/test_images/7.png",
162
+ "assets/test_images/8.png",
163
+ "assets/test_images/9.png",
164
+ "assets/test_images/10.png",
165
+ "assets/test_images/11.png",
166
+ "assets/test_images/12.png",
167
+ "assets/test_images/13.png",
168
+ ]
169
 
170
  with gr.Blocks() as demo:
171
  with gr.Column(elem_id="col-container"):
172
+ gr.HTML(
173
+ '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
174
+ 'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
175
+ )
176
+ gr.Markdown(
177
+ """
178
+ The text prompt is intended to describe the overall content of the input image—including elements that may be partially occluded (e.g., you may specify the text hidden behind a foreground object). It is not designed to control the semantic content of individual layers explicitly.
179
+ """
180
+ )
181
+
182
  with gr.Row():
183
  with gr.Column(scale=1):
184
  input_image = gr.Image(label="Input Image", image_mode="RGBA")
185
+
 
186
  with gr.Accordion("Advanced Settings", open=False):
187
  prompt = gr.Textbox(
188
  label="Prompt (Optional)",
189
+ placeholder="Please enter the prompt to descibe the image. (Optional)",
190
  value="",
191
  lines=2,
192
  )
 
196
  value=" ",
197
  lines=2,
198
  )
199
+
200
  seed = gr.Slider(
201
  label="Seed",
202
  minimum=0,
 
205
  value=0,
206
  )
207
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
208
+
209
  true_guidance_scale = gr.Slider(
210
  label="True guidance scale",
211
  minimum=1.0,
212
  maximum=10.0,
213
  step=0.1,
214
+ value=4.0,
215
  )
216
 
217
  num_inference_steps = gr.Slider(
 
230
  value=4,
231
  )
232
 
233
+ resolution = gr.Radio(
234
+ label="Processing resolution",
235
+ choices=[640, 1024],
236
+ value=640,
237
+ )
238
+
239
+ cfg_norm = gr.Checkbox(
240
+ label="Whether enable CFG normalization", value=True
241
+ )
242
+ use_en_prompt = gr.Checkbox(
243
+ label="Automatic caption language if no prompt provided, True for EN, False for ZH",
244
+ value=True,
245
+ )
246
+
247
  run_button = gr.Button("Decompose!", variant="primary")
248
 
249
  with gr.Column(scale=2):
 
252
  export_file = gr.File(label="Download PPTX")
253
  export_zip_file = gr.File(label="Download ZIP")
254
 
255
+ gr.Examples(
256
+ examples=examples,
257
+ inputs=[input_image],
258
+ outputs=[gallery, export_file, export_zip_file],
259
+ fn=infer,
260
+ examples_per_page=14,
261
+ cache_examples=False,
262
+ run_on_click=True,
263
  )
264
 
265
  run_button.click(
 
275
  layer,
276
  cfg_norm,
277
  use_en_prompt,
278
+ resolution, # <-- NEW
279
+ ],
280
  outputs=[gallery, export_file, export_zip_file],
281
  )
282