johndoe321 committed (verified)
Commit 5fcfa96 · Parent: 2015b5b

Update app.py

Replace the Qwen-Image-Layered decomposition demo with a Qwen-Image-Edit-2509 editing demo: load the model sharded across all available GPUs via device_map="auto", drop the PPTX export path, and rework the UI around one or two input images plus an edit prompt.

Files changed (1):
  1. app.py +187 -193
app.py CHANGED
@@ -1,228 +1,222 @@
-import os
-import uuid
-import numpy as np
-import random
-import tempfile
-import spaces
-from PIL import Image
-from diffusers import QwenImageLayeredPipeline
-import torch
-from pptx import Presentation
-import gradio as gr
-
-
-LOG_DIR = "/tmp/local"
-MAX_SEED = np.iinfo(np.int32).max
-
-from huggingface_hub import login
-login(token=os.environ.get('hf'))
-
-dtype = torch.bfloat16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipeline = QwenImageLayeredPipeline.from_pretrained("Qwen/Qwen-Image-Layered", torch_dtype=dtype).to(device)
-# pipeline.set_progress_bar_config(disable=None)
-
-def ensure_dirname(path: str):
-    if path and not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-def random_str(length=8):
-    return uuid.uuid4().hex[:length]
-
-def imagelist_to_pptx(img_files):
-    with Image.open(img_files[0]) as img:
-        img_width_px, img_height_px = img.size
-
-    def px_to_emu(px, dpi=96):
-        # 914400 EMU per inch, e.g. 640 px at 96 dpi -> 6.667 in -> 6,096,000 EMU
-        inch = px / dpi
-        emu = inch * 914400
-        return int(emu)
-
-    prs = Presentation()
-    prs.slide_width = px_to_emu(img_width_px)
-    prs.slide_height = px_to_emu(img_height_px)
-
-    slide = prs.slides.add_slide(prs.slide_layouts[6])
-
-    left = top = 0
-    for img_path in img_files:
-        slide.shapes.add_picture(img_path, left, top, width=px_to_emu(img_width_px), height=px_to_emu(img_height_px))
-
-    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
-        prs.save(tmp.name)
-        return tmp.name
-
-def export_gallery(images):
-    # images: list of (file path, caption) tuples from the gallery
-    images = [e[0] for e in images]
-    pptx_path = imagelist_to_pptx(images)
-    return pptx_path
-
-@spaces.GPU(duration=300)
-def infer(input_image,
-          seed=777,
-          randomize_seed=False,
-          prompt=None,
-          neg_prompt=" ",
-          true_guidance_scale=4.0,
-          num_inference_steps=50,
-          layer=4,
-          cfg_norm=True,
-          use_en_prompt=True):
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    if isinstance(input_image, list):
-        input_image = input_image[0]
-
-    if isinstance(input_image, str):
-        pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
-    elif isinstance(input_image, Image.Image):
-        pil_image = input_image.convert("RGB").convert("RGBA")
-    elif isinstance(input_image, np.ndarray):
-        pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
-    else:
-        raise ValueError("Unsupported input_image type: %s" % type(input_image))
-
-    inputs = {
-        "image": pil_image,
-        "generator": torch.Generator(device='cuda').manual_seed(seed),
-        "true_cfg_scale": true_guidance_scale,
-        "prompt": prompt,
-        "negative_prompt": neg_prompt,
-        "num_inference_steps": num_inference_steps,
-        "num_images_per_prompt": 1,
-        "layers": layer,
-        "resolution": 640,  # Resolution bucket (640 or 1024); 640 is recommended for this version
-        "cfg_normalize": cfg_norm,  # Whether to enable CFG normalization
-        "use_en_prompt": use_en_prompt,
-    }
-    print(inputs)
-    with torch.inference_mode():
-        output = pipeline(**inputs)
-        output_images = output.images[0]
-
-    output = []
-    for i, image in enumerate(output_images):
-        output.append(image)
-    return output
-
-ensure_dirname(LOG_DIR)
-examples = [
-    "assets/test_images/1.png",
-    "assets/test_images/2.png",
-    "assets/test_images/3.png",
-    "assets/test_images/4.png",
-    "assets/test_images/5.png",
-    "assets/test_images/6.png",
-    "assets/test_images/7.png",
-    "assets/test_images/8.png",
-    "assets/test_images/9.png",
-    "assets/test_images/10.png",
-    "assets/test_images/11.png",
-    "assets/test_images/12.png",
-    "assets/test_images/13.png",
-]
-
-
-with gr.Blocks() as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.HTML('<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">')
-
-        with gr.Row():
-            with gr.Column(scale=1):
-                input_image = gr.Image(label="Input Image", image_mode="RGBA")
-
-                prompt = gr.Textbox(
-                    label="Prompt (Optional)",
-                    placeholder="Please enter the prompt to guide the decomposition (Optional)",
-                    value="",
-                    lines=2,
-                )
-
-                with gr.Accordion("Advanced Settings", open=False):
-                    neg_prompt = gr.Textbox(
-                        label="Negative Prompt (Optional)",
-                        placeholder="Please enter the negative prompt",
-                        value=" ",
-                        lines=2,
-                    )
-
-                    seed = gr.Slider(
-                        label="Seed",
-                        minimum=0,
-                        maximum=MAX_SEED,
-                        step=1,
-                        value=0,
-                    )
-                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                    true_guidance_scale = gr.Slider(
-                        label="True guidance scale",
-                        minimum=1.0,
-                        maximum=10.0,
-                        step=0.1,
-                        value=4.0,
-                    )
-
-                    num_inference_steps = gr.Slider(
-                        label="Number of inference steps",
-                        minimum=1,
-                        maximum=50,
-                        step=1,
-                        value=50,
-                    )
-
-                    layer = gr.Slider(
-                        label="Layers",
-                        minimum=2,
-                        maximum=10,
-                        step=1,
-                        value=4,
-                    )
-
-                    cfg_norm = gr.Checkbox(label="Enable CFG normalization", value=True)
-                    use_en_prompt = gr.Checkbox(label="Caption language when no prompt is provided (checked: EN, unchecked: ZH)", value=True)
-
-                run_button = gr.Button("Decompose!", variant="primary")
-
-            with gr.Column(scale=1):
-                gallery = gr.Gallery(label="Layers", columns=4, rows=1, format="png")
-                export_btn = gr.Button("Export as PPTX")
-                export_file = gr.File(label="Download PPTX")
-
-    gr.Examples(examples=examples,
-                inputs=[input_image],
-                outputs=[gallery],
-                fn=infer,
-                examples_per_page=14,
-                cache_examples=False,
-                run_on_click=True,
-                )
-
-    export_btn.click(
-        fn=export_gallery,
-        inputs=gallery,
-        outputs=export_file,
-    )
-
-    run_button.click(
-        fn=infer,
-        inputs=[
-            input_image,
-            seed,
-            randomize_seed,
-            prompt,
-            neg_prompt,
-            true_guidance_scale,
-            num_inference_steps,
-            layer,
-            cfg_norm,
-            use_en_prompt,
-        ],
-        outputs=gallery,
-    )
-
-if __name__ == "__main__":
-    demo.launch()
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+
+# `spaces` is required for Hugging Face Spaces to function correctly
+import spaces
+from diffusers import QwenImageEditPlusPipeline
+
+# --- Configuration ---
+# The model name on the Hugging Face Hub
+MODEL_NAME = "Qwen/Qwen-Image-Edit-2509"
+
+# --- Global Initialization ---
+# This code runs only once when the Space boots up, loading the model into memory.
+
+print(f"PyTorch version: {torch.__version__}")
+
+# We don't need to set the device manually; device_map="auto" handles placement.
+# We only specify the data type for the weights.
+torch_dtype = torch.bfloat16
+
+print(f"Loading model: {MODEL_NAME}...")
+print("The model will be automatically sharded across all available GPUs (8x L40S).")
+
+try:
+    # This is the most important line for multi-GPU memory distribution:
+    # device_map="auto" instructs the accelerate library to split the model
+    # across all available GPUs, balancing the memory load.
+    pipe = QwenImageEditPlusPipeline.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch_dtype,
+        device_map="auto",
+        # A safety checker can be memory-intensive; the base pipeline may not need one,
+        # but if loading fails you can try disabling it explicitly.
+        # safety_checker=None,
+    )
+    pipe.set_progress_bar_config(disable=None)
+    print("✅ Model loaded successfully and distributed across GPUs.")
+
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    pipe = None  # Ensure pipe is None if loading fails
+
+MAX_SEED = np.iinfo(np.int32).max
+
+# --- Gradio Function ---
+
+# The @spaces.GPU decorator is still needed on HF Spaces to indicate that this
+# function uses the GPU; it helps the platform manage resources.
+@spaces.GPU
+def edit_images(
+    image1: Image.Image,
+    image2: Image.Image,
+    prompt: str,
+    negative_prompt: str,
+    seed: int,
+    num_inference_steps: int,
+    guidance_scale: float,
+    true_cfg_scale: float,
+    progress=gr.Progress(track_tqdm=True),
+):
+    if pipe is None:
+        # Raise (not just instantiate) gr.Error so the message reaches the UI
+        raise gr.Error("The model is not available. Please check the Space logs for errors during startup.")
+
+    # A prompt is required, and at least one image must be provided.
+    if not prompt:
+        gr.Warning("Please provide a text prompt.")
+        return None, seed
+
+    images = []
+    if image1 is not None:
+        images.append(image1)
+    if image2 is not None:
+        images.append(image2)
+
+    if len(images) == 0:
+        gr.Warning("Please upload at least one image.")
+        return None, seed
+
+    num_inference_steps = int(num_inference_steps)
+    negative_prompt_value = negative_prompt if negative_prompt else " "
+
+    # If seed is 0, draw a random one; returning it keeps the run reproducible
+    if seed == 0:
+        seed = np.random.randint(1, MAX_SEED)
+
+    # IMPORTANT: The pipeline is already sharded across GPUs due to device_map="auto".
+    # We create the generator on the primary device ('cuda' or 'cuda:0');
+    # accelerate's dispatch hooks take care of cross-device tensor movement.
+    try:
+        generator = torch.Generator(device="cuda").manual_seed(seed)
+    except RuntimeError:
+        # Fallback if 'cuda' isn't the main device name in some environments
+        generator = torch.Generator(device="cuda:0").manual_seed(seed)
+
+    inputs = {
+        "image": images,
+        "prompt": prompt,
+        "generator": generator,
+        "true_cfg_scale": true_cfg_scale,
+        "negative_prompt": negative_prompt_value,
+        "num_inference_steps": num_inference_steps,
+        "guidance_scale": guidance_scale,
+        "num_images_per_prompt": 1,
+    }
+
+    try:
+        with torch.inference_mode():
+            output = pipe(**inputs)
+            output_image = output.images[0]
+
+        return output_image, seed
+    except Exception as e:
+        print(f"An error occurred during inference: {e}")
+        raise gr.Error(f"Inference failed: {e}")
+
+
+# --- Gradio UI ---
+
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 900px;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown(
+            """
+            # Qwen Image Edit Plus
+            ### 8x L40S Memory-Sharded Inference
+            This application shards the model across all 8 GPUs to handle its large memory footprint for a single request.
+            """
+        )
+        if pipe is None:
+            gr.Markdown(
+                """
+                <span style="color: red;">**Model failed to load. Check the Space logs.**</span>
+                """
+            )
+
+        with gr.Row():
+            with gr.Column():
+                image1 = gr.Image(
+                    label="Input Image 1 (Required)",
+                    type="pil",
+                )
+                image2 = gr.Image(
+                    label="Input Image 2 (Optional)",
+                    type="pil",
+                )
+
+            with gr.Column():
+                prompt = gr.Textbox(
+                    label="Text Prompt",
+                    lines=4,
+                    placeholder="Describe how the image(s) should be edited...",
+                )
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt (optional)",
+                    lines=2,
+                    placeholder="Describe what you want to avoid...",
+                )
+                with gr.Accordion("Advanced Settings", open=False):
+                    seed = gr.Slider(
+                        label="Seed (0 for random)",
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        value=0,
+                    )
+                    num_inference_steps = gr.Slider(
+                        label="Inference Steps",
+                        minimum=5,
+                        maximum=60,
+                        step=1,
+                        value=40,
+                    )
+                    guidance_scale = gr.Slider(
+                        label="Guidance Scale",
+                        minimum=0.0,
+                        maximum=5.0,
+                        step=0.1,
+                        value=1.0,
+                    )
+                    true_cfg_scale = gr.Slider(
+                        label="True CFG Scale",
+                        minimum=1.0,
+                        maximum=8.0,
+                        step=0.5,
+                        value=4.0,
+                    )
+                run_button = gr.Button("Generate", variant="primary")
+
+        result = gr.Image(label="Edited Image", show_label=True)
+
+    gr.on(
+        triggers=[run_button.click, prompt.submit],
+        fn=edit_images,
+        inputs=[
+            image1,
+            image2,
+            prompt,
+            negative_prompt,
+            seed,
+            num_inference_steps,
+            guidance_scale,
+            true_cfg_scale,
+        ],
+        outputs=[result, seed],
+    )
+
+if __name__ == "__main__":
+    # On HF Spaces, .queue() manages user requests. default_concurrency_limit
+    # (concurrency_count in Gradio 3.x) caps how many run at once. A value of
+    # 2-4 is safe even on 8x L40S, as each inference is heavy; higher values
+    # risk OOM when the sharded model already fills all 8 GPUs.
+    demo.queue(default_concurrency_limit=2).launch()
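
For reference, below is a minimal sketch of driving the same pipeline outside Gradio. It mirrors the inputs dict that the new app.py builds; the file paths, prompt, and seed are placeholders, and device_map="auto" support for this pipeline depends on the installed diffusers/accelerate versions.

import torch
from PIL import Image
from diffusers import QwenImageEditPlusPipeline

# Load the editing pipeline sharded across available GPUs, as app.py does.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Same arguments the Space passes; numeric values are the UI defaults.
output = pipe(
    image=[Image.open("input.png")],       # placeholder input path
    prompt="Make the sky a vivid sunset",  # placeholder edit instruction
    negative_prompt=" ",
    generator=torch.Generator(device="cuda").manual_seed(777),  # placeholder seed
    true_cfg_scale=4.0,
    guidance_scale=1.0,
    num_inference_steps=40,
    num_images_per_prompt=1,
)
output.images[0].save("edited.png")        # placeholder output path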