IFMedTechdemo committed on
Commit
62ee321
·
verified ·
1 Parent(s): edf1c19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +233 -294
app.py CHANGED
@@ -3,375 +3,314 @@ import numpy as np
3
  import random
4
  import torch
5
  import spaces
 
6
  from PIL import Image
 
 
 
 
 
 
 
7
  import math
8
- import gc
 
 
 
9
  import logging
10
- from typing import List, Optional
11
 
12
- # Configure logging
13
- logging.basicConfig(level=logging.INFO)
14
- logger = logging.getLogger(__name__)
15
 
16
- # Configuration
17
- DTYPE = torch.float16
18
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
19
- MODEL_ID = "Qwen/Qwen-Image-Edit-2509" # Use standard model [web:44]
20
- MAX_SEED = np.iinfo(np.int32).max
21
- HARDCODED_PROMPT = "remove acne marks and blemishes from the face"
22
- NEGATIVE_PROMPT = " "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Import pipeline
25
- try:
26
- from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler
27
- logger.info("✅ Diffusers imported successfully")
28
- except ImportError as e:
29
- logger.error(f"❌ Import failed: {e}")
30
- raise
31
 
32
- # Memory management functions
33
- def cleanup_memory():
34
- """Comprehensive memory cleanup"""
35
- if torch.cuda.is_available():
36
- torch.cuda.empty_cache()
37
- torch.cuda.synchronize()
38
- gc.collect()
39
 
40
- def check_gpu_memory():
41
- """Monitor GPU memory usage"""
42
- if torch.cuda.is_available():
43
- allocated = torch.cuda.memory_allocated() / 1024**3
44
- cached = torch.cuda.memory_reserved() / 1024**3
45
- logger.info(f"GPU Memory - Allocated: {allocated:.2f}GB, Cached: {cached:.2f}GB")
 
 
 
 
 
 
 
 
 
 
46
 
47
- # Initialize pipeline
48
- def load_pipeline():
49
- """Load and optimize the pipeline"""
50
- logger.info(f"🚀 Loading {MODEL_ID}...")
51
-
52
- # Scheduler configuration [web:39]
53
- scheduler_config = {
54
- "base_image_seq_len": 256,
55
- "base_shift": math.log(3),
56
- "invert_sigmas": False,
57
- "max_image_seq_len": 8192,
58
- "max_shift": math.log(3),
59
- "num_train_timesteps": 1000,
60
- "shift": 1.0,
61
- "shift_terminal": None,
62
- "stochastic_sampling": False,
63
- "time_shift_type": "exponential",
64
- "use_beta_sigmas": False,
65
- "use_dynamic_shifting": True,
66
- "use_exponential_sigmas": False,
67
- "use_karras_sigmas": False,
68
- }
69
-
70
- try:
71
- # Create scheduler
72
- scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
73
-
74
- # Load pipeline [web:38]
75
- pipe = QwenImageEditPlusPipeline.from_pretrained(
76
- MODEL_ID,
77
- scheduler=scheduler,
78
- torch_dtype=DTYPE,
79
- use_safetensors=True,
80
- )
81
-
82
- # Move to device
83
- pipe = pipe.to(DEVICE)
84
-
85
- # Enable optimizations [web:43]
86
- pipe.enable_attention_slicing() # Memory efficient attention
87
- pipe.enable_vae_slicing() # Sliced VAE decoding
88
- pipe.enable_vae_tiling() # Tiled VAE for large images
89
-
90
- # Try to load Lightning LoRA for faster inference [web:39]
91
- try:
92
- pipe.load_lora_weights(
93
- "lightx2v/Qwen-Image-Lightning",
94
- weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors"
95
- )
96
- pipe.fuse_lora()
97
- logger.info("✅ Lightning LoRA loaded (4-step mode)")
98
- except Exception as e:
99
- logger.warning(f"⚠️ Lightning LoRA skipped: {e}")
100
-
101
- logger.info("✅ Pipeline loaded and optimized successfully")
102
- check_gpu_memory()
103
- return pipe
104
-
105
- except Exception as e:
106
- logger.error(f"❌ Pipeline loading failed: {e}")
107
- raise
108
 
109
- # Load pipeline at startup
110
- pipe = load_pipeline()
 
 
 
 
 
 
 
 
 
111
 
112
  @spaces.GPU()
113
  def infer(
114
- images: Optional[List],
115
- seed: int = 42,
116
- randomize_seed: bool = False,
117
- true_guidance_scale: float = 1.0,
118
- num_inference_steps: int = 4,
119
- height: int = 512,
120
- width: int = 512,
 
 
 
121
  progress=gr.Progress(track_tqdm=True),
122
  ):
123
- """
124
- Optimized inference function with proper error handling
125
- """
126
- # Clean memory before inference
127
- cleanup_memory()
128
-
129
  if randomize_seed:
130
  seed = random.randint(0, MAX_SEED)
 
131
 
132
- generator = torch.Generator(device=DEVICE).manual_seed(seed)
133
-
134
- # Process input images
135
  pil_images = []
136
  if images is not None:
137
  for item in images:
138
  try:
139
- # Handle different input types
140
- if isinstance(item, tuple) and len(item) > 0:
141
- img_path = item[0]
142
- if isinstance(img_path, Image.Image):
143
- img = img_path.convert("RGB")
144
- elif isinstance(img_path, str):
145
- img = Image.open(img_path).convert("RGB")
146
- else:
147
- continue
148
- else:
149
- continue
150
-
151
- # Resize for memory efficiency [web:38]
152
- img.thumbnail((768, 768), Image.Resampling.LANCZOS)
153
- pil_images.append(img)
154
-
155
- except Exception as e:
156
- logger.error(f"Error processing image: {e}")
157
  continue
158
-
159
- if not pil_images:
160
- raise gr.Error("No valid images provided")
161
-
162
- logger.info(f"📊 Processing {len(pil_images)} image(s), {height}x{width}, {num_inference_steps} steps")
163
-
164
- try:
165
- # Inference with proper context management [web:27]
166
- with torch.inference_mode():
167
- with torch.cuda.amp.autocast(enabled=True, dtype=DTYPE):
168
- output = pipe(
169
- image=pil_images,
170
- prompt=HARDCODED_PROMPT,
171
- height=height,
172
- width=width,
173
- negative_prompt=NEGATIVE_PROMPT,
174
- num_inference_steps=num_inference_steps,
175
- generator=generator,
176
- true_cfg_scale=true_guidance_scale,
177
- num_images_per_prompt=1,
178
- ).images
179
-
180
- logger.info("✅ Generation completed successfully")
181
- return output, seed, gr.update(visible=True)
182
-
183
- except torch.cuda.OutOfMemoryError as e:
184
- logger.warning("⚠️ CUDA OOM - Trying emergency mode")
185
- cleanup_memory()
186
-
187
- try:
188
- # Emergency fallback with reduced settings
189
- with torch.inference_mode():
190
- with torch.cuda.amp.autocast(enabled=True, dtype=DTYPE):
191
- output = pipe(
192
- image=pil_images,
193
- prompt=HARDCODED_PROMPT,
194
- height=min(height, 384),
195
- width=min(width, 384),
196
- negative_prompt=NEGATIVE_PROMPT,
197
- num_inference_steps=max(2, num_inference_steps // 2),
198
- generator=generator,
199
- true_cfg_scale=1.0,
200
- num_images_per_prompt=1,
201
- ).images
202
-
203
- logger.info("✅ Emergency mode successful")
204
- return output, seed, gr.update(visible=True)
205
-
206
- except Exception as emergency_e:
207
- logger.error(f"❌ Emergency mode failed: {emergency_e}")
208
- raise gr.Error(f"GPU memory insufficient. Try smaller images or reduce resolution.")
209
-
210
- except Exception as e:
211
- logger.error(f"❌ Inference failed: {e}")
212
- raise gr.Error(f"Generation failed: {str(e)}")
213
-
214
- finally:
215
- # Always clean up after inference [web:32]
216
- cleanup_memory()
217
 
218
- def use_output_as_input(output_images):
219
- """Convert output images to input format"""
220
- if output_images is None or len(output_images) == 0:
221
- return []
222
- return [(img, f"output_{i}.png") for i, img in enumerate(output_images)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- # UI Styles
225
  css = """
226
  #col-container {
227
  margin: 0 auto;
228
- max-width: 900px;
229
  }
230
  #logo-title {
231
  text-align: center;
232
  }
233
  #logo-title img {
234
- width: 350px;
235
- }
236
- .memory-info {
237
- font-size: 0.8em;
238
- color: #666;
239
- margin-top: 5px;
240
  }
 
241
  """
242
 
243
- # Gradio Interface
244
- with gr.Blocks(css=css, title="Acne Remover - Qwen Image Edit") as demo:
245
  with gr.Column(elem_id="col-container"):
246
- # Header
247
  gr.HTML("""
248
  <div id="logo-title">
249
- <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo">
250
- <h2 style="font-style: italic;color: #5b47d1;margin-top: -20px">✨ Professional Acne Remover</h2>
251
  </div>
252
  """)
253
-
254
  gr.Markdown("""
255
- **Remove acne marks and blemishes** using the powerful Qwen-Image-Edit-2509 model.
256
-
257
- **State-of-the-art results** with 20B parameter model [web:42]
258
- ✅ **Multi-image support** for batch processing [web:45]
259
- ✅ **Lightning-fast inference** with 4-step generation [web:39]
260
- ✅ **Memory optimized** for stable performance [web:43]
261
  """)
262
-
263
  with gr.Row():
264
  with gr.Column():
265
- input_images = gr.File(
266
- label="📸 Upload facial images",
267
- file_count="multiple",
268
- file_types=["image"],
269
- height=300
270
- )
271
-
272
- gr.HTML('<div class="memory-info">💡 Tip: Upload multiple images for batch processing</div>')
273
 
274
  with gr.Column():
275
- result = gr.Gallery(
276
- label="🎯 Results",
277
- show_label=True,
278
- type="pil",
279
- height=300,
280
- columns=2
281
- )
282
- use_output_btn = gr.Button(
283
- "🔄 Use Results as New Input",
284
- variant="secondary",
285
- size="sm",
286
- visible=False
287
- )
288
 
289
- # Main action button
290
- run_button = gr.Button(
291
- "🚀 Remove Acne & Blemishes!",
292
- variant="primary",
293
- size="lg"
294
- )
 
 
295
 
296
- # Advanced settings
297
- with gr.Accordion("⚙️ Advanced Settings", open=False):
298
  seed = gr.Slider(
299
- label="🎲 Seed",
300
- minimum=0,
301
- maximum=MAX_SEED,
302
- step=1,
303
- value=0
304
- )
305
- randomize_seed = gr.Checkbox(
306
- label="🎯 Randomize seed",
307
- value=True
308
  )
309
 
 
 
310
  with gr.Row():
311
  true_guidance_scale = gr.Slider(
312
- label="📊 Guidance Scale",
313
  minimum=1.0,
314
- maximum=5.0,
315
  step=0.1,
316
- value=1.0,
317
- info="Higher values = stronger prompt adherence"
318
  )
319
 
320
  num_inference_steps = gr.Slider(
321
- label="🔄 Inference Steps",
322
- minimum=2,
323
- maximum=20,
324
  step=1,
325
- value=4,
326
- info="More steps = higher quality (slower)"
327
  )
328
 
329
- with gr.Row():
330
  height = gr.Slider(
331
- label="📏 Height",
332
- minimum=256,
333
- maximum=768,
334
- step=64,
335
- value=512
336
  )
 
337
  width = gr.Slider(
338
- label="📐 Width",
339
- minimum=256,
340
- maximum=768,
341
- step=64,
342
- value=512
343
  )
 
 
344
 
345
- # Footer info
346
- gr.Markdown("""
347
- ---
348
- **Model Info**: Qwen-Image-Edit-2509 | **Memory**: Optimized for GPU efficiency | **Speed**: ~4 steps with Lightning LoRA
349
- """)
350
-
351
- # Event handlers
352
- run_button.click(
353
  fn=infer,
354
  inputs=[
355
- input_images, seed, randomize_seed,
356
- true_guidance_scale, num_inference_steps,
357
- height, width
 
 
 
 
 
 
358
  ],
359
  outputs=[result, seed, use_output_btn],
360
- show_progress=True
361
  )
362
 
363
  use_output_btn.click(
364
- fn=use_output_as_input,
365
- inputs=[result],
366
  outputs=[input_images]
367
  )
368
 
369
- # Launch configuration
370
  if __name__ == "__main__":
371
- demo.launch(
372
- server_name="0.0.0.0",
373
- server_port=7860,
374
- share=False,
375
- show_error=True,
376
- quiet=False
377
- )
 
3
  import random
4
  import torch
5
  import spaces
6
+
7
  from PIL import Image
8
+ from diffusers import FlowMatchEulerDiscreteScheduler
9
+ from optimization import optimize_pipeline_
10
+ from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
11
+ from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
+ from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
+
14
+ from huggingface_hub import InferenceClient
15
  import math
16
+ import os
17
+ import base64
18
+ from io import BytesIO
19
+ import json
20
  import logging
 
21
 
22
+ logging.getLogger("transformers").setLevel(logging.ERROR)
23
+ logging.getLogger("diffusers").setLevel(logging.ERROR)
 
24
 
25
# Rewriter system prompt for an LLM that polishes user edit instructions.
# NOTE(review): nothing in this build reads SYSTEM_PROMPT — infer() hard-codes
# its prompt and the `rewrite_prompt` flag is ignored; presumably kept for a
# future InferenceClient-based rewrite step (huggingface_hub is imported above).
SYSTEM_PROMPT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
Please strictly follow the rewriting rules below:
## 1. General Principles
- Keep the rewritten prompt **concise and comprehensive**. Avoid overly long sentences and unnecessary descriptive language.
- If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
- Keep the main part of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
- All added objects or modifications must align with the logic and style of the scene in the input images.
- If multiple sub-images are to be generated, describe the content of each sub-image individually.
## 2. Task-Type Handling Rules
### 1. Add, Delete, Replace Tasks
- If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
- If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
    > Original: "Add an animal"
    > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
- Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
- For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
### 2. Text Editing Tasks
- All text content must be enclosed in English double quotes `" "`. Keep the original language of the text, and keep the capitalization.
- Both adding new text and replacing existing text are text replacement tasks, For example:
    - Replace "xx" to "yy"
    - Replace the mask / bounding box to "yy"
    - Replace the visual object to "yy"
- Specify text position, color, and layout only if user has required.
- If font is specified, keep the original language of the font.
### 3. Human Editing Tasks
- Make the smallest changes to the given user's prompt.
- If changes to background, action, expression, camera shot, or ambient lighting are required, please list each modification individually.
- **Edits to makeup or facial features / expression must be subtle, not exaggerated, and must preserve the subject's identity consistency.**
    > Original: "Add eyebrows to the face"
    > Rewritten: "Slightly thicken the person's eyebrows with little change, look natural."
### 4. Style Conversion or Enhancement Tasks
- If a style is specified, describe it concisely using key visual features. For example:
    > Original: "Disco style"
    > Rewritten: "1970s disco style: flashing lights, disco ball, mirrored walls, vibrant colors"
- For style reference, analyze the original image and extract key characteristics (color, composition, texture, lighting, artistic style, etc.), integrating them into the instruction.
- **Colorization tasks (including old photo restoration) must use the fixed template:**
    "Restore and colorize the old photo."
- Clearly specify the object to be modified. For example:
    > Original: Modify the subject in Picture 1 to match the style of Picture 2.
    > Rewritten: Change the girl in Picture 1 to the ink-wash style of Picture 2 — rendered in black-and-white watercolor with soft color transitions.
### 5. Material Replacement
- Clearly specify the object and the material. For example: "Change the material of the apple to papercut style."
- For text material replacement, use the fixed template:
    "Change the material of text "xxxx" to laser style"
### 6. Logo/Pattern Editing
- Material replacement should preserve the original shape and structure as much as possible. For example:
    > Original: "Convert to sapphire material"
    > Rewritten: "Convert the main subject in the image to sapphire material, preserving similar shape and structure"
- When migrating logos/patterns to new scenes, ensure shape and structure consistency. For example:
    > Original: "Migrate the logo in the image to a new scene"
    > Rewritten: "Migrate the logo in the image to a new scene, preserving similar shape and structure"
### 7. Multi-Image Tasks
- Rewritten prompts must clearly point out which image's element is being modified. For example:
    > Original: "Replace the subject of picture 1 with the subject of picture 2"
    > Rewritten: "Replace the girl of picture 1 with the boy of picture 2, keeping picture 2's background unchanged"
- For stylization tasks, describe the reference image's style in the rewritten prompt, while preserving the visual content of the source image.
## 3. Rationale and Logic Check
- Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" requires logical correction.
- Supplement missing critical information: e.g., if position is unspecified, choose a reasonable area based on composition (near subject, blank space, center/edge, etc.).
# Output Format Example
```json
{
    "Rewritten": "..."
}
'''
92
 
93
def encode_image(pil_image):
    """Encode a PIL image as a base64 PNG string (for API payloads).

    Args:
        pil_image: any object with a PIL-style ``save(fp, format=...)`` method.

    Returns:
        The PNG bytes encoded as an ASCII base64 ``str``.
    """
    # The file already does `from io import BytesIO`; the previous
    # function-local `import io` was redundant.
    buffered = BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
 
 
98
 
99
# Run in bfloat16 on GPU when available; falls back to CPU otherwise
# (bf16 inference on CPU for a model this size is impractical — TODO confirm
# the CPU branch is ever exercised on Spaces).
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Flow-match Euler scheduler tuned for the Lightning LoRA: exponential
# time-shifting with dynamic shifting, base_shift == max_shift == ln(3).
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}

scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Base editing pipeline (Qwen-Image-Edit-2509) with the custom scheduler.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

# Lightning LoRA distilled for 8-step inference; fusing folds it into the
# base weights so there is no per-step LoRA overhead.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors"
)
pipe.fuse_lora()

# Swap in the project-local transformer class and its FlashAttention-3
# attention processor (see qwenimage/*), then ahead-of-time optimize the
# pipeline with a dummy two-image batch at 1024x1024.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider / randomizer
139
+
140
def use_output_as_input(output_images):
    """Feed the result gallery back in as the next input gallery.

    Returns the images unchanged, or ``[]`` when there is nothing to reuse
    (``None`` or an empty gallery).
    """
    has_images = output_images is not None and len(output_images) > 0
    return output_images if has_images else []
144
 
145
@spaces.GPU()
def infer(
    images,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    height=None,
    width=None,
    rewrite_prompt=True,
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the (hard-coded) acne-removal edit on the uploaded gallery images.

    Returns:
        (list of output PIL images, seed actually used,
         gr.update(visible=True) to reveal the "use as input" button).

    NOTE(review): `prompt` and `rewrite_prompt` are accepted but ignored —
    the edit instruction is overwritten below. `num_images_per_prompt` has
    no UI control and always receives its default of 1.
    """
    negative_prompt = " "  # effectively "no negative prompt"
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Normalize gallery items to RGB PIL images. Gradio galleries yield
    # (image, caption) pairs; file-like objects expose .name. Items that
    # cannot be decoded are silently skipped (best-effort by design).
    pil_images = []
    if images is not None:
        for item in images:
            try:
                if isinstance(item[0], Image.Image):
                    pil_images.append(item[0].convert("RGB"))
                elif isinstance(item[0], str):
                    pil_images.append(Image.open(item[0]).convert("RGB"))
                elif hasattr(item, "name"):
                    pil_images.append(Image.open(item.name).convert("RGB"))
            except Exception:
                continue

    # Both sliders bottom out at 256; treat 256x256 as "unspecified" so the
    # pipeline chooses the output size itself.
    if height == 256 and width == 256:
        height, width = None, None

    # Fixed edit instruction — this Space is a single-purpose acne remover,
    # so the user-typed prompt is deliberately discarded here.
    prompt = (
        "Remove acne marks and black spots. "
    )

    image = pipe(
        image=pil_images if len(pil_images) > 0 else None,
        prompt=prompt,
        height=height,
        width=width,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images

    return image, seed, gr.update(visible=True)
197
+
198
examples = []  # no example galleries are registered for this Space

# Page styling: centered 1024px column, centered logo block.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1024px;
}
#logo-title {
    text-align: center;
}
#logo-title img {
    width: 400px;
}
#edit_text{margin-top: -62px !important}
"""
213
 
214
# ---------------------------------------------------------------- Gradio UI
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""
        <div id="logo-title">
            <h2 style="font-style: italic;color: #5b47d1;margin-top: -27px !important;margin-left: 96px">[Plus] Fast, 8-steps with Lightning LoRA</h2>
        </div>
        """)
        gr.Markdown("""
        [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
        This demo uses the new [Qwen-Image-Edit-2509](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) with the [Qwen-Image-Lightning v2](https://huggingface.co/lightx2v/Qwen-Image-Lightning) LoRA + [AoT compilation & FA3](https://huggingface.co/blog/zerogpu-aoti) for accelerated inference.
        Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) to run locally with ComfyUI or diffusers.
        """)
        with gr.Row():
            with gr.Column():
                # Multi-image input; items reach infer() as (PIL, caption) pairs.
                input_images = gr.Gallery(label="Input Images",
                                          show_label=False,
                                          type="pil",
                                          interactive=True)

            with gr.Column():
                result = gr.Gallery(label="Result", show_label=False, type="pil")
                # Hidden until the first generation succeeds (see infer()'s
                # third return value).
                use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)

        with gr.Row():
            # NOTE(review): this prompt box is displayed but infer() replaces
            # its value with a hard-coded acne-removal instruction.
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                placeholder="describe the edit instruction",
                container=False,
            )
            run_button = gr.Button("Edit!", variant="primary")

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                true_guidance_scale = gr.Slider(
                    label="True guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=1.0
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=40,
                    step=1,
                    value=8,
                )

                # 256x256 (both at minimum) is treated as "auto" by infer().
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=2048,
                    step=8,
                    value=None,
                )

                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=2048,
                    step=8,
                    value=None,
                )

            # NOTE(review): currently ignored by infer().
            rewrite_prompt = gr.Checkbox(label="Rewrite prompt", value=True)

    # Run on button click or on Enter in the prompt box.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_images,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            height,
            width,
            rewrite_prompt,
        ],
        outputs=[result, seed, use_output_btn],
    )

    # Recycle the last results as the next input gallery.
    use_output_btn.click(
        fn=use_output_as_input,
        inputs=[result],
        outputs=[input_images]
    )
314
 
 
315
if __name__ == "__main__":
    # Launch with Gradio defaults; the hosting environment supplies
    # server host/port settings.
    demo.launch()