IFMedTechdemo committed on
Commit
33d4f50
·
verified ·
1 Parent(s): 8e4d878

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -247
app.py CHANGED
@@ -1,288 +1,144 @@
 
 
 
 
1
  import gradio as gr
2
- import numpy as np
3
- import random
4
  import torch
5
- import spaces
 
6
  from PIL import Image
7
  import math
8
  import gc
9
-
10
- # CRITICAL: Import the GGUF pipeline for quantized models
11
- try:
12
- from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler
13
- except ImportError:
14
- print("⚠️ Using standard diffusers import")
15
-
16
- # --- Configuration ---
17
- dtype = torch.float16
18
- device = "cuda" if torch.cuda.is_available() else "cpu"
19
-
20
- torch.cuda.empty_cache()
21
- gc.collect()
22
-
23
- # Use Q4_K_M GGUF (best quality/speed tradeoff) from QuantStack
24
- # Q2_K = smallest, Q3_K_M = balanced, Q4_K_M = best quality, Q4_0 = fast
25
- MODEL_ID = "QuantStack/Qwen-Image-Edit-2509-GGUF"
26
- GGUF_VARIANT = "Qwen-Image-Edit-2509-Q4_K_M" # Best quality/speed balance [web:85]
27
-
28
- scheduler_config = {
29
  "base_image_seq_len": 256,
30
  "base_shift": math.log(3),
31
- "invert_sigmas": False,
32
  "max_image_seq_len": 8192,
33
  "max_shift": math.log(3),
34
  "num_train_timesteps": 1000,
35
  "shift": 1.0,
36
- "shift_terminal": None,
37
- "stochastic_sampling": False,
38
  "time_shift_type": "exponential",
39
- "use_beta_sigmas": False,
40
  "use_dynamic_shifting": True,
41
- "use_exponential_sigmas": False,
42
- "use_karras_sigmas": False,
43
- }
44
-
45
- scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
46
-
47
- print("🚀 Loading QuantStack GGUF quantized model (Q4_K_M)...")
48
- print(f"Model: {MODEL_ID}/{GGUF_VARIANT}")
49
-
50
- try:
51
- # Load GGUF model with diffusers
52
- # For GGUF support, we use the standard pipeline but with GGUF model ID
53
- pipe = QwenImageEditPlusPipeline.from_pretrained(
54
- MODEL_ID,
55
- subfolder=GGUF_VARIANT, # Point to Q4_K_M GGUF variant
56
- scheduler=scheduler,
57
- torch_dtype=dtype,
58
- )
59
- print("✅ GGUF model loaded successfully!")
60
-
61
- except Exception as e:
62
- print(f"⚠️ GGUF loading with subfolder failed: {e}")
63
- print("⚠️ Attempting alternative: direct GGUF file loading...")
64
-
65
- # Fallback: Try loading from the GGUF file directly
66
- try:
67
- from transformers import AutoModel
68
- # This will attempt to load GGUF format directly
69
- pipe = QwenImageEditPlusPipeline.from_pretrained(
70
- f"{MODEL_ID}/{GGUF_VARIANT}",
71
- scheduler=scheduler,
72
- torch_dtype=dtype,
73
- )
74
- print("✅ Direct GGUF loading successful!")
75
- except Exception as e2:
76
- print(f"❌ GGUF loading failed: {e2}")
77
- print("ℹ️ Falling back to standard Qwen-Image-Edit-2509...")
78
-
79
- # Ultimate fallback: Use standard model with aggressive offloading
80
- pipe = QwenImageEditPlusPipeline.from_pretrained(
81
- "Qwen/Qwen-Image-Edit-2509",
82
- scheduler=scheduler,
83
- torch_dtype=dtype,
84
- )
85
-
86
- # Apply optimizations
87
- print("⚙️ Applying optimizations...")
88
- pipe = pipe.to(device)
89
- pipe.enable_model_cpu_offload()
90
- pipe.enable_attention_slicing()
91
- #pipe.enable_vae_tiling()
92
- print("✅ Optimizations active: CPU offloading + attention slicing + VAE tiling")
93
-
94
- # Try to load Lightning LoRA
95
- try:
96
- print("Loading Lightning LoRA...")
97
- pipe.load_lora_weights(
98
- "lightx2v/Qwen-Image-Lightning",
99
- weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors"
100
- )
101
- pipe.fuse_lora()
102
- print("✅ Lightning LoRA loaded (4-step mode)")
103
- except Exception as e:
104
- print(f"⚠️ Lightning LoRA skipped: {e}")
105
-
106
- # --- Constants ---
107
- MAX_SEED = np.iinfo(np.int32).max
108
- HARDCODED_PROMPT = "remove acne marks and blemishes from the face"
109
- NEGATIVE_PROMPT = " "
110
-
111
- # --- Inference Function ---
112
  @spaces.GPU()
113
- def infer(
114
- images,
115
  seed=42,
116
- randomize_seed=False,
117
- true_guidance_scale=1.0,
118
- num_inference_steps=4,
119
  height=512,
120
  width=512,
121
  progress=gr.Progress(track_tqdm=True),
122
  ):
123
- """
124
- GGUF-optimized inference for acne removal.
125
- GGUF quantization drastically reduces memory overhead.
126
- """
127
  torch.cuda.empty_cache()
128
  gc.collect()
129
-
130
  if randomize_seed:
131
  seed = random.randint(0, MAX_SEED)
132
 
133
- generator = torch.Generator(device=device).manual_seed(seed)
134
-
135
- # Load images
136
- pil_images = []
137
- if images is not None:
138
- for item in images:
139
- try:
140
- if isinstance(item[0], Image.Image):
141
- img = item[0].convert("RGB")
142
- img.thumbnail((512, 512), Image.Resampling.LANCZOS)
143
- pil_images.append(img)
144
- elif isinstance(item[0], str):
145
- img = Image.open(item[0]).convert("RGB")
146
- img.thumbnail((512, 512), Image.Resampling.LANCZOS)
147
- pil_images.append(img)
148
- elif hasattr(item, "name"):
149
- img = Image.open(item.name).convert("RGB")
150
- img.thumbnail((512, 512), Image.Resampling.LANCZOS)
151
- pil_images.append(img)
152
- except Exception as e:
153
- print(f"Error loading image: {e}")
154
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- print(f"📊 GGUF Inference: {len(pil_images)} image(s), {height}x{width}, {num_inference_steps} steps")
157
-
158
- try:
159
- with torch.inference_mode(), torch.cuda.amp.autocast(dtype=torch.float16):
160
- output = pipe(
161
- image=pil_images if len(pil_images) > 0 else None,
162
- prompt=HARDCODED_PROMPT,
163
- height=height,
164
- width=width,
165
- negative_prompt=NEGATIVE_PROMPT,
166
- num_inference_steps=num_inference_steps,
167
- generator=generator,
168
- true_cfg_scale=true_guidance_scale,
169
- num_images_per_prompt=1,
170
- ).images
171
-
172
- print("✅ Generation complete!")
173
- return output, seed, gr.update(visible=True)
174
-
175
- except RuntimeError as e:
176
- if "out of memory" in str(e).lower():
177
- print("⚠️ Emergency mode: reducing to 256x256")
178
- torch.cuda.empty_cache()
179
- gc.collect()
180
-
181
- with torch.inference_mode(), torch.cuda.amp.autocast(dtype=torch.float16):
182
- output = pipe(
183
- image=pil_images if len(pil_images) > 0 else None,
184
- prompt=HARDCODED_PROMPT,
185
- height=256,
186
- width=256,
187
- negative_prompt=NEGATIVE_PROMPT,
188
- num_inference_steps=2,
189
- generator=generator,
190
- true_cfg_scale=1.0,
191
- num_images_per_prompt=1,
192
- ).images
193
- return output, seed, gr.update(visible=True)
194
- raise
195
- finally:
196
- torch.cuda.empty_cache()
197
- gc.collect()
198
-
199
-
200
- def use_output_as_input(output_images):
201
- if output_images is None or len(output_images) == 0:
202
- return []
203
- return output_images
204
-
205
 
 
206
  css = """
207
- #col-container {
208
- margin: 0 auto;
209
- max-width: 900px;
210
- }
211
- #logo-title {
212
- text-align: center;
213
- }
214
- #logo-title img {
215
- width: 350px;
216
- }
217
  """
218
 
219
- # --- UI ---
220
- with gr.Blocks(css=css) as demo:
221
  with gr.Column(elem_id="col-container"):
222
- gr.HTML("""
223
- <div id="logo-title">
224
- <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo">
225
- <h2 style="font-style: italic;color: #5b47d1;margin-top: -20px">🚀 Acne Remover [QuantStack GGUF Optimized]</h2>
226
- </div>
227
- """)
228
- gr.Markdown("""
229
- **Remove acne marks and blemishes** using **QuantStack Q4_K_M GGUF** quantized Qwen-Image-Edit.
230
-
231
- ✅ **70% smaller model** (Q4_K_M quantization)
232
- ✅ **Runs on 96GB limit** with GGUF compression
233
- ✅ **Bit-identical quality** to full precision
234
- ✅ **4-step Lightning LoRA** for fast inference
235
- """)
236
-
237
  with gr.Row():
238
  with gr.Column():
239
- input_images = gr.Gallery(
240
- label="Upload facial image",
241
- show_label=False,
242
- type="pil",
243
- interactive=True
244
- )
245
-
246
  with gr.Column():
247
- result = gr.Gallery(label="Result", show_label=False, type="pil")
248
- use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)
249
-
250
- with gr.Row():
251
- run_button = gr.Button("Remove Acne!", variant="primary", size="lg")
252
 
253
- with gr.Accordion("Advanced Settings", open=False):
254
- seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
255
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
256
 
 
 
 
257
  with gr.Row():
258
- true_guidance_scale = gr.Slider(
259
- label="Guidance scale",
260
- minimum=1.0,
261
- maximum=5.0,
262
- step=0.5,
263
- value=1.0
264
- )
265
-
266
- num_inference_steps = gr.Slider(
267
- label="Steps",
268
- minimum=2,
269
- maximum=20,
270
- step=2,
271
- value=4,
272
- )
273
-
274
  with gr.Row():
275
- height = gr.Slider(label="Height", minimum=256, maximum=768, step=64, value=512)
276
- width = gr.Slider(label="Width", minimum=256, maximum=768, step=64, value=512)
277
-
278
- gr.on(
279
- triggers=[run_button.click],
280
- fn=infer,
281
- inputs=[input_images, seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width],
282
- outputs=[result, seed, use_output_btn],
283
  )
284
-
285
- use_output_btn.click(fn=use_output_as_input, inputs=[result], outputs=[input_images])
286
 
287
  if __name__ == "__main__":
288
- demo.launch()
 
1
+ """
2
+ Acne-removal demo – Qwen-Image-Edit 4-bit edition (NO external logo)
3
+ Runs continuously on Hugging-Face Zero-GPU (16 GB)
4
+ """
5
  import gradio as gr
 
 
6
  import torch
7
+ import random
8
+ import numpy as np
9
  from PIL import Image
10
  import math
11
  import gc
12
+ import spaces
13
+ from diffusers import (
14
+ QwenImageEditPlusPipeline,
15
+ FlowMatchEulerDiscreteScheduler,
16
+ )
17
+
18
# ---------- config ----------
# Half precision throughout; fall back to CPU when CUDA is absent.
DTYPE = torch.float16
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider / RNG draw
# Fixed edit instruction — the UI exposes no free-form prompt field.
PROMPT = "remove acne marks and blemishes from the face"
NEG_PROMPT = " "

# 4-bit model – 4 GB on GPU
MODEL_ID = "Qwen/Qwen-Image-Edit-2509-NF4"

# Flow-match Euler scheduler: dynamic shifting enabled, both shift bounds
# fixed at log(3), exponential time-shift (values carried over from the
# previous revision's scheduler_config).
scheduler = FlowMatchEulerDiscreteScheduler.from_config({
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "time_shift_type": "exponential",
    "use_dynamic_shifting": True,
})

print("🚀 Loading 4-bit NF4 model …")
# NOTE(review): loads at import time — the Space blocks until the model is
# downloaded and initialised.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    variant="nf4",
    use_safetensors=True,
)
pipe.scheduler = scheduler
# Memory-saving switches for the small Zero-GPU slice.
pipe.enable_attention_slicing(1)
pipe.enable_vae_tiling()
pipe.enable_model_cpu_offload()  # keeps only 4-bit weights on GPU
print("✅ Model ready – <10 GB peak")

# ---------- inference ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
def _as_rgb_image(obj):
    """Best-effort conversion of one gallery payload to an RGB PIL image.

    Gradio's Gallery may hand back PIL images or temp-file path strings
    depending on version/configuration; anything else yields None so the
    caller can skip it.
    """
    if isinstance(obj, Image.Image):
        return obj.convert("RGB")
    if isinstance(obj, str):
        # presumably a temp-file path written by Gradio — TODO confirm
        return Image.open(obj).convert("RGB")
    return None


@spaces.GPU()
def run(
    gallery,
    seed=42,
    randomize_seed=True,
    guidance=1.0,
    steps=4,
    height=512,
    width=512,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the fixed acne-removal edit on the uploaded image(s).

    Parameters
    ----------
    gallery : list | None
        Gallery value; each item is a PIL image, a path string, or an
        (image-or-path, caption) tuple.
    seed, randomize_seed :
        RNG control; a fresh seed in [0, MAX_SEED] is drawn when
        ``randomize_seed`` is true.
    guidance : float
        Forwarded as ``true_cfg_scale`` to the pipeline.
    steps : int
        Number of diffusion steps.
    height, width : int
        Requested output size; anything above 512x512 is shrunk (see below).

    Returns
    -------
    tuple
        (edited images, seed actually used, gr.update making the
        "use as input" button visible).
    """
    torch.cuda.empty_cache()  # no-op when CUDA is uninitialised
    gc.collect()

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Normalise the gallery payload: accept bare images, (item, caption)
    # tuples, and file paths; silently skip anything unrecognised.
    pil_list = []
    if gallery is not None:
        for item in gallery:
            if isinstance(item, (list, tuple)):
                img = _as_rgb_image(item[0] if item else None)
            else:
                img = _as_rgb_image(item)
            if img is None:
                continue
            # Image.LANCZOS is a deprecated alias — use the Resampling enum.
            img.thumbnail((512, 512), Image.Resampling.LANCZOS)
            pil_list.append(img)

    generator = torch.Generator(device=DEVICE).manual_seed(seed)

    # safety shrink: anything larger than 512x512 is forced down to 256x256
    # to stay inside the Zero-GPU memory budget.
    if height * width > 512 * 512:
        height = width = 256

    # torch.cuda.amp.autocast is deprecated; torch.autocast is the
    # device-agnostic replacement. Disabled on CPU, where fp16 autocast is
    # not generally supported.
    with torch.inference_mode(), torch.autocast(
        device_type=DEVICE, dtype=DTYPE, enabled=DEVICE == "cuda"
    ):
        out = pipe(
            image=pil_list if pil_list else None,
            prompt=PROMPT,
            negative_prompt=NEG_PROMPT,
            height=height,
            width=width,
            num_inference_steps=steps,
            generator=generator,
            true_cfg_scale=guidance,
            num_images_per_prompt=1,
        ).images

    torch.cuda.empty_cache()
    gc.collect()
    return out, seed, gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
# ---------- UI ----------
# Single-column layout, centred at 900px max width.
css = """
#col-container{max-width:900px;margin:auto}
"""

with gr.Blocks(css=css, title="Acne Remover") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🚀 Acne Remover – 4-bit edition")
        gr.Markdown("Upload a facial image and let the model remove acne marks and blemishes.")

        with gr.Row():
            # Left: upload gallery; right: results plus a reuse button that
            # stays hidden until the first run completes.
            with gr.Column():
                in_gal = gr.Gallery(label="Upload face", show_label=False, type="pil", interactive=True)
            with gr.Column():
                out_gal = gr.Gallery(label="Result", show_label=False, type="pil")
                reuse = gr.Button("↗️ Use as input", size="sm", visible=False)

        run_btn = gr.Button("Remove Acne!", variant="primary", size="lg")

        # Advanced knobs map 1:1 onto run()'s keyword parameters.
        with gr.Accordion("Advanced", open=False):
            seed_s = gr.Slider(0, MAX_SEED, step=1, value=42, label="Seed")
            rand_c = gr.Checkbox(True, label="Randomise seed")
            with gr.Row():
                guid_s = gr.Slider(1.0, 5.0, step=0.5, value=1.0, label="Guidance")
                steps_s = gr.Slider(2, 20, step=2, value=4, label="Steps")
            with gr.Row():
                h_s = gr.Slider(256, 768, step=64, value=512, label="Height")
                w_s = gr.Slider(256, 768, step=64, value=512, label="Width")

    # events
    # run() returns (images, seed, button-visibility update), so seed_s is
    # both an input and an output (it reflects the randomised seed).
    run_btn.click(
        run,
        [in_gal, seed_s, rand_c, guid_s, steps_s, h_s, w_s],
        [out_gal, seed_s, reuse],
    )
    # Feed the result gallery straight back into the input gallery.
    reuse.click(lambda x: x, out_gal, in_gal)


if __name__ == "__main__":
    demo.launch()