IFMedTechdemo committed on
Commit
33d4f50
·
verified ·
1 Parent(s): 8e4d878

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -247
app.py CHANGED
@@ -1,288 +1,144 @@
 
 
 
 
1
  import gradio as gr
2
- import numpy as np
3
- import random
4
  import torch
5
- import spaces
 
6
  from PIL import Image
7
  import math
8
  import gc
9
-
10
- # CRITICAL: Import the GGUF pipeline for quantized models
11
- try:
12
- from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler
13
- except ImportError:
14
- print("⚠️ Using standard diffusers import")
15
-
16
- # --- Configuration ---
17
- dtype = torch.float16
18
- device = "cuda" if torch.cuda.is_available() else "cpu"
19
-
20
- torch.cuda.empty_cache()
21
- gc.collect()
22
-
23
- # Use Q4_K_M GGUF (best quality/speed tradeoff) from QuantStack
24
- # Q2_K = smallest, Q3_K_M = balanced, Q4_K_M = best quality, Q4_0 = fast
25
- MODEL_ID = "QuantStack/Qwen-Image-Edit-2509-GGUF"
26
- GGUF_VARIANT = "Qwen-Image-Edit-2509-Q4_K_M" # Best quality/speed balance [web:85]
27
-
28
- scheduler_config = {
29
  "base_image_seq_len": 256,
30
  "base_shift": math.log(3),
31
- "invert_sigmas": False,
32
  "max_image_seq_len": 8192,
33
  "max_shift": math.log(3),
34
  "num_train_timesteps": 1000,
35
  "shift": 1.0,
36
- "shift_terminal": None,
37
- "stochastic_sampling": False,
38
  "time_shift_type": "exponential",
39
- "use_beta_sigmas": False,
40
  "use_dynamic_shifting": True,
41
- "use_exponential_sigmas": False,
42
- "use_karras_sigmas": False,
43
- }
44
-
45
- scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
46
-
47
- print("🚀 Loading QuantStack GGUF quantized model (Q4_K_M)...")
48
- print(f"Model: {MODEL_ID}/{GGUF_VARIANT}")
49
-
50
- try:
51
- # Load GGUF model with diffusers
52
- # For GGUF support, we use the standard pipeline but with GGUF model ID
53
- pipe = QwenImageEditPlusPipeline.from_pretrained(
54
- MODEL_ID,
55
- subfolder=GGUF_VARIANT, # Point to Q4_K_M GGUF variant
56
- scheduler=scheduler,
57
- torch_dtype=dtype,
58
- )
59
- print("✅ GGUF model loaded successfully!")
60
-
61
- except Exception as e:
62
- print(f"⚠️ GGUF loading with subfolder failed: {e}")
63
- print("⚠️ Attempting alternative: direct GGUF file loading...")
64
-
65
- # Fallback: Try loading from the GGUF file directly
66
- try:
67
- from transformers import AutoModel
68
- # This will attempt to load GGUF format directly
69
- pipe = QwenImageEditPlusPipeline.from_pretrained(
70
- f"{MODEL_ID}/{GGUF_VARIANT}",
71
- scheduler=scheduler,
72
- torch_dtype=dtype,
73
- )
74
- print("✅ Direct GGUF loading successful!")
75
- except Exception as e2:
76
- print(f"❌ GGUF loading failed: {e2}")
77
- print("ℹ️ Falling back to standard Qwen-Image-Edit-2509...")
78
-
79
- # Ultimate fallback: Use standard model with aggressive offloading
80
- pipe = QwenImageEditPlusPipeline.from_pretrained(
81
- "Qwen/Qwen-Image-Edit-2509",
82
- scheduler=scheduler,
83
- torch_dtype=dtype,
84
- )
85
-
86
- # Apply optimizations
87
- print("⚙️ Applying optimizations...")
88
- pipe = pipe.to(device)
89
- pipe.enable_model_cpu_offload()
90
- pipe.enable_attention_slicing()
91
- #pipe.enable_vae_tiling()
92
- print("✅ Optimizations active: CPU offloading + attention slicing + VAE tiling")
93
-
94
- # Try to load Lightning LoRA
95
- try:
96
- print("Loading Lightning LoRA...")
97
- pipe.load_lora_weights(
98
- "lightx2v/Qwen-Image-Lightning",
99
- weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors"
100
- )
101
- pipe.fuse_lora()
102
- print("✅ Lightning LoRA loaded (4-step mode)")
103
- except Exception as e:
104
- print(f"⚠️ Lightning LoRA skipped: {e}")
105
-
106
- # --- Constants ---
107
- MAX_SEED = np.iinfo(np.int32).max
108
- HARDCODED_PROMPT = "remove acne marks and blemishes from the face"
109
- NEGATIVE_PROMPT = " "
110
-
111
- # --- Inference Function ---
112
  @spaces.GPU()
113
- def infer(
114
- images,
115
  seed=42,
116
- randomize_seed=False,
117
- true_guidance_scale=1.0,
118
- num_inference_steps=4,
119
  height=512,
120
  width=512,
121
  progress=gr.Progress(track_tqdm=True),
122
  ):
123
- """
124
- GGUF-optimized inference for acne removal.
125
- GGUF quantization drastically reduces memory overhead.
126
- """
127
  torch.cuda.empty_cache()
128
  gc.collect()
129
-
130
  if randomize_seed:
131
  seed = random.randint(0, MAX_SEED)
132
 
133
- generator = torch.Generator(device=device).manual_seed(seed)
134
-
135
- # Load images
136
- pil_images = []
137
- if images is not None:
138
- for item in images:
139
- try:
140
- if isinstance(item[0], Image.Image):
141
- img = item[0].convert("RGB")
142
- img.thumbnail((512, 512), Image.Resampling.LANCZOS)
143
- pil_images.append(img)
144
- elif isinstance(item[0], str):
145
- img = Image.open(item[0]).convert("RGB")
146
- img.thumbnail((512, 512), Image.Resampling.LANCZOS)
147
- pil_images.append(img)
148
- elif hasattr(item, "name"):
149
- img = Image.open(item.name).convert("RGB")
150
- img.thumbnail((512, 512), Image.Resampling.LANCZOS)
151
- pil_images.append(img)
152
- except Exception as e:
153
- print(f"Error loading image: {e}")
154
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- print(f"📊 GGUF Inference: {len(pil_images)} image(s), {height}x{width}, {num_inference_steps} steps")
157
-
158
- try:
159
- with torch.inference_mode(), torch.cuda.amp.autocast(dtype=torch.float16):
160
- output = pipe(
161
- image=pil_images if len(pil_images) > 0 else None,
162
- prompt=HARDCODED_PROMPT,
163
- height=height,
164
- width=width,
165
- negative_prompt=NEGATIVE_PROMPT,
166
- num_inference_steps=num_inference_steps,
167
- generator=generator,
168
- true_cfg_scale=true_guidance_scale,
169
- num_images_per_prompt=1,
170
- ).images
171
-
172
- print("✅ Generation complete!")
173
- return output, seed, gr.update(visible=True)
174
-
175
- except RuntimeError as e:
176
- if "out of memory" in str(e).lower():
177
- print("⚠️ Emergency mode: reducing to 256x256")
178
- torch.cuda.empty_cache()
179
- gc.collect()
180
-
181
- with torch.inference_mode(), torch.cuda.amp.autocast(dtype=torch.float16):
182
- output = pipe(
183
- image=pil_images if len(pil_images) > 0 else None,
184
- prompt=HARDCODED_PROMPT,
185
- height=256,
186
- width=256,
187
- negative_prompt=NEGATIVE_PROMPT,
188
- num_inference_steps=2,
189
- generator=generator,
190
- true_cfg_scale=1.0,
191
- num_images_per_prompt=1,
192
- ).images
193
- return output, seed, gr.update(visible=True)
194
- raise
195
- finally:
196
- torch.cuda.empty_cache()
197
- gc.collect()
198
-
199
-
200
- def use_output_as_input(output_images):
201
- if output_images is None or len(output_images) == 0:
202
- return []
203
- return output_images
204
-
205
 
 
206
  css = """
207
- #col-container {
208
- margin: 0 auto;
209
- max-width: 900px;
210
- }
211
- #logo-title {
212
- text-align: center;
213
- }
214
- #logo-title img {
215
- width: 350px;
216
- }
217
  """
218
 
219
- # --- UI ---
220
- with gr.Blocks(css=css) as demo:
221
  with gr.Column(elem_id="col-container"):
222
- gr.HTML("""
223
- <div id="logo-title">
224
- <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo">
225
- <h2 style="font-style: italic;color: #5b47d1;margin-top: -20px">🚀 Acne Remover [QuantStack GGUF Optimized]</h2>
226
- </div>
227
- """)
228
- gr.Markdown("""
229
- **Remove acne marks and blemishes** using **QuantStack Q4_K_M GGUF** quantized Qwen-Image-Edit.
230
-
231
- ✅ **70% smaller model** (Q4_K_M quantization)
232
- ✅ **Runs on 96GB limit** with GGUF compression
233
- ✅ **Bit-identical quality** to full precision
234
- ✅ **4-step Lightning LoRA** for fast inference
235
- """)
236
-
237
  with gr.Row():
238
  with gr.Column():
239
- input_images = gr.Gallery(
240
- label="Upload facial image",
241
- show_label=False,
242
- type="pil",
243
- interactive=True
244
- )
245
-
246
  with gr.Column():
247
- result = gr.Gallery(label="Result", show_label=False, type="pil")
248
- use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)
249
-
250
- with gr.Row():
251
- run_button = gr.Button("Remove Acne!", variant="primary", size="lg")
252
 
253
- with gr.Accordion("Advanced Settings", open=False):
254
- seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
255
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
256
 
 
 
 
257
  with gr.Row():
258
- true_guidance_scale = gr.Slider(
259
- label="Guidance scale",
260
- minimum=1.0,
261
- maximum=5.0,
262
- step=0.5,
263
- value=1.0
264
- )
265
-
266
- num_inference_steps = gr.Slider(
267
- label="Steps",
268
- minimum=2,
269
- maximum=20,
270
- step=2,
271
- value=4,
272
- )
273
-
274
  with gr.Row():
275
- height = gr.Slider(label="Height", minimum=256, maximum=768, step=64, value=512)
276
- width = gr.Slider(label="Width", minimum=256, maximum=768, step=64, value=512)
277
-
278
- gr.on(
279
- triggers=[run_button.click],
280
- fn=infer,
281
- inputs=[input_images, seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width],
282
- outputs=[result, seed, use_output_btn],
283
  )
284
-
285
- use_output_btn.click(fn=use_output_as_input, inputs=[result], outputs=[input_images])
286
 
287
  if __name__ == "__main__":
288
- demo.launch()
 
1
+ """
2
+ Acne-removal demo – Qwen-Image-Edit 4-bit edition (NO external logo)
3
+ Runs continuously on Hugging-Face Zero-GPU (16 GB)
4
+ """
5
  import gradio as gr
 
 
6
  import torch
7
+ import random
8
+ import numpy as np
9
  from PIL import Image
10
  import math
11
  import gc
12
+ import spaces
13
+ from diffusers import (
14
+ QwenImageEditPlusPipeline,
15
+ FlowMatchEulerDiscreteScheduler,
16
+ )
17
+
18
# ---------- config ----------
# Half precision throughout; fall back to CPU when CUDA is absent.
DTYPE = torch.float16
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider / RNG draw
# Fixed edit instruction — the UI exposes no free-form prompt field.
PROMPT = "remove acne marks and blemishes from the face"
NEG_PROMPT = " "

# 4-bit model – 4 GB on GPU
MODEL_ID = "Qwen/Qwen-Image-Edit-2509-NF4"

# Flow-match Euler scheduler: dynamic shifting enabled, both shift bounds
# fixed at log(3), exponential time-shift (values carried over from the
# previous revision's scheduler_config).
scheduler = FlowMatchEulerDiscreteScheduler.from_config({
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "time_shift_type": "exponential",
    "use_dynamic_shifting": True,
})

print("🚀 Loading 4-bit NF4 model …")
# NOTE(review): loads at import time — the Space blocks until the model is
# downloaded and initialised.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    variant="nf4",
    use_safetensors=True,
)
pipe.scheduler = scheduler
# Memory-saving switches for the small Zero-GPU slice.
pipe.enable_attention_slicing(1)
pipe.enable_vae_tiling()
pipe.enable_model_cpu_offload()  # keeps only 4-bit weights on GPU
print("✅ Model ready – <10 GB peak")

# ---------- inference ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
def _as_rgb_image(obj):
    """Best-effort conversion of one gallery payload to an RGB PIL image.

    Gradio's Gallery may hand back PIL images or temp-file path strings
    depending on version/configuration; anything else yields None so the
    caller can skip it.
    """
    if isinstance(obj, Image.Image):
        return obj.convert("RGB")
    if isinstance(obj, str):
        # presumably a temp-file path written by Gradio — TODO confirm
        return Image.open(obj).convert("RGB")
    return None


@spaces.GPU()
def run(
    gallery,
    seed=42,
    randomize_seed=True,
    guidance=1.0,
    steps=4,
    height=512,
    width=512,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the fixed acne-removal edit on the uploaded image(s).

    Parameters
    ----------
    gallery : list | None
        Gallery value; each item is a PIL image, a path string, or an
        (image-or-path, caption) tuple.
    seed, randomize_seed :
        RNG control; a fresh seed in [0, MAX_SEED] is drawn when
        ``randomize_seed`` is true.
    guidance : float
        Forwarded as ``true_cfg_scale`` to the pipeline.
    steps : int
        Number of diffusion steps.
    height, width : int
        Requested output size; anything above 512x512 is shrunk (see below).

    Returns
    -------
    tuple
        (edited images, seed actually used, gr.update making the
        "use as input" button visible).
    """
    torch.cuda.empty_cache()  # no-op when CUDA is uninitialised
    gc.collect()

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Normalise the gallery payload: accept bare images, (item, caption)
    # tuples, and file paths; silently skip anything unrecognised.
    pil_list = []
    if gallery is not None:
        for item in gallery:
            if isinstance(item, (list, tuple)):
                img = _as_rgb_image(item[0] if item else None)
            else:
                img = _as_rgb_image(item)
            if img is None:
                continue
            # Image.LANCZOS is a deprecated alias — use the Resampling enum.
            img.thumbnail((512, 512), Image.Resampling.LANCZOS)
            pil_list.append(img)

    generator = torch.Generator(device=DEVICE).manual_seed(seed)

    # safety shrink: anything larger than 512x512 is forced down to 256x256
    # to stay inside the Zero-GPU memory budget.
    if height * width > 512 * 512:
        height = width = 256

    # torch.cuda.amp.autocast is deprecated; torch.autocast is the
    # device-agnostic replacement. Disabled on CPU, where fp16 autocast is
    # not generally supported.
    with torch.inference_mode(), torch.autocast(
        device_type=DEVICE, dtype=DTYPE, enabled=DEVICE == "cuda"
    ):
        out = pipe(
            image=pil_list if pil_list else None,
            prompt=PROMPT,
            negative_prompt=NEG_PROMPT,
            height=height,
            width=width,
            num_inference_steps=steps,
            generator=generator,
            true_cfg_scale=guidance,
            num_images_per_prompt=1,
        ).images

    torch.cuda.empty_cache()
    gc.collect()
    return out, seed, gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
# ---------- UI ----------
# Single-column layout, centred at 900px max width.
css = """
#col-container{max-width:900px;margin:auto}
"""

with gr.Blocks(css=css, title="Acne Remover") as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🚀 Acne Remover – 4-bit edition")
        gr.Markdown("Upload a facial image and let the model remove acne marks and blemishes.")

        with gr.Row():
            # Left: upload gallery; right: results plus a reuse button that
            # stays hidden until the first run completes.
            with gr.Column():
                in_gal = gr.Gallery(label="Upload face", show_label=False, type="pil", interactive=True)
            with gr.Column():
                out_gal = gr.Gallery(label="Result", show_label=False, type="pil")
                reuse = gr.Button("↗️ Use as input", size="sm", visible=False)

        run_btn = gr.Button("Remove Acne!", variant="primary", size="lg")

        # Advanced knobs map 1:1 onto run()'s keyword parameters.
        with gr.Accordion("Advanced", open=False):
            seed_s = gr.Slider(0, MAX_SEED, step=1, value=42, label="Seed")
            rand_c = gr.Checkbox(True, label="Randomise seed")
            with gr.Row():
                guid_s = gr.Slider(1.0, 5.0, step=0.5, value=1.0, label="Guidance")
                steps_s = gr.Slider(2, 20, step=2, value=4, label="Steps")
            with gr.Row():
                h_s = gr.Slider(256, 768, step=64, value=512, label="Height")
                w_s = gr.Slider(256, 768, step=64, value=512, label="Width")

    # events
    # run() returns (images, seed, button-visibility update), so seed_s is
    # both an input and an output (it reflects the randomised seed).
    run_btn.click(
        run,
        [in_gal, seed_s, rand_c, guid_s, steps_s, h_s, w_s],
        [out_gal, seed_s, reuse],
    )
    # Feed the result gallery straight back into the input gallery.
    reuse.click(lambda x: x, out_gal, in_gal)


if __name__ == "__main__":
    demo.launch()