Spaces:

IFMedTechdemo
/

Face-Retouch

Paused

App Files Files Community

IFMedTechdemo committed on Nov 6, 2025

Commit

62ee321

verified ·

1 Parent(s): edf1c19

Update app.py

Browse files

Files changed (1) hide show

app.py +233 -294

app.py CHANGED Viewed

@@ -3,375 +3,314 @@ import numpy as np
 import random
 import torch
 import spaces
 from PIL import Image
 import math
-import gc
 import logging
-from typing import List, Optional
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# Configuration
-DTYPE = torch.float16
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-MODEL_ID = "Qwen/Qwen-Image-Edit-2509"  # Use standard model [web:44]
-MAX_SEED = np.iinfo(np.int32).max
-HARDCODED_PROMPT = "remove acne marks and blemishes from the face"
-NEGATIVE_PROMPT = " "
-# Import pipeline
-try:
-    from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler
-    logger.info("✅ Diffusers imported successfully")
-except ImportError as e:
-    logger.error(f"❌ Import failed: {e}")
-    raise
-# Memory management functions
-def cleanup_memory():
-    """Comprehensive memory cleanup"""
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
-    gc.collect()
-def check_gpu_memory():
-    """Monitor GPU memory usage"""
-    if torch.cuda.is_available():
-        allocated = torch.cuda.memory_allocated() / 1024**3
-        cached = torch.cuda.memory_reserved() / 1024**3
-        logger.info(f"GPU Memory - Allocated: {allocated:.2f}GB, Cached: {cached:.2f}GB")
-# Initialize pipeline
-def load_pipeline():
-    """Load and optimize the pipeline"""
-    logger.info(f"🚀 Loading {MODEL_ID}...")
-    # Scheduler configuration [web:39]
-    scheduler_config = {
-        "base_image_seq_len": 256,
-        "base_shift": math.log(3),
-        "invert_sigmas": False,
-        "max_image_seq_len": 8192,
-        "max_shift": math.log(3),
-        "num_train_timesteps": 1000,
-        "shift": 1.0,
-        "shift_terminal": None,
-        "stochastic_sampling": False,
-        "time_shift_type": "exponential",
-        "use_beta_sigmas": False,
-        "use_dynamic_shifting": True,
-        "use_exponential_sigmas": False,
-        "use_karras_sigmas": False,
-    }
-    try:
-        # Create scheduler
-        scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
-        # Load pipeline [web:38]
-        pipe = QwenImageEditPlusPipeline.from_pretrained(
-            MODEL_ID,
-            scheduler=scheduler,
-            torch_dtype=DTYPE,
-            use_safetensors=True,
-        )
-        # Move to device
-        pipe = pipe.to(DEVICE)
-        # Enable optimizations [web:43]
-        pipe.enable_attention_slicing()  # Memory efficient attention
-        pipe.enable_vae_slicing()        # Sliced VAE decoding
-        pipe.enable_vae_tiling()         # Tiled VAE for large images
-        # Try to load Lightning LoRA for faster inference [web:39]
-        try:
-            pipe.load_lora_weights(
-                "lightx2v/Qwen-Image-Lightning",
-                weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors"
-            )
-            pipe.fuse_lora()
-            logger.info("✅ Lightning LoRA loaded (4-step mode)")
-        except Exception as e:
-            logger.warning(f"⚠️ Lightning LoRA skipped: {e}")
-        logger.info("✅ Pipeline loaded and optimized successfully")
-        check_gpu_memory()
-        return pipe
-    except Exception as e:
-        logger.error(f"❌ Pipeline loading failed: {e}")
-        raise
-# Load pipeline at startup
-pipe = load_pipeline()
 @spaces.GPU()
 def infer(
-    images: Optional[List],
-    seed: int = 42,
-    randomize_seed: bool = False,
-    true_guidance_scale: float = 1.0,
-    num_inference_steps: int = 4,
-    height: int = 512,
-    width: int = 512,
     progress=gr.Progress(track_tqdm=True),
 ):
-    """
-    Optimized inference function with proper error handling
-    """
-    # Clean memory before inference
-    cleanup_memory()
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=DEVICE).manual_seed(seed)
-    # Process input images
     pil_images = []
     if images is not None:
         for item in images:
             try:
-                # Handle different input types
-                if isinstance(item, tuple) and len(item) > 0:
-                    img_path = item[0]
-                    if isinstance(img_path, Image.Image):
-                        img = img_path.convert("RGB")
-                    elif isinstance(img_path, str):
-                        img = Image.open(img_path).convert("RGB")
-                    else:
-                        continue
-                else:
-                    continue
-                # Resize for memory efficiency [web:38]
-                img.thumbnail((768, 768), Image.Resampling.LANCZOS)
-                pil_images.append(img)
-            except Exception as e:
-                logger.error(f"Error processing image: {e}")
                 continue
-    if not pil_images:
-        raise gr.Error("No valid images provided")
-    logger.info(f"📊 Processing {len(pil_images)} image(s), {height}x{width}, {num_inference_steps} steps")
-    try:
-        # Inference with proper context management [web:27]
-        with torch.inference_mode():
-            with torch.cuda.amp.autocast(enabled=True, dtype=DTYPE):
-                output = pipe(
-                    image=pil_images,
-                    prompt=HARDCODED_PROMPT,
-                    height=height,
-                    width=width,
-                    negative_prompt=NEGATIVE_PROMPT,
-                    num_inference_steps=num_inference_steps,
-                    generator=generator,
-                    true_cfg_scale=true_guidance_scale,
-                    num_images_per_prompt=1,
-                ).images
-        logger.info("✅ Generation completed successfully")
-        return output, seed, gr.update(visible=True)
-    except torch.cuda.OutOfMemoryError as e:
-        logger.warning("⚠️ CUDA OOM - Trying emergency mode")
-        cleanup_memory()
-        try:
-            # Emergency fallback with reduced settings
-            with torch.inference_mode():
-                with torch.cuda.amp.autocast(enabled=True, dtype=DTYPE):
-                    output = pipe(
-                        image=pil_images,
-                        prompt=HARDCODED_PROMPT,
-                        height=min(height, 384),
-                        width=min(width, 384),
-                        negative_prompt=NEGATIVE_PROMPT,
-                        num_inference_steps=max(2, num_inference_steps // 2),
-                        generator=generator,
-                        true_cfg_scale=1.0,
-                        num_images_per_prompt=1,
-                    ).images
-            logger.info("✅ Emergency mode successful")
-            return output, seed, gr.update(visible=True)
-        except Exception as emergency_e:
-            logger.error(f"❌ Emergency mode failed: {emergency_e}")
-            raise gr.Error(f"GPU memory insufficient. Try smaller images or reduce resolution.")
-    except Exception as e:
-        logger.error(f"❌ Inference failed: {e}")
-        raise gr.Error(f"Generation failed: {str(e)}")
-    finally:
-        # Always clean up after inference [web:32]
-        cleanup_memory()
-def use_output_as_input(output_images):
-    """Convert output images to input format"""
-    if output_images is None or len(output_images) == 0:
-        return []
-    return [(img, f"output_{i}.png") for i, img in enumerate(output_images)]
-# UI Styles
 css = """
 #col-container {
     margin: 0 auto;
-    max-width: 900px;
 }
 #logo-title {
     text-align: center;
 }
 #logo-title img {
-    width: 350px;
-}
-.memory-info {
-    font-size: 0.8em;
-    color: #666;
-    margin-top: 5px;
 }
 """
-# Gradio Interface
-with gr.Blocks(css=css, title="Acne Remover - Qwen Image Edit") as demo:
     with gr.Column(elem_id="col-container"):
-        # Header
         gr.HTML("""
         <div id="logo-title">
-            <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo">
-            <h2 style="font-style: italic;color: #5b47d1;margin-top: -20px">✨ Professional Acne Remover</h2>
         </div>
         """)
         gr.Markdown("""
-        **Remove acne marks and blemishes** using the powerful Qwen-Image-Edit-2509 model.
-        ✅ **State-of-the-art results** with 20B parameter model [web:42]
-        ✅ **Multi-image support** for batch processing [web:45]
-        ✅ **Lightning-fast inference** with 4-step generation [web:39]
-        ✅ **Memory optimized** for stable performance [web:43]
         """)
         with gr.Row():
             with gr.Column():
-                input_images = gr.File(
-                    label="📸 Upload facial images",
-                    file_count="multiple",
-                    file_types=["image"],
-                    height=300
-                )
-                gr.HTML('<div class="memory-info">💡 Tip: Upload multiple images for batch processing</div>')
             with gr.Column():
-                result = gr.Gallery(
-                    label="🎯 Results",
-                    show_label=True,
-                    type="pil",
-                    height=300,
-                    columns=2
-                )
-                use_output_btn = gr.Button(
-                    "🔄 Use Results as New Input",
-                    variant="secondary",
-                    size="sm",
-                    visible=False
-                )
-        # Main action button
-        run_button = gr.Button(
-            "🚀 Remove Acne & Blemishes!",
-            variant="primary",
-            size="lg"
-        )
-        # Advanced settings
-        with gr.Accordion("⚙️ Advanced Settings", open=False):
             seed = gr.Slider(
-                label="🎲 Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0
-            )
-            randomize_seed = gr.Checkbox(
-                label="🎯 Randomize seed",
-                value=True
             )
             with gr.Row():
                 true_guidance_scale = gr.Slider(
-                    label="📊 Guidance Scale",
                     minimum=1.0,
-                    maximum=5.0,
                     step=0.1,
-                    value=1.0,
-                    info="Higher values = stronger prompt adherence"
                 )
                 num_inference_steps = gr.Slider(
-                    label="🔄 Inference Steps",
-                    minimum=2,
-                    maximum=20,
                     step=1,
-                    value=4,
-                    info="More steps = higher quality (slower)"
                 )
-            with gr.Row():
                 height = gr.Slider(
-                    label="📏 Height",
-                    minimum=256,
-                    maximum=768,
-                    step=64,
-                    value=512
                 )
                 width = gr.Slider(
-                    label="📐 Width",
-                    minimum=256,
-                    maximum=768,
-                    step=64,
-                    value=512
                 )
-        # Footer info
-        gr.Markdown("""
-        ---
-        **Model Info**: Qwen-Image-Edit-2509 | **Memory**: Optimized for GPU efficiency | **Speed**: ~4 steps with Lightning LoRA
-        """)
-    # Event handlers
-    run_button.click(
         fn=infer,
         inputs=[
-            input_images, seed, randomize_seed,
-            true_guidance_scale, num_inference_steps,
-            height, width
         ],
         outputs=[result, seed, use_output_btn],
-        show_progress=True
     )
     use_output_btn.click(
-        fn=use_output_as_input,
-        inputs=[result],
         outputs=[input_images]
     )
-# Launch configuration
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True,
-        quiet=False
-    )

 import random
 import torch
 import spaces
 from PIL import Image
+from diffusers import FlowMatchEulerDiscreteScheduler
+from optimization import optimize_pipeline_
+from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
+from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
+from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
+from huggingface_hub import InferenceClient
 import math
+import os
+import base64
+from io import BytesIO
+import json
 import logging
+logging.getLogger("transformers").setLevel(logging.ERROR)
+logging.getLogger("diffusers").setLevel(logging.ERROR)
+SYSTEM_PROMPT = '''
+# Edit Instruction Rewriter
+You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
+Please strictly follow the rewriting rules below:
+## 1. General Principles
+- Keep the rewritten prompt **concise and comprehensive**. Avoid overly long sentences and unnecessary descriptive language.
+- If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
+- Keep the main part of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
+- All added objects or modifications must align with the logic and style of the scene in the input images.
+- If multiple sub-images are to be generated, describe the content of each sub-image individually.
+## 2. Task-Type Handling Rules
+### 1. Add, Delete, Replace Tasks
+- If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
+- If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
+    > Original: "Add an animal"
+    > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
+- Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
+- For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
+### 2. Text Editing Tasks
+- All text content must be enclosed in English double quotes `" "`. Keep the original language of the text, and keep the capitalization.
+- Both adding new text and replacing existing text are text replacement tasks, For example:
+    - Replace "xx" to "yy"
+    - Replace the mask / bounding box to "yy"
+    - Replace the visual object to "yy"
+- Specify text position, color, and layout only if user has required.
+- If font is specified, keep the original language of the font.
+### 3. Human Editing Tasks
+- Make the smallest changes to the given user's prompt.
+- If changes to background, action, expression, camera shot, or ambient lighting are required, please list each modification individually.
+- **Edits to makeup or facial features / expression must be subtle, not exaggerated, and must preserve the subject's identity consistency.**
+    > Original: "Add eyebrows to the face"
+    > Rewritten: "Slightly thicken the person's eyebrows with little change, look natural."
+### 4. Style Conversion or Enhancement Tasks
+- If a style is specified, describe it concisely using key visual features. For example:
+    > Original: "Disco style"
+    > Rewritten: "1970s disco style: flashing lights, disco ball, mirrored walls, vibrant colors"
+- For style reference, analyze the original image and extract key characteristics (color, composition, texture, lighting, artistic style, etc.), integrating them into the instruction.
+- **Colorization tasks (including old photo restoration) must use the fixed template:**
+  "Restore and colorize the old photo."
+- Clearly specify the object to be modified. For example:
+    > Original: Modify the subject in Picture 1 to match the style of Picture 2.
+    > Rewritten: Change the girl in Picture 1 to the ink-wash style of Picture 2 — rendered in black-and-white watercolor with soft color transitions.
+### 5. Material Replacement
+- Clearly specify the object and the material. For example: "Change the material of the apple to papercut style."
+- For text material replacement, use the fixed template:
+    "Change the material of text "xxxx" to laser style"
+### 6. Logo/Pattern Editing
+- Material replacement should preserve the original shape and structure as much as possible. For example:
+   > Original: "Convert to sapphire material"
+   > Rewritten: "Convert the main subject in the image to sapphire material, preserving similar shape and structure"
+- When migrating logos/patterns to new scenes, ensure shape and structure consistency. For example:
+   > Original: "Migrate the logo in the image to a new scene"
+   > Rewritten: "Migrate the logo in the image to a new scene, preserving similar shape and structure"
+### 7. Multi-Image Tasks
+- Rewritten prompts must clearly point out which image's element is being modified. For example:
+    > Original: "Replace the subject of picture 1 with the subject of picture 2"
+    > Rewritten: "Replace the girl of picture 1 with the boy of picture 2, keeping picture 2's background unchanged"
+- For stylization tasks, describe the reference image's style in the rewritten prompt, while preserving the visual content of the source image.
+## 3. Rationale and Logic Check
+- Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" requires logical correction.
+- Supplement missing critical information: e.g., if position is unspecified, choose a reasonable area based on composition (near subject, blank space, center/edge, etc.).
+# Output Format Example
+```json
+{
+   "Rewritten": "..."
+}
+'''
+def encode_image(pil_image):
+    import io
+    buffered = io.BytesIO()
+    pil_image.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+dtype = torch.bfloat16  # passed as torch_dtype to from_pretrained below
+device = "cuda" if torch.cuda.is_available() else "cpu"  # falls back to CPU when CUDA is unavailable
+scheduler_config = {  # settings handed to FlowMatchEulerDiscreteScheduler.from_config
+    "base_image_seq_len": 256,
+    "base_shift": math.log(3),
+    "invert_sigmas": False,
+    "max_image_seq_len": 8192,
+    "max_shift": math.log(3),
+    "num_train_timesteps": 1000,
+    "shift": 1.0,
+    "shift_terminal": None,
+    "stochastic_sampling": False,
+    "time_shift_type": "exponential",
+    "use_beta_sigmas": False,
+    "use_dynamic_shifting": True,
+    "use_exponential_sigmas": False,
+    "use_karras_sigmas": False,
+}
+scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
+pipe = QwenImageEditPlusPipeline.from_pretrained(  # runs at import time; the pipeline is a module-level global used by infer
+     "Qwen/Qwen-Image-Edit-2509",
+     scheduler=scheduler,
+     torch_dtype=dtype
+).to(device)
+pipe.load_lora_weights(  # the weight file name indicates an 8-step Lightning LoRA
+    "lightx2v/Qwen-Image-Lightning",
+    weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-8steps-V1.0-bf16.safetensors"
+)
+pipe.fuse_lora()
+pipe.transformer.__class__ = QwenImageTransformer2DModel  # re-types the loaded transformer in place to the project's qwenimage class
+pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")  # NOTE(review): blank images and a dummy prompt are presumably sample inputs for compilation; confirm in optimization.py
+MAX_SEED = np.iinfo(np.int32).max  # largest int32; upper bound for the seed slider and random seeds
+def use_output_as_input(output_images):
+    if output_images is None or len(output_images) == 0:
+        return []
+    return output_images
 @spaces.GPU()
 def infer(
+    images,
+    prompt,
+    seed=42,
+    randomize_seed=False,
+    true_guidance_scale=1.0,
+    num_inference_steps=8,
+    height=None,
+    width=None,
+    rewrite_prompt=True,
+    num_images_per_prompt=1,
     progress=gr.Progress(track_tqdm=True),
 ):
+    negative_prompt = " "
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device=device).manual_seed(seed)
     pil_images = []
     if images is not None:
         for item in images:
             try:
+                if isinstance(item[0], Image.Image):
+                    pil_images.append(item[0].convert("RGB"))
+                elif isinstance(item[0], str):
+                    pil_images.append(Image.open(item[0]).convert("RGB"))
+                elif hasattr(item, "name"):
+                    pil_images.append(Image.open(item.name).convert("RGB"))
+            except Exception:
                 continue
+    if height == 256 and width == 256:
+        height, width = None, None
+    prompt = (
+        "Remove acne marks and black spots. "
+    )
+    image = pipe(
+        image=pil_images if len(pil_images) > 0 else None,
+        prompt=prompt,
+        height=height,
+        width=width,
+        negative_prompt=negative_prompt,
+        num_inference_steps=num_inference_steps,
+        generator=generator,
+        true_cfg_scale=true_guidance_scale,
+        num_images_per_prompt=num_images_per_prompt,
+    ).images
+    return image, seed, gr.update(visible=True)
+examples = []
 css = """
 #col-container {
     margin: 0 auto;
+    max-width: 1024px;
 }
 #logo-title {
     text-align: center;
 }
 #logo-title img {
+    width: 400px;
 }
+#edit_text{margin-top: -62px !important}
 """
+with gr.Blocks(css=css) as demo:  # builds the whole UI and its event wiring
     with gr.Column(elem_id="col-container"):
         gr.HTML("""
         <div id="logo-title">
+            <h2 style="font-style: italic;color: #5b47d1;margin-top: -27px !important;margin-left: 96px">[Plus] Fast, 8-steps with Lightning LoRA</h2>
         </div>
         """)
         gr.Markdown("""
+        [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
+        This demo uses the new [Qwen-Image-Edit-2509](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) with the [Qwen-Image-Lightning v2](https://huggingface.co/lightx2v/Qwen-Image-Lightning) LoRA + [AoT compilation & FA3](https://huggingface.co/blog/zerogpu-aoti) for accelerated inference.
+        Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) to run locally with ComfyUI or diffusers.
         """)
         with gr.Row():
             with gr.Column():
+                input_images = gr.Gallery(label="Input Images",
+                                          show_label=False,
+                                          type="pil",
+                                          interactive=True)
             with gr.Column():
+                result = gr.Gallery(label="Result", show_label=False, type="pil")
+                use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)  # hidden until infer returns a visibility update
+        with gr.Row():
+            prompt = gr.Text(  # NOTE(review): infer overwrites this value with a fixed instruction, so typed text has no effect
+                    label="Prompt",
+                    show_label=False,
+                    placeholder="describe the edit instruction",
+                    container=False,
+            )
+            run_button = gr.Button("Edit!", variant="primary")
+        with gr.Accordion("Advanced Settings", open=False):
             seed = gr.Slider(
+                label="Seed",
+                minimum=0,
+                maximum=MAX_SEED,
+                step=1,
+                value=0,
             )
+            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 true_guidance_scale = gr.Slider(
+                    label="True guidance scale",
                     minimum=1.0,
+                    maximum=10.0,
                     step=0.1,
+                    value=1.0
                 )
                 num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=1,
+                    maximum=40,
                     step=1,
+                    value=8,
                 )
                 height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=2048,
+                    step=8,
+                    value=None,
                 )
                 width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=2048,
+                    step=8,
+                    value=None,
                 )
+                rewrite_prompt = gr.Checkbox(label="Rewrite prompt", value=True)  # NOTE(review): forwarded to infer, which never reads it
+    gr.on(  # run infer on button click or when the prompt box is submitted
+        triggers=[run_button.click, prompt.submit],
         fn=infer,
         inputs=[
+            input_images,
+            prompt,
+            seed,
+            randomize_seed,
+            true_guidance_scale,
+            num_inference_steps,
+            height,
+            width,
+            rewrite_prompt,
         ],
         outputs=[result, seed, use_output_btn],
     )
     use_output_btn.click(
+        fn=use_output_as_input,  # copies the result gallery back into the input gallery
+        inputs=[result],
         outputs=[input_images]
     )
 if __name__ == "__main__":
+    demo.launch()