Qwen-Image-Edit_Fast-Presets

Running on Zero

App Files Files Community

LPX55 committed on Aug 18

Commit

ac7c7b8

verified ·

1 Parent(s): 60bac87

Create app_fast.py

Browse files

Files changed (1) hide show

app_fast.py +198 -0

app_fast.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import gradio as gr
+import numpy as np
+import random
+import torch
+import spaces
+from PIL import Image
+from diffusers import QwenImageEditPipeline
+import os
+import base64
+import json
+from huggingface_hub import InferenceClient
+def get_caption_language(prompt):
+    """Detects if the prompt contains Chinese characters."""
+    ranges = [
+        ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
+    ]
+    for char in prompt:
+        if any(start <= char <= end for start, end in ranges):
+            return 'zh'
+    return 'en'
+def polish_prompt(original_prompt, system_prompt):
+    """
+    Rewrites the prompt using a Hugging Face InferenceClient.
+    """
+    api_key = os.environ.get("HF_TOKEN")
+    if not api_key:
+        raise EnvironmentError("HF_TOKEN is not set. Please set it in your environment.")
+    client = InferenceClient(
+        provider="cerebras",
+        api_key=api_key,
+    )
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": original_prompt}
+    ]
+    try:
+        completion = client.chat.completions.create(
+            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
+            messages=messages,
+            max_tokens=2000,
+        )
+        polished_prompt = completion.choices[0].message.content
+        polished_prompt = polished_prompt.strip().replace("\n", " ")
+        return polished_prompt
+    except Exception as e:
+        print(f"Error during Hugging Face API call: {e}")
+        return original_prompt
+# System message for the prompt rewriter; infer() passes it to polish_prompt().
+# It asks the model to answer with a JSON object keyed "Rewritten".
+# NOTE(review): the headings jump from "## 2." to "## 4."; left as-is because
+# this text is sent verbatim at runtime.
+SYSTEM_PROMPT_EDIT = '''
+# Edit Instruction Rewriter
+You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
+## 1. General Principles
+- Keep the rewritten instruction **concise** and clear.
+- Avoid contradictions, vagueness, or unachievable instructions.
+- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
+- Ensure new added elements or modifications align with the image's original context and art style.
+## 2. Task Types
+### Add, Delete, Replace:
+- When the input is detailed, only refine grammar and clarity.
+- For vague instructions, infer minimal but sufficient details.
+- For replacement, use the format: `"Replace X with Y"`.
+### Text Editing (e.g., text replacement):
+- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
+- Preserving the original structure and language—**do not translate** or alter style.
+### Human Editing (e.g., change a person’s face/hair):
+- Preserve core visual identity (gender, ethnic features).
+- Describe expressions in subtle and natural terms.
+- Maintain key clothing or styling details unless explicitly replaced.
+### Style Transformation:
+- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
+- Use a fixed template for **coloring/restoration**:
+  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`
+  if applicable.
+## 4. Output Format
+Please provide the rewritten instruction in a clean `json` format as:
+{
+  "Rewritten": "..."
+}
+'''
+# Module-level model setup: runs once at import time.
+# Weights are loaded in bfloat16 and moved to CUDA when available, else CPU.
+dtype = torch.bfloat16
+device = "cuda" if torch.cuda.is_available() else "cpu"
+pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)
+# Load the 8-step Lightning LoRA weights file, then fuse it into the pipeline.
+# NOTE(review): presumably fuse_lora() merges the adapter into the base weights
+# so inference needs no separate LoRA pass — confirm against diffusers docs.
+pipe.load_lora_weights(
+    "lightx2v/Qwen-Image-Edit-Lightning",
+    weight_name="Qwen-Image-Edit-Lightning-8steps-V1.1.safetensors"
+)
+pipe.fuse_lora()
+@spaces.GPU(duration=60)
+def infer(
+    image,
+    prompt,
+    seed=42,
+    randomize_seed=False,
+    true_guidance_scale=1.0,
+    num_inference_steps=8,
+    rewrite_prompt=False,
+    num_images_per_prompt=1,
+    progress=gr.Progress(track_tqdm=True),
+):
+    """
+    Uses Qwen-Image-Edit with optional prompt rewriting before execution.
+    """
+    negative_prompt = " "
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device=device).manual_seed(seed)
+    print(f"Calling pipeline with prompt: '{prompt}'")
+    print(f"Negative Prompt: '{negative_prompt}'")
+    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")
+    if rewrite_prompt:
+        lang = get_caption_language(prompt)
+        system_prompt = SYSTEM_PROMPT_EDIT
+        polished_prompt = polish_prompt(prompt, system_prompt)
+        print(f"Rewritten Prompt: {polished_prompt}")
+        prompt = polished_prompt
+    edited_images = pipe(
+        image,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        num_inference_steps=num_inference_steps,
+        generator=generator,
+        true_cfg_scale=true_guidance_scale,
+        num_images_per_prompt=num_images_per_prompt,
+    ).images
+    return edited_images, seed
+# Upper bound for seeds: the largest signed 32-bit integer. Used by infer()
+# when randomizing and as the maximum of the seed slider.
+MAX_SEED = np.iinfo(np.int32).max
+# Sample edit instructions. NOTE(review): not referenced by any UI code visible
+# in this file — confirm whether a gr.Examples hookup was intended.
+examples = [
+    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
+    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
+    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
+    "Remove the blue sky and replace it with a dark night cityscape.",
+    """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font."""
+]
+# Gradio UI: image + instruction inputs, an output gallery, and advanced settings.
+with gr.Blocks() as demo:
+    gr.Markdown("# Qwen-Image-Edit with Prompt Enhancement and Fast Inference")
+    gr.Markdown("Try editing images with multi-modal instruction polishing.")
+    with gr.Column():
+        input_image = gr.Image(label="Input Image", type="pil")
+        prompt = gr.Text(label="Edit Instruction", placeholder="e.g. Add a dog to the right side.")
+        run_button = gr.Button("Edit", variant="primary")
+        result = gr.Gallery(label="Output Images", show_label=False)
+    with gr.Accordion("Advanced Settings", open=False):
+        seed = gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=MAX_SEED,
+            step=1,
+            value=0
+        )
+        # Randomization is on by default, so the seed slider value is normally overridden.
+        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+        with gr.Row():
+            # NOTE(review): the UI default here is 4.0 while infer() defaults to 1.0 — confirm which is intended.
+            true_guidance_scale = gr.Slider(
+                label="True Guidance Scale",
+                minimum=1.0,
+                maximum=5.0,
+                step=0.1,
+                value=4.0
+            )
+            num_inference_steps = gr.Slider(
+                label="Inference Steps (Fast 8-step mode)",
+                minimum=4,
+                maximum=8,
+                step=1,
+                value=8
+            )
+            num_images_per_prompt = gr.Slider(
+                label="Images per Prompt",
+                minimum=1,
+                maximum=4,
+                step=1,
+                value=1
+            )
+            rewrite_prompt = gr.Checkbox(label="Use Prompt Rewriter", value=False, visible=True)
+    # Clicking "Edit" or submitting the instruction box both run infer().
+    # The inputs list follows infer()'s positional parameter order; the seed
+    # returned by infer() is written back to the seed slider.
+    gr.on(
+        triggers=[run_button.click, prompt.submit],
+        fn=infer,
+        inputs=[
+            input_image,
+            prompt,
+            seed,
+            randomize_seed,
+            true_guidance_scale,
+            num_inference_steps,
+            rewrite_prompt,
+            num_images_per_prompt
+        ],
+        outputs=[result, seed],
+    )
+# Start the Gradio app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()