LPX55 committed on
Commit
ac7c7b8
·
verified ·
1 Parent(s): 60bac87

Create app_fast.py

Browse files
Files changed (1) hide show
  1. app_fast.py +198 -0
app_fast.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import spaces
6
+ from PIL import Image
7
+ from diffusers import QwenImageEditPipeline
8
+ import os
9
+ import base64
10
+ import json
11
+ from huggingface_hub import InferenceClient
12
+
13
def get_caption_language(prompt):
    """Report whether ``prompt`` contains Chinese (Han) characters.

    Args:
        prompt: Text to inspect. ``None`` and the empty string are treated
            as containing no Chinese characters.

    Returns:
        ``'zh'`` if at least one character falls inside one of the Han
        ideograph ranges listed below, otherwise ``'en'``.
    """
    if not prompt:
        return 'en'
    # Inclusive code-point ranges that count as Chinese characters.
    han_ranges = (
        ('\u3400', '\u4dbf'),  # CJK Unified Ideographs Extension A
        ('\u4e00', '\u9fff'),  # CJK Unified Ideographs
        ('\uf900', '\ufaff'),  # CJK Compatibility Ideographs
    )
    has_han = any(
        low <= char <= high
        for char in prompt
        for low, high in han_ranges
    )
    return 'zh' if has_han else 'en'
22
+
23
def _extract_rewritten(raw_reply):
    """Return the ``"Rewritten"`` value from a JSON reply, or the reply text itself."""
    text = raw_reply.replace("```json", "").replace("```", "").strip()
    # Try the whole reply first, then the outermost {...} span in case the
    # model wrapped the JSON object in extra prose.
    candidates = [text]
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])
    for candidate in candidates:
        try:
            # strict=False tolerates raw newlines inside the JSON string.
            payload = json.loads(candidate, strict=False)
        except ValueError:
            continue
        if isinstance(payload, dict) and isinstance(payload.get("Rewritten"), str):
            return payload["Rewritten"]
    return text


def polish_prompt(original_prompt, system_prompt):
    """Rewrite an edit instruction using a Hugging Face ``InferenceClient``.

    The system prompt may ask the model to answer with a JSON object of the
    form ``{"Rewritten": "..."}``. When the reply contains such an object,
    only the ``"Rewritten"`` value is returned; otherwise the reply text is
    returned as-is. Newlines in the result are replaced by spaces.

    Args:
        original_prompt: The user's instruction, sent as the "user" message.
        system_prompt: Instructions for the rewriting model, sent as the
            "system" message.

    Returns:
        The rewritten prompt on one line, or ``original_prompt`` when the API
        call fails or yields an empty result.

    Raises:
        EnvironmentError: If the ``HF_TOKEN`` environment variable is unset
            or empty.
    """
    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        raise EnvironmentError("HF_TOKEN is not set. Please set it in your environment.")
    client = InferenceClient(
        provider="cerebras",
        api_key=api_key,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=2000,
        )
        reply = completion.choices[0].message.content or ""
        polished_prompt = _extract_rewritten(reply).strip().replace("\n", " ")
    except Exception as e:
        # Deliberately best-effort: any failure in the remote call or in
        # reading its response falls back to the user's own prompt.
        print(f"Error during Hugging Face API call: {e}")
        return original_prompt
    # An empty rewrite is useless to the caller; keep the original instead.
    return polished_prompt or original_prompt
50
+
51
# System prompt for the prompt-rewriting chat model; infer() passes it to
# polish_prompt() as the "system" message when prompt rewriting is enabled.
# It asks the model to reply with a JSON object whose "Rewritten" key holds
# the rewritten instruction.
# NOTE(review): the section numbering jumps from "## 2." to "## 4.", and the
# text refers to "the input image" although polish_prompt() sends only text
# messages -- confirm whether a section or the image input was dropped.
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.
## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.
### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.
### Human Editing (e.g., change a person’s face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.
### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:
`"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`
if applicable.
## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
"Rewritten": "..."
}
'''
82
+
83
# Build the Qwen-Image-Edit pipeline once at import time, in bfloat16, on
# CUDA when torch reports it as available and on the CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16
pipe = QwenImageEditPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Load the 8-step Lightning LoRA weights, then fuse them into the pipeline.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Edit-Lightning",
    weight_name="Qwen-Image-Edit-Lightning-8steps-V1.1.safetensors",
)
pipe.fuse_lora()
91
+
92
@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit ``image`` according to ``prompt`` with the Qwen-Image-Edit pipeline.

    Args:
        image: Input image handed to the pipeline as its first argument.
        prompt: Edit instruction.
        seed: Seed for the torch generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` instead of
            using ``seed``.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: When true, the prompt is first rewritten by
            ``polish_prompt`` using ``SYSTEM_PROMPT_EDIT``.
        num_images_per_prompt: Number of images requested from the pipeline.
        progress: Gradio progress tracker (tracks tqdm progress bars).

    Returns:
        A tuple ``(edited_images, seed)``: the pipeline's ``.images`` output
        and the seed that was actually used.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Fail early with a readable UI message rather than deep in the pipeline.
    if image is None:
        raise gr.Error("Please upload an input image before editing.")

    negative_prompt = " "
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # UI sliders may deliver these as floats; manual_seed() needs an int, so
    # normalise all integer-valued settings up front.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)
    num_images_per_prompt = int(num_images_per_prompt)

    generator = torch.Generator(device=device).manual_seed(seed)
    print(f"Calling pipeline with prompt: '{prompt}'")
    print(f"Negative Prompt: '{negative_prompt}'")
    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}")

    if rewrite_prompt:
        polished_prompt = polish_prompt(prompt, SYSTEM_PROMPT_EDIT)
        print(f"Rewritten Prompt: {polished_prompt}")
        prompt = polished_prompt

    edited_images = pipe(
        image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
    ).images
    return edited_images, seed
130
+
131
# Upper bound for seeds (largest signed 32-bit integer); used as the seed
# slider maximum and as the upper limit when a random seed is drawn.
MAX_SEED = np.iinfo(np.int32).max

# Sample edit instructions.
# NOTE(review): not referenced by any visible UI code -- confirm whether it
# was meant to feed an examples widget.
examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    'Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.',
]
139
+
140
# Gradio UI: an image and an instruction go in, a gallery of edited images
# comes out; the accordion holds the sampling settings passed to infer().
# NOTE(review): container nesting was reconstructed from a diff that had lost
# its indentation -- confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Qwen-Image-Edit with Prompt Enhancement and Fast Inference")
    gr.Markdown("Try editing images with multi-modal instruction polishing.")
    with gr.Column():
        input_image = gr.Image(label="Input Image", type="pil")
        prompt = gr.Text(label="Edit Instruction", placeholder="e.g. Add a dog to the right side.")
        run_button = gr.Button("Edit", variant="primary")
        result = gr.Gallery(label="Output Images", show_label=False)
        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            with gr.Row():
                # Default matches infer()'s own default of 1.0; the pipeline
                # runs with the 8-step Lightning LoRA fused in.
                true_guidance_scale = gr.Slider(
                    label="True Guidance Scale",
                    minimum=1.0,
                    maximum=5.0,
                    step=0.1,
                    value=1.0,
                )
                num_inference_steps = gr.Slider(
                    label="Inference Steps (Fast 8-step mode)",
                    minimum=4,
                    maximum=8,
                    step=1,
                    value=8,
                )
                num_images_per_prompt = gr.Slider(
                    label="Images per Prompt",
                    minimum=1,
                    maximum=4,
                    step=1,
                    value=1,
                )
            rewrite_prompt = gr.Checkbox(label="Use Prompt Rewriter", value=False, visible=True)

    # Run infer() on button click or when the instruction box is submitted;
    # the input order mirrors infer()'s positional parameters, and the seed
    # actually used is written back to the seed slider.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_prompt,
            num_images_per_prompt,
        ],
        outputs=[result, seed],
    )
196
+
197
# Launch the Gradio app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()