lenML committed on
Commit
6da8d6f
·
verified ·
1 Parent(s): 2a5c8e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -225
app.py CHANGED
@@ -11,203 +11,8 @@ from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
11
  # from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
  # from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
 
14
- from huggingface_hub import InferenceClient
15
  import math
16
 
17
- import os
18
- import base64
19
- from io import BytesIO
20
- import json
21
-
22
- SYSTEM_PROMPT = '''
23
- # Edit Instruction Rewriter
24
- You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
25
-
26
- Please strictly follow the rewriting rules below:
27
-
28
- ## 1. General Principles
29
- - Keep the rewritten prompt **concise and comprehensive**. Avoid overly long sentences and unnecessary descriptive language.
30
- - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
31
- - Keep the main part of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
32
- - All added objects or modifications must align with the logic and style of the scene in the input images.
33
- - If multiple sub-images are to be generated, describe the content of each sub-image individually.
34
-
35
- ## 2. Task-Type Handling Rules
36
-
37
- ### 1. Add, Delete, Replace Tasks
38
- - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
39
- - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
40
- > Original: "Add an animal"
41
- > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
42
- - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
43
- - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
44
-
45
- ### 2. Text Editing Tasks
46
- - All text content must be enclosed in English double quotes `" "`. Keep the original language of the text, and keep the capitalization.
47
- - Both adding new text and replacing existing text are text replacement tasks, For example:
48
- - Replace "xx" to "yy"
49
- - Replace the mask / bounding box to "yy"
50
- - Replace the visual object to "yy"
51
- - Specify text position, color, and layout only if user has required.
52
- - If font is specified, keep the original language of the font.
53
-
54
- ### 3. Human Editing Tasks
55
- - Make the smallest changes to the given user's prompt.
56
- - If changes to background, action, expression, camera shot, or ambient lighting are required, please list each modification individually.
57
- - **Edits to makeup or facial features / expression must be subtle, not exaggerated, and must preserve the subject's identity consistency.**
58
- > Original: "Add eyebrows to the face"
59
- > Rewritten: "Slightly thicken the person's eyebrows with little change, look natural."
60
-
61
- ### 4. Style Conversion or Enhancement Tasks
62
- - If a style is specified, describe it concisely using key visual features. For example:
63
- > Original: "Disco style"
64
- > Rewritten: "1970s disco style: flashing lights, disco ball, mirrored walls, vibrant colors"
65
- - For style reference, analyze the original image and extract key characteristics (color, composition, texture, lighting, artistic style, etc.), integrating them into the instruction.
66
- - **Colorization tasks (including old photo restoration) must use the fixed template:**
67
- "Restore and colorize the old photo."
68
- - Clearly specify the object to be modified. For example:
69
- > Original: Modify the subject in Picture 1 to match the style of Picture 2.
70
- > Rewritten: Change the girl in Picture 1 to the ink-wash style of Picture 2 — rendered in black-and-white watercolor with soft color transitions.
71
-
72
- ### 5. Material Replacement
73
- - Clearly specify the object and the material. For example: "Change the material of the apple to papercut style."
74
- - For text material replacement, use the fixed template:
75
- "Change the material of text "xxxx" to laser style"
76
-
77
- ### 6. Logo/Pattern Editing
78
- - Material replacement should preserve the original shape and structure as much as possible. For example:
79
- > Original: "Convert to sapphire material"
80
- > Rewritten: "Convert the main subject in the image to sapphire material, preserving similar shape and structure"
81
- - When migrating logos/patterns to new scenes, ensure shape and structure consistency. For example:
82
- > Original: "Migrate the logo in the image to a new scene"
83
- > Rewritten: "Migrate the logo in the image to a new scene, preserving similar shape and structure"
84
-
85
- ### 7. Multi-Image Tasks
86
- - Rewritten prompts must clearly point out which image's element is being modified. For example:
87
- > Original: "Replace the subject of picture 1 with the subject of picture 2"
88
- > Rewritten: "Replace the girl of picture 1 with the boy of picture 2, keeping picture 2's background unchanged"
89
- - For stylization tasks, describe the reference image's style in the rewritten prompt, while preserving the visual content of the source image.
90
-
91
- ## 3. Rationale and Logic Check
92
- - Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" requires logical correction.
93
- - Supplement missing critical information: e.g., if position is unspecified, choose a reasonable area based on composition (near subject, blank space, center/edge, etc.).
94
-
95
- # Output Format Example
96
- ```json
97
- {
98
- "Rewritten": "..."
99
- }
100
- '''
101
-
102
- def polish_prompt_hf(original_prompt, img_list):
103
- """
104
- Rewrites the prompt using a Hugging Face InferenceClient.
105
- Supports multiple images via img_list.
106
- """
107
- # Ensure HF_TOKEN is set
108
- api_key = os.environ.get("inference_providers")
109
- if not api_key:
110
- print("Warning: HF_TOKEN not set. Falling back to original prompt.")
111
- return original_prompt
112
- prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {original_prompt}\n\nRewritten Prompt:"
113
- system_prompt = "you are a helpful assistant, you should provide useful answers to users."
114
- try:
115
- # Initialize the client
116
- client = InferenceClient(
117
- provider="nebius",
118
- api_key=api_key,
119
- )
120
-
121
- # Convert list of images to base64 data URLs
122
- image_urls = []
123
- if img_list is not None:
124
- # Ensure img_list is actually a list
125
- if not isinstance(img_list, list):
126
- img_list = [img_list]
127
-
128
- for img in img_list:
129
- image_url = None
130
- # If img is a PIL Image
131
- if hasattr(img, 'save'): # Check if it's a PIL Image
132
- buffered = BytesIO()
133
- img.save(buffered, format="PNG")
134
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
135
- image_url = f"data:image/png;base64,{img_base64}"
136
- # If img is already a file path (string)
137
- elif isinstance(img, str):
138
- with open(img, "rb") as image_file:
139
- img_base64 = base64.b64encode(image_file.read()).decode('utf-8')
140
- image_url = f"data:image/png;base64,{img_base64}"
141
- else:
142
- print(f"Warning: Unexpected image type: {type(img)}, skipping...")
143
- continue
144
-
145
- if image_url:
146
- image_urls.append(image_url)
147
-
148
- # Build the content array with text first, then all images
149
- content = [
150
- {
151
- "type": "text",
152
- "text": prompt
153
- }
154
- ]
155
-
156
- # Add all images to the content
157
- for image_url in image_urls:
158
- content.append({
159
- "type": "image_url",
160
- "image_url": {
161
- "url": image_url
162
- }
163
- })
164
-
165
- # Format the messages for the chat completions API
166
- messages = [
167
- {"role": "system", "content": system_prompt},
168
- {
169
- "role": "user",
170
- "content": content
171
- }
172
- ]
173
-
174
- # Call the API
175
- completion = client.chat.completions.create(
176
- model="Qwen/Qwen2.5-VL-72B-Instruct",
177
- messages=messages,
178
- )
179
-
180
- # Parse the response
181
- result = completion.choices[0].message.content
182
-
183
- # Try to extract JSON if present
184
- if '"Rewritten"' in result:
185
- try:
186
- # Clean up the response
187
- result = result.replace('```json', '').replace('```', '')
188
- result_json = json.loads(result)
189
- polished_prompt = result_json.get('Rewritten', result)
190
- except:
191
- polished_prompt = result
192
- else:
193
- polished_prompt = result
194
-
195
- polished_prompt = polished_prompt.strip().replace("\n", " ")
196
- return polished_prompt
197
-
198
- except Exception as e:
199
- print(f"Error during API call to Hugging Face: {e}")
200
- # Fallback to original prompt if enhancement fails
201
- return original_prompt
202
-
203
-
204
-
205
- def encode_image(pil_image):
206
- import io
207
- buffered = io.BytesIO()
208
- pil_image.save(buffered, format="PNG")
209
- return base64.b64encode(buffered.getvalue()).decode("utf-8")
210
-
211
  # --- Model Loading ---
212
  dtype = torch.bfloat16
213
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -215,7 +20,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
215
  # Scheduler configuration for Lightning
216
  scheduler_config = {
217
  "base_image_seq_len": 256,
218
- "base_shift": math.log(3),
219
  "invert_sigmas": False,
220
  "max_image_seq_len": 8192,
221
  "max_shift": math.log(3),
@@ -262,7 +67,9 @@ def use_output_as_input(output_images):
262
  # --- Main Inference Function (with hardcoded negative prompt) ---
263
  @spaces.GPU()
264
  def infer(
265
- images,
 
 
266
  prompt,
267
  seed=42,
268
  randomize_seed=False,
@@ -270,7 +77,6 @@ def infer(
270
  num_inference_steps=4,
271
  height=None,
272
  width=None,
273
- rewrite_prompt=True,
274
  num_images_per_prompt=1,
275
  progress=gr.Progress(track_tqdm=True),
276
  ):
@@ -303,28 +109,25 @@ def infer(
303
  # Set up the generator for reproducibility
304
  generator = torch.Generator(device=device).manual_seed(seed)
305
 
306
- # Load input images into PIL Images
307
  pil_images = []
308
- if images is not None:
309
- for item in images:
310
- try:
311
- if isinstance(item[0], Image.Image):
312
- pil_images.append(item[0].convert("RGB"))
313
- elif isinstance(item[0], str):
314
- pil_images.append(Image.open(item[0]).convert("RGB"))
315
- elif hasattr(item, "name"):
316
- pil_images.append(Image.open(item.name).convert("RGB"))
317
- except Exception:
318
- continue
319
 
320
  if height==256 and width==256:
321
  height, width = None, None
322
  print(f"Calling pipeline with prompt: '{prompt}'")
323
  print(f"Negative Prompt: '{negative_prompt}'")
324
  print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {width}x{height}")
325
- if rewrite_prompt and len(pil_images) > 0:
326
- prompt = polish_prompt_hf(prompt, pil_images)
327
- print(f"Rewritten Prompt: {prompt}")
328
 
329
 
330
  # Generate the image
@@ -375,15 +178,14 @@ with gr.Blocks(css=css) as demo:
375
  """)
376
  with gr.Row():
377
  with gr.Column():
378
- input_images = gr.Gallery(label="Input Images",
379
- show_label=False,
380
- type="pil",
381
- interactive=True)
382
 
383
  with gr.Column():
384
- result = gr.Gallery(label="Result", show_label=False, type="pil", interactive=False)
385
  # Add this button right after the result gallery - initially hidden
386
- use_output_btn = gr.Button("↗️ Use as input", variant="secondary", size="sm", visible=False)
387
 
388
  with gr.Row():
389
  prompt = gr.Text(
@@ -440,9 +242,6 @@ with gr.Blocks(css=css) as demo:
440
  step=8,
441
  value=None,
442
  )
443
-
444
-
445
- rewrite_prompt = gr.Checkbox(label="Rewrite prompt", value=True)
446
 
447
  # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
448
 
@@ -450,7 +249,9 @@ with gr.Blocks(css=css) as demo:
450
  triggers=[run_button.click, prompt.submit],
451
  fn=infer,
452
  inputs=[
453
- input_images,
 
 
454
  prompt,
455
  seed,
456
  randomize_seed,
@@ -458,7 +259,6 @@ with gr.Blocks(css=css) as demo:
458
  num_inference_steps,
459
  height,
460
  width,
461
- rewrite_prompt,
462
  ],
463
  outputs=[result, seed, use_output_btn], # Added use_output_btn to outputs
464
  )
@@ -467,7 +267,7 @@ with gr.Blocks(css=css) as demo:
467
  use_output_btn.click(
468
  fn=use_output_as_input,
469
  inputs=[result],
470
- outputs=[input_images]
471
  )
472
 
473
  if __name__ == "__main__":
 
11
  # from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
12
  # from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
13
 
 
14
  import math
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # --- Model Loading ---
17
  dtype = torch.bfloat16
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
20
  # Scheduler configuration for Lightning
21
  scheduler_config = {
22
  "base_image_seq_len": 256,
23
+ "base_shift": math.log(5),
24
  "invert_sigmas": False,
25
  "max_image_seq_len": 8192,
26
  "max_shift": math.log(3),
 
67
  # --- Main Inference Function (with hardcoded negative prompt) ---
68
  @spaces.GPU()
69
  def infer(
70
+ image_1,
71
+ image_2,
72
+ image_3,
73
  prompt,
74
  seed=42,
75
  randomize_seed=False,
 
77
  num_inference_steps=4,
78
  height=None,
79
  width=None,
 
80
  num_images_per_prompt=1,
81
  progress=gr.Progress(track_tqdm=True),
82
  ):
 
109
  # Set up the generator for reproducibility
110
  generator = torch.Generator(device=device).manual_seed(seed)
111
 
112
+ # Load input images into a list of PIL Images
113
  pil_images = []
114
+ for item in [image_1, image_2, image_3]:
115
+ if item is None: continue
116
+ try:
117
+ if isinstance(item[0], Image.Image):
118
+ pil_images.append(item[0].convert("RGB"))
119
+ elif isinstance(item[0], str):
120
+ pil_images.append(Image.open(item[0]).convert("RGB"))
121
+ elif hasattr(item, "name"):
122
+ pil_images.append(Image.open(item.name).convert("RGB"))
123
+ except Exception:
124
+ continue
125
 
126
  if height==256 and width==256:
127
  height, width = None, None
128
  print(f"Calling pipeline with prompt: '{prompt}'")
129
  print(f"Negative Prompt: '{negative_prompt}'")
130
  print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {width}x{height}")
 
 
 
131
 
132
 
133
  # Generate the image
 
178
  """)
179
  with gr.Row():
180
  with gr.Column():
181
+ image_1 = gr.Image(label="image 1", type="pil", interactive=True)
182
+ image_2 = gr.Image(label="image 2", type="pil", interactive=True)
183
+ image_3 = gr.Image(label="image 3", type="pil", interactive=True)
 
184
 
185
  with gr.Column():
186
+ result = gr.Image(label="Result", type="pil", interactive=False)
187
  # Add this button right after the result gallery - initially hidden
188
+ use_output_btn = gr.Button("↗️ Use as image 1", variant="secondary", size="sm", visible=False)
189
 
190
  with gr.Row():
191
  prompt = gr.Text(
 
242
  step=8,
243
  value=None,
244
  )
 
 
 
245
 
246
  # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
247
 
 
249
  triggers=[run_button.click, prompt.submit],
250
  fn=infer,
251
  inputs=[
252
+ image_1,
253
+ image_2,
254
+ image_3,
255
  prompt,
256
  seed,
257
  randomize_seed,
 
259
  num_inference_steps,
260
  height,
261
  width,
 
262
  ],
263
  outputs=[result, seed, use_output_btn], # Added use_output_btn to outputs
264
  )
 
267
  use_output_btn.click(
268
  fn=use_output_as_input,
269
  inputs=[result],
270
+ outputs=[image_1]
271
  )
272
 
273
  if __name__ == "__main__":