alphabagibagi commited on
Commit
f33f9df
·
verified ·
1 Parent(s): b872f0d

Upload 6 files

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -35
  2. .gitignore +1 -0
  3. README.md +13 -13
  4. app.py +741 -741
  5. optimization.py +77 -77
  6. requirements.txt +9 -9
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pyc
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: Qwen
3
- emoji: 🔥
4
- colorFrom: gray
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 6.1.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Qwen Image Edit Outpaint
3
+ emoji: 🌖
4
+ colorFrom: pink
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 5.43.1
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: 'outpaint images with Qwen Image Edit '
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,742 +1,742 @@
1
- import gradio as gr
2
- import numpy as np
3
- import random
4
- import torch
5
- import spaces
6
- import os
7
- import json
8
- import time
9
-
10
- from PIL import Image, ImageDraw
11
- import torch
12
- import math
13
-
14
- from optimization import optimize_pipeline_
15
- from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
16
- from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
17
- from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
18
-
19
- from huggingface_hub import InferenceClient
20
- import math
21
-
22
- # --- Prompt Enhancement using Hugging Face InferenceClient ---
23
- def polish_prompt_hf(original_prompt, system_prompt):
24
- """
25
- Rewrites the prompt using a Hugging Face InferenceClient.
26
- """
27
- # Ensure HF_TOKEN is set
28
- api_key = os.environ.get("HF_TOKEN")
29
- if not api_key:
30
- print("Warning: HF_TOKEN not set. Falling back to original prompt.")
31
- return original_prompt
32
-
33
- try:
34
- # Initialize the client
35
- client = InferenceClient(
36
- provider="cerebras",
37
- api_key=api_key,
38
- )
39
-
40
- # Format the messages for the chat completions API
41
- messages = [
42
- {"role": "system", "content": system_prompt},
43
- {"role": "user", "content": original_prompt}
44
- ]
45
-
46
- # Call the API
47
- completion = client.chat.completions.create(
48
- model="Qwen/Qwen3-235B-A22B-Instruct-2507",
49
- messages=messages,
50
- )
51
-
52
- # Parse the response
53
- result = completion.choices[0].message.content
54
-
55
- # Try to extract JSON if present
56
- if '{"Rewritten"' in result:
57
- try:
58
- # Clean up the response
59
- result = result.replace('```json', '').replace('```', '')
60
- result_json = json.loads(result)
61
- polished_prompt = result_json.get('Rewritten', result)
62
- except:
63
- polished_prompt = result
64
- else:
65
- polished_prompt = result
66
-
67
- polished_prompt = polished_prompt.strip().replace("\n", " ")
68
- return polished_prompt
69
-
70
- except Exception as e:
71
- print(f"Error during API call to Hugging Face: {e}")
72
- # Fallback to original prompt if enhancement fails
73
- return original_prompt
74
-
75
-
76
- def polish_prompt(prompt, img):
77
- """
78
- Main function to polish prompts for image editing using HF inference.
79
- """
80
- SYSTEM_PROMPT = '''
81
- # Edit Instruction Rewriter
82
- You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
83
-
84
- Please strictly follow the rewriting rules below:
85
-
86
- ## 1. General Principles
87
- - Keep the rewritten prompt **concise**. Avoid overly long sentences and reduce unnecessary descriptive language.
88
- - If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
89
- - Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
90
- - All added objects or modifications must align with the logic and style of the edited input image's overall scene.
91
-
92
- ## 2. Task Type Handling Rules
93
- ### 1. Add, Delete, Replace Tasks
94
- - If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
95
- - If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
96
- > Original: "Add an animal"
97
- > Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
98
- - Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
99
- - For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.
100
-
101
- ### 2. Text Editing Tasks
102
- - All text content must be enclosed in English double quotes " ". Do not translate or alter the original language of the text, and do not change the capitalization.
103
- - **For text replacement tasks, always use the fixed template:**
104
- - Replace "xx" to "yy".
105
- - Replace the xx bounding box to "yy".
106
- - If the user does not specify text content, infer and add concise text based on the instruction and the input image's context. For example:
107
- > Original: "Add a line of text" (poster)
108
- > Rewritten: "Add text "LIMITED EDITION" at the top center with slight shadow"
109
- - Specify text position, color, and layout in a concise way.
110
-
111
- ### 3. Human Editing Tasks
112
- - Maintain the person's core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
113
- - If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
114
- - **For expression changes, they must be natural and subtle, never exaggerated.**
115
- - If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
116
- - For background change tasks, emphasize maintaining subject consistency at first.
117
- - Example:
118
- > Original: "Change the person's hat"
119
- > Rewritten: "Replace the man's hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"
120
-
121
- ### 4. Style Transformation or Enhancement Tasks
122
- - If a style is specified, describe it concisely with key visual traits. For example:
123
- > Original: "Disco style"
124
- > Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
125
- - If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
126
- - **For coloring tasks, including restoring old photos, always use the fixed template:** "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
127
- - If there are other changes, place the style description at the end.
128
-
129
- ## 3. Rationality and Logic Checks
130
- - Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
131
- - Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).
132
-
133
- # Output Format
134
- Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
135
- '''
136
-
137
- # Note: We're not actually using the image in the HF version,
138
- # but keeping the interface consistent
139
- full_prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
140
-
141
- return polish_prompt_hf(full_prompt, SYSTEM_PROMPT)
142
-
143
- # --- Outpainting Functions ---
144
- def can_expand(source_width, source_height, target_width, target_height, alignment):
145
- """Checks if the image can be expanded based on the alignment."""
146
- if alignment in ("Left", "Right") and source_width >= target_width:
147
- return False
148
- if alignment in ("Top", "Bottom") and source_height >= target_height:
149
- return False
150
- return True
151
-
152
- def prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
153
- """Prepares the image with white margins and creates a mask for outpainting."""
154
- target_size = (width, height)
155
-
156
- # Calculate the scaling factor to fit the image within the target size
157
- scale_factor = min(target_size[0] / image.width, target_size[1] / image.height)
158
- new_width = int(image.width * scale_factor)
159
- new_height = int(image.height * scale_factor)
160
-
161
- # Resize the source image to fit within target size
162
- source = image.resize((new_width, new_height), Image.LANCZOS)
163
-
164
- # Apply resize option using percentages
165
- if resize_option == "Full":
166
- resize_percentage = 100
167
- elif resize_option == "50%":
168
- resize_percentage = 50
169
- elif resize_option == "33%":
170
- resize_percentage = 33
171
- elif resize_option == "25%":
172
- resize_percentage = 25
173
- else: # Custom
174
- resize_percentage = custom_resize_percentage
175
-
176
- # Calculate new dimensions based on percentage
177
- resize_factor = resize_percentage / 100
178
- new_width = int(source.width * resize_factor)
179
- new_height = int(source.height * resize_factor)
180
-
181
- # Ensure minimum size of 64 pixels
182
- new_width = max(new_width, 64)
183
- new_height = max(new_height, 64)
184
-
185
- # Resize the image
186
- source = source.resize((new_width, new_height), Image.LANCZOS)
187
-
188
- # Calculate the overlap in pixels based on the percentage
189
- overlap_x = int(new_width * (overlap_percentage / 100))
190
- overlap_y = int(new_height * (overlap_percentage / 100))
191
-
192
- # Ensure minimum overlap of 1 pixel
193
- overlap_x = max(overlap_x, 1)
194
- overlap_y = max(overlap_y, 1)
195
-
196
- # Calculate margins based on alignment
197
- if alignment == "Middle":
198
- margin_x = (target_size[0] - new_width) // 2
199
- margin_y = (target_size[1] - new_height) // 2
200
- elif alignment == "Left":
201
- margin_x = 0
202
- margin_y = (target_size[1] - new_height) // 2
203
- elif alignment == "Right":
204
- margin_x = target_size[0] - new_width
205
- margin_y = (target_size[1] - new_height) // 2
206
- elif alignment == "Top":
207
- margin_x = (target_size[0] - new_width) // 2
208
- margin_y = 0
209
- elif alignment == "Bottom":
210
- margin_x = (target_size[0] - new_width) // 2
211
- margin_y = target_size[1] - new_height
212
-
213
- # Adjust margins to eliminate gaps
214
- margin_x = max(0, min(margin_x, target_size[0] - new_width))
215
- margin_y = max(0, min(margin_y, target_size[1] - new_height))
216
-
217
- # Create a new background image with white margins and paste the resized source image
218
- background = Image.new('RGB', target_size, (255, 255, 255))
219
- background.paste(source, (margin_x, margin_y))
220
-
221
- # Create the mask
222
- mask = Image.new('L', target_size, 255)
223
- mask_draw = ImageDraw.Draw(mask)
224
-
225
- # Calculate overlap areas
226
- white_gaps_patch = 2
227
- left_overlap = margin_x + overlap_x if overlap_left else margin_x + white_gaps_patch
228
- right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width - white_gaps_patch
229
- top_overlap = margin_y + overlap_y if overlap_top else margin_y + white_gaps_patch
230
- bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height - white_gaps_patch
231
-
232
- if alignment == "Left":
233
- left_overlap = margin_x + overlap_x if overlap_left else margin_x
234
- elif alignment == "Right":
235
- right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width
236
- elif alignment == "Top":
237
- top_overlap = margin_y + overlap_y if overlap_top else margin_y
238
- elif alignment == "Bottom":
239
- bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height
240
-
241
- # Draw the mask
242
- mask_draw.rectangle([
243
- (left_overlap, top_overlap),
244
- (right_overlap, bottom_overlap)
245
- ], fill=0)
246
-
247
- return background, mask
248
-
249
- def preview_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
250
- """Creates a preview showing the mask overlay."""
251
- background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom)
252
-
253
- # Create a preview image showing the mask
254
- preview = background.copy().convert('RGBA')
255
-
256
- # Create a semi-transparent red overlay
257
- red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64)) # Reduced alpha to 64 (25% opacity)
258
-
259
- # Convert black pixels in the mask to semi-transparent red
260
- red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0))
261
- red_mask.paste(red_overlay, (0, 0), mask)
262
-
263
- # Overlay the red mask on the background
264
- preview = Image.alpha_composite(preview, red_mask)
265
-
266
- return preview
267
-
268
- # --- Model Loading ---
269
- dtype = torch.bfloat16
270
- device = "cuda" if torch.cuda.is_available() else "cpu"
271
- pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)
272
- pipe.transformer.__class__ = QwenImageTransformer2DModel
273
- pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
274
-
275
- # --- Ahead-of-time compilation ---
276
- optimize_pipeline_(pipe, image=Image.new("RGB", (1024, 1024)), prompt="prompt")
277
-
278
- # --- UI Constants and Helpers ---
279
- MAX_SEED = np.iinfo(np.int32).max
280
-
281
- def clear_result():
282
- """Clears the result image."""
283
- return gr.update(value=None)
284
-
285
- def update_history(new_image, history):
286
- """Updates the history gallery with the new image."""
287
- time.sleep(0.5) # Small delay to ensure image is ready
288
- if history is None:
289
- history = []
290
- if new_image is not None:
291
- # Convert to list if needed (Gradio sometimes returns tuples)
292
- if not isinstance(history, list):
293
- history = list(history) if history else []
294
- history.insert(0, new_image)
295
- # Keep only the last 20 images in history
296
- history = history[:20]
297
- return history
298
-
299
- def use_history_as_input(evt: gr.SelectData, history):
300
- """Sets the selected history image as the new input image."""
301
- if history and evt.index < len(history):
302
- return gr.update(value=history[evt.index][0])
303
- return gr.update()
304
-
305
- def use_output_as_input(output_image):
306
- """Sets the generated output as the new input image."""
307
- if output_image is not None:
308
- return gr.update(value=output_image)
309
- return gr.update()
310
-
311
- def preload_presets(target_ratio, ui_width, ui_height):
312
- """Updates the width and height sliders based on the selected aspect ratio."""
313
- if target_ratio == "9:16":
314
- changed_width = 720
315
- changed_height = 1280
316
- return changed_width, changed_height, gr.update()
317
- elif target_ratio == "16:9":
318
- changed_width = 1280
319
- changed_height = 720
320
- return changed_width, changed_height, gr.update()
321
- elif target_ratio == "1:1":
322
- changed_width = 1024
323
- changed_height = 1024
324
- return changed_width, changed_height, gr.update()
325
- elif target_ratio == "Custom":
326
- return ui_width, ui_height, gr.update(open=True)
327
-
328
- def select_the_right_preset(user_width, user_height):
329
- if user_width == 720 and user_height == 1280:
330
- return "9:16"
331
- elif user_width == 1280 and user_height == 720:
332
- return "16:9"
333
- elif user_width == 1024 and user_height == 1024:
334
- return "1:1"
335
- else:
336
- return "Custom"
337
-
338
- def toggle_custom_resize_slider(resize_option):
339
- return gr.update(visible=(resize_option == "Custom"))
340
-
341
- # --- Main Inference Function (with outpainting preprocessing) ---
342
- @spaces.GPU(duration=120)
343
- def infer(
344
- image,
345
- prompt,
346
- width,
347
- height,
348
- overlap_percentage,
349
- resize_option,
350
- custom_resize_percentage,
351
- alignment,
352
- overlap_left,
353
- overlap_right,
354
- overlap_top,
355
- overlap_bottom,
356
- seed=42,
357
- randomize_seed=False,
358
- true_guidance_scale=4.0,
359
- num_inference_steps=50,
360
- rewrite_prompt=True,
361
- progress=gr.Progress(track_tqdm=True),
362
- ):
363
- """
364
- Generates an outpainted image using the Qwen-Image-Edit pipeline.
365
- """
366
- # Hardcode the negative prompt as requested
367
- negative_prompt = " "
368
-
369
- if randomize_seed:
370
- seed = random.randint(0, MAX_SEED)
371
-
372
- # Set up the generator for reproducibility
373
- generator = torch.Generator(device=device).manual_seed(seed)
374
-
375
- print(f"Original Prompt: '{prompt}'")
376
- print(f"Negative Prompt: '{negative_prompt}'")
377
- print(f"Seed: {seed}, Steps: {num_inference_steps}")
378
-
379
- if rewrite_prompt:
380
- prompt = polish_prompt(prompt, image)
381
- print(f"Rewritten Prompt: {prompt}")
382
-
383
- # Prepare the image with white margins for outpainting
384
- outpaint_image, mask = prepare_image_and_mask(
385
- image, width, height, overlap_percentage,
386
- resize_option, custom_resize_percentage, alignment,
387
- overlap_left, overlap_right, overlap_top, overlap_bottom
388
- )
389
-
390
- # Check if expansion is possible
391
- if not can_expand(image.width, image.height, width, height, alignment):
392
- alignment = "Middle"
393
- outpaint_image, mask = prepare_image_and_mask(
394
- image, width, height, overlap_percentage,
395
- resize_option, custom_resize_percentage, "Middle",
396
- overlap_left, overlap_right, overlap_top, overlap_bottom
397
- )
398
-
399
- print(f"Outpaint dimensions: {outpaint_image.size}")
400
-
401
- # Generate the image with outpainting preprocessing
402
- result_image = pipe(
403
- outpaint_image, # Use the preprocessed image with white margins
404
- prompt="replace the white margins. "+ prompt,
405
- negative_prompt=negative_prompt,
406
- num_inference_steps=num_inference_steps,
407
- generator=generator,
408
- true_cfg_scale=true_guidance_scale,
409
- ).images[0]
410
-
411
- return result_image, seed
412
-
413
- # --- Examples and UI Layout ---
414
- # You can add examples here if you have sample images
415
- # examples = [
416
- # ["path/to/example1.jpg", "extend the landscape", 1280, 720, "Middle"],
417
- # ["path/to/example2.jpg", "add more sky", 1024, 1024, "Top"],
418
- # ]
419
-
420
- css = """
421
- #col-container {
422
- margin: 0 auto;
423
- max-width: 1024px;
424
- }
425
- #logo-title {
426
- text-align: center;
427
- }
428
- #logo-title img {
429
- width: 400px;
430
- }
431
- #edit_text{margin-top: -62px !important}
432
- .preview-container {
433
- border: 1px solid #e0e0e0;
434
- border-radius: 8px;
435
- padding: 10px;
436
- margin-top: 10px;
437
- }
438
- .gallery-container {
439
- margin-top: 20px;
440
- }
441
- """
442
-
443
- with gr.Blocks(css=css) as demo:
444
- with gr.Column(elem_id="col-container"):
445
- gr.HTML("""
446
- <div id="logo-title">
447
- <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo" width="400" style="display: block; margin: 0 auto;">
448
- <h2 style="font-style: italic;color: #5b47d1;margin-top: -27px !important;margin-left: 133px;">Outpaint [Fast]</h2>
449
- </div>
450
- """)
451
- gr.Markdown("""
452
-
453
- Outpaint images with Qwen Image Edit. [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
454
-
455
- This demo uses the [Qwen-Image-Lightning](https://huggingface.co/lightx2v/Qwen-Image-Lightning) LoRA with AoT compilation and FA3 for accelerated 8-step inference.
456
- Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.
457
- """)
458
-
459
-
460
- with gr.Row():
461
- with gr.Column():
462
- input_image = gr.Image(label="Input Image", type="pil")
463
-
464
- prompt = gr.Text(
465
- label="Prompt",
466
- info="Describe what should appear in the extended areas",
467
- value="extend the image naturally",
468
- )
469
-
470
- with gr.Row():
471
- target_ratio = gr.Radio(
472
- label="Target Ratio",
473
- choices=["9:16", "16:9", "1:1", "Custom"],
474
- value="16:9",
475
- scale=2
476
- )
477
- alignment_dropdown = gr.Dropdown(
478
- choices=["Middle", "Left", "Right", "Top", "Bottom"],
479
- value="Middle",
480
- label="Alignment"
481
- )
482
-
483
- run_button = gr.Button("run", variant="primary")
484
-
485
- with gr.Accordion("Outpainting Settings", open=False) as settings_panel:
486
- with gr.Row():
487
- width_slider = gr.Slider(
488
- label="Target Width",
489
- minimum=512,
490
- maximum=2048,
491
- step=8,
492
- value=1280,
493
- )
494
- height_slider = gr.Slider(
495
- label="Target Height",
496
- minimum=512,
497
- maximum=2048,
498
- step=8,
499
- value=720,
500
- )
501
-
502
- with gr.Group():
503
- overlap_percentage = gr.Slider(
504
- label="Mask overlap (%)",
505
- minimum=1,
506
- maximum=50,
507
- value=10,
508
- step=1,
509
- info="Controls the blending area between original and new content"
510
- )
511
-
512
- with gr.Row():
513
- overlap_top = gr.Checkbox(label="Overlap Top", value=True)
514
- overlap_right = gr.Checkbox(label="Overlap Right", value=True)
515
- with gr.Row():
516
- overlap_left = gr.Checkbox(label="Overlap Left", value=True)
517
- overlap_bottom = gr.Checkbox(label="Overlap Bottom", value=True)
518
-
519
- with gr.Row():
520
- resize_option = gr.Radio(
521
- label="Resize input image",
522
- choices=["Full", "50%", "33%", "25%", "Custom"],
523
- value="Full",
524
- info="How much of the target canvas the original image should occupy"
525
- )
526
- custom_resize_percentage = gr.Slider(
527
- label="Custom resize (%)",
528
- minimum=1,
529
- maximum=100,
530
- step=1,
531
- value=50,
532
- visible=False
533
- )
534
-
535
- preview_button = gr.Button("👁️ Preview alignment and mask", variant="secondary")
536
-
537
- with gr.Accordion("Advanced Settings", open=False):
538
- seed = gr.Slider(
539
- label="Seed",
540
- minimum=0,
541
- maximum=MAX_SEED,
542
- step=1,
543
- value=0,
544
- )
545
-
546
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
547
-
548
- with gr.Row():
549
- true_guidance_scale = gr.Slider(
550
- label="True guidance scale",
551
- minimum=1.0,
552
- maximum=10.0,
553
- step=0.1,
554
- value=1.0
555
- )
556
-
557
- num_inference_steps = gr.Slider(
558
- label="Number of inference steps",
559
- minimum=1,
560
- maximum=28,
561
- step=1,
562
- value=8,
563
- )
564
-
565
- rewrite_prompt = gr.Checkbox(
566
- label="Enhance prompt (using HF Inference)",
567
- value=True
568
- )
569
-
570
- with gr.Column():
571
- result = gr.Image(label="Result", type="pil", interactive=False)
572
-
573
- use_as_input_button = gr.Button("🔄 Use as Input Image", visible=False, variant="secondary")
574
-
575
- with gr.Column(visible=False) as preview_container:
576
- preview_image = gr.Image(label="Preview (red area will be generated)", type="pil")
577
-
578
- gr.Markdown("---")
579
-
580
- with gr.Row():
581
- gr.Markdown("### 📜 History")
582
- clear_history_button = gr.Button("🗑️ Clear History", size="sm", variant="stop")
583
-
584
- history_gallery = gr.Gallery(
585
- label="Click any image to use as input",
586
- columns=4,
587
- rows=2,
588
- object_fit="contain",
589
- height="auto",
590
- interactive=False,
591
- show_label=True,
592
- elem_classes=["gallery-container"]
593
- )
594
-
595
- # Event handlers
596
- use_as_input_button.click(
597
- fn=use_output_as_input,
598
- inputs=[result],
599
- outputs=[input_image],
600
- show_api=False
601
- )
602
-
603
- history_gallery.select(
604
- fn=use_history_as_input,
605
- inputs=[history_gallery],
606
- outputs=[input_image],
607
- show_api=False
608
- )
609
-
610
- clear_history_button.click(
611
- fn=lambda: [],
612
- inputs=None,
613
- outputs=history_gallery,
614
- show_api=False
615
- )
616
-
617
- target_ratio.change(
618
- fn=preload_presets,
619
- inputs=[target_ratio, width_slider, height_slider],
620
- outputs=[width_slider, height_slider, settings_panel],
621
- queue=False,
622
- )
623
-
624
- width_slider.change(
625
- fn=select_the_right_preset,
626
- inputs=[width_slider, height_slider],
627
- outputs=[target_ratio],
628
- queue=False,
629
- )
630
-
631
- height_slider.change(
632
- fn=select_the_right_preset,
633
- inputs=[width_slider, height_slider],
634
- outputs=[target_ratio],
635
- queue=False,
636
- )
637
-
638
- resize_option.change(
639
- fn=toggle_custom_resize_slider,
640
- inputs=[resize_option],
641
- outputs=[custom_resize_percentage],
642
- queue=False,
643
- )
644
-
645
- preview_button.click(
646
- fn=lambda: gr.update(visible=True),
647
- inputs=None,
648
- outputs=[preview_container],
649
- queue=False,
650
- ).then(
651
- fn=preview_image_and_mask,
652
- inputs=[
653
- input_image, width_slider, height_slider, overlap_percentage,
654
- resize_option, custom_resize_percentage, alignment_dropdown,
655
- overlap_left, overlap_right, overlap_top, overlap_bottom
656
- ],
657
- outputs=preview_image,
658
- queue=False,
659
- )
660
-
661
- # Main generation pipeline with result clearing, history update, and button visibility
662
- run_button.click(
663
- fn=clear_result,
664
- inputs=None,
665
- outputs=result,
666
- show_api=False
667
- ).then(
668
- fn=infer,
669
- inputs=[
670
- input_image,
671
- prompt,
672
- width_slider,
673
- height_slider,
674
- overlap_percentage,
675
- resize_option,
676
- custom_resize_percentage,
677
- alignment_dropdown,
678
- overlap_left,
679
- overlap_right,
680
- overlap_top,
681
- overlap_bottom,
682
- seed,
683
- randomize_seed,
684
- true_guidance_scale,
685
- num_inference_steps,
686
- rewrite_prompt,
687
- ],
688
- outputs=[result, seed],
689
- ).then(
690
- fn=lambda: gr.update(visible=True),
691
- inputs=None,
692
- outputs=use_as_input_button,
693
- show_api=False
694
- ).then(
695
- fn=update_history,
696
- inputs=[result, history_gallery],
697
- outputs=history_gallery,
698
- show_api=False
699
- )
700
-
701
- # Also trigger on prompt submit
702
- prompt.submit(
703
- fn=clear_result,
704
- inputs=None,
705
- outputs=result,
706
- show_api=False
707
- ).then(
708
- fn=infer,
709
- inputs=[
710
- input_image,
711
- prompt,
712
- width_slider,
713
- height_slider,
714
- overlap_percentage,
715
- resize_option,
716
- custom_resize_percentage,
717
- alignment_dropdown,
718
- overlap_left,
719
- overlap_right,
720
- overlap_top,
721
- overlap_bottom,
722
- seed,
723
- randomize_seed,
724
- true_guidance_scale,
725
- num_inference_steps,
726
- rewrite_prompt,
727
- ],
728
- outputs=[result, seed],
729
- ).then(
730
- fn=lambda: gr.update(visible=True),
731
- inputs=None,
732
- outputs=use_as_input_button,
733
- show_api=False
734
- ).then(
735
- fn=update_history,
736
- inputs=[result, history_gallery],
737
- outputs=history_gallery,
738
- show_api=False
739
- )
740
-
741
- if __name__ == "__main__":
742
  demo.launch()
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+ import torch
5
+ import spaces
6
+ import os
7
+ import json
8
+ import time
9
+
10
+ from PIL import Image, ImageDraw
11
+ import torch
12
+ import math
13
+
14
+ from optimization import optimize_pipeline_
15
+ from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
16
+ from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
17
+ from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
18
+
19
+ from huggingface_hub import InferenceClient
20
+ import math
21
+
22
+ # --- Prompt Enhancement using Hugging Face InferenceClient ---
23
def _extract_rewritten(text):
    """Pull the rewritten prompt out of a raw model reply.

    If the reply contains a ``{"Rewritten": ...}`` JSON object (optionally
    wrapped in Markdown code fences), return the value stored under
    ``Rewritten``.  Otherwise return the reply text itself.
    """
    if '{"Rewritten"' not in text:
        return text

    # Strip Markdown code fences so the remainder can be parsed as JSON.
    cleaned = text.replace('```json', '').replace('```', '')
    try:
        payload = json.loads(cleaned)
    except ValueError:
        # The reply mentioned the key but is not valid JSON; use it verbatim.
        return cleaned
    if isinstance(payload, dict):
        return payload.get('Rewritten', cleaned)
    return cleaned


def polish_prompt_hf(original_prompt, system_prompt):
    """
    Rewrites the prompt using a Hugging Face InferenceClient.

    Args:
        original_prompt: Text sent as the user message.
        system_prompt: Text sent as the system message.

    Returns:
        The rewritten prompt as a single line of text.  Falls back to
        ``original_prompt`` when ``HF_TOKEN`` is not set or when the API
        call / response handling raises.
    """
    # Ensure HF_TOKEN is set
    api_key = os.environ.get("HF_TOKEN")
    if not api_key:
        print("Warning: HF_TOKEN not set. Falling back to original prompt.")
        return original_prompt

    try:
        # Initialize the client
        client = InferenceClient(
            provider="cerebras",
            api_key=api_key,
        )

        # Format the messages for the chat completions API
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": original_prompt},
        ]

        # Call the API
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
        )

        # Parse the response, unwrapping a JSON envelope if one is present
        reply = completion.choices[0].message.content
        polished_prompt = _extract_rewritten(reply)

        # Collapse the result onto a single line
        return polished_prompt.strip().replace("\n", " ")

    except Exception as e:
        # Best-effort enhancement: any failure falls back to the input prompt.
        print(f"Error during API call to Hugging Face: {e}")
        return original_prompt
74
+
75
+
76
def polish_prompt(prompt, img):
    """
    Main function to polish prompts for image editing using HF inference.

    Args:
        prompt: The user's raw edit instruction.
        img: The image being edited.  It is not used by this text-only
            implementation; the parameter is kept so callers can pass it.

    Returns:
        Whatever ``polish_prompt_hf`` returns for the instruction.
    """
    SYSTEM_PROMPT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.

Please strictly follow the rewriting rules below:

## 1. General Principles
- Keep the rewritten prompt **concise**. Avoid overly long sentences and reduce unnecessary descriptive language.
- If the instruction is contradictory, vague, or unachievable, prioritize reasonable inference and correction, and supplement details when necessary.
- Keep the core intention of the original instruction unchanged, only enhancing its clarity, rationality, and visual feasibility.
- All added objects or modifications must align with the logic and style of the edited input image's overall scene.

## 2. Task Type Handling Rules
### 1. Add, Delete, Replace Tasks
- If the instruction is clear (already includes task type, target entity, position, quantity, attributes), preserve the original intent and only refine the grammar.
- If the description is vague, supplement with minimal but sufficient details (category, color, size, orientation, position, etc.). For example:
> Original: "Add an animal"
> Rewritten: "Add a light-gray cat in the bottom-right corner, sitting and facing the camera"
- Remove meaningless instructions: e.g., "Add 0 objects" should be ignored or flagged as invalid.
- For replacement tasks, specify "Replace Y with X" and briefly describe the key visual features of X.

### 2. Text Editing Tasks
- All text content must be enclosed in English double quotes " ". Do not translate or alter the original language of the text, and do not change the capitalization.
- **For text replacement tasks, always use the fixed template:**
- Replace "xx" to "yy".
- Replace the xx bounding box to "yy".
- If the user does not specify text content, infer and add concise text based on the instruction and the input image's context. For example:
> Original: "Add a line of text" (poster)
> Rewritten: "Add text "LIMITED EDITION" at the top center with slight shadow"
- Specify text position, color, and layout in a concise way.

### 3. Human Editing Tasks
- Maintain the person's core visual consistency (ethnicity, gender, age, hairstyle, expression, outfit, etc.).
- If modifying appearance (e.g., clothes, hairstyle), ensure the new element is consistent with the original style.
- **For expression changes, they must be natural and subtle, never exaggerated.**
- If deletion is not specifically emphasized, the most important subject in the original image (e.g., a person, an animal) should be preserved.
- For background change tasks, emphasize maintaining subject consistency at first.
- Example:
> Original: "Change the person's hat"
> Rewritten: "Replace the man's hat with a dark brown beret; keep smile, short hair, and gray jacket unchanged"

### 4. Style Transformation or Enhancement Tasks
- If a style is specified, describe it concisely with key visual traits. For example:
> Original: "Disco style"
> Rewritten: "1970s disco: flashing lights, disco ball, mirrored walls, colorful tones"
- If the instruction says "use reference style" or "keep current style," analyze the input image, extract main features (color, composition, texture, lighting, art style), and integrate them concisely.
- **For coloring tasks, including restoring old photos, always use the fixed template:** "Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"
- If there are other changes, place the style description at the end.

## 3. Rationality and Logic Checks
- Resolve contradictory instructions: e.g., "Remove all trees but keep all trees" should be logically corrected.
- Add missing key information: if position is unspecified, choose a reasonable area based on composition (near subject, empty space, center/edges).

# Output Format
Return only the rewritten instruction text directly, without JSON formatting or any other wrapper.
'''

    # The rules above travel in the system message, so the user message only
    # carries the instruction itself.  Embedding SYSTEM_PROMPT here as well
    # would send the whole rule set twice per request.
    user_message = f"User Input: {prompt}\n\nRewritten Prompt:"

    return polish_prompt_hf(user_message, SYSTEM_PROMPT)
142
+
143
+ # --- Outpainting Functions ---
144
def can_expand(source_width, source_height, target_width, target_height, alignment):
    """Tell whether the source leaves room to grow for the given alignment.

    A "Left"/"Right" alignment needs the source to be narrower than the
    target; a "Top"/"Bottom" alignment needs it to be shorter.  Any other
    alignment (e.g. "Middle") is always considered expandable.
    """
    blocked_horizontally = (
        alignment in ("Left", "Right") and source_width >= target_width
    )
    blocked_vertically = (
        alignment in ("Top", "Bottom") and source_height >= target_height
    )
    return not (blocked_horizontally or blocked_vertically)
151
+
152
def prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
    """Prepares the image with white margins and creates a mask for outpainting.

    Args:
        image: Source PIL image.
        width, height: Size of the target canvas in pixels.
        overlap_percentage: Percentage of the placed image's size by which
            the masked (kept) rectangle is inset on sides with overlap enabled.
        resize_option: "Full", "50%", "33%", "25%"; any other value selects
            ``custom_resize_percentage``.
        custom_resize_percentage: Percentage used for the custom option.
        alignment: "Middle", "Left", "Right", "Top" or "Bottom".
        overlap_left, overlap_right, overlap_top, overlap_bottom: Whether the
            overlap inset is applied on that side.

    Returns:
        ``(background, mask)``: an RGB canvas with the resized source pasted
        on white, and an "L" mask that is 255 everywhere except a 0-filled
        rectangle over the part of the source that is kept.

    Raises:
        ValueError: If ``alignment`` is not one of the supported values.
    """
    # Image.new needs integer sizes; UI sliders may hand over floats.
    target_size = (int(width), int(height))
    target_w, target_h = target_size

    # Scale factor that fits the image inside the target canvas
    scale_factor = min(target_w / image.width, target_h / image.height)
    fitted_width = int(image.width * scale_factor)
    fitted_height = int(image.height * scale_factor)

    # Apply resize option using percentages
    preset_percentages = {"Full": 100, "50%": 50, "33%": 33, "25%": 25}
    resize_percentage = preset_percentages.get(resize_option, custom_resize_percentage)
    resize_factor = resize_percentage / 100

    # Final placed size, never smaller than 64 pixels per side
    new_width = max(int(fitted_width * resize_factor), 64)
    new_height = max(int(fitted_height * resize_factor), 64)

    # Resample once, straight from the original, instead of fit-then-shrink:
    # same final dimensions without a second lossy LANCZOS pass.
    source = image.resize((new_width, new_height), Image.LANCZOS)

    # Overlap in pixels, at least 1 pixel
    overlap_x = max(int(new_width * (overlap_percentage / 100)), 1)
    overlap_y = max(int(new_height * (overlap_percentage / 100)), 1)

    # Margins (top-left paste position) based on alignment
    centered_x = (target_w - new_width) // 2
    centered_y = (target_h - new_height) // 2
    margins_by_alignment = {
        "Middle": (centered_x, centered_y),
        "Left": (0, centered_y),
        "Right": (target_w - new_width, centered_y),
        "Top": (centered_x, 0),
        "Bottom": (centered_x, target_h - new_height),
    }
    try:
        margin_x, margin_y = margins_by_alignment[alignment]
    except KeyError:
        raise ValueError(f"Unsupported alignment: {alignment!r}") from None

    # Clamp margins so the paste position stays inside the canvas
    margin_x = max(0, min(margin_x, target_w - new_width))
    margin_y = max(0, min(margin_y, target_h - new_height))

    # White canvas with the resized source pasted onto it
    background = Image.new('RGB', target_size, (255, 255, 255))
    background.paste(source, (margin_x, margin_y))

    # Mask starts fully white (255); the kept region is drawn in black below
    mask = Image.new('L', target_size, 255)
    mask_draw = ImageDraw.Draw(mask)

    # Edges of the kept rectangle.  Sides without overlap are still inset by
    # a small patch so no thin white seam is left along the image border.
    white_gaps_patch = 2
    left_overlap = margin_x + overlap_x if overlap_left else margin_x + white_gaps_patch
    right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width - white_gaps_patch
    top_overlap = margin_y + overlap_y if overlap_top else margin_y + white_gaps_patch
    bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height - white_gaps_patch

    # On the side the image is aligned to, a disabled overlap uses the exact
    # image edge instead of the patch inset.
    if alignment == "Left":
        left_overlap = margin_x + overlap_x if overlap_left else margin_x
    elif alignment == "Right":
        right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width
    elif alignment == "Top":
        top_overlap = margin_y + overlap_y if overlap_top else margin_y
    elif alignment == "Bottom":
        bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height

    # Draw the kept rectangle into the mask
    mask_draw.rectangle([
        (left_overlap, top_overlap),
        (right_overlap, bottom_overlap)
    ], fill=0)

    return background, mask
248
+
249
def preview_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
    """Render the prepared canvas with the mask shown as a red tint.

    All arguments are forwarded unchanged to ``prepare_image_and_mask``.
    Returns an RGBA image: the canvas with a semi-transparent red layer
    wherever the mask is non-zero.
    """
    canvas, mask = prepare_image_and_mask(
        image, width, height, overlap_percentage, resize_option,
        custom_resize_percentage, alignment,
        overlap_left, overlap_right, overlap_top, overlap_bottom,
    )

    # Uniform red layer, alpha 64 of 255 (about 25% opacity)
    tint = Image.new('RGBA', canvas.size, (255, 0, 0, 64))

    # Transparent layer that receives the tint only where the mask allows it
    tinted_region = Image.new('RGBA', canvas.size, (0, 0, 0, 0))
    tinted_region.paste(tint, (0, 0), mask)

    # Composite the tinted layer over an RGBA copy of the canvas
    return Image.alpha_composite(canvas.convert('RGBA'), tinted_region)
267
+
268
+ # --- Model Loading ---
269
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Qwen-Image-Edit pipeline in bfloat16 and move it to the device.
pipe = QwenImageEditPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Re-point the loaded transformer at the local QwenImageTransformer2DModel
# class and install the FA3 attention processor on it.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# --- Ahead-of-time compilation ---
# Runs the pipeline once on a blank 1024x1024 image with a placeholder prompt.
optimize_pipeline_(pipe, image=Image.new("RGB", (1024, 1024)), prompt="prompt")

# --- UI Constants and Helpers ---
# Largest value offered by the seed slider and used for random seeds.
MAX_SEED = np.iinfo(np.int32).max
280
+
281
def clear_result():
    """Return a Gradio update that empties the result image."""
    cleared = gr.update(value=None)
    return cleared
284
+
285
def update_history(new_image, history, max_items=20, delay=0.5):
    """Updates the history gallery with the new image.

    Args:
        new_image: Image to put at the front of the history, or ``None`` to
            leave the history contents unchanged.
        history: Current gallery value; may be ``None``, a list or a tuple.
        max_items: Maximum number of entries kept (newest first).
        delay: Seconds to wait before updating.  The default keeps the
            original small pause; pass ``0`` to skip it.

    Returns:
        A new list with ``new_image`` first, truncated to ``max_items``.
        The ``history`` argument itself is never modified.
    """
    if delay > 0:
        time.sleep(delay)  # Small delay to ensure image is ready

    # Copy into a fresh list (Gradio sometimes returns tuples) so the
    # caller's object is not mutated.
    entries = list(history) if history else []
    if new_image is not None:
        entries.insert(0, new_image)

    # Keep only the most recent entries
    return entries[:max_items]
298
+
299
def use_history_as_input(evt: gr.SelectData, history):
    """Sets the selected history image as the new input image.

    Args:
        evt: Selection event; ``evt.index`` is used as the position of the
            clicked item in ``history``.
        history: Current gallery value.

    Returns:
        A Gradio update carrying the selected image, or an empty update when
        the history is empty or the index is out of range.
    """
    if not history or evt.index >= len(history):
        return gr.update()

    entry = history[evt.index]
    # Entries are normally (image, caption) pairs; accept a bare image too
    # rather than indexing into it.
    selected = entry[0] if isinstance(entry, (tuple, list)) else entry
    return gr.update(value=selected)
304
+
305
def use_output_as_input(output_image):
    """Feed the generated image back in as the next input image.

    Returns an empty Gradio update when there is no output image yet.
    """
    if output_image is None:
        return gr.update()
    return gr.update(value=output_image)
310
+
311
def preload_presets(target_ratio, ui_width, ui_height):
    """Updates the width and height sliders based on the selected aspect ratio.

    Args:
        target_ratio: Selected ratio label ("9:16", "16:9", "1:1" or "Custom").
        ui_width, ui_height: Current slider values.

    Returns:
        ``(width, height, panel_update)``.  Known presets return their fixed
        size and leave the settings panel untouched; "Custom" keeps the
        current size and opens the panel.  Any other value keeps the current
        size and leaves the panel untouched, so the three outputs are always
        populated.
    """
    preset_sizes = {
        "9:16": (720, 1280),
        "16:9": (1280, 720),
        "1:1": (1024, 1024),
    }

    if target_ratio in preset_sizes:
        changed_width, changed_height = preset_sizes[target_ratio]
        return changed_width, changed_height, gr.update()

    if target_ratio == "Custom":
        return ui_width, ui_height, gr.update(open=True)

    # Unrecognised ratio: change nothing instead of returning None.
    return ui_width, ui_height, gr.update()
327
+
328
def select_the_right_preset(user_width, user_height):
    """Map a width/height pair to its ratio label, or "Custom" if none matches."""
    labels_by_size = {
        (720, 1280): "9:16",
        (1280, 720): "16:9",
        (1024, 1024): "1:1",
    }
    return labels_by_size.get((user_width, user_height), "Custom")
337
+
338
def toggle_custom_resize_slider(resize_option):
    """Show the custom-resize slider only while the "Custom" option is selected."""
    is_custom = resize_option == "Custom"
    return gr.update(visible=is_custom)
340
+
341
+ # --- Main Inference Function (with outpainting preprocessing) ---
342
@spaces.GPU(duration=120)
def infer(
    image,
    prompt,
    width,
    height,
    overlap_percentage,
    resize_option,
    custom_resize_percentage,
    alignment,
    overlap_left,
    overlap_right,
    overlap_top,
    overlap_bottom,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=4.0,
    num_inference_steps=50,
    rewrite_prompt=True,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Generates an outpainted image using the Qwen-Image-Edit pipeline.

    The input image is placed on a white canvas of the target size and the
    pipeline is asked to replace the white margins, guided by ``prompt``.

    Args:
        image: Input PIL image.
        prompt: Edit instruction; optionally rewritten via ``polish_prompt``.
        width, height: Target canvas size.
        overlap_percentage, resize_option, custom_resize_percentage,
        alignment, overlap_left, overlap_right, overlap_top, overlap_bottom:
            Forwarded to ``prepare_image_and_mask``.
        seed: Seed for the random generator.
        randomize_seed: When true, ``seed`` is replaced by a random value.
        true_guidance_scale: Passed to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of pipeline steps.
        rewrite_prompt: Whether to rewrite the prompt before generation.
        progress: Gradio progress tracker.

    Returns:
        ``(result_image, seed)``: the generated image and the seed used.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        # Surface a readable message instead of an AttributeError further down.
        raise gr.Error("Please provide an input image.")

    # Hardcode the negative prompt as requested
    negative_prompt = " "

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # manual_seed needs an int; UI sliders may deliver floats.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    # Set up the generator for reproducibility
    generator = torch.Generator(device=device).manual_seed(seed)

    print(f"Original Prompt: '{prompt}'")
    print(f"Negative Prompt: '{negative_prompt}'")
    print(f"Seed: {seed}, Steps: {num_inference_steps}")

    if rewrite_prompt:
        prompt = polish_prompt(prompt, image)
        print(f"Rewritten Prompt: {prompt}")

    # Decide the alignment first so the canvas is built only once: when the
    # image cannot expand along the requested side, fall back to "Middle".
    if not can_expand(image.width, image.height, width, height, alignment):
        alignment = "Middle"

    # Prepare the image with white margins for outpainting
    outpaint_image, _mask = prepare_image_and_mask(
        image, width, height, overlap_percentage,
        resize_option, custom_resize_percentage, alignment,
        overlap_left, overlap_right, overlap_top, overlap_bottom
    )

    print(f"Outpaint dimensions: {outpaint_image.size}")

    # Generate the image with outpainting preprocessing
    result_image = pipe(
        outpaint_image,  # Use the preprocessed image with white margins
        prompt="replace the white margins. " + prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
    ).images[0]

    return result_image, seed
412
+
413
+ # --- Examples and UI Layout ---
414
+ # You can add examples here if you have sample images
415
+ # examples = [
416
+ # ["path/to/example1.jpg", "extend the landscape", 1280, 720, "Middle"],
417
+ # ["path/to/example2.jpg", "add more sky", 1024, 1024, "Top"],
418
+ # ]
419
+
420
css = """
#col-container {
    margin: 0 auto;
    max-width: 1024px;
}
#logo-title {
    text-align: center;
}
#logo-title img {
    width: 400px;
}
#edit_text{margin-top: -62px !important}
.preview-container {
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 10px;
    margin-top: 10px;
}
.gallery-container {
    margin-top: 20px;
}
"""

# NOTE(review): the nesting of the layout below is reconstructed from the
# order of the statements; confirm it against the original app.py.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML("""
        <div id="logo-title">
            <img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/qwen_image_edit_logo.png" alt="Qwen-Image Edit Logo" width="400" style="display: block; margin: 0 auto;">
            <h2 style="font-style: italic;color: #5b47d1;margin-top: -27px !important;margin-left: 133px;">Outpaint [Fast]</h2>
        </div>
        """)
        gr.Markdown("""

        Outpaint images with Qwen Image Edit. [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.

        This demo uses the [Qwen-Image-Lightning](https://huggingface.co/lightx2v/Qwen-Image-Lightning) LoRA with AoT compilation and FA3 for accelerated 8-step inference.
        Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.
        """)

        with gr.Row():
            # Left column: inputs and settings
            with gr.Column():
                input_image = gr.Image(label="Input Image", type="pil")

                prompt = gr.Text(
                    label="Prompt",
                    info="Describe what should appear in the extended areas",
                    value="extend the image naturally",
                )

                with gr.Row():
                    target_ratio = gr.Radio(
                        label="Target Ratio",
                        choices=["9:16", "16:9", "1:1", "Custom"],
                        value="16:9",
                        scale=2
                    )
                    alignment_dropdown = gr.Dropdown(
                        choices=["Middle", "Left", "Right", "Top", "Bottom"],
                        value="Middle",
                        label="Alignment"
                    )

                run_button = gr.Button("run", variant="primary")

                with gr.Accordion("Outpainting Settings", open=False) as settings_panel:
                    with gr.Row():
                        width_slider = gr.Slider(
                            label="Target Width",
                            minimum=512,
                            maximum=2048,
                            step=8,
                            value=1280,
                        )
                        height_slider = gr.Slider(
                            label="Target Height",
                            minimum=512,
                            maximum=2048,
                            step=8,
                            value=720,
                        )

                    with gr.Group():
                        overlap_percentage = gr.Slider(
                            label="Mask overlap (%)",
                            minimum=1,
                            maximum=50,
                            value=10,
                            step=1,
                            info="Controls the blending area between original and new content"
                        )

                        with gr.Row():
                            overlap_top = gr.Checkbox(label="Overlap Top", value=True)
                            overlap_right = gr.Checkbox(label="Overlap Right", value=True)
                        with gr.Row():
                            overlap_left = gr.Checkbox(label="Overlap Left", value=True)
                            overlap_bottom = gr.Checkbox(label="Overlap Bottom", value=True)

                    with gr.Row():
                        resize_option = gr.Radio(
                            label="Resize input image",
                            choices=["Full", "50%", "33%", "25%", "Custom"],
                            value="Full",
                            info="How much of the target canvas the original image should occupy"
                        )
                        custom_resize_percentage = gr.Slider(
                            label="Custom resize (%)",
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=50,
                            visible=False
                        )

                    preview_button = gr.Button("👁️ Preview alignment and mask", variant="secondary")

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )

                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    with gr.Row():
                        true_guidance_scale = gr.Slider(
                            label="True guidance scale",
                            minimum=1.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.0
                        )

                        num_inference_steps = gr.Slider(
                            label="Number of inference steps",
                            minimum=1,
                            maximum=28,
                            step=1,
                            value=8,
                        )

                    rewrite_prompt = gr.Checkbox(
                        label="Enhance prompt (using HF Inference)",
                        value=True
                    )

            # Right column: result and mask preview
            with gr.Column():
                result = gr.Image(label="Result", type="pil", interactive=False)

                use_as_input_button = gr.Button("🔄 Use as Input Image", visible=False, variant="secondary")

                with gr.Column(visible=False) as preview_container:
                    preview_image = gr.Image(label="Preview (red area will be generated)", type="pil")

        gr.Markdown("---")

        with gr.Row():
            gr.Markdown("### 📜 History")
            clear_history_button = gr.Button("🗑️ Clear History", size="sm", variant="stop")

        history_gallery = gr.Gallery(
            label="Click any image to use as input",
            columns=4,
            rows=2,
            object_fit="contain",
            height="auto",
            interactive=False,
            show_label=True,
            elem_classes=["gallery-container"]
        )

    # Event handlers
    use_as_input_button.click(
        fn=use_output_as_input,
        inputs=[result],
        outputs=[input_image],
        show_api=False
    )

    history_gallery.select(
        fn=use_history_as_input,
        inputs=[history_gallery],
        outputs=[input_image],
        show_api=False
    )

    clear_history_button.click(
        fn=lambda: [],
        inputs=None,
        outputs=history_gallery,
        show_api=False
    )

    target_ratio.change(
        fn=preload_presets,
        inputs=[target_ratio, width_slider, height_slider],
        outputs=[width_slider, height_slider, settings_panel],
        queue=False,
    )

    # Width and height sliders both re-derive the ratio label the same way.
    for size_slider in (width_slider, height_slider):
        size_slider.change(
            fn=select_the_right_preset,
            inputs=[width_slider, height_slider],
            outputs=[target_ratio],
            queue=False,
        )

    resize_option.change(
        fn=toggle_custom_resize_slider,
        inputs=[resize_option],
        outputs=[custom_resize_percentage],
        queue=False,
    )

    preview_button.click(
        fn=lambda: gr.update(visible=True),
        inputs=None,
        outputs=[preview_container],
        queue=False,
    ).then(
        fn=preview_image_and_mask,
        inputs=[
            input_image, width_slider, height_slider, overlap_percentage,
            resize_option, custom_resize_percentage, alignment_dropdown,
            overlap_left, overlap_right, overlap_top, overlap_bottom
        ],
        outputs=preview_image,
        queue=False,
    )

    # Inputs of `infer`, in the order of its positional parameters.
    generation_inputs = [
        input_image,
        prompt,
        width_slider,
        height_slider,
        overlap_percentage,
        resize_option,
        custom_resize_percentage,
        alignment_dropdown,
        overlap_left,
        overlap_right,
        overlap_top,
        overlap_bottom,
        seed,
        randomize_seed,
        true_guidance_scale,
        num_inference_steps,
        rewrite_prompt,
    ]

    # Main generation pipeline with result clearing, history update, and
    # button visibility.  The same chain is attached to the run button and
    # to submitting the prompt box, so it is defined once.
    for start_generation in (run_button.click, prompt.submit):
        start_generation(
            fn=clear_result,
            inputs=None,
            outputs=result,
            show_api=False
        ).then(
            fn=infer,
            inputs=generation_inputs,
            outputs=[result, seed],
        ).then(
            fn=lambda: gr.update(visible=True),
            inputs=None,
            outputs=use_as_input_button,
            show_api=False
        ).then(
            fn=update_history,
            inputs=[result, history_gallery],
            outputs=history_gallery,
            show_api=False
        )

if __name__ == "__main__":
    demo.launch()
optimization.py CHANGED
@@ -1,77 +1,77 @@
1
- """
2
- """
3
-
4
- from typing import Any
5
- from typing import Callable
6
- from typing import ParamSpec
7
- from torchao.quantization import quantize_
8
- from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
9
- import spaces
10
- import torch
11
- from torch.utils._pytree import tree_map
12
-
13
-
14
- P = ParamSpec('P')
15
-
16
-
17
- TRANSFORMER_IMAGE_SEQ_LENGTH_DIM = torch.export.Dim('image_seq_length')
18
- TRANSFORMER_TEXT_SEQ_LENGTH_DIM = torch.export.Dim('text_seq_length')
19
-
20
- TRANSFORMER_DYNAMIC_SHAPES = {
21
- 'hidden_states': {
22
- 1: TRANSFORMER_IMAGE_SEQ_LENGTH_DIM,
23
- },
24
- 'encoder_hidden_states': {
25
- 1: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
26
- },
27
- 'encoder_hidden_states_mask': {
28
- 1: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
29
- },
30
- 'image_rotary_emb': ({
31
- 0: TRANSFORMER_IMAGE_SEQ_LENGTH_DIM,
32
- }, {
33
- 0: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
34
- }),
35
- }
36
-
37
-
38
- INDUCTOR_CONFIGS = {
39
- 'conv_1x1_as_mm': True,
40
- 'epilogue_fusion': False,
41
- 'coordinate_descent_tuning': True,
42
- 'coordinate_descent_check_all_directions': True,
43
- 'max_autotune': True,
44
- 'triton.cudagraphs': True,
45
- }
46
-
47
-
48
- def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
49
-
50
- @spaces.GPU(duration=1500)
51
- def compile_transformer():
52
-
53
- pipeline.load_lora_weights(
54
- "lightx2v/Qwen-Image-Lightning",
55
- weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
56
- )
57
- pipeline.fuse_lora()
58
- pipeline.unload_lora_weights()
59
-
60
- with spaces.aoti_capture(pipeline.transformer) as call:
61
- pipeline(*args, **kwargs)
62
-
63
- dynamic_shapes = tree_map(lambda t: None, call.kwargs)
64
- dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES
65
-
66
- # quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
67
-
68
- exported = torch.export.export(
69
- mod=pipeline.transformer,
70
- args=call.args,
71
- kwargs=call.kwargs,
72
- dynamic_shapes=dynamic_shapes,
73
- )
74
-
75
- return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)
76
-
77
- spaces.aoti_apply(compile_transformer(), pipeline.transformer)
 
1
+ """
2
+ """
3
+
4
+ from typing import Any
5
+ from typing import Callable
6
+ from typing import ParamSpec
7
+ from torchao.quantization import quantize_
8
+ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
9
+ import spaces
10
+ import torch
11
+ from torch.utils._pytree import tree_map
12
+
13
+
14
+ P = ParamSpec('P')
15
+
16
+
17
+ TRANSFORMER_IMAGE_SEQ_LENGTH_DIM = torch.export.Dim('image_seq_length')
18
+ TRANSFORMER_TEXT_SEQ_LENGTH_DIM = torch.export.Dim('text_seq_length')
19
+
20
+ TRANSFORMER_DYNAMIC_SHAPES = {
21
+ 'hidden_states': {
22
+ 1: TRANSFORMER_IMAGE_SEQ_LENGTH_DIM,
23
+ },
24
+ 'encoder_hidden_states': {
25
+ 1: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
26
+ },
27
+ 'encoder_hidden_states_mask': {
28
+ 1: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
29
+ },
30
+ 'image_rotary_emb': ({
31
+ 0: TRANSFORMER_IMAGE_SEQ_LENGTH_DIM,
32
+ }, {
33
+ 0: TRANSFORMER_TEXT_SEQ_LENGTH_DIM,
34
+ }),
35
+ }
36
+
37
+
38
+ INDUCTOR_CONFIGS = {
39
+ 'conv_1x1_as_mm': True,
40
+ 'epilogue_fusion': False,
41
+ 'coordinate_descent_tuning': True,
42
+ 'coordinate_descent_check_all_directions': True,
43
+ 'max_autotune': True,
44
+ 'triton.cudagraphs': True,
45
+ }
46
+
47
+
48
def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
    """Fuse the Lightning LoRA into the pipeline and AOT-compile its transformer.

    ``*args`` / ``**kwargs`` are the example inputs used for one pipeline
    call, from which the transformer's call arguments are captured.  The
    pipeline is modified in place; nothing is returned.
    """

    @spaces.GPU(duration=1500)
    def compile_transformer():
        # Merge the 8-step Lightning LoRA into the base weights, then drop
        # the adapter so only the fused weights remain.
        pipeline.load_lora_weights(
            "lightx2v/Qwen-Image-Lightning",
            weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
        )
        pipeline.fuse_lora()
        pipeline.unload_lora_weights()

        # Run the pipeline once and record how the transformer was called.
        with spaces.aoti_capture(pipeline.transformer) as call:
            pipeline(*args, **kwargs)

        # Every captured kwarg defaults to "no dynamic axes" (None); the
        # entries named in TRANSFORMER_DYNAMIC_SHAPES override that default.
        static_shapes = tree_map(lambda t: None, call.kwargs)
        dynamic_shapes = static_shapes | TRANSFORMER_DYNAMIC_SHAPES

        # quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())

        exported = torch.export.export(
            mod=pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )

        return spaces.aoti_compile(exported, INDUCTOR_CONFIGS)

    compiled_transformer = compile_transformer()
    spaces.aoti_apply(compiled_transformer, pipeline.transformer)
requirements.txt CHANGED
@@ -1,10 +1,10 @@
1
- git+https://github.com/huggingface/diffusers.git@qwenimage-lru-cache-bypass
2
- kernels
3
- torchao==0.11.0
4
- transformers
5
- accelerate
6
- safetensors
7
- sentencepiece
8
- dashscope
9
- torchvision
10
  peft
 
1
+ git+https://github.com/huggingface/diffusers.git@qwenimage-lru-cache-bypass
2
+ kernels
3
+ torchao==0.11.0
4
+ transformers
5
+ accelerate
6
+ safetensors
7
+ sentencepiece
8
+ dashscope
9
+ torchvision
10
  peft