Upload 8 files
- README.md +7 -5
- app.py +174 -512
- cat.png +3 -0
- flowers.png +3 -0
- monster.png +3 -0
- optimization.py +60 -133
- optimization_utils.py +17 -28
- requirements.txt +5 -11
README.md
CHANGED
```diff
@@ -1,12 +1,14 @@
 ---
-title:
+title: FLUX.1 Kontext
-emoji:
+emoji: ⚡
-colorFrom:
+colorFrom: green
 colorTo: gray
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.34.0
 app_file: app.py
-pinned:
+pinned: true
+license: mit
+short_description: 'Kontext image editing on FLUX[dev] '
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
```
app.py
CHANGED
@@ -1,512 +1,174 @@

Removed (old Wan 2.2 first/last-frame video demo; only the portion recoverable from this rendering is shown):

```python
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
# ...
        scale = MIN_DIMENSION / new_height
    else: # Portrait
        scale = MIN_DIMENSION / new_width
    new_width *= scale
    new_height *= scale

    # Rule 3: Round to the nearest multiple of DIMENSION_MULTIPLE
    final_width = int(round(new_width / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_height = int(round(new_height / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)

    # Ensure final dimensions are at least the minimum
    final_width = max(final_width, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
    final_height = max(final_height, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)

    return image.resize((final_width, final_height), Image.Resampling.LANCZOS)

def resize_and_crop_to_match(target_image, reference_image):
    """Resizes and center-crops the target image to match the reference image's dimensions."""
    ref_width, ref_height = reference_image.size
    target_width, target_height = target_image.size
    scale = max(ref_width / target_width, ref_height / target_height)
    new_width, new_height = int(target_width * scale), int(target_height * scale)
    resized = target_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    left, top = (new_width - ref_width) // 2, (new_height - ref_height) // 2
    return resized.crop((left, top, left + ref_width, top + ref_height))

def init_view():
    return gr.update(interactive = True)

def generate_video(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.1,
    steps=8,
    guidance_scale=1,
    guidance_scale_2=1,
    seed=42,
    randomize_seed=True,
    progress=gr.Progress(track_tqdm=True)
):
    start = time.time()
    allocation_time = 120
    factor = 1

    if input_image_debug_value[0] is not None or end_image_debug_value[0] is not None or prompt_debug_value[0] is not None or total_second_length_debug_value[0] is not None:
        start_image_pil = input_image_debug_value[0]
        end_image_pil = end_image_debug_value[0]
        prompt = prompt_debug_value[0]
        duration_seconds = total_second_length_debug_value[0]
        allocation_time = min(duration_seconds * 60 * 100, 10 * 60)
        factor = 3.1

    if start_image_pil is None or end_image_pil is None:
        raise gr.Error("Please upload both a start and an end image.")

    # Step 1: Process the start image to get our target dimensions based on the new rules.
    processed_start_image = process_image_for_video(start_image_pil)

    # Step 2: Make the end image match the *exact* dimensions of the processed start image.
    processed_end_image = resize_and_crop_to_match(end_image_pil, processed_start_image)

    target_height, target_width = processed_start_image.height, processed_start_image.width

    # Handle seed and frame count
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)

    progress(0.2, desc=f"Generating {num_frames} frames at {target_width}x{target_height} (seed: {current_seed})...")

    progress(0.1, desc="Preprocessing images...")
    output_video, download_button, seed_input = generate_video_on_gpu(
        start_image_pil,
        end_image_pil,
        prompt,
        negative_prompt,
        duration_seconds,
        steps,
        guidance_scale,
        guidance_scale_2,
        seed,
        randomize_seed,
        progress,
        allocation_time,
        factor,
        target_height,
        target_width,
        current_seed,
        num_frames,
        processed_start_image,
        processed_end_image
    )
    progress(1.0, desc="Done!")
    end = time.time()
    secondes = int(end - start)
    minutes = math.floor(secondes / 60)
    secondes = secondes - (minutes * 60)
    hours = math.floor(minutes / 60)
    minutes = minutes - (hours * 60)
    information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
        "The video been generated in " + \
        ((str(hours) + " h, ") if hours != 0 else "") + \
        ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
        str(secondes) + " sec. " + \
        "The video resolution is " + str(target_width) + \
        " pixels large and " + str(target_height) + \
        " pixels high, so a resolution of " + f'{target_width * target_height:,}' + " pixels."
    return [output_video, download_button, seed_input, gr.update(value = information, visible = True), gr.update(interactive = False)]

def get_duration(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
    allocation_time,
    factor,
    target_height,
    target_width,
    current_seed,
    num_frames,
    processed_start_image,
    processed_end_image
):
    return allocation_time

@spaces.GPU(duration=get_duration)
def generate_video_on_gpu(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
    allocation_time,
    factor,
    target_height,
    target_width,
    current_seed,
    num_frames,
    processed_start_image,
    processed_end_image
):
    """
    Generates a video by interpolating between a start and end image, guided by a text prompt,
    using the diffusers Wan2.2 pipeline.
    """
    print("Generate a video with the prompt: " + prompt)

    output_frames_list = pipe(
        image=processed_start_image,
        last_image=processed_end_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=target_height,
        width=target_width,
        num_frames=int(num_frames * factor),
        guidance_scale=float(guidance_scale),
        guidance_scale_2=float(guidance_scale_2),
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]

    progress(0.9, desc="Encoding and saving video...")

    video_path = 'wan_' + datetime.now().strftime("%Y-%m-%d_%H-%M-%S.%f") + '.mp4'

    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    print("Video exported: " + video_path)

    return video_path, gr.update(value = video_path, visible = True), current_seed


# --- 3. Gradio User Interface ---

js = """
function createGradioAnimation() {
    window.addEventListener("beforeunload", function(e) {
        if (document.getElementById('dummy_button_id') && !document.getElementById('dummy_button_id').disabled) {
            var confirmationMessage = 'A process is still running. '
                + 'If you leave before saving, your changes will be lost.';

            (e || window.event).returnValue = confirmationMessage;
        }
        return confirmationMessage;
    });
    return 'Animation created';
}
"""

# Gradio interface
with gr.Blocks(js=js) as app:
    gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
    gr.Markdown("Based on the [Wan 2.2 First/Last Frame workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/), applied to 🧨 Diffusers + [lightx2v/Wan2.2-Lightning](https://huggingface.co/lightx2v/Wan2.2-Lightning) 8-step LoRA")

    with gr.Row(elem_id="general_items"):
        with gr.Column():
            with gr.Group(elem_id="group_all"):
                with gr.Row():
                    start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                    # Capture the Tabs component in a variable and assign IDs to tabs
                    with gr.Tabs(elem_id="group_tabs") as tabs:
                        with gr.TabItem("Upload", id="upload_tab"):
                            end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
                        with gr.TabItem("Generate", id="generate_tab"):
                            generate_5seconds = gr.Button("Generate scene 5 seconds in the future", elem_id="fivesec")
                            gr.Markdown("Generate a custom end-frame with an edit model like [Nano Banana](https://huggingface.co/spaces/multimodalart/nano-banana) or [Qwen Image Edit](https://huggingface.co/spaces/multimodalart/Qwen-Image-Edit-Fast)", elem_id="or_item")
                prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images")

            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=2.1, label="Video Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - high noise")
                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - low noise")
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)

            generate_button = gr.Button("Generate Video", variant="primary")
            dummy_button = gr.Button(elem_id = "dummy_button_id", visible = False, interactive = False)

        with gr.Column():
            output_video = gr.Video(label="Generated Video", autoplay = True, loop = True)
            download_button = gr.DownloadButton(label="Download", visible = True)
            video_information = gr.HTML(value = "", visible = True)

    # Main video generation button
    ui_inputs = [
        start_image,
        end_image,
        prompt,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        guidance_scale_2_input,
        seed_input,
        randomize_seed_checkbox
    ]
    ui_outputs = [output_video, download_button, seed_input, video_information, dummy_button]

    generate_button.click(fn = init_view, inputs = [], outputs = [dummy_button], queue = False, show_progress = False).success(
        fn = generate_video,
        inputs = ui_inputs,
        outputs = ui_outputs
    )

    generate_5seconds.click(
        fn=switch_to_upload_tab,
        inputs=None,
        outputs=[tabs]
    ).then(
        fn=lambda img: generate_end_frame(img, "this image is a still frame from a movie. generate a new frame with what happens on this scene 5 seconds in the future"),
        inputs=[start_image],
        outputs=[end_image]
    ).success(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=ui_outputs
    )

    with gr.Row(visible=False):
        prompt_debug=gr.Textbox(label="Prompt Debug")
        input_image_debug=gr.Image(type="pil", label="Image Debug")
        end_image_debug=gr.Image(type="pil", label="End Image Debug")
        total_second_length_debug=gr.Slider(label="Additional Video Length to Generate (seconds) Debug", minimum=1, maximum=120, value=10, step=0.1)
    gr.Examples(
        examples=[["Schoolboy_without_backpack.webp", "Schoolboy_with_backpack.webp", "The schoolboy puts on his schoolbag."]],
        inputs=[start_image, end_image, prompt],
        outputs=ui_outputs,
        fn=generate_video,
        run_on_click=True,
        cache_examples=True,
        cache_mode='lazy',
    )

    gr.Examples(
        label = "Examples from demo",
        examples = [
            ["poli_tower.png", "tower_takes_off.png", "The man turns around."],
            ["ugly_sonic.jpeg", "squatting_sonic.png", "पात्रं क्षेपणास्त्रं चकमाति।"],
            ["Schoolboy_without_backpack.webp", "Schoolboy_with_backpack.webp", "The schoolboy puts on his schoolbag."],
        ],
        inputs = [start_image, end_image, prompt],
        outputs = ui_outputs,
        fn = generate_video,
        cache_examples = False,
    )

    def handle_field_debug_change(input_image_debug_data, end_image_debug_data, prompt_debug_data, total_second_length_debug_data):
        input_image_debug_value[0] = input_image_debug_data
        end_image_debug_value[0] = end_image_debug_data
        prompt_debug_value[0] = prompt_debug_data
        total_second_length_debug_value[0] = total_second_length_debug_data
        return []

    input_image_debug.upload(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    end_image_debug.upload(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    prompt_debug.change(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    total_second_length_debug.change(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

if __name__ == "__main__":
    app.launch(mcp_server=True, share=True)
```

Added (new FLUX.1 Kontext image-editing demo):

```python
# PyTorch 2.8 (temporary hack)
import os
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')

# Actual demo code
import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image

from diffusers import FluxKontextPipeline
from diffusers.utils import load_image

from optimization import optimize_pipeline_

MAX_SEED = np.iinfo(np.int32).max

pipe = FluxKontextPipeline.from_pretrained("yuvraj108c/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")
optimize_pipeline_(pipe, image=Image.new("RGB", (512, 512)), prompt='prompt')

@spaces.GPU
def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=28, progress=gr.Progress(track_tqdm=True)):
    """
    Perform image editing using the FLUX.1 Kontext pipeline.

    This function takes an input image and a text prompt to generate a modified version
    of the image based on the provided instructions. It uses the FLUX.1 Kontext model
    for contextual image editing tasks.

    Args:
        input_image (PIL.Image.Image): The input image to be edited. Will be converted
            to RGB format if not already in that format.
        prompt (str): Text description of the desired edit to apply to the image.
            Examples: "Remove glasses", "Add a hat", "Change background to beach".
        seed (int, optional): Random seed for reproducible generation. Defaults to 42.
            Must be between 0 and MAX_SEED (2^31 - 1).
        randomize_seed (bool, optional): If True, generates a random seed instead of
            using the provided seed value. Defaults to False.
        guidance_scale (float, optional): Controls how closely the model follows the
            prompt. Higher values mean stronger adherence to the prompt but may reduce
            image quality. Range: 1.0-10.0. Defaults to 2.5.
        steps (int, optional): Controls how many steps to run the diffusion model for.
            Range: 1-30. Defaults to 28.
        progress (gr.Progress, optional): Gradio progress tracker for monitoring
            generation progress. Defaults to gr.Progress(track_tqdm=True).

    Returns:
        tuple: A 3-tuple containing:
            - PIL.Image.Image: The generated/edited image
            - int: The seed value used for generation (useful when randomize_seed=True)
            - gr.update: Gradio update object to make the reuse button visible

    Example:
        >>> edited_image, used_seed, button_update = infer(
        ...     input_image=my_image,
        ...     prompt="Add sunglasses",
        ...     seed=123,
        ...     randomize_seed=False,
        ...     guidance_scale=2.5
        ... )
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if input_image:
        input_image = input_image.convert("RGB")
        image = pipe(
            image=input_image,
            prompt=prompt,
            guidance_scale=guidance_scale,
            width = input_image.size[0],
            height = input_image.size[1],
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        ).images[0]
    else:
        image = pipe(
            prompt=prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=steps,
            generator=torch.Generator().manual_seed(seed),
        ).images[0]
    return image, seed, gr.Button(visible=True)

@spaces.GPU
def infer_example(input_image, prompt):
    image, seed, _ = infer(input_image, prompt)
    return image, seed

css="""
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""# FLUX.1 Kontext [dev]
Image editing and manipulation model guidance-distilled from FLUX.1 Kontext [pro], [[blog]](https://bfl.ai/announcements/flux-1-kontext-dev) [[model]](https://huggingface.co/black-forest-labs/FLUX.1-Kontext-dev)
        """)
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Upload the image for editing", type="pil")
                with gr.Row():
                    prompt = gr.Text(
                        label="Prompt",
                        show_label=False,
                        max_lines=1,
                        placeholder="Enter your prompt for editing (e.g., 'Remove glasses', 'Add a hat')",
                        container=False,
                    )
                    run_button = gr.Button("Run", scale=0)
                with gr.Accordion("Advanced Settings", open=False):

                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )

                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1,
                        maximum=10,
                        step=0.1,
                        value=2.5,
                    )

                    steps = gr.Slider(
                        label="Steps",
                        minimum=1,
                        maximum=30,
                        value=28,
                        step=1
                    )

            with gr.Column():
                result = gr.Image(label="Result", show_label=False, interactive=False)
                reuse_button = gr.Button("Reuse this image", visible=False)

        examples = gr.Examples(
            examples=[
                ["flowers.png", "turn the flowers into sunflowers"],
                ["monster.png", "make this monster ride a skateboard on the beach"],
                ["cat.png", "make this cat happy"]
            ],
            inputs=[input_image, prompt],
            outputs=[result, seed],
            fn=infer_example,
            cache_examples="lazy"
        )

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn = infer,
        inputs = [input_image, prompt, seed, randomize_seed, guidance_scale, steps],
        outputs = [result, seed, reuse_button]
    )
    reuse_button.click(
        fn = lambda image: image,
        inputs = [result],
        outputs = [input_image]
    )

demo.launch(mcp_server=True)
```
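For quick local testing, the core of the new `infer()` path can be reproduced outside Gradio. A minimal sketch, assuming a CUDA GPU with enough memory, the diffusers git build from requirements.txt, and the same checkpoint and example image the Space ships with (the output filename is ours):

```python
import torch
from PIL import Image
from diffusers import FluxKontextPipeline

# Load the same Kontext checkpoint the Space uses.
pipe = FluxKontextPipeline.from_pretrained(
    "yuvraj108c/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16
).to("cuda")

source = Image.open("cat.png").convert("RGB")

# Same call pattern as infer(): keep the output at the input resolution, fixed seed.
edited = pipe(
    image=source,
    prompt="make this cat happy",
    guidance_scale=2.5,
    width=source.size[0],
    height=source.size[1],
    num_inference_steps=28,
    generator=torch.Generator().manual_seed(42),
).images[0]

edited.save("cat_edited.png")
```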
cat.png ADDED (Git LFS Details)
flowers.png ADDED (Git LFS Details)
monster.png ADDED (Git LFS Details)
optimization.py
CHANGED
@@ -1,133 +1,60 @@

Removed (old Wan 2.2 optimization path; only the portion recoverable from this rendering is shown):

```python
"""
"""

from typing import Any
from typing import Callable
from typing import ParamSpec

import spaces
import torch
from torch.utils._pytree import tree_map_only
# ...

def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):

    @spaces.GPU(duration=1500)
    def compile_transformer():

        # This LoRA fusion part remains the same
        pipeline.load_lora_weights(
            "Kijai/WanVideo_comfy",
            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
            adapter_name="lightx2v"
        )
        kwargs_lora = {}
        kwargs_lora["load_into_transformer_2"] = True
        pipeline.load_lora_weights(
            "Kijai/WanVideo_comfy",
            weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
            adapter_name="lightx2v_2", **kwargs_lora
        )
        pipeline.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
        pipeline.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
        pipeline.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
        pipeline.unload_lora_weights()

        # Capture a single call to get the args/kwargs structure
        with capture_component_call(pipeline, 'transformer') as call:
            pipeline(*args, **kwargs)

        dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

        # Quantization remains the same
        quantize_(pipeline.transformer, Float8DynamicActivationFloat8WeightConfig())
        quantize_(pipeline.transformer_2, Float8DynamicActivationFloat8WeightConfig())

        # --- SIMPLIFIED COMPILATION ---

        exported_1 = torch.export.export(
            mod=pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )

        exported_2 = torch.export.export(
            mod=pipeline.transformer_2,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )

        compiled_1 = aoti_compile(exported_1, INDUCTOR_CONFIGS)
        compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)

        # Return the two compiled models
        return compiled_1, compiled_2

    # Quantize text encoder (same as before)
    quantize_(pipeline.text_encoder, Int8WeightOnlyConfig())

    # Get the two dynamically-shaped compiled models
    compiled_transformer_1, compiled_transformer_2 = compile_transformer()

    # --- SIMPLIFIED ASSIGNMENT ---

    pipeline.transformer.forward = compiled_transformer_1
    drain_module_parameters(pipeline.transformer)

    pipeline.transformer_2.forward = compiled_transformer_2
    drain_module_parameters(pipeline.transformer_2)
```

Added (single-transformer FLUX Kontext optimization path):

```python
"""
"""

from typing import Any
from typing import Callable
from typing import ParamSpec

import spaces
import torch
from torch.utils._pytree import tree_map_only

from optimization_utils import capture_component_call
from optimization_utils import aoti_compile


P = ParamSpec('P')


TRANSFORMER_HIDDEN_DIM = torch.export.Dim('hidden', min=4096, max=8212)

TRANSFORMER_DYNAMIC_SHAPES = {
    'hidden_states': {1: TRANSFORMER_HIDDEN_DIM},
    'img_ids': {0: TRANSFORMER_HIDDEN_DIM},
}

INDUCTOR_CONFIGS = {
    'conv_1x1_as_mm': True,
    'epilogue_fusion': False,
    'coordinate_descent_tuning': True,
    'coordinate_descent_check_all_directions': True,
    'max_autotune': True,
    'triton.cudagraphs': True,
}


def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):

    @spaces.GPU(duration=1500)
    def compile_transformer():

        with capture_component_call(pipeline, 'transformer') as call:
            pipeline(*args, **kwargs)

        dynamic_shapes = tree_map_only((torch.Tensor, bool), lambda t: None, call.kwargs)
        dynamic_shapes |= TRANSFORMER_DYNAMIC_SHAPES

        pipeline.transformer.fuse_qkv_projections()

        exported = torch.export.export(
            mod=pipeline.transformer,
            args=call.args,
            kwargs=call.kwargs,
            dynamic_shapes=dynamic_shapes,
        )

        return aoti_compile(exported, INDUCTOR_CONFIGS)

    transformer_config = pipeline.transformer.config
    pipeline.transformer = compile_transformer()
    pipeline.transformer.config = transformer_config # pyright: ignore[reportAttributeAccessIssue]
```
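The Kontext-specific piece above is the dynamic-shape export: the token dimension of `hidden_states`/`img_ids` grows with image size, so it is exported as a symbolic `torch.export.Dim` instead of being baked into the compiled graph. A self-contained sketch of that mechanism with a toy module (module, names, and sizes are illustrative, not from the Space):

```python
import torch

class Toy(torch.nn.Module):
    # Stand-in for the transformer: the second input dimension is the one we mark dynamic.
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(64, 64)

    def forward(self, hidden_states):
        return self.proj(hidden_states)

# Symbolic sequence length, analogous to TRANSFORMER_HIDDEN_DIM above.
seq = torch.export.Dim('seq', min=16, max=512)
example = torch.randn(1, 128, 64)

# Export once with dim 1 of hidden_states left symbolic.
exported = torch.export.export(
    Toy(),
    args=(example,),
    dynamic_shapes={'hidden_states': {1: seq}},
)

# The exported program now accepts any sequence length in [16, 512] without re-exporting.
print(exported.module()(torch.randn(1, 256, 64)).shape)
```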
optimization_utils.py
CHANGED
```diff
@@ -10,6 +10,7 @@ from unittest.mock import patch
 import torch
 from torch._inductor.package.package import package_aoti
 from torch.export.pt2_archive._package import AOTICompiledModel
+from torch.export.pt2_archive._package_weights import TensorProperties
 from torch.export.pt2_archive._package_weights import Weights


@@ -20,33 +21,31 @@ INDUCTOR_CONFIGS_OVERRIDES = {
 }


-class ZeroGPUWeights:
-    def __init__(self, constants_map: dict[str, torch.Tensor], to_cuda: bool = False):
-        if to_cuda:
-            self.constants_map = {name: tensor.to('cuda') for name, tensor in constants_map.items()}
-        else:
-            self.constants_map = constants_map
-    def __reduce__(self):
-        constants_map: dict[str, torch.Tensor] = {}
-        for name, tensor in self.constants_map.items():
-            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
-            constants_map[name] = tensor_.copy_(tensor).detach().share_memory_()
-        return ZeroGPUWeights, (constants_map, True)
-
-
 class ZeroGPUCompiledModel:
-    def __init__(self, archive_file: torch.types.FileLike, weights:
+    def __init__(self, archive_file: torch.types.FileLike, weights: Weights, cuda: bool = False):
         self.archive_file = archive_file
         self.weights = weights
+        if cuda:
+            self.weights_to_cuda_()
         self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar('compiled_model', default=None)
+    def weights_to_cuda_(self):
+        for name in self.weights:
+            tensor, properties = self.weights.get_weight(name)
+            self.weights[name] = (tensor.to('cuda'), properties)
     def __call__(self, *args, **kwargs):
         if (compiled_model := self.compiled_model.get()) is None:
+            constants_map = {name: value[0] for name, value in self.weights.items()}
             compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
-            compiled_model.load_constants(
+            compiled_model.load_constants(constants_map, check_full_update=True, user_managed=True)
             self.compiled_model.set(compiled_model)
         return compiled_model(*args, **kwargs)
     def __reduce__(self):
+        weight_dict: dict[str, tuple[torch.Tensor, TensorProperties]] = {}
+        for name in self.weights:
+            tensor, properties = self.weights.get_weight(name)
+            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
+            weight_dict[name] = (tensor_.copy_(tensor).detach().share_memory_(), properties)
+        return ZeroGPUCompiledModel, (self.archive_file, Weights(weight_dict), True)


 def aoti_compile(
@@ -62,8 +61,7 @@ def aoti_compile(
     files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
-    return ZeroGPUCompiledModel(archive_file, zerogpu_weights)
+    return ZeroGPUCompiledModel(archive_file, weights)


 @contextlib.contextmanager
@@ -96,12 +94,3 @@ def capture_component_call(
     except CapturedCallException as e:
         captured_call.args = e.args
         captured_call.kwargs = e.kwargs
-
-
-def drain_module_parameters(module: torch.nn.Module):
-    state_dict_meta = {name: {'device': tensor.device, 'dtype': tensor.dtype} for name, tensor in module.state_dict().items()}
-    state_dict = {name: torch.nn.Parameter(torch.empty_like(tensor, device='cpu')) for name, tensor in module.state_dict().items()}
-    module.load_state_dict(state_dict, assign=True)
-    for name, param in state_dict.items():
-        meta = state_dict_meta[name]
-        param.data = torch.Tensor([]).to(**meta)
```
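The reworked `__reduce__` above is what lets the AOT-compiled model be pickled over to a ZeroGPU worker: each weight is copied into shared CPU memory on serialization and moved to CUDA on the receiving side. A toy sketch of the same pickling pattern, simplified and not the Space's actual class (it skips `pin_memory()` so it also runs on CPU-only machines):

```python
import pickle
import torch

class SharedWeight:
    # Holds one tensor; pickling re-creates it from a shared-memory CPU copy.
    def __init__(self, tensor: torch.Tensor, cuda: bool = False):
        self.tensor = tensor.to('cuda') if cuda else tensor

    def __reduce__(self):
        cpu = torch.empty_like(self.tensor, device='cpu')
        cpu.copy_(self.tensor)
        # share_memory_() backs the copy with shared memory, as in ZeroGPUCompiledModel.__reduce__.
        return SharedWeight, (cpu.detach().share_memory_(), False)

blob = pickle.dumps(SharedWeight(torch.randn(3)))
print(pickle.loads(blob).tensor)
```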
requirements.txt
CHANGED
```diff
@@ -1,11 +1,5 @@
-sentencepiece==0.2.1
-peft==0.17.1
-ftfy==6.3.1
-imageio-ffmpeg==0.6.0
-opencv-python==4.12.0.88
-torchao==0.11.0
+transformers
+git+https://github.com/huggingface/diffusers.git
+accelerate
+safetensors
+sentencepiece
```