Spaces:
Runtime error
Runtime error
Upload 4 files
Browse files- README.md +8 -17
- app.py +491 -1057
- optimization.py +2 -0
- requirements.txt +10 -42
README.md
CHANGED
|
@@ -1,21 +1,12 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
|
|
|
|
|
|
|
|
|
| 3 |
sdk: gradio
|
| 4 |
-
emoji: 📷
|
| 5 |
sdk_version: 5.29.1
|
| 6 |
app_file: app.py
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
- Upscaling
|
| 12 |
-
- Restoring
|
| 13 |
-
- Image-to-Image
|
| 14 |
-
- Image-2-Image
|
| 15 |
-
- Img-to-Img
|
| 16 |
-
- Img-2-Img
|
| 17 |
-
- language models
|
| 18 |
-
- LLMs
|
| 19 |
-
short_description: Restore blurred or small images with prompt
|
| 20 |
-
suggested_hardware: zero-a10g
|
| 21 |
-
---
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Wan 2 2 First Last Frame
|
| 3 |
+
emoji: 💻
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: gray
|
| 6 |
sdk: gradio
|
|
|
|
| 7 |
sdk_version: 5.29.1
|
| 8 |
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,1057 +1,491 @@
|
|
| 1 |
-
import os
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
import
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
import
|
| 23 |
-
import
|
| 24 |
-
|
| 25 |
-
from
|
| 26 |
-
|
| 27 |
-
import
|
| 28 |
-
|
| 29 |
-
from
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
"
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
#
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
#
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
else
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
)
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
):
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
<p>This is an online demo of SUPIR, a practicing model scaling for photo-realistic image restoration.
|
| 493 |
-
The content added by SUPIR is <b><u>imagination, not real-world information</u></b>.
|
| 494 |
-
SUPIR is for beauty and illustration only.
|
| 495 |
-
Most of the processes last few minutes.
|
| 496 |
-
If you want to upscale AI-generated images, be noticed that <i>PixArt Sigma</i> space can directly generate 5984x5984 images.
|
| 497 |
-
Due to Gradio issues, the generated image is slightly less satured than the original.
|
| 498 |
-
Please leave a <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR/discussions/new">message in discussion</a> if you encounter issues.
|
| 499 |
-
You can also use <a href="https://huggingface.co/spaces/gokaygokay/AuraSR">AuraSR</a> to upscale x4.
|
| 500 |
-
|
| 501 |
-
<p><center><a href="https://arxiv.org/abs/2401.13627">Paper</a>   <a href="http://supir.xpixel.group/">Project Page</a>   <a href="https://huggingface.co/blog/MonsterMMORPG/supir-sota-image-upscale-better-than-magnific-ai">Local Install Guide</a></center></p>
|
| 502 |
-
<p><center><a style="display:inline-block" href='https://github.com/Fanghua-Yu/SUPIR'><img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/Fanghua-Yu/SUPIR?style=social"></a></center></p>
|
| 503 |
-
"""
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
claim_md = """
|
| 507 |
-
## **Piracy**
|
| 508 |
-
The images are not stored but the logs are saved during a month.
|
| 509 |
-
## **How to get SUPIR**
|
| 510 |
-
You can get SUPIR on HuggingFace by [duplicating this space](https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR?duplicate=true) and set GPU.
|
| 511 |
-
You can also install SUPIR on your computer following [this tutorial](https://huggingface.co/blog/MonsterMMORPG/supir-sota-image-upscale-better-than-magnific-ai).
|
| 512 |
-
You can install _Pinokio_ on your computer and then install _SUPIR_ into it. It should be quite easy if you have an Nvidia GPU.
|
| 513 |
-
## **Terms of use**
|
| 514 |
-
By using this service, users are required to agree to the following terms: The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research. Please submit a feedback to us if you get any inappropriate answer! We will collect those to keep improving our models. For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
|
| 515 |
-
## **License**
|
| 516 |
-
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/Fanghua-Yu/SUPIR) of SUPIR.
|
| 517 |
-
"""
|
| 518 |
-
|
| 519 |
-
js = """
|
| 520 |
-
function createGradioAnimation() {
|
| 521 |
-
window.addEventListener("beforeunload", function(e) {
|
| 522 |
-
if (document.getElementById('dummy_button_id') && !document.getElementById('dummy_button_id').disabled) {
|
| 523 |
-
var confirmationMessage = 'A process is still running. '
|
| 524 |
-
+ 'If you leave before saving, your changes will be lost.';
|
| 525 |
-
|
| 526 |
-
(e || window.event).returnValue = confirmationMessage;
|
| 527 |
-
}
|
| 528 |
-
return confirmationMessage;
|
| 529 |
-
});
|
| 530 |
-
return 'Animation created';
|
| 531 |
-
}
|
| 532 |
-
"""
|
| 533 |
-
|
| 534 |
-
# Gradio interface
|
| 535 |
-
with gr.Blocks(js=js) as interface:
|
| 536 |
-
if torch.cuda.device_count() == 0:
|
| 537 |
-
with gr.Row():
|
| 538 |
-
gr.HTML("""
|
| 539 |
-
<p style="background-color: red;"><big><big><big><b>⚠️To use SUPIR, <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR?duplicate=true">duplicate this space</a> and set a GPU with 30 GB VRAM.</b>
|
| 540 |
-
|
| 541 |
-
You can't use SUPIR directly here because this space runs on a CPU, which is not enough for SUPIR. Please provide <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR/discussions/new">feedback</a> if you have issues.
|
| 542 |
-
</big></big></big></p>
|
| 543 |
-
""")
|
| 544 |
-
gr.HTML(title_html)
|
| 545 |
-
|
| 546 |
-
input_image = gr.Image(label="Input (*.png, *.webp, *.jpeg, *.jpg, *.gif, *.bmp, *.avif)", show_label=True, type="filepath", height=600, elem_id="image-input")
|
| 547 |
-
rotation = gr.Radio([["No rotation", 0], ["⤵ Rotate +90°", 90], ["↩ Return 180°", 180], ["⤴ Rotate -90°", -90]], label="Orientation correction", info="Will apply the following rotation before restoring the image; the AI needs a good orientation to understand the content", value=0, visible=False)
|
| 548 |
-
with gr.Group():
|
| 549 |
-
prompt = gr.Textbox(label="Image description", info="Help the AI understand what the image represents; describe as much as possible, especially the details we can't see on the original image; you can write in any language", value="", placeholder="A 33 years old man, walking, in the street, Santiago, morning, Summer, photorealistic", lines=3)
|
| 550 |
-
prompt_hint = gr.HTML("You can use a <a href='"'https://huggingface.co/spaces/badayvedat/LLaVA'"'>LlaVa space</a> to auto-generate the description of your image.")
|
| 551 |
-
upscale = gr.Radio([["x1", 1], ["x2", 2], ["x3", 3], ["x4", 4], ["x5", 5], ["x6", 6], ["x7", 7], ["x8", 8], ["x9", 9], ["x10", 10]], label="Upscale factor", info="Resolution x1 to x10", value=2)
|
| 552 |
-
output_format = gr.Radio([["As input", "input"], ["*.png", "png"], ["*.webp", "webp"], ["*.jpeg", "jpeg"], ["*.gif", "gif"], ["*.bmp", "bmp"]], label="Image format for result", info="File extention", value="input")
|
| 553 |
-
allocation = gr.Slider(label="GPU allocation time (in seconds)", info='lower=May abort run, higher=Quota penalty for next runs; only useful for ZeroGPU; for instance set to 88 when you have the message "You have exceeded your GPU quota (180s requested vs. 89s left)."', value=180, minimum=60, maximum=320, step=1)
|
| 554 |
-
|
| 555 |
-
with gr.Accordion("Pre-denoising (optional)", open=False):
|
| 556 |
-
gamma_correction = gr.Slider(label="Gamma Correction", info = "lower=lighter, higher=darker", minimum=0.1, maximum=2.0, value=1.0, step=0.1)
|
| 557 |
-
denoise_button = gr.Button(value="Pre-denoise")
|
| 558 |
-
denoise_image = gr.Image(label="Denoised image", show_label=True, type="filepath", sources=[], interactive = False, height=600, elem_id="image-s1")
|
| 559 |
-
denoise_information = gr.HTML(value="If present, the denoised image will be used for the restoration instead of the input image.", visible=False)
|
| 560 |
-
|
| 561 |
-
with gr.Accordion("Advanced options", open=False):
|
| 562 |
-
a_prompt = gr.Textbox(label="Additional image description",
|
| 563 |
-
info="Completes the main image description",
|
| 564 |
-
value='Cinematic, High Contrast, highly detailed, taken using a Canon EOS R '
|
| 565 |
-
'camera, hyper detailed photo - realistic maximum detail, 32k, Color '
|
| 566 |
-
'Grading, ultra HD, extreme meticulous detailing, skin pore detailing, clothing fabric detailing, '
|
| 567 |
-
'hyper sharpness, perfect without deformations.',
|
| 568 |
-
lines=3)
|
| 569 |
-
n_prompt = gr.Textbox(label="Negative image description",
|
| 570 |
-
info="Disambiguate by listing what the image does NOT represent",
|
| 571 |
-
value='painting, oil painting, illustration, drawing, art, sketch, anime, '
|
| 572 |
-
'cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, '
|
| 573 |
-
'worst quality, low quality, frames, watermark, signature, jpeg artifacts, '
|
| 574 |
-
'deformed, lowres, over-smooth',
|
| 575 |
-
lines=3)
|
| 576 |
-
edm_steps = gr.Slider(label="Steps", info="lower=faster, higher=more details; too many steps create a checker effect", minimum=1, maximum=200, value=default_setting.edm_steps if torch.cuda.device_count() > 0 else 1, step=1)
|
| 577 |
-
num_samples = gr.Slider(label="Num Samples", info="Number of generated results", minimum=1, maximum=4 if not args.use_image_slider else 1
|
| 578 |
-
, value=1, step=1)
|
| 579 |
-
min_size = gr.Slider(label="Minimum size", info="Minimum height, minimum width of the result", minimum=32, maximum=4096, value=1024, step=32)
|
| 580 |
-
downscale = gr.Radio([["/1", 1], ["/2", 2], ["/3", 3], ["/4", 4], ["/5", 5], ["/6", 6], ["/7", 7], ["/8", 8], ["/9", 9], ["/10", 10]], label="Pre-downscale factor", info="Reducing blurred image reduce the process time", value=1)
|
| 581 |
-
with gr.Row():
|
| 582 |
-
with gr.Column():
|
| 583 |
-
model_select = gr.Radio([["💃 Quality (v0-Q)", "v0-Q"], ["🎯 Fidelity (v0-F)", "v0-F"]], label="Model Selection", info="Pretrained model", value="v0-Q")
|
| 584 |
-
with gr.Column():
|
| 585 |
-
color_fix_type = gr.Radio([["None", "None"], ["AdaIn (improve as a photo)", "AdaIn"], ["Wavelet (for JPEG artifacts)", "Wavelet"]], label="Color-Fix Type", info="AdaIn=Improve following a style, Wavelet=For JPEG artifacts", value="AdaIn")
|
| 586 |
-
s_cfg = gr.Slider(label="Text Guidance Scale", info="lower=follow the image, higher=follow the prompt", minimum=1.0, maximum=15.0,
|
| 587 |
-
value=default_setting.s_cfg_Quality if torch.cuda.device_count() > 0 else 1.0, step=0.1)
|
| 588 |
-
s_stage2 = gr.Slider(label="Restoring Guidance Strength", minimum=0., maximum=1., value=1., step=0.05)
|
| 589 |
-
s_stage1 = gr.Slider(label="Pre-denoising Guidance Strength", minimum=-1.0, maximum=6.0, value=-1.0, step=1.0)
|
| 590 |
-
s_churn = gr.Slider(label="S-Churn", minimum=0, maximum=40, value=5, step=1)
|
| 591 |
-
s_noise = gr.Slider(label="S-Noise", minimum=1.0, maximum=1.1, value=1.003, step=0.001)
|
| 592 |
-
with gr.Row():
|
| 593 |
-
with gr.Column():
|
| 594 |
-
linear_CFG = gr.Checkbox(label="Linear CFG", value=True)
|
| 595 |
-
spt_linear_CFG = gr.Slider(label="CFG Start", minimum=1.0,
|
| 596 |
-
maximum=9.0, value=default_setting.spt_linear_CFG_Quality if torch.cuda.device_count() > 0 else 1.0, step=0.5)
|
| 597 |
-
with gr.Column():
|
| 598 |
-
linear_s_stage2 = gr.Checkbox(label="Linear Restoring Guidance", value=False)
|
| 599 |
-
spt_linear_s_stage2 = gr.Slider(label="Guidance Start", minimum=0.,
|
| 600 |
-
maximum=1., value=0., step=0.05)
|
| 601 |
-
with gr.Column():
|
| 602 |
-
diff_dtype = gr.Radio([["fp32 (precision)", "fp32"], ["fp16 (medium)", "fp16"], ["bf16 (speed)", "bf16"]], label="Diffusion Data Type", value="fp32")
|
| 603 |
-
with gr.Column():
|
| 604 |
-
ae_dtype = gr.Radio([["fp32 (precision)", "fp32"], ["bf16 (speed)", "bf16"]], label="Auto-Encoder Data Type", value="fp32")
|
| 605 |
-
randomize_seed = gr.Checkbox(label = "\U0001F3B2 Randomize seed", value = True, info = "If checked, result is always different")
|
| 606 |
-
seed = gr.Slider(label="Seed", minimum=0, maximum=max_64_bit_int, step=1, randomize=True)
|
| 607 |
-
with gr.Group():
|
| 608 |
-
param_setting = gr.Radio(["Quality", "Fidelity"], label="Presetting", value = "Quality")
|
| 609 |
-
restart_button = gr.Button(value="Apply presetting")
|
| 610 |
-
|
| 611 |
-
with gr.Column():
|
| 612 |
-
diffusion_button = gr.Button(value="🚀 Upscale/Restore", variant = "primary", elem_id = "process_button")
|
| 613 |
-
reset_btn = gr.Button(value="🧹 Reinit page", variant="stop", elem_id="reset_button", visible = False)
|
| 614 |
-
dummy_button = gr.Button(elem_id = "dummy_button_id", visible = False, interactive = False)
|
| 615 |
-
|
| 616 |
-
warning = gr.HTML(elem_id="warning", value = "<center><big>Your computer must <u>not</u> enter into standby mode.</big><br/>On Chrome, you can force to keep a tab alive in <code>chrome://discards/</code></center>", visible = False)
|
| 617 |
-
restore_information = gr.HTML(value = "Restart the process to get another result.", visible = False)
|
| 618 |
-
result_slider = ImageSlider(label = 'Comparator', show_label = False, interactive = False, elem_id = "slider1", show_download_button = False, visible = False)
|
| 619 |
-
result_gallery = gr.Gallery(label = 'Downloadable results', show_label = True, interactive = False, elem_id = "gallery1")
|
| 620 |
-
result_example = gr.HTML(elem_id="result_example", visible = False)
|
| 621 |
-
result_image_example = gr.Image(label="Example Image", visible = False)
|
| 622 |
-
|
| 623 |
-
with gr.Row(elem_id="cache", visible=False):
|
| 624 |
-
input_image_debug=gr.Image(label="Image Debug", type="filepath")
|
| 625 |
-
prompt_debug=gr.Textbox(label="Prompt Debug", value='')
|
| 626 |
-
upscale_debug=gr.Radio([["x1", 1], ["x2", 2], ["x3", 3], ["x4", 4], ["x5", 5], ["x6", 6], ["x7", 7], ["x8", 8], ["x9", 9], ["x10", 10]], label="Upscale factor Debug", info="Resolution x1 to x10", value=2)
|
| 627 |
-
|
| 628 |
-
gr.Examples(
|
| 629 |
-
label = "Examples for cache",
|
| 630 |
-
examples = [
|
| 631 |
-
[
|
| 632 |
-
"./Examples/Example2.jpeg",
|
| 633 |
-
0,
|
| 634 |
-
"./Examples/Example2.jpeg",
|
| 635 |
-
"La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
|
| 636 |
-
"Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
|
| 637 |
-
"painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
|
| 638 |
-
1, # num_samples
|
| 639 |
-
32, # min_size
|
| 640 |
-
1, # downscale
|
| 641 |
-
1, # upscale
|
| 642 |
-
100, # edm_steps
|
| 643 |
-
-1, # s_stage1
|
| 644 |
-
1, # s_stage2
|
| 645 |
-
7.5, # s_cfg
|
| 646 |
-
True, # randomize_seed
|
| 647 |
-
42, # seed
|
| 648 |
-
5, # s_churn
|
| 649 |
-
1.003, # s_noise
|
| 650 |
-
"Wavelet", # color_fix_type
|
| 651 |
-
"fp16", # diff_dtype
|
| 652 |
-
"bf16", # ae_dtype
|
| 653 |
-
1.0, # gamma_correction
|
| 654 |
-
True, # linear_CFG
|
| 655 |
-
4, # spt_linear_CFG
|
| 656 |
-
False, # linear_s_stage2
|
| 657 |
-
0., # spt_linear_s_stage2
|
| 658 |
-
"v0-Q", # model_select
|
| 659 |
-
"input", # output_format
|
| 660 |
-
60 # allocation
|
| 661 |
-
],
|
| 662 |
-
[
|
| 663 |
-
"./Examples/Example2.jpeg",
|
| 664 |
-
0,
|
| 665 |
-
"./Examples/Example2.jpeg",
|
| 666 |
-
"La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
|
| 667 |
-
"Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
|
| 668 |
-
"painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
|
| 669 |
-
4, # num_samples
|
| 670 |
-
32, # min_size
|
| 671 |
-
1, # downscale
|
| 672 |
-
1, # upscale
|
| 673 |
-
100, # edm_steps
|
| 674 |
-
-1, # s_stage1
|
| 675 |
-
1, # s_stage2
|
| 676 |
-
7.5, # s_cfg
|
| 677 |
-
True, # randomize_seed
|
| 678 |
-
42, # seed
|
| 679 |
-
5, # s_churn
|
| 680 |
-
1.003, # s_noise
|
| 681 |
-
"Wavelet", # color_fix_type
|
| 682 |
-
"fp16", # diff_dtype
|
| 683 |
-
"bf16", # ae_dtype
|
| 684 |
-
1.0, # gamma_correction
|
| 685 |
-
True, # linear_CFG
|
| 686 |
-
4, # spt_linear_CFG
|
| 687 |
-
False, # linear_s_stage2
|
| 688 |
-
0., # spt_linear_s_stage2
|
| 689 |
-
"v0-Q", # model_select
|
| 690 |
-
"input", # output_format
|
| 691 |
-
60 # allocation
|
| 692 |
-
]
|
| 693 |
-
],
|
| 694 |
-
run_on_click = True,
|
| 695 |
-
fn = stage2_process_example,
|
| 696 |
-
inputs = [
|
| 697 |
-
input_image,
|
| 698 |
-
rotation,
|
| 699 |
-
denoise_image,
|
| 700 |
-
prompt,
|
| 701 |
-
a_prompt,
|
| 702 |
-
n_prompt,
|
| 703 |
-
num_samples,
|
| 704 |
-
min_size,
|
| 705 |
-
downscale,
|
| 706 |
-
upscale,
|
| 707 |
-
edm_steps,
|
| 708 |
-
s_stage1,
|
| 709 |
-
s_stage2,
|
| 710 |
-
s_cfg,
|
| 711 |
-
randomize_seed,
|
| 712 |
-
seed,
|
| 713 |
-
s_churn,
|
| 714 |
-
s_noise,
|
| 715 |
-
color_fix_type,
|
| 716 |
-
diff_dtype,
|
| 717 |
-
ae_dtype,
|
| 718 |
-
gamma_correction,
|
| 719 |
-
linear_CFG,
|
| 720 |
-
spt_linear_CFG,
|
| 721 |
-
linear_s_stage2,
|
| 722 |
-
spt_linear_s_stage2,
|
| 723 |
-
model_select,
|
| 724 |
-
output_format,
|
| 725 |
-
allocation
|
| 726 |
-
],
|
| 727 |
-
outputs = [
|
| 728 |
-
result_example,
|
| 729 |
-
warning,
|
| 730 |
-
dummy_button,
|
| 731 |
-
prompt_hint
|
| 732 |
-
],
|
| 733 |
-
cache_examples = True,
|
| 734 |
-
)
|
| 735 |
-
|
| 736 |
-
gr.Examples(
|
| 737 |
-
label = "Examples for demo",
|
| 738 |
-
examples = [
|
| 739 |
-
[
|
| 740 |
-
"./Examples/Example1.png",
|
| 741 |
-
0,
|
| 742 |
-
"./Examples/Example1.png",
|
| 743 |
-
"Group of people, walking, happy, in the street, photorealistic, 8k, extremely detailled",
|
| 744 |
-
"Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
|
| 745 |
-
"painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
|
| 746 |
-
2, # num_samples
|
| 747 |
-
1024, # min_size
|
| 748 |
-
1, # downscale
|
| 749 |
-
8, # upscale
|
| 750 |
-
100, # edm_steps
|
| 751 |
-
-1, # s_stage1
|
| 752 |
-
1, # s_stage2
|
| 753 |
-
7.5, # s_cfg
|
| 754 |
-
False, # randomize_seed
|
| 755 |
-
42, # seed
|
| 756 |
-
5, # s_churn
|
| 757 |
-
1.003, # s_noise
|
| 758 |
-
"AdaIn", # color_fix_type
|
| 759 |
-
"fp16", # diff_dtype
|
| 760 |
-
"bf16", # ae_dtype
|
| 761 |
-
1.0, # gamma_correction
|
| 762 |
-
True, # linear_CFG
|
| 763 |
-
4, # spt_linear_CFG
|
| 764 |
-
False, # linear_s_stage2
|
| 765 |
-
0., # spt_linear_s_stage2
|
| 766 |
-
"v0-Q", # model_select
|
| 767 |
-
"input", # output_format
|
| 768 |
-
180 # allocation
|
| 769 |
-
],
|
| 770 |
-
[
|
| 771 |
-
"./Examples/Example2.jpeg",
|
| 772 |
-
0,
|
| 773 |
-
"./Examples/Example2.jpeg",
|
| 774 |
-
"La cabeza de un gato atigrado, en una casa, fotorrealista, 8k, extremadamente detallada",
|
| 775 |
-
"Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
|
| 776 |
-
"painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
|
| 777 |
-
1, # num_samples
|
| 778 |
-
1024, # min_size
|
| 779 |
-
1, # downscale
|
| 780 |
-
1, # upscale
|
| 781 |
-
100, # edm_steps
|
| 782 |
-
-1, # s_stage1
|
| 783 |
-
1, # s_stage2
|
| 784 |
-
7.5, # s_cfg
|
| 785 |
-
False, # randomize_seed
|
| 786 |
-
42, # seed
|
| 787 |
-
5, # s_churn
|
| 788 |
-
1.003, # s_noise
|
| 789 |
-
"Wavelet", # color_fix_type
|
| 790 |
-
"fp16", # diff_dtype
|
| 791 |
-
"bf16", # ae_dtype
|
| 792 |
-
1.0, # gamma_correction
|
| 793 |
-
True, # linear_CFG
|
| 794 |
-
4, # spt_linear_CFG
|
| 795 |
-
False, # linear_s_stage2
|
| 796 |
-
0., # spt_linear_s_stage2
|
| 797 |
-
"v0-Q", # model_select
|
| 798 |
-
"input", # output_format
|
| 799 |
-
60 # allocation
|
| 800 |
-
],
|
| 801 |
-
[
|
| 802 |
-
"./Examples/Example3.webp",
|
| 803 |
-
0,
|
| 804 |
-
"./Examples/Example3.webp",
|
| 805 |
-
"A red apple",
|
| 806 |
-
"Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
|
| 807 |
-
"painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
|
| 808 |
-
1, # num_samples
|
| 809 |
-
1024, # min_size
|
| 810 |
-
1, # downscale
|
| 811 |
-
1, # upscale
|
| 812 |
-
200, # edm_steps
|
| 813 |
-
-1, # s_stage1
|
| 814 |
-
1, # s_stage2
|
| 815 |
-
7.5, # s_cfg
|
| 816 |
-
False, # randomize_seed
|
| 817 |
-
42, # seed
|
| 818 |
-
5, # s_churn
|
| 819 |
-
1.003, # s_noise
|
| 820 |
-
"Wavelet", # color_fix_type
|
| 821 |
-
"fp16", # diff_dtype
|
| 822 |
-
"bf16", # ae_dtype
|
| 823 |
-
1.0, # gamma_correction
|
| 824 |
-
True, # linear_CFG
|
| 825 |
-
4, # spt_linear_CFG
|
| 826 |
-
False, # linear_s_stage2
|
| 827 |
-
0., # spt_linear_s_stage2
|
| 828 |
-
"v0-Q", # model_select
|
| 829 |
-
"input", # output_format
|
| 830 |
-
180 # allocation
|
| 831 |
-
],
|
| 832 |
-
[
|
| 833 |
-
"./Examples/Example3.webp",
|
| 834 |
-
0,
|
| 835 |
-
"./Examples/Example3.webp",
|
| 836 |
-
"A red marble",
|
| 837 |
-
"Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera, hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing, skin pore detailing, hyper sharpness, perfect without deformations.",
|
| 838 |
-
"painting, oil painting, illustration, drawing, art, sketch, anime, cartoon, CG Style, 3D render, unreal engine, blurring, aliasing, pixel, unsharp, weird textures, ugly, dirty, messy, worst quality, low quality, frames, watermark, signature, jpeg artifacts, deformed, lowres, over-smooth",
|
| 839 |
-
1, # num_samples
|
| 840 |
-
1024, # min_size
|
| 841 |
-
1, # downscale
|
| 842 |
-
1, # upscale
|
| 843 |
-
200, # edm_steps
|
| 844 |
-
-1, # s_stage1
|
| 845 |
-
1, # s_stage2
|
| 846 |
-
7.5, # s_cfg
|
| 847 |
-
False, # randomize_seed
|
| 848 |
-
42, # seed
|
| 849 |
-
5, # s_churn
|
| 850 |
-
1.003, # s_noise
|
| 851 |
-
"Wavelet", # color_fix_type
|
| 852 |
-
"fp16", # diff_dtype
|
| 853 |
-
"bf16", # ae_dtype
|
| 854 |
-
1.0, # gamma_correction
|
| 855 |
-
True, # linear_CFG
|
| 856 |
-
4, # spt_linear_CFG
|
| 857 |
-
False, # linear_s_stage2
|
| 858 |
-
0., # spt_linear_s_stage2
|
| 859 |
-
"v0-Q", # model_select
|
| 860 |
-
"input", # output_format
|
| 861 |
-
180 # allocation
|
| 862 |
-
],
|
| 863 |
-
],
|
| 864 |
-
run_on_click = True,
|
| 865 |
-
fn = stage2_process,
|
| 866 |
-
inputs = [
|
| 867 |
-
input_image,
|
| 868 |
-
rotation,
|
| 869 |
-
denoise_image,
|
| 870 |
-
prompt,
|
| 871 |
-
a_prompt,
|
| 872 |
-
n_prompt,
|
| 873 |
-
num_samples,
|
| 874 |
-
min_size,
|
| 875 |
-
downscale,
|
| 876 |
-
upscale,
|
| 877 |
-
edm_steps,
|
| 878 |
-
s_stage1,
|
| 879 |
-
s_stage2,
|
| 880 |
-
s_cfg,
|
| 881 |
-
randomize_seed,
|
| 882 |
-
seed,
|
| 883 |
-
s_churn,
|
| 884 |
-
s_noise,
|
| 885 |
-
color_fix_type,
|
| 886 |
-
diff_dtype,
|
| 887 |
-
ae_dtype,
|
| 888 |
-
gamma_correction,
|
| 889 |
-
linear_CFG,
|
| 890 |
-
spt_linear_CFG,
|
| 891 |
-
linear_s_stage2,
|
| 892 |
-
spt_linear_s_stage2,
|
| 893 |
-
model_select,
|
| 894 |
-
output_format,
|
| 895 |
-
allocation
|
| 896 |
-
],
|
| 897 |
-
outputs = [
|
| 898 |
-
result_slider,
|
| 899 |
-
result_gallery,
|
| 900 |
-
restore_information,
|
| 901 |
-
reset_btn,
|
| 902 |
-
warning,
|
| 903 |
-
dummy_button
|
| 904 |
-
],
|
| 905 |
-
cache_examples = False,
|
| 906 |
-
)
|
| 907 |
-
|
| 908 |
-
with gr.Row():
|
| 909 |
-
gr.Markdown(claim_md)
|
| 910 |
-
|
| 911 |
-
input_image.upload(fn = check_upload, inputs = [
|
| 912 |
-
input_image
|
| 913 |
-
], outputs = [
|
| 914 |
-
rotation
|
| 915 |
-
], queue = False, show_progress = False)
|
| 916 |
-
|
| 917 |
-
denoise_button.click(fn = check_and_update, inputs = [
|
| 918 |
-
input_image
|
| 919 |
-
], outputs = [warning, dummy_button], queue = False, show_progress = False).success(fn = stage1_process, inputs = [
|
| 920 |
-
input_image,
|
| 921 |
-
gamma_correction,
|
| 922 |
-
diff_dtype,
|
| 923 |
-
ae_dtype
|
| 924 |
-
], outputs=[
|
| 925 |
-
denoise_image,
|
| 926 |
-
denoise_information,
|
| 927 |
-
dummy_button
|
| 928 |
-
])
|
| 929 |
-
|
| 930 |
-
diffusion_button.click(fn = update_seed, inputs = [
|
| 931 |
-
randomize_seed,
|
| 932 |
-
seed
|
| 933 |
-
], outputs = [
|
| 934 |
-
seed
|
| 935 |
-
], queue = False, show_progress = False).then(fn = check_and_update, inputs = [
|
| 936 |
-
input_image
|
| 937 |
-
], outputs = [warning, dummy_button], queue = False, show_progress = False).success(fn=stage2_process, inputs = [
|
| 938 |
-
input_image,
|
| 939 |
-
rotation,
|
| 940 |
-
denoise_image,
|
| 941 |
-
prompt,
|
| 942 |
-
a_prompt,
|
| 943 |
-
n_prompt,
|
| 944 |
-
num_samples,
|
| 945 |
-
min_size,
|
| 946 |
-
downscale,
|
| 947 |
-
upscale,
|
| 948 |
-
edm_steps,
|
| 949 |
-
s_stage1,
|
| 950 |
-
s_stage2,
|
| 951 |
-
s_cfg,
|
| 952 |
-
randomize_seed,
|
| 953 |
-
seed,
|
| 954 |
-
s_churn,
|
| 955 |
-
s_noise,
|
| 956 |
-
color_fix_type,
|
| 957 |
-
diff_dtype,
|
| 958 |
-
ae_dtype,
|
| 959 |
-
gamma_correction,
|
| 960 |
-
linear_CFG,
|
| 961 |
-
spt_linear_CFG,
|
| 962 |
-
linear_s_stage2,
|
| 963 |
-
spt_linear_s_stage2,
|
| 964 |
-
model_select,
|
| 965 |
-
output_format,
|
| 966 |
-
allocation
|
| 967 |
-
], outputs = [
|
| 968 |
-
result_slider,
|
| 969 |
-
result_gallery,
|
| 970 |
-
restore_information,
|
| 971 |
-
reset_btn,
|
| 972 |
-
warning,
|
| 973 |
-
dummy_button
|
| 974 |
-
]).success(fn = log_information, inputs = [
|
| 975 |
-
result_gallery
|
| 976 |
-
], outputs = [], queue = False, show_progress = False)
|
| 977 |
-
|
| 978 |
-
result_gallery.change(on_select_result, [result_slider, result_gallery], result_slider)
|
| 979 |
-
result_gallery.select(on_select_result, [result_slider, result_gallery], result_slider)
|
| 980 |
-
result_example.change(on_render_image_example, result_example, result_image_example)
|
| 981 |
-
|
| 982 |
-
restart_button.click(fn = load_and_reset, inputs = [
|
| 983 |
-
param_setting
|
| 984 |
-
], outputs = [
|
| 985 |
-
edm_steps,
|
| 986 |
-
s_cfg,
|
| 987 |
-
s_stage2,
|
| 988 |
-
s_stage1,
|
| 989 |
-
s_churn,
|
| 990 |
-
s_noise,
|
| 991 |
-
a_prompt,
|
| 992 |
-
n_prompt,
|
| 993 |
-
color_fix_type,
|
| 994 |
-
linear_CFG,
|
| 995 |
-
spt_linear_CFG,
|
| 996 |
-
linear_s_stage2,
|
| 997 |
-
spt_linear_s_stage2,
|
| 998 |
-
model_select
|
| 999 |
-
])
|
| 1000 |
-
|
| 1001 |
-
reset_btn.click(fn = reset, inputs = [], outputs = [
|
| 1002 |
-
input_image,
|
| 1003 |
-
rotation,
|
| 1004 |
-
denoise_image,
|
| 1005 |
-
prompt,
|
| 1006 |
-
a_prompt,
|
| 1007 |
-
n_prompt,
|
| 1008 |
-
num_samples,
|
| 1009 |
-
min_size,
|
| 1010 |
-
downscale,
|
| 1011 |
-
upscale,
|
| 1012 |
-
edm_steps,
|
| 1013 |
-
s_stage1,
|
| 1014 |
-
s_stage2,
|
| 1015 |
-
s_cfg,
|
| 1016 |
-
randomize_seed,
|
| 1017 |
-
seed,
|
| 1018 |
-
s_churn,
|
| 1019 |
-
s_noise,
|
| 1020 |
-
color_fix_type,
|
| 1021 |
-
diff_dtype,
|
| 1022 |
-
ae_dtype,
|
| 1023 |
-
gamma_correction,
|
| 1024 |
-
linear_CFG,
|
| 1025 |
-
spt_linear_CFG,
|
| 1026 |
-
linear_s_stage2,
|
| 1027 |
-
spt_linear_s_stage2,
|
| 1028 |
-
model_select,
|
| 1029 |
-
output_format,
|
| 1030 |
-
allocation
|
| 1031 |
-
], queue = False, show_progress = False)
|
| 1032 |
-
|
| 1033 |
-
def handle_field_debug_change(input_image_debug_data, prompt_debug_data, upscale_debug_data):
|
| 1034 |
-
input_image_debug_value[0] = input_image_debug_data
|
| 1035 |
-
prompt_debug_value[0] = prompt_debug_data
|
| 1036 |
-
upscale_debug_value[0] = upscale_debug_data
|
| 1037 |
-
return []
|
| 1038 |
-
|
| 1039 |
-
input_image_debug.upload(
|
| 1040 |
-
fn=handle_field_debug_change,
|
| 1041 |
-
inputs=[input_image_debug, prompt_debug, upscale_debug],
|
| 1042 |
-
outputs=[]
|
| 1043 |
-
)
|
| 1044 |
-
|
| 1045 |
-
prompt_debug.change(
|
| 1046 |
-
fn=handle_field_debug_change,
|
| 1047 |
-
inputs=[input_image_debug, prompt_debug, upscale_debug],
|
| 1048 |
-
outputs=[]
|
| 1049 |
-
)
|
| 1050 |
-
|
| 1051 |
-
upscale_debug.change(
|
| 1052 |
-
fn=handle_field_debug_change,
|
| 1053 |
-
inputs=[input_image_debug, prompt_debug, upscale_debug],
|
| 1054 |
-
outputs=[]
|
| 1055 |
-
)
|
| 1056 |
-
|
| 1057 |
-
interface.queue(10).launch(mcp_server=True, ssr_mode=False)
|
|
|
|
import os

# PyTorch 2.8 (temporary hack): install a nightly torch wheel before anything
# imports torch. NOTE(review): running pip via os.system at import time is
# fragile — prefer pinning the wheel in requirements.txt once a stable build exists.
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')

# --- 1. Model Download and Setup (Diffusers Backend) ---
try:
    import spaces
except ImportError:
    # Fallback stub so the app also runs outside Hugging Face ZeroGPU Spaces:
    # `@spaces.GPU(...)` becomes a pass-through decorator.
    # (Was a bare `except:`; narrowed to ImportError so real errors surface.)
    class spaces():
        def GPU(*args, **kwargs):
            def decorator(function):
                return lambda *dummy_args, **dummy_kwargs: function(*dummy_args, **dummy_kwargs)
            return decorator

import torch
from diffusers import FlowMatchEulerDiscreteScheduler
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import time
from datetime import datetime
import numpy as np
from PIL import Image
import random
import math
import gc
from gradio_client import Client, handle_file  # Import for API call

# Import the optimization function from the separate file
from optimization import optimize_pipeline_
|
# --- Constants and Model Loading ---
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

# --- NEW: Flexible Dimension Constants ---
MAX_DIMENSION = 832          # longest side is scaled DOWN to at most this
MIN_DIMENSION = 480          # shortest side is scaled UP to at least this
DIMENSION_MULTIPLE = 16      # final sides are rounded to a multiple of this
SQUARE_SIZE = 480            # square inputs are resized to this fixed edge

MAX_SEED = np.iinfo(np.int32).max

FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81

MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)

# Single-element lists used as mutable cells: the hidden debug widgets write
# into them (see handle_field_debug_change) and generate_video reads them.
input_image_debug_value = [None]
end_image_debug_value = [None]
prompt_debug_value = [None]
total_second_length_debug_value = [None]

default_negative_prompt = "Vibrant colors, overexposure, static, blurred details, subtitles, error, style, artwork, painting, image, still, overall gray, worst quality, low quality, JPEG compression residue, ugly, mutilated, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, malformed limbs, fused fingers, still image, cluttered background, three legs, many people in the background, walking backwards, overexposure, jumpcut, crossfader, "

print("Loading transformer...")

transformer = WanTransformer3DModel.from_pretrained(
    'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
    subfolder='transformer',
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

print("Loading transformer 2...")  # fixed log typo: was "Loadingtransformer 2..."

transformer_2 = WanTransformer3DModel.from_pretrained(
    'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
    subfolder='transformer_2',
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

print("Loading models into memory. This may take a few minutes...")

pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer,
    transformer_2=transformer_2,
    torch_dtype=torch.bfloat16,
)
print("Loading scheduler...")
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=8.0)
pipe.to('cuda')

print("Clean cache...")
# Several GC passes to release load-time temporaries before compilation.
for _ in range(3):
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

print("Optimizing pipeline...")

# Warm up / ahead-of-time compile the pipeline at the maximum landscape
# resolution so later UI requests do not pay the compile cost.
optimize_pipeline_(
    pipe,
    image=Image.new('RGB', (MAX_DIMENSION, MIN_DIMENSION)),
    prompt='prompt',
    height=MIN_DIMENSION,
    width=MAX_DIMENSION,
    num_frames=MAX_FRAMES_MODEL,
)
print("All models loaded and optimized. Gradio app is ready.")
# --- 2. Image Processing and Application Logic ---
def generate_end_frame(start_img, gen_prompt, progress=gr.Progress(track_tqdm=True)):
    """Calls an external Gradio API to generate an image."""
    if start_img is None:
        raise gr.Error("Please provide a Start Frame first.")

    token = os.getenv("HF_TOKEN")
    if not token:
        raise gr.Error("HF_TOKEN not found in environment variables. Please set it in your Space secrets.")

    # Write the start frame to a temporary PNG so the remote endpoint can ingest it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as handle:
        start_img.save(handle.name)
        frame_path = handle.name

    progress(0.1, desc="Connecting to image generation API...")
    remote = Client("multimodalart/nano-banana-private")

    progress(0.5, desc=f"Generating with prompt: '{gen_prompt}'...")
    try:
        generated = remote.predict(
            prompt=gen_prompt,
            images=[{"image": handle_file(frame_path)}],
            manual_token=token,
            api_name="/unified_image_generator",
        )
    finally:
        # Always remove the temporary file, even when the remote call fails.
        os.remove(frame_path)

    progress(1.0, desc="Done!")
    print(generated)
    return generated
|
def switch_to_upload_tab():
    """Returns a gr.Tabs update to switch to the first tab."""
    # Select by the string id given to the tab in the UI definition.
    selected_tab = gr.Tabs(selected="upload_tab")
    return selected_tab
|
def process_image_for_video(image: Image.Image) -> Image.Image:
    """
    Resize *image* to dimensions suitable for video generation.

    Rules applied, in order:
      * square inputs are mapped to a fixed SQUARE_SIZE x SQUARE_SIZE;
      * the longest side is shrunk to MAX_DIMENSION when it exceeds it;
      * the shortest side is grown to MIN_DIMENSION when it falls below it;
      * both sides are rounded to the nearest multiple of DIMENSION_MULTIPLE
        and clamped so they never drop under their floor.
    The aspect ratio is preserved as closely as the rounding allows.
    """
    width, height = image.size

    # Squares get a fixed treatment.
    if width == height:
        return image.resize((SQUARE_SIZE, SQUARE_SIZE), Image.Resampling.LANCZOS)

    aspect_ratio = width / height
    landscape = aspect_ratio > 1
    new_width, new_height = width, height

    # Shrink: cap the longest side at MAX_DIMENSION.
    if new_width > MAX_DIMENSION or new_height > MAX_DIMENSION:
        shrink = MAX_DIMENSION / (new_width if landscape else new_height)
        new_width, new_height = new_width * shrink, new_height * shrink

    # Grow: raise the shortest side to MIN_DIMENSION.
    if new_width < MIN_DIMENSION or new_height < MIN_DIMENSION:
        grow = MIN_DIMENSION / (new_height if landscape else new_width)
        new_width, new_height = new_width * grow, new_height * grow

    # Snap both sides onto the DIMENSION_MULTIPLE grid.
    final_width = int(round(new_width / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_height = int(round(new_height / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)

    # Never let rounding push a side below its minimum.
    final_width = max(final_width, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
    final_height = max(final_height, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)

    return image.resize((final_width, final_height), Image.Resampling.LANCZOS)
def resize_and_crop_to_match(target_image, reference_image):
    """Scale *target_image* just enough to cover the reference size, then center-crop to it."""
    ref_w, ref_h = reference_image.size
    src_w, src_h = target_image.size
    # Cover-fit: the larger of the two ratios guarantees both sides reach the reference.
    zoom = max(ref_w / src_w, ref_h / src_h)
    scaled_w = int(src_w * zoom)
    scaled_h = int(src_h * zoom)
    scaled = target_image.resize((scaled_w, scaled_h), Image.Resampling.LANCZOS)
    # Center the crop window inside the scaled image.
    x0 = (scaled_w - ref_w) // 2
    y0 = (scaled_h - ref_h) // 2
    return scaled.crop((x0, y0, x0 + ref_w, y0 + ref_h))
def generate_video(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.1,
    steps=8,
    guidance_scale=1,
    guidance_scale_2=1,
    seed=42,
    randomize_seed=True,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Generate a video interpolating between a start and an end frame.

    Preprocesses both frames to model-friendly dimensions, dispatches the heavy
    work to the GPU-decorated generate_video_on_gpu, and builds a human-readable
    timing/resolution report.

    Returns [video_path, download_button_update, seed_used, information_update]
    matching the Gradio outputs list.
    """
    start = time.time()
    allocation_time = 120
    factor = 1

    # Hidden debug widgets override the UI inputs when any of them is populated.
    if input_image_debug_value[0] is not None or end_image_debug_value[0] is not None or prompt_debug_value[0] is not None or total_second_length_debug_value[0] is not None:
        start_image_pil = input_image_debug_value[0]
        end_image_pil = end_image_debug_value[0]
        prompt = prompt_debug_value[0]
        duration_seconds = total_second_length_debug_value[0]
        # Allow a longer GPU allocation in debug mode, capped at 10 minutes.
        allocation_time = min(duration_seconds * 60 * 100, 10 * 60)
        factor = 3.1

    if start_image_pil is None or end_image_pil is None:
        raise gr.Error("Please upload both a start and an end image.")

    # Fixed: this 0.1 progress step used to be reported AFTER the 0.2 step,
    # making the progress bar jump backwards.
    progress(0.1, desc="Preprocessing images...")

    # Step 1: Process the start image to get our target dimensions.
    processed_start_image = process_image_for_video(start_image_pil)

    # Step 2: Make the end image match the *exact* dimensions of the processed start image.
    processed_end_image = resize_and_crop_to_match(end_image_pil, processed_start_image)

    target_height, target_width = processed_start_image.height, processed_start_image.width

    # Handle seed and frame count.
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)

    progress(0.2, desc=f"Generating {num_frames} frames at {target_width}x{target_height} (seed: {current_seed})...")

    output_video, download_button, seed_input = generate_video_on_gpu(
        start_image_pil,
        end_image_pil,
        prompt,
        negative_prompt,
        duration_seconds,
        steps,
        guidance_scale,
        guidance_scale_2,
        seed,
        randomize_seed,
        progress,
        allocation_time,
        factor,
        target_height,
        target_width,
        current_seed,
        num_frames,
        processed_start_image,
        processed_end_image
    )
    progress(1.0, desc="Done!")

    # Build the elapsed-time / resolution report shown below the video.
    elapsed = int(time.time() - start)
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(minutes, 60)
    information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
        "The video has been generated in " + \
        ((str(hours) + " h, ") if hours != 0 else "") + \
        ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
        str(seconds) + " sec. " + \
        "The video resolution is " + str(target_width) + \
        " pixels large and " + str(target_height) + \
        " pixels high, so a resolution of " + f'{target_width * target_height:,}' + " pixels."
    return [output_video, download_button, seed_input, gr.update(value=information, visible=True)]
def get_duration(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
    allocation_time,
    factor,
    target_height,
    target_width,
    current_seed,
    num_frames,
    processed_start_image,
    processed_end_image
):
    # Duration resolver for @spaces.GPU on generate_video_on_gpu: the ZeroGPU
    # runtime invokes it with the same arguments as the decorated function and
    # uses the returned value as the GPU allocation budget in seconds.
    # Only `allocation_time` is consumed; the remaining parameters exist solely
    # to mirror the decorated function's signature.
    return allocation_time
@spaces.GPU(duration=get_duration)
def generate_video_on_gpu(
    start_image_pil,
    end_image_pil,
    prompt,
    negative_prompt,
    duration_seconds,
    steps,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
    allocation_time,
    factor,
    target_height,
    target_width,
    current_seed,
    num_frames,
    processed_start_image,
    processed_end_image
):
    """
    Generates a video by interpolating between a start and end image, guided by a text prompt,
    using the diffusers Wan2.2 pipeline.

    Runs on the ZeroGPU allocation sized by get_duration (which returns
    `allocation_time`). Only the processed images, prompt/negative prompt,
    target dimensions, frame count, guidance scales, step count and seed are
    consumed here; the other parameters are carried to keep the signature in
    sync with get_duration.

    Returns (video_path, download-button update, seed used).
    """
    print("Generate a video with the prompt: " + prompt)

    # NOTE(review): in debug mode factor is 3.1, so num_frames * factor can
    # exceed MAX_FRAMES_MODEL (81) — confirm the pipeline tolerates that.
    output_frames_list = pipe(
        image=processed_start_image,
        last_image=processed_end_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=target_height,
        width=target_width,
        num_frames=int(num_frames * factor),
        guidance_scale=float(guidance_scale),
        guidance_scale_2=float(guidance_scale_2),
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]

    progress(0.9, desc="Encoding and saving video...")

    # Timestamped file name (microseconds included) to avoid collisions
    # between concurrent requests.
    video_path = 'wan_' + datetime.now().strftime("%Y-%m-%d_%H-%M-%S.%f") + '.mp4'
    print("Exporting video: " + video_path)

    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    print("Video exported: " + video_path)

    return video_path, gr.update(value = video_path, visible = True), current_seed
# --- 3. Gradio User Interface ---
# NOTE(review): indentation below is reconstructed from an extraction that
# stripped leading whitespace — confirm component nesting against the running app.

with gr.Blocks() as app:
    gr.Markdown("# Wan 2.2 First/Last Frame Video Fast")
    gr.Markdown("Based on the [Wan 2.2 First/Last Frame workflow](https://www.reddit.com/r/StableDiffusion/comments/1me4306/psa_wan_22_does_first_frame_last_frame_out_of_the/), applied to 🧨 Diffusers + [lightx2v/Wan2.2-Lightning](https://huggingface.co/lightx2v/Wan2.2-Lightning) 8-step LoRA")

    with gr.Row(elem_id="general_items"):
        with gr.Column():
            with gr.Group(elem_id="group_all"):
                with gr.Row():
                    start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                    # Capture the Tabs component in a variable and assign IDs to tabs
                    # (switch_to_upload_tab selects "upload_tab" by id).
                    with gr.Tabs(elem_id="group_tabs") as tabs:
                        with gr.TabItem("Upload", id="upload_tab"):
                            end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
                        with gr.TabItem("Generate", id="generate_tab"):
                            generate_5seconds = gr.Button("Generate scene 5 seconds in the future", elem_id="fivesec")
                            gr.Markdown("Generate a custom end-frame with an edit model like [Nano Banana](https://huggingface.co/spaces/multimodalart/nano-banana) or [Qwen Image Edit](https://huggingface.co/spaces/multimodalart/Qwen-Image-Edit-Fast)", elem_id="or_item")
                prompt = gr.Textbox(label="Prompt", info="Describe the transition between the two images")

            with gr.Accordion("Advanced Settings", open=False):
                duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=2.1, label="Video Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - high noise")
                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale - low noise")
                with gr.Row():
                    seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                    randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True)

            generate_button = gr.Button("Generate Video", variant="primary")

        with gr.Column():
            output_video = gr.Video(label="Generated Video", autoplay = True, loop = True)
            download_button = gr.DownloadButton(label="Download", visible = True)
            video_information = gr.HTML(value = "", visible = True)

    # Main video generation button
    ui_inputs = [
        start_image,
        end_image,
        prompt,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        guidance_scale_2_input,
        seed_input,
        randomize_seed_checkbox
    ]
    ui_outputs = [output_video, download_button, seed_input, video_information]

    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=ui_outputs
    )

    # "5 seconds in the future": switch back to the Upload tab, generate an
    # end frame via the external API, then run the normal video generation.
    generate_5seconds.click(
        fn=switch_to_upload_tab,
        inputs=None,
        outputs=[tabs]
    ).then(
        fn=lambda img: generate_end_frame(img, "this image is a still frame from a movie. generate a new frame with what happens on this scene 5 seconds in the future"),
        inputs=[start_image],
        outputs=[end_image]
    ).success(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=ui_outputs
    )

    # Hidden debug widgets: values are mirrored into module-level cells that
    # generate_video reads to override the visible UI inputs.
    with gr.Row(visible=False):
        input_image_debug=gr.Image(type="pil", label="Image Debug")
        end_image_debug=gr.Image(type="pil", label="End Image Debug")
        prompt_debug=gr.Textbox(label="Prompt Debug")
        total_second_length_debug=gr.Slider(label="Additional Video Length to Generate (seconds) Debug", minimum=1, maximum=120, value=10, step=0.1)

    # NOTE(review): placement of this lazily-cached Examples block relative to
    # the hidden row is reconstructed — confirm intended visibility.
    gr.Examples(
        examples=[["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"]],
        inputs=[start_image, end_image, prompt],
        outputs=ui_outputs,
        fn=generate_video,
        run_on_click=True,
        cache_examples=True,
        cache_mode='lazy',
    )

    gr.Examples(
        label = "Examples from demo",
        examples = [
            ["poli_tower.png", "tower_takes_off.png", "the man turns around"],
            ["ugly_sonic.jpeg", "squatting_sonic.png", "the character dodges the missiles"],
            ["capyabara_zoomed.png", "capyabara.webp", "a dramatic dolly zoom"],
        ],
        inputs = [start_image, end_image, prompt],
        outputs = ui_outputs,
        fn = generate_video,
        cache_examples = False,
    )

    def handle_field_debug_change(input_image_debug_data, end_image_debug_data, prompt_debug_data, total_second_length_debug_data):
        # Copy the hidden widgets into the module-level mutable cells.
        input_image_debug_value[0] = input_image_debug_data
        end_image_debug_value[0] = end_image_debug_data
        prompt_debug_value[0] = prompt_debug_data
        total_second_length_debug_value[0] = total_second_length_debug_data
        return []

    input_image_debug.upload(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    end_image_debug.upload(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    prompt_debug.change(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

    total_second_length_debug.change(
        fn=handle_field_debug_change,
        inputs=[input_image_debug, end_image_debug, prompt_debug, total_second_length_debug],
        outputs=[]
    )

if __name__ == "__main__":
    # NOTE(review): share=True is unnecessary inside a Hugging Face Space
    # (gradio warns/ignores it there) — confirm this is intentional.
    app.launch(mcp_server=True, share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
optimization.py
CHANGED
|
@@ -65,6 +65,7 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
|
|
| 65 |
|
| 66 |
@spaces.GPU(duration=1500)
|
| 67 |
def compile_transformer():
|
|
|
|
| 68 |
|
| 69 |
# This LoRA fusion part remains the same
|
| 70 |
pipeline.load_lora_weights(
|
|
@@ -115,6 +116,7 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
|
|
| 115 |
compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
|
| 116 |
|
| 117 |
# Return the two compiled models
|
|
|
|
| 118 |
return compiled_1, compiled_2
|
| 119 |
|
| 120 |
|
|
|
|
| 65 |
|
| 66 |
@spaces.GPU(duration=1500)
|
| 67 |
def compile_transformer():
|
| 68 |
+
print("Start compile_transformer()")
|
| 69 |
|
| 70 |
# This LoRA fusion part remains the same
|
| 71 |
pipeline.load_lora_weights(
|
|
|
|
| 116 |
compiled_2 = aoti_compile(exported_2, INDUCTOR_CONFIGS)
|
| 117 |
|
| 118 |
# Return the two compiled models
|
| 119 |
+
print("End compile_transformer()")
|
| 120 |
return compiled_1, compiled_2
|
| 121 |
|
| 122 |
|
requirements.txt
CHANGED
|
@@ -1,43 +1,11 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
wandb==0.20.1
|
| 13 |
-
httpx==0.28.1
|
| 14 |
-
transformers==4.43.0
|
| 15 |
-
accelerate==1.8.0
|
| 16 |
-
scikit-learn==1.7.0
|
| 17 |
-
einops==0.8.1
|
| 18 |
-
einops-exts==0.0.4
|
| 19 |
-
timm==1.0.15
|
| 20 |
-
openai-clip==1.0.1
|
| 21 |
-
fsspec==2025.5.1
|
| 22 |
-
kornia==0.8.1
|
| 23 |
-
matplotlib==3.10.3
|
| 24 |
-
ninja==1.11.1.4
|
| 25 |
-
omegaconf==2.3.0
|
| 26 |
-
opencv-python==4.11.0.86
|
| 27 |
-
pandas==2.3.0
|
| 28 |
-
pillow==11.2.1
|
| 29 |
-
pytorch-lightning==2.5.1.post0
|
| 30 |
-
PyYAML==6.0.2
|
| 31 |
-
scipy==1.15.3
|
| 32 |
-
tqdm==4.67.1
|
| 33 |
-
triton==3.3.0
|
| 34 |
-
urllib3==2.4.0
|
| 35 |
-
webdataset==0.2.111
|
| 36 |
-
xformers==0.0.30
|
| 37 |
-
facexlib==0.3.0
|
| 38 |
-
k-diffusion==0.1.1.post1
|
| 39 |
-
diffusers==0.33.1
|
| 40 |
-
imageio==2.37.0
|
| 41 |
-
pillow-heif==0.22.0
|
| 42 |
-
|
| 43 |
-
open-clip-torch==2.24.0
|
|
|
|
| 1 |
+
git+https://github.com/linoytsaban/diffusers.git@wan22-loras
|
| 2 |
|
| 3 |
+
transformers
|
| 4 |
+
accelerate
|
| 5 |
+
safetensors==0.6.2
|
| 6 |
+
sentencepiece==0.2.1
|
| 7 |
+
peft==0.17.1
|
| 8 |
+
ftfy==6.3.1
|
| 9 |
+
imageio-ffmpeg==0.6.0
|
| 10 |
+
opencv-python==4.12.0.88
|
| 11 |
+
torchao==0.11.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|