Spaces:
Running on Zero
Running on Zero
update app
Browse files
app.py
CHANGED
|
@@ -1,726 +1,619 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
import
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
import
|
| 12 |
-
import
|
| 13 |
-
import
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
import
|
| 18 |
-
from
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
""
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
#
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
)
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
output_path
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
)
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
}
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
with gr.Accordion("Advanced Settings", open=False, visible=False):
|
| 621 |
-
seed = gr.Slider(
|
| 622 |
-
label="Seed",
|
| 623 |
-
minimum=0,
|
| 624 |
-
maximum=MAX_SEED,
|
| 625 |
-
value=DEFAULT_SEED,
|
| 626 |
-
step=1
|
| 627 |
-
)
|
| 628 |
-
|
| 629 |
-
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
with gr.Column(elem_id="step-column"):
|
| 633 |
-
output_video = gr.Video(label="Generated Video", autoplay=True, height=512)
|
| 634 |
-
|
| 635 |
-
with gr.Row():
|
| 636 |
-
|
| 637 |
-
with gr.Column():
|
| 638 |
-
radioanimated_duration = RadioAnimated(
|
| 639 |
-
choices=["3s", "5s", "10s"],
|
| 640 |
-
value="3s",
|
| 641 |
-
elem_id="radioanimated_duration"
|
| 642 |
-
)
|
| 643 |
-
|
| 644 |
-
duration = gr.Slider(
|
| 645 |
-
label="Duration (seconds)",
|
| 646 |
-
minimum=1.0,
|
| 647 |
-
maximum=10.0,
|
| 648 |
-
value=3.0,
|
| 649 |
-
step=0.1,
|
| 650 |
-
visible=False
|
| 651 |
-
)
|
| 652 |
-
|
| 653 |
-
with gr.Column():
|
| 654 |
-
radioanimated_resolution = RadioAnimated(
|
| 655 |
-
choices=["768x512", "512x512", "512x768"],
|
| 656 |
-
value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
|
| 657 |
-
elem_id="radioanimated_resolution"
|
| 658 |
-
)
|
| 659 |
-
|
| 660 |
-
width = gr.Number(label="Width", value=DEFAULT_1_STAGE_WIDTH, precision=0, visible=False)
|
| 661 |
-
height = gr.Number(label="Height", value=DEFAULT_1_STAGE_HEIGHT, precision=0, visible=False)
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
generate_btn = gr.Button("🤩 Generate Video", variant="primary", elem_classes="button-gradient")
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
radioanimated_duration.change(
|
| 668 |
-
fn=apply_duration,
|
| 669 |
-
inputs=radioanimated_duration,
|
| 670 |
-
outputs=[duration],
|
| 671 |
-
api_visibility="private"
|
| 672 |
-
)
|
| 673 |
-
radioanimated_resolution.change(
|
| 674 |
-
fn=apply_resolution,
|
| 675 |
-
inputs=radioanimated_resolution,
|
| 676 |
-
outputs=[width, height],
|
| 677 |
-
api_visibility="private"
|
| 678 |
-
)
|
| 679 |
-
|
| 680 |
-
generate_btn.click(
|
| 681 |
-
fn=generate_video,
|
| 682 |
-
inputs=[
|
| 683 |
-
input_image,
|
| 684 |
-
prompt,
|
| 685 |
-
duration,
|
| 686 |
-
enhance_prompt,
|
| 687 |
-
seed,
|
| 688 |
-
randomize_seed,
|
| 689 |
-
height,
|
| 690 |
-
width,
|
| 691 |
-
],
|
| 692 |
-
outputs=[output_video,seed]
|
| 693 |
-
)
|
| 694 |
-
|
| 695 |
-
# Add example
|
| 696 |
-
gr.Examples(
|
| 697 |
-
examples=[
|
| 698 |
-
[
|
| 699 |
-
"supergirl.png",
|
| 700 |
-
"A fuzzy puppet superhero character resembling a female puppet with blonde hair and a blue superhero suit stands inside an icy cave made of frozen walls and icicles, she looks panicked and frantic, rapidly turning her head left and right and scanning the cave while waving her arms and shouting angrily and desperately, mouthing the words “where the hell is my dog,” her movements exaggerated and puppet-like with high energy and urgency, suddenly a second puppet dog bursts into frame from the side, jumping up excitedly and tackling her affectionately while licking her face repeatedly, she freezes in surprise and then breaks into relief and laughter as the dog continues licking her, the scene feels chaotic, comedic, and emotional with expressive puppet reactions, cinematic lighting, smooth camera motion, shallow depth of field, and high-quality puppet-style animation"
|
| 701 |
-
],
|
| 702 |
-
[
|
| 703 |
-
"highland.png",
|
| 704 |
-
"Realistic POV selfie-style video in a snowy, foggy field. Two shaggy Highland cows with long curved horns stand ahead. The camera is handheld and slightly shaky. The woman filming talks nervously and excitedly in a vlog tone: \"Oh my god guys… look how big those horns are… I’m kinda scared.\" The cow on the left walks toward the camera in a cute, bouncy, hopping way, curious and gentle. Snow crunches under its hooves, breath visible in the cold air. The horns look massive from the POV. As the cow gets very close, its wet nose with slight dripping fills part of the frame. She laughs nervously but reaches out and pets the cow. The cow makes deep, soft, interesting mooing and snorting sounds, calm and friendly. Ultra-realistic, natural lighting, immersive audio, documentary-style realism.",
|
| 705 |
-
],
|
| 706 |
-
[
|
| 707 |
-
"wednesday.png",
|
| 708 |
-
"A cinematic close-up of Wednesday Addams frozen mid-dance on a dark, blue-lit ballroom floor as students move indistinctly behind her, their footsteps and muffled music reduced to a distant, underwater thrum; the audio foregrounds her steady breathing and the faint rustle of fabric as she slowly raises one arm, never breaking eye contact with the camera, then after a deliberately long silence she speaks in a flat, dry, perfectly controlled voice, “I don’t dance… I vibe code,” each word crisp and unemotional, followed by an abrupt cutoff of her voice as the background sound swells slightly, reinforcing the deadpan humor, with precise lip sync, minimal facial movement, stark gothic lighting, and cinematic realism.",
|
| 709 |
-
],
|
| 710 |
-
[
|
| 711 |
-
"astronaut.png",
|
| 712 |
-
"An astronaut hatches from a fragile egg on the surface of the Moon, the shell cracking and peeling apart in gentle low-gravity motion. Fine lunar dust lifts and drifts outward with each movement, floating in slow arcs before settling back onto the ground. The astronaut pushes free in a deliberate, weightless motion, small fragments of the egg tumbling and spinning through the air. In the background, the deep darkness of space subtly shifts as stars glide with the camera's movement, emphasizing vast depth and scale. The camera performs a smooth, cinematic slow push-in, with natural parallax between the foreground dust, the astronaut, and the distant starfield. Ultra-realistic detail, physically accurate low-gravity motion, cinematic lighting, and a breath-taking, movie-like shot.",
|
| 713 |
-
]
|
| 714 |
-
|
| 715 |
-
],
|
| 716 |
-
fn=generate_video_example,
|
| 717 |
-
inputs=[input_image, prompt],
|
| 718 |
-
outputs = [output_video],
|
| 719 |
-
label="Example",
|
| 720 |
-
cache_examples=True,
|
| 721 |
-
)
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
if __name__ == "__main__":
|
| 726 |
-
demo.launch(ssr_mode=False, mcp_server=True, css=css)
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import gc
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import uuid
|
| 6 |
+
import tempfile
|
| 7 |
+
import time
|
| 8 |
+
import random
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
import gradio as gr
|
| 12 |
+
import spaces
|
| 13 |
+
from typing import Iterable, Optional
|
| 14 |
+
from PIL import Image
|
| 15 |
+
|
| 16 |
+
# Gradio Theme Imports
|
| 17 |
+
from gradio.themes import Soft
|
| 18 |
+
from gradio.themes.utils import colors, fonts, sizes
|
| 19 |
+
|
| 20 |
+
# Add packages to Python path
|
| 21 |
+
current_dir = Path(__file__).parent
|
| 22 |
+
sys.path.insert(0, str(current_dir / "packages" / "ltx-pipelines" / "src"))
|
| 23 |
+
sys.path.insert(0, str(current_dir / "packages" / "ltx-core" / "src"))
|
| 24 |
+
|
| 25 |
+
import flash_attn_interface
|
| 26 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
| 27 |
+
|
| 28 |
+
# LTX Imports
|
| 29 |
+
from ltx_pipelines.distilled import DistilledPipeline
|
| 30 |
+
from ltx_core.model.video_vae import TilingConfig
|
| 31 |
+
from ltx_core.loader.primitives import LoraPathStrengthAndSDOps
|
| 32 |
+
from ltx_core.loader.sd_ops import LTXV_LORA_COMFY_RENAMING_MAP
|
| 33 |
+
from ltx_pipelines.utils import ModelLedger
|
| 34 |
+
from ltx_pipelines.utils.helpers import generate_enhanced_prompt
|
| 35 |
+
from ltx_pipelines.utils.constants import (
|
| 36 |
+
DEFAULT_SEED,
|
| 37 |
+
DEFAULT_1_STAGE_HEIGHT,
|
| 38 |
+
DEFAULT_1_STAGE_WIDTH,
|
| 39 |
+
DEFAULT_NUM_FRAMES,
|
| 40 |
+
DEFAULT_FRAME_RATE,
|
| 41 |
+
DEFAULT_LORA_STRENGTH,
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# -----------------------------------------------------------------------------
|
| 45 |
+
# 1. OrangeRed Theme Configuration
|
| 46 |
+
# -----------------------------------------------------------------------------
|
| 47 |
+
|
| 48 |
+
# Custom "orange_red" palette attached to gradio's `colors` module so that it
# can be referenced as `colors.orange_red` (it is the default secondary hue of
# OrangeRedTheme). Shades run from the lightest tint (c50) to the darkest
# (c950); c500 is #FF4500.
colors.orange_red = colors.Color(
    name="orange_red",
    c50="#FFF0E5",
    c100="#FFE0CC",
    c200="#FFC299",
    c300="#FFA366",
    c400="#FF8533",
    c500="#FF4500",
    c600="#E63E00",
    c700="#CC3700",
    c800="#B33000",
    c900="#992900",
    c950="#802200",
)
|
| 62 |
+
|
| 63 |
+
class OrangeRedTheme(Soft):
    """Gradio ``Soft`` theme variant with gray surfaces and orange-red accents.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``; afterwards a fixed set of CSS-variable overrides
    (backgrounds, button gradients, slider colour, block styling) is applied
    through ``set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.orange_red,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        # Base theme options, passed straight through to the parent class.
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Page and panel backgrounds (light / dark variants).
        background_overrides = {
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
        }

        # Primary buttons use the secondary (orange-red) hue as a gradient.
        primary_button_overrides = {
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_hover_dark": "linear-gradient(90deg, *secondary_500, *secondary_600)",
        }

        # Secondary buttons stay within the primary (gray) hue.
        secondary_button_overrides = {
            "button_secondary_text_color": "black",
            "button_secondary_text_color_hover": "white",
            "button_secondary_background_fill": "linear-gradient(90deg, *primary_300, *primary_300)",
            "button_secondary_background_fill_hover": "linear-gradient(90deg, *primary_400, *primary_400)",
            "button_secondary_background_fill_dark": "linear-gradient(90deg, *primary_500, *primary_600)",
            "button_secondary_background_fill_hover_dark": "linear-gradient(90deg, *primary_500, *primary_500)",
        }

        # Sliders, block chrome and remaining accents.
        misc_overrides = {
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_600",
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
            "button_large_padding": "11px",
            "color_accent_soft": "*primary_100",
            "block_label_background_fill": "*primary_200",
        }

        super().set(
            **background_overrides,
            **primary_button_overrides,
            **secondary_button_overrides,
            **misc_overrides,
        )


# Module-level theme instance built with the defaults above.
orange_red_theme = OrangeRedTheme()
|
| 115 |
+
|
| 116 |
+
# -----------------------------------------------------------------------------
|
| 117 |
+
# 2. Configuration & Adapters
|
| 118 |
+
# -----------------------------------------------------------------------------
|
| 119 |
+
|
| 120 |
+
# Largest value of a signed 32-bit integer; used as the upper bound when a
# random seed is drawn in generate_video.
MAX_SEED = np.iinfo(np.int32).max

# HuggingFace Hub defaults
# Repo from which the checkpoint, distilled LoRA and spatial upsampler files
# named below are downloaded during global initialization.
DEFAULT_REPO_ID = "Lightricks/LTX-2"
# Repo of the Gemma model used for the text encoder.
DEFAULT_GEMMA_REPO_ID = "unsloth/gemma-3-12b-it-qat-bnb-4bit"
# File names inside DEFAULT_REPO_ID.
DEFAULT_CHECKPOINT_FILENAME = "ltx-2-19b-dev.safetensors"
DEFAULT_DISTILLED_LORA_FILENAME = "ltx-2-19b-distilled-lora-384.safetensors"
DEFAULT_SPATIAL_UPSAMPLER_FILENAME = "ltx-2-spatial-upscaler-x2-1.0.safetensors"
|
| 128 |
+
|
| 129 |
+
# New Adapter Definitions
|
| 130 |
+
# Maps the adapter label selected by the user to the Hub location of its LoRA
# weights. "None" means no extra adapter. The four camera-control entries
# differ only in the dolly direction, so they are generated from that
# direction; insertion order is Left, Right, In, Out.
ADAPTER_SPECS = {
    "None": None,
    **{
        f"Camera-Control-Dolly-{direction}": {
            "repo": f"Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-{direction}",
            "weights": f"ltx-2-19b-lora-camera-control-dolly-{direction.lower()}.safetensors",
            "adapter_name": f"camera-control-dolly-{direction.lower()}",
        }
        for direction in ("Left", "Right", "In", "Out")
    },
}
|
| 153 |
+
|
| 154 |
+
# -----------------------------------------------------------------------------
|
| 155 |
+
# 3. Model Loading Helper Functions
|
| 156 |
+
# -----------------------------------------------------------------------------
|
| 157 |
+
|
| 158 |
+
def get_hub_or_local_checkpoint(repo_id: Optional[str] = None, filename: Optional[str] = None):
    """Resolve a checkpoint to a path, downloading it from the Hub if needed.

    Args:
        repo_id: HuggingFace Hub repository to download from. When ``None``,
            ``filename`` is returned unchanged and nothing is downloaded.
        filename: File name inside ``repo_id``, or the path to return as-is
            when no ``repo_id`` is given.

    Returns:
        The path returned by ``hf_hub_download`` or ``filename`` itself.

    Raises:
        ValueError: If both arguments are ``None``, or if ``repo_id`` is given
            without a ``filename``.
    """
    if repo_id is None:
        if filename is None:
            raise ValueError("Please supply at least one of `repo_id` or `filename`")
        # No repository given: hand the caller's path straight back.
        return filename

    if filename is None:
        raise ValueError("If repo_id is specified, filename must also be specified.")

    print(f"Downloading {filename} from {repo_id}...")
    downloaded_path = hf_hub_download(repo_id=repo_id, filename=filename)
    print(f"Downloaded to {downloaded_path}")
    return downloaded_path
|
| 173 |
+
|
| 174 |
+
def download_gemma_model(repo_id: str):
    """Download a snapshot of ``repo_id`` from the Hub and return its local directory.

    Progress is reported with ``print`` before and after the call to
    ``snapshot_download``.
    """
    print(f"Downloading Gemma model from {repo_id}...")
    snapshot_dir = snapshot_download(repo_id=repo_id)
    print(f"Gemma model downloaded to {snapshot_dir}")
    return snapshot_dir
|
| 180 |
+
|
| 181 |
+
# -----------------------------------------------------------------------------
|
| 182 |
+
# 4. Global Initialization (Text Encoder & Paths)
|
| 183 |
+
# -----------------------------------------------------------------------------
|
| 184 |
+
|
| 185 |
+
# Import-time initialization: everything below runs once when the module is
# loaded, before the UI is built.
print("=" * 80)
print("Initializing LTX-2 Environment...")
print("=" * 80)

# Device string handed to ModelLedger below.
device = "cuda"

# Load Text Encoder Weights
# Resolve (and download if necessary) the files used later in generate_video.
checkpoint_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_CHECKPOINT_FILENAME)
# NOTE(review): gemma_local_path is not referenced anywhere in the visible
# code — ModelLedger below is given the repo id, not this directory.
# Presumably this call only warms the local Hub cache; confirm.
gemma_local_path = download_gemma_model(DEFAULT_GEMMA_REPO_ID)
distilled_lora_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_DISTILLED_LORA_FILENAME)
spatial_upsampler_path = get_hub_or_local_checkpoint(DEFAULT_REPO_ID, DEFAULT_SPATIAL_UPSAMPLER_FILENAME)

print("Loading Gemma Text Encoder...")
# Module-level ledger used only to obtain the shared text encoder below.
model_ledger = ModelLedger(
    dtype=torch.bfloat16,
    device=device,
    checkpoint_path=checkpoint_path,
    gemma_root_path=DEFAULT_GEMMA_REPO_ID,
    local_files_only=False
)
# Shared text encoder, read as a global by encode_prompt.
text_encoder = model_ledger.text_encoder()
print("Text encoder loaded.")
|
| 207 |
+
|
| 208 |
+
# -----------------------------------------------------------------------------
|
| 209 |
+
# 5. Inference Logic
|
| 210 |
+
# -----------------------------------------------------------------------------
|
| 211 |
+
|
| 212 |
+
def encode_text_simple(text_encoder, prompt: str):
    """Encode ``prompt`` by calling ``text_encoder`` directly.

    ``text_encoder(prompt)`` must return exactly three values; the first two
    (video context, audio context) are returned and the third is discarded.
    """
    video_ctx, audio_ctx, _discarded = text_encoder(prompt)
    return video_ctx, audio_ctx
|
| 216 |
+
|
| 217 |
+
@spaces.GPU()
def encode_prompt(
    prompt: str,
    enhance_prompt: bool = True,
    input_image=None,
    seed: int = 42,
    negative_prompt: str = ""
):
    """Optionally enhance ``prompt`` and encode it with the global text encoder.

    Args:
        prompt: Text prompt supplied by the user.
        enhance_prompt: When true, the prompt is first rewritten by
            ``generate_enhanced_prompt``.
        input_image: Forwarded to ``generate_enhanced_prompt`` as
            ``image_path``; may be ``None``.
        seed: Seed forwarded to ``generate_enhanced_prompt``.
        negative_prompt: Optional negative prompt; encoded only when non-empty.

    Returns:
        A tuple ``(embedding_data, final_prompt, status)``. ``embedding_data``
        is a dict holding the ``video_context``/``audio_context`` tensors
        (moved to CPU), the final and original prompts and, when a negative
        prompt was given, the matching ``*_negative`` entries. On any failure
        the function returns ``(None, prompt, error_msg)`` instead of raising.
    """
    start_time = time.time()
    try:
        final_prompt = prompt
        if enhance_prompt:
            final_prompt = generate_enhanced_prompt(
                text_encoder=text_encoder,
                prompt=prompt,
                image_path=input_image,
                seed=seed,
            )

        video_context_negative = None
        audio_context_negative = None
        with torch.inference_mode():
            video_context, audio_context = encode_text_simple(text_encoder, final_prompt)
            # Encode the negative prompt under the same no-grad regime as the
            # positive one.
            if negative_prompt:
                video_context_negative, audio_context_negative = encode_text_simple(
                    text_encoder, negative_prompt
                )

        embedding_data = {
            "video_context": video_context.detach().cpu(),
            "audio_context": audio_context.detach().cpu(),
            "prompt": final_prompt,
            "original_prompt": prompt,
        }
        if video_context_negative is not None:
            # Move the negative embeddings to CPU as well, so every tensor in
            # the returned dict lives on the same device.
            embedding_data["video_context_negative"] = video_context_negative.detach().cpu()
            embedding_data["audio_context_negative"] = audio_context_negative.detach().cpu()
            embedding_data["negative_prompt"] = negative_prompt

        elapsed_time = time.time() - start_time
        status = f"✓ Encoded in {elapsed_time:.2f}s"
        return embedding_data, final_prompt, status

    except Exception as e:
        # Top-level boundary: log the traceback and report failure through the
        # return value, which callers check for ``None``.
        import traceback
        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return None, prompt, error_msg
|
| 264 |
+
|
| 265 |
+
def get_duration(
    input_image,
    prompt,
    lora_adapter,
    duration,
    enhance_prompt=True,
    seed=42,
    randomize_seed=True,
    height=None,
    width=None,
    progress=None,
):
    """Return the GPU time budget (seconds) for one ``generate_video`` call.

    The signature mirrors ``generate_video`` because this function is passed
    as ``duration=`` to ``spaces.GPU``. Only ``duration`` is used; every
    parameter that is optional on ``generate_video`` is optional here too, so
    a call that omits them (for example without ``progress``) does not fail
    with a ``TypeError``.

    Args:
        duration: Requested clip length in seconds.

    Returns:
        ``80`` for clips of up to 5 seconds, ``120`` otherwise.
    """
    return 80 if duration <= 5 else 120
|
| 270 |
+
|
| 271 |
+
@spaces.GPU(duration=get_duration)
def generate_video(
    input_image,
    prompt: str,
    lora_adapter: str,
    duration: float,
    enhance_prompt: bool = True,
    seed: int = 42,
    randomize_seed: bool = True,
    height: int = DEFAULT_1_STAGE_HEIGHT,
    width: int = DEFAULT_1_STAGE_WIDTH,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a video with the distilled LTX-2 pipeline.

    Args:
        input_image: Optional conditioning image; when given it is passed to
            the pipeline as ``(input_image, 0, 1.0)``.
        prompt: Text prompt.
        lora_adapter: Key of ``ADAPTER_SPECS``; ``"None"`` or an empty value
            selects no extra adapter.
        duration: Clip length in seconds (rendered at 24 fps).
        enhance_prompt: Forwarded to ``encode_prompt``.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        height: Output height in pixels.
        width: Output width in pixels.
        progress: Gradio progress tracker.

    Returns:
        ``(output_path, seed_used)`` on success, ``(None, seed_used)`` when
        generation fails. Failures are printed with a traceback, not raised.
    """
    gc.collect()
    torch.cuda.empty_cache()

    # Bound before the ``try`` so the error path can always report a seed and
    # knows whether there is a temp file / pipeline to clean up.
    current_seed = seed
    output_path = None
    pipeline = None
    video_context = None
    audio_context = None

    try:
        current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
        frame_rate = 24.0
        num_frames = int(duration * frame_rate) + 1

        # Only the name is needed; the pipeline writes the file itself.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
            output_path = tmpfile.name

        # Prepare Inputs
        images = []
        if input_image is not None:
            images = [(input_image, 0, 1.0)]

        # Encode Prompt
        embeddings, final_prompt, status = encode_prompt(
            prompt=prompt,
            enhance_prompt=enhance_prompt,
            input_image=input_image,
            seed=current_seed,
            negative_prompt="",
        )

        if embeddings is None:
            raise Exception("Failed to encode prompt")

        video_context = embeddings["video_context"].to("cuda", non_blocking=True)
        audio_context = embeddings["audio_context"].to("cuda", non_blocking=True)

        # ---------------------------
        # Configure LoRAs
        # ---------------------------
        # Always start with the base Distilled LoRA
        active_loras = [
            LoraPathStrengthAndSDOps(
                path=distilled_lora_path,
                strength=DEFAULT_LORA_STRENGTH,
                sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
            )
        ]

        # Add additional selected Adapter
        if lora_adapter and lora_adapter != "None":
            spec = ADAPTER_SPECS.get(lora_adapter)
            if spec:
                print(f"Loading Adapter: {lora_adapter}")
                # Download on demand
                adapter_path = get_hub_or_local_checkpoint(repo_id=spec["repo"], filename=spec["weights"])

                # Append to list
                active_loras.append(
                    LoraPathStrengthAndSDOps(
                        path=adapter_path,
                        strength=0.8,  # Default strength for style/camera LoRAs
                        sd_ops=LTXV_LORA_COMFY_RENAMING_MAP,
                    )
                )

        # ---------------------------
        # Instantiate Pipeline
        # ---------------------------
        # The pipeline is built per call so the selected LoRAs apply to this
        # run only, without global state pollution.
        pipeline = DistilledPipeline(
            device=torch.device("cuda"),
            checkpoint_path=checkpoint_path,
            spatial_upsampler_path=spatial_upsampler_path,
            gemma_root=None,  # Already handled externally
            loras=active_loras,
            fp8transformer=False,
            local_files_only=False,
        )

        # NOTE(review): these come from the pipeline's own model_ledger, not
        # from the module-level one; presumably this pre-populates the
        # pipeline's cached modules — confirm against DistilledPipeline.
        pipeline._video_encoder = pipeline.model_ledger.video_encoder()
        pipeline._transformer = pipeline.model_ledger.transformer()

        # Run Generation
        pipeline(
            prompt=prompt,
            output_path=str(output_path),
            seed=current_seed,
            height=height,
            width=width,
            num_frames=num_frames,
            frame_rate=frame_rate,
            images=images,
            tiling_config=TilingConfig.default(),
            video_context=video_context,
            audio_context=audio_context,
        )

        return str(output_path), current_seed

    except Exception as e:
        import traceback
        error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        # The temp file was created with delete=False; remove it so failed
        # runs do not accumulate stray files.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                # Best-effort cleanup: the file may already be gone.
                pass
        return None, current_seed

    finally:
        # Drop GPU references on success and failure alike, then release the
        # cached memory.
        pipeline = None
        video_context = None
        audio_context = None
        gc.collect()
        torch.cuda.empty_cache()
|
| 390 |
+
|
| 391 |
+
def generate_video_example(input_image, prompt, lora_adapter, duration):
    """Run ``generate_video`` with fixed example settings and return only the video.

    The ``duration`` argument is accepted but not used: examples are always
    rendered at 5.0 seconds, with prompt enhancement on, a randomized seed and
    the default 1-stage resolution.
    """
    example_settings = dict(
        duration=5.0,
        enhance_prompt=True,
        seed=42,
        randomize_seed=True,
        height=DEFAULT_1_STAGE_HEIGHT,
        width=DEFAULT_1_STAGE_WIDTH,
    )
    video_path, _seed_used = generate_video(
        input_image=input_image,
        prompt=prompt,
        lora_adapter=lora_adapter,
        **example_settings,
    )
    return video_path
|
| 404 |
+
|
| 405 |
+
# -----------------------------------------------------------------------------
|
| 406 |
+
# 6. UI Components
|
| 407 |
+
# -----------------------------------------------------------------------------
|
| 408 |
+
|
| 409 |
+
def apply_resolution(resolution: str):
    """Parse a ``"WIDTHxHEIGHT"`` label into ``(width, height)`` integers.

    Surrounding whitespace and an upper-case ``X`` separator are tolerated.

    Raises:
        ValueError: If the separator is missing or either side is not an
            integer.
    """
    width_text, separator, height_text = resolution.strip().lower().partition("x")
    if not separator:
        raise ValueError(f"Expected a 'WIDTHxHEIGHT' resolution, got {resolution!r}")
    return int(width_text), int(height_text)
|
| 412 |
+
|
| 413 |
+
def apply_duration(duration: str):
    """Convert a duration label such as ``"5s"`` to an integer number of seconds.

    Only a trailing ``s``/``S`` is stripped, so a bare number like ``"10"`` is
    parsed as 10 instead of losing its last digit.

    Raises:
        ValueError: If the remaining text is not an integer.
    """
    seconds_text = duration.strip().lower().removesuffix("s")
    return int(seconds_text)
|
| 416 |
+
|
| 417 |
+
class RadioAnimated(gr.HTML):
    """Radio group rendered as custom HTML with a highlight that slides between options.

    The component is a ``gr.HTML`` subclass: the markup is passed as
    ``html_template`` and the behaviour as ``js_on_load``. Its value is the
    text of the currently selected choice.
    """

    def __init__(self, choices, value=None, **kwargs):
        """Build the radio markup and the JavaScript that drives it.

        Args:
            choices: Sequence of at least two option labels.
            value: Initially selected label; defaults to the first choice.
            **kwargs: Forwarded unchanged to ``gr.HTML.__init__``.

        Raises:
            ValueError: If fewer than two choices are supplied.
        """
        # Local import keeps this block self-contained (stdlib only).
        from html import escape

        if not choices or len(choices) < 2:
            raise ValueError("RadioAnimated requires at least 2 choices.")
        if value is None:
            value = choices[0]

        # A random suffix gives each instance its own radio group name and
        # element ids.
        uid = uuid.uuid4().hex[:8]
        group_name = f"ra-{uid}"

        # Escape labels before interpolating them into attribute values and
        # element text, so characters such as quotes, '<' or '&' cannot break
        # the markup. The browser decodes the entities again, so the value
        # read back in JavaScript is the original label.
        escaped_choices = [escape(str(c), quote=True) for c in choices]

        inputs_html = "\n".join(
            f"""
            <input class="ra-input" type="radio" name="{group_name}" id="{group_name}-{i}" value="{c}">
            <label class="ra-label" for="{group_name}-{i}">{c}</label>
            """
            for i, c in enumerate(escaped_choices)
        )

        html_template = f"""
        <div class="ra-wrap" data-ra="{uid}">
          <div class="ra-inner">
            <div class="ra-highlight"></div>
            {inputs_html}
          </div>
        </div>
        """

        # `element`, `props` and `trigger` are not defined in this snippet;
        # they are expected to be supplied by the gr.HTML js_on_load runtime
        # (confirm against the Gradio version in use).
        js_on_load = r"""
        (() => {
          const wrap = element.querySelector('.ra-wrap');
          const inner = element.querySelector('.ra-inner');
          const highlight = element.querySelector('.ra-highlight');
          const inputs = Array.from(element.querySelectorAll('.ra-input'));

          if (!inputs.length) return;

          const choices = inputs.map(i => i.value);

          function setHighlightByIndex(idx) {
            const n = choices.length;
            const pct = 100 / n;
            highlight.style.width = `calc(${pct}% - 6px)`;
            highlight.style.transform = `translateX(${idx * 100}%)`;
          }

          function setCheckedByValue(val, shouldTrigger=false) {
            const idx = Math.max(0, choices.indexOf(val));
            inputs.forEach((inp, i) => { inp.checked = (i === idx); });
            setHighlightByIndex(idx);

            props.value = choices[idx];
            if (shouldTrigger) trigger('change', props.value);
          }

          setCheckedByValue(props.value ?? choices[0], false);

          inputs.forEach((inp) => {
            inp.addEventListener('change', () => {
              setCheckedByValue(inp.value, true);
            });
          });
        })();
        """

        super().__init__(
            value=value,
            html_template=html_template,
            js_on_load=js_on_load,
            **kwargs
        )
|
| 487 |
+
|
| 488 |
+
# -----------------------------------------------------------------------------
|
| 489 |
+
# 7. Gradio Application
|
| 490 |
+
# -----------------------------------------------------------------------------
|
| 491 |
+
|
| 492 |
+
css = """
|
| 493 |
+
#col-container {
|
| 494 |
+
margin: 0 auto;
|
| 495 |
+
max-width: 1200px;
|
| 496 |
+
}
|
| 497 |
+
#step-column {
|
| 498 |
+
padding: 20px;
|
| 499 |
+
border-radius: 12px;
|
| 500 |
+
background: var(--background-fill-secondary);
|
| 501 |
+
border: 1px solid var(--border-color-primary);
|
| 502 |
+
margin-bottom: 20px;
|
| 503 |
+
}
|
| 504 |
+
.button-gradient {
|
| 505 |
+
background: linear-gradient(90deg, #FF4500, #E63E00);
|
| 506 |
+
border: none;
|
| 507 |
+
color: white;
|
| 508 |
+
font-weight: bold;
|
| 509 |
+
}
|
| 510 |
+
.ra-wrap{ width: fit-content; }
|
| 511 |
+
.ra-inner{
|
| 512 |
+
position: relative; display: inline-flex; align-items: center; gap: 0; padding: 6px;
|
| 513 |
+
background: var(--neutral-200); border-radius: 9999px; overflow: hidden;
|
| 514 |
+
}
|
| 515 |
+
.ra-input{ display: none; }
|
| 516 |
+
.ra-label{
|
| 517 |
+
position: relative; z-index: 2; padding: 8px 16px;
|
| 518 |
+
font-family: inherit; font-size: 14px; font-weight: 600;
|
| 519 |
+
color: var(--neutral-500); cursor: pointer; transition: color 0.2s; white-space: nowrap;
|
| 520 |
+
}
|
| 521 |
+
.ra-highlight{
|
| 522 |
+
position: absolute; z-index: 1; top: 6px; left: 6px;
|
| 523 |
+
height: calc(100% - 12px); border-radius: 9999px;
|
| 524 |
+
background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 525 |
+
transition: transform 0.2s, width 0.2s;
|
| 526 |
+
}
|
| 527 |
+
.ra-input:checked + .ra-label{ color: black; }
|
| 528 |
+
|
| 529 |
+
/* Dark mode adjustments for Radio */
|
| 530 |
+
.dark .ra-inner { background: var(--neutral-800); }
|
| 531 |
+
.dark .ra-label { color: var(--neutral-400); }
|
| 532 |
+
.dark .ra-highlight { background: var(--neutral-600); }
|
| 533 |
+
.dark .ra-input:checked + .ra-label { color: white; }
|
| 534 |
+
"""
|
| 535 |
+
|
| 536 |
+
# Build the UI: inputs on the left, output and generation settings on the right.
# NOTE(review): the nesting below is reconstructed from statement order; confirm
# the placement of the generate button and the examples against the intended layout.
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# **LTX-2 Video Distilled + LoRA Adapters**")
        gr.Markdown("Create cinematic video from text or image using LTX-2 Distilled model. Select LoRA adapters for specific camera movements or styles.")

        with gr.Row():
            # Left Column: Inputs
            with gr.Column(elem_id="step-column"):
                input_image = gr.Image(
                    label="Input Image (Optional)",
                    type="filepath",
                    height=300
                )

                prompt = gr.Textbox(
                    label="Prompt",
                    value="Make this image come alive with cinematic motion...",
                    lines=3,
                    placeholder="Describe the motion and animation you want..."
                )

                # Choices are the keys of ADAPTER_SPECS (defined outside this section).
                lora_adapter = gr.Dropdown(
                    label="Camera Control / Adapter",
                    choices=list(ADAPTER_SPECS.keys()),
                    value="None",
                    info="Select a specific camera movement or style adapter."
                )

                # Hidden checkbox: always passes True to generate_video unless changed in code.
                enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=True, visible=False)

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, value=DEFAULT_SEED, step=1)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

            # Right Column: Output & Settings
            with gr.Column(elem_id="step-column"):
                output_video = gr.Video(label="Generated Video", autoplay=True, height=350)

                with gr.Row():
                    with gr.Column():
                        gr.Markdown("**Duration**")
                        radioanimated_duration = RadioAnimated(
                            choices=["3s", "5s", "10s"],
                            value="5s",
                            elem_id="radioanimated_duration"
                        )
                        # Hidden holder for the parsed duration; updated by apply_duration below.
                        duration = gr.Number(value=5.0, visible=False)

                    with gr.Column():
                        gr.Markdown("**Resolution**")
                        radioanimated_resolution = RadioAnimated(
                            choices=["768x512", "512x512", "512x768"],
                            value=f"{DEFAULT_1_STAGE_WIDTH}x{DEFAULT_1_STAGE_HEIGHT}",
                            elem_id="radioanimated_resolution"
                        )
                        # Hidden holders for the parsed size; updated by apply_resolution below.
                        width = gr.Number(value=DEFAULT_1_STAGE_WIDTH, visible=False)
                        height = gr.Number(value=DEFAULT_1_STAGE_HEIGHT, visible=False)

                generate_btn = gr.Button("Generate Video", variant="primary", elem_classes="button-gradient")

        # Wire up events
        # The radio labels are parsed into the hidden Number components.
        # apply_resolution returns (width, height), matching the outputs order.
        radioanimated_duration.change(fn=apply_duration, inputs=radioanimated_duration, outputs=[duration], api_visibility="private")
        radioanimated_resolution.change(fn=apply_resolution, inputs=radioanimated_resolution, outputs=[width, height], api_visibility="private")

        # generate_video returns (video path or None, seed); the seed slider is
        # updated with the seed that was used.
        generate_btn.click(
            fn=generate_video,
            inputs=[input_image, prompt, lora_adapter, duration, enhance_prompt, seed, randomize_seed, height, width],
            outputs=[output_video, seed]
        )

        # Each example row supplies an image path, a prompt and an adapter name,
        # in the same order as `inputs`.
        gr.Examples(
            examples=[
                ["examples/supergirl.png", "A fuzzy puppet superhero...", "Camera-Control-Dolly-Left"],
                ["examples/astronaut.png", "An astronaut hatches from a fragile egg...", "Camera-Control-Dolly-In"],
            ],
            fn=generate_video_example,
            inputs=[input_image, prompt, lora_adapter],
            outputs=[output_video],
            label="Examples",
            cache_examples=False
        )
|
| 617 |
+
|
| 618 |
+
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # The stylesheet and theme are supplied at launch time; `orange_red_theme`
    # is defined outside this section.
    demo.launch(css=css, theme=orange_red_theme, ssr_mode=False, mcp_server=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|