Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -423,20 +423,15 @@ print("Pipeline initialized!")
|
|
| 423 |
print("=" * 80)
|
| 424 |
|
| 425 |
# =============================================================================
|
| 426 |
-
# ZeroGPU Tensor Preloading -
|
| 427 |
# =============================================================================
|
| 428 |
-
# Many components are SHARED between stage 1 and stage 2:
|
| 429 |
-
# - video_encoder, text_encoder (Gemma), audio_encoder, vocoder, embeddings_processor
|
| 430 |
-
# These only need to be loaded ONCE.
|
| 431 |
-
# Only the TRANSFORMER is different per stage (different LoRA weights).
|
| 432 |
|
| 433 |
print("Preloading models for ZeroGPU tensor packing...")
|
| 434 |
|
| 435 |
-
# =====
|
| 436 |
print(" Loading shared components...")
|
| 437 |
shared_ledger = pipeline.stage_1_model_ledger
|
| 438 |
|
| 439 |
-
# These are shared - load once from stage 1
|
| 440 |
_video_encoder = shared_ledger.video_encoder()
|
| 441 |
shared_ledger.video_encoder = lambda: _video_encoder
|
| 442 |
|
|
@@ -455,6 +450,9 @@ shared_ledger.vocoder = lambda: _vocoder
|
|
| 455 |
_spatial_upsampler = shared_ledger.spatial_upsampler()
|
| 456 |
shared_ledger.spatial_upsampler = lambda: _spatial_upsampler
|
| 457 |
|
|
|
|
|
|
|
|
|
|
| 458 |
# Point stage 2 to same shared components
|
| 459 |
pipeline.stage_2_model_ledger.video_encoder = lambda: _video_encoder
|
| 460 |
pipeline.stage_2_model_ledger.text_encoder = lambda: _text_encoder
|
|
@@ -462,10 +460,11 @@ pipeline.stage_2_model_ledger.gemma_embeddings_processor = lambda: _embeddings_p
|
|
| 462 |
pipeline.stage_2_model_ledger.audio_decoder = lambda: _audio_decoder
|
| 463 |
pipeline.stage_2_model_ledger.vocoder = lambda: _vocoder
|
| 464 |
pipeline.stage_2_model_ledger.spatial_upsampler = lambda: _spatial_upsampler
|
|
|
|
| 465 |
|
| 466 |
-
print("
|
| 467 |
|
| 468 |
-
# =====
|
| 469 |
print(" Loading stage 1 transformer...")
|
| 470 |
_transformer1 = pipeline.stage_1_model_ledger.transformer()
|
| 471 |
pipeline.stage_1_model_ledger.transformer = lambda: _transformer1
|
|
@@ -474,12 +473,6 @@ print(" Loading stage 2 transformer...")
|
|
| 474 |
_transformer2 = pipeline.stage_2_model_ledger.transformer()
|
| 475 |
pipeline.stage_2_model_ledger.transformer = lambda: _transformer2
|
| 476 |
|
| 477 |
-
# ===== Preload video_decoder (may be shared, but preload for safety) =====
|
| 478 |
-
print(" Loading video decoder...")
|
| 479 |
-
_video_decoder1 = shared_ledger.video_decoder()
|
| 480 |
-
shared_ledger.video_decoder = lambda: _video_decoder1
|
| 481 |
-
pipeline.stage_2_model_ledger.video_decoder = lambda: _video_decoder1
|
| 482 |
-
|
| 483 |
print("All models preloaded for ZeroGPU tensor packing!")
|
| 484 |
print("=" * 80)
|
| 485 |
print("Pipeline ready!")
|
|
@@ -643,11 +636,25 @@ def on_highres_toggle(first_image, last_image, high_res):
|
|
| 643 |
|
| 644 |
|
| 645 |
def get_duration(
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
progress=None,
|
| 652 |
) -> int:
|
| 653 |
return int(gpu_duration)
|
|
@@ -656,10 +663,10 @@ def get_duration(
|
|
| 656 |
@spaces.GPU(duration=get_duration)
|
| 657 |
@torch.inference_mode()
|
| 658 |
def generate_video(
|
| 659 |
-
prompt: str,
|
| 660 |
-
negative_prompt: str,
|
| 661 |
first_image,
|
| 662 |
last_image,
|
|
|
|
|
|
|
| 663 |
duration: float,
|
| 664 |
seed: int,
|
| 665 |
randomize_seed: bool,
|
|
@@ -754,21 +761,6 @@ def generate_video(
|
|
| 754 |
|
| 755 |
output_path = tempfile.mktemp(suffix=".mp4")
|
| 756 |
encode_video(
|
| 757 |
-
video=video,
|
| 758 |
-
fps=DEFAULT_FRAME_RATE,
|
| 759 |
-
audio=audio,
|
| 760 |
-
output_path=output_path,
|
| 761 |
-
video_chunks_number=video_chunks_number,
|
| 762 |
-
)
|
| 763 |
-
|
| 764 |
-
log_memory("after encode_video")
|
| 765 |
-
return str(output_path), current_seed
|
| 766 |
-
|
| 767 |
-
except Exception as e:
|
| 768 |
-
import traceback
|
| 769 |
-
log_memory("on error")
|
| 770 |
-
print(f"Error: {str(e)}\n{traceback.format_exc()}")
|
| 771 |
-
return None, current_seed
|
| 772 |
|
| 773 |
|
| 774 |
# =============================================================================
|
|
|
|
| 423 |
print("=" * 80)
|
| 424 |
|
| 425 |
# =============================================================================
|
| 426 |
+
# ZeroGPU Tensor Preloading - Shared Components + Unique Transformers
|
| 427 |
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
print("Preloading models for ZeroGPU tensor packing...")
|
| 430 |
|
| 431 |
+
# ===== Load shared components from stage 1 =====
|
| 432 |
print(" Loading shared components...")
|
| 433 |
shared_ledger = pipeline.stage_1_model_ledger
|
| 434 |
|
|
|
|
| 435 |
_video_encoder = shared_ledger.video_encoder()
|
| 436 |
shared_ledger.video_encoder = lambda: _video_encoder
|
| 437 |
|
|
|
|
| 450 |
_spatial_upsampler = shared_ledger.spatial_upsampler()
|
| 451 |
shared_ledger.spatial_upsampler = lambda: _spatial_upsampler
|
| 452 |
|
| 453 |
+
_video_decoder = shared_ledger.video_decoder()
|
| 454 |
+
shared_ledger.video_decoder = lambda: _video_decoder
|
| 455 |
+
|
| 456 |
# Point stage 2 to same shared components
|
| 457 |
pipeline.stage_2_model_ledger.video_encoder = lambda: _video_encoder
|
| 458 |
pipeline.stage_2_model_ledger.text_encoder = lambda: _text_encoder
|
|
|
|
| 460 |
pipeline.stage_2_model_ledger.audio_decoder = lambda: _audio_decoder
|
| 461 |
pipeline.stage_2_model_ledger.vocoder = lambda: _vocoder
|
| 462 |
pipeline.stage_2_model_ledger.spatial_upsampler = lambda: _spatial_upsampler
|
| 463 |
+
pipeline.stage_2_model_ledger.video_decoder = lambda: _video_decoder
|
| 464 |
|
| 465 |
+
print("Shared components preloaded")
|
| 466 |
|
| 467 |
+
# ===== Load ONLY stage-specific transformers =====
|
| 468 |
print(" Loading stage 1 transformer...")
|
| 469 |
_transformer1 = pipeline.stage_1_model_ledger.transformer()
|
| 470 |
pipeline.stage_1_model_ledger.transformer = lambda: _transformer1
|
|
|
|
| 473 |
_transformer2 = pipeline.stage_2_model_ledger.transformer()
|
| 474 |
pipeline.stage_2_model_ledger.transformer = lambda: _transformer2
|
| 475 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
print("All models preloaded for ZeroGPU tensor packing!")
|
| 477 |
print("=" * 80)
|
| 478 |
print("Pipeline ready!")
|
|
|
|
| 636 |
|
| 637 |
|
| 638 |
def get_duration(
|
| 639 |
+
first_image,
|
| 640 |
+
last_image,
|
| 641 |
+
prompt: str,
|
| 642 |
+
negative_prompt: str,
|
| 643 |
+
duration: float,
|
| 644 |
+
seed: int,
|
| 645 |
+
randomize_seed: bool,
|
| 646 |
+
height: int,
|
| 647 |
+
width: int,
|
| 648 |
+
enhance_prompt: bool,
|
| 649 |
+
video_cfg_scale: float,
|
| 650 |
+
video_stg_scale: float,
|
| 651 |
+
video_rescale_scale: float,
|
| 652 |
+
video_a2v_scale: float,
|
| 653 |
+
audio_cfg_scale: float,
|
| 654 |
+
audio_stg_scale: float,
|
| 655 |
+
audio_rescale_scale: float,
|
| 656 |
+
audio_v2a_scale: float,
|
| 657 |
+
gpu_duration: float,
|
| 658 |
progress=None,
|
| 659 |
) -> int:
|
| 660 |
return int(gpu_duration)
|
|
|
|
| 663 |
@spaces.GPU(duration=get_duration)
|
| 664 |
@torch.inference_mode()
|
| 665 |
def generate_video(
|
|
|
|
|
|
|
| 666 |
first_image,
|
| 667 |
last_image,
|
| 668 |
+
prompt: str,
|
| 669 |
+
negative_prompt: str,
|
| 670 |
duration: float,
|
| 671 |
seed: int,
|
| 672 |
randomize_seed: bool,
|
|
|
|
| 761 |
|
| 762 |
output_path = tempfile.mktemp(suffix=".mp4")
|
| 763 |
encode_video(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 764 |
|
| 765 |
|
| 766 |
# =============================================================================
|