TestingwithNeg

Running on Zero

App Files Community

dagloop5 committed on Apr 18

Commit

777a622

verified ·

1 Parent(s): 01fc873

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -35

app.py CHANGED Viewed

@@ -178,44 +178,38 @@ print("=" * 80)
 # =============================================================================
 # ZeroGPU Tensor Preloading
 # =============================================================================
-# ZeroGPU needs all tensors to be loaded BEFORE the Space starts.
-# We trigger model loading here so ZeroGPU can pack them into shared GPU memory.
 print("Preloading all models for ZeroGPU tensor packing...")
 print("This may take a few minutes...")
-# Access pipeline components to force tensor loading
-# TI2VidTwoStagesHQPipeline exposes these as class attributes (not through a ledger)
-_video_encoder = pipeline.prompt_encoder.video_encoder
-_video_decoder = pipeline.video_decoder
-_audio_decoder = pipeline.audio_decoder
-_spatial_upsampler = pipeline.upsampler
-_prompt_encoder = pipeline.prompt_encoder
-# Trigger actual loading by accessing the underlying models
-# These are lazily loaded on first access
-print("  Loading video encoder...")
-_video_encoder.model
-print("  Loading video decoder...")
-_video_decoder.model
-print("  Loading audio decoder...")
-_audio_decoder.model
-print("  Loading spatial upsampler...")
-_spatial_upsampler.model
-print("  Loading prompt encoder (Gemma)...")
-_prompt_encoder.gemma
-# Trigger diffusion stages to load their transformer weights
-# We do this by creating a small dummy inference to force loading
-print("  Loading stage 1 transformer...")
-with torch.no_grad():
-    dummy_latent = torch.randn(1, 16, 9, 64, 64, device='cpu', dtype=torch.bfloat16)
-    dummy_sigmas = torch.tensor([1.0, 0.5, 0.0], device='cpu', dtype=torch.float32)
-    # Access the internal transformer through stage_1._transformer_ctx
-    _ = pipeline.stage_1._transformer_ctx
-    _ = pipeline.stage_2._transformer_ctx
-print("All models preloaded for ZeroGPU tensor packing!")
 print("=" * 80)
 # =============================================================================

 # =============================================================================
 # ZeroGPU Tensor Preloading
 # =============================================================================
 print("Preloading all models for ZeroGPU tensor packing...")
 print("This may take a few minutes...")
+print("  Running dummy inference to load all model weights...")
+# Create a dummy inference call to force ALL models to load at startup
+# This triggers lazy loading for all components without needing internal attribute names
+try:
+    _ = pipeline(
+        prompt="init",
+        negative_prompt="",
+        seed=0,
+        height=256,
+        width=256,
+        num_frames=9,  # 8*1+1 = minimum frames
+        frame_rate=DEFAULT_FRAME_RATE,
+        num_inference_steps=1,  # Single step for dummy init
+        video_guider_params=MultiModalGuiderParams(
+            cfg_scale=1.0, stg_scale=0.0, rescale_scale=0.0,
+            modality_scale=0.0, skip_step=0, stg_blocks=[]
+        ),
+        audio_guider_params=MultiModalGuiderParams(
+            cfg_scale=1.0, stg_scale=0.0, rescale_scale=0.0,
+            modality_scale=0.0, skip_step=0, stg_blocks=[]
+        ),
+        images=[],
+        enhance_prompt=False,
+    )
+    print("All models preloaded for ZeroGPU tensor packing!")
+except Exception as e:
+    print(f"Warning: Dummy inference preload failed (will retry during generation): {e}")
+    print("Continuing startup...")
 print("=" * 80)
 # =============================================================================