Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -178,44 +178,38 @@ print("=" * 80)
|
|
| 178 |
# =============================================================================
|
| 179 |
# ZeroGPU Tensor Preloading
|
| 180 |
# =============================================================================
|
| 181 |
-
# ZeroGPU needs all tensors to be loaded BEFORE the Space starts.
|
| 182 |
-
# We trigger model loading here so ZeroGPU can pack them into shared GPU memory.
|
| 183 |
-
|
| 184 |
print("Preloading all models for ZeroGPU tensor packing...")
|
| 185 |
print("This may take a few minutes...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
-
# Access pipeline components to force tensor loading
|
| 188 |
-
# TI2VidTwoStagesHQPipeline exposes these as class attributes (not through a ledger)
|
| 189 |
-
_video_encoder = pipeline.prompt_encoder.video_encoder
|
| 190 |
-
_video_decoder = pipeline.video_decoder
|
| 191 |
-
_audio_decoder = pipeline.audio_decoder
|
| 192 |
-
_spatial_upsampler = pipeline.upsampler
|
| 193 |
-
_prompt_encoder = pipeline.prompt_encoder
|
| 194 |
-
|
| 195 |
-
# Trigger actual loading by accessing the underlying models
|
| 196 |
-
# These are lazily loaded on first access
|
| 197 |
-
print(" Loading video encoder...")
|
| 198 |
-
_video_encoder.model
|
| 199 |
-
print(" Loading video decoder...")
|
| 200 |
-
_video_decoder.model
|
| 201 |
-
print(" Loading audio decoder...")
|
| 202 |
-
_audio_decoder.model
|
| 203 |
-
print(" Loading spatial upsampler...")
|
| 204 |
-
_spatial_upsampler.model
|
| 205 |
-
print(" Loading prompt encoder (Gemma)...")
|
| 206 |
-
_prompt_encoder.gemma
|
| 207 |
-
|
| 208 |
-
# Trigger diffusion stages to load their transformer weights
|
| 209 |
-
# We do this by creating a small dummy inference to force loading
|
| 210 |
-
print(" Loading stage 1 transformer...")
|
| 211 |
-
with torch.no_grad():
|
| 212 |
-
dummy_latent = torch.randn(1, 16, 9, 64, 64, device='cpu', dtype=torch.bfloat16)
|
| 213 |
-
dummy_sigmas = torch.tensor([1.0, 0.5, 0.0], device='cpu', dtype=torch.float32)
|
| 214 |
-
# Access the internal transformer through stage_1._transformer_ctx
|
| 215 |
-
_ = pipeline.stage_1._transformer_ctx
|
| 216 |
-
_ = pipeline.stage_2._transformer_ctx
|
| 217 |
-
|
| 218 |
-
print("All models preloaded for ZeroGPU tensor packing!")
|
| 219 |
print("=" * 80)
|
| 220 |
|
| 221 |
# =============================================================================
|
|
|
|
| 178 |
# =============================================================================
|
| 179 |
# ZeroGPU Tensor Preloading
|
| 180 |
# =============================================================================
|
|
|
|
|
|
|
|
|
|
| 181 |
print("Preloading all models for ZeroGPU tensor packing...")
|
| 182 |
print("This may take a few minutes...")
|
| 183 |
+
print(" Running dummy inference to load all model weights...")
|
| 184 |
+
|
| 185 |
+
# Run a minimal dummy inference call at startup to force ALL models to load
|
| 186 |
+
# This triggers lazy loading for every component without relying on the pipeline's internal attribute names
|
| 187 |
+
try:
|
| 188 |
+
_ = pipeline(
|
| 189 |
+
prompt="init",
|
| 190 |
+
negative_prompt="",
|
| 191 |
+
seed=0,
|
| 192 |
+
height=256,
|
| 193 |
+
width=256,
|
| 194 |
+
num_frames=9, # 8*1+1 = minimum frames
|
| 195 |
+
frame_rate=DEFAULT_FRAME_RATE,
|
| 196 |
+
num_inference_steps=1, # Single step for dummy init
|
| 197 |
+
video_guider_params=MultiModalGuiderParams(
|
| 198 |
+
cfg_scale=1.0, stg_scale=0.0, rescale_scale=0.0,
|
| 199 |
+
modality_scale=0.0, skip_step=0, stg_blocks=[]
|
| 200 |
+
),
|
| 201 |
+
audio_guider_params=MultiModalGuiderParams(
|
| 202 |
+
cfg_scale=1.0, stg_scale=0.0, rescale_scale=0.0,
|
| 203 |
+
modality_scale=0.0, skip_step=0, stg_blocks=[]
|
| 204 |
+
),
|
| 205 |
+
images=[],
|
| 206 |
+
enhance_prompt=False,
|
| 207 |
+
)
|
| 208 |
+
print("All models preloaded for ZeroGPU tensor packing!")
|
| 209 |
+
except Exception as e:
|
| 210 |
+
print(f"Warning: Dummy inference preload failed (will retry during generation): {e}")
|
| 211 |
+
print("Continuing startup...")
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
print("=" * 80)
|
| 214 |
|
| 215 |
# =============================================================================
|