TestingwithNeg

Running on Zero

App Files Files Community

dagloop5 committed on Apr 18

Commit

864541a

verified ·

1 Parent(s): e2e429c

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -60

app.py CHANGED Viewed

@@ -181,66 +181,100 @@ print("=" * 80)
 print("Preloading all models for ZeroGPU tensor packing...")
 print("This may take a few minutes...")
-# Debug aid: print the non-dunder attributes of each pipeline component, then poke the ones that look like model hooks
-print("  Inspecting DiffusionStage attributes...")
-stage_attrs = [a for a in dir(pipeline.stage_1) if not a.startswith('__')]
-print(f"    DiffusionStage attributes: {stage_attrs}")
-# If a stage exposes _transformer_ctx and that object has __enter__, enter it in an attempt to load the transformer.
-# NOTE(review): the value returned by __enter__() is discarded and __exit__ is never called here - confirm that is intended.
-if hasattr(pipeline.stage_1, '_transformer_ctx'):
-    print("  Loading stage 1 transformer via _transformer_ctx...")
-    ctx = pipeline.stage_1._transformer_ctx
-    if hasattr(ctx, '__enter__'):
-        ctx.__enter__()  # Enter only; nothing keeps a reference to what the context yields
-if hasattr(pipeline.stage_2, '_transformer_ctx'):
-    print("  Loading stage 2 transformer via _transformer_ctx...")
-    ctx = pipeline.stage_2._transformer_ctx
-    if hasattr(ctx, '__enter__'):
-        ctx.__enter__()
-# Print the non-dunder attributes of the PromptEncoder
-print("  Inspecting PromptEncoder attributes...")
-pe_attrs = [a for a in dir(pipeline.prompt_encoder) if not a.startswith('__')]
-print(f"    PromptEncoder attributes: {pe_attrs}")
-# Probe candidate attribute names; the first one that exists is read once via getattr and the result is dropped
-for attr_name in ['video_encoder', '_video_encoder', 'enc', 'encoder', '_enc']:
-    if hasattr(pipeline.prompt_encoder, attr_name):
-        print(f"  Loading video encoder via .{attr_name}...")
-        _ = getattr(pipeline.prompt_encoder, attr_name)
-        break
-# Inspect VideoDecoder, then read the first existing candidate attribute (NOTE(review): a bare getattr loads a model only if the attribute is lazy - confirm)
-print("  Inspecting VideoDecoder attributes...")
-vd_attrs = [a for a in dir(pipeline.video_decoder) if not a.startswith('__')]
-print(f"    VideoDecoder attributes: {vd_attrs}")
-for attr_name in ['model', 'decoder', '_model']:
-    if hasattr(pipeline.video_decoder, attr_name):
-        print(f"  Loading video decoder via .{attr_name}...")
-        _ = getattr(pipeline.video_decoder, attr_name)
-        break
-# Inspect AudioDecoder, then read the first existing candidate attribute (same probe as for the video decoder)
-print("  Inspecting AudioDecoder attributes...")
-ad_attrs = [a for a in dir(pipeline.audio_decoder) if not a.startswith('__')]
-print(f"    AudioDecoder attributes: {ad_attrs}")
-for attr_name in ['model', 'decoder', '_model']:
-    if hasattr(pipeline.audio_decoder, attr_name):
-        print(f"  Loading audio decoder via .{attr_name}...")
-        _ = getattr(pipeline.audio_decoder, attr_name)
-        break
-# Inspect VideoUpsampler, then read the first existing candidate attribute (candidates: model, upsampler, _model)
-print("  Inspecting VideoUpsampler attributes...")
-up_attrs = [a for a in dir(pipeline.upsampler) if not a.startswith('__')]
-print(f"    VideoUpsampler attributes: {up_attrs}")
-for attr_name in ['model', 'upsampler', '_model']:
-    if hasattr(pipeline.upsampler, attr_name):
-        print(f"  Loading spatial upsampler via .{attr_name}...")
-        _ = getattr(pipeline.upsampler, attr_name)
-        break
 print("All models preloaded for ZeroGPU tensor packing!")
 print("=" * 80)

 print("Preloading all models for ZeroGPU tensor packing...")
 print("This may take a few minutes...")
+# The TI2VidTwoStagesHQPipeline builds its models lazily (context managers and
+# builder callables). Each model is built once here, at import time, bound to a
+# module-level name, and the pipeline's lazy hook is replaced with a stub that
+# hands back that same instance. The module-level names keep the models alive
+# so ZeroGPU can pack them.
+# NOTE(review): the private hook names used below (_transformer_ctx,
+# _text_encoder_ctx, _decoder_builder, ...) are assumed from the pipeline
+# implementation - confirm against the installed library version.
+def _pinned_context(model, forward_calls=False):
+    """Return a context-manager stub whose ``with`` target is always *model*.
+
+    When *forward_calls* is true the stub is also callable and forwards the
+    call to *model*. ``__exit__`` deliberately does nothing so the model is
+    never released.
+    """
+    namespace = {
+        '__enter__': lambda self: model,
+        '__exit__': lambda self, *exc_info: None,
+    }
+    if forward_calls:
+        namespace['__call__'] = lambda self, *args, **kwargs: model(*args, **kwargs)
+    return type('ctx', (), namespace)()
+def _preload_from_context(owner, ctx_name, dict_key, attr_name, forward_calls=False):
+    """Enter ``owner.<ctx_name>``, capture the loaded model and pin it.
+
+    The value returned by ``__enter__()`` is preferred; if it is ``None`` the
+    context's ``__dict__[dict_key]`` and then ``owner.<attr_name>`` are tried.
+    ``__exit__`` is intentionally never called so the model stays loaded.
+    Returns the captured model, or ``None`` (leaving the original context in
+    place) when nothing could be captured.
+    """
+    ctx = getattr(owner, ctx_name)
+    model = ctx.__enter__()
+    if model is None:
+        model = getattr(ctx, '__dict__', {}).get(dict_key)
+    if model is None:
+        model = getattr(owner, attr_name, None)
+    if model is None:
+        # Swapping in a stub that yields None would break the pipeline later.
+        print(f"    WARNING: nothing captured from {ctx_name}; leaving it unchanged")
+        return None
+    setattr(owner, ctx_name, _pinned_context(model, forward_calls))
+    return model
+def _preload_from_builder(owner, builder_name, cache_attr=None):
+    """Call ``owner.<builder_name>()`` once and make later calls return that result.
+
+    If *cache_attr* is given and already exists on *owner*, it is also pointed
+    at the built model. Returns the built model.
+    """
+    model = getattr(owner, builder_name)()
+    setattr(owner, builder_name, lambda: model)
+    if cache_attr is not None and hasattr(owner, cache_attr):
+        setattr(owner, cache_attr, model)
+    return model
+# Optional components default to None so the registry below is always complete.
+_vocoder = None
+_ic_encoder = None
+# 1. Transformers of both diffusion stages (stubs stay callable, as before)
+print("  Loading stage 1 transformer...")
+_stage_1_transformer = _preload_from_context(
+    pipeline.stage_1, '_transformer_ctx', 'transformer', '_transformer',
+    forward_calls=True)
+print(f"    Captured stage 1 transformer: {type(_stage_1_transformer)}")
+print("  Loading stage 2 transformer...")
+_stage_2_transformer = _preload_from_context(
+    pipeline.stage_2, '_transformer_ctx', 'transformer', '_transformer',
+    forward_calls=True)
+print(f"    Captured stage 2 transformer: {type(_stage_2_transformer)}")
+# 2. Text encoder via _text_encoder_ctx
+print("  Loading Gemma text encoder...")
+_text_encoder = _preload_from_context(
+    pipeline.prompt_encoder, '_text_encoder_ctx', 'text_encoder', '_text_encoder')
+if _text_encoder is not None:
+    # Also expose the instance directly on the prompt encoder.
+    pipeline.prompt_encoder._text_encoder = _text_encoder
+print(f"    Captured text encoder: {type(_text_encoder)}")
+# 3. Video encoder (prompt_encoder.video_encoder is called as a zero-arg builder)
+print("  Loading video encoder...")
+_video_encoder = _preload_from_builder(pipeline.prompt_encoder, 'video_encoder')
+print(f"    Captured video encoder: {type(_video_encoder)}")
+# 4. Video decoder via _decoder_builder (and _decoder, when that attribute exists)
+print("  Loading video decoder...")
+_video_decoder = _preload_from_builder(
+    pipeline.video_decoder, '_decoder_builder', cache_attr='_decoder')
+print(f"    Captured video decoder: {type(_video_decoder)}")
+# 5. Audio decoder via _decoder_builder (and _decoder, when that attribute exists)
+print("  Loading audio decoder...")
+_audio_decoder = _preload_from_builder(
+    pipeline.audio_decoder, '_decoder_builder', cache_attr='_decoder')
+print(f"    Captured audio decoder: {type(_audio_decoder)}")
+# 6. Vocoder, only when the audio decoder exposes _vocoder_builder
+print("  Loading vocoder...")
+if hasattr(pipeline.audio_decoder, '_vocoder_builder'):
+    _vocoder = _preload_from_builder(pipeline.audio_decoder, '_vocoder_builder')
+    print(f"    Captured vocoder: {type(_vocoder)}")
+# 7. Spatial upsampler via _upsampler_builder (and _encoder, when that attribute exists)
+print("  Loading spatial upsampler...")
+_spatial_upsampler = _preload_from_builder(
+    pipeline.upsampler, '_upsampler_builder', cache_attr='_encoder')
+print(f"    Captured spatial upsampler: {type(_spatial_upsampler)}")
+# 8. Image conditioner's video encoder, when the pipeline has one
+print("  Loading image conditioner...")
+if hasattr(pipeline, 'image_conditioner'):
+    if hasattr(pipeline.image_conditioner, 'video_encoder'):
+        _ic_encoder = _preload_from_builder(pipeline.image_conditioner, 'video_encoder')
+# Names assigned at module level are already globals. A `global` statement
+# placed after those assignments is a SyntaxError ("assigned to before global
+# declaration"), so the references are collected in one registry instead.
+print("  Creating global references to prevent garbage collection...")
+_PRELOADED_MODELS = {
+    'stage_1_transformer': _stage_1_transformer,
+    'stage_2_transformer': _stage_2_transformer,
+    'text_encoder': _text_encoder,
+    'video_encoder': _video_encoder,
+    'video_decoder': _video_decoder,
+    'audio_decoder': _audio_decoder,
+    'vocoder': _vocoder,
+    'spatial_upsampler': _spatial_upsampler,
+    'image_conditioner_encoder': _ic_encoder,
+}
 print("All models preloaded for ZeroGPU tensor packing!")
 print("=" * 80)