Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -172,21 +172,21 @@ ASPECT_RATIOS = {
|
|
| 172 |
def get_vae_cache_for_aspect_ratio(aspect_ratio, device, dtype):
|
| 173 |
"""
|
| 174 |
Create VAE cache with appropriate dimensions for the given aspect ratio.
|
|
|
|
| 175 |
"""
|
| 176 |
ar_config = ASPECT_RATIOS[aspect_ratio]
|
| 177 |
latent_h = ar_config["latent_h"]
|
| 178 |
latent_w = ar_config["latent_w"]
|
| 179 |
|
| 180 |
# Create new cache tensors with correct dimensions
|
| 181 |
-
#
|
| 182 |
cache = []
|
| 183 |
|
| 184 |
-
# The
|
| 185 |
-
|
| 186 |
-
cache.append(torch.zeros(1, 512, latent_h //
|
| 187 |
-
cache.append(torch.zeros(1,
|
| 188 |
-
cache.append(torch.zeros(1,
|
| 189 |
-
cache.append(torch.zeros(1, 128, latent_h, latent_w, device=device, dtype=dtype)) # 1x (same as latent)
|
| 190 |
|
| 191 |
return cache
|
| 192 |
|
|
@@ -381,8 +381,14 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15, aspect_ratio="16
|
|
| 381 |
|
| 382 |
vae_cache, latents_cache = None, None
|
| 383 |
if not APP_STATE["current_use_taehv"] and not args.trt:
|
| 384 |
-
#
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
num_blocks = 7
|
| 388 |
current_start_frame = 0
|
|
|
|
| 172 |
def get_vae_cache_for_aspect_ratio(aspect_ratio, device, dtype):
    """
    Build a fresh VAE cache sized for the requested aspect ratio.

    Every cache entry is a zero-filled 5D tensor laid out as
    (batch, channels, time, height, width); the time axis is 1 at
    initialization.

    Args:
        aspect_ratio: key into the module-level ``ASPECT_RATIOS`` table,
            which supplies the ``"latent_h"`` and ``"latent_w"`` entries.
        device: torch device the cache tensors are allocated on.
        dtype: torch dtype of the cache tensors.

    Returns:
        A list of four zero tensors, one per decoder scale — 8x, 4x, 2x,
        and 1x relative to the latent resolution.
    """
    config = ASPECT_RATIOS[aspect_ratio]
    base_h = config["latent_h"]
    base_w = config["latent_w"]

    # (channel count, spatial divisor) for each decoder stage, ordered
    # from the most downsampled (8x) up to full latent resolution (1x).
    stage_shapes = [(512, 8), (512, 4), (256, 2), (128, 1)]

    return [
        torch.zeros(
            1, channels, 1, base_h // divisor, base_w // divisor,
            device=device, dtype=dtype,
        )
        for channels, divisor in stage_shapes
    ]
|
| 192 |
|
|
|
|
| 381 |
|
| 382 |
vae_cache, latents_cache = None, None
|
| 383 |
if not APP_STATE["current_use_taehv"] and not args.trt:
|
| 384 |
+
# For non-TRT and non-TAEHV, we need to handle aspect ratio properly
|
| 385 |
+
# Use the original ZERO_VAE_CACHE as a template but adjust dimensions
|
| 386 |
+
if aspect_ratio == "16:9":
|
| 387 |
+
# Use default cache for 16:9
|
| 388 |
+
vae_cache = [c.to(device=gpu, dtype=torch.float16) for c in ZERO_VAE_CACHE]
|
| 389 |
+
else:
|
| 390 |
+
# Create custom cache for 9:16
|
| 391 |
+
vae_cache = get_vae_cache_for_aspect_ratio(aspect_ratio, gpu, torch.float16)
|
| 392 |
|
| 393 |
num_blocks = 7
|
| 394 |
current_start_frame = 0
|