ltx-video-distilled-split

Paused

ford442 committed on Nov 23, 2025

Commit

213a98f

verified ·

1 Parent(s): c5900cd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -668,15 +668,25 @@ def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_vi
     output_video_path = os.path.join(tempfile.mkdtemp(), f"output_{random.randint(10000,99999)}.mp4")
     if WORKER_CLIENT:
         print("📡 Sending Video Latents to Worker for Decode...")
-        latents = pipeline_output.images # These are raw transformer outputs (Unit Variance)
-        # DEBUG B: What did the Transformer produce?
-        # If Std is 0.0 or NaN here, the Transformer failed.
-        print(f"--- DEBUG: MAIN APP OUTPUT ---")
-        print(f"B. OUTPUT Latents | Mean: {latents.mean().item():.4f} | Std: {latents.std().item():.4f}")
-        # Simple package, no stats needed now
-        pkg = { "latents": latents.cpu() }
         temp_path = "/tmp/temp_video_pkg.pt"
         torch.save(pkg, temp_path)

     output_video_path = os.path.join(tempfile.mkdtemp(), f"output_{random.randint(10000,99999)}.mp4")
     if WORKER_CLIENT:
         print("📡 Sending Video Latents to Worker for Decode...")
+        latents = pipeline_output.images
+        # LOGIC:
+        # For I2V, we want to match the input image's contrast.
+        # For T2V, we use the "Natural" LTX stats (Std ~0.25).
+        target_mean = torch.tensor(0.0)
+        target_std = torch.tensor(0.25) # Default LTX "Natural" Std
+        # If we have stats from the Input Image (I2V), use those!
+        if mode == "image-to-video" and "mean" in image_stats and "std" in image_stats:
+             target_mean = image_stats["mean"].cpu()
+             target_std = image_stats["std"].cpu()
+        pkg = {
+            "latents": latents.cpu(),
+            "mean": target_mean,
+            "std": target_std
+        }
         temp_path = "/tmp/temp_video_pkg.pt"
         torch.save(pkg, temp_path)