Carlos s committed
Upload pipeline_ltx_video.py

pipeline_ltx_video.py CHANGED (+51 -3)
@@ -186,6 +186,13 @@ def retrieve_timesteps(
         ]
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         num_inference_steps = len(timesteps)
+
+
+    print(f"[ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+    print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+    print(f"skip_final_inference_steps {skip_final_inference_steps}")
+    print(f"timesteps {timesteps}")
+
 
     return timesteps, num_inference_steps
 
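Note: this hunk instruments `retrieve_timesteps` just before it returns, but the debug tag names *causal_video_autoencoder.py* while the probes live in `pipeline_ltx_video.py`, which makes the log output misleading to grep. Below is a minimal sketch of the trimming these prints are meant to expose, assuming `skip_initial_inference_steps` and `skip_final_inference_steps` slice steps off the head and tail of the schedule (the slice closed by the `]` context line above):

```python
# Hedged sketch: how the skip parameters are assumed to trim the schedule.
timesteps = [1000, 900, 800, 700, 600, 500]
skip_initial_inference_steps = 1  # drop the first step
skip_final_inference_steps = 2    # drop the last two steps

trimmed = timesteps[
    skip_initial_inference_steps : len(timesteps) - skip_final_inference_steps
]
print(f"timesteps {trimmed}")       # timesteps [900, 800, 700]
num_inference_steps = len(trimmed)  # 3, as returned by retrieve_timesteps
```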
@@ -875,12 +882,18 @@ class LTXVideoPipeline(DiffusionPipeline):
             tone_map_compression_ratio: compression ratio for tone mapping, defaults to 0.0.
                 If set to 0.0, no tone mapping is applied. If set to 1.0 - full compression is applied.
         Examples:
-
         Returns:
             [`~pipelines.ImagePipelineOutput`] or `tuple`:
                 If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
                 returned where the first element is a list with the generated images
         """
+
+        print(f"[1ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
+
+
         if "mask_feature" in kwargs:
             deprecation_message = "The use of `mask_feature` is deprecated. It is no longer used in any computation and that doesn't affect the end results. It will be removed in a future version."
             deprecate("mask_feature", "1.0.0", deprecation_message, standard_warn=False)
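Note: probe [1] runs immediately after the docstring, before the pipeline prepares anything. If `__call__` accepts an optional `latents` argument that defaults to `None` (the usual diffusers convention; an assumption here), `latents.shape` raises `AttributeError` before generation even starts. A defensive variant of the same print:

```python
# Hedged sketch: the same probe, but safe when `latents` is still None.
latents = None  # value of the (assumed) optional argument before preparation
shape = getattr(latents, "shape", None)
print(f"latents {shape}")  # latents None
```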
@@ -948,6 +961,11 @@ class LTXVideoPipeline(DiffusionPipeline):
             skip_final_inference_steps=skip_final_inference_steps,
             **retrieve_timesteps_kwargs,
         )
+
+        print(f"[2ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
 
         if self.allowed_inference_steps is not None:
             for timestep in [round(x, 4) for x in timesteps.tolist()]:
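The context lines show each timestep being rounded to four decimals and checked against `self.allowed_inference_steps`. The loop body is outside this hunk, so the sketch below only illustrates the membership check that the rounding implies; the exact comparison and error message are assumptions:

```python
# Hedged sketch of the implied validation (loop body not shown in the diff).
allowed_inference_steps = [1.0, 0.9937, 0.9875, 0.9812]
timesteps = [1.0, 0.99372, 0.98751]

for timestep in [round(x, 4) for x in timesteps]:
    if timestep not in allowed_inference_steps:
        raise ValueError(f"timestep {timestep} is not in the allowed list")
```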
@@ -1016,7 +1034,12 @@ class LTXVideoPipeline(DiffusionPipeline):
             conditioning_items,
             max_new_tokens=text_encoder_max_tokens,
         )
-
+
+        print(f"[4ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
+
         # 3. Encode input prompt
         if self.text_encoder is not None:
             self.text_encoder = self.text_encoder.to(self._execution_device)
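By this point the same four-line print block has appeared three times, and the tags jump from `[2ADUC` to `[4ADUC`, suggesting a probe was removed along the way. If these probes are meant to stay for a while, a small helper behind the standard `logging` module would collapse the repetition and make them switchable at runtime; `debug_probe` below is a hypothetical name, not part of the pipeline:

```python
import logging

logger = logging.getLogger("aduc.ltx")

def debug_probe(tag, skip_initial, skip_final, latents=None):
    # Hypothetical helper replacing the repeated print blocks; emits nothing
    # unless logging is configured at DEBUG level.
    logger.debug(
        "[%s] skip_initial_inference_steps=%s skip_final_inference_steps=%s latents=%s",
        tag, skip_initial, skip_final, getattr(latents, "shape", None),
    )

# Usage, mirroring probe [4]:
# debug_probe("4ADUC", skip_initial_inference_steps,
#             skip_final_inference_steps, latents)
```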
@@ -1081,6 +1104,13 @@ class LTXVideoPipeline(DiffusionPipeline):
             generator=generator,
             vae_per_channel_normalize=vae_per_channel_normalize,
         )
+
+
+        print(f"[5ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
+
 
         # Update the latents with the conditioning items and patchify them into (b, n, c)
         latents, pixel_coords, conditioning_mask, num_cond_latents = (
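The comment in the context lines says the latents are patchified into `(b, n, c)`, which explains why the later probes report a different `latents.shape` than probe [5]. A minimal sketch of that reshape for a patch size of 1 (the patch size and the `(b, c, f, h, w)` layout are assumptions; the real call is not shown in this hunk):

```python
import torch

# (b, c, f, h, w) video latents -> (b, n, c) token sequence, patch size 1.
b, c, f, h, w = 1, 128, 8, 16, 16
latents = torch.randn(b, c, f, h, w)

tokens = latents.flatten(2).transpose(1, 2)  # (b, f*h*w, c)
print(tokens.shape)  # torch.Size([1, 2048, 128])
```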
@@ -1096,9 +1126,20 @@ class LTXVideoPipeline(DiffusionPipeline):
         )
         init_latents = latents.clone()  # Used for image_cond_noise_update
 
+
+
+        print(f"[6ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+
+        print(f"[7ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
         # 7. Denoising loop
         num_warmup_steps = max(
             len(timesteps) - num_inference_steps * self.scheduler.order, 0
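Probe [7] lands right before the warmup computation shown in the context lines. A worked example of that formula with illustrative numbers (`scheduler.order` is 1 for first-order schedulers; in diffusers pipelines this count typically gates progress-bar updates):

```python
# num_warmup_steps = max(len(timesteps) - num_inference_steps * order, 0)
timesteps = list(range(30))  # 30 scheduled timesteps (illustrative)
num_inference_steps = 28
order = 1

num_warmup_steps = max(len(timesteps) - num_inference_steps * order, 0)
print(num_warmup_steps)  # 2
```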
@@ -1289,7 +1330,14 @@ class LTXVideoPipeline(DiffusionPipeline):
             if callback_on_step_end is not None:
                 callback_on_step_end(self, i, t, {})
 
-
+
+
+        print(f"[8ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
+        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
+        print(f"skip_final_inference_steps {skip_final_inference_steps}")
+        print(f"latents {latents.shape}")
+
+        if offload_to_cpu:
             self.transformer = self.transformer.cpu()
             if self._execution_device == "cuda":
                 torch.cuda.empty_cache()
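The final probe fires after the denoising loop, just before the transformer is offloaded. The re-added `if offload_to_cpu:` guard plus `torch.cuda.empty_cache()` is the standard offload pattern; a self-contained sketch of it (the `offload` helper name is illustrative, not part of the pipeline):

```python
import torch

def offload(module: torch.nn.Module, execution_device: str) -> torch.nn.Module:
    # Move the weights to host memory, then release the CUDA caching
    # allocator's unused blocks so other processes can reclaim the VRAM.
    module = module.cpu()
    if execution_device == "cuda":
        torch.cuda.empty_cache()  # frees cached, not actively used, memory
    return module

# Usage, mirroring the hunk:
# self.transformer = offload(self.transformer, self._execution_device)
```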