Carlos s committed on
Commit
bc1a8df
·
verified ·
1 Parent(s): 36025be

Upload pipeline_ltx_video.py

Browse files
Files changed (1) hide show
  1. pipeline_ltx_video.py +51 -3
pipeline_ltx_video.py CHANGED
@@ -186,6 +186,13 @@ def retrieve_timesteps(
186
  ]
187
  scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
188
  num_inference_steps = len(timesteps)
 
 
 
 
 
 
 
189
 
190
  return timesteps, num_inference_steps
191
 
@@ -875,12 +882,18 @@ class LTXVideoPipeline(DiffusionPipeline):
875
  tone_map_compression_ratio: compression ratio for tone mapping, defaults to 0.0.
876
  If set to 0.0, no tone mapping is applied. If set to 1.0 - full compression is applied.
877
  Examples:
878
-
879
  Returns:
880
  [`~pipelines.ImagePipelineOutput`] or `tuple`:
881
  If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
882
  returned where the first element is a list with the generated images
883
  """
 
 
 
 
 
 
 
884
  if "mask_feature" in kwargs:
885
  deprecation_message = "The use of `mask_feature` is deprecated. It is no longer used in any computation and that doesn't affect the end results. It will be removed in a future version."
886
  deprecate("mask_feature", "1.0.0", deprecation_message, standard_warn=False)
@@ -948,6 +961,11 @@ class LTXVideoPipeline(DiffusionPipeline):
948
  skip_final_inference_steps=skip_final_inference_steps,
949
  **retrieve_timesteps_kwargs,
950
  )
 
 
 
 
 
951
 
952
  if self.allowed_inference_steps is not None:
953
  for timestep in [round(x, 4) for x in timesteps.tolist()]:
@@ -1016,7 +1034,12 @@ class LTXVideoPipeline(DiffusionPipeline):
1016
  conditioning_items,
1017
  max_new_tokens=text_encoder_max_tokens,
1018
  )
1019
-
 
 
 
 
 
1020
  # 3. Encode input prompt
1021
  if self.text_encoder is not None:
1022
  self.text_encoder = self.text_encoder.to(self._execution_device)
@@ -1081,6 +1104,13 @@ class LTXVideoPipeline(DiffusionPipeline):
1081
  generator=generator,
1082
  vae_per_channel_normalize=vae_per_channel_normalize,
1083
  )
 
 
 
 
 
 
 
1084
 
1085
  # Update the latents with the conditioning items and patchify them into (b, n, c)
1086
  latents, pixel_coords, conditioning_mask, num_cond_latents = (
@@ -1096,9 +1126,20 @@ class LTXVideoPipeline(DiffusionPipeline):
1096
  )
1097
  init_latents = latents.clone() # Used for image_cond_noise_update
1098
 
 
 
 
 
 
 
1099
  # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
1100
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
1101
 
 
 
 
 
 
1102
  # 7. Denoising loop
1103
  num_warmup_steps = max(
1104
  len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -1289,7 +1330,14 @@ class LTXVideoPipeline(DiffusionPipeline):
1289
  if callback_on_step_end is not None:
1290
  callback_on_step_end(self, i, t, {})
1291
 
1292
- if offload_to_cpu:
 
 
 
 
 
 
 
1293
  self.transformer = self.transformer.cpu()
1294
  if self._execution_device == "cuda":
1295
  torch.cuda.empty_cache()
 
186
  ]
187
  scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
188
  num_inference_steps = len(timesteps)
189
+
190
+
191
+ print(f"[ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
192
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
193
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
194
+ print(f"timesteps {timesteps}")
195
+
196
 
197
  return timesteps, num_inference_steps
198
 
 
882
  tone_map_compression_ratio: compression ratio for tone mapping, defaults to 0.0.
883
  If set to 0.0, no tone mapping is applied. If set to 1.0 - full compression is applied.
884
  Examples:
 
885
  Returns:
886
  [`~pipelines.ImagePipelineOutput`] or `tuple`:
887
  If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
888
  returned where the first element is a list with the generated images
889
  """
890
+
891
+ print(f"[1ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
892
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
893
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
894
+ print(f"latents {latents.shape}")
895
+
896
+
897
  if "mask_feature" in kwargs:
898
  deprecation_message = "The use of `mask_feature` is deprecated. It is no longer used in any computation and that doesn't affect the end results. It will be removed in a future version."
899
  deprecate("mask_feature", "1.0.0", deprecation_message, standard_warn=False)
 
961
  skip_final_inference_steps=skip_final_inference_steps,
962
  **retrieve_timesteps_kwargs,
963
  )
964
+
965
+ print(f"[2ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
966
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
967
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
968
+ print(f"latents {latents.shape}")
969
 
970
  if self.allowed_inference_steps is not None:
971
  for timestep in [round(x, 4) for x in timesteps.tolist()]:
 
1034
  conditioning_items,
1035
  max_new_tokens=text_encoder_max_tokens,
1036
  )
1037
+
1038
+ print(f"[4ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
1039
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
1040
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
1041
+ print(f"latents {latents.shape}")
1042
+
1043
  # 3. Encode input prompt
1044
  if self.text_encoder is not None:
1045
  self.text_encoder = self.text_encoder.to(self._execution_device)
 
1104
  generator=generator,
1105
  vae_per_channel_normalize=vae_per_channel_normalize,
1106
  )
1107
+
1108
+
1109
+ print(f"[5ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
1110
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
1111
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
1112
+ print(f"latents {latents.shape}")
1113
+
1114
 
1115
  # Update the latents with the conditioning items and patchify them into (b, n, c)
1116
  latents, pixel_coords, conditioning_mask, num_cond_latents = (
 
1126
  )
1127
  init_latents = latents.clone() # Used for image_cond_noise_update
1128
 
1129
+
1130
+
1131
+ print(f"[6ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
1132
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
1133
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
1134
+ print(f"latents {latents.shape}")
1135
  # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
1136
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
1137
 
1138
+
1139
+ print(f"[7ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
1140
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
1141
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
1142
+ print(f"latents {latents.shape}")
1143
  # 7. Denoising loop
1144
  num_warmup_steps = max(
1145
  len(timesteps) - num_inference_steps * self.scheduler.order, 0
 
1330
  if callback_on_step_end is not None:
1331
  callback_on_step_end(self, i, t, {})
1332
 
1333
+
1334
+
1335
+ print(f"[8ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
1336
+ print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
1337
+ print(f"skip_final_inference_steps {skip_final_inference_steps}")
1338
+ print(f"latents {latents.shape}")
1339
+
1340
+ if offload_to_cpu:
1341
  self.transformer = self.transformer.cpu()
1342
  if self._execution_device == "cuda":
1343
  torch.cuda.empty_cache()