caarleexx committed on
Commit
854c213
·
verified ·
1 Parent(s): d77f692

Update api/ltx_server_refactored.py

Browse files
Files changed (1) hide show
  1. api/ltx_server_refactored.py +14 -14
api/ltx_server_refactored.py CHANGED
@@ -590,7 +590,7 @@ class VideoService:
590
  print(f"[DEBUG] Carregando condicionamento: {filepath}")
591
  tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
592
  tensor = torch.nn.functional.pad(tensor, padding_values)
593
- out = tensor.to(self.device, dtype=self.runtime_autocast_dtype) if self.device == "cuda" else tensor.to(self.device)
594
  print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
595
  return out
596
 
@@ -642,7 +642,7 @@ class VideoService:
642
  print(f" - Dimensões de Saída: {downscaled_height}x{downscaled_width}")
643
 
644
  # --- Execução da Pipeline ---
645
- with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
646
 
647
  first_pass_kwargs = {
648
  "prompt": prompt,
@@ -651,7 +651,7 @@ class VideoService:
651
  "width": downscaled_width,
652
  "num_frames": (actual_num_frames//8)+1,
653
  "frame_rate": int(DEFAULT_FPS),
654
- "generator": torch.Generator(device=self.device).manual_seed(used_seed),
655
  "output_type": "latent",
656
  "conditioning_items": conditioning_items,
657
  "guidance_scale": float(guidance_scale),
@@ -874,17 +874,17 @@ class VideoService:
874
  print("[LOG] FASE 1: Geração de Latentes (Transformer na GPU)")
875
  self._set_generation_environment()
876
 
877
- latents_to_refine = torch.load(latents_path).to(self.device)
878
  print(f" [LOG] Latentes carregados para a GPU. Shape: {latents_to_refine.shape}")
879
 
880
- with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
881
  refine_height = latents_to_refine.shape[3] * self.pipeline.vae_scale_factor
882
  refine_width = latents_to_refine.shape[4] * self.pipeline.vae_scale_factor
883
  second_pass_kwargs = {
884
  "prompt": prompt, "negative_prompt": negative_prompt, "height": refine_height, "width": refine_width,
885
  "frame_rate": int(DEFAULT_FPS), "num_frames": latents_to_refine.shape[2],
886
  "latents": latents_to_refine, "guidance_scale": float(guidance_scale), "output_type": "latent",
887
- "generator": torch.Generator(device=self.device).manual_seed(used_seed),
888
  "conditioning_items": conditioning_items, **(self.config.get("second_pass", {}))
889
  }
890
  print(" [LOG] Enviando para a pipeline de refinamento (Transformer)...")
@@ -907,7 +907,7 @@ class VideoService:
907
 
908
  if len(pontos_de_corte) == 1:
909
  pixel_tensor = vae_manager_singleton.decode(
910
- final_latents_cpu.to(self.device),
911
  decode_timestep=float(self.config.get("decode_timestep", 0.05))
912
  ).cpu()
913
  else:
@@ -922,7 +922,7 @@ class VideoService:
922
  print(f" -> Decodificando Grupo {i+1}/{len(pontos_de_corte)} (latentes {start} a {end-1}), shape: {latent_chunk.shape}")
923
 
924
  pixel_chunk = vae_manager_singleton.decode(
925
- latent_chunk.to(self.device),
926
  decode_timestep=float(self.config.get("decode_timestep", 0.05))
927
  )
928
  pixel_chunks_list.append(pixel_chunk.cpu())
@@ -960,10 +960,10 @@ class VideoService:
960
  chunks = self._split_latents_with_overlap(latents)
961
  pixel_chunks = []
962
 
963
- with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
964
  for chunk in chunks:
965
  if chunk.shape[2] == 0: continue
966
- pixel_chunk = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
967
  pixel_chunks.append(pixel_chunk)
968
 
969
  final_pixel_tensor = self._merge_chunks_with_overlap(pixel_chunks)
@@ -1041,10 +1041,10 @@ class VideoService:
1041
 
1042
  def _move_models_to_device(self):
1043
  """Move os modelos carregados para o dispositivo de computação (GPU/CPU)."""
1044
- print(f"[INFO] Movendo modelos para o dispositivo: {self.device}")
1045
- self.pipeline.to(self.device)
1046
  if self.latent_upsampler:
1047
- self.latent_upsampler.to(self.device)
1048
 
1049
  def _get_precision_dtype(self) -> torch.dtype:
1050
  """Determina o dtype para autocast com base na configuração de precisão."""
@@ -1072,7 +1072,7 @@ class VideoService:
1072
  """Carrega uma imagem, redimensiona, aplica padding e move para o dispositivo."""
1073
  tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
1074
  tensor = F.pad(tensor, padding)
1075
- return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
1076
 
1077
  def _calculate_downscaled_dims(self, height: int, width: int) -> Tuple[int, int]:
1078
  """Calcula as dimensões para o primeiro passo (baixa resolução)."""
 
590
  print(f"[DEBUG] Carregando condicionamento: {filepath}")
591
  tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
592
  tensor = torch.nn.functional.pad(tensor, padding_values)
593
+ out = tensor.to(self.transformer_devices[0], dtype=self.runtime_autocast_dtype) if self.transformer_devices[0] == "cuda" else tensor.to(self.transformer_devices[0])
594
  print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
595
  return out
596
 
 
642
  print(f" - Dimensões de Saída: {downscaled_height}x{downscaled_width}")
643
 
644
  # --- Execução da Pipeline ---
645
+ with torch.autocast(device_type=self.transformer_devices[0].split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.transformer_devices[0] == 'cuda')):
646
 
647
  first_pass_kwargs = {
648
  "prompt": prompt,
 
651
  "width": downscaled_width,
652
  "num_frames": (actual_num_frames//8)+1,
653
  "frame_rate": int(DEFAULT_FPS),
654
+ "generator": torch.Generator(device=self.transformer_devices[0]).manual_seed(used_seed),
655
  "output_type": "latent",
656
  "conditioning_items": conditioning_items,
657
  "guidance_scale": float(guidance_scale),
 
874
  print("[LOG] FASE 1: Geração de Latentes (Transformer na GPU)")
875
  self._set_generation_environment()
876
 
877
+ latents_to_refine = torch.load(latents_path).to(self.transformer_devices[0])
878
  print(f" [LOG] Latentes carregados para a GPU. Shape: {latents_to_refine.shape}")
879
 
880
+ with torch.autocast(device_type=self.transformer_devices[0].split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.transformer_devices[0] == 'cuda')):
881
  refine_height = latents_to_refine.shape[3] * self.pipeline.vae_scale_factor
882
  refine_width = latents_to_refine.shape[4] * self.pipeline.vae_scale_factor
883
  second_pass_kwargs = {
884
  "prompt": prompt, "negative_prompt": negative_prompt, "height": refine_height, "width": refine_width,
885
  "frame_rate": int(DEFAULT_FPS), "num_frames": latents_to_refine.shape[2],
886
  "latents": latents_to_refine, "guidance_scale": float(guidance_scale), "output_type": "latent",
887
+ "generator": torch.Generator(device=self.transformer_devices[0]).manual_seed(used_seed),
888
  "conditioning_items": conditioning_items, **(self.config.get("second_pass", {}))
889
  }
890
  print(" [LOG] Enviando para a pipeline de refinamento (Transformer)...")
 
907
 
908
  if len(pontos_de_corte) == 1:
909
  pixel_tensor = vae_manager_singleton.decode(
910
+ final_latents_cpu.to(self.transformer_devices[0]),
911
  decode_timestep=float(self.config.get("decode_timestep", 0.05))
912
  ).cpu()
913
  else:
 
922
  print(f" -> Decodificando Grupo {i+1}/{len(pontos_de_corte)} (latentes {start} a {end-1}), shape: {latent_chunk.shape}")
923
 
924
  pixel_chunk = vae_manager_singleton.decode(
925
+ latent_chunk.to(self.transformer_devices[0]),
926
  decode_timestep=float(self.config.get("decode_timestep", 0.05))
927
  )
928
  pixel_chunks_list.append(pixel_chunk.cpu())
 
960
  chunks = self._split_latents_with_overlap(latents)
961
  pixel_chunks = []
962
 
963
+ with torch.autocast(device_type=self.transformer_devices[0].split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.transformer_devices[0] == 'cuda')):
964
  for chunk in chunks:
965
  if chunk.shape[2] == 0: continue
966
+ pixel_chunk = vae_manager_singleton.decode(chunk.to(self.transformer_devices[0]), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
967
  pixel_chunks.append(pixel_chunk)
968
 
969
  final_pixel_tensor = self._merge_chunks_with_overlap(pixel_chunks)
 
1041
 
1042
  def _move_models_to_device(self):
1043
  """Move os modelos carregados para o dispositivo de computação (GPU/CPU)."""
1044
+ print(f"[INFO] Movendo modelos para o dispositivo: {self.transformer_devices[0]}")
1045
+ self.pipeline.to(self.transformer_devices[0])
1046
  if self.latent_upsampler:
1047
+ self.latent_upsampler.to(self.transformer_devices[0])
1048
 
1049
  def _get_precision_dtype(self) -> torch.dtype:
1050
  """Determina o dtype para autocast com base na configuração de precisão."""
 
1072
  """Carrega uma imagem, redimensiona, aplica padding e move para o dispositivo."""
1073
  tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
1074
  tensor = F.pad(tensor, padding)
1075
+ return tensor.to(self.transformer_devices[0], dtype=self.runtime_autocast_dtype)
1076
 
1077
  def _calculate_downscaled_dims(self, height: int, width: int) -> Tuple[int, int]:
1078
  """Calcula as dimensões para o primeiro passo (baixa resolução)."""