Update api/ltx_server_refactored.py
api/ltx_server_refactored.py (+14 -14)
@@ -590,7 +590,7 @@ class VideoService:
        print(f"[DEBUG] Loading conditioning: {filepath}")
        tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
        tensor = torch.nn.functional.pad(tensor, padding_values)
-       out = tensor.to(self.device, dtype=self.runtime_autocast_dtype) if self.device == "cuda" else tensor.to(self.device)
+       out = tensor.to(self.transformer_devices[0], dtype=self.runtime_autocast_dtype) if self.transformer_devices[0] == "cuda" else tensor.to(self.transformer_devices[0])
        print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
        return out
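Like every other hunk in this commit, the change swaps `self.device` for `self.transformer_devices[0]` as the compute target, keeping the existing pattern of casting to the autocast dtype only on CUDA. One caveat: the literal comparison `self.transformer_devices[0] == "cuda"` is false for an indexed entry such as `"cuda:0"`, in which case the dtype cast is silently skipped. A minimal sketch of the same move with the check normalized through `torch.device` (the `to_compute` helper is hypothetical, not part of the file):

```python
import torch

def to_compute(tensor: torch.Tensor, device: str, dtype: torch.dtype) -> torch.Tensor:
    """Hypothetical helper mirroring the hunk above: cast only on CUDA,
    but normalize through torch.device so "cuda:0" is also recognized."""
    if torch.device(device).type == "cuda":
        return tensor.to(device, dtype=dtype)
    return tensor.to(device)
```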
@@ -642,7 +642,7 @@ class VideoService:
        print(f" - Output dimensions: {downscaled_height}x{downscaled_width}")

        # --- Pipeline execution ---
-       with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
+       with torch.autocast(device_type=self.transformer_devices[0].split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.transformer_devices[0] == 'cuda')):

            first_pass_kwargs = {
                "prompt": prompt,
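`torch.autocast` expects a device *type* (`"cuda"`, `"cpu"`), not an indexed device string, which is what the `split(':')[0]` is for. A self-contained sketch of the pattern, with `torch.bfloat16` standing in for `runtime_autocast_dtype`:

```python
import torch

# Assumed shape of transformer_devices[0]: a device string, possibly indexed.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# autocast wants the device type ("cuda"), not "cuda:0" -- hence the split.
with torch.autocast(device_type=device.split(':')[0],
                    dtype=torch.bfloat16,
                    enabled=device.startswith("cuda")):
    x = torch.randn(2, 2, device=device) @ torch.randn(2, 2, device=device)
```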
@@ -651,7 +651,7 @@ class VideoService:
                "width": downscaled_width,
                "num_frames": (actual_num_frames // 8) + 1,
                "frame_rate": int(DEFAULT_FPS),
-               "generator": torch.Generator(device=self.device).manual_seed(used_seed),
+               "generator": torch.Generator(device=self.transformer_devices[0]).manual_seed(used_seed),
                "output_type": "latent",
                "conditioning_items": conditioning_items,
                "guidance_scale": float(guidance_scale),
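A `torch.Generator` is bound to one device and has to live where the sampling happens; building it on `transformer_devices[0]` and seeding it with `used_seed` keeps the first pass reproducible. A minimal illustration:

```python
import torch

# The generator's device must match the device of the tensors it seeds.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
gen = torch.Generator(device=device).manual_seed(42)
noise = torch.randn(1, 4, 8, 8, generator=gen, device=device)
```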
@@ -874,17 +874,17 @@ class VideoService:
        print("[LOG] PHASE 1: Latent generation (Transformer on GPU)")
        self._set_generation_environment()

-       latents_to_refine = torch.load(latents_path).to(self.device)
+       latents_to_refine = torch.load(latents_path).to(self.transformer_devices[0])
        print(f" [LOG] Latents loaded onto the GPU. Shape: {latents_to_refine.shape}")

-       with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
+       with torch.autocast(device_type=self.transformer_devices[0].split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
            refine_height = latents_to_refine.shape[3] * self.pipeline.vae_scale_factor
            refine_width = latents_to_refine.shape[4] * self.pipeline.vae_scale_factor
            second_pass_kwargs = {
                "prompt": prompt, "negative_prompt": negative_prompt, "height": refine_height, "width": refine_width,
                "frame_rate": int(DEFAULT_FPS), "num_frames": latents_to_refine.shape[2],
                "latents": latents_to_refine, "guidance_scale": float(guidance_scale), "output_type": "latent",
-               "generator": torch.Generator(device=self.device).manual_seed(used_seed),
+               "generator": torch.Generator(device=self.transformer_devices[0]).manual_seed(used_seed),
                "conditioning_items": conditioning_items, **(self.config.get("second_pass", {}))
            }
            print(" [LOG] Sending to the refinement pipeline (Transformer)...")
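One inconsistency worth flagging: on the new line 880 the `enabled=` flag still reads `self.device`, while `device_type` now derives from `self.transformer_devices[0]`. If the two attributes ever point at different devices, autocast is toggled by the wrong one. A sketch deriving both from the same source:

```python
import torch

# Sketch: take device_type and enabled from the same device string, so the
# autocast toggle cannot drift from the device actually used for compute.
transformer_devices = ["cuda:0"] if torch.cuda.is_available() else ["cpu"]
dev = transformer_devices[0]
with torch.autocast(device_type=dev.split(':')[0],
                    dtype=torch.bfloat16,
                    enabled=dev.startswith("cuda")):
    pass  # pipeline call would go here
```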
@@ -907,7 +907,7 @@ class VideoService:

        if len(pontos_de_corte) == 1:
            pixel_tensor = vae_manager_singleton.decode(
-               final_latents_cpu.to(self.device),
+               final_latents_cpu.to(self.transformer_devices[0]),
                decode_timestep=float(self.config.get("decode_timestep", 0.05))
            ).cpu()
        else:
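The single-group path keeps the latents on the CPU, moves them to the compute device only for the decode call, and pulls the pixels straight back to host memory, so GPU residency stays bounded by one decode. The round trip as a hypothetical wrapper:

```python
import torch

def decode_on_device(decode_fn, latents_cpu: torch.Tensor, device: str) -> torch.Tensor:
    """Hypothetical wrapper for the round trip above: latents live on the CPU,
    visit `device` only for the duration of the decode, and the resulting
    pixels come straight back to host memory."""
    return decode_fn(latents_cpu.to(device)).cpu()
```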
@@ -922,7 +922,7 @@ class VideoService:
                print(f" -> Decoding group {i+1}/{len(pontos_de_corte)} (latents {start} to {end-1}), shape: {latent_chunk.shape}")

                pixel_chunk = vae_manager_singleton.decode(
-                   latent_chunk.to(self.device),
+                   latent_chunk.to(self.transformer_devices[0]),
                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
                )
                pixel_chunks_list.append(pixel_chunk.cpu())
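The multi-group path applies the same offload per group: only one latent group is on the device at a time, and each pixel chunk is moved to the CPU as soon as it is produced. A sketch of the loop's shape, with `decode_fn` and the frame-axis concatenation as assumptions (the real code stitches groups via `pontos_de_corte` bookkeeping not shown in this hunk):

```python
import torch

def decode_in_groups(decode_fn, latents: torch.Tensor, cut_points, device: str) -> torch.Tensor:
    """Sketch of the grouped decode: one group resident on `device` at a
    time, pixels offloaded to the CPU immediately. `cut_points` are frame
    indices delimiting the groups; concatenating on dim 2 assumes the
    [B, C, F, H, W] latent layout used elsewhere in the file."""
    bounds = [0, *cut_points]
    pixel_chunks = []
    for start, end in zip(bounds[:-1], bounds[1:]):
        chunk = latents[:, :, start:end]
        pixel_chunks.append(decode_fn(chunk.to(device)).cpu())
    return torch.cat(pixel_chunks, dim=2)
```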
@@ -960,10 +960,10 @@ class VideoService:
        chunks = self._split_latents_with_overlap(latents)
        pixel_chunks = []

-       with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
+       with torch.autocast(device_type=self.transformer_devices[0].split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.transformer_devices[0] == 'cuda')):
            for chunk in chunks:
                if chunk.shape[2] == 0: continue
-               pixel_chunk = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+               pixel_chunk = vae_manager_singleton.decode(chunk.to(self.transformer_devices[0]), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
                pixel_chunks.append(pixel_chunk)

        final_pixel_tensor = self._merge_chunks_with_overlap(pixel_chunks)
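Here the chunks come from `_split_latents_with_overlap`, so neighbouring chunks share frames and the final merge can blend the seams away. A hypothetical stand-in for the splitter, assuming frames on dim 2 as elsewhere in the file:

```python
import torch

def split_with_overlap(latents: torch.Tensor, chunk: int = 16, overlap: int = 2):
    """Hypothetical stand-in for _split_latents_with_overlap: windows along
    the frame axis (dim 2) that share `overlap` frames with their neighbour,
    so the merge step can cross-fade the seams away."""
    frames = latents.shape[2]
    step = chunk - overlap
    return [latents[:, :, s:s + chunk] for s in range(0, frames, step)]
```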
@@ -1041,10 +1041,10 @@ class VideoService:

    def _move_models_to_device(self):
        """Move the loaded models to the compute device (GPU/CPU)."""
-       print(f"[INFO] Moving models to device: {self.device}")
-       self.pipeline.to(self.device)
+       print(f"[INFO] Moving models to device: {self.transformer_devices[0]}")
+       self.pipeline.to(self.transformer_devices[0])
        if self.latent_upsampler:
-           self.latent_upsampler.to(self.device)
+           self.latent_upsampler.to(self.transformer_devices[0])

    def _get_precision_dtype(self) -> torch.dtype:
        """Determine the dtype for autocast based on the precision setting."""
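The commit never shows how `self.transformer_devices` is populated; every hunk only reads index 0. Purely as an assumption about its shape, it behaves like a list of device strings whose first entry is the primary compute device:

```python
import torch

# Assumed initialization (not shown in this commit): a list of device
# strings whose first entry is the primary compute device.
transformer_devices = (
    [f"cuda:{i}" for i in range(torch.cuda.device_count())]
    if torch.cuda.is_available()
    else ["cpu"]
)
```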
@@ -1072,7 +1072,7 @@ class VideoService:
        """Load an image, resize, apply padding, and move it to the device."""
        tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
        tensor = F.pad(tensor, padding)
-       return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
+       return tensor.to(self.transformer_devices[0], dtype=self.runtime_autocast_dtype)

    def _calculate_downscaled_dims(self, height: int, width: int) -> Tuple[int, int]:
        """Compute the dimensions for the first pass (low resolution)."""
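Unlike the conditioning loader at line 593, this helper casts to `self.runtime_autocast_dtype` unconditionally, even when the target is the CPU. For reference, `F.pad` consumes padding pairs from the last dimension inward, so for an image tensor the tuple reads (left, right, top, bottom):

```python
import torch
import torch.nn.functional as F

# F.pad pads from the last dimension inward: (left, right, top, bottom)
# for an image tensor shaped [..., H, W].
img = torch.zeros(3, 224, 224)
padded = F.pad(img, (8, 8, 4, 4))   # W: 224 -> 240, H: 224 -> 232
assert padded.shape == (3, 232, 240)
```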