Spaces:

caarleexx
/

Aduc

Paused

App Files Files Community

caarleexx committed on Nov 1, 2025

Commit

bc7a0e0

verified ·

1 Parent(s): 3f5d2ec

Update api/ltx_server_refactored.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored.py +169 -0

api/ltx_server_refactored.py CHANGED Viewed

@@ -19,6 +19,14 @@ from pathlib import Path
 from typing import List, Dict, Optional, Tuple, Union
 import cv2
 # --- Configurações de Logging e Avisos ---
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -247,6 +255,55 @@ class VideoService:
         RESULTS_DIR.mkdir(exist_ok=True)
         print(f"[INFO] VideoService pronto. Tempo de inicialização: {time.perf_counter()-t0:.2f}s")
     # --------------------------------------------------------------------------
     # --- Métodos Públicos (API do Serviço) ---
     # --------------------------------------------------------------------------
@@ -510,6 +567,8 @@ class VideoService:
         return final_video_path, final_latents_path, used_seed
     def refine_texture_only(
         self,
         latents_path: str,
@@ -518,6 +577,116 @@ class VideoService:
         guidance_scale: float,
         seed: Optional[int] = None,
         conditioning_items: Optional[List[ConditioningItem]] = None
     ) -> Tuple[str, str]:
         """
         ETAPA 2: Refina a textura dos latentes existentes.

 from typing import List, Dict, Optional, Tuple, Union
 import cv2
+# --- MEMORY OPTIMIZATION TRIGGER ---
+# If the ADUC_MEMORY_OPTIMIZATION environment variable is "1", "true", or "yes" (case-insensitive), model offloading is enabled.
+# Enabled by default (the fallback value is "1"), for safety in environments with limited VRAM.
+ENABLE_MEMORY_OPTIMIZATION = os.getenv("ADUC_MEMORY_OPTIMIZATION", "1").lower() in ["1", "true", "yes"]
 # --- Configurações de Logging e Avisos ---
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
         RESULTS_DIR.mkdir(exist_ok=True)
         print(f"[INFO] VideoService pronto. Tempo de inicialização: {time.perf_counter()-t0:.2f}s")
+    # --------------------------------------------------------------------------
+    # --- Métodos de Gerenciamento de Memória (com Gatilho) ---
+    # --------------------------------------------------------------------------
+    def _set_generation_environment(self):
+        """Put the models used for generation (Transformer, Text Encoder) on the GPU.
+        With ENABLE_MEMORY_OPTIMIZATION off, every pipeline model that is not on CUDA
+        is moved to ``self.device`` silently. With it on, the VAE is first moved to
+        the CPU, the Transformer and Text Encoder are moved to ``self.device`` if
+        needed, and the CUDA cache is emptied; each step is logged.
+        """
+        pipe = self.pipeline
+        def on_gpu(module):
+            # Placement is judged from the module's first parameter.
+            return next(module.parameters()).is_cuda
+        if not ENABLE_MEMORY_OPTIMIZATION:
+            # Optimization disabled: only make sure all three models are resident.
+            for attr in ("transformer", "text_encoder", "vae"):
+                module = getattr(pipe, attr)
+                if not on_gpu(module):
+                    module.to(self.device)
+            return
+        print("\n  [VRAM Manager] Otimização ATIVA. Configurando ambiente de GERAÇÃO...")
+        # Evict the VAE before loading the generation models.
+        if on_gpu(pipe.vae):
+            pipe.vae.to('cpu')
+            print("    - Modelo VAE movido para a CPU.")
+        to_load = (
+            ("transformer", "    - Modelo Transformer carregado na GPU."),
+            ("text_encoder", "    - Modelo Text Encoder carregado na GPU."),
+        )
+        for attr, message in to_load:
+            module = getattr(pipe, attr)
+            if on_gpu(module):
+                continue
+            module.to(self.device)
+            print(message)
+        torch.cuda.empty_cache()
+        print("  [VRAM Manager] Ambiente de GERAÇÃO pronto.\n")
+    def _set_decode_environment(self):
+        """Put the model used for decoding (VAE) on the GPU.
+        Does nothing when ENABLE_MEMORY_OPTIMIZATION is off. Otherwise the Transformer
+        and Text Encoder are moved to the CPU if they are on CUDA, the VAE is moved
+        to ``self.device`` if it is not on CUDA, and the CUDA cache is emptied.
+        """
+        if ENABLE_MEMORY_OPTIMIZATION:
+            pipe = self.pipeline
+            def on_gpu(module):
+                # Placement is judged from the module's first parameter.
+                return next(module.parameters()).is_cuda
+            print("\n  [VRAM Manager] Otimização ATIVA. Configurando ambiente de DECODIFICAÇÃO...")
+            to_offload = (
+                ("transformer", "    - Modelo Transformer movido para a CPU."),
+                ("text_encoder", "    - Modelo Text Encoder movido para a CPU."),
+            )
+            for attr, message in to_offload:
+                module = getattr(pipe, attr)
+                if on_gpu(module):
+                    module.to('cpu')
+                    print(message)
+            vae = pipe.vae
+            if not on_gpu(vae):
+                vae.to(self.device)
+                print("    - Modelo VAE carregado na GPU.")
+            torch.cuda.empty_cache()
+            print("  [VRAM Manager] Ambiente de DECODIFICAÇÃO pronto.\n")
     # --------------------------------------------------------------------------
     # --- Métodos Públicos (API do Serviço) ---
     # --------------------------------------------------------------------------
         return final_video_path, final_latents_path, used_seed
     def refine_texture_only(
         self,
         latents_path: str,
         guidance_scale: float,
         seed: Optional[int] = None,
         conditioning_items: Optional[List[ConditioningItem]] = None
+    ) -> Tuple[str, str]:
+        """
+        Refine saved latents with the pipeline, decode them with the VAE and save the result.
+        Models are swapped on and off the GPU between the two phases ("hot-swap").
+        Args:
+            latents_path: Path of the latent tensor, loaded with ``torch.load``.
+            guidance_scale: Guidance scale forwarded to the pipeline as a float.
+            seed: Seed for the generator; a random 32-bit seed is drawn when None.
+            conditioning_items: Optional conditioning items forwarded to the pipeline.
+        Returns:
+            ``(video_path, latents_path)`` as returned by ``_save_video_from_tensor``
+            and ``_save_latents_to_disk``.
+        NOTE(review): the body also forwards ``prompt`` and ``negative_prompt``, which are
+        not among the parameter lines visible here -- presumably elided by the diff view;
+        confirm against the full file.
+        """
+        print("\n======================================================================")
+        print("====== [INFO] Iniciando ETAPA 2: Refinamento e Decodificação com Hot-Swap ======")
+        print("======================================================================\n")
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_refine_")
+        self._register_tmp_dir(temp_dir)
+        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
+        # --- PHASE 1: LATENT GENERATION (TRANSFORMER WORK) ---
+        print("[LOG] FASE 1: Geração de Latentes (Transformer na GPU)")
+        self._set_generation_environment()
+        # NOTE(review): torch.load unpickles the file; latents_path must come from a trusted source.
+        latents_to_refine = torch.load(latents_path).to(self.device)
+        print(f"  [LOG] Latentes carregados para a GPU. Shape: {latents_to_refine.shape}")
+        # Derive the autocast switch from the parsed device type so that an indexed
+        # device string such as "cuda:0" does not silently disable autocast.
+        device_type = self.device.split(':')[0]
+        with torch.autocast(device_type=device_type, dtype=self.runtime_autocast_dtype, enabled=(device_type == 'cuda')):
+            refine_height = latents_to_refine.shape[3] * self.pipeline.vae_scale_factor
+            refine_width = latents_to_refine.shape[4] * self.pipeline.vae_scale_factor
+            # Entries from config["second_pass"] are unpacked last and override the keys above.
+            second_pass_kwargs = {
+                "prompt": prompt, "negative_prompt": negative_prompt, "height": refine_height, "width": refine_width,
+                "frame_rate": int(DEFAULT_FPS), "num_frames": latents_to_refine.shape[2],
+                "latents": latents_to_refine, "guidance_scale": float(guidance_scale), "output_type": "latent",
+                "generator": torch.Generator(device=self.device).manual_seed(used_seed),
+                "conditioning_items": conditioning_items, **(self.config.get("second_pass", {}))
+            }
+            print("  [LOG] Enviando para a pipeline de refinamento (Transformer)...")
+            final_latents = self.pipeline(**second_pass_kwargs).images
+            print(f"  [LOG] [SUCESSO] Latentes refinados. Shape: {final_latents.shape}")
+        print("  [LOG] Geração de latentes concluída. Movendo resultado para a CPU.")
+        final_latents_cpu = final_latents.cpu()
+        # Drop the GPU references before the decode models are brought in.
+        del final_latents, latents_to_refine
+        # --- PHASE 2: CHUNKED DECODING (VAE WORK) ---
+        print("\n[LOG] FASE 2: Decodificação de Latentes (VAE na GPU)")
+        self._set_decode_environment()
+        # Same value for every decode call, so read it from the config once.
+        decode_timestep = float(self.config.get("decode_timestep", 0.05))
+        total_latents = final_latents_cpu.shape[2]
+        if total_latents <= 10:
+            print(f"  [LOG] Detecção: Vídeo curto ({total_latents} latentes). Usando decodificação direta.")
+            pixel_tensor = vae_manager_singleton.decode(
+                final_latents_cpu.to(self.device),
+                decode_timestep=decode_timestep
+            ).cpu()
+        else:
+            print(f"  [LOG] Detecção: Vídeo longo ({total_latents} latentes). Ativando modo de janela deslizante.")
+            # Three windows along dim 2, each extended by `sobreposicao` latents on the
+            # sides that touch a neighbouring window.
+            sobreposicao = 2
+            tamanho_base = (total_latents - 1) // 3
+            pontos_de_corte = [
+                (0, tamanho_base + sobreposicao),
+                (tamanho_base - sobreposicao, (2 * tamanho_base) + sobreposicao),
+                ((2 * tamanho_base) - sobreposicao, total_latents)
+            ]
+            pixel_chunks_list = []
+            for i, (start, end) in enumerate(pontos_de_corte):
+                latent_chunk = final_latents_cpu[:, :, start:end, :, :]
+                print(f"    -> Decodificando Grupo {i+1} (latentes {start} a {end-1}), shape: {latent_chunk.shape}")
+                pixel_chunk = vae_manager_singleton.decode(
+                    latent_chunk.to(self.device),
+                    decode_timestep=decode_timestep
+                )
+                # Keep only the CPU copy so one decoded window is on the GPU at a time.
+                pixel_chunks_list.append(pixel_chunk.cpu())
+                torch.cuda.empty_cache()
+            print("    [LOG] Costurando os vídeos decodificados...")
+            # The factor 8 presumably is the number of pixel frames per latent frame -- TODO confirm.
+            frames_p1 = tamanho_base * 8
+            parte1 = pixel_chunks_list[0][:, :, :frames_p1, :, :]
+            # Windows 2 and 3 discard the frames decoded from their leading overlap.
+            descarte_inicio_p2 = sobreposicao * 8
+            frames_p2 = tamanho_base * 8
+            parte2 = pixel_chunks_list[1][:, :, descarte_inicio_p2 : descarte_inicio_p2 + frames_p2, :, :]
+            descarte_inicio_p3 = sobreposicao * 8
+            parte3 = pixel_chunks_list[2][:, :, descarte_inicio_p3:, :, :]
+            pixel_tensor = torch.cat([parte1, parte2, parte3], dim=2)
+        print(f"\n[LOG] [SUCESSO] Tensor de pixels final montado na CPU com shape: {pixel_tensor.shape}")
+        # --- PHASE 3: SAVING AND RESTORING THE ENVIRONMENT ---
+        print("\n[LOG] FASE 3: Salvamento e Restauração do Ambiente da GPU")
+        video_path_out = self._save_video_from_tensor(pixel_tensor, "refined_video_final", used_seed, temp_dir)
+        latents_path_out = self._save_latents_to_disk(final_latents_cpu, "latents_refined_final", used_seed)
+        print("  [LOG] Tarefa concluída. Restaurando ambiente de GERAÇÃO na GPU para a próxima execução...")
+        self._set_generation_environment()
+        print("  [LOG] Liberando tensores finais da memória da CPU.")
+        del pixel_tensor, final_latents_cpu
+        self._finalize()
+        print("\n======================================================================")
+        print("============ [SUCCESS] ETAPA 2 Concluída com Sucesso =============")
+        print("======================================================================\n")
+        return video_path_out, latents_path_out
+    def refine_texture_only1(
+        self,
+        latents_path: str,
+        prompt: str,
+        negative_prompt: str,
+        guidance_scale: float,
+        seed: Optional[int] = None,
+        conditioning_items: Optional[List[ConditioningItem]] = None
     ) -> Tuple[str, str]:
         """
         ETAPA 2: Refina a textura dos latentes existentes.