Spaces:

caarleexx
/

Aduc

Paused

App Files Files Community

caarleexx committed on Nov 1, 2025

Commit

cd55bb4

verified ·

1 Parent(s): 854c213

Update api/ltx_server_refactored.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored.py +268 -41

api/ltx_server_refactored.py CHANGED Viewed

@@ -214,6 +214,273 @@ GPU_CONFIG = {
 class VideoService:
     def __init__(self):
         """Inicializa o serviço com 4 workers especializados."""
@@ -265,46 +532,6 @@ class VideoService:
         RESULTS_DIR.mkdir(exist_ok=True)
         print(f"[INFO] VideoService 4-Workers pronto. Tempo: {time.perf_counter()-t0:.2f}s")
-    def _setup_4gpu_workers(self):
-        """Set up 4 specialized workers without deepcopy.
-
-        In multi-GPU mode the transformer and text encoder are moved to the
-        first transformer device, the VAE and the optional latent upsampler to
-        the first VAE device; the remaining devices are only logged. Otherwise
-        the whole pipeline (and the upsampler, if any) goes to
-        ``self.device_ltx``.
-        """
-        if self.multi_gpu_enabled:
-            print("[INFO] Distribuindo modelos em 4 workers...")
-            # Workers 0 and 1: full Transformer + Text Encoder
-            # Move the full models to each transformer GPU
-            for i, device in enumerate(self.transformer_devices):
-                print(f"[INFO] Worker {i} (Transformer): {device}")
-                # To avoid deepcopy, the main model is moved to the first GPU
-                # and for the others the already-loaded model is reused and moved to that GPU
-                if i == 0:
-                    self.pipeline.transformer.to(device)
-                    self.pipeline.text_encoder.to(device)
-                    #self.pipeline.patchifier.to(device)
-                else:
-                    # For additional GPUs the same model is used and moved between GPUs when needed
-                    # In practice only one transformer GPU is used at a time
-                    pass
-            # Workers 2 and 3: VAE only
-            # No deepcopy - the same VAE is shared between GPUs
-            for i, device in enumerate(self.vae_devices):
-                print(f"[INFO] Worker {i+2} (VAE): {device}")
-                # The VAE is moved to the GPU when it is used
-                # Initially it stays on the first VAE GPU
-                if i == 0:
-                    self.pipeline.vae.to(device)
-            # Upscaler - kept on the first VAE GPU
-            if self.latent_upsampler:
-                self.latent_upsampler.to(self.vae_devices[0])
-            print("[INFO] Distribuição 4-Workers concluída.")
-        else:
-            # Fallback for single GPU
-            self.pipeline.to(self.device_ltx)
-            if self.latent_upsampler:
-                self.latent_upsampler.to(self.device_ltx)
     def _set_generation_environment(self):
         """Prepara o ambiente para geração (LTX pipeline)."""
         if not ENABLE_MEMORY_OPTIMIZATION:
@@ -594,7 +821,7 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
-    def generate_low_resolution(
         self,
         prompt: str,
         negative_prompt: str,

+# Add/modify these settings at the top of the file.
+# Precision settings. NOTE(review): in the code visible here only
+# "enable_fp8" is read (to choose the checkpoint variant); confirm whether
+# "default_dtype" and "fallback_dtype" are consumed elsewhere.
+PRECISION_CONFIG = {
+    "enable_fp8": False,  # FP8 disabled because of compatibility problems
+    "default_dtype": torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16,
+    "fallback_dtype": torch.float16
+}
+# Modify the VideoService class
+class VideoService:
+    def _get_safe_precision_dtype(self):
+        """Pick a conservative dtype to avoid dtype conflicts.
+
+        Returns:
+            ``torch.float32`` when CUDA is unavailable, ``torch.bfloat16`` when
+            ``torch.cuda.is_bf16_supported()`` is true, ``torch.float16``
+            otherwise.
+        """
+        if not torch.cuda.is_available():
+            return torch.float32
+        # Check bfloat16 support
+        if torch.cuda.is_bf16_supported():
+            print("[INFO] Usando bfloat16 (suportado pela GPU)")
+            return torch.bfloat16
+        else:
+            print("[INFO] Usando float16 (bfloat16 não suportado)")
+            return torch.float16
+    def _load_models_with_safe_dtype(self):
+        """Load the pipeline and latent upsampler with a safe dtype.
+
+        The pipeline is first loaded with ``self.runtime_autocast_dtype``; if
+        that raises, loading is retried with ``torch.float16``. Component
+        dtypes are then aligned and the latent upsampler is loaded with the
+        dtype that was actually used.
+
+        Returns:
+            A ``(pipeline, latent_upsampler)`` tuple; ``latent_upsampler`` is
+            whatever ``_load_latent_upsampler`` returns.
+        """
+        print("[INFO] Carregando modelos com dtype seguro...")
+        # FIX: use the safe dtype explicitly
+        torch_dtype = self.runtime_autocast_dtype
+        try:
+            pipeline = LTXVideoPipeline.from_pretrained(
+                "Lightricks/LTX-Video",
+                torch_dtype=torch_dtype,
+                variant="fp8" if PRECISION_CONFIG["enable_fp8"] else None,
+                cache_dir=MODEL_CACHE_DIR
+            )
+        except Exception as e:
+            # NOTE(review): any exception (not only dtype problems) triggers
+            # the retry below; the retry hard-codes float16 and does not pass
+            # `variant` - confirm both are intended.
+            print(f"[WARNING] Erro ao carregar com {torch_dtype}: {e}")
+            print("[INFO] Tentando carregar com float16...")
+            torch_dtype = torch.float16
+            pipeline = LTXVideoPipeline.from_pretrained(
+                "Lightricks/LTX-Video",
+                torch_dtype=torch_dtype,
+                cache_dir=MODEL_CACHE_DIR
+            )
+        # FIX: check and adjust the dtypes of the model components
+        self._ensure_consistent_dtypes(pipeline, torch_dtype)
+        # Load the upscaler with the same dtype
+        latent_upsampler = self._load_latent_upsampler(torch_dtype)
+        return pipeline, latent_upsampler
+    def _ensure_consistent_dtypes(self, pipeline, expected_dtype):
+        """Make sure the pipeline components share one dtype.
+
+        For the transformer, text encoder, VAE and patchifier: when the
+        component exposes ``parameters()`` and has at least one parameter, the
+        dtype of its first parameter is compared with ``expected_dtype`` and
+        the component is converted with ``.to(dtype=...)`` on mismatch.
+
+        Args:
+            pipeline: object exposing ``transformer``, ``text_encoder``,
+                ``vae`` and ``patchifier`` attributes.
+            expected_dtype: dtype every component should end up with.
+        """
+        print("[INFO] Verificando consistência de dtypes...")
+        components = [
+            (pipeline.transformer, "transformer"),
+            (pipeline.text_encoder, "text_encoder"),
+            (pipeline.vae, "vae"),
+            (pipeline.patchifier, "patchifier")
+        ]
+        for component, name in components:
+            # Components without parameters() or with no parameters are skipped.
+            if hasattr(component, 'parameters') and next(component.parameters(), None) is not None:
+                # Only the first parameter is inspected.
+                actual_dtype = next(component.parameters()).dtype
+                if actual_dtype != expected_dtype:
+                    print(f"[INFO] Convertendo {name} de {actual_dtype} para {expected_dtype}")
+                    component.to(dtype=expected_dtype)
+        print("[INFO] Verificação de dtypes concluída.")
+    def _load_latent_upsampler(self, torch_dtype):
+        """Load the latent upscaler with the given dtype.
+
+        Args:
+            torch_dtype: dtype forwarded to ``LatentUpscaler.from_pretrained``.
+
+        Returns:
+            The loaded upscaler, or ``None`` when the import or the load raises
+            (a warning is printed in that case).
+        """
+        try:
+            # Imported here so a failing import is handled like a failed load.
+            from ltx_video.models import LatentUpscaler
+            upscaler = LatentUpscaler.from_pretrained(
+                "Lightricks/LTX-Video",
+                subfolder="ltxv-spatial-upscaler-0.9.8",
+                torch_dtype=torch_dtype,
+                cache_dir=MODEL_CACHE_DIR
+            )
+            return upscaler
+        except Exception as e:
+            print(f"[WARNING] Não foi possível carregar o latent upscaler: {e}")
+            return None
+    def _setup_4gpu_workers(self):
+        """Configura 4 workers com verificação de dtype."""
+        if self.multi_gpu_enabled:
+            print("[INFO] Distribuindo modelos em 4 workers...")
+            # Workers 0 e 1: Transformer + Text Encoder
+            for i, device in enumerate(self.transformer_devices):
+                print(f"[INFO] Worker {i} (Transformer): {device}")
+                if i == 0:
+                    self.pipeline.transformer.to(device)
+                    self.pipeline.text_encoder.to(device)
+                    self.pipeline.patchifier.to(device)
+                # Nota: Para multi-worker transformer, precisaríamos de cópias do modelo
+            # Workers 2 e 3: VAE
+            for i, device in enumerate(self.vae_devices):
+                print(f"[INFO] Worker {i+2} (VAE): {device}")
+                if i == 0:
+                    self.pipeline.vae.to(device)
+            # Upscaler
+            if self.latent_upsampler:
+                self.latent_upsampler.to(self.vae_devices[0])
+            print("[INFO] Distribuição 4-Workers concluída.")
+            # CORREÇÃO: Verificar dtypes após mover para GPU
+            self._verify_gpu_dtypes()
+        else:
+            self.pipeline.to(self.device_ltx)
+            if self.latent_upsampler:
+                self.latent_upsampler.to(self.device_ltx)
+    def _verify_gpu_dtypes(self):
+        """Print dtype and device of the transformer and VAE after placement.
+
+        Purely informational: only the first parameter of each component is
+        inspected and nothing is converted or moved.
+        """
+        print("[INFO] Verificando dtypes nas GPUs...")
+        components = [
+            (self.pipeline.transformer, "transformer"),
+            (self.pipeline.vae, "vae")
+        ]
+        for component, name in components:
+            # Skip components without parameters() or with no parameters.
+            if hasattr(component, 'parameters') and next(component.parameters(), None) is not None:
+                param = next(component.parameters())
+                print(f"  {name}: dtype={param.dtype}, device={param.device}")
+        print("[INFO] Verificação de GPU dtypes concluída.")
+    def generate_low_resolution(self, prompt: str, negative_prompt: str,
+                              height: int, width: int, num_frames: int,
+                              guidance_scale: float, seed: Optional[int] = None,
+                              conditioning_items: Optional[List[ConditioningItem]] = None) -> Tuple[str, str, int]:
+        """Run stage 1: low-resolution generation with a safe dtype.
+
+        Latents are produced by ``self.pipeline`` (``output_type="latent"``)
+        under autocast, copied to CPU, saved to disk, decoded through the VAE
+        manager (in one pass or in chunks, depending on
+        ``_calculate_dynamic_cuts``) and written out as a video.
+
+        Args:
+            prompt: positive text prompt passed to the pipeline.
+            negative_prompt: negative text prompt passed to the pipeline.
+            height: output height passed to the pipeline.
+            width: output width passed to the pipeline.
+            num_frames: number of frames passed to the pipeline.
+            guidance_scale: guidance scale, converted to ``float``.
+            seed: RNG seed; a random 32-bit value is drawn when ``None``.
+            conditioning_items: optional conditioning items forwarded as-is.
+
+        Returns:
+            ``(video_path, latents_path, used_seed)``.
+        """
+        print("\n[INFO] Iniciando ETAPA 1: Geração de Baixa Resolução...")
+        self._set_generation_environment()
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_low_")
+        self._register_tmp_dir(temp_dir)
+        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
+        # Pick the device
+        if self.multi_gpu_enabled:
+            device = self.transformer_devices[0]
+        else:
+            device = self.device_ltx
+        print(f"  - Usando Seed: {used_seed}")
+        print(f"  - Frames: {num_frames}, Duração: {num_frames/DEFAULT_FPS:.1f}s")
+        print(f"  - Dimensões de Saída: {height}x{width}")
+        print(f"  - Dispositivo: {device}, Dtype: {self.runtime_autocast_dtype}")
+        # FIX: more robust autocast configuration
+        # NOTE(review): the string operations below assume `device` is a str
+        # such as "cuda:0" rather than a torch.device - confirm.
+        device_type = device.split(':')[0] if ':' in device else device
+        enabled_autocast = 'cuda' in device and self.runtime_autocast_dtype in [torch.float16, torch.bfloat16]
+        print(f"  - Autocast habilitado: {enabled_autocast}")
+        try:
+            with torch.autocast(device_type=device_type,
+                              dtype=self.runtime_autocast_dtype,
+                              enabled=enabled_autocast):
+                # Entries from self.config["first_pass"] are unpacked last, so
+                # they override the explicit keys above them.
+                first_pass_kwargs = {
+                    "prompt": prompt, "negative_prompt": negative_prompt,
+                    "height": height, "width": width,
+                    "frame_rate": int(DEFAULT_FPS), "num_frames": num_frames,
+                    "guidance_scale": float(guidance_scale),
+                    "output_type": "latent",
+                    "generator": torch.Generator(device=device).manual_seed(used_seed),
+                    "conditioning_items": conditioning_items,
+                    **(self.config.get("first_pass", {}))
+                }
+                print("  - Enviando para a pipeline LTX...")
+                latents = self.pipeline(**first_pass_kwargs).images
+                print(f"  [LOG] Latentes gerados. Shape: {latents.shape}, Dtype: {latents.dtype}")
+        except RuntimeError as e:
+            print(f"[ERROR] Erro durante a geração: {e}")
+            print("[INFO] Tentando fallback para float32...")
+            # Fallback to float32
+            # NOTE(review): this retry only disables autocast; nothing here
+            # casts the model weights to float32 - confirm this is the
+            # intended fallback. The generator is rebuilt with the same seed.
+            with torch.autocast(device_type=device_type, dtype=torch.float32, enabled=False):
+                first_pass_kwargs = {
+                    "prompt": prompt, "negative_prompt": negative_prompt,
+                    "height": height, "width": width,
+                    "frame_rate": int(DEFAULT_FPS), "num_frames": num_frames,
+                    "guidance_scale": float(guidance_scale),
+                    "output_type": "latent",
+                    "generator": torch.Generator(device=device).manual_seed(used_seed),
+                    "conditioning_items": conditioning_items,
+                    **(self.config.get("first_pass", {}))
+                }
+                latents = self.pipeline(**first_pass_kwargs).images
+        # The rest of the method is unchanged...
+        # Keep a CPU copy and drop the GPU tensor before decoding.
+        latents_cpu = latents.cpu()
+        del latents
+        torch.cuda.empty_cache()
+        # ... (decoding and saving)
+        latents_path = self._save_latents_to_disk(latents_cpu, "latents_low", used_seed)
+        print("\n[INFO] Decodificando vídeo de baixa resolução...")
+        self._set_decode_environment()
+        # Decoding (similar to the previous code)
+        # Dimension 2 of the latents is the one that gets split into chunks.
+        total_latents = latents_cpu.shape[2]
+        pontos_de_corte, segment_sizes = self._calculate_dynamic_cuts(total_latents)
+        if len(pontos_de_corte) == 1:
+            # Single cut: decode everything in one pass.
+            vae_device = self.vae_devices[0] if self.multi_gpu_enabled else self.device_vae
+            latents_for_decode = latents_cpu.to(vae_device)
+            vae_manager = self._get_vae_manager(vae_device)
+            pixel_tensor = vae_manager.decode(
+                latents_for_decode,
+                decode_timestep=float(self.config.get("decode_timestep", 0.05))
+            ).cpu()
+        else:
+            print(f"  [LOG] Decodificação em {len(pontos_de_corte)} chunks...")
+            pixel_chunks_list = []
+            for i, (start, end) in enumerate(pontos_de_corte):
+                # Clamp the cut to the valid range and skip empty chunks.
+                start, end = max(0, start), min(total_latents, end)
+                if start >= end:
+                    continue
+                latent_chunk = latents_cpu[:, :, start:end, :, :]
+                vae_device = self.vae_devices[0] if self.multi_gpu_enabled else self.device_vae
+                latent_chunk = latent_chunk.to(vae_device)
+                vae_manager = self._get_vae_manager(vae_device)
+                print(f"    -> Decodificando Grupo {i+1} (latentes {start} a {end-1})")
+                pixel_chunk = vae_manager.decode(
+                    latent_chunk,
+                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                )
+                # Move each decoded chunk to CPU and free cached GPU memory
+                # before decoding the next one.
+                pixel_chunks_list.append(pixel_chunk.cpu())
+                torch.cuda.empty_cache()
+            pixel_tensor = self._stitch_dynamic_chunks(pixel_chunks_list, segment_sizes)
+        video_path = self._save_video_from_tensor(pixel_tensor, "video_low", used_seed, temp_dir)
+        self._set_generation_environment()
+        del latents_cpu
+        self._finalize()
+        print("\n[SUCCESS] Geração de Baixa Resolução Concluída")
+        return video_path, latents_path, used_seed
 class VideoService:
     def __init__(self):
         """Inicializa o serviço com 4 workers especializados."""
         RESULTS_DIR.mkdir(exist_ok=True)
         print(f"[INFO] VideoService 4-Workers pronto. Tempo: {time.perf_counter()-t0:.2f}s")
     def _set_generation_environment(self):
         """Prepara o ambiente para geração (LTX pipeline)."""
         if not ENABLE_MEMORY_OPTIMIZATION:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
+    def generate_low_resolution1(
         self,
         prompt: str,
         negative_prompt: str,