eeuuia committed on
Commit
655068e
·
verified ·
1 Parent(s): b521010

Update api/ltx/ltx_aduc_pipeline.py

Browse files
Files changed (1) hide show
  1. api/ltx/ltx_aduc_pipeline.py +232 -140
api/ltx/ltx_aduc_pipeline.py CHANGED
@@ -1,175 +1,267 @@
1
  # FILE: api/ltx/ltx_aduc_pipeline.py
2
- # DESCRIPTION: A unified high-level client for submitting ALL LTX-related jobs (generation and VAE)
3
- # to the LTXAducManager pool.
 
4
 
 
 
5
  import logging
 
 
 
 
6
  import time
7
- import torch
 
8
  import random
9
- from typing import List, Optional, Tuple, Dict
 
 
10
  from PIL import Image
11
- from dataclasses import dataclass
12
- from pathlib import Path
13
- import sys
14
 
15
- from api.ltx.ltx_utils import load_image_to_tensor_with_resize_and_crop # Importa o helper de ltx_utils
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # O cliente importa o MANAGER para submeter todos os trabalhos.
18
- from api.ltx.ltx_aduc_manager import ltx_aduc_manager
19
 
20
- # Adiciona o path do LTX-Video para importações de baixo nível e tipos.
21
- LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
22
  def add_deps_to_path():
23
  repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
24
  if repo_path not in sys.path:
25
  sys.path.insert(0, repo_path)
26
  add_deps_to_path()
27
 
28
- from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
29
- from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
30
-
 
 
 
 
 
 
31
 
32
  # ==============================================================================
33
- # --- DEFINIÇÕES DE ESTRUTURA ---
34
  # ==============================================================================
35
 
36
- @dataclass
37
- class LatentConditioningItem:
38
- """Estrutura de dados para passar latentes condicionados ao job de geração."""
39
- latent_tensor: torch.Tensor
40
- media_frame_number: int
41
- conditioning_strength: float
42
-
43
- # ==============================================================================
44
- # --- FUNÇÕES DE TRABALHO (Jobs a serem executados no Pool LTX) ---
45
- # ==============================================================================
46
 
47
- def _job_encode_media(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, pixel_tensor: torch.Tensor) -> torch.Tensor:
48
- """Job que usa o VAE do pipeline para codificar um tensor de pixel."""
49
- vae = pipeline.vae
50
- pixel_tensor_gpu = pixel_tensor.to(vae.device, dtype=vae.dtype)
51
- latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
52
- return latents.cpu()
53
-
54
- def _job_decode_latent(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, latent_tensor: torch.Tensor) -> torch.Tensor:
55
- """Job que usa o VAE do pipeline para decodificar um tensor latente."""
56
- vae = pipeline.vae
57
- latent_tensor_gpu = latent_tensor.to(vae.device, dtype=vae.dtype)
58
- pixels = vae_decode(latent_tensor_gpu, vae, is_video=True, vae_per_channel_normalize=True)
59
- return pixels.cpu()
60
-
61
- def _job_generate_latent_chunk(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, **kwargs) -> torch.Tensor:
62
- """Job que usa o pipeline principal para gerar um chunk de vídeo latente."""
63
- generator = torch.Generator(device=pipeline.device).manual_seed(kwargs['seed'])
64
- pipeline_kwargs = {"generator": generator, "output_type": "latent", **kwargs}
65
-
66
- with torch.autocast(device_type=pipeline.device.type, dtype=autocast_dtype):
67
- latents_raw = pipeline(**pipeline_kwargs).images
68
 
69
- return latents_raw.cpu()
 
70
 
71
- # ==============================================================================
72
- # --- A CLASSE CLIENTE UNIFICADA ---
73
- # ==============================================================================
 
 
 
 
74
 
75
- class LtxAducPipeline:
76
- """
77
- Cliente unificado para orquestrar todas as tarefas LTX, incluindo geração e VAE.
78
- """
79
- def __init__(self):
80
- logging.info("✅ Unified LTX/VAE ADUC Pipeline (Client) initialized.")
81
- self.FRAMES_ALIGNMENT = 8
 
 
 
82
 
83
- def _get_random_seed(self) -> int:
84
- return random.randint(0, 2**32 - 1)
 
85
 
86
- def _align(self, dim: int, alignment: int = 8) -> int:
87
- return ((dim + alignment - 1) // alignment) * alignment
88
-
89
- # --- Métodos de API para o Orquestrador ---
90
-
91
- def encode_to_conditioning_items(self, media_list: List, params: List, resolution: Tuple[int, int]) -> List[LatentConditioningItem]:
92
- """Converte uma lista de imagens em uma lista de LatentConditioningItem."""
93
- pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, resolution[0], resolution[1]) for m in media_list]
94
- items = []
95
- for i, pt in enumerate(pixel_tensors):
96
- latent_tensor = ltx_aduc_manager.submit_job(_job_encode_media, pixel_tensor=pt)
97
- frame_number, strength = params[i]
98
- items.append(LatentConditioningItem(
99
- latent_tensor=latent_tensor,
100
- media_frame_number=frame_number,
101
- conditioning_strength=strength
102
- ))
103
- return items
104
-
105
- def decode_to_pixels(self, latent_tensor: torch.Tensor) -> torch.Tensor:
106
- """Decodifica um tensor latente em um tensor de pixels."""
107
- return ltx_aduc_manager.submit_job(_job_decode_latent, latent_tensor=latent_tensor)
108
-
109
- def generate_latents(
110
  self,
111
  prompt_list: List[str],
112
- duration_in_seconds: float,
113
- common_ltx_args: Dict,
114
- initial_conditioning_items: Optional[List[LatentConditioningItem]] = None
115
- ) -> Tuple[Optional[torch.Tensor], Optional[int]]:
116
- """Gera um vídeo latente completo a partir de uma lista de prompts."""
117
- t0 = time.time()
118
- logging.info(f"LTX Client received a generation job for {len(prompt_list)} scenes.")
119
  used_seed = self._get_random_seed()
 
 
120
 
 
 
121
  num_chunks = len(prompt_list)
122
- total_frames = self._align(int(duration_in_seconds * 24))
123
- frames_per_chunk_base = total_frames // num_chunks if num_chunks > 0 else total_frames
124
- overlap_frames = self._align(9) if num_chunks > 1 else 0
125
-
126
- final_latents_list = []
127
- overlap_condition_item = None
128
-
129
- for i, chunk_prompt in enumerate(prompt_list):
130
- current_conditions = []
131
- if i == 0 and initial_conditioning_items:
132
- current_conditions.extend(initial_conditioning_items)
133
- if overlap_condition_item:
134
- current_conditions.append(overlap_condition_item)
135
-
136
- num_frames_for_chunk = frames_per_chunk_base
137
- if i == num_chunks - 1:
138
- processed_frames = sum(f.shape[2] for f in final_latents_list)
139
- num_frames_for_chunk = total_frames - processed_frames
140
- num_frames_for_chunk = self._align(num_frames_for_chunk)
141
- if num_frames_for_chunk <= 0: continue
142
-
143
- job_specific_args = {
144
- "prompt": chunk_prompt,
145
- "num_frames": num_frames_for_chunk,
146
- "seed": used_seed + i,
147
- "conditioning_items": current_conditions
148
- }
149
- final_job_args = {**common_ltx_args, **job_specific_args}
150
-
151
- chunk_latents = ltx_aduc_manager.submit_job(_job_generate_latent_chunk, **final_job_args)
152
 
153
- if chunk_latents is None:
154
- logging.error(f"Failed to generate latents for scene {i+1}. Aborting.")
155
- return None, used_seed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- if i < num_chunks - 1:
158
- overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
159
- overlap_condition_item = LatentConditioningItem(
160
- latent_tensor=overlap_latents, media_frame_number=0, conditioning_strength=1.0)
161
- final_latents_list.append(chunk_latents[:, :, :-overlap_frames, :, :])
162
- else:
163
- final_latents_list.append(chunk_latents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- if not final_latents_list:
166
- logging.warning("No latent chunks were generated.")
167
- return None, used_seed
168
-
169
- final_latents = torch.cat(final_latents_list, dim=2)
170
- logging.info(f"LTX Client job finished in {time.time() - t0:.2f}s. Final latent shape: {final_latents.shape}")
171
 
172
- return final_latents, used_seed
173
 
174
- # --- INSTÂNCIA SINGLETON DO CLIENTE ---
175
- ltx_aduc_pipeline = LtxAducPipeline()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # FILE: api/ltx/ltx_aduc_pipeline.py
2
+ # DESCRIPTION: Final high-level orchestrator for LTX-Video generation.
3
+ # This version acts as a client to the specialized managers (LTX, VAE),
4
+ # focusing solely on the business logic of video generation workflows.
5
 
6
+ import gc
7
+ import json
8
  import logging
9
+ import os
10
+ import shutil
11
+ import sys
12
+ import tempfile
13
  import time
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple, Union
16
  import random
17
+ import torch
18
+ import yaml
19
+ import numpy as np
20
  from PIL import Image
 
 
 
21
 
22
# ==============================================================================
# --- PROJECT SETUP AND IMPORTS ---
# ==============================================================================

# Logging configuration and warning suppression.
import warnings
warnings.filterwarnings("ignore")
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
# Log level is overridable through the ADUC_LOG_LEVEL environment variable.
log_level = os.environ.get("ADUC_LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=log_level, format='[%(levelname)s] [%(name)s] %(message)s')

# --- Configuration constants ---
DEPS_DIR = Path("/data")                     # root directory for external dependencies
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"  # local clone of the LTX-Video repository
RESULTS_DIR = Path("/app/output")            # destination for final videos and latents
DEFAULT_FPS = 24.0                           # fps used for duration->frames math and encoding
FRAMES_ALIGNMENT = 8                         # frame counts are aligned to multiples of 8

from api.ltx.ltx_utils import seed_everything
from utils.debug_utils import log_function_io
42
 
 
 
43
 
44
# Make the vendored LTX-Video library importable.
def add_deps_to_path():
    """Prepend the resolved LTX-Video repo path to sys.path (idempotent)."""
    resolved = str(LTX_VIDEO_REPO_DIR.resolve())
    if resolved in sys.path:
        return
    sys.path.insert(0, resolved)
add_deps_to_path()
50
 
51
# --- Modules from our architecture ---
# These singletons are created at import time by their own modules; a failure
# here means the service cannot run, so the process exits immediately.
try:
    from api.managers.gpu_manager import gpu_manager
    from api.ltx.ltx_aduc_manager import ltx_pool_manager, LatentConditioningItem
    from api.ltx.vae_aduc_pipeline import vae_server_singleton
    from tools.video_encode_tool import video_encode_tool_singleton
except ImportError as e:
    logging.critical(f"A crucial import from the local API/architecture failed. Error: {e}", exc_info=True)
    sys.exit(1)
60
 
61
  # ==============================================================================
62
+ # --- CLASSE DE SERVIÇO (O ORQUESTRADOR) ---
63
  # ==============================================================================
64
 
65
class VideoService:
    """
    High-level orchestrator for video generation. All low-level work is
    delegated to the specialized managers and utility modules.
    """

    @log_function_io
    def __init__(self):
        t0 = time.time()
        logging.info("Initializing VideoService Orchestrator...")

        # Both singletons must have come up successfully at import time.
        managers_ok = ltx_pool_manager is not None and vae_server_singleton is not None
        if not managers_ok:
            raise RuntimeError("A required manager (LTX or VAE) failed to initialize. Aborting.")

        self.pipeline = ltx_pool_manager.get_pipeline()
        self.config = ltx_pool_manager.config
        # Generation and VAE stages may live on different devices.
        self.main_device = self.pipeline.device
        self.vae_device = self.pipeline.vae.device

        self._apply_precision_policy()
        logging.info(f"VideoService ready. Using Main: {self.main_device}, VAE: {self.vae_device}. Startup time: {time.time() - t0:.2f}s")
86
 
87
+ def finalize(self):
88
+ """Cleans up GPU memory after a generation task."""
89
+ gc.collect()
90
+ if torch.cuda.is_available():
91
+ with torch.cuda.device(self.main_device):
92
+ torch.cuda.empty_cache()
93
+ with torch.cuda.device(self.vae_device):
94
+ torch.cuda.empty_cache()
95
+ try: torch.cuda.ipc_collect()
96
+ except Exception: pass
97
 
98
    # ==========================================================================
    # --- BUSINESS LOGIC: UNIFIED PUBLIC ORCHESTRATOR ---
    # ==========================================================================

    @log_function_io
    def generate_low_resolution(
        self,
        prompt_list: List[str],
        initial_media_items: Optional[List[Tuple[Union[str, Image.Image, torch.Tensor], int, float]]] = None,
        **kwargs
    ) -> Tuple[Optional[str], Optional[str], Optional[int]]:
        """
        [UNIFIED ORCHESTRATOR] Generates a video from a list of prompts and raw media items.

        One latent chunk is generated per prompt; consecutive chunks are
        stitched by conditioning each chunk on the last `overlap_frames`
        latent frames of the previous one, then trimming that overlap before
        concatenation. Chunks are spilled to disk (.pt files) between steps
        and reloaded only for the final concatenation.

        Args:
            prompt_list: One prompt per scene; must be non-empty.
            initial_media_items: Optional (media, target_frame, strength)
                tuples, turned into conditioning items for the FIRST scene only.
            **kwargs: 'duration' (seconds, default 4.0) is read here; 'height'
                and 'width' are required at least when media items are given
                (and are also read by _generate_single_chunk_low, to which all
                remaining kwargs are forwarded).

        Returns:
            (video_path, latents_path, used_seed) on success;
            (None, None, None) if any step fails (the error is logged).
        """
        logging.info("Starting unified low-resolution generation...")
        used_seed = self._get_random_seed()
        seed_everything(used_seed)
        logging.info(f"Using randomly generated seed: {used_seed}")

        if not prompt_list: raise ValueError("Prompt list cannot be empty.")
        is_narrative = len(prompt_list) > 1
        num_chunks = len(prompt_list)
        total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0))
        # Per-chunk budget, floored to a multiple of FRAMES_ALIGNMENT but never below it.
        frames_per_chunk = max(FRAMES_ALIGNMENT, (total_frames // num_chunks // FRAMES_ALIGNMENT) * FRAMES_ALIGNMENT)
        # NOTE(review): 9 presumably matches the 8n+1 latent-frame rule used by
        # _align(..., 'n*8+1') — confirm against the LTX pipeline's constraints.
        overlap_frames = 9 if is_narrative else 0

        initial_conditions = []
        if initial_media_items:
            logging.info("Delegating to VaeServer to prepare initial conditioning items...")
            initial_conditions = vae_server_singleton.generate_conditioning_items(
                media_items=[item[0] for item in initial_media_items],
                target_frames=[item[1] for item in initial_media_items],
                strengths=[item[2] for item in initial_media_items],
                target_resolution=(kwargs['height'], kwargs['width'])
            )

        temp_latent_paths = []
        overlap_condition_item: Optional[LatentConditioningItem] = None

        try:
            for i, chunk_prompt in enumerate(prompt_list):
                logging.info(f"Processing scene {i+1}/{num_chunks}: '{chunk_prompt[:50]}...'")

                # The last chunk absorbs the rounding remainder; chunks after the
                # first get extra frames to cover the overlap that is trimmed below.
                current_frames_base = frames_per_chunk if i < num_chunks - 1 else total_frames - ((num_chunks - 1) * frames_per_chunk)
                current_frames = current_frames_base + (overlap_frames if i > 0 else 0)
                current_frames = self._align(current_frames, alignment_rule='n*8+1')

                # NOTE(review): on i == 0 this ALIASES initial_conditions rather
                # than copying it. Harmless today only because
                # overlap_condition_item is still None on the first iteration,
                # so nothing is appended to the shared list — confirm if edited.
                current_conditions = initial_conditions if i == 0 else []
                if overlap_condition_item: current_conditions.append(overlap_condition_item)

                chunk_latents = self._generate_single_chunk_low(
                    prompt=chunk_prompt, num_frames=current_frames, seed=used_seed + i,
                    conditioning_items=current_conditions, **kwargs
                )
                if chunk_latents is None: raise RuntimeError(f"Failed to generate latents for scene {i+1}.")

                # Capture the tail of this chunk (dim 2 = latent frames) as the
                # conditioning anchor for the next scene.
                if is_narrative and i < num_chunks - 1:
                    overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
                    overlap_condition_item = LatentConditioningItem(
                        latent_tensor=overlap_latents.cpu(),
                        media_frame_number=0,
                        conditioning_strength=1.0
                    )

                # Drop the leading overlap that duplicates the previous chunk's tail.
                if i > 0: chunk_latents = chunk_latents[:, :, overlap_frames:, :, :]

                # Spill to disk so only one chunk's latents stay in memory at a time.
                chunk_path = RESULTS_DIR / f"temp_chunk_{i}_{used_seed}.pt"
                torch.save(chunk_latents.cpu(), chunk_path)
                temp_latent_paths.append(chunk_path)

            base_filename = "narrative_video" if is_narrative else "single_video"
            all_tensors_cpu = [torch.load(p) for p in temp_latent_paths]
            final_latents = torch.cat(all_tensors_cpu, dim=2)

            video_path, latents_path = self._finalize_generation(final_latents, base_filename, used_seed)
            return video_path, latents_path, used_seed
        except Exception as e:
            logging.error(f"Error during unified generation: {e}", exc_info=True)
            return None, None, None
        finally:
            # Always remove temp chunk files and release GPU memory, even on failure.
            for path in temp_latent_paths:
                if path.exists(): path.unlink()
            self.finalize()
181
+
182
    # ==========================================================================
    # --- INTERNAL WORK UNITS AND HELPERS ---
    # ==========================================================================

    @log_function_io
    def _generate_single_chunk_low(self, **kwargs) -> Optional[torch.Tensor]:
        """[WORKER] Calls the patched LTX pipeline to generate a single chunk of latents.

        Expects in kwargs: 'prompt', 'negative_prompt', 'height', 'width',
        'num_frames', 'seed', 'conditioning_items'; optional
        'ltx_configs_override' to patch the first-pass config.
        Returns the raw latent tensor on self.main_device.
        """
        # Pad requested resolution to FRAMES_ALIGNMENT, then downscale for the
        # low-res first pass, re-aligned to the VAE's scale factor.
        height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
        downscale_factor = self.config.get("downscale_factor", 0.6666666)
        vae_scale_factor = self.pipeline.vae_scale_factor
        downscaled_height = self._align(int(height_padded * downscale_factor), vae_scale_factor)
        downscaled_width = self._align(int(width_padded * downscale_factor), vae_scale_factor)

        # Shallow-copy the first-pass config so UI overrides never mutate self.config.
        first_pass_config = self.config.get("first_pass", {}).copy()
        if kwargs.get("ltx_configs_override"):
            self._apply_ui_overrides(first_pass_config, kwargs["ltx_configs_override"])

        # NOTE(review): **first_pass_config is expanded last, so config keys
        # silently win over the explicit keys above if they collide — confirm
        # that is intended.
        pipeline_kwargs = {
            "prompt": kwargs['prompt'], "negative_prompt": kwargs['negative_prompt'],
            "height": downscaled_height, "width": downscaled_width, "num_frames": kwargs['num_frames'],
            "frame_rate": int(DEFAULT_FPS), "generator": torch.Generator(device=self.main_device).manual_seed(kwargs['seed']),
            "output_type": "latent", "conditioning_items": kwargs['conditioning_items'], **first_pass_config
        }

        # Autocast only on CUDA devices, using the policy chosen at startup.
        with torch.autocast(device_type=self.main_device.type, dtype=self.runtime_autocast_dtype, enabled="cuda" in self.main_device.type):
            latents_raw = self.pipeline(**pipeline_kwargs).images

        return latents_raw.to(self.main_device)
210
 
211
+ @log_function_io
212
+ def _finalize_generation(self, final_latents: torch.Tensor, base_filename: str, seed: int) -> Tuple[str, str]:
213
+ """Delegates final decoding and encoding to specialist services."""
214
+ logging.info("Finalizing generation: decoding latents and encoding video.")
215
+
216
+ final_latents_path = RESULTS_DIR / f"latents_{base_filename}_{seed}.pt"
217
+ torch.save(final_latents, final_latents_path)
218
+ logging.info(f"Final latents saved to: {final_latents_path}")
219
+
220
+ pixel_tensor = vae_server_singleton.decode_to_pixels(
221
+ final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05))
222
+ )
223
+ video_path = self._save_and_log_video(pixel_tensor, f"{base_filename}_{seed}")
224
+ return str(video_path), str(final_latents_path)
225
+
226
    def _apply_ui_overrides(self, config_dict: Dict, overrides: Dict):
        # NOTE(review): stub — the UI override logic is not implemented, so any
        # 'ltx_configs_override' passed by callers is currently ignored.
        # When implemented it is expected to mutate config_dict in place
        # (callers pass a copy and do not use a return value).
        pass
229
+
230
+ def _save_and_log_video(self, pixel_tensor: torch.Tensor, base_filename: str) -> Path:
231
+ with tempfile.TemporaryDirectory() as temp_dir:
232
+ temp_path = os.path.join(temp_dir, f"{base_filename}.mp4")
233
+ video_encode_tool_singleton.save_video_from_tensor(pixel_tensor, temp_path, fps=DEFAULT_FPS)
234
+ final_path = RESULTS_DIR / f"{base_filename}.mp4"
235
+ shutil.move(temp_path, final_path)
236
+ logging.info(f"Video saved successfully to: {final_path}")
237
+ return final_path
238
+
239
+ def _apply_precision_policy(self):
240
+ precision = str(self.config.get("precision", "bfloat16")).lower()
241
+ if precision in ["float8_e4m3fn", "bfloat16"]: self.runtime_autocast_dtype = torch.bfloat16
242
+ elif precision == "mixed_precision": self.runtime_autocast_dtype = torch.float16
243
+ else: self.runtime_autocast_dtype = torch.float32
244
+ logging.info(f"Runtime precision policy set for autocast: {self.runtime_autocast_dtype}")
245
+
246
+ def _align(self, dim: int, alignment: int = FRAMES_ALIGNMENT, alignment_rule: str = 'default') -> int:
247
+ if alignment_rule == 'n*8+1':
248
+ return ((dim - 1) // alignment) * alignment + 1
249
+ return ((dim - 1) // alignment + 1) * alignment
250
+
251
+ def _calculate_aligned_frames(self, duration_s: float, min_frames: int = 1) -> int:
252
+ num_frames = int(round(duration_s * DEFAULT_FPS))
253
+ aligned_frames = self._align(num_frames, alignment=FRAMES_ALIGNMENT)
254
+ return max(aligned_frames, min_frames)
255
+
256
+ def _get_random_seed(self) -> int:
257
+ return random.randint(0, 2**32 - 1)
258
+
259
# ==============================================================================
# --- SINGLETON INSTANTIATION ---
# ==============================================================================
# NOTE(review): instantiated at import time, so merely importing this module
# spins up the whole service and kills the process on failure — confirm every
# importer expects that.
try:
    video_generation_service = VideoService()
    logging.info("Global VideoService orchestrator instance created successfully.")
except Exception as e:
    logging.critical(f"Failed to initialize VideoService: {e}", exc_info=True)
    sys.exit(1)