caarleexx committed
Commit f433ba6 · verified
1 Parent(s): 9094e95

Upload ltx-video-complete.py

Files changed (1)
  1. api/ltx-video-complete.py +1215 -0
api/ltx-video-complete.py ADDED
# ==============================================================================
# ltx_video_service_with_gpu_pools.py
# VideoService with an integrated Multi-GPU Pool Manager
# ==============================================================================
# Architecture:
# - GPUs 0 and 1: Pipeline + Upscaler (latent generation/refinement)
# - GPUs 2 and 3: VAE Decode (decoding latents to pixels)
# ==============================================================================

import os
import sys
import gc
import copy
import yaml
import time
import json
import random
import shutil
import warnings
import tempfile
import traceback
import subprocess
import threading
import queue
from pathlib import Path
from typing import Callable, List, Dict, Optional, Tuple, Union
from dataclasses import dataclass
from enum import Enum
import cv2
import torch
import torch.nn.functional as F
import numpy as np
from PIL import Image
from einops import rearrange
from huggingface_hub import hf_hub_download
from safetensors import safe_open

# --- Settings ---
ENABLE_MEMORY_OPTIMIZATION = os.getenv("ADUC_MEMORY_OPTIMIZATION", "1").lower() in ["1", "true", "yes"]
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
from huggingface_hub import logging as hf_logging
hf_logging.set_verbosity_error()

# --- Manager imports ---
from managers.vae_manager import vae_manager_singleton
from tools.video_encode_tool import video_encode_tool_singleton

# --- Global constants ---
LTXV_DEBUG = True
LTXV_FRAME_LOG_EVERY = 8
DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
RESULTS_DIR = Path("/app/output")
DEFAULT_FPS = 24.0

# ==============================================================================
# REPOSITORY SETUP AND IMPORTS
# ==============================================================================

def _run_setup_script():
    """Runs setup.py when the LTX-Video repository has not been cloned yet."""
    setup_script_path = "setup.py"
    if not os.path.exists(setup_script_path):
        print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
        return

    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup.py...")
    try:
        subprocess.run([sys.executable, setup_script_path], check=True, capture_output=True, text=True)
        print("[DEBUG] 'setup.py' finished successfully.")
    except subprocess.CalledProcessError as e:
        print(f"[ERROR] 'setup.py' failed (exit code {e.returncode}).\nOutput:\n{e.stdout}\n{e.stderr}")
        sys.exit(1)

def add_deps_to_path(repo_path: Path):
    """Adds the repository directory to sys.path for local imports."""
    resolved_path = str(repo_path.resolve())
    if resolved_path not in sys.path:
        sys.path.insert(0, resolved_path)
    if LTXV_DEBUG:
        print(f"[DEBUG] Added to sys.path: {resolved_path}")

if not LTX_VIDEO_REPO_DIR.exists():
    _run_setup_script()
add_deps_to_path(LTX_VIDEO_REPO_DIR)

# --- Imports that depend on the path added above ---
from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXVideoPipeline
from transformers import T5EncoderModel, T5Tokenizer, AutoModelForCausalLM, AutoProcessor, AutoTokenizer
from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
from ltx_video.models.transformers.transformer3d import Transformer3DModel
from ltx_video.schedulers.rf import RectifiedFlowScheduler
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
import ltx_video.pipelines.crf_compressor as crf_compressor

# ==============================================================================
# GPU POOL MANAGER - Multi-GPU system
# ==============================================================================

class GPUPoolType(Enum):
    """Available GPU pool types."""
    GENERATION = "generation"  # Pipeline + Upscaler
    DECODE = "decode"          # VAE Decode


@dataclass
class GPUTask:
    """A unit of work to be executed on a GPU."""
    task_id: str
    task_fn: Callable
    args: tuple
    kwargs: dict
    result_queue: queue.Queue


@dataclass
class GPUWorker:
    """A single GPU worker."""
    worker_id: int
    device_id: str
    pool_type: GPUPoolType
    thread: Optional[threading.Thread] = None
    is_busy: bool = False


class GPUPoolManager:
    """
    GPU pool manager that distributes tasks across devices.

    Architecture:
    - Pool 1 (GENERATION): 2 GPUs for pipeline + upscaler
    - Pool 2 (DECODE): 2 GPUs for VAE decode
    """

    def __init__(
        self,
        generation_devices: Optional[List[str]] = None,
        decode_devices: Optional[List[str]] = None,
        max_queue_size: int = 10
    ):
        """Initializes the pool manager and starts one worker thread per device."""
        self.generation_devices = generation_devices or ["cuda:0", "cuda:1"]
        self.decode_devices = decode_devices or ["cuda:2", "cuda:3"]

        self.generation_queue = queue.Queue(maxsize=max_queue_size)
        self.decode_queue = queue.Queue(maxsize=max_queue_size)

        self.generation_workers: List[GPUWorker] = []
        self.decode_workers: List[GPUWorker] = []

        self._shutdown = False
        self._lock = threading.Lock()

        self.stats = {
            "generation_tasks_completed": 0,
            "decode_tasks_completed": 0,
            "generation_tasks_failed": 0,
            "decode_tasks_failed": 0,
        }

        self._initialize_workers()

    def _initialize_workers(self):
        """Starts every GPU worker thread."""
        print("[GPU Pool Manager] Initializing workers...")

        for i, device in enumerate(self.generation_devices):
            worker = GPUWorker(
                worker_id=i,
                device_id=device,
                pool_type=GPUPoolType.GENERATION
            )
            worker.thread = threading.Thread(
                target=self._worker_loop,
                args=(worker, self.generation_queue),
                daemon=True
            )
            worker.thread.start()
            self.generation_workers.append(worker)
            print(f"  ✓ Generation worker {i} started on {device}")

        for i, device in enumerate(self.decode_devices):
            worker = GPUWorker(
                worker_id=i,
                device_id=device,
                pool_type=GPUPoolType.DECODE
            )
            worker.thread = threading.Thread(
                target=self._worker_loop,
                args=(worker, self.decode_queue),
                daemon=True
            )
            worker.thread.start()
            self.decode_workers.append(worker)
            print(f"  ✓ Decode worker {i} started on {device}")

        print(f"[GPU Pool Manager] {len(self.generation_workers)} GENERATION workers and {len(self.decode_workers)} DECODE workers active.\n")

    def _worker_loop(self, worker: GPUWorker, task_queue: queue.Queue):
        """Main loop of a worker thread: pull a task, run it, report the result."""
        print(f"[Worker {worker.worker_id}:{worker.device_id}] Waiting for tasks ({worker.pool_type.value})...")

        while not self._shutdown:
            try:
                # The 1-second timeout lets the loop re-check the shutdown flag
                # even when the queue is idle.
                task: GPUTask = task_queue.get(timeout=1.0)

                with self._lock:
                    worker.is_busy = True

                print(f"[Worker {worker.worker_id}:{worker.device_id}] Running task '{task.task_id}'...")

                try:
                    torch.cuda.set_device(worker.device_id)
                    # Every task function receives its assigned device as the
                    # first positional argument.
                    result = task.task_fn(
                        worker.device_id,
                        *task.args,
                        **task.kwargs
                    )
                    task.result_queue.put(("success", result))

                    with self._lock:
                        if worker.pool_type == GPUPoolType.GENERATION:
                            self.stats["generation_tasks_completed"] += 1
                        else:
                            self.stats["decode_tasks_completed"] += 1

                    print(f"[Worker {worker.worker_id}:{worker.device_id}] Task '{task.task_id}' completed successfully.")

                except Exception as e:
                    print(f"[Worker {worker.worker_id}:{worker.device_id}] ERROR in task '{task.task_id}': {e}")
                    traceback.print_exc()

                    task.result_queue.put(("error", str(e)))

                    with self._lock:
                        if worker.pool_type == GPUPoolType.GENERATION:
                            self.stats["generation_tasks_failed"] += 1
                        else:
                            self.stats["decode_tasks_failed"] += 1

                finally:
                    with self._lock:
                        worker.is_busy = False
                    task_queue.task_done()
                    torch.cuda.empty_cache()

            except queue.Empty:
                continue

    def submit_generation_task(
        self,
        task_id: str,
        task_fn: Callable,
        *args,
        **kwargs
    ) -> queue.Queue:
        """Submits a GENERATION task to the pool and returns its result queue."""
        result_queue = queue.Queue(maxsize=1)
        task = GPUTask(
            task_id=task_id,
            task_fn=task_fn,
            args=args,
            kwargs=kwargs,
            result_queue=result_queue
        )

        print(f"[GPU Pool Manager] Submitting GENERATION task: '{task_id}'")
        self.generation_queue.put(task)
        return result_queue

    def submit_decode_task(
        self,
        task_id: str,
        task_fn: Callable,
        *args,
        **kwargs
    ) -> queue.Queue:
        """Submits a DECODE task to the pool and returns its result queue."""
        result_queue = queue.Queue(maxsize=1)
        task = GPUTask(
            task_id=task_id,
            task_fn=task_fn,
            args=args,
            kwargs=kwargs,
            result_queue=result_queue
        )

        print(f"[GPU Pool Manager] Submitting DECODE task: '{task_id}'")
        self.decode_queue.put(task)
        return result_queue

    def get_result(self, result_queue: queue.Queue, timeout: Optional[float] = None):
        """Blocks until a task result is available and returns it."""
        status, result = result_queue.get(timeout=timeout)

        if status == "error":
            raise RuntimeError(f"Task failed: {result}")

        return result

    def submit_and_wait_generation(
        self,
        task_id: str,
        task_fn: Callable,
        *args,
        timeout: Optional[float] = None,
        **kwargs
    ):
        """Submits a generation task and waits for its result (blocking)."""
        result_queue = self.submit_generation_task(task_id, task_fn, *args, **kwargs)
        return self.get_result(result_queue, timeout=timeout)

    def submit_and_wait_decode(
        self,
        task_id: str,
        task_fn: Callable,
        *args,
        timeout: Optional[float] = None,
        **kwargs
    ):
        """Submits a decode task and waits for its result (blocking)."""
        result_queue = self.submit_decode_task(task_id, task_fn, *args, **kwargs)
        return self.get_result(result_queue, timeout=timeout)

    def wait_all(self):
        """Waits for every pending task to finish."""
        print("[GPU Pool Manager] Waiting for all tasks to finish...")
        self.generation_queue.join()
        self.decode_queue.join()
        print("[GPU Pool Manager] All tasks finished.")

    def get_stats(self) -> dict:
        """Returns pool usage statistics."""
        with self._lock:
            return {
                **self.stats,
                "generation_queue_size": self.generation_queue.qsize(),
                "decode_queue_size": self.decode_queue.qsize(),
                "generation_workers_busy": sum(1 for w in self.generation_workers if w.is_busy),
                "decode_workers_busy": sum(1 for w in self.decode_workers if w.is_busy),
            }

    def print_stats(self):
        """Prints formatted statistics."""
        stats = self.get_stats()
        print("\n" + "=" * 60)
        print("GPU POOL MANAGER - STATISTICS")
        print("=" * 60)
        print("Generation Pool:")
        print(f"  - Tasks completed: {stats['generation_tasks_completed']}")
        print(f"  - Tasks failed: {stats['generation_tasks_failed']}")
        print(f"  - Busy workers: {stats['generation_workers_busy']}/{len(self.generation_workers)}")
        print(f"  - Queue: {stats['generation_queue_size']} tasks")
        print("\nDecode Pool:")
        print(f"  - Tasks completed: {stats['decode_tasks_completed']}")
        print(f"  - Tasks failed: {stats['decode_tasks_failed']}")
        print(f"  - Busy workers: {stats['decode_workers_busy']}/{len(self.decode_workers)}")
        print(f"  - Queue: {stats['decode_queue_size']} tasks")
        print("=" * 60 + "\n")

    def shutdown(self):
        """Stops every worker thread."""
        print("[GPU Pool Manager] Shutting down...")
        self._shutdown = True

        for worker in self.generation_workers + self.decode_workers:
            if worker.thread:
                worker.thread.join(timeout=5.0)

        print("[GPU Pool Manager] Shut down.")


# Global singleton
_gpu_pool_manager_instance: Optional[GPUPoolManager] = None


def get_gpu_pool_manager(
    generation_devices: Optional[List[str]] = None,
    decode_devices: Optional[List[str]] = None,
    force_reinit: bool = False
) -> GPUPoolManager:
    """Returns the GPUPoolManager singleton, creating it on first use."""
    global _gpu_pool_manager_instance

    if _gpu_pool_manager_instance is None or force_reinit:
        if _gpu_pool_manager_instance and force_reinit:
            _gpu_pool_manager_instance.shutdown()

        _gpu_pool_manager_instance = GPUPoolManager(
            generation_devices=generation_devices,
            decode_devices=decode_devices
        )

    return _gpu_pool_manager_instance

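
# --- Usage sketch (illustrative only, not called anywhere) ---
# A minimal, hedged example of how the pool API above is meant to be driven.
# The `_demo_square` task below is hypothetical and exists only to show the
# calling convention: the pool passes the assigned device as the first
# positional argument of every task function, and results should come back
# on the CPU so they are safe to consume from the submitting thread.
def _example_pool_usage():
    def _demo_square(device_id: str, x: torch.Tensor) -> torch.Tensor:
        # Move the input to the worker's device, compute, return on CPU.
        return (x.to(device_id) ** 2).cpu()

    pool = get_gpu_pool_manager()
    # Blocking call: submits to the GENERATION queue and waits for the result.
    squared = pool.submit_and_wait_generation(
        "demo_square", _demo_square, torch.arange(4.0), timeout=30
    )
    pool.print_stats()
    return squared
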
# ==============================================================================
# PROCESSING HELPER FUNCTIONS
# ==============================================================================

def debug_log(message: str):
    """Conditional logging gated by LTXV_DEBUG."""
    if LTXV_DEBUG:
        print(f"[DEBUG] {message}")

def load_image_cv2(image_path: str, target_height: int, target_width: int) -> np.ndarray:
    """Loads an image with OpenCV, converts it to RGB and resizes it."""
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not load image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
    return image

def normalize_image(image: np.ndarray) -> np.ndarray:
    """Normalizes an image to [-1, 1]."""
    image = image.astype(np.float32) / 127.5 - 1.0
    return image

def denormalize_image(image: np.ndarray) -> np.ndarray:
    """Denormalizes an image from [-1, 1] back to [0, 255]."""
    image = (image + 1.0) * 127.5
    return np.clip(image, 0, 255).astype(np.uint8)

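# Worked example: normalize_image maps pixel 0 -> -1.0, 127.5 -> 0.0 and
# 255 -> 1.0; denormalize_image inverts the mapping, so
# denormalize_image(normalize_image(img)) recovers img for any uint8 image
# (up to rounding through the float32 round trip).
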
# ==============================================================================
# MAIN VIDEO SERVICE CLASS WITH GPU POOLS
# ==============================================================================

class VideoService:
    """
    Video generation service built on LTX Video and the Multi-GPU Pool Manager.

    GPU architecture:
    - GPUs 0 and 1: Pipeline + Upscaler (GENERATION pool)
    - GPUs 2 and 3: VAE Decode (DECODE pool)
    """

    def __init__(self):
        """Initializes the service and its GPU pools."""
        print("[VideoService] Initializing with Multi-GPU pools...")

        # Initialize the pool manager
        self.gpu_pool = get_gpu_pool_manager(
            generation_devices=["cuda:0", "cuda:1"],
            decode_devices=["cuda:2", "cuda:3"]
        )

        # Load configuration
        self.config = self._load_config("ltxv-13b-0.9.8-distilled-fp8.yaml")

        # Load the models once (a template that is cloned onto each GPU)
        self.pipeline_template, self.latent_upsampler_template = self._load_models_from_hub()

        # Clone the pipeline onto each generation GPU
        self.generation_models = {}
        for device in ["cuda:0", "cuda:1"]:
            self.generation_models[device] = self._clone_pipeline_to_device(device)

        # Clone the VAE onto each decode GPU
        self.decode_models = {}
        for device in ["cuda:2", "cuda:3"]:
            self.decode_models[device] = self._clone_vae_to_device(device)

        # Runtime settings
        self.runtime_autocast_dtype = self._get_precision_dtype()

        # Attach the template pipeline to the shared VAE manager
        vae_manager_singleton.attach_pipeline(
            self.pipeline_template,
            device="cuda:0",
            autocast_dtype=self.runtime_autocast_dtype
        )

        # Seed tracking and temporary storage
        self.used_seed = None
        self.tmp_dir = None
        self._register_tmp_dir()

        print("[VideoService] Initialized successfully!")
        print("[VideoService] Active GPU pools:")
        print("[VideoService]   - Generation: cuda:0, cuda:1")
        print("[VideoService]   - Decode: cuda:2, cuda:3")

    def _clone_pipeline_to_device(self, device: str) -> Dict:
        """Clones the generation models onto a specific device.

        nn.Module.to() moves modules in place, so the template is deep-copied
        first; otherwise every "clone" would share the same weights, all
        sitting on whichever device was moved to last. The deep copy is
        memory-hungry but gives each GPU an independent pipeline (this assumes
        LTXVideoPipeline supports the DiffusionPipeline-style .to()).
        """
        print(f"  Cloning pipeline to {device}...")
        models = {
            'pipeline': copy.deepcopy(self.pipeline_template).to(device),
        }

        if self.latent_upsampler_template:
            models['upsampler'] = copy.deepcopy(self.latent_upsampler_template).to(device)

        return models

    def _clone_vae_to_device(self, device: str) -> torch.nn.Module:
        """Clones the VAE onto a specific device (deep copy, see above)."""
        print(f"  Cloning VAE to {device}...")
        vae = copy.deepcopy(self.pipeline_template.vae).to(device)
        vae.eval()
        return vae

    # ==============================================================================
    # WORKER FUNCTIONS FOR THE POOL MANAGER
    # ==============================================================================

    def _generate_latents_worker(
        self,
        device_id: str,
        prompt: str,
        negative_prompt: str,
        height: int,
        width: int,
        num_frames: int,
        guidance_scale: float,
        seed: int,
        conditioning_items: Optional[List] = None
    ) -> torch.Tensor:
        """Latent generation worker (runs on cuda:0 or cuda:1)."""
        print(f"  [Generation Worker] Generating latents on {device_id}")

        generator = torch.Generator(device=device_id).manual_seed(seed)
        # Use the pipeline clone that lives on this worker's device.
        pipeline = self.generation_models[device_id]['pipeline']

        with torch.autocast(device_type='cuda', dtype=self.runtime_autocast_dtype):
            kwargs = {
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "height": height,
                "width": width,
                "num_frames": num_frames,
                "frame_rate": int(DEFAULT_FPS),
                "generator": generator,
                "output_type": "latent",
                "guidance_scale": float(guidance_scale),
                "conditioning_items": conditioning_items,
                **self.config.get("first_pass", {})
            }

            latents = pipeline(**kwargs).images

        # Apply the latent upsampler when one is available
        if 'upsampler' in self.generation_models[device_id]:
            latents = self._upsample_and_filter_latents(
                latents,
                self.generation_models[device_id]['upsampler'],
                device_id
            )

        return latents.cpu()

    def _refine_latents_worker(
        self,
        device_id: str,
        latents: torch.Tensor,
        prompt: str,
        negative_prompt: str,
        guidance_scale: float,
        seed: int,
        conditioning_items: Optional[List] = None
    ) -> torch.Tensor:
        """Latent refinement worker (runs on cuda:0 or cuda:1)."""
        print(f"  [Refine Worker] Refining latents on {device_id}")

        latents = latents.to(device_id)
        pipeline = self.generation_models[device_id]['pipeline']

        with torch.autocast(device_type='cuda', dtype=self.runtime_autocast_dtype):
            # The VAE downsamples spatially by 8x, so the pixel-space size is
            # recovered from the latent shape.
            refine_height = latents.shape[3] * 8  # vae_scale_factor
            refine_width = latents.shape[4] * 8

            kwargs = {
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "height": refine_height,
                "width": refine_width,
                "frame_rate": int(DEFAULT_FPS),
                "num_frames": latents.shape[2],
                "latents": latents,
                "guidance_scale": float(guidance_scale),
                "output_type": "latent",
                "generator": torch.Generator(device=device_id).manual_seed(seed),
                "conditioning_items": conditioning_items,
                **self.config.get("second_pass", {})
            }

            refined_latents = pipeline(**kwargs).images

        return refined_latents.cpu()

    def _decode_latents_worker(
        self,
        device_id: str,
        latents: torch.Tensor,
        decode_timestep: float = 0.05
    ) -> torch.Tensor:
        """Latent decoding worker (runs on cuda:2 or cuda:3).

        NOTE: decoding is routed through vae_manager_singleton (attached in
        __init__); the per-device VAE clone in self.decode_models is kept
        resident but is not passed to that call.
        """
        print(f"  [Decode Worker] Decoding on {device_id} (shape: {latents.shape})")

        latents = latents.to(device_id)
        vae = self.decode_models[device_id]  # per-device clone (unused by the singleton call below)

        with torch.no_grad():
            with torch.autocast(device_type='cuda', dtype=self.runtime_autocast_dtype):
                pixel_tensor = vae_manager_singleton.decode(
                    latents,
                    decode_timestep=decode_timestep
                )

        return pixel_tensor.cpu()

    # ==============================================================================
    # DATA PREPARATION METHODS
    # ==============================================================================

    def _load_image_to_tensor_with_resize_and_crop(
        self,
        image_path: str,
        target_height: int,
        target_width: int,
        padding_values: tuple = (0, 0, 0)
    ) -> torch.Tensor:
        """Loads an image, resizes it and converts it to a (1, C, H, W) tensor."""
        image = load_image_cv2(image_path, target_height, target_width)
        image = normalize_image(image)
        tensor = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0).float()
        return tensor

    def _prepare_conditioning_tensor(
        self,
        image_path: str,
        target_height: int,
        target_width: int,
        padding_values: tuple = (0, 0, 0)
    ) -> torch.Tensor:
        """Prepares a conditioning tensor from an image file."""
        return self._load_image_to_tensor_with_resize_and_crop(
            image_path,
            target_height,
            target_width,
            padding_values
        )

    def _prepare_conditioning_tensor_from_path(self, image_path: str) -> torch.Tensor:
        """Prepares a conditioning tensor at the default 768x512 resolution."""
        return self._prepare_conditioning_tensor(image_path, 512, 768, (0, 0, 0))

    # ==============================================================================
    # COMPUTATION AND PROCESSING METHODS
    # ==============================================================================

    def _calculate_downscaled_dims(self, height: int, width: int) -> Tuple[int, int]:
        """Calculates the reduced dimensions used by the first pass."""
        downscale_factor = 4
        return height // downscale_factor, width // downscale_factor

    def _calculate_dynamic_cuts(
        self,
        total_latents: int,
        min_chunk_size: int = 8,
        overlap: int = 2
    ) -> Tuple[List[Tuple[int, int]], List[int]]:
        """Calculates dynamic cut points for overlapping chunks.

        Each chunk starts `overlap` frames before the previous chunk ends, so
        consecutive chunks share that many latent frames.
        """
        cut_points = []
        segment_sizes = []

        start = 0
        while start < total_latents:
            end = min(start + min_chunk_size, total_latents)
            cut_points.append((start, end))
            segment_sizes.append(end - start)

            if end >= total_latents:
                break

            start = end - overlap

        return cut_points, segment_sizes

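    # Worked example (illustrative): with total_latents=20, min_chunk_size=8
    # and overlap=2, _calculate_dynamic_cuts returns
    #   cut_points    = [(0, 8), (6, 14), (12, 20)]
    #   segment_sizes = [8, 8, 8]
    # i.e. each chunk re-reads the last 2 latent frames of its predecessor,
    # which the stitching helpers below trim out again.
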
    def _split_latents_with_overlap(
        self,
        latents: torch.Tensor,
        chunk_size: int = 8,
        overlap: int = 2
    ) -> List[torch.Tensor]:
        """Splits latents along the frame axis into overlapping chunks."""
        chunks = []
        start = 0
        total_frames = latents.shape[2]

        while start < total_frames:
            end = min(start + chunk_size, total_frames)
            chunk = latents[:, :, start:end, :, :]
            chunks.append(chunk)

            if end >= total_frames:
                break

            start = end - overlap

        return chunks

    def _merge_chunks_with_overlap(
        self,
        chunks: List[torch.Tensor],
        overlap: int = 2
    ) -> torch.Tensor:
        """Stitches chunks back together, dropping the overlapped frames.

        Consecutive chunks share `overlap` latent frames (8 pixel frames per
        latent frame), so the duplicated region is trimmed from the head of
        every chunk after the first; trimming both neighbors would also
        discard frames that only exist once.
        """
        if len(chunks) == 1:
            return chunks[0]

        overlap_pixels = overlap * 8  # 8 = VAE temporal scale factor

        result_parts = [chunks[0]]
        for chunk in chunks[1:]:
            result_parts.append(chunk[:, :, overlap_pixels:, :, :])

        return torch.cat(result_parts, dim=2)

    def _stitch_dynamic_chunks(
        self,
        pixel_chunks: List[torch.Tensor],
        segment_sizes: List[int],
        macro_overlap: int = 2
    ) -> torch.Tensor:
        """Stitches decoded pixel chunks, removing the overlapped frames.

        Each latent frame decodes to ~8 pixel frames, so `macro_overlap`
        latent frames correspond to `macro_overlap * 8` duplicated pixel
        frames at every junction. The duplicates are trimmed from the head of
        every chunk after the first. (`segment_sizes` is accepted for API
        symmetry but not needed by this trim strategy.)
        """
        if len(pixel_chunks) == 1:
            return pixel_chunks[0]

        overlap_frames = macro_overlap * 8
        stitched_parts = [pixel_chunks[0]]

        for chunk in pixel_chunks[1:]:
            stitched_parts.append(chunk[:, :, overlap_frames:, :, :])

        return torch.cat(stitched_parts, dim=2)

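    # Worked example (illustrative, assuming the ~8-pixel-frames-per-latent
    # decode): three chunks of 64 pixel frames each, with macro_overlap=2
    # (16 pixel frames duplicated at each junction), stitch to
    #   64 + (64 - 16) + (64 - 16) = 160 unique frames.
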
    def _upsample_and_filter_latents(
        self,
        latents: torch.Tensor,
        upsampler: torch.nn.Module,
        device: str
    ) -> torch.Tensor:
        """Applies the latent upsampler, then AdaIN-filters against the input."""
        latents = latents.to(device)

        with torch.no_grad():
            with torch.autocast(device_type='cuda', dtype=self.runtime_autocast_dtype):
                upsampled = upsampler(latents)
                filtered = adain_filter_latent(upsampled, latents)

        return filtered

    # ==============================================================================
    # GENERATION AND REFINEMENT METHODS (VIA THE POOL MANAGER)
    # ==============================================================================

    def generate_low_resolution(
        self,
        prompt: str,
        negative_prompt: str,
        height: int,
        width: int,
        duration_secs: float,
        guidance_scale: float,
        seed: Optional[int] = None,
        image_filepaths: Optional[List[str]] = None
    ) -> Tuple[str, int]:
        """Generates a low-resolution video using the generation pool."""
        print("[INFO] Starting low-resolution generation...")

        used_seed = seed or random.randint(0, 2**32 - 1)
        self._seed_everything(used_seed)

        actual_num_frames = int(round(duration_secs * DEFAULT_FPS))
        downscaled_height, downscaled_width = self._calculate_downscaled_dims(height, width)

        conditioning_items = []
        if image_filepaths:
            for filepath in image_filepaths:
                cond_tensor = self._prepare_conditioning_tensor(
                    filepath,
                    downscaled_height,
                    downscaled_width,
                    (0, 0, 0)
                )
                conditioning_items.append(ConditioningItem(cond_tensor, 0, 1.0))

        # Submit the generation task to the pool. num_frames is expressed in
        # latent frames: one latent frame per 8 pixel frames, plus one.
        task_id = f"gen_lowres_{used_seed}"
        latents = self.gpu_pool.submit_and_wait_generation(
            task_id=task_id,
            task_fn=self._generate_latents_worker,
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=downscaled_height,
            width=downscaled_width,
            num_frames=(actual_num_frames // 8) + 1,
            guidance_scale=guidance_scale,
            seed=used_seed,
            conditioning_items=conditioning_items or None,
            timeout=600
        )

        tensor_path = self._save_latents_to_disk(latents, "latents_low_res", used_seed)

        print("[SUCCESS] Low-resolution generation finished!")
        self.used_seed = used_seed

        return tensor_path, used_seed

    def refine_texture_only(
        self,
        latents_path: str,
        prompt: str,
        negative_prompt: str,
        guidance_scale: float,
        seed: int,
        image_filepaths: Optional[List[str]] = None,
        macro_chunk_size: int = 8,
        macro_overlap: int = 2
    ) -> Tuple[str, str, torch.Tensor]:
        """Refines and decodes latents, alternating between the two pools chunk by chunk."""
        print("[INFO] Starting chunked refinement and decoding...")

        initial_latents = torch.load(latents_path).cpu()
        total_latents = initial_latents.shape[2]
        height = initial_latents.shape[3] * 8
        width = initial_latents.shape[4] * 8

        cut_points, segment_sizes = self._calculate_dynamic_cuts(
            total_latents,
            min_chunk_size=macro_chunk_size,
            overlap=macro_overlap
        )

        print(f"  Processing {len(cut_points)} chunks...")

        # Prepare conditioning when images were provided
        conditioning_items = []
        if image_filepaths:
            for filepath in image_filepaths:
                cond_tensor = self._prepare_conditioning_tensor(
                    filepath,
                    height,
                    width,
                    (0, 0, 0)
                )
                conditioning_items.append(ConditioningItem(cond_tensor, 0, 1.0))

        pixel_results = []

        for i, (start, end) in enumerate(cut_points):
            chunk_id = f"chunk_{i}_seed_{seed}"
            latent_chunk = initial_latents[:, :, start:end, :, :]

            # STEP 1: refine the latents (generation pool)
            print(f"\n  [{i+1}/{len(cut_points)}] Refining chunk {start}-{end}...")
            refined_latents = self.gpu_pool.submit_and_wait_generation(
                task_id=f"refine_{chunk_id}",
                task_fn=self._refine_latents_worker,
                latents=latent_chunk,
                prompt=prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                seed=seed + i,
                conditioning_items=conditioning_items or None,
                timeout=600
            )

            # STEP 2: decode the latents (decode pool)
            print(f"  [{i+1}/{len(cut_points)}] Decoding chunk {start}-{end}...")
            pixel_tensor = self.gpu_pool.submit_and_wait_decode(
                task_id=f"decode_{chunk_id}",
                task_fn=self._decode_latents_worker,
                latents=refined_latents,
                decode_timestep=float(self.config.get("decode_timestep", 0.05)),
                timeout=300
            )

            pixel_results.append(pixel_tensor)

            del refined_latents
            torch.cuda.empty_cache()

        # Stitch the results
        print("\n  Stitching final chunks...")
        final_pixel_tensor = self._stitch_dynamic_chunks(
            pixel_results,
            segment_sizes,
            macro_overlap
        )

        final_video_path = self._save_video_from_tensor(
            final_pixel_tensor,
            "final_video",
            seed
        )

        print(f"[SUCCESS] Final video saved at: {final_video_path}")
        self.gpu_pool.print_stats()

        return final_video_path, latents_path, final_pixel_tensor

    def apply_secondary_refinement(
        self,
        initial_latents_path: str,
        prompt: str,
        negative_prompt: str,
        guidance_scale: float,
        seed: int,
        image_filepaths: Optional[List[str]] = None
    ) -> str:
        """Applies a secondary refinement pass over multiple chunks.

        Unlike refine_texture_only, all refinement tasks are submitted up
        front, so the generation and decode pools can overlap their work.
        """
        print("[INFO] Applying secondary refinement...")

        initial_latents = torch.load(initial_latents_path).cpu()
        total_latents = initial_latents.shape[2]

        # Split into larger chunks
        macro_chunk_size = 16
        macro_overlap = 2

        cut_points, segment_sizes = self._calculate_dynamic_cuts(
            total_latents,
            min_chunk_size=macro_chunk_size,
            overlap=macro_overlap
        )

        height = initial_latents.shape[3] * 8
        width = initial_latents.shape[4] * 8

        conditioning_items = []
        if image_filepaths:
            for filepath in image_filepaths:
                cond_tensor = self._prepare_conditioning_tensor(
                    filepath, height, width, (0, 0, 0)
                )
                conditioning_items.append(ConditioningItem(cond_tensor, 0, 1.0))

        print(f"  Refining {len(cut_points)} chunks...")

        # Submit ALL refinement tasks first. The local name `refine_queue`
        # avoids shadowing the `queue` module imported at the top of the file.
        refine_queues = []
        for i, (start, end) in enumerate(cut_points):
            latent_chunk = initial_latents[:, :, start:end, :, :]

            refine_queue = self.gpu_pool.submit_generation_task(
                task_id=f"refine_macro_{i}",
                task_fn=self._refine_latents_worker,
                latents=latent_chunk,
                prompt=prompt,
                negative_prompt=negative_prompt,
                guidance_scale=guidance_scale,
                seed=seed + i,
                conditioning_items=conditioning_items or None
            )
            refine_queues.append((i, refine_queue))

        # Kick off decodes as refinements become ready
        print("\n  Decoding refined chunks...")
        decode_queues = []

        for i, refine_queue in refine_queues:
            refined_latents = self.gpu_pool.get_result(refine_queue, timeout=600)
            print(f"  ✓ Chunk {i} refined")

            decode_queue = self.gpu_pool.submit_decode_task(
                task_id=f"decode_macro_{i}",
                task_fn=self._decode_latents_worker,
                latents=refined_latents,
                decode_timestep=float(self.config.get("decode_timestep", 0.05))
            )
            decode_queues.append((i, decode_queue))

        # Wait for every decode
        print("\n  Waiting for all decodes to finish...")
        pixel_results = []

        for i, decode_queue in decode_queues:
            pixel_tensor = self.gpu_pool.get_result(decode_queue, timeout=300)
            pixel_results.append(pixel_tensor)
            print(f"  ✓ Chunk {i} decoded")

        # Stitch the final result
        print("\n  Stitching final result...")
        final_pixel_tensor = self._stitch_dynamic_chunks(
            pixel_results,
            segment_sizes,
            macro_overlap
        )

        final_video_path = self._save_video_from_tensor(
            final_pixel_tensor,
            "refined_final_video",
            seed
        )

        print(f"[SUCCESS] Refined video saved at: {final_video_path}")
        self.gpu_pool.print_stats()

        return final_video_path

    def encode_latents_to_mp4(
        self,
        pixel_tensor: torch.Tensor,
        output_path: str,
        fps: float = 24.0
    ) -> str:
        """Encodes a pixel tensor into an MP4 file."""
        print(f"[INFO] Encoding video to MP4: {output_path}")

        # Denormalize from [-1, 1] to [0, 255]
        pixel_tensor = (pixel_tensor + 1.0) / 2.0 * 255.0
        pixel_tensor = torch.clamp(pixel_tensor, 0, 255)

        # Encode to a video file
        video_encode_tool_singleton.encode_video_from_tensor(
            pixel_tensor,
            output_path,
            fps=fps
        )

        print(f"[SUCCESS] Video encoded: {output_path}")
        return output_path

    # ==============================================================================
    # CONFIGURATION AND LOADING METHODS
    # ==============================================================================

    def _load_config(self, config_file: str) -> Dict:
        """Loads the YAML configuration."""
        config_path = LTX_VIDEO_REPO_DIR / "configs" / config_file

        if not config_path.exists():
            print(f"[WARNING] Config file not found: {config_path}")
            return {}

        with open(config_path, "r") as f:
            config = yaml.safe_load(f)

        return config or {}

    def _load_models_from_hub(self) -> Tuple[LTXVideoPipeline, Optional[LatentUpsampler]]:
        """Loads the models from the Hugging Face Hub."""
        print("[INFO] Loading models from the Hub...")

        # Load the pipeline
        pipeline = LTXVideoPipeline.from_pretrained(
            "Lightricks/LTX-Video",
            torch_dtype=torch.bfloat16
        )

        # Load the upsampler (optional)
        try:
            upsampler = LatentUpsampler.from_pretrained(
                "Lightricks/LTX-Video",
                torch_dtype=torch.bfloat16
            )
        except Exception as e:
            print(f"[WARNING] Upsampler not available: {e}")
            upsampler = None

        print("[SUCCESS] Models loaded successfully!")
        return pipeline, upsampler

    def _move_models_to_device(self):
        """Moves models to a single main device (unused with pools)."""
        # Handled by _clone_pipeline_to_device instead
        pass

    def _get_precision_dtype(self) -> torch.dtype:
        """Picks the autocast precision based on the available hardware."""
        if torch.cuda.is_available():
            device_props = torch.cuda.get_device_properties(0)
            if device_props.major >= 8:  # compute capability 8.0+: A100, H100, etc.
                return torch.bfloat16

        return torch.float16

    # ==============================================================================
    # SAVING AND HOUSEKEEPING HELPERS
    # ==============================================================================

    def _save_latents_to_disk(
        self,
        latents: torch.Tensor,
        prefix: str,
        seed: int
    ) -> str:
        """Saves latents to a .pt file."""
        filename = f"{prefix}_{seed}.pt"
        filepath = self.tmp_dir / filename

        torch.save(latents, filepath)
        print(f"  Latents saved: {filepath}")

        return str(filepath)

    def _save_video_from_tensor(
        self,
        pixel_tensor: torch.Tensor,
        prefix: str,
        seed: int
    ) -> str:
        """Saves a pixel tensor as an MP4 video."""
        filename = f"{prefix}_{seed}.mp4"
        filepath = RESULTS_DIR / filename

        RESULTS_DIR.mkdir(parents=True, exist_ok=True)

        self.encode_latents_to_mp4(pixel_tensor, str(filepath), fps=DEFAULT_FPS)

        print(f"  Video saved: {filepath}")
        return str(filepath)

    def _finalize(self):
        """Shuts the service down and releases resources."""
        print("[INFO] Shutting down VideoService...")

        self.gpu_pool.print_stats()
        self.gpu_pool.shutdown()

        if self.tmp_dir and self.tmp_dir.exists():
            shutil.rmtree(self.tmp_dir)
            print(f"  Temporary directory removed: {self.tmp_dir}")

        # Free CUDA memory
        torch.cuda.empty_cache()
        gc.collect()

        print("[SUCCESS] VideoService shut down!")

    def _seed_everything(self, seed: int):
        """Seeds all RNGs for reproducibility."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    def _register_tmp_dir(self):
        """Registers a temporary directory for saving latents."""
        self.tmp_dir = Path(tempfile.mkdtemp(prefix="ltx_video_"))
        print(f"  Temporary directory: {self.tmp_dir}")

# ==============================================================================
# ENTRY POINT AND USAGE EXAMPLE
# ==============================================================================

if __name__ == "__main__":
    print("\n" + "=" * 80)
    print("LTX VIDEO SERVICE - Multi-GPU Pool Manager")
    print("=" * 80 + "\n")

    try:
        # Initialize the service
        print("Creating the VideoService instance...")
        video_service = VideoService()

        # Example 1: low-resolution generation
        print("\n[EXAMPLE 1] Low-resolution generation...")
        latents_path, seed = video_service.generate_low_resolution(
            prompt="A beautiful sunset over the ocean",
            negative_prompt="blurry, low quality",
            height=512,
            width=768,
            duration_secs=2.0,
            guidance_scale=3.0,
            seed=42,
            image_filepaths=None
        )

        # Example 2: refinement and decoding
        print("\n[EXAMPLE 2] Refinement and decoding...")
        video_path, latents_path, final_tensor = video_service.refine_texture_only(
            latents_path=latents_path,
            prompt="A beautiful sunset over the ocean",
            negative_prompt="blurry, low quality",
            guidance_scale=3.0,
            seed=seed,
            image_filepaths=None,
            macro_chunk_size=8,
            macro_overlap=2
        )

        print(f"\n✓ Final video generated: {video_path}")

    except KeyboardInterrupt:
        print("\n\n[INFO] Interrupted by the user.")
    except Exception as e:
        print(f"\n\n[ERROR] Execution failed: {e}")
        traceback.print_exc()
    finally:
        if 'video_service' in locals():
            video_service._finalize()

    print("\n" + "=" * 80)
    print("Execution finished")
    print("=" * 80 + "\n")