Spaces:

caarleexx
/

Aduc

Paused

App Files Files Community

caarleexx committed on Nov 1, 2025

Commit

9094e95

verified ·

1 Parent(s): 232cc6e

Delete api/ltx_server_refactored.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored.py +0 -1388

api/ltx_server_refactored.py DELETED Viewed

@@ -1,1388 +0,0 @@
-# ltx_server_clean_refactor.py — VideoService (Modular Version with Simple Overlap Chunking)
-# ==============================================================================
-# 0. CONFIGURAÇÃO DE AMBIENTE E IMPORTAÇÕES
-# ==============================================================================
-import os
-import sys
-import gc
-import yaml
-import time
-import json
-import copy
-import random
-import shutil
-import warnings
-import tempfile
-import traceback
-import subprocess
-from pathlib import Path
-from typing import List, Dict, Optional, Tuple, Union
-import cv2
-# Feature flag read once at import time: true when the ADUC_MEMORY_OPTIMIZATION
-# environment variable (default "1") is "1", "true" or "yes", case-insensitively.
-# The VRAM-manager methods of VideoService return early when it is false.
-ENABLE_MEMORY_OPTIMIZATION = os.getenv("ADUC_MEMORY_OPTIMIZATION", "1").lower() in ["1", "true", "yes"]
-# --- Logging and warning configuration ---
-# Suppress UserWarning and FutureWarning process-wide.
-warnings.filterwarnings("ignore", category=UserWarning)
-warnings.filterwarnings("ignore", category=FutureWarning)
-from huggingface_hub import logging as hf_logging
-# Only let huggingface_hub emit error-level log messages.
-hf_logging.set_verbosity_error()
-# --- Importações de Bibliotecas de ML/Processamento ---
-import torch
-import torch.nn.functional as F
-import numpy as np
-from PIL import Image
-from einops import rearrange
-from huggingface_hub import hf_hub_download
-from safetensors import safe_open
-from managers.vae_manager import vae_manager_singleton
-from tools.video_encode_tool import video_encode_tool_singleton
-# --- Global constants ---
-LTXV_DEBUG = True  # Set to False to disable debug logs
-# NOTE(review): not referenced in the visible part of the file; presumably a
-# frame-logging interval - confirm against the rest of the module.
-LTXV_FRAME_LOG_EVERY = 8
-# Root directory holding external dependencies.
-DEPS_DIR = Path("/data")
-# Local checkout of the LTX-Video repository; added to sys.path below.
-LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-# Output directory; VideoService.__init__ creates it if missing.
-RESULTS_DIR = Path("/app/output")
-# Frame rate passed to the pipeline and used to report clip duration.
-DEFAULT_FPS = 24.0
-def add_deps_to_path(repo_path: Path):
-    """Make *repo_path* importable by placing it at the front of ``sys.path``.
-
-    The path is resolved to an absolute location first. Nothing is changed
-    when that resolved location is already present in ``sys.path``.
-    """
-    absolute_repo = str(repo_path.resolve())
-    if absolute_repo in sys.path:
-        return
-    sys.path.insert(0, absolute_repo)
-# Register the local LTX-Video checkout on sys.path at import time, before the
-# ``ltx_video`` imports that follow.
-add_deps_to_path(LTX_VIDEO_REPO_DIR)
-# --- Imports that depend on the path added above ---
-from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
-from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
-from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXVideoPipeline
-from transformers import T5EncoderModel, T5Tokenizer, AutoModelForCausalLM, AutoProcessor, AutoTokenizer
-from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
-from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
-from ltx_video.models.transformers.transformer3d import Transformer3DModel
-from ltx_video.schedulers.rf import RectifiedFlowScheduler
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-import ltx_video.pipelines.crf_compressor as crf_compressor
-def create_latent_upsampler(latent_upsampler_model_path: str, device: str):
-    """Load a ``LatentUpsampler`` and prepare it for inference.
-
-    Args:
-        latent_upsampler_model_path: Location handed to
-            ``LatentUpsampler.from_pretrained``.
-        device: Device the loaded model is moved to.
-
-    Returns:
-        The loaded upsampler, moved to *device* and switched to eval mode.
-    """
-    upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
-    upsampler.to(device)
-    upsampler.eval()
-    return upsampler
-def create_ltx_video_pipeline(
-    ckpt_path: str,
-    precision: str,
-    text_encoder_model_name_or_path: str,
-    sampler: Optional[str] = None,
-    device: Optional[str] = None,
-    enhance_prompt: bool = False,
-    prompt_enhancer_image_caption_model_name_or_path: Optional[str] = None,
-    prompt_enhancer_llm_model_name_or_path: Optional[str] = None,
-) -> LTXVideoPipeline:
-    """Assemble an ``LTXVideoPipeline`` from a single checkpoint file.
-
-    Args:
-        ckpt_path: Path to the checkpoint. It is opened with ``safe_open`` to
-            read its metadata and is also used to load the VAE, the
-            transformer and (optionally) the scheduler.
-        precision: When equal to ``"bfloat16"`` the transformer is cast to
-            bfloat16 unless it already has that dtype. The VAE and the text
-            encoder are cast to bfloat16 regardless of this value.
-        text_encoder_model_name_or_path: Source of the T5 text encoder
-            (``text_encoder`` subfolder) and tokenizer (``tokenizer`` subfolder).
-        sampler: ``None``, empty or ``"from_checkpoint"`` loads the scheduler
-            from the checkpoint. Otherwise a new scheduler is constructed with
-            the ``"Uniform"`` sampler when the value is "uniform"
-            (case-insensitive) and ``"LinearQuadratic"`` for any other value.
-        device: Device passed to ``.to()`` for the sub-models and the pipeline.
-        enhance_prompt: When true, the prompt-enhancer models are loaded too;
-            otherwise the four prompt-enhancer entries are ``None``.
-        prompt_enhancer_image_caption_model_name_or_path: Source of the image
-            caption model and processor (only read when ``enhance_prompt``).
-        prompt_enhancer_llm_model_name_or_path: Source of the LLM and its
-            tokenizer (only read when ``enhance_prompt``).
-
-    Returns:
-        The assembled pipeline, moved to *device*.
-
-    Raises:
-        AssertionError: If *ckpt_path* does not exist (only while assertions
-            are enabled).
-    """
-    ckpt_path = Path(ckpt_path)
-    assert os.path.exists(
-        ckpt_path
-    ), f"Ckpt path provided (--ckpt_path) {ckpt_path} does not exist"
-    # Read the JSON configuration embedded in the checkpoint metadata.
-    # NOTE(review): if the metadata has no "config" entry, `config_str` is None
-    # and json.loads raises TypeError - confirm all checkpoints carry it.
-    with safe_open(ckpt_path, framework="pt") as f:
-        metadata = f.metadata()
-        config_str = metadata.get("config")
-        configs = json.loads(config_str)
-        allowed_inference_steps = configs.get("allowed_inference_steps", None)
-    vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
-    transformer = Transformer3DModel.from_pretrained(ckpt_path)
-    # Use constructor if sampler is specified, otherwise use from_pretrained
-    if sampler == "from_checkpoint" or not sampler:
-        scheduler = RectifiedFlowScheduler.from_pretrained(ckpt_path)
-    else:
-        scheduler = RectifiedFlowScheduler(
-            sampler=("Uniform" if sampler.lower() == "uniform" else "LinearQuadratic")
-        )
-    text_encoder = T5EncoderModel.from_pretrained(
-        text_encoder_model_name_or_path, subfolder="text_encoder"
-    )
-    patchifier = SymmetricPatchifier(patch_size=1)
-    tokenizer = T5Tokenizer.from_pretrained(
-        text_encoder_model_name_or_path, subfolder="tokenizer"
-    )
-    # Move the three heavy sub-models to the target device before any dtype cast.
-    transformer = transformer.to(device)
-    vae = vae.to(device)
-    text_encoder = text_encoder.to(device)
-    if enhance_prompt:
-        prompt_enhancer_image_caption_model = AutoModelForCausalLM.from_pretrained(
-            prompt_enhancer_image_caption_model_name_or_path, trust_remote_code=True
-        )
-        prompt_enhancer_image_caption_processor = AutoProcessor.from_pretrained(
-            prompt_enhancer_image_caption_model_name_or_path, trust_remote_code=True
-        )
-        prompt_enhancer_llm_model = AutoModelForCausalLM.from_pretrained(
-            prompt_enhancer_llm_model_name_or_path,
-            torch_dtype="bfloat16",
-        )
-        prompt_enhancer_llm_tokenizer = AutoTokenizer.from_pretrained(
-            prompt_enhancer_llm_model_name_or_path,
-        )
-    else:
-        prompt_enhancer_image_caption_model = None
-        prompt_enhancer_image_caption_processor = None
-        prompt_enhancer_llm_model = None
-        prompt_enhancer_llm_tokenizer = None
-    # VAE and text encoder always run in bfloat16; only the transformer cast
-    # is conditional on `precision`.
-    vae = vae.to(torch.bfloat16)
-    if precision == "bfloat16" and transformer.dtype != torch.bfloat16:
-        transformer = transformer.to(torch.bfloat16)
-    text_encoder = text_encoder.to(torch.bfloat16)
-    # Use submodels for the pipeline
-    submodel_dict = {
-        "transformer": transformer,
-        "patchifier": patchifier,
-        "text_encoder": text_encoder,
-        "tokenizer": tokenizer,
-        "scheduler": scheduler,
-        "vae": vae,
-        "prompt_enhancer_image_caption_model": prompt_enhancer_image_caption_model,
-        "prompt_enhancer_image_caption_processor": prompt_enhancer_image_caption_processor,
-        "prompt_enhancer_llm_model": prompt_enhancer_llm_model,
-        "prompt_enhancer_llm_tokenizer": prompt_enhancer_llm_tokenizer,
-        "allowed_inference_steps": allowed_inference_steps,
-    }
-    pipeline = LTXVideoPipeline(**submodel_dict)
-    pipeline = pipeline.to(device)
-    return pipeline
-# ==============================================================================
-# 2. FUNÇÕES AUXILIARES DE PROCESSAMENTO
-# ==============================================================================
-def calculate_padding(orig_h: int, orig_w: int, target_h: int, target_w: int) -> Tuple[int, int, int, int]:
-    """Compute the padding that centers an image inside the target size.
-
-    Returns:
-        ``(pad_left, pad_right, pad_top, pad_bottom)``. When a size difference
-        is odd, the extra pixel goes to the right / bottom side.
-    """
-    extra_h = target_h - orig_h
-    extra_w = target_w - orig_w
-    top, left = extra_h // 2, extra_w // 2
-    return (left, extra_w - left, top, extra_h - top)
-def log_tensor_info(tensor: torch.Tensor, name: str = "Tensor"):
-    """Print shape, dtype, device and min/max/mean of a tensor for debugging.
-
-    Non-tensor inputs only produce a short notice. Statistics are skipped for
-    empty tensors and replaced by a notice when computing them raises
-    ``RuntimeError``.
-    """
-    if not isinstance(tensor, torch.Tensor):
-        print(f"\n[INFO] '{name}' não é um tensor.")
-        return
-    header_lines = (
-        f"\n--- Tensor Info: {name} ---",
-        f"  - Shape:  {tuple(tensor.shape)}",
-        f"  - Dtype:  {tensor.dtype}",
-        f"  - Device: {tensor.device}",
-    )
-    for line in header_lines:
-        print(line)
-    if tensor.numel() > 0:
-        try:
-            lowest = tensor.min().item()
-            highest = tensor.max().item()
-            average = tensor.mean().item()
-            stats_line = f"  - Stats:  Min={lowest:.4f}, Max={highest:.4f}, Mean={average:.4f}"
-        except RuntimeError:
-            stats_line = "  - Stats: Não foi possível calcular (ex: tensores bool)."
-        print(stats_line)
-    print("-" * 30)
-# ==============================================================================
-# 3. CLASSE PRINCIPAL DO SERVIÇO DE VÍDEO
-# ==============================================================================
-# Device layout for a 4-GPU host. VideoService.__init__ turns the indices into
-# "cuda:N" strings and only uses them when at least four CUDA devices are present.
-GPU_CONFIG = {
-    "transformer_workers": [0, 1],    # GPUs for transformer + text_encoder
-    "vae_workers": [2, 3],            # GPUs for VAE + upscaler
-    "enable_multi_gpu": True
-}
-# Precision settings, evaluated once at import time.
-PRECISION_CONFIG = {
-    "enable_fp8": False,  # FP8 disabled because of compatibility problems
-    # bfloat16 when CUDA is available and supports it, float16 otherwise.
-    "default_dtype": torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16,
-    "fallback_dtype": torch.float16
-}
-# Modifique a classe VideoService
-class VideoService:
-    def _get_safe_precision_dtype(self):
-        """Choose a compute dtype based on what the current hardware supports.
-
-        Returns ``torch.float32`` without CUDA, ``torch.bfloat16`` when the GPU
-        reports bfloat16 support, and ``torch.float16`` otherwise.
-        """
-        if not torch.cuda.is_available():
-            return torch.float32
-        if not torch.cuda.is_bf16_supported():
-            print("[INFO] Usando float16 (bfloat16 não suportado)")
-            return torch.float16
-        print("[INFO] Usando bfloat16 (suportado pela GPU)")
-        return torch.bfloat16
-    def _load_models_with_safe_dtype(self):
-        """Load the LTX pipeline and the latent upsampler with a consistent dtype.
-
-        The pipeline is first requested with ``self.runtime_autocast_dtype``;
-        if that attempt raises any exception it is retried with float16.
-
-        Returns:
-            ``(pipeline, latent_upsampler)``; the upsampler is whatever
-            ``_load_latent_upsampler`` returns (``None`` when it fails to load).
-
-        NOTE(review): ``MODEL_CACHE_DIR`` is not defined in the visible part of
-        this file - confirm it exists at module level.
-        """
-        print("[INFO] Carregando modelos com dtype seguro...")
-        # FIX: use the safe dtype explicitly
-        torch_dtype = self.runtime_autocast_dtype
-        try:
-            pipeline = LTXVideoPipeline.from_pretrained(
-                "Lightricks/LTX-Video",
-                torch_dtype=torch_dtype,
-                variant="fp8" if PRECISION_CONFIG["enable_fp8"] else None,
-                cache_dir=MODEL_CACHE_DIR
-            )
-        except Exception as e:
-            # Deliberate best-effort: any failure triggers one float16 retry
-            # (without the `variant` argument); a second failure propagates.
-            print(f"[WARNING] Erro ao carregar com {torch_dtype}: {e}")
-            print("[INFO] Tentando carregar com float16...")
-            torch_dtype = torch.float16
-            pipeline = LTXVideoPipeline.from_pretrained(
-                "Lightricks/LTX-Video",
-                torch_dtype=torch_dtype,
-                cache_dir=MODEL_CACHE_DIR
-            )
-        # FIX: check and adjust the dtypes of the model components
-        self._ensure_consistent_dtypes(pipeline, torch_dtype)
-        # Load the upscaler with the same dtype
-        latent_upsampler = self._load_latent_upsampler(torch_dtype)
-        return pipeline, latent_upsampler
-    def _ensure_consistent_dtypes(self, pipeline, expected_dtype):
-        """Cast pipeline components whose first parameter has an unexpected dtype.
-
-        The transformer, text encoder, VAE and patchifier are inspected;
-        components without a ``parameters`` method or without any parameter
-        are left untouched.
-        """
-        print("[INFO] Verificando consistência de dtypes...")
-        named_parts = {
-            "transformer": pipeline.transformer,
-            "text_encoder": pipeline.text_encoder,
-            "vae": pipeline.vae,
-            "patchifier": pipeline.patchifier,
-        }
-        for label, module in named_parts.items():
-            if not hasattr(module, 'parameters'):
-                continue
-            first_param = next(module.parameters(), None)
-            if first_param is None:
-                continue
-            current_dtype = first_param.dtype
-            if current_dtype == expected_dtype:
-                continue
-            print(f"[INFO] Convertendo {label} de {current_dtype} para {expected_dtype}")
-            module.to(dtype=expected_dtype)
-        print("[INFO] Verificação de dtypes concluída.")
-    def _load_latent_upsampler(self, torch_dtype):
-        """Try to load the spatial latent upscaler; return ``None`` on any failure.
-
-        The import of ``LatentUpscaler`` happens inside the ``try`` so that an
-        import error is reported as a warning as well.
-        """
-        try:
-            from ltx_video.models import LatentUpscaler
-            return LatentUpscaler.from_pretrained(
-                "Lightricks/LTX-Video",
-                subfolder="ltxv-spatial-upscaler-0.9.8",
-                torch_dtype=torch_dtype,
-                cache_dir=MODEL_CACHE_DIR
-            )
-        except Exception as load_error:
-            print(f"[WARNING] Não foi possível carregar o latent upscaler: {load_error}")
-        return None
-    def _setup_4gpu_workers(self):
-        """Place the models on their devices.
-
-        In single-device mode the whole pipeline (and the upsampler, if any)
-        goes to ``self.device_ltx``. In multi-GPU mode only the first
-        transformer device and the first VAE device actually receive models;
-        the remaining configured devices are merely logged.
-        """
-        if not self.multi_gpu_enabled:
-            self.pipeline.to(self.device_ltx)
-            if self.latent_upsampler:
-                self.latent_upsampler.to(self.device_ltx)
-            return
-        print("[INFO] Distribuindo modelos em 4 workers...")
-        # Transformer workers: transformer + text encoder live on the first one.
-        for worker_id, gpu in enumerate(self.transformer_devices):
-            print(f"[INFO] Worker {worker_id} (Transformer): {gpu}")
-            if worker_id != 0:
-                # Using further workers would require copies of the model.
-                continue
-            self.pipeline.transformer.to(gpu)
-            self.pipeline.text_encoder.to(gpu)
-        # VAE workers are numbered after the transformer workers (2, 3, ...).
-        for worker_id, gpu in enumerate(self.vae_devices, start=2):
-            print(f"[INFO] Worker {worker_id} (VAE): {gpu}")
-            if worker_id == 2:
-                self.pipeline.vae.to(gpu)
-        # The upsampler shares the first VAE device.
-        if self.latent_upsampler:
-            self.latent_upsampler.to(self.vae_devices[0])
-        print("[INFO] Distribuição 4-Workers concluída.")
-        # Report dtypes/devices after the moves.
-        self._verify_gpu_dtypes()
-    def _verify_gpu_dtypes(self):
-        """Print dtype and device of the first parameter of the transformer and VAE.
-
-        Purely informational: components without parameters are skipped and
-        nothing is modified.
-        """
-        print("[INFO] Verificando dtypes nas GPUs...")
-        inspected = (
-            ("transformer", self.pipeline.transformer),
-            ("vae", self.pipeline.vae),
-        )
-        for label, module in inspected:
-            if not hasattr(module, 'parameters'):
-                continue
-            first_param = next(module.parameters(), None)
-            if first_param is None:
-                continue
-            print(f"  {label}: dtype={first_param.dtype}, device={first_param.device}")
-        print("[INFO] Verificação de GPU dtypes concluída.")
-    def generate_low_resolution1(self, prompt: str, negative_prompt: str,
-                              height: int, width: int, num_frames: int,
-                              guidance_scale: float, seed: Optional[int] = None,
-                              conditioning_items: Optional[List[ConditioningItem]] = None) -> Tuple[str, str, int]:
-        """Run the first (low-resolution) pass and decode the result to a video.
-
-        Args:
-            prompt: Positive text prompt forwarded to the pipeline.
-            negative_prompt: Negative text prompt forwarded to the pipeline.
-            height: Output height forwarded to the pipeline.
-            width: Output width forwarded to the pipeline.
-            num_frames: Number of frames forwarded to the pipeline.
-            guidance_scale: Guidance scale (converted to ``float``).
-            seed: Seed for the generator; a random 32-bit value when ``None``.
-            conditioning_items: Optional conditioning forwarded to the pipeline.
-
-        Returns:
-            ``(video_path, latents_path, used_seed)``.
-
-        Entries of ``self.config["first_pass"]`` are merged last into the
-        pipeline kwargs and therefore override the values built here.
-        """
-        print("\n[INFO] Iniciando ETAPA 1: Geração de Baixa Resolução...")
-        self._set_generation_environment()
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_low_")
-        self._register_tmp_dir(temp_dir)
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        # Pick the device: first transformer GPU in multi-GPU mode, otherwise
-        # the single LTX device.
-        if self.multi_gpu_enabled:
-            device = self.transformer_devices[0]
-        else:
-            device = self.device_ltx
-        print(f"  - Usando Seed: {used_seed}")
-        print(f"  - Frames: {num_frames}, Duração: {num_frames/DEFAULT_FPS:.1f}s")
-        print(f"  - Dimensões de Saída: {height}x{width}")
-        print(f"  - Dispositivo: {device}, Dtype: {self.runtime_autocast_dtype}")
-        # FIX: more robust autocast configuration. `device` is a string such as
-        # "cuda:0"; autocast only needs the part before the colon.
-        device_type = device.split(':')[0] if ':' in device else device
-        enabled_autocast = 'cuda' in device and self.runtime_autocast_dtype in [torch.float16, torch.bfloat16]
-        print(f"  - Autocast habilitado: {enabled_autocast}")
-        try:
-            with torch.autocast(device_type=device_type,
-                              dtype=self.runtime_autocast_dtype,
-                              enabled=enabled_autocast):
-                first_pass_kwargs = {
-                    "prompt": prompt, "negative_prompt": negative_prompt,
-                    "height": height, "width": width,
-                    "frame_rate": int(DEFAULT_FPS), "num_frames": num_frames,
-                    "guidance_scale": float(guidance_scale),
-                    "output_type": "latent",
-                    "generator": torch.Generator(device=device).manual_seed(used_seed),
-                    "conditioning_items": conditioning_items,
-                    **(self.config.get("first_pass", {}))
-                }
-                print("  - Enviando para a pipeline LTX...")
-                latents = self.pipeline(**first_pass_kwargs).images
-                print(f"  [LOG] Latentes gerados. Shape: {latents.shape}, Dtype: {latents.dtype}")
-        except RuntimeError as e:
-            print(f"[ERROR] Erro durante a geração: {e}")
-            print("[INFO] Tentando fallback para float32...")
-            # Fallback: retry the same request with autocast disabled and a
-            # freshly seeded generator. The models themselves are not cast here.
-            with torch.autocast(device_type=device_type, dtype=torch.float32, enabled=False):
-                first_pass_kwargs = {
-                    "prompt": prompt, "negative_prompt": negative_prompt,
-                    "height": height, "width": width,
-                    "frame_rate": int(DEFAULT_FPS), "num_frames": num_frames,
-                    "guidance_scale": float(guidance_scale),
-                    "output_type": "latent",
-                    "generator": torch.Generator(device=device).manual_seed(used_seed),
-                    "conditioning_items": conditioning_items,
-                    **(self.config.get("first_pass", {}))
-                }
-                latents = self.pipeline(**first_pass_kwargs).images
-        # Keep only a CPU copy of the latents and release the GPU memory.
-        latents_cpu = latents.cpu()
-        del latents
-        torch.cuda.empty_cache()
-        # Persist the latents, then decode and save the video.
-        latents_path = self._save_latents_to_disk(latents_cpu, "latents_low", used_seed)
-        print("\n[INFO] Decodificando vídeo de baixa resolução...")
-        self._set_decode_environment()
-        # Decode along dimension 2 of the latents, either in one go or in the
-        # chunks returned by _calculate_dynamic_cuts (defined outside this view).
-        total_latents = latents_cpu.shape[2]
-        pontos_de_corte, segment_sizes = self._calculate_dynamic_cuts(total_latents)
-        if len(pontos_de_corte) == 1:
-            vae_device = self.vae_devices[0] if self.multi_gpu_enabled else self.device_vae
-            latents_for_decode = latents_cpu.to(vae_device)
-            vae_manager = self._get_vae_manager(vae_device)
-            pixel_tensor = vae_manager.decode(
-                latents_for_decode,
-                decode_timestep=float(self.config.get("decode_timestep", 0.05))
-            ).cpu()
-        else:
-            print(f"  [LOG] Decodificação em {len(pontos_de_corte)} chunks...")
-            pixel_chunks_list = []
-            for i, (start, end) in enumerate(pontos_de_corte):
-                # Clamp the cut to the valid range and skip empty segments.
-                start, end = max(0, start), min(total_latents, end)
-                if start >= end:
-                    continue
-                latent_chunk = latents_cpu[:, :, start:end, :, :]
-                vae_device = self.vae_devices[0] if self.multi_gpu_enabled else self.device_vae
-                latent_chunk = latent_chunk.to(vae_device)
-                vae_manager = self._get_vae_manager(vae_device)
-                print(f"    -> Decodificando Grupo {i+1} (latentes {start} a {end-1})")
-                pixel_chunk = vae_manager.decode(
-                    latent_chunk,
-                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
-                )
-                # Move each decoded chunk to the CPU right away to cap VRAM use.
-                pixel_chunks_list.append(pixel_chunk.cpu())
-                torch.cuda.empty_cache()
-            pixel_tensor = self._stitch_dynamic_chunks(pixel_chunks_list, segment_sizes)
-        video_path = self._save_video_from_tensor(pixel_tensor, "video_low", used_seed, temp_dir)
-        # Restore the generation layout before returning.
-        self._set_generation_environment()
-        del latents_cpu
-        self._finalize()
-        print("\n[SUCCESS] Geração de Baixa Resolução Concluída")
-        return video_path, latents_path, used_seed
-    def __init__(self):
-        """Initialize the service: pick devices, load models, attach VAE managers.
-
-        Multi-GPU mode is used only when ``GPU_CONFIG["enable_multi_gpu"]`` is
-        true and at least four CUDA devices are visible. Otherwise everything
-        runs on a single device ("cuda" if available, else "cpu").
-
-        NOTE(review): ``_load_config``, ``_load_models_from_hub`` and
-        ``_get_precision_dtype`` are defined outside the visible part of the
-        file; ``runtime_autocast_dtype`` is only assigned after the models
-        are loaded - confirm the loader does not read it.
-        """
-        t0 = time.perf_counter()
-        print("[INFO] Inicializando VideoService com 4 Workers...")
-        # Device layout for the 4-GPU setup.
-        self.multi_gpu_enabled = GPU_CONFIG["enable_multi_gpu"] and torch.cuda.device_count() >= 4
-        if self.multi_gpu_enabled:
-            self.transformer_devices = [f"cuda:{gpu}" for gpu in GPU_CONFIG["transformer_workers"]]
-            self.vae_devices = [f"cuda:{gpu}" for gpu in GPU_CONFIG["vae_workers"]]
-            # Cursors for the round-robin device getters.
-            self.current_transformer_idx = 0
-            self.current_vae_idx = 0
-            print("[INFO] Configuração 4-Workers:")
-            print(f"       Transformer Workers: {self.transformer_devices}")
-            print(f"       VAE Workers: {self.vae_devices}")
-        else:
-            self.device_ltx = self.device_vae = "cuda" if torch.cuda.is_available() else "cpu"
-            print("[INFO] Usando configuração single-GPU")
-        self.config = self._load_config("ltxv-13b-0.9.8-distilled-fp8.yaml")
-        self.pipeline, self.latent_upsampler = self._load_models_from_hub()
-        self._setup_4gpu_workers()
-        self.runtime_autocast_dtype = self._get_precision_dtype()
-        # Configure one VAE manager per VAE GPU.
-        self.vae_managers = []
-        if self.multi_gpu_enabled:
-            for vae_device in self.vae_devices:
-                # Fresh instance of the singleton's class, attached to the same
-                # pipeline but configured for a different device.
-                manager = type(vae_manager_singleton)()
-                manager.attach_pipeline(
-                    self.pipeline,
-                    device=vae_device,
-                    autocast_dtype=self.runtime_autocast_dtype
-                )
-                self.vae_managers.append(manager)
-        else:
-            vae_manager_singleton.attach_pipeline(
-                self.pipeline,
-                device=self.device_vae,
-                autocast_dtype=self.runtime_autocast_dtype
-            )
-        self._tmp_dirs = set()
-        # parents=True: do not fail when the parent of the results directory
-        # has not been created yet.
-        RESULTS_DIR.mkdir(parents=True, exist_ok=True)
-        print(f"[INFO] VideoService 4-Workers pronto. Tempo: {time.perf_counter()-t0:.2f}s")
-    def _set_generation_environment(self):
-        """Arrange the models for generation: transformer/text encoder on GPU, VAE on CPU.
-
-        Does nothing when ``ENABLE_MEMORY_OPTIMIZATION`` is false. In
-        single-device mode the VAE is offloaded before the generation models
-        are loaded; in multi-GPU mode the order is reversed.
-        """
-        if not ENABLE_MEMORY_OPTIMIZATION:
-            return
-        print("\n  [VRAM Manager] Configurando ambiente de GERAÇÃO...")
-        pipe = self.pipeline
-
-        def on_gpu(module):
-            # Device of the first parameter decides where the module "is".
-            return next(module.parameters()).is_cuda
-
-        def offload_vae():
-            if on_gpu(pipe.vae):
-                pipe.vae.to('cpu')
-
-        def load_generation_models(target):
-            for module in (pipe.transformer, pipe.text_encoder):
-                if not on_gpu(module):
-                    module.to(target)
-
-        if self.multi_gpu_enabled:
-            # Use the first transformer GPU.
-            load_generation_models(self.transformer_devices[0])
-            offload_vae()
-        else:
-            offload_vae()
-            load_generation_models(self.device_ltx)
-        torch.cuda.empty_cache()
-        print("  [VRAM Manager] Ambiente de GERAÇÃO pronto.\n")
-    def _set_decode_environment(self):
-        """Arrange the models for decoding: transformer/text encoder on CPU, VAE on GPU.
-
-        Does nothing when ``ENABLE_MEMORY_OPTIMIZATION`` is false. The VAE goes
-        to the first VAE GPU in multi-GPU mode and to ``self.device_vae``
-        otherwise.
-        """
-        if not ENABLE_MEMORY_OPTIMIZATION:
-            return
-        print("\n  [VRAM Manager] Configurando ambiente de DECODIFICAÇÃO...")
-        pipe = self.pipeline
-        # Free the generation models first.
-        for module in (pipe.transformer, pipe.text_encoder):
-            if next(module.parameters()).is_cuda:
-                module.to('cpu')
-        decode_device = self.vae_devices[0] if self.multi_gpu_enabled else self.device_vae
-        if not next(pipe.vae.parameters()).is_cuda:
-            pipe.vae.to(decode_device)
-        torch.cuda.empty_cache()
-        print("  [VRAM Manager] Ambiente de DECODIFICAÇÃO pronto.\n")
-    def _get_vae_manager(self, device):
-        """Return the VAE manager that belongs to *device*.
-
-        In single-device mode this is always the module-level singleton. In
-        multi-GPU mode the numeric suffix of *device* (e.g. ``"cuda:2"``) is
-        matched against ``self.vae_devices``; the first manager is used when
-        no device matches.
-        """
-        if not self.multi_gpu_enabled:
-            return vae_manager_singleton
-        wanted_index = int(device.split(':')[-1])
-        slot = 0  # fallback: first manager
-        for position, candidate in enumerate(self.vae_devices):
-            if int(candidate.split(':')[-1]) == wanted_index:
-                slot = position
-                break
-        return self.vae_managers[slot]
-    def refine_texture_only(self, latents_path: str, prompt: str, negative_prompt: str,
-                          guidance_scale: float, seed: Optional[int] = None,
-                          conditioning_items: Optional[List[ConditioningItem]] = None) -> Tuple[str, str, torch.Tensor]:
-        """Run the second (refinement) pass on saved latents and decode the result.
-
-        Args:
-            latents_path: File previously written with the latents to refine;
-                it is read with ``torch.load``.
-            prompt: Positive text prompt forwarded to the pipeline.
-            negative_prompt: Negative text prompt forwarded to the pipeline.
-            guidance_scale: Guidance scale (converted to ``float``).
-            seed: Seed for the generator; a random 32-bit value when ``None``.
-            conditioning_items: Optional conditioning forwarded to the pipeline.
-
-        Returns:
-            ``(video_path, latents_path, pixel_tensor)`` for the refined result.
-
-        Entries of ``self.config["second_pass"]`` are merged last into the
-        pipeline kwargs and therefore override the values built here.
-        """
-        print("\n[INFO] Iniciando ETAPA 2 com 4 Workers...")
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_refine_")
-        self._register_tmp_dir(temp_dir)
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        # The per-role device lists only exist in multi-GPU mode; fall back to
-        # the single-device attributes so this method also works there.
-        if self.multi_gpu_enabled:
-            transformer_device = self.transformer_devices[0]
-            vae_device = self.vae_devices[0]
-        else:
-            transformer_device = self.device_ltx
-            vae_device = self.device_vae
-        # PHASE 1: generation on the transformer worker
-        print("[LOG] FASE 1: Geração de Latentes")
-        self._set_generation_environment()
-        # SECURITY NOTE: torch.load unpickles the file; only pass paths to
-        # latents written by this service, never untrusted files.
-        latents_to_refine = torch.load(latents_path)
-        latents_to_refine = latents_to_refine.to(transformer_device)
-        print(f"  [LOG] Latentes carregados no Worker {transformer_device}. Shape: {latents_to_refine.shape}")
-        with torch.autocast(device_type=transformer_device.split(':')[0],
-                          dtype=self.runtime_autocast_dtype,
-                          enabled=('cuda' in transformer_device)):
-            # Pixel size is derived from the latent size and the VAE scale factor.
-            refine_height = latents_to_refine.shape[3] * self.pipeline.vae_scale_factor
-            refine_width = latents_to_refine.shape[4] * self.pipeline.vae_scale_factor
-            second_pass_kwargs = {
-                "prompt": prompt, "negative_prompt": negative_prompt,
-                "height": refine_height, "width": refine_width,
-                "frame_rate": int(DEFAULT_FPS), "num_frames": latents_to_refine.shape[2],
-                "latents": latents_to_refine, "guidance_scale": float(guidance_scale),
-                "output_type": "latent",
-                "generator": torch.Generator(device=transformer_device).manual_seed(used_seed),
-                "conditioning_items": conditioning_items,
-                **(self.config.get("second_pass", {}))
-            }
-            final_latents = self.pipeline(**second_pass_kwargs).images
-            print(f"  [LOG] Latentes refinados. Shape: {final_latents.shape}")
-        # Keep only a CPU copy of the refined latents and release GPU memory.
-        final_latents_cpu = final_latents.cpu()
-        del final_latents, latents_to_refine
-        torch.cuda.empty_cache()
-        # PHASE 2: decoding
-        print("\n[LOG] FASE 2: Decodificação")
-        self._set_decode_environment()
-        # Loop invariants: always decode on the first VAE device (avoids
-        # problems with multiple VAE copies).
-        vae_manager = self._get_vae_manager(vae_device)
-        decode_timestep = float(self.config.get("decode_timestep", 0.05))
-        total_latents = final_latents_cpu.shape[2]
-        pontos_de_corte, segment_sizes = self._calculate_dynamic_cuts(total_latents)
-        if len(pontos_de_corte) == 1:
-            latents_for_decode = final_latents_cpu.to(vae_device)
-            pixel_tensor = vae_manager.decode(
-                latents_for_decode,
-                decode_timestep=decode_timestep
-            ).cpu()
-        else:
-            print(f"  [LOG] Decodificação em {len(pontos_de_corte)} chunks...")
-            pixel_chunks_list = []
-            for i, (start, end) in enumerate(pontos_de_corte):
-                # Clamp the cut to the valid range and skip empty segments.
-                start, end = max(0, start), min(total_latents, end)
-                if start >= end:
-                    continue
-                latent_chunk = final_latents_cpu[:, :, start:end, :, :].to(vae_device)
-                print(f"    -> Decodificando Grupo {i+1} (latentes {start} a {end-1})")
-                pixel_chunk = vae_manager.decode(
-                    latent_chunk,
-                    decode_timestep=decode_timestep
-                )
-                # Move each decoded chunk to the CPU right away to cap VRAM use.
-                pixel_chunks_list.append(pixel_chunk.cpu())
-                torch.cuda.empty_cache()
-            pixel_tensor = self._stitch_dynamic_chunks(pixel_chunks_list, segment_sizes)
-        # Save results
-        video_path_out = self._save_video_from_tensor(pixel_tensor, "refined_video_final", used_seed, temp_dir)
-        latents_path_out = self._save_latents_to_disk(final_latents_cpu, "latents_refined_final", used_seed)
-        # Restore the generation layout before returning.
-        self._set_generation_environment()
-        del final_latents_cpu
-        self._finalize()
-        print("\n[SUCCESS] ETAPA 2 com 4 Workers Concluída")
-        return video_path_out, latents_path_out, pixel_tensor
-    def _get_next_transformer_device(self):
-        """Hand out transformer devices in round-robin order.
-
-        In single-device mode ``self.device_ltx`` is returned and no cursor is
-        advanced.
-        """
-        if not self.multi_gpu_enabled:
-            return self.device_ltx
-        pool = self.transformer_devices
-        cursor = self.current_transformer_idx
-        chosen = pool[cursor]
-        self.current_transformer_idx = (cursor + 1) % len(pool)
-        return chosen
-    def _get_next_vae_device(self):
-        """Hand out VAE devices in round-robin order.
-
-        In single-device mode ``self.device_vae`` is returned and no cursor is
-        advanced.
-        """
-        if not self.multi_gpu_enabled:
-            return self.device_vae
-        pool = self.vae_devices
-        cursor = self.current_vae_idx
-        chosen = pool[cursor]
-        self.current_vae_idx = (cursor + 1) % len(pool)
-        return chosen
-    @torch.no_grad()
-    def _upsample_and_filter_latents(self, latents: torch.Tensor) -> torch.Tensor:
-        """Upsample latents with the latent upsampler and apply the AdaIN filter.
-
-        The latents are un-normalized with the VAE statistics, passed through
-        the upsampler, re-normalized and finally filtered with
-        ``adain_filter_latent`` using the input latents as reference.
-
-        Args:
-            latents: Latents to upsample; moved to the upsampler's device.
-
-        Returns:
-            The upsampled and filtered latents.
-
-        Raises:
-            ValueError: If no latent upsampler is loaded.
-        """
-        if not self.latent_upsampler:
-            raise ValueError("Latent Upsampler não está carregado.")
-        # Run on the device that actually holds the upsampler weights. The
-        # setup code places the upsampler on a single device, so rotating
-        # through the VAE devices here would hand it tensors from another GPU.
-        first_param = None
-        if hasattr(self.latent_upsampler, 'parameters'):
-            first_param = next(self.latent_upsampler.parameters(), None)
-        if first_param is not None:
-            upsample_device = first_param.device
-        else:
-            # No parameters to inspect: keep the previous device selection.
-            upsample_device = self._get_next_vae_device()
-        latents = latents.to(upsample_device)
-        # NOTE(review): the normalization helpers read statistics from
-        # self.pipeline.vae - confirm the VAE is on a compatible device here.
-        latents_unnormalized = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
-        upsampled_latents_unnormalized = self.latent_upsampler(latents_unnormalized)
-        upsampled_latents_normalized = normalize_latents(upsampled_latents_unnormalized, self.pipeline.vae, vae_per_channel_normalize=True)
-        return adain_filter_latent(latents=upsampled_latents_normalized, reference_latents=latents)
-    def get_gpu_usage(self):
-        """Return a one-line summary of allocated / reserved VRAM (in GiB) for every visible CUDA device."""
-        if not torch.cuda.is_available():
-            return "CUDA não disponível"
-        gib = 1024**3
-        return " | ".join(
-            f"GPU{idx}: {torch.cuda.memory_allocated(idx) / gib:.2f}GB / {torch.cuda.memory_reserved(idx) / gib:.2f}GB"
-            for idx in range(torch.cuda.device_count())
-        )
-    # --------------------------------------------------------------------------
-    # --- Public Methods (Service API) ---
-    # --------------------------------------------------------------------------
-    def _load_image_to_tensor_with_resize_and_crop(
-        self,
-        image_input: Union[str, Image.Image],
-        target_height: int = 512,
-        target_width: int = 768,
-        just_crop: bool = False,
-    ) -> torch.Tensor:
-        """Load an image, center-crop it to the target aspect ratio and return a 5D tensor.
-        Args:
-            image_input: Either a file path (str or path-like) or a PIL Image object
-            target_height: Desired height of output tensor
-            target_width: Desired width of output tensor
-            just_crop: If True, only crop the image to the target aspect ratio without resizing
-        Returns:
-            Float tensor laid out as (batch=1, channels=3, num_frames=1, height, width),
-            rescaled with ``x / 127.5 - 1.0``.
-        Raises:
-            ValueError: if ``image_input`` is neither a path nor a PIL image.
-        """
-        if isinstance(image_input, (str, os.PathLike)):
-            # Close the file handle as soon as the pixels have been converted.
-            with Image.open(image_input) as source_image:
-                image = source_image.convert("RGB")
-        elif isinstance(image_input, Image.Image):
-            # PIL inputs may be RGBA / L / P; the permute below requires an HxWx3 array.
-            image = image_input.convert("RGB")
-        else:
-            raise ValueError("image_input must be either a file path or a PIL Image object")
-        input_width, input_height = image.size
-        aspect_ratio_target = target_width / target_height
-        aspect_ratio_frame = input_width / input_height
-        if aspect_ratio_frame > aspect_ratio_target:
-            # Source is wider than the target: keep full height, trim the sides.
-            new_width = int(input_height * aspect_ratio_target)
-            new_height = input_height
-            x_start = (input_width - new_width) // 2
-            y_start = 0
-        else:
-            # Source is taller (or equal): keep full width, trim top and bottom.
-            new_width = input_width
-            new_height = int(input_width / aspect_ratio_target)
-            x_start = 0
-            y_start = (input_height - new_height) // 2
-        image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
-        if not just_crop:
-            image = image.resize((target_width, target_height))
-        image = np.array(image)
-        image = cv2.GaussianBlur(image, (3, 3), 0)
-        frame_tensor = torch.from_numpy(image).float()
-        # crf_compressor works on [0, 1] values; scale back to [0, 255] afterwards.
-        frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
-        frame_tensor = frame_tensor.permute(2, 0, 1)
-        frame_tensor = (frame_tensor / 127.5) - 1.0
-        # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
-        return frame_tensor.unsqueeze(0).unsqueeze(2)
-    def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
-        """Load a conditioning image, pad it and move it to the first transformer device.
-        Args:
-            filepath: image path (or PIL image) forwarded to the image loader.
-            height: target height passed to the loader.
-            width: target width passed to the loader.
-            padding_values: padding spec forwarded to ``torch.nn.functional.pad``.
-        Returns:
-            The padded tensor on ``self.transformer_devices[0]``; on a CUDA device it is
-            also cast to ``self.runtime_autocast_dtype``.
-        """
-        print(f"[DEBUG] Carregando condicionamento: {filepath}")
-        tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
-        tensor = torch.nn.functional.pad(tensor, padding_values)
-        device = self.transformer_devices[0]
-        # Compare the device *type*: an indexed device such as "cuda:0" never equals "cuda",
-        # which made the dtype cast silently skipped on indexed GPUs.
-        if str(device).split(":")[0] == "cuda":
-            out = tensor.to(device, dtype=self.runtime_autocast_dtype)
-        else:
-            out = tensor.to(device)
-        print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
-        return out
-    def generate_low_resolution(
-        self,
-        prompt: str,
-        negative_prompt: str,
-        height: int,
-        width: int,
-        duration_secs: float,
-        guidance_scale: float,
-        seed: Optional[int] = None,
-        image_filepaths: Optional[List[str]] = None
-    ) -> Tuple[str, str, int]:
-        """
-        STAGE 1: generate base-resolution latents from a prompt and optional image
-        conditioning, upsample them, and pass them to ``refine_texture_only``.
-        Args:
-            prompt: positive text prompt.
-            negative_prompt: negative text prompt.
-            height: requested output height in pixels.
-            width: requested output width in pixels.
-            duration_secs: clip duration; converted to frames with ``DEFAULT_FPS``.
-            guidance_scale: guidance scale forwarded to the pipeline.
-            seed: RNG seed; a random 32-bit seed is drawn when ``None``.
-            image_filepaths: optional conditioning images, each attached at frame 0
-                with strength 1.0. ``None`` means no conditioning.
-        Returns:
-            ``(final_video_path, final_latents_path, used_seed)``.
-        """
-        print("[INFO] Iniciando ETAPA 1: Geração de Baixa Resolução...")
-        self._set_generation_environment()
-        # --- Seed and directories ---
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        # Global RNGs are not seeded here; only the generator passed to the pipeline is.
-        print(f"  - Usando Seed: {used_seed}")
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_low_")
-        self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"
-        os.makedirs(results_dir, exist_ok=True)
-        # --- Dimensions and frame count ---
-        actual_num_frames = int(round(duration_secs * DEFAULT_FPS))
-        downscaled_height, downscaled_width = self._calculate_downscaled_dims(height, width)
-        # Round both sides up to the next multiple of 32 to derive the padding.
-        height_padded = ((downscaled_height - 1) // 32 + 1) * 32
-        width_padded = ((downscaled_width - 1) // 32 + 1) * 32
-        padding_values = calculate_padding(downscaled_height, downscaled_width, height_padded, width_padded)
-        conditioning_items = []
-        # image_filepaths defaults to None; iterating it directly raised TypeError.
-        for filepath in image_filepaths or []:
-            cond_tensor = self._prepare_conditioning_tensor(filepath, downscaled_height, downscaled_width, padding_values)
-            conditioning_items.append(ConditioningItem(cond_tensor, 0, 1.0))
-        print(f"  - Frames: {actual_num_frames}, Duração: {duration_secs}s")
-        print(f"  - Dimensões de Saída: {downscaled_height}x{downscaled_width}")
-        # --- Pipeline execution ---
-        device = self.transformer_devices[0]
-        # Use the device *type* for the autocast switch: "cuda:0" == "cuda" is False,
-        # which left autocast disabled on every indexed GPU.
-        device_type = str(device).split(":")[0]
-        with torch.autocast(device_type=device_type, dtype=self.runtime_autocast_dtype, enabled=(device_type == "cuda")):
-            first_pass_kwargs = {
-                "prompt": prompt,
-                "negative_prompt": negative_prompt,
-                "height": downscaled_height,
-                "width": downscaled_width,
-                # NOTE(review): this passes frames // 8 + 1 rather than the raw frame count;
-                # confirm this matches what the pipeline expects for ``num_frames``.
-                "num_frames": (actual_num_frames//8)+1,
-                "frame_rate": int(DEFAULT_FPS),
-                "generator": torch.Generator(device=device).manual_seed(used_seed),
-                "output_type": "latent",
-                "conditioning_items": conditioning_items,
-                "guidance_scale": float(guidance_scale),
-                # Config entries come last, so they override the values above.
-                **(self.config.get("first_pass", {}))
-            }
-            print("  - Enviando para a pipeline LTX...")
-            latents = self.pipeline(**first_pass_kwargs).images
-            print(f"  - Latentes gerados com shape: {latents.shape}")
-            latents = self._upsample_and_filter_latents(latents)
-            print(f"  - Latentes com upscaler: {latents.shape}")
-            tensor_path = self._save_latents_to_disk(latents, "latents_low_res", used_seed)
-            self._finalize()
-            final_video_path, final_latents_path, _ = self.refine_texture_only(
-                latents_path=tensor_path,
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                guidance_scale=guidance_scale,
-                seed=used_seed,
-                conditioning_items=conditioning_items,
-            )
-        # --- Cleanup ---
-        self._finalize()
-        self._set_generation_environment()
-        print("[SUCCESS] ETAPA 1 Concluída.")
-        return final_video_path, final_latents_path, used_seed
-    def apply_secondary_refinement(
-        self,
-        latents_path: str,
-        prompt: str,
-        negative_prompt: str,
-        guidance_scale: float,
-        seed: int,
-        # Parameters controlling the top-level (level 1) split
-        macro_chunk_size: int = 8,
-        macro_overlap: int = 2
-    ) -> Tuple[str, str]:
-        """
-        Refine saved latents in overlapping macro-chunks and stitch the decoded
-        pixels into one video.
-        Each chunk is saved to disk, refined through ``refine_texture_only`` with
-        seed ``seed + i``, and the resulting pixel tensors are stitched along the
-        frame axis.
-        Args:
-            latents_path: path of the latent tensor to refine (loaded with ``torch.load``).
-            prompt: positive text prompt.
-            negative_prompt: negative text prompt.
-            guidance_scale: guidance scale forwarded to the refinement pass.
-            seed: base seed; chunk ``i`` uses ``seed + i``.
-            macro_chunk_size: minimum chunk size, in latent frames.
-            macro_overlap: latent frames shared between neighbouring chunks.
-        Returns:
-            ``(final_video_path, latents_path)``: the stitched video path and the
-            input latents path, unchanged.
-        """
-        print("[LOG] Preparando ambiente da GPU para o refinamento...")
-        self._set_generation_environment()
-        print(f"[LOG] Carregando latentes principais de: {latents_path}")
-        # NOTE(review): torch.load unpickles the file; only load latents from trusted paths.
-        # map_location avoids materialising the full tensor on a GPU before moving it to CPU.
-        initial_latents = torch.load(latents_path, map_location="cpu")
-        total_latents = initial_latents.shape[2]
-        print(f"[LOG] Nível 1 (Macro): Calculando divisão para {total_latents} latentes...")
-        macro_cuts, macro_segment_sizes = self._calculate_dynamic_cuts(
-            total_latents,
-            min_chunk_size=macro_chunk_size,
-            overlap=macro_overlap
-        )
-        print(f"[LOG] Nível 1 (Macro): Trabalho dividido em {len(macro_cuts)} tarefas principais.")
-        # Run every macro task in sequence
-        pixel_results = []
-        for i, (start, end) in enumerate(macro_cuts):
-            task_id_str = f"[Tarefa {i+1}/{len(macro_cuts)}]"
-            print(f"\n--- Processando {task_id_str} (latentes {start} a {end-1}) ---")
-            latent_chunk = initial_latents[:, :, start:end, :, :]
-            # The file name is constant, so each chunk overwrites the previous one on disk.
-            tensor_path = self._save_latents_to_disk(latent_chunk, "latents_chuck_i", seed)
-            _video_path, _latents_path, pixel_tensor_chunk = self.refine_texture_only(
-                latents_path=tensor_path,
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                guidance_scale=guidance_scale,
-                seed=seed + i, # Gives every task a different seed
-                conditioning_items=None,
-            )
-            # Keep the decoded pixel tensor in memory for stitching
-            pixel_results.append(pixel_tensor_chunk)
-            torch.cuda.empty_cache() # Free VRAM between tasks
-        final_pixel_tensor = self._stitch_dynamic_chunks(
-            pixel_chunks_list=pixel_results,
-            segment_sizes=macro_segment_sizes,
-            overlap=macro_overlap
-        )
-        print(f"[LOG] Costura final (Nível 1) concluída. Shape do tensor final: {final_pixel_tensor.shape}")
-        # Save the final video and clean up
-        final_tmp_dir = tempfile.mkdtemp(prefix="ltxv_final_")
-        # Register the directory so _finalize() below removes it; it was previously leaked.
-        self._register_tmp_dir(final_tmp_dir)
-        final_video_path = self._save_video_from_tensor(
-            pixel_tensor=final_pixel_tensor,
-            base_filename="final_video_stitched",
-            seed=seed,
-            # The video is written here first, then moved to the results directory
-            temp_dir=final_tmp_dir
-        )
-        del pixel_results, final_pixel_tensor
-        self._finalize() # Removes all registered temporary directories and frees memory
-        print(f"\n[SUCCESS] Processo de Macro-Divisão concluído. Vídeo final em: {final_video_path}")
-        self._set_generation_environment()
-        return final_video_path, latents_path
-    def _calculate_dynamic_cuts(
-        self,
-        total_latents: int,
-        min_chunk_size: int = 5,
-        overlap: int = 2
-    ) -> tuple[list[tuple[int, int]], list[int]]:
-        """
-        Split ``total_latents`` into overlapping windows.
-        Returns ``(cut_points, segment_sizes)``: ``cut_points[i]`` is the
-        ``(start, end)`` slice of window ``i`` including its overlap, and
-        ``segment_sizes[i]`` is the number of latents that window contributes
-        without overlap. Inputs no longer than ``min_chunk_size + overlap`` yield a
-        single window covering everything.
-        """
-        if total_latents <= min_chunk_size + overlap:
-            print(f"  [LOG] Detecção: Vídeo muito curto ({total_latents} latentes). Usando 1 chunk.")
-            return [(0, total_latents)], [total_latents]
-        # Two latents are held back from the even split and handed to the last chunk below.
-        distributable = total_latents - 2
-        # ``or 1`` guarantees at least one chunk when the floor division yields 0.
-        num_chunks = distributable // min_chunk_size or 1
-        base_size, extra = divmod(distributable, num_chunks)
-        # The first ``extra`` chunks absorb one leftover latent each.
-        segment_sizes = [base_size + 1 if idx < extra else base_size for idx in range(num_chunks)]
-        segment_sizes[-1] += 2
-        print(f"  [LOG] Divisão dinâmica: {total_latents} latentes em {num_chunks} chunks.")
-        print(f"        Tamanhos de conteúdo: {segment_sizes}")
-        cut_points = []
-        content_start = 0
-        last_idx = num_chunks - 1
-        for idx, size in enumerate(segment_sizes):
-            # Every window but the first reaches back ``overlap`` latents.
-            window_start = content_start - overlap if idx else content_start
-            # The last window always runs to the end of the sequence.
-            window_end = total_latents if idx == last_idx else content_start + size + overlap
-            cut_points.append((window_start, window_end))
-            content_start += size
-        return cut_points, segment_sizes
-    def _stitch_dynamic_chunks(
-        self,
-        pixel_chunks_list: list[torch.Tensor],
-        segment_sizes: list[int],
-        overlap: int = 2
-    ) -> torch.Tensor:
-        """
-        Stitch decoded pixel chunks along the frame axis (dim 2), dropping overlaps.
-        The code assumes 8 pixel frames per latent when converting sizes.
-        Args:
-            pixel_chunks_list: decoded chunks, in order.
-            segment_sizes: non-overlapping content size of each chunk, in latents.
-            overlap: overlap between neighbouring chunks, in latents.
-        Returns:
-            The concatenated tensor; an empty tensor for an empty list, or the
-            single chunk itself when only one is given.
-        """
-        if not pixel_chunks_list:
-            return torch.empty(0)
-        if len(pixel_chunks_list) == 1:
-            return pixel_chunks_list[0]
-        discard_frames = overlap * 8
-        last_idx = len(pixel_chunks_list) - 1
-        # First chunk: keep only its own content, dropping the trailing overlap.
-        final_parts = [pixel_chunks_list[0][:, :, :segment_sizes[0] * 8, :, :]]
-        for i, chunk in enumerate(pixel_chunks_list[1:], start=1):
-            if i == last_idx or i >= len(segment_sizes):
-                # Last chunk: drop the leading overlap and keep everything else.
-                final_parts.append(chunk[:, :, discard_frames:, :, :])
-            else:
-                # Middle chunks overlap on both sides; the trailing overlap was previously
-                # kept, duplicating frames that the next chunk contributes again.
-                keep_until = discard_frames + segment_sizes[i] * 8
-                final_parts.append(chunk[:, :, discard_frames:keep_until, :, :])
-        return torch.cat(final_parts, dim=2)
-    def refine_texture_onl4y(
-        self,
-        latents_path: str,
-        prompt: str,
-        negative_prompt: str,
-        guidance_scale: float,
-        seed: Optional[int] = None,
-        conditioning_items: Optional[List[ConditioningItem]] = None
-    ) -> Tuple[str, str, torch.Tensor]:
-        """
-        Refine saved latents with the pipeline's second pass, decode them (in
-        overlapping chunks when needed) and save both the video and the latents.
-        Args:
-            latents_path: path of the latent tensor to refine (loaded with ``torch.load``).
-            prompt: positive text prompt.
-            negative_prompt: negative text prompt.
-            guidance_scale: guidance scale forwarded to the pipeline.
-            seed: RNG seed; a random 32-bit seed is drawn when ``None``.
-            conditioning_items: optional conditioning forwarded to the pipeline.
-        Returns:
-            ``(video_path, refined_latents_path, pixel_tensor)``; the pixel tensor is on CPU.
-        """
-        print("\n======================================================================")
-        print("====== [INFO] Iniciando ETAPA 2: Refinamento e Decodificação Dinâmica ======")
-        print("======================================================================\n")
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_refine_")
-        self._register_tmp_dir(temp_dir)
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        device = self.transformer_devices[0]
-        # Device type without the index, e.g. "cuda" for "cuda:0".
-        device_type = str(device).split(":")[0]
-        decode_timestep = float(self.config.get("decode_timestep", 0.05))
-        # --- PHASE 1: latent generation (transformer) ---
-        print("[LOG] FASE 1: Geração de Latentes (Transformer na GPU)")
-        self._set_generation_environment()
-        # NOTE(review): torch.load unpickles the file; only load latents from trusted paths.
-        latents_to_refine = torch.load(latents_path).to(device)
-        print(f"  [LOG] Latentes carregados para a GPU. Shape: {latents_to_refine.shape}")
-        # The switch used to read ``self.device``; derive it from the transformer device
-        # so it matches the other entry points in this class.
-        with torch.autocast(device_type=device_type, dtype=self.runtime_autocast_dtype, enabled=(device_type == "cuda")):
-            refine_height = latents_to_refine.shape[3] * self.pipeline.vae_scale_factor
-            refine_width = latents_to_refine.shape[4] * self.pipeline.vae_scale_factor
-            second_pass_kwargs = {
-                "prompt": prompt, "negative_prompt": negative_prompt, "height": refine_height, "width": refine_width,
-                "frame_rate": int(DEFAULT_FPS), "num_frames": latents_to_refine.shape[2],
-                "latents": latents_to_refine, "guidance_scale": float(guidance_scale), "output_type": "latent",
-                "generator": torch.Generator(device=device).manual_seed(used_seed),
-                # Config entries come last, so they override the values above.
-                "conditioning_items": conditioning_items, **(self.config.get("second_pass", {}))
-            }
-            print("  [LOG] Enviando para a pipeline de refinamento (Transformer)...")
-            final_latents = self.pipeline(**second_pass_kwargs).images
-            print(f"  [LOG] [SUCESSO] Latentes refinados. Shape: {final_latents.shape}")
-        print("  [LOG] Geração de latentes concluída. Movendo resultado para a CPU.")
-        final_latents_cpu = final_latents.cpu()
-        del final_latents, latents_to_refine
-        torch.cuda.empty_cache()
-        # --- PHASE 2: chunked decoding (VAE) ---
-        print("\n[LOG] FASE 2: Decodificação de Latentes (VAE na GPU)")
-        self._set_decode_environment()
-        total_latents = final_latents_cpu.shape[2]
-        pontos_de_corte, segment_sizes = self._calculate_dynamic_cuts(total_latents)
-        if len(pontos_de_corte) == 1:
-            pixel_tensor = vae_manager_singleton.decode(
-                final_latents_cpu.to(device),
-                decode_timestep=decode_timestep
-            ).cpu()
-        else:
-            print(f"  [LOG] Ativando modo de janela deslizante para {len(pontos_de_corte)} chunks.")
-            pixel_chunks_list = []
-            for i, (start, end) in enumerate(pontos_de_corte):
-                # Clamp the slice to the tensor bounds and skip empty windows.
-                start, end = max(0, start), min(total_latents, end)
-                if start >= end:
-                    continue
-                latent_chunk = final_latents_cpu[:, :, start:end, :, :]
-                print(f"    -> Decodificando Grupo {i+1}/{len(pontos_de_corte)} (latentes {start} a {end-1}), shape: {latent_chunk.shape}")
-                pixel_chunk = vae_manager_singleton.decode(
-                    latent_chunk.to(device),
-                    decode_timestep=decode_timestep
-                )
-                # Move each decoded chunk to CPU right away so VRAM can be reclaimed.
-                pixel_chunks_list.append(pixel_chunk.cpu())
-                torch.cuda.empty_cache()
-            print("    [LOG] Costurando os vídeos decodificados...")
-            pixel_tensor = self._stitch_dynamic_chunks(pixel_chunks_list, segment_sizes)
-        print(f"\n[LOG] [SUCESSO] Tensor de pixels final montado na CPU com shape: {pixel_tensor.shape}")
-        # --- PHASE 3: save outputs and restore the environment ---
-        print("\n[LOG] FASE 3: Salvamento e Restauração do Ambiente da GPU")
-        video_path_out = self._save_video_from_tensor(pixel_tensor, "refined_video_final", used_seed, temp_dir)
-        latents_path_out = self._save_latents_to_disk(final_latents_cpu, "latents_refined_final", used_seed)
-        print("  [LOG] Tarefa concluída. Restaurando ambiente de GERAÇÃO na GPU para a próxima execução...")
-        # The log line announces the generation environment, but the decode one was re-applied here.
-        self._set_generation_environment()
-        print("  [LOG] Liberando tensores finais da memória da CPU.")
-        del final_latents_cpu
-        self._finalize()
-        return video_path_out, latents_path_out, pixel_tensor
-    def encode_latents_to_mp4(self, latents_path: str, fps: int = int(DEFAULT_FPS)) -> str:
-        """Decode a saved latent tensor and write it out as an MP4 video.
-        Args:
-            latents_path: path of the latent tensor (loaded with ``torch.load``).
-            fps: frame rate of the encoded video.
-        Returns:
-            Path of the saved video.
-        Raises:
-            Exception: any loading, decoding or encoding failure is logged and re-raised.
-        """
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_")
-        self._register_tmp_dir(temp_dir)
-        seed = random.randint(0, 99999) # Only used to build the output file name
-        device = self.transformer_devices[0]
-        # Use the device *type* for the autocast switch: "cuda:0" == "cuda" is False,
-        # which left autocast disabled on every indexed GPU.
-        device_type = str(device).split(":")[0]
-        try:
-            # Loading happens inside the try so the temp dir is cleaned up if it fails.
-            # NOTE(review): torch.load unpickles the file; only load latents from trusted paths.
-            latents = torch.load(latents_path)
-            chunks = self._split_latents_with_overlap(latents)
-            pixel_chunks = []
-            with torch.autocast(device_type=device_type, dtype=self.runtime_autocast_dtype, enabled=(device_type == "cuda")):
-                for chunk in chunks:
-                    if chunk.shape[2] == 0:
-                        continue
-                    pixel_chunk = vae_manager_singleton.decode(chunk.to(device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-                    pixel_chunks.append(pixel_chunk)
-            # NOTE(review): the split overlaps by one latent frame while the merge trims one
-            # decoded frame; confirm the two units are meant to match.
-            final_pixel_tensor = self._merge_chunks_with_overlap(pixel_chunks)
-            final_video_path = self._save_video_from_tensor(final_pixel_tensor, f"final_video_{seed}", seed, temp_dir, fps=fps)
-            return final_video_path
-        except Exception as e:
-            print(f"[ERROR] Falha ao encodar latentes para MP4: {e}")
-            traceback.print_exc()
-            raise
-        finally:
-            self._finalize()
-    def _finalize(self):
-        """Run garbage collection, release cached CUDA memory and delete registered temp dirs."""
-        if LTXV_DEBUG:
-            print("[DEBUG] Finalize: iniciando limpeza...")
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-        # Iterate over a snapshot because the registry is mutated inside the loop.
-        registered = list(self._tmp_dirs)
-        for tmp_path in registered:
-            shutil.rmtree(tmp_path, ignore_errors=True)
-            self._tmp_dirs.remove(tmp_path)
-            if LTXV_DEBUG:
-                print(f"[DEBUG] Diretório temporário removido: {tmp_path}")
-    def _load_config(self, config_filename: str) -> Dict:
-        """Load a YAML configuration file from the LTX-Video ``configs`` directory.
-        Args:
-            config_filename: file name relative to ``LTX_VIDEO_REPO_DIR / "configs"``.
-        Returns:
-            The parsed YAML document.
-        """
-        config_path = LTX_VIDEO_REPO_DIR / "configs" / config_filename
-        print(f"[INFO] Carregando configuração de: {config_path}")
-        # Explicit encoding so parsing does not depend on the process locale.
-        with open(config_path, "r", encoding="utf-8") as file:
-            return yaml.safe_load(file)
-    def _load_models_from_hub(self):
-        """Download the checkpoints and build the pipeline and the optional latent upsampler.
-        ``self.config["checkpoint_path"]`` and, when set,
-        ``self.config["spatial_upscaler_model_path"]`` are overwritten in place with
-        the paths returned by ``hf_hub_download``.
-        Returns:
-            ``(pipeline, latent_upsampler)``; ``latent_upsampler`` is ``None`` when no
-            spatial upscaler is configured.
-        """
-        t0 = time.perf_counter()
-        LTX_REPO = "Lightricks/LTX-Video"
-        print("[INFO] Baixando checkpoint principal...")
-        # The config value is a repo file name on entry and a downloaded path afterwards.
-        self.config["checkpoint_path"] = hf_hub_download(
-            repo_id=LTX_REPO, filename=self.config["checkpoint_path"],
-            token=os.getenv("HF_TOKEN"),
-        )
-        print(f"[INFO] Checkpoint principal em: {self.config['checkpoint_path']}")
-        print("[INFO] Construindo pipeline...")
-        pipeline = create_ltx_video_pipeline(
-            ckpt_path=self.config["checkpoint_path"],
-            precision=self.config["precision"],
-            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
-            sampler=self.config["sampler"],
-            device="cpu",  # Load on CPU first
-            enhance_prompt=False
-        )
-        print("[INFO] Pipeline construída.")
-        latent_upsampler = None
-        # The spatial upscaler is optional; it is built only when configured.
-        if self.config.get("spatial_upscaler_model_path"):
-            print("[INFO] Baixando upscaler espacial...")
-            self.config["spatial_upscaler_model_path"] = hf_hub_download(
-                repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"],
-                token=os.getenv("HF_TOKEN")
-            )
-            print(f"[INFO] Upscaler em: {self.config['spatial_upscaler_model_path']}")
-            print("[INFO] Construindo latent_upsampler...")
-            latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
-            print("[INFO] Latent upsampler construído.")
-        print(f"[INFO] Carregamento de modelos concluído em {time.perf_counter()-t0:.2f}s")
-        return pipeline, latent_upsampler
-    def _move_models_to_device(self):
-        """Move the pipeline and, when present, the latent upsampler to the first transformer device."""
-        target = self.transformer_devices[0]
-        print(f"[INFO] Movendo modelos para o dispositivo: {target}")
-        self.pipeline.to(target)
-        upsampler = self.latent_upsampler
-        if upsampler:
-            upsampler.to(target)
-    def _get_precision_dtype(self) -> torch.dtype:
-        """Map the configured ``precision`` name to the dtype used for autocast.
-        Unknown or missing precision names fall back to ``torch.float32``.
-        """
-        precision_name = str(self.config.get("precision", "")).lower()
-        dtype_by_precision = {
-            "float8_e4m3fn": torch.bfloat16,
-            "bfloat16": torch.bfloat16,
-            "mixed_precision": torch.float16,
-        }
-        return dtype_by_precision.get(precision_name, torch.float32)
-    @torch.no_grad()
-    def _upsample_and_filter_latents4(self, latents: torch.Tensor) -> torch.Tensor:
-        """Upsample latents and apply the AdaIN filter, using the input latents as reference.
-        Raises:
-            ValueError: if no latent upsampler is loaded.
-        """
-        if not self.latent_upsampler:
-            raise ValueError("Latent Upsampler não está carregado para a operação de upscale.")
-        # The upsampler is fed un-normalized latents; its output is normalized again afterwards.
-        raw = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
-        raw_upsampled = self.latent_upsampler(raw)
-        upsampled = normalize_latents(raw_upsampled, self.pipeline.vae, vae_per_channel_normalize=True)
-        # AdaIN filtering against the original (pre-upsample) latents
-        return adain_filter_latent(latents=upsampled, reference_latents=latents)
-    def _prepare_conditioning_tensor_from_path(self, filepath: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
-        """Load an image as a tensor, pad it, and move it to the first transformer device.
-        The result is cast to ``self.runtime_autocast_dtype``.
-        """
-        image_tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
-        padded = F.pad(image_tensor, padding)
-        target = self.transformer_devices[0]
-        return padded.to(target, dtype=self.runtime_autocast_dtype)
-    def _calculate_downscaled_dims(self, height: int, width: int) -> Tuple[int, int]:
-        """Compute the ``(height, width)`` used by the low-resolution first pass.
-        Each side is rounded up to a multiple of 8, scaled by the configured
-        ``downscale_factor`` and rounded down to a multiple of the VAE scale factor.
-        """
-        scale = self.config.get("downscale_factor", 0.6666666)
-        multiple = self.pipeline.vae_scale_factor
-        def _shrink(side: int) -> int:
-            padded = ((side - 1) // 8 + 1) * 8
-            scaled = int(padded * scale)
-            return scaled - (scaled % multiple)
-        return _shrink(height), _shrink(width)
-    def _split_latents_with_overlap(self, latents: torch.Tensor, overlap: int = 1) -> List[torch.Tensor]:
-        """Split latents along dim 2 into two chunks that share ``overlap`` frames.
-        Inputs with at most ``overlap`` frames are returned unsplit as a one-item list.
-        """
-        frame_count = latents.shape[2]
-        if frame_count <= overlap:
-            return [latents]
-        split_at = max(overlap, frame_count // 2)
-        # The second piece starts ``overlap`` frames early to create the shared region.
-        pieces = (
-            latents[:, :, :split_at, :, :],
-            latents[:, :, split_at - overlap:, :, :],
-        )
-        return [piece for piece in pieces if piece.shape[2] > 0]
-    def _merge_chunks_with_overlap(self, chunks: List[torch.Tensor], overlap: int = 1) -> torch.Tensor:
-        """Concatenate chunks along dim 2, trimming the first chunk's trailing overlap.
-        Args:
-            chunks: tensors to join, in order.
-            overlap: number of trailing frames dropped from the first chunk; values
-                of 0 or less leave it untouched.
-        Returns:
-            The concatenated tensor; an empty tensor for an empty list, or the
-            single chunk itself when only one is given.
-        """
-        if not chunks:
-            return torch.empty(0)
-        if len(chunks) == 1:
-            return chunks[0]
-        head = chunks[0]
-        # ``[:-0]`` is an empty slice, so only trim when there is overlap to remove.
-        if overlap > 0:
-            head = head[:, :, :-overlap, :, :]
-        return torch.cat([head, *chunks[1:]], dim=2)
-    def _save_latents_to_disk(self, latents_tensor: torch.Tensor, base_filename: str, seed: int) -> str:
-        """Save a detached CPU copy of the latents as ``<base_filename>_<seed>.pt`` in ``RESULTS_DIR``.
-        Returns the path of the written file as a string.
-        """
-        destination = RESULTS_DIR / f"{base_filename}_{seed}.pt"
-        torch.save(latents_tensor.detach().to("cpu"), destination)
-        if LTXV_DEBUG:
-            print(f"[DEBUG] Latentes salvos em: {destination}")
-        return str(destination)
-    def _save_video_from_tensor(self, pixel_tensor: torch.Tensor, base_filename: str, seed: int, temp_dir: str, fps: int = int(DEFAULT_FPS)) -> str:
-        """Encode a pixel tensor to MP4 inside ``temp_dir`` and move the file to ``RESULTS_DIR``.
-        Returns the final path of the video as a string.
-        """
-        file_name = f"{base_filename}_{seed}.mp4"
-        staging_path = os.path.join(temp_dir, file_name)
-        video_encode_tool_singleton.save_video_from_tensor(pixel_tensor, staging_path, fps=fps)
-        # The encoder writes to the temp dir first; the finished file is then moved into place.
-        destination = RESULTS_DIR / file_name
-        shutil.move(staging_path, destination)
-        print(f"[INFO] Vídeo final salvo em: {destination}")
-        return str(destination)
-    def _seed_everething(self, seed: int):
-        """Seed Python's ``random``, NumPy and PyTorch (plus CUDA / MPS when available)."""
-        for seeder in (random.seed, np.random.seed, torch.manual_seed):
-            seeder(seed)
-        cuda_ready = torch.cuda.is_available()
-        if cuda_ready:
-            torch.cuda.manual_seed(seed)
-        mps_ready = torch.backends.mps.is_available()
-        if mps_ready:
-            torch.mps.manual_seed(seed)
-    def _register_tmp_dir(self, dir_path: str):
-        """Remember an existing temporary directory so it can be removed later.
-        Empty values and paths that are not existing directories are ignored.
-        """
-        if not (dir_path and os.path.isdir(dir_path)):
-            return
-        self._tmp_dirs.add(dir_path)
-        if LTXV_DEBUG:
-            print(f"[DEBUG] Diretório temporário registrado: {dir_path}")
-# ==============================================================================
-# 4. INSTANTIATION AND ENTRY POINT (Example)
-# ==============================================================================
-# NOTE: these statements run at import time, so importing this module constructs
-# the VideoService instance as a side effect.
-print("Criando instância do VideoService. O carregamento do modelo começará agora...")
-video_generation_service = VideoService()
-print("Instância do VideoService pronta para uso.")