Test

Paused

App Files Files Community

Eueuiaa committed on Oct 9, 2025

Commit

8e066ce

verified ·

1 Parent(s): 9641c9e

Update api/ltx_server_refactored.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored.py +337 -448

api/ltx_server_refactored.py CHANGED Viewed

@@ -1,167 +1,122 @@
-# ltx_server_refactored.py — VideoService (Modular Version with Exact Dimension Calculation)
-# --- 0. WARNINGS E AMBIENTE ---
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
-warnings.filterwarnings("ignore", message=".*")
-from huggingface_hub import logging
-logging.set_verbosity_error()
-logging.set_verbosity_warning()
-logging.set_verbosity_info()
-logging.set_verbosity_debug()
-LTXV_DEBUG=1
-LTXV_FRAME_LOG_EVERY=8
-import os, subprocess, shlex, tempfile
-import torch
-import json
-import numpy as np
-import random
 import os
-import shlex
-import yaml
-from typing import List, Dict
-from pathlib import Path
-import imageio
-from PIL import Image
-import tempfile
-from huggingface_hub import hf_hub_download
 import sys
 import subprocess
 import gc
 import shutil
 import contextlib
 import time
 import traceback
-from einops import rearrange
 import torch.nn.functional as F
-from managers.vae_manager import vae_manager_singleton
-from tools.video_encode_tool import video_encode_tool_singleton
 DEPS_DIR = Path("/data")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
 def run_setup():
     setup_script_path = "setup.py"
     if not os.path.exists(setup_script_path):
         print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
         return
     try:
-        print("[DEBUG] Executando setup.py para dependências...")
-        subprocess.run([sys.executable, setup_script_path], check=True)
         print("[DEBUG] Setup concluído com sucesso.")
     except subprocess.CalledProcessError as e:
-        print(f"[DEBUG] ERRO no setup.py (code {e.returncode}). Abortando.")
         sys.exit(1)
-if not LTX_VIDEO_REPO_DIR.exists():
-    print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
-    run_setup()
 def add_deps_to_path():
     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-    if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
         sys.path.insert(0, repo_path)
-        print(f"[DEBUG] Repo adicionado ao sys.path: {repo_path}")
-def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
-    try:
-        import psutil
-        import pynvml as nvml
-        nvml.nvmlInit()
-        handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
-        try:
-            procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
-        except Exception:
-            procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
-        results = []
-        for p in procs:
-            pid = int(p.pid)
-            used_mb = None
-            try:
-                if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
-                    used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
-            except Exception:
-                used_mb = None
-            name = "unknown"
-            user = "unknown"
-            try:
-                import psutil
-                pr = psutil.Process(pid)
-                name = pr.name()
-                user = pr.username()
-            except Exception:
-                pass
-            results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
-        nvml.nvmlShutdown()
-        return results
-    except Exception:
-        return []
-def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
-    cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
-    try:
-        out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
-    except Exception:
-        return []
-    results = []
-    for line in out.strip().splitlines():
-        parts = [p.strip() for p in line.split(",")]
-        if len(parts) >= 3:
-            try:
-                pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
-                user = "unknown"
-                try:
-                    import psutil
-                    pr = psutil.Process(pid)
-                    user = pr.username()
-                except Exception:
-                    pass
-                results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
-            except Exception:
-                continue
-    return results
-def calculate_padding(orig_h, orig_w, target_h, target_w):
-    pad_h = target_h - orig_h
-    pad_w = target_w - orig_w
-    pad_top = pad_h // 2
-    pad_bottom = pad_h - pad_top
-    pad_left = pad_w // 2
-    pad_right = pad_w - pad_left
-    return (pad_left, pad_right, pad_top, pad_bottom)
-def calculate_new_dimensions(orig_w, orig_h, divisor=8):
-    if orig_w == 0 or orig_h == 0:
-        return 512, 512
-    if orig_w >= orig_h:
-        aspect_ratio = orig_w / orig_h
-        new_h = 512
-        new_w = new_h * aspect_ratio
-    else:
-        aspect_ratio = orig_h / orig_w
-        new_w = 512
-        new_h = new_w * aspect_ratio
-    final_w = int(round(new_w / divisor)) * divisor
-    final_h = int(round(new_h / divisor)) * divisor
     final_w = max(divisor, final_w)
     final_h = max(divisor, final_h)
-    print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Calculado: {new_w:.0f}x{new_h:.0f} -> Final (divisível por {divisor}): {final_w}x{final_h}")
-    return final_h, final_w
-def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
-    if not processes:
-        return "  - Processos ativos: (nenhum)\n"
-    processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
-    lines = ["  - Processos ativos (PID | USER | NAME | VRAM MB):"]
-    for p in processes:
-        star = "*" if p["pid"] == current_pid else " "
-        used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
-        lines.append(f"    {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
-    return "\n".join(lines) + "\n"
 def log_tensor_info(tensor, name="Tensor"):
     if not isinstance(tensor, torch.Tensor):
-        print(f"\n[INFO] '{name}' não é tensor.")
         return
     print(f"\n--- Tensor: {name} ---")
     print(f"  - Shape: {tuple(tensor.shape)}")
@@ -169,141 +124,88 @@ def log_tensor_info(tensor, name="Tensor"):
     print(f"  - Device: {tensor.device}")
     if tensor.numel() > 0:
         try:
-            print(f"  - Min: {tensor.min().item():.4f}  Max: {tensor.max().item():.4f}  Mean: {tensor.mean().item():.4f}")
-        except Exception:
-            pass
     print("------------------------------------------\n")
-add_deps_to_path()
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
-from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
-from api.ltx.inference import (
-    create_ltx_video_pipeline,
-    create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop,
-    seed_everething,
-)
 class VideoService:
     def __init__(self):
         t0 = time.perf_counter()
-        print("[DEBUG] Inicializando VideoService...")
-        self.debug = os.getenv("LTXV_DEBUG", "1") == "1"
-        self.frame_log_every = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
-        self.config = self._load_config()
-        print(f"[DEBUG] Config carregada (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        print(f"[DEBUG] Device selecionado: {self.device}")
-        self.last_memory_reserved_mb = 0.0
-        self._tmp_dirs = set(); self._tmp_files = set(); self._last_outputs = []
         self.pipeline, self.latent_upsampler = self._load_models()
-        print(f"[DEBUG] Pipeline e Upsampler carregados. Upsampler ativo? {bool(self.latent_upsampler)}")
-        print(f"[DEBUG] Movendo modelos para {self.device}...")
         self.pipeline.to(self.device)
         if self.latent_upsampler:
             self.latent_upsampler.to(self.device)
         self._apply_precision_policy()
-        print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
         vae_manager_singleton.attach_pipeline(
             self.pipeline,
             device=self.device,
             autocast_dtype=self.runtime_autocast_dtype
         )
-        print(f"[DEBUG] VAE manager conectado: has_vae={hasattr(self.pipeline, 'vae')} device={self.device}")
         if self.device == "cuda":
             torch.cuda.empty_cache()
-            self._log_gpu_memory("Após carregar modelos")
-        print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
-    def _log_gpu_memory(self, stage_name: str):
-        if self.device != "cuda":
-            return
-        device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
-        current_reserved_b = torch.cuda.memory_reserved(device_index)
-        current_reserved_mb = current_reserved_b / (1024 ** 2)
-        total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
-        total_memory_mb = total_memory_b / (1024 ** 2)
-        peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
-        delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
-        processes = _query_gpu_processes_via_nvml(device_index) or _query_gpu_processes_via_nvidiasmi(device_index)
-        print(f"\n--- [LOG GPU] {stage_name} (cuda:{device_index}) ---")
-        print(f"  - Reservado: {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB  (Δ={delta_mb:+.2f} MB)")
-        if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
-            print(f"  - Pico reservado (nesta fase): {peak_reserved_mb:.2f} MB")
-        print(_gpu_process_table(processes, os.getpid()), end="")
-        print("--------------------------------------------------\n")
-        self.last_memory_reserved_mb = current_reserved_mb
-    def _register_tmp_dir(self, d: str):
-        if d and os.path.isdir(d):
-            self._tmp_dirs.add(d); print(f"[DEBUG] Registrado tmp dir: {d}")
-    def _register_tmp_file(self, f: str):
-        if f and os.path.exists(f):
-            self._tmp_files.add(f); print(f"[DEBUG] Registrado tmp file: {f}")
-    def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
-        print("[DEBUG] Finalize: iniciando limpeza...")
-        keep = set(keep_paths or []); extras = set(extra_paths or [])
-        removed_files = 0
-        for f in list(self._tmp_files | extras):
-            try:
-                if f not in keep and os.path.isfile(f):
-                    os.remove(f); removed_files += 1; print(f"[DEBUG] Removido arquivo tmp: {f}")
-            except Exception as e:
-                print(f"[DEBUG] Falha removendo arquivo {f}: {e}")
-            finally:
-                self._tmp_files.discard(f)
-        removed_dirs = 0
-        for d in list(self._tmp_dirs):
-            try:
-                if d not in keep and os.path.isdir(d):
-                    shutil.rmtree(d, ignore_errors=True); removed_dirs += 1; print(f"[DEBUG] Removido diretório tmp: {d}")
-            except Exception as e:
-                print(f"[DEBUG] Falha removendo diretório {d}: {e}")
-            finally:
-                self._tmp_dirs.discard(d)
-        print(f"[DEBUG] Finalize: arquivos removidos={removed_files}, dirs removidos={removed_dirs}")
-        gc.collect()
-        try:
-            if clear_gpu and torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                try:
-                    torch.cuda.ipc_collect()
-                except Exception:
-                    pass
-        except Exception as e:
-            print(f"[DEBUG] Finalize: limpeza GPU falhou: {e}")
-        try:
-            self._log_gpu_memory("Após finalize")
-        except Exception as e:
-            print(f"[DEBUG] Log GPU pós-finalize falhou: {e}")
     def _load_config(self):
         base = LTX_VIDEO_REPO_DIR / "configs"
-        config_path = base / "ltxv-13b-0.9.8-distilled-fp8.yaml"
-        print(f"[DEBUG] Carregando config: {config_path}")
-        with open(config_path, "r") as file:
-            return yaml.safe_load(file)
     def _load_models(self):
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
         print("[DEBUG] Baixando checkpoint principal...")
-        distilled_model_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["checkpoint_path"])
         self.config["checkpoint_path"] = distilled_model_path
         print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
         print("[DEBUG] Baixando upscaler espacial...")
-        spatial_upscaler_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"])
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
         print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
@@ -312,7 +214,9 @@ class VideoService:
             ckpt_path=self.config["checkpoint_path"],
             precision=self.config["precision"],
             text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
-            sampler=self.config["sampler"], device="cpu", enhance_prompt=False,
             prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
             prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
         )
@@ -326,248 +230,233 @@ class VideoService:
         print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
         return pipeline, latent_upsampler
-    @torch.no_grad()
-    def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
-        if not self.latent_upsampler:
-            raise ValueError("Latent Upsampler não está carregado.")
-        self.latent_upsampler.to(self.device)
-        self.pipeline.vae.to(self.device)
-        print(f"[DEBUG-UPSAMPLE] Shape de entrada: {tuple(latents.shape)}")
-        latents_unnormalized = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
-        upsampled_latents = self.latent_upsampler(latents_unnormalized)
-        upsampled_latents_normalized = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
-        print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents_normalized.shape)}")
-        return upsampled_latents_normalized
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
-        print(f"[DEBUG] Aplicando política de precisão: {prec}")
-        if prec in ["float8_e4m3fn", "bfloat16"]:
             self.runtime_autocast_dtype = torch.bfloat16
-        elif prec == "mixed_precision":
             self.runtime_autocast_dtype = torch.float16
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
-        print(f"[DEBUG] Carregando condicionamento: {filepath}")
         tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
-        tensor = torch.nn.functional.pad(tensor, padding_values)
-        out = tensor.to(self.device, dtype=self.runtime_autocast_dtype)
-        print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
-        return out
-    def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
-        if not mp4_list:
-            raise ValueError("A lista de MP4s para concatenar está vazia.")
-        if len(mp4_list) == 1:
-            shutil.move(mp4_list[0], out_path)
-            print(f"[DEBUG] Apenas um vídeo, movido para: {out_path}")
-            return
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
-            for mp4 in mp4_list:
-                f.write(f"file '{os.path.abspath(mp4)}'\n")
-            list_path = f.name
-        cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
-        print(f"[DEBUG] Concat: {cmd}")
-        try:
-            subprocess.check_call(shlex.split(cmd))
-        finally:
-            os.remove(list_path)
-    def _save_and_log_video(self, pixel_tensor, base_filename, fps, temp_dir, results_dir, used_seed, progress_callback=None):
-        """Função auxiliar para salvar um tensor de pixels em um arquivo MP4."""
-        output_path = os.path.join(temp_dir, f"{base_filename}_{used_seed}.mp4")
-        video_encode_tool_singleton.save_video_from_tensor(
-            pixel_tensor, output_path, fps=fps, progress_callback=progress_callback
-        )
-        final_path = os.path.join(results_dir, f"{base_filename}_{used_seed}.mp4")
-        shutil.move(output_path, final_path)
-        print(f"[DEBUG] Vídeo salvo em: {final_path}")
-        return final_path
-    # ==============================================================================
-    # --- NOVAS FUNÇÕES MODULARES ---
-    # ==============================================================================
-    def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int):
-        if not items_list:
-            return []
-        height_padded = ((height - 1) // 8 + 1) * 8
-        width_padded = ((width - 1) // 8 + 1) * 8
-        padding_values = calculate_padding(height, width, height_padded, width_padded)
-        conditioning_items = []
-        print("\n--- Preparando Itens de Condicionamento ---")
-        for item in items_list:
-            media, frame, weight = item
-            if isinstance(media, str):
-                print(f"  - Carregando imagem: {media} para o frame {frame}")
-                tensor = self._prepare_conditioning_tensor(media, height, width, padding_values)
-            elif isinstance(media, torch.Tensor):
-                print(f"  - Usando tensor fornecido para o frame {frame}")
-                tensor = media.to(self.device, dtype=self.runtime_autocast_dtype)
-            else:
-                warnings.warn(f"Tipo de item desconhecido: {type(media)}. Ignorando.")
-                continue
-            safe_frame = max(0, min(int(frame), num_frames - 1))
-            conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
-        print(f"Total de itens de condicionamento preparados: {len(conditioning_items)}")
-        return conditioning_items
-    def generate_low(self, prompt, negative_prompt, height, width, duration, guidance_scale, seed, conditioning_items=None):
-        print("\n--- INICIANDO ETAPA 1: GERAÇÃO EM BAIXA RESOLUÇÃO ---")
-        self._log_gpu_memory("Início da Geração Low-Res")
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        seed_everething(used_seed)
-        FPS = 24.0
-        target_frames = round(duration * FPS)
-        actual_num_frames = max(9, int(round((target_frames - 1) / 8.0) * 8 + 1))
-        height_padded = ((height - 1) // 8 + 1) * 8
-        width_padded = ((width - 1) // 8 + 1) * 8
-        generator = torch.Generator(device=self.device).manual_seed(used_seed)
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_low_"); self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-        downscale_factor = self.config.get("downscale_factor", 0.6666666)
-        vae_scale_factor = self.pipeline.vae_scale_factor
-        # --- <INÍCIO DA LÓGICA DE CÁLCULO EXATA> ---
-        # Replica a fórmula da LTXMultiScalePipeline
-        x_width = int(width_padded * downscale_factor)
-        downscaled_width = x_width - (x_width % vae_scale_factor)
-        x_height = int(height_padded * downscale_factor)
-        downscaled_height = x_height - (x_height % vae_scale_factor)
-        print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
-        # --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
-        first_pass_kwargs = {
-            "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
-            "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "latent",
-            "conditioning_items": conditioning_items, "guidance_scale": float(guidance_scale),
-            **(self.config.get("first_pass", {}))
-        }
-        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
-            latents = self.pipeline(**first_pass_kwargs).images
-            log_tensor_info(latents, "Latentes Low-Res Gerados")
-            pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-            video_path = self._save_and_log_video(pixel_tensor, "low_res_video", FPS, temp_dir, results_dir, used_seed)
-            del pixel_tensor
-            latents_cpu = latents.detach().to("cpu")
-            tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
-            torch.save(latents_cpu, tensor_path)
-            print(f"[DEBUG] Tensor latente de baixa resolução salvo em: {tensor_path}")
-        self._log_gpu_memory("Fim da Geração Low-Res")
-        return video_path, tensor_path, used_seed
-    def generate_upscale_denoise(self, latents_path, prompt, negative_prompt, guidance_scale, seed):
-        print("\n--- INICIANDO ETAPA 2: UPSCALE E REFINAMENTO ---")
-        self._log_gpu_memory("Início do Upscale/Denoise")
-        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
-        seed_everething(used_seed)
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_up_"); self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-        latents_low = torch.load(latents_path).to(self.device)
-        log_tensor_info(latents_low, "Latentes Low-Res Carregados")
-        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
-            upsampled_latents = self._upsample_latents_internal(latents_low)
-            upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents_low)
-            del latents_low; torch.cuda.empty_cache()
-            total_frames = upsampled_latents.shape[2]
-            mid_point = total_frames // 2
-            chunk1 = upsampled_latents[:, :, :mid_point, :, :]
-            chunk2 = upsampled_latents[:, :, mid_point:, :, :]
-            final_latents_list = []
-            for i, chunk in enumerate([chunk1, chunk2]):
-                if chunk.shape[2] == 0: continue
-                print(f"  - Refinando chunk {i+1}/{2} com {chunk.shape[2]} frames")
-                second_pass_height = chunk.shape[3] * self.pipeline.vae_scale_factor
-                second_pass_width = chunk.shape[4] * self.pipeline.vae_scale_factor
-                second_pass_kwargs = {
-                    "prompt": prompt, "negative_prompt": negative_prompt, "height": second_pass_height, "width": second_pass_width,
-                    "num_frames": chunk.shape[2], "latents": chunk, "guidance_scale": float(guidance_scale),
-                    "output_type": "latent", "generator": torch.Generator(device=self.device).manual_seed(used_seed),
-                    **(self.config.get("second_pass", {}))
-                }
-                refined_chunk = self.pipeline(**second_pass_kwargs).images
-                final_latents_list.append(refined_chunk.detach().clone())
-            del upsampled_latents, chunk1, chunk2; torch.cuda.empty_cache()
-            final_latents = torch.cat(final_latents_list, dim=2)
-            log_tensor_info(final_latents, "Latentes Upscaled/Refinados Finais")
-            latents_cpu = final_latents.detach().to("cpu")
-            tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
-            torch.save(latents_cpu, tensor_path)
-            pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-            video_path = self._save_and_log_video(pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed)
-            del pixel_tensor, final_latents
-        self._log_gpu_memory("Fim do Upscale/Denoise")
-        return video_path, tensor_path
-    def encode_mp4(self, latents_path: str, fps: int = 24):
-        print("\n--- INICIANDO ETAPA 3: DECODIFICAÇÃO FINAL ---")
-        self._log_gpu_memory("Início do Encode MP4")
-        latents = torch.load(latents_path)
-        seed = random.randint(0, 99999)
-        temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_"); self._register_tmp_dir(temp_dir)
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-        total_frames = latents.shape[2]
-        mid_point = total_frames // 2
-        chunk1_latents = latents[:, :, :mid_point, :, :]
-        chunk2_latents = latents[:, :, mid_point:, :, :]
-        video_parts = []
-        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
-            for i, chunk in enumerate([chunk1_latents, chunk2_latents]):
-                if chunk.shape[2] == 0: continue
-                print(f"  - Decodificando chunk {i+1}/{2}")
-                pixel_chunk = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-                part_path = os.path.join(temp_dir, f"part_{i}.mp4")
-                video_encode_tool_singleton.save_video_from_tensor(pixel_chunk, part_path, fps=fps)
-                video_parts.append(part_path)
-                del pixel_chunk; torch.cuda.empty_cache()
-        final_video_path = os.path.join(results_dir, f"final_concatenated_{seed}.mp4")
-        self._concat_mp4s_no_reencode(video_parts, final_video_path)
-        print(f"Encode final concluído: {final_video_path}")
-        self._log_gpu_memory("Fim do Encode MP4")
-        return final_video_path
-# --- INSTANCIAÇÃO DO SERVIÇO ---
-print("Criando instância do VideoService. O carregamento do modelo começará agora...")
-video_generation_service = VideoService()
-print("Instância do VideoService pronta para uso.")

+# ltx_server.py — VideoService (beta 1.2 - Robusto e Completo)
+# DESCRIÇÃO:
+# - Servidor de geração de vídeo com pipeline de 2 passes para melhoria de textura.
+# - Gerenciamento de memória robusto com limpeza garantida via `finalize()`.
+# - Cálculo de dimensões inteligente para preservar a proporção e evitar erros.
+# - Suporte para divisão de tarefas longas em chunks para evitar OOM (Out of Memory).
+# - Concatenação de chunks com transições suaves (crossfade) para um resultado contínuo.
+# --- 0. WARNINGS, IMPORTS E CONFIGURAÇÃO DE AMBIENTE ---
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
+from huggingface_hub import logging as hf_logging, hf_hub_download
+hf_logging.set_verbosity_error()
 import os
 import sys
 import subprocess
+import shlex
+import tempfile
 import gc
 import shutil
 import contextlib
 import time
 import traceback
+import json
+import yaml
+import random
+from typing import List, Dict
+from pathlib import Path
+import torch
 import torch.nn.functional as F
+import numpy as np
+import imageio
+from PIL import Image
+from einops import rearrange
+# --- Variáveis de Ambiente e Constantes ---
+LTXV_DEBUG = os.getenv("LTXV_DEBUG", "1") == "1"
+LTXV_FRAME_LOG_EVERY = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
 DEPS_DIR = Path("/data")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+# --- 1. SETUP E GERENCIAMENTO DE DEPENDÊNCIAS ---
 def run_setup():
+    """Executa o script de setup para clonar dependências se necessário."""
     setup_script_path = "setup.py"
     if not os.path.exists(setup_script_path):
         print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
         return
     try:
+        print("[DEBUG] Executando setup.py para instalar dependências...")
+        subprocess.run([sys.executable, setup_script_path], check=True, capture_output=True, text=True)
         print("[DEBUG] Setup concluído com sucesso.")
     except subprocess.CalledProcessError as e:
+        print(f"[ERROR] Falha crítica ao executar setup.py (código {e.returncode}).\nOutput:\n{e.stdout}\n{e.stderr}")
         sys.exit(1)
 def add_deps_to_path():
+    """Adiciona o diretório do repositório ao sys.path para importação dos módulos."""
     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+    if repo_path not in sys.path:
         sys.path.insert(0, repo_path)
+        print(f"[DEBUG] Repositório LTX-Video adicionado ao sys.path: {repo_path}")
+# Executa a configuração inicial ao carregar o script
+if not LTX_VIDEO_REPO_DIR.exists():
+    print(f"[INFO] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Executando setup...")
+    run_setup()
+add_deps_to_path()
+# --- Importações que dependem do sys.path modificado ---
+from managers.vae_manager import vae_manager_singleton
+from tools.video_encode_tool import video_encode_tool_singleton
+from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline, adain_filter_latent
+from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
+from api.ltx.inference import (
+    create_ltx_video_pipeline, create_latent_upsampler,
+    load_image_to_tensor_with_resize_and_crop, seed_everething,
+    calculate_padding, load_media_file
+)
+# --- 2. FUNÇÕES UTILITÁRIAS INTELIGENTES ---
+def calculate_new_dimensions(orig_w, orig_h, target_area=512*768, divisor=8):
+    """
+    [FUNÇÃO INTELIGENTE]
+    Calcula novas dimensões mantendo a proporção original, garantindo que ambos
+    os lados sejam múltiplos do divisor. Visa uma 'área alvo' para manter o
+    uso de VRAM consistente e previsível.
+    """
+    if orig_w <= 0 or orig_h <= 0:
+        print(f"[WARN] Dimensões originais inválidas ({orig_w}x{orig_h}). Usando padrão 512x768.")
+        return 512, 768
+    aspect_ratio = orig_w / orig_h
+    new_h = int((target_area / aspect_ratio)**0.5)
+    new_w = int(new_h * aspect_ratio)
+    final_w = round(new_w / divisor) * divisor
+    final_h = round(new_h / divisor) * divisor
     final_w = max(divisor, final_w)
     final_h = max(divisor, final_h)
+    if LTXV_DEBUG:
+        print(f"[Dimension Calc] Original: {orig_w}x{orig_h} (AR: {aspect_ratio:.2f}) -> "
+              f"Calculado: {new_w}x{new_h} -> Final (múltiplo de {divisor}): {final_w}x{final_h}")
+    return final_h, final_w
 def log_tensor_info(tensor, name="Tensor"):
+    """Exibe informações detalhadas sobre um tensor para depuração."""
+    if not LTXV_DEBUG: return
     if not isinstance(tensor, torch.Tensor):
+        print(f"\n[INFO] '{name}' não é um tensor.")
         return
     print(f"\n--- Tensor: {name} ---")
     print(f"  - Shape: {tuple(tensor.shape)}")
     print(f"  - Device: {tensor.device}")
     if tensor.numel() > 0:
         try:
+            print(f"  - Stats: Min={tensor.min().item():.4f}, Max={tensor.max().item():.4f}, Mean={tensor.mean().item():.4f}")
+        except Exception as e:
+            print(f"  - Stats: Falha ao calcular estatísticas - {e}")
     print("------------------------------------------\n")
+# --- 3. CLASSE PRINCIPAL DO SERVIÇO DE VÍDEO ---
 class VideoService:
     def __init__(self):
         # Builds a ready-to-use service: loads the YAML config, loads the
         # pipeline and the latent upsampler, moves them to the selected device,
         # chooses the autocast dtype and attaches the pipeline to the VAE manager.
         t0 = time.perf_counter()
+        print("[INFO] Inicializando VideoService...")
         # Use CUDA when available, otherwise fall back to the CPU.
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         # The config must exist before _load_models(), which reads it.
+        self.config = self._load_config()
+        print(f"[INFO] Config carregada (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
+        print(f"[INFO] Dispositivo selecionado: {self.device}")
         # Temporary paths registered in these sets are deleted by finalize().
+        self._tmp_dirs = set()
+        self._tmp_files = set()
         self.pipeline, self.latent_upsampler = self._load_models()
+        print("[INFO] Movendo modelos para o dispositivo...")
         self.pipeline.to(self.device)
         # The upsampler is only moved when _load_models() returned a truthy one.
         if self.latent_upsampler:
             self.latent_upsampler.to(self.device)
         # Sets self.runtime_autocast_dtype, which attach_pipeline() reads below.
         self._apply_precision_policy()
         vae_manager_singleton.attach_pipeline(
             self.pipeline,
             device=self.device,
             autocast_dtype=self.runtime_autocast_dtype
         )
+        print("[INFO] VAE manager conectado ao pipeline.")
         # Release cached allocator blocks left over from loading/moving models.
         if self.device == "cuda":
             torch.cuda.empty_cache()
+        print(f"[SUCCESS] VideoService pronto. Tempo de inicialização: {time.perf_counter()-t0:.2f}s")
+    # --- MÉTODOS INTERNOS: INICIALIZAÇÃO E SETUP ---
     def _load_config(self):
         """
         Return the parsed YAML of the first model configuration that exists.
         Candidate files are looked up in ``LTX_VIDEO_REPO_DIR / "configs"`` in
         order of preference.
         Raises:
             FileNotFoundError: If none of the candidate files exists.
         """
         config_dir = LTX_VIDEO_REPO_DIR / "configs"
         # Ordered by preference; the first file found on disk wins.
         preferred_names = (
             "ltxv-13b-0.9.8-dev-fp8.yaml",
             "ltxv-13b-0.9.8-distilled-fp8.yaml",
             "ltxv-13b-0.9.8-distilled.yaml",
         )
         existing = (config_dir / name for name in preferred_names)
         cfg_path = next((path for path in existing if path.exists()), None)
         if cfg_path is None:
             raise FileNotFoundError(f"Nenhum arquivo de configuração YAML encontrado em {config_dir}. Verifique a instalação.")
         print(f"[DEBUG] Configuração encontrada e selecionada: {cfg_path}")
         with open(cfg_path, "r") as file:
             return yaml.safe_load(file)
     def _load_models(self):
         # Downloads the main checkpoint and the spatial upscaler named in the
         # config from the Hugging Face Hub, then returns (pipeline, latent_upsampler).
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
         print("[DEBUG] Baixando checkpoint principal...")
         # Download location, cache location and token all come from environment variables.
+        distilled_model_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["checkpoint_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN"),
+        )
         # The config entry is overwritten with the resolved local path.
         self.config["checkpoint_path"] = distilled_model_path
         print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
         print("[DEBUG] Baixando upscaler espacial...")
+        spatial_upscaler_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["spatial_upscaler_model_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN")
+        )
         # Same replacement for the upscaler: config now holds the local path.
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
         print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
         # NOTE(review): the statement that opens this call and the assignment of
         # `latent_upsampler` are not visible in this view. Presumably this is
         # `pipeline = create_ltx_video_pipeline(` followed later by a
         # create_latent_upsampler(...) call -- confirm against the full file.
             ckpt_path=self.config["checkpoint_path"],
             precision=self.config["precision"],
             text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
+            sampler=self.config["sampler"],
+            device="cpu",
+            enhance_prompt=False,
             prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
             prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
         )
         print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
         return pipeline, latent_upsampler
     def _apply_precision_policy(self):
+        """Define o dtype a ser usado pelo autocast com base na configuração."""
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
+        if "bfloat16" in prec or "fp8" in prec:
             self.runtime_autocast_dtype = torch.bfloat16
+        elif "mixed_precision" in prec or "fp16" in prec:
             self.runtime_autocast_dtype = torch.float16
+        print(f"[INFO] Política de precisão aplicada. Dtype para Autocast: {self.runtime_autocast_dtype}")
+    # --- MÉTODOS INTERNOS: OPERAÇÕES DE TENSOR E VÍDEO ---
     def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
         """
         Build a conditioning tensor from an image file.
         The image is loaded with ``load_image_to_tensor_with_resize_and_crop``
         for the given height/width, padded with ``padding_values`` via
         ``F.pad`` and moved to the service device in the autocast dtype.
         """
         image = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         padded = F.pad(image, padding_values)
         return padded.to(self.device, dtype=self.runtime_autocast_dtype)
+    @torch.no_grad()
+    def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
+        """Lógica de upscale de latentes, garantindo que os modelos estejam no dispositivo correto."""
+        if not self.latent_upsampler:
+            raise ValueError("Latent Upsampler não está carregado, mas foi solicitado.")
+        # Garante que o VAE e o upsampler estejam no dispositivo correto para a operação
+        self.latent_upsampler.to(self.device)
+        self.pipeline.vae.to(self.device)
+        latents_up = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
+        latents_up = self.latent_upsampler(latents_up)
+        latents_up = normalize_latents(latents_up, self.pipeline.vae, vae_per_channel_normalize=True)
+        return latents_up
+    # --- MÉTODO PRINCIPAL DE LIMPEZA ---
+    def finalize(self, keep_paths=None, clear_gpu=True):
+        """
+        [FUNÇÃO INTELIGENTE]
+        Limpeza robusta para garantir a liberação de recursos de disco e GPU,
+        mesmo em caso de falhas, prevenindo memory leaks.
+        """
+        print("[INFO] Finalize: iniciando limpeza de recursos...")
+        keep = set(keep_paths or [])
+        files_to_clean, dirs_to_clean = list(self._tmp_files), list(self._tmp_dirs)
+        removed_files, removed_dirs = 0, 0
+        for f in files_to_clean:
+            try:
+                if f not in keep and os.path.isfile(f):
+                    os.remove(f); removed_files += 1
+            except OSError as e:
+                print(f"[WARN] Falha ao remover arquivo temporário {f}: {e}")
+            finally:
+                self._tmp_files.discard(f)
+        for d in dirs_to_clean:
+            try:
+                if d not in keep and os.path.isdir(d):
+                    shutil.rmtree(d, ignore_errors=True); removed_dirs += 1
+            except OSError as e:
+                print(f"[WARN] Falha ao remover diretório temporário {d}: {e}")
+            finally:
+                self._tmp_dirs.discard(d)
+        if LTXV_DEBUG:
+            print(f"[DEBUG] Limpeza de disco: {removed_files} arquivos e {removed_dirs} diretórios removidos.")
+        gc.collect()
+        if clear_gpu and self.device == "cuda":
+            try:
+                torch.cuda.empty_cache()
+                torch.cuda.ipc_collect()
+                if LTXV_DEBUG: print("[DEBUG] Limpeza da GPU concluída com sucesso.")
+            except Exception as e:
+                print(f"[ERROR] Falha crítica durante a limpeza da GPU: {e}")
+    # ==============================================================================
+    # --- FUNÇÃO PRINCIPAL DE GERAÇÃO (generate) ---
+    # ==============================================================================
+    @torch.no_grad()
+    def generate(
+        self,
+        prompt: str,
+        negative_prompt: str = "",
+        mode: str = "text-to-video",
+        start_image_filepath: str = None,
+        height: int = 512,
+        width: int = 704,
+        duration: float = 2.0,
+        seed: int = 42,
+        randomize_seed: bool = True,
+        guidance_scale: float = 3.0,
+        improve_texture: bool = True,
+    ):
+        output_path, final_seed = None, None
+        try:
+            t_all = time.perf_counter()
+            print(f"\n{'='*20} INICIANDO NOVA GERAÇÃO {'='*20}")
+            if self.device == "cuda":
+                torch.cuda.empty_cache()
+            # --- 1. Setup da Geração (parâmetros, seed, dimensões) ---
+            if mode == "image-to-video" and not start_image_filepath:
+                raise ValueError("Imagem de início é obrigatória para o modo 'image-to-video'")
+            final_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
+            seed_everething(final_seed)
+            print(f"[INFO] Geração com Seed: {final_seed}")
+            FPS = 24.0; MAX_NUM_FRAMES = 2570
+            target_frames_rounded = round(duration * FPS)
+            n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
+            actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
+            height_padded = ((height - 1) // 8 + 1) * 8
+            width_padded = ((width - 1) // 8 + 1) * 8
+            padding_values = calculate_padding(height, width, height_padded, width_padded)
+            generator = torch.Generator(device=self.device).manual_seed(final_seed)
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_")
+            self._tmp_dirs.add(temp_dir)
+            # --- 2. Preparação dos Tensores de Condicionamento ---
+            conditioning_items = []
+            if mode == "image-to-video" and start_image_filepath:
+                start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
+                conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
+            # --- 3. Construção dos Argumentos da Pipeline ---
+            call_kwargs = self.config.get("pipeline_defaults", {}).copy() # Carrega defaults do YAML
+            call_kwargs.update({
+                "prompt": prompt, "negative_prompt": negative_prompt,
+                "height": height_padded, "width": width_padded,
+                "num_frames": actual_num_frames, "frame_rate": int(FPS),
+                "generator": generator, "output_type": "latent",
+                "conditioning_items": conditioning_items or None,
+                "guidance_scale": float(guidance_scale),
+            })
+            # --- 4. Lógica de Geração (Pipeline de 1 ou 2 passes) ---
+            final_latents = None
+            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
+            with ctx:
+                if improve_texture:
+                    print("[INFO] Iniciando pipeline de 2 passes para melhoria de textura.")
+                    # ETAPA 1: Geração Base em Baixa Resolução
+                    downscale_factor = self.config.get("downscale_factor", 0.5)
+                    target_low_res_area = (width * height) * (downscale_factor**2)
+                    downscaled_h, downscaled_w = calculate_new_dimensions(width, height, target_area=target_low_res_area)
+                    first_pass_kwargs = call_kwargs.copy()
+                    first_pass_kwargs.update(self.config.get("first_pass", {}))
+                    first_pass_kwargs.update({"width": downscaled_w, "height": downscaled_h, "guidance_scale": float(guidance_scale)})
+                    base_latents = self.pipeline(**first_pass_kwargs).images
+                    log_tensor_info(base_latents, "Latentes Base (Passo 1)")
+                    # ETAPA 2: Upscale e Refinamento
+                    upsampled_latents = self._upsample_latents_internal(base_latents)
+                    del base_latents; gc.collect(); torch.cuda.empty_cache()
+                    second_pass_kwargs = call_kwargs.copy()
+                    second_pass_kwargs.update(self.config.get("second_pass", {}))
+                    second_pass_kwargs.update({"latents": upsampled_latents, "guidance_scale": float(guidance_scale)})
+                    final_latents = self.pipeline(**second_pass_kwargs).images
+                    log_tensor_info(final_latents, "Latentes Finais (Passo 2)")
+                else:
+                    print("[INFO] Iniciando pipeline de 1 passe.")
+                    final_latents = self.pipeline(**call_kwargs).images
+                    log_tensor_info(final_latents, "Latentes Finais (Passe Único)")
+            # --- 5. Decodificação, Codificação de Vídeo e Finalização ---
+            print("[INFO] Decodificando latentes para pixels com VAE...")
+            pixel_tensor = vae_manager_singleton.decode(
+                final_latents.to(self.device),
+                decode_timestep=float(self.config.get("decode_timestep", 0.05))
+            )
+            del final_latents; gc.collect(); torch.cuda.empty_cache()
+            output_video_path_tmp = os.path.join(temp_dir, f"output_{final_seed}.mp4")
+            print(f"[INFO] Codificando vídeo final para: {output_video_path_tmp}")
+            video_encode_tool_singleton.save_video_from_tensor(
+               pixel_tensor, output_video_path_tmp, fps=call_kwargs["frame_rate"]
+            )
+            del pixel_tensor
+            results_dir = "/app/output"
+            os.makedirs(results_dir, exist_ok=True)
+            output_path = os.path.join(results_dir, f"final_video_{final_seed}.mp4")
+            shutil.move(output_video_path_tmp, output_path)
+            print(f"[SUCCESS] Geração concluída em {time.perf_counter() - t_all:.2f}s. Vídeo salvo em: {output_path}")
+            return output_path, final_seed
+        except Exception as e:
+            print(f"[FATAL ERROR] A geração falhou: {type(e).__name__} - {e}")
+            traceback.print_exc()
+            raise
+        finally:
+            print("[INFO] Executando limpeza final da tarefa...")
+            self.finalize(keep_paths=[output_path] if output_path else [])
+# --- Ponto de Entrada Principal ---
+if __name__ == "__main__":
+    print("Iniciando carregamento do VideoService...")
+    video_generation_service = VideoService()
+    print("\n[INFO] VideoService carregado e pronto para receber tarefas.")
+    # Exemplo de como chamar a geração (pode ser substituído por uma API)
+    try:
+        video_generation_service.generate(
+            prompt="A cinematic shot of a panda drinking bubble tea in a Tokyo cafe",
+            negative_prompt="blurry, low quality, cartoon",
+            duration=3.0,
+            improve_texture=True
+        )
+    except Exception as e:
+        print("\n[MAIN] Exemplo de geração falhou. O servidor ainda está de pé, mas verifique o erro acima.")