Eueuiaa commited on
Commit
8e066ce
·
verified ·
1 Parent(s): 9641c9e

Update api/ltx_server_refactored.py

Browse files
Files changed (1) hide show
  1. api/ltx_server_refactored.py +337 -448
api/ltx_server_refactored.py CHANGED
@@ -1,167 +1,122 @@
1
- # ltx_server_refactored.py — VideoService (Modular Version with Exact Dimension Calculation)
 
 
 
 
 
 
 
 
2
 
3
- # --- 0. WARNINGS E AMBIENTE ---
4
  import warnings
5
  warnings.filterwarnings("ignore", category=UserWarning)
6
  warnings.filterwarnings("ignore", category=FutureWarning)
7
- warnings.filterwarnings("ignore", message=".*")
8
- from huggingface_hub import logging
9
- logging.set_verbosity_error()
10
- logging.set_verbosity_warning()
11
- logging.set_verbosity_info()
12
- logging.set_verbosity_debug()
13
- LTXV_DEBUG=1
14
- LTXV_FRAME_LOG_EVERY=8
15
- import os, subprocess, shlex, tempfile
16
- import torch
17
- import json
18
- import numpy as np
19
- import random
20
  import os
21
- import shlex
22
- import yaml
23
- from typing import List, Dict
24
- from pathlib import Path
25
- import imageio
26
- from PIL import Image
27
- import tempfile
28
- from huggingface_hub import hf_hub_download
29
  import sys
30
  import subprocess
 
 
31
  import gc
32
  import shutil
33
  import contextlib
34
  import time
35
  import traceback
36
- from einops import rearrange
 
 
 
 
 
 
37
  import torch.nn.functional as F
38
- from managers.vae_manager import vae_manager_singleton
39
- from tools.video_encode_tool import video_encode_tool_singleton
 
 
 
 
 
 
40
  DEPS_DIR = Path("/data")
41
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
42
 
 
 
43
def run_setup():
    """Run the local ``setup.py`` helper script when it is present.

    When ``setup.py`` does not exist in the current working directory only a
    debug message is printed. When the script exits with a non-zero status the
    interpreter is terminated with exit code 1.
    """
    script = "setup.py"
    if os.path.exists(script):
        try:
            print("[DEBUG] Executando setup.py para dependências...")
            # Run with the same interpreter that is executing this module.
            subprocess.run([sys.executable, script], check=True)
            print("[DEBUG] Setup concluído com sucesso.")
        except subprocess.CalledProcessError as err:
            print(f"[DEBUG] ERRO no setup.py (code {err.returncode}). Abortando.")
            sys.exit(1)
        return
    print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
55
-
56
- if not LTX_VIDEO_REPO_DIR.exists():
57
- print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
58
- run_setup()
59
 
60
def add_deps_to_path():
    """Put the resolved ``LTX_VIDEO_REPO_DIR`` at the front of ``sys.path``.

    The path is inserted only when it is not already listed, so repeated calls
    do not create duplicates.
    """
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path in sys.path:
        return
    sys.path.insert(0, repo_path)
    print(f"[DEBUG] Repo adicionado ao sys.path: {repo_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
67
- try:
68
- import psutil
69
- import pynvml as nvml
70
- nvml.nvmlInit()
71
- handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
72
- try:
73
- procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
74
- except Exception:
75
- procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
76
- results = []
77
- for p in procs:
78
- pid = int(p.pid)
79
- used_mb = None
80
- try:
81
- if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
82
- used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
83
- except Exception:
84
- used_mb = None
85
- name = "unknown"
86
- user = "unknown"
87
- try:
88
- import psutil
89
- pr = psutil.Process(pid)
90
- name = pr.name()
91
- user = pr.username()
92
- except Exception:
93
- pass
94
- results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
95
- nvml.nvmlShutdown()
96
- return results
97
- except Exception:
98
- return []
99
-
100
- def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
101
- cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
102
- try:
103
- out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
104
- except Exception:
105
- return []
106
- results = []
107
- for line in out.strip().splitlines():
108
- parts = [p.strip() for p in line.split(",")]
109
- if len(parts) >= 3:
110
- try:
111
- pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
112
- user = "unknown"
113
- try:
114
- import psutil
115
- pr = psutil.Process(pid)
116
- user = pr.username()
117
- except Exception:
118
- pass
119
- results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
120
- except Exception:
121
- continue
122
- return results
123
-
124
def calculate_padding(orig_h, orig_w, target_h, target_w):
    """Split the difference between original and target size across both sides.

    Returns:
        ``(pad_left, pad_right, pad_top, pad_bottom)``. The left/top value is
        the floor of half the difference; the right/bottom value receives the
        remainder.
    """
    extra_w = target_w - orig_w
    extra_h = target_h - orig_h
    left, top = extra_w // 2, extra_h // 2
    return (left, extra_w - left, top, extra_h - top)
132
-
133
def calculate_new_dimensions(orig_w, orig_h, divisor=8):
    """Scale a size so its shorter side is 512, keeping the aspect ratio.

    Both resulting sides are rounded to the nearest multiple of ``divisor``
    and are never smaller than ``divisor``.

    Args:
        orig_w: Original width.
        orig_h: Original height.
        divisor: Value both returned sides must be a multiple of.

    Returns:
        ``(final_h, final_w)`` -- note the height-first order. ``(512, 512)``
        is returned when either original side is zero or negative.
    """
    # Non-positive sizes have no meaningful aspect ratio (a negative side
    # previously produced a negative ratio and garbage output).
    if orig_w <= 0 or orig_h <= 0:
        return 512, 512

    if orig_w >= orig_h:
        new_h = 512
        new_w = new_h * (orig_w / orig_h)
    else:
        new_w = 512
        new_h = new_w * (orig_h / orig_w)

    final_w = max(divisor, int(round(new_w / divisor)) * divisor)
    final_h = max(divisor, int(round(new_h / divisor)) * divisor)
    print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Calculado: {new_w:.0f}x{new_h:.0f} -> Final (divisível por {divisor}): {final_w}x{final_h}")
    return final_h, final_w
150
 
151
- def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
152
- if not processes:
153
- return " - Processos ativos: (nenhum)\n"
154
- processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
155
- lines = [" - Processos ativos (PID | USER | NAME | VRAM MB):"]
156
- for p in processes:
157
- star = "*" if p["pid"] == current_pid else " "
158
- used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
159
- lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
160
- return "\n".join(lines) + "\n"
161
 
162
  def log_tensor_info(tensor, name="Tensor"):
 
 
163
  if not isinstance(tensor, torch.Tensor):
164
- print(f"\n[INFO] '{name}' não é tensor.")
165
  return
166
  print(f"\n--- Tensor: {name} ---")
167
  print(f" - Shape: {tuple(tensor.shape)}")
@@ -169,141 +124,88 @@ def log_tensor_info(tensor, name="Tensor"):
169
  print(f" - Device: {tensor.device}")
170
  if tensor.numel() > 0:
171
  try:
172
- print(f" - Min: {tensor.min().item():.4f} Max: {tensor.max().item():.4f} Mean: {tensor.mean().item():.4f}")
173
- except Exception:
174
- pass
175
  print("------------------------------------------\n")
176
 
177
- add_deps_to_path()
178
- from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
179
- from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
180
- from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
181
- from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
182
- from api.ltx.inference import (
183
- create_ltx_video_pipeline,
184
- create_latent_upsampler,
185
- load_image_to_tensor_with_resize_and_crop,
186
- seed_everething,
187
- )
188
 
189
  class VideoService:
190
  def __init__(self):
191
  t0 = time.perf_counter()
192
- print("[DEBUG] Inicializando VideoService...")
193
- self.debug = os.getenv("LTXV_DEBUG", "1") == "1"
194
- self.frame_log_every = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
195
- self.config = self._load_config()
196
- print(f"[DEBUG] Config carregada (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
197
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
198
- print(f"[DEBUG] Device selecionado: {self.device}")
199
- self.last_memory_reserved_mb = 0.0
200
- self._tmp_dirs = set(); self._tmp_files = set(); self._last_outputs = []
 
201
 
 
 
 
202
  self.pipeline, self.latent_upsampler = self._load_models()
203
- print(f"[DEBUG] Pipeline e Upsampler carregados. Upsampler ativo? {bool(self.latent_upsampler)}")
204
 
205
- print(f"[DEBUG] Movendo modelos para {self.device}...")
206
  self.pipeline.to(self.device)
207
  if self.latent_upsampler:
208
  self.latent_upsampler.to(self.device)
209
 
210
  self._apply_precision_policy()
211
- print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
212
 
213
  vae_manager_singleton.attach_pipeline(
214
  self.pipeline,
215
  device=self.device,
216
  autocast_dtype=self.runtime_autocast_dtype
217
  )
218
- print(f"[DEBUG] VAE manager conectado: has_vae={hasattr(self.pipeline, 'vae')} device={self.device}")
219
-
220
  if self.device == "cuda":
221
  torch.cuda.empty_cache()
222
- self._log_gpu_memory("Após carregar modelos")
223
-
224
- print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
225
-
226
- def _log_gpu_memory(self, stage_name: str):
227
- if self.device != "cuda":
228
- return
229
- device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
230
- current_reserved_b = torch.cuda.memory_reserved(device_index)
231
- current_reserved_mb = current_reserved_b / (1024 ** 2)
232
- total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
233
- total_memory_mb = total_memory_b / (1024 ** 2)
234
- peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
235
- delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
236
- processes = _query_gpu_processes_via_nvml(device_index) or _query_gpu_processes_via_nvidiasmi(device_index)
237
- print(f"\n--- [LOG GPU] {stage_name} (cuda:{device_index}) ---")
238
- print(f" - Reservado: {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB (Δ={delta_mb:+.2f} MB)")
239
- if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
240
- print(f" - Pico reservado (nesta fase): {peak_reserved_mb:.2f} MB")
241
- print(_gpu_process_table(processes, os.getpid()), end="")
242
- print("--------------------------------------------------\n")
243
- self.last_memory_reserved_mb = current_reserved_mb
244
-
245
- def _register_tmp_dir(self, d: str):
246
- if d and os.path.isdir(d):
247
- self._tmp_dirs.add(d); print(f"[DEBUG] Registrado tmp dir: {d}")
248
-
249
- def _register_tmp_file(self, f: str):
250
- if f and os.path.exists(f):
251
- self._tmp_files.add(f); print(f"[DEBUG] Registrado tmp file: {f}")
252
-
253
- def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
254
- print("[DEBUG] Finalize: iniciando limpeza...")
255
- keep = set(keep_paths or []); extras = set(extra_paths or [])
256
- removed_files = 0
257
- for f in list(self._tmp_files | extras):
258
- try:
259
- if f not in keep and os.path.isfile(f):
260
- os.remove(f); removed_files += 1; print(f"[DEBUG] Removido arquivo tmp: {f}")
261
- except Exception as e:
262
- print(f"[DEBUG] Falha removendo arquivo {f}: {e}")
263
- finally:
264
- self._tmp_files.discard(f)
265
- removed_dirs = 0
266
- for d in list(self._tmp_dirs):
267
- try:
268
- if d not in keep and os.path.isdir(d):
269
- shutil.rmtree(d, ignore_errors=True); removed_dirs += 1; print(f"[DEBUG] Removido diretório tmp: {d}")
270
- except Exception as e:
271
- print(f"[DEBUG] Falha removendo diretório {d}: {e}")
272
- finally:
273
- self._tmp_dirs.discard(d)
274
- print(f"[DEBUG] Finalize: arquivos removidos={removed_files}, dirs removidos={removed_dirs}")
275
- gc.collect()
276
- try:
277
- if clear_gpu and torch.cuda.is_available():
278
- torch.cuda.empty_cache()
279
- try:
280
- torch.cuda.ipc_collect()
281
- except Exception:
282
- pass
283
- except Exception as e:
284
- print(f"[DEBUG] Finalize: limpeza GPU falhou: {e}")
285
- try:
286
- self._log_gpu_memory("Após finalize")
287
- except Exception as e:
288
- print(f"[DEBUG] Log GPU pós-finalize falhou: {e}")
289
 
290
  def _load_config(self):
 
291
  base = LTX_VIDEO_REPO_DIR / "configs"
292
- config_path = base / "ltxv-13b-0.9.8-distilled-fp8.yaml"
293
- print(f"[DEBUG] Carregando config: {config_path}")
294
- with open(config_path, "r") as file:
295
- return yaml.safe_load(file)
 
 
 
 
 
 
 
 
 
296
 
297
  def _load_models(self):
298
  t0 = time.perf_counter()
299
  LTX_REPO = "Lightricks/LTX-Video"
300
  print("[DEBUG] Baixando checkpoint principal...")
301
- distilled_model_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["checkpoint_path"])
 
 
 
 
 
 
302
  self.config["checkpoint_path"] = distilled_model_path
303
  print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
304
 
305
  print("[DEBUG] Baixando upscaler espacial...")
306
- spatial_upscaler_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"])
 
 
 
 
 
 
307
  self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
308
  print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
309
 
@@ -312,7 +214,9 @@ class VideoService:
312
  ckpt_path=self.config["checkpoint_path"],
313
  precision=self.config["precision"],
314
  text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
315
- sampler=self.config["sampler"], device="cpu", enhance_prompt=False,
 
 
316
  prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
317
  prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
318
  )
@@ -326,248 +230,233 @@ class VideoService:
326
  print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
327
  return pipeline, latent_upsampler
328
 
329
- @torch.no_grad()
330
- def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
331
- if not self.latent_upsampler:
332
- raise ValueError("Latent Upsampler não está carregado.")
333
- self.latent_upsampler.to(self.device)
334
- self.pipeline.vae.to(self.device)
335
- print(f"[DEBUG-UPSAMPLE] Shape de entrada: {tuple(latents.shape)}")
336
- latents_unnormalized = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
337
- upsampled_latents = self.latent_upsampler(latents_unnormalized)
338
- upsampled_latents_normalized = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
339
- print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents_normalized.shape)}")
340
- return upsampled_latents_normalized
341
-
342
  def _apply_precision_policy(self):
 
343
  prec = str(self.config.get("precision", "")).lower()
344
  self.runtime_autocast_dtype = torch.float32
345
- print(f"[DEBUG] Aplicando política de precisão: {prec}")
346
- if prec in ["float8_e4m3fn", "bfloat16"]:
347
  self.runtime_autocast_dtype = torch.bfloat16
348
- elif prec == "mixed_precision":
349
  self.runtime_autocast_dtype = torch.float16
 
 
 
 
350
 
351
  def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
352
- print(f"[DEBUG] Carregando condicionamento: {filepath}")
353
  tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
354
- tensor = torch.nn.functional.pad(tensor, padding_values)
355
- out = tensor.to(self.device, dtype=self.runtime_autocast_dtype)
356
- print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
357
- return out
358
-
359
- def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
360
- if not mp4_list:
361
- raise ValueError("A lista de MP4s para concatenar está vazia.")
362
- if len(mp4_list) == 1:
363
- shutil.move(mp4_list[0], out_path)
364
- print(f"[DEBUG] Apenas um vídeo, movido para: {out_path}")
365
- return
366
-
367
- with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
368
- for mp4 in mp4_list:
369
- f.write(f"file '{os.path.abspath(mp4)}'\n")
370
- list_path = f.name
371
-
372
- cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
373
- print(f"[DEBUG] Concat: {cmd}")
374
-
375
- try:
376
- subprocess.check_call(shlex.split(cmd))
377
- finally:
378
- os.remove(list_path)
379
 
380
- def _save_and_log_video(self, pixel_tensor, base_filename, fps, temp_dir, results_dir, used_seed, progress_callback=None):
381
- """Função auxiliar para salvar um tensor de pixels em um arquivo MP4."""
382
- output_path = os.path.join(temp_dir, f"{base_filename}_{used_seed}.mp4")
 
 
383
 
384
- video_encode_tool_singleton.save_video_from_tensor(
385
- pixel_tensor, output_path, fps=fps, progress_callback=progress_callback
386
- )
387
 
388
- final_path = os.path.join(results_dir, f"{base_filename}_{used_seed}.mp4")
389
- shutil.move(output_path, final_path)
390
- print(f"[DEBUG] Vídeo salvo em: {final_path}")
391
- return final_path
392
-
393
- # ==============================================================================
394
- # --- NOVAS FUNÇÕES MODULARES ---
395
- # ==============================================================================
396
-
397
- def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int):
398
- if not items_list:
399
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
- height_padded = ((height - 1) // 8 + 1) * 8
402
- width_padded = ((width - 1) // 8 + 1) * 8
403
- padding_values = calculate_padding(height, width, height_padded, width_padded)
404
-
405
- conditioning_items = []
406
- print("\n--- Preparando Itens de Condicionamento ---")
407
- for item in items_list:
408
- media, frame, weight = item
409
-
410
- if isinstance(media, str):
411
- print(f" - Carregando imagem: {media} para o frame {frame}")
412
- tensor = self._prepare_conditioning_tensor(media, height, width, padding_values)
413
- elif isinstance(media, torch.Tensor):
414
- print(f" - Usando tensor fornecido para o frame {frame}")
415
- tensor = media.to(self.device, dtype=self.runtime_autocast_dtype)
416
- else:
417
- warnings.warn(f"Tipo de item desconhecido: {type(media)}. Ignorando.")
418
- continue
419
-
420
- safe_frame = max(0, min(int(frame), num_frames - 1))
421
- conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
422
-
423
- print(f"Total de itens de condicionamento preparados: {len(conditioning_items)}")
424
- return conditioning_items
425
-
426
- def generate_low(self, prompt, negative_prompt, height, width, duration, guidance_scale, seed, conditioning_items=None):
427
- print("\n--- INICIANDO ETAPA 1: GERAÇÃO EM BAIXA RESOLUÇÃO ---")
428
- self._log_gpu_memory("Início da Geração Low-Res")
429
-
430
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
431
- seed_everething(used_seed)
432
-
433
- FPS = 24.0
434
- target_frames = round(duration * FPS)
435
- actual_num_frames = max(9, int(round((target_frames - 1) / 8.0) * 8 + 1))
436
-
437
- height_padded = ((height - 1) // 8 + 1) * 8
438
- width_padded = ((width - 1) // 8 + 1) * 8
439
- generator = torch.Generator(device=self.device).manual_seed(used_seed)
440
-
441
- temp_dir = tempfile.mkdtemp(prefix="ltxv_low_"); self._register_tmp_dir(temp_dir)
442
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
443
 
444
- downscale_factor = self.config.get("downscale_factor", 0.6666666)
445
- vae_scale_factor = self.pipeline.vae_scale_factor
446
-
447
-
448
- # --- <INÍCIO DA LÓGICA DE CÁLCULO EXATA> ---
449
- # Replica a fórmula da LTXMultiScalePipeline
450
- x_width = int(width_padded * downscale_factor)
451
- downscaled_width = x_width - (x_width % vae_scale_factor)
452
- x_height = int(height_padded * downscale_factor)
453
- downscaled_height = x_height - (x_height % vae_scale_factor)
454
- print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
455
- # --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
456
-
457
- first_pass_kwargs = {
458
- "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
459
- "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "latent",
460
- "conditioning_items": conditioning_items, "guidance_scale": float(guidance_scale),
461
- **(self.config.get("first_pass", {}))
462
- }
463
-
464
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
465
- latents = self.pipeline(**first_pass_kwargs).images
466
- log_tensor_info(latents, "Latentes Low-Res Gerados")
467
-
468
- pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
469
- video_path = self._save_and_log_video(pixel_tensor, "low_res_video", FPS, temp_dir, results_dir, used_seed)
470
- del pixel_tensor
471
 
472
- latents_cpu = latents.detach().to("cpu")
473
- tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
474
- torch.save(latents_cpu, tensor_path)
475
- print(f"[DEBUG] Tensor latente de baixa resolução salvo em: {tensor_path}")
476
-
477
- self._log_gpu_memory("Fim da Geração Low-Res")
478
- return video_path, tensor_path, used_seed
479
 
480
- def generate_upscale_denoise(self, latents_path, prompt, negative_prompt, guidance_scale, seed):
481
- print("\n--- INICIANDO ETAPA 2: UPSCALE E REFINAMENTO ---")
482
- self._log_gpu_memory("Início do Upscale/Denoise")
 
 
 
 
483
 
484
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
485
- seed_everething(used_seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
 
487
- temp_dir = tempfile.mkdtemp(prefix="ltxv_up_"); self._register_tmp_dir(temp_dir)
488
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
489
-
490
- latents_low = torch.load(latents_path).to(self.device)
491
- log_tensor_info(latents_low, "Latentes Low-Res Carregados")
492
-
493
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
494
- upsampled_latents = self._upsample_latents_internal(latents_low)
495
- upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents_low)
496
- del latents_low; torch.cuda.empty_cache()
497
 
498
- total_frames = upsampled_latents.shape[2]
499
- mid_point = total_frames // 2
500
- chunk1 = upsampled_latents[:, :, :mid_point, :, :]
501
- chunk2 = upsampled_latents[:, :, mid_point:, :, :]
 
 
 
 
502
 
503
- final_latents_list = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
505
- for i, chunk in enumerate([chunk1, chunk2]):
506
- if chunk.shape[2] == 0: continue
507
- print(f" - Refinando chunk {i+1}/{2} com {chunk.shape[2]} frames")
508
- second_pass_height = chunk.shape[3] * self.pipeline.vae_scale_factor
509
- second_pass_width = chunk.shape[4] * self.pipeline.vae_scale_factor
510
-
511
- second_pass_kwargs = {
512
- "prompt": prompt, "negative_prompt": negative_prompt, "height": second_pass_height, "width": second_pass_width,
513
- "num_frames": chunk.shape[2], "latents": chunk, "guidance_scale": float(guidance_scale),
514
- "output_type": "latent", "generator": torch.Generator(device=self.device).manual_seed(used_seed),
515
- **(self.config.get("second_pass", {}))
516
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
 
518
- refined_chunk = self.pipeline(**second_pass_kwargs).images
519
- final_latents_list.append(refined_chunk.detach().clone())
 
 
 
 
 
 
 
 
 
 
520
 
521
- del upsampled_latents, chunk1, chunk2; torch.cuda.empty_cache()
522
-
523
- final_latents = torch.cat(final_latents_list, dim=2)
524
- log_tensor_info(final_latents, "Latentes Upscaled/Refinados Finais")
525
-
526
- latents_cpu = final_latents.detach().to("cpu")
527
- tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
528
- torch.save(latents_cpu, tensor_path)
 
 
 
 
529
 
530
- pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
531
- video_path = self._save_and_log_video(pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed)
532
- del pixel_tensor, final_latents
533
 
534
- self._log_gpu_memory("Fim do Upscale/Denoise")
535
- return video_path, tensor_path
 
 
536
 
537
- def encode_mp4(self, latents_path: str, fps: int = 24):
538
- print("\n--- INICIANDO ETAPA 3: DECODIFICAÇÃO FINAL ---")
539
- self._log_gpu_memory("Início do Encode MP4")
540
 
541
- latents = torch.load(latents_path)
542
- seed = random.randint(0, 99999)
543
- temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_"); self._register_tmp_dir(temp_dir)
544
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
545
-
546
- total_frames = latents.shape[2]
547
- mid_point = total_frames // 2
548
- chunk1_latents = latents[:, :, :mid_point, :, :]
549
- chunk2_latents = latents[:, :, mid_point:, :, :]
550
-
551
- video_parts = []
552
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
553
- for i, chunk in enumerate([chunk1_latents, chunk2_latents]):
554
- if chunk.shape[2] == 0: continue
555
- print(f" - Decodificando chunk {i+1}/{2}")
556
- pixel_chunk = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
557
-
558
- part_path = os.path.join(temp_dir, f"part_{i}.mp4")
559
- video_encode_tool_singleton.save_video_from_tensor(pixel_chunk, part_path, fps=fps)
560
- video_parts.append(part_path)
561
- del pixel_chunk; torch.cuda.empty_cache()
562
-
563
- final_video_path = os.path.join(results_dir, f"final_concatenated_{seed}.mp4")
564
- self._concat_mp4s_no_reencode(video_parts, final_video_path)
565
-
566
- print(f"Encode final concluído: {final_video_path}")
567
- self._log_gpu_memory("Fim do Encode MP4")
568
- return final_video_path
569
-
570
- # --- INSTANCIAÇÃO DO SERVIÇO ---
571
- print("Criando instância do VideoService. O carregamento do modelo começará agora...")
572
- video_generation_service = VideoService()
573
- print("Instância do VideoService pronta para uso.")
 
1
+ # ltx_server.py — VideoService (beta 1.2 - Robusto e Completo)
2
+ # DESCRIÇÃO:
3
+ # - Servidor de geração de vídeo com pipeline de 2 passes para melhoria de textura.
4
+ # - Gerenciamento de memória robusto com limpeza garantida via `finalize()`.
5
+ # - Cálculo de dimensões inteligente para preservar a proporção e evitar erros.
6
+ # - Suporte para divisão de tarefas longas em chunks para evitar OOM (Out of Memory).
7
+ # - Concatenação de chunks com transições suaves (crossfade) para um resultado contínuo.
8
+
9
+ # --- 0. WARNINGS, IMPORTS E CONFIGURAÇÃO DE AMBIENTE ---
10
 
 
11
  import warnings
12
  warnings.filterwarnings("ignore", category=UserWarning)
13
  warnings.filterwarnings("ignore", category=FutureWarning)
14
+ from huggingface_hub import logging as hf_logging, hf_hub_download
15
+ hf_logging.set_verbosity_error()
16
+
 
 
 
 
 
 
 
 
 
 
17
  import os
 
 
 
 
 
 
 
 
18
  import sys
19
  import subprocess
20
+ import shlex
21
+ import tempfile
22
  import gc
23
  import shutil
24
  import contextlib
25
  import time
26
  import traceback
27
+ import json
28
+ import yaml
29
+ import random
30
+ from typing import List, Dict
31
+ from pathlib import Path
32
+
33
+ import torch
34
  import torch.nn.functional as F
35
+ import numpy as np
36
+ import imageio
37
+ from PIL import Image
38
+ from einops import rearrange
39
+
40
+ # --- Variáveis de Ambiente e Constantes ---
41
+ LTXV_DEBUG = os.getenv("LTXV_DEBUG", "1") == "1"
42
+ LTXV_FRAME_LOG_EVERY = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
43
  DEPS_DIR = Path("/data")
44
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
45
 
46
+ # --- 1. SETUP E GERENCIAMENTO DE DEPENDÊNCIAS ---
47
+
48
def run_setup():
    """Run the local ``setup.py`` script, capturing its output.

    Nothing is executed when ``setup.py`` is missing from the current working
    directory. On a non-zero exit status the captured stdout/stderr are
    printed and the interpreter exits with code 1.
    """
    script = "setup.py"
    if not os.path.exists(script):
        print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
        return

    try:
        print("[DEBUG] Executando setup.py para instalar dependências...")
        subprocess.run(
            [sys.executable, script],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as err:
        print(f"[ERROR] Falha crítica ao executar setup.py (código {err.returncode}).\nOutput:\n{err.stdout}\n{err.stderr}")
        sys.exit(1)
    else:
        print("[DEBUG] Setup concluído com sucesso.")
 
 
 
 
61
 
62
def add_deps_to_path():
    """Prepend the resolved ``LTX_VIDEO_REPO_DIR`` to ``sys.path`` if absent."""
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    already_listed = repo_path in sys.path
    if not already_listed:
        # Insert at the front of the module search path.
        sys.path[:0] = [repo_path]
        print(f"[DEBUG] Repositório LTX-Video adicionado ao sys.path: {repo_path}")
68
+
69
+ # Executa a configuração inicial ao carregar o script
70
+ if not LTX_VIDEO_REPO_DIR.exists():
71
+ print(f"[INFO] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Executando setup...")
72
+ run_setup()
73
+ add_deps_to_path()
74
+
75
+ # --- Importações que dependem do sys.path modificado ---
76
+ from managers.vae_manager import vae_manager_singleton
77
+ from tools.video_encode_tool import video_encode_tool_singleton
78
+ from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline, adain_filter_latent
79
+ from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
80
+ from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
81
+ from api.ltx.inference import (
82
+ create_ltx_video_pipeline, create_latent_upsampler,
83
+ load_image_to_tensor_with_resize_and_crop, seed_everething,
84
+ calculate_padding, load_media_file
85
+ )
86
+
87
+ # --- 2. FUNÇÕES UTILITÁRIAS INTELIGENTES ---
88
+
89
def calculate_new_dimensions(orig_w, orig_h, target_area=512*768, divisor=8):
    """Compute a size near ``target_area`` pixels that keeps the aspect ratio.

    Both sides are rounded to the nearest multiple of ``divisor`` and are never
    smaller than ``divisor``.

    Args:
        orig_w: Original width.
        orig_h: Original height.
        target_area: Desired ``width * height`` of the result.
        divisor: Value both returned sides must be a multiple of.

    Returns:
        ``(final_h, final_w)`` -- note the height-first order. The fixed pair
        ``(512, 768)`` is returned when either original side is not positive.
    """
    if orig_w <= 0 or orig_h <= 0:
        print(f"[WARN] Dimensões originais inválidas ({orig_w}x{orig_h}). Usando padrão 512x768.")
        return 512, 768

    aspect_ratio = orig_w / orig_h
    # Keep the ideal size as floats. Truncating the height before deriving the
    # width (and truncating again before rounding) biased the result downward
    # and drifted away from the requested aspect ratio.
    new_h = (target_area / aspect_ratio) ** 0.5
    new_w = new_h * aspect_ratio

    final_w = max(divisor, round(new_w / divisor) * divisor)
    final_h = max(divisor, round(new_h / divisor) * divisor)

    if LTXV_DEBUG:
        print(f"[Dimension Calc] Original: {orig_w}x{orig_h} (AR: {aspect_ratio:.2f}) -> "
              f"Calculado: {new_w:.0f}x{new_h:.0f} -> Final (múltiplo de {divisor}): {final_w}x{final_h}")
    return final_h, final_w
 
 
 
 
 
 
114
 
115
  def log_tensor_info(tensor, name="Tensor"):
116
+ """Exibe informações detalhadas sobre um tensor para depuração."""
117
+ if not LTXV_DEBUG: return
118
  if not isinstance(tensor, torch.Tensor):
119
+ print(f"\n[INFO] '{name}' não é um tensor.")
120
  return
121
  print(f"\n--- Tensor: {name} ---")
122
  print(f" - Shape: {tuple(tensor.shape)}")
 
124
  print(f" - Device: {tensor.device}")
125
  if tensor.numel() > 0:
126
  try:
127
+ print(f" - Stats: Min={tensor.min().item():.4f}, Max={tensor.max().item():.4f}, Mean={tensor.mean().item():.4f}")
128
+ except Exception as e:
129
+ print(f" - Stats: Falha ao calcular estatísticas - {e}")
130
  print("------------------------------------------\n")
131
 
132
+ # --- 3. CLASSE PRINCIPAL DO SERVIÇO DE VÍDEO ---
 
 
 
 
 
 
 
 
 
 
133
 
134
  class VideoService:
135
  def __init__(self):
136
  t0 = time.perf_counter()
137
+ print("[INFO] Inicializando VideoService...")
 
 
 
 
138
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
139
+ self.config = self._load_config()
140
+
141
+ print(f"[INFO] Config carregada (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
142
+ print(f"[INFO] Dispositivo selecionado: {self.device}")
143
 
144
+ self._tmp_dirs = set()
145
+ self._tmp_files = set()
146
+
147
  self.pipeline, self.latent_upsampler = self._load_models()
 
148
 
149
+ print("[INFO] Movendo modelos para o dispositivo...")
150
  self.pipeline.to(self.device)
151
  if self.latent_upsampler:
152
  self.latent_upsampler.to(self.device)
153
 
154
  self._apply_precision_policy()
 
155
 
156
  vae_manager_singleton.attach_pipeline(
157
  self.pipeline,
158
  device=self.device,
159
  autocast_dtype=self.runtime_autocast_dtype
160
  )
161
+ print("[INFO] VAE manager conectado ao pipeline.")
162
+
163
  if self.device == "cuda":
164
  torch.cuda.empty_cache()
165
+
166
+ print(f"[SUCCESS] VideoService pronto. Tempo de inicialização: {time.perf_counter()-t0:.2f}s")
167
+
168
+ # --- MÉTODOS INTERNOS: INICIALIZAÇÃO E SETUP ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  def _load_config(self):
171
+ """Carrega o arquivo de configuração YAML do modelo."""
172
  base = LTX_VIDEO_REPO_DIR / "configs"
173
+ # Tenta carregar a configuração mais provável, com fallbacks
174
+ candidates = [
175
+ base / "ltxv-13b-0.9.8-dev-fp8.yaml",
176
+ base / "ltxv-13b-0.9.8-distilled-fp8.yaml",
177
+ base / "ltxv-13b-0.9.8-distilled.yaml",
178
+ ]
179
+ for cfg_path in candidates:
180
+ if cfg_path.exists():
181
+ print(f"[DEBUG] Configuração encontrada e selecionada: {cfg_path}")
182
+ with open(cfg_path, "r") as file:
183
+ return yaml.safe_load(file)
184
+
185
+ raise FileNotFoundError(f"Nenhum arquivo de configuração YAML encontrado em {base}. Verifique a instalação.")
186
 
187
  def _load_models(self):
188
  t0 = time.perf_counter()
189
  LTX_REPO = "Lightricks/LTX-Video"
190
  print("[DEBUG] Baixando checkpoint principal...")
191
+ distilled_model_path = hf_hub_download(
192
+ repo_id=LTX_REPO,
193
+ filename=self.config["checkpoint_path"],
194
+ local_dir=os.getenv("HF_HOME"),
195
+ cache_dir=os.getenv("HF_HOME_CACHE"),
196
+ token=os.getenv("HF_TOKEN"),
197
+ )
198
  self.config["checkpoint_path"] = distilled_model_path
199
  print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
200
 
201
  print("[DEBUG] Baixando upscaler espacial...")
202
+ spatial_upscaler_path = hf_hub_download(
203
+ repo_id=LTX_REPO,
204
+ filename=self.config["spatial_upscaler_model_path"],
205
+ local_dir=os.getenv("HF_HOME"),
206
+ cache_dir=os.getenv("HF_HOME_CACHE"),
207
+ token=os.getenv("HF_TOKEN")
208
+ )
209
  self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
210
  print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
211
 
 
214
  ckpt_path=self.config["checkpoint_path"],
215
  precision=self.config["precision"],
216
  text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
217
+ sampler=self.config["sampler"],
218
+ device="cpu",
219
+ enhance_prompt=False,
220
  prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
221
  prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
222
  )
 
230
  print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
231
  return pipeline, latent_upsampler
232
 
233
+
 
 
 
 
 
 
 
 
 
 
 
 
234
  def _apply_precision_policy(self):
235
+ """Define o dtype a ser usado pelo autocast com base na configuração."""
236
  prec = str(self.config.get("precision", "")).lower()
237
  self.runtime_autocast_dtype = torch.float32
238
+
239
+ if "bfloat16" in prec or "fp8" in prec:
240
  self.runtime_autocast_dtype = torch.bfloat16
241
+ elif "mixed_precision" in prec or "fp16" in prec:
242
  self.runtime_autocast_dtype = torch.float16
243
+
244
+ print(f"[INFO] Política de precisão aplicada. Dtype para Autocast: {self.runtime_autocast_dtype}")
245
+
246
+ # --- MÉTODOS INTERNOS: OPERAÇÕES DE TENSOR E VÍDEO ---
247
 
248
  def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
249
+ """Carrega uma imagem, redimensiona, aplica padding e move para o dispositivo correto."""
250
  tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
251
+ tensor = F.pad(tensor, padding_values)
252
+ return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
+ @torch.no_grad()
255
+ def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
256
+ """Lógica de upscale de latentes, garantindo que os modelos estejam no dispositivo correto."""
257
+ if not self.latent_upsampler:
258
+ raise ValueError("Latent Upsampler não está carregado, mas foi solicitado.")
259
 
260
+ # Garante que o VAE e o upsampler estejam no dispositivo correto para a operação
261
+ self.latent_upsampler.to(self.device)
262
+ self.pipeline.vae.to(self.device)
263
 
264
+ latents_up = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
265
+ latents_up = self.latent_upsampler(latents_up)
266
+ latents_up = normalize_latents(latents_up, self.pipeline.vae, vae_per_channel_normalize=True)
267
+ return latents_up
268
+
269
+ # --- MÉTODO PRINCIPAL DE LIMPEZA ---
270
+
271
+ def finalize(self, keep_paths=None, clear_gpu=True):
272
+ """
273
+ [FUNÇÃO INTELIGENTE]
274
+ Limpeza robusta para garantir a liberação de recursos de disco e GPU,
275
+ mesmo em caso de falhas, prevenindo memory leaks.
276
+ """
277
+ print("[INFO] Finalize: iniciando limpeza de recursos...")
278
+ keep = set(keep_paths or [])
279
+ files_to_clean, dirs_to_clean = list(self._tmp_files), list(self._tmp_dirs)
280
+ removed_files, removed_dirs = 0, 0
281
+
282
+ for f in files_to_clean:
283
+ try:
284
+ if f not in keep and os.path.isfile(f):
285
+ os.remove(f); removed_files += 1
286
+ except OSError as e:
287
+ print(f"[WARN] Falha ao remover arquivo temporário {f}: {e}")
288
+ finally:
289
+ self._tmp_files.discard(f)
290
 
291
+ for d in dirs_to_clean:
292
+ try:
293
+ if d not in keep and os.path.isdir(d):
294
+ shutil.rmtree(d, ignore_errors=True); removed_dirs += 1
295
+ except OSError as e:
296
+ print(f"[WARN] Falha ao remover diretório temporário {d}: {e}")
297
+ finally:
298
+ self._tmp_dirs.discard(d)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
+ if LTXV_DEBUG:
301
+ print(f"[DEBUG] Limpeza de disco: {removed_files} arquivos e {removed_dirs} diretórios removidos.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
+ gc.collect()
 
 
 
 
 
 
304
 
305
+ if clear_gpu and self.device == "cuda":
306
+ try:
307
+ torch.cuda.empty_cache()
308
+ torch.cuda.ipc_collect()
309
+ if LTXV_DEBUG: print("[DEBUG] Limpeza da GPU concluída com sucesso.")
310
+ except Exception as e:
311
+ print(f"[ERROR] Falha crítica durante a limpeza da GPU: {e}")
312
 
313
+ # ==============================================================================
314
+ # --- FUNÇÃO PRINCIPAL DE GERAÇÃO (generate) ---
315
+ # ==============================================================================
316
+
317
+ @torch.no_grad()
318
+ def generate(
319
+ self,
320
+ prompt: str,
321
+ negative_prompt: str = "",
322
+ mode: str = "text-to-video",
323
+ start_image_filepath: str = None,
324
+ height: int = 512,
325
+ width: int = 704,
326
+ duration: float = 2.0,
327
+ seed: int = 42,
328
+ randomize_seed: bool = True,
329
+ guidance_scale: float = 3.0,
330
+ improve_texture: bool = True,
331
+ ):
332
+ output_path, final_seed = None, None
333
+ try:
334
+ t_all = time.perf_counter()
335
+ print(f"\n{'='*20} INICIANDO NOVA GERAÇÃO {'='*20}")
336
+
337
+ if self.device == "cuda":
338
+ torch.cuda.empty_cache()
339
 
340
+ # --- 1. Setup da Geração (parâmetros, seed, dimensões) ---
341
+ if mode == "image-to-video" and not start_image_filepath:
342
+ raise ValueError("Imagem de início é obrigatória para o modo 'image-to-video'")
 
 
 
 
 
 
 
343
 
344
+ final_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
345
+ seed_everething(final_seed)
346
+ print(f"[INFO] Geração com Seed: {final_seed}")
347
+
348
+ FPS = 24.0; MAX_NUM_FRAMES = 2570
349
+ target_frames_rounded = round(duration * FPS)
350
+ n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
351
+ actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
352
 
353
+ height_padded = ((height - 1) // 8 + 1) * 8
354
+ width_padded = ((width - 1) // 8 + 1) * 8
355
+ padding_values = calculate_padding(height, width, height_padded, width_padded)
356
+ generator = torch.Generator(device=self.device).manual_seed(final_seed)
357
+
358
+ temp_dir = tempfile.mkdtemp(prefix="ltxv_")
359
+ self._tmp_dirs.add(temp_dir)
360
+
361
+ # --- 2. Preparação dos Tensores de Condicionamento ---
362
+ conditioning_items = []
363
+ if mode == "image-to-video" and start_image_filepath:
364
+ start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
365
+ conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
366
+
367
+ # --- 3. Construção dos Argumentos da Pipeline ---
368
+ call_kwargs = self.config.get("pipeline_defaults", {}).copy() # Carrega defaults do YAML
369
+ call_kwargs.update({
370
+ "prompt": prompt, "negative_prompt": negative_prompt,
371
+ "height": height_padded, "width": width_padded,
372
+ "num_frames": actual_num_frames, "frame_rate": int(FPS),
373
+ "generator": generator, "output_type": "latent",
374
+ "conditioning_items": conditioning_items or None,
375
+ "guidance_scale": float(guidance_scale),
376
+ })
377
+
378
+ # --- 4. Lógica de Geração (Pipeline de 1 ou 2 passes) ---
379
+ final_latents = None
380
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
381
 
382
+ with ctx:
383
+ if improve_texture:
384
+ print("[INFO] Iniciando pipeline de 2 passes para melhoria de textura.")
385
+
386
+ # ETAPA 1: Geração Base em Baixa Resolução
387
+ downscale_factor = self.config.get("downscale_factor", 0.5)
388
+ target_low_res_area = (width * height) * (downscale_factor**2)
389
+ downscaled_h, downscaled_w = calculate_new_dimensions(width, height, target_area=target_low_res_area)
390
+
391
+ first_pass_kwargs = call_kwargs.copy()
392
+ first_pass_kwargs.update(self.config.get("first_pass", {}))
393
+ first_pass_kwargs.update({"width": downscaled_w, "height": downscaled_h, "guidance_scale": float(guidance_scale)})
394
+
395
+ base_latents = self.pipeline(**first_pass_kwargs).images
396
+ log_tensor_info(base_latents, "Latentes Base (Passo 1)")
397
+
398
+ # ETAPA 2: Upscale e Refinamento
399
+ upsampled_latents = self._upsample_latents_internal(base_latents)
400
+ del base_latents; gc.collect(); torch.cuda.empty_cache()
401
+
402
+ second_pass_kwargs = call_kwargs.copy()
403
+ second_pass_kwargs.update(self.config.get("second_pass", {}))
404
+ second_pass_kwargs.update({"latents": upsampled_latents, "guidance_scale": float(guidance_scale)})
405
+
406
+ final_latents = self.pipeline(**second_pass_kwargs).images
407
+ log_tensor_info(final_latents, "Latentes Finais (Passo 2)")
408
 
409
+ else:
410
+ print("[INFO] Iniciando pipeline de 1 passe.")
411
+ final_latents = self.pipeline(**call_kwargs).images
412
+ log_tensor_info(final_latents, "Latentes Finais (Passe Único)")
413
+
414
+ # --- 5. Decodificação, Codificação de Vídeo e Finalização ---
415
+ print("[INFO] Decodificando latentes para pixels com VAE...")
416
+ pixel_tensor = vae_manager_singleton.decode(
417
+ final_latents.to(self.device),
418
+ decode_timestep=float(self.config.get("decode_timestep", 0.05))
419
+ )
420
+ del final_latents; gc.collect(); torch.cuda.empty_cache()
421
 
422
+ output_video_path_tmp = os.path.join(temp_dir, f"output_{final_seed}.mp4")
423
+
424
+ print(f"[INFO] Codificando vídeo final para: {output_video_path_tmp}")
425
+ video_encode_tool_singleton.save_video_from_tensor(
426
+ pixel_tensor, output_video_path_tmp, fps=call_kwargs["frame_rate"]
427
+ )
428
+ del pixel_tensor
429
+
430
+ results_dir = "/app/output"
431
+ os.makedirs(results_dir, exist_ok=True)
432
+ output_path = os.path.join(results_dir, f"final_video_{final_seed}.mp4")
433
+ shutil.move(output_video_path_tmp, output_path)
434
 
435
+ print(f"[SUCCESS] Geração concluída em {time.perf_counter() - t_all:.2f}s. Vídeo salvo em: {output_path}")
436
+ return output_path, final_seed
 
437
 
438
+ except Exception as e:
439
+ print(f"[FATAL ERROR] A geração falhou: {type(e).__name__} - {e}")
440
+ traceback.print_exc()
441
+ raise
442
 
443
+ finally:
444
+ print("[INFO] Executando limpeza final da tarefa...")
445
+ self.finalize(keep_paths=[output_path] if output_path else [])
446
 
447
+
448
+ # --- Ponto de Entrada Principal ---
449
if __name__ == "__main__":
    print("Iniciando carregamento do VideoService...")
    video_generation_service = VideoService()
    print("\n[INFO] VideoService carregado e pronto para receber tarefas.")

    # Sample request showing how generation is invoked; an API layer could
    # replace this.
    example_request = {
        "prompt": "A cinematic shot of a panda drinking bubble tea in a Tokyo cafe",
        "negative_prompt": "blurry, low quality, cartoon",
        "duration": 3.0,
        "improve_texture": True,
    }
    try:
        video_generation_service.generate(**example_request)
    except Exception:
        # generate() has already logged the error and the traceback.
        print("\n[MAIN] Exemplo de geração falhou. O servidor ainda está de pé, mas verifique o erro acima.")