Carlos s committed on
Commit f70421a · verified · 1 Parent(s): 79febce

Update api/ltx_server.py

Files changed (1):
  1. api/ltx_server.py  +308 -4
api/ltx_server.py CHANGED
@@ -765,15 +765,319 @@ class VideoService:
             self._log_gpu_memory("Fim da Geração")
             return final_concat, used_seed
 
+        except Exception as e:
+            print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
+            print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
+            raise
+    # ltx_server.py
+
+    def generate(
+        self,
+        prompt,
+        negative_prompt,
+        mode="text-to-video",
+        start_image_filepath=None,
+        middle_image_filepath=None,
+        middle_frame_number=None,
+        middle_image_weight=1.0,
+        end_image_filepath=None,
+        end_image_weight=1.0,
+        input_video_filepath=None,
+        height=512,
+        width=704,
+        duration=2.0,
+        frames_to_use=9,
+        seed=42,
+        randomize_seed=True,
+        guidance_scale=3.0,
+        improve_texture=True,
+        progress_callback=None,
+        external_decode=True,
+    ):
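+        # Single entry point for text-to-video, image-to-video and video-to-video.
+        # The body runs in three broad phases: input validation/setup, denoising
+        # (single- or multi-scale), and chunked VAE decode + MP4 encode.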
+        t_all = time.perf_counter()
+        print(f"[DEBUG] generate() begin mode={mode} external_decode={external_decode} improve_texture={improve_texture}")
+        if self.device == "cuda":
+            torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
+        self._log_gpu_memory("Início da Geração")
+
+        if mode == "image-to-video" and not start_image_filepath:
+            raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
+        if mode == "video-to-video" and not input_video_filepath:
+            raise ValueError("O vídeo de entrada é obrigatório para o modo video-to-video")
+
+        used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
+        seed_everething(used_seed); print(f"[DEBUG] Seed usado: {used_seed}")
+
+        FPS = 24.0; MAX_NUM_FRAMES = 2570
+        target_frames_rounded = round(duration * FPS)
+        n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
+        actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
+        print(f"[DEBUG] Frames alvo: {actual_num_frames} (dur={duration}s @ {FPS}fps)")
+
+        height_padded = ((height - 1) // 32 + 1) * 32
+        width_padded = ((width - 1) // 32 + 1) * 32
+        padding_values = calculate_padding(height, width, height_padded, width_padded)
+        print(f"[DEBUG] Dimensões: ({height},{width}) -> pad ({height_padded},{width_padded}); padding={padding_values}")
+
+        generator = torch.Generator(device=self.device).manual_seed(used_seed)
+        conditioning_items = []
+
+        if mode == "image-to-video":
+            start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
+            conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
+            if middle_image_filepath and middle_frame_number is not None:
+                middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
+                safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
+                conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
+            if end_image_filepath:
+                end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
+                last_frame_index = actual_num_frames - 1
+                conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
+        print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
+
+        call_kwargs = {
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "height": height_padded,
+            "width": width_padded,
+            "num_frames": actual_num_frames,
+            "frame_rate": int(FPS),
+            "generator": generator,
+            "output_type": "latent",
+            "conditioning_items": conditioning_items if conditioning_items else None,
+            "media_items": None,
+            "decode_timestep": self.config["decode_timestep"],
+            "decode_noise_scale": self.config["decode_noise_scale"],
+            "stochastic_sampling": self.config["stochastic_sampling"],
+            "image_cond_noise_scale": 0.01,
+            "is_video": True,
+            "vae_per_channel_normalize": True,
+            "mixed_precision": (self.config["precision"] == "mixed_precision"),
+            "offload_to_cpu": False,
+            "enhance_prompt": False,
+            "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
+        }
+        print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
+
+        if mode == "video-to-video":
+            media = load_media_file(
+                media_path=input_video_filepath,
+                height=height,
+                width=width,
+                max_frames=int(frames_to_use),
+                padding=padding_values,
+            ).to(self.device)
+            call_kwargs["media_items"] = media
+            print(f"[DEBUG] media_items shape={tuple(media.shape)}")
+
+        latents = None
+
+        try:
+            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
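+            # One autocast context is reused for every pipeline/upsampler call
+            # below; on CPU it degrades to a no-op nullcontext.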
+
+            if improve_texture:
+                if not self.latent_upsampler:
+                    raise ValueError("Upscaler espacial não carregado.")
+
+                # --- STEP 1: LOW-RESOLUTION LATENT GENERATION ---
+                print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
+
+                first_pass_args = self.config.get("first_pass", {}).copy()
+                first_pass_kwargs = call_kwargs.copy()
+                first_pass_kwargs.update({
+                    "guidance_scale": float(guidance_scale),
+                    "stg_scale": first_pass_args.get("stg_scale"),
+                    "rescaling_scale": first_pass_args.get("rescaling_scale"),
+                    "skip_block_list": first_pass_args.get("skip_block_list"),
+                })
+                schedule = first_pass_args.get("timesteps") or first_pass_args.get("guidance_timesteps")
+                if schedule:
+                    first_pass_kwargs["timesteps"] = schedule
+                    first_pass_kwargs["guidance_timesteps"] = schedule
+
+                downscale_factor = self.config.get("downscale_factor", 2)
+                original_height = first_pass_kwargs["height"]
+                original_width = first_pass_kwargs["width"]
+                divisor = 24
+
+                target_height_p1 = original_height // downscale_factor
+                height_p1 = round(target_height_p1 / divisor) * divisor
+                if height_p1 == 0: height_p1 = divisor
+                first_pass_kwargs["height"] = height_p1
+
+                target_width_p1 = original_width // downscale_factor
+                width_p1 = round(target_width_p1 / divisor) * divisor
+                if width_p1 == 0: width_p1 = divisor
+                first_pass_kwargs["width"] = width_p1
+
+                print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {height_p1}x{width_p1}")
+
+                with ctx:
+                    first_pass_result = self.pipeline(**first_pass_kwargs)
+
+                latents_low_res = first_pass_result.images
+                log_tensor_info(latents_low_res, "Latentes (Passo 1)")
+
+                del first_pass_result
+                gc.collect()
+                if self.device == "cuda": torch.cuda.empty_cache()
+
+                # --- INTERMEDIATE STEP: LATENT UPSCALING ---
+                print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
+                with ctx:
+                    latents_high_res = self.latent_upsampler(latents_low_res)
+
+                log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
+                del latents_low_res
+                gc.collect()
+                if self.device == "cuda": torch.cuda.empty_cache()
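+                # The latent upsampler appears to double the latents' spatial size,
+                # which is why pass 2 below sets height/width to height_p1*2 x
+                # width_p1*2 "to match the upscale".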
+
+                # --- STEP 2: HIGH-RESOLUTION REFINEMENT ---
+                print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
+                second_pass_args = self.config.get("second_pass", {}).copy()
+                second_pass_kwargs = call_kwargs.copy()
+
+                height_p2 = height_p1 * 2
+                width_p2 = width_p1 * 2
+                second_pass_kwargs["height"] = height_p2
+                second_pass_kwargs["width"] = width_p2
+                print(f"[DEBUG] Passo 2: Dimensões definidas para {height_p2}x{width_p2} para corresponder ao upscale.")
+
+                second_pass_kwargs.update({
+                    "guidance_scale": float(guidance_scale),
+                    "stg_scale": second_pass_args.get("stg_scale"),
+                    "rescaling_scale": second_pass_args.get("rescaling_scale"),
+                    "skip_block_list": second_pass_args.get("skip_block_list"),
+                })
+
+                schedule_p2 = second_pass_args.get("timesteps") or second_pass_args.get("guidance_timesteps")
+                if schedule_p2:
+                    timesteps_para_refinamento = schedule_p2
+                    print(f"[DEBUG] Passo 2: Usando {len(timesteps_para_refinamento)} timesteps pré-definidos do config para refinamento.")
+                else:
+                    strength_p2 = second_pass_args.get("strength", second_pass_args.get("denoising_strength", 0.4))
+                    num_steps_passo2_total = second_pass_args.get("num_inference_steps", 20)
+
+                    self.pipeline.scheduler.set_timesteps(num_steps_passo2_total, device=self.device)
+                    todos_os_timesteps_p2 = self.pipeline.scheduler.timesteps
+
+                    ponto_de_corte = int(len(todos_os_timesteps_p2) * (1.0 - strength_p2))
+                    timesteps_para_refinamento = todos_os_timesteps_p2[ponto_de_corte:]
+                    print(f"[DEBUG] Passo 2: Calculando {len(timesteps_para_refinamento)} timesteps manuais (strength ≈ {strength_p2})")
+
+                second_pass_kwargs["timesteps"] = timesteps_para_refinamento
+                if "strength" in second_pass_kwargs: del second_pass_kwargs["strength"]
+
+                second_pass_kwargs["latents"] = latents_high_res
+
+                num_timesteps_p2 = len(timesteps_para_refinamento)
+                if 'guidance_mapping' not in second_pass_kwargs:
+                    second_pass_kwargs['guidance_mapping'] = list(range(num_timesteps_p2))
+                    print(f"[DEBUG] Passo 2: Injetando 'guidance_mapping' de identidade com {num_timesteps_p2} passos.")
+
+                with ctx:
+                    second_pass_result = self.pipeline(**second_pass_kwargs)
+
+                latents = second_pass_result.images
+                log_tensor_info(latents, "Latentes Finais (Passo 2)")
+
+            else:
+                # --- SINGLE-PASS GENERATION ---
+                single_pass_kwargs = call_kwargs.copy()
+                first_pass_config = self.config.get("first_pass", {})
+                single_pass_kwargs.update({
+                    "guidance_scale": float(guidance_scale),
+                    "stg_scale": first_pass_config.get("stg_scale"),
+                    "rescaling_scale": first_pass_config.get("rescaling_scale"),
+                    "skip_block_list": first_pass_config.get("skip_block_list"),
+                })
+                schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
+                if mode == "video-to-video":
+                    schedule = [0.7]; print("[INFO] Modo video-to-video (etapa única): timesteps=[0.7]")
+                if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
+                    single_pass_kwargs["timesteps"] = schedule
+                    single_pass_kwargs["guidance_timesteps"] = schedule
+                print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")
+
+                print("\n[INFO] Executando pipeline de etapa única...")
+                with ctx:
+                    result = self.pipeline(**single_pass_kwargs)
+
+                latents = result.images
+                print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
+
+            # --- FINAL VIDEO DECODE AND ENCODE ---
+            latents_cpu = latents.detach().to("cpu", non_blocking=True)
+            if self.device == "cuda":
+                torch.cuda.empty_cache()
+                try: torch.cuda.ipc_collect()
+                except Exception: pass
+
+            lat_a, lat_b = self._dividir_latentes(latents_cpu)
+            lat_a1, lat_a2 = self._dividir_latentes(lat_a)
+            lat_b1, lat_b2 = self._dividir_latentes(lat_b)
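+            # Two rounds of halving yield four temporal chunks; decoding them one at
+            # a time (each chunk round-trips CPU -> GPU -> CPU below) presumably keeps
+            # peak VAE memory bounded.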
+
+            latents_parts = [lat_a1, lat_a2, lat_b1, lat_b2]
+
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
+            partes_mp4 = []
+            par = 0
+
+            for part in latents_parts:
+                par += 1
+                if part is None: continue
+                print(f"[DEBUG] Partição {par}: {tuple(part.shape)}")
+                output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+
+                print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
+                pixel_tensor = vae_manager_singleton.decode(
+                    part.to(self.device, non_blocking=True),
+                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                )
+                log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
+
+                print("[DEBUG] Codificando MP4 a partir do tensor de pixels...")
+                video_encode_tool_singleton.save_video_from_tensor(
+                    pixel_tensor,
+                    output_video_path,
+                    fps=call_kwargs["frame_rate"],
+                    progress_callback=progress_callback
+                )
+
+                candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                try:
+                    shutil.move(output_video_path, candidate)
+                    print(f"[DEBUG] MP4 parte {par} movido para {candidate}")
+                    partes_mp4.append(candidate)
+                except Exception as e:
+                    print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
+                    partes_mp4.append(output_video_path)
+
+            final_concat = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
+            if partes_mp4:
+                if len(partes_mp4) == 1:
+                    shutil.move(partes_mp4[0], final_concat)
+                    print(f"[DEBUG] Apenas uma parte, movida para {final_concat}")
+                else:
+                    self._concat_mp4s_no_reencode(partes_mp4, final_concat)
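+                    # The helper's name suggests an ffmpeg-style stream-copy concat
+                    # (no re-encode); the parts share resolution and fps, so a
+                    # lossless join should be safe.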
+            else:
+                print("[WARN] Nenhuma parte de vídeo foi gerada para concatenar.")
+                return None, used_seed
+
+            self._log_gpu_memory("Fim da Geração")
+            return final_concat, used_seed
+
         except Exception as e:
             print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
             print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
             raise
         finally:
-            try:
-                del latents, latents_low_res, latents_high_res, second_pass_result
-            except Exception:
-                pass
+            # Clean up locals to release memory
+            try: del latents, latents_low_res, latents_high_res, second_pass_result
+            except NameError: pass
+            except Exception as e: print(f"[DEBUG] Erro na limpeza de variáveis: {e}")
 
             gc.collect()
             if self.device == "cuda":