Test

Paused

App Files Files Community

eeuuia committed on Oct 15

Commit

226818a

verified ·

1 Parent(s): b743563

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -57

app.py CHANGED Viewed

@@ -59,20 +59,42 @@ local_repo_path = snapshot_download(
 # 3. Carregar cada componente da pipeline explicitamente
 print("=== Carregando componentes da pipeline... ===")
-vae = AutoencoderKLLTXVideo.from_pretrained(local_repo_path, subfolder="vae", torch_dtype=torch_dtype)
-text_encoder = T5EncoderModel.from_pretrained(local_repo_path, subfolder="text_encoder", torch_dtype=torch_dtype)
-tokenizer = T5TokenizerFast.from_pretrained(local_repo_path, subfolder="tokenizer")
-scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(local_repo_path, subfolder="scheduler")
-# Correção para o erro 'mu': desativar explicitamente o dynamic shifting
 if hasattr(scheduler.config, 'use_dynamic_shifting') and scheduler.config.use_dynamic_shifting:
     print("[Config] Desativando 'use_dynamic_shifting' no scheduler.")
     scheduler.config.use_dynamic_shifting = False
-print(f"Carregando pesos do Transformer de: {checkpoint_path}")
-transformer = LTXVideoTransformer3DModel.from_pretrained(
-    local_repo_path, subfolder="transformer", weight_name=checkpoint_path, torch_dtype=torch_dtype
 )
 # 4. Montar a pipeline principal
 print("Montando a LTXConditionPipeline...")
@@ -162,60 +184,41 @@ def prepare_and_generate_video(
             guidance_scale=guidance_scale,
             guidance_rescale=0.7,
             generator=torch.Generator().manual_seed(seed),
-            output_type="latent",
             **pipeline_args
-        ).frames
         # ETAPA 2: Upscale dos latentes
-        upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
-        upscaled_latents = pipe_upsample(
-            latents=latents,
-            output_type="latent"
-        ).frames
-        conditions = []
-        if condition_image_1 is not None:
-            condition_image_1 = ImageOps.fit(condition_image_1, (upscaled_width, upscaled_height), Image.LANCZOS)
-            conditions.append(LTXVideoCondition(
-                image=condition_image_1,
-                strength=condition_strength_1,
-                frame_index=int(condition_frame_index_1)
-            ))
-        if condition_image_2 is not None:
-            condition_image_2 = ImageOps.fit(condition_image_2, (upscaled_width, upscaled_height), Image.LANCZOS)
-            conditions.append(LTXVideoCondition(
-                image=condition_image_2,
-                strength=condition_strength_2,
-                frame_index=int(condition_frame_index_2)
-            ))
-        pipeline_args = {}
-        if conditions:
-            pipeline_args["conditions"] = conditions
         # ETAPA 3: Denoise final em alta resolução
-        final_video_frames_np = pipeline(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            width=upscaled_width,
-            height=upscaled_height,
-            num_frames=num_frames,
-            denoise_strength=0.999,
-            timesteps=[1000, 909, 725, 421, 0],
-            latents=upscaled_latents,
-            decode_timestep=0.05,
-            decode_noise_scale=0.025,
-            image_cond_noise_scale=0.0,
-            guidance_scale=guidance_scale,
-            guidance_rescale=0.7,
-            generator=torch.Generator(device="cuda").manual_seed(seed),
-            output_type="np",
-            **pipeline_args
-        ).frames[0]
         # Exportação para arquivo MP4
         video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]

 # 3. Carregar cada componente da pipeline explicitamente
 print("=== Carregando componentes da pipeline... ===")
+vae = AutoModel.from_pretrained(
+    "Lightricks/LTX-Video",
+    subfolder="vae",
+    torch_dtype=torch_dtype
+)
+text_encoder = AutoModel.from_pretrained(
+    "Lightricks/LTX-Video",
+    subfolder="text_encoder",
+    torch_dtype=torch_dtype
+)
+scheduler = AutoModel.from_pretrained(
+    "Lightricks/LTX-Video",
+    subfolder="scheduler",
+    torch_dtype=torch_dtype
+)
+tokenizer = AutoModel.from_pretrained(
+    "Lightricks/LTX-Video",
+    subfolder="tokenizer",
+    torch_dtype=torch_dtype
+)
 if hasattr(scheduler.config, 'use_dynamic_shifting') and scheduler.config.use_dynamic_shifting:
     print("[Config] Desativando 'use_dynamic_shifting' no scheduler.")
     scheduler.config.use_dynamic_shifting = False
+transformer = AutoModel.from_pretrained(
+    "Lightricks/LTX-Video",
+    subfolder="transformer",
+    torch_dtype=torch.bfloat16
 )
+transformer.enable_layerwise_casting(
+    storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
+)
 # 4. Montar a pipeline principal
 print("Montando a LTXConditionPipeline...")
             guidance_scale=guidance_scale,
             guidance_rescale=0.7,
             generator=torch.Generator().manual_seed(seed),
+            #output_type="latent",
+            output_type="np",
             **pipeline_args
+        ).frames[0]
         # ETAPA 2: Upscale dos latentes
+        #upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+        #upscaled_latents = pipe_upsample(
+        #    latents=latents,
+        #    output_type="latent"
+        #).frames
         # ETAPA 3: Denoise final em alta resolução
+        if false:
+            final_video_frames_np = pipeline(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                width=upscaled_width,
+                height=upscaled_height,
+                num_frames=num_frames,
+                denoise_strength=0.999,
+                timesteps=[1000, 909, 725, 421, 0],
+                latents=upscaled_latents,
+                decode_timestep=0.05,
+                decode_noise_scale=0.025,
+                image_cond_noise_scale=0.0,
+                guidance_scale=guidance_scale,
+                guidance_rescale=0.7,
+                generator=torch.Generator(device="cuda").manual_seed(seed),
+                output_type="np",
+                **pipeline_args
+            ).frames[0]
+        else:
+            final_video_frames_np = latents
         # Exportação para arquivo MP4
         video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]