TestingwithNeg

Running on Zero

App Files Files Community

dagloop5 committed on May 27

Commit

b30f6da

verified ·

1 Parent(s): 809e547

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -53

app.py CHANGED Viewed

@@ -214,7 +214,7 @@ class LTX23DistilledA2VPipeline:
         # Stage 1: Generate sigmas using LTX2Scheduler with user-specified steps
         empty_latent = torch.empty(VideoLatentShape.from_pixel_shape(
-            VideoPixelShape(batch=1, frames=num_frames, width=width // 2, height=height // 2, fps=frame_rate)
         ).to_torch_shape())
         stage_1_sigmas = (
             LTX2Scheduler()
@@ -243,25 +243,12 @@ class LTX23DistilledA2VPipeline:
                 ),
             )
-        def stage2_denoising_loop(sigmas: torch.Tensor, video_state, audio_state, stepper: DiffusionStepProtocol):
-            return res2s_audio_video_denoising_loop(
-                sigmas=sigmas,
-                video_state=video_state,
-                audio_state=audio_state,
-                stepper=stepper,
-                denoise_fn=simple_denoising_func(
-                    video_context=v_context_p,
-                    audio_context=a_context_p,
-                    transformer=transformer,  # noqa: F821
-                ),
-            )
         # ── Stage 1: Half resolution ──
         stage_1_output_shape = VideoPixelShape(
             batch=1,
             frames=num_frames,
-            width=width // 2,
-            height=height // 2,
             fps=frame_rate,
         )
         stage_1_conditionings = combined_image_conditionings(
@@ -291,42 +278,6 @@ class LTX23DistilledA2VPipeline:
         torch.cuda.synchronize()
         cleanup_memory()
-        # ── Upscaling ──
-        upscaled_video_latent = upsample_video(
-            latent=video_state.latent[:1],
-            video_encoder=video_encoder,
-            upsampler=self.model_ledger.spatial_upsampler(),
-        )
-        # ── Stage 2: Full resolution ──
-        stage_2_sigmas = torch.tensor(STAGE_2_DISTILLED_SIGMA_VALUES, device=self.device)
-        stage_2_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
-        stage_2_conditionings = combined_image_conditionings(
-            images=images,
-            height=stage_2_output_shape.height,
-            width=stage_2_output_shape.width,
-            video_encoder=video_encoder,
-            dtype=dtype,
-            device=self.device,
-        )
-        video_state, audio_state = denoise_audio_video(
-            output_shape=stage_2_output_shape,
-            conditionings=stage_2_conditionings,
-            noiser=noiser,
-            sigmas=stage_2_sigmas,
-            stepper=stepper,
-            denoising_loop_fn=stage2_denoising_loop,
-            components=self.pipeline_components,
-            dtype=dtype,
-            device=self.device,
-            noise_scale=stage_2_sigmas[0],
-            initial_video_latent=upscaled_video_latent,
-            initial_audio_latent=audio_state.latent,
-        )
-        torch.cuda.synchronize()
-        cleanup_memory()
         # ── Decode both video and audio ──
         decoded_video = vae_decode_video(
             video_state.latent,
@@ -856,7 +807,7 @@ css = """
 """
 with gr.Blocks(title="LTX-2.3 Distilled with LoRAs, Negative Prompting, and Advanced Settings") as demo:
-    gr.Markdown("# LTX-2.3 Two-Stage HQ Video Generation")
     gr.Markdown(
         "High-quality text/image-to-video with cached LoRA state + CFG guidance. "
         "[[Model]](https://huggingface.co/Lightricks/LTX-2.3)"

         # Stage 1: Generate sigmas using LTX2Scheduler with user-specified steps
         empty_latent = torch.empty(VideoLatentShape.from_pixel_shape(
+            VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
         ).to_torch_shape())
         stage_1_sigmas = (
             LTX2Scheduler()
                 ),
             )
         # ── Stage 1: Half resolution ──
         stage_1_output_shape = VideoPixelShape(
             batch=1,
             frames=num_frames,
+            width=width,
+            height=height,
             fps=frame_rate,
         )
         stage_1_conditionings = combined_image_conditionings(
         torch.cuda.synchronize()
         cleanup_memory()
         # ── Decode both video and audio ──
         decoded_video = vae_decode_video(
             video_state.latent,
 """
 with gr.Blocks(title="LTX-2.3 Distilled with LoRAs, Negative Prompting, and Advanced Settings") as demo:
+    gr.Markdown("# LTX-2.3 One-Stage HQ Video Generation")
     gr.Markdown(
         "High-quality text/image-to-video with cached LoRA state + CFG guidance. "
         "[[Model]](https://huggingface.co/Lightricks/LTX-2.3)"