dagloop5 committed on
Commit
b30f6da
·
verified ·
1 Parent(s): 809e547

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -53
app.py CHANGED
@@ -214,7 +214,7 @@ class LTX23DistilledA2VPipeline:
214
 
215
  # Stage 1: Generate sigmas using LTX2Scheduler with user-specified steps
216
  empty_latent = torch.empty(VideoLatentShape.from_pixel_shape(
217
- VideoPixelShape(batch=1, frames=num_frames, width=width // 2, height=height // 2, fps=frame_rate)
218
  ).to_torch_shape())
219
  stage_1_sigmas = (
220
  LTX2Scheduler()
@@ -243,25 +243,12 @@ class LTX23DistilledA2VPipeline:
243
  ),
244
  )
245
 
246
- def stage2_denoising_loop(sigmas: torch.Tensor, video_state, audio_state, stepper: DiffusionStepProtocol):
247
- return res2s_audio_video_denoising_loop(
248
- sigmas=sigmas,
249
- video_state=video_state,
250
- audio_state=audio_state,
251
- stepper=stepper,
252
- denoise_fn=simple_denoising_func(
253
- video_context=v_context_p,
254
- audio_context=a_context_p,
255
- transformer=transformer, # noqa: F821
256
- ),
257
- )
258
-
259
  # ── Stage 1: Half resolution ──
260
  stage_1_output_shape = VideoPixelShape(
261
  batch=1,
262
  frames=num_frames,
263
- width=width // 2,
264
- height=height // 2,
265
  fps=frame_rate,
266
  )
267
  stage_1_conditionings = combined_image_conditionings(
@@ -291,42 +278,6 @@ class LTX23DistilledA2VPipeline:
291
  torch.cuda.synchronize()
292
  cleanup_memory()
293
 
294
- # ── Upscaling ──
295
- upscaled_video_latent = upsample_video(
296
- latent=video_state.latent[:1],
297
- video_encoder=video_encoder,
298
- upsampler=self.model_ledger.spatial_upsampler(),
299
- )
300
-
301
- # ── Stage 2: Full resolution ──
302
- stage_2_sigmas = torch.tensor(STAGE_2_DISTILLED_SIGMA_VALUES, device=self.device)
303
- stage_2_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
304
- stage_2_conditionings = combined_image_conditionings(
305
- images=images,
306
- height=stage_2_output_shape.height,
307
- width=stage_2_output_shape.width,
308
- video_encoder=video_encoder,
309
- dtype=dtype,
310
- device=self.device,
311
- )
312
- video_state, audio_state = denoise_audio_video(
313
- output_shape=stage_2_output_shape,
314
- conditionings=stage_2_conditionings,
315
- noiser=noiser,
316
- sigmas=stage_2_sigmas,
317
- stepper=stepper,
318
- denoising_loop_fn=stage2_denoising_loop,
319
- components=self.pipeline_components,
320
- dtype=dtype,
321
- device=self.device,
322
- noise_scale=stage_2_sigmas[0],
323
- initial_video_latent=upscaled_video_latent,
324
- initial_audio_latent=audio_state.latent,
325
- )
326
-
327
- torch.cuda.synchronize()
328
- cleanup_memory()
329
-
330
  # ── Decode both video and audio ──
331
  decoded_video = vae_decode_video(
332
  video_state.latent,
@@ -856,7 +807,7 @@ css = """
856
  """
857
 
858
  with gr.Blocks(title="LTX-2.3 Distilled with LoRAs, Negative Prompting, and Advanced Settings") as demo:
859
- gr.Markdown("# LTX-2.3 Two-Stage HQ Video Generation")
860
  gr.Markdown(
861
  "High-quality text/image-to-video with cached LoRA state + CFG guidance. "
862
  "[[Model]](https://huggingface.co/Lightricks/LTX-2.3)"
 
214
 
215
  # Stage 1: Generate sigmas using LTX2Scheduler with user-specified steps
216
  empty_latent = torch.empty(VideoLatentShape.from_pixel_shape(
217
+ VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
218
  ).to_torch_shape())
219
  stage_1_sigmas = (
220
  LTX2Scheduler()
 
243
  ),
244
  )
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  # ── Stage 1: Half resolution ──
247
  stage_1_output_shape = VideoPixelShape(
248
  batch=1,
249
  frames=num_frames,
250
+ width=width,
251
+ height=height,
252
  fps=frame_rate,
253
  )
254
  stage_1_conditionings = combined_image_conditionings(
 
278
  torch.cuda.synchronize()
279
  cleanup_memory()
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  # ── Decode both video and audio ──
282
  decoded_video = vae_decode_video(
283
  video_state.latent,
 
807
  """
808
 
809
  with gr.Blocks(title="LTX-2.3 Distilled with LoRAs, Negative Prompting, and Advanced Settings") as demo:
810
+ gr.Markdown("# LTX-2.3 One-Stage HQ Video Generation")
811
  gr.Markdown(
812
  "High-quality text/image-to-video with cached LoRA state + CFG guidance. "
813
  "[[Model]](https://huggingface.co/Lightricks/LTX-2.3)"