Arrokothwhi committed on
Commit
05c70dd
·
1 Parent(s): 625a3af
Files changed (1) hide show
  1. app.py +49 -65
app.py CHANGED
@@ -73,10 +73,7 @@ def load_generation_pipe():
73
  image_encoder=image_encoder,
74
  torch_dtype=PIPE_DTYPE,
75
  )
76
- if DEVICE == "cuda":
77
- pipe.enable_model_cpu_offload()
78
- else:
79
- pipe = pipe.to(DEVICE)
80
  return pipe
81
 
82
 
@@ -282,10 +279,12 @@ def generate_and_decode(image, prompt, seed, progress=gr.Progress(track_tqdm=Fal
282
  f"Latents: {tuple(latents.shape)}"
283
  )
284
  progress(1.0, desc="Done")
285
- return str(latent_path), wan_video_path, ref_video_path, status
286
 
287
 
288
  CUSTOM_CSS = """
 
 
289
  :root {
290
  --page-bg: #f4f1e8;
291
  --card-bg: rgba(255, 252, 246, 0.92);
@@ -294,6 +293,7 @@ CUSTOM_CSS = """
294
  --accent-2: #c96f42;
295
  --text-main: #201a14;
296
  --text-soft: #5c5348;
 
297
  }
298
 
299
  .gradio-container {
@@ -348,6 +348,7 @@ CUSTOM_CSS = """
348
  color: var(--text-soft);
349
  font-size: 17px;
350
  line-height: 1.6;
 
351
  }
352
 
353
  .panel-card,
@@ -367,6 +368,7 @@ CUSTOM_CSS = """
367
  color: var(--text-soft);
368
  font-size: 14px;
369
  line-height: 1.55;
 
370
  }
371
 
372
  .compare-note {
@@ -398,7 +400,6 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
398
  gr.HTML(
399
  """
400
  <div class="hero-card">
401
- <div class="hero-kicker">Image To Video Comparison</div>
402
  <div class="hero-title">RefDecoder I2V Demo</div>
403
  <p class="hero-copy">
404
  Upload one image, optionally add a motion prompt, and compare two decoders on the same Wan latent video.
@@ -428,73 +429,56 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
428
  lines=5,
429
  placeholder="A woman turns toward the camera as her hair moves in the wind...",
430
  )
431
- with gr.Row():
432
- seed_input = gr.Number(
433
- label="Seed",
434
- value=None,
435
- precision=0,
436
- info="Optional",
437
- )
438
- run_button = gr.Button(
439
- "Generate Latents and Compare Decoders",
440
- variant="primary",
441
- elem_id="generate-btn",
442
- )
443
-
444
- gr.Examples(
445
- examples=[
446
- ["A calm portrait shot with subtle blinking and gentle camera drift.", None],
447
- ["A dramatic push-in as cloth and hair sway in the breeze.", 7],
448
- ["", None],
449
- ],
450
- inputs=[prompt_input, seed_input],
451
- label="Quick Prompt Starters",
452
  )
453
-
454
- with gr.Column(scale=4, elem_classes="panel-card"):
455
- gr.HTML(
456
- """
457
- <div class="section-title">What This Runs</div>
458
- <div class="compare-note">
459
- Step 1: Generate Wan I2V latents from the uploaded image.<br>
460
- Step 2: Decode the same latents with Wan VAE.<br>
461
- Step 3: Decode the same latents with RefDecoder.
462
- </div>
463
- <div class="section-copy">
464
- This is a comparison demo, not a general editing UI. The two videos differ only in the decoder used on the same latent representation.
465
- </div>
466
- """
467
- )
468
- status_output = gr.Textbox(
469
- label="Run Info",
470
- lines=7,
471
- interactive=False,
472
  )
473
- latent_output = gr.File(label="Wan Latents (.pt)")
474
 
475
- with gr.Row(equal_height=True):
476
- with gr.Column(elem_classes="output-card"):
477
- gr.HTML(
478
- """
479
- <div class="section-title">Wan Baseline</div>
480
- <div class="section-copy">Decoded with Wan2.1's original VAE.</div>
481
- """
482
- )
483
- wan_video_output = gr.Video(label="Wan VAE Decode", height=420)
 
 
 
 
 
 
484
 
485
- with gr.Column(elem_classes="output-card"):
486
- gr.HTML(
487
- """
488
- <div class="section-title">RefDecoder Result</div>
489
- <div class="section-copy">Decoded with the custom RefDecoder checkpoint.</div>
490
- """
491
- )
492
- ref_video_output = gr.Video(label="RefDecoder Decode", height=420)
 
 
 
 
 
 
 
 
 
493
 
494
  run_button.click(
495
  fn=generate_and_decode,
496
  inputs=[image_input, prompt_input, seed_input],
497
- outputs=[latent_output, wan_video_output, ref_video_output, status_output],
498
  )
499
 
500
 
 
73
  image_encoder=image_encoder,
74
  torch_dtype=PIPE_DTYPE,
75
  )
76
+ pipe = pipe.to(DEVICE)
 
 
 
77
  return pipe
78
 
79
 
 
279
  f"Latents: {tuple(latents.shape)}"
280
  )
281
  progress(1.0, desc="Done")
282
+ return wan_video_path, ref_video_path, status
283
 
284
 
285
  CUSTOM_CSS = """
286
+ @import url('https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,450;9..144,550&display=swap');
287
+
288
  :root {
289
  --page-bg: #f4f1e8;
290
  --card-bg: rgba(255, 252, 246, 0.92);
 
293
  --accent-2: #c96f42;
294
  --text-main: #201a14;
295
  --text-soft: #5c5348;
296
+ --copy-font: "Fraunces", "Iowan Old Style", "Palatino Linotype", serif;
297
  }
298
 
299
  .gradio-container {
 
348
  color: var(--text-soft);
349
  font-size: 17px;
350
  line-height: 1.6;
351
+ font-family: var(--copy-font);
352
  }
353
 
354
  .panel-card,
 
368
  color: var(--text-soft);
369
  font-size: 14px;
370
  line-height: 1.55;
371
+ font-family: var(--copy-font);
372
  }
373
 
374
  .compare-note {
 
400
  gr.HTML(
401
  """
402
  <div class="hero-card">
 
403
  <div class="hero-title">RefDecoder I2V Demo</div>
404
  <p class="hero-copy">
405
  Upload one image, optionally add a motion prompt, and compare two decoders on the same Wan latent video.
 
429
  lines=5,
430
  placeholder="A woman turns toward the camera as her hair moves in the wind...",
431
  )
432
+ seed_input = gr.Number(
433
+ label="Seed",
434
+ value=None,
435
+ precision=0,
436
+ info="Optional",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  )
438
+ run_button = gr.Button(
439
+ "Generate Latents and Compare Decoders",
440
+ variant="primary",
441
+ elem_id="generate-btn",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  )
 
443
 
444
+ with gr.Column(scale=6):
445
+ with gr.Column(elem_classes="panel-card"):
446
+ gr.HTML(
447
+ """
448
+ <div class="section-title">Run Info</div>
449
+ <div class="section-copy">
450
+ Generation details for the current comparison run.
451
+ </div>
452
+ """
453
+ )
454
+ status_output = gr.Textbox(
455
+ label="Run Info",
456
+ lines=7,
457
+ interactive=False,
458
+ )
459
 
460
+ with gr.Column(elem_classes="output-card"):
461
+ gr.HTML(
462
+ """
463
+ <div class="section-title">Wan Baseline</div>
464
+ <div class="section-copy">Decoded with Wan2.1's original VAE.</div>
465
+ """
466
+ )
467
+ wan_video_output = gr.Video(label="Wan VAE Decode", height=260)
468
+
469
+ with gr.Column(elem_classes="output-card"):
470
+ gr.HTML(
471
+ """
472
+ <div class="section-title">RefDecoder Result</div>
473
+ <div class="section-copy">Decoded with the custom RefDecoder checkpoint.</div>
474
+ """
475
+ )
476
+ ref_video_output = gr.Video(label="RefDecoder Decode", height=260)
477
 
478
  run_button.click(
479
  fn=generate_and_decode,
480
  inputs=[image_input, prompt_input, seed_input],
481
+ outputs=[wan_video_output, ref_video_output, status_output],
482
  )
483
 
484