Spaces:
Configuration error
Configuration error
Commit ·
05c70dd
1
Parent(s): 625a3af
update
Browse files
app.py
CHANGED
|
@@ -73,10 +73,7 @@ def load_generation_pipe():
|
|
| 73 |
image_encoder=image_encoder,
|
| 74 |
torch_dtype=PIPE_DTYPE,
|
| 75 |
)
|
| 76 |
-
|
| 77 |
-
pipe.enable_model_cpu_offload()
|
| 78 |
-
else:
|
| 79 |
-
pipe = pipe.to(DEVICE)
|
| 80 |
return pipe
|
| 81 |
|
| 82 |
|
|
@@ -282,10 +279,12 @@ def generate_and_decode(image, prompt, seed, progress=gr.Progress(track_tqdm=Fal
|
|
| 282 |
f"Latents: {tuple(latents.shape)}"
|
| 283 |
)
|
| 284 |
progress(1.0, desc="Done")
|
| 285 |
-
return
|
| 286 |
|
| 287 |
|
| 288 |
CUSTOM_CSS = """
|
|
|
|
|
|
|
| 289 |
:root {
|
| 290 |
--page-bg: #f4f1e8;
|
| 291 |
--card-bg: rgba(255, 252, 246, 0.92);
|
|
@@ -294,6 +293,7 @@ CUSTOM_CSS = """
|
|
| 294 |
--accent-2: #c96f42;
|
| 295 |
--text-main: #201a14;
|
| 296 |
--text-soft: #5c5348;
|
|
|
|
| 297 |
}
|
| 298 |
|
| 299 |
.gradio-container {
|
|
@@ -348,6 +348,7 @@ CUSTOM_CSS = """
|
|
| 348 |
color: var(--text-soft);
|
| 349 |
font-size: 17px;
|
| 350 |
line-height: 1.6;
|
|
|
|
| 351 |
}
|
| 352 |
|
| 353 |
.panel-card,
|
|
@@ -367,6 +368,7 @@ CUSTOM_CSS = """
|
|
| 367 |
color: var(--text-soft);
|
| 368 |
font-size: 14px;
|
| 369 |
line-height: 1.55;
|
|
|
|
| 370 |
}
|
| 371 |
|
| 372 |
.compare-note {
|
|
@@ -398,7 +400,6 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
|
|
| 398 |
gr.HTML(
|
| 399 |
"""
|
| 400 |
<div class="hero-card">
|
| 401 |
-
<div class="hero-kicker">Image To Video Comparison</div>
|
| 402 |
<div class="hero-title">RefDecoder I2V Demo</div>
|
| 403 |
<p class="hero-copy">
|
| 404 |
Upload one image, optionally add a motion prompt, and compare two decoders on the same Wan latent video.
|
|
@@ -428,73 +429,56 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
|
|
| 428 |
lines=5,
|
| 429 |
placeholder="A woman turns toward the camera as her hair moves in the wind...",
|
| 430 |
)
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
info="Optional",
|
| 437 |
-
)
|
| 438 |
-
run_button = gr.Button(
|
| 439 |
-
"Generate Latents and Compare Decoders",
|
| 440 |
-
variant="primary",
|
| 441 |
-
elem_id="generate-btn",
|
| 442 |
-
)
|
| 443 |
-
|
| 444 |
-
gr.Examples(
|
| 445 |
-
examples=[
|
| 446 |
-
["A calm portrait shot with subtle blinking and gentle camera drift.", None],
|
| 447 |
-
["A dramatic push-in as cloth and hair sway in the breeze.", 7],
|
| 448 |
-
["", None],
|
| 449 |
-
],
|
| 450 |
-
inputs=[prompt_input, seed_input],
|
| 451 |
-
label="Quick Prompt Starters",
|
| 452 |
)
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
""
|
| 457 |
-
<div class="section-title">What This Runs</div>
|
| 458 |
-
<div class="compare-note">
|
| 459 |
-
Step 1: Generate Wan I2V latents from the uploaded image.<br>
|
| 460 |
-
Step 2: Decode the same latents with Wan VAE.<br>
|
| 461 |
-
Step 3: Decode the same latents with RefDecoder.
|
| 462 |
-
</div>
|
| 463 |
-
<div class="section-copy">
|
| 464 |
-
This is a comparison demo, not a general editing UI. The two videos differ only in the decoder used on the same latent representation.
|
| 465 |
-
</div>
|
| 466 |
-
"""
|
| 467 |
-
)
|
| 468 |
-
status_output = gr.Textbox(
|
| 469 |
-
label="Run Info",
|
| 470 |
-
lines=7,
|
| 471 |
-
interactive=False,
|
| 472 |
)
|
| 473 |
-
latent_output = gr.File(label="Wan Latents (.pt)")
|
| 474 |
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
|
| 494 |
run_button.click(
|
| 495 |
fn=generate_and_decode,
|
| 496 |
inputs=[image_input, prompt_input, seed_input],
|
| 497 |
-
outputs=[
|
| 498 |
)
|
| 499 |
|
| 500 |
|
|
|
|
| 73 |
image_encoder=image_encoder,
|
| 74 |
torch_dtype=PIPE_DTYPE,
|
| 75 |
)
|
| 76 |
+
pipe = pipe.to(DEVICE)
|
|
|
|
|
|
|
|
|
|
| 77 |
return pipe
|
| 78 |
|
| 79 |
|
|
|
|
| 279 |
f"Latents: {tuple(latents.shape)}"
|
| 280 |
)
|
| 281 |
progress(1.0, desc="Done")
|
| 282 |
+
return wan_video_path, ref_video_path, status
|
| 283 |
|
| 284 |
|
| 285 |
CUSTOM_CSS = """
|
| 286 |
+
@import url('https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,450;9..144,550&display=swap');
|
| 287 |
+
|
| 288 |
:root {
|
| 289 |
--page-bg: #f4f1e8;
|
| 290 |
--card-bg: rgba(255, 252, 246, 0.92);
|
|
|
|
| 293 |
--accent-2: #c96f42;
|
| 294 |
--text-main: #201a14;
|
| 295 |
--text-soft: #5c5348;
|
| 296 |
+
--copy-font: "Fraunces", "Iowan Old Style", "Palatino Linotype", serif;
|
| 297 |
}
|
| 298 |
|
| 299 |
.gradio-container {
|
|
|
|
| 348 |
color: var(--text-soft);
|
| 349 |
font-size: 17px;
|
| 350 |
line-height: 1.6;
|
| 351 |
+
font-family: var(--copy-font);
|
| 352 |
}
|
| 353 |
|
| 354 |
.panel-card,
|
|
|
|
| 368 |
color: var(--text-soft);
|
| 369 |
font-size: 14px;
|
| 370 |
line-height: 1.55;
|
| 371 |
+
font-family: var(--copy-font);
|
| 372 |
}
|
| 373 |
|
| 374 |
.compare-note {
|
|
|
|
| 400 |
gr.HTML(
|
| 401 |
"""
|
| 402 |
<div class="hero-card">
|
|
|
|
| 403 |
<div class="hero-title">RefDecoder I2V Demo</div>
|
| 404 |
<p class="hero-copy">
|
| 405 |
Upload one image, optionally add a motion prompt, and compare two decoders on the same Wan latent video.
|
|
|
|
| 429 |
lines=5,
|
| 430 |
placeholder="A woman turns toward the camera as her hair moves in the wind...",
|
| 431 |
)
|
| 432 |
+
seed_input = gr.Number(
|
| 433 |
+
label="Seed",
|
| 434 |
+
value=None,
|
| 435 |
+
precision=0,
|
| 436 |
+
info="Optional",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
)
|
| 438 |
+
run_button = gr.Button(
|
| 439 |
+
"Generate Latents and Compare Decoders",
|
| 440 |
+
variant="primary",
|
| 441 |
+
elem_id="generate-btn",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
)
|
|
|
|
| 443 |
|
| 444 |
+
with gr.Column(scale=6):
|
| 445 |
+
with gr.Column(elem_classes="panel-card"):
|
| 446 |
+
gr.HTML(
|
| 447 |
+
"""
|
| 448 |
+
<div class="section-title">Run Info</div>
|
| 449 |
+
<div class="section-copy">
|
| 450 |
+
Generation details for the current comparison run.
|
| 451 |
+
</div>
|
| 452 |
+
"""
|
| 453 |
+
)
|
| 454 |
+
status_output = gr.Textbox(
|
| 455 |
+
label="Run Info",
|
| 456 |
+
lines=7,
|
| 457 |
+
interactive=False,
|
| 458 |
+
)
|
| 459 |
|
| 460 |
+
with gr.Column(elem_classes="output-card"):
|
| 461 |
+
gr.HTML(
|
| 462 |
+
"""
|
| 463 |
+
<div class="section-title">Wan Baseline</div>
|
| 464 |
+
<div class="section-copy">Decoded with Wan2.1's original VAE.</div>
|
| 465 |
+
"""
|
| 466 |
+
)
|
| 467 |
+
wan_video_output = gr.Video(label="Wan VAE Decode", height=260)
|
| 468 |
+
|
| 469 |
+
with gr.Column(elem_classes="output-card"):
|
| 470 |
+
gr.HTML(
|
| 471 |
+
"""
|
| 472 |
+
<div class="section-title">RefDecoder Result</div>
|
| 473 |
+
<div class="section-copy">Decoded with the custom RefDecoder checkpoint.</div>
|
| 474 |
+
"""
|
| 475 |
+
)
|
| 476 |
+
ref_video_output = gr.Video(label="RefDecoder Decode", height=260)
|
| 477 |
|
| 478 |
run_button.click(
|
| 479 |
fn=generate_and_decode,
|
| 480 |
inputs=[image_input, prompt_input, seed_input],
|
| 481 |
+
outputs=[wan_video_output, ref_video_output, status_output],
|
| 482 |
)
|
| 483 |
|
| 484 |
|