Spaces:

allenai
/

RefDecoder

Configuration error

App Files Community

Arrokothwhi committed 5 days ago

Commit

05c70dd

1 Parent(s): 625a3af

update

Browse files

Files changed (1) hide show

app.py +49 -65

app.py CHANGED Viewed

@@ -73,10 +73,7 @@ def load_generation_pipe():
         image_encoder=image_encoder,
         torch_dtype=PIPE_DTYPE,
     )
-    if DEVICE == "cuda":
-        pipe.enable_model_cpu_offload()
-    else:
-        pipe = pipe.to(DEVICE)
     return pipe
@@ -282,10 +279,12 @@ def generate_and_decode(image, prompt, seed, progress=gr.Progress(track_tqdm=Fal
         f"Latents: {tuple(latents.shape)}"
     )
     progress(1.0, desc="Done")
-    return str(latent_path), wan_video_path, ref_video_path, status
 CUSTOM_CSS = """
 :root {
     --page-bg: #f4f1e8;
     --card-bg: rgba(255, 252, 246, 0.92);
@@ -294,6 +293,7 @@ CUSTOM_CSS = """
     --accent-2: #c96f42;
     --text-main: #201a14;
     --text-soft: #5c5348;
 }
 .gradio-container {
@@ -348,6 +348,7 @@ CUSTOM_CSS = """
     color: var(--text-soft);
     font-size: 17px;
     line-height: 1.6;
 }
 .panel-card,
@@ -367,6 +368,7 @@ CUSTOM_CSS = """
     color: var(--text-soft);
     font-size: 14px;
     line-height: 1.55;
 }
 .compare-note {
@@ -398,7 +400,6 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
         gr.HTML(
             """
             <div class="hero-card">
-                <div class="hero-kicker">Image To Video Comparison</div>
                 <div class="hero-title">RefDecoder I2V Demo</div>
                 <p class="hero-copy">
                     Upload one image, optionally add a motion prompt, and compare two decoders on the same Wan latent video.
@@ -428,73 +429,56 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
                     lines=5,
                     placeholder="A woman turns toward the camera as her hair moves in the wind...",
                 )
-                with gr.Row():
-                    seed_input = gr.Number(
-                        label="Seed",
-                        value=None,
-                        precision=0,
-                        info="Optional",
-                    )
-                    run_button = gr.Button(
-                        "Generate Latents and Compare Decoders",
-                        variant="primary",
-                        elem_id="generate-btn",
-                    )
-                gr.Examples(
-                    examples=[
-                        ["A calm portrait shot with subtle blinking and gentle camera drift.", None],
-                        ["A dramatic push-in as cloth and hair sway in the breeze.", 7],
-                        ["", None],
-                    ],
-                    inputs=[prompt_input, seed_input],
-                    label="Quick Prompt Starters",
                 )
-            with gr.Column(scale=4, elem_classes="panel-card"):
-                gr.HTML(
-                    """
-                    <div class="section-title">What This Runs</div>
-                    <div class="compare-note">
-                        Step 1: Generate Wan I2V latents from the uploaded image.<br>
-                        Step 2: Decode the same latents with Wan VAE.<br>
-                        Step 3: Decode the same latents with RefDecoder.
-                    </div>
-                    <div class="section-copy">
-                        This is a comparison demo, not a general editing UI. The two videos differ only in the decoder used on the same latent representation.
-                    </div>
-                    """
-                )
-                status_output = gr.Textbox(
-                    label="Run Info",
-                    lines=7,
-                    interactive=False,
                 )
-                latent_output = gr.File(label="Wan Latents (.pt)")
-        with gr.Row(equal_height=True):
-            with gr.Column(elem_classes="output-card"):
-                gr.HTML(
-                    """
-                    <div class="section-title">Wan Baseline</div>
-                    <div class="section-copy">Decoded with Wan2.1's original VAE.</div>
-                    """
-                )
-                wan_video_output = gr.Video(label="Wan VAE Decode", height=420)
-            with gr.Column(elem_classes="output-card"):
-                gr.HTML(
-                    """
-                    <div class="section-title">RefDecoder Result</div>
-                    <div class="section-copy">Decoded with the custom RefDecoder checkpoint.</div>
-                    """
-                )
-                ref_video_output = gr.Video(label="RefDecoder Decode", height=420)
         run_button.click(
             fn=generate_and_decode,
             inputs=[image_input, prompt_input, seed_input],
-            outputs=[latent_output, wan_video_output, ref_video_output, status_output],
         )

         image_encoder=image_encoder,
         torch_dtype=PIPE_DTYPE,
     )
+    pipe = pipe.to(DEVICE)
     return pipe
         f"Latents: {tuple(latents.shape)}"
     )
     progress(1.0, desc="Done")
+    return wan_video_path, ref_video_path, status
 CUSTOM_CSS = """
+@import url('https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,450;9..144,550&display=swap');
 :root {
     --page-bg: #f4f1e8;
     --card-bg: rgba(255, 252, 246, 0.92);
     --accent-2: #c96f42;
     --text-main: #201a14;
     --text-soft: #5c5348;
+    --copy-font: "Fraunces", "Iowan Old Style", "Palatino Linotype", serif;
 }
 .gradio-container {
     color: var(--text-soft);
     font-size: 17px;
     line-height: 1.6;
+    font-family: var(--copy-font);
 }
 .panel-card,
     color: var(--text-soft);
     font-size: 14px;
     line-height: 1.55;
+    font-family: var(--copy-font);
 }
 .compare-note {
         gr.HTML(
             """
             <div class="hero-card">
                 <div class="hero-title">RefDecoder I2V Demo</div>
                 <p class="hero-copy">
                     Upload one image, optionally add a motion prompt, and compare two decoders on the same Wan latent video.
                     lines=5,
                     placeholder="A woman turns toward the camera as her hair moves in the wind...",
                 )
+                seed_input = gr.Number(
+                    label="Seed",
+                    value=None,
+                    precision=0,
+                    info="Optional",
                 )
+                run_button = gr.Button(
+                    "Generate Latents and Compare Decoders",
+                    variant="primary",
+                    elem_id="generate-btn",
                 )
+            with gr.Column(scale=6):
+                with gr.Column(elem_classes="panel-card"):
+                    gr.HTML(
+                        """
+                        <div class="section-title">Run Info</div>
+                        <div class="section-copy">
+                            Generation details for the current comparison run.
+                        </div>
+                        """
+                    )
+                    status_output = gr.Textbox(
+                        label="Run Info",
+                        lines=7,
+                        interactive=False,
+                    )
+                with gr.Column(elem_classes="output-card"):
+                    gr.HTML(
+                        """
+                        <div class="section-title">Wan Baseline</div>
+                        <div class="section-copy">Decoded with Wan2.1's original VAE.</div>
+                        """
+                    )
+                    wan_video_output = gr.Video(label="Wan VAE Decode", height=260)
+                with gr.Column(elem_classes="output-card"):
+                    gr.HTML(
+                        """
+                        <div class="section-title">RefDecoder Result</div>
+                        <div class="section-copy">Decoded with the custom RefDecoder checkpoint.</div>
+                        """
+                    )
+                    ref_video_output = gr.Video(label="RefDecoder Decode", height=260)
         run_button.click(
             fn=generate_and_decode,
             inputs=[image_input, prompt_input, seed_input],
+            outputs=[wan_video_output, ref_video_output, status_output],
         )