Spaces:

allenai
/

RefDecoder

Configuration error

App Files Files Community

xiangfan00 committed 3 days ago

Commit

efdefa8

1 Parent(s): eb10dec

Optimize and apply fixes

Browse files

Files changed (1) hide show

app.py +303 -51

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gc
 import html
 import random
 import sys
 import uuid
 from pathlib import Path
 from urllib.parse import quote
@@ -86,7 +87,11 @@ def log_cuda_mem(tag):
         print(f"[mem] {tag}: CUDA not available")
         return
-    free_bytes, total_bytes = torch.cuda.mem_get_info()
     allocated_bytes = torch.cuda.memory_allocated()
     reserved_bytes = torch.cuda.memory_reserved()
     print(
@@ -123,7 +128,6 @@ def load_generation_pipe():
         image_encoder=image_encoder,
         torch_dtype=PIPE_DTYPE,
     )
-    pipe = pipe.to(DEVICE)
     log_cuda_mem("after load_generation_pipe")
     return pipe
@@ -135,7 +139,6 @@ def load_wan_vae():
         subfolder="vae",
         torch_dtype=PIPE_DTYPE,
     )
-    vae = vae.to(DEVICE)
     vae.eval()
     log_cuda_mem("after load_wan_vae")
     return vae
@@ -170,12 +173,17 @@ def load_refdecoder_module():
     vae.load_state_dict(vae_sd, strict=False)
     transformer.load_state_dict(transformer_sd, strict=False)
-    vae = vae.to(DEVICE).eval()
-    transformer = transformer.to(DEVICE).eval()
     log_cuda_mem("after load_refdecoder_module")
     return vae, transformer
 def resize_image_for_wan(image, pipe):
     image = image.convert("RGB")
     aspect_ratio = image.height / image.width
@@ -240,8 +248,8 @@ def build_compare_html(wan_video_path, ref_video_path):
           gap: 12px;
         }}
         .compare-topbar {{
-          display: grid;
-          grid-template-columns: 1fr auto 1fr;
           align-items: center;
           gap: 12px;
         }}
@@ -339,13 +347,33 @@ def build_compare_html(wan_video_path, ref_video_path):
           line-height: 1.5;
           text-align: center;
         }}
       </style>
     </head>
     <body>
       <div class="compare-shell" id="{compare_id}">
         <div class="compare-topbar">
           <div class="compare-chip">Wan Baseline</div>
-          <button class="compare-button" type="button">Pause</button>
           <div class="compare-chip compare-chip-right">RefDecoder</div>
         </div>
         <div class="compare-stage">
@@ -354,6 +382,12 @@ def build_compare_html(wan_video_path, ref_video_path):
           <div class="compare-divider"></div>
           <input class="compare-range" type="range" min="0" max="100" value="50" />
         </div>
         <div class="compare-caption">Drag the divider to compare the two decoders on the same latent video.</div>
       </div>
       <script>
@@ -363,8 +397,13 @@ def build_compare_html(wan_video_path, ref_video_path):
         const overlay = root.querySelector(".compare-overlay");
         const divider = root.querySelector(".compare-divider");
         const slider = root.querySelector(".compare-range");
-        const button = root.querySelector(".compare-button");
         const videos = Array.from(root.querySelectorAll("video"));
         const applySplit = () => {{
           const value = Number(slider.value);
@@ -396,6 +435,28 @@ def build_compare_html(wan_video_path, ref_video_path):
           primary.addEventListener("ratechange", () => {{ secondary.playbackRate = primary.playbackRate; }});
         }};
         if (base.tagName === "VIDEO" && overlay.tagName === "VIDEO") {{
           bindSync(base, overlay);
           bindSync(overlay, base);
@@ -403,6 +464,7 @@ def build_compare_html(wan_video_path, ref_video_path):
           button.disabled = true;
           button.textContent = "Play";
           button.style.opacity = "0.55";
         }}
         videos.forEach((video) => {{
@@ -417,8 +479,25 @@ def build_compare_html(wan_video_path, ref_video_path):
           }}
         }});
         slider.addEventListener("input", applySplit);
         applySplit();
       }})();
       </script>
     </body>
@@ -480,30 +559,30 @@ def decode_with_refdecoder(latents, reference_frame, vae, transformer):
     return video
-def button_state(label, interactive):
-    return gr.update(value=label, interactive=interactive)
-@spaces.GPU(duration=80)
 def generate_latents_on_gpu(image, prompt, seed):
     log_cuda_mem("start generate_latents_on_gpu")
-    pipe = load_generation_pipe()
-    resized_image, height, width = resize_image_for_wan(image, pipe)
     generator = torch.Generator(device=DEVICE).manual_seed(seed)
-    with torch.no_grad():
-        output = pipe(
-            image=resized_image,
-            prompt=prompt,
-            negative_prompt=NEGATIVE_PROMPT,
-            height=height,
-            width=width,
-            num_frames=NUM_FRAMES,
-            num_inference_steps=NUM_INFERENCE_STEPS,
-            guidance_scale=GUIDANCE_SCALE,
-            generator=generator,
-            output_type="latent",
-        )
-    latents = normalize_latent_shape(output.frames).detach().cpu()
     log_cuda_mem("after latent generation")
     return latents, resized_image, height, width
@@ -511,37 +590,52 @@ def generate_latents_on_gpu(image, prompt, seed):
 @spaces.GPU(duration=20)
 def decode_wan_on_gpu(latents):
     log_cuda_mem("start decode_wan_on_gpu")
-    wan_vae = load_wan_vae()
-    video = decode_with_wan_vae(latents, wan_vae)
     log_cuda_mem("after wan decode")
-    return video.detach().cpu()
 @spaces.GPU(duration=25)
 def decode_refdecoder_on_gpu(latents, reference_frame):
     log_cuda_mem("start decode_refdecoder_on_gpu")
-    ref_vae, ref_transformer = load_refdecoder_module()
-    video = decode_with_refdecoder(latents, reference_frame, ref_vae, ref_transformer)
     log_cuda_mem("after refdecoder decode")
-    return video.detach().cpu()
-def generate_and_decode(image, prompt, seed):
     if image is None:
         raise gr.Error("Please upload an input image.")
     if DEVICE != "cuda":
         raise gr.Error("This demo expects a CUDA GPU to run Wan I2V generation.")
-    yield gr.update(), gr.update(), gr.update(), button_state("Loading Wan I2V...", False)
     prompt = prompt.strip() if prompt else ""
     seed = int(seed) if seed is not None else random.randint(0, 2**32 - 1)
     run_dir = OUTPUT_ROOT / f"refdecoder_demo_{uuid.uuid4().hex}"
     run_dir.mkdir(parents=True, exist_ok=True)
-    yield gr.update(), gr.update(), gr.update(), button_state("Generating Latents...", False)
     latents, resized_image, height, width = generate_latents_on_gpu(image, prompt, seed)
     reference_frame = build_reference_frame(resized_image, "cpu")
     latent_path = run_dir / "wan_latents.pt"
@@ -556,26 +650,41 @@ def generate_and_decode(image, prompt, seed):
         latent_path,
     )
-    yield gr.update(), gr.update(), gr.update(), button_state("Decoding Wan Baseline...", False)
     wan_video = decode_wan_on_gpu(latents)
     wan_video_path = save_video_tensor(wan_video, run_dir / "wan_vae.mp4")
     del wan_video
     gc.collect()
-    yield gr.update(), wan_video_path, gr.update(), button_state("Decoding RefDecoder...", False)
     ref_video = decode_refdecoder_on_gpu(latents, reference_frame)
     ref_video_path = save_video_tensor(ref_video, run_dir / "refdecoder.mp4")
     del ref_video
     gc.collect()
     compare_html = build_compare_html(wan_video_path, ref_video_path)
-    yield (
         gr.update(value=compare_html, visible=True),
         wan_video_path,
         ref_video_path,
-        button_state("Generate Comparison", True),
     )
@@ -709,16 +818,14 @@ CUSTOM_CSS = """
 .compare-frame {
     width: 100%;
-    height: 860px;
     border: 0;
     background: transparent;
     overflow: hidden;
-}
-@media (max-width: 900px) {
-    .compare-frame {
-        height: 720px;
-    }
 }
 .compare-topbar {
@@ -818,6 +925,67 @@ CUSTOM_CSS = """
 .seed-action-row > .gradio-column {
     min-width: 0;
 }
 """
@@ -825,6 +993,55 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
     with gr.Column(elem_classes="app-shell"):
         gr.HTML(
             """
             <div class="hero-card">
                 <div class="hero-title">RefDecoder I2V Demo</div>
                 <p class="hero-copy">
@@ -871,6 +1088,7 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
                                 variant="primary",
                                 elem_id="generate-btn",
                             )
         with gr.Column(elem_classes="panel-card"):
             gr.HTML(
@@ -883,13 +1101,47 @@ with gr.Blocks(title="RefDecoder I2V Demo", theme=gr.themes.Soft(), css=CUSTOM_C
             )
             compare_output = gr.HTML(value=build_compare_html(None, None))
             wan_video_hidden = gr.Video(visible=False)
             ref_video_hidden = gr.Video(visible=False)
         run_button.click(
             fn=generate_and_decode,
             inputs=[image_input, prompt_input, seed_input],
-            outputs=[compare_output, wan_video_hidden, ref_video_hidden, run_button],
         )

 import html
 import random
 import sys
+import time
 import uuid
 from pathlib import Path
 from urllib.parse import quote
         print(f"[mem] {tag}: CUDA not available")
         return
+    try:
+        free_bytes, total_bytes = torch.cuda.mem_get_info()
+    except RuntimeError as exc:
+        print(f"[mem] {tag}: CUDA not currently leased ({exc})")
+        return
     allocated_bytes = torch.cuda.memory_allocated()
     reserved_bytes = torch.cuda.memory_reserved()
     print(
         image_encoder=image_encoder,
         torch_dtype=PIPE_DTYPE,
     )
     log_cuda_mem("after load_generation_pipe")
     return pipe
         subfolder="vae",
         torch_dtype=PIPE_DTYPE,
     )
     vae.eval()
     log_cuda_mem("after load_wan_vae")
     return vae
     vae.load_state_dict(vae_sd, strict=False)
     transformer.load_state_dict(transformer_sd, strict=False)
     log_cuda_mem("after load_refdecoder_module")
     return vae, transformer
+# Preload all models on CPU at init so each @spaces.GPU lease only pays for the
+# CPU -> GPU transfer, not the full from_pretrained / checkpoint read.
+# These are module-level singletons shared by every request: the *_on_gpu
+# functions move them to DEVICE for the duration of a call and back to "cpu"
+# afterwards.
+GENERATION_PIPE = load_generation_pipe()
+WAN_VAE = load_wan_vae()
+REFDECODER_VAE, REFDECODER_TRANSFORMER = load_refdecoder_module()
 def resize_image_for_wan(image, pipe):
     image = image.convert("RGB")
     aspect_ratio = image.height / image.width
           gap: 12px;
         }}
         .compare-topbar {{
+          display: flex;
+          justify-content: space-between;
           align-items: center;
           gap: 12px;
         }}
           line-height: 1.5;
           text-align: center;
         }}
+        .compare-controls {{
+          display: flex;
+          justify-content: center;
+          align-items: center;
+          gap: 10px;
+          flex-wrap: wrap;
+        }}
+        .compare-controls .compare-button {{
+          padding: 9px 16px;
+          font-size: 14px;
+        }}
+        .compare-button-step {{
+          background: #2f5746;
+        }}
+        .compare-button-reset {{
+          background: #c96f42;
+        }}
+        .compare-button[disabled] {{
+          opacity: 0.55;
+          cursor: not-allowed;
+        }}
       </style>
     </head>
     <body>
       <div class="compare-shell" id="{compare_id}">
         <div class="compare-topbar">
           <div class="compare-chip">Wan Baseline</div>
           <div class="compare-chip compare-chip-right">RefDecoder</div>
         </div>
         <div class="compare-stage">
           <div class="compare-divider"></div>
           <input class="compare-range" type="range" min="0" max="100" value="50" />
         </div>
+        <div class="compare-controls">
+          <button class="compare-button compare-button-step" type="button" data-action="prev">− 1 Frame</button>
+          <button class="compare-button" type="button" data-action="toggle">Pause</button>
+          <button class="compare-button compare-button-step" type="button" data-action="next">+ 1 Frame</button>
+          <button class="compare-button compare-button-reset" type="button" data-action="reset">Reset Playback</button>
+        </div>
         <div class="compare-caption">Drag the divider to compare the two decoders on the same latent video.</div>
       </div>
       <script>
         const overlay = root.querySelector(".compare-overlay");
         const divider = root.querySelector(".compare-divider");
         const slider = root.querySelector(".compare-range");
+        const button = root.querySelector('[data-action="toggle"]');
+        const prevBtn = root.querySelector('[data-action="prev"]');
+        const nextBtn = root.querySelector('[data-action="next"]');
+        const resetBtn = root.querySelector('[data-action="reset"]');
+        const stepButtons = [prevBtn, nextBtn, resetBtn];
         const videos = Array.from(root.querySelectorAll("video"));
+        const FRAME_DELTA = 1 / {FPS};
         const applySplit = () => {{
           const value = Number(slider.value);
           primary.addEventListener("ratechange", () => {{ secondary.playbackRate = primary.playbackRate; }});
         }};
+        const stepFrame = (delta) => {{
+          if (!videos.length) return;
+          pauseBoth();
+          videos.forEach((video) => {{
+            const duration = isFinite(video.duration) ? video.duration : 0;
+            let nextTime = (video.currentTime || 0) + delta;
+            if (duration > 0) {{
+              nextTime = ((nextTime % duration) + duration) % duration;
+            }} else {{
+              nextTime = Math.max(0, nextTime);
+            }}
+            try {{ video.currentTime = nextTime; }} catch (e) {{}}
+          }});
+        }};
+        const resetPlayback = () => {{
+          pauseBoth();
+          videos.forEach((video) => {{
+            try {{ video.currentTime = 0; }} catch (e) {{}}
+          }});
+        }};
         if (base.tagName === "VIDEO" && overlay.tagName === "VIDEO") {{
           bindSync(base, overlay);
           bindSync(overlay, base);
           button.disabled = true;
           button.textContent = "Play";
           button.style.opacity = "0.55";
+          stepButtons.forEach((btn) => {{ if (btn) btn.disabled = true; }});
         }}
         videos.forEach((video) => {{
           }}
         }});
+        if (prevBtn) prevBtn.addEventListener("click", () => stepFrame(-FRAME_DELTA));
+        if (nextBtn) nextBtn.addEventListener("click", () => stepFrame(FRAME_DELTA));
+        if (resetBtn) resetBtn.addEventListener("click", resetPlayback);
         slider.addEventListener("input", applySplit);
         applySplit();
+        const reportHeight = () => {{
+          const h = Math.ceil(root.getBoundingClientRect().height + 2);
+          parent.postMessage({{ type: "compare-iframe-height", id: "{compare_id}", height: h }}, "*");
+        }};
+        reportHeight();
+        window.addEventListener("load", reportHeight);
+        if (typeof ResizeObserver !== "undefined") {{
+          new ResizeObserver(reportHeight).observe(root);
+        }}
+        videos.forEach((video) => {{
+          video.addEventListener("loadedmetadata", reportHeight);
+        }});
       }})();
       </script>
     </body>
     return video
+@spaces.GPU(duration=95)
 def generate_latents_on_gpu(image, prompt, seed):
+    """Generate Wan I2V latents with the preloaded pipeline inside one GPU lease.
+
+    Args:
+        image: Input image; forwarded to ``resize_image_for_wan``.
+        prompt: Text prompt forwarded to the pipeline.
+        seed: Integer used to seed the ``torch.Generator``.
+
+    Returns:
+        ``(latents, resized_image, height, width)`` where ``latents`` has been
+        detached and copied to the CPU.
+    """
     log_cuda_mem("start generate_latents_on_gpu")
+    # The transfer and input preparation sit inside the try so that a failed or
+    # partial move to the GPU, or an error while resizing the image / building
+    # the generator, still returns the shared pipeline to the CPU.
+    try:
+        GENERATION_PIPE.to(DEVICE)
+        log_cuda_mem("after pipe -> cuda")
+        resized_image, height, width = resize_image_for_wan(image, GENERATION_PIPE)
+        generator = torch.Generator(device=DEVICE).manual_seed(seed)
+        with torch.no_grad():
+            output = GENERATION_PIPE(
+                image=resized_image,
+                prompt=prompt,
+                negative_prompt=NEGATIVE_PROMPT,
+                height=height,
+                width=width,
+                num_frames=NUM_FRAMES,
+                num_inference_steps=NUM_INFERENCE_STEPS,
+                guidance_scale=GUIDANCE_SCALE,
+                generator=generator,
+                output_type="latent",
+            )
+        latents = normalize_latent_shape(output.frames).detach().cpu()
+    finally:
+        GENERATION_PIPE.to("cpu")
     log_cuda_mem("after latent generation")
     return latents, resized_image, height, width
 @spaces.GPU(duration=20)
 def decode_wan_on_gpu(latents):
+    """Decode latents with the preloaded Wan VAE and return the video on CPU.
+
+    Args:
+        latents: Latents forwarded unchanged to ``decode_with_wan_vae``.
+
+    Returns:
+        The decoded video, detached and copied to the CPU.
+    """
     log_cuda_mem("start decode_wan_on_gpu")
+    try:
+        # Inside the try so a failed or partial transfer is still rolled back
+        # by the finally clause.
+        WAN_VAE.to(DEVICE)
+        video = decode_with_wan_vae(latents, WAN_VAE)
+        video = video.detach().cpu()
+    finally:
+        WAN_VAE.to("cpu")
     log_cuda_mem("after wan decode")
+    return video
 @spaces.GPU(duration=25)
 def decode_refdecoder_on_gpu(latents, reference_frame):
+    """Decode latents with the preloaded RefDecoder modules; return the video on CPU.
+
+    Args:
+        latents: Latents forwarded unchanged to ``decode_with_refdecoder``.
+        reference_frame: Reference frame forwarded unchanged to
+            ``decode_with_refdecoder``.
+
+    Returns:
+        The decoded video, detached and copied to the CPU.
+    """
     log_cuda_mem("start decode_refdecoder_on_gpu")
+    try:
+        # Both transfers are inside the try: if the second one fails, the
+        # finally clause still moves the first module back to the CPU.
+        REFDECODER_VAE.to(DEVICE)
+        REFDECODER_TRANSFORMER.to(DEVICE)
+        video = decode_with_refdecoder(
+            latents, reference_frame, REFDECODER_VAE, REFDECODER_TRANSFORMER,
+        )
+        video = video.detach().cpu()
+    finally:
+        REFDECODER_VAE.to("cpu")
+        REFDECODER_TRANSFORMER.to("cpu")
     log_cuda_mem("after refdecoder decode")
+    return video
+def generate_and_decode(image, prompt, seed, progress=gr.Progress()):
     if image is None:
         raise gr.Error("Please upload an input image.")
     if DEVICE != "cuda":
         raise gr.Error("This demo expects a CUDA GPU to run Wan I2V generation.")
+    request_start = time.perf_counter()
     prompt = prompt.strip() if prompt else ""
     seed = int(seed) if seed is not None else random.randint(0, 2**32 - 1)
     run_dir = OUTPUT_ROOT / f"refdecoder_demo_{uuid.uuid4().hex}"
     run_dir.mkdir(parents=True, exist_ok=True)
+    progress(0.0, desc="Generating latents")
+    t0 = time.perf_counter()
     latents, resized_image, height, width = generate_latents_on_gpu(image, prompt, seed)
+    latent_secs = time.perf_counter() - t0
+    print(f"[timing] latent generation: {latent_secs:.2f}s")
     reference_frame = build_reference_frame(resized_image, "cpu")
     latent_path = run_dir / "wan_latents.pt"
         latent_path,
     )
+    progress(0.8, desc="Decoding Wan baseline")
+    t0 = time.perf_counter()
     wan_video = decode_wan_on_gpu(latents)
+    wan_secs = time.perf_counter() - t0
+    print(f"[timing] wan decode: {wan_secs:.2f}s")
     wan_video_path = save_video_tensor(wan_video, run_dir / "wan_vae.mp4")
     del wan_video
     gc.collect()
+    progress(0.9, desc="Decoding RefDecoder")
+    t0 = time.perf_counter()
     ref_video = decode_refdecoder_on_gpu(latents, reference_frame)
+    ref_secs = time.perf_counter() - t0
+    print(f"[timing] refdecoder decode: {ref_secs:.2f}s")
     ref_video_path = save_video_tensor(ref_video, run_dir / "refdecoder.mp4")
     del ref_video
     gc.collect()
     compare_html = build_compare_html(wan_video_path, ref_video_path)
+    total_secs = time.perf_counter() - request_start
+    print(
+        f"[timing] request total: {total_secs:.2f}s "
+        f"(latents={latent_secs:.2f}s, wan={wan_secs:.2f}s, ref={ref_secs:.2f}s)"
+    )
+    return (
         gr.update(value=compare_html, visible=True),
         wan_video_path,
         ref_video_path,
+        "",
+        gr.update(value=wan_video_path, interactive=True),
+        gr.update(value=ref_video_path, interactive=True),
     )
 .compare-frame {
     width: 100%;
+    /* aspect-ratio is a tight fallback for the brief moment before the parent
+       JS estimator (and then the iframe's own postMessage) sets the height. */
+    aspect-ratio: 16 / 11;
     border: 0;
     background: transparent;
     overflow: hidden;
+    display: block;
+    transition: height 120ms ease;
 }
 .compare-topbar {
 .seed-action-row > .gradio-column {
     min-width: 0;
 }
+.run-status {
+    margin-top: 8px;
+    color: var(--text-soft);
+    font-size: 13px;
+    line-height: 1.4;
+    min-height: 1.4em;
+}
+.run-status p {
+    margin: 0;
+}
+.download-row {
+    margin-top: 12px;
+    gap: 12px;
+    justify-content: center;
+    flex-wrap: wrap;
+}
+.download-row button {
+    border: 0 !important;
+    border-radius: 999px !important;
+    padding: 10px 22px !important;
+    font-size: 14px !important;
+    font-weight: 700 !important;
+    box-shadow: none !important;
+    min-height: 0 !important;
+}
+button.download-baseline {
+    background: var(--accent) !important;
+    color: #fff !important;
+}
+button.download-ref {
+    background: var(--accent-2) !important;
+    color: #fff !important;
+}
+.download-row button:hover:not([disabled]):not(:disabled) {
+    filter: brightness(1.05);
+}
+button.download-baseline[disabled],
+button.download-baseline:disabled {
+    background: rgba(31, 106, 82, 0.14) !important;
+    color: #123a2d !important;
+    box-shadow: inset 0 0 0 1px rgba(31, 106, 82, 0.12) !important;
+    opacity: 1 !important;
+    cursor: not-allowed;
+}
+button.download-ref[disabled],
+button.download-ref:disabled {
+    background: rgba(201, 111, 66, 0.16) !important;
+    color: #6e3d23 !important;
+    box-shadow: inset 0 0 0 1px rgba(201, 111, 66, 0.16) !important;
+    opacity: 1 !important;
+    cursor: not-allowed;
+}
 """
     with gr.Column(elem_classes="app-shell"):
         gr.HTML(
             """
+            <script>
+            (() => {
+                if (window.__refdecoderResizeBound) return;
+                window.__refdecoderResizeBound = true;
+                const STAGE_RATIO = 9 / 16;
+                const CHROME = 160;
+                const observed = new WeakSet();
+                const estimateHeight = (iframe) => {
+                    if (iframe.dataset.exactSized === "1") return;
+                    const w = iframe.getBoundingClientRect().width;
+                    if (w > 0) {
+                        iframe.style.height = Math.round(w * STAGE_RATIO + CHROME) + "px";
+                    }
+                };
+                const trackIframe = (iframe) => {
+                    if (observed.has(iframe)) return;
+                    observed.add(iframe);
+                    estimateHeight(iframe);
+                    new ResizeObserver(() => estimateHeight(iframe)).observe(iframe);
+                };
+                document.querySelectorAll("iframe.compare-frame").forEach(trackIframe);
+                new MutationObserver((mutations) => {
+                    for (const m of mutations) {
+                        for (const n of m.addedNodes) {
+                            if (n.nodeType !== 1) continue;
+                            if (n.matches && n.matches("iframe.compare-frame")) trackIframe(n);
+                            const inner = n.querySelectorAll && n.querySelectorAll("iframe.compare-frame");
+                            if (inner) inner.forEach(trackIframe);
+                        }
+                    }
+                }).observe(document.body, { childList: true, subtree: true });
+                window.addEventListener("message", (e) => {
+                    if (!e.data || e.data.type !== "compare-iframe-height") return;
+                    const h = Math.max(200, Number(e.data.height) || 0);
+                    document.querySelectorAll("iframe.compare-frame").forEach((f) => {
+                        if (f.contentWindow === e.source) {
+                            f.style.height = h + "px";
+                            f.dataset.exactSized = "1";
+                        }
+                    });
+                });
+            })();
+            </script>
             <div class="hero-card">
                 <div class="hero-title">RefDecoder I2V Demo</div>
                 <p class="hero-copy">
                                 variant="primary",
                                 elem_id="generate-btn",
                             )
+                    status_md = gr.Markdown(value="", elem_classes="run-status")
         with gr.Column(elem_classes="panel-card"):
             gr.HTML(
             )
             compare_output = gr.HTML(value=build_compare_html(None, None))
+            with gr.Row(elem_classes="download-row"):
+                wan_download_btn = gr.DownloadButton(
+                    label="Download Baseline",
+                    value=None,
+                    interactive=False,
+                    elem_classes="download-baseline",
+                )
+                ref_download_btn = gr.DownloadButton(
+                    label="Download RefDecoder",
+                    value=None,
+                    interactive=False,
+                    elem_classes="download-ref",
+                )
             wan_video_hidden = gr.Video(visible=False)
             ref_video_hidden = gr.Video(visible=False)
+        def reset_for_new_run():
+            """Return updates that blank the status text and disable both download buttons."""
+            cleared_status = ""
+            wan_button_reset = gr.update(value=None, interactive=False)
+            ref_button_reset = gr.update(value=None, interactive=False)
+            return cleared_status, wan_button_reset, ref_button_reset
         run_button.click(
+            # Step 1: clear the status text and disable the download buttons,
+            # bypassing the queue (queue=False) and without a progress overlay.
+            fn=reset_for_new_run,
+            inputs=None,
+            outputs=[status_md, wan_download_btn, ref_download_btn],
+            queue=False,
+            show_progress="hidden",
+        ).then(
+            # Step 2: run generation and both decodes. The outputs list below
+            # must stay in the same order as the tuple returned by
+            # generate_and_decode.
             fn=generate_and_decode,
             inputs=[image_input, prompt_input, seed_input],
+            outputs=[
+                compare_output,
+                wan_video_hidden,
+                ref_video_hidden,
+                status_md,
+                wan_download_btn,
+                ref_download_btn,
+            ],
         )