Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,475 +1,254 @@
|
|
| 1 |
-
import
|
| 2 |
-
from typing import Optional
|
|
|
|
|
|
|
| 3 |
from PIL import Image
|
| 4 |
import gradio as gr
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
DEFAULT_W = 704 # keep defaults under the 715px threshold
|
| 20 |
-
DEFAULT_H = 704
|
| 21 |
-
POLL_INTERVAL = 2.5
|
| 22 |
-
POLL_TIMEOUT = 240 # bump if queues are long
|
| 23 |
-
MODEL = None # or set e.g. "SDXL 1.0"
|
| 24 |
-
|
| 25 |
-
# ---------------- Helpers ----------------
|
| 26 |
-
def _headers():
|
| 27 |
-
# Always send an apikey; fallback to anonymous for testing
|
| 28 |
-
return {
|
| 29 |
-
"Client-Agent": CLIENT_AGENT,
|
| 30 |
-
"apikey": HORDE_API_KEY if HORDE_API_KEY else "0000000000"
|
| 31 |
-
}
|
| 32 |
-
|
| 33 |
-
def pil_to_b64(img_pil: Image.Image) -> str:
|
| 34 |
buf = io.BytesIO()
|
| 35 |
-
|
| 36 |
-
return
|
| 37 |
|
| 38 |
-
def
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
s = (s or "").strip()
|
| 43 |
-
s = s.replace("-", "+").replace("_", "/")
|
| 44 |
-
pad = (-len(s)) % 4
|
| 45 |
-
if pad:
|
| 46 |
-
s += "=" * pad
|
| 47 |
-
try:
|
| 48 |
-
return base64.b64decode(s, validate=False)
|
| 49 |
-
except Exception:
|
| 50 |
-
return base64.urlsafe_b64decode(s + "=" * ((4 - len(s) % 4) % 4))
|
| 51 |
|
| 52 |
-
def
|
| 53 |
-
"""
|
| 54 |
-
If bytes are compressed, try gzip, zlib, bz2, lzma (in that order).
|
| 55 |
-
Returns original buf if none succeed.
|
| 56 |
-
"""
|
| 57 |
-
head = buf[:4]
|
| 58 |
-
try:
|
| 59 |
-
# gzip
|
| 60 |
-
if len(buf) > 2 and buf[0:2] == b"\x1f\x8b":
|
| 61 |
-
dbg.append("Detected gzip; decompressing…")
|
| 62 |
-
return gzip.decompress(buf)
|
| 63 |
-
# zlib (78 01/9C/DA)
|
| 64 |
-
if len(buf) > 2 and buf[0] == 0x78 and buf[1] in (0x01, 0x5E, 0x9C, 0xDA):
|
| 65 |
-
dbg.append("Detected zlib; decompressing…")
|
| 66 |
-
return zlib.decompress(buf)
|
| 67 |
-
# bz2
|
| 68 |
-
if len(buf) > 3 and buf[0:3] == b"BZh":
|
| 69 |
-
dbg.append("Detected bz2; decompressing…")
|
| 70 |
-
return bz2.decompress(buf)
|
| 71 |
-
# lzma/xz
|
| 72 |
-
if len(buf) > 6 and buf[0:6] == b"\xfd7zXZ\x00":
|
| 73 |
-
dbg.append("Detected lzma/xz; decompressing…")
|
| 74 |
-
return lzma.decompress(buf)
|
| 75 |
-
except Exception as e:
|
| 76 |
-
dbg.append(f"Decompress probe failed: {type(e).__name__}: {e}")
|
| 77 |
-
return buf
|
| 78 |
-
|
| 79 |
-
def build_prompt(user_text: str, is_first: bool, lock_longshot: bool = True) -> str:
|
| 80 |
-
"""Compose continuity-aware prompt text."""
|
| 81 |
-
user_text = (user_text or "").strip()
|
| 82 |
-
longshot_plus = (
|
| 83 |
-
"single continuous long shot; no cuts or new shot; no angle switch; "
|
| 84 |
-
"smooth camera motion (pan/tilt/zoom only); unbroken continuity"
|
| 85 |
-
)
|
| 86 |
-
if is_first:
|
| 87 |
-
base = f"Opening frame. {user_text}" if user_text else "Opening frame."
|
| 88 |
-
if lock_longshot:
|
| 89 |
-
base += ". " + longshot_plus
|
| 90 |
-
return base
|
| 91 |
-
# Subsequent frames
|
| 92 |
-
base = (
|
| 93 |
-
"Treat the previous frame as a still from the same continuous long shot. "
|
| 94 |
-
"Maintain style, subject identity, lighting, and camera continuity. "
|
| 95 |
-
f"Generate the next moment: {user_text if user_text else 'advance the action naturally.'}"
|
| 96 |
-
)
|
| 97 |
-
if lock_longshot:
|
| 98 |
-
base += ". " + longshot_plus
|
| 99 |
-
return base
|
| 100 |
-
|
| 101 |
-
# =========================
|
| 102 |
-
# Horde client (txt2img OR img2img)
|
| 103 |
-
# =========================
|
| 104 |
-
def horde_generate(
|
| 105 |
-
prompt: str,
|
| 106 |
-
steps: int = DEFAULT_STEPS,
|
| 107 |
-
width: int = DEFAULT_W,
|
| 108 |
-
height: int = DEFAULT_H,
|
| 109 |
-
model: Optional[str] = MODEL,
|
| 110 |
-
init_image: Optional[Image.Image] = None,
|
| 111 |
-
denoise: float = 0.45, # 0.0 = identical, 1.0 = big change
|
| 112 |
-
):
|
| 113 |
"""
|
| 114 |
-
|
| 115 |
-
|
| 116 |
"""
|
| 117 |
dbg = []
|
| 118 |
-
if
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
def _submit(payload):
|
| 122 |
-
sub = requests.post(HORDE_URL, json=payload, headers=_headers(), timeout=30)
|
| 123 |
-
# Auto-fallback if KudosUpfront required
|
| 124 |
-
if sub.status_code == 403:
|
| 125 |
-
try:
|
| 126 |
-
body = sub.json()
|
| 127 |
-
except Exception:
|
| 128 |
-
body = {"message": sub.text}
|
| 129 |
-
msg = (body.get("message") or "").lower()
|
| 130 |
-
rc = body.get("rc") or ""
|
| 131 |
-
if "kudos" in msg or rc == "KudosUpfront":
|
| 132 |
-
payload["params"]["steps"] = min(int(payload["params"]["steps"]), 30)
|
| 133 |
-
payload["params"]["width"] = min(int(payload["params"]["width"]), 704)
|
| 134 |
-
payload["params"]["height"] = min(int(payload["params"]["height"]), 704)
|
| 135 |
-
dbg.append("Fallback applied: steps<=30, width/height<=704. Retrying submit…")
|
| 136 |
-
sub = requests.post(HORDE_URL, json=payload, headers=_headers(), timeout=30)
|
| 137 |
-
return sub
|
| 138 |
-
|
| 139 |
-
# ---- try img2img if init_image provided ----
|
| 140 |
-
tried_img2img = False
|
| 141 |
-
if init_image is not None:
|
| 142 |
-
tried_img2img = True
|
| 143 |
-
payload = {
|
| 144 |
-
"prompt": prompt.strip(),
|
| 145 |
-
"params": {
|
| 146 |
-
"steps": int(steps),
|
| 147 |
-
"width": int(width),
|
| 148 |
-
"height": int(height),
|
| 149 |
-
"n": 1,
|
| 150 |
-
"denoise": float(denoise)
|
| 151 |
-
},
|
| 152 |
-
"nsfw": False,
|
| 153 |
-
"censor_nsfw": True,
|
| 154 |
-
"source_processing": "img2img",
|
| 155 |
-
"source_image": pil_to_b64(init_image),
|
| 156 |
-
"r2": True
|
| 157 |
-
}
|
| 158 |
-
if model:
|
| 159 |
-
payload["models"] = [model]
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
if submit.status_code >= 300:
|
| 165 |
-
dbg.append(f"SUBMIT body={submit.text[:500]}")
|
| 166 |
-
submit.raise_for_status()
|
| 167 |
-
submit_j = submit.json()
|
| 168 |
-
job_id = submit_j.get("id")
|
| 169 |
-
if not job_id:
|
| 170 |
-
dbg.append(f"SUBMIT json={submit_j}")
|
| 171 |
-
raise gr.Error("Horde submit succeeded but no job id returned.")
|
| 172 |
-
dbg.append(f"JOB id={job_id}")
|
| 173 |
-
return _poll_and_decode(job_id, dbg)
|
| 174 |
-
except Exception:
|
| 175 |
-
dbg.append("IMG2IMG path failed, falling back to text-only:\n" + traceback.format_exc())
|
| 176 |
-
|
| 177 |
-
# ---- txt2img path ----
|
| 178 |
-
payload = {
|
| 179 |
-
"prompt": prompt.strip(),
|
| 180 |
-
"params": {
|
| 181 |
-
"steps": int(steps),
|
| 182 |
-
"width": int(width),
|
| 183 |
-
"height": int(height),
|
| 184 |
-
"n": 1
|
| 185 |
-
},
|
| 186 |
-
"nsfw": False,
|
| 187 |
-
"censor_nsfw": True,
|
| 188 |
-
"r2": True
|
| 189 |
-
}
|
| 190 |
-
if model:
|
| 191 |
-
payload["models"] = [model]
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
dbg.append(f"SUBMIT (txt2img{', after img2img fail' if tried_img2img else ''}) status={submit.status_code}")
|
| 196 |
-
if submit.status_code >= 300:
|
| 197 |
-
dbg.append(f"SUBMIT body={submit.text[:500]}")
|
| 198 |
-
submit.raise_for_status()
|
| 199 |
-
submit_j = submit.json()
|
| 200 |
-
job_id = submit_j.get("id")
|
| 201 |
-
if not job_id:
|
| 202 |
-
dbg.append(f"SUBMIT json={submit_j}")
|
| 203 |
-
raise gr.Error("Horde submit succeeded but no job id returned.")
|
| 204 |
-
dbg.append(f"JOB id={job_id}")
|
| 205 |
-
return _poll_and_decode(job_id, dbg)
|
| 206 |
-
except Exception:
|
| 207 |
-
dbg.append("SUBMIT exception:\n" + traceback.format_exc())
|
| 208 |
-
return None, "\n".join(dbg)
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
try:
|
| 214 |
-
|
| 215 |
-
if status_r.status_code >= 300:
|
| 216 |
-
dbg.append(f"POLL status={status_r.status_code}")
|
| 217 |
-
dbg.append(f"POLL body={status_r.text[:500]}")
|
| 218 |
-
status_r.raise_for_status()
|
| 219 |
-
s = status_r.json()
|
| 220 |
-
|
| 221 |
-
k = s.get("kudos", "?")
|
| 222 |
-
queue = s.get("queue_position", "?")
|
| 223 |
-
eta = s.get("wait_time", "?")
|
| 224 |
-
dbg.append(f"queue={queue} eta≈{eta}s kudos={k}")
|
| 225 |
-
|
| 226 |
-
if s.get("faulted"):
|
| 227 |
-
dbg.append(f"FAULT: {s}")
|
| 228 |
-
return None, "\n".join(dbg)
|
| 229 |
-
|
| 230 |
-
if s.get("done"):
|
| 231 |
-
gens = s.get("generations") or []
|
| 232 |
-
if not gens:
|
| 233 |
-
dbg.append("DONE but no generations returned.")
|
| 234 |
-
return None, "\n".join(dbg)
|
| 235 |
-
|
| 236 |
-
g0 = gens[0]
|
| 237 |
-
dbg.append(f"GEN keys: {list(g0.keys())}")
|
| 238 |
-
dbg.append(f"img_type: {g0.get('img_type')}")
|
| 239 |
-
|
| 240 |
-
# Prefer URL if present (also check gen_metadata for r2 url)
|
| 241 |
-
url = (
|
| 242 |
-
g0.get("r2")
|
| 243 |
-
or g0.get("url")
|
| 244 |
-
or g0.get("src")
|
| 245 |
-
or g0.get("image_url")
|
| 246 |
-
or (isinstance(g0.get("gen_metadata"), dict) and (g0["gen_metadata"].get("r2") or g0["gen_metadata"].get("url")))
|
| 247 |
-
)
|
| 248 |
-
if isinstance(url, str) and (url.startswith("http://") or url.startswith("https://")):
|
| 249 |
-
dbg.append("Found URL in generation → fetching…")
|
| 250 |
-
try:
|
| 251 |
-
r = requests.get(url, timeout=60)
|
| 252 |
-
r.raise_for_status()
|
| 253 |
-
img_bytes = r.content
|
| 254 |
-
return _decode_bytes_to_image(img_bytes, dbg)
|
| 255 |
-
except Exception as e:
|
| 256 |
-
dbg.append(f"URL fetch failed: {type(e).__name__}: {e}")
|
| 257 |
-
return None, "\n".join(dbg)
|
| 258 |
-
|
| 259 |
-
# Base64 (or hex/encoded) path
|
| 260 |
-
b64 = g0.get("img")
|
| 261 |
-
if not b64:
|
| 262 |
-
dbg.append("No 'img' field present.")
|
| 263 |
-
return None, "\n".join(dbg)
|
| 264 |
-
|
| 265 |
-
# If 'img' looks like a URL string, fetch it
|
| 266 |
-
if b64.startswith("http://") or b64.startswith("https://"):
|
| 267 |
-
dbg.append("img field is a URL string → fetching…")
|
| 268 |
-
try:
|
| 269 |
-
r = requests.get(b64, timeout=60)
|
| 270 |
-
r.raise_for_status()
|
| 271 |
-
img_bytes = r.content
|
| 272 |
-
return _decode_bytes_to_image(img_bytes, dbg)
|
| 273 |
-
except Exception as e:
|
| 274 |
-
dbg.append(f"URL fetch failed: {type(e).__name__}: {e}")
|
| 275 |
-
return None, "\n".join(dbg)
|
| 276 |
-
|
| 277 |
-
# Try base64-url safe first
|
| 278 |
-
try:
|
| 279 |
-
img_bytes = _b64_to_bytes(b64)
|
| 280 |
-
except Exception as e:
|
| 281 |
-
dbg.append(f"Base64/urlsafe decode failed: {type(e).__name__}: {e}")
|
| 282 |
-
img_bytes = None
|
| 283 |
-
|
| 284 |
-
# If that failed or header looks wrong, try hex
|
| 285 |
-
if not img_bytes or len(img_bytes) < 8:
|
| 286 |
-
if re.fullmatch(r"[0-9a-fA-F]+", b64) and len(b64) % 2 == 0:
|
| 287 |
-
dbg.append("img looks like hex → decoding…")
|
| 288 |
-
try:
|
| 289 |
-
img_bytes = bytes.fromhex(b64)
|
| 290 |
-
except Exception as e:
|
| 291 |
-
dbg.append(f"Hex decode failed: {type(e).__name__}: {e}")
|
| 292 |
-
img_bytes = None
|
| 293 |
-
|
| 294 |
-
if not img_bytes:
|
| 295 |
-
return None, "\n".join(dbg)
|
| 296 |
-
|
| 297 |
-
# Some workers compress payloads—try to decompress if needed
|
| 298 |
-
img_bytes = _maybe_decompress(img_bytes, dbg)
|
| 299 |
-
|
| 300 |
-
return _decode_bytes_to_image(img_bytes, dbg)
|
| 301 |
-
|
| 302 |
-
if time.time() - start > POLL_TIMEOUT:
|
| 303 |
-
dbg.append("TIMEOUT waiting for Horde.")
|
| 304 |
-
return None, "\n".join(dbg)
|
| 305 |
-
|
| 306 |
-
time.sleep(POLL_INTERVAL)
|
| 307 |
-
|
| 308 |
except Exception:
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
def _decode_bytes_to_image(img_bytes: bytes, dbg: list[str]):
|
| 313 |
-
head = img_bytes[:12]
|
| 314 |
-
dbg.append(f"header bytes: {head.hex(' ')}")
|
| 315 |
-
try:
|
| 316 |
-
img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
| 317 |
-
return img, "\n".join(dbg)
|
| 318 |
-
except Exception as e:
|
| 319 |
-
dbg.append(f"PIL decode failed: {type(e).__name__}: {e}")
|
| 320 |
|
| 321 |
-
try:
|
| 322 |
-
arr = imageio.imread(io.BytesIO(img_bytes))
|
| 323 |
-
if isinstance(arr, np.ndarray):
|
| 324 |
-
if arr.ndim == 2: # grayscale → RGB
|
| 325 |
-
arr = np.stack([arr, arr, arr], axis=-1)
|
| 326 |
-
elif arr.shape[-1] == 4: # RGBA → RGB
|
| 327 |
-
arr = arr[..., :3]
|
| 328 |
-
img = Image.fromarray(arr.astype(np.uint8), mode="RGB")
|
| 329 |
-
dbg.append("Decoded via imageio fallback.")
|
| 330 |
-
return img, "\n".join(dbg)
|
| 331 |
except Exception as e:
|
| 332 |
-
dbg.append(f"
|
|
|
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
.gradio-container { padding: 24px; }
|
| 377 |
-
|
| 378 |
-
.
|
| 379 |
-
border-radius: 18px !important;
|
| 380 |
-
min-height: 90px;
|
| 381 |
-
font-size: 16px;
|
| 382 |
-
line-height: 1.4;
|
| 383 |
-
padding: 14px 16px;
|
| 384 |
-
}
|
| 385 |
-
/* Pill buttons */
|
| 386 |
-
.pill button {
|
| 387 |
-
border-radius: 999px !important;
|
| 388 |
-
padding: 10px 18px;
|
| 389 |
-
font-size: 15px;
|
| 390 |
-
}
|
| 391 |
-
/* Rounded image boxes */
|
| 392 |
-
.image-out .wrap, .image-out .svelte-1ipelgc {
|
| 393 |
-
border-radius: 22px !important;
|
| 394 |
-
}
|
| 395 |
"""
|
| 396 |
|
| 397 |
-
with gr.Blocks(css=
|
| 398 |
-
gr.Markdown("
|
| 399 |
-
"
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
with gr.Row():
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
| 409 |
|
| 410 |
-
|
| 411 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
|
| 416 |
-
#
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
)
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
# ---- Row 4: Next scene ----
|
| 456 |
-
with gr.Row():
|
| 457 |
-
with gr.Column(scale=1, min_width=320):
|
| 458 |
-
p4 = gr.Textbox(
|
| 459 |
-
placeholder="Describe the next scene",
|
| 460 |
-
lines=4,
|
| 461 |
-
label=None,
|
| 462 |
-
elem_classes=["prompt-box"]
|
| 463 |
-
)
|
| 464 |
-
b4 = gr.Button("Generate image 4", elem_classes=["pill"])
|
| 465 |
-
with gr.Column(scale=2, min_width=380):
|
| 466 |
-
img4 = gr.Image(label="Image 4 output", type="pil", elem_classes=["image-out"])
|
| 467 |
-
|
| 468 |
-
# Wire callbacks
|
| 469 |
-
b1.click(fn=generate_opening, inputs=[p1, steps, size, lock], outputs=[img1, debug_box])
|
| 470 |
-
b2.click(fn=generate_next, inputs=[p2, steps, size, lock, img1, change], outputs=[img2, debug_box])
|
| 471 |
-
b3.click(fn=generate_next, inputs=[p3, steps, size, lock, img2, change], outputs=[img3, debug_box])
|
| 472 |
-
b4.click(fn=generate_next, inputs=[p4, steps, size, lock, img3, change], outputs=[img4, debug_box])
|
| 473 |
|
| 474 |
if __name__ == "__main__":
|
| 475 |
-
demo.queue().launch()
|
|
|
|
| 1 |
+
import os, io, time, random, base64, zipfile
|
| 2 |
+
from typing import List, Tuple, Optional
|
| 3 |
+
|
| 4 |
+
import requests
|
| 5 |
from PIL import Image
|
| 6 |
import gradio as gr
|
| 7 |
+
|
| 8 |
+
# ========= Config =========
MAX_FRAMES = 8   # how many upload slots & rows to render
# Modal i2v inference endpoint; called by call_modal_i2v() below.
MODAL_BASE = "https://moonmath-ai--moonmath-i2v-backend-moonmathinference-run.modal.run"
|
| 11 |
+
|
| 12 |
+
# ========= Helpers =========
|
| 13 |
+
def _save_video_bytes(data: bytes, tag: str) -> str:
    """Persist raw MP4 bytes to disk and return the file path.

    The filename embeds *tag* and the current UNIX timestamp so repeated
    calls do not overwrite each other (collisions are still possible
    within the same second).

    Args:
        data: raw video bytes to write.
        tag: short label embedded in the filename.

    Returns:
        Absolute path of the written ``.mp4`` file.
    """
    import tempfile

    target_dir = "/mnt/data"
    try:
        os.makedirs(target_dir, exist_ok=True)
    except OSError:
        # /mnt/data may not exist or be writable outside the Space
        # container; fall back to the system temp directory so the
        # caller still gets a playable file path.
        target_dir = tempfile.gettempdir()
    path = os.path.join(target_dir, f"{tag}_{int(time.time())}.mp4")
    with open(path, "wb") as f:
        f.write(data)
    return path
|
| 19 |
+
|
| 20 |
+
def _png_bytes_from_pil(img: Image.Image) -> bytes:
    """Serialize a PIL image to PNG-encoded bytes."""
    with io.BytesIO() as buffer:
        img.save(buffer, format="PNG")
        return buffer.getvalue()
|
| 24 |
|
| 25 |
+
def _download_to_bytes(url: str) -> bytes:
    """Fetch *url* and return the response body; raises on HTTP errors."""
    # 180 s timeout: video files can be large and the backend CDN slow.
    response = requests.get(url, timeout=180)
    response.raise_for_status()
    return response.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
+
def call_modal_i2v(start_img: Image.Image, prompt: str, seed: Optional[int]) -> Tuple[Optional[str], str]:
    """
    POST to Modal with multipart 'image_bytes' and query args prompt & seed.
    Returns (mp4_path_or_None, debug_log).
    """
    dbg = []
    # Treat None/0/-1 as "no seed chosen" and pick a random 31-bit seed.
    if seed in (None, 0, -1):
        seed = random.randint(1, 2**31 - 1)

    # Build URL (encode prompt)
    from urllib.parse import quote
    url = f"{MODAL_BASE}?prompt={quote(prompt)}&seed={seed}"

    # Multipart upload: the backend reads the start frame from 'image_bytes'.
    files = {"image_bytes": ("start.png", _png_bytes_from_pil(start_img), "image/png")}
    headers = {"accept": "application/json"}

    try:
        # Long timeout (600 s): video generation can take several minutes.
        resp = requests.post(url, files=files, headers=headers, timeout=600)
        ctype = (resp.headers.get("content-type") or "").lower()
        dbg.append(f"HTTP {resp.status_code}; content-type={ctype}")

        # Case A: raw bytes (not JSON) — the body is the video itself.
        if "application/json" not in ctype:
            resp.raise_for_status()
            path = _save_video_bytes(resp.content, "pair")
            dbg.append(f"Saved raw video to {path}")
            return path, "\n".join(dbg)

        # Case B: JSON containing url or base64.
        data = resp.json()
        # Backends vary in key naming; probe the common variants in order.
        video_url = data.get("video_url") or data.get("url") or data.get("result") or data.get("output")
        video_b64 = data.get("video_b64") or data.get("videoBase64")

        if video_url and isinstance(video_url, str):
            b = _download_to_bytes(video_url)
            path = _save_video_bytes(b, "pair")
            dbg.append(f"Downloaded video from {video_url} -> {path}")
            return path, "\n".join(dbg)

        if video_b64 and isinstance(video_b64, str):
            # Re-pad base64 in case the backend stripped '=' padding.
            pad = (-len(video_b64)) % 4
            if pad: video_b64 += "=" * pad
            b = base64.b64decode(video_b64)
            path = _save_video_bytes(b, "pair")
            dbg.append("Decoded base64 video.")
            return path, "\n".join(dbg)

        # Nothing usable returned — log a truncated view of the JSON body.
        try:
            dbg.append(f"Backend JSON: {str(data)[:500]}")
        except Exception:
            pass
        return None, "\n".join(dbg)

    except Exception as e:
        # Best-effort API: report the failure in the debug log rather than
        # raising into the Gradio handler.
        dbg.append(f"Exception: {type(e).__name__}: {e}")
        return None, "\n".join(dbg)
|
| 87 |
|
| 88 |
+
# ========= State handlers =========
|
| 89 |
+
def add_images(files: List[str], images_state: List[Image.Image], names_state: List[str]):
    """
    Append uploads to state; return updated previews and row visibilities.
    """
    imgs = list(images_state)
    names = list(names_state)
    for filepath in files or []:
        try:
            # Normalize everything to RGB so downstream PNG encoding is uniform.
            loaded = Image.open(filepath).convert("RGB")
            imgs.append(loaded)
            names.append(os.path.basename(filepath))
        except Exception:
            # Skip anything PIL cannot open.
            continue

    # Outputs to update: image slots, labels, visibilities; pair rows
    # become visible up to len(imgs) - 1.
    count = len(imgs)
    img_values = [imgs[i] if i < count else None for i in range(MAX_FRAMES)]
    img_labels = [f"Image {i+1}" for i in range(MAX_FRAMES)]
    img_vis = [i < count for i in range(MAX_FRAMES)]
    pair_vis = [i < count - 1 for i in range(MAX_FRAMES - 1)]

    return imgs, names, img_values, img_labels, img_vis, pair_vis
|
| 119 |
+
|
| 120 |
+
def clear_all():
    """Reset both state lists and hide every image slot and pair row."""
    empty_slots = [None] * MAX_FRAMES
    labels = [f"Image {i+1}" for i in range(MAX_FRAMES)]
    slots_hidden = [False] * MAX_FRAMES
    pairs_hidden = [False] * (MAX_FRAMES - 1)
    return [], [], empty_slots, labels, slots_hidden, pairs_hidden
|
| 126 |
+
|
| 127 |
+
def stitch_pair(index: int,
                images: List[Image.Image],
                prompt: str,
                seed: int):
    """Generate the transition video for the 0-based pair *index*.

    Pair 0 stitches images 1 & 2, pair 1 stitches 2 & 3, and so on.
    The *first* image of the pair is sent to Modal as the init image.
    """
    available = len(images) if images else 0
    if available < index + 2:
        gr.Warning("Upload more images first.")
        return None, "Not enough images."

    # Keep the user's text, then append a continuity hint for the backend.
    user = (prompt or "").strip()
    extra = f"(Transition between frame {index+1} → {index+2} of the same shot.)"
    final_prompt = " ".join([user, extra]).strip()

    path, dbg = call_modal_i2v(images[index], final_prompt, seed)
    if path is None:
        gr.Warning("Stitch failed. See debug log.")
    return path, dbg
|
| 148 |
+
|
| 149 |
+
# ========= UI =========
|
| 150 |
+
CSS = """
|
| 151 |
.gradio-container { padding: 24px; }
|
| 152 |
+
.pill button { border-radius: 999px !important; padding: 10px 18px; }
|
| 153 |
+
.rounded textarea { border-radius: 16px !important; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
"""
|
| 155 |
|
| 156 |
+
with gr.Blocks(css=CSS, title="Stitch — Upload & Stitch Adjacent Pairs") as demo:
    gr.Markdown("## Stitch — Upload stills, then generate between-frames videos\n"
                "Upload images in order. For each adjacent pair (1&2, 2&3, …), write a short transition prompt and click **Stitch**.")

    # Session state: the ordered PIL images and their original filenames.
    images_state = gr.State([])   # List[PIL.Image]
    names_state = gr.State([])    # List[str]

    with gr.Row():
        # Left column: image slots
        with gr.Column(scale=1, min_width=340):
            uploader = gr.Files(label="Add images (in order)", file_types=["image"], file_count="multiple")
            clear_btn = gr.Button("Clear all", elem_classes=["pill"])
            # One hidden preview slot per possible frame; made visible as uploads arrive.
            image_slots = []
            for i in range(MAX_FRAMES):
                image_slots.append(
                    gr.Image(label=f"Image {i+1}", interactive=False, visible=False)
                )

        # Middle column: per-pair prompt + button
        with gr.Column(scale=1, min_width=340):
            seed_in = gr.Number(value=0, precision=0, label="Seed (0 = random)")
            prompt_boxes = []
            stitch_buttons = []
            for i in range(MAX_FRAMES - 1):
                prompt_boxes.append(
                    gr.Textbox(
                        placeholder=f"Prompt for transition between Image {i+1} & {i+2}",
                        lines=2, label="Prompt", elem_classes=["rounded"], visible=False
                    )
                )
                stitch_buttons.append(
                    gr.Button(f"Stitch {i+1}&{i+2}", elem_classes=["pill"], visible=False)
                )

        # Right column: per-pair video outputs + shared debug
        with gr.Column(scale=1, min_width=360):
            video_outputs = []
            for i in range(MAX_FRAMES - 1):
                video_outputs.append(
                    gr.Video(label=f"Video (image {i+1}+{i+2}) output", visible=False)
                )
            debug_box = gr.Code(label="Debug log", interactive=False)

    # ---- Wiring: upload & clear ----
    # NOTE(review): add_images returns 6 values (four of which are lists),
    # but this outputs list enumerates 2 state objects plus ~31 components,
    # with image_slots repeated three times. Gradio maps return values to
    # outputs positionally and typically rejects duplicate output components
    # — verify this handler actually updates the slots as intended.
    uploader.upload(
        fn=add_images,
        inputs=[uploader, images_state, names_state],
        outputs=[
            images_state, names_state,
            # image values, labels, visibilities
            *image_slots,                 # values (Image components accept PIL Image)
            *[s for s in image_slots],    # labels: set via .label below (we'll hack via .update)
            *[s for s in image_slots],    # visibility
            *[b for b in stitch_buttons]  # visibility for rows (we'll mirror to prompt/video too)
        ],
        queue=False
    )

    # NOTE: Gradio can't directly set multiple attributes with one function return to each component slot,
    # so we will do a lightweight post-upload JS update using .update. Simpler: tie visibility of prompt/video
    # to the corresponding button's visibility in another handler:

    def reflect_row_visibility(images: List[Image.Image]):
        # Show pair row i only when both image i and i+1 exist.
        n = len(images)
        vis = [i < n-1 for i in range(MAX_FRAMES-1)]
        # return prompt visibilities, button visibilities, video visibilities
        return [gr.Textbox(visible=vis[i]) for i in range(MAX_FRAMES-1)] + \
               [gr.Button(visible=vis[i]) for i in range(MAX_FRAMES-1)] + \
               [gr.Video(visible=vis[i]) for i in range(MAX_FRAMES-1)]

    # Second upload handler: runs after add_images has updated images_state.
    # NOTE(review): relies on both .upload handlers firing in registration
    # order so images_state is fresh here — confirm against the Gradio version in use.
    uploader.upload(
        fn=reflect_row_visibility,
        inputs=[images_state],
        outputs=[*prompt_boxes, *stitch_buttons, *video_outputs],
        queue=False
    )

    # NOTE(review): as with uploader.upload above, this outputs list repeats
    # image_slots three times against clear_all's 6 return values — verify.
    clear_btn.click(
        fn=clear_all,
        inputs=[],
        outputs=[images_state, names_state, *image_slots, *image_slots, *image_slots, *stitch_buttons],
        queue=False
    ).then(
        fn=lambda imgs: reflect_row_visibility(imgs),
        inputs=[images_state],
        outputs=[*prompt_boxes, *stitch_buttons, *video_outputs],
        queue=False
    )

    # ---- Wiring: per-pair stitchers ----
    for i in range(MAX_FRAMES - 1):
        stitch_buttons[i].click(
            # idx=i binds the loop variable per-iteration (avoids the
            # late-binding closure pitfall).
            fn=lambda prompt, seed, imgs, idx=i: stitch_pair(idx, imgs, prompt, int(seed or 0)),
            inputs=[prompt_boxes[i], seed_in, images_state],
            outputs=[video_outputs[i], debug_box]
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
if __name__ == "__main__":
    # Enable the request queue (needed for long-running stitch jobs) and serve the UI.
    demo.queue().launch()
|