Spaces:

MoonMath-ai
/

StitchTool

Sleeping

App Files Files Community

Shalmoni committed on Sep 17

Commit

6259109

verified ·

1 Parent(s): 5a6bbaa

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -153

app.py CHANGED Viewed

@@ -1,15 +1,16 @@
-import os, io, time, random, base64, zipfile
-from typing import List, Tuple, Optional
 import requests
 from PIL import Image
 import gradio as gr
-# ========= Config =========
-MAX_FRAMES = 8  # how many upload slots & rows to render
 MODAL_BASE = "https://moonmath-ai--moonmath-i2v-backend-moonmathinference-run.modal.run"
-# ========= Helpers =========
 def _save_video_bytes(data: bytes, tag: str) -> str:
     os.makedirs("/mnt/data", exist_ok=True)
     path = f"/mnt/data/{tag}_{int(time.time())}.mp4"
@@ -27,126 +28,104 @@ def _download_to_bytes(url: str) -> bytes:
     r.raise_for_status()
     return r.content
-def call_modal_i2v(start_img: Image.Image, prompt: str, seed: Optional[int]) -> Tuple[Optional[str], str]:
     """
-    POST to Modal with multipart 'image_bytes' and query args prompt & seed.
-    Returns (mp4_path_or_None, debug_log).
     """
-    dbg = []
     if seed in (None, 0, -1):
         seed = random.randint(1, 2**31 - 1)
-    # Build URL (encode prompt)
-    from urllib.parse import quote
     url = f"{MODAL_BASE}?prompt={quote(prompt)}&seed={seed}"
     files = {"image_bytes": ("start.png", _png_bytes_from_pil(start_img), "image/png")}
     headers = {"accept": "application/json"}
     try:
         resp = requests.post(url, files=files, headers=headers, timeout=600)
         ctype = (resp.headers.get("content-type") or "").lower()
-        dbg.append(f"HTTP {resp.status_code}; content-type={ctype}")
-        # Case A: raw bytes (not JSON)
         if "application/json" not in ctype:
             resp.raise_for_status()
-            path = _save_video_bytes(resp.content, "pair")
-            dbg.append(f"Saved raw video to {path}")
-            return path, "\n".join(dbg)
-        # Case B: JSON containing url or base64
         data = resp.json()
         video_url = data.get("video_url") or data.get("url") or data.get("result") or data.get("output")
-        video_b64 = data.get("video_b64") or data.get("videoBase64")
-        if video_url and isinstance(video_url, str):
             b = _download_to_bytes(video_url)
-            path = _save_video_bytes(b, "pair")
-            dbg.append(f"Downloaded video from {video_url} -> {path}")
-            return path, "\n".join(dbg)
-        if video_b64 and isinstance(video_b64, str):
             pad = (-len(video_b64)) % 4
             if pad: video_b64 += "=" * pad
             b = base64.b64decode(video_b64)
-            path = _save_video_bytes(b, "pair")
-            dbg.append("Decoded base64 video.")
-            return path, "\n".join(dbg)
-        # Nothing usable returned
-        try:
-            dbg.append(f"Backend JSON: {str(data)[:500]}")
-        except Exception:
-            pass
-        return None, "\n".join(dbg)
-    except Exception as e:
-        dbg.append(f"Exception: {type(e).__name__}: {e}")
-        return None, "\n".join(dbg)
-# ========= State handlers =========
-def add_images(files: List[str], images_state: List[Image.Image], names_state: List[str]):
     """
-    Append uploads to state; return updated previews and row visibilities.
     """
-    imgs, names = list(images_state), list(names_state)
     for f in files or []:
         try:
-            img = Image.open(f).convert("RGB")
-            imgs.append(img)
-            names.append(os.path.basename(f))
         except Exception:
             continue
-    # Outputs to update: image slots, labels, visibilities; pair rows visible up to len-1
-    img_values, img_labels, img_vis = [], [], []
-    pair_vis = []
     for i in range(MAX_FRAMES):
         if i < len(imgs):
-            img_values.append(imgs[i])
-            img_labels.append(f"Image {i+1}")
-            img_vis.append(True)
         else:
-            img_values.append(None)
-            img_labels.append(f"Image {i+1}")
-            img_vis.append(False)
-    for i in range(MAX_FRAMES - 1):
-        pair_vis.append(i < len(imgs) - 1)
-    return imgs, names, img_values, img_labels, img_vis, pair_vis
 def clear_all():
-    img_values = [None]*MAX_FRAMES
-    img_labels = [f"Image {i+1}" for i in range(MAX_FRAMES)]
-    img_vis    = [False]*MAX_FRAMES
-    pair_vis   = [False]*(MAX_FRAMES-1)
-    return [], [], img_values, img_labels, img_vis, pair_vis
-def stitch_pair(index: int,
-                images: List[Image.Image],
-                prompt: str,
-                seed: int):
     """
-    index is 0-based pair (0 => 1&2, 1 => 2&3...)
-    We call Modal using the *first* image of the pair as the init image.
     """
-    if not images or len(images) < index+2:
-        gr.Warning("Upload more images first.")
-        return None, "Not enough images."
-    # Compose a minimal helpful prompt for continuity
     user = (prompt or "").strip()
-    extra = f"(Transition between frame {index+1} → {index+2} of the same shot.)"
-    final_prompt = f"{user} {extra}".strip()
-    path, dbg = call_modal_i2v(images[index], final_prompt, seed)
-    if path is None:
-        gr.Warning("Stitch failed. See debug log.")
-    return path, dbg
-# ========= UI =========
 CSS = """
 .gradio-container { padding: 24px; }
 .pill button { border-radius: 999px !important; padding: 10px 18px; }
@@ -154,100 +133,64 @@ CSS = """
 """
 with gr.Blocks(css=CSS, title="Stitch — Upload & Stitch Adjacent Pairs") as demo:
-    gr.Markdown("## Stitch — Upload stills, then generate between-frames videos\n"
-                "Upload images in order. For each adjacent pair (1&2, 2&3, …), write a short transition prompt and click **Stitch**.")
-    images_state = gr.State([])   # List[PIL.Image]
-    names_state  = gr.State([])   # List[str]
     with gr.Row():
-        # Left column: image slots
         with gr.Column(scale=1, min_width=340):
             uploader = gr.Files(label="Add images (in order)", file_types=["image"], file_count="multiple")
             clear_btn = gr.Button("Clear all", elem_classes=["pill"])
-            image_slots = []
-            for i in range(MAX_FRAMES):
-                image_slots.append(
-                    gr.Image(label=f"Image {i+1}", interactive=False, visible=False)
-                )
-        # Middle column: per-pair prompt + button
-        with gr.Column(scale=1, min_width=340):
-            seed_in = gr.Number(value=0, precision=0, label="Seed (0 = random)")
-            prompt_boxes = []
-            stitch_buttons = []
-            for i in range(MAX_FRAMES - 1):
-                prompt_boxes.append(
-                    gr.Textbox(
-                        placeholder=f"Prompt for transition between Image {i+1} & {i+2}",
-                        lines=2, label="Prompt", elem_classes=["rounded"], visible=False
-                    )
-                )
-                stitch_buttons.append(
-                    gr.Button(f"Stitch {i+1}&{i+2}", elem_classes=["pill"], visible=False)
-                )
-        # Right column: per-pair video outputs + shared debug
         with gr.Column(scale=1, min_width=360):
-            video_outputs = []
-            for i in range(MAX_FRAMES - 1):
-                video_outputs.append(
-                    gr.Video(label=f"Video (image {i+1}+{i+2}) output", visible=False)
-                )
-            debug_box = gr.Code(label="Debug log", interactive=False)
-    # ---- Wiring: upload & clear ----
-    uploader.upload(
-        fn=add_images,
-        inputs=[uploader, images_state, names_state],
-        outputs=[
-            images_state, names_state,
-            # image values, labels, visibilities
-            *image_slots,                           # values (Image components accept PIL Image)
-            *[s for s in image_slots],              # labels: set via .label below (we'll hack via .update)
-            *[s for s in image_slots],              # visibility
-            *[b for b in stitch_buttons]            # visibility for rows (we’ll mirror to prompt/video too)
-        ],
-        queue=False
-    )
-    # NOTE: Gradio can't directly set multiple attributes with one function return to each component slot,
-    # so we will do a lightweight post-upload JS update using .update. Simpler: tie visibility of prompt/video
-    # to the corresponding button's visibility in another handler:
-    def reflect_row_visibility(images: List[Image.Image]):
-        n = len(images)
-        vis = [i < n-1 for i in range(MAX_FRAMES-1)]
-        # return prompt visibilities, button visibilities, video visibilities
-        return [gr.Textbox(visible=vis[i]) for i in range(MAX_FRAMES-1)] + \
-               [gr.Button(visible=vis[i]) for i in range(MAX_FRAMES-1)] + \
-               [gr.Video(visible=vis[i]) for i in range(MAX_FRAMES-1)]
     uploader.upload(
-        fn=reflect_row_visibility,
-        inputs=[images_state],
-        outputs=[*prompt_boxes, *stitch_buttons, *video_outputs],
-        queue=False
     )
     clear_btn.click(
         fn=clear_all,
         inputs=[],
-        outputs=[images_state, names_state, *image_slots, *image_slots, *image_slots, *stitch_buttons],
-        queue=False
-    ).then(
-        fn=lambda imgs: reflect_row_visibility(imgs),
-        inputs=[images_state],
-        outputs=[*prompt_boxes, *stitch_buttons, *video_outputs],
-        queue=False
     )
-    # ---- Wiring: per-pair stitchers ----
     for i in range(MAX_FRAMES - 1):
         stitch_buttons[i].click(
-            fn=lambda prompt, seed, imgs, idx=i: stitch_pair(idx, imgs, prompt, int(seed or 0)),
             inputs=[prompt_boxes[i], seed_in, images_state],
-            outputs=[video_outputs[i], debug_box]
         )
 if __name__ == "__main__":

+import os, io, time, random, base64
+from typing import List, Optional, Tuple
+from urllib.parse import quote
 import requests
 from PIL import Image
 import gradio as gr
+# ---------- CONFIG ----------
+MAX_FRAMES = 12  # how many visible "Image N" slots & rows to render
 MODAL_BASE = "https://moonmath-ai--moonmath-i2v-backend-moonmathinference-run.modal.run"
+# ---------- Helpers ----------
 def _save_video_bytes(data: bytes, tag: str) -> str:
     os.makedirs("/mnt/data", exist_ok=True)
     path = f"/mnt/data/{tag}_{int(time.time())}.mp4"
     r.raise_for_status()
     return r.content
+def call_modal_i2v(start_img: Image.Image, prompt: str, seed: Optional[int]) -> Optional[str]:
     """
+    Generate a video from a start frame by POSTing it to the Modal backend.
+
+    The prompt and seed travel as query parameters and the frame is sent as
+    the multipart field ``image_bytes``. A seed of None, 0 or -1 is replaced
+    by a random value in [1, 2**31 - 1].
+
+    The response may be raw video bytes (any non-JSON content type) or JSON
+    carrying a download URL (``video_url``, ``url``, ``result`` or ``output``)
+    or base64 data (``video_b64`` or ``videoBase64``).
+
+    Returns the path of the saved video, or None when the request fails or
+    the response carries no usable video. Failures are logged, never raised.
     """
+    import logging  # imported locally so this function carries its own logging dependency
+
+    log = logging.getLogger(__name__)
     if seed in (None, 0, -1):
         seed = random.randint(1, 2**31 - 1)
     url = f"{MODAL_BASE}?prompt={quote(prompt)}&seed={seed}"
     files = {"image_bytes": ("start.png", _png_bytes_from_pil(start_img), "image/png")}
     headers = {"accept": "application/json"}
     try:
         resp = requests.post(url, files=files, headers=headers, timeout=600)
         ctype = (resp.headers.get("content-type") or "").lower()
+        # Case A: anything that is not JSON is treated as the raw video bytes
         if "application/json" not in ctype:
             resp.raise_for_status()
+            return _save_video_bytes(resp.content, "pair")
+        # Case B: JSON with a URL or a base64 payload
         data = resp.json()
+        if not isinstance(data, dict):
+            log.warning("Modal backend returned unexpected JSON: %.500r", data)
+            return None
         video_url = data.get("video_url") or data.get("url") or data.get("result") or data.get("output")
+        if isinstance(video_url, str) and video_url.startswith(("http://", "https://")):
             b = _download_to_bytes(video_url)
+            return _save_video_bytes(b, "pair")
+        video_b64 = data.get("video_b64") or data.get("videoBase64")
+        # Require a non-empty string: an empty one would decode to a 0-byte video file
+        if isinstance(video_b64, str) and video_b64:
+            # Re-add any '=' padding that was stripped so b64decode accepts the payload
+            video_b64 += "=" * ((-len(video_b64)) % 4)
             b = base64.b64decode(video_b64)
+            return _save_video_bytes(b, "pair")
+        log.warning("Modal backend JSON had no usable video (HTTP %s): %.500r", resp.status_code, data)
+    except Exception:
+        # Deliberately best-effort: callers only test for None, but keep the cause in the logs
+        log.exception("Modal image-to-video request failed")
+    return None
+# ---------- State & wiring helpers ----------
+def handle_upload(files: List[str], images_state: List[Image.Image]):
     """
+    Append newly uploaded images to the stored list and build the UI updates.
+
+    Files that cannot be opened as images are skipped. Only the first
+    MAX_FRAMES images are kept.
+
+    Returns one flat tuple: the new image list, then one update per image
+    slot, prompt box, stitch button and video box, in that order. Gradio
+    assigns return values to an event's ``outputs`` positionally, so the
+    updates must not be nested inside per-group lists.
     """
+    imgs = list(images_state or [])
     for f in files or []:
         try:
+            # convert() produces an independent image, so the opened source can be closed at once
+            with Image.open(f) as src:
+                imgs.append(src.convert("RGB"))
         except Exception:
             continue
+    imgs = imgs[:MAX_FRAMES]  # cap
+    count = len(imgs)
+    # gr.update(...) is used instead of the per-component .update classmethods,
+    # which newer Gradio releases no longer provide
+    image_updates = [
+        gr.update(
+            value=imgs[i] if i < count else None,
+            visible=i < count,
+            label=f"Image {i+1}",
+        )
+        for i in range(MAX_FRAMES)
+    ]
+    # Pair row i joins image i and image i+1, so it is shown only when both exist
+    row_visible = [i < count - 1 for i in range(MAX_FRAMES - 1)]
+    prompt_updates = [gr.update(visible=shown, value="") for shown in row_visible]
+    button_updates = [gr.update(visible=shown) for shown in row_visible]
+    video_updates = [gr.update(visible=shown, value=None) for shown in row_visible]
+    return (imgs, *image_updates, *prompt_updates, *button_updates, *video_updates)
 def clear_all():
+    """
+    Reset the session: drop every stored image and hide all slots and rows.
+
+    Returns one flat tuple: an empty image list, then one update per image
+    slot, prompt box, stitch button and video box, in that order. Gradio
+    assigns return values to an event's ``outputs`` positionally, so the
+    updates must not be nested inside per-group lists.
+    """
+    pair_count = MAX_FRAMES - 1
+    # gr.update(...) is used instead of the per-component .update classmethods,
+    # which newer Gradio releases no longer provide
+    image_updates = [gr.update(value=None, visible=False) for _ in range(MAX_FRAMES)]
+    prompt_updates = [gr.update(visible=False, value="") for _ in range(pair_count)]
+    button_updates = [gr.update(visible=False) for _ in range(pair_count)]
+    video_updates = [gr.update(visible=False, value=None) for _ in range(pair_count)]
+    return ([], *image_updates, *prompt_updates, *button_updates, *video_updates)
+def stitch_pair(idx: int, images: List[Image.Image], prompt: str, seed: int):
     """
+    Generate the transition video for one adjacent pair of uploaded images.
+
+    ``idx`` is the 0-based pair number (0 is images 1 and 2, 1 is images 2
+    and 3, ...). The first image of the pair is sent as the start frame,
+    together with the user's prompt plus a short note naming the two frames.
+
+    Returns whatever call_modal_i2v returns (a video path or None). Returns
+    None after a UI warning when fewer than idx + 2 images are loaded.
     """
+    have_pair = bool(images) and len(images) >= idx + 2
+    if not have_pair:
+        gr.Warning("Please upload enough images first.")
+        return None
+    first_no, second_no = idx + 1, idx + 2  # 1-based frame numbers used in the prompt text
+    cleaned = (prompt or "").strip()
+    final_prompt = f"{cleaned} (Transition between frame {first_no} → {second_no}.)".strip()
+    start_frame = images[idx]
+    seed_value = int(seed or 0)
+    video_path = call_modal_i2v(start_frame, final_prompt, seed_value)
+    if video_path:
+        return video_path
+    gr.Warning("Stitch failed. Try again or adjust your prompt.")
+    return video_path
+# ---------- UI ----------
 CSS = """
 .gradio-container { padding: 24px; }
 .pill button { border-radius: 999px !important; padding: 10px 18px; }
 """
 with gr.Blocks(css=CSS, title="Stitch — Upload & Stitch Adjacent Pairs") as demo:
+    # Three-column page: image slots, per-pair prompt + button, per-pair video output.
+    # Every slot and row is created up front with visible=False.
+    gr.Markdown("## Stitch — Upload stills, then generate between-frames videos")
+    gr.Markdown("Upload images in order. For each adjacent pair (1&2, 2&3, …), write a short transition prompt and click **Stitch**.")
+    images_state = gr.State([])  # List[PIL.Image]
     with gr.Row():
+        # Left: Image slots
         with gr.Column(scale=1, min_width=340):
             uploader = gr.Files(label="Add images (in order)", file_types=["image"], file_count="multiple")
             clear_btn = gr.Button("Clear all", elem_classes=["pill"])
+            # MAX_FRAMES hidden image slots
+            image_slots = [gr.Image(label=f"Image {i+1}", interactive=False, visible=False) for i in range(MAX_FRAMES)]
+        # Middle: per-pair prompt + Stitch button
         with gr.Column(scale=1, min_width=360):
+            # A single seed input, passed to every Stitch button's handler below
+            seed_in = gr.Number(value=0, precision=0, label="Seed (0 = random)")
+            # One prompt box and one button per adjacent pair, hence MAX_FRAMES - 1 of each
+            prompt_boxes  = [gr.Textbox(placeholder=f"Prompt for transition between Image {i+1} & {i+2}",
+                                        lines=2, label="Prompt", elem_classes=["rounded"], visible=False)
+                             for i in range(MAX_FRAMES - 1)]
+            stitch_buttons = [gr.Button(f"Stitch {i+1}&{i+2}", elem_classes=["pill"], visible=False)
+                              for i in range(MAX_FRAMES - 1)]
+        # Right: per-pair video outputs
+        with gr.Column(scale=1, min_width=360):
+            video_outputs = [gr.Video(label=f"Video (image {i+1}+{i+2}) output", visible=False)
+                             for i in range(MAX_FRAMES - 1)]
+    # Upload wiring
+    # Output order: state, image slots, prompt boxes, stitch buttons, video boxes
     uploader.upload(
+        fn=handle_upload,
+        inputs=[uploader, images_state],
+        outputs=[
+            images_state,
+            *image_slots,            # image updates
+            *prompt_boxes,           # prompt visibility + reset
+            *stitch_buttons,         # button visibility
+            *video_outputs           # video visibility + reset
+        ]
     )
+    # Clear wiring
+    # Same output order as the upload event above
     clear_btn.click(
         fn=clear_all,
         inputs=[],
+        outputs=[
+            images_state,
+            *image_slots,
+            *prompt_boxes,
+            *stitch_buttons,
+            *video_outputs
+        ]
     )
+    # Per-pair stitch wiring
     for i in range(MAX_FRAMES - 1):
         stitch_buttons[i].click(
+            # idx=i freezes the pair index per button; a plain closure over i
+            # would make every button use the loop's final value
+            fn=lambda p, s, imgs, idx=i: stitch_pair(idx, imgs, p, s),
             inputs=[prompt_boxes[i], seed_in, images_state],
+            outputs=[video_outputs[i]]
         )
 if __name__ == "__main__":