Shalmoni committed on
Commit
26b0caf
·
verified ·
1 Parent(s): 6259109

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +223 -177
app.py CHANGED
@@ -1,197 +1,243 @@
1
- import os, io, time, random, base64
2
- from typing import List, Optional, Tuple
3
- from urllib.parse import quote
 
 
 
 
4
 
5
  import requests
6
- from PIL import Image
7
  import gradio as gr
8
 
9
- # ---------- CONFIG ----------
10
- MAX_FRAMES = 12 # how many visible "Image N" slots & rows to render
11
- MODAL_BASE = "https://moonmath-ai--moonmath-i2v-backend-moonmathinference-run.modal.run"
12
-
13
- # ---------- Helpers ----------
14
- def _save_video_bytes(data: bytes, tag: str) -> str:
15
- os.makedirs("/mnt/data", exist_ok=True)
16
- path = f"/mnt/data/{tag}_{int(time.time())}.mp4"
 
 
 
 
 
 
 
 
 
 
 
17
  with open(path, "wb") as f:
18
- f.write(data)
19
  return path
20
 
21
- def _png_bytes_from_pil(img: Image.Image) -> bytes:
22
- buf = io.BytesIO()
23
- img.save(buf, format="PNG")
24
- return buf.getvalue()
25
 
26
- def _download_to_bytes(url: str) -> bytes:
27
- r = requests.get(url, timeout=180)
28
  r.raise_for_status()
29
  return r.content
30
 
31
- def call_modal_i2v(start_img: Image.Image, prompt: str, seed: Optional[int]) -> Optional[str]:
32
- """
33
- Your Modal call, exactly like your JS snippet:
34
- POST …?prompt={user_prompt}&seed={seed}
35
- multipart field: image_bytes
36
- Accepts raw mp4 bytes OR JSON { video_url | url | video_b64 }.
37
- Returns path to saved mp4 or None on failure.
38
- """
39
- if seed in (None, 0, -1):
40
- seed = random.randint(1, 2**31 - 1)
41
-
42
- url = f"{MODAL_BASE}?prompt={quote(prompt)}&seed={seed}"
43
- files = {"image_bytes": ("start.png", _png_bytes_from_pil(start_img), "image/png")}
44
- headers = {"accept": "application/json"}
 
 
 
 
 
 
 
 
 
45
 
46
  try:
47
- resp = requests.post(url, files=files, headers=headers, timeout=600)
48
- ctype = (resp.headers.get("content-type") or "").lower()
49
-
50
- # Case A: raw video bytes
51
- if "application/json" not in ctype:
52
- resp.raise_for_status()
53
- return _save_video_bytes(resp.content, "pair")
54
-
55
- # Case B: JSON with URL or base64
56
- data = resp.json()
57
- video_url = data.get("video_url") or data.get("url") or data.get("result") or data.get("output")
58
- if isinstance(video_url, str) and (video_url.startswith("http://") or video_url.startswith("https://")):
59
- b = _download_to_bytes(video_url)
60
- return _save_video_bytes(b, "pair")
61
-
62
- video_b64 = data.get("video_b64") or data.get("videoBase64")
63
- if isinstance(video_b64, str):
64
- pad = (-len(video_b64)) % 4
65
- if pad: video_b64 += "=" * pad
66
- b = base64.b64decode(video_b64)
67
- return _save_video_bytes(b, "pair")
68
- except Exception:
69
- pass
70
-
71
- return None
72
-
73
- # ---------- State & wiring helpers ----------
74
- def handle_upload(files: List[str], images_state: List[Image.Image]):
75
- """
76
- Append newly uploaded images to state (keeps first MAX_FRAMES).
77
- Returns updates for all image slots, prompt rows, stitch buttons, and video boxes.
78
- """
79
- imgs = list(images_state)
80
- for f in files or []:
81
- try:
82
- imgs.append(Image.open(f).convert("RGB"))
83
- except Exception:
84
- continue
85
- imgs = imgs[:MAX_FRAMES] # cap
86
-
87
- # Build updates
88
- image_updates = []
89
- for i in range(MAX_FRAMES):
90
- if i < len(imgs):
91
- image_updates.append(gr.Image.update(value=imgs[i], visible=True, label=f"Image {i+1}"))
92
  else:
93
- image_updates.append(gr.Image.update(value=None, visible=False, label=f"Image {i+1}"))
94
-
95
- row_visible = [i < len(imgs) - 1 for i in range(MAX_FRAMES - 1)]
96
- prompt_updates = [gr.Textbox.update(visible=row_visible[i], value="") for i in range(MAX_FRAMES - 1)]
97
- button_updates = [gr.Button.update(visible=row_visible[i]) for i in range(MAX_FRAMES - 1)]
98
- video_updates = [gr.Video.update(visible=row_visible[i], value=None) for i in range(MAX_FRAMES - 1)]
99
-
100
- return imgs, image_updates, prompt_updates, button_updates, video_updates
101
-
102
- def clear_all():
103
- imgs = []
104
- image_updates = [gr.Image.update(value=None, visible=False) for _ in range(MAX_FRAMES)]
105
- prompt_updates = [gr.Textbox.update(visible=False, value="") for _ in range(MAX_FRAMES - 1)]
106
- button_updates = [gr.Button.update(visible=False) for _ in range(MAX_FRAMES - 1)]
107
- video_updates = [gr.Video.update(visible=False, value=None) for _ in range(MAX_FRAMES - 1)]
108
- return imgs, image_updates, prompt_updates, button_updates, video_updates
109
-
110
- def stitch_pair(idx: int, images: List[Image.Image], prompt: str, seed: int):
111
- """
112
- idx is 0-based pair: 0 => stitch 1&2, 1 => stitch 2&3, ...
113
- We send Image(idx) as the start frame, plus the user prompt (you can add your own template here).
114
- """
115
- if not images or len(images) < idx + 2:
116
- gr.Warning("Please upload enough images first.")
117
- return None
118
-
119
- user = (prompt or "").strip()
120
- # Optional light context; tweak or remove as you wish:
121
- final_prompt = f"{user} (Transition between frame {idx+1} → {idx+2}.)".strip()
122
-
123
- path = call_modal_i2v(images[idx], final_prompt, int(seed or 0))
124
- if not path:
125
- gr.Warning("Stitch failed. Try again or adjust your prompt.")
126
- return path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- # ---------- UI ----------
129
- CSS = """
130
- .gradio-container { padding: 24px; }
131
- .pill button { border-radius: 999px !important; padding: 10px 18px; }
132
- .rounded textarea { border-radius: 16px !important; }
133
- """
134
-
135
- with gr.Blocks(css=CSS, title="Stitch — Upload & Stitch Adjacent Pairs") as demo:
136
- gr.Markdown("## Stitch — Upload stills, then generate between-frames videos")
137
- gr.Markdown("Upload images in order. For each adjacent pair (1&2, 2&3, …), write a short transition prompt and click **Stitch**.")
138
-
139
- images_state = gr.State([]) # List[PIL.Image]
140
-
141
- with gr.Row():
142
- # Left: Image slots
143
- with gr.Column(scale=1, min_width=340):
144
- uploader = gr.Files(label="Add images (in order)", file_types=["image"], file_count="multiple")
145
- clear_btn = gr.Button("Clear all", elem_classes=["pill"])
146
- image_slots = [gr.Image(label=f"Image {i+1}", interactive=False, visible=False) for i in range(MAX_FRAMES)]
147
-
148
- # Middle: per-pair prompt + Stitch button
149
- with gr.Column(scale=1, min_width=360):
150
- seed_in = gr.Number(value=0, precision=0, label="Seed (0 = random)")
151
- prompt_boxes = [gr.Textbox(placeholder=f"Prompt for transition between Image {i+1} & {i+2}",
152
- lines=2, label="Prompt", elem_classes=["rounded"], visible=False)
153
- for i in range(MAX_FRAMES - 1)]
154
- stitch_buttons = [gr.Button(f"Stitch {i+1}&{i+2}", elem_classes=["pill"], visible=False)
155
- for i in range(MAX_FRAMES - 1)]
156
-
157
- # Right: per-pair video outputs
158
- with gr.Column(scale=1, min_width=360):
159
- video_outputs = [gr.Video(label=f"Video (image {i+1}+{i+2}) output", visible=False)
160
- for i in range(MAX_FRAMES - 1)]
161
-
162
- # Upload wiring
163
- uploader.upload(
164
- fn=handle_upload,
165
- inputs=[uploader, images_state],
166
- outputs=[
167
- images_state,
168
- *image_slots, # image updates
169
- *prompt_boxes, # prompt visibility + reset
170
- *stitch_buttons, # button visibility
171
- *video_outputs # video visibility + reset
172
- ]
173
  )
174
 
175
- # Clear wiring
176
- clear_btn.click(
177
- fn=clear_all,
178
- inputs=[],
179
- outputs=[
180
- images_state,
181
- *image_slots,
182
- *prompt_boxes,
183
- *stitch_buttons,
184
- *video_outputs
185
- ]
186
  )
187
 
188
- # Per-pair stitch wiring
189
- for i in range(MAX_FRAMES - 1):
190
- stitch_buttons[i].click(
191
- fn=lambda p, s, imgs, idx=i: stitch_pair(idx, imgs, p, s),
192
- inputs=[prompt_boxes[i], seed_in, images_state],
193
- outputs=[video_outputs[i]]
194
- )
 
 
 
 
195
 
196
- if __name__ == "__main__":
197
- demo.queue().launch()
 
1
+ import os
2
+ import io
3
+ import time
4
+ import random
5
+ import base64
6
+ from urllib.parse import quote_plus
7
+ from typing import Optional, Tuple
8
 
9
  import requests
 
10
  import gradio as gr
11
 
12
+ # -----------------------------
13
+ # Config
14
+ # -----------------------------
15
+ # You can set your Modal endpoint via env var MM_I2V_URL
16
+ DEFAULT_API_URL = os.getenv(
17
+ "MM_I2V_URL",
18
+ "https://moonmath-ai--moonmath-i2v-backend-moonmathinference-run.modal.run",
19
+ )
20
+
21
+ SAVE_DIR = "outputs"
22
+ os.makedirs(SAVE_DIR, exist_ok=True)
23
+
24
+
25
+ # -----------------------------
26
+ # Helpers
27
+ # -----------------------------
28
def _save_bytes_to_mp4(buf: bytes, name_prefix: str) -> str:
    """Persist raw mp4 bytes under SAVE_DIR and return the new file's path.

    The filename is made unique with a millisecond timestamp suffix so
    successive calls never overwrite each other.
    """
    stamp_ms = int(time.time() * 1000)
    out_path = os.path.join(SAVE_DIR, f"{name_prefix}-{stamp_ms}.mp4")
    with open(out_path, "wb") as fh:
        fh.write(buf)
    return out_path
34
 
 
 
 
 
35
 
36
def _download(url: str) -> bytes:
    """Fetch *url* and return the response body, raising on HTTP errors.

    The generous 600 s timeout accommodates large video payloads.
    """
    response = requests.get(url, timeout=600)
    response.raise_for_status()
    return response.content
40
 
41
+
42
def call_i2v(
    image_path: str,
    prompt: str,
    seed: Optional[int],
    api_url: Optional[str] = None,
) -> Tuple[Optional[str], Optional[str]]:
    """Call the image->video backend and return (video_path, error_message).

    Exactly one tuple slot is meaningful: on success the first element is a
    local mp4 path and the second is None; on failure the first is None and
    the second describes the problem.

    Tries to handle several common response types:
    1) raw mp4 bytes
    2) JSON with {"video": "<base64>"} (mp4 base64)
    3) JSON with {"video_url": "https://..."} (or "result_url" / "url")
    """
    api = (api_url or DEFAULT_API_URL).strip().rstrip("/")
    # Blank/missing seed -> pick a random one so repeated runs vary.
    used_seed = seed if (seed is not None and str(seed).strip() != "") else random.randint(0, 2**31 - 1)
    url = f"{api}?prompt={quote_plus(prompt)}&seed={used_seed}"

    # Read the upload eagerly so no file handle is left open after the call
    # (passing an open handle to `requests` previously leaked it).
    try:
        with open(image_path, "rb") as fh:
            image_bytes = fh.read()
    except OSError as e:
        return None, f"Could not read image file: {e}"

    files = {
        "image_bytes": (os.path.basename(image_path), image_bytes, "application/octet-stream")
    }
    headers = {"accept": "application/json"}

    try:
        resp = requests.post(url, headers=headers, files=files, timeout=1200)
        # Fail fast on HTTP errors; without this an error page could be
        # saved to disk as if it were a video.
        resp.raise_for_status()

        # Try to accommodate various backends
        ctype = resp.headers.get("Content-Type", "")
        if "application/json" in ctype:
            data = resp.json()
            # base64 payload
            if "video" in data and isinstance(data["video"], str) and len(data["video"]) > 50:
                try:
                    raw = base64.b64decode(data["video"], validate=True)
                    return _save_bytes_to_mp4(raw, "clip"), None
                except Exception as e:
                    return None, f"Could not decode base64 video: {e}"
            # url payload
            for key in ("video_url", "result_url", "url"):
                if key in data and isinstance(data[key], str) and data[key].startswith("http"):
                    raw = _download(data[key])
                    return _save_bytes_to_mp4(raw, "clip"), None
            return None, 'JSON response did not include "video" (base64) or a known url key.'
        # Raw bytes (ideally mp4)
        elif "video" in ctype or "octet-stream" in ctype:
            return _save_bytes_to_mp4(resp.content, "clip"), None
        else:
            # Some backends still reply bytes with missing/odd content-type;
            # accept anything plausibly larger than an error snippet.
            if resp.content and len(resp.content) > 1024:
                return _save_bytes_to_mp4(resp.content, "clip"), None
            return None, f"Unexpected content type: {ctype}"
    except requests.RequestException as e:
        return None, f"Request failed: {e}"
94
+
95
+
96
def stitch_pair(
    image_a: str,
    image_b: str,
    prompt: str,
    seed: Optional[int],
    api_url: Optional[str],
    crossfade: float,
) -> Tuple[Optional[str], str]:
    """Generate clips from two stills and join them; return (path, error).

    Strategy:
    - Generate a short clip from image A
    - Generate a short clip from image B (same prompt/seed unless user changes)
    - Concatenate with a short crossfade in Python (moviepy)

    On success the error string is empty; on failure the path is None and the
    string explains what went wrong. If you already have a backend endpoint
    that does stitching directly, replace this function body with a single
    backend call.
    """
    if not image_a or not image_b:
        return None, "Please upload both images."

    # First generate both clips
    clip1_path, err1 = call_i2v(image_a, prompt, seed, api_url)
    if err1:
        return None, f"Clip 1 failed: {err1}"
    clip2_path, err2 = call_i2v(image_b, prompt, seed, api_url)
    if err2:
        return None, f"Clip 2 failed: {err2}"

    # Import lazily so the app still starts when moviepy is absent.
    try:
        from moviepy.editor import VideoFileClip, concatenate_videoclips
    except Exception as e:
        return None, f"MoviePy import failed. Add moviepy & imageio-ffmpeg to requirements.txt. Error: {e}"

    c1 = c2 = merged = None
    try:
        c1 = VideoFileClip(clip1_path)
        c2 = VideoFileClip(clip2_path)

        # "compose" handles size/fps mismatches between the two clips.
        if crossfade and crossfade > 0:
            # Overlap the clips: the second fades in while the first fades out.
            c2 = c2.crossfadein(crossfade)
            c1 = c1.crossfadeout(crossfade)
            merged = concatenate_videoclips([c1, c2], method="compose", padding=-crossfade)
        else:
            merged = concatenate_videoclips([c1, c2], method="compose")

        out_path = os.path.join(SAVE_DIR, f"stitched-{int(time.time()*1000)}.mp4")
        merged.write_videofile(out_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
        return out_path, ""
    except Exception as e:
        return None, f"Stitching failed: {e}"
    finally:
        # Always release the ffmpeg readers, even when stitching fails
        # (previously they leaked on any exception).
        for clip in (c1, c2, merged):
            if clip is not None:
                try:
                    clip.close()
                except Exception:
                    pass
149
+
150
+
151
+ # -----------------------------
152
+ # UI
153
+ # -----------------------------
154
# -----------------------------
# UI
# -----------------------------
with gr.Blocks(title="Image Stitch to Video", css="""
/* Rounded tiles like the sketch */
.rounded { border-radius: 24px; }
.tile { background: #f7f7ff; padding: 12px; }
.tile-blue { background: #e8f0ff; }
.tile-yellow { background: #fff7d6; }
.small-btn button { padding: 6px 10px; border-radius: 999px; }
.label-center label { text-align:center; width: 100%; }
""") as demo:
    gr.Markdown("### Image → Video (Stitch Adjacent Pairs)\nUpload 3 images, enter prompts for each stitch, then click the stitch buttons.")

    with gr.Row(equal_height=True):
        # Left column: images + add image
        with gr.Column(scale=1):
            gr.Markdown("**Images**")
            img1 = gr.Image(type="filepath", label="Image 1", height=220, elem_classes=["rounded", "tile", "tile-blue"])
            img2 = gr.Image(type="filepath", label="Image 2", height=220, elem_classes=["rounded", "tile", "tile-blue"])
            img3 = gr.Image(type="filepath", label="Image 3", height=220, elem_classes=["rounded", "tile", "tile-blue"])

            # Optional extra slots (hidden until added)
            extra_imgs = []
            for i in range(4, 9):
                comp = gr.Image(type="filepath", label=f"Image {i}", height=220, visible=False, elem_classes=["rounded", "tile", "tile-blue"])
                extra_imgs.append(comp)

            add_btn = gr.Button("Add Image", variant="secondary")

        # Middle column: prompts + stitch buttons
        with gr.Column(scale=1):
            gr.Markdown("**Prompts**")
            prompt12 = gr.Textbox(label="Prompt for Stitch 1 & 2", lines=3, placeholder="Describe motion/style/etc.", elem_classes=["rounded", "tile"])
            seed12 = gr.Number(label="Seed (optional)", value=None, precision=0)
            stitch12 = gr.Button("Stitch 1 & 2", elem_classes=["small-btn"])

            prompt23 = gr.Textbox(label="Prompt for Stitch 2 & 3", lines=3, placeholder="Describe motion/style/etc.", elem_classes=["rounded", "tile"])
            seed23 = gr.Number(label="Seed (optional)", value=None, precision=0)
            stitch23 = gr.Button("Stitch 2 & 3", elem_classes=["small-btn"])

            with gr.Accordion("Advanced (API & Stitch)", open=False):
                api_url = gr.Textbox(label="Backend API URL", value=DEFAULT_API_URL)
                crossfade = gr.Slider(0.0, 1.5, value=0.4, step=0.1, label="Crossfade seconds")
                clear_btn = gr.Button("Clear All")

        # Right column: video outputs
        with gr.Column(scale=1):
            gr.Markdown("**Outputs**")
            vid12 = gr.Video(label="Video (image 1 + 2) output", elem_classes=["rounded", "tile", "tile-yellow"])
            vid23 = gr.Video(label="Video (image 2 + 3) output", elem_classes=["rounded", "tile", "tile-yellow"])
            # Hidden sinks for stitch_pair's error-message return value
            # (created once here instead of inline in each event wiring).
            status12 = gr.Textbox(visible=False)
            status23 = gr.Textbox(visible=False)

    # Per-session count of revealed extra slots. Component attributes are
    # shared across all sessions in Gradio, so mutating `comp.visible`
    # server-side (the previous approach) leaked state between users;
    # per-session UI state must live in gr.State.
    extra_count = gr.State(0)

    # Wire up actions
    def _on_add(count: int):
        """Reveal the next hidden extra image slot for this session."""
        count = min(count + 1, len(extra_imgs))
        visibility = [gr.update(visible=(i < count)) for i in range(len(extra_imgs))]
        return [count] + visibility

    add_btn.click(
        _on_add,
        inputs=[extra_count],
        outputs=[extra_count, *extra_imgs],
    )

    stitch12.click(
        stitch_pair,
        inputs=[img1, img2, prompt12, seed12, api_url, crossfade],
        outputs=[vid12, status12],
    )

    stitch23.click(
        stitch_pair,
        inputs=[img2, img3, prompt23, seed23, api_url, crossfade],
        outputs=[vid23, status23],
    )

    def _on_clear():
        """Reset every input/output (and the extra-slot count) to initial state."""
        image_updates = [gr.update(value=None, visible=True) for _ in (img1, img2, img3)]
        image_updates += [gr.update(value=None, visible=False) for _ in extra_imgs]
        return image_updates + [None, None, "", "", gr.update(value=DEFAULT_API_URL), 0.4, 0]

    clear_btn.click(
        _on_clear,
        inputs=None,
        outputs=[img1, img2, img3, *extra_imgs, vid12, vid23, prompt12, prompt23, api_url, crossfade, extra_count],
    )

if __name__ == "__main__":
    demo.launch()