Spaces:

MoonMath-ai
/

StitchTool

Sleeping

App Files Community

Shalmoni committed on Sep 18

Commit

75c12be

verified ·

1 Parent(s): a667aee

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -73

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os, io, time, base64, random, subprocess
-from typing import Optional
 from urllib.parse import quote
 import requests
@@ -9,6 +9,10 @@ import gradio as gr
 # -------- Modal inference endpoint (dev) --------
 INFERENCE_URL = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"
 # -------- small helpers --------
 def _save_video_bytes(data: bytes, tag: str) -> str:
     os.makedirs("/tmp", exist_ok=True)
@@ -35,7 +39,6 @@ def stitch_call(start_img: Image.Image, end_img: Image.Image, prompt: str, seed:
         seed = random.randint(1, 2**31 - 1)
     url = f"{INFERENCE_URL}?prompt={quote(prompt or '')}&seed={seed}"
     files = {
         "image_bytes": ("start.png", _png_bytes(start_img), "image/png"),
         "image_bytes_end": ("end.png", _png_bytes(end_img), "image/png"),
@@ -55,122 +58,217 @@ def stitch_call(start_img: Image.Image, end_img: Image.Image, prompt: str, seed:
         data = resp.json()
         video_url = data.get("video_url") or data.get("url") or data.get("result")
         if isinstance(video_url, str) and video_url.startswith("http"):
-            b = _download_to_bytes(video_url)
-            return _save_video_bytes(b, "stitch")
         video_b64 = data.get("video_b64")
         if isinstance(video_b64, str):
             pad = (-len(video_b64)) % 4
             if pad:
                 video_b64 += "=" * pad
-            b = base64.b64decode(video_b64)
-            return _save_video_bytes(b, "stitch")
     except Exception as e:
         print("stitch_call error:", e)
     return None
-# -------- FFmpeg-based concatenation --------
-def concat_videos(vid1: str, vid2: str) -> Optional[str]:
-    if not vid1 or not vid2:
         return None
     try:
         os.makedirs("/tmp", exist_ok=True)
         out_path = f"/tmp/final_{int(time.time())}.mp4"
-        # Create a temporary file list for ffmpeg
         list_file = f"/tmp/list_{int(time.time())}.txt"
         with open(list_file, "w") as f:
-            f.write(f"file '{vid1}'\n")
-            f.write(f"file '{vid2}'\n")
-        # Run ffmpeg concat
         subprocess.run(
             ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", out_path],
-            check=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
         )
         return out_path
     except Exception as e:
-        print("concat_videos error:", e)
         return None
-# -------- Gradio callbacks --------
-def stitch_12(prompt12, seed, img1, img2):
-    path = stitch_call(img1, img2, prompt12 or "", int(seed or 0))
-    return path
-def stitch_23(prompt23, seed, img2, img3):
-    path = stitch_call(img2, img3, prompt23 or "", int(seed or 0))
-    return path
-def stitch_all(vid12, vid23):
-    if vid12 is None or vid23 is None:
-        gr.Warning("Generate both videos first before stitching all.")
         return None
-    return concat_videos(vid12, vid23)
-# ---------- UI ----------
 CSS = """
 .gradio-container { padding: 24px; }
 .pill button { border-radius: 999px !important; padding: 10px 18px; }
 .rounded textarea { border-radius: 16px !important; }
 """
-with gr.Blocks(css=CSS, title="Stitch — vertical flow") as demo:
-    gr.Markdown("## StitchMaster")
-    # Top row: 1 - 2 - 3 (side-by-side)
-    with gr.Row():
-        with gr.Column(scale=1, min_width=280):
-            img1 = gr.Image(label="Image 1 upload", type="pil")
-        with gr.Column(scale=1, min_width=280):
-            img2 = gr.Image(label="Image 2 upload", type="pil")
-        with gr.Column(scale=1, min_width=280):
-            img3 = gr.Image(label="Image 3 upload", type="pil")
-    # Seed under the uploads
     seed = gr.Number(value=0, precision=0, label="Seed (0 = random)")
-    # Stitch 1→2: LEFT = prompt+button, RIGHT = video
-    with gr.Row():
-        with gr.Column(scale=1, min_width=420):
-            prompt12 = gr.Textbox(
-                placeholder="Prompt for stitching 1→2",
-                lines=2, label="Prompt (1→2)", elem_classes=["rounded"]
-            )
-            btn12 = gr.Button("Generate 1→2", elem_classes=["pill"])
-        with gr.Column(scale=1, min_width=420):
-            vid12 = gr.Video(label="Video (1→2)", interactive=False)
-    # Stitch 2→3: LEFT = prompt+button, RIGHT = video
     with gr.Row():
         with gr.Column(scale=1, min_width=420):
-            prompt23 = gr.Textbox(
-                placeholder="Prompt for stitching 2→3",
-                lines=2, label="Prompt (2→3)", elem_classes=["rounded"]
             )
-            btn23 = gr.Button("Generate 2→3", elem_classes=["pill"])
         with gr.Column(scale=1, min_width=420):
-            vid23 = gr.Video(label="Video (2→3)", interactive=False)
-    # Final merge: LEFT = button, RIGHT = final video
     with gr.Row():
         with gr.Column(scale=1, min_width=420):
-            btn_all = gr.Button("Stitch Together", elem_classes=["pill"])
         with gr.Column(scale=1, min_width=420):
-            vid_all = gr.Video(label="Final Combined Video", interactive=False)
-    # keep your existing .click wiring below this block
-    # Wire buttons
-    btn12.click(stitch_12, inputs=[prompt12, seed, img1, img2], outputs=[vid12])
-    btn23.click(stitch_23, inputs=[prompt23, seed, img2, img3], outputs=[vid23])
-    btn_all.click(stitch_all, inputs=[vid12, vid23], outputs=[vid_all])
 if __name__ == "__main__":
     demo.queue().launch()

 import os, io, time, base64, random, subprocess
+from typing import Optional, List
 from urllib.parse import quote
 import requests
 # -------- Modal inference endpoint (dev) --------
 INFERENCE_URL = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"
+# -------- settings --------
+MAX_SLOTS = 12          # max image slots user can reveal
+MAX_TIMELINE = 20       # max clips in the timeline
 # -------- small helpers --------
 def _save_video_bytes(data: bytes, tag: str) -> str:
     os.makedirs("/tmp", exist_ok=True)
         seed = random.randint(1, 2**31 - 1)
     url = f"{INFERENCE_URL}?prompt={quote(prompt or '')}&seed={seed}"
     files = {
         "image_bytes": ("start.png", _png_bytes(start_img), "image/png"),
         "image_bytes_end": ("end.png", _png_bytes(end_img), "image/png"),
         data = resp.json()
         video_url = data.get("video_url") or data.get("url") or data.get("result")
         if isinstance(video_url, str) and video_url.startswith("http"):
+            return _save_video_bytes(_download_to_bytes(video_url), "stitch")
         video_b64 = data.get("video_b64")
         if isinstance(video_b64, str):
             pad = (-len(video_b64)) % 4
             if pad:
                 video_b64 += "=" * pad
+            return _save_video_bytes(base64.b64decode(video_b64), "stitch")
     except Exception as e:
         print("stitch_call error:", e)
     return None
+# -------- FFmpeg-based concatenation (N clips) --------
+def concat_many(videos: List[str]) -> Optional[str]:
+    vids = [v for v in videos if v]
+    if len(vids) < 2:
         return None
     try:
         os.makedirs("/tmp", exist_ok=True)
         out_path = f"/tmp/final_{int(time.time())}.mp4"
         list_file = f"/tmp/list_{int(time.time())}.txt"
         with open(list_file, "w") as f:
+            for v in vids:
+                f.write(f"file '{v}'\n")
         subprocess.run(
             ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", out_path],
+            check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
         )
         return out_path
     except Exception as e:
+        print("concat_many error:", e)
         return None
+# =========================
+# Gradio callbacks / state ops
+# =========================
+def add_image_slot(visible_slots: int):
+    """Reveal one more upload slot (up to MAX_SLOTS)."""
+    new_count = min(MAX_SLOTS, visible_slots + 1)
+    return new_count
+def collect_choices(*imgs):
+    """Build dropdown choices of available indices (1-based labels)."""
+    choices = []
+    for i, img in enumerate(imgs, start=1):
+        if img is not None:
+            choices.append(str(i))
+    # Return same list for both start/end dropdowns
+    return gr.update(choices=choices), gr.update(choices=choices)
+def stitch_selected(prompt, seed, start_idx_str, end_idx_str, *imgs):
+    """Run inference for selected start/end indices (1-based strings)."""
+    if not start_idx_str or not end_idx_str:
+        gr.Warning("Please select Start and End frames.")
+        return None
+    try:
+        s = int(start_idx_str) - 1
+        e = int(end_idx_str) - 1
+    except Exception:
+        gr.Warning("Invalid Start/End selection.")
+        return None
+    if s < 0 or e < 0 or s >= len(imgs) or e >= len(imgs):
+        gr.Warning("Start/End out of range.")
+        return None
+    start_img = imgs[s]
+    end_img = imgs[e]
+    if start_img is None or end_img is None:
+        gr.Warning("Selected slots are empty.")
         return None
+    vid = stitch_call(start_img, end_img, prompt or "", int(seed or 0))
+    if not vid:
+        gr.Warning("Generation failed.")
+        return None
+    return vid  # path for preview
+def add_to_timeline(preview_path, timeline_paths: List[str]):
+    """Append preview_path to timeline state; return updated per-slot outputs."""
+    if not preview_path:
+        gr.Warning("Generate a clip first.")
+        return timeline_paths, *([gr.update(value=None)] * MAX_TIMELINE)
+    # append if room
+    tl = list(timeline_paths or [])
+    if len(tl) >= MAX_TIMELINE:
+        gr.Warning("Timeline full.")
+        return tl, *([gr.update(value=None)] * MAX_TIMELINE)
+    tl.append(preview_path)
+    # map into video components
+    outputs = []
+    for i in range(MAX_TIMELINE):
+        outputs.append(gr.update(value=tl[i] if i < len(tl) else None))
+    return tl, *outputs
+def stitch_all_from_timeline(timeline_paths: List[str]):
+    vids = list(timeline_paths or [])
+    if len(vids) < 2:
+        gr.Warning("Add at least two clips to the timeline first.")
+        return None
+    out = concat_many(vids)
+    if not out:
+        gr.Warning("Failed to concatenate clips.")
+    return out
+# =========================
+# UI
+# =========================
 CSS = """
 .gradio-container { padding: 24px; }
 .pill button { border-radius: 999px !important; padding: 10px 18px; }
 .rounded textarea { border-radius: 16px !important; }
+.gallery-row { display:flex; gap:16px; overflow-x:auto; padding:8px 4px; }
+.gallery-row .gradio-image { min-width: 220px; }
+.timeline-row { display:flex; gap:16px; overflow-x:auto; padding:8px 4px; }
 """
+with gr.Blocks(css=CSS, title="StitchMaster") as demo:
+    gr.Markdown("## StitchMaster — Upload images, stitch between frames, build a timeline, and export a single video.")
+    # --- State ---
+    visible_slots = gr.State(value=3)      # how many image slots are visible
+    timeline_state = gr.State(value=[])    # list[str] of video file paths (timeline)
+    # --- Image gallery (growing) ---
+    with gr.Row(elem_classes=["gallery-row"]):
+        img_comps = []
+        for i in range(MAX_SLOTS):
+            comp = gr.Image(label=f"Image {i+1} upload", type="pil", visible=(i < 3))
+            img_comps.append(comp)
+        add_btn = gr.Button("＋ Add image")
+    # clicking add → reveal one more slot
+    add_btn.click(
+        fn=add_image_slot,
+        inputs=[visible_slots],
+        outputs=[visible_slots],
+    )
+    # reflect visibility changes whenever visible_slots changes
+    # (we re-render all image components with correct visibility)
+    def _reveal_slots(n, *imgs):
+        updates = []
+        for i in range(MAX_SLOTS):
+            updates.append(gr.update(visible=(i < int(n))))
+        return updates
+    visible_slots.change(
+        fn=_reveal_slots,
+        inputs=[visible_slots] + img_comps,
+        outputs=img_comps
+    )
+    # --- Stitch controls ---
     seed = gr.Number(value=0, precision=0, label="Seed (0 = random)")
     with gr.Row():
         with gr.Column(scale=1, min_width=420):
+            start_dd = gr.Dropdown(label="Start frame", choices=[], interactive=True)
+            end_dd = gr.Dropdown(label="End frame", choices=[], interactive=True)
+            prompt = gr.Textbox(
+                placeholder="Describe the transition between the selected start and end frames…",
+                lines=3, label="Prompt", elem_classes=["rounded"]
             )
+            run_btn = gr.Button("Stitch", elem_classes=["pill"])
+            add_tl_btn = gr.Button("Add to timeline", elem_classes=["pill"])
         with gr.Column(scale=1, min_width=420):
+            preview = gr.Video(label="Video output", interactive=False)
+    # keep start/end dropdowns up to date based on which slots actually have images
+    for comp in img_comps:
+        comp.change(
+            fn=collect_choices,
+            inputs=img_comps,
+            outputs=[start_dd, end_dd]
+        )
+    # stitch action → preview
+    run_btn.click(
+        fn=stitch_selected,
+        inputs=[prompt, seed, start_dd, end_dd] + img_comps,
+        outputs=[preview]
+    )
+    # add to timeline action → update state and visible clips
+    # Prepare timeline video components (scroll row)
+    with gr.Row(elem_classes=["timeline-row"]):
+        timeline_videos = [gr.Video(label=f"Clip {i+1}", interactive=False) for i in range(MAX_TIMELINE)]
+    add_tl_btn.click(
+        fn=add_to_timeline,
+        inputs=[preview, timeline_state],
+        outputs=[timeline_state] + timeline_videos
+    )
+    # final stitch all (concatenate in order)
     with gr.Row():
         with gr.Column(scale=1, min_width=420):
+            stitch_all_btn = gr.Button("Stitch All", elem_classes=["pill"])
         with gr.Column(scale=1, min_width=420):
+            final_vid = gr.Video(label="Stitched Video Output", interactive=False)
+    stitch_all_btn.click(
+        fn=stitch_all_from_timeline,
+        inputs=[timeline_state],
+        outputs=[final_vid]
+    )
 if __name__ == "__main__":
     demo.queue().launch()