Spaces:

MoonMath-ai
/

StitchTool

Sleeping

App Files Files Community

Shalmoni committed on Sep 29, 2025

Commit

7f0be4a

verified ·

1 Parent(s): 1addcdc

Update app.py

Browse files

Files changed (1) hide show

app.py +341 -181

app.py CHANGED Viewed

@@ -1,211 +1,371 @@
-import os, uuid, time, shutil, mimetypes, subprocess, requests, gradio as gr
-from datetime import datetime
 from typing import Optional, List
-# --- config ---
-ENDPOINT = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"
-FFMPEG = "ffmpeg"
-OUT, TMP = "outputs", "tmp"
-os.makedirs(OUT, exist_ok=True); os.makedirs(TMP, exist_ok=True)
-ts     = lambda: datetime.utcnow().strftime("%Y%m%d_%H%M%S")
-fname  = lambda p,e: f"{p}_{ts()}_{uuid.uuid4().hex[:6]}.{e}"
-abspath= lambda p: os.path.abspath(p)
-def run_ffmpeg(args: List[str]):
-    p = subprocess.run([FFMPEG]+args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    if p.returncode:
-        raise RuntimeError(p.stderr.decode(errors="ignore"))
-def extract_last_frame(video_path: str) -> str:
-    out = os.path.join(TMP, fname("last", "png"))
-    run_ffmpeg(["-sseof","-1","-i",video_path,"-frames:v","1","-q:v","2",out])
-    return out
-def concat_videos(paths: List[str]) -> str:
-    if not paths: raise ValueError("No videos selected.")
-    if len(paths)==1:
-        dst = os.path.join(OUT, fname("continuous","mp4")); shutil.copy(paths[0], dst); return dst
-    listfile = os.path.join(TMP, f"concat_{uuid.uuid4().hex}.txt")
-    with open(listfile,"w") as f:
-        for p in paths: f.write(f"file '{abspath(p)}'\n")
-    out = os.path.join(OUT, fname("continuous","mp4"))
-    run_ffmpeg(["-f","concat","-safe","0","-i",listfile,"-c","copy",out])
-    os.remove(listfile)
-    return out
-def zip_used(paths: List[str]) -> str:
-    pack = os.path.join(TMP, f"pack_{uuid.uuid4().hex[:6]}")
-    os.makedirs(pack, exist_ok=True)
-    for p in paths: shutil.copy(p, pack)
-    base = os.path.join(OUT, f"used_{ts()}")
-    shutil.make_archive(base, "zip", pack)
-    shutil.rmtree(pack, ignore_errors=True)
-    return base + ".zip"
-def save_video_bytes(content: bytes, content_type: str) -> str:
-    ext = (mimetypes.guess_extension(content_type) or ".mp4").lstrip(".")
-    path = os.path.join(OUT, fname("gen", ext))
-    with open(path,"wb") as f: f.write(content)
-    return path
-def call_backend(
     prompt: str,
-    image_bytes_path: str,
-    negative_prompt: Optional[str],
-    fps: int,
-    vlen: int,
-    steps: Optional[int],
-    seed: Optional[int]
-) -> str:
-    params = {
-        "prompt": prompt,
-        "frames_per_second": str(fps),
-        "video_length": str(vlen),
     }
-    if negative_prompt:           params["negative_prompt"]      = negative_prompt
-    if steps is not None:         params["num_inference_steps"]  = str(steps)
-    if seed is None:              seed = int(time.time())
-    params["seed"] = str(seed)
-    files = {"image_bytes": (os.path.basename(image_bytes_path), open(image_bytes_path,"rb"),
-                             "application/octet-stream")}
     try:
-        r = requests.post(ENDPOINT, params=params, files=files, headers={"accept":"application/json"}, timeout=600)
-    finally:
-        try: files["image_bytes"][1].close()
-        except: pass
-    if r.status_code != 200:
-        raise RuntimeError(f"Backend {r.status_code}: {r.text[:500]}")
-    ctype = r.headers.get("Content-Type","")
-    if ctype.startswith("video/"):   # raw video bytes
-        return save_video_bytes(r.content, ctype)
-    # expect JSON with { "video_url": ... }
     try:
-        payload = r.json()
-        url = payload.get("video_url")
-        if not url: raise ValueError("no video_url in response")
-        r2 = requests.get(url, stream=True, timeout=600)
-        if r2.status_code != 200: raise RuntimeError(f"fetch video {r2.status_code}")
-        path = os.path.join(OUT, fname("gen","mp4"))
-        with open(path,"wb") as f:
-            for chunk in r2.iter_content(1<<20):
-                if chunk: f.write(chunk)
-        return path
     except Exception:
-        # fallback if backend mislabels content
-        return save_video_bytes(r.content, ctype or "video/mp4")
-# ================= UI =================
-with gr.Blocks() as demo:
-    gr.Markdown("## Continuous Video — chain last frame → next first frame")
-    # state
-    used_videos   = gr.State([])      # list[str]
-    last_seed_img = gr.State(None)    # PNG path (becomes image_bytes)
-    current_video = gr.State(None)    # latest generated video
-    with gr.Row():
-        prompt = gr.Textbox(label="Prompt", placeholder="Describe your shot…", lines=2)
-    with gr.Row():
-        start_file = gr.File(
-            label="Initial start image or video (only for the very first clip)",
-            file_types=["image","video"]
-        )
-    with gr.Row():
-        negative = gr.Textbox(label="Negative prompt (optional)", placeholder="What to avoid…")
-    with gr.Row():
-        fps   = gr.Slider(1,60, value=24, step=1, label="Frames per second")
-        vlen  = gr.Slider(1,12, value=4,  step=1, label="Video length (seconds)")
-        steps = gr.Slider(1,100, value=30, step=1, label="Num inference steps (optional)")
-        seed  = gr.Number(label="Seed (optional)", precision=0)
-    video_out = gr.Video(label="Output")
-    status_md = gr.Markdown("")  # shows ✅ Saved… after Chain
     with gr.Row():
-        gen_btn  = gr.Button("Generate", variant="primary")
-        chain_btn= gr.Button("Chain (save & clear)")
-        dl_btn   = gr.Button("Download")
-    files_out = gr.Files(label="Downloads")
-    # ---- handlers ----
-    def on_generate(prompt_txt, start_file_obj, neg, fps_val, vlen_val, steps_val, seed_val, seed_img):
-        if not prompt_txt or not prompt_txt.strip():
-            raise gr.Error("Prompt is required.")
-        # choose image_bytes source: last stolen frame > uploaded start file
-        image_path = seed_img
-        if not image_path:
-            if not start_file_obj: raise gr.Error("First generation requires an initial image OR video.")
-            image_path = start_file_obj.name
-        try:
-            out = call_backend(
-                prompt=prompt_txt.strip(),
-                image_bytes_path=image_path,
-                negative_prompt=(neg.strip() if (neg and neg.strip()) else None),
-                fps=int(fps_val),
-                vlen=int(vlen_val),
-                steps=int(steps_val) if steps_val else None,
-                seed=int(seed_val) if (seed_val not in [None,""]) else None
             )
-        except Exception as e:
-            raise gr.Error(str(e))
-        return out, out, gr.update(value="")  # show video, set state, clear status
-    gen_btn.click(
-        on_generate,
-        inputs=[prompt, start_file, negative, fps, vlen, steps, seed, last_seed_img],
-        outputs=[video_out, current_video, status_md]
-    )
-    def on_chain(curr, used):
-        """
-        Save current video, steal last frame for next seed,
-        and CLEAR: preview, prompt, negative, start_file.
-        Also show a visible 'saved' banner.
-        """
-        if not curr or not os.path.exists(curr):
-            raise gr.Error("Generate a video first.")
-        new_used = used[:] if used else []
-        if curr not in new_used: new_used.append(curr)
-        # last frame for next seed
-        seed_img = extract_last_frame(curr)
-        saved_idx = len(new_used)
-        saved_msg = f"✅ **Saved clip #{saved_idx}** — last frame captured. Ready for your next prompt."
-        return (
-            new_used,            # used_videos
-            seed_img,            # last_seed_img (image_bytes for next gen)
-            gr.update(value=None), # CLEAR prompt
-            gr.update(value=None), # CLEAR negative prompt
-            gr.update(value=None), # CLEAR start_file
-            gr.update(value=None), # CLEAR video preview
-            gr.update(value=saved_msg)  # status banner
         )
-    chain_btn.click(
-        on_chain,
-        inputs=[current_video, used_videos],
-        outputs=[used_videos, last_seed_img, prompt, negative, start_file, video_out, status_md]
     )
-    def on_download(used):
-        if not used: raise gr.Error("Nothing to download. Click Chain after generating clips.")
-        files = []
-        try:
-            merged = concat_videos(used); files.append(merged)
-        except Exception as e:
-            print("concat error:", e)
-        try:
-            zipped = zip_used(used); files.append(zipped)
-        except Exception as e:
-            print("zip error:", e)
-        return files
-    dl_btn.click(on_download, inputs=[used_videos], outputs=[files_out])
 if __name__ == "__main__":
-    demo.launch()

+import os, io, time, base64, random, subprocess
 from typing import Optional, List
+from urllib.parse import urlencode
+import requests
+from PIL import Image
+import gradio as gr
+# -------- Modal inference endpoint (dev) --------
+# Base URL that stitch_call() POSTs to; the request options are appended to it
+# as a URL query string.
+INFERENCE_URL = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"
+# -------- settings --------
+# Number of gr.Image upload components created by the UI; add_image_slot()
+# never lets the visible count grow past this value.
+MAX_SLOTS = 12          # max image slots user can reveal
+# -------- small helpers --------
+def _save_video_bytes(data: bytes, tag: str) -> str:
+    """Write raw video bytes to a uniquely named ``.mp4`` file under ``/tmp``.
+
+    The name is built from *tag*, the current Unix time and a random suffix.
+    A name with only whole-second resolution lets two clips saved within the
+    same second share one path, so the later write replaces the earlier clip.
+
+    Args:
+        data: Encoded video content to persist.
+        tag: Short prefix for the file name (e.g. ``"stitch"``).
+
+    Returns:
+        Path of the file that was written.
+    """
+    import tempfile  # local import so the file-level import block is untouched
+
+    os.makedirs("/tmp", exist_ok=True)
+    # mkstemp creates the file itself, so the returned name is never reused.
+    fd, path = tempfile.mkstemp(
+        prefix=f"{tag}_{int(time.time())}_", suffix=".mp4", dir="/tmp"
+    )
+    with os.fdopen(fd, "wb") as f:
+        f.write(data)
+    return path
+def _png_bytes(img: Image.Image) -> bytes:
+    """Encode *img* as PNG in memory and return the resulting bytes."""
+    with io.BytesIO() as stream:
+        img.save(stream, format="PNG")
+        return stream.getvalue()
+def _download_to_bytes(url: str) -> bytes:
+    """Fetch *url* with a GET request and return the response body.
+
+    The request uses a 180-second timeout; ``raise_for_status()`` turns an
+    HTTP error status into an exception instead of returning the error body.
+    """
+    response = requests.get(url, timeout=180)
+    response.raise_for_status()
+    payload = response.content
+    return payload
+def stitch_call(
+    start_img: Image.Image,
+    end_img: Image.Image,
     prompt: str,
+    seed: Optional[int],
+    negative_prompt: Optional[str] = None,
+    frames_per_second: int = 24,
+    video_length: int = 4,
+    num_inference_steps: Optional[int] = None,
+) -> Optional[str]:
+    """POST a start/end frame pair to INFERENCE_URL and save the returned video.
+
+    Required (in body): image_bytes (+ image_bytes_end)
+    In URL query: prompt, negative_prompt, frames_per_second, video_length, seed, num_inference_steps
+
+    The reply is accepted in three shapes: raw video bytes (any non-JSON
+    content type), JSON carrying an http(s) URL under ``video_url`` / ``url`` /
+    ``result`` / ``output``, or JSON carrying base64 under ``video_b64`` /
+    ``videoBase64``.
+
+    Args:
+        start_img: First frame; sent as PNG in the ``image_bytes`` part.
+        end_img: Last frame; sent as PNG in the ``image_bytes_end`` part.
+        prompt: Text prompt; ``None``/empty is sent as an empty string.
+        seed: Generation seed; ``None``, ``0`` and ``-1`` select a random one.
+        negative_prompt: Sent only when non-empty.
+        frames_per_second: Sent as an integer query parameter.
+        video_length: Sent as an integer query parameter.
+        num_inference_steps: Sent only when not ``None``.
+
+    Returns:
+        Path of the saved video, or ``None`` when an image is missing or the
+        request/decoding fails (the reason is printed).
+    """
+    if start_img is None or end_img is None:
+        return None
+    # default seed behavior
+    if seed in (None, 0, -1):
+        seed = random.randint(1, 2**31 - 1)
+    # Build query string
+    q = {
+        "prompt": prompt or "",
+        "seed": int(seed),
+        "frames_per_second": int(frames_per_second),
+        "video_length": int(video_length),
+    }
+    if negative_prompt:
+        q["negative_prompt"] = negative_prompt
+    if num_inference_steps is not None:
+        q["num_inference_steps"] = int(num_inference_steps)
+    url = f"{INFERENCE_URL}?{urlencode(q)}"
+    # Images go in the body
+    files = {
+        "image_bytes": ("start.png", _png_bytes(start_img), "image/png"),
+        "image_bytes_end": ("end.png", _png_bytes(end_img), "image/png"),
     }
+    headers = {"accept": "application/json"}
     try:
+        resp = requests.post(url, files=files, headers=headers, timeout=600)
+        # Check the status before looking at the body: a JSON error reply
+        # would otherwise fall through every branch and return None with
+        # nothing logged.
+        if not resp.ok:
+            print(f"stitch_call error: backend returned {resp.status_code}: {resp.text[:500]}")
+            return None
+        ctype = (resp.headers.get("content-type") or "").lower()
+        # Raw video bytes
+        if "application/json" not in ctype:
+            return _save_video_bytes(resp.content, "stitch")
+        # JSON with url or base64
+        data = resp.json()
+        if not isinstance(data, dict):
+            raise ValueError(f"unexpected JSON payload of type {type(data).__name__}")
+        video_url = data.get("video_url") or data.get("url") or data.get("result") or data.get("output")
+        if isinstance(video_url, str) and video_url.startswith(("http://", "https://")):
+            return _save_video_bytes(_download_to_bytes(video_url), "stitch")
+        video_b64 = data.get("video_b64") or data.get("videoBase64")
+        if isinstance(video_b64, str):
+            # Tolerate base64 text whose trailing '=' padding was stripped.
+            video_b64 += "=" * (-len(video_b64) % 4)
+            return _save_video_bytes(base64.b64decode(video_b64), "stitch")
+        print("stitch_call error: response JSON has no video URL or base64 payload; keys:", sorted(data))
+    except Exception as e:
+        # Best-effort boundary: callers treat None as "generation failed",
+        # so report the cause here rather than propagating.
+        print("stitch_call error:", e)
+    return None
+# -------- FFmpeg-based concatenation (N clips) --------
+def concat_many(videos: List[str]) -> Optional[str]:
+    """Join clips in order with FFmpeg's concat demuxer using stream copy.
+
+    Falsy entries are dropped. Paths are made absolute because the concat
+    demuxer resolves relative entries against the list file's directory, and
+    single quotes are escaped so a quote in a path cannot break the list
+    syntax. The temporary list file is always removed.
+
+    Args:
+        videos: Paths of the clips to join, in playback order.
+
+    Returns:
+        Path of the joined ``.mp4`` under ``/tmp``, or ``None`` when fewer than
+        two clips are given or FFmpeg fails (the reason is printed).
+    """
+    import tempfile  # local import so the file-level import block is untouched
+
+    vids = [os.path.abspath(v) for v in videos if v]
+    if len(vids) < 2:
+        return None
+    out_path = None
+    list_file = None
     try:
+        os.makedirs("/tmp", exist_ok=True)
+        # mkstemp gives collision-free names even for calls in the same second.
+        fd, out_path = tempfile.mkstemp(prefix="final_", suffix=".mp4", dir="/tmp")
+        os.close(fd)  # ffmpeg overwrites this placeholder because of -y
+        fd, list_file = tempfile.mkstemp(prefix="list_", suffix=".txt", dir="/tmp")
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            for v in vids:
+                # Inside '...' a literal quote is written as '\'' .
+                escaped = v.replace("'", "'\\''")
+                f.write(f"file '{escaped}'\n")
+        subprocess.run(
+            ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", out_path],
+            check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        return out_path
+    except Exception as e:
+        detail = e
+        if isinstance(e, subprocess.CalledProcessError) and e.stderr:
+            # The exception text only carries the exit code; stderr has the reason.
+            detail = e.stderr.decode(errors="ignore")[-500:]
+        print("concat_many error:", detail)
+        # Do not leave the empty or partial output file behind.
+        if out_path and os.path.exists(out_path):
+            os.remove(out_path)
+        return None
+    finally:
+        if list_file and os.path.exists(list_file):
+            os.remove(list_file)
+# -------- Timeline HTML renderer --------
+def render_timeline_html(paths: List[str]):
+    """Return the timeline markup: one ``<video>`` tile per non-empty path.
+
+    Paths are HTML-escaped before being placed in the ``src`` attribute, so
+    quotes or angle brackets in a file name cannot break out of the attribute.
+
+    Args:
+        paths: Clip paths in timeline order; ``None`` and falsy entries are
+            ignored.
+
+    Returns:
+        An HTML string: a placeholder ``<div>`` when there are no clips,
+        otherwise a ``tl-grid`` ``<div>`` with tiles labelled ``Clip 1..N``.
+    """
+    import html  # local import so the file-level import block is untouched
+
+    vids = [p for p in (paths or []) if p]
+    if not vids:
+        return "<div class='tl-grid tl-empty'>No clips yet. Generate and click ‘Add to timeline’.</div>"
+    # NOTE(review): src is the raw local path — confirm the Gradio version in
+    # use serves it as-is (it may need a file-route prefix).
+    items = [
+        f"""
+            <div class="tl-item">
+              <video src="{html.escape(str(p), quote=True)}" controls playsinline></video>
+              <div class="tl-label">Clip {i}</div>
+            </div>
+            """
+        for i, p in enumerate(vids, 1)
+    ]
+    return f"<div class='tl-grid'>{''.join(items)}</div>"
+# =========================
+# Gradio callbacks / state ops
+# =========================
+def add_image_slot(visible_slots: int):
+    """Return the slot count after revealing one more, capped at MAX_SLOTS."""
+    wanted = int(visible_slots) + 1
+    return wanted if wanted < MAX_SLOTS else MAX_SLOTS
+def _reveal_slots(n, *imgs):
+    """Return MAX_SLOTS visibility updates: the first *n* shown, the rest hidden.
+
+    ``imgs`` is accepted because the event passes the image components as
+    inputs, but the values are not read.
+    """
+    shown = int(n)
+    return [gr.update(visible=(slot < shown)) for slot in range(MAX_SLOTS)]
+def collect_choices(*imgs):
+    """Return two identical dropdown updates listing the non-empty slots.
+
+    Choices are the 1-based positions, as strings, of every image that is not
+    ``None``; one update is returned for each of the two dropdowns.
+    """
+    labels = [str(pos) for pos, img in enumerate(imgs, start=1) if img is not None]
+    return gr.update(choices=labels), gr.update(choices=labels)
+def stitch_selected(
+    prompt, negative_prompt, fps, length_sec, seed, start_idx_str, end_idx_str, *imgs
+):
+    """Validate the UI selection and run stitch_call() for the chosen frames.
+
+    Args:
+        prompt: Prompt text; ``None`` is treated as empty.
+        negative_prompt: Optional text; blank or whitespace-only is sent as ``None``.
+        fps: Frame-rate selection; falsy falls back to 24.
+        length_sec: Length selection in seconds; falsy falls back to 4.
+        seed: Seed value; falsy is passed as 0, which stitch_call() replaces
+            with a random seed.
+        start_idx_str: 1-based slot label of the start frame.
+        end_idx_str: 1-based slot label of the end frame.
+        *imgs: Current values of all image slots, in slot order.
+
+    Returns:
+        Path of the generated clip, or ``None`` after showing a warning when
+        the selection or options are invalid or generation fails.
+    """
+    if not start_idx_str or not end_idx_str:
+        gr.Warning("Please select Start and End frames.")
+        return None
+    try:
+        s = int(start_idx_str) - 1
+        e = int(end_idx_str) - 1
+    except (TypeError, ValueError):
+        # int() raises only these two for a malformed label; anything else
+        # would be a real bug and should not be hidden as "invalid selection".
+        gr.Warning("Invalid Start/End selection.")
+        return None
+    if s < 0 or e < 0 or s >= len(imgs) or e >= len(imgs):
+        gr.Warning("Start/End out of range.")
+        return None
+    start_img = imgs[s]
+    end_img = imgs[e]
+    if start_img is None or end_img is None:
+        gr.Warning("Selected slots are empty.")
+        return None
+    try:
+        fps_val = int(str(fps)) if fps else 24
+        len_val = int(str(length_sec)) if length_sec else 4
+        seed_val = int(seed or 0)
+    except (TypeError, ValueError):
+        # Warn like the other validation failures instead of letting the
+        # conversion error escape the callback.
+        gr.Warning("Invalid frame rate, video length or seed.")
+        return None
+    vid = stitch_call(
+        start_img=start_img,
+        end_img=end_img,
+        prompt=prompt or "",
+        seed=seed_val,
+        negative_prompt=(negative_prompt or "").strip() or None,
+        frames_per_second=fps_val,
+        video_length=len_val,
+        num_inference_steps=None,
+    )
+    if not vid:
+        gr.Warning("Generation failed.")
+        return None
+    return vid  # path for preview
+def add_to_timeline(preview_path, timeline_paths: List[str]):
+    """Return a copy of the timeline with the preview appended, plus its HTML.
+
+    With no preview clip a warning is shown and the timeline copy is returned
+    unchanged. The incoming list is never mutated.
+    """
+    clips = list(timeline_paths or [])
+    if preview_path:
+        clips.append(preview_path)
+    else:
+        gr.Warning("Generate a clip first.")
+    return clips, gr.update(value=render_timeline_html(clips))
+def stitch_all_from_timeline(timeline_paths: List[str]):
+    """Concatenate every timeline clip in order and return the result path.
+
+    Shows a warning and returns ``None`` when the timeline holds fewer than two
+    clips; shows a warning and returns concat_many()'s falsy result when the
+    concatenation fails.
+    """
+    clips = list(timeline_paths or [])
+    if len(clips) >= 2:
+        merged = concat_many(clips)
+        if not merged:
+            gr.Warning("Failed to concatenate clips.")
+        return merged
+    gr.Warning("Add at least two clips to the timeline first.")
+    return None
+# =========================
+# UI
+# =========================
+# Stylesheet handed to gr.Blocks(css=...). The pill / rounded / gallery-row /
+# stitch-box classes are attached to components through elem_classes; the
+# tl-* classes are the ones render_timeline_html() puts on its markup.
+CSS = """
+.gradio-container { padding: 24px; }
+.pill button { border-radius: 999px !important; padding: 10px 18px; }
+.rounded textarea { border-radius: 16px !important; }
+.gallery-row { display:flex; gap:16px; overflow-x:auto; padding:8px 4px; }
+.gallery-row .gradio-image { min-width: 220px; }
+.tl-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
+  gap: 12px;
+}
+.stitch-box {
+  background-color: #f0f4ff;   /* pick any color you like */
+  border-radius: 12px;
+  padding: 16px;
+}
+.tl-grid video {
+  width: 100%;
+  height: 120px;
+  object-fit: cover;
+  border-radius: 12px;
+  display: block;
+}
+.tl-label {
+  font-size: 12px;
+  color: #9aa0a6;
+  margin-top: 4px;
+  text-align: center;
+}
+.tl-empty { color: #9aa0a6; padding: 8px 4px; }
+"""
+# Build the Gradio UI and wire the callbacks defined above; `demo` is the
+# Blocks app launched from the __main__ guard at the bottom.
+with gr.Blocks(css=CSS, title="StitchTool") as demo:
+    gr.Markdown("## StitchTool")
+    # --- State ---
+    visible_slots = gr.State(value=3)      # number of visible image slots
+    timeline_state = gr.State(value=[])    # list[str] of video file paths (timeline)
+    # --- Image gallery (horizontal, grows on demand) ---
+    with gr.Row(elem_classes=["gallery-row"]):
+        img_comps = []
+        # All MAX_SLOTS components are created up front; only the first 3 start
+        # visible, matching the initial value of visible_slots.
+        for i in range(MAX_SLOTS):
+            comp = gr.Image(label=f"Image {i+1} upload", type="pil", visible=(i < 3))
+            img_comps.append(comp)
+        add_btn = gr.Button("＋ Add image")
+    # clicking add → reveal one more slot
+    add_btn.click(
+        fn=add_image_slot,
+        inputs=[visible_slots],
+        outputs=[visible_slots],
+    )
+    # reflect visibility changes whenever visible_slots changes
+    # (img_comps are passed as inputs, but _reveal_slots only reads the count)
+    visible_slots.change(
+        fn=_reveal_slots,
+        inputs=[visible_slots] + img_comps,
+        outputs=img_comps
+    )
+    # Seed + Start/End selection + Prompt + options + Stitch + Preview
+    seed = gr.Number(value=0, precision=0, label="Seed (0 = random)")
     with gr.Row():
+        # Left column: controls (with colored background via .stitch-box)
+        with gr.Column(scale=1, min_width=420, elem_classes=["stitch-box"]):
+            # Choices start empty; collect_choices() fills them as images arrive.
+            start_dd = gr.Dropdown(label="Start frame", choices=[], interactive=True)
+            end_dd = gr.Dropdown(label="End frame", choices=[], interactive=True)
+            prompt = gr.Textbox(
+                placeholder="Describe the transition between the selected start and end frames…",
+                lines=3,
+                label="Prompt",
+                elem_classes=["rounded"]
             )
+            negative = gr.Textbox(
+                placeholder="Optional: things to avoid (e.g., 'bad quality, extra fingers, etc.')",
+                lines=2,
+                label="Negative prompt",
+                elem_classes=["rounded"]
+            )
+            with gr.Row():
+                # Values are strings; stitch_selected() converts them to int.
+                fps = gr.Dropdown(
+                    label="Frame rate",
+                    choices=["16", "24", "32"],
+                    value="24",
+                    interactive=True,
+                )
+                length_sec = gr.Dropdown(
+                    label="Video length (sec)",
+                    choices=["2", "4"],
+                    value="4",
+                    interactive=True,
+                )
+            run_btn = gr.Button("Generate", elem_classes=["pill"])
+            add_tl_btn = gr.Button("Add to timeline", elem_classes=["pill"])
+        # Right column: preview video
+        with gr.Column(scale=1, min_width=420):
+            preview = gr.Video(label="Video output", interactive=False)
+    # keep start/end dropdowns up to date based on which slots have images
+    # (every slot registers the same handler, which re-reads all slots)
+    for comp in img_comps:
+        comp.change(
+            fn=collect_choices,
+            inputs=img_comps,
+            outputs=[start_dd, end_dd]
         )
+    # stitch action → preview
+    # The input order must match stitch_selected()'s positional parameters,
+    # with the image slots last so they land in *imgs.
+    run_btn.click(
+        fn=stitch_selected,
+        inputs=[prompt, negative, fps, length_sec, seed, start_dd, end_dd] + img_comps,
+        outputs=[preview]
     )
+    # --- Dynamic timeline (no placeholders) ---
+    with gr.Row():
+        timeline_html = gr.HTML(value=render_timeline_html([]))
+    # NOTE(review): `preview` as an input presumably yields the shown clip's
+    # file path — confirm against the Gradio version in use.
+    add_tl_btn.click(
+        fn=add_to_timeline,
+        inputs=[preview, timeline_state],
+        outputs=[timeline_state, timeline_html]
+    )
+    # final stitch all (concatenate in order)
+    with gr.Row():
+        with gr.Column(scale=1, min_width=420):
+            stitch_all_btn = gr.Button("Stitch All", elem_classes=["pill"])
+        with gr.Column(scale=1, min_width=420):
+            final_vid = gr.Video(label="Stitched Video Output", interactive=False)
+    stitch_all_btn.click(
+        fn=stitch_all_from_timeline,
+        inputs=[timeline_state],
+        outputs=[final_vid]
+    )
 if __name__ == "__main__":
+    # queue() is called before launch() so events go through Gradio's queue.
+    demo.queue().launch()