Shalmoni committed
Commit c576104 · verified · 1 Parent(s): cb2acd5

Update app.py

Files changed (1)
  1. app.py +162 -342
app.py CHANGED
@@ -1,371 +1,191 @@
- import os, io, time, base64, random, subprocess
  from typing import Optional, List
- from urllib.parse import urlencode

- import requests
- from PIL import Image
- import gradio as gr

- # -------- Modal inference endpoint (dev) --------
- INFERENCE_URL = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"

- # -------- settings --------
- MAX_SLOTS = 12 # max image slots user can reveal

- # -------- small helpers --------
- def _save_video_bytes(data: bytes, tag: str) -> str:
-     os.makedirs("/tmp", exist_ok=True)
-     path = f"/tmp/{tag}_{int(time.time())}.mp4"
-     with open(path, "wb") as f:
-         f.write(data)
-     return path

- def _png_bytes(img: Image.Image) -> bytes:
-     buf = io.BytesIO()
-     img.save(buf, format="PNG")
-     return buf.getvalue()

- def _download_to_bytes(url: str) -> bytes:
-     r = requests.get(url, timeout=180)
-     r.raise_for_status()
-     return r.content

- def stitch_call(
-     start_img: Image.Image,
-     end_img: Image.Image,
      prompt: str,
-     seed: Optional[int],
-     negative_prompt: Optional[str] = None,
-     frames_per_second: int = 24,
-     video_length: int = 4,
-     num_inference_steps: Optional[int] = None,
- ) -> Optional[str]:
-     """
-     Required (in body): image_bytes (+ image_bytes_end)
-     In URL query: prompt, negative_prompt, frames_per_second, video_length, seed, num_inference_steps
-     """
-     if start_img is None or end_img is None:
-         return None
-
-     # default seed behavior
-     if seed in (None, 0, -1):
-         seed = random.randint(1, 2**31 - 1)
-
-     # Build query string
-     q = {
-         "prompt": prompt or "",
-         "seed": int(seed),
-         "frames_per_second": int(frames_per_second),
-         "video_length": int(video_length),
      }
-     if negative_prompt:
-         q["negative_prompt"] = negative_prompt
-     if num_inference_steps is not None:
-         q["num_inference_steps"] = int(num_inference_steps)
-
-     url = f"{INFERENCE_URL}?{urlencode(q)}"
-
-     # Images go in the body
-     files = {
-         "image_bytes": ("start.png", _png_bytes(start_img), "image/png"),
-         "image_bytes_end": ("end.png", _png_bytes(end_img), "image/png"),
-     }
-     headers = {"accept": "application/json"}

      try:
-         resp = requests.post(url, files=files, headers=headers, timeout=600)
-         ctype = (resp.headers.get("content-type") or "").lower()
-
-         # Raw video bytes
-         if "application/json" not in ctype:
-             resp.raise_for_status()
-             return _save_video_bytes(resp.content, "stitch")

-         # JSON with url or base64
-         data = resp.json()
-         video_url = data.get("video_url") or data.get("url") or data.get("result") or data.get("output")
-         if isinstance(video_url, str) and video_url.startswith(("http://", "https://")):
-             return _save_video_bytes(_download_to_bytes(video_url), "stitch")

-         video_b64 = data.get("video_b64") or data.get("videoBase64")
-         if isinstance(video_b64, str):
-             pad = (-len(video_b64)) % 4
-             if pad:
-                 video_b64 += "=" * pad
-             return _save_video_bytes(base64.b64decode(video_b64), "stitch")

-     except Exception as e:
-         print("stitch_call error:", e)
-
-     return None
-
- # -------- FFmpeg-based concatenation (N clips) --------
- def concat_many(videos: List[str]) -> Optional[str]:
-     vids = [v for v in videos if v]
-     if len(vids) < 2:
-         return None
      try:
-         os.makedirs("/tmp", exist_ok=True)
-         out_path = f"/tmp/final_{int(time.time())}.mp4"
-         list_file = f"/tmp/list_{int(time.time())}.txt"
-         with open(list_file, "w") as f:
-             for v in vids:
-                 f.write(f"file '{v}'\n")
-         subprocess.run(
-             ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_file, "-c", "copy", out_path],
-             check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
-         )
-         return out_path
-     except Exception as e:
-         print("concat_many error:", e)
-         return None
-
- # -------- Timeline HTML renderer --------
- def render_timeline_html(paths: List[str]):
-     vids = [p for p in (paths or []) if p]
-     if not vids:
-         return "<div class='tl-grid tl-empty'>No clips yet. Generate and click ‘Add to timeline’.</div>"
-     items = []
-     for i, p in enumerate(vids, 1):
-         items.append(
-             f"""
-             <div class="tl-item">
-               <video src="{p}" controls playsinline></video>
-               <div class="tl-label">Clip {i}</div>
-             </div>
-             """
-         )
-     return f"<div class='tl-grid'>{''.join(items)}</div>"
-
- # =========================
- # Gradio callbacks / state ops
- # =========================
- def add_image_slot(visible_slots: int):
-     """Reveal one more upload slot (up to MAX_SLOTS)."""
-     return min(MAX_SLOTS, int(visible_slots) + 1)
-
- def _reveal_slots(n, *imgs):
-     """Update visibility of image upload components based on visible_slots state."""
-     n = int(n)
-     updates = []
-     for i in range(MAX_SLOTS):
-         updates.append(gr.update(visible=(i < n)))
-     return updates
-
- def collect_choices(*imgs):
-     """Build dropdown choices of available indices (1-based labels) based on non-empty slots."""
-     choices = []
-     for i, img in enumerate(imgs, start=1):
-         if img is not None:
-             choices.append(str(i))
-     return gr.update(choices=choices), gr.update(choices=choices)
-
- def stitch_selected(
-     prompt, negative_prompt, fps, length_sec, seed, start_idx_str, end_idx_str, *imgs
- ):
-     """Run inference for selected start/end indices (1-based strings) + options."""
-     if not start_idx_str or not end_idx_str:
-         gr.Warning("Please select Start and End frames.")
-         return None
-     try:
-         s = int(start_idx_str) - 1
-         e = int(end_idx_str) - 1
      except Exception:
-         gr.Warning("Invalid Start/End selection.")
-         return None
-
-     if s < 0 or e < 0 or s >= len(imgs) or e >= len(imgs):
-         gr.Warning("Start/End out of range.")
-         return None

-     start_img = imgs[s]
-     end_img = imgs[e]
-     if start_img is None or end_img is None:
-         gr.Warning("Selected slots are empty.")
-         return None
-
-     fps_val = int(str(fps)) if fps else 24
-     len_val = int(str(length_sec)) if length_sec else 4
-
-     vid = stitch_call(
-         start_img=start_img,
-         end_img=end_img,
-         prompt=prompt or "",
-         seed=int(seed or 0),
-         negative_prompt=(negative_prompt or "").strip() or None,
-         frames_per_second=fps_val,
-         video_length=len_val,
-         num_inference_steps=None,
-     )
-     if not vid:
-         gr.Warning("Generation failed.")
-         return None
-     return vid # path for preview
-
- def add_to_timeline(preview_path, timeline_paths: List[str]):
-     """Append preview to timeline; return updated state and HTML."""
-     tl = list(timeline_paths or [])
-     if not preview_path:
-         gr.Warning("Generate a clip first.")
-         return tl, gr.update(value=render_timeline_html(tl))
-     tl.append(preview_path)
-     return tl, gr.update(value=render_timeline_html(tl))
-
- def stitch_all_from_timeline(timeline_paths: List[str]):
-     vids = list(timeline_paths or [])
-     if len(vids) < 2:
-         gr.Warning("Add at least two clips to the timeline first.")
-         return None
-     out = concat_many(vids)
-     if not out:
-         gr.Warning("Failed to concatenate clips.")
-     return out

- # =========================
- # UI
- # =========================
- CSS = """
- .gradio-container { padding: 24px; }
- .pill button { border-radius: 999px !important; padding: 10px 18px; }
- .rounded textarea { border-radius: 16px !important; }
- .gallery-row { display:flex; gap:16px; overflow-x:auto; padding:8px 4px; }
- .gallery-row .gradio-image { min-width: 220px; }
- .tl-grid {
-   display: grid;
-   grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
-   gap: 12px;
- }
- .stitch-box {
-   background-color: #f0f4ff; /* pick any color you like */
-   border-radius: 12px;
-   padding: 16px;
- }
- .tl-grid video {
-   width: 100%;
-   height: 120px;
-   object-fit: cover;
-   border-radius: 12px;
-   display: block;
- }
- .tl-label {
-   font-size: 12px;
-   color: #9aa0a6;
-   margin-top: 4px;
-   text-align: center;
- }
- .tl-empty { color: #9aa0a6; padding: 8px 4px; }
- """
-
- with gr.Blocks(css=CSS, title="StitchTool") as demo:
-     gr.Markdown("## StitchTool")
-
-     # --- State ---
-     visible_slots = gr.State(value=3) # number of visible image slots
-     timeline_state = gr.State(value=[]) # list[str] of video file paths (timeline)
-
-     # --- Image gallery (horizontal, grows on demand) ---
-     with gr.Row(elem_classes=["gallery-row"]):
-         img_comps = []
-         for i in range(MAX_SLOTS):
-             comp = gr.Image(label=f"Image {i+1} upload", type="pil", visible=(i < 3))
-             img_comps.append(comp)
-         add_btn = gr.Button("+ Add image")
-
-     # clicking add → reveal one more slot
-     add_btn.click(
-         fn=add_image_slot,
-         inputs=[visible_slots],
-         outputs=[visible_slots],
-     )
-
-     # reflect visibility changes whenever visible_slots changes
-     visible_slots.change(
-         fn=_reveal_slots,
-         inputs=[visible_slots] + img_comps,
-         outputs=img_comps
-     )
-
-     # Seed + Start/End selection + Prompt + options + Stitch + Preview
-     seed = gr.Number(value=0, precision=0, label="Seed (0 = random)")

      with gr.Row():
-         # Left column: controls (with colored background via .stitch-box)
-         with gr.Column(scale=1, min_width=420, elem_classes=["stitch-box"]):
-             start_dd = gr.Dropdown(label="Start frame", choices=[], interactive=True)
-             end_dd = gr.Dropdown(label="End frame", choices=[], interactive=True)
-
-             prompt = gr.Textbox(
-                 placeholder="Describe the transition between the selected start and end frames…",
-                 lines=3,
-                 label="Prompt",
-                 elem_classes=["rounded"]
-             )

-             negative = gr.Textbox(
-                 placeholder="Optional: things to avoid (e.g., 'bad quality, extra fingers, etc.')",
-                 lines=2,
-                 label="Negative prompt",
-                 elem_classes=["rounded"]
              )
-
-             with gr.Row():
-                 fps = gr.Dropdown(
-                     label="Frame rate",
-                     choices=["16", "24", "32"],
-                     value="24",
-                     interactive=True,
-                 )
-                 length_sec = gr.Dropdown(
-                     label="Video length (sec)",
-                     choices=["2", "4"],
-                     value="4",
-                     interactive=True,
-                 )
-
-             run_btn = gr.Button("Generate", elem_classes=["pill"])
-             add_tl_btn = gr.Button("Add to timeline", elem_classes=["pill"])
-
-         # Right column: preview video
-         with gr.Column(scale=1, min_width=420):
-             preview = gr.Video(label="Video output", interactive=False)
-
-     # keep start/end dropdowns up to date based on which slots have images
-     for comp in img_comps:
-         comp.change(
-             fn=collect_choices,
-             inputs=img_comps,
-             outputs=[start_dd, end_dd]
-         )
-
-     # stitch action → preview
-     run_btn.click(
-         fn=stitch_selected,
-         inputs=[prompt, negative, fps, length_sec, seed, start_dd, end_dd] + img_comps,
-         outputs=[preview]
      )

-     # --- Dynamic timeline (no placeholders) ---
-     with gr.Row():
-         timeline_html = gr.HTML(value=render_timeline_html([]))
-
-     add_tl_btn.click(
-         fn=add_to_timeline,
-         inputs=[preview, timeline_state],
-         outputs=[timeline_state, timeline_html]
      )

-     # final stitch all (concatenate in order)
-     with gr.Row():
-         with gr.Column(scale=1, min_width=420):
-             stitch_all_btn = gr.Button("Stitch All", elem_classes=["pill"])
-         with gr.Column(scale=1, min_width=420):
-             final_vid = gr.Video(label="Stitched Video Output", interactive=False)
-
-     stitch_all_btn.click(
-         fn=stitch_all_from_timeline,
-         inputs=[timeline_state],
-         outputs=[final_vid]
-     )

  if __name__ == "__main__":
-     demo.queue().launch()

+ import os, uuid, time, json, shutil, mimetypes, subprocess, requests, gradio as gr
+ from datetime import datetime
  from typing import Optional, List

+ # -------- config --------
+ ENDPOINT = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"
+ FFMPEG = "ffmpeg"
+ OUT, TMP = "outputs", "tmp"
+ os.makedirs(OUT, exist_ok=True); os.makedirs(TMP, exist_ok=True)

+ ts = lambda: datetime.utcnow().strftime("%Y%m%d_%H%M%S")
+ fname = lambda p,e: f"{p}_{ts()}_{uuid.uuid4().hex[:6]}.{e}"
+ abspath= lambda p: os.path.abspath(p)

+ def run_ffmpeg(args: List[str]):
+     p = subprocess.run([FFMPEG]+args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     if p.returncode:
+         raise RuntimeError(p.stderr.decode(errors="ignore"))

+ def extract_last_frame(video_path: str) -> str:
+     out = os.path.join(TMP, fname("last", "png"))
+     run_ffmpeg(["-sseof","-1","-i",video_path,"-frames:v","1","-q:v","2",out])
+     return out

+ def concat_videos(paths: List[str]) -> str:
+     if not paths: raise ValueError("No videos selected.")
+     if len(paths)==1:
+         dst = os.path.join(OUT, fname("continuous","mp4")); shutil.copy(paths[0], dst); return dst
+     listfile = os.path.join(TMP, f"concat_{uuid.uuid4().hex}.txt")
+     with open(listfile,"w") as f:
+         for p in paths: f.write(f"file '{abspath(p)}'\n")
+     out = os.path.join(OUT, fname("continuous","mp4"))
+     run_ffmpeg(["-f","concat","-safe","0","-i",listfile,"-c","copy",out])
+     os.remove(listfile)
+     return out

+ def zip_used(paths: List[str]) -> str:
+     pack = os.path.join(TMP, f"pack_{uuid.uuid4().hex[:6]}")
+     os.makedirs(pack, exist_ok=True)
+     for p in paths: shutil.copy(p, pack)
+     base = os.path.join(OUT, f"used_{ts()}")
+     shutil.make_archive(base, "zip", pack)
+     shutil.rmtree(pack, ignore_errors=True)
+     return base + ".zip"
+
+ def save_video_bytes(content: bytes, content_type: str) -> str:
+     ext = (mimetypes.guess_extension(content_type) or ".mp4").lstrip(".")
+     path = os.path.join(OUT, fname("gen", ext))
+     with open(path,"wb") as f: f.write(content)
+     return path

+ def call_backend(
      prompt: str,
+     image_bytes_path: str,
+     negative_prompt: Optional[str],
+     fps: int,
+     vlen: int,
+     steps: Optional[int],
+     seed: Optional[int]
+ ) -> str:
+     params = {
+         "prompt": prompt,
+         "frames_per_second": str(fps),
+         "video_length": str(vlen),
      }
+     if negative_prompt: params["negative_prompt"] = negative_prompt
+     if steps is not None: params["num_inference_steps"] = str(steps)
+     if seed is None: seed = int(time.time())
+     params["seed"] = str(seed)

+     files = {"image_bytes": (os.path.basename(image_bytes_path), open(image_bytes_path,"rb"),
+                              "application/octet-stream")}
      try:
+         r = requests.post(ENDPOINT, params=params, files=files, headers={"accept":"application/json"}, timeout=600)
+     finally:
+         try: files["image_bytes"][1].close()
+         except: pass

+     if r.status_code != 200:
+         raise RuntimeError(f"Backend {r.status_code}: {r.text[:500]}")

+     ctype = r.headers.get("Content-Type","")
+     if ctype.startswith("video/"): # raw video bytes
+         return save_video_bytes(r.content, ctype)

+     # expect JSON with { "video_url": ... } or direct mp4 URL
      try:
+         payload = r.json()
+         url = payload.get("video_url")
+         if not url: raise ValueError("no video_url in response")
+         r2 = requests.get(url, stream=True, timeout=600)
+         if r2.status_code != 200: raise RuntimeError(f"fetch video {r2.status_code}")
+         path = os.path.join(OUT, fname("gen","mp4"))
+         with open(path,"wb") as f:
+             for chunk in r2.iter_content(1<<20):
+                 if chunk: f.write(chunk)
+         return path
      except Exception:
+         # if backend returns direct bytes but mislabeled JSON, fallback
+         return save_video_bytes(r.content, ctype or "video/mp4")

+ with gr.Blocks() as demo:
+     gr.Markdown("## Continuous Video — chain last frame → next first frame")

+     # state
+     used_videos = gr.State([]) # list[str]
+     last_seed_img = gr.State(None) # str path (PNG) to send as image_bytes
+     current_video = gr.State(None) # str path

      with gr.Row():
+         prompt = gr.Textbox(label="Prompt", placeholder="Describe your shot…", lines=2)
+     with gr.Row():
+         start_file = gr.File(label="Initial start image or video (only needed for the very first clip)", file_types=["image","video"])
+     with gr.Row():
+         negative = gr.Textbox(label="Negative prompt (optional)", placeholder="What to avoid…")
+     with gr.Row():
+         fps = gr.Slider(1,60, value=24, step=1, label="Frames per second")
+         vlen = gr.Slider(1,12, value=4, step=1, label="Video length (seconds)")
+         steps = gr.Slider(1,100, value=30, step=1, label="Num inference steps (optional)", interactive=True)
+         seed = gr.Number(label="Seed (optional)", precision=0, value=None)

+     video_out = gr.Video(label="Output")
+     with gr.Row():
+         gen_btn = gr.Button("Generate", variant="primary")
+         use_btn = gr.Button("Chain")
+         dl_btn = gr.Button("Download")
+
+     files_out = gr.Files(label="Downloads")
+
+     def on_generate(prompt_txt, start_file_obj, neg, fps_val, vlen_val, steps_val, seed_val, seed_img):
+         if not prompt_txt or not prompt_txt.strip():
+             raise gr.Error("Prompt is required.")
+         # choose image_bytes source: last stolen frame > uploaded start file
+         image_path = seed_img
+         if not image_path:
+             if not start_file_obj: raise gr.Error("First generation requires an initial image OR video.")
+             image_path = start_file_obj.name
+         try:
+             out = call_backend(
+                 prompt=prompt_txt.strip(),
+                 image_bytes_path=image_path,
+                 negative_prompt=(neg.strip() if (neg and neg.strip()) else None),
+                 fps=int(fps_val),
+                 vlen=int(vlen_val),
+                 steps=int(steps_val) if steps_val else None,
+                 seed=int(seed_val) if (seed_val not in [None,""]) else None
              )
+         except Exception as e:
+             raise gr.Error(str(e))
+         return out, out # preview path, current_video state
+
+     gen_btn.click(
+         on_generate,
+         inputs=[prompt, start_file, negative, fps, vlen, steps, seed, last_seed_img],
+         outputs=[video_out, current_video]
      )

+     def on_use(curr, used):
+         if not curr or not os.path.exists(curr):
+             raise gr.Error("Generate a video first.")
+         if curr not in used: used = used + [curr]
+         try:
+             seed_img = extract_last_frame(curr)
+         except Exception as e:
+             raise gr.Error(f"Could not extract last frame: {e}")
+         return used, seed_img
+
+     use_btn.click(
+         on_use,
+         inputs=[current_video, used_videos],
+         outputs=[used_videos, last_seed_img]
      )

+     def on_download(used):
+         if not used: raise gr.Error("Nothing to download. Click Use after generating clips.")
+         files = []
+         try:
+             merged = concat_videos(used); files.append(merged)
+         except Exception as e:
+             # still allow ZIP even if concat fails
+             print("concat error:", e)
+         try:
+             zipped = zip_used(used); files.append(zipped)
+         except Exception as e:
+             print("zip error:", e)
+         return files
+
+     dl_btn.click(on_download, inputs=[used_videos], outputs=[files_out])

  if __name__ == "__main__":
+     demo.launch()
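For reference, here is a minimal standalone sketch of the request contract that the new call_backend() relies on: generation parameters travel in the query string, the start frame is posted as a multipart "image_bytes" field, and the response is either raw video bytes or JSON carrying a "video_url". The endpoint URL and parameter names come from the diff above; the prompt text and the local file names ("start.png", "clip.mp4") are illustrative placeholders only, not part of the commit.

# request_contract_demo.py — illustrative sketch only; prompt and file names are examples
import requests

ENDPOINT = "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"

params = {
    "prompt": "a slow dolly shot over a moonlit sea",  # example prompt
    "frames_per_second": "24",
    "video_length": "4",
    "seed": "42",
    # optional extras used by app.py: "negative_prompt", "num_inference_steps"
}

with open("start.png", "rb") as f:  # any start frame works here
    files = {"image_bytes": ("start.png", f, "application/octet-stream")}
    r = requests.post(ENDPOINT, params=params, files=files,
                      headers={"accept": "application/json"}, timeout=600)
r.raise_for_status()

# The backend may answer with the clip itself or with JSON pointing at it.
if r.headers.get("Content-Type", "").startswith("video/"):
    data = r.content
else:
    data = requests.get(r.json()["video_url"], timeout=600).content

with open("clip.mp4", "wb") as out:
    out.write(data)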