Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py — FLUX-only with temporal chaining
+# app.py — FLUX-only with temporal chaining + Aggressive follow + Video stitching
 import os, json, uuid, re
 from datetime import datetime
 import gradio as gr
@@ -7,6 +7,11 @@ import torch
 from PIL import Image
 import pandas as pd
 
+# MoviePy for stitching
+from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+
 # =========================
 # Storage helpers
 # =========================
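Note: these imports and the clip calls used further down (set_duration, set_start, crossfadein/crossfadeout, resize(newsize=...)) follow the MoviePy 1.x API; MoviePy 2.x removed the moviepy.editor module and renamed several of these methods, so an unpinned moviepy on the Space could break this commit. A fail-fast guard like the following (hypothetical, not part of the commit) makes that assumption explicit:

    # Hypothetical compatibility guard: this app relies on the MoviePy 1.x API.
    import moviepy
    if int(moviepy.__version__.split(".")[0]) >= 2:
        raise RuntimeError(
            "MoviePy 2.x detected; this Space expects the 1.x API "
            "(moviepy.editor, crossfadein/crossfadeout). Pin moviepy<2 in requirements.txt."
        )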
@@ -48,6 +53,7 @@ def ensure_project(p, suggested_name="Project"):
     save_project(proj)
     return proj
 
+
 # =========================
 # LLM (ZeroGPU) — Storyboard generator (robust)
 # =========================
@@ -105,7 +111,6 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_
         "Output must start with <JSON> and end with </JSON>.\n"
     )
 
-
 def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
     return (
         "Reply ONLY with a JSON array starting with '[' and ending with ']'. No extra text.\n"
@@ -122,7 +127,7 @@ def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_le
         ' "negative": ""\n'
         "}\n"
     )
-
+
 def _apply_chat(tok, system_msg: str, user_msg: str) -> str:
     if hasattr(tok, "apply_chat_template"):
         return tok.apply_chat_template(
@@ -151,7 +156,7 @@ def _generate_text(model, tok, prompt_text: str) -> str:
     continuation_ids = gen[0][prompt_len:]
     text = tok.decode(continuation_ids, skip_special_tokens=True).strip()
     if text.startswith("```"):
-        text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE|re.DOTALL).strip()
+        text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE | re.DOTALL).strip()
     return text
 
 def _extract_json_array(text: str) -> str:
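The retouched line only changes spacing around the flag OR; the regex itself still strips a leading ``` or ```json fence and a trailing ``` from the model output. A standalone check with an illustrative input:

    import re

    raw = "```json\n[{\"id\": 1}]\n```"
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.IGNORECASE | re.DOTALL).strip()
    print(cleaned)  # [{"id": 1}]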
@@ -240,6 +245,7 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
 
     return _normalize_shots(shots_raw, default_fps, default_len)
 
+
 # =========================
 # IMAGE GEN — FLUX only (no fallback) + Temporal chaining
 # =========================
@@ -282,8 +288,9 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
     img.save(out)
     return out
 
+
 # ---- Temporal prompt composer (PRIORITIZE the new shot) ----
-def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
+def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5):
     """
     Build a prompt that continues the scene N seconds later,
     prioritizing the NEW shot description (composition/action),
@@ -302,9 +309,9 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
 
     composed = (
         f"Continue the same scene {seconds_forward} seconds later.\n"
-        f
-
-        f
+        f'PRIORITIZE this new moment and its composition now: "{curr_desc}".\n'
+        "Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
+        f'Previous frame (context only, do not copy its framing): "{prev_desc}".\n'
         f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
     ).strip()
 
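For intuition, this is roughly the string the reworked composer now emits; the two shot descriptions below are invented stand-ins for prev_desc / curr_desc (which the function presumably derives from shots[idx-1] and shots[idx]):

    # Illustrative re-assembly of the new prompt template with made-up descriptions.
    seconds_forward = 5
    prev_desc = "Wide shot: a cyclist waits at a rainy crosswalk at dusk"
    curr_desc = "Medium shot: the cyclist pushes off as the light turns green"

    composed = (
        f"Continue the same scene {seconds_forward} seconds later.\n"
        f'PRIORITIZE this new moment and its composition now: "{curr_desc}".\n'
        "Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
        f'Previous frame (context only, do not copy its framing): "{prev_desc}".\n'
        f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
    ).strip()
    print(composed)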
@@ -317,6 +324,7 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
 
     return composed, negative
 
+
 @spaces.GPU(duration=180)
 def generate_keyframe_image(
     pid: str,
@@ -393,6 +401,96 @@ def generate_keyframe_image(
     saved_path = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
     return saved_path
 
+
+# =========================
+# Video stitching helpers (pairwise dissolve + final concat)
+# =========================
+def _pair_clip_path(pid: str, i: int, j: int) -> str:
+    return os.path.join(project_dir(pid), "clips", f"pair_{i:02d}_to_{j:02d}.mp4")
+
+def _final_stitched_path(pid: str) -> str:
+    return os.path.join(project_dir(pid), "clips", "final_stitched.mp4")
+
+def _image_size(path: str):
+    with Image.open(path) as im:
+        return im.width, im.height
+
+def _build_pair_clip(img_a: str, img_b: str, out_path: str, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, resize_to=None):
+    """
+    Create a dissolve transition from img_a -> img_b:
+    - show img_a for `hold` seconds
+    - dissolve for `crossfade` seconds into img_b
+    - hold img_b for `hold` seconds
+    """
+    ca = ImageClip(img_a).set_duration(hold + crossfade)
+    cb = ImageClip(img_b).set_duration(hold + crossfade).set_start(hold)
+
+    if resize_to:
+        ca = ca.resize(newsize=resize_to)
+        cb = cb.resize(newsize=resize_to)
+
+    ca_x = ca.crossfadeout(crossfade)
+    cb_x = cb.crossfadein(crossfade)
+
+    total = hold + crossfade + hold
+    comp = CompositeVideoClip([ca_x, cb_x]).set_duration(total)
+
+    comp.write_videofile(
+        out_path,
+        fps=fps,
+        codec="libx264",
+        audio=False,
+        preset="medium",
+        threads=os.cpu_count() or 2,
+        verbose=False,
+        logger=None
+    )
+    comp.close(); ca.close(); cb.close()
+
+def _build_all_pair_clips(pid: str, shots: list, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, force_size=None):
+    paths = []
+    base_size = None
+    if not force_size:
+        for s in shots:
+            p = s.get("image_path")
+            if p and os.path.exists(p):
+                base_size = _image_size(p)
+                break
+    size = force_size or base_size
+    for i in range(len(shots)-1):
+        a = shots[i].get("image_path")
+        b = shots[i+1].get("image_path")
+        if not (a and b and os.path.exists(a) and os.path.exists(b)):
+            continue
+        outp = _pair_clip_path(pid, shots[i]["id"], shots[i+1]["id"])
+        _build_pair_clip(a, b, outp, fps=fps, hold=hold, crossfade=crossfade, resize_to=size)
+        paths.append(outp)
+    return paths
+
+def _build_final_stitched_from_pairs(pair_paths: list, out_path: str, fps: int = 24):
+    if not pair_paths:
+        raise RuntimeError("No pair clips to stitch.")
+    clips = []
+    for p in pair_paths:
+        if os.path.exists(p):
+            clips.append(VideoFileClip(p))
+    if not clips:
+        raise RuntimeError("No readable pair clips on disk.")
+    final = concatenate_videoclips(clips, method="compose")
+    final.write_videofile(
+        out_path,
+        fps=fps,
+        codec="libx264",
+        audio=False,
+        preset="medium",
+        threads=os.cpu_count() or 2,
+        verbose=False,
+        logger=None
+    )
+    final.close()
+    for c in clips: c.close()
+
+
 # =========================
 # Shots <-> DataFrame utils
 # =========================
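The stitching helpers can be smoke-tested outside Gradio; a minimal sketch assuming two same-size keyframe PNGs already exist at the placeholder paths below (MoviePy 1.x):

    # Hypothetical smoke test; paths are placeholders, not the app's real layout.
    a = "keyframes/shot_01.png"
    b = "keyframes/shot_02.png"
    _build_pair_clip(a, b, "pair_01_to_02.mp4", fps=24, hold=0.5, crossfade=0.7)
    _build_final_stitched_from_pairs(["pair_01_to_02.mp4"], "final_stitched.mp4", fps=24)

With the defaults, each pair clip runs hold + crossfade + hold = 0.5 + 0.7 + 0.5 = 1.7 s, and the final video is simply those clips concatenated, so every interior keyframe appears twice: at the tail of one pair clip and again at the head of the next.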
@@ -418,11 +516,12 @@ def df_to_shots(df: pd.DataFrame) -> list:
         })
     return sorted(out, key=lambda x: x["id"])
 
+
 # =========================
 # Gradio UI
 # =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎬 Storyboard → Keyframes →
+    gr.Markdown("# 🎬 Storyboard → Keyframes → Videos → Export")
     gr.Markdown(
         "Edit storyboard prompts, then generate keyframes.\n"
         "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
@@ -488,8 +587,15 @@
         kf_status = gr.Markdown("")
 
     with gr.Tab("Videos"):
-        gr.Markdown("### 3) Videos
-
+        gr.Markdown("### 3) Videos")
+        with gr.Row():
+            v_fps = gr.Slider(8, 60, value=24, step=1, label="FPS")
+            v_hold = gr.Slider(0.0, 2.0, value=0.5, step=0.1, label="Hold per still (s)")
+            v_xfade = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Crossfade (s)")
+        with gr.Row():
+            build_pairs_btn = gr.Button("Build pair clips (A→B, B→C, ...)", variant="primary")
+            build_final_btn = gr.Button("Build final stitched video", variant="secondary")
+        vd_table = gr.JSON(label="Rendered outputs (paths)")
 
     with gr.Tab("Export"):
         gr.Markdown("### 4) Export (coming next)")
@@ -634,6 +740,51 @@
 
     approve_next_btn.click(on_approve_next, inputs=[project, current_idx, prompt_box, out_img], outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status])
 
+    # ---- Videos tab handlers
+    def on_build_pairs(p, fps, hold, xfade):
+        if p is None:
+            raise gr.Error("No project.")
+        shots = p.get("shots", [])
+        if len(shots) < 2:
+            raise gr.Error("Need at least 2 approved images to build pair clips.")
+        if not any(s.get("image_path") for s in shots):
+            raise gr.Error("No approved images yet. Approve keyframes first.")
+
+        pair_paths = _build_all_pair_clips(
+            p["meta"]["id"], shots,
+            fps=int(fps), hold=float(hold), crossfade=float(xfade),
+            force_size=None  # or (640, 640) to force uniform size
+        )
+        if not pair_paths:
+            raise gr.Error("Could not find any consecutive pairs with images.")
+        return {"pair_clips": pair_paths, "final": None}
+
+    build_pairs_btn.click(
+        on_build_pairs,
+        inputs=[project, v_fps, v_hold, v_xfade],
+        outputs=[vd_table]
+    )
+
+    def on_build_final(p, fps):
+        if p is None:
+            raise gr.Error("No project.")
+        pid = p["meta"]["id"]
+        clips_dir = os.path.join(project_dir(pid), "clips")
+        pair_paths = sorted(
+            [os.path.join(clips_dir, f) for f in os.listdir(clips_dir) if f.startswith("pair_") and f.endswith(".mp4")]
+        )
+        if not pair_paths:
+            raise gr.Error("No pair clips found. Click 'Build pair clips' first.")
+        outp = _final_stitched_path(pid)
+        _build_final_stitched_from_pairs(pair_paths, outp, fps=int(fps))
+        return {"pair_clips": pair_paths, "final": outp}
+
+    build_final_btn.click(
+        on_build_final,
+        inputs=[project, v_fps],
+        outputs=[vd_table]
+    )
+
     def on_save(p):
         if p is None:
             raise gr.Error("No project in memory.")
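Both handlers return the same dict into the gr.JSON output, so after building the final video the Videos tab shows something like (paths illustrative):

    {
        "pair_clips": [
            "<project_dir>/clips/pair_01_to_02.mp4",
            "<project_dir>/clips/pair_02_to_03.mp4"
        ],
        "final": "<project_dir>/clips/final_stitched.mp4"
    }

One caveat: on_build_final re-discovers pair clips by listing the clips directory, so if that directory has never been created (no pair clips built and nothing else creates it), os.listdir raises before the friendlier gr.Error is reached.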