Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py — FLUX-only with temporal chaining
+# app.py — FLUX-only with temporal chaining + Aggressive follow + Video stitching
 import os, json, uuid, re
 from datetime import datetime
 import gradio as gr
@@ -7,6 +7,11 @@ import torch
 from PIL import Image
 import pandas as pd
 
+# MoviePy for stitching
+from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+
 # =========================
 # Storage helpers
 # =========================
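Note: these imports and the clip calls used further down (set_duration, set_start, crossfadein/crossfadeout, resize(newsize=...)) follow the MoviePy 1.x API; MoviePy 2.x removed the moviepy.editor module and renamed several of these methods, so an unpinned moviepy on the Space could break this commit. A fail-fast guard like the following (hypothetical, not part of the commit) makes that assumption explicit:

    # Hypothetical compatibility guard: this app relies on the MoviePy 1.x API.
    import moviepy
    if int(moviepy.__version__.split(".")[0]) >= 2:
        raise RuntimeError(
            "MoviePy 2.x detected; this Space expects the 1.x API "
            "(moviepy.editor, crossfadein/crossfadeout). Pin moviepy<2 in requirements.txt."
        )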
@@ -48,6 +53,7 @@ def ensure_project(p, suggested_name="Project"):
     save_project(proj)
     return proj
 
+
 # =========================
 # LLM (ZeroGPU) — Storyboard generator (robust)
 # =========================
@@ -105,7 +111,6 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_
         "Output must start with <JSON> and end with </JSON>.\n"
     )
 
-
 def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
     return (
         "Reply ONLY with a JSON array starting with '[' and ending with ']'. No extra text.\n"
@@ -122,7 +127,7 @@ def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_le
         ' "negative": ""\n'
         "}\n"
     )
-
+
 def _apply_chat(tok, system_msg: str, user_msg: str) -> str:
     if hasattr(tok, "apply_chat_template"):
         return tok.apply_chat_template(
@@ -151,7 +156,7 @@ def _generate_text(model, tok, prompt_text: str) -> str:
     continuation_ids = gen[0][prompt_len:]
     text = tok.decode(continuation_ids, skip_special_tokens=True).strip()
     if text.startswith("```"):
-        text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE|re.DOTALL).strip()
+        text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE | re.DOTALL).strip()
     return text
 
 def _extract_json_array(text: str) -> str:
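The retouched line only changes spacing around the flag OR; the regex itself still strips a leading ``` or ```json fence and a trailing ``` from the model output. A standalone check with an illustrative input:

    import re

    raw = "```json\n[{\"id\": 1}]\n```"
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.IGNORECASE | re.DOTALL).strip()
    print(cleaned)  # [{"id": 1}]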
@@ -240,6 +245,7 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
 
     return _normalize_shots(shots_raw, default_fps, default_len)
 
+
 # =========================
 # IMAGE GEN — FLUX only (no fallback) + Temporal chaining
 # =========================
@@ -282,8 +288,9 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
     img.save(out)
     return out
 
+
 # ---- Temporal prompt composer (PRIORITIZE the new shot) ----
-def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
+def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5):
     """
     Build a prompt that continues the scene N seconds later,
     prioritizing the NEW shot description (composition/action),
@@ -302,9 +309,9 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
 
     composed = (
         f"Continue the same scene {seconds_forward} seconds later.\n"
-        f
-
-        f
+        f'PRIORITIZE this new moment and its composition now: "{curr_desc}".\n'
+        "Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
+        f'Previous frame (context only, do not copy its framing): "{prev_desc}".\n'
         f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
     ).strip()
 
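For intuition, this is roughly the string the reworked composer now emits; the two shot descriptions below are invented stand-ins for prev_desc / curr_desc (which the function presumably derives from shots[idx-1] and shots[idx]):

    # Illustrative re-assembly of the new prompt template with made-up descriptions.
    seconds_forward = 5
    prev_desc = "Wide shot: a cyclist waits at a rainy crosswalk at dusk"
    curr_desc = "Medium shot: the cyclist pushes off as the light turns green"

    composed = (
        f"Continue the same scene {seconds_forward} seconds later.\n"
        f'PRIORITIZE this new moment and its composition now: "{curr_desc}".\n'
        "Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
        f'Previous frame (context only, do not copy its framing): "{prev_desc}".\n'
        f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
    ).strip()
    print(composed)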
@@ -317,6 +324,7 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
 
     return composed, negative
 
+
 @spaces.GPU(duration=180)
 def generate_keyframe_image(
     pid: str,
@@ -393,6 +401,96 @@ def generate_keyframe_image(
     saved_path = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
     return saved_path
 
+
+# =========================
+# Video stitching helpers (pairwise dissolve + final concat)
+# =========================
+def _pair_clip_path(pid: str, i: int, j: int) -> str:
+    return os.path.join(project_dir(pid), "clips", f"pair_{i:02d}_to_{j:02d}.mp4")
+
+def _final_stitched_path(pid: str) -> str:
+    return os.path.join(project_dir(pid), "clips", "final_stitched.mp4")
+
+def _image_size(path: str):
+    with Image.open(path) as im:
+        return im.width, im.height
+
+def _build_pair_clip(img_a: str, img_b: str, out_path: str, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, resize_to=None):
+    """
+    Create a dissolve transition from img_a -> img_b:
+    - show img_a for `hold` seconds
+    - dissolve for `crossfade` seconds into img_b
+    - hold img_b for `hold` seconds
+    """
+    ca = ImageClip(img_a).set_duration(hold + crossfade)
+    cb = ImageClip(img_b).set_duration(hold + crossfade).set_start(hold)
+
+    if resize_to:
+        ca = ca.resize(newsize=resize_to)
+        cb = cb.resize(newsize=resize_to)
+
+    ca_x = ca.crossfadeout(crossfade)
+    cb_x = cb.crossfadein(crossfade)
+
+    total = hold + crossfade + hold
+    comp = CompositeVideoClip([ca_x, cb_x]).set_duration(total)
+
+    comp.write_videofile(
+        out_path,
+        fps=fps,
+        codec="libx264",
+        audio=False,
+        preset="medium",
+        threads=os.cpu_count() or 2,
+        verbose=False,
+        logger=None
+    )
+    comp.close(); ca.close(); cb.close()
+
+def _build_all_pair_clips(pid: str, shots: list, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, force_size=None):
+    paths = []
+    base_size = None
+    if not force_size:
+        for s in shots:
+            p = s.get("image_path")
+            if p and os.path.exists(p):
+                base_size = _image_size(p)
+                break
+    size = force_size or base_size
+    for i in range(len(shots)-1):
+        a = shots[i].get("image_path")
+        b = shots[i+1].get("image_path")
+        if not (a and b and os.path.exists(a) and os.path.exists(b)):
+            continue
+        outp = _pair_clip_path(pid, shots[i]["id"], shots[i+1]["id"])
+        _build_pair_clip(a, b, outp, fps=fps, hold=hold, crossfade=crossfade, resize_to=size)
+        paths.append(outp)
+    return paths
+
+def _build_final_stitched_from_pairs(pair_paths: list, out_path: str, fps: int = 24):
+    if not pair_paths:
+        raise RuntimeError("No pair clips to stitch.")
+    clips = []
+    for p in pair_paths:
+        if os.path.exists(p):
+            clips.append(VideoFileClip(p))
+    if not clips:
+        raise RuntimeError("No readable pair clips on disk.")
+    final = concatenate_videoclips(clips, method="compose")
+    final.write_videofile(
+        out_path,
+        fps=fps,
+        codec="libx264",
+        audio=False,
+        preset="medium",
+        threads=os.cpu_count() or 2,
+        verbose=False,
+        logger=None
+    )
+    final.close()
+    for c in clips: c.close()
+
+
 # =========================
 # Shots <-> DataFrame utils
 # =========================
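The stitching helpers can be smoke-tested outside Gradio; a minimal sketch assuming two same-size keyframe PNGs already exist at the placeholder paths below (MoviePy 1.x):

    # Hypothetical smoke test; paths are placeholders, not the app's real layout.
    a = "keyframes/shot_01.png"
    b = "keyframes/shot_02.png"
    _build_pair_clip(a, b, "pair_01_to_02.mp4", fps=24, hold=0.5, crossfade=0.7)
    _build_final_stitched_from_pairs(["pair_01_to_02.mp4"], "final_stitched.mp4", fps=24)

With the defaults, each pair clip runs hold + crossfade + hold = 0.5 + 0.7 + 0.5 = 1.7 s, and the final video is simply those clips concatenated, so every interior keyframe appears twice: at the tail of one pair clip and again at the head of the next.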
@@ -418,11 +516,12 @@ def df_to_shots(df: pd.DataFrame) -> list:
         })
     return sorted(out, key=lambda x: x["id"])
 
+
 # =========================
 # Gradio UI
 # =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎬 Storyboard → Keyframes →
+    gr.Markdown("# 🎬 Storyboard → Keyframes → Videos → Export")
     gr.Markdown(
         "Edit storyboard prompts, then generate keyframes.\n"
         "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
@@ -488,8 +587,15 @@
         kf_status = gr.Markdown("")
 
     with gr.Tab("Videos"):
-        gr.Markdown("### 3) Videos
-
+        gr.Markdown("### 3) Videos")
+        with gr.Row():
+            v_fps = gr.Slider(8, 60, value=24, step=1, label="FPS")
+            v_hold = gr.Slider(0.0, 2.0, value=0.5, step=0.1, label="Hold per still (s)")
+            v_xfade = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Crossfade (s)")
+        with gr.Row():
+            build_pairs_btn = gr.Button("Build pair clips (A→B, B→C, ...)", variant="primary")
+            build_final_btn = gr.Button("Build final stitched video", variant="secondary")
+        vd_table = gr.JSON(label="Rendered outputs (paths)")
 
     with gr.Tab("Export"):
         gr.Markdown("### 4) Export (coming next)")
@@ -634,6 +740,51 @@
 
     approve_next_btn.click(on_approve_next, inputs=[project, current_idx, prompt_box, out_img], outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status])
 
+    # ---- Videos tab handlers
+    def on_build_pairs(p, fps, hold, xfade):
+        if p is None:
+            raise gr.Error("No project.")
+        shots = p.get("shots", [])
+        if len(shots) < 2:
+            raise gr.Error("Need at least 2 approved images to build pair clips.")
+        if not any(s.get("image_path") for s in shots):
+            raise gr.Error("No approved images yet. Approve keyframes first.")
+
+        pair_paths = _build_all_pair_clips(
+            p["meta"]["id"], shots,
+            fps=int(fps), hold=float(hold), crossfade=float(xfade),
+            force_size=None  # or (640, 640) to force uniform size
+        )
+        if not pair_paths:
+            raise gr.Error("Could not find any consecutive pairs with images.")
+        return {"pair_clips": pair_paths, "final": None}
+
+    build_pairs_btn.click(
+        on_build_pairs,
+        inputs=[project, v_fps, v_hold, v_xfade],
+        outputs=[vd_table]
+    )
+
+    def on_build_final(p, fps):
+        if p is None:
+            raise gr.Error("No project.")
+        pid = p["meta"]["id"]
+        clips_dir = os.path.join(project_dir(pid), "clips")
+        pair_paths = sorted(
+            [os.path.join(clips_dir, f) for f in os.listdir(clips_dir) if f.startswith("pair_") and f.endswith(".mp4")]
+        )
+        if not pair_paths:
+            raise gr.Error("No pair clips found. Click 'Build pair clips' first.")
+        outp = _final_stitched_path(pid)
+        _build_final_stitched_from_pairs(pair_paths, outp, fps=int(fps))
+        return {"pair_clips": pair_paths, "final": outp}
+
+    build_final_btn.click(
+        on_build_final,
+        inputs=[project, v_fps],
+        outputs=[vd_table]
+    )
+
     def on_save(p):
         if p is None:
             raise gr.Error("No project in memory.")
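Both handlers return the same dict into the gr.JSON output, so after building the final video the Videos tab shows something like (paths illustrative):

    {
        "pair_clips": [
            "<project_dir>/clips/pair_01_to_02.mp4",
            "<project_dir>/clips/pair_02_to_03.mp4"
        ],
        "final": "<project_dir>/clips/final_stitched.mp4"
    }

One caveat: on_build_final re-discovers pair clips by listing the clips directory, so if that directory has never been created (no pair clips built and nothing else creates it), os.listdir raises before the friendlier gr.Error is reached.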