Shalmoni committed on
Commit aaab32d · verified · 1 Parent(s): 4c310ec

Update app.py

Files changed (1)
  1. app.py +360 -217
app.py CHANGED
@@ -1,11 +1,14 @@
-# app.py — FLUX-only with temporal chaining + Aggressive follow + Video stitching (lazy MoviePy)
-import os, json, uuid, re, sys, subprocess
+# app.py — FLUX-only with temporal chaining + Aggressive follow + Video stitching (backend + ffmpeg)
+import os, json, uuid, re, tempfile, subprocess, shlex
 from datetime import datetime
+
 import gradio as gr
 import spaces
 import torch
 from PIL import Image
 import pandas as pd
+import requests
+import imageio_ffmpeg

 # =========================
 # Storage helpers
@@ -42,14 +45,15 @@ def ensure_project(p, suggested_name="Project"):
     name = f"{suggested_name}-{pid[:4]}"
     proj = {
         "meta": {"id": pid, "name": name, "created": now_iso(), "updated": now_iso()},
-        "shots": [],  # id,title,description,duration,fps,steps,seed,negative,image_path
+        "shots": [],  # each shot: id,title,description,duration,fps,steps,seed,negative,image_path
         "clips": [],
+        # optional: "seed" filled later
     }
     save_project(proj)
     return proj

 # =========================
-# LLM — Storyboard generator (ZeroGPU friendly)
+# LLM (ZeroGPU) — Storyboard generator (robust)
 # =========================
 from transformers import AutoTokenizer, AutoModelForCausalLM

@@ -65,27 +69,37 @@ def _lazy_model_tok():
         return _model, _tokenizer

     _tokenizer = AutoTokenizer.from_pretrained(STORYBOARD_MODEL, trust_remote_code=True)
+
     use_cuda = torch.cuda.is_available()
-    dtype = torch.float16 if use_cuda else torch.float32
+    preferred_dtype = torch.float16 if use_cuda else torch.float32
+
     _model = AutoModelForCausalLM.from_pretrained(
-        STORYBOARD_MODEL, device_map="auto", torch_dtype=dtype,
-        trust_remote_code=True, use_safetensors=True
+        STORYBOARD_MODEL,
+        device_map="auto",
+        torch_dtype=preferred_dtype,
+        trust_remote_code=True,
+        use_safetensors=True
     )
+
     if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
         _tokenizer.pad_token_id = _tokenizer.eos_token_id
+
     return _model, _tokenizer

 def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
     return (
         "You are a cinematographer and storyboard artist. "
-        "Break the idea into DISTINCT, DETAILED shots with concrete visual info: objects, camera placement/angle, subject position, lighting, background.\n\n"
-        "Return ONLY a JSON array enclosed between <JSON> and </JSON>.\n"
-        f"Create {n_shots} shots for:\n'''{user_prompt}'''\n\n"
-        "Item schema:\n"
+        "Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
+        "For each shot, provide the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details. "
+        "Imagine you're describing frames for a film storyboard, not vague events.\n\n"
+        "Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
+        f"Create a storyboard of {n_shots} shots for this idea:\n\n"
+        f"'''{user_prompt}'''\n\n"
+        "Each item schema:\n"
         "{\n"
         '  "id": <int starting at 1>,\n'
         '  "title": "Short shot title",\n'
-        '  "description": "Highly specific visual description (camera, framing, time of day, subject position, lighting, mood, background).",\n'
+        '  "description": "Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details.",\n'
         f'  "duration": {default_len},\n'
         f'  "fps": {default_fps},\n'
         '  "steps": 30,\n'
@@ -97,7 +111,7 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_

 def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
     return (
-        "Reply ONLY with a JSON array starting with '[' and ending with ']'.\n"
+        "Reply ONLY with a JSON array starting with '[' and ending with ']'. No extra text.\n"
         f"Storyboard: {n_shots} shots for:\n'''{user_prompt}'''\n"
         "Item schema:\n"
         "{\n"
@@ -117,7 +131,8 @@ def _apply_chat(tok, system_msg: str, user_msg: str) -> str:
         return tok.apply_chat_template(
             [{"role": "system", "content": system_msg},
              {"role": "user", "content": user_msg}],
-            tokenize=False, add_generation_prompt=True
+            tokenize=False,
+            add_generation_prompt=True
         )
     return system_msg + "\n\n" + user_msg

@@ -125,32 +140,46 @@ def _generate_text(model, tok, prompt_text: str) -> str:
     inputs = tok(prompt_text, return_tensors="pt")
     inputs = {k: v.to(model.device) for k, v in inputs.items()}
     eos_id = tok.eos_token_id or tok.pad_token_id
+
     gen = model.generate(
-        **inputs, max_new_tokens=HF_TASK_MAX_TOKENS, do_sample=False, temperature=0.0,
-        repetition_penalty=1.05, eos_token_id=eos_id, pad_token_id=eos_id
+        **inputs,
+        max_new_tokens=HF_TASK_MAX_TOKENS,
+        do_sample=False,
+        temperature=0.0,
+        repetition_penalty=1.05,
+        eos_token_id=eos_id,
+        pad_token_id=eos_id,
     )
     prompt_len = inputs["input_ids"].shape[1]
    continuation_ids = gen[0][prompt_len:]
     text = tok.decode(continuation_ids, skip_special_tokens=True).strip()
     if text.startswith("```"):
-        text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.I|re.S).strip()
+        text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE | re.DOTALL).strip()
     return text

 def _extract_json_array(text: str) -> str:
-    m = re.search(r"<JSON>(.*?)</JSON>", text, flags=re.S|re.I)
-    if m and m.group(1).strip():
-        return m.group(1).strip()
+    m = re.search(r"<JSON>(.*?)</JSON>", text, flags=re.DOTALL | re.IGNORECASE)
+    if m:
+        inner = m.group(1).strip()
+        if inner:
+            return inner
     start = text.find("[")
-    if start == -1: return ""
-    depth = 0; in_str = False; prev = ""
+    if start == -1:
+        return ""
+    depth = 0
+    in_str = False
+    prev = ""
     for i in range(start, len(text)):
         ch = text[i]
-        if ch == '"' and prev != '\\': in_str = not in_str
+        if ch == '"' and prev != '\\':
+            in_str = not in_str
         if not in_str:
-            if ch == "[": depth += 1
+            if ch == "[":
+                depth += 1
             elif ch == "]":
                 depth -= 1
-                if depth == 0: return text[start:i+1].strip()
+                if depth == 0:
+                    return text[start:i+1].strip()
         prev = ch
     return ""

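Note: the new `_extract_json_array` is deliberately two-stage. It prefers the `<JSON>…</JSON>` envelope, and only then falls back to a bracket scan that tracks string state, so a `[` or `]` inside a quoted value cannot unbalance the depth counter. A minimal usage sketch, not part of the commit, assuming the function as defined above:

```python
# Hypothetical LLM outputs, for illustration only.
tagged = 'Sure, here you go:\n<JSON>[{"id": 1, "title": "Dawn [wide]"}]</JSON>\nHope that helps!'
print(_extract_json_array(tagged))    # -> [{"id": 1, "title": "Dawn [wide]"}]

untagged = 'Storyboard below.\n[{"id": 1}, {"id": 2}] Done.'
print(_extract_json_array(untagged))  # -> [{"id": 1}, {"id": 2}]
```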
@@ -174,6 +203,7 @@ def _normalize_shots(shots_raw, default_fps: int, default_len: int):
 def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: int, default_len: int):
     model, tok = _lazy_model_tok()
     system = "You are a film previsualization assistant. Output must be valid JSON."
+
     p1 = _apply_chat(tok, system + " Return ONLY JSON inside <JSON> tags.",
                      _prompt_with_tags(user_prompt, n_shots, default_fps, default_len))
     out1 = _generate_text(model, tok, p1)
@@ -185,31 +215,50 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
     out2 = _generate_text(model, tok, p2)
     json_text = _extract_json_array(out2)
     if not json_text and "[" in out2 and "]" in out2:
-        start, end = out2.find("["), out2.rfind("]")
-        if start != -1 and end > start: json_text = out2[start:end+1].strip()
-
-    if not json_text:
-        return [{
-            "id": i, "title": f"Shot {i}",
-            "description": f"Placeholder for: {user_prompt[:80]}",
-            "duration": default_len, "fps": default_fps,
-            "steps": 30, "seed": None, "negative": "", "image_path": None
-        } for i in range(1, int(n_shots)+1)]
+        start = out2.find("["); end = out2.rfind("]")
+        if start != -1 and end != -1 and end > start:
+            json_text = out2[start:end+1].strip()
+
+    if not json_text or not json_text.strip():
+        fallback = []
+        for i in range(1, int(n_shots) + 1):
+            fallback.append({
+                "id": i,
+                "title": f"Shot {i}",
+                "description": f"Simple placeholder for: {user_prompt[:80]}",
+                "duration": default_len,
+                "fps": default_fps,
+                "steps": 30,
+                "seed": None,
+                "negative": "",
+                "image_path": None
+            })
+        return fallback

     try:
         shots_raw = json.loads(json_text)
     except Exception:
-        shots_raw = json.loads(re.sub(r",\s*([\]\}])", r"\1", json_text))
+        json_text_clean = re.sub(r",\s*([\]\}])", r"\1", json_text)
+        shots_raw = json.loads(json_text_clean)
+
     return _normalize_shots(shots_raw, default_fps, default_len)

 # =========================
-# IMAGE GEN — FLUX-only + Temporal chaining
+# IMAGE GEN — FLUX only (no fallback) + Temporal chaining
 # =========================
 USE_CUDA = torch.cuda.is_available()
 DTYPE = torch.float16 if USE_CUDA else torch.float32
-FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")  # gated
+
+# Gated repo; accept access and set HF_TOKEN
+FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")

+# I2V backend for video between frames
+I2V_ENDPOINT = os.getenv(
+    "I2V_ENDPOINT",
+    "https://moonmath-ai-dev--moonmath-i2v-backend-moonmathinference-run.modal.run"
+)
+
 _flux_t2i = None
 _flux_i2i = None

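Note: two failure modes are now handled explicitly. If no array can be located at all, the function degrades to placeholder shots instead of raising; and if `json.loads` chokes, one retry strips trailing commas before `]`/`}`, a common LLM artifact. A standalone sketch of that cleanup (illustrative, not from the commit):

```python
import json, re

broken = '[{"id": 1, "title": "Shot 1",}, ]'      # trailing commas -> invalid JSON
clean = re.sub(r",\s*([\]\}])", r"\1", broken)    # same regex as in the diff above
print(json.loads(clean))                          # [{'id': 1, 'title': 'Shot 1'}]
```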
@@ -230,122 +279,131 @@ def _lazy_flux_pipes():

 def _flux_healthcheck():
     if not HF_TOKEN:
-        raise RuntimeError("HF_TOKEN is not set. Accept the model terms on HF and provide a READ token.")
+        raise RuntimeError(
+            "HF_TOKEN is not set. FLUX models are gated; set a Hugging Face READ token "
+            "and accept the model terms on the repo page."
+        )
     _lazy_flux_pipes()

 def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
     pdir = project_dir(pid)
     out = os.path.join(pdir, "keyframes", f"shot_{shot_id:02d}.png")
-    img.save(out); return out
+    img.save(out)
+    return out

+# ---- Temporal prompt composer (PRIORITIZE the new shot) ----
 def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5):
+    """
+    Build a prompt that continues the scene N seconds later,
+    prioritizing the NEW shot description (composition/action),
+    while keeping only identity/lighting/environment continuity.
+    Returns (composed_prompt, composed_negative).
+    """
     curr = shots[idx]
     curr_desc = (curr.get("description") or "").strip()
     curr_neg = (curr.get("negative") or "").strip()
-    if idx == 0: return curr_desc, curr_neg
-    prev_desc = (shots[idx-1].get("description") or "").strip()
+
+    if idx == 0:
+        return curr_desc, curr_neg
+
+    prev = shots[idx - 1]
+    prev_desc = (prev.get("description") or "").strip()
+
     composed = (
         f"Continue the same scene {seconds_forward} seconds later.\n"
-        f'PRIORITIZE this new moment & composition: "{curr_desc}".\n'
-        "Keep continuity ONLY for subject identity, lighting palette, time of day, environment style.\n"
+        f'PRIORITIZE this new moment and its composition now: "{curr_desc}".\n'
+        "Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
         f'Previous frame (context only, do not copy its framing): "{prev_desc}".\n'
-        f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of progression."
+        f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
     ).strip()
-    negative = (curr_neg + "; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
-                "hard scene reset; different subject identity; wildly different art style; unrelated background").strip("; ")
+
+    negative = (
+        curr_neg + (
+            "; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
+            "hard scene reset; different subject identity; wildly different art style; unrelated background"
+        )
+    ).strip("; ")
+
     return composed, negative

 @spaces.GPU(duration=180)
 def generate_keyframe_image(
-    pid: str, shot_idx: int, shots: list,
-    t2i_steps: int = 18, i2i_steps: int = 22, i2i_strength: float = 0.90,
-    guidance_scale: float = 3.4, width: int = 640, height: int = 640,
-    seconds_forward: int = 5, aggressive: bool = False
+    pid: str,
+    shot_idx: int,
+    shots: list,
+    t2i_steps: int = 18,          # FLUX: 12–22
+    i2i_steps: int = 22,          # FLUX: 16–26
+    i2i_strength: float = 0.90,   # more change toward new prompt
+    guidance_scale: float = 3.4,  # stronger text pull
+    width: int = 640,
+    height: int = 640,
+    seconds_forward: int = 5,     # temporal step
+    aggressive: bool = False      # optional push
 ):
+    """
+    Generate image for shots[shot_idx] using FLUX only.
+    - Shot 1: text2img
+    - Shot k>1: img2img from previous approved frame + temporal prompt ("N seconds later")
+    """
     try:
         t2i, i2i = _lazy_flux_pipes()
     except Exception as e:
-        raise gr.Error(f"FLUX failed to load: {e}")
+        raise gr.Error(
+            f"FLUX failed to load: {e}\n"
+            "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-schnell') and ensure HF_TOKEN if required."
+        )

-    prompt, negative = _compose_temporal_prompt(shots, shot_idx, seconds_forward=seconds_forward)
+    # Build temporal prompt
+    composed_prompt, composed_negative = _compose_temporal_prompt(shots, shot_idx, seconds_forward=seconds_forward)

+    # RNG / seed
     seed = shots[shot_idx].get("seed", None)
     device = "cuda" if USE_CUDA else "cpu"
     gen = torch.Generator(device)
-    if isinstance(seed, int): gen = gen.manual_seed(int(seed))
+    if isinstance(seed, int):
+        gen = gen.manual_seed(int(seed))

-    width = max(256, min(1024, int(width)))
+    # sizes
+    width = max(256, min(1024, int(width)))
     height = max(256, min(1024, int(height)))

+    # chaining
     prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
     use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))

+    # Aggressive mode bumps
     if aggressive:
         i2i_strength = min(0.98, max(i2i_strength, 0.92))
         guidance_scale = max(guidance_scale, 3.6)
         i2i_steps = max(i2i_steps, 24)

+    # generate
     if not use_prev:
         out = t2i(
-            prompt=prompt, negative_prompt=(negative or None),
+            prompt=composed_prompt,
+            negative_prompt=composed_negative or None,
             num_inference_steps=int(max(10, t2i_steps)),
             guidance_scale=float(max(2.4, guidance_scale)),
-            generator=gen, width=width, height=height
+            generator=gen,
+            width=width, height=height
         ).images[0]
     else:
-        init_image = Image.open(prev_path).convert("RGB")
+        init_image = Image.open(prev_path).convert("RGB")  # previous approved frame (the "init_image")
         out = i2i(
-            prompt=prompt, negative_prompt=(negative or None),
-            image=init_image, strength=float(min(max(i2i_strength, 0.70), 0.98)),
+            prompt=composed_prompt,
+            negative_prompt=composed_negative or None,
+            image=init_image,
+            strength=float(min(max(i2i_strength, 0.70), 0.98)),
             num_inference_steps=int(max(14, i2i_steps)),
-            guidance_scale=float(max(2.4, guidance_scale)), generator=gen
+            guidance_scale=float(max(2.4, guidance_scale)),
+            generator=gen
         ).images[0]

-    saved = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
-    return saved
+    saved_path = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
+    return saved_path

 # =========================
-# MoviePy lazy install/import
-# =========================
-def _ensure_moviepy():
-    """
-    Import MoviePy lazily. If unavailable, try a best-effort pip install.
-    If that still fails, raise a clear Gradio error telling the user to rebuild.
-    Also wires up the bundled ffmpeg from imageio-ffmpeg.
-    """
-    try:
-        from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
-        from moviepy.video.io.VideoFileClip import VideoFileClip
-        return ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip
-    except Exception:
-        pass  # will try to install below
-
-    # Try to install at runtime (some Spaces block this)
-    try:
-        import sys, subprocess
-        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q",
-                               "moviepy==1.0.3", "imageio>=2.34.0", "imageio-ffmpeg>=0.4.9"])
-        # Point MoviePy to a known-good ffmpeg
-        try:
-            import imageio_ffmpeg, os as _os
-            _os.environ["IMAGEIO_FFMPEG_EXE"] = imageio_ffmpeg.get_ffmpeg_exe()
-        except Exception:
-            pass
-        # Try importing again
-        from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
-        from moviepy.video.io.VideoFileClip import VideoFileClip
-        return ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip
-    except Exception as e:
-        # Final, friendly failure with next steps
-        import gradio as gr
-        raise gr.Error(
-            "MoviePy is not available. Add `moviepy==1.0.3`, `imageio>=2.34.0`, "
-            "`imageio-ffmpeg>=0.4.9` to requirements.txt and restart/rebuild the Space. "
-            f"(Runtime install failed with: {type(e).__name__}: {e})"
-        )
-
-# =========================
-# Video stitching (pairwise dissolve + final concat)
+# Video stitching helpers (backend per pair + ffmpeg concat)
 # =========================
 def _pair_clip_path(pid: str, i: int, j: int) -> str:
     return os.path.join(project_dir(pid), "clips", f"pair_{i:02d}_to_{j:02d}.mp4")
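Note: to see what the composer actually feeds FLUX for shot k>1, here is an illustrative call; the sample shot dicts are made up, the function is the one defined above:

```python
shots = [
    {"description": "Wide shot: a dusty desert road at golden hour", "negative": ""},
    {"description": "Close-up: the driver's hands gripping the wheel", "negative": "blurry"},
]
prompt, negative = _compose_temporal_prompt(shots, idx=1, seconds_forward=5)
print(prompt)    # "Continue the same scene 5 seconds later.\nPRIORITIZE this new moment..."
print(negative)  # "blurry; identical composition as previous; exact same framing; ..."
```

With img2img strength defaulting to 0.90 (pushed to at least 0.92 in aggressive mode), the previous frame mostly contributes identity and palette while the new description dictates composition.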
@@ -353,54 +411,69 @@ def _pair_clip_path(pid: str, i: int, j: int) -> str:
 def _final_stitched_path(pid: str) -> str:
     return os.path.join(project_dir(pid), "clips", "final_stitched.mp4")

-def _image_size(path: str):
-    with Image.open(path) as im:
-        return im.width, im.height
-
-def _build_pair_clip(img_a: str, img_b: str, out_path: str, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, resize_to=None):
-    ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip = _ensure_moviepy()
-    ca = ImageClip(img_a).set_duration(hold + crossfade)
-    cb = ImageClip(img_b).set_duration(hold + crossfade).set_start(hold)
-    if resize_to:
-        ca = ca.resize(newsize=resize_to)
-        cb = cb.resize(newsize=resize_to)
-    ca_x = ca.crossfadeout(crossfade)
-    cb_x = cb.crossfadein(crossfade)
-    total = hold + crossfade + hold
-    comp = CompositeVideoClip([ca_x, cb_x]).set_duration(total)
-    comp.write_videofile(out_path, fps=fps, codec="libx264", audio=False, preset="medium",
-                         threads=os.cpu_count() or 2, verbose=False, logger=None)
-    comp.close(); ca.close(); cb.close()
-
-def _build_all_pair_clips(pid: str, shots: list, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, force_size=None):
-    paths = []
-    base_size = None
-    if not force_size:
-        for s in shots:
-            p = s.get("image_path")
-            if p and os.path.exists(p):
-                base_size = _image_size(p)
-                break
-    size = force_size or base_size
-    for i in range(len(shots)-1):
-        a = shots[i].get("image_path")
-        b = shots[i+1].get("image_path")
-        if not (a and b and os.path.exists(a) and os.path.exists(b)): continue
-        outp = _pair_clip_path(pid, shots[i]["id"], shots[i+1]["id"])
-        _build_pair_clip(a, b, outp, fps=fps, hold=hold, crossfade=crossfade, resize_to=size)
-        paths.append(outp)
-    return paths
-
-def _build_final_stitched_from_pairs(pair_paths: list, out_path: str, fps: int = 24):
-    ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip = _ensure_moviepy()
-    if not pair_paths: raise RuntimeError("No pair clips to stitch.")
-    clips = [VideoFileClip(p) for p in pair_paths if os.path.exists(p)]
-    if not clips: raise RuntimeError("No readable pair clips on disk.")
-    final = concatenate_videoclips(clips, method="compose")
-    final.write_videofile(out_path, fps=fps, codec="libx264", audio=False, preset="medium",
-                          threads=os.cpu_count() or 2, verbose=False, logger=None)
-    final.close()
-    for c in clips: c.close()
+def _call_i2v_backend(img_a_path: str, img_b_path: str, prompt: str, seed: int | None, endpoint: str) -> bytes:
+    """
+    Calls Modal backend with two images to get a transition clip (mp4 bytes).
+    """
+    params = {}
+    if prompt:
+        params["prompt"] = prompt
+    if seed is not None:
+        params["seed"] = str(int(seed))
+
+    with open(img_a_path, "rb") as fa, open(img_b_path, "rb") as fb:
+        files = {
+            "image_bytes": ("start.png", fa, "application/octet-stream"),
+            "image_bytes_end": ("end.png", fb, "application/octet-stream"),
+        }
+        r = requests.post(endpoint, params=params, files=files, headers={"accept": "application/json"})
+    if r.status_code != 200:
+        raise gr.Error(f"I2V backend error {r.status_code}: {r.text[:400]}")
+    return r.content
+
+def _build_all_pair_videos_backend(pid: str, shots: list, endpoint: str, prompt: str, seed: int | None) -> list[str]:
+    out_paths = []
+    for k in range(len(shots) - 1):
+        a = shots[k].get("image_path")
+        b = shots[k + 1].get("image_path")
+        if not (a and b and os.path.exists(a) and os.path.exists(b)):
+            continue
+        mp4_bytes = _call_i2v_backend(a, b, prompt=prompt, seed=seed, endpoint=endpoint)
+        outp = _pair_clip_path(pid, shots[k]["id"], shots[k + 1]["id"])
+        with open(outp, "wb") as f:
+            f.write(mp4_bytes)
+        out_paths.append(outp)
+    return out_paths
+
+def _ffmpeg_concat_videos(mp4_paths: list[str], out_path: str) -> None:
+    if not mp4_paths:
+        raise gr.Error("No clips to concatenate.")
+
+    # Create a concat list file
+    list_txt = tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt")
+    try:
+        for p in mp4_paths:
+            if not os.path.exists(p):
+                raise gr.Error(f"Missing clip: {p}")
+            list_txt.write(f"file '{p}'\n")
+        list_txt.flush(); list_txt.close()
+
+        ffmpeg = imageio_ffmpeg.get_ffmpeg_exe()
+
+        # Try stream copy (fast)
+        cmd_copy = f"{shlex.quote(ffmpeg)} -y -f concat -safe 0 -i {shlex.quote(list_txt.name)} -c copy {shlex.quote(out_path)}"
+        rc = subprocess.call(cmd_copy, shell=True)
+        if rc == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > 0:
+            return
+
+        # Fallback re-encode
+        cmd_reenc = f"{shlex.quote(ffmpeg)} -y -f concat -safe 0 -i {shlex.quote(list_txt.name)} -c:v libx264 -pix_fmt yuv420p -preset medium -crf 18 -an {shlex.quote(out_path)}"
+        rc2 = subprocess.call(cmd_reenc, shell=True)
+        if rc2 != 0 or not os.path.exists(out_path) or os.path.getsize(out_path) == 0:
+            raise gr.Error("ffmpeg concat failed (copy and re-encode).")
+    finally:
+        try: os.unlink(list_txt.name)
+        except: pass

 # =========================
 # Shots <-> DataFrame utils
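Note: stitching now happens in two steps. Each consecutive keyframe pair is sent to the I2V backend as a multipart POST (`image_bytes` / `image_bytes_end`) that returns mp4 bytes, and the resulting pair clips are joined locally with ffmpeg's concat demuxer, trying a lossless `-c copy` before falling back to a libx264 re-encode. A minimal usage sketch of the concat helper; the clip paths here are hypothetical:

```python
# Assumes the helpers defined above; paths are made up for illustration.
pair_clips = [
    "projects/abcd/clips/pair_01_to_02.mp4",
    "projects/abcd/clips/pair_02_to_03.mp4",
]
_ffmpeg_concat_videos(pair_clips, "projects/abcd/clips/final_stitched.mp4")
# The temporary list file it writes contains one line per clip, in order:
#   file 'projects/abcd/clips/pair_01_to_02.mp4'
#   file 'projects/abcd/clips/pair_02_to_03.mp4'
```

Stream copy only succeeds when all pair clips share codec and encoding parameters, which should hold here since they come from the same backend; the re-encode path covers the rest.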
@@ -433,13 +506,16 @@ def df_to_shots(df: pd.DataFrame) -> list:
 with gr.Blocks() as demo:
     gr.Markdown("# 🎬 Storyboard → Keyframes → Videos → Export")
     gr.Markdown(
-        "Temporal chaining: each new shot is generated N seconds later from the previous approved frame, "
+        "Edit storyboard prompts, then generate keyframes.\n"
+        "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
         "while the current shot description drives composition & action. **Model**: FLUX-only."
     )

+    # State
     project = gr.State(None)
     current_idx = gr.State(0)

+    # Header
     with gr.Row():
         with gr.Column(scale=2):
             proj_name = gr.Textbox(label="Project name", placeholder="e.g., Desert Chase")
@@ -452,20 +528,21 @@ with gr.Blocks() as demo:
             load_btn = gr.Button("Load")
     sb_status = gr.Markdown("")

+    # Tabs
     with gr.Tabs():
         with gr.Tab("Storyboard"):
             gr.Markdown("### 1) Storyboard")
-            sb_prompt = gr.Textbox(label="High-level prompt", lines=4, placeholder="Describe the story…")
+            sb_prompt = gr.Textbox(label="High-level prompt", lines=4, placeholder="Describe the story you want to create…")
             with gr.Row():
                 sb_target_shots = gr.Slider(1, 12, value=3, step=1, label="Target # of shots")
                 sb_default_fps = gr.Slider(8, 60, value=24, step=1, label="Default FPS")
-                sb_default_len = gr.Slider(1, 12, value=4, step=1, label="Default seconds/shot")
-            propose_btn = gr.Button("Propose Storyboard (LLM)")
+                sb_default_len = gr.Slider(1, 12, value=4, step=1, label="Default seconds per shot")
+            propose_btn = gr.Button("Propose Storyboard (LLM on ZeroGPU)")
             shots_df = gr.Dataframe(
                 headers=SHOT_COLUMNS,
                 datatype=["number","str","str","number","number","number","number","str","str"],
                 row_count=(1,"dynamic"), col_count=len(SHOT_COLUMNS),
-                label="Edit shots (prompts & params)", wrap=True
+                label="Edit shots below (prompts & params)", wrap=True
             )
             save_edits_btn = gr.Button("Save Edits ✓", variant="primary", interactive=False)
             with gr.Row():
@@ -475,16 +552,18 @@ with gr.Blocks() as demo:
         with gr.Tab("Keyframes"):
             gr.Markdown("### 2) Keyframes")
             shot_info_md = gr.Markdown("")
-            prompt_box = gr.Textbox(label="Shot description (editable)", lines=4)
+            prompt_box = gr.Textbox(label="Shot description (editable before generating)", lines=4)
             with gr.Row():
                 gen_btn = gr.Button("Generate / Regenerate", variant="primary")
                 approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
+
             with gr.Row():
                 img_strength = gr.Slider(0.50, 0.98, value=0.90, step=0.02, label="Change vs Consistency (img2img strength)")
                 img_steps = gr.Slider(12, 28, value=22, step=1, label="Inference Steps (img2img)")
                 guidance = gr.Slider(2.4, 4.0, value=3.4, step=0.1, label="Guidance Scale")
                 temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
                 aggressive_follow = gr.Checkbox(value=False, label="Aggressive follow prompt (more change)")
+
             with gr.Row():
                 prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
                 out_img = gr.Image(label="Generated image", type="filepath")
@@ -493,9 +572,9 @@ with gr.Blocks() as demo:
         with gr.Tab("Videos"):
             gr.Markdown("### 3) Videos")
             with gr.Row():
-                v_fps = gr.Slider(8, 60, value=24, step=1, label="FPS")
-                v_hold = gr.Slider(0.0, 2.0, value=0.5, step=0.1, label="Hold per still (s)")
-                v_xfade = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Crossfade (s)")
+                v_fps = gr.Slider(8, 60, value=24, step=1, label="FPS (display only)")
+                v_hold = gr.Slider(0.0, 2.0, value=0.5, step=0.1, label="Hold per still (UI only)")
+                v_xfade = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Crossfade (UI only)")
             with gr.Row():
                 build_pairs_btn = gr.Button("Build pair clips (A→B, B→C, ...)", variant="primary")
                 build_final_btn = gr.Button("Build final stitched video", variant="secondary")
@@ -514,22 +593,31 @@ with gr.Blocks() as demo:

     def on_propose(p, prompt, target_shots, fps, vlen):
         p = ensure_project(p, suggested_name=(proj_name.value if hasattr(proj_name, "value") else "Project"))
-        if not str(prompt or "").strip():
+        if not prompt or not str(prompt).strip():
             raise gr.Error("Please enter a high-level prompt.")
         shots = generate_storyboard_with_llm(str(prompt).strip(), int(target_shots), int(fps), int(vlen))
-        p = dict(p); p["shots"] = shots; p["meta"]["updated"] = now_iso(); save_project(p)
+        p = dict(p)
+        p["shots"] = shots
+        p["meta"]["updated"] = now_iso()
+        save_project(p)
         return p, shots_to_df(shots), gr.update(value="Storyboard generated (editable)."), gr.update(interactive=True)

-    propose_btn.click(on_propose,
+    propose_btn.click(
+        on_propose,
         inputs=[project, sb_prompt, sb_target_shots, sb_default_fps, sb_default_len],
         outputs=[project, shots_df, sb_status, save_edits_btn]
     )

     def on_save_edits(p, df):
-        if p is None: raise gr.Error("No project in memory.")
-        if df is None: raise gr.Error("No storyboard table to save.")
+        if p is None:
+            raise gr.Error("No project in memory. Click New Project, then generate a storyboard.")
+        if df is None:
+            raise gr.Error("No storyboard table to save. Generate a storyboard first, then edit it.")
         shots = df_to_shots(df)
-        p = dict(p); p["shots"] = shots; p["meta"]["updated"] = now_iso(); save_project(p)
+        p = dict(p)
+        p["shots"] = shots
+        p["meta"]["updated"] = now_iso()
+        save_project(p)
         return p, gr.update(value="Edits saved.")

     save_edits_btn.click(on_save_edits, inputs=[project, shots_df], outputs=[project, sb_status])
@@ -538,23 +626,42 @@ with gr.Blocks() as demo:
         if p is None: raise gr.Error("No project.")
         shots = df_to_shots(df)
         if not shots: raise gr.Error("Storyboard is empty.")
+
+        # lock a single seed for the project:
         proj_seed = None
-        if str(proj_seed_override or "").isdigit(): proj_seed = int(proj_seed_override)
-        if proj_seed is None: proj_seed = p.get("meta", {}).get("seed")
+        if proj_seed_override not in [None, ""] and str(proj_seed_override).isdigit():
+            proj_seed = int(proj_seed_override)
+        if proj_seed is None:
+            proj_seed = p.get("meta", {}).get("seed", None)
         if proj_seed is None:
             for s in shots:
-                if isinstance(s.get("seed"), int): proj_seed = int(s["seed"]); break
-        if proj_seed is None: proj_seed = int(torch.randint(0, 2**31 - 1, (1,)).item())
+                if isinstance(s.get("seed"), int):
+                    proj_seed = int(s["seed"])
+                    break
+        if proj_seed is None:
+            proj_seed = int(torch.randint(0, 2**31 - 1, (1,)).item())
+
         for s in shots:
-            if not isinstance(s.get("seed"), int): s["seed"] = proj_seed
-        p = dict(p); p["shots"] = shots; p["meta"]["seed"] = proj_seed; p["meta"]["updated"] = now_iso(); save_project(p)
-        idx = 0; prev_path = None
-        info = (f"**Shot {shots[idx]['id']} — {shots[idx]['title']}** \n"
-                f"Duration: {shots[idx]['duration']}s @ {shots[idx]['fps']} fps \n"
-                f"Locked project seed: `{proj_seed}`")
-        return p, 0, gr.update(value=info), gr.update(value=shots[idx]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value="Ready for shot 1."), gr.update(value=proj_seed)
-
-    to_keyframes_btn.click(on_start_keyframes,
+            if not isinstance(s.get("seed"), int):
+                s["seed"] = proj_seed
+
+        p = dict(p)
+        p["shots"] = shots
+        p["meta"]["seed"] = proj_seed
+        p["meta"]["updated"] = now_iso()
+        save_project(p)
+
+        idx = 0
+        prev_path = None
+        info = (
+            f"**Shot {shots[idx]['id']} — {shots[idx]['title']}** \n"
+            f"Duration: {shots[idx]['duration']}s @ {shots[idx]['fps']} fps \n"
+            f"Locked project seed: `{proj_seed}`"
+        )
+        return p, 0, gr.update(value=info), gr.update(value=shots[idx]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value=f"Ready to generate shot 1."), gr.update(value=proj_seed)
+
+    to_keyframes_btn.click(
+        on_start_keyframes,
         inputs=[project, shots_df, proj_seed_box],
         outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
     )
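Note: `on_start_keyframes` now locks one project-wide seed with a fixed precedence: UI override, then `meta.seed`, then the first integer per-shot seed, then a fresh random draw; every shot without an integer seed inherits the result. A condensed sketch of just that precedence (illustrative, outside the Gradio handler):

```python
import torch

def resolve_project_seed(override, meta_seed, shot_seeds):
    # Mirrors the precedence in on_start_keyframes above.
    if override not in [None, ""] and str(override).isdigit():
        return int(override)
    if meta_seed is not None:
        return meta_seed
    for s in shot_seeds:
        if isinstance(s, int):
            return int(s)
    return int(torch.randint(0, 2**31 - 1, (1,)).item())

print(resolve_project_seed("", None, [None, 1234]))  # -> 1234
```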
@@ -563,93 +670,129 @@ with gr.Blocks() as demo:
         if p is None: raise gr.Error("No project.")
         shots = p["shots"]
         if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
-        shots[idx]["description"] = current_prompt
+        shots[idx]["description"] = current_prompt  # allow tweaking
+
         img_path = generate_keyframe_image(
-            p["meta"]["id"], int(idx), shots,
-            t2i_steps=18, i2i_steps=int(i2i_steps_val),
+            p["meta"]["id"],
+            int(idx),
+            shots,
+            t2i_steps=18,
+            i2i_steps=int(i2i_steps_val),
             i2i_strength=float(i2i_strength_val),
             guidance_scale=float(guidance_val),
-            width=640, height=640,
+            width=640,
+            height=640,
             seconds_forward=int(seconds_forward_val),
             aggressive=bool(aggressive_val)
         )
         prev_path = shots[idx-1]["image_path"] if idx > 0 else None
         return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")

-    gen_btn.click(on_generate_img,
+    gen_btn.click(
+        on_generate_img,
         inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs, aggressive_follow],
         outputs=[out_img, prev_img, kf_status]
     )

     def on_approve_next(p, idx, current_prompt, latest_img_path):
         if p is None: raise gr.Error("No project.")
-        shots = p["shots"]; i = int(idx)
+        shots = p["shots"]
+        i = int(idx)
         if i < 0 or i >= len(shots): raise gr.Error("Invalid shot index.")
         if not latest_img_path: raise gr.Error("Generate an image first.")
+
+        # commit
         shots[i]["description"] = current_prompt
         shots[i]["image_path"] = latest_img_path
-        p["shots"] = shots; p["meta"]["updated"] = now_iso(); save_project(p)
+        p["shots"] = shots
+        p["meta"]["updated"] = now_iso()
+        save_project(p)
+
+        # next
         if i + 1 < len(shots):
             ni = i + 1
-            info = (f"**Shot {shots[ni]['id']} — {shots[ni]['title']}** \n"
-                    f"Duration: {shots[ni]['duration']}s @ {shots[ni]['fps']} fps \n"
-                    f"Locked project seed: `{p['meta'].get('seed')}`")
+            info = (
+                f"**Shot {shots[ni]['id']} — {shots[ni]['title']}** \n"
+                f"Duration: {shots[ni]['duration']}s @ {shots[ni]['fps']} fps \n"
+                f"Locked project seed: `{p['meta'].get('seed')}`"
+            )
             prev_path = shots[ni-1]["image_path"]
             return p, ni, gr.update(value=info), gr.update(value=shots[ni]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value=f"Approved shot {shots[i]['id']}. On to shot {shots[ni]['id']}.")
         else:
             return p, i, gr.update(value="**All keyframes approved.** Proceed to Videos tab."), gr.update(value=""), gr.update(value=shots[i]["image_path"]), gr.update(value=None), gr.update(value="All shots approved ✅")

-    approve_next_btn.click(on_approve_next,
-        inputs=[project, current_idx, prompt_box, out_img],
-        outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status]
-    )
+    approve_next_btn.click(on_approve_next, inputs=[project, current_idx, prompt_box, out_img], outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status])

-    # ---- Videos tab
+    # ---- Videos tab handlers (backend + ffmpeg)
     def on_build_pairs(p, fps, hold, xfade):
-        if p is None: raise gr.Error("No project.")
+        if p is None:
+            raise gr.Error("No project.")
         shots = p.get("shots", [])
-        if len(shots) < 2: raise gr.Error("Need at least 2 approved images.")
-        if not any(s.get("image_path") for s in shots): raise gr.Error("No approved images yet.")
-        pair_paths = _build_all_pair_clips(
+        if len(shots) < 2:
+            raise gr.Error("Need at least 2 approved images to build pair clips.")
+        if not any(s.get("image_path") for s in shots):
+            raise gr.Error("No approved images yet. Approve keyframes first.")
+
+        seed = p.get("meta", {}).get("seed", None)
+        titles = " -> ".join([s.get("title") or f"Shot {s.get('id')}" for s in shots])
+        context_prompt = f"Transition between consecutive storyboard frames. Sequence: {titles}"
+
+        pair_paths = _build_all_pair_videos_backend(
             p["meta"]["id"], shots,
-            fps=int(fps), hold=float(hold), crossfade=float(xfade),
-            force_size=None
+            endpoint=I2V_ENDPOINT,
+            prompt=context_prompt,
+            seed=seed
         )
-        if not pair_paths: raise gr.Error("No consecutive pairs with images found.")
+        if not pair_paths:
+            raise gr.Error("Could not create any pair clips (missing consecutive images).")
         return {"pair_clips": pair_paths, "final": None}

-    build_pairs_btn.click(on_build_pairs, inputs=[project, v_fps, v_hold, v_xfade], outputs=[vd_table])
+    build_pairs_btn.click(
+        on_build_pairs,
+        inputs=[project, v_fps, v_hold, v_xfade],
+        outputs=[vd_table]
+    )

     def on_build_final(p, fps):
-        if p is None: raise gr.Error("No project.")
+        if p is None:
+            raise gr.Error("No project.")
         pid = p["meta"]["id"]
         clips_dir = os.path.join(project_dir(pid), "clips")
-        pair_paths = sorted([os.path.join(clips_dir, f) for f in os.listdir(clips_dir)
-                             if f.startswith("pair_") and f.endswith(".mp4")])
-        if not pair_paths: raise gr.Error("No pair clips found. Build pair clips first.")
+        pair_paths = sorted(
+            [os.path.join(clips_dir, f) for f in os.listdir(clips_dir) if f.startswith("pair_") and f.endswith(".mp4")]
+        )
+        if not pair_paths:
+            raise gr.Error("No pair clips found. Click 'Build pair clips' first.")
         outp = _final_stitched_path(pid)
-        _build_final_stitched_from_pairs(pair_paths, outp, fps=int(fps))
+        _ffmpeg_concat_videos(pair_paths, outp)
         return {"pair_clips": pair_paths, "final": outp}

-    build_final_btn.click(on_build_final, inputs=[project, v_fps], outputs=[vd_table])
+    build_final_btn.click(
+        on_build_final,
+        inputs=[project, v_fps],
+        outputs=[vd_table]
+    )

-    # save/load
     def on_save(p):
-        if p is None: raise gr.Error("No project in memory.")
-        path = save_project(p); return gr.update(value=f"Saved to `{path}`")
+        if p is None:
+            raise gr.Error("No project in memory.")
+        path = save_project(p)
+        return gr.update(value=f"Saved to `{path}`")

     save_btn.click(on_save, inputs=[project], outputs=[sb_status])

     def on_load(file_obj):
         p = load_project_file(file_obj)
         seed_val = p.get("meta", {}).get("seed", None)
-        return (p,
-                gr.update(value=f"Loaded `{p['meta']['name']}` (id: `{p['meta']['id']}`)"),
-                shots_to_df(p.get("shots", [])),
-                gr.update(value=seed_val))
+        return (
+            p,
+            gr.update(value=f"Loaded project `{p['meta']['name']}` (id: `{p['meta']['id']}`)"),
+            shots_to_df(p.get("shots", [])),
+            gr.update(value=seed_val)
+        )

     load_btn.click(on_load, inputs=[load_file], outputs=[project, sb_status, shots_df, proj_seed_box])

 if __name__ == "__main__":
-    _flux_healthcheck()
+    _flux_healthcheck()  # fail fast with clear error if FLUX isn't available
     demo.launch()
 