Shalmoni committed
Commit 7d6ee20 · verified · 1 Parent(s): 05fa186

Update app.py

Files changed (1)
  1. app.py +146 -327
app.py CHANGED
@@ -1,5 +1,5 @@
- # app.py — FLUX-only with temporal chaining + Aggressive follow + Video stitching
- import os, json, uuid, re
+ # app.py — FLUX-only with temporal chaining + Aggressive follow + Video stitching (lazy MoviePy)
+ import os, json, uuid, re, sys, subprocess
  from datetime import datetime
  import gradio as gr
  import spaces
@@ -7,11 +7,6 @@ import torch
  from PIL import Image
  import pandas as pd
 
- # MoviePy for stitching
- from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
- from moviepy.video.io.VideoFileClip import VideoFileClip
-
-
  # =========================
  # Storage helpers
  # =========================
@@ -47,15 +42,14 @@ def ensure_project(p, suggested_name="Project"):
  name = f"{suggested_name}-{pid[:4]}"
  proj = {
  "meta": {"id": pid, "name": name, "created": now_iso(), "updated": now_iso()},
- "shots": [], # each shot: id,title,description,duration,fps,steps,seed,negative,image_path
+ "shots": [], # id,title,description,duration,fps,steps,seed,negative,image_path
  "clips": [],
  }
  save_project(proj)
  return proj
 
-
  # =========================
- # LLM (ZeroGPU) — Storyboard generator (robust)
+ # LLM — Storyboard generator (ZeroGPU friendly)
  # =========================
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
@@ -71,37 +65,27 @@ def _lazy_model_tok():
  return _model, _tokenizer
 
  _tokenizer = AutoTokenizer.from_pretrained(STORYBOARD_MODEL, trust_remote_code=True)
-
  use_cuda = torch.cuda.is_available()
- preferred_dtype = torch.float16 if use_cuda else torch.float32
-
+ dtype = torch.float16 if use_cuda else torch.float32
  _model = AutoModelForCausalLM.from_pretrained(
- STORYBOARD_MODEL,
- device_map="auto",
- torch_dtype=preferred_dtype,
- trust_remote_code=True,
- use_safetensors=True
+ STORYBOARD_MODEL, device_map="auto", torch_dtype=dtype,
+ trust_remote_code=True, use_safetensors=True
  )
-
  if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
  _tokenizer.pad_token_id = _tokenizer.eos_token_id
-
  return _model, _tokenizer
 
  def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
  return (
  "You are a cinematographer and storyboard artist. "
- "Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
- "For each shot, provide the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details. "
- "Imagine you're describing frames for a film storyboard, not vague events.\n\n"
- "Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
- f"Create a storyboard of {n_shots} shots for this idea:\n\n"
- f"'''{user_prompt}'''\n\n"
- "Each item schema:\n"
+ "Break the idea into DISTINCT, DETAILED shots with concrete visual info: objects, camera placement/angle, subject position, lighting, background.\n\n"
+ "Return ONLY a JSON array enclosed between <JSON> and </JSON>.\n"
+ f"Create {n_shots} shots for:\n'''{user_prompt}'''\n\n"
+ "Item schema:\n"
  "{\n"
  ' "id": <int starting at 1>,\n'
  ' "title": "Short shot title",\n'
- ' "description": "Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details.",\n'
+ ' "description": "Highly specific visual description (camera, framing, time of day, subject position, lighting, mood, background).",\n'
  f' "duration": {default_len},\n'
  f' "fps": {default_fps},\n'
  ' "steps": 30,\n'
@@ -113,7 +97,7 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
 
  def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
  return (
- "Reply ONLY with a JSON array starting with '[' and ending with ']'. No extra text.\n"
+ "Reply ONLY with a JSON array starting with '[' and ending with ']'.\n"
  f"Storyboard: {n_shots} shots for:\n'''{user_prompt}'''\n"
  "Item schema:\n"
  "{\n"
@@ -133,8 +117,7 @@ def _apply_chat(tok, system_msg: str, user_msg: str) -> str:
  return tok.apply_chat_template(
  [{"role": "system", "content": system_msg},
  {"role": "user", "content": user_msg}],
- tokenize=False,
- add_generation_prompt=True
+ tokenize=False, add_generation_prompt=True
  )
  return system_msg + "\n\n" + user_msg
 
@@ -142,46 +125,32 @@ def _generate_text(model, tok, prompt_text: str) -> str:
  inputs = tok(prompt_text, return_tensors="pt")
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
  eos_id = tok.eos_token_id or tok.pad_token_id
-
  gen = model.generate(
- **inputs,
- max_new_tokens=HF_TASK_MAX_TOKENS,
- do_sample=False,
- temperature=0.0,
- repetition_penalty=1.05,
- eos_token_id=eos_id,
- pad_token_id=eos_id,
+ **inputs, max_new_tokens=HF_TASK_MAX_TOKENS, do_sample=False, temperature=0.0,
+ repetition_penalty=1.05, eos_token_id=eos_id, pad_token_id=eos_id
  )
  prompt_len = inputs["input_ids"].shape[1]
  continuation_ids = gen[0][prompt_len:]
  text = tok.decode(continuation_ids, skip_special_tokens=True).strip()
  if text.startswith("```"):
- text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE | re.DOTALL).strip()
+ text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.I|re.S).strip()
  return text
 
  def _extract_json_array(text: str) -> str:
- m = re.search(r"<JSON>(.*?)</JSON>", text, flags=re.DOTALL | re.IGNORECASE)
- if m:
- inner = m.group(1).strip()
- if inner:
- return inner
+ m = re.search(r"<JSON>(.*?)</JSON>", text, flags=re.S|re.I)
+ if m and m.group(1).strip():
+ return m.group(1).strip()
  start = text.find("[")
- if start == -1:
- return ""
- depth = 0
- in_str = False
- prev = ""
+ if start == -1: return ""
+ depth = 0; in_str = False; prev = ""
  for i in range(start, len(text)):
  ch = text[i]
- if ch == '"' and prev != '\\':
- in_str = not in_str
+ if ch == '"' and prev != '\\': in_str = not in_str
  if not in_str:
- if ch == "[":
- depth += 1
+ if ch == "[": depth += 1
  elif ch == "]":
  depth -= 1
- if depth == 0:
- return text[start:i+1].strip()
+ if depth == 0: return text[start:i+1].strip()
  prev = ch
  return ""
 
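Note: the bracket scanner above returns the first balanced top-level JSON array and is string-aware, so brackets inside quoted values don't unbalance the scan. A quick sanity check (illustrative sketch only; assumes `_extract_json_array` as defined in this commit):

    chatty = 'Sure! Here is your storyboard: [{"id": 1, "title": "A [wide] shot"}] Enjoy.'
    print(_extract_json_array(chatty))
    # -> '[{"id": 1, "title": "A [wide] shot"}]'  (the "[wide]" inside the string is ignored)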
@@ -205,7 +174,6 @@ def _normalize_shots(shots_raw, default_fps: int, default_len: int):
  def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: int, default_len: int):
  model, tok = _lazy_model_tok()
  system = "You are a film previsualization assistant. Output must be valid JSON."
-
  p1 = _apply_chat(tok, system + " Return ONLY JSON inside <JSON> tags.",
  _prompt_with_tags(user_prompt, n_shots, default_fps, default_len))
  out1 = _generate_text(model, tok, p1)
@@ -217,43 +185,29 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: int, default_len: int):
  out2 = _generate_text(model, tok, p2)
  json_text = _extract_json_array(out2)
  if not json_text and "[" in out2 and "]" in out2:
- start = out2.find("["); end = out2.rfind("]")
- if start != -1 and end != -1 and end > start:
- json_text = out2[start:end+1].strip()
-
- if not json_text or not json_text.strip():
- fallback = []
- for i in range(1, int(n_shots) + 1):
- fallback.append({
- "id": i,
- "title": f"Shot {i}",
- "description": f"Simple placeholder for: {user_prompt[:80]}",
- "duration": default_len,
- "fps": default_fps,
- "steps": 30,
- "seed": None,
- "negative": "",
- "image_path": None
- })
- return fallback
+ start, end = out2.find("["), out2.rfind("]")
+ if start != -1 and end > start: json_text = out2[start:end+1].strip()
+
+ if not json_text:
+ return [{
+ "id": i, "title": f"Shot {i}",
+ "description": f"Placeholder for: {user_prompt[:80]}",
+ "duration": default_len, "fps": default_fps,
+ "steps": 30, "seed": None, "negative": "", "image_path": None
+ } for i in range(1, int(n_shots)+1)]
 
  try:
  shots_raw = json.loads(json_text)
  except Exception:
- json_text_clean = re.sub(r",\s*([\]\}])", r"\1", json_text)
- shots_raw = json.loads(json_text_clean)
-
+ shots_raw = json.loads(re.sub(r",\s*([\]\}])", r"\1", json_text))
  return _normalize_shots(shots_raw, default_fps, default_len)
 
-
  # =========================
- # IMAGE GEN — FLUX only (no fallback) + Temporal chaining
+ # IMAGE GEN — FLUX-only + Temporal chaining
  # =========================
  USE_CUDA = torch.cuda.is_available()
  DTYPE = torch.float16 if USE_CUDA else torch.float32
-
- # Correct, gated repo; accept access and set HF_TOKEN
- FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")
+ FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell") # gated
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
 
  _flux_t2i = None
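Note: the except branch leans on a single regex to repair the most common LLM JSON defect, a trailing comma before a closing bracket or brace. A sketch of what it fixes (illustrative only):

    import json, re
    bad = '[{"id": 1, "title": "Opening",},]'
    fixed = re.sub(r",\s*([\]\}])", r"\1", bad)  # drop commas that directly precede ] or }
    print(json.loads(fixed))                     # [{'id': 1, 'title': 'Opening'}]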
@@ -276,134 +230,96 @@ def _lazy_flux_pipes():
 
  def _flux_healthcheck():
  if not HF_TOKEN:
- raise RuntimeError(
- "HF_TOKEN is not set. FLUX models are gated; set a Hugging Face READ token "
- "and accept the model terms on the repo page."
- )
+ raise RuntimeError("HF_TOKEN is not set. Accept the model terms on HF and provide a READ token.")
  _lazy_flux_pipes()
 
  def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
  pdir = project_dir(pid)
  out = os.path.join(pdir, "keyframes", f"shot_{shot_id:02d}.png")
- img.save(out)
- return out
-
+ img.save(out); return out
 
- # ---- Temporal prompt composer (PRIORITIZE the new shot) ----
  def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5):
- """
- Build a prompt that continues the scene N seconds later,
- prioritizing the NEW shot description (composition/action),
- while keeping only identity/lighting/environment continuity.
- Returns (composed_prompt, composed_negative).
- """
  curr = shots[idx]
  curr_desc = (curr.get("description") or "").strip()
  curr_neg = (curr.get("negative") or "").strip()
-
- if idx == 0:
- return curr_desc, curr_neg
-
- prev = shots[idx - 1]
- prev_desc = (prev.get("description") or "").strip()
-
+ if idx == 0: return curr_desc, curr_neg
+ prev_desc = (shots[idx-1].get("description") or "").strip()
  composed = (
  f"Continue the same scene {seconds_forward} seconds later.\n"
- f'PRIORITIZE this new moment and its composition now: "{curr_desc}".\n'
- "Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
+ f'PRIORITIZE this new moment & composition: "{curr_desc}".\n'
+ "Keep continuity ONLY for subject identity, lighting palette, time of day, environment style.\n"
  f'Previous frame (context only, do not copy its framing): "{prev_desc}".\n'
- f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
+ f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of progression."
  ).strip()
-
- negative = (
- curr_neg + (
- "; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
- "hard scene reset; different subject identity; wildly different art style; unrelated background"
- )
- ).strip("; ")
-
+ negative = (curr_neg + "; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
+ "hard scene reset; different subject identity; wildly different art style; unrelated background").strip("; ")
  return composed, negative
 
-
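Note: for shot k>0 the composer leads with the new shot's description and demotes the previous one to context, while the negative prompt pushes away from the old framing. Roughly, for an illustrative two-shot storyboard (output abbreviated):

    shots = [{"description": "Wide shot: rider crests a dune at dusk", "negative": ""},
             {"description": "Close-up on the rider's goggles, sand spray", "negative": "blurry"}]
    prompt, negative = _compose_temporal_prompt(shots, 1, seconds_forward=5)
    # prompt:   Continue the same scene 5 seconds later.
    #           PRIORITIZE this new moment & composition: "Close-up on the rider's goggles, sand spray".
    #           ...
    # negative: blurry; identical composition as previous; exact same framing; ...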
  @spaces.GPU(duration=180)
  def generate_keyframe_image(
- pid: str,
- shot_idx: int,
- shots: list,
- t2i_steps: int = 18, # FLUX: 12–22
- i2i_steps: int = 22, # FLUX: 16–26
- i2i_strength: float = 0.90, # ↑ more change toward new prompt
- guidance_scale: float = 3.4, # ↑ stronger text pull
- width: int = 640,
- height: int = 640,
- seconds_forward: int = 5, # temporal step
- aggressive: bool = False # optional push
+ pid: str, shot_idx: int, shots: list,
+ t2i_steps: int = 18, i2i_steps: int = 22, i2i_strength: float = 0.90,
+ guidance_scale: float = 3.4, width: int = 640, height: int = 640,
+ seconds_forward: int = 5, aggressive: bool = False
  ):
- """
- Generate image for shots[shot_idx] using FLUX only.
- - Shot 1: text2img
- - Shot k>1: img2img from previous approved frame + temporal prompt ("N seconds later")
- """
  try:
  t2i, i2i = _lazy_flux_pipes()
  except Exception as e:
- raise gr.Error(
- f"FLUX failed to load: {e}\n"
- "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-schnell') and ensure HF_TOKEN if required."
- )
+ raise gr.Error(f"FLUX failed to load: {e}")
 
- # Build temporal prompt
- composed_prompt, composed_negative = _compose_temporal_prompt(shots, shot_idx, seconds_forward=seconds_forward)
+ prompt, negative = _compose_temporal_prompt(shots, shot_idx, seconds_forward=seconds_forward)
 
- # RNG / seed
  seed = shots[shot_idx].get("seed", None)
  device = "cuda" if USE_CUDA else "cpu"
  gen = torch.Generator(device)
- if isinstance(seed, int):
- gen = gen.manual_seed(int(seed))
+ if isinstance(seed, int): gen = gen.manual_seed(int(seed))
 
- # sizes
- width = max(256, min(1024, int(width)))
+ width = max(256, min(1024, int(width)))
  height = max(256, min(1024, int(height)))
 
- # chaining
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
  use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
 
- # Aggressive mode bumps
  if aggressive:
  i2i_strength = min(0.98, max(i2i_strength, 0.92))
  guidance_scale = max(guidance_scale, 3.6)
  i2i_steps = max(i2i_steps, 24)
 
- # generate
  if not use_prev:
  out = t2i(
- prompt=composed_prompt,
- negative_prompt=composed_negative or None,
+ prompt=prompt, negative_prompt=(negative or None),
  num_inference_steps=int(max(10, t2i_steps)),
  guidance_scale=float(max(2.4, guidance_scale)),
- generator=gen,
- width=width, height=height
+ generator=gen, width=width, height=height
  ).images[0]
  else:
- init_image = Image.open(prev_path).convert("RGB") # previous approved frame (the "init_image")
+ init_image = Image.open(prev_path).convert("RGB")
  out = i2i(
- prompt=composed_prompt,
- negative_prompt=composed_negative or None,
- image=init_image,
- strength=float(min(max(i2i_strength, 0.70), 0.98)),
+ prompt=prompt, negative_prompt=(negative or None),
+ image=init_image, strength=float(min(max(i2i_strength, 0.70), 0.98)),
  num_inference_steps=int(max(14, i2i_steps)),
- guidance_scale=float(max(2.4, guidance_scale)),
- generator=gen
+ guidance_scale=float(max(2.4, guidance_scale)), generator=gen
  ).images[0]
 
- saved_path = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
- return saved_path
+ saved = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
+ return saved
 
+ # =========================
+ # MoviePy lazy install/import
+ # =========================
+ def _ensure_moviepy():
+ try:
+ from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
+ from moviepy.video.io.VideoFileClip import VideoFileClip
+ return ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip
+ except Exception:
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "moviepy", "imageio", "imageio-ffmpeg"])
+ from moviepy.editor import ImageClip, CompositeVideoClip, concatenate_videoclips
+ from moviepy.video.io.VideoFileClip import VideoFileClip
+ return ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip
 
  # =========================
- # Video stitching helpers (pairwise dissolve + final concat)
+ # Video stitching (pairwise dissolve + final concat)
  # =========================
  def _pair_clip_path(pid: str, i: int, j: int) -> str:
  return os.path.join(project_dir(pid), "clips", f"pair_{i:02d}_to_{j:02d}.mp4")
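Note: deferring the MoviePy import means the Space can boot (and the FLUX healthcheck can run) even when the package is missing; the first stitching call pip-installs it in-process and retries the import. The more conventional alternative would be pinning the dependency up front (a hypothetical requirements.txt, not part of this commit); note also that `from moviepy.editor import ...` implies MoviePy 1.x, since 2.x removed the `moviepy.editor` module:

    # requirements.txt (hypothetical alternative to the runtime install)
    moviepy==1.0.3
    imageio
    imageio-ffmpeg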
@@ -416,35 +332,18 @@ def _image_size(path: str):
  return im.width, im.height
 
  def _build_pair_clip(img_a: str, img_b: str, out_path: str, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, resize_to=None):
- """
- Create a dissolve transition from img_a -> img_b:
- - show img_a for `hold` seconds
- - dissolve for `crossfade` seconds into img_b
- - hold img_b for `hold` seconds
- """
+ ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip = _ensure_moviepy()
  ca = ImageClip(img_a).set_duration(hold + crossfade)
  cb = ImageClip(img_b).set_duration(hold + crossfade).set_start(hold)
-
  if resize_to:
  ca = ca.resize(newsize=resize_to)
  cb = cb.resize(newsize=resize_to)
-
  ca_x = ca.crossfadeout(crossfade)
  cb_x = cb.crossfadein(crossfade)
-
  total = hold + crossfade + hold
  comp = CompositeVideoClip([ca_x, cb_x]).set_duration(total)
-
- comp.write_videofile(
- out_path,
- fps=fps,
- codec="libx264",
- audio=False,
- preset="medium",
- threads=os.cpu_count() or 2,
- verbose=False,
- logger=None
- )
+ comp.write_videofile(out_path, fps=fps, codec="libx264", audio=False, preset="medium",
+ threads=os.cpu_count() or 2, verbose=False, logger=None)
  comp.close(); ca.close(); cb.close()
 
  def _build_all_pair_clips(pid: str, shots: list, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, force_size=None):
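Note on the dissolve timeline: clip A is on screen for hold + crossfade seconds and fades out over the last crossfade seconds; clip B starts at hold and fades in over the same window, so the overlap is exactly the dissolve. With the defaults (illustrative arithmetic):

    hold, crossfade = 0.5, 0.7
    a_visible = (0.0, hold + crossfade)           # A on screen 0.0-1.2 s, fading out 0.5-1.2 s
    b_visible = (hold, hold + crossfade + hold)   # B on screen 0.5-1.7 s, fading in 0.5-1.2 s
    total = hold + crossfade + hold               # 1.7 s per pair clip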
@@ -460,37 +359,23 @@ def _build_all_pair_clips(pid: str, shots: list, fps: int = 24, hold: float = 0.5, crossfade: float = 0.7, force_size=None):
  for i in range(len(shots)-1):
  a = shots[i].get("image_path")
  b = shots[i+1].get("image_path")
- if not (a and b and os.path.exists(a) and os.path.exists(b)):
- continue
+ if not (a and b and os.path.exists(a) and os.path.exists(b)): continue
  outp = _pair_clip_path(pid, shots[i]["id"], shots[i+1]["id"])
  _build_pair_clip(a, b, outp, fps=fps, hold=hold, crossfade=crossfade, resize_to=size)
  paths.append(outp)
  return paths
 
  def _build_final_stitched_from_pairs(pair_paths: list, out_path: str, fps: int = 24):
- if not pair_paths:
- raise RuntimeError("No pair clips to stitch.")
- clips = []
- for p in pair_paths:
- if os.path.exists(p):
- clips.append(VideoFileClip(p))
- if not clips:
- raise RuntimeError("No readable pair clips on disk.")
+ ImageClip, CompositeVideoClip, concatenate_videoclips, VideoFileClip = _ensure_moviepy()
+ if not pair_paths: raise RuntimeError("No pair clips to stitch.")
+ clips = [VideoFileClip(p) for p in pair_paths if os.path.exists(p)]
+ if not clips: raise RuntimeError("No readable pair clips on disk.")
  final = concatenate_videoclips(clips, method="compose")
- final.write_videofile(
- out_path,
- fps=fps,
- codec="libx264",
- audio=False,
- preset="medium",
- threads=os.cpu_count() or 2,
- verbose=False,
- logger=None
- )
+ final.write_videofile(out_path, fps=fps, codec="libx264", audio=False, preset="medium",
+ threads=os.cpu_count() or 2, verbose=False, logger=None)
  final.close()
  for c in clips: c.close()
 
-
  # =========================
  # Shots <-> DataFrame utils
  # =========================
@@ -516,23 +401,19 @@ def df_to_shots(df: pd.DataFrame) -> list:
  })
  return sorted(out, key=lambda x: x["id"])
 
-
  # =========================
  # Gradio UI
  # =========================
  with gr.Blocks() as demo:
  gr.Markdown("# 🎬 Storyboard → Keyframes → Videos → Export")
  gr.Markdown(
- "Edit storyboard prompts, then generate keyframes.\n"
- "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
+ "Temporal chaining: each new shot is generated N seconds later from the previous approved frame, "
  "while the current shot description drives composition & action. **Model**: FLUX-only."
  )
 
- # State
  project = gr.State(None)
  current_idx = gr.State(0)
 
- # Header
  with gr.Row():
  with gr.Column(scale=2):
  proj_name = gr.Textbox(label="Project name", placeholder="e.g., Desert Chase")
@@ -545,21 +426,20 @@
  load_btn = gr.Button("Load")
  sb_status = gr.Markdown("")
 
- # Tabs
  with gr.Tabs():
  with gr.Tab("Storyboard"):
  gr.Markdown("### 1) Storyboard")
- sb_prompt = gr.Textbox(label="High-level prompt", lines=4, placeholder="Describe the story you want to create…")
+ sb_prompt = gr.Textbox(label="High-level prompt", lines=4, placeholder="Describe the story…")
  with gr.Row():
  sb_target_shots = gr.Slider(1, 12, value=3, step=1, label="Target # of shots")
  sb_default_fps = gr.Slider(8, 60, value=24, step=1, label="Default FPS")
- sb_default_len = gr.Slider(1, 12, value=4, step=1, label="Default seconds per shot")
- propose_btn = gr.Button("Propose Storyboard (LLM on ZeroGPU)")
+ sb_default_len = gr.Slider(1, 12, value=4, step=1, label="Default seconds/shot")
+ propose_btn = gr.Button("Propose Storyboard (LLM)")
  shots_df = gr.Dataframe(
  headers=SHOT_COLUMNS,
  datatype=["number","str","str","number","number","number","number","str","str"],
  row_count=(1,"dynamic"), col_count=len(SHOT_COLUMNS),
- label="Edit shots below (prompts & params)", wrap=True
+ label="Edit shots (prompts & params)", wrap=True
  )
  save_edits_btn = gr.Button("Save Edits ✓", variant="primary", interactive=False)
  with gr.Row():
@@ -569,18 +449,16 @@
  with gr.Tab("Keyframes"):
  gr.Markdown("### 2) Keyframes")
  shot_info_md = gr.Markdown("")
- prompt_box = gr.Textbox(label="Shot description (editable before generating)", lines=4)
+ prompt_box = gr.Textbox(label="Shot description (editable)", lines=4)
  with gr.Row():
  gen_btn = gr.Button("Generate / Regenerate", variant="primary")
  approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
-
  with gr.Row():
  img_strength = gr.Slider(0.50, 0.98, value=0.90, step=0.02, label="Change vs Consistency (img2img strength)")
  img_steps = gr.Slider(12, 28, value=22, step=1, label="Inference Steps (img2img)")
  guidance = gr.Slider(2.4, 4.0, value=3.4, step=0.1, label="Guidance Scale")
  temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
  aggressive_follow = gr.Checkbox(value=False, label="Aggressive follow prompt (more change)")
-
  with gr.Row():
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
  out_img = gr.Image(label="Generated image", type="filepath")
@@ -610,31 +488,22 @@
 
  def on_propose(p, prompt, target_shots, fps, vlen):
  p = ensure_project(p, suggested_name=(proj_name.value if hasattr(proj_name, "value") else "Project"))
- if not prompt or not str(prompt).strip():
+ if not str(prompt or "").strip():
  raise gr.Error("Please enter a high-level prompt.")
  shots = generate_storyboard_with_llm(str(prompt).strip(), int(target_shots), int(fps), int(vlen))
- p = dict(p)
- p["shots"] = shots
- p["meta"]["updated"] = now_iso()
- save_project(p)
+ p = dict(p); p["shots"] = shots; p["meta"]["updated"] = now_iso(); save_project(p)
  return p, shots_to_df(shots), gr.update(value="Storyboard generated (editable)."), gr.update(interactive=True)
 
- propose_btn.click(
- on_propose,
+ propose_btn.click(on_propose,
  inputs=[project, sb_prompt, sb_target_shots, sb_default_fps, sb_default_len],
  outputs=[project, shots_df, sb_status, save_edits_btn]
  )
 
  def on_save_edits(p, df):
- if p is None:
- raise gr.Error("No project in memory. Click New Project, then generate a storyboard.")
- if df is None:
- raise gr.Error("No storyboard table to save. Generate a storyboard first, then edit it.")
+ if p is None: raise gr.Error("No project in memory.")
+ if df is None: raise gr.Error("No storyboard table to save.")
  shots = df_to_shots(df)
- p = dict(p)
- p["shots"] = shots
- p["meta"]["updated"] = now_iso()
- save_project(p)
+ p = dict(p); p["shots"] = shots; p["meta"]["updated"] = now_iso(); save_project(p)
  return p, gr.update(value="Edits saved.")
 
  save_edits_btn.click(on_save_edits, inputs=[project, shots_df], outputs=[project, sb_status])
@@ -643,42 +512,23 @@
  if p is None: raise gr.Error("No project.")
  shots = df_to_shots(df)
  if not shots: raise gr.Error("Storyboard is empty.")
-
- # lock a single seed for the project:
  proj_seed = None
- if proj_seed_override not in [None, ""] and str(proj_seed_override).isdigit():
- proj_seed = int(proj_seed_override)
- if proj_seed is None:
- proj_seed = p.get("meta", {}).get("seed", None)
+ if str(proj_seed_override or "").isdigit(): proj_seed = int(proj_seed_override)
+ if proj_seed is None: proj_seed = p.get("meta", {}).get("seed")
  if proj_seed is None:
  for s in shots:
- if isinstance(s.get("seed"), int):
- proj_seed = int(s["seed"])
- break
- if proj_seed is None:
- proj_seed = int(torch.randint(0, 2**31 - 1, (1,)).item())
-
+ if isinstance(s.get("seed"), int): proj_seed = int(s["seed"]); break
+ if proj_seed is None: proj_seed = int(torch.randint(0, 2**31 - 1, (1,)).item())
  for s in shots:
- if not isinstance(s.get("seed"), int):
- s["seed"] = proj_seed
-
- p = dict(p)
- p["shots"] = shots
- p["meta"]["seed"] = proj_seed
- p["meta"]["updated"] = now_iso()
- save_project(p)
-
- idx = 0
- prev_path = None
- info = (
- f"**Shot {shots[idx]['id']} — {shots[idx]['title']}** \n"
- f"Duration: {shots[idx]['duration']}s @ {shots[idx]['fps']} fps \n"
- f"Locked project seed: `{proj_seed}`"
- )
- return p, 0, gr.update(value=info), gr.update(value=shots[idx]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value=f"Ready to generate shot 1."), gr.update(value=proj_seed)
-
- to_keyframes_btn.click(
- on_start_keyframes,
+ if not isinstance(s.get("seed"), int): s["seed"] = proj_seed
+ p = dict(p); p["shots"] = shots; p["meta"]["seed"] = proj_seed; p["meta"]["updated"] = now_iso(); save_project(p)
+ idx = 0; prev_path = None
+ info = (f"**Shot {shots[idx]['id']} — {shots[idx]['title']}** \n"
+ f"Duration: {shots[idx]['duration']}s @ {shots[idx]['fps']} fps \n"
+ f"Locked project seed: `{proj_seed}`")
+ return p, 0, gr.update(value=info), gr.update(value=shots[idx]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value="Ready for shot 1."), gr.update(value=proj_seed)
+
+ to_keyframes_btn.click(on_start_keyframes,
  inputs=[project, shots_df, proj_seed_box],
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
  )
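Note: locking a single seed per project makes regeneration reproducible: every shot seeds its torch.Generator identically, so rerunning a shot with unchanged settings yields the same image, while the prompt and init image still vary per shot. A minimal sketch of the guarantee this relies on (illustrative, CPU generator):

    import torch
    g1 = torch.Generator("cpu").manual_seed(1234)
    g2 = torch.Generator("cpu").manual_seed(1234)
    assert torch.equal(torch.randn(4, generator=g1), torch.randn(4, generator=g2))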
@@ -687,124 +537,93 @@
  if p is None: raise gr.Error("No project.")
  shots = p["shots"]
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
- shots[idx]["description"] = current_prompt # allow tweaking
-
+ shots[idx]["description"] = current_prompt
  img_path = generate_keyframe_image(
- p["meta"]["id"],
- int(idx),
- shots,
- t2i_steps=18,
- i2i_steps=int(i2i_steps_val),
+ p["meta"]["id"], int(idx), shots,
+ t2i_steps=18, i2i_steps=int(i2i_steps_val),
  i2i_strength=float(i2i_strength_val),
  guidance_scale=float(guidance_val),
- width=640,
- height=640,
+ width=640, height=640,
  seconds_forward=int(seconds_forward_val),
  aggressive=bool(aggressive_val)
  )
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
 
- gen_btn.click(
- on_generate_img,
+ gen_btn.click(on_generate_img,
  inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs, aggressive_follow],
  outputs=[out_img, prev_img, kf_status]
  )
 
  def on_approve_next(p, idx, current_prompt, latest_img_path):
  if p is None: raise gr.Error("No project.")
- shots = p["shots"]
- i = int(idx)
+ shots = p["shots"]; i = int(idx)
  if i < 0 or i >= len(shots): raise gr.Error("Invalid shot index.")
  if not latest_img_path: raise gr.Error("Generate an image first.")
-
- # commit
  shots[i]["description"] = current_prompt
  shots[i]["image_path"] = latest_img_path
- p["shots"] = shots
- p["meta"]["updated"] = now_iso()
- save_project(p)
-
- # next
+ p["shots"] = shots; p["meta"]["updated"] = now_iso(); save_project(p)
  if i + 1 < len(shots):
  ni = i + 1
- info = (
- f"**Shot {shots[ni]['id']} {shots[ni]['title']}** \n"
- f"Duration: {shots[ni]['duration']}s @ {shots[ni]['fps']} fps \n"
- f"Locked project seed: `{p['meta'].get('seed')}`"
- )
+ info = (f"**Shot {shots[ni]['id']} — {shots[ni]['title']}** \n"
+ f"Duration: {shots[ni]['duration']}s @ {shots[ni]['fps']} fps \n"
+ f"Locked project seed: `{p['meta'].get('seed')}`")
  prev_path = shots[ni-1]["image_path"]
  return p, ni, gr.update(value=info), gr.update(value=shots[ni]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value=f"Approved shot {shots[i]['id']}. On to shot {shots[ni]['id']}.")
  else:
  return p, i, gr.update(value="**All keyframes approved.** Proceed to Videos tab."), gr.update(value=""), gr.update(value=shots[i]["image_path"]), gr.update(value=None), gr.update(value="All shots approved ✅")
 
- approve_next_btn.click(on_approve_next, inputs=[project, current_idx, prompt_box, out_img], outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status])
+ approve_next_btn.click(on_approve_next,
+ inputs=[project, current_idx, prompt_box, out_img],
+ outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status]
+ )
 
- # ---- Videos tab handlers
+ # ---- Videos tab
  def on_build_pairs(p, fps, hold, xfade):
- if p is None:
- raise gr.Error("No project.")
+ if p is None: raise gr.Error("No project.")
  shots = p.get("shots", [])
- if len(shots) < 2:
- raise gr.Error("Need at least 2 approved images to build pair clips.")
- if not any(s.get("image_path") for s in shots):
- raise gr.Error("No approved images yet. Approve keyframes first.")
-
+ if len(shots) < 2: raise gr.Error("Need at least 2 approved images.")
+ if not any(s.get("image_path") for s in shots): raise gr.Error("No approved images yet.")
  pair_paths = _build_all_pair_clips(
  p["meta"]["id"], shots,
  fps=int(fps), hold=float(hold), crossfade=float(xfade),
- force_size=None # or (640, 640) to force uniform size
+ force_size=None
  )
- if not pair_paths:
- raise gr.Error("Could not find any consecutive pairs with images.")
+ if not pair_paths: raise gr.Error("No consecutive pairs with images found.")
  return {"pair_clips": pair_paths, "final": None}
 
- build_pairs_btn.click(
- on_build_pairs,
- inputs=[project, v_fps, v_hold, v_xfade],
- outputs=[vd_table]
- )
+ build_pairs_btn.click(on_build_pairs, inputs=[project, v_fps, v_hold, v_xfade], outputs=[vd_table])
 
  def on_build_final(p, fps):
- if p is None:
- raise gr.Error("No project.")
+ if p is None: raise gr.Error("No project.")
  pid = p["meta"]["id"]
  clips_dir = os.path.join(project_dir(pid), "clips")
- pair_paths = sorted(
- [os.path.join(clips_dir, f) for f in os.listdir(clips_dir) if f.startswith("pair_") and f.endswith(".mp4")]
- )
- if not pair_paths:
- raise gr.Error("No pair clips found. Click 'Build pair clips' first.")
+ pair_paths = sorted([os.path.join(clips_dir, f) for f in os.listdir(clips_dir)
+ if f.startswith("pair_") and f.endswith(".mp4")])
+ if not pair_paths: raise gr.Error("No pair clips found. Build pair clips first.")
  outp = _final_stitched_path(pid)
  _build_final_stitched_from_pairs(pair_paths, outp, fps=int(fps))
  return {"pair_clips": pair_paths, "final": outp}
 
- build_final_btn.click(
- on_build_final,
- inputs=[project, v_fps],
- outputs=[vd_table]
- )
+ build_final_btn.click(on_build_final, inputs=[project, v_fps], outputs=[vd_table])
 
+ # save/load
  def on_save(p):
- if p is None:
- raise gr.Error("No project in memory.")
- path = save_project(p)
- return gr.update(value=f"Saved to `{path}`")
+ if p is None: raise gr.Error("No project in memory.")
+ path = save_project(p); return gr.update(value=f"Saved to `{path}`")
 
  save_btn.click(on_save, inputs=[project], outputs=[sb_status])
 
  def on_load(file_obj):
  p = load_project_file(file_obj)
  seed_val = p.get("meta", {}).get("seed", None)
- return (
- p,
- gr.update(value=f"Loaded project `{p['meta']['name']}` (id: `{p['meta']['id']}`)"),
- shots_to_df(p.get("shots", [])),
- gr.update(value=seed_val)
- )
+ return (p,
+ gr.update(value=f"Loaded `{p['meta']['name']}` (id: `{p['meta']['id']}`)"),
+ shots_to_df(p.get("shots", [])),
+ gr.update(value=seed_val))
 
  load_btn.click(on_load, inputs=[load_file], outputs=[project, sb_status, shots_df, proj_seed_box])
 
  if __name__ == "__main__":
- _flux_healthcheck() # fail fast with clear error if FLUX isn't available
+ _flux_healthcheck()
  demo.launch()