Shalmoni committed on
Commit
e65b7f3
·
verified ·
1 Parent(s): 2fe90ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -49
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py (FLUX-only, smart chaining)
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
@@ -247,15 +247,13 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
247
  return _normalize_shots(shots_raw, default_fps, default_len)
248
 
249
  # =========================
250
- # IMAGE GEN — FLUX only (no fallback)
251
  # =========================
252
  USE_CUDA = torch.cuda.is_available()
253
  DTYPE = torch.float16 if USE_CUDA else torch.float32
254
 
255
- # ✅ Use a real FLUX repo instead of Nano
256
  FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")
257
-
258
- # ✅ Add token support
259
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
260
 
261
  _flux_t2i = None
@@ -290,20 +288,38 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
290
  img.save(out)
291
  return out
292
 
293
- def _significant_change(curr_desc: str, prev_desc: str) -> bool:
 
294
  """
295
- If token-level symmetric difference is large, treat as a new scene:
296
- do text2img (same seed) instead of img2img to avoid 'mush'.
297
  """
298
- if not prev_desc: return True
299
- a = set(re.findall(r"\w+", curr_desc.lower()))
300
- b = set(re.findall(r"\w+", prev_desc.lower()))
301
- comp_words = {"wide","close","low","high","overhead","aerial","profile","left","right","center",
302
- "portrait","landscape","long","establishing","macro","tilt","dutch","angle",
303
- "night","day","sunset","sunrise","noon","backlit","rim","key","fill"}
304
- delta = a.symmetric_difference(b)
305
- score = len(delta) + 2 * len((a ^ b) & comp_words)
306
- return score >= 10 # more eager to break chaining
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
  @spaces.GPU(duration=180)
309
  def generate_keyframe_image(
@@ -315,50 +331,45 @@ def generate_keyframe_image(
315
  i2i_strength: float = 0.85, # higher -> follow prompt more
316
  guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
317
  width: int = 640,
318
- height: int = 640
 
319
  ):
320
  """
321
  Generate image for shots[shot_idx] using FLUX only.
322
- - shot 0: text2img
323
- - shot k>0: smart chaining
324
- * if significant change: text2img (same seed for style)
325
- * else: img2img from previous approved image
326
  """
327
  try:
328
  t2i, i2i = _lazy_flux_pipes()
329
  except Exception as e:
330
  raise gr.Error(
331
  f"FLUX failed to load: {e}\n"
332
- "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-Nano') and ensure HF_TOKEN if required."
333
  )
334
 
335
- shot = shots[shot_idx]
336
- prompt = (shot.get("description") or "").strip()
337
- negative = shot.get("negative") or ""
338
- seed = shot.get("seed", None)
339
 
 
 
340
  device = "cuda" if USE_CUDA else "cpu"
341
  gen = torch.Generator(device)
342
  if isinstance(seed, int):
343
  gen = gen.manual_seed(int(seed))
344
 
 
345
  width = max(256, min(1024, int(width)))
346
  height = max(256, min(1024, int(height)))
347
 
348
- # decide chaining
349
- use_prev = False
350
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
351
- if shot_idx == 0 or not prev_path or not os.path.exists(prev_path):
352
- use_prev = False
353
- else:
354
- prev_desc = shots[shot_idx - 1].get("description") or ""
355
- use_prev = not _significant_change(prompt, prev_desc)
356
 
357
  # generate
358
  if not use_prev:
359
  out = t2i(
360
- prompt=prompt,
361
- negative_prompt=negative or None,
362
  num_inference_steps=int(max(8, t2i_steps)),
363
  guidance_scale=float(max(2.0, guidance_scale)),
364
  generator=gen,
@@ -367,16 +378,16 @@ def generate_keyframe_image(
367
  else:
368
  init_image = Image.open(prev_path).convert("RGB")
369
  out = i2i(
370
- prompt=prompt,
371
- negative_prompt=negative or None,
372
  image=init_image,
373
- strength=float(min(max(i2i_strength, 0.5), 0.95)),
374
- num_inference_steps=int(max(10, i2i_steps)),
375
  guidance_scale=float(max(2.0, guidance_scale)),
376
  generator=gen
377
  ).images[0]
378
 
379
- saved_path = _save_keyframe(pid, int(shot["id"]), out)
380
  return saved_path
381
 
382
  # =========================
@@ -410,10 +421,9 @@ def df_to_shots(df: pd.DataFrame) -> list:
410
  with gr.Blocks() as demo:
411
  gr.Markdown("# 🎬 Storyboard β†’ Keyframes β†’ (Videos soon) β†’ Export")
412
  gr.Markdown(
413
- "Edit storyboard prompts, then generate keyframes. "
414
- "**Smart chaining**: only reuse the previous image if the new prompt is similar; "
415
- "otherwise we regenerate from text with the same seed for style consistency. "
416
- "**Model**: FLUX-only."
417
  )
418
 
419
  # State
@@ -461,11 +471,13 @@ with gr.Blocks() as demo:
461
  with gr.Row():
462
  gen_btn = gr.Button("Generate / Regenerate", variant="primary")
463
  approve_next_btn = gr.Button("Approve & Next β†’", variant="secondary")
464
- # tuning controls (defaults tuned for FLUX)
465
  with gr.Row():
466
  img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
467
  img_steps = gr.Slider(8, 28, value=18, step=1, label="Inference Steps (img2img)")
468
  guidance = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
 
 
469
  with gr.Row():
470
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
471
  out_img = gr.Image(label="Generated image", type="filepath")
@@ -561,7 +573,7 @@ with gr.Blocks() as demo:
561
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
562
  )
563
 
564
- def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val):
565
  if p is None: raise gr.Error("No project.")
566
  shots = p["shots"]
567
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
@@ -576,14 +588,15 @@ with gr.Blocks() as demo:
576
  i2i_strength=float(i2i_strength_val),
577
  guidance_scale=float(guidance_val),
578
  width=640,
579
- height=640
 
580
  )
581
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
582
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
583
 
584
  gen_btn.click(
585
  on_generate_img,
586
- inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance],
587
  outputs=[out_img, prev_img, kf_status]
588
  )
589
 
 
1
+ # app.py — FLUX-only with temporal chaining (5s later by default)
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
 
247
  return _normalize_shots(shots_raw, default_fps, default_len)
248
 
249
  # =========================
250
+ # IMAGE GEN — FLUX only (no fallback) + Temporal chaining
251
  # =========================
252
  USE_CUDA = torch.cuda.is_available()
253
  DTYPE = torch.float16 if USE_CUDA else torch.float32
254
 
255
+ # Correct, gated repo; accept access and set HF_TOKEN
256
  FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")
 
 
257
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
258
 
259
  _flux_t2i = None
 
288
  img.save(out)
289
  return out
290
 
291
+ # ---- Temporal prompt composer ----
292
+ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
293
  """
294
+ Build a prompt that explicitly continues the scene N seconds later.
295
+ Returns (composed_prompt, composed_negative).
296
  """
297
+ curr = shots[idx]
298
+ curr_desc = (curr.get("description") or "").strip()
299
+ curr_neg = (curr.get("negative") or "").strip()
300
+
301
+ if idx == 0:
302
+ return curr_desc, curr_neg
303
+
304
+ prev = shots[idx - 1]
305
+ prev_desc = (prev.get("description") or "").strip()
306
+
307
+ composed = (
308
+ f"Continue the exact same scene {seconds_forward} seconds later.\n"
309
+ f"Maintain continuity with the previous frame (composition, subject identity, camera lens and angle, lighting, color palette, time of day, environment).\n"
310
+ f"Previous frame description: \"{prev_desc}\"\n"
311
+ f"New moment to depict now: \"{curr_desc}\"\n"
312
+ f"Do NOT reset the scene; only natural progression over {seconds_forward} seconds."
313
+ ).strip()
314
+
315
+ negative = (
316
+ curr_neg + (
317
+ "; scene reset; different subject identity; different environment; time jump; hard cut; "
318
+ "dramatic style shift; unrelated background; different camera make/lens"
319
+ )
320
+ ).strip("; ")
321
+
322
+ return composed, negative
323
 
324
  @spaces.GPU(duration=180)
325
  def generate_keyframe_image(
 
331
  i2i_strength: float = 0.85, # higher -> follow prompt more
332
  guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
333
  width: int = 640,
334
+ height: int = 640,
335
+ seconds_forward: int = 5 # temporal step
336
  ):
337
  """
338
  Generate image for shots[shot_idx] using FLUX only.
339
+ - Shot 1: text2img
340
+ - Shot k>1: ALWAYS img2img from previous approved frame + temporal prompt ("N seconds later")
 
 
341
  """
342
  try:
343
  t2i, i2i = _lazy_flux_pipes()
344
  except Exception as e:
345
  raise gr.Error(
346
  f"FLUX failed to load: {e}\n"
347
+ "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-schnell') and ensure HF_TOKEN if required."
348
  )
349
 
350
+ # Build temporal prompt
351
+ composed_prompt, composed_negative = _compose_temporal_prompt(shots, shot_idx, seconds_forward=seconds_forward)
 
 
352
 
353
+ # RNG / seed
354
+ seed = shots[shot_idx].get("seed", None)
355
  device = "cuda" if USE_CUDA else "cpu"
356
  gen = torch.Generator(device)
357
  if isinstance(seed, int):
358
  gen = gen.manual_seed(int(seed))
359
 
360
+ # sizes
361
  width = max(256, min(1024, int(width)))
362
  height = max(256, min(1024, int(height)))
363
 
364
+ # chaining
 
365
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
366
+ use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
 
 
 
 
367
 
368
  # generate
369
  if not use_prev:
370
  out = t2i(
371
+ prompt=composed_prompt,
372
+ negative_prompt=composed_negative or None,
373
  num_inference_steps=int(max(8, t2i_steps)),
374
  guidance_scale=float(max(2.0, guidance_scale)),
375
  generator=gen,
 
378
  else:
379
  init_image = Image.open(prev_path).convert("RGB")
380
  out = i2i(
381
+ prompt=composed_prompt,
382
+ negative_prompt=composed_negative or None,
383
  image=init_image,
384
+ strength=float(min(max(i2i_strength, 0.70), 0.95)),
385
+ num_inference_steps=int(max(12, i2i_steps)),
386
  guidance_scale=float(max(2.0, guidance_scale)),
387
  generator=gen
388
  ).images[0]
389
 
390
+ saved_path = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
391
  return saved_path
392
 
393
  # =========================
 
421
  with gr.Blocks() as demo:
422
  gr.Markdown("# 🎬 Storyboard β†’ Keyframes β†’ (Videos soon) β†’ Export")
423
  gr.Markdown(
424
+ "Edit storyboard prompts, then generate keyframes.\n"
425
+ "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
426
+ "while respecting the current shot description. **Model**: FLUX-only."
 
427
  )
428
 
429
  # State
 
471
  with gr.Row():
472
  gen_btn = gr.Button("Generate / Regenerate", variant="primary")
473
  approve_next_btn = gr.Button("Approve & Next β†’", variant="secondary")
474
+
475
  with gr.Row():
476
  img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
477
  img_steps = gr.Slider(8, 28, value=18, step=1, label="Inference Steps (img2img)")
478
  guidance = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
479
+ temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
480
+
481
  with gr.Row():
482
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
483
  out_img = gr.Image(label="Generated image", type="filepath")
 
573
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
574
  )
575
 
576
+ def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val):
577
  if p is None: raise gr.Error("No project.")
578
  shots = p["shots"]
579
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
 
588
  i2i_strength=float(i2i_strength_val),
589
  guidance_scale=float(guidance_val),
590
  width=640,
591
+ height=640,
592
+ seconds_forward=int(seconds_forward_val)
593
  )
594
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
595
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
596
 
597
  gen_btn.click(
598
  on_generate_img,
599
+ inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs],
600
  outputs=[out_img, prev_img, kf_status]
601
  )
602