Shalmoni committed on
Commit
9628a3b
Β·
verified Β·
1 Parent(s): e65b7f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -43
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py β€” FLUX-only with temporal chaining (5s later by default)
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
@@ -84,10 +84,10 @@ def _lazy_model_tok():
84
 
85
  def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
86
  return (
87
- "You are a **cinematographer and storyboard artist**. "
88
  "Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
89
- "For each shot, provide **the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details**. "
90
- "Imagine you're describing frames for a film storyboard, NOT vague events.\n\n"
91
  "Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
92
  f"Create a storyboard of {n_shots} shots for this idea:\n\n"
93
  f"'''{user_prompt}'''\n\n"
@@ -95,19 +95,12 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_
95
  "{\n"
96
  ' \"id\": <int starting at 1>,\n'
97
  ' \"title\": \"Short shot title\",\n'
98
- ' \"description\": \"Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details. Be as descriptive as possible.\",\n'
99
  f" \"duration\": {default_len},\n"
100
  f" \"fps\": {default_fps},\n"
101
  " \"steps\": 30,\n"
102
  " \"seed\": null,\n"
103
- ' \"negative\": \"\"\n'
104
- "}\n\n"
105
- "Example of good description:\n"
106
- "{\n"
107
- " \"id\": 1,\n"
108
- " \"title\": \"Low angle car approach\",\n"
109
- " \"description\": \"A silver sedan drives towards the camera on a narrow mountain road at sunset. The camera is low to the ground near the center of the road, facing slightly upwards. Pine trees rise on both sides, and warm orange light hits the rocks. The car is centered, headlights on, creating dramatic shadows.\",\n"
110
- " ...\n"
111
  "}\n\n"
112
  "Output must start with <JSON> and end with </JSON>.\n"
113
  )
@@ -125,7 +118,7 @@ def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_le
125
  f" \"fps\": {default_fps},\n"
126
  " \"steps\": 30,\n"
127
  " \"seed\": null,\n"
128
- ' "negative": ""\n'
129
  "}\n"
130
  )
131
 
@@ -288,10 +281,12 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
288
  img.save(out)
289
  return out
290
 
291
- # ---- Temporal prompt composer ----
292
  def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
293
  """
294
- Build a prompt that explicitly continues the scene N seconds later.
 
 
295
  Returns (composed_prompt, composed_negative).
296
  """
297
  curr = shots[idx]
@@ -305,17 +300,17 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
305
  prev_desc = (prev.get("description") or "").strip()
306
 
307
  composed = (
308
- f"Continue the exact same scene {seconds_forward} seconds later.\n"
309
- f"Maintain continuity with the previous frame (composition, subject identity, camera lens and angle, lighting, color palette, time of day, environment).\n"
310
- f"Previous frame description: \"{prev_desc}\"\n"
311
- f"New moment to depict now: \"{curr_desc}\"\n"
312
- f"Do NOT reset the scene; only natural progression over {seconds_forward} seconds."
313
  ).strip()
314
 
315
  negative = (
316
  curr_neg + (
317
- "; scene reset; different subject identity; different environment; time jump; hard cut; "
318
- "dramatic style shift; unrelated background; different camera make/lens"
319
  )
320
  ).strip("; ")
321
 
@@ -326,18 +321,19 @@ def generate_keyframe_image(
326
  pid: str,
327
  shot_idx: int,
328
  shots: list,
329
- t2i_steps: int = 16, # FLUX: 12–20
330
- i2i_steps: int = 18, # FLUX: 14–22
331
- i2i_strength: float = 0.85, # higher -> follow prompt more
332
- guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
333
  width: int = 640,
334
  height: int = 640,
335
- seconds_forward: int = 5 # temporal step
 
336
  ):
337
  """
338
  Generate image for shots[shot_idx] using FLUX only.
339
  - Shot 1: text2img
340
- - Shot k>1: ALWAYS img2img from previous approved frame + temporal prompt ("N seconds later")
341
  """
342
  try:
343
  t2i, i2i = _lazy_flux_pipes()
@@ -365,25 +361,31 @@ def generate_keyframe_image(
365
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
366
  use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
367
 
 
 
 
 
 
 
368
  # generate
369
  if not use_prev:
370
  out = t2i(
371
  prompt=composed_prompt,
372
  negative_prompt=composed_negative or None,
373
- num_inference_steps=int(max(8, t2i_steps)),
374
- guidance_scale=float(max(2.0, guidance_scale)),
375
  generator=gen,
376
  width=width, height=height
377
  ).images[0]
378
  else:
379
- init_image = Image.open(prev_path).convert("RGB")
380
  out = i2i(
381
  prompt=composed_prompt,
382
  negative_prompt=composed_negative or None,
383
  image=init_image,
384
- strength=float(min(max(i2i_strength, 0.70), 0.95)),
385
- num_inference_steps=int(max(12, i2i_steps)),
386
- guidance_scale=float(max(2.0, guidance_scale)),
387
  generator=gen
388
  ).images[0]
389
 
@@ -423,7 +425,7 @@ with gr.Blocks() as demo:
423
  gr.Markdown(
424
  "Edit storyboard prompts, then generate keyframes.\n"
425
  "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
426
- "while respecting the current shot description. **Model**: FLUX-only."
427
  )
428
 
429
  # State
@@ -473,10 +475,11 @@ with gr.Blocks() as demo:
473
  approve_next_btn = gr.Button("Approve & Next β†’", variant="secondary")
474
 
475
  with gr.Row():
476
- img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
477
- img_steps = gr.Slider(8, 28, value=18, step=1, label="Inference Steps (img2img)")
478
- guidance = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
479
  temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
 
480
 
481
  with gr.Row():
482
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
@@ -573,7 +576,7 @@ with gr.Blocks() as demo:
573
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
574
  )
575
 
576
- def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val):
577
  if p is None: raise gr.Error("No project.")
578
  shots = p["shots"]
579
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
@@ -583,20 +586,21 @@ with gr.Blocks() as demo:
583
  p["meta"]["id"],
584
  int(idx),
585
  shots,
586
- t2i_steps=16,
587
  i2i_steps=int(i2i_steps_val),
588
  i2i_strength=float(i2i_strength_val),
589
  guidance_scale=float(guidance_val),
590
  width=640,
591
  height=640,
592
- seconds_forward=int(seconds_forward_val)
 
593
  )
594
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
595
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
596
 
597
  gen_btn.click(
598
  on_generate_img,
599
- inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs],
600
  outputs=[out_img, prev_img, kf_status]
601
  )
602
 
 
1
+ # app.py β€” FLUX-only with temporal chaining (5s later by default) + Aggressive follow option
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
 
84
 
85
  def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
86
  return (
87
+ "You are a cinematographer and storyboard artist. "
88
  "Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
89
+ "For each shot, provide the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details. "
90
+ "Imagine you're describing frames for a film storyboard, not vague events.\n\n"
91
  "Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
92
  f"Create a storyboard of {n_shots} shots for this idea:\n\n"
93
  f"'''{user_prompt}'''\n\n"
 
95
  "{\n"
96
  ' \"id\": <int starting at 1>,\n'
97
  ' \"title\": \"Short shot title\",\n'
98
+ ' \"description\": \"Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details.\",\n'
99
  f" \"duration\": {default_len},\n"
100
  f" \"fps\": {default_fps},\n"
101
  " \"steps\": 30,\n"
102
  " \"seed\": null,\n"
103
+ ' \"negative\": \"\"\n'
 
 
 
 
 
 
 
104
  "}\n\n"
105
  "Output must start with <JSON> and end with </JSON>.\n"
106
  )
 
118
  f" \"fps\": {default_fps},\n"
119
  " \"steps\": 30,\n"
120
  " \"seed\": null,\n"
121
+ ' \"negative\": \"\"\n'
122
  "}\n"
123
  )
124
 
 
281
  img.save(out)
282
  return out
283
 
284
+ # ---- Temporal prompt composer (PRIORITIZE the new shot) ----
285
  def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
286
  """
287
+ Build a prompt that continues the scene N seconds later,
288
+ prioritizing the NEW shot description (composition/action),
289
+ while keeping only identity/lighting/environment continuity.
290
  Returns (composed_prompt, composed_negative).
291
  """
292
  curr = shots[idx]
 
300
  prev_desc = (prev.get("description") or "").strip()
301
 
302
  composed = (
303
+ f"Continue the same scene {seconds_forward} seconds later.\n"
304
+ f"PRIORITIZE this new moment and its composition now: \"{curr_desc}\".\n"
305
+ f"Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
306
+ f"Previous frame (context only, do not copy its framing): \"{prev_desc}\".\n"
307
+ f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
308
  ).strip()
309
 
310
  negative = (
311
  curr_neg + (
312
+ "; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
313
+ "hard scene reset; different subject identity; wildly different art style; unrelated background"
314
  )
315
  ).strip("; ")
316
 
 
321
  pid: str,
322
  shot_idx: int,
323
  shots: list,
324
+ t2i_steps: int = 18, # FLUX: 12–22
325
+ i2i_steps: int = 22, # FLUX: 16–26
326
+ i2i_strength: float = 0.90, # ↑ more change toward new prompt
327
+ guidance_scale: float = 3.4, # ↑ stronger text pull
328
  width: int = 640,
329
  height: int = 640,
330
+ seconds_forward: int = 5, # temporal step
331
+ aggressive: bool = False # optional push
332
  ):
333
  """
334
  Generate image for shots[shot_idx] using FLUX only.
335
  - Shot 1: text2img
336
+ - Shot k>1: img2img from previous approved frame + temporal prompt ("N seconds later")
337
  """
338
  try:
339
  t2i, i2i = _lazy_flux_pipes()
 
361
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
362
  use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
363
 
364
+ # Aggressive mode bumps
365
+ if aggressive:
366
+ i2i_strength = min(0.98, max(i2i_strength, 0.92))
367
+ guidance_scale = max(guidance_scale, 3.6)
368
+ i2i_steps = max(i2i_steps, 24)
369
+
370
  # generate
371
  if not use_prev:
372
  out = t2i(
373
  prompt=composed_prompt,
374
  negative_prompt=composed_negative or None,
375
+ num_inference_steps=int(max(10, t2i_steps)),
376
+ guidance_scale=float(max(2.4, guidance_scale)),
377
  generator=gen,
378
  width=width, height=height
379
  ).images[0]
380
  else:
381
+ init_image = Image.open(prev_path).convert("RGB") # previous approved frame (the "init_image")
382
  out = i2i(
383
  prompt=composed_prompt,
384
  negative_prompt=composed_negative or None,
385
  image=init_image,
386
+ strength=float(min(max(i2i_strength, 0.70), 0.98)),
387
+ num_inference_steps=int(max(14, i2i_steps)),
388
+ guidance_scale=float(max(2.4, guidance_scale)),
389
  generator=gen
390
  ).images[0]
391
 
 
425
  gr.Markdown(
426
  "Edit storyboard prompts, then generate keyframes.\n"
427
  "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
428
+ "while the current shot description drives composition & action. **Model**: FLUX-only."
429
  )
430
 
431
  # State
 
475
  approve_next_btn = gr.Button("Approve & Next β†’", variant="secondary")
476
 
477
  with gr.Row():
478
+ img_strength = gr.Slider(0.50, 0.98, value=0.90, step=0.02, label="Change vs Consistency (img2img strength)")
479
+ img_steps = gr.Slider(12, 28, value=22, step=1, label="Inference Steps (img2img)")
480
+ guidance = gr.Slider(2.4, 4.0, value=3.4, step=0.1, label="Guidance Scale")
481
  temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
482
+ aggressive_follow = gr.Checkbox(value=False, label="Aggressive follow prompt (more change)")
483
 
484
  with gr.Row():
485
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
 
576
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
577
  )
578
 
579
+ def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val, aggressive_val):
580
  if p is None: raise gr.Error("No project.")
581
  shots = p["shots"]
582
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
 
586
  p["meta"]["id"],
587
  int(idx),
588
  shots,
589
+ t2i_steps=18,
590
  i2i_steps=int(i2i_steps_val),
591
  i2i_strength=float(i2i_strength_val),
592
  guidance_scale=float(guidance_val),
593
  width=640,
594
  height=640,
595
+ seconds_forward=int(seconds_forward_val),
596
+ aggressive=bool(aggressive_val)
597
  )
598
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
599
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
600
 
601
  gen_btn.click(
602
  on_generate_img,
603
+ inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs, aggressive_follow],
604
  outputs=[out_img, prev_img, kf_status]
605
  )
606