Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py — FLUX-only with temporal chaining (5s later by default)
|
| 2 |
import os, json, uuid, re
|
| 3 |
from datetime import datetime
|
| 4 |
import gradio as gr
|
|
@@ -84,10 +84,10 @@ def _lazy_model_tok():
|
|
| 84 |
|
| 85 |
def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
|
| 86 |
return (
|
| 87 |
-
"You are a
|
| 88 |
"Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
|
| 89 |
-
"For each shot, provide
|
| 90 |
-
"Imagine you're describing frames for a film storyboard,
|
| 91 |
"Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
|
| 92 |
f"Create a storyboard of {n_shots} shots for this idea:\n\n"
|
| 93 |
f"'''{user_prompt}'''\n\n"
|
|
@@ -95,19 +95,12 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_
|
|
| 95 |
"{\n"
|
| 96 |
' \"id\": <int starting at 1>,\n'
|
| 97 |
' \"title\": \"Short shot title\",\n'
|
| 98 |
-
' \"description\": \"Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details
|
| 99 |
f" \"duration\": {default_len},\n"
|
| 100 |
f" \"fps\": {default_fps},\n"
|
| 101 |
" \"steps\": 30,\n"
|
| 102 |
" \"seed\": null,\n"
|
| 103 |
-
' \"negative\": \"\"\n
|
| 104 |
-
"}\n\n"
|
| 105 |
-
"Example of good description:\n"
|
| 106 |
-
"{\n"
|
| 107 |
-
" \"id\": 1,\n"
|
| 108 |
-
" \"title\": \"Low angle car approach\",\n"
|
| 109 |
-
" \"description\": \"A silver sedan drives towards the camera on a narrow mountain road at sunset. The camera is low to the ground near the center of the road, facing slightly upwards. Pine trees rise on both sides, and warm orange light hits the rocks. The car is centered, headlights on, creating dramatic shadows.\",\n"
|
| 110 |
-
" ...\n"
|
| 111 |
"}\n\n"
|
| 112 |
"Output must start with <JSON> and end with </JSON>.\n"
|
| 113 |
)
|
|
@@ -125,7 +118,7 @@ def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_le
|
|
| 125 |
f" \"fps\": {default_fps},\n"
|
| 126 |
" \"steps\": 30,\n"
|
| 127 |
" \"seed\": null,\n"
|
| 128 |
-
' "negative": ""\n
|
| 129 |
"}\n"
|
| 130 |
)
|
| 131 |
|
|
@@ -288,10 +281,12 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
|
|
| 288 |
img.save(out)
|
| 289 |
return out
|
| 290 |
|
| 291 |
-
# ---- Temporal prompt composer ----
|
| 292 |
def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
|
| 293 |
"""
|
| 294 |
-
Build a prompt that
|
|
|
|
|
|
|
| 295 |
Returns (composed_prompt, composed_negative).
|
| 296 |
"""
|
| 297 |
curr = shots[idx]
|
|
@@ -305,17 +300,17 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
|
|
| 305 |
prev_desc = (prev.get("description") or "").strip()
|
| 306 |
|
| 307 |
composed = (
|
| 308 |
-
f"Continue the
|
| 309 |
-
f"
|
| 310 |
-
f"
|
| 311 |
-
f"
|
| 312 |
-
f"
|
| 313 |
).strip()
|
| 314 |
|
| 315 |
negative = (
|
| 316 |
curr_neg + (
|
| 317 |
-
";
|
| 318 |
-
"
|
| 319 |
)
|
| 320 |
).strip("; ")
|
| 321 |
|
|
@@ -326,18 +321,19 @@ def generate_keyframe_image(
|
|
| 326 |
pid: str,
|
| 327 |
shot_idx: int,
|
| 328 |
shots: list,
|
| 329 |
-
t2i_steps: int =
|
| 330 |
-
i2i_steps: int =
|
| 331 |
-
i2i_strength: float = 0.
|
| 332 |
-
guidance_scale: float = 3.
|
| 333 |
width: int = 640,
|
| 334 |
height: int = 640,
|
| 335 |
-
seconds_forward: int = 5
|
|
|
|
| 336 |
):
|
| 337 |
"""
|
| 338 |
Generate image for shots[shot_idx] using FLUX only.
|
| 339 |
- Shot 1: text2img
|
| 340 |
-
- Shot k>1:
|
| 341 |
"""
|
| 342 |
try:
|
| 343 |
t2i, i2i = _lazy_flux_pipes()
|
|
@@ -365,25 +361,31 @@ def generate_keyframe_image(
|
|
| 365 |
prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
|
| 366 |
use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
|
| 367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
# generate
|
| 369 |
if not use_prev:
|
| 370 |
out = t2i(
|
| 371 |
prompt=composed_prompt,
|
| 372 |
negative_prompt=composed_negative or None,
|
| 373 |
-
num_inference_steps=int(max(
|
| 374 |
-
guidance_scale=float(max(2.
|
| 375 |
generator=gen,
|
| 376 |
width=width, height=height
|
| 377 |
).images[0]
|
| 378 |
else:
|
| 379 |
-
init_image = Image.open(prev_path).convert("RGB")
|
| 380 |
out = i2i(
|
| 381 |
prompt=composed_prompt,
|
| 382 |
negative_prompt=composed_negative or None,
|
| 383 |
image=init_image,
|
| 384 |
-
strength=float(min(max(i2i_strength, 0.70), 0.
|
| 385 |
-
num_inference_steps=int(max(
|
| 386 |
-
guidance_scale=float(max(2.
|
| 387 |
generator=gen
|
| 388 |
).images[0]
|
| 389 |
|
|
@@ -423,7 +425,7 @@ with gr.Blocks() as demo:
|
|
| 423 |
gr.Markdown(
|
| 424 |
"Edit storyboard prompts, then generate keyframes.\n"
|
| 425 |
"**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
|
| 426 |
-
"while
|
| 427 |
)
|
| 428 |
|
| 429 |
# State
|
|
@@ -473,10 +475,11 @@ with gr.Blocks() as demo:
|
|
| 473 |
approve_next_btn = gr.Button("Approve & Next β", variant="secondary")
|
| 474 |
|
| 475 |
with gr.Row():
|
| 476 |
-
img_strength = gr.Slider(0.50, 0.
|
| 477 |
-
img_steps = gr.Slider(
|
| 478 |
-
guidance = gr.Slider(2.
|
| 479 |
temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
|
|
|
|
| 480 |
|
| 481 |
with gr.Row():
|
| 482 |
prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
|
|
@@ -573,7 +576,7 @@ with gr.Blocks() as demo:
|
|
| 573 |
outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
|
| 574 |
)
|
| 575 |
|
| 576 |
-
def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val):
|
| 577 |
if p is None: raise gr.Error("No project.")
|
| 578 |
shots = p["shots"]
|
| 579 |
if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
|
|
@@ -583,20 +586,21 @@ with gr.Blocks() as demo:
|
|
| 583 |
p["meta"]["id"],
|
| 584 |
int(idx),
|
| 585 |
shots,
|
| 586 |
-
t2i_steps=
|
| 587 |
i2i_steps=int(i2i_steps_val),
|
| 588 |
i2i_strength=float(i2i_strength_val),
|
| 589 |
guidance_scale=float(guidance_val),
|
| 590 |
width=640,
|
| 591 |
height=640,
|
| 592 |
-
seconds_forward=int(seconds_forward_val)
|
|
|
|
| 593 |
)
|
| 594 |
prev_path = shots[idx-1]["image_path"] if idx > 0 else None
|
| 595 |
return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
|
| 596 |
|
| 597 |
gen_btn.click(
|
| 598 |
on_generate_img,
|
| 599 |
-
inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs],
|
| 600 |
outputs=[out_img, prev_img, kf_status]
|
| 601 |
)
|
| 602 |
|
|
|
|
| 1 |
+
# app.py — FLUX-only with temporal chaining (5s later by default) + Aggressive follow option
|
| 2 |
import os, json, uuid, re
|
| 3 |
from datetime import datetime
|
| 4 |
import gradio as gr
|
|
|
|
| 84 |
|
| 85 |
def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
|
| 86 |
return (
|
| 87 |
+
"You are a cinematographer and storyboard artist. "
|
| 88 |
"Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
|
| 89 |
+
"For each shot, provide the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details. "
|
| 90 |
+
"Imagine you're describing frames for a film storyboard, not vague events.\n\n"
|
| 91 |
"Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
|
| 92 |
f"Create a storyboard of {n_shots} shots for this idea:\n\n"
|
| 93 |
f"'''{user_prompt}'''\n\n"
|
|
|
|
| 95 |
"{\n"
|
| 96 |
' \"id\": <int starting at 1>,\n'
|
| 97 |
' \"title\": \"Short shot title\",\n'
|
| 98 |
+
' \"description\": \"Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details.\",\n'
|
| 99 |
f" \"duration\": {default_len},\n"
|
| 100 |
f" \"fps\": {default_fps},\n"
|
| 101 |
" \"steps\": 30,\n"
|
| 102 |
" \"seed\": null,\n"
|
| 103 |
+
' \"negative\": \"\"\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
"}\n\n"
|
| 105 |
"Output must start with <JSON> and end with </JSON>.\n"
|
| 106 |
)
|
|
|
|
| 118 |
f" \"fps\": {default_fps},\n"
|
| 119 |
" \"steps\": 30,\n"
|
| 120 |
" \"seed\": null,\n"
|
| 121 |
+
' \"negative\": \"\"\n"
|
| 122 |
"}\n"
|
| 123 |
)
|
| 124 |
|
|
|
|
| 281 |
img.save(out)
|
| 282 |
return out
|
| 283 |
|
| 284 |
+
# ---- Temporal prompt composer (PRIORITIZE the new shot) ----
|
| 285 |
def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
|
| 286 |
"""
|
| 287 |
+
Build a prompt that continues the scene N seconds later,
|
| 288 |
+
prioritizing the NEW shot description (composition/action),
|
| 289 |
+
while keeping only identity/lighting/environment continuity.
|
| 290 |
Returns (composed_prompt, composed_negative).
|
| 291 |
"""
|
| 292 |
curr = shots[idx]
|
|
|
|
| 300 |
prev_desc = (prev.get("description") or "").strip()
|
| 301 |
|
| 302 |
composed = (
|
| 303 |
+
f"Continue the same scene {seconds_forward} seconds later.\n"
|
| 304 |
+
f"PRIORITIZE this new moment and its composition now: \"{curr_desc}\".\n"
|
| 305 |
+
f"Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
|
| 306 |
+
f"Previous frame (context only, do not copy its framing): \"{prev_desc}\".\n"
|
| 307 |
+
f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
|
| 308 |
).strip()
|
| 309 |
|
| 310 |
negative = (
|
| 311 |
curr_neg + (
|
| 312 |
+
"; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
|
| 313 |
+
"hard scene reset; different subject identity; wildly different art style; unrelated background"
|
| 314 |
)
|
| 315 |
).strip("; ")
|
| 316 |
|
|
|
|
| 321 |
pid: str,
|
| 322 |
shot_idx: int,
|
| 323 |
shots: list,
|
| 324 |
+
t2i_steps: int = 18, # FLUX: 12–22
|
| 325 |
+
i2i_steps: int = 22, # FLUX: 16–26
|
| 326 |
+
i2i_strength: float = 0.90, # ↑ more change toward new prompt
|
| 327 |
+
guidance_scale: float = 3.4, # ↑ stronger text pull
|
| 328 |
width: int = 640,
|
| 329 |
height: int = 640,
|
| 330 |
+
seconds_forward: int = 5, # temporal step
|
| 331 |
+
aggressive: bool = False # optional push
|
| 332 |
):
|
| 333 |
"""
|
| 334 |
Generate image for shots[shot_idx] using FLUX only.
|
| 335 |
- Shot 1: text2img
|
| 336 |
+
- Shot k>1: img2img from previous approved frame + temporal prompt ("N seconds later")
|
| 337 |
"""
|
| 338 |
try:
|
| 339 |
t2i, i2i = _lazy_flux_pipes()
|
|
|
|
| 361 |
prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
|
| 362 |
use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
|
| 363 |
|
| 364 |
+
# Aggressive mode bumps
|
| 365 |
+
if aggressive:
|
| 366 |
+
i2i_strength = min(0.98, max(i2i_strength, 0.92))
|
| 367 |
+
guidance_scale = max(guidance_scale, 3.6)
|
| 368 |
+
i2i_steps = max(i2i_steps, 24)
|
| 369 |
+
|
| 370 |
# generate
|
| 371 |
if not use_prev:
|
| 372 |
out = t2i(
|
| 373 |
prompt=composed_prompt,
|
| 374 |
negative_prompt=composed_negative or None,
|
| 375 |
+
num_inference_steps=int(max(10, t2i_steps)),
|
| 376 |
+
guidance_scale=float(max(2.4, guidance_scale)),
|
| 377 |
generator=gen,
|
| 378 |
width=width, height=height
|
| 379 |
).images[0]
|
| 380 |
else:
|
| 381 |
+
init_image = Image.open(prev_path).convert("RGB") # previous approved frame (the "init_image")
|
| 382 |
out = i2i(
|
| 383 |
prompt=composed_prompt,
|
| 384 |
negative_prompt=composed_negative or None,
|
| 385 |
image=init_image,
|
| 386 |
+
strength=float(min(max(i2i_strength, 0.70), 0.98)),
|
| 387 |
+
num_inference_steps=int(max(14, i2i_steps)),
|
| 388 |
+
guidance_scale=float(max(2.4, guidance_scale)),
|
| 389 |
generator=gen
|
| 390 |
).images[0]
|
| 391 |
|
|
|
|
| 425 |
gr.Markdown(
|
| 426 |
"Edit storyboard prompts, then generate keyframes.\n"
|
| 427 |
"**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
|
| 428 |
+
"while the current shot description drives composition & action. **Model**: FLUX-only."
|
| 429 |
)
|
| 430 |
|
| 431 |
# State
|
|
|
|
| 475 |
approve_next_btn = gr.Button("Approve & Next β", variant="secondary")
|
| 476 |
|
| 477 |
with gr.Row():
|
| 478 |
+
img_strength = gr.Slider(0.50, 0.98, value=0.90, step=0.02, label="Change vs Consistency (img2img strength)")
|
| 479 |
+
img_steps = gr.Slider(12, 28, value=22, step=1, label="Inference Steps (img2img)")
|
| 480 |
+
guidance = gr.Slider(2.4, 4.0, value=3.4, step=0.1, label="Guidance Scale")
|
| 481 |
temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
|
| 482 |
+
aggressive_follow = gr.Checkbox(value=False, label="Aggressive follow prompt (more change)")
|
| 483 |
|
| 484 |
with gr.Row():
|
| 485 |
prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
|
|
|
|
| 576 |
outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
|
| 577 |
)
|
| 578 |
|
| 579 |
+
def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val, aggressive_val):
|
| 580 |
if p is None: raise gr.Error("No project.")
|
| 581 |
shots = p["shots"]
|
| 582 |
if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
|
|
|
|
| 586 |
p["meta"]["id"],
|
| 587 |
int(idx),
|
| 588 |
shots,
|
| 589 |
+
t2i_steps=18,
|
| 590 |
i2i_steps=int(i2i_steps_val),
|
| 591 |
i2i_strength=float(i2i_strength_val),
|
| 592 |
guidance_scale=float(guidance_val),
|
| 593 |
width=640,
|
| 594 |
height=640,
|
| 595 |
+
seconds_forward=int(seconds_forward_val),
|
| 596 |
+
aggressive=bool(aggressive_val)
|
| 597 |
)
|
| 598 |
prev_path = shots[idx-1]["image_path"] if idx > 0 else None
|
| 599 |
return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
|
| 600 |
|
| 601 |
gen_btn.click(
|
| 602 |
on_generate_img,
|
| 603 |
+
inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs, aggressive_follow],
|
| 604 |
outputs=[out_img, prev_img, kf_status]
|
| 605 |
)
|
| 606 |
|