Spaces:

MoonMath-ai
/

Prompt-2-Video

Running on Zero

App Files Files Community

Shalmoni committed on Oct 15

Commit

e65b7f3

verified ·

1 Parent(s): 2fe90ee

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -49

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py (FLUX-only, smart chaining)
 import os, json, uuid, re
 from datetime import datetime
 import gradio as gr
@@ -247,15 +247,13 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
     return _normalize_shots(shots_raw, default_fps, default_len)
 # =========================
-# IMAGE GEN — FLUX only (no fallback)
 # =========================
 USE_CUDA = torch.cuda.is_available()
 DTYPE = torch.float16 if USE_CUDA else torch.float32
-# ✅ Use a real FLUX repo instead of Nano
 FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")
-# ✅ Add token support
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
 _flux_t2i = None
@@ -290,20 +288,38 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
     img.save(out)
     return out
-def _significant_change(curr_desc: str, prev_desc: str) -> bool:
     """
-    If token-level symmetric difference is large, treat as a new scene:
-    do text2img (same seed) instead of img2img to avoid 'mush'.
     """
-    if not prev_desc: return True
-    a = set(re.findall(r"\w+", curr_desc.lower()))
-    b = set(re.findall(r"\w+", prev_desc.lower()))
-    comp_words = {"wide","close","low","high","overhead","aerial","profile","left","right","center",
-                  "portrait","landscape","long","establishing","macro","tilt","dutch","angle",
-                  "night","day","sunset","sunrise","noon","backlit","rim","key","fill"}
-    delta = a.symmetric_difference(b)
-    score = len(delta) + 2 * len((a ^ b) & comp_words)
-    return score >= 10  # more eager to break chaining
 @spaces.GPU(duration=180)
 def generate_keyframe_image(
@@ -315,50 +331,45 @@ def generate_keyframe_image(
     i2i_strength: float = 0.85,  # higher -> follow prompt more
     guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
     width: int = 640,
-    height: int = 640
 ):
     """
     Generate image for shots[shot_idx] using FLUX only.
-    - shot 0: text2img
-    - shot k>0: smart chaining
-        * if significant change: text2img (same seed for style)
-        * else: img2img from previous approved image
     """
     try:
         t2i, i2i = _lazy_flux_pipes()
     except Exception as e:
         raise gr.Error(
             f"FLUX failed to load: {e}\n"
-            "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-Nano') and ensure HF_TOKEN if required."
         )
-    shot = shots[shot_idx]
-    prompt   = (shot.get("description") or "").strip()
-    negative = shot.get("negative") or ""
-    seed     = shot.get("seed", None)
     device = "cuda" if USE_CUDA else "cpu"
     gen = torch.Generator(device)
     if isinstance(seed, int):
         gen = gen.manual_seed(int(seed))
     width  = max(256, min(1024, int(width)))
     height = max(256, min(1024, int(height)))
-    # decide chaining
-    use_prev = False
     prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
-    if shot_idx == 0 or not prev_path or not os.path.exists(prev_path):
-        use_prev = False
-    else:
-        prev_desc = shots[shot_idx - 1].get("description") or ""
-        use_prev = not _significant_change(prompt, prev_desc)
     # generate
     if not use_prev:
         out = t2i(
-            prompt=prompt,
-            negative_prompt=negative or None,
             num_inference_steps=int(max(8, t2i_steps)),
             guidance_scale=float(max(2.0, guidance_scale)),
             generator=gen,
@@ -367,16 +378,16 @@ def generate_keyframe_image(
     else:
         init_image = Image.open(prev_path).convert("RGB")
         out = i2i(
-            prompt=prompt,
-            negative_prompt=negative or None,
             image=init_image,
-            strength=float(min(max(i2i_strength, 0.5), 0.95)),
-            num_inference_steps=int(max(10, i2i_steps)),
             guidance_scale=float(max(2.0, guidance_scale)),
             generator=gen
         ).images[0]
-    saved_path = _save_keyframe(pid, int(shot["id"]), out)
     return saved_path
 # =========================
@@ -410,10 +421,9 @@ def df_to_shots(df: pd.DataFrame) -> list:
 with gr.Blocks() as demo:
     gr.Markdown("# 🎬 Storyboard → Keyframes → (Videos soon) → Export")
     gr.Markdown(
-        "Edit storyboard prompts, then generate keyframes. "
-        "**Smart chaining**: only reuse the previous image if the new prompt is similar; "
-        "otherwise we regenerate from text with the same seed for style consistency.  "
-        "**Model**: FLUX-only."
     )
     # State
@@ -461,11 +471,13 @@ with gr.Blocks() as demo:
             with gr.Row():
                 gen_btn = gr.Button("Generate / Regenerate", variant="primary")
                 approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
-            # tuning controls (defaults tuned for FLUX)
             with gr.Row():
                 img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
                 img_steps    = gr.Slider(8, 28, value=18,  step=1,   label="Inference Steps (img2img)")
                 guidance     = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
             with gr.Row():
                 prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
                 out_img  = gr.Image(label="Generated image", type="filepath")
@@ -561,7 +573,7 @@ with gr.Blocks() as demo:
         outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
     )
-    def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val):
         if p is None: raise gr.Error("No project.")
         shots = p["shots"]
         if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
@@ -576,14 +588,15 @@ with gr.Blocks() as demo:
             i2i_strength=float(i2i_strength_val),
             guidance_scale=float(guidance_val),
             width=640,
-            height=640
         )
         prev_path = shots[idx-1]["image_path"] if idx > 0 else None
         return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
     gen_btn.click(
         on_generate_img,
-        inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance],
         outputs=[out_img, prev_img, kf_status]
     )

+# app.py — FLUX-only with temporal chaining (5s later by default)
 import os, json, uuid, re
 from datetime import datetime
 import gradio as gr
     return _normalize_shots(shots_raw, default_fps, default_len)
 # =========================
+# IMAGE GEN — FLUX only (no fallback) + Temporal chaining
 # =========================
 USE_CUDA = torch.cuda.is_available()
 DTYPE = torch.float16 if USE_CUDA else torch.float32
+# Correct, gated repo; accept access and set HF_TOKEN
 FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-schnell")
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
 _flux_t2i = None
     img.save(out)
     return out
+# ---- Temporal prompt composer ----
+def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
     """
+    Build a prompt that explicitly continues the scene N seconds later.
+    Returns (composed_prompt, composed_negative).
     """
+    curr = shots[idx]
+    curr_desc = (curr.get("description") or "").strip()
+    curr_neg  = (curr.get("negative") or "").strip()
+    if idx == 0:
+        return curr_desc, curr_neg
+    prev = shots[idx - 1]
+    prev_desc = (prev.get("description") or "").strip()
+    composed = (
+        f"Continue the exact same scene {seconds_forward} seconds later.\n"
+        f"Maintain continuity with the previous frame (composition, subject identity, camera lens and angle, lighting, color palette, time of day, environment).\n"
+        f"Previous frame description: \"{prev_desc}\"\n"
+        f"New moment to depict now: \"{curr_desc}\"\n"
+        f"Do NOT reset the scene; only natural progression over {seconds_forward} seconds."
+    ).strip()
+    negative = (
+        curr_neg + (
+            "; scene reset; different subject identity; different environment; time jump; hard cut; "
+            "dramatic style shift; unrelated background; different camera make/lens"
+        )
+    ).strip("; ")
+    return composed, negative
 @spaces.GPU(duration=180)
 def generate_keyframe_image(
     i2i_strength: float = 0.85,  # higher -> follow prompt more
     guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
     width: int = 640,
+    height: int = 640,
+    seconds_forward: int = 5     # temporal step
 ):
     """
     Generate image for shots[shot_idx] using FLUX only.
+    - Shot 1: text2img
+    - Shot k>1: ALWAYS img2img from previous approved frame + temporal prompt ("N seconds later")
     """
     try:
         t2i, i2i = _lazy_flux_pipes()
     except Exception as e:
         raise gr.Error(
             f"FLUX failed to load: {e}\n"
+            "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-schnell') and ensure HF_TOKEN if required."
         )
+    # Build temporal prompt
+    composed_prompt, composed_negative = _compose_temporal_prompt(shots, shot_idx, seconds_forward=seconds_forward)
+    # RNG / seed
+    seed = shots[shot_idx].get("seed", None)
     device = "cuda" if USE_CUDA else "cpu"
     gen = torch.Generator(device)
     if isinstance(seed, int):
         gen = gen.manual_seed(int(seed))
+    # sizes
     width  = max(256, min(1024, int(width)))
     height = max(256, min(1024, int(height)))
+    # chaining
     prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
+    use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
     # generate
     if not use_prev:
         out = t2i(
+            prompt=composed_prompt,
+            negative_prompt=composed_negative or None,
             num_inference_steps=int(max(8, t2i_steps)),
             guidance_scale=float(max(2.0, guidance_scale)),
             generator=gen,
     else:
         init_image = Image.open(prev_path).convert("RGB")
         out = i2i(
+            prompt=composed_prompt,
+            negative_prompt=composed_negative or None,
             image=init_image,
+            strength=float(min(max(i2i_strength, 0.70), 0.95)),
+            num_inference_steps=int(max(12, i2i_steps)),
             guidance_scale=float(max(2.0, guidance_scale)),
             generator=gen
         ).images[0]
+    saved_path = _save_keyframe(pid, int(shots[shot_idx]["id"]), out)
     return saved_path
 # =========================
 with gr.Blocks() as demo:
     gr.Markdown("# 🎬 Storyboard → Keyframes → (Videos soon) → Export")
     gr.Markdown(
+        "Edit storyboard prompts, then generate keyframes.\n"
+        "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
+        "while respecting the current shot description. **Model**: FLUX-only."
     )
     # State
             with gr.Row():
                 gen_btn = gr.Button("Generate / Regenerate", variant="primary")
                 approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
             with gr.Row():
                 img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
                 img_steps    = gr.Slider(8, 28, value=18,  step=1,   label="Inference Steps (img2img)")
                 guidance     = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
+                temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
             with gr.Row():
                 prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
                 out_img  = gr.Image(label="Generated image", type="filepath")
         outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
     )
+    def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val):
         if p is None: raise gr.Error("No project.")
         shots = p["shots"]
         if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
             i2i_strength=float(i2i_strength_val),
             guidance_scale=float(guidance_val),
             width=640,
+            height=640,
+            seconds_forward=int(seconds_forward_val)
         )
         prev_path = shots[idx-1]["image_path"] if idx > 0 else None
         return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
     gen_btn.click(
         on_generate_img,
+        inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs],
         outputs=[out_img, prev_img, kf_status]
     )