Shalmoni committed on
Commit
9628a3b
Β·
verified Β·
1 Parent(s): e65b7f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -43
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py β€” FLUX-only with temporal chaining (5s later by default)
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
@@ -84,10 +84,10 @@ def _lazy_model_tok():
84
 
85
  def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
86
  return (
87
- "You are a **cinematographer and storyboard artist**. "
88
  "Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
89
- "For each shot, provide **the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details**. "
90
- "Imagine you're describing frames for a film storyboard, NOT vague events.\n\n"
91
  "Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
92
  f"Create a storyboard of {n_shots} shots for this idea:\n\n"
93
  f"'''{user_prompt}'''\n\n"
@@ -95,19 +95,12 @@ def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_
95
  "{\n"
96
  ' \"id\": <int starting at 1>,\n'
97
  ' \"title\": \"Short shot title\",\n'
98
- ' \"description\": \"Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details. Be as descriptive as possible.\",\n'
99
  f" \"duration\": {default_len},\n"
100
  f" \"fps\": {default_fps},\n"
101
  " \"steps\": 30,\n"
102
  " \"seed\": null,\n"
103
- ' \"negative\": \"\"\n'
104
- "}\n\n"
105
- "Example of good description:\n"
106
- "{\n"
107
- " \"id\": 1,\n"
108
- " \"title\": \"Low angle car approach\",\n"
109
- " \"description\": \"A silver sedan drives towards the camera on a narrow mountain road at sunset. The camera is low to the ground near the center of the road, facing slightly upwards. Pine trees rise on both sides, and warm orange light hits the rocks. The car is centered, headlights on, creating dramatic shadows.\",\n"
110
- " ...\n"
111
  "}\n\n"
112
  "Output must start with <JSON> and end with </JSON>.\n"
113
  )
@@ -125,7 +118,7 @@ def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_le
125
  f" \"fps\": {default_fps},\n"
126
  " \"steps\": 30,\n"
127
  " \"seed\": null,\n"
128
- ' "negative": ""\n'
129
  "}\n"
130
  )
131
 
@@ -288,10 +281,12 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
288
  img.save(out)
289
  return out
290
 
291
- # ---- Temporal prompt composer ----
292
  def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
293
  """
294
- Build a prompt that explicitly continues the scene N seconds later.
 
 
295
  Returns (composed_prompt, composed_negative).
296
  """
297
  curr = shots[idx]
@@ -305,17 +300,17 @@ def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) ->
305
  prev_desc = (prev.get("description") or "").strip()
306
 
307
  composed = (
308
- f"Continue the exact same scene {seconds_forward} seconds later.\n"
309
- f"Maintain continuity with the previous frame (composition, subject identity, camera lens and angle, lighting, color palette, time of day, environment).\n"
310
- f"Previous frame description: \"{prev_desc}\"\n"
311
- f"New moment to depict now: \"{curr_desc}\"\n"
312
- f"Do NOT reset the scene; only natural progression over {seconds_forward} seconds."
313
  ).strip()
314
 
315
  negative = (
316
  curr_neg + (
317
- "; scene reset; different subject identity; different environment; time jump; hard cut; "
318
- "dramatic style shift; unrelated background; different camera make/lens"
319
  )
320
  ).strip("; ")
321
 
@@ -326,18 +321,19 @@ def generate_keyframe_image(
326
  pid: str,
327
  shot_idx: int,
328
  shots: list,
329
- t2i_steps: int = 16, # FLUX: 12–20
330
- i2i_steps: int = 18, # FLUX: 14–22
331
- i2i_strength: float = 0.85, # higher -> follow prompt more
332
- guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
333
  width: int = 640,
334
  height: int = 640,
335
- seconds_forward: int = 5 # temporal step
 
336
  ):
337
  """
338
  Generate image for shots[shot_idx] using FLUX only.
339
  - Shot 1: text2img
340
- - Shot k>1: ALWAYS img2img from previous approved frame + temporal prompt ("N seconds later")
341
  """
342
  try:
343
  t2i, i2i = _lazy_flux_pipes()
@@ -365,25 +361,31 @@ def generate_keyframe_image(
365
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
366
  use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
367
 
 
 
 
 
 
 
368
  # generate
369
  if not use_prev:
370
  out = t2i(
371
  prompt=composed_prompt,
372
  negative_prompt=composed_negative or None,
373
- num_inference_steps=int(max(8, t2i_steps)),
374
- guidance_scale=float(max(2.0, guidance_scale)),
375
  generator=gen,
376
  width=width, height=height
377
  ).images[0]
378
  else:
379
- init_image = Image.open(prev_path).convert("RGB")
380
  out = i2i(
381
  prompt=composed_prompt,
382
  negative_prompt=composed_negative or None,
383
  image=init_image,
384
- strength=float(min(max(i2i_strength, 0.70), 0.95)),
385
- num_inference_steps=int(max(12, i2i_steps)),
386
- guidance_scale=float(max(2.0, guidance_scale)),
387
  generator=gen
388
  ).images[0]
389
 
@@ -423,7 +425,7 @@ with gr.Blocks() as demo:
423
  gr.Markdown(
424
  "Edit storyboard prompts, then generate keyframes.\n"
425
  "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
426
- "while respecting the current shot description. **Model**: FLUX-only."
427
  )
428
 
429
  # State
@@ -473,10 +475,11 @@ with gr.Blocks() as demo:
473
  approve_next_btn = gr.Button("Approve & Next β†’", variant="secondary")
474
 
475
  with gr.Row():
476
- img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
477
- img_steps = gr.Slider(8, 28, value=18, step=1, label="Inference Steps (img2img)")
478
- guidance = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
479
  temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
 
480
 
481
  with gr.Row():
482
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
@@ -573,7 +576,7 @@ with gr.Blocks() as demo:
573
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
574
  )
575
 
576
- def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val):
577
  if p is None: raise gr.Error("No project.")
578
  shots = p["shots"]
579
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
@@ -583,20 +586,21 @@ with gr.Blocks() as demo:
583
  p["meta"]["id"],
584
  int(idx),
585
  shots,
586
- t2i_steps=16,
587
  i2i_steps=int(i2i_steps_val),
588
  i2i_strength=float(i2i_strength_val),
589
  guidance_scale=float(guidance_val),
590
  width=640,
591
  height=640,
592
- seconds_forward=int(seconds_forward_val)
 
593
  )
594
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
595
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
596
 
597
  gen_btn.click(
598
  on_generate_img,
599
- inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs],
600
  outputs=[out_img, prev_img, kf_status]
601
  )
602
 
 
1
+ # app.py β€” FLUX-only with temporal chaining (5s later by default) + Aggressive follow option
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
 
84
 
85
  def _prompt_with_tags(user_prompt: str, n_shots: int, default_fps: int, default_len: int) -> str:
86
  return (
87
+ "You are a cinematographer and storyboard artist. "
88
  "Given a story idea, break it into a sequence of visually DISTINCT, DETAILED shots. "
89
+ "For each shot, provide the objects in the scene, very specific camera placement, angle, subject position, lighting, and background details. "
90
+ "Imagine you're describing frames for a film storyboard, not vague events.\n\n"
91
  "Return ONLY a JSON array enclosed between <JSON> and </JSON> tags.\n"
92
  f"Create a storyboard of {n_shots} shots for this idea:\n\n"
93
  f"'''{user_prompt}'''\n\n"
 
95
  "{\n"
96
  ' \"id\": <int starting at 1>,\n'
97
  ' \"title\": \"Short shot title\",\n'
98
+ ' \"description\": \"Highly specific visual description for image generation. Include camera angle, framing, time of day, subject position, lighting, mood, and background details.\",\n'
99
  f" \"duration\": {default_len},\n"
100
  f" \"fps\": {default_fps},\n"
101
  " \"steps\": 30,\n"
102
  " \"seed\": null,\n"
103
+ ' \"negative\": \"\"\n'
 
 
 
 
 
 
 
104
  "}\n\n"
105
  "Output must start with <JSON> and end with </JSON>.\n"
106
  )
 
118
  f" \"fps\": {default_fps},\n"
119
  " \"steps\": 30,\n"
120
  " \"seed\": null,\n"
121
+ ' \"negative\": \"\"\n'
122
  "}\n"
123
  )
124
 
 
281
  img.save(out)
282
  return out
283
 
284
+ # ---- Temporal prompt composer (PRIORITIZE the new shot) ----
285
  def _compose_temporal_prompt(shots: list, idx: int, seconds_forward: int = 5) -> tuple[str, str]:
286
  """
287
+ Build a prompt that continues the scene N seconds later,
288
+ prioritizing the NEW shot description (composition/action),
289
+ while keeping only identity/lighting/environment continuity.
290
  Returns (composed_prompt, composed_negative).
291
  """
292
  curr = shots[idx]
 
300
  prev_desc = (prev.get("description") or "").strip()
301
 
302
  composed = (
303
+ f"Continue the same scene {seconds_forward} seconds later.\n"
304
+ f"PRIORITIZE this new moment and its composition now: \"{curr_desc}\".\n"
305
+ f"Keep continuity ONLY for subject identity, lighting palette, time of day, and general environment style.\n"
306
+ f"Previous frame (context only, do not copy its framing): \"{prev_desc}\".\n"
307
+ f"Avoid replicating the previous composition; allow camera move / subject reposition consistent with {seconds_forward} seconds of natural progression."
308
  ).strip()
309
 
310
  negative = (
311
  curr_neg + (
312
+ "; identical composition as previous; exact same framing; rigid pose repeat; freeze frame; "
313
+ "hard scene reset; different subject identity; wildly different art style; unrelated background"
314
  )
315
  ).strip("; ")
316
 
 
321
  pid: str,
322
  shot_idx: int,
323
  shots: list,
324
+ t2i_steps: int = 18, # FLUX: 12–22
325
+ i2i_steps: int = 22, # FLUX: 16–26
326
+ i2i_strength: float = 0.90, # ↑ more change toward new prompt
327
+ guidance_scale: float = 3.4, # ↑ stronger text pull
328
  width: int = 640,
329
  height: int = 640,
330
+ seconds_forward: int = 5, # temporal step
331
+ aggressive: bool = False # optional push
332
  ):
333
  """
334
  Generate image for shots[shot_idx] using FLUX only.
335
  - Shot 1: text2img
336
+ - Shot k>1: img2img from previous approved frame + temporal prompt ("N seconds later")
337
  """
338
  try:
339
  t2i, i2i = _lazy_flux_pipes()
 
361
  prev_path = shots[shot_idx - 1].get("image_path") if shot_idx > 0 else None
362
  use_prev = bool(shot_idx > 0 and prev_path and os.path.exists(prev_path))
363
 
364
+ # Aggressive mode bumps
365
+ if aggressive:
366
+ i2i_strength = min(0.98, max(i2i_strength, 0.92))
367
+ guidance_scale = max(guidance_scale, 3.6)
368
+ i2i_steps = max(i2i_steps, 24)
369
+
370
  # generate
371
  if not use_prev:
372
  out = t2i(
373
  prompt=composed_prompt,
374
  negative_prompt=composed_negative or None,
375
+ num_inference_steps=int(max(10, t2i_steps)),
376
+ guidance_scale=float(max(2.4, guidance_scale)),
377
  generator=gen,
378
  width=width, height=height
379
  ).images[0]
380
  else:
381
+ init_image = Image.open(prev_path).convert("RGB") # previous approved frame (the "init_image")
382
  out = i2i(
383
  prompt=composed_prompt,
384
  negative_prompt=composed_negative or None,
385
  image=init_image,
386
+ strength=float(min(max(i2i_strength, 0.70), 0.98)),
387
+ num_inference_steps=int(max(14, i2i_steps)),
388
+ guidance_scale=float(max(2.4, guidance_scale)),
389
  generator=gen
390
  ).images[0]
391
 
 
425
  gr.Markdown(
426
  "Edit storyboard prompts, then generate keyframes.\n"
427
  "**Temporal chaining**: each new shot is generated N seconds later from the previous approved frame, "
428
+ "while the current shot description drives composition & action. **Model**: FLUX-only."
429
  )
430
 
431
  # State
 
475
  approve_next_btn = gr.Button("Approve & Next β†’", variant="secondary")
476
 
477
  with gr.Row():
478
+ img_strength = gr.Slider(0.50, 0.98, value=0.90, step=0.02, label="Change vs Consistency (img2img strength)")
479
+ img_steps = gr.Slider(12, 28, value=22, step=1, label="Inference Steps (img2img)")
480
+ guidance = gr.Slider(2.4, 4.0, value=3.4, step=0.1, label="Guidance Scale")
481
  temporal_secs = gr.Slider(1, 10, value=5, step=1, label="Temporal step (seconds later)")
482
+ aggressive_follow = gr.Checkbox(value=False, label="Aggressive follow prompt (more change)")
483
 
484
  with gr.Row():
485
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
 
576
  outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
577
  )
578
 
579
+ def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val, seconds_forward_val, aggressive_val):
580
  if p is None: raise gr.Error("No project.")
581
  shots = p["shots"]
582
  if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
 
586
  p["meta"]["id"],
587
  int(idx),
588
  shots,
589
+ t2i_steps=18,
590
  i2i_steps=int(i2i_steps_val),
591
  i2i_strength=float(i2i_strength_val),
592
  guidance_scale=float(guidance_val),
593
  width=640,
594
  height=640,
595
+ seconds_forward=int(seconds_forward_val),
596
+ aggressive=bool(aggressive_val)
597
  )
598
  prev_path = shots[idx-1]["image_path"] if idx > 0 else None
599
  return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")
600
 
601
  gen_btn.click(
602
  on_generate_img,
603
+ inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance, temporal_secs, aggressive_follow],
604
  outputs=[out_img, prev_img, kf_status]
605
  )
606