Update app.py
app.py CHANGED
@@ -41,8 +41,8 @@ def ensure_project(p, suggested_name="Project"):
     name = f"{suggested_name}-{pid[:4]}"
     proj = {
         "meta": {"id": pid, "name": name, "created": now_iso(), "updated": now_iso()},
-        "shots": [],  # each shot: id,title,description,duration,fps,steps,seed,negative,image_path
-        "clips": []
+        "shots": [],  # each shot: id,title,description,duration,fps,steps,seed,negative,image_path
+        "clips": [],
     }
     save_project(proj)
     return proj

@@ -65,19 +65,17 @@ def _lazy_model_tok():

     _tokenizer = AutoTokenizer.from_pretrained(STORYBOARD_MODEL, trust_remote_code=True)

-    # Choose a dtype that works both locally and on ZeroGPU
     use_cuda = torch.cuda.is_available()
-    preferred_dtype = torch.float16 if use_cuda else torch.float32
+    preferred_dtype = torch.float16 if use_cuda else torch.float32

     _model = AutoModelForCausalLM.from_pretrained(
         STORYBOARD_MODEL,
         device_map="auto",
-        torch_dtype=preferred_dtype,
+        torch_dtype=preferred_dtype,  # <- correct kwarg
         trust_remote_code=True,
         use_safetensors=True
     )

-    # Ensure pad token to avoid warnings
     if _tokenizer.pad_token_id is None and _tokenizer.eos_token_id is not None:
         _tokenizer.pad_token_id = _tokenizer.eos_token_id

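Note: the dtype change above is the load-time pattern that works both locally (CPU) and on ZeroGPU, where CUDA only shows up inside the GPU-decorated call. A minimal standalone sketch of the same pattern, with a placeholder model id (the Space reads STORYBOARD_MODEL from the environment):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "your-org/your-storyboard-model"  # placeholder for STORYBOARD_MODEL

    tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        trust_remote_code=True,
        use_safetensors=True,
    )
    # mirror the pad-token guard so generate() does not warn
    if tok.pad_token_id is None and tok.eos_token_id is not None:
        tok.pad_token_id = tok.eos_token_id
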
@@ -115,7 +113,7 @@ def _prompt_minimal(user_prompt: str, n_shots: int, default_fps: int, default_le
         f"  \"fps\": {default_fps},\n"
         "  \"steps\": 30,\n"
         "  \"seed\": null,\n"
-        '  \"negative\": \"\"\n
+        "  \"negative\": \"\"\n"
         "}\n"
     )

@@ -130,7 +128,6 @@ def _apply_chat(tok, system_msg: str, user_msg: str) -> str:
     return system_msg + "\n\n" + user_msg

 def _generate_text(model, tok, prompt_text: str) -> str:
-    """Decode only the continuation (avoid prompt echo)."""
     inputs = tok(prompt_text, return_tensors="pt")
     inputs = {k: v.to(model.device) for k, v in inputs.items()}
     eos_id = tok.eos_token_id or tok.pad_token_id

@@ -192,13 +189,11 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
     model, tok = _lazy_model_tok()
     system = "You are a film previsualization assistant. Output must be valid JSON."

-    # Pass 1
     p1 = _apply_chat(tok, system + " Return ONLY JSON inside <JSON> tags.",
                      _prompt_with_tags(user_prompt, n_shots, default_fps, default_len))
     out1 = _generate_text(model, tok, p1)
     json_text = _extract_json_array(out1)

-    # Pass 2
     if not json_text:
         p2 = _apply_chat(tok, system + " Reply ONLY with a JSON array.",
                          _prompt_minimal(user_prompt, n_shots, default_fps, default_len))

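Note: this retry path leans on _extract_json_array, which is defined elsewhere in app.py and not shown in this hunk. A hypothetical stand-in that matches how it is used here (returns the JSON text or None) might look like:

    import json
    import re

    def extract_json_array(text):
        # Hypothetical helper: prefer a <JSON>...</JSON> block, else the outermost [...] span.
        m = re.search(r"<JSON>(.*?)</JSON>", text, flags=re.S)
        candidate = m.group(1).strip() if m else None
        if candidate is None:
            start, end = text.find("["), text.rfind("]")
            if start != -1 and end > start:
                candidate = text[start:end + 1].strip()
        if not candidate:
            return None
        try:
            json.loads(candidate)
            return candidate
        except json.JSONDecodeError:
            return None
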
@@ -209,7 +204,6 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
         if start != -1 and end != -1 and end > start:
             json_text = out2[start:end+1].strip()

-    # Empty fallback
     if not json_text or not json_text.strip():
         fallback = []
         for i in range(1, int(n_shots) + 1):

@@ -235,9 +229,8 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
     return _normalize_shots(shots_raw, default_fps, default_len)

 # =========================
-# IMAGE GEN (ZeroGPU)
+# IMAGE GEN (ZeroGPU) - sd-turbo t2i + img2img chaining
 # =========================
-
 from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline

 SD_MODEL = os.getenv("SD_MODEL", "stabilityai/sd-turbo")

@@ -245,20 +238,13 @@ _sd_t2i = None
 _sd_i2i = None

 def _lazy_sd_pipes():
-    """
-    Load SD once in a version-safe way:
-    - torch_dtype (not dtype)
-    - low_cpu_mem_usage=False to avoid offload_state_dict kwarg
-    - no revision pin (some repos don't have 'fp16' branch)
-    - optional HF token if set (for gated models)
-    """
     global _sd_t2i, _sd_i2i
     if _sd_t2i is not None and _sd_i2i is not None:
         return _sd_t2i, _sd_i2i

     use_cuda = torch.cuda.is_available()
     dtype = torch.float16 if use_cuda else torch.float32
-    hf_token = os.getenv("HF_TOKEN", None)
+    hf_token = os.getenv("HF_TOKEN", None)

     _sd_t2i = StableDiffusionPipeline.from_pretrained(
         SD_MODEL,

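Note: with the docstring removed, the loader's intent (version-safe SD loading, one download shared by both pipelines) is only implied. A minimal sketch of that idea, assuming the same SD_MODEL default; the actual function may simply call from_pretrained twice:

    import torch
    from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline

    model_id = "stabilityai/sd-turbo"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    t2i = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype, use_safetensors=True)
    # Reuse the already-loaded components for img2img instead of loading the checkpoint again
    i2i = StableDiffusionImg2ImgPipeline(**t2i.components)

    if torch.cuda.is_available():
        t2i.to("cuda")
        i2i.to("cuda")
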
@@ -297,51 +283,75 @@ def generate_keyframe_image(
     pid: str,
     shot_idx: int,
     shots: list,
-
-
+    t2i_steps: int = 6,          # first shot
+    i2i_steps: int = 10,         # subsequent shots
+    i2i_strength: float = 0.65,  # change vs consistency
+    guidance_scale: float = 0.5,
+    width: int = 512,
+    height: int = 512
 ):
     """
     Generate image for shots[shot_idx].
-    - shot 0: text2img
-    - shot k>0: img2img
+    - shot 0: text2img (few steps)
+    - shot k>0: img2img from previous approved image with higher strength/steps
+    Seed is kept SAME across all shots (stored in shots[i]['seed']).
     """
     t2i, i2i = _lazy_sd_pipes()
     shot = shots[shot_idx]
-
+
+    prompt = (shot.get("description") or "").strip()
     negative = shot.get("negative") or ""
-
-    seed = shot.get("seed", None)
+    seed = shot.get("seed", None)

-
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    gen = torch.Generator(device)
     if isinstance(seed, int):
-        gen = gen.manual_seed(seed)
+        gen = gen.manual_seed(int(seed))
+
+    width = max(256, min(1024, int(width)))
+    height = max(256, min(1024, int(height)))

     if shot_idx == 0 or not shots[shot_idx - 1].get("image_path"):
         out = t2i(
             prompt=prompt,
             negative_prompt=negative,
             guidance_scale=guidance_scale,
-            num_inference_steps=
-            generator=gen
+            num_inference_steps=int(max(1, t2i_steps)),
+            generator=gen,
+            width=width,
+            height=height
         ).images[0]
     else:
-        prev_path = shots[shot_idx - 1]
-
-
-
-
-
-
-
-
-
-
+        prev_path = shots[shot_idx - 1].get("image_path")
+        if prev_path and os.path.exists(prev_path):
+            init_image = Image.open(prev_path).convert("RGB")
+            strength = float(i2i_strength)
+            strength = min(max(strength, 0.50), 0.90)
+            out = i2i(
+                prompt=prompt,
+                negative_prompt=negative,
+                image=init_image,
+                guidance_scale=guidance_scale,
+                strength=strength,
+                num_inference_steps=int(max(2, i2i_steps)),
+                generator=gen
+            ).images[0]
+        else:
+            out = t2i(
+                prompt=prompt,
+                negative_prompt=negative,
+                guidance_scale=guidance_scale,
+                num_inference_steps=int(max(1, t2i_steps)),
+                generator=gen,
+                width=width,
+                height=height
+            ).images[0]

     saved_path = _save_keyframe(pid, int(shot["id"]), out)
     return saved_path

 # =========================
-# Shots <->
+# Shots <-> DataFrame utils
 # =========================
 SHOT_COLUMNS = ["id", "title", "description", "duration", "fps", "steps", "seed", "negative", "image_path"]

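Note: a usage sketch of the new signature, assuming a project dict shaped like the one built in ensure_project(); the first shot falls back to text2img, later shots condition on the previously saved keyframe:

    shots = project["shots"]
    for i in range(len(shots)):
        path = generate_keyframe_image(
            project["meta"]["id"],
            i,
            shots,
            t2i_steps=6,           # used for shot 0 or when no previous image exists
            i2i_steps=10,
            i2i_strength=0.65,     # lower keeps more of the previous keyframe
            guidance_scale=0.5,
            width=512,
            height=512,
        )
        shots[i]["image_path"] = path  # the next shot's img2img reads this file
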
@@ -370,7 +380,7 @@ def df_to_shots(df: pd.DataFrame) -> list:
 # =========================
 with gr.Blocks() as demo:
     gr.Markdown("# 🎬 Storyboard → Keyframes → Videos → Export")
-    gr.Markdown("
+    gr.Markdown("Edit storyboard prompts, then generate keyframes. Shots 2+ use the previous approved image for consistency. A single project seed is locked for a cohesive look.")

     # State
     project = gr.State(None)

@@ -406,6 +416,8 @@ with gr.Blocks() as demo:
                 label="Edit shots below (prompts & params)", wrap=True
             )
             save_edits_btn = gr.Button("Save Edits →", variant="primary", interactive=False)
+            with gr.Row():
+                proj_seed_box = gr.Number(label="Project Seed (locked across shots)", precision=0)
             to_keyframes_btn = gr.Button("Start Keyframes →", variant="secondary")

         with gr.Tab("Keyframes"):

@@ -415,6 +427,11 @@
             with gr.Row():
                 gen_btn = gr.Button("Generate / Regenerate", variant="primary")
                 approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
+            # tuning controls
+            with gr.Row():
+                img_strength = gr.Slider(0.40, 0.90, value=0.65, step=0.05, label="Change vs Consistency (img2img strength)")
+                img_steps = gr.Slider(4, 20, value=10, step=1, label="Img2Img Steps")
+                guidance = gr.Slider(0.0, 2.0, value=0.5, step=0.05, label="Guidance Scale")
             with gr.Row():
                 prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
                 out_img = gr.Image(label="Generated image", type="filepath")

@@ -453,10 +470,7 @@
         outputs=[project, shots_df, sb_status, save_edits_btn]
     )

-
-    def on_save_edits(*args):
-        p = args[0] if len(args) > 0 else None
-        df = args[1] if len(args) > 1 else None
+    def on_save_edits(p, df):
         if p is None:
             raise gr.Error("No project in memory. Click New Project, then generate a storyboard.")
         if df is None:

@@ -470,28 +484,79 @@

     save_edits_btn.click(on_save_edits, inputs=[project, shots_df], outputs=[project, sb_status])

-    def on_start_keyframes(p, df):
+    def on_start_keyframes(p, df, proj_seed_override):
         if p is None: raise gr.Error("No project.")
         shots = df_to_shots(df)
         if not shots: raise gr.Error("Storyboard is empty.")
-
+
+        # lock a single seed for the project:
+        proj_seed = None
+        # override if user supplied:
+        if proj_seed_override not in [None, ""] and str(proj_seed_override).isdigit():
+            proj_seed = int(proj_seed_override)
+
+        # otherwise use existing project meta seed or find one in shots:
+        if proj_seed is None:
+            proj_seed = p.get("meta", {}).get("seed", None)
+        if proj_seed is None:
+            for s in shots:
+                if isinstance(s.get("seed"), int):
+                    proj_seed = int(s["seed"])
+                    break
+        if proj_seed is None:
+            proj_seed = int(torch.randint(0, 2**31 - 1, (1,)).item())
+
+        # apply to all shots missing seed
+        for s in shots:
+            if not isinstance(s.get("seed"), int):
+                s["seed"] = proj_seed
+
+        p = dict(p)
+        p["shots"] = shots
+        p["meta"]["seed"] = proj_seed
+        p["meta"]["updated"] = now_iso()
+        save_project(p)
+
         idx = 0
         prev_path = None
-        info =
-
+        info = (
+            f"**Shot {shots[idx]['id']} – {shots[idx]['title']}** \n"
+            f"Duration: {shots[idx]['duration']}s @ {shots[idx]['fps']} fps \n"
+            f"Locked project seed: `{proj_seed}`"
+        )
+        return p, 0, gr.update(value=info), gr.update(value=shots[idx]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value=f"Ready to generate shot 1."), gr.update(value=proj_seed)

-    to_keyframes_btn.click(
+    to_keyframes_btn.click(
+        on_start_keyframes,
+        inputs=[project, shots_df, proj_seed_box],
+        outputs=[project, current_idx, shot_info_md, prompt_box, prev_img, out_img, kf_status, proj_seed_box]
+    )

-    def on_generate_img(p, idx, current_prompt):
+    def on_generate_img(p, idx, current_prompt, i2i_strength_val, i2i_steps_val, guidance_val):
         if p is None: raise gr.Error("No project.")
         shots = p["shots"]
         if idx < 0 or idx >= len(shots): raise gr.Error("Invalid shot index.")
-        shots[idx]["description"] = current_prompt  # allow tweaking
+        shots[idx]["description"] = current_prompt  # allow tweaking
         prev_path = shots[idx-1]["image_path"] if idx > 0 else None
-
+
+        img_path = generate_keyframe_image(
+            p["meta"]["id"],
+            int(idx),
+            shots,
+            t2i_steps=6,
+            i2i_steps=int(i2i_steps_val),
+            i2i_strength=float(i2i_strength_val),
+            guidance_scale=float(guidance_val),
+            width=512,
+            height=512
+        )
         return img_path, (prev_path or None), gr.update(value=f"Generated candidate for shot {shots[idx]['id']}.")

-    gen_btn.click(
+    gen_btn.click(
+        on_generate_img,
+        inputs=[project, current_idx, prompt_box, img_strength, img_steps, guidance],
+        outputs=[out_img, prev_img, kf_status]
+    )

     def on_approve_next(p, idx, current_prompt, latest_img_path):
         if p is None: raise gr.Error("No project.")

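Note: the seed-locking order above (explicit override, then project meta, then any per-shot seed, else a fresh random draw) could be factored into a small helper; a sketch only, not part of this commit:

    import torch

    def resolve_project_seed(project, shots, override=None):
        # Sketch: mirrors the resolution order used in on_start_keyframes.
        if override not in (None, "") and str(override).isdigit():
            return int(override)
        meta_seed = project.get("meta", {}).get("seed")
        if isinstance(meta_seed, int):
            return meta_seed
        for s in shots:
            if isinstance(s.get("seed"), int):
                return int(s["seed"])
        return int(torch.randint(0, 2**31 - 1, (1,)).item())
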
@@ -499,6 +564,7 @@
         i = int(idx)
         if i < 0 or i >= len(shots): raise gr.Error("Invalid shot index.")
         if not latest_img_path: raise gr.Error("Generate an image first.")
+
         # commit
         shots[i]["description"] = current_prompt
         shots[i]["image_path"] = latest_img_path

@@ -509,7 +575,11 @@
         # next
         if i + 1 < len(shots):
             ni = i + 1
-            info =
+            info = (
+                f"**Shot {shots[ni]['id']} – {shots[ni]['title']}** \n"
+                f"Duration: {shots[ni]['duration']}s @ {shots[ni]['fps']} fps \n"
+                f"Locked project seed: `{p['meta'].get('seed')}`"
+            )
             prev_path = shots[ni-1]["image_path"]
             return p, ni, gr.update(value=info), gr.update(value=shots[ni]["description"]), gr.update(value=prev_path), gr.update(value=None), gr.update(value=f"Approved shot {shots[i]['id']}. On to shot {shots[ni]['id']}.")
         else:

@@ -527,13 +597,15 @@

     def on_load(file_obj):
         p = load_project_file(file_obj)
+        seed_val = p.get("meta", {}).get("seed", None)
         return (
             p,
             gr.update(value=f"Loaded project `{p['meta']['name']}` (id: `{p['meta']['id']}`)"),
             shots_to_df(p.get("shots", [])),
+            gr.update(value=seed_val)
         )

-    load_btn.click(on_load, inputs=[load_file], outputs=[project, sb_status, shots_df])
+    load_btn.click(on_load, inputs=[load_file], outputs=[project, sb_status, shots_df, proj_seed_box])

 if __name__ == "__main__":
     demo.launch()