Shalmoni committed on
Commit
5362213
·
verified ·
1 Parent(s): 76a13c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -101
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
@@ -247,22 +247,17 @@ def generate_storyboard_with_llm(user_prompt: str, n_shots: int, default_fps: in
247
  return _normalize_shots(shots_raw, default_fps, default_len)
248
 
249
  # =========================
250
- # IMAGE GEN — FLUX first, SD-Turbo fallback
251
  # =========================
252
  USE_CUDA = torch.cuda.is_available()
253
  DTYPE = torch.float16 if USE_CUDA else torch.float32
254
 
255
- FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-Nano") # or "black-forest-labs/FLUX.1-dev"
256
- SD_MODEL = os.getenv("SD_MODEL", "stabilityai/sd-turbo")
257
 
258
  _flux_t2i = None
259
  _flux_i2i = None
260
- _sd_t2i = None
261
- _sd_i2i = None
262
- _have_flux = None
263
 
264
  def _lazy_flux_pipes():
265
- # Returns (t2i, i2i) or raises
266
  from diffusers import FluxPipeline, FluxImg2ImgPipeline
267
  global _flux_t2i, _flux_i2i
268
  if _flux_t2i is not None and _flux_i2i is not None:
@@ -273,40 +268,15 @@ def _lazy_flux_pipes():
273
  if USE_CUDA: _flux_i2i = _flux_i2i.to("cuda")
274
  return _flux_t2i, _flux_i2i
275
 
276
- def _lazy_sd_pipes():
277
- # Returns (t2i, i2i)
278
- from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
279
- global _sd_t2i, _sd_i2i
280
- if _sd_t2i is not None and _sd_i2i is not None:
281
- return _sd_t2i, _sd_i2i
282
- hf_token = os.getenv("HF_TOKEN", None)
283
- _sd_t2i = StableDiffusionPipeline.from_pretrained(
284
- SD_MODEL, torch_dtype=DTYPE, safety_checker=None, feature_extractor=None,
285
- use_safetensors=True, low_cpu_mem_usage=False, token=hf_token
286
- )
287
- if USE_CUDA: _sd_t2i = _sd_t2i.to("cuda")
288
- _sd_i2i = StableDiffusionImg2ImgPipeline(
289
- vae=_sd_t2i.vae, text_encoder=_sd_t2i.text_encoder, tokenizer=_sd_t2i.tokenizer,
290
- unet=_sd_t2i.unet, scheduler=_sd_t2i.scheduler,
291
- safety_checker=None, feature_extractor=None
292
- )
293
- if USE_CUDA: _sd_i2i = _sd_i2i.to("cuda")
294
- return _sd_t2i, _sd_i2i
295
-
296
- def _try_get_pipes():
297
- """Prefer FLUX; fall back to SD-Turbo. Returns (mode, t2i, i2i) where mode in {'flux','sd'}."""
298
- global _have_flux
299
- if _have_flux is None:
300
- try:
301
- t2i, i2i = _lazy_flux_pipes()
302
- _have_flux = True
303
- return "flux", t2i, i2i
304
- except Exception as e:
305
- _have_flux = False
306
- if _have_flux:
307
- return "flux", *_lazy_flux_pipes()
308
- else:
309
- return "sd", *_lazy_sd_pipes()
310
 
311
  def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
312
  pdir = project_dir(pid)
@@ -316,42 +286,47 @@ def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
316
 
317
  def _significant_change(curr_desc: str, prev_desc: str) -> bool:
318
  """
319
- Heuristic: if symmetric difference of tokens is large -> treat as a new scene,
320
- so we should text2img (seed keeps style) instead of img2img.
321
  """
322
  if not prev_desc: return True
323
  a = set(re.findall(r"\w+", curr_desc.lower()))
324
  b = set(re.findall(r"\w+", prev_desc.lower()))
325
- # weights: boost composition-y words
326
  comp_words = {"wide","close","low","high","overhead","aerial","profile","left","right","center",
327
  "portrait","landscape","long","establishing","macro","tilt","dutch","angle",
328
  "night","day","sunset","sunrise","noon","backlit","rim","key","fill"}
329
  delta = a.symmetric_difference(b)
330
  score = len(delta) + 2 * len((a ^ b) & comp_words)
331
- return score >= 12 # tune threshold 10–16
332
 
333
  @spaces.GPU(duration=180)
334
  def generate_keyframe_image(
335
  pid: str,
336
  shot_idx: int,
337
  shots: list,
338
- t2i_steps: int = 14, # FLUX likes 12–20
339
- i2i_steps: int = 16,
340
- i2i_strength: float = 0.8, # higher = follow prompt more
341
- guidance_scale: float = 3.0, # FLUX sweet spot ~2.5–3.5
342
  width: int = 640,
343
  height: int = 640
344
  ):
345
  """
346
- Generate image for shots[shot_idx].
347
  - shot 0: text2img
348
  - shot k>0: smart chaining
349
  * if significant change: text2img (same seed for style)
350
  * else: img2img from previous approved image
351
  """
352
- mode, t2i, i2i = _try_get_pipes()
353
- shot = shots[shot_idx]
 
 
 
 
 
354
 
 
355
  prompt = (shot.get("description") or "").strip()
356
  negative = shot.get("negative") or ""
357
  seed = shot.get("seed", None)
@@ -373,49 +348,27 @@ def generate_keyframe_image(
373
  prev_desc = shots[shot_idx - 1].get("description") or ""
374
  use_prev = not _significant_change(prompt, prev_desc)
375
 
376
- # invoke
377
- if mode == "flux":
378
- if not use_prev:
379
- out = t2i(
380
- prompt=prompt,
381
- negative_prompt=negative or None,
382
- num_inference_steps=int(max(8, t2i_steps)),
383
- guidance_scale=float(max(2.0, guidance_scale)),
384
- generator=gen,
385
- width=width, height=height
386
- ).images[0]
387
- else:
388
- init_image = Image.open(prev_path).convert("RGB")
389
- out = i2i(
390
- prompt=prompt,
391
- negative_prompt=negative or None,
392
- image=init_image,
393
- strength=float(min(max(i2i_strength, 0.5), 0.95)),
394
- num_inference_steps=int(max(10, i2i_steps)),
395
- guidance_scale=float(max(2.0, guidance_scale)),
396
- generator=gen
397
- ).images[0]
398
  else:
399
- # SD-turbo fallback (keep your original behavior but with less mushy defaults)
400
- if not use_prev:
401
- out = t2i(
402
- prompt=prompt,
403
- negative_prompt=negative,
404
- guidance_scale=1.0,
405
- num_inference_steps=int(max(6, t2i_steps//2)),
406
- generator=gen,
407
- width=width, height=height
408
- ).images[0]
409
- else:
410
- init_image = Image.open(prev_path).convert("RGB")
411
- out = i2i(
412
- prompt=prompt,
413
- negative_prompt=negative,
414
- image=init_image,
415
- strength=float(min(max(i2i_strength, 0.55), 0.9)),
416
- num_inference_steps=int(max(8, i2i_steps//2)),
417
- generator=gen
418
- ).images[0]
419
 
420
  saved_path = _save_keyframe(pid, int(shot["id"]), out)
421
  return saved_path
@@ -449,8 +402,13 @@ def df_to_shots(df: pd.DataFrame) -> list:
449
  # Gradio UI
450
  # =========================
451
  with gr.Blocks() as demo:
452
- gr.Markdown("# 🎬 Storyboard → Keyframes → Videos → Export")
453
- gr.Markdown("Edit storyboard prompts, then generate keyframes. **Smart chaining**: only reuse the previous image if the new prompt is similar; otherwise we regenerate from text with the same seed for style consistency.")
 
 
 
 
 
454
 
455
  # State
456
  project = gr.State(None)
@@ -497,10 +455,10 @@ with gr.Blocks() as demo:
497
  with gr.Row():
498
  gen_btn = gr.Button("Generate / Regenerate", variant="primary")
499
  approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
500
- # tuning controls (defaults tuned for FLUX; fallback will downshift)
501
  with gr.Row():
502
- img_strength = gr.Slider(0.50, 0.95, value=0.80, step=0.05, label="Change vs Consistency (img2img strength)")
503
- img_steps = gr.Slider(8, 28, value=16, step=1, label="Inference Steps (img2img)")
504
  guidance = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
505
  with gr.Row():
506
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
@@ -607,7 +565,7 @@ with gr.Blocks() as demo:
607
  p["meta"]["id"],
608
  int(idx),
609
  shots,
610
- t2i_steps=14, # tuned for FLUX
611
  i2i_steps=int(i2i_steps_val),
612
  i2i_strength=float(i2i_strength_val),
613
  guidance_scale=float(guidance_val),
@@ -673,4 +631,5 @@ with gr.Blocks() as demo:
673
  load_btn.click(on_load, inputs=[load_file], outputs=[project, sb_status, shots_df, proj_seed_box])
674
 
675
  if __name__ == "__main__":
 
676
  demo.launch()
 
1
+ # app.py (FLUX-only, smart chaining)
2
  import os, json, uuid, re
3
  from datetime import datetime
4
  import gradio as gr
 
247
  return _normalize_shots(shots_raw, default_fps, default_len)
248
 
249
  # =========================
250
+ # IMAGE GEN — FLUX only (no fallback)
251
  # =========================
252
  USE_CUDA = torch.cuda.is_available()
253
  DTYPE = torch.float16 if USE_CUDA else torch.float32
254
 
255
+ FLUX_MODEL = os.getenv("FLUX_MODEL", "black-forest-labs/FLUX.1-Nano")
 
256
 
257
  _flux_t2i = None
258
  _flux_i2i = None
 
 
 
259
 
260
  def _lazy_flux_pipes():
 
261
  from diffusers import FluxPipeline, FluxImg2ImgPipeline
262
  global _flux_t2i, _flux_i2i
263
  if _flux_t2i is not None and _flux_i2i is not None:
 
268
  if USE_CUDA: _flux_i2i = _flux_i2i.to("cuda")
269
  return _flux_t2i, _flux_i2i
270
 
271
+ def _flux_healthcheck():
272
+ try:
273
+ _lazy_flux_pipes()
274
+ return True
275
+ except Exception as e:
276
+ raise RuntimeError(
277
+ f"FLUX failed to initialize: {e}\n"
278
+ f"FLUX_MODEL='{FLUX_MODEL}'. If the repo is gated/private, set HF_TOKEN in env."
279
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  def _save_keyframe(pid: str, shot_id: int, img: Image.Image) -> str:
282
  pdir = project_dir(pid)
 
286
 
287
  def _significant_change(curr_desc: str, prev_desc: str) -> bool:
288
  """
289
+ If token-level symmetric difference is large, treat as a new scene:
290
+ do text2img (same seed) instead of img2img to avoid 'mush'.
291
  """
292
  if not prev_desc: return True
293
  a = set(re.findall(r"\w+", curr_desc.lower()))
294
  b = set(re.findall(r"\w+", prev_desc.lower()))
 
295
  comp_words = {"wide","close","low","high","overhead","aerial","profile","left","right","center",
296
  "portrait","landscape","long","establishing","macro","tilt","dutch","angle",
297
  "night","day","sunset","sunrise","noon","backlit","rim","key","fill"}
298
  delta = a.symmetric_difference(b)
299
  score = len(delta) + 2 * len((a ^ b) & comp_words)
300
+ return score >= 10 # more eager to break chaining
301
 
302
  @spaces.GPU(duration=180)
303
  def generate_keyframe_image(
304
  pid: str,
305
  shot_idx: int,
306
  shots: list,
307
+ t2i_steps: int = 16, # FLUX: 12–20
308
+ i2i_steps: int = 18, # FLUX: 14–22
309
+ i2i_strength: float = 0.85, # higher -> follow prompt more
310
+ guidance_scale: float = 3.0, # FLUX sweet spot: ~2.8–3.2
311
  width: int = 640,
312
  height: int = 640
313
  ):
314
  """
315
+ Generate image for shots[shot_idx] using FLUX only.
316
  - shot 0: text2img
317
  - shot k>0: smart chaining
318
  * if significant change: text2img (same seed for style)
319
  * else: img2img from previous approved image
320
  """
321
+ try:
322
+ t2i, i2i = _lazy_flux_pipes()
323
+ except Exception as e:
324
+ raise gr.Error(
325
+ f"FLUX failed to load: {e}\n"
326
+ "Set FLUX_MODEL (e.g., 'black-forest-labs/FLUX.1-Nano') and ensure HF_TOKEN if required."
327
+ )
328
 
329
+ shot = shots[shot_idx]
330
  prompt = (shot.get("description") or "").strip()
331
  negative = shot.get("negative") or ""
332
  seed = shot.get("seed", None)
 
348
  prev_desc = shots[shot_idx - 1].get("description") or ""
349
  use_prev = not _significant_change(prompt, prev_desc)
350
 
351
+ # generate
352
+ if not use_prev:
353
+ out = t2i(
354
+ prompt=prompt,
355
+ negative_prompt=negative or None,
356
+ num_inference_steps=int(max(8, t2i_steps)),
357
+ guidance_scale=float(max(2.0, guidance_scale)),
358
+ generator=gen,
359
+ width=width, height=height
360
+ ).images[0]
 
 
 
 
 
 
 
 
 
 
 
 
361
  else:
362
+ init_image = Image.open(prev_path).convert("RGB")
363
+ out = i2i(
364
+ prompt=prompt,
365
+ negative_prompt=negative or None,
366
+ image=init_image,
367
+ strength=float(min(max(i2i_strength, 0.5), 0.95)),
368
+ num_inference_steps=int(max(10, i2i_steps)),
369
+ guidance_scale=float(max(2.0, guidance_scale)),
370
+ generator=gen
371
+ ).images[0]
 
 
 
 
 
 
 
 
 
 
372
 
373
  saved_path = _save_keyframe(pid, int(shot["id"]), out)
374
  return saved_path
 
402
  # Gradio UI
403
  # =========================
404
  with gr.Blocks() as demo:
405
+ gr.Markdown("# 🎬 Storyboard → Keyframes → (Videos soon) → Export")
406
+ gr.Markdown(
407
+ "Edit storyboard prompts, then generate keyframes. "
408
+ "**Smart chaining**: only reuse the previous image if the new prompt is similar; "
409
+ "otherwise we regenerate from text with the same seed for style consistency. "
410
+ "**Model**: FLUX-only."
411
+ )
412
 
413
  # State
414
  project = gr.State(None)
 
455
  with gr.Row():
456
  gen_btn = gr.Button("Generate / Regenerate", variant="primary")
457
  approve_next_btn = gr.Button("Approve & Next →", variant="secondary")
458
+ # tuning controls (defaults tuned for FLUX)
459
  with gr.Row():
460
+ img_strength = gr.Slider(0.50, 0.95, value=0.85, step=0.05, label="Change vs Consistency (img2img strength)")
461
+ img_steps = gr.Slider(8, 28, value=18, step=1, label="Inference Steps (img2img)")
462
  guidance = gr.Slider(2.0, 4.0, value=3.0, step=0.1, label="Guidance Scale")
463
  with gr.Row():
464
  prev_img = gr.Image(label="Previous approved image (conditioning)", type="filepath")
 
565
  p["meta"]["id"],
566
  int(idx),
567
  shots,
568
+ t2i_steps=16,
569
  i2i_steps=int(i2i_steps_val),
570
  i2i_strength=float(i2i_strength_val),
571
  guidance_scale=float(guidance_val),
 
631
  load_btn.click(on_load, inputs=[load_file], outputs=[project, sb_status, shots_df, proj_seed_box])
632
 
633
  if __name__ == "__main__":
634
+ _flux_healthcheck() # fail fast with clear error if FLUX isn't available
635
  demo.launch()