Spaces:

MoonMath-ai
/

StitchTool

Running

App Files Files Community

Shalmoni committed on Sep 17

Commit

0a73965

verified ·

1 Parent(s): 809f869

Update app.py

Browse files

Files changed (1) hide show

app.py +197 -89

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ HORDE_STATUS = "https://stablehorde.net/api/v2/generate/status/{id}"
 # HF Space secret recommended for priority
 HORDE_API_KEY = os.getenv("HORDE_API_KEY", "")
-CLIENT_AGENT = "StitchMaster/0.1 (https://huggingface.co/spaces/your-space)"
 DEFAULT_STEPS = 24
 DEFAULT_W = 704   # keep defaults under the 715px threshold
@@ -29,19 +29,111 @@ def _headers():
         "apikey": HORDE_API_KEY if HORDE_API_KEY else "0000000000"
     }
 # =========================
-# Horde client with debugging
 # =========================
-def horde_txt2img(prompt: str,
-                  steps: int = DEFAULT_STEPS,
-                  width: int = DEFAULT_W,
-                  height: int = DEFAULT_H,
-                  model: Optional[str] = MODEL):
     dbg = []
-    if not prompt or not prompt.strip():
         raise gr.Error("Please enter a prompt.")
     payload = {
         "prompt": prompt.strip(),
         "params": {
@@ -52,49 +144,29 @@ def horde_txt2img(prompt: str,
         },
         "nsfw": False,
         "censor_nsfw": True,
-        # Ask Horde to return a CDN URL if available (many deployments support this)
         "r2": True
     }
     if model:
         payload["models"] = [model]
-    # -------- Submit (with KudosUpfront fallback) --------
     try:
-        submit = requests.post(HORDE_URL, json=payload, headers=_headers(), timeout=30)
-        # Auto-fallback if KudosUpfront required
-        if submit.status_code == 403:
-            try:
-                body = submit.json()
-            except Exception:
-                body = {"message": submit.text}
-            msg = (body.get("message") or "").lower()
-            rc = body.get("rc") or ""
-            if "kudos" in msg or rc == "KudosUpfront":
-                # Clamp params to stay under upfront limits
-                payload["params"]["steps"]  = min(int(payload["params"]["steps"]), 30)
-                payload["params"]["width"]  = min(int(payload["params"]["width"]), 704)
-                payload["params"]["height"] = min(int(payload["params"]["height"]), 704)
-                dbg.append("Fallback applied: steps<=30, width/height<=704. Retrying submit...")
-                submit = requests.post(HORDE_URL, json=payload, headers=_headers(), timeout=30)
-        dbg.append(f"SUBMIT status={submit.status_code}")
         if submit.status_code >= 300:
             dbg.append(f"SUBMIT body={submit.text[:500]}")
             submit.raise_for_status()
         submit_j = submit.json()
         job_id = submit_j.get("id")
         if not job_id:
             dbg.append(f"SUBMIT json={submit_j}")
             raise gr.Error("Horde submit succeeded but no job id returned.")
         dbg.append(f"JOB id={job_id}")
     except Exception:
         dbg.append("SUBMIT exception:\n" + traceback.format_exc())
         return None, "\n".join(dbg)
-    # -------- Poll --------
     start = time.time()
     while True:
         try:
@@ -105,7 +177,6 @@ def horde_txt2img(prompt: str,
                 status_r.raise_for_status()
             s = status_r.json()
-            # progress info
             k = s.get("kudos", "?")
             queue = s.get("queue_position", "?")
             eta = s.get("wait_time", "?")
@@ -125,7 +196,7 @@ def horde_txt2img(prompt: str,
                 dbg.append(f"GEN keys: {list(g0.keys())}")
                 dbg.append(f"img_type: {g0.get('img_type')}")
-                # Prefer URL fields if present
                 url = g0.get("r2") or g0.get("url") or g0.get("src") or g0.get("image_url")
                 if isinstance(url, str) and (url.startswith("http://") or url.startswith("https://")):
                     dbg.append("Found URL in generation → fetching…")
@@ -133,31 +204,17 @@ def horde_txt2img(prompt: str,
                         r = requests.get(url, timeout=60)
                         r.raise_for_status()
                         img_bytes = r.content
                     except Exception as e:
                         dbg.append(f"URL fetch failed: {type(e).__name__}: {e}")
                         return None, "\n".join(dbg)
-                    return _decode_bytes_to_image(img_bytes, dbg)
-                # Else fall back to base64 field
                 b64 = g0.get("img")
                 if not b64:
                     dbg.append("No 'img' field present.")
                     return None, "\n".join(dbg)
-                # If 'img' looks like URL text (rare), just fetch it
-                if b64.startswith("http://") or b64.startswith("https://"):
-                    dbg.append("img field is a URL string → fetching…")
-                    try:
-                        r = requests.get(b64, timeout=60)
-                        r.raise_for_status()
-                        img_bytes = r.content
-                    except Exception as e:
-                        dbg.append(f"URL fetch failed: {type(e).__name__}: {e}")
-                        return None, "\n".join(dbg)
-                    return _decode_bytes_to_image(img_bytes, dbg)
-                # Base64 path
-                # 1) fix base64 padding if needed
                 pad = (-len(b64)) % 4
                 if pad:
                     b64 = b64 + ("=" * pad)
@@ -165,11 +222,9 @@ def horde_txt2img(prompt: str,
                     img_bytes = base64.b64decode(b64, validate=False)
                 except binascii.Error as e:
                     dbg.append(f"Base64 decode error: {e}")
-                    # Try to interpret as text (maybe it's a URL encoded in base64)
                     try:
-                        txt = base64.b64decode(b64 + "==", validate=False).decode("utf-8", "ignore").strip()
                         if txt.startswith("http"):
-                            dbg.append("Base64 decoded to text URL → fetching…")
                             r = requests.get(txt, timeout=60)
                             r.raise_for_status()
                             img_bytes = r.content
@@ -192,18 +247,14 @@ def horde_txt2img(prompt: str,
             return None, "\n".join(dbg)
 def _decode_bytes_to_image(img_bytes: bytes, dbg: list[str]):
-    # Log header
     head = img_bytes[:12]
     dbg.append(f"header bytes: {head.hex(' ')}")
-    # Try Pillow first
     try:
         img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
         return img, "\n".join(dbg)
     except Exception as e:
         dbg.append(f"PIL decode failed: {type(e).__name__}: {e}")
-    # Fallback: imageio
     try:
         arr = imageio.imread(io.BytesIO(img_bytes))
         if isinstance(arr, np.ndarray):
@@ -217,7 +268,6 @@ def _decode_bytes_to_image(img_bytes: bytes, dbg: list[str]):
     except Exception as e:
         dbg.append(f"imageio decode failed: {type(e).__name__}: {e}")
-    # Last resort: save bytes for inspection
     try:
         tmp = f"unknown_img_{int(time.time())}.bin"
         with open(tmp, "wb") as f:
@@ -228,17 +278,33 @@ def _decode_bytes_to_image(img_bytes: bytes, dbg: list[str]):
     return None, "\n".join(dbg)
-def generate_image(prompt, steps, size):
-    # size like "704x704"
-    try:
-        w, h = [int(x.strip()) for x in size.lower().split("x")]
-    except Exception:
-        w, h = DEFAULT_W, DEFAULT_H
-    img, debug = horde_txt2img(prompt, steps=steps, width=w, height=h)
     if img is None:
         gr.Warning("Generation failed. See debug log for details.")
     return img, debug
 # =========================
 # UI
 # =========================
@@ -267,8 +333,9 @@ CUSTOM_CSS = """
 }
 """
-with gr.Blocks(css=CUSTOM_CSS, title="Image Checkpoints – Stable Horde") as demo:
-    gr.Markdown("### Image Checkpoints (Stable Horde)\nGenerate per-prompt frames. If a run fails, check the debug panel.")
     with gr.Row():
         steps = gr.Slider(8, 50, value=DEFAULT_STEPS, step=1, label="Steps (quality/time)")
@@ -277,31 +344,72 @@ with gr.Blocks(css=CUSTOM_CSS, title="Image Checkpoints – Stable Horde") as de
             value=f"{DEFAULT_W}x{DEFAULT_H}",
             label="Resolution"
         )
     # Shared debug panel
     debug_box = gr.Code(label="Debug log", interactive=False)
-    prompt_boxes, gen_buttons, img_outputs = [], [], []
-    for i in range(1, 5):
-        with gr.Row():
-            with gr.Column(scale=1, min_width=320):
-                p = gr.Textbox(
-                    placeholder=f"Prompt input (Image {i})",
-                    lines=4,
-                    label=None,
-                    elem_classes=["prompt-box"]
-                )
-                b = gr.Button(f"Generate image {i}", elem_classes=["pill"])
-            with gr.Column(scale=2, min_width=380):
-                img = gr.Image(label=f"Image {i} output", type="pil", elem_classes=["image-out"])
-        prompt_boxes.append(p); gen_buttons.append(b); img_outputs.append(img)
-    for i in range(4):
-        gen_buttons[i].click(
-            fn=generate_image,
-            inputs=[prompt_boxes[i], steps, size],
-            outputs=[img_outputs[i], debug_box]
-        )
 if __name__ == "__main__":
     demo.queue().launch()

 # HF Space secret recommended for priority
 HORDE_API_KEY = os.getenv("HORDE_API_KEY", "")
+CLIENT_AGENT = "StitchMaster/0.2 (https://huggingface.co/spaces/your-space)"
 DEFAULT_STEPS = 24
 DEFAULT_W = 704   # keep defaults under the 715px threshold
         "apikey": HORDE_API_KEY if HORDE_API_KEY else "0000000000"
     }
def pil_to_b64(img_pil: Image.Image) -> str:
    """Encode a PIL image as PNG and return the bytes as base64 text.

    The image is written to an in-memory buffer (nothing touches disk) and
    the resulting PNG bytes are base64-encoded and decoded to a ``str``.
    """
    with io.BytesIO() as png_stream:
        img_pil.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")
def build_prompt(user_text: str, is_first: bool, lock_longshot: bool = True) -> str:
    """Compose continuity-aware prompt text.

    Args:
        user_text: Free-form description typed by the user. ``None`` or
            whitespace-only text is treated as empty.
        is_first: ``True`` for the opening frame; ``False`` for a follow-up
            frame that should continue from the previous one.
        lock_longshot: When true, append the "single continuous long shot"
            camera constraints to the prompt.

    Returns:
        The full prompt string. It is never empty: an empty ``user_text``
        yields a generic opening / "advance the action" instruction.
    """
    user_text = (user_text or "").strip()
    longshot_plus = (
        "single continuous long shot; no cuts or new shot; no angle switch; "
        "smooth camera motion (pan/tilt/zoom only); unbroken continuity"
    )

    if is_first:
        base = f"Opening frame. {user_text}" if user_text else "Opening frame."
    else:
        # Subsequent frames
        next_moment = user_text if user_text else "advance the action naturally."
        base = (
            "Treat the previous frame as a still from the same continuous long shot. "
            "Maintain style, subject identity, lighting, and camera continuity. "
            f"Generate the next moment: {next_moment}"
        )

    if lock_longshot:
        # The text may already end a sentence ("Opening frame.", the default
        # "…naturally.", or user text with its own punctuation). Only insert
        # a period when one is missing, so we never emit "..".
        separator = " " if base.endswith((".", "!", "?")) else ". "
        base += separator + longshot_plus
    return base
 # =========================
+# Horde client with debugging (txt2img OR img2img)
 # =========================
+def horde_generate(
+    prompt: str,
+    steps: int = DEFAULT_STEPS,
+    width: int = DEFAULT_W,
+    height: int = DEFAULT_H,
+    model: Optional[str] = MODEL,
+    init_image: Optional[Image.Image] = None,
+    denoise: float = 0.45,                # 0.0 = identical, 1.0 = big change
+):
+    """
+    If init_image is provided, tries img2img first (source_image + source_processing='img2img').
+    Falls back to txt2img if Horde rejects it.
+    """
     dbg = []
+    if not (prompt and prompt.strip()):
         raise gr.Error("Please enter a prompt.")
+    def _submit(payload):
+        sub = requests.post(HORDE_URL, json=payload, headers=_headers(), timeout=30)
+        # Auto-fallback if KudosUpfront required
+        if sub.status_code == 403:
+            try:
+                body = sub.json()
+            except Exception:
+                body = {"message": sub.text}
+            msg = (body.get("message") or "").lower()
+            rc = body.get("rc") or ""
+            if "kudos" in msg or rc == "KudosUpfront":
+                payload["params"]["steps"]  = min(int(payload["params"]["steps"]), 30)
+                payload["params"]["width"]  = min(int(payload["params"]["width"]), 704)
+                payload["params"]["height"] = min(int(payload["params"]["height"]), 704)
+                dbg.append("Fallback applied: steps<=30, width/height<=704. Retrying submit...")
+                sub = requests.post(HORDE_URL, json=payload, headers=_headers(), timeout=30)
+        return sub
+    # ---- try img2img if init_image provided ----
+    tried_img2img = False
+    if init_image is not None:
+        tried_img2img = True
+        payload = {
+            "prompt": prompt.strip(),
+            "params": {
+                "steps": int(steps),
+                "width": int(width),
+                "height": int(height),
+                "n": 1,
+                "denoise": float(denoise)
+            },
+            "nsfw": False,
+            "censor_nsfw": True,
+            "source_processing": "img2img",
+            "source_image": pil_to_b64(init_image),
+            "r2": True
+        }
+        if model:
+            payload["models"] = [model]
+        try:
+            submit = _submit(payload)
+            dbg.append(f"SUBMIT (img2img) status={submit.status_code}")
+            if submit.status_code >= 300:
+                dbg.append(f"SUBMIT body={submit.text[:500]}")
+                submit.raise_for_status()
+            submit_j = submit.json()
+            job_id = submit_j.get("id")
+            if not job_id:
+                dbg.append(f"SUBMIT json={submit_j}")
+                raise gr.Error("Horde submit succeeded but no job id returned.")
+            dbg.append(f"JOB id={job_id}")
+            # Poll & decode
+            return _poll_and_decode(job_id, dbg)
+        except Exception:
+            dbg.append("IMG2IMG path failed, falling back to text-only:\n" + traceback.format_exc())
+    # ---- txt2img path ----
     payload = {
         "prompt": prompt.strip(),
         "params": {
         },
         "nsfw": False,
         "censor_nsfw": True,
         "r2": True
     }
     if model:
         payload["models"] = [model]
     try:
+        submit = _submit(payload)
+        dbg.append(f"SUBMIT (txt2img{', after img2img fail' if tried_img2img else ''}) status={submit.status_code}")
         if submit.status_code >= 300:
             dbg.append(f"SUBMIT body={submit.text[:500]}")
             submit.raise_for_status()
         submit_j = submit.json()
         job_id = submit_j.get("id")
         if not job_id:
             dbg.append(f"SUBMIT json={submit_j}")
             raise gr.Error("Horde submit succeeded but no job id returned.")
         dbg.append(f"JOB id={job_id}")
+        return _poll_and_decode(job_id, dbg)
     except Exception:
         dbg.append("SUBMIT exception:\n" + traceback.format_exc())
         return None, "\n".join(dbg)
+def _poll_and_decode(job_id: str, dbg: list[str]):
     start = time.time()
     while True:
         try:
                 status_r.raise_for_status()
             s = status_r.json()
             k = s.get("kudos", "?")
             queue = s.get("queue_position", "?")
             eta = s.get("wait_time", "?")
                 dbg.append(f"GEN keys: {list(g0.keys())}")
                 dbg.append(f"img_type: {g0.get('img_type')}")
+                # Prefer URL if present
                 url = g0.get("r2") or g0.get("url") or g0.get("src") or g0.get("image_url")
                 if isinstance(url, str) and (url.startswith("http://") or url.startswith("https://")):
                     dbg.append("Found URL in generation → fetching…")
                         r = requests.get(url, timeout=60)
                         r.raise_for_status()
                         img_bytes = r.content
+                        return _decode_bytes_to_image(img_bytes, dbg)
                     except Exception as e:
                         dbg.append(f"URL fetch failed: {type(e).__name__}: {e}")
                         return None, "\n".join(dbg)
+                # Base64 branch
                 b64 = g0.get("img")
                 if not b64:
                     dbg.append("No 'img' field present.")
                     return None, "\n".join(dbg)
                 pad = (-len(b64)) % 4
                 if pad:
                     b64 = b64 + ("=" * pad)
                     img_bytes = base64.b64decode(b64, validate=False)
                 except binascii.Error as e:
                     dbg.append(f"Base64 decode error: {e}")
                     try:
+                        txt = base64.b64decode(b64 + '==', validate=False).decode("utf-8", "ignore").strip()
                         if txt.startswith("http"):
                             r = requests.get(txt, timeout=60)
                             r.raise_for_status()
                             img_bytes = r.content
             return None, "\n".join(dbg)
 def _decode_bytes_to_image(img_bytes: bytes, dbg: list[str]):
     head = img_bytes[:12]
     dbg.append(f"header bytes: {head.hex(' ')}")
     try:
         img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
         return img, "\n".join(dbg)
     except Exception as e:
         dbg.append(f"PIL decode failed: {type(e).__name__}: {e}")
     try:
         arr = imageio.imread(io.BytesIO(img_bytes))
         if isinstance(arr, np.ndarray):
     except Exception as e:
         dbg.append(f"imageio decode failed: {type(e).__name__}: {e}")
     try:
         tmp = f"unknown_img_{int(time.time())}.bin"
         with open(tmp, "wb") as f:
     return None, "\n".join(dbg)
+# =========================
+# Gradio glue
+# =========================
def generate_opening(prompt_text, steps, size, lock):
    """Gradio callback that generates the opening frame (text-only).

    Parses the resolution string, builds the "opening frame" prompt and
    calls ``horde_generate`` with no init image. Returns ``(image, debug_log)``;
    when the image is ``None`` a Gradio warning is raised for the user and the
    debug log is still returned.
    """
    width, height = _parse_size(size)
    opening_prompt = build_prompt(prompt_text, is_first=True, lock_longshot=lock)
    result = horde_generate(
        opening_prompt,
        steps=steps,
        width=width,
        height=height,
        init_image=None,
    )
    image, debug_log = result
    if image is None:
        gr.Warning("Generation failed. See debug log for details.")
    return image, debug_log
def generate_next(prompt_text, steps, size, lock, prev_img, change):
    """Gradio callback that generates a follow-up frame from the previous one.

    Builds the continuation prompt and passes ``prev_img`` to
    ``horde_generate`` as the init image, with ``change`` as the denoise
    strength. If ``prev_img`` is not a PIL image (for example, nothing has
    been generated yet) no init image is sent. Returns ``(image, debug_log)``
    and shows a Gradio warning when the image is ``None``.
    """
    width, height = _parse_size(size)
    continuation_prompt = build_prompt(prompt_text, is_first=False, lock_longshot=lock)

    # Only forward a real PIL image; anything else means "no init image".
    init_img = None
    if isinstance(prev_img, Image.Image):
        init_img = prev_img

    image, debug_log = horde_generate(
        continuation_prompt,
        steps=steps,
        width=width,
        height=height,
        init_image=init_img,
        denoise=float(change),
    )
    if image is None:
        gr.Warning("Generation failed. See debug log for details.")
    return image, debug_log
+def _parse_size(s):
+    try:
+        w, h = [int(x.strip()) for x in str(s).lower().split("x")]
+    except Exception:
+        w, h = DEFAULT_W, DEFAULT_H
+    return w, h
 # =========================
 # UI
 # =========================
 }
 """
+with gr.Blocks(css=CUSTOM_CSS, title="Image Checkpoints – Stable Horde (txt2img + img2img)") as demo:
+    gr.Markdown("### Image Checkpoints (Stable Horde) — Opening shot + next scenes\n"
+                "Image 2–4 use the previous output as the init image (img2img) with a continuity slider.")
     with gr.Row():
         steps = gr.Slider(8, 50, value=DEFAULT_STEPS, step=1, label="Steps (quality/time)")
             value=f"{DEFAULT_W}x{DEFAULT_H}",
             label="Resolution"
         )
+        lock = gr.Checkbox(value=True, label="Lock camera (long shot, no cuts)")
+    # Continuity / denoise slider for img2img steps (2–4)
+    change = gr.Slider(0.05, 0.95, value=0.45, step=0.05, label="Change from previous frame (denoise)")
     # Shared debug panel
     debug_box = gr.Code(label="Debug log", interactive=False)
+    # ---- Row 1: Opening shot ----
+    with gr.Row():
+        with gr.Column(scale=1, min_width=320):
+            p1 = gr.Textbox(
+                placeholder="Describe the opening shot",
+                lines=4,
+                label=None,
+                elem_classes=["prompt-box"]
+            )
+            b1 = gr.Button("Generate image 1", elem_classes=["pill"])
+        with gr.Column(scale=2, min_width=380):
+            img1 = gr.Image(label="Image 1 output", type="pil", elem_classes=["image-out"])
+    # ---- Row 2: Next scene ----
+    with gr.Row():
+        with gr.Column(scale=1, min_width=320):
+            p2 = gr.Textbox(
+                placeholder="Describe the next scene",
+                lines=4,
+                label=None,
+                elem_classes=["prompt-box"]
+            )
+            b2 = gr.Button("Generate image 2", elem_classes=["pill"])
+        with gr.Column(scale=2, min_width=380):
+            img2 = gr.Image(label="Image 2 output", type="pil", elem_classes=["image-out"])
+    # ---- Row 3: Next scene ----
+    with gr.Row():
+        with gr.Column(scale=1, min_width=320):
+            p3 = gr.Textbox(
+                placeholder="Describe the next scene",
+                lines=4,
+                label=None,
+                elem_classes=["prompt-box"]
+            )
+            b3 = gr.Button("Generate image 3", elem_classes=["pill"])
+        with gr.Column(scale=2, min_width=380):
+            img3 = gr.Image(label="Image 3 output", type="pil", elem_classes=["image-out"])
+    # ---- Row 4: Next scene ----
+    with gr.Row():
+        with gr.Column(scale=1, min_width=320):
+            p4 = gr.Textbox(
+                placeholder="Describe the next scene",
+                lines=4,
+                label=None,
+                elem_classes=["prompt-box"]
+            )
+            b4 = gr.Button("Generate image 4", elem_classes=["pill"])
+        with gr.Column(scale=2, min_width=380):
+            img4 = gr.Image(label="Image 4 output", type="pil", elem_classes=["image-out"])
+    # Wire callbacks
+    b1.click(fn=generate_opening, inputs=[p1, steps, size, lock], outputs=[img1, debug_box])
+    b2.click(fn=generate_next,     inputs=[p2, steps, size, lock, img1, change], outputs=[img2, debug_box])
+    b3.click(fn=generate_next,     inputs=[p3, steps, size, lock, img2, change], outputs=[img3, debug_box])
+    b4.click(fn=generate_next,     inputs=[p4, steps, size, lock, img3, change], outputs=[img4, debug_box])
 if __name__ == "__main__":
     demo.queue().launch()