joeaa17 committed on
Commit
217e2c7
·
verified ·
1 Parent(s): 6f5220b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -48
app.py CHANGED
@@ -1,24 +1,32 @@
1
  #!/usr/bin/env python
 
2
 
3
- import os, random, numpy as np, cv2, torch
4
- import gradio as gr
5
  from pathlib import Path
6
- from PIL import Image, ImageOps
7
- import PIL.Image
8
 
 
 
 
 
 
9
  import spaces
 
 
 
 
 
10
  from diffusers import (
11
  ControlNetModel,
12
  StableDiffusionXLControlNetPipeline,
13
  AutoencoderKL,
14
  EulerAncestralDiscreteScheduler,
15
  )
16
- from controlnet_aux import HEDdetector
17
- from gradio_imageslider import ImageSlider
18
 
19
  # ──────────────────────────────────────────────────────────────────────────────
20
- # Small JS helper to force dark theme (kept from your version)
21
  # ──────────────────────────────────────────────────────────────────────────────
 
22
  js_func = """
23
  function refresh() {
24
  const url = new URL(window.location);
@@ -29,20 +37,18 @@ function refresh() {
29
  }
30
  """
31
 
32
- # ──────────────────────────────────────────────────────────────────────────────
33
- # UI text
34
- # ──────────────────────────────────────────────────────────────────────────────
35
  DESCRIPTION = '''# Scribble SDXL 🖋️🌄 — live updates
36
- Sketch → image with SDXL ControlNet (scribble/canny). Now with **auto re-inference** when you draw or tweak settings (debounced).
37
- Models: [xinsir/controlnet-scribble-sdxl-1.0], [xinsir/controlnet-canny-sdxl-1.0], base [stabilityai/stable-diffusion-xl-base-1.0]
38
  '''
39
 
40
  if not torch.cuda.is_available():
41
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces for good latency.</p>"
42
 
43
  # ──────────────────────────────────────────────────────────────────────────────
44
- # Styles (unchanged, but refactored into a compact mapping)
45
  # ──────────────────────────────────────────────────────────────────────────────
 
46
  style_list = [
47
  {
48
  "name": "(No style)",
@@ -61,7 +67,7 @@ style_list = [
61
  },
62
  {
63
  "name": "Anime",
64
- "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
65
  "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
66
  },
67
  {
@@ -106,6 +112,7 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str
106
  # ──────────────────────────────────────────────────────────────────────────────
107
  # Utilities
108
  # ──────────────────────────────────────────────────────────────────────────────
 
109
  def HWC3(x: np.ndarray) -> np.ndarray:
110
  assert x.dtype == np.uint8
111
  if x.ndim == 2:
@@ -137,46 +144,58 @@ def nms(x, t, s):
137
  return z
138
 
139
  def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
140
- """Scale so that w*h ≈ max_mpx*1e6 (default ~1024x1024 area)."""
141
  area = w * h
142
  target = max_mpx * 1_000_000.0
143
  if area <= target:
144
- return w, h
145
  r = (target / area) ** 0.5
146
- return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8 # SDXL likes multiples of 8
147
 
148
  # ──────────────────────────────────────────────────────────────────────────────
149
- # Load models once
150
  # ──────────────────────────────────────────────────────────────────────────────
 
151
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
152
 
153
  scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
154
- "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
 
 
155
  )
156
 
157
  controlnet_scribble = ControlNetModel.from_pretrained(
158
- "xinsir/controlnet-scribble-sdxl-1.0", torch_dtype=torch.float16 if device.type=="cuda" else torch.float32
 
 
159
  )
160
  controlnet_canny = ControlNetModel.from_pretrained(
161
- "xinsir/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16 if device.type=="cuda" else torch.float32
 
 
162
  )
163
  vae = AutoencoderKL.from_pretrained(
164
- "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 if device.type=="cuda" else torch.float32
 
 
165
  )
166
 
167
  pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
168
  "stabilityai/stable-diffusion-xl-base-1.0",
169
  controlnet=controlnet_scribble,
170
  vae=vae,
171
- torch_dtype=torch.float16 if device.type=="cuda" else torch.float32,
172
  scheduler=scheduler,
 
 
173
  )
174
  pipe_canny = StableDiffusionXLControlNetPipeline.from_pretrained(
175
  "stabilityai/stable-diffusion-xl-base-1.0",
176
  controlnet=controlnet_canny,
177
  vae=vae,
178
- torch_dtype=torch.float16 if device.type=="cuda" else torch.float32,
179
  scheduler=scheduler,
 
 
180
  )
181
 
182
  for p in (pipe_scribble, pipe_canny):
@@ -192,12 +211,12 @@ MAX_SEED = np.iinfo(np.int32).max
192
  hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
193
 
194
  # ──────────────────────────────────────────────────────────────────────────────
195
- # Core inference
196
  # ──────────────────────────────────────────────────────────────────────────────
197
- def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image:
 
198
  """
199
- Accepts the dict from gr.ImageEditor (contains 'composite'), or a PIL.Image.
200
- Returns a PIL.Image with control map (scribble/canny/hed result).
201
  """
202
  if image_editor_value is None:
203
  return None
@@ -209,7 +228,6 @@ def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -
209
  else:
210
  return None
211
 
212
- # Convert to RGB for detectors
213
  if img.mode != "RGB":
214
  img = img.convert("RGB")
215
 
@@ -224,14 +242,12 @@ def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -
224
  control = np.array(control)
225
  control = nms(control, 127, 3)
226
  control = cv2.GaussianBlur(control, (0, 0), 3)
227
-
228
- # Simulate human sketch width with a soft random threshold
229
- thr = int(round(random.uniform(0.01, 0.10), 2) * 255)
230
  control[control > thr] = 255
231
  control[control < 255] = 0
232
  return Image.fromarray(control)
233
 
234
- # Default: use the editor composite as "scribble"
235
  return img
236
 
237
  def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
@@ -256,6 +272,10 @@ def _maybe_seed(seed: int):
256
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
257
  return random.randint(0, MAX_SEED) if randomize_seed else int(seed)
258
 
 
 
 
 
259
  @spaces.GPU
260
  def run(
261
  image, # dict from ImageEditor or PIL.Image
@@ -265,7 +285,7 @@ def run(
265
  num_steps: int = 12,
266
  guidance_scale: float = 5.0,
267
  controlnet_conditioning_scale: float = 1.0,
268
- seed: int = 0,
269
  use_hed: bool = False,
270
  use_canny: bool = False,
271
  progress=gr.Progress(track_tqdm=True),
@@ -273,13 +293,10 @@ def run(
273
  if image is None or (isinstance(prompt, str) and prompt.strip() == ""):
274
  return (None, None)
275
 
276
- # Prepare control image + target size (β‰ˆ1MP for speed)
277
  ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
278
- w, h = _image_size_from_editor(image, target_mpx=1.0)
279
 
280
- # Style injection
281
  prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
282
-
283
  g = _maybe_seed(seed)
284
  pipe = _pick_pipe(use_canny)
285
 
@@ -294,16 +311,12 @@ def run(
294
  width=w, height=h,
295
  ).images[0]
296
 
297
- # Return (control, output) for ImageSlider
298
- if isinstance(ctrl_img, Image.Image):
299
- ci = ctrl_img
300
- else:
301
- ci = Image.fromarray(ctrl_img) if ctrl_img is not None else None
302
- return (ci, out)
303
 
304
  # ──────────────────────────────────────────────────────────────────────────────
305
- # UI (with live updates wired via .change on inputs)
306
  # ──────────────────────────────────────────────────────────────────────────────
 
307
  with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as demo:
308
  gr.Markdown(DESCRIPTION, elem_id="description")
309
 
@@ -347,13 +360,12 @@ with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL β€” Live") as d
347
  ]
348
  outputs = [image_slider]
349
 
350
- # Manual "Run" flow (seed randomization, clear slider, then infer)
351
  run_button.click(
352
  fn=randomize_seed_fn, inputs=[seed, randomize_seed], outputs=seed, queue=False, api_name=False
353
  ).then(lambda: None, inputs=None, outputs=image_slider).then(fn=run, inputs=inputs, outputs=outputs)
354
 
355
- # ── Live re-inference hooks (debounced) ───────────────────────────────────
356
- # Fire when drawing or tweaking settings. 'every' = debounce seconds.
357
  for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
358
  controlnet_conditioning_scale, seed, use_hed, use_canny]:
359
  comp.change(fn=run, inputs=inputs, outputs=outputs, every=0.5, queue=True)
 
1
  #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
 
4
+ import os
5
+ import random
6
  from pathlib import Path
 
 
7
 
8
+ import cv2
9
+ import numpy as np
10
+ import PIL.Image
11
+ import torch
12
+ import gradio as gr
13
  import spaces
14
+
15
+ from PIL import Image
16
+ from gradio_imageslider import ImageSlider
17
+ from controlnet_aux import HEDdetector
18
+
19
  from diffusers import (
20
  ControlNetModel,
21
  StableDiffusionXLControlNetPipeline,
22
  AutoencoderKL,
23
  EulerAncestralDiscreteScheduler,
24
  )
 
 
25
 
26
  # ──────────────────────────────────────────────────────────────────────────────
27
+ # UI text / theme helper
28
  # ──────────────────────────────────────────────────────────────────────────────
29
+
30
  js_func = """
31
  function refresh() {
32
  const url = new URL(window.location);
 
37
  }
38
  """
39
 
 
 
 
40
  DESCRIPTION = '''# Scribble SDXL 🖋️🌄 — live updates
41
+ Sketch → image with SDXL ControlNet (scribble/canny). Auto re-infers when you draw or tweak settings (debounced).
42
+ Models: **xinsir/controlnet-scribble-sdxl-1.0**, **xinsir/controlnet-canny-sdxl-1.0**, base **stabilityai/stable-diffusion-xl-base-1.0**.
43
  '''
44
 
45
  if not torch.cuda.is_available():
46
  DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces for good latency.</p>"
47
 
48
  # ──────────────────────────────────────────────────────────────────────────────
49
+ # Styles
50
  # ──────────────────────────────────────────────────────────────────────────────
51
+
52
  style_list = [
53
  {
54
  "name": "(No style)",
 
67
  },
68
  {
69
  "name": "Anime",
70
+ "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
71
  "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
72
  },
73
  {
 
112
  # ──────────────────────────────────────────────────────────────────────────────
113
  # Utilities
114
  # ──────────────────────────────────────────────────────────────────────────────
115
+
116
  def HWC3(x: np.ndarray) -> np.ndarray:
117
  assert x.dtype == np.uint8
118
  if x.ndim == 2:
 
144
  return z
145
 
146
  def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
147
+ """Scale so that w*h ≈ max_mpx*1e6 (default ~1024x1024 area). SDXL prefers multiples of 8."""
148
  area = w * h
149
  target = max_mpx * 1_000_000.0
150
  if area <= target:
151
+ return (w // 8) * 8, (h // 8) * 8
152
  r = (target / area) ** 0.5
153
+ return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8
154
 
155
  # ─────────────────────────────────────────────────��────────────────────────────
156
+ # Models (use dtype= and use_safetensors=True to avoid offload_state_dict issue)
157
  # ──────────────────────────────────────────────────────────────────────────────
158
+
159
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
160
+ DTYPE = torch.float16 if device.type == "cuda" else torch.float32
161
 
162
  scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
163
+ "stabilityai/stable-diffusion-xl-base-1.0",
164
+ subfolder="scheduler",
165
+ use_safetensors=True,
166
  )
167
 
168
  controlnet_scribble = ControlNetModel.from_pretrained(
169
+ "xinsir/controlnet-scribble-sdxl-1.0",
170
+ use_safetensors=True,
171
+ dtype=DTYPE,
172
  )
173
  controlnet_canny = ControlNetModel.from_pretrained(
174
+ "xinsir/controlnet-canny-sdxl-1.0",
175
+ use_safetensors=True,
176
+ dtype=DTYPE,
177
  )
178
  vae = AutoencoderKL.from_pretrained(
179
+ "madebyollin/sdxl-vae-fp16-fix",
180
+ use_safetensors=True,
181
+ dtype=DTYPE,
182
  )
183
 
184
  pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
185
  "stabilityai/stable-diffusion-xl-base-1.0",
186
  controlnet=controlnet_scribble,
187
  vae=vae,
 
188
  scheduler=scheduler,
189
+ use_safetensors=True,
190
+ dtype=DTYPE,
191
  )
192
  pipe_canny = StableDiffusionXLControlNetPipeline.from_pretrained(
193
  "stabilityai/stable-diffusion-xl-base-1.0",
194
  controlnet=controlnet_canny,
195
  vae=vae,
 
196
  scheduler=scheduler,
197
+ use_safetensors=True,
198
+ dtype=DTYPE,
199
  )
200
 
201
  for p in (pipe_scribble, pipe_canny):
 
211
  hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
212
 
213
  # ──────────────────────────────────────────────────────────────────────────────
214
+ # Pre / Post processing
215
  # ──────────────────────────────────────────────────────────────────────────────
216
+
217
+ def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image | None:
218
  """
219
+ Accepts gr.ImageEditor dict (with 'composite') or a PIL.Image and returns a PIL.Image control map.
 
220
  """
221
  if image_editor_value is None:
222
  return None
 
228
  else:
229
  return None
230
 
 
231
  if img.mode != "RGB":
232
  img = img.convert("RGB")
233
 
 
242
  control = np.array(control)
243
  control = nms(control, 127, 3)
244
  control = cv2.GaussianBlur(control, (0, 0), 3)
245
+ thr = int(round(random.uniform(0.01, 0.10), 2) * 255) # simulate human sketch thickness
 
 
246
  control[control > thr] = 255
247
  control[control < 255] = 0
248
  return Image.fromarray(control)
249
 
250
+ # default: treat the editor composite as the scribble itself
251
  return img
252
 
253
  def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
 
272
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
273
  return random.randint(0, MAX_SEED) if randomize_seed else int(seed)
274
 
275
+ # ──────────────────────────────────────────────────────────────────────────────
276
+ # Inference
277
+ # ──────────────────────────────────────────────────────────────────────────────
278
+
279
  @spaces.GPU
280
  def run(
281
  image, # dict from ImageEditor or PIL.Image
 
285
  num_steps: int = 12,
286
  guidance_scale: float = 5.0,
287
  controlnet_conditioning_scale: float = 1.0,
288
+ seed: int = -1,
289
  use_hed: bool = False,
290
  use_canny: bool = False,
291
  progress=gr.Progress(track_tqdm=True),
 
293
  if image is None or (isinstance(prompt, str) and prompt.strip() == ""):
294
  return (None, None)
295
 
 
296
  ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
297
+ w, h = _image_size_from_editor(image, target_mpx=1.0) # ~1MP for speed
298
 
 
299
  prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
 
300
  g = _maybe_seed(seed)
301
  pipe = _pick_pipe(use_canny)
302
 
 
311
  width=w, height=h,
312
  ).images[0]
313
 
314
+ return (ctrl_img if isinstance(ctrl_img, Image.Image) else Image.fromarray(ctrl_img), out)
 
 
 
 
 
315
 
316
  # ──────────────────────────────────────────────────────────────────────────────
317
+ # UI
318
  # ──────────────────────────────────────────────────────────────────────────────
319
+
320
  with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as demo:
321
  gr.Markdown(DESCRIPTION, elem_id="description")
322
 
 
360
  ]
361
  outputs = [image_slider]
362
 
363
+ # Manual run
364
  run_button.click(
365
  fn=randomize_seed_fn, inputs=[seed, randomize_seed], outputs=seed, queue=False, api_name=False
366
  ).then(lambda: None, inputs=None, outputs=image_slider).then(fn=run, inputs=inputs, outputs=outputs)
367
 
368
+ # Live re-inference (debounced)
 
369
  for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
370
  controlnet_conditioning_scale, seed, use_hed, use_canny]:
371
  comp.change(fn=run, inputs=inputs, outputs=outputs, every=0.5, queue=True)