scribble-sdxl

Build error

App Files Community

joeaa17 committed on Oct 13, 2025

Commit

1e225d0

verified ·

1 Parent(s): 22b3cc7

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -106

app.py CHANGED Viewed

@@ -2,19 +2,15 @@
 # -*- coding: utf-8 -*-
 import random
-from pathlib import Path
 import cv2
 import numpy as np
 import PIL.Image
 import torch
 import gradio as gr
 import spaces
 from PIL import Image
 from gradio_imageslider import ImageSlider
 from controlnet_aux import HEDdetector
 from diffusers import (
     ControlNetModel,
     StableDiffusionXLControlNetPipeline,
@@ -36,69 +32,48 @@ function refresh() {
 }
 """
-DESCRIPTION = '''# Scribble SDXL 🖋️🌄 — live updates
-Sketch → image with SDXL ControlNet (scribble/canny). Auto re-infers when you draw or tweak settings (debounced).
 Models: **xinsir/controlnet-scribble-sdxl-1.0**, **xinsir/controlnet-canny-sdxl-1.0**, base **stabilityai/stable-diffusion-xl-base-1.0**.
 '''
 if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces for good latency.</p>"
 # ──────────────────────────────────────────────────────────────────────────────
 # Styles
 # ──────────────────────────────────────────────────────────────────────────────
 style_list = [
-    {
-        "name": "(No style)",
-        "prompt": "{prompt}",
-        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
-    },
-    {
-        "name": "Cinematic",
-        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
-        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
-    },
-    {
-        "name": "3D Model",
-        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
-        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
-    },
-    {
-        "name": "Anime",
-        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
-        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
-    },
-    {
-        "name": "Digital Art",
-        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
-        "negative_prompt": "photo, photorealistic, realism, ugly",
-    },
-    {
-        "name": "Photographic",
-        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
-        "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
-    },
-    {
-        "name": "Pixel art",
-        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
-        "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
-    },
-    {
-        "name": "Fantasy art",
-        "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
-        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
-    },
-    {
-        "name": "Neonpunk",
-        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
-        "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
-    },
-    {
-        "name": "Manga",
-        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
-        "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
-    },
 ]
 styles = {s["name"]: (s["prompt"], s["negative_prompt"]) for s in style_list}
 STYLE_NAMES = list(styles.keys())
@@ -122,12 +97,10 @@ def HWC3(x: np.ndarray) -> np.ndarray:
         return x
     if C == 1:
         return np.concatenate([x, x, x], axis=2)
-    # C == 4
     color = x[:, :, 0:3].astype(np.float32)
     alpha = x[:, :, 3:4].astype(np.float32) / 255.0
     y = color * alpha + 255.0 * (1.0 - alpha)
-    y = y.clip(0, 255).astype(np.uint8)
-    return y
 def nms(x, t, s):
     x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
@@ -143,7 +116,6 @@ def nms(x, t, s):
     return z
 def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
-    """Scale so that w*h ≈ max_mpx*1e6 (default ~1024x1024 area). SDXL prefers multiples of 8."""
     area = w * h
     target = max_mpx * 1_000_000.0
     if area <= target:
@@ -152,32 +124,23 @@ def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int,
     return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8
 # ──────────────────────────────────────────────────────────────────────────────
-# Models (use torch_dtype= and use_safetensors=True)
 # ──────────────────────────────────────────────────────────────────────────────
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 DTYPE = torch.float16 if device.type == "cuda" else torch.float32
 scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
-    subfolder="scheduler",
-    use_safetensors=True,
 )
 controlnet_scribble = ControlNetModel.from_pretrained(
-    "xinsir/controlnet-scribble-sdxl-1.0",
-    use_safetensors=True,
-    torch_dtype=DTYPE,
 )
 controlnet_canny = ControlNetModel.from_pretrained(
-    "xinsir/controlnet-canny-sdxl-1.0",
-    use_safetensors=True,
-    torch_dtype=DTYPE,
 )
 vae = AutoencoderKL.from_pretrained(
-    "madebyollin/sdxl-vae-fp16-fix",
-    use_safetensors=True,
-    torch_dtype=DTYPE,
 )
 pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
@@ -214,39 +177,29 @@ hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
 # ──────────────────────────────────────────────────────────────────────────────
 def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image | None:
-    """
-    Accepts gr.ImageEditor dict (with 'composite') or a PIL.Image and returns a PIL.Image control map.
-    """
     if image_editor_value is None:
         return None
     if isinstance(image_editor_value, dict) and "composite" in image_editor_value:
         img = image_editor_value["composite"]
     elif isinstance(image_editor_value, PIL.Image.Image):
         img = image_editor_value
     else:
         return None
     if img.mode != "RGB":
         img = img.convert("RGB")
     if use_canny:
         arr = np.array(img)
         edge = cv2.Canny(arr, 100, 200)
-        edge = HWC3(edge)
-        return Image.fromarray(edge)
     if use_hed:
         control = hed(img, scribble=False)
         control = np.array(control)
         control = nms(control, 127, 3)
         control = cv2.GaussianBlur(control, (0, 0), 3)
-        thr = int(round(random.uniform(0.01, 0.10), 2) * 255)  # simulate human sketch thickness
         control[control > thr] = 255
         control[control < 255] = 0
         return Image.fromarray(control)
-    # default: treat the editor composite as the scribble itself
     return img
 def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
@@ -277,7 +230,7 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
 @spaces.GPU
 def run(
-    image,                        # dict from ImageEditor or PIL.Image
     prompt: str,
     negative_prompt: str,
     style_name: str = DEFAULT_STYLE_NAME,
@@ -293,7 +246,7 @@ def run(
         return (None, None)
     ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
-    w, h = _image_size_from_editor(image, target_mpx=1.0)  # ~1MP for speed
     prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
     g = _maybe_seed(seed)
@@ -346,20 +299,13 @@ with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as d
                 image_slider = ImageSlider(position=0.5, label="Control ↔ Output")
     inputs = [
-        image,
-        prompt,
-        negative_prompt,
-        style,
-        num_steps,
-        guidance_scale,
-        controlnet_conditioning_scale,
-        seed,
-        use_hed,
-        use_canny,
     ]
     outputs = [image_slider]
-    # Manual run (with per-event concurrency limits)
     run_button.click(
         fn=randomize_seed_fn,
         inputs=[seed, randomize_seed],
@@ -373,12 +319,10 @@ with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as d
         fn=run, inputs=inputs, outputs=outputs, concurrency_limit=2
     )
-    # Live re-inference (debounced; per-event concurrency limits)
     for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
                  controlnet_conditioning_scale, seed, use_hed, use_canny]:
-        comp.change(
-            fn=run, inputs=inputs, outputs=outputs, every=0.5, queue=True, concurrency_limit=2
-        )
-# Enable queue without deprecated args
-demo.queue(max_size=20).launch()

 # -*- coding: utf-8 -*-
 import random
 import cv2
 import numpy as np
 import PIL.Image
 import torch
 import gradio as gr
 import spaces
 from PIL import Image
 from gradio_imageslider import ImageSlider
 from controlnet_aux import HEDdetector
 from diffusers import (
     ControlNetModel,
     StableDiffusionXLControlNetPipeline,
 }
 """
+DESCRIPTION = '''# Scribble SDXL 🖋️🌄
+Sketch → image with SDXL ControlNet (scribble/canny). Live updates on changes (no timer throttling for Gradio 4.31.5).
 Models: **xinsir/controlnet-scribble-sdxl-1.0**, **xinsir/controlnet-canny-sdxl-1.0**, base **stabilityai/stable-diffusion-xl-base-1.0**.
 '''
 if not torch.cuda.is_available():
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces.</p>"
 # ──────────────────────────────────────────────────────────────────────────────
 # Styles
 # ──────────────────────────────────────────────────────────────────────────────
 style_list = [
+    {"name": "(No style)", "prompt": "{prompt}",
+     "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"},
+    {"name": "Cinematic",
+     "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
+     "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"},
+    {"name": "3D Model",
+     "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
+     "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting"},
+    {"name": "Anime",
+     "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
+     "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast"},
+    {"name": "Digital Art",
+     "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
+     "negative_prompt": "photo, photorealistic, realism, ugly"},
+    {"name": "Photographic",
+     "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
+     "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly"},
+    {"name": "Pixel art",
+     "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
+     "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic"},
+    {"name": "Fantasy art",
+     "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
+     "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white"},
+    {"name": "Neonpunk",
+     "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
+     "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured"},
+    {"name": "Manga",
+     "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
+     "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style"},
 ]
 styles = {s["name"]: (s["prompt"], s["negative_prompt"]) for s in style_list}
 STYLE_NAMES = list(styles.keys())
         return x
     if C == 1:
         return np.concatenate([x, x, x], axis=2)
     color = x[:, :, 0:3].astype(np.float32)
     alpha = x[:, :, 3:4].astype(np.float32) / 255.0
     y = color * alpha + 255.0 * (1.0 - alpha)
+    return y.clip(0, 255).astype(np.uint8)
 def nms(x, t, s):
     x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
     return z
 def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
     area = w * h
     target = max_mpx * 1_000_000.0
     if area <= target:
     return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8
 # ──────────────────────────────────────────────────────────────────────────────
+# Models
 # ──────────────────────────────────────────────────────────────────────────────
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 DTYPE = torch.float16 if device.type == "cuda" else torch.float32
 scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", use_safetensors=True
 )
 controlnet_scribble = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-scribble-sdxl-1.0", use_safetensors=True, torch_dtype=DTYPE
 )
 controlnet_canny = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-canny-sdxl-1.0", use_safetensors=True, torch_dtype=DTYPE
 )
 vae = AutoencoderKL.from_pretrained(
+    "madebyollin/sdxl-vae-fp16-fix", use_safetensors=True, torch_dtype=DTYPE
 )
 pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
 # ──────────────────────────────────────────────────────────────────────────────
 def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image | None:
     if image_editor_value is None:
         return None
     if isinstance(image_editor_value, dict) and "composite" in image_editor_value:
         img = image_editor_value["composite"]
     elif isinstance(image_editor_value, PIL.Image.Image):
         img = image_editor_value
     else:
         return None
     if img.mode != "RGB":
         img = img.convert("RGB")
     if use_canny:
         arr = np.array(img)
         edge = cv2.Canny(arr, 100, 200)
+        return Image.fromarray(HWC3(edge))
     if use_hed:
         control = hed(img, scribble=False)
         control = np.array(control)
         control = nms(control, 127, 3)
         control = cv2.GaussianBlur(control, (0, 0), 3)
+        thr = int(round(random.uniform(0.01, 0.10), 2) * 255)
         control[control > thr] = 255
         control[control < 255] = 0
         return Image.fromarray(control)
     return img
 def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
 @spaces.GPU
 def run(
+    image,
     prompt: str,
     negative_prompt: str,
     style_name: str = DEFAULT_STYLE_NAME,
         return (None, None)
     ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
+    w, h = _image_size_from_editor(image, target_mpx=1.0)
     prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
     g = _maybe_seed(seed)
                 image_slider = ImageSlider(position=0.5, label="Control ↔ Output")
     inputs = [
+        image, prompt, negative_prompt, style,
+        num_steps, guidance_scale, controlnet_conditioning_scale,
+        seed, use_hed, use_canny,
     ]
     outputs = [image_slider]
+    # Manual run (per-event limit OK here)
     run_button.click(
         fn=randomize_seed_fn,
         inputs=[seed, randomize_seed],
         fn=run, inputs=inputs, outputs=outputs, concurrency_limit=2
     )
+    # Live re-inference on changes (no `every`, because 4.31.5 disallows it with limits)
     for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
                  controlnet_conditioning_scale, seed, use_hed, use_canny]:
+        comp.change(fn=run, inputs=inputs, outputs=outputs, queue=True)
+# Enable queue and cap worker threads globally
+demo.queue(max_size=20).launch(max_threads=2)