File size: 16,219 Bytes
4a2500d
217e2c7
4a2500d
217e2c7
 
 
 
 
 
081422c
217e2c7
 
 
081422c
 
 
 
 
 
4a2500d
081422c
217e2c7
081422c
217e2c7
56d5bbe
 
 
 
 
 
 
 
 
4a2500d
1e225d0
 
217e2c7
4a2500d
 
 
1e225d0
4a2500d
081422c
217e2c7
081422c
217e2c7
4a2500d
1e225d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a2500d
081422c
4a2500d
 
 
 
 
081422c
 
 
 
 
217e2c7
081422c
 
 
 
 
 
 
 
 
 
 
 
 
1e225d0
4a2500d
081422c
 
 
 
 
 
 
 
 
 
 
 
4a2500d
081422c
 
 
 
217e2c7
081422c
217e2c7
081422c
 
1e225d0
081422c
217e2c7
4a2500d
217e2c7
4a2500d
081422c
1e225d0
081422c
 
1e225d0
4a2500d
a1fb6e9
1e225d0
081422c
 
1e225d0
a1fb6e9
4a2500d
081422c
4a2500d
081422c
4a2500d
081422c
217e2c7
e7634c7
4a2500d
a1fb6e9
 
 
 
081422c
217e2c7
e7634c7
a1fb6e9
 
081422c
 
 
 
 
 
 
 
4a2500d
 
081422c
 
 
217e2c7
081422c
217e2c7
 
081422c
 
 
 
 
 
 
 
 
 
 
 
 
1e225d0
081422c
 
 
 
 
1e225d0
081422c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a2500d
081422c
 
4a2500d
081422c
 
 
 
4a2500d
 
081422c
4a2500d
217e2c7
 
 
 
925c814
4a2500d
1e225d0
4a2500d
 
 
081422c
 
4a2500d
217e2c7
4a2500d
a1fb6e9
4a2500d
081422c
 
 
 
 
1e225d0
081422c
 
 
 
 
 
 
 
 
 
 
 
 
 
4a2500d
 
217e2c7
4a2500d
081422c
217e2c7
081422c
217e2c7
081422c
4a2500d
 
 
 
 
081422c
 
4a2500d
081422c
 
4a2500d
 
 
 
a7f8c35
4a2500d
081422c
 
4a2500d
081422c
4a2500d
081422c
 
 
4a2500d
2de91e5
081422c
4a2500d
 
1e225d0
 
 
4a2500d
664a77c
081422c
1e225d0
a7f8c35
3dde279
 
 
 
 
 
 
 
 
 
 
 
1e225d0
081422c
 
1e225d0
081422c
1e225d0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import random
import cv2
import numpy as np
import PIL.Image
import torch
import gradio as gr
import spaces
from PIL import Image
from gradio_imageslider import ImageSlider
from controlnet_aux import HEDdetector
from diffusers import (
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    AutoencoderKL,
    EulerAncestralDiscreteScheduler,
)

# ──────────────────────────────────────────────────────────────────────────────
# UI text / theme helper
# ──────────────────────────────────────────────────────────────────────────────

# JavaScript snippet handed to `gr.Blocks(js=...)` further down in this file.
# It inspects the page URL and, when the `__theme` query parameter is not
# 'dark', sets it to 'dark' and navigates to the updated URL.
js_func = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

# Markdown header for the page. The emoji and arrow were previously stored as
# mojibake (UTF-8 bytes decoded with a legacy code page); they are restored
# here to the intended characters.
DESCRIPTION = '''# Scribble SDXL 🖋️🌄
Sketch → image with SDXL ControlNet (scribble/canny). Live updates on changes (no timer throttling for Gradio 4.31.5).
Models: **xinsir/controlnet-scribble-sdxl-1.0**, **xinsir/controlnet-canny-sdxl-1.0**, base **stabilityai/stable-diffusion-xl-base-1.0**.
'''

if not torch.cuda.is_available():
    # Tell visitors that no CUDA device was detected.
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces.</p>"

# ──────────────────────────────────────────────────────────────────────────────
# Styles
# ──────────────────────────────────────────────────────────────────────────────

# Prompt presets. Each entry has a display name, a positive template that
# contains the literal placeholder "{prompt}", and a negative prompt.
style_list = [
    {
        "name": "(No style)",
        "prompt": "{prompt}",
        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
    },
    {
        "name": "Cinematic",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
    },
    {
        "name": "Anime",
        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
    },
    {
        "name": "Digital Art",
        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
        "negative_prompt": "photo, photorealistic, realism, ugly",
    },
    {
        "name": "Photographic",
        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
        "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
    },
    {
        "name": "Pixel art",
        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
        "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
    },
    {
        "name": "Fantasy art",
        "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
    },
    {
        "name": "Neonpunk",
        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
        "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
    },
    {
        "name": "Manga",
        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
        "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
    },
]

# name -> (positive template, negative prompt), in the order listed above.
styles = {
    entry["name"]: (entry["prompt"], entry["negative_prompt"])
    for entry in style_list
}
STYLE_NAMES = list(styles)
DEFAULT_STYLE_NAME = "(No style)"

def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
    """Expand a style preset into a (prompt, negative_prompt) pair.

    Args:
        style_name: Key into ``styles``; unknown names fall back to
            ``DEFAULT_STYLE_NAME``.
        positive: User prompt substituted for the ``{prompt}`` placeholder.
        negative: Optional extra negative prompt; ``None`` or blank is ignored.

    Returns:
        The styled positive prompt and the combined negative prompt.
    """
    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    user_negative = (negative or "").strip()
    # Join with ", " so the last style tag and the first user tag stay
    # separate comma-delimited terms instead of fusing into one phrase.
    negative_parts = [part for part in (style_negative.strip(), user_negative) if part]
    return template.replace("{prompt}", positive), ", ".join(negative_parts)

# ──────────────────────────────────────────────────────────────────────────────
# Utilities
# ──────────────────────────────────────────────────────────────────────────────

def HWC3(x: np.ndarray) -> np.ndarray:
    """Return a uint8 image as a height x width x 3 array.

    A 2-D input gains a trailing channel axis first. One channel is repeated
    three times, three channels are returned unchanged, and four channels are
    treated as RGBA and alpha-blended onto a white (255) background.
    Asserts that the dtype is uint8 and the channel count is 1, 3 or 4.
    """
    assert x.dtype == np.uint8
    img = x[:, :, None] if x.ndim == 2 else x
    _, _, channels = img.shape
    assert channels in (1, 3, 4)
    if channels == 1:
        return np.concatenate([img] * 3, axis=2)
    if channels == 3:
        return img
    # RGBA: composite over white using the alpha channel as opacity.
    rgb = img[:, :, 0:3].astype(np.float32)
    opacity = img[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * opacity + 255.0 * (1.0 - opacity)
    return blended.clip(0, 255).astype(np.uint8)

def nms(x, t, s):
    """Thin a response map to its local maxima and binarize it.

    The input is converted to float32 and Gaussian-blurred with sigma ``s``.
    A pixel is kept when it equals the dilation of the blurred map under at
    least one of four 3x3 line-shaped kernels (horizontal, vertical and the
    two diagonals). Kept values above ``t`` become 255, everything else 0.

    Returns:
        A uint8 array with the same shape as the blurred input.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    line_kernels = (
        [[0, 0, 0], [1, 1, 1], [0, 0, 0]],  # horizontal
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],  # vertical
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],  # main diagonal
        [[0, 0, 1], [0, 1, 0], [1, 0, 0]],  # anti-diagonal
    )

    kept = np.zeros_like(blurred)
    for rows in line_kernels:
        kernel = np.array(rows, dtype=np.uint8)
        is_peak = cv2.dilate(blurred, kernel=kernel) == blurred
        np.putmask(kept, is_peak, blurred)

    binary = np.zeros_like(kept, dtype=np.uint8)
    binary[kept > t] = 255
    return binary

def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
    """Fit a size under a megapixel budget and snap it to multiples of 8.

    Args:
        w: Source width in pixels.
        h: Source height in pixels.
        max_mpx: Area budget in megapixels (1 megapixel = 1,000,000 pixels).

    Returns:
        ``(width, height)``, each a positive multiple of 8. Sizes already
        within the budget are only rounded down to a multiple of 8; larger
        sizes are scaled down uniformly, with each side at least 64.
    """
    area = w * h
    target = max_mpx * 1_000_000.0
    if area <= target:
        # Floor at 8 so a side shorter than 8 px cannot round down to 0.
        return max(8, w // 8 * 8), max(8, h // 8 * 8)
    r = (target / area) ** 0.5
    return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8

# ──────────────────────────────────────────────────────────────────────────────
# Models
# ──────────────────────────────────────────────────────────────────────────────

# Run on CUDA when available; use half precision there and float32 on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DTYPE = torch.float16 if device.type == "cuda" else torch.float32

# Components below are loaded once at import time. The scheduler and VAE
# objects are shared by both pipelines.
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", use_safetensors=True
)
controlnet_scribble = ControlNetModel.from_pretrained(
    "xinsir/controlnet-scribble-sdxl-1.0", use_safetensors=True, torch_dtype=DTYPE
)
controlnet_canny = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0", use_safetensors=True, torch_dtype=DTYPE
)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", use_safetensors=True, torch_dtype=DTYPE
)

# One SDXL ControlNet pipeline per control type (scribble and canny).
pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_scribble,
    vae=vae,
    scheduler=scheduler,
    use_safetensors=True,
    torch_dtype=DTYPE,
)
pipe_canny = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_canny,
    vae=vae,
    scheduler=scheduler,
    use_safetensors=True,
    torch_dtype=DTYPE,
)

for p in (pipe_scribble, pipe_canny):
    if device.type == "cuda":
        try:
            p.enable_xformers_memory_efficient_attention()
        except Exception as exc:
            # Best effort: xformers is optional. Report why it was skipped and
            # fall back to attention slicing. Slicing is deliberately NOT
            # enabled when xformers succeeds; the Diffusers docs advise
            # against combining the two because it can slow inference.
            print(f"xformers attention unavailable ({exc!r}); using attention slicing instead.")
            p.enable_attention_slicing()
    p.to(device)

# Upper bound for seeds offered in the UI and drawn by randomize_seed_fn.
MAX_SEED = np.iinfo(np.int32).max
# HED edge detector used to turn an input picture into a sketch-like control image.
hed = HEDdetector.from_pretrained("lllyasviel/Annotators")

# ──────────────────────────────────────────────────────────────────────────────
# Pre / Post processing
# ──────────────────────────────────────────────────────────────────────────────

def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image | None:
    """Build the ControlNet conditioning image from the editor value.

    Args:
        image_editor_value: Either a PIL image or a dict carrying the image
            under the ``"composite"`` key.
        use_hed: Run the HED detector and binarize its output.
        use_canny: Run Canny edge detection; takes precedence over ``use_hed``.

    Returns:
        The control image, or ``None`` when no PIL image is available (value
        is ``None``, an unsupported type, or a dict whose composite is
        missing or ``None``).
    """
    if isinstance(image_editor_value, dict):
        img = image_editor_value.get("composite")
    else:
        img = image_editor_value
    # Covers None input, unsupported types and an empty composite, which
    # previously crashed on `img.mode`.
    if not isinstance(img, PIL.Image.Image):
        return None

    if img.mode != "RGB":
        img = img.convert("RGB")

    if use_canny:
        edges = cv2.Canny(np.array(img), 100, 200)
        return Image.fromarray(HWC3(edges))

    if use_hed:
        control = hed(img, scribble=False)
        control = np.array(control)
        control = nms(control, 127, 3)
        control = cv2.GaussianBlur(control, (0, 0), 3)
        # Binarize with a threshold drawn at random between 1% and 10% of 255.
        thr = int(round(random.uniform(0.01, 0.10), 2) * 255)
        control[control > thr] = 255
        control[control < 255] = 0
        return Image.fromarray(control)

    # No detector requested: use the drawing itself as the control image.
    return img

def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
    if image_editor_value is None:
        return 1024, 1024
    if isinstance(image_editor_value, dict) and "composite" in image_editor_value:
        w, h = image_editor_value["composite"].size
    elif isinstance(image_editor_value, PIL.Image.Image):
        w, h = image_editor_value.size
    else:
        w, h = 1024, 1024
    return clamp_size_to_megapixels(w, h, max_mpx=target_mpx)

def _pick_pipe(use_canny: bool):
    """Return ``pipe_canny`` when ``use_canny`` is truthy, else ``pipe_scribble``."""
    if use_canny:
        return pipe_canny
    return pipe_scribble

def _maybe_seed(seed: int):
    if seed is None or seed < 0:
        return None
    return torch.Generator(device=device).manual_seed(int(seed))

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return a random seed in ``[0, MAX_SEED]`` if requested, else ``int(seed)``."""
    if randomize_seed:
        return random.randint(0, MAX_SEED)
    return int(seed)

# ──────────────────────────────────────────────────────────────────────────────
# Inference
# ──────────────────────────────────────────────────────────────────────────────

@spaces.GPU
def run(
    image,
    prompt: str,
    negative_prompt: str,
    style_name: str = DEFAULT_STYLE_NAME,
    num_steps: int = 12,
    guidance_scale: float = 5.0,
    controlnet_conditioning_scale: float = 1.0,
    seed: int = -1,
    use_hed: bool = False,
    use_canny: bool = False,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate an image from the editor drawing and the prompt.

    Args:
        image: Editor value (PIL image or dict with a ``"composite"`` image).
        prompt: User prompt; a blank or non-string prompt skips generation.
        negative_prompt: Extra negative prompt appended to the style's own.
        style_name: Preset name passed to ``apply_style``.
        num_steps: Number of inference steps.
        guidance_scale: Guidance scale passed to the pipeline.
        controlnet_conditioning_scale: ControlNet conditioning scale.
        seed: Seed for the generator; ``None`` or negative means unseeded.
        use_hed: Preprocess the input with the HED detector.
        use_canny: Preprocess with Canny and use the Canny pipeline.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(control_image, generated_image)``, or ``(None, None)`` when there
        is nothing to generate from.
    """
    # Nothing to do without a drawing or a usable prompt. A non-string prompt
    # (e.g. None) would otherwise fail later inside apply_style.
    if image is None or not isinstance(prompt, str) or not prompt.strip():
        return (None, None)

    ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
    if ctrl_img is None:
        # The editor value held no usable image (e.g. an empty canvas).
        return (None, None)
    w, h = _image_size_from_editor(image, target_mpx=1.0)

    prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
    g = _maybe_seed(seed)
    pipe = _pick_pipe(use_canny)

    out = pipe(
        prompt=prompt_styled,
        negative_prompt=neg_styled,
        image=ctrl_img,
        num_inference_steps=int(num_steps),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        guidance_scale=float(guidance_scale),
        generator=g,
        width=w, height=h,
    ).images[0]

    return (ctrl_img if isinstance(ctrl_img, Image.Image) else Image.fromarray(ctrl_img), out)

# ──────────────────────────────────────────────────────────────────────────────
# UI
# ──────────────────────────────────────────────────────────────────────────────

# Page layout: inputs in the left column, control/output slider on the right.
# The title and the HED label previously contained mojibake; the intended
# em dash and arrow are restored.
with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as demo:
    gr.Markdown(DESCRIPTION, elem_id="description")

    with gr.Row():
        with gr.Column():
            with gr.Group():
                image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512), label="Draw / Edit")
                prompt = gr.Textbox(label="Prompt", value="a detailed robot mascot, studio lighting, clean lines")
                style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
                use_hed = gr.Checkbox(label="Use HED detector (turn photo → sketch)", value=False)
                use_canny = gr.Checkbox(label="Use Canny (ControlNet Canny)", value=False)
                run_button = gr.Button("Run")
            with gr.Accordion("Advanced options", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
                )
                num_steps = gr.Slider(label="Steps (lower = faster)", minimum=4, maximum=40, step=1, value=12)
                guidance_scale = gr.Slider(label="Guidance", minimum=0.1, maximum=12.0, step=0.1, value=5.0)
                controlnet_conditioning_scale = gr.Slider(
                    label="Control strength", minimum=0.5, maximum=2.0, step=0.05, value=1.0
                )
                seed = gr.Slider(label="Seed (-1 random)", minimum=-1, maximum=MAX_SEED, step=1, value=-1)
                randomize_seed = gr.Checkbox(label="Randomize seed on Run", value=True)

        with gr.Column():
            with gr.Group():
                image_slider = ImageSlider(position=0.5, label="Control ↔ Output")

    # Order must match the positional parameters of `run`.
    inputs = [
        image, prompt, negative_prompt, style,
        num_steps, guidance_scale, controlnet_conditioning_scale,
        seed, use_hed, use_canny,
    ]
    outputs = [image_slider]

    # Manual run (per-event limit OK here):
    # 1) optionally randomize the seed, 2) clear the slider, 3) generate.
    run_button.click(
        fn=randomize_seed_fn,
        inputs=[seed, randomize_seed],
        outputs=seed,
        queue=False,
        api_name=False,
        concurrency_limit=2,
    ).then(
        lambda: None, inputs=None, outputs=image_slider, concurrency_limit=2
    ).then(
        fn=run, inputs=inputs, outputs=outputs, concurrency_limit=2
    )

    # Live re-inference on changes (no `every`, because 4.31.5 disallows it with limits)
    # NOTE(review): `seed` is also written by the Run click chain above, so a
    # randomized seed presumably fires this change listener in addition to the
    # chained `run` call — confirm whether that second inference is intended.
    for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
                 controlnet_conditioning_scale, seed, use_hed, use_canny]:
        comp.change(fn=run, inputs=inputs, outputs=outputs, queue=True)

# Enable the event queue (max_size=20) and start the app, capping worker
# threads globally via max_threads=2.
demo.queue(max_size=20).launch(max_threads=2)