joeaa17 committed on
Commit
1e225d0
·
verified ·
1 Parent(s): 22b3cc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -106
app.py CHANGED
@@ -2,19 +2,15 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  import random
5
- from pathlib import Path
6
-
7
  import cv2
8
  import numpy as np
9
  import PIL.Image
10
  import torch
11
  import gradio as gr
12
  import spaces
13
-
14
  from PIL import Image
15
  from gradio_imageslider import ImageSlider
16
  from controlnet_aux import HEDdetector
17
-
18
  from diffusers import (
19
  ControlNetModel,
20
  StableDiffusionXLControlNetPipeline,
@@ -36,69 +32,48 @@ function refresh() {
36
  }
37
  """
38
 
39
- DESCRIPTION = '''# Scribble SDXL 🖋️🌄 — live updates
40
- Sketch → image with SDXL ControlNet (scribble/canny). Auto re-infers when you draw or tweak settings (debounced).
41
  Models: **xinsir/controlnet-scribble-sdxl-1.0**, **xinsir/controlnet-canny-sdxl-1.0**, base **stabilityai/stable-diffusion-xl-base-1.0**.
42
  '''
43
 
44
  if not torch.cuda.is_available():
45
- DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces for good latency.</p>"
46
 
47
  # ──────────────────────────────────────────────────────────────────────────────
48
  # Styles
49
  # ──────────────────────────────────────────────────────────────────────────────
50
 
51
  style_list = [
52
- {
53
- "name": "(No style)",
54
- "prompt": "{prompt}",
55
- "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
56
- },
57
- {
58
- "name": "Cinematic",
59
- "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
60
- "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
61
- },
62
- {
63
- "name": "3D Model",
64
- "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
65
- "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
66
- },
67
- {
68
- "name": "Anime",
69
- "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
70
- "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
71
- },
72
- {
73
- "name": "Digital Art",
74
- "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
75
- "negative_prompt": "photo, photorealistic, realism, ugly",
76
- },
77
- {
78
- "name": "Photographic",
79
- "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
80
- "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
81
- },
82
- {
83
- "name": "Pixel art",
84
- "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
85
- "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
86
- },
87
- {
88
- "name": "Fantasy art",
89
- "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
90
- "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
91
- },
92
- {
93
- "name": "Neonpunk",
94
- "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
95
- "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
96
- },
97
- {
98
- "name": "Manga",
99
- "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
100
- "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
101
- },
102
  ]
103
  styles = {s["name"]: (s["prompt"], s["negative_prompt"]) for s in style_list}
104
  STYLE_NAMES = list(styles.keys())
@@ -122,12 +97,10 @@ def HWC3(x: np.ndarray) -> np.ndarray:
122
  return x
123
  if C == 1:
124
  return np.concatenate([x, x, x], axis=2)
125
- # C == 4
126
  color = x[:, :, 0:3].astype(np.float32)
127
  alpha = x[:, :, 3:4].astype(np.float32) / 255.0
128
  y = color * alpha + 255.0 * (1.0 - alpha)
129
- y = y.clip(0, 255).astype(np.uint8)
130
- return y
131
 
132
  def nms(x, t, s):
133
  x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
@@ -143,7 +116,6 @@ def nms(x, t, s):
143
  return z
144
 
145
  def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
146
- """Scale so that w*h ≈ max_mpx*1e6 (default ~1024x1024 area). SDXL prefers multiples of 8."""
147
  area = w * h
148
  target = max_mpx * 1_000_000.0
149
  if area <= target:
@@ -152,32 +124,23 @@ def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int,
152
  return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8
153
 
154
  # ──────────────────────────────────────────────────────────────────────────────
155
- # Models (use torch_dtype= and use_safetensors=True)
156
  # ──────────────────────────────────────────────────────────────────────────────
157
 
158
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
159
  DTYPE = torch.float16 if device.type == "cuda" else torch.float32
160
 
161
  scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
162
- "stabilityai/stable-diffusion-xl-base-1.0",
163
- subfolder="scheduler",
164
- use_safetensors=True,
165
  )
166
-
167
  controlnet_scribble = ControlNetModel.from_pretrained(
168
- "xinsir/controlnet-scribble-sdxl-1.0",
169
- use_safetensors=True,
170
- torch_dtype=DTYPE,
171
  )
172
  controlnet_canny = ControlNetModel.from_pretrained(
173
- "xinsir/controlnet-canny-sdxl-1.0",
174
- use_safetensors=True,
175
- torch_dtype=DTYPE,
176
  )
177
  vae = AutoencoderKL.from_pretrained(
178
- "madebyollin/sdxl-vae-fp16-fix",
179
- use_safetensors=True,
180
- torch_dtype=DTYPE,
181
  )
182
 
183
  pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
@@ -214,39 +177,29 @@ hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
214
  # ──────────────────────────────────────────────────────────────────────────────
215
 
216
  def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image | None:
217
- """
218
- Accepts gr.ImageEditor dict (with 'composite') or a PIL.Image and returns a PIL.Image control map.
219
- """
220
  if image_editor_value is None:
221
  return None
222
-
223
  if isinstance(image_editor_value, dict) and "composite" in image_editor_value:
224
  img = image_editor_value["composite"]
225
  elif isinstance(image_editor_value, PIL.Image.Image):
226
  img = image_editor_value
227
  else:
228
  return None
229
-
230
  if img.mode != "RGB":
231
  img = img.convert("RGB")
232
-
233
  if use_canny:
234
  arr = np.array(img)
235
  edge = cv2.Canny(arr, 100, 200)
236
- edge = HWC3(edge)
237
- return Image.fromarray(edge)
238
-
239
  if use_hed:
240
  control = hed(img, scribble=False)
241
  control = np.array(control)
242
  control = nms(control, 127, 3)
243
  control = cv2.GaussianBlur(control, (0, 0), 3)
244
- thr = int(round(random.uniform(0.01, 0.10), 2) * 255) # simulate human sketch thickness
245
  control[control > thr] = 255
246
  control[control < 255] = 0
247
  return Image.fromarray(control)
248
-
249
- # default: treat the editor composite as the scribble itself
250
  return img
251
 
252
  def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
@@ -277,7 +230,7 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
277
 
278
  @spaces.GPU
279
  def run(
280
- image, # dict from ImageEditor or PIL.Image
281
  prompt: str,
282
  negative_prompt: str,
283
  style_name: str = DEFAULT_STYLE_NAME,
@@ -293,7 +246,7 @@ def run(
293
  return (None, None)
294
 
295
  ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
296
- w, h = _image_size_from_editor(image, target_mpx=1.0) # ~1MP for speed
297
 
298
  prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
299
  g = _maybe_seed(seed)
@@ -346,20 +299,13 @@ with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as d
346
  image_slider = ImageSlider(position=0.5, label="Control ↔ Output")
347
 
348
  inputs = [
349
- image,
350
- prompt,
351
- negative_prompt,
352
- style,
353
- num_steps,
354
- guidance_scale,
355
- controlnet_conditioning_scale,
356
- seed,
357
- use_hed,
358
- use_canny,
359
  ]
360
  outputs = [image_slider]
361
 
362
- # Manual run (with per-event concurrency limits)
363
  run_button.click(
364
  fn=randomize_seed_fn,
365
  inputs=[seed, randomize_seed],
@@ -373,12 +319,10 @@ with gr.Blocks(css="style.css", js=js_func, title="Scribble SDXL — Live") as d
373
  fn=run, inputs=inputs, outputs=outputs, concurrency_limit=2
374
  )
375
 
376
- # Live re-inference (debounced; per-event concurrency limits)
377
  for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
378
  controlnet_conditioning_scale, seed, use_hed, use_canny]:
379
- comp.change(
380
- fn=run, inputs=inputs, outputs=outputs, every=0.5, queue=True, concurrency_limit=2
381
- )
382
 
383
- # Enable queue without deprecated args
384
- demo.queue(max_size=20).launch()
 
2
  # -*- coding: utf-8 -*-
3
 
4
  import random
 
 
5
  import cv2
6
  import numpy as np
7
  import PIL.Image
8
  import torch
9
  import gradio as gr
10
  import spaces
 
11
  from PIL import Image
12
  from gradio_imageslider import ImageSlider
13
  from controlnet_aux import HEDdetector
 
14
  from diffusers import (
15
  ControlNetModel,
16
  StableDiffusionXLControlNetPipeline,
 
32
  }
33
  """
34
 
35
+ DESCRIPTION = '''# Scribble SDXL 🖋️🌄
36
+ Sketch → image with SDXL ControlNet (scribble/canny). Live updates on changes (no timer throttling for Gradio 4.31.5).
37
  Models: **xinsir/controlnet-scribble-sdxl-1.0**, **xinsir/controlnet-canny-sdxl-1.0**, base **stabilityai/stable-diffusion-xl-base-1.0**.
38
  '''
39
 
40
  if not torch.cuda.is_available():
41
+ DESCRIPTION += "\n<p>Running on CPU 🥶 This demo is intended for GPU Spaces.</p>"
42
 
43
  # ──────────────────────────────────────────────────────────────────────────────
44
  # Styles
45
  # ──────────────────────────────────────────────────────────────────────────────
46
 
47
  style_list = [
48
+ {"name": "(No style)", "prompt": "{prompt}",
49
+ "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"},
50
+ {"name": "Cinematic",
51
+ "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
52
+ "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"},
53
+ {"name": "3D Model",
54
+ "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
55
+ "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting"},
56
+ {"name": "Anime",
57
+ "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
58
+ "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast"},
59
+ {"name": "Digital Art",
60
+ "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
61
+ "negative_prompt": "photo, photorealistic, realism, ugly"},
62
+ {"name": "Photographic",
63
+ "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
64
+ "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly"},
65
+ {"name": "Pixel art",
66
+ "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
67
+ "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic"},
68
+ {"name": "Fantasy art",
69
+ "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
70
+ "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white"},
71
+ {"name": "Neonpunk",
72
+ "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
73
+ "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured"},
74
+ {"name": "Manga",
75
+ "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
76
+ "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  ]
78
  styles = {s["name"]: (s["prompt"], s["negative_prompt"]) for s in style_list}
79
  STYLE_NAMES = list(styles.keys())
 
97
  return x
98
  if C == 1:
99
  return np.concatenate([x, x, x], axis=2)
 
100
  color = x[:, :, 0:3].astype(np.float32)
101
  alpha = x[:, :, 3:4].astype(np.float32) / 255.0
102
  y = color * alpha + 255.0 * (1.0 - alpha)
103
+ return y.clip(0, 255).astype(np.uint8)
 
104
 
105
  def nms(x, t, s):
106
  x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
 
116
  return z
117
 
118
  def clamp_size_to_megapixels(w: int, h: int, max_mpx: float = 1.0) -> tuple[int, int]:
 
119
  area = w * h
120
  target = max_mpx * 1_000_000.0
121
  if area <= target:
 
124
  return max(64, int(w * r)) // 8 * 8, max(64, int(h * r)) // 8 * 8
125
 
126
  # ──────────────────────────────────────────────────────────────────────────────
127
+ # Models
128
  # ──────────────────────────────────────────────────────────────────────────────
129
 
130
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
131
  DTYPE = torch.float16 if device.type == "cuda" else torch.float32
132
 
133
  scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
134
+ "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", use_safetensors=True
 
 
135
  )
 
136
  controlnet_scribble = ControlNetModel.from_pretrained(
137
+ "xinsir/controlnet-scribble-sdxl-1.0", use_safetensors=True, torch_dtype=DTYPE
 
 
138
  )
139
  controlnet_canny = ControlNetModel.from_pretrained(
140
+ "xinsir/controlnet-canny-sdxl-1.0", use_safetensors=True, torch_dtype=DTYPE
 
 
141
  )
142
  vae = AutoencoderKL.from_pretrained(
143
+ "madebyollin/sdxl-vae-fp16-fix", use_safetensors=True, torch_dtype=DTYPE
 
 
144
  )
145
 
146
  pipe_scribble = StableDiffusionXLControlNetPipeline.from_pretrained(
 
177
  # ──────────────────────────────────────────────────────────────────────────────
178
 
179
  def _prepare_control_image(image_editor_value, use_hed: bool, use_canny: bool) -> Image.Image | None:
 
 
 
180
  if image_editor_value is None:
181
  return None
 
182
  if isinstance(image_editor_value, dict) and "composite" in image_editor_value:
183
  img = image_editor_value["composite"]
184
  elif isinstance(image_editor_value, PIL.Image.Image):
185
  img = image_editor_value
186
  else:
187
  return None
 
188
  if img.mode != "RGB":
189
  img = img.convert("RGB")
 
190
  if use_canny:
191
  arr = np.array(img)
192
  edge = cv2.Canny(arr, 100, 200)
193
+ return Image.fromarray(HWC3(edge))
 
 
194
  if use_hed:
195
  control = hed(img, scribble=False)
196
  control = np.array(control)
197
  control = nms(control, 127, 3)
198
  control = cv2.GaussianBlur(control, (0, 0), 3)
199
+ thr = int(round(random.uniform(0.01, 0.10), 2) * 255)
200
  control[control > thr] = 255
201
  control[control < 255] = 0
202
  return Image.fromarray(control)
 
 
203
  return img
204
 
205
  def _image_size_from_editor(image_editor_value, target_mpx=1.0) -> tuple[int, int]:
 
230
 
231
  @spaces.GPU
232
  def run(
233
+ image,
234
  prompt: str,
235
  negative_prompt: str,
236
  style_name: str = DEFAULT_STYLE_NAME,
 
246
  return (None, None)
247
 
248
  ctrl_img = _prepare_control_image(image, use_hed=use_hed, use_canny=use_canny)
249
+ w, h = _image_size_from_editor(image, target_mpx=1.0)
250
 
251
  prompt_styled, neg_styled = apply_style(style_name, prompt, negative_prompt or "")
252
  g = _maybe_seed(seed)
 
299
  image_slider = ImageSlider(position=0.5, label="Control ↔ Output")
300
 
301
  inputs = [
302
+ image, prompt, negative_prompt, style,
303
+ num_steps, guidance_scale, controlnet_conditioning_scale,
304
+ seed, use_hed, use_canny,
 
 
 
 
 
 
 
305
  ]
306
  outputs = [image_slider]
307
 
308
+ # Manual run (per-event limit OK here)
309
  run_button.click(
310
  fn=randomize_seed_fn,
311
  inputs=[seed, randomize_seed],
 
319
  fn=run, inputs=inputs, outputs=outputs, concurrency_limit=2
320
  )
321
 
322
+ # Live re-inference on changes (no `every`, because 4.31.5 disallows it with limits)
323
  for comp in [image, prompt, negative_prompt, style, num_steps, guidance_scale,
324
  controlnet_conditioning_scale, seed, use_hed, use_canny]:
325
+ comp.change(fn=run, inputs=inputs, outputs=outputs, queue=True)
 
 
326
 
327
+ # Enable queue and cap worker threads globally
328
+ demo.queue(max_size=20).launch(max_threads=2)