sausheong committed on
Commit
db60394
·
1 Parent(s): ff52fe9

added sd2

Browse files
Files changed (2) hide show
  1. app.py +401 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionUpscalePipeline, DiffusionPipeline, DPMSolverMultistepScheduler
2
+ import gradio as gr
3
+ import torch
4
+ from PIL import Image
5
+
6
# Latest status message for the UI; written by update_state() and polled by
# update_state_info().
state = None
# Step count shown in progress messages; kept in sync by on_steps_change().
current_steps = 25

model_id = 'stabilityai/stable-diffusion-2'

scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")

# NOTE(review): revision/dtype fall back to fp32 when CUDA is missing, but the
# pipeline is still moved to "cuda" and xformers is enabled unconditionally,
# so this module presumably cannot start on a CPU-only host - confirm.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    revision="fp16" if torch.cuda.is_available() else "fp32",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler
).to("cuda")
pipe.enable_attention_slicing()
pipe.enable_xformers_memory_efficient_attention()

# Secondary pipelines are created lazily on first use of their mode.
pipe_i2i = None
pipe_upscale = None
pipe_inpaint = None

# Memory-optimisation toggles, changed from the UI checkboxes.
attn_slicing_enabled = True
mem_eff_attn_enabled = True

# Internal mode key -> label shown in the "Inference Mode" radio.
modes = {
    'txt2img': 'Text to Image',
    'img2img': 'Image to Image',
    'inpaint': 'Inpainting',
    'upscale4x': 'Upscale 4x',
}
# Label of the mode used by the most recent inference() call.
current_mode = modes['txt2img']
36
+
37
def error_str(error, title="Error"):
    """Format *error* as a small Markdown block.

    Args:
        error: Message or exception to display; anything falsy yields "".
        title: Heading text placed above the message.

    Returns:
        A level-4 Markdown heading followed by the message, or an empty
        string when there is nothing to report.
    """
    if not error:
        return ""
    return f"#### {title}\n{error}"
40
+
41
def update_state(new_state):
    """Store *new_state* in the module-level `state` status message."""
    global state
    state = new_state
44
+
45
def update_state_info(old_state):
    """Return a Gradio update when the status message has changed.

    Args:
        old_state: The text currently shown in the status textbox.

    Returns:
        `gr.update(value=state)` when the module-level `state` is non-empty
        and differs from *old_state*; otherwise None (implicit).
    """
    if state and state != old_state:
        return gr.update(value=state)
48
+
49
def set_mem_optimizations(pipe):
    """Apply the current memory-optimisation toggles to *pipe*.

    Reads the module-level `attn_slicing_enabled` and `mem_eff_attn_enabled`
    flags and enables or disables the matching feature on the pipeline.
    """
    if attn_slicing_enabled:
        pipe.enable_attention_slicing()
    else:
        pipe.disable_attention_slicing()

    if mem_eff_attn_enabled:
        pipe.enable_xformers_memory_efficient_attention()
    else:
        pipe.disable_xformers_memory_efficient_attention()
59
+
60
def get_i2i_pipe(scheduler):
    """Load the image-to-image pipeline for `model_id` and move it to CUDA.

    Args:
        scheduler: Scheduler instance handed to the pipeline.

    Returns:
        The loaded `StableDiffusionImg2ImgPipeline`, with the current memory
        optimisation toggles applied.
    """
    update_state("Loading image to image model...")

    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
        model_id,
        revision="fp16" if torch.cuda.is_available() else "fp32",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        scheduler=scheduler
    )
    set_mem_optimizations(pipe)
    pipe.to("cuda")
    return pipe
73
+
74
def get_inpaint_pipe():
    """Load the Stable Diffusion 2 inpainting pipeline onto CUDA.

    Unlike the other loaders, this one always enables attention slicing and
    xformers attention instead of consulting the UI toggles.

    Returns:
        The loaded inpainting pipeline using a DPM-Solver multistep scheduler.
    """
    update_state("Loading inpainting model...")

    # TODO: passing `scheduler=` to from_pretrained here messes up the end
    # result (reported by the original author as a Diffusers bug), so the
    # scheduler is swapped in after loading instead.
    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-inpainting",
        revision="fp16" if torch.cuda.is_available() else "fp32",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    ).to("cuda")
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_attention_slicing()
    pipe.enable_xformers_memory_efficient_attention()
    return pipe
88
+
89
def get_upscale_pipe(scheduler):
    """Load the x4 upscaler pipeline and move it to CUDA.

    Args:
        scheduler: Accepted for signature parity with the other loaders but
            currently unused; the pipeline keeps the scheduler it ships with.

    Returns:
        The loaded `StableDiffusionUpscalePipeline`, with the current memory
        optimisation toggles applied.
    """
    update_state("Loading upscale model...")

    pipe = StableDiffusionUpscalePipeline.from_pretrained(
        "stabilityai/stable-diffusion-x4-upscaler",
        revision="fp16" if torch.cuda.is_available() else "fp32",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    set_mem_optimizations(pipe)
    pipe.to("cuda")
    return pipe
103
+
104
def switch_attention_slicing(attn_slicing):
    """Record the attention-slicing checkbox value in `attn_slicing_enabled`.

    Only the flag is stored here; it is applied to a pipeline by
    set_mem_optimizations().
    """
    global attn_slicing_enabled
    attn_slicing_enabled = attn_slicing
107
+
108
def switch_mem_eff_attn(mem_eff_attn):
    """Record the xformers checkbox value in `mem_eff_attn_enabled`.

    Only the flag is stored here; it is applied to a pipeline by
    set_mem_optimizations().
    """
    global mem_eff_attn_enabled
    mem_eff_attn_enabled = mem_eff_attn
111
+
112
def pipe_callback(step: int, timestep: int, latents: torch.FloatTensor):
    """Publish "<step>/<current_steps> steps" as the status message.

    Passed as `callback=` to the pipelines; `timestep` and `latents` are
    accepted but not used.
    """
    update_state(f"{step}/{current_steps} steps")
114
+
115
def inference(inf_mode, prompt, n_images, guidance, steps, width=768, height=768, seed=0, img=None, strength=0.5, neg_prompt=""):
    """Run one generation request in the selected mode.

    When the mode differs from the previous call, the pipeline for the new
    mode is moved to CUDA and every other already-loaded pipeline to CPU.

    Args:
        inf_mode: One of the labels in `modes`.
        prompt: Text prompt.
        n_images: Number of images per prompt.
        guidance: Guidance scale.
        steps: Number of inference steps.
        width, height: Requested output size (txt2img / img2img).
        seed: RNG seed; 0 means "random" (no generator is passed).
        img: Dict from the image input (the workers read its 'image' and
            'mask' entries); required for every mode except txt2img.
        strength: Transformation strength for img2img.
        neg_prompt: Negative prompt.

    Returns:
        A `(images, error_update)` pair for the gallery and the error
        Markdown. On failure `images` is None and the update shows the
        error; on success the update hides the error component.
    """
    global current_mode

    update_state(" ")

    if inf_mode != current_mode:
        pipe.to("cuda" if inf_mode == modes['txt2img'] else "cpu")

        if pipe_i2i is not None:
            pipe_i2i.to("cuda" if inf_mode == modes['img2img'] else "cpu")

        if pipe_inpaint is not None:
            pipe_inpaint.to("cuda" if inf_mode == modes['inpaint'] else "cpu")

        if pipe_upscale is not None:
            pipe_upscale.to("cuda" if inf_mode == modes['upscale4x'] else "cpu")

        current_mode = inf_mode

    # int() guards against the slider delivering the seed as a float.
    generator = torch.Generator('cuda').manual_seed(int(seed)) if seed != 0 else None

    try:

        if inf_mode == modes['txt2img']:
            return txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator), gr.update(visible=False, value=None)

        elif inf_mode == modes['img2img']:
            if img is None:
                return None, gr.update(visible=True, value=error_str("Image is required for Image to Image mode"))

            return img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator), gr.update(visible=False, value=None)

        elif inf_mode == modes['inpaint']:
            if img is None:
                return None, gr.update(visible=True, value=error_str("Image is required for Inpainting mode"))

            return inpaint(prompt, n_images, neg_prompt, img, guidance, steps, width, height, generator), gr.update(visible=False, value=None)

        elif inf_mode == modes['upscale4x']:
            if img is None:
                return None, gr.update(visible=True, value=error_str("Image is required for Upscale mode"))

            return upscale(prompt, n_images, neg_prompt, img, guidance, steps, generator), gr.update(visible=False, value=None)

        # Unknown mode: report it rather than returning a bare None for two outputs.
        return None, gr.update(visible=True, value=error_str(f"Unknown inference mode: {inf_mode}"))
    except Exception as e:
        # UI boundary: surface any pipeline failure in the error panel.
        return None, gr.update(visible=True, value=error_str(e))
161
+
162
def txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator):
    """Generate images from text with the module-level `pipe`.

    Progress is reported through `pipe_callback`; the status message is
    cleared once the pipeline returns.

    Returns:
        The `.images` attribute of the pipeline output.
    """
    result = pipe(
        prompt,
        num_images_per_prompt=n_images,
        negative_prompt=neg_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
        callback=pipe_callback).images

    update_state("")

    return result
178
+
179
def img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator):
    """Transform an input image with the image-to-image pipeline.

    The pipeline is loaded on first use. The input image is scaled, keeping
    its aspect ratio, so that it fits inside `width` x `height`.

    Args:
        img: Dict whose 'image' entry is the PIL image to transform.

    Returns:
        The `.images` attribute of the pipeline output.
    """
    global pipe_i2i
    if pipe_i2i is None:
        pipe_i2i = get_i2i_pipe(scheduler)

    img = img['image']
    # Largest uniform scale at which both sides stay within the requested size.
    ratio = min(height / img.height, width / img.width)
    img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
    result = pipe_i2i(
        prompt,
        num_images_per_prompt=n_images,
        negative_prompt=neg_prompt,
        init_image=img,
        num_inference_steps=int(steps),
        strength=strength,
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
        callback=pipe_callback).images

    update_state("")

    return result
204
+
205
# TODO Currently supports only 512x512 images
def inpaint(prompt, n_images, neg_prompt, img, guidance, steps, width, height, generator):
    """Inpaint the masked region of an image.

    The pipeline is loaded on first use. Image and mask are padded to a
    square and then resized to 512x512 before being passed to it.

    Args:
        img: Dict with 'image' (PIL image) and 'mask' (PIL mask) entries.
        width, height: Accepted for signature parity; currently unused
            because the working size is fixed at 512x512.

    Returns:
        The `.images` attribute of the pipeline output.
    """
    global pipe_inpaint
    if pipe_inpaint is None:
        pipe_inpaint = get_inpaint_pipe()

    inp_img = img['image']
    mask = img['mask']
    inp_img = square_padding(inp_img)
    mask = square_padding(mask)

    inp_img = inp_img.resize((512, 512))
    mask = mask.resize((512, 512))

    result = pipe_inpaint(
        prompt,
        image=inp_img,
        mask_image=mask,
        num_images_per_prompt=n_images,
        negative_prompt=neg_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        generator=generator,
        callback=pipe_callback).images

    update_state("")

    return result
241
+
242
def square_padding(img):
    """Return *img* centred on a black square canvas.

    An already-square image is returned unchanged (same object). Otherwise a
    new 'RGB' image whose side equals the longer edge is created and *img*
    is pasted in its centre.
    """
    width, height = img.size
    if width == height:
        return img
    new_size = max(width, height)
    new_img = Image.new('RGB', (new_size, new_size), (0, 0, 0, 255))
    new_img.paste(img, ((new_size - width) // 2, (new_size - height) // 2))
    return new_img
250
+
251
def upscale(prompt, n_images, neg_prompt, img, guidance, steps, generator):
    """Upscale an image 4x, loading the upscale pipeline on first use.

    Args:
        n_images: Accepted for signature parity; not forwarded, so exactly
            one upscaled result is produced per call.
        img: Dict whose 'image' entry is the PIL image to upscale.

    Returns:
        Whatever `upscale_tiling` returns (a list of images).
    """
    global pipe_upscale
    if pipe_upscale is None:
        pipe_upscale = get_upscale_pipe(scheduler)

    img = img['image']
    return upscale_tiling(prompt, neg_prompt, img, guidance, steps, generator)
270
+
271
def upscale_tiling(prompt, neg_prompt, img, guidance, steps, generator):
    """Upscale *img* 4x, processing it in 128x128 tiles when it is larger.

    Images that fit in one 128x128 tile go through the pipeline in a single
    call with the user's prompt. Larger images are split into a grid of
    128x128 tiles, each upscaled separately with an empty prompt, and the
    results are assembled on a canvas four times the original size.

    The earlier "padding" step pasted a filler image at (width, height),
    entirely outside the image bounds, so it changed nothing. It is removed
    here and the input image is no longer touched. Edge tiles still use a
    crop box that extends past the image; Pillow is expected to zero-fill
    that region - TODO confirm. Whatever lands beyond the 4x canvas is
    clipped when the tile is pasted.

    Args:
        prompt: Prompt used only for the single-tile path.
        neg_prompt: Negative prompt used only for the single-tile path.
        img: PIL image to upscale.
        guidance: Guidance scale.
        steps: Number of inference steps; coerced to int like the other modes.
        generator: Optional torch generator for reproducibility.

    Returns:
        A list of upscaled PIL images.
    """
    tile_size = 128
    steps = int(steps)
    width, height = img.size

    # Round each dimension up to the next multiple of the tile size.
    padded_width = -(-width // tile_size) * tile_size
    padded_height = -(-height // tile_size) * tile_size

    if padded_width > tile_size or padded_height > tile_size:

        num_tiles_x = padded_width // tile_size
        num_tiles_y = padded_height // tile_size
        total_tiles = num_tiles_x * num_tiles_y

        upscaled_img = Image.new('RGB', (width * 4, height * 4))
        for x in range(num_tiles_x):
            for y in range(num_tiles_y):
                update_state(f"Upscaling tile {x * num_tiles_y + y + 1}/{total_tiles}")
                tile = img.crop((x * tile_size, y * tile_size, (x + 1) * tile_size, (y + 1) * tile_size))

                upscaled_tile = pipe_upscale(
                    prompt="",
                    image=tile,
                    num_inference_steps=steps,
                    guidance_scale=guidance,
                    generator=generator,
                ).images[0]

                upscaled_img.paste(upscaled_tile, (x * upscaled_tile.size[0], y * upscaled_tile.size[1]))

        return [upscaled_img]

    return pipe_upscale(
        prompt=prompt,
        image=img,
        num_inference_steps=steps,
        guidance_scale=guidance,
        negative_prompt=neg_prompt,
        generator=generator,
    ).images
321
+
322
+
323
+
324
def on_mode_change(mode):
    """Compute visibility updates for the mode-dependent controls.

    Returns:
        Four `gr.update(visible=...)` values, in order: the image options
        group (any image-based mode), the inpainting note, the upscale
        note, and the strength slider (img2img only).
    """
    return gr.update(visible = mode in (modes['img2img'], modes['inpaint'], modes['upscale4x'])), \
           gr.update(visible = mode == modes['inpaint']), \
           gr.update(visible = mode == modes['upscale4x']), \
           gr.update(visible = mode == modes['img2img'])
329
+
330
def on_steps_change(steps):
    """Remember the steps slider value so progress messages can show the total."""
    global current_steps
    current_steps = steps
333
+
334
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting of the layout containers below is reconstructed from syntax and
# blank-line grouping - confirm against the intended layout.
css = """.main-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.main-div div h1{font-weight:900;margin-bottom:7px}.main-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
"""
with gr.Blocks(css=css) as demo:
    gr.HTML(
        f"""
            <div class="main-div">
              <div>
                <h1>Stable Diffusion 2</h1>
              </div><br>
              <p> Model used: <a href="https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/768-v-ema.ckpt" target="_blank">768-v-ema.ckpt</a></p>
              Running on <b>{"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"}</b>
            </div>
        """
    )
    with gr.Row():

        # Left column: prompt, results and status.
        with gr.Column(scale=55):
            with gr.Group():
                with gr.Row():
                    prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2, placeholder="Enter prompt").style(container=False)
                    generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))

                gallery = gr.Gallery(label="Generated images", show_label=False).style(grid=[2], height="auto")
            state_info = gr.Textbox(label="State", show_label=False, max_lines=2).style(container=False)
            error_output = gr.Markdown(visible=False)

        # Right column: mode selection and generation parameters.
        with gr.Column(scale=45):
            # The [:4] slice currently keeps every entry of `modes`.
            inf_mode = gr.Radio(label="Inference Mode", choices=list(modes.values())[:4], value=modes['txt2img'])

            with gr.Group(visible=False) as i2i_options:
                image = gr.Image(label="Image", height=128, type="pil", tool='sketch')
                inpaint_info = gr.Markdown("Inpainting resizes and pads images to 512x512", visible=False)
                upscale_info = gr.Markdown(
                    "Best for small images (128x128 or smaller).<br>\n"
                    "Bigger images will be sliced into 128x128 tiles which will be upscaled individually.<br>\n"
                    "This is done to avoid running out of GPU memory.",
                    visible=False)
                strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)

            with gr.Group():
                neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")

                n_images = gr.Slider(label="Number of images", value=1, minimum=1, maximum=4, step=1)
                with gr.Row():
                    guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
                    steps = gr.Slider(label="Steps", value=current_steps, minimum=2, maximum=100, step=1)

                with gr.Row():
                    width = gr.Slider(label="Width", value=768, minimum=64, maximum=1024, step=8)
                    height = gr.Slider(label="Height", value=768, minimum=64, maximum=1024, step=8)

                seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
                with gr.Accordion("Memory optimization"):
                    attn_slicing = gr.Checkbox(label="Attention slicing (a bit slower, but uses less memory)", value=attn_slicing_enabled)
                    mem_eff_attn = gr.Checkbox(label="Memory efficient attention (xformers)", value=mem_eff_attn_enabled)

    # Lightweight UI callbacks bypass the queue.
    inf_mode.change(on_mode_change, inputs=[inf_mode], outputs=[i2i_options, inpaint_info, upscale_info, strength], queue=False)
    steps.change(on_steps_change, inputs=[steps], outputs=[], queue=False)
    attn_slicing.change(lambda x: switch_attention_slicing(x), inputs=[attn_slicing], queue=False)
    mem_eff_attn.change(lambda x: switch_mem_eff_attn(x), inputs=[mem_eff_attn], queue=False)

    # Order must match the positional parameters of inference().
    inputs = [inf_mode, prompt, n_images, guidance, steps, width, height, seed, image, strength, neg_prompt]
    outputs = [gallery, error_output]
    prompt.submit(inference, inputs=inputs, outputs=outputs)
    generate.click(inference, inputs=inputs, outputs=outputs)

    # Poll the status message twice per second.
    demo.load(update_state_info, inputs=state_info, outputs=state_info, every=0.5, show_progress=False)

demo.queue()
demo.launch(debug=True, share=True, height=768)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ diffusers
2
+ transformers
3
+ accelerate
4
+ scipy
5
+ triton
6
+ ftfy
7
+ gradio
8
+ https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/T4/xformers-0.0.13.dev0-py3-none-any.whl