Spaces:
Running
on
A10G
Commit
·
11a0843
1
Parent(s):
b66c7cf
Fix warmup steps after JIT to actually work
Browse files
app.py
CHANGED
|
@@ -15,10 +15,6 @@ pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1
|
|
| 15 |
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
|
| 16 |
pipe = pipe.to("cuda")
|
| 17 |
|
| 18 |
-
# optimize for speed
|
| 19 |
-
pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=True) # hopefully this works on Ampere series GPU
|
| 20 |
-
pipe(prompt="an astronaut riding a green horse", num_inference_steps=25) # force lengthy JIT compilation to happen ahead of time
|
| 21 |
-
|
| 22 |
# watermarking helper functions. paraphrased from the reference impl of arXiv:2305.20030
|
| 23 |
|
| 24 |
def circle_mask(size=128, r=16, x_offset=0, y_offset=0):
|
|
@@ -114,6 +110,10 @@ def detect(image):
|
|
| 114 |
def generate(prompt):
|
| 115 |
return pipe(prompt=prompt, num_inference_steps=25, latents=get_noise()).images[0]
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
# actual gradio demo
|
| 118 |
|
| 119 |
def manager(input, progress=gr.Progress(track_tqdm=True)): # to prevent the queue from overloading
|
|
|
|
| 15 |
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
|
| 16 |
pipe = pipe.to("cuda")
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# watermarking helper functions. paraphrased from the reference impl of arXiv:2305.20030
|
| 19 |
|
| 20 |
def circle_mask(size=128, r=16, x_offset=0, y_offset=0):
|
|
|
|
| 110 |
def generate(prompt):
|
| 111 |
return pipe(prompt=prompt, num_inference_steps=25, latents=get_noise()).images[0]
|
| 112 |
|
| 113 |
+
# optimize for speed
|
| 114 |
+
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
| 115 |
+
print(detect(generate("an astronaut riding a green horse"))) # warmup after jit
|
| 116 |
+
|
| 117 |
# actual gradio demo
|
| 118 |
|
| 119 |
def manager(input, progress=gr.Progress(track_tqdm=True)): # to prevent the queue from overloading
|