Commit
·
71da11e
1
Parent(s):
6ed2376
up
Browse files- run_bug_4297.py +52 -0
- run_local_xl.py +12 -6
- run_sd_compile.py +20 -0
- run_sd_xl.py +0 -2
- run_xl_ediffi.py +3 -4
- run_xl_lora.py +7 -0
run_bug_4297.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from diffusers import DiffusionPipeline
|
| 3 |
+
import torch
|
| 4 |
+
torch.backends.cudnn.deterministic = False
|
| 5 |
+
torch.backends.cuda.matmul.allow_tf32 = False
|
| 6 |
+
torch.backends.cudnn.allow_tf32 = False
|
| 7 |
+
torch.backends.cudnn.benchmark = True
|
| 8 |
+
torch.backends.cuda.enable_flash_sdp(False)
|
| 9 |
+
|
| 10 |
+
base_pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
| 11 |
+
base_pipe.to("cuda") # OR, pipe.enable_sequential_cpu_offload() OR,
|
| 12 |
+
#pipe.enable_model_cpu_offload()
|
| 13 |
+
|
| 14 |
+
# if using torch < 2.0
|
| 15 |
+
# pipe.enable_xformers_memory_efficient_attention()
|
| 16 |
+
|
| 17 |
+
# Reproducibility.
|
| 18 |
+
torch_seed = 4202420420
|
| 19 |
+
refiner_seed = 698008569
|
| 20 |
+
refiner_strength = 0.50
|
| 21 |
+
prompt = "happy child flying a kite on a sunny day"
|
| 22 |
+
negative_prompt = ''
|
| 23 |
+
# Batch size.
|
| 24 |
+
batch_size = 2
|
| 25 |
+
do_latent = False
|
| 26 |
+
prompt = [ prompt ] * batch_size
|
| 27 |
+
negative_prompt = [ negative_prompt ] * batch_size
|
| 28 |
+
# Number of scheduler steps; the same value is passed as num_inference_steps to both the base and the refiner pipeline.
|
| 29 |
+
total_num_steps = 20
|
| 30 |
+
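# One list entry per batch item. NOTE: `[gen] * batch_size` repeats the same Generator object; it does not create independent generators.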
|
| 31 |
+
generator = [ torch.Generator(device="cuda").manual_seed(torch_seed) ] * batch_size
|
| 32 |
+
|
| 33 |
+
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", vae=base_pipe.vae, text_encoder_2=base_pipe.text_encoder_2, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
| 34 |
+
# Using channels last layout.
|
| 35 |
+
pipe.unet.to(memory_format=torch.channels_last)
|
| 36 |
+
pipe.to("cuda") # OR, pipe.enable_sequential_cpu_offload() OR,
|
| 37 |
+
|
| 38 |
+
# Generate the base image.
|
| 39 |
+
pre_image = base_pipe(prompt=prompt, generator=generator,
|
| 40 |
+
num_inference_steps=total_num_steps, negative_prompt=negative_prompt, output_type="latent" if do_latent else "pil").images
|
| 41 |
+
|
| 42 |
+
# Refiner strengths to test. Only 0.2 is exercised here; extend the list (e.g. 0.1 to 0.9 in 0.1 increments) for a full sweep.
|
| 43 |
+
test_strengths = [0.2]
|
| 44 |
+
for refiner_strength in test_strengths:
|
| 45 |
+
# Create a freshly seeded refiner Generator (seeded with refiner_seed); the list repeats that one Generator object batch_size times.
|
| 46 |
+
generator_two = [ torch.Generator(device="cuda").manual_seed(refiner_seed) ] * batch_size
|
| 47 |
+
# Put through the refiner now.
|
| 48 |
+
images = pipe(prompt=prompt, image=pre_image, aesthetic_score=10, negative_aesthetic_score=2.4, generator=generator_two,
|
| 49 |
+
num_inference_steps=total_num_steps, strength=refiner_strength, negative_prompt=negative_prompt).images # denoising_start
|
| 50 |
+
for idx in range(0, len(images)):
|
| 51 |
+
print(f'Image: {idx}')
|
| 52 |
+
images[idx].save(f'/home/patrick/images/refiner_bug/test-{refiner_strength}-{idx}--{batch_size}--{do_latent}.png', format='PNG')
|
run_local_xl.py
CHANGED
|
@@ -16,18 +16,24 @@ from io import BytesIO
|
|
| 16 |
api = HfApi()
|
| 17 |
start_time = time.time()
|
| 18 |
|
| 19 |
-
use_refiner = bool(int(sys.argv[1]))
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
vae = AutoencoderKL.from_pretrained(
|
| 23 |
if use_diffusers:
|
| 24 |
-
pipe = StableDiffusionXLPipeline.from_pretrained(
|
|
|
|
| 25 |
print(time.time() - start_time)
|
| 26 |
pipe.to("cuda")
|
| 27 |
|
| 28 |
if use_refiner:
|
| 29 |
start_time = time.time()
|
| 30 |
-
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
|
| 31 |
print(time.time() - start_time)
|
| 32 |
refiner.to("cuda")
|
| 33 |
# refiner.enable_sequential_cpu_offload()
|
|
@@ -49,7 +55,7 @@ steps = 20
|
|
| 49 |
seed = 0
|
| 50 |
seed_everything(seed)
|
| 51 |
start_time = time.time()
|
| 52 |
-
image = pipe(prompt=prompt, num_inference_steps=steps, output_type="pil").images[0]
|
| 53 |
print(time.time() - start_time)
|
| 54 |
|
| 55 |
if use_refiner:
|
|
|
|
| 16 |
api = HfApi()
|
| 17 |
start_time = time.time()
|
| 18 |
|
| 19 |
+
# use_refiner = bool(int(sys.argv[1]))
|
| 20 |
+
use_refiner = True
|
| 21 |
+
use_diffusers = True
|
| 22 |
+
path = "/home/patrick/sai/stable-diffusion-xl-base-1.0"
|
| 23 |
+
refiner_path = "/home/patrick/sai/stable-diffusion-xl-refiner-1.0"
|
| 24 |
+
vae_path = "/home/patrick/sai/stable-diffusion-xl-base-1.0/vae/"
|
| 25 |
+
vae_path = "/home/patrick/sai/sdxl-vae"
|
| 26 |
|
| 27 |
+
vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
|
| 28 |
if use_diffusers:
|
| 29 |
+
# pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
| 30 |
+
pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
|
| 31 |
print(time.time() - start_time)
|
| 32 |
pipe.to("cuda")
|
| 33 |
|
| 34 |
if use_refiner:
|
| 35 |
start_time = time.time()
|
| 36 |
+
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(refiner_path, vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
| 37 |
print(time.time() - start_time)
|
| 38 |
refiner.to("cuda")
|
| 39 |
# refiner.enable_sequential_cpu_offload()
|
|
|
|
| 55 |
seed = 0
|
| 56 |
seed_everything(seed)
|
| 57 |
start_time = time.time()
|
| 58 |
+
image = pipe(prompt=prompt, num_inference_steps=steps, output_type="latent" if use_refiner else "pil").images[0]
|
| 59 |
print(time.time() - start_time)
|
| 60 |
|
| 61 |
if use_refiner:
|
run_sd_compile.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from diffusers import StableDiffusionPipeline
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
path = "runwayml/stable-diffusion-v1-5"
|
| 6 |
+
|
| 7 |
+
run_compile = True # Set True / False
|
| 8 |
+
|
| 9 |
+
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
|
| 10 |
+
pipe = pipe.to("cuda:0")
|
| 11 |
+
pipe.unet.to(memory_format=torch.channels_last)
|
| 12 |
+
|
| 13 |
+
if run_compile:
|
| 14 |
+
print("Run torch compile")
|
| 15 |
+
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
| 16 |
+
|
| 17 |
+
prompt = "ghibli style, a fantasy landscape with castles"
|
| 18 |
+
|
| 19 |
+
for _ in range(3):
|
| 20 |
+
images = pipe(prompt=prompt).images
|
run_sd_xl.py
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
from diffusers import StableDiffusionXLPipeline
|
|
|
|
|
|
|
|
|
run_xl_ediffi.py
CHANGED
|
@@ -18,15 +18,14 @@ from torch.nn.functional import fractional_max_pool2d_with_indices
|
|
| 18 |
api = HfApi()
|
| 19 |
start_time = time.time()
|
| 20 |
|
| 21 |
-
model_id = "
|
| 22 |
scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
|
| 23 |
|
| 24 |
-
|
| 25 |
-
pipe_high_noise = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
| 26 |
# pipe_high_noise.scheduler = scheduler
|
| 27 |
pipe_high_noise.to("cuda")
|
| 28 |
|
| 29 |
-
pipe_low_noise = DiffusionPipeline.from_pretrained("
|
| 30 |
# pipe_low_noise.scheduler = scheduler
|
| 31 |
pipe_low_noise.to("cuda")
|
| 32 |
|
|
|
|
| 18 |
api = HfApi()
|
| 19 |
start_time = time.time()
|
| 20 |
|
| 21 |
+
model_id = "/home/patrick/stable-diffusion-xl-base-1.0/"
|
| 22 |
scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
|
| 23 |
|
| 24 |
+
pipe_high_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-base-1.0/", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
|
|
|
| 25 |
# pipe_high_noise.scheduler = scheduler
|
| 26 |
pipe_high_noise.to("cuda")
|
| 27 |
|
| 28 |
+
pipe_low_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-refiner-1.0/", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
| 29 |
# pipe_low_noise.scheduler = scheduler
|
| 30 |
pipe_low_noise.to("cuda")
|
| 31 |
|
run_xl_lora.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from diffusers import DiffusionPipeline
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
pipe = DiffusionPipeline.from_pretrained("/home/patrick/sai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
|
| 6 |
+
pipe.load_lora_weights("/home/patrick/sai/stable-diffusion-xl-base-1.0/sd_xl_offset_example-lora_1.0.safetensors")
|
| 7 |
+
import ipdb; ipdb.set_trace()
|