up

Browse files

Files changed (6) hide show

run_bug_4297.py +52 -0
run_local_xl.py +12 -6
run_sd_compile.py +20 -0
run_sd_xl.py +0 -2
run_xl_ediffi.py +3 -4
run_xl_lora.py +7 -0

run_bug_4297.py ADDED Viewed

	@@ -0,0 +1,52 @@

+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline
+import torch
+torch.backends.cudnn.deterministic = False
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cudnn.allow_tf32 = False
+torch.backends.cudnn.benchmark = True
+torch.backends.cuda.enable_flash_sdp(False)  # turn off the flash scaled-dot-product attention backend
+base_pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
+base_pipe.to("cuda") # Alternatively: base_pipe.enable_sequential_cpu_offload(), or
+# base_pipe.enable_model_cpu_offload()
+# If using torch < 2.0:
+# base_pipe.enable_xformers_memory_efficient_attention()
+# Fixed seeds, prompts and settings so the run is reproducible.
+torch_seed = 4202420420
+refiner_seed = 698008569
+refiner_strength = 0.50  # NOTE: overwritten by the loop over test_strengths further down
+prompt = "happy child flying a kite on a sunny day"
+negative_prompt = ''
+# Batch size: the prompt, negative prompt and generator lists are each repeated this many times.
+batch_size = 2
+do_latent = False  # when True the base pipeline is asked for output_type="latent" instead of "pil"
+prompt = [ prompt ] * batch_size
+negative_prompt = [ negative_prompt ] * batch_size
+# Step count passed as num_inference_steps to BOTH pipelines; the refiner call additionally passes strength.
+total_num_steps = 20
+# NOTE(review): this list holds batch_size references to ONE Generator object, not independent generators - confirm this is intended.
+generator = [ torch.Generator(device="cuda").manual_seed(torch_seed) ] * batch_size
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", vae=base_pipe.vae, text_encoder_2=base_pipe.text_encoder_2, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
+# Use the channels-last memory format for the refiner UNet.
+pipe.unet.to(memory_format=torch.channels_last)
+pipe.to("cuda") # Alternatively: pipe.enable_sequential_cpu_offload()
+# Generate the base images (latents when do_latent is True, PIL images otherwise).
+pre_image = base_pipe(prompt=prompt, generator=generator,
+        num_inference_steps=total_num_steps, negative_prompt=negative_prompt, output_type="latent" if do_latent else "pil").images
+# Refiner strengths to test; currently only 0.2.
+test_strengths = [0.2]
+for refiner_strength in test_strengths:
+    # Re-seed for every strength. NOTE(review): as with `generator`, the list repeats a single Generator object.
+    generator_two = [ torch.Generator(device="cuda").manual_seed(refiner_seed) ] * batch_size
+    # Run the base output through the refiner pipeline.
+    images = pipe(prompt=prompt, image=pre_image, aesthetic_score=10, negative_aesthetic_score=2.4, generator=generator_two,
+                num_inference_steps=total_num_steps, strength=refiner_strength, negative_prompt=negative_prompt).images # denoising_start
+    for idx in range(0, len(images)):
+        print(f'Image: {idx}')
+        images[idx].save(f'/home/patrick/images/refiner_bug/test-{refiner_strength}-{idx}--{batch_size}--{do_latent}.png', format='PNG')

run_local_xl.py CHANGED Viewed

@@ -16,18 +16,24 @@ from io import BytesIO
 api = HfApi()
 start_time = time.time()
-use_refiner = bool(int(sys.argv[1]))
-use_diffusers = False
-vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, force_upcast=True)
 if use_diffusers:
-    pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
     print(time.time() - start_time)
     pipe.to("cuda")
     if use_refiner:
         start_time = time.time()
-        refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
         print(time.time() - start_time)
         refiner.to("cuda")
         # refiner.enable_sequential_cpu_offload()
@@ -49,7 +55,7 @@ steps = 20
 seed = 0
 seed_everything(seed)
 start_time = time.time()
-image = pipe(prompt=prompt, num_inference_steps=steps, output_type="pil").images[0]
 print(time.time() - start_time)
 if use_refiner:

 api = HfApi()
 start_time = time.time()
+# use_refiner = bool(int(sys.argv[1]))
+use_refiner = True
+use_diffusers = True
+path = "/home/patrick/sai/stable-diffusion-xl-base-1.0"
+refiner_path = "/home/patrick/sai/stable-diffusion-xl-refiner-1.0"
+vae_path = "/home/patrick/sai/stable-diffusion-xl-base-1.0/vae/"
+vae_path = "/home/patrick/sai/sdxl-vae"
+vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
 if use_diffusers:
+    # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
+    pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
     print(time.time() - start_time)
     pipe.to("cuda")
     if use_refiner:
         start_time = time.time()
+        refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(refiner_path, vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
         print(time.time() - start_time)
         refiner.to("cuda")
         # refiner.enable_sequential_cpu_offload()
 seed = 0
 seed_everything(seed)
 start_time = time.time()
+image = pipe(prompt=prompt, num_inference_steps=steps, output_type="latent" if use_refiner else "pil").images[0]
 print(time.time() - start_time)
 if use_refiner:

run_sd_compile.py ADDED Viewed

	@@ -0,0 +1,20 @@

+#!/usr/bin/env python3
+from diffusers import StableDiffusionPipeline
+import torch
+path = "runwayml/stable-diffusion-v1-5"
+run_compile = True  # Toggle: when True the UNet is wrapped with torch.compile before inference
+pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+pipe = pipe.to("cuda:0")
+pipe.unet.to(memory_format=torch.channels_last)  # channels-last memory format for the UNet
+if run_compile:
+    print("Run torch compile")
+    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+prompt = "ghibli style, a fantasy landscape with castles"
+for _ in range(3):  # three identical runs - presumably to separate compile warm-up from later runs (TODO confirm)
+    images = pipe(prompt=prompt).images  # results are overwritten each iteration and never saved

run_sd_xl.py DELETED Viewed

	@@ -1,2 +0,0 @@
1	- #!/usr/bin/env python3
2	- from diffusers import StableDiffusionXLPipeline

run_xl_ediffi.py CHANGED Viewed

@@ -18,15 +18,14 @@ from torch.nn.functional import fractional_max_pool2d_with_indices
 api = HfApi()
 start_time = time.time()
-model_id = "stabilityai/stable-diffusion-xl-base-0.9"
 scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
-model_id = "stabilityai/stable-diffusion-xl-base-0.9"
-pipe_high_noise = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
 # pipe_high_noise.scheduler = scheduler
 pipe_high_noise.to("cuda")
-pipe_low_noise = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
 # pipe_low_noise.scheduler = scheduler
 pipe_low_noise.to("cuda")

 api = HfApi()
 start_time = time.time()
+model_id = "/home/patrick/stable-diffusion-xl-base-1.0/"
 scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
+pipe_high_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-base-1.0/", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
 # pipe_high_noise.scheduler = scheduler
 pipe_high_noise.to("cuda")
+pipe_low_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-refiner-1.0/", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
 # pipe_low_noise.scheduler = scheduler
 pipe_low_noise.to("cuda")

run_xl_lora.py ADDED Viewed

	@@ -0,0 +1,7 @@

+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline
+import torch
+pipe = DiffusionPipeline.from_pretrained("/home/patrick/sai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)  # loads from a local directory
+pipe.load_lora_weights("/home/patrick/sai/stable-diffusion-xl-base-1.0/sd_xl_offset_example-lora_1.0.safetensors")  # LoRA weights read from a local .safetensors file
+import ipdb; ipdb.set_trace()  # NOTE(review): interactive debugger breakpoint; the script stops here and never runs inference