ByteDance
/

Hyper-SD

@@ -31,9 +31,10 @@ In this repository, we release the models distilled from [SDXL Base 1.0](https:/
 * `Hyper-SD15-Nstep-lora.safetensors`: Lora checkpoint, for SD1.5-related models.
 * `Hyper-SDXL-1step-unet.safetensors`: Unet checkpoint distilled from SDXL-Base.
-## SDXL-related models Usage
-### 2-Steps, 4-Steps, 8-steps LoRA
 ```python
 import torch
 from diffusers import DiffusionPipeline, DDIMScheduler
@@ -46,14 +47,15 @@ ckpt_name = "Hyper-SDXL-2steps-lora.safetensors"
 pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")
 pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
 pipe.fuse_lora()
-# Ensure ddim scheduler timestep spacing set as trailing
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 # lower eta results in more detail
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=2, guidance_scale=0).images[0]
 ```
-### Unified LoRA (support 1 to 8 steps inference)
 ```python
 import torch
 from diffusers import DiffusionPipeline, TCDScheduler
@@ -67,15 +69,14 @@ pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
 pipe.fuse_lora()
 # Use TCD scheduler to achieve better image quality
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
-# lower eta results in more detail
 eta=1.0
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, eta=eta).images[0]
 ```
-### 1-step SDXL Unet
 ```python
 import torch
 from diffusers import DiffusionPipeline, UNet2DConditionModel, LCMScheduler
@@ -96,10 +97,10 @@ image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, timesteps=[80
 ```
-## SD1.5-related models Usage
-### 2-Steps, 4-Steps, 8-steps LoRA
 ```python
 import torch
 from diffusers import DiffusionPipeline, DDIMScheduler
@@ -112,14 +113,15 @@ ckpt_name = "Hyper-SD15-2steps-lora.safetensors"
 pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")
 pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
 pipe.fuse_lora()
-# Ensure ddim scheduler timestep spacing set as trailing
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=2, guidance_scale=0).images[0]
 ```
-### Unified LoRA (support 1 to 8 steps inference)
 ```python
 import torch
 from diffusers import DiffusionPipeline, TCDScheduler
@@ -133,12 +135,180 @@ pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
 pipe.fuse_lora()
 # Use TCD scheduler to achieve better image quality
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
-# Lower eta results in more detail
 eta=1.0
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, eta=eta).images[0]
 ```
 ## Citation
 ```bibtex
 @article{ren2024hypersd,

 * `Hyper-SD15-Nstep-lora.safetensors`: Lora checkpoint, for SD1.5-related models.
 * `Hyper-SDXL-1step-unet.safetensors`: Unet checkpoint distilled from SDXL-Base.
+## Text-to-Image Usage
+### SDXL-related models
+#### 2-Steps, 4-Steps, 8-steps LoRA
+The example below uses the 2-steps LoRA; the other LoRAs work the same way with the corresponding number of inference steps.
 ```python
 import torch
 from diffusers import DiffusionPipeline, DDIMScheduler
 pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")
 pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
 pipe.fuse_lora()
+# Ensure the DDIM scheduler's timestep spacing is set to "trailing"!
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 # lower eta results in more detail
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=2, guidance_scale=0).images[0]
 ```
+#### Unified LoRA (support 1 to 8 steps inference)
+You can flexibly adjust the number of inference steps and the eta value to achieve the best performance.
 ```python
 import torch
 from diffusers import DiffusionPipeline, TCDScheduler
 pipe.fuse_lora()
 # Use TCD scheduler to achieve better image quality
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
+# Lower eta results in more detail for multi-steps inference
 eta=1.0
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, eta=eta).images[0]
 ```
+#### 1-step SDXL Unet
+This checkpoint supports single-step inference only.
 ```python
 import torch
 from diffusers import DiffusionPipeline, UNet2DConditionModel, LCMScheduler
 ```
+### SD1.5-related models
+#### 2-Steps, 4-Steps, 8-steps LoRA
+The example below uses the 2-steps LoRA; the other LoRAs work the same way with the corresponding number of inference steps.
 ```python
 import torch
 from diffusers import DiffusionPipeline, DDIMScheduler
 pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")
 pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
 pipe.fuse_lora()
+# Ensure the DDIM scheduler's timestep spacing is set to "trailing"!
 pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=2, guidance_scale=0).images[0]
 ```
+#### Unified LoRA (support 1 to 8 steps inference)
+You can flexibly adjust the number of inference steps and the eta value to achieve the best performance.
 ```python
 import torch
 from diffusers import DiffusionPipeline, TCDScheduler
 pipe.fuse_lora()
 # Use TCD scheduler to achieve better image quality
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
+# Lower eta results in more detail for multi-steps inference
 eta=1.0
 prompt="a photo of a cat"
 image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, eta=eta).images[0]
 ```
+## ControlNet Usage
+### SDXL-related models
+#### 2-Steps, 4-Steps, 8-steps LoRA
+The example below uses the Canny ControlNet with 2-step inference:
+```python
+import torch
+from diffusers.utils import load_image
+import numpy as np
+import cv2
+from PIL import Image
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, DDIMScheduler
+from huggingface_hub import hf_hub_download
+# Load original image
+image = load_image("https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png")
+image = np.array(image)
+# Prepare Canny Control Image
+low_threshold = 100
+high_threshold = 200
+image = cv2.Canny(image, low_threshold, high_threshold)
+image = image[:, :, None]
+image = np.concatenate([image, image, image], axis=2)
+control_image = Image.fromarray(image)
+control_image.save("control.png")
+control_weight = 0.5  # recommended for good generalization
+# Initialize pipeline
+controlnet = ControlNetModel.from_pretrained(
+    "diffusers/controlnet-canny-sdxl-1.0",
+    torch_dtype=torch.float16
+)
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, torch_dtype=torch.float16).to("cuda")
+pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-2steps-lora.safetensors"))
+# Ensure the DDIM scheduler's timestep spacing is set to "trailing"!
+pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+pipe.fuse_lora()
+image = pipe("A chocolate cookie", num_inference_steps=2, image=control_image, guidance_scale=0, controlnet_conditioning_scale=control_weight).images[0]
+image.save('image_out.png')
+```
+#### Unified LoRA (support 1 to 8 steps inference)
+The example below uses the Canny ControlNet:
+```python
+import torch
+from diffusers.utils import load_image
+import numpy as np
+import cv2
+from PIL import Image
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, TCDScheduler
+from huggingface_hub import hf_hub_download
+# Load original image
+image = load_image("https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png")
+image = np.array(image)
+# Prepare Canny Control Image
+low_threshold = 100
+high_threshold = 200
+image = cv2.Canny(image, low_threshold, high_threshold)
+image = image[:, :, None]
+image = np.concatenate([image, image, image], axis=2)
+control_image = Image.fromarray(image)
+control_image.save("control.png")
+control_weight = 0.5  # recommended for good generalization
+# Initialize pipeline
+controlnet = ControlNetModel.from_pretrained(
+    "diffusers/controlnet-canny-sdxl-1.0",
+    torch_dtype=torch.float16
+)
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    controlnet=controlnet, vae=vae, torch_dtype=torch.float16).to("cuda")
+# Load Hyper-SDXL-1step lora
+pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors"))
+pipe.fuse_lora()
+# Use TCD scheduler to achieve better image quality
+pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
+# Lower eta results in more detail for multi-steps inference
+eta=1.0
+image = pipe("A chocolate cookie", num_inference_steps=4, image=control_image, guidance_scale=0, controlnet_conditioning_scale=control_weight, eta=eta).images[0]
+image.save('image_out.png')
+```
+### SD1.5-related models
+#### 2-Steps, 4-Steps, 8-steps LoRA
+The example below uses the Canny ControlNet with 2-step inference:
+```python
+import torch
+from diffusers.utils import load_image
+import numpy as np
+import cv2
+from PIL import Image
+from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, DDIMScheduler
+from huggingface_hub import hf_hub_download
+controlnet_checkpoint = "lllyasviel/control_v11p_sd15_canny"
+# Load original image
+image = load_image("https://huggingface.co/lllyasviel/control_v11p_sd15_canny/resolve/main/images/input.png")
+image = np.array(image)
+# Prepare Canny Control Image
+low_threshold = 100
+high_threshold = 200
+image = cv2.Canny(image, low_threshold, high_threshold)
+image = image[:, :, None]
+image = np.concatenate([image, image, image], axis=2)
+control_image = Image.fromarray(image)
+control_image.save("control.png")
+# Initialize pipeline
+controlnet = ControlNetModel.from_pretrained(controlnet_checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16).to("cuda")
+pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-2steps-lora.safetensors"))
+pipe.fuse_lora()
+# Ensure the DDIM scheduler's timestep spacing is set to "trailing"!
+pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+image = pipe("a blue paradise bird in the jungle", num_inference_steps=2, image=control_image, guidance_scale=0).images[0]
+image.save('image_out.png')
+```
+#### Unified LoRA (support 1 to 8 steps inference)
+The example below uses the Canny ControlNet:
+```python
+import torch
+from diffusers.utils import load_image
+import numpy as np
+import cv2
+from PIL import Image
+from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, TCDScheduler
+from huggingface_hub import hf_hub_download
+controlnet_checkpoint = "lllyasviel/control_v11p_sd15_canny"
+# Load original image
+image = load_image("https://huggingface.co/lllyasviel/control_v11p_sd15_canny/resolve/main/images/input.png")
+image = np.array(image)
+# Prepare Canny Control Image
+low_threshold = 100
+high_threshold = 200
+image = cv2.Canny(image, low_threshold, high_threshold)
+image = image[:, :, None]
+image = np.concatenate([image, image, image], axis=2)
+control_image = Image.fromarray(image)
+control_image.save("control.png")
+# Initialize pipeline
+controlnet = ControlNetModel.from_pretrained(controlnet_checkpoint, torch_dtype=torch.float16)
+pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16).to("cuda")
+# Load Hyper-SD15-1step lora
+pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors"))
+pipe.fuse_lora()
+# Use TCD scheduler to achieve better image quality
+pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
+# Lower eta results in more detail for multi-steps inference
+eta=1.0
+image = pipe("a blue paradise bird in the jungle", num_inference_steps=1, image=control_image, guidance_scale=0, eta=eta).images[0]
+image.save('image_out.png')
+```
 ## Citation
 ```bibtex
 @article{ren2024hypersd,