Update README.md
Browse files
README.md
CHANGED
|
@@ -5,7 +5,7 @@ license: apache-2.0
|
|
| 5 |
<div align="center">
|
| 6 |
Sa Xiao<sup>*</sup>, Yibo Lu<sup>*</sup>, Kangjian Wu<sup>*</sup>, Bin Wu<sup>†</sup>, Haoxiong Su, Mian Peng, Qiwen Mao, Wenjiang Zhou<br/>(*co-first author), (†Corresponding Author, benbinwu@tencent.com)<br/>
|
| 7 |
Lyra Lab, Tencent Music Entertainment<br/>
|
| 8 |
-
<p>[<a href="https://github.com/TMElyralab/lyraDiff">github</a>]
|
| 9 |
</div>
|
| 10 |
|
| 11 |
## Introduction
|
|
@@ -27,8 +27,85 @@ The core features include:
|
|
| 27 |
|
| 28 |
## Usage
|
| 29 |
|
|
|
|
|
|
|
| 30 |
We provide a reference implementation of lyraDiff version of SD1.5/SDXL, as well as sampling code, in a dedicated [github repository](https://github.com/TMElyralab/lyraDiff).
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
## Citation
|
| 33 |
``` bibtex
|
| 34 |
@Misc{lyraDiff_2025,
|
|
|
|
| 5 |
<div align="center">
|
| 6 |
Sa Xiao<sup>*</sup>, Yibo Lu<sup>*</sup>, Kangjian Wu<sup>*</sup>, Bin Wu<sup>†</sup>, Haoxiong Su, Mian Peng, Qiwen Mao, Wenjiang Zhou<br/>(*co-first author), (†Corresponding Author, benbinwu@tencent.com)<br/>
|
| 7 |
Lyra Lab, Tencent Music Entertainment<br/>
|
| 8 |
+
<p>[<a href="https://github.com/TMElyralab/lyraDiff">github</a>] </p>
|
| 9 |
</div>
|
| 10 |
|
| 11 |
## Introduction
|
|
|
|
| 27 |
|
| 28 |
## Usage
|
| 29 |
|
| 30 |
+
<!-- 
|
| 31 |
+
-->
|
| 32 |
We provide a reference implementation of lyraDiff version of SD1.5/SDXL, as well as sampling code, in a dedicated [github repository](https://github.com/TMElyralab/lyraDiff).
|
| 33 |
|
| 34 |
+
### Example
|
| 35 |
+
We provide a minimal [script](https://github.com/TMElyralab/lyraDiff/blob/main/examples/SDXL/ipadapter_demo.py) for running SDXL models + IP-Adapter with lyraDiff as follows:
|
| 36 |
+
|
| 37 |
+
```python
|
| 38 |
+
import torch
|
| 39 |
+
import time
|
| 40 |
+
import sys, os
|
| 41 |
+
from diffusers import StableDiffusionXLPipeline
|
| 42 |
+
from lyradiff.lyradiff_model.module.lyradiff_ip_adapter import LyraIPAdapter
|
| 43 |
+
from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextModelWithProjection
|
| 44 |
+
from lyradiff.lyradiff_model.lyradiff_unet_model import LyraDiffUNet2DConditionModel
|
| 45 |
+
from lyradiff.lyradiff_model.lyradiff_vae_model import LyraDiffVaeModel
|
| 46 |
+
from diffusers import EulerAncestralDiscreteScheduler
|
| 47 |
+
from PIL import Image
|
| 48 |
+
from diffusers.utils import load_image
|
| 49 |
+
import GPUtil
|
| 50 |
+
|
| 51 |
+
model_path = "/path/to/sdxl/model/"
|
| 52 |
+
vae_model_path = "/path/to/sdxl/sdxl-vae-fp16-fix"
|
| 53 |
+
|
| 54 |
+
text_encoder = CLIPTextModel.from_pretrained(model_path, subfolder="text_encoder").to(torch.float16).to(torch.device("cuda"))
|
| 55 |
+
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(model_path, subfolder="text_encoder_2").to(torch.float16).to(torch.device("cuda"))
|
| 56 |
+
tokenizer = CLIPTokenizer.from_pretrained(model_path, subfolder="tokenizer")
|
| 57 |
+
tokenizer_2 = CLIPTokenizer.from_pretrained( model_path, subfolder="tokenizer_2")
|
| 58 |
+
|
| 59 |
+
unet = LyraDiffUNet2DConditionModel(is_sdxl=True)
|
| 60 |
+
vae = LyraDiffVaeModel(scaling_factor=0.13025, is_upcast=False)
|
| 61 |
+
|
| 62 |
+
unet.load_from_diffusers_model(os.path.join(model_path, "unet"))
|
| 63 |
+
vae.load_from_diffusers_model(vae_model_path)
|
| 64 |
+
|
| 65 |
+
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(model_path, subfolder="scheduler", timestep_spacing="linspace")
|
| 66 |
+
|
| 67 |
+
pipe = StableDiffusionXLPipeline(
|
| 68 |
+
vae=vae,
|
| 69 |
+
unet=unet,
|
| 70 |
+
text_encoder=text_encoder,
|
| 71 |
+
text_encoder_2=text_encoder_2,
|
| 72 |
+
tokenizer=tokenizer,
|
| 73 |
+
tokenizer_2=tokenizer_2,
|
| 74 |
+
scheduler=scheduler
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
ip_ckpt = "/path/to/sdxl/ip_ckpt/ip-adapter-plus_sdxl_vit-h.bin"
|
| 78 |
+
image_encoder_path = "/path/to/sdxl/ip_ckpt/image_encoder"
|
| 79 |
+
|
| 80 |
+
# Create LyraIPAdapter
|
| 81 |
+
ip_adapter = LyraIPAdapter(unet_model=unet.model, sdxl=True, device=torch.device("cuda"), ip_ckpt=ip_ckpt, ip_plus=True, image_encoder_path=image_encoder_path, num_ip_tokens=16, ip_projection_dim=1024)
|
| 82 |
+
|
| 83 |
+
# Load the reference image for the IP-Adapter
|
| 84 |
+
ip_image = load_image("https://cdn-uploads.huggingface.co/production/uploads/6461b412846a6c8c8305319d/8U6yNHTPLaOC3gIWJZWGL.png")
|
| 85 |
+
ip_scale = 0.5
|
| 86 |
+
|
| 87 |
+
# get ip image embedding and pass it to the pipeline
|
| 88 |
+
ip_image_embedding = [ip_adapter.get_image_embeds_lyradiff(ip_image)['ip_hidden_states']]
|
| 89 |
+
# Set the IP-Adapter scale directly on the unet model object, since ip_adapter_scale cannot be set through the diffusers pipeline
|
| 90 |
+
unet.set_ip_adapter_scale(ip_scale)
|
| 91 |
+
|
| 92 |
+
for i in range(3):
|
| 93 |
+
generator = torch.Generator("cuda").manual_seed(123)
|
| 94 |
+
start = time.perf_counter()
|
| 95 |
+
images = pipe(prompt="a beautiful girl, cartoon style",
|
| 96 |
+
height=1024,
|
| 97 |
+
width=1024,
|
| 98 |
+
num_inference_steps=20,
|
| 99 |
+
num_images_per_prompt=1,
|
| 100 |
+
guidance_scale=7.5,
|
| 101 |
+
negative_prompt="NSFW",
|
| 102 |
+
generator=torch.Generator("cuda").manual_seed(123),
|
| 103 |
+
ip_adapter_image_embeds=ip_image_embedding
|
| 104 |
+
)[0]
|
| 105 |
+
images[0].save(f"sdxl_ip_{i}.png")
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
|
| 109 |
## Citation
|
| 110 |
``` bibtex
|
| 111 |
@Misc{lyraDiff_2025,
|