Update README.md
#3
by YiYiXu HF Staff - opened
README.md
CHANGED
|
@@ -25,41 +25,23 @@ pip install diffusers transformers
|
|
| 25 |
### Text to image
|
| 26 |
|
| 27 |
```python
|
| 28 |
-
from diffusers import KandinskyPipeline, KandinskyPriorPipeline
|
| 29 |
import torch
|
| 30 |
|
| 31 |
-
|
| 32 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
| 33 |
pipe_prior.to("cuda")
|
| 34 |
|
|
|
|
|
|
|
|
|
|
| 35 |
prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
|
| 36 |
negative_prompt = "low quality, bad quality"
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
).images
|
| 41 |
-
|
| 42 |
-
zero_image_emb = pipe_prior(
|
| 43 |
-
negative_prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
|
| 44 |
-
).images
|
| 45 |
-
|
| 46 |
-
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
| 47 |
-
pipe.to("cuda")
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
images = pipe(
|
| 51 |
-
prompt,
|
| 52 |
-
image_embeds=image_emb,
|
| 53 |
-
negative_image_embeds=zero_image_emb,
|
| 54 |
-
num_images_per_prompt=2,
|
| 55 |
-
height=768,
|
| 56 |
-
width=768,
|
| 57 |
-
num_inference_steps=100,
|
| 58 |
-
guidance_scale=4.0,
|
| 59 |
-
generator=generator,
|
| 60 |
-
).images[0]
|
| 61 |
|
| 62 |
-
image
|
|
|
|
| 63 |
```
|
| 64 |
|
| 65 |

|
|
@@ -81,7 +63,9 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
|
|
| 81 |
original_image = original_image.resize((768, 512))
|
| 82 |
|
| 83 |
# create prior
|
| 84 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
|
|
|
|
|
|
| 85 |
pipe_prior.to("cuda")
|
| 86 |
|
| 87 |
# create img2img pipeline
|
|
@@ -91,22 +75,16 @@ pipe.to("cuda")
|
|
| 91 |
prompt = "A fantasy landscape, Cinematic lighting"
|
| 92 |
negative_prompt = "low quality, bad quality"
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
).images
|
| 97 |
-
|
| 98 |
-
zero_image_emb = pipe_prior(
|
| 99 |
-
negative_prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
|
| 100 |
-
).images
|
| 101 |
|
| 102 |
out = pipe(
|
| 103 |
prompt,
|
| 104 |
image=original_image,
|
| 105 |
-
image_embeds=image_emb,
|
| 106 |
-
negative_image_embeds=zero_image_emb,
|
| 107 |
height=768,
|
| 108 |
width=768,
|
| 109 |
-
num_inference_steps=500,
|
| 110 |
strength=0.3,
|
| 111 |
)
|
| 112 |
|
|
@@ -124,9 +102,10 @@ from diffusers.utils import load_image
|
|
| 124 |
import PIL
|
| 125 |
|
| 126 |
import torch
|
| 127 |
-
from torchvision import transforms
|
| 128 |
|
| 129 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
|
|
|
|
|
|
| 130 |
pipe_prior.to("cuda")
|
| 131 |
|
| 132 |
img1 = load_image(
|
|
@@ -137,16 +116,20 @@ img2 = load_image(
|
|
| 137 |
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
|
| 138 |
)
|
| 139 |
|
|
|
|
| 140 |
images_texts = ["a cat", img1, img2]
|
|
|
|
|
|
|
| 141 |
weights = [0.3, 0.3, 0.4]
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
| 145 |
pipe.to("cuda")
|
| 146 |
|
| 147 |
-
image = pipe(
|
| 148 |
-
"", image_embeds=image_emb, negative_image_embeds=zero_image_emb, height=768, width=768, num_inference_steps=150
|
| 149 |
-
).images[0]
|
| 150 |
|
| 151 |
image.save("starry_cat.png")
|
| 152 |
```
|
|
|
|
| 25 |
### Text to image
|
| 26 |
|
| 27 |
```python
|
| 28 |
+
from diffusers import DiffusionPipeline
|
| 29 |
import torch
|
| 30 |
|
| 31 |
+
pipe_prior = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
|
|
|
| 32 |
pipe_prior.to("cuda")
|
| 33 |
|
| 34 |
+
t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
| 35 |
+
t2i_pipe.to("cuda")
|
| 36 |
+
|
| 37 |
prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
|
| 38 |
negative_prompt = "low quality, bad quality"
|
| 39 |
|
| 40 |
+
generator = torch.Generator(device="cuda").manual_seed(12)
|
| 41 |
+
image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
image = t2i_pipe(prompt, image_embeds=image_embeds, negative_image_embeds=negative_image_embeds).images[0]
|
| 44 |
+
image.save("cheeseburger_monster.png")
|
| 45 |
```
|
| 46 |
|
| 47 |

|
|
|
|
| 63 |
original_image = original_image.resize((768, 512))
|
| 64 |
|
| 65 |
# create prior
|
| 66 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
| 67 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
| 68 |
+
)
|
| 69 |
pipe_prior.to("cuda")
|
| 70 |
|
| 71 |
# create img2img pipeline
|
|
|
|
| 75 |
prompt = "A fantasy landscape, Cinematic lighting"
|
| 76 |
negative_prompt = "low quality, bad quality"
|
| 77 |
|
| 78 |
+
generator = torch.Generator(device="cuda").manual_seed(30)
|
| 79 |
+
image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
out = pipe(
|
| 82 |
prompt,
|
| 83 |
image=original_image,
|
| 84 |
+
image_embeds=image_embeds,
|
| 85 |
+
negative_image_embeds=negative_image_embeds,
|
| 86 |
height=768,
|
| 87 |
width=768,
|
|
|
|
| 88 |
strength=0.3,
|
| 89 |
)
|
| 90 |
|
|
|
|
| 102 |
import PIL
|
| 103 |
|
| 104 |
import torch
|
|
|
|
| 105 |
|
| 106 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
| 107 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
| 108 |
+
)
|
| 109 |
pipe_prior.to("cuda")
|
| 110 |
|
| 111 |
img1 = load_image(
|
|
|
|
| 116 |
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
|
| 117 |
)
|
| 118 |
|
| 119 |
+
# add all the conditions we want to interpolate; each one can be either a text prompt or an image
|
| 120 |
images_texts = ["a cat", img1, img2]
|
| 121 |
+
|
| 122 |
+
# specify the weights for each condition in images_texts
|
| 123 |
weights = [0.3, 0.3, 0.4]
|
| 124 |
+
|
| 125 |
+
# We can leave the prompt empty
|
| 126 |
+
prompt = ""
|
| 127 |
+
prior_out = pipe_prior.interpolate(images_texts, weights)
|
| 128 |
|
| 129 |
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
| 130 |
pipe.to("cuda")
|
| 131 |
|
| 132 |
+
image = pipe(prompt, **prior_out, height=768, width=768).images[0]
|
|
|
|
|
|
|
| 133 |
|
| 134 |
image.save("starry_cat.png")
|
| 135 |
```
|