add IP-Adapter-FaceID-Portrait
Browse files
README.md
CHANGED
|
@@ -56,9 +56,15 @@ IP-Adapter-FaceID-SDXL: An experimental SDXL version of IP-Adapter-FaceID
|
|
| 56 |

|
| 57 |
</div>
|
| 58 |
|
| 59 |
-
**Update 2024/01/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
IP-Adapter-FaceID-PlusV2-SDXL: An experimental SDXL version of IP-Adapter-FaceID-PlusV2
|
| 62 |
|
| 63 |
## Usage
|
| 64 |
|
|
@@ -316,6 +322,75 @@ images = ip_model.generate(
|
|
| 316 |
|
| 317 |
```
|
| 318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
## Limitations and Bias
|
| 321 |
- The models do not achieve perfect photorealism and ID consistency.
|
|
|
|
| 56 |

|
| 57 |
</div>
|
| 58 |
|
| 59 |
+
**Update 2024/01/19**:
|
| 60 |
+
|
| 61 |
+
IP-Adapter-FaceID-Portrait: same as IP-Adapter-FaceID, but for portrait generation (no LoRA! no ControlNet!). Specifically, it accepts multiple face images to enhance similarity (the default is 5).
|
| 62 |
+
|
| 63 |
+
<div align="center">
|
| 64 |
+
|
| 65 |
+

|
| 66 |
+
</div>
|
| 67 |
|
|
|
|
| 68 |
|
| 69 |
## Usage
|
| 70 |
|
|
|
|
| 322 |
|
| 323 |
```
|
| 324 |
|
| 325 |
+
### IP-Adapter-FaceID-Portrait
|
| 326 |
+
|
| 327 |
+
```python
|
| 328 |
+
|
| 329 |
+
import cv2
|
| 330 |
+
from insightface.app import FaceAnalysis
|
| 331 |
+
import torch
|
| 332 |
+
|
| 333 |
+
app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
| 334 |
+
app.prepare(ctx_id=0, det_size=(640, 640))
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
images = ["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg"]
|
| 338 |
+
|
| 339 |
+
faceid_embeds = []
|
| 340 |
+
for image in images:
|
| 341 |
+
image = cv2.imread(image)  # read the file named by the current loop item
|
| 342 |
+
faces = app.get(image)
|
| 343 |
+
faceid_embeds.append(torch.from_numpy(faces[0].normed_embedding).unsqueeze(0).unsqueeze(0))
|
| 344 |
+
faceid_embeds = torch.cat(faceid_embeds, dim=1)
|
| 345 |
+
```
|
| 346 |
+
|
| 347 |
+
```python
|
| 348 |
+
import torch
|
| 349 |
+
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
|
| 350 |
+
from PIL import Image
|
| 351 |
+
|
| 352 |
+
from ip_adapter.ip_adapter_faceid_separate import IPAdapterFaceID
|
| 353 |
+
|
| 354 |
+
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
|
| 355 |
+
vae_model_path = "stabilityai/sd-vae-ft-mse"
|
| 356 |
+
ip_ckpt = "ip-adapter-faceid-portrait_sd15.bin"
|
| 357 |
+
device = "cuda"
|
| 358 |
+
|
| 359 |
+
noise_scheduler = DDIMScheduler(
|
| 360 |
+
num_train_timesteps=1000,
|
| 361 |
+
beta_start=0.00085,
|
| 362 |
+
beta_end=0.012,
|
| 363 |
+
beta_schedule="scaled_linear",
|
| 364 |
+
clip_sample=False,
|
| 365 |
+
set_alpha_to_one=False,
|
| 366 |
+
steps_offset=1,
|
| 367 |
+
)
|
| 368 |
+
vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
|
| 369 |
+
pipe = StableDiffusionPipeline.from_pretrained(
|
| 370 |
+
base_model_path,
|
| 371 |
+
torch_dtype=torch.float16,
|
| 372 |
+
scheduler=noise_scheduler,
|
| 373 |
+
vae=vae,
|
| 374 |
+
feature_extractor=None,
|
| 375 |
+
safety_checker=None
|
| 376 |
+
)
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
# load ip-adapter
|
| 380 |
+
ip_model = IPAdapterFaceID(pipe, ip_ckpt, device, num_tokens=16, n_cond=5)
|
| 381 |
+
|
| 382 |
+
# generate image
|
| 383 |
+
prompt = "photo of a woman in red dress in a garden"
|
| 384 |
+
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
|
| 385 |
+
|
| 386 |
+
images = ip_model.generate(
|
| 387 |
+
prompt=prompt, negative_prompt=negative_prompt, faceid_embeds=faceid_embeds, num_samples=4, width=512, height=512, num_inference_steps=30, seed=2023
|
| 388 |
+
)
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
```
|
| 392 |
+
|
| 393 |
+
|
| 394 |
|
| 395 |
## Limitations and Bias
|
| 396 |
- The models do not achieve perfect photorealism and ID consistency.
|