Update README.md
Browse files
README.md
CHANGED
|
@@ -22,20 +22,73 @@ Unlike the inpaint controlnets used for general scenarios, this model is fine-tu
|
|
| 22 |
|
| 23 |
<span style="width: 150px !important;display: inline-block;">`Foreground`</span> | <span style="width: 150px !important;display: inline-block;">`Mask`</span> | <span style="width: 150px !important;display: inline-block;">`w/o instance mask`</span> | <span style="width: 150px !important;display: inline-block;">`w/ instance mask`</span>
|
| 24 |
:--:|:--:|:--:|:--:
|
| 25 |
-
 |  | 
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
```
|
|
|
|
| 39 |
|
| 40 |
## Training details
|
| 41 |
In the first phase, the model was trained on 12M laion2B and internal source images with random masks for 20k steps. In the second phase, the model was trained on 3M e-commerce images with the instance mask for 20k steps.<br>
|
|
|
|
| 22 |
|
| 23 |
<span style="width: 150px !important;display: inline-block;">`Foreground`</span> | <span style="width: 150px !important;display: inline-block;">`Mask`</span> | <span style="width: 150px !important;display: inline-block;">`w/o instance mask`</span> | <span style="width: 150px !important;display: inline-block;">`w/ instance mask`</span>
|
| 24 |
:--:|:--:|:--:|:--:
|
| 25 |
+
 |  |  | 
|
| 26 |
+
 |  |  | 
|
| 27 |
+
 |  |  | 
|
|
|
|
| 28 |
|
| 29 |
## Usage with Diffusers
|
| 30 |
```python
|
| 31 |
+
import numpy as np
import torch
from diffusers import (
    ControlNetModel,
    DPMSolverMultistepScheduler,
    StableDiffusionXLControlNetInpaintPipeline,
)
from diffusers.utils import load_image
from PIL import Image
|
| 38 |
+
|
| 39 |
+
def make_inpaint_condition(init_image, mask_image):
    """Build the inpainting control tensor from an image and its mask.

    Both arguments must expose ``.convert(mode)`` (presumably PIL images --
    confirm against callers). The image is converted to RGB and the mask to
    single-channel "L"; both are scaled to float32 in ``[0, 1]``.

    Every pixel whose mask value is above 0.5 is overwritten with ``-1.0``
    in all channels, marking it as "to be inpainted".

    Returns:
        A float32 ``torch.Tensor`` laid out as ``(1, channels, height, width)``.

    Raises:
        AssertionError: if the image and the mask differ in height or width.
    """
    init_image = np.array(init_image.convert("RGB")).astype(np.float32) / 255.0
    mask_image = np.array(mask_image.convert("L")).astype(np.float32) / 255.0

    # Compare height AND width. The previous `shape[0:1]` slice only checked
    # the height, so a width mismatch slipped through and failed later with
    # an unrelated boolean-index error.
    assert init_image.shape[0:2] == mask_image.shape[0:2], "image and image_mask must have the same image size"

    init_image[mask_image > 0.5] = -1.0  # set as masked pixel
    # HWC -> 1xCxHxW
    init_image = np.expand_dims(init_image, 0).transpose(0, 3, 1, 2)
    init_image = torch.from_numpy(init_image)
    return init_image
|
| 48 |
+
|
| 49 |
|
| 50 |
# Load the e-commerce inpainting ControlNet and attach it to the SDXL base pipeline.
controlnet = ControlNetModel.from_pretrained(
    "alimama-creative/EcomXL_controlnet_inpaint", torch_dtype=torch.float16, use_safetensors=True
)

pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# pipe.enable_xformers_memory_efficient_attention()
pipe.enable_vae_slicing()
# NOTE(review): the pipeline is never moved to an accelerator here; float16
# weights normally want something like `pipe.to("cuda")` -- confirm.

image = load_image(
    "https://huggingface.co/alimama-creative/EcomXL_controlnet_inpaint/resolve/main/images/inp_0.png"
)
mask = load_image(
    "https://huggingface.co/alimama-creative/EcomXL_controlnet_inpaint/resolve/main/images/inp_1.png"
)
# Invert the downloaded mask before using it.
mask = Image.fromarray(255 - np.array(mask))

# The loaded picture is bound to `image`; the previous `img` was undefined.
control_image = make_inpaint_condition(image, mask)

prompt = "a product on the table"

result_image = pipe(
    prompt,
    image=image,
    mask_image=mask,
    control_image=control_image,
    controlnet_conditioning_scale=0.5,
    guidance_scale=7,
    strength=0.75,
    width=1024,
    height=1024,
).images[0]

# Save the generated output (the previous code re-saved the input image).
result_image.save("test_inp.png")
|
| 89 |
+
|
| 90 |
```
|
| 91 |
+
The model exhibits good performance when the controlnet weight (`controlnet_conditioning_scale`) is 0.5.
|
| 92 |
|
| 93 |
## Training details
|
| 94 |
In the first phase, the model was trained on 12M laion2B and internal source images with random masks for 20k steps. In the second phase, the model was trained on 3M e-commerce images with the instance mask for 20k steps.<br>
|