Update pipeline.py
Browse files- pipeline.py +47 -34
pipeline.py
CHANGED
|
@@ -17,6 +17,7 @@
|
|
| 17 |
# Here is the AGPL-3.0 license https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/LICENSE.txt
|
| 18 |
|
| 19 |
import inspect
|
|
|
|
| 20 |
from typing import Any, Callable, Dict, List, Optional, Union
|
| 21 |
|
| 22 |
import paddle
|
|
@@ -30,18 +31,25 @@ from ppdiffusers.pipelines.stable_diffusion.safety_checker import (
|
|
| 30 |
StableDiffusionSafetyChecker,
|
| 31 |
)
|
| 32 |
from ppdiffusers.schedulers import KarrasDiffusionSchedulers
|
| 33 |
-
from ppdiffusers.utils import
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
| 38 |
|
|
|
|
| 39 |
@paddle.no_grad()
|
| 40 |
-
def load_lora(
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
ratio = float(ratio)
|
| 46 |
visited = []
|
| 47 |
for key in state_dict:
|
|
@@ -49,8 +57,7 @@ def load_lora(pipeline,
|
|
| 49 |
continue
|
| 50 |
|
| 51 |
if "text" in key:
|
| 52 |
-
tmp_layer_infos = key.split(".")[0].split(
|
| 53 |
-
LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
|
| 54 |
hf_to_ppnlp = {
|
| 55 |
"encoder": "transformer",
|
| 56 |
"fc1": "linear1",
|
|
@@ -58,12 +65,12 @@ def load_lora(pipeline,
|
|
| 58 |
}
|
| 59 |
layer_infos = []
|
| 60 |
for layer_info in tmp_layer_infos:
|
| 61 |
-
if layer_info == "mlp":
|
|
|
|
| 62 |
layer_infos.append(hf_to_ppnlp.get(layer_info, layer_info))
|
| 63 |
curr_layer: paddle.nn.Linear = pipeline.text_encoder
|
| 64 |
else:
|
| 65 |
-
layer_infos = key.split(".")[0].split(LORA_PREFIX_UNET +
|
| 66 |
-
"_")[-1].split("_")
|
| 67 |
curr_layer: paddle.nn.Linear = pipeline.unet
|
| 68 |
|
| 69 |
temp_name = layer_infos.pop(0)
|
|
@@ -82,14 +89,9 @@ def load_lora(pipeline,
|
|
| 82 |
else:
|
| 83 |
temp_name = layer_infos.pop(0)
|
| 84 |
|
| 85 |
-
triplet_keys = [
|
| 86 |
-
key,
|
| 87 |
-
key.replace("lora_down", "lora_up"),
|
| 88 |
-
key.replace("lora_down.weight", "alpha")
|
| 89 |
-
]
|
| 90 |
dtype: paddle.dtype = curr_layer.weight.dtype
|
| 91 |
-
weight_down: paddle.Tensor = state_dict[triplet_keys[0]].cast(
|
| 92 |
-
dtype)
|
| 93 |
weight_up: paddle.Tensor = state_dict[triplet_keys[1]].cast(dtype)
|
| 94 |
rank: float = float(weight_down.shape[0])
|
| 95 |
if triplet_keys[2] in state_dict:
|
|
@@ -100,31 +102,37 @@ def load_lora(pipeline,
|
|
| 100 |
|
| 101 |
if not hasattr(curr_layer, "backup_weights"):
|
| 102 |
curr_layer.backup_weights = curr_layer.weight.clone()
|
| 103 |
-
|
| 104 |
if len(weight_down.shape) == 4:
|
| 105 |
if weight_down.shape[2:4] == [1, 1]:
|
| 106 |
# conv2d 1x1
|
| 107 |
curr_layer.weight.copy_(
|
| 108 |
-
curr_layer.weight
|
| 109 |
-
ratio
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
| 112 |
else:
|
| 113 |
# conv2d 3x3
|
| 114 |
curr_layer.weight.copy_(
|
| 115 |
-
curr_layer.weight
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
else:
|
| 119 |
# linear
|
| 120 |
-
curr_layer.weight.copy_(
|
| 121 |
-
curr_layer.weight +
|
| 122 |
-
ratio * paddle.matmul(weight_up, weight_down).T * scale, True)
|
| 123 |
|
| 124 |
# update visited list
|
| 125 |
visited.extend(triplet_keys)
|
| 126 |
return pipeline
|
| 127 |
|
|
|
|
| 128 |
class WebUIStableDiffusionPipeline(DiffusionPipeline):
|
| 129 |
r"""
|
| 130 |
Pipeline for text-to-image generation using Stable Diffusion.
|
|
@@ -399,7 +407,7 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
|
|
| 399 |
callback: Optional[Callable[[int, int, paddle.Tensor], None]] = None,
|
| 400 |
callback_steps: Optional[int] = 1,
|
| 401 |
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
| 402 |
-
clip_skip: int =
|
| 403 |
lora_dir: str = "./loras",
|
| 404 |
):
|
| 405 |
r"""
|
|
@@ -452,7 +460,9 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
|
|
| 452 |
`self.processor` in
|
| 453 |
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
| 454 |
clip_skip (`int`, *optional*, defaults to 0):
|
| 455 |
-
CLIP_stop_at_last_layers, if clip_skip
|
|
|
|
|
|
|
| 456 |
Examples:
|
| 457 |
|
| 458 |
Returns:
|
|
@@ -554,7 +564,9 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
|
|
| 554 |
cross_attention_kwargs=cross_attention_kwargs,
|
| 555 |
).sample
|
| 556 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
| 557 |
-
noise_pred = noise_pred_uncond + weight * guidance_scale * (
|
|
|
|
|
|
|
| 558 |
else:
|
| 559 |
noise_pred = self.unet(
|
| 560 |
latent_model_input,
|
|
@@ -616,6 +628,7 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
|
|
| 616 |
sub_layer.weight.copy_(sub_layer.backup_weights, True)
|
| 617 |
self.weights_has_changed = False
|
| 618 |
|
|
|
|
| 619 |
# clip.py
|
| 620 |
import math
|
| 621 |
from collections import namedtuple
|
|
|
|
| 17 |
# Here is the AGPL-3.0 license https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/LICENSE.txt
|
| 18 |
|
| 19 |
import inspect
|
| 20 |
+
from pathlib import Path
|
| 21 |
from typing import Any, Callable, Dict, List, Optional, Union
|
| 22 |
|
| 23 |
import paddle
|
|
|
|
| 31 |
StableDiffusionSafetyChecker,
|
| 32 |
)
|
| 33 |
from ppdiffusers.schedulers import KarrasDiffusionSchedulers
|
| 34 |
+
from ppdiffusers.utils import (
|
| 35 |
+
logging,
|
| 36 |
+
randn_tensor,
|
| 37 |
+
safetensors_load,
|
| 38 |
+
smart_load,
|
| 39 |
+
torch_load,
|
| 40 |
+
)
|
| 41 |
|
| 42 |
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
| 43 |
|
| 44 |
+
|
| 45 |
@paddle.no_grad()
|
| 46 |
+
def load_lora(
|
| 47 |
+
pipeline,
|
| 48 |
+
state_dict: dict,
|
| 49 |
+
LORA_PREFIX_UNET: str = "lora_unet",
|
| 50 |
+
LORA_PREFIX_TEXT_ENCODER: str = "lora_te",
|
| 51 |
+
ratio: float = 1.0,
|
| 52 |
+
):
|
| 53 |
ratio = float(ratio)
|
| 54 |
visited = []
|
| 55 |
for key in state_dict:
|
|
|
|
| 57 |
continue
|
| 58 |
|
| 59 |
if "text" in key:
|
| 60 |
+
tmp_layer_infos = key.split(".")[0].split(LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
|
|
|
|
| 61 |
hf_to_ppnlp = {
|
| 62 |
"encoder": "transformer",
|
| 63 |
"fc1": "linear1",
|
|
|
|
| 65 |
}
|
| 66 |
layer_infos = []
|
| 67 |
for layer_info in tmp_layer_infos:
|
| 68 |
+
if layer_info == "mlp":
|
| 69 |
+
continue
|
| 70 |
layer_infos.append(hf_to_ppnlp.get(layer_info, layer_info))
|
| 71 |
curr_layer: paddle.nn.Linear = pipeline.text_encoder
|
| 72 |
else:
|
| 73 |
+
layer_infos = key.split(".")[0].split(LORA_PREFIX_UNET + "_")[-1].split("_")
|
|
|
|
| 74 |
curr_layer: paddle.nn.Linear = pipeline.unet
|
| 75 |
|
| 76 |
temp_name = layer_infos.pop(0)
|
|
|
|
| 89 |
else:
|
| 90 |
temp_name = layer_infos.pop(0)
|
| 91 |
|
| 92 |
+
triplet_keys = [key, key.replace("lora_down", "lora_up"), key.replace("lora_down.weight", "alpha")]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
dtype: paddle.dtype = curr_layer.weight.dtype
|
| 94 |
+
weight_down: paddle.Tensor = state_dict[triplet_keys[0]].cast(dtype)
|
|
|
|
| 95 |
weight_up: paddle.Tensor = state_dict[triplet_keys[1]].cast(dtype)
|
| 96 |
rank: float = float(weight_down.shape[0])
|
| 97 |
if triplet_keys[2] in state_dict:
|
|
|
|
| 102 |
|
| 103 |
if not hasattr(curr_layer, "backup_weights"):
|
| 104 |
curr_layer.backup_weights = curr_layer.weight.clone()
|
| 105 |
+
|
| 106 |
if len(weight_down.shape) == 4:
|
| 107 |
if weight_down.shape[2:4] == [1, 1]:
|
| 108 |
# conv2d 1x1
|
| 109 |
curr_layer.weight.copy_(
|
| 110 |
+
curr_layer.weight
|
| 111 |
+
+ ratio
|
| 112 |
+
* paddle.matmul(weight_up.squeeze([-1, -2]), weight_down.squeeze([-1, -2])).unsqueeze([-1, -2])
|
| 113 |
+
* scale,
|
| 114 |
+
True,
|
| 115 |
+
)
|
| 116 |
else:
|
| 117 |
# conv2d 3x3
|
| 118 |
curr_layer.weight.copy_(
|
| 119 |
+
curr_layer.weight
|
| 120 |
+
+ ratio
|
| 121 |
+
* paddle.nn.functional.conv2d(weight_down.transpose([1, 0, 2, 3]), weight_up).transpose(
|
| 122 |
+
[1, 0, 2, 3]
|
| 123 |
+
)
|
| 124 |
+
* scale,
|
| 125 |
+
True,
|
| 126 |
+
)
|
| 127 |
else:
|
| 128 |
# linear
|
| 129 |
+
curr_layer.weight.copy_(curr_layer.weight + ratio * paddle.matmul(weight_up, weight_down).T * scale, True)
|
|
|
|
|
|
|
| 130 |
|
| 131 |
# update visited list
|
| 132 |
visited.extend(triplet_keys)
|
| 133 |
return pipeline
|
| 134 |
|
| 135 |
+
|
| 136 |
class WebUIStableDiffusionPipeline(DiffusionPipeline):
|
| 137 |
r"""
|
| 138 |
Pipeline for text-to-image generation using Stable Diffusion.
|
|
|
|
| 407 |
callback: Optional[Callable[[int, int, paddle.Tensor], None]] = None,
|
| 408 |
callback_steps: Optional[int] = 1,
|
| 409 |
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
| 410 |
+
clip_skip: int = 1,
|
| 411 |
lora_dir: str = "./loras",
|
| 412 |
):
|
| 413 |
r"""
|
|
|
|
| 460 |
`self.processor` in
|
| 461 |
[diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
|
| 462 |
clip_skip (`int`, *optional*, defaults to 1):
|
| 463 |
+
CLIP_stop_at_last_layers. If clip_skip <= 1, the last_hidden_state from the text_encoder is used.
|
| 464 |
+
lora_dir (`str`, *optional*):
|
| 465 |
+
Path to the directory containing the LoRA weights to load.
|
| 466 |
Examples:
|
| 467 |
|
| 468 |
Returns:
|
|
|
|
| 564 |
cross_attention_kwargs=cross_attention_kwargs,
|
| 565 |
).sample
|
| 566 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
| 567 |
+
noise_pred = noise_pred_uncond + weight * guidance_scale * (
|
| 568 |
+
noise_pred_text - noise_pred_uncond
|
| 569 |
+
)
|
| 570 |
else:
|
| 571 |
noise_pred = self.unet(
|
| 572 |
latent_model_input,
|
|
|
|
| 628 |
sub_layer.weight.copy_(sub_layer.backup_weights, True)
|
| 629 |
self.weights_has_changed = False
|
| 630 |
|
| 631 |
+
|
| 632 |
# clip.py
|
| 633 |
import math
|
| 634 |
from collections import namedtuple
|