Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py +5 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py +40 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py +32 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py +67 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py +99 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py +86 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py +76 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py +103 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py +330 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py +504 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py +827 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py +401 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv_transpose.py +259 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_depthwise_conv.py +274 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_separable_conv.py +295 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d.py +170 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py +131 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py +128 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py +133 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py +134 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py +138 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py +137 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py +138 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py +143 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py +144 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc +0 -0
- SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc +0 -0
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.layers.activations.elu import ELU
|
| 2 |
+
from keras.src.layers.activations.leaky_relu import LeakyReLU
|
| 3 |
+
from keras.src.layers.activations.prelu import PReLU
|
| 4 |
+
from keras.src.layers.activations.relu import ReLU
|
| 5 |
+
from keras.src.layers.activations.softmax import Softmax
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import activations
|
| 2 |
+
from keras.src.api_export import keras_export
|
| 3 |
+
from keras.src.layers.layer import Layer
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@keras_export("keras.layers.Activation")
class Activation(Layer):
    """Applies an activation function to an output.

    Args:
        activation: Activation function. It could be a callable, or the name
            of an activation from the `keras.activations` namespace.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.

    Example:

    >>> layer = keras.layers.Activation('relu')
    >>> layer(np.array([-3.0, -1.0, 0.0, 2.0]))
    [0.0, 0.0, 0.0, 2.0]
    >>> layer = keras.layers.Activation(keras.activations.relu)
    >>> layer(np.array([-3.0, -1.0, 0.0, 2.0]))
    [0.0, 0.0, 0.0, 2.0]
    """

    def __init__(self, activation, **kwargs):
        super().__init__(**kwargs)
        # Resolve the activation (a name or a callable) once, up front.
        self.activation = activations.get(activation)
        # Element-wise and stateless: masks pass through unchanged and
        # there is nothing to build.
        self.supports_masking = True
        self.built = True

    def call(self, inputs):
        return self.activation(inputs)

    def compute_output_shape(self, input_shape):
        # Element-wise op: the output shape equals the input shape.
        return input_shape

    def get_config(self):
        base_config = super().get_config()
        return {
            **base_config,
            "activation": activations.serialize(self.activation),
        }
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import activations
|
| 2 |
+
from keras.src.api_export import keras_export
|
| 3 |
+
from keras.src.layers.layer import Layer
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@keras_export("keras.layers.ELU")
class ELU(Layer):
    """Applies an Exponential Linear Unit function to an output.

    Formula:

    ```
    f(x) = alpha * (exp(x) - 1.) for x < 0
    f(x) = x for x >= 0
    ```

    Args:
        alpha: float, slope of negative section. Defaults to `1.0`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(self, alpha=1.0, **kwargs):
        super().__init__(**kwargs)
        # Stateless element-wise layer: masking passes through and no
        # build step is needed.
        self.supports_masking = True
        self.built = True
        self.alpha = alpha

    def call(self, inputs):
        # Delegate to the functional activation with the configured slope.
        return activations.elu(inputs, alpha=self.alpha)

    def compute_output_shape(self, input_shape):
        # Element-wise transformation preserves the input shape.
        return input_shape
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import warnings
|
| 2 |
+
|
| 3 |
+
from keras.src import activations
|
| 4 |
+
from keras.src.api_export import keras_export
|
| 5 |
+
from keras.src.layers.layer import Layer
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@keras_export("keras.layers.LeakyReLU")
class LeakyReLU(Layer):
    """Leaky version of a Rectified Linear Unit activation layer.

    This layer allows a small gradient when the unit is not active.

    Formula:

    ``` python
    f(x) = alpha * x if x < 0
    f(x) = x if x >= 0
    ```

    Example:

    ``` python
    leaky_relu_layer = LeakyReLU(negative_slope=0.5)
    input = np.array([-10, -5, 0.0, 5, 10])
    result = leaky_relu_layer(input)
    # result = [-5. , -2.5, 0. , 5. , 10.]
    ```

    Args:
        negative_slope: Float >= 0.0. Negative slope coefficient.
            Defaults to `0.3`.
        **kwargs: Base layer keyword arguments, such as
            `name` and `dtype`.
    """

    def __init__(self, negative_slope=0.3, **kwargs):
        # Backwards compatibility: `alpha` was the old name of this
        # argument. Accept it, but warn the caller.
        if "alpha" in kwargs:
            negative_slope = kwargs.pop("alpha")
            warnings.warn(
                "Argument `alpha` is deprecated. "
                "Use `negative_slope` instead."
            )
        super().__init__(**kwargs)
        # A negative (or missing) slope makes the activation ill-defined.
        if negative_slope is None or negative_slope < 0:
            raise ValueError(
                "The negative_slope value of a Leaky ReLU layer "
                "cannot be None or negative value. Expected a float."
                f" Received: negative_slope={negative_slope}"
            )
        self.supports_masking = True
        self.built = True
        self.negative_slope = negative_slope

    def call(self, inputs):
        return activations.leaky_relu(
            inputs, negative_slope=self.negative_slope
        )

    def compute_output_shape(self, input_shape):
        # Element-wise op: shape is unchanged.
        return input_shape

    def get_config(self):
        base_config = super().get_config()
        base_config.update({"negative_slope": self.negative_slope})
        return base_config
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import activations
|
| 2 |
+
from keras.src import constraints
|
| 3 |
+
from keras.src import initializers
|
| 4 |
+
from keras.src import regularizers
|
| 5 |
+
from keras.src.api_export import keras_export
|
| 6 |
+
from keras.src.layers.input_spec import InputSpec
|
| 7 |
+
from keras.src.layers.layer import Layer
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@keras_export("keras.layers.PReLU")
class PReLU(Layer):
    """Parametric Rectified Linear Unit activation layer.

    Formula:
    ``` python
    f(x) = alpha * x for x < 0
    f(x) = x for x >= 0
    ```
    where `alpha` is a learned array with the same shape as x.

    Args:
        alpha_initializer: Initializer function for the weights.
        alpha_regularizer: Regularizer for the weights.
        alpha_constraint: Constraint for the weights.
        shared_axes: The axes along which to share learnable parameters for
            the activation function. For example, if the incoming feature
            maps are from a 2D convolution with output shape
            `(batch, height, width, channels)`, and you wish to share
            parameters across space so that each filter only has one set of
            parameters, set `shared_axes=[1, 2]`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(
        self,
        alpha_initializer="Zeros",
        alpha_regularizer=None,
        alpha_constraint=None,
        shared_axes=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.alpha_initializer = initializers.get(alpha_initializer)
        self.alpha_regularizer = regularizers.get(alpha_regularizer)
        self.alpha_constraint = constraints.get(alpha_constraint)
        # Normalize `shared_axes` to either None or a list of axis indices.
        if shared_axes is None:
            self.shared_axes = None
        elif isinstance(shared_axes, (list, tuple)):
            self.shared_axes = list(shared_axes)
        else:
            self.shared_axes = [shared_axes]

    def build(self, input_shape):
        # One alpha per feature position (batch dim excluded); along any
        # shared axis a single parameter is broadcast instead.
        param_shape = list(input_shape[1:])
        if self.shared_axes is not None:
            for axis in self.shared_axes:
                param_shape[axis - 1] = 1
        self.alpha = self.add_weight(
            shape=param_shape,
            name="alpha",
            initializer=self.alpha_initializer,
            regularizer=self.alpha_regularizer,
            constraint=self.alpha_constraint,
        )
        # Pin down the non-shared dimensions so future inputs must match.
        axes = {}
        if self.shared_axes:
            for axis in range(1, len(input_shape)):
                if axis not in self.shared_axes:
                    axes[axis] = input_shape[axis]
        self.input_spec = InputSpec(ndim=len(input_shape), axes=axes)
        self.built = True

    def call(self, inputs):
        # relu(x) keeps the positive part; -alpha * relu(-x) applies the
        # learned slope to the negative part.
        positive_part = activations.relu(inputs)
        negative_part = -self.alpha * activations.relu(-inputs)
        return positive_part + negative_part

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "alpha_initializer": initializers.serialize(
                    self.alpha_initializer
                ),
                "alpha_regularizer": regularizers.serialize(
                    self.alpha_regularizer
                ),
                "alpha_constraint": constraints.serialize(
                    self.alpha_constraint
                ),
                "shared_axes": self.shared_axes,
            }
        )
        return config

    def compute_output_shape(self, input_shape):
        # Element-wise op: shape is unchanged.
        return input_shape
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import activations
|
| 2 |
+
from keras.src.api_export import keras_export
|
| 3 |
+
from keras.src.layers.layer import Layer
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@keras_export("keras.layers.ReLU")
class ReLU(Layer):
    """Rectified Linear Unit activation function layer.

    Formula:
    ``` python
    f(x) = max_value if x >= max_value
    f(x) = x if threshold <= x < max_value
    f(x) = negative_slope * (x - threshold) otherwise
    ```

    Example:
    ``` python
    relu_layer = keras.layers.ReLU(
        max_value=10,
        negative_slope=0.5,
        threshold=0,
    )
    input = np.array([-10, -5, 0.0, 5, 10])
    result = relu_layer(input)
    # result = [-5. , -2.5, 0. , 5. , 10.]
    ```

    Args:
        max_value: Float >= 0. Maximum activation value. None means
            unlimited. Defaults to `None`.
        negative_slope: Float >= 0. Negative slope coefficient.
            Defaults to `0.0`.
        threshold: Float >= 0. Threshold value for thresholded activation.
            Defaults to `0.0`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(
        self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs
    ):
        super().__init__(**kwargs)
        # Validate each argument up front, in a fixed order, so the first
        # invalid one is the one reported.
        if max_value is not None and max_value < 0.0:
            raise ValueError(
                "max_value of a ReLU layer cannot be a negative "
                f"value. Received: max_value={max_value}"
            )
        if negative_slope is None or negative_slope < 0.0:
            raise ValueError(
                "negative_slope of a ReLU layer cannot be a negative "
                f"value. Received: negative_slope={negative_slope}"
            )
        if threshold is None or threshold < 0.0:
            raise ValueError(
                "threshold of a ReLU layer cannot be a negative "
                f"value. Received: threshold={threshold}"
            )

        self.max_value = max_value
        self.negative_slope = negative_slope
        self.threshold = threshold
        # Stateless element-wise layer: nothing to build, masks pass through.
        self.supports_masking = True
        self.built = True

    def call(self, inputs):
        return activations.relu(
            inputs,
            negative_slope=self.negative_slope,
            max_value=self.max_value,
            threshold=self.threshold,
        )

    def get_config(self):
        return {
            **super().get_config(),
            "max_value": self.max_value,
            "negative_slope": self.negative_slope,
            "threshold": self.threshold,
        }

    def compute_output_shape(self, input_shape):
        # Element-wise op: shape is unchanged.
        return input_shape
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import activations
|
| 2 |
+
from keras.src import backend
|
| 3 |
+
from keras.src.api_export import keras_export
|
| 4 |
+
from keras.src.layers.layer import Layer
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _large_negative_number(dtype):
    """Return a large negative number appropriate for `dtype`."""
    # float16 overflows to -inf well before -1e9, so use a magnitude that
    # is representable yet still effectively -inf after softmax.
    return -3e4 if backend.standardize_dtype(dtype) == "float16" else -1e9
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@keras_export("keras.layers.Softmax")
class Softmax(Layer):
    """Softmax activation layer.

    Formula:
    ``` python
    exp_x = exp(x - max(x))
    f(x) = exp_x / sum(exp_x)
    ```

    Example:
    >>> softmax_layer = keras.layers.Softmax()
    >>> input = np.array([1.0, 2.0, 1.0])
    >>> result = softmax_layer(input)
    >>> result
    [0.21194157, 0.5761169, 0.21194157]


    Args:
        axis: Integer, or list of Integers, axis along which the softmax
            normalization is applied.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.

    Call arguments:
        inputs: The inputs (logits) to the softmax layer.
        mask: A boolean mask of the same shape as `inputs`. The mask
            specifies 1 to keep and 0 to mask. Defaults to `None`.

    Returns:
        Softmaxed output with the same shape as `inputs`.
    """

    def __init__(self, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis
        # Stateless layer: masks pass through and nothing needs building.
        self.supports_masking = True
        self.built = True

    def call(self, inputs, mask=None):
        if mask is not None:
            # Push masked positions toward -inf so they vanish after the
            # exponentiation inside softmax.
            adder = (
                1.0 - backend.cast(mask, inputs.dtype)
            ) * _large_negative_number(inputs.dtype)
            inputs += adder
        axis_is_sequence = isinstance(self.axis, (tuple, list))
        if axis_is_sequence and len(self.axis) > 1:
            # Multi-axis softmax via the logsumexp trick for numerical
            # stability.
            return backend.numpy.exp(
                inputs
                - backend.math.logsumexp(
                    inputs, axis=self.axis, keepdims=True
                )
            )
        # A one-element sequence collapses to its single axis.
        axis = self.axis[0] if axis_is_sequence else self.axis
        return activations.softmax(inputs, axis=axis)

    def get_config(self):
        base_config = super().get_config()
        base_config.update({"axis": self.axis})
        return base_config

    def compute_output_shape(self, input_shape):
        # Normalization is element-wise in shape terms.
        return input_shape
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py
ADDED
|
File without changes
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (202 Bytes). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc
ADDED
|
Binary file (4.68 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc
ADDED
|
Binary file (14 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc
ADDED
|
Binary file (22.5 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import ops
|
| 2 |
+
from keras.src.api_export import keras_export
|
| 3 |
+
from keras.src.layers.attention.attention import Attention
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@keras_export("keras.layers.AdditiveAttention")
class AdditiveAttention(Attention):
    """Additive attention layer, a.k.a. Bahdanau-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none
        supplied, `value` will be used as `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)` as a non-linear sum
        `scores = reduce_sum(tanh(query + key), axis=-1)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of
        `value` with shape `(batch_size, Tq, dim)`.

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for
            the attention scores. Defaults to `0.0`.

    Call arguments:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`.
                If not given, will use `value` for both `key` and `value`,
                which is the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape
                `(batch_size, Tq)`. If given, the output will be zero at
                the positions where `mask==False`.
            - `value_mask`: A boolean mask tensor of shape
                `(batch_size, Tv)`. If given, will apply the mask such that
                values at positions where `mask==False` do not contribute
                to the result.
        return_attention_scores: bool, it `True`, returns the attention
            scores (after masking and softmax) as an additional output
            argument.
        training: Python boolean indicating whether the layer should behave
            in training mode (adding dropout) or in inference mode (no
            dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention.
            Adds a mask such that position `i` cannot attend to positions
            `j > i`. This prevents the flow of information from the future
            towards the past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
        `(batch_size, Tq, Tv)`.
    """

    def __init__(self, use_scale=True, dropout=0.0, **kwargs):
        super().__init__(use_scale=use_scale, dropout=dropout, **kwargs)

    def build(self, input_shape):
        self._validate_inputs(input_shape)
        dim = input_shape[0][-1]
        self.scale = None
        if self.use_scale:
            # Per-feature scale applied inside the tanh score reduction.
            self.scale = self.add_weight(
                name="scale",
                shape=[dim],
                initializer="glorot_uniform",
                dtype=self.dtype,
                trainable=True,
            )
        self.built = True

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a nonlinear sum of query and key.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
        """
        # Insert singleton axes so query broadcasts over Tv and key over
        # Tq: query -> [batch, Tq, 1, dim], key -> [batch, 1, Tv, dim].
        expanded_query = ops.expand_dims(query, axis=-2)
        expanded_key = ops.expand_dims(key, axis=-3)
        if self.use_scale:
            scale = self.scale
        else:
            scale = 1.0
        return ops.sum(
            scale * ops.tanh(expanded_query + expanded_key), axis=-1
        )

    def get_config(self):
        # `score_mode` is meaningless for additive attention, so it is
        # removed from the serialized parent config.
        base_config = super().get_config()
        base_config.pop("score_mode")
        return base_config
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import backend
|
| 2 |
+
from keras.src import ops
|
| 3 |
+
from keras.src.api_export import keras_export
|
| 4 |
+
from keras.src.backend import KerasTensor
|
| 5 |
+
from keras.src.layers.layer import Layer
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@keras_export("keras.layers.Attention")
class Attention(Layer):
    """Dot-product attention layer, a.k.a. Luong-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none
        supplied, `value` will be used as a `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of `value`
        with shape `(batch_size, Tq, dim)`.

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            attention scores. Defaults to `0.0`.
        seed: A Python integer to use as random seed in case of `dropout`.
        score_mode: Function to use to compute attention scores, one of
            `{"dot", "concat"}`. `"dot"` refers to the dot product between the
            query and key vectors. `"concat"` refers to the hyperbolic tangent
            of the concatenation of the `query` and `key` vectors.

    Call arguments:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If
                not given, will use `value` for both `key` and `value`, which
                is the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.
                If given, the output will be zero at the positions where
                `mask==False`.
            - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.
                If given, will apply the mask such that values at positions
                where `mask==False` do not contribute to the result.
        return_attention_scores: bool, if `True`, returns the attention scores
            (after masking and softmax) as an additional output argument.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention.
            Adds a mask such that position `i` cannot attend to positions
            `j > i`. This prevents the flow of information from the future
            towards the past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
            `(batch_size, Tq, Tv)`.
    """

    def __init__(
        self,
        use_scale=False,
        score_mode="dot",
        dropout=0.0,
        seed=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.use_scale = use_scale
        self.score_mode = score_mode
        self.dropout = dropout
        # The seed generator is only needed (and only created) when dropout
        # is actually applied to the attention weights.
        if self.dropout > 0:
            self.seed_generator = backend.random.SeedGenerator(seed=seed)

        if self.score_mode not in ["dot", "concat"]:
            raise ValueError(
                "Invalid value for argument score_mode. "
                "Expected one of {'dot', 'concat'}. "
                f"Received: score_mode={score_mode}"
            )

        # Updated by `call()`; read back by `compute_output_shape()` to
        # decide whether a scores shape must be reported as well.
        self._return_attention_scores = False

    def build(self, input_shape):
        self._validate_inputs(input_shape)
        self.scale = None
        self.concat_score_weight = None
        # Trainable scalar multiplier for the raw scores (`use_scale=True`).
        if self.use_scale:
            self.scale = self.add_weight(
                name="scale",
                shape=(),
                initializer="ones",
                dtype=self.dtype,
                trainable=True,
            )
        # Extra trainable scalar used only by the "concat" scoring variant.
        if self.score_mode == "concat":
            self.concat_score_weight = self.add_weight(
                name="concat_score_weight",
                shape=(),
                initializer="ones",
                dtype=self.dtype,
                trainable=True,
            )
        self.built = True

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a query-key dot product.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
        """
        if self.score_mode == "dot":
            scores = ops.matmul(query, ops.transpose(key, axes=[0, 2, 1]))
            if self.scale is not None:
                scores *= self.scale
        elif self.score_mode == "concat":
            # Reshape tensors to enable broadcasting.
            # Reshape into [batch_size, Tq, 1, dim].
            q_reshaped = ops.expand_dims(query, axis=-2)
            # Reshape into [batch_size, 1, Tv, dim].
            k_reshaped = ops.expand_dims(key, axis=-3)
            if self.scale is not None:
                scores = self.concat_score_weight * ops.sum(
                    ops.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1
                )
            else:
                scores = self.concat_score_weight * ops.sum(
                    ops.tanh(q_reshaped + k_reshaped), axis=-1
                )
        else:
            # Unreachable in practice: `__init__` already validates
            # `score_mode`; kept as a defensive guard.
            raise ValueError("scores not computed")

        return scores

    def _apply_scores(self, scores, value, scores_mask=None, training=False):
        """Applies attention scores to the given value tensor.

        To use this method in your attention layer, follow the steps:

        * Use `query` tensor of shape `(batch_size, Tq)` and `key` tensor of
            shape `(batch_size, Tv)` to calculate the attention `scores`.
        * Pass `scores` and `value` tensors to this method. The method applies
            `scores_mask`, calculates
            `attention_distribution = softmax(scores)`, then returns
            `matmul(attention_distribution, value)`.
        * Apply `query_mask` and return the result.

        Args:
            scores: Scores float tensor of shape `(batch_size, Tq, Tv)`.
            value: Value tensor of shape `(batch_size, Tv, dim)`.
            scores_mask: A boolean mask tensor of shape `(batch_size, 1, Tv)`
                or `(batch_size, Tq, Tv)`. If given, scores at positions where
                `scores_mask==False` do not contribute to the result. It must
                contain at least one `True` value in each line along the last
                dimension.
            training: Python boolean indicating whether the layer should
                behave in training mode (adding dropout) or in inference mode
                (no dropout).

        Returns:
            Tensor of shape `(batch_size, Tq, dim)`.
            Attention scores after masking and softmax with shape
                `(batch_size, Tq, Tv)`.
        """
        if scores_mask is not None:
            padding_mask = ops.logical_not(scores_mask)
            # Bias so padding positions do not contribute to attention
            # distribution. Note 65504. is the max float16 value.
            max_value = 65504.0 if scores.dtype == "float16" else 1.0e9
            scores -= max_value * ops.cast(padding_mask, dtype=scores.dtype)

        weights = ops.softmax(scores, axis=-1)
        if training and self.dropout > 0:
            weights = backend.random.dropout(
                weights,
                self.dropout,
                seed=self.seed_generator,
            )
        return ops.matmul(weights, value), weights

    def _calculate_score_mask(self, scores, v_mask, use_causal_mask):
        if use_causal_mask:
            # Creates a lower triangular mask, so position i cannot attend to
            # positions j > i. This prevents the flow of information from the
            # future into the past.
            score_shape = ops.shape(scores)
            # causal_mask_shape = [1, Tq, Tv].
            mask_shape = (1, score_shape[-2], score_shape[-1])
            ones_mask = ops.ones(shape=mask_shape, dtype="int32")
            row_index = ops.cumsum(ones_mask, axis=-2)
            col_index = ops.cumsum(ones_mask, axis=-1)
            causal_mask = ops.greater_equal(row_index, col_index)

            if v_mask is not None:
                # Mask of shape [batch_size, 1, Tv].
                v_mask = ops.expand_dims(v_mask, axis=-2)
                return ops.logical_and(v_mask, causal_mask)
            return causal_mask
        else:
            # If not using causal mask, return the value mask as is,
            # or None if the value mask is not provided.
            return v_mask

    def call(
        self,
        inputs,
        mask=None,
        training=False,
        return_attention_scores=False,
        use_causal_mask=False,
    ):
        self._validate_inputs(inputs=inputs, mask=mask)
        self._return_attention_scores = return_attention_scores
        q = inputs[0]
        v = inputs[1]
        # `key` defaults to `value` when only two inputs are given.
        k = inputs[2] if len(inputs) > 2 else v
        q_mask = mask[0] if mask else None
        v_mask = mask[1] if mask else None
        scores = self._calculate_scores(query=q, key=k)
        scores_mask = self._calculate_score_mask(
            scores, v_mask, use_causal_mask
        )
        attention_output, attention_scores = self._apply_scores(
            scores=scores, value=v, scores_mask=scores_mask, training=training
        )
        if q_mask is not None:
            # Mask of shape [batch_size, Tq, 1].
            q_mask = ops.expand_dims(q_mask, axis=-1)
            attention_output *= ops.cast(q_mask, dtype=attention_output.dtype)
        if return_attention_scores:
            return (attention_output, attention_scores)
        else:
            return attention_output

    def compute_mask(self, inputs, mask=None):
        # The output mask is the query mask (first entry), if any.
        self._validate_inputs(inputs=inputs, mask=mask)
        if mask is None or mask[0] is None:
            return None
        return ops.convert_to_tensor(mask[0])

    def compute_output_shape(self, input_shape):
        query_shape, value_shape, key_shape = input_shape
        if key_shape is None:
            key_shape = value_shape

        output_shape = (*query_shape[:-1], value_shape[-1])
        if self._return_attention_scores:
            scores_shape = (query_shape[0], query_shape[1], key_shape[1])
            return output_shape, scores_shape
        return output_shape

    def compute_output_spec(
        self,
        inputs,
        mask=None,
        return_attention_scores=False,
        training=None,
        use_causal_mask=False,
    ):
        # Validate and unpack inputs
        self._validate_inputs(inputs, mask)
        query = inputs[0]
        value = inputs[1]
        key = inputs[2] if len(inputs) > 2 else value

        # Compute primary output shape
        output_shape = self.compute_output_shape(
            [query.shape, value.shape, key.shape]
        )
        output_spec = KerasTensor(output_shape, dtype=self.compute_dtype)

        # Handle attention scores if requested
        if self._return_attention_scores or return_attention_scores:
            scores_shape = (
                query.shape[0],
                query.shape[1],
                key.shape[1],
            )  # (batch_size, Tq, Tv)
            attention_scores_spec = KerasTensor(
                scores_shape, dtype=self.compute_dtype
            )
            return (output_spec, attention_scores_spec)

        return output_spec

    def _validate_inputs(self, inputs, mask=None):
        """Validates arguments of the call method."""
        class_name = self.__class__.__name__
        if not isinstance(inputs, list):
            raise ValueError(
                f"{class_name} layer must be called on a list of inputs, "
                "namely [query, value] or [query, value, key]. "
                f"Received: inputs={inputs}."
            )
        if len(inputs) < 2 or len(inputs) > 3:
            raise ValueError(
                f"{class_name} layer accepts inputs list of length 2 or 3, "
                "namely [query, value] or [query, value, key]. "
                f"Received length: {len(inputs)}."
            )
        if mask is not None:
            if not isinstance(mask, list):
                raise ValueError(
                    f"{class_name} layer mask must be a list, "
                    f"namely [query_mask, value_mask]. Received: mask={mask}."
                )
            if len(mask) < 2 or len(mask) > 3:
                raise ValueError(
                    f"{class_name} layer accepts mask list of length 2 or 3. "
                    f"Received: inputs={inputs}, mask={mask}."
                )

    def get_config(self):
        base_config = super().get_config()
        config = {
            "use_scale": self.use_scale,
            "score_mode": self.score_mode,
            "dropout": self.dropout,
        }
        return {**base_config, **config}
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py
ADDED
|
@@ -0,0 +1,504 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
|
| 3 |
+
from keras.src import constraints
|
| 4 |
+
from keras.src import initializers
|
| 5 |
+
from keras.src import ops
|
| 6 |
+
from keras.src import regularizers
|
| 7 |
+
from keras.src.api_export import keras_export
|
| 8 |
+
from keras.src.backend.config import is_flash_attention_enabled
|
| 9 |
+
from keras.src.layers.activations.softmax import Softmax
|
| 10 |
+
from keras.src.layers.core.einsum_dense import EinsumDense
|
| 11 |
+
from keras.src.layers.layer import Layer
|
| 12 |
+
from keras.src.layers.regularization.dropout import Dropout
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@keras_export("keras.layers.GroupQueryAttention")
|
| 16 |
+
class GroupedQueryAttention(Layer):
|
| 17 |
+
"""Grouped Query Attention layer.
|
| 18 |
+
|
| 19 |
+
This is an implementation of grouped-query attention introduced by
|
| 20 |
+
[Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). Here
|
| 21 |
+
`num_key_value_heads` denotes number of groups, setting
|
| 22 |
+
`num_key_value_heads` to 1 is equivalent to multi-query attention, and
|
| 23 |
+
when `num_key_value_heads` is equal to `num_query_heads` it is equivalent
|
| 24 |
+
to multi-head attention.
|
| 25 |
+
|
| 26 |
+
This layer first projects `query`, `key`, and `value` tensors. Then, `key`
|
| 27 |
+
and `value` are repeated to match the number of heads of `query`.
|
| 28 |
+
|
| 29 |
+
Then, the `query` is scaled and dot-producted with `key` tensors. These are
|
| 30 |
+
softmaxed to obtain attention probabilities. The value tensors are then
|
| 31 |
+
interpolated by these probabilities and concatenated back to a single
|
| 32 |
+
tensor.
|
| 33 |
+
|
| 34 |
+
Args:
|
| 35 |
+
head_dim: Size of each attention head.
|
| 36 |
+
num_query_heads: Number of query attention heads.
|
| 37 |
+
num_key_value_heads: Number of key and value attention heads.
|
| 38 |
+
dropout: Dropout probability.
|
| 39 |
+
use_bias: Boolean, whether the dense layers use bias vectors/matrices.
|
| 40 |
+
flash_attention: If `None`, the layer attempts to use flash
|
| 41 |
+
attention for faster and more memory-efficient attention
|
| 42 |
+
computations when possible. This behavior can be configured using
|
| 43 |
+
`keras.config.enable_flash_attention()` or
|
| 44 |
+
`keras.config.disable_flash_attention()`.
|
| 45 |
+
kernel_initializer: Initializer for dense layer kernels.
|
| 46 |
+
bias_initializer: Initializer for dense layer biases.
|
| 47 |
+
kernel_regularizer: Regularizer for dense layer kernels.
|
| 48 |
+
bias_regularizer: Regularizer for dense layer biases.
|
| 49 |
+
activity_regularizer: Regularizer for dense layer activity.
|
| 50 |
+
kernel_constraint: Constraint for dense layer kernels.
|
| 51 |
+
bias_constraint: Constraint for dense layer kernels.
|
| 52 |
+
seed: Optional integer to seed the dropout layer.
|
| 53 |
+
|
| 54 |
+
Call arguments:
|
| 55 |
+
query: Query tensor of shape `(batch_dim, target_seq_len, feature_dim)`,
|
| 56 |
+
where `batch_dim` is batch size, `target_seq_len` is the length of
|
| 57 |
+
target sequence, and `feature_dim` is dimension of feature.
|
| 58 |
+
value: Value tensor of shape `(batch_dim, source_seq_len, feature_dim)`,
|
| 59 |
+
where `batch_dim` is batch size, `source_seq_len` is the length of
|
| 60 |
+
source sequence, and `feature_dim` is dimension of feature.
|
| 61 |
+
key: Optional key tensor of shape
|
| 62 |
+
`(batch_dim, source_seq_len, feature_dim)`. If not given, will use
|
| 63 |
+
`value` for both `key` and `value`, which is most common case.
|
| 64 |
+
attention_mask: A boolean mask of shape
|
| 65 |
+
`(batch_dim, target_seq_len, source_seq_len)`, that prevents
|
| 66 |
+
attention to certain positions. The boolean mask specifies which
|
| 67 |
+
query elements can attend to which key elements, where 1 indicates
|
| 68 |
+
attention and 0 indicates no attention. Broadcasting can happen for
|
| 69 |
+
the missing batch dimensions and the head dimension.
|
| 70 |
+
return_attention_scores: A boolean to indicate whether the output
|
| 71 |
+
should be `(attention_output, attention_scores)` if `True`, or
|
| 72 |
+
`attention_output` if `False`. Defaults to `False`.
|
| 73 |
+
training: Python boolean indicating whether the layer should behave in
|
| 74 |
+
training mode (adding dropout) or in inference mode (no dropout).
|
| 75 |
+
Will go with either using the training mode of the parent
|
| 76 |
+
layer/model or `False` (inference) if there is no parent layer.
|
| 77 |
+
use_causal_mask: A boolean to indicate whether to apply a causal mask to
|
| 78 |
+
prevent tokens from attending to future tokens (e.g., used in a
|
| 79 |
+
decoder Transformer).
|
| 80 |
+
|
| 81 |
+
Returns:
|
| 82 |
+
attention_output: Result of the computation, of shape
|
| 83 |
+
`(batch_dim, target_seq_len, feature_dim)`, where `target_seq_len`
|
| 84 |
+
is for target sequence length and `feature_dim` is the query input
|
| 85 |
+
last dim.
|
| 86 |
+
attention_scores: (Optional) attention coefficients of shape
|
| 87 |
+
`(batch_dim, num_query_heads, target_seq_len, source_seq_len)`.
|
| 88 |
+
"""
|
| 89 |
+
|
| 90 |
+
    def __init__(
        self,
        head_dim,
        num_query_heads,
        num_key_value_heads,
        dropout=0.0,
        use_bias=True,
        flash_attention=None,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        seed=None,
        **kwargs,
    ):
        """Initializes the layer; see the class docstring for argument docs.

        Raises:
            ValueError: If `num_query_heads` is not divisible by
                `num_key_value_heads`, or if `dropout > 0` while flash
                attention is enabled.
        """
        super().__init__(**kwargs)
        self.supports_masking = True
        self.head_dim = head_dim
        self.num_query_heads = num_query_heads
        self.num_key_value_heads = num_key_value_heads
        if num_query_heads % num_key_value_heads != 0:
            raise ValueError(
                "`num_query_heads` must be divisible"
                " by `num_key_value_heads`."
            )
        # Each key/value head is shared by this many query heads.
        self.num_repeats = num_query_heads // num_key_value_heads
        self.dropout = dropout
        self.use_bias = use_bias
        # Explicit `flash_attention` overrides the global config default.
        self._flash_attention = flash_attention or is_flash_attention_enabled()
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.seed = seed

        # Precomputed 1/sqrt(head_dim) scaling factor for the scores.
        self._inverse_sqrt_head_dim = 1.0 / math.sqrt(float(self.head_dim))
        self._return_attention_scores = False

        # Check for flash attention constraints
        if self._flash_attention and self.dropout > 0.0:
            raise ValueError(
                "Dropout is not supported when flash attention is enabled. "
                "Please set dropout to 0.0 to use flash attention."
            )
|
| 140 |
+
|
| 141 |
+
    def build(
        self,
        query_shape,
        value_shape,
        key_shape=None,
    ):
        """Creates the query/key/value/output projections and sublayers.

        Args:
            query_shape: Shape tuple `(batch_dim, target_seq_len, feature_dim)`.
            value_shape: Shape tuple `(batch_dim, source_seq_len, feature_dim)`.
            key_shape: Optional shape tuple for `key`; defaults to
                `value_shape` when not provided.
        """
        # Einsum variables:
        # b = batch size
        # q = query length
        # k = key/value length
        # m = model dim
        # u = num query heads
        # v = num key/value heads
        # h = head dim
        key_shape = value_shape if key_shape is None else key_shape
        self.feature_dim = query_shape[-1]
        self._query_dense = EinsumDense(
            "bqm,muh->bquh",
            output_shape=(None, self.num_query_heads, self.head_dim),
            bias_axes="uh" if self.use_bias else None,
            name="query",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._query_dense.build(query_shape)

        self._key_dense = EinsumDense(
            "bkm,mvh->bkvh",
            output_shape=(None, self.num_key_value_heads, self.head_dim),
            bias_axes="vh" if self.use_bias else None,
            name="key",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._key_dense.build(key_shape)

        self._value_dense = EinsumDense(
            "bkm,mvh->bkvh",
            output_shape=(None, self.num_key_value_heads, self.head_dim),
            bias_axes="vh" if self.use_bias else None,
            name="value",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._value_dense.build(value_shape)

        self._softmax = Softmax(axis=-1, dtype=self.dtype_policy)
        self._dropout_layer = Dropout(
            rate=self.dropout, dtype=self.dtype_policy, seed=self.seed
        )

        # Score computation and weighted value combination, after key/value
        # heads have been repeated to match the number of query heads.
        self._dot_product_equation = "bquh,bkuh->buqk"
        self._combine_equation = "buqk,bkuh->bquh"

        self._output_dense = EinsumDense(
            "bquh,uhm->bqm",
            output_shape=(None, self.feature_dim),
            bias_axes="m" if self.use_bias else None,
            name="attention_output",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._output_dense.build(
            (None, None, self.num_query_heads, self.head_dim)
        )
        self.built = True
|
| 203 |
+
|
| 204 |
+
def _get_common_kwargs_for_sublayer(self):
|
| 205 |
+
common_kwargs = dict(
|
| 206 |
+
kernel_regularizer=self.kernel_regularizer,
|
| 207 |
+
bias_regularizer=self.bias_regularizer,
|
| 208 |
+
activity_regularizer=self.activity_regularizer,
|
| 209 |
+
kernel_constraint=self.kernel_constraint,
|
| 210 |
+
bias_constraint=self.bias_constraint,
|
| 211 |
+
dtype=self.dtype_policy,
|
| 212 |
+
)
|
| 213 |
+
# Create new clone of kernel/bias initializer, so that we don't reuse
|
| 214 |
+
# the initializer instance, which could lead to same init value since
|
| 215 |
+
# initializer is stateless.
|
| 216 |
+
kernel_initializer = self.kernel_initializer.__class__.from_config(
|
| 217 |
+
self.kernel_initializer.get_config()
|
| 218 |
+
)
|
| 219 |
+
bias_initializer = self.bias_initializer.__class__.from_config(
|
| 220 |
+
self.bias_initializer.get_config()
|
| 221 |
+
)
|
| 222 |
+
common_kwargs["kernel_initializer"] = kernel_initializer
|
| 223 |
+
common_kwargs["bias_initializer"] = bias_initializer
|
| 224 |
+
return common_kwargs
|
| 225 |
+
|
| 226 |
+
    def call(
        self,
        query,
        value,
        key=None,
        query_mask=None,
        value_mask=None,
        key_mask=None,
        attention_mask=None,
        return_attention_scores=False,
        training=None,
        use_causal_mask=False,
    ):
        """Applies grouped-query attention; see the class docstring for the
        full argument and return-value documentation.
        """
        self._return_attention_scores = return_attention_scores
        if key is None:
            key = value

        # Merge the explicit attention mask with masks derived from the
        # inputs' Keras masks and (optionally) the causal mask.
        attention_mask = self._compute_attention_mask(
            query,
            value,
            query_mask=query_mask,
            value_mask=value_mask,
            key_mask=key_mask,
            attention_mask=attention_mask,
            use_causal_mask=use_causal_mask,
        )

        # Project inputs into per-head spaces.
        query = self._query_dense(query)
        key = self._key_dense(key)
        value = self._value_dense(value)

        # Repeat key/value heads along the head axis so each query head has
        # a matching key/value head.
        key = ops.repeat(
            key, self.num_repeats, axis=2
        )  # (batch_dim, source_seq_len, query_heads, head_dim)
        value = ops.repeat(
            value, self.num_repeats, axis=2
        )  # (batch_dim, source_seq_len, query_heads, head_dim)

        output, scores = self._compute_attention(
            query,
            key,
            value,
            attention_mask=attention_mask,
            training=training,
        )

        output = self._output_dense(
            output
        )  # (batch_dim, target_seq_len, feature_dim)

        if return_attention_scores:
            return output, scores
        return output
|
| 279 |
+
|
| 280 |
+
def _compute_attention_mask(
|
| 281 |
+
self,
|
| 282 |
+
query,
|
| 283 |
+
value,
|
| 284 |
+
query_mask=None,
|
| 285 |
+
value_mask=None,
|
| 286 |
+
key_mask=None,
|
| 287 |
+
attention_mask=None,
|
| 288 |
+
use_causal_mask=False,
|
| 289 |
+
):
|
| 290 |
+
"""Computes the attention mask, using the Keras masks of the inputs.
|
| 291 |
+
|
| 292 |
+
* The `query`'s mask is reshaped from [B, T] to [B, T, 1].
|
| 293 |
+
* The `value`'s mask is reshaped from [B, S] to [B, 1, S].
|
| 294 |
+
* The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s
|
| 295 |
+
mask is ignored if `key` is `None` or if `key is value`.
|
| 296 |
+
* If `use_causal_mask=True`, then the causal mask is computed. Its shape
|
| 297 |
+
is [1, T, S].
|
| 298 |
+
|
| 299 |
+
All defined masks are merged using a logical AND operation (`&`).
|
| 300 |
+
|
| 301 |
+
In general, if the `query` and `value` are masked, then there is no need
|
| 302 |
+
to define the `attention_mask`.
|
| 303 |
+
|
| 304 |
+
Args:
|
| 305 |
+
query: Projected query tensor of shape `(B, T, N, key_dim)`.
|
| 306 |
+
key: Projected key tensor of shape `(B, T, N, key_dim)`.
|
| 307 |
+
value: Projected value tensor of shape `(B, T, N, value_dim)`.
|
| 308 |
+
attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
|
| 309 |
+
attention to certain positions.
|
| 310 |
+
use_causal_mask: A boolean to indicate whether to apply a causal
|
| 311 |
+
mask to prevent tokens from attending to future tokens (e.g.,
|
| 312 |
+
used in a decoder Transformer).
|
| 313 |
+
|
| 314 |
+
Returns:
|
| 315 |
+
attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
|
| 316 |
+
attention to certain positions, based on the Keras masks of the
|
| 317 |
+
`query`, `key`, `value`, and `attention_mask` tensors, and the
|
| 318 |
+
causal mask if `use_causal_mask=True`.
|
| 319 |
+
"""
|
| 320 |
+
auto_mask = None
|
| 321 |
+
if query_mask is not None:
|
| 322 |
+
query_mask = ops.cast(query_mask, "bool") # defensive casting
|
| 323 |
+
# B = batch size, T = max query length
|
| 324 |
+
auto_mask = ops.expand_dims(query_mask, -1) # shape is [B, T, 1]
|
| 325 |
+
if value_mask is not None:
|
| 326 |
+
value_mask = ops.cast(value_mask, "bool") # defensive casting
|
| 327 |
+
# B = batch size, S == max value length
|
| 328 |
+
mask = ops.expand_dims(value_mask, -2) # shape is [B, 1, S]
|
| 329 |
+
auto_mask = mask if auto_mask is None else auto_mask & mask
|
| 330 |
+
if key_mask is not None:
|
| 331 |
+
key_mask = ops.cast(key_mask, "bool") # defensive casting
|
| 332 |
+
# B == batch size, S == max key length == max value length
|
| 333 |
+
mask = ops.expand_dims(key_mask, -2) # shape is [B, 1, S]
|
| 334 |
+
auto_mask = mask if auto_mask is None else auto_mask & mask
|
| 335 |
+
if use_causal_mask:
|
| 336 |
+
# the shape of the causal mask is [1, T, S]
|
| 337 |
+
mask = self._compute_causal_mask(query, value)
|
| 338 |
+
auto_mask = mask if auto_mask is None else auto_mask & mask
|
| 339 |
+
if auto_mask is not None:
|
| 340 |
+
# merge attention_mask & automatic mask, to shape [B, T, S]
|
| 341 |
+
attention_mask = (
|
| 342 |
+
auto_mask
|
| 343 |
+
if attention_mask is None
|
| 344 |
+
else ops.cast(attention_mask, bool) & auto_mask
|
| 345 |
+
)
|
| 346 |
+
return attention_mask
|
| 347 |
+
|
| 348 |
+
def _compute_causal_mask(self, query, value=None):
|
| 349 |
+
"""Computes a causal mask (e.g., for masked self-attention layers).
|
| 350 |
+
|
| 351 |
+
For example, if query and value both contain sequences of length 4,
|
| 352 |
+
this function returns a boolean tensor equal to:
|
| 353 |
+
|
| 354 |
+
```
|
| 355 |
+
[[[True, False, False, False],
|
| 356 |
+
[True, True, False, False],
|
| 357 |
+
[True, True, True, False],
|
| 358 |
+
[True, True, True, True]]]
|
| 359 |
+
```
|
| 360 |
+
|
| 361 |
+
Args:
|
| 362 |
+
query: query tensor of shape `(B, T, ...)`.
|
| 363 |
+
value: value tensor of shape `(B, S, ...)` (optional, defaults to
|
| 364 |
+
query).
|
| 365 |
+
|
| 366 |
+
Returns:
|
| 367 |
+
mask: a boolean tensor of shape `(1, T, S)` containing a lower
|
| 368 |
+
triangular matrix of shape `(T, S)`.
|
| 369 |
+
"""
|
| 370 |
+
q_seq_length = ops.shape(query)[1]
|
| 371 |
+
v_seq_length = q_seq_length if value is None else ops.shape(value)[1]
|
| 372 |
+
ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32")
|
| 373 |
+
row_index = ops.cumsum(ones_mask, axis=-2)
|
| 374 |
+
col_index = ops.cumsum(ones_mask, axis=-1)
|
| 375 |
+
return ops.greater_equal(row_index, col_index)
|
| 376 |
+
|
| 377 |
+
def _compute_attention(
|
| 378 |
+
self, query, key, value, attention_mask=None, training=None
|
| 379 |
+
):
|
| 380 |
+
# Check for flash attention constraints
|
| 381 |
+
if self._flash_attention and self._return_attention_scores:
|
| 382 |
+
raise ValueError(
|
| 383 |
+
"Returning attention scores is not supported when flash "
|
| 384 |
+
"attention is enabled. Please disable flash attention to access"
|
| 385 |
+
" attention scores."
|
| 386 |
+
)
|
| 387 |
+
|
| 388 |
+
# Determine whether to use dot-product attention
|
| 389 |
+
use_dot_product_attention = not (
|
| 390 |
+
self.dropout > 0.0
|
| 391 |
+
or self._return_attention_scores
|
| 392 |
+
or (len(query.shape) != 4)
|
| 393 |
+
)
|
| 394 |
+
|
| 395 |
+
if use_dot_product_attention:
|
| 396 |
+
if attention_mask is not None:
|
| 397 |
+
# Ensure attention_mask has the correct shape for broadcasting
|
| 398 |
+
# Expected shape: [batch_size, num_heads, query_seq_len,
|
| 399 |
+
# key_seq_len].
|
| 400 |
+
mask_expansion_axis = -1 * 2 - 1
|
| 401 |
+
len_attention_scores_shape = 4 # Only accepts 4D inputs
|
| 402 |
+
for _ in range(
|
| 403 |
+
len_attention_scores_shape - len(attention_mask.shape)
|
| 404 |
+
):
|
| 405 |
+
attention_mask = ops.expand_dims(
|
| 406 |
+
attention_mask, axis=mask_expansion_axis
|
| 407 |
+
)
|
| 408 |
+
attention_mask = ops.cast(attention_mask, dtype="bool")
|
| 409 |
+
# Directly compute the attention output using dot-product attention
|
| 410 |
+
attention_output = ops.dot_product_attention(
|
| 411 |
+
query=query,
|
| 412 |
+
key=key,
|
| 413 |
+
value=value,
|
| 414 |
+
bias=None,
|
| 415 |
+
mask=attention_mask,
|
| 416 |
+
scale=self._inverse_sqrt_head_dim,
|
| 417 |
+
is_causal=False,
|
| 418 |
+
flash_attention=self._flash_attention,
|
| 419 |
+
)
|
| 420 |
+
return attention_output, None
|
| 421 |
+
|
| 422 |
+
# Default behavior without flash attention, with explicit attention
|
| 423 |
+
# scores
|
| 424 |
+
query = ops.multiply(
|
| 425 |
+
query, ops.cast(self._inverse_sqrt_head_dim, query.dtype)
|
| 426 |
+
)
|
| 427 |
+
# Take the dot product between "query" and "key" to get the raw
|
| 428 |
+
# attention scores.
|
| 429 |
+
scores = ops.einsum(
|
| 430 |
+
self._dot_product_equation, query, key
|
| 431 |
+
) # (batch_dim, query_heads, target_seq_len, source_seq_len)
|
| 432 |
+
scores = self._masked_softmax(scores, attention_mask=attention_mask)
|
| 433 |
+
# This is actually dropping out entire tokens to attend to, which might
|
| 434 |
+
# seem a bit unusual, but is taken from the original Transformer paper.
|
| 435 |
+
if self.dropout > 0.0:
|
| 436 |
+
scores_dropout = self._dropout_layer(scores, training=training)
|
| 437 |
+
else:
|
| 438 |
+
scores_dropout = scores
|
| 439 |
+
output = ops.einsum(self._combine_equation, scores_dropout, value)
|
| 440 |
+
return output, scores
|
| 441 |
+
|
| 442 |
+
def _masked_softmax(self, scores, attention_mask=None):
|
| 443 |
+
# Normalize the attention scores to probabilities.
|
| 444 |
+
# scores = [B, N, T, S]
|
| 445 |
+
if attention_mask is not None:
|
| 446 |
+
# The expand dim happens starting from the `num_heads` dimension,
|
| 447 |
+
# (<batch_dims>, num_heads, <query_attention_dims,
|
| 448 |
+
# key_attention_dims>)
|
| 449 |
+
mask_expansion_axis = -1 * 2 - 1
|
| 450 |
+
for _ in range(len(scores.shape) - len(attention_mask.shape)):
|
| 451 |
+
attention_mask = ops.expand_dims(
|
| 452 |
+
attention_mask, axis=mask_expansion_axis
|
| 453 |
+
)
|
| 454 |
+
return self._softmax(scores, mask=attention_mask)
|
| 455 |
+
|
| 456 |
+
def compute_output_shape(
|
| 457 |
+
self,
|
| 458 |
+
query_shape,
|
| 459 |
+
value_shape,
|
| 460 |
+
key_shape=None,
|
| 461 |
+
):
|
| 462 |
+
if key_shape is None:
|
| 463 |
+
key_shape = value_shape
|
| 464 |
+
|
| 465 |
+
if query_shape[-1] != value_shape[-1]:
|
| 466 |
+
raise ValueError(
|
| 467 |
+
"The last dimension of `query_shape` and `value_shape` "
|
| 468 |
+
f"must be equal, but are {query_shape[-1]}, {value_shape[-1]}. "
|
| 469 |
+
"Received: query_shape={query_shape}, value_shape={value_shape}"
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
+
if value_shape[1:-1] != key_shape[1:-1]:
|
| 473 |
+
raise ValueError(
|
| 474 |
+
"All dimensions of `value` and `key`, except the last one, "
|
| 475 |
+
f"must be equal. Received: value_shape={value_shape} and "
|
| 476 |
+
f"key_shape={key_shape}"
|
| 477 |
+
)
|
| 478 |
+
|
| 479 |
+
return query_shape
|
| 480 |
+
|
| 481 |
+
def get_config(self):
|
| 482 |
+
config = {
|
| 483 |
+
"head_dim": self.head_dim,
|
| 484 |
+
"num_query_heads": self.num_query_heads,
|
| 485 |
+
"num_key_value_heads": self.num_key_value_heads,
|
| 486 |
+
"use_bias": self.use_bias,
|
| 487 |
+
"dropout": self.dropout,
|
| 488 |
+
"kernel_initializer": initializers.serialize(
|
| 489 |
+
self.kernel_initializer
|
| 490 |
+
),
|
| 491 |
+
"bias_initializer": initializers.serialize(self.bias_initializer),
|
| 492 |
+
"kernel_regularizer": regularizers.serialize(
|
| 493 |
+
self.kernel_regularizer
|
| 494 |
+
),
|
| 495 |
+
"bias_regularizer": regularizers.serialize(self.bias_regularizer),
|
| 496 |
+
"activity_regularizer": regularizers.serialize(
|
| 497 |
+
self.activity_regularizer
|
| 498 |
+
),
|
| 499 |
+
"kernel_constraint": constraints.serialize(self.kernel_constraint),
|
| 500 |
+
"bias_constraint": constraints.serialize(self.bias_constraint),
|
| 501 |
+
"seed": self.seed,
|
| 502 |
+
}
|
| 503 |
+
base_config = super().get_config()
|
| 504 |
+
return {**base_config, **config}
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import string
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from keras.src import backend
|
| 7 |
+
from keras.src import constraints
|
| 8 |
+
from keras.src import initializers
|
| 9 |
+
from keras.src import ops
|
| 10 |
+
from keras.src import regularizers
|
| 11 |
+
from keras.src.api_export import keras_export
|
| 12 |
+
from keras.src.backend.config import is_flash_attention_enabled
|
| 13 |
+
from keras.src.layers.activations.softmax import Softmax
|
| 14 |
+
from keras.src.layers.core.einsum_dense import EinsumDense
|
| 15 |
+
from keras.src.layers.layer import Layer
|
| 16 |
+
from keras.src.layers.regularization.dropout import Dropout
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@keras_export("keras.layers.MultiHeadAttention")
|
| 20 |
+
class MultiHeadAttention(Layer):
|
| 21 |
+
"""MultiHeadAttention layer.
|
| 22 |
+
|
| 23 |
+
This is an implementation of multi-headed attention as described in the
|
| 24 |
+
paper "Attention is all you Need"
|
| 25 |
+
[Vaswani et al., 2017](https://arxiv.org/abs/1706.03762).
|
| 26 |
+
If `query`, `key,` `value` are the same, then
|
| 27 |
+
this is self-attention. Each timestep in `query` attends to the
|
| 28 |
+
corresponding sequence in `key`, and returns a fixed-width vector.
|
| 29 |
+
|
| 30 |
+
This layer first projects `query`, `key` and `value`. These are
|
| 31 |
+
(effectively) a list of tensors of length `num_attention_heads`, where the
|
| 32 |
+
corresponding shapes are `(batch_size, <query dimensions>, key_dim)`,
|
| 33 |
+
`(batch_size, <key/value dimensions>, key_dim)`,
|
| 34 |
+
`(batch_size, <key/value dimensions>, value_dim)`.
|
| 35 |
+
|
| 36 |
+
Then, the query and key tensors are dot-producted and scaled. These are
|
| 37 |
+
softmaxed to obtain attention probabilities. The value tensors are then
|
| 38 |
+
interpolated by these probabilities, then concatenated back to a single
|
| 39 |
+
tensor.
|
| 40 |
+
|
| 41 |
+
Finally, the result tensor with the last dimension as `value_dim` can take
|
| 42 |
+
a linear projection and return.
|
| 43 |
+
|
| 44 |
+
Args:
|
| 45 |
+
num_heads: Number of attention heads.
|
| 46 |
+
key_dim: Size of each attention head for query and key.
|
| 47 |
+
value_dim: Size of each attention head for value.
|
| 48 |
+
dropout: Dropout probability.
|
| 49 |
+
use_bias: Boolean, whether the dense layers use bias vectors/matrices.
|
| 50 |
+
output_shape: The expected shape of an output tensor, besides the batch
|
| 51 |
+
and sequence dims. If not specified, projects back to the query
|
| 52 |
+
feature dim (the query input's last dimension).
|
| 53 |
+
attention_axes: axes over which the attention is applied. `None` means
|
| 54 |
+
attention over all axes, but batch, heads, and features.
|
| 55 |
+
flash_attention: If `None`, the layer attempts to use flash
|
| 56 |
+
attention for faster and more memory-efficient attention
|
| 57 |
+
computations when possible. This behavior can be configured using
|
| 58 |
+
`keras.config.enable_flash_attention()` or
|
| 59 |
+
`keras.config.disable_flash_attention()`.
|
| 60 |
+
kernel_initializer: Initializer for dense layer kernels.
|
| 61 |
+
bias_initializer: Initializer for dense layer biases.
|
| 62 |
+
kernel_regularizer: Regularizer for dense layer kernels.
|
| 63 |
+
bias_regularizer: Regularizer for dense layer biases.
|
| 64 |
+
activity_regularizer: Regularizer for dense layer activity.
|
| 65 |
+
kernel_constraint: Constraint for dense layer kernels.
|
| 66 |
+
bias_constraint: Constraint for dense layer kernels.
|
| 67 |
+
seed: Optional integer to seed the dropout layer.
|
| 68 |
+
|
| 69 |
+
Call arguments:
|
| 70 |
+
query: Query tensor of shape `(B, T, dim)`, where `B` is the batch size,
|
| 71 |
+
`T` is the target sequence length, and dim is the feature dimension.
|
| 72 |
+
value: Value tensor of shape `(B, S, dim)`, where `B` is the batch size,
|
| 73 |
+
`S` is the source sequence length, and dim is the feature dimension.
|
| 74 |
+
key: Optional key tensor of shape `(B, S, dim)`. If not given, will
|
| 75 |
+
use `value` for both `key` and `value`, which is the most common
|
| 76 |
+
case.
|
| 77 |
+
attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
|
| 78 |
+
attention to certain positions. The boolean mask specifies which
|
| 79 |
+
query elements can attend to which key elements, 1 indicates
|
| 80 |
+
attention and 0 indicates no attention. Broadcasting can happen for
|
| 81 |
+
the missing batch dimensions and the head dimension.
|
| 82 |
+
return_attention_scores: A boolean to indicate whether the output should
|
| 83 |
+
be `(attention_output, attention_scores)` if `True`, or
|
| 84 |
+
`attention_output` if `False`. Defaults to `False`.
|
| 85 |
+
training: Python boolean indicating whether the layer should behave in
|
| 86 |
+
training mode (adding dropout) or in inference mode (no dropout).
|
| 87 |
+
Will go with either using the training mode of the parent
|
| 88 |
+
layer/model, or `False` (inference) if there is no parent layer.
|
| 89 |
+
use_causal_mask: A boolean to indicate whether to apply a causal mask to
|
| 90 |
+
prevent tokens from attending to future tokens (e.g., used in a
|
| 91 |
+
decoder Transformer).
|
| 92 |
+
|
| 93 |
+
Returns:
|
| 94 |
+
attention_output: The result of the computation, of shape `(B, T, E)`,
|
| 95 |
+
where `T` is for target sequence shapes and `E` is the query input
|
| 96 |
+
last dimension if `output_shape` is `None`. Otherwise, the
|
| 97 |
+
multi-head outputs are projected to the shape specified by
|
| 98 |
+
`output_shape`.
|
| 99 |
+
attention_scores: (Optional) multi-head attention coefficients over
|
| 100 |
+
attention axes.
|
| 101 |
+
"""
|
| 102 |
+
|
| 103 |
+
def __init__(
|
| 104 |
+
self,
|
| 105 |
+
num_heads,
|
| 106 |
+
key_dim,
|
| 107 |
+
value_dim=None,
|
| 108 |
+
dropout=0.0,
|
| 109 |
+
use_bias=True,
|
| 110 |
+
output_shape=None,
|
| 111 |
+
attention_axes=None,
|
| 112 |
+
flash_attention=None,
|
| 113 |
+
kernel_initializer="glorot_uniform",
|
| 114 |
+
bias_initializer="zeros",
|
| 115 |
+
kernel_regularizer=None,
|
| 116 |
+
bias_regularizer=None,
|
| 117 |
+
activity_regularizer=None,
|
| 118 |
+
kernel_constraint=None,
|
| 119 |
+
bias_constraint=None,
|
| 120 |
+
seed=None,
|
| 121 |
+
**kwargs,
|
| 122 |
+
):
|
| 123 |
+
super().__init__(**kwargs)
|
| 124 |
+
self.supports_masking = True
|
| 125 |
+
self._num_heads = num_heads
|
| 126 |
+
self._key_dim = key_dim
|
| 127 |
+
self._value_dim = value_dim if value_dim else key_dim
|
| 128 |
+
self._dropout = dropout
|
| 129 |
+
self._use_bias = use_bias
|
| 130 |
+
if output_shape:
|
| 131 |
+
if isinstance(output_shape, int):
|
| 132 |
+
output_shape = (output_shape,)
|
| 133 |
+
try:
|
| 134 |
+
output_shape = tuple(output_shape)
|
| 135 |
+
except:
|
| 136 |
+
raise ValueError(
|
| 137 |
+
f"Invalid `output_shape`: {output_shape}. When "
|
| 138 |
+
"specified, the `output_shape` should be of type tuple, "
|
| 139 |
+
"list, or int."
|
| 140 |
+
)
|
| 141 |
+
self._output_shape = output_shape
|
| 142 |
+
self._flash_attention = flash_attention or is_flash_attention_enabled()
|
| 143 |
+
self._kernel_initializer = initializers.get(kernel_initializer)
|
| 144 |
+
self._bias_initializer = initializers.get(bias_initializer)
|
| 145 |
+
self._kernel_regularizer = regularizers.get(kernel_regularizer)
|
| 146 |
+
self._bias_regularizer = regularizers.get(bias_regularizer)
|
| 147 |
+
self._activity_regularizer = regularizers.get(activity_regularizer)
|
| 148 |
+
self._kernel_constraint = constraints.get(kernel_constraint)
|
| 149 |
+
self._bias_constraint = constraints.get(bias_constraint)
|
| 150 |
+
if isinstance(attention_axes, int):
|
| 151 |
+
attention_axes = (attention_axes,)
|
| 152 |
+
elif attention_axes and not isinstance(attention_axes, (list, tuple)):
|
| 153 |
+
raise ValueError(
|
| 154 |
+
"`attention_axes` must be an int, list, or tuple."
|
| 155 |
+
f"Received: attention_axes={attention_axes}"
|
| 156 |
+
)
|
| 157 |
+
self._attention_axes = attention_axes
|
| 158 |
+
self.seed = seed
|
| 159 |
+
|
| 160 |
+
self._inverse_sqrt_key_dim = 1.0 / math.sqrt(float(self._key_dim))
|
| 161 |
+
self._return_attention_scores = False
|
| 162 |
+
|
| 163 |
+
# Check for flash attention constraints
|
| 164 |
+
if self._flash_attention and self._dropout > 0.0:
|
| 165 |
+
raise ValueError(
|
| 166 |
+
"Dropout is not supported when flash attention is enabled. "
|
| 167 |
+
"Please set dropout to 0.0 to use flash attention."
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
@property
|
| 171 |
+
def num_heads(self):
|
| 172 |
+
return self._num_heads
|
| 173 |
+
|
| 174 |
+
@property
|
| 175 |
+
def key_dim(self):
|
| 176 |
+
return self._key_dim
|
| 177 |
+
|
| 178 |
+
@property
|
| 179 |
+
def value_dim(self):
|
| 180 |
+
return self._value_dim
|
| 181 |
+
|
| 182 |
+
@property
|
| 183 |
+
def dropout(self):
|
| 184 |
+
return self._dropout
|
| 185 |
+
|
| 186 |
+
@property
|
| 187 |
+
def use_bias(self):
|
| 188 |
+
return self._use_bias
|
| 189 |
+
|
| 190 |
+
# Avoid exposing `output_shape` as it may conflict with `Functional` and
|
| 191 |
+
# `Sequential` models when calling `summary()`.
|
| 192 |
+
|
| 193 |
+
@property
|
| 194 |
+
def attention_axes(self):
|
| 195 |
+
return self._attention_axes
|
| 196 |
+
|
| 197 |
+
def get_config(self):
|
| 198 |
+
base_config = super().get_config()
|
| 199 |
+
config = {
|
| 200 |
+
"num_heads": self._num_heads,
|
| 201 |
+
"key_dim": self._key_dim,
|
| 202 |
+
"value_dim": self._value_dim,
|
| 203 |
+
"dropout": self._dropout,
|
| 204 |
+
"use_bias": self._use_bias,
|
| 205 |
+
"output_shape": self._output_shape,
|
| 206 |
+
"attention_axes": self._attention_axes,
|
| 207 |
+
"kernel_initializer": initializers.serialize(
|
| 208 |
+
self._kernel_initializer
|
| 209 |
+
),
|
| 210 |
+
"bias_initializer": initializers.serialize(self._bias_initializer),
|
| 211 |
+
"kernel_regularizer": regularizers.serialize(
|
| 212 |
+
self._kernel_regularizer
|
| 213 |
+
),
|
| 214 |
+
"bias_regularizer": regularizers.serialize(self._bias_regularizer),
|
| 215 |
+
"activity_regularizer": regularizers.serialize(
|
| 216 |
+
self._activity_regularizer
|
| 217 |
+
),
|
| 218 |
+
"kernel_constraint": constraints.serialize(self._kernel_constraint),
|
| 219 |
+
"bias_constraint": constraints.serialize(self._bias_constraint),
|
| 220 |
+
"seed": self.seed,
|
| 221 |
+
}
|
| 222 |
+
return {**base_config, **config}
|
| 223 |
+
|
| 224 |
+
def build(
|
| 225 |
+
self,
|
| 226 |
+
query_shape,
|
| 227 |
+
value_shape,
|
| 228 |
+
key_shape=None,
|
| 229 |
+
):
|
| 230 |
+
"""Builds layers and variables.
|
| 231 |
+
|
| 232 |
+
Args:
|
| 233 |
+
query_shape: Shape of the `query` tensor.
|
| 234 |
+
value_shape: Shape of the `value` tensor.
|
| 235 |
+
key: Optional shape of the `key` tensor.
|
| 236 |
+
"""
|
| 237 |
+
key_shape = value_shape if key_shape is None else key_shape
|
| 238 |
+
|
| 239 |
+
if value_shape[1:-1] != key_shape[1:-1]:
|
| 240 |
+
raise ValueError(
|
| 241 |
+
"All dimensions of `value` and `key`, except the last one, "
|
| 242 |
+
f"must be equal. Received: value_shape={value_shape} and "
|
| 243 |
+
f"key_shape={key_shape}"
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
+
query_rank = len(query_shape)
|
| 247 |
+
value_rank = len(value_shape)
|
| 248 |
+
key_rank = len(key_shape)
|
| 249 |
+
einsum_equation, bias_axes, output_rank = _build_proj_equation(
|
| 250 |
+
query_rank - 1, bound_dims=1, output_dims=2
|
| 251 |
+
)
|
| 252 |
+
self._query_dense = EinsumDense(
|
| 253 |
+
einsum_equation,
|
| 254 |
+
output_shape=_get_output_shape(
|
| 255 |
+
output_rank - 1, [self._num_heads, self._key_dim]
|
| 256 |
+
),
|
| 257 |
+
bias_axes=bias_axes if self._use_bias else None,
|
| 258 |
+
name="query",
|
| 259 |
+
**self._get_common_kwargs_for_sublayer(),
|
| 260 |
+
)
|
| 261 |
+
self._query_dense.build(query_shape)
|
| 262 |
+
einsum_equation, bias_axes, output_rank = _build_proj_equation(
|
| 263 |
+
key_rank - 1, bound_dims=1, output_dims=2
|
| 264 |
+
)
|
| 265 |
+
self._key_dense = EinsumDense(
|
| 266 |
+
einsum_equation,
|
| 267 |
+
output_shape=_get_output_shape(
|
| 268 |
+
output_rank - 1, [self._num_heads, self._key_dim]
|
| 269 |
+
),
|
| 270 |
+
bias_axes=bias_axes if self._use_bias else None,
|
| 271 |
+
name="key",
|
| 272 |
+
**self._get_common_kwargs_for_sublayer(),
|
| 273 |
+
)
|
| 274 |
+
self._key_dense.build(key_shape)
|
| 275 |
+
einsum_equation, bias_axes, output_rank = _build_proj_equation(
|
| 276 |
+
value_rank - 1, bound_dims=1, output_dims=2
|
| 277 |
+
)
|
| 278 |
+
self._value_dense = EinsumDense(
|
| 279 |
+
einsum_equation,
|
| 280 |
+
output_shape=_get_output_shape(
|
| 281 |
+
output_rank - 1, [self._num_heads, self._value_dim]
|
| 282 |
+
),
|
| 283 |
+
bias_axes=bias_axes if self._use_bias else None,
|
| 284 |
+
name="value",
|
| 285 |
+
**self._get_common_kwargs_for_sublayer(),
|
| 286 |
+
)
|
| 287 |
+
self._value_dense.build(value_shape)
|
| 288 |
+
|
| 289 |
+
# Builds the attention computations for multi-head dot product
|
| 290 |
+
# attention. These computations could be wrapped into the keras
|
| 291 |
+
# attention layer once it supports multi-head einsum computations.
|
| 292 |
+
self._build_attention(output_rank)
|
| 293 |
+
self._output_dense = self._make_output_dense(
|
| 294 |
+
query_shape,
|
| 295 |
+
self._get_common_kwargs_for_sublayer(),
|
| 296 |
+
"attention_output",
|
| 297 |
+
)
|
| 298 |
+
output_dense_input_shape = list(
|
| 299 |
+
self._query_dense.compute_output_shape(query_shape)
|
| 300 |
+
)
|
| 301 |
+
output_dense_input_shape[-1] = self._value_dim
|
| 302 |
+
self._output_dense.build(tuple(output_dense_input_shape))
|
| 303 |
+
self.built = True
|
| 304 |
+
|
| 305 |
+
@property
|
| 306 |
+
def query_dense(self):
|
| 307 |
+
return self._query_dense
|
| 308 |
+
|
| 309 |
+
@property
|
| 310 |
+
def key_dense(self):
|
| 311 |
+
return self._key_dense
|
| 312 |
+
|
| 313 |
+
@property
|
| 314 |
+
def value_dense(self):
|
| 315 |
+
return self._value_dense
|
| 316 |
+
|
| 317 |
+
@property
|
| 318 |
+
def output_dense(self):
|
| 319 |
+
return self._output_dense
|
| 320 |
+
|
| 321 |
+
def _get_common_kwargs_for_sublayer(self):
|
| 322 |
+
common_kwargs = dict(
|
| 323 |
+
kernel_regularizer=self._kernel_regularizer,
|
| 324 |
+
bias_regularizer=self._bias_regularizer,
|
| 325 |
+
activity_regularizer=self._activity_regularizer,
|
| 326 |
+
kernel_constraint=self._kernel_constraint,
|
| 327 |
+
bias_constraint=self._bias_constraint,
|
| 328 |
+
dtype=self.dtype_policy,
|
| 329 |
+
)
|
| 330 |
+
# Create new clone of kernel/bias initializer, so that we don't reuse
|
| 331 |
+
# the initializer instance, which could lead to same init value since
|
| 332 |
+
# initializer is stateless.
|
| 333 |
+
kernel_initializer = self._kernel_initializer.__class__.from_config(
|
| 334 |
+
self._kernel_initializer.get_config()
|
| 335 |
+
)
|
| 336 |
+
bias_initializer = self._bias_initializer.__class__.from_config(
|
| 337 |
+
self._bias_initializer.get_config()
|
| 338 |
+
)
|
| 339 |
+
common_kwargs["kernel_initializer"] = kernel_initializer
|
| 340 |
+
common_kwargs["bias_initializer"] = bias_initializer
|
| 341 |
+
return common_kwargs
|
| 342 |
+
|
| 343 |
+
def _make_output_dense(self, query_shape, common_kwargs, name=None):
|
| 344 |
+
"""Builds the output projection matrix.
|
| 345 |
+
|
| 346 |
+
Args:
|
| 347 |
+
free_dims: Number of free dimensions for einsum equation building.
|
| 348 |
+
common_kwargs: Common keyword arguments for einsum layer.
|
| 349 |
+
name: Name for the projection layer.
|
| 350 |
+
|
| 351 |
+
Returns:
|
| 352 |
+
Projection layer.
|
| 353 |
+
"""
|
| 354 |
+
query_rank = len(query_shape)
|
| 355 |
+
if self._output_shape:
|
| 356 |
+
output_shape = self._output_shape
|
| 357 |
+
else:
|
| 358 |
+
output_shape = [query_shape[-1]]
|
| 359 |
+
einsum_equation, bias_axes, output_rank = _build_proj_equation(
|
| 360 |
+
query_rank - 1, bound_dims=2, output_dims=len(output_shape)
|
| 361 |
+
)
|
| 362 |
+
return EinsumDense(
|
| 363 |
+
einsum_equation,
|
| 364 |
+
output_shape=_get_output_shape(output_rank - 1, output_shape),
|
| 365 |
+
bias_axes=bias_axes if self._use_bias else None,
|
| 366 |
+
name=name,
|
| 367 |
+
**common_kwargs,
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
+
def _build_attention(self, rank):
|
| 371 |
+
"""Builds multi-head dot-product attention computations.
|
| 372 |
+
|
| 373 |
+
This function builds attributes necessary for `_compute_attention` to
|
| 374 |
+
customize attention computation to replace the default dot-product
|
| 375 |
+
attention.
|
| 376 |
+
|
| 377 |
+
Args:
|
| 378 |
+
rank: the rank of query, key, value tensors.
|
| 379 |
+
"""
|
| 380 |
+
if self._attention_axes is None:
|
| 381 |
+
self._attention_axes = tuple(range(1, rank - 2))
|
| 382 |
+
else:
|
| 383 |
+
self._attention_axes = tuple(self._attention_axes)
|
| 384 |
+
(
|
| 385 |
+
self._dot_product_equation,
|
| 386 |
+
self._combine_equation,
|
| 387 |
+
attn_scores_rank,
|
| 388 |
+
) = _build_attention_equation(rank, attn_axes=self._attention_axes)
|
| 389 |
+
norm_axes = tuple(
|
| 390 |
+
range(
|
| 391 |
+
attn_scores_rank - len(self._attention_axes), attn_scores_rank
|
| 392 |
+
)
|
| 393 |
+
)
|
| 394 |
+
self._softmax = Softmax(axis=norm_axes, dtype=self.dtype_policy)
|
| 395 |
+
self._dropout_layer = Dropout(
|
| 396 |
+
rate=self._dropout, dtype=self.dtype_policy, seed=self.seed
|
| 397 |
+
)
|
| 398 |
+
|
| 399 |
+
def _masked_softmax(self, attention_scores, attention_mask=None):
|
| 400 |
+
# Normalize the attention scores to probabilities.
|
| 401 |
+
# attention_scores = [B, N, T, S]
|
| 402 |
+
if attention_mask is not None:
|
| 403 |
+
# The expand dim happens starting from the `num_heads` dimension,
|
| 404 |
+
# (<batch_dims>, num_heads, <query_attention_dims,
|
| 405 |
+
# key_attention_dims>)
|
| 406 |
+
mask_expansion_axis = -len(self._attention_axes) * 2 - 1
|
| 407 |
+
for _ in range(
|
| 408 |
+
len(attention_scores.shape) - len(attention_mask.shape)
|
| 409 |
+
):
|
| 410 |
+
attention_mask = ops.expand_dims(
|
| 411 |
+
attention_mask, axis=mask_expansion_axis
|
| 412 |
+
)
|
| 413 |
+
return self._softmax(attention_scores, mask=attention_mask)
|
| 414 |
+
|
| 415 |
+
def _compute_attention(
    self,
    query,
    key,
    value,
    attention_mask=None,
    training=None,
):
    """Applies Dot-product attention with query, key, value tensors.

    This function defines the computation inside `call` with projected
    multi-head Q, K, V inputs. Users can override this function for
    customized attention implementation.

    Args:
        query: Projected query tensor of shape `(B, T, N, key_dim)`.
        key: Projected key tensor of shape `(B, S, N, key_dim)`.
        value: Projected value tensor of shape `(B, S, N, value_dim)`.
        attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
            attention to certain positions. It is generally not needed if
            the `query` and `value` (and/or `key`) are masked.
        training: Python boolean indicating whether the layer should behave
            in training mode (adding dropout) or in inference mode (doing
            nothing).

    Returns:
        attention_output: Multi-headed outputs of attention computation.
        attention_scores: Multi-headed attention weights, or `None` when
            the fused `ops.dot_product_attention` path is taken.
    """
    # Check for flash attention constraints.
    # NOTE: `self._return_attention_scores` is instance state set by
    # `call` immediately before this method runs.
    if self._flash_attention and self._return_attention_scores:
        raise ValueError(
            "Returning attention scores is not supported when flash "
            "attention is enabled. Please disable flash attention to access"
            " attention scores."
        )

    # Determine whether to use dot-product attention: only possible when
    # no dropout is applied, scores are not requested, and inputs are 4D.
    use_dot_product_attention = not (
        self._dropout > 0.0
        or self._return_attention_scores
        or (len(query.shape) != 4)
    )

    if use_dot_product_attention:
        if attention_mask is not None:
            # Ensure attention_mask has the correct shape for broadcasting
            # Expected shape: [batch_size, num_heads, query_seq_len,
            # key_seq_len].
            mask_expansion_axis = -len(self._attention_axes) * 2 - 1
            len_attention_scores_shape = 4  # Only accepts 4D inputs
            for _ in range(
                len_attention_scores_shape - len(attention_mask.shape)
            ):
                attention_mask = ops.expand_dims(
                    attention_mask, axis=mask_expansion_axis
                )
            attention_mask = ops.cast(attention_mask, dtype="bool")
        # Directly compute the attention output using dot-product attention
        attention_output = ops.dot_product_attention(
            query=query,
            key=key,
            value=value,
            bias=None,
            mask=attention_mask,
            scale=self._inverse_sqrt_key_dim,
            is_causal=False,
            flash_attention=self._flash_attention,
        )
        # Fused path does not expose per-head scores.
        return attention_output, None

    # Default behavior without flash attention, with explicit attention
    # scores
    query = ops.multiply(
        query, ops.cast(self._inverse_sqrt_key_dim, query.dtype)
    )

    # Take the dot product between "query" and "key" to get the raw
    # attention scores.
    attention_scores = ops.einsum(self._dot_product_equation, key, query)

    # Apply the mask using the custom masked softmax
    attention_scores = self._masked_softmax(
        attention_scores, attention_mask
    )

    # Apply dropout to the attention scores if needed. Note that the
    # pre-dropout scores are what gets returned to the caller.
    if self._dropout > 0.0:
        final_attn_scores = self._dropout_layer(
            attention_scores, training=training
        )
    else:
        final_attn_scores = attention_scores

    # `context_layer` = [B, T, N, H]
    attention_output = ops.einsum(
        self._combine_equation, final_attn_scores, value
    )
    return attention_output, attention_scores
|
| 514 |
+
|
| 515 |
+
def call(
    self,
    query,
    value,
    key=None,
    query_mask=None,
    value_mask=None,
    key_mask=None,
    attention_mask=None,
    return_attention_scores=False,
    training=None,
    use_causal_mask=False,
):
    """Computes multi-head attention of `query` over `key`/`value`.

    Projects the inputs, builds the combined attention mask, applies
    attention, projects the result, and propagates the query's Keras mask
    to the output.
    """
    # Recorded as instance state; read by `_compute_attention`.
    self._return_attention_scores = return_attention_scores
    # Self-attention convenience: key defaults to value.
    if key is None:
        key = value

    # Delete the masks because the masks are handled at the level of the
    # layer
    # NOTE(review): the explicit `query_mask` argument is overwritten here
    # by the tensor's Keras mask, and the value/key Keras masks are cleared
    # without being read (only the explicit `value_mask`/`key_mask`
    # arguments feed `_compute_attention_mask`) — confirm intended.
    query_mask = backend.get_keras_mask(query)
    backend.set_keras_mask(query, None)
    backend.set_keras_mask(value, None)
    backend.set_keras_mask(key, None)

    attention_mask = self._compute_attention_mask(
        query,
        value,
        query_mask=query_mask,
        value_mask=value_mask,
        key_mask=key_mask,
        attention_mask=attention_mask,
        use_causal_mask=use_causal_mask,
    )
    # N = `num_attention_heads`
    # H = `size_per_head`

    # `query` = [B, T, N, H]
    query = self._query_dense(query)

    # `key` = [B, S, N, H]
    key = self._key_dense(key)

    # `value` = [B, S, N, H]
    value = self._value_dense(value)
    attention_output, attention_scores = self._compute_attention(
        query,
        key,
        value,
        attention_mask,
        training,
    )
    attention_output = self._output_dense(attention_output)

    # Set mask on output if needed
    if query_mask is not None:
        backend.set_keras_mask(attention_output, query_mask)

    if return_attention_scores:
        return attention_output, attention_scores
    return attention_output
|
| 575 |
+
|
| 576 |
+
def _compute_attention_mask(
    self,
    query,
    value,
    query_mask=None,
    value_mask=None,
    key_mask=None,
    attention_mask=None,
    use_causal_mask=False,
):
    """Computes the attention mask, using the Keras masks of the inputs.

    * The `query`'s mask is reshaped from [B, T] to [B, T, 1].
    * The `value`'s mask is reshaped from [B, S] to [B, 1, S].
    * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s
        mask is ignored if `key` is `None` or if `key is value`.
    * If `use_causal_mask=True`, then the causal mask is computed. Its shape
        is [1, T, S].

    All defined masks are merged using a logical AND operation (`&`).

    In general, if the `query` and `value` are masked, then there is no need
    to define the `attention_mask`.

    Args:
        query: Projected query tensor of shape `(B, T, N, key_dim)`.
        key: Projected key tensor of shape `(B, T, N, key_dim)`.
        value: Projected value tensor of shape `(B, T, N, value_dim)`.
        attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
            attention to certain positions.
        use_causal_mask: A boolean to indicate whether to apply a causal
            mask to prevent tokens from attending to future tokens (e.g.,
            used in a decoder Transformer).

    Returns:
        attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
            attention to certain positions, based on the Keras masks of the
            `query`, `key`, `value`, and `attention_mask` tensors, and the
            causal mask if `use_causal_mask=True`.
    """
    auto_mask = None
    if query_mask is not None:
        query_mask = ops.cast(query_mask, "bool")  # defensive casting
        # B = batch size, T = max query length
        auto_mask = ops.expand_dims(query_mask, -1)  # shape is [B, T, 1]
    if value_mask is not None:
        value_mask = ops.cast(value_mask, "bool")  # defensive casting
        # B = batch size, S == max value length
        mask = ops.expand_dims(value_mask, -2)  # shape is [B, 1, S]
        auto_mask = mask if auto_mask is None else auto_mask & mask
    if key_mask is not None:
        key_mask = ops.cast(key_mask, "bool")  # defensive casting
        # B == batch size, S == max key length == max value length
        mask = ops.expand_dims(key_mask, -2)  # shape is [B, 1, S]
        auto_mask = mask if auto_mask is None else auto_mask & mask
    if use_causal_mask:
        # the shape of the causal mask is [1, T, S]
        mask = self._compute_causal_mask(query, value)
        auto_mask = mask if auto_mask is None else auto_mask & mask

    if attention_mask is not None:
        attention_mask = ops.cast(attention_mask, "bool")
    if auto_mask is not None:
        # merge attention_mask & automatic mask, to shape [B, T, S]
        attention_mask = (
            auto_mask
            if attention_mask is None
            else attention_mask & auto_mask
        )
    return attention_mask
|
| 646 |
+
|
| 647 |
+
def _compute_causal_mask(self, query, value=None):
    """Computes a causal mask (e.g., for masked self-attention layers).

    For example, if query and value both contain sequences of length 4,
    this function returns a boolean tensor equal to:

    ```
    [[[True, False, False, False],
      [True, True, False, False],
      [True, True, True, False],
      [True, True, True, True]]]
    ```

    Args:
        query: query tensor of shape `(B, T, ...)`.
        value: value tensor of shape `(B, S, ...)` (optional, defaults to
            query).

    Returns:
        mask: a boolean tensor of shape `(1, T, S)` containing a lower
            triangular matrix of shape `(T, S)`.
    """
    q_len = ops.shape(query)[1]
    if value is None:
        v_len = q_len
    else:
        v_len = ops.shape(value)[1]
    # Cumulative sums of a ones tensor give 1-based row / column indices;
    # keeping positions where row >= column yields the lower triangle.
    ones = ops.ones((1, q_len, v_len), dtype="int32")
    row_idx = ops.cumsum(ones, axis=-2)
    col_idx = ops.cumsum(ones, axis=-1)
    return ops.greater_equal(row_idx, col_idx)
|
| 675 |
+
|
| 676 |
+
def compute_output_shape(
    self,
    query_shape,
    value_shape,
    key_shape=None,
):
    """Returns the output shape: the query shape, with the last dimension
    replaced by `self._output_shape` when one was configured.

    Raises:
        ValueError: if `value_shape` and `key_shape` disagree on any
            dimension other than the last.
    """
    query_shape = tuple(query_shape)
    value_shape = tuple(value_shape)
    key_shape = value_shape if key_shape is None else tuple(key_shape)

    if value_shape[1:-1] != key_shape[1:-1]:
        raise ValueError(
            "All dimensions of `value` and `key`, except the last one, "
            f"must be equal. Received: value_shape={value_shape} and "
            f"key_shape={key_shape}"
        )
    if self._output_shape:
        return query_shape[:-1] + self._output_shape
    return query_shape
|
| 698 |
+
|
| 699 |
+
def compute_output_spec(
    self,
    query,
    value,
    key=None,
    query_mask=None,
    value_mask=None,
    key_mask=None,
    attention_mask=None,
    return_attention_scores=False,
    training=None,
    use_causal_mask=False,
):
    """Returns symbolic output spec(s) mirroring `call`'s return value."""
    key_shape = None if key is None else key.shape
    output_shape = self.compute_output_shape(
        query.shape, value.shape, key_shape
    )
    output_spec = backend.KerasTensor(
        output_shape, dtype=self.compute_dtype
    )
    if not return_attention_scores:
        return output_spec
    # Scores spec assumes key length equals query length.
    length = query.shape[1]
    attention_shape = (query.shape[0], self.num_heads, length, length)
    scores_spec = backend.KerasTensor(
        attention_shape, dtype=self.compute_dtype
    )
    return output_spec, scores_spec
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
def _index_to_einsum_variable(i):
|
| 732 |
+
"""Converts an index to a einsum variable name.
|
| 733 |
+
|
| 734 |
+
We simply map indices to lowercase characters, e.g. 0 -> 'a', 1 -> 'b'.
|
| 735 |
+
"""
|
| 736 |
+
return string.ascii_lowercase[i]
|
| 737 |
+
|
| 738 |
+
|
| 739 |
+
def _build_attention_equation(rank, attn_axes):
|
| 740 |
+
"""Builds einsum equations for the attention computation.
|
| 741 |
+
|
| 742 |
+
Query, key, value inputs after projection are expected to have the shape as:
|
| 743 |
+
`(bs, <non-attention dims>, <attention dims>, num_heads, channels)`.
|
| 744 |
+
`bs` and `<non-attention dims>` are treated as `<batch dims>`.
|
| 745 |
+
|
| 746 |
+
The attention operations can be generalized:
|
| 747 |
+
1. Query-key dot product:
|
| 748 |
+
(<batch dims>, <query attention dims>, num_heads, channels),
|
| 749 |
+
(<batch dims>, <key attention dims>, num_heads, channels) ->
|
| 750 |
+
(<batch dims>, num_heads, <query attention dims>, <key attention dims>)
|
| 751 |
+
2. Combination:
|
| 752 |
+
(<batch dims>, num_heads, <query attention dims>, <key attention dims>),
|
| 753 |
+
(<batch dims>, <value attention dims>, num_heads, channels) -> (<batch
|
| 754 |
+
dims>, <query attention dims>, num_heads, channels)
|
| 755 |
+
|
| 756 |
+
Args:
|
| 757 |
+
rank: Rank of query, key, value tensors.
|
| 758 |
+
attn_axes: List/tuple of axes, `[-1, rank)`,
|
| 759 |
+
that attention will be applied to.
|
| 760 |
+
|
| 761 |
+
Returns:
|
| 762 |
+
Einsum equations.
|
| 763 |
+
"""
|
| 764 |
+
target_notation = ""
|
| 765 |
+
for i in range(rank):
|
| 766 |
+
target_notation += _index_to_einsum_variable(i)
|
| 767 |
+
# `batch_dims` includes the head dim.
|
| 768 |
+
batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,)))
|
| 769 |
+
letter_offset = rank
|
| 770 |
+
source_notation = ""
|
| 771 |
+
for i in range(rank):
|
| 772 |
+
if i in batch_dims or i == rank - 1:
|
| 773 |
+
source_notation += target_notation[i]
|
| 774 |
+
else:
|
| 775 |
+
source_notation += _index_to_einsum_variable(letter_offset)
|
| 776 |
+
letter_offset += 1
|
| 777 |
+
|
| 778 |
+
product_notation = "".join(
|
| 779 |
+
[target_notation[i] for i in batch_dims]
|
| 780 |
+
+ [target_notation[i] for i in attn_axes]
|
| 781 |
+
+ [source_notation[i] for i in attn_axes]
|
| 782 |
+
)
|
| 783 |
+
dot_product_equation = "%s,%s->%s" % (
|
| 784 |
+
source_notation,
|
| 785 |
+
target_notation,
|
| 786 |
+
product_notation,
|
| 787 |
+
)
|
| 788 |
+
attn_scores_rank = len(product_notation)
|
| 789 |
+
combine_equation = "%s,%s->%s" % (
|
| 790 |
+
product_notation,
|
| 791 |
+
source_notation,
|
| 792 |
+
target_notation,
|
| 793 |
+
)
|
| 794 |
+
return dot_product_equation, combine_equation, attn_scores_rank
|
| 795 |
+
|
| 796 |
+
|
| 797 |
+
def _build_proj_equation(free_dims, bound_dims, output_dims):
|
| 798 |
+
"""Builds an einsum equation for projections inside multi-head attention."""
|
| 799 |
+
input_str = ""
|
| 800 |
+
kernel_str = ""
|
| 801 |
+
output_str = ""
|
| 802 |
+
bias_axes = ""
|
| 803 |
+
letter_offset = 0
|
| 804 |
+
for i in range(free_dims):
|
| 805 |
+
char = _index_to_einsum_variable(i + letter_offset)
|
| 806 |
+
input_str += char
|
| 807 |
+
output_str += char
|
| 808 |
+
|
| 809 |
+
letter_offset += free_dims
|
| 810 |
+
for i in range(bound_dims):
|
| 811 |
+
char = _index_to_einsum_variable(i + letter_offset)
|
| 812 |
+
input_str += char
|
| 813 |
+
kernel_str += char
|
| 814 |
+
|
| 815 |
+
letter_offset += bound_dims
|
| 816 |
+
for i in range(output_dims):
|
| 817 |
+
char = _index_to_einsum_variable(i + letter_offset)
|
| 818 |
+
kernel_str += char
|
| 819 |
+
output_str += char
|
| 820 |
+
bias_axes += char
|
| 821 |
+
equation = f"{input_str},{kernel_str}->{output_str}"
|
| 822 |
+
|
| 823 |
+
return equation, bias_axes, len(output_str)
|
| 824 |
+
|
| 825 |
+
|
| 826 |
+
def _get_output_shape(output_rank, known_last_dims):
|
| 827 |
+
return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims)
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py
ADDED
|
File without changes
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (206 Bytes). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc
ADDED
|
Binary file (13 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc
ADDED
|
Binary file (7.97 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc
ADDED
|
Binary file (8.77 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc
ADDED
|
Binary file (9.09 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc
ADDED
|
Binary file (6.86 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc
ADDED
|
Binary file (5.54 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc
ADDED
|
Binary file (5.65 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc
ADDED
|
Binary file (5.66 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc
ADDED
|
Binary file (5.88 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc
ADDED
|
Binary file (5.86 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc
ADDED
|
Binary file (5.96 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc
ADDED
|
Binary file (6.06 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc
ADDED
|
Binary file (6.21 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc
ADDED
|
Binary file (6.29 kB). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Keras base class for convolution layers."""
|
| 2 |
+
|
| 3 |
+
from keras.src import activations
|
| 4 |
+
from keras.src import constraints
|
| 5 |
+
from keras.src import initializers
|
| 6 |
+
from keras.src import ops
|
| 7 |
+
from keras.src import regularizers
|
| 8 |
+
from keras.src.backend import standardize_data_format
|
| 9 |
+
from keras.src.layers.input_spec import InputSpec
|
| 10 |
+
from keras.src.layers.layer import Layer
|
| 11 |
+
from keras.src.ops.operation_utils import compute_conv_output_shape
|
| 12 |
+
from keras.src.utils.argument_validation import standardize_padding
|
| 13 |
+
from keras.src.utils.argument_validation import standardize_tuple
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class BaseConv(Layer):
|
| 17 |
+
"""Abstract N-D convolution layer (private, used as implementation base).
|
| 18 |
+
|
| 19 |
+
This layer creates a convolution kernel that is convolved (actually
|
| 20 |
+
cross-correlated) with the layer input to produce a tensor of outputs. If
|
| 21 |
+
`use_bias` is True (and a `bias_initializer` is provided), a bias vector is
|
| 22 |
+
created and added to the outputs. Finally, if `activation` is not `None`, it
|
| 23 |
+
is applied to the outputs as well.
|
| 24 |
+
|
| 25 |
+
Note: layer attributes cannot be modified after the layer has been called
|
| 26 |
+
once (except the `trainable` attribute).
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
|
| 30 |
+
filters: int, the dimension of the output space (the number of filters
|
| 31 |
+
in the convolution).
|
| 32 |
+
kernel_size: int or tuple/list of `rank` integers, specifying the size
|
| 33 |
+
of the convolution window.
|
| 34 |
+
strides: int or tuple/list of `rank` integers, specifying the stride
|
| 35 |
+
length of the convolution. If only one int is specified, the same
|
| 36 |
+
stride size will be used for all dimensions. `strides > 1` is
|
| 37 |
+
incompatible with `dilation_rate > 1`.
|
| 38 |
+
padding: string, either `"valid"` or `"same"` (case-insensitive).
|
| 39 |
+
`"valid"` means no padding. `"same"` results in padding evenly to
|
| 40 |
+
the left/right or up/down of the input. When `padding="same"` and
|
| 41 |
+
`strides=1`, the output has the same size as the input.
|
| 42 |
+
data_format: string, either `"channels_last"` or `"channels_first"`.
|
| 43 |
+
The ordering of the dimensions in the inputs. `"channels_last"`
|
| 44 |
+
corresponds to inputs with shape `(batch, steps, features)`
|
| 45 |
+
while `"channels_first"` corresponds to inputs with shape
|
| 46 |
+
`(batch, features, steps)`. It defaults to the `image_data_format`
|
| 47 |
+
value found in your Keras config file at `~/.keras/keras.json`.
|
| 48 |
+
If you never set it, then it will be `"channels_last"`.
|
| 49 |
+
dilation_rate: int or tuple/list of `rank` integers, specifying the
|
| 50 |
+
dilation rate to use for dilated convolution. If only one int is
|
| 51 |
+
specified, the same dilation rate will be used for all dimensions.
|
| 52 |
+
groups: A positive int specifying the number of groups in which the
|
| 53 |
+
input is split along the channel axis. Each group is convolved
|
| 54 |
+
separately with `filters // groups` filters. The output is the
|
| 55 |
+
concatenation of all the `groups` results along the channel axis.
|
| 56 |
+
Input channels and `filters` must both be divisible by `groups`.
|
| 57 |
+
activation: Activation function. If `None`, no activation is applied.
|
| 58 |
+
use_bias: bool, if `True`, bias will be added to the output.
|
| 59 |
+
kernel_initializer: Initializer for the convolution kernel. If `None`,
|
| 60 |
+
the default initializer (`"glorot_uniform"`) will be used.
|
| 61 |
+
bias_initializer: Initializer for the bias vector. If `None`, the
|
| 62 |
+
default initializer (`"zeros"`) will be used.
|
| 63 |
+
kernel_regularizer: Optional regularizer for the convolution kernel.
|
| 64 |
+
bias_regularizer: Optional regularizer for the bias vector.
|
| 65 |
+
activity_regularizer: Optional regularizer function for the output.
|
| 66 |
+
kernel_constraint: Optional projection function to be applied to the
|
| 67 |
+
kernel after being updated by an `Optimizer` (e.g. used to implement
|
| 68 |
+
norm constraints or value constraints for layer weights). The
|
| 69 |
+
function must take as input the unprojected variable and must return
|
| 70 |
+
the projected variable (which must have the same shape). Constraints
|
| 71 |
+
are not safe to use when doing asynchronous distributed training.
|
| 72 |
+
bias_constraint: Optional projection function to be applied to the
|
| 73 |
+
bias after being updated by an `Optimizer`.
|
| 74 |
+
lora_rank: Optional integer. If set, the layer's forward pass
|
| 75 |
+
will implement LoRA (Low-Rank Adaptation)
|
| 76 |
+
with the provided rank. LoRA sets the layer's kernel
|
| 77 |
+
to non-trainable and replaces it with a delta over the
|
| 78 |
+
original kernel, obtained via multiplying two lower-rank
|
| 79 |
+
trainable matrices. This can be useful to reduce the
|
| 80 |
+
computation cost of fine-tuning large dense layers.
|
| 81 |
+
You can also enable LoRA on an existing layer by calling
|
| 82 |
+
`layer.enable_lora(rank)`.
|
| 83 |
+
"""
|
| 84 |
+
|
| 85 |
+
def __init__(
    self,
    rank,
    filters,
    kernel_size,
    strides=1,
    padding="valid",
    data_format=None,
    dilation_rate=1,
    groups=1,
    activation=None,
    use_bias=True,
    kernel_initializer="glorot_uniform",
    bias_initializer="zeros",
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    bias_constraint=None,
    lora_rank=None,
    **kwargs,
):
    """Initializes and validates the convolution configuration.

    See the class docstring for the meaning of each argument. Raises
    `ValueError` for non-positive `filters`/`groups`, `filters` not
    divisible by `groups`, zero entries in `kernel_size`/`strides`, or
    combining `strides > 1` with `dilation_rate > 1`.
    """
    super().__init__(activity_regularizer=activity_regularizer, **kwargs)
    self.rank = rank
    self.filters = filters
    self.groups = groups
    # Normalize int-or-tuple arguments to rank-length tuples.
    self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
    self.strides = standardize_tuple(strides, rank, "strides")
    self.dilation_rate = standardize_tuple(
        dilation_rate, rank, "dilation_rate"
    )
    # "causal" padding is only meaningful for 1D convolutions.
    self.padding = standardize_padding(padding, allow_causal=rank == 1)
    self.data_format = standardize_data_format(data_format)
    self.activation = activations.get(activation)
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
    self.lora_rank = lora_rank
    self.lora_enabled = False
    # `build` re-pins the channel axis once the input shape is known.
    self.input_spec = InputSpec(min_ndim=self.rank + 2)
    # NOTE: removed a redundant `self.data_format = self.data_format`
    # self-assignment (a no-op) that followed this line.

    if self.filters is not None and self.filters <= 0:
        raise ValueError(
            "Invalid value for argument `filters`. Expected a strictly "
            f"positive value. Received filters={self.filters}."
        )

    if self.groups <= 0:
        raise ValueError(
            "The number of groups must be a positive integer. "
            f"Received: groups={self.groups}."
        )

    if self.filters is not None and self.filters % self.groups != 0:
        raise ValueError(
            "The number of filters must be evenly divisible by the "
            f"number of groups. Received: groups={self.groups}, "
            f"filters={self.filters}."
        )

    if not all(self.kernel_size):
        raise ValueError(
            "The argument `kernel_size` cannot contain 0. Received "
            f"kernel_size={self.kernel_size}."
        )

    if not all(self.strides):
        # Fixed grammar in this message ("cannot contains" -> "cannot
        # contain").
        raise ValueError(
            "The argument `strides` cannot contain 0. Received "
            f"strides={self.strides}"
        )

    if max(self.strides) > 1 and max(self.dilation_rate) > 1:
        raise ValueError(
            "`strides > 1` not supported in conjunction with "
            f"`dilation_rate > 1`. Received: strides={self.strides} and "
            f"dilation_rate={self.dilation_rate}"
        )
|
| 168 |
+
|
| 169 |
+
def build(self, input_shape):
    """Creates the convolution kernel (and optional bias) variables.

    Also validates that the input channel count is divisible by `groups`
    and that the configuration produces a valid output shape, then pins
    the channel axis in `input_spec`.
    """
    if self.data_format == "channels_last":
        channel_axis = -1
        input_channel = input_shape[-1]
    else:
        channel_axis = 1
        input_channel = input_shape[1]
    # Pin the channel axis so later calls are checked against this shape.
    self.input_spec = InputSpec(
        min_ndim=self.rank + 2, axes={channel_axis: input_channel}
    )
    if input_channel % self.groups != 0:
        raise ValueError(
            "The number of input channels must be evenly divisible by "
            f"the number of groups. Received groups={self.groups}, but the "
            f"input has {input_channel} channels (full input shape is "
            f"{input_shape})."
        )
    # Grouped convolution: each group convolves input_channel // groups
    # channels.
    kernel_shape = self.kernel_size + (
        input_channel // self.groups,
        self.filters,
    )

    # compute_output_shape contains some validation logic for the input
    # shape, and make sure the output shape has all positive dimensions.
    self.compute_output_shape(input_shape)

    # Stored as `_kernel`; the public `kernel` property adds the LoRA
    # delta when LoRA is enabled.
    self._kernel = self.add_weight(
        name="kernel",
        shape=kernel_shape,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        trainable=True,
        dtype=self.dtype,
    )
    if self.use_bias:
        self.bias = self.add_weight(
            name="bias",
            shape=(self.filters,),
            initializer=self.bias_initializer,
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint,
            trainable=True,
            dtype=self.dtype,
        )
    else:
        self.bias = None
    self.built = True
    # A `lora_rank` passed to the constructor enables LoRA automatically
    # once the weights exist.
    if self.lora_rank:
        self.enable_lora(self.lora_rank)
|
| 219 |
+
|
| 220 |
+
@property
def kernel(self):
    """Effective convolution kernel, with the LoRA delta folded in.

    Raises:
        AttributeError: If the layer has not been built yet.
    """
    if not self.built:
        raise AttributeError(
            "You must build the layer before accessing `kernel`."
        )
    base = self._kernel
    if not self.lora_enabled:
        return base
    lora_delta = ops.matmul(self.lora_kernel_a, self.lora_kernel_b)
    return base + lora_delta
|
| 231 |
+
|
| 232 |
+
def convolution_op(self, inputs, kernel):
    """Apply the raw N-D convolution (no bias add, no activation)."""
    conv_kwargs = {
        "strides": list(self.strides),
        "padding": self.padding,
        "dilation_rate": self.dilation_rate,
        "data_format": self.data_format,
    }
    return ops.conv(inputs, kernel, **conv_kwargs)
|
| 241 |
+
|
| 242 |
+
def call(self, inputs):
    """Forward pass: convolution, optional bias add, optional activation."""
    outputs = self.convolution_op(inputs, self.kernel)
    if self.use_bias:
        # Reshape the 1-D bias so it broadcasts over every non-channel
        # dimension of the output.
        if self.data_format == "channels_last":
            broadcast_shape = (1,) * (self.rank + 1) + (self.filters,)
        else:
            broadcast_shape = (1, self.filters) + (1,) * self.rank
        outputs = ops.add(outputs, ops.reshape(self.bias, broadcast_shape))
    if self.activation is None:
        return outputs
    return self.activation(outputs)
|
| 258 |
+
|
| 259 |
+
def compute_output_shape(self, input_shape):
    """Infer the output tensor shape for the given input shape."""
    shape_kwargs = {
        "strides": self.strides,
        "padding": self.padding,
        "data_format": self.data_format,
        "dilation_rate": self.dilation_rate,
    }
    return compute_conv_output_shape(
        input_shape, self.filters, self.kernel_size, **shape_kwargs
    )
|
| 269 |
+
|
| 270 |
+
def enable_lora(
    self, rank, a_initializer="he_uniform", b_initializer="zeros"
):
    """Enable LoRA (low-rank adaptation) on the layer's kernel.

    Freezes the base kernel and adds two trainable low-rank factors
    (`lora_kernel_a`, `lora_kernel_b`) whose product is added to the
    kernel at access time (see the `kernel` property).

    Args:
        rank: int, rank of the low-rank decomposition.
        a_initializer: Initializer for `lora_kernel_a`.
        b_initializer: Initializer for `lora_kernel_b`.

    Raises:
        ValueError: If a kernel constraint is set, if the layer is not
            yet built, or if LoRA is already enabled.
    """
    if self.kernel_constraint:
        raise ValueError(
            "Lora is incompatible with kernel constraints. "
            "In order to enable lora on this layer, remove the "
            "`kernel_constraint` argument."
        )
    if not self.built:
        raise ValueError(
            "Cannot enable lora on a layer that isn't yet built."
        )
    if self.lora_enabled:
        raise ValueError(
            "lora is already enabled. "
            "This can only be done once per layer."
        )
    # Temporarily unlock variable tracking so new weights can be added
    # after the layer is already built.
    self._tracker.unlock()
    self.lora_kernel_a = self.add_weight(
        name="lora_kernel_a",
        # Same shape as the kernel except the last (filters) axis,
        # which is replaced by the LoRA rank.
        shape=self._kernel.shape[:-1] + (rank,),
        initializer=initializers.get(a_initializer),
        regularizer=self.kernel_regularizer,
    )
    self.lora_kernel_b = self.add_weight(
        name="lora_kernel_b",
        shape=(rank, self.filters),
        initializer=initializers.get(b_initializer),
        regularizer=self.kernel_regularizer,
    )
    # Only the LoRA factors remain trainable from here on.
    self._kernel.trainable = False
    self._tracker.lock()
    self.lora_enabled = True
    self.lora_rank = rank
|
| 305 |
+
|
| 306 |
+
def save_own_variables(self, store):
    """Write this layer's variables into `store`, keyed by index."""
    # An unbuilt layer owns no variables, so there is nothing to save.
    if not self.built:
        return
    # `self.kernel` is the effective kernel (LoRA delta merged in when
    # LoRA is enabled), so the saved weights are self-contained.
    variables = [self.kernel, self.bias] if self.use_bias else [self.kernel]
    for index, variable in enumerate(variables):
        store[str(index)] = variable
|
| 315 |
+
|
| 316 |
+
def load_own_variables(self, store):
    """Restore this layer's variables from `store`."""
    if not self.lora_enabled:
        self._check_load_own_variables(store)
    # An unbuilt layer owns no variables, so there is nothing to load.
    if not self.built:
        return
    targets = [self._kernel]
    if self.use_bias:
        targets.append(self.bias)
    for index, variable in enumerate(targets):
        variable.assign(store[str(index)])
    if self.lora_enabled:
        # The stored kernel already includes any merged LoRA delta, so
        # zero the factors to make the effective kernel match the store.
        for lora_var in (self.lora_kernel_a, self.lora_kernel_b):
            lora_var.assign(ops.zeros(lora_var.shape))
|
| 330 |
+
|
| 331 |
+
def get_config(self):
    """Return the serializable config needed to re-create this layer."""
    base_config = super().get_config()
    conv_config = {
        "filters": self.filters,
        "kernel_size": self.kernel_size,
        "strides": self.strides,
        "padding": self.padding,
        "data_format": self.data_format,
        "dilation_rate": self.dilation_rate,
        "groups": self.groups,
        "activation": activations.serialize(self.activation),
        "use_bias": self.use_bias,
        "kernel_initializer": initializers.serialize(
            self.kernel_initializer
        ),
        "bias_initializer": initializers.serialize(self.bias_initializer),
        "kernel_regularizer": regularizers.serialize(
            self.kernel_regularizer
        ),
        "bias_regularizer": regularizers.serialize(self.bias_regularizer),
        "activity_regularizer": regularizers.serialize(
            self.activity_regularizer
        ),
        "kernel_constraint": constraints.serialize(self.kernel_constraint),
        "bias_constraint": constraints.serialize(self.bias_constraint),
    }
    base_config.update(conv_config)
    # Only persist the LoRA rank when LoRA was actually configured.
    if self.lora_rank:
        base_config["lora_rank"] = self.lora_rank
    return base_config
|
| 368 |
+
|
| 369 |
+
def _check_load_own_variables(self, store):
    """Verify that `store` holds exactly one entry per layer variable.

    Raises:
        ValueError: If the number of stored entries does not match the
            layer's variable count, with a detailed diagnostic when the
            layer was never built.
    """
    all_vars = self._trainable_variables + self._non_trainable_variables
    if len(store.keys()) != len(all_vars):
        # Special-case: the layer has no variables because it was never
        # built — almost always a build()/build_from_config() issue, so
        # give a longer, actionable explanation.
        if len(all_vars) == 0 and not self.built:
            raise ValueError(
                f"Layer '{self.name}' was never built "
                "and thus it doesn't have any variables. "
                f"However the weights file lists {len(store.keys())} "
                "variables for this layer.\n"
                "In most cases, this error indicates that either:\n\n"
                "1. The layer is owned by a parent layer that "
                "implements a `build()` method, but calling the "
                "parent's `build()` method did NOT create the state of "
                f"the child layer '{self.name}'. A `build()` method "
                "must create ALL state for the layer, including "
                "the state of any children layers.\n\n"
                "2. You need to implement "
                "the `def build_from_config(self, config)` method "
                f"on layer '{self.name}', to specify how to rebuild "
                "it during loading. "
                "In this case, you might also want to implement the "
                "method that generates the build config at saving time, "
                "`def get_build_config(self)`. "
                "The method `build_from_config()` is meant "
                "to create the state "
                "of the layer (i.e. its variables) upon deserialization.",
            )
        # Generic mismatch: report expected vs. received counts.
        raise ValueError(
            f"Layer '{self.name}' expected {len(all_vars)} variables, "
            "but received "
            f"{len(store.keys())} variables during loading. "
            f"Expected: {[v.name for v in all_vars]}"
        )
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv_transpose.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Keras base class for transpose convolution layers."""
|
| 2 |
+
|
| 3 |
+
from keras.src import activations
|
| 4 |
+
from keras.src import constraints
|
| 5 |
+
from keras.src import initializers
|
| 6 |
+
from keras.src import ops
|
| 7 |
+
from keras.src import regularizers
|
| 8 |
+
from keras.src.backend import standardize_data_format
|
| 9 |
+
from keras.src.backend.common.backend_utils import (
|
| 10 |
+
compute_conv_transpose_output_shape,
|
| 11 |
+
)
|
| 12 |
+
from keras.src.layers.input_spec import InputSpec
|
| 13 |
+
from keras.src.layers.layer import Layer
|
| 14 |
+
from keras.src.utils.argument_validation import standardize_padding
|
| 15 |
+
from keras.src.utils.argument_validation import standardize_tuple
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class BaseConvTranspose(Layer):
    """Abstract N-D transposed convolution layer.

    The need for transposed convolutions generally arises from the desire to use
    a transformation going in the opposite direction of a normal convolution,
    i.e., from something that has the shape of the output of some convolution to
    something that has the shape of its input while maintaining a connectivity
    pattern that is compatible with said convolution.

    Args:
        rank: int, the rank of the transposed convolution, e.g. 2 for 2D
            transposed convolution.
        filters: int, the dimension of the output space (the number of filters
            in the transposed convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the transposed convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the transposed convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input such that output has the same
            height/width dimension as the input.
        output_padding: int or tuple/list of `rank` integers, specifying the
            amount of padding along the spatial dimensions of the output
            tensor. If `None`, the output shape is inferred.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        output_padding=None,
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=activity_regularizer,
            **kwargs,
        )
        self.rank = rank
        self.filters = filters
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        if output_padding is None:
            self.output_padding = None
        else:
            self.output_padding = standardize_tuple(
                output_padding,
                rank,
                "output_padding",
            )
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=self.rank + 2)
        # NOTE: removed the redundant no-op `self.data_format =
        # self.data_format` that was here.

        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            # Fixed grammar in the error message ("cannot contains" ->
            # "cannot contain").
            raise ValueError(
                "The argument `strides` cannot contain 0. Received "
                f"strides={self.strides}."
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the kernel and bias weights from the input channel count."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        # Pin the channel axis so future inputs must match this channel count.
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # For a transposed convolution the kernel's trailing axes are
        # (filters, input_channel) — the reverse of a forward convolution.
        kernel_shape = self.kernel_size + (
            self.filters,
            input_channel,
        )

        self.kernel = self.add_weight(
            name="kernel",
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Forward pass: transposed convolution, optional bias, activation."""
        outputs = ops.conv_transpose(
            inputs,
            self.kernel,
            strides=list(self.strides),
            padding=self.padding,
            output_padding=self.output_padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the 1-D bias so it broadcasts over all non-channel
            # dimensions of the output.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Infer the output tensor shape for the given input shape."""
        return compute_conv_transpose_output_shape(
            input_shape,
            self.kernel_size,
            self.filters,
            strides=self.strides,
            padding=self.padding,
            output_padding=self.output_padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        """Return the serializable config needed to re-create this layer."""
        config = super().get_config()
        config.update(
            {
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "kernel_initializer": initializers.serialize(
                    self.kernel_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "kernel_regularizer": regularizers.serialize(
                    self.kernel_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "kernel_constraint": constraints.serialize(
                    self.kernel_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_depthwise_conv.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Keras base class for depthwise convolution layers."""
|
| 2 |
+
|
| 3 |
+
from keras.src import activations
|
| 4 |
+
from keras.src import constraints
|
| 5 |
+
from keras.src import initializers
|
| 6 |
+
from keras.src import ops
|
| 7 |
+
from keras.src import regularizers
|
| 8 |
+
from keras.src.backend import standardize_data_format
|
| 9 |
+
from keras.src.layers.input_spec import InputSpec
|
| 10 |
+
from keras.src.layers.layer import Layer
|
| 11 |
+
from keras.src.ops.operation_utils import compute_conv_output_shape
|
| 12 |
+
from keras.src.utils.argument_validation import standardize_padding
|
| 13 |
+
from keras.src.utils.argument_validation import standardize_tuple
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class BaseDepthwiseConv(Layer):
|
| 17 |
+
"""Abstract N-D depthwise convolution layer.
|
| 18 |
+
|
| 19 |
+
Depthwise convolution is a type of convolution in which each input channel
|
| 20 |
+
is convolved with a different kernel (called a depthwise kernel). You can
|
| 21 |
+
understand depthwise convolution as the first step in a depthwise separable
|
| 22 |
+
convolution.
|
| 23 |
+
|
| 24 |
+
It is implemented via the following steps:
|
| 25 |
+
|
| 26 |
+
- Split the input into individual channels.
|
| 27 |
+
- Convolve each channel with an individual depthwise kernel with
|
| 28 |
+
`depth_multiplier` output channels.
|
| 29 |
+
- Concatenate the convolved outputs along the channels axis.
|
| 30 |
+
|
| 31 |
+
Unlike a regular convolution, depthwise convolution does not mix information
|
| 32 |
+
across different input channels.
|
| 33 |
+
|
| 34 |
+
The `depth_multiplier` argument determines how many filter are applied to
|
| 35 |
+
one input channel. As such, it controls the amount of output channels that
|
| 36 |
+
are generated per input channel in the depthwise step.
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
Args:
|
| 40 |
+
rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
|
| 41 |
+
depth_multiplier: The number of depthwise convolution output channels
|
| 42 |
+
for each input channel. The total number of depthwise convolution
|
| 43 |
+
output channels will be equal to `input_channel * depth_multiplier`.
|
| 44 |
+
kernel_size: int or tuple/list of `rank` integers, specifying the size
|
| 45 |
+
of the depthwise convolution window.
|
| 46 |
+
strides: int or tuple/list of `rank` integers, specifying the stride
|
| 47 |
+
length of the depthwise convolution. If only one int is specified,
|
| 48 |
+
the same stride size will be used for all dimensions.
|
| 49 |
+
`strides > 1` is incompatible with `dilation_rate > 1`.
|
| 50 |
+
padding: string, either `"valid"` or `"same"` (case-insensitive).
|
| 51 |
+
`"valid"` means no padding. `"same"` results in padding evenly to
|
| 52 |
+
the left/right or up/down of the input. When `padding="same"` and
|
| 53 |
+
`strides=1`, the output has the same size as the input.
|
| 54 |
+
data_format: string, either `"channels_last"` or `"channels_first"`.
|
| 55 |
+
The ordering of the dimensions in the inputs. `"channels_last"`
|
| 56 |
+
corresponds to inputs with shape `(batch, steps, features)`
|
| 57 |
+
while `"channels_first"` corresponds to inputs with shape
|
| 58 |
+
`(batch, features, steps)`. It defaults to the `image_data_format`
|
| 59 |
+
value found in your Keras config file at `~/.keras/keras.json`.
|
| 60 |
+
If you never set it, then it will be `"channels_last"`.
|
| 61 |
+
dilation_rate: int or tuple/list of `rank` integers, specifying the
|
| 62 |
+
dilation rate to use for dilated convolution. If only one int is
|
| 63 |
+
specified, the same dilation rate will be used for all dimensions.
|
| 64 |
+
activation: Activation function. If `None`, no activation is applied.
|
| 65 |
+
use_bias: bool, if `True`, bias will be added to the output.
|
| 66 |
+
depthwise_initializer: Initializer for the depthwsie convolution
|
| 67 |
+
kernel. If `None`, the default initializer (`"glorot_uniform"`)
|
| 68 |
+
will be used.
|
| 69 |
+
bias_initializer: Initializer for the bias vector. If `None`, the
|
| 70 |
+
default initializer (`"zeros"`) will be used.
|
| 71 |
+
depthwise_regularizer: Optional regularizer for the convolution kernel.
|
| 72 |
+
bias_regularizer: Optional regularizer for the bias vector.
|
| 73 |
+
activity_regularizer: Optional regularizer function for the output.
|
| 74 |
+
depthwise_constraint: Optional projection function to be applied to the
|
| 75 |
+
kernel after being updated by an `Optimizer` (e.g. used to implement
|
| 76 |
+
norm constraints or value constraints for layer weights). The
|
| 77 |
+
function must take as input the unprojected variable and must return
|
| 78 |
+
the projected variable (which must have the same shape). Constraints
|
| 79 |
+
are not safe to use when doing asynchronous distributed training.
|
| 80 |
+
bias_constraint: Optional projection function to be applied to the
|
| 81 |
+
bias after being updated by an `Optimizer`.
|
| 82 |
+
"""
|
| 83 |
+
|
| 84 |
+
def __init__(
|
| 85 |
+
self,
|
| 86 |
+
rank,
|
| 87 |
+
depth_multiplier,
|
| 88 |
+
kernel_size,
|
| 89 |
+
strides=1,
|
| 90 |
+
padding="valid",
|
| 91 |
+
data_format=None,
|
| 92 |
+
dilation_rate=1,
|
| 93 |
+
activation=None,
|
| 94 |
+
use_bias=True,
|
| 95 |
+
depthwise_initializer="glorot_uniform",
|
| 96 |
+
bias_initializer="zeros",
|
| 97 |
+
depthwise_regularizer=None,
|
| 98 |
+
bias_regularizer=None,
|
| 99 |
+
activity_regularizer=None,
|
| 100 |
+
depthwise_constraint=None,
|
| 101 |
+
bias_constraint=None,
|
| 102 |
+
trainable=True,
|
| 103 |
+
name=None,
|
| 104 |
+
**kwargs,
|
| 105 |
+
):
|
| 106 |
+
super().__init__(
|
| 107 |
+
trainable=trainable,
|
| 108 |
+
name=name,
|
| 109 |
+
activity_regularizer=regularizers.get(activity_regularizer),
|
| 110 |
+
**kwargs,
|
| 111 |
+
)
|
| 112 |
+
self.rank = rank
|
| 113 |
+
self.depth_multiplier = depth_multiplier
|
| 114 |
+
self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
|
| 115 |
+
self.strides = standardize_tuple(strides, rank, "strides")
|
| 116 |
+
self.dilation_rate = standardize_tuple(
|
| 117 |
+
dilation_rate, rank, "dilation_rate"
|
| 118 |
+
)
|
| 119 |
+
self.padding = standardize_padding(padding)
|
| 120 |
+
self.data_format = standardize_data_format(data_format)
|
| 121 |
+
self.activation = activations.get(activation)
|
| 122 |
+
self.use_bias = use_bias
|
| 123 |
+
self.depthwise_initializer = initializers.get(depthwise_initializer)
|
| 124 |
+
self.bias_initializer = initializers.get(bias_initializer)
|
| 125 |
+
self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
|
| 126 |
+
self.bias_regularizer = regularizers.get(bias_regularizer)
|
| 127 |
+
self.depthwise_constraint = constraints.get(depthwise_constraint)
|
| 128 |
+
self.bias_constraint = constraints.get(bias_constraint)
|
| 129 |
+
self.input_spec = InputSpec(min_ndim=self.rank + 2)
|
| 130 |
+
self.data_format = self.data_format
|
| 131 |
+
|
| 132 |
+
if self.depth_multiplier is not None and self.depth_multiplier <= 0:
|
| 133 |
+
raise ValueError(
|
| 134 |
+
"Invalid value for argument `depth_multiplier`. Expected a "
|
| 135 |
+
"strictly positive value. Received "
|
| 136 |
+
f"depth_multiplier={self.depth_multiplier}."
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
if not all(self.kernel_size):
|
| 140 |
+
raise ValueError(
|
| 141 |
+
"The argument `kernel_size` cannot contain 0. Received "
|
| 142 |
+
f"kernel_size={self.kernel_size}."
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
if not all(self.strides):
|
| 146 |
+
raise ValueError(
|
| 147 |
+
"The argument `strides` cannot contains 0. Received "
|
| 148 |
+
f"strides={self.strides}"
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
if max(self.strides) > 1 and max(self.dilation_rate) > 1:
|
| 152 |
+
raise ValueError(
|
| 153 |
+
"`strides > 1` not supported in conjunction with "
|
| 154 |
+
f"`dilation_rate > 1`. Received: strides={self.strides} and "
|
| 155 |
+
f"dilation_rate={self.dilation_rate}"
|
| 156 |
+
)
|
| 157 |
+
|
| 158 |
+
def build(self, input_shape):
|
| 159 |
+
if self.data_format == "channels_last":
|
| 160 |
+
channel_axis = -1
|
| 161 |
+
input_channel = input_shape[-1]
|
| 162 |
+
else:
|
| 163 |
+
channel_axis = 1
|
| 164 |
+
input_channel = input_shape[1]
|
| 165 |
+
self.input_spec = InputSpec(
|
| 166 |
+
min_ndim=self.rank + 2, axes={channel_axis: input_channel}
|
| 167 |
+
)
|
| 168 |
+
depthwise_shape = self.kernel_size + (
|
| 169 |
+
input_channel,
|
| 170 |
+
self.depth_multiplier,
|
| 171 |
+
)
|
| 172 |
+
self.kernel = self.add_weight(
|
| 173 |
+
name="kernel",
|
| 174 |
+
shape=depthwise_shape,
|
| 175 |
+
initializer=self.depthwise_initializer,
|
| 176 |
+
regularizer=self.depthwise_regularizer,
|
| 177 |
+
constraint=self.depthwise_constraint,
|
| 178 |
+
trainable=True,
|
| 179 |
+
dtype=self.dtype,
|
| 180 |
+
)
|
| 181 |
+
if self.use_bias:
|
| 182 |
+
self.bias = self.add_weight(
|
| 183 |
+
name="bias",
|
| 184 |
+
shape=(self.depth_multiplier * input_channel,),
|
| 185 |
+
initializer=self.bias_initializer,
|
| 186 |
+
regularizer=self.bias_regularizer,
|
| 187 |
+
constraint=self.bias_constraint,
|
| 188 |
+
trainable=True,
|
| 189 |
+
dtype=self.dtype,
|
| 190 |
+
)
|
| 191 |
+
else:
|
| 192 |
+
self.bias = None
|
| 193 |
+
self.built = True
|
| 194 |
+
|
| 195 |
+
def _get_input_channel(self, input_shape):
|
| 196 |
+
if self.data_format == "channels_last":
|
| 197 |
+
input_channel = input_shape[-1]
|
| 198 |
+
else:
|
| 199 |
+
input_channel = input_shape[1]
|
| 200 |
+
return input_channel
|
| 201 |
+
|
| 202 |
+
def call(self, inputs):
|
| 203 |
+
input_channel = self._get_input_channel(inputs.shape)
|
| 204 |
+
outputs = ops.depthwise_conv(
|
| 205 |
+
inputs,
|
| 206 |
+
self.kernel,
|
| 207 |
+
strides=self.strides,
|
| 208 |
+
padding=self.padding,
|
| 209 |
+
dilation_rate=self.dilation_rate,
|
| 210 |
+
data_format=self.data_format,
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
if self.use_bias:
|
| 214 |
+
if self.data_format == "channels_last":
|
| 215 |
+
bias_shape = (1,) * (self.rank + 1) + (
|
| 216 |
+
self.depth_multiplier * input_channel,
|
| 217 |
+
)
|
| 218 |
+
else:
|
| 219 |
+
bias_shape = (1, self.depth_multiplier * input_channel) + (
|
| 220 |
+
1,
|
| 221 |
+
) * self.rank
|
| 222 |
+
bias = ops.reshape(self.bias, bias_shape)
|
| 223 |
+
outputs = ops.add(outputs, bias)
|
| 224 |
+
|
| 225 |
+
if self.activation is not None:
|
| 226 |
+
return self.activation(outputs)
|
| 227 |
+
return outputs
|
| 228 |
+
|
| 229 |
+
def compute_output_shape(self, input_shape):
    """Compute the depthwise convolution's output shape from `input_shape`."""
    # Each input channel yields `depth_multiplier` output channels.
    out_channels = self.depth_multiplier * self._get_input_channel(input_shape)
    return compute_conv_output_shape(
        input_shape,
        out_channels,
        self.kernel_size,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate,
    )
|
| 240 |
+
|
| 241 |
+
def get_config(self):
    """Serialize the layer configuration for `from_config` round-trips."""
    base_config = super().get_config()
    # Serialize callables/objects through their registries so the config
    # stays JSON-friendly.
    layer_config = {
        "depth_multiplier": self.depth_multiplier,
        "kernel_size": self.kernel_size,
        "strides": self.strides,
        "padding": self.padding,
        "data_format": self.data_format,
        "dilation_rate": self.dilation_rate,
        "activation": activations.serialize(self.activation),
        "use_bias": self.use_bias,
        "depthwise_initializer": initializers.serialize(
            self.depthwise_initializer
        ),
        "bias_initializer": initializers.serialize(self.bias_initializer),
        "depthwise_regularizer": regularizers.serialize(
            self.depthwise_regularizer
        ),
        "bias_regularizer": regularizers.serialize(self.bias_regularizer),
        "activity_regularizer": regularizers.serialize(
            self.activity_regularizer
        ),
        "depthwise_constraint": constraints.serialize(
            self.depthwise_constraint
        ),
        "bias_constraint": constraints.serialize(self.bias_constraint),
    }
    base_config.update(layer_config)
    return base_config
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_separable_conv.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Keras abstract base layer for separable convolution."""
|
| 2 |
+
|
| 3 |
+
from keras.src import activations
|
| 4 |
+
from keras.src import constraints
|
| 5 |
+
from keras.src import initializers
|
| 6 |
+
from keras.src import ops
|
| 7 |
+
from keras.src import regularizers
|
| 8 |
+
from keras.src.backend import standardize_data_format
|
| 9 |
+
from keras.src.layers.input_spec import InputSpec
|
| 10 |
+
from keras.src.layers.layer import Layer
|
| 11 |
+
from keras.src.ops.operation_utils import compute_conv_output_shape
|
| 12 |
+
from keras.src.utils.argument_validation import standardize_padding
|
| 13 |
+
from keras.src.utils.argument_validation import standardize_tuple
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class BaseSeparableConv(Layer):
    """Abstract base layer for separable convolution.

    This layer performs a depthwise convolution that acts separately on
    channels, followed by a pointwise convolution that mixes channels. If
    `use_bias` is True and a bias initializer is provided, it adds a bias vector
    to the output.

    Args:
        rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        filters: int, the dimensionality of the output space (i.e. the number
            of filters in the pointwise convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the depthwise convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the depthwise convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `stride value != 1` is incompatible with `dilation_rate != 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: An initializer for the depthwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        pointwise_initializer: An initializer for the pointwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: An initializer for the bias vector. If None, the
            default initializer ('"zeros"') will be used.
        depthwise_regularizer: Optional regularizer for the depthwise
            convolution kernel.
        pointwise_regularizer: Optional regularizer for the pointwise
            convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            depthwise kernel after being updated by an `Optimizer` (e.g. used
            for norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape).
        pointwise_constraint: Optional projection function to be applied to the
            pointwise kernel after being updated by an `Optimizer`.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        depth_multiplier,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        pointwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        pointwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        pointwise_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs,
        )
        self.rank = rank
        self.depth_multiplier = depth_multiplier
        self.filters = filters
        # Normalize scalar/tuple arguments to length-`rank` tuples so the
        # shape math below can rely on their structure.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.pointwise_initializer = initializers.get(pointwise_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.pointwise_regularizer = regularizers.get(pointwise_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.pointwise_constraint = constraints.get(pointwise_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        # NOTE: a redundant no-op self-assignment of `self.data_format`
        # was removed here; `standardize_data_format` above already set it.

        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Validate the normalized arguments; fail fast with precise messages.
        if self.depth_multiplier is not None and self.depth_multiplier <= 0:
            raise ValueError(
                "Invalid value for argument `depth_multiplier`. Expected a "
                "strictly positive value. Received "
                f"depth_multiplier={self.depth_multiplier}."
            )

        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received: "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0(s). Received: "
                f"strides={self.strides}"
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the depthwise kernel, pointwise kernel and optional bias."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        # Pin the channel count so later calls must match this build.
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        depthwise_kernel_shape = self.kernel_size + (
            input_channel,
            self.depth_multiplier,
        )
        # The pointwise kernel is a 1x...x1 convolution mixing the
        # `depth_multiplier * input_channel` depthwise outputs into `filters`.
        pointwise_kernel_shape = (1,) * self.rank + (
            self.depth_multiplier * input_channel,
            self.filters,
        )

        self.depthwise_kernel = self.add_weight(
            name="depthwise_kernel",
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        self.pointwise_kernel = self.add_weight(
            name="pointwise_kernel",
            shape=pointwise_kernel_shape,
            initializer=self.pointwise_initializer,
            regularizer=self.pointwise_regularizer,
            constraint=self.pointwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Apply the separable convolution, optional bias and activation."""
        outputs = ops.separable_conv(
            inputs,
            self.depthwise_kernel,
            self.pointwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the 1-D bias so it broadcasts over the channel axis.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Compute the output shape; only the pointwise `filters` matter."""
        return compute_conv_output_shape(
            input_shape,
            self.filters,
            self.kernel_size,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        """Serialize the layer configuration for `from_config` round-trips."""
        config = super().get_config()
        config.update(
            {
                "depth_multiplier": self.depth_multiplier,
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "depthwise_initializer": initializers.serialize(
                    self.depthwise_initializer
                ),
                "pointwise_initializer": initializers.serialize(
                    self.pointwise_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "depthwise_regularizer": regularizers.serialize(
                    self.depthwise_regularizer
                ),
                "pointwise_regularizer": regularizers.serialize(
                    self.pointwise_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "depthwise_constraint": constraints.serialize(
                    self.depthwise_constraint
                ),
                "pointwise_constraint": constraints.serialize(
                    self.pointwise_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src import ops
|
| 2 |
+
from keras.src.api_export import keras_export
|
| 3 |
+
from keras.src.layers.convolutional.base_conv import BaseConv
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@keras_export(["keras.layers.Conv1D", "keras.layers.Convolution1D"])
class Conv1D(BaseConv):
    """1D convolution layer (e.g. temporal convolution).

    Convolves a learned kernel with the input along a single spatial or
    temporal axis. When `use_bias=True` a bias vector is added to the result,
    and when `activation` is set it is applied to the final output.

    Args:
        filters: int, dimensionality of the output space (the number of
            filters in the convolution).
        kernel_size: int or tuple/list of 1 integer, size of the convolution
            window.
        strides: int or tuple/list of 1 integer, stride length of the
            convolution. `strides > 1` cannot be combined with
            `dilation_rate > 1`.
        padding: one of `"valid"`, `"same"` or `"causal"` (case-insensitive).
            `"valid"` applies no padding. `"same"` pads evenly so that, with
            `strides=1`, the output length equals the input length.
            `"causal"` produces causal (dilated) convolutions where
            `output[t]` never depends on `input[t+1:]` — useful for temporal
            data where the temporal order must not be violated. See
            [WaveNet: A Generative Model for Raw Audio, section2.1](
            https://arxiv.org/abs/1609.03499).
        data_format: `"channels_last"` (inputs shaped
            `(batch, steps, features)`) or `"channels_first"` (inputs shaped
            `(batch, features, steps)`). Defaults to the `image_data_format`
            found in your Keras config file at `~/.keras/keras.json`; if you
            never set it, it is `"channels_last"`.
        dilation_rate: int or tuple/list of 1 integer, dilation rate of the
            convolution.
        groups: positive int; the input is split into `groups` slices along
            the channel axis, each convolved separately with
            `filters // groups` filters, and the results concatenated along
            the channel axis. Input channels and `filters` must both be
            divisible by `groups`.
        activation: activation function; `None` means no activation.
        use_bias: bool; if `True`, a bias is added to the output.
        kernel_initializer: initializer for the convolution kernel; `None`
            selects the default (`"glorot_uniform"`).
        bias_initializer: initializer for the bias vector; `None` selects the
            default (`"zeros"`).
        kernel_regularizer: optional regularizer for the convolution kernel.
        bias_regularizer: optional regularizer for the bias vector.
        activity_regularizer: optional regularizer for the layer output.
        kernel_constraint: optional projection applied to the kernel after
            each `Optimizer` update (e.g. norm or value constraints). It must
            map the unprojected variable to a projected variable of the same
            shape. Constraints are not safe to use with asynchronous
            distributed training.
        bias_constraint: optional projection applied to the bias after each
            `Optimizer` update.

    Input shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, steps, channels)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, channels, steps)`

    Output shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, new_steps, filters)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, filters, new_steps)`

    Returns:
        A 3D tensor representing `activation(conv1d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> # The inputs are 128-length vectors with 10 timesteps, and the
    >>> # batch size is 4.
    >>> x = np.random.rand(4, 10, 128)
    >>> y = keras.layers.Conv1D(32, 3, activation='relu')(x)
    >>> print(y.shape)
    (4, 8, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # All configuration is handled by BaseConv with rank fixed at 1.
        super().__init__(
            rank=1,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            groups=groups,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )

    def _compute_causal_padding(self):
        """Per-axis padding that makes the convolution causal (left-pad only)."""
        left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
        if self.data_format == "channels_last":
            return [[0, 0], [left_pad, 0], [0, 0]]
        return [[0, 0], [0, 0], [left_pad, 0]]

    def call(self, inputs):
        """Apply the (possibly causal) convolution, bias and activation."""
        effective_padding = self.padding
        if effective_padding == "causal":
            # Pre-pad on the left, then run an ordinary "valid" convolution.
            inputs = ops.pad(inputs, self._compute_causal_padding())
            effective_padding = "valid"

        outputs = ops.conv(
            inputs,
            self.kernel,
            strides=list(self.strides),
            padding=effective_padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the 1-D bias so it broadcasts over the channel axis.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            outputs = ops.add(outputs, ops.reshape(self.bias, bias_shape))

        return outputs if self.activation is None else self.activation(outputs)
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(
    [
        "keras.layers.Conv1DTranspose",
        "keras.layers.Convolution1DTranspose",
    ]
)
class Conv1DTranspose(BaseConvTranspose):
    """1D transposed convolution layer.

    A transposed convolution is generally used to go in the opposite
    direction of a normal convolution: it maps something shaped like the
    output of a convolution back to something shaped like its input, while
    keeping a connectivity pattern compatible with that convolution.

    Args:
        filters: int, dimensionality of the output space (the number of
            filters in the transpose convolution).
        kernel_size: int or tuple/list of 1 integer, size of the transposed
            convolution window.
        strides: int or tuple/list of 1 integer, stride length of the
            transposed convolution. `strides > 1` cannot be combined with
            `dilation_rate > 1`.
        padding: `"valid"` (no padding) or `"same"` (pad evenly so the output
            has the same height/width dimension as the input),
            case-insensitive.
        data_format: `"channels_last"` (inputs shaped
            `(batch, steps, features)`) or `"channels_first"` (inputs shaped
            `(batch, features, steps)`). Defaults to the `image_data_format`
            found in your Keras config file at `~/.keras/keras.json`; if you
            never set it, it is `"channels_last"`.
        dilation_rate: int or tuple/list of 1 integer, dilation rate of the
            transposed convolution.
        activation: activation function; `None` means no activation.
        use_bias: bool; if `True`, a bias is added to the output.
        kernel_initializer: initializer for the convolution kernel; `None`
            selects the default (`"glorot_uniform"`).
        bias_initializer: initializer for the bias vector; `None` selects the
            default (`"zeros"`).
        kernel_regularizer: optional regularizer for the convolution kernel.
        bias_regularizer: optional regularizer for the bias vector.
        activity_regularizer: optional regularizer for the layer output.
        kernel_constraint: optional projection applied to the kernel after
            each `Optimizer` update (e.g. norm or value constraints). It must
            map the unprojected variable to a projected variable of the same
            shape. Constraints are not safe to use with asynchronous
            distributed training.
        bias_constraint: optional projection applied to the bias after each
            `Optimizer` update.

    Input shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, steps, channels)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, channels, steps)`

    Output shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, new_steps, filters)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, filters, new_steps)`

    Returns:
        A 3D tensor representing
        `activation(conv1d_transpose(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    References:
        - [A guide to convolution arithmetic for deep learning](
            https://arxiv.org/abs/1603.07285v1)
        - [Deconvolutional Networks](
            https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)

    Example:

    >>> x = np.random.rand(4, 10, 128)
    >>> y = keras.layers.Conv1DTranspose(32, 3, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 21, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # Pure pass-through: BaseConvTranspose does all the work, with the
        # rank fixed at 1 for the single temporal/spatial axis.
        super().__init__(
            rank=1,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_conv import BaseConv
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(["keras.layers.Conv2D", "keras.layers.Convolution2D"])
|
| 6 |
+
class Conv2D(BaseConv):
|
| 7 |
+
"""2D convolution layer.
|
| 8 |
+
|
| 9 |
+
This layer creates a convolution kernel that is convolved with the layer
|
| 10 |
+
input over a 2D spatial (or temporal) dimension (height and width) to
|
| 11 |
+
produce a tensor of outputs. If `use_bias` is True, a bias vector is created
|
| 12 |
+
and added to the outputs. Finally, if `activation` is not `None`, it is
|
| 13 |
+
applied to the outputs as well.
|
| 14 |
+
|
| 15 |
+
Args:
|
| 16 |
+
filters: int, the dimension of the output space (the number of filters
|
| 17 |
+
in the convolution).
|
| 18 |
+
kernel_size: int or tuple/list of 2 integer, specifying the size of the
|
| 19 |
+
convolution window.
|
| 20 |
+
strides: int or tuple/list of 2 integer, specifying the stride length
|
| 21 |
+
of the convolution. `strides > 1` is incompatible with
|
| 22 |
+
`dilation_rate > 1`.
|
| 23 |
+
padding: string, either `"valid"` or `"same"` (case-insensitive).
|
| 24 |
+
`"valid"` means no padding. `"same"` results in padding evenly to
|
| 25 |
+
the left/right or up/down of the input. When `padding="same"` and
|
| 26 |
+
`strides=1`, the output has the same size as the input.
|
| 27 |
+
data_format: string, either `"channels_last"` or `"channels_first"`.
|
| 28 |
+
The ordering of the dimensions in the inputs. `"channels_last"`
|
| 29 |
+
corresponds to inputs with shape
|
| 30 |
+
`(batch_size, height, width, channels)`
|
| 31 |
+
while `"channels_first"` corresponds to inputs with shape
|
| 32 |
+
`(batch_size, channels, height, width)`. It defaults to the
|
| 33 |
+
`image_data_format` value found in your Keras config file at
|
| 34 |
+
`~/.keras/keras.json`. If you never set it, then it will be
|
| 35 |
+
`"channels_last"`.
|
| 36 |
+
dilation_rate: int or tuple/list of 2 integers, specifying the dilation
|
| 37 |
+
rate to use for dilated convolution.
|
| 38 |
+
groups: A positive int specifying the number of groups in which the
|
| 39 |
+
input is split along the channel axis. Each group is convolved
|
| 40 |
+
separately with `filters // groups` filters. The output is the
|
| 41 |
+
concatenation of all the `groups` results along the channel axis.
|
| 42 |
+
Input channels and `filters` must both be divisible by `groups`.
|
| 43 |
+
activation: Activation function. If `None`, no activation is applied.
|
| 44 |
+
use_bias: bool, if `True`, bias will be added to the output.
|
| 45 |
+
kernel_initializer: Initializer for the convolution kernel. If `None`,
|
| 46 |
+
the default initializer (`"glorot_uniform"`) will be used.
|
| 47 |
+
bias_initializer: Initializer for the bias vector. If `None`, the
|
| 48 |
+
default initializer (`"zeros"`) will be used.
|
| 49 |
+
kernel_regularizer: Optional regularizer for the convolution kernel.
|
| 50 |
+
bias_regularizer: Optional regularizer for the bias vector.
|
| 51 |
+
activity_regularizer: Optional regularizer function for the output.
|
| 52 |
+
kernel_constraint: Optional projection function to be applied to the
|
| 53 |
+
kernel after being updated by an `Optimizer` (e.g. used to implement
|
| 54 |
+
norm constraints or value constraints for layer weights). The
|
| 55 |
+
function must take as input the unprojected variable and must return
|
| 56 |
+
the projected variable (which must have the same shape). Constraints
|
| 57 |
+
are not safe to use when doing asynchronous distributed training.
|
| 58 |
+
bias_constraint: Optional projection function to be applied to the
|
| 59 |
+
bias after being updated by an `Optimizer`.
|
| 60 |
+
|
| 61 |
+
Input shape:
|
| 62 |
+
|
| 63 |
+
- If `data_format="channels_last"`:
|
| 64 |
+
A 4D tensor with shape: `(batch_size, height, width, channels)`
|
| 65 |
+
- If `data_format="channels_first"`:
|
| 66 |
+
A 4D tensor with shape: `(batch_size, channels, height, width)`
|
| 67 |
+
|
| 68 |
+
Output shape:
|
| 69 |
+
|
| 70 |
+
- If `data_format="channels_last"`:
|
| 71 |
+
A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
|
| 72 |
+
- If `data_format="channels_first"`:
|
| 73 |
+
A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
A 4D tensor representing `activation(conv2d(inputs, kernel) + bias)`.
|
| 77 |
+
|
| 78 |
+
Raises:
|
| 79 |
+
ValueError: when both `strides > 1` and `dilation_rate > 1`.
|
| 80 |
+
|
| 81 |
+
Example:
|
| 82 |
+
|
| 83 |
+
>>> x = np.random.rand(4, 10, 10, 128)
|
| 84 |
+
>>> y = keras.layers.Conv2D(32, 3, activation='relu')(x)
|
| 85 |
+
>>> print(y.shape)
|
| 86 |
+
(4, 8, 8, 32)
|
| 87 |
+
"""
|
| 88 |
+
|
| 89 |
+
def __init__(
|
| 90 |
+
self,
|
| 91 |
+
filters,
|
| 92 |
+
kernel_size,
|
| 93 |
+
strides=(1, 1),
|
| 94 |
+
padding="valid",
|
| 95 |
+
data_format=None,
|
| 96 |
+
dilation_rate=(1, 1),
|
| 97 |
+
groups=1,
|
| 98 |
+
activation=None,
|
| 99 |
+
use_bias=True,
|
| 100 |
+
kernel_initializer="glorot_uniform",
|
| 101 |
+
bias_initializer="zeros",
|
| 102 |
+
kernel_regularizer=None,
|
| 103 |
+
bias_regularizer=None,
|
| 104 |
+
activity_regularizer=None,
|
| 105 |
+
kernel_constraint=None,
|
| 106 |
+
bias_constraint=None,
|
| 107 |
+
**kwargs,
|
| 108 |
+
):
|
| 109 |
+
super().__init__(
|
| 110 |
+
rank=2,
|
| 111 |
+
filters=filters,
|
| 112 |
+
kernel_size=kernel_size,
|
| 113 |
+
strides=strides,
|
| 114 |
+
padding=padding,
|
| 115 |
+
data_format=data_format,
|
| 116 |
+
dilation_rate=dilation_rate,
|
| 117 |
+
groups=groups,
|
| 118 |
+
activation=activation,
|
| 119 |
+
use_bias=use_bias,
|
| 120 |
+
kernel_initializer=kernel_initializer,
|
| 121 |
+
bias_initializer=bias_initializer,
|
| 122 |
+
kernel_regularizer=kernel_regularizer,
|
| 123 |
+
bias_regularizer=bias_regularizer,
|
| 124 |
+
activity_regularizer=activity_regularizer,
|
| 125 |
+
kernel_constraint=kernel_constraint,
|
| 126 |
+
bias_constraint=bias_constraint,
|
| 127 |
+
**kwargs,
|
| 128 |
+
)
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(
|
| 6 |
+
[
|
| 7 |
+
"keras.layers.Conv2DTranspose",
|
| 8 |
+
"keras.layers.Convolution2DTranspose",
|
| 9 |
+
]
|
| 10 |
+
)
|
| 11 |
+
class Conv2DTranspose(BaseConvTranspose):
|
| 12 |
+
"""2D transposed convolution layer.
|
| 13 |
+
|
| 14 |
+
The need for transposed convolutions generally arise from the desire to use
|
| 15 |
+
a transformation going in the opposite direction of a normal convolution,
|
| 16 |
+
i.e., from something that has the shape of the output of some convolution
|
| 17 |
+
to something that has the shape of its input while maintaining a
|
| 18 |
+
connectivity pattern that is compatible with said convolution.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
filters: int, the dimension of the output space (the number of filters
|
| 22 |
+
in the transposed convolution).
|
| 23 |
+
kernel_size: int or tuple/list of 1 integer, specifying the size of the
|
| 24 |
+
transposed convolution window.
|
| 25 |
+
strides: int or tuple/list of 1 integer, specifying the stride length
|
| 26 |
+
of the transposed convolution. `strides > 1` is incompatible with
|
| 27 |
+
`dilation_rate > 1`.
|
| 28 |
+
padding: string, either `"valid"` or `"same"` (case-insensitive).
|
| 29 |
+
`"valid"` means no padding. `"same"` results in padding evenly to
|
| 30 |
+
the left/right or up/down of the input. When `padding="same"` and
|
| 31 |
+
`strides=1`, the output has the same size as the input.
|
| 32 |
+
data_format: string, either `"channels_last"` or `"channels_first"`.
|
| 33 |
+
The ordering of the dimensions in the inputs. `"channels_last"`
|
| 34 |
+
corresponds to inputs with shape
|
| 35 |
+
`(batch_size, height, width, channels)`
|
| 36 |
+
while `"channels_first"` corresponds to inputs with shape
|
| 37 |
+
`(batch_size, channels, height, width)`. It defaults to the
|
| 38 |
+
`image_data_format` value found in your Keras config file at
|
| 39 |
+
`~/.keras/keras.json`. If you never set it, then it will be
|
| 40 |
+
`"channels_last"`.
|
| 41 |
+
dilation_rate: int or tuple/list of 1 integers, specifying the dilation
|
| 42 |
+
rate to use for dilated transposed convolution.
|
| 43 |
+
activation: Activation function. If `None`, no activation is applied.
|
| 44 |
+
use_bias: bool, if `True`, bias will be added to the output.
|
| 45 |
+
kernel_initializer: Initializer for the convolution kernel. If `None`,
|
| 46 |
+
the default initializer (`"glorot_uniform"`) will be used.
|
| 47 |
+
bias_initializer: Initializer for the bias vector. If `None`, the
|
| 48 |
+
default initializer (`"zeros"`) will be used.
|
| 49 |
+
kernel_regularizer: Optional regularizer for the convolution kernel.
|
| 50 |
+
bias_regularizer: Optional regularizer for the bias vector.
|
| 51 |
+
activity_regularizer: Optional regularizer function for the output.
|
| 52 |
+
kernel_constraint: Optional projection function to be applied to the
|
| 53 |
+
kernel after being updated by an `Optimizer` (e.g. used to implement
|
| 54 |
+
norm constraints or value constraints for layer weights). The
|
| 55 |
+
function must take as input the unprojected variable and must return
|
| 56 |
+
the projected variable (which must have the same shape). Constraints
|
| 57 |
+
are not safe to use when doing asynchronous distributed training.
|
| 58 |
+
bias_constraint: Optional projection function to be applied to the
|
| 59 |
+
bias after being updated by an `Optimizer`.
|
| 60 |
+
|
| 61 |
+
Input shape:
|
| 62 |
+
|
| 63 |
+
- If `data_format="channels_last"`:
|
| 64 |
+
A 4D tensor with shape: `(batch_size, height, width, channels)`
|
| 65 |
+
- If `data_format="channels_first"`:
|
| 66 |
+
A 4D tensor with shape: `(batch_size, channels, height, width)`
|
| 67 |
+
|
| 68 |
+
Output shape:
|
| 69 |
+
|
| 70 |
+
- If `data_format="channels_last"`:
|
| 71 |
+
A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
|
| 72 |
+
- If `data_format="channels_first"`:
|
| 73 |
+
A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
A 4D tensor representing
|
| 77 |
+
`activation(conv2d_transpose(inputs, kernel) + bias)`.
|
| 78 |
+
|
| 79 |
+
Raises:
|
| 80 |
+
ValueError: when both `strides > 1` and `dilation_rate > 1`.
|
| 81 |
+
|
| 82 |
+
References:
|
| 83 |
+
- [A guide to convolution arithmetic for deep learning](
|
| 84 |
+
https://arxiv.org/abs/1603.07285v1)
|
| 85 |
+
- [Deconvolutional Networks](
|
| 86 |
+
https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
|
| 87 |
+
|
| 88 |
+
Example:
|
| 89 |
+
|
| 90 |
+
>>> x = np.random.rand(4, 10, 8, 128)
|
| 91 |
+
>>> y = keras.layers.Conv2DTranspose(32, 2, 2, activation='relu')(x)
|
| 92 |
+
>>> print(y.shape)
|
| 93 |
+
(4, 20, 16, 32)
|
| 94 |
+
"""
|
| 95 |
+
|
| 96 |
+
def __init__(
|
| 97 |
+
self,
|
| 98 |
+
filters,
|
| 99 |
+
kernel_size,
|
| 100 |
+
strides=(1, 1),
|
| 101 |
+
padding="valid",
|
| 102 |
+
data_format=None,
|
| 103 |
+
dilation_rate=(1, 1),
|
| 104 |
+
activation=None,
|
| 105 |
+
use_bias=True,
|
| 106 |
+
kernel_initializer="glorot_uniform",
|
| 107 |
+
bias_initializer="zeros",
|
| 108 |
+
kernel_regularizer=None,
|
| 109 |
+
bias_regularizer=None,
|
| 110 |
+
activity_regularizer=None,
|
| 111 |
+
kernel_constraint=None,
|
| 112 |
+
bias_constraint=None,
|
| 113 |
+
**kwargs,
|
| 114 |
+
):
|
| 115 |
+
super().__init__(
|
| 116 |
+
rank=2,
|
| 117 |
+
filters=filters,
|
| 118 |
+
kernel_size=kernel_size,
|
| 119 |
+
strides=strides,
|
| 120 |
+
padding=padding,
|
| 121 |
+
data_format=data_format,
|
| 122 |
+
dilation_rate=dilation_rate,
|
| 123 |
+
activation=activation,
|
| 124 |
+
use_bias=use_bias,
|
| 125 |
+
kernel_initializer=kernel_initializer,
|
| 126 |
+
bias_initializer=bias_initializer,
|
| 127 |
+
kernel_regularizer=kernel_regularizer,
|
| 128 |
+
bias_regularizer=bias_regularizer,
|
| 129 |
+
activity_regularizer=activity_regularizer,
|
| 130 |
+
kernel_constraint=kernel_constraint,
|
| 131 |
+
bias_constraint=bias_constraint,
|
| 132 |
+
**kwargs,
|
| 133 |
+
)
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_conv import BaseConv
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(["keras.layers.Conv3D", "keras.layers.Convolution3D"])
|
| 6 |
+
class Conv3D(BaseConv):
|
| 7 |
+
"""3D convolution layer.
|
| 8 |
+
|
| 9 |
+
This layer creates a convolution kernel that is convolved with the layer
|
| 10 |
+
input over a 3D spatial (or temporal) dimension (width,height and depth) to
|
| 11 |
+
produce a tensor of outputs. If `use_bias` is True, a bias vector is created
|
| 12 |
+
and added to the outputs. Finally, if `activation` is not `None`, it is
|
| 13 |
+
applied to the outputs as well.
|
| 14 |
+
|
| 15 |
+
Args:
|
| 16 |
+
filters: int, the dimension of the output space (the number of filters
|
| 17 |
+
in the convolution).
|
| 18 |
+
kernel_size: int or tuple/list of 3 integer, specifying the size of the
|
| 19 |
+
convolution window.
|
| 20 |
+
strides: int or tuple/list of 3 integer, specifying the stride length
|
| 21 |
+
of the convolution. `strides > 1` is incompatible with
|
| 22 |
+
`dilation_rate > 1`.
|
| 23 |
+
padding: string, either `"valid"` or `"same"` (case-insensitive).
|
| 24 |
+
`"valid"` means no padding. `"same"` results in padding evenly to
|
| 25 |
+
the left/right or up/down of the input. When `padding="same"` and
|
| 26 |
+
`strides=1`, the output has the same size as the input.
|
| 27 |
+
data_format: string, either `"channels_last"` or `"channels_first"`.
|
| 28 |
+
The ordering of the dimensions in the inputs. `"channels_last"`
|
| 29 |
+
corresponds to inputs with shape
|
| 30 |
+
`(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
|
| 31 |
+
while `"channels_first"` corresponds to inputs with shape
|
| 32 |
+
`(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
|
| 33 |
+
It defaults to the `image_data_format` value found in your Keras
|
| 34 |
+
config file at `~/.keras/keras.json`. If you never set it, then it
|
| 35 |
+
will be `"channels_last"`.
|
| 36 |
+
dilation_rate: int or tuple/list of 3 integers, specifying the dilation
|
| 37 |
+
rate to use for dilated convolution.
|
| 38 |
+
groups: A positive int specifying the number of groups in which the
|
| 39 |
+
input is split along the channel axis. Each group is convolved
|
| 40 |
+
separately with `filters // groups` filters. The output is the
|
| 41 |
+
concatenation of all the `groups` results along the channel axis.
|
| 42 |
+
Input channels and `filters` must both be divisible by `groups`.
|
| 43 |
+
activation: Activation function. If `None`, no activation is applied.
|
| 44 |
+
use_bias: bool, if `True`, bias will be added to the output.
|
| 45 |
+
kernel_initializer: Initializer for the convolution kernel. If `None`,
|
| 46 |
+
the default initializer (`"glorot_uniform"`) will be used.
|
| 47 |
+
bias_initializer: Initializer for the bias vector. If `None`, the
|
| 48 |
+
default initializer (`"zeros"`) will be used.
|
| 49 |
+
kernel_regularizer: Optional regularizer for the convolution kernel.
|
| 50 |
+
bias_regularizer: Optional regularizer for the bias vector.
|
| 51 |
+
activity_regularizer: Optional regularizer function for the output.
|
| 52 |
+
kernel_constraint: Optional projection function to be applied to the
|
| 53 |
+
kernel after being updated by an `Optimizer` (e.g. used to implement
|
| 54 |
+
norm constraints or value constraints for layer weights). The
|
| 55 |
+
function must take as input the unprojected variable and must return
|
| 56 |
+
the projected variable (which must have the same shape). Constraints
|
| 57 |
+
are not safe to use when doing asynchronous distributed training.
|
| 58 |
+
bias_constraint: Optional projection function to be applied to the
|
| 59 |
+
bias after being updated by an `Optimizer`.
|
| 60 |
+
|
| 61 |
+
Input shape:
|
| 62 |
+
|
| 63 |
+
- If `data_format="channels_last"`:
|
| 64 |
+
5D tensor with shape:
|
| 65 |
+
`(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
|
| 66 |
+
- If `data_format="channels_first"`:
|
| 67 |
+
5D tensor with shape:
|
| 68 |
+
`(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`
|
| 69 |
+
|
| 70 |
+
Output shape:
|
| 71 |
+
|
| 72 |
+
- If `data_format="channels_last"`:
|
| 73 |
+
5D tensor with shape:
|
| 74 |
+
`(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,
|
| 75 |
+
filters)`
|
| 76 |
+
- If `data_format="channels_first"`:
|
| 77 |
+
5D tensor with shape:
|
| 78 |
+
`(batch_size, filters, new_spatial_dim1, new_spatial_dim2,
|
| 79 |
+
new_spatial_dim3)`
|
| 80 |
+
|
| 81 |
+
Returns:
|
| 82 |
+
A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`.
|
| 83 |
+
|
| 84 |
+
Raises:
|
| 85 |
+
ValueError: when both `strides > 1` and `dilation_rate > 1`.
|
| 86 |
+
|
| 87 |
+
Example:
|
| 88 |
+
|
| 89 |
+
>>> x = np.random.rand(4, 10, 10, 10, 128)
|
| 90 |
+
>>> y = keras.layers.Conv3D(32, 3, activation='relu')(x)
|
| 91 |
+
>>> print(y.shape)
|
| 92 |
+
(4, 8, 8, 8, 32)
|
| 93 |
+
"""
|
| 94 |
+
|
| 95 |
+
def __init__(
|
| 96 |
+
self,
|
| 97 |
+
filters,
|
| 98 |
+
kernel_size,
|
| 99 |
+
strides=(1, 1, 1),
|
| 100 |
+
padding="valid",
|
| 101 |
+
data_format=None,
|
| 102 |
+
dilation_rate=(1, 1, 1),
|
| 103 |
+
groups=1,
|
| 104 |
+
activation=None,
|
| 105 |
+
use_bias=True,
|
| 106 |
+
kernel_initializer="glorot_uniform",
|
| 107 |
+
bias_initializer="zeros",
|
| 108 |
+
kernel_regularizer=None,
|
| 109 |
+
bias_regularizer=None,
|
| 110 |
+
activity_regularizer=None,
|
| 111 |
+
kernel_constraint=None,
|
| 112 |
+
bias_constraint=None,
|
| 113 |
+
**kwargs,
|
| 114 |
+
):
|
| 115 |
+
super().__init__(
|
| 116 |
+
rank=3,
|
| 117 |
+
filters=filters,
|
| 118 |
+
kernel_size=kernel_size,
|
| 119 |
+
strides=strides,
|
| 120 |
+
padding=padding,
|
| 121 |
+
data_format=data_format,
|
| 122 |
+
dilation_rate=dilation_rate,
|
| 123 |
+
groups=groups,
|
| 124 |
+
activation=activation,
|
| 125 |
+
use_bias=use_bias,
|
| 126 |
+
kernel_initializer=kernel_initializer,
|
| 127 |
+
bias_initializer=bias_initializer,
|
| 128 |
+
kernel_regularizer=kernel_regularizer,
|
| 129 |
+
bias_regularizer=bias_regularizer,
|
| 130 |
+
activity_regularizer=activity_regularizer,
|
| 131 |
+
kernel_constraint=kernel_constraint,
|
| 132 |
+
bias_constraint=bias_constraint,
|
| 133 |
+
**kwargs,
|
| 134 |
+
)
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(
|
| 6 |
+
[
|
| 7 |
+
"keras.layers.Conv3DTranspose",
|
| 8 |
+
"keras.layers.Convolution3DTranspose",
|
| 9 |
+
]
|
| 10 |
+
)
|
| 11 |
+
class Conv3DTranspose(BaseConvTranspose):
|
| 12 |
+
"""3D transposed convolution layer.
|
| 13 |
+
|
| 14 |
+
The need for transposed convolutions generally arise from the desire to use
|
| 15 |
+
a transformation going in the opposite direction of a normal convolution,
|
| 16 |
+
i.e., from something that has the shape of the output of some convolution
|
| 17 |
+
to something that has the shape of its input while maintaining a
|
| 18 |
+
connectivity pattern that is compatible with said convolution.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
filters: int, the dimension of the output space (the number of filters
|
| 22 |
+
in the transposed convolution).
|
| 23 |
+
kernel_size: int or tuple/list of 1 integer, specifying the size of the
|
| 24 |
+
transposed convolution window.
|
| 25 |
+
strides: int or tuple/list of 1 integer, specifying the stride length
|
| 26 |
+
of the transposed convolution. `strides > 1` is incompatible with
|
| 27 |
+
`dilation_rate > 1`.
|
| 28 |
+
padding: string, either `"valid"` or `"same"` (case-insensitive).
|
| 29 |
+
`"valid"` means no padding. `"same"` results in padding evenly to
|
| 30 |
+
the left/right or up/down of the input. When `padding="same"` and
|
| 31 |
+
`strides=1`, the output has the same size as the input.
|
| 32 |
+
data_format: string, either `"channels_last"` or `"channels_first"`.
|
| 33 |
+
The ordering of the dimensions in the inputs. `"channels_last"`
|
| 34 |
+
corresponds to inputs with shape
|
| 35 |
+
`(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
|
| 36 |
+
while `"channels_first"` corresponds to inputs with shape
|
| 37 |
+
`(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
|
| 38 |
+
It defaults to the `image_data_format` value found in your Keras
|
| 39 |
+
config file at `~/.keras/keras.json`. If you never set it, then it
|
| 40 |
+
will be `"channels_last"`.
|
| 41 |
+
dilation_rate: int or tuple/list of 1 integers, specifying the dilation
|
| 42 |
+
rate to use for dilated transposed convolution.
|
| 43 |
+
activation: Activation function. If `None`, no activation is applied.
|
| 44 |
+
use_bias: bool, if `True`, bias will be added to the output.
|
| 45 |
+
kernel_initializer: Initializer for the convolution kernel. If `None`,
|
| 46 |
+
the default initializer (`"glorot_uniform"`) will be used.
|
| 47 |
+
bias_initializer: Initializer for the bias vector. If `None`, the
|
| 48 |
+
default initializer (`"zeros"`) will be used.
|
| 49 |
+
kernel_regularizer: Optional regularizer for the convolution kernel.
|
| 50 |
+
bias_regularizer: Optional regularizer for the bias vector.
|
| 51 |
+
activity_regularizer: Optional regularizer function for the output.
|
| 52 |
+
kernel_constraint: Optional projection function to be applied to the
|
| 53 |
+
kernel after being updated by an `Optimizer` (e.g. used to implement
|
| 54 |
+
norm constraints or value constraints for layer weights). The
|
| 55 |
+
function must take as input the unprojected variable and must return
|
| 56 |
+
the projected variable (which must have the same shape). Constraints
|
| 57 |
+
are not safe to use when doing asynchronous distributed training.
|
| 58 |
+
bias_constraint: Optional projection function to be applied to the
|
| 59 |
+
bias after being updated by an `Optimizer`.
|
| 60 |
+
|
| 61 |
+
Input shape:
|
| 62 |
+
|
| 63 |
+
- If `data_format="channels_last"`:
|
| 64 |
+
5D tensor with shape:
|
| 65 |
+
`(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
|
| 66 |
+
- If `data_format="channels_first"`:
|
| 67 |
+
5D tensor with shape:
|
| 68 |
+
`(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`
|
| 69 |
+
|
| 70 |
+
Output shape:
|
| 71 |
+
|
| 72 |
+
- If `data_format="channels_last"`:
|
| 73 |
+
5D tensor with shape:
|
| 74 |
+
`(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,
|
| 75 |
+
filters)`
|
| 76 |
+
- If `data_format="channels_first"`:
|
| 77 |
+
5D tensor with shape:
|
| 78 |
+
`(batch_size, filters, new_spatial_dim1, new_spatial_dim2,
|
| 79 |
+
new_spatial_dim3)`
|
| 80 |
+
|
| 81 |
+
Returns:
|
| 82 |
+
A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`.
|
| 83 |
+
|
| 84 |
+
Raises:
|
| 85 |
+
ValueError: when both `strides > 1` and `dilation_rate > 1`.
|
| 86 |
+
|
| 87 |
+
References:
|
| 88 |
+
- [A guide to convolution arithmetic for deep learning](
|
| 89 |
+
https://arxiv.org/abs/1603.07285v1)
|
| 90 |
+
- [Deconvolutional Networks](
|
| 91 |
+
https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
|
| 92 |
+
|
| 93 |
+
Example:
|
| 94 |
+
|
| 95 |
+
>>> x = np.random.rand(4, 10, 8, 12, 128)
|
| 96 |
+
>>> y = keras.layers.Conv3DTranspose(32, 2, 2, activation='relu')(x)
|
| 97 |
+
>>> print(y.shape)
|
| 98 |
+
(4, 20, 16, 24, 32)
|
| 99 |
+
"""
|
| 100 |
+
|
| 101 |
+
def __init__(
|
| 102 |
+
self,
|
| 103 |
+
filters,
|
| 104 |
+
kernel_size,
|
| 105 |
+
strides=(1, 1, 1),
|
| 106 |
+
padding="valid",
|
| 107 |
+
data_format=None,
|
| 108 |
+
dilation_rate=(1, 1, 1),
|
| 109 |
+
activation=None,
|
| 110 |
+
use_bias=True,
|
| 111 |
+
kernel_initializer="glorot_uniform",
|
| 112 |
+
bias_initializer="zeros",
|
| 113 |
+
kernel_regularizer=None,
|
| 114 |
+
bias_regularizer=None,
|
| 115 |
+
activity_regularizer=None,
|
| 116 |
+
kernel_constraint=None,
|
| 117 |
+
bias_constraint=None,
|
| 118 |
+
**kwargs,
|
| 119 |
+
):
|
| 120 |
+
super().__init__(
|
| 121 |
+
rank=3,
|
| 122 |
+
filters=filters,
|
| 123 |
+
kernel_size=kernel_size,
|
| 124 |
+
strides=strides,
|
| 125 |
+
padding=padding,
|
| 126 |
+
data_format=data_format,
|
| 127 |
+
dilation_rate=dilation_rate,
|
| 128 |
+
activation=activation,
|
| 129 |
+
use_bias=use_bias,
|
| 130 |
+
kernel_initializer=kernel_initializer,
|
| 131 |
+
bias_initializer=bias_initializer,
|
| 132 |
+
kernel_regularizer=kernel_regularizer,
|
| 133 |
+
bias_regularizer=bias_regularizer,
|
| 134 |
+
activity_regularizer=activity_regularizer,
|
| 135 |
+
kernel_constraint=kernel_constraint,
|
| 136 |
+
bias_constraint=bias_constraint,
|
| 137 |
+
**kwargs,
|
| 138 |
+
)
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export("keras.layers.DepthwiseConv1D")
class DepthwiseConv1D(BaseDepthwiseConv):
    """1D depthwise convolution layer.

    A depthwise convolution convolves every input channel with its own
    kernel (the depthwise kernel) and never mixes information across
    channels; it is the channel-wise first stage of a depthwise separable
    convolution. Conceptually the layer:

    - splits the input into its individual channels,
    - convolves each channel with its own kernel, producing
      `depth_multiplier` output channels per input channel, and
    - concatenates the per-channel results along the channel axis.

    The `depth_multiplier` argument therefore controls how many output
    channels are generated per input channel in the depthwise step.

    Args:
        kernel_size: int or tuple/list of 1 integer, the length of the
            depthwise convolution window.
        strides: int or tuple/list of 1 integer, the stride length of the
            convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` pads evenly to the
            left/right or up/down of the input so that with `strides=1`
            the output has the same size as the input.
        depth_multiplier: The number of depthwise convolution output
            channels for each input channel. The total number of depthwise
            convolution output channels will be equal to
            `input_channel * depth_multiplier`.
        data_format: string, either `"channels_last"` or
            `"channels_first"`. `"channels_last"` corresponds to inputs
            with shape `(batch, steps, features)` while `"channels_first"`
            corresponds to inputs with shape `(batch, features, steps)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`. If you never set
            it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of 1 integers, the dilation rate
            to use for dilated convolution.
        activation: Activation function. If `None`, no activation is
            applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: Initializer for the convolution kernel. If
            `None`, the default initializer (`"glorot_uniform"`) will be
            used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the convolution
            kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the
            output.
        depthwise_constraint: Optional projection function to be applied
            to the kernel after being updated by an `Optimizer` (e.g. used
            to implement norm constraints or value constraints for layer
            weights). The function must take as input the unprojected
            variable and must return the projected variable (which must
            have the same shape). Constraints are not safe to use when
            doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, steps, channels)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, channels, steps)`

    Output shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape:
        `(batch_shape, new_steps, channels * depth_multiplier)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape:
        `(batch_shape, channels * depth_multiplier, new_steps)`

    Returns:
        A 3D tensor representing
        `activation(depthwise_conv1d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> x = np.random.rand(4, 10, 12)
    >>> y = keras.layers.DepthwiseConv1D(3, 3, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 4, 36)
    """

    def __init__(
        self,
        kernel_size,
        strides=1,
        padding="valid",
        depth_multiplier=1,
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # All real work happens in BaseDepthwiseConv; this subclass only
        # pins the spatial rank to 1 and forwards the configuration.
        super().__init__(
            rank=1,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            depth_multiplier=depth_multiplier,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            bias_initializer=bias_initializer,
            depthwise_regularizer=depthwise_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            depthwise_constraint=depthwise_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export("keras.layers.DepthwiseConv2D")
class DepthwiseConv2D(BaseDepthwiseConv):
    """2D depthwise convolution layer.

    A depthwise convolution convolves every input channel with its own
    kernel (the depthwise kernel) and never mixes information across
    channels; it is the channel-wise first stage of a depthwise separable
    convolution. Conceptually the layer:

    - splits the input into its individual channels,
    - convolves each channel with its own kernel, producing
      `depth_multiplier` output channels per input channel, and
    - concatenates the per-channel results along the channel axis.

    The `depth_multiplier` argument therefore controls how many output
    channels are generated per input channel in the depthwise step.

    Args:
        kernel_size: int or tuple/list of 2 integer, the size of the
            depthwise convolution window.
        strides: int or tuple/list of 2 integer, the stride length of the
            depthwise convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` pads evenly to the
            left/right or up/down of the input so that with `strides=1`
            the output has the same size as the input.
        depth_multiplier: The number of depthwise convolution output
            channels for each input channel. The total number of depthwise
            convolution output channels will be equal to
            `input_channel * depth_multiplier`.
        data_format: string, either `"channels_last"` or
            `"channels_first"`. `"channels_last"` corresponds to inputs
            with shape `(batch, height, width, channels)` while
            `"channels_first"` corresponds to inputs with shape
            `(batch, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file at
            `~/.keras/keras.json`. If you never set it, then it will be
            `"channels_last"`.
        dilation_rate: int or tuple/list of 2 integers, the dilation rate
            to use for dilated convolution.
        activation: Activation function. If `None`, no activation is
            applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: Initializer for the convolution kernel. If
            `None`, the default initializer (`"glorot_uniform"`) will be
            used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the convolution
            kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the
            output.
        depthwise_constraint: Optional projection function to be applied
            to the kernel after being updated by an `Optimizer` (e.g. used
            to implement norm constraints or value constraints for layer
            weights). The function must take as input the unprojected
            variable and must return the projected variable (which must
            have the same shape). Constraints are not safe to use when
            doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, height, width, channels)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, channels, height, width)`

    Output shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape:
        `(batch_size, new_height, new_width, channels * depth_multiplier)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape:
        `(batch_size, channels * depth_multiplier, new_height, new_width)`

    Returns:
        A 4D tensor representing
        `activation(depthwise_conv2d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> x = np.random.rand(4, 10, 10, 12)
    >>> y = keras.layers.DepthwiseConv2D(kernel_size=3, activation='relu')(x)
    >>> print(y.shape)
    (4, 8, 8, 12)
    """

    def __init__(
        self,
        kernel_size,
        strides=(1, 1),
        padding="valid",
        depth_multiplier=1,
        data_format=None,
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # All real work happens in BaseDepthwiseConv; this subclass only
        # pins the spatial rank to 2 and forwards the configuration.
        super().__init__(
            rank=2,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            depth_multiplier=depth_multiplier,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            bias_initializer=bias_initializer,
            depthwise_regularizer=depthwise_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            depthwise_constraint=depthwise_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(
    [
        "keras.layers.SeparableConv1D",
        "keras.layers.SeparableConvolution1D",
    ]
)
class SeparableConv1D(BaseSeparableConv):
    """1D separable convolution layer.

    A separable convolution factors a regular convolution into two
    cheaper stages: a depthwise convolution that acts on each channel
    independently, followed by a pointwise (1x1) convolution that mixes
    the channels. If `use_bias` is True and a bias initializer is
    provided, a bias vector is added to the output, and an optional
    activation function is applied to produce the final result.

    Args:
        filters: int, the dimensionality of the output space (i.e. the
            number of filters in the pointwise convolution).
        kernel_size: int or tuple/list of 1 integers, the size of the
            depthwise convolution window.
        strides: int or tuple/list of 1 integers, the stride length of
            the depthwise convolution. If only one int is specified, the
            same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` pads evenly to the
            left/right or up/down of the input so that with `strides=1`
            the output has the same size as the input.
        data_format: string, either `"channels_last"` or
            `"channels_first"`. `"channels_last"` corresponds to inputs
            with shape `(batch, steps, features)` while `"channels_first"`
            corresponds to inputs with shape `(batch, features, steps)`.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`. If you never set
            it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of 1 integers, the dilation rate
            to use for dilated convolution. If only one int is specified,
            the same dilation rate will be used for all dimensions.
        depth_multiplier: The number of depthwise convolution output
            channels for each input channel. The total number of
            depthwise convolution output channels will be equal to
            `input_channel * depth_multiplier`.
        activation: Activation function. If `None`, no activation is
            applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: An initializer for the depthwise
            convolution kernel. If None, then the default initializer
            (`"glorot_uniform"`) will be used.
        pointwise_initializer: An initializer for the pointwise
            convolution kernel. If None, then the default initializer
            (`"glorot_uniform"`) will be used.
        bias_initializer: An initializer for the bias vector. If None,
            the default initializer ('"zeros"') will be used.
        depthwise_regularizer: Optional regularizer for the depthwise
            convolution kernel.
        pointwise_regularizer: Optional regularizer for the pointwise
            convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the
            output.
        depthwise_constraint: Optional projection function to be applied
            to the depthwise kernel after being updated by an `Optimizer`
            (e.g. used for norm constraints or value constraints for
            layer weights). The function must take as input the
            unprojected variable and must return the projected variable
            (which must have the same shape).
        pointwise_constraint: Optional projection function to be applied
            to the pointwise kernel after being updated by an
            `Optimizer`.
        bias_constraint: Optional projection function to be applied to
            the bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, steps, channels)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, channels, steps)`

    Output shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, new_steps, filters)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, filters, new_steps)`

    Returns:
        A 3D tensor representing
        `activation(separable_conv1d(inputs, kernel) + bias)`.

    Example:

    >>> x = np.random.rand(4, 10, 12)
    >>> y = keras.layers.SeparableConv1D(3, 4, 3, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 4, 4)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        depth_multiplier=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        pointwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        pointwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        pointwise_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # All real work happens in BaseSeparableConv; this subclass only
        # pins the spatial rank to 1 and forwards the configuration.
        super().__init__(
            rank=1,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            depth_multiplier=depth_multiplier,
            activation=activation,
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            pointwise_initializer=pointwise_initializer,
            bias_initializer=bias_initializer,
            depthwise_regularizer=depthwise_regularizer,
            pointwise_regularizer=pointwise_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            depthwise_constraint=depthwise_constraint,
            pointwise_constraint=pointwise_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from keras.src.api_export import keras_export
|
| 2 |
+
from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@keras_export(
    [
        "keras.layers.SeparableConv2D",
        "keras.layers.SeparableConvolution2D",
    ]
)
class SeparableConv2D(BaseSeparableConv):
    """2D separable convolution layer.

    A separable convolution factors a regular convolution into two
    cheaper stages: a depthwise convolution that acts on each channel
    independently, followed by a pointwise (1x1) convolution that mixes
    the channels. If `use_bias` is True and a bias initializer is
    provided, a bias vector is added to the output, and an optional
    activation function is applied to produce the final result.

    Args:
        filters: int, the dimensionality of the output space (i.e. the
            number of filters in the pointwise convolution).
        kernel_size: int or tuple/list of 2 integers, the size of the
            depthwise convolution window.
        strides: int or tuple/list of 2 integers, the stride length of
            the depthwise convolution. If only one int is specified, the
            same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` pads evenly to the
            left/right or up/down of the input so that with `strides=1`
            the output has the same size as the input.
        data_format: string, either `"channels_last"` or
            `"channels_first"`. `"channels_last"` corresponds to inputs
            with shape `(batch, height, width, channels)` while
            `"channels_first"` corresponds to inputs with shape
            `(batch, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file at
            `~/.keras/keras.json`. If you never set it, then it will be
            `"channels_last"`.
        dilation_rate: int or tuple/list of 2 integers, the dilation rate
            to use for dilated convolution. If only one int is specified,
            the same dilation rate will be used for all dimensions.
        depth_multiplier: The number of depthwise convolution output
            channels for each input channel. The total number of
            depthwise convolution output channels will be equal to
            `input_channel * depth_multiplier`.
        activation: Activation function. If `None`, no activation is
            applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: An initializer for the depthwise
            convolution kernel. If None, then the default initializer
            (`"glorot_uniform"`) will be used.
        pointwise_initializer: An initializer for the pointwise
            convolution kernel. If None, then the default initializer
            (`"glorot_uniform"`) will be used.
        bias_initializer: An initializer for the bias vector. If None,
            the default initializer ('"zeros"') will be used.
        depthwise_regularizer: Optional regularizer for the depthwise
            convolution kernel.
        pointwise_regularizer: Optional regularizer for the pointwise
            convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the
            output.
        depthwise_constraint: Optional projection function to be applied
            to the depthwise kernel after being updated by an `Optimizer`
            (e.g. used for norm constraints or value constraints for
            layer weights). The function must take as input the
            unprojected variable and must return the projected variable
            (which must have the same shape).
        pointwise_constraint: Optional projection function to be applied
            to the pointwise kernel after being updated by an
            `Optimizer`.
        bias_constraint: Optional projection function to be applied to
            the bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, height, width, channels)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, channels, height, width)`

    Output shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`

    Returns:
        A 4D tensor representing
        `activation(separable_conv2d(inputs, kernel) + bias)`.

    Example:

    >>> x = np.random.rand(4, 10, 10, 12)
    >>> y = keras.layers.SeparableConv2D(3, 4, 3, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 4, 4, 4)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=(1, 1),
        padding="valid",
        data_format=None,
        dilation_rate=(1, 1),
        depth_multiplier=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        pointwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        pointwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        pointwise_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # All real work happens in BaseSeparableConv; this subclass only
        # pins the spatial rank to 2 and forwards the configuration.
        super().__init__(
            rank=2,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            depth_multiplier=depth_multiplier,
            activation=activation,
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            pointwise_initializer=pointwise_initializer,
            bias_initializer=bias_initializer,
            depthwise_regularizer=depthwise_regularizer,
            pointwise_regularizer=pointwise_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            depthwise_constraint=depthwise_constraint,
            pointwise_constraint=pointwise_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py
ADDED
|
File without changes
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (197 Bytes). View file
|
|
|
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc
ADDED
|
Binary file (15.3 kB). View file
|
|
|