AIDUDE0541 committed on
Commit
26875e5
·
verified ·
1 Parent(s): a304ccf

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py +5 -0
  2. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py +40 -0
  3. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py +32 -0
  4. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py +67 -0
  5. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py +99 -0
  6. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py +86 -0
  7. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py +76 -0
  8. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py +0 -0
  9. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc +0 -0
  10. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc +0 -0
  11. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc +0 -0
  12. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc +0 -0
  13. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc +0 -0
  14. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py +103 -0
  15. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py +330 -0
  16. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py +504 -0
  17. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py +827 -0
  18. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py +0 -0
  19. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc +0 -0
  20. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc +0 -0
  21. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc +0 -0
  22. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc +0 -0
  23. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc +0 -0
  24. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc +0 -0
  25. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc +0 -0
  26. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc +0 -0
  27. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc +0 -0
  28. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc +0 -0
  29. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc +0 -0
  30. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc +0 -0
  31. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc +0 -0
  32. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc +0 -0
  33. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc +0 -0
  34. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py +401 -0
  35. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv_transpose.py +259 -0
  36. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_depthwise_conv.py +274 -0
  37. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_separable_conv.py +295 -0
  38. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d.py +170 -0
  39. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py +131 -0
  40. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py +128 -0
  41. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py +133 -0
  42. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py +134 -0
  43. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py +138 -0
  44. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py +137 -0
  45. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py +138 -0
  46. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py +143 -0
  47. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py +144 -0
  48. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py +0 -0
  49. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc +0 -0
  50. SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc +0 -0
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from keras.src.layers.activations.elu import ELU
2
+ from keras.src.layers.activations.leaky_relu import LeakyReLU
3
+ from keras.src.layers.activations.prelu import PReLU
4
+ from keras.src.layers.activations.relu import ReLU
5
+ from keras.src.layers.activations.softmax import Softmax
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import activations
2
+ from keras.src.api_export import keras_export
3
+ from keras.src.layers.layer import Layer
4
+
5
+
6
@keras_export("keras.layers.Activation")
class Activation(Layer):
    """Applies an activation function to an output.

    Args:
        activation: Activation function. It could be a callable, or the name
            of an activation from the `keras.activations` namespace.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.

    Example:

    >>> layer = keras.layers.Activation('relu')
    >>> layer(np.array([-3.0, -1.0, 0.0, 2.0]))
    [0.0, 0.0, 0.0, 2.0]
    >>> layer = keras.layers.Activation(keras.activations.relu)
    >>> layer(np.array([-3.0, -1.0, 0.0, 2.0]))
    [0.0, 0.0, 0.0, 2.0]
    """

    def __init__(self, activation, **kwargs):
        super().__init__(**kwargs)
        # Resolve a string name or callable to a concrete activation fn.
        self.activation = activations.get(activation)
        self.supports_masking = True
        # Stateless layer: there are no weights to create.
        self.built = True

    def call(self, inputs):
        return self.activation(inputs)

    def compute_output_shape(self, input_shape):
        # Element-wise operation: the shape is unchanged.
        return input_shape

    def get_config(self):
        return {
            **super().get_config(),
            "activation": activations.serialize(self.activation),
        }
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import activations
2
+ from keras.src.api_export import keras_export
3
+ from keras.src.layers.layer import Layer
4
+
5
+
6
@keras_export("keras.layers.ELU")
class ELU(Layer):
    """Applies an Exponential Linear Unit function to an output.

    Formula:

    ```
    f(x) = alpha * (exp(x) - 1.) for x < 0
    f(x) = x for x >= 0
    ```

    Args:
        alpha: float, slope of negative section. Defaults to `1.0`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(self, alpha=1.0, **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha
        self.supports_masking = True
        # Stateless layer: there are no weights to create.
        self.built = True

    def call(self, inputs):
        return activations.elu(inputs, alpha=self.alpha)

    def compute_output_shape(self, input_shape):
        # Element-wise activation: output shape equals input shape.
        return input_shape

    def get_config(self):
        # Fix: `alpha` was not serialized (the sibling activation layers
        # all serialize their knobs), so a model saved with a non-default
        # alpha silently reverted to 1.0 when reloaded.
        config = super().get_config()
        config.update({"alpha": self.alpha})
        return config
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ from keras.src import activations
4
+ from keras.src.api_export import keras_export
5
+ from keras.src.layers.layer import Layer
6
+
7
+
8
@keras_export("keras.layers.LeakyReLU")
class LeakyReLU(Layer):
    """Leaky version of a Rectified Linear Unit activation layer.

    This layer allows a small gradient when the unit is not active.

    Formula:

    ``` python
    f(x) = alpha * x if x < 0
    f(x) = x if x >= 0
    ```

    Example:

    ``` python
    leaky_relu_layer = LeakyReLU(negative_slope=0.5)
    input = np.array([-10, -5, 0.0, 5, 10])
    result = leaky_relu_layer(input)
    # result = [-5. , -2.5, 0. , 5. , 10.]
    ```

    Args:
        negative_slope: Float >= 0.0. Negative slope coefficient.
            Defaults to `0.3`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(self, negative_slope=0.3, **kwargs):
        # Honor the legacy Keras 2 `alpha` argument, but warn about it.
        if "alpha" in kwargs:
            negative_slope = kwargs.pop("alpha")
            warnings.warn(
                "Argument `alpha` is deprecated. "
                "Use `negative_slope` instead."
            )
        super().__init__(**kwargs)
        if negative_slope is None or negative_slope < 0:
            raise ValueError(
                "The negative_slope value of a Leaky ReLU layer "
                "cannot be None or negative value. Expected a float."
                f" Received: negative_slope={negative_slope}"
            )
        self.supports_masking = True
        self.negative_slope = negative_slope
        # Stateless layer: there are no weights to create.
        self.built = True

    def call(self, inputs):
        return activations.leaky_relu(
            inputs, negative_slope=self.negative_slope
        )

    def get_config(self):
        return {
            **super().get_config(),
            "negative_slope": self.negative_slope,
        }

    def compute_output_shape(self, input_shape):
        # Element-wise activation: output shape equals input shape.
        return input_shape
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import activations
2
+ from keras.src import constraints
3
+ from keras.src import initializers
4
+ from keras.src import regularizers
5
+ from keras.src.api_export import keras_export
6
+ from keras.src.layers.input_spec import InputSpec
7
+ from keras.src.layers.layer import Layer
8
+
9
+
10
@keras_export("keras.layers.PReLU")
class PReLU(Layer):
    """Parametric Rectified Linear Unit activation layer.

    Formula:
    ``` python
    f(x) = alpha * x for x < 0
    f(x) = x for x >= 0
    ```
    where `alpha` is a learned array with the same shape as x.

    Args:
        alpha_initializer: Initializer function for the weights.
        alpha_regularizer: Regularizer for the weights.
        alpha_constraint: Constraint for the weights.
        shared_axes: The axes along which to share learnable parameters for
            the activation function. For example, if the incoming feature
            maps are from a 2D convolution with output shape
            `(batch, height, width, channels)`, and you wish to share
            parameters across space so that each filter only has one set of
            parameters, set `shared_axes=[1, 2]`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(
        self,
        alpha_initializer="Zeros",
        alpha_regularizer=None,
        alpha_constraint=None,
        shared_axes=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.alpha_initializer = initializers.get(alpha_initializer)
        self.alpha_regularizer = regularizers.get(alpha_regularizer)
        self.alpha_constraint = constraints.get(alpha_constraint)
        # Normalize `shared_axes` to either None or a list of ints.
        if shared_axes is None:
            self.shared_axes = None
        elif isinstance(shared_axes, (list, tuple)):
            self.shared_axes = list(shared_axes)
        else:
            self.shared_axes = [shared_axes]

    def build(self, input_shape):
        # One alpha per input feature; along shared axes a single
        # parameter (dim 1) is broadcast instead.
        param_shape = list(input_shape[1:])
        if self.shared_axes is not None:
            for axis in self.shared_axes:
                param_shape[axis - 1] = 1
        self.alpha = self.add_weight(
            shape=param_shape,
            name="alpha",
            initializer=self.alpha_initializer,
            regularizer=self.alpha_regularizer,
            constraint=self.alpha_constraint,
        )
        # Pin the non-shared dimensions so future inputs must match.
        axes = {}
        if self.shared_axes:
            for i in range(1, len(input_shape)):
                if i not in self.shared_axes:
                    axes[i] = input_shape[i]
        self.input_spec = InputSpec(ndim=len(input_shape), axes=axes)
        self.built = True

    def call(self, inputs):
        # relu(x) keeps the positive part; alpha scales the negative part.
        positive = activations.relu(inputs)
        negative = -self.alpha * activations.relu(-inputs)
        return positive + negative

    def get_config(self):
        return {
            **super().get_config(),
            "alpha_initializer": initializers.serialize(
                self.alpha_initializer
            ),
            "alpha_regularizer": regularizers.serialize(
                self.alpha_regularizer
            ),
            "alpha_constraint": constraints.serialize(self.alpha_constraint),
            "shared_axes": self.shared_axes,
        }
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import activations
2
+ from keras.src.api_export import keras_export
3
+ from keras.src.layers.layer import Layer
4
+
5
+
6
@keras_export("keras.layers.ReLU")
class ReLU(Layer):
    """Rectified Linear Unit activation function layer.

    Formula:
    ``` python
    f(x) = max(x,0)
    f(x) = max_value if x >= max_value
    f(x) = x if threshold <= x < max_value
    f(x) = negative_slope * (x - threshold) otherwise
    ```

    Example:
    ``` python
    relu_layer = keras.layers.ReLU(
        max_value=10,
        negative_slope=0.5,
        threshold=0,
    )
    input = np.array([-10, -5, 0.0, 5, 10])
    result = relu_layer(input)
    # result = [-5. , -2.5, 0. , 5. , 10.]
    ```

    Args:
        max_value: Float >= 0. Maximum activation value. None means
            unlimited. Defaults to `None`.
        negative_slope: Float >= 0. Negative slope coefficient.
            Defaults to `0.0`.
        threshold: Float >= 0. Threshold value for thresholded activation.
            Defaults to `0.0`.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
    """

    def __init__(
        self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs
    ):
        super().__init__(**kwargs)
        # Validate every knob up front; all must be non-negative.
        if max_value is not None and max_value < 0.0:
            raise ValueError(
                "max_value of a ReLU layer cannot be a negative "
                f"value. Received: max_value={max_value}"
            )
        if negative_slope is None or negative_slope < 0.0:
            raise ValueError(
                "negative_slope of a ReLU layer cannot be a negative "
                f"value. Received: negative_slope={negative_slope}"
            )
        if threshold is None or threshold < 0.0:
            raise ValueError(
                "threshold of a ReLU layer cannot be a negative "
                f"value. Received: threshold={threshold}"
            )
        self.max_value = max_value
        self.negative_slope = negative_slope
        self.threshold = threshold
        self.supports_masking = True
        # Stateless layer: there are no weights to create.
        self.built = True

    def call(self, inputs):
        return activations.relu(
            inputs,
            negative_slope=self.negative_slope,
            max_value=self.max_value,
            threshold=self.threshold,
        )

    def get_config(self):
        return {
            **super().get_config(),
            "max_value": self.max_value,
            "negative_slope": self.negative_slope,
            "threshold": self.threshold,
        }

    def compute_output_shape(self, input_shape):
        # Element-wise activation: output shape equals input shape.
        return input_shape
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import activations
2
+ from keras.src import backend
3
+ from keras.src.api_export import keras_export
4
+ from keras.src.layers.layer import Layer
5
+
6
+
7
def _large_negative_number(dtype):
    """Return a large negative number suitable for `dtype`.

    float16 cannot represent -1e9, so a value within its range is used
    for that dtype instead.
    """
    canonical = backend.standardize_dtype(dtype)
    return -3e4 if canonical == "float16" else -1e9
12
+
13
+
14
@keras_export("keras.layers.Softmax")
class Softmax(Layer):
    """Softmax activation layer.

    Formula:
    ``` python
    exp_x = exp(x - max(x))
    f(x) = exp_x / sum(exp_x)
    ```

    Example:
    >>> softmax_layer = keras.layers.Softmax()
    >>> input = np.array([1.0, 2.0, 1.0])
    >>> result = softmax_layer(input)
    >>> result
    [0.21194157, 0.5761169, 0.21194157]


    Args:
        axis: Integer, or list of Integers, axis along which the softmax
            normalization is applied.
        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.

    Call arguments:
        inputs: The inputs (logits) to the softmax layer.
        mask: A boolean mask of the same shape as `inputs`. The mask
            specifies 1 to keep and 0 to mask. Defaults to `None`.

    Returns:
        Softmaxed output with the same shape as `inputs`.
    """

    def __init__(self, axis=-1, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis
        self.supports_masking = True
        # Stateless layer: there are no weights to create.
        self.built = True

    def call(self, inputs, mask=None):
        if mask is not None:
            # Push masked-out logits toward a very large negative value so
            # they receive ~zero probability after the softmax.
            adder = (
                1.0 - backend.cast(mask, inputs.dtype)
            ) * _large_negative_number(inputs.dtype)
            inputs += adder
        if not isinstance(self.axis, (tuple, list)):
            return activations.softmax(inputs, axis=self.axis)
        if len(self.axis) == 1:
            return activations.softmax(inputs, axis=self.axis[0])
        # Joint softmax over multiple axes via the log-sum-exp trick.
        log_norm = backend.math.logsumexp(
            inputs, axis=self.axis, keepdims=True
        )
        return backend.numpy.exp(inputs - log_norm)

    def get_config(self):
        return {**super().get_config(), "axis": self.axis}

    def compute_output_shape(self, input_shape):
        # Normalization only rescales values; the shape is unchanged.
        return input_shape
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py ADDED
File without changes
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (202 Bytes). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc ADDED
Binary file (4.68 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc ADDED
Binary file (10.5 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc ADDED
Binary file (14 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc ADDED
Binary file (22.5 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import ops
2
+ from keras.src.api_export import keras_export
3
+ from keras.src.layers.attention.attention import Attention
4
+
5
+
6
@keras_export("keras.layers.AdditiveAttention")
class AdditiveAttention(Attention):
    """Additive attention layer, a.k.a. Bahdanau-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none
        supplied, `value` will be used as `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)` as a non-linear sum
        `scores = reduce_sum(tanh(query + key), axis=-1)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of
        `value` with shape `(batch_size, Tq, dim)`.

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for
            the attention scores. Defaults to `0.0`.

    Call arguments:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`.
                If not given, will use `value` for both `key` and `value`,
                which is the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape
                `(batch_size, Tq)`. If given, the output will be zero at
                the positions where `mask==False`.
            - `value_mask`: A boolean mask tensor of shape
                `(batch_size, Tv)`. If given, will apply the mask such that
                values at positions where `mask==False` do not contribute
                to the result.
        return_attention_scores: bool, if `True`, returns the attention
            scores (after masking and softmax) as an additional output
            argument.
        training: Python boolean indicating whether the layer should behave
            in training mode (adding dropout) or in inference mode
            (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention.
            Adds a mask such that position `i` cannot attend to positions
            `j > i`. This prevents the flow of information from the future
            towards the past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
        `(batch_size, Tq, Tv)`.
    """

    def __init__(
        self,
        use_scale=True,
        dropout=0.0,
        **kwargs,
    ):
        # Additive attention reuses the base machinery; only the score
        # computation differs (see `_calculate_scores`).
        super().__init__(use_scale=use_scale, dropout=dropout, **kwargs)

    def build(self, input_shape):
        self._validate_inputs(input_shape)
        self.scale = None
        if self.use_scale:
            # One learned scale per feature dimension of the query.
            dim = input_shape[0][-1]
            self.scale = self.add_weight(
                name="scale",
                shape=[dim],
                initializer="glorot_uniform",
                dtype=self.dtype,
                trainable=True,
            )
        self.built = True

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a nonlinear sum of query and key.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
        """
        # Insert singleton axes so query/key broadcast against each other:
        # query -> [batch_size, Tq, 1, dim], key -> [batch_size, 1, Tv, dim]
        expanded_query = ops.expand_dims(query, axis=-2)
        expanded_key = ops.expand_dims(key, axis=-3)
        if self.use_scale:
            scale = self.scale
        else:
            scale = 1.0
        return ops.sum(
            scale * ops.tanh(expanded_query + expanded_key), axis=-1
        )

    def get_config(self):
        config = super().get_config()
        # `score_mode` is fixed for additive attention; do not expose it.
        del config["score_mode"]
        return config
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import backend
2
+ from keras.src import ops
3
+ from keras.src.api_export import keras_export
4
+ from keras.src.backend import KerasTensor
5
+ from keras.src.layers.layer import Layer
6
+
7
+
8
+ @keras_export("keras.layers.Attention")
9
+ class Attention(Layer):
10
+ """Dot-product attention layer, a.k.a. Luong-style attention.
11
+
12
+ Inputs are a list with 2 or 3 elements:
13
+ 1. A `query` tensor of shape `(batch_size, Tq, dim)`.
14
+ 2. A `value` tensor of shape `(batch_size, Tv, dim)`.
15
+ 3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none
16
+ supplied, `value` will be used as a `key`.
17
+
18
+ The calculation follows the steps:
19
+ 1. Calculate attention scores using `query` and `key` with shape
20
+ `(batch_size, Tq, Tv)`.
21
+ 2. Use scores to calculate a softmax distribution with shape
22
+ `(batch_size, Tq, Tv)`.
23
+ 3. Use the softmax distribution to create a linear combination of `value`
24
+ with shape `(batch_size, Tq, dim)`.
25
+
26
+ Args:
27
+ use_scale: If `True`, will create a scalar variable to scale the
28
+ attention scores.
29
+ dropout: Float between 0 and 1. Fraction of the units to drop for the
30
+ attention scores. Defaults to `0.0`.
31
+ seed: A Python integer to use as random seed in case of `dropout`.
32
+ score_mode: Function to use to compute attention scores, one of
33
+ `{"dot", "concat"}`. `"dot"` refers to the dot product between the
34
+ query and key vectors. `"concat"` refers to the hyperbolic tangent
35
+ of the concatenation of the `query` and `key` vectors.
36
+
37
+ Call arguments:
38
+ inputs: List of the following tensors:
39
+ - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
40
+ - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
41
+ - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If
42
+ not given, will use `value` for both `key` and `value`, which is
43
+ the most common case.
44
+ mask: List of the following tensors:
45
+ - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.
46
+ If given, the output will be zero at the positions where
47
+ `mask==False`.
48
+ - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.
49
+ If given, will apply the mask such that values at positions
50
+ where `mask==False` do not contribute to the result.
51
+ return_attention_scores: bool, it `True`, returns the attention scores
52
+ (after masking and softmax) as an additional output argument.
53
+ training: Python boolean indicating whether the layer should behave in
54
+ training mode (adding dropout) or in inference mode (no dropout).
55
+ use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds
56
+ a mask such that position `i` cannot attend to positions `j > i`.
57
+ This prevents the flow of information from the future towards the
58
+ past. Defaults to `False`.
59
+
60
+ Output:
61
+ Attention outputs of shape `(batch_size, Tq, dim)`.
62
+ (Optional) Attention scores after masking and softmax with shape
63
+ `(batch_size, Tq, Tv)`.
64
+ """
65
+
66
+ def __init__(
67
+ self,
68
+ use_scale=False,
69
+ score_mode="dot",
70
+ dropout=0.0,
71
+ seed=None,
72
+ **kwargs,
73
+ ):
74
+ super().__init__(**kwargs)
75
+ self.use_scale = use_scale
76
+ self.score_mode = score_mode
77
+ self.dropout = dropout
78
+ if self.dropout > 0:
79
+ self.seed_generator = backend.random.SeedGenerator(seed=seed)
80
+
81
+ if self.score_mode not in ["dot", "concat"]:
82
+ raise ValueError(
83
+ "Invalid value for argument score_mode. "
84
+ "Expected one of {'dot', 'concat'}. "
85
+ f"Received: score_mode={score_mode}"
86
+ )
87
+
88
+ self._return_attention_scores = False
89
+
90
+ def build(self, input_shape):
91
+ self._validate_inputs(input_shape)
92
+ self.scale = None
93
+ self.concat_score_weight = None
94
+ if self.use_scale:
95
+ self.scale = self.add_weight(
96
+ name="scale",
97
+ shape=(),
98
+ initializer="ones",
99
+ dtype=self.dtype,
100
+ trainable=True,
101
+ )
102
+ if self.score_mode == "concat":
103
+ self.concat_score_weight = self.add_weight(
104
+ name="concat_score_weight",
105
+ shape=(),
106
+ initializer="ones",
107
+ dtype=self.dtype,
108
+ trainable=True,
109
+ )
110
+ self.built = True
111
+
112
+ def _calculate_scores(self, query, key):
113
+ """Calculates attention scores as a query-key dot product.
114
+
115
+ Args:
116
+ query: Query tensor of shape `(batch_size, Tq, dim)`.
117
+ key: Key tensor of shape `(batch_size, Tv, dim)`.
118
+
119
+ Returns:
120
+ Tensor of shape `(batch_size, Tq, Tv)`.
121
+ """
122
+ if self.score_mode == "dot":
123
+ scores = ops.matmul(query, ops.transpose(key, axes=[0, 2, 1]))
124
+ if self.scale is not None:
125
+ scores *= self.scale
126
+ elif self.score_mode == "concat":
127
+ # Reshape tensors to enable broadcasting.
128
+ # Reshape into [batch_size, Tq, 1, dim].
129
+ q_reshaped = ops.expand_dims(query, axis=-2)
130
+ # Reshape into [batch_size, 1, Tv, dim].
131
+ k_reshaped = ops.expand_dims(key, axis=-3)
132
+ if self.scale is not None:
133
+ scores = self.concat_score_weight * ops.sum(
134
+ ops.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1
135
+ )
136
+ else:
137
+ scores = self.concat_score_weight * ops.sum(
138
+ ops.tanh(q_reshaped + k_reshaped), axis=-1
139
+ )
140
+ else:
141
+ raise ValueError("scores not computed")
142
+
143
+ return scores
144
+
145
+ def _apply_scores(self, scores, value, scores_mask=None, training=False):
146
+ """Applies attention scores to the given value tensor.
147
+
148
+ To use this method in your attention layer, follow the steps:
149
+
150
+ * Use `query` tensor of shape `(batch_size, Tq)` and `key` tensor of
151
+ shape `(batch_size, Tv)` to calculate the attention `scores`.
152
+ * Pass `scores` and `value` tensors to this method. The method applies
153
+ `scores_mask`, calculates
154
+ `attention_distribution = softmax(scores)`, then returns
155
+ `matmul(attention_distribution, value).
156
+ * Apply `query_mask` and return the result.
157
+
158
+ Args:
159
+ scores: Scores float tensor of shape `(batch_size, Tq, Tv)`.
160
+ value: Value tensor of shape `(batch_size, Tv, dim)`.
161
+ scores_mask: A boolean mask tensor of shape `(batch_size, 1, Tv)`
162
+ or `(batch_size, Tq, Tv)`. If given, scores at positions where
163
+ `scores_mask==False` do not contribute to the result. It must
164
+ contain at least one `True` value in each line along the last
165
+ dimension.
166
+ training: Python boolean indicating whether the layer should behave
167
+ in training mode (adding dropout) or in inference mode
168
+ (no dropout).
169
+
170
+ Returns:
171
+ Tensor of shape `(batch_size, Tq, dim)`.
172
+ Attention scores after masking and softmax with shape
173
+ `(batch_size, Tq, Tv)`.
174
+ """
175
+ if scores_mask is not None:
176
+ padding_mask = ops.logical_not(scores_mask)
177
+ # Bias so padding positions do not contribute to attention
178
+ # distribution. Note 65504. is the max float16 value.
179
+ max_value = 65504.0 if scores.dtype == "float16" else 1.0e9
180
+ scores -= max_value * ops.cast(padding_mask, dtype=scores.dtype)
181
+
182
+ weights = ops.softmax(scores, axis=-1)
183
+ if training and self.dropout > 0:
184
+ weights = backend.random.dropout(
185
+ weights,
186
+ self.dropout,
187
+ seed=self.seed_generator,
188
+ )
189
+ return ops.matmul(weights, value), weights
190
+
191
+ def _calculate_score_mask(self, scores, v_mask, use_causal_mask):
192
+ if use_causal_mask:
193
+ # Creates a lower triangular mask, so position i cannot attend to
194
+ # positions j > i. This prevents the flow of information from the
195
+ # future into the past.
196
+ score_shape = ops.shape(scores)
197
+ # causal_mask_shape = [1, Tq, Tv].
198
+ mask_shape = (1, score_shape[-2], score_shape[-1])
199
+ ones_mask = ops.ones(shape=mask_shape, dtype="int32")
200
+ row_index = ops.cumsum(ones_mask, axis=-2)
201
+ col_index = ops.cumsum(ones_mask, axis=-1)
202
+ causal_mask = ops.greater_equal(row_index, col_index)
203
+
204
+ if v_mask is not None:
205
+ # Mask of shape [batch_size, 1, Tv].
206
+ v_mask = ops.expand_dims(v_mask, axis=-2)
207
+ return ops.logical_and(v_mask, causal_mask)
208
+ return causal_mask
209
+ else:
210
+ # If not using causal mask, return the value mask as is,
211
+ # or None if the value mask is not provided.
212
+ return v_mask
213
+
214
+ def call(
215
+ self,
216
+ inputs,
217
+ mask=None,
218
+ training=False,
219
+ return_attention_scores=False,
220
+ use_causal_mask=False,
221
+ ):
222
+ self._validate_inputs(inputs=inputs, mask=mask)
223
+ self._return_attention_scores = return_attention_scores
224
+ q = inputs[0]
225
+ v = inputs[1]
226
+ k = inputs[2] if len(inputs) > 2 else v
227
+ q_mask = mask[0] if mask else None
228
+ v_mask = mask[1] if mask else None
229
+ scores = self._calculate_scores(query=q, key=k)
230
+ scores_mask = self._calculate_score_mask(
231
+ scores, v_mask, use_causal_mask
232
+ )
233
+ attention_output, attention_scores = self._apply_scores(
234
+ scores=scores, value=v, scores_mask=scores_mask, training=training
235
+ )
236
+ if q_mask is not None:
237
+ # Mask of shape [batch_size, Tq, 1].
238
+ q_mask = ops.expand_dims(q_mask, axis=-1)
239
+ attention_output *= ops.cast(q_mask, dtype=attention_output.dtype)
240
+ if return_attention_scores:
241
+ return (attention_output, attention_scores)
242
+ else:
243
+ return attention_output
244
+
245
+ def compute_mask(self, inputs, mask=None):
246
+ self._validate_inputs(inputs=inputs, mask=mask)
247
+ if mask is None or mask[0] is None:
248
+ return None
249
+ return ops.convert_to_tensor(mask[0])
250
+
251
+ def compute_output_shape(self, input_shape):
252
+ query_shape, value_shape, key_shape = input_shape
253
+ if key_shape is None:
254
+ key_shape = value_shape
255
+
256
+ output_shape = (*query_shape[:-1], value_shape[-1])
257
+ if self._return_attention_scores:
258
+ scores_shape = (query_shape[0], query_shape[1], key_shape[1])
259
+ return output_shape, scores_shape
260
+ return output_shape
261
+
262
+ def compute_output_spec(
263
+ self,
264
+ inputs,
265
+ mask=None,
266
+ return_attention_scores=False,
267
+ training=None,
268
+ use_causal_mask=False,
269
+ ):
270
+ # Validate and unpack inputs
271
+ self._validate_inputs(inputs, mask)
272
+ query = inputs[0]
273
+ value = inputs[1]
274
+ key = inputs[2] if len(inputs) > 2 else value
275
+
276
+ # Compute primary output shape
277
+ output_shape = self.compute_output_shape(
278
+ [query.shape, value.shape, key.shape]
279
+ )
280
+ output_spec = KerasTensor(output_shape, dtype=self.compute_dtype)
281
+
282
+ # Handle attention scores if requested
283
+ if self._return_attention_scores or return_attention_scores:
284
+ scores_shape = (
285
+ query.shape[0],
286
+ query.shape[1],
287
+ key.shape[1],
288
+ ) # (batch_size, Tq, Tv)
289
+ attention_scores_spec = KerasTensor(
290
+ scores_shape, dtype=self.compute_dtype
291
+ )
292
+ return (output_spec, attention_scores_spec)
293
+
294
+ return output_spec
295
+
296
+ def _validate_inputs(self, inputs, mask=None):
297
+ """Validates arguments of the call method."""
298
+ class_name = self.__class__.__name__
299
+ if not isinstance(inputs, list):
300
+ raise ValueError(
301
+ f"{class_name} layer must be called on a list of inputs, "
302
+ "namely [query, value] or [query, value, key]. "
303
+ f"Received: inputs={inputs}."
304
+ )
305
+ if len(inputs) < 2 or len(inputs) > 3:
306
+ raise ValueError(
307
+ f"{class_name} layer accepts inputs list of length 2 or 3, "
308
+ "namely [query, value] or [query, value, key]. "
309
+ f"Received length: {len(inputs)}."
310
+ )
311
+ if mask is not None:
312
+ if not isinstance(mask, list):
313
+ raise ValueError(
314
+ f"{class_name} layer mask must be a list, "
315
+ f"namely [query_mask, value_mask]. Received: mask={mask}."
316
+ )
317
+ if len(mask) < 2 or len(mask) > 3:
318
+ raise ValueError(
319
+ f"{class_name} layer accepts mask list of length 2 or 3. "
320
+ f"Received: inputs={inputs}, mask={mask}."
321
+ )
322
+
323
+ def get_config(self):
324
+ base_config = super().get_config()
325
+ config = {
326
+ "use_scale": self.use_scale,
327
+ "score_mode": self.score_mode,
328
+ "dropout": self.dropout,
329
+ }
330
+ return {**base_config, **config}
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ from keras.src import constraints
4
+ from keras.src import initializers
5
+ from keras.src import ops
6
+ from keras.src import regularizers
7
+ from keras.src.api_export import keras_export
8
+ from keras.src.backend.config import is_flash_attention_enabled
9
+ from keras.src.layers.activations.softmax import Softmax
10
+ from keras.src.layers.core.einsum_dense import EinsumDense
11
+ from keras.src.layers.layer import Layer
12
+ from keras.src.layers.regularization.dropout import Dropout
13
+
14
+
15
+ @keras_export("keras.layers.GroupQueryAttention")
16
+ class GroupedQueryAttention(Layer):
17
+ """Grouped Query Attention layer.
18
+
19
+ This is an implementation of grouped-query attention introduced by
20
+ [Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). Here
21
+ `num_key_value_heads` denotes number of groups, setting
22
+ `num_key_value_heads` to 1 is equivalent to multi-query attention, and
23
+ when `num_key_value_heads` is equal to `num_query_heads` it is equivalent
24
+ to multi-head attention.
25
+
26
+ This layer first projects `query`, `key`, and `value` tensors. Then, `key`
27
+ and `value` are repeated to match the number of heads of `query`.
28
+
29
+ Then, the `query` is scaled and dot-producted with `key` tensors. These are
30
+ softmaxed to obtain attention probabilities. The value tensors are then
31
+ interpolated by these probabilities and concatenated back to a single
32
+ tensor.
33
+
34
+ Args:
35
+ head_dim: Size of each attention head.
36
+ num_query_heads: Number of query attention heads.
37
+ num_key_value_heads: Number of key and value attention heads.
38
+ dropout: Dropout probability.
39
+ use_bias: Boolean, whether the dense layers use bias vectors/matrices.
40
+ flash_attention: If `None`, the layer attempts to use flash
41
+ attention for faster and more memory-efficient attention
42
+ computations when possible. This behavior can be configured using
43
+ `keras.config.enable_flash_attention()` or
44
+ `keras.config.disable_flash_attention()`.
45
+ kernel_initializer: Initializer for dense layer kernels.
46
+ bias_initializer: Initializer for dense layer biases.
47
+ kernel_regularizer: Regularizer for dense layer kernels.
48
+ bias_regularizer: Regularizer for dense layer biases.
49
+ activity_regularizer: Regularizer for dense layer activity.
50
+ kernel_constraint: Constraint for dense layer kernels.
51
+ bias_constraint: Constraint for dense layer kernels.
52
+ seed: Optional integer to seed the dropout layer.
53
+
54
+ Call arguments:
55
+ query: Query tensor of shape `(batch_dim, target_seq_len, feature_dim)`,
56
+ where `batch_dim` is batch size, `target_seq_len` is the length of
57
+ target sequence, and `feature_dim` is dimension of feature.
58
+ value: Value tensor of shape `(batch_dim, source_seq_len, feature_dim)`,
59
+ where `batch_dim` is batch size, `source_seq_len` is the length of
60
+ source sequence, and `feature_dim` is dimension of feature.
61
+ key: Optional key tensor of shape
62
+ `(batch_dim, source_seq_len, feature_dim)`. If not given, will use
63
+ `value` for both `key` and `value`, which is most common case.
64
+ attention_mask: A boolean mask of shape
65
+ `(batch_dim, target_seq_len, source_seq_len)`, that prevents
66
+ attention to certain positions. The boolean mask specifies which
67
+ query elements can attend to which key elements, where 1 indicates
68
+ attention and 0 indicates no attention. Broadcasting can happen for
69
+ the missing batch dimensions and the head dimension.
70
+ return_attention_scores: A boolean to indicate whether the output
71
+ should be `(attention_output, attention_scores)` if `True`, or
72
+ `attention_output` if `False`. Defaults to `False`.
73
+ training: Python boolean indicating whether the layer should behave in
74
+ training mode (adding dropout) or in inference mode (no dropout).
75
+ Will go with either using the training mode of the parent
76
+ layer/model or `False` (inference) if there is no parent layer.
77
+ use_causal_mask: A boolean to indicate whether to apply a causal mask to
78
+ prevent tokens from attending to future tokens (e.g., used in a
79
+ decoder Transformer).
80
+
81
+ Returns:
82
+ attention_output: Result of the computation, of shape
83
+ `(batch_dim, target_seq_len, feature_dim)`, where `target_seq_len`
84
+ is for target sequence length and `feature_dim` is the query input
85
+ last dim.
86
+ attention_scores: (Optional) attention coefficients of shape
87
+ `(batch_dim, num_query_heads, target_seq_len, source_seq_len)`.
88
+ """
89
+
90
+ def __init__(
91
+ self,
92
+ head_dim,
93
+ num_query_heads,
94
+ num_key_value_heads,
95
+ dropout=0.0,
96
+ use_bias=True,
97
+ flash_attention=None,
98
+ kernel_initializer="glorot_uniform",
99
+ bias_initializer="zeros",
100
+ kernel_regularizer=None,
101
+ bias_regularizer=None,
102
+ activity_regularizer=None,
103
+ kernel_constraint=None,
104
+ bias_constraint=None,
105
+ seed=None,
106
+ **kwargs,
107
+ ):
108
+ super().__init__(**kwargs)
109
+ self.supports_masking = True
110
+ self.head_dim = head_dim
111
+ self.num_query_heads = num_query_heads
112
+ self.num_key_value_heads = num_key_value_heads
113
+ if num_query_heads % num_key_value_heads != 0:
114
+ raise ValueError(
115
+ "`num_query_heads` must be divisible"
116
+ " by `num_key_value_heads`."
117
+ )
118
+ self.num_repeats = num_query_heads // num_key_value_heads
119
+ self.dropout = dropout
120
+ self.use_bias = use_bias
121
+ self._flash_attention = flash_attention or is_flash_attention_enabled()
122
+ self.kernel_initializer = initializers.get(kernel_initializer)
123
+ self.bias_initializer = initializers.get(bias_initializer)
124
+ self.kernel_regularizer = regularizers.get(kernel_regularizer)
125
+ self.bias_regularizer = regularizers.get(bias_regularizer)
126
+ self.activity_regularizer = regularizers.get(activity_regularizer)
127
+ self.kernel_constraint = constraints.get(kernel_constraint)
128
+ self.bias_constraint = constraints.get(bias_constraint)
129
+ self.seed = seed
130
+
131
+ self._inverse_sqrt_head_dim = 1.0 / math.sqrt(float(self.head_dim))
132
+ self._return_attention_scores = False
133
+
134
+ # Check for flash attention constraints
135
+ if self._flash_attention and self.dropout > 0.0:
136
+ raise ValueError(
137
+ "Dropout is not supported when flash attention is enabled. "
138
+ "Please set dropout to 0.0 to use flash attention."
139
+ )
140
+
141
+ def build(
142
+ self,
143
+ query_shape,
144
+ value_shape,
145
+ key_shape=None,
146
+ ):
147
+ # Einsum variables:
148
+ # b = batch size
149
+ # q = query length
150
+ # k = key/value length
151
+ # m = model dim
152
+ # u = num query heads
153
+ # v = num key/value heads
154
+ # h = head dim
155
+ key_shape = value_shape if key_shape is None else key_shape
156
+ self.feature_dim = query_shape[-1]
157
+ self._query_dense = EinsumDense(
158
+ "bqm,muh->bquh",
159
+ output_shape=(None, self.num_query_heads, self.head_dim),
160
+ bias_axes="uh" if self.use_bias else None,
161
+ name="query",
162
+ **self._get_common_kwargs_for_sublayer(),
163
+ )
164
+ self._query_dense.build(query_shape)
165
+
166
+ self._key_dense = EinsumDense(
167
+ "bkm,mvh->bkvh",
168
+ output_shape=(None, self.num_key_value_heads, self.head_dim),
169
+ bias_axes="vh" if self.use_bias else None,
170
+ name="key",
171
+ **self._get_common_kwargs_for_sublayer(),
172
+ )
173
+ self._key_dense.build(key_shape)
174
+
175
+ self._value_dense = EinsumDense(
176
+ "bkm,mvh->bkvh",
177
+ output_shape=(None, self.num_key_value_heads, self.head_dim),
178
+ bias_axes="vh" if self.use_bias else None,
179
+ name="value",
180
+ **self._get_common_kwargs_for_sublayer(),
181
+ )
182
+ self._value_dense.build(value_shape)
183
+
184
+ self._softmax = Softmax(axis=-1, dtype=self.dtype_policy)
185
+ self._dropout_layer = Dropout(
186
+ rate=self.dropout, dtype=self.dtype_policy, seed=self.seed
187
+ )
188
+
189
+ self._dot_product_equation = "bquh,bkuh->buqk"
190
+ self._combine_equation = "buqk,bkuh->bquh"
191
+
192
+ self._output_dense = EinsumDense(
193
+ "bquh,uhm->bqm",
194
+ output_shape=(None, self.feature_dim),
195
+ bias_axes="m" if self.use_bias else None,
196
+ name="attention_output",
197
+ **self._get_common_kwargs_for_sublayer(),
198
+ )
199
+ self._output_dense.build(
200
+ (None, None, self.num_query_heads, self.head_dim)
201
+ )
202
+ self.built = True
203
+
204
+ def _get_common_kwargs_for_sublayer(self):
205
+ common_kwargs = dict(
206
+ kernel_regularizer=self.kernel_regularizer,
207
+ bias_regularizer=self.bias_regularizer,
208
+ activity_regularizer=self.activity_regularizer,
209
+ kernel_constraint=self.kernel_constraint,
210
+ bias_constraint=self.bias_constraint,
211
+ dtype=self.dtype_policy,
212
+ )
213
+ # Create new clone of kernel/bias initializer, so that we don't reuse
214
+ # the initializer instance, which could lead to same init value since
215
+ # initializer is stateless.
216
+ kernel_initializer = self.kernel_initializer.__class__.from_config(
217
+ self.kernel_initializer.get_config()
218
+ )
219
+ bias_initializer = self.bias_initializer.__class__.from_config(
220
+ self.bias_initializer.get_config()
221
+ )
222
+ common_kwargs["kernel_initializer"] = kernel_initializer
223
+ common_kwargs["bias_initializer"] = bias_initializer
224
+ return common_kwargs
225
+
226
+ def call(
227
+ self,
228
+ query,
229
+ value,
230
+ key=None,
231
+ query_mask=None,
232
+ value_mask=None,
233
+ key_mask=None,
234
+ attention_mask=None,
235
+ return_attention_scores=False,
236
+ training=None,
237
+ use_causal_mask=False,
238
+ ):
239
+ self._return_attention_scores = return_attention_scores
240
+ if key is None:
241
+ key = value
242
+
243
+ attention_mask = self._compute_attention_mask(
244
+ query,
245
+ value,
246
+ query_mask=query_mask,
247
+ value_mask=value_mask,
248
+ key_mask=key_mask,
249
+ attention_mask=attention_mask,
250
+ use_causal_mask=use_causal_mask,
251
+ )
252
+
253
+ query = self._query_dense(query)
254
+ key = self._key_dense(key)
255
+ value = self._value_dense(value)
256
+
257
+ key = ops.repeat(
258
+ key, self.num_repeats, axis=2
259
+ ) # (batch_dim, source_seq_len, query_heads, head_dim)
260
+ value = ops.repeat(
261
+ value, self.num_repeats, axis=2
262
+ ) # (batch_dim, source_seq_len, query_heads, head_dim)
263
+
264
+ output, scores = self._compute_attention(
265
+ query,
266
+ key,
267
+ value,
268
+ attention_mask=attention_mask,
269
+ training=training,
270
+ )
271
+
272
+ output = self._output_dense(
273
+ output
274
+ ) # (batch_dim, target_seq_len, feature_dim)
275
+
276
+ if return_attention_scores:
277
+ return output, scores
278
+ return output
279
+
280
+ def _compute_attention_mask(
281
+ self,
282
+ query,
283
+ value,
284
+ query_mask=None,
285
+ value_mask=None,
286
+ key_mask=None,
287
+ attention_mask=None,
288
+ use_causal_mask=False,
289
+ ):
290
+ """Computes the attention mask, using the Keras masks of the inputs.
291
+
292
+ * The `query`'s mask is reshaped from [B, T] to [B, T, 1].
293
+ * The `value`'s mask is reshaped from [B, S] to [B, 1, S].
294
+ * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s
295
+ mask is ignored if `key` is `None` or if `key is value`.
296
+ * If `use_causal_mask=True`, then the causal mask is computed. Its shape
297
+ is [1, T, S].
298
+
299
+ All defined masks are merged using a logical AND operation (`&`).
300
+
301
+ In general, if the `query` and `value` are masked, then there is no need
302
+ to define the `attention_mask`.
303
+
304
+ Args:
305
+ query: Projected query tensor of shape `(B, T, N, key_dim)`.
306
+ key: Projected key tensor of shape `(B, T, N, key_dim)`.
307
+ value: Projected value tensor of shape `(B, T, N, value_dim)`.
308
+ attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
309
+ attention to certain positions.
310
+ use_causal_mask: A boolean to indicate whether to apply a causal
311
+ mask to prevent tokens from attending to future tokens (e.g.,
312
+ used in a decoder Transformer).
313
+
314
+ Returns:
315
+ attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
316
+ attention to certain positions, based on the Keras masks of the
317
+ `query`, `key`, `value`, and `attention_mask` tensors, and the
318
+ causal mask if `use_causal_mask=True`.
319
+ """
320
+ auto_mask = None
321
+ if query_mask is not None:
322
+ query_mask = ops.cast(query_mask, "bool") # defensive casting
323
+ # B = batch size, T = max query length
324
+ auto_mask = ops.expand_dims(query_mask, -1) # shape is [B, T, 1]
325
+ if value_mask is not None:
326
+ value_mask = ops.cast(value_mask, "bool") # defensive casting
327
+ # B = batch size, S == max value length
328
+ mask = ops.expand_dims(value_mask, -2) # shape is [B, 1, S]
329
+ auto_mask = mask if auto_mask is None else auto_mask & mask
330
+ if key_mask is not None:
331
+ key_mask = ops.cast(key_mask, "bool") # defensive casting
332
+ # B == batch size, S == max key length == max value length
333
+ mask = ops.expand_dims(key_mask, -2) # shape is [B, 1, S]
334
+ auto_mask = mask if auto_mask is None else auto_mask & mask
335
+ if use_causal_mask:
336
+ # the shape of the causal mask is [1, T, S]
337
+ mask = self._compute_causal_mask(query, value)
338
+ auto_mask = mask if auto_mask is None else auto_mask & mask
339
+ if auto_mask is not None:
340
+ # merge attention_mask & automatic mask, to shape [B, T, S]
341
+ attention_mask = (
342
+ auto_mask
343
+ if attention_mask is None
344
+ else ops.cast(attention_mask, bool) & auto_mask
345
+ )
346
+ return attention_mask
347
+
348
+ def _compute_causal_mask(self, query, value=None):
349
+ """Computes a causal mask (e.g., for masked self-attention layers).
350
+
351
+ For example, if query and value both contain sequences of length 4,
352
+ this function returns a boolean tensor equal to:
353
+
354
+ ```
355
+ [[[True, False, False, False],
356
+ [True, True, False, False],
357
+ [True, True, True, False],
358
+ [True, True, True, True]]]
359
+ ```
360
+
361
+ Args:
362
+ query: query tensor of shape `(B, T, ...)`.
363
+ value: value tensor of shape `(B, S, ...)` (optional, defaults to
364
+ query).
365
+
366
+ Returns:
367
+ mask: a boolean tensor of shape `(1, T, S)` containing a lower
368
+ triangular matrix of shape `(T, S)`.
369
+ """
370
+ q_seq_length = ops.shape(query)[1]
371
+ v_seq_length = q_seq_length if value is None else ops.shape(value)[1]
372
+ ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32")
373
+ row_index = ops.cumsum(ones_mask, axis=-2)
374
+ col_index = ops.cumsum(ones_mask, axis=-1)
375
+ return ops.greater_equal(row_index, col_index)
376
+
377
+ def _compute_attention(
378
+ self, query, key, value, attention_mask=None, training=None
379
+ ):
380
+ # Check for flash attention constraints
381
+ if self._flash_attention and self._return_attention_scores:
382
+ raise ValueError(
383
+ "Returning attention scores is not supported when flash "
384
+ "attention is enabled. Please disable flash attention to access"
385
+ " attention scores."
386
+ )
387
+
388
+ # Determine whether to use dot-product attention
389
+ use_dot_product_attention = not (
390
+ self.dropout > 0.0
391
+ or self._return_attention_scores
392
+ or (len(query.shape) != 4)
393
+ )
394
+
395
+ if use_dot_product_attention:
396
+ if attention_mask is not None:
397
+ # Ensure attention_mask has the correct shape for broadcasting
398
+ # Expected shape: [batch_size, num_heads, query_seq_len,
399
+ # key_seq_len].
400
+ mask_expansion_axis = -1 * 2 - 1
401
+ len_attention_scores_shape = 4 # Only accepts 4D inputs
402
+ for _ in range(
403
+ len_attention_scores_shape - len(attention_mask.shape)
404
+ ):
405
+ attention_mask = ops.expand_dims(
406
+ attention_mask, axis=mask_expansion_axis
407
+ )
408
+ attention_mask = ops.cast(attention_mask, dtype="bool")
409
+ # Directly compute the attention output using dot-product attention
410
+ attention_output = ops.dot_product_attention(
411
+ query=query,
412
+ key=key,
413
+ value=value,
414
+ bias=None,
415
+ mask=attention_mask,
416
+ scale=self._inverse_sqrt_head_dim,
417
+ is_causal=False,
418
+ flash_attention=self._flash_attention,
419
+ )
420
+ return attention_output, None
421
+
422
+ # Default behavior without flash attention, with explicit attention
423
+ # scores
424
+ query = ops.multiply(
425
+ query, ops.cast(self._inverse_sqrt_head_dim, query.dtype)
426
+ )
427
+ # Take the dot product between "query" and "key" to get the raw
428
+ # attention scores.
429
+ scores = ops.einsum(
430
+ self._dot_product_equation, query, key
431
+ ) # (batch_dim, query_heads, target_seq_len, source_seq_len)
432
+ scores = self._masked_softmax(scores, attention_mask=attention_mask)
433
+ # This is actually dropping out entire tokens to attend to, which might
434
+ # seem a bit unusual, but is taken from the original Transformer paper.
435
+ if self.dropout > 0.0:
436
+ scores_dropout = self._dropout_layer(scores, training=training)
437
+ else:
438
+ scores_dropout = scores
439
+ output = ops.einsum(self._combine_equation, scores_dropout, value)
440
+ return output, scores
441
+
442
+ def _masked_softmax(self, scores, attention_mask=None):
443
+ # Normalize the attention scores to probabilities.
444
+ # scores = [B, N, T, S]
445
+ if attention_mask is not None:
446
+ # The expand dim happens starting from the `num_heads` dimension,
447
+ # (<batch_dims>, num_heads, <query_attention_dims,
448
+ # key_attention_dims>)
449
+ mask_expansion_axis = -1 * 2 - 1
450
+ for _ in range(len(scores.shape) - len(attention_mask.shape)):
451
+ attention_mask = ops.expand_dims(
452
+ attention_mask, axis=mask_expansion_axis
453
+ )
454
+ return self._softmax(scores, mask=attention_mask)
455
+
456
+ def compute_output_shape(
457
+ self,
458
+ query_shape,
459
+ value_shape,
460
+ key_shape=None,
461
+ ):
462
+ if key_shape is None:
463
+ key_shape = value_shape
464
+
465
+ if query_shape[-1] != value_shape[-1]:
466
+ raise ValueError(
467
+ "The last dimension of `query_shape` and `value_shape` "
468
+ f"must be equal, but are {query_shape[-1]}, {value_shape[-1]}. "
469
+ "Received: query_shape={query_shape}, value_shape={value_shape}"
470
+ )
471
+
472
+ if value_shape[1:-1] != key_shape[1:-1]:
473
+ raise ValueError(
474
+ "All dimensions of `value` and `key`, except the last one, "
475
+ f"must be equal. Received: value_shape={value_shape} and "
476
+ f"key_shape={key_shape}"
477
+ )
478
+
479
+ return query_shape
480
+
481
+ def get_config(self):
482
+ config = {
483
+ "head_dim": self.head_dim,
484
+ "num_query_heads": self.num_query_heads,
485
+ "num_key_value_heads": self.num_key_value_heads,
486
+ "use_bias": self.use_bias,
487
+ "dropout": self.dropout,
488
+ "kernel_initializer": initializers.serialize(
489
+ self.kernel_initializer
490
+ ),
491
+ "bias_initializer": initializers.serialize(self.bias_initializer),
492
+ "kernel_regularizer": regularizers.serialize(
493
+ self.kernel_regularizer
494
+ ),
495
+ "bias_regularizer": regularizers.serialize(self.bias_regularizer),
496
+ "activity_regularizer": regularizers.serialize(
497
+ self.activity_regularizer
498
+ ),
499
+ "kernel_constraint": constraints.serialize(self.kernel_constraint),
500
+ "bias_constraint": constraints.serialize(self.bias_constraint),
501
+ "seed": self.seed,
502
+ }
503
+ base_config = super().get_config()
504
+ return {**base_config, **config}
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py ADDED
@@ -0,0 +1,827 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import string
3
+
4
+ import numpy as np
5
+
6
+ from keras.src import backend
7
+ from keras.src import constraints
8
+ from keras.src import initializers
9
+ from keras.src import ops
10
+ from keras.src import regularizers
11
+ from keras.src.api_export import keras_export
12
+ from keras.src.backend.config import is_flash_attention_enabled
13
+ from keras.src.layers.activations.softmax import Softmax
14
+ from keras.src.layers.core.einsum_dense import EinsumDense
15
+ from keras.src.layers.layer import Layer
16
+ from keras.src.layers.regularization.dropout import Dropout
17
+
18
+
19
@keras_export("keras.layers.MultiHeadAttention")
class MultiHeadAttention(Layer):
    """MultiHeadAttention layer.

    This is an implementation of multi-headed attention as described in the
    paper "Attention is all you Need"
    [Vaswani et al., 2017](https://arxiv.org/abs/1706.03762).
    If `query`, `key,` `value` are the same, then
    this is self-attention. Each timestep in `query` attends to the
    corresponding sequence in `key`, and returns a fixed-width vector.

    This layer first projects `query`, `key` and `value`. These are
    (effectively) a list of tensors of length `num_attention_heads`, where the
    corresponding shapes are `(batch_size, <query dimensions>, key_dim)`,
    `(batch_size, <key/value dimensions>, key_dim)`,
    `(batch_size, <key/value dimensions>, value_dim)`.

    Then, the query and key tensors are dot-producted and scaled. These are
    softmaxed to obtain attention probabilities. The value tensors are then
    interpolated by these probabilities, then concatenated back to a single
    tensor.

    Finally, the result tensor with the last dimension as `value_dim` can take
    a linear projection and return.

    Args:
        num_heads: Number of attention heads.
        key_dim: Size of each attention head for query and key.
        value_dim: Size of each attention head for value.
        dropout: Dropout probability.
        use_bias: Boolean, whether the dense layers use bias vectors/matrices.
        output_shape: The expected shape of an output tensor, besides the batch
            and sequence dims. If not specified, projects back to the query
            feature dim (the query input's last dimension).
        attention_axes: axes over which the attention is applied. `None` means
            attention over all axes, but batch, heads, and features.
        flash_attention: If `None`, the layer attempts to use flash
            attention for faster and more memory-efficient attention
            computations when possible. This behavior can be configured using
            `keras.config.enable_flash_attention()` or
            `keras.config.disable_flash_attention()`.
        kernel_initializer: Initializer for dense layer kernels.
        bias_initializer: Initializer for dense layer biases.
        kernel_regularizer: Regularizer for dense layer kernels.
        bias_regularizer: Regularizer for dense layer biases.
        activity_regularizer: Regularizer for dense layer activity.
        kernel_constraint: Constraint for dense layer kernels.
        bias_constraint: Constraint for dense layer kernels.
        seed: Optional integer to seed the dropout layer.

    Call arguments:
        query: Query tensor of shape `(B, T, dim)`, where `B` is the batch size,
            `T` is the target sequence length, and dim is the feature dimension.
        value: Value tensor of shape `(B, S, dim)`, where `B` is the batch size,
            `S` is the source sequence length, and dim is the feature dimension.
        key: Optional key tensor of shape `(B, S, dim)`. If not given, will
            use `value` for both `key` and `value`, which is the most common
            case.
        attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
            attention to certain positions. The boolean mask specifies which
            query elements can attend to which key elements, 1 indicates
            attention and 0 indicates no attention. Broadcasting can happen for
            the missing batch dimensions and the head dimension.
        return_attention_scores: A boolean to indicate whether the output should
            be `(attention_output, attention_scores)` if `True`, or
            `attention_output` if `False`. Defaults to `False`.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
            Will go with either using the training mode of the parent
            layer/model, or `False` (inference) if there is no parent layer.
        use_causal_mask: A boolean to indicate whether to apply a causal mask to
            prevent tokens from attending to future tokens (e.g., used in a
            decoder Transformer).

    Returns:
        attention_output: The result of the computation, of shape `(B, T, E)`,
            where `T` is for target sequence shapes and `E` is the query input
            last dimension if `output_shape` is `None`. Otherwise, the
            multi-head outputs are projected to the shape specified by
            `output_shape`.
        attention_scores: (Optional) multi-head attention coefficients over
            attention axes.
    """

    def __init__(
        self,
        num_heads,
        key_dim,
        value_dim=None,
        dropout=0.0,
        use_bias=True,
        output_shape=None,
        attention_axes=None,
        flash_attention=None,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        seed=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.supports_masking = True
        self._num_heads = num_heads
        self._key_dim = key_dim
        # Default the value head size to the key head size.
        self._value_dim = value_dim if value_dim else key_dim
        self._dropout = dropout
        self._use_bias = use_bias
        if output_shape:
            if isinstance(output_shape, int):
                output_shape = (output_shape,)
            try:
                output_shape = tuple(output_shape)
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed; any conversion failure still becomes a
            # ValueError with a helpful message.
            except Exception:
                raise ValueError(
                    f"Invalid `output_shape`: {output_shape}. When "
                    "specified, the `output_shape` should be of type tuple, "
                    "list, or int."
                )
        self._output_shape = output_shape
        # NOTE(review): because of `or`, an explicit `flash_attention=False`
        # still falls back to the global flag — confirm this is intended.
        self._flash_attention = flash_attention or is_flash_attention_enabled()
        self._kernel_initializer = initializers.get(kernel_initializer)
        self._bias_initializer = initializers.get(bias_initializer)
        self._kernel_regularizer = regularizers.get(kernel_regularizer)
        self._bias_regularizer = regularizers.get(bias_regularizer)
        self._activity_regularizer = regularizers.get(activity_regularizer)
        self._kernel_constraint = constraints.get(kernel_constraint)
        self._bias_constraint = constraints.get(bias_constraint)
        if isinstance(attention_axes, int):
            attention_axes = (attention_axes,)
        elif attention_axes and not isinstance(attention_axes, (list, tuple)):
            raise ValueError(
                "`attention_axes` must be an int, list, or tuple."
                f"Received: attention_axes={attention_axes}"
            )
        self._attention_axes = attention_axes
        self.seed = seed

        # Pre-compute 1/sqrt(key_dim), the standard attention scaling factor.
        self._inverse_sqrt_key_dim = 1.0 / math.sqrt(float(self._key_dim))
        self._return_attention_scores = False

        # Check for flash attention constraints
        if self._flash_attention and self._dropout > 0.0:
            raise ValueError(
                "Dropout is not supported when flash attention is enabled. "
                "Please set dropout to 0.0 to use flash attention."
            )

    @property
    def num_heads(self):
        return self._num_heads

    @property
    def key_dim(self):
        return self._key_dim

    @property
    def value_dim(self):
        return self._value_dim

    @property
    def dropout(self):
        return self._dropout

    @property
    def use_bias(self):
        return self._use_bias

    # Avoid exposing `output_shape` as it may conflict with `Functional` and
    # `Sequential` models when calling `summary()`.

    @property
    def attention_axes(self):
        return self._attention_axes

    def get_config(self):
        base_config = super().get_config()
        config = {
            "num_heads": self._num_heads,
            "key_dim": self._key_dim,
            "value_dim": self._value_dim,
            "dropout": self._dropout,
            "use_bias": self._use_bias,
            "output_shape": self._output_shape,
            "attention_axes": self._attention_axes,
            "kernel_initializer": initializers.serialize(
                self._kernel_initializer
            ),
            "bias_initializer": initializers.serialize(self._bias_initializer),
            "kernel_regularizer": regularizers.serialize(
                self._kernel_regularizer
            ),
            "bias_regularizer": regularizers.serialize(self._bias_regularizer),
            "activity_regularizer": regularizers.serialize(
                self._activity_regularizer
            ),
            "kernel_constraint": constraints.serialize(self._kernel_constraint),
            "bias_constraint": constraints.serialize(self._bias_constraint),
            "seed": self.seed,
        }
        return {**base_config, **config}

    def build(
        self,
        query_shape,
        value_shape,
        key_shape=None,
    ):
        """Builds layers and variables.

        Args:
            query_shape: Shape of the `query` tensor.
            value_shape: Shape of the `value` tensor.
            key_shape: Optional shape of the `key` tensor.
        """
        key_shape = value_shape if key_shape is None else key_shape

        if value_shape[1:-1] != key_shape[1:-1]:
            raise ValueError(
                "All dimensions of `value` and `key`, except the last one, "
                f"must be equal. Received: value_shape={value_shape} and "
                f"key_shape={key_shape}"
            )

        query_rank = len(query_shape)
        value_rank = len(value_shape)
        key_rank = len(key_shape)
        einsum_equation, bias_axes, output_rank = _build_proj_equation(
            query_rank - 1, bound_dims=1, output_dims=2
        )
        self._query_dense = EinsumDense(
            einsum_equation,
            output_shape=_get_output_shape(
                output_rank - 1, [self._num_heads, self._key_dim]
            ),
            bias_axes=bias_axes if self._use_bias else None,
            name="query",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._query_dense.build(query_shape)
        einsum_equation, bias_axes, output_rank = _build_proj_equation(
            key_rank - 1, bound_dims=1, output_dims=2
        )
        self._key_dense = EinsumDense(
            einsum_equation,
            output_shape=_get_output_shape(
                output_rank - 1, [self._num_heads, self._key_dim]
            ),
            bias_axes=bias_axes if self._use_bias else None,
            name="key",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._key_dense.build(key_shape)
        einsum_equation, bias_axes, output_rank = _build_proj_equation(
            value_rank - 1, bound_dims=1, output_dims=2
        )
        self._value_dense = EinsumDense(
            einsum_equation,
            output_shape=_get_output_shape(
                output_rank - 1, [self._num_heads, self._value_dim]
            ),
            bias_axes=bias_axes if self._use_bias else None,
            name="value",
            **self._get_common_kwargs_for_sublayer(),
        )
        self._value_dense.build(value_shape)

        # Builds the attention computations for multi-head dot product
        # attention. These computations could be wrapped into the keras
        # attention layer once it supports multi-head einsum computations.
        self._build_attention(output_rank)
        self._output_dense = self._make_output_dense(
            query_shape,
            self._get_common_kwargs_for_sublayer(),
            "attention_output",
        )
        # The output projection consumes per-head value vectors, so swap the
        # last dim of the query projection's output shape for `value_dim`.
        output_dense_input_shape = list(
            self._query_dense.compute_output_shape(query_shape)
        )
        output_dense_input_shape[-1] = self._value_dim
        self._output_dense.build(tuple(output_dense_input_shape))
        self.built = True

    @property
    def query_dense(self):
        return self._query_dense

    @property
    def key_dense(self):
        return self._key_dense

    @property
    def value_dense(self):
        return self._value_dense

    @property
    def output_dense(self):
        return self._output_dense

    def _get_common_kwargs_for_sublayer(self):
        common_kwargs = dict(
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer,
            activity_regularizer=self._activity_regularizer,
            kernel_constraint=self._kernel_constraint,
            bias_constraint=self._bias_constraint,
            dtype=self.dtype_policy,
        )
        # Create new clone of kernel/bias initializer, so that we don't reuse
        # the initializer instance, which could lead to same init value since
        # initializer is stateless.
        kernel_initializer = self._kernel_initializer.__class__.from_config(
            self._kernel_initializer.get_config()
        )
        bias_initializer = self._bias_initializer.__class__.from_config(
            self._bias_initializer.get_config()
        )
        common_kwargs["kernel_initializer"] = kernel_initializer
        common_kwargs["bias_initializer"] = bias_initializer
        return common_kwargs

    def _make_output_dense(self, query_shape, common_kwargs, name=None):
        """Builds the output projection matrix.

        Args:
            query_shape: Shape of the `query` tensor, used to derive the
                einsum equation and the default output feature dim.
            common_kwargs: Common keyword arguments for einsum layer.
            name: Name for the projection layer.

        Returns:
            Projection layer.
        """
        query_rank = len(query_shape)
        if self._output_shape:
            output_shape = self._output_shape
        else:
            output_shape = [query_shape[-1]]
        einsum_equation, bias_axes, output_rank = _build_proj_equation(
            query_rank - 1, bound_dims=2, output_dims=len(output_shape)
        )
        return EinsumDense(
            einsum_equation,
            output_shape=_get_output_shape(output_rank - 1, output_shape),
            bias_axes=bias_axes if self._use_bias else None,
            name=name,
            **common_kwargs,
        )

    def _build_attention(self, rank):
        """Builds multi-head dot-product attention computations.

        This function builds attributes necessary for `_compute_attention` to
        customize attention computation to replace the default dot-product
        attention.

        Args:
            rank: the rank of query, key, value tensors.
        """
        if self._attention_axes is None:
            self._attention_axes = tuple(range(1, rank - 2))
        else:
            self._attention_axes = tuple(self._attention_axes)
        (
            self._dot_product_equation,
            self._combine_equation,
            attn_scores_rank,
        ) = _build_attention_equation(rank, attn_axes=self._attention_axes)
        # Softmax normalizes over the key attention axes, which are the
        # trailing axes of the scores tensor.
        norm_axes = tuple(
            range(
                attn_scores_rank - len(self._attention_axes), attn_scores_rank
            )
        )
        self._softmax = Softmax(axis=norm_axes, dtype=self.dtype_policy)
        self._dropout_layer = Dropout(
            rate=self._dropout, dtype=self.dtype_policy, seed=self.seed
        )

    def _masked_softmax(self, attention_scores, attention_mask=None):
        # Normalize the attention scores to probabilities.
        # attention_scores = [B, N, T, S]
        if attention_mask is not None:
            # The expand dim happens starting from the `num_heads` dimension,
            # (<batch_dims>, num_heads, <query_attention_dims,
            # key_attention_dims>)
            mask_expansion_axis = -len(self._attention_axes) * 2 - 1
            for _ in range(
                len(attention_scores.shape) - len(attention_mask.shape)
            ):
                attention_mask = ops.expand_dims(
                    attention_mask, axis=mask_expansion_axis
                )
        return self._softmax(attention_scores, mask=attention_mask)

    def _compute_attention(
        self,
        query,
        key,
        value,
        attention_mask=None,
        training=None,
    ):
        """Applies Dot-product attention with query, key, value tensors.

        This function defines the computation inside `call` with projected
        multi-head Q, K, V inputs. Users can override this function for
        customized attention implementation.

        Args:
            query: Projected query tensor of shape `(B, T, N, key_dim)`.
            key: Projected key tensor of shape `(B, S, N, key_dim)`.
            value: Projected value tensor of shape `(B, S, N, value_dim)`.
            attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
                attention to certain positions. It is generally not needed if
                the `query` and `value` (and/or `key`) are masked.
            training: Python boolean indicating whether the layer should behave
                in training mode (adding dropout) or in inference mode (doing
                nothing).

        Returns:
            attention_output: Multi-headed outputs of attention computation.
            attention_scores: Multi-headed attention weights.
        """
        # Check for flash attention constraints
        if self._flash_attention and self._return_attention_scores:
            raise ValueError(
                "Returning attention scores is not supported when flash "
                "attention is enabled. Please disable flash attention to access"
                " attention scores."
            )

        # Determine whether to use dot-product attention
        use_dot_product_attention = not (
            self._dropout > 0.0
            or self._return_attention_scores
            or (len(query.shape) != 4)
        )

        if use_dot_product_attention:
            if attention_mask is not None:
                # Ensure attention_mask has the correct shape for broadcasting
                # Expected shape: [batch_size, num_heads, query_seq_len,
                # key_seq_len].
                mask_expansion_axis = -len(self._attention_axes) * 2 - 1
                len_attention_scores_shape = 4  # Only accepts 4D inputs
                for _ in range(
                    len_attention_scores_shape - len(attention_mask.shape)
                ):
                    attention_mask = ops.expand_dims(
                        attention_mask, axis=mask_expansion_axis
                    )
                attention_mask = ops.cast(attention_mask, dtype="bool")
            # Directly compute the attention output using dot-product attention
            attention_output = ops.dot_product_attention(
                query=query,
                key=key,
                value=value,
                bias=None,
                mask=attention_mask,
                scale=self._inverse_sqrt_key_dim,
                is_causal=False,
                flash_attention=self._flash_attention,
            )
            return attention_output, None

        # Default behavior without flash attention, with explicit attention
        # scores
        query = ops.multiply(
            query, ops.cast(self._inverse_sqrt_key_dim, query.dtype)
        )

        # Take the dot product between "query" and "key" to get the raw
        # attention scores.
        attention_scores = ops.einsum(self._dot_product_equation, key, query)

        # Apply the mask using the custom masked softmax
        attention_scores = self._masked_softmax(
            attention_scores, attention_mask
        )

        # Apply dropout to the attention scores if needed
        if self._dropout > 0.0:
            final_attn_scores = self._dropout_layer(
                attention_scores, training=training
            )
        else:
            final_attn_scores = attention_scores

        # `context_layer` = [B, T, N, H]
        attention_output = ops.einsum(
            self._combine_equation, final_attn_scores, value
        )
        return attention_output, attention_scores

    def call(
        self,
        query,
        value,
        key=None,
        query_mask=None,
        value_mask=None,
        key_mask=None,
        attention_mask=None,
        return_attention_scores=False,
        training=None,
        use_causal_mask=False,
    ):
        self._return_attention_scores = return_attention_scores
        if key is None:
            key = value

        # Delete the masks because the masks are handled at the level of the
        # layer
        # NOTE(review): this assignment overwrites any explicitly passed
        # `query_mask` argument with the tensor's own Keras mask — confirm
        # this is intended.
        query_mask = backend.get_keras_mask(query)
        backend.set_keras_mask(query, None)
        backend.set_keras_mask(value, None)
        backend.set_keras_mask(key, None)

        attention_mask = self._compute_attention_mask(
            query,
            value,
            query_mask=query_mask,
            value_mask=value_mask,
            key_mask=key_mask,
            attention_mask=attention_mask,
            use_causal_mask=use_causal_mask,
        )
        # N = `num_attention_heads`
        # H = `size_per_head`

        # `query` = [B, T, N, H]
        query = self._query_dense(query)

        # `key` = [B, S, N, H]
        key = self._key_dense(key)

        # `value` = [B, S, N, H]
        value = self._value_dense(value)
        attention_output, attention_scores = self._compute_attention(
            query,
            key,
            value,
            attention_mask,
            training,
        )
        attention_output = self._output_dense(attention_output)

        # Set mask on output if needed
        if query_mask is not None:
            backend.set_keras_mask(attention_output, query_mask)

        if return_attention_scores:
            return attention_output, attention_scores
        return attention_output

    def _compute_attention_mask(
        self,
        query,
        value,
        query_mask=None,
        value_mask=None,
        key_mask=None,
        attention_mask=None,
        use_causal_mask=False,
    ):
        """Computes the attention mask, using the Keras masks of the inputs.

        * The `query`'s mask is reshaped from [B, T] to [B, T, 1].
        * The `value`'s mask is reshaped from [B, S] to [B, 1, S].
        * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s
          mask is ignored if `key` is `None` or if `key is value`.
        * If `use_causal_mask=True`, then the causal mask is computed. Its shape
          is [1, T, S].

        All defined masks are merged using a logical AND operation (`&`).

        In general, if the `query` and `value` are masked, then there is no need
        to define the `attention_mask`.

        Args:
            query: Projected query tensor of shape `(B, T, N, key_dim)`.
            key: Projected key tensor of shape `(B, T, N, key_dim)`.
            value: Projected value tensor of shape `(B, T, N, value_dim)`.
            attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
                attention to certain positions.
            use_causal_mask: A boolean to indicate whether to apply a causal
                mask to prevent tokens from attending to future tokens (e.g.,
                used in a decoder Transformer).

        Returns:
            attention_mask: a boolean mask of shape `(B, T, S)`, that prevents
                attention to certain positions, based on the Keras masks of the
                `query`, `key`, `value`, and `attention_mask` tensors, and the
                causal mask if `use_causal_mask=True`.
        """
        auto_mask = None
        if query_mask is not None:
            query_mask = ops.cast(query_mask, "bool")  # defensive casting
            # B = batch size, T = max query length
            auto_mask = ops.expand_dims(query_mask, -1)  # shape is [B, T, 1]
        if value_mask is not None:
            value_mask = ops.cast(value_mask, "bool")  # defensive casting
            # B = batch size, S == max value length
            mask = ops.expand_dims(value_mask, -2)  # shape is [B, 1, S]
            auto_mask = mask if auto_mask is None else auto_mask & mask
        if key_mask is not None:
            key_mask = ops.cast(key_mask, "bool")  # defensive casting
            # B == batch size, S == max key length == max value length
            mask = ops.expand_dims(key_mask, -2)  # shape is [B, 1, S]
            auto_mask = mask if auto_mask is None else auto_mask & mask
        if use_causal_mask:
            # the shape of the causal mask is [1, T, S]
            mask = self._compute_causal_mask(query, value)
            auto_mask = mask if auto_mask is None else auto_mask & mask

        if attention_mask is not None:
            attention_mask = ops.cast(attention_mask, "bool")
        if auto_mask is not None:
            # merge attention_mask & automatic mask, to shape [B, T, S]
            attention_mask = (
                auto_mask
                if attention_mask is None
                else attention_mask & auto_mask
            )
        return attention_mask

    def _compute_causal_mask(self, query, value=None):
        """Computes a causal mask (e.g., for masked self-attention layers).

        For example, if query and value both contain sequences of length 4,
        this function returns a boolean tensor equal to:

        ```
        [[[True,  False, False, False],
          [True,  True,  False, False],
          [True,  True,  True,  False],
          [True,  True,  True,  True]]]
        ```

        Args:
            query: query tensor of shape `(B, T, ...)`.
            value: value tensor of shape `(B, S, ...)` (optional, defaults to
                query).

        Returns:
            mask: a boolean tensor of shape `(1, T, S)` containing a lower
                triangular matrix of shape `(T, S)`.
        """
        q_seq_length = ops.shape(query)[1]
        v_seq_length = q_seq_length if value is None else ops.shape(value)[1]
        # row_index[i, j] = i + 1 and col_index[i, j] = j + 1, so the
        # comparison yields a lower-triangular boolean matrix.
        ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32")
        row_index = ops.cumsum(ones_mask, axis=-2)
        col_index = ops.cumsum(ones_mask, axis=-1)
        return ops.greater_equal(row_index, col_index)

    def compute_output_shape(
        self,
        query_shape,
        value_shape,
        key_shape=None,
    ):
        query_shape = tuple(query_shape)
        value_shape = tuple(value_shape)
        if key_shape is None:
            key_shape = value_shape
        else:
            key_shape = tuple(key_shape)

        if value_shape[1:-1] != key_shape[1:-1]:
            raise ValueError(
                "All dimensions of `value` and `key`, except the last one, "
                f"must be equal. Received: value_shape={value_shape} and "
                f"key_shape={key_shape}"
            )
        if self._output_shape:
            query_shape = query_shape[:-1] + self._output_shape
        return query_shape

    def compute_output_spec(
        self,
        query,
        value,
        key=None,
        query_mask=None,
        value_mask=None,
        key_mask=None,
        attention_mask=None,
        return_attention_scores=False,
        training=None,
        use_causal_mask=False,
    ):
        if key is not None:
            key_shape = key.shape
        else:
            key_shape = None
        output_shape = self.compute_output_shape(
            query.shape, value.shape, key_shape
        )
        output_spec = backend.KerasTensor(
            output_shape, dtype=self.compute_dtype
        )
        if return_attention_scores:
            # NOTE(review): uses the query length for both score axes, i.e.
            # assumes T == S (self-attention) — verify for cross-attention.
            length = query.shape[1]
            attention_shape = (query.shape[0], self.num_heads, length, length)
            return output_spec, backend.KerasTensor(
                attention_shape, dtype=self.compute_dtype
            )
        return output_spec
729
+
730
+
731
+ def _index_to_einsum_variable(i):
732
+ """Converts an index to a einsum variable name.
733
+
734
+ We simply map indices to lowercase characters, e.g. 0 -> 'a', 1 -> 'b'.
735
+ """
736
+ return string.ascii_lowercase[i]
737
+
738
+
739
+ def _build_attention_equation(rank, attn_axes):
740
+ """Builds einsum equations for the attention computation.
741
+
742
+ Query, key, value inputs after projection are expected to have the shape as:
743
+ `(bs, <non-attention dims>, <attention dims>, num_heads, channels)`.
744
+ `bs` and `<non-attention dims>` are treated as `<batch dims>`.
745
+
746
+ The attention operations can be generalized:
747
+ 1. Query-key dot product:
748
+ (<batch dims>, <query attention dims>, num_heads, channels),
749
+ (<batch dims>, <key attention dims>, num_heads, channels) ->
750
+ (<batch dims>, num_heads, <query attention dims>, <key attention dims>)
751
+ 2. Combination:
752
+ (<batch dims>, num_heads, <query attention dims>, <key attention dims>),
753
+ (<batch dims>, <value attention dims>, num_heads, channels) -> (<batch
754
+ dims>, <query attention dims>, num_heads, channels)
755
+
756
+ Args:
757
+ rank: Rank of query, key, value tensors.
758
+ attn_axes: List/tuple of axes, `[-1, rank)`,
759
+ that attention will be applied to.
760
+
761
+ Returns:
762
+ Einsum equations.
763
+ """
764
+ target_notation = ""
765
+ for i in range(rank):
766
+ target_notation += _index_to_einsum_variable(i)
767
+ # `batch_dims` includes the head dim.
768
+ batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,)))
769
+ letter_offset = rank
770
+ source_notation = ""
771
+ for i in range(rank):
772
+ if i in batch_dims or i == rank - 1:
773
+ source_notation += target_notation[i]
774
+ else:
775
+ source_notation += _index_to_einsum_variable(letter_offset)
776
+ letter_offset += 1
777
+
778
+ product_notation = "".join(
779
+ [target_notation[i] for i in batch_dims]
780
+ + [target_notation[i] for i in attn_axes]
781
+ + [source_notation[i] for i in attn_axes]
782
+ )
783
+ dot_product_equation = "%s,%s->%s" % (
784
+ source_notation,
785
+ target_notation,
786
+ product_notation,
787
+ )
788
+ attn_scores_rank = len(product_notation)
789
+ combine_equation = "%s,%s->%s" % (
790
+ product_notation,
791
+ source_notation,
792
+ target_notation,
793
+ )
794
+ return dot_product_equation, combine_equation, attn_scores_rank
795
+
796
+
797
+ def _build_proj_equation(free_dims, bound_dims, output_dims):
798
+ """Builds an einsum equation for projections inside multi-head attention."""
799
+ input_str = ""
800
+ kernel_str = ""
801
+ output_str = ""
802
+ bias_axes = ""
803
+ letter_offset = 0
804
+ for i in range(free_dims):
805
+ char = _index_to_einsum_variable(i + letter_offset)
806
+ input_str += char
807
+ output_str += char
808
+
809
+ letter_offset += free_dims
810
+ for i in range(bound_dims):
811
+ char = _index_to_einsum_variable(i + letter_offset)
812
+ input_str += char
813
+ kernel_str += char
814
+
815
+ letter_offset += bound_dims
816
+ for i in range(output_dims):
817
+ char = _index_to_einsum_variable(i + letter_offset)
818
+ kernel_str += char
819
+ output_str += char
820
+ bias_axes += char
821
+ equation = f"{input_str},{kernel_str}->{output_str}"
822
+
823
+ return equation, bias_axes, len(output_str)
824
+
825
+
826
+ def _get_output_shape(output_rank, known_last_dims):
827
+ return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims)
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py ADDED
File without changes
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (206 Bytes). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc ADDED
Binary file (13 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc ADDED
Binary file (7.97 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc ADDED
Binary file (8.77 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc ADDED
Binary file (9.09 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc ADDED
Binary file (6.86 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc ADDED
Binary file (5.54 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc ADDED
Binary file (5.65 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc ADDED
Binary file (5.66 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc ADDED
Binary file (5.88 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc ADDED
Binary file (5.86 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc ADDED
Binary file (5.96 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc ADDED
Binary file (6.06 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc ADDED
Binary file (6.21 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc ADDED
Binary file (6.29 kB). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keras base class for convolution layers."""
2
+
3
+ from keras.src import activations
4
+ from keras.src import constraints
5
+ from keras.src import initializers
6
+ from keras.src import ops
7
+ from keras.src import regularizers
8
+ from keras.src.backend import standardize_data_format
9
+ from keras.src.layers.input_spec import InputSpec
10
+ from keras.src.layers.layer import Layer
11
+ from keras.src.ops.operation_utils import compute_conv_output_shape
12
+ from keras.src.utils.argument_validation import standardize_padding
13
+ from keras.src.utils.argument_validation import standardize_tuple
14
+
15
+
16
+ class BaseConv(Layer):
17
+ """Abstract N-D convolution layer (private, used as implementation base).
18
+
19
+ This layer creates a convolution kernel that is convolved (actually
20
+ cross-correlated) with the layer input to produce a tensor of outputs. If
21
+ `use_bias` is True (and a `bias_initializer` is provided), a bias vector is
22
+ created and added to the outputs. Finally, if `activation` is not `None`, it
23
+ is applied to the outputs as well.
24
+
25
+ Note: layer attributes cannot be modified after the layer has been called
26
+ once (except the `trainable` attribute).
27
+
28
+ Args:
29
+ rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
30
+ filters: int, the dimension of the output space (the number of filters
31
+ in the convolution).
32
+ kernel_size: int or tuple/list of `rank` integers, specifying the size
33
+ of the convolution window.
34
+ strides: int or tuple/list of `rank` integers, specifying the stride
35
+ length of the convolution. If only one int is specified, the same
36
+ stride size will be used for all dimensions. `strides > 1` is
37
+ incompatible with `dilation_rate > 1`.
38
+ padding: string, either `"valid"` or `"same"` (case-insensitive).
39
+ `"valid"` means no padding. `"same"` results in padding evenly to
40
+ the left/right or up/down of the input. When `padding="same"` and
41
+ `strides=1`, the output has the same size as the input.
42
+ data_format: string, either `"channels_last"` or `"channels_first"`.
43
+ The ordering of the dimensions in the inputs. `"channels_last"`
44
+ corresponds to inputs with shape `(batch, steps, features)`
45
+ while `"channels_first"` corresponds to inputs with shape
46
+ `(batch, features, steps)`. It defaults to the `image_data_format`
47
+ value found in your Keras config file at `~/.keras/keras.json`.
48
+ If you never set it, then it will be `"channels_last"`.
49
+ dilation_rate: int or tuple/list of `rank` integers, specifying the
50
+ dilation rate to use for dilated convolution. If only one int is
51
+ specified, the same dilation rate will be used for all dimensions.
52
+ groups: A positive int specifying the number of groups in which the
53
+ input is split along the channel axis. Each group is convolved
54
+ separately with `filters // groups` filters. The output is the
55
+ concatenation of all the `groups` results along the channel axis.
56
+ Input channels and `filters` must both be divisible by `groups`.
57
+ activation: Activation function. If `None`, no activation is applied.
58
+ use_bias: bool, if `True`, bias will be added to the output.
59
+ kernel_initializer: Initializer for the convolution kernel. If `None`,
60
+ the default initializer (`"glorot_uniform"`) will be used.
61
+ bias_initializer: Initializer for the bias vector. If `None`, the
62
+ default initializer (`"zeros"`) will be used.
63
+ kernel_regularizer: Optional regularizer for the convolution kernel.
64
+ bias_regularizer: Optional regularizer for the bias vector.
65
+ activity_regularizer: Optional regularizer function for the output.
66
+ kernel_constraint: Optional projection function to be applied to the
67
+ kernel after being updated by an `Optimizer` (e.g. used to implement
68
+ norm constraints or value constraints for layer weights). The
69
+ function must take as input the unprojected variable and must return
70
+ the projected variable (which must have the same shape). Constraints
71
+ are not safe to use when doing asynchronous distributed training.
72
+ bias_constraint: Optional projection function to be applied to the
73
+ bias after being updated by an `Optimizer`.
74
+ lora_rank: Optional integer. If set, the layer's forward pass
75
+ will implement LoRA (Low-Rank Adaptation)
76
+ with the provided rank. LoRA sets the layer's kernel
77
+ to non-trainable and replaces it with a delta over the
78
+ original kernel, obtained via multiplying two lower-rank
79
+ trainable matrices. This can be useful to reduce the
80
+ computation cost of fine-tuning large dense layers.
81
+ You can also enable LoRA on an existing layer by calling
82
+ `layer.enable_lora(rank)`.
83
+ """
84
+
85
+ def __init__(
86
+ self,
87
+ rank,
88
+ filters,
89
+ kernel_size,
90
+ strides=1,
91
+ padding="valid",
92
+ data_format=None,
93
+ dilation_rate=1,
94
+ groups=1,
95
+ activation=None,
96
+ use_bias=True,
97
+ kernel_initializer="glorot_uniform",
98
+ bias_initializer="zeros",
99
+ kernel_regularizer=None,
100
+ bias_regularizer=None,
101
+ activity_regularizer=None,
102
+ kernel_constraint=None,
103
+ bias_constraint=None,
104
+ lora_rank=None,
105
+ **kwargs,
106
+ ):
107
+ super().__init__(activity_regularizer=activity_regularizer, **kwargs)
108
+ self.rank = rank
109
+ self.filters = filters
110
+ self.groups = groups
111
+ self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
112
+ self.strides = standardize_tuple(strides, rank, "strides")
113
+ self.dilation_rate = standardize_tuple(
114
+ dilation_rate, rank, "dilation_rate"
115
+ )
116
+ self.padding = standardize_padding(padding, allow_causal=rank == 1)
117
+ self.data_format = standardize_data_format(data_format)
118
+ self.activation = activations.get(activation)
119
+ self.use_bias = use_bias
120
+ self.kernel_initializer = initializers.get(kernel_initializer)
121
+ self.bias_initializer = initializers.get(bias_initializer)
122
+ self.kernel_regularizer = regularizers.get(kernel_regularizer)
123
+ self.bias_regularizer = regularizers.get(bias_regularizer)
124
+ self.kernel_constraint = constraints.get(kernel_constraint)
125
+ self.bias_constraint = constraints.get(bias_constraint)
126
+ self.lora_rank = lora_rank
127
+ self.lora_enabled = False
128
+ self.input_spec = InputSpec(min_ndim=self.rank + 2)
129
+ self.data_format = self.data_format
130
+
131
+ if self.filters is not None and self.filters <= 0:
132
+ raise ValueError(
133
+ "Invalid value for argument `filters`. Expected a strictly "
134
+ f"positive value. Received filters={self.filters}."
135
+ )
136
+
137
+ if self.groups <= 0:
138
+ raise ValueError(
139
+ "The number of groups must be a positive integer. "
140
+ f"Received: groups={self.groups}."
141
+ )
142
+
143
+ if self.filters is not None and self.filters % self.groups != 0:
144
+ raise ValueError(
145
+ "The number of filters must be evenly divisible by the "
146
+ f"number of groups. Received: groups={self.groups}, "
147
+ f"filters={self.filters}."
148
+ )
149
+
150
+ if not all(self.kernel_size):
151
+ raise ValueError(
152
+ "The argument `kernel_size` cannot contain 0. Received "
153
+ f"kernel_size={self.kernel_size}."
154
+ )
155
+
156
+ if not all(self.strides):
157
+ raise ValueError(
158
+ "The argument `strides` cannot contains 0. Received "
159
+ f"strides={self.strides}"
160
+ )
161
+
162
+ if max(self.strides) > 1 and max(self.dilation_rate) > 1:
163
+ raise ValueError(
164
+ "`strides > 1` not supported in conjunction with "
165
+ f"`dilation_rate > 1`. Received: strides={self.strides} and "
166
+ f"dilation_rate={self.dilation_rate}"
167
+ )
168
+
169
+ def build(self, input_shape):
170
+ if self.data_format == "channels_last":
171
+ channel_axis = -1
172
+ input_channel = input_shape[-1]
173
+ else:
174
+ channel_axis = 1
175
+ input_channel = input_shape[1]
176
+ self.input_spec = InputSpec(
177
+ min_ndim=self.rank + 2, axes={channel_axis: input_channel}
178
+ )
179
+ if input_channel % self.groups != 0:
180
+ raise ValueError(
181
+ "The number of input channels must be evenly divisible by "
182
+ f"the number of groups. Received groups={self.groups}, but the "
183
+ f"input has {input_channel} channels (full input shape is "
184
+ f"{input_shape})."
185
+ )
186
+ kernel_shape = self.kernel_size + (
187
+ input_channel // self.groups,
188
+ self.filters,
189
+ )
190
+
191
+ # compute_output_shape contains some validation logic for the input
192
+ # shape, and make sure the output shape has all positive dimensions.
193
+ self.compute_output_shape(input_shape)
194
+
195
+ self._kernel = self.add_weight(
196
+ name="kernel",
197
+ shape=kernel_shape,
198
+ initializer=self.kernel_initializer,
199
+ regularizer=self.kernel_regularizer,
200
+ constraint=self.kernel_constraint,
201
+ trainable=True,
202
+ dtype=self.dtype,
203
+ )
204
+ if self.use_bias:
205
+ self.bias = self.add_weight(
206
+ name="bias",
207
+ shape=(self.filters,),
208
+ initializer=self.bias_initializer,
209
+ regularizer=self.bias_regularizer,
210
+ constraint=self.bias_constraint,
211
+ trainable=True,
212
+ dtype=self.dtype,
213
+ )
214
+ else:
215
+ self.bias = None
216
+ self.built = True
217
+ if self.lora_rank:
218
+ self.enable_lora(self.lora_rank)
219
+
220
+ @property
221
+ def kernel(self):
222
+ if not self.built:
223
+ raise AttributeError(
224
+ "You must build the layer before accessing `kernel`."
225
+ )
226
+ if self.lora_enabled:
227
+ return self._kernel + ops.matmul(
228
+ self.lora_kernel_a, self.lora_kernel_b
229
+ )
230
+ return self._kernel
231
+
232
+ def convolution_op(self, inputs, kernel):
233
+ return ops.conv(
234
+ inputs,
235
+ kernel,
236
+ strides=list(self.strides),
237
+ padding=self.padding,
238
+ dilation_rate=self.dilation_rate,
239
+ data_format=self.data_format,
240
+ )
241
+
242
+ def call(self, inputs):
243
+ outputs = self.convolution_op(
244
+ inputs,
245
+ self.kernel,
246
+ )
247
+ if self.use_bias:
248
+ if self.data_format == "channels_last":
249
+ bias_shape = (1,) * (self.rank + 1) + (self.filters,)
250
+ else:
251
+ bias_shape = (1, self.filters) + (1,) * self.rank
252
+ bias = ops.reshape(self.bias, bias_shape)
253
+ outputs = ops.add(outputs, bias)
254
+
255
+ if self.activation is not None:
256
+ return self.activation(outputs)
257
+ return outputs
258
+
259
+ def compute_output_shape(self, input_shape):
260
+ return compute_conv_output_shape(
261
+ input_shape,
262
+ self.filters,
263
+ self.kernel_size,
264
+ strides=self.strides,
265
+ padding=self.padding,
266
+ data_format=self.data_format,
267
+ dilation_rate=self.dilation_rate,
268
+ )
269
+
270
+ def enable_lora(
271
+ self, rank, a_initializer="he_uniform", b_initializer="zeros"
272
+ ):
273
+ if self.kernel_constraint:
274
+ raise ValueError(
275
+ "Lora is incompatible with kernel constraints. "
276
+ "In order to enable lora on this layer, remove the "
277
+ "`kernel_constraint` argument."
278
+ )
279
+ if not self.built:
280
+ raise ValueError(
281
+ "Cannot enable lora on a layer that isn't yet built."
282
+ )
283
+ if self.lora_enabled:
284
+ raise ValueError(
285
+ "lora is already enabled. "
286
+ "This can only be done once per layer."
287
+ )
288
+ self._tracker.unlock()
289
+ self.lora_kernel_a = self.add_weight(
290
+ name="lora_kernel_a",
291
+ shape=self._kernel.shape[:-1] + (rank,),
292
+ initializer=initializers.get(a_initializer),
293
+ regularizer=self.kernel_regularizer,
294
+ )
295
+ self.lora_kernel_b = self.add_weight(
296
+ name="lora_kernel_b",
297
+ shape=(rank, self.filters),
298
+ initializer=initializers.get(b_initializer),
299
+ regularizer=self.kernel_regularizer,
300
+ )
301
+ self._kernel.trainable = False
302
+ self._tracker.lock()
303
+ self.lora_enabled = True
304
+ self.lora_rank = rank
305
+
306
+ def save_own_variables(self, store):
307
+ # Do nothing if the layer isn't yet built
308
+ if not self.built:
309
+ return
310
+ target_variables = [self.kernel]
311
+ if self.use_bias:
312
+ target_variables.append(self.bias)
313
+ for i, variable in enumerate(target_variables):
314
+ store[str(i)] = variable
315
+
316
+ def load_own_variables(self, store):
317
+ if not self.lora_enabled:
318
+ self._check_load_own_variables(store)
319
+ # Do nothing if the layer isn't yet built
320
+ if not self.built:
321
+ return
322
+ target_variables = [self._kernel]
323
+ if self.use_bias:
324
+ target_variables.append(self.bias)
325
+ for i, variable in enumerate(target_variables):
326
+ variable.assign(store[str(i)])
327
+ if self.lora_enabled:
328
+ self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape))
329
+ self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape))
330
+
331
+ def get_config(self):
332
+ config = super().get_config()
333
+ config.update(
334
+ {
335
+ "filters": self.filters,
336
+ "kernel_size": self.kernel_size,
337
+ "strides": self.strides,
338
+ "padding": self.padding,
339
+ "data_format": self.data_format,
340
+ "dilation_rate": self.dilation_rate,
341
+ "groups": self.groups,
342
+ "activation": activations.serialize(self.activation),
343
+ "use_bias": self.use_bias,
344
+ "kernel_initializer": initializers.serialize(
345
+ self.kernel_initializer
346
+ ),
347
+ "bias_initializer": initializers.serialize(
348
+ self.bias_initializer
349
+ ),
350
+ "kernel_regularizer": regularizers.serialize(
351
+ self.kernel_regularizer
352
+ ),
353
+ "bias_regularizer": regularizers.serialize(
354
+ self.bias_regularizer
355
+ ),
356
+ "activity_regularizer": regularizers.serialize(
357
+ self.activity_regularizer
358
+ ),
359
+ "kernel_constraint": constraints.serialize(
360
+ self.kernel_constraint
361
+ ),
362
+ "bias_constraint": constraints.serialize(self.bias_constraint),
363
+ }
364
+ )
365
+ if self.lora_rank:
366
+ config["lora_rank"] = self.lora_rank
367
+ return config
368
+
369
+ def _check_load_own_variables(self, store):
370
+ all_vars = self._trainable_variables + self._non_trainable_variables
371
+ if len(store.keys()) != len(all_vars):
372
+ if len(all_vars) == 0 and not self.built:
373
+ raise ValueError(
374
+ f"Layer '{self.name}' was never built "
375
+ "and thus it doesn't have any variables. "
376
+ f"However the weights file lists {len(store.keys())} "
377
+ "variables for this layer.\n"
378
+ "In most cases, this error indicates that either:\n\n"
379
+ "1. The layer is owned by a parent layer that "
380
+ "implements a `build()` method, but calling the "
381
+ "parent's `build()` method did NOT create the state of "
382
+ f"the child layer '{self.name}'. A `build()` method "
383
+ "must create ALL state for the layer, including "
384
+ "the state of any children layers.\n\n"
385
+ "2. You need to implement "
386
+ "the `def build_from_config(self, config)` method "
387
+ f"on layer '{self.name}', to specify how to rebuild "
388
+ "it during loading. "
389
+ "In this case, you might also want to implement the "
390
+ "method that generates the build config at saving time, "
391
+ "`def get_build_config(self)`. "
392
+ "The method `build_from_config()` is meant "
393
+ "to create the state "
394
+ "of the layer (i.e. its variables) upon deserialization.",
395
+ )
396
+ raise ValueError(
397
+ f"Layer '{self.name}' expected {len(all_vars)} variables, "
398
+ "but received "
399
+ f"{len(store.keys())} variables during loading. "
400
+ f"Expected: {[v.name for v in all_vars]}"
401
+ )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv_transpose.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keras base class for transpose convolution layers."""
2
+
3
+ from keras.src import activations
4
+ from keras.src import constraints
5
+ from keras.src import initializers
6
+ from keras.src import ops
7
+ from keras.src import regularizers
8
+ from keras.src.backend import standardize_data_format
9
+ from keras.src.backend.common.backend_utils import (
10
+ compute_conv_transpose_output_shape,
11
+ )
12
+ from keras.src.layers.input_spec import InputSpec
13
+ from keras.src.layers.layer import Layer
14
+ from keras.src.utils.argument_validation import standardize_padding
15
+ from keras.src.utils.argument_validation import standardize_tuple
16
+
17
+
18
+ class BaseConvTranspose(Layer):
19
+ """Abstract N-D transposed convolution layer.
20
+
21
+ The need for transposed convolutions generally arises from the desire to use
22
+ a transformation going in the opposite direction of a normal convolution,
23
+ i.e., from something that has the shape of the output of some convolution to
24
+ something that has the shape of its input while maintaining a connectivity
25
+ pattern that is compatible with said convolution.
26
+
27
+ Args:
28
+ rank: int, the rank of the transposed convolution, e.g. 2 for 2D
29
+ transposed convolution.
30
+ filters: int, the dimension of the output space (the number of filters
31
+ in the transposed convolution).
32
+ kernel_size: int or tuple/list of `rank` integers, specifying the size
33
+ of the transposed convolution window.
34
+ strides: int or tuple/list of `rank` integers, specifying the stride
35
+ length of the transposed convolution. If only one int is specified,
36
+ the same stride size will be used for all dimensions.
37
+ `strides > 1` is incompatible with `dilation_rate > 1`.
38
+ padding: string, either `"valid"` or `"same"` (case-insensitive).
39
+ `"valid"` means no padding. `"same"` results in padding evenly to
40
+ the left/right or up/down of the input such that output has the same
41
+ height/width dimension as the input.
42
+ data_format: string, either `"channels_last"` or `"channels_first"`.
43
+ The ordering of the dimensions in the inputs. `"channels_last"`
44
+ corresponds to inputs with shape `(batch, steps, features)`
45
+ while `"channels_first"` corresponds to inputs with shape
46
+ `(batch, features, steps)`. It defaults to the `image_data_format`
47
+ value found in your Keras config file at `~/.keras/keras.json`.
48
+ If you never set it, then it will be `"channels_last"`.
49
+ dilation_rate: int or tuple/list of `rank` integers, specifying the
50
+ dilation rate to use for dilated convolution. If only one int is
51
+ specified, the same dilation rate will be used for all dimensions.
52
+ activation: Activation function. If `None`, no activation is applied.
53
+ use_bias: bool, if `True`, bias will be added to the output.
54
+ kernel_initializer: Initializer for the convolution kernel. If `None`,
55
+ the default initializer (`"glorot_uniform"`) will be used.
56
+ bias_initializer: Initializer for the bias vector. If `None`, the
57
+ default initializer (`"zeros"`) will be used.
58
+ kernel_regularizer: Optional regularizer for the convolution kernel.
59
+ bias_regularizer: Optional regularizer for the bias vector.
60
+ activity_regularizer: Optional regularizer function for the output.
61
+ kernel_constraint: Optional projection function to be applied to the
62
+ kernel after being updated by an `Optimizer` (e.g. used to implement
63
+ norm constraints or value constraints for layer weights). The
64
+ function must take as input the unprojected variable and must return
65
+ the projected variable (which must have the same shape). Constraints
66
+ are not safe to use when doing asynchronous distributed training.
67
+ bias_constraint: Optional projection function to be applied to the
68
+ bias after being updated by an `Optimizer`.
69
+ """
70
+
71
+ def __init__(
72
+ self,
73
+ rank,
74
+ filters,
75
+ kernel_size,
76
+ strides=1,
77
+ padding="valid",
78
+ output_padding=None,
79
+ data_format=None,
80
+ dilation_rate=1,
81
+ activation=None,
82
+ use_bias=True,
83
+ kernel_initializer="glorot_uniform",
84
+ bias_initializer="zeros",
85
+ kernel_regularizer=None,
86
+ bias_regularizer=None,
87
+ activity_regularizer=None,
88
+ kernel_constraint=None,
89
+ bias_constraint=None,
90
+ trainable=True,
91
+ name=None,
92
+ **kwargs,
93
+ ):
94
+ super().__init__(
95
+ trainable=trainable,
96
+ name=name,
97
+ activity_regularizer=activity_regularizer,
98
+ **kwargs,
99
+ )
100
+ self.rank = rank
101
+ self.filters = filters
102
+ self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
103
+ self.strides = standardize_tuple(strides, rank, "strides")
104
+ self.dilation_rate = standardize_tuple(
105
+ dilation_rate, rank, "dilation_rate"
106
+ )
107
+ self.padding = standardize_padding(padding)
108
+ if output_padding is None:
109
+ self.output_padding = None
110
+ else:
111
+ self.output_padding = standardize_tuple(
112
+ output_padding,
113
+ rank,
114
+ "output_padding",
115
+ )
116
+ self.data_format = standardize_data_format(data_format)
117
+ self.activation = activations.get(activation)
118
+ self.use_bias = use_bias
119
+ self.kernel_initializer = initializers.get(kernel_initializer)
120
+ self.bias_initializer = initializers.get(bias_initializer)
121
+ self.kernel_regularizer = regularizers.get(kernel_regularizer)
122
+ self.bias_regularizer = regularizers.get(bias_regularizer)
123
+ self.kernel_constraint = constraints.get(kernel_constraint)
124
+ self.bias_constraint = constraints.get(bias_constraint)
125
+ self.input_spec = InputSpec(min_ndim=self.rank + 2)
126
+ self.data_format = self.data_format
127
+
128
+ if self.filters is not None and self.filters <= 0:
129
+ raise ValueError(
130
+ "Invalid value for argument `filters`. Expected a strictly "
131
+ f"positive value. Received filters={self.filters}."
132
+ )
133
+
134
+ if not all(self.kernel_size):
135
+ raise ValueError(
136
+ "The argument `kernel_size` cannot contain 0. Received "
137
+ f"kernel_size={self.kernel_size}."
138
+ )
139
+
140
+ if not all(self.strides):
141
+ raise ValueError(
142
+ "The argument `strides` cannot contains 0. Received "
143
+ f"strides={self.strides}."
144
+ )
145
+
146
+ if max(self.strides) > 1 and max(self.dilation_rate) > 1:
147
+ raise ValueError(
148
+ "`strides > 1` not supported in conjunction with "
149
+ f"`dilation_rate > 1`. Received: strides={self.strides} and "
150
+ f"dilation_rate={self.dilation_rate}"
151
+ )
152
+
153
+ def build(self, input_shape):
154
+ if self.data_format == "channels_last":
155
+ channel_axis = -1
156
+ input_channel = input_shape[-1]
157
+ else:
158
+ channel_axis = 1
159
+ input_channel = input_shape[1]
160
+ self.input_spec = InputSpec(
161
+ min_ndim=self.rank + 2, axes={channel_axis: input_channel}
162
+ )
163
+ kernel_shape = self.kernel_size + (
164
+ self.filters,
165
+ input_channel,
166
+ )
167
+
168
+ self.kernel = self.add_weight(
169
+ name="kernel",
170
+ shape=kernel_shape,
171
+ initializer=self.kernel_initializer,
172
+ regularizer=self.kernel_regularizer,
173
+ constraint=self.kernel_constraint,
174
+ trainable=True,
175
+ dtype=self.dtype,
176
+ )
177
+ if self.use_bias:
178
+ self.bias = self.add_weight(
179
+ name="bias",
180
+ shape=(self.filters,),
181
+ initializer=self.bias_initializer,
182
+ regularizer=self.bias_regularizer,
183
+ constraint=self.bias_constraint,
184
+ trainable=True,
185
+ dtype=self.dtype,
186
+ )
187
+ else:
188
+ self.bias = None
189
+ self.built = True
190
+
191
+ def call(self, inputs):
192
+ outputs = ops.conv_transpose(
193
+ inputs,
194
+ self.kernel,
195
+ strides=list(self.strides),
196
+ padding=self.padding,
197
+ output_padding=self.output_padding,
198
+ dilation_rate=self.dilation_rate,
199
+ data_format=self.data_format,
200
+ )
201
+
202
+ if self.use_bias:
203
+ if self.data_format == "channels_last":
204
+ bias_shape = (1,) * (self.rank + 1) + (self.filters,)
205
+ else:
206
+ bias_shape = (1, self.filters) + (1,) * self.rank
207
+ bias = ops.reshape(self.bias, bias_shape)
208
+ outputs = ops.add(outputs, bias)
209
+
210
+ if self.activation is not None:
211
+ return self.activation(outputs)
212
+ return outputs
213
+
214
+ def compute_output_shape(self, input_shape):
215
+ return compute_conv_transpose_output_shape(
216
+ input_shape,
217
+ self.kernel_size,
218
+ self.filters,
219
+ strides=self.strides,
220
+ padding=self.padding,
221
+ output_padding=self.output_padding,
222
+ data_format=self.data_format,
223
+ dilation_rate=self.dilation_rate,
224
+ )
225
+
226
+ def get_config(self):
227
+ config = super().get_config()
228
+ config.update(
229
+ {
230
+ "filters": self.filters,
231
+ "kernel_size": self.kernel_size,
232
+ "strides": self.strides,
233
+ "padding": self.padding,
234
+ "data_format": self.data_format,
235
+ "dilation_rate": self.dilation_rate,
236
+ "activation": activations.serialize(self.activation),
237
+ "use_bias": self.use_bias,
238
+ "kernel_initializer": initializers.serialize(
239
+ self.kernel_initializer
240
+ ),
241
+ "bias_initializer": initializers.serialize(
242
+ self.bias_initializer
243
+ ),
244
+ "kernel_regularizer": regularizers.serialize(
245
+ self.kernel_regularizer
246
+ ),
247
+ "bias_regularizer": regularizers.serialize(
248
+ self.bias_regularizer
249
+ ),
250
+ "activity_regularizer": regularizers.serialize(
251
+ self.activity_regularizer
252
+ ),
253
+ "kernel_constraint": constraints.serialize(
254
+ self.kernel_constraint
255
+ ),
256
+ "bias_constraint": constraints.serialize(self.bias_constraint),
257
+ }
258
+ )
259
+ return config
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_depthwise_conv.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keras base class for depthwise convolution layers."""
2
+
3
+ from keras.src import activations
4
+ from keras.src import constraints
5
+ from keras.src import initializers
6
+ from keras.src import ops
7
+ from keras.src import regularizers
8
+ from keras.src.backend import standardize_data_format
9
+ from keras.src.layers.input_spec import InputSpec
10
+ from keras.src.layers.layer import Layer
11
+ from keras.src.ops.operation_utils import compute_conv_output_shape
12
+ from keras.src.utils.argument_validation import standardize_padding
13
+ from keras.src.utils.argument_validation import standardize_tuple
14
+
15
+
16
+ class BaseDepthwiseConv(Layer):
17
+ """Abstract N-D depthwise convolution layer.
18
+
19
+ Depthwise convolution is a type of convolution in which each input channel
20
+ is convolved with a different kernel (called a depthwise kernel). You can
21
+ understand depthwise convolution as the first step in a depthwise separable
22
+ convolution.
23
+
24
+ It is implemented via the following steps:
25
+
26
+ - Split the input into individual channels.
27
+ - Convolve each channel with an individual depthwise kernel with
28
+ `depth_multiplier` output channels.
29
+ - Concatenate the convolved outputs along the channels axis.
30
+
31
+ Unlike a regular convolution, depthwise convolution does not mix information
32
+ across different input channels.
33
+
34
+ The `depth_multiplier` argument determines how many filter are applied to
35
+ one input channel. As such, it controls the amount of output channels that
36
+ are generated per input channel in the depthwise step.
37
+
38
+
39
+ Args:
40
+ rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
41
+ depth_multiplier: The number of depthwise convolution output channels
42
+ for each input channel. The total number of depthwise convolution
43
+ output channels will be equal to `input_channel * depth_multiplier`.
44
+ kernel_size: int or tuple/list of `rank` integers, specifying the size
45
+ of the depthwise convolution window.
46
+ strides: int or tuple/list of `rank` integers, specifying the stride
47
+ length of the depthwise convolution. If only one int is specified,
48
+ the same stride size will be used for all dimensions.
49
+ `strides > 1` is incompatible with `dilation_rate > 1`.
50
+ padding: string, either `"valid"` or `"same"` (case-insensitive).
51
+ `"valid"` means no padding. `"same"` results in padding evenly to
52
+ the left/right or up/down of the input. When `padding="same"` and
53
+ `strides=1`, the output has the same size as the input.
54
+ data_format: string, either `"channels_last"` or `"channels_first"`.
55
+ The ordering of the dimensions in the inputs. `"channels_last"`
56
+ corresponds to inputs with shape `(batch, steps, features)`
57
+ while `"channels_first"` corresponds to inputs with shape
58
+ `(batch, features, steps)`. It defaults to the `image_data_format`
59
+ value found in your Keras config file at `~/.keras/keras.json`.
60
+ If you never set it, then it will be `"channels_last"`.
61
+ dilation_rate: int or tuple/list of `rank` integers, specifying the
62
+ dilation rate to use for dilated convolution. If only one int is
63
+ specified, the same dilation rate will be used for all dimensions.
64
+ activation: Activation function. If `None`, no activation is applied.
65
+ use_bias: bool, if `True`, bias will be added to the output.
66
+ depthwise_initializer: Initializer for the depthwsie convolution
67
+ kernel. If `None`, the default initializer (`"glorot_uniform"`)
68
+ will be used.
69
+ bias_initializer: Initializer for the bias vector. If `None`, the
70
+ default initializer (`"zeros"`) will be used.
71
+ depthwise_regularizer: Optional regularizer for the convolution kernel.
72
+ bias_regularizer: Optional regularizer for the bias vector.
73
+ activity_regularizer: Optional regularizer function for the output.
74
+ depthwise_constraint: Optional projection function to be applied to the
75
+ kernel after being updated by an `Optimizer` (e.g. used to implement
76
+ norm constraints or value constraints for layer weights). The
77
+ function must take as input the unprojected variable and must return
78
+ the projected variable (which must have the same shape). Constraints
79
+ are not safe to use when doing asynchronous distributed training.
80
+ bias_constraint: Optional projection function to be applied to the
81
+ bias after being updated by an `Optimizer`.
82
+ """
83
+
84
+ def __init__(
85
+ self,
86
+ rank,
87
+ depth_multiplier,
88
+ kernel_size,
89
+ strides=1,
90
+ padding="valid",
91
+ data_format=None,
92
+ dilation_rate=1,
93
+ activation=None,
94
+ use_bias=True,
95
+ depthwise_initializer="glorot_uniform",
96
+ bias_initializer="zeros",
97
+ depthwise_regularizer=None,
98
+ bias_regularizer=None,
99
+ activity_regularizer=None,
100
+ depthwise_constraint=None,
101
+ bias_constraint=None,
102
+ trainable=True,
103
+ name=None,
104
+ **kwargs,
105
+ ):
106
+ super().__init__(
107
+ trainable=trainable,
108
+ name=name,
109
+ activity_regularizer=regularizers.get(activity_regularizer),
110
+ **kwargs,
111
+ )
112
+ self.rank = rank
113
+ self.depth_multiplier = depth_multiplier
114
+ self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
115
+ self.strides = standardize_tuple(strides, rank, "strides")
116
+ self.dilation_rate = standardize_tuple(
117
+ dilation_rate, rank, "dilation_rate"
118
+ )
119
+ self.padding = standardize_padding(padding)
120
+ self.data_format = standardize_data_format(data_format)
121
+ self.activation = activations.get(activation)
122
+ self.use_bias = use_bias
123
+ self.depthwise_initializer = initializers.get(depthwise_initializer)
124
+ self.bias_initializer = initializers.get(bias_initializer)
125
+ self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
126
+ self.bias_regularizer = regularizers.get(bias_regularizer)
127
+ self.depthwise_constraint = constraints.get(depthwise_constraint)
128
+ self.bias_constraint = constraints.get(bias_constraint)
129
+ self.input_spec = InputSpec(min_ndim=self.rank + 2)
130
+ self.data_format = self.data_format
131
+
132
+ if self.depth_multiplier is not None and self.depth_multiplier <= 0:
133
+ raise ValueError(
134
+ "Invalid value for argument `depth_multiplier`. Expected a "
135
+ "strictly positive value. Received "
136
+ f"depth_multiplier={self.depth_multiplier}."
137
+ )
138
+
139
+ if not all(self.kernel_size):
140
+ raise ValueError(
141
+ "The argument `kernel_size` cannot contain 0. Received "
142
+ f"kernel_size={self.kernel_size}."
143
+ )
144
+
145
+ if not all(self.strides):
146
+ raise ValueError(
147
+ "The argument `strides` cannot contains 0. Received "
148
+ f"strides={self.strides}"
149
+ )
150
+
151
+ if max(self.strides) > 1 and max(self.dilation_rate) > 1:
152
+ raise ValueError(
153
+ "`strides > 1` not supported in conjunction with "
154
+ f"`dilation_rate > 1`. Received: strides={self.strides} and "
155
+ f"dilation_rate={self.dilation_rate}"
156
+ )
157
+
158
+ def build(self, input_shape):
159
+ if self.data_format == "channels_last":
160
+ channel_axis = -1
161
+ input_channel = input_shape[-1]
162
+ else:
163
+ channel_axis = 1
164
+ input_channel = input_shape[1]
165
+ self.input_spec = InputSpec(
166
+ min_ndim=self.rank + 2, axes={channel_axis: input_channel}
167
+ )
168
+ depthwise_shape = self.kernel_size + (
169
+ input_channel,
170
+ self.depth_multiplier,
171
+ )
172
+ self.kernel = self.add_weight(
173
+ name="kernel",
174
+ shape=depthwise_shape,
175
+ initializer=self.depthwise_initializer,
176
+ regularizer=self.depthwise_regularizer,
177
+ constraint=self.depthwise_constraint,
178
+ trainable=True,
179
+ dtype=self.dtype,
180
+ )
181
+ if self.use_bias:
182
+ self.bias = self.add_weight(
183
+ name="bias",
184
+ shape=(self.depth_multiplier * input_channel,),
185
+ initializer=self.bias_initializer,
186
+ regularizer=self.bias_regularizer,
187
+ constraint=self.bias_constraint,
188
+ trainable=True,
189
+ dtype=self.dtype,
190
+ )
191
+ else:
192
+ self.bias = None
193
+ self.built = True
194
+
195
+ def _get_input_channel(self, input_shape):
196
+ if self.data_format == "channels_last":
197
+ input_channel = input_shape[-1]
198
+ else:
199
+ input_channel = input_shape[1]
200
+ return input_channel
201
+
202
+ def call(self, inputs):
203
+ input_channel = self._get_input_channel(inputs.shape)
204
+ outputs = ops.depthwise_conv(
205
+ inputs,
206
+ self.kernel,
207
+ strides=self.strides,
208
+ padding=self.padding,
209
+ dilation_rate=self.dilation_rate,
210
+ data_format=self.data_format,
211
+ )
212
+
213
+ if self.use_bias:
214
+ if self.data_format == "channels_last":
215
+ bias_shape = (1,) * (self.rank + 1) + (
216
+ self.depth_multiplier * input_channel,
217
+ )
218
+ else:
219
+ bias_shape = (1, self.depth_multiplier * input_channel) + (
220
+ 1,
221
+ ) * self.rank
222
+ bias = ops.reshape(self.bias, bias_shape)
223
+ outputs = ops.add(outputs, bias)
224
+
225
+ if self.activation is not None:
226
+ return self.activation(outputs)
227
+ return outputs
228
+
229
+ def compute_output_shape(self, input_shape):
230
+ input_channel = self._get_input_channel(input_shape)
231
+ return compute_conv_output_shape(
232
+ input_shape,
233
+ self.depth_multiplier * input_channel,
234
+ self.kernel_size,
235
+ strides=self.strides,
236
+ padding=self.padding,
237
+ data_format=self.data_format,
238
+ dilation_rate=self.dilation_rate,
239
+ )
240
+
241
+ def get_config(self):
242
+ config = super().get_config()
243
+ config.update(
244
+ {
245
+ "depth_multiplier": self.depth_multiplier,
246
+ "kernel_size": self.kernel_size,
247
+ "strides": self.strides,
248
+ "padding": self.padding,
249
+ "data_format": self.data_format,
250
+ "dilation_rate": self.dilation_rate,
251
+ "activation": activations.serialize(self.activation),
252
+ "use_bias": self.use_bias,
253
+ "depthwise_initializer": initializers.serialize(
254
+ self.depthwise_initializer
255
+ ),
256
+ "bias_initializer": initializers.serialize(
257
+ self.bias_initializer
258
+ ),
259
+ "depthwise_regularizer": regularizers.serialize(
260
+ self.depthwise_regularizer
261
+ ),
262
+ "bias_regularizer": regularizers.serialize(
263
+ self.bias_regularizer
264
+ ),
265
+ "activity_regularizer": regularizers.serialize(
266
+ self.activity_regularizer
267
+ ),
268
+ "depthwise_constraint": constraints.serialize(
269
+ self.depthwise_constraint
270
+ ),
271
+ "bias_constraint": constraints.serialize(self.bias_constraint),
272
+ }
273
+ )
274
+ return config
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_separable_conv.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Keras abstract base layer for separable convolution."""
2
+
3
+ from keras.src import activations
4
+ from keras.src import constraints
5
+ from keras.src import initializers
6
+ from keras.src import ops
7
+ from keras.src import regularizers
8
+ from keras.src.backend import standardize_data_format
9
+ from keras.src.layers.input_spec import InputSpec
10
+ from keras.src.layers.layer import Layer
11
+ from keras.src.ops.operation_utils import compute_conv_output_shape
12
+ from keras.src.utils.argument_validation import standardize_padding
13
+ from keras.src.utils.argument_validation import standardize_tuple
14
+
15
+
16
class BaseSeparableConv(Layer):
    """Abstract base layer for separable convolution.

    This layer performs a depthwise convolution that acts separately on
    channels, followed by a pointwise convolution that mixes channels. If
    `use_bias` is True and a bias initializer is provided, it adds a bias
    vector to the output.

    Args:
        rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        filters: int, the dimensionality of the output space (i.e. the number
            of filters in the pointwise convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the depthwise convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the depthwise convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `stride value != 1` is incompatible with `dilation_rate != 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: An initializer for the depthwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        pointwise_initializer: An initializer for the pointwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: An initializer for the bias vector. If None, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the depthwise
            convolution kernel.
        pointwise_regularizer: Optional regularizer for the pointwise
            convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            depthwise kernel after being updated by an `Optimizer` (e.g. used
            for norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must
            return the projected variable (which must have the same shape).
        pointwise_constraint: Optional projection function to be applied to the
            pointwise kernel after being updated by an `Optimizer`.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        depth_multiplier,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        pointwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        pointwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        pointwise_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs,
        )
        self.rank = rank
        self.depth_multiplier = depth_multiplier
        self.filters = filters
        # Normalize scalar-or-sequence arguments to rank-length tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.pointwise_initializer = initializers.get(pointwise_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.pointwise_regularizer = regularizers.get(pointwise_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.pointwise_constraint = constraints.get(pointwise_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        # Fix: removed a redundant no-op `self.data_format = self.data_format`
        # that was here in the original code.

        # Inputs need `rank` spatial dims plus batch and channel dims; the
        # channel axis size is pinned later in `build()`.
        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        if self.depth_multiplier is not None and self.depth_multiplier <= 0:
            raise ValueError(
                "Invalid value for argument `depth_multiplier`. Expected a "
                "strictly positive value. Received "
                f"depth_multiplier={self.depth_multiplier}."
            )

        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received: "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0(s). Received: "
                f"strides={self.strides}"
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create depthwise and pointwise kernels (and optional bias)."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        # Pin the channel dimension so future inputs are shape-checked.
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        depthwise_kernel_shape = self.kernel_size + (
            input_channel,
            self.depth_multiplier,
        )
        # 1x...x1 window mixing all depthwise output channels into `filters`.
        pointwise_kernel_shape = (1,) * self.rank + (
            self.depth_multiplier * input_channel,
            self.filters,
        )

        self.depthwise_kernel = self.add_weight(
            name="depthwise_kernel",
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        self.pointwise_kernel = self.add_weight(
            name="pointwise_kernel",
            shape=pointwise_kernel_shape,
            initializer=self.pointwise_initializer,
            regularizer=self.pointwise_regularizer,
            constraint=self.pointwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Apply the separable convolution, optional bias, and activation."""
        outputs = ops.separable_conv(
            inputs,
            self.depthwise_kernel,
            self.pointwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the bias so it broadcasts over batch + spatial dims.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Infer the output shape of the separable convolution."""
        return compute_conv_output_shape(
            input_shape,
            self.filters,
            self.kernel_size,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        """Return the layer configuration for serialization."""
        config = super().get_config()
        config.update(
            {
                "depth_multiplier": self.depth_multiplier,
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "depthwise_initializer": initializers.serialize(
                    self.depthwise_initializer
                ),
                "pointwise_initializer": initializers.serialize(
                    self.pointwise_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "depthwise_regularizer": regularizers.serialize(
                    self.depthwise_regularizer
                ),
                "pointwise_regularizer": regularizers.serialize(
                    self.pointwise_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "depthwise_constraint": constraints.serialize(
                    self.depthwise_constraint
                ),
                "pointwise_constraint": constraints.serialize(
                    self.pointwise_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src import ops
2
+ from keras.src.api_export import keras_export
3
+ from keras.src.layers.convolutional.base_conv import BaseConv
4
+
5
+
6
@keras_export(["keras.layers.Conv1D", "keras.layers.Convolution1D"])
class Conv1D(BaseConv):
    """1D convolution layer (e.g. temporal convolution).

    Convolves a learned kernel with the input along a single spatial (or
    temporal) axis. When `use_bias` is True a bias vector is added to the
    result, and when `activation` is not `None` it is applied to the output.

    Args:
        filters: int, number of output filters (the output-space dimension).
        kernel_size: int or tuple/list of 1 integer, convolution window size.
        strides: int or tuple/list of 1 integer, stride of the convolution.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, `"valid"`, `"same"` or `"causal"` (case-insensitive).
            `"valid"` applies no padding. `"same"` pads evenly to the
            left/right or up/down of the input so that, with `strides=1`, the
            output has the same size as the input. `"causal"` produces causal
            (dilated) convolutions, i.e. `output[t]` does not depend on
            `input[t+1:]` — useful for temporal data where the model must not
            violate the temporal order.
            See [WaveNet: A Generative Model for Raw Audio, section2.1](
            https://arxiv.org/abs/1609.03499).
        data_format: string, either `"channels_last"` (inputs shaped
            `(batch, steps, features)`) or `"channels_first"` (inputs shaped
            `(batch, features, steps)`). Defaults to the `image_data_format`
            value in your Keras config file at `~/.keras/keras.json`, or
            `"channels_last"` if you never set it.
        dilation_rate: int or tuple/list of 1 integer, dilation rate for
            dilated convolution.
        groups: positive int, number of groups the input channels are split
            into along the channel axis. Each group is convolved separately
            with `filters // groups` filters and the results are concatenated
            along the channel axis. Input channels and `filters` must both be
            divisible by `groups`.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function applied to the kernel
            after each `Optimizer` update (e.g. to implement norm or value
            constraints). It must take the unprojected variable and return the
            projected variable of the same shape. Constraints are not safe to
            use when doing asynchronous distributed training.
        bias_constraint: Optional projection function applied to the bias
            after each `Optimizer` update.

    Input shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, steps, channels)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, channels, steps)`

    Output shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, new_steps, filters)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, filters, new_steps)`

    Returns:
        A 3D tensor representing `activation(conv1d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> # The inputs are 128-length vectors with 10 timesteps, and the
    >>> # batch size is 4.
    >>> x = np.random.rand(4, 10, 128)
    >>> y = keras.layers.Conv1D(32, 3, activation='relu')(x)
    >>> print(y.shape)
    (4, 8, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        super().__init__(
            rank=1,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            groups=groups,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )

    def _compute_causal_padding(self):
        """Padding spec that pads only the left of the time axis."""
        amount = self.dilation_rate[0] * (self.kernel_size[0] - 1)
        if self.data_format == "channels_last":
            return [[0, 0], [amount, 0], [0, 0]]
        return [[0, 0], [0, 0], [amount, 0]]

    def call(self, inputs):
        """Run the 1D convolution, handling `"causal"` padding explicitly."""
        conv_padding = self.padding
        if conv_padding == "causal":
            # Move the padding into the input so the conv itself is "valid";
            # output[t] then never depends on input[t+1:].
            inputs = ops.pad(inputs, self._compute_causal_padding())
            conv_padding = "valid"

        outputs = ops.conv(
            inputs,
            self.kernel,
            strides=list(self.strides),
            padding=conv_padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the bias so it broadcasts over batch + spatial dims.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            outputs = ops.add(outputs, ops.reshape(self.bias, bias_shape))

        if self.activation is None:
            return outputs
        return self.activation(outputs)
+ return outputs
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose
3
+
4
+
5
+ @keras_export(
6
+ [
7
+ "keras.layers.Conv1DTranspose",
8
+ "keras.layers.Convolution1DTranspose",
9
+ ]
10
+ )
11
+ class Conv1DTranspose(BaseConvTranspose):
12
+ """1D transposed convolution layer.
13
+
14
+ The need for transposed convolutions generally arise from the desire to use
15
+ a transformation going in the opposite direction of a normal convolution,
16
+ i.e., from something that has the shape of the output of some convolution
17
+ to something that has the shape of its input while maintaining a
18
+ connectivity pattern that is compatible with said convolution.
19
+
20
+ Args:
21
+ filters: int, the dimension of the output space (the number of filters
22
+ in the transpose convolution).
23
+ kernel_size: int or tuple/list of 1 integer, specifying the size of the
24
+ transposed convolution window.
25
+ strides: int or tuple/list of 1 integer, specifying the stride length
26
+ of the transposed convolution. `strides > 1` is incompatible with
27
+ `dilation_rate > 1`.
28
+ padding: string, either `"valid"` or `"same"` (case-insensitive).
29
+ `"valid"` means no padding. `"same"` results in padding evenly to
30
+ the left/right or up/down of the input such that output has the same
31
+ height/width dimension as the input.
32
+ data_format: string, either `"channels_last"` or `"channels_first"`.
33
+ The ordering of the dimensions in the inputs. `"channels_last"`
34
+ corresponds to inputs with shape `(batch, steps, features)`
35
+ while `"channels_first"` corresponds to inputs with shape
36
+ `(batch, features, steps)`. It defaults to the `image_data_format`
37
+ value found in your Keras config file at `~/.keras/keras.json`.
38
+ If you never set it, then it will be `"channels_last"`.
39
+ dilation_rate: int or tuple/list of 1 integers, specifying the dilation
40
+ rate to use for dilated transposed convolution.
41
+ activation: Activation function. If `None`, no activation is applied.
42
+ use_bias: bool, if `True`, bias will be added to the output.
43
+ kernel_initializer: Initializer for the convolution kernel. If `None`,
44
+ the default initializer (`"glorot_uniform"`) will be used.
45
+ bias_initializer: Initializer for the bias vector. If `None`, the
46
+ default initializer (`"zeros"`) will be used.
47
+ kernel_regularizer: Optional regularizer for the convolution kernel.
48
+ bias_regularizer: Optional regularizer for the bias vector.
49
+ activity_regularizer: Optional regularizer function for the output.
50
+ kernel_constraint: Optional projection function to be applied to the
51
+ kernel after being updated by an `Optimizer` (e.g. used to implement
52
+ norm constraints or value constraints for layer weights). The
53
+ function must take as input the unprojected variable and must return
54
+ the projected variable (which must have the same shape). Constraints
55
+ are not safe to use when doing asynchronous distributed training.
56
+ bias_constraint: Optional projection function to be applied to the
57
+ bias after being updated by an `Optimizer`.
58
+
59
+ Input shape:
60
+
61
+ - If `data_format="channels_last"`:
62
+ A 3D tensor with shape: `(batch_shape, steps, channels)`
63
+ - If `data_format="channels_first"`:
64
+ A 3D tensor with shape: `(batch_shape, channels, steps)`
65
+
66
+ Output shape:
67
+
68
+ - If `data_format="channels_last"`:
69
+ A 3D tensor with shape: `(batch_shape, new_steps, filters)`
70
+ - If `data_format="channels_first"`:
71
+ A 3D tensor with shape: `(batch_shape, filters, new_steps)`
72
+
73
+ Returns:
74
+ A 3D tensor representing
75
+ `activation(conv1d_transpose(inputs, kernel) + bias)`.
76
+
77
+ Raises:
78
+ ValueError: when both `strides > 1` and `dilation_rate > 1`.
79
+
80
+ References:
81
+ - [A guide to convolution arithmetic for deep learning](
82
+ https://arxiv.org/abs/1603.07285v1)
83
+ - [Deconvolutional Networks](
84
+ https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
85
+
86
+ Example:
87
+
88
+ >>> x = np.random.rand(4, 10, 128)
89
+ >>> y = keras.layers.Conv1DTranspose(32, 3, 2, activation='relu')(x)
90
+ >>> print(y.shape)
91
+ (4, 21, 32)
92
+ """
93
+
94
+ def __init__(
95
+ self,
96
+ filters,
97
+ kernel_size,
98
+ strides=1,
99
+ padding="valid",
100
+ data_format=None,
101
+ dilation_rate=1,
102
+ activation=None,
103
+ use_bias=True,
104
+ kernel_initializer="glorot_uniform",
105
+ bias_initializer="zeros",
106
+ kernel_regularizer=None,
107
+ bias_regularizer=None,
108
+ activity_regularizer=None,
109
+ kernel_constraint=None,
110
+ bias_constraint=None,
111
+ **kwargs,
112
+ ):
113
+ super().__init__(
114
+ rank=1,
115
+ filters=filters,
116
+ kernel_size=kernel_size,
117
+ strides=strides,
118
+ padding=padding,
119
+ data_format=data_format,
120
+ dilation_rate=dilation_rate,
121
+ activation=activation,
122
+ use_bias=use_bias,
123
+ kernel_initializer=kernel_initializer,
124
+ bias_initializer=bias_initializer,
125
+ kernel_regularizer=kernel_regularizer,
126
+ bias_regularizer=bias_regularizer,
127
+ activity_regularizer=activity_regularizer,
128
+ kernel_constraint=kernel_constraint,
129
+ bias_constraint=bias_constraint,
130
+ **kwargs,
131
+ )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_conv import BaseConv
3
+
4
+
5
@keras_export(["keras.layers.Conv2D", "keras.layers.Convolution2D"])
class Conv2D(BaseConv):
    """2D convolution layer.

    This layer creates a convolution kernel that is convolved with the layer
    input over a 2D spatial (or temporal) dimension (height and width) to
    produce a tensor of outputs. If `use_bias` is True, a bias vector is created
    and added to the outputs. Finally, if `activation` is not `None`, it is
    applied to the outputs as well.

    Args:
        filters: int, the dimension of the output space (the number of filters
            in the convolution).
        kernel_size: int or tuple/list of 2 integer, specifying the size of the
            convolution window.
        strides: int or tuple/list of 2 integer, specifying the stride length
            of the convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape
            `(batch_size, height, width, channels)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch_size, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file at
            `~/.keras/keras.json`. If you never set it, then it will be
            `"channels_last"`.
        dilation_rate: int or tuple/list of 2 integers, specifying the dilation
            rate to use for dilated convolution.
        groups: A positive int specifying the number of groups in which the
            input is split along the channel axis. Each group is convolved
            separately with `filters // groups` filters. The output is the
            concatenation of all the `groups` results along the channel axis.
            Input channels and `filters` must both be divisible by `groups`.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, height, width, channels)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, channels, height, width)`

    Output shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`

    Returns:
        A 4D tensor representing `activation(conv2d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> x = np.random.rand(4, 10, 10, 128)
    >>> y = keras.layers.Conv2D(32, 3, activation='relu')(x)
    >>> print(y.shape)
    (4, 8, 8, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=(1, 1),
        padding="valid",
        data_format=None,
        dilation_rate=(1, 1),
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # This subclass only fixes the spatial rank; all real work happens in
        # BaseConv. Gather the configuration once and forward it unchanged.
        base_config = dict(
            rank=2,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            groups=groups,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
        )
        super().__init__(**base_config, **kwargs)
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose
3
+
4
+
5
@keras_export(
    [
        "keras.layers.Conv2DTranspose",
        "keras.layers.Convolution2DTranspose",
    ]
)
class Conv2DTranspose(BaseConvTranspose):
    """2D transposed convolution layer.

    The need for transposed convolutions generally arise from the desire to use
    a transformation going in the opposite direction of a normal convolution,
    i.e., from something that has the shape of the output of some convolution
    to something that has the shape of its input while maintaining a
    connectivity pattern that is compatible with said convolution.

    Args:
        filters: int, the dimension of the output space (the number of filters
            in the transposed convolution).
        kernel_size: int or tuple/list of 2 integers, specifying the size of
            the transposed convolution window.
        strides: int or tuple/list of 2 integers, specifying the stride length
            of the transposed convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape
            `(batch_size, height, width, channels)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch_size, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file at
            `~/.keras/keras.json`. If you never set it, then it will be
            `"channels_last"`.
        dilation_rate: int or tuple/list of 2 integers, specifying the dilation
            rate to use for dilated transposed convolution.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, height, width, channels)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, channels, height, width)`

    Output shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`

    Returns:
        A 4D tensor representing
        `activation(conv2d_transpose(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    References:
    - [A guide to convolution arithmetic for deep learning](
        https://arxiv.org/abs/1603.07285v1)
    - [Deconvolutional Networks](
        https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)

    Example:

    >>> x = np.random.rand(4, 10, 8, 128)
    >>> y = keras.layers.Conv2DTranspose(32, 2, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 20, 16, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=(1, 1),
        padding="valid",
        data_format=None,
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # Fix rank=2; everything else is handled by BaseConvTranspose.
        super().__init__(
            rank=2,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_conv import BaseConv
3
+
4
+
5
@keras_export(["keras.layers.Conv3D", "keras.layers.Convolution3D"])
class Conv3D(BaseConv):
    """3D convolution layer.

    This layer creates a convolution kernel that is convolved with the layer
    input over a 3D spatial (or temporal) dimension (width, height and depth)
    to produce a tensor of outputs. If `use_bias` is True, a bias vector is
    created and added to the outputs. Finally, if `activation` is not `None`,
    it is applied to the outputs as well.

    Args:
        filters: int, the dimension of the output space (the number of filters
            in the convolution).
        kernel_size: int or tuple/list of 3 integer, specifying the size of the
            convolution window.
        strides: int or tuple/list of 3 integer, specifying the stride length
            of the convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape
            `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
            It defaults to the `image_data_format` value found in your Keras
            config file at `~/.keras/keras.json`. If you never set it, then it
            will be `"channels_last"`.
        dilation_rate: int or tuple/list of 3 integers, specifying the dilation
            rate to use for dilated convolution.
        groups: A positive int specifying the number of groups in which the
            input is split along the channel axis. Each group is convolved
            separately with `filters // groups` filters. The output is the
            concatenation of all the `groups` results along the channel axis.
            Input channels and `filters` must both be divisible by `groups`.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        5D tensor with shape:
        `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
    - If `data_format="channels_first"`:
        5D tensor with shape:
        `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

    Output shape:

    - If `data_format="channels_last"`:
        5D tensor with shape:
        `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,
        filters)`
    - If `data_format="channels_first"`:
        5D tensor with shape:
        `(batch_size, filters, new_spatial_dim1, new_spatial_dim2,
        new_spatial_dim3)`

    Returns:
        A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> x = np.random.rand(4, 10, 10, 10, 128)
    >>> y = keras.layers.Conv3D(32, 3, activation='relu')(x)
    >>> print(y.shape)
    (4, 8, 8, 8, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding="valid",
        data_format=None,
        dilation_rate=(1, 1, 1),
        groups=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # Fix rank=3; everything else is handled by BaseConv.
        super().__init__(
            rank=3,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            groups=groups,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose
3
+
4
+
5
@keras_export(
    [
        "keras.layers.Conv3DTranspose",
        "keras.layers.Convolution3DTranspose",
    ]
)
class Conv3DTranspose(BaseConvTranspose):
    """3D transposed convolution layer.

    The need for transposed convolutions generally arise from the desire to use
    a transformation going in the opposite direction of a normal convolution,
    i.e., from something that has the shape of the output of some convolution
    to something that has the shape of its input while maintaining a
    connectivity pattern that is compatible with said convolution.

    Args:
        filters: int, the dimension of the output space (the number of filters
            in the transposed convolution).
        kernel_size: int or tuple/list of 3 integers, specifying the size of
            the transposed convolution window.
        strides: int or tuple/list of 3 integers, specifying the stride length
            of the transposed convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape
            `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
            It defaults to the `image_data_format` value found in your Keras
            config file at `~/.keras/keras.json`. If you never set it, then it
            will be `"channels_last"`.
        dilation_rate: int or tuple/list of 3 integers, specifying the dilation
            rate to use for dilated transposed convolution.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        5D tensor with shape:
        `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
    - If `data_format="channels_first"`:
        5D tensor with shape:
        `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

    Output shape:

    - If `data_format="channels_last"`:
        5D tensor with shape:
        `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,
        filters)`
    - If `data_format="channels_first"`:
        5D tensor with shape:
        `(batch_size, filters, new_spatial_dim1, new_spatial_dim2,
        new_spatial_dim3)`

    Returns:
        A 5D tensor representing
        `activation(conv3d_transpose(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    References:
    - [A guide to convolution arithmetic for deep learning](
        https://arxiv.org/abs/1603.07285v1)
    - [Deconvolutional Networks](
        https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)

    Example:

    >>> x = np.random.rand(4, 10, 8, 12, 128)
    >>> y = keras.layers.Conv3DTranspose(32, 2, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 20, 16, 24, 32)
    """

    def __init__(
        self,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding="valid",
        data_format=None,
        dilation_rate=(1, 1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # Fix rank=3; everything else is handled by BaseConvTranspose.
        super().__init__(
            rank=3,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs,
        )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv
3
+
4
+
5
@keras_export("keras.layers.DepthwiseConv1D")
class DepthwiseConv1D(BaseDepthwiseConv):
    """1D depthwise convolution layer.

    Depthwise convolution is a type of convolution in which each input channel
    is convolved with a different kernel (called a depthwise kernel). You can
    understand depthwise convolution as the first step in a depthwise separable
    convolution.

    It is implemented via the following steps:

    - Split the input into individual channels.
    - Convolve each channel with an individual depthwise kernel with
      `depth_multiplier` output channels.
    - Concatenate the convolved outputs along the channels axis.

    Unlike a regular 1D convolution, depthwise convolution does not mix
    information across different input channels.

    The `depth_multiplier` argument determines how many filters are applied to
    one input channel. As such, it controls the amount of output channels that
    are generated per input channel in the depthwise step.

    Args:
        kernel_size: int or tuple/list of 1 integer, specifying the size of the
            depthwise convolution window.
        strides: int or tuple/list of 1 integer, specifying the stride length
            of the convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of 1 integers, specifying the dilation
            rate to use for dilated convolution.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: Initializer for the convolution kernel.
            If `None`, the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape: `(batch_shape, steps, channels)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape: `(batch_shape, channels, steps)`

    Output shape:

    - If `data_format="channels_last"`:
        A 3D tensor with shape:
        `(batch_shape, new_steps, channels * depth_multiplier)`
    - If `data_format="channels_first"`:
        A 3D tensor with shape:
        `(batch_shape, channels * depth_multiplier, new_steps)`

    Returns:
        A 3D tensor representing
        `activation(depthwise_conv1d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> x = np.random.rand(4, 10, 12)
    >>> y = keras.layers.DepthwiseConv1D(3, 3, 2, activation='relu')(x)
    >>> print(y.shape)
    (4, 4, 36)
    """

    def __init__(
        self,
        kernel_size,
        strides=1,
        padding="valid",
        depth_multiplier=1,
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # This subclass only pins the spatial rank to 1; BaseDepthwiseConv
        # implements the actual depthwise convolution logic.
        base_config = dict(
            rank=1,
            depth_multiplier=depth_multiplier,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            bias_initializer=bias_initializer,
            depthwise_regularizer=depthwise_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            depthwise_constraint=depthwise_constraint,
            bias_constraint=bias_constraint,
        )
        super().__init__(**base_config, **kwargs)
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv
3
+
4
+
5
@keras_export("keras.layers.DepthwiseConv2D")
class DepthwiseConv2D(BaseDepthwiseConv):
    """2D depthwise convolution layer.

    Depthwise convolution is a type of convolution in which each input channel
    is convolved with a different kernel (called a depthwise kernel). You can
    understand depthwise convolution as the first step in a depthwise separable
    convolution.

    It is implemented via the following steps:

    - Split the input into individual channels.
    - Convolve each channel with an individual depthwise kernel with
      `depth_multiplier` output channels.
    - Concatenate the convolved outputs along the channels axis.

    Unlike a regular 2D convolution, depthwise convolution does not mix
    information across different input channels.

    The `depth_multiplier` argument determines how many filters are applied to
    one input channel. As such, it controls the amount of output channels that
    are generated per input channel in the depthwise step.

    Args:
        kernel_size: int or tuple/list of 2 integer, specifying the size of the
            depthwise convolution window.
        strides: int or tuple/list of 2 integer, specifying the stride length
            of the depthwise convolution. `strides > 1` is incompatible with
            `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, height, width, channels)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, channels, height, width)`. It defaults to the
            `image_data_format` value found in your Keras config file
            at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of 2 integers, specifying the dilation
            rate to use for dilated convolution.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: Initializer for the convolution kernel.
            If `None`, the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.

    Input shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape: `(batch_size, height, width, channels)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape: `(batch_size, channels, height, width)`

    Output shape:

    - If `data_format="channels_last"`:
        A 4D tensor with shape:
        `(batch_size, new_height, new_width, channels * depth_multiplier)`
    - If `data_format="channels_first"`:
        A 4D tensor with shape:
        `(batch_size, channels * depth_multiplier, new_height, new_width)`

    Returns:
        A 4D tensor representing
        `activation(depthwise_conv2d(inputs, kernel) + bias)`.

    Raises:
        ValueError: when both `strides > 1` and `dilation_rate > 1`.

    Example:

    >>> x = np.random.rand(4, 10, 10, 12)
    >>> y = keras.layers.DepthwiseConv2D(kernel_size=3, activation='relu')(x)
    >>> print(y.shape)
    (4, 8, 8, 12)
    """

    def __init__(
        self,
        kernel_size,
        strides=(1, 1),
        padding="valid",
        depth_multiplier=1,
        data_format=None,
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        bias_constraint=None,
        **kwargs,
    ):
        # This subclass only pins the spatial rank to 2; BaseDepthwiseConv
        # implements the actual depthwise convolution logic.
        base_config = dict(
            rank=2,
            depth_multiplier=depth_multiplier,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            depthwise_initializer=depthwise_initializer,
            bias_initializer=bias_initializer,
            depthwise_regularizer=depthwise_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            depthwise_constraint=depthwise_constraint,
            bias_constraint=bias_constraint,
        )
        super().__init__(**base_config, **kwargs)
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv
3
+
4
+
5
+ @keras_export(
6
+ [
7
+ "keras.layers.SeparableConv1D",
8
+ "keras.layers.SeparableConvolution1D",
9
+ ]
10
+ )
11
+ class SeparableConv1D(BaseSeparableConv):
12
+ """1D separable convolution layer.
13
+
14
+ This layer performs a depthwise convolution that acts separately on
15
+ channels, followed by a pointwise convolution that mixes channels.
16
+ If `use_bias` is True and a bias initializer is provided,
17
+ it adds a bias vector to the output. It then optionally applies an
18
+ activation function to produce the final output.
19
+
20
+ Args:
21
+ filters: int, the dimensionality of the output space (i.e. the number
22
+ of filters in the pointwise convolution).
23
+ kernel_size: int or tuple/list of 1 integers, specifying the size of the
24
+ depthwise convolution window.
25
+ strides: int or tuple/list of 1 integers, specifying the stride length
26
+ of the depthwise convolution. If only one int is specified, the same
27
+ stride size will be used for all dimensions. `strides > 1` is
28
+ incompatible with `dilation_rate > 1`.
29
+ padding: string, either `"valid"` or `"same"` (case-insensitive).
30
+ `"valid"` means no padding. `"same"` results in padding evenly to
31
+ the left/right or up/down of the input. When `padding="same"` and
32
+ `strides=1`, the output has the same size as the input.
33
+ data_format: string, either `"channels_last"` or `"channels_first"`.
34
+ The ordering of the dimensions in the inputs. `"channels_last"`
35
+ corresponds to inputs with shape `(batch, steps, features)`
36
+ while `"channels_first"` corresponds to inputs with shape
37
+ `(batch, features, steps)`. It defaults to the `image_data_format`
38
+ value found in your Keras config file at `~/.keras/keras.json`.
39
+ If you never set it, then it will be `"channels_last"`.
40
+ dilation_rate: int or tuple/list of 1 integers, specifying the dilation
41
+ rate to use for dilated convolution. If only one int is specified,
42
+ the same dilation rate will be used for all dimensions.
43
+ depth_multiplier: The number of depthwise convolution output channels
44
+ for each input channel. The total number of depthwise convolution
45
+ output channels will be equal to `input_channel * depth_multiplier`.
46
+ activation: Activation function. If `None`, no activation is applied.
47
+ use_bias: bool, if `True`, bias will be added to the output.
48
+ depthwise_initializer: An initializer for the depthwise convolution
49
+ kernel. If None, then the default initializer (`"glorot_uniform"`)
50
+ will be used.
51
+ pointwise_initializer: An initializer for the pointwise convolution
52
+ kernel. If None, then the default initializer (`"glorot_uniform"`)
53
+ will be used.
54
+ bias_initializer: An initializer for the bias vector. If None, the
55
+ default initializer ('"zeros"') will be used.
56
+ depthwise_regularizer: Optional regularizer for the depthwise
57
+ convolution kernel.
58
+ pointwise_regularizer: Optional regularizer for the pointwise
59
+ convolution kernel.
60
+ bias_regularizer: Optional regularizer for the bias vector.
61
+ activity_regularizer: Optional regularizer function for the output.
62
+ depthwise_constraint: Optional projection function to be applied to the
63
+ depthwise kernel after being updated by an `Optimizer` (e.g. used
64
+ for norm constraints or value constraints for layer weights). The
65
+ function must take as input the unprojected variable and must return
66
+ the projected variable (which must have the same shape).
67
+ pointwise_constraint: Optional projection function to be applied to the
68
+ pointwise kernel after being updated by an `Optimizer`.
69
+ bias_constraint: Optional projection function to be applied to the
70
+ bias after being updated by an `Optimizer`.
71
+
72
+ Input shape:
73
+
74
+ - If `data_format="channels_last"`:
75
+ A 3D tensor with shape: `(batch_shape, steps, channels)`
76
+ - If `data_format="channels_first"`:
77
+ A 3D tensor with shape: `(batch_shape, channels, steps)`
78
+
79
+ Output shape:
80
+
81
+ - If `data_format="channels_last"`:
82
+ A 3D tensor with shape: `(batch_shape, new_steps, filters)`
83
+ - If `data_format="channels_first"`:
84
+ A 3D tensor with shape: `(batch_shape, filters, new_steps)`
85
+
86
+ Returns:
87
+ A 3D tensor representing
88
+ `activation(separable_conv1d(inputs, kernel) + bias)`.
89
+
90
+ Example:
91
+
92
+ >>> x = np.random.rand(4, 10, 12)
93
+ >>> y = keras.layers.SeparableConv1D(3, 4, 3, 2, activation='relu')(x)
94
+ >>> print(y.shape)
95
+ (4, 4, 4)
96
+ """
97
+
98
+ def __init__(
99
+ self,
100
+ filters,
101
+ kernel_size,
102
+ strides=1,
103
+ padding="valid",
104
+ data_format=None,
105
+ dilation_rate=1,
106
+ depth_multiplier=1,
107
+ activation=None,
108
+ use_bias=True,
109
+ depthwise_initializer="glorot_uniform",
110
+ pointwise_initializer="glorot_uniform",
111
+ bias_initializer="zeros",
112
+ depthwise_regularizer=None,
113
+ pointwise_regularizer=None,
114
+ bias_regularizer=None,
115
+ activity_regularizer=None,
116
+ depthwise_constraint=None,
117
+ pointwise_constraint=None,
118
+ bias_constraint=None,
119
+ **kwargs,
120
+ ):
121
+ super().__init__(
122
+ rank=1,
123
+ depth_multiplier=depth_multiplier,
124
+ filters=filters,
125
+ kernel_size=kernel_size,
126
+ strides=strides,
127
+ padding=padding,
128
+ data_format=data_format,
129
+ dilation_rate=dilation_rate,
130
+ activation=activation,
131
+ use_bias=use_bias,
132
+ depthwise_initializer=depthwise_initializer,
133
+ pointwise_initializer=pointwise_initializer,
134
+ bias_initializer=bias_initializer,
135
+ depthwise_regularizer=depthwise_regularizer,
136
+ pointwise_regularizer=pointwise_regularizer,
137
+ bias_regularizer=bias_regularizer,
138
+ activity_regularizer=activity_regularizer,
139
+ depthwise_constraint=depthwise_constraint,
140
+ pointwise_constraint=pointwise_constraint,
141
+ bias_constraint=bias_constraint,
142
+ **kwargs,
143
+ )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from keras.src.api_export import keras_export
2
+ from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv
3
+
4
+
5
+ @keras_export(
6
+ [
7
+ "keras.layers.SeparableConv2D",
8
+ "keras.layers.SeparableConvolution2D",
9
+ ]
10
+ )
11
+ class SeparableConv2D(BaseSeparableConv):
12
+ """2D separable convolution layer.
13
+
14
+ This layer performs a depthwise convolution that acts separately on
15
+ channels, followed by a pointwise convolution that mixes channels.
16
+ If `use_bias` is True and a bias initializer is provided,
17
+ it adds a bias vector to the output. It then optionally applies an
18
+ activation function to produce the final output.
19
+
20
+ Args:
21
+ filters: int, the dimensionality of the output space (i.e. the number
22
+ of filters in the pointwise convolution).
23
+ kernel_size: int or tuple/list of 2 integers, specifying the size of the
24
+ depthwise convolution window.
25
+ strides: int or tuple/list of 2 integers, specifying the stride length
26
+ of the depthwise convolution. If only one int is specified, the same
27
+ stride size will be used for all dimensions. `strides > 1` is
28
+ incompatible with `dilation_rate > 1`.
29
+ padding: string, either `"valid"` or `"same"` (case-insensitive).
30
+ `"valid"` means no padding. `"same"` results in padding evenly to
31
+ the left/right or up/down of the input. When `padding="same"` and
32
+ `strides=1`, the output has the same size as the input.
33
+ data_format: string, either `"channels_last"` or `"channels_first"`.
34
+ The ordering of the dimensions in the inputs. `"channels_last"`
35
+ corresponds to inputs with shape `(batch, height, width, channels)`
36
+ while `"channels_first"` corresponds to inputs with shape
37
+ `(batch, channels, height, width)`. It defaults to the
38
+ `image_data_format` value found in your Keras config file
39
+ at `~/.keras/keras.json`.
40
+ If you never set it, then it will be `"channels_last"`.
41
+ dilation_rate: int or tuple/list of 2 integers, specifying the dilation
42
+ rate to use for dilated convolution. If only one int is specified,
43
+ the same dilation rate will be used for all dimensions.
44
+ depth_multiplier: The number of depthwise convolution output channels
45
+ for each input channel. The total number of depthwise convolution
46
+ output channels will be equal to `input_channel * depth_multiplier`.
47
+ activation: Activation function. If `None`, no activation is applied.
48
+ use_bias: bool, if `True`, bias will be added to the output.
49
+ depthwise_initializer: An initializer for the depthwise convolution
50
+ kernel. If None, then the default initializer (`"glorot_uniform"`)
51
+ will be used.
52
+ pointwise_initializer: An initializer for the pointwise convolution
53
+ kernel. If None, then the default initializer (`"glorot_uniform"`)
54
+ will be used.
55
+ bias_initializer: An initializer for the bias vector. If None, the
56
+ default initializer ('"zeros"') will be used.
57
+ depthwise_regularizer: Optional regularizer for the depthwise
58
+ convolution kernel.
59
+ pointwise_regularizer: Optional regularizer for the pointwise
60
+ convolution kernel.
61
+ bias_regularizer: Optional regularizer for the bias vector.
62
+ activity_regularizer: Optional regularizer function for the output.
63
+ depthwise_constraint: Optional projection function to be applied to the
64
+ depthwise kernel after being updated by an `Optimizer` (e.g. used
65
+ for norm constraints or value constraints for layer weights). The
66
+ function must take as input the unprojected variable and must return
67
+ the projected variable (which must have the same shape).
68
+ pointwise_constraint: Optional projection function to be applied to the
69
+ pointwise kernel after being updated by an `Optimizer`.
70
+ bias_constraint: Optional projection function to be applied to the
71
+ bias after being updated by an `Optimizer`.
72
+
73
+ Input shape:
74
+
75
+ - If `data_format="channels_last"`:
76
+ A 4D tensor with shape: `(batch_size, height, width, channels)`
77
+ - If `data_format="channels_first"`:
78
+ A 4D tensor with shape: `(batch_size, channels, height, width)`
79
+
80
+ Output shape:
81
+
82
+ - If `data_format="channels_last"`:
83
+ A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
84
+ - If `data_format="channels_first"`:
85
+ A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`
86
+
87
+ Returns:
88
+ A 4D tensor representing
89
+ `activation(separable_conv2d(inputs, kernel) + bias)`.
90
+
91
+ Example:
92
+
93
+ >>> x = np.random.rand(4, 10, 10, 12)
94
+ >>> y = keras.layers.SeparableConv2D(3, 4, 3, 2, activation='relu')(x)
95
+ >>> print(y.shape)
96
+ (4, 4, 4, 4)
97
+ """
98
+
99
+ def __init__(
100
+ self,
101
+ filters,
102
+ kernel_size,
103
+ strides=(1, 1),
104
+ padding="valid",
105
+ data_format=None,
106
+ dilation_rate=(1, 1),
107
+ depth_multiplier=1,
108
+ activation=None,
109
+ use_bias=True,
110
+ depthwise_initializer="glorot_uniform",
111
+ pointwise_initializer="glorot_uniform",
112
+ bias_initializer="zeros",
113
+ depthwise_regularizer=None,
114
+ pointwise_regularizer=None,
115
+ bias_regularizer=None,
116
+ activity_regularizer=None,
117
+ depthwise_constraint=None,
118
+ pointwise_constraint=None,
119
+ bias_constraint=None,
120
+ **kwargs,
121
+ ):
122
+ super().__init__(
123
+ rank=2,
124
+ depth_multiplier=depth_multiplier,
125
+ filters=filters,
126
+ kernel_size=kernel_size,
127
+ strides=strides,
128
+ padding=padding,
129
+ data_format=data_format,
130
+ dilation_rate=dilation_rate,
131
+ activation=activation,
132
+ use_bias=use_bias,
133
+ depthwise_initializer=depthwise_initializer,
134
+ pointwise_initializer=pointwise_initializer,
135
+ bias_initializer=bias_initializer,
136
+ depthwise_regularizer=depthwise_regularizer,
137
+ pointwise_regularizer=pointwise_regularizer,
138
+ bias_regularizer=bias_regularizer,
139
+ activity_regularizer=activity_regularizer,
140
+ depthwise_constraint=depthwise_constraint,
141
+ pointwise_constraint=pointwise_constraint,
142
+ bias_constraint=bias_constraint,
143
+ **kwargs,
144
+ )
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py ADDED
File without changes
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (197 Bytes). View file
 
SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc ADDED
Binary file (15.3 kB). View file