diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..009ce976c51b54689837db8c616ce99467bc10a7 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py @@ -0,0 +1,5 @@ +from keras.src.layers.activations.elu import ELU +from keras.src.layers.activations.leaky_relu import LeakyReLU +from keras.src.layers.activations.prelu import PReLU +from keras.src.layers.activations.relu import ReLU +from keras.src.layers.activations.softmax import Softmax diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..68f65ec8711f382356c3f80ab7da682709d6b300 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py @@ -0,0 +1,40 @@ +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Activation") +class Activation(Layer): + """Applies an activation function to an output. + + Args: + activation: Activation function. It could be a callable, or the name of + an activation from the `keras.activations` namespace. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
+ + Example: + + >>> layer = keras.layers.Activation('relu') + >>> layer(np.array([-3.0, -1.0, 0.0, 2.0])) + [0.0, 0.0, 0.0, 2.0] + >>> layer = keras.layers.Activation(keras.activations.relu) + >>> layer(np.array([-3.0, -1.0, 0.0, 2.0])) + [0.0, 0.0, 0.0, 2.0] + """ + + def __init__(self, activation, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.activation = activations.get(activation) + self.built = True + + def call(self, inputs): + return self.activation(inputs) + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = {"activation": activations.serialize(self.activation)} + base_config = super().get_config() + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf3f632ee700b25ac05a0eb9992ec3426e3e1ab --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py @@ -0,0 +1,32 @@ +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.ELU") +class ELU(Layer): + """Applies an Exponential Linear Unit function to an output. + + Formula: + + ``` + f(x) = alpha * (exp(x) - 1.) for x < 0 + f(x) = x for x >= 0 + ``` + + Args: + alpha: float, slope of negative section. Defaults to `1.0`. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
+ """ + + def __init__(self, alpha=1.0, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.supports_masking = True + self.built = True + + def call(self, inputs): + return activations.elu(inputs, alpha=self.alpha) + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py new file mode 100644 index 0000000000000000000000000000000000000000..6be1ddfb7e642205594050ff422fca059decbcca --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py @@ -0,0 +1,67 @@ +import warnings + +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.LeakyReLU") +class LeakyReLU(Layer): + """Leaky version of a Rectified Linear Unit activation layer. + + This layer allows a small gradient when the unit is not active. + + Formula: + + ``` python + f(x) = alpha * x if x < 0 + f(x) = x if x >= 0 + ``` + + Example: + + ``` python + leaky_relu_layer = LeakyReLU(negative_slope=0.5) + input = np.array([-10, -5, 0.0, 5, 10]) + result = leaky_relu_layer(input) + # result = [-5. , -2.5, 0. , 5. , 10.] + ``` + + Args: + negative_slope: Float >= 0.0. Negative slope coefficient. + Defaults to `0.3`. + **kwargs: Base layer keyword arguments, such as + `name` and `dtype`. + + """ + + def __init__(self, negative_slope=0.3, **kwargs): + if "alpha" in kwargs: + negative_slope = kwargs.pop("alpha") + warnings.warn( + "Argument `alpha` is deprecated. " + "Use `negative_slope` instead." + ) + super().__init__(**kwargs) + if negative_slope is None or negative_slope < 0: + raise ValueError( + "The negative_slope value of a Leaky ReLU layer " + "cannot be None or negative value. Expected a float." 
+ f" Received: negative_slope={negative_slope}" + ) + self.negative_slope = negative_slope + self.supports_masking = True + self.built = True + + def call(self, inputs): + return activations.leaky_relu( + inputs, negative_slope=self.negative_slope + ) + + def get_config(self): + config = super().get_config() + config.update({"negative_slope": self.negative_slope}) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py new file mode 100644 index 0000000000000000000000000000000000000000..652b60e2206776eeb317af4346ef577c1d32d945 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py @@ -0,0 +1,99 @@ +from keras.src import activations +from keras.src import constraints +from keras.src import initializers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.PReLU") +class PReLU(Layer): + """Parametric Rectified Linear Unit activation layer. + + Formula: + ``` python + f(x) = alpha * x for x < 0 + f(x) = x for x >= 0 + ``` + where `alpha` is a learned array with the same shape as x. + + Args: + alpha_initializer: Initializer function for the weights. + alpha_regularizer: Regularizer for the weights. + alpha_constraint: Constraint for the weights. + shared_axes: The axes along which to share learnable parameters for the + activation function. For example, if the incoming feature maps are + from a 2D convolution with output shape + `(batch, height, width, channels)`, and you wish to share parameters + across space so that each filter only has one set of parameters, + set `shared_axes=[1, 2]`. 
+ **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + """ + + def __init__( + self, + alpha_initializer="Zeros", + alpha_regularizer=None, + alpha_constraint=None, + shared_axes=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self.alpha_initializer = initializers.get(alpha_initializer) + self.alpha_regularizer = regularizers.get(alpha_regularizer) + self.alpha_constraint = constraints.get(alpha_constraint) + if shared_axes is None: + self.shared_axes = None + elif not isinstance(shared_axes, (list, tuple)): + self.shared_axes = [shared_axes] + else: + self.shared_axes = list(shared_axes) + + def build(self, input_shape): + param_shape = list(input_shape[1:]) + if self.shared_axes is not None: + for i in self.shared_axes: + param_shape[i - 1] = 1 + self.alpha = self.add_weight( + shape=param_shape, + name="alpha", + initializer=self.alpha_initializer, + regularizer=self.alpha_regularizer, + constraint=self.alpha_constraint, + ) + # Set input spec + axes = {} + if self.shared_axes: + for i in range(1, len(input_shape)): + if i not in self.shared_axes: + axes[i] = input_shape[i] + self.input_spec = InputSpec(ndim=len(input_shape), axes=axes) + self.built = True + + def call(self, inputs): + pos = activations.relu(inputs) + neg = -self.alpha * activations.relu(-inputs) + return pos + neg + + def get_config(self): + config = super().get_config() + config.update( + { + "alpha_initializer": initializers.serialize( + self.alpha_initializer + ), + "alpha_regularizer": regularizers.serialize( + self.alpha_regularizer + ), + "alpha_constraint": constraints.serialize( + self.alpha_constraint + ), + "shared_axes": self.shared_axes, + } + ) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py new file mode 100644 index 0000000000000000000000000000000000000000..53a120f852c56643ca15f67aca6ff95a308ce3a9 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py @@ -0,0 +1,86 @@ +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.ReLU") +class ReLU(Layer): + """Rectified Linear Unit activation function layer. + + Formula: + ``` python + f(x) = max(x,0) + f(x) = max_value if x >= max_value + f(x) = x if threshold <= x < max_value + f(x) = negative_slope * (x - threshold) otherwise + ``` + + Example: + ``` python + relu_layer = keras.layers.ReLU( + max_value=10, + negative_slope=0.5, + threshold=0, + ) + input = np.array([-10, -5, 0.0, 5, 10]) + result = relu_layer(input) + # result = [-5. , -2.5, 0. , 5. , 10.] + ``` + + Args: + max_value: Float >= 0. Maximum activation value. None means unlimited. + Defaults to `None`. + negative_slope: Float >= 0. Negative slope coefficient. + Defaults to `0.0`. + threshold: Float >= 0. Threshold value for thresholded activation. + Defaults to `0.0`. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + """ + + def __init__( + self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs + ): + super().__init__(**kwargs) + if max_value is not None and max_value < 0.0: + raise ValueError( + "max_value of a ReLU layer cannot be a negative " + f"value. Received: max_value={max_value}" + ) + if negative_slope is None or negative_slope < 0.0: + raise ValueError( + "negative_slope of a ReLU layer cannot be a negative " + f"value. Received: negative_slope={negative_slope}" + ) + if threshold is None or threshold < 0.0: + raise ValueError( + "threshold of a ReLU layer cannot be a negative " + f"value. 
Received: threshold={threshold}" + ) + + self.max_value = max_value + self.negative_slope = negative_slope + self.threshold = threshold + self.supports_masking = True + self.built = True + + def call(self, inputs): + return activations.relu( + inputs, + negative_slope=self.negative_slope, + max_value=self.max_value, + threshold=self.threshold, + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "max_value": self.max_value, + "negative_slope": self.negative_slope, + "threshold": self.threshold, + } + ) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py new file mode 100644 index 0000000000000000000000000000000000000000..195b47e2b209b4998a119d3ae59b65dc139fe6ee --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py @@ -0,0 +1,76 @@ +from keras.src import activations +from keras.src import backend +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +def _large_negative_number(dtype): + """Return a Large negative number based on dtype.""" + if backend.standardize_dtype(dtype) == "float16": + return -3e4 + return -1e9 + + +@keras_export("keras.layers.Softmax") +class Softmax(Layer): + """Softmax activation layer. + + Formula: + ``` python + exp_x = exp(x - max(x)) + f(x) = exp_x / sum(exp_x) + ``` + + Example: + >>> softmax_layer = keras.layers.Softmax() + >>> input = np.array([1.0, 2.0, 1.0]) + >>> result = softmax_layer(input) + >>> result + [0.21194157, 0.5761169, 0.21194157] + + + Args: + axis: Integer, or list of Integers, axis along which the softmax + normalization is applied. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
+ + Call arguments: + inputs: The inputs (logits) to the softmax layer. + mask: A boolean mask of the same shape as `inputs`. The mask + specifies 1 to keep and 0 to mask. Defaults to `None`. + + Returns: + Softmaxed output with the same shape as `inputs`. + """ + + def __init__(self, axis=-1, **kwargs): + super().__init__(**kwargs) + self.axis = axis + self.supports_masking = True + self.built = True + + def call(self, inputs, mask=None): + if mask is not None: + adder = ( + 1.0 - backend.cast(mask, inputs.dtype) + ) * _large_negative_number(inputs.dtype) + inputs += adder + if isinstance(self.axis, (tuple, list)): + if len(self.axis) > 1: + return backend.numpy.exp( + inputs + - backend.math.logsumexp( + inputs, axis=self.axis, keepdims=True + ) + ) + else: + return activations.softmax(inputs, axis=self.axis[0]) + return activations.softmax(inputs, axis=self.axis) + + def get_config(self): + config = super().get_config() + config.update({"axis": self.axis}) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fda279acfb7a705a0be193d0fdd03d8b7b99af1 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b61985db216b6dcc2d28a746d0a3e92b664807f7 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7607d503871461d98ecc21e14c8061f4332cae3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7210e6998c1c77c129095b0f002034cc37f8e02 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..10c9f476aa1ab4cc30ba34c159c270a386496f4b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..787dd50e71a9f627376cbdcbf6f57d0331a13403 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py @@ -0,0 +1,103 @@ +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.attention.attention import Attention + + +@keras_export("keras.layers.AdditiveAttention") +class AdditiveAttention(Attention): + """Additive attention layer, a.k.a. Bahdanau-style attention. + + Inputs are a list with 2 or 3 elements: + 1. A `query` tensor of shape `(batch_size, Tq, dim)`. + 2. A `value` tensor of shape `(batch_size, Tv, dim)`. + 3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none + supplied, `value` will be used as `key`. + + The calculation follows the steps: + 1. Calculate attention scores using `query` and `key` with shape + `(batch_size, Tq, Tv)` as a non-linear sum + `scores = reduce_sum(tanh(query + key), axis=-1)`. + 2. Use scores to calculate a softmax distribution with shape + `(batch_size, Tq, Tv)`. + 3. Use the softmax distribution to create a linear combination of `value` + with shape `(batch_size, Tq, dim)`. + + Args: + use_scale: If `True`, will create a scalar variable to scale the + attention scores. + dropout: Float between 0 and 1. Fraction of the units to drop for the + attention scores. Defaults to `0.0`. 
+ + Call arguments: + inputs: List of the following tensors: + - `query`: Query tensor of shape `(batch_size, Tq, dim)`. + - `value`: Value tensor of shape `(batch_size, Tv, dim)`. + - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If + not given, will use `value` for both `key` and `value`, which is + the most common case. + mask: List of the following tensors: + - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. + If given, the output will be zero at the positions where + `mask==False`. + - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. + If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + a mask such that position `i` cannot attend to positions `j > i`. + This prevents the flow of information from the future towards the + past. Defaults to `False`. + + Output: + Attention outputs of shape `(batch_size, Tq, dim)`. + (Optional) Attention scores after masking and softmax with shape + `(batch_size, Tq, Tv)`. + """ + + def __init__( + self, + use_scale=True, + dropout=0.0, + **kwargs, + ): + super().__init__(use_scale=use_scale, dropout=dropout, **kwargs) + + def build(self, input_shape): + self._validate_inputs(input_shape) + dim = input_shape[0][-1] + self.scale = None + if self.use_scale: + self.scale = self.add_weight( + name="scale", + shape=[dim], + initializer="glorot_uniform", + dtype=self.dtype, + trainable=True, + ) + self.built = True + + def _calculate_scores(self, query, key): + """Calculates attention scores as a nonlinear sum of query and key. 
+ + Args: + query: Query tensor of shape `(batch_size, Tq, dim)`. + key: Key tensor of shape `(batch_size, Tv, dim)`. + + Returns: + Tensor of shape `(batch_size, Tq, Tv)`. + """ + # Reshape tensors to enable broadcasting. + # Reshape into [batch_size, Tq, 1, dim]. + q_reshaped = ops.expand_dims(query, axis=-2) + # Reshape into [batch_size, 1, Tv, dim]. + k_reshaped = ops.expand_dims(key, axis=-3) + scale = self.scale if self.use_scale else 1.0 + return ops.sum(scale * ops.tanh(q_reshaped + k_reshaped), axis=-1) + + def get_config(self): + base_config = super().get_config() + del base_config["score_mode"] + return base_config diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..d336781c8b3c400d45f24c8d5539e1208f0ae01c --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py @@ -0,0 +1,330 @@ +from keras.src import backend +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.backend import KerasTensor +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Attention") +class Attention(Layer): + """Dot-product attention layer, a.k.a. Luong-style attention. + + Inputs are a list with 2 or 3 elements: + 1. A `query` tensor of shape `(batch_size, Tq, dim)`. + 2. A `value` tensor of shape `(batch_size, Tv, dim)`. + 3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none + supplied, `value` will be used as a `key`. + + The calculation follows the steps: + 1. Calculate attention scores using `query` and `key` with shape + `(batch_size, Tq, Tv)`. + 2. Use scores to calculate a softmax distribution with shape + `(batch_size, Tq, Tv)`. + 3. 
Use the softmax distribution to create a linear combination of `value` + with shape `(batch_size, Tq, dim)`. + + Args: + use_scale: If `True`, will create a scalar variable to scale the + attention scores. + dropout: Float between 0 and 1. Fraction of the units to drop for the + attention scores. Defaults to `0.0`. + seed: A Python integer to use as random seed in case of `dropout`. + score_mode: Function to use to compute attention scores, one of + `{"dot", "concat"}`. `"dot"` refers to the dot product between the + query and key vectors. `"concat"` refers to the hyperbolic tangent + of the concatenation of the `query` and `key` vectors. + + Call arguments: + inputs: List of the following tensors: + - `query`: Query tensor of shape `(batch_size, Tq, dim)`. + - `value`: Value tensor of shape `(batch_size, Tv, dim)`. + - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If + not given, will use `value` for both `key` and `value`, which is + the most common case. + mask: List of the following tensors: + - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. + If given, the output will be zero at the positions where + `mask==False`. + - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. + If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + a mask such that position `i` cannot attend to positions `j > i`. + This prevents the flow of information from the future towards the + past. Defaults to `False`. + + Output: + Attention outputs of shape `(batch_size, Tq, dim)`. 
+ (Optional) Attention scores after masking and softmax with shape + `(batch_size, Tq, Tv)`. + """ + + def __init__( + self, + use_scale=False, + score_mode="dot", + dropout=0.0, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.use_scale = use_scale + self.score_mode = score_mode + self.dropout = dropout + if self.dropout > 0: + self.seed_generator = backend.random.SeedGenerator(seed=seed) + + if self.score_mode not in ["dot", "concat"]: + raise ValueError( + "Invalid value for argument score_mode. " + "Expected one of {'dot', 'concat'}. " + f"Received: score_mode={score_mode}" + ) + + self._return_attention_scores = False + + def build(self, input_shape): + self._validate_inputs(input_shape) + self.scale = None + self.concat_score_weight = None + if self.use_scale: + self.scale = self.add_weight( + name="scale", + shape=(), + initializer="ones", + dtype=self.dtype, + trainable=True, + ) + if self.score_mode == "concat": + self.concat_score_weight = self.add_weight( + name="concat_score_weight", + shape=(), + initializer="ones", + dtype=self.dtype, + trainable=True, + ) + self.built = True + + def _calculate_scores(self, query, key): + """Calculates attention scores as a query-key dot product. + + Args: + query: Query tensor of shape `(batch_size, Tq, dim)`. + key: Key tensor of shape `(batch_size, Tv, dim)`. + + Returns: + Tensor of shape `(batch_size, Tq, Tv)`. + """ + if self.score_mode == "dot": + scores = ops.matmul(query, ops.transpose(key, axes=[0, 2, 1])) + if self.scale is not None: + scores *= self.scale + elif self.score_mode == "concat": + # Reshape tensors to enable broadcasting. + # Reshape into [batch_size, Tq, 1, dim]. + q_reshaped = ops.expand_dims(query, axis=-2) + # Reshape into [batch_size, 1, Tv, dim]. 
+ k_reshaped = ops.expand_dims(key, axis=-3) + if self.scale is not None: + scores = self.concat_score_weight * ops.sum( + ops.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1 + ) + else: + scores = self.concat_score_weight * ops.sum( + ops.tanh(q_reshaped + k_reshaped), axis=-1 + ) + else: + raise ValueError("scores not computed") + + return scores + + def _apply_scores(self, scores, value, scores_mask=None, training=False): + """Applies attention scores to the given value tensor. + + To use this method in your attention layer, follow the steps: + + * Use `query` tensor of shape `(batch_size, Tq)` and `key` tensor of + shape `(batch_size, Tv)` to calculate the attention `scores`. + * Pass `scores` and `value` tensors to this method. The method applies + `scores_mask`, calculates + `attention_distribution = softmax(scores)`, then returns + `matmul(attention_distribution, value). + * Apply `query_mask` and return the result. + + Args: + scores: Scores float tensor of shape `(batch_size, Tq, Tv)`. + value: Value tensor of shape `(batch_size, Tv, dim)`. + scores_mask: A boolean mask tensor of shape `(batch_size, 1, Tv)` + or `(batch_size, Tq, Tv)`. If given, scores at positions where + `scores_mask==False` do not contribute to the result. It must + contain at least one `True` value in each line along the last + dimension. + training: Python boolean indicating whether the layer should behave + in training mode (adding dropout) or in inference mode + (no dropout). + + Returns: + Tensor of shape `(batch_size, Tq, dim)`. + Attention scores after masking and softmax with shape + `(batch_size, Tq, Tv)`. + """ + if scores_mask is not None: + padding_mask = ops.logical_not(scores_mask) + # Bias so padding positions do not contribute to attention + # distribution. Note 65504. is the max float16 value. 
+ max_value = 65504.0 if scores.dtype == "float16" else 1.0e9 + scores -= max_value * ops.cast(padding_mask, dtype=scores.dtype) + + weights = ops.softmax(scores, axis=-1) + if training and self.dropout > 0: + weights = backend.random.dropout( + weights, + self.dropout, + seed=self.seed_generator, + ) + return ops.matmul(weights, value), weights + + def _calculate_score_mask(self, scores, v_mask, use_causal_mask): + if use_causal_mask: + # Creates a lower triangular mask, so position i cannot attend to + # positions j > i. This prevents the flow of information from the + # future into the past. + score_shape = ops.shape(scores) + # causal_mask_shape = [1, Tq, Tv]. + mask_shape = (1, score_shape[-2], score_shape[-1]) + ones_mask = ops.ones(shape=mask_shape, dtype="int32") + row_index = ops.cumsum(ones_mask, axis=-2) + col_index = ops.cumsum(ones_mask, axis=-1) + causal_mask = ops.greater_equal(row_index, col_index) + + if v_mask is not None: + # Mask of shape [batch_size, 1, Tv]. + v_mask = ops.expand_dims(v_mask, axis=-2) + return ops.logical_and(v_mask, causal_mask) + return causal_mask + else: + # If not using causal mask, return the value mask as is, + # or None if the value mask is not provided. + return v_mask + + def call( + self, + inputs, + mask=None, + training=False, + return_attention_scores=False, + use_causal_mask=False, + ): + self._validate_inputs(inputs=inputs, mask=mask) + self._return_attention_scores = return_attention_scores + q = inputs[0] + v = inputs[1] + k = inputs[2] if len(inputs) > 2 else v + q_mask = mask[0] if mask else None + v_mask = mask[1] if mask else None + scores = self._calculate_scores(query=q, key=k) + scores_mask = self._calculate_score_mask( + scores, v_mask, use_causal_mask + ) + attention_output, attention_scores = self._apply_scores( + scores=scores, value=v, scores_mask=scores_mask, training=training + ) + if q_mask is not None: + # Mask of shape [batch_size, Tq, 1]. 
+ q_mask = ops.expand_dims(q_mask, axis=-1) + attention_output *= ops.cast(q_mask, dtype=attention_output.dtype) + if return_attention_scores: + return (attention_output, attention_scores) + else: + return attention_output + + def compute_mask(self, inputs, mask=None): + self._validate_inputs(inputs=inputs, mask=mask) + if mask is None or mask[0] is None: + return None + return ops.convert_to_tensor(mask[0]) + + def compute_output_shape(self, input_shape): + query_shape, value_shape, key_shape = input_shape + if key_shape is None: + key_shape = value_shape + + output_shape = (*query_shape[:-1], value_shape[-1]) + if self._return_attention_scores: + scores_shape = (query_shape[0], query_shape[1], key_shape[1]) + return output_shape, scores_shape + return output_shape + + def compute_output_spec( + self, + inputs, + mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + # Validate and unpack inputs + self._validate_inputs(inputs, mask) + query = inputs[0] + value = inputs[1] + key = inputs[2] if len(inputs) > 2 else value + + # Compute primary output shape + output_shape = self.compute_output_shape( + [query.shape, value.shape, key.shape] + ) + output_spec = KerasTensor(output_shape, dtype=self.compute_dtype) + + # Handle attention scores if requested + if self._return_attention_scores or return_attention_scores: + scores_shape = ( + query.shape[0], + query.shape[1], + key.shape[1], + ) # (batch_size, Tq, Tv) + attention_scores_spec = KerasTensor( + scores_shape, dtype=self.compute_dtype + ) + return (output_spec, attention_scores_spec) + + return output_spec + + def _validate_inputs(self, inputs, mask=None): + """Validates arguments of the call method.""" + class_name = self.__class__.__name__ + if not isinstance(inputs, list): + raise ValueError( + f"{class_name} layer must be called on a list of inputs, " + "namely [query, value] or [query, value, key]. " + f"Received: inputs={inputs}." 
+ ) + if len(inputs) < 2 or len(inputs) > 3: + raise ValueError( + f"{class_name} layer accepts inputs list of length 2 or 3, " + "namely [query, value] or [query, value, key]. " + f"Received length: {len(inputs)}." + ) + if mask is not None: + if not isinstance(mask, list): + raise ValueError( + f"{class_name} layer mask must be a list, " + f"namely [query_mask, value_mask]. Received: mask={mask}." + ) + if len(mask) < 2 or len(mask) > 3: + raise ValueError( + f"{class_name} layer accepts mask list of length 2 or 3. " + f"Received: inputs={inputs}, mask={mask}." + ) + + def get_config(self): + base_config = super().get_config() + config = { + "use_scale": self.use_scale, + "score_mode": self.score_mode, + "dropout": self.dropout, + } + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..6246964679c330bf198f83e2bedd153478bc7940 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py @@ -0,0 +1,504 @@ +import math + +from keras.src import constraints +from keras.src import initializers +from keras.src import ops +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.backend.config import is_flash_attention_enabled +from keras.src.layers.activations.softmax import Softmax +from keras.src.layers.core.einsum_dense import EinsumDense +from keras.src.layers.layer import Layer +from keras.src.layers.regularization.dropout import Dropout + + +@keras_export("keras.layers.GroupQueryAttention") +class GroupedQueryAttention(Layer): + """Grouped Query Attention layer. 
+ + This is an implementation of grouped-query attention introduced by + [Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). Here + `num_key_value_heads` denotes number of groups, setting + `num_key_value_heads` to 1 is equivalent to multi-query attention, and + when `num_key_value_heads` is equal to `num_query_heads` it is equivalent + to multi-head attention. + + This layer first projects `query`, `key`, and `value` tensors. Then, `key` + and `value` are repeated to match the number of heads of `query`. + + Then, the `query` is scaled and dot-producted with `key` tensors. These are + softmaxed to obtain attention probabilities. The value tensors are then + interpolated by these probabilities and concatenated back to a single + tensor. + + Args: + head_dim: Size of each attention head. + num_query_heads: Number of query attention heads. + num_key_value_heads: Number of key and value attention heads. + dropout: Dropout probability. + use_bias: Boolean, whether the dense layers use bias vectors/matrices. + flash_attention: If `None`, the layer attempts to use flash + attention for faster and more memory-efficient attention + computations when possible. This behavior can be configured using + `keras.config.enable_flash_attention()` or + `keras.config.disable_flash_attention()`. + kernel_initializer: Initializer for dense layer kernels. + bias_initializer: Initializer for dense layer biases. + kernel_regularizer: Regularizer for dense layer kernels. + bias_regularizer: Regularizer for dense layer biases. + activity_regularizer: Regularizer for dense layer activity. + kernel_constraint: Constraint for dense layer kernels. + bias_constraint: Constraint for dense layer kernels. + seed: Optional integer to seed the dropout layer. + + Call arguments: + query: Query tensor of shape `(batch_dim, target_seq_len, feature_dim)`, + where `batch_dim` is batch size, `target_seq_len` is the length of + target sequence, and `feature_dim` is dimension of feature. 
+ value: Value tensor of shape `(batch_dim, source_seq_len, feature_dim)`, + where `batch_dim` is batch size, `source_seq_len` is the length of + source sequence, and `feature_dim` is dimension of feature. + key: Optional key tensor of shape + `(batch_dim, source_seq_len, feature_dim)`. If not given, will use + `value` for both `key` and `value`, which is most common case. + attention_mask: A boolean mask of shape + `(batch_dim, target_seq_len, source_seq_len)`, that prevents + attention to certain positions. The boolean mask specifies which + query elements can attend to which key elements, where 1 indicates + attention and 0 indicates no attention. Broadcasting can happen for + the missing batch dimensions and the head dimension. + return_attention_scores: A boolean to indicate whether the output + should be `(attention_output, attention_scores)` if `True`, or + `attention_output` if `False`. Defaults to `False`. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + Will go with either using the training mode of the parent + layer/model or `False` (inference) if there is no parent layer. + use_causal_mask: A boolean to indicate whether to apply a causal mask to + prevent tokens from attending to future tokens (e.g., used in a + decoder Transformer). + + Returns: + attention_output: Result of the computation, of shape + `(batch_dim, target_seq_len, feature_dim)`, where `target_seq_len` + is for target sequence length and `feature_dim` is the query input + last dim. + attention_scores: (Optional) attention coefficients of shape + `(batch_dim, num_query_heads, target_seq_len, source_seq_len)`. 
+ """ + + def __init__( + self, + head_dim, + num_query_heads, + num_key_value_heads, + dropout=0.0, + use_bias=True, + flash_attention=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self.head_dim = head_dim + self.num_query_heads = num_query_heads + self.num_key_value_heads = num_key_value_heads + if num_query_heads % num_key_value_heads != 0: + raise ValueError( + "`num_query_heads` must be divisible" + " by `num_key_value_heads`." + ) + self.num_repeats = num_query_heads // num_key_value_heads + self.dropout = dropout + self.use_bias = use_bias + self._flash_attention = flash_attention or is_flash_attention_enabled() + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.seed = seed + + self._inverse_sqrt_head_dim = 1.0 / math.sqrt(float(self.head_dim)) + self._return_attention_scores = False + + # Check for flash attention constraints + if self._flash_attention and self.dropout > 0.0: + raise ValueError( + "Dropout is not supported when flash attention is enabled. " + "Please set dropout to 0.0 to use flash attention." 
+ ) + + def build( + self, + query_shape, + value_shape, + key_shape=None, + ): + # Einsum variables: + # b = batch size + # q = query length + # k = key/value length + # m = model dim + # u = num query heads + # v = num key/value heads + # h = head dim + key_shape = value_shape if key_shape is None else key_shape + self.feature_dim = query_shape[-1] + self._query_dense = EinsumDense( + "bqm,muh->bquh", + output_shape=(None, self.num_query_heads, self.head_dim), + bias_axes="uh" if self.use_bias else None, + name="query", + **self._get_common_kwargs_for_sublayer(), + ) + self._query_dense.build(query_shape) + + self._key_dense = EinsumDense( + "bkm,mvh->bkvh", + output_shape=(None, self.num_key_value_heads, self.head_dim), + bias_axes="vh" if self.use_bias else None, + name="key", + **self._get_common_kwargs_for_sublayer(), + ) + self._key_dense.build(key_shape) + + self._value_dense = EinsumDense( + "bkm,mvh->bkvh", + output_shape=(None, self.num_key_value_heads, self.head_dim), + bias_axes="vh" if self.use_bias else None, + name="value", + **self._get_common_kwargs_for_sublayer(), + ) + self._value_dense.build(value_shape) + + self._softmax = Softmax(axis=-1, dtype=self.dtype_policy) + self._dropout_layer = Dropout( + rate=self.dropout, dtype=self.dtype_policy, seed=self.seed + ) + + self._dot_product_equation = "bquh,bkuh->buqk" + self._combine_equation = "buqk,bkuh->bquh" + + self._output_dense = EinsumDense( + "bquh,uhm->bqm", + output_shape=(None, self.feature_dim), + bias_axes="m" if self.use_bias else None, + name="attention_output", + **self._get_common_kwargs_for_sublayer(), + ) + self._output_dense.build( + (None, None, self.num_query_heads, self.head_dim) + ) + self.built = True + + def _get_common_kwargs_for_sublayer(self): + common_kwargs = dict( + kernel_regularizer=self.kernel_regularizer, + bias_regularizer=self.bias_regularizer, + activity_regularizer=self.activity_regularizer, + kernel_constraint=self.kernel_constraint, + 
bias_constraint=self.bias_constraint, + dtype=self.dtype_policy, + ) + # Create new clone of kernel/bias initializer, so that we don't reuse + # the initializer instance, which could lead to same init value since + # initializer is stateless. + kernel_initializer = self.kernel_initializer.__class__.from_config( + self.kernel_initializer.get_config() + ) + bias_initializer = self.bias_initializer.__class__.from_config( + self.bias_initializer.get_config() + ) + common_kwargs["kernel_initializer"] = kernel_initializer + common_kwargs["bias_initializer"] = bias_initializer + return common_kwargs + + def call( + self, + query, + value, + key=None, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + self._return_attention_scores = return_attention_scores + if key is None: + key = value + + attention_mask = self._compute_attention_mask( + query, + value, + query_mask=query_mask, + value_mask=value_mask, + key_mask=key_mask, + attention_mask=attention_mask, + use_causal_mask=use_causal_mask, + ) + + query = self._query_dense(query) + key = self._key_dense(key) + value = self._value_dense(value) + + key = ops.repeat( + key, self.num_repeats, axis=2 + ) # (batch_dim, source_seq_len, query_heads, head_dim) + value = ops.repeat( + value, self.num_repeats, axis=2 + ) # (batch_dim, source_seq_len, query_heads, head_dim) + + output, scores = self._compute_attention( + query, + key, + value, + attention_mask=attention_mask, + training=training, + ) + + output = self._output_dense( + output + ) # (batch_dim, target_seq_len, feature_dim) + + if return_attention_scores: + return output, scores + return output + + def _compute_attention_mask( + self, + query, + value, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + use_causal_mask=False, + ): + """Computes the attention mask, using the Keras masks of the inputs. 
+ + * The `query`'s mask is reshaped from [B, T] to [B, T, 1]. + * The `value`'s mask is reshaped from [B, S] to [B, 1, S]. + * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s + mask is ignored if `key` is `None` or if `key is value`. + * If `use_causal_mask=True`, then the causal mask is computed. Its shape + is [1, T, S]. + + All defined masks are merged using a logical AND operation (`&`). + + In general, if the `query` and `value` are masked, then there is no need + to define the `attention_mask`. + + Args: + query: Projected query tensor of shape `(B, T, N, key_dim)`. + key: Projected key tensor of shape `(B, T, N, key_dim)`. + value: Projected value tensor of shape `(B, T, N, value_dim)`. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. + use_causal_mask: A boolean to indicate whether to apply a causal + mask to prevent tokens from attending to future tokens (e.g., + used in a decoder Transformer). + + Returns: + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions, based on the Keras masks of the + `query`, `key`, `value`, and `attention_mask` tensors, and the + causal mask if `use_causal_mask=True`. 
+ """ + auto_mask = None + if query_mask is not None: + query_mask = ops.cast(query_mask, "bool") # defensive casting + # B = batch size, T = max query length + auto_mask = ops.expand_dims(query_mask, -1) # shape is [B, T, 1] + if value_mask is not None: + value_mask = ops.cast(value_mask, "bool") # defensive casting + # B = batch size, S == max value length + mask = ops.expand_dims(value_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if key_mask is not None: + key_mask = ops.cast(key_mask, "bool") # defensive casting + # B == batch size, S == max key length == max value length + mask = ops.expand_dims(key_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if use_causal_mask: + # the shape of the causal mask is [1, T, S] + mask = self._compute_causal_mask(query, value) + auto_mask = mask if auto_mask is None else auto_mask & mask + if auto_mask is not None: + # merge attention_mask & automatic mask, to shape [B, T, S] + attention_mask = ( + auto_mask + if attention_mask is None + else ops.cast(attention_mask, bool) & auto_mask + ) + return attention_mask + + def _compute_causal_mask(self, query, value=None): + """Computes a causal mask (e.g., for masked self-attention layers). + + For example, if query and value both contain sequences of length 4, + this function returns a boolean tensor equal to: + + ``` + [[[True, False, False, False], + [True, True, False, False], + [True, True, True, False], + [True, True, True, True]]] + ``` + + Args: + query: query tensor of shape `(B, T, ...)`. + value: value tensor of shape `(B, S, ...)` (optional, defaults to + query). + + Returns: + mask: a boolean tensor of shape `(1, T, S)` containing a lower + triangular matrix of shape `(T, S)`. 
+ """ + q_seq_length = ops.shape(query)[1] + v_seq_length = q_seq_length if value is None else ops.shape(value)[1] + ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32") + row_index = ops.cumsum(ones_mask, axis=-2) + col_index = ops.cumsum(ones_mask, axis=-1) + return ops.greater_equal(row_index, col_index) + + def _compute_attention( + self, query, key, value, attention_mask=None, training=None + ): + # Check for flash attention constraints + if self._flash_attention and self._return_attention_scores: + raise ValueError( + "Returning attention scores is not supported when flash " + "attention is enabled. Please disable flash attention to access" + " attention scores." + ) + + # Determine whether to use dot-product attention + use_dot_product_attention = not ( + self.dropout > 0.0 + or self._return_attention_scores + or (len(query.shape) != 4) + ) + + if use_dot_product_attention: + if attention_mask is not None: + # Ensure attention_mask has the correct shape for broadcasting + # Expected shape: [batch_size, num_heads, query_seq_len, + # key_seq_len]. + mask_expansion_axis = -1 * 2 - 1 + len_attention_scores_shape = 4 # Only accepts 4D inputs + for _ in range( + len_attention_scores_shape - len(attention_mask.shape) + ): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + attention_mask = ops.cast(attention_mask, dtype="bool") + # Directly compute the attention output using dot-product attention + attention_output = ops.dot_product_attention( + query=query, + key=key, + value=value, + bias=None, + mask=attention_mask, + scale=self._inverse_sqrt_head_dim, + is_causal=False, + flash_attention=self._flash_attention, + ) + return attention_output, None + + # Default behavior without flash attention, with explicit attention + # scores + query = ops.multiply( + query, ops.cast(self._inverse_sqrt_head_dim, query.dtype) + ) + # Take the dot product between "query" and "key" to get the raw + # attention scores. 
+ scores = ops.einsum( + self._dot_product_equation, query, key + ) # (batch_dim, query_heads, target_seq_len, source_seq_len) + scores = self._masked_softmax(scores, attention_mask=attention_mask) + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + if self.dropout > 0.0: + scores_dropout = self._dropout_layer(scores, training=training) + else: + scores_dropout = scores + output = ops.einsum(self._combine_equation, scores_dropout, value) + return output, scores + + def _masked_softmax(self, scores, attention_mask=None): + # Normalize the attention scores to probabilities. + # scores = [B, N, T, S] + if attention_mask is not None: + # The expand dim happens starting from the `num_heads` dimension, + # (, num_heads, ) + mask_expansion_axis = -1 * 2 - 1 + for _ in range(len(scores.shape) - len(attention_mask.shape)): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + return self._softmax(scores, mask=attention_mask) + + def compute_output_shape( + self, + query_shape, + value_shape, + key_shape=None, + ): + if key_shape is None: + key_shape = value_shape + + if query_shape[-1] != value_shape[-1]: + raise ValueError( + "The last dimension of `query_shape` and `value_shape` " + f"must be equal, but are {query_shape[-1]}, {value_shape[-1]}. " + "Received: query_shape={query_shape}, value_shape={value_shape}" + ) + + if value_shape[1:-1] != key_shape[1:-1]: + raise ValueError( + "All dimensions of `value` and `key`, except the last one, " + f"must be equal. 
Received: value_shape={value_shape} and " + f"key_shape={key_shape}" + ) + + return query_shape + + def get_config(self): + config = { + "head_dim": self.head_dim, + "num_query_heads": self.num_query_heads, + "num_key_value_heads": self.num_key_value_heads, + "use_bias": self.use_bias, + "dropout": self.dropout, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + "seed": self.seed, + } + base_config = super().get_config() + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..ad4d55d3a14ba9fedf97d0a2104b8ad1cabbe763 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py @@ -0,0 +1,827 @@ +import math +import string + +import numpy as np + +from keras.src import backend +from keras.src import constraints +from keras.src import initializers +from keras.src import ops +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.backend.config import is_flash_attention_enabled +from keras.src.layers.activations.softmax import Softmax +from keras.src.layers.core.einsum_dense import EinsumDense +from keras.src.layers.layer import Layer +from keras.src.layers.regularization.dropout import Dropout + + 
+@keras_export("keras.layers.MultiHeadAttention") +class MultiHeadAttention(Layer): + """MultiHeadAttention layer. + + This is an implementation of multi-headed attention as described in the + paper "Attention is all you Need" + [Vaswani et al., 2017](https://arxiv.org/abs/1706.03762). + If `query`, `key,` `value` are the same, then + this is self-attention. Each timestep in `query` attends to the + corresponding sequence in `key`, and returns a fixed-width vector. + + This layer first projects `query`, `key` and `value`. These are + (effectively) a list of tensors of length `num_attention_heads`, where the + corresponding shapes are `(batch_size, , key_dim)`, + `(batch_size, , key_dim)`, + `(batch_size, , value_dim)`. + + Then, the query and key tensors are dot-producted and scaled. These are + softmaxed to obtain attention probabilities. The value tensors are then + interpolated by these probabilities, then concatenated back to a single + tensor. + + Finally, the result tensor with the last dimension as `value_dim` can take + a linear projection and return. + + Args: + num_heads: Number of attention heads. + key_dim: Size of each attention head for query and key. + value_dim: Size of each attention head for value. + dropout: Dropout probability. + use_bias: Boolean, whether the dense layers use bias vectors/matrices. + output_shape: The expected shape of an output tensor, besides the batch + and sequence dims. If not specified, projects back to the query + feature dim (the query input's last dimension). + attention_axes: axes over which the attention is applied. `None` means + attention over all axes, but batch, heads, and features. + flash_attention: If `None`, the layer attempts to use flash + attention for faster and more memory-efficient attention + computations when possible. This behavior can be configured using + `keras.config.enable_flash_attention()` or + `keras.config.disable_flash_attention()`. + kernel_initializer: Initializer for dense layer kernels. 
+ bias_initializer: Initializer for dense layer biases. + kernel_regularizer: Regularizer for dense layer kernels. + bias_regularizer: Regularizer for dense layer biases. + activity_regularizer: Regularizer for dense layer activity. + kernel_constraint: Constraint for dense layer kernels. + bias_constraint: Constraint for dense layer kernels. + seed: Optional integer to seed the dropout layer. + + Call arguments: + query: Query tensor of shape `(B, T, dim)`, where `B` is the batch size, + `T` is the target sequence length, and dim is the feature dimension. + value: Value tensor of shape `(B, S, dim)`, where `B` is the batch size, + `S` is the source sequence length, and dim is the feature dimension. + key: Optional key tensor of shape `(B, S, dim)`. If not given, will + use `value` for both `key` and `value`, which is the most common + case. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. The boolean mask specifies which + query elements can attend to which key elements, 1 indicates + attention and 0 indicates no attention. Broadcasting can happen for + the missing batch dimensions and the head dimension. + return_attention_scores: A boolean to indicate whether the output should + be `(attention_output, attention_scores)` if `True`, or + `attention_output` if `False`. Defaults to `False`. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + Will go with either using the training mode of the parent + layer/model, or `False` (inference) if there is no parent layer. + use_causal_mask: A boolean to indicate whether to apply a causal mask to + prevent tokens from attending to future tokens (e.g., used in a + decoder Transformer). + + Returns: + attention_output: The result of the computation, of shape `(B, T, E)`, + where `T` is for target sequence shapes and `E` is the query input + last dimension if `output_shape` is `None`. 
Otherwise, the + multi-head outputs are projected to the shape specified by + `output_shape`. + attention_scores: (Optional) multi-head attention coefficients over + attention axes. + """ + + def __init__( + self, + num_heads, + key_dim, + value_dim=None, + dropout=0.0, + use_bias=True, + output_shape=None, + attention_axes=None, + flash_attention=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self._num_heads = num_heads + self._key_dim = key_dim + self._value_dim = value_dim if value_dim else key_dim + self._dropout = dropout + self._use_bias = use_bias + if output_shape: + if isinstance(output_shape, int): + output_shape = (output_shape,) + try: + output_shape = tuple(output_shape) + except: + raise ValueError( + f"Invalid `output_shape`: {output_shape}. When " + "specified, the `output_shape` should be of type tuple, " + "list, or int." + ) + self._output_shape = output_shape + self._flash_attention = flash_attention or is_flash_attention_enabled() + self._kernel_initializer = initializers.get(kernel_initializer) + self._bias_initializer = initializers.get(bias_initializer) + self._kernel_regularizer = regularizers.get(kernel_regularizer) + self._bias_regularizer = regularizers.get(bias_regularizer) + self._activity_regularizer = regularizers.get(activity_regularizer) + self._kernel_constraint = constraints.get(kernel_constraint) + self._bias_constraint = constraints.get(bias_constraint) + if isinstance(attention_axes, int): + attention_axes = (attention_axes,) + elif attention_axes and not isinstance(attention_axes, (list, tuple)): + raise ValueError( + "`attention_axes` must be an int, list, or tuple." 
+ f"Received: attention_axes={attention_axes}" + ) + self._attention_axes = attention_axes + self.seed = seed + + self._inverse_sqrt_key_dim = 1.0 / math.sqrt(float(self._key_dim)) + self._return_attention_scores = False + + # Check for flash attention constraints + if self._flash_attention and self._dropout > 0.0: + raise ValueError( + "Dropout is not supported when flash attention is enabled. " + "Please set dropout to 0.0 to use flash attention." + ) + + @property + def num_heads(self): + return self._num_heads + + @property + def key_dim(self): + return self._key_dim + + @property + def value_dim(self): + return self._value_dim + + @property + def dropout(self): + return self._dropout + + @property + def use_bias(self): + return self._use_bias + + # Avoid exposing `output_shape` as it may conflict with `Functional` and + # `Sequential` models when calling `summary()`. + + @property + def attention_axes(self): + return self._attention_axes + + def get_config(self): + base_config = super().get_config() + config = { + "num_heads": self._num_heads, + "key_dim": self._key_dim, + "value_dim": self._value_dim, + "dropout": self._dropout, + "use_bias": self._use_bias, + "output_shape": self._output_shape, + "attention_axes": self._attention_axes, + "kernel_initializer": initializers.serialize( + self._kernel_initializer + ), + "bias_initializer": initializers.serialize(self._bias_initializer), + "kernel_regularizer": regularizers.serialize( + self._kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self._bias_regularizer), + "activity_regularizer": regularizers.serialize( + self._activity_regularizer + ), + "kernel_constraint": constraints.serialize(self._kernel_constraint), + "bias_constraint": constraints.serialize(self._bias_constraint), + "seed": self.seed, + } + return {**base_config, **config} + + def build( + self, + query_shape, + value_shape, + key_shape=None, + ): + """Builds layers and variables. 
+ + Args: + query_shape: Shape of the `query` tensor. + value_shape: Shape of the `value` tensor. + key: Optional shape of the `key` tensor. + """ + key_shape = value_shape if key_shape is None else key_shape + + if value_shape[1:-1] != key_shape[1:-1]: + raise ValueError( + "All dimensions of `value` and `key`, except the last one, " + f"must be equal. Received: value_shape={value_shape} and " + f"key_shape={key_shape}" + ) + + query_rank = len(query_shape) + value_rank = len(value_shape) + key_rank = len(key_shape) + einsum_equation, bias_axes, output_rank = _build_proj_equation( + query_rank - 1, bound_dims=1, output_dims=2 + ) + self._query_dense = EinsumDense( + einsum_equation, + output_shape=_get_output_shape( + output_rank - 1, [self._num_heads, self._key_dim] + ), + bias_axes=bias_axes if self._use_bias else None, + name="query", + **self._get_common_kwargs_for_sublayer(), + ) + self._query_dense.build(query_shape) + einsum_equation, bias_axes, output_rank = _build_proj_equation( + key_rank - 1, bound_dims=1, output_dims=2 + ) + self._key_dense = EinsumDense( + einsum_equation, + output_shape=_get_output_shape( + output_rank - 1, [self._num_heads, self._key_dim] + ), + bias_axes=bias_axes if self._use_bias else None, + name="key", + **self._get_common_kwargs_for_sublayer(), + ) + self._key_dense.build(key_shape) + einsum_equation, bias_axes, output_rank = _build_proj_equation( + value_rank - 1, bound_dims=1, output_dims=2 + ) + self._value_dense = EinsumDense( + einsum_equation, + output_shape=_get_output_shape( + output_rank - 1, [self._num_heads, self._value_dim] + ), + bias_axes=bias_axes if self._use_bias else None, + name="value", + **self._get_common_kwargs_for_sublayer(), + ) + self._value_dense.build(value_shape) + + # Builds the attention computations for multi-head dot product + # attention. These computations could be wrapped into the keras + # attention layer once it supports multi-head einsum computations. 
+ self._build_attention(output_rank) + self._output_dense = self._make_output_dense( + query_shape, + self._get_common_kwargs_for_sublayer(), + "attention_output", + ) + output_dense_input_shape = list( + self._query_dense.compute_output_shape(query_shape) + ) + output_dense_input_shape[-1] = self._value_dim + self._output_dense.build(tuple(output_dense_input_shape)) + self.built = True + + @property + def query_dense(self): + return self._query_dense + + @property + def key_dense(self): + return self._key_dense + + @property + def value_dense(self): + return self._value_dense + + @property + def output_dense(self): + return self._output_dense + + def _get_common_kwargs_for_sublayer(self): + common_kwargs = dict( + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint, + dtype=self.dtype_policy, + ) + # Create new clone of kernel/bias initializer, so that we don't reuse + # the initializer instance, which could lead to same init value since + # initializer is stateless. + kernel_initializer = self._kernel_initializer.__class__.from_config( + self._kernel_initializer.get_config() + ) + bias_initializer = self._bias_initializer.__class__.from_config( + self._bias_initializer.get_config() + ) + common_kwargs["kernel_initializer"] = kernel_initializer + common_kwargs["bias_initializer"] = bias_initializer + return common_kwargs + + def _make_output_dense(self, query_shape, common_kwargs, name=None): + """Builds the output projection matrix. + + Args: + free_dims: Number of free dimensions for einsum equation building. + common_kwargs: Common keyword arguments for einsum layer. + name: Name for the projection layer. + + Returns: + Projection layer. 
+ """ + query_rank = len(query_shape) + if self._output_shape: + output_shape = self._output_shape + else: + output_shape = [query_shape[-1]] + einsum_equation, bias_axes, output_rank = _build_proj_equation( + query_rank - 1, bound_dims=2, output_dims=len(output_shape) + ) + return EinsumDense( + einsum_equation, + output_shape=_get_output_shape(output_rank - 1, output_shape), + bias_axes=bias_axes if self._use_bias else None, + name=name, + **common_kwargs, + ) + + def _build_attention(self, rank): + """Builds multi-head dot-product attention computations. + + This function builds attributes necessary for `_compute_attention` to + customize attention computation to replace the default dot-product + attention. + + Args: + rank: the rank of query, key, value tensors. + """ + if self._attention_axes is None: + self._attention_axes = tuple(range(1, rank - 2)) + else: + self._attention_axes = tuple(self._attention_axes) + ( + self._dot_product_equation, + self._combine_equation, + attn_scores_rank, + ) = _build_attention_equation(rank, attn_axes=self._attention_axes) + norm_axes = tuple( + range( + attn_scores_rank - len(self._attention_axes), attn_scores_rank + ) + ) + self._softmax = Softmax(axis=norm_axes, dtype=self.dtype_policy) + self._dropout_layer = Dropout( + rate=self._dropout, dtype=self.dtype_policy, seed=self.seed + ) + + def _masked_softmax(self, attention_scores, attention_mask=None): + # Normalize the attention scores to probabilities. 
+ # attention_scores = [B, N, T, S] + if attention_mask is not None: + # The expand dim happens starting from the `num_heads` dimension, + # (, num_heads, ) + mask_expansion_axis = -len(self._attention_axes) * 2 - 1 + for _ in range( + len(attention_scores.shape) - len(attention_mask.shape) + ): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + return self._softmax(attention_scores, mask=attention_mask) + + def _compute_attention( + self, + query, + key, + value, + attention_mask=None, + training=None, + ): + """Applies Dot-product attention with query, key, value tensors. + + This function defines the computation inside `call` with projected + multi-head Q, K, V inputs. Users can override this function for + customized attention implementation. + + Args: + query: Projected query tensor of shape `(B, T, N, key_dim)`. + key: Projected key tensor of shape `(B, S, N, key_dim)`. + value: Projected value tensor of shape `(B, S, N, value_dim)`. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. It is generally not needed if + the `query` and `value` (and/or `key`) are masked. + training: Python boolean indicating whether the layer should behave + in training mode (adding dropout) or in inference mode (doing + nothing). + + Returns: + attention_output: Multi-headed outputs of attention computation. + attention_scores: Multi-headed attention weights. + """ + # Check for flash attention constraints + if self._flash_attention and self._return_attention_scores: + raise ValueError( + "Returning attention scores is not supported when flash " + "attention is enabled. Please disable flash attention to access" + " attention scores." 
+ ) + + # Determine whether to use dot-product attention + use_dot_product_attention = not ( + self._dropout > 0.0 + or self._return_attention_scores + or (len(query.shape) != 4) + ) + + if use_dot_product_attention: + if attention_mask is not None: + # Ensure attention_mask has the correct shape for broadcasting + # Expected shape: [batch_size, num_heads, query_seq_len, + # key_seq_len]. + mask_expansion_axis = -len(self._attention_axes) * 2 - 1 + len_attention_scores_shape = 4 # Only accepts 4D inputs + for _ in range( + len_attention_scores_shape - len(attention_mask.shape) + ): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + attention_mask = ops.cast(attention_mask, dtype="bool") + # Directly compute the attention output using dot-product attention + attention_output = ops.dot_product_attention( + query=query, + key=key, + value=value, + bias=None, + mask=attention_mask, + scale=self._inverse_sqrt_key_dim, + is_causal=False, + flash_attention=self._flash_attention, + ) + return attention_output, None + + # Default behavior without flash attention, with explicit attention + # scores + query = ops.multiply( + query, ops.cast(self._inverse_sqrt_key_dim, query.dtype) + ) + + # Take the dot product between "query" and "key" to get the raw + # attention scores. 
+ attention_scores = ops.einsum(self._dot_product_equation, key, query) + + # Apply the mask using the custom masked softmax + attention_scores = self._masked_softmax( + attention_scores, attention_mask + ) + + # Apply dropout to the attention scores if needed + if self._dropout > 0.0: + final_attn_scores = self._dropout_layer( + attention_scores, training=training + ) + else: + final_attn_scores = attention_scores + + # `context_layer` = [B, T, N, H] + attention_output = ops.einsum( + self._combine_equation, final_attn_scores, value + ) + return attention_output, attention_scores + + def call( + self, + query, + value, + key=None, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + self._return_attention_scores = return_attention_scores + if key is None: + key = value + + # Delete the masks because the masks are handled at the level of the + # layer + query_mask = backend.get_keras_mask(query) + backend.set_keras_mask(query, None) + backend.set_keras_mask(value, None) + backend.set_keras_mask(key, None) + + attention_mask = self._compute_attention_mask( + query, + value, + query_mask=query_mask, + value_mask=value_mask, + key_mask=key_mask, + attention_mask=attention_mask, + use_causal_mask=use_causal_mask, + ) + # N = `num_attention_heads` + # H = `size_per_head` + + # `query` = [B, T, N, H] + query = self._query_dense(query) + + # `key` = [B, S, N, H] + key = self._key_dense(key) + + # `value` = [B, S, N, H] + value = self._value_dense(value) + attention_output, attention_scores = self._compute_attention( + query, + key, + value, + attention_mask, + training, + ) + attention_output = self._output_dense(attention_output) + + # Set mask on output if needed + if query_mask is not None: + backend.set_keras_mask(attention_output, query_mask) + + if return_attention_scores: + return attention_output, attention_scores + return attention_output + + def 
_compute_attention_mask( + self, + query, + value, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + use_causal_mask=False, + ): + """Computes the attention mask, using the Keras masks of the inputs. + + * The `query`'s mask is reshaped from [B, T] to [B, T, 1]. + * The `value`'s mask is reshaped from [B, S] to [B, 1, S]. + * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s + mask is ignored if `key` is `None` or if `key is value`. + * If `use_causal_mask=True`, then the causal mask is computed. Its shape + is [1, T, S]. + + All defined masks are merged using a logical AND operation (`&`). + + In general, if the `query` and `value` are masked, then there is no need + to define the `attention_mask`. + + Args: + query: Projected query tensor of shape `(B, T, N, key_dim)`. + key: Projected key tensor of shape `(B, T, N, key_dim)`. + value: Projected value tensor of shape `(B, T, N, value_dim)`. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. + use_causal_mask: A boolean to indicate whether to apply a causal + mask to prevent tokens from attending to future tokens (e.g., + used in a decoder Transformer). + + Returns: + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions, based on the Keras masks of the + `query`, `key`, `value`, and `attention_mask` tensors, and the + causal mask if `use_causal_mask=True`. 
+ """ + auto_mask = None + if query_mask is not None: + query_mask = ops.cast(query_mask, "bool") # defensive casting + # B = batch size, T = max query length + auto_mask = ops.expand_dims(query_mask, -1) # shape is [B, T, 1] + if value_mask is not None: + value_mask = ops.cast(value_mask, "bool") # defensive casting + # B = batch size, S == max value length + mask = ops.expand_dims(value_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if key_mask is not None: + key_mask = ops.cast(key_mask, "bool") # defensive casting + # B == batch size, S == max key length == max value length + mask = ops.expand_dims(key_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if use_causal_mask: + # the shape of the causal mask is [1, T, S] + mask = self._compute_causal_mask(query, value) + auto_mask = mask if auto_mask is None else auto_mask & mask + + if attention_mask is not None: + attention_mask = ops.cast(attention_mask, "bool") + if auto_mask is not None: + # merge attention_mask & automatic mask, to shape [B, T, S] + attention_mask = ( + auto_mask + if attention_mask is None + else attention_mask & auto_mask + ) + return attention_mask + + def _compute_causal_mask(self, query, value=None): + """Computes a causal mask (e.g., for masked self-attention layers). + + For example, if query and value both contain sequences of length 4, + this function returns a boolean tensor equal to: + + ``` + [[[True, False, False, False], + [True, True, False, False], + [True, True, True, False], + [True, True, True, True]]] + ``` + + Args: + query: query tensor of shape `(B, T, ...)`. + value: value tensor of shape `(B, S, ...)` (optional, defaults to + query). + + Returns: + mask: a boolean tensor of shape `(1, T, S)` containing a lower + triangular matrix of shape `(T, S)`. 
+ """ + q_seq_length = ops.shape(query)[1] + v_seq_length = q_seq_length if value is None else ops.shape(value)[1] + ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32") + row_index = ops.cumsum(ones_mask, axis=-2) + col_index = ops.cumsum(ones_mask, axis=-1) + return ops.greater_equal(row_index, col_index) + + def compute_output_shape( + self, + query_shape, + value_shape, + key_shape=None, + ): + query_shape = tuple(query_shape) + value_shape = tuple(value_shape) + if key_shape is None: + key_shape = value_shape + else: + key_shape = tuple(key_shape) + + if value_shape[1:-1] != key_shape[1:-1]: + raise ValueError( + "All dimensions of `value` and `key`, except the last one, " + f"must be equal. Received: value_shape={value_shape} and " + f"key_shape={key_shape}" + ) + if self._output_shape: + query_shape = query_shape[:-1] + self._output_shape + return query_shape + + def compute_output_spec( + self, + query, + value, + key=None, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + if key is not None: + key_shape = key.shape + else: + key_shape = None + output_shape = self.compute_output_shape( + query.shape, value.shape, key_shape + ) + output_spec = backend.KerasTensor( + output_shape, dtype=self.compute_dtype + ) + if return_attention_scores: + length = query.shape[1] + attention_shape = (query.shape[0], self.num_heads, length, length) + return output_spec, backend.KerasTensor( + attention_shape, dtype=self.compute_dtype + ) + return output_spec + + +def _index_to_einsum_variable(i): + """Converts an index to a einsum variable name. + + We simply map indices to lowercase characters, e.g. 0 -> 'a', 1 -> 'b'. + """ + return string.ascii_lowercase[i] + + +def _build_attention_equation(rank, attn_axes): + """Builds einsum equations for the attention computation. 
+ + Query, key, value inputs after projection are expected to have the shape as: + `(bs, , , num_heads, channels)`. + `bs` and `` are treated as ``. + + The attention operations can be generalized: + 1. Query-key dot product: + (, , num_heads, channels), + (, , num_heads, channels) -> + (, num_heads, , ) + 2. Combination: + (, num_heads, , ), + (, , num_heads, channels) -> (, , num_heads, channels) + + Args: + rank: Rank of query, key, value tensors. + attn_axes: List/tuple of axes, `[-1, rank)`, + that attention will be applied to. + + Returns: + Einsum equations. + """ + target_notation = "" + for i in range(rank): + target_notation += _index_to_einsum_variable(i) + # `batch_dims` includes the head dim. + batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,))) + letter_offset = rank + source_notation = "" + for i in range(rank): + if i in batch_dims or i == rank - 1: + source_notation += target_notation[i] + else: + source_notation += _index_to_einsum_variable(letter_offset) + letter_offset += 1 + + product_notation = "".join( + [target_notation[i] for i in batch_dims] + + [target_notation[i] for i in attn_axes] + + [source_notation[i] for i in attn_axes] + ) + dot_product_equation = "%s,%s->%s" % ( + source_notation, + target_notation, + product_notation, + ) + attn_scores_rank = len(product_notation) + combine_equation = "%s,%s->%s" % ( + product_notation, + source_notation, + target_notation, + ) + return dot_product_equation, combine_equation, attn_scores_rank + + +def _build_proj_equation(free_dims, bound_dims, output_dims): + """Builds an einsum equation for projections inside multi-head attention.""" + input_str = "" + kernel_str = "" + output_str = "" + bias_axes = "" + letter_offset = 0 + for i in range(free_dims): + char = _index_to_einsum_variable(i + letter_offset) + input_str += char + output_str += char + + letter_offset += free_dims + for i in range(bound_dims): + char = _index_to_einsum_variable(i + letter_offset) + input_str += char + 
kernel_str += char + + letter_offset += bound_dims + for i in range(output_dims): + char = _index_to_einsum_variable(i + letter_offset) + kernel_str += char + output_str += char + bias_axes += char + equation = f"{input_str},{kernel_str}->{output_str}" + + return equation, bias_axes, len(output_str) + + +def _get_output_shape(output_rank, known_last_dims): + return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..971ab45b63f3b60c9362da19bfe84fff65d7f323 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b70b6e203a15aaba20054de7bacba1baf88a18f0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a37e60a0ea2d937a4dfd3d000322b17a003a15f Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f49fe4cd357546eb38f493e9afd3325917ca514 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae93c8b26dd447a655775271a3a1d039c844c175 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..58846a7815583d558f3953e5b04febc49b1c4d44 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5fff2c08d88caa4f7043614188a25e490ecb63a Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3644c80e9d17b4b735bc227f52ac5812974b5d4c Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c776308b193fef49bcd1f5d19b47019b97be7da8 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f8ab9c58d41f6216f2b1a5dc396f82f950ea5bd Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a6651addfffc8d13727f72eeecaac2ad6e7ec38 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ccb3493444ba15d535e1f41ce226e5bc8e3f6d3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..015baaf4772db3f1fdaed20f69cd4357cf8cb15b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ef02fab4741e4494c201d000b5d3318fc28a117 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b01bbcdf7c92f2eeb3f377714f0fa8015ad2816 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..82b4140ebafd1eb47a7287b145814f330c655b78 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py @@ -0,0 +1,401 @@ +"""Keras base class for convolution layers.""" + +from keras.src import activations +from keras.src import constraints +from 
keras.src import initializers +from keras.src import ops +from keras.src import regularizers +from keras.src.backend import standardize_data_format +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer +from keras.src.ops.operation_utils import compute_conv_output_shape +from keras.src.utils.argument_validation import standardize_padding +from keras.src.utils.argument_validation import standardize_tuple + + +class BaseConv(Layer): + """Abstract N-D convolution layer (private, used as implementation base). + + This layer creates a convolution kernel that is convolved (actually + cross-correlated) with the layer input to produce a tensor of outputs. If + `use_bias` is True (and a `bias_initializer` is provided), a bias vector is + created and added to the outputs. Finally, if `activation` is not `None`, it + is applied to the outputs as well. + + Note: layer attributes cannot be modified after the layer has been called + once (except the `trainable` attribute). + + Args: + rank: int, the rank of the convolution, e.g. 2 for 2D convolution. + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of `rank` integers, specifying the size + of the convolution window. + strides: int or tuple/list of `rank` integers, specifying the stride + length of the convolution. If only one int is specified, the same + stride size will be used for all dimensions. `strides > 1` is + incompatible with `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. 
`"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of `rank` integers, specifying the + dilation rate to use for dilated convolution. If only one int is + specified, the same dilation rate will be used for all dimensions. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. LoRA sets the layer's kernel + to non-trainable and replaces it with a delta over the + original kernel, obtained via multiplying two lower-rank + trainable matrices. This can be useful to reduce the + computation cost of fine-tuning large dense layers. + You can also enable LoRA on an existing layer by calling + `layer.enable_lora(rank)`. + """ + + def __init__( + self, + rank, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs, + ): + super().__init__(activity_regularizer=activity_regularizer, **kwargs) + self.rank = rank + self.filters = filters + self.groups = groups + self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size") + self.strides = standardize_tuple(strides, rank, "strides") + self.dilation_rate = standardize_tuple( + dilation_rate, rank, "dilation_rate" + ) + self.padding = standardize_padding(padding, allow_causal=rank == 1) + self.data_format = standardize_data_format(data_format) + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.lora_rank = lora_rank + self.lora_enabled = False + self.input_spec = InputSpec(min_ndim=self.rank + 2) + self.data_format = self.data_format + + if self.filters is 
not None and self.filters <= 0: + raise ValueError( + "Invalid value for argument `filters`. Expected a strictly " + f"positive value. Received filters={self.filters}." + ) + + if self.groups <= 0: + raise ValueError( + "The number of groups must be a positive integer. " + f"Received: groups={self.groups}." + ) + + if self.filters is not None and self.filters % self.groups != 0: + raise ValueError( + "The number of filters must be evenly divisible by the " + f"number of groups. Received: groups={self.groups}, " + f"filters={self.filters}." + ) + + if not all(self.kernel_size): + raise ValueError( + "The argument `kernel_size` cannot contain 0. Received " + f"kernel_size={self.kernel_size}." + ) + + if not all(self.strides): + raise ValueError( + "The argument `strides` cannot contains 0. Received " + f"strides={self.strides}" + ) + + if max(self.strides) > 1 and max(self.dilation_rate) > 1: + raise ValueError( + "`strides > 1` not supported in conjunction with " + f"`dilation_rate > 1`. Received: strides={self.strides} and " + f"dilation_rate={self.dilation_rate}" + ) + + def build(self, input_shape): + if self.data_format == "channels_last": + channel_axis = -1 + input_channel = input_shape[-1] + else: + channel_axis = 1 + input_channel = input_shape[1] + self.input_spec = InputSpec( + min_ndim=self.rank + 2, axes={channel_axis: input_channel} + ) + if input_channel % self.groups != 0: + raise ValueError( + "The number of input channels must be evenly divisible by " + f"the number of groups. Received groups={self.groups}, but the " + f"input has {input_channel} channels (full input shape is " + f"{input_shape})." + ) + kernel_shape = self.kernel_size + ( + input_channel // self.groups, + self.filters, + ) + + # compute_output_shape contains some validation logic for the input + # shape, and make sure the output shape has all positive dimensions. 
+ self.compute_output_shape(input_shape) + + self._kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype, + ) + else: + self.bias = None + self.built = True + if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def kernel(self): + if not self.built: + raise AttributeError( + "You must build the layer before accessing `kernel`." + ) + if self.lora_enabled: + return self._kernel + ops.matmul( + self.lora_kernel_a, self.lora_kernel_b + ) + return self._kernel + + def convolution_op(self, inputs, kernel): + return ops.conv( + inputs, + kernel, + strides=list(self.strides), + padding=self.padding, + dilation_rate=self.dilation_rate, + data_format=self.data_format, + ) + + def call(self, inputs): + outputs = self.convolution_op( + inputs, + self.kernel, + ) + if self.use_bias: + if self.data_format == "channels_last": + bias_shape = (1,) * (self.rank + 1) + (self.filters,) + else: + bias_shape = (1, self.filters) + (1,) * self.rank + bias = ops.reshape(self.bias, bias_shape) + outputs = ops.add(outputs, bias) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + return compute_conv_output_shape( + input_shape, + self.filters, + self.kernel_size, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate, + ) + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.kernel_constraint: + raise ValueError( + "Lora is incompatible with kernel constraints. 
" + "In order to enable lora on this layer, remove the " + "`kernel_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." + ) + self._tracker.unlock() + self.lora_kernel_a = self.add_weight( + name="lora_kernel_a", + shape=self._kernel.shape[:-1] + (rank,), + initializer=initializers.get(a_initializer), + regularizer=self.kernel_regularizer, + ) + self.lora_kernel_b = self.add_weight( + name="lora_kernel_b", + shape=(rank, self.filters), + initializer=initializers.get(b_initializer), + regularizer=self.kernel_regularizer, + ) + self._kernel.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + target_variables = [self.kernel] + if self.use_bias: + target_variables.append(self.bias) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't yet built + if not self.built: + return + target_variables = [self._kernel] + if self.use_bias: + target_variables.append(self.bias) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape)) + self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape)) + + def get_config(self): + config = super().get_config() + config.update( + { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "dilation_rate": self.dilation_rate, + "groups": self.groups, + "activation": activations.serialize(self.activation), + "use_bias": 
self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize( + self.bias_initializer + ), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize( + self.bias_regularizer + ), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize( + self.kernel_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + ) + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return config + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. 
class BaseConvTranspose(Layer):
    """Abstract N-D transposed convolution layer.

    The need for transposed convolutions generally arises from the desire to use
    a transformation going in the opposite direction of a normal convolution,
    i.e., from something that has the shape of the output of some convolution to
    something that has the shape of its input while maintaining a connectivity
    pattern that is compatible with said convolution.

    Args:
        rank: int, the rank of the transposed convolution, e.g. 2 for 2D
            transposed convolution.
        filters: int, the dimension of the output space (the number of filters
            in the transposed convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the transposed convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the transposed convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input such that output has the same
            height/width dimension as the input.
        output_padding: int or tuple/list of `rank` integers, specifying the
            amount of explicit padding along each spatial dimension of the
            output tensor. If `None` (default), the output shape is inferred.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        output_padding=None,
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=activity_regularizer,
            **kwargs,
        )
        self.rank = rank
        self.filters = filters
        # Normalize scalar-or-sequence arguments to length-`rank` tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        if output_padding is None:
            self.output_padding = None
        else:
            self.output_padding = standardize_tuple(
                output_padding,
                rank,
                "output_padding",
            )
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Eagerly validate arguments so misconfiguration fails at construction
        # time rather than at build/call time.
        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0. Received "
                f"strides={self.strides}."
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the kernel (and optional bias) once the input channel count
        is known, and pin the channel axis in the input spec."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # Note the transposed layout: (..., filters, input_channel), the
        # reverse of a forward convolution kernel.
        kernel_shape = self.kernel_size + (
            self.filters,
            input_channel,
        )

        self.kernel = self.add_weight(
            name="kernel",
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Apply the transposed convolution, then bias and activation."""
        outputs = ops.conv_transpose(
            inputs,
            self.kernel,
            strides=list(self.strides),
            padding=self.padding,
            output_padding=self.output_padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the bias so it broadcasts over batch and spatial dims,
            # aligned with the channel axis for the current data format.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        return compute_conv_transpose_output_shape(
            input_shape,
            self.kernel_size,
            self.filters,
            strides=self.strides,
            padding=self.padding,
            output_padding=self.output_padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        """Return the layer config, including `output_padding` so that a
        layer built with a non-default value round-trips through
        serialization (previously it was silently dropped)."""
        config = super().get_config()
        config.update(
            {
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "output_padding": self.output_padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "kernel_initializer": initializers.serialize(
                    self.kernel_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "kernel_regularizer": regularizers.serialize(
                    self.kernel_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "kernel_constraint": constraints.serialize(
                    self.kernel_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
class BaseDepthwiseConv(Layer):
    """Abstract N-D depthwise convolution layer.

    Depthwise convolution is a type of convolution in which each input channel
    is convolved with a different kernel (called a depthwise kernel). You can
    understand depthwise convolution as the first step in a depthwise separable
    convolution.

    It is implemented via the following steps:

    - Split the input into individual channels.
    - Convolve each channel with an individual depthwise kernel with
      `depth_multiplier` output channels.
    - Concatenate the convolved outputs along the channels axis.

    Unlike a regular convolution, depthwise convolution does not mix information
    across different input channels.

    The `depth_multiplier` argument determines how many filters are applied to
    one input channel. As such, it controls the amount of output channels that
    are generated per input channel in the depthwise step.

    Args:
        rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the depthwise convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the depthwise convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: Initializer for the depthwise convolution
            kernel. If `None`, the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        depth_multiplier,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs,
        )
        self.rank = rank
        self.depth_multiplier = depth_multiplier
        # Normalize scalar-or-sequence arguments to length-`rank` tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Eagerly validate arguments so misconfiguration fails at construction
        # time rather than at build/call time.
        if self.depth_multiplier is not None and self.depth_multiplier <= 0:
            raise ValueError(
                "Invalid value for argument `depth_multiplier`. Expected a "
                "strictly positive value. Received "
                f"depth_multiplier={self.depth_multiplier}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0. Received "
                f"strides={self.strides}"
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the depthwise kernel (and optional bias) once the input
        channel count is known, and pin the channel axis in the input spec."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # Depthwise kernel layout: (*kernel_size, in_channels, multiplier).
        depthwise_shape = self.kernel_size + (
            input_channel,
            self.depth_multiplier,
        )
        self.kernel = self.add_weight(
            name="kernel",
            shape=depthwise_shape,
            initializer=self.depthwise_initializer,
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            # One bias per output channel: in_channels * depth_multiplier.
            self.bias = self.add_weight(
                name="bias",
                shape=(self.depth_multiplier * input_channel,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def _get_input_channel(self, input_shape):
        """Return the channel count of `input_shape` for the current
        data format."""
        if self.data_format == "channels_last":
            input_channel = input_shape[-1]
        else:
            input_channel = input_shape[1]
        return input_channel

    def call(self, inputs):
        """Apply the depthwise convolution, then bias and activation."""
        outputs = ops.depthwise_conv(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # The channel count is only needed to shape the bias broadcast.
            input_channel = self._get_input_channel(inputs.shape)
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (
                    self.depth_multiplier * input_channel,
                )
            else:
                bias_shape = (1, self.depth_multiplier * input_channel) + (
                    1,
                ) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        input_channel = self._get_input_channel(input_shape)
        return compute_conv_output_shape(
            input_shape,
            self.depth_multiplier * input_channel,
            self.kernel_size,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "depth_multiplier": self.depth_multiplier,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "depthwise_initializer": initializers.serialize(
                    self.depthwise_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "depthwise_regularizer": regularizers.serialize(
                    self.depthwise_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "depthwise_constraint": constraints.serialize(
                    self.depthwise_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
class BaseSeparableConv(Layer):
    """Abstract base layer for separable convolution.

    This layer performs a depthwise convolution that acts separately on
    channels, followed by a pointwise convolution that mixes channels. If
    `use_bias` is True and a bias initializer is provided, it adds a bias vector
    to the output.

    Args:
        rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        filters: int, the dimensionality of the output space (i.e. the number
            of filters in the pointwise convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the depthwise convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the depthwise convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `stride value != 1` is incompatible with `dilation_rate != 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: An initializer for the depthwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        pointwise_initializer: An initializer for the pointwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: An initializer for the bias vector. If None, the
            default initializer ('"zeros"') will be used.
        depthwise_regularizer: Optional regularizer for the depthwise
            convolution kernel.
        pointwise_regularizer: Optional regularizer for the pointwise
            convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            depthwise kernel after being updated by an `Optimizer` (e.g. used
            for norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape).
        pointwise_constraint: Optional projection function to be applied to the
            pointwise kernel after being updated by an `Optimizer`.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        depth_multiplier,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        pointwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        pointwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        pointwise_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs,
        )
        self.rank = rank
        self.depth_multiplier = depth_multiplier
        self.filters = filters
        # Normalize scalar-or-sequence arguments to length-`rank` tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.pointwise_initializer = initializers.get(pointwise_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.pointwise_regularizer = regularizers.get(pointwise_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.pointwise_constraint = constraints.get(pointwise_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Eagerly validate arguments so misconfiguration fails at construction
        # time rather than at build/call time.
        if self.depth_multiplier is not None and self.depth_multiplier <= 0:
            raise ValueError(
                "Invalid value for argument `depth_multiplier`. Expected a "
                "strictly positive value. Received "
                f"depth_multiplier={self.depth_multiplier}."
            )

        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received: "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0(s). Received: "
                f"strides={self.strides}"
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the depthwise and pointwise kernels (and optional bias) once
        the input channel count is known, and pin the channel axis in the
        input spec."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # Depthwise kernel: (*kernel_size, in_channels, multiplier).
        depthwise_kernel_shape = self.kernel_size + (
            input_channel,
            self.depth_multiplier,
        )
        # Pointwise kernel is a 1x...x1 convolution mixing the depthwise
        # outputs into `filters` channels.
        pointwise_kernel_shape = (1,) * self.rank + (
            self.depth_multiplier * input_channel,
            self.filters,
        )

        self.depthwise_kernel = self.add_weight(
            name="depthwise_kernel",
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        self.pointwise_kernel = self.add_weight(
            name="pointwise_kernel",
            shape=pointwise_kernel_shape,
            initializer=self.pointwise_initializer,
            regularizer=self.pointwise_regularizer,
            constraint=self.pointwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Apply the separable (depthwise + pointwise) convolution, then bias
        and activation."""
        outputs = ops.separable_conv(
            inputs,
            self.depthwise_kernel,
            self.pointwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the bias so it broadcasts over batch and spatial dims,
            # aligned with the channel axis for the current data format.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        return compute_conv_output_shape(
            input_shape,
            self.filters,
            self.kernel_size,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "depth_multiplier": self.depth_multiplier,
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "depthwise_initializer": initializers.serialize(
                    self.depthwise_initializer
                ),
                "pointwise_initializer": initializers.serialize(
                    self.pointwise_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "depthwise_regularizer": regularizers.serialize(
                    self.depthwise_regularizer
                ),
                "pointwise_regularizer": regularizers.serialize(
                    self.pointwise_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "depthwise_constraint": constraints.serialize(
                    self.depthwise_constraint
                ),
                "pointwise_constraint": constraints.serialize(
                    self.pointwise_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
import keras_export +from keras.src.layers.convolutional.base_conv import BaseConv + + +@keras_export(["keras.layers.Conv1D", "keras.layers.Convolution1D"]) +class Conv1D(BaseConv): + """1D convolution layer (e.g. temporal convolution). + + This layer creates a convolution kernel that is convolved with the layer + input over a single spatial (or temporal) dimension to produce a tensor of + outputs. If `use_bias` is True, a bias vector is created and added to the + outputs. Finally, if `activation` is not `None`, it is applied to the + outputs as well. + + Args: + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, `"valid"`, `"same"` or `"causal"`(case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + `"causal"` results in causal(dilated) convolutions, e.g. `output[t]` + does not depend on`input[t+1:]`. Useful when modeling temporal data + where the model should not violate the temporal order. + See [WaveNet: A Generative Model for Raw Audio, section2.1]( + https://arxiv.org/abs/1609.03499). + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. 
+ dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated convolution. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, new_steps, filters)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, filters, new_steps)` + + Returns: + A 3D tensor representing `activation(conv1d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> # The inputs are 128-length vectors with 10 timesteps, and the + >>> # batch size is 4. + >>> x = np.random.rand(4, 10, 128) + >>> y = keras.layers.Conv1D(32, 3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) + + def _compute_causal_padding(self): + left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1) + if self.data_format == "channels_last": + causal_padding = [[0, 0], [left_pad, 0], [0, 0]] + else: + causal_padding = [[0, 0], [0, 0], 
[left_pad, 0]] + return causal_padding + + def call(self, inputs): + padding = self.padding + if self.padding == "causal": + # Apply causal padding to inputs. + inputs = ops.pad(inputs, self._compute_causal_padding()) + padding = "valid" + + outputs = ops.conv( + inputs, + self.kernel, + strides=list(self.strides), + padding=padding, + dilation_rate=self.dilation_rate, + data_format=self.data_format, + ) + + if self.use_bias: + if self.data_format == "channels_last": + bias_shape = (1,) * (self.rank + 1) + (self.filters,) + else: + bias_shape = (1, self.filters) + (1,) * self.rank + bias = ops.reshape(self.bias, bias_shape) + outputs = ops.add(outputs, bias) + + if self.activation is not None: + return self.activation(outputs) + return outputs diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py new file mode 100644 index 0000000000000000000000000000000000000000..466f1f19931f174ddccf6beb0d29a34a10857a60 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py @@ -0,0 +1,131 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose + + +@keras_export( + [ + "keras.layers.Conv1DTranspose", + "keras.layers.Convolution1DTranspose", + ] +) +class Conv1DTranspose(BaseConvTranspose): + """1D transposed convolution layer. + + The need for transposed convolutions generally arise from the desire to use + a transformation going in the opposite direction of a normal convolution, + i.e., from something that has the shape of the output of some convolution + to something that has the shape of its input while maintaining a + connectivity pattern that is compatible with said convolution. 
+ + Args: + filters: int, the dimension of the output space (the number of filters + in the transpose convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + transposed convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the transposed convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated transposed convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). 
The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, new_steps, filters)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, filters, new_steps)` + + Returns: + A 3D tensor representing + `activation(conv1d_transpose(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + + Example: + + >>> x = np.random.rand(4, 10, 128) + >>> y = keras.layers.Conv1DTranspose(32, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 21, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + 
kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py new file mode 100644 index 0000000000000000000000000000000000000000..c46f8f9a0bc1eca50a48c98890678538830b1981 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py @@ -0,0 +1,128 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv import BaseConv + + +@keras_export(["keras.layers.Conv2D", "keras.layers.Convolution2D"]) +class Conv2D(BaseConv): + """2D convolution layer. + + This layer creates a convolution kernel that is convolved with the layer + input over a 2D spatial (or temporal) dimension (height and width) to + produce a tensor of outputs. If `use_bias` is True, a bias vector is created + and added to the outputs. Finally, if `activation` is not `None`, it is + applied to the outputs as well. + + Args: + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of 2 integer, specifying the size of the + convolution window. + strides: int or tuple/list of 2 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. 
`"channels_last"` + corresponds to inputs with shape + `(batch_size, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, height, width)`. It defaults to the + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json`. If you never set it, then it will be + `"channels_last"`. + dilation_rate: int or tuple/list of 2 integers, specifying the dilation + rate to use for dilated convolution. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + + Returns: + A 4D tensor representing `activation(conv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> x = np.random.rand(4, 10, 10, 128) + >>> y = keras.layers.Conv2D(32, 3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 8, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py new file mode 100644 index 
0000000000000000000000000000000000000000..ac13452f626386457589891e270c89a962c1265d --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py @@ -0,0 +1,133 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose + + +@keras_export( + [ + "keras.layers.Conv2DTranspose", + "keras.layers.Convolution2DTranspose", + ] +) +class Conv2DTranspose(BaseConvTranspose): + """2D transposed convolution layer. + + The need for transposed convolutions generally arise from the desire to use + a transformation going in the opposite direction of a normal convolution, + i.e., from something that has the shape of the output of some convolution + to something that has the shape of its input while maintaining a + connectivity pattern that is compatible with said convolution. + + Args: + filters: int, the dimension of the output space (the number of filters + in the transposed convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + transposed convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the transposed convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape + `(batch_size, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, height, width)`. It defaults to the + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json`. 
If you never set it, then it will be + `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated transposed convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + + Returns: + A 4D tensor representing + `activation(conv2d_transpose(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ + References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + + Example: + + >>> x = np.random.rand(4, 10, 8, 128) + >>> y = keras.layers.Conv2DTranspose(32, 2, 2, activation='relu')(x) + >>> print(y.shape) + (4, 20, 16, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py new file mode 100644 index 0000000000000000000000000000000000000000..4badd2042c377819c7135b9dd8fccadfc805601a --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py @@ -0,0 +1,134 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv import BaseConv + + +@keras_export(["keras.layers.Conv3D", "keras.layers.Convolution3D"]) +class Conv3D(BaseConv): + """3D convolution layer. 
+ + This layer creates a convolution kernel that is convolved with the layer + input over a 3D spatial (or temporal) dimension (width,height and depth) to + produce a tensor of outputs. If `use_bias` is True, a bias vector is created + and added to the outputs. Finally, if `activation` is not `None`, it is + applied to the outputs as well. + + Args: + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of 3 integer, specifying the size of the + convolution window. + strides: int or tuple/list of 3 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + It defaults to the `image_data_format` value found in your Keras + config file at `~/.keras/keras.json`. If you never set it, then it + will be `"channels_last"`. + dilation_rate: int or tuple/list of 3 integers, specifying the dilation + rate to use for dilated convolution. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. 
If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3, + filters)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, filters, new_spatial_dim1, new_spatial_dim2, + new_spatial_dim3)` + + Returns: + A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ + Example: + + >>> x = np.random.rand(4, 10, 10, 10, 128) + >>> y = keras.layers.Conv3D(32, 3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 8, 8, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=3, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py new file mode 100644 index 0000000000000000000000000000000000000000..348ff5f5d80083ae7439e06766aa2fec52a2f81d --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py @@ -0,0 +1,138 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose + + +@keras_export( + [ + "keras.layers.Conv3DTranspose", + "keras.layers.Convolution3DTranspose", + ] +) +class Conv3DTranspose(BaseConvTranspose): + """3D transposed convolution layer. 
+ + The need for transposed convolutions generally arise from the desire to use + a transformation going in the opposite direction of a normal convolution, + i.e., from something that has the shape of the output of some convolution + to something that has the shape of its input while maintaining a + connectivity pattern that is compatible with said convolution. + + Args: + filters: int, the dimension of the output space (the number of filters + in the transposed convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + transposed convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the transposed convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + It defaults to the `image_data_format` value found in your Keras + config file at `~/.keras/keras.json`. If you never set it, then it + will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated transposed convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. 
If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3, + filters)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, filters, new_spatial_dim1, new_spatial_dim2, + new_spatial_dim3)` + + Returns: + A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ + References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + + Example: + + >>> x = np.random.rand(4, 10, 8, 12, 128) + >>> y = keras.layers.Conv3DTranspose(32, 2, 2, activation='relu')(x) + >>> print(y.shape) + (4, 20, 16, 24, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=3, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py new file mode 100644 index 0000000000000000000000000000000000000000..51312d8447e2fa6e532e0e420c29ef5aa6b22365 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py @@ -0,0 +1,137 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv + + +@keras_export("keras.layers.DepthwiseConv1D") +class 
DepthwiseConv1D(BaseDepthwiseConv): + """1D depthwise convolution layer. + + Depthwise convolution is a type of convolution in which each input channel + is convolved with a different kernel (called a depthwise kernel). You can + understand depthwise convolution as the first step in a depthwise separable + convolution. + + It is implemented via the following steps: + + - Split the input into individual channels. + - Convolve each channel with an individual depthwise kernel with + `depth_multiplier` output channels. + - Concatenate the convolved outputs along the channels axis. + + Unlike a regular 1D convolution, depthwise convolution does not mix + information across different input channels. + + The `depth_multiplier` argument determines how many filters are applied to + one input channel. As such, it controls the amount of output channels that + are generated per input channel in the depthwise step. + + Args: + kernel_size: int or tuple/list of 1 integer, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. 
It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: Initializer for the convolution kernel. + If `None`, the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + depthwise_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: + `(batch_shape, new_steps, channels * depth_multiplier)` + - If `data_format="channels_first"`: + A 3D tensor with shape: + `(batch_shape, channels * depth_multiplier, new_steps)` + + Returns: + A 3D tensor representing + `activation(depthwise_conv1d(inputs, kernel) + bias)`. 
+ + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> x = np.random.rand(4, 10, 12) + >>> y = keras.layers.DepthwiseConv1D(3, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 4, 36) + """ + + def __init__( + self, + kernel_size, + strides=1, + padding="valid", + depth_multiplier=1, + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + depth_multiplier=depth_multiplier, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py new file mode 100644 index 0000000000000000000000000000000000000000..71c950246e03e6563dfcce01d6acd1fe2408cec5 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py @@ -0,0 +1,138 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv + + +@keras_export("keras.layers.DepthwiseConv2D") +class DepthwiseConv2D(BaseDepthwiseConv): + """2D depthwise convolution layer. 
+ + Depthwise convolution is a type of convolution in which each input channel + is convolved with a different kernel (called a depthwise kernel). You can + understand depthwise convolution as the first step in a depthwise separable + convolution. + + It is implemented via the following steps: + + - Split the input into individual channels. + - Convolve each channel with an individual depthwise kernel with + `depth_multiplier` output channels. + - Concatenate the convolved outputs along the channels axis. + + Unlike a regular 2D convolution, depthwise convolution does not mix + information across different input channels. + + The `depth_multiplier` argument determines how many filters are applied to + one input channel. As such, it controls the amount of output channels that + are generated per input channel in the depthwise step. + + Args: + kernel_size: int or tuple/list of 2 integer, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 2 integer, specifying the stride length + of the depthwise convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch, channels, height, width)`. 
It defaults to the + `image_data_format` value found in your Keras config file + at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 2 integers, specifying the dilation + rate to use for dilated convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: Initializer for the convolution kernel. + If `None`, the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + depthwise_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: + `(batch_size, new_height, new_width, channels * depth_multiplier)` + - If `data_format="channels_first"`: + A 4D tensor with shape: + `(batch_size, channels * depth_multiplier, new_height, new_width)` + + Returns: + A 4D tensor representing + `activation(depthwise_conv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> x = np.random.rand(4, 10, 10, 12) + >>> y = keras.layers.DepthwiseConv2D(kernel_size=3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 8, 12) + """ + + def __init__( + self, + kernel_size, + strides=(1, 1), + padding="valid", + depth_multiplier=1, + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + depth_multiplier=depth_multiplier, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py new file mode 100644 index 0000000000000000000000000000000000000000..2f03161981d44db66c49a92532f5e232da17fd60 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py @@ -0,0 +1,143 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv + + +@keras_export( + [ + "keras.layers.SeparableConv1D", + "keras.layers.SeparableConvolution1D", + ] +) +class SeparableConv1D(BaseSeparableConv): + """1D separable convolution layer. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + filters: int, the dimensionality of the output space (i.e. the number + of filters in the pointwise convolution). + kernel_size: int or tuple/list of 1 integers, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 1 integers, specifying the stride length + of the depthwise convolution. If only one int is specified, the same + stride size will be used for all dimensions. `strides > 1` is + incompatible with `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. 
`"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated convolution. If only one int is specified, + the same dilation rate will be used for all dimensions. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: An initializer for the depthwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + pointwise_initializer: An initializer for the pointwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: An initializer for the bias vector. If None, the + default initializer ('"zeros"') will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used + for norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). 
+ pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, new_steps, filters)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, filters, new_steps)` + + Returns: + A 3D tensor representing + `activation(separable_conv1d(inputs, kernel) + bias)`. + + Example: + + >>> x = np.random.rand(4, 10, 12) + >>> y = keras.layers.SeparableConv1D(3, 4, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 4, 4) + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + depth_multiplier=depth_multiplier, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + pointwise_initializer=pointwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + pointwise_regularizer=pointwise_regularizer, + bias_regularizer=bias_regularizer, + 
activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + pointwise_constraint=pointwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py new file mode 100644 index 0000000000000000000000000000000000000000..27c1548231dd1b42c22c8394af6d8a4ac8429795 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py @@ -0,0 +1,144 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv + + +@keras_export( + [ + "keras.layers.SeparableConv2D", + "keras.layers.SeparableConvolution2D", + ] +) +class SeparableConv2D(BaseSeparableConv): + """2D separable convolution layer. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + filters: int, the dimensionality of the output space (i.e. the number + of filters in the pointwise convolution). + kernel_size: int or tuple/list of 2 integers, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 2 integers, specifying the stride length + of the depthwise convolution. If only one int is specified, the same + stride size will be used for all dimensions. `strides > 1` is + incompatible with `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. 
When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch, channels, height, width)`. It defaults to the + `image_data_format` value found in your Keras config file + at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 2 integers, specifying the dilation + rate to use for dilated convolution. If only one int is specified, + the same dilation rate will be used for all dimensions. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: An initializer for the depthwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + pointwise_initializer: An initializer for the pointwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: An initializer for the bias vector. If None, the + default initializer ('"zeros"') will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. 
used + for norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + + Returns: + A 4D tensor representing + `activation(separable_conv2d(inputs, kernel) + bias)`. 
+ + Example: + + >>> x = np.random.rand(4, 10, 10, 12) + >>> y = keras.layers.SeparableConv2D(3, 4, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 4, 4, 4) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + depth_multiplier=depth_multiplier, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + pointwise_initializer=pointwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + pointwise_regularizer=pointwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + pointwise_constraint=pointwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..003afbdde642d1fba7b80a3209d053904bb3e543 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40ed4c8f8553bad909b839070a596db72ba1dfd5 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/einsum_dense.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/einsum_dense.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11cb8b9c25a13adb1d0b5c6a572c437fd0787893 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/einsum_dense.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/embedding.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/embedding.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d8e11c89aacc56bcca5eddea204537c44bc0dee Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/embedding.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/identity.cpython-310.pyc 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/identity.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f112355f2731f01daa79bf46381109322a5f34a8 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/identity.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/input_layer.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/input_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b73147d65106c389527a24813f2b1b0690f32c2 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/input_layer.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/lambda_layer.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/lambda_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00c25823fcd0231bcbd92a36f6df76bf650fadb3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/lambda_layer.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/masking.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/masking.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a64ca0cff3828ce94fb6b780794a93c6728fe68 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/masking.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/wrapper.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/wrapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35f3fc28d3ddf821ab5488d3fbb4f648d000143c Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/wrapper.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/dense.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/dense.py new file mode 100644 index 0000000000000000000000000000000000000000..21063a38272546c8d8605c66bc46a22308fdd94a --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/dense.py @@ -0,0 +1,555 @@ +import ml_dtypes + +from keras.src import activations +from keras.src import constraints +from keras.src import dtype_policies +from keras.src import initializers +from keras.src import ops +from keras.src import quantizers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Dense") +class Dense(Layer): + """Just your regular densely-connected NN layer. + + `Dense` implements the operation: + `output = activation(dot(input, kernel) + bias)` + where `activation` is the element-wise activation function + passed as the `activation` argument, `kernel` is a weights matrix + created by the layer, and `bias` is a bias vector created by the layer + (only applicable if `use_bias` is `True`). 
+ + Note: If the input to the layer has a rank greater than 2, `Dense` + computes the dot product between the `inputs` and the `kernel` along the + last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`). + For example, if input has dimensions `(batch_size, d0, d1)`, then we create + a `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2 + of the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are + `batch_size * d0` such sub-tensors). The output in this case will have + shape `(batch_size, d0, units)`. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. LoRA sets the layer's kernel + to non-trainable and replaces it with a delta over the + original kernel, obtained via multiplying two lower-rank + trainable matrices. This can be useful to reduce the + computation cost of fine-tuning large dense layers. + You can also enable LoRA on an existing + `Dense` layer by calling `layer.enable_lora(rank)`. + + Input shape: + N-D tensor with shape: `(batch_size, ..., input_dim)`. 
+ The most common situation would be + a 2D input with shape `(batch_size, input_dim)`. + + Output shape: + N-D tensor with shape: `(batch_size, ..., units)`. + For instance, for a 2D input with shape `(batch_size, input_dim)`, + the output would have shape `(batch_size, units)`. + """ + + def __init__( + self, + units, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs, + ): + super().__init__(activity_regularizer=activity_regularizer, **kwargs) + self.units = units + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.lora_rank = lora_rank + self.lora_enabled = False + self.input_spec = InputSpec(min_ndim=2) + self.supports_masking = True + + def build(self, input_shape): + input_dim = input_shape[-1] + if self.quantization_mode: + self.quantized_build(input_shape, mode=self.quantization_mode) + if self.quantization_mode != "int8": + # If the layer is quantized to int8, `self._kernel` will be added + # in `self._int8_build`. Therefore, we skip it here. 
+ self._kernel = self.add_weight( + name="kernel", + shape=(input_dim, self.units), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.units,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) + self.built = True + if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def kernel(self): + if not self.built: + raise AttributeError( + "You must build the layer before accessing `kernel`." + ) + if self.lora_enabled: + return self._kernel + ops.matmul( + self.lora_kernel_a, self.lora_kernel_b + ) + return self._kernel + + def call(self, inputs, training=None): + x = ops.matmul(inputs, self.kernel) + if self.bias is not None: + x = ops.add(x, self.bias) + if self.activation is not None: + x = self.activation(x) + return x + + def compute_output_shape(self, input_shape): + output_shape = list(input_shape) + output_shape[-1] = self.units + return tuple(output_shape) + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.kernel_constraint: + raise ValueError( + "Lora is incompatible with kernel constraints. " + "In order to enable lora on this layer, remove the " + "`kernel_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." 
+ ) + self._tracker.unlock() + self.lora_kernel_a = self.add_weight( + name="lora_kernel_a", + shape=(self.kernel.shape[0], rank), + initializer=initializers.get(a_initializer), + regularizer=self.kernel_regularizer, + ) + self.lora_kernel_b = self.add_weight( + name="lora_kernel_b", + shape=(rank, self.kernel.shape[1]), + initializer=initializers.get(b_initializer), + regularizer=self.kernel_regularizer, + ) + self._kernel.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + kernel_value, kernel_scale = self._get_kernel_with_merged_lora() + target_variables = [kernel_value] + if self.use_bias: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + target_variables = [self._kernel] + if self.use_bias: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == 
"int8": + target_variables.append(self.kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape)) + self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape)) + + def get_config(self): + base_config = super().get_config() + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return {**base_config, **config} + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. 
The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. its variables) upon deserialization.", + ) + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. " + f"Expected: {[v.name for v in all_vars]}" + ) + + # Quantization-related (int8 and float8) methods + + def quantized_build(self, input_shape, mode): + if mode == "int8": + input_dim = input_shape[-1] + kernel_shape = (input_dim, self.units) + self._int8_build(kernel_shape) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + def _int8_build( + self, + kernel_shape, + kernel_initializer="zeros", + kernel_scale_initializer="ones", + ): + self.inputs_quantizer = quantizers.AbsMaxQuantizer(axis=-1) + self._kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=kernel_initializer, + dtype="int8", + trainable=False, + ) + self.kernel_scale = self.add_weight( + name="kernel_scale", + shape=(self.units,), + initializer=kernel_scale_initializer, + trainable=False, + ) + self._is_quantized = True + + def _float8_build(self): + from keras.src.dtype_policies import QuantizedFloat8DTypePolicy + + # If `self.dtype_policy` is not QuantizedFloat8DTypePolicy, then set + # `amax_history_length` to its default value. 
+ amax_history_length = getattr( + self.dtype_policy, + "amax_history_length", + QuantizedFloat8DTypePolicy.default_amax_history_length, + ) + # We set `trainable=True` because we will use the gradients to overwrite + # these variables + scale_kwargs = { + "shape": (), + "initializer": "ones", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + amax_history_kwargs = { + "shape": (amax_history_length,), + "initializer": "zeros", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + self.inputs_scale = self.add_weight(name="inputs_scale", **scale_kwargs) + self.inputs_amax_history = self.add_weight( + name="inputs_amax_history", **amax_history_kwargs + ) + self.kernel_scale = self.add_weight(name="kernel_scale", **scale_kwargs) + self.kernel_amax_history = self.add_weight( + name="kernel_amax_history", **amax_history_kwargs + ) + self.outputs_grad_scale = self.add_weight( + name="outputs_grad_scale", **scale_kwargs + ) + self.outputs_grad_amax_history = self.add_weight( + name="outputs_grad_amax_history", **amax_history_kwargs + ) + # We need to set `overwrite_with_gradient=True` to instruct the + # optimizer to directly overwrite these variables with their computed + # gradients during training + self.inputs_scale.overwrite_with_gradient = True + self.inputs_amax_history.overwrite_with_gradient = True + self.kernel_scale.overwrite_with_gradient = True + self.kernel_amax_history.overwrite_with_gradient = True + self.outputs_grad_scale.overwrite_with_gradient = True + self.outputs_grad_amax_history.overwrite_with_gradient = True + self._is_quantized = True + + def _int8_call(self, inputs, training=None): + @ops.custom_gradient + def matmul_with_inputs_gradient(inputs, kernel, kernel_scale): + def grad_fn(*args, upstream=None): + if upstream is None: + (upstream,) = args + float_kernel = ops.divide( + ops.cast(kernel, dtype=self.compute_dtype), + kernel_scale, + ) + inputs_grad = 
ops.matmul(upstream, ops.transpose(float_kernel)) + return (inputs_grad, None, None) + + inputs, inputs_scale = self.inputs_quantizer(inputs) + x = ops.matmul(inputs, kernel) + # De-scale outputs + x = ops.cast(x, self.compute_dtype) + x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale)) + return x, grad_fn + + x = matmul_with_inputs_gradient( + inputs, + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ) + if self.lora_enabled: + lora_x = ops.matmul(inputs, self.lora_kernel_a) + lora_x = ops.matmul(lora_x, self.lora_kernel_b) + x = ops.add(x, lora_x) + if self.bias is not None: + x = ops.add(x, self.bias) + if self.activation is not None: + x = self.activation(x) + return x + + def _float8_call(self, inputs, training=None): + if self.lora_enabled: + raise NotImplementedError( + "Currently, `_float8_call` doesn't support LoRA" + ) + + @ops.custom_gradient + def quantized_dequantize_inputs(inputs, scale, amax_history): + if training: + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e4m3fn").max), "float32" + ), + ) + new_amax_history = quantizers.compute_float8_amax_history( + inputs, amax_history + ) + else: + new_scale = None + new_amax_history = None + qdq_inputs = quantizers.quantize_and_dequantize( + inputs, scale, "float8_e4m3fn", self.compute_dtype + ) + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + return upstream, new_scale, new_amax_history + + return qdq_inputs, grad + + @ops.custom_gradient + def quantized_dequantize_outputs(outputs, scale, amax_history): + """Quantize-dequantize the output gradient but not the output.""" + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e5m2").max), "float32" + ), + ) + 
qdq_upstream = quantizers.quantize_and_dequantize( + upstream, scale, "float8_e5m2", self.compute_dtype + ) + new_amax_history = quantizers.compute_float8_amax_history( + upstream, amax_history + ) + return qdq_upstream, new_scale, new_amax_history + + return outputs, grad + + x = ops.matmul( + quantized_dequantize_inputs( + inputs, + ops.convert_to_tensor(self.inputs_scale), + ops.convert_to_tensor(self.inputs_amax_history), + ), + quantized_dequantize_inputs( + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ops.convert_to_tensor(self.kernel_amax_history), + ), + ) + # `quantized_dequantize_outputs` is placed immediately after + # `ops.matmul` for the sake of pattern matching in gemm_rewrite. That + # way, the qdq will be adjacent to the corresponding matmul_bprop in the + # bprop. + x = quantized_dequantize_outputs( + x, + ops.convert_to_tensor(self.outputs_grad_scale), + ops.convert_to_tensor(self.outputs_grad_amax_history), + ) + if self.bias is not None: + # Under non-mixed precision cases, F32 bias has to be converted to + # BF16 first to get the biasAdd fusion support. ref. 
PR + # https://github.com/tensorflow/tensorflow/pull/60306 + bias = self.bias + if self.dtype_policy.compute_dtype == "float32": + bias_bf16 = ops.cast(bias, "bfloat16") + bias = ops.cast(bias_bf16, bias.dtype) + x = ops.add(x, bias) + if self.activation is not None: + x = self.activation(x) + return x + + def quantize(self, mode, type_check=True): + # Prevent quantization of the subclasses + if type_check and (type(self) is not Dense): + raise self._not_implemented_error(self.quantize) + + if mode == "int8": + # Quantize `self._kernel` to int8 and compute corresponding scale + kernel_value, kernel_scale = quantizers.abs_max_quantize( + self._kernel, axis=0, to_numpy=True + ) + kernel_scale = ops.squeeze(kernel_scale, axis=0) + kernel_shape = tuple(self._kernel.shape) + del self._kernel + # Utilize a lambda expression as an initializer to prevent adding a + # large constant to the computation graph. + self._int8_build(kernel_shape, kernel_value, kernel_scale) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + # Set new dtype policy + if self.dtype_policy.quantization_mode is None: + policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}") + self.dtype_policy = policy + + def _get_kernel_with_merged_lora(self): + if self.dtype_policy.quantization_mode is not None: + kernel_value = self._kernel + kernel_scale = self.kernel_scale + if self.lora_enabled: + # Dequantize & quantize to merge lora weights into int8 kernel + # Note that this is a lossy compression + kernel_value = ops.divide(kernel_value, kernel_scale) + kernel_value = ops.add( + kernel_value, + ops.matmul(self.lora_kernel_a, self.lora_kernel_b), + ) + kernel_value, kernel_scale = quantizers.abs_max_quantize( + kernel_value, axis=0, to_numpy=True + ) + kernel_scale = ops.squeeze(kernel_scale, axis=0) + return kernel_value, kernel_scale + return self.kernel, None diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/einsum_dense.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/einsum_dense.py new file mode 100644 index 0000000000000000000000000000000000000000..1600ae59b62e7d94cf1570b970b001bb6f36865b --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/einsum_dense.py @@ -0,0 +1,1008 @@ +import re +import string + +import ml_dtypes +import numpy as np + +from keras.src import activations +from keras.src import constraints +from keras.src import dtype_policies +from keras.src import initializers +from keras.src import ops +from keras.src import quantizers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.EinsumDense") +class EinsumDense(Layer): + """A layer that uses `einsum` as the backing computation. + + This layer can perform einsum calculations of arbitrary dimensionality. + + Args: + equation: An equation describing the einsum to perform. + This equation must be a valid einsum string of the form + `ab,bc->ac`, `...ab,bc->...ac`, or + `ab...,bc->ac...` where 'ab', 'bc', and 'ac' can be any valid einsum + axis expression sequence. + output_shape: The expected shape of the output tensor + (excluding the batch dimension and any dimensions + represented by ellipses). You can specify `None` for any dimension + that is unknown or can be inferred from the input shape. + activation: Activation function to use. If you don't specify anything, + no activation is applied + (that is, a "linear" activation: `a(x) = x`). + bias_axes: A string containing the output dimension(s) + to apply a bias to. Each character in the `bias_axes` string + should correspond to a character in the output portion + of the `equation` string. 
+ kernel_initializer: Initializer for the `kernel` weights matrix. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + bias_regularizer: Regularizer function applied to the bias vector. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + bias_constraint: Constraint function applied to the bias vector. + lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. LoRA sets the layer's kernel + to non-trainable and replaces it with a delta over the + original kernel, obtained via multiplying two lower-rank + trainable matrices + (the factorization happens on the last dimension). + This can be useful to reduce the + computation cost of fine-tuning large dense layers. + You can also enable LoRA on an existing + `EinsumDense` layer by calling `layer.enable_lora(rank)`. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + + Examples: + + **Biased dense layer with einsums** + + This example shows how to instantiate a standard Keras dense layer using + einsum operations. This example is equivalent to + `keras.layers.Dense(64, use_bias=True)`. + + >>> layer = keras.layers.EinsumDense("ab,bc->ac", + ... output_shape=64, + ... bias_axes="c") + >>> input_tensor = keras.Input(shape=[32]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor.shape + (None, 64) + + **Applying a dense layer to a sequence** + + This example shows how to instantiate a layer that applies the same dense + operation to every element in a sequence. Here, the `output_shape` has two + values (since there are two non-batch dimensions in the output); the first + dimension in the `output_shape` is `None`, because the sequence dimension + `b` has an unknown shape. + + >>> layer = keras.layers.EinsumDense("abc,cd->abd", + ... output_shape=(None, 64), + ... 
bias_axes="d") + >>> input_tensor = keras.Input(shape=[32, 128]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor.shape + (None, 32, 64) + + **Applying a dense layer to a sequence using ellipses** + + This example shows how to instantiate a layer that applies the same dense + operation to every element in a sequence, but uses the ellipsis notation + instead of specifying the batch and sequence dimensions. + + Because we are using ellipsis notation and have specified only one axis, the + `output_shape` arg is a single value. When instantiated in this way, the + layer can handle any number of sequence dimensions - including the case + where no sequence dimension exists. + + >>> layer = keras.layers.EinsumDense("...x,xy->...y", + ... output_shape=64, + ... bias_axes="y") + >>> input_tensor = keras.Input(shape=[32, 128]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor.shape + (None, 32, 64) + """ + + def __init__( + self, + equation, + output_shape, + activation=None, + bias_axes=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs, + ): + super().__init__(**kwargs) + self.equation = equation + if isinstance(output_shape, int): + self.partial_output_shape = (output_shape,) + else: + self.partial_output_shape = tuple(output_shape) + self.bias_axes = bias_axes + self.activation = activations.get(activation) + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.lora_rank = lora_rank + self.lora_enabled = False + + def build(self, input_shape): + shape_data = 
_analyze_einsum_string( + self.equation, + self.bias_axes, + input_shape, + self.partial_output_shape, + ) + kernel_shape, bias_shape, full_output_shape = shape_data + self.full_output_shape = tuple(full_output_shape) + # `self._int8_build` needs `self.input_spec` + self.input_spec = InputSpec(ndim=len(input_shape)) + # We use `self._dtype_policy` to check to avoid issues in torch dynamo + if self.quantization_mode is not None: + self.quantized_build(input_shape, mode=self.quantization_mode) + if self.quantization_mode != "int8": + # If the layer is quantized to int8, `self._kernel` will be added + # in `self._int8_build`. Therefore, we skip it here. + self._kernel = self.add_weight( + name="kernel", + shape=tuple(kernel_shape), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + dtype=self.dtype, + trainable=True, + ) + if bias_shape is not None: + self.bias = self.add_weight( + name="bias", + shape=tuple(bias_shape), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + dtype=self.dtype, + trainable=True, + ) + else: + self.bias = None + self.built = True + if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def kernel(self): + if not self.built: + raise AttributeError( + "You must build the layer before accessing `kernel`." + ) + if self.lora_enabled: + return self._kernel + ops.matmul( + self.lora_kernel_a, self.lora_kernel_b + ) + return self._kernel + + def compute_output_shape(self, _): + return self.full_output_shape + + def call(self, inputs, training=None): + x = ops.einsum(self.equation, inputs, self.kernel) + if self.bias is not None: + x += self.bias + if self.activation is not None: + x = self.activation(x) + return x + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.kernel_constraint: + raise ValueError( + "Lora is incompatible with kernel constraints. 
" + "In order to enable lora on this layer, remove the " + "`kernel_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." + ) + self._tracker.unlock() + self.lora_kernel_a = self.add_weight( + name="lora_kernel_a", + shape=(self.kernel.shape[:-1] + (rank,)), + initializer=initializers.get(a_initializer), + regularizer=self.kernel_regularizer, + ) + self.lora_kernel_b = self.add_weight( + name="lora_kernel_b", + shape=(rank, self.kernel.shape[-1]), + initializer=initializers.get(b_initializer), + regularizer=self.kernel_regularizer, + ) + self._kernel.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + kernel_value, kernel_scale = self._get_kernel_with_merged_lora() + target_variables = [kernel_value] + if self.bias is not None: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't 
yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + target_variables = [self._kernel] + if self.bias is not None: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(self.kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape)) + self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape)) + + def get_config(self): + base_config = super().get_config() + config = { + "output_shape": self.partial_output_shape, + "equation": self.equation, + "activation": activations.serialize(self.activation), + "bias_axes": self.bias_axes, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return {**base_config, **config} + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + 
self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. its variables) upon deserialization.", + ) + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. 
" + f"Expected: {[v.name for v in all_vars]}" + ) + + # Quantization-related (int8 and float8) methods + + def quantized_build(self, input_shape, mode): + if mode == "int8": + shape_data = _analyze_einsum_string( + self.equation, + self.bias_axes, + input_shape, + self.partial_output_shape, + ) + kernel_shape, _, _ = shape_data + self._int8_build(kernel_shape) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + def _int8_build( + self, + kernel_shape, + kernel_initializer="zeros", + kernel_scale_initializer="ones", + ): + ( + self._input_reduced_axes, + self._kernel_reduced_axes, + self._input_transpose_axes, + self._kernel_transpose_axes, + self._input_expand_axes, + self._kernel_expand_axes, + self._input_squeeze_axes, + self._kernel_squeeze_axes, + self._custom_gradient_equation, + self._kernel_reverse_transpose_axes, + ) = _analyze_quantization_info(self.equation, self.input_spec.ndim) + self.inputs_quantizer = quantizers.AbsMaxQuantizer( + axis=self._input_reduced_axes + ) + self._kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=kernel_initializer, + dtype="int8", + trainable=False, + ) + kernel_scale_shape = np.array(kernel_shape) + kernel_scale_shape[self._kernel_reduced_axes] = 1 + kernel_scale_shape = kernel_scale_shape[self._kernel_transpose_axes] + kernel_scale_shape = kernel_scale_shape.tolist() + for a in sorted(self._kernel_expand_axes): + kernel_scale_shape.insert(a, 1) + for a in sorted(self._kernel_squeeze_axes, reverse=True): + kernel_scale_shape.pop(a) + self.kernel_scale = self.add_weight( + name="kernel_scale", + shape=kernel_scale_shape, + initializer=kernel_scale_initializer, + trainable=False, + ) + self._is_quantized = True + + def _float8_build(self): + from keras.src.dtype_policies import QuantizedFloat8DTypePolicy + + # If `self.dtype_policy` is not QuantizedFloat8DTypePolicy, then set + # `amax_history_length` to its default value. 
+ amax_history_length = getattr( + self.dtype_policy, + "amax_history_length", + QuantizedFloat8DTypePolicy.default_amax_history_length, + ) + # We set `trainable=True` because we will use the gradients to overwrite + # these variables + scale_kwargs = { + "shape": (), + "initializer": "ones", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + amax_history_kwargs = { + "shape": (amax_history_length,), + "initializer": "zeros", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + self.inputs_scale = self.add_weight(name="inputs_scale", **scale_kwargs) + self.inputs_amax_history = self.add_weight( + name="inputs_amax_history", **amax_history_kwargs + ) + self.kernel_scale = self.add_weight(name="kernel_scale", **scale_kwargs) + self.kernel_amax_history = self.add_weight( + name="kernel_amax_history", **amax_history_kwargs + ) + self.outputs_grad_scale = self.add_weight( + name="outputs_grad_scale", **scale_kwargs + ) + self.outputs_grad_amax_history = self.add_weight( + name="outputs_grad_amax_history", **amax_history_kwargs + ) + # We need to set `overwrite_with_gradient=True` to instruct the + # optimizer to directly overwrite these variables with their computed + # gradients during training + self.inputs_scale.overwrite_with_gradient = True + self.inputs_amax_history.overwrite_with_gradient = True + self.kernel_scale.overwrite_with_gradient = True + self.kernel_amax_history.overwrite_with_gradient = True + self.outputs_grad_scale.overwrite_with_gradient = True + self.outputs_grad_amax_history.overwrite_with_gradient = True + self._is_quantized = True + + def _int8_call(self, inputs, training=None): + @ops.custom_gradient + def einsum_with_inputs_gradient(inputs, kernel, kernel_scale): + def grad_fn(*args, upstream=None): + if upstream is None: + (upstream,) = args + # De-scale kernel + _kernel_scale = kernel_scale # Overcome UnboundLocalError + if self._kernel_squeeze_axes: + _kernel_scale 
= ops.expand_dims( + _kernel_scale, axis=self._kernel_squeeze_axes + ) + if self._kernel_expand_axes: + _kernel_scale = ops.squeeze( + _kernel_scale, axis=self._kernel_expand_axes + ) + _kernel_scale = ops.transpose( + _kernel_scale, self._kernel_reverse_transpose_axes + ) + float_kernel = ops.divide( + ops.cast(kernel, dtype=self.compute_dtype), + _kernel_scale, + ) + # From https://stackoverflow.com/a/47609896 + inputs_grad = ops.einsum( + self._custom_gradient_equation, upstream, float_kernel + ) + return (inputs_grad, None, None) + + inputs, inputs_scale = self.inputs_quantizer(inputs) + x = ops.einsum(self.equation, inputs, kernel) + # Deal with `inputs_scale` + inputs_scale = ops.transpose( + inputs_scale, self._input_transpose_axes + ) + if self._input_expand_axes: + inputs_scale = ops.expand_dims( + inputs_scale, axis=self._input_expand_axes + ) + if self._input_squeeze_axes: + inputs_scale = ops.squeeze( + inputs_scale, axis=self._input_squeeze_axes + ) + # De-scale outputs + x = ops.cast(x, self.compute_dtype) + x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale)) + return x, grad_fn + + x = einsum_with_inputs_gradient( + inputs, + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ) + if self.lora_enabled: + lora_x = ops.einsum(self.equation, inputs, self.lora_kernel_a) + lora_x = ops.matmul(lora_x, self.lora_kernel_b) + x = ops.add(x, lora_x) + if self.bias is not None: + x += self.bias + if self.activation is not None: + x = self.activation(x) + return x + + def _float8_call(self, inputs, training=None): + if self.lora_enabled: + raise NotImplementedError( + "Currently, `_float8_call` doesn't support LoRA" + ) + + @ops.custom_gradient + def quantized_dequantize_inputs(inputs, scale, amax_history): + if training: + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e4m3fn").max), "float32" + ), + ) + new_amax_history = 
quantizers.compute_float8_amax_history( + inputs, amax_history + ) + else: + new_scale = None + new_amax_history = None + qdq_inputs = quantizers.quantize_and_dequantize( + inputs, scale, "float8_e4m3fn", self.compute_dtype + ) + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + return upstream, new_scale, new_amax_history + + return qdq_inputs, grad + + @ops.custom_gradient + def quantized_dequantize_outputs(outputs, scale, amax_history): + """Quantize-dequantize the output gradient but not the output.""" + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e5m2").max), "float32" + ), + ) + qdq_upstream = quantizers.quantize_and_dequantize( + upstream, scale, "float8_e5m2", self.compute_dtype + ) + new_amax_history = quantizers.compute_float8_amax_history( + upstream, amax_history + ) + return qdq_upstream, new_scale, new_amax_history + + return outputs, grad + + x = ops.einsum( + self.equation, + quantized_dequantize_inputs( + inputs, + ops.convert_to_tensor(self.inputs_scale), + ops.convert_to_tensor(self.inputs_amax_history), + ), + quantized_dequantize_inputs( + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ops.convert_to_tensor(self.kernel_amax_history), + ), + ) + # `quantized_dequantize_outputs` is placed immediately after + # `ops.einsum` for the sake of pattern matching in gemm_rewrite. That + # way, the qdq will be adjacent to the corresponding einsum_bprop in the + # bprop. + x = quantized_dequantize_outputs( + x, + ops.convert_to_tensor(self.outputs_grad_scale), + ops.convert_to_tensor(self.outputs_grad_amax_history), + ) + if self.bias is not None: + # Under non-mixed precision cases, F32 bias has to be converted to + # BF16 first to get the biasAdd fusion support. ref. 
PR + # https://github.com/tensorflow/tensorflow/pull/60306 + bias = self.bias + if self.dtype_policy.compute_dtype == "float32": + bias_bf16 = ops.cast(bias, "bfloat16") + bias = ops.cast(bias_bf16, bias.dtype) + x = ops.add(x, bias) + if self.activation is not None: + x = self.activation(x) + return x + + def quantize(self, mode, type_check=True): + # Prevent quantization of the subclasses + if type_check and (type(self) is not EinsumDense): + raise self._not_implemented_error(self.quantize) + + if mode == "int8": + ( + self._input_reduced_axes, + self._kernel_reduced_axes, + self._input_transpose_axes, + self._kernel_transpose_axes, + self._input_expand_axes, + self._kernel_expand_axes, + self._input_squeeze_axes, + self._kernel_squeeze_axes, + self._custom_gradient_equation, + self._kernel_reverse_transpose_axes, + ) = _analyze_quantization_info(self.equation, self.input_spec.ndim) + # Quantize `self._kernel` to int8 and compute corresponding scale + kernel_value, kernel_scale = quantizers.abs_max_quantize( + self._kernel, axis=self._kernel_reduced_axes, to_numpy=True + ) + kernel_scale = ops.transpose( + kernel_scale, self._kernel_transpose_axes + ) + if self._kernel_expand_axes: + kernel_scale = ops.expand_dims( + kernel_scale, axis=self._kernel_expand_axes + ) + if self._kernel_squeeze_axes: + kernel_scale = ops.squeeze( + kernel_scale, axis=self._kernel_squeeze_axes + ) + kernel_shape = tuple(self._kernel.shape) + del self._kernel + # Utilize a lambda expression as an initializer to prevent adding a + # large constant to the computation graph. 
+ self._int8_build(kernel_shape, kernel_value, kernel_scale) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + # Set new dtype policy + if self.dtype_policy.quantization_mode is None: + policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}") + self.dtype_policy = policy + + def _get_kernel_with_merged_lora(self): + if self.dtype_policy.quantization_mode is not None: + kernel_value = self._kernel + kernel_scale = self.kernel_scale + if self.lora_enabled: + # Dequantize & quantize to merge lora weights into int8 kernel + # Note that this is a lossy compression + if self._kernel_squeeze_axes: + kernel_scale = ops.expand_dims( + kernel_scale, axis=self._kernel_squeeze_axes + ) + if self._kernel_expand_axes: + kernel_scale = ops.squeeze( + kernel_scale, axis=self._kernel_expand_axes + ) + if self._kernel_transpose_axes: + + def _argsort(seq): + # Ref: https://stackoverflow.com/a/3382369 + return sorted(range(len(seq)), key=seq.__getitem__) + + reverse_transpose = _argsort(self._kernel_transpose_axes) + kernel_scale = ops.transpose( + kernel_scale, axes=reverse_transpose + ) + kernel_value = ops.divide(kernel_value, kernel_scale) + kernel_value = ops.add( + kernel_value, + ops.matmul(self.lora_kernel_a, self.lora_kernel_b), + ) + kernel_value, kernel_scale = quantizers.abs_max_quantize( + kernel_value, axis=self._kernel_reduced_axes, to_numpy=True + ) + kernel_scale = ops.transpose( + kernel_scale, self._kernel_transpose_axes + ) + if self._kernel_expand_axes: + kernel_scale = ops.expand_dims( + kernel_scale, axis=self._kernel_expand_axes + ) + if self._kernel_squeeze_axes: + kernel_scale = ops.squeeze( + kernel_scale, axis=self._kernel_squeeze_axes + ) + else: + kernel_value = self.kernel + kernel_scale = None + return kernel_value, kernel_scale + + +def _analyze_einsum_string(equation, bias_axes, input_shape, output_shape): + """Analyzes an einsum string to determine the required weight shape.""" + + 
dot_replaced_string = re.sub(r"\.\.\.", "0", equation) + + # This is the case where no ellipses are present in the string. + split_string = re.match( + "([a-zA-Z]+),([a-zA-Z]+)->([a-zA-Z]+)", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape + ) + + # This is the case where ellipses are present on the left. + split_string = re.match( + "0([a-zA-Z]+),([a-zA-Z]+)->0([a-zA-Z]+)", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape, left_elided=True + ) + + # This is the case where ellipses are present on the right. + split_string = re.match( + "([a-zA-Z]{2,})0,([a-zA-Z]+)->([a-zA-Z]+)0", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape + ) + + raise ValueError( + f"Invalid einsum equation '{equation}'. Equations must be in the form " + "[X],[Y]->[Z], ...[X],[Y]->...[Z], or [X]...,[Y]->[Z]...." + ) + + +def _analyze_split_string( + split_string, bias_axes, input_shape, output_shape, left_elided=False +): + """Analyze an pre-split einsum string to find the weight shape.""" + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + + if isinstance(output_shape, int): + output_shape = [output_shape] + else: + output_shape = list(output_shape) + + output_shape.insert(0, input_shape[0]) + + if elided > 0 and left_elided: + for i in range(1, elided): + # We already inserted the 0th input dimension at dim 0, so we need + # to start at location 1 here. 
+ output_shape.insert(1, input_shape[i]) + elif elided > 0 and not left_elided: + for i in range(len(input_shape) - elided, len(input_shape)): + output_shape.append(input_shape[i]) + + if left_elided: + # If we have beginning dimensions elided, we need to use negative + # indexing to determine where in the input dimension our values are. + input_dim_map = { + dim: (i + elided) - len(input_shape) + for i, dim in enumerate(input_spec) + } + # Because we've constructed the full output shape already, we don't need + # to do negative indexing. + output_dim_map = { + dim: (i + elided) for i, dim in enumerate(output_spec) + } + else: + input_dim_map = {dim: i for i, dim in enumerate(input_spec)} + output_dim_map = {dim: i for i, dim in enumerate(output_spec)} + + for dim in input_spec: + input_shape_at_dim = input_shape[input_dim_map[dim]] + if dim in output_dim_map: + output_shape_at_dim = output_shape[output_dim_map[dim]] + if ( + output_shape_at_dim is not None + and output_shape_at_dim != input_shape_at_dim + ): + raise ValueError( + "Input shape and output shape do not match at shared " + f"dimension '{dim}'. Input shape is {input_shape_at_dim}, " + "and output shape " + f"is {output_shape[output_dim_map[dim]]}." + ) + + for dim in output_spec: + if dim not in input_spec and dim not in weight_spec: + raise ValueError( + f"Dimension '{dim}' was specified in the output " + f"'{output_spec}' but has no corresponding dim in the input " + f"spec '{input_spec}' or weight spec '{output_spec}'" + ) + + weight_shape = [] + for dim in weight_spec: + if dim in input_dim_map: + weight_shape.append(input_shape[input_dim_map[dim]]) + elif dim in output_dim_map: + weight_shape.append(output_shape[output_dim_map[dim]]) + else: + raise ValueError( + f"Weight dimension '{dim}' did not have a match in either " + f"the input spec '{input_spec}' or the output " + f"spec '{output_spec}'. For this layer, the weight must " + "be fully specified." 
+ ) + + if bias_axes is not None: + num_left_elided = elided if left_elided else 0 + idx_map = { + char: output_shape[i + num_left_elided] + for i, char in enumerate(output_spec) + } + + for char in bias_axes: + if char not in output_spec: + raise ValueError( + f"Bias dimension '{char}' was requested, but is not part " + f"of the output spec '{output_spec}'" + ) + + first_bias_location = min( + [output_spec.find(char) for char in bias_axes] + ) + bias_output_spec = output_spec[first_bias_location:] + + bias_shape = [ + idx_map[char] if char in bias_axes else 1 + for char in bias_output_spec + ] + + if not left_elided: + for _ in range(elided): + bias_shape.append(1) + else: + bias_shape = None + + return weight_shape, bias_shape, output_shape + + +def _analyze_quantization_info(equation, input_shape): + def get_specs(equation, input_shape): + possible_labels = string.ascii_letters + dot_replaced_string = re.sub(r"\.\.\.", "0", equation) + + # This is the case where no ellipses are present in the string. + split_string = re.match( + "([a-zA-Z]+),([a-zA-Z]+)->([a-zA-Z]+)", dot_replaced_string + ) + if split_string is not None: + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + return input_spec, weight_spec, output_spec + + # This is the case where ellipses are present on the left. 
+ split_string = re.match( + "0([a-zA-Z]+),([a-zA-Z]+)->0([a-zA-Z]+)", dot_replaced_string + ) + if split_string is not None: + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + possible_labels = sorted( + set(possible_labels) + - set(input_spec) + - set(weight_spec) + - set(output_spec) + ) + # Pad labels on the left to `input_spec` and `output_spec` + for i in range(elided): + input_spec = possible_labels[i] + input_spec + output_spec = possible_labels[i] + output_spec + return input_spec, weight_spec, output_spec + + # This is the case where ellipses are present on the right. + split_string = re.match( + "([a-zA-Z]{2,})0,([a-zA-Z]+)->([a-zA-Z]+)0", dot_replaced_string + ) + if split_string is not None: + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + possible_labels = sorted( + set(possible_labels) + - set(input_spec) + - set(weight_spec) + - set(output_spec) + ) + # Pad labels on the right to `input_spec` and `output_spec` + for i in range(elided): + input_spec = input_spec + possible_labels[i] + output_spec = output_spec + possible_labels[i] + return input_spec, weight_spec, output_spec + + raise ValueError( + f"Invalid einsum equation '{equation}'. Equations must be in the " + "form [X],[Y]->[Z], ...[X],[Y]->...[Z], or [X]...,[Y]->[Z]...." 
+ ) + + input_spec, weight_spec, output_spec = get_specs(equation, input_shape) + + # Determine the axes that should be reduced by the quantizer + input_reduced_axes = [] + weight_reduced_axes = [] + for i, label in enumerate(input_spec): + index = output_spec.find(label) + if index == -1: + input_reduced_axes.append(i) + for i, label in enumerate(weight_spec): + index = output_spec.find(label) + if index == -1: + weight_reduced_axes.append(i) + + # Determine the axes of `ops.expand_dims` + input_expand_axes = [] + weight_expand_axes = [] + for i, label in enumerate(output_spec): + index_input = input_spec.find(label) + index_weight = weight_spec.find(label) + if index_input == -1: + input_expand_axes.append(i) + if index_weight == -1: + weight_expand_axes.append(i) + + # Determine the axes of `ops.transpose` + input_transpose_axes = [] + weight_transpose_axes = [] + for i, label in enumerate(output_spec): + index_input = input_spec.find(label) + index_weight = weight_spec.find(label) + if index_input != -1: + input_transpose_axes.append(index_input) + if index_weight != -1: + weight_transpose_axes.append(index_weight) + # Postprocess the information: + # 1. Add dummy axes (1) to transpose_axes + # 2. Add axis to squeeze_axes if 1. 
failed + input_squeeze_axes = [] + weight_squeeze_axes = [] + for ori_index in input_reduced_axes: + try: + index = input_expand_axes.pop(0) + except IndexError: + input_squeeze_axes.append(ori_index) + input_transpose_axes.insert(index, ori_index) + for ori_index in weight_reduced_axes: + try: + index = weight_expand_axes.pop(0) + except IndexError: + weight_squeeze_axes.append(ori_index) + weight_transpose_axes.insert(index, ori_index) + # Prepare equation for `einsum_with_inputs_gradient` + custom_gradient_equation = f"{output_spec},{weight_spec}->{input_spec}" + weight_reverse_transpose_axes = [ + i + for (_, i) in sorted( + (v, i) for (i, v) in enumerate(weight_transpose_axes) + ) + ] + return ( + input_reduced_axes, + weight_reduced_axes, + input_transpose_axes, + weight_transpose_axes, + input_expand_axes, + weight_expand_axes, + input_squeeze_axes, + weight_squeeze_axes, + custom_gradient_equation, + weight_reverse_transpose_axes, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/embedding.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..38ced7194a4b391cb0d54862a88b73f3e63db3dc --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/embedding.py @@ -0,0 +1,391 @@ +import warnings + +from keras.src import backend +from keras.src import constraints +from keras.src import dtype_policies +from keras.src import initializers +from keras.src import ops +from keras.src import quantizers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Embedding") +class Embedding(Layer): + """Turns nonnegative integers (indexes) into dense vectors of fixed size. + + e.g. 
`[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` + + This layer can only be used on nonnegative integer inputs of a fixed range. + + Example: + + >>> model = keras.Sequential() + >>> model.add(keras.layers.Embedding(1000, 64)) + >>> # The model will take as input an integer matrix of size (batch, + >>> # input_length), and the largest integer (i.e. word index) in the input + >>> # should be no larger than 999 (vocabulary size). + >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch + >>> # dimension. + >>> input_array = np.random.randint(1000, size=(32, 10)) + >>> model.compile('rmsprop', 'mse') + >>> output_array = model.predict(input_array) + >>> print(output_array.shape) + (32, 10, 64) + + Args: + input_dim: Integer. Size of the vocabulary, + i.e. maximum integer index + 1. + output_dim: Integer. Dimension of the dense embedding. + embeddings_initializer: Initializer for the `embeddings` + matrix (see `keras.initializers`). + embeddings_regularizer: Regularizer function applied to + the `embeddings` matrix (see `keras.regularizers`). + embeddings_constraint: Constraint function applied to + the `embeddings` matrix (see `keras.constraints`). + mask_zero: Boolean, whether or not the input value 0 is a special + "padding" value that should be masked out. + This is useful when using recurrent layers which + may take variable length input. If this is `True`, + then all subsequent layers in the model need + to support masking or an exception will be raised. + If `mask_zero` is set to `True`, as a consequence, + index 0 cannot be used in the vocabulary (`input_dim` should + equal size of vocabulary + 1). + weights: Optional floating-point matrix of size + `(input_dim, output_dim)`. The initial embeddings values + to use. + lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. 
LoRA sets the layer's embeddings + matrix to non-trainable and replaces it with a delta over the + original matrix, obtained via multiplying two lower-rank + trainable matrices. This can be useful to reduce the + computation cost of fine-tuning large embedding layers. + You can also enable LoRA on an existing + `Embedding` layer by calling `layer.enable_lora(rank)`. + + Input shape: + 2D tensor with shape: `(batch_size, input_length)`. + + Output shape: + 3D tensor with shape: `(batch_size, input_length, output_dim)`. + """ + + def __init__( + self, + input_dim, + output_dim, + embeddings_initializer="uniform", + embeddings_regularizer=None, + embeddings_constraint=None, + mask_zero=False, + weights=None, + lora_rank=None, + **kwargs, + ): + input_length = kwargs.pop("input_length", None) + if input_length is not None: + warnings.warn( + "Argument `input_length` is deprecated. Just remove it." + ) + super().__init__(**kwargs) + self.input_dim = input_dim + self.output_dim = output_dim + self.embeddings_initializer = initializers.get(embeddings_initializer) + self.embeddings_regularizer = regularizers.get(embeddings_regularizer) + self.embeddings_constraint = constraints.get(embeddings_constraint) + self.mask_zero = mask_zero + self.supports_masking = mask_zero + self.autocast = False + self.lora_rank = lora_rank + self.lora_enabled = False + + if weights is not None: + self.build() + if not (isinstance(weights, list) and len(weights) == 1): + weights = [weights] + self.set_weights(weights) + + def build(self, input_shape=None): + if self.built: + return + if self.quantization_mode is not None: + self.quantized_build(input_shape, mode=self.quantization_mode) + if self.quantization_mode != "int8": + self._embeddings = self.add_weight( + shape=(self.input_dim, self.output_dim), + initializer=self.embeddings_initializer, + name="embeddings", + regularizer=self.embeddings_regularizer, + constraint=self.embeddings_constraint, + trainable=True, + ) + self.built = True + 
if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def embeddings(self): + if self.lora_enabled: + return self._embeddings + ops.matmul( + self.lora_embeddings_a, self.lora_embeddings_b + ) + return self._embeddings + + def call(self, inputs): + if inputs.dtype != "int32" and inputs.dtype != "int64": + inputs = ops.cast(inputs, "int32") + outputs = ops.take(self.embeddings, inputs, axis=0) + return ops.cast(outputs, dtype=self.compute_dtype) + + def compute_mask(self, inputs, mask=None): + if not self.mask_zero: + return None + return ops.not_equal(inputs, 0) + + def compute_output_shape(self, input_shape): + return (*input_shape, self.output_dim) + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.embeddings_constraint: + raise ValueError( + "Lora is incompatible with embedding constraints. " + "In order to enable lora on this layer, remove the " + "`embeddings_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." 
+ ) + self._tracker.unlock() + self.lora_embeddings_a = self.add_weight( + name="lora_embeddings_a", + shape=(self.embeddings.shape[0], rank), + initializer=initializers.get(a_initializer), + regularizer=self.embeddings_regularizer, + ) + self.lora_embeddings_b = self.add_weight( + name="lora_embeddings_b", + shape=(rank, self.embeddings.shape[1]), + initializer=initializers.get(b_initializer), + regularizer=self.embeddings_regularizer, + ) + self.embeddings.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + embeddings_value, embeddings_scale = ( + self._get_embeddings_with_merged_lora() + ) + target_variables = [embeddings_value] + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(embeddings_scale) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + target_variables = [self._embeddings] + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(self.embeddings_scale) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_embeddings_a.assign( + ops.zeros(self.lora_embeddings_a.shape) + ) + self.lora_embeddings_b.assign( + ops.zeros(self.lora_embeddings_b.shape) + ) + + 
def get_config(self): + base_config = super().get_config() + config = { + "input_dim": self.input_dim, + "output_dim": self.output_dim, + "embeddings_initializer": initializers.serialize( + self.embeddings_initializer + ), + "embeddings_regularizer": regularizers.serialize( + self.embeddings_regularizer + ), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "embeddings_constraint": constraints.serialize( + self.embeddings_constraint + ), + "mask_zero": self.mask_zero, + } + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return {**base_config, **config} + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. its variables) upon deserialization.", + ) + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. 
" + f"Expected: {[v.name for v in all_vars]}" + ) + + """Quantization-related (int8) methods""" + + def _quantization_mode_error(self, mode): + return NotImplementedError( + "Invalid quantization mode. Expected 'int8'. " + f"Received: quantization_mode={mode}" + ) + + def quantized_build(self, input_shape, mode): + if mode == "int8": + self._int8_build() + else: + raise self._quantization_mode_error(mode) + + def _int8_build( + self, + embeddings_initializer="zeros", + embeddings_scale_initializer="ones", + ): + self._embeddings = self.add_weight( + name="embeddings", + shape=(self.input_dim, self.output_dim), + initializer=embeddings_initializer, + dtype="int8", + trainable=False, + ) + # We choose to reduce the axis of `output_dim` because, typically, + # `input_dim` is larger than `output_dim`. This reduces quantization + # error. + self.embeddings_scale = self.add_weight( + name="embeddings_scale", + shape=(self.input_dim,), + initializer=embeddings_scale_initializer, + trainable=False, + ) + self._is_quantized = True + + def quantized_call(self, *args, **kwargs): + if self.quantization_mode != "int8": + raise self._quantization_mode_error(self.quantization_mode) + return super().quantized_call(*args, **kwargs) + + def _int8_call(self, inputs, training=None): + # We cannot update quantized self._embeddings, so the custom gradient is + # not needed + if backend.standardize_dtype(inputs.dtype) not in ("int32", "int64"): + inputs = ops.cast(inputs, "int32") + embeddings_scale = ops.take(self.embeddings_scale, inputs, axis=0) + outputs = ops.take(self._embeddings, inputs, axis=0) + # De-scale outputs + outputs = ops.divide( + ops.cast(outputs, dtype=self.compute_dtype), + ops.expand_dims(embeddings_scale, axis=-1), + ) + if self.lora_enabled: + lora_outputs = ops.take(self.lora_embeddings_a, inputs, axis=0) + lora_outputs = ops.matmul(lora_outputs, self.lora_embeddings_b) + outputs = ops.add(outputs, lora_outputs) + return outputs + + def quantize(self, mode, 
type_check=True): + # Prevent quantization of the subclasses + if type_check and (type(self) is not Embedding): + raise self._not_implemented_error(self.quantize) + + if mode == "int8": + # Quantize `self._embeddings` to int8 and compute corresponding + # scale + embeddings_value, embeddings_scale = quantizers.abs_max_quantize( + self._embeddings, axis=-1, to_numpy=True + ) + embeddings_scale = ops.squeeze(embeddings_scale, axis=-1) + del self._embeddings + # Utilize a lambda expression as an initializer to prevent adding a + # large constant to the computation graph. + self._int8_build(embeddings_value, embeddings_scale) + else: + raise self._quantization_mode_error(mode) + + # Set new dtype policy + if self.dtype_policy.quantization_mode is None: + policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}") + self.dtype_policy = policy + + def _get_embeddings_with_merged_lora(self): + if self.dtype_policy.quantization_mode is not None: + embeddings_value = self._embeddings + embeddings_scale = self.embeddings_scale + if self.lora_enabled: + # Dequantize & quantize to merge lora weights into embeddings + # Note that this is a lossy compression + embeddings_value = ops.divide( + embeddings_value, ops.expand_dims(embeddings_scale, axis=-1) + ) + embeddings_value = ops.add( + embeddings_value, + ops.matmul(self.lora_embeddings_a, self.lora_embeddings_b), + ) + embeddings_value, embeddings_scale = ( + quantizers.abs_max_quantize( + embeddings_value, axis=-1, to_numpy=True + ) + ) + embeddings_scale = ops.squeeze(embeddings_scale, axis=-1) + return embeddings_value, embeddings_scale + return self.embeddings, None diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/identity.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/identity.py new file mode 100644 index 0000000000000000000000000000000000000000..f7fa9e752fb00c458ec9ee772940755a2f8b70a7 --- /dev/null +++ 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/identity.py @@ -0,0 +1,30 @@ +from keras.src import tree +from keras.src.api_export import keras_export +from keras.src.backend import KerasTensor +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Identity") +class Identity(Layer): + """Identity layer. + + This layer should be used as a placeholder when no operation is to be + performed. The layer just returns its `inputs` argument as output. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.built = True + + def call(self, inputs): + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_output_spec(self, inputs): + return tree.map_structure( + lambda x: KerasTensor(x.shape, dtype=x.dtype, sparse=x.sparse), + inputs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/input_layer.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/input_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..8a45178456c969d03ed6e64f0d982dbe7444856e --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/input_layer.py @@ -0,0 +1,201 @@ +import warnings + +from keras.src import backend +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer +from keras.src.ops.node import Node + + +@keras_export("keras.layers.InputLayer") +class InputLayer(Layer): + def __init__( + self, + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + input_tensor=None, + optional=False, + name=None, + **kwargs, + ): + # TODO: support for ragged. + super().__init__(name=name) + + if "input_shape" in kwargs: + warnings.warn( + "Argument `input_shape` is deprecated. Use `shape` instead." 
+ ) + shape = kwargs.pop("input_shape") + if "batch_input_shape" in kwargs: + batch_shape = kwargs.pop("batch_input_shape") + + if input_tensor is not None: + if not isinstance(input_tensor, backend.KerasTensor): + raise ValueError( + "Argument `input_tensor` must be a KerasTensor. " + f"Received invalid type: input_tensor={input_tensor} " + f"(of type {type(input_tensor)})" + ) + if batch_size is not None: + if ( + len(input_tensor.shape) < 1 + or input_tensor.shape[0] != batch_size + ): + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `batch_size` argument." + ) + if shape is not None: + if ( + len(shape) != len(input_tensor.shape) - 1 + or shape != input_tensor.shape[1:] + ): + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `shape` argument." + ) + if batch_shape is not None and batch_shape != input_tensor.shape: + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `batch_shape` argument." + ) + if dtype is not None and input_tensor.dtype != dtype: + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `dtype` argument." + ) + if sparse is not None and input_tensor.sparse != sparse: + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `sparse` argument." + ) + batch_shape = input_tensor.shape + dtype = input_tensor.dtype + sparse = input_tensor.sparse + else: + if shape is not None and batch_shape is not None: + raise ValueError( + "You cannot pass both `shape` and `batch_shape` at the " + "same time." + ) + if batch_size is not None and batch_shape is not None: + raise ValueError( + "You cannot pass both `batch_size` and `batch_shape` " + "at the same time." 
+ ) + if shape is None and batch_shape is None: + raise ValueError("You must pass a `shape` argument.") + + if shape is not None: + shape = backend.standardize_shape(shape) + batch_shape = (batch_size,) + shape + + self._batch_shape = backend.standardize_shape(batch_shape) + self._dtype = backend.standardize_dtype(dtype) + self.sparse = bool(sparse) + if self.sparse and not backend.SUPPORTS_SPARSE_TENSORS: + raise ValueError( + "`sparse=True` is not supported with backend: " + f"{backend.backend()}" + ) + if input_tensor is None: + input_tensor = backend.KerasTensor( + shape=batch_shape, dtype=dtype, sparse=sparse, name=name + ) + self._input_tensor = input_tensor + Node(operation=self, call_args=(), call_kwargs={}, outputs=input_tensor) + self.built = True + self.optional = optional + + def call(self): + return + + @property + def batch_shape(self): + return self._batch_shape + + @property + def dtype(self): + return self._dtype + + def get_config(self): + return { + "batch_shape": self.batch_shape, + "dtype": self.dtype, + "sparse": self.sparse, + "name": self.name, + } + + +@keras_export(["keras.layers.Input", "keras.Input"]) +def Input( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + name=None, + tensor=None, + optional=False, +): + """Used to instantiate a Keras tensor. + + A Keras tensor is a symbolic tensor-like object, which we augment with + certain attributes that allow us to build a Keras model just by knowing the + inputs and outputs of the model. + + For instance, if `a`, `b` and `c` are Keras tensors, + it becomes possible to do: + `model = Model(input=[a, b], output=c)` + + Args: + shape: A shape tuple (tuple of integers or `None` objects), + not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. Elements of this tuple + can be `None`; `None` elements represent dimensions where the shape + is not known and may vary (e.g. 
sequence length). + batch_size: Optional static batch size (integer). + dtype: The data type expected by the input, as a string + (e.g. `"float32"`, `"int32"`...) + sparse: A boolean specifying whether the expected input will be sparse + tensors. Note that, if `sparse` is `False`, sparse tensors can still + be passed into the input - they will be densified with a default + value of 0. This feature is only supported with the TensorFlow + backend. Defaults to `False`. + batch_shape: Optional shape tuple (tuple of integers or `None` objects), + including the batch size. + name: Optional name string for the layer. + Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will use this tensor rather + than creating a new placeholder tensor. + optional: Boolean, whether the input is optional or not. + An optional input can accept `None` values. + + Returns: + A Keras tensor. 
+ + Example: + + ```python + # This is a logistic regression in Keras + x = Input(shape=(32,)) + y = Dense(16, activation='softmax')(x) + model = Model(x, y) + ``` + """ + layer = InputLayer( + shape=shape, + batch_size=batch_size, + dtype=dtype, + sparse=sparse, + batch_shape=batch_shape, + name=name, + input_tensor=tensor, + optional=optional, + ) + return layer.output diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/lambda_layer.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/lambda_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..11d5f15f0f9ee92ccd3591c15a0356857e960108 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/lambda_layer.py @@ -0,0 +1,231 @@ +import inspect +import types + +from keras.src import backend +from keras.src import tree +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer +from keras.src.saving import serialization_lib +from keras.src.utils import python_utils + + +@keras_export("keras.layers.Lambda") +class Lambda(Layer): + """Wraps arbitrary expressions as a `Layer` object. + + The `Lambda` layer exists so that arbitrary expressions can be used + as a `Layer` when constructing Sequential + and Functional API models. `Lambda` layers are best suited for simple + operations or quick experimentation. For more advanced use cases, + prefer writing new subclasses of `Layer`. + + WARNING: `Lambda` layers have (de)serialization limitations! + + The main reason to subclass `Layer` instead of using a + `Lambda` layer is saving and inspecting a model. `Lambda` layers + are saved by serializing the Python bytecode, which is fundamentally + non-portable and potentially unsafe. + They should only be loaded in the same environment where + they were saved. Subclassed layers can be saved in a more portable way + by overriding their `get_config()` method. 
Models that rely on + subclassed Layers are also often easier to visualize and reason about. + + Example: + + ```python + # add a x -> x^2 layer + model.add(Lambda(lambda x: x ** 2)) + ``` + + Args: + function: The function to be evaluated. Takes input tensor as first + argument. + output_shape: Expected output shape from function. This argument + can usually be inferred if not explicitly provided. + Can be a tuple or function. If a tuple, it only specifies + the first dimension onward; sample dimension is assumed + either the same as the input: + `output_shape = (input_shape[0], ) + output_shape` or, + the input is `None` and the sample dimension is also `None`: + `output_shape = (None, ) + output_shape`. + If a function, it specifies the + entire shape as a function of the input shape: + `output_shape = f(input_shape)`. + mask: Either None (indicating no masking) or a callable with the same + signature as the `compute_mask` layer method, or a tensor + that will be returned as output mask regardless + of what the input is. + arguments: Optional dictionary of keyword arguments to be passed to the + function. + """ + + def __init__( + self, function, output_shape=None, mask=None, arguments=None, **kwargs + ): + super().__init__(**kwargs) + + self.arguments = arguments or {} + self.function = function + + if mask is not None: + self.supports_masking = True + else: + self.supports_masking = False + self.mask = mask + self._output_shape = output_shape + + # Warning on every invocation will be quite irksome in Eager mode. 
+ self._already_warned = False + + function_args = inspect.getfullargspec(function).args + self._fn_expects_training_arg = "training" in function_args + self._fn_expects_mask_arg = "mask" in function_args + + def compute_output_shape(self, input_shape): + if self._output_shape is None: + # Leverage backend shape inference + try: + inputs = tree.map_shape_structure( + lambda x: backend.KerasTensor(x, dtype=self.compute_dtype), + input_shape, + ) + output_spec = backend.compute_output_spec(self.call, inputs) + return tree.map_structure(lambda x: x.shape, output_spec) + except: + raise NotImplementedError( + "We could not automatically infer the shape of " + "the Lambda's output. Please specify the `output_shape` " + "argument for this Lambda layer." + ) + + if callable(self._output_shape): + return self._output_shape(input_shape) + + # Output shapes are passed directly and don't include batch dimension. + batch_size = tree.flatten(input_shape)[0] + + def _add_batch(shape): + return (batch_size,) + shape + + return tree.map_shape_structure(_add_batch, self._output_shape) + + def call(self, inputs, mask=None, training=None): + # We must copy for thread safety, + # but it only needs to be a shallow copy. 
+ kwargs = {k: v for k, v in self.arguments.items()} + if self._fn_expects_mask_arg: + kwargs["mask"] = mask + if self._fn_expects_training_arg: + kwargs["training"] = training + return self.function(inputs, **kwargs) + + def compute_mask(self, inputs, mask=None): + if callable(self.mask): + return self.mask(inputs, mask) + return self.mask + + def get_config(self): + config = { + "function": self._serialize_function_to_config(self.function), + } + if self._output_shape is not None: + if callable(self._output_shape): + output_shape = self._serialize_function_to_config( + self._output_shape + ) + else: + output_shape = self._output_shape + config["output_shape"] = output_shape + if self.mask is not None: + if callable(self.mask): + mask = self._serialize_function_to_config(self.mask) + else: + mask = serialization_lib.serialize_keras_object(self.mask) + config["mask"] = mask + config["arguments"] = serialization_lib.serialize_keras_object( + self.arguments + ) + base_config = super().get_config() + return {**base_config, **config} + + def _serialize_function_to_config(self, fn): + if isinstance(fn, types.LambdaType) and fn.__name__ == "": + code, defaults, closure = python_utils.func_dump(fn) + return { + "class_name": "__lambda__", + "config": { + "code": code, + "defaults": defaults, + "closure": closure, + }, + } + elif callable(fn): + return serialization_lib.serialize_keras_object(fn) + raise ValueError( + "Invalid input type for serialization. " + f"Received: {fn} of type {type(fn)}." + ) + + @staticmethod + def _raise_for_lambda_deserialization(arg_name, safe_mode): + if safe_mode: + raise ValueError( + "The `{arg_name}` of this `Lambda` layer is a Python lambda. " + "Deserializing it is unsafe. If you trust the source of the " + "config artifact, you can override this error " + "by passing `safe_mode=False` " + "to `from_config()`, or calling " + "`keras.config.enable_unsafe_deserialization()." 
+ ) + + @classmethod + def from_config(cls, config, custom_objects=None, safe_mode=None): + safe_mode = safe_mode or serialization_lib.in_safe_mode() + fn_config = config["function"] + if ( + isinstance(fn_config, dict) + and "class_name" in fn_config + and fn_config["class_name"] == "__lambda__" + ): + cls._raise_for_lambda_deserialization("function", safe_mode) + inner_config = fn_config["config"] + fn = python_utils.func_load( + inner_config["code"], + defaults=inner_config["defaults"], + closure=inner_config["closure"], + ) + config["function"] = fn + else: + config["function"] = serialization_lib.deserialize_keras_object( + fn_config, custom_objects=custom_objects + ) + if "output_shape" in config: + fn_config = config["output_shape"] + if ( + isinstance(fn_config, dict) + and "class_name" in fn_config + and fn_config["class_name"] == "__lambda__" + ): + cls._raise_for_lambda_deserialization("function", safe_mode) + inner_config = fn_config["config"] + fn = python_utils.func_load( + inner_config["code"], + defaults=inner_config["defaults"], + closure=inner_config["closure"], + ) + config["output_shape"] = fn + else: + output_shape = serialization_lib.deserialize_keras_object( + fn_config, custom_objects=custom_objects + ) + if isinstance(output_shape, list) and all( + isinstance(e, (int, type(None))) for e in output_shape + ): + output_shape = tuple(output_shape) + config["output_shape"] = output_shape + + if "arguments" in config: + config["arguments"] = serialization_lib.deserialize_keras_object( + config["arguments"], custom_objects=custom_objects + ) + return cls(**config) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/masking.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/masking.py new file mode 100644 index 0000000000000000000000000000000000000000..64483aefb149425cf4335230e1dd0a396cf1422c --- /dev/null +++ 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/masking.py @@ -0,0 +1,71 @@ +from keras.src import backend +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Masking") +class Masking(Layer): + """Masks a sequence by using a mask value to skip timesteps. + + For each timestep in the input tensor (dimension #1 in the tensor), + if all values in the input tensor at that timestep + are equal to `mask_value`, then the timestep will be masked (skipped) + in all downstream layers (as long as they support masking). + + If any downstream layer does not support masking yet receives such + an input mask, an exception will be raised. + + Example: + + Consider a NumPy data array `x` of shape `(samples, timesteps, features)`, + to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you + lack data for these timesteps. You can: + + - Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.` + - Insert a `Masking` layer with `mask_value=0.` before the LSTM layer: + + ```python + samples, timesteps, features = 32, 10, 8 + inputs = np.random.random([samples, timesteps, features]).astype(np.float32) + inputs[:, 3, :] = 0. + inputs[:, 5, :] = 0. + + model = keras.models.Sequential() + model.add(keras.layers.Masking(mask_value=0.0)) + model.add(keras.layers.LSTM(32)) + output = model(inputs) + # The time step 3 and 5 will be skipped from LSTM calculation. + ``` + + Note: in the Keras masking convention, a masked timestep is denoted by + a mask value of `False`, while a non-masked (i.e. usable) timestep + is denoted by a mask value of `True`. 
+ """ + + def __init__(self, mask_value=0.0, **kwargs): + super().__init__(**kwargs) + self.mask_value = mask_value + self.supports_masking = True + self.built = True + + def compute_mask(self, inputs, mask=None): + return ops.any(ops.not_equal(inputs, self.mask_value), axis=-1) + + def call(self, inputs): + boolean_mask = ops.any( + ops.not_equal(inputs, self.mask_value), axis=-1, keepdims=True + ) + # Set masked outputs to 0 + outputs = inputs * backend.cast(boolean_mask, dtype=inputs.dtype) + # Compute the mask and outputs simultaneously. + backend.set_keras_mask(outputs, mask=ops.squeeze(boolean_mask, axis=-1)) + return outputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + base_config = super().get_config() + config = {"mask_value": self.mask_value} + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/wrapper.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..ee98a70a0291013bab6d5c04963a03ea749ecf6b --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/wrapper.py @@ -0,0 +1,47 @@ +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer +from keras.src.saving import serialization_lib + + +@keras_export("keras.layers.Wrapper") +class Wrapper(Layer): + """Abstract wrapper base class. + + Wrappers take another layer and augment it in various ways. + Do not use this class as a layer, it is only an abstract base class. + Two usable wrappers are the `TimeDistributed` and `Bidirectional` layers. + + Args: + layer: The layer to be wrapped. + """ + + def __init__(self, layer, **kwargs): + try: + assert isinstance(layer, Layer) + except Exception: + raise ValueError( + f"Layer {layer} supplied to Wrapper isn't " + "a supported layer type. 
Please " + "ensure wrapped layer is a valid Keras layer." + ) + super().__init__(**kwargs) + self.layer = layer + + def build(self, input_shape=None): + if not self.layer.built: + self.layer.build(input_shape) + self.layer.built = True + self.built = True + + def get_config(self): + config = {"layer": serialization_lib.serialize_keras_object(self.layer)} + base_config = super().get_config() + return {**base_config, **config} + + @classmethod + def from_config(cls, config, custom_objects=None): + layer = serialization_lib.deserialize_keras_object( + config.pop("layer"), + custom_objects=custom_objects, + ) + return cls(layer, **config) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99203b2164365e9a0fc245ed560999f269cb4b1d Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/add.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/add.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c61851633a8cbb127f57742e5e99b1304949f753 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/add.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/average.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/average.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c6808a6c25410eaa70a23f6e6712ad199542f44c Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/average.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/base_merge.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/base_merge.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ba0e8a17d0a89fa05e012077f81d6c9e27c5d18 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/base_merge.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/concatenate.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/concatenate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa24a4f945f8e72380d790bb67a9b16d6d5ad1e0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/concatenate.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/dot.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/dot.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c82ee704f03c80fd539b66852ac9f9f69ddecd0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/dot.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/maximum.cpython-310.pyc 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/maximum.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64ea7f1c0bfae3444b75925b69df8631a7c82428 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/maximum.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/minimum.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/minimum.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ccd06d36b94f3af38912cb6de47f482ec2d685b0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/minimum.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/multiply.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/multiply.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..501d49f1c8d053e198c97fbbef69ef74371ded39 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/multiply.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/subtract.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/subtract.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..657a07572b3da5e0aef163b6abe8f1c30b89f453 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/subtract.cpython-310.pyc differ