diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..009ce976c51b54689837db8c616ce99467bc10a7 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/__init__.py @@ -0,0 +1,5 @@ +from keras.src.layers.activations.elu import ELU +from keras.src.layers.activations.leaky_relu import LeakyReLU +from keras.src.layers.activations.prelu import PReLU +from keras.src.layers.activations.relu import ReLU +from keras.src.layers.activations.softmax import Softmax diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..68f65ec8711f382356c3f80ab7da682709d6b300 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/activation.py @@ -0,0 +1,40 @@ +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Activation") +class Activation(Layer): + """Applies an activation function to an output. + + Args: + activation: Activation function. It could be a callable, or the name of + an activation from the `keras.activations` namespace. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
+ + Example: + + >>> layer = keras.layers.Activation('relu') + >>> layer(np.array([-3.0, -1.0, 0.0, 2.0])) + [0.0, 0.0, 0.0, 2.0] + >>> layer = keras.layers.Activation(keras.activations.relu) + >>> layer(np.array([-3.0, -1.0, 0.0, 2.0])) + [0.0, 0.0, 0.0, 2.0] + """ + + def __init__(self, activation, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.activation = activations.get(activation) + self.built = True + + def call(self, inputs): + return self.activation(inputs) + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = {"activation": activations.serialize(self.activation)} + base_config = super().get_config() + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf3f632ee700b25ac05a0eb9992ec3426e3e1ab --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/elu.py @@ -0,0 +1,32 @@ +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.ELU") +class ELU(Layer): + """Applies an Exponential Linear Unit function to an output. + + Formula: + + ``` + f(x) = alpha * (exp(x) - 1.) for x < 0 + f(x) = x for x >= 0 + ``` + + Args: + alpha: float, slope of negative section. Defaults to `1.0`. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
+ """ + + def __init__(self, alpha=1.0, **kwargs): + super().__init__(**kwargs) + self.alpha = alpha + self.supports_masking = True + self.built = True + + def call(self, inputs): + return activations.elu(inputs, alpha=self.alpha) + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py new file mode 100644 index 0000000000000000000000000000000000000000..6be1ddfb7e642205594050ff422fca059decbcca --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/leaky_relu.py @@ -0,0 +1,67 @@ +import warnings + +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.LeakyReLU") +class LeakyReLU(Layer): + """Leaky version of a Rectified Linear Unit activation layer. + + This layer allows a small gradient when the unit is not active. + + Formula: + + ``` python + f(x) = alpha * x if x < 0 + f(x) = x if x >= 0 + ``` + + Example: + + ``` python + leaky_relu_layer = LeakyReLU(negative_slope=0.5) + input = np.array([-10, -5, 0.0, 5, 10]) + result = leaky_relu_layer(input) + # result = [-5. , -2.5, 0. , 5. , 10.] + ``` + + Args: + negative_slope: Float >= 0.0. Negative slope coefficient. + Defaults to `0.3`. + **kwargs: Base layer keyword arguments, such as + `name` and `dtype`. + + """ + + def __init__(self, negative_slope=0.3, **kwargs): + if "alpha" in kwargs: + negative_slope = kwargs.pop("alpha") + warnings.warn( + "Argument `alpha` is deprecated. " + "Use `negative_slope` instead." + ) + super().__init__(**kwargs) + if negative_slope is None or negative_slope < 0: + raise ValueError( + "The negative_slope value of a Leaky ReLU layer " + "cannot be None or negative value. Expected a float." 
+ f" Received: negative_slope={negative_slope}" + ) + self.negative_slope = negative_slope + self.supports_masking = True + self.built = True + + def call(self, inputs): + return activations.leaky_relu( + inputs, negative_slope=self.negative_slope + ) + + def get_config(self): + config = super().get_config() + config.update({"negative_slope": self.negative_slope}) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py new file mode 100644 index 0000000000000000000000000000000000000000..652b60e2206776eeb317af4346ef577c1d32d945 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/prelu.py @@ -0,0 +1,99 @@ +from keras.src import activations +from keras.src import constraints +from keras.src import initializers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.PReLU") +class PReLU(Layer): + """Parametric Rectified Linear Unit activation layer. + + Formula: + ``` python + f(x) = alpha * x for x < 0 + f(x) = x for x >= 0 + ``` + where `alpha` is a learned array with the same shape as x. + + Args: + alpha_initializer: Initializer function for the weights. + alpha_regularizer: Regularizer for the weights. + alpha_constraint: Constraint for the weights. + shared_axes: The axes along which to share learnable parameters for the + activation function. For example, if the incoming feature maps are + from a 2D convolution with output shape + `(batch, height, width, channels)`, and you wish to share parameters + across space so that each filter only has one set of parameters, + set `shared_axes=[1, 2]`. 
+ **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + """ + + def __init__( + self, + alpha_initializer="Zeros", + alpha_regularizer=None, + alpha_constraint=None, + shared_axes=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self.alpha_initializer = initializers.get(alpha_initializer) + self.alpha_regularizer = regularizers.get(alpha_regularizer) + self.alpha_constraint = constraints.get(alpha_constraint) + if shared_axes is None: + self.shared_axes = None + elif not isinstance(shared_axes, (list, tuple)): + self.shared_axes = [shared_axes] + else: + self.shared_axes = list(shared_axes) + + def build(self, input_shape): + param_shape = list(input_shape[1:]) + if self.shared_axes is not None: + for i in self.shared_axes: + param_shape[i - 1] = 1 + self.alpha = self.add_weight( + shape=param_shape, + name="alpha", + initializer=self.alpha_initializer, + regularizer=self.alpha_regularizer, + constraint=self.alpha_constraint, + ) + # Set input spec + axes = {} + if self.shared_axes: + for i in range(1, len(input_shape)): + if i not in self.shared_axes: + axes[i] = input_shape[i] + self.input_spec = InputSpec(ndim=len(input_shape), axes=axes) + self.built = True + + def call(self, inputs): + pos = activations.relu(inputs) + neg = -self.alpha * activations.relu(-inputs) + return pos + neg + + def get_config(self): + config = super().get_config() + config.update( + { + "alpha_initializer": initializers.serialize( + self.alpha_initializer + ), + "alpha_regularizer": regularizers.serialize( + self.alpha_regularizer + ), + "alpha_constraint": constraints.serialize( + self.alpha_constraint + ), + "shared_axes": self.shared_axes, + } + ) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py new file mode 100644 index 0000000000000000000000000000000000000000..53a120f852c56643ca15f67aca6ff95a308ce3a9 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/relu.py @@ -0,0 +1,86 @@ +from keras.src import activations +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.ReLU") +class ReLU(Layer): + """Rectified Linear Unit activation function layer. + + Formula: + ``` python + f(x) = max(x,0) + f(x) = max_value if x >= max_value + f(x) = x if threshold <= x < max_value + f(x) = negative_slope * (x - threshold) otherwise + ``` + + Example: + ``` python + relu_layer = keras.layers.ReLU( + max_value=10, + negative_slope=0.5, + threshold=0, + ) + input = np.array([-10, -5, 0.0, 5, 10]) + result = relu_layer(input) + # result = [-5. , -2.5, 0. , 5. , 10.] + ``` + + Args: + max_value: Float >= 0. Maximum activation value. None means unlimited. + Defaults to `None`. + negative_slope: Float >= 0. Negative slope coefficient. + Defaults to `0.0`. + threshold: Float >= 0. Threshold value for thresholded activation. + Defaults to `0.0`. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + """ + + def __init__( + self, max_value=None, negative_slope=0.0, threshold=0.0, **kwargs + ): + super().__init__(**kwargs) + if max_value is not None and max_value < 0.0: + raise ValueError( + "max_value of a ReLU layer cannot be a negative " + f"value. Received: max_value={max_value}" + ) + if negative_slope is None or negative_slope < 0.0: + raise ValueError( + "negative_slope of a ReLU layer cannot be a negative " + f"value. Received: negative_slope={negative_slope}" + ) + if threshold is None or threshold < 0.0: + raise ValueError( + "threshold of a ReLU layer cannot be a negative " + f"value. 
Received: threshold={threshold}" + ) + + self.max_value = max_value + self.negative_slope = negative_slope + self.threshold = threshold + self.supports_masking = True + self.built = True + + def call(self, inputs): + return activations.relu( + inputs, + negative_slope=self.negative_slope, + max_value=self.max_value, + threshold=self.threshold, + ) + + def get_config(self): + config = super().get_config() + config.update( + { + "max_value": self.max_value, + "negative_slope": self.negative_slope, + "threshold": self.threshold, + } + ) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py new file mode 100644 index 0000000000000000000000000000000000000000..195b47e2b209b4998a119d3ae59b65dc139fe6ee --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/activations/softmax.py @@ -0,0 +1,76 @@ +from keras.src import activations +from keras.src import backend +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +def _large_negative_number(dtype): + """Return a Large negative number based on dtype.""" + if backend.standardize_dtype(dtype) == "float16": + return -3e4 + return -1e9 + + +@keras_export("keras.layers.Softmax") +class Softmax(Layer): + """Softmax activation layer. + + Formula: + ``` python + exp_x = exp(x - max(x)) + f(x) = exp_x / sum(exp_x) + ``` + + Example: + >>> softmax_layer = keras.layers.Softmax() + >>> input = np.array([1.0, 2.0, 1.0]) + >>> result = softmax_layer(input) + >>> result + [0.21194157, 0.5761169, 0.21194157] + + + Args: + axis: Integer, or list of Integers, axis along which the softmax + normalization is applied. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. 
+ + Call arguments: + inputs: The inputs (logits) to the softmax layer. + mask: A boolean mask of the same shape as `inputs`. The mask + specifies 1 to keep and 0 to mask. Defaults to `None`. + + Returns: + Softmaxed output with the same shape as `inputs`. + """ + + def __init__(self, axis=-1, **kwargs): + super().__init__(**kwargs) + self.axis = axis + self.supports_masking = True + self.built = True + + def call(self, inputs, mask=None): + if mask is not None: + adder = ( + 1.0 - backend.cast(mask, inputs.dtype) + ) * _large_negative_number(inputs.dtype) + inputs += adder + if isinstance(self.axis, (tuple, list)): + if len(self.axis) > 1: + return backend.numpy.exp( + inputs + - backend.math.logsumexp( + inputs, axis=self.axis, keepdims=True + ) + ) + else: + return activations.softmax(inputs, axis=self.axis[0]) + return activations.softmax(inputs, axis=self.axis) + + def get_config(self): + config = super().get_config() + config.update({"axis": self.axis}) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fda279acfb7a705a0be193d0fdd03d8b7b99af1 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/__init__.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b61985db216b6dcc2d28a746d0a3e92b664807f7 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/additive_attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d7607d503871461d98ecc21e14c8061f4332cae3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7210e6998c1c77c129095b0f002034cc37f8e02 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/grouped_query_attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..10c9f476aa1ab4cc30ba34c159c270a386496f4b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/__pycache__/multi_head_attention.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..787dd50e71a9f627376cbdcbf6f57d0331a13403 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/additive_attention.py @@ -0,0 +1,103 @@ +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.attention.attention import Attention + + +@keras_export("keras.layers.AdditiveAttention") +class AdditiveAttention(Attention): + """Additive attention layer, a.k.a. Bahdanau-style attention. + + Inputs are a list with 2 or 3 elements: + 1. A `query` tensor of shape `(batch_size, Tq, dim)`. + 2. A `value` tensor of shape `(batch_size, Tv, dim)`. + 3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none + supplied, `value` will be used as `key`. + + The calculation follows the steps: + 1. Calculate attention scores using `query` and `key` with shape + `(batch_size, Tq, Tv)` as a non-linear sum + `scores = reduce_sum(tanh(query + key), axis=-1)`. + 2. Use scores to calculate a softmax distribution with shape + `(batch_size, Tq, Tv)`. + 3. Use the softmax distribution to create a linear combination of `value` + with shape `(batch_size, Tq, dim)`. + + Args: + use_scale: If `True`, will create a scalar variable to scale the + attention scores. + dropout: Float between 0 and 1. Fraction of the units to drop for the + attention scores. Defaults to `0.0`. 
+ + Call arguments: + inputs: List of the following tensors: + - `query`: Query tensor of shape `(batch_size, Tq, dim)`. + - `value`: Value tensor of shape `(batch_size, Tv, dim)`. + - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If + not given, will use `value` for both `key` and `value`, which is + the most common case. + mask: List of the following tensors: + - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. + If given, the output will be zero at the positions where + `mask==False`. + - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. + If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + a mask such that position `i` cannot attend to positions `j > i`. + This prevents the flow of information from the future towards the + past. Defaults to `False`. + + Output: + Attention outputs of shape `(batch_size, Tq, dim)`. + (Optional) Attention scores after masking and softmax with shape + `(batch_size, Tq, Tv)`. + """ + + def __init__( + self, + use_scale=True, + dropout=0.0, + **kwargs, + ): + super().__init__(use_scale=use_scale, dropout=dropout, **kwargs) + + def build(self, input_shape): + self._validate_inputs(input_shape) + dim = input_shape[0][-1] + self.scale = None + if self.use_scale: + self.scale = self.add_weight( + name="scale", + shape=[dim], + initializer="glorot_uniform", + dtype=self.dtype, + trainable=True, + ) + self.built = True + + def _calculate_scores(self, query, key): + """Calculates attention scores as a nonlinear sum of query and key. 
+ + Args: + query: Query tensor of shape `(batch_size, Tq, dim)`. + key: Key tensor of shape `(batch_size, Tv, dim)`. + + Returns: + Tensor of shape `(batch_size, Tq, Tv)`. + """ + # Reshape tensors to enable broadcasting. + # Reshape into [batch_size, Tq, 1, dim]. + q_reshaped = ops.expand_dims(query, axis=-2) + # Reshape into [batch_size, 1, Tv, dim]. + k_reshaped = ops.expand_dims(key, axis=-3) + scale = self.scale if self.use_scale else 1.0 + return ops.sum(scale * ops.tanh(q_reshaped + k_reshaped), axis=-1) + + def get_config(self): + base_config = super().get_config() + del base_config["score_mode"] + return base_config diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..d336781c8b3c400d45f24c8d5539e1208f0ae01c --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/attention.py @@ -0,0 +1,330 @@ +from keras.src import backend +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.backend import KerasTensor +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Attention") +class Attention(Layer): + """Dot-product attention layer, a.k.a. Luong-style attention. + + Inputs are a list with 2 or 3 elements: + 1. A `query` tensor of shape `(batch_size, Tq, dim)`. + 2. A `value` tensor of shape `(batch_size, Tv, dim)`. + 3. A optional `key` tensor of shape `(batch_size, Tv, dim)`. If none + supplied, `value` will be used as a `key`. + + The calculation follows the steps: + 1. Calculate attention scores using `query` and `key` with shape + `(batch_size, Tq, Tv)`. + 2. Use scores to calculate a softmax distribution with shape + `(batch_size, Tq, Tv)`. + 3. 
Use the softmax distribution to create a linear combination of `value` + with shape `(batch_size, Tq, dim)`. + + Args: + use_scale: If `True`, will create a scalar variable to scale the + attention scores. + dropout: Float between 0 and 1. Fraction of the units to drop for the + attention scores. Defaults to `0.0`. + seed: A Python integer to use as random seed in case of `dropout`. + score_mode: Function to use to compute attention scores, one of + `{"dot", "concat"}`. `"dot"` refers to the dot product between the + query and key vectors. `"concat"` refers to the hyperbolic tangent + of the concatenation of the `query` and `key` vectors. + + Call arguments: + inputs: List of the following tensors: + - `query`: Query tensor of shape `(batch_size, Tq, dim)`. + - `value`: Value tensor of shape `(batch_size, Tv, dim)`. + - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If + not given, will use `value` for both `key` and `value`, which is + the most common case. + mask: List of the following tensors: + - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. + If given, the output will be zero at the positions where + `mask==False`. + - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. + If given, will apply the mask such that values at positions + where `mask==False` do not contribute to the result. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + a mask such that position `i` cannot attend to positions `j > i`. + This prevents the flow of information from the future towards the + past. Defaults to `False`. + + Output: + Attention outputs of shape `(batch_size, Tq, dim)`. 
+ (Optional) Attention scores after masking and softmax with shape + `(batch_size, Tq, Tv)`. + """ + + def __init__( + self, + use_scale=False, + score_mode="dot", + dropout=0.0, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.use_scale = use_scale + self.score_mode = score_mode + self.dropout = dropout + if self.dropout > 0: + self.seed_generator = backend.random.SeedGenerator(seed=seed) + + if self.score_mode not in ["dot", "concat"]: + raise ValueError( + "Invalid value for argument score_mode. " + "Expected one of {'dot', 'concat'}. " + f"Received: score_mode={score_mode}" + ) + + self._return_attention_scores = False + + def build(self, input_shape): + self._validate_inputs(input_shape) + self.scale = None + self.concat_score_weight = None + if self.use_scale: + self.scale = self.add_weight( + name="scale", + shape=(), + initializer="ones", + dtype=self.dtype, + trainable=True, + ) + if self.score_mode == "concat": + self.concat_score_weight = self.add_weight( + name="concat_score_weight", + shape=(), + initializer="ones", + dtype=self.dtype, + trainable=True, + ) + self.built = True + + def _calculate_scores(self, query, key): + """Calculates attention scores as a query-key dot product. + + Args: + query: Query tensor of shape `(batch_size, Tq, dim)`. + key: Key tensor of shape `(batch_size, Tv, dim)`. + + Returns: + Tensor of shape `(batch_size, Tq, Tv)`. + """ + if self.score_mode == "dot": + scores = ops.matmul(query, ops.transpose(key, axes=[0, 2, 1])) + if self.scale is not None: + scores *= self.scale + elif self.score_mode == "concat": + # Reshape tensors to enable broadcasting. + # Reshape into [batch_size, Tq, 1, dim]. + q_reshaped = ops.expand_dims(query, axis=-2) + # Reshape into [batch_size, 1, Tv, dim]. 
+ k_reshaped = ops.expand_dims(key, axis=-3) + if self.scale is not None: + scores = self.concat_score_weight * ops.sum( + ops.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1 + ) + else: + scores = self.concat_score_weight * ops.sum( + ops.tanh(q_reshaped + k_reshaped), axis=-1 + ) + else: + raise ValueError("scores not computed") + + return scores + + def _apply_scores(self, scores, value, scores_mask=None, training=False): + """Applies attention scores to the given value tensor. + + To use this method in your attention layer, follow the steps: + + * Use `query` tensor of shape `(batch_size, Tq)` and `key` tensor of + shape `(batch_size, Tv)` to calculate the attention `scores`. + * Pass `scores` and `value` tensors to this method. The method applies + `scores_mask`, calculates + `attention_distribution = softmax(scores)`, then returns + `matmul(attention_distribution, value). + * Apply `query_mask` and return the result. + + Args: + scores: Scores float tensor of shape `(batch_size, Tq, Tv)`. + value: Value tensor of shape `(batch_size, Tv, dim)`. + scores_mask: A boolean mask tensor of shape `(batch_size, 1, Tv)` + or `(batch_size, Tq, Tv)`. If given, scores at positions where + `scores_mask==False` do not contribute to the result. It must + contain at least one `True` value in each line along the last + dimension. + training: Python boolean indicating whether the layer should behave + in training mode (adding dropout) or in inference mode + (no dropout). + + Returns: + Tensor of shape `(batch_size, Tq, dim)`. + Attention scores after masking and softmax with shape + `(batch_size, Tq, Tv)`. + """ + if scores_mask is not None: + padding_mask = ops.logical_not(scores_mask) + # Bias so padding positions do not contribute to attention + # distribution. Note 65504. is the max float16 value. 
+ max_value = 65504.0 if scores.dtype == "float16" else 1.0e9 + scores -= max_value * ops.cast(padding_mask, dtype=scores.dtype) + + weights = ops.softmax(scores, axis=-1) + if training and self.dropout > 0: + weights = backend.random.dropout( + weights, + self.dropout, + seed=self.seed_generator, + ) + return ops.matmul(weights, value), weights + + def _calculate_score_mask(self, scores, v_mask, use_causal_mask): + if use_causal_mask: + # Creates a lower triangular mask, so position i cannot attend to + # positions j > i. This prevents the flow of information from the + # future into the past. + score_shape = ops.shape(scores) + # causal_mask_shape = [1, Tq, Tv]. + mask_shape = (1, score_shape[-2], score_shape[-1]) + ones_mask = ops.ones(shape=mask_shape, dtype="int32") + row_index = ops.cumsum(ones_mask, axis=-2) + col_index = ops.cumsum(ones_mask, axis=-1) + causal_mask = ops.greater_equal(row_index, col_index) + + if v_mask is not None: + # Mask of shape [batch_size, 1, Tv]. + v_mask = ops.expand_dims(v_mask, axis=-2) + return ops.logical_and(v_mask, causal_mask) + return causal_mask + else: + # If not using causal mask, return the value mask as is, + # or None if the value mask is not provided. + return v_mask + + def call( + self, + inputs, + mask=None, + training=False, + return_attention_scores=False, + use_causal_mask=False, + ): + self._validate_inputs(inputs=inputs, mask=mask) + self._return_attention_scores = return_attention_scores + q = inputs[0] + v = inputs[1] + k = inputs[2] if len(inputs) > 2 else v + q_mask = mask[0] if mask else None + v_mask = mask[1] if mask else None + scores = self._calculate_scores(query=q, key=k) + scores_mask = self._calculate_score_mask( + scores, v_mask, use_causal_mask + ) + attention_output, attention_scores = self._apply_scores( + scores=scores, value=v, scores_mask=scores_mask, training=training + ) + if q_mask is not None: + # Mask of shape [batch_size, Tq, 1]. 
+ q_mask = ops.expand_dims(q_mask, axis=-1) + attention_output *= ops.cast(q_mask, dtype=attention_output.dtype) + if return_attention_scores: + return (attention_output, attention_scores) + else: + return attention_output + + def compute_mask(self, inputs, mask=None): + self._validate_inputs(inputs=inputs, mask=mask) + if mask is None or mask[0] is None: + return None + return ops.convert_to_tensor(mask[0]) + + def compute_output_shape(self, input_shape): + query_shape, value_shape, key_shape = input_shape + if key_shape is None: + key_shape = value_shape + + output_shape = (*query_shape[:-1], value_shape[-1]) + if self._return_attention_scores: + scores_shape = (query_shape[0], query_shape[1], key_shape[1]) + return output_shape, scores_shape + return output_shape + + def compute_output_spec( + self, + inputs, + mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + # Validate and unpack inputs + self._validate_inputs(inputs, mask) + query = inputs[0] + value = inputs[1] + key = inputs[2] if len(inputs) > 2 else value + + # Compute primary output shape + output_shape = self.compute_output_shape( + [query.shape, value.shape, key.shape] + ) + output_spec = KerasTensor(output_shape, dtype=self.compute_dtype) + + # Handle attention scores if requested + if self._return_attention_scores or return_attention_scores: + scores_shape = ( + query.shape[0], + query.shape[1], + key.shape[1], + ) # (batch_size, Tq, Tv) + attention_scores_spec = KerasTensor( + scores_shape, dtype=self.compute_dtype + ) + return (output_spec, attention_scores_spec) + + return output_spec + + def _validate_inputs(self, inputs, mask=None): + """Validates arguments of the call method.""" + class_name = self.__class__.__name__ + if not isinstance(inputs, list): + raise ValueError( + f"{class_name} layer must be called on a list of inputs, " + "namely [query, value] or [query, value, key]. " + f"Received: inputs={inputs}." 
+ ) + if len(inputs) < 2 or len(inputs) > 3: + raise ValueError( + f"{class_name} layer accepts inputs list of length 2 or 3, " + "namely [query, value] or [query, value, key]. " + f"Received length: {len(inputs)}." + ) + if mask is not None: + if not isinstance(mask, list): + raise ValueError( + f"{class_name} layer mask must be a list, " + f"namely [query_mask, value_mask]. Received: mask={mask}." + ) + if len(mask) < 2 or len(mask) > 3: + raise ValueError( + f"{class_name} layer accepts mask list of length 2 or 3. " + f"Received: inputs={inputs}, mask={mask}." + ) + + def get_config(self): + base_config = super().get_config() + config = { + "use_scale": self.use_scale, + "score_mode": self.score_mode, + "dropout": self.dropout, + } + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..6246964679c330bf198f83e2bedd153478bc7940 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/grouped_query_attention.py @@ -0,0 +1,504 @@ +import math + +from keras.src import constraints +from keras.src import initializers +from keras.src import ops +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.backend.config import is_flash_attention_enabled +from keras.src.layers.activations.softmax import Softmax +from keras.src.layers.core.einsum_dense import EinsumDense +from keras.src.layers.layer import Layer +from keras.src.layers.regularization.dropout import Dropout + + +@keras_export("keras.layers.GroupQueryAttention") +class GroupedQueryAttention(Layer): + """Grouped Query Attention layer. 
+ + This is an implementation of grouped-query attention introduced by + [Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). Here + `num_key_value_heads` denotes number of groups, setting + `num_key_value_heads` to 1 is equivalent to multi-query attention, and + when `num_key_value_heads` is equal to `num_query_heads` it is equivalent + to multi-head attention. + + This layer first projects `query`, `key`, and `value` tensors. Then, `key` + and `value` are repeated to match the number of heads of `query`. + + Then, the `query` is scaled and dot-producted with `key` tensors. These are + softmaxed to obtain attention probabilities. The value tensors are then + interpolated by these probabilities and concatenated back to a single + tensor. + + Args: + head_dim: Size of each attention head. + num_query_heads: Number of query attention heads. + num_key_value_heads: Number of key and value attention heads. + dropout: Dropout probability. + use_bias: Boolean, whether the dense layers use bias vectors/matrices. + flash_attention: If `None`, the layer attempts to use flash + attention for faster and more memory-efficient attention + computations when possible. This behavior can be configured using + `keras.config.enable_flash_attention()` or + `keras.config.disable_flash_attention()`. + kernel_initializer: Initializer for dense layer kernels. + bias_initializer: Initializer for dense layer biases. + kernel_regularizer: Regularizer for dense layer kernels. + bias_regularizer: Regularizer for dense layer biases. + activity_regularizer: Regularizer for dense layer activity. + kernel_constraint: Constraint for dense layer kernels. + bias_constraint: Constraint for dense layer kernels. + seed: Optional integer to seed the dropout layer. + + Call arguments: + query: Query tensor of shape `(batch_dim, target_seq_len, feature_dim)`, + where `batch_dim` is batch size, `target_seq_len` is the length of + target sequence, and `feature_dim` is dimension of feature. 
+ value: Value tensor of shape `(batch_dim, source_seq_len, feature_dim)`, + where `batch_dim` is batch size, `source_seq_len` is the length of + source sequence, and `feature_dim` is dimension of feature. + key: Optional key tensor of shape + `(batch_dim, source_seq_len, feature_dim)`. If not given, will use + `value` for both `key` and `value`, which is most common case. + attention_mask: A boolean mask of shape + `(batch_dim, target_seq_len, source_seq_len)`, that prevents + attention to certain positions. The boolean mask specifies which + query elements can attend to which key elements, where 1 indicates + attention and 0 indicates no attention. Broadcasting can happen for + the missing batch dimensions and the head dimension. + return_attention_scores: A boolean to indicate whether the output + should be `(attention_output, attention_scores)` if `True`, or + `attention_output` if `False`. Defaults to `False`. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + Will go with either using the training mode of the parent + layer/model or `False` (inference) if there is no parent layer. + use_causal_mask: A boolean to indicate whether to apply a causal mask to + prevent tokens from attending to future tokens (e.g., used in a + decoder Transformer). + + Returns: + attention_output: Result of the computation, of shape + `(batch_dim, target_seq_len, feature_dim)`, where `target_seq_len` + is for target sequence length and `feature_dim` is the query input + last dim. + attention_scores: (Optional) attention coefficients of shape + `(batch_dim, num_query_heads, target_seq_len, source_seq_len)`. 
+ """ + + def __init__( + self, + head_dim, + num_query_heads, + num_key_value_heads, + dropout=0.0, + use_bias=True, + flash_attention=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self.head_dim = head_dim + self.num_query_heads = num_query_heads + self.num_key_value_heads = num_key_value_heads + if num_query_heads % num_key_value_heads != 0: + raise ValueError( + "`num_query_heads` must be divisible" + " by `num_key_value_heads`." + ) + self.num_repeats = num_query_heads // num_key_value_heads + self.dropout = dropout + self.use_bias = use_bias + self._flash_attention = flash_attention or is_flash_attention_enabled() + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.seed = seed + + self._inverse_sqrt_head_dim = 1.0 / math.sqrt(float(self.head_dim)) + self._return_attention_scores = False + + # Check for flash attention constraints + if self._flash_attention and self.dropout > 0.0: + raise ValueError( + "Dropout is not supported when flash attention is enabled. " + "Please set dropout to 0.0 to use flash attention." 
+ ) + + def build( + self, + query_shape, + value_shape, + key_shape=None, + ): + # Einsum variables: + # b = batch size + # q = query length + # k = key/value length + # m = model dim + # u = num query heads + # v = num key/value heads + # h = head dim + key_shape = value_shape if key_shape is None else key_shape + self.feature_dim = query_shape[-1] + self._query_dense = EinsumDense( + "bqm,muh->bquh", + output_shape=(None, self.num_query_heads, self.head_dim), + bias_axes="uh" if self.use_bias else None, + name="query", + **self._get_common_kwargs_for_sublayer(), + ) + self._query_dense.build(query_shape) + + self._key_dense = EinsumDense( + "bkm,mvh->bkvh", + output_shape=(None, self.num_key_value_heads, self.head_dim), + bias_axes="vh" if self.use_bias else None, + name="key", + **self._get_common_kwargs_for_sublayer(), + ) + self._key_dense.build(key_shape) + + self._value_dense = EinsumDense( + "bkm,mvh->bkvh", + output_shape=(None, self.num_key_value_heads, self.head_dim), + bias_axes="vh" if self.use_bias else None, + name="value", + **self._get_common_kwargs_for_sublayer(), + ) + self._value_dense.build(value_shape) + + self._softmax = Softmax(axis=-1, dtype=self.dtype_policy) + self._dropout_layer = Dropout( + rate=self.dropout, dtype=self.dtype_policy, seed=self.seed + ) + + self._dot_product_equation = "bquh,bkuh->buqk" + self._combine_equation = "buqk,bkuh->bquh" + + self._output_dense = EinsumDense( + "bquh,uhm->bqm", + output_shape=(None, self.feature_dim), + bias_axes="m" if self.use_bias else None, + name="attention_output", + **self._get_common_kwargs_for_sublayer(), + ) + self._output_dense.build( + (None, None, self.num_query_heads, self.head_dim) + ) + self.built = True + + def _get_common_kwargs_for_sublayer(self): + common_kwargs = dict( + kernel_regularizer=self.kernel_regularizer, + bias_regularizer=self.bias_regularizer, + activity_regularizer=self.activity_regularizer, + kernel_constraint=self.kernel_constraint, + 
bias_constraint=self.bias_constraint, + dtype=self.dtype_policy, + ) + # Create new clone of kernel/bias initializer, so that we don't reuse + # the initializer instance, which could lead to same init value since + # initializer is stateless. + kernel_initializer = self.kernel_initializer.__class__.from_config( + self.kernel_initializer.get_config() + ) + bias_initializer = self.bias_initializer.__class__.from_config( + self.bias_initializer.get_config() + ) + common_kwargs["kernel_initializer"] = kernel_initializer + common_kwargs["bias_initializer"] = bias_initializer + return common_kwargs + + def call( + self, + query, + value, + key=None, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + self._return_attention_scores = return_attention_scores + if key is None: + key = value + + attention_mask = self._compute_attention_mask( + query, + value, + query_mask=query_mask, + value_mask=value_mask, + key_mask=key_mask, + attention_mask=attention_mask, + use_causal_mask=use_causal_mask, + ) + + query = self._query_dense(query) + key = self._key_dense(key) + value = self._value_dense(value) + + key = ops.repeat( + key, self.num_repeats, axis=2 + ) # (batch_dim, source_seq_len, query_heads, head_dim) + value = ops.repeat( + value, self.num_repeats, axis=2 + ) # (batch_dim, source_seq_len, query_heads, head_dim) + + output, scores = self._compute_attention( + query, + key, + value, + attention_mask=attention_mask, + training=training, + ) + + output = self._output_dense( + output + ) # (batch_dim, target_seq_len, feature_dim) + + if return_attention_scores: + return output, scores + return output + + def _compute_attention_mask( + self, + query, + value, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + use_causal_mask=False, + ): + """Computes the attention mask, using the Keras masks of the inputs. 
+ + * The `query`'s mask is reshaped from [B, T] to [B, T, 1]. + * The `value`'s mask is reshaped from [B, S] to [B, 1, S]. + * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s + mask is ignored if `key` is `None` or if `key is value`. + * If `use_causal_mask=True`, then the causal mask is computed. Its shape + is [1, T, S]. + + All defined masks are merged using a logical AND operation (`&`). + + In general, if the `query` and `value` are masked, then there is no need + to define the `attention_mask`. + + Args: + query: Projected query tensor of shape `(B, T, N, key_dim)`. + key: Projected key tensor of shape `(B, T, N, key_dim)`. + value: Projected value tensor of shape `(B, T, N, value_dim)`. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. + use_causal_mask: A boolean to indicate whether to apply a causal + mask to prevent tokens from attending to future tokens (e.g., + used in a decoder Transformer). + + Returns: + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions, based on the Keras masks of the + `query`, `key`, `value`, and `attention_mask` tensors, and the + causal mask if `use_causal_mask=True`. 
+ """ + auto_mask = None + if query_mask is not None: + query_mask = ops.cast(query_mask, "bool") # defensive casting + # B = batch size, T = max query length + auto_mask = ops.expand_dims(query_mask, -1) # shape is [B, T, 1] + if value_mask is not None: + value_mask = ops.cast(value_mask, "bool") # defensive casting + # B = batch size, S == max value length + mask = ops.expand_dims(value_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if key_mask is not None: + key_mask = ops.cast(key_mask, "bool") # defensive casting + # B == batch size, S == max key length == max value length + mask = ops.expand_dims(key_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if use_causal_mask: + # the shape of the causal mask is [1, T, S] + mask = self._compute_causal_mask(query, value) + auto_mask = mask if auto_mask is None else auto_mask & mask + if auto_mask is not None: + # merge attention_mask & automatic mask, to shape [B, T, S] + attention_mask = ( + auto_mask + if attention_mask is None + else ops.cast(attention_mask, bool) & auto_mask + ) + return attention_mask + + def _compute_causal_mask(self, query, value=None): + """Computes a causal mask (e.g., for masked self-attention layers). + + For example, if query and value both contain sequences of length 4, + this function returns a boolean tensor equal to: + + ``` + [[[True, False, False, False], + [True, True, False, False], + [True, True, True, False], + [True, True, True, True]]] + ``` + + Args: + query: query tensor of shape `(B, T, ...)`. + value: value tensor of shape `(B, S, ...)` (optional, defaults to + query). + + Returns: + mask: a boolean tensor of shape `(1, T, S)` containing a lower + triangular matrix of shape `(T, S)`. 
+ """ + q_seq_length = ops.shape(query)[1] + v_seq_length = q_seq_length if value is None else ops.shape(value)[1] + ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32") + row_index = ops.cumsum(ones_mask, axis=-2) + col_index = ops.cumsum(ones_mask, axis=-1) + return ops.greater_equal(row_index, col_index) + + def _compute_attention( + self, query, key, value, attention_mask=None, training=None + ): + # Check for flash attention constraints + if self._flash_attention and self._return_attention_scores: + raise ValueError( + "Returning attention scores is not supported when flash " + "attention is enabled. Please disable flash attention to access" + " attention scores." + ) + + # Determine whether to use dot-product attention + use_dot_product_attention = not ( + self.dropout > 0.0 + or self._return_attention_scores + or (len(query.shape) != 4) + ) + + if use_dot_product_attention: + if attention_mask is not None: + # Ensure attention_mask has the correct shape for broadcasting + # Expected shape: [batch_size, num_heads, query_seq_len, + # key_seq_len]. + mask_expansion_axis = -1 * 2 - 1 + len_attention_scores_shape = 4 # Only accepts 4D inputs + for _ in range( + len_attention_scores_shape - len(attention_mask.shape) + ): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + attention_mask = ops.cast(attention_mask, dtype="bool") + # Directly compute the attention output using dot-product attention + attention_output = ops.dot_product_attention( + query=query, + key=key, + value=value, + bias=None, + mask=attention_mask, + scale=self._inverse_sqrt_head_dim, + is_causal=False, + flash_attention=self._flash_attention, + ) + return attention_output, None + + # Default behavior without flash attention, with explicit attention + # scores + query = ops.multiply( + query, ops.cast(self._inverse_sqrt_head_dim, query.dtype) + ) + # Take the dot product between "query" and "key" to get the raw + # attention scores. 
+ scores = ops.einsum( + self._dot_product_equation, query, key + ) # (batch_dim, query_heads, target_seq_len, source_seq_len) + scores = self._masked_softmax(scores, attention_mask=attention_mask) + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + if self.dropout > 0.0: + scores_dropout = self._dropout_layer(scores, training=training) + else: + scores_dropout = scores + output = ops.einsum(self._combine_equation, scores_dropout, value) + return output, scores + + def _masked_softmax(self, scores, attention_mask=None): + # Normalize the attention scores to probabilities. + # scores = [B, N, T, S] + if attention_mask is not None: + # The expand dim happens starting from the `num_heads` dimension, + # (, num_heads, ) + mask_expansion_axis = -1 * 2 - 1 + for _ in range(len(scores.shape) - len(attention_mask.shape)): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + return self._softmax(scores, mask=attention_mask) + + def compute_output_shape( + self, + query_shape, + value_shape, + key_shape=None, + ): + if key_shape is None: + key_shape = value_shape + + if query_shape[-1] != value_shape[-1]: + raise ValueError( + "The last dimension of `query_shape` and `value_shape` " + f"must be equal, but are {query_shape[-1]}, {value_shape[-1]}. " + "Received: query_shape={query_shape}, value_shape={value_shape}" + ) + + if value_shape[1:-1] != key_shape[1:-1]: + raise ValueError( + "All dimensions of `value` and `key`, except the last one, " + f"must be equal. 
Received: value_shape={value_shape} and " + f"key_shape={key_shape}" + ) + + return query_shape + + def get_config(self): + config = { + "head_dim": self.head_dim, + "num_query_heads": self.num_query_heads, + "num_key_value_heads": self.num_key_value_heads, + "use_bias": self.use_bias, + "dropout": self.dropout, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + "seed": self.seed, + } + base_config = super().get_config() + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..ad4d55d3a14ba9fedf97d0a2104b8ad1cabbe763 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/attention/multi_head_attention.py @@ -0,0 +1,827 @@ +import math +import string + +import numpy as np + +from keras.src import backend +from keras.src import constraints +from keras.src import initializers +from keras.src import ops +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.backend.config import is_flash_attention_enabled +from keras.src.layers.activations.softmax import Softmax +from keras.src.layers.core.einsum_dense import EinsumDense +from keras.src.layers.layer import Layer +from keras.src.layers.regularization.dropout import Dropout + + 
+@keras_export("keras.layers.MultiHeadAttention") +class MultiHeadAttention(Layer): + """MultiHeadAttention layer. + + This is an implementation of multi-headed attention as described in the + paper "Attention is all you Need" + [Vaswani et al., 2017](https://arxiv.org/abs/1706.03762). + If `query`, `key,` `value` are the same, then + this is self-attention. Each timestep in `query` attends to the + corresponding sequence in `key`, and returns a fixed-width vector. + + This layer first projects `query`, `key` and `value`. These are + (effectively) a list of tensors of length `num_attention_heads`, where the + corresponding shapes are `(batch_size, , key_dim)`, + `(batch_size, , key_dim)`, + `(batch_size, , value_dim)`. + + Then, the query and key tensors are dot-producted and scaled. These are + softmaxed to obtain attention probabilities. The value tensors are then + interpolated by these probabilities, then concatenated back to a single + tensor. + + Finally, the result tensor with the last dimension as `value_dim` can take + a linear projection and return. + + Args: + num_heads: Number of attention heads. + key_dim: Size of each attention head for query and key. + value_dim: Size of each attention head for value. + dropout: Dropout probability. + use_bias: Boolean, whether the dense layers use bias vectors/matrices. + output_shape: The expected shape of an output tensor, besides the batch + and sequence dims. If not specified, projects back to the query + feature dim (the query input's last dimension). + attention_axes: axes over which the attention is applied. `None` means + attention over all axes, but batch, heads, and features. + flash_attention: If `None`, the layer attempts to use flash + attention for faster and more memory-efficient attention + computations when possible. This behavior can be configured using + `keras.config.enable_flash_attention()` or + `keras.config.disable_flash_attention()`. + kernel_initializer: Initializer for dense layer kernels. 
+ bias_initializer: Initializer for dense layer biases. + kernel_regularizer: Regularizer for dense layer kernels. + bias_regularizer: Regularizer for dense layer biases. + activity_regularizer: Regularizer for dense layer activity. + kernel_constraint: Constraint for dense layer kernels. + bias_constraint: Constraint for dense layer kernels. + seed: Optional integer to seed the dropout layer. + + Call arguments: + query: Query tensor of shape `(B, T, dim)`, where `B` is the batch size, + `T` is the target sequence length, and dim is the feature dimension. + value: Value tensor of shape `(B, S, dim)`, where `B` is the batch size, + `S` is the source sequence length, and dim is the feature dimension. + key: Optional key tensor of shape `(B, S, dim)`. If not given, will + use `value` for both `key` and `value`, which is the most common + case. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. The boolean mask specifies which + query elements can attend to which key elements, 1 indicates + attention and 0 indicates no attention. Broadcasting can happen for + the missing batch dimensions and the head dimension. + return_attention_scores: A boolean to indicate whether the output should + be `(attention_output, attention_scores)` if `True`, or + `attention_output` if `False`. Defaults to `False`. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (no dropout). + Will go with either using the training mode of the parent + layer/model, or `False` (inference) if there is no parent layer. + use_causal_mask: A boolean to indicate whether to apply a causal mask to + prevent tokens from attending to future tokens (e.g., used in a + decoder Transformer). + + Returns: + attention_output: The result of the computation, of shape `(B, T, E)`, + where `T` is for target sequence shapes and `E` is the query input + last dimension if `output_shape` is `None`. 
Otherwise, the + multi-head outputs are projected to the shape specified by + `output_shape`. + attention_scores: (Optional) multi-head attention coefficients over + attention axes. + """ + + def __init__( + self, + num_heads, + key_dim, + value_dim=None, + dropout=0.0, + use_bias=True, + output_shape=None, + attention_axes=None, + flash_attention=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.supports_masking = True + self._num_heads = num_heads + self._key_dim = key_dim + self._value_dim = value_dim if value_dim else key_dim + self._dropout = dropout + self._use_bias = use_bias + if output_shape: + if isinstance(output_shape, int): + output_shape = (output_shape,) + try: + output_shape = tuple(output_shape) + except: + raise ValueError( + f"Invalid `output_shape`: {output_shape}. When " + "specified, the `output_shape` should be of type tuple, " + "list, or int." + ) + self._output_shape = output_shape + self._flash_attention = flash_attention or is_flash_attention_enabled() + self._kernel_initializer = initializers.get(kernel_initializer) + self._bias_initializer = initializers.get(bias_initializer) + self._kernel_regularizer = regularizers.get(kernel_regularizer) + self._bias_regularizer = regularizers.get(bias_regularizer) + self._activity_regularizer = regularizers.get(activity_regularizer) + self._kernel_constraint = constraints.get(kernel_constraint) + self._bias_constraint = constraints.get(bias_constraint) + if isinstance(attention_axes, int): + attention_axes = (attention_axes,) + elif attention_axes and not isinstance(attention_axes, (list, tuple)): + raise ValueError( + "`attention_axes` must be an int, list, or tuple." 
+ f"Received: attention_axes={attention_axes}" + ) + self._attention_axes = attention_axes + self.seed = seed + + self._inverse_sqrt_key_dim = 1.0 / math.sqrt(float(self._key_dim)) + self._return_attention_scores = False + + # Check for flash attention constraints + if self._flash_attention and self._dropout > 0.0: + raise ValueError( + "Dropout is not supported when flash attention is enabled. " + "Please set dropout to 0.0 to use flash attention." + ) + + @property + def num_heads(self): + return self._num_heads + + @property + def key_dim(self): + return self._key_dim + + @property + def value_dim(self): + return self._value_dim + + @property + def dropout(self): + return self._dropout + + @property + def use_bias(self): + return self._use_bias + + # Avoid exposing `output_shape` as it may conflict with `Functional` and + # `Sequential` models when calling `summary()`. + + @property + def attention_axes(self): + return self._attention_axes + + def get_config(self): + base_config = super().get_config() + config = { + "num_heads": self._num_heads, + "key_dim": self._key_dim, + "value_dim": self._value_dim, + "dropout": self._dropout, + "use_bias": self._use_bias, + "output_shape": self._output_shape, + "attention_axes": self._attention_axes, + "kernel_initializer": initializers.serialize( + self._kernel_initializer + ), + "bias_initializer": initializers.serialize(self._bias_initializer), + "kernel_regularizer": regularizers.serialize( + self._kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self._bias_regularizer), + "activity_regularizer": regularizers.serialize( + self._activity_regularizer + ), + "kernel_constraint": constraints.serialize(self._kernel_constraint), + "bias_constraint": constraints.serialize(self._bias_constraint), + "seed": self.seed, + } + return {**base_config, **config} + + def build( + self, + query_shape, + value_shape, + key_shape=None, + ): + """Builds layers and variables. 
+ + Args: + query_shape: Shape of the `query` tensor. + value_shape: Shape of the `value` tensor. + key: Optional shape of the `key` tensor. + """ + key_shape = value_shape if key_shape is None else key_shape + + if value_shape[1:-1] != key_shape[1:-1]: + raise ValueError( + "All dimensions of `value` and `key`, except the last one, " + f"must be equal. Received: value_shape={value_shape} and " + f"key_shape={key_shape}" + ) + + query_rank = len(query_shape) + value_rank = len(value_shape) + key_rank = len(key_shape) + einsum_equation, bias_axes, output_rank = _build_proj_equation( + query_rank - 1, bound_dims=1, output_dims=2 + ) + self._query_dense = EinsumDense( + einsum_equation, + output_shape=_get_output_shape( + output_rank - 1, [self._num_heads, self._key_dim] + ), + bias_axes=bias_axes if self._use_bias else None, + name="query", + **self._get_common_kwargs_for_sublayer(), + ) + self._query_dense.build(query_shape) + einsum_equation, bias_axes, output_rank = _build_proj_equation( + key_rank - 1, bound_dims=1, output_dims=2 + ) + self._key_dense = EinsumDense( + einsum_equation, + output_shape=_get_output_shape( + output_rank - 1, [self._num_heads, self._key_dim] + ), + bias_axes=bias_axes if self._use_bias else None, + name="key", + **self._get_common_kwargs_for_sublayer(), + ) + self._key_dense.build(key_shape) + einsum_equation, bias_axes, output_rank = _build_proj_equation( + value_rank - 1, bound_dims=1, output_dims=2 + ) + self._value_dense = EinsumDense( + einsum_equation, + output_shape=_get_output_shape( + output_rank - 1, [self._num_heads, self._value_dim] + ), + bias_axes=bias_axes if self._use_bias else None, + name="value", + **self._get_common_kwargs_for_sublayer(), + ) + self._value_dense.build(value_shape) + + # Builds the attention computations for multi-head dot product + # attention. These computations could be wrapped into the keras + # attention layer once it supports multi-head einsum computations. 
+ self._build_attention(output_rank) + self._output_dense = self._make_output_dense( + query_shape, + self._get_common_kwargs_for_sublayer(), + "attention_output", + ) + output_dense_input_shape = list( + self._query_dense.compute_output_shape(query_shape) + ) + output_dense_input_shape[-1] = self._value_dim + self._output_dense.build(tuple(output_dense_input_shape)) + self.built = True + + @property + def query_dense(self): + return self._query_dense + + @property + def key_dense(self): + return self._key_dense + + @property + def value_dense(self): + return self._value_dense + + @property + def output_dense(self): + return self._output_dense + + def _get_common_kwargs_for_sublayer(self): + common_kwargs = dict( + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint, + dtype=self.dtype_policy, + ) + # Create new clone of kernel/bias initializer, so that we don't reuse + # the initializer instance, which could lead to same init value since + # initializer is stateless. + kernel_initializer = self._kernel_initializer.__class__.from_config( + self._kernel_initializer.get_config() + ) + bias_initializer = self._bias_initializer.__class__.from_config( + self._bias_initializer.get_config() + ) + common_kwargs["kernel_initializer"] = kernel_initializer + common_kwargs["bias_initializer"] = bias_initializer + return common_kwargs + + def _make_output_dense(self, query_shape, common_kwargs, name=None): + """Builds the output projection matrix. + + Args: + free_dims: Number of free dimensions for einsum equation building. + common_kwargs: Common keyword arguments for einsum layer. + name: Name for the projection layer. + + Returns: + Projection layer. 
+ """ + query_rank = len(query_shape) + if self._output_shape: + output_shape = self._output_shape + else: + output_shape = [query_shape[-1]] + einsum_equation, bias_axes, output_rank = _build_proj_equation( + query_rank - 1, bound_dims=2, output_dims=len(output_shape) + ) + return EinsumDense( + einsum_equation, + output_shape=_get_output_shape(output_rank - 1, output_shape), + bias_axes=bias_axes if self._use_bias else None, + name=name, + **common_kwargs, + ) + + def _build_attention(self, rank): + """Builds multi-head dot-product attention computations. + + This function builds attributes necessary for `_compute_attention` to + customize attention computation to replace the default dot-product + attention. + + Args: + rank: the rank of query, key, value tensors. + """ + if self._attention_axes is None: + self._attention_axes = tuple(range(1, rank - 2)) + else: + self._attention_axes = tuple(self._attention_axes) + ( + self._dot_product_equation, + self._combine_equation, + attn_scores_rank, + ) = _build_attention_equation(rank, attn_axes=self._attention_axes) + norm_axes = tuple( + range( + attn_scores_rank - len(self._attention_axes), attn_scores_rank + ) + ) + self._softmax = Softmax(axis=norm_axes, dtype=self.dtype_policy) + self._dropout_layer = Dropout( + rate=self._dropout, dtype=self.dtype_policy, seed=self.seed + ) + + def _masked_softmax(self, attention_scores, attention_mask=None): + # Normalize the attention scores to probabilities. 
+ # attention_scores = [B, N, T, S] + if attention_mask is not None: + # The expand dim happens starting from the `num_heads` dimension, + # (, num_heads, ) + mask_expansion_axis = -len(self._attention_axes) * 2 - 1 + for _ in range( + len(attention_scores.shape) - len(attention_mask.shape) + ): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + return self._softmax(attention_scores, mask=attention_mask) + + def _compute_attention( + self, + query, + key, + value, + attention_mask=None, + training=None, + ): + """Applies Dot-product attention with query, key, value tensors. + + This function defines the computation inside `call` with projected + multi-head Q, K, V inputs. Users can override this function for + customized attention implementation. + + Args: + query: Projected query tensor of shape `(B, T, N, key_dim)`. + key: Projected key tensor of shape `(B, S, N, key_dim)`. + value: Projected value tensor of shape `(B, S, N, value_dim)`. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. It is generally not needed if + the `query` and `value` (and/or `key`) are masked. + training: Python boolean indicating whether the layer should behave + in training mode (adding dropout) or in inference mode (doing + nothing). + + Returns: + attention_output: Multi-headed outputs of attention computation. + attention_scores: Multi-headed attention weights. + """ + # Check for flash attention constraints + if self._flash_attention and self._return_attention_scores: + raise ValueError( + "Returning attention scores is not supported when flash " + "attention is enabled. Please disable flash attention to access" + " attention scores." 
+ ) + + # Determine whether to use dot-product attention + use_dot_product_attention = not ( + self._dropout > 0.0 + or self._return_attention_scores + or (len(query.shape) != 4) + ) + + if use_dot_product_attention: + if attention_mask is not None: + # Ensure attention_mask has the correct shape for broadcasting + # Expected shape: [batch_size, num_heads, query_seq_len, + # key_seq_len]. + mask_expansion_axis = -len(self._attention_axes) * 2 - 1 + len_attention_scores_shape = 4 # Only accepts 4D inputs + for _ in range( + len_attention_scores_shape - len(attention_mask.shape) + ): + attention_mask = ops.expand_dims( + attention_mask, axis=mask_expansion_axis + ) + attention_mask = ops.cast(attention_mask, dtype="bool") + # Directly compute the attention output using dot-product attention + attention_output = ops.dot_product_attention( + query=query, + key=key, + value=value, + bias=None, + mask=attention_mask, + scale=self._inverse_sqrt_key_dim, + is_causal=False, + flash_attention=self._flash_attention, + ) + return attention_output, None + + # Default behavior without flash attention, with explicit attention + # scores + query = ops.multiply( + query, ops.cast(self._inverse_sqrt_key_dim, query.dtype) + ) + + # Take the dot product between "query" and "key" to get the raw + # attention scores. 
+ attention_scores = ops.einsum(self._dot_product_equation, key, query) + + # Apply the mask using the custom masked softmax + attention_scores = self._masked_softmax( + attention_scores, attention_mask + ) + + # Apply dropout to the attention scores if needed + if self._dropout > 0.0: + final_attn_scores = self._dropout_layer( + attention_scores, training=training + ) + else: + final_attn_scores = attention_scores + + # `context_layer` = [B, T, N, H] + attention_output = ops.einsum( + self._combine_equation, final_attn_scores, value + ) + return attention_output, attention_scores + + def call( + self, + query, + value, + key=None, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + self._return_attention_scores = return_attention_scores + if key is None: + key = value + + # Delete the masks because the masks are handled at the level of the + # layer + query_mask = backend.get_keras_mask(query) + backend.set_keras_mask(query, None) + backend.set_keras_mask(value, None) + backend.set_keras_mask(key, None) + + attention_mask = self._compute_attention_mask( + query, + value, + query_mask=query_mask, + value_mask=value_mask, + key_mask=key_mask, + attention_mask=attention_mask, + use_causal_mask=use_causal_mask, + ) + # N = `num_attention_heads` + # H = `size_per_head` + + # `query` = [B, T, N, H] + query = self._query_dense(query) + + # `key` = [B, S, N, H] + key = self._key_dense(key) + + # `value` = [B, S, N, H] + value = self._value_dense(value) + attention_output, attention_scores = self._compute_attention( + query, + key, + value, + attention_mask, + training, + ) + attention_output = self._output_dense(attention_output) + + # Set mask on output if needed + if query_mask is not None: + backend.set_keras_mask(attention_output, query_mask) + + if return_attention_scores: + return attention_output, attention_scores + return attention_output + + def 
_compute_attention_mask( + self, + query, + value, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + use_causal_mask=False, + ): + """Computes the attention mask, using the Keras masks of the inputs. + + * The `query`'s mask is reshaped from [B, T] to [B, T, 1]. + * The `value`'s mask is reshaped from [B, S] to [B, 1, S]. + * The `key`'s mask is reshaped from [B, S] to [B, 1, S]. The `key`'s + mask is ignored if `key` is `None` or if `key is value`. + * If `use_causal_mask=True`, then the causal mask is computed. Its shape + is [1, T, S]. + + All defined masks are merged using a logical AND operation (`&`). + + In general, if the `query` and `value` are masked, then there is no need + to define the `attention_mask`. + + Args: + query: Projected query tensor of shape `(B, T, N, key_dim)`. + key: Projected key tensor of shape `(B, T, N, key_dim)`. + value: Projected value tensor of shape `(B, T, N, value_dim)`. + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions. + use_causal_mask: A boolean to indicate whether to apply a causal + mask to prevent tokens from attending to future tokens (e.g., + used in a decoder Transformer). + + Returns: + attention_mask: a boolean mask of shape `(B, T, S)`, that prevents + attention to certain positions, based on the Keras masks of the + `query`, `key`, `value`, and `attention_mask` tensors, and the + causal mask if `use_causal_mask=True`. 
+ """ + auto_mask = None + if query_mask is not None: + query_mask = ops.cast(query_mask, "bool") # defensive casting + # B = batch size, T = max query length + auto_mask = ops.expand_dims(query_mask, -1) # shape is [B, T, 1] + if value_mask is not None: + value_mask = ops.cast(value_mask, "bool") # defensive casting + # B = batch size, S == max value length + mask = ops.expand_dims(value_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if key_mask is not None: + key_mask = ops.cast(key_mask, "bool") # defensive casting + # B == batch size, S == max key length == max value length + mask = ops.expand_dims(key_mask, -2) # shape is [B, 1, S] + auto_mask = mask if auto_mask is None else auto_mask & mask + if use_causal_mask: + # the shape of the causal mask is [1, T, S] + mask = self._compute_causal_mask(query, value) + auto_mask = mask if auto_mask is None else auto_mask & mask + + if attention_mask is not None: + attention_mask = ops.cast(attention_mask, "bool") + if auto_mask is not None: + # merge attention_mask & automatic mask, to shape [B, T, S] + attention_mask = ( + auto_mask + if attention_mask is None + else attention_mask & auto_mask + ) + return attention_mask + + def _compute_causal_mask(self, query, value=None): + """Computes a causal mask (e.g., for masked self-attention layers). + + For example, if query and value both contain sequences of length 4, + this function returns a boolean tensor equal to: + + ``` + [[[True, False, False, False], + [True, True, False, False], + [True, True, True, False], + [True, True, True, True]]] + ``` + + Args: + query: query tensor of shape `(B, T, ...)`. + value: value tensor of shape `(B, S, ...)` (optional, defaults to + query). + + Returns: + mask: a boolean tensor of shape `(1, T, S)` containing a lower + triangular matrix of shape `(T, S)`. 
+ """ + q_seq_length = ops.shape(query)[1] + v_seq_length = q_seq_length if value is None else ops.shape(value)[1] + ones_mask = ops.ones((1, q_seq_length, v_seq_length), dtype="int32") + row_index = ops.cumsum(ones_mask, axis=-2) + col_index = ops.cumsum(ones_mask, axis=-1) + return ops.greater_equal(row_index, col_index) + + def compute_output_shape( + self, + query_shape, + value_shape, + key_shape=None, + ): + query_shape = tuple(query_shape) + value_shape = tuple(value_shape) + if key_shape is None: + key_shape = value_shape + else: + key_shape = tuple(key_shape) + + if value_shape[1:-1] != key_shape[1:-1]: + raise ValueError( + "All dimensions of `value` and `key`, except the last one, " + f"must be equal. Received: value_shape={value_shape} and " + f"key_shape={key_shape}" + ) + if self._output_shape: + query_shape = query_shape[:-1] + self._output_shape + return query_shape + + def compute_output_spec( + self, + query, + value, + key=None, + query_mask=None, + value_mask=None, + key_mask=None, + attention_mask=None, + return_attention_scores=False, + training=None, + use_causal_mask=False, + ): + if key is not None: + key_shape = key.shape + else: + key_shape = None + output_shape = self.compute_output_shape( + query.shape, value.shape, key_shape + ) + output_spec = backend.KerasTensor( + output_shape, dtype=self.compute_dtype + ) + if return_attention_scores: + length = query.shape[1] + attention_shape = (query.shape[0], self.num_heads, length, length) + return output_spec, backend.KerasTensor( + attention_shape, dtype=self.compute_dtype + ) + return output_spec + + +def _index_to_einsum_variable(i): + """Converts an index to a einsum variable name. + + We simply map indices to lowercase characters, e.g. 0 -> 'a', 1 -> 'b'. + """ + return string.ascii_lowercase[i] + + +def _build_attention_equation(rank, attn_axes): + """Builds einsum equations for the attention computation. 
+ + Query, key, value inputs after projection are expected to have the shape as: + `(bs, , , num_heads, channels)`. + `bs` and `` are treated as ``. + + The attention operations can be generalized: + 1. Query-key dot product: + (, , num_heads, channels), + (, , num_heads, channels) -> + (, num_heads, , ) + 2. Combination: + (, num_heads, , ), + (, , num_heads, channels) -> (, , num_heads, channels) + + Args: + rank: Rank of query, key, value tensors. + attn_axes: List/tuple of axes, `[-1, rank)`, + that attention will be applied to. + + Returns: + Einsum equations. + """ + target_notation = "" + for i in range(rank): + target_notation += _index_to_einsum_variable(i) + # `batch_dims` includes the head dim. + batch_dims = tuple(np.delete(range(rank), attn_axes + (rank - 1,))) + letter_offset = rank + source_notation = "" + for i in range(rank): + if i in batch_dims or i == rank - 1: + source_notation += target_notation[i] + else: + source_notation += _index_to_einsum_variable(letter_offset) + letter_offset += 1 + + product_notation = "".join( + [target_notation[i] for i in batch_dims] + + [target_notation[i] for i in attn_axes] + + [source_notation[i] for i in attn_axes] + ) + dot_product_equation = "%s,%s->%s" % ( + source_notation, + target_notation, + product_notation, + ) + attn_scores_rank = len(product_notation) + combine_equation = "%s,%s->%s" % ( + product_notation, + source_notation, + target_notation, + ) + return dot_product_equation, combine_equation, attn_scores_rank + + +def _build_proj_equation(free_dims, bound_dims, output_dims): + """Builds an einsum equation for projections inside multi-head attention.""" + input_str = "" + kernel_str = "" + output_str = "" + bias_axes = "" + letter_offset = 0 + for i in range(free_dims): + char = _index_to_einsum_variable(i + letter_offset) + input_str += char + output_str += char + + letter_offset += free_dims + for i in range(bound_dims): + char = _index_to_einsum_variable(i + letter_offset) + input_str += char + 
kernel_str += char + + letter_offset += bound_dims + for i in range(output_dims): + char = _index_to_einsum_variable(i + letter_offset) + kernel_str += char + output_str += char + bias_axes += char + equation = f"{input_str},{kernel_str}->{output_str}" + + return equation, bias_axes, len(output_str) + + +def _get_output_shape(output_rank, known_last_dims): + return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..971ab45b63f3b60c9362da19bfe84fff65d7f323 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b70b6e203a15aaba20054de7bacba1baf88a18f0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a37e60a0ea2d937a4dfd3d000322b17a003a15f Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_conv_transpose.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f49fe4cd357546eb38f493e9afd3325917ca514 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_depthwise_conv.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae93c8b26dd447a655775271a3a1d039c844c175 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/base_separable_conv.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..58846a7815583d558f3953e5b04febc49b1c4d44 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5fff2c08d88caa4f7043614188a25e490ecb63a Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv1d_transpose.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3644c80e9d17b4b735bc227f52ac5812974b5d4c Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c776308b193fef49bcd1f5d19b47019b97be7da8 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv2d_transpose.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f8ab9c58d41f6216f2b1a5dc396f82f950ea5bd Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a6651addfffc8d13727f72eeecaac2ad6e7ec38 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/conv3d_transpose.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ccb3493444ba15d535e1f41ce226e5bc8e3f6d3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv1d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..015baaf4772db3f1fdaed20f69cd4357cf8cb15b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/depthwise_conv2d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ef02fab4741e4494c201d000b5d3318fc28a117 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv1d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b01bbcdf7c92f2eeb3f377714f0fa8015ad2816 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/__pycache__/separable_conv2d.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..82b4140ebafd1eb47a7287b145814f330c655b78 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/base_conv.py @@ -0,0 +1,401 @@ +"""Keras base class for convolution layers.""" + +from keras.src import activations +from keras.src import constraints +from 
keras.src import initializers +from keras.src import ops +from keras.src import regularizers +from keras.src.backend import standardize_data_format +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer +from keras.src.ops.operation_utils import compute_conv_output_shape +from keras.src.utils.argument_validation import standardize_padding +from keras.src.utils.argument_validation import standardize_tuple + + +class BaseConv(Layer): + """Abstract N-D convolution layer (private, used as implementation base). + + This layer creates a convolution kernel that is convolved (actually + cross-correlated) with the layer input to produce a tensor of outputs. If + `use_bias` is True (and a `bias_initializer` is provided), a bias vector is + created and added to the outputs. Finally, if `activation` is not `None`, it + is applied to the outputs as well. + + Note: layer attributes cannot be modified after the layer has been called + once (except the `trainable` attribute). + + Args: + rank: int, the rank of the convolution, e.g. 2 for 2D convolution. + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of `rank` integers, specifying the size + of the convolution window. + strides: int or tuple/list of `rank` integers, specifying the stride + length of the convolution. If only one int is specified, the same + stride size will be used for all dimensions. `strides > 1` is + incompatible with `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. 
`"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of `rank` integers, specifying the + dilation rate to use for dilated convolution. If only one int is + specified, the same dilation rate will be used for all dimensions. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. LoRA sets the layer's kernel + to non-trainable and replaces it with a delta over the + original kernel, obtained via multiplying two lower-rank + trainable matrices. This can be useful to reduce the + computation cost of fine-tuning large dense layers. + You can also enable LoRA on an existing layer by calling + `layer.enable_lora(rank)`. + """ + + def __init__( + self, + rank, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs, + ): + super().__init__(activity_regularizer=activity_regularizer, **kwargs) + self.rank = rank + self.filters = filters + self.groups = groups + self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size") + self.strides = standardize_tuple(strides, rank, "strides") + self.dilation_rate = standardize_tuple( + dilation_rate, rank, "dilation_rate" + ) + self.padding = standardize_padding(padding, allow_causal=rank == 1) + self.data_format = standardize_data_format(data_format) + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.lora_rank = lora_rank + self.lora_enabled = False + self.input_spec = InputSpec(min_ndim=self.rank + 2) + self.data_format = self.data_format + + if self.filters is 
not None and self.filters <= 0: + raise ValueError( + "Invalid value for argument `filters`. Expected a strictly " + f"positive value. Received filters={self.filters}." + ) + + if self.groups <= 0: + raise ValueError( + "The number of groups must be a positive integer. " + f"Received: groups={self.groups}." + ) + + if self.filters is not None and self.filters % self.groups != 0: + raise ValueError( + "The number of filters must be evenly divisible by the " + f"number of groups. Received: groups={self.groups}, " + f"filters={self.filters}." + ) + + if not all(self.kernel_size): + raise ValueError( + "The argument `kernel_size` cannot contain 0. Received " + f"kernel_size={self.kernel_size}." + ) + + if not all(self.strides): + raise ValueError( + "The argument `strides` cannot contains 0. Received " + f"strides={self.strides}" + ) + + if max(self.strides) > 1 and max(self.dilation_rate) > 1: + raise ValueError( + "`strides > 1` not supported in conjunction with " + f"`dilation_rate > 1`. Received: strides={self.strides} and " + f"dilation_rate={self.dilation_rate}" + ) + + def build(self, input_shape): + if self.data_format == "channels_last": + channel_axis = -1 + input_channel = input_shape[-1] + else: + channel_axis = 1 + input_channel = input_shape[1] + self.input_spec = InputSpec( + min_ndim=self.rank + 2, axes={channel_axis: input_channel} + ) + if input_channel % self.groups != 0: + raise ValueError( + "The number of input channels must be evenly divisible by " + f"the number of groups. Received groups={self.groups}, but the " + f"input has {input_channel} channels (full input shape is " + f"{input_shape})." + ) + kernel_shape = self.kernel_size + ( + input_channel // self.groups, + self.filters, + ) + + # compute_output_shape contains some validation logic for the input + # shape, and make sure the output shape has all positive dimensions. 
+ self.compute_output_shape(input_shape) + + self._kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.filters,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype, + ) + else: + self.bias = None + self.built = True + if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def kernel(self): + if not self.built: + raise AttributeError( + "You must build the layer before accessing `kernel`." + ) + if self.lora_enabled: + return self._kernel + ops.matmul( + self.lora_kernel_a, self.lora_kernel_b + ) + return self._kernel + + def convolution_op(self, inputs, kernel): + return ops.conv( + inputs, + kernel, + strides=list(self.strides), + padding=self.padding, + dilation_rate=self.dilation_rate, + data_format=self.data_format, + ) + + def call(self, inputs): + outputs = self.convolution_op( + inputs, + self.kernel, + ) + if self.use_bias: + if self.data_format == "channels_last": + bias_shape = (1,) * (self.rank + 1) + (self.filters,) + else: + bias_shape = (1, self.filters) + (1,) * self.rank + bias = ops.reshape(self.bias, bias_shape) + outputs = ops.add(outputs, bias) + + if self.activation is not None: + return self.activation(outputs) + return outputs + + def compute_output_shape(self, input_shape): + return compute_conv_output_shape( + input_shape, + self.filters, + self.kernel_size, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate, + ) + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.kernel_constraint: + raise ValueError( + "Lora is incompatible with kernel constraints. 
" + "In order to enable lora on this layer, remove the " + "`kernel_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." + ) + self._tracker.unlock() + self.lora_kernel_a = self.add_weight( + name="lora_kernel_a", + shape=self._kernel.shape[:-1] + (rank,), + initializer=initializers.get(a_initializer), + regularizer=self.kernel_regularizer, + ) + self.lora_kernel_b = self.add_weight( + name="lora_kernel_b", + shape=(rank, self.filters), + initializer=initializers.get(b_initializer), + regularizer=self.kernel_regularizer, + ) + self._kernel.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + target_variables = [self.kernel] + if self.use_bias: + target_variables.append(self.bias) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't yet built + if not self.built: + return + target_variables = [self._kernel] + if self.use_bias: + target_variables.append(self.bias) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape)) + self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape)) + + def get_config(self): + config = super().get_config() + config.update( + { + "filters": self.filters, + "kernel_size": self.kernel_size, + "strides": self.strides, + "padding": self.padding, + "data_format": self.data_format, + "dilation_rate": self.dilation_rate, + "groups": self.groups, + "activation": activations.serialize(self.activation), + "use_bias": 
self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize( + self.bias_initializer + ), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize( + self.bias_regularizer + ), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize( + self.kernel_constraint + ), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + ) + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return config + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. 
class BaseConvTranspose(Layer):
    """Abstract N-D transposed convolution layer.

    The need for transposed convolutions generally arises from the desire to use
    a transformation going in the opposite direction of a normal convolution,
    i.e., from something that has the shape of the output of some convolution to
    something that has the shape of its input while maintaining a connectivity
    pattern that is compatible with said convolution.

    Args:
        rank: int, the rank of the transposed convolution, e.g. 2 for 2D
            transposed convolution.
        filters: int, the dimension of the output space (the number of filters
            in the transposed convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the transposed convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the transposed convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input such that output has the same
            height/width dimension as the input.
        output_padding: int or tuple/list of `rank` integers, specifying the
            amount of explicit padding along each spatial dimension of the
            output tensor. If `None` (default), the output shape is inferred.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        kernel_initializer: Initializer for the convolution kernel. If `None`,
            the default initializer (`"glorot_uniform"`) will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        kernel_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        kernel_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        output_padding=None,
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        kernel_initializer="glorot_uniform",
        bias_initializer="zeros",
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=activity_regularizer,
            **kwargs,
        )
        self.rank = rank
        self.filters = filters
        # Normalize scalar-or-sequence arguments to length-`rank` tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        if output_padding is None:
            self.output_padding = None
        else:
            self.output_padding = standardize_tuple(
                output_padding,
                rank,
                "output_padding",
            )
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Eagerly validate arguments so misconfiguration fails at construction
        # time rather than at build/call time.
        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0. Received "
                f"strides={self.strides}."
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the kernel (and optional bias) once the input channel count
        is known, and pin the channel axis in the input spec."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # Note the transposed layout: (..., filters, input_channel), the
        # reverse of a forward convolution kernel.
        kernel_shape = self.kernel_size + (
            self.filters,
            input_channel,
        )

        self.kernel = self.add_weight(
            name="kernel",
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Apply the transposed convolution, then bias and activation."""
        outputs = ops.conv_transpose(
            inputs,
            self.kernel,
            strides=list(self.strides),
            padding=self.padding,
            output_padding=self.output_padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the bias so it broadcasts over batch and spatial dims,
            # aligned with the channel axis for the current data format.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        return compute_conv_transpose_output_shape(
            input_shape,
            self.kernel_size,
            self.filters,
            strides=self.strides,
            padding=self.padding,
            output_padding=self.output_padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        """Return the layer config, including `output_padding` so that a
        layer built with a non-default value round-trips through
        serialization (previously it was silently dropped)."""
        config = super().get_config()
        config.update(
            {
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "output_padding": self.output_padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "kernel_initializer": initializers.serialize(
                    self.kernel_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "kernel_regularizer": regularizers.serialize(
                    self.kernel_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "kernel_constraint": constraints.serialize(
                    self.kernel_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
class BaseDepthwiseConv(Layer):
    """Abstract N-D depthwise convolution layer.

    Depthwise convolution is a type of convolution in which each input channel
    is convolved with a different kernel (called a depthwise kernel). You can
    understand depthwise convolution as the first step in a depthwise separable
    convolution.

    It is implemented via the following steps:

    - Split the input into individual channels.
    - Convolve each channel with an individual depthwise kernel with
      `depth_multiplier` output channels.
    - Concatenate the convolved outputs along the channels axis.

    Unlike a regular convolution, depthwise convolution does not mix information
    across different input channels.

    The `depth_multiplier` argument determines how many filters are applied to
    one input channel. As such, it controls the amount of output channels that
    are generated per input channel in the depthwise step.

    Args:
        rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the depthwise convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the depthwise convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `strides > 1` is incompatible with `dilation_rate > 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: Initializer for the depthwise convolution
            kernel. If `None`, the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: Initializer for the bias vector. If `None`, the
            default initializer (`"zeros"`) will be used.
        depthwise_regularizer: Optional regularizer for the convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            kernel after being updated by an `Optimizer` (e.g. used to implement
            norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape). Constraints
            are not safe to use when doing asynchronous distributed training.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        depth_multiplier,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs,
        )
        self.rank = rank
        self.depth_multiplier = depth_multiplier
        # Normalize scalar-or-sequence arguments to length-`rank` tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Eagerly validate arguments so misconfiguration fails at construction
        # time rather than at build/call time.
        if self.depth_multiplier is not None and self.depth_multiplier <= 0:
            raise ValueError(
                "Invalid value for argument `depth_multiplier`. Expected a "
                "strictly positive value. Received "
                f"depth_multiplier={self.depth_multiplier}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0. Received "
                f"strides={self.strides}"
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the depthwise kernel (and optional bias) once the input
        channel count is known, and pin the channel axis in the input spec."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # Depthwise kernel layout: (*kernel_size, in_channels, multiplier).
        depthwise_shape = self.kernel_size + (
            input_channel,
            self.depth_multiplier,
        )
        self.kernel = self.add_weight(
            name="kernel",
            shape=depthwise_shape,
            initializer=self.depthwise_initializer,
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            # One bias per output channel: in_channels * depth_multiplier.
            self.bias = self.add_weight(
                name="bias",
                shape=(self.depth_multiplier * input_channel,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def _get_input_channel(self, input_shape):
        """Return the channel count of `input_shape` for the current
        data format."""
        if self.data_format == "channels_last":
            input_channel = input_shape[-1]
        else:
            input_channel = input_shape[1]
        return input_channel

    def call(self, inputs):
        """Apply the depthwise convolution, then bias and activation."""
        outputs = ops.depthwise_conv(
            inputs,
            self.kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # The channel count is only needed to shape the bias broadcast.
            input_channel = self._get_input_channel(inputs.shape)
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (
                    self.depth_multiplier * input_channel,
                )
            else:
                bias_shape = (1, self.depth_multiplier * input_channel) + (
                    1,
                ) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        input_channel = self._get_input_channel(input_shape)
        return compute_conv_output_shape(
            input_shape,
            self.depth_multiplier * input_channel,
            self.kernel_size,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "depth_multiplier": self.depth_multiplier,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "depthwise_initializer": initializers.serialize(
                    self.depthwise_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "depthwise_regularizer": regularizers.serialize(
                    self.depthwise_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "depthwise_constraint": constraints.serialize(
                    self.depthwise_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
class BaseSeparableConv(Layer):
    """Abstract base layer for separable convolution.

    This layer performs a depthwise convolution that acts separately on
    channels, followed by a pointwise convolution that mixes channels. If
    `use_bias` is True and a bias initializer is provided, it adds a bias vector
    to the output.

    Args:
        rank: int, the rank of the convolution, e.g. 2 for 2D convolution.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel. The total number of depthwise convolution
            output channels will be equal to `input_channel * depth_multiplier`.
        filters: int, the dimensionality of the output space (i.e. the number
            of filters in the pointwise convolution).
        kernel_size: int or tuple/list of `rank` integers, specifying the size
            of the depthwise convolution window.
        strides: int or tuple/list of `rank` integers, specifying the stride
            length of the depthwise convolution. If only one int is specified,
            the same stride size will be used for all dimensions.
            `stride value != 1` is incompatible with `dilation_rate != 1`.
        padding: string, either `"valid"` or `"same"` (case-insensitive).
            `"valid"` means no padding. `"same"` results in padding evenly to
            the left/right or up/down of the input. When `padding="same"` and
            `strides=1`, the output has the same size as the input.
        data_format: string, either `"channels_last"` or `"channels_first"`.
            The ordering of the dimensions in the inputs. `"channels_last"`
            corresponds to inputs with shape `(batch, steps, features)`
            while `"channels_first"` corresponds to inputs with shape
            `(batch, features, steps)`. It defaults to the `image_data_format`
            value found in your Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be `"channels_last"`.
        dilation_rate: int or tuple/list of `rank` integers, specifying the
            dilation rate to use for dilated convolution. If only one int is
            specified, the same dilation rate will be used for all dimensions.
        activation: Activation function. If `None`, no activation is applied.
        use_bias: bool, if `True`, bias will be added to the output.
        depthwise_initializer: An initializer for the depthwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        pointwise_initializer: An initializer for the pointwise convolution
            kernel. If None, then the default initializer (`"glorot_uniform"`)
            will be used.
        bias_initializer: An initializer for the bias vector. If None, the
            default initializer ('"zeros"') will be used.
        depthwise_regularizer: Optional regularizer for the depthwise
            convolution kernel.
        pointwise_regularizer: Optional regularizer for the pointwise
            convolution kernel.
        bias_regularizer: Optional regularizer for the bias vector.
        activity_regularizer: Optional regularizer function for the output.
        depthwise_constraint: Optional projection function to be applied to the
            depthwise kernel after being updated by an `Optimizer` (e.g. used
            for norm constraints or value constraints for layer weights). The
            function must take as input the unprojected variable and must return
            the projected variable (which must have the same shape).
        pointwise_constraint: Optional projection function to be applied to the
            pointwise kernel after being updated by an `Optimizer`.
        bias_constraint: Optional projection function to be applied to the
            bias after being updated by an `Optimizer`.
    """

    def __init__(
        self,
        rank,
        depth_multiplier,
        filters,
        kernel_size,
        strides=1,
        padding="valid",
        data_format=None,
        dilation_rate=1,
        activation=None,
        use_bias=True,
        depthwise_initializer="glorot_uniform",
        pointwise_initializer="glorot_uniform",
        bias_initializer="zeros",
        depthwise_regularizer=None,
        pointwise_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        depthwise_constraint=None,
        pointwise_constraint=None,
        bias_constraint=None,
        trainable=True,
        name=None,
        **kwargs,
    ):
        super().__init__(
            trainable=trainable,
            name=name,
            activity_regularizer=regularizers.get(activity_regularizer),
            **kwargs,
        )
        self.rank = rank
        self.depth_multiplier = depth_multiplier
        self.filters = filters
        # Normalize scalar-or-sequence arguments to length-`rank` tuples.
        self.kernel_size = standardize_tuple(kernel_size, rank, "kernel_size")
        self.strides = standardize_tuple(strides, rank, "strides")
        self.dilation_rate = standardize_tuple(
            dilation_rate, rank, "dilation_rate"
        )
        self.padding = standardize_padding(padding)
        self.data_format = standardize_data_format(data_format)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.pointwise_initializer = initializers.get(pointwise_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.pointwise_regularizer = regularizers.get(pointwise_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
        self.pointwise_constraint = constraints.get(pointwise_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        self.input_spec = InputSpec(min_ndim=self.rank + 2)

        # Eagerly validate arguments so misconfiguration fails at construction
        # time rather than at build/call time.
        if self.depth_multiplier is not None and self.depth_multiplier <= 0:
            raise ValueError(
                "Invalid value for argument `depth_multiplier`. Expected a "
                "strictly positive value. Received "
                f"depth_multiplier={self.depth_multiplier}."
            )

        if self.filters is not None and self.filters <= 0:
            raise ValueError(
                "Invalid value for argument `filters`. Expected a strictly "
                f"positive value. Received filters={self.filters}."
            )

        if not all(self.kernel_size):
            raise ValueError(
                "The argument `kernel_size` cannot contain 0. Received: "
                f"kernel_size={self.kernel_size}."
            )

        if not all(self.strides):
            raise ValueError(
                "The argument `strides` cannot contains 0(s). Received: "
                f"strides={self.strides}"
            )

        if max(self.strides) > 1 and max(self.dilation_rate) > 1:
            raise ValueError(
                "`strides > 1` not supported in conjunction with "
                f"`dilation_rate > 1`. Received: strides={self.strides} and "
                f"dilation_rate={self.dilation_rate}"
            )

    def build(self, input_shape):
        """Create the depthwise and pointwise kernels (and optional bias) once
        the input channel count is known, and pin the channel axis in the
        input spec."""
        if self.data_format == "channels_last":
            channel_axis = -1
            input_channel = input_shape[-1]
        else:
            channel_axis = 1
            input_channel = input_shape[1]
        self.input_spec = InputSpec(
            min_ndim=self.rank + 2, axes={channel_axis: input_channel}
        )
        # Depthwise kernel: (*kernel_size, in_channels, multiplier).
        depthwise_kernel_shape = self.kernel_size + (
            input_channel,
            self.depth_multiplier,
        )
        # Pointwise kernel is a 1x...x1 convolution mixing the depthwise
        # outputs into `filters` channels.
        pointwise_kernel_shape = (1,) * self.rank + (
            self.depth_multiplier * input_channel,
            self.filters,
        )

        self.depthwise_kernel = self.add_weight(
            name="depthwise_kernel",
            shape=depthwise_kernel_shape,
            initializer=self.depthwise_initializer,
            regularizer=self.depthwise_regularizer,
            constraint=self.depthwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        self.pointwise_kernel = self.add_weight(
            name="pointwise_kernel",
            shape=pointwise_kernel_shape,
            initializer=self.pointwise_initializer,
            regularizer=self.pointwise_regularizer,
            constraint=self.pointwise_constraint,
            trainable=True,
            dtype=self.dtype,
        )
        if self.use_bias:
            self.bias = self.add_weight(
                name="bias",
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype,
            )
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        """Apply the separable (depthwise + pointwise) convolution, then bias
        and activation."""
        outputs = ops.separable_conv(
            inputs,
            self.depthwise_kernel,
            self.pointwise_kernel,
            strides=self.strides,
            padding=self.padding,
            dilation_rate=self.dilation_rate,
            data_format=self.data_format,
        )

        if self.use_bias:
            # Reshape the bias so it broadcasts over batch and spatial dims,
            # aligned with the channel axis for the current data format.
            if self.data_format == "channels_last":
                bias_shape = (1,) * (self.rank + 1) + (self.filters,)
            else:
                bias_shape = (1, self.filters) + (1,) * self.rank
            bias = ops.reshape(self.bias, bias_shape)
            outputs = ops.add(outputs, bias)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        return compute_conv_output_shape(
            input_shape,
            self.filters,
            self.kernel_size,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "depth_multiplier": self.depth_multiplier,
                "filters": self.filters,
                "kernel_size": self.kernel_size,
                "strides": self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate,
                "activation": activations.serialize(self.activation),
                "use_bias": self.use_bias,
                "depthwise_initializer": initializers.serialize(
                    self.depthwise_initializer
                ),
                "pointwise_initializer": initializers.serialize(
                    self.pointwise_initializer
                ),
                "bias_initializer": initializers.serialize(
                    self.bias_initializer
                ),
                "depthwise_regularizer": regularizers.serialize(
                    self.depthwise_regularizer
                ),
                "pointwise_regularizer": regularizers.serialize(
                    self.pointwise_regularizer
                ),
                "bias_regularizer": regularizers.serialize(
                    self.bias_regularizer
                ),
                "activity_regularizer": regularizers.serialize(
                    self.activity_regularizer
                ),
                "depthwise_constraint": constraints.serialize(
                    self.depthwise_constraint
                ),
                "pointwise_constraint": constraints.serialize(
                    self.pointwise_constraint
                ),
                "bias_constraint": constraints.serialize(self.bias_constraint),
            }
        )
        return config
import keras_export +from keras.src.layers.convolutional.base_conv import BaseConv + + +@keras_export(["keras.layers.Conv1D", "keras.layers.Convolution1D"]) +class Conv1D(BaseConv): + """1D convolution layer (e.g. temporal convolution). + + This layer creates a convolution kernel that is convolved with the layer + input over a single spatial (or temporal) dimension to produce a tensor of + outputs. If `use_bias` is True, a bias vector is created and added to the + outputs. Finally, if `activation` is not `None`, it is applied to the + outputs as well. + + Args: + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, `"valid"`, `"same"` or `"causal"`(case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + `"causal"` results in causal(dilated) convolutions, e.g. `output[t]` + does not depend on`input[t+1:]`. Useful when modeling temporal data + where the model should not violate the temporal order. + See [WaveNet: A Generative Model for Raw Audio, section2.1]( + https://arxiv.org/abs/1609.03499). + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. 
+ dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated convolution. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, new_steps, filters)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, filters, new_steps)` + + Returns: + A 3D tensor representing `activation(conv1d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> # The inputs are 128-length vectors with 10 timesteps, and the + >>> # batch size is 4. + >>> x = np.random.rand(4, 10, 128) + >>> y = keras.layers.Conv1D(32, 3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) + + def _compute_causal_padding(self): + left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1) + if self.data_format == "channels_last": + causal_padding = [[0, 0], [left_pad, 0], [0, 0]] + else: + causal_padding = [[0, 0], [0, 0], 
[left_pad, 0]] + return causal_padding + + def call(self, inputs): + padding = self.padding + if self.padding == "causal": + # Apply causal padding to inputs. + inputs = ops.pad(inputs, self._compute_causal_padding()) + padding = "valid" + + outputs = ops.conv( + inputs, + self.kernel, + strides=list(self.strides), + padding=padding, + dilation_rate=self.dilation_rate, + data_format=self.data_format, + ) + + if self.use_bias: + if self.data_format == "channels_last": + bias_shape = (1,) * (self.rank + 1) + (self.filters,) + else: + bias_shape = (1, self.filters) + (1,) * self.rank + bias = ops.reshape(self.bias, bias_shape) + outputs = ops.add(outputs, bias) + + if self.activation is not None: + return self.activation(outputs) + return outputs diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py new file mode 100644 index 0000000000000000000000000000000000000000..466f1f19931f174ddccf6beb0d29a34a10857a60 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv1d_transpose.py @@ -0,0 +1,131 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose + + +@keras_export( + [ + "keras.layers.Conv1DTranspose", + "keras.layers.Convolution1DTranspose", + ] +) +class Conv1DTranspose(BaseConvTranspose): + """1D transposed convolution layer. + + The need for transposed convolutions generally arise from the desire to use + a transformation going in the opposite direction of a normal convolution, + i.e., from something that has the shape of the output of some convolution + to something that has the shape of its input while maintaining a + connectivity pattern that is compatible with said convolution. 
+ + Args: + filters: int, the dimension of the output space (the number of filters + in the transpose convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + transposed convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the transposed convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input such that output has the same + height/width dimension as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated transposed convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). 
The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, new_steps, filters)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, filters, new_steps)` + + Returns: + A 3D tensor representing + `activation(conv1d_transpose(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + + Example: + + >>> x = np.random.rand(4, 10, 128) + >>> y = keras.layers.Conv1DTranspose(32, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 21, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + 
kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py new file mode 100644 index 0000000000000000000000000000000000000000..c46f8f9a0bc1eca50a48c98890678538830b1981 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d.py @@ -0,0 +1,128 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv import BaseConv + + +@keras_export(["keras.layers.Conv2D", "keras.layers.Convolution2D"]) +class Conv2D(BaseConv): + """2D convolution layer. + + This layer creates a convolution kernel that is convolved with the layer + input over a 2D spatial (or temporal) dimension (height and width) to + produce a tensor of outputs. If `use_bias` is True, a bias vector is created + and added to the outputs. Finally, if `activation` is not `None`, it is + applied to the outputs as well. + + Args: + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of 2 integer, specifying the size of the + convolution window. + strides: int or tuple/list of 2 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. 
`"channels_last"` + corresponds to inputs with shape + `(batch_size, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, height, width)`. It defaults to the + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json`. If you never set it, then it will be + `"channels_last"`. + dilation_rate: int or tuple/list of 2 integers, specifying the dilation + rate to use for dilated convolution. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + + Returns: + A 4D tensor representing `activation(conv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> x = np.random.rand(4, 10, 10, 128) + >>> y = keras.layers.Conv2D(32, 3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 8, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py new file mode 100644 index 
0000000000000000000000000000000000000000..ac13452f626386457589891e270c89a962c1265d --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv2d_transpose.py @@ -0,0 +1,133 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose + + +@keras_export( + [ + "keras.layers.Conv2DTranspose", + "keras.layers.Convolution2DTranspose", + ] +) +class Conv2DTranspose(BaseConvTranspose): + """2D transposed convolution layer. + + The need for transposed convolutions generally arise from the desire to use + a transformation going in the opposite direction of a normal convolution, + i.e., from something that has the shape of the output of some convolution + to something that has the shape of its input while maintaining a + connectivity pattern that is compatible with said convolution. + + Args: + filters: int, the dimension of the output space (the number of filters + in the transposed convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + transposed convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the transposed convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape + `(batch_size, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, height, width)`. It defaults to the + `image_data_format` value found in your Keras config file at + `~/.keras/keras.json`. 
If you never set it, then it will be + `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated transposed convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + + Returns: + A 4D tensor representing + `activation(conv2d_transpose(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ + References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + + Example: + + >>> x = np.random.rand(4, 10, 8, 128) + >>> y = keras.layers.Conv2DTranspose(32, 2, 2, activation='relu')(x) + >>> print(y.shape) + (4, 20, 16, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py new file mode 100644 index 0000000000000000000000000000000000000000..4badd2042c377819c7135b9dd8fccadfc805601a --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d.py @@ -0,0 +1,134 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv import BaseConv + + +@keras_export(["keras.layers.Conv3D", "keras.layers.Convolution3D"]) +class Conv3D(BaseConv): + """3D convolution layer. 
+ + This layer creates a convolution kernel that is convolved with the layer + input over a 3D spatial (or temporal) dimension (width,height and depth) to + produce a tensor of outputs. If `use_bias` is True, a bias vector is created + and added to the outputs. Finally, if `activation` is not `None`, it is + applied to the outputs as well. + + Args: + filters: int, the dimension of the output space (the number of filters + in the convolution). + kernel_size: int or tuple/list of 3 integer, specifying the size of the + convolution window. + strides: int or tuple/list of 3 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + It defaults to the `image_data_format` value found in your Keras + config file at `~/.keras/keras.json`. If you never set it, then it + will be `"channels_last"`. + dilation_rate: int or tuple/list of 3 integers, specifying the dilation + rate to use for dilated convolution. + groups: A positive int specifying the number of groups in which the + input is split along the channel axis. Each group is convolved + separately with `filters // groups` filters. The output is the + concatenation of all the `groups` results along the channel axis. + Input channels and `filters` must both be divisible by `groups`. + activation: Activation function. 
If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3, + filters)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, filters, new_spatial_dim1, new_spatial_dim2, + new_spatial_dim3)` + + Returns: + A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ + Example: + + >>> x = np.random.rand(4, 10, 10, 10, 128) + >>> y = keras.layers.Conv3D(32, 3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 8, 8, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=3, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + groups=groups, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py new file mode 100644 index 0000000000000000000000000000000000000000..348ff5f5d80083ae7439e06766aa2fec52a2f81d --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/conv3d_transpose.py @@ -0,0 +1,138 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_conv_transpose import BaseConvTranspose + + +@keras_export( + [ + "keras.layers.Conv3DTranspose", + "keras.layers.Convolution3DTranspose", + ] +) +class Conv3DTranspose(BaseConvTranspose): + """3D transposed convolution layer. 
+ + The need for transposed convolutions generally arise from the desire to use + a transformation going in the opposite direction of a normal convolution, + i.e., from something that has the shape of the output of some convolution + to something that has the shape of its input while maintaining a + connectivity pattern that is compatible with said convolution. + + Args: + filters: int, the dimension of the output space (the number of filters + in the transposed convolution). + kernel_size: int or tuple/list of 1 integer, specifying the size of the + transposed convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the transposed convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + It defaults to the `image_data_format` value found in your Keras + config file at `~/.keras/keras.json`. If you never set it, then it + will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated transposed convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + kernel_initializer: Initializer for the convolution kernel. If `None`, + the default initializer (`"glorot_uniform"`) will be used. + bias_initializer: Initializer for the bias vector. 
If `None`, the + default initializer (`"zeros"`) will be used. + kernel_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + kernel_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + + Output shape: + + - If `data_format="channels_last"`: + 5D tensor with shape: + `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3, + filters)` + - If `data_format="channels_first"`: + 5D tensor with shape: + `(batch_size, filters, new_spatial_dim1, new_spatial_dim2, + new_spatial_dim3)` + + Returns: + A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ + References: + - [A guide to convolution arithmetic for deep learning]( + https://arxiv.org/abs/1603.07285v1) + - [Deconvolutional Networks]( + https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + + Example: + + >>> x = np.random.rand(4, 10, 8, 12, 128) + >>> y = keras.layers.Conv3DTranspose(32, 2, 2, activation='relu')(x) + >>> print(y.shape) + (4, 20, 16, 24, 32) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=3, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py new file mode 100644 index 0000000000000000000000000000000000000000..51312d8447e2fa6e532e0e420c29ef5aa6b22365 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv1d.py @@ -0,0 +1,137 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv + + +@keras_export("keras.layers.DepthwiseConv1D") +class 
DepthwiseConv1D(BaseDepthwiseConv): + """1D depthwise convolution layer. + + Depthwise convolution is a type of convolution in which each input channel + is convolved with a different kernel (called a depthwise kernel). You can + understand depthwise convolution as the first step in a depthwise separable + convolution. + + It is implemented via the following steps: + + - Split the input into individual channels. + - Convolve each channel with an individual depthwise kernel with + `depth_multiplier` output channels. + - Concatenate the convolved outputs along the channels axis. + + Unlike a regular 1D convolution, depthwise convolution does not mix + information across different input channels. + + The `depth_multiplier` argument determines how many filters are applied to + one input channel. As such, it controls the amount of output channels that + are generated per input channel in the depthwise step. + + Args: + kernel_size: int or tuple/list of 1 integer, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 1 integer, specifying the stride length + of the convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. 
It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: Initializer for the convolution kernel. + If `None`, the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + depthwise_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: + `(batch_shape, new_steps, channels * depth_multiplier)` + - If `data_format="channels_first"`: + A 3D tensor with shape: + `(batch_shape, channels * depth_multiplier, new_steps)` + + Returns: + A 3D tensor representing + `activation(depthwise_conv1d(inputs, kernel) + bias)`. 
+ + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> x = np.random.rand(4, 10, 12) + >>> y = keras.layers.DepthwiseConv1D(3, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 4, 36) + """ + + def __init__( + self, + kernel_size, + strides=1, + padding="valid", + depth_multiplier=1, + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + depth_multiplier=depth_multiplier, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py new file mode 100644 index 0000000000000000000000000000000000000000..71c950246e03e6563dfcce01d6acd1fe2408cec5 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/depthwise_conv2d.py @@ -0,0 +1,138 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_depthwise_conv import BaseDepthwiseConv + + +@keras_export("keras.layers.DepthwiseConv2D") +class DepthwiseConv2D(BaseDepthwiseConv): + """2D depthwise convolution layer. 
+ + Depthwise convolution is a type of convolution in which each input channel + is convolved with a different kernel (called a depthwise kernel). You can + understand depthwise convolution as the first step in a depthwise separable + convolution. + + It is implemented via the following steps: + + - Split the input into individual channels. + - Convolve each channel with an individual depthwise kernel with + `depth_multiplier` output channels. + - Concatenate the convolved outputs along the channels axis. + + Unlike a regular 2D convolution, depthwise convolution does not mix + information across different input channels. + + The `depth_multiplier` argument determines how many filters are applied to + one input channel. As such, it controls the amount of output channels that + are generated per input channel in the depthwise step. + + Args: + kernel_size: int or tuple/list of 2 integer, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 2 integer, specifying the stride length + of the depthwise convolution. `strides > 1` is incompatible with + `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch, channels, height, width)`. 
It defaults to the + `image_data_format` value found in your Keras config file + at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 2 integers, specifying the dilation + rate to use for dilated convolution. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: Initializer for the convolution kernel. + If `None`, the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: Initializer for the bias vector. If `None`, the + default initializer (`"zeros"`) will be used. + depthwise_regularizer: Optional regularizer for the convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + kernel after being updated by an `Optimizer` (e.g. used to implement + norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). Constraints + are not safe to use when doing asynchronous distributed training. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. 
+ + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: + `(batch_size, new_height, new_width, channels * depth_multiplier)` + - If `data_format="channels_first"`: + A 4D tensor with shape: + `(batch_size, channels * depth_multiplier, new_height, new_width)` + + Returns: + A 4D tensor representing + `activation(depthwise_conv2d(inputs, kernel) + bias)`. + + Raises: + ValueError: when both `strides > 1` and `dilation_rate > 1`. + + Example: + + >>> x = np.random.rand(4, 10, 10, 12) + >>> y = keras.layers.DepthwiseConv2D(kernel_size=3, activation='relu')(x) + >>> print(y.shape) + (4, 8, 8, 12) + """ + + def __init__( + self, + kernel_size, + strides=(1, 1), + padding="valid", + depth_multiplier=1, + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + depth_multiplier=depth_multiplier, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py new file mode 100644 index 0000000000000000000000000000000000000000..2f03161981d44db66c49a92532f5e232da17fd60 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv1d.py @@ -0,0 +1,143 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv + + +@keras_export( + [ + "keras.layers.SeparableConv1D", + "keras.layers.SeparableConvolution1D", + ] +) +class SeparableConv1D(BaseSeparableConv): + """1D separable convolution layer. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + filters: int, the dimensionality of the output space (i.e. the number + of filters in the pointwise convolution). + kernel_size: int or tuple/list of 1 integers, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 1 integers, specifying the stride length + of the depthwise convolution. If only one int is specified, the same + stride size will be used for all dimensions. `strides > 1` is + incompatible with `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. 
`"channels_last"` + corresponds to inputs with shape `(batch, steps, features)` + while `"channels_first"` corresponds to inputs with shape + `(batch, features, steps)`. It defaults to the `image_data_format` + value found in your Keras config file at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 1 integers, specifying the dilation + rate to use for dilated convolution. If only one int is specified, + the same dilation rate will be used for all dimensions. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: An initializer for the depthwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + pointwise_initializer: An initializer for the pointwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: An initializer for the bias vector. If None, the + default initializer ('"zeros"') will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. used + for norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). 
+ pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, steps, channels)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, channels, steps)` + + Output shape: + + - If `data_format="channels_last"`: + A 3D tensor with shape: `(batch_shape, new_steps, filters)` + - If `data_format="channels_first"`: + A 3D tensor with shape: `(batch_shape, filters, new_steps)` + + Returns: + A 3D tensor representing + `activation(separable_conv1d(inputs, kernel) + bias)`. + + Example: + + >>> x = np.random.rand(4, 10, 12) + >>> y = keras.layers.SeparableConv1D(3, 4, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 4, 4) + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="valid", + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=1, + depth_multiplier=depth_multiplier, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + pointwise_initializer=pointwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + pointwise_regularizer=pointwise_regularizer, + bias_regularizer=bias_regularizer, + 
activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + pointwise_constraint=pointwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py new file mode 100644 index 0000000000000000000000000000000000000000..27c1548231dd1b42c22c8394af6d8a4ac8429795 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/convolutional/separable_conv2d.py @@ -0,0 +1,144 @@ +from keras.src.api_export import keras_export +from keras.src.layers.convolutional.base_separable_conv import BaseSeparableConv + + +@keras_export( + [ + "keras.layers.SeparableConv2D", + "keras.layers.SeparableConvolution2D", + ] +) +class SeparableConv2D(BaseSeparableConv): + """2D separable convolution layer. + + This layer performs a depthwise convolution that acts separately on + channels, followed by a pointwise convolution that mixes channels. + If `use_bias` is True and a bias initializer is provided, + it adds a bias vector to the output. It then optionally applies an + activation function to produce the final output. + + Args: + filters: int, the dimensionality of the output space (i.e. the number + of filters in the pointwise convolution). + kernel_size: int or tuple/list of 2 integers, specifying the size of the + depthwise convolution window. + strides: int or tuple/list of 2 integers, specifying the stride length + of the depthwise convolution. If only one int is specified, the same + stride size will be used for all dimensions. `strides > 1` is + incompatible with `dilation_rate > 1`. + padding: string, either `"valid"` or `"same"` (case-insensitive). + `"valid"` means no padding. `"same"` results in padding evenly to + the left/right or up/down of the input. 
When `padding="same"` and + `strides=1`, the output has the same size as the input. + data_format: string, either `"channels_last"` or `"channels_first"`. + The ordering of the dimensions in the inputs. `"channels_last"` + corresponds to inputs with shape `(batch, height, width, channels)` + while `"channels_first"` corresponds to inputs with shape + `(batch, channels, height, width)`. It defaults to the + `image_data_format` value found in your Keras config file + at `~/.keras/keras.json`. + If you never set it, then it will be `"channels_last"`. + dilation_rate: int or tuple/list of 2 integers, specifying the dilation + rate to use for dilated convolution. If only one int is specified, + the same dilation rate will be used for all dimensions. + depth_multiplier: The number of depthwise convolution output channels + for each input channel. The total number of depthwise convolution + output channels will be equal to `input_channel * depth_multiplier`. + activation: Activation function. If `None`, no activation is applied. + use_bias: bool, if `True`, bias will be added to the output. + depthwise_initializer: An initializer for the depthwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + pointwise_initializer: An initializer for the pointwise convolution + kernel. If None, then the default initializer (`"glorot_uniform"`) + will be used. + bias_initializer: An initializer for the bias vector. If None, the + default initializer ('"zeros"') will be used. + depthwise_regularizer: Optional regularizer for the depthwise + convolution kernel. + pointwise_regularizer: Optional regularizer for the pointwise + convolution kernel. + bias_regularizer: Optional regularizer for the bias vector. + activity_regularizer: Optional regularizer function for the output. + depthwise_constraint: Optional projection function to be applied to the + depthwise kernel after being updated by an `Optimizer` (e.g. 
used + for norm constraints or value constraints for layer weights). The + function must take as input the unprojected variable and must return + the projected variable (which must have the same shape). + pointwise_constraint: Optional projection function to be applied to the + pointwise kernel after being updated by an `Optimizer`. + bias_constraint: Optional projection function to be applied to the + bias after being updated by an `Optimizer`. + + Input shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, height, width, channels)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, channels, height, width)` + + Output shape: + + - If `data_format="channels_last"`: + A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + - If `data_format="channels_first"`: + A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + + Returns: + A 4D tensor representing + `activation(separable_conv2d(inputs, kernel) + bias)`. 
+ + Example: + + >>> x = np.random.rand(4, 10, 10, 12) + >>> y = keras.layers.SeparableConv2D(3, 4, 3, 2, activation='relu')(x) + >>> print(y.shape) + (4, 4, 4, 4) + """ + + def __init__( + self, + filters, + kernel_size, + strides=(1, 1), + padding="valid", + data_format=None, + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer="glorot_uniform", + pointwise_initializer="glorot_uniform", + bias_initializer="zeros", + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs, + ): + super().__init__( + rank=2, + depth_multiplier=depth_multiplier, + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + depthwise_initializer=depthwise_initializer, + pointwise_initializer=pointwise_initializer, + bias_initializer=bias_initializer, + depthwise_regularizer=depthwise_regularizer, + pointwise_regularizer=pointwise_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + depthwise_constraint=depthwise_constraint, + pointwise_constraint=pointwise_constraint, + bias_constraint=bias_constraint, + **kwargs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..003afbdde642d1fba7b80a3209d053904bb3e543 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40ed4c8f8553bad909b839070a596db72ba1dfd5 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/dense.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/einsum_dense.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/einsum_dense.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11cb8b9c25a13adb1d0b5c6a572c437fd0787893 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/einsum_dense.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/embedding.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/embedding.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d8e11c89aacc56bcca5eddea204537c44bc0dee Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/embedding.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/identity.cpython-310.pyc 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/identity.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f112355f2731f01daa79bf46381109322a5f34a8 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/identity.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/input_layer.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/input_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b73147d65106c389527a24813f2b1b0690f32c2 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/input_layer.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/lambda_layer.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/lambda_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00c25823fcd0231bcbd92a36f6df76bf650fadb3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/lambda_layer.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/masking.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/masking.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a64ca0cff3828ce94fb6b780794a93c6728fe68 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/masking.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/wrapper.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/wrapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35f3fc28d3ddf821ab5488d3fbb4f648d000143c Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/__pycache__/wrapper.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/dense.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/dense.py new file mode 100644 index 0000000000000000000000000000000000000000..21063a38272546c8d8605c66bc46a22308fdd94a --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/dense.py @@ -0,0 +1,555 @@ +import ml_dtypes + +from keras.src import activations +from keras.src import constraints +from keras.src import dtype_policies +from keras.src import initializers +from keras.src import ops +from keras.src import quantizers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Dense") +class Dense(Layer): + """Just your regular densely-connected NN layer. + + `Dense` implements the operation: + `output = activation(dot(input, kernel) + bias)` + where `activation` is the element-wise activation function + passed as the `activation` argument, `kernel` is a weights matrix + created by the layer, and `bias` is a bias vector created by the layer + (only applicable if `use_bias` is `True`). 
+ + Note: If the input to the layer has a rank greater than 2, `Dense` + computes the dot product between the `inputs` and the `kernel` along the + last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`). + For example, if input has dimensions `(batch_size, d0, d1)`, then we create + a `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2 + of the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are + `batch_size * d0` such sub-tensors). The output in this case will have + shape `(batch_size, d0, units)`. + + Args: + units: Positive integer, dimensionality of the output space. + activation: Activation function to use. + If you don't specify anything, no activation is applied + (ie. "linear" activation: `a(x) = x`). + use_bias: Boolean, whether the layer uses a bias vector. + kernel_initializer: Initializer for the `kernel` weights matrix. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to + the `kernel` weights matrix. + bias_regularizer: Regularizer function applied to the bias vector. + activity_regularizer: Regularizer function applied to + the output of the layer (its "activation"). + kernel_constraint: Constraint function applied to + the `kernel` weights matrix. + bias_constraint: Constraint function applied to the bias vector. + lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. LoRA sets the layer's kernel + to non-trainable and replaces it with a delta over the + original kernel, obtained via multiplying two lower-rank + trainable matrices. This can be useful to reduce the + computation cost of fine-tuning large dense layers. + You can also enable LoRA on an existing + `Dense` layer by calling `layer.enable_lora(rank)`. + + Input shape: + N-D tensor with shape: `(batch_size, ..., input_dim)`. 
+ The most common situation would be + a 2D input with shape `(batch_size, input_dim)`. + + Output shape: + N-D tensor with shape: `(batch_size, ..., units)`. + For instance, for a 2D input with shape `(batch_size, input_dim)`, + the output would have shape `(batch_size, units)`. + """ + + def __init__( + self, + units, + activation=None, + use_bias=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs, + ): + super().__init__(activity_regularizer=activity_regularizer, **kwargs) + self.units = units + self.activation = activations.get(activation) + self.use_bias = use_bias + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.lora_rank = lora_rank + self.lora_enabled = False + self.input_spec = InputSpec(min_ndim=2) + self.supports_masking = True + + def build(self, input_shape): + input_dim = input_shape[-1] + if self.quantization_mode: + self.quantized_build(input_shape, mode=self.quantization_mode) + if self.quantization_mode != "int8": + # If the layer is quantized to int8, `self._kernel` will be added + # in `self._int8_build`. Therefore, we skip it here. 
+ self._kernel = self.add_weight( + name="kernel", + shape=(input_dim, self.units), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.units,), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) + self.built = True + if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def kernel(self): + if not self.built: + raise AttributeError( + "You must build the layer before accessing `kernel`." + ) + if self.lora_enabled: + return self._kernel + ops.matmul( + self.lora_kernel_a, self.lora_kernel_b + ) + return self._kernel + + def call(self, inputs, training=None): + x = ops.matmul(inputs, self.kernel) + if self.bias is not None: + x = ops.add(x, self.bias) + if self.activation is not None: + x = self.activation(x) + return x + + def compute_output_shape(self, input_shape): + output_shape = list(input_shape) + output_shape[-1] = self.units + return tuple(output_shape) + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.kernel_constraint: + raise ValueError( + "Lora is incompatible with kernel constraints. " + "In order to enable lora on this layer, remove the " + "`kernel_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." 
+ ) + self._tracker.unlock() + self.lora_kernel_a = self.add_weight( + name="lora_kernel_a", + shape=(self.kernel.shape[0], rank), + initializer=initializers.get(a_initializer), + regularizer=self.kernel_regularizer, + ) + self.lora_kernel_b = self.add_weight( + name="lora_kernel_b", + shape=(rank, self.kernel.shape[1]), + initializer=initializers.get(b_initializer), + regularizer=self.kernel_regularizer, + ) + self._kernel.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + kernel_value, kernel_scale = self._get_kernel_with_merged_lora() + target_variables = [kernel_value] + if self.use_bias: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + target_variables = [self._kernel] + if self.use_bias: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == 
"int8": + target_variables.append(self.kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape)) + self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape)) + + def get_config(self): + base_config = super().get_config() + config = { + "units": self.units, + "activation": activations.serialize(self.activation), + "use_bias": self.use_bias, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return {**base_config, **config} + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. 
The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. its variables) upon deserialization.", + ) + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. " + f"Expected: {[v.name for v in all_vars]}" + ) + + # Quantization-related (int8 and float8) methods + + def quantized_build(self, input_shape, mode): + if mode == "int8": + input_dim = input_shape[-1] + kernel_shape = (input_dim, self.units) + self._int8_build(kernel_shape) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + def _int8_build( + self, + kernel_shape, + kernel_initializer="zeros", + kernel_scale_initializer="ones", + ): + self.inputs_quantizer = quantizers.AbsMaxQuantizer(axis=-1) + self._kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=kernel_initializer, + dtype="int8", + trainable=False, + ) + self.kernel_scale = self.add_weight( + name="kernel_scale", + shape=(self.units,), + initializer=kernel_scale_initializer, + trainable=False, + ) + self._is_quantized = True + + def _float8_build(self): + from keras.src.dtype_policies import QuantizedFloat8DTypePolicy + + # If `self.dtype_policy` is not QuantizedFloat8DTypePolicy, then set + # `amax_history_length` to its default value. 
+ amax_history_length = getattr( + self.dtype_policy, + "amax_history_length", + QuantizedFloat8DTypePolicy.default_amax_history_length, + ) + # We set `trainable=True` because we will use the gradients to overwrite + # these variables + scale_kwargs = { + "shape": (), + "initializer": "ones", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + amax_history_kwargs = { + "shape": (amax_history_length,), + "initializer": "zeros", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + self.inputs_scale = self.add_weight(name="inputs_scale", **scale_kwargs) + self.inputs_amax_history = self.add_weight( + name="inputs_amax_history", **amax_history_kwargs + ) + self.kernel_scale = self.add_weight(name="kernel_scale", **scale_kwargs) + self.kernel_amax_history = self.add_weight( + name="kernel_amax_history", **amax_history_kwargs + ) + self.outputs_grad_scale = self.add_weight( + name="outputs_grad_scale", **scale_kwargs + ) + self.outputs_grad_amax_history = self.add_weight( + name="outputs_grad_amax_history", **amax_history_kwargs + ) + # We need to set `overwrite_with_gradient=True` to instruct the + # optimizer to directly overwrite these variables with their computed + # gradients during training + self.inputs_scale.overwrite_with_gradient = True + self.inputs_amax_history.overwrite_with_gradient = True + self.kernel_scale.overwrite_with_gradient = True + self.kernel_amax_history.overwrite_with_gradient = True + self.outputs_grad_scale.overwrite_with_gradient = True + self.outputs_grad_amax_history.overwrite_with_gradient = True + self._is_quantized = True + + def _int8_call(self, inputs, training=None): + @ops.custom_gradient + def matmul_with_inputs_gradient(inputs, kernel, kernel_scale): + def grad_fn(*args, upstream=None): + if upstream is None: + (upstream,) = args + float_kernel = ops.divide( + ops.cast(kernel, dtype=self.compute_dtype), + kernel_scale, + ) + inputs_grad = 
ops.matmul(upstream, ops.transpose(float_kernel)) + return (inputs_grad, None, None) + + inputs, inputs_scale = self.inputs_quantizer(inputs) + x = ops.matmul(inputs, kernel) + # De-scale outputs + x = ops.cast(x, self.compute_dtype) + x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale)) + return x, grad_fn + + x = matmul_with_inputs_gradient( + inputs, + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ) + if self.lora_enabled: + lora_x = ops.matmul(inputs, self.lora_kernel_a) + lora_x = ops.matmul(lora_x, self.lora_kernel_b) + x = ops.add(x, lora_x) + if self.bias is not None: + x = ops.add(x, self.bias) + if self.activation is not None: + x = self.activation(x) + return x + + def _float8_call(self, inputs, training=None): + if self.lora_enabled: + raise NotImplementedError( + "Currently, `_float8_call` doesn't support LoRA" + ) + + @ops.custom_gradient + def quantized_dequantize_inputs(inputs, scale, amax_history): + if training: + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e4m3fn").max), "float32" + ), + ) + new_amax_history = quantizers.compute_float8_amax_history( + inputs, amax_history + ) + else: + new_scale = None + new_amax_history = None + qdq_inputs = quantizers.quantize_and_dequantize( + inputs, scale, "float8_e4m3fn", self.compute_dtype + ) + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + return upstream, new_scale, new_amax_history + + return qdq_inputs, grad + + @ops.custom_gradient + def quantized_dequantize_outputs(outputs, scale, amax_history): + """Quantize-dequantize the output gradient but not the output.""" + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e5m2").max), "float32" + ), + ) + 
qdq_upstream = quantizers.quantize_and_dequantize( + upstream, scale, "float8_e5m2", self.compute_dtype + ) + new_amax_history = quantizers.compute_float8_amax_history( + upstream, amax_history + ) + return qdq_upstream, new_scale, new_amax_history + + return outputs, grad + + x = ops.matmul( + quantized_dequantize_inputs( + inputs, + ops.convert_to_tensor(self.inputs_scale), + ops.convert_to_tensor(self.inputs_amax_history), + ), + quantized_dequantize_inputs( + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ops.convert_to_tensor(self.kernel_amax_history), + ), + ) + # `quantized_dequantize_outputs` is placed immediately after + # `ops.matmul` for the sake of pattern matching in gemm_rewrite. That + # way, the qdq will be adjacent to the corresponding matmul_bprop in the + # bprop. + x = quantized_dequantize_outputs( + x, + ops.convert_to_tensor(self.outputs_grad_scale), + ops.convert_to_tensor(self.outputs_grad_amax_history), + ) + if self.bias is not None: + # Under non-mixed precision cases, F32 bias has to be converted to + # BF16 first to get the biasAdd fusion support. ref. 
PR + # https://github.com/tensorflow/tensorflow/pull/60306 + bias = self.bias + if self.dtype_policy.compute_dtype == "float32": + bias_bf16 = ops.cast(bias, "bfloat16") + bias = ops.cast(bias_bf16, bias.dtype) + x = ops.add(x, bias) + if self.activation is not None: + x = self.activation(x) + return x + + def quantize(self, mode, type_check=True): + # Prevent quantization of the subclasses + if type_check and (type(self) is not Dense): + raise self._not_implemented_error(self.quantize) + + if mode == "int8": + # Quantize `self._kernel` to int8 and compute corresponding scale + kernel_value, kernel_scale = quantizers.abs_max_quantize( + self._kernel, axis=0, to_numpy=True + ) + kernel_scale = ops.squeeze(kernel_scale, axis=0) + kernel_shape = tuple(self._kernel.shape) + del self._kernel + # Utilize a lambda expression as an initializer to prevent adding a + # large constant to the computation graph. + self._int8_build(kernel_shape, kernel_value, kernel_scale) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + # Set new dtype policy + if self.dtype_policy.quantization_mode is None: + policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}") + self.dtype_policy = policy + + def _get_kernel_with_merged_lora(self): + if self.dtype_policy.quantization_mode is not None: + kernel_value = self._kernel + kernel_scale = self.kernel_scale + if self.lora_enabled: + # Dequantize & quantize to merge lora weights into int8 kernel + # Note that this is a lossy compression + kernel_value = ops.divide(kernel_value, kernel_scale) + kernel_value = ops.add( + kernel_value, + ops.matmul(self.lora_kernel_a, self.lora_kernel_b), + ) + kernel_value, kernel_scale = quantizers.abs_max_quantize( + kernel_value, axis=0, to_numpy=True + ) + kernel_scale = ops.squeeze(kernel_scale, axis=0) + return kernel_value, kernel_scale + return self.kernel, None diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/einsum_dense.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/einsum_dense.py new file mode 100644 index 0000000000000000000000000000000000000000..1600ae59b62e7d94cf1570b970b001bb6f36865b --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/einsum_dense.py @@ -0,0 +1,1008 @@ +import re +import string + +import ml_dtypes +import numpy as np + +from keras.src import activations +from keras.src import constraints +from keras.src import dtype_policies +from keras.src import initializers +from keras.src import ops +from keras.src import quantizers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.input_spec import InputSpec +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.EinsumDense") +class EinsumDense(Layer): + """A layer that uses `einsum` as the backing computation. + + This layer can perform einsum calculations of arbitrary dimensionality. + + Args: + equation: An equation describing the einsum to perform. + This equation must be a valid einsum string of the form + `ab,bc->ac`, `...ab,bc->...ac`, or + `ab...,bc->ac...` where 'ab', 'bc', and 'ac' can be any valid einsum + axis expression sequence. + output_shape: The expected shape of the output tensor + (excluding the batch dimension and any dimensions + represented by ellipses). You can specify `None` for any dimension + that is unknown or can be inferred from the input shape. + activation: Activation function to use. If you don't specify anything, + no activation is applied + (that is, a "linear" activation: `a(x) = x`). + bias_axes: A string containing the output dimension(s) + to apply a bias to. Each character in the `bias_axes` string + should correspond to a character in the output portion + of the `equation` string. 
+ kernel_initializer: Initializer for the `kernel` weights matrix. + bias_initializer: Initializer for the bias vector. + kernel_regularizer: Regularizer function applied to the `kernel` weights + matrix. + bias_regularizer: Regularizer function applied to the bias vector. + kernel_constraint: Constraint function applied to the `kernel` weights + matrix. + bias_constraint: Constraint function applied to the bias vector. + lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. LoRA sets the layer's kernel + to non-trainable and replaces it with a delta over the + original kernel, obtained via multiplying two lower-rank + trainable matrices + (the factorization happens on the last dimension). + This can be useful to reduce the + computation cost of fine-tuning large dense layers. + You can also enable LoRA on an existing + `EinsumDense` layer by calling `layer.enable_lora(rank)`. + **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + + Examples: + + **Biased dense layer with einsums** + + This example shows how to instantiate a standard Keras dense layer using + einsum operations. This example is equivalent to + `keras.layers.Dense(64, use_bias=True)`. + + >>> layer = keras.layers.EinsumDense("ab,bc->ac", + ... output_shape=64, + ... bias_axes="c") + >>> input_tensor = keras.Input(shape=[32]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor.shape + (None, 64) + + **Applying a dense layer to a sequence** + + This example shows how to instantiate a layer that applies the same dense + operation to every element in a sequence. Here, the `output_shape` has two + values (since there are two non-batch dimensions in the output); the first + dimension in the `output_shape` is `None`, because the sequence dimension + `b` has an unknown shape. + + >>> layer = keras.layers.EinsumDense("abc,cd->abd", + ... output_shape=(None, 64), + ... 
bias_axes="d") + >>> input_tensor = keras.Input(shape=[32, 128]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor.shape + (None, 32, 64) + + **Applying a dense layer to a sequence using ellipses** + + This example shows how to instantiate a layer that applies the same dense + operation to every element in a sequence, but uses the ellipsis notation + instead of specifying the batch and sequence dimensions. + + Because we are using ellipsis notation and have specified only one axis, the + `output_shape` arg is a single value. When instantiated in this way, the + layer can handle any number of sequence dimensions - including the case + where no sequence dimension exists. + + >>> layer = keras.layers.EinsumDense("...x,xy->...y", + ... output_shape=64, + ... bias_axes="y") + >>> input_tensor = keras.Input(shape=[32, 128]) + >>> output_tensor = layer(input_tensor) + >>> output_tensor.shape + (None, 32, 64) + """ + + def __init__( + self, + equation, + output_shape, + activation=None, + bias_axes=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs, + ): + super().__init__(**kwargs) + self.equation = equation + if isinstance(output_shape, int): + self.partial_output_shape = (output_shape,) + else: + self.partial_output_shape = tuple(output_shape) + self.bias_axes = bias_axes + self.activation = activations.get(activation) + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + self.kernel_regularizer = regularizers.get(kernel_regularizer) + self.bias_regularizer = regularizers.get(bias_regularizer) + self.kernel_constraint = constraints.get(kernel_constraint) + self.bias_constraint = constraints.get(bias_constraint) + self.lora_rank = lora_rank + self.lora_enabled = False + + def build(self, input_shape): + shape_data = 
_analyze_einsum_string( + self.equation, + self.bias_axes, + input_shape, + self.partial_output_shape, + ) + kernel_shape, bias_shape, full_output_shape = shape_data + self.full_output_shape = tuple(full_output_shape) + # `self._int8_build` needs `self.input_spec` + self.input_spec = InputSpec(ndim=len(input_shape)) + # We use `self._dtype_policy` to check to avoid issues in torch dynamo + if self.quantization_mode is not None: + self.quantized_build(input_shape, mode=self.quantization_mode) + if self.quantization_mode != "int8": + # If the layer is quantized to int8, `self._kernel` will be added + # in `self._int8_build`. Therefore, we skip it here. + self._kernel = self.add_weight( + name="kernel", + shape=tuple(kernel_shape), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + dtype=self.dtype, + trainable=True, + ) + if bias_shape is not None: + self.bias = self.add_weight( + name="bias", + shape=tuple(bias_shape), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + dtype=self.dtype, + trainable=True, + ) + else: + self.bias = None + self.built = True + if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def kernel(self): + if not self.built: + raise AttributeError( + "You must build the layer before accessing `kernel`." + ) + if self.lora_enabled: + return self._kernel + ops.matmul( + self.lora_kernel_a, self.lora_kernel_b + ) + return self._kernel + + def compute_output_shape(self, _): + return self.full_output_shape + + def call(self, inputs, training=None): + x = ops.einsum(self.equation, inputs, self.kernel) + if self.bias is not None: + x += self.bias + if self.activation is not None: + x = self.activation(x) + return x + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.kernel_constraint: + raise ValueError( + "Lora is incompatible with kernel constraints. 
" + "In order to enable lora on this layer, remove the " + "`kernel_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." + ) + self._tracker.unlock() + self.lora_kernel_a = self.add_weight( + name="lora_kernel_a", + shape=(self.kernel.shape[:-1] + (rank,)), + initializer=initializers.get(a_initializer), + regularizer=self.kernel_regularizer, + ) + self.lora_kernel_b = self.add_weight( + name="lora_kernel_b", + shape=(rank, self.kernel.shape[-1]), + initializer=initializers.get(b_initializer), + regularizer=self.kernel_regularizer, + ) + self._kernel.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + kernel_value, kernel_scale = self._get_kernel_with_merged_lora() + target_variables = [kernel_value] + if self.bias is not None: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't 
yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + target_variables = [self._kernel] + if self.bias is not None: + target_variables.append(self.bias) + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(self.kernel_scale) + elif self.quantization_mode == "float8": + target_variables.append(self.inputs_scale) + target_variables.append(self.inputs_amax_history) + target_variables.append(self.kernel_scale) + target_variables.append(self.kernel_amax_history) + target_variables.append(self.outputs_grad_scale) + target_variables.append(self.outputs_grad_amax_history) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_kernel_a.assign(ops.zeros(self.lora_kernel_a.shape)) + self.lora_kernel_b.assign(ops.zeros(self.lora_kernel_b.shape)) + + def get_config(self): + base_config = super().get_config() + config = { + "output_shape": self.partial_output_shape, + "equation": self.equation, + "activation": activations.serialize(self.activation), + "bias_axes": self.bias_axes, + "kernel_initializer": initializers.serialize( + self.kernel_initializer + ), + "bias_initializer": initializers.serialize(self.bias_initializer), + "kernel_regularizer": regularizers.serialize( + self.kernel_regularizer + ), + "bias_regularizer": regularizers.serialize(self.bias_regularizer), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "kernel_constraint": constraints.serialize(self.kernel_constraint), + "bias_constraint": constraints.serialize(self.bias_constraint), + } + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return {**base_config, **config} + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + 
self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. its variables) upon deserialization.", + ) + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. 
" + f"Expected: {[v.name for v in all_vars]}" + ) + + # Quantization-related (int8 and float8) methods + + def quantized_build(self, input_shape, mode): + if mode == "int8": + shape_data = _analyze_einsum_string( + self.equation, + self.bias_axes, + input_shape, + self.partial_output_shape, + ) + kernel_shape, _, _ = shape_data + self._int8_build(kernel_shape) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + def _int8_build( + self, + kernel_shape, + kernel_initializer="zeros", + kernel_scale_initializer="ones", + ): + ( + self._input_reduced_axes, + self._kernel_reduced_axes, + self._input_transpose_axes, + self._kernel_transpose_axes, + self._input_expand_axes, + self._kernel_expand_axes, + self._input_squeeze_axes, + self._kernel_squeeze_axes, + self._custom_gradient_equation, + self._kernel_reverse_transpose_axes, + ) = _analyze_quantization_info(self.equation, self.input_spec.ndim) + self.inputs_quantizer = quantizers.AbsMaxQuantizer( + axis=self._input_reduced_axes + ) + self._kernel = self.add_weight( + name="kernel", + shape=kernel_shape, + initializer=kernel_initializer, + dtype="int8", + trainable=False, + ) + kernel_scale_shape = np.array(kernel_shape) + kernel_scale_shape[self._kernel_reduced_axes] = 1 + kernel_scale_shape = kernel_scale_shape[self._kernel_transpose_axes] + kernel_scale_shape = kernel_scale_shape.tolist() + for a in sorted(self._kernel_expand_axes): + kernel_scale_shape.insert(a, 1) + for a in sorted(self._kernel_squeeze_axes, reverse=True): + kernel_scale_shape.pop(a) + self.kernel_scale = self.add_weight( + name="kernel_scale", + shape=kernel_scale_shape, + initializer=kernel_scale_initializer, + trainable=False, + ) + self._is_quantized = True + + def _float8_build(self): + from keras.src.dtype_policies import QuantizedFloat8DTypePolicy + + # If `self.dtype_policy` is not QuantizedFloat8DTypePolicy, then set + # `amax_history_length` to its default value. 
+ amax_history_length = getattr( + self.dtype_policy, + "amax_history_length", + QuantizedFloat8DTypePolicy.default_amax_history_length, + ) + # We set `trainable=True` because we will use the gradients to overwrite + # these variables + scale_kwargs = { + "shape": (), + "initializer": "ones", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + amax_history_kwargs = { + "shape": (amax_history_length,), + "initializer": "zeros", + "dtype": "float32", # Always be float32 + "trainable": True, + "autocast": False, + } + self.inputs_scale = self.add_weight(name="inputs_scale", **scale_kwargs) + self.inputs_amax_history = self.add_weight( + name="inputs_amax_history", **amax_history_kwargs + ) + self.kernel_scale = self.add_weight(name="kernel_scale", **scale_kwargs) + self.kernel_amax_history = self.add_weight( + name="kernel_amax_history", **amax_history_kwargs + ) + self.outputs_grad_scale = self.add_weight( + name="outputs_grad_scale", **scale_kwargs + ) + self.outputs_grad_amax_history = self.add_weight( + name="outputs_grad_amax_history", **amax_history_kwargs + ) + # We need to set `overwrite_with_gradient=True` to instruct the + # optimizer to directly overwrite these variables with their computed + # gradients during training + self.inputs_scale.overwrite_with_gradient = True + self.inputs_amax_history.overwrite_with_gradient = True + self.kernel_scale.overwrite_with_gradient = True + self.kernel_amax_history.overwrite_with_gradient = True + self.outputs_grad_scale.overwrite_with_gradient = True + self.outputs_grad_amax_history.overwrite_with_gradient = True + self._is_quantized = True + + def _int8_call(self, inputs, training=None): + @ops.custom_gradient + def einsum_with_inputs_gradient(inputs, kernel, kernel_scale): + def grad_fn(*args, upstream=None): + if upstream is None: + (upstream,) = args + # De-scale kernel + _kernel_scale = kernel_scale # Overcome UnboundLocalError + if self._kernel_squeeze_axes: + _kernel_scale 
= ops.expand_dims( + _kernel_scale, axis=self._kernel_squeeze_axes + ) + if self._kernel_expand_axes: + _kernel_scale = ops.squeeze( + _kernel_scale, axis=self._kernel_expand_axes + ) + _kernel_scale = ops.transpose( + _kernel_scale, self._kernel_reverse_transpose_axes + ) + float_kernel = ops.divide( + ops.cast(kernel, dtype=self.compute_dtype), + _kernel_scale, + ) + # From https://stackoverflow.com/a/47609896 + inputs_grad = ops.einsum( + self._custom_gradient_equation, upstream, float_kernel + ) + return (inputs_grad, None, None) + + inputs, inputs_scale = self.inputs_quantizer(inputs) + x = ops.einsum(self.equation, inputs, kernel) + # Deal with `inputs_scale` + inputs_scale = ops.transpose( + inputs_scale, self._input_transpose_axes + ) + if self._input_expand_axes: + inputs_scale = ops.expand_dims( + inputs_scale, axis=self._input_expand_axes + ) + if self._input_squeeze_axes: + inputs_scale = ops.squeeze( + inputs_scale, axis=self._input_squeeze_axes + ) + # De-scale outputs + x = ops.cast(x, self.compute_dtype) + x = ops.divide(x, ops.multiply(inputs_scale, kernel_scale)) + return x, grad_fn + + x = einsum_with_inputs_gradient( + inputs, + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ) + if self.lora_enabled: + lora_x = ops.einsum(self.equation, inputs, self.lora_kernel_a) + lora_x = ops.matmul(lora_x, self.lora_kernel_b) + x = ops.add(x, lora_x) + if self.bias is not None: + x += self.bias + if self.activation is not None: + x = self.activation(x) + return x + + def _float8_call(self, inputs, training=None): + if self.lora_enabled: + raise NotImplementedError( + "Currently, `_float8_call` doesn't support LoRA" + ) + + @ops.custom_gradient + def quantized_dequantize_inputs(inputs, scale, amax_history): + if training: + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e4m3fn").max), "float32" + ), + ) + new_amax_history = 
quantizers.compute_float8_amax_history( + inputs, amax_history + ) + else: + new_scale = None + new_amax_history = None + qdq_inputs = quantizers.quantize_and_dequantize( + inputs, scale, "float8_e4m3fn", self.compute_dtype + ) + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + return upstream, new_scale, new_amax_history + + return qdq_inputs, grad + + @ops.custom_gradient + def quantized_dequantize_outputs(outputs, scale, amax_history): + """Quantize-dequantize the output gradient but not the output.""" + + def grad(*args, upstream=None, variables=None): + if upstream is None: + (upstream,) = args + new_scale = quantizers.compute_float8_scale( + ops.max(amax_history, axis=0), + scale, + ops.cast( + float(ml_dtypes.finfo("float8_e5m2").max), "float32" + ), + ) + qdq_upstream = quantizers.quantize_and_dequantize( + upstream, scale, "float8_e5m2", self.compute_dtype + ) + new_amax_history = quantizers.compute_float8_amax_history( + upstream, amax_history + ) + return qdq_upstream, new_scale, new_amax_history + + return outputs, grad + + x = ops.einsum( + self.equation, + quantized_dequantize_inputs( + inputs, + ops.convert_to_tensor(self.inputs_scale), + ops.convert_to_tensor(self.inputs_amax_history), + ), + quantized_dequantize_inputs( + ops.convert_to_tensor(self._kernel), + ops.convert_to_tensor(self.kernel_scale), + ops.convert_to_tensor(self.kernel_amax_history), + ), + ) + # `quantized_dequantize_outputs` is placed immediately after + # `ops.einsum` for the sake of pattern matching in gemm_rewrite. That + # way, the qdq will be adjacent to the corresponding einsum_bprop in the + # bprop. + x = quantized_dequantize_outputs( + x, + ops.convert_to_tensor(self.outputs_grad_scale), + ops.convert_to_tensor(self.outputs_grad_amax_history), + ) + if self.bias is not None: + # Under non-mixed precision cases, F32 bias has to be converted to + # BF16 first to get the biasAdd fusion support. ref. 
PR + # https://github.com/tensorflow/tensorflow/pull/60306 + bias = self.bias + if self.dtype_policy.compute_dtype == "float32": + bias_bf16 = ops.cast(bias, "bfloat16") + bias = ops.cast(bias_bf16, bias.dtype) + x = ops.add(x, bias) + if self.activation is not None: + x = self.activation(x) + return x + + def quantize(self, mode, type_check=True): + # Prevent quantization of the subclasses + if type_check and (type(self) is not EinsumDense): + raise self._not_implemented_error(self.quantize) + + if mode == "int8": + ( + self._input_reduced_axes, + self._kernel_reduced_axes, + self._input_transpose_axes, + self._kernel_transpose_axes, + self._input_expand_axes, + self._kernel_expand_axes, + self._input_squeeze_axes, + self._kernel_squeeze_axes, + self._custom_gradient_equation, + self._kernel_reverse_transpose_axes, + ) = _analyze_quantization_info(self.equation, self.input_spec.ndim) + # Quantize `self._kernel` to int8 and compute corresponding scale + kernel_value, kernel_scale = quantizers.abs_max_quantize( + self._kernel, axis=self._kernel_reduced_axes, to_numpy=True + ) + kernel_scale = ops.transpose( + kernel_scale, self._kernel_transpose_axes + ) + if self._kernel_expand_axes: + kernel_scale = ops.expand_dims( + kernel_scale, axis=self._kernel_expand_axes + ) + if self._kernel_squeeze_axes: + kernel_scale = ops.squeeze( + kernel_scale, axis=self._kernel_squeeze_axes + ) + kernel_shape = tuple(self._kernel.shape) + del self._kernel + # Utilize a lambda expression as an initializer to prevent adding a + # large constant to the computation graph. 
+ self._int8_build(kernel_shape, kernel_value, kernel_scale) + elif mode == "float8": + self._float8_build() + else: + raise self._quantization_mode_error(mode) + + # Set new dtype policy + if self.dtype_policy.quantization_mode is None: + policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}") + self.dtype_policy = policy + + def _get_kernel_with_merged_lora(self): + if self.dtype_policy.quantization_mode is not None: + kernel_value = self._kernel + kernel_scale = self.kernel_scale + if self.lora_enabled: + # Dequantize & quantize to merge lora weights into int8 kernel + # Note that this is a lossy compression + if self._kernel_squeeze_axes: + kernel_scale = ops.expand_dims( + kernel_scale, axis=self._kernel_squeeze_axes + ) + if self._kernel_expand_axes: + kernel_scale = ops.squeeze( + kernel_scale, axis=self._kernel_expand_axes + ) + if self._kernel_transpose_axes: + + def _argsort(seq): + # Ref: https://stackoverflow.com/a/3382369 + return sorted(range(len(seq)), key=seq.__getitem__) + + reverse_transpose = _argsort(self._kernel_transpose_axes) + kernel_scale = ops.transpose( + kernel_scale, axes=reverse_transpose + ) + kernel_value = ops.divide(kernel_value, kernel_scale) + kernel_value = ops.add( + kernel_value, + ops.matmul(self.lora_kernel_a, self.lora_kernel_b), + ) + kernel_value, kernel_scale = quantizers.abs_max_quantize( + kernel_value, axis=self._kernel_reduced_axes, to_numpy=True + ) + kernel_scale = ops.transpose( + kernel_scale, self._kernel_transpose_axes + ) + if self._kernel_expand_axes: + kernel_scale = ops.expand_dims( + kernel_scale, axis=self._kernel_expand_axes + ) + if self._kernel_squeeze_axes: + kernel_scale = ops.squeeze( + kernel_scale, axis=self._kernel_squeeze_axes + ) + else: + kernel_value = self.kernel + kernel_scale = None + return kernel_value, kernel_scale + + +def _analyze_einsum_string(equation, bias_axes, input_shape, output_shape): + """Analyzes an einsum string to determine the required weight shape.""" + + 
dot_replaced_string = re.sub(r"\.\.\.", "0", equation) + + # This is the case where no ellipses are present in the string. + split_string = re.match( + "([a-zA-Z]+),([a-zA-Z]+)->([a-zA-Z]+)", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape + ) + + # This is the case where ellipses are present on the left. + split_string = re.match( + "0([a-zA-Z]+),([a-zA-Z]+)->0([a-zA-Z]+)", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape, left_elided=True + ) + + # This is the case where ellipses are present on the right. + split_string = re.match( + "([a-zA-Z]{2,})0,([a-zA-Z]+)->([a-zA-Z]+)0", dot_replaced_string + ) + if split_string: + return _analyze_split_string( + split_string, bias_axes, input_shape, output_shape + ) + + raise ValueError( + f"Invalid einsum equation '{equation}'. Equations must be in the form " + "[X],[Y]->[Z], ...[X],[Y]->...[Z], or [X]...,[Y]->[Z]...." + ) + + +def _analyze_split_string( + split_string, bias_axes, input_shape, output_shape, left_elided=False +): + """Analyze an pre-split einsum string to find the weight shape.""" + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + + if isinstance(output_shape, int): + output_shape = [output_shape] + else: + output_shape = list(output_shape) + + output_shape.insert(0, input_shape[0]) + + if elided > 0 and left_elided: + for i in range(1, elided): + # We already inserted the 0th input dimension at dim 0, so we need + # to start at location 1 here. 
+ output_shape.insert(1, input_shape[i]) + elif elided > 0 and not left_elided: + for i in range(len(input_shape) - elided, len(input_shape)): + output_shape.append(input_shape[i]) + + if left_elided: + # If we have beginning dimensions elided, we need to use negative + # indexing to determine where in the input dimension our values are. + input_dim_map = { + dim: (i + elided) - len(input_shape) + for i, dim in enumerate(input_spec) + } + # Because we've constructed the full output shape already, we don't need + # to do negative indexing. + output_dim_map = { + dim: (i + elided) for i, dim in enumerate(output_spec) + } + else: + input_dim_map = {dim: i for i, dim in enumerate(input_spec)} + output_dim_map = {dim: i for i, dim in enumerate(output_spec)} + + for dim in input_spec: + input_shape_at_dim = input_shape[input_dim_map[dim]] + if dim in output_dim_map: + output_shape_at_dim = output_shape[output_dim_map[dim]] + if ( + output_shape_at_dim is not None + and output_shape_at_dim != input_shape_at_dim + ): + raise ValueError( + "Input shape and output shape do not match at shared " + f"dimension '{dim}'. Input shape is {input_shape_at_dim}, " + "and output shape " + f"is {output_shape[output_dim_map[dim]]}." + ) + + for dim in output_spec: + if dim not in input_spec and dim not in weight_spec: + raise ValueError( + f"Dimension '{dim}' was specified in the output " + f"'{output_spec}' but has no corresponding dim in the input " + f"spec '{input_spec}' or weight spec '{output_spec}'" + ) + + weight_shape = [] + for dim in weight_spec: + if dim in input_dim_map: + weight_shape.append(input_shape[input_dim_map[dim]]) + elif dim in output_dim_map: + weight_shape.append(output_shape[output_dim_map[dim]]) + else: + raise ValueError( + f"Weight dimension '{dim}' did not have a match in either " + f"the input spec '{input_spec}' or the output " + f"spec '{output_spec}'. For this layer, the weight must " + "be fully specified." 
+ ) + + if bias_axes is not None: + num_left_elided = elided if left_elided else 0 + idx_map = { + char: output_shape[i + num_left_elided] + for i, char in enumerate(output_spec) + } + + for char in bias_axes: + if char not in output_spec: + raise ValueError( + f"Bias dimension '{char}' was requested, but is not part " + f"of the output spec '{output_spec}'" + ) + + first_bias_location = min( + [output_spec.find(char) for char in bias_axes] + ) + bias_output_spec = output_spec[first_bias_location:] + + bias_shape = [ + idx_map[char] if char in bias_axes else 1 + for char in bias_output_spec + ] + + if not left_elided: + for _ in range(elided): + bias_shape.append(1) + else: + bias_shape = None + + return weight_shape, bias_shape, output_shape + + +def _analyze_quantization_info(equation, input_shape): + def get_specs(equation, input_shape): + possible_labels = string.ascii_letters + dot_replaced_string = re.sub(r"\.\.\.", "0", equation) + + # This is the case where no ellipses are present in the string. + split_string = re.match( + "([a-zA-Z]+),([a-zA-Z]+)->([a-zA-Z]+)", dot_replaced_string + ) + if split_string is not None: + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + return input_spec, weight_spec, output_spec + + # This is the case where ellipses are present on the left. 
+ split_string = re.match( + "0([a-zA-Z]+),([a-zA-Z]+)->0([a-zA-Z]+)", dot_replaced_string + ) + if split_string is not None: + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + possible_labels = sorted( + set(possible_labels) + - set(input_spec) + - set(weight_spec) + - set(output_spec) + ) + # Pad labels on the left to `input_spec` and `output_spec` + for i in range(elided): + input_spec = possible_labels[i] + input_spec + output_spec = possible_labels[i] + output_spec + return input_spec, weight_spec, output_spec + + # This is the case where ellipses are present on the right. + split_string = re.match( + "([a-zA-Z]{2,})0,([a-zA-Z]+)->([a-zA-Z]+)0", dot_replaced_string + ) + if split_string is not None: + input_spec = split_string.group(1) + weight_spec = split_string.group(2) + output_spec = split_string.group(3) + elided = len(input_shape) - len(input_spec) + possible_labels = sorted( + set(possible_labels) + - set(input_spec) + - set(weight_spec) + - set(output_spec) + ) + # Pad labels on the right to `input_spec` and `output_spec` + for i in range(elided): + input_spec = input_spec + possible_labels[i] + output_spec = output_spec + possible_labels[i] + return input_spec, weight_spec, output_spec + + raise ValueError( + f"Invalid einsum equation '{equation}'. Equations must be in the " + "form [X],[Y]->[Z], ...[X],[Y]->...[Z], or [X]...,[Y]->[Z]...." 
+ ) + + input_spec, weight_spec, output_spec = get_specs(equation, input_shape) + + # Determine the axes that should be reduced by the quantizer + input_reduced_axes = [] + weight_reduced_axes = [] + for i, label in enumerate(input_spec): + index = output_spec.find(label) + if index == -1: + input_reduced_axes.append(i) + for i, label in enumerate(weight_spec): + index = output_spec.find(label) + if index == -1: + weight_reduced_axes.append(i) + + # Determine the axes of `ops.expand_dims` + input_expand_axes = [] + weight_expand_axes = [] + for i, label in enumerate(output_spec): + index_input = input_spec.find(label) + index_weight = weight_spec.find(label) + if index_input == -1: + input_expand_axes.append(i) + if index_weight == -1: + weight_expand_axes.append(i) + + # Determine the axes of `ops.transpose` + input_transpose_axes = [] + weight_transpose_axes = [] + for i, label in enumerate(output_spec): + index_input = input_spec.find(label) + index_weight = weight_spec.find(label) + if index_input != -1: + input_transpose_axes.append(index_input) + if index_weight != -1: + weight_transpose_axes.append(index_weight) + # Postprocess the information: + # 1. Add dummy axes (1) to transpose_axes + # 2. Add axis to squeeze_axes if 1. 
failed + input_squeeze_axes = [] + weight_squeeze_axes = [] + for ori_index in input_reduced_axes: + try: + index = input_expand_axes.pop(0) + except IndexError: + input_squeeze_axes.append(ori_index) + input_transpose_axes.insert(index, ori_index) + for ori_index in weight_reduced_axes: + try: + index = weight_expand_axes.pop(0) + except IndexError: + weight_squeeze_axes.append(ori_index) + weight_transpose_axes.insert(index, ori_index) + # Prepare equation for `einsum_with_inputs_gradient` + custom_gradient_equation = f"{output_spec},{weight_spec}->{input_spec}" + weight_reverse_transpose_axes = [ + i + for (_, i) in sorted( + (v, i) for (i, v) in enumerate(weight_transpose_axes) + ) + ] + return ( + input_reduced_axes, + weight_reduced_axes, + input_transpose_axes, + weight_transpose_axes, + input_expand_axes, + weight_expand_axes, + input_squeeze_axes, + weight_squeeze_axes, + custom_gradient_equation, + weight_reverse_transpose_axes, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/embedding.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..38ced7194a4b391cb0d54862a88b73f3e63db3dc --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/embedding.py @@ -0,0 +1,391 @@ +import warnings + +from keras.src import backend +from keras.src import constraints +from keras.src import dtype_policies +from keras.src import initializers +from keras.src import ops +from keras.src import quantizers +from keras.src import regularizers +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Embedding") +class Embedding(Layer): + """Turns nonnegative integers (indexes) into dense vectors of fixed size. + + e.g. 
`[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` + + This layer can only be used on nonnegative integer inputs of a fixed range. + + Example: + + >>> model = keras.Sequential() + >>> model.add(keras.layers.Embedding(1000, 64)) + >>> # The model will take as input an integer matrix of size (batch, + >>> # input_length), and the largest integer (i.e. word index) in the input + >>> # should be no larger than 999 (vocabulary size). + >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch + >>> # dimension. + >>> input_array = np.random.randint(1000, size=(32, 10)) + >>> model.compile('rmsprop', 'mse') + >>> output_array = model.predict(input_array) + >>> print(output_array.shape) + (32, 10, 64) + + Args: + input_dim: Integer. Size of the vocabulary, + i.e. maximum integer index + 1. + output_dim: Integer. Dimension of the dense embedding. + embeddings_initializer: Initializer for the `embeddings` + matrix (see `keras.initializers`). + embeddings_regularizer: Regularizer function applied to + the `embeddings` matrix (see `keras.regularizers`). + embeddings_constraint: Constraint function applied to + the `embeddings` matrix (see `keras.constraints`). + mask_zero: Boolean, whether or not the input value 0 is a special + "padding" value that should be masked out. + This is useful when using recurrent layers which + may take variable length input. If this is `True`, + then all subsequent layers in the model need + to support masking or an exception will be raised. + If `mask_zero` is set to `True`, as a consequence, + index 0 cannot be used in the vocabulary (`input_dim` should + equal size of vocabulary + 1). + weights: Optional floating-point matrix of size + `(input_dim, output_dim)`. The initial embeddings values + to use. + lora_rank: Optional integer. If set, the layer's forward pass + will implement LoRA (Low-Rank Adaptation) + with the provided rank. 
LoRA sets the layer's embeddings + matrix to non-trainable and replaces it with a delta over the + original matrix, obtained via multiplying two lower-rank + trainable matrices. This can be useful to reduce the + computation cost of fine-tuning large embedding layers. + You can also enable LoRA on an existing + `Embedding` layer by calling `layer.enable_lora(rank)`. + + Input shape: + 2D tensor with shape: `(batch_size, input_length)`. + + Output shape: + 3D tensor with shape: `(batch_size, input_length, output_dim)`. + """ + + def __init__( + self, + input_dim, + output_dim, + embeddings_initializer="uniform", + embeddings_regularizer=None, + embeddings_constraint=None, + mask_zero=False, + weights=None, + lora_rank=None, + **kwargs, + ): + input_length = kwargs.pop("input_length", None) + if input_length is not None: + warnings.warn( + "Argument `input_length` is deprecated. Just remove it." + ) + super().__init__(**kwargs) + self.input_dim = input_dim + self.output_dim = output_dim + self.embeddings_initializer = initializers.get(embeddings_initializer) + self.embeddings_regularizer = regularizers.get(embeddings_regularizer) + self.embeddings_constraint = constraints.get(embeddings_constraint) + self.mask_zero = mask_zero + self.supports_masking = mask_zero + self.autocast = False + self.lora_rank = lora_rank + self.lora_enabled = False + + if weights is not None: + self.build() + if not (isinstance(weights, list) and len(weights) == 1): + weights = [weights] + self.set_weights(weights) + + def build(self, input_shape=None): + if self.built: + return + if self.quantization_mode is not None: + self.quantized_build(input_shape, mode=self.quantization_mode) + if self.quantization_mode != "int8": + self._embeddings = self.add_weight( + shape=(self.input_dim, self.output_dim), + initializer=self.embeddings_initializer, + name="embeddings", + regularizer=self.embeddings_regularizer, + constraint=self.embeddings_constraint, + trainable=True, + ) + self.built = True + 
if self.lora_rank: + self.enable_lora(self.lora_rank) + + @property + def embeddings(self): + if self.lora_enabled: + return self._embeddings + ops.matmul( + self.lora_embeddings_a, self.lora_embeddings_b + ) + return self._embeddings + + def call(self, inputs): + if inputs.dtype != "int32" and inputs.dtype != "int64": + inputs = ops.cast(inputs, "int32") + outputs = ops.take(self.embeddings, inputs, axis=0) + return ops.cast(outputs, dtype=self.compute_dtype) + + def compute_mask(self, inputs, mask=None): + if not self.mask_zero: + return None + return ops.not_equal(inputs, 0) + + def compute_output_shape(self, input_shape): + return (*input_shape, self.output_dim) + + def enable_lora( + self, rank, a_initializer="he_uniform", b_initializer="zeros" + ): + if self.embeddings_constraint: + raise ValueError( + "Lora is incompatible with embedding constraints. " + "In order to enable lora on this layer, remove the " + "`embeddings_constraint` argument." + ) + if not self.built: + raise ValueError( + "Cannot enable lora on a layer that isn't yet built." + ) + if self.lora_enabled: + raise ValueError( + "lora is already enabled. " + "This can only be done once per layer." 
+ ) + self._tracker.unlock() + self.lora_embeddings_a = self.add_weight( + name="lora_embeddings_a", + shape=(self.embeddings.shape[0], rank), + initializer=initializers.get(a_initializer), + regularizer=self.embeddings_regularizer, + ) + self.lora_embeddings_b = self.add_weight( + name="lora_embeddings_b", + shape=(rank, self.embeddings.shape[1]), + initializer=initializers.get(b_initializer), + regularizer=self.embeddings_regularizer, + ) + self.embeddings.trainable = False + self._tracker.lock() + self.lora_enabled = True + self.lora_rank = rank + + def save_own_variables(self, store): + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + embeddings_value, embeddings_scale = ( + self._get_embeddings_with_merged_lora() + ) + target_variables = [embeddings_value] + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(embeddings_scale) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + store[str(i)] = variable + + def load_own_variables(self, store): + if not self.lora_enabled: + self._check_load_own_variables(store) + # Do nothing if the layer isn't yet built + if not self.built: + return + # The keys of the `store` will be saved as determined because the + # default ordering will change after quantization + target_variables = [self._embeddings] + if self.quantization_mode is not None: + if self.quantization_mode == "int8": + target_variables.append(self.embeddings_scale) + else: + raise self._quantization_mode_error(self.quantization_mode) + for i, variable in enumerate(target_variables): + variable.assign(store[str(i)]) + if self.lora_enabled: + self.lora_embeddings_a.assign( + ops.zeros(self.lora_embeddings_a.shape) + ) + self.lora_embeddings_b.assign( + ops.zeros(self.lora_embeddings_b.shape) + ) + + 
def get_config(self): + base_config = super().get_config() + config = { + "input_dim": self.input_dim, + "output_dim": self.output_dim, + "embeddings_initializer": initializers.serialize( + self.embeddings_initializer + ), + "embeddings_regularizer": regularizers.serialize( + self.embeddings_regularizer + ), + "activity_regularizer": regularizers.serialize( + self.activity_regularizer + ), + "embeddings_constraint": constraints.serialize( + self.embeddings_constraint + ), + "mask_zero": self.mask_zero, + } + if self.lora_rank: + config["lora_rank"] = self.lora_rank + return {**base_config, **config} + + def _check_load_own_variables(self, store): + all_vars = self._trainable_variables + self._non_trainable_variables + if len(store.keys()) != len(all_vars): + if len(all_vars) == 0 and not self.built: + raise ValueError( + f"Layer '{self.name}' was never built " + "and thus it doesn't have any variables. " + f"However the weights file lists {len(store.keys())} " + "variables for this layer.\n" + "In most cases, this error indicates that either:\n\n" + "1. The layer is owned by a parent layer that " + "implements a `build()` method, but calling the " + "parent's `build()` method did NOT create the state of " + f"the child layer '{self.name}'. A `build()` method " + "must create ALL state for the layer, including " + "the state of any children layers.\n\n" + "2. You need to implement " + "the `def build_from_config(self, config)` method " + f"on layer '{self.name}', to specify how to rebuild " + "it during loading. " + "In this case, you might also want to implement the " + "method that generates the build config at saving time, " + "`def get_build_config(self)`. " + "The method `build_from_config()` is meant " + "to create the state " + "of the layer (i.e. its variables) upon deserialization.", + ) + raise ValueError( + f"Layer '{self.name}' expected {len(all_vars)} variables, " + "but received " + f"{len(store.keys())} variables during loading. 
" + f"Expected: {[v.name for v in all_vars]}" + ) + + """Quantization-related (int8) methods""" + + def _quantization_mode_error(self, mode): + return NotImplementedError( + "Invalid quantization mode. Expected 'int8'. " + f"Received: quantization_mode={mode}" + ) + + def quantized_build(self, input_shape, mode): + if mode == "int8": + self._int8_build() + else: + raise self._quantization_mode_error(mode) + + def _int8_build( + self, + embeddings_initializer="zeros", + embeddings_scale_initializer="ones", + ): + self._embeddings = self.add_weight( + name="embeddings", + shape=(self.input_dim, self.output_dim), + initializer=embeddings_initializer, + dtype="int8", + trainable=False, + ) + # We choose to reduce the axis of `output_dim` because, typically, + # `input_dim` is larger than `output_dim`. This reduces quantization + # error. + self.embeddings_scale = self.add_weight( + name="embeddings_scale", + shape=(self.input_dim,), + initializer=embeddings_scale_initializer, + trainable=False, + ) + self._is_quantized = True + + def quantized_call(self, *args, **kwargs): + if self.quantization_mode != "int8": + raise self._quantization_mode_error(self.quantization_mode) + return super().quantized_call(*args, **kwargs) + + def _int8_call(self, inputs, training=None): + # We cannot update quantized self._embeddings, so the custom gradient is + # not needed + if backend.standardize_dtype(inputs.dtype) not in ("int32", "int64"): + inputs = ops.cast(inputs, "int32") + embeddings_scale = ops.take(self.embeddings_scale, inputs, axis=0) + outputs = ops.take(self._embeddings, inputs, axis=0) + # De-scale outputs + outputs = ops.divide( + ops.cast(outputs, dtype=self.compute_dtype), + ops.expand_dims(embeddings_scale, axis=-1), + ) + if self.lora_enabled: + lora_outputs = ops.take(self.lora_embeddings_a, inputs, axis=0) + lora_outputs = ops.matmul(lora_outputs, self.lora_embeddings_b) + outputs = ops.add(outputs, lora_outputs) + return outputs + + def quantize(self, mode, 
type_check=True): + # Prevent quantization of the subclasses + if type_check and (type(self) is not Embedding): + raise self._not_implemented_error(self.quantize) + + if mode == "int8": + # Quantize `self._embeddings` to int8 and compute corresponding + # scale + embeddings_value, embeddings_scale = quantizers.abs_max_quantize( + self._embeddings, axis=-1, to_numpy=True + ) + embeddings_scale = ops.squeeze(embeddings_scale, axis=-1) + del self._embeddings + # Utilize a lambda expression as an initializer to prevent adding a + # large constant to the computation graph. + self._int8_build(embeddings_value, embeddings_scale) + else: + raise self._quantization_mode_error(mode) + + # Set new dtype policy + if self.dtype_policy.quantization_mode is None: + policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}") + self.dtype_policy = policy + + def _get_embeddings_with_merged_lora(self): + if self.dtype_policy.quantization_mode is not None: + embeddings_value = self._embeddings + embeddings_scale = self.embeddings_scale + if self.lora_enabled: + # Dequantize & quantize to merge lora weights into embeddings + # Note that this is a lossy compression + embeddings_value = ops.divide( + embeddings_value, ops.expand_dims(embeddings_scale, axis=-1) + ) + embeddings_value = ops.add( + embeddings_value, + ops.matmul(self.lora_embeddings_a, self.lora_embeddings_b), + ) + embeddings_value, embeddings_scale = ( + quantizers.abs_max_quantize( + embeddings_value, axis=-1, to_numpy=True + ) + ) + embeddings_scale = ops.squeeze(embeddings_scale, axis=-1) + return embeddings_value, embeddings_scale + return self.embeddings, None diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/identity.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/identity.py new file mode 100644 index 0000000000000000000000000000000000000000..f7fa9e752fb00c458ec9ee772940755a2f8b70a7 --- /dev/null +++ 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/identity.py @@ -0,0 +1,30 @@ +from keras.src import tree +from keras.src.api_export import keras_export +from keras.src.backend import KerasTensor +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Identity") +class Identity(Layer): + """Identity layer. + + This layer should be used as a placeholder when no operation is to be + performed. The layer just returns its `inputs` argument as output. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.supports_masking = True + self.built = True + + def call(self, inputs): + return inputs + + def compute_output_shape(self, input_shape): + return input_shape + + def compute_output_spec(self, inputs): + return tree.map_structure( + lambda x: KerasTensor(x.shape, dtype=x.dtype, sparse=x.sparse), + inputs, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/input_layer.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/input_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..8a45178456c969d03ed6e64f0d982dbe7444856e --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/input_layer.py @@ -0,0 +1,201 @@ +import warnings + +from keras.src import backend +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer +from keras.src.ops.node import Node + + +@keras_export("keras.layers.InputLayer") +class InputLayer(Layer): + def __init__( + self, + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + input_tensor=None, + optional=False, + name=None, + **kwargs, + ): + # TODO: support for ragged. + super().__init__(name=name) + + if "input_shape" in kwargs: + warnings.warn( + "Argument `input_shape` is deprecated. Use `shape` instead." 
+ ) + shape = kwargs.pop("input_shape") + if "batch_input_shape" in kwargs: + batch_shape = kwargs.pop("batch_input_shape") + + if input_tensor is not None: + if not isinstance(input_tensor, backend.KerasTensor): + raise ValueError( + "Argument `input_tensor` must be a KerasTensor. " + f"Received invalid type: input_tensor={input_tensor} " + f"(of type {type(input_tensor)})" + ) + if batch_size is not None: + if ( + len(input_tensor.shape) < 1 + or input_tensor.shape[0] != batch_size + ): + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `batch_size` argument." + ) + if shape is not None: + if ( + len(shape) != len(input_tensor.shape) - 1 + or shape != input_tensor.shape[1:] + ): + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `shape` argument." + ) + if batch_shape is not None and batch_shape != input_tensor.shape: + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `batch_shape` argument." + ) + if dtype is not None and input_tensor.dtype != dtype: + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `dtype` argument." + ) + if sparse is not None and input_tensor.sparse != sparse: + raise ValueError( + "When providing the `input_tensor` argument, you " + "cannot provide an incompatible `sparse` argument." + ) + batch_shape = input_tensor.shape + dtype = input_tensor.dtype + sparse = input_tensor.sparse + else: + if shape is not None and batch_shape is not None: + raise ValueError( + "You cannot pass both `shape` and `batch_shape` at the " + "same time." + ) + if batch_size is not None and batch_shape is not None: + raise ValueError( + "You cannot pass both `batch_size` and `batch_shape` " + "at the same time." 
+ ) + if shape is None and batch_shape is None: + raise ValueError("You must pass a `shape` argument.") + + if shape is not None: + shape = backend.standardize_shape(shape) + batch_shape = (batch_size,) + shape + + self._batch_shape = backend.standardize_shape(batch_shape) + self._dtype = backend.standardize_dtype(dtype) + self.sparse = bool(sparse) + if self.sparse and not backend.SUPPORTS_SPARSE_TENSORS: + raise ValueError( + "`sparse=True` is not supported with backend: " + f"{backend.backend()}" + ) + if input_tensor is None: + input_tensor = backend.KerasTensor( + shape=batch_shape, dtype=dtype, sparse=sparse, name=name + ) + self._input_tensor = input_tensor + Node(operation=self, call_args=(), call_kwargs={}, outputs=input_tensor) + self.built = True + self.optional = optional + + def call(self): + return + + @property + def batch_shape(self): + return self._batch_shape + + @property + def dtype(self): + return self._dtype + + def get_config(self): + return { + "batch_shape": self.batch_shape, + "dtype": self.dtype, + "sparse": self.sparse, + "name": self.name, + } + + +@keras_export(["keras.layers.Input", "keras.Input"]) +def Input( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + name=None, + tensor=None, + optional=False, +): + """Used to instantiate a Keras tensor. + + A Keras tensor is a symbolic tensor-like object, which we augment with + certain attributes that allow us to build a Keras model just by knowing the + inputs and outputs of the model. + + For instance, if `a`, `b` and `c` are Keras tensors, + it becomes possible to do: + `model = Model(input=[a, b], output=c)` + + Args: + shape: A shape tuple (tuple of integers or `None` objects), + not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. Elements of this tuple + can be `None`; `None` elements represent dimensions where the shape + is not known and may vary (e.g. 
sequence length). + batch_size: Optional static batch size (integer). + dtype: The data type expected by the input, as a string + (e.g. `"float32"`, `"int32"`...) + sparse: A boolean specifying whether the expected input will be sparse + tensors. Note that, if `sparse` is `False`, sparse tensors can still + be passed into the input - they will be densified with a default + value of 0. This feature is only supported with the TensorFlow + backend. Defaults to `False`. + batch_shape: Optional shape tuple (tuple of integers or `None` objects), + including the batch size. + name: Optional name string for the layer. + Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will use this tensor rather + than creating a new placeholder tensor. + optional: Boolean, whether the input is optional or not. + An optional input can accept `None` values. + + Returns: + A Keras tensor. 
+ + Example: + + ```python + # This is a logistic regression in Keras + x = Input(shape=(32,)) + y = Dense(16, activation='softmax')(x) + model = Model(x, y) + ``` + """ + layer = InputLayer( + shape=shape, + batch_size=batch_size, + dtype=dtype, + sparse=sparse, + batch_shape=batch_shape, + name=name, + input_tensor=tensor, + optional=optional, + ) + return layer.output diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/lambda_layer.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/lambda_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..11d5f15f0f9ee92ccd3591c15a0356857e960108 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/lambda_layer.py @@ -0,0 +1,231 @@ +import inspect +import types + +from keras.src import backend +from keras.src import tree +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer +from keras.src.saving import serialization_lib +from keras.src.utils import python_utils + + +@keras_export("keras.layers.Lambda") +class Lambda(Layer): + """Wraps arbitrary expressions as a `Layer` object. + + The `Lambda` layer exists so that arbitrary expressions can be used + as a `Layer` when constructing Sequential + and Functional API models. `Lambda` layers are best suited for simple + operations or quick experimentation. For more advanced use cases, + prefer writing new subclasses of `Layer`. + + WARNING: `Lambda` layers have (de)serialization limitations! + + The main reason to subclass `Layer` instead of using a + `Lambda` layer is saving and inspecting a model. `Lambda` layers + are saved by serializing the Python bytecode, which is fundamentally + non-portable and potentially unsafe. + They should only be loaded in the same environment where + they were saved. Subclassed layers can be saved in a more portable way + by overriding their `get_config()` method. 
Models that rely on + subclassed Layers are also often easier to visualize and reason about. + + Example: + + ```python + # add a x -> x^2 layer + model.add(Lambda(lambda x: x ** 2)) + ``` + + Args: + function: The function to be evaluated. Takes input tensor as first + argument. + output_shape: Expected output shape from function. This argument + can usually be inferred if not explicitly provided. + Can be a tuple or function. If a tuple, it only specifies + the first dimension onward; sample dimension is assumed + either the same as the input: + `output_shape = (input_shape[0], ) + output_shape` or, + the input is `None` and the sample dimension is also `None`: + `output_shape = (None, ) + output_shape`. + If a function, it specifies the + entire shape as a function of the input shape: + `output_shape = f(input_shape)`. + mask: Either None (indicating no masking) or a callable with the same + signature as the `compute_mask` layer method, or a tensor + that will be returned as output mask regardless + of what the input is. + arguments: Optional dictionary of keyword arguments to be passed to the + function. + """ + + def __init__( + self, function, output_shape=None, mask=None, arguments=None, **kwargs + ): + super().__init__(**kwargs) + + self.arguments = arguments or {} + self.function = function + + if mask is not None: + self.supports_masking = True + else: + self.supports_masking = False + self.mask = mask + self._output_shape = output_shape + + # Warning on every invocation will be quite irksome in Eager mode. 
+ self._already_warned = False + + function_args = inspect.getfullargspec(function).args + self._fn_expects_training_arg = "training" in function_args + self._fn_expects_mask_arg = "mask" in function_args + + def compute_output_shape(self, input_shape): + if self._output_shape is None: + # Leverage backend shape inference + try: + inputs = tree.map_shape_structure( + lambda x: backend.KerasTensor(x, dtype=self.compute_dtype), + input_shape, + ) + output_spec = backend.compute_output_spec(self.call, inputs) + return tree.map_structure(lambda x: x.shape, output_spec) + except: + raise NotImplementedError( + "We could not automatically infer the shape of " + "the Lambda's output. Please specify the `output_shape` " + "argument for this Lambda layer." + ) + + if callable(self._output_shape): + return self._output_shape(input_shape) + + # Output shapes are passed directly and don't include batch dimension. + batch_size = tree.flatten(input_shape)[0] + + def _add_batch(shape): + return (batch_size,) + shape + + return tree.map_shape_structure(_add_batch, self._output_shape) + + def call(self, inputs, mask=None, training=None): + # We must copy for thread safety, + # but it only needs to be a shallow copy. 
+ kwargs = {k: v for k, v in self.arguments.items()} + if self._fn_expects_mask_arg: + kwargs["mask"] = mask + if self._fn_expects_training_arg: + kwargs["training"] = training + return self.function(inputs, **kwargs) + + def compute_mask(self, inputs, mask=None): + if callable(self.mask): + return self.mask(inputs, mask) + return self.mask + + def get_config(self): + config = { + "function": self._serialize_function_to_config(self.function), + } + if self._output_shape is not None: + if callable(self._output_shape): + output_shape = self._serialize_function_to_config( + self._output_shape + ) + else: + output_shape = self._output_shape + config["output_shape"] = output_shape + if self.mask is not None: + if callable(self.mask): + mask = self._serialize_function_to_config(self.mask) + else: + mask = serialization_lib.serialize_keras_object(self.mask) + config["mask"] = mask + config["arguments"] = serialization_lib.serialize_keras_object( + self.arguments + ) + base_config = super().get_config() + return {**base_config, **config} + + def _serialize_function_to_config(self, fn): + if isinstance(fn, types.LambdaType) and fn.__name__ == "": + code, defaults, closure = python_utils.func_dump(fn) + return { + "class_name": "__lambda__", + "config": { + "code": code, + "defaults": defaults, + "closure": closure, + }, + } + elif callable(fn): + return serialization_lib.serialize_keras_object(fn) + raise ValueError( + "Invalid input type for serialization. " + f"Received: {fn} of type {type(fn)}." + ) + + @staticmethod + def _raise_for_lambda_deserialization(arg_name, safe_mode): + if safe_mode: + raise ValueError( + "The `{arg_name}` of this `Lambda` layer is a Python lambda. " + "Deserializing it is unsafe. If you trust the source of the " + "config artifact, you can override this error " + "by passing `safe_mode=False` " + "to `from_config()`, or calling " + "`keras.config.enable_unsafe_deserialization()." 
+ ) + + @classmethod + def from_config(cls, config, custom_objects=None, safe_mode=None): + safe_mode = safe_mode or serialization_lib.in_safe_mode() + fn_config = config["function"] + if ( + isinstance(fn_config, dict) + and "class_name" in fn_config + and fn_config["class_name"] == "__lambda__" + ): + cls._raise_for_lambda_deserialization("function", safe_mode) + inner_config = fn_config["config"] + fn = python_utils.func_load( + inner_config["code"], + defaults=inner_config["defaults"], + closure=inner_config["closure"], + ) + config["function"] = fn + else: + config["function"] = serialization_lib.deserialize_keras_object( + fn_config, custom_objects=custom_objects + ) + if "output_shape" in config: + fn_config = config["output_shape"] + if ( + isinstance(fn_config, dict) + and "class_name" in fn_config + and fn_config["class_name"] == "__lambda__" + ): + cls._raise_for_lambda_deserialization("function", safe_mode) + inner_config = fn_config["config"] + fn = python_utils.func_load( + inner_config["code"], + defaults=inner_config["defaults"], + closure=inner_config["closure"], + ) + config["output_shape"] = fn + else: + output_shape = serialization_lib.deserialize_keras_object( + fn_config, custom_objects=custom_objects + ) + if isinstance(output_shape, list) and all( + isinstance(e, (int, type(None))) for e in output_shape + ): + output_shape = tuple(output_shape) + config["output_shape"] = output_shape + + if "arguments" in config: + config["arguments"] = serialization_lib.deserialize_keras_object( + config["arguments"], custom_objects=custom_objects + ) + return cls(**config) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/masking.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/masking.py new file mode 100644 index 0000000000000000000000000000000000000000..64483aefb149425cf4335230e1dd0a396cf1422c --- /dev/null +++ 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/masking.py @@ -0,0 +1,71 @@ +from keras.src import backend +from keras.src import ops +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer + + +@keras_export("keras.layers.Masking") +class Masking(Layer): + """Masks a sequence by using a mask value to skip timesteps. + + For each timestep in the input tensor (dimension #1 in the tensor), + if all values in the input tensor at that timestep + are equal to `mask_value`, then the timestep will be masked (skipped) + in all downstream layers (as long as they support masking). + + If any downstream layer does not support masking yet receives such + an input mask, an exception will be raised. + + Example: + + Consider a NumPy data array `x` of shape `(samples, timesteps, features)`, + to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you + lack data for these timesteps. You can: + + - Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.` + - Insert a `Masking` layer with `mask_value=0.` before the LSTM layer: + + ```python + samples, timesteps, features = 32, 10, 8 + inputs = np.random.random([samples, timesteps, features]).astype(np.float32) + inputs[:, 3, :] = 0. + inputs[:, 5, :] = 0. + + model = keras.models.Sequential() + model.add(keras.layers.Masking(mask_value=0.0)) + model.add(keras.layers.LSTM(32)) + output = model(inputs) + # The time step 3 and 5 will be skipped from LSTM calculation. + ``` + + Note: in the Keras masking convention, a masked timestep is denoted by + a mask value of `False`, while a non-masked (i.e. usable) timestep + is denoted by a mask value of `True`. 
+ """ + + def __init__(self, mask_value=0.0, **kwargs): + super().__init__(**kwargs) + self.mask_value = mask_value + self.supports_masking = True + self.built = True + + def compute_mask(self, inputs, mask=None): + return ops.any(ops.not_equal(inputs, self.mask_value), axis=-1) + + def call(self, inputs): + boolean_mask = ops.any( + ops.not_equal(inputs, self.mask_value), axis=-1, keepdims=True + ) + # Set masked outputs to 0 + outputs = inputs * backend.cast(boolean_mask, dtype=inputs.dtype) + # Compute the mask and outputs simultaneously. + backend.set_keras_mask(outputs, mask=ops.squeeze(boolean_mask, axis=-1)) + return outputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + base_config = super().get_config() + config = {"mask_value": self.mask_value} + return {**base_config, **config} diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/wrapper.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..ee98a70a0291013bab6d5c04963a03ea749ecf6b --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/core/wrapper.py @@ -0,0 +1,47 @@ +from keras.src.api_export import keras_export +from keras.src.layers.layer import Layer +from keras.src.saving import serialization_lib + + +@keras_export("keras.layers.Wrapper") +class Wrapper(Layer): + """Abstract wrapper base class. + + Wrappers take another layer and augment it in various ways. + Do not use this class as a layer, it is only an abstract base class. + Two usable wrappers are the `TimeDistributed` and `Bidirectional` layers. + + Args: + layer: The layer to be wrapped. + """ + + def __init__(self, layer, **kwargs): + try: + assert isinstance(layer, Layer) + except Exception: + raise ValueError( + f"Layer {layer} supplied to Wrapper isn't " + "a supported layer type. 
Please " + "ensure wrapped layer is a valid Keras layer." + ) + super().__init__(**kwargs) + self.layer = layer + + def build(self, input_shape=None): + if not self.layer.built: + self.layer.build(input_shape) + self.layer.built = True + self.built = True + + def get_config(self): + config = {"layer": serialization_lib.serialize_keras_object(self.layer)} + base_config = super().get_config() + return {**base_config, **config} + + @classmethod + def from_config(cls, config, custom_objects=None): + layer = serialization_lib.deserialize_keras_object( + config.pop("layer"), + custom_objects=custom_objects, + ) + return cls(layer, **config) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99203b2164365e9a0fc245ed560999f269cb4b1d Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/add.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/add.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c61851633a8cbb127f57742e5e99b1304949f753 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/add.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/average.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/average.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..c6808a6c25410eaa70a23f6e6712ad199542f44c Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/average.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/base_merge.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/base_merge.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ba0e8a17d0a89fa05e012077f81d6c9e27c5d18 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/base_merge.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/concatenate.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/concatenate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa24a4f945f8e72380d790bb67a9b16d6d5ad1e0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/concatenate.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/dot.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/dot.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c82ee704f03c80fd539b66852ac9f9f69ddecd0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/dot.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/maximum.cpython-310.pyc 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/maximum.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64ea7f1c0bfae3444b75925b69df8631a7c82428 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/maximum.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/minimum.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/minimum.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ccd06d36b94f3af38912cb6de47f482ec2d685b0 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/minimum.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/multiply.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/multiply.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..501d49f1c8d053e198c97fbbef69ef74371ded39 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/multiply.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/subtract.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/subtract.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..657a07572b3da5e0aef163b6abe8f1c30b89f453 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/keras/src/layers/merging/__pycache__/subtract.cpython-310.pyc differ