import numpy as np

from keras.src import backend
from keras.src.api_export import keras_export
from keras.src.backend import config
from keras.src.backend.common import dtypes
from keras.src.backend.common import global_state
from keras.src.backend.common.name_scope import current_path
from keras.src.backend.common.stateless_scope import get_stateless_scope
from keras.src.backend.common.stateless_scope import in_stateless_scope
from keras.src.utils.module_utils import tensorflow as tf
from keras.src.utils.naming import auto_name

class Variable:
    """Represents a backend-agnostic variable in Keras.

    A `Variable` acts as a container for state. It holds a tensor value and
    can be updated. With the JAX backend, variables are used to implement
    "functionalization", the pattern of lifting stateful operations out of
    a piece of computation to turn it into a stateless function.

    Args:
        initializer: Initial value or callable for initialization.
            If a callable is used, it should take the arguments
            `shape` and `dtype`.
        shape: Optional. Tuple for the variable's shape.
            Required if `initializer` is a callable.
        dtype: Optional. Data type of the variable. Defaults to the global
            float dtype (`"float32"` if never configured).
        trainable: Optional. Boolean indicating if the variable is trainable.
            Defaults to `True`.
        autocast: Optional. Boolean indicating whether the variable supports
            autocasting. If `True`, the layer may first convert the variable
            to the compute data type when accessed. Defaults to `True`.
        aggregation: Optional string, one of `None`, `"none"`, `"mean"`,
            `"sum"` or `"only_first_replica"`, specifying how a distributed
            variable will be aggregated. This serves as a semantic
            annotation, to be taken into account by downstream backends or
            users. Defaults to `"none"`.
        synchronization: Optional string, one of `None`, `"none"`,
            `"on_read"`, `"on_write"` or `"auto"`, specifying when a
            distributed variable will be synchronized. Defaults to `"auto"`.
        name: Optional. A unique name for the variable. Automatically
            generated if not set.

    Attributes:
        shape: The shape of the variable (tuple of integers).
        ndim: The number of dimensions of the variable (integer).
        dtype: The data type of the variable (string).
        trainable: Whether the variable is trainable (boolean).
        autocast: Whether the variable supports autocasting (boolean).
        aggregation: How a distributed variable will be aggregated (string).
        synchronization: When a distributed variable will be synchronized
            (string).
        value: The current value of the variable (NumPy array or tensor).
        name: The name of the variable (string).
        path: The path of the variable within the Keras model or layer
            (string).

    Examples:

    **Initializing a `Variable` with a NumPy array:**

    ```python
    import numpy as np
    import keras
    initial_array = np.ones((3, 3))
    variable_from_array = keras.Variable(initializer=initial_array)
    ```

    **Using a Keras initializer to create a `Variable`:**

    ```python
    from keras.src.initializers import Ones
    variable_from_initializer = keras.Variable(
        initializer=Ones(), shape=(3, 3), dtype="float32"
    )
    ```

    **Updating the value of a `Variable`:**

    ```python
    new_value = np.zeros((3, 3), dtype="float32")
    variable_from_array.assign(new_value)
    ```

    **Marking a `Variable` as non-trainable:**

    ```python
    non_trainable_variable = keras.Variable(
        initializer=np.ones((3, 3), dtype="float32"), trainable=False
    )
    ```
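
    **Reading the value back** (a small usage sketch: `value` returns the
    current backend tensor, and `numpy()` copies it into a NumPy array):

    ```python
    current_tensor = variable_from_array.value
    current_array = variable_from_array.numpy()
    ```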
| | """ |
| |
|
| | def __init__( |
| | self, |
| | initializer, |
| | shape=None, |
| | dtype=None, |
| | trainable=True, |
| | autocast=True, |
| | aggregation="none", |
| | synchronization="auto", |
| | name=None, |
| | **kwargs, |
| | ): |
| | del kwargs |
| | name = name or auto_name(self.__class__.__name__) |
| | if not isinstance(name, str) or "/" in name: |
| | raise ValueError( |
| | "Argument `name` must be a string and " |
| | "cannot contain character `/`. " |
| | f"Received: name={name}" |
| | ) |
| | if aggregation not in ( |
| | None, |
| | "none", |
| | "mean", |
| | "sum", |
| | "only_first_replica", |
| | ): |
| | raise ValueError( |
| | "Invalid value for argument `aggregation`. Expected " |
| | "one of `None`, `'none'`, `'mean'`, `'sum'`, " |
| | "`'only_first_replica'`. " |
| | f"Received: aggregation={aggregation}" |
| | ) |
| | if aggregation is None: |
| | aggregation = "none" |
| | if synchronization not in ( |
| | None, |
| | "none", |
| | "on_read", |
| | "on_write", |
| | "auto", |
| | ): |
| | raise ValueError( |
| | "Invalid value for argument `synchronization`. Expected " |
| | "one of `None`, `'none'`, `'on_read'`, `'on_write'`, " |
| | "`'auto'`. " |
| | f"Received: synchronization={synchronization}" |
| | ) |
| | if synchronization is None: |
| | synchronization = "none" |
| | self._name = name |
| | parent_path = current_path() |
| | if parent_path: |
| | self._path = current_path() + "/" + name |
| | else: |
| | self._path = name |
| | self._shape = None |
| | self._initializer = None |
| | self._regularizer = None |
| | self._constraint = None |
| | self._trainable = bool(trainable) |
| | self._autocast = bool(autocast) |
| | self._aggregation = aggregation |
| | self._synchronization = synchronization |
| | |
| | |
| | |
| | self._overwrite_with_gradient = False |
| | if isinstance(initializer, str): |
| | from keras.src import initializers |
| |
|
| | initializer = initializers.get(initializer) |
| | if callable(initializer): |
| | if shape is None: |
| | raise ValueError( |
| | "When creating a Variable from an initializer, " |
| | "the `shape` argument should be specified. " |
| | f"Received: initializer={initializer} " |
| | f"and shape={shape}" |
| | ) |
| | else: |
| | initializer = self._convert_to_tensor(initializer, dtype=dtype) |
| | |
| | if dtype is None: |
| | dtype = initializer.dtype |
| | self._dtype = standardize_dtype(dtype) |
| |
|
| | if in_stateless_scope(): |
| | if callable(initializer): |
| | self._value = None |
| | self._initializer = initializer |
| | self._shape = self._validate_shape(shape) |
| | register_uninitialized_variable(self) |
| | else: |
| | raise ValueError( |
| | "You are attempting to create a variable " |
| | "while in a stateless scope. This is disallowed. " |
| | "Make sure that all variables are created " |
| | "before you start using your layer/model objects.\n\n" |
| | "In some cases, you might be seeing this error " |
| | "because you need to " |
| | "implement a `def build(self, input_shape)` method " |
| | "on your layer/model, which will " |
| | "create its variables.\n\n" |
| | "In some other cases, you might be seeing this error " |
| | "because you are instantiating a `Variable` and " |
| | "assigning it to a layer without going through " |
| | "self.add_variable()/self.add_weight(). Always prefer " |
| | "using these methods " |
| | "(with a `shape` and `initializer` argument)." |
| | ) |
| | else: |
| | if callable(initializer): |
| | self._shape = self._validate_shape(shape) |
| | self._initialize_with_initializer(initializer) |
| | else: |
| | self._initialize(initializer) |
| | self._shape = self._validate_shape(self._value.shape) |
| | self._ndim = len(self._shape) |
| |
|
| | def _deferred_initialize(self): |
| | if self._value is not None: |
| | raise ValueError(f"Variable {self.path} is already initialized.") |
| |
|
| | if in_stateless_scope(): |
| | raise ValueError( |
| | "You are attempting to initialize a variable " |
| | "while in a stateless scope. This is disallowed. " |
| | "Make sure that all variables are initialized " |
| | "before you start using your layer/model objects." |
| | ) |
| | self._initialize_with_initializer(self._initializer) |
| | self._initializer = None |
| |
|
| | def _validate_shape(self, shape): |
| | shape = standardize_shape(shape) |
| | if None in shape: |
| | raise ValueError( |
| | "Shapes used to initialize variables must be " |
| | "fully-defined (no `None` dimensions). Received: " |
| | f"shape={shape} for variable path='{self.path}'" |
| | ) |
| | return shape |
| |
|
| | def _maybe_autocast(self, value): |
| | autocast_scope = get_autocast_scope() |
| | if self._autocast and autocast_scope is not None: |
| | return autocast_scope.maybe_cast(value) |
| | return value |
| |
|
| | def numpy(self): |
| | return np.array(self) |
| |
|
| | @property |
| | def aggregation(self): |
| | """The strategy for aggregating this variable.""" |
| | return self._aggregation |
| |
|
| | @property |
| | def synchronization(self): |
| | """The strategy for synchronizing this variable.""" |
| | return self._synchronization |
| |
|
| | @property |
| | def value(self): |
| | """The current value of the variable (numpy array or backend tensor).""" |
| | if in_stateless_scope(): |
| | scope = get_stateless_scope() |
| | value = scope.get_current_value(self) |
| | if value is not None: |
| | return self._maybe_autocast(value) |
| | if self._value is None: |
| | |
| | |
| | |
| | |
| | return self._maybe_autocast( |
| | self._initializer(self._shape, dtype=self._dtype) |
| | ) |
| | return self._maybe_autocast(self._value) |
| |
|
| | def assign(self, value): |
| | value = self._convert_to_tensor(value, dtype=self.dtype) |
| | if not shape_equal(value.shape, self.shape): |
| | raise ValueError( |
| | "The shape of the target variable and " |
| | "the shape of the target value in " |
| | "`variable.assign(value)` must match. " |
| | f"variable.shape={self.value.shape}, " |
| | f"Received: value.shape={value.shape}. " |
| | f"Target variable: {self}" |
| | ) |
| | if in_stateless_scope(): |
| | scope = get_stateless_scope() |
| | scope.add_update((self, value)) |
| | else: |
| | self._direct_assign(value) |
| | return value |
| |
|
| | def assign_add(self, value): |
| | return self.assign(self + value) |
| |
|
| | def assign_sub(self, value): |
| | return self.assign(self - value) |
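
    # A small, hedged usage sketch of the assignment API above. `assign`
    # validates the shape and returns the assigned tensor; `assign_add` /
    # `assign_sub` are built on the arithmetic operators defined below:
    #
    #   v = keras.Variable(initializer=np.zeros((2,)), dtype="float32")
    #   v.assign_add(np.ones((2,)))  # v is now [1., 1.]
    #   v.assign(np.ones((3,)))      # raises ValueError: shape mismatch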

    @property
    def dtype(self):
        """The data type of the variable."""
        autocast_scope = get_autocast_scope()
        if (
            self._autocast
            and autocast_scope is not None
            and is_float_dtype(self._dtype)
        ):
            dtype = autocast_scope.dtype
        else:
            dtype = self._dtype
        return backend.standardize_dtype(dtype)

    @property
    def shape(self):
        """The shape of the variable."""
        return self._shape

    @property
    def ndim(self):
        """The number of dimensions of the variable."""
        return self._ndim

    @property
    def trainable(self):
        """Whether the variable is trainable."""
        return self._trainable

    @trainable.setter
    def trainable(self, value):
        self._trainable = bool(value)

    @property
    def name(self):
        """The name of the variable."""
        return self._name

    @property
    def path(self):
        """The path of the variable within the Keras model or layer."""
        return self._path

    @property
    def overwrite_with_gradient(self):
        """Whether this variable should be overwritten by the gradient.

        This property is designed for a special case where we want to
        overwrite the variable directly with its computed gradient. For
        example, in float8 training, new `scale` and `amax_history` are
        computed as gradients, and we want to overwrite them directly instead
        of following the typical procedure such as gradient descent with a
        learning rate, gradient clipping and weight decay.
        """
        return self._overwrite_with_gradient

    @overwrite_with_gradient.setter
    def overwrite_with_gradient(self, value):
        if not isinstance(value, bool):
            raise TypeError(
                "`overwrite_with_gradient` must be a boolean. "
                f"Received: {value}"
            )
        self._overwrite_with_gradient = value
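
    # Hedged sketch: setting this flag only annotates the variable; it is up
    # to downstream consumers (e.g. optimizers that support the float8
    # workflow described above) to act on it.
    #
    #   scale = keras.Variable(initializer=np.ones(()), dtype="float32")
    #   scale.overwrite_with_gradient = True  # must be a bool, else TypeError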

    @property
    def regularizer(self):
        return self._regularizer

    @regularizer.setter
    def regularizer(self, value):
        from keras.src.regularizers import Regularizer

        if value is not None and not isinstance(value, Regularizer):
            raise ValueError(
                "Invalid value for attribute `regularizer`. Expected an "
                "instance of `keras.regularizers.Regularizer`, or `None`. "
                f"Received: regularizer={value}"
            )
        self._regularizer = value

    @property
    def constraint(self):
        return self._constraint

    @constraint.setter
    def constraint(self, value):
        from keras.src.constraints import Constraint

        if value is not None and not isinstance(value, Constraint):
            raise ValueError(
                "Invalid value for attribute `constraint`. Expected an "
                "instance of `keras.constraints.Constraint`, or `None`. "
                f"Received: constraint={value}"
            )
        self._constraint = value

    def __repr__(self):
        value = None
        if hasattr(self, "_value") and self._value is not None:
            value = backend.core.convert_to_numpy(self._value)
        value_str = f", value={value}" if value is not None else ""
        return (
            f"<Variable path={self.path}, shape={self.shape}, "
            f"dtype={self.dtype}{value_str}>"
        )

    def _initialize(self, value):
        raise NotImplementedError

    def _initialize_with_initializer(self, initializer):
        value = self._convert_to_tensor(
            initializer(self._shape, dtype=self._dtype)
        )
        self._initialize(value)

    def _convert_to_tensor(self, value, dtype=None):
        raise NotImplementedError

    def __getitem__(self, idx):
        return self.value.__getitem__(idx)

    def __int__(self):
        if self.ndim > 0:
            raise TypeError(
                "Only scalar arrays can be converted to Python scalars. "
                f"Got: shape={self.shape}"
            )
        return int(self.value)

    def __float__(self):
        if self.ndim > 0:
            raise TypeError(
                "Only scalar arrays can be converted to Python scalars. "
                f"Got: shape={self.shape}"
            )
        return float(self.value)

    def __array__(self, dtype=None):
        # Avoid returning `self.value.__array__(dtype)` directly: for a
        # scalar value, some backends return a Python scalar rather than an
        # array-like, which fails NumPy's type check. `np.asarray` guarantees
        # an array (0-d for scalars).
        return np.asarray(self.value.__array__(dtype))

    def __bool__(self):
        raise TypeError("A Keras Variable cannot be used as a boolean.")

    def __neg__(self):
        return self.value.__neg__()

    def __pos__(self):
        return self.value

    def __abs__(self):
        return self.value.__abs__()

    def __invert__(self):
        return self.value.__invert__()

    def __eq__(self, other):
        return backend.numpy.equal(self.value, other)

    def __ne__(self, other):
        return backend.numpy.not_equal(self.value, other)

    def __lt__(self, other):
        return backend.numpy.less(self.value, other)

    def __le__(self, other):
        return backend.numpy.less_equal(self.value, other)

    def __gt__(self, other):
        return backend.numpy.greater(self.value, other)

    def __ge__(self, other):
        return backend.numpy.greater_equal(self.value, other)

    def __add__(self, other):
        return backend.numpy.add(self.value, other)

    def __radd__(self, other):
        return backend.numpy.add(other, self.value)

    def __sub__(self, other):
        return backend.numpy.subtract(self.value, other)

    def __rsub__(self, other):
        return backend.numpy.subtract(other, self.value)

    def __mul__(self, other):
        return backend.numpy.multiply(self.value, other)

    def __rmul__(self, other):
        return backend.numpy.multiply(other, self.value)

    def __truediv__(self, other):
        return backend.numpy.true_divide(self.value, other)

    def __rtruediv__(self, other):
        return backend.numpy.true_divide(other, self.value)

    def __floordiv__(self, other):
        return backend.numpy.floor_divide(self.value, other)

    def __rfloordiv__(self, other):
        return backend.numpy.floor_divide(other, self.value)

    def __mod__(self, other):
        return backend.numpy.mod(self.value, other)

    def __rmod__(self, other):
        return backend.numpy.mod(other, self.value)

    def __pow__(self, other):
        return backend.numpy.power(self.value, other)

    def __rpow__(self, other):
        return backend.numpy.power(other, self.value)

    def __matmul__(self, other):
        return backend.numpy.matmul(self.value, other)

    def __rmatmul__(self, other):
        return backend.numpy.matmul(other, self.value)

    def __and__(self, other):
        return backend.numpy.logical_and(self.value, other)

    def __rand__(self, other):
        return backend.numpy.logical_and(other, self.value)

    def __or__(self, other):
        return backend.numpy.logical_or(self.value, other)

    def __ror__(self, other):
        return backend.numpy.logical_or(other, self.value)

    def __xor__(self, other):
        return backend.numpy.logical_xor(self.value, other)

    def __rxor__(self, other):
        return backend.numpy.logical_xor(other, self.value)

    def __round__(self, ndigits=None):
        decimals = ndigits or 0
        return backend.numpy.round(self.value, decimals=decimals)
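
# Note (hedged): the operator overloads above dispatch to `backend.numpy` ops
# on `self.value`, so arithmetic on a `Variable` returns a backend tensor,
# not a new `Variable`:
#
#   v = keras.Variable(initializer=np.ones((2, 2)))
#   t = v + 1.0  # `t` is a backend tensor; use `v.assign(t)` to write back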


def register_uninitialized_variable(variable):
    uninitialized_variables = global_state.get_global_attribute(
        "uninitialized_variables", [], set_to_default=True
    )
    uninitialized_variables.append(variable)


def initialize_all_variables():
    collection = global_state.get_global_attribute("uninitialized_variables")
    if collection:
        for v in collection:
            v._deferred_initialize()
    global_state.set_global_attribute("uninitialized_variables", [])
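
# Hedged sketch of the deferred-initialization flow these two helpers
# implement: variables created inside a stateless scope register themselves
# here (see `Variable.__init__`), and a later call to
# `initialize_all_variables()` materializes them all at once:
#
#   # ... create variables inside a stateless scope, then, outside it:
#   initialize_all_variables()  # runs each pending `_deferred_initialize()`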


@keras_export(
    ["keras.utils.standardize_dtype", "keras.backend.standardize_dtype"]
)
def standardize_dtype(dtype):
    if dtype is None:
        return config.floatx()
    dtype = dtypes.PYTHON_DTYPES_MAP.get(dtype, dtype)
    if hasattr(dtype, "name"):
        dtype = dtype.name
    elif hasattr(dtype, "__name__"):
        dtype = dtype.__name__
    elif hasattr(dtype, "__str__") and (
        "torch" in str(dtype) or "jax.numpy" in str(dtype)
    ):
        dtype = str(dtype).split(".")[-1]
    if dtype not in dtypes.ALLOWED_DTYPES:
        raise ValueError(f"Invalid dtype: {dtype}")
    return dtype
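
# A few hedged examples of the normalization above (outputs follow from the
# branches in `standardize_dtype`; `None` falls back to the configured
# float dtype):
#
#   standardize_dtype(None)               # -> config.floatx(), e.g. "float32"
#   standardize_dtype(np.dtype("int32"))  # -> "int32" (via `.name`)
#   standardize_dtype("bad_type")         # -> ValueError: Invalid dtype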


def standardize_shape(shape):
    if not isinstance(shape, tuple):
        if shape is None:
            raise ValueError("Undefined shapes are not supported.")
        if not hasattr(shape, "__iter__"):
            raise ValueError(f"Cannot convert '{shape}' to a shape.")
        if config.backend() == "tensorflow":
            if isinstance(shape, tf.TensorShape):
                # `tf.TensorShape` may contain `Dimension` objects;
                # `as_list()` converts its entries to `int` or `None`.
                shape = shape.as_list()
        shape = tuple(shape)

    if config.backend() == "torch":
        # `shape` may be a `torch.Size`; normalize each entry to a plain
        # `int` (or `None`).
        shape = tuple(map(lambda x: int(x) if x is not None else None, shape))

    for e in shape:
        if e is None:
            continue
        if config.backend() == "jax" and "_DimExpr" in str(type(e)):
            # JAX shape polymorphism uses symbolic dimension expressions;
            # accept them as-is.
            continue
        if not is_int_dtype(type(e)):
            raise ValueError(
                f"Cannot convert '{shape}' to a shape. "
                f"Found invalid entry '{e}' of type '{type(e)}'."
            )
        if e < 0:
            raise ValueError(
                f"Cannot convert '{shape}' to a shape. "
                "Negative dimensions are not allowed."
            )
    return shape
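
# Hedged examples of `standardize_shape` (all follow from the checks above):
#
#   standardize_shape([3, 4])     # -> (3, 4)
#   standardize_shape((3, None))  # -> (3, None)  (`None` entries pass through)
#   standardize_shape((3, -1))    # -> ValueError (negative dimension)
#   standardize_shape(None)       # -> ValueError (undefined shape)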


def shape_equal(a_shape, b_shape):
    """Return whether `a_shape == b_shape` (allows `None` entries)."""
    if len(a_shape) != len(b_shape):
        return False
    for e1, e2 in zip(a_shape, b_shape):
        if e1 is not None and e2 is not None and e1 != e2:
            return False
    return True
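
# Hedged examples: a `None` entry acts as a wildcard matching any dimension.
#
#   shape_equal((3, 4), (3, 4))     # -> True
#   shape_equal((3, None), (3, 7))  # -> True  (None matches anything)
#   shape_equal((3, 4), (3, 4, 1))  # -> False (rank mismatch)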


@keras_export("keras.backend.is_float_dtype")
def is_float_dtype(dtype):
    dtype = standardize_dtype(dtype)
    return dtype.startswith("float") or dtype.startswith("bfloat")


@keras_export("keras.backend.is_int_dtype")
def is_int_dtype(dtype):
    dtype = standardize_dtype(dtype)
    return dtype.startswith("int") or dtype.startswith("uint")
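
# Hedged examples; both predicates standardize the dtype first, so dtype
# objects and Python types are also accepted:
#
#   is_float_dtype("bfloat16")  # -> True
#   is_int_dtype("uint8")       # -> True
#   is_int_dtype("float32")     # -> False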


def get_autocast_scope():
    return global_state.get_global_attribute("autocast_scope")


class AutocastScope:
    """Context manager that enables the autocasting of float variables.

    Under this context manager, float `Variable`s will be cast to `dtype`
    (note that `dtype` must also be float).
    """

    def __init__(self, dtype):
        if dtype is not None:
            dtype = standardize_dtype(dtype)
            if not is_float_dtype(dtype):
                raise ValueError(
                    "`AutocastScope` can only be used with "
                    "a floating-point target dtype, such as 'float16'. "
                    f"Received: dtype={dtype}"
                )
        self.dtype = dtype
        self.original_scope = None

    def maybe_cast(self, value):
        from keras.src import backend

        if self.dtype is not None and is_float_dtype(value.dtype):
            return backend.cast(value, dtype=self.dtype)
        return value

    def __enter__(self):
        self.original_scope = get_autocast_scope()
        global_state.set_global_attribute("autocast_scope", self)

    def __exit__(self, *args, **kwargs):
        global_state.set_global_attribute(
            "autocast_scope", self.original_scope
        )
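
# Hedged usage sketch: entering the scope sets the global autocast target,
# and `Variable.value` (via `_maybe_autocast`) casts float variables on
# read; non-float variables are left untouched:
#
#   v = keras.Variable(initializer=np.ones((2,)), dtype="float32")
#   with AutocastScope("float16"):
#       x = v.value  # read back as float16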
|