# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""Contains the normalization layer classes and their functional aliases."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.keras import layers as keras_layers
from tensorflow.python.layers import base
from tensorflow.python.ops import init_ops
from tensorflow.python.util.tf_export import tf_export

@tf_export('layers.BatchNormalization')
class BatchNormalization(keras_layers.BatchNormalization, base.Layer):
| """Batch Normalization layer from http://arxiv.org/abs/1502.03167. | |
| "Batch Normalization: Accelerating Deep Network Training by Reducing | |
| Internal Covariate Shift" | |
| Sergey Ioffe, Christian Szegedy | |
| Arguments: | |
| axis: An `int` or list of `int`, the axis or axes that should be | |
| normalized, typically the features axis/axes. For instance, after a | |
| `Conv2D` layer with `data_format="channels_first"`, set `axis=1`. If a | |
| list of axes is provided, each axis in `axis` will be normalized | |
| simultaneously. Default is `-1` which uses the last axis. Note: when | |
| using multi-axis batch norm, the `beta`, `gamma`, `moving_mean`, and | |
| `moving_variance` variables are the same rank as the input Tensor, with | |
| dimension size 1 in all reduced (non-axis) dimensions). | |
| momentum: Momentum for the moving average. | |
| epsilon: Small float added to variance to avoid dividing by zero. | |
| center: If True, add offset of `beta` to normalized tensor. If False, `beta` | |
| is ignored. | |
| scale: If True, multiply by `gamma`. If False, `gamma` is | |
| not used. When the next layer is linear (also e.g. `nn.relu`), this can be | |
| disabled since the scaling can be done by the next layer. | |
| beta_initializer: Initializer for the beta weight. | |
| gamma_initializer: Initializer for the gamma weight. | |
| moving_mean_initializer: Initializer for the moving mean. | |
| moving_variance_initializer: Initializer for the moving variance. | |
| beta_regularizer: Optional regularizer for the beta weight. | |
| gamma_regularizer: Optional regularizer for the gamma weight. | |
| beta_constraint: An optional projection function to be applied to the `beta` | |
| weight after being updated by an `Optimizer` (e.g. used to implement | |
| norm constraints or value constraints for layer weights). The function | |
| must take as input the unprojected variable and must return the | |
| projected variable (which must have the same shape). Constraints are | |
| not safe to use when doing asynchronous distributed training. | |
| gamma_constraint: An optional projection function to be applied to the | |
| `gamma` weight after being updated by an `Optimizer`. | |
| renorm: Whether to use Batch Renormalization | |
| (https://arxiv.org/abs/1702.03275). This adds extra variables during | |
| training. The inference is the same for either value of this parameter. | |
| renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to | |
| scalar `Tensors` used to clip the renorm correction. The correction | |
| `(r, d)` is used as `corrected_value = normalized_value * r + d`, with | |
| `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin, | |
| dmax are set to inf, 0, inf, respectively. | |
| renorm_momentum: Momentum used to update the moving means and standard | |
| deviations with renorm. Unlike `momentum`, this affects training | |
| and should be neither too small (which would add noise) nor too large | |
| (which would give stale estimates). Note that `momentum` is still applied | |
| to get the means and variances for inference. | |
| fused: if `None` or `True`, use a faster, fused implementation if possible. | |
| If `False`, use the system recommended implementation. | |
| trainable: Boolean, if `True` also add variables to the graph collection | |
| `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). | |
| virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, | |
| which means batch normalization is performed across the whole batch. When | |
| `virtual_batch_size` is not `None`, instead perform "Ghost Batch | |
| Normalization", which creates virtual sub-batches which are each | |
| normalized separately (with shared gamma, beta, and moving statistics). | |
| Must divide the actual batch size during execution. | |
| adjustment: A function taking the `Tensor` containing the (dynamic) shape of | |
| the input tensor and returning a pair (scale, bias) to apply to the | |
| normalized values (before gamma and beta), only during training. For | |
| example, if axis==-1, | |
| `adjustment = lambda shape: ( | |
| tf.random_uniform(shape[-1:], 0.93, 1.07), | |
| tf.random_uniform(shape[-1:], -0.1, 0.1))` | |
| will scale the normalized value by up to 7% up or down, then shift the | |
| result by up to 0.1 (with independent scaling and bias for each feature | |
| but shared across all examples), and finally apply gamma and/or beta. If | |
| `None`, no adjustment is applied. Cannot be specified if | |
| virtual_batch_size is specified. | |
| name: A string, the name of the layer. | |
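
  Example:

  A minimal usage sketch, assuming the TF 1.x graph API and a hypothetical
  4-D `images` tensor in `channels_last` format (names here are illustrative,
  not part of the API):

  ```python
  bn = tf.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=1e-3)
  # `training=True` normalizes with batch statistics and updates the moving
  # averages; `training=False` normalizes with the moving statistics.
  outputs = bn(images, training=True)
  ```

  In graph mode, the moving-average update ops land in
  `tf.GraphKeys.UPDATE_OPS` and must run alongside the train op, as described
  in `batch_normalization` below.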
| """ | |
  def __init__(self,
               axis=-1,
               momentum=0.99,
               epsilon=1e-3,
               center=True,
               scale=True,
               beta_initializer=init_ops.zeros_initializer(),
               gamma_initializer=init_ops.ones_initializer(),
               moving_mean_initializer=init_ops.zeros_initializer(),
               moving_variance_initializer=init_ops.ones_initializer(),
               beta_regularizer=None,
               gamma_regularizer=None,
               beta_constraint=None,
               gamma_constraint=None,
               renorm=False,
               renorm_clipping=None,
               renorm_momentum=0.99,
               fused=None,
               trainable=True,
               virtual_batch_size=None,
               adjustment=None,
               name=None,
               **kwargs):
    super(BatchNormalization, self).__init__(
        axis=axis,
        momentum=momentum,
        epsilon=epsilon,
        center=center,
        scale=scale,
        beta_initializer=beta_initializer,
        gamma_initializer=gamma_initializer,
        moving_mean_initializer=moving_mean_initializer,
        moving_variance_initializer=moving_variance_initializer,
        beta_regularizer=beta_regularizer,
        gamma_regularizer=gamma_regularizer,
        beta_constraint=beta_constraint,
        gamma_constraint=gamma_constraint,
        renorm=renorm,
        renorm_clipping=renorm_clipping,
        renorm_momentum=renorm_momentum,
        fused=fused,
        trainable=trainable,
        virtual_batch_size=virtual_batch_size,
        adjustment=adjustment,
        name=name,
        **kwargs)

  def call(self, inputs, training=False):
    return super(BatchNormalization, self).call(inputs, training=training)


@tf_export('layers.batch_normalization')
def batch_normalization(inputs,
                        axis=-1,
                        momentum=0.99,
                        epsilon=1e-3,
                        center=True,
                        scale=True,
                        beta_initializer=init_ops.zeros_initializer(),
                        gamma_initializer=init_ops.ones_initializer(),
                        moving_mean_initializer=init_ops.zeros_initializer(),
                        moving_variance_initializer=init_ops.ones_initializer(),
                        beta_regularizer=None,
                        gamma_regularizer=None,
                        beta_constraint=None,
                        gamma_constraint=None,
                        training=False,
                        trainable=True,
                        name=None,
                        reuse=None,
                        renorm=False,
                        renorm_clipping=None,
                        renorm_momentum=0.99,
                        fused=None,
                        virtual_batch_size=None,
                        adjustment=None):
| """Functional interface for the batch normalization layer. | |
| Reference: http://arxiv.org/abs/1502.03167 | |
| "Batch Normalization: Accelerating Deep Network Training by Reducing | |
| Internal Covariate Shift" | |
| Sergey Ioffe, Christian Szegedy | |
| Note: when training, the moving_mean and moving_variance need to be updated. | |
| By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they | |
| need to be added as a dependency to the `train_op`. Also, be sure to add | |
| any batch_normalization ops before getting the update_ops collection. | |
| Otherwise, update_ops will be empty, and training/inference will not work | |
| properly. For example: | |
| ```python | |
| x_norm = tf.layers.batch_normalization(x, training=training) | |
| # ... | |
| update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) | |
| with tf.control_dependencies(update_ops): | |
| train_op = optimizer.minimize(loss) | |
| ``` | |
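
  Since `training` may be either a Python boolean or a boolean scalar tensor,
  a single graph can serve both phases. A minimal sketch of that pattern
  (assuming a hypothetical input tensor `x`; the placeholder name is
  illustrative):

  ```python
  training = tf.placeholder_with_default(False, shape=(), name='training')
  x_norm = tf.layers.batch_normalization(x, training=training)
  # Feed {training: True} on optimization steps; the default covers eval.
  ```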

  Arguments:
    inputs: Tensor input.
    axis: An `int`, the axis that should be normalized (typically the features
      axis). For instance, after a `Convolution2D` layer with
      `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
    momentum: Momentum for the moving average.
    epsilon: Small float added to variance to avoid dividing by zero.
    center: If True, add offset of `beta` to normalized tensor. If False,
      `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is not used. When
      the next layer is linear (e.g. `nn.relu`), this can be disabled since
      the scaling can be done by the next layer.
    beta_initializer: Initializer for the beta weight.
    gamma_initializer: Initializer for the gamma weight.
    moving_mean_initializer: Initializer for the moving mean.
    moving_variance_initializer: Initializer for the moving variance.
    beta_regularizer: Optional regularizer for the beta weight.
    gamma_regularizer: Optional regularizer for the gamma weight.
    beta_constraint: An optional projection function to be applied to the
      `beta` weight after being updated by an `Optimizer` (e.g. used to
      implement norm constraints or value constraints for layer weights). The
      function must take as input the unprojected variable and must return the
      projected variable (which must have the same shape). Constraints are not
      safe to use when doing asynchronous distributed training.
    gamma_constraint: An optional projection function to be applied to the
      `gamma` weight after being updated by an `Optimizer`.
    training: Either a Python boolean, or a TensorFlow boolean scalar tensor
      (e.g. a placeholder). Whether to return the output in training mode
      (normalized with statistics of the current batch) or in inference mode
      (normalized with moving statistics). **NOTE**: make sure to set this
      parameter correctly, or else your training/inference will not work
      properly.
    trainable: Boolean, if `True`, also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    name: String, the name of the layer.
    reuse: Boolean, whether to reuse the weights of a previous layer by the
      same name.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction (see the sketch
      after this list). The correction `(r, d)` is used as
      `corrected_value = normalized_value * r + d`, with `r` clipped to
      [rmin, rmax], and `d` to [-dmax, dmax]. Missing 'rmax', 'rmin', 'dmax'
      are set to inf, 0, inf, respectively.
    renorm_momentum: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training and
      should be neither too small (which would add noise) nor too large (which
      would give stale estimates). Note that `momentum` is still applied to
      get the means and variances for inference.
    fused: If `None` or `True`, use a faster, fused implementation if
      possible. If `False`, use the system recommended implementation.
    virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`,
      which means batch normalization is performed across the whole batch.
      When `virtual_batch_size` is not `None`, instead perform "Ghost Batch
      Normalization", which creates virtual sub-batches that are each
      normalized separately (with shared gamma, beta, and moving statistics).
      Must divide the actual batch size during execution.
    adjustment: A function taking the `Tensor` containing the (dynamic) shape
      of the input tensor and returning a pair `(scale, bias)` to apply to the
      normalized values (before gamma and beta), only during training. For
      example, if `axis == -1`,
        `adjustment = lambda shape: (
            tf.random_uniform(shape[-1:], 0.93, 1.07),
            tf.random_uniform(shape[-1:], -0.1, 0.1))`
      will scale the normalized value by up to 7% up or down, then shift the
      result by up to 0.1 (with independent scaling and bias for each feature
      but shared across all examples), and finally apply gamma and/or beta. If
      `None`, no adjustment is applied. Cannot be specified if
      `virtual_batch_size` is specified.
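
  For instance, renorm could be enabled like this (a sketch; the clipping
  thresholds are illustrative, not tuned recommendations):

  ```python
  x_norm = tf.layers.batch_normalization(
      x,
      training=training,
      renorm=True,
      renorm_clipping={'rmax': 3.0, 'rmin': 1.0 / 3.0, 'dmax': 5.0},
      renorm_momentum=0.99)
  ```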

  Returns:
    Output tensor.

  Raises:
    ValueError: if eager execution is enabled.
  """
  layer = BatchNormalization(
      axis=axis,
      momentum=momentum,
      epsilon=epsilon,
      center=center,
      scale=scale,
      beta_initializer=beta_initializer,
      gamma_initializer=gamma_initializer,
      moving_mean_initializer=moving_mean_initializer,
      moving_variance_initializer=moving_variance_initializer,
      beta_regularizer=beta_regularizer,
      gamma_regularizer=gamma_regularizer,
      beta_constraint=beta_constraint,
      gamma_constraint=gamma_constraint,
      renorm=renorm,
      renorm_clipping=renorm_clipping,
      renorm_momentum=renorm_momentum,
      fused=fused,
      trainable=trainable,
      virtual_batch_size=virtual_batch_size,
      adjustment=adjustment,
      name=name,
      _reuse=reuse,
      _scope=name)
  return layer.apply(inputs, training=training)


# Aliases

BatchNorm = BatchNormalization
batch_norm = batch_normalization