# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| """Optimizer factory class.""" | |
| from typing import Callable, List, Optional, Tuple, Union | |
| import gin | |
| import tensorflow as tf, tf_keras | |
| from official.modeling.optimization import slide_optimizer | |
| from official.modeling.optimization import adafactor_optimizer | |
| from official.modeling.optimization import ema_optimizer | |
| from official.modeling.optimization import lamb | |
| from official.modeling.optimization import lars | |
| from official.modeling.optimization import legacy_adamw | |
| from official.modeling.optimization import lr_schedule | |
| from official.modeling.optimization.configs import optimization_config as opt_cfg | |
# Optimizer CLS to be used in both legacy and new path.
SHARED_OPTIMIZERS = {
    'sgd_experimental': tf_keras.optimizers.experimental.SGD,
    'adam_experimental': tf_keras.optimizers.experimental.Adam,
    'adamw': legacy_adamw.AdamWeightDecay,
    'adamw_experimental': tf_keras.optimizers.experimental.AdamW,
    'lamb': lamb.LAMB,
    'lars': lars.LARS,
    'slide': slide_optimizer.SLIDE,
    'adafactor': adafactor_optimizer.Adafactor,
    'adafactor_keras': tf_keras.optimizers.Adafactor,
}

LEGACY_OPTIMIZERS_CLS = {
    'sgd': tf_keras.optimizers.legacy.SGD,
    'adam': tf_keras.optimizers.legacy.Adam,
    'rmsprop': tf_keras.optimizers.legacy.RMSprop,
    'adagrad': tf_keras.optimizers.legacy.Adagrad,
}
LEGACY_OPTIMIZERS_CLS.update(SHARED_OPTIMIZERS)

NEW_OPTIMIZERS_CLS = {
    'sgd': tf_keras.optimizers.experimental.SGD,
    'adam': tf_keras.optimizers.experimental.Adam,
    'rmsprop': tf_keras.optimizers.experimental.RMSprop,
    'adagrad': tf_keras.optimizers.experimental.Adagrad,
}
NEW_OPTIMIZERS_CLS.update(SHARED_OPTIMIZERS)

LR_CLS = {
    'stepwise': lr_schedule.PiecewiseConstantDecayWithOffset,
    'polynomial': lr_schedule.PolynomialDecayWithOffset,
    'exponential': lr_schedule.ExponentialDecayWithOffset,
    'cosine': lr_schedule.CosineDecayWithOffset,
    'power': lr_schedule.DirectPowerDecay,
    'power_linear': lr_schedule.PowerAndLinearDecay,
    'power_with_offset': lr_schedule.PowerDecayWithOffset,
    'step_cosine_with_offset': lr_schedule.StepCosineDecayWithOffset,
}

WARMUP_CLS = {
    'linear': lr_schedule.LinearWarmup,
    'polynomial': lr_schedule.PolynomialWarmUp
}
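# Illustrative note (derived from the factory code below, not part of the
# original file): the keys in LR_CLS and WARMUP_CLS correspond to the `type`
# field of the learning rate / warmup configs. For example, a learning rate
# config of the form {'type': 'cosine', 'cosine': {...}} is resolved to
# lr_schedule.CosineDecayWithOffset via LR_CLS['cosine'].
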
def register_optimizer_cls(key: str,
                           optimizer_config_cls: Union[
                               tf_keras.optimizers.Optimizer,
                               tf_keras.optimizers.legacy.Optimizer,
                               tf_keras.optimizers.experimental.Optimizer
                           ],
                           use_legacy_optimizer: bool = True):
  """Registers a customized optimizer class.

  The user will still need to subclass data classes in
  configs.optimization_config to be used with OptimizerFactory.

  Args:
    key: A string key that the optimizer_config_cls is registered with.
    optimizer_config_cls: A class which inherits tf_keras.optimizers.Optimizer.
    use_legacy_optimizer: A boolean that indicates if using legacy optimizers.
  """
  if use_legacy_optimizer:
    if key in LEGACY_OPTIMIZERS_CLS:
      raise ValueError('%s already registered in LEGACY_OPTIMIZERS_CLS.' % key)
    LEGACY_OPTIMIZERS_CLS[key] = optimizer_config_cls
  else:
    if key in NEW_OPTIMIZERS_CLS:
      raise ValueError('%s already registered in NEW_OPTIMIZERS_CLS.' % key)
    NEW_OPTIMIZERS_CLS[key] = optimizer_config_cls

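# Illustrative sketch (not part of the library): registering a custom optimizer
# class under a new key. `MyCustomSGD` is a hypothetical user-defined subclass
# of tf_keras.optimizers.Optimizer; a matching config dataclass in
# configs.optimization_config is still required before OptimizerFactory can
# build it from a config.
#
#   class MyCustomSGD(tf_keras.optimizers.Optimizer):
#     ...
#
#   register_optimizer_cls('my_custom_sgd', MyCustomSGD,
#                          use_legacy_optimizer=False)
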
class OptimizerFactory:
  """Optimizer factory class.

  This class builds the learning rate and the optimizer based on an
  optimization config. To use this class, you need to do the following:
  (1) Define an optimization config; this includes the optimizer and the
      learning rate schedule.
  (2) Initialize the class using the optimization config.
  (3) Build the learning rate.
  (4) Build the optimizer.

  This is a typical example for using this class:

  ```
  params = {
      'optimizer': {
          'type': 'sgd',
          'sgd': {'momentum': 0.9}
      },
      'learning_rate': {
          'type': 'stepwise',
          'stepwise': {'boundaries': [10000, 20000],
                       'values': [0.1, 0.01, 0.001]}
      },
      'warmup': {
          'type': 'linear',
          'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01}
      }
  }
  opt_config = OptimizationConfig(params)
  opt_factory = OptimizerFactory(opt_config)
  lr = opt_factory.build_learning_rate()
  optimizer = opt_factory.build_optimizer(lr)
  ```
  """

  def __init__(self, config: opt_cfg.OptimizationConfig):
    """Initializing OptimizerFactory.

    Args:
      config: OptimizationConfig instance containing the optimization config.
    """
    self._config = config
    self._optimizer_config = config.optimizer.get()
    self._optimizer_type = config.optimizer.type

    self._use_ema = config.ema is not None
    self._ema_config = config.ema

    if self._optimizer_config is None:
      raise ValueError('Optimizer type must be specified')

    self._lr_config = config.learning_rate.get()
    self._lr_type = config.learning_rate.type

    if self._lr_type is None:
      raise ValueError('Learning rate type must be specified')

    self._warmup_config = config.warmup.get()
    self._warmup_type = config.warmup.type
  def build_learning_rate(self):
    """Builds the learning rate.

    Builds the learning rate from the config. The learning rate schedule is
    built according to the learning rate config. If the learning rate type is
    constant, lr_config.learning_rate is returned.

    Returns:
      A tf_keras.optimizers.schedules.LearningRateSchedule instance. If the
      learning rate type is constant, lr_config.learning_rate is returned.
    """
    if self._lr_type == 'constant':
      lr = self._lr_config.learning_rate
    else:
      lr = LR_CLS[self._lr_type](**self._lr_config.as_dict())

    if self._warmup_config:
      lr = WARMUP_CLS[self._warmup_type](lr, **self._warmup_config.as_dict())

    return lr
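  # Illustrative sketch (assumption, not part of the library): with the
  # stepwise + linear warmup config from the class docstring, the method above
  # is roughly equivalent to building the schedule and wrapping it in warmup
  # by hand:
  #
  #   lr = lr_schedule.PiecewiseConstantDecayWithOffset(
  #       boundaries=[10000, 20000], values=[0.1, 0.01, 0.001])
  #   lr = lr_schedule.LinearWarmup(
  #       lr, warmup_steps=500, warmup_learning_rate=0.01)
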
  def build_optimizer(
      self,
      lr: Union[tf_keras.optimizers.schedules.LearningRateSchedule, float],
      gradient_aggregator: Optional[Callable[
          [List[Tuple[tf.Tensor, tf.Tensor]]], List[Tuple[tf.Tensor,
                                                          tf.Tensor]]]] = None,
      gradient_transformers: Optional[List[Callable[
          [List[Tuple[tf.Tensor, tf.Tensor]]], List[Tuple[tf.Tensor,
                                                          tf.Tensor]]]]] = None,
      postprocessor: Optional[Callable[[tf_keras.optimizers.Optimizer],
                                       tf_keras.optimizers.Optimizer]] = None,
      use_legacy_optimizer: bool = True):
    """Builds the optimizer.

    Builds the optimizer from the config. It takes the learning rate as input
    and builds the optimizer according to the optimizer config. Typically, the
    learning rate built using self.build_learning_rate() is passed as an
    argument to this method.

    Args:
      lr: A floating point value, or a
        tf_keras.optimizers.schedules.LearningRateSchedule instance.
      gradient_aggregator: Optional function to overwrite gradient aggregation.
      gradient_transformers: Optional list of functions to use to transform
        gradients before applying updates to Variables. The functions are
        applied after gradient_aggregator. The functions should accept and
        return a list of (gradient, variable) tuples. clipvalue, clipnorm,
        global_clipnorm should not be set when gradient_transformers is passed.
      postprocessor: An optional function for postprocessing the optimizer. It
        takes an optimizer and returns an optimizer.
      use_legacy_optimizer: A boolean that indicates if using legacy optimizers.

    Returns:
      A `tf_keras.optimizers.legacy.Optimizer` or
      `tf_keras.optimizers.experimental.Optimizer` instance.
    """
    optimizer_dict = self._optimizer_config.as_dict()
    ## Delete clipnorm, clipvalue, global_clipnorm if None.
    if optimizer_dict['clipnorm'] is None:
      del optimizer_dict['clipnorm']
    if optimizer_dict['clipvalue'] is None:
      del optimizer_dict['clipvalue']
    if optimizer_dict['global_clipnorm'] is None:
      del optimizer_dict['global_clipnorm']

    optimizer_dict['learning_rate'] = lr
    if gradient_aggregator is not None:
      optimizer_dict['gradient_aggregator'] = gradient_aggregator
    if gradient_transformers is not None:
      optimizer_dict['gradient_transformers'] = gradient_transformers

    if use_legacy_optimizer:
      optimizer = LEGACY_OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict)
    else:
      if 'decay' in optimizer_dict:
        raise ValueError(
            '`decay` is deprecated in new Keras optimizer, please reflect the '
            'decay logic in `lr` or set `use_legacy_optimizer=True` to use the '
            'legacy optimizer.')
      optimizer = NEW_OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict)

    if self._use_ema:
      if not use_legacy_optimizer:
        raise ValueError(
            'EMA can only work with the legacy optimizer, please set '
            '`use_legacy_optimizer=True`.')
      optimizer = ema_optimizer.ExponentialMovingAverage(
          optimizer, **self._ema_config.as_dict())
    if postprocessor:
      optimizer = postprocessor(optimizer)

    if isinstance(optimizer, tf_keras.optimizers.Optimizer):
      return optimizer
    # The following checks make sure the function won't break in older TF
    # versions that are missing the experimental/legacy packages.
    if hasattr(tf_keras.optimizers, 'experimental'):
      if isinstance(optimizer, tf_keras.optimizers.experimental.Optimizer):
        return optimizer
    if hasattr(tf_keras.optimizers, 'legacy'):
      if isinstance(optimizer, tf_keras.optimizers.legacy.Optimizer):
        return optimizer
    raise TypeError('OptimizerFactory.build_optimizer returning a '
                    'non-optimizer object: {}'.format(optimizer))
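

# Illustrative end-to-end sketch (assumption, not part of the library): building
# an Adam optimizer with gradient norm clipping from a config dict. The exact
# config schema is defined in configs.optimization_config.
#
#   params = {
#       'optimizer': {'type': 'adam', 'adam': {'clipnorm': 1.0}},
#       'learning_rate': {'type': 'constant',
#                         'constant': {'learning_rate': 1e-3}},
#   }
#   opt_config = opt_cfg.OptimizationConfig(params)
#   opt_factory = OptimizerFactory(opt_config)
#   lr = opt_factory.build_learning_rate()
#   optimizer = opt_factory.build_optimizer(lr, use_legacy_optimizer=True)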