# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for optimizer_factory.py."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf, tf_keras

from official.modeling.optimization import optimizer_factory
from official.modeling.optimization.configs import optimization_config


class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):

  # Assumption: these optimizer types are registered in LEGACY_OPTIMIZERS_CLS.
  @parameterized.parameters(('sgd'), ('rmsprop'), ('adam'), ('adamw'),
                            ('lamb'), ('lars'), ('adagrad'))
  def test_optimizers(self, optimizer_type):
    params = {
        'optimizer': {
            'type': optimizer_type
        },
        'learning_rate': {
            'type': 'constant',
            'constant': {
                'learning_rate': 0.1
            }
        }
    }
    optimizer_cls = optimizer_factory.LEGACY_OPTIMIZERS_CLS[optimizer_type]
    expected_optimizer_config = optimizer_cls().get_config()
    expected_optimizer_config['learning_rate'] = 0.1

    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(lr, postprocessor=lambda x: x)

    self.assertIsInstance(optimizer, optimizer_cls)
    self.assertEqual(expected_optimizer_config, optimizer.get_config())

  # Assumption: these optimizer types are registered in NEW_OPTIMIZERS_CLS.
  @parameterized.parameters(('sgd'), ('adam'), ('adamw'))
  def test_new_optimizers(self, optimizer_type):
    params = {
        'optimizer': {
            'type': optimizer_type
        },
        'learning_rate': {
            'type': 'constant',
            'constant': {
                'learning_rate': 0.1
            }
        }
    }
    optimizer_cls = optimizer_factory.NEW_OPTIMIZERS_CLS[optimizer_type]
    expected_optimizer_config = optimizer_cls().get_config()
    expected_optimizer_config['learning_rate'] = 0.1

    opt_config = optimization_config.OptimizationConfig(params)
    if optimizer_type == 'sgd':
      # Delete unsupported arg `decay` from SGDConfig.
      delattr(opt_config.optimizer.sgd, 'decay')
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(
        lr, postprocessor=lambda x: x, use_legacy_optimizer=False)

    self.assertIsInstance(optimizer, optimizer_cls)
    self.assertEqual(expected_optimizer_config, optimizer.get_config())

  def test_gradient_aggregator(self):
    params = {
        'optimizer': {
            'type': 'adam',
        },
        'learning_rate': {
            'type': 'constant',
            'constant': {
                'learning_rate': 1.0
            }
        }
    }
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    # Dummy function to zero out gradients.
    zero_grads = lambda gv: [(tf.zeros_like(g), v) for g, v in gv]

    optimizer = opt_factory.build_optimizer(lr, gradient_aggregator=zero_grads)
    if isinstance(optimizer, tf_keras.optimizers.experimental.Optimizer):
      self.skipTest('New Keras optimizer does not support '
                    '`gradient_aggregator` arg.')

    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])
    grads0 = tf.constant([1.0, 1.0])
    grads1 = tf.constant([1.0, 1.0])
    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
    optimizer.apply_gradients(grads_and_vars)
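    # The aggregator zeroes every gradient, so both variables must keep their
    # initial values after `apply_gradients`.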
    self.assertAllClose(np.array([1.0, 2.0]), var0.numpy())
    self.assertAllClose(np.array([3.0, 4.0]), var1.numpy())

  # Assumption: the (clipnorm, clipvalue) pairs below are chosen to match the
  # three assertion branches at the end of the test (no clipping, norm
  # clipping only, value clipping only).
  @parameterized.parameters((None, None), (1.0, None), (None, 1.0))
  def test_gradient_clipping(self, clipnorm, clipvalue):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'clipnorm': clipnorm,
                'clipvalue': clipvalue
            }
        },
        'learning_rate': {
            'type': 'constant',
            'constant': {
                'learning_rate': 1.0
            }
        }
    }
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()
    optimizer = opt_factory.build_optimizer(lr)

    var0 = tf.Variable([1.0, 2.0])
    var1 = tf.Variable([3.0, 4.0])
    grads0 = tf.constant([0.1, 0.1])
    grads1 = tf.constant([2.0, 3.0])
    grads_and_vars = list(zip([grads0, grads1], [var0, var1]))
    optimizer.apply_gradients(grads_and_vars)
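    # With lr=1.0 SGD, var <- var - clipped_grad. `grads0` is small enough to
    # be unaffected by clipping, so var0 always ends at [0.9, 1.9]. For var1:
    # clipvalue=1.0 caps grads1 element-wise at [1.0, 1.0]; clipnorm=1.0
    # rescales grads1 by 1 / ||[2.0, 3.0]||; otherwise grads1 applies as-is.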
    self.assertAllClose(np.array([0.9, 1.9]), var0.numpy())
    if clipvalue is not None:
      self.assertAllClose(np.array([2.0, 3.0]), var1.numpy())
    elif clipnorm is not None:
      self.assertAllClose(np.array([2.4452999, 3.1679497]), var1.numpy())
    else:
      self.assertAllClose(np.array([1.0, 1.0]), var1.numpy())

  def test_missing_types(self):
    params = {'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}}}
    with self.assertRaises(ValueError):
      optimizer_factory.OptimizerFactory(
          optimization_config.OptimizationConfig(params))
    params = {
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [10000, 20000],
                'values': [0.1, 0.01, 0.001]
            }
        }
    }
    with self.assertRaises(ValueError):
      optimizer_factory.OptimizerFactory(
          optimization_config.OptimizationConfig(params))

  def test_wrong_return_type(self):
    optimizer_type = 'sgd'
    params = {
        'optimizer': {
            'type': optimizer_type
        },
        'learning_rate': {
            'type': 'constant',
            'constant': {
                'learning_rate': 0.1
            }
        }
    }
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    with self.assertRaises(TypeError):
      _ = opt_factory.build_optimizer(0.1, postprocessor=lambda x: None)

  # TODO(b/187559334) refactor lr_schedule tests into `lr_schedule_test.py`.
  def test_stepwise_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [10000, 20000],
                'values': [0.1, 0.01, 0.001]
            }
        }
    }
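    # Piecewise-constant schedule: 0.1 up to and including step 10000, 0.01
    # through step 20000, then 0.001 afterwards.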
    expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1],
                               [10001, 0.01], [20000, 0.01], [20001, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_stepwise_lr_with_warmup_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [10000, 20000],
                'values': [0.1, 0.01, 0.001]
            }
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 500,
                'warmup_learning_rate': 0.01
            }
        }
    }
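    # Linear warmup ramps from 0.01 to the first stepwise value (0.1) over 500
    # steps, e.g. at step 250: 0.01 + (0.1 - 0.01) * 250 / 500 = 0.055.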
    expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5500, 0.1],
                               [10000, 0.1], [10001, 0.01], [20000, 0.01],
                               [20001, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_exponential_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'exponential',
            'exponential': {
                'initial_learning_rate': 0.1,
                'decay_steps': 1000,
                'decay_rate': 0.96,
                'staircase': True
            }
        }
    }
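    # Staircase exponential decay: lr = 0.1 * 0.96**floor(step / 1000), so the
    # rate drops to 0.096 at step 1000 and 0.09216 at step 2000.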
    expected_lr_step_values = [
        [0, 0.1],
        [999, 0.1],
        [1000, 0.096],
        [1999, 0.096],
        [2000, 0.09216],
    ]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_polynomial_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'polynomial',
            'polynomial': {
                'initial_learning_rate': 0.1,
                'decay_steps': 1000,
                'end_learning_rate': 0.001
            }
        }
    }
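    # With the default power of 1 this is a linear ramp:
    # lr = (0.1 - 0.001) * (1 - step / 1000) + 0.001.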
    expected_lr_step_values = [[0, 0.1], [500, 0.0505], [1000, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_cosine_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'cosine',
            'cosine': {
                'initial_learning_rate': 0.1,
                'decay_steps': 1000
            }
        }
    }
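    # Cosine decay: lr = 0.1 * 0.5 * (1 + cos(pi * step / 1000)), reaching half
    # the initial rate at step 500 and zero at step 1000.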
    expected_lr_step_values = [[0, 0.1], [250, 0.08535534], [500, 0.04999999],
                               [750, 0.01464466], [1000, 0]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_constant_lr_with_warmup_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'constant',
            'constant': {
                'learning_rate': 0.1
            }
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 500,
                'warmup_learning_rate': 0.01
            }
        }
    }
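    # Linear warmup from 0.01 to 0.1 over the first 500 steps, then the
    # constant rate of 0.1 for the rest of training.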
    expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5000, 0.1],
                               [10000, 0.1], [20000, 0.1]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_stepwise_lr_with_polynomial_warmup_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'stepwise',
            'stepwise': {
                'boundaries': [10000, 20000],
                'values': [0.1, 0.01, 0.001]
            }
        },
        'warmup': {
            'type': 'polynomial',
            'polynomial': {
                'warmup_steps': 500,
                'power': 2.
            }
        }
    }
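    # Polynomial warmup with power 2 scales the target rate by
    # (step / 500)**2, e.g. at step 250: 0.1 * 0.25 = 0.025.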
    expected_lr_step_values = [[0, 0.0], [250, 0.025], [500, 0.1], [5500, 0.1],
                               [10000, 0.1], [10001, 0.01], [20000, 0.01],
                               [20001, 0.001]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value, places=6)

  def test_power_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'power',
            'power': {
                'initial_learning_rate': 1.0,
                'power': -1.0
            }
        }
    }
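    # Power decay: lr follows initial_learning_rate * step**power (1 / step
    # here), with the earliest steps held at the initial rate of 1.0.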
    expected_lr_step_values = [[0, 1.0], [1, 1.0], [250, 1. / 250.]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_power_linear_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'power_linear',
            'power_linear': {
                'initial_learning_rate': 1.0,
                'power': -1.0,
                'linear_decay_fraction': 0.5,
                'total_decay_steps': 100,
                'offset': 0,
            }
        }
    }
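    # Power decay (1 / step) with an extra linear ramp to zero over the last
    # half of the 100 decay steps, e.g. step 60: (1 / 60) * (100 - 60) / 50.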
    expected_lr_step_values = [[0, 1.0], [1, 1.0], [40, 1. / 40.],
                               [60, 1. / 60. * 0.8]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_power_with_offset_lr_schedule(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'power_with_offset',
            'power_with_offset': {
                'initial_learning_rate': 1.0,
                'power': -1.0,
                'offset': 10,
                'pre_offset_learning_rate': 3.0,
            }
        }
    }
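    # The rate stays at `pre_offset_learning_rate` (3.0) through step 10, then
    # follows 1 / (step - offset), e.g. step 20 -> 1 / 10.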
    expected_lr_step_values = [[1, 3.0], [10, 3.0], [20, 1. / 10.]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)

  def test_step_cosine_lr_schedule_with_warmup(self):
    params = {
        'optimizer': {
            'type': 'sgd',
            'sgd': {
                'momentum': 0.9
            }
        },
        'learning_rate': {
            'type': 'step_cosine_with_offset',
            'step_cosine_with_offset': {
                'values': (0.0001, 0.00005),
                'boundaries': (0, 500000),
                'offset': 10000,
            }
        },
        'warmup': {
            'type': 'linear',
            'linear': {
                'warmup_steps': 10000,
                'warmup_learning_rate': 0.0
            }
        }
    }
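    # Linear warmup from 0 to 1e-4 over the first 10000 steps (the offset),
    # then a cosine decay from 1e-4 towards 5e-5 over the remaining steps.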
    expected_lr_step_values = [[0, 0.0], [5000, 1e-4 / 2.0], [10000, 1e-4],
                               [20000, 9.994863e-05], [499999, 5e-05]]
    opt_config = optimization_config.OptimizationConfig(params)
    opt_factory = optimizer_factory.OptimizerFactory(opt_config)
    lr = opt_factory.build_learning_rate()

    for step, value in expected_lr_step_values:
      self.assertAlmostEqual(lr(step).numpy(), value)


class OptimizerFactoryRegistryTest(tf.test.TestCase):

  def test_registry(self):

    class MyClass():
      pass

    optimizer_factory.register_optimizer_cls('test', MyClass)
    self.assertIn('test', optimizer_factory.LEGACY_OPTIMIZERS_CLS)
    with self.assertRaisesRegex(ValueError, 'test already registered.*'):
      optimizer_factory.register_optimizer_cls('test', MyClass)


if __name__ == '__main__':
  tf.test.main()