Spaces:
Build error
Build error
| # Copyright 2023 The TensorFlow Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """Dataclasses for learning rate schedule config.""" | |
| from typing import List, Optional | |
| import dataclasses | |
| from official.modeling.hyperparams import base_config | |
@dataclasses.dataclass
class ConstantLrConfig(base_config.Config):
  """Configuration for constant learning rate.

  This class is a container for the constant learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to Constant.
    learning_rate: A float. The learning rate. Defaults to 0.1.
  """
  name: str = 'Constant'
  learning_rate: float = 0.1
@dataclasses.dataclass
class StepwiseLrConfig(base_config.Config):
  """Configuration for stepwise learning rate decay.

  This class is a container for the piecewise constant learning rate scheduling
  configs. It will configure an instance of PiecewiseConstantDecay keras
  learning rate schedule.

  An example (from keras docs): use a learning rate that's 1.0 for the first
  100001 steps, 0.5 for the next 10000 steps, and 0.1 for any additional steps.

  ```python
  boundaries: [100000, 110000]
  values: [1.0, 0.5, 0.1]
  ```

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PiecewiseConstantDecay.
    boundaries: A list of ints of strictly increasing entries. Defaults to None.
    values: A list of floats that specifies the values for the intervals defined
      by `boundaries`. It should have one more element than `boundaries`.
      The learning rate is computed as follows:
        [0, boundaries[0]] -> values[0]
        [boundaries[0], boundaries[1]] -> values[1]
        ...
        [boundaries[n-1], boundaries[n]] -> values[n]
        [boundaries[n], end] -> values[n+1]
      Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PiecewiseConstantDecay'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0
@dataclasses.dataclass
class ExponentialLrConfig(base_config.Config):
  """Configuration for exponential learning rate decay.

  This class is a container for the exponential learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to ExponentialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    decay_rate: A float. Defaults to None.
    staircase: A boolean, if true, learning rate is decreased at discrete
      intervals. Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'ExponentialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  decay_rate: Optional[float] = None
  staircase: Optional[bool] = None
  offset: int = 0
@dataclasses.dataclass
class PolynomialLrConfig(base_config.Config):
  """Configuration for polynomial learning rate decay.

  This class is a container for the polynomial learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to PolynomialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    end_learning_rate: A float. The minimal end learning rate. Defaults to
      0.0001.
    power: A float. The power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
      Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PolynomialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  end_learning_rate: float = 0.0001
  power: float = 1.0
  cycle: bool = False
  offset: int = 0
@dataclasses.dataclass
class CosineLrConfig(base_config.Config):
  """Configuration for Cosine learning rate decay.

  This class is a container for the cosine learning rate decay configs,
  tf_keras.experimental.CosineDecay.

  Attributes:
    name: The name of the learning rate schedule. Defaults to CosineDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    alpha: A float. Minimum learning rate value as a fraction of
      initial_learning_rate. Defaults to 0.0.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'CosineDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  alpha: float = 0.0
  offset: int = 0
@dataclasses.dataclass
class DirectPowerLrConfig(base_config.Config):
  """Configuration for DirectPower learning rate decay.

  This class configures a schedule following lr * (step)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to DirectPowerDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, for sqrt decay.
  """
  name: str = 'DirectPowerDecay'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5
@dataclasses.dataclass
class PowerAndLinearDecayLrConfig(base_config.Config):
  """Configuration for power and linear learning rate decay.

  The schedule has the following behavior.
  Let offset_step = step - offset.
  1) offset_step < 0, the actual learning rate equals initial_learning_rate.
  2) offset_step <= total_decay_steps * (1 - linear_decay_fraction), the
     actual learning rate equals lr * offset_step^power.
  3) total_decay_steps * (1 - linear_decay_fraction) <= offset_step <
     total_decay_steps, the actual learning rate equals lr * offset_step^power *
     (total_decay_steps - offset_step) / (total_decay_steps *
     linear_decay_fraction).
  4) offset_step >= total_decay_steps, the actual learning rate equals zero.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerAndLinearDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    total_decay_steps: An int. The total number of steps for power + linear
      decay. Defaults to None.
    power: A float. The order of the polynomial. Defaults to -0.5, for sqrt
      decay.
    linear_decay_fraction: A float. In the last `linear_decay_fraction` steps,
      the learning rate will be multiplied by a linear decay. Defaults to 0.1.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PowerAndLinearDecay'
  initial_learning_rate: Optional[float] = None
  total_decay_steps: Optional[int] = None
  power: float = -0.5
  linear_decay_fraction: float = 0.1
  offset: int = 0
@dataclasses.dataclass
class PowerDecayWithOffsetLrConfig(base_config.Config):
  """Configuration for power learning rate decay with step offset.

  Learning rate equals to `pre_offset_learning_rate` if `step` < `offset`.
  Otherwise, learning rate equals to lr * (step - offset)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerDecayWithOffset.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, for sqrt decay.
    offset: An integer. Power decay happens after `offset` steps. Defaults to 0.
    pre_offset_learning_rate: A float. The constant learning rate before
      `offset` steps. Defaults to 1.0e6.
  """
  name: str = 'PowerDecayWithOffset'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5
  offset: int = 0
  pre_offset_learning_rate: float = 1.0e6
@dataclasses.dataclass
class StepCosineLrConfig(base_config.Config):
  """Configuration for stepwise cosine learning rate decay.

  This class is a container for the piecewise cosine learning rate scheduling
  configs. It will configure an instance of StepCosineDecayWithOffset keras
  learning rate schedule.

  ```python
  boundaries: [100000, 110000]
  values: [1.0, 0.5]
  lr_decayed_fn = (
      lr_schedule.StepCosineDecayWithOffset(
          boundaries,
          values))
  ```
  from 0 to 100000 step, it will cosine decay from 1.0 to 0.5
  from 100000 to 110000 step, it cosine decay from 0.5 to 0.0

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      StepCosineDecayWithOffset.
    boundaries: A list of ints of strictly increasing entries. Defaults to None.
    values: A list of floats that specifies the values for the intervals defined
      by `boundaries`. It should have one more element than `boundaries`.
      The learning rate is computed as follows:
        [0, boundaries[0]] -> cosine from values[0] to values[1]
        [boundaries[0], boundaries[1]] -> values[1] to values[2]
        ...
        [boundaries[n-1], boundaries[n]] -> values[n] to values[n+1]
        [boundaries[n], end] -> values[n+1] to 0.
      Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'StepCosineDecayWithOffset'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0
@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
  """Configuration for linear warmup schedule config.

  This class is a container for the linear warmup schedule configs.
  Warmup_learning_rate is the initial learning rate, the final learning rate of
  the warmup period is the learning_rate of the optimizer in use. The learning
  rate at each step linearly increased according to the following formula:
    warmup_learning_rate = warmup_learning_rate +
      step / warmup_steps * (final_learning_rate - warmup_learning_rate).
  Using warmup overrides the learning rate schedule by the number of warmup
  steps.

  Attributes:
    name: The name of warmup schedule. Defaults to linear.
    warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
    warmup_steps: Warmup steps. Defaults to None.
  """
  name: str = 'linear'
  warmup_learning_rate: float = 0
  warmup_steps: Optional[int] = None
@dataclasses.dataclass
class PolynomialWarmupConfig(base_config.Config):
  """Configuration for polynomial warmup schedule config.

  This class is a container for the polynomial warmup schedule configs.

  Attributes:
    name: The name of warmup schedule. Defaults to polynomial.
    power: Polynomial power. Defaults to 1.
    warmup_steps: Warmup steps. Defaults to None.
  """
  name: str = 'polynomial'
  power: float = 1
  warmup_steps: Optional[int] = None