import logging
import math
from bisect import bisect_right
from typing import List

import torch
from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    ConstantParamScheduler,
    LinearParamScheduler,
    ParamScheduler,
)

# ``LRScheduler`` is the public base-class name in recent PyTorch releases;
# older releases only expose ``_LRScheduler``, so fall back to it.
try:
    from torch.optim.lr_scheduler import LRScheduler
except ImportError:
    from torch.optim.lr_scheduler import _LRScheduler as LRScheduler

logger = logging.getLogger(__name__)


class WarmupParamScheduler(CompositeParamScheduler):
    """
    Add an initial warmup stage to another scheduler.
    """

    def __init__(
        self,
        scheduler: ParamScheduler,
        warmup_factor: float,
        warmup_length: float,
        warmup_method: str = "linear",
        rescale_interval: bool = False,
    ):
        """
        Args:
            scheduler: warmup will be added at the beginning of this scheduler
            warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001
            warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire
                training, e.g. 0.01
            warmup_method: one of "linear" or "constant"
            rescale_interval: whether we will rescale the interval of the scheduler after
                warmup
        """
        end_value = scheduler(0.0) if rescale_interval else scheduler(warmup_length)
        start_value = warmup_factor * scheduler(0.0)
        if warmup_method == "constant":
            warmup = ConstantParamScheduler(start_value)
        elif warmup_method == "linear":
            warmup = LinearParamScheduler(start_value, end_value)
        else:
            raise ValueError("Unknown warmup method: {}".format(warmup_method))
        super().__init__(
            [warmup, scheduler],
            interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"],
            lengths=[warmup_length, 1 - warmup_length],
        )
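

# A minimal usage sketch of ``WarmupParamScheduler`` (not part of the original
# API; ``CosineParamScheduler`` and the 1k/90k warmup split are illustrative
# assumptions). A fvcore ``ParamScheduler`` is called with the relative
# training progress in [0, 1) and returns the multiplier at that point.
def _example_warmup_schedule() -> None:
    from fvcore.common.param_scheduler import CosineParamScheduler

    sched = WarmupParamScheduler(
        CosineParamScheduler(1.0, 0.0),  # cosine decay from 1 to 0 after warmup
        warmup_factor=0.001,  # start at 0.1% of the base multiplier
        warmup_length=1000 / 90000,  # warm up over the first 1k of 90k iters
    )
    assert abs(sched(0.0) - 0.001) < 1e-6  # warmup starts at warmup_factor
    assert sched(0.5 * 1000 / 90000) > sched(0.0)  # multiplier ramps up during warmup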


class LRMultiplier(LRScheduler):
    """
    An LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.

    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

    Examples:
    ::
        LRMultiplier(
            opt,
            WarmupParamScheduler(
                MultiStepParamScheduler(
                    [1, 0.1, 0.01],
                    milestones=[60000, 80000],
                    num_updates=90000,
                ), 0.001, 100 / 90000
            ),
            max_iter=90000
        )
    """

    # NOTES: in the most general case, every LR can use its own scheduler.
    # Supporting this requires interaction with the optimizer when its parameter
    # group is initialized. For example, classyvision implements its own optimizer
    # that allows different schedulers for every parameter group. To avoid this
    # complexity, this class supports the most common case where the relative
    # scale among all LRs stays unchanged during training; then a single
    # scheduler is enough to define the shared relative multiplier.

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler.LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: an fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless. Only keep pytorch scheduler states
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]
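

# A minimal sketch (assumed setup, not from the original file) showing how
# ``LRMultiplier`` is driven: it subclasses the standard PyTorch scheduler, so
# ``scheduler.step()`` is called once per *iteration*, not once per epoch.
def _example_lr_multiplier_loop(
    model: torch.nn.Module, multiplier: ParamScheduler, max_iter: int
) -> None:
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = LRMultiplier(optimizer, multiplier, max_iter=max_iter)
    for _ in range(max_iter):
        optimizer.step()  # the real forward/backward pass is elided here
        scheduler.step()  # advances ``last_epoch`` and rescales every LR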
| | """ |
| | Content below is no longer needed! |
| | """ |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| |
|


class WarmupMultiStepLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        milestones: List[int],
        gamma: float = 0.1,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupMultiStepLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()
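

# A sketch of the replacement the deprecation warning above points to (the
# milestone and warmup values are illustrative assumptions): the same step
# schedule expressed with fvcore's ``MultiStepParamScheduler`` plus
# ``WarmupParamScheduler``, wrapped in ``LRMultiplier``.
def _example_multistep_replacement(
    optimizer: torch.optim.Optimizer, max_iter: int = 90000
) -> LRMultiplier:
    from fvcore.common.param_scheduler import MultiStepParamScheduler

    multiplier = WarmupParamScheduler(
        MultiStepParamScheduler(
            [1.0, 0.1, 0.01],  # multiplier values between milestones
            milestones=[60000, 80000],
            num_updates=max_iter,
        ),
        warmup_factor=0.001,
        warmup_length=1000 / max_iter,
    )
    return LRMultiplier(optimizer, multiplier, max_iter=max_iter)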


class WarmupCosineLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        max_iters: int,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupCosineLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # Different definitions of half-cosine with warmup are possible. For
        # simplicity we multiply the standard half-cosine schedule by the warmup
        # factor. An alternative is to start the period of the cosine at
        # warmup_iters instead of at 0. When warmup_iters << max_iters the two
        # are very close to each other.
        return [
            base_lr
            * warmup_factor
            * 0.5
            * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()
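

# The analogous replacement sketch for ``WarmupCosineLR`` (assumed values):
# fvcore's ``CosineParamScheduler`` decaying from 1 to 0 reproduces the
# ``0.5 * (1 + cos(pi * t))`` shape computed in ``get_lr`` above.
def _example_cosine_replacement(
    optimizer: torch.optim.Optimizer, max_iter: int = 90000
) -> LRMultiplier:
    from fvcore.common.param_scheduler import CosineParamScheduler

    multiplier = WarmupParamScheduler(
        CosineParamScheduler(1.0, 0.0),
        warmup_factor=0.001,
        warmup_length=1000 / max_iter,
    )
    return LRMultiplier(optimizer, multiplier, max_iter=max_iter)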


def _get_warmup_factor_at_iter(
    method: str, iter: int, warmup_iters: int, warmup_factor: float
) -> float:
    """
    Return the learning rate warmup factor at a specific iteration.
    See :paper:`ImageNet in 1h` for more details.

    Args:
        method (str): warmup method; either "constant" or "linear".
        iter (int): iteration at which to calculate the warmup factor.
        warmup_iters (int): the number of warmup iterations.
        warmup_factor (float): the base warmup factor (the meaning changes according
            to the method used).

    Returns:
        float: the effective warmup factor at the given iteration.
    """
    if iter >= warmup_iters:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = iter / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    else:
        raise ValueError("Unknown warmup method: {}".format(method))
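

# A worked example of the linear rule above (assumed numbers): with
# warmup_factor=0.001 and warmup_iters=1000, at iter=500 we get alpha=0.5 and
# factor = 0.001 * (1 - 0.5) + 0.5 = 0.5005. The factor thus ramps roughly
# linearly from 0.001 at iter=0 up to 1.0 once iter reaches warmup_iters.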