import logging
import math
from bisect import bisect_right
from typing import List

import torch
from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    ConstantParamScheduler,
    LinearParamScheduler,
    ParamScheduler,
)

try:
    from torch.optim.lr_scheduler import LRScheduler
except ImportError:
    # ``_LRScheduler`` is the pre-2.0 name of ``LRScheduler`` in pytorch
    from torch.optim.lr_scheduler import _LRScheduler as LRScheduler

logger = logging.getLogger(__name__)


class WarmupParamScheduler(CompositeParamScheduler):
    """
    Add an initial warmup stage to another scheduler.
    """

    def __init__(
        self,
        scheduler: ParamScheduler,
        warmup_factor: float,
        warmup_length: float,
        warmup_method: str = "linear",
        rescale_interval: bool = False,
    ):
        """
        Args:
            scheduler: warmup will be added at the beginning of this scheduler
            warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001
            warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire
                training, e.g. 0.01
            warmup_method: one of "linear" or "constant"
            rescale_interval: whether we will rescale the interval of the scheduler after
                warmup
        """
        end_value = scheduler(warmup_length)  # the value to reach when warmup ends
        start_value = warmup_factor * scheduler(0.0)
        if warmup_method == "constant":
            warmup = ConstantParamScheduler(start_value)
        elif warmup_method == "linear":
            warmup = LinearParamScheduler(start_value, end_value)
        else:
            raise ValueError("Unknown warmup method: {}".format(warmup_method))
        super().__init__(
            [warmup, scheduler],
            interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"],
            lengths=[warmup_length, 1 - warmup_length],
        )
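

# Usage sketch for ``WarmupParamScheduler`` (illustrative, not from the original
# file); ``CosineParamScheduler`` is fvcore's cosine schedule:
#
#   from fvcore.common.param_scheduler import CosineParamScheduler
#
#   sched = WarmupParamScheduler(
#       CosineParamScheduler(1.0, 0.0),  # cosine decay from 1.0 to 0.0
#       warmup_factor=0.001,             # start warmup at 0.1% of the initial value
#       warmup_length=0.01,              # warm up over the first 1% of training
#   )
#   sched(0.0)   # ~0.001: warmup start
#   sched(0.5)   # mid-training value of the wrapped cosine schedule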


class LRMultiplier(LRScheduler):
    """
    An LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.

    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

    Examples:
    ::
        LRMultiplier(
            opt,
            WarmupParamScheduler(
                MultiStepParamScheduler(
                    [1, 0.1, 0.01],
                    milestones=[60000, 80000],
                    num_updates=90000,
                ),
                0.001,
                100 / 90000,
            ),
            max_iter=90000,
        )
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler.LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: an fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless; only the pytorch scheduler state needs saving
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]
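

# Usage sketch for ``LRMultiplier`` (illustrative; ``model`` and ``multiplier``
# are assumed to be defined elsewhere). The scheduler is stepped once per
# iteration, because the multiplier is queried with ``last_epoch / max_iter``
# as the training progress:
#
#   opt = torch.optim.SGD(model.parameters(), lr=0.1)
#   sched = LRMultiplier(opt, multiplier, max_iter=90000)
#   for _ in range(90000):
#       ...           # forward / backward / opt.step()
#       sched.step()  # rescales every param group's lr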


"""
Content below is no longer needed! The schedulers below are deprecated; use
:class:`LRMultiplier` with an fvcore :class:`ParamScheduler` instead.
"""

class WarmupMultiStepLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        milestones: List[int],
        gamma: float = 0.1,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupMultiStepLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # decay by ``gamma`` once for each milestone already passed
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]
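
    # Worked example (illustrative): with milestones=[60000, 80000] and gamma=0.1,
    # ``bisect_right`` yields 0 for iters below 60000 (factor 1.0), 1 for
    # 60000 <= iter < 80000 (factor 0.1), and 2 from 80000 on (factor 0.01).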

    def _compute_values(self) -> List[float]:
        # delegate to get_lr() for code paths that call _compute_values
        return self.get_lr()


class WarmupCosineLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        max_iters: int,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupCosineLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # Different definitions of half-cosine with warmup are possible; for
        # simplicity, the standard half-cosine schedule is multiplied by the
        # warmup factor.
        return [
            base_lr
            * warmup_factor
            * 0.5
            * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # delegate to get_lr() for code paths that call _compute_values
        return self.get_lr()


def _get_warmup_factor_at_iter(
    method: str, iter: int, warmup_iters: int, warmup_factor: float
) -> float:
    """
    Return the learning rate warmup factor at a specific iteration.
    See :paper:`ImageNet in 1h` for more details.

    Args:
        method (str): warmup method; either "constant" or "linear".
        iter (int): iteration at which to calculate the warmup factor.
        warmup_iters (int): the number of warmup iterations.
        warmup_factor (float): the base warmup factor (the meaning changes according
            to the method used).

    Returns:
        float: the effective warmup factor at the given iteration.
    """
    if iter >= warmup_iters:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = iter / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    else:
        raise ValueError("Unknown warmup method: {}".format(method))
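

# Worked example (illustrative): with method="linear", warmup_iters=1000, and
# warmup_factor=0.001, iter=500 gives alpha=0.5 and a factor of
# 0.001 * 0.5 + 0.5 = 0.5005; any iter >= 1000 returns exactly 1.0.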
|