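"""Learning-rate schedulers with an optional warmup phase.

Wraps the standard PyTorch schedulers (StepLR, MultiStepLR,
ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau) and adds a
cosine-power annealing variant.
"""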
import math
import warnings

from torch.optim.lr_scheduler import _LRScheduler, StepLR, MultiStepLR, \
    ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau

__all__ = [
'CosinePowerAnnealingLR', 'StepLRWithWarmup', 'MultiStepLRWithWarmup',
'ExponentialLRWithWarmup', 'CosineAnnealingLRWithWarmup',
'CosinePowerAnnealingLRWithWarmup', 'ReduceLROnPlateauWithWarmup']
class _WarmupLR(_LRScheduler):
"""Wrapper adding a warmup phase to a Pytorch Scheduler.
This class is not intended to be directly instantiated. One should
instead create child classes with the desired `_SCHEDULER_CLASS`.
Credit: https://github.com/lehduong/torch-warmup-lr
:param init_lr: float
Learning rate value to start the warmup from. All your
optimizer's parameter groups will be warmed up from
`init_lr` to their initial value as set in the optimizer
:param num_warmup: int
Number of scheduler steps (i.e. epochs, most of the time)
dedicated to warming up
:param warmup_strategy: str
Warmup strategy, among ['linear', 'cos', 'constant']
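
    Example (a minimal sketch, assuming a concrete subclass such as
    `StepLRWithWarmup`, an already-built `optimizer`, and a
    hypothetical `train` function)::

        >>> scheduler = StepLRWithWarmup(
        ...     optimizer, step_size=30, gamma=0.1,
        ...     warmup_init_lr=1e-6, num_warmup=5, warmup_strategy='cos')
        >>> for epoch in range(100):
        ...     train(...)
        ...     scheduler.step()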
"""
_SCHEDULER_CLASS = None
def __init__(
self, *args, warmup_init_lr=1e-6, num_warmup=1,
warmup_strategy='cos', **kwargs):
assert warmup_strategy in ['linear', 'cos', 'constant'], \
f"Expect warmup_strategy to be one of ['linear', 'cos', " \
f"'constant'] but got {warmup_strategy}"
self._scheduler = self._SCHEDULER_CLASS(*args, **kwargs)
self._init_lr = warmup_init_lr
self._num_warmup = num_warmup
self._step_count = 0
# Define the strategy to warm up learning rate
self._warmup_strategy = warmup_strategy
if warmup_strategy == 'cos':
self._warmup_func = self._warmup_cos
elif warmup_strategy == 'linear':
self._warmup_func = self._warmup_linear
else:
self._warmup_func = self._warmup_const
        # Save the initial learning rate of each param group; only
        # useful when param groups have different learning rates.
self._format_param()
# A first step is needed to initialize the LR
self.step()
    def __getattr__(self, name):
        # Only called when regular attribute lookup fails. If
        # `_scheduler` itself is missing (e.g. during unpickling),
        # raise instead of recursing through `self._scheduler`.
        if name == '_scheduler':
            raise AttributeError(name)
        return getattr(self._scheduler, name)
def state_dict(self):
"""Returns the state of the scheduler as a :class:`dict`.
It contains an entry for every variable in self.__dict__ which
is not the optimizer.
"""
wrapper_state_dict = {
key: value
for key, value in self.__dict__.items()
            if key not in ('optimizer', '_scheduler')}
wrapped_state_dict = {
key: value
for key, value in self._scheduler.__dict__.items()
if key != 'optimizer'}
return {'wrapped': wrapped_state_dict, 'wrapper': wrapper_state_dict}
def load_state_dict(self, state_dict):
"""Loads the schedulers state.
:param state_dict: dict
Scheduler state. Should be an object returned from a call
to :meth:`state_dict`.
"""
self.__dict__.update(state_dict['wrapper'])
self._scheduler.__dict__.update(state_dict['wrapped'])
def _format_param(self):
"""Set the first and last learning rates for the warmup phase,
for each parameter group. All parameter groups will start the
warmup at the same value `self._init_lr`.
"""
for group in self._scheduler.optimizer.param_groups:
group['warmup_max_lr'] = group['lr']
group['warmup_initial_lr'] = min(self._init_lr, group['lr'])
def _warmup_cos(self, start, end, pct):
"""Cosine warmup scheme.
"""
cos_out = math.cos(math.pi * pct) + 1
return end + (start - end) / 2.0 * cos_out
def _warmup_const(self, start, end, pct):
"""Constant warmup scheme.
"""
return start if pct < 0.9999 else end
def _warmup_linear(self, start, end, pct):
"""Linear warmup scheme.
"""
return (end - start) * pct + start
def get_lr(self):
lrs = []
step_num = self._step_count
# warm up learning rate
if step_num <= self._num_warmup:
for group in self._scheduler.optimizer.param_groups:
computed_lr = self._warmup_func(
group['warmup_initial_lr'], group['warmup_max_lr'],
step_num / self._num_warmup)
lrs.append(computed_lr)
else:
lrs = self._scheduler.get_lr()
return lrs
def step(self, *args, **kwargs):
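        # During warmup the wrapper drives the learning rate itself and
        # counts its own steps; afterwards every call (including e.g.
        # the `metrics` argument for ReduceLROnPlateau) is forwarded to
        # the wrapped scheduler.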
if self._step_count <= self._num_warmup:
values = self.get_lr()
for param_group, lr in zip(
self._scheduler.optimizer.param_groups, values):
param_group['lr'] = lr
self._step_count += 1
else:
self._scheduler.step(*args, **kwargs)
class CosinePowerAnnealingLR(CosineAnnealingLR):
"""Same as CosineAnnealingLR, but with an additional `power`
parameter, to mitigate the annealing time spent on large learning
rates (i.e. `power < 1`) or small learning rates (i.e. `power > 1`).
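
    The closed-form schedule is
    `eta_min + (base_lr - eta_min) * ((1 + cos(pi * t / T_max)) / 2) ** power`,
    which reduces to standard cosine annealing when `power == 1`.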
"""
def __init__(
self, optimizer, T_max, eta_min=0, power=2, last_epoch=-1,
verbose=False):
super().__init__(
optimizer, T_max, eta_min=eta_min, last_epoch=last_epoch,
verbose=verbose)
self.power = power
def get_lr(self):
if not self._get_lr_called_within_step:
warnings.warn(
"To get the last learning rate computed by the scheduler, "
"please use `get_last_lr()`.", UserWarning)
if self.last_epoch == 0:
return [group['lr'] for group in self.optimizer.param_groups]
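        # First call on a scheduler resumed with `last_epoch > 0`:
        # recompute the learning rate in closed form.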
elif self._step_count == 1 and self.last_epoch > 0:
return [
self.eta_min + (base_lr - self.eta_min) *
((1 + math.cos((self.last_epoch) * math.pi / self.T_max)) / 2) ** self.power
for base_lr, group in
zip(self.base_lrs, self.optimizer.param_groups)]
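        # Step right after a cosine minimum, where the recursive ratio
        # below would divide by zero: step back up explicitly.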
elif (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0:
return [
group['lr'] + (base_lr - self.eta_min) *
((1 - math.cos(math.pi / self.T_max)) / 2) ** self.power
for base_lr, group in
zip(self.base_lrs, self.optimizer.param_groups)]
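        # General case: scale the previous learning rate by the ratio
        # of consecutive cosine factors, raised to `self.power`.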
return [
((1 + math.cos(math.pi * self.last_epoch / self.T_max)) /
(1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max))) ** self.power *
(group['lr'] - self.eta_min) + self.eta_min
for group in self.optimizer.param_groups]
def _get_closed_form_lr(self):
return [
self.eta_min + (base_lr - self.eta_min) *
((1 + math.cos(math.pi * self.last_epoch / self.T_max)) / 2) ** self.power
for base_lr in self.base_lrs]
class StepLRWithWarmup(_WarmupLR):
"""StepLRWithWarmup with warmup.
"""
_SCHEDULER_CLASS = StepLR
class MultiStepLRWithWarmup(_WarmupLR):
"""MultiStepLR with warmup.
"""
_SCHEDULER_CLASS = MultiStepLR
class ExponentialLRWithWarmup(_WarmupLR):
"""ExponentialLR with warmup.
"""
_SCHEDULER_CLASS = ExponentialLR
class CosineAnnealingLRWithWarmup(_WarmupLR):
"""CosineAnnealingLR with warmup.
"""
_SCHEDULER_CLASS = CosineAnnealingLR
class CosinePowerAnnealingLRWithWarmup(_WarmupLR):
"""CosinePowerAnnealingLR with warmup.
"""
_SCHEDULER_CLASS = CosinePowerAnnealingLR
class ReduceLROnPlateauWithWarmup(_WarmupLR):
"""ReduceLROnPlateau with warmup.
"""
_SCHEDULER_CLASS = ReduceLROnPlateau
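
# Schedulers whose `step` must be passed the monitored metric
# (e.g. a validation loss) rather than being called with no arguments.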
ON_PLATEAU_SCHEDULERS = (ReduceLROnPlateau, ReduceLROnPlateauWithWarmup)
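

if __name__ == '__main__':
    # Minimal smoke test (a sketch, assuming only that torch is
    # installed): warm an SGD optimizer up over 5 steps, then let the
    # wrapped StepLR schedule take over.
    import torch

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = StepLRWithWarmup(
        optimizer, step_size=10, gamma=0.1,
        warmup_init_lr=1e-6, num_warmup=5, warmup_strategy='cos')
    for epoch in range(20):
        optimizer.step()  # forward/backward omitted; keeps call order valid
        scheduler.step()
        print(epoch, optimizer.param_groups[0]['lr'])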