import logging
import math
from bisect import bisect_right
from typing import List

import torch
from fvcore.common.param_scheduler import (
    CompositeParamScheduler,
    ConstantParamScheduler,
    LinearParamScheduler,
    ParamScheduler,
)

try:
    from torch.optim.lr_scheduler import LRScheduler
except ImportError:
    # ``_LRScheduler`` is the pre-2.0 name of ``LRScheduler`` in pytorch
    from torch.optim.lr_scheduler import _LRScheduler as LRScheduler

logger = logging.getLogger(__name__)


class WarmupParamScheduler(CompositeParamScheduler):
    """
    Add an initial warmup stage to another scheduler.
    """

    def __init__(
        self,
        scheduler: ParamScheduler,
        warmup_factor: float,
        warmup_length: float,
        warmup_method: str = "linear",
        rescale_interval: bool = False,
    ):
        """
        Args:
            scheduler: warmup will be added at the beginning of this scheduler
            warmup_factor: the factor w.r.t the initial value of ``scheduler``, e.g. 0.001
            warmup_length: the relative length (in [0, 1]) of warmup steps w.r.t the entire
                training, e.g. 0.01
            warmup_method: one of "linear" or "constant"
            rescale_interval: whether we will rescale the interval of the scheduler after
                warmup
        """
        end_value = scheduler(warmup_length)  # the value to reach when warmup ends
        start_value = warmup_factor * scheduler(0.0)
        if warmup_method == "constant":
            warmup = ConstantParamScheduler(start_value)
        elif warmup_method == "linear":
            warmup = LinearParamScheduler(start_value, end_value)
        else:
            raise ValueError("Unknown warmup method: {}".format(warmup_method))
        super().__init__(
            [warmup, scheduler],
            interval_scaling=["rescaled", "rescaled" if rescale_interval else "fixed"],
            lengths=[warmup_length, 1 - warmup_length],
        )
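

# Usage sketch for ``WarmupParamScheduler`` (illustrative, not from the original
# file); ``CosineParamScheduler`` is fvcore's cosine schedule:
#
#   from fvcore.common.param_scheduler import CosineParamScheduler
#
#   sched = WarmupParamScheduler(
#       CosineParamScheduler(1.0, 0.0),  # cosine decay from 1.0 to 0.0
#       warmup_factor=0.001,             # start warmup at 0.1% of the initial value
#       warmup_length=0.01,              # warm up over the first 1% of training
#   )
#   sched(0.0)   # ~0.001: warmup start
#   sched(0.5)   # mid-training value of the wrapped cosine schedule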


class LRMultiplier(LRScheduler):
    """
    An LRScheduler which uses fvcore :class:`ParamScheduler` to multiply the
    learning rate of each param in the optimizer.
    Every step, the learning rate of each parameter becomes its initial value
    multiplied by the output of the given :class:`ParamScheduler`.

    The absolute learning rate value of each parameter can be different.
    This scheduler can be used as long as the relative scale among them does
    not change during training.

    Examples:
    ::
        LRMultiplier(
            opt,
            WarmupParamScheduler(
                MultiStepParamScheduler(
                    [1, 0.1, 0.01],
                    milestones=[60000, 80000],
                    num_updates=90000,
                ),
                0.001,
                100 / 90000,
            ),
            max_iter=90000,
        )
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        multiplier: ParamScheduler,
        max_iter: int,
        last_iter: int = -1,
    ):
        """
        Args:
            optimizer, last_iter: See ``torch.optim.lr_scheduler.LRScheduler``.
                ``last_iter`` is the same as ``last_epoch``.
            multiplier: an fvcore ParamScheduler that defines the multiplier on
                every LR of the optimizer
            max_iter: the total number of training iterations
        """
        if not isinstance(multiplier, ParamScheduler):
            raise ValueError(
                "LRMultiplier(multiplier=) must be an instance of fvcore "
                f"ParamScheduler. Got {multiplier} instead."
            )
        self._multiplier = multiplier
        self._max_iter = max_iter
        super().__init__(optimizer, last_epoch=last_iter)

    def state_dict(self):
        # fvcore schedulers are stateless; only the pytorch scheduler state needs saving
        return {"base_lrs": self.base_lrs, "last_epoch": self.last_epoch}

    def get_lr(self) -> List[float]:
        multiplier = self._multiplier(self.last_epoch / self._max_iter)
        return [base_lr * multiplier for base_lr in self.base_lrs]
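

# Usage sketch for ``LRMultiplier`` (illustrative; ``model`` and ``multiplier``
# are assumed to be defined elsewhere). The scheduler is stepped once per
# iteration, because the multiplier is queried with ``last_epoch / max_iter``
# as the training progress:
#
#   opt = torch.optim.SGD(model.parameters(), lr=0.1)
#   sched = LRMultiplier(opt, multiplier, max_iter=90000)
#   for _ in range(90000):
#       ...           # forward / backward / opt.step()
#       sched.step()  # rescales every param group's lr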


"""
Content below is no longer needed! The schedulers below are deprecated; use
:class:`LRMultiplier` with an fvcore :class:`ParamScheduler` instead.
"""

class WarmupMultiStepLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        milestones: List[int],
        gamma: float = 0.1,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupMultiStepLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # decay by ``gamma`` once for each milestone already passed
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]
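
    # Worked example (illustrative): with milestones=[60000, 80000] and gamma=0.1,
    # ``bisect_right`` yields 0 for iters below 60000 (factor 1.0), 1 for
    # 60000 <= iter < 80000 (factor 0.1), and 2 from 80000 on (factor 0.01).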

    def _compute_values(self) -> List[float]:
        # delegate to get_lr() for code paths that call _compute_values
        return self.get_lr()


class WarmupCosineLR(LRScheduler):
    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        max_iters: int,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        logger.warning(
            "WarmupCosineLR is deprecated! Use LRMultiplier with fvcore ParamScheduler instead!"
        )
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # Different definitions of half-cosine with warmup are possible; for
        # simplicity, the standard half-cosine schedule is multiplied by the
        # warmup factor.
        return [
            base_lr
            * warmup_factor
            * 0.5
            * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
            for base_lr in self.base_lrs
        ]

    def _compute_values(self) -> List[float]:
        # delegate to get_lr() for code paths that call _compute_values
        return self.get_lr()


def _get_warmup_factor_at_iter(
    method: str, iter: int, warmup_iters: int, warmup_factor: float
) -> float:
    """
    Return the learning rate warmup factor at a specific iteration.
    See :paper:`ImageNet in 1h` for more details.

    Args:
        method (str): warmup method; either "constant" or "linear".
        iter (int): iteration at which to calculate the warmup factor.
        warmup_iters (int): the number of warmup iterations.
        warmup_factor (float): the base warmup factor (the meaning changes according
            to the method used).

    Returns:
        float: the effective warmup factor at the given iteration.
    """
    if iter >= warmup_iters:
        return 1.0

    if method == "constant":
        return warmup_factor
    elif method == "linear":
        alpha = iter / warmup_iters
        return warmup_factor * (1 - alpha) + alpha
    else:
        raise ValueError("Unknown warmup method: {}".format(method))
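

# Worked example (illustrative): with method="linear", warmup_iters=1000, and
# warmup_factor=0.001, iter=500 gives alpha=0.5 and a factor of
# 0.001 * 0.5 + 0.5 = 0.5005; any iter >= 1000 returns exactly 1.0.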
|