import mlx.core as mx
from mlx.nn.layers.base import Module


class Dropout(Module):
| r"""Randomly zero a portion of the elements during training. |
| |
| The remaining elements are multiplied with :math:`\frac{1}{1-p}` where |
| :math:`p` is the probability of zeroing an element. This is done so the |
| expected value of a given element will remain the same. |
| |
| Args: |
| p (float): The probability to zero an element |
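
    Example:
        A minimal usage sketch (the dropout mask is random, so exact
        outputs vary from run to run):

        .. code-block:: python

            import mlx.core as mx
            import mlx.nn as nn

            layer = nn.Dropout(p=0.5)
            layer.train()  # dropout is only active in training mode

            x = mx.ones((4, 4))
            y = layer(x)  # ~half the entries zeroed, the rest scaled by 1 / (1 - p) = 2

            layer.eval()
            z = layer(x)  # identity: the input is returned unchanged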
| """ |
|
|
| def __init__(self, p: float = 0.5): |
| super().__init__() |
|
|
| if p < 0 or p >= 1: |
| raise ValueError(f"The dropout probability {p} is not in [0, 1)") |
|
|
| self._p_1 = 1 - p |
|
|
| def _extra_repr(self): |
| return f"p={1-self._p_1}" |
|
|
| def __call__(self, x): |
| if self._p_1 == 1 or not self.training: |
| return x |
|
|
| mask = mx.random.bernoulli(self._p_1, x.shape) |
|
|
| return (1 / self._p_1) * mask * x |


class Dropout2d(Module):
| r"""Apply 2D channel-wise dropout during training. |
| |
| Randomly zero out entire channels independently with probability :math:`p`. |
| This layer expects the channels to be last, i.e. the input shape should be |
| ``NWHC`` or ``WHC`` where:``N`` is the batch dimension,``H`` is the input |
| image height,``W`` is the input image width, and``C`` is the number of |
| input channels |
| |
| The remaining channels are scaled by :math:`\frac{1}{1-p}` to |
| maintain the expected value of each element. Unlike traditional dropout, |
| which zeros individual entries, this layer zeros entire channels. This is |
| beneficial for early convolution layers where adjacent pixels are |
| correlated. In such case, traditional dropout may not effectively |
| regularize activations. For more details, see [1]. |
| |
| [1]: Thompson, J., Goroshin, R., Jain, A., LeCun, Y. and Bregler C., 2015. |
| Efficient Object Localization Using Convolutional Networks. CVPR 2015. |
| |
| Args: |
| p (float): Probability of zeroing a channel during training. |
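
    Example:
        A minimal usage sketch with a hypothetical channels-last batch (the
        mask is random, so which channels are zeroed varies per run):

        .. code-block:: python

            import mlx.core as mx
            import mlx.nn as nn

            layer = nn.Dropout2d(p=0.5)
            layer.train()

            # NHWC: batch of 8 images, 32x32 pixels, 16 channels.
            x = mx.ones((8, 32, 32, 16))
            y = layer(x)  # each channel is either all zeros or all 2.0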
| """ |
|
|
| def __init__(self, p: float = 0.5): |
| super().__init__() |
|
|
| if p < 0 or p >= 1: |
| raise ValueError(f"The dropout probability {p} is not in [0, 1)") |
|
|
| self._p_1 = 1 - p |
|
|
| def _extra_repr(self): |
| return f"p={1-self._p_1}" |
|
|
| def __call__(self, x): |
| if x.ndim not in (3, 4): |
| raise ValueError( |
| f"Received input with {x.ndim} dimensions. Expected 3 or 4 dimensions." |
| ) |
|
|
| if self._p_1 == 1 or not self.training: |
| return x |
|
|
| |
| |
| |
| mask_shape = x.shape |
| mask_shape[-2] = mask_shape[-3] = 1 |
|
|
| mask = mx.random.bernoulli(p=self._p_1, shape=mask_shape) |
| return (1 / self._p_1) * mask * x |


class Dropout3d(Module):
| r"""Apply 3D channel-wise dropout during training. |
| |
| Randomly zero out entire channels independently with probability :math:`p`. |
| This layer expects the channels to be last, i.e., the input shape should be |
| `NDHWC` or `DHWC` where: `N` is the batch dimension, `D` is the depth, |
| `H` is the input image height, `W` is the input image width, and `C` is |
| the number of input channels. |
| |
| The remaining channels are scaled by :math:`\frac{1}{1-p}` to |
| maintain the expected value of each element. Unlike traditional dropout, |
| which zeros individual entries, this layer zeros entire channels. This is |
| often beneficial for convolutional layers processing 3D data, like in |
| medical imaging or video processing. |
| |
| Args: |
| p (float): Probability of zeroing a channel during training. |
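
    Example:
        A minimal usage sketch with a hypothetical channels-last volume (the
        mask is random, so which channels are zeroed varies per run):

        .. code-block:: python

            import mlx.core as mx
            import mlx.nn as nn

            layer = nn.Dropout3d(p=0.5)
            layer.train()

            # NDHWC: batch of 2 volumes, depth 16, 32x32 slices, 8 channels.
            x = mx.ones((2, 16, 32, 32, 8))
            y = layer(x)  # entire channels are zeroed across the whole volume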
| """ |
|
|
| def __init__(self, p: float = 0.5): |
| super().__init__() |
|
|
| if p < 0 or p >= 1: |
| raise ValueError(f"The dropout probability {p} is not in [0, 1)") |
|
|
| self._p_1 = 1 - p |
|
|
| def _extra_repr(self): |
| return f"p={1-self._p_1}" |
|
|
| def __call__(self, x): |
| if x.ndim not in (4, 5): |
| raise ValueError( |
| f"Received input with {x.ndim} dimensions. Expected 4 or 5 dimensions." |
| ) |
|
|
| if self._p_1 == 1 or not self.training: |
| return x |
|
|
| |
| |
| |
| mask_shape = list(x.shape) |
| mask_shape[-2] = mask_shape[-3] = mask_shape[-4] = 1 |
|
|
| mask = mx.random.bernoulli(p=self._p_1, shape=mask_shape) |
| return (1 / self._p_1) * mask * x |
|
|