S-KEY

Running on Zero

App Files Files Community

S-KEY / skey /convnext.py

2cylu2

Upload 13 files

77ac75d verified about 2 months ago

raw

history blame contribute delete

4.04 kB

	# Code originally provided by Meta FAIR. https://github.com/facebookresearch/ConvNeXt
	import torch
	from torch import nn


	class DropPath(nn.Module):
	    r"""Stochastic depth: randomly zero whole samples of a residual branch.

	    One Bernoulli draw is made per element of the leading (batch) dimension and
	    broadcast over every remaining dimension, so a sample is either kept in
	    full or dropped in full. The module is a pass-through outside training
	    mode or when ``drop_prob`` is exactly 0.

	    Args:
	        drop_prob (float): Probability of dropping a sample's path. Default: 0.0.
	        scale_by_keep (bool): If True, kept samples are divided by the keep
	            probability ``1 - drop_prob``. Default: True.
	    """

	    def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
	        super().__init__()
	        self.drop_prob = drop_prob
	        self.scale_by_keep = scale_by_keep

	    def forward(self, x):
	        # Nothing to drop at inference time or with a zero drop rate:
	        # hand back the very same tensor object.
	        if not self.training or self.drop_prob == 0.0:
	            return x

	        keep_prob = 1 - self.drop_prob
	        # Shape (batch, 1, 1, ...) so the mask broadcasts across each sample.
	        mask_shape = (x.shape[0], *([1] * (x.ndim - 1)))
	        mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
	        # The keep_prob > 0 guard avoids dividing by zero when drop_prob == 1.
	        if self.scale_by_keep and keep_prob > 0.0:
	            mask.div_(keep_prob)
	        return x * mask

	    def extra_repr(self):
	        # Shown inside the module's repr, rounded to three decimals.
	        return "drop_prob={:0.3f}".format(round(self.drop_prob, 3))


	class ConvNeXtBlock(nn.Module):
	    r"""Residual ConvNeXt-style block operating on (N, C, H, W) tensors.

	    Pipeline of the residual branch:
	    grouped conv (replicate padding) -> parameter-free layer norm over all
	    non-batch dimensions (C, H, W) -> permute to (N, H, W, C) -> Linear
	    (4x expansion) -> GELU -> Linear -> optional layer scale -> permute back
	    to (N, C, H, W) -> drop path. The branch is then added to the block input.

	    NOTE: the second linear layer emits ``in_channels`` features while the
	    layer-scale vector has ``out_channels`` entries, and the branch is added
	    to an input with ``in_channels`` channels. These shapes line up without
	    broadcasting only when ``in_channels == out_channels``.

	    Args:
	        in_channels (int): Number of input channels (also the conv group count).
	        out_channels (int): Number of channels produced by the grouped conv.
	        kernel_size (int): Size of the convolution kernel. Default: 7.
	        padding (int): Padding size for the convolution. Default: 3.
	        drop_path (float): Stochastic depth rate; values <= 0 disable it. Default: 0.1.
	        layer_scale_init_value (float): Initial value of the layer-scale vector;
	            values <= 0 disable layer scale. Default: 1e-1.
	    """

	    def __init__(
	        self,
	        in_channels,
	        out_channels,
	        kernel_size=7,
	        padding=3,
	        drop_path=0.1,
	        layer_scale_init_value=1e-1,
	    ):
	        super().__init__()

	        # Depthwise convolution: one group per input channel.
	        self.dwconv = nn.Conv2d(
	            in_channels,
	            out_channels,
	            kernel_size=kernel_size,
	            padding=padding,
	            groups=in_channels,
	            padding_mode="replicate",
	        )

	        # Functional layer norm: no learnable scale or shift is stored.
	        self.norm = nn.functional.layer_norm

	        # Pointwise (1x1) convolutions expressed as linear layers over the
	        # channel axis, with a 4x hidden expansion.
	        hidden_channels = 4 * out_channels
	        self.pwconv1 = nn.Linear(out_channels, hidden_channels)
	        self.act = nn.GELU()
	        self.pwconv2 = nn.Linear(hidden_channels, in_channels)

	        # Learnable per-channel layer scale, or None when disabled.
	        if layer_scale_init_value > 0:
	            self.gamma = nn.Parameter(
	                layer_scale_init_value * torch.ones(out_channels),
	                requires_grad=True,
	            )
	        else:
	            self.gamma = None

	        # Stochastic depth on the residual branch, or a no-op when disabled.
	        if drop_path > 0.0:
	            self.drop_path = DropPath(drop_path)
	        else:
	            self.drop_path = nn.Identity()

	    def forward(self, x):
	        residual = x

	        branch = self.dwconv(x)
	        # Normalize jointly over every non-batch dimension.
	        branch = self.norm(branch, branch.shape[1:])

	        # Channels last so the linear layers act on the channel axis.
	        branch = branch.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
	        branch = self.pwconv2(self.act(self.pwconv1(branch)))
	        if self.gamma is not None:
	            branch = self.gamma * branch
	        branch = branch.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

	        return residual + self.drop_path(branch)


	class TimeDownsamplingBlock(nn.Module):
	    r"""Halve the last axis of a (N, C, H, W) tensor: layer norm -> strided conv -> GELU.

	    The layer norm is parameter-free and spans every non-batch dimension.
	    The convolution uses a (1, 2) kernel with a (1, 2) stride, so the last
	    axis is reduced by a factor of two while the height axis is untouched.

	    Args:
	        in_channels (int): Number of input channels.
	        out_channels (int): Number of output channels.
	        bias (bool): Whether to use bias in the convolution. Default: True.
	    """

	    def __init__(self, in_channels, out_channels, bias=True):
	        super().__init__()
	        # Functional layer norm: no learnable scale or shift is stored.
	        self.norm = nn.functional.layer_norm
	        # Same window for kernel and stride, so adjacent pairs along the last
	        # axis are merged without overlap.
	        window = (1, 2)
	        self.conv = nn.Conv2d(
	            in_channels,
	            out_channels,
	            kernel_size=window,
	            stride=window,
	            bias=bias,
	        )
	        self.act = nn.GELU()

	    def forward(self, x):
	        # Normalize jointly over every non-batch dimension.
	        normed = self.norm(x, x.shape[1:])
	        downsampled = self.conv(normed)
	        return self.act(downsampled)