| import torch |
| import torch.nn as nn |
|
|
|
|
class RMSNorm(nn.Module):
    """Root-mean-square layer normalization (RMSNorm).

    Normalizes the last dimension of the input by its root-mean-square
    value (no mean subtraction, unlike LayerNorm) and optionally applies
    a learnable per-element scale.
    """

    def __init__(
        self,
        dim: int,
        elementwise_affine: bool = True,
        eps: float = 1e-6,
        device=None,
        dtype=None,
    ):
        """
        Initialize the RMSNorm normalization layer.

        Args:
            dim (int): Size of the last dimension of the input tensor
                (the dimension that is normalized).
            elementwise_affine (bool, optional): If True, create a learnable
                per-element scale ``weight`` initialized to ones. If False,
                ``weight`` is registered as None and no scaling is applied.
                Default is True.
            eps (float, optional): A small value added to the denominator for
                numerical stability. Default is 1e-6.
            device: Optional device for the ``weight`` parameter.
            dtype: Optional dtype for the ``weight`` parameter.

        Attributes:
            eps (float): A small value added to the denominator for numerical stability.
            weight (nn.Parameter | None): Learnable scaling parameter, or None
                when ``elementwise_affine`` is False.
        """
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.eps = eps
        if elementwise_affine:
            self.weight = nn.Parameter(torch.ones(dim, **factory_kwargs))
        else:
            # Register the attribute as None instead of leaving it undefined,
            # so `self.weight` always exists (matches nn.LayerNorm's handling
            # of elementwise_affine=False) and forward() can test `is not None`
            # rather than probing with hasattr().
            self.register_parameter("weight", None)

    def _norm(self, x):
        """
        Apply the RMSNorm normalization to the input tensor.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The tensor scaled by the reciprocal RMS of its
            last dimension.
        """
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x):
        """
        Forward pass through the RMSNorm layer.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The output tensor after applying RMSNorm, in the
            same dtype as the input.
        """
        # Normalize in float32 for numerical stability, then cast back to
        # the input dtype (relevant under mixed precision).
        output = self._norm(x.float()).type_as(x)
        if self.weight is not None:
            output = output * self.weight.to(output.dtype)
        return output
|
|
|
|
def get_norm_layer(norm_layer):
    """
    Resolve a normalization-layer name to its class.

    Args:
        norm_layer (str): The type of normalization layer
            ("layer" or "rms").

    Returns:
        norm_layer (nn.Module): The normalization layer class.

    Raises:
        NotImplementedError: If the name is not recognized.
    """
    # Guard-clause dispatch; RMSNorm is referenced lazily so the "layer"
    # path never touches it.
    if norm_layer == "layer":
        return nn.LayerNorm
    if norm_layer == "rms":
        return RMSNorm
    raise NotImplementedError(f"Norm layer {norm_layer} is not implemented")
|
|