mdiffae-v1 / m_diffae /norms.py
data-archetype's picture
Upload folder using huggingface_hub
128cb34 verified
"""Channel-wise RMSNorm for NCHW tensors."""
from __future__ import annotations
import torch
from torch import Tensor, nn
class ChannelWiseRMSNorm(nn.Module):
"""Channel-wise RMSNorm with float32 reduction for numerical stability.
Normalizes across channels per spatial position. Supports optional
per-channel affine weight and bias.
"""
def __init__(self, channels: int, eps: float = 1e-6, affine: bool = True) -> None:
super().__init__()
self.channels: int = int(channels)
self._eps: float = float(eps)
if affine:
self.weight = nn.Parameter(torch.ones(self.channels))
self.bias = nn.Parameter(torch.zeros(self.channels))
else:
self.register_parameter("weight", None)
self.register_parameter("bias", None)
def forward(self, x: Tensor) -> Tensor:
if x.dim() < 2:
return x
# Float32 accumulation for stability
ms = torch.mean(torch.square(x), dim=1, keepdim=True, dtype=torch.float32)
inv_rms = torch.rsqrt(ms + self._eps)
y = x * inv_rms
if self.weight is not None:
shape = (1, -1) + (1,) * (x.dim() - 2)
y = y * self.weight.view(shape).to(dtype=y.dtype)
if self.bias is not None:
y = y + self.bias.view(shape).to(dtype=y.dtype)
return y.to(dtype=x.dtype)