| | """Improved diffusion model architecture proposed in the paper |
| | "Analyzing and Improving the Training Dynamics of Diffusion Models".""" |
| |
|
import numpy as np
import torch

#----------------------------------------------------------------------------
# Cached construction of constant tensors. Avoids CPU=>GPU copy when the
# same constant is used multiple times.

_constant_cache = dict()

def constant(value, shape=None, dtype=None, device=None, memory_format=None):
    value = np.asarray(value)
    if shape is not None:
        shape = tuple(shape)
    if dtype is None:
        dtype = torch.get_default_dtype()
    if device is None:
        device = torch.device('cpu')
    if memory_format is None:
        memory_format = torch.contiguous_format

    key = (value.shape, value.dtype, value.tobytes(), shape, dtype, device, memory_format)
    tensor = _constant_cache.get(key, None)
    if tensor is None:
        tensor = torch.as_tensor(value.copy(), dtype=dtype, device=device)
        if shape is not None:
            tensor, _ = torch.broadcast_tensors(tensor, torch.empty(shape))
        tensor = tensor.contiguous(memory_format=memory_format)
        _constant_cache[key] = tensor
    return tensor
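
# Usage sketch (hypothetical helper, not part of the original API): shows that
# constant() memoizes tensors, so repeated calls with identical arguments
# return the same cached object instead of re-allocating it.
def _demo_constant():
    a = constant(1.5, shape=[4], dtype=torch.float32)
    b = constant(1.5, shape=[4], dtype=torch.float32)
    print(a)       # tensor([1.5000, 1.5000, 1.5000, 1.5000])
    print(a is b)  # True: second call is served from _constant_cache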

#----------------------------------------------------------------------------
# Variant of constant() that inherits dtype and device from the given
# reference tensor by default.

def const_like(ref, value, shape=None, dtype=None, device=None, memory_format=None):
    if dtype is None:
        dtype = ref.dtype
    if device is None:
        device = ref.device
    return constant(value, shape=shape, dtype=dtype, device=device, memory_format=memory_format)

#----------------------------------------------------------------------------
# Normalize given tensor to unit magnitude with respect to the given
# dimensions. Default = all dimensions except the first.

def normalize(x, dim=None, eps=1e-4):
    if dim is None:
        dim = list(range(1, x.ndim))
    norm = torch.linalg.vector_norm(x, dim=dim, keepdim=True, dtype=torch.float32)
    norm = torch.add(eps, norm, alpha=np.sqrt(norm.numel() / x.numel()))
    return x / norm.to(x.dtype)
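
# Sanity-check sketch (hypothetical helper): normalize() rescales each sample
# so its elements have unit magnitude on average, i.e. unit RMS over the
# normalized dimensions, regardless of the input scale.
def _demo_normalize():
    x = torch.randn(2, 1000) * 5.0
    y = normalize(x)
    print(y.square().mean(dim=1))  # both entries close to 1.0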

#----------------------------------------------------------------------------
# Module wrapper around normalize().

class Normalize(torch.nn.Module):
    def __init__(self, dim=None, eps=1e-4):
        super().__init__()
        self.dim = dim
        self.eps = eps

    def forward(self, x):
        return normalize(x, dim=self.dim, eps=self.eps)
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
def resample(x, f=[1, 1], mode='keep'):
    if mode == 'keep':
        return x
    f = np.float32(f)
    assert f.ndim == 1 and len(f) % 2 == 0
    pad = (len(f) - 1) // 2
    f = f / f.sum()
    f = np.outer(f, f)[np.newaxis, np.newaxis, :, :]
    f = const_like(x, f)
    c = x.shape[1]
    if mode == 'down':
        return torch.nn.functional.conv2d(x, f.tile([c, 1, 1, 1]), groups=c, stride=2, padding=(pad,))
    assert mode == 'up'
    return torch.nn.functional.conv_transpose2d(x, (f * 4).tile([c, 1, 1, 1]), groups=c, stride=2, padding=(pad,))
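
# Shape sketch (hypothetical helper): resample() halves or doubles the spatial
# resolution with the default [1, 1] box filter, or passes the input through.
def _demo_resample():
    x = torch.randn(1, 3, 32, 32)
    print(resample(x, mode='keep').shape)  # torch.Size([1, 3, 32, 32])
    print(resample(x, mode='down').shape)  # torch.Size([1, 3, 16, 16])
    print(resample(x, mode='up').shape)    # torch.Size([1, 3, 64, 64])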

#----------------------------------------------------------------------------
# Magnitude-preserving SiLU (Equation 81).

def mp_silu(x):
    return torch.nn.functional.silu(x) / 0.596
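
# Numerical sketch (hypothetical helper): for unit-variance Gaussian input,
# the standard deviation of silu(x) is roughly 0.596, so dividing by that
# constant restores an output magnitude close to 1.
def _demo_mp_silu():
    x = torch.randn(1 << 20)
    print(float(torch.nn.functional.silu(x).std()))  # ~0.596
    print(float(mp_silu(x).std()))                   # ~1.0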

#----------------------------------------------------------------------------
# Module wrapper around mp_silu().

class MPSiLU(torch.nn.Module):
    def forward(self, x):
        return mp_silu(x)

#----------------------------------------------------------------------------
# Magnitude-preserving sum (Equation 88).

def mp_sum(a, b, t=0.5):
    return a.lerp(b, t) / np.sqrt((1 - t)**2 + t**2)
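
# Numerical sketch (hypothetical helper): mp_sum() blends two tensors with
# lerp weight t and divides by sqrt((1 - t)^2 + t^2), so independent
# unit-variance inputs stay at unit variance for any t.
def _demo_mp_sum():
    a = torch.randn(1 << 20)
    b = torch.randn(1 << 20)
    print(float(mp_sum(a, b, t=0.3).std()))  # ~1.0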

#----------------------------------------------------------------------------
# Magnitude-preserving concatenation (Equation 103).

def mp_cat(a, b, dim=1, t=0.5):
    Na = a.shape[dim]
    Nb = b.shape[dim]
    C = np.sqrt((Na + Nb) / ((1 - t)**2 + t**2))
    wa = C / np.sqrt(Na) * (1 - t)
    wb = C / np.sqrt(Nb) * t
    return torch.cat([wa * a, wb * b], dim=dim)
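
# Numerical sketch (hypothetical helper): mp_cat() weights the two inputs so
# the concatenated result keeps unit magnitude overall, with t controlling the
# relative contribution of b versus a, independent of their channel counts.
def _demo_mp_cat():
    a = torch.randn(4, 16, 32)
    b = torch.randn(4, 48, 32)
    y = mp_cat(a, b, dim=1, t=0.5)
    print(y.shape)                   # torch.Size([4, 64, 32])
    print(float(y.square().mean()))  # ~1.0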

#----------------------------------------------------------------------------
# Magnitude-preserving 1D convolution or fully-connected layer (Equation 47).
# Inference-only variant: remove_weight_norm() bakes the weight normalization
# (Equation 66) into the stored weights once, instead of re-normalizing them
# on every training step.

class MPConv1D(torch.nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        self.out_channels = out_channels
        self.weight = torch.nn.Parameter(torch.randn(out_channels, in_channels, kernel_size))
        self.weight_norm_removed = False

    def forward(self, x, gain=1):
        assert self.weight_norm_removed, 'call remove_weight_norm() before inference'
        w = self.weight * gain
        if w.ndim == 2:
            return x @ w.t()
        assert w.ndim == 3
        return torch.nn.functional.conv1d(x, w, padding=(w.shape[-1] // 2,))

    def remove_weight_norm(self):
        w = self.weight.to(torch.float32)
        w = normalize(w)               # normalize each output filter to unit magnitude
        w = w / np.sqrt(w[0].numel())  # magnitude-preserving scaling by fan-in
        w = w.to(self.weight.dtype)
        self.weight.data.copy_(w)
        self.weight_norm_removed = True
        return self
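
# Usage sketch (hypothetical example, not part of the original file): the layer
# must have its weight normalization baked in via remove_weight_norm() before
# it can run; an odd kernel size keeps the sequence length unchanged.
def _demo_mp_conv1d():
    conv = MPConv1D(in_channels=64, out_channels=128, kernel_size=5)
    conv = conv.remove_weight_norm()
    y = conv(torch.randn(2, 64, 100))
    print(y.shape)  # torch.Size([2, 128, 100])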

#----------------------------------------------------------------------------