|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| from .ncsnpp_utils import layers, layerspp, normalization
|
| import torch.nn as nn
|
| import functools
|
| import torch
|
| import numpy as np
|
|
|
| from .shared import BackboneRegistry
|
|
|
# Short module-level aliases for the helpers this file uses from
# ``.ncsnpp_utils``; they only rebind existing objects under local names.

# From ``layerspp``: the two residual-block variants, the feature combiner
# and the convolution helpers.
ResnetBlockDDPM, ResnetBlockBigGAN = (
    layerspp.ResnetBlockDDPMpp,
    layerspp.ResnetBlockBigGANpp,
)
Combine = layerspp.Combine
conv3x3, conv1x1 = layerspp.conv3x3, layerspp.conv1x1

# From ``layers`` / ``normalization``: activation lookup, normalization
# lookup and the default weight initializer.
get_act, default_initializer = layers.get_act, layers.default_init
get_normalization = normalization.get_normalization
|
|
|
|
|
@BackboneRegistry.register("ncsnpp")
class NCSNpp(nn.Module):
    """NCSN++ model, adapted from https://github.com/yang-song/score_sde repository.

    All layers are stored in one flat ``self.all_modules`` list. ``__init__``
    appends them in exactly the order in which ``forward`` consumes them via a
    running index, so the two methods must always be changed together. The
    position of each layer in that list is also part of its parameter name,
    so the construction order must not be altered.
    """

    @staticmethod
    def add_argparse_args(parser):
        """Register the backbone's command-line options on ``parser``.

        Args:
            parser: an object exposing ``add_argument`` and ``set_defaults``
                (e.g. an ``argparse.ArgumentParser`` or argument group).

        Returns:
            The same ``parser`` object, to allow chaining.
        """
        parser.add_argument("--ch_mult", type=int, nargs='+', default=[1,1,2,2,2,2,2])
        parser.add_argument("--num_res_blocks", type=int, default=2)
        parser.add_argument("--attn_resolutions", type=int, nargs='+', default=[16])
        # Paired on/off flags writing to the same destination.
        parser.add_argument("--no-centered", dest="centered", action="store_false", help="The data is not centered [-1, 1]")
        parser.add_argument("--centered", dest="centered", action="store_true", help="The data is centered [-1, 1]")
        parser.set_defaults(centered=True)
        return parser

    def __init__(self,
        scale_by_sigma = True,
        nonlinearity = 'swish',
        nf = 128,
        ch_mult = (1, 1, 2, 2, 2, 2, 2),
        num_res_blocks = 2,
        attn_resolutions = (16,),
        resamp_with_conv = True,
        conditional = True,
        fir = True,
        fir_kernel = [1, 3, 3, 1],
        skip_rescale = True,
        resblock_type = 'biggan',
        progressive = 'output_skip',
        progressive_input = 'input_skip',
        progressive_combine = 'sum',
        init_scale = 0.,
        fourier_scale = 16,
        image_size = 256,
        embedding_type = 'fourier',
        dropout = .0,
        centered = True,
        **unused_kwargs
    ):
        """Build the flat list of layers that ``forward`` walks through.

        Args:
            scale_by_sigma: if true, ``forward`` divides the network output by
                the noise level before the final output layer.
            nonlinearity: activation name handed to ``get_act``.
            nf: base channel count; level ``i`` uses ``nf * ch_mult[i]``.
            ch_mult: per-level channel multipliers; its length is the number
                of resolution levels.
            num_res_blocks: residual blocks per level on the down path (the up
                path uses one more per level).
            attn_resolutions: resolutions at which an attention block is added.
            resamp_with_conv: ``with_conv`` flag for the plain
                ``Upsample``/``Downsample`` layers (used with ``'ddpm'`` blocks).
            conditional: whether to build the two linear layers applied to the
                time embedding; when false ``forward`` passes ``temb=None``.
            fir, fir_kernel: forwarded to the resampling layers and to the
                BigGAN residual blocks.
            skip_rescale: forwarded to the blocks; also makes the ``'residual'``
                pyramid sums in ``forward`` be divided by ``sqrt(2)``.
            resblock_type: ``'ddpm'`` or ``'biggan'`` (case-insensitive).
            progressive: output pyramid mode, one of ``'none'``,
                ``'output_skip'``, ``'residual'`` (case-insensitive).
            progressive_input: input pyramid mode, one of ``'none'``,
                ``'input_skip'``, ``'residual'`` (case-insensitive).
            progressive_combine: ``method`` handed to ``Combine``; with
                ``'cat'`` the channel count is doubled after each combine.
            init_scale: forwarded to attention blocks, residual blocks and the
                output convolutions.
            fourier_scale: ``scale`` of the Gaussian Fourier projection.
            image_size: resolution assumed for level 0; level ``i`` is
                ``image_size // 2**i``.
            embedding_type: ``'fourier'`` or ``'positional'`` (case-insensitive).
            dropout: forwarded to the residual blocks.
            centered: if false, ``forward`` maps the input through ``2*x - 1``.
            **unused_kwargs: accepted and ignored.

        Raises:
            AssertionError: if ``progressive``, ``progressive_input`` or
                ``embedding_type`` is not one of the accepted values.
            ValueError: if ``resblock_type`` is not recognized.
        """
        super().__init__()
        self.act = act = get_act(nonlinearity)

        self.nf = nf
        self.num_res_blocks = num_res_blocks
        self.attn_resolutions = attn_resolutions
        self.num_resolutions = num_resolutions = len(ch_mult)
        self.all_resolutions = all_resolutions = [image_size // (2 ** i) for i in range(num_resolutions)]

        self.conditional = conditional
        self.centered = centered
        self.scale_by_sigma = scale_by_sigma

        self.skip_rescale = skip_rescale
        self.resblock_type = resblock_type = resblock_type.lower()
        self.progressive = progressive = progressive.lower()
        self.progressive_input = progressive_input = progressive_input.lower()
        self.embedding_type = embedding_type = embedding_type.lower()
        assert progressive in ['none', 'output_skip', 'residual']
        assert progressive_input in ['none', 'input_skip', 'residual']
        assert embedding_type in ['fourier', 'positional']
        combine_method = progressive_combine.lower()
        combiner = functools.partial(Combine, method=combine_method)

        # The network works on 4 real channels (see the real/imag split at the
        # top of ``forward``) and is mapped to 2 channels at the very end.
        num_channels = 4
        self.output_layer = nn.Conv2d(num_channels, 2, 1)

        modules = []

        # --- Time / noise-level embedding --------------------------------
        if embedding_type == 'fourier':
            # Gaussian Fourier features; ``forward`` feeds it log(time_cond).
            modules.append(layerspp.GaussianFourierProjection(
                embedding_size=nf, scale=fourier_scale
            ))
            embed_dim = 2 * nf
        elif embedding_type == 'positional':
            # The positional embedding is computed functionally in ``forward``
            # and therefore contributes no module here.
            embed_dim = nf
        else:
            raise ValueError(f'embedding type {embedding_type} unknown.')

        if conditional:
            # Two linear layers applied to the embedding, biases zeroed.
            modules.append(nn.Linear(embed_dim, nf * 4))
            modules[-1].weight.data = default_initializer()(modules[-1].weight.shape)
            nn.init.zeros_(modules[-1].bias)
            modules.append(nn.Linear(nf * 4, nf * 4))
            modules[-1].weight.data = default_initializer()(modules[-1].weight.shape)
            nn.init.zeros_(modules[-1].bias)

        # --- Layer factories with the shared options pre-bound -----------
        AttnBlock = functools.partial(layerspp.AttnBlockpp,
            init_scale=init_scale, skip_rescale=skip_rescale)

        Upsample = functools.partial(layerspp.Upsample,
            with_conv=resamp_with_conv, fir=fir, fir_kernel=fir_kernel)

        if progressive == 'output_skip':
            # A single shared layer, kept outside ``all_modules``.
            self.pyramid_upsample = layerspp.Upsample(fir=fir, fir_kernel=fir_kernel, with_conv=False)
        elif progressive == 'residual':
            pyramid_upsample = functools.partial(layerspp.Upsample, fir=fir,
                fir_kernel=fir_kernel, with_conv=True)

        Downsample = functools.partial(layerspp.Downsample, with_conv=resamp_with_conv, fir=fir, fir_kernel=fir_kernel)

        if progressive_input == 'input_skip':
            # A single shared layer, kept outside ``all_modules``.
            self.pyramid_downsample = layerspp.Downsample(fir=fir, fir_kernel=fir_kernel, with_conv=False)
        elif progressive_input == 'residual':
            pyramid_downsample = functools.partial(layerspp.Downsample,
                fir=fir, fir_kernel=fir_kernel, with_conv=True)

        if resblock_type == 'ddpm':
            ResnetBlock = functools.partial(ResnetBlockDDPM, act=act,
                dropout=dropout, init_scale=init_scale,
                skip_rescale=skip_rescale, temb_dim=nf * 4)

        elif resblock_type == 'biggan':
            ResnetBlock = functools.partial(ResnetBlockBigGAN, act=act,
                dropout=dropout, fir=fir, fir_kernel=fir_kernel,
                init_scale=init_scale, skip_rescale=skip_rescale, temb_dim=nf * 4)

        else:
            raise ValueError(f'resblock type {resblock_type} unrecognized.')

        # --- Downsampling path -------------------------------------------
        channels = num_channels
        if progressive_input != 'none':
            input_pyramid_ch = channels

        modules.append(conv3x3(channels, nf))
        # ``hs_c`` mirrors the ``hs`` skip stack of ``forward``: it records the
        # channel count of every activation that will be pushed there.
        hs_c = [nf]

        in_ch = nf
        for i_level in range(num_resolutions):
            # Residual blocks for this resolution.
            for _ in range(num_res_blocks):
                out_ch = nf * ch_mult[i_level]
                modules.append(ResnetBlock(in_ch=in_ch, out_ch=out_ch))
                in_ch = out_ch

                if all_resolutions[i_level] in attn_resolutions:
                    modules.append(AttnBlock(channels=in_ch))
                hs_c.append(in_ch)

            # Every level except the last one ends with a downsampling step.
            if i_level != num_resolutions - 1:
                if resblock_type == 'ddpm':
                    modules.append(Downsample(in_ch=in_ch))
                else:
                    modules.append(ResnetBlock(down=True, in_ch=in_ch))

                if progressive_input == 'input_skip':
                    modules.append(combiner(dim1=input_pyramid_ch, dim2=in_ch))
                    if combine_method == 'cat':
                        in_ch *= 2

                elif progressive_input == 'residual':
                    modules.append(pyramid_downsample(in_ch=input_pyramid_ch, out_ch=in_ch))
                    input_pyramid_ch = in_ch

                hs_c.append(in_ch)

        # --- Bottleneck: residual block, attention, residual block --------
        in_ch = hs_c[-1]
        modules.append(ResnetBlock(in_ch=in_ch))
        modules.append(AttnBlock(channels=in_ch))
        modules.append(ResnetBlock(in_ch=in_ch))

        pyramid_ch = 0

        # --- Upsampling path ----------------------------------------------
        for i_level in reversed(range(num_resolutions)):
            # One block more than on the way down, so that every entry pushed
            # onto the skip stack is consumed exactly once.
            for _ in range(num_res_blocks + 1):
                out_ch = nf * ch_mult[i_level]
                modules.append(ResnetBlock(in_ch=in_ch + hs_c.pop(), out_ch=out_ch))
                in_ch = out_ch

            if all_resolutions[i_level] in attn_resolutions:
                modules.append(AttnBlock(channels=in_ch))

            if progressive != 'none':
                if i_level == num_resolutions - 1:
                    # Coarsest level: start the output pyramid.
                    if progressive == 'output_skip':
                        modules.append(nn.GroupNorm(num_groups=min(in_ch // 4, 32),
                            num_channels=in_ch, eps=1e-6))
                        modules.append(conv3x3(in_ch, channels, init_scale=init_scale))
                        pyramid_ch = channels
                    elif progressive == 'residual':
                        modules.append(nn.GroupNorm(num_groups=min(in_ch // 4, 32), num_channels=in_ch, eps=1e-6))
                        modules.append(conv3x3(in_ch, in_ch, bias=True))
                        pyramid_ch = in_ch
                    else:
                        raise ValueError(f'{progressive} is not a valid name.')
                else:
                    # Finer levels: extend the pyramid built so far.
                    if progressive == 'output_skip':
                        modules.append(nn.GroupNorm(num_groups=min(in_ch // 4, 32),
                            num_channels=in_ch, eps=1e-6))
                        modules.append(conv3x3(in_ch, channels, bias=True, init_scale=init_scale))
                        pyramid_ch = channels
                    elif progressive == 'residual':
                        modules.append(pyramid_upsample(in_ch=pyramid_ch, out_ch=in_ch))
                        pyramid_ch = in_ch
                    else:
                        raise ValueError(f'{progressive} is not a valid name')

            # Every level except the finest one ends with an upsampling step.
            if i_level != 0:
                if resblock_type == 'ddpm':
                    modules.append(Upsample(in_ch=in_ch))
                else:
                    modules.append(ResnetBlock(in_ch=in_ch, up=True))

        # Internal invariant: every recorded skip connection was consumed.
        assert not hs_c

        if progressive != 'output_skip':
            # Without an output-skip pyramid a final norm + conv produces the
            # ``channels``-channel result.
            modules.append(nn.GroupNorm(num_groups=min(in_ch // 4, 32),
                num_channels=in_ch, eps=1e-6))
            modules.append(conv3x3(in_ch, channels, init_scale=init_scale))

        self.all_modules = nn.ModuleList(modules)

    def forward(self, x, time_cond):
        """Run the network on a complex input.

        Args:
            x: complex tensor indexed as ``x[:, c, :, :]``; channels 0 and 1
                are read and split into real and imaginary parts.
            time_cond: conditioning value per batch element. With the
                ``'fourier'`` embedding it is used as the noise level (its log
                is embedded); with ``'positional'`` it is used as a timestep.

        Returns:
            A complex tensor with a singleton dimension inserted at index 1.

        Raises:
            ValueError: if ``self.embedding_type`` or ``self.progressive``
                holds an unsupported value.
            AttributeError: with the ``'positional'`` embedding and
                ``scale_by_sigma`` enabled, unless a ``sigmas`` attribute has
                been attached to the instance (this class does not create one).
        """
        # ``m_idx`` walks ``all_modules`` in the order ``__init__`` filled it.
        modules = self.all_modules
        m_idx = 0

        # Stack real and imaginary parts of the two input channels -> 4 real
        # channels.
        x = torch.cat((x[:,[0],:,:].real, x[:,[0],:,:].imag,
            x[:,[1],:,:].real, x[:,[1],:,:].imag), dim=1)

        if self.embedding_type == 'fourier':
            # Gaussian Fourier features of the log noise level.
            used_sigmas = time_cond
            temb = modules[m_idx](torch.log(used_sigmas))
            m_idx += 1

        elif self.embedding_type == 'positional':
            # Sinusoidal positional embedding of the timestep.
            timesteps = time_cond
            # No ``sigmas`` attribute is defined by this class, so it is only
            # looked up when the output is actually rescaled by it. Previously
            # the unconditional lookup made this branch fail even when
            # ``scale_by_sigma`` was disabled.
            used_sigmas = self.sigmas[time_cond.long()] if self.scale_by_sigma else None
            temb = layers.get_timestep_embedding(timesteps, self.nf)

        else:
            raise ValueError(f'embedding type {self.embedding_type} unknown.')

        if self.conditional:
            temb = modules[m_idx](temb)
            m_idx += 1
            temb = modules[m_idx](self.act(temb))
            m_idx += 1
        else:
            temb = None

        if not self.centered:
            # Map the input through 2*x - 1 when it is flagged as not centered.
            x = 2 * x - 1.

        # Input pyramid for the progressive-input variants.
        input_pyramid = None
        if self.progressive_input != 'none':
            input_pyramid = x

        # ``hs`` is the stack of skip activations consumed on the way up.
        hs = [modules[m_idx](x)]
        m_idx += 1

        # --- Downsampling path -------------------------------------------
        for i_level in range(self.num_resolutions):
            # Residual blocks for this resolution.
            for _ in range(self.num_res_blocks):
                h = modules[m_idx](hs[-1], temb)
                m_idx += 1

                # NOTE: attention blocks were created from
                # ``image_size // 2**i_level`` in ``__init__``; this runtime
                # check on dimension -2 must agree with that, otherwise the
                # module index goes out of sync.
                if h.shape[-2] in self.attn_resolutions:
                    h = modules[m_idx](h)
                    m_idx += 1
                hs.append(h)

            if i_level != self.num_resolutions - 1:
                if self.resblock_type == 'ddpm':
                    h = modules[m_idx](hs[-1])
                    m_idx += 1
                else:
                    h = modules[m_idx](hs[-1], temb)
                    m_idx += 1

                if self.progressive_input == 'input_skip':
                    input_pyramid = self.pyramid_downsample(input_pyramid)
                    h = modules[m_idx](input_pyramid, h)
                    m_idx += 1

                elif self.progressive_input == 'residual':
                    input_pyramid = modules[m_idx](input_pyramid)
                    m_idx += 1
                    if self.skip_rescale:
                        input_pyramid = (input_pyramid + h) / np.sqrt(2.)
                    else:
                        input_pyramid = input_pyramid + h
                    h = input_pyramid
                hs.append(h)

        # --- Bottleneck ---------------------------------------------------
        h = hs[-1]
        h = modules[m_idx](h, temb)  # residual block
        m_idx += 1
        h = modules[m_idx](h)  # attention block
        m_idx += 1
        h = modules[m_idx](h, temb)  # residual block
        m_idx += 1

        pyramid = None

        # --- Upsampling path ----------------------------------------------
        for i_level in reversed(range(self.num_resolutions)):
            for _ in range(self.num_res_blocks + 1):
                # Concatenate the matching skip activation along channels.
                h = modules[m_idx](torch.cat([h, hs.pop()], dim=1), temb)
                m_idx += 1

            # Same gating rule as on the way down (see NOTE above).
            if h.shape[-2] in self.attn_resolutions:
                h = modules[m_idx](h)
                m_idx += 1

            if self.progressive != 'none':
                if i_level == self.num_resolutions - 1:
                    # Coarsest level: start the output pyramid.
                    if self.progressive == 'output_skip':
                        pyramid = self.act(modules[m_idx](h))  # GroupNorm
                        m_idx += 1
                        pyramid = modules[m_idx](pyramid)  # conv
                        m_idx += 1
                    elif self.progressive == 'residual':
                        pyramid = self.act(modules[m_idx](h))  # GroupNorm
                        m_idx += 1
                        pyramid = modules[m_idx](pyramid)  # conv
                        m_idx += 1
                    else:
                        raise ValueError(f'{self.progressive} is not a valid name.')
                else:
                    if self.progressive == 'output_skip':
                        # Upsample the running pyramid and add this level's
                        # contribution.
                        pyramid = self.pyramid_upsample(pyramid)
                        pyramid_h = self.act(modules[m_idx](h))  # GroupNorm
                        m_idx += 1
                        pyramid_h = modules[m_idx](pyramid_h)  # conv
                        m_idx += 1
                        pyramid = pyramid + pyramid_h
                    elif self.progressive == 'residual':
                        pyramid = modules[m_idx](pyramid)  # upsample
                        m_idx += 1
                        if self.skip_rescale:
                            pyramid = (pyramid + h) / np.sqrt(2.)
                        else:
                            pyramid = pyramid + h
                        h = pyramid
                    else:
                        raise ValueError(f'{self.progressive} is not a valid name')

            # Upsampling layer for every level except the finest.
            if i_level != 0:
                if self.resblock_type == 'ddpm':
                    h = modules[m_idx](h)
                    m_idx += 1
                else:
                    h = modules[m_idx](h, temb)
                    m_idx += 1

        # Internal invariant: every skip activation was consumed.
        assert not hs

        if self.progressive == 'output_skip':
            h = pyramid
        else:
            h = self.act(modules[m_idx](h))  # final GroupNorm
            m_idx += 1
            h = modules[m_idx](h)  # final conv
            m_idx += 1

        # Internal invariant: every constructed module was used exactly once.
        assert m_idx == len(modules), "Implementation error"
        if self.scale_by_sigma:
            # Reshape to (batch, 1, 1, 1) so the division broadcasts.
            used_sigmas = used_sigmas.reshape((x.shape[0], *([1] * len(x.shape[1:]))))
            h = h / used_sigmas

        # Convert back to a complex tensor: 2 real channels -> trailing
        # (real, imag) pair -> complex values, plus a singleton channel dim.
        h = self.output_layer(h)
        h = torch.permute(h, (0, 2, 3, 1)).contiguous()
        h = torch.view_as_complex(h)[:,None, :, :]
        return h
|
|
|