Upload folder using huggingface_hub

ecf4035 verified 6 months ago

17.3 kB

	# coding=utf-8
	# Copyright 2020 The Google Research Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# pylint: skip-file

	from .ncsnpp_utils import layers, layerspp, normalization
	import torch.nn as nn
	import functools
	import torch
	import numpy as np

	from .shared import BackboneRegistry

	# Module-level shorthands for building blocks and factories that live in the
	# `ncsnpp_utils` helper modules (`layers`, `layerspp`, `normalization`), so
	# the model definition below can refer to them without the module prefix.
	ResnetBlockDDPM = layerspp.ResnetBlockDDPMpp
	ResnetBlockBigGAN = layerspp.ResnetBlockBigGANpp
	Combine = layerspp.Combine
	conv3x3 = layerspp.conv3x3
	conv1x1 = layerspp.conv1x1
	get_act = layers.get_act
	get_normalization = normalization.get_normalization
	default_initializer = layers.default_init


	@BackboneRegistry.register("ncsnpp")
	class NCSNpp(nn.Module):
	    """NCSN++ model, adapted from https://github.com/yang-song/score_sde repository.

	    The network is a U-Net: a down path of residual blocks (optionally followed
	    by attention), a ResNet/attention/ResNet bottleneck, and an up path that
	    consumes the skip activations of the down path in reverse order.  All
	    sub-modules are stored in one flat ``nn.ModuleList`` (``self.all_modules``)
	    in exactly the order in which ``forward`` walks through them, so the
	    construction code in ``__init__`` and the traversal in ``forward`` must be
	    kept in lock-step.

	    The input is a complex tensor whose channels 0 and 1 are split into real
	    and imaginary parts (4 real channels); the output is a single-channel
	    complex tensor.
	    """

	    @staticmethod
	    def add_argparse_args(parser):
	        """Register this backbone's command-line options on ``parser``.

	        Args:
	            parser: An ``argparse``-style parser (anything offering
	                ``add_argument`` and ``set_defaults``).

	        Returns:
	            The same ``parser`` object, to allow chaining.
	        """
	        parser.add_argument("--ch_mult", type=int, nargs='+', default=[1, 1, 2, 2, 2, 2, 2])
	        parser.add_argument("--num_res_blocks", type=int, default=2)
	        parser.add_argument("--attn_resolutions", type=int, nargs='+', default=[16])
	        # --centered / --no-centered write to the same destination; default is True.
	        parser.add_argument("--no-centered", dest="centered", action="store_false", help="The data is not centered [-1, 1]")
	        parser.add_argument("--centered", dest="centered", action="store_true", help="The data is centered [-1, 1]")
	        parser.set_defaults(centered=True)
	        return parser

	    @staticmethod
	    def _group_norm(num_channels):
	        """Build the GroupNorm layer used in front of every output/pyramid conv."""
	        return nn.GroupNorm(num_groups=min(num_channels // 4, 32),
	                            num_channels=num_channels, eps=1e-6)

	    def __init__(self,
	        scale_by_sigma=True,
	        nonlinearity='swish',
	        nf=128,
	        ch_mult=(1, 1, 2, 2, 2, 2, 2),
	        num_res_blocks=2,
	        attn_resolutions=(16,),
	        resamp_with_conv=True,
	        conditional=True,
	        fir=True,
	        fir_kernel=[1, 3, 3, 1],
	        skip_rescale=True,
	        resblock_type='biggan',
	        progressive='output_skip',
	        progressive_input='input_skip',
	        progressive_combine='sum',
	        init_scale=0.,
	        fourier_scale=16,
	        image_size=256,
	        embedding_type='fourier',
	        dropout=.0,
	        centered=True,
	        **unused_kwargs
	    ):
	        """Build all layers of the network.

	        Args:
	            scale_by_sigma: If true, ``forward`` divides the network output by
	                the noise level before the final 1x1 output convolution.
	            nonlinearity: Activation name handed to ``get_act``.
	            nf: Base channel count.  Level ``i`` uses ``nf * ch_mult[i]``
	                channels and the time embedding has ``nf * 4`` features.
	            ch_mult: Per-resolution channel multipliers; its length fixes the
	                number of resolution levels.
	            num_res_blocks: Residual blocks per level in the down path (the up
	                path uses one more per level).
	            attn_resolutions: Resolutions at which an attention block is
	                inserted.
	            resamp_with_conv: ``with_conv`` flag of the plain up/down-sampling
	                layers (only used when ``resblock_type == 'ddpm'``).
	            conditional: Whether to build and use the time-embedding MLP.
	            fir: Forwarded to the resampling layers and BigGAN blocks.
	            fir_kernel: Forwarded to the resampling layers and BigGAN blocks.
	                The list default is kept for interface compatibility; this
	                class never mutates it.
	            skip_rescale: Forwarded to the blocks; also divides the residual
	                pyramid sums by ``sqrt(2)`` in ``forward``.
	            resblock_type: ``'ddpm'`` or ``'biggan'`` (case-insensitive).
	            progressive: Output pyramid mode: ``'none'``, ``'output_skip'`` or
	                ``'residual'`` (case-insensitive).
	            progressive_input: Input pyramid mode: ``'none'``, ``'input_skip'``
	                or ``'residual'`` (case-insensitive).
	            progressive_combine: ``method`` passed to ``Combine``; with
	                ``'cat'`` the channel count doubles after each combination.
	            init_scale: Forwarded to attention/residual blocks and to the
	                output convolutions.
	            fourier_scale: ``scale`` of the Gaussian Fourier projection.
	            image_size: Reference input size used to derive the resolution of
	                each level (``image_size // 2**i``) when placing attention.
	            embedding_type: ``'fourier'`` or ``'positional'``.
	            dropout: Forwarded to the residual blocks.
	            centered: If false, ``forward`` maps the input via ``2 * x - 1``.
	            **unused_kwargs: Accepted and ignored.

	        Raises:
	            AssertionError: If ``progressive``, ``progressive_input`` or
	                ``embedding_type`` is not one of the supported names.
	            ValueError: If ``resblock_type`` is not recognised.
	        """
	        super().__init__()
	        self.act = act = get_act(nonlinearity)

	        self.nf = nf
	        self.num_res_blocks = num_res_blocks
	        self.attn_resolutions = attn_resolutions
	        self.num_resolutions = num_resolutions = len(ch_mult)
	        self.all_resolutions = all_resolutions = [image_size // (2 ** i) for i in range(num_resolutions)]

	        self.conditional = conditional  # noise-conditional
	        self.centered = centered
	        self.scale_by_sigma = scale_by_sigma

	        self.skip_rescale = skip_rescale
	        self.resblock_type = resblock_type = resblock_type.lower()
	        self.progressive = progressive = progressive.lower()
	        self.progressive_input = progressive_input = progressive_input.lower()
	        self.embedding_type = embedding_type = embedding_type.lower()
	        assert progressive in ['none', 'output_skip', 'residual']
	        assert progressive_input in ['none', 'input_skip', 'residual']
	        assert embedding_type in ['fourier', 'positional']
	        combine_method = progressive_combine.lower()
	        combiner = functools.partial(Combine, method=combine_method)

	        num_channels = 4  # x.real, x.imag, y.real, y.imag
	        self.output_layer = nn.Conv2d(num_channels, 2, 1)

	        # Everything appended to `modules` is consumed by index in `forward`;
	        # do not reorder without updating `forward` accordingly.
	        modules = []

	        # --- timestep / noise-level embedding ---------------------------------
	        if embedding_type == 'fourier':
	            # Gaussian Fourier features embeddings.
	            modules.append(layerspp.GaussianFourierProjection(
	                embedding_size=nf, scale=fourier_scale
	            ))
	            embed_dim = 2 * nf
	        elif embedding_type == 'positional':
	            embed_dim = nf
	        else:
	            raise ValueError(f'embedding type {embedding_type} unknown.')

	        if conditional:
	            # Two-layer MLP: embed_dim -> 4*nf -> 4*nf, zero bias.
	            for in_features in (embed_dim, nf * 4):
	                dense = nn.Linear(in_features, nf * 4)
	                modules.append(dense)
	                dense.weight.data = default_initializer()(dense.weight.shape)
	                nn.init.zeros_(dense.bias)

	        # --- layer factories --------------------------------------------------
	        AttnBlock = functools.partial(layerspp.AttnBlockpp,
	                                      init_scale=init_scale, skip_rescale=skip_rescale)

	        Upsample = functools.partial(layerspp.Upsample,
	                                     with_conv=resamp_with_conv, fir=fir, fir_kernel=fir_kernel)

	        if progressive == 'output_skip':
	            # A single shared upsampler, kept outside `all_modules`.
	            self.pyramid_upsample = layerspp.Upsample(fir=fir, fir_kernel=fir_kernel, with_conv=False)
	        elif progressive == 'residual':
	            pyramid_upsample = functools.partial(layerspp.Upsample, fir=fir,
	                                                 fir_kernel=fir_kernel, with_conv=True)

	        Downsample = functools.partial(layerspp.Downsample, with_conv=resamp_with_conv, fir=fir, fir_kernel=fir_kernel)

	        if progressive_input == 'input_skip':
	            # A single shared downsampler, kept outside `all_modules`.
	            self.pyramid_downsample = layerspp.Downsample(fir=fir, fir_kernel=fir_kernel, with_conv=False)
	        elif progressive_input == 'residual':
	            pyramid_downsample = functools.partial(layerspp.Downsample,
	                                                   fir=fir, fir_kernel=fir_kernel, with_conv=True)

	        if resblock_type == 'ddpm':
	            ResnetBlock = functools.partial(ResnetBlockDDPM, act=act,
	                                            dropout=dropout, init_scale=init_scale,
	                                            skip_rescale=skip_rescale, temb_dim=nf * 4)
	        elif resblock_type == 'biggan':
	            ResnetBlock = functools.partial(ResnetBlockBigGAN, act=act,
	                                            dropout=dropout, fir=fir, fir_kernel=fir_kernel,
	                                            init_scale=init_scale, skip_rescale=skip_rescale, temb_dim=nf * 4)
	        else:
	            raise ValueError(f'resblock type {resblock_type} unrecognized.')

	        # --- down path --------------------------------------------------------
	        channels = num_channels
	        if progressive_input != 'none':
	            input_pyramid_ch = channels

	        modules.append(conv3x3(channels, nf))
	        hs_c = [nf]  # channel counts of the skip activations, in push order

	        in_ch = nf
	        for i_level in range(num_resolutions):
	            # Residual blocks for this resolution
	            for _ in range(num_res_blocks):
	                out_ch = nf * ch_mult[i_level]
	                modules.append(ResnetBlock(in_ch=in_ch, out_ch=out_ch))
	                in_ch = out_ch

	                if all_resolutions[i_level] in attn_resolutions:
	                    modules.append(AttnBlock(channels=in_ch))
	                hs_c.append(in_ch)

	            if i_level != num_resolutions - 1:
	                if resblock_type == 'ddpm':
	                    modules.append(Downsample(in_ch=in_ch))
	                else:
	                    modules.append(ResnetBlock(down=True, in_ch=in_ch))

	                if progressive_input == 'input_skip':
	                    modules.append(combiner(dim1=input_pyramid_ch, dim2=in_ch))
	                    if combine_method == 'cat':
	                        in_ch *= 2

	                elif progressive_input == 'residual':
	                    modules.append(pyramid_downsample(in_ch=input_pyramid_ch, out_ch=in_ch))
	                    input_pyramid_ch = in_ch

	                hs_c.append(in_ch)

	        # --- bottleneck -------------------------------------------------------
	        in_ch = hs_c[-1]
	        modules.append(ResnetBlock(in_ch=in_ch))
	        modules.append(AttnBlock(channels=in_ch))
	        modules.append(ResnetBlock(in_ch=in_ch))

	        # --- up path ----------------------------------------------------------
	        pyramid_ch = 0
	        for i_level in reversed(range(num_resolutions)):
	            # +1 blocks in upsampling because of skip connection from combiner
	            # (after downsampling)
	            for _ in range(num_res_blocks + 1):
	                out_ch = nf * ch_mult[i_level]
	                modules.append(ResnetBlock(in_ch=in_ch + hs_c.pop(), out_ch=out_ch))
	                in_ch = out_ch

	            if all_resolutions[i_level] in attn_resolutions:
	                modules.append(AttnBlock(channels=in_ch))

	            if progressive != 'none':
	                if i_level == num_resolutions - 1:
	                    # Coarsest level: start the output pyramid.
	                    if progressive == 'output_skip':
	                        modules.append(self._group_norm(in_ch))
	                        modules.append(conv3x3(in_ch, channels, init_scale=init_scale))
	                        pyramid_ch = channels
	                    elif progressive == 'residual':
	                        modules.append(self._group_norm(in_ch))
	                        modules.append(conv3x3(in_ch, in_ch, bias=True))
	                        pyramid_ch = in_ch
	                    else:
	                        raise ValueError(f'{progressive} is not a valid name.')
	                else:
	                    if progressive == 'output_skip':
	                        modules.append(self._group_norm(in_ch))
	                        modules.append(conv3x3(in_ch, channels, bias=True, init_scale=init_scale))
	                        pyramid_ch = channels
	                    elif progressive == 'residual':
	                        modules.append(pyramid_upsample(in_ch=pyramid_ch, out_ch=in_ch))
	                        pyramid_ch = in_ch
	                    else:
	                        raise ValueError(f'{progressive} is not a valid name')

	            if i_level != 0:
	                if resblock_type == 'ddpm':
	                    modules.append(Upsample(in_ch=in_ch))
	                else:
	                    modules.append(ResnetBlock(in_ch=in_ch, up=True))

	        # Every skip activation must have been matched by an up-path block.
	        assert not hs_c

	        if progressive != 'output_skip':
	            modules.append(self._group_norm(in_ch))
	            modules.append(conv3x3(in_ch, channels, init_scale=init_scale))

	        self.all_modules = nn.ModuleList(modules)

	    def forward(self, x, time_cond):
	        """Run the network.

	        Args:
	            x: Complex tensor indexed as ``x[:, c, :, :]``; channels 0 and 1
	                are used (x and y according to the original comments).
	            time_cond: Conditioning value.  With the ``'fourier'`` embedding it
	                is the noise level (its logarithm is embedded) and must be
	                reshapeable to ``(batch, 1, 1, 1)``; with ``'positional'`` it
	                is used as a timestep index.

	        Returns:
	            Complex tensor with a singleton channel dimension inserted at
	            position 1.

	        Raises:
	            ValueError: If ``self.embedding_type`` or ``self.progressive`` holds
	                an unsupported name.
	            AssertionError: If the module list was not consumed exactly, i.e.
	                construction and traversal got out of sync.
	        """
	        # timestep/noise_level embedding; only for continuous training
	        modules = self.all_modules
	        m_idx = 0  # cursor into `modules`; advanced after every module call

	        # Convert real and imaginary parts of (x,y) into four channel dimensions
	        x = torch.cat((x[:, [0], :, :].real, x[:, [0], :, :].imag,
	                       x[:, [1], :, :].real, x[:, [1], :, :].imag), dim=1)

	        if self.embedding_type == 'fourier':
	            # Gaussian Fourier features embeddings.
	            used_sigmas = time_cond
	            temb = modules[m_idx](torch.log(used_sigmas))
	            m_idx += 1

	        elif self.embedding_type == 'positional':
	            # Sinusoidal positional embeddings.
	            # NOTE(review): `self.sigmas` is not defined anywhere in this
	            # class, so this branch raises AttributeError unless a caller or
	            # subclass sets it - confirm before relying on 'positional'.
	            timesteps = time_cond
	            used_sigmas = self.sigmas[time_cond.long()]
	            temb = layers.get_timestep_embedding(timesteps, self.nf)

	        else:
	            raise ValueError(f'embedding type {self.embedding_type} unknown.')

	        if self.conditional:
	            temb = modules[m_idx](temb)
	            m_idx += 1
	            temb = modules[m_idx](self.act(temb))
	            m_idx += 1
	        else:
	            temb = None

	        if not self.centered:
	            # If input data is in [0, 1]
	            x = 2 * x - 1.

	        # Downsampling block
	        input_pyramid = None
	        if self.progressive_input != 'none':
	            input_pyramid = x

	        # Input layer: conv3x3 from the 4 input channels to nf channels
	        hs = [modules[m_idx](x)]
	        m_idx += 1

	        # Down path in U-Net
	        for i_level in range(self.num_resolutions):
	            # Residual blocks for this resolution
	            for _ in range(self.num_res_blocks):
	                h = modules[m_idx](hs[-1], temb)
	                m_idx += 1
	                # Attention layer (optional); checks the H dim (-2), not W (-1).
	                # NOTE(review): __init__ places attention by
	                # `image_size // 2**i_level`, so this test only matches the
	                # constructed layout when the input height agrees with
	                # `image_size`.
	                if h.shape[-2] in self.attn_resolutions:
	                    h = modules[m_idx](h)
	                    m_idx += 1
	                hs.append(h)

	            # Downsampling
	            if i_level != self.num_resolutions - 1:
	                if self.resblock_type == 'ddpm':
	                    h = modules[m_idx](hs[-1])
	                else:
	                    h = modules[m_idx](hs[-1], temb)
	                m_idx += 1

	                if self.progressive_input == 'input_skip':  # Combine h with x
	                    input_pyramid = self.pyramid_downsample(input_pyramid)
	                    h = modules[m_idx](input_pyramid, h)
	                    m_idx += 1

	                elif self.progressive_input == 'residual':
	                    input_pyramid = modules[m_idx](input_pyramid)
	                    m_idx += 1
	                    if self.skip_rescale:
	                        input_pyramid = (input_pyramid + h) / np.sqrt(2.)
	                    else:
	                        input_pyramid = input_pyramid + h
	                    h = input_pyramid
	                hs.append(h)

	        # Bottleneck
	        h = hs[-1]  # actually equal to: h = h
	        h = modules[m_idx](h, temb)  # ResNet block
	        m_idx += 1
	        h = modules[m_idx](h)  # Attention block
	        m_idx += 1
	        h = modules[m_idx](h, temb)  # ResNet block
	        m_idx += 1

	        pyramid = None

	        # Upsampling block
	        for i_level in reversed(range(self.num_resolutions)):
	            for _ in range(self.num_res_blocks + 1):
	                h = modules[m_idx](torch.cat([h, hs.pop()], dim=1), temb)
	                m_idx += 1

	            # Attention (optional); same H-dim test as in the down path.
	            if h.shape[-2] in self.attn_resolutions:
	                h = modules[m_idx](h)
	                m_idx += 1

	            if self.progressive != 'none':
	                if i_level == self.num_resolutions - 1:
	                    if self.progressive == 'output_skip':
	                        pyramid = self.act(modules[m_idx](h))  # GroupNorm
	                        m_idx += 1
	                        pyramid = modules[m_idx](pyramid)  # conv3x3 to 4 channels
	                        m_idx += 1
	                    elif self.progressive == 'residual':
	                        pyramid = self.act(modules[m_idx](h))
	                        m_idx += 1
	                        pyramid = modules[m_idx](pyramid)
	                        m_idx += 1
	                    else:
	                        raise ValueError(f'{self.progressive} is not a valid name.')
	                else:
	                    if self.progressive == 'output_skip':
	                        pyramid = self.pyramid_upsample(pyramid)  # Upsample
	                        pyramid_h = self.act(modules[m_idx](h))  # GroupNorm
	                        m_idx += 1
	                        pyramid_h = modules[m_idx](pyramid_h)
	                        m_idx += 1
	                        pyramid = pyramid + pyramid_h
	                    elif self.progressive == 'residual':
	                        pyramid = modules[m_idx](pyramid)
	                        m_idx += 1
	                        if self.skip_rescale:
	                            pyramid = (pyramid + h) / np.sqrt(2.)
	                        else:
	                            pyramid = pyramid + h
	                        h = pyramid
	                    else:
	                        raise ValueError(f'{self.progressive} is not a valid name')

	            # Upsampling Layer
	            if i_level != 0:
	                if self.resblock_type == 'ddpm':
	                    h = modules[m_idx](h)
	                else:
	                    h = modules[m_idx](h, temb)  # Upsampling
	                m_idx += 1

	        assert not hs

	        if self.progressive == 'output_skip':
	            h = pyramid
	        else:
	            h = self.act(modules[m_idx](h))
	            m_idx += 1
	            h = modules[m_idx](h)
	            m_idx += 1

	        assert m_idx == len(modules), "Implementation error"
	        if self.scale_by_sigma:
	            # Reshape the per-sample noise level to (B, 1, 1, 1) so that it
	            # broadcasts over the channel and spatial dimensions of `h`.
	            used_sigmas = used_sigmas.reshape((x.shape[0], *([1] * len(x.shape[1:]))))
	            h = h / used_sigmas

	        # Convert back to complex number: 2 real channels -> (real, imag) pairs
	        h = self.output_layer(h)
	        h = torch.permute(h, (0, 2, 3, 1)).contiguous()
	        h = torch.view_as_complex(h)[:, None, :, :]
	        return h