import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange


class GLU(nn.Module):
    """Gated Linear Unit: splits the input in half along `dim` and gates one
    half with the sigmoid of the other."""

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        out, gate = x.chunk(2, dim=self.dim)
        return out * gate.sigmoid()
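
# Example usage (illustrative shapes, not from the original): GLU halves the
# gated dimension, e.g.
#   glu = GLU(dim=-1)
#   glu(torch.randn(2, 10, 512)).shape  # -> torch.Size([2, 10, 256])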


class conform_conv(nn.Module):
    """Conformer convolution module: pointwise Conv1d + GLU, depthwise Conv1d,
    BatchNorm, SiLU, pointwise Conv1d, dropout."""

    def __init__(
        self, channels: int, kernel_size: int = 31, DropoutL=0.1, bias: bool = True
    ):
        super().__init__()
        self.act2 = nn.SiLU()
        self.act1 = GLU(1)

        # The first pointwise conv doubles the channels so the GLU can split
        # them into a signal half and a gate half.
        self.pointwise_conv1 = nn.Conv1d(
            channels, 2 * channels, kernel_size=1, stride=1, padding=0, bias=bias
        )

        # Symmetric (non-causal) convolution: an odd kernel size pads evenly
        # on both sides, so the depthwise conv preserves the sequence length.
        assert (kernel_size - 1) % 2 == 0
        padding = (kernel_size - 1) // 2
        self.depthwise_conv = nn.Conv1d(
            channels,
            channels,
            kernel_size,
            stride=1,
            padding=padding,
            groups=channels,
            bias=bias,
        )
        self.norm = nn.BatchNorm1d(channels)
        self.pointwise_conv2 = nn.Conv1d(
            channels, channels, kernel_size=1, stride=1, padding=0, bias=bias
        )
        self.drop = nn.Dropout(DropoutL) if DropoutL > 0.0 else nn.Identity()
    def forward(self, x):
        # (batch, time, channels) -> (batch, channels, time) for Conv1d.
        x = x.transpose(1, 2)
        x = self.act1(self.pointwise_conv1(x))
        x = self.depthwise_conv(x)
        x = self.norm(x)
        x = self.act2(x)
        x = self.pointwise_conv2(x)
        return self.drop(x).transpose(1, 2)
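
# Example usage (illustrative shapes, not from the original): the module is
# shape-preserving over (batch, time, channels), e.g.
#   conv = conform_conv(channels=64, kernel_size=31)
#   conv(torch.randn(2, 100, 64)).shape  # -> torch.Size([2, 100, 64])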


class Attention(nn.Module):
    """Multi-head attention; keys/values may come from a separate conditioning
    sequence of width `conditiondim` (cross-attention), defaulting to
    self-attention when no condition is given."""

    def __init__(self, dim, heads=4, dim_head=32, conditiondim=None):
        super().__init__()
        if conditiondim is None:
            conditiondim = dim

        # Kept for reference; F.scaled_dot_product_attention applies its own
        # 1/sqrt(dim_head) scaling internally.
        self.scale = dim_head**-0.5
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_q = nn.Linear(dim, hidden_dim, bias=False)
        self.to_kv = nn.Linear(conditiondim, hidden_dim * 2, bias=False)
        self.to_out = nn.Sequential(nn.Linear(hidden_dim, dim))
    def forward(self, q, kv=None, mask=None):
        # Self-attention when no conditioning sequence is given.
        if kv is None:
            kv = q

        q = self.to_q(q)
        k, v = self.to_kv(kv).chunk(2, dim=2)
        q, k, v = map(
            lambda t: rearrange(t, "b t (h c) -> b h t c", h=self.heads), (q, k, v)
        )

        if mask is not None:
            # (batch, time) -> (batch, 1, 1, time), broadcasting over heads
            # and query positions; True marks positions that may be attended.
            mask = mask.unsqueeze(1).unsqueeze(1)

        # Prefer a fused SDPA kernel. Note that torch.backends.cuda.sdp_kernel
        # is deprecated in recent PyTorch in favour of
        # torch.nn.attention.sdpa_kernel.
        with torch.backends.cuda.sdp_kernel(enable_math=False):
            out = F.scaled_dot_product_attention(q, k, v, attn_mask=mask)

        out = rearrange(out, "b h t c -> b t (h c)")
        return self.to_out(out)
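
# Example usage (illustrative shapes, not from the original):
#   self_att = Attention(dim=64, heads=4, dim_head=16)
#   self_att(torch.randn(2, 100, 64)).shape  # -> torch.Size([2, 100, 64])
#   cross_att = Attention(dim=64, heads=4, dim_head=16, conditiondim=32)
#   cross_att(torch.randn(2, 100, 64), kv=torch.randn(2, 50, 32)).shape
#   # -> torch.Size([2, 100, 64])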


class conform_ffn(nn.Module):
    """Conformer feed-forward module: Linear -> SiLU -> Dropout -> Linear ->
    Dropout, with a 4x hidden expansion."""

    def __init__(self, dim, DropoutL1: float = 0.1, DropoutL2: float = 0.1):
        super().__init__()
        self.ln1 = nn.Linear(dim, dim * 4)
        self.ln2 = nn.Linear(dim * 4, dim)
        self.drop1 = nn.Dropout(DropoutL1) if DropoutL1 > 0.0 else nn.Identity()
        self.drop2 = nn.Dropout(DropoutL2) if DropoutL2 > 0.0 else nn.Identity()
        self.act = nn.SiLU()

    def forward(self, x):
        x = self.ln1(x)
        x = self.act(x)
        x = self.drop1(x)
        x = self.ln2(x)
        return self.drop2(x)


class conform_blocke(nn.Module):
    """Macaron-style Conformer block: half-step FFN, self-attention,
    convolution module, half-step FFN, final LayerNorm."""

    def __init__(
        self,
        dim: int,
        kernel_size: int = 31,
        conv_drop: float = 0.1,
        ffn_latent_drop: float = 0.1,
        ffn_out_drop: float = 0.1,
        attention_drop: float = 0.1,
        attention_heads: int = 4,
        attention_heads_dim: int = 64,
    ):
        super().__init__()
        self.ffn1 = conform_ffn(dim, ffn_latent_drop, ffn_out_drop)
        self.ffn2 = conform_ffn(dim, ffn_latent_drop, ffn_out_drop)
        self.att = Attention(dim, heads=attention_heads, dim_head=attention_heads_dim)
        self.attdrop = (
            nn.Dropout(attention_drop) if attention_drop > 0.0 else nn.Identity()
        )
        self.conv = conform_conv(dim, kernel_size=kernel_size, DropoutL=conv_drop)
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)
        self.norm3 = nn.LayerNorm(dim)
        self.norm4 = nn.LayerNorm(dim)
        self.norm5 = nn.LayerNorm(dim)

    def forward(self, x, mask=None):
        # The two feed-forward modules are half-step residuals (the 0.5
        # factor), as in the Macaron-style Conformer.
        x = self.ffn1(self.norm1(x)) * 0.5 + x
        x = self.attdrop(self.att(self.norm2(x), mask=mask)) + x
        x = self.conv(self.norm3(x)) + x
        x = self.ffn2(self.norm4(x)) * 0.5 + x
        return self.norm5(x)
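
# Example usage (illustrative shapes, not from the original):
#   block = conform_blocke(dim=64, attention_heads=4, attention_heads_dim=16)
#   block(torch.randn(2, 100, 64)).shape  # -> torch.Size([2, 100, 64])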


class Gcf(nn.Module):
    """Runs a Conformer block on each of two parallel streams (midi and
    boundary), then lets the streams exchange GLU-gated residuals."""

    def __init__(
        self,
        dim: int,
        kernel_size: int = 31,
        conv_drop: float = 0.1,
        ffn_latent_drop: float = 0.1,
        ffn_out_drop: float = 0.1,
        attention_drop: float = 0.1,
        attention_heads: int = 4,
        attention_heads_dim: int = 64,
    ):
        super().__init__()
        self.att1 = conform_blocke(
            dim=dim,
            kernel_size=kernel_size,
            conv_drop=conv_drop,
            ffn_latent_drop=ffn_latent_drop,
            ffn_out_drop=ffn_out_drop,
            attention_drop=attention_drop,
            attention_heads=attention_heads,
            attention_heads_dim=attention_heads_dim,
        )
        self.att2 = conform_blocke(
            dim=dim,
            kernel_size=kernel_size,
            conv_drop=conv_drop,
            ffn_latent_drop=ffn_latent_drop,
            ffn_out_drop=ffn_out_drop,
            attention_drop=attention_drop,
            attention_heads=attention_heads,
            attention_heads_dim=attention_heads_dim,
        )
        self.glu1 = nn.Sequential(nn.Linear(dim, dim * 2), GLU(2))
        self.glu2 = nn.Sequential(nn.Linear(dim, dim * 2), GLU(2))

    def forward(self, midi, bound):
        midi = self.att1(midi)
        bound = self.att2(bound)
        # Each stream adds a gated projection of the other as a residual.
        midis = self.glu1(midi)
        bounds = self.glu2(bound)
        return midi + bounds, bound + midis
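
# Example usage (illustrative shapes, not from the original): both streams
# keep their shapes while exchanging gated residuals, e.g.
#   gcf = Gcf(dim=64, attention_heads=4, attention_heads_dim=16)
#   midi, bound = gcf(torch.randn(2, 100, 64), torch.randn(2, 100, 64))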


class Gmidi_conform(nn.Module):
    """Stack of `lay` Gcf layers over two projections of the same input,
    followed by one extra Conformer block per stream. The midi stream is
    projected to `outdim` logits; the boundary stream to a per-frame scalar."""

    def __init__(
        self,
        lay: int,
        dim: int,
        indim: int,
        outdim: int,
        use_lay_skip: bool,
        kernel_size: int = 31,
        conv_drop: float = 0.1,
        ffn_latent_drop: float = 0.1,
        ffn_out_drop: float = 0.1,
        attention_drop: float = 0.1,
        attention_heads: int = 4,
        attention_heads_dim: int = 64,
    ):
        super().__init__()
        self.inln = nn.Linear(indim, dim)
        self.inln1 = nn.Linear(indim, dim)
        self.outln = nn.Linear(dim, outdim)
        self.cutheard = nn.Linear(dim, 1)
        self.lay = lay
        # Note: use_lay_skip is stored but not used in forward().
        self.use_lay_skip = use_lay_skip
        self.cf_lay = nn.ModuleList(
            [
                Gcf(
                    dim=dim,
                    kernel_size=kernel_size,
                    conv_drop=conv_drop,
                    ffn_latent_drop=ffn_latent_drop,
                    ffn_out_drop=ffn_out_drop,
                    attention_drop=attention_drop,
                    attention_heads=attention_heads,
                    attention_heads_dim=attention_heads_dim,
                )
                for _ in range(lay)
            ]
        )
        self.att1 = conform_blocke(
            dim=dim,
            kernel_size=kernel_size,
            conv_drop=conv_drop,
            ffn_latent_drop=ffn_latent_drop,
            ffn_out_drop=ffn_out_drop,
            attention_drop=attention_drop,
            attention_heads=attention_heads,
            attention_heads_dim=attention_heads_dim,
        )
        self.att2 = conform_blocke(
            dim=dim,
            kernel_size=kernel_size,
            conv_drop=conv_drop,
            ffn_latent_drop=ffn_latent_drop,
            ffn_out_drop=ffn_out_drop,
            attention_drop=attention_drop,
            attention_heads=attention_heads,
            attention_heads_dim=attention_heads_dim,
        )
    def forward(self, x, mask=None):
        # Two independent projections of the same input features: x feeds the
        # midi stream, x1 the boundary stream.
        x1 = x.clone()
        x = self.inln(x)
        x1 = self.inln1(x1)
        if mask is not None:
            x = x.masked_fill(~mask.unsqueeze(-1), 0)
        for layer in self.cf_lay:
            x, x1 = layer(x, x1)
            if mask is not None:
                x = x.masked_fill(~mask.unsqueeze(-1), 0)
        x, x1 = self.att1(x), self.att2(x1)
        cutprp = self.cutheard(x1)
        midiout = self.outln(x)
        return midiout, cutprp
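

if __name__ == "__main__":
    # Minimal smoke test; the shapes and hyperparameters below are
    # illustrative assumptions, not values from the original training setup.
    model = Gmidi_conform(
        lay=2,
        dim=64,
        indim=80,
        outdim=128,
        use_lay_skip=True,
        attention_heads=4,
        attention_heads_dim=16,
    )
    feats = torch.randn(2, 100, 80)  # (batch, frames, indim)
    mask = torch.ones(2, 100, dtype=torch.bool)  # True = valid frame
    midiout, cutprp = model(feats, mask)
    print(midiout.shape)  # torch.Size([2, 100, 128])
    print(cutprp.shape)   # torch.Size([2, 100, 1])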