Spaces:
Runtime error
Runtime error
| import torch.nn as nn | |
| from models.vq.resnet import Resnet1D, CausalResnet1D | |
class CausalConv1d(nn.Module):
    """1-D convolution whose padding is applied only on the left.

    The wrapped ``nn.Conv1d`` is created with ``padding=0``; ``forward``
    prepends ``self.pad`` zeros along the last axis before convolving, so
    no output step reads input positions to its right. With ``stride=1``
    the output length equals the input length.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Size of the convolution kernel.
        stride: Convolution stride.
        dilation: Spacing between kernel taps.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1):
        super().__init__()
        # Span covered by the dilated kernel beyond its first tap.
        kernel_span = dilation * (kernel_size - 1)
        # A strided convolution needs (stride - 1) fewer padded samples.
        self.pad = kernel_span - (stride - 1)
        self.conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=0,  # padding is handled manually in forward()
            dilation=dilation,
        )

    def forward(self, x):
        """Left-pad ``x`` by ``self.pad`` on its last axis, then convolve."""
        left_padded = nn.functional.pad(x, (self.pad, 0))
        return self.conv(left_padded)
class Encoder(nn.Module):
    """Convolutional encoder with ``down_t`` strided downsampling stages.

    ``self.model`` is an ``nn.Sequential`` laid out as:

    * kernel-3 convolution ``input_emb_width -> width``
    * ``nn.ReLU``
    * ``down_t`` stages, each ``nn.Sequential(strided conv, resnet stack)``
    * kernel-3 convolution ``width -> output_emb_width``

    With ``causal=True`` every convolution is a ``CausalConv1d`` and every
    resnet stack a ``CausalResnet1D``; otherwise ``nn.Conv1d`` and
    ``Resnet1D`` are used.

    Args:
        input_emb_width: Channels of the input tensor.
        output_emb_width: Channels of the output tensor.
        down_t: Number of downsampling stages.
        stride_t: Stride of each downsampling convolution. Its kernel size
            is ``2 * stride_t``; the non-causal variant pads by
            ``stride_t // 2``.
        width: Channel count used by all hidden layers.
        depth: Forwarded to the resnet stack.
        dilation_growth_rate: Forwarded to the resnet stack.
        activation: Forwarded to the resnet stack.
        norm: Forwarded to the resnet stack.
        causal: Select the causal layer variants.
    """

    def __init__(self,
                 input_emb_width=3,
                 output_emb_width=512,
                 down_t=2,
                 stride_t=2,
                 width=512,
                 depth=3,
                 dilation_growth_rate=3,
                 activation='relu',
                 norm=None,
                 causal=False):
        super().__init__()
        self.causal = causal

        kernel_t = stride_t * 2
        padding_t = stride_t // 2

        def conv3(c_in, c_out):
            # Kernel-3, stride-1 convolution. The last positional argument is
            # the dilation for CausalConv1d and the padding for nn.Conv1d.
            if causal:
                return CausalConv1d(c_in, c_out, 3, 1, 1)
            return nn.Conv1d(c_in, c_out, 3, 1, 1)

        def down_stage():
            # The strided convolution is built before the resnet stack so the
            # parameter-initialisation order stays fixed.
            if causal:
                strided = CausalConv1d(width, width, kernel_t, stride_t, 1)
                residual = CausalResnet1D(width, depth, dilation_growth_rate,
                                          activation=activation, norm=norm)
            else:
                strided = nn.Conv1d(width, width, kernel_t, stride_t, padding_t)
                residual = Resnet1D(width, depth, dilation_growth_rate,
                                    activation=activation, norm=norm)
            return nn.Sequential(strided, residual)

        layers = [conv3(input_emb_width, width), nn.ReLU()]
        for _ in range(down_t):
            layers.append(down_stage())
        layers.append(conv3(width, output_emb_width))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through each layer of ``self.model`` in order.

        ``x`` is fed straight into a 1-D convolution, so it must be
        channel-first with ``input_emb_width`` channels.
        """
        out = x
        for stage in self.model:
            out = stage(out)
        return out
class Decoder(nn.Module):
    """Convolutional decoder with ``down_t`` upsampling stages.

    ``self.model`` is an ``nn.Sequential`` laid out as:

    * kernel-3 convolution ``output_emb_width -> width``
    * ``nn.ReLU``
    * ``down_t`` stages, each
      ``nn.Sequential(resnet stack, nn.Upsample, kernel-3 conv)``
    * kernel-3 convolution ``width -> width``
    * ``nn.ReLU``
    * kernel-3 convolution ``width -> input_emb_width``

    With ``causal=True`` every convolution is a ``CausalConv1d`` and every
    resnet stack a ``CausalResnet1D``; otherwise ``nn.Conv1d`` and
    ``Resnet1D`` are used.

    Args:
        input_emb_width: Channels of the decoded output.
        output_emb_width: Channels of the tensor fed to the decoder.
        down_t: Number of upsampling stages.
        stride_t: Nearest-neighbour upsampling factor of each stage.
        width: Channel count used by all hidden layers.
        depth: Forwarded to the resnet stack.
        dilation_growth_rate: Forwarded to the resnet stack.
        activation: Forwarded to the resnet stack.
        norm: Forwarded to the resnet stack.
        causal: Select the causal layer variants.
    """

    def __init__(self,
                 input_emb_width=3,
                 output_emb_width=512,
                 down_t=2,
                 stride_t=2,
                 width=512,
                 depth=3,
                 dilation_growth_rate=3,
                 activation='relu',
                 norm=None,
                 causal=False):
        super().__init__()
        self.causal = causal

        def conv3(c_in, c_out):
            # Kernel-3, stride-1 convolution. The last positional argument is
            # the dilation for CausalConv1d and the padding for nn.Conv1d.
            if causal:
                return CausalConv1d(c_in, c_out, 3, 1, 1)
            return nn.Conv1d(c_in, c_out, 3, 1, 1)

        blocks = [conv3(output_emb_width, width), nn.ReLU()]

        for _ in range(down_t):
            # Build the convolution before the resnet stack so the
            # parameter-initialisation order stays fixed.
            up_conv = conv3(width, width)
            if causal:
                residual = CausalResnet1D(width, depth, dilation_growth_rate,
                                          reverse_dilation=True,
                                          activation=activation, norm=norm)
            else:
                residual = Resnet1D(width, depth, dilation_growth_rate,
                                    reverse_dilation=True,
                                    activation=activation, norm=norm)
            blocks.append(nn.Sequential(
                residual,
                # Upsample by stride_t (previously hard-coded to 2, which
                # ignored the parameter) so each stage mirrors the stride of
                # one Encoder downsampling stage.
                nn.Upsample(scale_factor=stride_t, mode='nearest'),
                up_conv,
            ))

        # Output head: conv -> ReLU -> projection to input_emb_width channels.
        blocks.append(conv3(width, width))
        blocks.append(nn.ReLU())
        blocks.append(conv3(width, input_emb_width))

        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Decode ``x`` and move the channel axis last.

        ``x`` must be a 3-D channel-first tensor with ``output_emb_width``
        channels; the result has axes 1 and 2 swapped, so channels
        (``input_emb_width``) come last.
        """
        decoded = self.model(x)
        return decoded.permute(0, 2, 1)