Spaces:
Runtime error
Runtime error
| import copy | |
| import torch | |
| import numpy as np | |
| from scipy import signal | |
| from librosa.filters import mel | |
| from scipy.signal import get_window | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cutoff frequency, expressed in the same units as ``fs``.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Order of the Butterworth filter.

    Returns:
        The ``(b, a)`` numerator / denominator coefficient arrays returned by
        ``scipy.signal.butter``.
    """
    # scipy.signal.butter takes the critical frequency as a fraction of the
    # Nyquist frequency (half the sampling rate) for digital filters.
    nyquist = 0.5 * fs
    numerator, denominator = signal.butter(
        order, cutoff / nyquist, btype='high', analog=False)
    return numerator, denominator
def pySTFT(x, fft_length=1024, hop_length=256):
    """Compute the magnitude short-time Fourier transform of a signal.

    The input is reflect-padded by ``fft_length // 2`` samples, cut into
    overlapping frames of ``fft_length`` samples spaced ``hop_length`` apart,
    multiplied by a Hann window and transformed with a real FFT.

    Args:
        x: NumPy array of samples. NOTE(review): ``np.pad`` is given a single
            width and therefore pads every axis; the function looks intended
            for 1-D input -- confirm against callers.
        fft_length: Frame (and FFT) size in samples.
        hop_length: Step between the starts of consecutive frames.

    Returns:
        Absolute values of the transposed FFT result; for 1-D input this has
        shape ``(fft_length // 2 + 1, n_frames)``.
    """
    padded = np.pad(x, int(fft_length // 2), mode='reflect')

    # Number of complete frames that fit into the padded signal.
    overlap = fft_length - hop_length
    n_frames = (padded.shape[-1] - overlap) // hop_length

    # Build the frame matrix as a strided view: stepping one frame advances
    # ``hop_length`` samples, stepping inside a frame advances one sample.
    sample_stride = padded.strides[-1]
    frames = np.lib.stride_tricks.as_strided(
        padded,
        shape=padded.shape[:-1] + (n_frames, fft_length),
        strides=padded.strides[:-1] + (hop_length * sample_stride,
                                       sample_stride))

    window = get_window('hann', fft_length, fftbins=True)
    spectrum = np.fft.rfft(window * frames, n=fft_length)
    return np.abs(spectrum.T)
class LinearNorm(torch.nn.Module):
    """Fully connected layer whose weight is Xavier-uniform initialised.

    Args:
        in_dim: Size of each input feature vector.
        out_dim: Size of each output feature vector.
        bias: Whether the wrapped ``torch.nn.Linear`` has a bias term.
        w_init_gain: Non-linearity name passed to
            ``torch.nn.init.calculate_gain`` to scale the initialisation.
    """

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
        super().__init__()

        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)

        # Re-initialise the weight with a gain matched to the activation that
        # is expected to follow this layer.
        init_gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=init_gain)

    def forward(self, x):
        """Apply the wrapped linear layer to ``x``."""
        projected = self.linear_layer(x)
        return projected
class ConvNorm(torch.nn.Module):
    """1-D convolution whose weight is Xavier-uniform initialised.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Width of the convolution kernel.
        stride: Stride of the convolution.
        padding: Zero-padding applied to both sides. When ``None`` it is
            derived as ``dilation * (kernel_size - 1) // 2`` so that, at
            stride 1, the output has the same length as the input; this
            requires an odd ``kernel_size``.
        dilation: Spacing between kernel taps.
        bias: Whether the wrapped ``torch.nn.Conv1d`` has a bias term.
        w_init_gain: Non-linearity name passed to
            ``torch.nn.init.calculate_gain`` to scale the initialisation.

    Raises:
        ValueError: If ``padding`` is ``None`` and ``kernel_size`` is even.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
        super().__init__()
        if padding is None:
            # An explicit raise (rather than ``assert``) keeps the check
            # active when Python runs with -O; with an even kernel no
            # symmetric padding can preserve the sequence length.
            if kernel_size % 2 != 1:
                raise ValueError(
                    "kernel_size must be odd when padding is None, got "
                    "{}".format(kernel_size))
            # dilation * (kernel_size - 1) is even here, so floor division
            # is exact and avoids a round-trip through float.
            padding = dilation * (kernel_size - 1) // 2

        self.conv = torch.nn.Conv1d(in_channels, out_channels,
                                    kernel_size=kernel_size, stride=stride,
                                    padding=padding, dilation=dilation,
                                    bias=bias)

        torch.nn.init.xavier_uniform_(
            self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain))

    def forward(self, signal):
        """Apply the wrapped convolution to ``signal``."""
        return self.conv(signal)
def filter_bank_mean(num_rep, codes_mask, max_len_long):
    '''
    Build a mean-pooling filterbank mapping each code to a span of frames.

    Each code ``l`` owns the frames between the running sum of repetitions
    before it (rounded down) and the running sum including it (rounded up).
    Its row holds ``1 / span_length`` on those frames and zero elsewhere.

    num_rep       (B, L)  repetitions per code
    codes_mask    (B, L)  mask over codes; padded entries must be exactly
                          zero so that they contribute no repetitions
    max_len_long  int     number of frames on the output's last axis
    output: filterbank (B, L, max_len_long)
    '''
    mask = codes_mask.unsqueeze(-1)            # (B, L, 1)
    reps = num_rep.unsqueeze(-1) * mask        # (B, L, 1), padding zeroed

    # Running totals give each span's end; the start of span l is the
    # (unrounded) end of span l-1, with the first span starting at zero.
    span_end = reps.cumsum(dim=1)
    span_start = torch.cat(
        [torch.zeros_like(span_end[:, :1, :]), span_end[:, :-1, :]], dim=1)
    span_end = span_end.ceil()
    span_start = span_start.floor()

    # 1-based frame positions, broadcast against the (B, L, 1) edges.
    frame_pos = torch.arange(
        1, max_len_long + 1, device=num_rep.device).view(1, 1, -1)
    past_start = frame_pos - span_start        # > 0 once past the start
    before_end = span_end - frame_pos + 1      # > 0 up to and including end

    # A frame belongs to the span when both distances are positive.
    member = (torch.min(past_start, before_end) > 0).float()

    # Average over the span; empty spans keep a divisor of one so their
    # rows stay all-zero instead of becoming NaN.
    count = member.sum(dim=-1, keepdim=True).clamp(min=1.0)
    return member / count * mask