|
|
|
|
|
|
|
|
|
|
| """STFT-based Loss modules."""
|
| import librosa
|
| import torch
|
|
|
| from modules.parallel_wavegan.losses import LogSTFTMagnitudeLoss, SpectralConvergengeLoss, stft
|
|
|
|
|
class STFTLoss(torch.nn.Module):
    """Single-resolution STFT loss module.

    Compares the STFT magnitudes of a predicted and a ground-truth signal
    with a spectral convergence term and a log STFT magnitude term.  When
    ``use_mel_loss`` is enabled, both magnitudes are first projected onto a
    mel filterbank.
    """

    def __init__(self, fft_size=1024, shift_size=120, win_length=600, window="hann_window",
                 use_mel_loss=False, sample_rate=22050, n_mels=80):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size handed to the ``stft`` helper.
            win_length (int): Window length.
            window (str): Name of a ``torch`` window function (e.g. ``"hann_window"``).
            use_mel_loss (bool): Whether to project the magnitudes onto a mel
                filterbank before computing the two losses.
            sample_rate (int): Sampling rate used to build the mel filterbank.
            n_mels (int): Number of mel bands of the filterbank.

        """
        super().__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        self.window = getattr(torch, window)(win_length)
        self.spectral_convergenge_loss = SpectralConvergengeLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()
        self.use_mel_loss = use_mel_loss
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        # Built lazily on the first mel-loss forward pass.
        self.mel_basis = None

    def _mel_projection(self, reference):
        """Return the cached mel filterbank, matched to ``reference``'s device and dtype."""
        if self.mel_basis is None:
            # Keyword arguments: librosa >= 0.10 rejects positional ones here.
            basis = librosa.filters.mel(sr=self.sample_rate, n_fft=self.fft_size,
                                        n_mels=self.n_mels)
            # librosa documents the result as (n_mels, 1 + n_fft // 2); transpose so
            # the magnitudes can be right-multiplied by it.
            self.mel_basis = torch.from_numpy(basis).T
        if self.mel_basis.device != reference.device or self.mel_basis.dtype != reference.dtype:
            # Follow the input instead of assuming CUDA is available.
            self.mel_basis = self.mel_basis.to(device=reference.device, dtype=reference.dtype)
        return self.mel_basis

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.

        """
        # The window is a plain attribute rather than a registered buffer, so
        # Module.to()/cuda() never moves it; align it with the input here.
        if self.window.device != x.device:
            self.window = self.window.to(x.device)

        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)

        if self.use_mel_loss:
            # Assumes the last axis of the magnitudes holds the frequency bins
            # (fft_size // 2 + 1) -- TODO confirm against the `stft` helper.
            mel_basis = self._mel_projection(x_mag)
            x_mag = x_mag @ mel_basis
            y_mag = y_mag @ mel_basis

        sc_loss = self.spectral_convergenge_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)

        return sc_loss, mag_loss
|
|
|
|
|
class MultiResolutionSTFTLoss(torch.nn.Module):
    """Multi resolution STFT loss module.

    Holds one ``STFTLoss`` per (FFT size, hop size, window length) triple and
    averages their spectral convergence and log STFT magnitude losses.
    """

    def __init__(self,
                 fft_sizes=(1024, 2048, 512),
                 hop_sizes=(120, 240, 50),
                 win_lengths=(600, 1200, 240),
                 window="hann_window",
                 use_mel_loss=False):
        """Initialize Multi resolution STFT loss module.

        Args:
            fft_sizes (list): List of FFT sizes.
            hop_sizes (list): List of hop sizes.
            win_lengths (list): List of window lengths.
            window (str): Window function type.
            use_mel_loss (bool): Forwarded to every ``STFTLoss``.

        Raises:
            AssertionError: If the three lists differ in length.
            ValueError: If the lists are empty.

        """
        super().__init__()
        # Kept as an assert so the exception type seen by existing callers is unchanged.
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths), \
            "fft_sizes, hop_sizes and win_lengths must have the same length"
        if len(fft_sizes) == 0:
            # Without any resolution, forward() could only fail with a division by zero.
            raise ValueError("at least one STFT resolution is required")
        self.stft_losses = torch.nn.ModuleList(
            [STFTLoss(fs, ss, wl, window, use_mel_loss)
             for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths)]
        )

    def forward(self, x, y):
        """Calculate forward propagation.

        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).

        Returns:
            Tensor: Multi resolution spectral convergence loss value.
            Tensor: Multi resolution log STFT magnitude loss value.

        """
        per_resolution = [stft_loss(x, y) for stft_loss in self.stft_losses]
        count = len(per_resolution)
        # Out-of-place sums: no tensor taking part in autograd is modified in place.
        sc_loss = sum((sc for sc, _ in per_resolution), 0.0) / count
        mag_loss = sum((mag for _, mag in per_resolution), 0.0) / count

        return sc_loss, mag_loss
|
|
|