| import torch
|
| from tqdm import tqdm
|
| from multiprocessing import Pool
|
| from mel_processing import spectrogram_torch, mel_spectrogram_torch
|
| from utils import load_wav_to_torch
|
|
|
|
|
class AudioProcessor:
    """Compute spectrograms for wav files and cache them next to the audio.

    The constructor only stores its arguments; all work happens in
    :meth:`process_audio`.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        """Store the spectrogram settings.

        Args:
            max_wav_value: Divisor applied to the loaded waveform.
            use_mel_spec_posterior: If true, ``mel_spectrogram_torch`` is used
                and the cache file ends in ``.mel.pt``; otherwise
                ``spectrogram_torch`` is used and it ends in ``.spec.pt``.
            filter_length: Forwarded to the spectrogram function.
            n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
            sampling_rate: Forwarded to the spectrogram function.
            hop_length: Forwarded to the spectrogram function.
            win_length: Forwarded to the spectrogram function.
            mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
            mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
        """
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def process_audio(self, filename):
        """Return the (cached or freshly computed) spectrogram for ``filename``.

        The waveform is loaded, divided by ``max_wav_value`` and given an
        extra leading dimension. If a cache file already exists and can be
        loaded it is returned as-is; otherwise the spectrogram is computed,
        squeezed along dimension 0 and written to the cache file.

        Args:
            filename: Path of the wav file to process.

        Returns:
            A ``(spec, audio_norm)`` tuple.
        """
        # The sampling rate reported by the file is not compared against
        # self.sampling_rate here; it is simply discarded.
        audio, _ = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        # NOTE(review): str.replace substitutes every ".wav" occurrence in the
        # path, not just the suffix. Left unchanged because other code may
        # derive the same cache path the same way -- confirm before changing.
        spec_filename = filename.replace(".wav", ".spec.pt")
        if self.use_mel_spec_posterior:
            spec_filename = spec_filename.replace(".spec.pt", ".mel.pt")

        try:
            spec = torch.load(spec_filename)
        except Exception:
            # Any failure to load the cache (missing or unreadable file)
            # triggers a recompute. Catching Exception rather than using a
            # bare ``except`` lets KeyboardInterrupt / SystemExit propagate so
            # a worker can actually be interrupted.
            if self.use_mel_spec_posterior:
                spec = mel_spectrogram_torch(
                    audio_norm,
                    self.filter_length,
                    self.n_mel_channels,
                    self.sampling_rate,
                    self.hop_length,
                    self.win_length,
                    self.mel_fmin,
                    self.mel_fmax,
                    center=False,
                )
            else:
                spec = spectrogram_torch(
                    audio_norm,
                    self.filter_length,
                    self.sampling_rate,
                    self.hop_length,
                    self.win_length,
                    center=False,
                )
            spec = torch.squeeze(spec, 0)
            torch.save(spec, spec_filename)

        return spec, audio_norm
|
|
|
|
|
|
|
# Shared processor configuration. Constructing it has no side effects, so it
# stays at module level where worker processes can also see it.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=128,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    # None instead of the string "null"; presumably this mirrors a JSON null
    # in the training config -- confirm. The value is only read when
    # use_mel_spec_posterior is True.
    mel_fmax=None,
)


def main():
    """Generate spectrogram caches for every file in ``filelists/train.list``.

    Each non-empty line of the list is split on ``|`` and its first field is
    taken as the audio path. The paths are processed by a pool of 32 worker
    processes, with a progress bar advanced once per finished file.
    """
    with open("filelists/train.list", "r") as f:
        # Strip first so a line without "|" does not keep its trailing
        # newline, and skip blank lines instead of passing an empty path on.
        filepaths = [
            entry.split("|")[0]
            for entry in (line.strip() for line in f)
            if entry
        ]

    with Pool(processes=32) as pool, tqdm(total=len(filepaths)) as pbar:
        for _ in pool.imap_unordered(processor.process_audio, filepaths):
            pbar.update()


# Required for multiprocessing: with the spawn/forkserver start methods the
# child processes re-import this module, and without the guard each of them
# would try to create its own pool.
if __name__ == "__main__":
    main()
|
|
|