| import torch |
| from tqdm import tqdm |
| from multiprocessing import Pool |
| from mel_processing import spectrogram_torch, mel_spectrogram_torch |
| from utils import load_wav_to_torch |
|
|
|
|
class AudioProcessor:
    """Compute spectrograms for audio files and cache them on disk.

    The constructor only stores analysis settings; all work happens in
    :meth:`process_audio`, which is safe to hand to a worker pool because it
    reads nothing but the stored settings and its ``filename`` argument.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        """Store the settings forwarded to the spectrogram helpers.

        Args:
            max_wav_value: Divisor applied to the loaded samples.
            use_mel_spec_posterior: When true, ``mel_spectrogram_torch`` is
                used and the cache suffix is ``.mel.pt``; otherwise
                ``spectrogram_torch`` is used with ``.spec.pt``.
            filter_length: Forwarded to both spectrogram helpers.
            n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
            sampling_rate: Forwarded to both spectrogram helpers.
            hop_length: Forwarded to both spectrogram helpers.
            win_length: Forwarded to both spectrogram helpers.
            mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
            mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
        """
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _cache_path(self, filename):
        """Return the cache file path derived from *filename*."""
        # Plain substring replacement is kept on purpose so that cache files
        # written by earlier runs of this script keep resolving to the same
        # names.
        cache_path = filename.replace(".wav", ".spec.pt")
        if self.use_mel_spec_posterior:
            cache_path = cache_path.replace(".spec.pt", ".mel.pt")
        return cache_path

    def _compute_spec(self, audio_norm):
        """Run the configured spectrogram helper on *audio_norm*."""
        if self.use_mel_spec_posterior:
            return mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        return spectrogram_torch(
            audio_norm,
            self.filter_length,
            self.sampling_rate,
            self.hop_length,
            self.win_length,
            center=False,
        )

    def process_audio(self, filename):
        """Return ``(spec, audio_norm)`` for *filename*, caching the spectrogram.

        The audio is loaded, divided by ``max_wav_value`` and given a leading
        axis. If a cache file already exists and loads, its content is
        returned as ``spec``; otherwise the spectrogram is computed, its
        leading axis is squeezed away, and the result is saved to the cache
        path. The sampling rate reported by the loader is not checked.

        Args:
            filename: Path of the audio file; must contain ``".wav"`` so a
                distinct cache path can be derived from it.

        Returns:
            A ``(spec, audio_norm)`` tuple.

        Raises:
            ValueError: If the derived cache path equals ``filename``. Saving
                would then overwrite the source audio with the spectrogram.
        """
        spec_filename = self._cache_path(filename)
        if spec_filename == filename:
            # Checked before any I/O so the source file is never touched.
            raise ValueError(
                f"cannot derive a cache path from {filename!r}: "
                "expected a path containing '.wav'"
            )

        audio, _ = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        try:
            spec = torch.load(spec_filename)
        except Exception:
            # A missing or unreadable cache file means "recompute". Catching
            # Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit stop the run.
            spec = torch.squeeze(self._compute_spec(audio_norm), 0)
            torch.save(spec, spec_filename)
        return spec, audio_norm
|
|
|
|
| |
# Shared processor configuration used by every worker.
# `mel_fmax` was previously the string "null"; the Python spelling of a JSON
# null is None. The value is only forwarded to the mel helper when
# `use_mel_spec_posterior` is true, so this does not change the current run.
# NOTE(review): presumably the mel helper accepts None as "no upper bound";
# confirm against mel_processing.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=128,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    mel_fmax=None,
)
|
|
# The guard is required for multiprocessing: under the "spawn" start method
# each worker re-imports this module, and without the guard every worker
# would try to read the list and start its own pool.
if __name__ == "__main__":
    # Each list line is "<audio path>|...". Only the first field is used.
    # Whitespace is stripped so a line without "|" does not carry its newline
    # into the path, and blank lines are skipped.
    with open("filelists/train.list", "r") as f:
        filepaths = [
            path
            for path in (line.split("|")[0].strip() for line in f)
            if path
        ]

    # Results are discarded; the loop only drives the pool and the progress
    # bar. Completion order does not matter, hence imap_unordered.
    with Pool(processes=32) as pool:
        with tqdm(total=len(filepaths)) as pbar:
            for _ in pool.imap_unordered(processor.process_audio, filepaths):
                pbar.update()
|
|