| import argparse | |
| from tqdm import tqdm | |
| from multiprocessing import Manager, Pool | |
| from scipy.io.wavfile import read | |
| from librosa.util import normalize | |
| import numpy as np | |
| import amfm_decompy.pYAAPT as pYAAPT | |
| import amfm_decompy.basic_tools as basic | |
# Divisor applied to raw samples before normalisation.
# NOTE(review): 32768 is 2**15, so this presumably assumes 16-bit PCM input -
# confirm against the data actually fed to this script.
MAX_WAV_VALUE = 32768.0

# Command-line interface. Parsing happens at import time because the worker
# function reads ``args`` and ``root`` as module-level globals.
parser = argparse.ArgumentParser(description="")
# Path to the manifest file (first line: root directory, then one entry per line).
parser.add_argument("tsv", help="")
# NOTE(review): "crepe" is accepted here, but extract_f0 only contains a
# "pyaapt" branch.
parser.add_argument("--extractor", choices=["crepe", "pyaapt"], default="pyaapt", help="")
# When set, the interpolated pitch track is saved instead of the raw values.
parser.add_argument("--interp", action="store_true", help="")
# Number of worker processes handed to multiprocessing.Pool.
parser.add_argument("--n_workers", type=int, default=40, help="")
args = parser.parse_args()

# Read the whole manifest, closing the file handle deterministically instead
# of leaving it to the garbage collector.
with open(args.tsv, "r") as tsv_file:
    tsv_lines = tsv_file.readlines()

# An empty manifest has no root line; report that clearly rather than failing
# with a bare IndexError on tsv_lines[0].
if not tsv_lines:
    parser.error("manifest {!r} is empty; expected a root directory on its first line".format(args.tsv))

# First line is the root directory; every following line is one entry.
root, tsv_lines = tsv_lines[0].strip(), tsv_lines[1:]
def extract_f0(tsv_line):
    """Extract a pitch (F0) track for one manifest entry and save it to disk.

    The wav file is loaded, scaled by ``MAX_WAV_VALUE``, peak-normalised to
    0.95, zero-padded by half an analysis frame on each side and passed to
    pYAAPT. The resulting track is written with ``np.save`` to the wav path
    with ".wav" replaced by ".yaapt" and ".interp.f0" or ".f0" appended
    (``np.save`` itself adds the ".npy" extension).

    Args:
        tsv_line: One manifest line. Its first tab-separated field is the wav
            path relative to the module-level ``root``; any further fields
            are ignored.

    Returns:
        None. The result is written to disk as a side effect.

    Raises:
        NotImplementedError: If ``args.extractor`` is anything other than
            "pyaapt"; no other extractor is implemented in this function.
    """
    # Fail loudly for an extractor the parser accepts but this function does
    # not implement, instead of finishing without writing any output.
    if args.extractor != "pyaapt":
        raise NotImplementedError(
            "F0 extractor {!r} is not implemented; only 'pyaapt' is supported".format(args.extractor)
        )

    # Only the first column is needed, so tolerate manifests with more (or
    # fewer) than two columns. Strip the line ending in case the path is the
    # last field on the line.
    rel_path = tsv_line.rstrip("\r\n").split("\t", 1)[0]
    wav_path = root.strip() + "/" + rel_path

    sr, wav = read(wav_path)
    # NOTE(review): dividing by MAX_WAV_VALUE presumably assumes int16 samples
    # - confirm for other sample formats.
    wav = wav / MAX_WAV_VALUE
    wav = normalize(wav) * 0.95

    frame_length = 20.0  # analysis frame length passed to pYAAPT (ms)
    # Half a frame worth of samples, added as silence at both ends.
    pad = int(frame_length / 1000 * sr) // 2
    wav = np.pad(wav.squeeze(), (pad, pad), "constant", constant_values=0)

    signal = basic.SignalObj(wav, sr)
    pitch = pYAAPT.yaapt(
        signal,
        frame_length=frame_length,
        frame_space=5.0,
        nccf_thresh1=0.25,
        tda_frame_length=25.0,
    )
    # Choose the interpolated or the raw pitch track as requested.
    f0 = pitch.samp_interp if args.interp else pitch.samp_values

    f0_path = wav_path.replace(".wav", ".yaapt")
    f0_path += ".interp.f0" if args.interp else ".f0"
    np.save(f0_path, f0)
def main():
    """Run ``extract_f0`` over every manifest entry using a process pool.

    The pool size comes from ``args.n_workers``; progress is shown with tqdm,
    one tick per processed manifest line.
    """
    n_entries = len(tsv_lines)
    with Pool(args.n_workers) as pool:
        pending = pool.imap(extract_f0, tsv_lines)
        # imap is lazy on the consumer side: iterating it to the end is what
        # waits for every task and surfaces any worker exception.
        for _ in tqdm(pending, total=n_entries):
            pass
# Start the extraction only when this file is executed as a script, so that
# importing the module does not launch the worker pool.
if __name__ == "__main__":
    main()