victan
/

audio_seam

Model card Files Files and versions

audio_seam / augment.py

victan's picture

Upload augment.py with huggingface_hub

e102e80 about 2 years ago

history blame contribute delete

2.98 kB

	import argparse
	import os
	import subprocess

	import librosa
	import numpy as np
	import soundfile as sf
	from tqdm import tqdm

	from lib import dataset
	from lib import spec_utils


	def soundstretch_command(src, dst, pitch):
	    """Return the argv list that runs ``soundstretch`` on *src*, writing *dst*.

	    The command is built as a list rather than a single string: without
	    ``shell=True`` a string is treated as the program name on POSIX, so the
	    tool would never be found.

	    Args:
	        src: Path of the WAV file to read.
	        dst: Path of the WAV file to write.
	        pitch: Value passed to soundstretch's ``-pitch=`` switch
	            (semitones, per the SoundStretch documentation).

	    Returns:
	        A list of strings suitable for ``subprocess.run``.
	    """
	    return ['soundstretch', src, dst, '-pitch={}'.format(pitch)]


	def remove_if_exists(path):
	    """Delete *path*, silently ignoring the case where it does not exist."""
	    try:
	        os.remove(path)
	    except FileNotFoundError:
	        pass


	# Script entry point: for every (mixture, instrumental) pair, write
	# pitch-shifted spectrograms of both into a per-setting cache directory.
	if __name__ == '__main__':
	    p = argparse.ArgumentParser()
	    p.add_argument('--sr', '-r', type=int, default=44100)
	    p.add_argument('--hop_length', '-l', type=int, default=1024)
	    p.add_argument('--n_fft', '-f', type=int, default=2048)
	    p.add_argument('--pitch', '-p', type=int, default=-1)
	    p.add_argument('--mixtures', '-m', required=True)
	    p.add_argument('--instruments', '-i', required=True)
	    args = p.parse_args()

	    # Scratch WAV files exchanged with soundstretch; they live in the current
	    # working directory and are keyed by the pitch value only.
	    input_i = 'input_i_{}.wav'.format(args.pitch)
	    input_v = 'input_v_{}.wav'.format(args.pitch)
	    output_i = 'output_i_{}.wav'.format(args.pitch)
	    output_v = 'output_v_{}.wav'.format(args.pitch)
	    cmd_i = soundstretch_command(input_i, output_i, args.pitch)
	    cmd_v = soundstretch_command(input_v, output_v, args.pitch)
	    cache_suffix = '_pitch{}.npy'.format(args.pitch)

	    # Cache directories are named after the analysis settings so that caches
	    # produced with different settings do not collide.
	    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args.hop_length, args.n_fft)
	    mix_cache_dir = os.path.join(args.mixtures, cache_dir)
	    inst_cache_dir = os.path.join(args.instruments, cache_dir)
	    os.makedirs(mix_cache_dir, exist_ok=True)
	    os.makedirs(inst_cache_dir, exist_ok=True)

	    filelist = dataset.make_pair(args.mixtures, args.instruments)
	    for mix_path, inst_path in tqdm(filelist):
	        mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
	        mix_cache_path = os.path.join(mix_cache_dir, mix_basename + cache_suffix)

	        inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
	        inst_cache_path = os.path.join(inst_cache_dir, inst_basename + cache_suffix)

	        # Skip pairs whose two cache files were already written.
	        if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
	            continue

	        X, _ = librosa.load(
	            mix_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
	        y, _ = librosa.load(
	            inst_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')

	        X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
	        # Residual of mixture minus instrumental (presumably the vocal track).
	        v = X - y

	        try:
	            sf.write(input_i, y.T, args.sr)
	            sf.write(input_v, v.T, args.sr)
	            # check=True: fail here with a clear error instead of later when
	            # the missing output file is loaded.
	            subprocess.run(cmd_i, stderr=subprocess.DEVNULL, check=True)
	            subprocess.run(cmd_v, stderr=subprocess.DEVNULL, check=True)

	            y, _ = librosa.load(
	                output_i, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
	            v, _ = librosa.load(
	                output_v, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
	        finally:
	            # The shifted audio is in memory by now (or the step failed), so
	            # the scratch files are removed on every path.
	            for tmp_path in (input_i, input_v, output_i, output_v):
	                remove_if_exists(tmp_path)

	        # Rebuild the mixture from the two shifted stems.
	        X = y + v

	        spec = spec_utils.wave_to_spectrogram(X, args.hop_length, args.n_fft)
	        np.save(mix_cache_path, spec)

	        spec = spec_utils.wave_to_spectrogram(y, args.hop_length, args.n_fft)
	        np.save(inst_cache_path, spec)