Update preprocess_hubert_f0.py
preprocess_hubert_f0.py  +9 -48
preprocess_hubert_f0.py  CHANGED
@@ -7,12 +7,10 @@ from random import shuffle
 import torch
 from glob import glob
 from tqdm import tqdm
-from modules.mel_processing import spectrogram_torch
 
 import utils
 import logging
-
-logging.getLogger("numba").setLevel(logging.WARNING)
+logging.getLogger('numba').setLevel(logging.WARNING)
 import librosa
 import numpy as np
 
@@ -26,47 +24,16 @@ def process_one(filename, hmodel):
     wav, sr = librosa.load(filename, sr=sampling_rate)
     soft_path = filename + ".soft.pt"
     if not os.path.exists(soft_path):
-
+        devive = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         wav16k = librosa.resample(wav, orig_sr=sampling_rate, target_sr=16000)
-        wav16k = torch.from_numpy(wav16k).to(
+        wav16k = torch.from_numpy(wav16k).to(devive)
         c = utils.get_hubert_content(hmodel, wav_16k_tensor=wav16k)
         torch.save(c.cpu(), soft_path)
-
     f0_path = filename + ".f0.npy"
     if not os.path.exists(f0_path):
-        f0 = utils.compute_f0_dio(
-            wav, sampling_rate=sampling_rate, hop_length=hop_length
-        )
+        f0 = utils.compute_f0_dio(wav, sampling_rate=sampling_rate, hop_length=hop_length)
         np.save(f0_path, f0)
 
-    spec_path = filename.replace(".wav", ".spec.pt")
-    if not os.path.exists(spec_path):
-        # Process spectrogram
-        # The following code can't be replaced by torch.FloatTensor(wav)
-        # because load_wav_to_torch return a tensor that need to be normalized
-
-        audio, sr = utils.load_wav_to_torch(filename)
-        if sr != hps.data.sampling_rate:
-            raise ValueError(
-                "{} SR doesn't match target {} SR".format(
-                    sr, hps.data.sampling_rate
-                )
-            )
-
-        audio_norm = audio / hps.data.max_wav_value
-        audio_norm = audio_norm.unsqueeze(0)
-
-        spec = spectrogram_torch(
-            audio_norm,
-            hps.data.filter_length,
-            hps.data.sampling_rate,
-            hps.data.hop_length,
-            hps.data.win_length,
-            center=False,
-        )
-        spec = torch.squeeze(spec, 0)
-        torch.save(spec, spec_path)
-
 
 def process_batch(filenames):
     print("Loading hubert for content...")
@@ -79,23 +46,17 @@ def process_batch(filenames):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--in_dir", type=str, default="dataset/44k", help="path to input dir"
-    )
+    parser.add_argument("--in_dir", type=str, default="dataset/44k", help="path to input dir")
 
     args = parser.parse_args()
-    filenames = glob(f
+    filenames = glob(f'{args.in_dir}/*/*.wav', recursive=True)  # [:10]
     shuffle(filenames)
-    multiprocessing.set_start_method(
+    multiprocessing.set_start_method('spawn', force=True)
 
     num_processes = 1
     chunk_size = int(math.ceil(len(filenames) / num_processes))
-    chunks = [
-        filenames[i : i + chunk_size] for i in range(0, len(filenames), chunk_size)
-    ]
+    chunks = [filenames[i:i + chunk_size] for i in range(0, len(filenames), chunk_size)]
     print([len(c) for c in chunks])
-    processes = [
-        multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks
-    ]
+    processes = [multiprocessing.Process(target=process_batch, args=(chunk,)) for chunk in chunks]
     for p in processes:
         p.start()