File size: 1,199 Bytes
0b65cde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os



## Step 01 (disabled): run resample.py and preprocess_flist_config.py; uncomment the lines below to enable

# os.system(f"python resample.py")
# os.system(f"python preprocess_flist_config.py --speech_encoder vec768l12")

## Step 02: download the checkpoint for the selected speech encoder if it is not on disk yet

speech_encoder = "vec768l12" #@param ["vec768l12", "vec256l9", "hubertsoft", "whisper-ppg", "whisper-ppg-large"]

from pretrain.meta import download_dict

# NOTE: this rebinds the imported callable's name to the table it returns.
download_dict = download_dict()

# Each table entry carries the remote "url" and the local "output" path.
encoder_entry = download_dict[speech_encoder]
url, output = encoder_entry["url"], encoder_entry["output"]

# Skip the download when the target path already exists.
encoder_present = os.path.exists(output)
if not encoder_present:
  os.system(f"curl -L {url} -o {output}")


## Step 03: optionally fetch the NSF-HiFiGAN archive, then run preprocess_hubert_f0.py
f0_predictor = "crepe" #@param ["crepe", "pm", "dio", "harvest"]
use_diff = True #@param {type:"boolean"}

# Extra CLI flag handed to the preprocessing script; empty when use_diff is off.
diff_param = ""
if use_diff:
  diff_param = "--use_diff"

  # Download and unpack the NSF-HiFiGAN archive into pretrain/ unless the
  # expected path is already present.
  if not os.path.exists("./pretrain/nsf_hifigan/model"):
    os.system("curl -L https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip -o nsf_hifigan_20221211.zip")
    os.system("unzip nsf_hifigan_20221211.zip")
    # Remove any stale copy first so the move below cannot nest directories.
    os.system("rm -rf pretrain/nsf_hifigan")
    os.system("mv -v nsf_hifigan pretrain")

# Preprocessing must run on every invocation. It used to be nested inside the
# download branch above, so it was skipped when use_diff was False or when the
# vocoder had already been downloaded.
os.system(f"python preprocess_hubert_f0.py --f0_predictor={f0_predictor} {diff_param}")
    

## Step 04: run train.py with configs/config.json and "-m 44k"
train_command = "python train.py -c configs/config.json -m 44k"
os.system(train_command)