Spaces:
Configuration error
Configuration error
Commit
·
632f309
1
Parent(s):
ef5bee9
Upload 11 files
Browse files- .gitattributes +1 -0
- app.py +116 -0
- checkpoints/0102_xiaoma_pe/config.yaml +172 -0
- checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +3 -0
- checkpoints/Unnamed/config.yaml +445 -0
- checkpoints/Unnamed/config_nsf.yaml +445 -0
- checkpoints/Unnamed/lightning_logs/lastest/hparams.yaml +1 -0
- checkpoints/Unnamed/model_ckpt_steps_192000.ckpt +3 -0
- checkpoints/nsf_hifigan/NOTICE.txt +74 -0
- checkpoints/nsf_hifigan/NOTICE.zh-CN.txt +72 -0
- checkpoints/nsf_hifigan/config.json +38 -0
- checkpoints/nsf_hifigan/model +3 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
results/test_output.wav filter=lfs diff=lfs merge=lfs -text
|
| 36 |
test_output.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
results/test_output.wav filter=lfs diff=lfs merge=lfs -text
|
| 36 |
test_output.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
checkpoints/nsf_hifigan/model filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import tkinter as tk
|
| 2 |
+
import tkinter.filedialog
|
| 3 |
+
import tkinter.ttk as ttk
|
| 4 |
+
import tkinter as tk
|
| 5 |
+
from tkinter import ttk
|
| 6 |
+
import wave
|
| 7 |
+
from utils.hparams import hparams
|
| 8 |
+
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
|
| 9 |
+
import numpy as np
|
| 10 |
+
import IPython.display as ipd
|
| 11 |
+
import utils
|
| 12 |
+
import librosa
|
| 13 |
+
import torchcrepe
|
| 14 |
+
from infer import *
|
| 15 |
+
import logging
|
| 16 |
+
from infer_tools.infer_tool import *
|
| 17 |
+
from tkinter import Label
|
| 18 |
+
from time import sleep
|
| 19 |
+
import os
|
| 20 |
+
# Build the root window for the tool and give it a title and fixed start size.
window = tk.Tk()
window.title("Diff-SVC Rendering Tool")
window.geometry("250x400")

# Text widget used as a console for status messages (grid row 4).
textbox = tk.Text(window)
textbox.grid(row=4, column=0, padx=20, pady=20, sticky="nsew")

# Placeholder label in grid row 5, the same cell the progress bar is shown in.
loading_animation_label = Label(window)
loading_animation_label.grid(row=5, column=0)

# Column 0 and row 4 (the console) absorb any extra space on resize.
window.columnconfigure(0, weight=1)
window.rowconfigure(4, weight=1)

button1 = ttk.Button(window, text="Load Model")
button1.grid(row=0, column=0, padx=20, pady=20)

# Indeterminate progress bar; it is only placed on the grid by start().
pb = ttk.Progressbar(window, orient='horizontal', mode='indeterminate', length=250)
|
| 48 |
+
def start():
    """Place the progress bar in grid row 5 and start its animation."""
    pb.grid(row=5, column=0, padx=0, pady=0)
    pb.start(10)  # step interval in milliseconds
|
| 51 |
+
|
| 52 |
+
def stop():
    """Stop the progress-bar animation and take the widget off the grid."""
    pb.stop()
    # grid_remove (unlike grid_forget) keeps the grid options for the next start().
    pb.grid_remove()
|
| 55 |
+
def button1_clicked():
    """Ask for a checkpoint, its YAML config and a project name, then load the model.

    On success the loaded ``Svc`` instance is stored in the global
    ``svc_model`` and the chosen name in the global ``project_name``; both are
    read later by ``button2_clicked``. Cancelling any dialog shows an error
    box and returns without touching those globals.
    """
    # ``import tkinter`` does not load these submodules, so import them
    # explicitly instead of relying on another module having done so.
    from tkinter import filedialog, messagebox, simpledialog

    global project_name, svc_model

    # askopenfilename returns an empty value ('' or ()) when cancelled.
    model_path = filedialog.askopenfilename(
        title="Select CKPT File",
        filetypes=[("Checkpoint files", "*.ckpt")],
    )
    if not model_path:
        messagebox.showerror("Error", "No CKPT file selected")
        return

    config_path = filedialog.askopenfilename(
        title="Select YAML File",
        filetypes=[("Yaml files", "*.yaml")],
    )
    if not config_path:
        messagebox.showerror("Error", "No YAML file selected")
        return

    # askstring returns None on cancel and '' on an empty entry; reject both
    # before the progress bar is shown so it is never left running.
    name = simpledialog.askstring("Input", "Enter project name:", parent=window)
    if not name:
        messagebox.showerror("Error", "No Project Name")
        return

    logging.getLogger('numba').setLevel(logging.WARNING)

    start()
    try:
        # Honour the "Use GPU" checkbox (module-level BooleanVar ``hubert_gpu``)
        # rather than a hard-coded False.
        loaded = Svc(name, config_path, bool(hubert_gpu.get()), model_path)
    finally:
        # Always hide the progress bar, even when loading raises.
        stop()

    # Publish the new state only after the model loaded successfully.
    project_name = name
    svc_model = loaded
    textbox.insert('end', 'model loaded\n')
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# Wire the "Load Model" button to its click handler.
button1.configure(command=button1_clicked)

# Second button: starts a render of a user-selected WAV file.
button2 = ttk.Button(window, text="Start Rendering")
button2.grid(row=1, column=0, padx=20, pady=20)
|
| 87 |
+
|
| 88 |
+
# Define a callback function for the second button
|
| 89 |
+
def button2_clicked():
    """Prompt for a WAV file, a key and an output name, render, then open the result.

    Requires a model to have been loaded first (globals ``svc_model`` and
    ``project_name``). Cancelling any dialog aborts without rendering.
    """
    # Local imports: the file only does ``from time import sleep`` and never
    # imports the tkinter dialog submodules explicitly.
    import time
    from tkinter import filedialog, messagebox, simpledialog

    if 'svc_model' not in globals():
        messagebox.showerror("Error", "Load a model before rendering")
        return

    # askopenfilename returns an empty value ('' or ()) when cancelled.
    wav_fn = filedialog.askopenfilename(filetypes=[("WAV files", "*.wav")])
    if not wav_fn:
        messagebox.showerror("Error", "No WAV file selected")
        return

    # askinteger returns None on cancel; 0 is a legitimate key, so test identity.
    key = simpledialog.askinteger("Input", "Enter key value:", parent=window)
    if key is None:
        messagebox.showerror("Error", "No key value entered")
        return

    wav_gen = simpledialog.askstring("Input", "Enter the track name:", parent=window)
    if not wav_gen:
        messagebox.showerror("Error", "No track name entered")
        return
    if not wav_gen.endswith('.wav'):
        wav_gen += '.wav'

    pndm_speedup = 20

    textbox.insert('end', 'Rendering Started, please wait...\n')
    start()
    try:
        run_clip(svc_model, file_path=wav_fn, key=key, acc=pndm_speedup,
                 use_crepe=True, use_pe=True, thre=0.05, use_gt_mel=False,
                 add_noise_step=500, project_name=project_name, out_path=wav_gen)
        # Pause kept from the original; presumably lets the output file
        # settle before it is opened -- TODO confirm it is still needed.
        time.sleep(2)
    finally:
        # Always hide the progress bar, even when rendering raises.
        stop()

    textbox.insert('end', 'Rendering process done!\nPlaying Audio now...')
    # os.startfile exists only on Windows; skip playback elsewhere.
    if hasattr(os, 'startfile'):
        os.startfile(wav_gen)
|
| 109 |
+
# Wire the "Start Rendering" button to its click handler.
button2.configure(command=button2_clicked)

# "Use GPU" checkbox backed by a Tk boolean variable.
hubert_gpu = tk.BooleanVar()
checkbox = tk.Checkbutton(window, text="Use GPU", variable=hubert_gpu)
checkbox.grid(row=3, column=0)

# Hand control to Tk's event loop; returns once the window is closed.
window.mainloop()
|
checkpoints/0102_xiaoma_pe/config.yaml
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accumulate_grad_batches: 1
|
| 2 |
+
audio_num_mel_bins: 80
|
| 3 |
+
audio_sample_rate: 24000
|
| 4 |
+
base_config:
|
| 5 |
+
- configs/tts/lj/fs2.yaml
|
| 6 |
+
binarization_args:
|
| 7 |
+
shuffle: false
|
| 8 |
+
with_align: true
|
| 9 |
+
with_f0: true
|
| 10 |
+
with_f0cwt: true
|
| 11 |
+
with_spk_embed: true
|
| 12 |
+
with_txt: true
|
| 13 |
+
with_wav: false
|
| 14 |
+
binarizer_cls: data_gen.tts.base_binarizer.BaseBinarizer
|
| 15 |
+
binary_data_dir: data/binary/xiaoma1022_24k_128hop
|
| 16 |
+
check_val_every_n_epoch: 10
|
| 17 |
+
clip_grad_norm: 1
|
| 18 |
+
cwt_add_f0_loss: false
|
| 19 |
+
cwt_hidden_size: 128
|
| 20 |
+
cwt_layers: 2
|
| 21 |
+
cwt_loss: l1
|
| 22 |
+
cwt_std_scale: 0.8
|
| 23 |
+
debug: false
|
| 24 |
+
dec_ffn_kernel_size: 9
|
| 25 |
+
dec_layers: 4
|
| 26 |
+
decoder_type: fft
|
| 27 |
+
dict_dir: ''
|
| 28 |
+
dropout: 0.1
|
| 29 |
+
ds_workers: 4
|
| 30 |
+
dur_enc_hidden_stride_kernel:
|
| 31 |
+
- 0,2,3
|
| 32 |
+
- 0,2,3
|
| 33 |
+
- 0,1,3
|
| 34 |
+
dur_loss: mse
|
| 35 |
+
dur_predictor_kernel: 3
|
| 36 |
+
dur_predictor_layers: 2
|
| 37 |
+
enc_ffn_kernel_size: 9
|
| 38 |
+
enc_layers: 4
|
| 39 |
+
encoder_K: 8
|
| 40 |
+
encoder_type: fft
|
| 41 |
+
endless_ds: true
|
| 42 |
+
ffn_act: gelu
|
| 43 |
+
ffn_padding: SAME
|
| 44 |
+
fft_size: 512
|
| 45 |
+
fmax: 12000
|
| 46 |
+
fmin: 30
|
| 47 |
+
gen_dir_name: ''
|
| 48 |
+
hidden_size: 256
|
| 49 |
+
hop_size: 128
|
| 50 |
+
infer: false
|
| 51 |
+
lambda_commit: 0.25
|
| 52 |
+
lambda_energy: 0.1
|
| 53 |
+
lambda_f0: 1.0
|
| 54 |
+
lambda_ph_dur: 1.0
|
| 55 |
+
lambda_sent_dur: 1.0
|
| 56 |
+
lambda_uv: 1.0
|
| 57 |
+
lambda_word_dur: 1.0
|
| 58 |
+
load_ckpt: ''
|
| 59 |
+
log_interval: 100
|
| 60 |
+
loud_norm: false
|
| 61 |
+
lr: 2.0
|
| 62 |
+
max_epochs: 1000
|
| 63 |
+
max_eval_sentences: 1
|
| 64 |
+
max_eval_tokens: 60000
|
| 65 |
+
max_frames: 5000
|
| 66 |
+
max_input_tokens: 1550
|
| 67 |
+
max_sentences: 100000
|
| 68 |
+
max_tokens: 20000
|
| 69 |
+
max_updates: 60000
|
| 70 |
+
mel_loss: l1
|
| 71 |
+
mel_vmax: 1.5
|
| 72 |
+
mel_vmin: -6
|
| 73 |
+
min_level_db: -120
|
| 74 |
+
norm_type: gn
|
| 75 |
+
num_ckpt_keep: 3
|
| 76 |
+
num_heads: 2
|
| 77 |
+
num_sanity_val_steps: 5
|
| 78 |
+
num_spk: 1
|
| 79 |
+
num_test_samples: 20
|
| 80 |
+
num_valid_plots: 10
|
| 81 |
+
optimizer_adam_beta1: 0.9
|
| 82 |
+
optimizer_adam_beta2: 0.98
|
| 83 |
+
out_wav_norm: false
|
| 84 |
+
pitch_ar: false
|
| 85 |
+
pitch_enc_hidden_stride_kernel:
|
| 86 |
+
- 0,2,5
|
| 87 |
+
- 0,2,5
|
| 88 |
+
- 0,2,5
|
| 89 |
+
pitch_extractor_conv_layers: 2
|
| 90 |
+
pitch_loss: l1
|
| 91 |
+
pitch_norm: log
|
| 92 |
+
pitch_type: frame
|
| 93 |
+
pre_align_args:
|
| 94 |
+
allow_no_txt: false
|
| 95 |
+
denoise: false
|
| 96 |
+
forced_align: mfa
|
| 97 |
+
txt_processor: en
|
| 98 |
+
use_sox: false
|
| 99 |
+
use_tone: true
|
| 100 |
+
pre_align_cls: data_gen.tts.lj.pre_align.LJPreAlign
|
| 101 |
+
predictor_dropout: 0.5
|
| 102 |
+
predictor_grad: 0.1
|
| 103 |
+
predictor_hidden: -1
|
| 104 |
+
predictor_kernel: 5
|
| 105 |
+
predictor_layers: 2
|
| 106 |
+
prenet_dropout: 0.5
|
| 107 |
+
prenet_hidden_size: 256
|
| 108 |
+
pretrain_fs_ckpt: ''
|
| 109 |
+
processed_data_dir: data/processed/ljspeech
|
| 110 |
+
profile_infer: false
|
| 111 |
+
raw_data_dir: data/raw/LJSpeech-1.1
|
| 112 |
+
ref_norm_layer: bn
|
| 113 |
+
reset_phone_dict: true
|
| 114 |
+
save_best: false
|
| 115 |
+
save_ckpt: true
|
| 116 |
+
save_codes:
|
| 117 |
+
- configs
|
| 118 |
+
- modules
|
| 119 |
+
- tasks
|
| 120 |
+
- utils
|
| 121 |
+
- usr
|
| 122 |
+
save_f0: false
|
| 123 |
+
save_gt: false
|
| 124 |
+
seed: 1234
|
| 125 |
+
sort_by_len: true
|
| 126 |
+
stop_token_weight: 5.0
|
| 127 |
+
task_cls: tasks.tts.pe.PitchExtractionTask
|
| 128 |
+
test_ids:
|
| 129 |
+
- 68
|
| 130 |
+
- 70
|
| 131 |
+
- 74
|
| 132 |
+
- 87
|
| 133 |
+
- 110
|
| 134 |
+
- 172
|
| 135 |
+
- 190
|
| 136 |
+
- 215
|
| 137 |
+
- 231
|
| 138 |
+
- 294
|
| 139 |
+
- 316
|
| 140 |
+
- 324
|
| 141 |
+
- 402
|
| 142 |
+
- 422
|
| 143 |
+
- 485
|
| 144 |
+
- 500
|
| 145 |
+
- 505
|
| 146 |
+
- 508
|
| 147 |
+
- 509
|
| 148 |
+
- 519
|
| 149 |
+
test_input_dir: ''
|
| 150 |
+
test_num: 523
|
| 151 |
+
test_set_name: test
|
| 152 |
+
train_set_name: train
|
| 153 |
+
use_denoise: false
|
| 154 |
+
use_energy_embed: false
|
| 155 |
+
use_gt_dur: false
|
| 156 |
+
use_gt_f0: false
|
| 157 |
+
use_pitch_embed: true
|
| 158 |
+
use_pos_embed: true
|
| 159 |
+
use_spk_embed: false
|
| 160 |
+
use_spk_id: false
|
| 161 |
+
use_split_spk_id: false
|
| 162 |
+
use_uv: true
|
| 163 |
+
use_var_enc: false
|
| 164 |
+
val_check_interval: 2000
|
| 165 |
+
valid_num: 348
|
| 166 |
+
valid_set_name: valid
|
| 167 |
+
vocoder: pwg
|
| 168 |
+
vocoder_ckpt: ''
|
| 169 |
+
warmup_updates: 2000
|
| 170 |
+
weight_decay: 0
|
| 171 |
+
win_size: 512
|
| 172 |
+
work_dir: checkpoints/0102_xiaoma_pe
|
checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1863f12324e43783089ab933edeeb969106b851e30d71019ebbaa9b82099d82a
|
| 3 |
+
size 39141959
|
checkpoints/Unnamed/config.yaml
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
K_step: 1000
|
| 2 |
+
accumulate_grad_batches: 1
|
| 3 |
+
audio_num_mel_bins: 128
|
| 4 |
+
audio_sample_rate: 44100
|
| 5 |
+
binarization_args:
|
| 6 |
+
shuffle: false
|
| 7 |
+
with_align: true
|
| 8 |
+
with_f0: true
|
| 9 |
+
with_hubert: false
|
| 10 |
+
with_spk_embed: false
|
| 11 |
+
with_wav: false
|
| 12 |
+
binarizer_cls: preprocessing.SVCpre.SVCBinarizer
|
| 13 |
+
binary_data_dir: data/binary/Unnamed
|
| 14 |
+
check_val_every_n_epoch: 10
|
| 15 |
+
choose_test_manually: false
|
| 16 |
+
clip_grad_norm: 1
|
| 17 |
+
config_path: training/config_nsf.yaml
|
| 18 |
+
content_cond_steps: []
|
| 19 |
+
cwt_add_f0_loss: false
|
| 20 |
+
cwt_hidden_size: 128
|
| 21 |
+
cwt_layers: 2
|
| 22 |
+
cwt_loss: l1
|
| 23 |
+
cwt_std_scale: 0.8
|
| 24 |
+
datasets:
|
| 25 |
+
- opencpop
|
| 26 |
+
debug: false
|
| 27 |
+
dec_ffn_kernel_size: 9
|
| 28 |
+
dec_layers: 4
|
| 29 |
+
decay_steps: 40000
|
| 30 |
+
decoder_type: fft
|
| 31 |
+
dict_dir: ''
|
| 32 |
+
diff_decoder_type: wavenet
|
| 33 |
+
diff_loss_type: l2
|
| 34 |
+
dilation_cycle_length: 4
|
| 35 |
+
dropout: 0.1
|
| 36 |
+
ds_workers: 4
|
| 37 |
+
dur_enc_hidden_stride_kernel:
|
| 38 |
+
- 0,2,3
|
| 39 |
+
- 0,2,3
|
| 40 |
+
- 0,1,3
|
| 41 |
+
dur_loss: mse
|
| 42 |
+
dur_predictor_kernel: 3
|
| 43 |
+
dur_predictor_layers: 5
|
| 44 |
+
enc_ffn_kernel_size: 9
|
| 45 |
+
enc_layers: 4
|
| 46 |
+
encoder_K: 8
|
| 47 |
+
encoder_type: fft
|
| 48 |
+
endless_ds: false
|
| 49 |
+
f0_bin: 256
|
| 50 |
+
f0_max: 1100.0
|
| 51 |
+
f0_min: 40.0
|
| 52 |
+
ffn_act: gelu
|
| 53 |
+
ffn_padding: SAME
|
| 54 |
+
fft_size: 2048
|
| 55 |
+
fmax: 16000
|
| 56 |
+
fmin: 40
|
| 57 |
+
fs2_ckpt: ''
|
| 58 |
+
gaussian_start: true
|
| 59 |
+
gen_dir_name: ''
|
| 60 |
+
gen_tgt_spk_id: -1
|
| 61 |
+
hidden_size: 256
|
| 62 |
+
hop_size: 512
|
| 63 |
+
hubert_gpu: true
|
| 64 |
+
hubert_path: checkpoints/hubert/hubert_soft.pt
|
| 65 |
+
infer: false
|
| 66 |
+
keep_bins: 128
|
| 67 |
+
lambda_commit: 0.25
|
| 68 |
+
lambda_energy: 0.0
|
| 69 |
+
lambda_f0: 1.0
|
| 70 |
+
lambda_ph_dur: 0.3
|
| 71 |
+
lambda_sent_dur: 1.0
|
| 72 |
+
lambda_uv: 1.0
|
| 73 |
+
lambda_word_dur: 1.0
|
| 74 |
+
load_ckpt: ''
|
| 75 |
+
log_interval: 100
|
| 76 |
+
loud_norm: false
|
| 77 |
+
lr: 0.0008
|
| 78 |
+
max_beta: 0.02
|
| 79 |
+
max_epochs: 3000
|
| 80 |
+
max_eval_sentences: 1
|
| 81 |
+
max_eval_tokens: 60000
|
| 82 |
+
max_frames: 42000
|
| 83 |
+
max_input_tokens: 60000
|
| 84 |
+
max_sentences: 12
|
| 85 |
+
max_tokens: 128000
|
| 86 |
+
max_updates: 1000000
|
| 87 |
+
mel_loss: ssim:0.5|l1:0.5
|
| 88 |
+
mel_vmax: 1.5
|
| 89 |
+
mel_vmin: -6.0
|
| 90 |
+
min_level_db: -120
|
| 91 |
+
no_fs2: true
|
| 92 |
+
norm_type: gn
|
| 93 |
+
num_ckpt_keep: 10
|
| 94 |
+
num_heads: 2
|
| 95 |
+
num_sanity_val_steps: 1
|
| 96 |
+
num_spk: 1
|
| 97 |
+
num_test_samples: 0
|
| 98 |
+
num_valid_plots: 10
|
| 99 |
+
optimizer_adam_beta1: 0.9
|
| 100 |
+
optimizer_adam_beta2: 0.98
|
| 101 |
+
out_wav_norm: false
|
| 102 |
+
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
|
| 103 |
+
pe_enable: false
|
| 104 |
+
perform_enhance: true
|
| 105 |
+
pitch_ar: false
|
| 106 |
+
pitch_enc_hidden_stride_kernel:
|
| 107 |
+
- 0,2,5
|
| 108 |
+
- 0,2,5
|
| 109 |
+
- 0,2,5
|
| 110 |
+
pitch_extractor: parselmouth
|
| 111 |
+
pitch_loss: l2
|
| 112 |
+
pitch_norm: log
|
| 113 |
+
pitch_type: frame
|
| 114 |
+
pndm_speedup: 10
|
| 115 |
+
pre_align_args:
|
| 116 |
+
allow_no_txt: false
|
| 117 |
+
denoise: false
|
| 118 |
+
forced_align: mfa
|
| 119 |
+
txt_processor: zh_g2pM
|
| 120 |
+
use_sox: true
|
| 121 |
+
use_tone: false
|
| 122 |
+
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
|
| 123 |
+
predictor_dropout: 0.5
|
| 124 |
+
predictor_grad: 0.1
|
| 125 |
+
predictor_hidden: -1
|
| 126 |
+
predictor_kernel: 5
|
| 127 |
+
predictor_layers: 5
|
| 128 |
+
prenet_dropout: 0.5
|
| 129 |
+
prenet_hidden_size: 256
|
| 130 |
+
pretrain_fs_ckpt: ''
|
| 131 |
+
processed_data_dir: xxx
|
| 132 |
+
profile_infer: false
|
| 133 |
+
raw_data_dir: data/raw/Unnamed
|
| 134 |
+
ref_norm_layer: bn
|
| 135 |
+
rel_pos: true
|
| 136 |
+
reset_phone_dict: true
|
| 137 |
+
residual_channels: 384
|
| 138 |
+
residual_layers: 20
|
| 139 |
+
save_best: false
|
| 140 |
+
save_ckpt: true
|
| 141 |
+
save_codes:
|
| 142 |
+
- configs
|
| 143 |
+
- modules
|
| 144 |
+
- src
|
| 145 |
+
- utils
|
| 146 |
+
save_f0: true
|
| 147 |
+
save_gt: false
|
| 148 |
+
schedule_type: linear
|
| 149 |
+
seed: 1234
|
| 150 |
+
sort_by_len: true
|
| 151 |
+
speaker_id: Unnamed
|
| 152 |
+
spec_max:
|
| 153 |
+
- -0.025250941514968872
|
| 154 |
+
- 0.004534448496997356
|
| 155 |
+
- 0.5684943795204163
|
| 156 |
+
- 0.6527385115623474
|
| 157 |
+
- 0.659079372882843
|
| 158 |
+
- 0.7416915893554688
|
| 159 |
+
- 0.844637930393219
|
| 160 |
+
- 0.806076169013977
|
| 161 |
+
- 0.7238750457763672
|
| 162 |
+
- 0.9744535088539124
|
| 163 |
+
- 0.9476388692855835
|
| 164 |
+
- 0.9883336424827576
|
| 165 |
+
- 1.0821290016174316
|
| 166 |
+
- 1.046391248703003
|
| 167 |
+
- 0.9829667806625366
|
| 168 |
+
- 1.0163493156433105
|
| 169 |
+
- 0.9825412631034851
|
| 170 |
+
- 0.9834834337234497
|
| 171 |
+
- 1.052114725112915
|
| 172 |
+
- 1.128888726234436
|
| 173 |
+
- 1.186057209968567
|
| 174 |
+
- 1.112004280090332
|
| 175 |
+
- 1.1282787322998047
|
| 176 |
+
- 1.051572322845459
|
| 177 |
+
- 1.1104764938354492
|
| 178 |
+
- 1.176831603050232
|
| 179 |
+
- 1.13348388671875
|
| 180 |
+
- 0.9916292428970337
|
| 181 |
+
- 0.8383486270904541
|
| 182 |
+
- 0.7735869288444519
|
| 183 |
+
- 0.9303848743438721
|
| 184 |
+
- 1.1257890462875366
|
| 185 |
+
- 1.1610286235809326
|
| 186 |
+
- 1.0335885286331177
|
| 187 |
+
- 1.0645352602005005
|
| 188 |
+
- 1.0619306564331055
|
| 189 |
+
- 1.1310148239135742
|
| 190 |
+
- 1.1191954612731934
|
| 191 |
+
- 1.1307402849197388
|
| 192 |
+
- 1.2094721794128418
|
| 193 |
+
- 1.2683185338974
|
| 194 |
+
- 1.1045044660568237
|
| 195 |
+
- 1.0479614734649658
|
| 196 |
+
- 0.9491603374481201
|
| 197 |
+
- 0.9858523011207581
|
| 198 |
+
- 0.9226155281066895
|
| 199 |
+
- 0.9469702839851379
|
| 200 |
+
- 1.023751139640808
|
| 201 |
+
- 1.1348609924316406
|
| 202 |
+
- 1.087107539176941
|
| 203 |
+
- 0.962234377861023
|
| 204 |
+
- 0.8551340699195862
|
| 205 |
+
- 0.8397778272628784
|
| 206 |
+
- 0.8908605575561523
|
| 207 |
+
- 0.7986546158790588
|
| 208 |
+
- 0.7983465194702148
|
| 209 |
+
- 0.6965265274047852
|
| 210 |
+
- 0.689120352268219
|
| 211 |
+
- 0.6862147450447083
|
| 212 |
+
- 0.5631484985351562
|
| 213 |
+
- 0.48587048053741455
|
| 214 |
+
- 0.5326520800590515
|
| 215 |
+
- 0.4286036193370819
|
| 216 |
+
- 0.35252484679222107
|
| 217 |
+
- 0.3290073573589325
|
| 218 |
+
- 0.4754445552825928
|
| 219 |
+
- 0.3632410168647766
|
| 220 |
+
- 0.391481876373291
|
| 221 |
+
- 0.2200046181678772
|
| 222 |
+
- 0.1869768500328064
|
| 223 |
+
- 0.1539602279663086
|
| 224 |
+
- 0.07932852953672409
|
| 225 |
+
- 0.012834634631872177
|
| 226 |
+
- 0.16596835851669312
|
| 227 |
+
- 0.10024689882993698
|
| 228 |
+
- -0.023952053859829903
|
| 229 |
+
- 0.05635542422533035
|
| 230 |
+
- 0.10877621918916702
|
| 231 |
+
- 0.0382893942296505
|
| 232 |
+
- 0.07318088412284851
|
| 233 |
+
- 0.14075303077697754
|
| 234 |
+
- 0.057870157063007355
|
| 235 |
+
- -0.0520513579249382
|
| 236 |
+
- 0.1741427332162857
|
| 237 |
+
- -0.11154910922050476
|
| 238 |
+
- 0.03305494412779808
|
| 239 |
+
- -0.022758174687623978
|
| 240 |
+
- -0.05313302204012871
|
| 241 |
+
- 0.00024538111756555736
|
| 242 |
+
- -0.26880618929862976
|
| 243 |
+
- -0.0825519785284996
|
| 244 |
+
- -0.3040400445461273
|
| 245 |
+
- -0.44150036573410034
|
| 246 |
+
- -0.36957985162734985
|
| 247 |
+
- -0.438098281621933
|
| 248 |
+
- -0.49879470467567444
|
| 249 |
+
- -0.5903350710868835
|
| 250 |
+
- -0.6418567895889282
|
| 251 |
+
- -0.6425778865814209
|
| 252 |
+
- -0.6178902387619019
|
| 253 |
+
- -0.47356730699539185
|
| 254 |
+
- -0.6052739024162292
|
| 255 |
+
- -0.5359307527542114
|
| 256 |
+
- -0.5759448409080505
|
| 257 |
+
- -0.5498068332672119
|
| 258 |
+
- -0.4661938548088074
|
| 259 |
+
- -0.5811225771903992
|
| 260 |
+
- -0.5229856967926025
|
| 261 |
+
- -0.3902229070663452
|
| 262 |
+
- -0.7037366032600403
|
| 263 |
+
- -0.7260795831680298
|
| 264 |
+
- -0.7540019750595093
|
| 265 |
+
- -0.828707754611969
|
| 266 |
+
- -0.8374698758125305
|
| 267 |
+
- -0.8328713178634644
|
| 268 |
+
- -0.9081047177314758
|
| 269 |
+
- -0.9679695963859558
|
| 270 |
+
- -0.9587443470954895
|
| 271 |
+
- -1.0706337690353394
|
| 272 |
+
- -0.9818469285964966
|
| 273 |
+
- -0.8360191583633423
|
| 274 |
+
- -0.9938982725143433
|
| 275 |
+
- -1.0823708772659302
|
| 276 |
+
- -1.0617167949676514
|
| 277 |
+
- -1.1093820333480835
|
| 278 |
+
- -1.1300138235092163
|
| 279 |
+
- -1.2141350507736206
|
| 280 |
+
- -1.3147293329238892
|
| 281 |
+
spec_min:
|
| 282 |
+
- -4.473258972167969
|
| 283 |
+
- -4.296891689300537
|
| 284 |
+
- -4.390527725219727
|
| 285 |
+
- -4.350704669952393
|
| 286 |
+
- -4.446024417877197
|
| 287 |
+
- -4.3960185050964355
|
| 288 |
+
- -4.164802551269531
|
| 289 |
+
- -4.5063300132751465
|
| 290 |
+
- -4.608232021331787
|
| 291 |
+
- -4.251623630523682
|
| 292 |
+
- -4.4799604415893555
|
| 293 |
+
- -4.733210563659668
|
| 294 |
+
- -4.411860466003418
|
| 295 |
+
- -4.609100818634033
|
| 296 |
+
- -4.726972579956055
|
| 297 |
+
- -4.497627258300781
|
| 298 |
+
- -4.487612247467041
|
| 299 |
+
- -4.665065765380859
|
| 300 |
+
- -4.480506896972656
|
| 301 |
+
- -4.589383125305176
|
| 302 |
+
- -4.86366605758667
|
| 303 |
+
- -4.5183892250061035
|
| 304 |
+
- -4.816161632537842
|
| 305 |
+
- -4.906436443328857
|
| 306 |
+
- -4.897279262542725
|
| 307 |
+
- -4.431278705596924
|
| 308 |
+
- -4.999994277954102
|
| 309 |
+
- -4.871325969696045
|
| 310 |
+
- -4.527368068695068
|
| 311 |
+
- -4.872085094451904
|
| 312 |
+
- -4.894851207733154
|
| 313 |
+
- -4.511948585510254
|
| 314 |
+
- -4.534575939178467
|
| 315 |
+
- -4.57792854309082
|
| 316 |
+
- -4.444681644439697
|
| 317 |
+
- -4.996480464935303
|
| 318 |
+
- -4.74341344833374
|
| 319 |
+
- -4.85427713394165
|
| 320 |
+
- -4.723776817321777
|
| 321 |
+
- -4.7166008949279785
|
| 322 |
+
- -4.749168395996094
|
| 323 |
+
- -4.67240047454834
|
| 324 |
+
- -4.590690612792969
|
| 325 |
+
- -4.576009750366211
|
| 326 |
+
- -4.542308330535889
|
| 327 |
+
- -4.890907287597656
|
| 328 |
+
- -4.631724834442139
|
| 329 |
+
- -4.494126796722412
|
| 330 |
+
- -4.499763488769531
|
| 331 |
+
- -4.574635028839111
|
| 332 |
+
- -4.49362850189209
|
| 333 |
+
- -4.651009559631348
|
| 334 |
+
- -4.684722900390625
|
| 335 |
+
- -4.594520568847656
|
| 336 |
+
- -4.5510125160217285
|
| 337 |
+
- -4.616012096405029
|
| 338 |
+
- -4.561031341552734
|
| 339 |
+
- -4.633460521697998
|
| 340 |
+
- -4.541748046875
|
| 341 |
+
- -4.625052452087402
|
| 342 |
+
- -4.524572372436523
|
| 343 |
+
- -4.563175201416016
|
| 344 |
+
- -4.515830039978027
|
| 345 |
+
- -4.581448554992676
|
| 346 |
+
- -4.556764125823975
|
| 347 |
+
- -4.695038795471191
|
| 348 |
+
- -4.548621654510498
|
| 349 |
+
- -4.5828471183776855
|
| 350 |
+
- -4.750834941864014
|
| 351 |
+
- -4.569651126861572
|
| 352 |
+
- -4.577111721038818
|
| 353 |
+
- -4.549272537231445
|
| 354 |
+
- -4.5840277671813965
|
| 355 |
+
- -4.574136257171631
|
| 356 |
+
- -4.574832439422607
|
| 357 |
+
- -4.549546718597412
|
| 358 |
+
- -4.490700721740723
|
| 359 |
+
- -4.635391712188721
|
| 360 |
+
- -4.567677974700928
|
| 361 |
+
- -4.516189098358154
|
| 362 |
+
- -4.6232805252075195
|
| 363 |
+
- -4.592589855194092
|
| 364 |
+
- -4.593951225280762
|
| 365 |
+
- -4.557478904724121
|
| 366 |
+
- -4.503338813781738
|
| 367 |
+
- -4.512742519378662
|
| 368 |
+
- -4.515079498291016
|
| 369 |
+
- -4.531710147857666
|
| 370 |
+
- -4.5540852546691895
|
| 371 |
+
- -4.441158771514893
|
| 372 |
+
- -4.489132404327393
|
| 373 |
+
- -4.519915580749512
|
| 374 |
+
- -4.570128917694092
|
| 375 |
+
- -4.480836391448975
|
| 376 |
+
- -4.494598865509033
|
| 377 |
+
- -4.51900053024292
|
| 378 |
+
- -4.518474578857422
|
| 379 |
+
- -4.519540309906006
|
| 380 |
+
- -4.495180130004883
|
| 381 |
+
- -4.471179962158203
|
| 382 |
+
- -4.478188514709473
|
| 383 |
+
- -4.475483417510986
|
| 384 |
+
- -4.479583263397217
|
| 385 |
+
- -4.491953372955322
|
| 386 |
+
- -4.4339680671691895
|
| 387 |
+
- -4.469926834106445
|
| 388 |
+
- -4.46633768081665
|
| 389 |
+
- -4.468038082122803
|
| 390 |
+
- -4.489401817321777
|
| 391 |
+
- -4.472512722015381
|
| 392 |
+
- -4.43712043762207
|
| 393 |
+
- -4.469909191131592
|
| 394 |
+
- -4.475585460662842
|
| 395 |
+
- -4.460614204406738
|
| 396 |
+
- -4.4658942222595215
|
| 397 |
+
- -4.4960408210754395
|
| 398 |
+
- -4.499384880065918
|
| 399 |
+
- -4.4431610107421875
|
| 400 |
+
- -4.440634727478027
|
| 401 |
+
- -4.468203544616699
|
| 402 |
+
- -4.461722373962402
|
| 403 |
+
- -4.503596305847168
|
| 404 |
+
- -4.457762241363525
|
| 405 |
+
- -4.453769207000732
|
| 406 |
+
- -4.509873390197754
|
| 407 |
+
- -4.505057334899902
|
| 408 |
+
- -4.486324787139893
|
| 409 |
+
- -4.49452018737793
|
| 410 |
+
spk_cond_steps: []
|
| 411 |
+
stop_token_weight: 5.0
|
| 412 |
+
task_cls: training.task.SVC_task.SVCTask
|
| 413 |
+
test_ids: []
|
| 414 |
+
test_input_dir: ''
|
| 415 |
+
test_num: 0
|
| 416 |
+
test_prefixes:
|
| 417 |
+
- test
|
| 418 |
+
test_set_name: test
|
| 419 |
+
timesteps: 1000
|
| 420 |
+
train_set_name: train
|
| 421 |
+
use_crepe: true
|
| 422 |
+
use_denoise: false
|
| 423 |
+
use_energy_embed: false
|
| 424 |
+
use_gt_dur: false
|
| 425 |
+
use_gt_f0: false
|
| 426 |
+
use_midi: false
|
| 427 |
+
use_nsf: true
|
| 428 |
+
use_pitch_embed: true
|
| 429 |
+
use_pos_embed: true
|
| 430 |
+
use_spk_embed: false
|
| 431 |
+
use_spk_id: false
|
| 432 |
+
use_split_spk_id: false
|
| 433 |
+
use_uv: false
|
| 434 |
+
use_var_enc: false
|
| 435 |
+
use_vec: false
|
| 436 |
+
val_check_interval: 2000
|
| 437 |
+
valid_num: 0
|
| 438 |
+
valid_set_name: valid
|
| 439 |
+
vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
|
| 440 |
+
vocoder_ckpt: checkpoints/nsf_hifigan/model
|
| 441 |
+
warmup_updates: 2000
|
| 442 |
+
wav2spec_eps: 1e-6
|
| 443 |
+
weight_decay: 0
|
| 444 |
+
win_size: 2048
|
| 445 |
+
work_dir: checkpoints/Unnamed
|
checkpoints/Unnamed/config_nsf.yaml
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
K_step: 1000
|
| 2 |
+
accumulate_grad_batches: 1
|
| 3 |
+
audio_num_mel_bins: 128
|
| 4 |
+
audio_sample_rate: 44100
|
| 5 |
+
binarization_args:
|
| 6 |
+
shuffle: false
|
| 7 |
+
with_align: true
|
| 8 |
+
with_f0: true
|
| 9 |
+
with_hubert: true
|
| 10 |
+
with_spk_embed: false
|
| 11 |
+
with_wav: false
|
| 12 |
+
binarizer_cls: preprocessing.SVCpre.SVCBinarizer
|
| 13 |
+
binary_data_dir: data/binary/Unnamed
|
| 14 |
+
check_val_every_n_epoch: 10
|
| 15 |
+
choose_test_manually: false
|
| 16 |
+
clip_grad_norm: 1
|
| 17 |
+
config_path: training/config_nsf.yaml
|
| 18 |
+
content_cond_steps: []
|
| 19 |
+
cwt_add_f0_loss: false
|
| 20 |
+
cwt_hidden_size: 128
|
| 21 |
+
cwt_layers: 2
|
| 22 |
+
cwt_loss: l1
|
| 23 |
+
cwt_std_scale: 0.8
|
| 24 |
+
datasets:
|
| 25 |
+
- opencpop
|
| 26 |
+
debug: false
|
| 27 |
+
dec_ffn_kernel_size: 9
|
| 28 |
+
dec_layers: 4
|
| 29 |
+
decay_steps: 20000
|
| 30 |
+
decoder_type: fft
|
| 31 |
+
dict_dir: ''
|
| 32 |
+
diff_decoder_type: wavenet
|
| 33 |
+
diff_loss_type: l2
|
| 34 |
+
dilation_cycle_length: 4
|
| 35 |
+
dropout: 0.1
|
| 36 |
+
ds_workers: 4
|
| 37 |
+
dur_enc_hidden_stride_kernel:
|
| 38 |
+
- 0,2,3
|
| 39 |
+
- 0,2,3
|
| 40 |
+
- 0,1,3
|
| 41 |
+
dur_loss: mse
|
| 42 |
+
dur_predictor_kernel: 3
|
| 43 |
+
dur_predictor_layers: 5
|
| 44 |
+
enc_ffn_kernel_size: 9
|
| 45 |
+
enc_layers: 4
|
| 46 |
+
encoder_K: 8
|
| 47 |
+
encoder_type: fft
|
| 48 |
+
endless_ds: false
|
| 49 |
+
f0_bin: 256
|
| 50 |
+
f0_max: 1100.0
|
| 51 |
+
f0_min: 40.0
|
| 52 |
+
ffn_act: gelu
|
| 53 |
+
ffn_padding: SAME
|
| 54 |
+
fft_size: 2048
|
| 55 |
+
fmax: 16000
|
| 56 |
+
fmin: 40
|
| 57 |
+
fs2_ckpt: ''
|
| 58 |
+
gaussian_start: true
|
| 59 |
+
gen_dir_name: ''
|
| 60 |
+
gen_tgt_spk_id: -1
|
| 61 |
+
hidden_size: 256
|
| 62 |
+
hop_size: 512
|
| 63 |
+
hubert_gpu: true
|
| 64 |
+
hubert_path: checkpoints/hubert/hubert_soft.pt
|
| 65 |
+
infer: false
|
| 66 |
+
keep_bins: 128
|
| 67 |
+
lambda_commit: 0.25
|
| 68 |
+
lambda_energy: 0.0
|
| 69 |
+
lambda_f0: 1.0
|
| 70 |
+
lambda_ph_dur: 0.3
|
| 71 |
+
lambda_sent_dur: 1.0
|
| 72 |
+
lambda_uv: 1.0
|
| 73 |
+
lambda_word_dur: 1.0
|
| 74 |
+
load_ckpt: pretrain/nehito_ckpt_steps_1000000.ckpt
|
| 75 |
+
log_interval: 100
|
| 76 |
+
loud_norm: false
|
| 77 |
+
lr: 5.0e-05
|
| 78 |
+
max_beta: 0.02
|
| 79 |
+
max_epochs: 3000
|
| 80 |
+
max_eval_sentences: 1
|
| 81 |
+
max_eval_tokens: 60000
|
| 82 |
+
max_frames: 42000
|
| 83 |
+
max_input_tokens: 60000
|
| 84 |
+
max_sentences: 12
|
| 85 |
+
max_tokens: 128000
|
| 86 |
+
max_updates: 1000000
|
| 87 |
+
mel_loss: ssim:0.5|l1:0.5
|
| 88 |
+
mel_vmax: 1.5
|
| 89 |
+
mel_vmin: -6.0
|
| 90 |
+
min_level_db: -120
|
| 91 |
+
no_fs2: true
|
| 92 |
+
norm_type: gn
|
| 93 |
+
num_ckpt_keep: 10
|
| 94 |
+
num_heads: 2
|
| 95 |
+
num_sanity_val_steps: 1
|
| 96 |
+
num_spk: 1
|
| 97 |
+
num_test_samples: 0
|
| 98 |
+
num_valid_plots: 10
|
| 99 |
+
optimizer_adam_beta1: 0.9
|
| 100 |
+
optimizer_adam_beta2: 0.98
|
| 101 |
+
out_wav_norm: false
|
| 102 |
+
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
|
| 103 |
+
pe_enable: false
|
| 104 |
+
perform_enhance: true
|
| 105 |
+
pitch_ar: false
|
| 106 |
+
pitch_enc_hidden_stride_kernel:
|
| 107 |
+
- 0,2,5
|
| 108 |
+
- 0,2,5
|
| 109 |
+
- 0,2,5
|
| 110 |
+
pitch_extractor: parselmouth
|
| 111 |
+
pitch_loss: l2
|
| 112 |
+
pitch_norm: log
|
| 113 |
+
pitch_type: frame
|
| 114 |
+
pndm_speedup: 10
|
| 115 |
+
pre_align_args:
|
| 116 |
+
allow_no_txt: false
|
| 117 |
+
denoise: false
|
| 118 |
+
forced_align: mfa
|
| 119 |
+
txt_processor: zh_g2pM
|
| 120 |
+
use_sox: true
|
| 121 |
+
use_tone: false
|
| 122 |
+
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
|
| 123 |
+
predictor_dropout: 0.5
|
| 124 |
+
predictor_grad: 0.1
|
| 125 |
+
predictor_hidden: -1
|
| 126 |
+
predictor_kernel: 5
|
| 127 |
+
predictor_layers: 5
|
| 128 |
+
prenet_dropout: 0.5
|
| 129 |
+
prenet_hidden_size: 256
|
| 130 |
+
pretrain_fs_ckpt: ''
|
| 131 |
+
processed_data_dir: xxx
|
| 132 |
+
profile_infer: false
|
| 133 |
+
raw_data_dir: data/raw/Unnamed
|
| 134 |
+
ref_norm_layer: bn
|
| 135 |
+
rel_pos: true
|
| 136 |
+
reset_phone_dict: true
|
| 137 |
+
residual_channels: 384
|
| 138 |
+
residual_layers: 20
|
| 139 |
+
save_best: false
|
| 140 |
+
save_ckpt: true
|
| 141 |
+
save_codes:
|
| 142 |
+
- configs
|
| 143 |
+
- modules
|
| 144 |
+
- src
|
| 145 |
+
- utils
|
| 146 |
+
save_f0: true
|
| 147 |
+
save_gt: false
|
| 148 |
+
schedule_type: linear
|
| 149 |
+
seed: 1234
|
| 150 |
+
sort_by_len: true
|
| 151 |
+
speaker_id: Unnamed
|
| 152 |
+
spec_max:
|
| 153 |
+
- -0.4884430170059204
|
| 154 |
+
- 0.004534448496997356
|
| 155 |
+
- 0.5684943795204163
|
| 156 |
+
- 0.6527385115623474
|
| 157 |
+
- 0.659079372882843
|
| 158 |
+
- 0.7416915893554688
|
| 159 |
+
- 0.844637930393219
|
| 160 |
+
- 0.806076169013977
|
| 161 |
+
- 0.7238750457763672
|
| 162 |
+
- 0.9744535088539124
|
| 163 |
+
- 0.9476388692855835
|
| 164 |
+
- 0.9883336424827576
|
| 165 |
+
- 1.0821290016174316
|
| 166 |
+
- 1.046391248703003
|
| 167 |
+
- 0.9829667806625366
|
| 168 |
+
- 1.0163493156433105
|
| 169 |
+
- 0.9825412631034851
|
| 170 |
+
- 0.9834834337234497
|
| 171 |
+
- 0.9811502695083618
|
| 172 |
+
- 1.128888726234436
|
| 173 |
+
- 1.186057209968567
|
| 174 |
+
- 1.112004280090332
|
| 175 |
+
- 1.1282787322998047
|
| 176 |
+
- 1.051572322845459
|
| 177 |
+
- 1.0510444641113281
|
| 178 |
+
- 1.0110565423965454
|
| 179 |
+
- 0.9236567616462708
|
| 180 |
+
- 0.8036720156669617
|
| 181 |
+
- 0.8383486270904541
|
| 182 |
+
- 0.7735869288444519
|
| 183 |
+
- 0.9303848743438721
|
| 184 |
+
- 1.1257890462875366
|
| 185 |
+
- 1.1610286235809326
|
| 186 |
+
- 1.0335885286331177
|
| 187 |
+
- 1.0645352602005005
|
| 188 |
+
- 1.0619306564331055
|
| 189 |
+
- 1.1310148239135742
|
| 190 |
+
- 1.1191954612731934
|
| 191 |
+
- 1.1307402849197388
|
| 192 |
+
- 0.8837698698043823
|
| 193 |
+
- 1.1153966188430786
|
| 194 |
+
- 1.1045044660568237
|
| 195 |
+
- 1.0479614734649658
|
| 196 |
+
- 0.9491603374481201
|
| 197 |
+
- 0.9858523011207581
|
| 198 |
+
- 0.9226155281066895
|
| 199 |
+
- 0.9469702839851379
|
| 200 |
+
- 0.8791896104812622
|
| 201 |
+
- 0.997624933719635
|
| 202 |
+
- 0.9068642854690552
|
| 203 |
+
- 0.9575618505477905
|
| 204 |
+
- 0.8551340699195862
|
| 205 |
+
- 0.8397778272628784
|
| 206 |
+
- 0.8908605575561523
|
| 207 |
+
- 0.7986546158790588
|
| 208 |
+
- 0.7983465194702148
|
| 209 |
+
- 0.6965265274047852
|
| 210 |
+
- 0.640673041343689
|
| 211 |
+
- 0.6690735220909119
|
| 212 |
+
- 0.5631484985351562
|
| 213 |
+
- 0.48587048053741455
|
| 214 |
+
- 0.5326520800590515
|
| 215 |
+
- 0.4286036193370819
|
| 216 |
+
- 0.35252484679222107
|
| 217 |
+
- 0.3290073573589325
|
| 218 |
+
- 0.4754445552825928
|
| 219 |
+
- 0.3632410168647766
|
| 220 |
+
- 0.391481876373291
|
| 221 |
+
- 0.20288512110710144
|
| 222 |
+
- 0.18305960297584534
|
| 223 |
+
- 0.1539602279663086
|
| 224 |
+
- 0.03451670706272125
|
| 225 |
+
- -0.16881510615348816
|
| 226 |
+
- -0.02030198462307453
|
| 227 |
+
- 0.10024689882993698
|
| 228 |
+
- -0.023952053859829903
|
| 229 |
+
- 0.05635542422533035
|
| 230 |
+
- 0.10877621918916702
|
| 231 |
+
- 0.006155031267553568
|
| 232 |
+
- 0.07318088412284851
|
| 233 |
+
- 0.14075303077697754
|
| 234 |
+
- 0.057870157063007355
|
| 235 |
+
- -0.0520513579249382
|
| 236 |
+
- 0.1741427332162857
|
| 237 |
+
- -0.11464552581310272
|
| 238 |
+
- 0.03305494412779808
|
| 239 |
+
- -0.06897418200969696
|
| 240 |
+
- -0.12598733603954315
|
| 241 |
+
- -0.09894973039627075
|
| 242 |
+
- -0.2817802429199219
|
| 243 |
+
- -0.0825519785284996
|
| 244 |
+
- -0.3040400445461273
|
| 245 |
+
- -0.4998124837875366
|
| 246 |
+
- -0.36957985162734985
|
| 247 |
+
- -0.5409602522850037
|
| 248 |
+
- -0.49879470467567444
|
| 249 |
+
- -0.713716983795166
|
| 250 |
+
- -0.6545754671096802
|
| 251 |
+
- -0.6425778865814209
|
| 252 |
+
- -0.6178902387619019
|
| 253 |
+
- -0.47356730699539185
|
| 254 |
+
- -0.6165243983268738
|
| 255 |
+
- -0.5841533541679382
|
| 256 |
+
- -0.5759448409080505
|
| 257 |
+
- -0.5498068332672119
|
| 258 |
+
- -0.4661938548088074
|
| 259 |
+
- -0.5811225771903992
|
| 260 |
+
- -0.614664614200592
|
| 261 |
+
- -0.3902229070663452
|
| 262 |
+
- -0.7037366032600403
|
| 263 |
+
- -0.7260795831680298
|
| 264 |
+
- -0.7540019750595093
|
| 265 |
+
- -0.8360528945922852
|
| 266 |
+
- -0.8374698758125305
|
| 267 |
+
- -0.8328713178634644
|
| 268 |
+
- -0.9081047177314758
|
| 269 |
+
- -0.9679695963859558
|
| 270 |
+
- -0.9587443470954895
|
| 271 |
+
- -1.0706337690353394
|
| 272 |
+
- -0.9818469285964966
|
| 273 |
+
- -0.8360191583633423
|
| 274 |
+
- -0.9938981533050537
|
| 275 |
+
- -1.0823708772659302
|
| 276 |
+
- -1.0617167949676514
|
| 277 |
+
- -1.1093820333480835
|
| 278 |
+
- -1.1300138235092163
|
| 279 |
+
- -1.2141350507736206
|
| 280 |
+
- -1.3147293329238892
|
| 281 |
+
spec_min:
|
| 282 |
+
- -4.473258972167969
|
| 283 |
+
- -4.244492530822754
|
| 284 |
+
- -4.390527725219727
|
| 285 |
+
- -4.209497928619385
|
| 286 |
+
- -4.446024417877197
|
| 287 |
+
- -4.3960185050964355
|
| 288 |
+
- -4.164802551269531
|
| 289 |
+
- -4.5063300132751465
|
| 290 |
+
- -4.608232021331787
|
| 291 |
+
- -4.251623630523682
|
| 292 |
+
- -4.4799604415893555
|
| 293 |
+
- -4.733210563659668
|
| 294 |
+
- -4.411860466003418
|
| 295 |
+
- -4.609100818634033
|
| 296 |
+
- -4.726972579956055
|
| 297 |
+
- -4.428761959075928
|
| 298 |
+
- -4.487612247467041
|
| 299 |
+
- -4.525552749633789
|
| 300 |
+
- -4.480506896972656
|
| 301 |
+
- -4.589383125305176
|
| 302 |
+
- -4.608384132385254
|
| 303 |
+
- -4.385376453399658
|
| 304 |
+
- -4.816161632537842
|
| 305 |
+
- -4.8706955909729
|
| 306 |
+
- -4.848956108093262
|
| 307 |
+
- -4.431278705596924
|
| 308 |
+
- -4.999994277954102
|
| 309 |
+
- -4.818373203277588
|
| 310 |
+
- -4.527368068695068
|
| 311 |
+
- -4.872085094451904
|
| 312 |
+
- -4.894851207733154
|
| 313 |
+
- -4.511948585510254
|
| 314 |
+
- -4.534575939178467
|
| 315 |
+
- -4.57792854309082
|
| 316 |
+
- -4.444681644439697
|
| 317 |
+
- -4.628803253173828
|
| 318 |
+
- -4.74341344833374
|
| 319 |
+
- -4.85427713394165
|
| 320 |
+
- -4.723776817321777
|
| 321 |
+
- -4.7166008949279785
|
| 322 |
+
- -4.749168395996094
|
| 323 |
+
- -4.67240047454834
|
| 324 |
+
- -4.590690612792969
|
| 325 |
+
- -4.576009750366211
|
| 326 |
+
- -4.542308330535889
|
| 327 |
+
- -4.890907287597656
|
| 328 |
+
- -4.613001823425293
|
| 329 |
+
- -4.494126796722412
|
| 330 |
+
- -4.474257946014404
|
| 331 |
+
- -4.574635028839111
|
| 332 |
+
- -4.4817585945129395
|
| 333 |
+
- -4.651009559631348
|
| 334 |
+
- -4.478254795074463
|
| 335 |
+
- -4.523812770843506
|
| 336 |
+
- -4.546536922454834
|
| 337 |
+
- -4.535660266876221
|
| 338 |
+
- -4.470296859741211
|
| 339 |
+
- -4.577486991882324
|
| 340 |
+
- -4.541748046875
|
| 341 |
+
- -4.428532123565674
|
| 342 |
+
- -4.461862564086914
|
| 343 |
+
- -4.489077091217041
|
| 344 |
+
- -4.515830039978027
|
| 345 |
+
- -4.395663738250732
|
| 346 |
+
- -4.439975738525391
|
| 347 |
+
- -4.4290876388549805
|
| 348 |
+
- -4.397741794586182
|
| 349 |
+
- -4.478252410888672
|
| 350 |
+
- -4.399686336517334
|
| 351 |
+
- -4.45617151260376
|
| 352 |
+
- -4.434477806091309
|
| 353 |
+
- -4.442898750305176
|
| 354 |
+
- -4.5840277671813965
|
| 355 |
+
- -4.537542819976807
|
| 356 |
+
- -4.492046356201172
|
| 357 |
+
- -4.534677505493164
|
| 358 |
+
- -4.477104187011719
|
| 359 |
+
- -4.511618614196777
|
| 360 |
+
- -4.387601375579834
|
| 361 |
+
- -4.499236106872559
|
| 362 |
+
- -4.3717169761657715
|
| 363 |
+
- -4.4242024421691895
|
| 364 |
+
- -4.4055657386779785
|
| 365 |
+
- -4.429355144500732
|
| 366 |
+
- -4.4636993408203125
|
| 367 |
+
- -4.508528232574463
|
| 368 |
+
- -4.515079498291016
|
| 369 |
+
- -4.426190376281738
|
| 370 |
+
- -4.433525085449219
|
| 371 |
+
- -4.4200215339660645
|
| 372 |
+
- -4.421280860900879
|
| 373 |
+
- -4.400143623352051
|
| 374 |
+
- -4.419166088104248
|
| 375 |
+
- -4.429825305938721
|
| 376 |
+
- -4.436781406402588
|
| 377 |
+
- -4.51550817489624
|
| 378 |
+
- -4.518474578857422
|
| 379 |
+
- -4.495880603790283
|
| 380 |
+
- -4.483924865722656
|
| 381 |
+
- -4.409562587738037
|
| 382 |
+
- -4.3811845779418945
|
| 383 |
+
- -4.411908149719238
|
| 384 |
+
- -4.427165985107422
|
| 385 |
+
- -4.396549701690674
|
| 386 |
+
- -4.340637683868408
|
| 387 |
+
- -4.405435085296631
|
| 388 |
+
- -4.367630481719971
|
| 389 |
+
- -4.419083595275879
|
| 390 |
+
- -4.389026165008545
|
| 391 |
+
- -4.371067047119141
|
| 392 |
+
- -4.370710372924805
|
| 393 |
+
- -4.3755269050598145
|
| 394 |
+
- -4.39500093460083
|
| 395 |
+
- -4.451773166656494
|
| 396 |
+
- -4.365351676940918
|
| 397 |
+
- -4.348028182983398
|
| 398 |
+
- -4.408270359039307
|
| 399 |
+
- -4.390385627746582
|
| 400 |
+
- -4.347931861877441
|
| 401 |
+
- -4.378237247467041
|
| 402 |
+
- -4.426717758178711
|
| 403 |
+
- -4.364233493804932
|
| 404 |
+
- -4.371546745300293
|
| 405 |
+
- -4.402477264404297
|
| 406 |
+
- -4.430750846862793
|
| 407 |
+
- -4.404538154602051
|
| 408 |
+
- -4.384459018707275
|
| 409 |
+
- -4.401677131652832
|
| 410 |
+
spk_cond_steps: []
|
| 411 |
+
stop_token_weight: 5.0
|
| 412 |
+
task_cls: training.task.SVC_task.SVCTask
|
| 413 |
+
test_ids: []
|
| 414 |
+
test_input_dir: ''
|
| 415 |
+
test_num: 0
|
| 416 |
+
test_prefixes:
|
| 417 |
+
- test
|
| 418 |
+
test_set_name: test
|
| 419 |
+
timesteps: 1000
|
| 420 |
+
train_set_name: train
|
| 421 |
+
use_crepe: false
|
| 422 |
+
use_denoise: false
|
| 423 |
+
use_energy_embed: false
|
| 424 |
+
use_gt_dur: false
|
| 425 |
+
use_gt_f0: false
|
| 426 |
+
use_midi: false
|
| 427 |
+
use_nsf: true
|
| 428 |
+
use_pitch_embed: true
|
| 429 |
+
use_pos_embed: true
|
| 430 |
+
use_spk_embed: false
|
| 431 |
+
use_spk_id: false
|
| 432 |
+
use_split_spk_id: false
|
| 433 |
+
use_uv: false
|
| 434 |
+
use_var_enc: false
|
| 435 |
+
use_vec: false
|
| 436 |
+
val_check_interval: 1000
|
| 437 |
+
valid_num: 0
|
| 438 |
+
valid_set_name: valid
|
| 439 |
+
vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
|
| 440 |
+
vocoder_ckpt: checkpoints/nsf_hifigan/model
|
| 441 |
+
warmup_updates: 2000
|
| 442 |
+
wav2spec_eps: 1e-6
|
| 443 |
+
weight_decay: 0
|
| 444 |
+
win_size: 2048
|
| 445 |
+
work_dir: checkpoints/HokoHifi
|
checkpoints/Unnamed/lightning_logs/lastest/hparams.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{}
|
checkpoints/Unnamed/model_ckpt_steps_192000.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c441462923580893a6170dd00126084be0a20b387b1c4fb1860755acd36c881b
|
| 3 |
+
size 391390823
|
checkpoints/nsf_hifigan/NOTICE.txt
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--- DiffSinger Community Vocoder ---
|
| 2 |
+
|
| 3 |
+
ARCHITECTURE: NSF-HiFiGAN
|
| 4 |
+
RELEASE DATE: 2022-12-11
|
| 5 |
+
|
| 6 |
+
HYPER PARAMETERS:
|
| 7 |
+
- 44100 sample rate
|
| 8 |
+
- 128 mel bins
|
| 9 |
+
- 512 hop size
|
| 10 |
+
- 2048 window size
|
| 11 |
+
- fmin at 40Hz
|
| 12 |
+
- fmax at 16000Hz
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
NOTICE:
|
| 16 |
+
|
| 17 |
+
All model weights in the [DiffSinger Community Vocoder Project](https://openvpi.github.io/vocoders/), including
|
| 18 |
+
model weights in this directory, are provided by the [OpenVPI Team](https://github.com/openvpi/), under the
|
| 19 |
+
[Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
ACKNOWLEDGEMENTS:
|
| 23 |
+
|
| 24 |
+
Training data of this vocoder is provided and permitted by the following organizations, societies and individuals:
|
| 25 |
+
|
| 26 |
+
孙飒 https://www.qfssr.cn
|
| 27 |
+
赤松_Akamatsu https://www.zhibin.club
|
| 28 |
+
乐威 https://www.zhibin.club
|
| 29 |
+
伯添 https://space.bilibili.com/24087011
|
| 30 |
+
雲宇光 https://space.bilibili.com/660675050
|
| 31 |
+
橙子言 https://space.bilibili.com/318486464
|
| 32 |
+
人衣大人 https://space.bilibili.com/2270344
|
| 33 |
+
玖蝶 https://space.bilibili.com/676771003
|
| 34 |
+
Yuuko
|
| 35 |
+
白夜零BYL https://space.bilibili.com/1605040503
|
| 36 |
+
嗷天 https://space.bilibili.com/5675252
|
| 37 |
+
洛泠羽 https://space.bilibili.com/347373318
|
| 38 |
+
灰条纹的灰猫君 https://space.bilibili.com/2083633
|
| 39 |
+
幽寂 https://space.bilibili.com/478860
|
| 40 |
+
恶魔王女 https://space.bilibili.com/2475098
|
| 41 |
+
AlexYHX 芮晴
|
| 42 |
+
绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
|
| 43 |
+
诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
|
| 44 |
+
汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
|
| 45 |
+
1262917464
|
| 46 |
+
炜阳
|
| 47 |
+
叶卡yolka
|
| 48 |
+
幸の夏 https://space.bilibili.com/1017297686
|
| 49 |
+
暮色未量 https://space.bilibili.com/272904686
|
| 50 |
+
晓寞sama https://space.bilibili.com/3463394
|
| 51 |
+
没头绪的节操君
|
| 52 |
+
串串BunC https://space.bilibili.com/95817834
|
| 53 |
+
落雨 https://space.bilibili.com/1292427
|
| 54 |
+
长尾巴的翎艾 https://space.bilibili.com/1638666
|
| 55 |
+
声闻计划 https://space.bilibili.com/392812269
|
| 56 |
+
唐家大小姐 http://5sing.kugou.com/palmusic/default.html
|
| 57 |
+
不伊子
|
| 58 |
+
|
| 59 |
+
Training machines are provided by:
|
| 60 |
+
|
| 61 |
+
花儿不哭 https://space.bilibili.com/5760446
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
TERMS OF REDISTRIBUTIONS:
|
| 65 |
+
|
| 66 |
+
1. Do not sell this vocoder, or charge any fees from redistributing it, as prohibited by
|
| 67 |
+
the license.
|
| 68 |
+
2. Include a copy of the CC BY-NC-SA 4.0 license, or a link referring to it.
|
| 69 |
+
3. Include a copy of this notice, or any other notices informing that this vocoder is
|
| 70 |
+
provided by the OpenVPI Team, that this vocoder is licensed under CC BY-NC-SA 4.0, and
|
| 71 |
+
with a complete acknowledgement list as shown above.
|
| 72 |
+
4. If you fine-tuned or modified the weights, leave a notice about what has been changed.
|
| 73 |
+
5. (Optional) Leave a link to the official release page of the vocoder, and tell users
|
| 74 |
+
that other versions and future updates of this vocoder can be obtained from the website.
|
checkpoints/nsf_hifigan/NOTICE.zh-CN.txt
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--- DiffSinger 社区声码器 ---
|
| 2 |
+
|
| 3 |
+
架构:NSF-HiFiGAN
|
| 4 |
+
发布日期:2022-12-11
|
| 5 |
+
|
| 6 |
+
超参数:
|
| 7 |
+
- 44100 sample rate
|
| 8 |
+
- 128 mel bins
|
| 9 |
+
- 512 hop size
|
| 10 |
+
- 2048 window size
|
| 11 |
+
- fmin at 40Hz
|
| 12 |
+
- fmax at 16000Hz
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
注意事项:
|
| 16 |
+
|
| 17 |
+
[DiffSinger 社区声码器企划](https://openvpi.github.io/vocoders/) 中的所有模型权重,
|
| 18 |
+
包括此目录下的模型权重,均由 [OpenVPI Team](https://github.com/openvpi/) 提供,并基于
|
| 19 |
+
[Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)
|
| 20 |
+
进行许可。
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
致谢:
|
| 24 |
+
|
| 25 |
+
此声码器的训练数据由以下组织、社团和个人提供并许可:
|
| 26 |
+
|
| 27 |
+
孙飒 https://www.qfssr.cn
|
| 28 |
+
赤松_Akamatsu https://www.zhibin.club
|
| 29 |
+
乐威 https://www.zhibin.club
|
| 30 |
+
伯添 https://space.bilibili.com/24087011
|
| 31 |
+
雲宇光 https://space.bilibili.com/660675050
|
| 32 |
+
橙子言 https://space.bilibili.com/318486464
|
| 33 |
+
人衣大人 https://space.bilibili.com/2270344
|
| 34 |
+
玖蝶 https://space.bilibili.com/676771003
|
| 35 |
+
Yuuko
|
| 36 |
+
白夜零BYL https://space.bilibili.com/1605040503
|
| 37 |
+
嗷天 https://space.bilibili.com/5675252
|
| 38 |
+
洛泠羽 https://space.bilibili.com/347373318
|
| 39 |
+
灰条纹的灰猫君 https://space.bilibili.com/2083633
|
| 40 |
+
幽寂 https://space.bilibili.com/478860
|
| 41 |
+
恶魔王女 https://space.bilibili.com/2475098
|
| 42 |
+
AlexYHX 芮晴
|
| 43 |
+
绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
|
| 44 |
+
诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
|
| 45 |
+
汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
|
| 46 |
+
1262917464
|
| 47 |
+
炜阳
|
| 48 |
+
叶卡yolka
|
| 49 |
+
幸の夏 https://space.bilibili.com/1017297686
|
| 50 |
+
暮色未量 https://space.bilibili.com/272904686
|
| 51 |
+
晓寞sama https://space.bilibili.com/3463394
|
| 52 |
+
没头绪的节操君
|
| 53 |
+
串串BunC https://space.bilibili.com/95817834
|
| 54 |
+
落雨 https://space.bilibili.com/1292427
|
| 55 |
+
长尾巴的翎艾 https://space.bilibili.com/1638666
|
| 56 |
+
声闻计划 https://space.bilibili.com/392812269
|
| 57 |
+
唐家大小姐 http://5sing.kugou.com/palmusic/default.html
|
| 58 |
+
不伊子
|
| 59 |
+
|
| 60 |
+
训练算力的提供者如下:
|
| 61 |
+
|
| 62 |
+
花儿不哭 https://space.bilibili.com/5760446
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
二次分发条款:
|
| 66 |
+
|
| 67 |
+
1. 请勿售卖此声码器或从其二次分发过程中收取任何费用,因为此类行为受到许可证的禁止。
|
| 68 |
+
2. 请在二次分发文件中包含一份 CC BY-NC-SA 4.0 许可证的副本或指向该许可证的链接。
|
| 69 |
+
3. 请在二次分发文件中包含这份声明,或以其他形式声明此声码器由 OpenVPI Team 提供并基于 CC BY-NC-SA 4.0 许可,
|
| 70 |
+
并附带上述完整的致谢名单。
|
| 71 |
+
4. 如果您微调或修改了权重,请留下一份关于其受到了何种修改的说明。
|
| 72 |
+
5.(可选)留下一份指向此声码器的官方发布页面的链接,并告知使用者可从该网站获取此声码器的其他版本和未来的更新。
|
checkpoints/nsf_hifigan/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resblock": "1",
|
| 3 |
+
"num_gpus": 4,
|
| 4 |
+
"batch_size": 10,
|
| 5 |
+
"learning_rate": 0.0002,
|
| 6 |
+
"adam_b1": 0.8,
|
| 7 |
+
"adam_b2": 0.99,
|
| 8 |
+
"lr_decay": 0.999,
|
| 9 |
+
"seed": 1234,
|
| 10 |
+
|
| 11 |
+
"upsample_rates": [ 8, 8, 2, 2, 2],
|
| 12 |
+
"upsample_kernel_sizes": [16,16, 4, 4, 4],
|
| 13 |
+
"upsample_initial_channel": 512,
|
| 14 |
+
"resblock_kernel_sizes": [3,7,11],
|
| 15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
| 16 |
+
"discriminator_periods": [3, 5, 7, 11, 17, 23, 37],
|
| 17 |
+
|
| 18 |
+
"segment_size": 16384,
|
| 19 |
+
"num_mels": 128,
|
| 20 |
+
"num_freq": 1025,
|
| 21 |
+
"n_fft" : 2048,
|
| 22 |
+
"hop_size": 512,
|
| 23 |
+
"win_size": 2048,
|
| 24 |
+
|
| 25 |
+
"sampling_rate": 44100,
|
| 26 |
+
|
| 27 |
+
"fmin": 40,
|
| 28 |
+
"fmax": 16000,
|
| 29 |
+
"fmax_for_loss": null,
|
| 30 |
+
|
| 31 |
+
"num_workers": 16,
|
| 32 |
+
|
| 33 |
+
"dist_config": {
|
| 34 |
+
"dist_backend": "nccl",
|
| 35 |
+
"dist_url": "tcp://localhost:54321",
|
| 36 |
+
"world_size": 1
|
| 37 |
+
}
|
| 38 |
+
}
|
checkpoints/nsf_hifigan/model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
|
| 3 |
+
size 56825430
|