# Singing-voice configuration: the pre-align and binarizer classes referenced
# below live under data_gen.singing. It is layered on the two TTS configs
# listed in base_config — presumably values in this file override those
# loaded from the parents; confirm against the config loader.
base_config:
  - configs/tts/base.yaml
  - configs/tts/base_zh.yaml

# Dataset selection. Left empty/zero here — presumably filled in by a concrete
# config that lists this file in its own base_config; TODO confirm.
datasets: []
test_prefixes: []
test_num: 0
valid_num: 0

# Preprocessing entry points, given as dotted class paths.
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
binarizer_cls: data_gen.singing.binarize.SingingBinarizer
# NOTE(review): nesting reconstructed. The flattened original carried no
# indentation, so confirm that exactly these three keys belong under
# pre_align_args (left at top level, pre_align_args itself would be null).
pre_align_args:
  use_tone: false
  forced_align: mfa
  use_sox: true

# Feature-extraction settings. Units are not stated in this file
# (hop/fft/win sizes presumably in samples, fmin/fmax presumably in Hz);
# verify against the feature extractor.
hop_size: 128
fft_size: 512
win_size: 512
max_frames: 8000
fmin: 50
fmax: 11025
pitch_type: frame

# Model width and loss weighting. Every lambda_* weight and predictor_grad is
# 0.0 in this config. mel_loss is a spec string interpreted by the consumer —
# it looks like name:weight pairs joined by '|'; confirm with the loss parser.
hidden_size: 256
mel_loss: "ssim:0.5|l1:0.5"
lambda_f0: 0.0
lambda_uv: 0.0
lambda_energy: 0.0
lambda_ph_dur: 0.0
lambda_sent_dur: 0.0
lambda_word_dur: 0.0
predictor_grad: 0.0
# Speaker conditioning: the embedding switch is on, the id switch is off.
use_spk_embed: true
use_spk_id: false

# Batching / schedule limits and ground-truth switches (use_gt_dur and
# use_gt_f0 are both enabled here).
max_tokens: 20000
max_updates: 400000
num_spk: 100
save_f0: true
use_gt_dur: true
use_gt_f0: true