# Top-level run settings (flat scalars).
# NOTE(review): every line in this file is wrapped in `| ... |` and carries no
# indentation; as written a standard YAML parser should reject it. This looks
# like a table-export artifact -- confirm the format the loader expects.
| log_dir: ./Models/Finetune |
# Key names suggest checkpoint/log cadence; the unit (epochs vs. steps) is not
# visible here -- TODO confirm in the training script.
| save_freq: 1 |
| log_interval: 10 |
| device: cuda |
| epochs: 50 |
| batch_size: 2 |
# NOTE(review): the unit of max_len is not stated (frames? tokens?) -- confirm.
| max_len: 160 |
# Checkpoint path sits under the same directory as log_dir.
| pretrained_model: ./Models/Finetune/base_model_120k_vi.pth |
# Presumably restores weights only (no optimizer/epoch state) -- verify in the
# checkpoint-loading code.
| load_only_params: true |
| debug: false |
|
|
# Dataset locations followed by the symbol inventory.
# NOTE(review): indentation is not visible in this file, so whether `symbol`
# is a child of `data_params` or a sibling cannot be determined here --
# confirm against the code that reads this config.
| data_params: |
# train_data and val_data both start with root_path (data_22k).
# NOTE(review): whether the loader joins them onto root_path again is not
# visible here -- confirm.
| train_data: data_22k/train_phn.txt |
| val_data: data_22k/dev_phn.txt |
| root_path: data_22k |
| n_speakers: 152 |
# Each entry below is one quoted string of characters. Presumably these are
# concatenated to build the token vocabulary -- TODO confirm.
| symbol: |
| pad: "$" |
# Single-quoted, so the embedded double quote needs no escaping; the trailing
# space inside the quotes is part of the value.
| punctuation: ';:,.!?¡¿—…"«»“” ' |
| letters: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" |
# Double-quoted; the apostrophes and combining marks near the end are literal
# characters of the value. The ASCII apostrophe appears twice in this string.
| letters_ipa: "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ" |
# Contains combining marks and the digits 1-6. Presumably extra symbols added
# on top of the base inventory (going by the key name) -- confirm.
| extend: "∫̆ăη͡123456" |
|
|
# Audio preprocessing settings: a sample rate plus spectrogram parameters.
| preprocess_params: |
| sr: 24000 |
# Assuming sr is in Hz and the lengths below are in samples (TODO confirm):
# win_length 1200 / 24000 = 50 ms, hop_length 300 / 24000 = 12.5 ms.
# n_fft (2048) is larger than win_length (1200).
| spect_params: |
| n_fft: 2048 |
| win_length: 1200 |
| hop_length: 300 |
|
|
# Two lists of module names.
# NOTE(review): how the training code uses them (e.g. freezing parameters,
# skipping weights when loading the checkpoint) is not visible here -- confirm.
| training_strats: |
| |
# This is a list holding one empty string, not an empty list ([]).
# Presumably it means "no modules" -- verify how the consumer matches names.
| freeze_modules: [''] |
| ignore_modules: ['spk_emb', 'spk_ln', 'style_ln', 'gate'] |
|
|
# Model size hyperparameters (all scalars).
# NOTE(review): per-key meanings are inferred from names only -- confirm
# against the model definition before changing any of them.
| model_params: |
| dim_in: 64 |
# hidden_dim and max_conv_dim carry the same value (512).
| hidden_dim: 512 |
| max_conv_dim: 512 |
| n_layer: 3 |
# Same value as ASR_params.input_dim (80) elsewhere in this file.
| n_mels: 80 |
| max_dur: 50 |
| style_dim: 128 |
| dropout: 0.2 |
|
|
# Hyperparameters for a component named "ASR".
# NOTE(review): presumably a text-aligner / speech-recognition sub-model, and
# possibly nested under model_params -- indentation is not visible, confirm.
| ASR_params: |
# Same value as model_params.n_mels (80).
| input_dim: 80 |
# This hidden_dim (256) differs from model_params.hidden_dim (512); they only
# collide as duplicate keys if both end up in the same mapping.
| hidden_dim: 256 |
| n_layers: 6 |
| token_embedding_dim: 512 |
|
|
# Hyperparameters for a component named "JDC".
# NOTE(review): presumably a pitch (F0) extractor -- inferred from the name
# only; nesting level is also not visible here. Confirm both.
| JDC_params: |
| num_class: 1 |
| seq_len: 192 |
|
|
| |
# Decoder settings; `type` is set to hifigan.
# NOTE(review): nesting level (top-level vs. under model_params) is not
# visible in this file -- confirm against the code that reads it.
| decoder: |
|
|
| type: hifigan |
# Three kernel sizes here, matched by three dilation lists further down.
| resblock_kernel_sizes: [3, 7, 11] |
# Product of the rates is 10 * 5 * 3 * 2 = 300, equal to
# preprocess_params.spect_params.hop_length.
| upsample_rates: [10, 5, 3, 2] |
| upsample_initial_channel: 512 |
| resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]] |
# One kernel size per upsample rate; each is twice its rate.
| upsample_kernel_sizes: [20, 10, 6, 4] |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
# Loss weights. All are 1. except lambda_mel (5.) and lambda_ce (20.).
# Values are written with a trailing dot, presumably so they load as floats
# rather than ints -- confirm with the parser in use.
# NOTE(review): which loss term each lambda scales is inferred from the key
# name only -- confirm in the training loop.
| loss_params: |
| lambda_mel: 5. |
| lambda_gen: 1. |
| |
| lambda_mono: 1. |
| lambda_s2s: 1. |
|
|
| lambda_F0: 1. |
| lambda_norm: 1. |
| lambda_dur: 1. |
| lambda_ce: 20. |
|
|
# Optimizer learning rates.
| optimizer_params: |
| lr: 0.0001 |
# ft_lr is one tenth of lr.
# NOTE(review): which modules train with ft_lr instead of lr is not visible
# here -- confirm in the optimizer setup.
| ft_lr: 0.00001 |