snzhkhd committed on
Commit
7f741b5
·
verified ·
1 Parent(s): c545228

Upload config.yml

Browse files
Files changed (1) hide show
  1. config.yml +108 -0
config.yml ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ASR_config: Utils\ASR\config.yml
2
+ ASR_path: Utils\ASR\epoch_00080.pth
3
+ F0_path: Utils\JDC\bst.t7
4
+ PLBERT_dir: Utils\PLBERT
5
+ batch_size: 1
6
+ data_params:
7
+ OOD_data: Data/OOD_texts_ru.txt
8
+ min_length: 50
9
+ root_path: training\prokopenko_2h\audio
10
+ train_data: training\prokopenko_2h\train_phoneme.txt
11
+ val_data: training\prokopenko_2h\validation_phoneme.txt
12
+ device: cuda
13
+ epochs: 20
14
+ load_only_params: true
15
+ log_dir: training\prokopenko_2h\models
16
+ log_interval: 10
17
+ loss_params:
18
+ diff_epoch: 0
19
+ joint_epoch: 0
20
+ lambda_F0: 1.0
21
+ lambda_ce: 20.0
22
+ lambda_diff: 1.0
23
+ lambda_dur: 1.0
24
+ lambda_gen: 1.0
25
+ lambda_mel: 5.0
26
+ lambda_mono: 1.0
27
+ lambda_norm: 1.0
28
+ lambda_s2s: 1.0
29
+ lambda_slm: 1.0
30
+ lambda_sty: 1.0
31
+ max_len: 100
32
+ model_params:
33
+ decoder:
34
+ resblock_dilation_sizes:
35
+ - - 1
36
+ - 3
37
+ - 5
38
+ - - 1
39
+ - 3
40
+ - 5
41
+ - - 1
42
+ - 3
43
+ - 5
44
+ resblock_kernel_sizes:
45
+ - 3
46
+ - 7
47
+ - 11
48
+ type: hifigan
49
+ upsample_initial_channel: 512
50
+ upsample_kernel_sizes:
51
+ - 20
52
+ - 10
53
+ - 6
54
+ - 4
55
+ upsample_rates:
56
+ - 10
57
+ - 5
58
+ - 3
59
+ - 2
60
+ diffusion:
61
+ dist:
62
+ estimate_sigma_data: true
63
+ mean: -3.0
64
+ sigma_data: 0.2
65
+ std: 1.0
66
+ embedding_mask_proba: 0.1
67
+ transformer:
68
+ head_features: 64
69
+ multiplier: 2
70
+ num_heads: 8
71
+ num_layers: 3
72
+ dim_in: 64
73
+ dropout: 0.2
74
+ hidden_dim: 512
75
+ max_conv_dim: 512
76
+ max_dur: 50
77
+ multispeaker: false
78
+ n_layer: 3
79
+ n_mels: 80
80
+ n_token: 178
81
+ slm:
82
+ hidden: 768
83
+ initial_channel: 64
84
+ model: microsoft/wavlm-base-plus
85
+ nlayers: 13
86
+ sr: 16000
87
+ style_dim: 128
88
+ optimizer_params:
89
+ bert_lr: 1.0e-05
90
+ ft_lr: 0.0001
91
+ lr: 0.0001
92
+ preprocess_params:
93
+ spect_params:
94
+ hop_length: 300
95
+ n_fft: 2048
96
+ win_length: 1200
97
+ sr: 24000
98
+ pretrained_model: models\pretrain_base_1\epochs_2nd_00020.pth
99
+ save_freq: 1
100
+ second_stage_load_pretrained: true
101
+ slmadv_params:
102
+ batch_percentage: 0.35
103
+ iter: 10
104
+ max_len: 400
105
+ min_len: 350
106
+ scale: 0.01
107
+ sig: 1.5
108
+ thresh: 5