mrfakename committed on
Commit
efe7c0d
·
verified ·
1 Parent(s): f0dc7fc

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.yaml +168 -0
  2. model_only_last.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ acous_params:
2
+ - - 480
3
+ - 1200
4
+ - 80
5
+ - - 240
6
+ - 1200
7
+ - 160
8
+ amp: false
9
+ audio_num_mel_bins: 160
10
+ audio_sample_rate: 24000
11
+ balance_sil: true
12
+ c_spk_enc: 512
13
+ char_dict_size: 15000
14
+ conv_use_pos: false
15
+ dec0_dilations:
16
+ - 1
17
+ - 2
18
+ - 4
19
+ - 1
20
+ - 2
21
+ - 4
22
+ - 1
23
+ dec0_kernel_size: 3
24
+ dec_dilations:
25
+ - 1
26
+ - 2
27
+ - 1
28
+ - 2
29
+ - 1
30
+ dec_ffn_kernel_size: 9
31
+ dec_hidden_size: 2048
32
+ dec_inp_add_noise: false
33
+ dec_kernel_size: 5
34
+ dec_layers: 4
35
+ dec_num_heads: 8
36
+ dec_post_net_kernel: 3
37
+ decoder_rnn_dim: 0
38
+ decoder_type: conv
39
+ dropout: 0.0
40
+ ds_add_pitch_embed: false
41
+ dur_alpha: 1.0
42
+ dur_code_size: 128
43
+ dur_context_enc: true
44
+ dur_log: true
45
+ dur_model_hidden_size: 512
46
+ dur_model_layers: 8
47
+ dur_model_type: ar_mse
48
+ dur_predictor_kernel: 3
49
+ dur_predictor_layers: 2
50
+ dur_txt_hs: 512
51
+ dur_use_char: true
52
+ dur_use_spk: true
53
+ enc_dec_norm: ln
54
+ enc_dilations:
55
+ - 1
56
+ - 1
57
+ - 1
58
+ - 1
59
+ enc_ffn_kernel_size: 3
60
+ enc_hidden_size: 256
61
+ enc_kernel_size: 5
62
+ enc_layers: 4
63
+ enc_post_net_kernel: 3
64
+ enc_pre_ln: true
65
+ enc_prenet: true
66
+ encoder_K: 8
67
+ encoder_type: rel_fft
68
+ f0_max: 600
69
+ f0_min: 60
70
+ ffn_act: gelu
71
+ ffn_hidden_size: 1024
72
+ fft_size: 1200
73
+ fg_spk_enc_hidden: 256
74
+ flatten_dec: true
75
+ fmax: 12000
76
+ fmin: 0
77
+ frames_multiple: 8
78
+ hidden_size: 512
79
+ hop_size: 240
80
+ ignore_begin_end_sil: false
81
+ lat_for_dur: false
82
+ latent_size: 256
83
+ layers_in_block: 2
84
+ ling_label_dict_size:
85
+ - 20
86
+ - 4
87
+ - 5
88
+ - 2
89
+ - 3
90
+ - 3
91
+ - 3
92
+ - 6
93
+ - 15
94
+ ling_labels:
95
+ - tone
96
+ lm_num_layers: 24
97
+ lm_use_enc: true
98
+ loud_norm: false
99
+ max_tokens: 6000
100
+ mel_vmax: 0.5
101
+ mel_vmin: -6
102
+ min_frames: 0
103
+ mix_melout_timbre: true
104
+ mix_ph_timbre: false
105
+ model_type: 1
106
+ multistage: false
107
+ no_text_enc: false
108
+ num_heads: 2
109
+ out_wav_norm: true
110
+ pad_frames: false
111
+
112
+ precision: fp16
113
+ predict_pitch: false
114
+ predictor_dropout: 0.0
115
+ predictor_grad: 1.0
116
+ predictor_hidden: -1
117
+ predictor_kernel: 5
118
+ predictor_layers: 5
119
+ print_nan_grads: true
120
+ ref_mel_bins: 160
121
+ ref_size_max: 2000
122
+ ref_size_min: 1000
123
+ remove_sil: false
124
+ shuffle_ref: false
125
+ split_ref: true
126
+ temperature: 0.8
127
+ tone_percep_ckpt: ''
128
+ train_spk_embed_only: false
129
+ use_bert_input: false
130
+ use_char: true
131
+ use_cur_global: false
132
+ use_cur_global_dec: true
133
+ use_dur_embed: true
134
+ use_dur_mask_embed: true
135
+ use_finegrained_spk: false
136
+ use_global_lat: false
137
+ use_gpt: true
138
+ use_gt_dur: false
139
+ use_gt_f0: false
140
+ use_mix_spk_embed: false
141
+ use_new_vae: false
142
+ use_ph_level_f0: false
143
+ use_ph_pos_embed: true
144
+ use_pitch_embed: false
145
+ use_pitch_embed_dec: false
146
+ use_pitch_pred: true
147
+ use_pos_embed: false
148
+ use_post_ln: false
149
+ use_random_spk_embed: false
150
+ use_rot_embed: true
151
+ use_spk_embed: false
152
+ use_spk_enc: false
153
+ use_spk_id: false
154
+ use_text_postnet: true
155
+ use_uv: true
156
+ use_vae: true
157
+ use_vqvae: true
158
+ use_word_encoder: true
159
+ use_word_input: false
160
+ vae_dur_grad: 0.1
161
+ vae_enc_hidden_size: 384
162
+ vae_word_conder_layers: 0
163
+ vq_stride: 8
164
+ w_nonsil: 10.0
165
+ w_sil: 1.0
166
+ word_dict_size: 10000
167
+ z_channels: 64
168
+ z_clamp: 2.0
model_only_last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f21f4205c5d3ec4bef69716a85ca3d37f25c35b429bac500477a2085039b43f
3
+ size 267955084