utkarsh2299 committed on
Commit
fa7c5a2
·
verified ·
1 Parent(s): 58f9392

Upload 5 files

Browse files
bi_model_male/config.yaml ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 2
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 59619
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 1000
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ grad_clip: 1.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 8
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_tensorboard: true
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: 800
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 3000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/tts_stats_raw_char_None/train/text_shape.char
72
+ - exp/tts_stats_raw_char_None/train/speech_shape
73
+ valid_shape_file:
74
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
75
+ - exp/tts_stats_raw_char_None/valid/speech_shape
76
+ batch_type: numel
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 150
80
+ - 409600
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/tr_no_dev/text
89
+ - text
90
+ - text
91
+ - - duration_info/tr_no_dev/durations
92
+ - durations
93
+ - text_int
94
+ - - dump/raw/tr_no_dev/wav.scp
95
+ - speech
96
+ - sound
97
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
98
+ - pitch
99
+ - npy
100
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
101
+ - energy
102
+ - npy
103
+ valid_data_path_and_name_and_type:
104
+ - - dump/raw/dev/text
105
+ - text
106
+ - text
107
+ - - duration_info/dev/durations
108
+ - durations
109
+ - text_int
110
+ - - dump/raw/dev/wav.scp
111
+ - speech
112
+ - sound
113
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
114
+ - pitch
115
+ - npy
116
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
117
+ - energy
118
+ - npy
119
+ allow_variable_data_keys: false
120
+ max_cache_size: 0.0
121
+ max_cache_fd: 32
122
+ valid_max_cache_size: null
123
+ optim: adam
124
+ optim_conf:
125
+ lr: 1.0
126
+ scheduler: noamlr
127
+ scheduler_conf:
128
+ model_size: 384
129
+ warmup_steps: 4000
130
+ token_list:
131
+ - <blank>
132
+ - <unk>
133
+ - <space>
134
+ - a
135
+ - r
136
+ - ','
137
+ - E
138
+ - n
139
+ - A
140
+ - i
141
+ - k
142
+ - s
143
+ - I
144
+ - ट
145
+ - l
146
+ - h
147
+ - m
148
+ - d
149
+ - ड
150
+ - w
151
+ - o
152
+ - p
153
+ - t
154
+ - $
155
+ - .
156
+ - b
157
+ - ऐ
158
+ - q
159
+ - z
160
+ - U
161
+ - f
162
+ - y
163
+ - u
164
+ - अ
165
+ - ऑ
166
+ - g
167
+ - j
168
+ - श
169
+ - ङ
170
+ - औ
171
+ - c
172
+ - थ
173
+ - B
174
+ - ख
175
+ - ध
176
+ - D
177
+ - C
178
+ - ष
179
+ - M
180
+ - ण
181
+ - ठ
182
+ - घ
183
+ - J
184
+ - क
185
+ - P
186
+ - K
187
+ - R
188
+ - T
189
+ - ढ
190
+ - G
191
+ - ञ
192
+ - H
193
+ - Y
194
+ - ऍ
195
+ - e
196
+ - ള
197
+ - र
198
+ - <sos/eos>
199
+ odim: null
200
+ model_conf: {}
201
+ use_preprocessor: true
202
+ token_type: char
203
+ bpemodel: null
204
+ non_linguistic_symbols: null
205
+ cleaner: null
206
+ g2p: g2p_en_no_space
207
+ feats_extract: fbank
208
+ feats_extract_conf:
209
+ n_fft: 2048
210
+ hop_length: 512
211
+ win_length: 2048
212
+ fs: 48000
213
+ fmin: 0
214
+ fmax: 8000
215
+ n_mels: 80
216
+ normalize: global_mvn
217
+ normalize_conf:
218
+ stats_file: exp/tts_stats_raw_char_None/train/feats_stats.npz
219
+ tts: fastspeech2
220
+ tts_conf:
221
+ adim: 384
222
+ aheads: 2
223
+ elayers: 4
224
+ eunits: 1536
225
+ dlayers: 4
226
+ dunits: 1536
227
+ positionwise_layer_type: conv1d
228
+ positionwise_conv_kernel_size: 3
229
+ duration_predictor_layers: 2
230
+ duration_predictor_chans: 256
231
+ duration_predictor_kernel_size: 3
232
+ postnet_layers: 5
233
+ postnet_filts: 5
234
+ postnet_chans: 256
235
+ use_masking: true
236
+ use_scaled_pos_enc: true
237
+ encoder_normalize_before: true
238
+ decoder_normalize_before: true
239
+ reduction_factor: 1
240
+ init_type: xavier_uniform
241
+ init_enc_alpha: 1.0
242
+ init_dec_alpha: 1.0
243
+ transformer_enc_dropout_rate: 0.2
244
+ transformer_enc_positional_dropout_rate: 0.2
245
+ transformer_enc_attn_dropout_rate: 0.2
246
+ transformer_dec_dropout_rate: 0.2
247
+ transformer_dec_positional_dropout_rate: 0.2
248
+ transformer_dec_attn_dropout_rate: 0.2
249
+ pitch_predictor_layers: 5
250
+ pitch_predictor_chans: 256
251
+ pitch_predictor_kernel_size: 5
252
+ pitch_predictor_dropout: 0.5
253
+ pitch_embed_kernel_size: 1
254
+ pitch_embed_dropout: 0.0
255
+ stop_gradient_from_pitch_predictor: true
256
+ energy_predictor_layers: 2
257
+ energy_predictor_chans: 256
258
+ energy_predictor_kernel_size: 3
259
+ energy_predictor_dropout: 0.5
260
+ energy_embed_kernel_size: 1
261
+ energy_embed_dropout: 0.0
262
+ stop_gradient_from_energy_predictor: false
263
+ pitch_extract: dio
264
+ pitch_extract_conf:
265
+ fs: 48000
266
+ n_fft: 2048
267
+ hop_length: 512
268
+ f0max: 350
269
+ f0min: 40
270
+ reduction_factor: 1
271
+ pitch_normalize: global_mvn
272
+ pitch_normalize_conf:
273
+ stats_file: exp/tts_stats_raw_char_None/train/pitch_stats.npz
274
+ energy_extract: energy
275
+ energy_extract_conf:
276
+ fs: 48000
277
+ n_fft: 2048
278
+ hop_length: 512
279
+ win_length: 2048
280
+ reduction_factor: 1
281
+ energy_normalize: global_mvn
282
+ energy_normalize_conf:
283
+ stats_file: exp/tts_stats_raw_char_None/train/energy_stats.npz
284
+ required:
285
+ - output_dir
286
+ - token_list
287
+ version: 0.10.3a3
288
+ distributed: true
bi_model_male/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1448f55315b7f828b9caf0d4b984731fa037d6d1a7c11137e53c9865363bfcdf
3
+ size 770
bi_model_male/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:babc1f7291174371c035025da1fccaf1085386ab73d112e0624d553a552f0ca9
3
+ size 1402
bi_model_male/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c6a0e372469db10c147bddaaa42a5c47c9abebe160316ef7dd3af909256808
3
+ size 148877305
bi_model_male/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12ca749b83851de2b5961a8a0b7e16dd03613a29b4cf80dc8f0287321ed82c59
3
+ size 770