utkarsh2299 committed on
Commit
2ba468b
·
verified ·
1 Parent(s): 339fb6c

Upload 22 files

Browse files
manipuri_latest/female/model/config.yaml ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 1000
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - valid
43
+ - loss
44
+ - min
45
+ - - train
46
+ - loss
47
+ - min
48
+ keep_nbest_models: 5
49
+ nbest_averaging_interval: 0
50
+ grad_clip: 1.0
51
+ grad_clip_type: 2.0
52
+ grad_noise: false
53
+ accum_grad: 8
54
+ no_forward_run: false
55
+ resume: true
56
+ train_dtype: float32
57
+ use_amp: false
58
+ log_interval: null
59
+ use_matplotlib: true
60
+ use_tensorboard: true
61
+ create_graph_in_tensorboard: false
62
+ use_wandb: false
63
+ wandb_project: null
64
+ wandb_id: null
65
+ wandb_entity: null
66
+ wandb_name: null
67
+ wandb_model_log_interval: -1
68
+ detect_anomaly: false
69
+ use_adapter: false
70
+ adapter: lora
71
+ save_strategy: all
72
+ adapter_conf: {}
73
+ pretrain_path: null
74
+ init_param: []
75
+ ignore_init_mismatch: false
76
+ freeze_param: []
77
+ num_iters_per_epoch: 800
78
+ batch_size: 20
79
+ valid_batch_size: null
80
+ batch_bins: 3000000
81
+ valid_batch_bins: null
82
+ category_sample_size: 10
83
+ train_shape_file:
84
+ - exp/tts_stats_raw_char_None/train/text_shape.char
85
+ - exp/tts_stats_raw_char_None/train/speech_shape
86
+ valid_shape_file:
87
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
88
+ - exp/tts_stats_raw_char_None/valid/speech_shape
89
+ batch_type: numel
90
+ valid_batch_type: null
91
+ fold_length:
92
+ - 150
93
+ - 819200
94
+ sort_in_batch: descending
95
+ shuffle_within_batch: false
96
+ sort_batch: descending
97
+ multiple_iterator: false
98
+ chunk_length: 500
99
+ chunk_shift_ratio: 0.5
100
+ num_cache_chunks: 1024
101
+ chunk_excluded_key_prefixes: []
102
+ chunk_default_fs: null
103
+ chunk_max_abs_length: null
104
+ chunk_discard_short_samples: true
105
+ train_data_path_and_name_and_type:
106
+ - - dump/raw/tr_no_dev/text
107
+ - text
108
+ - text
109
+ - - duration_info/tr_no_dev/durations
110
+ - durations
111
+ - text_int
112
+ - - dump/raw/tr_no_dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
116
+ - pitch
117
+ - npy
118
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
119
+ - energy
120
+ - npy
121
+ valid_data_path_and_name_and_type:
122
+ - - dump/raw/dev/text
123
+ - text
124
+ - text
125
+ - - duration_info/dev/durations
126
+ - durations
127
+ - text_int
128
+ - - dump/raw/dev/wav.scp
129
+ - speech
130
+ - sound
131
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
132
+ - pitch
133
+ - npy
134
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
135
+ - energy
136
+ - npy
137
+ multi_task_dataset: false
138
+ allow_variable_data_keys: false
139
+ max_cache_size: 0.0
140
+ max_cache_fd: 32
141
+ allow_multi_rates: false
142
+ valid_max_cache_size: null
143
+ exclude_weight_decay: false
144
+ exclude_weight_decay_conf: {}
145
+ optim: adam
146
+ optim_conf:
147
+ lr: 1.0
148
+ scheduler: noamlr
149
+ scheduler_conf:
150
+ model_size: 384
151
+ warmup_steps: 4000
152
+ token_list:
153
+ - <blank>
154
+ - <unk>
155
+ - <space>
156
+ - A
157
+ - a
158
+ - n
159
+ - m
160
+ - i
161
+ - u
162
+ - d
163
+ - b
164
+ - k
165
+ - r
166
+ - I
167
+ - l
168
+ - t
169
+ - o
170
+ - g
171
+ - p
172
+ - q
173
+ - E
174
+ - y
175
+ - s
176
+ - h
177
+ - ख
178
+ - $
179
+ - .
180
+ - श
181
+ - थ
182
+ - c
183
+ - ङ
184
+ - ऐ
185
+ - औ
186
+ - j
187
+ - P
188
+ - ','
189
+ - ट
190
+ - w
191
+ - U
192
+ - ण
193
+ - B
194
+ - ष
195
+ - ञ
196
+ - ध
197
+ - R
198
+ - ड
199
+ - D
200
+ - C
201
+ - ठ
202
+ - घ
203
+ - M
204
+ - J
205
+ - Y
206
+ - ढ
207
+ - ൺ
208
+ - H
209
+ - <sos/eos>
210
+ odim: null
211
+ model_conf: {}
212
+ use_preprocessor: true
213
+ token_type: char
214
+ bpemodel: null
215
+ non_linguistic_symbols: null
216
+ cleaner: null
217
+ g2p: null
218
+ feats_extract: fbank
219
+ feats_extract_conf:
220
+ n_fft: 8192
221
+ hop_length: 1024
222
+ win_length: 4096
223
+ fs: 48000
224
+ fmin: 0
225
+ fmax: 24000
226
+ n_mels: 160
227
+ normalize: global_mvn
228
+ normalize_conf:
229
+ stats_file: exp/tts_stats_raw_char_None/train/feats_stats.npz
230
+ tts: fastspeech2
231
+ tts_conf:
232
+ adim: 384
233
+ aheads: 2
234
+ elayers: 4
235
+ eunits: 1536
236
+ dlayers: 4
237
+ dunits: 1536
238
+ positionwise_layer_type: conv1d
239
+ positionwise_conv_kernel_size: 3
240
+ duration_predictor_layers: 2
241
+ duration_predictor_chans: 256
242
+ duration_predictor_kernel_size: 3
243
+ postnet_layers: 5
244
+ postnet_filts: 5
245
+ postnet_chans: 256
246
+ use_masking: true
247
+ use_scaled_pos_enc: true
248
+ encoder_normalize_before: true
249
+ decoder_normalize_before: true
250
+ reduction_factor: 1
251
+ init_type: xavier_uniform
252
+ init_enc_alpha: 1.0
253
+ init_dec_alpha: 1.0
254
+ transformer_enc_dropout_rate: 0.2
255
+ transformer_enc_positional_dropout_rate: 0.2
256
+ transformer_enc_attn_dropout_rate: 0.2
257
+ transformer_dec_dropout_rate: 0.2
258
+ transformer_dec_positional_dropout_rate: 0.2
259
+ transformer_dec_attn_dropout_rate: 0.2
260
+ pitch_predictor_layers: 5
261
+ pitch_predictor_chans: 256
262
+ pitch_predictor_kernel_size: 5
263
+ pitch_predictor_dropout: 0.5
264
+ pitch_embed_kernel_size: 1
265
+ pitch_embed_dropout: 0.0
266
+ stop_gradient_from_pitch_predictor: true
267
+ energy_predictor_layers: 2
268
+ energy_predictor_chans: 256
269
+ energy_predictor_kernel_size: 3
270
+ energy_predictor_dropout: 0.5
271
+ energy_embed_kernel_size: 1
272
+ energy_embed_dropout: 0.0
273
+ stop_gradient_from_energy_predictor: false
274
+ pitch_extract: dio
275
+ pitch_extract_conf:
276
+ fs: 48000
277
+ n_fft: 8192
278
+ hop_length: 1024
279
+ f0max: 350
280
+ f0min: 40
281
+ reduction_factor: 1
282
+ pitch_normalize: global_mvn
283
+ pitch_normalize_conf:
284
+ stats_file: exp/tts_stats_raw_char_None/train/pitch_stats.npz
285
+ energy_extract: energy
286
+ energy_extract_conf:
287
+ fs: 48000
288
+ n_fft: 8192
289
+ hop_length: 1024
290
+ win_length: 4096
291
+ reduction_factor: 1
292
+ energy_normalize: global_mvn
293
+ energy_normalize_conf:
294
+ stats_file: exp/tts_stats_raw_char_None/train/energy_stats.npz
295
+ required:
296
+ - output_dir
297
+ - token_list
298
+ version: '202409'
299
+ distributed: false
manipuri_latest/female/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867a29f753d8b43099642ea1713788343e4c3ef4db05c8203985f5c0c4df6bb8
3
+ size 770
manipuri_latest/female/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d2ec356f628ddac15f5b617f38721efc838a2d4d81ad2ba22c3838f0b50656
3
+ size 2042
manipuri_latest/female/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:598228106ae4bf485a5e4af4330f52ef5f3e237d4153928227f116f604fb43e6
3
+ size 152097178
manipuri_latest/female/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb8fd8a7913b50766f6e805454e76fcaeab529453db85d6dbe27c2c0bd55449
3
+ size 770
manipuri_latest/male/model/config.yaml ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 1000
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - valid
43
+ - loss
44
+ - min
45
+ - - train
46
+ - loss
47
+ - min
48
+ keep_nbest_models: 5
49
+ nbest_averaging_interval: 0
50
+ grad_clip: 1.0
51
+ grad_clip_type: 2.0
52
+ grad_noise: false
53
+ accum_grad: 8
54
+ no_forward_run: false
55
+ resume: true
56
+ train_dtype: float32
57
+ use_amp: false
58
+ log_interval: null
59
+ use_matplotlib: true
60
+ use_tensorboard: true
61
+ create_graph_in_tensorboard: false
62
+ use_wandb: false
63
+ wandb_project: null
64
+ wandb_id: null
65
+ wandb_entity: null
66
+ wandb_name: null
67
+ wandb_model_log_interval: -1
68
+ detect_anomaly: false
69
+ use_adapter: false
70
+ adapter: lora
71
+ save_strategy: all
72
+ adapter_conf: {}
73
+ pretrain_path: null
74
+ init_param: []
75
+ ignore_init_mismatch: false
76
+ freeze_param: []
77
+ num_iters_per_epoch: 800
78
+ batch_size: 20
79
+ valid_batch_size: null
80
+ batch_bins: 3000000
81
+ valid_batch_bins: null
82
+ category_sample_size: 10
83
+ train_shape_file:
84
+ - exp/tts_stats_raw_char_None/train/text_shape.char
85
+ - exp/tts_stats_raw_char_None/train/speech_shape
86
+ valid_shape_file:
87
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
88
+ - exp/tts_stats_raw_char_None/valid/speech_shape
89
+ batch_type: numel
90
+ valid_batch_type: null
91
+ fold_length:
92
+ - 150
93
+ - 819200
94
+ sort_in_batch: descending
95
+ shuffle_within_batch: false
96
+ sort_batch: descending
97
+ multiple_iterator: false
98
+ chunk_length: 500
99
+ chunk_shift_ratio: 0.5
100
+ num_cache_chunks: 1024
101
+ chunk_excluded_key_prefixes: []
102
+ chunk_default_fs: null
103
+ chunk_max_abs_length: null
104
+ chunk_discard_short_samples: true
105
+ train_data_path_and_name_and_type:
106
+ - - dump/raw/tr_no_dev/text
107
+ - text
108
+ - text
109
+ - - duration_info/tr_no_dev/durations
110
+ - durations
111
+ - text_int
112
+ - - dump/raw/tr_no_dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
116
+ - pitch
117
+ - npy
118
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
119
+ - energy
120
+ - npy
121
+ valid_data_path_and_name_and_type:
122
+ - - dump/raw/dev/text
123
+ - text
124
+ - text
125
+ - - duration_info/dev/durations
126
+ - durations
127
+ - text_int
128
+ - - dump/raw/dev/wav.scp
129
+ - speech
130
+ - sound
131
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
132
+ - pitch
133
+ - npy
134
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
135
+ - energy
136
+ - npy
137
+ multi_task_dataset: false
138
+ allow_variable_data_keys: false
139
+ max_cache_size: 0.0
140
+ max_cache_fd: 32
141
+ allow_multi_rates: false
142
+ valid_max_cache_size: null
143
+ exclude_weight_decay: false
144
+ exclude_weight_decay_conf: {}
145
+ optim: adam
146
+ optim_conf:
147
+ lr: 1.0
148
+ scheduler: noamlr
149
+ scheduler_conf:
150
+ model_size: 384
151
+ warmup_steps: 4000
152
+ token_list:
153
+ - <blank>
154
+ - <unk>
155
+ - <space>
156
+ - A
157
+ - a
158
+ - n
159
+ - i
160
+ - m
161
+ - u
162
+ - b
163
+ - d
164
+ - k
165
+ - r
166
+ - I
167
+ - l
168
+ - t
169
+ - o
170
+ - g
171
+ - q
172
+ - p
173
+ - E
174
+ - y
175
+ - s
176
+ - h
177
+ - ख
178
+ - $
179
+ - .
180
+ - श
181
+ - थ
182
+ - c
183
+ - ङ
184
+ - ऐ
185
+ - औ
186
+ - j
187
+ - P
188
+ - w
189
+ - ट
190
+ - ','
191
+ - U
192
+ - ण
193
+ - B
194
+ - ष
195
+ - ञ
196
+ - ध
197
+ - R
198
+ - ड
199
+ - D
200
+ - C
201
+ - ठ
202
+ - घ
203
+ - J
204
+ - M
205
+ - Y
206
+ - ढ
207
+ - H
208
+ - ൺ
209
+ - ॠ
210
+ - <sos/eos>
211
+ odim: null
212
+ model_conf: {}
213
+ use_preprocessor: true
214
+ token_type: char
215
+ bpemodel: null
216
+ non_linguistic_symbols: null
217
+ cleaner: null
218
+ g2p: null
219
+ feats_extract: fbank
220
+ feats_extract_conf:
221
+ n_fft: 8192
222
+ hop_length: 1024
223
+ win_length: 4096
224
+ fs: 48000
225
+ fmin: 0
226
+ fmax: 24000
227
+ n_mels: 160
228
+ normalize: global_mvn
229
+ normalize_conf:
230
+ stats_file: exp/tts_stats_raw_char_None/train/feats_stats.npz
231
+ tts: fastspeech2
232
+ tts_conf:
233
+ adim: 384
234
+ aheads: 2
235
+ elayers: 4
236
+ eunits: 1536
237
+ dlayers: 4
238
+ dunits: 1536
239
+ positionwise_layer_type: conv1d
240
+ positionwise_conv_kernel_size: 3
241
+ duration_predictor_layers: 2
242
+ duration_predictor_chans: 256
243
+ duration_predictor_kernel_size: 3
244
+ postnet_layers: 5
245
+ postnet_filts: 5
246
+ postnet_chans: 256
247
+ use_masking: true
248
+ use_scaled_pos_enc: true
249
+ encoder_normalize_before: true
250
+ decoder_normalize_before: true
251
+ reduction_factor: 1
252
+ init_type: xavier_uniform
253
+ init_enc_alpha: 1.0
254
+ init_dec_alpha: 1.0
255
+ transformer_enc_dropout_rate: 0.2
256
+ transformer_enc_positional_dropout_rate: 0.2
257
+ transformer_enc_attn_dropout_rate: 0.2
258
+ transformer_dec_dropout_rate: 0.2
259
+ transformer_dec_positional_dropout_rate: 0.2
260
+ transformer_dec_attn_dropout_rate: 0.2
261
+ pitch_predictor_layers: 5
262
+ pitch_predictor_chans: 256
263
+ pitch_predictor_kernel_size: 5
264
+ pitch_predictor_dropout: 0.5
265
+ pitch_embed_kernel_size: 1
266
+ pitch_embed_dropout: 0.0
267
+ stop_gradient_from_pitch_predictor: true
268
+ energy_predictor_layers: 2
269
+ energy_predictor_chans: 256
270
+ energy_predictor_kernel_size: 3
271
+ energy_predictor_dropout: 0.5
272
+ energy_embed_kernel_size: 1
273
+ energy_embed_dropout: 0.0
274
+ stop_gradient_from_energy_predictor: false
275
+ pitch_extract: dio
276
+ pitch_extract_conf:
277
+ fs: 48000
278
+ n_fft: 8192
279
+ hop_length: 1024
280
+ f0max: 350
281
+ f0min: 40
282
+ reduction_factor: 1
283
+ pitch_normalize: global_mvn
284
+ pitch_normalize_conf:
285
+ stats_file: exp/tts_stats_raw_char_None/train/pitch_stats.npz
286
+ energy_extract: energy
287
+ energy_extract_conf:
288
+ fs: 48000
289
+ n_fft: 8192
290
+ hop_length: 1024
291
+ win_length: 4096
292
+ reduction_factor: 1
293
+ energy_normalize: global_mvn
294
+ energy_normalize_conf:
295
+ stats_file: exp/tts_stats_raw_char_None/train/energy_stats.npz
296
+ required:
297
+ - output_dir
298
+ - token_list
299
+ version: '202409'
300
+ distributed: false
manipuri_latest/male/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd6c77944a979252ea56dc6c061d4faa9659e588da7a515cb84492da14617e5
3
+ size 770
manipuri_latest/male/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36b000ed69eeb081a5e9e1d394af0d2ebfac0350b4b12029e325a79e84d492e
3
+ size 2042
manipuri_latest/male/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9237f50de75c4deb17600a0ee001fc64bd8018691c6ce6ecb0838de4374af6ba
3
+ size 152098714
manipuri_latest/male/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d48b513a8b1e593fd0e7299dd419daffda9162a07de012d8d0dbc4085d48b99
3
+ size 770
nepali_latest/female/model/config.yaml ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 8
2
+ adapter: lora
3
+ adapter_conf: {}
4
+ allow_multi_rates: false
5
+ allow_variable_data_keys: false
6
+ batch_bins: 3000000
7
+ batch_size: 20
8
+ batch_type: numel
9
+ best_model_criterion:
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - loss
15
+ - min
16
+ bpemodel: null
17
+ chunk_default_fs: null
18
+ chunk_discard_short_samples: true
19
+ chunk_excluded_key_prefixes: []
20
+ chunk_length: 500
21
+ chunk_max_abs_length: null
22
+ chunk_shift_ratio: 0.5
23
+ cleaner: null
24
+ collect_stats: false
25
+ config: conf/tuning/train_fastspeech2.yaml
26
+ create_graph_in_tensorboard: false
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ cudnn_enabled: true
30
+ detect_anomaly: false
31
+ dist_backend: nccl
32
+ dist_init_method: env://
33
+ dist_launcher: null
34
+ dist_master_addr: localhost
35
+ dist_master_port: 54431
36
+ dist_rank: 0
37
+ dist_world_size: 2
38
+ distributed: true
39
+ drop_last_iter: false
40
+ dry_run: false
41
+ early_stopping_criterion:
42
+ - valid
43
+ - loss
44
+ - min
45
+ energy_extract: energy
46
+ energy_extract_conf:
47
+ fs: 48000
48
+ hop_length: 1024
49
+ n_fft: 8192
50
+ reduction_factor: 1
51
+ win_length: 4096
52
+ energy_normalize: global_mvn
53
+ energy_normalize_conf:
54
+ stats_file: /home/speech/Suji/FS2/Fastspeech2_HS/Google_VM_models/Nepali/Female/energy_stats.npz
55
+ exclude_weight_decay: false
56
+ exclude_weight_decay_conf: {}
57
+ feats_extract: fbank
58
+ feats_extract_conf:
59
+ fmax: 24000
60
+ fmin: 0
61
+ fs: 48000
62
+ hop_length: 1024
63
+ n_fft: 8192
64
+ n_mels: 160
65
+ win_length: 4096
66
+ fold_length:
67
+ - 150
68
+ - 819200
69
+ freeze_param: []
70
+ g2p: null
71
+ grad_clip: 1.0
72
+ grad_clip_type: 2.0
73
+ grad_noise: false
74
+ ignore_init_mismatch: false
75
+ init_param: []
76
+ iterator_type: sequence
77
+ keep_nbest_models: 5
78
+ local_rank: 0
79
+ log_interval: null
80
+ log_level: INFO
81
+ max_cache_fd: 32
82
+ max_cache_size: 0.0
83
+ max_epoch: 1000
84
+ model_conf: {}
85
+ multiple_iterator: false
86
+ multiprocessing_distributed: true
87
+ nbest_averaging_interval: 0
88
+ ngpu: 1
89
+ no_forward_run: false
90
+ non_linguistic_symbols: null
91
+ normalize: global_mvn
92
+ normalize_conf:
93
+ stats_file: /home/speech/Suji/FS2/Fastspeech2_HS/Google_VM_models/Nepali/Female/feats_stats.npz
94
+ num_att_plot: 3
95
+ num_cache_chunks: 1024
96
+ num_iters_per_epoch: 800
97
+ num_workers: 1
98
+ odim: null
99
+ optim: adam
100
+ optim_conf:
101
+ lr: 1.0
102
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
103
+ patience: null
104
+ pitch_extract: dio
105
+ pitch_extract_conf:
106
+ f0max: 400
107
+ f0min: 80
108
+ fs: 48000
109
+ hop_length: 1024
110
+ n_fft: 8192
111
+ reduction_factor: 1
112
+ pitch_normalize: global_mvn
113
+ pitch_normalize_conf:
114
+ stats_file: /home/speech/Suji/FS2/Fastspeech2_HS/Google_VM_models/Nepali/Female/pitch_stats.npz
115
+ pretrain_path: null
116
+ print_config: false
117
+ required:
118
+ - output_dir
119
+ - token_list
120
+ resume: true
121
+ save_strategy: all
122
+ scheduler: noamlr
123
+ scheduler_conf:
124
+ model_size: 384
125
+ warmup_steps: 4000
126
+ seed: 0
127
+ sharded_ddp: false
128
+ shuffle_within_batch: false
129
+ sort_batch: descending
130
+ sort_in_batch: descending
131
+ token_list:
132
+ - <blank>
133
+ - <unk>
134
+ - <space>
135
+ - $
136
+ - ','
137
+ - .
138
+ - M
139
+ - q
140
+ - H
141
+ - k
142
+ - "\u0916"
143
+ - g
144
+ - "\u0918"
145
+ - "\u0919"
146
+ - c
147
+ - C
148
+ - j
149
+ - J
150
+ - "\u091E"
151
+ - "\u091F"
152
+ - "\u0920"
153
+ - "\u0921"
154
+ - "\u0922"
155
+ - "\u0923"
156
+ - t
157
+ - "\u0925"
158
+ - d
159
+ - "\u0927"
160
+ - n
161
+ - "\u0929"
162
+ - p
163
+ - P
164
+ - b
165
+ - B
166
+ - m
167
+ - y
168
+ - r
169
+ - "\u0931"
170
+ - l
171
+ - "\u0D33"
172
+ - Z
173
+ - w
174
+ - "\u0936"
175
+ - "\u0937"
176
+ - s
177
+ - h
178
+ - Y
179
+ - "\u093D"
180
+ - "\u0915"
181
+ - K
182
+ - G
183
+ - z
184
+ - D
185
+ - T
186
+ - f
187
+ - "\u0960"
188
+ - "\u0D7A"
189
+ - N
190
+ - "\u0D7C"
191
+ - "\u0D7D"
192
+ - "\u0D7E"
193
+ - a
194
+ - A
195
+ - i
196
+ - I
197
+ - u
198
+ - U
199
+ - R
200
+ - "\u090D"
201
+ - e
202
+ - E
203
+ - "\u0910"
204
+ - "\u0911"
205
+ - o
206
+ - O
207
+ - "\u0914"
208
+ - "\u0B89"
209
+ - <sos/eos>
210
+ token_type: char
211
+ train_data_path_and_name_and_type:
212
+ - - dump/raw/tr_no_dev/text
213
+ - text
214
+ - text
215
+ - - duration_info/tr_no_dev/durations
216
+ - durations
217
+ - text_int
218
+ - - dump/raw/tr_no_dev/wav.scp
219
+ - speech
220
+ - sound
221
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
222
+ - pitch
223
+ - npy
224
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
225
+ - energy
226
+ - npy
227
+ train_dtype: float32
228
+ train_shape_file:
229
+ - exp/tts_stats_raw_char_None/train/text_shape.char
230
+ - exp/tts_stats_raw_char_None/train/speech_shape
231
+ tts: fastspeech2
232
+ tts_conf:
233
+ adim: 384
234
+ aheads: 2
235
+ decoder_normalize_before: true
236
+ dlayers: 4
237
+ dunits: 1536
238
+ duration_predictor_chans: 256
239
+ duration_predictor_kernel_size: 3
240
+ duration_predictor_layers: 2
241
+ elayers: 4
242
+ encoder_normalize_before: true
243
+ energy_embed_dropout: 0.0
244
+ energy_embed_kernel_size: 1
245
+ energy_predictor_chans: 256
246
+ energy_predictor_dropout: 0.5
247
+ energy_predictor_kernel_size: 3
248
+ energy_predictor_layers: 2
249
+ eunits: 1536
250
+ init_dec_alpha: 1.0
251
+ init_enc_alpha: 1.0
252
+ init_type: xavier_uniform
253
+ pitch_embed_dropout: 0.0
254
+ pitch_embed_kernel_size: 1
255
+ pitch_predictor_chans: 256
256
+ pitch_predictor_dropout: 0.5
257
+ pitch_predictor_kernel_size: 5
258
+ pitch_predictor_layers: 5
259
+ positionwise_conv_kernel_size: 3
260
+ positionwise_layer_type: conv1d
261
+ postnet_chans: 256
262
+ postnet_filts: 5
263
+ postnet_layers: 5
264
+ reduction_factor: 1
265
+ stop_gradient_from_energy_predictor: false
266
+ stop_gradient_from_pitch_predictor: true
267
+ transformer_dec_attn_dropout_rate: 0.2
268
+ transformer_dec_dropout_rate: 0.2
269
+ transformer_dec_positional_dropout_rate: 0.2
270
+ transformer_enc_attn_dropout_rate: 0.2
271
+ transformer_enc_dropout_rate: 0.2
272
+ transformer_enc_positional_dropout_rate: 0.2
273
+ use_masking: true
274
+ use_scaled_pos_enc: true
275
+ unused_parameters: false
276
+ use_adapter: false
277
+ use_amp: false
278
+ use_matplotlib: true
279
+ use_preprocessor: true
280
+ use_tensorboard: true
281
+ use_wandb: false
282
+ val_scheduler_criterion:
283
+ - valid
284
+ - loss
285
+ valid_batch_bins: null
286
+ valid_batch_size: null
287
+ valid_batch_type: null
288
+ valid_data_path_and_name_and_type:
289
+ - - dump/raw/dev/text
290
+ - text
291
+ - text
292
+ - - duration_info/dev/durations
293
+ - durations
294
+ - text_int
295
+ - - dump/raw/dev/wav.scp
296
+ - speech
297
+ - sound
298
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
299
+ - pitch
300
+ - npy
301
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
302
+ - energy
303
+ - npy
304
+ valid_iterator_type: null
305
+ valid_max_cache_size: null
306
+ valid_shape_file:
307
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
308
+ - exp/tts_stats_raw_char_None/valid/speech_shape
309
+ version: '202402'
310
+ wandb_entity: null
311
+ wandb_id: null
312
+ wandb_model_log_interval: -1
313
+ wandb_name: null
314
+ wandb_project: null
315
+ write_collected_feats: false
nepali_latest/female/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5da8628f6d6044d8bd2f10afe6a15b7bb39a8c39e28463956853dde29ff4359
3
+ size 770
nepali_latest/female/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ab1ceb377b0ef0e8457e32bcec0ea181daea534f027f0b7c72aec5d404c96
3
+ size 2042
nepali_latest/female/model/feats_type ADDED
@@ -0,0 +1 @@
 
 
1
+ raw
nepali_latest/female/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5baef75ffd8c685302e4e4d53a010924a1c968bd0f2b30fe490bcbba70066c
3
+ size 152128410
nepali_latest/female/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381dafb5e8a150b4082efbeb4ec195c3e96188171f65b777d32b0b2663682572
3
+ size 770
nepali_latest/male/model/config.yaml ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 8
2
+ adapter: lora
3
+ adapter_conf: {}
4
+ allow_multi_rates: false
5
+ allow_variable_data_keys: false
6
+ batch_bins: 3000000
7
+ batch_size: 20
8
+ batch_type: numel
9
+ best_model_criterion:
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - loss
15
+ - min
16
+ bpemodel: null
17
+ chunk_default_fs: null
18
+ chunk_discard_short_samples: true
19
+ chunk_excluded_key_prefixes: []
20
+ chunk_length: 500
21
+ chunk_max_abs_length: null
22
+ chunk_shift_ratio: 0.5
23
+ cleaner: null
24
+ collect_stats: false
25
+ config: conf/tuning/train_fastspeech2.yaml
26
+ create_graph_in_tensorboard: false
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ cudnn_enabled: true
30
+ detect_anomaly: false
31
+ dist_backend: nccl
32
+ dist_init_method: env://
33
+ dist_launcher: null
34
+ dist_master_addr: localhost
35
+ dist_master_port: 55635
36
+ dist_rank: 0
37
+ dist_world_size: 2
38
+ distributed: true
39
+ drop_last_iter: false
40
+ dry_run: false
41
+ early_stopping_criterion:
42
+ - valid
43
+ - loss
44
+ - min
45
+ energy_extract: energy
46
+ energy_extract_conf:
47
+ fs: 48000
48
+ hop_length: 1024
49
+ n_fft: 8192
50
+ reduction_factor: 1
51
+ win_length: 4096
52
+ energy_normalize: global_mvn
53
+ energy_normalize_conf:
54
+ stats_file: /home/speech/Suji/FS2/Fastspeech2_HS/Google_VM_models/Nepali/male/energy_stats.npz
55
+ exclude_weight_decay: false
56
+ exclude_weight_decay_conf: {}
57
+ feats_extract: fbank
58
+ feats_extract_conf:
59
+ fmax: 24000
60
+ fmin: 0
61
+ fs: 48000
62
+ hop_length: 1024
63
+ n_fft: 8192
64
+ n_mels: 160
65
+ win_length: 4096
66
+ fold_length:
67
+ - 150
68
+ - 819200
69
+ freeze_param: []
70
+ g2p: null
71
+ grad_clip: 1.0
72
+ grad_clip_type: 2.0
73
+ grad_noise: false
74
+ ignore_init_mismatch: false
75
+ init_param: []
76
+ iterator_type: sequence
77
+ keep_nbest_models: 5
78
+ local_rank: 0
79
+ log_interval: null
80
+ log_level: INFO
81
+ max_cache_fd: 32
82
+ max_cache_size: 0.0
83
+ max_epoch: 1000
84
+ model_conf: {}
85
+ multiple_iterator: false
86
+ multiprocessing_distributed: true
87
+ nbest_averaging_interval: 0
88
+ ngpu: 1
89
+ no_forward_run: false
90
+ non_linguistic_symbols: null
91
+ normalize: global_mvn
92
+ normalize_conf:
93
+ stats_file: /home/speech/Suji/FS2/Fastspeech2_HS/Google_VM_models/Nepali/male/feats_stats.npz
94
+ num_att_plot: 3
95
+ num_cache_chunks: 1024
96
+ num_iters_per_epoch: 800
97
+ num_workers: 1
98
+ odim: null
99
+ optim: adam
100
+ optim_conf:
101
+ lr: 1.0
102
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
103
+ patience: null
104
+ pitch_extract: dio
105
+ pitch_extract_conf:
106
+ f0max: 350
107
+ f0min: 40
108
+ fs: 48000
109
+ hop_length: 1024
110
+ n_fft: 8192
111
+ reduction_factor: 1
112
+ pitch_normalize: global_mvn
113
+ pitch_normalize_conf:
114
+ stats_file: /home/speech/Suji/FS2/Fastspeech2_HS/Google_VM_models/Nepali/male/pitch_stats.npz
115
+ pretrain_path: null
116
+ print_config: false
117
+ required:
118
+ - output_dir
119
+ - token_list
120
+ resume: true
121
+ save_strategy: all
122
+ scheduler: noamlr
123
+ scheduler_conf:
124
+ model_size: 384
125
+ warmup_steps: 4000
126
+ seed: 0
127
+ sharded_ddp: false
128
+ shuffle_within_batch: false
129
+ sort_batch: descending
130
+ sort_in_batch: descending
131
+ token_list:
132
+ - <blank>
133
+ - <unk>
134
+ - <space>
135
+ - $
136
+ - ','
137
+ - .
138
+ - M
139
+ - q
140
+ - H
141
+ - k
142
+ - "\u0916"
143
+ - g
144
+ - "\u0918"
145
+ - "\u0919"
146
+ - c
147
+ - C
148
+ - j
149
+ - J
150
+ - "\u091E"
151
+ - "\u091F"
152
+ - "\u0920"
153
+ - "\u0921"
154
+ - "\u0922"
155
+ - "\u0923"
156
+ - t
157
+ - "\u0925"
158
+ - d
159
+ - "\u0927"
160
+ - n
161
+ - "\u0929"
162
+ - p
163
+ - P
164
+ - b
165
+ - B
166
+ - m
167
+ - y
168
+ - r
169
+ - "\u0931"
170
+ - l
171
+ - "\u0D33"
172
+ - Z
173
+ - w
174
+ - "\u0936"
175
+ - "\u0937"
176
+ - s
177
+ - h
178
+ - Y
179
+ - "\u093D"
180
+ - "\u0915"
181
+ - K
182
+ - G
183
+ - z
184
+ - D
185
+ - T
186
+ - f
187
+ - "\u0960"
188
+ - "\u0D7A"
189
+ - N
190
+ - "\u0D7C"
191
+ - "\u0D7D"
192
+ - "\u0D7E"
193
+ - a
194
+ - A
195
+ - i
196
+ - I
197
+ - u
198
+ - U
199
+ - R
200
+ - "\u090D"
201
+ - e
202
+ - E
203
+ - "\u0910"
204
+ - "\u0911"
205
+ - o
206
+ - O
207
+ - "\u0914"
208
+ - "\u0B89"
209
+ - <sos/eos>
210
+ token_type: char
211
+ train_data_path_and_name_and_type:
212
+ - - dump/raw/tr_no_dev/text
213
+ - text
214
+ - text
215
+ - - duration_info/tr_no_dev/durations
216
+ - durations
217
+ - text_int
218
+ - - dump/raw/tr_no_dev/wav.scp
219
+ - speech
220
+ - sound
221
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
222
+ - pitch
223
+ - npy
224
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
225
+ - energy
226
+ - npy
227
+ train_dtype: float32
228
+ train_shape_file:
229
+ - exp/tts_stats_raw_char_None/train/text_shape.char
230
+ - exp/tts_stats_raw_char_None/train/speech_shape
231
+ tts: fastspeech2
232
+ tts_conf:
233
+ adim: 384
234
+ aheads: 2
235
+ decoder_normalize_before: true
236
+ dlayers: 4
237
+ dunits: 1536
238
+ duration_predictor_chans: 256
239
+ duration_predictor_kernel_size: 3
240
+ duration_predictor_layers: 2
241
+ elayers: 4
242
+ encoder_normalize_before: true
243
+ energy_embed_dropout: 0.0
244
+ energy_embed_kernel_size: 1
245
+ energy_predictor_chans: 256
246
+ energy_predictor_dropout: 0.5
247
+ energy_predictor_kernel_size: 3
248
+ energy_predictor_layers: 2
249
+ eunits: 1536
250
+ init_dec_alpha: 1.0
251
+ init_enc_alpha: 1.0
252
+ init_type: xavier_uniform
253
+ pitch_embed_dropout: 0.0
254
+ pitch_embed_kernel_size: 1
255
+ pitch_predictor_chans: 256
256
+ pitch_predictor_dropout: 0.5
257
+ pitch_predictor_kernel_size: 5
258
+ pitch_predictor_layers: 5
259
+ positionwise_conv_kernel_size: 3
260
+ positionwise_layer_type: conv1d
261
+ postnet_chans: 256
262
+ postnet_filts: 5
263
+ postnet_layers: 5
264
+ reduction_factor: 1
265
+ stop_gradient_from_energy_predictor: false
266
+ stop_gradient_from_pitch_predictor: true
267
+ transformer_dec_attn_dropout_rate: 0.2
268
+ transformer_dec_dropout_rate: 0.2
269
+ transformer_dec_positional_dropout_rate: 0.2
270
+ transformer_enc_attn_dropout_rate: 0.2
271
+ transformer_enc_dropout_rate: 0.2
272
+ transformer_enc_positional_dropout_rate: 0.2
273
+ use_masking: true
274
+ use_scaled_pos_enc: true
275
+ unused_parameters: false
276
+ use_adapter: false
277
+ use_amp: false
278
+ use_matplotlib: true
279
+ use_preprocessor: true
280
+ use_tensorboard: true
281
+ use_wandb: false
282
+ val_scheduler_criterion:
283
+ - valid
284
+ - loss
285
+ valid_batch_bins: null
286
+ valid_batch_size: null
287
+ valid_batch_type: null
288
+ valid_data_path_and_name_and_type:
289
+ - - dump/raw/dev/text
290
+ - text
291
+ - text
292
+ - - duration_info/dev/durations
293
+ - durations
294
+ - text_int
295
+ - - dump/raw/dev/wav.scp
296
+ - speech
297
+ - sound
298
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
299
+ - pitch
300
+ - npy
301
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
302
+ - energy
303
+ - npy
304
+ valid_iterator_type: null
305
+ valid_max_cache_size: null
306
+ valid_shape_file:
307
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
308
+ - exp/tts_stats_raw_char_None/valid/speech_shape
309
+ version: '202402'
310
+ wandb_entity: null
311
+ wandb_id: null
312
+ wandb_model_log_interval: -1
313
+ wandb_name: null
314
+ wandb_project: null
315
+ write_collected_feats: false
nepali_latest/male/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd9c147fea6cda9d238fa8522cbc0958bacb2930b8c6a08846347a1074378683
3
+ size 770
nepali_latest/male/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:695f88723e0a72a5c40762d9e97414e7593b35e9ddd6b145d212a372cec51789
3
+ size 2042
nepali_latest/male/model/feats_type ADDED
@@ -0,0 +1 @@
 
 
1
+ raw
nepali_latest/male/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2034e3782bd4b3b016c4ab71eec2b681739961f88cd0780d0fe44b293077d165
3
+ size 152128410
nepali_latest/male/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bdc0419f048a6d25f95020f4929f224b0095468ba1fabca056379a87ebc6553
3
+ size 770