utkarsh2299 commited on
Commit
3e589b0
·
verified ·
1 Parent(s): e689d02

Upload 22 files

Browse files
kannada_latest/female/model/config.yaml ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 2
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 36351
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 1000
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - loss
41
+ - min
42
+ - - train
43
+ - loss
44
+ - min
45
+ keep_nbest_models: 5
46
+ nbest_averaging_interval: 0
47
+ grad_clip: 1.0
48
+ grad_clip_type: 2.0
49
+ grad_noise: false
50
+ accum_grad: 8
51
+ no_forward_run: false
52
+ resume: true
53
+ train_dtype: float32
54
+ use_amp: false
55
+ log_interval: null
56
+ use_matplotlib: true
57
+ use_tensorboard: true
58
+ create_graph_in_tensorboard: false
59
+ use_wandb: false
60
+ wandb_project: null
61
+ wandb_id: null
62
+ wandb_entity: null
63
+ wandb_name: null
64
+ wandb_model_log_interval: -1
65
+ detect_anomaly: false
66
+ use_adapter: false
67
+ adapter: lora
68
+ save_strategy: all
69
+ adapter_conf: {}
70
+ pretrain_path: null
71
+ init_param: []
72
+ ignore_init_mismatch: false
73
+ freeze_param: []
74
+ num_iters_per_epoch: 800
75
+ batch_size: 20
76
+ valid_batch_size: null
77
+ batch_bins: 3000000
78
+ valid_batch_bins: null
79
+ train_shape_file:
80
+ - exp/tts_stats_raw_char_None/train/text_shape.char
81
+ - exp/tts_stats_raw_char_None/train/speech_shape
82
+ valid_shape_file:
83
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
84
+ - exp/tts_stats_raw_char_None/valid/speech_shape
85
+ batch_type: numel
86
+ valid_batch_type: null
87
+ fold_length:
88
+ - 150
89
+ - 819200
90
+ sort_in_batch: descending
91
+ shuffle_within_batch: false
92
+ sort_batch: descending
93
+ multiple_iterator: false
94
+ chunk_length: 500
95
+ chunk_shift_ratio: 0.5
96
+ num_cache_chunks: 1024
97
+ chunk_excluded_key_prefixes: []
98
+ chunk_default_fs: null
99
+ chunk_max_abs_length: null
100
+ chunk_discard_short_samples: true
101
+ train_data_path_and_name_and_type:
102
+ - - dump/raw/tr_no_dev/text
103
+ - text
104
+ - text
105
+ - - duration_info/tr_no_dev/durations
106
+ - durations
107
+ - text_int
108
+ - - dump/raw/tr_no_dev/wav.scp
109
+ - speech
110
+ - sound
111
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
112
+ - pitch
113
+ - npy
114
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
115
+ - energy
116
+ - npy
117
+ valid_data_path_and_name_and_type:
118
+ - - dump/raw/dev/text
119
+ - text
120
+ - text
121
+ - - duration_info/dev/durations
122
+ - durations
123
+ - text_int
124
+ - - dump/raw/dev/wav.scp
125
+ - speech
126
+ - sound
127
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
128
+ - pitch
129
+ - npy
130
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
131
+ - energy
132
+ - npy
133
+ allow_variable_data_keys: false
134
+ max_cache_size: 0.0
135
+ max_cache_fd: 32
136
+ allow_multi_rates: false
137
+ valid_max_cache_size: null
138
+ exclude_weight_decay: false
139
+ exclude_weight_decay_conf: {}
140
+ optim: adam
141
+ optim_conf:
142
+ lr: 1.0
143
+ scheduler: noamlr
144
+ scheduler_conf:
145
+ model_size: 384
146
+ warmup_steps: 4000
147
+ token_list:
148
+ - <blank>
149
+ - <unk>
150
+ - <space>
151
+ - $
152
+ - ','
153
+ - .
154
+ - M
155
+ - q
156
+ - H
157
+ - k
158
+ - ख
159
+ - g
160
+ - घ
161
+ - ङ
162
+ - c
163
+ - C
164
+ - j
165
+ - J
166
+ - ञ
167
+ - ट
168
+ - ठ
169
+ - ड
170
+ - ढ
171
+ - ण
172
+ - t
173
+ - थ
174
+ - d
175
+ - ध
176
+ - n
177
+ - ऩ
178
+ - p
179
+ - P
180
+ - b
181
+ - B
182
+ - m
183
+ - y
184
+ - r
185
+ - ऱ
186
+ - l
187
+ - ള
188
+ - Z
189
+ - w
190
+ - श
191
+ - ष
192
+ - s
193
+ - h
194
+ - Y
195
+ - ऽ
196
+ - क
197
+ - K
198
+ - G
199
+ - z
200
+ - D
201
+ - T
202
+ - f
203
+ - ॠ
204
+ - ൺ
205
+ - N
206
+ - ർ
207
+ - ൽ
208
+ - ൾ
209
+ - a
210
+ - A
211
+ - i
212
+ - I
213
+ - u
214
+ - U
215
+ - R
216
+ - ऍ
217
+ - e
218
+ - E
219
+ - ऐ
220
+ - ऑ
221
+ - o
222
+ - O
223
+ - औ
224
+ - உ
225
+ - <sos/eos>
226
+ odim: null
227
+ model_conf: {}
228
+ use_preprocessor: true
229
+ token_type: char
230
+ bpemodel: null
231
+ non_linguistic_symbols: null
232
+ cleaner: null
233
+ g2p: null
234
+ feats_extract: fbank
235
+ feats_extract_conf:
236
+ n_fft: 8192
237
+ hop_length: 1024
238
+ win_length: 4096
239
+ fs: 48000
240
+ fmin: 0
241
+ fmax: 24000
242
+ n_mels: 160
243
+ normalize: global_mvn
244
+ normalize_conf:
245
+ stats_file: exp/tts_stats_raw_char_None/train/feats_stats.npz
246
+ tts: fastspeech2
247
+ tts_conf:
248
+ adim: 384
249
+ aheads: 2
250
+ elayers: 4
251
+ eunits: 1536
252
+ dlayers: 4
253
+ dunits: 1536
254
+ positionwise_layer_type: conv1d
255
+ positionwise_conv_kernel_size: 3
256
+ duration_predictor_layers: 2
257
+ duration_predictor_chans: 256
258
+ duration_predictor_kernel_size: 3
259
+ postnet_layers: 5
260
+ postnet_filts: 5
261
+ postnet_chans: 256
262
+ use_masking: true
263
+ use_scaled_pos_enc: true
264
+ encoder_normalize_before: true
265
+ decoder_normalize_before: true
266
+ reduction_factor: 1
267
+ init_type: xavier_uniform
268
+ init_enc_alpha: 1.0
269
+ init_dec_alpha: 1.0
270
+ transformer_enc_dropout_rate: 0.2
271
+ transformer_enc_positional_dropout_rate: 0.2
272
+ transformer_enc_attn_dropout_rate: 0.2
273
+ transformer_dec_dropout_rate: 0.2
274
+ transformer_dec_positional_dropout_rate: 0.2
275
+ transformer_dec_attn_dropout_rate: 0.2
276
+ pitch_predictor_layers: 5
277
+ pitch_predictor_chans: 256
278
+ pitch_predictor_kernel_size: 5
279
+ pitch_predictor_dropout: 0.5
280
+ pitch_embed_kernel_size: 1
281
+ pitch_embed_dropout: 0.0
282
+ stop_gradient_from_pitch_predictor: true
283
+ energy_predictor_layers: 2
284
+ energy_predictor_chans: 256
285
+ energy_predictor_kernel_size: 3
286
+ energy_predictor_dropout: 0.5
287
+ energy_embed_kernel_size: 1
288
+ energy_embed_dropout: 0.0
289
+ stop_gradient_from_energy_predictor: false
290
+ pitch_extract: dio
291
+ pitch_extract_conf:
292
+ fs: 48000
293
+ n_fft: 8192
294
+ hop_length: 1024
295
+ f0max: 400
296
+ f0min: 80
297
+ reduction_factor: 1
298
+ pitch_normalize: global_mvn
299
+ pitch_normalize_conf:
300
+ stats_file: exp/tts_stats_raw_char_None/train/pitch_stats.npz
301
+ energy_extract: energy
302
+ energy_extract_conf:
303
+ fs: 48000
304
+ n_fft: 8192
305
+ hop_length: 1024
306
+ win_length: 4096
307
+ reduction_factor: 1
308
+ energy_normalize: global_mvn
309
+ energy_normalize_conf:
310
+ stats_file: exp/tts_stats_raw_char_None/train/energy_stats.npz
311
+ required:
312
+ - output_dir
313
+ - token_list
314
+ version: '202402'
315
+ distributed: true
kannada_latest/female/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f5acc068b51840643988321d35f3ec8b3b198024ab2b5458190affcd93dea2
3
+ size 770
kannada_latest/female/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5fe953f9fe6aeba62ea45bfa7c7368a4075c226ad001adadce6a2776601cf9
3
+ size 2042
kannada_latest/female/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00523b019e55d3219b32c21a44c33bb159db7b83cd4c2bea6c652b89cfe1b70b
3
+ size 152128410
kannada_latest/female/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf48e83866a710ffffd7d327e4dcd1c22d7b6c96d416d178198ec5b3cc27247
3
+ size 770
kannada_latest/female/model/text ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ shuffled_train_kannada_female_mono_06563 $koटटee Aqटi ennutta gOpi tanna शarटina jEbigee kEऐ hAki patrawannu teegeedukoटटanu.
2
+ shuffled_train_kannada_female_mono_06564 $Aga haളatu aqtaryadalli uളidu hosatu pallawisi beeളeedu haളeeyadakkee hoളappittaqtee Aguttadee.
3
+ shuffled_train_kannada_female_mono_06565 $naqjamma EkO eraडu dina iruwAgalE Agi biटटee akka eqdu hEളuttAളee.
4
+ shuffled_train_kannada_female_mono_06566 $jana samUhadoqdigee saqwahana naडeesalu sarkArawu,tarahada mAधyamagaളa hAgU suddiwAhinigaളa sahAyawannu teegeedukoളളuttidee.
5
+ shuffled_train_kannada_female_mono_06567 $paqju kannaडa jAlatAणada oqdu uttama blAg myAgajIn.
6
+ shuffled_train_kannada_female_mono_06568 $aqथaha anisikee irOwara jatee mIniqg Pul डayalAg baralu sAधyawilla.
7
+ shuffled_train_kannada_female_mono_06569 $Ita illiqda aqdree eraडu sAwira beeटटiqg kaटटida.
8
+ shuffled_train_kannada_female_mono_06570 $dAളiya naडuwee karnAटakada hAwEriya wEऐdya widyArथi nawIn saha duraqta sAwigee IडAgiddAnee.
9
+ shuffled_train_kannada_female_mono_06571 $adAda ऐdu warषagaളa naqtara Ata idE taqडakkAgi AडuttiddAnee,eqdu sacin hEളiddAree.
10
+ shuffled_train_kannada_female_mono_06572 $idu,awaru kAwyawannu kaटटida rIti mattu kaleeyannu biqbisuttadee.
11
+ shuffled_train_kannada_female_mono_06573 $ninagee,dinagaളu neenapideeyO illawO nAkAणee.
12
+ shuffled_train_kannada_female_mono_06574 $koneegee nAnu nannoളagina yAwa nAyigee biskattu hAkiddee ennuwudakkee,iqdu nIwu nIडuttiruwa,AtmIya bIളkoडugee aqdee.
13
+ shuffled_train_kannada_female_mono_06575 $bOधisatwanu dAridryada kaषटakkee silukidanu.
14
+ shuffled_train_kannada_female_mono_06576 $samAna aधikArada haqcikee AgabEku eqdaru.
15
+ shuffled_train_kannada_female_mono_06577 $nAnawaralla,nAnawaralla,nAnawaralla eqda nanagee kannaडada baggee शuBaशrI yaषटu AളawAda arimee illa.
16
+ shuffled_train_kannada_female_mono_06578 $saqgItada mahatwawannu pratiyobbaru ariyuwudu awaशyakawAgidee.
17
+ shuffled_train_kannada_female_mono_06579 $rAyacUru jilleeya iqडi tAlUkina iqcagEri niwAsi nAgappa haqcinAളa uttama rEऐta.
18
+ shuffled_train_kannada_female_mono_06580 $oqdu prastAwaneeya naqtara illina keelawu lEखanagaളa baggee paricaya mAडikoളളOणa.
19
+ shuffled_train_kannada_female_mono_06581 $adE,beeqkiyiqda baruwa hogee.
20
+ shuffled_train_kannada_female_mono_06582 $शukrawAra hAgU शaniwAra beeളagina jAwa maളee baqddittu.
21
+ shuffled_train_kannada_female_mono_06583 $keelawu AटagAraru tammadE wiशiषटa CApu mUडisuttAree.
22
+ shuffled_train_kannada_female_mono_06584 $namma manadallU mOdiyawaru,kAydeeyannu wApAsu paडeediddara baggee swalpa asamAधAna iddittu.
23
+ shuffled_train_kannada_female_mono_06585 $nimma taleegee nimma kEऐ,yOcisi nOडi.
24
+ shuffled_train_kannada_female_mono_06586 $rAja sEwakarannu kareedu beeqki धagaधaga uriyuwa oleeya mElee maडakeeyanniटटu adaroളagee,bAtukOളiyannu hAki bEyisi eqdu AjञApisida.
25
+ shuffled_train_kannada_female_mono_06587 $A wiषaya bAbA sAhEb aqbEडkar awarigee spaषटawAgi tiളidittu.
26
+ shuffled_train_kannada_female_mono_06588 $nammannu baleegee bIളisalu aषटE sAkittu.
27
+ shuffled_train_kannada_female_mono_06589 $huccumanasu kaqडU kAणada manada cittadalli suptawAgi kadaडi niqtidee,yAru kEളadaqtaha kalpanee rUpa nIडalAgadee awitidee.
28
+ shuffled_train_kannada_female_mono_06590 $nAnu,jAga toreeyuwa saqdarBa baqdittu.
29
+ shuffled_train_kannada_female_mono_06591 $adarallU heeqgasaru,makkaളu allalli jAga huडuki kUruttiddaru.
30
+ shuffled_train_kannada_female_mono_06592 $uषHkAlada kuളirgAളi,dEwara kIrtaneegaളE modalAduwugaളu teelugu neeladalli saqkrAqtigee swAgata kOruttadee.
31
+ shuffled_train_kannada_female_mono_06593 $keelawaru tAwu allalli aडikee sulidu,gErubIja heekki,saqpAdisidda haणadiqda eraडu kOणeegaളa maneeyannu खarIdisidaru.
32
+ shuffled_train_kannada_female_mono_06594 $nAnu suखada aramaneeyalli hEgee bEkAdarU malaguwee eqdeenisiddee.
33
+ shuffled_train_kannada_female_mono_06595 $eraडanE sala hAkikoളളalu hOdAga tiqडi खAliyAgittu.
34
+ shuffled_train_kannada_female_mono_06596 $awaru iqdina hAgee yArO mAडida औषaधigee paटटi bareedu koडuwa डAkटar alla.
35
+ shuffled_train_kannada_female_mono_06597 $namma maneeyalloqdu puटटa pApa iruwudu eqba padya IgalU neenapidee.
36
+ shuffled_train_kannada_female_mono_06598 $widyArथigaളu iqdu शikषaणa sparधeeyalli geellabahudu.
37
+ shuffled_train_kannada_female_mono_06599 $mudreeyannu eraडU kEऐyalliyU aByAsa mAडabEku.
38
+ shuffled_train_kannada_female_mono_06600 $hAडAgadee rAधeeyilla,hAडilladee kRषणanilla ennuttAree mattobba kawiwaryaru.
39
+ shuffled_train_kannada_female_mono_06601 $beeളeeduniqta huडuganigee Iga tAyi bEkilla.
40
+ shuffled_train_kannada_female_mono_06602 $I धyEyagaളa sAधaneeya joteejoteegE kannaडa BAषeeyU saha शaraणara,mAdariya wacanagaളiqdAgi hosa शakeegee sAmAjika badalAwaणeegee sAmAjika krAqtigee nAqdi hAडiwee.
41
+ shuffled_train_kannada_female_mono_06603 $mArgadalli wiशrAqti paडeeyuwAga tamagaषटu satkAra wannu mAडida rAjanigee innU saqtAnawilla weeqba saqgatiyannu शiषyaru hEളidAga mahArakषitanu rEणuka rAjanigee dEऐwAqशada kumAranobbanu huटटuttAnee.
42
+ shuffled_train_kannada_female_mono_06604 $makkaളigee bAളeehaणणiqdaree Asee tAnE eqdu awanu hattirawidda maradiqda oqdu gonee bAളeeya haणणannu kittu tanna bidiru bOninalli hAkikoqडanu.
43
+ shuffled_train_kannada_female_mono_06605 $janArdan Baट awaru muqcUणiyalli nilluwa nipuणa lEखakaru.
44
+ shuffled_train_kannada_female_mono_06606 $prati diwasada aडugee sAmAgriya weecca mattu itara weeccagaളu heeccuttalE hOguttiwee.
45
+ shuffled_train_kannada_female_mono_06607 $alladee,tanagiruwa pratiBeeya joteegee sAmAjika kaളakaളi,badधateeyannu meereediddAree.
46
+ shuffled_train_kannada_female_mono_06608 $riyal टEऐm suddigaളannu kannaडa BAषeeyalli odagisuwa modala weeb sEऐट prAraqBawAgidee.
47
+ shuffled_train_kannada_female_mono_06609 $nimma biडuwina samayawannu nimma pariwArada jotee kaളeeyiri.
48
+ shuffled_train_kannada_female_mono_06610 $धarmada nijawAda arथa tiളisida nimagee kOटi kOटi namanagaളu.
49
+ shuffled_train_kannada_female_mono_06611 $yAwa saqdarBadalli adannu baളasidaru eqbudu tiളiyabEkidee.
50
+ shuffled_train_kannada_female_mono_06612 $obbaniqda,basawaणणa obba rAjakAraणi.
51
+ shuffled_train_kannada_female_mono_06613 $idara beelee barObbari hattu lakषa rUpAyi eqdu hEളalAgidee.
52
+ shuffled_train_kannada_female_mono_06614 $kallaqgaडi bIjagaളannu saqpUrणawAgi tyajisabEku aथawA keelawu janaru mitawAgi baളasabEku.
53
+ shuffled_train_kannada_female_mono_06615 $awaളu mattoqdu janma hottu namagE mommagaളAgi barali aqta,dEwaralli dina prArथisuwee.
54
+ shuffled_train_kannada_female_mono_06616 $iqdu duडiyuwa janarigee Alasya dUrawAgisuwa agatyatee idee.
55
+ shuffled_train_kannada_female_mono_06617 $namma nilumee baളagakkee sEralu,nIwu gUgal guqpigee BEटi nIडi,nimma heesarannu noqdAyisi.
56
+ shuffled_train_kannada_female_mono_06618 $Adaree namma sAqskRtika hinneeleeyannu kApAडuwAga idu aniwArya hAgU awaशyaka.
57
+ shuffled_train_kannada_female_mono_06619 $budधiwaqta nAyi tanna prAणawannU leekkisadee prAणigaളa rakषaणeegAgi niqtiddariqda rAja Anee nAyiyannu kAडinoളagee sErisikoqडu tanna daqडanAyakanannAgisitu.
58
+ shuffled_train_kannada_female_mono_06620 $aqtaha dEऐwa ciqtaneeyuളളa Rषi aqgadanu mAtRmUrtiyE dEऐwaweeqdu bOधisi maneegee hiqdirugi hOguwudaralli yAwa wiधawAda धArmika ciqtaneeyO tArkika satwawO kaqडubaruttillawallawE awana,asAqdarBika prawartaneegee anArOgyawU wayOधikyawU kAraणagaളAgirabEkallawE,saqdEhagaളigee uttaragaളu tiളididdarU hEളadE hOdeeyAdarE ninna taleeyu oडeedu hOളAguttadee.
59
+ shuffled_train_kannada_female_mono_06621 $Ayapal,BAratadalli mArAटa mAडuwa doडडa utpannawAgidee.
60
+ shuffled_train_kannada_female_mono_06622 $oqdu haणणigee oqdu kuduree,oqdu mUटee cinna siguwudAdaree tanna maneeyannu adariqdalE tuqbisabahudu eqdu leekka hAkida awanu caqdacaqdada sEbu haणणugaളannu koyyisi gAडi tuqba hErikoqडu arasana sanniधigee hOgi nAnu baडa rEऐta.
61
+ shuffled_train_kannada_female_mono_06623 $idu yArannU टIkee mAडalu bareeda lEखanawalla,kEwala nanna praशneegee uttara huडukuwa haqbala.
62
+ shuffled_train_kannada_female_mono_06624 $ellA arab dEशagaളU bahu शrImaqtawAgiwee.
63
+ shuffled_train_kannada_female_mono_06625 $alliqda horaटu nanna rUm gee baqdee.
64
+ shuffled_train_kannada_female_mono_06626 $kAraqtara शikषaणa grahikee mattu uddEशa awara sRjanAtmaka neeleegee sAkषiyAgiwee.
65
+ shuffled_train_kannada_female_mono_06627 $Adaree awaribbarU aqgawikalarAgiddaru.
66
+ shuffled_train_kannada_female_mono_06628 $girIश sAr,nimma mAtigee nanna anisikee iqtiwee.
67
+ shuffled_train_kannada_female_mono_06629 $AdarU kUडA hiqdina kAlada pawitra yAtreegaളU innU cAltiyalliwee.
68
+ shuffled_train_kannada_female_mono_06630 $bEwu beellada saqpradAya yAwAga शuru AytO gottillA,Adra BAളa arथapUrणa padधati ada.
kannada_latest/male/model/config.yaml ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 8
2
+ adapter: lora
3
+ adapter_conf: {}
4
+ allow_multi_rates: false
5
+ allow_variable_data_keys: false
6
+ batch_bins: 3000000
7
+ batch_size: 20
8
+ batch_type: numel
9
+ best_model_criterion:
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - loss
15
+ - min
16
+ bpemodel: null
17
+ chunk_default_fs: null
18
+ chunk_discard_short_samples: true
19
+ chunk_excluded_key_prefixes: []
20
+ chunk_length: 500
21
+ chunk_max_abs_length: null
22
+ chunk_shift_ratio: 0.5
23
+ cleaner: null
24
+ collect_stats: false
25
+ config: conf/tuning/train_fastspeech2.yaml
26
+ create_graph_in_tensorboard: false
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ cudnn_enabled: true
30
+ deepspeed_config: null
31
+ detect_anomaly: false
32
+ dist_backend: nccl
33
+ dist_init_method: env://
34
+ dist_launcher: null
35
+ dist_master_addr: localhost
36
+ dist_master_port: 49973
37
+ dist_rank: 0
38
+ dist_world_size: 2
39
+ distributed: true
40
+ drop_last_iter: false
41
+ dry_run: false
42
+ early_stopping_criterion:
43
+ - valid
44
+ - loss
45
+ - min
46
+ energy_extract: energy
47
+ energy_extract_conf:
48
+ fs: 48000
49
+ hop_length: 1024
50
+ n_fft: 8192
51
+ reduction_factor: 1
52
+ win_length: 4096
53
+ energy_normalize: global_mvn
54
+ energy_normalize_conf:
55
+ stats_file: /home/speech/Fastspeech2_latest models/Fastspeech2_HS/kannada_latest/male/model/energy_stats.npz
56
+ exclude_weight_decay: false
57
+ exclude_weight_decay_conf: {}
58
+ feats_extract: fbank
59
+ feats_extract_conf:
60
+ fmax: 24000
61
+ fmin: 0
62
+ fs: 48000
63
+ hop_length: 1024
64
+ n_fft: 8192
65
+ n_mels: 160
66
+ win_length: 4096
67
+ fold_length:
68
+ - 150
69
+ - 819200
70
+ freeze_param: []
71
+ g2p: null
72
+ grad_clip: 1.0
73
+ grad_clip_type: 2.0
74
+ grad_noise: false
75
+ ignore_init_mismatch: false
76
+ init_param: []
77
+ iterator_type: sequence
78
+ keep_nbest_models: 5
79
+ local_rank: 0
80
+ log_interval: null
81
+ log_level: INFO
82
+ max_cache_fd: 32
83
+ max_cache_size: 0.0
84
+ max_epoch: 1000
85
+ model_conf: {}
86
+ multi_task_dataset: false
87
+ multiple_iterator: false
88
+ multiprocessing_distributed: true
89
+ nbest_averaging_interval: 0
90
+ ngpu: 1
91
+ no_forward_run: false
92
+ non_linguistic_symbols: null
93
+ normalize: global_mvn
94
+ normalize_conf:
95
+ stats_file: /home/speech/Fastspeech2_latest models/Fastspeech2_HS/kannada_latest/male/model/feats_stats.npz
96
+ num_att_plot: 3
97
+ num_cache_chunks: 1024
98
+ num_iters_per_epoch: 800
99
+ num_workers: 1
100
+ odim: null
101
+ optim: adam
102
+ optim_conf:
103
+ lr: 1.0
104
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
105
+ patience: null
106
+ pitch_extract: dio
107
+ pitch_extract_conf:
108
+ f0max: 350
109
+ f0min: 40
110
+ fs: 48000
111
+ hop_length: 1024
112
+ n_fft: 8192
113
+ reduction_factor: 1
114
+ pitch_normalize: global_mvn
115
+ pitch_normalize_conf:
116
+ stats_file: /home/speech/Fastspeech2_latest models/Fastspeech2_HS/kannada_latest/male/model/pitch_stats.npz
117
+ pretrain_path: null
118
+ print_config: false
119
+ required:
120
+ - output_dir
121
+ - token_list
122
+ resume: true
123
+ save_strategy: all
124
+ scheduler: noamlr
125
+ scheduler_conf:
126
+ model_size: 384
127
+ warmup_steps: 4000
128
+ seed: 0
129
+ sharded_ddp: false
130
+ shuffle_within_batch: false
131
+ sort_batch: descending
132
+ sort_in_batch: descending
133
+ token_list:
134
+ - <blank>
135
+ - <unk>
136
+ - <space>
137
+ - $
138
+ - ','
139
+ - .
140
+ - M
141
+ - q
142
+ - H
143
+ - k
144
+ - "\u0916"
145
+ - g
146
+ - "\u0918"
147
+ - "\u0919"
148
+ - c
149
+ - C
150
+ - j
151
+ - J
152
+ - "\u091E"
153
+ - "\u091F"
154
+ - "\u0920"
155
+ - "\u0921"
156
+ - "\u0922"
157
+ - "\u0923"
158
+ - t
159
+ - "\u0925"
160
+ - d
161
+ - "\u0927"
162
+ - n
163
+ - "\u0929"
164
+ - p
165
+ - P
166
+ - b
167
+ - B
168
+ - m
169
+ - y
170
+ - r
171
+ - "\u0931"
172
+ - l
173
+ - "\u0D33"
174
+ - Z
175
+ - w
176
+ - "\u0936"
177
+ - "\u0937"
178
+ - s
179
+ - h
180
+ - Y
181
+ - "\u093D"
182
+ - "\u0915"
183
+ - K
184
+ - G
185
+ - z
186
+ - D
187
+ - T
188
+ - f
189
+ - "\u0960"
190
+ - "\u0D7A"
191
+ - N
192
+ - "\u0D7C"
193
+ - "\u0D7D"
194
+ - "\u0D7E"
195
+ - a
196
+ - A
197
+ - i
198
+ - I
199
+ - u
200
+ - U
201
+ - R
202
+ - "\u090D"
203
+ - e
204
+ - E
205
+ - "\u0910"
206
+ - "\u0911"
207
+ - o
208
+ - O
209
+ - "\u0914"
210
+ - "\u0B89"
211
+ - <sos/eos>
212
+ token_type: char
213
+ train_data_path_and_name_and_type:
214
+ - - dump/raw/tr_no_dev/text
215
+ - text
216
+ - text
217
+ - - duration_info/tr_no_dev/durations
218
+ - durations
219
+ - text_int
220
+ - - dump/raw/tr_no_dev/wav.scp
221
+ - speech
222
+ - sound
223
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
224
+ - pitch
225
+ - npy
226
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
227
+ - energy
228
+ - npy
229
+ train_dtype: float32
230
+ train_shape_file:
231
+ - exp/tts_stats_raw_char_None/train/text_shape.char
232
+ - exp/tts_stats_raw_char_None/train/speech_shape
233
+ tts: fastspeech2
234
+ tts_conf:
235
+ adim: 384
236
+ aheads: 2
237
+ decoder_normalize_before: true
238
+ dlayers: 4
239
+ dunits: 1536
240
+ duration_predictor_chans: 256
241
+ duration_predictor_kernel_size: 3
242
+ duration_predictor_layers: 2
243
+ elayers: 4
244
+ encoder_normalize_before: true
245
+ energy_embed_dropout: 0.0
246
+ energy_embed_kernel_size: 1
247
+ energy_predictor_chans: 256
248
+ energy_predictor_dropout: 0.5
249
+ energy_predictor_kernel_size: 3
250
+ energy_predictor_layers: 2
251
+ eunits: 1536
252
+ init_dec_alpha: 1.0
253
+ init_enc_alpha: 1.0
254
+ init_type: xavier_uniform
255
+ pitch_embed_dropout: 0.0
256
+ pitch_embed_kernel_size: 1
257
+ pitch_predictor_chans: 256
258
+ pitch_predictor_dropout: 0.5
259
+ pitch_predictor_kernel_size: 5
260
+ pitch_predictor_layers: 5
261
+ positionwise_conv_kernel_size: 3
262
+ positionwise_layer_type: conv1d
263
+ postnet_chans: 256
264
+ postnet_filts: 5
265
+ postnet_layers: 5
266
+ reduction_factor: 1
267
+ stop_gradient_from_energy_predictor: false
268
+ stop_gradient_from_pitch_predictor: true
269
+ transformer_dec_attn_dropout_rate: 0.2
270
+ transformer_dec_dropout_rate: 0.2
271
+ transformer_dec_positional_dropout_rate: 0.2
272
+ transformer_enc_attn_dropout_rate: 0.2
273
+ transformer_enc_dropout_rate: 0.2
274
+ transformer_enc_positional_dropout_rate: 0.2
275
+ use_masking: true
276
+ use_scaled_pos_enc: true
277
+ unused_parameters: false
278
+ use_adapter: false
279
+ use_amp: false
280
+ use_deepspeed: false
281
+ use_matplotlib: true
282
+ use_preprocessor: true
283
+ use_tensorboard: true
284
+ use_tf32: false
285
+ use_wandb: false
286
+ val_scheduler_criterion:
287
+ - valid
288
+ - loss
289
+ valid_batch_bins: null
290
+ valid_batch_size: null
291
+ valid_batch_type: null
292
+ valid_data_path_and_name_and_type:
293
+ - - dump/raw/dev/text
294
+ - text
295
+ - text
296
+ - - duration_info/dev/durations
297
+ - durations
298
+ - text_int
299
+ - - dump/raw/dev/wav.scp
300
+ - speech
301
+ - sound
302
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
303
+ - pitch
304
+ - npy
305
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
306
+ - energy
307
+ - npy
308
+ valid_iterator_type: null
309
+ valid_max_cache_size: null
310
+ valid_shape_file:
311
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
312
+ - exp/tts_stats_raw_char_None/valid/speech_shape
313
+ version: '202402'
314
+ wandb_entity: null
315
+ wandb_id: null
316
+ wandb_model_log_interval: -1
317
+ wandb_name: null
318
+ wandb_project: null
319
+ write_collected_feats: false
kannada_latest/male/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bc4025a0388054d4a6a704956be479c12e44541bea031b5039232055648b812
3
+ size 770
kannada_latest/male/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c5b8935d2b328800aa31ecb09a188a13e6501b258afa784e3bbbf785187c9d7
3
+ size 2042
kannada_latest/male/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e18bc08fdfbeb6fd385dc338236e22fd3a3fea75b0070392d85318083a72fb
3
+ size 152129434
kannada_latest/male/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cac728a1538d11d3fdab048cb5028d2a6b36ac1972397b4698e61a6700a3de
3
+ size 770
kannada_latest/male/model_mono/config.yaml ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 8
2
+ adapter: lora
3
+ adapter_conf: {}
4
+ allow_multi_rates: false
5
+ allow_variable_data_keys: false
6
+ batch_bins: 3000000
7
+ batch_size: 20
8
+ batch_type: numel
9
+ best_model_criterion:
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - loss
15
+ - min
16
+ bpemodel: null
17
+ chunk_default_fs: null
18
+ chunk_discard_short_samples: true
19
+ chunk_excluded_key_prefixes: []
20
+ chunk_length: 500
21
+ chunk_max_abs_length: null
22
+ chunk_shift_ratio: 0.5
23
+ cleaner: null
24
+ collect_stats: false
25
+ config: conf/tuning/train_fastspeech2.yaml
26
+ create_graph_in_tensorboard: false
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ cudnn_enabled: true
30
+ detect_anomaly: false
31
+ dist_backend: nccl
32
+ dist_init_method: env://
33
+ dist_launcher: null
34
+ dist_master_addr: localhost
35
+ dist_master_port: 36155
36
+ dist_rank: 0
37
+ dist_world_size: 2
38
+ distributed: true
39
+ drop_last_iter: false
40
+ dry_run: false
41
+ early_stopping_criterion:
42
+ - valid
43
+ - loss
44
+ - min
45
+ energy_extract: energy
46
+ energy_extract_conf:
47
+ fs: 48000
48
+ hop_length: 1024
49
+ n_fft: 8192
50
+ reduction_factor: 1
51
+ win_length: 4096
52
+ energy_normalize: global_mvn
53
+ energy_normalize_conf:
54
+ stats_file: /home/speech/Fastspeech2_HS/kannada_latest/male/model/energy_stats.npz
55
+ exclude_weight_decay: false
56
+ exclude_weight_decay_conf: {}
57
+ feats_extract: fbank
58
+ feats_extract_conf:
59
+ fmax: 24000
60
+ fmin: 0
61
+ fs: 48000
62
+ hop_length: 1024
63
+ n_fft: 8192
64
+ n_mels: 160
65
+ win_length: 4096
66
+ fold_length:
67
+ - 150
68
+ - 819200
69
+ freeze_param: []
70
+ g2p: null
71
+ grad_clip: 1.0
72
+ grad_clip_type: 2.0
73
+ grad_noise: false
74
+ ignore_init_mismatch: false
75
+ init_param: []
76
+ iterator_type: sequence
77
+ keep_nbest_models: 5
78
+ local_rank: 0
79
+ log_interval: null
80
+ log_level: INFO
81
+ max_cache_fd: 32
82
+ max_cache_size: 0.0
83
+ max_epoch: 1000
84
+ model_conf: {}
85
+ multiple_iterator: false
86
+ multiprocessing_distributed: true
87
+ nbest_averaging_interval: 0
88
+ ngpu: 1
89
+ no_forward_run: false
90
+ non_linguistic_symbols: null
91
+ normalize: global_mvn
92
+ normalize_conf:
93
+ stats_file: /home/speech/Fastspeech2_HS/kannada_latest/male/model/feats_stats.npz
94
+ num_att_plot: 3
95
+ num_cache_chunks: 1024
96
+ num_iters_per_epoch: 800
97
+ num_workers: 1
98
+ odim: null
99
+ optim: adam
100
+ optim_conf:
101
+ lr: 1.0
102
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
103
+ patience: null
104
+ pitch_extract: dio
105
+ pitch_extract_conf:
106
+ f0max: 350
107
+ f0min: 40
108
+ fs: 48000
109
+ hop_length: 1024
110
+ n_fft: 8192
111
+ reduction_factor: 1
112
+ pitch_normalize: global_mvn
113
+ pitch_normalize_conf:
114
+ stats_file: /home/speech/Fastspeech2_HS/kannada_latest/male/model/pitch_stats.npz
115
+ pretrain_path: null
116
+ print_config: false
117
+ required:
118
+ - output_dir
119
+ - token_list
120
+ resume: true
121
+ save_strategy: all
122
+ scheduler: noamlr
123
+ scheduler_conf:
124
+ model_size: 384
125
+ warmup_steps: 4000
126
+ seed: 0
127
+ sharded_ddp: false
128
+ shuffle_within_batch: false
129
+ sort_batch: descending
130
+ sort_in_batch: descending
131
+ token_list:
132
+ - <blank>
133
+ - <unk>
134
+ - <space>
135
+ - $
136
+ - ','
137
+ - .
138
+ - M
139
+ - q
140
+ - H
141
+ - k
142
+ - "\u0916"
143
+ - g
144
+ - "\u0918"
145
+ - "\u0919"
146
+ - c
147
+ - C
148
+ - j
149
+ - J
150
+ - "\u091E"
151
+ - "\u091F"
152
+ - "\u0920"
153
+ - "\u0921"
154
+ - "\u0922"
155
+ - "\u0923"
156
+ - t
157
+ - "\u0925"
158
+ - d
159
+ - "\u0927"
160
+ - n
161
+ - "\u0929"
162
+ - p
163
+ - P
164
+ - b
165
+ - B
166
+ - m
167
+ - y
168
+ - r
169
+ - "\u0931"
170
+ - l
171
+ - "\u0D33"
172
+ - Z
173
+ - w
174
+ - "\u0936"
175
+ - "\u0937"
176
+ - s
177
+ - h
178
+ - Y
179
+ - "\u093D"
180
+ - "\u0915"
181
+ - K
182
+ - G
183
+ - z
184
+ - D
185
+ - T
186
+ - f
187
+ - "\u0960"
188
+ - "\u0D7A"
189
+ - N
190
+ - "\u0D7C"
191
+ - "\u0D7D"
192
+ - "\u0D7E"
193
+ - a
194
+ - A
195
+ - i
196
+ - I
197
+ - u
198
+ - U
199
+ - R
200
+ - "\u090D"
201
+ - e
202
+ - E
203
+ - "\u0910"
204
+ - "\u0911"
205
+ - o
206
+ - O
207
+ - "\u0914"
208
+ - "\u0B89"
209
+ - <sos/eos>
210
+ token_type: char
211
+ train_data_path_and_name_and_type:
212
+ - - dump/raw/tr_no_dev/text
213
+ - text
214
+ - text
215
+ - - duration_info/tr_no_dev/durations
216
+ - durations
217
+ - text_int
218
+ - - dump/raw/tr_no_dev/wav.scp
219
+ - speech
220
+ - sound
221
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
222
+ - pitch
223
+ - npy
224
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
225
+ - energy
226
+ - npy
227
+ train_dtype: float32
228
+ train_shape_file:
229
+ - exp/tts_stats_raw_char_None/train/text_shape.char
230
+ - exp/tts_stats_raw_char_None/train/speech_shape
231
+ tts: fastspeech2
232
+ tts_conf:
233
+ adim: 384
234
+ aheads: 2
235
+ decoder_normalize_before: true
236
+ dlayers: 4
237
+ dunits: 1536
238
+ duration_predictor_chans: 256
239
+ duration_predictor_kernel_size: 3
240
+ duration_predictor_layers: 2
241
+ elayers: 4
242
+ encoder_normalize_before: true
243
+ energy_embed_dropout: 0.0
244
+ energy_embed_kernel_size: 1
245
+ energy_predictor_chans: 256
246
+ energy_predictor_dropout: 0.5
247
+ energy_predictor_kernel_size: 3
248
+ energy_predictor_layers: 2
249
+ eunits: 1536
250
+ init_dec_alpha: 1.0
251
+ init_enc_alpha: 1.0
252
+ init_type: xavier_uniform
253
+ pitch_embed_dropout: 0.0
254
+ pitch_embed_kernel_size: 1
255
+ pitch_predictor_chans: 256
256
+ pitch_predictor_dropout: 0.5
257
+ pitch_predictor_kernel_size: 5
258
+ pitch_predictor_layers: 5
259
+ positionwise_conv_kernel_size: 3
260
+ positionwise_layer_type: conv1d
261
+ postnet_chans: 256
262
+ postnet_filts: 5
263
+ postnet_layers: 5
264
+ reduction_factor: 1
265
+ stop_gradient_from_energy_predictor: false
266
+ stop_gradient_from_pitch_predictor: true
267
+ transformer_dec_attn_dropout_rate: 0.2
268
+ transformer_dec_dropout_rate: 0.2
269
+ transformer_dec_positional_dropout_rate: 0.2
270
+ transformer_enc_attn_dropout_rate: 0.2
271
+ transformer_enc_dropout_rate: 0.2
272
+ transformer_enc_positional_dropout_rate: 0.2
273
+ use_masking: true
274
+ use_scaled_pos_enc: true
275
+ unused_parameters: false
276
+ use_adapter: false
277
+ use_amp: false
278
+ use_matplotlib: true
279
+ use_preprocessor: true
280
+ use_tensorboard: true
281
+ use_wandb: false
282
+ val_scheduler_criterion:
283
+ - valid
284
+ - loss
285
+ valid_batch_bins: null
286
+ valid_batch_size: null
287
+ valid_batch_type: null
288
+ valid_data_path_and_name_and_type:
289
+ - - dump/raw/dev/text
290
+ - text
291
+ - text
292
+ - - duration_info/dev/durations
293
+ - durations
294
+ - text_int
295
+ - - dump/raw/dev/wav.scp
296
+ - speech
297
+ - sound
298
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
299
+ - pitch
300
+ - npy
301
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
302
+ - energy
303
+ - npy
304
+ valid_iterator_type: null
305
+ valid_max_cache_size: null
306
+ valid_shape_file:
307
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
308
+ - exp/tts_stats_raw_char_None/valid/speech_shape
309
+ version: '202402'
310
+ wandb_entity: null
311
+ wandb_id: null
312
+ wandb_model_log_interval: -1
313
+ wandb_name: null
314
+ wandb_project: null
315
+ write_collected_feats: false
kannada_latest/male/model_mono/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd7a1f174f38835c20d1564f8b5eb65eccbc1fa5462959f6b82ffb361e03b164
3
+ size 770
kannada_latest/male/model_mono/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4463e3d8c150c38a6c819fe18e77c02e061f0a6a9f97f9a51e67c8e2c0c778
3
+ size 2042
kannada_latest/male/model_mono/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05b6c7243f896871163441905a480ff5f603685d0150702049cee1b3e5a44eb6
3
+ size 152128410
kannada_latest/male/model_mono/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c6075969aabd53c49bc60917e50dfccdd74151cb1c78a534cbad08cd83cf3d
3
+ size 770
konkani_latest/female/model/config.yaml ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_grad: 8
2
+ adapter: lora
3
+ adapter_conf: {}
4
+ allow_multi_rates: false
5
+ allow_variable_data_keys: false
6
+ batch_bins: 3000000
7
+ batch_size: 20
8
+ batch_type: numel
9
+ best_model_criterion:
10
+ - - valid
11
+ - loss
12
+ - min
13
+ - - train
14
+ - loss
15
+ - min
16
+ bpemodel: null
17
+ chunk_default_fs: null
18
+ chunk_discard_short_samples: true
19
+ chunk_excluded_key_prefixes: []
20
+ chunk_length: 500
21
+ chunk_max_abs_length: null
22
+ chunk_shift_ratio: 0.5
23
+ cleaner: null
24
+ collect_stats: false
25
+ config: conf/tuning/train_fastspeech2.yaml
26
+ create_graph_in_tensorboard: false
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ cudnn_enabled: true
30
+ detect_anomaly: false
31
+ dist_backend: nccl
32
+ dist_init_method: env://
33
+ dist_launcher: null
34
+ dist_master_addr: localhost
35
+ dist_master_port: 38977
36
+ dist_rank: 0
37
+ dist_world_size: 2
38
+ distributed: true
39
+ drop_last_iter: false
40
+ dry_run: false
41
+ early_stopping_criterion:
42
+ - valid
43
+ - loss
44
+ - min
45
+ energy_extract: energy
46
+ energy_extract_conf:
47
+ fs: 48000
48
+ hop_length: 1024
49
+ n_fft: 8192
50
+ reduction_factor: 1
51
+ win_length: 4096
52
+ energy_normalize: global_mvn
53
+ energy_normalize_conf:
54
+ stats_file: /home/speech/Fastspeech2_HS/konkani_latest/female/model/energy_stats.npz
55
+ exclude_weight_decay: false
56
+ exclude_weight_decay_conf: {}
57
+ feats_extract: fbank
58
+ feats_extract_conf:
59
+ fmax: 24000
60
+ fmin: 0
61
+ fs: 48000
62
+ hop_length: 1024
63
+ n_fft: 8192
64
+ n_mels: 160
65
+ win_length: 4096
66
+ fold_length:
67
+ - 150
68
+ - 819200
69
+ freeze_param: []
70
+ g2p: null
71
+ grad_clip: 1.0
72
+ grad_clip_type: 2.0
73
+ grad_noise: false
74
+ ignore_init_mismatch: false
75
+ init_param: []
76
+ iterator_type: sequence
77
+ keep_nbest_models: 5
78
+ local_rank: 0
79
+ log_interval: null
80
+ log_level: INFO
81
+ max_cache_fd: 32
82
+ max_cache_size: 0.0
83
+ max_epoch: 1000
84
+ model_conf: {}
85
+ multiple_iterator: false
86
+ multiprocessing_distributed: true
87
+ nbest_averaging_interval: 0
88
+ ngpu: 1
89
+ no_forward_run: false
90
+ non_linguistic_symbols: null
91
+ normalize: global_mvn
92
+ normalize_conf:
93
+ stats_file: /home/speech/Fastspeech2_HS/konkani_latest/female/model/feats_stats.npz
94
+ num_att_plot: 3
95
+ num_cache_chunks: 1024
96
+ num_iters_per_epoch: 800
97
+ num_workers: 1
98
+ odim: null
99
+ optim: adam
100
+ optim_conf:
101
+ lr: 1.0
102
+ output_dir: exp/tts_train_fastspeech2_raw_char_None
103
+ patience: null
104
+ pitch_extract: dio
105
+ pitch_extract_conf:
106
+ f0max: 400
107
+ f0min: 80
108
+ fs: 48000
109
+ hop_length: 1024
110
+ n_fft: 8192
111
+ reduction_factor: 1
112
+ pitch_normalize: global_mvn
113
+ pitch_normalize_conf:
114
+ stats_file: /home/speech/Fastspeech2_HS/konkani_latest/female/model/pitch_stats.npz
115
+ pretrain_path: null
116
+ print_config: false
117
+ required:
118
+ - output_dir
119
+ - token_list
120
+ resume: true
121
+ save_strategy: all
122
+ scheduler: noamlr
123
+ scheduler_conf:
124
+ model_size: 384
125
+ warmup_steps: 4000
126
+ seed: 0
127
+ sharded_ddp: false
128
+ shuffle_within_batch: false
129
+ sort_batch: descending
130
+ sort_in_batch: descending
131
+ token_list:
132
+ - <blank>
133
+ - <unk>
134
+ - <space>
135
+ - $
136
+ - ','
137
+ - .
138
+ - M
139
+ - q
140
+ - H
141
+ - k
142
+ - "\u0916"
143
+ - g
144
+ - "\u0918"
145
+ - "\u0919"
146
+ - c
147
+ - C
148
+ - j
149
+ - J
150
+ - "\u091E"
151
+ - "\u091F"
152
+ - "\u0920"
153
+ - "\u0921"
154
+ - "\u0922"
155
+ - "\u0923"
156
+ - t
157
+ - "\u0925"
158
+ - d
159
+ - "\u0927"
160
+ - n
161
+ - "\u0929"
162
+ - p
163
+ - P
164
+ - b
165
+ - B
166
+ - m
167
+ - y
168
+ - r
169
+ - "\u0931"
170
+ - l
171
+ - "\u0D33"
172
+ - Z
173
+ - w
174
+ - "\u0936"
175
+ - "\u0937"
176
+ - s
177
+ - h
178
+ - Y
179
+ - "\u093D"
180
+ - "\u0915"
181
+ - K
182
+ - G
183
+ - z
184
+ - D
185
+ - T
186
+ - f
187
+ - "\u0960"
188
+ - "\u0D7A"
189
+ - N
190
+ - "\u0D7C"
191
+ - "\u0D7D"
192
+ - "\u0D7E"
193
+ - a
194
+ - A
195
+ - i
196
+ - I
197
+ - u
198
+ - U
199
+ - R
200
+ - "\u090D"
201
+ - e
202
+ - E
203
+ - "\u0910"
204
+ - "\u0911"
205
+ - o
206
+ - O
207
+ - "\u0914"
208
+ - "\u0B89"
209
+ - <sos/eos>
210
+ token_type: char
211
+ train_data_path_and_name_and_type:
212
+ - - dump/raw/tr_no_dev/text
213
+ - text
214
+ - text
215
+ - - duration_info/tr_no_dev/durations
216
+ - durations
217
+ - text_int
218
+ - - dump/raw/tr_no_dev/wav.scp
219
+ - speech
220
+ - sound
221
+ - - exp/tts_stats_raw_char_None/train/collect_feats/pitch.scp
222
+ - pitch
223
+ - npy
224
+ - - exp/tts_stats_raw_char_None/train/collect_feats/energy.scp
225
+ - energy
226
+ - npy
227
+ train_dtype: float32
228
+ train_shape_file:
229
+ - exp/tts_stats_raw_char_None/train/text_shape.char
230
+ - exp/tts_stats_raw_char_None/train/speech_shape
231
+ tts: fastspeech2
232
+ tts_conf:
233
+ adim: 384
234
+ aheads: 2
235
+ decoder_normalize_before: true
236
+ dlayers: 4
237
+ dunits: 1536
238
+ duration_predictor_chans: 256
239
+ duration_predictor_kernel_size: 3
240
+ duration_predictor_layers: 2
241
+ elayers: 4
242
+ encoder_normalize_before: true
243
+ energy_embed_dropout: 0.0
244
+ energy_embed_kernel_size: 1
245
+ energy_predictor_chans: 256
246
+ energy_predictor_dropout: 0.5
247
+ energy_predictor_kernel_size: 3
248
+ energy_predictor_layers: 2
249
+ eunits: 1536
250
+ init_dec_alpha: 1.0
251
+ init_enc_alpha: 1.0
252
+ init_type: xavier_uniform
253
+ pitch_embed_dropout: 0.0
254
+ pitch_embed_kernel_size: 1
255
+ pitch_predictor_chans: 256
256
+ pitch_predictor_dropout: 0.5
257
+ pitch_predictor_kernel_size: 5
258
+ pitch_predictor_layers: 5
259
+ positionwise_conv_kernel_size: 3
260
+ positionwise_layer_type: conv1d
261
+ postnet_chans: 256
262
+ postnet_filts: 5
263
+ postnet_layers: 5
264
+ reduction_factor: 1
265
+ stop_gradient_from_energy_predictor: false
266
+ stop_gradient_from_pitch_predictor: true
267
+ transformer_dec_attn_dropout_rate: 0.2
268
+ transformer_dec_dropout_rate: 0.2
269
+ transformer_dec_positional_dropout_rate: 0.2
270
+ transformer_enc_attn_dropout_rate: 0.2
271
+ transformer_enc_dropout_rate: 0.2
272
+ transformer_enc_positional_dropout_rate: 0.2
273
+ use_masking: true
274
+ use_scaled_pos_enc: true
275
+ unused_parameters: false
276
+ use_adapter: false
277
+ use_amp: false
278
+ use_matplotlib: true
279
+ use_preprocessor: true
280
+ use_tensorboard: true
281
+ use_wandb: false
282
+ val_scheduler_criterion:
283
+ - valid
284
+ - loss
285
+ valid_batch_bins: null
286
+ valid_batch_size: null
287
+ valid_batch_type: null
288
+ valid_data_path_and_name_and_type:
289
+ - - dump/raw/dev/text
290
+ - text
291
+ - text
292
+ - - duration_info/dev/durations
293
+ - durations
294
+ - text_int
295
+ - - dump/raw/dev/wav.scp
296
+ - speech
297
+ - sound
298
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/pitch.scp
299
+ - pitch
300
+ - npy
301
+ - - exp/tts_stats_raw_char_None/valid/collect_feats/energy.scp
302
+ - energy
303
+ - npy
304
+ valid_iterator_type: null
305
+ valid_max_cache_size: null
306
+ valid_shape_file:
307
+ - exp/tts_stats_raw_char_None/valid/text_shape.char
308
+ - exp/tts_stats_raw_char_None/valid/speech_shape
309
+ version: '202402'
310
+ wandb_entity: null
311
+ wandb_id: null
312
+ wandb_model_log_interval: -1
313
+ wandb_name: null
314
+ wandb_project: null
315
+ write_collected_feats: false
konkani_latest/female/model/energy_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed5b7d76556c9ea17a84f8c6277ca58414ddb17973b7c9f30dba274e4c4867a
3
+ size 770
konkani_latest/female/model/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16fd2af15d581cd5132b36d10346de81910e8b76ff2abf60e0a83fc2da4f6361
3
+ size 2042
konkani_latest/female/model/feats_type ADDED
@@ -0,0 +1 @@
 
 
1
+ raw
konkani_latest/female/model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81790566fd4660ca46c4692f6fbdb30d8e5c9cd657084a0eb40c804e6ec2b9ab
3
+ size 152128410
konkani_latest/female/model/pitch_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d6930364c632b19a57622ac0779b2de55bafd0cf97e28c2679da6e1633d49b7
3
+ size 770