kozakhart vineelpratap commited on
Commit
79784c7
·
0 Parent(s):

Duplicate from facebook/mms-tts

Browse files

Co-authored-by: Vineel Pratap <vineelpratap@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. README.md +1206 -0
  3. full_models/abi/D_100000.pth +3 -0
  4. full_models/abi/G_100000.pth +3 -0
  5. full_models/abi/config.json +87 -0
  6. full_models/abi/vocab.txt +53 -0
  7. full_models/abp/D_100000.pth +3 -0
  8. full_models/abp/G_100000.pth +3 -0
  9. full_models/abp/config.json +87 -0
  10. full_models/abp/vocab.txt +33 -0
  11. full_models/aca/D_100000.pth +3 -0
  12. full_models/aca/G_100000.pth +3 -0
  13. full_models/aca/config.json +87 -0
  14. full_models/aca/vocab.txt +35 -0
  15. full_models/acd/D_100000.pth +3 -0
  16. full_models/acd/G_100000.pth +3 -0
  17. full_models/acd/config.json +87 -0
  18. full_models/acd/vocab.txt +28 -0
  19. full_models/ace/D_100000.pth +3 -0
  20. full_models/ace/G_100000.pth +3 -0
  21. full_models/ace/config.json +87 -0
  22. full_models/ace/vocab.txt +42 -0
  23. full_models/acf/D_100000.pth +3 -0
  24. full_models/acf/G_100000.pth +3 -0
  25. full_models/acf/config.json +87 -0
  26. full_models/acf/vocab.txt +33 -0
  27. full_models/ach/D_100000.pth +3 -0
  28. full_models/ach/G_100000.pth +3 -0
  29. full_models/ach/config.json +87 -0
  30. full_models/ach/vocab.txt +28 -0
  31. full_models/acn/D_100000.pth +3 -0
  32. full_models/acn/G_100000.pth +3 -0
  33. full_models/acn/config.json +87 -0
  34. full_models/acn/vocab.txt +37 -0
  35. full_models/acr/D_100000.pth +3 -0
  36. full_models/acr/G_100000.pth +3 -0
  37. full_models/acr/config.json +87 -0
  38. full_models/acr/vocab.txt +37 -0
  39. full_models/acu/D_100000.pth +3 -0
  40. full_models/acu/G_100000.pth +3 -0
  41. full_models/acu/config.json +87 -0
  42. full_models/acu/vocab.txt +35 -0
  43. full_models/ade/D_100000.pth +3 -0
  44. full_models/ade/G_100000.pth +3 -0
  45. full_models/ade/config.json +87 -0
  46. full_models/ade/vocab.txt +40 -0
  47. full_models/adh/D_100000.pth +3 -0
  48. full_models/adh/G_100000.pth +3 -0
  49. full_models/adh/config.json +87 -0
  50. full_models/adh/vocab.txt +29 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ inference: false
4
+ tags:
5
+ - mms
6
+ - vits
7
+ pipeline_tag: text-to-speech
8
+ ---
9
+
10
+ # Massively Multilingual Speech (MMS) : Text-to-Speech Models
11
+
12
+ This repository contains a collection of text-to-speech (TTS) models, offering support for over 1000 languages. These models are part of Facebook's [Massively Multilingual Speech](https://arxiv.org/abs/2305.13516) project, aiming to provide speech technology across a diverse range of languages.
13
+
14
+ ## Table Of Contents
15
+
16
+ - [Usage](#usage)
17
+ - [Supported Languages](#supported-languages)
18
+ - [Model details](#model-details)
19
+ - [Additional links](#additional-links)
20
+
21
+ ## Usage
22
+ For detailed instructions on utilizing the models, please refer to the [fairseq docs](https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md#tts-1). Additionally, you can explore the models on [MMS Space](https://huggingface.co/spaces/facebook/MMS) available on HuggingFace.
23
+
24
+ The *models* folder contains only the generator, which is sufficient to run TTS inference. The full model checkpoints, which also include the discriminator and the optimizer states, can be found in the *full_models* folder. The models can be downloaded locally using the *[hf_hub_download](https://huggingface.co/docs/huggingface_hub/guides/download)* API.
25
+
26
+ See [this section](https://github.com/facebookresearch/fairseq/blob/main/examples/mms/README.md#tts-1) for instructions on using the models for inference.
27
+
28
+ ## Supported Languages
29
+
30
+ This model supports 1107 languages. Click the following to toggle the list of all languages supported by this checkpoint, given as [ISO 639-3 codes](https://en.wikipedia.org/wiki/ISO_639-3).
31
+ You can find more details about the languages and their ISO 639-3 codes in the [MMS Language Coverage Overview](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html).
32
+ <details>
33
+ <summary>Click to toggle</summary>
34
+
35
+ - abi
36
+ - ace
37
+ - aca
38
+ - acn
39
+ - acr
40
+ - ach
41
+ - acu
42
+ - guq
43
+ - ade
44
+ - adj
45
+ - agd
46
+ - agx
47
+ - agn
48
+ - aha
49
+ - aka
50
+ - knj
51
+ - ake
52
+ - aeu
53
+ - ahk
54
+ - bss
55
+ - alj
56
+ - sqi
57
+ - alt
58
+ - alp
59
+ - alz
60
+ - kab
61
+ - amk
62
+ - mmg
63
+ - amh
64
+ - ami
65
+ - azg
66
+ - agg
67
+ - boj
68
+ - cko
69
+ - any
70
+ - arl
71
+ - ara
72
+ - atq
73
+ - luc
74
+ - hyw
75
+ - apr
76
+ - aia
77
+ - msy
78
+ - cni
79
+ - cjo
80
+ - cpu
81
+ - cpb
82
+ - asm
83
+ - asa
84
+ - teo
85
+ - ati
86
+ - djk
87
+ - ava
88
+ - avn
89
+ - avu
90
+ - awb
91
+ - kwi
92
+ - awa
93
+ - agr
94
+ - agu
95
+ - ayr
96
+ - ayo
97
+ - abp
98
+ - blx
99
+ - sgb
100
+ - azj-script_cyrillic
101
+ - azj-script_latin
102
+ - azb
103
+ - bba
104
+ - bhz
105
+ - bvc
106
+ - bfy
107
+ - bgq
108
+ - bdq
109
+ - bdh
110
+ - bqi
111
+ - bjw
112
+ - blz
113
+ - ban
114
+ - bcc-script_latin
115
+ - bcc-script_arabic
116
+ - bam
117
+ - ptu
118
+ - bcw
119
+ - bqj
120
+ - bno
121
+ - bbb
122
+ - bfa
123
+ - bjz
124
+ - bak
125
+ - eus
126
+ - bsq
127
+ - akb
128
+ - btd
129
+ - btx
130
+ - bts
131
+ - bbc
132
+ - bvz
133
+ - bjv
134
+ - bep
135
+ - bkv
136
+ - bzj
137
+ - bem
138
+ - bng
139
+ - ben
140
+ - bom
141
+ - btt
142
+ - bha
143
+ - bgw
144
+ - bht
145
+ - beh
146
+ - sne
147
+ - ubl
148
+ - bcl
149
+ - bim
150
+ - bkd
151
+ - bjr
152
+ - bfo
153
+ - biv
154
+ - bib
155
+ - bis
156
+ - bzi
157
+ - bqp
158
+ - bpr
159
+ - bps
160
+ - bwq
161
+ - bdv
162
+ - bqc
163
+ - bus
164
+ - bnp
165
+ - bmq
166
+ - bdg
167
+ - boa
168
+ - ksr
169
+ - bor
170
+ - bru
171
+ - box
172
+ - bzh
173
+ - bgt
174
+ - sab
175
+ - bul
176
+ - bwu
177
+ - bmv
178
+ - mya
179
+ - tte
180
+ - cjp
181
+ - cbv
182
+ - kaq
183
+ - cot
184
+ - cbc
185
+ - car
186
+ - cat
187
+ - ceb
188
+ - cme
189
+ - cbi
190
+ - ceg
191
+ - cly
192
+ - cya
193
+ - che
194
+ - hne
195
+ - nya
196
+ - dig
197
+ - dug
198
+ - bgr
199
+ - cek
200
+ - cfm
201
+ - cnh
202
+ - hlt
203
+ - mwq
204
+ - ctd
205
+ - tcz
206
+ - zyp
207
+ - cco
208
+ - cnl
209
+ - cle
210
+ - chz
211
+ - cpa
212
+ - cso
213
+ - cnt
214
+ - cuc
215
+ - hak
216
+ - nan
217
+ - xnj
218
+ - cap
219
+ - cax
220
+ - ctg
221
+ - ctu
222
+ - chf
223
+ - cce
224
+ - crt
225
+ - crq
226
+ - cac-dialect_sansebastiáncoatán
227
+ - cac-dialect_sanmateoixtatán
228
+ - ckt
229
+ - ncu
230
+ - cdj
231
+ - chv
232
+ - caa
233
+ - asg
234
+ - con
235
+ - crn
236
+ - cok
237
+ - crk-script_latin
238
+ - crk-script_syllabics
239
+ - crh
240
+ - cui
241
+ - dsh
242
+ - dbq
243
+ - dga
244
+ - dgi
245
+ - dgk
246
+ - dnj-dialect_gweetaawueast
247
+ - dnj-dialect_blowowest
248
+ - daa
249
+ - dnt
250
+ - dnw
251
+ - dar
252
+ - tcc
253
+ - dwr
254
+ - ded
255
+ - mzw
256
+ - ntr
257
+ - ddn
258
+ - des
259
+ - dso
260
+ - nfa
261
+ - dhi
262
+ - gud
263
+ - did
264
+ - mhu
265
+ - dip
266
+ - dik
267
+ - tbz
268
+ - dts
269
+ - dos
270
+ - dgo
271
+ - mvp
272
+ - nld
273
+ - jen
274
+ - dzo
275
+ - idd
276
+ - eka
277
+ - cto
278
+ - emp
279
+ - eng
280
+ - enx
281
+ - sja
282
+ - myv
283
+ - mcq
284
+ - ese
285
+ - evn
286
+ - eza
287
+ - fal
288
+ - fao
289
+ - far
290
+ - fij
291
+ - fin
292
+ - fon
293
+ - frd
294
+ - fra
295
+ - ful
296
+ - flr
297
+ - gau
298
+ - gbk
299
+ - gag-script_cyrillic
300
+ - gag-script_latin
301
+ - gbi
302
+ - gmv
303
+ - lug
304
+ - pwg
305
+ - gbm
306
+ - cab
307
+ - grt
308
+ - krs
309
+ - gso
310
+ - nlg
311
+ - gej
312
+ - deu
313
+ - gri
314
+ - kik
315
+ - acd
316
+ - glk
317
+ - gof-script_latin
318
+ - gog
319
+ - gkn
320
+ - wsg
321
+ - gjn
322
+ - gqr
323
+ - gor
324
+ - gux
325
+ - gbo
326
+ - ell
327
+ - grc
328
+ - guh
329
+ - gub
330
+ - grn
331
+ - gyr
332
+ - guo
333
+ - gde
334
+ - guj
335
+ - gvl
336
+ - guk
337
+ - rub
338
+ - dah
339
+ - gwr
340
+ - gwi
341
+ - hat
342
+ - hlb
343
+ - amf
344
+ - hag
345
+ - hnn
346
+ - bgc
347
+ - had
348
+ - hau
349
+ - hwc
350
+ - hvn
351
+ - hay
352
+ - xed
353
+ - heb
354
+ - heh
355
+ - hil
356
+ - hin
357
+ - hif
358
+ - hns
359
+ - hoc
360
+ - hoy
361
+ - hus-dialect_westernpotosino
362
+ - hus-dialect_centralveracruz
363
+ - huv
364
+ - hui
365
+ - hun
366
+ - hap
367
+ - iba
368
+ - isl
369
+ - dbj
370
+ - ifa
371
+ - ifb
372
+ - ifu
373
+ - ifk
374
+ - ife
375
+ - ign
376
+ - ikk
377
+ - iqw
378
+ - ilb
379
+ - ilo
380
+ - imo
381
+ - ind
382
+ - inb
383
+ - ipi
384
+ - irk
385
+ - icr
386
+ - itv
387
+ - itl
388
+ - atg
389
+ - ixl-dialect_sanjuancotzal
390
+ - ixl-dialect_sangasparchajul
391
+ - ixl-dialect_santamarianebaj
392
+ - nca
393
+ - izr
394
+ - izz
395
+ - jac
396
+ - jam
397
+ - jav
398
+ - jvn
399
+ - kac
400
+ - dyo
401
+ - csk
402
+ - adh
403
+ - jun
404
+ - jbu
405
+ - dyu
406
+ - bex
407
+ - juy
408
+ - gna
409
+ - urb
410
+ - kbp
411
+ - cwa
412
+ - dtp
413
+ - kbr
414
+ - cgc
415
+ - kki
416
+ - kzf
417
+ - lew
418
+ - cbr
419
+ - kkj
420
+ - keo
421
+ - kqe
422
+ - kak
423
+ - kyb
424
+ - knb
425
+ - kmd
426
+ - kml
427
+ - ify
428
+ - xal
429
+ - kbq
430
+ - kay
431
+ - ktb
432
+ - hig
433
+ - gam
434
+ - cbu
435
+ - xnr
436
+ - kmu
437
+ - kne
438
+ - kan
439
+ - kby
440
+ - pam
441
+ - cak-dialect_santamaríadejesús
442
+ - cak-dialect_southcentral
443
+ - cak-dialect_yepocapa
444
+ - cak-dialect_western
445
+ - cak-dialect_santodomingoxenacoj
446
+ - cak-dialect_central
447
+ - xrb
448
+ - krc
449
+ - kaa
450
+ - krl
451
+ - pww
452
+ - xsm
453
+ - cbs
454
+ - pss
455
+ - kxf
456
+ - kyz
457
+ - kyu
458
+ - txu
459
+ - kaz
460
+ - ndp
461
+ - kbo
462
+ - kyq
463
+ - ken
464
+ - ker
465
+ - xte
466
+ - kyg
467
+ - kjh
468
+ - kca
469
+ - khm
470
+ - kxm
471
+ - kjg
472
+ - nyf
473
+ - kij
474
+ - kia
475
+ - kqr
476
+ - kqp
477
+ - krj
478
+ - zga
479
+ - kin
480
+ - pkb
481
+ - geb
482
+ - gil
483
+ - kje
484
+ - kss
485
+ - thk
486
+ - klu
487
+ - kyo
488
+ - kog
489
+ - kfb
490
+ - kpv
491
+ - bbo
492
+ - xon
493
+ - kma
494
+ - kno
495
+ - kxc
496
+ - ozm
497
+ - kqy
498
+ - kor
499
+ - coe
500
+ - kpq
501
+ - kpy
502
+ - kyf
503
+ - kff-script_telugu
504
+ - kri
505
+ - rop
506
+ - ktj
507
+ - ted
508
+ - krr
509
+ - kdt
510
+ - kez
511
+ - cul
512
+ - kle
513
+ - kdi
514
+ - kue
515
+ - kum
516
+ - kvn
517
+ - cuk
518
+ - kdn
519
+ - xuo
520
+ - key
521
+ - kpz
522
+ - knk
523
+ - kmr-script_latin
524
+ - kmr-script_arabic
525
+ - kmr-script_cyrillic
526
+ - xua
527
+ - kru
528
+ - kus
529
+ - kub
530
+ - kdc
531
+ - kxv
532
+ - blh
533
+ - cwt
534
+ - kwd
535
+ - tnk
536
+ - kwf
537
+ - cwe
538
+ - kyc
539
+ - tye
540
+ - kir
541
+ - quc-dialect_north
542
+ - quc-dialect_east
543
+ - quc-dialect_central
544
+ - lac
545
+ - lsi
546
+ - lbj
547
+ - lhu
548
+ - las
549
+ - lam
550
+ - lns
551
+ - ljp
552
+ - laj
553
+ - lao
554
+ - lat
555
+ - lav
556
+ - law
557
+ - lcp
558
+ - lzz
559
+ - lln
560
+ - lef
561
+ - acf
562
+ - lww
563
+ - mhx
564
+ - eip
565
+ - lia
566
+ - lif
567
+ - onb
568
+ - lis
569
+ - loq
570
+ - lob
571
+ - yaz
572
+ - lok
573
+ - llg
574
+ - ycl
575
+ - lom
576
+ - ngl
577
+ - lon
578
+ - lex
579
+ - lgg
580
+ - ruf
581
+ - dop
582
+ - lnd
583
+ - ndy
584
+ - lwo
585
+ - lee
586
+ - mev
587
+ - mfz
588
+ - jmc
589
+ - myy
590
+ - mbc
591
+ - mda
592
+ - mad
593
+ - mag
594
+ - ayz
595
+ - mai
596
+ - mca
597
+ - mcp
598
+ - mak
599
+ - vmw
600
+ - mgh
601
+ - kde
602
+ - mlg
603
+ - zlm
604
+ - pse
605
+ - mkn
606
+ - xmm
607
+ - mal
608
+ - xdy
609
+ - div
610
+ - mdy
611
+ - mup
612
+ - mam-dialect_central
613
+ - mam-dialect_northern
614
+ - mam-dialect_southern
615
+ - mam-dialect_western
616
+ - mqj
617
+ - mcu
618
+ - mzk
619
+ - maw
620
+ - mjl
621
+ - mnk
622
+ - mge
623
+ - mbh
624
+ - knf
625
+ - mjv
626
+ - mbt
627
+ - obo
628
+ - mbb
629
+ - mzj
630
+ - sjm
631
+ - mrw
632
+ - mar
633
+ - mpg
634
+ - mhr
635
+ - enb
636
+ - mah
637
+ - myx
638
+ - klv
639
+ - mfh
640
+ - met
641
+ - mcb
642
+ - mop
643
+ - yua
644
+ - mfy
645
+ - maz
646
+ - vmy
647
+ - maq
648
+ - mzi
649
+ - maj
650
+ - maa-dialect_sanantonio
651
+ - maa-dialect_sanjerónimo
652
+ - mhy
653
+ - mhi
654
+ - zmz
655
+ - myb
656
+ - gai
657
+ - mqb
658
+ - mbu
659
+ - med
660
+ - men
661
+ - mee
662
+ - mwv
663
+ - meq
664
+ - zim
665
+ - mgo
666
+ - mej
667
+ - mpp
668
+ - min
669
+ - gum
670
+ - mpx
671
+ - mco
672
+ - mxq
673
+ - pxm
674
+ - mto
675
+ - mim
676
+ - xta
677
+ - mbz
678
+ - mip
679
+ - mib
680
+ - miy
681
+ - mih
682
+ - miz
683
+ - xtd
684
+ - mxt
685
+ - xtm
686
+ - mxv
687
+ - xtn
688
+ - mie
689
+ - mil
690
+ - mio
691
+ - mdv
692
+ - mza
693
+ - mit
694
+ - mxb
695
+ - mpm
696
+ - soy
697
+ - cmo-script_latin
698
+ - cmo-script_khmer
699
+ - mfq
700
+ - old
701
+ - mfk
702
+ - mif
703
+ - mkl
704
+ - mox
705
+ - myl
706
+ - mqf
707
+ - mnw
708
+ - mon
709
+ - mog
710
+ - mfe
711
+ - mor
712
+ - mqn
713
+ - mgd
714
+ - mtj
715
+ - cmr
716
+ - mtd
717
+ - bmr
718
+ - moz
719
+ - mzm
720
+ - mnb
721
+ - mnf
722
+ - unr
723
+ - fmu
724
+ - mur
725
+ - tih
726
+ - muv
727
+ - muy
728
+ - sur
729
+ - moa
730
+ - wmw
731
+ - tnr
732
+ - miq
733
+ - mos
734
+ - muh
735
+ - nas
736
+ - mbj
737
+ - nfr
738
+ - kfw
739
+ - nst
740
+ - nag
741
+ - nch
742
+ - nhe
743
+ - ngu
744
+ - azz
745
+ - nhx
746
+ - ncl
747
+ - nhy
748
+ - ncj
749
+ - nsu
750
+ - npl
751
+ - nuz
752
+ - nhw
753
+ - nhi
754
+ - nlc
755
+ - nab
756
+ - gld
757
+ - nnb
758
+ - npy
759
+ - pbb
760
+ - ntm
761
+ - nmz
762
+ - naw
763
+ - nxq
764
+ - ndj
765
+ - ndz
766
+ - ndv
767
+ - new
768
+ - nij
769
+ - sba
770
+ - gng
771
+ - nga
772
+ - nnq
773
+ - ngp
774
+ - gym
775
+ - kdj
776
+ - nia
777
+ - nim
778
+ - nin
779
+ - nko
780
+ - nog
781
+ - lem
782
+ - not
783
+ - nhu
784
+ - bud
785
+ - nus
786
+ - yas
787
+ - nnw
788
+ - nwb
789
+ - nyy
790
+ - nyn
791
+ - rim
792
+ - lid
793
+ - nuj
794
+ - nyo
795
+ - nzi
796
+ - ann
797
+ - ory
798
+ - ojb-script_latin
799
+ - ojb-script_syllabics
800
+ - oku
801
+ - bsc
802
+ - bdu
803
+ - orm
804
+ - ury
805
+ - oss
806
+ - ote
807
+ - otq
808
+ - stn
809
+ - sig
810
+ - kfx
811
+ - bfz
812
+ - sey
813
+ - pao
814
+ - pau
815
+ - pce
816
+ - plw
817
+ - pmf
818
+ - pag
819
+ - pap
820
+ - prf
821
+ - pab
822
+ - pbi
823
+ - pbc
824
+ - pad
825
+ - ata
826
+ - pez
827
+ - peg
828
+ - fas
829
+ - pcm
830
+ - pis
831
+ - pny
832
+ - pir
833
+ - pjt
834
+ - poy
835
+ - pol
836
+ - pps
837
+ - pls
838
+ - poi
839
+ - poh-dialect_eastern
840
+ - poh-dialect_western
841
+ - por
842
+ - prt
843
+ - pui
844
+ - pan
845
+ - tsz
846
+ - suv
847
+ - lme
848
+ - quy
849
+ - qvc
850
+ - quz
851
+ - qve
852
+ - qub
853
+ - qvh
854
+ - qwh
855
+ - qvw
856
+ - quf
857
+ - qvm
858
+ - qul
859
+ - qvn
860
+ - qxn
861
+ - qxh
862
+ - qvs
863
+ - quh
864
+ - qxo
865
+ - qxr
866
+ - qvo
867
+ - qvz
868
+ - qxl
869
+ - quw
870
+ - kjb
871
+ - kek
872
+ - rah
873
+ - rjs
874
+ - rai
875
+ - lje
876
+ - rnl
877
+ - rkt
878
+ - rap
879
+ - yea
880
+ - raw
881
+ - rej
882
+ - rel
883
+ - ril
884
+ - iri
885
+ - rgu
886
+ - rhg
887
+ - rmc-script_latin
888
+ - rmc-script_cyrillic
889
+ - rmo
890
+ - rmy-script_latin
891
+ - rmy-script_cyrillic
892
+ - ron
893
+ - rol
894
+ - cla
895
+ - rng
896
+ - rug
897
+ - run
898
+ - rus
899
+ - lsm
900
+ - spy
901
+ - sck
902
+ - saj
903
+ - sch
904
+ - sml
905
+ - xsb
906
+ - sbl
907
+ - saq
908
+ - sbd
909
+ - smo
910
+ - rav
911
+ - sxn
912
+ - sag
913
+ - sbp
914
+ - xsu
915
+ - srm
916
+ - sas
917
+ - apb
918
+ - sgw
919
+ - tvw
920
+ - lip
921
+ - slu
922
+ - snw
923
+ - sea
924
+ - sza
925
+ - seh
926
+ - crs
927
+ - ksb
928
+ - shn
929
+ - sho
930
+ - mcd
931
+ - cbt
932
+ - xsr
933
+ - shk
934
+ - shp
935
+ - sna
936
+ - cjs
937
+ - jiv
938
+ - snp
939
+ - sya
940
+ - sid
941
+ - snn
942
+ - sri
943
+ - srx
944
+ - sil
945
+ - sld
946
+ - akp
947
+ - xog
948
+ - som
949
+ - bmu
950
+ - khq
951
+ - ses
952
+ - mnx
953
+ - spa
954
+ - srn
955
+ - sxb
956
+ - suc
957
+ - tgo
958
+ - suk
959
+ - sun
960
+ - suz
961
+ - sgj
962
+ - sus
963
+ - swh
964
+ - swe
965
+ - syl
966
+ - dyi
967
+ - myk
968
+ - spp
969
+ - tap
970
+ - tby
971
+ - tna
972
+ - shi
973
+ - klw
974
+ - tgl
975
+ - tbk
976
+ - tgj
977
+ - blt
978
+ - tbg
979
+ - omw
980
+ - tgk
981
+ - tdj
982
+ - tbc
983
+ - tlj
984
+ - tly
985
+ - ttq-script_tifinagh
986
+ - taj
987
+ - taq
988
+ - tam
989
+ - tpm
990
+ - tgp
991
+ - tnn
992
+ - tac
993
+ - rif-script_latin
994
+ - rif-script_arabic
995
+ - tat
996
+ - tav
997
+ - twb
998
+ - tbl
999
+ - kps
1000
+ - twe
1001
+ - ttc
1002
+ - tel
1003
+ - kdh
1004
+ - tes
1005
+ - tex
1006
+ - tee
1007
+ - tpp
1008
+ - tpt
1009
+ - stp
1010
+ - tfr
1011
+ - twu
1012
+ - ter
1013
+ - tew
1014
+ - tha
1015
+ - nod
1016
+ - thl
1017
+ - tem
1018
+ - adx
1019
+ - bod
1020
+ - khg
1021
+ - tca
1022
+ - tir
1023
+ - txq
1024
+ - tik
1025
+ - dgr
1026
+ - tob
1027
+ - tmf
1028
+ - tng
1029
+ - tlb
1030
+ - ood
1031
+ - tpi
1032
+ - jic
1033
+ - lbw
1034
+ - txa
1035
+ - tom
1036
+ - toh
1037
+ - tnt
1038
+ - sda
1039
+ - tcs
1040
+ - toc
1041
+ - tos
1042
+ - neb
1043
+ - trn
1044
+ - trs
1045
+ - trc
1046
+ - tri
1047
+ - cof
1048
+ - tkr
1049
+ - kdl
1050
+ - cas
1051
+ - tso
1052
+ - tuo
1053
+ - iou
1054
+ - tmc
1055
+ - tuf
1056
+ - tur
1057
+ - tuk-script_latin
1058
+ - tuk-script_arabic
1059
+ - bov
1060
+ - tue
1061
+ - kcg
1062
+ - tzh-dialect_bachajón
1063
+ - tzh-dialect_tenejapa
1064
+ - tzo-dialect_chenalhó
1065
+ - tzo-dialect_chamula
1066
+ - tzj-dialect_western
1067
+ - tzj-dialect_eastern
1068
+ - aoz
1069
+ - udm
1070
+ - udu
1071
+ - ukr
1072
+ - ppk
1073
+ - ubu
1074
+ - urk
1075
+ - ura
1076
+ - urt
1077
+ - urd-script_devanagari
1078
+ - urd-script_arabic
1079
+ - urd-script_latin
1080
+ - upv
1081
+ - usp
1082
+ - uig-script_arabic
1083
+ - uig-script_cyrillic
1084
+ - uzb-script_cyrillic
1085
+ - vag
1086
+ - bav
1087
+ - vid
1088
+ - vie
1089
+ - vif
1090
+ - vun
1091
+ - vut
1092
+ - prk
1093
+ - wwa
1094
+ - rro
1095
+ - bao
1096
+ - waw
1097
+ - lgl
1098
+ - wlx
1099
+ - cou
1100
+ - hub
1101
+ - gvc
1102
+ - mfi
1103
+ - wap
1104
+ - wba
1105
+ - war
1106
+ - way
1107
+ - guc
1108
+ - cym
1109
+ - kvw
1110
+ - tnp
1111
+ - hto
1112
+ - huu
1113
+ - wal-script_latin
1114
+ - wal-script_ethiopic
1115
+ - wlo
1116
+ - noa
1117
+ - wob
1118
+ - kao
1119
+ - xer
1120
+ - yad
1121
+ - yka
1122
+ - sah
1123
+ - yba
1124
+ - yli
1125
+ - nlk
1126
+ - yal
1127
+ - yam
1128
+ - yat
1129
+ - jmd
1130
+ - tao
1131
+ - yaa
1132
+ - ame
1133
+ - guu
1134
+ - yao
1135
+ - yre
1136
+ - yva
1137
+ - ybb
1138
+ - pib
1139
+ - byr
1140
+ - pil
1141
+ - yor
1142
+ - ycn
1143
+ - ess
1144
+ - yuz
1145
+ - atb
1146
+ - zne
1147
+ - zaq
1148
+ - zpo
1149
+ - zad
1150
+ - zpc
1151
+ - zca
1152
+ - zpg
1153
+ - zai
1154
+ - zpl
1155
+ - zam
1156
+ - zaw
1157
+ - zpm
1158
+ - zac
1159
+ - zao
1160
+ - ztq
1161
+ - zar
1162
+ - zpt
1163
+ - zpi
1164
+ - zas
1165
+ - zaa
1166
+ - zpz
1167
+ - zab
1168
+ - zpu
1169
+ - zae
1170
+ - zty
1171
+ - zav
1172
+ - zza
1173
+ - zyb
1174
+ - ziw
1175
+ - zos
1176
+ - gnd
1177
+ - ewe
1178
+
1179
+ </details>
1180
+
1181
+ ## Model details
1182
+
1183
+ - **Developed by:** Vineel Pratap et al.
1184
+ - **Model type:** Text-to-speech model
1185
+ - **Language(s):** 1107 languages, see [supported languages](#supported-languages)
1186
+ - **License:** CC-BY-NC 4.0 license
1187
+ - **Cite as:**
1188
+
1189
+ @article{pratap2023mms,
1190
+ title={Scaling Speech Technology to 1,000+ Languages},
1191
+ author={Vineel Pratap and Andros Tjandra and Bowen Shi and Paden Tomasello and Arun Babu and Sayani Kundu and Ali Elkahky and Zhaoheng Ni and Apoorv Vyas and Maryam Fazel-Zarandi and Alexei Baevski and Yossi Adi and Xiaohui Zhang and Wei-Ning Hsu and Alexis Conneau and Michael Auli},
1192
+ journal={arXiv},
1193
+ year={2023}
1194
+ }
1195
+
1196
+ ## Additional Links
1197
+
1198
+ - [Blog post](https://ai.facebook.com/blog/multilingual-model-speech-recognition/)
1199
+ - [Transformers documentation](https://huggingface.co/docs/transformers/main/en/model_doc/mms)
1200
+ - [Paper](https://arxiv.org/abs/2305.13516)
1201
+ - [GitHub Repository](https://github.com/facebookresearch/fairseq/tree/main/examples/mms#tts)
1202
+ - [Other **MMS** checkpoints](https://huggingface.co/models?other=mms)
1203
+ - MMS base checkpoints:
1204
+ - [facebook/mms-1b](https://huggingface.co/facebook/mms-1b)
1205
+ - [facebook/mms-300m](https://huggingface.co/facebook/mms-300m)
1206
+ - [Official Space](https://huggingface.co/spaces/facebook/MMS)
full_models/abi/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0be668a16e5b9b9dedd41e08442644f4ed894e4c62f43d06e7e448158428e2fe
3
+ size 561098185
full_models/abi/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bee8bd93805bd44cee1ff5497bc3a87220eeeec8b0fd6a2368d0609001a2868
3
+ size 436570305
full_models/abi/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/abi/vocab.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ɔ
2
+ ê
3
+ ǒ
4
+
5
+ ̂
6
+ h
7
+ ě
8
+ i
9
+ ɩ
10
+ k
11
+ á
12
+ ̌
13
+ ǐ
14
+ b
15
+ p
16
+ í
17
+ ǔ
18
+ u
19
+ ń
20
+ w
21
+ '
22
+ ί
23
+ f
24
+ ó
25
+ y
26
+ s
27
+ î
28
+ m
29
+ ɛ
30
+ έ
31
+ e
32
+ ʋ
33
+ ḿ
34
+ n
35
+ ú
36
+ o
37
+ d
38
+ â
39
+ ô
40
+ c
41
+ ǎ
42
+ é
43
+ ́
44
+ j
45
+ l
46
+ -
47
+ t
48
+ _
49
+ r
50
+ g
51
+ ε
52
+ û
53
+ a
full_models/abp/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3176637b067b5fc78605aff0b553ec09244da37ebbff3f419163cede7824c441
3
+ size 561098185
full_models/abp/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f621739a139733b14ac70f032ab4a677e8912fa3a1132ba3f8cf599dee6dbbac
3
+ size 436524225
full_models/abp/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/abp/vocab.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _
2
+ t
3
+ e
4
+ b
5
+ ō
6
+ j
7
+ c
8
+ r
9
+ f
10
+ w
11
+ i
12
+ q
13
+ h
14
+ g
15
+ l
16
+ m
17
+ k
18
+ y
19
+ d
20
+ ā
21
+ s
22
+ '
23
+ a
24
+ n
25
+ x
26
+ 6
27
+ o
28
+ -
29
+ p
30
+ u
31
+
32
+ v
33
+ z
full_models/aca/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae308c0880cb4e3bd2d02600485b473f068c38ff85bac0a7d5bd8951ba1ce963
3
+ size 561076199
full_models/aca/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78666df5cbdca3fbd91b2bb2f49841f8919b7a73ab6e504ed82f7597e41c190f
3
+ size 436353726
full_models/aca/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/aca/vocab.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a
2
+ |
3
+ i
4
+ n
5
+ á
6
+ c
7
+ e
8
+ u
9
+ l
10
+ r
11
+ w
12
+ j
13
+ s
14
+ í
15
+ m
16
+ é
17
+ o
18
+ '
19
+ h
20
+ t
21
+ y
22
+ b
23
+ d
24
+ ú
25
+ q
26
+ ó
27
+ p
28
+
29
+ g
30
+ f
31
+ z
32
+ v
33
+ x
34
+ ñ
35
+
full_models/acd/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c06f2853c68b0bc604a40caa9261cf439f0b02d66a510b08a0660b0f8e3201
3
+ size 561078480
full_models/acd/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d3e15a2db968008b7b01eb6282b9c24115221f2abccd8935ee11a16d6f6cf9
3
+ size 436355114
full_models/acd/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acd/vocab.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ ɛ
4
+ n
5
+ ɔ
6
+ i
7
+ o
8
+ m
9
+ y
10
+ e
11
+ u
12
+ g
13
+ s
14
+ k
15
+ b
16
+ r
17
+ l
18
+ d
19
+ w
20
+ f
21
+ -
22
+ t
23
+ p
24
+ '
25
+ ŋ
26
+ h
27
+ c
28
+
full_models/ace/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecacad3f1f738085d053f35c5618c4abceaee39c28dbff2975e17fc918cea5c7
3
+ size 561078594
full_models/ace/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183895b264e9f91617ababbd088b5309ea79468730819506c8c8ab5e977085e6
3
+ size 436387528
full_models/ace/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/ace/vocab.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ n
4
+ e
5
+ u
6
+ g
7
+ t
8
+ h
9
+ i
10
+ k
11
+ m
12
+ b
13
+ o
14
+ y
15
+ l
16
+ r
17
+ s
18
+ p
19
+ j
20
+ d
21
+ é
22
+ w
23
+ ô
24
+ ë
25
+ -
26
+ c
27
+ ö
28
+ á
29
+ ó
30
+ f
31
+ z
32
+ '
33
+ q
34
+ ú
35
+ `
36
+ 0
37
+ 6
38
+ 4
39
+ 3
40
+ 1
41
+ 2
42
+
full_models/acf/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:262ec2da7c0e7ded6eaa1cd84e37fe55127f23280d272d9453591df621511b21
3
+ size 561078869
full_models/acf/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f23d7c73bfdf05bec98d6fc71dffbf19409a9afb463b6974abccdaee53c215
3
+ size 436369451
full_models/acf/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acf/vocab.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ n
4
+ é
5
+ i
6
+ o
7
+ s
8
+ t
9
+ k
10
+ y
11
+ p
12
+ l
13
+ w
14
+ m
15
+ è
16
+ u
17
+ d
18
+ -
19
+ e
20
+ b
21
+ v
22
+ j
23
+ ò
24
+ z
25
+ f
26
+ ʼ
27
+ h
28
+ g
29
+ c
30
+ r
31
+
32
+ '
33
+
full_models/ach/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d772464b61e01ae7415ad3b4cae63851b5fd89c15b6157b311446e3f3ea7460c
3
+ size 561078618
full_models/ach/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46031da7d624512421e115dcfadede182580c6c3139d2ea2b7c20b4fabee7e1d
3
+ size 436355251
full_models/ach/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/ach/vocab.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ o
3
+ a
4
+ i
5
+ e
6
+ k
7
+ n
8
+ m
9
+ w
10
+ t
11
+ u
12
+ y
13
+ l
14
+ c
15
+ d
16
+ b
17
+ g
18
+ r
19
+ p
20
+ ŋ
21
+ j
22
+ -
23
+ s
24
+ '
25
+ v
26
+ f
27
+ h
28
+
full_models/acn/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c7c7622cba3d3aa313a67157d78898fcffb20dc0855d9c9ac93e544a79b11f3
3
+ size 561098185
full_models/acn/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f608e1298f921bba8ebfa9e1eadb3c599aff4a0eb64614fb32a3d372340a9b4b
3
+ size 436533441
full_models/acn/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acn/vocab.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ u
2
+ k
3
+ g
4
+ a
5
+ _
6
+
7
+ d
8
+ w
9
+ i
10
+ o
11
+ -
12
+ b
13
+ e
14
+ n
15
+ t
16
+ y
17
+ p
18
+ s
19
+ z
20
+ x
21
+ m
22
+ h
23
+ c
24
+
25
+ l
26
+ 0
27
+ 2
28
+ j
29
+ f
30
+ 3
31
+ 5
32
+ q
33
+ v
34
+ r
35
+ 6
36
+ 1
37
+ 4
full_models/acr/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e790e6b73ad3a311ec0bd311e50a33846f38b8e63f074650687a6f588ea7df
3
+ size 561078709
full_models/acr/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:892faf58e703daf31d96bd7b9913d2b0eda8f750305282ba760a656280a5437f
3
+ size 436375881
full_models/acr/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acr/vocab.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ i
4
+ '
5
+ k
6
+ r
7
+ u
8
+ e
9
+ j
10
+ n
11
+ o
12
+ c
13
+ l
14
+ h
15
+ t
16
+ q
17
+ w
18
+ x
19
+ m
20
+ b
21
+ s
22
+ y
23
+ p
24
+ z
25
+ d
26
+
27
+ ú
28
+ g
29
+ á
30
+ é
31
+ ó
32
+ f
33
+ í
34
+ v
35
+ -
36
+ ñ
37
+
full_models/acu/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e8f2db7fee9018cff7d22ff7ede80bfcfb408c2cb3a38a3ccf32f1594865969
3
+ size 561078587
full_models/acu/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5952ecca5322b2410a5056b1852650d2fb9289f3cebce5e9b95a7ee76eabc621
3
+ size 436371382
full_models/acu/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acu/vocab.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a
2
+ |
3
+ i
4
+ n
5
+ u
6
+ t
7
+ r
8
+ m
9
+ k
10
+ s
11
+ e
12
+ h
13
+ j
14
+ c
15
+ w
16
+ y
17
+ p
18
+ g
19
+ o
20
+ í
21
+
22
+ ú
23
+ d
24
+ l
25
+ é
26
+ á
27
+ b
28
+ f
29
+ v
30
+ ó
31
+ z
32
+ q
33
+ x
34
+ ñ
35
+
full_models/ade/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41191e5d74955fec278ea692b2218c7920de8f16147aba556fdfce56b714f4c0
3
+ size 561078757
full_models/ade/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed6df678ce5a605da1adef5aa7016ec89dd3b209ffebad3156843473d688668
3
+ size 436384590
full_models/ade/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/ade/vocab.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ â
4
+ n
5
+ e
6
+ b
7
+ g
8
+ æ
9
+ i
10
+ t
11
+ w
12
+ ô
13
+ y
14
+ k
15
+ o
16
+ r
17
+ l
18
+ u
19
+ d
20
+ m
21
+ f
22
+ s
23
+ û
24
+ p
25
+ à
26
+ -
27
+ è
28
+ ã
29
+ õ
30
+ ù
31
+ î
32
+ å
33
+ ì
34
+ ü
35
+ ǹ
36
+ ò
37
+ h
38
+ '
39
+ c
40
+
full_models/adh/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f466d6d8e81596e107e5e3ebb2f5fea640411555a9ae7a6b719bc80f0d11f42
3
+ size 561078757
full_models/adh/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6a6d6796cd6997ca30c352c12f25d36db4d80ac374e42318aeba8a2ce1905a
3
+ size 436360329
full_models/adh/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/adh/vocab.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ o
3
+ i
4
+ a
5
+ e
6
+ n
7
+ k
8
+ m
9
+ w
10
+ r
11
+ y
12
+ h
13
+ t
14
+ d
15
+ j
16
+ g
17
+ u
18
+ l
19
+ p
20
+ c
21
+ b
22
+ ŋ
23
+ s
24
+ f
25
+ '
26
+ z
27
+ v
28
+ -
29
+