Sad44587 committed on
Commit
29becaf
·
verified ·
1 Parent(s): c208acd

Delete model.py

Browse files
Files changed (1) hide show
  1. model.py +0 -1050
model.py DELETED
@@ -1,1050 +0,0 @@
1
- import os
2
- from functools import lru_cache
3
- from pathlib import Path
4
-
5
- import sherpa_onnx
6
- from huggingface_hub import hf_hub_download
7
-
8
-
9
def get_file(
    repo_id: str,
    filename: str,
    subfolder: str = ".",
) -> str:
    """Fetch one file of a Hugging Face Hub repository.

    Thin wrapper that forwards its three arguments, by keyword, to
    ``hf_hub_download``.

    Args:
        repo_id: Repository to read from.
        filename: Name of the requested file.
        subfolder: Folder inside the repository; defaults to ``"."``.

    Returns:
        The value returned by ``hf_hub_download`` for this request.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
20
-
21
-
22
@lru_cache(maxsize=10)
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the ``csukuangfj/vits-vctk`` VITS text-to-speech engine.

    Args:
        repo_id: ``csukuangfj/vits-vctk``, optionally followed by a
            ``|<label>`` suffix such as the one carried by the
            ``english_models`` key ``csukuangfj/vits-vctk|109 speakers``.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``); must be non-zero.

    Returns:
        A ``sherpa_onnx.OfflineTts`` configured with the downloaded model,
        lexicon and token files.

    Raises:
        AssertionError: If the repository part of ``repo_id`` is not
            ``csukuangfj/vits-vctk``.
    """
    # get_pretrained_model() passes the full table key, "|<label>" included.
    # Without stripping it the check below could never pass and the Hub
    # download would be asked for a non-existent repository.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-vctk", repo_id

    # Download order is kept: model, lexicon, tokens.
    model, lexicon, tokens = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-vctk.onnx", "lexicon.txt", "tokens.txt")
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
62
-
63
-
64
@lru_cache(maxsize=10)
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the ``csukuangfj/vits-ljs`` VITS text-to-speech engine.

    Args:
        repo_id: ``csukuangfj/vits-ljs``, optionally followed by a
            ``|<label>`` suffix such as the one carried by the
            ``english_models`` key ``csukuangfj/vits-ljs|1 speaker``.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``); must be non-zero.

    Returns:
        A ``sherpa_onnx.OfflineTts`` configured with the downloaded model,
        lexicon and token files.

    Raises:
        AssertionError: If the repository part of ``repo_id`` is not
            ``csukuangfj/vits-ljs``.
    """
    # get_pretrained_model() passes the full table key, "|<label>" included.
    # Without stripping it the check below could never pass and the Hub
    # download would be asked for a non-existent repository.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-ljs", repo_id

    # Download order is kept: model, lexicon, tokens.
    model, lexicon, tokens = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-ljs.onnx", "lexicon.txt", "tokens.txt")
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
104
-
105
-
106
@lru_cache(maxsize=10)
def _get_kokoro(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a Kokoro text-to-speech engine.

    Args:
        repo_id: One of the accepted Kokoro repositories, optionally followed
            by a ``|<label>`` suffix, which is discarded.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` built from the downloaded files. The two
        multi-language repositories additionally get lexicons, rule FSTs and
        ``/tmp/dict`` as dictionary directory; the English-only one gets empty
        strings for those settings.

    Raises:
        AssertionError: If the repository is not an accepted Kokoro repo.
    """
    multi_lang_repos = (
        "csukuangfj/kokoro-multi-lang-v1_0",
        "csukuangfj/kokoro-multi-lang-v1_1",
    )

    repo_id = repo_id.split("|")[0]
    assert repo_id in ("csukuangfj/kokoro-en-v0_19", *multi_lang_repos), repo_id

    def fetch(filename: str) -> str:
        # Every file lives at the top level of the repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    model = fetch("model.onnx")
    tokens = fetch("tokens.txt")
    voices = fetch("voices.bin")

    # Settings for the English-only repo; overridden below otherwise.
    lexicon = ""
    rule_fsts = ""
    dict_dir = ""
    if repo_id in multi_lang_repos:
        lexicon_en = fetch("lexicon-us-en.txt")
        lexicon_zh = fetch("lexicon-zh.txt")
        lexicon = ",".join([lexicon_en, lexicon_zh])

        # Downloaded as date, number, phone but listed as date, phone, number.
        date_zh = fetch("date-zh.fst")
        number_zh = fetch("number-zh.fst")
        phone_zh = fetch("phone-zh.fst")
        rule_fsts = ",".join([date_zh, phone_zh, number_zh])
        dict_dir = "/tmp/dict"

    kokoro_config = sherpa_onnx.OfflineTtsKokoroModelConfig(
        model=model,
        voices=voices,
        tokens=tokens,
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
        lexicon=lexicon,
        dict_dir=dict_dir,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        kokoro=kokoro_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
        rule_fsts=rule_fsts,
    )
    return sherpa_onnx.OfflineTts(tts_config)
194
-
195
-
196
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a VITS engine for a coqui, mms, piper or mimic3 repository.

    The ONNX file name is derived from the repository id: ``model.onnx`` for
    ids containing ``coqui`` or ``vits-mms``, otherwise the repository name
    with its ``vits-piper-`` / ``vits-mimic3-`` prefix length cut off.

    Args:
        repo_id: Repository id, optionally followed by a ``|<label>`` suffix,
            which is discarded.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` for the repository.

    Raises:
        ValueError: If the id matches none of the four families.
    """
    repo_id = repo_id.split("|")[0]

    if "coqui" in repo_id or "vits-mms" in repo_id:
        stem = "model"
    else:
        # "piper" is tested before "mimic3", as in the original chain.
        for family in ("piper", "mimic3"):
            if family in repo_id:
                prefix_len = len(f"vits-{family}-")
                stem = repo_id.split("/")[1][prefix_len:]
                break
        else:
            raise ValueError(f"Unsupported {repo_id}")

    # These repositories are configured with an empty data_dir.
    skip_data_dir = "vits-coqui-uk-mai" in repo_id or "vits-mms" in repo_id
    data_dir = "" if skip_data_dir else "/tmp/espeak-ng-data"

    model = get_file(repo_id=repo_id, filename=f"{stem}.onnx", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon="",
        data_dir=data_dir,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
246
-
247
-
248
@lru_cache(maxsize=10)
def _get_vits_mms(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a VITS MMS engine by delegating to ``_get_vits_piper``.

    Args:
        repo_id: Repository id, passed through unchanged.
        speed: Speed value, passed through unchanged.

    Returns:
        The engine returned by ``_get_vits_piper(repo_id, speed)``.
    """
    # Arguments stay positional so the delegate's cache key is unchanged.
    engine = _get_vits_piper(repo_id, speed)
    return engine
251
-
252
-
253
@lru_cache(maxsize=10)
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the ``csukuangfj/vits-zh-aishell3`` VITS engine.

    Args:
        repo_id: ``csukuangfj/vits-zh-aishell3``, optionally followed by a
            ``|<label>`` suffix such as the one carried by the
            ``chinese_models`` key
            ``csukuangfj/vits-zh-aishell3|174 speakers``.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``); must be non-zero.

    Returns:
        A ``sherpa_onnx.OfflineTts`` configured with the downloaded model,
        lexicon, tokens, four rule FSTs and ``rule.far``.

    Raises:
        AssertionError: If the repository part of ``repo_id`` is not
            ``csukuangfj/vits-zh-aishell3``.
    """
    # get_pretrained_model() passes the full table key, "|<label>" included.
    # Without stripping it the check below could never pass and the Hub
    # download would be asked for a non-existent repository.
    repo_id = repo_id.split("|")[0]
    assert repo_id == "csukuangfj/vits-zh-aishell3", repo_id

    def fetch(filename: str) -> str:
        # Every file lives at the top level of the repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    model = fetch("vits-aishell3.onnx")
    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    # The config takes the rule FST paths as one comma-separated string.
    rule_fsts = ",".join(
        [
            fetch(name)
            for name in ("phone.fst", "date.fst", "number.fst", "new_heteronym.fst")
        ]
    )
    rule_fars = fetch("rule.far")

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon,
                tokens=tokens,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        rule_fsts=rule_fsts,
        rule_fars=rule_fars,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
313
-
314
-
315
@lru_cache(maxsize=10)
def _get_matcha_hf_espeak(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a Matcha engine for the Persian/English Matcha repositories.

    Args:
        repo_id: One of the two accepted ``matcha-tts-fa_en-*`` repositories,
            optionally followed by a ``|<label>`` suffix, which is discarded.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` using the repository's acoustic model
        and tokens, the ``hifigan_v2.onnx`` vocoder from
        ``csukuangfj/sherpa-onnx-hifigan`` and ``/tmp/espeak-ng-data`` as
        data directory.

    Raises:
        AssertionError: If the repository is not one of the accepted ones.
    """
    supported_repos = (
        "csukuangfj/matcha-tts-fa_en-khadijah",
        "csukuangfj/matcha-tts-fa_en-musa",
    )
    repo_id = repo_id.split("|")[0]
    assert repo_id in supported_repos, repo_id

    acoustic_model = get_file(repo_id=repo_id, filename="model.onnx", subfolder=".")
    # The vocoder comes from a separate repository.
    vocoder = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model,
        vocoder=vocoder,
        tokens=tokens,
        lexicon="",
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
362
-
363
-
364
@lru_cache(maxsize=10)
def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the Matcha engine for ``csukuangfj/matcha-icefall-zh-baker``.

    When ``/tmp/dict`` is missing, ``dict.tar.bz2`` is first downloaded and
    unpacked under ``/tmp`` with shell commands; their exit status is not
    checked.

    Args:
        repo_id: The accepted repository, optionally followed by a
            ``|<label>`` suffix, which is discarded.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` using the repository's acoustic model,
        lexicon, tokens and rule FSTs plus the ``hifigan_v2.onnx`` vocoder
        from ``csukuangfj/sherpa-onnx-hifigan``.

    Raises:
        AssertionError: If the repository is not the accepted one.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id in ("csukuangfj/matcha-icefall-zh-baker",), repo_id

    if repo_id == "csukuangfj/matcha-icefall-zh-baker":
        acoustic_model_name = "model-steps-3.onnx"

    dict_dir = "/tmp/dict"
    if not Path(dict_dir).is_dir():
        download_cmd = "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
        os.system(download_cmd)
        os.system("ls -lh /tmp/dict")

    def fetch(filename: str) -> str:
        # Every file lives at the top level of the model repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    acoustic_model = fetch(acoustic_model_name)
    # The vocoder comes from a separate repository.
    vocoder = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )
    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    # The config takes the rule FST paths as one comma-separated string.
    rule_fsts = ",".join([fetch(name) for name in ("phone.fst", "date.fst", "number.fst")])

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model,
        vocoder=vocoder,
        lexicon=lexicon,
        tokens=tokens,
        dict_dir=dict_dir,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
440
-
441
-
442
@lru_cache(maxsize=10)
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a VITS engine for the Chinese/Cantonese ``*-hf-*`` style repos.

    The ONNX file name is derived from the repository id. When ``/tmp/dict``
    is missing, ``dict.tar.bz2`` is first downloaded and unpacked under
    ``/tmp`` with shell commands; their exit status is not checked.

    Args:
        repo_id: Repository id, optionally followed by a ``|<label>`` suffix,
            which is discarded.
        speed: Divisor for the model's length scale
            (``length_scale = 1.0 / speed``).

    Returns:
        A ``sherpa_onnx.OfflineTts`` for the repository. The Cantonese
        repository uses a single ``rule.fst`` and no dictionary directory;
        all others use three rule FSTs and ``/tmp/dict``.
    """
    repo_id = repo_id.split("|")[0]
    is_cantonese = "vits-cantonese-hf-xiaomaiiwn" in repo_id

    # "sherpa-onnx-vits-zh-ll" always wins, so it is tested first.
    if "sherpa-onnx-vits-zh-ll" in repo_id or repo_id == "csukuangfj/vits-melo-tts-zh_en":
        stem = "model"
    elif "fanchen" in repo_id or is_cantonese:
        stem = repo_id.split("/")[-1]
    else:
        stem = repo_id.split("-")[-1]

    if not Path("/tmp/dict").is_dir():
        download_cmd = "cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xvf dict.tar.bz2"
        os.system(download_cmd)
        os.system("ls -lh /tmp/dict")

    def fetch(filename: str) -> str:
        # Every file lives at the top level of the repository.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    model = fetch(f"{stem}.onnx")
    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    if is_cantonese:
        rule_fsts = fetch("rule.fst")
        vits_dict_dir = ""
    else:
        # The config takes the rule FST paths as one comma-separated string.
        rule_fsts = ",".join([fetch(name) for name in ("phone.fst", "date.fst", "number.fst")])
        vits_dict_dir = "/tmp/dict"

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        dict_dir=vits_dict_dir,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
530
-
531
-
532
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the text-to-speech engine registered for ``repo_id``.

    The per-language tables are searched in a fixed order; the first table
    that contains ``repo_id`` supplies the loader, which is called with the
    unmodified ``repo_id`` and ``speed``.

    Args:
        repo_id: A key of one of the per-language model tables.
        speed: Speed value handed to the loader.

    Returns:
        The ``sherpa_onnx.OfflineTts`` produced by the matching loader.

    Raises:
        ValueError: If no table contains ``repo_id``.
    """
    # Same precedence as the original if/elif chain.
    lookup_order = (
        chinese_models,
        chinese_english_models,
        persian_english_models,
        cantonese_models,
        english_models,
        german_models,
        spanish_models,
        french_models,
        ukrainian_models,
        russian_models,
        arabic_models,
        catalan_models,
        czech_models,
        danish_models,
        greek_models,
        finnish_models,
        hungarian_models,
        icelandic_models,
        italian_models,
        georgian_models,
        kazakh_models,
        luxembourgish_models,
        nepali_models,
        dutch_models,
        norwegian_models,
        polish_models,
        portuguese_models,
        romanian_models,
        slovak_models,
        serbian_models,
        swedish_models,
        swahili_models,
        turkish_models,
        vietnamese_models,
        bulgarian_models,
        estonian_models,
        irish_models,
        croatian_models,
        lithuanian_models,
        latvian_models,
        maltese_models,
        slovenian_models,
        bengali_models,
        min_nan_models,
        thai_models,
        persian_models,
        korean_models,
        afrikaans_models,
        gujarati_models,
        tswana_models,
        welsh_models,
    )

    for table in lookup_order:
        if repo_id in table:
            return table[repo_id](repo_id, speed)

    raise ValueError(f"Unsupported repo_id: {repo_id}")
638
-
639
-
640
# Model tables: each key is a repository id, optionally followed by a
# "|<label>" suffix; each value is the loader called for that key.
cantonese_models = dict.fromkeys(
    [
        "csukuangfj/vits-cantonese-hf-xiaomaiiwn",
    ],
    _get_vits_hf,
)

chinese_english_models = {
    **dict.fromkeys(
        [
            "csukuangfj/kokoro-multi-lang-v1_1|103 speakers",
            "csukuangfj/kokoro-multi-lang-v1_0|53 speakers",
        ],
        _get_kokoro,
    ),
    "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf,
}

persian_english_models = {
    **dict.fromkeys(
        [
            "csukuangfj/matcha-tts-fa_en-khadijah|1 speaker",
            "csukuangfj/matcha-tts-fa_en-musa|1 speaker",
        ],
        _get_matcha_hf_espeak,
    ),
    "csukuangfj/vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium|1": _get_vits_piper,
}

chinese_models = {
    "csukuangfj/matcha-icefall-zh-baker|1 speaker": _get_matcha_hf,
    **dict.fromkeys(
        [
            "csukuangfj/vits-zh-hf-fanchen-wnj|1 speaker",
            "csukuangfj/vits-zh-hf-fanchen-C|187 speakers",
            "csukuangfj/sherpa-onnx-vits-zh-ll|5 speakers",
            "csukuangfj/vits-zh-hf-keqing|804 speakers",
            "csukuangfj/vits-zh-hf-theresa|804 speakers",
            "csukuangfj/vits-zh-hf-eula|804 speakers",
            "csukuangfj/vits-zh-hf-echo|804 speakers",
            "csukuangfj/vits-zh-hf-bronya|804 speakers",
            "csukuangfj/vits-zh-hf-doom|804 speakers",
            "csukuangfj/vits-zh-hf-zenyatta|804 speakers",
            "csukuangfj/vits-zh-hf-abyssinvoker|804 speakers",
            "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe|1 speaker",
            "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe_new|1 speaker",
            "csukuangfj/vits-zh-hf-fanchen-unity|1 speaker",
        ],
        _get_vits_hf,
    ),
    "csukuangfj/vits-zh-aishell3|174 speakers": _get_vits_zh_aishell3,
    "csukuangfj/vits-piper-zh_CN-huayan-medium|1 speaker": _get_vits_piper,
    # Disabled: "csukuangfj/vits-piper-zh_CN-huayan-x_low"
}
676
-
677
# English model table: each key is a repository id, optionally followed by a
# "|<label>" suffix; each value is the loader called for that key.
english_models = {
    "csukuangfj/kokoro-en-v0_19|11 speakers": _get_kokoro,
    **dict.fromkeys(
        [
            "csukuangfj/vits-piper-en_US-glados|1 speaker",
            "csukuangfj/vits-piper-en_GB-southern_english_male-medium|8 speakers",
            "csukuangfj/vits-piper-en_GB-southern_english_female-medium|6 speakers",
            "csukuangfj/vits-piper-en_US-bryce-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-john-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-norman-medium|1 speaker",
            # coqui-ai
            "csukuangfj/vits-coqui-en-ljspeech|1 speaker",
            "csukuangfj/vits-coqui-en-ljspeech-neon|1 speaker",
            "csukuangfj/vits-coqui-en-vctk|109 speakers",
            # piper, US
            "csukuangfj/vits-piper-en_GB-sweetbbak-amy|1 speaker",
            "csukuangfj/vits-piper-en_US-amy-low|1 speaker",
            "csukuangfj/vits-piper-en_US-amy-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-arctic-medium|18 speakers",
            "csukuangfj/vits-piper-en_US-danny-low|1 speaker",
            "csukuangfj/vits-piper-en_US-hfc_male-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-hfc_female-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-joe-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-kathleen-low|1 speaker",
            "csukuangfj/vits-piper-en_US-kusal-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-l2arctic-medium|24 speakers",
            "csukuangfj/vits-piper-en_US-lessac-high|1 speaker",
            "csukuangfj/vits-piper-en_US-lessac-low|1 speaker",
            "csukuangfj/vits-piper-en_US-lessac-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-libritts-high|904 speakers",
            "csukuangfj/vits-piper-en_US-libritts_r-medium|904 speakers",
            "csukuangfj/vits-piper-en_US-ljspeech-high|1 speaker",
            "csukuangfj/vits-piper-en_US-ljspeech-medium|1 speaker",
            "csukuangfj/vits-piper-en_US-ryan-high|1 speaker",
            "csukuangfj/vits-piper-en_US-ryan-low|1 speaker",
            "csukuangfj/vits-piper-en_US-ryan-medium|1 speaker",
            # piper, GB
            "csukuangfj/vits-piper-en_GB-alan-low|1 speaker",
            "csukuangfj/vits-piper-en_GB-alan-medium|1 speaker",
            "csukuangfj/vits-piper-en_GB-alan-medium",
            "csukuangfj/vits-piper-en_GB-cori-high|1 speaker",
            "csukuangfj/vits-piper-en_GB-cori-medium|1 speaker",
            "csukuangfj/vits-piper-en_GB-jenny_dioco-medium|1 speaker",
            "csukuangfj/vits-piper-en_GB-northern_english_male-medium|1 speaker",
            "csukuangfj/vits-piper-en_GB-semaine-medium|4 speakers",
            "csukuangfj/vits-piper-en_GB-southern_english_female-low|1 speaker",
            "csukuangfj/vits-piper-en_GB-vctk-medium|109 speakers",
        ],
        _get_vits_piper,
    ),
    # Repositories with dedicated loaders.
    "csukuangfj/vits-vctk|109 speakers": _get_vits_vctk,
    "csukuangfj/vits-ljs|1 speaker": _get_vits_ljs,
}
726
-
727
# Model tables: each key is a repository id, optionally followed by a
# "|<label>" suffix; every entry here is loaded by _get_vits_piper.
german_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-de-css10|1 speaker",
        "csukuangfj/vits-piper-de_DE-eva_k-x_low|1 speaker",
        "csukuangfj/vits-piper-de_DE-karlsson-low|1 speaker",
        "csukuangfj/vits-piper-de_DE-kerstin-low|1 speaker",
        # Disabled: "csukuangfj/vits-piper-de_DE-mls-medium"
        "csukuangfj/vits-piper-de_DE-pavoque-low|1 speaker",
        "csukuangfj/vits-piper-de_DE-ramona-low|1 speaker",
        "csukuangfj/vits-piper-de_DE-thorsten-low|1 speaker",
        "csukuangfj/vits-piper-de_DE-thorsten-medium|1 speaker",
        "csukuangfj/vits-piper-de_DE-thorsten-high|1 speaker",
        "csukuangfj/vits-piper-de_DE-thorsten_emotional-medium|8 speakers",
    ],
    _get_vits_piper,
)

spanish_models = dict.fromkeys(
    [
        # Disabled: "csukuangfj/vits-coqui-es-css10"
        "csukuangfj/vits-piper-es-glados-medium",
        "csukuangfj/vits-piper-es_ES-carlfm-x_low",
        "csukuangfj/vits-piper-es_ES-davefx-medium",
        # Disabled: "csukuangfj/vits-piper-es_ES-mls_10246-low"
        # Disabled: "csukuangfj/vits-piper-es_ES-mls_9972-low"
        "csukuangfj/vits-piper-es_ES-sharvard-medium",  # 2 speakers
        "csukuangfj/vits-piper-es_MX-ald-medium",
        "csukuangfj/vits-piper-es_MX-claude-high",
        "csukuangfj/vits-mimic3-es_ES-m-ailabs_low",
    ],
    _get_vits_piper,
)

french_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-fr-css10",
        # Disabled: "csukuangfj/vits-piper-fr_FR-gilles-low"
        # Disabled: "csukuangfj/vits-piper-fr_FR-mls_1840-low"
        # Disabled: "csukuangfj/vits-piper-fr_FR-mls-medium" (2 speakers, 0-female, 1-male)
        "csukuangfj/vits-piper-fr_FR-upmc-medium",  # 2 speakers, 0-female, 1-male
        "csukuangfj/vits-piper-fr_FR-tom-medium|1 speaker",
        "csukuangfj/vits-piper-fr_FR-siwis-low",  # female
        "csukuangfj/vits-piper-fr_FR-siwis-medium",
        "csukuangfj/vits-piper-fr_FR-tjiho-model1",
        "csukuangfj/vits-piper-fr_FR-tjiho-model2",
        "csukuangfj/vits-piper-fr_FR-tjiho-model3",
    ],
    _get_vits_piper,
)
767
-
768
# Model tables: each key is a repository id; every entry here is loaded by
# _get_vits_piper.
ukrainian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-uk_UA-lada-x_low",
        "csukuangfj/vits-coqui-uk-mai",
        # Disabled (does not work somehow):
        # "csukuangfj/vits-piper-uk_UA-ukrainian_tts-medium"
    ],
    _get_vits_piper,
)

russian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ru_RU-denis-medium",
        "csukuangfj/vits-piper-ru_RU-dmitri-medium",
        "csukuangfj/vits-piper-ru_RU-irina-medium",
        "csukuangfj/vits-piper-ru_RU-ruslan-medium",
    ],
    _get_vits_piper,
)

arabic_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ar_JO-kareem-low",
        "csukuangfj/vits-piper-ar_JO-kareem-medium",
    ],
    _get_vits_piper,
)

catalan_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ca_ES-upc_ona-x_low",
        "csukuangfj/vits-piper-ca_ES-upc_ona-medium",
        "csukuangfj/vits-piper-ca_ES-upc_pau-x_low",
    ],
    _get_vits_piper,
)

czech_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-cs_CZ-jirka-low",
        "csukuangfj/vits-piper-cs_CZ-jirka-medium",
        "csukuangfj/vits-coqui-cs-cv",
    ],
    _get_vits_piper,
)

danish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-da-cv",
        "csukuangfj/vits-piper-da_DK-talesyntese-medium",
    ],
    _get_vits_piper,
)

greek_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-el_GR-rapunzelina-low",
        # Disabled: "csukuangfj/vits-mimic3-el_GR-rapunzelina_low"
    ],
    _get_vits_piper,
)

finnish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-fi-css10",
        "csukuangfj/vits-piper-fi_FI-harri-low",
        "csukuangfj/vits-piper-fi_FI-harri-medium",
        "csukuangfj/vits-mimic3-fi_FI-harri-tapani-ylilammi_low",
    ],
    _get_vits_piper,
)

hungarian_models = dict.fromkeys(
    [
        # Disabled: "csukuangfj/vits-coqui-hu-css10"
        "csukuangfj/vits-piper-hu_HU-anna-medium",
        "csukuangfj/vits-piper-hu_HU-berta-medium",
        "csukuangfj/vits-piper-hu_HU-imre-medium",
        "csukuangfj/vits-mimic3-hu_HU-diana-majlinger_low",
    ],
    _get_vits_piper,
)

icelandic_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-is_IS-bui-medium",
        "csukuangfj/vits-piper-is_IS-salka-medium",
        "csukuangfj/vits-piper-is_IS-steinn-medium",
        "csukuangfj/vits-piper-is_IS-ugla-medium",
    ],
    _get_vits_piper,
)

italian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-it_IT-riccardo-x_low",
        "csukuangfj/vits-piper-it_IT-paola-medium",
    ],
    _get_vits_piper,
)

georgian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ka_GE-natia-medium",
    ],
    _get_vits_piper,
)

kazakh_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-kk_KZ-iseke-x_low",
        "csukuangfj/vits-piper-kk_KZ-issai-high",
        "csukuangfj/vits-piper-kk_KZ-raya-x_low",
    ],
    _get_vits_piper,
)

luxembourgish_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-lb_LU-marylux-medium",
    ],
    _get_vits_piper,
)

nepali_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-ne_NP-google-medium",
        "csukuangfj/vits-piper-ne_NP-google-x_low",
        "csukuangfj/vits-mimic3-ne_NP-ne-google_low",
    ],
    _get_vits_piper,
)

dutch_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-nl-css10",
        "csukuangfj/vits-piper-nl_BE-nathalie-medium",
        "csukuangfj/vits-piper-nl_BE-nathalie-x_low",
        "csukuangfj/vits-piper-nl_BE-rdh-medium",
        "csukuangfj/vits-piper-nl_BE-rdh-x_low",
        # Disabled: "csukuangfj/vits-piper-nl_NL-mls-medium"
        # Disabled: "csukuangfj/vits-piper-nl_NL-mls_5809-low"
        # Disabled: "csukuangfj/vits-piper-nl_NL-mls_7432-low"
    ],
    _get_vits_piper,
)
865
-
866
# Model tables: each key is a repository id, optionally followed by a
# "|<label>" suffix; each value is the loader called for that key.
norwegian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-no_NO-talesyntese-medium",
    ],
    _get_vits_piper,
)

polish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-pl-mai_female",
        "csukuangfj/vits-piper-pl_PL-darkman-medium",
        "csukuangfj/vits-piper-pl_PL-gosia-medium",
        "csukuangfj/vits-piper-pl_PL-mc_speech-medium",
        # Disabled: "csukuangfj/vits-piper-pl_PL-mls_6892-low"
        "csukuangfj/vits-mimic3-pl_PL-m-ailabs_low",
    ],
    _get_vits_piper,
)

portuguese_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-pt-cv",
        "csukuangfj/vits-piper-pt_BR-edresson-low",
        "csukuangfj/vits-piper-pt_BR-faber-medium",
        "csukuangfj/vits-piper-pt_PT-tugao-medium",
    ],
    _get_vits_piper,
)

romanian_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-ro-cv",
        "csukuangfj/vits-piper-ro_RO-mihai-medium",
    ],
    _get_vits_piper,
)

slovak_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-sk-cv",
        "csukuangfj/vits-piper-sk_SK-lili-medium",
    ],
    _get_vits_piper,
)

serbian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-sr_RS-serbski_institut-medium",
    ],
    _get_vits_piper,
)

swedish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-sv-cv",
        "csukuangfj/vits-piper-sv_SE-nst-medium",
    ],
    _get_vits_piper,
)

swahili_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-sw_CD-lanfrica-medium",
    ],
    _get_vits_piper,
)

turkish_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-tr_TR-dfki-medium",
        "csukuangfj/vits-piper-tr_TR-fahrettin-medium",
        "csukuangfj/vits-piper-tr_TR-fettah-medium|1 speaker",
    ],
    _get_vits_piper,
)

vietnamese_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-vi_VN-25hours_single-low",
        "csukuangfj/vits-piper-vi_VN-vais1000-medium",
        "csukuangfj/vits-piper-vi_VN-vivos-x_low",
        "csukuangfj/vits-mimic3-vi_VN-vais1000_low",
    ],
    _get_vits_piper,
)

bulgarian_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-bg-cv",
    ],
    _get_vits_piper,
)

estonian_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-et-cv",
    ],
    _get_vits_piper,
)

irish_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-ga-cv",
    ],
    _get_vits_piper,
)

croatian_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-hr-cv",
    ],
    _get_vits_piper,
)

lithuanian_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-lt-cv",
    ],
    _get_vits_piper,
)

latvian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-lv_LV-aivars-medium",
        "csukuangfj/vits-coqui-lv-cv",
    ],
    _get_vits_piper,
)

maltese_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-mt-cv",
    ],
    _get_vits_piper,
)

slovenian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-sl_SI-artur-medium",
        "csukuangfj/vits-coqui-sl-cv",
    ],
    _get_vits_piper,
)

# Bangla
bengali_models = dict.fromkeys(
    [
        "csukuangfj/vits-coqui-bn-custom_female",
        "csukuangfj/vits-mimic3-bn-multi_low",
    ],
    _get_vits_piper,
)

min_nan_models = dict.fromkeys(
    [
        "csukuangfj/vits-mms-nan",
    ],
    _get_vits_mms,
)

thai_models = dict.fromkeys(
    [
        "csukuangfj/vits-mms-tha",
    ],
    _get_vits_mms,
)

persian_models = dict.fromkeys(
    [
        "csukuangfj/vits-piper-fa_IR-amir-medium",
        "csukuangfj/vits-piper-fa_IR-gyro-medium",
        "csukuangfj/vits-mimic3-fa-haaniye_low",
    ],
    _get_vits_piper,
)

korean_models = dict.fromkeys(
    [
        "csukuangfj/vits-mimic3-ko_KO-kss_low",
    ],
    _get_vits_piper,
)

afrikaans_models = dict.fromkeys(
    [
        "csukuangfj/vits-mimic3-af_ZA-google-nwu_low",
    ],
    _get_vits_piper,
)

gujarati_models = dict.fromkeys(
    [
        "csukuangfj/vits-mimic3-gu_IN-cmu-indic_low",
    ],
    _get_vits_piper,
)

tswana_models = dict.fromkeys(
    [
        "csukuangfj/vits-mimic3-tn_ZA-google-nwu_low",
    ],
    _get_vits_piper,
)
993
-
994
- welsh_models = {
995
- "csukuangfj/vits-piper-cy_GB-gwryw_gogleddol-medium|1 speaker": _get_vits_piper,
996
- }
997
-
998
- language_to_models = {
999
- "English": list(english_models.keys()),
1000
- "Chinese (Mandarin, 普通话)": list(chinese_models.keys()),
1001
- "Chinese+English": list(chinese_english_models.keys()),
1002
- "Persian+English": list(persian_english_models.keys()),
1003
- "Cantonese (粤语)": list(cantonese_models.keys()),
1004
- "Min-nan (闽南话)": list(min_nan_models.keys()),
1005
- "Arabic": list(arabic_models.keys()),
1006
- "Afrikaans": list(afrikaans_models.keys()),
1007
- "Bengali": list(bengali_models.keys()),
1008
- "Bulgarian": list(bulgarian_models.keys()),
1009
- "Catalan": list(catalan_models.keys()),
1010
- "Croatian": list(croatian_models.keys()),
1011
- "Czech": list(czech_models.keys()),
1012
- "Danish": list(danish_models.keys()),
1013
- "Dutch": list(dutch_models.keys()),
1014
- "Estonian": list(estonian_models.keys()),
1015
- "Finnish": list(finnish_models.keys()),
1016
- "French": list(french_models.keys()),
1017
- "Georgian": list(georgian_models.keys()),
1018
- "German": list(german_models.keys()),
1019
- "Greek": list(greek_models.keys()),
1020
- "Gujarati": list(gujarati_models.keys()),
1021
- "Hungarian": list(hungarian_models.keys()),
1022
- "Icelandic": list(icelandic_models.keys()),
1023
- "Irish": list(irish_models.keys()),
1024
- "Italian": list(italian_models.keys()),
1025
- "Kazakh": list(kazakh_models.keys()),
1026
- "Korean": list(korean_models.keys()),
1027
- "Latvian": list(latvian_models.keys()),
1028
- "Lithuanian": list(lithuanian_models.keys()),
1029
- "Luxembourgish": list(luxembourgish_models.keys()),
1030
- "Maltese": list(maltese_models.keys()),
1031
- "Nepali": list(nepali_models.keys()),
1032
- "Norwegian": list(norwegian_models.keys()),
1033
- "Persian": list(persian_models.keys()),
1034
- "Polish": list(polish_models.keys()),
1035
- "Portuguese": list(portuguese_models.keys()),
1036
- "Romanian": list(romanian_models.keys()),
1037
- "Russian": list(russian_models.keys()),
1038
- "Serbian": list(serbian_models.keys()),
1039
- "Slovak": list(slovak_models.keys()),
1040
- "Slovenian": list(slovenian_models.keys()),
1041
- "Spanish": list(spanish_models.keys()),
1042
- "Swahili": list(swahili_models.keys()),
1043
- "Swedish": list(swedish_models.keys()),
1044
- "Thai": list(thai_models.keys()),
1045
- "Tswana": list(tswana_models.keys()),
1046
- "Turkish": list(turkish_models.keys()),
1047
- "Ukrainian": list(ukrainian_models.keys()),
1048
- "Vietnamese": list(vietnamese_models.keys()),
1049
- "Welsh": list(welsh_models.keys()),
1050
- }