csukuangfj committed on
Commit
111addf
·
1 Parent(s): 6142e2c

update matcha tts zh en model

Browse files
Files changed (2) hide show
  1. model.py +72 -0
  2. requirements.txt +2 -2
model.py CHANGED
@@ -427,6 +427,77 @@ def _get_matcha_hf_espeak(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
427
  return tts
428
 
429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  @lru_cache(maxsize=10)
431
  def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
432
  repo_id = repo_id.split("|")[0]
@@ -712,6 +783,7 @@ cantonese_models = {
712
  }
713
 
714
  chinese_english_models = {
 
715
  "csukuangfj/kokoro-multi-lang-v1_1|103 speakers": _get_kokoro,
716
  "csukuangfj/kokoro-multi-lang-v1_0|53 speakers": _get_kokoro,
717
  "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf, # 1
 
427
  return tts
428
 
429
 
430
@lru_cache(maxsize=10)
def _get_matcha_zh_en(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the OfflineTts object for the Matcha zh-en model.

    Args:
      repo_id:
        Model identifier. Everything from the first "|" onwards is dropped;
        what remains must be "csukuangfj/matcha-icefall-zh-en".
      speed:
        Speed factor. It is handed to the model as
        ``length_scale = 1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` configured with the acoustic model,
      vocoder, lexicon, tokens and rule FSTs obtained through ``get_file``.
      Because of ``lru_cache``, calling again with the same arguments returns
      the already-built object.
    """
    repo_id = repo_id.split("|")[0]
    assert repo_id in ("csukuangfj/matcha-icefall-zh-en",), repo_id

    if repo_id == "csukuangfj/matcha-icefall-zh-en":
        acoustic_model_name = "model-steps-3.onnx"

    def fetch(filename):
        # Every model-specific file sits at the top level of ``repo_id``.
        return get_file(repo_id=repo_id, filename=filename, subfolder=".")

    acoustic_model = fetch(acoustic_model_name)

    # The vocoder is not part of the model repo; it comes from a separate one.
    vocoder = get_file(
        repo_id="csukuangfj/sherpa-onnx-vocoders",
        filename="vocos-16khz-univ.onnx",
        subfolder=".",
    )

    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")

    # rule_fsts is passed to the config as one comma-separated string.
    rule_fst_names = ("phone-zh.fst", "date-zh.fst", "number-zh.fst")
    rule_fsts = ",".join(fetch(name) for name in rule_fst_names)

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig()
    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_model,
        vocoder=vocoder,
        lexicon=lexicon,
        tokens=tokens,
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=matcha_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )

    return sherpa_onnx.OfflineTts(tts_config)
499
+
500
+
501
  @lru_cache(maxsize=10)
502
  def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
503
  repo_id = repo_id.split("|")[0]
 
783
  }
784
 
785
  chinese_english_models = {
786
+ "csukuangfj/matcha-icefall-zh-en|1": _get_matcha_zh_en,
787
  "csukuangfj/kokoro-multi-lang-v1_1|103 speakers": _get_kokoro,
788
  "csukuangfj/kokoro-multi-lang-v1_0|53 speakers": _get_kokoro,
789
  "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf, # 1
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- # https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.12.11/sherpa_onnx-1.12.11-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
2
- sherpa-onnx>=1.12.15
3
 
4
  soundfile
 
1
+ https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.12.18/sherpa_onnx-1.12.18-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
2
+ #sherpa-onnx>=1.12.15
3
 
4
  soundfile