| import os |
| import json |
| from functools import lru_cache |
| from pathlib import Path |
|
|
# Defaults applied before huggingface_hub is imported further down; a value
# that is already present in the environment is left untouched.
for _hf_flag in ("HF_HUB_DISABLE_PROGRESS_BARS", "HF_HUB_DISABLE_TELEMETRY"):
    os.environ.setdefault(_hf_flag, "1")
del _hf_flag
|
|
| import sherpa_onnx |
| from huggingface_hub import hf_hub_download, snapshot_download |
|
|
|
|
def get_file(repo_id: str, filename: str, subfolder: str = ".") -> str:
    """Download a single file from a Hugging Face repository.

    Thin wrapper that forwards its arguments to ``hf_hub_download`` by keyword.

    Args:
        repo_id: Repository to fetch from.
        filename: Name of the file inside the repository.
        subfolder: Folder inside the repository; defaults to ``"."``.

    Returns:
        Whatever ``hf_hub_download`` returns; the loaders in this module use
        it as a local file path.
    """
    request = {"repo_id": repo_id, "filename": filename, "subfolder": subfolder}
    return hf_hub_download(**request)
|
|
|
|
@lru_cache(maxsize=10)
def _get_kokoro(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a Kokoro ``OfflineTts`` engine from a Hugging Face repository.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    data_dir = "/tmp/espeak-ng-data"
    clean_id = repo_id.split("|")[0]

    def fetch(name: str) -> str:
        return get_file(repo_id=clean_id, filename=name, subfolder=".")

    # Quantised repositories ship the weights under a different file name.
    model = fetch("model.int8.onnx" if "int8" in clean_id else "model.onnx")
    tokens = fetch("tokens.txt")
    voices = fetch("voices.bin")

    lexicon = ""
    rule_fsts = ""
    dict_dir = ""
    if "multi-lang" in clean_id:
        lexicon_en = fetch("lexicon-us-en.txt")
        lexicon_zh = fetch("lexicon-zh.txt")
        lexicon = f"{lexicon_en},{lexicon_zh}"
        date_zh = fetch("date-zh.fst")
        number_zh = fetch("number-zh.fst")
        phone_zh = fetch("phone-zh.fst")
        rule_fsts = f"{date_zh},{phone_zh},{number_zh}"
        dict_dir = "/tmp/dict"
        # The other loaders that pass /tmp/dict download it on first use;
        # do the same here so a Kokoro model can be the first one loaded.
        if not Path(dict_dir).is_dir():
            os.system("cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xf dict.tar.bz2")

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
                model=model,
                voices=voices,
                tokens=tokens,
                data_dir=data_dir,
                length_scale=1.0 / speed,
                lexicon=lexicon,
                dict_dir=dict_dir,
            ),
            provider="cpu",
            debug=False,
            num_threads=2,
        ),
        max_num_sentences=1,
        rule_fsts=rule_fsts,
    )
    return sherpa_onnx.OfflineTts(tts_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_supertonic(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a Supertonic ``OfflineTts`` engine from a Hugging Face repository.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Not used when building the configuration; it only takes part
            in the ``lru_cache`` key, matching the other loaders' signature.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.
    """
    hub_repo = repo_id.split("|")[0]

    def fetch(name: str) -> str:
        return get_file(repo_id=hub_repo, filename=name, subfolder=".")

    duration_predictor = fetch("duration_predictor.int8.onnx")
    text_encoder = fetch("text_encoder.int8.onnx")
    vector_estimator = fetch("vector_estimator.int8.onnx")
    vocoder = fetch("vocoder.int8.onnx")
    tts_json = fetch("tts.json")
    unicode_indexer = fetch("unicode_indexer.bin")
    voice_style = fetch("voice.bin")

    supertonic_config = sherpa_onnx.OfflineTtsSupertonicModelConfig(
        duration_predictor=duration_predictor,
        text_encoder=text_encoder,
        vector_estimator=vector_estimator,
        vocoder=vocoder,
        tts_json=tts_json,
        unicode_indexer=unicode_indexer,
        voice_style=voice_style,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        supertonic=supertonic_config,
        provider="cpu",
        debug=False,
        num_threads=2,
    )
    engine_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(engine_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a VITS ``OfflineTts`` engine for a piper / mimic3 style repository.

    The whole repository is fetched with ``snapshot_download`` and file paths
    are built relative to that snapshot directory.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    hub_repo = repo_id.split("|")[0]

    # The .onnx file is named after the repository with its family prefix
    # sliced off; anything that is neither piper nor mimic3 uses "model".
    onnx_stem = "model"
    for marker, family_prefix in (("piper", "vits-piper-"), ("mimic3", "vits-mimic3-")):
        if marker in hub_repo:
            onnx_stem = hub_repo.split("/")[1][len(family_prefix):]
            break

    snapshot_dir = snapshot_download(hub_repo)
    model_path = f"{snapshot_dir}/{onnx_stem}.onnx"
    tokens_path = f"{snapshot_dir}/tokens.txt"

    lexicon_voices = (
        "vits-piper-zh_CN-chaowen-medium",
        "vits-piper-zh_CN-xiao_ya-medium",
    )
    if any(voice in hub_repo for voice in lexicon_voices):
        # These two voices use a lexicon plus rule FSTs and no espeak data dir.
        espeak_dir = ""
        lexicon_path = f"{snapshot_dir}/lexicon.txt"
        fst_paths = ",".join(
            f"{snapshot_dir}/{fst}" for fst in ("phone.fst", "date.fst", "number.fst")
        )
    else:
        espeak_dir = "/tmp/espeak-ng-data"
        lexicon_path = ""
        fst_paths = ""

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        data_dir=espeak_dir,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=False,
        num_threads=2,
    )
    engine_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        max_num_sentences=1,
        rule_fsts=fst_paths,
    )
    return sherpa_onnx.OfflineTts(engine_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the VITS aishell3 ``OfflineTts`` engine.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    hub_repo = repo_id.split("|")[0]

    def fetch(name: str) -> str:
        return get_file(repo_id=hub_repo, filename=name, subfolder=".")

    model_path = fetch("vits-aishell3.onnx")
    lexicon_path = fetch("lexicon.txt")
    tokens_path = fetch("tokens.txt")
    fst_paths = ",".join(
        fetch(fst)
        for fst in ("phone.fst", "date.fst", "number.fst", "new_heteronym.fst")
    )
    far_path = fetch("rule.far")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
        provider="cpu",
        debug=False,
        num_threads=2,
    )
    engine_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=fst_paths,
        rule_fars=far_path,
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(engine_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_matcha_zh_en(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the Matcha Chinese/English ``OfflineTts`` engine.

    The acoustic model, lexicon, tokens and rule FSTs come from the requested
    repository; the vocoder is always taken from
    ``csukuangfj/sherpa-onnx-vocoders``.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    hub_repo = repo_id.split("|")[0]

    def fetch(name: str) -> str:
        return get_file(repo_id=hub_repo, filename=name, subfolder=".")

    acoustic_path = fetch("model-steps-3.onnx")
    vocoder_path = get_file(
        repo_id="csukuangfj/sherpa-onnx-vocoders",
        filename="vocos-16khz-univ.onnx",
        subfolder=".",
    )
    lexicon_path = fetch("lexicon.txt")
    tokens_path = fetch("tokens.txt")
    fst_paths = ",".join(
        fetch(fst) for fst in ("phone-zh.fst", "date-zh.fst", "number-zh.fst")
    )

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_path,
        vocoder=vocoder_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        data_dir="/tmp/espeak-ng-data",
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=False,
        num_threads=2,
    )
    engine_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=fst_paths,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(engine_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_matcha_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a Matcha ``OfflineTts`` engine that uses the ``/tmp/dict`` directory.

    If ``/tmp/dict`` is not a directory yet, the dictionary archive is
    downloaded and unpacked under ``/tmp`` through a shell command before any
    model file is fetched. The vocoder is always taken from
    ``csukuangfj/sherpa-onnx-hifigan``.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    hub_repo = repo_id.split("|")[0]
    jieba_dir = "/tmp/dict"

    dict_present = Path(jieba_dir).is_dir()
    if not dict_present:
        # The exit status of the shell command is not inspected.
        os.system("cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xf dict.tar.bz2")

    def fetch(name: str) -> str:
        return get_file(repo_id=hub_repo, filename=name, subfolder=".")

    acoustic_path = fetch("model-steps-3.onnx")
    vocoder_path = get_file(
        repo_id="csukuangfj/sherpa-onnx-hifigan",
        filename="hifigan_v2.onnx",
        subfolder=".",
    )
    lexicon_path = fetch("lexicon.txt")
    tokens_path = fetch("tokens.txt")
    fst_paths = ",".join(fetch(fst) for fst in ("phone.fst", "date.fst", "number.fst"))

    matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
        acoustic_model=acoustic_path,
        vocoder=vocoder_path,
        lexicon=lexicon_path,
        tokens=tokens_path,
        dict_dir=jieba_dir,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(),
        matcha=matcha_config,
        provider="cpu",
        debug=False,
        num_threads=2,
    )
    engine_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=fst_paths,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(engine_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a VITS ``OfflineTts`` engine for the lexicon-based repositories.

    Every model except ``vits-cantonese-hf-xiaomaiiwn`` is configured with
    ``dict_dir="/tmp/dict"``; the dictionary archive is downloaded and
    unpacked under ``/tmp`` when that directory does not exist yet. The
    Cantonese model uses a single ``rule.fst`` and no dictionary.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used as the repository id.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    clean_id = repo_id.split("|")[0]
    is_cantonese = "vits-cantonese-hf-xiaomaiiwn" in clean_id

    def fetch(name: str) -> str:
        return get_file(repo_id=clean_id, filename=name, subfolder=".")

    # fanchen and the Cantonese repository name the .onnx file after the
    # repository itself; every other repository ships "model.onnx".
    if "fanchen" in clean_id or is_cantonese:
        model_stem = clean_id.split("/")[-1]
    else:
        model_stem = "model"

    # Tie the download to whether dict_dir is actually passed below, so the
    # fanchen models do not point at a directory nobody created.
    vits_dict_dir = "" if is_cantonese else "/tmp/dict"
    if vits_dict_dir and not Path(vits_dict_dir).is_dir():
        os.system("cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xf dict.tar.bz2")

    model_file = fetch(f"{model_stem}.onnx")
    lexicon = fetch("lexicon.txt")
    tokens = fetch("tokens.txt")
    if is_cantonese:
        rule_fsts = fetch("rule.fst")
    else:
        rule_fsts = ",".join(
            fetch(fst) for fst in ("phone.fst", "date.fst", "number.fst")
        )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model_file,
                lexicon=lexicon,
                tokens=tokens,
                dict_dir=vits_dict_dir,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=False,
            num_threads=2,
        ),
        rule_fsts=rule_fsts,
        rule_fars="",
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
|
|
|
|
@lru_cache(maxsize=10)
def _get_melotts_onnx(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build a MeloTTS ``OfflineTts`` engine from ``MiaoMint/MeloTTS-ONNX``.

    The registry label has the form ``"<owner>/<repo>/<lang>|<note>"``. Only
    the language segment is taken from it; files are always fetched from
    ``MiaoMint/MeloTTS-ONNX`` under ``onnx_exports/<lang>/``. The lexicon is
    optional: when the repository has no ``lexicon.txt`` for the language an
    empty lexicon path is used.

    Args:
        repo_id: Registry label; the text after the first ``|`` is ignored.
        speed: Speaking-rate multiplier, forwarded as
            ``length_scale = 1.0 / speed``.

    Returns:
        A ``sherpa_onnx.OfflineTts`` running on CPU with two threads.

    Raises:
        ZeroDivisionError: If ``speed`` is 0.
    """
    # Local import: keeps the narrowed exception type next to its only use.
    from huggingface_hub.utils import EntryNotFoundError

    melo_repo = "MiaoMint/MeloTTS-ONNX"
    clean_id = repo_id.split("|")[0]
    parts = clean_id.split("/")
    lang_subdir = parts[2] if len(parts) > 2 else parts[1]
    export_dir = f"onnx_exports/{lang_subdir}"

    model = hf_hub_download(repo_id=melo_repo, filename=f"{export_dir}/model.onnx")
    tokens = hf_hub_download(repo_id=melo_repo, filename=f"{export_dir}/tokens.txt")
    try:
        lexicon_path = hf_hub_download(
            repo_id=melo_repo, filename=f"{export_dir}/lexicon.txt"
        )
    except EntryNotFoundError:
        # Only a genuinely missing file means "no lexicon". Other failures
        # propagate instead of caching an engine that silently lacks one.
        lexicon_path = ""

    data_dir = "/tmp/espeak-ng-data"
    dict_dir = "/tmp/dict" if lang_subdir == "zh" else ""
    if dict_dir and not Path(dict_dir).is_dir():
        os.system("cd /tmp; curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2; tar xf dict.tar.bz2")

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=model,
                lexicon=lexicon_path,
                data_dir=data_dir,
                tokens=tokens,
                dict_dir=dict_dir,
                length_scale=1.0 / speed,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(),
            provider="cpu",
            debug=False,
            num_threads=2,
        ),
        max_num_sentences=1,
    )
    return sherpa_onnx.OfflineTts(tts_config)
|
|
|
|
# Model registries, one per language group. Each key is a label of the form
# "<repo id>|<note>" and each value is the loader that builds the engine; the
# loaders only use the text before the first "|". Insertion order is kept
# because the per-language lists below are derived from it.
chinese_models = {
    "csukuangfj2/vits-piper-zh_CN-chaowen-medium|1 speaker": _get_vits_piper,
    "csukuangfj2/vits-piper-zh_CN-xiao_ya-medium|1 speaker": _get_vits_piper,
    "csukuangfj/matcha-icefall-zh-baker|1 speaker": _get_matcha_hf,
    "csukuangfj/vits-zh-hf-fanchen-wnj|1 speaker": _get_vits_hf,
    "csukuangfj/vits-zh-hf-fanchen-C|187 speakers": _get_vits_hf,
    "csukuangfj/sherpa-onnx-vits-zh-ll|5 speakers": _get_vits_hf,
    "csukuangfj/vits-zh-aishell3|174 speakers": _get_vits_zh_aishell3,
    "csukuangfj/vits-piper-zh_CN-huayan-medium|1 speaker": _get_vits_piper,
    "MiaoMint/MeloTTS-ONNX/zh|1 speaker": _get_melotts_onnx,
}


chinese_english_models = {
    "csukuangfj/matcha-icefall-zh-en|1": _get_matcha_zh_en,
    "csukuangfj/kokoro-multi-lang-v1_1|103 speakers": _get_kokoro,
    "csukuangfj/kokoro-int8-multi-lang-v1_1|103 speakers": _get_kokoro,
    "csukuangfj/kokoro-multi-lang-v1_0|53 speakers": _get_kokoro,
    "csukuangfj/kokoro-int8-multi-lang-v1_0|53 speakers": _get_kokoro,
    "csukuangfj2/sherpa-onnx-supertonic-3-tts-int8-2026-05-11|10 speakers": _get_supertonic,
    "csukuangfj/vits-melo-tts-zh_en|1": _get_vits_hf,
    "MiaoMint/MeloTTS-ONNX/zh|1 speaker": _get_melotts_onnx,
}


cantonese_models = {
    "csukuangfj/vits-cantonese-hf-xiaomaiiwn|1 speaker": _get_vits_hf,
}


english_models = {
    "csukuangfj/kokoro-en-v0_19|11 speakers": _get_kokoro,
    "csukuangfj2/sherpa-onnx-supertonic-3-tts-int8-2026-05-11|10 speakers": _get_supertonic,
    "MiaoMint/MeloTTS-ONNX/en_newest|1 speaker": _get_melotts_onnx,
    "csukuangfj/vits-piper-en_US-lessac-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_US-ryan-high|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-alan-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-jenny_dioco-medium|1 speaker": _get_vits_piper,
    "csukuangfj/vits-piper-en_GB-vctk-medium|109 speakers": _get_vits_piper,
}


# Language name -> model labels, in registry order.
language_to_models = {
    language: list(registry)
    for language, registry in (
        ("中文(普通话)", chinese_models),
        ("中英双语", chinese_english_models),
        ("粤语", cantonese_models),
        ("英语", english_models),
    )
}


# Flat label -> loader lookup. A label listed in several registries keeps the
# position of its first occurrence and the loader of its last one.
all_model_dicts = {
    label: loader
    for registry in (
        chinese_models,
        chinese_english_models,
        cantonese_models,
        english_models,
    )
    for label, loader in registry.items()
}
|
|
|
|
# Speaker id -> display name tables for the Kokoro checkpoints.
_KOKORO_EN_SPEAKERS = (
    (0, "美式女声-af"), (1, "美式女声-bella"),
    (2, "美式女声-nicole"), (3, "美式女声-sarah"),
    (4, "美式女声-sky"), (5, "美式男声-adam"),
    (6, "美式男声-michael"), (7, "英式女声-emma"),
    (8, "英式女声-isabella"), (9, "英式男声-george"),
    (10, "英式男声-lewis"),
)

_KOKORO_MULTI_V10_SPEAKERS = (
    (0, "美式女声-alloy"), (1, "美式女声-aoede"),
    (2, "美式女声-bella"), (3, "美式女声-heart"),
    (4, "美式女声-jessica"), (5, "美式女声-kore"),
    (6, "美式女声-nicole"), (7, "美式女声-nova"),
    (8, "美式女声-river"), (9, "美式女声-sarah"),
    (10, "美式女声-sky"), (11, "美式男声-adam"),
    (12, "美式男声-echo"), (13, "美式男声-eric"),
    (14, "美式男声-fenrir"), (15, "美式男声-liam"),
    (16, "美式男声-michael"), (17, "美式男声-onyx"),
    (18, "美式男声-puck"), (19, "美式男声-santa"),
    (20, "英式女声-alice"), (21, "英式女声-emma"),
    (22, "英式女声-isabella"), (23, "英式女声-lily"),
    (24, "英式男声-daniel"), (25, "英式男声-fable"),
    (26, "英式男声-george"), (27, "英式男声-lewis"),
    # NOTE(review): ids 28/29 are the Spanish voices (ef_dora / em_alex) in
    # the upstream Kokoro v1.0 speaker list; they were labelled "英语"
    # (English) before. Confirm against the model card.
    (28, "西语女声-dora"), (29, "西语男声-alex"),
    (30, "法语女声-siwis"), (31, "印地语女声-alpha"),
    (32, "印地语女声-beta"), (33, "印地语男声-omega"),
    (34, "印地语男声-psi"), (35, "意语女声-sara"),
    (36, "意语男声-nicola"), (37, "日语女声-alpha"),
    (38, "日语女声-gongitsune"), (39, "日语女声-nezumi"),
    (40, "日语女声-tebukuro"), (41, "日语男声-kumo"),
    (42, "葡语女声-dora"), (43, "葡语男声-alex"),
    (44, "葡语男声-santa"), (45, "中文女声-小北"),
    (46, "中文女声-小妮"), (47, "中文女声-小小"),
    (48, "中文女声-小艺"), (49, "中文男声-云剑"),
    (50, "中文男声-云希"), (51, "中文男声-云夏"),
    (52, "中文男声-云扬"),
)

_KOKORO_MULTI_V11_SPEAKERS = (
    (0, "美式女声-maple"), (1, "美式女声-sol"),
    (2, "英式女声-vale"),
    (3, "中文女声-001"), (4, "中文女声-002"),
    (5, "中文女声-003"), (6, "中文女声-004"),
    (7, "中文女声-005"), (8, "中文女声-006"),
    (9, "中文女声-007"), (10, "中文女声-008"),
    (11, "中文女声-017"), (12, "中文女声-018"),
    (13, "中文女声-019"), (14, "中文女声-021"),
    (15, "中文女声-022"), (16, "中文女声-023"),
    (17, "中文女声-024"), (18, "中文女声-026"),
    (19, "中文女声-027"), (20, "中文女声-028"),
    (21, "中文女声-032"), (22, "中文女声-036"),
    (23, "中文女声-038"), (24, "中文女声-039"),
    (25, "中文女声-040"), (26, "中文女声-042"),
    (27, "中文女声-043"), (28, "中文女声-044"),
    (29, "中文女声-046"), (30, "中文女声-047"),
    (31, "中文女声-048"), (32, "中文女声-049"),
    (33, "中文女声-051"), (34, "中文女声-059"),
    (35, "中文女声-060"), (36, "中文女声-067"),
    (37, "中文女声-070"), (38, "中文女声-071"),
    (39, "中文女声-072"), (40, "中文女声-073"),
    (41, "中文女声-074"), (42, "中文女声-075"),
    (43, "中文女声-076"), (44, "中文女声-077"),
    (45, "中文女声-078"), (46, "中文女声-079"),
    (47, "中文女声-083"), (48, "中文女声-084"),
    (49, "中文女声-085"), (50, "中文女声-086"),
    (51, "中文女声-087"), (52, "中文女声-088"),
    (53, "中文女声-090"), (54, "中文女声-092"),
    (55, "中文女声-093"), (56, "中文女声-094"),
    (57, "中文女声-099"),
    (58, "中文男声-009"), (59, "中文男声-010"),
    (60, "中文男声-011"), (61, "中文男声-012"),
    (62, "中文男声-013"), (63, "中文男声-014"),
    (64, "中文男声-015"), (65, "中文男声-016"),
    (66, "中文男声-020"), (67, "中文男声-025"),
    (68, "中文男声-029"), (69, "中文男声-030"),
    (70, "中文男声-031"), (71, "中文男声-033"),
    (72, "中文男声-034"), (73, "中文男声-035"),
    (74, "中文男声-037"), (75, "中文男声-041"),
    (76, "中文男声-045"), (77, "中文男声-050"),
    (78, "中文男声-052"), (79, "中文男声-053"),
    (80, "中文男声-054"), (81, "中文男声-055"),
    (82, "中文男声-056"), (83, "中文男声-057"),
    (84, "中文男声-058"), (85, "中文男声-061"),
    (86, "中文男声-062"), (87, "中文男声-063"),
    (88, "中文男声-064"), (89, "中文男声-065"),
    (90, "中文男声-066"), (91, "中文男声-068"),
    (92, "中文男声-069"), (93, "中文男声-080"),
    (94, "中文男声-081"), (95, "中文男声-082"),
    (96, "中文男声-089"), (97, "中文男声-091"),
    (98, "中文男声-095"), (99, "中文男声-096"),
    (100, "中文男声-097"), (101, "中文男声-098"),
    (102, "中文男声-100"),
)

# Repository id -> speaker table; the int8 repositories share the table of
# their full-precision counterpart.
_KOKORO_SPEAKER_TABLES = {
    "csukuangfj/kokoro-en-v0_19": _KOKORO_EN_SPEAKERS,
    "csukuangfj/kokoro-multi-lang-v1_1": _KOKORO_MULTI_V11_SPEAKERS,
    "csukuangfj/kokoro-multi-lang-v1_0": _KOKORO_MULTI_V10_SPEAKERS,
    "csukuangfj/kokoro-int8-multi-lang-v1_1": _KOKORO_MULTI_V11_SPEAKERS,
    "csukuangfj/kokoro-int8-multi-lang-v1_0": _KOKORO_MULTI_V10_SPEAKERS,
}

_SUPERTONIC_SPEAKERS = (
    (0, "男声-M1(活泼自信)"),
    (1, "男声-M2(深沉稳重)"),
    (2, "男声-M3(专业权威)"),
    (3, "男声-M4(柔和亲切)"),
    (4, "男声-M5(温暖舒缓)"),
    (5, "女声-F1(沉稳从容)"),
    (6, "女声-F2(明快活泼)"),
    (7, "女声-F3(清晰专业)"),
    (8, "女声-F4(干练自信)"),
    (9, "女声-F5(温柔平和)"),
)

_VITS_ZH_LL_SPEAKERS = (
    (0, "女声0"), (1, "女声1"), (2, "男声0"), (3, "男声1"), (4, "男声2"),
)


def _warn_speaker_map_failure(clean_id: str) -> None:
    """Log the exception currently being handled while loading a speaker map."""
    import logging  # local import: keeps this block self-contained

    logging.getLogger(__name__).warning(
        "Could not load speaker names for %s; falling back to static tables",
        clean_id,
        exc_info=True,
    )


def _load_piper_speaker_map(clean_id: str):
    """Return ``{id: name}`` from a piper ``*.onnx.json`` file, or ``None``.

    ``None`` means no such file was found or loading it failed.
    """
    try:
        local_dir = snapshot_download(clean_id)
        # sorted() makes the choice deterministic if several files match.
        for fn in sorted(os.listdir(local_dir)):
            if fn.endswith(".onnx.json"):
                with open(os.path.join(local_dir, fn), encoding="utf-8") as f:
                    data = json.load(f)
                sid_map = data.get("speaker_id_map", {})
                # The file maps name -> id; callers want id -> name.
                return {sid: name for name, sid in sid_map.items()}
    except Exception:  # deliberate best-effort: any failure means "unknown"
        _warn_speaker_map_failure(clean_id)
    return None


def _load_fanchen_speaker_map(clean_id: str):
    """Return ``{index: name}`` from the repository's ``G_C.json``, or ``None``."""
    try:
        config_path = hf_hub_download(repo_id=clean_id, filename="G_C.json")
        with open(config_path, encoding="utf-8") as fp:
            data = json.load(fp)
        return dict(enumerate(data.get("speakers", [])))
    except Exception:  # deliberate best-effort: any failure means "unknown"
        _warn_speaker_map_failure(clean_id)
    return None


@lru_cache(maxsize=32)
def get_speaker_map(repo_id: str) -> dict:
    """Return a ``{speaker id: display name}`` mapping for a model.

    Piper and fanchen-C repositories are read from JSON files downloaded from
    Hugging Face; when that fails the lookup falls through to the static
    tables below, which normally yields an empty mapping. Kokoro, Supertonic
    and vits-zh-ll use built-in tables.

    Results are cached per ``repo_id``, so the same dict object is returned
    on repeated calls and callers should treat it as read-only. Because the
    cache also stores fallback results, a failed download is not retried for
    that ``repo_id`` while it stays in the cache.

    Args:
        repo_id: Registry label ``"<owner>/<repo>|<note>"``; only the text
            before the first ``|`` is used.

    Returns:
        A dict from integer speaker id to name, or ``{}`` when no names are
        known for the model.
    """
    clean_id = repo_id.split("|")[0]

    if "piper" in clean_id:
        piper_map = _load_piper_speaker_map(clean_id)
        if piper_map is not None:
            return piper_map
    if "vits-zh-hf-fanchen-C" in clean_id:
        fanchen_map = _load_fanchen_speaker_map(clean_id)
        if fanchen_map is not None:
            return fanchen_map
    if "vits-zh-aishell3" in clean_id:
        return {}
    if clean_id in _KOKORO_SPEAKER_TABLES:
        return dict(_KOKORO_SPEAKER_TABLES[clean_id])
    if "supertonic" in clean_id:
        return dict(_SUPERTONIC_SPEAKERS)
    if "vits-zh-ll" in clean_id:
        return dict(_VITS_ZH_LL_SPEAKERS)
    return {}
|
|
|
|
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Build the TTS engine registered for ``repo_id``, with caching.

    Args:
        repo_id: Full registry label, including the ``|<note>`` suffix; it
            must be a key of ``all_model_dicts``.
        speed: Speaking-rate value passed through to the registered loader.

    Returns:
        The ``sherpa_onnx.OfflineTts`` produced by the registered loader.

    Raises:
        ValueError: If ``repo_id`` is not a registered label.
    """
    loader = all_model_dicts.get(repo_id)
    if loader is None:
        raise ValueError(f"不支持的模型: {repo_id}")
    return loader(repo_id, speed)