Spaces:
Build error
Build error
Commit
·
18b91f7
1
Parent(s):
c88be4e
add a new zh+en model
Browse files
model.py
CHANGED
|
@@ -760,12 +760,57 @@ def _get_wenetspeech_pre_trained_model(
|
|
| 760 |
return recognizer
|
| 761 |
|
| 762 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
@lru_cache(maxsize=10)
|
| 764 |
def _get_chinese_english_mixed_model(
|
| 765 |
repo_id: str,
|
| 766 |
decoding_method: str,
|
| 767 |
num_active_paths: int,
|
| 768 |
-
):
|
| 769 |
assert repo_id in [
|
| 770 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
|
| 771 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
|
|
@@ -1705,6 +1750,7 @@ english_models = {
|
|
| 1705 |
|
| 1706 |
chinese_english_mixed_models = {
|
| 1707 |
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
|
|
|
| 1708 |
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
|
| 1709 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
|
| 1710 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
|
|
|
|
| 760 |
return recognizer
|
| 761 |
|
| 762 |
|
| 763 |
+
@lru_cache(maxsize=10)
def _get_chinese_english_mixed_model_onnx(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create the offline ONNX transducer recognizer for the zh+en model.

    The encoder, decoder and joiner ONNX files are resolved from the
    ``exp`` subfolder of ``repo_id`` and the token table from
    ``data/lang_bbpe_2000``; they are then handed to
    ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    Results are memoized by ``lru_cache`` (up to 10 distinct argument
    combinations), so repeated calls with the same arguments return the
    same recognizer object.

    Args:
      repo_id:
        Model repository identifier. Only
        ``zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22`` is accepted.
      decoding_method:
        Forwarded unchanged to ``from_transducer`` as ``decoding_method``.
      num_active_paths:
        Forwarded to ``from_transducer`` as ``max_active_paths``.

    Returns:
      The recognizer built by ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    Raises:
      AssertionError: If ``repo_id`` is not one of the supported repositories.
    """
    assert repo_id in [
        "zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22",
    ], repo_id

    encoder_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="encoder-epoch-34-avg-19.int8.onnx",
        subfolder="exp",
    )

    # The decoder must be loaded from the decoder export; the previous
    # revision pointed at the encoder file by mistake.
    decoder_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="decoder-epoch-34-avg-19.onnx",
        subfolder="exp",
    )

    joiner_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="joiner-epoch-34-avg-19.int8.onnx",
        subfolder="exp",
    )

    # "lang_bbpe_2000" (not "lanb_bbpe_2000") is the token directory name.
    tokens = _get_token_filename(repo_id=repo_id, subfolder="data/lang_bbpe_2000")

    recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens,
        encoder=encoder_model,
        decoder=decoder_model,
        joiner=joiner_model,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )

    return recognizer
|
| 806 |
+
|
| 807 |
+
|
| 808 |
@lru_cache(maxsize=10)
|
| 809 |
def _get_chinese_english_mixed_model(
|
| 810 |
repo_id: str,
|
| 811 |
decoding_method: str,
|
| 812 |
num_active_paths: int,
|
| 813 |
+
) -> sherpa.OfflineRecognizer:
|
| 814 |
assert repo_id in [
|
| 815 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
|
| 816 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
|
|
|
|
| 1750 |
|
| 1751 |
chinese_english_mixed_models = {
|
| 1752 |
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
| 1753 |
+
"zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22": _get_chinese_english_mixed_model_onnx,
|
| 1754 |
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
|
| 1755 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
|
| 1756 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
|