Commit
·
10b604a
1
Parent(s):
387e4e6
add more models
Browse files
model.py
CHANGED
|
@@ -1282,6 +1282,54 @@ def _get_french_pre_trained_model(
|
|
| 1282 |
return recognizer
|
| 1283 |
|
| 1284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1285 |
@lru_cache(maxsize=10)
|
| 1286 |
def _get_sherpa_onnx_nemo_transducer_models_int8(
|
| 1287 |
repo_id: str,
|
|
@@ -2159,6 +2207,8 @@ russian_models = {
|
|
| 2159 |
"csukuangfj/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24": _get_russian_pre_trained_model_ctc,
|
| 2160 |
"alphacep/vosk-model-ru": _get_russian_pre_trained_model,
|
| 2161 |
"alphacep/vosk-model-small-ru": _get_russian_pre_trained_model,
|
|
|
|
|
|
|
| 2162 |
}
|
| 2163 |
|
| 2164 |
chinese_cantonese_english_models = {
|
|
|
|
| 1282 |
return recognizer
|
| 1283 |
|
| 1284 |
|
| 1285 |
+
@lru_cache(maxsize=10)
def _get_streaming_vosk_pre_trained_model(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OnlineRecognizer:
    """Create a streaming transducer recognizer for a Russian Vosk zipformer repo.

    Results are memoized with ``lru_cache(maxsize=10)``, keyed on the three
    arguments.

    Args:
      repo_id:
        One of the two supported repositories listed in ``supported_repos``
        below (the int8 and the non-int8 variant).
      decoding_method:
        Forwarded unchanged as ``decoding_method`` to
        ``sherpa_onnx.OnlineRecognizer.from_transducer``.
      num_active_paths:
        Forwarded as ``max_active_paths`` to
        ``sherpa_onnx.OnlineRecognizer.from_transducer``.

    Returns:
      The recognizer returned by
      ``sherpa_onnx.OnlineRecognizer.from_transducer``.

    Raises:
      AssertionError: If ``repo_id`` is not one of the supported repositories.
    """
    supported_repos = [
        "csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16",
        "csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16",
    ]
    assert repo_id in supported_repos, repo_id

    # Only the encoder and joiner file names depend on the variant; the
    # decoder is requested as "decoder.onnx" for both repositories.
    if "int8" in repo_id:
        encoder_filename, joiner_filename = "encoder.int8.onnx", "joiner.int8.onnx"
    else:
        encoder_filename, joiner_filename = "encoder.onnx", "joiner.onnx"

    # Every file is looked up with subfolder="." in the repository.
    encoder_path = _get_nn_model_filename(
        repo_id=repo_id,
        filename=encoder_filename,
        subfolder=".",
    )
    decoder_path = _get_nn_model_filename(
        repo_id=repo_id,
        filename="decoder.onnx",
        subfolder=".",
    )
    joiner_path = _get_nn_model_filename(
        repo_id=repo_id,
        filename=joiner_filename,
        subfolder=".",
    )
    tokens_path = _get_token_filename(repo_id=repo_id, subfolder=".")

    return sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=tokens_path,
        encoder=encoder_path,
        decoder=decoder_path,
        joiner=joiner_path,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )
|
| 1331 |
+
|
| 1332 |
+
|
| 1333 |
@lru_cache(maxsize=10)
|
| 1334 |
def _get_sherpa_onnx_nemo_transducer_models_int8(
|
| 1335 |
repo_id: str,
|
|
|
|
| 2207 |
"csukuangfj/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24": _get_russian_pre_trained_model_ctc,
|
| 2208 |
"alphacep/vosk-model-ru": _get_russian_pre_trained_model,
|
| 2209 |
"alphacep/vosk-model-small-ru": _get_russian_pre_trained_model,
|
| 2210 |
+
"csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16": _get_streaming_vosk_pre_trained_model,
|
| 2211 |
+
"csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16": _get_streaming_vosk_pre_trained_model,
|
| 2212 |
}
|
| 2213 |
|
| 2214 |
chinese_cantonese_english_models = {
|