Commit
·
d6e8fba
1
Parent(s):
4c746d7
Add MedASR
Browse files- model.py +41 -0
- requirements.txt +2 -2
model.py
CHANGED
|
@@ -182,6 +182,10 @@ def get_pretrained_model(
|
|
| 182 |
return funsar_nano_31_languages_models[repo_id](
|
| 183 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
| 184 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
elif repo_id in twenty_five_languages_models:
|
| 186 |
return twenty_five_languages_models[repo_id](
|
| 187 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
|
@@ -1425,6 +1429,36 @@ def _get_sherpa_onnx_omnilingual_asr_models(
|
|
| 1425 |
return recognizer
|
| 1426 |
|
| 1427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1428 |
@lru_cache(maxsize=10)
|
| 1429 |
def _get_sherpa_onnx_nemo_transducer_models_int8(
|
| 1430 |
repo_id: str,
|
|
@@ -2293,6 +2327,11 @@ funsar_nano_31_languages_models = {
|
|
| 2293 |
"csukuangfj/sherpa-onnx-sense-voice-funasr-nano-2025-12-17": _get_sense_voice_pre_trained_model,
|
| 2294 |
}
|
| 2295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2296 |
more_than_1600_languages_models = {
|
| 2297 |
"csukuangfj/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12": _get_sherpa_onnx_omnilingual_asr_models,
|
| 2298 |
"csukuangfj/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-2025-11-12": _get_sherpa_onnx_omnilingual_asr_models,
|
|
@@ -2414,6 +2453,7 @@ portuguese_brazlian_models = {
|
|
| 2414 |
all_models = {
|
| 2415 |
**funsar_nano_31_languages_models,
|
| 2416 |
**more_than_1600_languages_models,
|
|
|
|
| 2417 |
**twenty_five_languages_models,
|
| 2418 |
**multi_lingual_models,
|
| 2419 |
**sichuan_models,
|
|
@@ -2437,6 +2477,7 @@ all_models = {
|
|
| 2437 |
}
|
| 2438 |
|
| 2439 |
language_to_models = {
|
|
|
|
| 2440 |
"31 languages (FunASR Nano)": list(funsar_nano_31_languages_models.keys()),
|
| 2441 |
"1600+ languages": list(more_than_1600_languages_models.keys()),
|
| 2442 |
"25 European languages": list(twenty_five_languages_models.keys()),
|
|
|
|
| 182 |
return funsar_nano_31_languages_models[repo_id](
|
| 183 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
| 184 |
)
|
| 185 |
+
elif repo_id in medical_english_models:
|
| 186 |
+
return medical_english_models[repo_id](
|
| 187 |
+
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
| 188 |
+
)
|
| 189 |
elif repo_id in twenty_five_languages_models:
|
| 190 |
return twenty_five_languages_models[repo_id](
|
| 191 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
|
|
|
| 1429 |
return recognizer
|
| 1430 |
|
| 1431 |
|
| 1432 |
+
@lru_cache(maxsize=10)
def _get_medasr_model(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline recognizer for one of the MedASR CTC repositories.

    Results are memoized by ``lru_cache`` (up to 10 distinct argument
    combinations).

    Args:
      repo_id:
        Must be one of the two MedASR repositories listed in the body.
        When the id contains ``"int8"``, ``model.int8.onnx`` is requested;
        otherwise ``model.onnx`` is requested.
      decoding_method:
        Not used by this function. It is part of the signature because the
        model tables are invoked with this keyword by their caller.
      num_active_paths:
        Not used by this function; kept for the same reason as
        ``decoding_method``.

    Returns:
      The object returned by ``sherpa_onnx.OfflineRecognizer.from_medasr_ctc``.

    Raises:
      AssertionError: If ``repo_id`` is not a supported MedASR repository.
    """
    supported_repos = (
        "csukuangfj/sherpa-onnx-medasr-ctc-en-int8-2025-12-25",
        "csukuangfj/sherpa-onnx-medasr-ctc-en-2025-12-25",
    )
    assert repo_id in supported_repos, repo_id

    # The quantized and the float repositories differ only in the name of
    # the model file they ship.
    if "int8" in repo_id:
        model_basename = "model.int8.onnx"
    else:
        model_basename = "model.onnx"

    # Resolve the model file first and the token table second, both from
    # the top-level folder (".") of the repository.
    model_path = _get_nn_model_filename(
        repo_id=repo_id,
        filename=model_basename,
        subfolder=".",
    )
    tokens_path = _get_token_filename(repo_id=repo_id, subfolder=".")

    return sherpa_onnx.OfflineRecognizer.from_medasr_ctc(
        tokens=tokens_path,
        model=model_path,
        num_threads=2,
    )
|
| 1460 |
+
|
| 1461 |
+
|
| 1462 |
@lru_cache(maxsize=10)
|
| 1463 |
def _get_sherpa_onnx_nemo_transducer_models_int8(
|
| 1464 |
repo_id: str,
|
|
|
|
| 2327 |
"csukuangfj/sherpa-onnx-sense-voice-funasr-nano-2025-12-17": _get_sense_voice_pre_trained_model,
|
| 2328 |
}
|
| 2329 |
|
| 2330 |
+
# Repositories of the MedASR CTC models. Every entry is served by the same
# factory function, so the table is built from the list of repo ids.
medical_english_models = dict.fromkeys(
    (
        "csukuangfj/sherpa-onnx-medasr-ctc-en-int8-2025-12-25",
        "csukuangfj/sherpa-onnx-medasr-ctc-en-2025-12-25",
    ),
    _get_medasr_model,
)
|
| 2334 |
+
|
| 2335 |
more_than_1600_languages_models = {
|
| 2336 |
"csukuangfj/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12": _get_sherpa_onnx_omnilingual_asr_models,
|
| 2337 |
"csukuangfj/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-2025-11-12": _get_sherpa_onnx_omnilingual_asr_models,
|
|
|
|
| 2453 |
all_models = {
|
| 2454 |
**funsar_nano_31_languages_models,
|
| 2455 |
**more_than_1600_languages_models,
|
| 2456 |
+
**medical_english_models,
|
| 2457 |
**twenty_five_languages_models,
|
| 2458 |
**multi_lingual_models,
|
| 2459 |
**sichuan_models,
|
|
|
|
| 2477 |
}
|
| 2478 |
|
| 2479 |
language_to_models = {
|
| 2480 |
+
"Medical dictation English": list(medical_english_models.keys()),
|
| 2481 |
"31 languages (FunASR Nano)": list(funsar_nano_31_languages_models.keys()),
|
| 2482 |
"1600+ languages": list(more_than_1600_languages_models.keys()),
|
| 2483 |
"25 European languages": list(twenty_five_languages_models.keys()),
|
requirements.txt
CHANGED
|
@@ -10,7 +10,7 @@ numpy<2
|
|
| 10 |
|
| 11 |
huggingface_hub
|
| 12 |
|
| 13 |
-
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.12.
|
| 14 |
-
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.12.
|
| 15 |
|
| 16 |
#sherpa-onnx>=1.12.6
|
|
|
|
| 10 |
|
| 11 |
huggingface_hub
|
| 12 |
|
| 13 |
+
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.12.20/sherpa_onnx_core-1.12.20-py3-none-manylinux2014_x86_64.whl
|
| 14 |
+
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.12.20/sherpa_onnx-1.12.20-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
|
| 15 |
|
| 16 |
#sherpa-onnx>=1.12.6
|