csukuangfj committed on
Commit
7ee0369
·
1 Parent(s): d6e8fba

add funasr nano

Browse files
Files changed (1) hide show
  1. model.py +32 -2
model.py CHANGED
@@ -21,6 +21,8 @@ from typing import Union
21
  import torch
22
  import torchaudio
23
  from huggingface_hub import hf_hub_download
 
 
24
 
25
  os.system("find / -name libk2*.so 2>/dev/null")
26
 
@@ -2062,6 +2064,35 @@ def _get_chinese_dialect_models(
2062
  return recognizer
2063
 
2064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2065
  @lru_cache(maxsize=10)
2066
  def _get_sense_voice_pre_trained_model(
2067
  repo_id: str,
@@ -2323,8 +2354,7 @@ english_models = {
2323
  }
2324
 
2325
  funsar_nano_31_languages_models = {
2326
- "csukuangfj/sherpa-onnx-sense-voice-funasr-nano-int8-2025-12-17": _get_sense_voice_pre_trained_model,
2327
- "csukuangfj/sherpa-onnx-sense-voice-funasr-nano-2025-12-17": _get_sense_voice_pre_trained_model,
2328
  }
2329
 
2330
  medical_english_models = {
 
21
  import torch
22
  import torchaudio
23
  from huggingface_hub import hf_hub_download
24
+ from huggingface_hub import snapshot_download
25
+
26
 
27
  os.system("find / -name libk2*.so 2>/dev/null")
28
 
 
2064
  return recognizer
2065
 
2066
 
2067
@lru_cache(maxsize=10)
def _get_funasr_nano(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline recognizer for a FunASR Nano model repo.

    The repo is fetched with ``snapshot_download`` and the model files inside
    the returned local directory are passed to
    ``sherpa_onnx.OfflineRecognizer.from_funasr_nano``.

    Args:
      repo_id:
        Hugging Face repo ID. Must be one of the supported repos listed in
        the function body.
      decoding_method:
        Not read by this function. It is still part of the signature and,
        because of ``lru_cache``, part of the cache key.
      num_active_paths:
        Not read by this function. Same remark as ``decoding_method``.

    Returns:
      The recognizer built from the downloaded files.

    Raises:
      AssertionError: If ``repo_id`` is not a supported repo.
    """
    supported_repos = ("csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30",)
    assert repo_id in supported_repos, repo_id

    # Every model file below is resolved relative to the downloaded snapshot.
    snapshot_dir = snapshot_download(repo_id)

    return sherpa_onnx.OfflineRecognizer.from_funasr_nano(
        encoder_adaptor=f"{snapshot_dir}/encoder_adaptor.int8.onnx",
        llm_prefill=f"{snapshot_dir}/llm_prefill.int8.onnx",
        llm_decode=f"{snapshot_dir}/llm_decode.int8.onnx",
        embedding=f"{snapshot_dir}/embedding.int8.onnx",
        tokenizer=f"{snapshot_dir}/Qwen3-0.6B",
        num_threads=2,
        debug=True,
    )
2094
+
2095
+
2096
  @lru_cache(maxsize=10)
2097
  def _get_sense_voice_pre_trained_model(
2098
  repo_id: str,
 
2354
  }
2355
 
2356
  funsar_nano_31_languages_models = {
2357
+ "csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30": _get_funasr_nano,
 
2358
  }
2359
 
2360
  medical_english_models = {