liumaolin committed on
Commit
ac62229
·
1 Parent(s): 2988b10

Using FunASR quantized model.

Browse files
models/asr/punc_ct-transformer_cn-en-common-vocab471067-large/{model.pt → model_quant.onnx} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7176cae922a872e130e6b88aef9a1153581711baf79c9124c7c95be383cd6f81
3
- size 1125507622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:562c037651ca2872d2f0edeffd324cacd0dd1df54bf3fafa46fcbd04ab8874b6
3
+ size 1011846845
models/asr/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/{model.pt → model_eb_quant.onnx} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d491689244ec5dfbf9170ef3827c358aa10f1f20e42a7c59e15e688647946d1
3
- size 989763045
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:967e4f4f82706df80efb72a3b9f9ecc6e5d4e46ddb11eaf6837ae17ec4e04b06
3
+ size 34028133
models/asr/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model_quant.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1382593b11a18e0872ee149542496cbf6d4881aaeaba93a3de97bfa1cb5e7626
3
+ size 345139149
src/VoiceDialogue/services/speech/asr_service.py CHANGED
@@ -1,10 +1,11 @@
 
1
  import time
2
  import typing
3
  from queue import Queue
4
 
5
  import librosa
6
  import numpy as np
7
- from funasr import AutoModel
8
  from pywhispercpp.model import Model
9
 
10
  from config import paths
@@ -101,30 +102,34 @@ class FunASRClient:
101
  def __init__(self):
102
  # 设置模型缓存目录
103
  models_dir = paths.MODELS_PATH / "asr"
104
- asr_model_path = (
105
- models_dir
106
- / "speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
107
- )
108
- vad_model_path = models_dir / "speech_fsmn_vad_zh-cn-16k-common-pytorch"
109
- punc_model_path = (
110
- models_dir / "punc_ct-transformer_cn-en-common-vocab471067-large"
111
- )
112
- self.funasr_model = AutoModel(
113
- model=asr_model_path,
114
- vad_model=vad_model_path.as_posix(),
115
- punc_model=punc_model_path.as_posix(),
116
- log_level="ERROR",
117
- disable_update=True,
118
- )
 
 
119
 
120
  def transcribe(self, audio_array: np.ndarray, language="auto"):
121
  audio_array = ensure_minimum_audio_duration(audio_array)
122
 
123
- segments = self.funasr_model.generate(input=audio_array, disable_pbar=True)
124
 
125
  transcibed_texts = []
126
  for segment in segments:
127
- content = segment.get("text", "")
 
 
128
  transcibed_texts.append(content)
129
  return " ".join(transcibed_texts)
130
 
@@ -140,7 +145,6 @@ class UnifiedASRClient:
140
  else:
141
  self.client = WhisperCppClient()
142
 
143
-
144
  def warmup(self):
145
  """预热模型"""
146
  print('[INFO] 预热语音识别模型...')
@@ -207,5 +211,3 @@ class ASRWorker(BaseThread):
207
 
208
  voice_task.user_voice = []
209
  self.transcribed_text_queue.put(voice_task)
210
-
211
-
 
1
+ import re
2
  import time
3
  import typing
4
  from queue import Queue
5
 
6
  import librosa
7
  import numpy as np
8
+ from funasr_onnx import SeacoParaformer, CT_Transformer
9
  from pywhispercpp.model import Model
10
 
11
  from config import paths
 
def __init__(self):
    """Load the quantized FunASR ONNX models: ASR plus punctuation restoration."""
    # Local cache directory holding the downloaded ASR model folders.
    models_dir = paths.MODELS_PATH / "asr"

    asr_dir = models_dir / "speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
    punc_dir = models_dir / "punc_ct-transformer_cn-en-common-vocab471067-large"

    # quantize=True — presumably selects the *_quant.onnx weights shipped
    # alongside the models in this commit; confirm against funasr_onnx docs.
    self.funasr_model = SeacoParaformer(asr_dir, quantize=True)
    self.punc_model = CT_Transformer(punc_dir, quantize=True)
110
+ def _fix_spaced_uppercase(self, text: str) -> str:
111
+ """
112
+ 修复类似 " G N O M E " 这样的大写字母间有空格的字符串,将其替换为 "GNOME"
113
+ """
114
+ # 匹配大写字母之间的空格模式,至少2个大写字母
115
+ pattern = r'([A-Z])\s+([A-Z](?:\s+[A-Z])*)'
116
+
117
+ def replace_func(match):
118
+ # 移除所有空格
119
+ return match.group(0).replace(' ', '')
120
+
121
+ return re.sub(pattern, replace_func, text)
122
 
123
def transcribe(self, audio_array: np.ndarray, language="auto"):
    """Run ASR, restore punctuation, and return the joined transcript text."""
    audio_array = ensure_minimum_audio_duration(audio_array)

    # SeacoParaformer yields one dict per recognized segment.
    segments = self.funasr_model(wav_content=audio_array, hotwords='')

    pieces = []
    for seg in segments:
        text = seg.get("preds", "")
        # Add punctuation, then merge spelled-out acronyms like "G N O M E".
        text, _ = self.punc_model(text)
        text = self._fix_spaced_uppercase(text)
        pieces.append(text)
    return " ".join(pieces)
135
 
 
145
  else:
146
  self.client = WhisperCppClient()
147
 
 
148
  def warmup(self):
149
  """预热模型"""
150
  print('[INFO] 预热语音识别模型...')
 
211
 
212
  voice_task.user_voice = []
213
  self.transcribed_text_queue.put(voice_task)