inoryQwQ committed on
Commit
b3b007c
·
1 Parent(s): d1ae526

fix server and gradio

Browse files
Files changed (3) hide show
  1. SenseVoiceAx.py +6 -6
  2. gradio_demo.py +9 -9
  3. server.py +5 -9
SenseVoiceAx.py CHANGED
@@ -69,8 +69,8 @@ def unique_consecutive_np(arr):
69
 
70
 
71
  class SenseVoiceAx:
72
- """ SenseVoice axmodel runner """
73
-
74
  def __init__(
75
  self,
76
  model_path: str,
@@ -89,13 +89,13 @@ class SenseVoiceAx:
89
  max_len: Fixed shape of input of axmodel
90
  beam_size: Max number of hypos to hold after each decode step
91
  language: Support auto, zh(Chinese), en(English), yue(Cantonese), ja(Japanese), ko(Korean)
92
- hot_words: Words that may fail to recognize,
93
- special words/phrases (aka hotwords) like rare words, personalized information etc.
94
- use_itn: Allow Invert Text Normalization if True,
95
  ITN converts ASR model output into its written form to improve text readability,
96
  For example, the ITN module replaces “one hundred and twenty-three dollars” transcribed by an ASR model with “$123.”
97
  streaming: Processes audio in small segments or "chunks" sequentially and outputs text on the fly.
98
- Use stream_infer method if streaming is true otherwise infer.
99
 
100
  """
101
  model_path_root = os.path.dirname(model_path)
 
69
 
70
 
71
  class SenseVoiceAx:
72
+ """SenseVoice axmodel runner"""
73
+
74
  def __init__(
75
  self,
76
  model_path: str,
 
89
  max_len: Fixed shape of input of axmodel
90
  beam_size: Max number of hypos to hold after each decode step
91
  language: Support auto, zh(Chinese), en(English), yue(Cantonese), ja(Japanese), ko(Korean)
92
+ hot_words: Words that the model may fail to recognize,
93
+ special words/phrases (aka hotwords) like rare words, personalized information etc.
94
+ use_itn: Enable Inverse Text Normalization (ITN) if True,
95
  ITN converts ASR model output into its written form to improve text readability,
96
  For example, the ITN module replaces “one hundred and twenty-three dollars” transcribed by an ASR model with “$123.”
97
  streaming: Processes audio in small segments or "chunks" sequentially and outputs text on the fly.
98
+ Use the stream_infer method if streaming is True; otherwise use infer.
99
 
100
  """
101
  model_path_root = os.path.dirname(model_path)
gradio_demo.py CHANGED
@@ -1,21 +1,22 @@
1
  import gradio as gr
2
  import os
3
  from SenseVoiceAx import SenseVoiceAx
4
- from tokenizer import SentencepiecesTokenizer
5
  from print_utils import rich_transcription_postprocess
6
- from download_utils import download_model
7
 
8
- use_itn = True # 标点符号预测
9
  max_len = 256
10
 
11
  model_path = os.path.join("sensevoice_ax650", "sensevoice.axmodel")
12
- bpemodel = "chn_jpn_yue_eng_ko_spectok.bpe.model"
13
 
14
  assert os.path.exists(model_path), f"model {model_path} not exist"
15
 
16
- tokenizer = SentencepiecesTokenizer(bpemodel=bpemodel)
17
  pipeline = SenseVoiceAx(
18
- model_path, max_len=max_len, language="auto", use_itn=use_itn, tokenizer=tokenizer
 
 
 
 
 
 
19
  )
20
 
21
 
@@ -28,10 +29,9 @@ def speech_to_text(audio_path, lang):
28
  return "无音频"
29
 
30
  pipeline.choose_language(language=lang)
31
- asr_res = pipeline.infer(audio_path, print_rtf=True)
32
- res = " ".join([rich_transcription_postprocess(i) for i in asr_res])
33
 
34
- return res
35
 
36
 
37
  def main():
 
1
  import gradio as gr
2
  import os
3
  from SenseVoiceAx import SenseVoiceAx
 
4
  from print_utils import rich_transcription_postprocess
 
5
 
 
6
  max_len = 256
7
 
8
  model_path = os.path.join("sensevoice_ax650", "sensevoice.axmodel")
 
9
 
10
  assert os.path.exists(model_path), f"model {model_path} not exist"
11
 
 
12
  pipeline = SenseVoiceAx(
13
+ model_path,
14
+ max_len=max_len,
15
+ beam_size=3,
16
+ language="auto",
17
+ hot_words=None,
18
+ use_itn=True,
19
+ streaming=False,
20
  )
21
 
22
 
 
29
  return "无音频"
30
 
31
  pipeline.choose_language(language=lang)
32
+ asr_res = pipeline.infer(audio_path, print_rtf=False)
 
33
 
34
+ return asr_res
35
 
36
 
37
  def main():
server.py CHANGED
@@ -3,11 +3,7 @@ from fastapi import FastAPI, HTTPException, Body
3
  from fastapi.responses import JSONResponse
4
  from typing import List, Optional
5
  import logging
6
- import json
7
  from SenseVoiceAx import SenseVoiceAx
8
- from tokenizer import SentencepiecesTokenizer
9
- from print_utils import rich_transcription_postprocess, rich_print_asr_res
10
- from download_utils import download_model
11
  import os
12
  import librosa
13
 
@@ -32,11 +28,10 @@ async def load_model():
32
  try:
33
  # 模型加载
34
  language = "auto"
35
- use_itn = True # 标点符号预测
36
  max_len = 256
37
 
38
  model_path = os.path.join("sensevoice_ax650", "sensevoice.axmodel")
39
- bpemodel = "chn_jpn_yue_eng_ko_spectok.bpe.model"
40
 
41
  assert os.path.exists(model_path), f"model {model_path} not exist"
42
 
@@ -44,13 +39,14 @@ async def load_model():
44
  print(f"use_itn: {use_itn}")
45
  print(f"model_path: {model_path}")
46
 
47
- tokenizer = SentencepiecesTokenizer(bpemodel=bpemodel)
48
  asr_model = SenseVoiceAx(
49
  model_path,
50
  max_len=max_len,
51
- language=language,
 
 
52
  use_itn=use_itn,
53
- tokenizer=tokenizer,
54
  )
55
 
56
  logger.info("ASR model loaded successfully")
 
3
  from fastapi.responses import JSONResponse
4
  from typing import List, Optional
5
  import logging
 
6
  from SenseVoiceAx import SenseVoiceAx
 
 
 
7
  import os
8
  import librosa
9
 
 
28
  try:
29
  # 模型加载
30
  language = "auto"
31
+ use_itn = True # 逆文本规范
32
  max_len = 256
33
 
34
  model_path = os.path.join("sensevoice_ax650", "sensevoice.axmodel")
 
35
 
36
  assert os.path.exists(model_path), f"model {model_path} not exist"
37
 
 
39
  print(f"use_itn: {use_itn}")
40
  print(f"model_path: {model_path}")
41
 
 
42
  asr_model = SenseVoiceAx(
43
  model_path,
44
  max_len=max_len,
45
+ beam_size=3,
46
+ language="auto",
47
+ hot_words=None,
48
  use_itn=use_itn,
49
+ streaming=False,
50
  )
51
 
52
  logger.info("ASR model loaded successfully")