FireRed Team committed on
Commit
f3bc9f0
·
verified ·
1 Parent(s): 92dd882

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -9,6 +9,7 @@ from fireredasr2s import FireRedAsr2System, FireRedAsr2SystemConfig
9
  from fireredasr2s.fireredasr2.asr import FireRedAsr2, FireRedAsr2Config
10
  from fireredasr2s.fireredvad.vad import FireRedVad, FireRedVadConfig
11
  from fireredasr2s.fireredvad.aed import FireRedAed, FireRedAedConfig
 
12
 
13
 
14
  asr_system = None
@@ -16,6 +17,7 @@ asr_model_aed = None
16
  asr_model_llm = None
17
  vad_model = None
18
  aed_model = None
 
19
 
20
 
21
  def init_model(model_dir_aed, model_dir_llm):
@@ -24,6 +26,7 @@ def init_model(model_dir_aed, model_dir_llm):
24
  global asr_model_llm
25
  global vad_model
26
  global aed_model
 
27
  if asr_system is None:
28
  asr_system_config = FireRedAsr2SystemConfig() # Use default config
29
  asr_system = FireRedAsr2System(asr_system_config)
@@ -61,7 +64,8 @@ def init_model(model_dir_aed, model_dir_llm):
61
  extend_speech_frame=0,
62
  chunk_max_frame=30000)
63
  vad_model = FireRedVad.from_pretrained("pretrained_models/FireRedVAD/VAD", vad_config)
64
- aed_config=FireRedAedConfig(
 
65
  use_gpu=False,
66
  smooth_window_size=5,
67
  speech_threshold=0.4,
@@ -74,7 +78,17 @@ def init_model(model_dir_aed, model_dir_llm):
74
  extend_speech_frame=0,
75
  chunk_max_frame=30000)
76
  aed_model = FireRedAed.from_pretrained("pretrained_models/FireRedVAD/AED", aed_config)
77
-
 
 
 
 
 
 
 
 
 
 
78
 
79
 
80
  @spaces.GPU(duration=20)
@@ -118,11 +132,13 @@ def asr_inference_llm(audio_file):
118
  def vad_inference(audio_file):
119
  if not audio_file:
120
  return "Please upload a wav file"
121
- result, probs = vad.detect(audio_file)
122
  s = f'Duration: {result["dur"]}s'
123
- s += f'\nVoice: {result["timestamps"]}'
124
- result, probs = aed.detect(audio_file)
125
- s += f'\nEvent: {results["event2ratio"]}\n {result["event2timestamps"]}'
 
 
126
  return s
127
 
128
 
 
9
  from fireredasr2s.fireredasr2.asr import FireRedAsr2, FireRedAsr2Config
10
  from fireredasr2s.fireredvad.vad import FireRedVad, FireRedVadConfig
11
  from fireredasr2s.fireredvad.aed import FireRedAed, FireRedAedConfig
12
+ from fireredasr2s.fireredvad.stream_vad import FireRedStreamVad, FireRedStreamVadConfig
13
 
14
 
15
  asr_system = None
 
17
  asr_model_llm = None
18
  vad_model = None
19
  aed_model = None
20
+ stream_vad_model = None
21
 
22
 
23
  def init_model(model_dir_aed, model_dir_llm):
 
26
  global asr_model_llm
27
  global vad_model
28
  global aed_model
29
+ global stream_vad_model
30
  if asr_system is None:
31
  asr_system_config = FireRedAsr2SystemConfig() # Use default config
32
  asr_system = FireRedAsr2System(asr_system_config)
 
64
  extend_speech_frame=0,
65
  chunk_max_frame=30000)
66
  vad_model = FireRedVad.from_pretrained("pretrained_models/FireRedVAD/VAD", vad_config)
67
+ if aed_model is None:
68
+ aed_config = FireRedAedConfig(
69
  use_gpu=False,
70
  smooth_window_size=5,
71
  speech_threshold=0.4,
 
78
  extend_speech_frame=0,
79
  chunk_max_frame=30000)
80
  aed_model = FireRedAed.from_pretrained("pretrained_models/FireRedVAD/AED", aed_config)
81
+ if stream_vad_model is None:
82
+ vad_config = FireRedStreamVadConfig(
83
+ use_gpu=False,
84
+ smooth_window_size=5,
85
+ speech_threshold=0.4,
86
+ pad_start_frame=5,
87
+ min_speech_frame=8,
88
+ max_speech_frame=2000,
89
+ min_silence_frame=20,
90
+ chunk_max_frame=30000)
91
+ stream_vad_model = FireRedStreamVad.from_pretrained("pretrained_models/FireRedVAD/Stream-VAD", vad_config)
92
 
93
 
94
  @spaces.GPU(duration=20)
 
def vad_inference(audio_file):
    """Build a text report from the VAD, stream-VAD and audio-event detectors.

    Uses the module-level ``vad_model``, ``stream_vad_model`` and
    ``aed_model`` objects, which must already be initialised (``init_model``
    assigns them).

    Args:
        audio_file: The uploaded audio input that is handed unchanged to each
            detector (presumably a wav file path -- confirm against the UI
            wiring). Any falsy value means nothing was uploaded.

    Returns:
        ``"Please upload a wav file"`` when ``audio_file`` is falsy; otherwise
        a multi-line string with the duration, the VAD timestamps, the
        stream-VAD timestamps and the audio-event ratio/timestamps.
    """
    if not audio_file:
        return "Please upload a wav file"

    # Each detector returns a (result, probs) pair; only the result mapping
    # is reported, so the second element is discarded.
    vad_result, _ = vad_model.detect(audio_file)
    stream_result, _ = stream_vad_model.detect_full(audio_file)
    aed_result, _ = aed_model.detect(audio_file)

    # The event ratio used to be read from an undefined name (`results`),
    # which raised NameError for every non-empty input; each detector's
    # output now has its own distinct name.
    report_lines = [
        f'Duration: {vad_result["dur"]}s',
        f'VAD: {vad_result["timestamps"]}',
        f'Stream VAD: {stream_result["timestamps"]}',
        f'Audio Event: {aed_result["event2ratio"]}\n {aed_result["event2timestamps"]}',
    ]
    return "\n".join(report_lines)
143
 
144