FireRed Team committed on
Commit
c0614ee
·
verified ·
1 Parent(s): a7ae0c0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +23 -2
  2. requirements.txt +5 -5
app.py CHANGED
@@ -18,6 +18,9 @@ def init_model(model_dir_aed, model_dir_llm):
18
  global asr_system
19
  global asr_model_aed
20
  global asr_model_llm
 
 
 
21
  if asr_model_aed is None:
22
  asr_config_aed = FireRedAsr2Config(
23
  use_gpu=True,
@@ -42,6 +45,14 @@ def init_model(model_dir_aed, model_dir_llm):
42
  asr_model_llm = FireRedAsr2.from_pretrained("llm", model_dir_llm, asr_config_llm)
43
 
44
 
 
 
 
 
 
 
 
 
45
  @spaces.GPU(duration=20)
46
  def asr_inference(audio_file):
47
  if not audio_file:
@@ -70,9 +81,9 @@ def asr_inference_llm(audio_file):
70
  return text_output
71
 
72
 
73
- with gr.Blocks(title="FireRedASR") as demo:
74
  gr.HTML(
75
- "<h1 style='text-align: center'>FireRedASR2 Demo</h1>"
76
  )
77
  gr.Markdown("Upload an audio file (wav) to get speech-to-text results.")
78
 
@@ -82,11 +93,19 @@ with gr.Blocks(title="FireRedASR") as demo:
82
  audio_file = gr.Audio(label="Upload wav file", sources=["upload"], type="filepath")
83
 
84
  with gr.Column():
 
 
85
  asr_button = gr.Button("Start Recognition (FireRedASR2-AED-L)", variant="primary")
86
  text_output = gr.Textbox(label="Model Result (FireRedASR2-AED-L)", interactive=False, lines=3, max_lines=12)
87
  asr_button_llm = gr.Button("Start Recognition (FireRedASR2-LLM-L)", variant="primary")
88
  text_output_llm = gr.Textbox(label="Model Result (FireRedASR2-LLM-L)", interactive=False, lines=3, max_lines=12)
89
 
 
 
 
 
 
 
90
  asr_button.click(
91
  fn=asr_inference,
92
  inputs=[audio_file],
@@ -106,6 +125,8 @@ if __name__ == "__main__":
106
  snapshot_download(repo_id='FireRedTeam/FireRedASR2-AED', local_dir=local_dir)
107
  local_dir_llm='pretrained_models/FireRedASR2-LLM'
108
  snapshot_download(repo_id='FireRedTeam/FireRedASR2-LLM', local_dir=local_dir_llm)
 
 
109
  # Init model
110
  init_model(local_dir, local_dir_llm)
111
  # UI
 
18
  global asr_system
19
  global asr_model_aed
20
  global asr_model_llm
21
+ if asr_system is None:
22
+ asr_system_config = FireRedAsr2SystemConfig() # Use default config
23
+ asr_system = FireRedAsr2System(asr_system_config)
24
  if asr_model_aed is None:
25
  asr_config_aed = FireRedAsr2Config(
26
  use_gpu=True,
 
45
  asr_model_llm = FireRedAsr2.from_pretrained("llm", model_dir_llm, asr_config_llm)
46
 
47
 
48
+ @spaces.GPU(duration=20)
49
+ def asr_sys_inference(audio_file):
50
+ if not audio_file:
51
+ return "Please upload a wav file"
52
+ results = asr_system.process(audio_file)
53
+ return results
54
+
55
+
56
  @spaces.GPU(duration=20)
57
  def asr_inference(audio_file):
58
  if not audio_file:
 
81
  return text_output
82
 
83
 
84
+ with gr.Blocks(title="FireRedASR2S") as demo:
85
  gr.HTML(
86
+ "<h1 style='text-align: center'>FireRedASR2S Demo</h1>"
87
  )
88
  gr.Markdown("Upload an audio file (wav) to get speech-to-text results.")
89
 
 
93
  audio_file = gr.Audio(label="Upload wav file", sources=["upload"], type="filepath")
94
 
95
  with gr.Column():
96
+ asr_sys_button = gr.Button("Start Recognition (FireRedASR2S)", variant="primary")
97
+ text_sys_output = gr.Textbox(label="Model Result (FireRedASR2S)", interactive=False, lines=3, max_lines=12)
98
  asr_button = gr.Button("Start Recognition (FireRedASR2-AED-L)", variant="primary")
99
  text_output = gr.Textbox(label="Model Result (FireRedASR2-AED-L)", interactive=False, lines=3, max_lines=12)
100
  asr_button_llm = gr.Button("Start Recognition (FireRedASR2-LLM-L)", variant="primary")
101
  text_output_llm = gr.Textbox(label="Model Result (FireRedASR2-LLM-L)", interactive=False, lines=3, max_lines=12)
102
 
103
+ asr_sys_button.click(
104
+ fn=asr_sys_inference,
105
+ inputs=[audio_file],
106
+ outputs=[text_sys_output]
107
+ )
108
+
109
  asr_button.click(
110
  fn=asr_inference,
111
  inputs=[audio_file],
 
125
  snapshot_download(repo_id='FireRedTeam/FireRedASR2-AED', local_dir=local_dir)
126
  local_dir_llm='pretrained_models/FireRedASR2-LLM'
127
  snapshot_download(repo_id='FireRedTeam/FireRedASR2-LLM', local_dir=local_dir_llm)
128
+ for name in ['FireRedVAD', 'FireRedLID', 'FireRedPunc']:
129
+ snapshot_download(repo_id=f'FireRedTeam/{name}', local_dir=f'pretrained_models/{name}')
130
  # Init model
131
  init_model(local_dir, local_dir_llm)
132
  # UI
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- torch>=2.1.0
2
- torchaudio>=2.1.0
3
- transformers>=4.51.3
4
- numpy>=1.26.1
5
  cn2an>=0.5.23
6
  kaldiio>=2.18.0
7
  kaldi_native_fbank>=1.15
8
  sentencepiece
9
  soundfile>=0.12.1
10
  textgrid
11
- peft
 
1
+ torch==2.1.0
2
+ torchaudio==2.1.0
3
+ transformers==4.51.3
4
+ numpy==1.26.1
5
  cn2an>=0.5.23
6
  kaldiio>=2.18.0
7
  kaldi_native_fbank>=1.15
8
  sentencepiece
9
  soundfile>=0.12.1
10
  textgrid
11
+ peft==0.13.2