inoryQwQ commited on
Commit
fc41265
·
1 Parent(s): 0caa3bc

Add gradio app

Browse files
Files changed (4) hide show
  1. README.md +12 -1
  2. SileroOrt.py +1 -1
  3. gradio.png +0 -0
  4. gradio_app.py +47 -0
README.md CHANGED
@@ -8,13 +8,24 @@ license: mit
8
 
9
  ## Demo
10
 
 
 
11
  ```
12
  python main.py --input demo.wav --output_dir output --model silero_vad.onnx
13
  ```
14
-
15
  被分段的语音后保存在output目录中
16
 
17
 
 
 
 
 
 
 
 
 
 
 
18
  ## 在项目中使用
19
 
20
  1. 复制silero_vad.onnx SileroOrt.py StreamVAD.py 三个文件到项目中
 
8
 
9
  ## Demo
10
 
11
+ ### CLI
12
+
13
  ```
14
  python main.py --input demo.wav --output_dir output --model silero_vad.onnx
15
  ```
 
16
  分段后的语音保存在 output 目录中
17
 
18
 
19
+ ### Gradio
20
+ ```
21
+ pip install gradio
22
+
23
+ python gradio_app.py
24
+ ```
25
+ ![](/gradio.png)
26
+
27
+
28
+
29
  ## 在项目中使用
30
 
31
  1. 复制silero_vad.onnx SileroOrt.py StreamVAD.py 三个文件到项目中
SileroOrt.py CHANGED
@@ -59,7 +59,7 @@ class SileroOrt:
59
 
60
  if x.shape[0] % num_samples:
61
  pad_num = num_samples - (x.shape[0] % num_samples)
62
- x = np.pad(x, ((0, pad_num)), 'constant', value=0.0)
63
 
64
  for i in range(0, x.shape[0], num_samples):
65
  wavs_batch = x[i:i+num_samples]
 
59
 
60
  if x.shape[0] % num_samples:
61
  pad_num = num_samples - (x.shape[0] % num_samples)
62
+ x = np.pad(x, ((0, pad_num)), 'constant')
63
 
64
  for i in range(0, x.shape[0], num_samples):
65
  wavs_batch = x[i:i+num_samples]
gradio.png ADDED
gradio_app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio demo: stream microphone audio through a Silero VAD model.

Each detected speech segment is appended to a per-session chat history
as a playable audio message labelled with its start/end timestamps.
"""
import gradio as gr
from dataclasses import dataclass, field

from StreamVAD import StreamVAD

# Single shared VAD engine loaded from the ONNX model file.
# NOTE(review): presumably stateless across sessions — confirm StreamVAD
# does not keep per-stream state that would mix concurrent users.
vad = StreamVAD('silero_vad.onnx')


@dataclass
class AppState:
    """Per-session UI state: the accumulated chat messages."""
    history: list = field(default_factory=list)


def process_audio(audio, chatbot, state):
    """Feed one streamed microphone chunk into the VAD.

    Args:
        audio: tuple of (sample_rate, numpy int16 array) as delivered by
            a streaming ``gr.Audio`` component.
        chatbot: current chatbot value (unused; present because it is
            wired as an input of the ``stream`` event).
        state: ``AppState`` holding this session's message history;
            mutated in place.

    Returns:
        The updated message list to render in the chatbot.
    """
    sr, audio_data = audio
    for result in vad.run(audio_data, sr):
        # vad.run yields falsy values while a segment is still open;
        # only completed segments are turned into chat messages.
        if not result:
            continue
        segment_audio = gr.Audio(
            label=f"{result['start_ts']} - {result['end_ts']}",
            value=(vad.model.sr, result['audio']),
            waveform_options=gr.WaveformOptions(show_recording_waveform=False),
            editable=False,
        )
        state.history.append(gr.ChatMessage(role='user', content=segment_audio))
    return state.history


with gr.Blocks() as demo:
    state = gr.State(value=AppState())

    with gr.Row():
        chatbot = gr.Chatbot(type='messages')

    with gr.Row():
        input_audio = gr.Audio(sources=['microphone'], type='numpy', streaming=True)

    # Stream microphone chunks straight into the VAD as they arrive.
    input_audio.stream(fn=process_audio, inputs=[input_audio, chatbot, state], outputs=[chatbot])


if __name__ == '__main__':
    # Guarded so importing this module does not start a server;
    # running the script behaves exactly as before.
    demo.launch(debug=True)