Konstantin Dorichev committed on
Commit
a2f7538
·
unverified ·
1 Parent(s): 0858cfd

Upload audio

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -4,13 +4,14 @@ import numpy as np
4
 
5
# Load the Whisper ASR pipeline once at import time (reused across chunks).
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(stream, new_chunk):
    """Accumulate streamed audio chunks and transcribe the whole stream.

    Parameters
    ----------
    stream : np.ndarray | None
        Running mono float32 buffer threaded through Gradio "state";
        None on the first chunk.
    new_chunk : tuple[int, np.ndarray]
        (sampling_rate, samples) pair as delivered by gr.Audio.

    Returns
    -------
    tuple[np.ndarray, str]
        The updated buffer and the transcription text for the full stream.
    """
    sr, y = new_chunk

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)
    # Normalize to [-1, 1]. Guard against an all-zero (silent) chunk:
    # the original `y /= np.max(np.abs(y))` divides by zero there and
    # fills the buffer with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # NOTE(review): the diff view elides original line 18; the `else:`
    # implies a guard here, presumably `if stream is not None:` as in the
    # standard Gradio streaming example -- confirm against the full file.
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
 
22
 
23
# Live interface: Gradio re-invokes `transcribe` on every microphone
# chunk, threading the accumulated buffer through the "state" slot and
# rendering the transcript in the "text" output.
demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", "text"],
    live=True,
)

demo.launch()
31
-
 
4
 
5
# Load the Whisper ASR pipeline once at import time (reused across chunks).
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(stream, new_chunk):
    """Accumulate streamed audio chunks and transcribe the whole stream.

    Parameters
    ----------
    stream : np.ndarray | None
        Running mono float32 buffer threaded through Gradio "state";
        None on the first chunk.
    new_chunk : tuple[int, np.ndarray]
        (sampling_rate, samples) pair as delivered by gr.Audio.

    Returns
    -------
    tuple[np.ndarray, str]
        The updated buffer and the transcription text for the full stream.
    """
    sr, y = new_chunk

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)
    # Normalize to [-1, 1]. Guard against an all-zero (silent) chunk:
    # the original `y /= np.max(np.abs(y))` divides by zero there and
    # fills the buffer with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # NOTE(review): the diff view elides file line 18; the `else:` implies
    # a guard here, presumably `if stream is not None:` as in the standard
    # Gradio streaming example -- confirm against the full file.
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
23
+
24
 
25
# Live interface: Gradio re-invokes `transcribe` on every uploaded audio
# chunk, threading the accumulated buffer through the "state" slot and
# rendering the transcript in the "text" output.
demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["upload"], streaming=True)],
    ["state", "text"],
    live=True,
)

demo.launch()