pratham0011 commited on
Commit
c191eff
·
verified ·
1 Parent(s): c62168c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -34
app.py CHANGED
@@ -1,34 +1,41 @@
1
- import whisper
2
- import gradio as gr
3
- import time
4
-
5
- def transcribe(audio):
6
-
7
- #time.sleep(3)
8
- # load audio and pad/trim it to fit 30 seconds
9
- audio = whisper.load_audio(audio)
10
- audio = whisper.pad_or_trim(audio)
11
-
12
- # make log-Mel spectrogram and move to the same device as the model
13
- mel = whisper.log_mel_spectrogram(audio).to(model.device)
14
-
15
- # detect the spoken language
16
- _, probs = model.detect_language(mel)
17
- print(f"Detected language: {max(probs, key=probs.get)}")
18
-
19
- # decode the audio
20
- options = whisper.DecodingOptions()
21
- result = whisper.decode(model, mel, options)
22
- return result.text
23
-
24
-
25
- gr.Interface(
26
- title = 'OpenAI-Whisper Audio to Text Web UI',
27
- fn=transcribe,
28
- inputs=[
29
- gr.inputs.Audio(type="filepath")
30
- ],
31
- outputs=[
32
- "textbox"
33
- ],
34
- live=True).launch()
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import gradio as gr
3
+ import time
4
+
5
def transcribe(audio):
    """Transcribe an audio clip to text with the global Whisper ``model``.

    Args:
        audio: Path to an audio file (str) or a file-like object exposing a
            ``.name`` attribute, as supplied by Gradio's Audio component
            (``type="filepath"`` gives a str; ``type="file"`` gives a
            tempfile object).

    Returns:
        str: The decoded transcription text.
    """
    # whisper.load_audio shells out to ffmpeg and needs a filesystem path,
    # so unwrap a Gradio tempfile object to its path if necessary.
    path = audio if isinstance(audio, str) else audio.name

    # Load the audio and pad/trim it to fit Whisper's 30-second window.
    waveform = whisper.load_audio(path)
    waveform = whisper.pad_or_trim(waveform)

    # Log-Mel spectrogram, moved to the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language (informational only; decoding re-detects).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode with default options and return plain text.
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text
23
+
24
+
25
# Load the Whisper model once at startup; `transcribe` reads this global.
# (Previously `model` was never defined anywhere in the file, so the first
# transcription raised NameError.)
model = whisper.load_model("base")

gr.Interface(
    fn=transcribe,
    # "filepath" hands `transcribe` a path string, which whisper.load_audio
    # (via its ffmpeg subprocess) requires; "file" would pass a tempfile
    # object instead.
    inputs=[gr.inputs.Audio(type="filepath")],
    outputs="text",
    title='OpenAI-Whisper Audio to Text Web UI',
    live=True,
).launch()