Run on GPU
Browse files
app.py
CHANGED
|
@@ -9,8 +9,8 @@ import gradio as gr
|
|
| 9 |
import requests
|
| 10 |
|
| 11 |
# UNCOMMENT TO USE WHISPER
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
from langchain import ConversationChain, LLMChain
|
| 16 |
|
|
@@ -77,29 +77,29 @@ WHISPER_DETECT_LANG = "Detect language"
|
|
| 77 |
|
| 78 |
|
| 79 |
# UNCOMMENT TO USE WHISPER
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
|
| 84 |
|
| 85 |
# UNCOMMENT TO USE WHISPER
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
|
| 104 |
# TEMPORARY FOR TESTING
|
| 105 |
def transcribe_dummy(aud_inp_tb, whisper_lang):
|
|
@@ -619,10 +619,10 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
|
|
| 619 |
submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
|
| 620 |
|
| 621 |
# UNCOMMENT TO USE WHISPER
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
|
| 627 |
# TEMPORARY FOR TESTING
|
| 628 |
# with gr.Row():
|
|
|
|
| 9 |
import requests
|
| 10 |
|
| 11 |
# UNCOMMENT TO USE WHISPER
|
| 12 |
+
import warnings
|
| 13 |
+
import whisper
|
| 14 |
|
| 15 |
from langchain import ConversationChain, LLMChain
|
| 16 |
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
# UNCOMMENT TO USE WHISPER
|
| 80 |
+
# Silence every Python warning for the whole process.
warnings.filterwarnings("ignore")

# Load the "tiny" Whisper checkpoint once, at import time, and keep it in a
# module-level constant so it can be reused without reloading.
WHISPER_MODEL = whisper.load_model("tiny")

# Startup diagnostic: dump the loaded model to stdout.
print("WHISPER_MODEL", WHISPER_MODEL)
|
| 83 |
|
| 84 |
|
| 85 |
# UNCOMMENT TO USE WHISPER
|
| 86 |
+
def transcribe(aud_inp, whisper_lang):
    """Transcribe a recorded audio file to text with the shared Whisper model.

    Args:
        aud_inp: Path of the audio file to transcribe. ``None`` or an empty
            string means there is no recording to process.
        whisper_lang: Selected language name, or ``WHISPER_DETECT_LANG`` to
            let Whisper pick the language on its own.

    Returns:
        The decoded text, or ``""`` when there is no audio or nothing was
        decoded.
    """
    # Nothing recorded (or the recording was cleared): there is no file to load.
    if not aud_inp:
        return ""

    # Load the audio and pad/trim it to the fixed-length window expected by
    # the model, then build the log-Mel spectrogram on the model's device.
    audio = whisper.pad_or_trim(whisper.load_audio(aud_inp))
    mel = whisper.log_mel_spectrogram(audio).to(WHISPER_MODEL.device)

    # No explicit detect_language() call here: its result was never used, and
    # whisper.decode() detects the language itself when none is specified.
    decode_kwargs = {}
    if whisper_lang != WHISPER_DETECT_LANG:
        decode_kwargs["language"] = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
    # NOTE(review): DecodingOptions is left at its defaults otherwise (as in
    # the original); confirm the default precision suits the device in use.
    options = whisper.DecodingOptions(**decode_kwargs)

    result = whisper.decode(WHISPER_MODEL, mel, options)
    # Guard before touching result.text so a missing result cannot raise.
    if not result:
        return ""

    print("result.text", result.text)
    return result.text or ""
|
| 103 |
|
| 104 |
# TEMPORARY FOR TESTING
|
| 105 |
def transcribe_dummy(aud_inp_tb, whisper_lang):
|
|
|
|
| 619 |
submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
|
| 620 |
|
| 621 |
# UNCOMMENT TO USE WHISPER
|
| 622 |
+
with gr.Row():
|
| 623 |
+
audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
|
| 624 |
+
interactive=True, streaming=False)
|
| 625 |
+
audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
|
| 626 |
|
| 627 |
# TEMPORARY FOR TESTING
|
| 628 |
# with gr.Row():
|