Run on CPU
Browse files
app.py
CHANGED
|
@@ -9,8 +9,8 @@ import gradio as gr
|
|
| 9 |
import requests
|
| 10 |
|
| 11 |
# UNCOMMENT TO USE WHISPER
|
| 12 |
-
import warnings
|
| 13 |
-
import whisper
|
| 14 |
|
| 15 |
from langchain import ConversationChain, LLMChain
|
| 16 |
|
|
@@ -77,29 +77,47 @@ WHISPER_DETECT_LANG = "Detect language"
|
|
| 77 |
|
| 78 |
|
| 79 |
# UNCOMMENT TO USE WHISPER
|
| 80 |
-
warnings.filterwarnings("ignore")
|
| 81 |
-
WHISPER_MODEL = whisper.load_model("tiny")
|
| 82 |
-
print("WHISPER_MODEL", WHISPER_MODEL)
|
| 83 |
|
| 84 |
|
| 85 |
# UNCOMMENT TO USE WHISPER
|
| 86 |
-
def transcribe(aud_inp, whisper_lang):
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
return ""
|
| 89 |
-
aud = whisper.load_audio(aud_inp)
|
| 90 |
-
aud = whisper.pad_or_trim(aud)
|
| 91 |
-
mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
|
| 92 |
-
_, probs = WHISPER_MODEL.detect_language(mel)
|
| 93 |
-
options = whisper.DecodingOptions()
|
|
|
|
|
|
|
|
|
|
| 94 |
if whisper_lang != WHISPER_DETECT_LANG:
|
| 95 |
whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
result_text = ""
|
| 100 |
-
if result and result.text:
|
| 101 |
-
result_text = result.text
|
| 102 |
-
return result_text
|
| 103 |
|
| 104 |
|
| 105 |
# Pertains to Express-inator functionality
|
|
@@ -601,10 +619,15 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
|
|
| 601 |
submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
|
| 602 |
|
| 603 |
# UNCOMMENT TO USE WHISPER
|
| 604 |
-
with gr.Row():
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
gr.Examples(
|
| 610 |
examples=["How many people live in Canada?",
|
|
|
|
| 9 |
import requests
|
| 10 |
|
| 11 |
# UNCOMMENT TO USE WHISPER
|
| 12 |
+
# import warnings
|
| 13 |
+
# import whisper
|
| 14 |
|
| 15 |
from langchain import ConversationChain, LLMChain
|
| 16 |
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
# UNCOMMENT TO USE WHISPER
|
| 80 |
+
# warnings.filterwarnings("ignore")
|
| 81 |
+
# WHISPER_MODEL = whisper.load_model("tiny")
|
| 82 |
+
# print("WHISPER_MODEL", WHISPER_MODEL)
|
| 83 |
|
| 84 |
|
| 85 |
# UNCOMMENT TO USE WHISPER
|
| 86 |
+
# def transcribe(aud_inp, whisper_lang):
|
| 87 |
+
# if aud_inp is None:
|
| 88 |
+
# return ""
|
| 89 |
+
# aud = whisper.load_audio(aud_inp)
|
| 90 |
+
# aud = whisper.pad_or_trim(aud)
|
| 91 |
+
# mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
|
| 92 |
+
# _, probs = WHISPER_MODEL.detect_language(mel)
|
| 93 |
+
# options = whisper.DecodingOptions()
|
| 94 |
+
# if whisper_lang != WHISPER_DETECT_LANG:
|
| 95 |
+
# whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
|
| 96 |
+
# options = whisper.DecodingOptions(language=whisper_lang_code)
|
| 97 |
+
# result = whisper.decode(WHISPER_MODEL, mel, options)
|
| 98 |
+
# print("result.text", result.text)
|
| 99 |
+
# result_text = ""
|
| 100 |
+
# if result and result.text:
|
| 101 |
+
# result_text = result.text
|
| 102 |
+
# return result_text
|
| 103 |
+
|
| 104 |
+
# TEMPORARY FOR TESTING
|
| 105 |
+
def transcribe_dummy(aud_inp_tb, whisper_lang):
    """Temporary stand-in for Whisper transcription, used for testing.

    No audio is loaded or decoded here; the real Whisper pipeline is
    intentionally skipped. The function only reports (via ``print``) which
    language setting would have been used and echoes its input back.

    Args:
        aud_inp_tb: Input value to echo back (presumably the text typed into
            the test text box -- confirm against the caller). ``None`` is
            treated as "no input".
        whisper_lang: Selected language name, or ``WHISPER_DETECT_LANG`` to
            indicate automatic language detection.

    Returns:
        ``""`` when ``aud_inp_tb`` is ``None``; otherwise ``aud_inp_tb``
        unchanged.
    """
    # Guard clause: nothing was provided, so there is nothing to echo.
    if aud_inp_tb is None:
        return ""

    if whisper_lang == WHISPER_DETECT_LANG:
        status = "Whisper will detect language"
    else:
        # Translate the UI language name into the code Whisper would be given.
        lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        status = f"Whisper will use lang code: {lang_code}"

    print("result_text", status)
    return aud_inp_tb
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
|
| 123 |
# Pertains to Express-inator functionality
|
|
|
|
| 619 |
submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
|
| 620 |
|
| 621 |
# UNCOMMENT TO USE WHISPER
|
| 622 |
+
# with gr.Row():
|
| 623 |
+
# audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
|
| 624 |
+
# interactive=True, streaming=False)
|
| 625 |
+
# audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
|
| 626 |
+
|
| 627 |
+
# TEMPORARY FOR TESTING
|
| 628 |
+
# with gr.Row():
|
| 629 |
+
# audio_comp_tb = gr.Textbox(label="Just say it!", lines=1)
|
| 630 |
+
# audio_comp_tb.submit(transcribe_dummy, inputs=[audio_comp_tb, whisper_lang_state], outputs=[message])
|
| 631 |
|
| 632 |
gr.Examples(
|
| 633 |
examples=["How many people live in Canada?",
|