Reimplement Whisper
Browse files
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title: GPT+WolframAlpha
|
| 3 |
emoji: 👀
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: gray
|
|
|
|
| 1 |
---
|
| 2 |
+
title: GPT+WolframAlpha+Whisper
|
| 3 |
emoji: 👀
|
| 4 |
colorFrom: red
|
| 5 |
colorTo: gray
|
app.py
CHANGED
|
@@ -9,8 +9,8 @@ import gradio as gr
|
|
| 9 |
import requests
|
| 10 |
|
| 11 |
# UNCOMMENT TO USE WHISPER
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
from langchain import ConversationChain, LLMChain
|
| 16 |
|
|
@@ -34,8 +34,8 @@ from polly_utils import PollyVoiceData, NEURAL_ENGINE
|
|
| 34 |
news_api_key = os.environ["NEWS_API_KEY"]
|
| 35 |
tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
|
| 36 |
|
| 37 |
-
TOOLS_LIST = ['serpapi', 'wolfram-alpha', '
|
| 38 |
-
'open-meteo-api']
|
| 39 |
TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
|
| 40 |
BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
|
| 41 |
AUTH_ERR_MSG = "Please paste your OpenAI key."
|
|
@@ -56,29 +56,34 @@ PROMPT_TEMPLATE = PromptTemplate(
|
|
| 56 |
|
| 57 |
POLLY_VOICE_DATA = PollyVoiceData()
|
| 58 |
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
# UNCOMMENT TO USE WHISPER
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
|
| 65 |
|
| 66 |
# UNCOMMENT TO USE WHISPER
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
# Pertains to Express-inator functionality
|
|
@@ -441,11 +446,14 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
|
|
| 441 |
translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
|
| 442 |
literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
|
| 443 |
|
|
|
|
|
|
|
|
|
|
| 444 |
with gr.Tab("Chat"):
|
| 445 |
with gr.Row():
|
| 446 |
with gr.Column():
|
| 447 |
gr.HTML(
|
| 448 |
-
"""<b><center>GPT + WolframAlpha</center></b>
|
| 449 |
<p><center>New feature in Settings: Babel fish mode</center></p>""")
|
| 450 |
|
| 451 |
openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
|
|
@@ -475,10 +483,10 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
|
|
| 475 |
submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
|
| 476 |
|
| 477 |
# UNCOMMENT TO USE WHISPER
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
|
| 483 |
gr.Examples(
|
| 484 |
examples=["How many people live in Canada?",
|
|
@@ -512,6 +520,21 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
|
|
| 512 |
monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
|
| 513 |
outputs=[monologue_state])
|
| 514 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
with gr.Tab("Translate to"):
|
| 516 |
translate_to_radio = gr.Radio(label="Translate to:", choices=[
|
| 517 |
TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
|
|
|
|
| 9 |
import requests
|
| 10 |
|
| 11 |
# UNCOMMENT TO USE WHISPER
|
| 12 |
+
import warnings
|
| 13 |
+
import whisper
|
| 14 |
|
| 15 |
from langchain import ConversationChain, LLMChain
|
| 16 |
|
|
|
|
| 34 |
news_api_key = os.environ["NEWS_API_KEY"]
|
| 35 |
tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
|
| 36 |
|
| 37 |
+
TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
|
| 38 |
+
'open-meteo-api'] # 'google-search'
|
| 39 |
TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
|
| 40 |
BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
|
| 41 |
AUTH_ERR_MSG = "Please paste your OpenAI key."
|
|
|
|
| 56 |
|
| 57 |
POLLY_VOICE_DATA = PollyVoiceData()
|
| 58 |
|
| 59 |
+
# Pertains to WHISPER functionality
|
| 60 |
+
WHISPER_DETECT_LANG = "Detect language"
|
| 61 |
+
|
| 62 |
|
| 63 |
# UNCOMMENT TO USE WHISPER
|
| 64 |
+
warnings.filterwarnings("ignore")
|
| 65 |
+
WHISPER_MODEL = whisper.load_model("tiny")
|
| 66 |
+
print("WHISPER_MODEL", WHISPER_MODEL)
|
| 67 |
|
| 68 |
|
| 69 |
# UNCOMMENT TO USE WHISPER
|
| 70 |
+
def transcribe(aud_inp, whisper_lang):
    """Transcribe a recorded audio clip to text with the module-level Whisper model.

    Args:
        aud_inp: Path of the recorded audio file (the microphone component is
            configured with ``type="filepath"``), or ``None`` when nothing
            has been recorded.
        whisper_lang: Either ``WHISPER_DETECT_LANG`` to let Whisper pick the
            language, or a language label that
            ``POLLY_VOICE_DATA.get_whisper_lang_code`` maps to a Whisper
            language code.

    Returns:
        The decoded text, or ``""`` when there is no audio or no text was
        produced.
    """
    if aud_inp is None:
        return ""

    # NOTE(review): pad_or_trim fits the clip to Whisper's fixed input window
    # (30 s per the Whisper README), so longer recordings are cut off --
    # confirm this is acceptable for the UI.
    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))
    mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)

    # language=None makes whisper.decode run language detection itself, so
    # the separate detect_language() pass the original did (and whose result
    # it discarded) is unnecessary.
    language = None
    if whisper_lang != WHISPER_DETECT_LANG:
        language = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
    options = whisper.DecodingOptions(language=language)

    result = whisper.decode(WHISPER_MODEL, mel, options)
    # Guard before touching .text so the debug print cannot fail on an
    # empty result.
    text = result.text if result else ""
    print("result.text", text)
    return text or ""
|
| 87 |
|
| 88 |
|
| 89 |
# Pertains to Express-inator functionality
|
|
|
|
| 446 |
translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
|
| 447 |
literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
|
| 448 |
|
| 449 |
+
# Pertains to WHISPER functionality
|
| 450 |
+
whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
|
| 451 |
+
|
| 452 |
with gr.Tab("Chat"):
|
| 453 |
with gr.Row():
|
| 454 |
with gr.Column():
|
| 455 |
gr.HTML(
|
| 456 |
+
"""<b><center>GPT + WolframAlpha + Whisper</center></b>
|
| 457 |
<p><center>New feature in Settings: Babel fish mode</center></p>""")
|
| 458 |
|
| 459 |
openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
|
|
|
|
| 483 |
submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
|
| 484 |
|
| 485 |
# UNCOMMENT TO USE WHISPER
|
| 486 |
+
with gr.Row():
|
| 487 |
+
audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
|
| 488 |
+
interactive=True, streaming=False)
|
| 489 |
+
audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
|
| 490 |
|
| 491 |
gr.Examples(
|
| 492 |
examples=["How many people live in Canada?",
|
|
|
|
| 520 |
monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
|
| 521 |
outputs=[monologue_state])
|
| 522 |
|
| 523 |
+
with gr.Tab("Whisper STT"):
|
| 524 |
+
whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
|
| 525 |
+
WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
|
| 526 |
+
"Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
|
| 527 |
+
"English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
|
| 528 |
+
"German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
|
| 529 |
+
"Korean", "Norwegian", "Polish",
|
| 530 |
+
"Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
|
| 531 |
+
"Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
|
| 532 |
+
value=WHISPER_DETECT_LANG)
|
| 533 |
+
|
| 534 |
+
whisper_lang_radio.change(update_foo,
|
| 535 |
+
inputs=[whisper_lang_radio, whisper_lang_state],
|
| 536 |
+
outputs=[whisper_lang_state])
|
| 537 |
+
|
| 538 |
with gr.Tab("Translate to"):
|
| 539 |
translate_to_radio = gr.Radio(label="Translate to:", choices=[
|
| 540 |
TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
|