JLW committed on
Commit
8e730f8
·
1 Parent(s): cc600f9

Reimplement Whisper

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +50 -27
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: GPT+WolframAlpha
3
  emoji: 👀
4
  colorFrom: red
5
  colorTo: gray
 
1
  ---
2
+ title: GPT+WolframAlpha+Whisper
3
  emoji: 👀
4
  colorFrom: red
5
  colorTo: gray
app.py CHANGED
@@ -9,8 +9,8 @@ import gradio as gr
9
  import requests
10
 
11
  # UNCOMMENT TO USE WHISPER
12
- # import warnings
13
- # import whisper
14
 
15
  from langchain import ConversationChain, LLMChain
16
 
@@ -34,8 +34,8 @@ from polly_utils import PollyVoiceData, NEURAL_ENGINE
34
  news_api_key = os.environ["NEWS_API_KEY"]
35
  tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
36
 
37
- TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'google-search', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
38
- 'open-meteo-api']
39
  TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
40
  BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
41
  AUTH_ERR_MSG = "Please paste your OpenAI key."
@@ -56,29 +56,34 @@ PROMPT_TEMPLATE = PromptTemplate(
56
 
57
  POLLY_VOICE_DATA = PollyVoiceData()
58
 
 
 
 
59
 
60
  # UNCOMMENT TO USE WHISPER
61
- # warnings.filterwarnings("ignore")
62
- # WHISPER_MODEL = whisper.load_model("tiny")
63
- # print("WHISPER_MODEL", WHISPER_MODEL)
64
 
65
 
66
  # UNCOMMENT TO USE WHISPER
67
- # def transcribe(aud_inp):
68
- # if aud_inp is None:
69
- # return ""
70
- # aud = whisper.load_audio(aud_inp)
71
- # aud = whisper.pad_or_trim(aud)
72
- # mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
73
- # _, probs = WHISPER_MODEL.detect_language(mel)
74
- # options = whisper.DecodingOptions()
75
- # # options = whisper.DecodingOptions(language="ja")
76
- # result = whisper.decode(WHISPER_MODEL, mel, options)
77
- # print("result.text", result.text)
78
- # result_text = ""
79
- # if result and result.text:
80
- # result_text = result.text
81
- # return result_text
 
 
82
 
83
 
84
  # Pertains to Express-inator functionality
@@ -441,11 +446,14 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
441
  translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
442
  literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
443
 
 
 
 
444
  with gr.Tab("Chat"):
445
  with gr.Row():
446
  with gr.Column():
447
  gr.HTML(
448
- """<b><center>GPT + WolframAlpha</center></b>
449
  <p><center>New feature in Settings: Babel fish mode</center></p>""")
450
 
451
  openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
@@ -475,10 +483,10 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
475
  submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
476
 
477
  # UNCOMMENT TO USE WHISPER
478
- # with gr.Row():
479
- # audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
480
- # interactive=True, streaming=False)
481
- # audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
482
 
483
  gr.Examples(
484
  examples=["How many people live in Canada?",
@@ -512,6 +520,21 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
512
  monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
513
  outputs=[monologue_state])
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  with gr.Tab("Translate to"):
516
  translate_to_radio = gr.Radio(label="Translate to:", choices=[
517
  TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
 
9
  import requests
10
 
11
  # UNCOMMENT TO USE WHISPER
12
+ import warnings
13
+ import whisper
14
 
15
  from langchain import ConversationChain, LLMChain
16
 
 
34
  news_api_key = os.environ["NEWS_API_KEY"]
35
  tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
36
 
37
+ TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
38
+ 'open-meteo-api'] # 'google-search'
39
  TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
40
  BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
41
  AUTH_ERR_MSG = "Please paste your OpenAI key."
 
56
 
57
  POLLY_VOICE_DATA = PollyVoiceData()
58
 
59
+ # Pertains to WHISPER functionality
60
+ WHISPER_DETECT_LANG = "Detect language"
61
+
62
 
63
  # UNCOMMENT TO USE WHISPER
64
+ warnings.filterwarnings("ignore")
65
+ WHISPER_MODEL = whisper.load_model("tiny")
66
+ print("WHISPER_MODEL", WHISPER_MODEL)
67
 
68
 
69
  # UNCOMMENT TO USE WHISPER
70
+ def transcribe(aud_inp, whisper_lang):
71
+ if aud_inp is None:
72
+ return ""
73
+ aud = whisper.load_audio(aud_inp)
74
+ aud = whisper.pad_or_trim(aud)
75
+ mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
76
+ _, probs = WHISPER_MODEL.detect_language(mel)
77
+ options = whisper.DecodingOptions()
78
+ if whisper_lang != WHISPER_DETECT_LANG:
79
+ whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
80
+ options = whisper.DecodingOptions(language=whisper_lang_code)
81
+ result = whisper.decode(WHISPER_MODEL, mel, options)
82
+ print("result.text", result.text)
83
+ result_text = ""
84
+ if result and result.text:
85
+ result_text = result.text
86
+ return result_text
87
 
88
 
89
  # Pertains to Express-inator functionality
 
446
  translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
447
  literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
448
 
449
+ # Pertains to WHISPER functionality
450
+ whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
451
+
452
  with gr.Tab("Chat"):
453
  with gr.Row():
454
  with gr.Column():
455
  gr.HTML(
456
+ """<b><center>GPT + WolframAlpha + Whisper</center></b>
457
  <p><center>New feature in Settings: Babel fish mode</center></p>""")
458
 
459
  openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
 
483
  submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
484
 
485
  # UNCOMMENT TO USE WHISPER
486
+ with gr.Row():
487
+ audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
488
+ interactive=True, streaming=False)
489
+ audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
490
 
491
  gr.Examples(
492
  examples=["How many people live in Canada?",
 
520
  monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
521
  outputs=[monologue_state])
522
 
523
+ with gr.Tab("Whisper STT"):
524
+ whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
525
+ WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
526
+ "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
527
+ "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
528
+ "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
529
+ "Korean", "Norwegian", "Polish",
530
+ "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
531
+ "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
532
+ value=WHISPER_DETECT_LANG)
533
+
534
+ whisper_lang_radio.change(update_foo,
535
+ inputs=[whisper_lang_radio, whisper_lang_state],
536
+ outputs=[whisper_lang_state])
537
+
538
  with gr.Tab("Translate to"):
539
  translate_to_radio = gr.Radio(label="Translate to:", choices=[
540
  TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",