JLW committed on
Commit
010cf64
·
1 Parent(s): 85238ea

Add talking heads

Browse files
Files changed (2) hide show
  1. app.py +89 -28
  2. azure_utils.py +155 -0
app.py CHANGED
@@ -30,6 +30,7 @@ from openai.error import AuthenticationError, InvalidRequestError, RateLimitErro
30
  from langchain.prompts import PromptTemplate
31
 
32
  from polly_utils import PollyVoiceData, NEURAL_ENGINE
 
33
 
34
  news_api_key = os.environ["NEWS_API_KEY"]
35
  tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
@@ -38,9 +39,13 @@ TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'ne
38
  'open-meteo-api'] # 'google-search'
39
  TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
40
  BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
41
- AUTH_ERR_MSG = "Please paste your OpenAI key. It is not necessary to hit a button or key after pasting it."
42
  MAX_TOKENS = 512
43
 
 
 
 
 
44
  # Pertains to Express-inator functionality
45
  NUM_WORDS_DEFAULT = 0
46
  MAX_WORDS = 400
@@ -56,6 +61,7 @@ PROMPT_TEMPLATE = PromptTemplate(
56
  )
57
 
58
  POLLY_VOICE_DATA = PollyVoiceData()
 
59
 
60
  # Pertains to WHISPER functionality
61
  WHISPER_DETECT_LANG = "Detect language"
@@ -295,7 +301,7 @@ class ChatWrapper:
295
 
296
  def __call__(
297
  self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
298
- trace_chain: bool, speak_text: bool, monologue: bool, express_chain: Optional[LLMChain],
299
  num_words, formality, anticipation_level, joy_level, trust_level,
300
  fear_level, surprise_level, sadness_level, disgust_level, anger_level,
301
  lang_level, translate_to, literary_style
@@ -307,10 +313,11 @@ class ChatWrapper:
307
  print("inp: " + inp)
308
  print("trace_chain: ", trace_chain)
309
  print("speak_text: ", speak_text)
 
310
  print("monologue: ", monologue)
311
  history = history or []
312
  # If chain is None, that is because no API key was provided.
313
- output = "Please paste your OpenAI key to use this application. It is not necessary to hit a button or " \
314
  "key after pasting it."
315
  hidden_text = output
316
 
@@ -333,16 +340,32 @@ class ChatWrapper:
333
  text_to_display = hidden_text + "\n\n" + output
334
  history.append((inp, text_to_display))
335
 
336
- # html_video, temp_file = do_html_video_speak(output)
337
- html_audio, temp_aud_file = None, None
338
  if speak_text:
339
- html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  except Exception as e:
341
  raise e
342
  finally:
343
  self.lock.release()
344
- # return history, history, html_video, temp_file, ""
345
- return history, history, html_audio, temp_aud_file, ""
346
 
347
 
348
  chat = ChatWrapper()
@@ -355,9 +378,11 @@ def do_html_audio_speak(words_to_speak, polly_language):
355
  region_name=os.environ["AWS_DEFAULT_REGION"]
356
  ).client('polly')
357
 
358
- voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
 
359
  if not voice_id:
360
- voice_id = "Joanna"
 
361
  language_code = "en-US"
362
  engine = NEURAL_ENGINE
363
  response = polly_client.synthesize_speech(
@@ -393,24 +418,39 @@ def do_html_audio_speak(words_to_speak, polly_language):
393
  return html_audio, "audios/tempfile.mp3"
394
 
395
 
396
- def do_html_video_speak(words_to_speak):
 
 
 
 
 
 
 
 
 
 
397
  headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
398
  body = {
399
  'bot_name': 'Masahiro',
400
  'bot_response': words_to_speak,
401
- 'voice_name': 'Masahiro-EN'
 
 
402
  }
403
  api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
404
  res = requests.post(api_endpoint, json=body, headers=headers)
 
405
 
406
  html_video = '<pre>no video</pre>'
407
  if isinstance(res.content, bytes):
408
  response_stream = io.BytesIO(res.content)
 
 
409
  with open('videos/tempfile.mp4', 'wb') as f:
410
  f.write(response_stream.read())
411
  temp_file = gr.File("videos/tempfile.mp4")
412
  temp_file_url = "/file=" + temp_file.value['name']
413
- html_video = f'<video width="256" height="256" autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
414
  else:
415
  print('video url unknown')
416
  return html_video, "videos/tempfile.mp4"
@@ -423,6 +463,17 @@ def update_selected_tools(widget, state, llm):
423
  return state, llm, chain, express_chain
424
 
425
 
 
 
 
 
 
 
 
 
 
 
 
426
  def update_foo(widget, state):
427
  if widget:
428
  state = widget
@@ -437,6 +488,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
437
  tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
438
  trace_chain_state = gr.State(False)
439
  speak_text_state = gr.State(False)
 
440
  monologue_state = gr.State(False) # Takes the input and repeats it back to the user, optionally transforming it.
441
 
442
  # Pertains to Express-inator functionality
@@ -462,17 +514,21 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
462
  with gr.Column():
463
  gr.HTML(
464
  """<b><center>GPT + WolframAlpha + Whisper</center></b>
465
- <p><center>New feature in Settings: Babel fish mode</center></p>""")
466
 
467
  openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
468
  show_label=False, lines=1, type='password')
469
 
470
  with gr.Row():
471
- with gr.Column(scale=1, min_width=100, visible=False):
 
 
 
 
472
  my_file = gr.File(label="Upload a file", type="file", visible=False)
473
- tmp_file = gr.File("videos/Masahiro.mp4", visible=False)
474
- tmp_file_url = "/file=" + tmp_file.value['name']
475
- htm_video = f'<video width="256" height="256" autoplay muted loop><source src={tmp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
476
  video_html = gr.HTML(htm_video)
477
 
478
  # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
@@ -481,7 +537,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
481
  htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
482
  audio_html = gr.HTML(htm_audio)
483
 
484
- with gr.Column(scale=3):
485
  chatbot = gr.Chatbot()
486
 
487
  with gr.Row():
@@ -519,9 +575,13 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
519
  trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
520
  outputs=[trace_chain_state])
521
 
522
- speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
523
- speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
524
- outputs=[speak_text_state])
 
 
 
 
525
 
526
  monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
527
  value=False)
@@ -653,6 +713,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
653
  <p>This application, developed by <a href='https://www.linkedin.com/in/javafxpert/'>James L. Weaver</a>,
654
  demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
655
  When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
 
656
  For faster inference without waiting in queue, you may duplicate the space.
657
  </p>""")
658
 
@@ -674,22 +735,22 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
674
  </center>""")
675
 
676
  message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
677
- speak_text_state, monologue_state,
678
  express_chain_state, num_words_state, formality_state,
679
  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
680
  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
681
  lang_level_state, translate_to_state, literary_style_state],
682
- # outputs=[chatbot, history_state, video_html, my_file, message])
683
- outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
684
 
685
  submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
686
- speak_text_state, monologue_state,
687
  express_chain_state, num_words_state, formality_state,
688
  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
689
  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
690
  lang_level_state, translate_to_state, literary_style_state],
691
- # outputs=[chatbot, history_state, video_html, my_file, message])
692
- outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
693
 
694
  openai_api_key_textbox.change(set_openai_api_key,
695
  inputs=[openai_api_key_textbox],
 
30
  from langchain.prompts import PromptTemplate
31
 
32
  from polly_utils import PollyVoiceData, NEURAL_ENGINE
33
+ from azure_utils import AzureVoiceData
34
 
35
  news_api_key = os.environ["NEWS_API_KEY"]
36
  tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
 
39
  'open-meteo-api'] # 'google-search'
40
  TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
41
  BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
42
+ AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
43
  MAX_TOKENS = 512
44
 
45
+ LOOPING_TALKING_HEAD = "videos/Masahiro.mp4"
46
+ TALKING_HEAD_WIDTH = "192"
47
+ MAX_TALKING_HEAD_TEXT_LENGTH = 155
48
+
49
  # Pertains to Express-inator functionality
50
  NUM_WORDS_DEFAULT = 0
51
  MAX_WORDS = 400
 
61
  )
62
 
63
  POLLY_VOICE_DATA = PollyVoiceData()
64
+ AZURE_VOICE_DATA = AzureVoiceData()
65
 
66
  # Pertains to WHISPER functionality
67
  WHISPER_DETECT_LANG = "Detect language"
 
301
 
302
  def __call__(
303
  self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
304
+ trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
305
  num_words, formality, anticipation_level, joy_level, trust_level,
306
  fear_level, surprise_level, sadness_level, disgust_level, anger_level,
307
  lang_level, translate_to, literary_style
 
313
  print("inp: " + inp)
314
  print("trace_chain: ", trace_chain)
315
  print("speak_text: ", speak_text)
316
+ print("talking_head: ", talking_head)
317
  print("monologue: ", monologue)
318
  history = history or []
319
  # If chain is None, that is because no API key was provided.
320
+ output = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or " \
321
  "key after pasting it."
322
  hidden_text = output
323
 
 
340
  text_to_display = hidden_text + "\n\n" + output
341
  history.append((inp, text_to_display))
342
 
343
+ html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
 
344
  if speak_text:
345
+ if talking_head:
346
+ if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
347
+ html_video, temp_file = do_html_video_speak(output, translate_to)
348
+ else:
349
+ temp_file = LOOPING_TALKING_HEAD
350
+ html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
351
+ html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
352
+ else:
353
+ html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
354
+ else:
355
+ if talking_head:
356
+ temp_file = LOOPING_TALKING_HEAD
357
+ html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
358
+ else:
359
+ # html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
360
+ # html_video = create_html_video(temp_file, "128")
361
+ pass
362
+
363
  except Exception as e:
364
  raise e
365
  finally:
366
  self.lock.release()
367
+ return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
368
+ # return history, history, html_audio, temp_aud_file, ""
369
 
370
 
371
  chat = ChatWrapper()
 
378
  region_name=os.environ["AWS_DEFAULT_REGION"]
379
  ).client('polly')
380
 
381
+ # voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
382
+ voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
383
  if not voice_id:
384
+ # voice_id = "Joanna"
385
+ voice_id = "Matthew"
386
  language_code = "en-US"
387
  engine = NEURAL_ENGINE
388
  response = polly_client.synthesize_speech(
 
418
  return html_audio, "audios/tempfile.mp3"
419
 
420
 
421
def create_html_video(file_name, width):
    """Build an HTML <video> tag (muted, looping) for the talking-head placeholder.

    Args:
        file_name: path of the video file — NOTE(review): currently UNUSED; the
            URL is built from the module-level gr.File `tmp_file` instead.
            This is presumably deliberate (the gr.File must be registered with
            Gradio for the "/file=" route to serve it) — confirm intent.
        width: pixel size used for both width and height of the player.

    Returns:
        An HTML string embedding the video.
    """
    # URL comes from the pre-registered gr.File, not from file_name (see note above).
    temp_file_url = "/file=" + tmp_file.value['name']
    html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
    return html_video
425
+
426
+
427
def do_html_video_speak(words_to_speak, azure_language):
    """Generate a lip-synced talking-head video of `words_to_speak` via the Ex-Human API.

    Looks up a male Azure voice for `azure_language` (falling back to
    "en-US-ChristopherNeural" when no match is found), POSTs the text to the
    Ex-Human lipsync endpoint, and writes the returned bytes to
    'videos/tempfile.mp4'.

    Args:
        words_to_speak: text for the avatar to speak.
        azure_language: language name used to select an Azure voice.

    Returns:
        Tuple of (html_video, "videos/tempfile.mp4") where html_video is an
        HTML <video> tag, or '<pre>no video</pre>' if the response had no
        byte content.

    Side effects:
        Reads EXHUMAN_API_KEY from the environment; overwrites
        'videos/tempfile.mp4'; registers it as a gr.File so Gradio serves it
        under the "/file=" route.
    """
    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
    if not azure_voice:
        # Fall back to a default US-English male neural voice.
        azure_voice = "en-US-ChristopherNeural"

    headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
    body = {
        'bot_name': 'Masahiro',
        'bot_response': words_to_speak,
        'azure_voice': azure_voice,
        'azure_style': 'friendly',
        'animation_pipeline': 'high_speed',
    }
    api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
    res = requests.post(api_endpoint, json=body, headers=headers)
    print("res.status_code: ", res.status_code)

    html_video = '<pre>no video</pre>'
    if isinstance(res.content, bytes):
        response_stream = io.BytesIO(res.content)
        print("len(res.content)): ", len(res.content))

        with open('videos/tempfile.mp4', 'wb') as f:
            f.write(response_stream.read())
        # Register the fresh file with Gradio so it is served at "/file=<name>".
        temp_file = gr.File("videos/tempfile.mp4")
        temp_file_url = "/file=" + temp_file.value['name']
        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
    else:
        print('video url unknown')
    return html_video, "videos/tempfile.mp4"
 
463
  return state, llm, chain, express_chain
464
 
465
 
466
def update_talking_head(widget, state):
    """Checkbox handler: toggle the talking-head video panel.

    When the checkbox is ticked, store its value in state and return the HTML
    for the looping idle video; when unticked, clear the state and blank the
    panel.

    Returns:
        (new_state, video_html) pair for the Gradio outputs.
    """
    if not widget:
        # Checkbox cleared: drop the state and replace the video with a stub.
        # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
        return None, "<pre></pre>"

    state = widget
    video_html_talking_head = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
    return state, video_html_talking_head
475
+
476
+
477
  def update_foo(widget, state):
478
  if widget:
479
  state = widget
 
488
  tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
489
  trace_chain_state = gr.State(False)
490
  speak_text_state = gr.State(False)
491
+ talking_head_state = gr.State(True)
492
  monologue_state = gr.State(False) # Takes the input and repeats it back to the user, optionally transforming it.
493
 
494
  # Pertains to Express-inator functionality
 
514
  with gr.Column():
515
  gr.HTML(
516
  """<b><center>GPT + WolframAlpha + Whisper</center></b>
517
+ <p><center>New feature in <b>Translate to</b>: Choose <b>Language level</b> (e.g. for conversation practice or explain like I'm five)</center></p>""")
518
 
519
  openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
520
  show_label=False, lines=1, type='password')
521
 
522
  with gr.Row():
523
+ with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
524
+ speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
525
+ speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
526
+ outputs=[speak_text_state])
527
+
528
  my_file = gr.File(label="Upload a file", type="file", visible=False)
529
+ tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
530
+ # tmp_file_url = "/file=" + tmp_file.value['name']
531
+ htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
532
  video_html = gr.HTML(htm_video)
533
 
534
  # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
 
537
  htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
538
  audio_html = gr.HTML(htm_audio)
539
 
540
+ with gr.Column(scale=7):
541
  chatbot = gr.Chatbot()
542
 
543
  with gr.Row():
 
575
  trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
576
  outputs=[trace_chain_state])
577
 
578
+ # speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
579
+ # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
580
+ # outputs=[speak_text_state])
581
+
582
+ talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
583
+ talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
584
+ outputs=[talking_head_state, video_html])
585
 
586
  monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
587
  value=False)
 
713
  <p>This application, developed by <a href='https://www.linkedin.com/in/javafxpert/'>James L. Weaver</a>,
714
  demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
715
  When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
716
+ Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
717
  For faster inference without waiting in queue, you may duplicate the space.
718
  </p>""")
719
 
 
735
  </center>""")
736
 
737
  message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
738
+ speak_text_state, talking_head_state, monologue_state,
739
  express_chain_state, num_words_state, formality_state,
740
  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
741
  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
742
  lang_level_state, translate_to_state, literary_style_state],
743
+ outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
744
+ # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
745
 
746
  submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
747
+ speak_text_state, talking_head_state, monologue_state,
748
  express_chain_state, num_words_state, formality_state,
749
  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
750
  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
751
  lang_level_state, translate_to_state, literary_style_state],
752
+ outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
753
+ # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
754
 
755
  openai_api_key_textbox.change(set_openai_api_key,
756
  inputs=[openai_api_key_textbox],
azure_utils.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# This class stores Azure voice data. Each record contains a language, a gender,
# and the matching Azure neural voice name. get_voice() returns the Azure voice
# name for a given language/gender pair, or None when there is no match.

NEURAL_ENGINE = "neural"
STANDARD_ENGINE = "standard"


class AzureVoiceData:
    """Lookup table mapping (language, gender) to an Azure neural voice name."""

    def get_voice(self, language, gender):
        """Return the Azure voice name for *language*/*gender*, or None if absent.

        Note: only male voices are currently listed, so any 'Female' query
        returns None.
        """
        for voice in self.voice_data:
            if voice['language'] == language and voice['gender'] == gender:
                return voice['azure_voice']
        return None

    def __init__(self):
        self.voice_data = [
            {'language': 'Arabic',
             'azure_voice': 'ar-EG-ShakirNeural',
             'gender': 'Male'},
            {'language': 'Arabic (Gulf)',
             'azure_voice': 'ar-KW-FahedNeural',
             'gender': 'Male'},
            {'language': 'Catalan',
             'azure_voice': 'ca-ES-EnricNeural',
             'gender': 'Male'},
            {'language': 'Chinese (Cantonese)',
             'azure_voice': 'yue-CN-YunSongNeural',
             'gender': 'Male'},
            {'language': 'Chinese (Mandarin)',
             'azure_voice': 'zh-CN-YunxiNeural',
             'gender': 'Male'},
            {'language': 'Danish',
             'azure_voice': 'da-DK-JeppeNeural',
             'gender': 'Male'},
            {'language': 'Dutch',
             'azure_voice': 'nl-NL-MaartenNeural',
             'gender': 'Male'},
            {'language': 'English (Australian)',
             'azure_voice': 'en-AU-KenNeural',
             'gender': 'Male'},
            {'language': 'English (British)',
             'azure_voice': 'en-GB-RyanNeural',
             'gender': 'Male'},
            {'language': 'English (Indian)',
             'azure_voice': 'en-IN-PrabhatNeural',
             'gender': 'Male'},
            {'language': 'English (New Zealand)',
             'azure_voice': 'en-NZ-MitchellNeural',
             'gender': 'Male'},
            {'language': 'English (South African)',
             'azure_voice': 'en-ZA-LukeNeural',
             'gender': 'Male'},
            {'language': 'English (US)',
             'azure_voice': 'en-US-ChristopherNeural',
             'gender': 'Male'},
            {'language': 'English (Welsh)',
             'azure_voice': 'cy-GB-AledNeural',
             'gender': 'Male'},
            {'language': 'Finnish',
             'azure_voice': 'fi-FI-HarriNeural',
             'gender': 'Male'},
            {'language': 'French',
             'azure_voice': 'fr-FR-HenriNeural',
             'gender': 'Male'},
            {'language': 'French (Canadian)',
             'azure_voice': 'fr-CA-AntoineNeural',
             'gender': 'Male'},
            {'language': 'German',
             'azure_voice': 'de-DE-KlausNeural',
             'gender': 'Male'},
            {'language': 'German (Austrian)',
             'azure_voice': 'de-AT-JonasNeural',
             'gender': 'Male'},
            {'language': 'Hindi',
             'azure_voice': 'hi-IN-MadhurNeural',
             'gender': 'Male'},
            {'language': 'Icelandic',
             'azure_voice': 'is-IS-GunnarNeural',
             'gender': 'Male'},
            {'language': 'Italian',
             'azure_voice': 'it-IT-GianniNeural',
             'gender': 'Male'},
            {'language': 'Japanese',
             'azure_voice': 'ja-JP-KeitaNeural',
             'gender': 'Male'},
            {'language': 'Korean',
             'azure_voice': 'ko-KR-GookMinNeural',
             'gender': 'Male'},
            {'language': 'Norwegian',
             'azure_voice': 'nb-NO-FinnNeural',
             'gender': 'Male'},
            {'language': 'Polish',
             'azure_voice': 'pl-PL-MarekNeural',
             'gender': 'Male'},
            {'language': 'Portuguese (Brazilian)',
             'azure_voice': 'pt-BR-NicolauNeural',
             'gender': 'Male'},
            {'language': 'Portuguese (European)',
             'azure_voice': 'pt-PT-DuarteNeural',
             'gender': 'Male'},
            {'language': 'Romanian',
             'azure_voice': 'ro-RO-EmilNeural',
             'gender': 'Male'},
            {'language': 'Russian',
             'azure_voice': 'ru-RU-DmitryNeural',
             'gender': 'Male'},
            {'language': 'Spanish (European)',
             'azure_voice': 'es-ES-TeoNeural',
             'gender': 'Male'},
            {'language': 'Spanish (Mexican)',
             'azure_voice': 'es-MX-LibertoNeural',
             'gender': 'Male'},
            # Fixed: original string contained a stray trailing double quote
            # ('es-US-AlonsoNeural"'), which is not a valid Azure voice name.
            {'language': 'Spanish (US)',
             'azure_voice': 'es-US-AlonsoNeural',
             'gender': 'Male'},
            {'language': 'Swedish',
             'azure_voice': 'sv-SE-MattiasNeural',
             'gender': 'Male'},
            {'language': 'Turkish',
             'azure_voice': 'tr-TR-AhmetNeural',
             'gender': 'Male'},
            {'language': 'Welsh',
             'azure_voice': 'cy-GB-AledNeural',
             'gender': 'Male'},
        ]
127
+
128
+
129
# Run from the command-line
if __name__ == '__main__':
    # Smoke-test the lookup: print the voice (or None) for a few
    # language/gender combinations.
    azure_voice_data = AzureVoiceData()

    sample_queries = [
        ('English (US)', 'Male'),
        ('English (US)', 'Female'),
        ('French', 'Female'),
        ('French', 'Male'),
        ('Japanese', 'Female'),
        ('Japanese', 'Male'),
        ('Hindi', 'Female'),
        ('Hindi', 'Male'),
    ]
    for language, gender in sample_queries:
        print(language, gender, azure_voice_data.get_voice(language, gender))