Spaces:

Div99
/

Chat-with-Div

Runtime error

App Files Files Community

Div99 committed on Feb 8, 2023

Commit

cb763d5

1 Parent(s): 53b0871

Update app.py

Browse files

Files changed (1) hide show

app.py +258 -56

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import io
 import os
 from contextlib import closing
 from typing import Optional, Tuple
 import datetime
@@ -30,17 +31,31 @@ from openai.error import AuthenticationError, InvalidRequestError, RateLimitErro
 from langchain.prompts import PromptTemplate
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
 news_api_key = os.environ["NEWS_API_KEY"]
 tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
 TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
               'open-meteo-api']  # 'google-search'
 TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
 BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
-AUTH_ERR_MSG = "Please paste your OpenAI key. It is not necessary to hit a button or key after pasting it."
 MAX_TOKENS = 512
 # Pertains to Express-inator functionality
 NUM_WORDS_DEFAULT = 0
 MAX_WORDS = 400
@@ -51,11 +66,13 @@ LANG_LEVEL_DEFAULT = "N/A"
 TRANSLATE_TO_DEFAULT = "N/A"
 LITERARY_STYLE_DEFAULT = "N/A"
 PROMPT_TEMPLATE = PromptTemplate(
-    input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to", "literary_style"],
     template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
 )
 POLLY_VOICE_DATA = PollyVoiceData()
 # Pertains to WHISPER functionality
 WHISPER_DETECT_LANG = "Detect language"
@@ -87,6 +104,29 @@ def transcribe(aud_inp, whisper_lang):
     return result_text
 # Pertains to Express-inator functionality
 def transform_text(desc, express_chain, num_words, formality,
                    anticipation_level, joy_level, trust_level,
@@ -143,12 +183,15 @@ def transform_text(desc, express_chain, num_words, formality,
     translate_to_str = ""
     if translate_to != TRANSLATE_TO_DEFAULT:
-        translate_to_str = "translated to " + ("" if lang_level == TRANSLATE_TO_DEFAULT else lang_level + " level ") + translate_to + ", "
     literary_style_str = ""
     if literary_style != LITERARY_STYLE_DEFAULT:
         if literary_style == "Prose":
             literary_style_str = "as prose, "
         elif literary_style == "Summary":
             literary_style_str = "as a summary, "
         elif literary_style == "Outline":
@@ -161,10 +204,14 @@ def transform_text(desc, express_chain, num_words, formality,
             literary_style_str = "as a haiku, "
         elif literary_style == "Limerick":
             literary_style_str = "as a limerick, "
         elif literary_style == "Joke":
             literary_style_str = "as a very funny joke with a setup and punchline, "
         elif literary_style == "Knock-knock":
             literary_style_str = "as a very funny knock-knock joke, "
     formatted_prompt = PROMPT_TEMPLATE.format(
         original_words=desc,
@@ -200,6 +247,7 @@ def transform_text(desc, express_chain, num_words, formality,
 def load_chain(tools_list, llm):
     chain = None
     express_chain = None
     if llm:
         print("\ntools_list", tools_list)
         tool_names = tools_list
@@ -209,21 +257,35 @@ def load_chain(tools_list, llm):
         chain = initialize_agent(tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
         express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)
-    return chain, express_chain
 def set_openai_api_key(api_key):
     """Set the api key and return chain.
     If no api_key, then None is returned.
     """
-    if api_key and api_key.startswith("sk-") and len(api_key) > 50:
-        os.environ["OPENAI_API_KEY"] = api_key
         llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)
-        chain, express_chain = load_chain(TOOLS_DEFAULT_LIST, llm)
-        os.environ["OPENAI_API_KEY"] = ""
-        return chain, express_chain, llm
-    return None, None, None
 def run_chain(chain, inp, capture_hidden_text):
@@ -238,7 +300,8 @@ def run_chain(chain, inp, capture_hidden_text):
         try:
             output = chain.run(input=inp)
         except AuthenticationError as ae:
-            error_msg = AUTH_ERR_MSG
         except RateLimitError as rle:
             error_msg = "\n\nRateLimitError: " + str(rle)
         except ValueError as ve:
@@ -275,7 +338,8 @@ def run_chain(chain, inp, capture_hidden_text):
         try:
             output = chain.run(input=inp)
         except AuthenticationError as ae:
-            output = AUTH_ERR_MSG
         except RateLimitError as rle:
             output = "\n\nRateLimitError: " + str(rle)
         except ValueError as ve:
@@ -288,6 +352,12 @@ def run_chain(chain, inp, capture_hidden_text):
     return output, hidden_text
 class ChatWrapper:
     def __init__(self):
@@ -295,10 +365,10 @@ class ChatWrapper:
     def __call__(
             self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
-            trace_chain: bool, speak_text: bool, monologue: bool, express_chain: Optional[LLMChain],
             num_words, formality, anticipation_level, joy_level, trust_level,
             fear_level, surprise_level, sadness_level, disgust_level, anger_level,
-            lang_level, translate_to, literary_style
     ):
         """Execute the chat functionality."""
         self.lock.acquire()
@@ -307,19 +377,29 @@ class ChatWrapper:
             print("inp: " + inp)
             print("trace_chain: ", trace_chain)
             print("speak_text: ", speak_text)
             print("monologue: ", monologue)
             history = history or []
             # If chain is None, that is because no API key was provided.
-            output = "Please paste your OpenAI key to use this application. It is not necessary to hit a button or " \
-                     "key after pasting it."
             hidden_text = output
-            if chain and chain != "":
                 # Set OpenAI key
                 import openai
                 openai.api_key = api_key
                 if not monologue:
-                    output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
                 else:
                     output, hidden_text = inp, None
@@ -333,16 +413,32 @@ class ChatWrapper:
                 text_to_display = hidden_text + "\n\n" + output
             history.append((inp, text_to_display))
-            # html_video, temp_file = do_html_video_speak(output)
-            html_audio, temp_file = None, None
             if speak_text:
-                html_audio, temp_file = do_html_audio_speak(output, translate_to)
         except Exception as e:
             raise e
         finally:
             self.lock.release()
-        # return history, history, html_video, temp_file, ""
-        return history, history, html_audio, temp_file, ""
 chat = ChatWrapper()
@@ -355,9 +451,11 @@ def do_html_audio_speak(words_to_speak, polly_language):
         region_name=os.environ["AWS_DEFAULT_REGION"]
     ).client('polly')
-    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
     if not voice_id:
-        voice_id = "Joanna"
         language_code = "en-US"
         engine = NEURAL_ENGINE
     response = polly_client.synthesize_speech(
@@ -393,24 +491,39 @@ def do_html_audio_speak(words_to_speak, polly_language):
     return html_audio, "audios/tempfile.mp3"
-def do_html_video_speak(words_to_speak):
     headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
     body = {
         'bot_name': 'Masahiro',
         'bot_response': words_to_speak,
-        'voice_name': 'Masahiro-EN'
     }
     api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
     res = requests.post(api_endpoint, json=body, headers=headers)
     html_video = '<pre>no video</pre>'
     if isinstance(res.content, bytes):
         response_stream = io.BytesIO(res.content)
         with open('videos/tempfile.mp4', 'wb') as f:
             f.write(response_stream.read())
         temp_file = gr.File("videos/tempfile.mp4")
         temp_file_url = "/file=" + temp_file.value['name']
-        html_video = f'<video width="256" height="256" autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
     else:
         print('video url unknown')
     return html_video, "videos/tempfile.mp4"
@@ -419,16 +532,45 @@ def do_html_video_speak(words_to_speak):
 def update_selected_tools(widget, state, llm):
     if widget:
         state = widget
-        chain, express_chain = load_chain(state, llm)
         return state, llm, chain, express_chain
 def update_foo(widget, state):
     if widget:
         state = widget
         return state
 with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     llm_state = gr.State()
     history_state = gr.State()
@@ -437,7 +579,9 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
     trace_chain_state = gr.State(False)
     speak_text_state = gr.State(False)
     monologue_state = gr.State(False)  # Takes the input and repeats it back to the user, optionally transforming it.
     # Pertains to Express-inator functionality
     num_words_state = gr.State(NUM_WORDS_DEFAULT)
@@ -457,22 +601,34 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     # Pertains to WHISPER functionality
     whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
     with gr.Tab("Chat"):
         with gr.Row():
             with gr.Column():
                 gr.HTML(
                     """<b><center>GPT + WolframAlpha + Whisper</center></b>
-                    <p><center>New feature in Settings: Babel fish mode</center></p>""")
             openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
-                                                show_label=False, lines=1, type='password')
         with gr.Row():
-            with gr.Column(scale=1, min_width=100, visible=False):
                 my_file = gr.File(label="Upload a file", type="file", visible=False)
-                tmp_file = gr.File("videos/Masahiro.mp4", visible=False)
-                tmp_file_url = "/file=" + tmp_file.value['name']
-                htm_video = f'<video width="256" height="256" autoplay muted loop><source src={tmp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
                 video_html = gr.HTML(htm_video)
                 # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
@@ -481,7 +637,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                 htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                 audio_html = gr.HTML(htm_audio)
-            with gr.Column(scale=3):
                 chatbot = gr.Chatbot()
         with gr.Row():
@@ -496,6 +652,11 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                                        interactive=True, streaming=False)
             audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
         gr.Examples(
             examples=["How many people live in Canada?",
                       "What is 2 to the 30th power?",
@@ -519,15 +680,22 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
         trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
                               outputs=[trace_chain_state])
-        speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
-        speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
-                             outputs=[speak_text_state])
         monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
                                    value=False)
         monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
                             outputs=[monologue_state])
     with gr.Tab("Whisper STT"):
         whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
             WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
@@ -559,7 +727,7 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
             "Korean", "Norwegian", "Polish",
             "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
             "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
-            "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon",
             "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
                                       value=TRANSLATE_TO_DEFAULT)
@@ -577,8 +745,8 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     with gr.Tab("Lit style"):
         literary_style_radio = gr.Radio(label="Literary style:", choices=[
-            LITERARY_STYLE_DEFAULT, "Prose", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick", "Joke",
-            "Knock-knock"],
                                         value=LITERARY_STYLE_DEFAULT)
         literary_style_radio.change(update_foo,
@@ -649,34 +817,68 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                                 inputs=[num_words_slider, num_words_state],
                                 outputs=[num_words_state])
     gr.HTML("""
-        <p>This application, developed by AI Researcher <a href='https://divyanshgarg.com/'>Div Garg</a>,
-        demonstrates a conversational AI agent implemented with OpenAI GPT-3.5 and LangChain.
         When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
         For faster inference without waiting in queue, you may duplicate the space.
         </p>""")
     message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
-                                 speak_text_state, monologue_state,
                                  express_chain_state, num_words_state, formality_state,
                                  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                                  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
-                                 lang_level_state, translate_to_state, literary_style_state],
-                   # outputs=[chatbot, history_state, video_html, my_file, message])
-                   outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
     submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
-                               speak_text_state, monologue_state,
                                express_chain_state, num_words_state, formality_state,
                                anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                                surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
-                               lang_level_state, translate_to_state, literary_style_state],
-                 # outputs=[chatbot, history_state, video_html, my_file, message])
-                 outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
-    openai_api_key_textbox.change(set_openai_api_key,
                                   inputs=[openai_api_key_textbox],
-                                  outputs=[chain_state, express_chain_state, llm_state])
-block.launch(debug=True)

 import io
 import os
+import ssl
 from contextlib import closing
 from typing import Optional, Tuple
 import datetime
 from langchain.prompts import PromptTemplate
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
+from azure_utils import AzureVoiceData
+# Pertains to question answering functionality
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores.faiss import FAISS
+from langchain.docstore.document import Document
+from langchain.chains.question_answering import load_qa_chain
 news_api_key = os.environ["NEWS_API_KEY"]
 tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
+openai_api_key = os.environ["OPENAI_API_KEY"]
 TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
               'open-meteo-api']  # 'google-search'
 TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
 BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
+# AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
+AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. "
 MAX_TOKENS = 512
+LOOPING_TALKING_HEAD = "videos/Masahiro.mp4"
+TALKING_HEAD_WIDTH = "192"
+MAX_TALKING_HEAD_TEXT_LENGTH = 155
 # Pertains to Express-inator functionality
 NUM_WORDS_DEFAULT = 0
 MAX_WORDS = 400
 TRANSLATE_TO_DEFAULT = "N/A"
 LITERARY_STYLE_DEFAULT = "N/A"
 PROMPT_TEMPLATE = PromptTemplate(
+    input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to",
+                     "literary_style"],
     template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
 )
 POLLY_VOICE_DATA = PollyVoiceData()
+AZURE_VOICE_DATA = AzureVoiceData()
 # Pertains to WHISPER functionality
 WHISPER_DETECT_LANG = "Detect language"
     return result_text
+# Temporarily address Wolfram Alpha SSL certificate issue
+ssl._create_default_https_context = ssl._create_unverified_context
+# TEMPORARY FOR TESTING
+def transcribe_dummy(aud_inp_tb, whisper_lang):
+    if aud_inp_tb is None:
+        return ""
+    # aud = whisper.load_audio(aud_inp)
+    # aud = whisper.pad_or_trim(aud)
+    # mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
+    # _, probs = WHISPER_MODEL.detect_language(mel)
+    # options = whisper.DecodingOptions()
+    # options = whisper.DecodingOptions(language="ja")
+    # result = whisper.decode(WHISPER_MODEL, mel, options)
+    result_text = "Whisper will detect language"
+    if whisper_lang != WHISPER_DETECT_LANG:
+        whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
+        result_text = f"Whisper will use lang code: {whisper_lang_code}"
+    print("result_text", result_text)
+    return aud_inp_tb
 # Pertains to Express-inator functionality
 def transform_text(desc, express_chain, num_words, formality,
                    anticipation_level, joy_level, trust_level,
     translate_to_str = ""
     if translate_to != TRANSLATE_TO_DEFAULT:
+        translate_to_str = "translated to " + (
+            "" if lang_level == TRANSLATE_TO_DEFAULT else lang_level + " level ") + translate_to + ", "
     literary_style_str = ""
     if literary_style != LITERARY_STYLE_DEFAULT:
         if literary_style == "Prose":
             literary_style_str = "as prose, "
+        if literary_style == "Story":
+            literary_style_str = "as a story, "
         elif literary_style == "Summary":
             literary_style_str = "as a summary, "
         elif literary_style == "Outline":
             literary_style_str = "as a haiku, "
         elif literary_style == "Limerick":
             literary_style_str = "as a limerick, "
+        elif literary_style == "Rap":
+            literary_style_str = "as a rap, "
         elif literary_style == "Joke":
             literary_style_str = "as a very funny joke with a setup and punchline, "
         elif literary_style == "Knock-knock":
             literary_style_str = "as a very funny knock-knock joke, "
+        elif literary_style == "FAQ":
+            literary_style_str = "as a FAQ with several questions and answers, "
     formatted_prompt = PROMPT_TEMPLATE.format(
         original_words=desc,
 def load_chain(tools_list, llm):
     chain = None
     express_chain = None
+    memory = None
     if llm:
         print("\ntools_list", tools_list)
         tool_names = tools_list
         chain = initialize_agent(tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
         express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)
+    return chain, express_chain, memory
 def set_openai_api_key(api_key):
     """Set the api key and return chain.
     If no api_key, then None is returned.
     """
+    # if api_key and api_key.startswith("sk-") and len(api_key) > 50:
+    if "OPENAI_API_KEY" not in os.environ:
+        print("OpenAI Key Not found")
+    else:
+        # os.environ["OPENAI_API_KEY"] = api_key
+        # print("\n\n ++++++++++++++ Setting OpenAI API key ++++++++++++++ \n\n")
+        # print(str(datetime.datetime.now()) + ": Before OpenAI, OPENAI_API_KEY length: " + str(
+        #     len(os.environ["OPENAI_API_KEY"])))
         llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)
+        # print(str(datetime.datetime.now()) + ": After OpenAI, OPENAI_API_KEY length: " + str(
+        #     len(os.environ["OPENAI_API_KEY"])))
+        chain, express_chain, memory = load_chain(TOOLS_DEFAULT_LIST, llm)
+        # Pertains to question answering functionality
+        embeddings = OpenAIEmbeddings()
+        qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
+        # print(str(datetime.datetime.now()) + ": After load_chain, OPENAI_API_KEY length: " + str(
+        #     len(os.environ["OPENAI_API_KEY"])))
+        # os.environ["OPENAI_API_KEY"] = ""
+        return chain, express_chain, llm, embeddings, qa_chain, memory
+    return None, None, None, None, None, None
 def run_chain(chain, inp, capture_hidden_text):
         try:
             output = chain.run(input=inp)
         except AuthenticationError as ae:
+            error_msg = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
+            print("error_msg", error_msg)
         except RateLimitError as rle:
             error_msg = "\n\nRateLimitError: " + str(rle)
         except ValueError as ve:
         try:
             output = chain.run(input=inp)
         except AuthenticationError as ae:
+            output = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
+            print("output", output)
         except RateLimitError as rle:
             output = "\n\nRateLimitError: " + str(rle)
         except ValueError as ve:
     return output, hidden_text
+def reset_memory(history, memory):
+    memory.clear()
+    history = []
+    return history, history, memory
 class ChatWrapper:
     def __init__(self):
     def __call__(
             self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
+            trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
             num_words, formality, anticipation_level, joy_level, trust_level,
             fear_level, surprise_level, sadness_level, disgust_level, anger_level,
+            lang_level, translate_to, literary_style, qa_chain, docsearch, use_embeddings
     ):
         """Execute the chat functionality."""
         self.lock.acquire()
             print("inp: " + inp)
             print("trace_chain: ", trace_chain)
             print("speak_text: ", speak_text)
+            print("talking_head: ", talking_head)
             print("monologue: ", monologue)
             history = history or []
             # If chain is None, that is because no API key was provided.
+            output = "Please paste your OpenAI key from openai.com to use this app. " + str(datetime.datetime.now())
             hidden_text = output
+            if chain:
                 # Set OpenAI key
                 import openai
                 openai.api_key = api_key
                 if not monologue:
+                    if use_embeddings:
+                        if inp and inp.strip() != "":
+                            if docsearch:
+                                docs = docsearch.similarity_search(inp)
+                                output = str(qa_chain.run(input_documents=docs, question=inp))
+                            else:
+                                output, hidden_text = "Please supply some text in the the Embeddings tab.", None
+                        else:
+                            output, hidden_text = "What's on your mind?", None
+                    else:
+                        output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
                 else:
                     output, hidden_text = inp, None
                 text_to_display = hidden_text + "\n\n" + output
             history.append((inp, text_to_display))
+            html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
             if speak_text:
+                if talking_head:
+                    if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
+                        html_video, temp_file = do_html_video_speak(output, translate_to)
+                    else:
+                        temp_file = LOOPING_TALKING_HEAD
+                        html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                        html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+                else:
+                    html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+            else:
+                if talking_head:
+                    temp_file = LOOPING_TALKING_HEAD
+                    html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                else:
+                    # html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+                    # html_video = create_html_video(temp_file, "128")
+                    pass
         except Exception as e:
             raise e
         finally:
             self.lock.release()
+        return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
+        # return history, history, html_audio, temp_aud_file, ""
 chat = ChatWrapper()
         region_name=os.environ["AWS_DEFAULT_REGION"]
     ).client('polly')
+    # voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
+    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
     if not voice_id:
+        # voice_id = "Joanna"
+        voice_id = "Matthew"
         language_code = "en-US"
         engine = NEURAL_ENGINE
     response = polly_client.synthesize_speech(
     return html_audio, "audios/tempfile.mp3"
+def create_html_video(file_name, width):
+    temp_file_url = "/file=" + tmp_file.value['name']
+    html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
+    return html_video
+def do_html_video_speak(words_to_speak, azure_language):
+    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
+    if not azure_voice:
+        azure_voice = "en-US-ChristopherNeural"
     headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
     body = {
         'bot_name': 'Masahiro',
         'bot_response': words_to_speak,
+        'azure_voice': azure_voice,
+        'azure_style': 'friendly',
+        'animation_pipeline': 'high_speed',
     }
     api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
     res = requests.post(api_endpoint, json=body, headers=headers)
+    print("res.status_code: ", res.status_code)
     html_video = '<pre>no video</pre>'
     if isinstance(res.content, bytes):
         response_stream = io.BytesIO(res.content)
+        print("len(res.content)): ", len(res.content))
         with open('videos/tempfile.mp4', 'wb') as f:
             f.write(response_stream.read())
         temp_file = gr.File("videos/tempfile.mp4")
         temp_file_url = "/file=" + temp_file.value['name']
+        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
     else:
         print('video url unknown')
     return html_video, "videos/tempfile.mp4"
 def update_selected_tools(widget, state, llm):
     if widget:
         state = widget
+        chain, express_chain, memory = load_chain(state, llm)
         return state, llm, chain, express_chain
+def update_talking_head(widget, state):
+    if widget:
+        state = widget
+        video_html_talking_head = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
+        return state, video_html_talking_head
+    else:
+        # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
+        return None, "<pre></pre>"
 def update_foo(widget, state):
     if widget:
         state = widget
         return state
+# Pertains to question answering functionality
+def update_embeddings(embeddings_text, embeddings, qa_chain):
+    if embeddings_text:
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+        texts = text_splitter.split_text(embeddings_text)
+        docsearch = FAISS.from_texts(texts, embeddings)
+        print("Embeddings updated")
+        return docsearch
+# Pertains to question answering functionality
+def update_use_embeddings(widget, state):
+    if widget:
+        state = widget
+        return state
 with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
     llm_state = gr.State()
     history_state = gr.State()
     tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
     trace_chain_state = gr.State(False)
     speak_text_state = gr.State(False)
+    talking_head_state = gr.State(True)
     monologue_state = gr.State(False)  # Takes the input and repeats it back to the user, optionally transforming it.
+    memory_state = gr.State()
     # Pertains to Express-inator functionality
     num_words_state = gr.State(NUM_WORDS_DEFAULT)
     # Pertains to WHISPER functionality
     whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
+    # Pertains to question answering functionality
+    embeddings_state = gr.State()
+    qa_chain_state = gr.State()
+    docsearch_state = gr.State()
+    use_embeddings_state = gr.State(False)
     # "Chat" tab: header HTML, API-key textbox, talking-head/audio column,
     # chatbot, example prompts and the option checkboxes.
     # NOTE(review): `message`, `audio_comp`, `trace_chain_cb`, `tmp_aud_file_url`
     # and `openai_api_key` are used below but not defined in the visible lines —
     # presumably they are defined in parts of the file not shown here; confirm.
     with gr.Tab("Chat"):
         with gr.Row():
             with gr.Column():
                 gr.HTML(
                     """<b><center>GPT + WolframAlpha + Whisper</center></b>
+                    <p><center>New feature: <b>Embeddings</b></center></p>""")
             # The key textbox is created hidden (visible=False) and pre-filled
             # from `openai_api_key`.
             openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
+                                                show_label=False, lines=1, type='password',
+                                                               value=openai_api_key,
+                                                                visible=False,)
         with gr.Row():
             # Narrow column (scale=1) holding the speech checkbox, the
             # talking-head video HTML and the audio HTML.
+            with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
+                speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
                 # On change, update_foo receives the checkbox value and the
                 # current state; its result is written to speak_text_state.
+                speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
+                                     outputs=[speak_text_state])
                 my_file = gr.File(label="Upload a file", type="file", visible=False)
+                tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
+                # tmp_file_url = "/file=" + tmp_file.value['name']
+                htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
                 video_html = gr.HTML(htm_video)
                 # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
                 # NOTE(review): the src attribute value is interpolated without
                 # quotes; that only works if the URL contains no spaces — confirm.
                 htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                 audio_html = gr.HTML(htm_audio)
             # Wide column (scale=7) holding the chatbot.
+            with gr.Column(scale=7):
                 chatbot = gr.Chatbot()
         with gr.Row():
             # NOTE(review): the start of the call that the next line closes is
             # not visible here (lines appear to be elided).
                                        interactive=True, streaming=False)
             # When the audio component changes, transcribe(...) is called with
             # it and whisper_lang_state; the result goes to `message`.
             audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
+        # TEMPORARY FOR TESTING
+        # with gr.Row():
+        #     audio_comp_tb = gr.Textbox(label="Just say it!", lines=1)
+        #     audio_comp_tb.submit(transcribe_dummy, inputs=[audio_comp_tb, whisper_lang_state], outputs=[message])
         gr.Examples(
             examples=["How many people live in Canada?",
                       "What is 2 to the 30th power?",
         # NOTE(review): the rest of the gr.Examples(...) call and the
         # definition of trace_chain_cb are not visible here.
         trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
                               outputs=[trace_chain_state])
+        # speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
+        # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
+        #                      outputs=[speak_text_state])
         # update_talking_head writes to two outputs: the state and the
         # video_html component.
+        talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
+        talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
+                               outputs=[talking_head_state, video_html])
         monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
                                    value=False)
         monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
                             outputs=[monologue_state])
         # "Reset chat" calls reset_memory with the history and memory states and
         # writes its results to the chatbot, history_state and memory_state.
+        reset_btn = gr.Button(value="Reset chat", variant="secondary").style(full_width=False)
+        reset_btn.click(reset_memory, inputs=[history_state, memory_state], outputs=[chatbot, history_state, memory_state])
     # Option tabs (radio-button settings).
     # NOTE(review): the definitions below look spliced. The choices list that
     # starts with Whisper languages ends with translate-to style entries and
     # `value=TRANSLATE_TO_DEFAULT`, and the final `.change(...)` call combines
     # `literary_style_radio` with `num_words_*` arguments. Lines appear to be
     # missing in between; verify against the full file before editing.
     with gr.Tab("Whisper STT"):
         whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
             WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
             "Korean", "Norwegian", "Polish",
             "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
             "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
+            "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon", "Neanderthal",
             "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
                                       value=TRANSLATE_TO_DEFAULT)
     # "Lit style" tab: radio whose default selection is LITERARY_STYLE_DEFAULT.
     with gr.Tab("Lit style"):
         literary_style_radio = gr.Radio(label="Literary style:", choices=[
+            LITERARY_STYLE_DEFAULT, "Prose", "Story", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick", "Rap",
+            "Joke", "Knock-knock", "FAQ"],
                                         value=LITERARY_STYLE_DEFAULT)
         literary_style_radio.change(update_foo,
                                 inputs=[num_words_slider, num_words_state],
                                 outputs=[num_words_state])
+    with gr.Tab("Embeddings"):
         # "Embeddings" tab: a 20-line textbox for the source text, a checkbox
         # that toggles use_embeddings_state, and a "Create" button.
+        embeddings_text_box = gr.Textbox(label="Enter text for embeddings and hit Create:",
+                                         lines=20)
+        with gr.Row():
+            use_embeddings_cb = gr.Checkbox(label="Use embeddings", value=False)
             # update_use_embeddings receives the checkbox value and the current
             # state; its result is written back to use_embeddings_state.
+            use_embeddings_cb.change(update_use_embeddings, inputs=[use_embeddings_cb, use_embeddings_state],
+                                     outputs=[use_embeddings_state])
+            embeddings_text_submit = gr.Button(value="Create", variant="secondary").style(full_width=False)
             # Clicking "Create" calls update_embeddings with the entered text,
             # embeddings_state and qa_chain_state; the result is stored in
             # docsearch_state, which is later passed to `chat`.
+            embeddings_text_submit.click(update_embeddings,
+                                         inputs=[embeddings_text_box, embeddings_state, qa_chain_state],
+                                         outputs=[docsearch_state])
     # Footer: static HTML rendered below the tabs. First block is the
     # description/credits paragraph.
     gr.HTML("""
+        <p>This application, developed by <a href='https://www.linkedin.com/in/javafxpert/'>James L. Weaver</a>,
+        demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
         When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
+        Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
         For faster inference without waiting in queue, you may duplicate the space.
         </p>""")
     # PayPal donate form; submits to paypal.com in a new tab (target="_blank").
+    gr.HTML("""
+<form action="https://www.paypal.com/donate" method="post" target="_blank">
+<input type="hidden" name="business" value="AK8BVNALBXSPQ" />
+<input type="hidden" name="no_recurring" value="0" />
+<input type="hidden" name="item_name" value="Please consider helping to defray the cost of APIs such as SerpAPI and WolframAlpha that this app uses." />
+<input type="hidden" name="currency_code" value="USD" />
+<input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif" border="0" name="submit" title="PayPal - The safer, easier way to pay online!" alt="Donate with PayPal button" />
+<img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
+</form>
+    """)
     # "Duplicate Space" badge and LangChain credit.
     # NOTE(review): the duplicate link targets the JavaFXpert/Chat-GPT-LangChain
     # Space; confirm that is the intended target for this deployment.
+    gr.HTML("""<center>
+        <a href="https://huggingface.co/spaces/JavaFXpert/Chat-GPT-LangChain?duplicate=true">
+        <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+        Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
+        </center>""")
     # Event wiring. Submitting the message box and clicking the submit button
     # both call `chat` with the same inputs list and the same outputs list, so
     # the two calls below must be kept in sync.
     # NOTE(review): `message`, `submit`, `chain_state`, `express_chain_state`,
     # `tmp_aud_file` and the formality/emotion/language state objects are not
     # defined in the visible lines — presumably defined elsewhere in the file.
     message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
+                                 speak_text_state, talking_head_state, monologue_state,
                                  express_chain_state, num_words_state, formality_state,
                                  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                                  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
+                                 lang_level_state, translate_to_state, literary_style_state,
+                                 qa_chain_state, docsearch_state, use_embeddings_state],
+                   outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
+    # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
     submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
+                               speak_text_state, talking_head_state, monologue_state,
                                express_chain_state, num_words_state, formality_state,
                                anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                                surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
+                               lang_level_state, translate_to_state, literary_style_state,
+                               qa_chain_state, docsearch_state, use_embeddings_state],
+                 outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
+    # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
     # On page load, set_openai_api_key is called with the key textbox and its
     # results fill the chain, LLM, embeddings, QA-chain and memory states.
+    block.load(set_openai_api_key,
                                   inputs=[openai_api_key_textbox],
+                                  outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
+                                           qa_chain_state, memory_state])
 # Start the app with debug=True.
+block.launch(debug=True)