import gradio as gr
import openai, config, os
import requests, base64
import pandas as pd
import numpy as np
from huggingface_hub import Repository
from io import BytesIO
from dotenv import load_dotenv
from openai.embeddings_utils import get_embedding, cosine_similarity


def get_openai_api_key():
    """Return the OpenAI API key from the environment, loading .env as a fallback."""
    openai_api_key = os.environ.get('OPENAI_API_KEY')
    if openai_api_key is None:
        load_dotenv()
        # Raises KeyError if the key is still missing after loading .env.
        openai_api_key = os.environ['OPENAI_API_KEY']
    return openai_api_key


def get_eleven_api_key():
    """Return the Eleven Labs API key from the environment, loading .env as a fallback."""
    eleven_api_key = os.environ.get('ELEVEN_LABS_API_KEY')
    if eleven_api_key is None:
        load_dotenv()
        eleven_api_key = os.environ['ELEVEN_LABS_API_KEY']
    return eleven_api_key


openai.api_key = get_openai_api_key()
voiceKey = get_eleven_api_key()

# Prepare the Q&A embeddings dataframe once at startup. The 'embedding' column
# is stored in the CSV as a stringified Python list, so eval it back into a
# numpy array. NOTE(review): eval on CSV content is only safe because this file
# is a trusted local artifact — do not point this at untrusted data.
question_df = pd.read_csv('data/slalom_embeddings.csv')
question_df['embedding'] = question_df['embedding'].apply(eval).apply(np.array)


def transcribe(audio):
    """Answer a spoken question end-to-end.

    Pipeline: Whisper transcription -> embedding similarity lookup against the
    prepared Q&A dataframe -> GPT-3.5 answer grounded in the best match ->
    Eleven Labs text-to-speech of the reply.

    Args:
        audio: Gradio-supplied filepath to the recorded microphone audio
            (no extension; Whisper needs one, so the file is renamed to .wav).

    Returns:
        (chat_transcript, audio_html): the user/assistant text exchange and an
        inline base64 <audio> element that autoplays the spoken reply.
    """
    messages = [{"role": "system", "content": "*"}]

    # Whisper rejects files without a recognized extension; Gradio's temp
    # filepath has none, so rename it to .wav before uploading.
    audio_filename_with_extension = audio + '.wav'
    os.rename(audio, audio_filename_with_extension)
    print(audio_filename_with_extension)
    with open(audio_filename_with_extension, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    print(transcript)

    # Embed the question and pick the stored answer with the highest cosine
    # similarity. Computed on a local Series so the module-level question_df
    # is never mutated (the original sorted and reassigned the global on
    # every call, accumulating a 'similarities' column).
    question_vector = get_embedding(transcript['text'], engine='text-embedding-ada-002')
    similarities = question_df['embedding'].apply(
        lambda emb: cosine_similarity(emb, question_vector))
    best_answer = question_df.loc[similarities.idxmax(), 'answer']
    print("best_answer:" + best_answer)

    user_text = (
        f"Using the following text, answer the question '{transcript['text']}'. \n"
        f"{config.ADVISOR_CUSTOM_PROMPT}: {best_answer}"
    )
    messages.append({"role": "user", "content": user_text})

    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    # Text-to-speech request to Eleven Labs; double quotes are stripped
    # because they disrupt the spoken delivery.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{config.ADVISOR_VOICE_ID}/stream"
    data = {
        "text": system_message["content"].replace('"', ''),
        "voice_settings": {
            "stability": 0.95,
            "similarity_boost": 0.93,
        },
    }
    r = requests.post(url, headers={'xi-api-key': voiceKey}, json=data)

    # Embed the MP3 reply directly in the page as a base64 data URI so the
    # gr.HTML output can autoplay it (the original computed audio_base64 but
    # left audio_html empty, so no sound was ever rendered).
    audio_base64 = base64.b64encode(r.content).decode("utf-8")
    audio_html = (
        f'<audio autoplay controls '
        f'src="data:audio/mpeg;base64,{audio_base64}"></audio>'
    )

    # Build the visible transcript, omitting the system prompt.
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return chat_transcript, audio_html


# Build the interface first, then launch it exactly once. The original
# assigned the result of .launch() and called .launch() again on it, which
# fails because launch() does not return the Interface.
ui = gr.Interface(
    title="Ask John",
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath", label="Ask John a question"),
    outputs=[gr.Textbox(label="Response"), gr.HTML()],
)
ui.launch()