# ask-john / app.py
# Author: Kim Adams
# (header lines preserved from original page scrape: read.me, commit 9e66acd)
import gradio as gr
import openai, config, os
import requests, base64
import pandas as pd
import numpy as np
from huggingface_hub import Repository
from io import BytesIO
from dotenv import load_dotenv
from openai.embeddings_utils import get_embedding, cosine_similarity
def get_openai_api_key():
    """Return the OpenAI API key, loading it from a .env file if it is not
    already present in the process environment."""
    if (key := os.environ.get('OPENAI_API_KEY')) is None:
        load_dotenv()
        key = os.environ['OPENAI_API_KEY']
    return key
def get_eleven_api_key():
    """Return the ElevenLabs API key, loading it from a .env file if it is
    not already present in the process environment."""
    if (key := os.environ.get('ELEVEN_LABS_API_KEY')) is None:
        load_dotenv()
        key = os.environ['ELEVEN_LABS_API_KEY']
    return key
# Module-level setup: configure API credentials and load the Q&A knowledge base.
openai.api_key = get_openai_api_key()
voiceKey = get_eleven_api_key()
# Prepare the Q&A embeddings dataframe; each row presumably holds a question,
# an 'answer' column, and a precomputed 'embedding' column -- verify CSV schema.
question_df = pd.read_csv('data/slalom_embeddings.csv')
# The CSV stores embeddings as stringified Python lists; eval() parses them back
# into lists before converting to numpy arrays.
# NOTE(review): eval() on file contents is unsafe if the CSV is ever
# untrusted -- consider json.loads or ast.literal_eval instead.
question_df['embedding'] = question_df['embedding'].apply(eval).apply(np.array)
def transcribe(audio):
    """Answer a spoken question: transcribe it with Whisper, retrieve the most
    similar canned answer via embedding cosine similarity, rephrase it with
    GPT, and synthesize the reply with ElevenLabs text-to-speech.

    Parameters
    ----------
    audio : str
        Filepath to the recorded clip (Gradio microphone capture; the file
        has no extension, so it is renamed to ``.wav`` first).

    Returns
    -------
    tuple[str, str]
        ``(chat_transcript, audio_html)`` -- the visible conversation text
        and an HTML ``<audio>`` element with the reply embedded as base64.
    """
    messages = [{"role": "system", "content": "*"}]

    # Whisper requires a recognized file extension; Gradio's filepath has none.
    audio_filename_with_extension = audio + '.wav'
    os.rename(audio, audio_filename_with_extension)
    print(audio_filename_with_extension)

    # Context manager guarantees the handle is closed (original leaked it).
    with open(audio_filename_with_extension, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    print(transcript)

    # Rank canned answers by cosine similarity to the spoken question.
    # Similarities are computed into a local Series instead of mutating and
    # re-sorting the module-level dataframe on every call (original did both).
    question_vector = get_embedding(transcript['text'], engine='text-embedding-ada-002')
    similarities = question_df['embedding'].apply(lambda emb: cosine_similarity(emb, question_vector))
    best_answer = question_df.loc[similarities.idxmax(), 'answer']
    print("best_answer:" + best_answer)

    user_text = f"Using the following text, answer the question '{transcript['text']}'. {config.ADVISOR_CUSTOM_PROMPT}: {best_answer}"
    messages.append({"role": "user", "content": user_text})

    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    # Text-to-speech request against the ElevenLabs streaming endpoint.
    # Double quotes are stripped so they do not disturb the spoken delivery.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{config.ADVISOR_VOICE_ID}/stream"
    data = {
        "text": system_message["content"].replace('"', ''),
        "voice_settings": {
            "stability": 0.95,
            "similarity_boost": 0.93
        }
    }
    r = requests.post(url, headers={'xi-api-key': voiceKey}, json=data)

    # Embed the reply audio inline so the browser can autoplay it without a
    # separate file (the BytesIO round-trip in the original was unnecessary).
    audio_base64 = base64.b64encode(r.content).decode("utf-8")
    audio_html = f'<audio src="data:audio/aac;base64,{audio_base64}" controls autoplay playsinline></audio>'

    # Build the visible transcript, omitting the hidden system prompt.
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return chat_transcript, audio_html
# Build the interface, then launch it exactly once.
# Bug fix: the original assigned the RETURN VALUE of .launch() to `ui` and
# then called ui.launch() again, which double-launches and fails because
# launch() does not return the Interface object.
ui = gr.Interface(
    title="Ask John",
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath", label="Ask John a question"),
    outputs=[gr.Textbox(label="Response"), gr.HTML()],
)
ui.launch()