import ast
import base64
import os
from io import BytesIO

import gradio as gr
import numpy as np
import openai
import pandas as pd
import requests
from dotenv import load_dotenv
from huggingface_hub import Repository
from openai.embeddings_utils import get_embedding, cosine_similarity

import config
|
|
def get_openai_api_key():
    """Return the OpenAI API key from the environment.

    Falls back to loading a .env file when the variable is not already
    set; raises KeyError if it is still missing afterwards.
    """
    if (key := os.environ.get('OPENAI_API_KEY')) is not None:
        return key
    load_dotenv()
    return os.environ['OPENAI_API_KEY']
|
|
def get_eleven_api_key():
    """Return the ElevenLabs API key from the environment.

    Falls back to loading a .env file when the variable is not already
    set; raises KeyError if it is still missing afterwards.
    """
    if (key := os.environ.get('ELEVEN_LABS_API_KEY')) is not None:
        return key
    load_dotenv()
    return os.environ['ELEVEN_LABS_API_KEY']
|
|
# Configure API credentials at import time: OpenAI is used for Whisper
# transcription, embeddings, and chat; ElevenLabs for text-to-speech.
openai.api_key = get_openai_api_key()
voiceKey = get_eleven_api_key()
|
|
| |
# Load the precomputed question/answer embeddings. The 'embedding' column is
# stored in the CSV as a stringified list of floats; parse it back into a
# numpy array. ast.literal_eval only accepts Python literals — unlike the
# original eval(), which would execute any expression embedded in the CSV.
question_df = pd.read_csv('data/slalom_embeddings.csv')
question_df['embedding'] = question_df['embedding'].apply(ast.literal_eval).apply(np.array)
|
|
def transcribe(audio):
    """Answer a spoken question with text and synthesized speech.

    Pipeline: transcribe the recorded clip with Whisper, find the closest
    precomputed answer via embedding cosine similarity, rephrase it with
    gpt-3.5-turbo, then synthesize the reply with ElevenLabs.

    Parameters
    ----------
    audio : str
        Filepath to the recorded clip, as supplied by
        gr.Audio(type="filepath").

    Returns
    -------
    tuple[str, str]
        (chat transcript text, HTML <audio> element with the spoken reply).
    """
    messages = [{"role": "system", "content": "*"}]

    # Whisper requires a recognized file extension; Gradio hands us an
    # extensionless temp file, so rename it to .wav first.
    audio_filename = audio + '.wav'
    os.rename(audio, audio_filename)

    # Context manager closes the handle — the original leaked it.
    with open(audio_filename, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    question = transcript['text']

    question_vector = get_embedding(question, engine='text-embedding-ada-002')

    # Score similarities into a local Series and pick the best row with
    # idxmax, instead of re-sorting and mutating the shared module-level
    # DataFrame on every request as the original did.
    similarities = question_df['embedding'].apply(
        lambda emb: cosine_similarity(emb, question_vector)
    )
    best_answer = question_df.loc[similarities.idxmax(), 'answer']

    user_text = f"Using the following text, answer the question '{question}'. {config.ADVISOR_CUSTOM_PROMPT}: {best_answer}"
    messages.append({"role": "user", "content": user_text})

    response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages)
    system_message = response["choices"][0]["message"]
    messages.append(system_message)

    # Synthesize the reply with ElevenLabs and embed it as a base64 data URI
    # so the browser can autoplay it. Double quotes are stripped because the
    # voice tends to read them aloud — TODO confirm that was the intent.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{config.ADVISOR_VOICE_ID}/stream"
    data = {
        "text": system_message["content"].replace('"', ''),
        "voice_settings": {
            "stability": 0.95,
            "similarity_boost": 0.93
        }
    }
    r = requests.post(url, headers={'xi-api-key': voiceKey}, json=data)
    # Fail loudly on an API error instead of embedding the error payload
    # as "audio" (the original skipped this check).
    r.raise_for_status()
    audio_base64 = base64.b64encode(r.content).decode("utf-8")
    audio_html = f'<audio src="data:audio/aac;base64,{audio_base64}" controls autoplay playsinline></audio>'

    # Build a readable transcript, omitting the hidden system prompt.
    chat_transcript = ""
    for message in messages:
        if message['role'] != 'system':
            chat_transcript += message['role'] + ": " + message['content'] + "\n\n"

    return chat_transcript, audio_html
|
|
# Build and launch the Gradio UI. The original chained .launch() onto the
# gr.Interface(...) constructor AND called ui.launch() again on the return
# value of the first launch() — which is not an Interface — so the second
# call would fail. Construct once, launch once.
ui = gr.Interface(
    title="Ask John",
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath", label="Ask John a question"),
    outputs=[gr.Textbox(label="Response"), gr.HTML()],
)
ui.launch()