# hr-assistant / create_embedding.py
# Kim Adams
# USAA benefits
# 638362e
import openai, os
import pandas as pd
from openai.embeddings_utils import get_embedding, cosine_similarity
from utilities import api_keys
# Authenticate the OpenAI client with the key returned by the project's key helper.
openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')

# Pickled DataFrame that caches the HR data together with its 'embedding' column.
OUTPUT_FILE = 'utilities/data/hr_asst_embeddings.pkl'
# CSV the cache is built from; only read when the pickle does not exist yet.
INPUT_FILE = 'utilities/data/hr_asst_data.csv'

# Populate the module-level table searched by QueryEmbeddings.
if not os.path.exists(OUTPUT_FILE):
    # No cache yet: embed every value of the 'answer' column (coerced to str)
    # and persist the result so later runs can skip the embedding calls.
    benefit_info = pd.read_csv(INPUT_FILE)
    benefit_info['embedding'] = benefit_info['answer'].apply(
        lambda answer: get_embedding(str(answer), engine='text-embedding-ada-002')
    )
    benefit_info.to_pickle(OUTPUT_FILE)
else:
    # Cache present: reuse the previously computed embeddings.
    benefit_info = pd.read_pickle(OUTPUT_FILE)
def QueryEmbeddings(question):
    """Return the stored answer whose embedding is most similar to *question*.

    The question is embedded with the 'text-embedding-ada-002' engine and
    compared, by cosine similarity, with every vector in the 'embedding'
    column of the module-level ``benefit_info`` DataFrame.

    Side effects:
        * Writes the per-row scores into ``benefit_info["similarities"]``,
          overwriting the scores of any previous call.
        * Prints the question, the best-matching stored question and the
          best answer to stdout.

    Args:
        question: The text to look up.

    Returns:
        The value of the 'answer' column of the highest-scoring row.
    """
    question_vector = get_embedding(question, engine='text-embedding-ada-002')

    # Score every stored embedding against the question vector. The column is
    # assigned in place on the shared frame (no rebinding, so no ``global``).
    benefit_info["similarities"] = benefit_info['embedding'].apply(
        lambda stored_vector: cosine_similarity(stored_vector, question_vector)
    )

    # Highest similarity first; only the top row is needed.
    top_match = benefit_info.sort_values("similarities", ascending=False).iloc[0]
    best_answer = top_match['answer']

    # f-strings instead of ``+`` so a non-string cell (e.g. NaN from an empty
    # CSV field) is rendered rather than raising TypeError.
    print(f"Question: {question}")
    print(f"Top question: {top_match['question']}")
    print("Best answer: ", best_answer)
    return best_answer