"""Embedding-backed lookup of HR benefit answers.

On import this module loads the benefit table (a cached pickle when one
exists, otherwise the source CSV, which is then embedded and cached) and
exposes ``QueryEmbeddings`` to find the stored answer closest to a question.
"""
import os

import openai
import pandas as pd
# NOTE(review): ``openai.embeddings_utils`` ships only with pre-1.0 releases
# of the ``openai`` package -- confirm the pinned version before upgrading.
from openai.embeddings_utils import get_embedding, cosine_similarity

from utilities import api_keys

openai.api_key = api_keys.APIKeys().get_key('OPENAI_API_KEY')

# Single source of truth for the embedding model: stored answers and incoming
# questions must be embedded with the same engine to be comparable.
EMBEDDING_ENGINE = 'text-embedding-ada-002'

# Cache of the benefit table including its computed 'embedding' column.
OUTPUT_FILE = 'utilities/data/hr_asst_embeddings.pkl'
# Source table; the code below reads its 'question' and 'answer' columns.
INPUT_FILE = 'utilities/data/hr_asst_data.csv'

if os.path.exists(OUTPUT_FILE):
    # SECURITY: unpickling can execute arbitrary code. Only load a cache file
    # that this script wrote itself; never point OUTPUT_FILE at untrusted data.
    benefit_info = pd.read_pickle(OUTPUT_FILE)
else:
    benefit_info = pd.read_csv(INPUT_FILE)
    # One embedding request per answer; str() guards against non-string cells.
    benefit_info['embedding'] = benefit_info['answer'].apply(
        lambda answer: get_embedding(str(answer), engine=EMBEDDING_ENGINE)
    )
    # Persist so later imports skip the embedding requests above.
    benefit_info.to_pickle(OUTPUT_FILE)


def QueryEmbeddings(question):
    """Return the stored answer whose embedding is closest to *question*.

    The question is embedded with ``EMBEDDING_ENGINE`` and compared by cosine
    similarity against every row's stored embedding. The best match is
    printed (question, matched question, answer) and its answer returned.

    Args:
        question: Text of the user's question.

    Returns:
        The ``'answer'`` value of the row with the highest similarity.

    Raises:
        IndexError: If ``benefit_info`` has no rows to search.
    """
    if benefit_info.empty:
        # Same exception type the previous positional lookup raised, but with
        # an actionable message and without spending an embedding request.
        raise IndexError('benefit_info contains no rows to search')

    question_vector = get_embedding(question, engine=EMBEDDING_ENGINE)

    # Keep the scores in a local Series rather than writing a column into the
    # shared module-level DataFrame, so calls do not mutate global state.
    similarities = benefit_info['embedding'].apply(
        lambda embedding: cosine_similarity(embedding, question_vector)
    )

    # Only the single best row is needed, so take the positional argmax
    # instead of sorting the entire table.
    best_row = benefit_info.iloc[similarities.argmax()]
    best_answer = best_row['answer']

    print(f"Question: {question}")
    print(f"Top question: {best_row['question']}")
    print("Best answer: ", best_answer)
    return best_answer