# NOTE: lines removed here were copy/paste artifacts from a GitHub blame view
# (file size, commit hashes, line-number gutter) — not part of the source.
import gradio as gr
import pandas as pd
import os
import spacy
import spacy.cli
import numpy as np
import zipfile
import tempfile
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from keras.preprocessing.text import text_to_word_sequence
import openai
import re
# --- Module configuration and shared state ---
# Vendor name comes from column 0 of each spreadsheet.
# Put your OpenAI API key here (read from the 'API' environment variable).
openai.api_key = os.getenv('API')
try:
    nlp = spacy.load('en_core_web_md')
except IOError:
    # First run on a fresh machine: download the model, then load it.
    print("Model not found. Downloading...")
    spacy.cli.download("en_core_web_md")
    nlp = spacy.load('en_core_web_md')
# NOTE(review): `nlp` is never referenced again in this file — the spaCy model
# load looks vestigial; confirm before removing.
# file name -> cleaned vendor questionnaire DataFrame (filled by load_vendor_files)
vendor_df_dict = {}
# A dictionary to store the total Trellis Score of each vendor
total_trellis_scores = {}
def preprocess_text(text):
if isinstance(text, str):
text = text.lower()
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
text = text.translate(str.maketrans('', '', filters))
stop_words = ['does', 'the', 'offer', 'do', 'you', 'require']
words = text_to_word_sequence(text)
words = [word for word in words if word not in stop_words]
return ' '.join(words)
else:
return text
def semantic_search(query, data):
    """Return the index in *data* of the entry most similar to *query*.

    Similarity is cosine distance over a TF-IDF vectorization of the
    preprocessed query plus all preprocessed candidate texts.
    """
    cleaned_query = preprocess_text(str(query))
    cleaned_docs = [preprocess_text(str(doc)) for doc in data]
    tfidf_matrix = TfidfVectorizer().fit_transform([cleaned_query] + cleaned_docs)
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix).flatten()
    # Position 0 is the query compared with itself; argmax over the remainder
    # maps directly onto indices of *data*.
    return np.argmax(similarities[1:])
def parse_score(score):
    """Convert a raw score cell into a number.

    Handles three shapes of input:
    - known level labels ('Level 3 - Intermediate', '4 - High-Medium', ...)
      mapped through a lookup table;
    - strings containing a number (decimal commas accepted: '4,5' -> 4.5);
    - plain numeric cells (Excel often stores '4' as the number 4).

    Missing values (None, blank, NaN) default to 3 (medium). Unparseable
    strings return 0.
    """
    level_scores = {
        'Level 1 - Basic': 1,
        'Level 2 - Developing': 2,
        'Level 3 - Intermediate': 3,
        'Level 4 - Advanced': 4,
        'Level 5 - Leading': 5,
        '1 - Low': 1,
        '2 - Below average': 2,
        '3 - Average': 3,
        '4 - Above average': 4,
        '5 - High': 5,
        '1 - Very Low': 1,
        '2 - Low': 2,
        '3 - Medium': 3,
        '4 - High-Medium': 4,
        '5 - Very High': 5
    }
    if score is None or str(score).strip() == '':
        return 3
    if isinstance(score, float) and score != score:
        # NaN from pandas: treat like any other missing value.
        return 3
    if isinstance(score, (int, float)) and not isinstance(score, bool):
        # BUG FIX: numeric cells previously fell through every branch and
        # silently scored 0; return their value directly.
        return float(score)
    if isinstance(score, str):
        score = score.replace(',', '.')  # accept decimal commas
        if score in level_scores:
            return level_scores[score]
        number = re.findall(r"[-+]?\d*\.\d+|\d+", score)
        if number:
            return float(number[0])
    return 0
def load_vendor_files(zip_file_path):
    """Extract every .xlsx vendor questionnaire from *zip_file_path* and load
    it into the module-level vendor_df_dict, keyed by file name.

    Each DataFrame is cleaned on the way in: rows answered 'No' in the 3rd
    column are dropped, the 3rd column is preprocessed for matching, missing
    scores in the 5th column default to '3 - Medium', and a numeric
    'score_vendor' column is derived via parse_score.
    """
    global vendor_df_dict
    # Context managers guarantee the temp dir is removed even if a read fails
    # (the original only cleaned up on the success path).
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref, \
            tempfile.TemporaryDirectory() as temp_dir:
        zip_ref.extractall(temp_dir)
        for file_name in os.listdir(temp_dir):
            if not file_name.endswith(".xlsx"):
                continue
            df = pd.read_excel(os.path.join(temp_dir, file_name))
            df = df[df.iloc[:, 2] != 'No']  # Exclude rows with 'No' in the 3rd column
            df.iloc[:, 2] = df.iloc[:, 2].apply(preprocess_text)
            # BUG FIX: fillna(inplace=True) on an iloc slice is chained
            # indexing that operates on a copy and may not modify the frame;
            # assign the filled column back instead.
            df.iloc[:, 4] = df.iloc[:, 4].fillna('3 - Medium')
            df['score_vendor'] = df.iloc[:, 4].apply(parse_score).astype(float).fillna(0)
            vendor_df_dict[file_name] = df
def process_file(client_file):
    """Gradio handler: match a client questionnaire against every vendor file.

    For each client requirement answered 'Yes', finds the most similar vendor
    requirement (TF-IDF cosine similarity), multiplies the client and vendor
    scores into a 'Trellis Score', writes one sheet per vendor to
    'matches.xlsx', and asks GPT-3.5 for a summary of the top three vendors.

    Returns a (status_text, results_file_path, top_three_vendor_names) tuple,
    or ("No matching data found.", None, None) when nothing matched.
    """
    zip_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'vendor_files.zip')
    load_vendor_files(zip_file_path)  # Load vendor files from zip file
    client_df = pd.read_excel(client_file.name)
    # BUG FIX: fillna(inplace=True) on an iloc slice is chained indexing on a
    # copy and may not modify client_df at all; assign the column back.
    client_df.iloc[:, 2] = client_df.iloc[:, 2].fillna('3 - Medium')
    client_df['score_client'] = client_df.iloc[:, 2].apply(parse_score).astype(float)
    client_df = client_df[client_df.iloc[:, 1] == 'Yes']  # keep only requirements the client needs
    client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
    matches_found = False  # Flag to check if any matches were found
    # BUG FIX: local dict (shadows the module-level one) so totals no longer
    # accumulate across successive uploads within the same server process.
    total_trellis_scores = {}
    with pd.ExcelWriter('matches.xlsx') as writer:
        for vendor_file, vendor_df in vendor_df_dict.items():
            combined_rows = []
            for _, row in client_df.iterrows():
                # Index of the vendor requirement most similar to this client one.
                # .iloc used throughout: these lookups are positional, not labels.
                match_idx = semantic_search(row.iloc[0], vendor_df.iloc[:, 2])
                vendor_row = vendor_df.iloc[match_idx, :]
                client_score = row['score_client']  # pre-parsed numeric score
                vendor_score = parse_score(vendor_row.iloc[4])
                combined = pd.concat([row.iloc[[0, 2, 3]], vendor_row.iloc[[0, 2, 4, 5]]])
                trellis_score = client_score * vendor_score
                combined['Trellis Score'] = trellis_score
                combined_rows.append(combined)
            if combined_rows:
                common_df = pd.DataFrame(combined_rows)
                # Compute the total Trellis Score for this vendor
                total_trellis_score = common_df['Trellis Score'].sum()
                total_trellis_scores[vendor_file] = total_trellis_score
                # Append a blank row whose only value is the total Trellis Score
                common_df.loc[len(common_df.index)] = [np.nan] * len(common_df.columns)
                common_df.at[len(common_df.index) - 1, 'Trellis Score'] = total_trellis_score
                # Excel caps sheet names at 31 characters
                common_df.to_excel(writer, sheet_name=os.path.splitext(vendor_file)[0][:31], index=False)
                matches_found = True
    top_three_vendors = sorted(total_trellis_scores, key=total_trellis_scores.get, reverse=True)[:3]
    top_three_vendors = [vendor.split(".")[0] for vendor in top_three_vendors]
    if not matches_found:  # In case there were no matches
        return "No matching data found.", None, None

    def gpt3_query(prompt, engine='gpt-3.5-turbo', max_tokens=3000, temperature=0.3):
        """Send *prompt* to the chat completion API; return the reply text, or None on error."""
        try:
            # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK surface;
            # migrate to openai.OpenAI().chat.completions.create when upgrading.
            response = openai.ChatCompletion.create(
                model=engine,
                messages=[
                    {"role": "system", "content": "You are a helpful AI."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=max_tokens,
                temperature=temperature
            )
            return response['choices'][0]['message']['content'].strip()
        except Exception as e:
            print(f"Error in gpt3_query: {str(e)}")
            return None

    # Get GPT-3.5-turbo to create a summary text
    summary = gpt3_query(f"""Based on the Trellis Score, the top three vendors are {', '.join(top_three_vendors)}.
We have analyzed the performance of several vendors and identified the top three based on their Trellis Scores. Here are the key data points for each:
1. [name vendor 1]
- Trellis Score: {total_trellis_scores}
- Strengths: [brief summary of strengths]
- Weaknesses: [brief summary of weaknesses]
- Key Features: [brief summary of key features]
2. [name vendor 2]
- Strengths: [brief summary of strengths]
- Weaknesses: [brief summary of weaknesses]
- Key Features: [brief summary of key features]
3. [name vendor 3]
- Strengths: [brief summary of strengths]
- Weaknesses: [brief summary of weaknesses]
- Key Features: [brief summary of key features]
Based on this format, please provide a detailed breakdown of the strengths and weaknesses of each vendor, along with an overall comparison.
""")
    return f"Matching data has been saved to 'matches.xlsx'.\n\n{summary}", os.path.abspath('matches.xlsx'), top_three_vendors
# Gradio UI: a single file upload (the client questionnaire) wired to
# process_file, whose three return values map onto the three outputs below.
iface = gr.Interface(
    fn=process_file,
    inputs=[gr.components.File(label="Client File")],
    outputs=[
        gr.components.Textbox(label="Status"),
        gr.components.File(label="Download Match Results"),
        gr.components.Textbox(label="Top Three Vendors")
    ],
)
# Blocks here serving the app; launched on Gradio's default host/port.
iface.launch()