File size: 8,748 Bytes
8d19c24
 
 
 
e62ff42
8d19c24
30e35ef
 
8d19c24
 
 
30e35ef
 
20c55ff
 
30e35ef
920e566
d5df629
30e35ef
e62ff42
 
 
 
 
 
 
8d19c24
30e35ef
 
 
 
 
8d19c24
 
 
 
 
 
 
 
 
 
 
 
 
30e35ef
 
 
8d19c24
30e35ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00215d0
30e35ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95cbc00
30e35ef
95cbc00
30e35ef
 
 
 
95cbc00
 
30e35ef
 
 
 
b59aa9a
 
 
30e35ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bbc874
 
 
30e35ef
 
 
 
0bbc874
30e35ef
 
 
 
 
 
 
 
 
 
 
0bbc874
30e35ef
 
 
 
 
0bbc874
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30e35ef
 
 
 
 
 
 
 
 
0bbc874
30e35ef
 
00215d0
8d19c24
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import gradio as gr
import pandas as pd
import os
import spacy
import spacy.cli
import numpy as np
import zipfile
import tempfile
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from keras.preprocessing.text import text_to_word_sequence
import openai
import re


# The OpenAI API key is read from the 'API' environment variable (may be
# None if unset; calls will then fail at request time).
openai.api_key = os.getenv('API')

# Load the medium English spaCy model; download it on first use.
# NOTE(review): `nlp` is never referenced in this file's visible code —
# confirm whether the spaCy load is still needed.
try:
    nlp = spacy.load('en_core_web_md')
except IOError:
    print("Model not found. Downloading...")
    spacy.cli.download("en_core_web_md")
    nlp = spacy.load('en_core_web_md')


# Maps vendor workbook filename -> preprocessed DataFrame
# (populated by load_vendor_files).
vendor_df_dict = {}

# A dictionary to store the total Trellis Score of each vendor
# (filled in by process_file; keyed by vendor filename).
total_trellis_scores = {}


def preprocess_text(text):
    """Normalise a text cell for TF-IDF matching.

    Lower-cases the string, strips punctuation/whitespace control chars,
    tokenises it, and drops a small set of question stop words. Non-string
    values (e.g. NaN or numeric cells) are returned unchanged.
    """
    if not isinstance(text, str):
        # Pass non-string cells through untouched.
        return text

    punct = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    stop_words = ('does', 'the', 'offer', 'do', 'you', 'require')
    cleaned = text.lower().translate(str.maketrans('', '', punct))
    tokens = text_to_word_sequence(cleaned)
    return ' '.join(tok for tok in tokens if tok not in stop_words)

def semantic_search(query, data):
    """Return the index into *data* of the entry most similar to *query*.

    Both the query and every data entry are stringified and preprocessed,
    then vectorised with TF-IDF; similarity is cosine similarity.
    """
    cleaned_query = preprocess_text(str(query))
    cleaned_data = [preprocess_text(str(item)) for item in data]
    tfidf = TfidfVectorizer().fit_transform([cleaned_query] + cleaned_data)
    similarities = cosine_similarity(tfidf[0:1], tfidf).flatten()
    # similarities[0] is the query against itself; skipping it makes the
    # argmax position line up with indices into *data*.
    return np.argmax(similarities[1:])

def parse_score(score):
    """Convert a score cell value into a numeric score.

    Handles, in order:
      * None / blank strings        -> neutral default 3
      * numeric cells (int/float)   -> returned as float (NaN counts as blank)
      * known level labels          -> mapped 1..5
      * other strings               -> first number found (decimal commas OK)
      * anything else               -> 0
    """
    level_scores = {
        'Level 1 - Basic': 1,
        'Level 2 - Developing': 2,
        'Level 3 - Intermediate': 3,
        'Level 4 - Advanced': 4,
        'Level 5 - Leading': 5,
        '1 - Low': 1,
        '2 - Below average': 2,
        '3 - Average': 3,
        '4 - Above average': 4,
        '5 - High': 5,
        '1 - Very Low': 1,
        '2 - Low': 2,
        '3 - Medium': 3,
        '4 - High-Medium': 4,
        '5 - Very High': 5
    }

    # Missing / blank cells default to a neutral "medium" score.
    if score is None or str(score).strip() == '':
        return 3

    # Numeric cells pass through directly. (The original implementation fell
    # through to `return 0` here, zeroing out perfectly valid numeric scores
    # read from Excel.) Booleans are excluded so True/False keep scoring 0.
    if isinstance(score, (int, float)) and not isinstance(score, bool):
        if score != score:  # NaN is the only value unequal to itself
            return 3  # treat NaN like a blank cell
        return float(score)

    if isinstance(score, str):
        score = score.replace(',', '.')  # normalise decimal commas
        if score in level_scores:
            return level_scores[score]
        number = re.findall(r"[-+]?\d*\.\d+|\d+", score)
        if number:
            return float(number[0])
    return 0

def load_vendor_files(zip_file_path):
    """Populate the global vendor_df_dict from .xlsx workbooks in a zip file.

    Each workbook is normalised for matching:
      * rows answering 'No' in the 3rd column are dropped,
      * the 3rd column (question text) is preprocessed,
      * the 5th column (score) has blanks defaulted to '3 - Medium' and is
        parsed into a numeric 'score_vendor' column (NaN -> 0).
    """
    global vendor_df_dict
    # Context managers guarantee the archive handle is closed and the temp
    # directory removed even if reading a workbook raises; the original only
    # cleaned up on the success path.
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref, \
            tempfile.TemporaryDirectory() as temp_dir:
        zip_ref.extractall(temp_dir)
        for file_name in os.listdir(temp_dir):
            if not file_name.endswith(".xlsx"):
                continue
            df = pd.read_excel(os.path.join(temp_dir, file_name))
            # Exclude rows with 'No' in the 3rd column.
            df = df[df.iloc[:, 2] != 'No']
            df.iloc[:, 2] = df.iloc[:, 2].apply(preprocess_text)
            # Assign instead of fillna(inplace=True) on an iloc slice: the
            # inplace form operates on a temporary under modern pandas and
            # can silently be a no-op.
            df.iloc[:, 4] = df.iloc[:, 4].fillna('3 - Medium')
            df['score_vendor'] = (
                df.iloc[:, 4].apply(parse_score).astype(float).fillna(0)
            )
            vendor_df_dict[file_name] = df



def process_file(client_file):
    """Gradio entry point: match a client requirements workbook against every
    vendor workbook bundled in 'vendor_files.zip'.

    For each vendor, every client requirement is paired with its most similar
    vendor row (TF-IDF cosine similarity); a per-row Trellis Score
    (client score * vendor score) is written to 'matches.xlsx' (one sheet per
    vendor), and GPT-3.5 is asked to summarise the top three vendors.

    Returns (status_text, path_to_matches_xlsx, top_three_vendors), or
    ("No matching data found.", None, None) when nothing matched.
    """
    # Vendor workbooks ship next to this script in a fixed zip archive.
    zip_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'vendor_files.zip')
    load_vendor_files(zip_file_path)  # Load vendor files from zip file
    client_df = pd.read_excel(client_file.name)

    # Blank client scores default to the neutral '3 - Medium' level.
    client_df.iloc[:, 2].fillna('3 - Medium', inplace=True)

    # Parse the textual client score into a number.
    client_df['score_client'] = client_df.iloc[:, 2].apply(parse_score).astype(float)

    # Keep only requirements the client answered 'Yes' to, and normalise
    # the requirement text for TF-IDF matching.
    client_df = client_df[client_df.iloc[:, 1] == 'Yes']
    client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
    client_df['score_client'] = client_df['score_client'].astype(float)

    matches_found = False  # Flag to check if any matches were found
    # NOTE(review): highest_score / highest_score_vendor are computed below
    # but never used in the returned value — confirm whether they are needed.
    highest_score_vendor = None
    highest_score = 0
    # NOTE(review): total_scores is never written to or read; totals actually
    # go into the module-level total_trellis_scores, which is never reset and
    # therefore accumulates across repeated invocations — verify intent.
    total_scores = {} # dictionary to store the total Trellis Score for each vendor

    with pd.ExcelWriter('matches.xlsx') as writer:
        common_list = []
        for vendor_file, vendor_df in vendor_df_dict.items():
            for index, row in client_df.iterrows():

                # NOTE(review): row[0] / most_similar_row[4] are positional
                # lookups on a labelled Series — presumably columns 0 and 4;
                # confirm against the workbook layout.
                most_similar_index = semantic_search(row[0], vendor_df.iloc[:, 2])

                most_similar_row = vendor_df.iloc[most_similar_index, :]
                client_score = row['score_client'] # directly access the pre-parsed client score
                vendor_score = parse_score(most_similar_row[4])

                # Combine selected client columns (0,2,3) with selected
                # vendor columns (0,2,4,5) into one output row.
                client_row_selected = row[[0, 2, 3]]
                vendor_row_selected = most_similar_row[[0, 2, 4, 5]]
                combined_row = pd.concat([client_row_selected, vendor_row_selected])

                trellis_score = client_score * vendor_score
                combined_row['Trellis Score'] = trellis_score
                common_list.append(combined_row)

                if trellis_score > highest_score:
                    highest_score = trellis_score
                    highest_score_vendor = vendor_file

            if common_list:
                common_df = pd.DataFrame(common_list)
                # Compute the total Trellis Score
                total_trellis_score = common_df['Trellis Score'].sum()
                total_trellis_scores[vendor_file] = total_trellis_score # store the total score for each vendor
                # Add a row with the total Trellis Score to the DataFrame
                common_df.loc[len(common_df.index)] = [np.nan]*len(common_df.columns)
                common_df.at[len(common_df.index)-1, 'Trellis Score'] = total_trellis_score

                # Sheet name is the vendor filename truncated to 31 chars
                # (Excel's sheet-name length limit).
                common_df.to_excel(writer, sheet_name=os.path.splitext(vendor_file)[0][:31], index=False)
                common_list = []  # Reset the common_list for next vendor_file
                matches_found = True  # Set the flag to True as matches were found

        # Rank vendors by total Trellis Score and keep the best three
        # (extension stripped for display).
        top_three_vendors = sorted(total_trellis_scores, key=total_trellis_scores.get, reverse=True)[:3]
        top_three_vendors = [vendor.split(".")[0] for vendor in top_three_vendors]


        if not matches_found:  # In case there were no matches
            return "No matching data found.", None, None
        else:
            def gpt3_query(prompt, engine='gpt-3.5-turbo', max_tokens=3000, temperature=0.3):
                """Send a single chat prompt to OpenAI; returns the reply text,
                or None if the request fails (error is printed, not raised)."""
                try: 
                    response = openai.ChatCompletion.create(
                        model=engine,
                        messages=[
                            {"role": "system", "content": "You are a helpful AI."},
                            {"role": "user", "content": prompt}
                        ],
                        max_tokens=max_tokens,
                        temperature=temperature
                    )
                    return response['choices'][0]['message']['content'].strip()
                
                except Exception as e:
                    print(f"Error in gpt3_query: {str(e)}")
                    return None

            # Get GPT-3.5-turbo to create a summary text.
            # NOTE(review): {total_trellis_scores} interpolates the entire
            # dict (all vendors), not just the top vendor's score — confirm.
            summary = gpt3_query(f"""Based on the Trellis Score, the top three vendors are {', '.join(top_three_vendors)}. 

We have analyzed the performance of several vendors and identified the top three based on their Trellis Scores. Here are the key data points for each:

1. [name vendor 1] 
    - Trellis Score: {total_trellis_scores}
    - Strengths: [brief summary of strengths]
    - Weaknesses: [brief summary of weaknesses]
    - Key Features: [brief summary of key features]

2. [name vendor 2]
    - Strengths: [brief summary of strengths]
    - Weaknesses: [brief summary of weaknesses]
    - Key Features: [brief summary of key features]

3. [name vendor 3]
    - Strengths: [brief summary of strengths]
    - Weaknesses: [brief summary of weaknesses]
    - Key Features: [brief summary of key features]

Based on this format, please provide a detailed breakdown of the strengths and weaknesses of each vendor, along with an overall comparison.
""")


            return f"Matching data has been saved to 'matches.xlsx'.\n\n{summary}", os.path.abspath('matches.xlsx'), top_three_vendors
         



# Wire the matching pipeline into a simple Gradio UI: one uploaded client
# workbook in; a status summary, a downloadable results workbook, and the
# top-three vendor list out.
client_input = gr.components.File(label="Client File")
status_output = gr.components.Textbox(label="Status")
results_output = gr.components.File(label="Download Match Results")
top_vendors_output = gr.components.Textbox(label="Top Three Vendors")

iface = gr.Interface(
    fn=process_file,
    inputs=[client_input],
    outputs=[status_output, results_output, top_vendors_output],
)

iface.launch()