varshakolanu commited on
Commit
af935dc
·
verified ·
1 Parent(s): d68584a

Create model.py

Browse files
Files changed (1) hide show
  1. model.py +231 -0
model.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from datetime import datetime
from transformers import pipeline

# --- Constants ---
ALERT_THRESHOLD = 60  # Final scores below this flag a low-performing vendor
DAYS_PER_MONTH = 30   # NOTE(review): not referenced anywhere in this module — confirm before removing

# --- Helper Functions ---
9
+
10
def calculate_quality_score(incident_logs):
    """
    Calculate a quality score based on the number and severity of incident logs.

    Severity is detected by naive substring matching against keyword lists.

    Args:
        incident_logs (str): Free-text incident log details (may be empty or None).

    Returns:
        float: A score between 0 and 100, where 100 is the highest quality.
    """
    if not incident_logs:
        return 100  # Perfect score if no incidents

    # Lower-case once up front instead of once per keyword membership test.
    text = incident_logs.lower()

    # Basic keyword matching for severity (can be expanded).
    # NOTE(review): substring matching also hits words like "majority";
    # switch to word-boundary matching if that becomes a problem.
    high_severity_keywords = ['major', 'critical', 'severe', 'fatality']
    medium_severity_keywords = ['minor', 'moderate', 'injury']
    low_severity_keywords = ['near miss', 'warning', 'caution']

    high_count = sum(1 for keyword in high_severity_keywords if keyword in text)
    medium_count = sum(1 for keyword in medium_severity_keywords if keyword in text)
    low_count = sum(1 for keyword in low_severity_keywords if keyword in text)

    # Weighted scoring (adjust weights as needed).
    score = 100 - (high_count * 20 + medium_count * 10 + low_count * 5)
    return max(0, score)  # Clamp so the score never goes below 0
35
+
36
def calculate_timeliness_score(work_completion_details, delay_reports, log_date):
    """
    Calculate a timeliness score from completion details and delay reports.

    Args:
        work_completion_details (str): Free-text details of work completion.
        delay_reports (str): Comma-separated delay reports (may be empty).
        log_date (str): The date of the log in YYYY-MM-DD format.

    Returns:
        float: A score between 0 and 100, where 100 is perfectly on time.

    Raises:
        ValueError: If log_date is not a valid YYYY-MM-DD date.
    """
    if not work_completion_details:
        return 100

    # Validate the date format up front. The parsed value is intentionally
    # discarded: the original code assigned it to an unused variable
    # ("completion window") that was never implemented. Keeping the parse
    # preserves the ValueError behavior for malformed dates, which the
    # caller's error handling relies on to skip bad logs.
    datetime.strptime(log_date, '%Y-%m-%d')

    details = work_completion_details.lower()

    # Explicit "on time" completion gets a perfect score regardless of delays.
    if "on time" in details:
        return 100

    # Penalize 15 points per comma-separated delay report (adjust as needed).
    delay_penalty = len(delay_reports.split(',')) * 15 if delay_reports else 0

    # Very basic check for "late" or "delayed" wording.
    if "late" in details or "delayed" in details:
        return max(0, 50 - delay_penalty)

    return max(0, 100 - delay_penalty)
70
+
71
def calculate_safety_score(incident_logs):
    """
    Calculate a safety score from incident log text.

    Args:
        incident_logs (str): Comma-separated incident log details (may be empty).

    Returns:
        float: 100 for a clean record; otherwise 80 minus 10 per incident,
        floored at 0.
    """
    if incident_logs:
        # Each comma-separated entry counts as one incident.
        # Severity differentiation could be layered on later.
        incident_count = len(incident_logs.split(','))
        return max(0, 80 - 10 * incident_count)
    return 100
86
+
87
def calculate_communication_score(work_completion_details):
    """
    Calculate a communication score from the work completion details using
    a simple sentiment analysis.

    Args:
        work_completion_details (str): Free-text details of work completion.

    Returns:
        float: A score between 0 and 100. Empty input scores 100; any
        analysis failure falls back to a neutral 80.
    """
    if not work_completion_details:
        return 100

    try:
        # Build the pipeline once and cache it on the function object:
        # model loading is expensive, and the original re-created it on
        # every call.
        analyzer = getattr(calculate_communication_score, "_analyzer", None)
        if analyzer is None:
            # BUGFIX: "sentiment-analysis-ssbert-large-en" is not a valid
            # pipeline task name and raised at runtime. Use the standard
            # "sentiment-analysis" task (default model). Construction now
            # happens inside the try block so failures degrade to the
            # neutral score instead of propagating.
            analyzer = pipeline("sentiment-analysis")
            calculate_communication_score._analyzer = analyzer

        result = analyzer(work_completion_details)
        sentiment = result[0]['label']  # e.g. 'POSITIVE' / 'NEGATIVE'
        confidence = result[0]['score']

        if sentiment == 'POSITIVE':
            return 100
        elif sentiment == 'NEGATIVE':
            return max(0, 60 * confidence)  # Scale the negative impact by confidence
        else:  # NEUTRAL or any other label
            return 80
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return 80  # Return a neutral score on error
118
+
119
def calculate_final_score(quality_score, timeliness_score, safety_score, communication_score):
    """
    Combine the four component scores into a weighted final score.

    Args:
        quality_score (float): The quality score.
        timeliness_score (float): The timeliness score.
        safety_score (float): The safety score.
        communication_score (float): The communication score.

    Returns:
        float: The weighted final score, between 0 and 100.
    """
    # Weight each component (tune as requirements evolve); quality dominates.
    weighted_components = (
        (quality_score, 0.4),
        (timeliness_score, 0.3),
        (safety_score, 0.2),
        (communication_score, 0.1),
    )
    return sum(score * weight for score, weight in weighted_components)
145
+
146
def generate_performance_report(vendor_id, scores, month, trend_data=None):
    """
    Build a performance report dictionary for one vendor and month.

    Includes a placeholder certificate URL; actual certificate generation
    is handled elsewhere.

    Args:
        vendor_id (str): The ID of the vendor.
        scores (dict): The vendor's scores ('quality', 'timeliness', 'safety',
            'communication', 'final_score').
        month (str): The month for the report (e.g., "2024-01").
        trend_data (dict, optional): Trend data for the vendor. Defaults to None.

    Returns:
        dict: The performance report.
    """
    # Missing or absent trend data defaults to no deviation.
    trend_deviation = trend_data.get('trend_deviation', 0) if trend_data else 0

    return {
        'vendor_id': vendor_id,
        'month': month,
        'quality': scores['quality'],
        'timeliness': scores['timeliness'],
        'safety': scores['safety'],
        'communication': scores['communication'],
        'final_score': scores['final_score'],
        # Flag vendors whose final score falls below the module threshold.
        'alert_flag': scores['final_score'] < ALERT_THRESHOLD,
        'certificate_url': f"/certificates/{vendor_id}_{month}.pdf",  # Placeholder URL
        'trend_deviation': trend_deviation,
    }
176
+
177
def process_vendor_logs(vendor_logs):
    """
    Process vendor logs into performance reports.

    Each log is scored on quality, timeliness, safety, and communication;
    a weighted final score is computed and a report dictionary is built.
    Logs that fail to process are reported and skipped.

    Args:
        vendor_logs (list): Dictionaries with keys 'vendor_id',
            'work_completion_details', 'delay_reports', 'incident_logs',
            and 'log_date' (YYYY-MM-DD).

    Returns:
        list: Performance report dictionaries, ready for Salesforce.
    """
    reports = []
    for log in vendor_logs:
        try:
            vendor_id = log['vendor_id']
            completion = log['work_completion_details']
            delays = log['delay_reports']
            incidents = log['incident_logs']
            log_date = log['log_date']  # expected YYYY-MM-DD

            scores = {
                'quality': calculate_quality_score(incidents),
                'timeliness': calculate_timeliness_score(completion, delays, log_date),
                'safety': calculate_safety_score(incidents),
                'communication': calculate_communication_score(completion),
            }
            scores['final_score'] = calculate_final_score(
                scores['quality'],
                scores['timeliness'],
                scores['safety'],
                scores['communication'],
            )

            # Trend detection placeholder. A real implementation would fetch
            # prior months' scores from Salesforce and set
            # trend_data = {'trend_deviation': -1 | 0 | 1} based on whether
            # the final score moved more than ~10 points month-over-month.
            trend_data = None

            month = log_date[:7]  # Truncate YYYY-MM-DD to YYYY-MM
            reports.append(generate_performance_report(vendor_id, scores, month, trend_data))
        except Exception as e:
            # Best-effort: report the failure and continue with the next log.
            # Consider routing this to real logging / error storage.
            print(f"Error processing log for vendor {log.get('vendor_id', 'Unknown')}: {e}")
    return reports