prudhviLatha committed on
Commit
aaf8d90
·
verified ·
1 Parent(s): 9debf25

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +450 -0
app.py ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import numpy as np
5
+ from datetime import datetime
6
+ from simple_salesforce import Salesforce
7
+ from dotenv import load_dotenv
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+ import io
11
+ import base64
12
+ from matplotlib.backends.backend_pdf import PdfPages
13
+ import matplotlib.pyplot as plt
14
+
15
# Pull settings from a local .env file (if present) into the environment.
load_dotenv()

# Salesforce login details, read from the environment.
SF_USERNAME = os.getenv('SF_USERNAME')
SF_PASSWORD = os.getenv('SF_PASSWORD')
SF_SECURITY_TOKEN = os.getenv('SF_SECURITY_TOKEN')

# Open the Salesforce session once at import time. `sf` stays None when the
# login fails, which is how the functions below detect a missing connection.
sf = None
try:
    sf = Salesforce(
        username=SF_USERNAME,
        password=SF_PASSWORD,
        security_token=SF_SECURITY_TOKEN,
    )
except Exception as exc:
    print(f"Error connecting to Salesforce: {str(exc)}")
33
+
34
+ # Weighted moving average forecast with heuristic shortage probability
35
def weighted_moving_average_forecast(df, trade, site_calendar_date):
    """Forecast headcount for the three days after *site_calendar_date*.

    For each of the next three calendar days, a row already present in *df*
    for that trade/day is used verbatim. Otherwise the headcount falls back to
    a weighted moving average of the (up to) three most recent attendance
    values on or before the site date, with the newest value weighted most
    heavily. The fallback is reduced by up to 10% for bad weather and by 20%
    when the site date falls on a weekend.

    Args:
        df: DataFrame with ``Date``, ``Trade``, ``Attendance``, ``Weather``
            and ``Shortage_risk`` columns. It is not modified.
        trade: Trade name to forecast.
        site_calendar_date: Anchor date (``YYYY-MM-DD`` string or date-like).

    Returns:
        ``(predictions, shortage_prob, site_calendar_value, error)`` where
        ``predictions`` is a list of ``{"date", "headcount"}`` dicts,
        ``shortage_prob`` is the probability attached to the last forecast
        day, ``site_calendar_value`` is a display label such as
        ``"2025-05-21 (Weekday)"`` and ``error`` is ``None`` on success. On
        failure the tuple is ``([], 0.5, None, message)``.
    """
    # Work on a copy so the caller's frame is never mutated.
    df = df.copy()
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce').dt.date
    trade_df = df[df['Trade'] == trade]

    if trade_df.empty:
        return [], 0.5, None, f"No data found for trade: {trade}"

    # Parse the anchor date; None/NaT are rejected the same way as bad strings.
    try:
        parsed = pd.to_datetime(site_calendar_date, format='%Y-%m-%d')
        if parsed is None or pd.isna(parsed):
            raise ValueError("missing site calendar date")
        anchor = parsed.date()
    except (ValueError, TypeError):
        return [], 0.5, None, f"Invalid site calendar date: {site_calendar_date}"

    is_weekday = anchor.weekday() < 5
    calendar_factor = 1 if is_weekday else 0.8

    # Historical context: rows up to and including the anchor date, put in
    # chronological order so that "most recent" really is the last row.
    history = trade_df[trade_df['Date'] <= anchor]
    history = history.sort_values('Date', kind='stable').tail(30)
    if history.empty:
        return [], 0.5, None, f"No data available for trade {trade} on or before {anchor}"

    # --- Fallback forecast (identical for every future day, so computed once).
    attendance = (
        pd.to_numeric(history['Attendance'], errors='coerce').astype(float).fillna(0.0)
    )
    newest_first = attendance.to_numpy()[::-1][:3]
    weights_by_count = {3: [0.5, 0.3, 0.2], 2: [0.6, 0.4], 1: [1.0]}
    forecast_value = np.average(newest_first, weights=weights_by_count[len(newest_first)])

    # Unknown or missing weather labels get the neutral 0.5 penalty instead of
    # turning the whole forecast into NaN.
    weather_penalty = {'Sunny': 0, 'Rainy': 1, 'Cloudy': 0.5}.get(
        history['Weather'].iloc[-1], 0.5
    )
    forecast_value *= (1 - 0.1 * weather_penalty)
    fallback_headcount = int(round(forecast_value * calendar_factor))

    # Heuristic shortage probability: mean historical risk nudged by the
    # average day-over-day attendance change, clamped to [0, 1].
    base_risk = pd.to_numeric(history['Shortage_risk'], errors='coerce').mean()
    if pd.isna(base_risk):
        base_risk = 0.5
    attendance_trend = attendance.pct_change().mean() if len(attendance) > 1 else 0
    if not np.isfinite(attendance_trend):
        # Division by a zero-attendance day yields inf/NaN; ignore the trend.
        attendance_trend = 0
    fallback_prob = float(min(max(base_risk + attendance_trend * 0.1, 0), 1))

    predictions = []
    shortage_prob = 0.5  # Default shortage probability
    for stamp in pd.date_range(anchor, periods=4, freq='D')[1:]:
        day = stamp.date()
        known = trade_df[trade_df['Date'] == day]
        if not known.empty:
            # The CSV already has this day: report it as-is.
            row = known.iloc[0]
            headcount = int(row['Attendance']) if pd.notna(row['Attendance']) else 0
            shortage_prob = row['Shortage_risk'] if pd.notna(row['Shortage_risk']) else 0.5
        else:
            headcount = fallback_headcount
            shortage_prob = fallback_prob
        predictions.append({
            "date": day.strftime('%Y-%m-%d'),
            "headcount": headcount
        })

    site_calendar_value = anchor.strftime('%Y-%m-%d') + f" ({'Weekday' if is_weekday else 'Weekend'})"
    return predictions, shortage_prob, site_calendar_value, None
104
+
105
+ # Fetch Project ID from Salesforce
106
def get_project_id():
    """Look up the Id of the most recently created ``Project__c`` record.

    Returns:
        A ``(project_id, error)`` pair. On success ``error`` is ``None``;
        otherwise ``project_id`` is ``None`` and ``error`` is a message.
    """
    if not sf:
        return None, "Salesforce connection failed."

    soql = "SELECT Id FROM Project__c ORDER BY CreatedDate DESC LIMIT 1"
    try:
        response = sf.query(soql)
        if response['totalSize'] <= 0:
            return None, "No project found in Salesforce."
        newest = response['records'][0]
        return newest['Id'], None
    except Exception as exc:
        return None, f"Error fetching Project ID: {str(exc)}"
117
+
118
+ # Save to Salesforce
119
def save_to_salesforce(record):
    """Create one ``Labour_Attendance_Forecast__c`` record in Salesforce.

    Args:
        record: Field-name -> value mapping; must include ``Trade__c``, which
            is echoed in the status message.

    Returns:
        ``{"success": ..., "record_id": ...}`` when the record is created, or
        ``{"error": ...}`` when there is no connection or the create fails.
    """
    if not sf:
        return {"error": "Salesforce connection failed."}

    try:
        created = sf.Labour_Attendance_Forecast__c.create(record)
        outcome = {
            "success": f"Record created for {record['Trade__c']}",
            "record_id": created['id'],
        }
    except Exception as exc:
        outcome = {"error": f"Error uploading to Salesforce for {record['Trade__c']}: {str(exc)}"}
    return outcome
127
+
128
+ # Create heatmap for shortfall risk
129
def create_heatmap(df, predictions_dict, shortage_probs, site_calendar_date):
    """Build a trade-by-date heatmap of shortage risk.

    For every trade in *predictions_dict* the site date contributes a cell
    only when the CSV has a row for it. Each of the next three days always
    contributes a cell, using the CSV risk when a row exists and otherwise the
    trade's entry in *shortage_probs* (0.5 when absent).

    Args:
        df: Cleaned CSV data with ``Trade``, ``Date`` and ``Shortage_risk``.
        predictions_dict: Mapping whose keys are the trades to plot.
        shortage_probs: Trade -> fallback shortage probability.
        site_calendar_date: Anchor date (``YYYY-MM-DD`` string or date-like).

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    anchor = pd.to_datetime(site_calendar_date, format='%Y-%m-%d').date()
    upcoming = [stamp.date() for stamp in pd.date_range(anchor, periods=4, freq='D')[1:]]

    def csv_risk(trade_name, day):
        # CSV shortage risk for (trade, day); None signals "no row in the CSV".
        rows = df[(df['Trade'] == trade_name) & (df['Date'] == day)]
        if rows.empty:
            return None
        risk = rows.iloc[0]['Shortage_risk']
        return risk if pd.notna(risk) else 0.5

    def cell(trade_name, day, risk):
        return {
            'Date': day.strftime('%Y-%m-%d'),
            'Trade': trade_name,
            'Shortage_Probability': risk
        }

    cells = []
    for trade in predictions_dict:
        anchor_risk = csv_risk(trade, anchor)
        if anchor_risk is not None:
            cells.append(cell(trade, anchor, anchor_risk))

        for day in upcoming:
            risk = csv_risk(trade, day)
            if risk is None:
                risk = shortage_probs.get(trade, 0.5)
            cells.append(cell(trade, day, risk))

    heatmap_df = pd.DataFrame(cells)
    if heatmap_df.empty:
        return go.Figure().update_layout(title="Shortage Risk Heatmap (No Data)")

    risk_column = heatmap_df['Shortage_Probability']
    trace = go.Heatmap(
        x=heatmap_df['Date'],
        y=heatmap_df['Trade'],
        z=risk_column,
        colorscale='Blues',
        zmin=0,
        zmax=1,
        text=risk_column.round(2),
        texttemplate="%{text}",
        textfont={"size": 12},
        colorbar=dict(title="Shortage Risk", tickvals=[0, 0.5, 1], ticktext=["0%", "50%", "100%"])
    )
    fig = go.Figure(data=trace)

    fig.update_layout(
        title="Shortage Risk Heatmap",
        xaxis_title="Date",
        yaxis_title="Trade",
        xaxis=dict(tickangle=45, tickformat="%Y-%m-%d"),
        yaxis=dict(autorange="reversed"),
        font=dict(size=14),
        margin=dict(l=100, r=50, t=100, b=100),
        plot_bgcolor="white",
        paper_bgcolor="white",
        showlegend=False,
        grid=dict(rows=1, columns=1)
    )

    for update_axes in (fig.update_xaxes, fig.update_yaxes):
        update_axes(showgrid=True, gridcolor="lightgray")

    return fig
195
+
196
+ # Create line chart for forecasts
197
def create_chart(df, predictions_dict):
    """Plot historical and forecast attendance per trade as a line chart.

    Args:
        df: Cleaned CSV data with ``Date``, ``Attendance`` and ``Trade``.
        predictions_dict: Trade -> list of ``{"date", "headcount"}`` dicts.

    Returns:
        A plotly figure with one colour per trade and a different dash style
        for historical versus forecast points. A titled empty figure is
        returned when no trade has both history and predictions.
    """
    columns = ['Date', 'Attendance', 'Type', 'Trade']

    # Collect the pieces and concatenate once. Growing a frame with pd.concat
    # inside the loop re-copies all earlier rows on every pass, and seeding it
    # with an empty DataFrame relies on deprecated empty-entry handling.
    frames = []
    for trade, predictions in predictions_dict.items():
        history = df[df['Trade'] == trade].copy()
        if history.empty:
            continue
        history['Type'] = 'Historical'
        history['Trade'] = trade

        forecast = pd.DataFrame(predictions)
        if forecast.empty:
            continue
        forecast['Date'] = pd.to_datetime(forecast['date'], format='%Y-%m-%d').dt.date
        forecast['Attendance'] = forecast['headcount']
        forecast['Type'] = 'Forecast'
        forecast['Trade'] = trade

        frames.append(history[columns])
        frames.append(forecast[columns])

    if not frames:
        return go.Figure().update_layout(title="Labour Attendance Forecast (No Data)")

    combined_df = pd.concat(frames, ignore_index=True)
    return px.line(
        combined_df,
        x='Date',
        y='Attendance',
        color='Trade',
        line_dash='Type',
        markers=True,
        title='Labour Attendance Forecast by Trade'
    )
233
+
234
+ # Generate PDF summary
235
def generate_pdf_summary(trade_results, project_id):
    """Render a text-only PDF summary and return it base64-encoded.

    Each trade is written as ``"<trade>: <attendance> (Actual)"``. Nine trades
    fit on a page; further trades continue on additional pages rather than
    being drawn below the visible area.

    Args:
        trade_results: Trade -> result dict; the ``Attendance`` entry is shown
            (``N/A`` when absent).
        project_id: Identifier used in the page title.

    Returns:
        The PDF bytes as a base64 ``str``.
    """
    rows_per_page = 9  # y runs from 0.9 down to 0.1 in steps of 0.1
    rows = list(trade_results.items()) if trade_results else []
    # An empty result set still yields one page carrying the "no data" note.
    pages = [rows[i:i + rows_per_page] for i in range(0, len(rows), rows_per_page)] or [[]]

    buffer = io.BytesIO()
    with PdfPages(buffer) as pdf:
        for page_rows in pages:
            fig, ax = plt.subplots(figsize=(10, 6))
            try:
                if not page_rows:
                    ax.text(0.1, 0.5, "No data available for summary", fontsize=12)
                for i, (trade, data) in enumerate(page_rows):
                    ax.text(0.1, 0.9 - 0.1 * i,
                            f"{trade}: {data.get('Attendance', 'N/A')} (Actual)",
                            fontsize=12)
                ax.set_title(f"Weekly Summary for Project {project_id}")
                ax.axis('off')
                # Save and close this figure explicitly: pyplot's implicit
                # "current figure" is shared state and can be swapped out by
                # a concurrent request.
                pdf.savefig(fig)
            finally:
                plt.close(fig)

    # PdfPages finalises the document on exit, so read the buffer afterwards.
    return base64.b64encode(buffer.getvalue()).decode()
252
+
253
+ # Notify contractor (mock)
254
def notify_contractor(trade, alert_status):
    """Return the confirmation text for a (mock) contractor notification.

    Nothing is actually dispatched; only the message string is built.
    """
    confirmation = f"Notification sent to contractor for {trade} with alert status: {alert_status}"
    return confirmation
256
+
257
+ # Format output to display CSV file values and Forecast_Next_3_Days__c
258
def format_output(trade_results, site_calendar_date):
    """Render per-trade results as a plain-text report.

    For every trade the report lists the site date, the CSV fields for that
    date and the three-day forecast. Missing, ``None`` or non-numeric values
    are shown as ``N/A`` instead of raising.

    Args:
        trade_results: Trade -> result dict (CSV fields plus
            ``Forecast_Next_3_Days__c``, a list of ``{"date", "headcount"}``).
        site_calendar_date: Date shown on every trade's ``Date`` line
            (``YYYY-MM-DD`` string or date-like); ``None``/NaT prints ``N/A``.

    Returns:
        The report text, or ``"No valid trade data available."`` when
        *trade_results* is empty.
    """
    if not trade_results:
        return "No valid trade data available."

    csv_columns = ['Date', 'Trade', 'Weather', 'Alert_status', 'Shortage_risk', 'Suggested_actions', 'Attendance', 'Forecast_Next_3_Days__c']
    text_columns = ('Weather', 'Alert_status', 'Suggested_actions', 'Trade')

    # The date is the same for every trade, so format it once.
    if pd.notna(site_calendar_date):
        date_text = pd.to_datetime(site_calendar_date, format='%Y-%m-%d').strftime('%Y-%m-%d')
    else:
        date_text = 'N/A'

    def numeric_text(raw, convert):
        # Format a numeric field; anything missing or unparseable is 'N/A'.
        try:
            if raw is None or pd.isna(raw):
                return 'N/A'
            return str(convert(raw))
        except (TypeError, ValueError):
            return 'N/A'

    output = []
    for trade, data in trade_results.items():
        output.append(f"Trade: {trade}")
        for key in csv_columns:
            if key == 'Date':
                value = date_text
            elif key == 'Forecast_Next_3_Days__c':
                forecast = data.get(key)
                value = ', '.join(f"{item['date']}: {item['headcount']}" for item in forecast) if forecast else 'N/A'
            elif key == 'Shortage_risk':
                value = numeric_text(data.get(key), lambda v: round(v, 2))
            elif key == 'Attendance':
                value = numeric_text(data.get(key), int)
            else:
                value = data.get(key, 'N/A')
                value = str(value) if key in text_columns and value is not None else 'N/A'
            output.append(f" • {key}: {value}")
        output.append("")

    return "\n".join(output)
280
+
281
+ # Gradio forecast function
282
def _read_labour_csv(path):
    """Read the CSV at *path*, picking the text encoding from its first bytes.

    Returns the DataFrame, or ``None`` when no candidate encoding decodes it.
    """
    with open(path, 'rb') as handle:
        head = handle.read(2)
    if head in (b'\xff\xfe', b'\xfe\xff'):
        # UTF-16 byte-order mark.
        encodings = ['utf-16']
    else:
        # latin1 decodes any byte sequence, so it has to be the last resort;
        # nothing listed after it could ever be tried.
        encodings = ['utf-8-sig', 'latin1']
    for encoding in encodings:
        try:
            return pd.read_csv(path, encoding=encoding, dtype_backend='numpy_nullable')
        except UnicodeDecodeError:
            continue
    return None


def _normalise_labour_frame(df):
    """Coerce the uploaded columns, in place, to the types the forecast uses."""
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce').dt.date

    # Round before the integer cast so fractional values do not abort the run.
    attendance = pd.to_numeric(df['Attendance'], errors='coerce').fillna(0)
    df['Attendance'] = attendance.round().astype('Int64')

    # Percentages ("22%" or 22) become fractions. The 0.5 default is applied
    # AFTER the division so a missing value means 50%, not 0.5%.
    risk_text = df['Shortage_risk'].astype(str).str.replace('%', '', regex=False).str.strip()
    df['Shortage_risk'] = (pd.to_numeric(risk_text, errors='coerce') / 100).fillna(0.5).astype(float)

    # Fill missing text before the str cast: nullable dtypes stringify missing
    # values as '<NA>', which a replace('nan', ...) would not catch.
    for column in ('Weather', 'Alert_status', 'Suggested_actions', 'Trade'):
        values = df[column].astype(object)
        df[column] = values.where(values.notna(), 'N/A').astype(str).replace('nan', 'N/A')


def forecast_labour(csv_file, trade_filter=None, site_calendar_date=None):
    """Gradio handler: forecast labour attendance from an uploaded CSV.

    Reads and cleans the CSV, then for every selected trade that has a row on
    *site_calendar_date* computes a three-day forecast, uploads a record to
    Salesforce and collects the results for display.

    Args:
        csv_file: Uploaded file; either an object exposing ``.name`` (the path
            on disk) or the path itself.
        trade_filter: Optional comma-separated trade names; blank selects all
            trades found in the CSV.
        site_calendar_date: Anchor date as ``YYYY-MM-DD``.

    Returns:
        A 5-tuple ``(text, line_chart, heatmap, pdf_link_html, notification)``.
        On any failure the first element is an error message and the other
        four are ``None``.
    """
    def fail(message):
        return message, None, None, None, None

    try:
        if csv_file is None:
            return fail("Error: No CSV file uploaded.")
        csv_path = getattr(csv_file, 'name', csv_file)

        df = _read_labour_csv(csv_path)
        if df is None:
            return fail("Error: Could not decode CSV file.")

        df.columns = df.columns.str.strip().str.capitalize()
        required_columns = ['Date', 'Attendance', 'Trade', 'Weather', 'Alert_status', 'Shortage_risk', 'Suggested_actions']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            return fail(f"Error: CSV missing columns: {', '.join(missing_columns)}")

        _normalise_labour_frame(df)
        if df['Date'].isna().all():
            return fail("Error: All dates in CSV are invalid.")

        # Rows without a trade name are not a trade of their own.
        unique_trades = [t for t in df['Trade'].unique() if t != 'N/A']
        if trade_filter:
            requested = [t.strip() for t in trade_filter.split(',') if t.strip()]
            selected_trades = [t for t in requested if t in unique_trades]
            if not selected_trades:
                return fail(f"Error: None of the specified trades '{trade_filter}' found in CSV.")
        else:
            selected_trades = unique_trades

        # Validate the date before contacting Salesforce; None, blank and
        # malformed input all get the same readable message.
        date_text = site_calendar_date.strip() if isinstance(site_calendar_date, str) else site_calendar_date
        parsed_date = pd.to_datetime(date_text, format='%Y-%m-%d', errors='coerce')
        if parsed_date is None or pd.isna(parsed_date):
            return fail(f"Error: Invalid site calendar date: {site_calendar_date!r}. Expected YYYY-MM-DD.")
        site_calendar_date = parsed_date.date()

        project_id, error = get_project_id()
        if error:
            return fail(f"Error: {error}")

        trade_results = {}
        predictions_dict = {}
        shortage_probs = {}
        errors = []

        for trade in selected_trades:
            trade_df = df[df['Trade'] == trade].copy()
            if trade_df.empty:
                errors.append(f"No data for trade: {trade}")
                continue

            date_match = trade_df[trade_df['Date'] == site_calendar_date]

            # Debug: Print the matching rows to verify data
            print(f"Trade: {trade}, Data for {site_calendar_date}:")
            print(date_match)

            if date_match.empty:
                errors.append(f"No data found for trade {trade} on {site_calendar_date}")
                continue
            if len(date_match) > 1:
                errors.append(f"Warning: Multiple rows found for trade {trade} on {site_calendar_date}. Using first row.")

            predictions, shortage_prob, site_calendar, forecast_error = weighted_moving_average_forecast(trade_df, trade, site_calendar_date)
            if forecast_error:
                errors.append(forecast_error)
                continue
            predictions_dict[trade] = predictions
            shortage_probs[trade] = shortage_prob

            record = date_match.iloc[0]
            result_data = {
                'Date': site_calendar_date,
                'Trade': record['Trade'],
                'Weather': record['Weather'],
                'Alert_status': record['Alert_status'],
                'Shortage_risk': record['Shortage_risk'],
                'Suggested_actions': record['Suggested_actions'],
                'Attendance': record['Attendance'],
                'Forecast': predictions,
                'Shortage_Probability': round(shortage_prob, 2),
                'Forecast_Next_3_Days__c': predictions,
                'Project__c': project_id
            }

            salesforce_record = {
                'Trade__c': trade,
                'Shortage_Risk__c': float(record['Shortage_risk']),
                'Suggested_Actions__c': record['Suggested_actions'],
                'Expected_Headcount__c': predictions[0]['headcount'] if predictions else 0,
                'Actual_Headcount__c': int(record['Attendance']) if pd.notna(record['Attendance']) else 0,
                'Forecast_Next_3_Days__c': str(predictions),
                'Project_ID__c': project_id,
                'Alert_Status__c': record['Alert_status'],
                'Dashboard_Display__c': True,
                'Date__c': pd.Timestamp(site_calendar_date).isoformat()
            }

            sf_result = save_to_salesforce(salesforce_record)
            result_data.update(sf_result)
            trade_results[trade] = result_data

        if not trade_results:
            error_msg = "No valid trade data processed for the specified date."
            if errors:
                error_msg += " Errors: " + "; ".join(errors)
            return fail(error_msg)

        line_chart = create_chart(df, predictions_dict)
        heatmap = create_heatmap(df, predictions_dict, shortage_probs, site_calendar_date)
        pdf_summary = generate_pdf_summary(trade_results, project_id)

        # Notify for the first trade that was actually processed; the first
        # *selected* trade may have been skipped and have no result entry.
        notification_trade = next(iter(trade_results))
        notification = notify_contractor(notification_trade, trade_results[notification_trade]['Alert_status'])

        error_msg = "; ".join(errors) if errors else None
        return (
            format_output(trade_results, site_calendar_date) + (f"\nWarnings: {error_msg}" if error_msg else ""),
            line_chart,
            heatmap,
            f'<a href="data:application/pdf;base64,{pdf_summary}" download="summary.pdf">Download Summary PDF</a>',
            notification
        )

    except Exception as e:
        return fail(f"Error processing file: {str(e)}")
420
+
421
+ # Gradio UI
422
def gradio_interface():
    """Assemble the Gradio Blocks UI, wire it to ``forecast_labour`` and launch it."""
    intro_notes = (
        "# Labour Attendance Forecast",
        "Upload a CSV with columns: Date, Attendance, Trade, Weather, Alert_Status, Shortage_Risk (e.g. 22%), Suggested_Actions.",
        "Enter trade names (e.g., 'Painter, Electrician') separated by commas, or leave blank to process all trades.",
        "Enter a specific date for the site calendar (YYYY-MM-DD) to display CSV data for that date and forecast the next 3 days.",
    )

    with gr.Blocks(theme=gr.themes.Soft()) as interface:
        for note in intro_notes:
            gr.Markdown(note)

        # Inputs share one row: file upload, trade filter, anchor date.
        with gr.Row():
            form_inputs = [
                gr.File(label="Upload CSV"),
                gr.Textbox(label="Filter by Trades (e.g., Painter, Electrician)", placeholder="Enter trade names separated by commas or leave blank for all trades"),
                gr.Textbox(label="Site Calendar Date (YYYY-MM-DD)", placeholder="e.g., 2025-05-24"),
            ]

        forecast_button = gr.Button("Generate Forecast")

        # Listed in the order of the 5-tuple returned by forecast_labour.
        result_panels = [
            gr.Textbox(label="Forecast Result", lines=20),
            gr.Plot(label="Forecast Trendline"),
            gr.Plot(label="Shortage Risk Heatmap"),
            gr.HTML(label="Download Summary PDF"),
            gr.Textbox(label="Contractor Notification"),
        ]

        forecast_button.click(
            fn=forecast_labour,
            inputs=form_inputs,
            outputs=result_panels,
        )

    interface.launch(share=False)


if __name__ == '__main__':
    gradio_interface()