import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import io
import os
import logging
from forecast import train_and_forecast
from datetime import datetime
# Application-wide logging: every record at INFO or above is written both to
# app.log and to the console stream.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler('app.log'), logging.StreamHandler()],
)
logger = logging.getLogger(__name__)

# Running list of formatted notification entries (grows for the process lifetime)
notification_history = []

# Most recent forecast DataFrame produced by process_data; None until a
# forecast has been generated successfully
latest_forecast_df = None
# Marker colour for each alert level, shared by the interactive and PDF charts
_ALERT_COLORS = {'Normal': 'blue', 'Warning': 'yellow', 'Critical': 'red'}


def _error_result(message):
    """Return the six-value output tuple with only the status message populated."""
    return message, None, None, None, None, None


def _parse_filter_date(filter_date):
    """Convert the date-picker value into a pandas Timestamp.

    A numeric value is treated as epoch seconds (what gr.DateTime emits with
    its default ``type="timestamp"``). It is converted through local time so
    the calendar day matches what the user picked; passing it straight to
    ``pd.to_datetime`` would interpret it as nanoseconds and always yield
    1970-01-01. Strings and datetime objects are parsed by pandas as before.
    NOTE(review): assumes the DateTime component keeps ``timezone=None``.
    """
    if isinstance(filter_date, (int, float)):
        return pd.Timestamp(datetime.fromtimestamp(filter_date))
    return pd.to_datetime(filter_date)


def _build_risk_bar_figure(forecast_df, trades, colors, date_range_str):
    """Build the interactive grouped bar chart of shortage risk per trade and date."""
    fig = go.Figure()
    for idx, trade in enumerate(trades):
        trade_forecast = forecast_df[forecast_df['Trade'] == trade]
        fig.add_trace(go.Bar(
            x=trade_forecast['Date'],
            y=trade_forecast['Risk'],  # already clipped to >= 0 by the caller
            name=trade,
            marker_color=colors[idx % len(colors)],
            hovertemplate=(
                f"Trade: {trade}<br>" +
                "Date: %{x}<br>" +
                "Risk: %{y}%"
            )
        ))
    fig.update_layout(
        title=f"Shortage Risk Bar Graph ({date_range_str}, Risk in %, {len(trades)} Trade{'s' if len(trades) != 1 else ''})",
        xaxis_title="Date",
        yaxis_title="Risk (%)",
        barmode='group',  # Group bars to avoid overlap
        showlegend=True,
        hovermode="closest",
        legend=dict(
            title="Trades",
            traceorder="normal",
            itemsizing="constant",
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        xaxis=dict(
            tickangle=45,  # Rotate for better readability
            tickformat="%Y-%m-%d",
            tickmode="linear",  # Ensure even spacing
            nticks=10,  # Adjust number of ticks for clarity
            gridcolor='lightgrey'
        ),
        yaxis=dict(
            range=[0, 100],  # Fixed percentage scale starting from 0
            gridcolor='lightgrey',
            zeroline=True,
            zerolinewidth=2,
            zerolinecolor='black'
        ),
        plot_bgcolor='white',
        margin=dict(t=80, b=80, l=60, r=60)
    )
    # The y-axis stops at 100, so tell the reader when a bar is cut off
    max_risk = forecast_df['Risk'].max()
    if max_risk > 100:
        fig.add_annotation(
            text=f"Note: Risk values capped at 100% (Max: {max_risk:.1f}%)",
            xref="paper", yref="paper",
            x=0.5, y=-0.15,
            showarrow=False,
            font=dict(size=10)
        )
    return fig


def _build_attendance_line_figure(forecast_df, trades, colors, date_range_str):
    """Build the interactive attendance line chart with Warning/Critical markers."""
    fig = go.Figure()
    for idx, trade in enumerate(trades):
        trade_forecast = forecast_df[forecast_df['Trade'] == trade]
        # One line per trade; Risk and Alert ride along as hover-only data
        fig.add_trace(go.Scatter(
            x=trade_forecast['Date'],
            y=trade_forecast['Attendance'],
            mode='lines+markers',
            name=trade,
            line=dict(color=colors[idx % len(colors)]),
            marker=dict(size=8),
            hovertemplate=(
                f"Trade: {trade}<br>" +
                "Date: %{x}<br>" +
                "Attendance: %{y}<br>" +
                "Risk: %{customdata[0]}%<br>" +
                "Alert: %{customdata[1]}"
            ),
            customdata=trade_forecast[['Risk', 'Alert']].values
        ))
        # Overlay diamond markers on the points flagged Warning or Critical
        for alert_type in ['Warning', 'Critical']:
            alert_data = trade_forecast[trade_forecast['Alert'] == alert_type]
            if alert_data.empty:
                continue
            fig.add_trace(go.Scatter(
                x=alert_data['Date'],
                y=alert_data['Attendance'],
                mode='markers',
                name=f"{trade} {alert_type}",
                marker=dict(
                    color=_ALERT_COLORS[alert_type],
                    size=12,
                    symbol='diamond',
                    line=dict(width=2, color='black')
                ),
                hovertemplate=(
                    f"Trade: {trade}<br>" +
                    "Date: %{x}<br>" +
                    "Attendance: %{y}<br>" +
                    "Risk: %{customdata[0]}%<br>" +
                    f"Alert: {alert_type}"
                ),
                customdata=alert_data[['Risk']].values
            ))
    max_attendance = forecast_df['Attendance'].max()
    # Leave 20% headroom; fall back to 1 so an all-zero forecast still gets an axis
    y_upper = max_attendance * 1.2 if max_attendance > 0 else 1
    fig.update_layout(
        title=f"Attendance Forecast Line Graph ({date_range_str}, {len(trades)} Trade{'s' if len(trades) != 1 else ''})",
        xaxis_title="Date",
        yaxis_title="Attendance",
        showlegend=True,
        hovermode="closest",
        legend=dict(
            title="Trades & Alerts",
            traceorder="normal",
            itemsizing="constant"
        ),
        xaxis=dict(
            tickangle=45,
            gridcolor='lightgrey'
        ),
        yaxis=dict(
            range=[0, y_upper],
            gridcolor='lightgrey'
        ),
        plot_bgcolor='white',
        margin=dict(t=50, b=50)
    )
    return fig


def _save_pdf_report(forecast_df, summary_df, trades, colors, date_range_str, trade_selection):
    """Render both charts, the summary table and the alert legend to a PDF; return its path."""
    fig = plt.figure(figsize=(12, 20))
    try:
        gs = fig.add_gridspec(5, 1, height_ratios=[3, 3, 0.2, 1.5, 0.5], hspace=0.8)

        # --- Risk bar chart ---
        ax1 = fig.add_subplot(gs[0])
        # DatetimeIndex yields Timestamp elements (so .strftime works on every
        # pandas version) and sorting keeps the x-axis chronological.
        unique_dates = pd.DatetimeIndex(forecast_df['Date'].drop_duplicates()).sort_values()
        position_of = {date: pos for pos, date in enumerate(unique_dates)}
        # Shrink bars when there are many trades so neighbouring dates don't overlap
        width = min(0.35, 0.8 / max(len(trades), 1))
        for idx, trade in enumerate(trades):
            trade_forecast = forecast_df[forecast_df['Trade'] == trade]
            # Place each bar by its own date: a trade may not cover every date
            trade_x = [position_of[date] + idx * width for date in trade_forecast['Date']]
            heights = trade_forecast['Risk'].tolist()
            ax1.bar(
                trade_x,
                heights,
                width,
                label=trade,
                color=colors[idx % len(colors)]
            )
            for x, height in zip(trade_x, heights):
                # Larger gap above taller bars, as in the original layout
                offset = 5 if height < 10 else 10
                ax1.text(
                    x,  # bars are centre-aligned, so x is already the bar centre
                    height + offset,
                    f'{height:.1f}%',
                    ha='center',
                    va='bottom',
                    fontsize=8
                )
        ax1.set_title(f'Shortage Risk Bar Graph ({date_range_str}, Risk in %)', fontsize=14, pad=10)
        ax1.set_xlabel('Date', fontsize=12)
        ax1.set_ylabel('Risk (%)', fontsize=12)
        ax1.set_xticks([pos + (len(trades) - 1) * width / 2 for pos in range(len(unique_dates))])
        ax1.set_xticklabels([date.strftime('%Y-%m-%d') for date in unique_dates], rotation=45, ha='right', fontsize=8)
        ax1.set_yticks(range(0, 125, 25))  # 25% increments starting from 0
        ax1.grid(True, axis='y', linestyle='--', alpha=0.7)
        ax1.legend(title="Trades", fontsize=10, loc='upper right', bbox_to_anchor=(1.15, 1))  # legend outside the axes
        ax1.set_ylim(0, 100)
        ax1.tick_params(axis='x', pad=10)  # extra padding to prevent label overlap

        # --- Attendance line chart ---
        ax2 = fig.add_subplot(gs[1])
        for idx, trade in enumerate(trades):
            trade_forecast = forecast_df[forecast_df['Trade'] == trade]
            ax2.plot(
                trade_forecast['Date'],
                trade_forecast['Attendance'],
                label=trade,
                color=colors[idx % len(colors)],
                marker='o',
                markersize=8,
                linewidth=2
            )
            for alert_type in ['Warning', 'Critical']:
                alert_data = trade_forecast[trade_forecast['Alert'] == alert_type]
                if alert_data.empty:
                    continue
                ax2.scatter(
                    alert_data['Date'],
                    alert_data['Attendance'],
                    label=f"{trade} {alert_type}",
                    color=_ALERT_COLORS[alert_type],
                    marker='D',
                    s=100,
                    edgecolors='black',
                    linewidths=1.5
                )
        ax2.set_title(f'Attendance Forecast Line Graph ({date_range_str})', fontsize=14, pad=10)
        ax2.set_xlabel('Date', fontsize=12)
        ax2.set_ylabel('Attendance', fontsize=12)
        ax2.grid(True, linestyle='--', alpha=0.7)
        ax2.legend(title="Trades & Alerts", fontsize=10, loc='upper left', bbox_to_anchor=(0, -0.1), ncol=2)
        ax2.tick_params(axis='x', rotation=45, labelsize=8)
        ax2.set_ylim(bottom=0)

        # --- Spacer row: small gap between the charts and the table ---
        ax_spacer = fig.add_subplot(gs[2])
        ax_spacer.axis('off')

        # --- Summary table ---
        ax3 = fig.add_subplot(gs[3])
        ax3.axis('off')
        table_data = [summary_df.columns.tolist()] + summary_df.values.tolist()
        table = ax3.table(cellText=table_data, cellLoc='center', loc='center', colWidths=[0.3, 0.35, 0.35])
        # Automatic sizing must be off, otherwise the explicit size can be overridden
        table.auto_set_font_size(False)
        table.set_fontsize(12)
        table.scale(1.2, 1.2)
        ax3.set_title('Summary Statistics (Risk in %)', fontsize=14, pad=10)

        # --- Alert colour legend ---
        ax4 = fig.add_subplot(gs[4])
        ax4.axis('off')
        legend_elements = [
            mpatches.Patch(color='red', label='Critical Alert'),
            mpatches.Patch(color='yellow', label='Warning Alert'),
            mpatches.Patch(color='blue', label='Normal')
        ]
        ax4.legend(handles=legend_elements, loc='center', ncol=3, title='Alert Types', fontsize=12, title_fontsize=12)

        fig.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=1.2, hspace=0.6)  # right margin widened for the legend

        # Trade names come from the uploaded CSV; keep only filename-safe
        # characters so a name cannot inject path separators.
        label = 'all_trades' if trade_selection == 'All' else str(trade_selection)
        safe_label = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in label)
        pdf_path = f"forecast_report_{safe_label}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
        fig.savefig(pdf_path, format='pdf', dpi=300, bbox_inches='tight')
        return pdf_path
    finally:
        # Always release the figure, even if rendering or saving failed
        plt.close(fig)


def process_data(file, trade_selection, filter_date):
    """Validate an uploaded attendance CSV, run forecasts and build all outputs.

    Args:
        file: Uploaded file object exposing the CSV path as ``.name``, or None.
        trade_selection: A trade name present in the CSV, or "All".
        filter_date: Optional date restricting the outputs to a single day
            (epoch seconds, datetime or date string); falsy means no filter.

    Returns:
        A 6-tuple ``(status message, risk bar figure, attendance line figure,
        alerts DataFrame, summary DataFrame, PDF path)``. On any validation
        or processing failure the first element is an "Error: ..." message
        and the remaining five are None.

    Side effects:
        Stores the (filtered) forecast in the module-level
        ``latest_forecast_df`` and writes a PDF report to the working
        directory.
    """
    global latest_forecast_df
    logger.info(f"Processing data: Trade selection={trade_selection}, File={file.name if file else 'None'}, Filter date={filter_date}")
    try:
        if not trade_selection:
            logger.error("No trade selected")
            return _error_result("Error: Please select a trade")
        if not file:
            logger.error("No file uploaded")
            return _error_result("Error: No file uploaded")

        # Read CSV with encoding handling
        logger.debug("Reading CSV file")
        try:
            df = pd.read_csv(file.name, encoding='utf-8')
        except UnicodeDecodeError:
            logger.debug("UTF-8 encoding failed, trying utf-8-sig for BOM")
            df = pd.read_csv(file.name, encoding='utf-8-sig')
        logger.info(f"CSV loaded with {len(df)} rows")

        # Trim whitespace from all string columns
        logger.debug("Trimming whitespace from string columns")
        for col in df.select_dtypes(include=['object']).columns:
            df[col] = df[col].str.strip()

        # Check for required columns
        required_cols = ['Trade', 'Date', 'Attendance', 'Weather']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            logger.error(f"Missing required columns: {missing_cols}")
            return _error_result(f"Error: CSV is missing required columns: {', '.join(missing_cols)}")

        # Check for empty or whitespace values
        for col in required_cols:
            if df[col].isna().any() or (df[col].astype(str).str.strip() == '').any():
                logger.error(f"Column '{col}' contains empty or whitespace values")
                return _error_result(f"Error: Column '{col}' contains empty or whitespace values")

        # Parse dates in DD-MM-YYYY format
        logger.debug("Parsing dates")
        df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')
        if df['Date'].isna().any():
            logger.error("Invalid date format in CSV")
            return _error_result("Error: Invalid date format in CSV. Use DD-MM-YYYY (e.g., 01-04-2025)")

        # Validate Attendance: numeric and strictly positive
        logger.debug("Validating Attendance")
        df['Attendance'] = pd.to_numeric(df['Attendance'], errors='coerce')
        if df['Attendance'].isna().any():
            logger.error("Non-numeric values in Attendance column")
            return _error_result("Error: Attendance column contains non-numeric values")
        if (df['Attendance'] <= 0).any():
            logger.error("Attendance contains non-positive values")
            return _error_result("Error: Attendance must contain only positive values")

        # Normalize Weather values to title case for consistency
        df['Weather'] = df['Weather'].str.title()

        # Get all trades from the CSV
        available_trades = df['Trade'].unique().tolist()
        logger.debug(f"Available trades in CSV: {available_trades}")
        data = df.to_dict('records')
        logger.debug(f"Input data for forecasting: {data}")

        # Handle "All" trades or a single trade
        trades_to_process = available_trades if trade_selection == "All" else [trade_selection]
        logger.info(f"Trades to process: {trades_to_process}")

        # Validate selected trade if not "All"
        if trade_selection != "All" and trade_selection not in available_trades:
            logger.error(f"Selected trade '{trade_selection}' not in CSV")
            # map(str, ...) so non-string trade identifiers cannot break the join
            return _error_result(
                f"Error: Selected trade '{trade_selection}' not found in CSV. Available trades: {', '.join(map(str, available_trades))}"
            )

        # Check for duplicate dates and sufficient data for each trade
        for trade in trades_to_process:
            trade_data = df[df['Trade'] == trade]
            duplicate_dates = trade_data[trade_data.duplicated(subset=['Date'], keep=False)]
            if not duplicate_dates.empty:
                duplicate_list = duplicate_dates['Date'].tolist()
                logger.error(f"Duplicate dates found for trade '{trade}': {duplicate_list}")
                return _error_result(
                    f"Error: Duplicate dates found for trade '{trade}': {duplicate_list}. Each date must be unique."
                )
            if len(trade_data) < 2:
                logger.error(f"Insufficient data for trade '{trade}': only {len(trade_data)} rows")
                return _error_result(
                    f"Error: Insufficient data for trade '{trade}'. At least 2 data points are required, found {len(trade_data)}"
                )

        # Run forecast for each trade
        all_forecasts = []
        for trade in trades_to_process:
            logger.debug(f"Running forecast for trade: {trade}")
            result = train_and_forecast(data, trade)
            if 'error' in result:
                logger.error(f"Forecast failed for trade '{trade}': {result['error']}")
                return _error_result(f"Error in forecasting for trade '{trade}': {result['error']}")
            all_forecasts.append(pd.DataFrame(result['forecast']))
        logger.info("All forecasts generated successfully")
        forecast_df = pd.concat(all_forecasts, ignore_index=True)

        # Parse forecast dates
        forecast_df['Date'] = pd.to_datetime(forecast_df['Date'])
        # Ensure only non-negative values for Risk and Attendance in forecast data
        forecast_df['Risk'] = forecast_df['Risk'].clip(lower=0)
        forecast_df['Attendance'] = forecast_df['Attendance'].clip(lower=0)

        # Apply date filtering based on filter_date (daily view only)
        if filter_date:
            selected_date = _parse_filter_date(filter_date)
            forecast_df = forecast_df[forecast_df['Date'].dt.date == selected_date.date()]
            date_range_str = selected_date.strftime('%Y-%m-%d')
        else:
            date_range_str = "All dates"
        if forecast_df.empty:
            logger.warning("No data available for the selected date")
            return _error_result(f"Error: No forecast data available for the selected date ({date_range_str})")

        # Store the forecast data globally (read later by notify_contractor)
        latest_forecast_df = forecast_df

        colors = px.colors.qualitative.Plotly  # Consistent color palette for trades

        logger.debug("Generating enhanced risk bar graph")
        risk_bar_fig = _build_risk_bar_figure(forecast_df, trades_to_process, colors, date_range_str)
        logger.info("Enhanced risk bar graph generated")

        logger.debug("Generating attendance line graph")
        line_fig = _build_attendance_line_figure(forecast_df, trades_to_process, colors, date_range_str)
        logger.info("Attendance line graph generated")

        # Create summary statistics for filtered data
        logger.debug("Generating summary statistics")
        summary_df = forecast_df.groupby('Trade').agg({
            'Attendance': 'mean',
            'Risk': 'mean'
        }).reset_index()
        summary_df['Attendance'] = summary_df['Attendance'].round(1)
        summary_df['Risk'] = summary_df['Risk'].round(1)
        summary_df = summary_df.rename(columns={'Attendance': 'Average Attendance', 'Risk': 'Average Risk (%)'})
        logger.info(f"Summary statistics generated: {summary_df.to_dict()}")

        # Create PDF report
        logger.debug("Generating enhanced PDF report with small gap between trade and alerts")
        pdf_path = _save_pdf_report(
            forecast_df, summary_df, trades_to_process, colors, date_range_str, trade_selection
        )
        logger.info(f"Enhanced PDF report saved: {pdf_path}")

        # Alerts table with color-coded column and date only
        logger.debug("Generating alerts table")
        alerts_df = forecast_df[forecast_df['Alert'].isin(['Warning', 'Critical'])][['Date', 'Trade', 'Attendance', 'Risk', 'Alert']].copy()
        alerts_df['Risk'] = alerts_df['Risk'].apply(lambda x: f"{x}%")
        alerts_df['Alert Color'] = alerts_df['Alert'].apply(lambda x: 'Red' if x == 'Critical' else 'Yellow')
        alerts_df['Date'] = alerts_df['Date'].dt.strftime('%Y-%m-%d')
        logger.info(f"Alerts generated with date only: {len(alerts_df)} alerts")

        # Status message
        status_message = f"Forecast generated successfully for {len(trades_to_process)} trade{'s' if len(trades_to_process) != 1 else ''} (Risk values are in percentages, Date: {date_range_str}, Visualization: Bar Graph (Risk), Line Graph (Attendance))"
        return (
            status_message,
            risk_bar_fig,
            line_fig,
            alerts_df,
            summary_df,
            pdf_path
        )
    except Exception as e:
        # UI boundary: report any unexpected failure as a status message
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        return _error_result(f"Error: {str(e)}")
def notify_contractor():
    """Generate per-trade shortage-risk notices from the latest forecast.

    Reads the module-level ``latest_forecast_df``, averages Risk per trade and
    classifies it (below 50: low, up to 75: moderate, otherwise high). Every
    generated message is appended, timestamped, to ``notification_history``.

    Returns:
        A 2-tuple ``(message text, notification history as Markdown)``. When
        no forecast has been generated yet, the message says so and is itself
        recorded in the history.
    """
    logger.info("Notify contractor button clicked")

    def render_history():
        # Markdown bullet list of every notification recorded so far
        bullets = [f"- {entry}" for entry in notification_history]
        return "## Notification History\n" + "\n".join(bullets)

    def current_timestamp():
        return datetime.now().strftime("%Y-%m-%d %I:%M:%S %p")

    if latest_forecast_df is None:
        message = "No forecast data available to generate notifications."
        stamp = current_timestamp()
        notification_history.append(f"**{stamp}**: {message}")
        history_display = render_history()
        logger.warning("No forecast data for notifications")
        return message, history_display

    # Calculate average risk per trade
    average_risk_by_trade = latest_forecast_df.groupby('Trade')['Risk'].mean().round(1).to_dict()

    # One timestamp shared by every notification of this click
    stamp = current_timestamp()
    notifications = []
    for trade, avg_risk in average_risk_by_trade.items():
        if avg_risk < 50:
            risk_message = "Monitor, shortage risk is low"
        elif avg_risk <= 75:
            risk_message = "Caution, shortage risk is moderate"
        else:
            risk_message = "Alert, shortage risk is high"
        message = f"Notice for {trade}: {risk_message} at {avg_risk}%."
        notifications.append(message)
        notification_history.append(f"**{stamp}**: {message}")
        logger.info(f"Notification generated for {trade}: {message}")

    if notifications:
        combined_message = "\n".join(notifications)
    else:
        combined_message = "No trades to notify."
    return combined_message, render_history()
def update_trades(file):
    """Refresh the trade dropdown choices from the uploaded CSV.

    Args:
        file: Uploaded file object exposing the CSV path as ``.name``, or None.

    Returns:
        A ``gr.update`` whose choices are "All" followed by the distinct trade
        names found in the CSV's 'Trade' column. Falls back to just ["All"]
        when no file is given or the file cannot be read.
    """
    if not file:
        logger.warning("No file provided for trade update")
        return gr.update(choices=["All"])
    try:
        try:
            df = pd.read_csv(file.name, encoding='utf-8')
        except UnicodeDecodeError:
            # Retry inside the outer try so a second failure is still handled
            # below instead of escaping to Gradio.
            logger.debug("UTF-8 encoding failed in update_trades, trying utf-8-sig")
            df = pd.read_csv(file.name, encoding='utf-8-sig')
        trade_col = df['Trade']
        # Mirror process_data, which strips whitespace from string columns;
        # otherwise a padded name offered here would not match there.
        if trade_col.dtype == object:
            trade_col = trade_col.str.strip()
        # Skip missing/blank trades and a literal "All", which would duplicate
        # the sentinel entry inserted below.
        trades = [
            trade for trade in trade_col.dropna().unique().tolist()
            if trade != "" and trade != "All"
        ]
        trades.insert(0, "All")
        logger.info(f"Trades updated from CSV with 'All' option: {trades}")
        return gr.update(choices=trades)
    except Exception as e:
        logger.error(f"Failed to update trades: {str(e)}", exc_info=True)
        return gr.update(choices=["All"])
# ---------------------------------------------------------------------------
# Gradio UI: layout, event wiring and launch
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Labour Attendance Forecasting App")
    gr.Markdown("Upload a CSV with columns: Trade, Date, Attendance, Weather")

    # Inputs
    with gr.Row():
        file_input = gr.File(label="Upload CSV")
        trade_dropdown = gr.Dropdown(label="Select Trade", choices=["All"])
    with gr.Row():
        filter_date = gr.DateTime(label="Site Calendar Date", value=None)
    with gr.Row():
        submit_button = gr.Button("Generate Forecast")

    # Outputs
    notification_display = gr.Markdown("## Notification History\n*No notifications yet.*", label="Notifications")
    output_text = gr.Textbox(label="Status")
    # Despite their names, heatmap_output shows the risk bar graph and
    # bar_output shows the attendance line graph (see the labels).
    heatmap_output = gr.Plot(label="Shortage Risk Bar Graph (Risk in %)")
    bar_output = gr.Plot(label="Attendance Forecast Line Graph")
    alerts_output = gr.DataFrame(label="Alerts")
    summary_output = gr.DataFrame(label="Summary Statistics (Average Attendance and Risk in %)")
    notify_button = gr.Button("Notify Contractor")
    notify_output = gr.Textbox(label="Notification Status")
    download_button = gr.File(label="Download PDF Report")

    # Uploading a new CSV repopulates the trade dropdown
    file_input.change(
        fn=update_trades,
        inputs=file_input,
        outputs=trade_dropdown,
    )
    # Output order must match the 6-tuple returned by process_data
    submit_button.click(
        fn=process_data,
        inputs=[file_input, trade_dropdown, filter_date],
        outputs=[
            output_text,
            heatmap_output,
            bar_output,
            alerts_output,
            summary_output,
            download_button,
        ],
    )
    notify_button.click(
        fn=notify_contractor,
        outputs=[notify_output, notification_display],
    )

# Start the app; log a launch failure before re-raising it
try:
    logger.info("Launching Gradio app")
    demo.launch()
except Exception as e:
    logger.error(f"Failed to launch Gradio app: {str(e)}", exc_info=True)
    raise