"""Gradio app that forecasts labour attendance and shortage risk per trade.

The user uploads a CSV with the columns Trade, Date, Attendance and Weather,
picks a trade (or "All") and optionally a calendar date.  The app validates the
CSV, calls ``forecast.train_and_forecast`` once per trade, and presents the
result as two Plotly figures, an alerts table, a summary table and a PDF report.
"""

import io
import logging
import os
from datetime import datetime

import gradio as gr
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from forecast import train_and_forecast

# Configure logging directly in the code
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler()
    ]
)

# List to store notification history
notification_history = []

# Global variable to store the latest forecast data.
# NOTE(review): module-level state is shared by every browser session served by
# this process; consider gr.State if per-user isolation is needed.
latest_forecast_df = None

# Columns the uploaded CSV must contain.
_REQUIRED_COLUMNS = ['Trade', 'Date', 'Attendance', 'Weather']

# Marker colours for each alert level, used by both the Plotly and PDF charts.
_ALERT_COLORS = {'Normal': 'blue', 'Warning': 'yellow', 'Critical': 'red'}


def _failure(message):
    """Return the 6-tuple ``process_data`` emits when only a status text is available."""
    return message, None, None, None, None, None


def _uploaded_path(file):
    """Return the filesystem path of a Gradio upload.

    Accepts either an object exposing ``.name`` or a plain path string.
    """
    return getattr(file, "name", file)


def _read_csv(path):
    """Read the CSV at *path*, tolerating a UTF-8 BOM and non-UTF-8 files.

    ``utf-8-sig`` decodes plain UTF-8 as well as BOM-prefixed UTF-8, so it is
    tried first.  A file that is not UTF-8 at all is retried as latin-1, which
    can decode any byte sequence.
    """
    try:
        return pd.read_csv(path, encoding='utf-8-sig')
    except UnicodeDecodeError:
        logger.warning("File is not valid UTF-8, retrying with latin-1")
        return pd.read_csv(path, encoding='latin-1')


def _parse_filter_date(filter_date):
    """Convert the date-picker value into a ``pd.Timestamp`` (or ``None``).

    Numeric values are treated as epoch seconds and converted in local time.
    NOTE(review): per the Gradio docs ``gr.DateTime`` defaults to
    ``type="timestamp"``; passing such a float straight to ``pd.to_datetime``
    would be read as nanoseconds and land in 1970.
    """
    if filter_date is None or filter_date == "":
        return None
    if isinstance(filter_date, (int, float)):
        return pd.Timestamp(datetime.fromtimestamp(filter_date))
    return pd.to_datetime(filter_date)


def _safe_filename_part(value):
    """Return *value* with anything but alphanumerics, space, '-', '_' and '.' replaced by '_'."""
    cleaned = "".join(
        ch if ch.isalnum() or ch in " -_." else "_" for ch in str(value)
    )
    return cleaned or "trade"


def _load_training_data(path):
    """Load and validate the uploaded CSV.

    Returns:
        ``(df, None)`` on success, where ``Date`` is parsed from DD-MM-YYYY,
        ``Attendance`` is numeric and ``Weather`` is title-cased; otherwise
        ``(None, error_message)``.
    """
    logger.debug("Reading CSV file")
    df = _read_csv(path)
    logger.info(f"CSV loaded with {len(df)} rows")

    # Trim whitespace from all string columns
    logger.debug("Trimming whitespace from string columns")
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = df[col].str.strip()

    # Check for required columns
    missing_cols = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing_cols:
        logger.error(f"Missing required columns: {missing_cols}")
        return None, f"Error: CSV is missing required columns: {', '.join(missing_cols)}"

    # An empty file would otherwise surface later as an obscure concat error.
    if df.empty:
        logger.error("CSV contains no data rows")
        return None, "Error: CSV contains no data rows"

    # Check for empty or whitespace values
    for col in _REQUIRED_COLUMNS:
        if df[col].isna().any() or (df[col].astype(str).str.strip() == '').any():
            logger.error(f"Column '{col}' contains empty or whitespace values")
            return None, f"Error: Column '{col}' contains empty or whitespace values"

    # Parse dates in DD-MM-YYYY format
    logger.debug("Parsing dates")
    df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')
    if df['Date'].isna().any():
        logger.error("Invalid date format in CSV")
        return None, "Error: Invalid date format in CSV. Use DD-MM-YYYY (e.g., 01-04-2025)"

    # Validate Attendance
    logger.debug("Validating Attendance")
    df['Attendance'] = pd.to_numeric(df['Attendance'], errors='coerce')
    if df['Attendance'].isna().any():
        logger.error("Non-numeric values in Attendance column")
        return None, "Error: Attendance column contains non-numeric values"
    if (df['Attendance'] <= 0).any():
        logger.error("Attendance contains non-positive values")
        return None, "Error: Attendance must contain only positive values"

    # Normalize Weather values to title case for consistency.  astype(str)
    # keeps this working when pandas inferred a non-string dtype.
    df['Weather'] = df['Weather'].astype(str).str.title()
    return df, None


def _check_trade_history(df, trades):
    """Return an error message if any trade has duplicate dates or fewer than 2 rows, else ``None``."""
    for trade in trades:
        trade_data = df[df['Trade'] == trade]
        duplicate_dates = trade_data[trade_data.duplicated(subset=['Date'], keep=False)]
        if not duplicate_dates.empty:
            logger.error(f"Duplicate dates found for trade '{trade}': {duplicate_dates['Date'].tolist()}")
            return f"Error: Duplicate dates found for trade '{trade}': {duplicate_dates['Date'].tolist()}. Each date must be unique."
        if len(trade_data) < 2:
            logger.error(f"Insufficient data for trade '{trade}': only {len(trade_data)} rows")
            return f"Error: Insufficient data for trade '{trade}'. At least 2 data points are required, found {len(trade_data)}"
    return None


def _run_forecasts(data, trades):
    """Call ``train_and_forecast`` for every trade and combine the results.

    Returns:
        ``(forecast_df, None)`` with ``Date`` parsed and ``Risk``/``Attendance``
        clipped to be non-negative, or ``(None, error_message)`` as soon as one
        trade's result contains an ``'error'`` key.
    """
    frames = []
    for trade in trades:
        logger.debug(f"Running forecast for trade: {trade}")
        result = train_and_forecast(data, trade)
        if 'error' in result:
            logger.error(f"Forecast failed for trade '{trade}': {result['error']}")
            return None, f"Error in forecasting for trade '{trade}': {result['error']}"
        frames.append(pd.DataFrame(result['forecast']))
    logger.info("All forecasts generated successfully")

    forecast_df = pd.concat(frames, ignore_index=True)
    forecast_df['Date'] = pd.to_datetime(forecast_df['Date'])
    # Ensure only non-negative values for Risk and Attendance; every chart and
    # table below relies on this having been done once here.
    forecast_df['Risk'] = forecast_df['Risk'].clip(lower=0)
    forecast_df['Attendance'] = forecast_df['Attendance'].clip(lower=0)
    return forecast_df, None


def _build_risk_figure(forecast_df, trades, date_range_str, colors):
    """Build the grouped Plotly bar chart of shortage risk per trade and date."""
    logger.debug("Generating enhanced risk bar graph")
    risk_bar_fig = go.Figure()
    for idx, trade in enumerate(trades):
        trade_forecast = forecast_df[forecast_df['Trade'] == trade]
        risk_bar_fig.add_trace(go.Bar(
            x=trade_forecast['Date'],
            y=trade_forecast['Risk'],
            name=trade,
            marker_color=colors[idx % len(colors)],
            # Plotly hover text uses <br> for line breaks.
            hovertemplate=(
                f"Trade: {trade}<br>"
                + "Date: %{x}<br>"
                + "Risk: %{y}%"
            )
        ))

    risk_bar_fig.update_layout(
        title=f"Shortage Risk Bar Graph ({date_range_str}, Risk in %, {len(trades)} Trade{'s' if len(trades) != 1 else ''})",
        xaxis_title="Date",
        yaxis_title="Risk (%)",
        barmode='group',  # Group bars to avoid overlap
        showlegend=True,
        hovermode="closest",
        legend=dict(
            title="Trades",
            traceorder="normal",
            itemsizing="constant",
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        xaxis=dict(
            tickangle=45,  # Rotate for better readability
            tickformat="%Y-%m-%d",
            tickmode="linear",  # Ensure even spacing
            nticks=10,  # Adjust number of ticks for clarity
            gridcolor='lightgrey'
        ),
        yaxis=dict(
            range=[0, 100],  # Set a reasonable range starting from 0
            gridcolor='lightgrey',
            zeroline=True,
            zerolinewidth=2,
            zerolinecolor='black'
        ),
        plot_bgcolor='white',
        margin=dict(t=80, b=80, l=60, r=60)
    )

    # The y-axis stops at 100, so tell the viewer when a bar runs past it.
    max_risk = forecast_df['Risk'].max()
    if max_risk > 100:
        risk_bar_fig.add_annotation(
            text=f"Note: Risk values capped at 100% (Max: {max_risk:.1f}%)",
            xref="paper",
            yref="paper",
            x=0.5,
            y=-0.15,
            showarrow=False,
            font=dict(size=10)
        )
    logger.info("Enhanced risk bar graph generated")
    return risk_bar_fig


def _build_attendance_figure(forecast_df, trades, date_range_str, colors):
    """Build the Plotly line chart of forecast attendance with alert markers."""
    logger.debug("Generating attendance line graph")
    line_fig = go.Figure()
    for idx, trade in enumerate(trades):
        trade_forecast = forecast_df[forecast_df['Trade'] == trade]
        # Add line for attendance
        line_fig.add_trace(go.Scatter(
            x=trade_forecast['Date'],
            y=trade_forecast['Attendance'],
            mode='lines+markers',
            name=trade,
            line=dict(color=colors[idx % len(colors)]),
            marker=dict(size=8),
            hovertemplate=(
                f"Trade: {trade}<br>"
                + "Date: %{x}<br>"
                + "Attendance: %{y}<br>"
                + "Risk: %{customdata[0]}%<br>"
                + "Alert: %{customdata[1]}"
            ),
            customdata=trade_forecast[['Risk', 'Alert']].values
        ))

        # Add markers for Warning and Critical alerts
        for alert_type in ['Warning', 'Critical']:
            alert_data = trade_forecast[trade_forecast['Alert'] == alert_type]
            if alert_data.empty:
                continue
            line_fig.add_trace(go.Scatter(
                x=alert_data['Date'],
                y=alert_data['Attendance'],
                mode='markers',
                name=f"{trade} {alert_type}",
                marker=dict(
                    color=_ALERT_COLORS[alert_type],
                    size=12,
                    symbol='diamond',
                    line=dict(width=2, color='black')
                ),
                hovertemplate=(
                    f"Trade: {trade}<br>"
                    + "Date: %{x}<br>"
                    + "Attendance: %{y}<br>"
                    + "Risk: %{customdata[0]}%<br>"
                    # Must be an f-string, otherwise the literal text
                    # "{alert_type}" is shown in the hover box.
                    + f"Alert: {alert_type}"
                ),
                customdata=alert_data[['Risk']].values
            ))

    line_fig.update_layout(
        title=f"Attendance Forecast Line Graph ({date_range_str}, {len(trades)} Trade{'s' if len(trades) != 1 else ''})",
        xaxis_title="Date",
        yaxis_title="Attendance",
        showlegend=True,
        hovermode="closest",
        legend=dict(
            title="Trades & Alerts",
            traceorder="normal",
            itemsizing="constant"
        ),
        xaxis=dict(
            tickangle=45,
            gridcolor='lightgrey'
        ),
        yaxis=dict(
            range=[0, forecast_df['Attendance'].max() * 1.2],  # Dynamic range starting from 0
            gridcolor='lightgrey'
        ),
        plot_bgcolor='white',
        margin=dict(t=50, b=50)
    )
    logger.info("Attendance line graph generated")
    return line_fig


def _build_summary(forecast_df):
    """Return per-trade mean attendance and mean risk, rounded to one decimal."""
    logger.debug("Generating summary statistics")
    summary_df = forecast_df.groupby('Trade').agg({
        'Attendance': 'mean',
        'Risk': 'mean'
    }).reset_index()
    summary_df['Attendance'] = summary_df['Attendance'].round(1)
    summary_df['Risk'] = summary_df['Risk'].round(1)
    summary_df = summary_df.rename(
        columns={'Attendance': 'Average Attendance', 'Risk': 'Average Risk (%)'}
    )
    logger.info(f"Summary statistics generated: {summary_df.to_dict()}")
    return summary_df


def _write_pdf_report(forecast_df, summary_df, trades, date_range_str, trade_selection, colors):
    """Render the matplotlib report and save it as a PDF in the working directory.

    The page stacks a risk bar chart, an attendance line chart, a small spacer,
    the summary table and an alert-colour legend.

    Returns:
        The path of the written PDF file.
    """
    logger.debug("Generating enhanced PDF report with small gap between trade and alerts")
    fig = plt.figure(figsize=(12, 20))
    try:
        gs = fig.add_gridspec(5, 1, height_ratios=[3, 3, 0.2, 1.5, 0.5], hspace=0.8)

        # --- Risk bar chart -------------------------------------------------
        ax1 = fig.add_subplot(gs[0])
        # DatetimeIndex guarantees Timestamp elements (with .strftime) whatever
        # Series.unique() would have returned on the installed pandas version.
        report_dates = pd.DatetimeIndex(forecast_df['Date'].drop_duplicates().sort_values())
        slot_of = {date: slot for slot, date in enumerate(report_dates)}
        # 0.35 as before for one or two trades; narrower when more trades share
        # a date so a group never spills into the neighbouring date's slot.
        width = min(0.35, 0.8 / max(len(trades), 1))
        for idx, trade in enumerate(trades):
            trade_forecast = forecast_df[forecast_df['Trade'] == trade]
            risks = trade_forecast['Risk']
            # Place each bar by its own date so a trade that lacks some dates
            # neither crashes the plot nor shifts onto the wrong tick.
            trade_x = [slot_of[date] + idx * width for date in trade_forecast['Date']]
            ax1.bar(
                trade_x,
                risks,
                width,
                label=trade,
                color=colors[idx % len(colors)]
            )
            for x, height in zip(trade_x, risks):
                # Dynamic vertical offset based on bar height
                offset = 5 if abs(height) < 10 else 10
                ax1.text(
                    x,  # bars are centre-aligned on x, so the label sits at x too
                    height + offset,
                    f'{height:.1f}%',
                    ha='center',
                    va='bottom',
                    fontsize=8
                )
        ax1.set_title(f'Shortage Risk Bar Graph ({date_range_str}, Risk in %)', fontsize=14, pad=10)
        ax1.set_xlabel('Date', fontsize=12)
        ax1.set_ylabel('Risk (%)', fontsize=12)
        ax1.set_xticks([slot + (len(trades) - 1) * width / 2 for slot in range(len(report_dates))])
        ax1.set_xticklabels(
            [date.strftime('%Y-%m-%d') for date in report_dates],
            rotation=45, ha='right', fontsize=8
        )
        ax1.set_yticks(range(0, 125, 25))  # 25% increments starting from 0
        ax1.grid(True, axis='y', linestyle='--', alpha=0.7)
        ax1.legend(title="Trades", fontsize=10, loc='upper right', bbox_to_anchor=(1.15, 1))  # Legend outside
        ax1.set_ylim(0, 100)
        ax1.tick_params(axis='x', pad=10)  # Increase padding to prevent overlap

        # --- Attendance line chart -----------------------------------------
        ax2 = fig.add_subplot(gs[1])
        for idx, trade in enumerate(trades):
            trade_forecast = forecast_df[forecast_df['Trade'] == trade]
            ax2.plot(
                trade_forecast['Date'],
                trade_forecast['Attendance'],
                label=trade,
                color=colors[idx % len(colors)],
                marker='o',
                markersize=8,
                linewidth=2
            )
            for alert_type in ['Warning', 'Critical']:
                alert_data = trade_forecast[trade_forecast['Alert'] == alert_type]
                if alert_data.empty:
                    continue
                ax2.scatter(
                    alert_data['Date'],
                    alert_data['Attendance'],
                    label=f"{trade} {alert_type}",
                    color=_ALERT_COLORS[alert_type],
                    marker='D',
                    s=100,
                    edgecolors='black',
                    linewidths=1.5
                )
        ax2.set_title(f'Attendance Forecast Line Graph ({date_range_str})', fontsize=14, pad=10)
        ax2.set_xlabel('Date', fontsize=12)
        ax2.set_ylabel('Attendance', fontsize=12)
        ax2.grid(True, linestyle='--', alpha=0.7)
        ax2.legend(title="Trades & Alerts", fontsize=10, loc='upper left', bbox_to_anchor=(0, -0.1), ncol=2)
        ax2.tick_params(axis='x', rotation=45, labelsize=8)
        ax2.set_ylim(bottom=0)  # Ensure y-axis starts at 0

        # Empty spacer subplot for small gap
        ax_spacer = fig.add_subplot(gs[2])
        ax_spacer.axis('off')

        # --- Summary table ---------------------------------------------------
        ax3 = fig.add_subplot(gs[3])
        ax3.axis('off')
        table_data = [summary_df.columns.tolist()] + summary_df.values.tolist()
        table = ax3.table(cellText=table_data, cellLoc='center', loc='center', colWidths=[0.3, 0.35, 0.35])
        table.set_fontsize(12)
        table.scale(1.2, 1.2)
        ax3.set_title('Summary Statistics (Risk in %)', fontsize=14, pad=10)

        # --- Alert legend ----------------------------------------------------
        ax4 = fig.add_subplot(gs[4])
        ax4.axis('off')
        legend_elements = [
            mpatches.Patch(color='red', label='Critical Alert'),
            mpatches.Patch(color='yellow', label='Warning Alert'),
            mpatches.Patch(color='blue', label='Normal')
        ]
        ax4.legend(handles=legend_elements, loc='center', ncol=3, title='Alert Types', fontsize=12, title_fontsize=12)

        # Right margin widened to leave room for the legend placed outside ax1.
        fig.subplots_adjust(top=0.9, bottom=0.1, left=0.1, right=1.2, hspace=0.6)

        # The trade name originates from the uploaded CSV, so keep it from
        # injecting path separators into the output filename.
        report_name = 'all_trades' if trade_selection == 'All' else _safe_filename_part(trade_selection)
        pdf_path = f"forecast_report_{report_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
        fig.savefig(pdf_path, format='pdf', dpi=300, bbox_inches='tight')
    finally:
        # Close this specific figure even if rendering failed.
        plt.close(fig)
    logger.info(f"Enhanced PDF report saved: {pdf_path}")
    return pdf_path


def _build_alerts_table(forecast_df):
    """Return the Warning/Critical rows with formatted risk, a colour label and date-only strings."""
    logger.debug("Generating alerts table")
    is_alert = forecast_df['Alert'].isin(['Warning', 'Critical'])
    alerts_df = forecast_df[is_alert][['Date', 'Trade', 'Attendance', 'Risk', 'Alert']].copy()
    alerts_df['Risk'] = alerts_df['Risk'].apply(lambda x: f"{x}%")
    alerts_df['Alert Color'] = alerts_df['Alert'].apply(lambda x: 'Red' if x == 'Critical' else 'Yellow')
    alerts_df['Date'] = alerts_df['Date'].dt.strftime('%Y-%m-%d')
    logger.info(f"Alerts generated with date only: {len(alerts_df)} alerts")
    return alerts_df


def process_data(file, trade_selection, filter_date):
    """Validate the uploaded CSV, forecast the selected trade(s) and build all outputs.

    Args:
        file: Gradio upload (object with ``.name`` or a path string), or ``None``.
        trade_selection: A trade name present in the CSV, or ``"All"``.
        filter_date: Optional date-picker value; when set, only forecast rows
            on that calendar day are kept.

    Returns:
        A 6-tuple ``(status_message, risk_bar_fig, line_fig, alerts_df,
        summary_df, pdf_path)``.  On any failure the first element is an
        ``"Error: ..."`` message and the remaining five are ``None``.

    Side effects:
        Stores the (filtered) forecast in the module-level
        ``latest_forecast_df`` and writes a PDF report to the working directory.
    """
    global latest_forecast_df
    logger.info(f"Processing data: Trade selection={trade_selection}, File={_uploaded_path(file) if file else 'None'}, Filter date={filter_date}")
    try:
        if not trade_selection:
            logger.error("No trade selected")
            return _failure("Error: Please select a trade")
        if not file:
            logger.error("No file uploaded")
            return _failure("Error: No file uploaded")

        df, error = _load_training_data(_uploaded_path(file))
        if error:
            return _failure(error)

        # Get all trades from the CSV
        available_trades = df['Trade'].unique().tolist()
        logger.debug(f"Available trades in CSV: {available_trades}")
        data = df.to_dict('records')
        # %-style args so the full dataset is only rendered when DEBUG is on.
        logger.debug("Input data for forecasting: %s", data)

        # Handle "All" trades or a single trade
        trades_to_process = available_trades if trade_selection == "All" else [trade_selection]
        logger.info(f"Trades to process: {trades_to_process}")

        # Validate selected trade if not "All"
        if trade_selection != "All" and trade_selection not in available_trades:
            logger.error(f"Selected trade '{trade_selection}' not in CSV")
            return _failure(
                f"Error: Selected trade '{trade_selection}' not found in CSV. "
                f"Available trades: {', '.join(map(str, available_trades))}"
            )

        error = _check_trade_history(df, trades_to_process)
        if error:
            return _failure(error)

        forecast_df, error = _run_forecasts(data, trades_to_process)
        if error:
            return _failure(error)

        # Apply date filtering based on filter_date (daily view only)
        selected_date = _parse_filter_date(filter_date)
        if selected_date is not None:
            forecast_df = forecast_df[forecast_df['Date'].dt.date == selected_date.date()]
            date_range_str = selected_date.strftime('%Y-%m-%d')
        else:
            date_range_str = "All dates"

        if forecast_df.empty:
            logger.warning("No data available for the selected date")
            return _failure(f"Error: No forecast data available for the selected date ({date_range_str})")

        # Store the forecast data globally
        latest_forecast_df = forecast_df

        colors = px.colors.qualitative.Plotly  # Consistent color palette for trades
        risk_bar_fig = _build_risk_figure(forecast_df, trades_to_process, date_range_str, colors)
        line_fig = _build_attendance_figure(forecast_df, trades_to_process, date_range_str, colors)
        summary_df = _build_summary(forecast_df)
        pdf_path = _write_pdf_report(
            forecast_df, summary_df, trades_to_process, date_range_str, trade_selection, colors
        )
        alerts_df = _build_alerts_table(forecast_df)

        # Status message
        status_message = f"Forecast generated successfully for {len(trades_to_process)} trade{'s' if len(trades_to_process) != 1 else ''} (Risk values are in percentages, Date: {date_range_str}, Visualization: Bar Graph (Risk), Line Graph (Attendance))"

        return (
            status_message,
            risk_bar_fig,
            line_fig,
            alerts_df,
            summary_df,
            pdf_path
        )
    except Exception as e:
        # UI boundary: report any unexpected failure in the status box instead
        # of letting the callback raise.
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        return _failure(f"Error: {str(e)}")


def _render_notification_history():
    """Return the notification history as a Markdown bullet list."""
    return "## Notification History\n" + "\n".join(f"- {entry}" for entry in notification_history)


def notify_contractor():
    """Create a risk notice per trade from the latest forecast.

    Each notice is appended to the module-level ``notification_history``.

    Returns:
        ``(message, history_markdown)`` where ``message`` holds one line per
        trade, or an explanatory text when no forecast has been generated yet.
    """
    logger.info("Notify contractor button clicked")
    timestamp = datetime.now().strftime("%Y-%m-%d %I:%M:%S %p")

    if latest_forecast_df is None:
        message = "No forecast data available to generate notifications."
        notification_history.append(f"**{timestamp}**: {message}")
        logger.warning("No forecast data for notifications")
        return message, _render_notification_history()

    # Calculate average risk per trade
    risk_summary = latest_forecast_df.groupby('Trade')['Risk'].mean().round(1).to_dict()

    # Generate notifications for each trade
    notifications = []
    for trade, avg_risk in risk_summary.items():
        if avg_risk < 50:
            risk_message = "Monitor, shortage risk is low"
        elif avg_risk <= 75:
            risk_message = "Caution, shortage risk is moderate"
        else:
            risk_message = "Alert, shortage risk is high"
        message = f"Notice for {trade}: {risk_message} at {avg_risk}%."
        notifications.append(message)
        notification_history.append(f"**{timestamp}**: {message}")
        logger.info(f"Notification generated for {trade}: {message}")

    combined_message = "\n".join(notifications) if notifications else "No trades to notify."
    return combined_message, _render_notification_history()


def update_trades(file):
    """Refresh the trade dropdown choices from the uploaded CSV.

    Trade names are whitespace-stripped the same way ``process_data`` strips
    them, so a value picked in the dropdown is found again during validation.

    Returns:
        A ``gr.update`` whose choices are ``"All"`` followed by the trades in
        the file, or just ``["All"]`` when no file is given or reading fails.
    """
    if not file:
        logger.warning("No file provided for trade update")
        return gr.update(choices=["All"])
    try:
        df = _read_csv(_uploaded_path(file))
        trade_names = df['Trade'].dropna()
        if trade_names.dtype == object:
            trade_names = trade_names.str.strip()
        trades = ["All"] + [name for name in trade_names.unique().tolist() if name != '']
        logger.info(f"Trades updated from CSV with 'All' option: {trades}")
        return gr.update(choices=trades)
    except Exception as e:
        # UI boundary: fall back to the default choice rather than raising.
        logger.error(f"Failed to update trades: {str(e)}", exc_info=True)
        return gr.update(choices=["All"])


with gr.Blocks() as demo:
    gr.Markdown("# Labour Attendance Forecasting App")
    gr.Markdown("Upload a CSV with columns: Trade, Date, Attendance, Weather")
    with gr.Row():
        file_input = gr.File(label="Upload CSV")
        trade_dropdown = gr.Dropdown(label="Select Trade", choices=["All"])
    with gr.Row():
        filter_date = gr.DateTime(label="Site Calendar Date", value=None)
    with gr.Row():
        submit_button = gr.Button("Generate Forecast")
    notification_display = gr.Markdown("## Notification History\n*No notifications yet.*", label="Notifications")
    output_text = gr.Textbox(label="Status")
    # NOTE: despite their names, heatmap_output shows the risk bar graph and
    # bar_output shows the attendance line graph (see the labels).
    heatmap_output = gr.Plot(label="Shortage Risk Bar Graph (Risk in %)")
    bar_output = gr.Plot(label="Attendance Forecast Line Graph")
    alerts_output = gr.DataFrame(label="Alerts")
    summary_output = gr.DataFrame(label="Summary Statistics (Average Attendance and Risk in %)")
    notify_button = gr.Button("Notify Contractor")
    notify_output = gr.Textbox(label="Notification Status")
    download_button = gr.File(label="Download PDF Report")

    file_input.change(update_trades, inputs=file_input, outputs=trade_dropdown)
    submit_button.click(
        process_data,
        inputs=[file_input, trade_dropdown, filter_date],
        outputs=[output_text, heatmap_output, bar_output, alerts_output, summary_output, download_button]
    )
    notify_button.click(
        notify_contractor,
        outputs=[notify_output, notification_display]
    )

try:
    logger.info("Launching Gradio app")
    demo.launch()
except Exception as e:
    logger.error(f"Failed to launch Gradio app: {str(e)}", exc_info=True)
    raise