import json

import pandas as pd
from prophet import Prophet

# Columns every input record must provide.
_REQUIRED_COLUMNS = ('Date', 'Attendance')


def _load_history(data):
    """Validate raw input and return a ``(history, error)`` pair.

    Exactly one element of the pair is ``None``. On success ``history`` is a
    DataFrame whose ``Date`` column is datetime and whose ``Attendance``
    column is numeric; on failure ``error`` is the ``{"error": ...}`` payload
    describing the first validation problem found.
    """
    # A DataFrame has no unambiguous truth value, so test it with ``.empty``.
    is_empty = data.empty if isinstance(data, pd.DataFrame) else not data
    if is_empty:
        return None, {"error": "Input data is empty"}

    # copy=True so the coercions below never touch the caller's own data.
    history = pd.DataFrame(data, copy=True)

    if not set(_REQUIRED_COLUMNS).issubset(history.columns):
        return None, {"error": "Input must contain 'Date' and 'Attendance' columns"}

    # errors='coerce' turns unparseable values into NaT/NaN so they can be
    # reported as a validation error instead of raising.
    history['Date'] = pd.to_datetime(history['Date'], errors='coerce')
    if history['Date'].isna().any():
        return None, {"error": "Invalid 'Date' format. Use YYYY-MM-DD"}

    history['Attendance'] = pd.to_numeric(history['Attendance'], errors='coerce')
    if history['Attendance'].isna().any():
        return None, {"error": "'Attendance' must be numeric"}

    if len(history) < 2:
        return None, {"error": "At least 2 data points are required for forecasting"}

    return history, None


def _estimate_risk_alert(attendance, baseline):
    """Return the ``(risk, alert)`` estimate for one attendance value.

    The value is banded relative to ``baseline`` (the mean historical
    attendance): at least 80% is "Low", at least 50% is "Medium", anything
    lower is "High". The risk score grows with the shortfall below the
    baseline, using a steeper slope and higher offset in each worse band.
    """
    shortfall = baseline - attendance
    if attendance >= baseline * 0.8:
        return round(10.0 + shortfall * 2, 1), "Low"
    if attendance >= baseline * 0.5:
        return round(15.0 + shortfall * 3, 1), "Medium"
    return round(20.0 + shortfall * 4, 1), "High"


def train_and_forecast(data, periods: int = 30):
    """Fit a Prophet model on attendance history and forecast future days.

    Args:
        data: Records convertible to a DataFrame (for example a list of
            dicts) containing at least ``Date`` and ``Attendance``. Optional
            ``Risk`` and ``Alert`` columns, when both are present, are kept
            for the dates they were supplied for.
        periods: Number of daily steps to forecast past the last historical
            date. Defaults to 30.

    Returns:
        On success, a list of dicts with keys ``Date`` (string),
        ``Attendance`` (model estimate rounded to one decimal), ``Risk`` and
        ``Alert``, covering the historical dates followed by the forecast
        dates. On any failure, a dict ``{"error": <message>}``; this function
        does not raise.
    """
    try:
        history, error = _load_history(data)
        if error is not None:
            return error

        if periods < 0:
            return {"error": "'periods' must be a non-negative integer"}

        # Keep caller-supplied Risk/Alert so they can override the estimates
        # on historical dates (first occurrence wins for duplicated dates).
        recorded = None
        if 'Risk' in history.columns and 'Alert' in history.columns:
            recorded = (
                history[['Date', 'Risk', 'Alert']]
                .drop_duplicates(subset='Date')
                .assign(Date=lambda frame: frame['Date'].astype(str))
            )

        # Prophet expects the timestamp in 'ds' and the target in 'y'.
        training = history[['Date', 'Attendance']].rename(
            columns={'Date': 'ds', 'Attendance': 'y'}
        )

        # NOTE(review): all three seasonalities are forced on. Prophet's
        # 'auto' setting would skip yearly seasonality for short histories
        # and daily seasonality for day-granular data; confirm forcing them
        # is intended.
        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=True,
        )
        model.fit(training)

        future = model.make_future_dataframe(periods=periods, freq='D')
        predicted = model.predict(future)

        result = predicted[['ds', 'yhat']].rename(
            columns={'ds': 'Date', 'yhat': 'Attendance'}
        )
        result['Date'] = result['Date'].astype(str)

        # Estimates are derived from the unrounded prediction, relative to
        # the mean of the observed attendance.
        baseline = history['Attendance'].mean()
        estimates = [
            _estimate_risk_alert(value, baseline)
            for value in result['Attendance']
        ]
        result['Risk'] = [risk for risk, _ in estimates]
        result['Alert'] = [alert for _, alert in estimates]

        if recorded is not None:
            # Left merge keeps every forecast row; recorded values, where
            # present, take precedence over the estimates.
            result = result.merge(
                recorded, on='Date', how='left', suffixes=('', '_hist')
            )
            result['Risk'] = result['Risk_hist'].combine_first(result['Risk'])
            result['Alert'] = result['Alert_hist'].combine_first(result['Alert'])
            result = result.drop(columns=['Risk_hist', 'Alert_hist'])

        result['Attendance'] = result['Attendance'].round(1)

        result = result[['Date', 'Attendance', 'Risk', 'Alert']]
        return result.to_dict('records')

    except Exception as e:
        # Boundary handler: callers receive an error payload, never a raise.
        return {"error": f"Forecasting failed: {str(e)}"}