# Spaces:
# No application file
# No application file
# ==========================================
# GOOGLE COLAB DEPLOYMENT SCRIPT
# MODEL: XGBoost Live Energy Forecasting
# ==========================================
# STEP 1: OPEN GOOGLE COLAB (https://colab.research.google.com/)
# STEP 2: CREATE A NEW NOTEBOOK
# STEP 3: RUN THE FOLLOWING COMMAND IN THE FIRST CELL TO INSTALL DEPENDENCIES:
#     !pip install gridstatus xgboost pandas matplotlib scikit-learn
# STEP 4: COPY AND PASTE THIS ENTIRE SCRIPT INTO THE NEXT CELL AND RUN IT
#
# What the script does, in order:
#   1. Requests roughly the last 21 days of CAISO load through gridstatus.
#   2. Averages the samples to hourly values and adds hour-of-day,
#      day-of-week and 24 h / 168 h lagged-load features.
#   3. Trains an XGBoost regressor on the oldest 80% of the rows.
#   4. Scores it on the newest 20% (R2 and mean absolute error).
#   5. Plots actual vs. predicted load with matplotlib.

import warnings

import gridstatus
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error
from xgboost import XGBRegressor

# NOTE(review): this silences every warning for the rest of the session. It
# keeps the notebook output short, but it can also hide real problems.
warnings.filterwarnings('ignore')

HISTORY_DAYS = 21          # days of history requested from the API
TRAIN_FRACTION = 0.8       # share of rows (oldest first) used for training
PLOT_CONTEXT_HOURS = 72    # trailing training hours drawn before the test set
# Column names that may hold the sample timestamp, in order of preference.
TIMESTAMP_COLUMNS = ('Time', 'Interval Start')

print("1. Fetching LIVE California Grid Data via API...")
iso = gridstatus.CAISO()
end_date = pd.Timestamp.now(tz='US/Pacific')
start_date = end_date - pd.Timedelta(days=HISTORY_DAYS)
# The request uses whole calendar dates; the end date is pushed one day
# forward (presumably so that today's samples are included -- the exact
# end-bound semantics belong to gridstatus).
raw_df = iso.get_load(start=start_date.date(), end=end_date.date() + pd.Timedelta(days=1))

# Index the frame by its timestamp column. If neither known column is present
# the data is only usable when the index already holds timestamps; otherwise
# fail here with a clear message instead of an obscure resample error later.
time_column = next((name for name in TIMESTAMP_COLUMNS if name in raw_df.columns), None)
if time_column is not None:
    raw_df = raw_df.set_index(time_column)
elif not isinstance(raw_df.index, pd.DatetimeIndex):
    raise ValueError(
        f"Load data has no timestamp column (expected one of {TIMESTAMP_COLUMNS}); "
        f"got columns {list(raw_df.columns)}"
    )

df = raw_df[['Load']].copy()
# Keep the complete hourly grid: hours without samples stay as NaN rows so
# that the row-based shifts below really mean "24 hours ago" / "168 hours
# ago". Dropping the gaps first would silently misalign the lag features.
# A Timedelta is used instead of the deprecated 'H' frequency alias.
df_hourly = df.resample(pd.Timedelta(hours=1)).mean()

print("2. Engineering Chronological Features...")
df_hourly['Hour'] = df_hourly.index.hour
df_hourly['DayOfWeek'] = df_hourly.index.dayofweek
df_hourly['Lag_24h'] = df_hourly['Load'].shift(24)
df_hourly['Lag_168h'] = df_hourly['Load'].shift(168)
# Removes the first week (no 168 h lag yet) plus any row whose load or lag
# falls into a gap in the source data.
df_hourly = df_hourly.dropna()

print("3. Training XGBoost Regressor Model...")
features = ['Hour', 'DayOfWeek', 'Lag_24h', 'Lag_168h']
target = 'Load'
# 80% Train, 20% Test Split chronologically
split = int(len(df_hourly) * TRAIN_FRACTION)
if not 0 < split < len(df_hourly):
    raise ValueError(
        f"Only {len(df_hourly)} usable hourly rows remain after feature engineering; "
        "not enough to build both a training and a test set."
    )
# Explicit copies: a column is added to `test` below, and writing into a
# slice of df_hourly would otherwise be a chained assignment.
train = df_hourly.iloc[:split].copy()
test = df_hourly.iloc[split:].copy()
X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]
model = XGBRegressor(n_estimators=150, learning_rate=0.08, max_depth=6, random_state=42)
model.fit(X_train, y_train)

print("4. Generating Forecast Predictions...")
predictions = model.predict(X_test)
test['Predicted_Load'] = predictions
r2 = r2_score(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print(f"\n✅ Accuracy metrics against Live unseen data: R2: {r2:.4f} | Absolute Error: {mae:.1f} MW\n")

print("5. Plotting Interactive Results...")
plt.figure(figsize=(16, 6))
# Show only the tail of the training data as visual context for the test set.
recent_train = train.tail(PLOT_CONTEXT_HOURS)
plt.plot(recent_train.index, recent_train['Load'], label='Training Data (Truth)', color='grey', alpha=0.5)
plt.plot(test.index, test['Load'], label='Actual Live Load (Test Set)', color='blue')
plt.plot(test.index, test['Predicted_Load'], label='AI XGBoost Forecast', color='orange', linestyle='--')
plt.title('Live California Grid (CAISO) - XGBoost AI Forecasting', fontsize=16)
plt.ylabel('Energy Consumption (Megawatts)')
plt.xlabel('Date & Time')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print("Model Execution Complete! Scroll up to see the plotted chart.")