# amd-eps / xgboost_forecasting.py
# Uploaded by Shanthemach ("Upload 6 files")
# Revision 4add4a8 (verified)
# ==========================================
# GOOGLE COLAB DEPLOYMENT SCRIPT
# MODEL: XGBoost Live Energy Forecasting
# ==========================================
# STEP 1: OPEN GOOGLE COLAB (https://colab.research.google.com/)
# STEP 2: CREATE A NEW NOTEBOOK
# STEP 3: RUN THE FOLLOWING COMMAND IN THE FIRST CELL TO INSTALL DEPENDENCIES:
# !pip install gridstatus xgboost pandas matplotlib scikit-learn
# STEP 4: COPY AND PASTE THIS ENTIRE SCRIPT INTO THE NEXT CELL AND RUN IT
import pandas as pd
import numpy as np
import gridstatus
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')
# ------------------------------------------------------------------
# Step 1: download recent CAISO load data and reduce it to hourly means.
# ------------------------------------------------------------------
print("1. Fetching LIVE California Grid Data via API...")
iso = gridstatus.CAISO()

# Query window: the 21 days leading up to "now" in US/Pacific time.
end_date = pd.Timestamp.now(tz='US/Pacific')
start_date = end_date - pd.Timedelta(days=21)

# The end bound is today's date plus one day.
# NOTE(review): presumably this is so today's partial data is included
# (i.e. `end` is treated as exclusive) - confirm against the gridstatus docs.
raw_df = iso.get_load(start=start_date.date(), end=end_date.date() + pd.Timedelta(days=1))

# Use the timestamp column as the index. 'Time' is preferred, with
# 'Interval Start' as the fallback column name.
for time_column in ('Time', 'Interval Start'):
    if time_column in raw_df.columns:
        raw_df = raw_df.set_index(time_column)
        break
else:
    # Neither column exists. Resampling below needs a datetime index, so fail
    # here with an explicit message unless the index is already datetime-like.
    if not isinstance(raw_df.index, pd.DatetimeIndex):
        raise KeyError(
            "Load data has neither a 'Time' nor an 'Interval Start' column; "
            f"available columns: {list(raw_df.columns)}"
        )

# Keep only the load column, then average all readings within each hour.
# Hours with no readings produce NaN and are dropped.
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated in
# recent pandas releases.
df = raw_df[['Load']].copy()
df_hourly = df.resample('h').mean().dropna()
# ------------------------------------------------------------------
# Step 2: derive calendar and lag features from the hourly load series.
# ------------------------------------------------------------------
print("2. Engineering Chronological Features...")

# Calendar features read directly from the hourly datetime index.
df_hourly['Hour'] = df_hourly.index.hour
df_hourly['DayOfWeek'] = df_hourly.index.dayofweek

# Lag features: the load observed exactly 24 hours and 168 hours (one week)
# before each timestamp. The index is shifted by a time offset rather than by
# a row count: hours with no data were dropped after resampling, so a
# row-count shift would pair a timestamp with the wrong earlier hour whenever
# the index has a gap. Column assignment aligns on the index, so timestamps
# without a matching earlier observation receive NaN.
hourly_load = df_hourly['Load']
df_hourly['Lag_24h'] = hourly_load.shift(freq=pd.Timedelta(hours=24))
df_hourly['Lag_168h'] = hourly_load.shift(freq=pd.Timedelta(hours=168))

# Discard rows that lack either lag value (at minimum, the first week).
df_hourly.dropna(inplace=True)
# ------------------------------------------------------------------
# Step 3: chronological train/test split and model fitting.
# ------------------------------------------------------------------
print("3. Training XGBoost Regressor Model...")
features = ['Hour', 'DayOfWeek', 'Lag_24h', 'Lag_168h']
target = 'Load'

# 80% Train, 20% Test Split chronologically: the earliest 80% of rows train
# the model and the most recent 20% are held out (no shuffling).
split = int(len(df_hourly) * 0.8)

# With fewer than two usable rows the training split would be empty and the
# fit below would fail with an opaque library error, so stop early instead.
if split < 1:
    raise ValueError(
        "Not enough hourly rows to train on after feature engineering "
        f"(got {len(df_hourly)}, need at least 2)."
    )

train, test = df_hourly.iloc[:split], df_hourly.iloc[split:]
X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]

# Gradient-boosted regressor; random_state is fixed so runs are repeatable on
# identical input data.
model = XGBRegressor(n_estimators=150, learning_rate=0.08, max_depth=6, random_state=42)
model.fit(X_train, y_train)
# ------------------------------------------------------------------
# Step 4: predict on the held-out rows and report accuracy.
# ------------------------------------------------------------------
print("4. Generating Forecast Predictions...")
predictions = model.predict(X_test)

# `test` is an iloc slice of df_hourly. Writing a new column into it in place
# is chained assignment (pandas emits SettingWithCopyWarning, which the global
# warnings filter hides). `assign` returns a new DataFrame with the extra
# column and leaves the slice untouched.
test = test.assign(Predicted_Load=predictions)

# R^2 and mean absolute error of the predictions against the held-out load.
r2 = r2_score(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
print(f"\n✅ Accuracy metrics against Live unseen data: R2: {r2:.4f} | Absolute Error: {mae:.1f} MW\n")
# ------------------------------------------------------------------
# Step 5: chart the tail of the training data, the actual held-out load and
# the model forecast on one time axis.
# ------------------------------------------------------------------
print("5. Plotting Interactive Results...")
fig, ax = plt.subplots(figsize=(16, 6))

# Only the last 72 training rows are drawn, as context before the test period.
recent_train = train.tail(72)
ax.plot(
    recent_train.index,
    recent_train['Load'],
    label='Training Data (Truth)',
    color='grey',
    alpha=0.5,
)

# Actual versus predicted load over the held-out period.
ax.plot(test.index, test['Load'], label='Actual Live Load (Test Set)', color='blue')
ax.plot(
    test.index,
    test['Predicted_Load'],
    label='AI XGBoost Forecast',
    color='orange',
    linestyle='--',
)

ax.set_title('Live California Grid (CAISO) - XGBoost AI Forecasting', fontsize=16)
ax.set_ylabel('Energy Consumption (Megawatts)')
ax.set_xlabel('Date & Time')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()
print("Model Execution Complete! Scroll up to see the plotted chart.")