Energy_Forecast / app.py
LeemahLee's picture
Removing the clock
c6a9326 verified
import os
import time
import pandas as pd
import numpy as np
import joblib
import glob
import requests
import streamlit as st
from streamlit_autorefresh import st_autorefresh
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from pytz import timezone
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats.mstats import winsorize
# === Streamlit Setup ===
st.set_page_config(page_title="Energy Forecast Dashboard", layout="wide", page_icon="⚡")
st_autorefresh(interval=5000, key="auto_refresh")
st.title(":zap: Real-Time Power Forecast")
st.markdown("Smarter Energy Monitoring for Smarter Grids")
# === Load Model ===
@st.cache_resource
def load_model():
return joblib.load("lightgbm_energy_model_full2.pkl")
model = load_model()
# === Supabase Config ===
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_API_KEY = os.environ.get("SUPABASE_API_KEY")
TABLE = "smart_meter_readings_1year"
HEADERS = {
"apikey": SUPABASE_API_KEY,
"Authorization": f"Bearer {SUPABASE_API_KEY}"
}
# === Pull Latest Data (Today Only) ===
@st.cache_data(ttl=300)
def fetch_today_data():
today = pd.Timestamp.now(tz=timezone("Europe/London")).strftime("%Y-%m-%d")
url = f"{SUPABASE_URL}/rest/v1/{TABLE}?select=*&timestamp=gte.{today}T00:00:00&timestamp=lt.{today}T23:59:59&order=timestamp.asc"
r = requests.get(url, headers=HEADERS)
if r.ok:
df = pd.DataFrame(r.json())
df['timestamp'] = pd.to_datetime(df['timestamp'])
return df
else:
st.error(f"Failed to fetch data: {r.status_code}")
return pd.DataFrame()
df = fetch_today_data()
df = df.dropna(subset=['region', 'property_type', 'power_consumption_kwh'])
# === Feature Engineering ===
def prepare_features(df):
if df.empty:
return df
df = df.copy()
df['hour'] = df['timestamp'].dt.hour
df['minute_of_day'] = df['hour'] * 60 + df['timestamp'].dt.minute
df['day_of_week'] = df['timestamp'].dt.dayofweek
df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
df['sin_hour'] = np.sin(2 * np.pi * df['hour'] / 24)
df['cos_hour'] = np.cos(2 * np.pi * df['hour'] / 24)
df['sin_30min'] = np.sin(2 * np.pi * df['minute_of_day'] / 1440)
df['cos_30min'] = np.cos(2 * np.pi * df['minute_of_day'] / 1440)
df['lag_30min'] = df['power_consumption_kwh'].shift(1)
df['lag_1hr'] = df['power_consumption_kwh'].shift(2)
df['lag_90min'] = df['power_consumption_kwh'].shift(3)
df['lag_2hr'] = df['power_consumption_kwh'].shift(4)
df['lag_mean_3'] = df['power_consumption_kwh'].shift(1).rolling(2).mean()
df['lag_mean_6'] = df['power_consumption_kwh'].shift(1).rolling(4).mean()
df['rolling_std_6'] = df['power_consumption_kwh'].rolling(6).std()
df['interaction_lag_voltage'] = df['lag_1hr'] * df['voltage']
df = pd.get_dummies(df, columns=['property_type', 'region'], drop_first=False)
for col in ['property_type_residential', 'property_type_commercial',
'region_north', 'region_south', 'region_east', 'region_west']:
if col not in df.columns:
df[col] = 0
df.fillna(method='ffill', inplace=True)
df.dropna(inplace=True)
expected = [
'voltage', 'cos_hour', 'sin_hour', 'is_weekend', 'minute_of_day', 'cos_30min',
'humidity_pct', 'lag_90min', 'temperature_c', 'lag_2hr', 'lag_mean_6', 'lag_mean_3',
'lag_1hr', 'rolling_std_6', 'lag_30min', 'sin_30min', 'interaction_lag_voltage',
'day_of_week', 'property_type_commercial', 'property_type_residential',
'region_north', 'region_south', 'region_east', 'region_west'
]
return df[['timestamp'] + expected]
# === Predict ===
df_features = prepare_features(df)
feature_cols = [col for col in df_features.columns if col != 'timestamp']
expected_features = feature_cols # Save correct order of features
df_features['predicted_kwh'] = model.predict(df_features[feature_cols])
# === Helper Function for Future Forecasting ===
def forecast_future_usage(last_row, model, steps=[30, 60, 90]):
preds = {}
forecast_times = []
prev_preds = [
last_row['lag_30min'],
last_row['lag_1hr'],
last_row['lag_90min']
]
for step in steps:
future_time = last_row["timestamp"] + pd.Timedelta(minutes=step)
row = last_row.copy()
row["timestamp"] = future_time
row["hour"] = future_time.hour
row["minute_of_day"] = row["hour"] * 60 + future_time.minute
row["day_of_week"] = future_time.dayofweek
row["is_weekend"] = int(future_time.dayofweek >= 5)
row["sin_hour"] = np.sin(2 * np.pi * row["hour"] / 24)
row["cos_hour"] = np.cos(2 * np.pi * row["hour"] / 24)
row["sin_30min"] = np.sin(2 * np.pi * row["minute_of_day"] / 1440)
row["cos_30min"] = np.cos(2 * np.pi * row["minute_of_day"] / 1440)
row["lag_30min"] = prev_preds[-1]
row["lag_1hr"] = prev_preds[-2] if len(prev_preds) >= 2 else prev_preds[-1]
row["lag_90min"] = prev_preds[-3] if len(prev_preds) >= 3 else prev_preds[-1]
row["lag_mean_3"] = np.mean(prev_preds[-3:])
row["lag_mean_6"] = np.mean(prev_preds[-3:] + [last_row["lag_mean_3"], last_row["lag_mean_6"]])
row_features = row[expected_features].values.reshape(1, -1)
# row_features = row[expected].values.reshape(1, -1)
pred = model.predict(row_features)[0]
preds[f"{step} min"] = pred
prev_preds.append(pred)
return preds, forecast_times
# === Tabs ===
tabs = st.tabs(["🔮 Regional Forecast", "📊 Performance Monitor", "📈 Usage Pattern", "🚗🌞 Electric Vehicle & Solar"])
# === Regional Forecast Tab ==
with tabs[0]:
st.subheader("Regional Forecast for Next Hours")
# Region filter with bold styling
st.markdown("<h4 style='margin-bottom: 0;'>📍 <b>Select a Region</b></h4>", unsafe_allow_html=True)
selected_region = st.selectbox(" ", ['south', 'east', 'north', 'west'])
for ptype in ['residential', 'commercial']:
temp_df = df_features.copy()
region_col = f'region_{selected_region}'
ptype_col = f'property_type_{ptype}'
st.markdown(f"#### {selected_region.capitalize()} - {ptype.capitalize()}")
if region_col in temp_df.columns and ptype_col in temp_df.columns:
temp_df = temp_df[(temp_df[region_col] == 1) & (temp_df[ptype_col] == 1)]
else:
st.warning(f"No data available for Region: {selected_region} and Property: {ptype}")
continue
if temp_df.empty or temp_df.shape[0] < 2:
st.info(f"Not enough data for {ptype} properties in {selected_region} region")
continue
# Sort and get latest row
temp_df = temp_df.sort_values("timestamp")
# latest_row = temp_df.iloc[-1].copy()
latest_row = temp_df.iloc[-1].copy()
# Make latest_row['timestamp'] timezone-aware if needed
if latest_row['timestamp'].tzinfo is None:
latest_row['timestamp'] = latest_row['timestamp'].tz_localize("Europe/London")
now = pd.Timestamp.now(tz="Europe/London")
# Skip forecast if data is too old
if (now - latest_row['timestamp']) > pd.Timedelta(hours=1):
st.warning(f"Last data point for {ptype} in {selected_region} is too old (at {latest_row['timestamp']:%H:%M}). Skipping forecast.")
continue
# Forecast future with both predictions and times
# preds = {}
# forecast_times = []
# current_time = latest_row['timestamp']
# features = latest_row.drop(labels=['timestamp', 'predicted_kwh'])
# for i, minutes_ahead in enumerate([30, 60, 90, 120]):
# forecast_time = (current_time + pd.Timedelta(minutes=minutes_ahead)).strftime('%H:%M')
# prediction = model.predict([features])[0]
# preds[f"{minutes_ahead} min"] = prediction
# forecast_times.append(forecast_time)
# # Display metrics with time below each forecast
# cols = st.columns(len(preds))
# for i, (label, value) in enumerate(preds.items()):
# with cols[i]:
# st.metric(label=label, value=f"{value:.2f} kWh")
# st.markdown(
# f"""
# <div style='text-align:center; font-size:15px;'>
# <span style='color:#FF0000; font-weight:bold;'>⏰ {forecast_times[i]}</span>
# </div>
# """,
# unsafe_allow_html=True
# )
preds = {}
features = latest_row.drop(labels=['timestamp', 'predicted_kwh'])
for minutes_ahead in [30, 60, 90, 120]:
prediction = model.predict([features])[0]
preds[f"{minutes_ahead} min"] = prediction
# Display metrics only (no clock/timestamp)
cols = st.columns(len(preds))
for i, (label, value) in enumerate(preds.items()):
with cols[i]:
st.metric(label=label, value=f"{value:.2f} kWh")
# Line chart of recent trend
fig = px.line(temp_df, x='timestamp', y='predicted_kwh',
title=f"{selected_region.capitalize()} {ptype.capitalize()} Forecast")
st.plotly_chart(fig, use_container_width=True)
# === Performance Monitor ===
with tabs[1]:
st.subheader("Prediction Performance Monitor")
df_eval = df_features.copy()
df_eval = df_eval.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
df_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
# === Error Metrics ===
mse = mean_squared_error(df_eval['actual'], df_eval['predicted_kwh'])
rmse = np.sqrt(mse)
mae = mean_absolute_error(df_eval['actual'], df_eval['predicted_kwh'])
st.metric("RMSE", f"{rmse:.3f} kWh")
st.metric("MAE", f"{mae:.3f} kWh")
# === Hourly RMSE Chart ===
# Ensure 'hour' is present
df_eval['hour'] = df_eval['timestamp'].dt.hour
hourly_rmse = df_eval.groupby('hour').apply(
lambda x: np.sqrt(mean_squared_error(x['actual'], x['predicted_kwh']))
).reset_index(name='rmse')
fig = px.bar(hourly_rmse, x='hour', y='rmse', title='Hourly RMSE')
st.plotly_chart(fig, use_container_width=True)
# === Latency Placeholders ===
st.metric("Prediction Latency", "89 ms")
st.metric("API Fetch Latency", "170 ms")
# === ⚠ Drift Detection
if rmse > 0.8:
st.warning("⚠️ Model performance has drifted: RMSE above 0.8 kWh!")
else:
st.success("✅ Model performance is within expected range.")
missing_info = df[df[['region', 'property_type']].isnull().any(axis=1)]
if not missing_info.empty:
st.warning(f"{len(missing_info)} rows had missing region or property_type and were excluded from prediction.")
# === Compute Error and Flag Anomalies ===
df_eval = df_features.copy()
df_eval = df_eval.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
df_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
# Compute error
df_eval['error'] = np.abs(df_eval['actual'] - df_eval['predicted_kwh'])
# Flag anomalies
threshold = df_eval['error'].quantile(0.99)
df_eval['is_anomaly'] = df_eval['error'] > threshold
# Plot anomalies
fig = px.scatter(
df_eval,
x='timestamp',
y='error',
color='is_anomaly',
title="Prediction Errors and Anomalies",
color_discrete_map={True: 'red', False: 'green'},
labels={"error": "Absolute Error (kWh)"})
st.plotly_chart(fig, use_container_width=True)
# === Usage Pattern ===
with tabs[2]:
st.subheader(" Usage Pattern")
# Filter today's data only
df_today_eval = df_features.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
df_today_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
df_today_eval['hour'] = df_today_eval['timestamp'].dt.strftime("%H:%M")
# Rolling 30-min average (optional smoothing)
df_today_eval['actual_rolling'] = df_today_eval['actual'].rolling(2).mean()
df_today_eval['predicted_rolling'] = df_today_eval['predicted_kwh'].rolling(2).mean()
# Plotting hourly line chart
fig_hourly = go.Figure()
fig_hourly.add_trace(go.Scatter(
x=df_today_eval['timestamp'], y=df_today_eval['actual_rolling'],
mode='lines',
name='Actual',
line=dict(color='indigo', width=2)
))
fig_hourly.add_trace(go.Scatter(
x=df_today_eval['timestamp'], y=df_today_eval['predicted_rolling'],
mode='lines',
name='Predicted',
line=dict(color='crimson', width=2,)
))
fig_hourly.update_layout(
title="⏱️ Hourly Trend for Today: Predicted vs Actual",
xaxis_title="Time",
yaxis_title="kWh",
xaxis=dict(tickformat="%H:%M", showgrid=True),
legend_title="Legend",
template="plotly_white"
)
st.plotly_chart(fig_hourly, use_container_width=True)
# === Hourly Usage by Region ===
if not df.empty:
df['hour'] = df['timestamp'].dt.hour
if 'region' in df.columns:
avg_by_region = df.groupby(['hour', 'region'])['power_consumption_kwh'].mean().reset_index()
fig = px.line(
avg_by_region, x='hour', y='power_consumption_kwh',
color='region', title="Average Hourly Usage by Region", markers=True
)
st.plotly_chart(fig, use_container_width=True)
else:
st.warning("Region information not available in raw data.")
# === Daily Comparison Logging ===
def update_daily_comparison_log(df_features, df_raw):
today = df_features['timestamp'].dt.date.iloc[0]
predicted_total = df_features['predicted_kwh'].sum()
actual_total = df_raw['power_consumption_kwh'].sum()
daily_df = pd.DataFrame([{
'date': today,
'predicted_kwh': predicted_total,
'actual_kwh': actual_total
}])
if os.path.exists("daily_comparison_log.csv"):
existing = pd.read_csv("daily_comparison_log.csv")
existing['date'] = pd.to_datetime(existing['date']).dt.date
if today not in existing['date'].values:
updated = pd.concat([existing, daily_df], ignore_index=True)
updated.to_csv("daily_comparison_log.csv", index=False)
else:
daily_df.to_csv("daily_comparison_log.csv", index=False)
update_daily_comparison_log(df_features, df)
# === 1. Log timestamp-level data for today ===
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
today_str = pd.Timestamp.now(tz=timezone("Europe/London")).strftime("%Y-%m-%d")
log_path = os.path.join(log_dir, f"{today_str}.csv")
df_eval = df_features.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
df_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
df_log = df_eval[['timestamp', 'actual', 'predicted_kwh']].copy()
df_log['date'] = df_log['timestamp'].dt.date
df_log['hour'] = df_log['timestamp'].dt.strftime("%H:%M")
df_log.to_csv(log_path, index=False)
# === 2. Read all logs and concatenate ===
log_files = sorted(glob.glob(f"{log_dir}/*.csv"))
full_df = pd.concat([pd.read_csv(f) for f in log_files], ignore_index=True)
# === 3. Clean up and format ===
full_df['timestamp'] = pd.to_datetime(full_df['timestamp'])
full_df['hour'] = pd.to_datetime(full_df['hour'], format="%H:%M").dt.time
full_df['date'] = pd.to_datetime(full_df['date'])
# === 4. Plot all days together ===
fig = go.Figure()
for date in full_df['date'].dt.date.unique():
sub = full_df[full_df['date'].dt.date == date]
fig.add_trace(go.Scatter(
x=sub['timestamp'], y=sub['actual'],
mode='lines',
name=f"{date} - Actual",
line=dict(color='#008080', width=2)
))
fig.add_trace(go.Scatter(
x=sub['timestamp'], y=sub['predicted_kwh'],
mode='lines',
name=f"{date} - Predicted",
line=dict(color='#8B0000', width=2)
))
fig.update_layout(
title="📈 Hourly Trend: Actual vs Predicted Energy Consumption",
xaxis_title="Timestamp",
yaxis_title="kWh",
legend_title="Legend",
xaxis=dict(tickformat="%H:%M", showgrid=True),
template="plotly_white"
)
st.plotly_chart(fig, use_container_width=True)
missing_info = df[df[['region', 'property_type']].isnull().any(axis=1)]
if not missing_info.empty:
st.warning(f"{len(missing_info)} rows had missing region or property_type and were excluded from prediction.")
# === EV & Solar Insight ===
with tabs[3]:
st.subheader("EV & Solar Insights")
ev_df = df[df['ev_owner'] == 1]
solar_df = df[df['solar_installed'] == 1]
ev_avg = ev_df['power_consumption_kwh'].mean()
solar_avg = solar_df['power_consumption_kwh'].mean()
ev_peak_hour = ev_df['timestamp'].dt.hour.value_counts().idxmax() if not ev_df.empty else "N/A"
solar_peak_hour = solar_df['timestamp'].dt.hour.value_counts().idxmax() if not solar_df.empty else "N/A"
col1, col2 = st.columns(2)
with col1:
st.metric("EV Household Avg Usage (kWh)", f"{ev_avg:.2f}")
st.markdown(f"🚗 Peak Usage Hour: {ev_peak_hour}:00")
if ev_avg > df['power_consumption_kwh'].mean():
st.warning("⚠ Electric Vehicle consume above average energy!")
with col2:
st.metric("Solar Household Avg Usage (kWh)", f"{solar_avg:.2f}")
st.markdown(f"🌞 Peak Usage Hour: {solar_peak_hour}:00")
if solar_avg > df['power_consumption_kwh'].mean():
st.warning("⚠ Solar homes also show high consumption.")
st.markdown("### 🔍 Distribution Analysis")
col3, col4 = st.columns(2)
with col3:
fig_ev = px.box(ev_df, y='power_consumption_kwh', points="outliers",
title="EV Owners: Power Usage Box Plot")
st.plotly_chart(fig_ev, use_container_width=True)
with col4:
fig_solar = px.box(solar_df, y='power_consumption_kwh', points="outliers",
title="Solar Homes: Power Usage Box Plot")
st.plotly_chart(fig_solar, use_container_width=True)
st.markdown("### 📈 Trend Over Time")
# Filter only rows where either EV or Solar is installed
df_combo = df[df['ev_owner'] + df['solar_installed'] > 0].copy()
# Create label per row
def get_label(row):
if row['ev_owner'] and row['solar_installed']:
return "EV + Solar"
elif row['ev_owner']:
return "EV Only"
elif row['solar_installed']:
return "Solar Only"
return "Other"
df_combo['label'] = df_combo.apply(get_label, axis=1)
# Plot
fig_trend = px.line(
df_combo,
x='timestamp',
y='power_consumption_kwh',
color='label',
title="Energy Usage Trend: EV & Solar Homes"
)
st.plotly_chart(fig_trend, use_container_width=True)