# NOTE: "Spaces: Sleeping" is status-banner text from the Hugging Face Spaces
# page that was pasted above the code; it is not part of the program.
| import os | |
| import time | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| import glob | |
| import requests | |
| import streamlit as st | |
| from streamlit_autorefresh import st_autorefresh | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from datetime import datetime | |
| from pytz import timezone | |
| from sklearn.metrics import mean_squared_error, mean_absolute_error | |
| from scipy.stats.mstats import winsorize | |
# === Streamlit Setup ===
# st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="Energy Forecast Dashboard", layout="wide", page_icon="⚡")
# Re-run the whole script every 5000 ms so the dashboard stays live.
st_autorefresh(interval=5000, key="auto_refresh")
st.title(":zap: Real-Time Power Forecast")
st.markdown("Smarter Energy Monitoring for Smarter Grids")
# === Load Model ===
@st.cache_resource
def load_model():
    """Load the trained LightGBM model from disk.

    FIX: the script re-executes every 5 s via st_autorefresh; without
    @st.cache_resource the pickle was deserialized on every refresh. The
    decorator loads it once per server process and reuses the object.
    """
    return joblib.load("lightgbm_energy_model_full2.pkl")

model = load_model()
# === Supabase Config ===
# Credentials come from the environment. NOTE(review): if either variable is
# unset these are None and the REST calls below fail with an auth error —
# consider failing fast here with a clear message.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_API_KEY = os.environ.get("SUPABASE_API_KEY")
# Source table holding the smart-meter readings.
TABLE = "smart_meter_readings_1year"
# Supabase REST auth headers (the apikey and the bearer token are the same key).
HEADERS = {
    "apikey": SUPABASE_API_KEY,
    "Authorization": f"Bearer {SUPABASE_API_KEY}"
}
# === Pull Latest Data (Today Only) ===
def fetch_today_data():
    """Fetch today's readings (Europe/London) from Supabase as a DataFrame.

    Returns the parsed rows ordered by timestamp; on HTTP failure shows a
    Streamlit error and returns an empty DataFrame.
    """
    today = pd.Timestamp.now(tz=timezone("Europe/London")).strftime("%Y-%m-%d")
    # BUG FIX: the query-string separators had been HTML-mangled ("&times;"
    # rendered as "×"), producing "×tamp=..." instead of "&timestamp=...".
    # Restore the PostgREST filters: timestamp >= today 00:00 AND < 23:59:59.
    url = (
        f"{SUPABASE_URL}/rest/v1/{TABLE}"
        f"?select=*"
        f"&timestamp=gte.{today}T00:00:00"
        f"&timestamp=lt.{today}T23:59:59"
        f"&order=timestamp.asc"
    )
    # Timeout so a stalled API call cannot hang the dashboard rerun forever.
    r = requests.get(url, headers=HEADERS, timeout=10)
    if r.ok:
        df = pd.DataFrame(r.json())
        # Guard: an empty result set has no 'timestamp' column to parse.
        if not df.empty:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df
    else:
        st.error(f"Failed to fetch data: {r.status_code}")
        return pd.DataFrame()
df = fetch_today_data()
# Rows missing the grouping keys or the target cannot be featurized/predicted.
# FIX: guard the empty case — a failed fetch returns a DataFrame with no
# columns at all, and dropna(subset=...) would raise KeyError on it.
if not df.empty:
    df = df.dropna(subset=['region', 'property_type', 'power_consumption_kwh'])
# === Feature Engineering ===
def prepare_features(df):
    """Build the model's feature matrix from raw smart-meter readings.

    Adds calendar features, cyclical encodings, lagged-consumption features
    (rows are treated as consecutive 30-minute steps, already time-ordered),
    one-hot region/property columns, then returns ['timestamp'] plus the 24
    features in the exact order the model was trained with. Leading rows whose
    lag/rolling features are undefined are dropped.

    NOTE(review): lags are computed over the frame as a whole — if df mixes
    several regions/property types the lag values cross series boundaries;
    confirm whether upstream filtering is expected.
    """
    if df.empty:
        return df
    df = df.copy()
    # Calendar features.
    df['hour'] = df['timestamp'].dt.hour
    df['minute_of_day'] = df['hour'] * 60 + df['timestamp'].dt.minute
    df['day_of_week'] = df['timestamp'].dt.dayofweek
    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
    # Cyclical encodings so 23:30 and 00:00 end up close in feature space.
    df['sin_hour'] = np.sin(2 * np.pi * df['hour'] / 24)
    df['cos_hour'] = np.cos(2 * np.pi * df['hour'] / 24)
    df['sin_30min'] = np.sin(2 * np.pi * df['minute_of_day'] / 1440)
    df['cos_30min'] = np.cos(2 * np.pi * df['minute_of_day'] / 1440)
    # Lag features: one row step = 30 minutes.
    df['lag_30min'] = df['power_consumption_kwh'].shift(1)
    df['lag_1hr'] = df['power_consumption_kwh'].shift(2)
    df['lag_90min'] = df['power_consumption_kwh'].shift(3)
    df['lag_2hr'] = df['power_consumption_kwh'].shift(4)
    df['lag_mean_3'] = df['power_consumption_kwh'].shift(1).rolling(2).mean()
    df['lag_mean_6'] = df['power_consumption_kwh'].shift(1).rolling(4).mean()
    df['rolling_std_6'] = df['power_consumption_kwh'].rolling(6).std()
    df['interaction_lag_voltage'] = df['lag_1hr'] * df['voltage']
    # One-hot encode categoricals and ensure every column the model expects
    # exists even when a category is absent from today's data.
    df = pd.get_dummies(df, columns=['property_type', 'region'], drop_first=False)
    for col in ['property_type_residential', 'property_type_commercial',
                'region_north', 'region_south', 'region_east', 'region_west']:
        if col not in df.columns:
            df[col] = 0
    # BUG FIX: fillna(method='ffill') is deprecated (removed in pandas 3.0);
    # DataFrame.ffill() has identical behavior.
    df.ffill(inplace=True)
    df.dropna(inplace=True)  # drop leading rows whose lags are still NaN
    # Exact feature order used at training time.
    expected = [
        'voltage', 'cos_hour', 'sin_hour', 'is_weekend', 'minute_of_day', 'cos_30min',
        'humidity_pct', 'lag_90min', 'temperature_c', 'lag_2hr', 'lag_mean_6', 'lag_mean_3',
        'lag_1hr', 'rolling_std_6', 'lag_30min', 'sin_30min', 'interaction_lag_voltage',
        'day_of_week', 'property_type_commercial', 'property_type_residential',
        'region_north', 'region_south', 'region_east', 'region_west'
    ]
    return df[['timestamp'] + expected]
# === Predict ===
# Featurize today's data and score every row with the loaded model.
df_features = prepare_features(df)
# Everything except the timestamp column is a model input.
feature_cols = [col for col in df_features.columns if col != 'timestamp']
expected_features = feature_cols # Save correct order of features
# NOTE(review): if today's fetch produced no usable rows, df_features is empty
# and model.predict may raise — confirm upstream guarantees data exists.
df_features['predicted_kwh'] = model.predict(df_features[feature_cols])
# === Helper Function for Future Forecasting ===
def forecast_future_usage(last_row, model, steps=(30, 60, 90)):
    """Recursively forecast usage at the given horizons (minutes ahead).

    Starting from ``last_row`` (a Series already carrying the model's feature
    values), each step advances the time-derived features to the forecast
    time, rolls the lag features forward along the chain of previous
    predictions, and predicts again.

    Returns (preds, forecast_times): a {"<step> min": kWh} dict and the list
    of matching future timestamps.

    FIX: ``steps`` default is now a tuple (mutable-default-argument pitfall),
    and ``forecast_times`` — previously returned always empty — is populated.
    NOTE(review): relies on the module-level ``expected_features`` list for
    the model's column order.
    """
    preds = {}
    forecast_times = []
    # Seed the lag chain with the lags already present on the last real row.
    prev_preds = [
        last_row['lag_30min'],
        last_row['lag_1hr'],
        last_row['lag_90min']
    ]
    for step in steps:
        future_time = last_row["timestamp"] + pd.Timedelta(minutes=step)
        row = last_row.copy()
        # Advance calendar / cyclical features to the forecast time.
        row["timestamp"] = future_time
        row["hour"] = future_time.hour
        row["minute_of_day"] = row["hour"] * 60 + future_time.minute
        row["day_of_week"] = future_time.dayofweek
        row["is_weekend"] = int(future_time.dayofweek >= 5)
        row["sin_hour"] = np.sin(2 * np.pi * row["hour"] / 24)
        row["cos_hour"] = np.cos(2 * np.pi * row["hour"] / 24)
        row["sin_30min"] = np.sin(2 * np.pi * row["minute_of_day"] / 1440)
        row["cos_30min"] = np.cos(2 * np.pi * row["minute_of_day"] / 1440)
        # Shift the lag features along the chain of previous predictions.
        row["lag_30min"] = prev_preds[-1]
        row["lag_1hr"] = prev_preds[-2] if len(prev_preds) >= 2 else prev_preds[-1]
        row["lag_90min"] = prev_preds[-3] if len(prev_preds) >= 3 else prev_preds[-1]
        row["lag_mean_3"] = np.mean(prev_preds[-3:])
        row["lag_mean_6"] = np.mean(prev_preds[-3:] + [last_row["lag_mean_3"], last_row["lag_mean_6"]])
        row_features = row[expected_features].values.reshape(1, -1)
        pred = model.predict(row_features)[0]
        preds[f"{step} min"] = pred
        # BUG FIX: forecast_times was returned but never appended to.
        forecast_times.append(future_time)
        prev_preds.append(pred)
    return preds, forecast_times
# === Tabs ===
tabs = st.tabs(["🔮 Regional Forecast", "📊 Performance Monitor", "📈 Usage Pattern", "🚗🌞 Electric Vehicle & Solar"])

# === Regional Forecast Tab ===
# FIX: removed the large block of commented-out legacy forecasting/display
# code that duplicated the live implementation below.
with tabs[0]:
    st.subheader("Regional Forecast for Next Hours")
    # Region filter with bold styling
    st.markdown("<h4 style='margin-bottom: 0;'>📍 <b>Select a Region</b></h4>", unsafe_allow_html=True)
    selected_region = st.selectbox(" ", ['south', 'east', 'north', 'west'])
    for ptype in ['residential', 'commercial']:
        temp_df = df_features.copy()
        region_col = f'region_{selected_region}'
        ptype_col = f'property_type_{ptype}'
        st.markdown(f"#### {selected_region.capitalize()} - {ptype.capitalize()}")
        # Keep only rows belonging to the chosen region / property type.
        if region_col in temp_df.columns and ptype_col in temp_df.columns:
            temp_df = temp_df[(temp_df[region_col] == 1) & (temp_df[ptype_col] == 1)]
        else:
            st.warning(f"No data available for Region: {selected_region} and Property: {ptype}")
            continue
        if temp_df.empty or temp_df.shape[0] < 2:
            st.info(f"Not enough data for {ptype} properties in {selected_region} region")
            continue
        # Use the most recent reading as the forecasting anchor.
        temp_df = temp_df.sort_values("timestamp")
        latest_row = temp_df.iloc[-1].copy()
        # Make the anchor timezone-aware so it can be compared to now().
        if latest_row['timestamp'].tzinfo is None:
            latest_row['timestamp'] = latest_row['timestamp'].tz_localize("Europe/London")
        now = pd.Timestamp.now(tz="Europe/London")
        # Skip forecast if data is too old
        if (now - latest_row['timestamp']) > pd.Timedelta(hours=1):
            st.warning(f"Last data point for {ptype} in {selected_region} is too old (at {latest_row['timestamp']:%H:%M}). Skipping forecast.")
            continue
        # NOTE(review): the same feature vector is scored for every horizon,
        # so all four metrics show an identical value — consider calling
        # forecast_future_usage() to roll the lags forward per step.
        preds = {}
        features = latest_row.drop(labels=['timestamp', 'predicted_kwh'])
        for minutes_ahead in [30, 60, 90, 120]:
            prediction = model.predict([features])[0]
            preds[f"{minutes_ahead} min"] = prediction
        # Display metrics only (no clock/timestamp)
        cols = st.columns(len(preds))
        for i, (label, value) in enumerate(preds.items()):
            with cols[i]:
                st.metric(label=label, value=f"{value:.2f} kWh")
        # Line chart of recent trend
        fig = px.line(temp_df, x='timestamp', y='predicted_kwh',
                      title=f"{selected_region.capitalize()} {ptype.capitalize()} Forecast")
        st.plotly_chart(fig, use_container_width=True)
# === Performance Monitor ===
with tabs[1]:
    st.subheader("Prediction Performance Monitor")
    # Join predictions back to the raw readings to get actuals per timestamp.
    df_eval = df_features.copy()
    df_eval = df_eval.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
    df_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
    # === Error Metrics ===
    mse = mean_squared_error(df_eval['actual'], df_eval['predicted_kwh'])
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(df_eval['actual'], df_eval['predicted_kwh'])
    st.metric("RMSE", f"{rmse:.3f} kWh")
    st.metric("MAE", f"{mae:.3f} kWh")
    # === Hourly RMSE Chart ===
    # Ensure 'hour' is present
    df_eval['hour'] = df_eval['timestamp'].dt.hour
    hourly_rmse = df_eval.groupby('hour').apply(
        lambda x: np.sqrt(mean_squared_error(x['actual'], x['predicted_kwh']))
    ).reset_index(name='rmse')
    fig = px.bar(hourly_rmse, x='hour', y='rmse', title='Hourly RMSE')
    st.plotly_chart(fig, use_container_width=True)
    # === Latency Placeholders ===
    # NOTE(review): hard-coded display values, not measured latencies.
    st.metric("Prediction Latency", "89 ms")
    st.metric("API Fetch Latency", "170 ms")
    # === ⚠ Drift Detection ===
    # Fixed 0.8 kWh RMSE threshold flags model drift.
    if rmse > 0.8:
        st.warning("⚠️ Model performance has drifted: RMSE above 0.8 kWh!")
    else:
        st.success("✅ Model performance is within expected range.")
    missing_info = df[df[['region', 'property_type']].isnull().any(axis=1)]
    if not missing_info.empty:
        st.warning(f"{len(missing_info)} rows had missing region or property_type and were excluded from prediction.")
    # === Compute Error and Flag Anomalies ===
    # FIX: df_eval was rebuilt here with an identical merge/rename — reuse the
    # frame already computed above instead of repeating the work.
    df_eval['error'] = np.abs(df_eval['actual'] - df_eval['predicted_kwh'])
    # Flag the worst 1% of absolute errors as anomalies.
    threshold = df_eval['error'].quantile(0.99)
    df_eval['is_anomaly'] = df_eval['error'] > threshold
    # Plot anomalies
    fig = px.scatter(
        df_eval,
        x='timestamp',
        y='error',
        color='is_anomaly',
        title="Prediction Errors and Anomalies",
        color_discrete_map={True: 'red', False: 'green'},
        labels={"error": "Absolute Error (kWh)"})
    st.plotly_chart(fig, use_container_width=True)
# === Usage Pattern ===
with tabs[2]:
    st.subheader(" Usage Pattern")
    # Filter today's data only
    df_today_eval = df_features.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
    df_today_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
    df_today_eval['hour'] = df_today_eval['timestamp'].dt.strftime("%H:%M")
    # Rolling 30-min average (optional smoothing)
    df_today_eval['actual_rolling'] = df_today_eval['actual'].rolling(2).mean()
    df_today_eval['predicted_rolling'] = df_today_eval['predicted_kwh'].rolling(2).mean()
    # Plotting hourly line chart: smoothed actual vs predicted for today.
    fig_hourly = go.Figure()
    fig_hourly.add_trace(go.Scatter(
        x=df_today_eval['timestamp'], y=df_today_eval['actual_rolling'],
        mode='lines',
        name='Actual',
        line=dict(color='indigo', width=2)
    ))
    fig_hourly.add_trace(go.Scatter(
        x=df_today_eval['timestamp'], y=df_today_eval['predicted_rolling'],
        mode='lines',
        name='Predicted',
        line=dict(color='crimson', width=2,)
    ))
    fig_hourly.update_layout(
        title="⏱️ Hourly Trend for Today: Predicted vs Actual",
        xaxis_title="Time",
        yaxis_title="kWh",
        xaxis=dict(tickformat="%H:%M", showgrid=True),
        legend_title="Legend",
        template="plotly_white"
    )
    st.plotly_chart(fig_hourly, use_container_width=True)
    # === Hourly Usage by Region ===
    if not df.empty:
        df['hour'] = df['timestamp'].dt.hour
        if 'region' in df.columns:
            avg_by_region = df.groupby(['hour', 'region'])['power_consumption_kwh'].mean().reset_index()
            fig = px.line(
                avg_by_region, x='hour', y='power_consumption_kwh',
                color='region', title="Average Hourly Usage by Region", markers=True
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("Region information not available in raw data.")
    # === Daily Comparison Logging ===
    def update_daily_comparison_log(df_features, df_raw):
        # Append today's predicted/actual totals to a CSV log, at most once per
        # calendar day; later reruns on the same day are no-ops.
        # NOTE(review): iloc[0] raises on an empty df_features — assumes data
        # exists for today; confirm upstream guard.
        today = df_features['timestamp'].dt.date.iloc[0]
        predicted_total = df_features['predicted_kwh'].sum()
        actual_total = df_raw['power_consumption_kwh'].sum()
        daily_df = pd.DataFrame([{
            'date': today,
            'predicted_kwh': predicted_total,
            'actual_kwh': actual_total
        }])
        if os.path.exists("daily_comparison_log.csv"):
            existing = pd.read_csv("daily_comparison_log.csv")
            existing['date'] = pd.to_datetime(existing['date']).dt.date
            # Only append the first time today's date is seen.
            if today not in existing['date'].values:
                updated = pd.concat([existing, daily_df], ignore_index=True)
                updated.to_csv("daily_comparison_log.csv", index=False)
        else:
            daily_df.to_csv("daily_comparison_log.csv", index=False)
    update_daily_comparison_log(df_features, df)
    # === 1. Log timestamp-level data for today ===
    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)
    today_str = pd.Timestamp.now(tz=timezone("Europe/London")).strftime("%Y-%m-%d")
    log_path = os.path.join(log_dir, f"{today_str}.csv")
    # Re-join actuals for the per-timestamp log; today's file is overwritten
    # on every rerun so it always reflects the latest data.
    df_eval = df_features.merge(df[['timestamp', 'power_consumption_kwh']], on='timestamp', how='left')
    df_eval.rename(columns={'power_consumption_kwh': 'actual'}, inplace=True)
    df_log = df_eval[['timestamp', 'actual', 'predicted_kwh']].copy()
    df_log['date'] = df_log['timestamp'].dt.date
    df_log['hour'] = df_log['timestamp'].dt.strftime("%H:%M")
    df_log.to_csv(log_path, index=False)
    # === 2. Read all logs and concatenate ===
    log_files = sorted(glob.glob(f"{log_dir}/*.csv"))
    full_df = pd.concat([pd.read_csv(f) for f in log_files], ignore_index=True)
    # === 3. Clean up and format ===
    full_df['timestamp'] = pd.to_datetime(full_df['timestamp'])
    full_df['hour'] = pd.to_datetime(full_df['hour'], format="%H:%M").dt.time
    full_df['date'] = pd.to_datetime(full_df['date'])
    # === 4. Plot all days together ===
    # One actual trace and one predicted trace per logged day.
    fig = go.Figure()
    for date in full_df['date'].dt.date.unique():
        sub = full_df[full_df['date'].dt.date == date]
        fig.add_trace(go.Scatter(
            x=sub['timestamp'], y=sub['actual'],
            mode='lines',
            name=f"{date} - Actual",
            line=dict(color='#008080', width=2)
        ))
        fig.add_trace(go.Scatter(
            x=sub['timestamp'], y=sub['predicted_kwh'],
            mode='lines',
            name=f"{date} - Predicted",
            line=dict(color='#8B0000', width=2)
        ))
    fig.update_layout(
        title="📈 Hourly Trend: Actual vs Predicted Energy Consumption",
        xaxis_title="Timestamp",
        yaxis_title="kWh",
        legend_title="Legend",
        xaxis=dict(tickformat="%H:%M", showgrid=True),
        template="plotly_white"
    )
    st.plotly_chart(fig, use_container_width=True)
    missing_info = df[df[['region', 'property_type']].isnull().any(axis=1)]
    if not missing_info.empty:
        st.warning(f"{len(missing_info)} rows had missing region or property_type and were excluded from prediction.")
# === EV & Solar Insight ===
with tabs[3]:
    st.subheader("EV & Solar Insights")
    # Slice households by ownership flags.
    ev_df = df[df['ev_owner'] == 1]
    solar_df = df[df['solar_installed'] == 1]
    overall_avg = df['power_consumption_kwh'].mean()
    ev_avg = ev_df['power_consumption_kwh'].mean()
    solar_avg = solar_df['power_consumption_kwh'].mean()
    # Hour of day with the most readings per group ("N/A" when the group is empty).
    ev_peak_hour = "N/A" if ev_df.empty else ev_df['timestamp'].dt.hour.value_counts().idxmax()
    solar_peak_hour = "N/A" if solar_df.empty else solar_df['timestamp'].dt.hour.value_counts().idxmax()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("EV Household Avg Usage (kWh)", f"{ev_avg:.2f}")
        st.markdown(f"🚗 Peak Usage Hour: {ev_peak_hour}:00")
        if ev_avg > overall_avg:
            st.warning("⚠ Electric Vehicle consume above average energy!")
    with col2:
        st.metric("Solar Household Avg Usage (kWh)", f"{solar_avg:.2f}")
        st.markdown(f"🌞 Peak Usage Hour: {solar_peak_hour}:00")
        if solar_avg > overall_avg:
            st.warning("⚠ Solar homes also show high consumption.")
    st.markdown("### 🔍 Distribution Analysis")
    col3, col4 = st.columns(2)
    with col3:
        st.plotly_chart(
            px.box(ev_df, y='power_consumption_kwh', points="outliers",
                   title="EV Owners: Power Usage Box Plot"),
            use_container_width=True,
        )
    with col4:
        st.plotly_chart(
            px.box(solar_df, y='power_consumption_kwh', points="outliers",
                   title="Solar Homes: Power Usage Box Plot"),
            use_container_width=True,
        )
    st.markdown("### 📈 Trend Over Time")
    # Keep only households that own an EV, solar panels, or both.
    df_combo = df[df['ev_owner'] + df['solar_installed'] > 0].copy()
    # Vectorized labelling — same result as a per-row if/elif chain.
    has_ev = df_combo['ev_owner'].astype(bool)
    has_solar = df_combo['solar_installed'].astype(bool)
    df_combo['label'] = np.select(
        [has_ev & has_solar, has_ev, has_solar],
        ["EV + Solar", "EV Only", "Solar Only"],
        default="Other",
    )
    st.plotly_chart(
        px.line(df_combo, x='timestamp', y='power_consumption_kwh',
                color='label', title="Energy Usage Trend: EV & Solar Homes"),
        use_container_width=True,
    )