import os
import time
import pandas as pd
import numpy as np
import joblib
import requests
import streamlit as st
from streamlit_autorefresh import st_autorefresh
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings

warnings.filterwarnings('ignore')

# Page configuration
st.set_page_config(
    page_title="Gridflux Smart Meter Dashboard",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Auto-refresh every 2 seconds
st_autorefresh(interval=2000, key="refresh")

# Load model
@st.cache_resource
def load_model():
    return joblib.load("rf_model.pkl")

model = load_model()

# Supabase config
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_KEY = os.environ["SUPABASE_KEY"]
TABLE = "smart_meter_readings_1year"

# Initialize session state
if "row_index" not in st.session_state:
    st.session_state.row_index = 0
if "history" not in st.session_state:
    st.session_state.history = pd.DataFrame()
if "performance_metrics" not in st.session_state:
    st.session_state.performance_metrics = pd.DataFrame()
if "evaluation_count" not in st.session_state:
    st.session_state.evaluation_count = 0
if "temp_predictions" not in st.session_state:
    st.session_state.temp_predictions = []
if "temp_actuals" not in st.session_state:
    st.session_state.temp_actuals = []

# Fetch all data
@st.cache_data
def fetch_all_data():
    url = f"{SUPABASE_URL}/rest/v1/{TABLE}?select=*&order=timestamp.asc"
    headers = {
        "apikey": SUPABASE_KEY,
        "Authorization": f"Bearer {SUPABASE_KEY}"
    }
    r = requests.get(url, headers=headers)
    if r.ok:
        return pd.DataFrame(r.json())
    else:
        st.error(f"❌ Error fetching data: {r.status_code}")
        return pd.DataFrame()

df_all = fetch_all_data()

# Feature engineering
def engineer(df):
    if df.empty:
        return df
    df = df.copy()

    # Handle timestamp (epoch seconds or strings)
    if pd.api.types.is_numeric_dtype(df["timestamp"]):
        df["datetime"] = pd.to_datetime(df["timestamp"], unit="s")
    else:
        df["datetime"] = pd.to_datetime(df["timestamp"])

    df["hour_of_day"] = df["datetime"].dt.hour
    df["lag_30min"] = df["power_consumption_kwh"].shift(1)
    df["lag_1h"] = df["power_consumption_kwh"].shift(2)
    df['rolling_avg_1h'] = df['power_consumption_kwh'].rolling(2).mean().shift(1)
    df['rolling_avg_2h'] = df['power_consumption_kwh'].rolling(4).mean().shift(1)
    df["is_weekend"] = df["datetime"].dt.weekday >= 5
    df["hour_sin"] = np.sin(2 * np.pi * df["hour_of_day"] / 24)
    df["hour_cos"] = np.cos(2 * np.pi * df["hour_of_day"] / 24)

    # One-hot encode property_type and region
    df = pd.get_dummies(df, columns=["property_type", "region"], drop_first=False)

    # Ensure all expected features exist
    expected_features = [
        'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
        'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
        'temperature_c', 'ev_owner', 'solar_installed',
        'property_type_commercial', 'property_type_residential',
        'region_north', 'region_south', 'region_east', 'region_west'
    ]
    for col in expected_features:
        if col not in df.columns:
            df[col] = 0
    return df

# Multi-step forecasting function
def forecast_future(df_feat, model, steps=4):
    """Forecast multiple steps into the future using lag features"""
    if df_feat.empty:
        return []

    forecasts = []
    current_data = df_feat.iloc[-1:].copy()

    for step in range(steps):
        features = current_data[[
            'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
            'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
            'temperature_c', 'ev_owner', 'solar_installed',
            'property_type_commercial', 'property_type_residential',
            'region_north', 'region_south', 'region_east', 'region_west'
        ]]
        prediction = model.predict(features)[0]
        forecasts.append(prediction)

        # Update features for next step
        current_data = current_data.copy()
        current_data['lag_1h'] = current_data['lag_30min'].values[0]
        current_data['lag_30min'] = prediction
        current_data['rolling_avg_1h'] = (current_data['lag_30min'].values[0]
                                          + current_data['lag_1h'].values[0]) / 2
        current_data['rolling_avg_2h'] = prediction

        # Update time-based features
        current_hour = current_data['hour_of_day'].values[0]
        next_hour = (current_hour + 1) % 24
        current_data['hour_of_day'] = next_hour
        current_data['hour_sin'] = np.sin(2 * np.pi * next_hour / 24)
        current_data['hour_cos'] = np.cos(2 * np.pi * next_hour / 24)

    return forecasts

# Performance evaluation with batch processing
def update_performance_metrics(actual, predicted):
    """Update performance metrics every 10 evaluations"""
    st.session_state.temp_actuals.append(actual)
    st.session_state.temp_predictions.append(predicted)
    st.session_state.evaluation_count += 1

    # Calculate metrics every 10 evaluations
    if st.session_state.evaluation_count % 10 == 0:
        if len(st.session_state.temp_actuals) >= 10:
            rmse = np.sqrt(mean_squared_error(st.session_state.temp_actuals,
                                              st.session_state.temp_predictions))
            mae = mean_absolute_error(st.session_state.temp_actuals,
                                      st.session_state.temp_predictions)

            # Store metrics
            new_metric = pd.DataFrame({
                'timestamp': [pd.Timestamp.now()],
                'rmse': [rmse],
                'mae': [mae],
                'batch_size': [len(st.session_state.temp_actuals)]
            })
            st.session_state.performance_metrics = pd.concat([
                st.session_state.performance_metrics, new_metric
            ], ignore_index=True)

            # Clear temporary storage
            st.session_state.temp_actuals = []
            st.session_state.temp_predictions = []

            return rmse, mae
    return None, None

# Get next row
def get_next_row():
    if st.session_state.row_index < len(df_all):
        row = df_all.iloc[[st.session_state.row_index]]
        st.session_state.row_index += 1
        return row
    return pd.DataFrame()

# UI Layout
st.title("⚡ Gridflux: Smart Meter Forecasting Dashboard")
st.markdown("*Real-time power consumption forecasting and monitoring system*")

# Sidebar
st.sidebar.header("📊 System Status")
st.sidebar.metric("Records Processed", st.session_state.row_index)
st.sidebar.metric("Evaluations", st.session_state.evaluation_count)
st.sidebar.metric("Performance Batches", len(st.session_state.performance_metrics))

# Main processing
new_row = get_next_row()

if not new_row.empty:
    st.session_state.history = pd.concat([st.session_state.history, new_row], ignore_index=True)

    # Create tabs
    tab1, tab2, tab3 = st.tabs(["🔮 Regional Forecasting", "📈 Performance Monitor", "🔄 Usage Patterns"])

    with tab1:
        st.header("Multi-Step Forecasting by Region & Property Type")
        st.markdown("*Forecasting 2 hours ahead (30min intervals) for each region and property type combination*")

        regions = ['north', 'south', 'east', 'west']
        property_types = ['residential', 'commercial']

        # Create forecast grid
        for region in regions:
            st.subheader(f"🌍 {region.upper()} Region")
            region_data = st.session_state.history[st.session_state.history['region'] == region]

            if not region_data.empty:
                col1, col2 = st.columns(2)

                for idx, prop_type in enumerate(property_types):
                    subset = region_data[region_data['property_type'] == prop_type]

                    if not subset.empty and len(subset) > 2:
                        df_feat = engineer(subset).dropna()

                        if not df_feat.empty:
                            # Get forecasts
                            forecasts = forecast_future(df_feat, model, steps=4)

                            # Display in appropriate column
                            with col1 if idx == 0 else col2:
                                st.markdown(f"**🏠 {prop_type.capitalize()} Properties**")

                                if forecasts:
                                    # Create forecast metrics in a nice layout
                                    forecast_col1, forecast_col2 = st.columns(2)
                                    with forecast_col1:
                                        st.metric("30min Ahead", f"{forecasts[0]:.3f} kWh",
                                                  delta=f"{forecasts[0] - df_feat['power_consumption_kwh'].iloc[-1]:.3f}")
                                        st.metric("1.5h Ahead", f"{forecasts[2]:.3f} kWh")
                                    with forecast_col2:
                                        st.metric("1h Ahead", f"{forecasts[1]:.3f} kWh")
                                        st.metric("2h Ahead", f"{forecasts[3]:.3f} kWh")

                                    # Create mini forecast chart
                                    chart_data = subset.copy()
                                    # Convert timestamps for plotting (epoch seconds or strings), as in engineer()
                                    if pd.api.types.is_numeric_dtype(chart_data["timestamp"]):
                                        chart_data["datetime"] = pd.to_datetime(chart_data["timestamp"], unit="s")
                                    else:
                                        chart_data["datetime"] = pd.to_datetime(chart_data["timestamp"])

                                    # Get last few points for context
                                    recent_data = chart_data.tail(10)

                                    fig = go.Figure()

                                    # Historical data
                                    fig.add_trace(go.Scatter(
                                        x=recent_data["datetime"],
                                        y=recent_data["power_consumption_kwh"],
                                        mode='lines+markers',
                                        name='Historical',
                                        line=dict(color='blue', width=2)
                                    ))

                                    # Forecast data
                                    last_time = recent_data["datetime"].iloc[-1]
                                    future_times = pd.date_range(
                                        start=last_time + pd.Timedelta(minutes=30),
                                        periods=4,
                                        freq='30min'
                                    )
                                    fig.add_trace(go.Scatter(
                                        x=future_times,
                                        y=forecasts,
                                        mode='lines+markers',
                                        name='Forecast',
                                        line=dict(color='red', dash='dash', width=2)
                                    ))

                                    fig.update_layout(
                                        title=f"{region.title()} {prop_type.title()} - Forecast",
                                        xaxis_title="Time",
                                        yaxis_title="Power (kWh)",
                                        height=300,
                                        showlegend=True
                                    )
                                    st.plotly_chart(fig, use_container_width=True)

                                    # Update performance metrics
                                    if len(df_feat) > 1:
                                        actual = df_feat['power_consumption_kwh'].iloc[-1]
                                        predicted = forecasts[0]  # Use 30min forecast
                                        update_performance_metrics(actual, predicted)
                                else:
                                    st.info("Insufficient data for forecasting")
                    else:
                        with col1 if idx == 0 else col2:
                            st.markdown(f"**🏠 {prop_type.capitalize()} Properties**")
                            st.info("No data available")
            else:
                st.info(f"No data available for {region.upper()} region")

            st.divider()

    with tab2:
        st.header("Real-Time Model Performance")
        st.markdown("*Performance metrics calculated every 10 evaluations to ensure statistical significance*")

        # Current batch status
        batch_progress = st.session_state.evaluation_count % 10
        st.progress(batch_progress / 10, text=f"Current batch: {batch_progress}/10 evaluations")

        if len(st.session_state.performance_metrics) > 0:
            # Latest metrics
            latest_metrics = st.session_state.performance_metrics.iloc[-1]

            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Latest RMSE", f"{latest_metrics['rmse']:.4f}")
            with col2:
                st.metric("Latest MAE", f"{latest_metrics['mae']:.4f}")
            with col3:
                st.metric("Batch Size", f"{int(latest_metrics['batch_size'])}")
            with col4:
                st.metric("Total Batches", len(st.session_state.performance_metrics))

            # Performance trends
            st.subheader("📊 Performance Trends Over Time")

            if len(st.session_state.performance_metrics) > 1:
                fig = make_subplots(
                    rows=2, cols=1,
                    subplot_titles=('Root Mean Square Error (RMSE)', 'Mean Absolute Error (MAE)'),
                    shared_xaxes=True,
                    vertical_spacing=0.1
                )

                # RMSE plot
                fig.add_trace(
                    go.Scatter(
                        x=st.session_state.performance_metrics['timestamp'],
                        y=st.session_state.performance_metrics['rmse'],
                        mode='lines+markers',
                        name='RMSE',
                        line=dict(color='#ff6b6b', width=3),
                        marker=dict(size=8)
                    ),
                    row=1, col=1
                )

                # MAE plot
                fig.add_trace(
                    go.Scatter(
                        x=st.session_state.performance_metrics['timestamp'],
                        y=st.session_state.performance_metrics['mae'],
                        mode='lines+markers',
                        name='MAE',
                        line=dict(color='#4ecdc4', width=3),
                        marker=dict(size=8)
                    ),
                    row=2, col=1
                )

                fig.update_layout(
                    height=500,
                    title_text="Model Performance Monitoring",
                    showlegend=False
                )
                fig.update_xaxes(title_text="Time", row=2, col=1)
                fig.update_yaxes(title_text="RMSE", row=1, col=1)
                fig.update_yaxes(title_text="MAE", row=2, col=1)
                st.plotly_chart(fig, use_container_width=True)

            # Performance summary
            st.subheader("📈 Performance Summary")
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**RMSE Statistics**")
                st.metric("Average", f"{st.session_state.performance_metrics['rmse'].mean():.4f}")
                st.metric("Best (Lowest)", f"{st.session_state.performance_metrics['rmse'].min():.4f}")
                st.metric("Std Deviation", f"{st.session_state.performance_metrics['rmse'].std():.4f}")
            with col2:
                st.markdown("**MAE Statistics**")
                st.metric("Average", f"{st.session_state.performance_metrics['mae'].mean():.4f}")
                st.metric("Best (Lowest)", f"{st.session_state.performance_metrics['mae'].min():.4f}")
                st.metric("Std Deviation", f"{st.session_state.performance_metrics['mae'].std():.4f}")
        else:
            st.info("🔄 Collecting data... Performance metrics will appear after 10 evaluations")

    with tab3:
        st.header("Power Usage Patterns & Cycles")
        st.markdown("*Understanding power consumption patterns across different regions and time periods*")

        if len(st.session_state.history) > 0:
            # Prepare data
            cycle_data = st.session_state.history.copy()
            # Convert timestamps (epoch seconds or strings), as in engineer()
            if pd.api.types.is_numeric_dtype(cycle_data["timestamp"]):
                cycle_data["datetime"] = pd.to_datetime(cycle_data["timestamp"], unit="s")
            else:
                cycle_data["datetime"] = pd.to_datetime(cycle_data["timestamp"])
            cycle_data["hour"] = cycle_data["datetime"].dt.hour
            cycle_data["day_of_week"] = cycle_data["datetime"].dt.day_name()
            cycle_data["is_weekend"] = cycle_data["datetime"].dt.weekday >= 5

            # Hourly patterns by region
            st.subheader("⏰ 24-Hour Usage Patterns by Region")
            hourly_usage = cycle_data.groupby(['region', 'hour'])['power_consumption_kwh'].mean().reset_index()

            fig = px.line(
                hourly_usage,
                x='hour',
                y='power_consumption_kwh',
                color='region',
                title='Average Power Consumption Throughout the Day',
                labels={
                    'hour': 'Hour of Day (24-hour format)',
                    'power_consumption_kwh': 'Average Power Consumption (kWh)',
                    'region': 'Region'
                }
            )
            fig.update_layout(
                xaxis=dict(tickmode='linear', tick0=0, dtick=2),
                hovermode='x unified',
                height=400
            )

            # Add annotations for typical usage periods
            fig.add_vrect(x0=6, x1=9, fillcolor="yellow", opacity=0.2, annotation_text="Morning Peak")
            fig.add_vrect(x0=17, x1=21, fillcolor="orange", opacity=0.2, annotation_text="Evening Peak")
            # Night period wraps past midnight, so shade it as two rectangles
            fig.add_vrect(x0=22, x1=24, fillcolor="blue", opacity=0.1, annotation_text="Night/Low Usage")
            fig.add_vrect(x0=0, x1=6, fillcolor="blue", opacity=0.1)

            st.plotly_chart(fig, use_container_width=True)

            # Usage insights
            st.subheader("🔍 Usage Insights")
            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**📊 Regional Summary**")
                regional_stats = cycle_data.groupby('region')['power_consumption_kwh'].agg([
                    'mean', 'std', 'min', 'max', 'count'
                ]).round(3)
                regional_stats.columns = ['Avg (kWh)', 'Std Dev', 'Min (kWh)', 'Max (kWh)', 'Data Points']
                st.dataframe(regional_stats, use_container_width=True)

            with col2:
                st.markdown("**⏰ Peak Usage Times**")
                # Find peak hours for each region
                peak_hours = hourly_usage.loc[hourly_usage.groupby('region')['power_consumption_kwh'].idxmax()]
                peak_display = peak_hours[['region', 'hour', 'power_consumption_kwh']].copy()
                peak_display.columns = ['Region', 'Peak Hour', 'Peak Usage (kWh)']
                peak_display['Peak Hour'] = peak_display['Peak Hour'].apply(lambda x: f"{x:02d}:00")
                peak_display['Peak Usage (kWh)'] = peak_display['Peak Usage (kWh)'].round(3)
                st.dataframe(peak_display.set_index('Region'), use_container_width=True)

            # Weekend vs Weekday comparison
            st.subheader("📅 Weekend vs Weekday Usage")
            weekend_comparison = cycle_data.groupby(['region', 'is_weekend'])['power_consumption_kwh'].mean().reset_index()
            weekend_comparison['period'] = weekend_comparison['is_weekend'].map({True: 'Weekend', False: 'Weekday'})
            fig_weekend = px.bar(
                weekend_comparison,
                x='region',
                y='power_consumption_kwh',
                color='period',
                title='Average Power Consumption: Weekday vs Weekend',
                labels={
                    'region': 'Region',
                    'power_consumption_kwh': 'Average Power Consumption (kWh)'
                },
                barmode='group'
            )
            fig_weekend.update_layout(height=400)
            st.plotly_chart(fig_weekend, use_container_width=True)

            # Property type patterns
            if 'property_type' in cycle_data.columns:
                st.subheader("🏠 Property Type Usage Patterns")
                prop_patterns = cycle_data.groupby(['property_type', 'hour'])['power_consumption_kwh'].mean().reset_index()

                fig_prop = px.line(
                    prop_patterns,
                    x='hour',
                    y='power_consumption_kwh',
                    color='property_type',
                    title='Usage Patterns by Property Type',
                    labels={
                        'hour': 'Hour of Day',
                        'power_consumption_kwh': 'Average Power Consumption (kWh)',
                        'property_type': 'Property Type'
                    }
                )
                fig_prop.update_layout(
                    xaxis=dict(tickmode='linear', tick0=0, dtick=2),
                    height=400
                )
                st.plotly_chart(fig_prop, use_container_width=True)
        else:
            st.info("📊 Collecting usage data... Patterns will appear as data accumulates")

else:
    st.success("✅ All data processed successfully!")

    # Final summary
    if len(st.session_state.history) > 0:
        st.balloons()
        st.header("📋 Processing Summary")

        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Records", len(st.session_state.history))
        with col2:
            st.metric("Regions Covered", st.session_state.history['region'].nunique())
        with col3:
            st.metric("Property Types", st.session_state.history['property_type'].nunique())
        with col4:
            st.metric("Performance Evaluations", st.session_state.evaluation_count)

# Enhanced debug sidebar
with st.sidebar:
    st.divider()
    if st.checkbox("🔧 Show Debug Details"):
        st.write("**Data Status:**")
        st.write(f"- History shape: {st.session_state.history.shape}")
        st.write(f"- Temp predictions: {len(st.session_state.temp_predictions)}")
        st.write(f"- Temp actuals: {len(st.session_state.temp_actuals)}")

        if not st.session_state.history.empty:
            st.write("**Latest Record:**")
            latest = st.session_state.history.iloc[-1]
            st.json({
                "region": latest.get('region', 'N/A'),
                "property_type": latest.get('property_type', 'N/A'),
                "power_consumption": f"{latest.get('power_consumption_kwh', 0):.3f} kWh",
                "timestamp": str(latest.get('timestamp', 'N/A'))
            })
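
# ---------------------------------------------------------------------------
# Note on rf_model.pkl: this app only loads the model; the training step is not
# shown here. A minimal, hypothetical offline training sketch (assumption: a
# scikit-learn RandomForestRegressor fitted on the same engineered feature
# columns produced by engineer(), with power_consumption_kwh as the target)
# might look like the snippet below. It is kept as a comment so the Streamlit
# app does not retrain the model on every rerun.
#
#   from sklearn.ensemble import RandomForestRegressor
#
#   df_feat = engineer(df_all).dropna()
#   feature_cols = [
#       'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
#       'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
#       'temperature_c', 'ev_owner', 'solar_installed',
#       'property_type_commercial', 'property_type_residential',
#       'region_north', 'region_south', 'region_east', 'region_west'
#   ]
#   X = df_feat[feature_cols]
#   y = df_feat['power_consumption_kwh']
#   rf = RandomForestRegressor(n_estimators=100, random_state=42)  # hypothetical hyperparameters
#   rf.fit(X, y)
#   joblib.dump(rf, "rf_model.pkl")
# ---------------------------------------------------------------------------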