# Source: Hugging Face Space smksean/gridflux (status: Sleeping)
# File size: 24,112 Bytes

import os
import time
import pandas as pd
import numpy as np
import joblib
import requests
import streamlit as st
from streamlit_autorefresh import st_autorefresh
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

# Page configuration: tab title/icon, wide layout, sidebar expanded initially.
_PAGE_SETTINGS = dict(
    page_title="Gridflux Smart Meter Dashboard",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_SETTINGS)

# Auto-refresh every 2 seconds (the interval is passed in milliseconds).
_REFRESH_INTERVAL_MS = 2000
st_autorefresh(interval=_REFRESH_INTERVAL_MS, key="refresh")

# Load model
@st.cache_resource
def load_model():
    """Deserialize the model stored in ``rf_model.pkl`` with joblib.

    The path is relative, so it is resolved against the process's working
    directory. The function is wrapped in ``st.cache_resource``.
    """
    model_path = "rf_model.pkl"
    loaded = joblib.load(model_path)
    return loaded


# Module-level handle used by the forecasting code further down.
model = load_model()

# Supabase config: both variables are required; a missing one raises KeyError.
SUPABASE_URL = os.environ["SUPABASE_URL"]
SUPABASE_KEY = os.environ["SUPABASE_KEY"]
TABLE = "smart_meter_readings_1year"

# Initialize session state.
# Each entry maps a session key to a zero-argument factory so that every
# session receives its own fresh DataFrame / list rather than a shared object.
_SESSION_DEFAULTS = {
    "row_index": lambda: 0,                 # position of the next row to replay
    "history": pd.DataFrame,                # rows replayed so far
    "performance_metrics": pd.DataFrame,    # one row per completed metric batch
    "evaluation_count": lambda: 0,          # running count of scored predictions
    "temp_predictions": list,               # predictions in the open batch
    "temp_actuals": list,                   # actuals in the open batch
}
for _state_key, _make_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

# Fetch all data
@st.cache_data
def fetch_all_data():
    """Download every row of ``TABLE`` from the Supabase REST endpoint.

    Rows are requested in ascending ``timestamp`` order and returned as a
    DataFrame built from the JSON response.

    Returns:
        pd.DataFrame: the table contents, or an empty DataFrame when the
        request cannot be completed, the server answers with an error
        status, or the body is not usable JSON. In each failure case a
        message is shown with ``st.error``.

    NOTE(review): the function is wrapped in ``st.cache_data``, so the
    returned frame (presumably including an empty failure result) is reused
    on later reruns - confirm that is acceptable for transient outages.
    NOTE(review): no paging is done here; if the API caps rows per response
    the table may come back truncated - verify against the project settings.
    """
    url = f"{SUPABASE_URL}/rest/v1/{TABLE}?select=*&order=timestamp.asc"
    headers = {
        "apikey": SUPABASE_KEY,
        "Authorization": f"Bearer {SUPABASE_KEY}"
    }

    # Without a timeout requests waits indefinitely on a stalled connection,
    # which would freeze the whole dashboard. (connect, read) in seconds.
    try:
        r = requests.get(url, headers=headers, timeout=(10, 60))
    except requests.RequestException as exc:
        # DNS failure, refused connection, timeout, ... - report it the same
        # way as an HTTP error instead of crashing the script.
        st.error(f"❌ Error fetching data: {exc}")
        return pd.DataFrame()

    if not r.ok:
        st.error(f"❌ Error fetching data: {r.status_code}")
        return pd.DataFrame()

    try:
        return pd.DataFrame(r.json())
    except ValueError as exc:
        # Body was not valid JSON or could not be shaped into a table.
        st.error(f"❌ Error fetching data: {exc}")
        return pd.DataFrame()


df_all = fetch_all_data()

# Feature engineering
def engineer(df):
    """Add the model's input features to a frame of raw meter readings.

    Works on a copy; the caller's frame is not modified. An empty frame is
    handed back unchanged.

    Columns read: ``timestamp``, ``power_consumption_kwh``,
    ``property_type`` and ``region``.

    Columns added:
        * ``datetime`` - parsed ``timestamp`` (numeric values are treated as
          Unix seconds).
        * ``hour_of_day``, ``is_weekend``, ``hour_sin``, ``hour_cos``.
        * ``lag_30min`` / ``lag_1h`` - consumption one and two rows earlier.
        * ``rolling_avg_1h`` / ``rolling_avg_2h`` - mean of the previous two
          and four rows (the current row is excluded by the extra shift).
        * one-hot columns replacing ``property_type`` and ``region``; any
          expected feature column still missing afterwards is added as 0.

    Lag and rolling values are row-based, so the first rows contain NaN.
    """
    if df.empty:
        return df

    feats = df.copy()

    # Numeric timestamps are epoch seconds; everything else is parsed as-is.
    raw_ts = feats["timestamp"]
    parse_options = {"unit": "s"} if pd.api.types.is_numeric_dtype(raw_ts) else {}
    feats["datetime"] = pd.to_datetime(raw_ts, **parse_options)

    usage = feats["power_consumption_kwh"]
    feats["hour_of_day"] = feats["datetime"].dt.hour
    feats["lag_30min"] = usage.shift(1)
    feats["lag_1h"] = usage.shift(2)
    feats["rolling_avg_1h"] = usage.rolling(2).mean().shift(1)
    feats["rolling_avg_2h"] = usage.rolling(4).mean().shift(1)
    feats["is_weekend"] = feats["datetime"].dt.weekday >= 5

    # Cyclical encoding of the hour on a 24-hour circle.
    hour_angle = 2 * np.pi * feats["hour_of_day"] / 24
    feats["hour_sin"] = np.sin(hour_angle)
    feats["hour_cos"] = np.cos(hour_angle)

    # One-hot encode property_type and region (source columns are replaced).
    feats = pd.get_dummies(feats, columns=["property_type", "region"], drop_first=False)

    # Categories absent from this slice produce no dummy column, so backfill
    # every expected feature that is still missing with zeros.
    required = (
        'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
        'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
        'temperature_c', 'ev_owner', 'solar_installed',
        'property_type_commercial', 'property_type_residential',
        'region_north', 'region_south', 'region_east', 'region_west',
    )
    absent = [name for name in required if name not in feats.columns]
    for name in absent:
        feats[name] = 0

    return feats

# Multi-step forecasting function
def forecast_future(df_feat, model, steps=4):
    """Forecast multiple steps into the future using lag features.

    Starting from the last row of ``df_feat``, the model is called ``steps``
    times. After every call the prediction is fed back into the lag and
    rolling-average features and the clock is moved on by one 30-minute
    interval before the next call.

    Args:
        df_feat: engineered feature frame containing every model feature
            column. If a ``datetime`` column is present it drives the
            time-based features; otherwise only ``hour_of_day`` is used.
        model: object exposing ``predict(features)`` that returns a sequence
            whose first element is the prediction for the single input row.
        steps: number of consecutive predictions to produce.

    Returns:
        list: ``steps`` predictions in chronological order, or ``[]`` when
        ``df_feat`` is empty.

    NOTE(review): the first prediction uses the last row's features exactly
    as stored. Those lags describe the readings *before* that row, so
    confirm which timestamp the first value is meant to represent.
    """
    if df_feat.empty:
        return []

    feature_cols = [
        'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
        'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
        'temperature_c', 'ev_owner', 'solar_installed',
        'property_type_commercial', 'property_type_residential',
        'region_north', 'region_south', 'region_east', 'region_west'
    ]
    step_minutes = 30  # the lag features are defined on 30-minute readings

    forecasts = []
    current_data = df_feat.iloc[-1:].copy()

    # Sliding window of the most recent consumption values, oldest first.
    # After the first prediction is appended it holds the four values whose
    # mean is the next row's rolling_avg_2h.
    newest = current_data['lag_30min'].values[0]
    second = current_data['lag_1h'].values[0]
    # The reading three steps back is the previous row's lag_1h.
    # Assumes df_feat rows are consecutive readings - TODO confirm.
    third = df_feat['lag_1h'].iloc[-2] if len(df_feat) > 1 else np.nan
    if pd.isna(third):
        # Only the 4-sample mean is known: split what the two newest values
        # do not explain evenly between the two older, unknown readings.
        third = (4 * current_data['rolling_avg_2h'].values[0] - newest - second) / 2
    window = [third, second, newest]

    # Prefer the real timestamp so half-hour steps and midnight/weekend
    # roll-overs are exact; fall back to the integer hour when unavailable.
    clock = None
    if 'datetime' in current_data.columns:
        stamp = current_data['datetime'].iloc[0]
        if pd.notna(stamp):
            clock = pd.Timestamp(stamp)
    base_hour = int(current_data['hour_of_day'].values[0])
    elapsed_minutes = 0

    for _ in range(steps):
        prediction = model.predict(current_data[feature_cols])[0]
        forecasts.append(prediction)

        # Feed the prediction back in and keep only the last four values.
        window = (window + [prediction])[-4:]
        current_data['lag_30min'] = window[-1]
        current_data['lag_1h'] = window[-2]
        current_data['rolling_avg_1h'] = (window[-1] + window[-2]) / 2
        current_data['rolling_avg_2h'] = sum(window) / len(window)

        # Advance time by one interval (not a whole hour) per step.
        elapsed_minutes += step_minutes
        if clock is not None:
            moment = clock + pd.Timedelta(minutes=elapsed_minutes)
            next_hour = moment.hour
            current_data['is_weekend'] = moment.weekday() >= 5
        else:
            # Minute-of-hour is unknown here; treat the last reading as
            # falling on the hour. is_weekend is left untouched.
            next_hour = (base_hour + elapsed_minutes // 60) % 24
        current_data['hour_of_day'] = next_hour
        current_data['hour_sin'] = np.sin(2 * np.pi * next_hour / 24)
        current_data['hour_cos'] = np.cos(2 * np.pi * next_hour / 24)

    return forecasts

# Performance evaluation with batch processing
def update_performance_metrics(actual, predicted):
    """Record one (actual, predicted) pair and score the batch when it is full.

    The pair is appended to the pending lists in session state and the
    evaluation counter is incremented. Whenever the counter reaches a
    multiple of 10 (and at least 10 pairs are pending) RMSE and MAE are
    computed over the pending pairs, stored as a new row in
    ``st.session_state.performance_metrics`` and the pending lists are reset.

    Args:
        actual: observed value.
        predicted: model output to compare against ``actual``.

    Returns:
        tuple: ``(rmse, mae)`` when a batch was just scored, otherwise
        ``(None, None)``.
    """
    state = st.session_state
    state.temp_actuals.append(actual)
    state.temp_predictions.append(predicted)
    state.evaluation_count += 1

    # Calculate metrics every 10 evaluations
    batch_ready = state.evaluation_count % 10 == 0 and len(state.temp_actuals) >= 10
    if not batch_ready:
        return None, None

    y_true = state.temp_actuals
    y_pred = state.temp_predictions
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)

    # Store metrics as one more row of the running table
    batch_row = pd.DataFrame({
        'timestamp': [pd.Timestamp.now()],
        'rmse': [rmse],
        'mae': [mae],
        'batch_size': [len(y_true)]
    })
    state.performance_metrics = pd.concat(
        [state.performance_metrics, batch_row], ignore_index=True
    )

    # Start the next batch with empty lists
    state.temp_actuals = []
    state.temp_predictions = []

    return rmse, mae

# Get next row
def get_next_row():
    """Return the next unread row of ``df_all`` as a one-row DataFrame.

    The read position lives in ``st.session_state.row_index`` and is moved
    forward by one on every successful read. When the position has reached
    the end of ``df_all`` an empty DataFrame is returned and the position is
    left unchanged.
    """
    position = st.session_state.row_index
    if position >= len(df_all):
        return pd.DataFrame()

    st.session_state.row_index = position + 1
    # Double brackets keep the result a DataFrame rather than a Series.
    return df_all.iloc[[position]]

# UI Layout: page heading and tagline
st.title("⚡ Gridflux: Smart Meter Forecasting Dashboard")
st.markdown("*Real-time power consumption forecasting and monitoring system*")

# Sidebar: counters read from session state at this point of the run
status_panel = st.sidebar
status_panel.header("📊 System Status")
_status_counters = (
    ("Records Processed", st.session_state.row_index),
    ("Evaluations", st.session_state.evaluation_count),
    ("Performance Batches", len(st.session_state.performance_metrics)),
)
for _label, _value in _status_counters:
    status_panel.metric(_label, _value)

# Main processing
#
# Every rerun of the script pulls one more reading, appends it to the session
# history and redraws three tabs (forecasts, model performance, usage
# patterns). Once the data is exhausted a summary is shown instead.
def _parse_timestamps(values):
    """Convert a raw ``timestamp`` column to datetimes.

    Numeric values are read as Unix seconds (the same rule engineer()
    applies); anything else is handed to pandas for parsing.
    """
    if pd.api.types.is_numeric_dtype(values):
        return pd.to_datetime(values, unit="s")
    return pd.to_datetime(values)


new_row = get_next_row()

if not new_row.empty:
    st.session_state.history = pd.concat([st.session_state.history, new_row], ignore_index=True)

    # history is re-indexed 0..n-1 on every append, so its last label
    # identifies the reading that arrived on this rerun.
    newest_label = st.session_state.history.index[-1]

    # Create tabs
    tab1, tab2, tab3 = st.tabs(["🔮 Regional Forecasting", "📈 Performance Monitor", "🔄 Usage Patterns"])

    with tab1:
        st.header("Multi-Step Forecasting by Region & Property Type")
        st.markdown("*Forecasting 2 hours ahead (30min intervals) for each region and property type combination*")

        regions = ['north', 'south', 'east', 'west']
        property_types = ['residential', 'commercial']

        # Create forecast grid: one section per region, one column per
        # property type.
        for region in regions:
            st.subheader(f"🌍 {region.upper()} Region")

            region_data = st.session_state.history[st.session_state.history['region'] == region]

            if region_data.empty:
                st.info(f"No data available for {region.upper()} region")
                st.divider()
                continue

            col1, col2 = st.columns(2)

            for idx, prop_type in enumerate(property_types):
                subset = region_data[region_data['property_type'] == prop_type]

                # Display in appropriate column
                with col1 if idx == 0 else col2:
                    st.markdown(f"**🏠 {prop_type.capitalize()} Properties**")

                    if len(subset) <= 2:
                        st.info("No data available")
                        continue

                    # Rows whose lag/rolling features are still NaN are
                    # dropped, so a short subset can end up empty here.
                    df_feat = engineer(subset).dropna()
                    forecasts = forecast_future(df_feat, model, steps=4)

                    if not forecasts:
                        st.info("Insufficient data for forecasting")
                        continue

                    latest_actual = df_feat['power_consumption_kwh'].iloc[-1]

                    # NOTE(review): forecasts[0] is predicted from the newest
                    # row's own lag features - confirm that "30min Ahead" is
                    # the right label for it.
                    forecast_col1, forecast_col2 = st.columns(2)

                    with forecast_col1:
                        st.metric("30min Ahead", f"{forecasts[0]:.3f} kWh",
                                  delta=f"{forecasts[0] - latest_actual:.3f}")
                        st.metric("1.5h Ahead", f"{forecasts[2]:.3f} kWh")

                    with forecast_col2:
                        st.metric("1h Ahead", f"{forecasts[1]:.3f} kWh")
                        st.metric("2h Ahead", f"{forecasts[3]:.3f} kWh")

                    # Create mini forecast chart
                    chart_data = subset.copy()
                    chart_data["datetime"] = _parse_timestamps(chart_data["timestamp"])

                    # Get last few points for context
                    recent_data = chart_data.tail(10)

                    fig = go.Figure()

                    # Historical data
                    fig.add_trace(go.Scatter(
                        x=recent_data["datetime"],
                        y=recent_data["power_consumption_kwh"],
                        mode='lines+markers',
                        name='Historical',
                        line=dict(color='blue', width=2)
                    ))

                    # Forecast data, plotted at 30-minute spacing after the
                    # last observed point
                    last_time = recent_data["datetime"].iloc[-1]
                    future_times = pd.date_range(
                        start=last_time + pd.Timedelta(minutes=30),
                        periods=4, freq='30min'
                    )

                    fig.add_trace(go.Scatter(
                        x=future_times,
                        y=forecasts,
                        mode='lines+markers',
                        name='Forecast',
                        line=dict(color='red', dash='dash', width=2)
                    ))

                    fig.update_layout(
                        title=f"{region.title()} {prop_type.title()} - Forecast",
                        xaxis_title="Time",
                        yaxis_title="Power (kWh)",
                        height=300,
                        showlegend=True
                    )

                    st.plotly_chart(fig, use_container_width=True)

                    # Update performance metrics - but only for the group
                    # that actually received the new reading. All groups are
                    # redrawn on every rerun; scoring the unchanged ones
                    # again would fill each batch with duplicate pairs.
                    if len(df_feat) > 1 and df_feat.index[-1] == newest_label:
                        update_performance_metrics(latest_actual, forecasts[0])

            st.divider()

    with tab2:
        st.header("Real-Time Model Performance")
        st.markdown("*Performance metrics calculated every 10 evaluations to ensure statistical significance*")

        # Current batch status
        batch_progress = st.session_state.evaluation_count % 10
        st.progress(batch_progress / 10, text=f"Current batch: {batch_progress}/10 evaluations")

        metrics_table = st.session_state.performance_metrics

        if len(metrics_table) > 0:
            # Latest metrics
            latest_metrics = metrics_table.iloc[-1]

            col1, col2, col3, col4 = st.columns(4)

            with col1:
                st.metric("Latest RMSE", f"{latest_metrics['rmse']:.4f}")
            with col2:
                st.metric("Latest MAE", f"{latest_metrics['mae']:.4f}")
            with col3:
                st.metric("Batch Size", f"{int(latest_metrics['batch_size'])}")
            with col4:
                st.metric("Total Batches", len(metrics_table))

            # Performance trends
            st.subheader("📊 Performance Trends Over Time")

            # A trend needs at least two batches.
            if len(metrics_table) > 1:
                fig = make_subplots(
                    rows=2, cols=1,
                    subplot_titles=('Root Mean Square Error (RMSE)', 'Mean Absolute Error (MAE)'),
                    shared_xaxes=True,
                    vertical_spacing=0.1
                )

                # RMSE plot
                fig.add_trace(
                    go.Scatter(
                        x=metrics_table['timestamp'],
                        y=metrics_table['rmse'],
                        mode='lines+markers',
                        name='RMSE',
                        line=dict(color='#ff6b6b', width=3),
                        marker=dict(size=8)
                    ),
                    row=1, col=1
                )

                # MAE plot
                fig.add_trace(
                    go.Scatter(
                        x=metrics_table['timestamp'],
                        y=metrics_table['mae'],
                        mode='lines+markers',
                        name='MAE',
                        line=dict(color='#4ecdc4', width=3),
                        marker=dict(size=8)
                    ),
                    row=2, col=1
                )

                fig.update_layout(
                    height=500,
                    title_text="Model Performance Monitoring",
                    showlegend=False
                )

                fig.update_xaxes(title_text="Time", row=2, col=1)
                fig.update_yaxes(title_text="RMSE", row=1, col=1)
                fig.update_yaxes(title_text="MAE", row=2, col=1)

                st.plotly_chart(fig, use_container_width=True)

                # Performance summary
                st.subheader("📈 Performance Summary")

                col1, col2 = st.columns(2)

                with col1:
                    st.markdown("**RMSE Statistics**")
                    st.metric("Average", f"{metrics_table['rmse'].mean():.4f}")
                    st.metric("Best (Lowest)", f"{metrics_table['rmse'].min():.4f}")
                    st.metric("Std Deviation", f"{metrics_table['rmse'].std():.4f}")

                with col2:
                    st.markdown("**MAE Statistics**")
                    st.metric("Average", f"{metrics_table['mae'].mean():.4f}")
                    st.metric("Best (Lowest)", f"{metrics_table['mae'].min():.4f}")
                    st.metric("Std Deviation", f"{metrics_table['mae'].std():.4f}")
        else:
            st.info("🔄 Collecting data... Performance metrics will appear after 10 evaluations")

    with tab3:
        st.header("Power Usage Patterns & Cycles")
        st.markdown("*Understanding power consumption patterns across different regions and time periods*")

        if len(st.session_state.history) > 0:
            # Prepare data
            cycle_data = st.session_state.history.copy()
            cycle_data["datetime"] = _parse_timestamps(cycle_data["timestamp"])
            cycle_data["hour"] = cycle_data["datetime"].dt.hour
            cycle_data["day_of_week"] = cycle_data["datetime"].dt.day_name()
            cycle_data["is_weekend"] = cycle_data["datetime"].dt.weekday >= 5

            # Hourly patterns by region
            st.subheader("⏰ 24-Hour Usage Patterns by Region")

            hourly_usage = cycle_data.groupby(['region', 'hour'])['power_consumption_kwh'].mean().reset_index()

            fig = px.line(
                hourly_usage,
                x='hour',
                y='power_consumption_kwh',
                color='region',
                title='Average Power Consumption Throughout the Day',
                labels={
                    'hour': 'Hour of Day (24-hour format)',
                    'power_consumption_kwh': 'Average Power Consumption (kWh)',
                    'region': 'Region'
                }
            )

            fig.update_layout(
                xaxis=dict(tickmode='linear', tick0=0, dtick=2),
                hovermode='x unified',
                height=400
            )

            # Add annotations for typical usage periods
            fig.add_vrect(x0=6, x1=9, fillcolor="yellow", opacity=0.2, annotation_text="Morning Peak")
            fig.add_vrect(x0=17, x1=21, fillcolor="orange", opacity=0.2, annotation_text="Evening Peak")
            # The night period wraps past midnight. A single band from 22 to
            # 6 would cover 06:00-22:00 (the daytime), so draw the two night
            # segments separately and label only one of them.
            fig.add_vrect(x0=0, x1=6, fillcolor="blue", opacity=0.1,
                          annotation_text="Night/Low Usage", annotation_position="top left")
            fig.add_vrect(x0=22, x1=24, fillcolor="blue", opacity=0.1)

            st.plotly_chart(fig, use_container_width=True)

            # Usage insights
            st.subheader("🔍 Usage Insights")

            col1, col2 = st.columns(2)

            with col1:
                st.markdown("**📊 Regional Summary**")

                regional_stats = cycle_data.groupby('region')['power_consumption_kwh'].agg([
                    'mean', 'std', 'min', 'max', 'count'
                ]).round(3)

                regional_stats.columns = ['Avg (kWh)', 'Std Dev', 'Min (kWh)', 'Max (kWh)', 'Data Points']
                st.dataframe(regional_stats, use_container_width=True)

            with col2:
                st.markdown("**⏰ Peak Usage Times**")

                # Find peak hours for each region: the row of hourly_usage
                # holding each region's highest hourly average.
                peak_hours = hourly_usage.loc[hourly_usage.groupby('region')['power_consumption_kwh'].idxmax()]
                peak_display = peak_hours[['region', 'hour', 'power_consumption_kwh']].copy()
                peak_display.columns = ['Region', 'Peak Hour', 'Peak Usage (kWh)']
                peak_display['Peak Hour'] = peak_display['Peak Hour'].apply(lambda x: f"{x:02d}:00")
                peak_display['Peak Usage (kWh)'] = peak_display['Peak Usage (kWh)'].round(3)

                st.dataframe(peak_display.set_index('Region'), use_container_width=True)

            # Weekend vs Weekday comparison
            st.subheader("📅 Weekend vs Weekday Usage")

            weekend_comparison = cycle_data.groupby(['region', 'is_weekend'])['power_consumption_kwh'].mean().reset_index()
            weekend_comparison['period'] = weekend_comparison['is_weekend'].map({True: 'Weekend', False: 'Weekday'})

            fig_weekend = px.bar(
                weekend_comparison,
                x='region',
                y='power_consumption_kwh',
                color='period',
                title='Average Power Consumption: Weekday vs Weekend',
                labels={
                    'region': 'Region',
                    'power_consumption_kwh': 'Average Power Consumption (kWh)'
                },
                barmode='group'
            )

            fig_weekend.update_layout(height=400)
            st.plotly_chart(fig_weekend, use_container_width=True)

            # Property type patterns
            if 'property_type' in cycle_data.columns:
                st.subheader("🏠 Property Type Usage Patterns")

                prop_patterns = cycle_data.groupby(['property_type', 'hour'])['power_consumption_kwh'].mean().reset_index()

                fig_prop = px.line(
                    prop_patterns,
                    x='hour',
                    y='power_consumption_kwh',
                    color='property_type',
                    title='Usage Patterns by Property Type',
                    labels={
                        'hour': 'Hour of Day',
                        'power_consumption_kwh': 'Average Power Consumption (kWh)',
                        'property_type': 'Property Type'
                    }
                )

                fig_prop.update_layout(
                    xaxis=dict(tickmode='linear', tick0=0, dtick=2),
                    height=400
                )

                st.plotly_chart(fig_prop, use_container_width=True)
        else:
            st.info("📊 Collecting usage data... Patterns will appear as data accumulates")

else:
    st.success("✅ All data processed successfully!")

    # Final summary
    if len(st.session_state.history) > 0:
        # The page keeps auto-refreshing after the data runs out, so remember
        # that the animation has been shown to avoid replaying it each time.
        if not st.session_state.get("completion_celebrated", False):
            st.balloons()
            st.session_state["completion_celebrated"] = True

        st.header("📋 Processing Summary")

        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Total Records", len(st.session_state.history))
        with col2:
            st.metric("Regions Covered", st.session_state.history['region'].nunique())
        with col3:
            st.metric("Property Types", st.session_state.history['property_type'].nunique())
        with col4:
            st.metric("Performance Evaluations", st.session_state.evaluation_count)

# Enhanced debug sidebar: optional dump of the replay state
with st.sidebar:
    st.divider()

    show_debug = st.checkbox("🔧 Show Debug Details")
    if show_debug:
        state = st.session_state

        st.write("**Data Status:**")
        st.write(f"- History shape: {state.history.shape}")
        st.write(f"- Temp predictions: {len(state.temp_predictions)}")
        st.write(f"- Temp actuals: {len(state.temp_actuals)}")

        if not state.history.empty:
            st.write("**Latest Record:**")
            # Most recently appended reading; missing fields fall back to
            # the defaults given to .get().
            latest = state.history.iloc[-1]
            region_value = latest.get('region', 'N/A')
            property_value = latest.get('property_type', 'N/A')
            power_value = latest.get('power_consumption_kwh', 0)
            timestamp_value = latest.get('timestamp', 'N/A')
            st.json({
                "region": region_value,
                "property_type": property_value,
                "power_consumption": f"{power_value:.3f} kWh",
                "timestamp": str(timestamp_value)
            })