Spaces:

smksean
/

gridflux

Sleeping

App Files Files Community

smksean committed on Jul 21, 2025

Commit

2e45c28

verified ·

1 Parent(s): 394a27e

Update app.py

Browse files

Files changed (1) hide show

app.py +476 -50

app.py CHANGED Viewed

@@ -6,9 +6,23 @@ import joblib
 import requests
 import streamlit as st
 from streamlit_autorefresh import st_autorefresh
-# Auto-refresh every 5 seconds
-st_autorefresh(interval=5000, key="refresh")
 # Load model
 @st.cache_resource
@@ -27,6 +41,14 @@ if "row_index" not in st.session_state:
     st.session_state.row_index = 0
 if "history" not in st.session_state:
     st.session_state.history = pd.DataFrame()
 # Fetch all data
 @st.cache_data
@@ -45,34 +67,24 @@ def fetch_all_data():
 df_all = fetch_all_data()
-# Debug sidebar
-st.sidebar.title("🛠 Debug Info")
-st.sidebar.write("Row index:", st.session_state.row_index)
-st.sidebar.write("Total rows:", len(df_all))
-if not df_all.empty and st.session_state.row_index < len(df_all):
-    st.sidebar.write("Next row:", df_all.iloc[st.session_state.row_index].to_dict())
-# Get next row
-def get_next_row():
-    if st.session_state.row_index < len(df_all):
-        row = df_all.iloc[[st.session_state.row_index]]
-        st.session_state.row_index += 1
-        return row
-    return pd.DataFrame()
 # Feature engineering
 def engineer(df):
     # Handle timestamp
     if pd.api.types.is_numeric_dtype(df["timestamp"]):
         df["datetime"] = pd.to_datetime(df["timestamp"], unit="s")
     else:
         df["datetime"] = pd.to_datetime(df["timestamp"])
     df["hour_of_day"] = df["datetime"].dt.hour
     df["lag_30min"] = df["power_consumption_kwh"].shift(1)
     df["lag_1h"] = df["power_consumption_kwh"].shift(2)
-    df["rolling_avg_1h"] = df["power_consumption_kwh"].rolling(2).mean()
-    df["rolling_avg_2h"] = df["power_consumption_kwh"].rolling(4).mean()
     df["is_weekend"] = df["datetime"].dt.weekday >= 5
     df["hour_sin"] = np.sin(2 * np.pi * df["hour_of_day"] / 24)
     df["hour_cos"] = np.cos(2 * np.pi * df["hour_of_day"] / 24)
@@ -82,10 +94,8 @@ def engineer(df):
     # Ensure all expected features exist
     expected_features = [
-        'lag_30min', 'lag_1h',
-        'rolling_avg_1h', 'rolling_avg_2h',
-        'hour_of_day', 'is_weekend',
-        'hour_sin', 'hour_cos',
         'temperature_c', 'ev_owner', 'solar_installed',
         'property_type_commercial', 'property_type_residential',
         'region_north', 'region_south', 'region_east', 'region_west'
@@ -97,37 +107,453 @@ def engineer(df):
     return df
-# UI layout
-st.title("⚡ Gridflux: Live Smart-Meter Forecast")
-placeholder_chart = st.empty()
-placeholder_metric = st.empty()
-new_row = get_next_row()
-if not new_row.empty:
-    st.session_state.history = pd.concat([st.session_state.history, new_row], ignore_index=True)
-    df_feat = engineer(st.session_state.history).dropna()
-    if not df_feat.empty:
-        latest_input = df_feat.iloc[[-1]][[
-            'lag_30min', 'lag_1h',
-            'rolling_avg_1h', 'rolling_avg_2h',
-            'hour_of_day', 'is_weekend',
-            'hour_sin', 'hour_cos',
             'temperature_c', 'ev_owner', 'solar_installed',
             'property_type_commercial', 'property_type_residential',
             'region_north', 'region_south', 'region_east', 'region_west'
         ]]
-        prediction = model.predict(latest_input)[0]
-        # Show outputs
-        chart_df = st.session_state.history.copy()
-        chart_df["datetime"] = pd.to_datetime(chart_df["timestamp"])
-        chart_df.set_index("datetime", inplace=True)
-        placeholder_chart.line_chart(chart_df["power_consumption_kwh"])
-        placeholder_metric.metric("🔮 Predicted Power Usage (kWh)", f"{prediction:.3f}")
 else:
-    st.success("✅ All data processed.")

 import requests
 import streamlit as st
 from streamlit_autorefresh import st_autorefresh
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+import warnings
+warnings.filterwarnings('ignore')
+# Page configuration
+st.set_page_config(
+    page_title="Gridflux Smart Meter Dashboard",
+    page_icon="⚡",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Auto-refresh every 2 seconds
+st_autorefresh(interval=2000, key="refresh")
 # Load model
 @st.cache_resource
     st.session_state.row_index = 0
 if "history" not in st.session_state:
     st.session_state.history = pd.DataFrame()
+if "performance_metrics" not in st.session_state:
+    st.session_state.performance_metrics = pd.DataFrame()
+if "evaluation_count" not in st.session_state:
+    st.session_state.evaluation_count = 0
+if "temp_predictions" not in st.session_state:
+    st.session_state.temp_predictions = []
+if "temp_actuals" not in st.session_state:
+    st.session_state.temp_actuals = []
 # Fetch all data
 @st.cache_data
 df_all = fetch_all_data()
 # Feature engineering
 def engineer(df):
+    if df.empty:
+        return df
+    df = df.copy()
     # Handle timestamp
     if pd.api.types.is_numeric_dtype(df["timestamp"]):
         df["datetime"] = pd.to_datetime(df["timestamp"], unit="s")
     else:
         df["datetime"] = pd.to_datetime(df["timestamp"])
     df["hour_of_day"] = df["datetime"].dt.hour
     df["lag_30min"] = df["power_consumption_kwh"].shift(1)
     df["lag_1h"] = df["power_consumption_kwh"].shift(2)
+    df['rolling_avg_1h'] = df['power_consumption_kwh'].rolling(2).mean().shift(1)
+    df['rolling_avg_2h'] = df['power_consumption_kwh'].rolling(4).mean().shift(1)
     df["is_weekend"] = df["datetime"].dt.weekday >= 5
     df["hour_sin"] = np.sin(2 * np.pi * df["hour_of_day"] / 24)
     df["hour_cos"] = np.cos(2 * np.pi * df["hour_of_day"] / 24)
     # Ensure all expected features exist
     expected_features = [
+        'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
+        'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
         'temperature_c', 'ev_owner', 'solar_installed',
         'property_type_commercial', 'property_type_residential',
         'region_north', 'region_south', 'region_east', 'region_west'
     return df
+# Multi-step forecasting function
+def forecast_future(df_feat, model, steps=4):
+    """Forecast multiple steps into the future using lag features"""
+    if df_feat.empty:
+        return []
+    forecasts = []
+    current_data = df_feat.iloc[-1:].copy()
+    for step in range(steps):
+        features = current_data[[
+            'lag_30min', 'lag_1h', 'rolling_avg_1h', 'rolling_avg_2h',
+            'hour_of_day', 'is_weekend', 'hour_sin', 'hour_cos',
             'temperature_c', 'ev_owner', 'solar_installed',
             'property_type_commercial', 'property_type_residential',
             'region_north', 'region_south', 'region_east', 'region_west'
         ]]
+        prediction = model.predict(features)[0]
+        forecasts.append(prediction)
+        # Update features for next step
+        current_data = current_data.copy()
+        current_data['lag_1h'] = current_data['lag_30min'].values[0]
+        current_data['lag_30min'] = prediction
+        current_data['rolling_avg_1h'] = (current_data['lag_30min'].values[0] + current_data['lag_1h'].values[0]) / 2
+        current_data['rolling_avg_2h'] = prediction
+        # Update time-based features
+        current_hour = current_data['hour_of_day'].values[0]
+        next_hour = (current_hour + 1) % 24
+        current_data['hour_of_day'] = next_hour
+        current_data['hour_sin'] = np.sin(2 * np.pi * next_hour / 24)
+        current_data['hour_cos'] = np.cos(2 * np.pi * next_hour / 24)
+    return forecasts
+# Performance evaluation with batch processing
+def update_performance_metrics(actual, predicted):
+    """Update performance metrics every 10 evaluations"""
+    st.session_state.temp_actuals.append(actual)
+    st.session_state.temp_predictions.append(predicted)
+    st.session_state.evaluation_count += 1
+    # Calculate metrics every 10 evaluations
+    if st.session_state.evaluation_count % 10 == 0:
+        if len(st.session_state.temp_actuals) >= 10:
+            rmse = np.sqrt(mean_squared_error(st.session_state.temp_actuals, st.session_state.temp_predictions))
+            mae = mean_absolute_error(st.session_state.temp_actuals, st.session_state.temp_predictions)
+            # Store metrics
+            new_metric = pd.DataFrame({
+                'timestamp': [pd.Timestamp.now()],
+                'rmse': [rmse],
+                'mae': [mae],
+                'batch_size': [len(st.session_state.temp_actuals)]
+            })
+            st.session_state.performance_metrics = pd.concat([
+                st.session_state.performance_metrics, new_metric
+            ], ignore_index=True)
+            # Clear temporary storage
+            st.session_state.temp_actuals = []
+            st.session_state.temp_predictions = []
+            return rmse, mae
+    return None, None
+# Get next row
+def get_next_row():
+    if st.session_state.row_index < len(df_all):
+        row = df_all.iloc[[st.session_state.row_index]]
+        st.session_state.row_index += 1
+        return row
+    return pd.DataFrame()
+# UI Layout
+st.title("⚡ Gridflux: Smart Meter Forecasting Dashboard")
+st.markdown("*Real-time power consumption forecasting and monitoring system*")
+# Sidebar
+st.sidebar.header("📊 System Status")
+st.sidebar.metric("Records Processed", st.session_state.row_index)
+st.sidebar.metric("Evaluations", st.session_state.evaluation_count)
+st.sidebar.metric("Performance Batches", len(st.session_state.performance_metrics))
+# Main processing
+new_row = get_next_row()
+if not new_row.empty:
+    st.session_state.history = pd.concat([st.session_state.history, new_row], ignore_index=True)
+    # Create tabs
+    tab1, tab2, tab3 = st.tabs(["🔮 Regional Forecasting", "📈 Performance Monitor", "🔄 Usage Patterns"])
+    with tab1:
+        st.header("Multi-Step Forecasting by Region & Property Type")
+        st.markdown("*Forecasting 2 hours ahead (30min intervals) for each region and property type combination*")
+        regions = ['north', 'south', 'east', 'west']
+        property_types = ['residential', 'commercial']
+        # Create forecast grid
+        for region in regions:
+            st.subheader(f"🌍 {region.upper()} Region")
+            region_data = st.session_state.history[st.session_state.history['region'] == region]
+            if not region_data.empty:
+                col1, col2 = st.columns(2)
+                for idx, prop_type in enumerate(property_types):
+                    subset = region_data[region_data['property_type'] == prop_type]
+                    if not subset.empty and len(subset) > 2:
+                        df_feat = engineer(subset).dropna()
+                        if not df_feat.empty:
+                            # Get forecasts
+                            forecasts = forecast_future(df_feat, model, steps=4)
+                            # Display in appropriate column
+                            with col1 if idx == 0 else col2:
+                                st.markdown(f"**🏠 {prop_type.capitalize()} Properties**")
+                                if forecasts:
+                                    # Create forecast metrics in a nice layout
+                                    forecast_col1, forecast_col2 = st.columns(2)
+                                    with forecast_col1:
+                                        st.metric("30min Ahead", f"{forecasts[0]:.3f} kWh",
+                                                delta=f"{forecasts[0] - df_feat['power_consumption_kwh'].iloc[-1]:.3f}")
+                                        st.metric("1.5h Ahead", f"{forecasts[2]:.3f} kWh")
+                                    with forecast_col2:
+                                        st.metric("1h Ahead", f"{forecasts[1]:.3f} kWh")
+                                        st.metric("2h Ahead", f"{forecasts[3]:.3f} kWh")
+                                    # Create mini forecast chart
+                                    chart_data = subset.copy()
+                                    chart_data["datetime"] = pd.to_datetime(chart_data["timestamp"])
+                                    # Get last few points for context
+                                    recent_data = chart_data.tail(10)
+                                    fig = go.Figure()
+                                    # Historical data
+                                    fig.add_trace(go.Scatter(
+                                        x=recent_data["datetime"],
+                                        y=recent_data["power_consumption_kwh"],
+                                        mode='lines+markers',
+                                        name='Historical',
+                                        line=dict(color='blue', width=2)
+                                    ))
+                                    # Forecast data
+                                    last_time = recent_data["datetime"].iloc[-1]
+                                    future_times = pd.date_range(
+                                        start=last_time + pd.Timedelta(minutes=30),
+                                        periods=4, freq='30min'
+                                    )
+                                    fig.add_trace(go.Scatter(
+                                        x=future_times,
+                                        y=forecasts,
+                                        mode='lines+markers',
+                                        name='Forecast',
+                                        line=dict(color='red', dash='dash', width=2)
+                                    ))
+                                    fig.update_layout(
+                                        title=f"{region.title()} {prop_type.title()} - Forecast",
+                                        xaxis_title="Time",
+                                        yaxis_title="Power (kWh)",
+                                        height=300,
+                                        showlegend=True
+                                    )
+                                    st.plotly_chart(fig, use_container_width=True)
+                                    # Update performance metrics
+                                    if len(df_feat) > 1:
+                                        actual = df_feat['power_consumption_kwh'].iloc[-1]
+                                        predicted = forecasts[0]  # Use 30min forecast
+                                        update_performance_metrics(actual, predicted)
+                                else:
+                                    st.info("Insufficient data for forecasting")
+                    else:
+                        with col1 if idx == 0 else col2:
+                            st.markdown(f"**🏠 {prop_type.capitalize()} Properties**")
+                            st.info("No data available")
+            else:
+                st.info(f"No data available for {region.upper()} region")
+            st.divider()
+    with tab2:
+        st.header("Real-Time Model Performance")
+        st.markdown("*Performance metrics calculated every 10 evaluations to ensure statistical significance*")
+        # Current batch status
+        batch_progress = st.session_state.evaluation_count % 10
+        st.progress(batch_progress / 10, text=f"Current batch: {batch_progress}/10 evaluations")
+        if len(st.session_state.performance_metrics) > 0:
+            # Latest metrics
+            latest_metrics = st.session_state.performance_metrics.iloc[-1]
+            col1, col2, col3, col4 = st.columns(4)
+            with col1:
+                st.metric("Latest RMSE", f"{latest_metrics['rmse']:.4f}")
+            with col2:
+                st.metric("Latest MAE", f"{latest_metrics['mae']:.4f}")
+            with col3:
+                st.metric("Batch Size", f"{int(latest_metrics['batch_size'])}")
+            with col4:
+                st.metric("Total Batches", len(st.session_state.performance_metrics))
+            # Performance trends
+            st.subheader("📊 Performance Trends Over Time")
+            if len(st.session_state.performance_metrics) > 1:
+                fig = make_subplots(
+                    rows=2, cols=1,
+                    subplot_titles=('Root Mean Square Error (RMSE)', 'Mean Absolute Error (MAE)'),
+                    shared_xaxes=True,
+                    vertical_spacing=0.1
+                )
+                # RMSE plot
+                fig.add_trace(
+                    go.Scatter(
+                        x=st.session_state.performance_metrics['timestamp'],
+                        y=st.session_state.performance_metrics['rmse'],
+                        mode='lines+markers',
+                        name='RMSE',
+                        line=dict(color='#ff6b6b', width=3),
+                        marker=dict(size=8)
+                    ),
+                    row=1, col=1
+                )
+                # MAE plot
+                fig.add_trace(
+                    go.Scatter(
+                        x=st.session_state.performance_metrics['timestamp'],
+                        y=st.session_state.performance_metrics['mae'],
+                        mode='lines+markers',
+                        name='MAE',
+                        line=dict(color='#4ecdc4', width=3),
+                        marker=dict(size=8)
+                    ),
+                    row=2, col=1
+                )
+                fig.update_layout(
+                    height=500,
+                    title_text="Model Performance Monitoring",
+                    showlegend=False
+                )
+                fig.update_xaxes(title_text="Time", row=2, col=1)
+                fig.update_yaxes(title_text="RMSE", row=1, col=1)
+                fig.update_yaxes(title_text="MAE", row=2, col=1)
+                st.plotly_chart(fig, use_container_width=True)
+                # Performance summary
+                st.subheader("📈 Performance Summary")
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.markdown("**RMSE Statistics**")
+                    st.metric("Average", f"{st.session_state.performance_metrics['rmse'].mean():.4f}")
+                    st.metric("Best (Lowest)", f"{st.session_state.performance_metrics['rmse'].min():.4f}")
+                    st.metric("Std Deviation", f"{st.session_state.performance_metrics['rmse'].std():.4f}")
+                with col2:
+                    st.markdown("**MAE Statistics**")
+                    st.metric("Average", f"{st.session_state.performance_metrics['mae'].mean():.4f}")
+                    st.metric("Best (Lowest)", f"{st.session_state.performance_metrics['mae'].min():.4f}")
+                    st.metric("Std Deviation", f"{st.session_state.performance_metrics['mae'].std():.4f}")
+        else:
+            st.info("🔄 Collecting data... Performance metrics will appear after 10 evaluations")
+    with tab3:
+        st.header("Power Usage Patterns & Cycles")
+        st.markdown("*Understanding power consumption patterns across different regions and time periods*")
+        if len(st.session_state.history) > 0:
+            # Prepare data
+            cycle_data = st.session_state.history.copy()
+            cycle_data["datetime"] = pd.to_datetime(cycle_data["timestamp"])
+            cycle_data["hour"] = cycle_data["datetime"].dt.hour
+            cycle_data["day_of_week"] = cycle_data["datetime"].dt.day_name()
+            cycle_data["is_weekend"] = cycle_data["datetime"].dt.weekday >= 5
+            # Hourly patterns by region
+            st.subheader("⏰ 24-Hour Usage Patterns by Region")
+            hourly_usage = cycle_data.groupby(['region', 'hour'])['power_consumption_kwh'].mean().reset_index()
+            fig = px.line(
+                hourly_usage,
+                x='hour',
+                y='power_consumption_kwh',
+                color='region',
+                title='Average Power Consumption Throughout the Day',
+                labels={
+                    'hour': 'Hour of Day (24-hour format)',
+                    'power_consumption_kwh': 'Average Power Consumption (kWh)',
+                    'region': 'Region'
+                }
+            )
+            fig.update_layout(
+                xaxis=dict(tickmode='linear', tick0=0, dtick=2),
+                hovermode='x unified',
+                height=400
+            )
+            # Add annotations for typical usage periods
+            fig.add_vrect(x0=6, x1=9, fillcolor="yellow", opacity=0.2, annotation_text="Morning Peak")
+            fig.add_vrect(x0=17, x1=21, fillcolor="orange", opacity=0.2, annotation_text="Evening Peak")
+            fig.add_vrect(x0=22, x1=6, fillcolor="blue", opacity=0.1, annotation_text="Night/Low Usage")
+            st.plotly_chart(fig, use_container_width=True)
+            # Usage insights
+            st.subheader("🔍 Usage Insights")
+            col1, col2 = st.columns(2)
+            with col1:
+                st.markdown("**📊 Regional Summary**")
+                regional_stats = cycle_data.groupby('region')['power_consumption_kwh'].agg([
+                    'mean', 'std', 'min', 'max', 'count'
+                ]).round(3)
+                regional_stats.columns = ['Avg (kWh)', 'Std Dev', 'Min (kWh)', 'Max (kWh)', 'Data Points']
+                st.dataframe(regional_stats, use_container_width=True)
+            with col2:
+                st.markdown("**⏰ Peak Usage Times**")
+                # Find peak hours for each region
+                peak_hours = hourly_usage.loc[hourly_usage.groupby('region')['power_consumption_kwh'].idxmax()]
+                peak_display = peak_hours[['region', 'hour', 'power_consumption_kwh']].copy()
+                peak_display.columns = ['Region', 'Peak Hour', 'Peak Usage (kWh)']
+                peak_display['Peak Hour'] = peak_display['Peak Hour'].apply(lambda x: f"{x:02d}:00")
+                peak_display['Peak Usage (kWh)'] = peak_display['Peak Usage (kWh)'].round(3)
+                st.dataframe(peak_display.set_index('Region'), use_container_width=True)
+            # Weekend vs Weekday comparison
+            st.subheader("📅 Weekend vs Weekday Usage")
+            weekend_comparison = cycle_data.groupby(['region', 'is_weekend'])['power_consumption_kwh'].mean().reset_index()
+            weekend_comparison['period'] = weekend_comparison['is_weekend'].map({True: 'Weekend', False: 'Weekday'})
+            fig_weekend = px.bar(
+                weekend_comparison,
+                x='region',
+                y='power_consumption_kwh',
+                color='period',
+                title='Average Power Consumption: Weekday vs Weekend',
+                labels={
+                    'region': 'Region',
+                    'power_consumption_kwh': 'Average Power Consumption (kWh)'
+                },
+                barmode='group'
+            )
+            fig_weekend.update_layout(height=400)
+            st.plotly_chart(fig_weekend, use_container_width=True)
+            # Property type patterns
+            if 'property_type' in cycle_data.columns:
+                st.subheader("🏠 Property Type Usage Patterns")
+                prop_patterns = cycle_data.groupby(['property_type', 'hour'])['power_consumption_kwh'].mean().reset_index()
+                fig_prop = px.line(
+                    prop_patterns,
+                    x='hour',
+                    y='power_consumption_kwh',
+                    color='property_type',
+                    title='Usage Patterns by Property Type',
+                    labels={
+                        'hour': 'Hour of Day',
+                        'power_consumption_kwh': 'Average Power Consumption (kWh)',
+                        'property_type': 'Property Type'
+                    }
+                )
+                fig_prop.update_layout(
+                    xaxis=dict(tickmode='linear', tick0=0, dtick=2),
+                    height=400
+                )
+                st.plotly_chart(fig_prop, use_container_width=True)
+        else:
+            st.info("📊 Collecting usage data... Patterns will appear as data accumulates")
 else:
+    st.success("✅ All data processed successfully!")
+    # Final summary
+    if len(st.session_state.history) > 0:
+        st.balloons()
+        st.header("📋 Processing Summary")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("Total Records", len(st.session_state.history))
+        with col2:
+            st.metric("Regions Covered", st.session_state.history['region'].nunique())
+        with col3:
+            st.metric("Property Types", st.session_state.history['property_type'].nunique())
+        with col4:
+            st.metric("Performance Evaluations", st.session_state.evaluation_count)
+# Enhanced debug sidebar
+with st.sidebar:
+    st.divider()
+    if st.checkbox("🔧 Show Debug Details"):
+        st.write("**Data Status:**")
+        st.write(f"- History shape: {st.session_state.history.shape}")
+        st.write(f"- Temp predictions: {len(st.session_state.temp_predictions)}")
+        st.write(f"- Temp actuals: {len(st.session_state.temp_actuals)}")
+        if not st.session_state.history.empty:
+            st.write("**Latest Record:**")
+            latest = st.session_state.history.iloc[-1]
+            st.json({
+                "region": latest.get('region', 'N/A'),
+                "property_type": latest.get('property_type', 'N/A'),
+                "power_consumption": f"{latest.get('power_consumption_kwh', 0):.3f} kWh",
+                "timestamp": str(latest.get('timestamp', 'N/A'))
+            })