Spaces:

LeemahLee
/

Energy_Forecast

Sleeping

App Files Files Community

Energy_Forecast / app.py

LeemahLee

Removing the clock

c6a9326 verified 9 months ago

raw

history blame contribute delete

19.4 kB

	import os
	import time
	import pandas as pd
	import numpy as np
	import joblib
	import glob
	import requests
	import streamlit as st
	from streamlit_autorefresh import st_autorefresh
	import plotly.express as px
	import plotly.graph_objects as go
	from datetime import datetime
	from pytz import timezone
	from sklearn.metrics import mean_squared_error, mean_absolute_error
	from scipy.stats.mstats import winsorize

	# === Streamlit Setup ===
	# Page configuration is issued before any other Streamlit call in this script.
	st.set_page_config(page_title="Energy Forecast Dashboard", layout="wide", page_icon="⚡")
	# Schedules an automatic rerun of the script; the interval is presumably in
	# milliseconds (i.e. every 5 s) — confirm against the streamlit_autorefresh docs.
	st_autorefresh(interval=5000, key="auto_refresh")
	# Page heading and tagline shown above the tabs.
	st.title(":zap: Real-Time Power Forecast")
	st.markdown("Smarter Energy Monitoring for Smarter Grids")

	# === Load Model ===
	@st.cache_resource
	def load_model():
	    """Deserialize the forecasting model from disk.

	    Wrapped in ``st.cache_resource`` so the file is read through Streamlit's
	    resource cache rather than on every script rerun.

	    Returns:
	        The object stored in ``lightgbm_energy_model_full2.pkl``.
	    """
	    model_path = "lightgbm_energy_model_full2.pkl"
	    loaded = joblib.load(model_path)
	    return loaded


	# Model instance shared by every prediction made in this script.
	model = load_model()

	# === Supabase Config ===
	# Credentials come from the environment; both values are None when unset.
	SUPABASE_URL = os.getenv("SUPABASE_URL")
	SUPABASE_API_KEY = os.getenv("SUPABASE_API_KEY")
	TABLE = "smart_meter_readings_1year"

	# Headers sent with every REST request to Supabase.
	HEADERS = dict([
	    ("apikey", SUPABASE_API_KEY),
	    ("Authorization", f"Bearer {SUPABASE_API_KEY}"),
	])
	# === Pull Latest Data (Today Only) ===
	@st.cache_data(ttl=300)
	def fetch_today_data():
	    """Fetch today's smart-meter readings from the Supabase REST endpoint.

	    "Today" is the current calendar date in the Europe/London timezone. Rows
	    are requested in ascending ``timestamp`` order.

	    Returns:
	        pandas.DataFrame: the readings with ``timestamp`` parsed to datetimes,
	        or an empty DataFrame when the request fails or returns no rows.
	    """
	    today = pd.Timestamp.now(tz=timezone("Europe/London")).strftime("%Y-%m-%d")
	    url = (
	        f"{SUPABASE_URL}/rest/v1/{TABLE}?select=*"
	        f"&timestamp=gte.{today}T00:00:00&timestamp=lt.{today}T23:59:59"
	        "&order=timestamp.asc"
	    )

	    try:
	        # Without a timeout a stalled connection would block the app forever.
	        r = requests.get(url, headers=HEADERS, timeout=30)
	    except requests.RequestException as exc:
	        st.error(f"Failed to fetch data: {exc}")
	        return pd.DataFrame()

	    if not r.ok:
	        st.error(f"Failed to fetch data: {r.status_code}")
	        return pd.DataFrame()

	    df = pd.DataFrame(r.json())
	    if df.empty:
	        # An empty JSON list yields a frame with no columns, so indexing
	        # 'timestamp' below would raise KeyError.
	        return df
	    df['timestamp'] = pd.to_datetime(df['timestamp'])
	    return df

	df = fetch_today_data()
	# A failed or empty fetch returns a frame without columns; dropna(subset=...)
	# would raise KeyError on it, so tell the user and end this run instead.
	if df.empty:
	    st.warning("No smart-meter readings are available for today yet.")
	    st.stop()
	# Rows lacking any of these fields cannot be encoded or evaluated.
	df = df.dropna(subset=['region', 'property_type', 'power_consumption_kwh'])

	# === Feature Engineering ===
	def prepare_features(df):
	    """Build the model's feature matrix from raw readings.

	    Adds calendar features, cyclic encodings, lagged/rolling consumption
	    features, a lag-voltage interaction and one-hot columns for
	    ``property_type`` and ``region``. Gaps are forward-filled and rows that
	    still contain NaN (e.g. the leading rows without a full lag window) are
	    dropped. The input frame is not modified.

	    Args:
	        df: DataFrame with at least ``timestamp`` (datetime),
	            ``power_consumption_kwh``, ``voltage``, ``humidity_pct``,
	            ``temperature_c``, ``property_type`` and ``region``.

	    Returns:
	        pandas.DataFrame: ``timestamp`` followed by the feature columns in a
	        fixed order. An empty input is returned unchanged.

	    Raises:
	        KeyError: if a required column is missing from a non-empty ``df``.
	    """
	    if df.empty:
	        return df

	    df = df.copy()
	    stamps = df['timestamp'].dt
	    usage = df['power_consumption_kwh']

	    # Calendar features
	    df['hour'] = stamps.hour
	    df['minute_of_day'] = df['hour'] * 60 + stamps.minute
	    df['day_of_week'] = stamps.dayofweek
	    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)

	    # Cyclic encodings of the hour and of the minute within the day
	    hour_angle = 2 * np.pi * df['hour'] / 24
	    minute_angle = 2 * np.pi * df['minute_of_day'] / 1440
	    df['sin_hour'] = np.sin(hour_angle)
	    df['cos_hour'] = np.cos(hour_angle)
	    df['sin_30min'] = np.sin(minute_angle)
	    df['cos_30min'] = np.cos(minute_angle)

	    # Lags are row shifts; the column names presume consecutive rows are
	    # 30 minutes apart — TODO confirm against the upstream table.
	    df['lag_30min'] = usage.shift(1)
	    df['lag_1hr'] = usage.shift(2)
	    df['lag_90min'] = usage.shift(3)
	    df['lag_2hr'] = usage.shift(4)
	    # NOTE(review): window sizes (2 and 4) differ from the numbers in the
	    # column names; kept as-is because the model expects these features.
	    df['lag_mean_3'] = usage.shift(1).rolling(2).mean()
	    df['lag_mean_6'] = usage.shift(1).rolling(4).mean()
	    df['rolling_std_6'] = usage.rolling(6).std()

	    df['interaction_lag_voltage'] = df['lag_1hr'] * df['voltage']

	    df = pd.get_dummies(df, columns=['property_type', 'region'], drop_first=False)
	    # Categories absent from today's data still need a column for the model.
	    for col in ['property_type_residential', 'property_type_commercial',
	                'region_north', 'region_south', 'region_east', 'region_west']:
	        if col not in df.columns:
	            df[col] = 0

	    # fillna(method='ffill') is deprecated in recent pandas; ffill() is the
	    # equivalent supported spelling.
	    df = df.ffill().dropna()

	    # Column order matters: it is the order the model receives its inputs in.
	    expected = [
	        'voltage', 'cos_hour', 'sin_hour', 'is_weekend', 'minute_of_day', 'cos_30min',
	        'humidity_pct', 'lag_90min', 'temperature_c', 'lag_2hr', 'lag_mean_6', 'lag_mean_3',
	        'lag_1hr', 'rolling_std_6', 'lag_30min', 'sin_30min', 'interaction_lag_voltage',
	        'day_of_week', 'property_type_commercial', 'property_type_residential',
	        'region_north', 'region_south', 'region_east', 'region_west',
	    ]

	    return df[['timestamp'] + expected]

	# === Predict ===
	df_features = prepare_features(df)
	# Early in the day every row can be dropped for lacking a full lag window;
	# the metrics and logs further down need at least one feature row.
	if df_features.empty:
	    st.info("Not enough readings yet today to build features for prediction.")
	    st.stop()
	feature_cols = [col for col in df_features.columns if col != 'timestamp']
	expected_features = feature_cols  # Save correct order of features
	df_features['predicted_kwh'] = model.predict(df_features[feature_cols])


	# === Helper Function for Future Forecasting ===
	def forecast_future_usage(last_row, model, steps=(30, 60, 90)):
	    """Forecast consumption at several horizons after ``last_row``.

	    Each horizon reuses ``last_row``'s features, refreshes the time-based
	    features for the future timestamp, and feeds earlier predictions back in
	    as lag values.

	    Args:
	        last_row: pandas Series holding ``timestamp``, ``voltage``, the lag
	            features and every column named in the module-level
	            ``expected_features``.
	        model: object exposing ``predict(X)`` for a 2-D array.
	        steps: iterable of minute offsets from ``last_row['timestamp']``.

	    Returns:
	        tuple: ``(preds, forecast_times)`` where ``preds`` maps
	        ``"<step> min"`` to the prediction and ``forecast_times`` lists the
	        matching clock times as ``"HH:MM"`` strings.
	    """
	    preds = {}
	    forecast_times = []
	    # Oldest -> newest, so history[-1] is always the most recent value and
	    # appended predictions extend the series in chronological order.
	    # NOTE(review): the reading at last_row's own timestamp is not part of
	    # the feature row, so the first step starts from lag_30min.
	    history = [
	        last_row['lag_90min'],
	        last_row['lag_1hr'],
	        last_row['lag_30min'],
	    ]

	    for step in steps:
	        future_time = last_row["timestamp"] + pd.Timedelta(minutes=step)
	        row = last_row.copy()
	        row["timestamp"] = future_time
	        row["hour"] = future_time.hour
	        row["minute_of_day"] = row["hour"] * 60 + future_time.minute
	        row["day_of_week"] = future_time.dayofweek
	        row["is_weekend"] = int(future_time.dayofweek >= 5)
	        row["sin_hour"] = np.sin(2 * np.pi * row["hour"] / 24)
	        row["cos_hour"] = np.cos(2 * np.pi * row["hour"] / 24)
	        row["sin_30min"] = np.sin(2 * np.pi * row["minute_of_day"] / 1440)
	        row["cos_30min"] = np.cos(2 * np.pi * row["minute_of_day"] / 1440)

	        recent = history[-3:]
	        row["lag_30min"] = recent[-1]
	        row["lag_1hr"] = recent[-2]
	        row["lag_90min"] = recent[-3]
	        row["lag_mean_3"] = np.mean(recent)
	        row["lag_mean_6"] = np.mean(recent + [last_row["lag_mean_3"], last_row["lag_mean_6"]])
	        # Keep the interaction term in step with the refreshed lag.
	        row["interaction_lag_voltage"] = row["lag_1hr"] * row["voltage"]

	        # The row Series is object-typed (it also holds the timestamp), so
	        # cast the selected features to float before predicting.
	        row_features = row[expected_features].to_numpy(dtype=float).reshape(1, -1)
	        pred = model.predict(row_features)[0]

	        preds[f"{step} min"] = pred
	        forecast_times.append(future_time.strftime('%H:%M'))
	        history.append(pred)

	    return preds, forecast_times

	# === Tabs ===
	tabs = st.tabs(["🔮 Regional Forecast", "📊 Performance Monitor", "📈 Usage Pattern", "🚗🌞 Electric Vehicle & Solar"])

	# === Regional Forecast Tab ===
	# For the chosen region, show 30/60/90/120-minute forecasts and the day's
	# predicted trend for each property type.
	with tabs[0]:
	    st.subheader("Regional Forecast for Next Hours")

	    # Region filter with bold styling
	    st.markdown("<h4 style='margin-bottom: 0;'>📍 <b>Select a Region</b></h4>", unsafe_allow_html=True)
	    selected_region = st.selectbox(" ", ['south', 'east', 'north', 'west'])
	    region_col = f'region_{selected_region}'
	    now = pd.Timestamp.now(tz="Europe/London")

	    for ptype in ['residential', 'commercial']:
	        ptype_col = f'property_type_{ptype}'

	        st.markdown(f"#### {selected_region.capitalize()} - {ptype.capitalize()}")

	        if region_col not in df_features.columns or ptype_col not in df_features.columns:
	            st.warning(f"No data available for Region: {selected_region} and Property: {ptype}")
	            continue

	        temp_df = df_features[(df_features[region_col] == 1) & (df_features[ptype_col] == 1)]
	        if len(temp_df) < 2:
	            st.info(f"Not enough data for {ptype} properties in {selected_region} region")
	            continue

	        # Sort and get latest row
	        temp_df = temp_df.sort_values("timestamp")
	        latest_row = temp_df.iloc[-1].copy()

	        # Make latest_row['timestamp'] timezone-aware if needed
	        if latest_row['timestamp'].tzinfo is None:
	            latest_row['timestamp'] = latest_row['timestamp'].tz_localize("Europe/London")

	        # Skip forecast if data is too old
	        if (now - latest_row['timestamp']) > pd.Timedelta(hours=1):
	            st.warning(f"Last data point for {ptype} in {selected_region} is too old (at {latest_row['timestamp']:%H:%M}). Skipping forecast.")
	            continue

	        # Predicting the same feature row once per horizon produced four
	        # identical numbers; roll the forecast forward so each horizon gets
	        # its own time and lag features.
	        preds, _ = forecast_future_usage(latest_row, model, steps=[30, 60, 90, 120])

	        # Display metrics only (no clock/timestamp)
	        cols = st.columns(len(preds))
	        for col, (label, value) in zip(cols, preds.items()):
	            with col:
	                st.metric(label=label, value=f"{value:.2f} kWh")

	        # Line chart of recent trend
	        fig = px.line(temp_df, x='timestamp', y='predicted_kwh',
	                      title=f"{selected_region.capitalize()} {ptype.capitalize()} Forecast")
	        st.plotly_chart(fig, use_container_width=True)

	# === Performance Monitor ===
	# Error metrics, an hourly RMSE chart, a drift warning and an anomaly scatter
	# comparing today's predictions with the recorded consumption.
	with tabs[1]:
	    st.subheader("Prediction Performance Monitor")

	    # df_features keeps the row labels of df, so joining on the index pairs
	    # every prediction with the reading it was made for. Merging on
	    # 'timestamp' would cross-match rows whenever several readings share a
	    # timestamp.
	    df_eval = df_features.join(df['power_consumption_kwh'].rename('actual'))
	    residual = df_eval['actual'] - df_eval['predicted_kwh']

	    # === Error Metrics ===
	    mse = mean_squared_error(df_eval['actual'], df_eval['predicted_kwh'])
	    rmse = np.sqrt(mse)
	    mae = mean_absolute_error(df_eval['actual'], df_eval['predicted_kwh'])

	    st.metric("RMSE", f"{rmse:.3f} kWh")
	    st.metric("MAE", f"{mae:.3f} kWh")

	    # === Hourly RMSE Chart ===
	    df_eval['hour'] = df_eval['timestamp'].dt.hour
	    # RMSE per hour = sqrt(mean squared error) within each hour group.
	    hourly_rmse = np.sqrt(
	        (residual ** 2).groupby(df_eval['hour']).mean()
	    ).reset_index(name='rmse')

	    fig = px.bar(hourly_rmse, x='hour', y='rmse', title='Hourly RMSE')
	    st.plotly_chart(fig, use_container_width=True)

	    # === Latency Placeholders ===
	    # Hard-coded display values, not measurements.
	    st.metric("Prediction Latency", "89 ms")
	    st.metric("API Fetch Latency", "170 ms")

	    # === ⚠ Drift Detection
	    if rmse > 0.8:
	        st.warning("⚠️ Model performance has drifted: RMSE above 0.8 kWh!")
	    else:
	        st.success("✅ Model performance is within expected range.")

	    # NOTE(review): rows missing region/property_type were already dropped
	    # when df was loaded, so this warning looks unreachable — confirm.
	    missing_info = df[df[['region', 'property_type']].isnull().any(axis=1)]
	    if not missing_info.empty:
	        st.warning(f"{len(missing_info)} rows had missing region or property_type and were excluded from prediction.")

	    # === Compute Error and Flag Anomalies ===
	    df_eval['error'] = residual.abs()

	    # Anything above the 99th percentile of today's absolute error is flagged.
	    threshold = df_eval['error'].quantile(0.99)
	    df_eval['is_anomaly'] = df_eval['error'] > threshold

	    # Plot anomalies
	    fig = px.scatter(
	        df_eval,
	        x='timestamp',
	        y='error',
	        color='is_anomaly',
	        title="Prediction Errors and Anomalies",
	        color_discrete_map={True: 'red', False: 'green'},
	        labels={"error": "Absolute Error (kWh)"},
	    )
	    st.plotly_chart(fig, use_container_width=True)


	# === Usage Pattern ===
	# Today's smoothed actual-vs-predicted trend and the average hourly usage
	# per region.
	with tabs[2]:
	    st.subheader(" Usage Pattern")

	    # df_features keeps the row labels of df, so an index join pairs each
	    # prediction with its own reading; merging on 'timestamp' would
	    # cross-match rows that share a timestamp.
	    df_today_eval = df_features.join(df['power_consumption_kwh'].rename('actual'))

	    # Two-row rolling mean as light smoothing
	    df_today_eval['actual_rolling'] = df_today_eval['actual'].rolling(2).mean()
	    df_today_eval['predicted_rolling'] = df_today_eval['predicted_kwh'].rolling(2).mean()

	    # Plotting hourly line chart
	    fig_hourly = go.Figure()
	    for column, trace_name, colour in (
	        ('actual_rolling', 'Actual', 'indigo'),
	        ('predicted_rolling', 'Predicted', 'crimson'),
	    ):
	        fig_hourly.add_trace(go.Scatter(
	            x=df_today_eval['timestamp'], y=df_today_eval[column],
	            mode='lines',
	            name=trace_name,
	            line=dict(color=colour, width=2),
	        ))

	    fig_hourly.update_layout(
	        title="⏱️ Hourly Trend for Today: Predicted vs Actual",
	        xaxis_title="Time",
	        yaxis_title="kWh",
	        xaxis=dict(tickformat="%H:%M", showgrid=True),
	        legend_title="Legend",
	        template="plotly_white",
	    )

	    st.plotly_chart(fig_hourly, use_container_width=True)

	    # === Hourly Usage by Region ===
	    if not df.empty:
	        # Adds an 'hour' column to the shared raw frame.
	        df['hour'] = df['timestamp'].dt.hour

	        if 'region' in df.columns:
	            avg_by_region = df.groupby(['hour', 'region'])['power_consumption_kwh'].mean().reset_index()
	            fig = px.line(
	                avg_by_region, x='hour', y='power_consumption_kwh',
	                color='region', title="Average Hourly Usage by Region", markers=True,
	            )
	            st.plotly_chart(fig, use_container_width=True)
	        else:
	            st.warning("Region information not available in raw data.")

	# === Daily Comparison Logging ===
	def update_daily_comparison_log(df_features, df_raw):
	    """Write today's predicted and actual totals to ``daily_comparison_log.csv``.

	    The row for the current date is replaced on every call, so the totals
	    follow the data as it arrives during the day. Rows for other dates are
	    kept.

	    Args:
	        df_features: DataFrame with ``timestamp`` (datetime) and
	            ``predicted_kwh``; the date is taken from its first row.
	        df_raw: DataFrame with ``power_consumption_kwh``.

	    Returns:
	        None. Does nothing when ``df_features`` is empty.
	    """
	    log_file = "daily_comparison_log.csv"
	    if df_features.empty:
	        # No first row to read the date from.
	        return

	    today = df_features['timestamp'].dt.date.iloc[0]
	    frames = [pd.DataFrame([{
	        'date': today,
	        'predicted_kwh': df_features['predicted_kwh'].sum(),
	        'actual_kwh': df_raw['power_consumption_kwh'].sum(),
	    }])]

	    if os.path.exists(log_file):
	        existing = pd.read_csv(log_file)
	        existing['date'] = pd.to_datetime(existing['date']).dt.date
	        # Drop the stale entry for today; it is replaced with fresh totals.
	        previous_days = existing[existing['date'] != today]
	        if not previous_days.empty:
	            frames.insert(0, previous_days)

	    pd.concat(frames, ignore_index=True).to_csv(log_file, index=False)

	update_daily_comparison_log(df_features, df)

	# === 1. Log timestamp-level data for today ===
	log_dir = "logs"
	os.makedirs(log_dir, exist_ok=True)

	today_str = pd.Timestamp.now(tz=timezone("Europe/London")).strftime("%Y-%m-%d")
	log_path = os.path.join(log_dir, f"{today_str}.csv")

	# df_features keeps the row labels of df, so an index join pairs each
	# prediction with its own reading; merging on 'timestamp' would cross-match
	# rows that share a timestamp.
	df_eval = df_features.join(df['power_consumption_kwh'].rename('actual'))

	df_log = df_eval[['timestamp', 'actual', 'predicted_kwh']].copy()
	df_log['date'] = df_log['timestamp'].dt.date
	df_log['hour'] = df_log['timestamp'].dt.strftime("%H:%M")

	# Today's file is rewritten in full on every run.
	df_log.to_csv(log_path, index=False)

	# === 2. Read all logs and concatenate ===
	log_files = sorted(glob.glob(f"{log_dir}/*.csv"))
	full_df = pd.concat([pd.read_csv(f) for f in log_files], ignore_index=True)

	# === 3. Clean up and format ===
	full_df['timestamp'] = pd.to_datetime(full_df['timestamp'])
	full_df['hour'] = pd.to_datetime(full_df['hour'], format="%H:%M").dt.time
	full_df['date'] = pd.to_datetime(full_df['date'])

	# NOTE(review): assumed to belong to the Usage Pattern tab — confirm.
	with tabs[2]:
	    # === 4. Plot all days together ===
	    fig = go.Figure()

	    # One actual/predicted pair of traces per logged day.
	    for date, sub in full_df.groupby(full_df['date'].dt.date):
	        fig.add_trace(go.Scatter(
	            x=sub['timestamp'], y=sub['actual'],
	            mode='lines',
	            name=f"{date} - Actual",
	            line=dict(color='#008080', width=2),
	        ))
	        fig.add_trace(go.Scatter(
	            x=sub['timestamp'], y=sub['predicted_kwh'],
	            mode='lines',
	            name=f"{date} - Predicted",
	            line=dict(color='#8B0000', width=2),
	        ))

	    fig.update_layout(
	        title="📈 Hourly Trend: Actual vs Predicted Energy Consumption",
	        xaxis_title="Timestamp",
	        yaxis_title="kWh",
	        legend_title="Legend",
	        xaxis=dict(tickformat="%H:%M", showgrid=True),
	        template="plotly_white",
	    )
	    st.plotly_chart(fig, use_container_width=True)

	    # NOTE(review): rows missing region/property_type were already dropped
	    # when df was loaded, so this warning looks unreachable — confirm.
	    missing_info = df[df[['region', 'property_type']].isnull().any(axis=1)]
	    if not missing_info.empty:
	        st.warning(f"{len(missing_info)} rows had missing region or property_type and were excluded from prediction.")


	# === EV & Solar Insight ===
	def _peak_hour_label(frame):
	    """Return the hour with the highest mean consumption as 'H:00', or 'N/A' if empty."""
	    if frame.empty:
	        return "N/A"
	    hourly_mean = frame.groupby(frame['timestamp'].dt.hour)['power_consumption_kwh'].mean()
	    return f"{hourly_mean.idxmax()}:00"


	# Average usage, peak hour, distribution and trend for EV-owning and
	# solar-equipped households.
	with tabs[3]:
	    st.subheader("EV & Solar Insights")

	    ev_df = df[df['ev_owner'] == 1]
	    solar_df = df[df['solar_installed'] == 1]

	    overall_avg = df['power_consumption_kwh'].mean()
	    ev_avg = ev_df['power_consumption_kwh'].mean()
	    solar_avg = solar_df['power_consumption_kwh'].mean()

	    # Peak hour is ranked by mean consumption; counting rows per hour only
	    # reflects how many readings exist, not how much energy was used.
	    ev_peak_hour = _peak_hour_label(ev_df)
	    solar_peak_hour = _peak_hour_label(solar_df)

	    col1, col2 = st.columns(2)
	    with col1:
	        # An empty subset has a NaN mean; show N/A rather than "nan".
	        st.metric("EV Household Avg Usage (kWh)", "N/A" if ev_df.empty else f"{ev_avg:.2f}")
	        st.markdown(f"🚗 Peak Usage Hour: {ev_peak_hour}")
	        if ev_avg > overall_avg:
	            st.warning("⚠ Electric Vehicle consume above average energy!")

	    with col2:
	        st.metric("Solar Household Avg Usage (kWh)", "N/A" if solar_df.empty else f"{solar_avg:.2f}")
	        st.markdown(f"🌞 Peak Usage Hour: {solar_peak_hour}")
	        if solar_avg > overall_avg:
	            st.warning("⚠ Solar homes also show high consumption.")

	    st.markdown("### 🔍 Distribution Analysis")

	    col3, col4 = st.columns(2)
	    with col3:
	        fig_ev = px.box(ev_df, y='power_consumption_kwh', points="outliers",
	                        title="EV Owners: Power Usage Box Plot")
	        st.plotly_chart(fig_ev, use_container_width=True)

	    with col4:
	        fig_solar = px.box(solar_df, y='power_consumption_kwh', points="outliers",
	                           title="Solar Homes: Power Usage Box Plot")
	        st.plotly_chart(fig_solar, use_container_width=True)

	    st.markdown("### 📈 Trend Over Time")

	    # Filter only rows where either EV or Solar is installed
	    df_combo = df[df['ev_owner'] + df['solar_installed'] > 0].copy()

	    # Label each row; vectorised so an empty selection still yields a
	    # well-formed 'label' column.
	    has_ev = df_combo['ev_owner'].astype(bool)
	    has_solar = df_combo['solar_installed'].astype(bool)
	    df_combo['label'] = np.select(
	        [has_ev & has_solar, has_ev, has_solar],
	        ["EV + Solar", "EV Only", "Solar Only"],
	        default="Other",
	    )

	    # Plot
	    fig_trend = px.line(
	        df_combo,
	        x='timestamp',
	        y='power_consumption_kwh',
	        color='label',
	        title="Energy Usage Trend: EV & Solar Homes",
	    )
	    st.plotly_chart(fig_trend, use_container_width=True)