# Streamlit dashboard that runs a 24-hour smart-grid episode with either a
# trained PPO agent, a price-threshold rule, or both, and reports the cost.
import os
import time

import numpy as np
import pandas as pd
import streamlit as st
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

from smart_grid_env import SmartGridEnv

# Number of environment steps in one simulated day (1 step = 1 hour).
EPISODE_STEPS = 24

# Human-readable labels for the three discrete actions.
ACTION_LABELS = {0: "Hold ⏸", 1: "Charge ⬆", 2: "Discharge ⬇"}

# ── Page config ───────────────────────────────────────────────────────────────
st.set_page_config(page_title="Smart Grid AI Control", layout="wide")
st.title("Smart Grid Energy Management System")
st.markdown("### PPO Reinforcement Learning Agent vs. Rule-Based Baseline")

# ── Sidebar ───────────────────────────────────────────────────────────────────
st.sidebar.header("Simulation Settings")
sim_speed = st.sidebar.slider("Speed (sec / step)", 0.05, 2.0, 0.3)
agent_choice = st.sidebar.radio(
    "Agent to run",
    ["PPO Agent", "Rule-Based Baseline", "Compare Both"],
)
run_btn = st.sidebar.button("▶ Start 24-Hour Simulation", type="primary")
st.sidebar.markdown("---")
st.sidebar.markdown(
    "**Rule-based logic:** charge when price < 0.20 and battery < 80%, "
    "discharge when price > 0.40 and battery > 20%, else hold."
)


# ── Helpers ───────────────────────────────────────────────────────────────────
def action_to_label(action: int) -> str:
    """Return the display label for a discrete action id ("?" if unknown)."""
    return ACTION_LABELS.get(action, "?")


def rule_based_action(obs: np.ndarray) -> int:
    """Simple price-threshold rule — useful as a sanity-check baseline.

    ``obs`` is unpacked as (battery, solar, demand, price); only battery and
    price influence the decision.
    """
    battery, _solar, _demand, price = obs
    if price < 0.20 and battery < 80.0:
        return 1  # cheap electricity → charge
    if price > 0.40 and battery > 20.0:
        return 2  # expensive electricity → use battery
    return 0  # hold


def load_ppo_model():
    """Load the trained PPO model and, if present, its normalisation stats.

    Returns:
        ``(model, vec_env)`` on success, or ``(None, None)`` when
        ``ppo_smart_grid.zip`` is missing or loading fails (a load failure is
        also reported in the UI via ``st.error``).
    """
    if not os.path.exists("ppo_smart_grid.zip"):
        return None, None
    try:
        env = DummyVecEnv([SmartGridEnv])
        if os.path.exists("vec_normalize.pkl"):
            # NOTE: VecNormalize.load unpickles the file — only load
            # statistics produced by your own training run.
            env = VecNormalize.load("vec_normalize.pkl", env)
            # Evaluation mode: freeze running statistics, keep raw rewards.
            env.training = False
            env.norm_reward = False
        model = PPO.load("ppo_smart_grid", env=env)
        return model, env
    except Exception as e:  # UI boundary: surface the problem and fall back
        st.error(f"Could not load model: {e}")
        return None, None


def run_episode(agent: str, model=None, vec_env=None, speed: float = 0.3):
    """Run a single episode of up to 24 steps and stream it to the page.

    Args:
        agent: ``'ppo'`` to act with ``model``; anything else (or a missing
            model) uses the rule-based policy.
        model: Loaded PPO model, or ``None``.
        vec_env: Vectorised env returned by ``load_ppo_model``. When it is a
            ``VecNormalize`` wrapper, its statistics are used to normalise the
            observations fed to the policy.
        speed: Seconds to sleep after each step so the live view is readable.

    Returns:
        ``(DataFrame of step-level records, total cost)``.
    """
    use_ppo = agent == "ppo" and model is not None

    records = []
    total_cost = 0.0

    # Placeholders that are overwritten in place on every step.
    live_battery = st.empty()
    live_price = st.empty()
    live_cost = st.empty()
    live_chart = st.empty()

    raw_env = SmartGridEnv()
    try:
        obs_raw, _ = raw_env.reset()

        for step in range(EPISODE_STEPS):
            # ---- Pick action ----
            if use_ppo:
                # The policy must see the state of the env that is actually
                # being scored. Normalise raw_env's own observation instead of
                # stepping a second, independent env inside vec_env whose
                # trajectory may differ from this one.
                if isinstance(vec_env, VecNormalize):
                    policy_obs = vec_env.normalize_obs(obs_raw)
                else:
                    policy_obs = obs_raw
                action_arr, _ = model.predict(policy_obs, deterministic=True)
                action = int(np.asarray(action_arr).item())  # numpy → plain int
            else:
                action = rule_based_action(obs_raw)

            obs_raw, _, terminated, truncated, info = raw_env.step(action)

            cost = info["cost"]
            total_cost += cost
            battery = info["battery_soc"]
            solar = info["solar_kw"]
            demand = info["demand_kw"]
            price = info["price"]

            # ---- Live metrics ----
            live_battery.metric("🔋 Battery SoC", f"{battery:.1f} %", action_to_label(action))
            live_price.metric("💲 Grid Price", f"${price:.3f}/kWh")
            live_cost.metric("💰 Running Cost", f"${total_cost:.2f}", delta_color="inverse")

            records.append({
                "Hour": step + 1,
                "Battery (%)": round(battery, 2),
                "Solar (kW)": round(solar, 2),
                "Demand (kW)": round(demand, 2),
                "Price ($/kWh)": round(price, 3),
                "Step Cost ($)": round(cost, 3),
                "Action": action_to_label(action),
            })

            # ---- Live chart (updates every step) ----
            df_so_far = pd.DataFrame(records).set_index("Hour")
            live_chart.line_chart(
                df_so_far[["Battery (%)", "Solar (kW)", "Demand (kW)"]],
                height=250,
            )
            time.sleep(speed)

            # Do not keep stepping an environment that reported episode end.
            if terminated or truncated:
                break
    finally:
        # Release the environment even if a step raised.
        raw_env.close()

    return pd.DataFrame(records), total_cost


def show_results(df: pd.DataFrame, total_cost: float, label: str):
    """Render the end-of-episode summary: total cost, two charts, full log."""
    st.success(f"**{label}** — Total 24-hour cost: **${total_cost:.2f}**")

    by_hour = df.set_index("Hour")
    col_a, col_b = st.columns(2)
    with col_a:
        st.subheader("Hourly step cost ($)")
        st.bar_chart(by_hour[["Step Cost ($)"]], height=220)
    with col_b:
        st.subheader("Battery, solar and demand")
        st.line_chart(
            by_hour[["Battery (%)", "Solar (kW)", "Demand (kW)"]],
            height=220,
        )

    with st.expander("📋 Full step-by-step log"):
        st.dataframe(df, use_container_width=True)


# ── Main simulation ───────────────────────────────────────────────────────────
if run_btn:
    model, vec_env = load_ppo_model()
    ppo_available = model is not None

    # What the "PPO" slot actually runs, and how it is labelled.
    ppo_slot_agent = "ppo" if ppo_available else "rule"
    ppo_slot_label = "PPO Agent" if ppo_available else "Rule-Based (fallback)"

    if not ppo_available and agent_choice in ("PPO Agent", "Compare Both"):
        st.warning(
            "ppo_smart_grid.zip not found — run `python train.py` first. "
            "Falling back to rule-based agent."
        )

    # ---- PPO only ----
    if agent_choice == "PPO Agent":
        st.markdown("### PPO Agent")
        df, cost = run_episode(ppo_slot_agent, model, vec_env, sim_speed)
        show_results(df, cost, ppo_slot_label)

    # ---- Rule-based only ----
    elif agent_choice == "Rule-Based Baseline":
        st.markdown("### Rule-Based Baseline")
        df, cost = run_episode("rule", speed=sim_speed)
        show_results(df, cost, "Rule-Based Baseline")

    # ---- Compare both ----
    else:
        tab_ppo, tab_rule = st.tabs([" PPO Agent", " Rule-Based Baseline"])

        with tab_ppo:
            st.markdown("#### PPO Agent — running...")
            df_ppo, cost_ppo = run_episode(ppo_slot_agent, model, vec_env, sim_speed)
            show_results(df_ppo, cost_ppo, ppo_slot_label)

        with tab_rule:
            st.markdown("#### Rule-Based Baseline — running...")
            df_rule, cost_rule = run_episode("rule", speed=sim_speed)
            show_results(df_rule, cost_rule, "Rule-Based Baseline")

        # ---- Side-by-side cost summary ----
        st.markdown("---")
        st.subheader("Cost comparison")
        if ppo_available:
            c1, c2, c3 = st.columns(3)
            c1.metric("PPO total cost", f"${cost_ppo:.2f}")
            c2.metric("Rule-based total cost", f"${cost_rule:.2f}")

            saving = cost_rule - cost_ppo
            if saving > 0:
                verdict, colour = "better than rule-based", "normal"
            elif saving < 0:
                verdict, colour = "worse than rule-based", "inverse"
            else:
                # A tie is neither better nor worse; show it in neutral grey.
                verdict, colour = "same as rule-based", "off"
            c3.metric("PPO saving", f"${saving:.2f}", delta=verdict, delta_color=colour)
        else:
            # Both tabs ran the rule-based policy, so presenting one of them
            # as "PPO total cost" would be misleading.
            st.info(
                "PPO model unavailable — both runs used the rule-based agent, "
                "so there is no PPO cost to compare."
            )

else:
    st.info("Configure settings in the sidebar and click **▶ Start 24-Hour Simulation**.")
    st.markdown("""
#### How it works

| Component | Detail |
|---|---|
| Environment | 24-step episode (1 step = 1 hour) |
| Observation | Battery SoC, solar generation, house demand, grid price |
| Actions | Hold / Charge from grid / Discharge battery |
| Reward | Negative net grid cost (includes solar sell-back revenue) |
| Agent | PPO with MLP policy, trained via `stable-baselines3` |
| Baseline | Simple price-threshold rule for comparison |
""")