Spaces:

chinmay0805
/

Smart-Grid-System

Running

App Files Files Community

chinmay0805 committed on Apr 21

Commit

06f9287

0 Parent(s):

initial commit

Browse files

Files changed (4) hide show

.gitignore +23 -0
app.py +226 -0
smart_grid_env.py +239 -0
train.py +133 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,23 @@

+# Python Cache
+__pycache__/
+*.py[cod]
+*$py.class
+# Virtual Environment
+venv/
+# Model Checkpoints & Artifacts
+best_model/
+checkpoints/
+*.zip
+# Logs and Evaluations
+eval_logs/
+tb_logs/
+*.npz
+# Pickled Data (e.g., normalization vectors)
+*.pkl
+# IDE / Editor
+.vscode/

app.py ADDED Viewed

	@@ -0,0 +1,226 @@

+"""
+app.py — Streamlit dashboard for SmartGridEnv PPO agent
+Fixes vs. original:
+    - Loads VecNormalize stats (vec_normalize.pkl) alongside the PPO model
+      so observations are correctly normalised at inference time
+    - int(action_arr.item()) converts the predicted numpy action to a plain int for action_to_label()
+    - Added a rule-based baseline agent for comparison
+    - Richer charts: cost-per-hour bar chart + solar/demand/battery area chart
+    - Step-level info table logged per episode
+    - Graceful error handling throughout
+"""
+import os
+import time
+import numpy as np
+import pandas as pd
+import streamlit as st
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+from smart_grid_env import SmartGridEnv
+# ── Page config ───────────────────────────────────────────────────────────────
+# First Streamlit call in the script: wide layout, then the page title and subtitle.
+st.set_page_config(page_title="Smart Grid AI Control", layout="wide")
+st.title("Smart Grid Energy Management System")
+st.markdown("### PPO Reinforcement Learning Agent vs. Rule-Based Baseline")
+# ── Sidebar ───────────────────────────────────────────────────────────────────
+# The three module-level names below (sim_speed, agent_choice, run_btn) are
+# read by the main simulation section further down.
+st.sidebar.header("Simulation Settings")
+# Pause between simulated hours; handed to run_episode() as `speed`, which sleeps that long per step.
+sim_speed   = st.sidebar.slider("Speed (sec / step)", 0.05, 2.0, 0.3)
+# The option strings are compared verbatim in the main section, so keep them in sync.
+agent_choice = st.sidebar.radio(
+    "Agent to run",
+    ["PPO Agent", "Rule-Based Baseline", "Compare Both"],
+)
+# Gates the whole simulation: nothing runs unless this is truthy.
+run_btn = st.sidebar.button("▶ Start 24-Hour Simulation", type="primary")
+st.sidebar.markdown("---")
+# Human-readable summary of the thresholds hard-coded in rule_based_action().
+st.sidebar.markdown(
+    "**Rule-based logic:** charge when price < 0.20 and battery < 80%, "
+    "discharge when price > 0.40 and battery > 20%, else hold."
+)
+# ── Helpers ───────────────────────────────────────────────────────────────────
+def action_to_label(action: int) -> str:
+    return {0: "Hold ⏸", 1: "Charge ⬆", 2: "Discharge ⬇"}.get(action, "?")
+def rule_based_action(obs: np.ndarray) -> int:
+    """Simple price-threshold rule — useful as a sanity-check baseline."""
+    battery, solar, demand, price = obs
+    if price < 0.20 and battery < 80.0:
+        return 1   # cheap electricity → charge
+    if price > 0.40 and battery > 20.0:
+        return 2   # expensive electricity → use battery
+    return 0       # hold
+def load_ppo_model():
+    """Load trained PPO model + normalisation stats. Returns (model, vec_env) or None."""
+    if not os.path.exists("ppo_smart_grid.zip"):
+        return None, None
+    try:
+        env = DummyVecEnv([SmartGridEnv])
+        if os.path.exists("vec_normalize.pkl"):
+            env = VecNormalize.load("vec_normalize.pkl", env)
+            env.training    = False
+            env.norm_reward = False
+        model = PPO.load("ppo_smart_grid", env=env)
+        return model, env
+    except Exception as e:
+        st.error(f"Could not load model: {e}")
+        return None, None
+def run_episode(agent: str, model=None, vec_env=None, speed: float = 0.3):
+    """
+    Run a single 24-step episode and return a DataFrame of step-level data.
+    agent: 'ppo' | 'rule'
+    """
+    raw_env = SmartGridEnv()
+    obs_raw, _ = raw_env.reset()
+    # PPO uses the normalised vec_env; rule-based uses raw env directly
+    if agent == "ppo" and vec_env is not None:
+        obs_vec = vec_env.reset()
+    records = []
+    total_cost = 0.0
+    live_battery = st.empty()
+    live_price   = st.empty()
+    live_cost    = st.empty()
+    live_chart   = st.empty()
+    for step in range(24):
+        # ---- Pick action ----
+        if agent == "ppo" and model is not None:
+            action_arr, _ = model.predict(obs_vec, deterministic=True)
+            action = int(action_arr.item())        # numpy → plain int
+            obs_vec, _, _, _ = vec_env.step(action_arr)
+            # Step the raw env with the same action to get proper info dict
+            obs_raw, reward, terminated, _, info = raw_env.step(action)
+        else:
+            action = rule_based_action(obs_raw)
+            obs_raw, reward, terminated, _, info = raw_env.step(action)
+        cost = info["cost"]
+        total_cost += cost
+        battery = info["battery_soc"]
+        solar   = info["solar_kw"]
+        demand  = info["demand_kw"]
+        price   = info["price"]
+        # ---- Live metrics ----
+        col1, col2, col3 = live_battery, live_price, live_cost
+        live_battery.metric("🔋 Battery SoC",  f"{battery:.1f} %",  action_to_label(action))
+        live_price.metric(  "💲 Grid Price",    f"${price:.3f}/kWh")
+        live_cost.metric(   "💰 Running Cost",  f"${total_cost:.2f}", delta_color="inverse")
+        records.append({
+            "Hour":        step + 1,
+            "Battery (%)": round(battery, 2),
+            "Solar (kW)":  round(solar, 2),
+            "Demand (kW)": round(demand, 2),
+            "Price ($/kWh)": round(price, 3),
+            "Step Cost ($)": round(cost, 3),
+            "Action":      action_to_label(action),
+        })
+        # ---- Live chart (updates every step) ----
+        df_so_far = pd.DataFrame(records).set_index("Hour")
+        live_chart.line_chart(
+            df_so_far[["Battery (%)", "Solar (kW)", "Demand (kW)"]],
+            height=250,
+        )
+        time.sleep(speed)
+    raw_env.close()
+    return pd.DataFrame(records), total_cost
+def show_results(df: pd.DataFrame, total_cost: float, label: str):
+    st.success(f"**{label}** — Total 24-hour cost: **${total_cost:.2f}**")
+    col_a, col_b = st.columns(2)
+    with col_a:
+        st.subheader("Hourly step cost ($)")
+        st.bar_chart(df.set_index("Hour")[["Step Cost ($)"]], height=220)
+    with col_b:
+        st.subheader("Battery, solar and demand")
+        st.line_chart(
+            df.set_index("Hour")[["Battery (%)", "Solar (kW)", "Demand (kW)"]],
+            height=220,
+        )
+    with st.expander("📋 Full step-by-step log"):
+        st.dataframe(df, use_container_width=True)
+# ── Main simulation ───────────────────────────────────────────────────────────
+if run_btn:
+    model, vec_env = load_ppo_model()
+    ppo_available  = model is not None
+    if not ppo_available and agent_choice in ("PPO Agent", "Compare Both"):
+        st.warning(
+            "ppo_smart_grid.zip not found — run `python train.py` first. "
+            "Falling back to rule-based agent."
+        )
+    # ---- PPO only ----
+    if agent_choice == "PPO Agent":
+        st.markdown("### PPO Agent")
+        agent = "ppo" if ppo_available else "rule"
+        df, cost = run_episode(agent, model, vec_env, sim_speed)
+        show_results(df, cost, "PPO Agent" if ppo_available else "Rule-Based (fallback)")
+    # ---- Rule-based only ----
+    elif agent_choice == "Rule-Based Baseline":
+        st.markdown("###  Rule-Based Baseline")
+        df, cost = run_episode("rule", speed=sim_speed)
+        show_results(df, cost, "Rule-Based Baseline")
+    # ---- Compare both ----
+    else:
+        tab_ppo, tab_rule = st.tabs([" PPO Agent", " Rule-Based Baseline"])
+        with tab_ppo:
+            st.markdown("#### PPO Agent — running...")
+            agent = "ppo" if ppo_available else "rule"
+            df_ppo, cost_ppo = run_episode(agent, model, vec_env, sim_speed)
+            show_results(df_ppo, cost_ppo, "PPO Agent" if ppo_available else "Rule-Based (fallback)")
+        with tab_rule:
+            st.markdown("#### Rule-Based Baseline — running...")
+            df_rule, cost_rule = run_episode("rule", speed=sim_speed)
+            show_results(df_rule, cost_rule, "Rule-Based Baseline")
+        # ---- Side-by-side cost summary ----
+        st.markdown("---")
+        st.subheader("Cost comparison")
+        c1, c2, c3 = st.columns(3)
+        c1.metric("PPO total cost",       f"${cost_ppo:.2f}")
+        c2.metric("Rule-based total cost", f"${cost_rule:.2f}")
+        saving = cost_rule - cost_ppo
+        c3.metric("PPO saving",           f"${saving:.2f}",
+                  delta=f"{'better' if saving > 0 else 'worse'} than rule-based",
+                  delta_color="normal" if saving > 0 else "inverse")
+else:
+    st.info("Configure settings in the sidebar and click **▶ Start 24-Hour Simulation**.")
+    st.markdown("""
+    #### How it works
+    | Component | Detail |
+    |---|---|
+    | Environment | 24-step episode (1 step = 1 hour) |
+    | Observation | Battery SoC, solar generation, house demand, grid price |
+    | Actions | Hold / Charge from grid / Discharge battery |
+    | Reward | Negative net grid cost (includes solar sell-back revenue) |
+    | Agent | PPO with MLP policy, trained via `stable-baselines3` |
+    | Baseline | Simple price-threshold rule for comparison |
+    """)

smart_grid_env.py ADDED Viewed

	@@ -0,0 +1,239 @@

+import gymnasium as gym
+import numpy as np
+from gymnasium import spaces
+class SmartGridEnv(gym.Env):
+    """
+    Smart Grid Battery Management Environment (Gymnasium-compatible)
+    Goal: Minimize daily electricity cost by intelligently charging/discharging
+          a home battery, using solar generation, and interacting with the grid.
+    Episode: 24 steps (1 step = 1 hour); `terminated` is set on the 24th step.
+    Observation (4 raw float32 values, NOT normalized by this class; bounded
+    by OBS_LOW / OBS_HIGH):
+        [battery_soc (0-100), solar_gen_kw (0-10), house_demand_kw (0-10),
+         grid_price (0-1.5 $/kWh)]
+        The battery entry is stored in kWh; with BATTERY_CAPACITY = 100 it is
+        numerically equal to the state of charge in percent.
+    Action space (Discrete 3):
+        0 = Hold   — buy the net load (demand - solar) from the grid, or sell
+                     a solar surplus at the feed-in tariff
+        1 = Charge — buy from grid to fill battery (up to 10 per step, limited
+                     by remaining headroom); net load is also bought from grid
+        2 = Discharge — draw up to 10 per step from the battery; whatever
+                     exceeds the net load is sold at the feed-in tariff
+    Reward:
+        Negative net cost per step (agent learns to minimize cost).
+        Includes sell-back revenue when surplus energy is fed to the grid
+        (applied in every action branch).
+        Includes a small battery health penalty for extreme SoC operation.
+    Fixes vs. original:
+        - Observation space bounds match actual value ranges
+        - Solar/demand energy balance applied in all action branches
+        - Grid sell-back (feed-in tariff) modeled in all action branches
+        - Battery SoC clamped to [0, battery_capacity]
+        - Charging efficiency loss (90%) modeled
+        - Correlated time-series for price/solar/demand (no more i.i.d. jumps)
+        - Battery health penalty for operating near 0% or 100% SoC
+        - render() method added
+        - Fully compatible with check_env() and VecNormalize
+    """
+    metadata = {"render_modes": ["human"]}
+    # Physical constants
+    BATTERY_CAPACITY  = 100.0   # kWh
+    CHARGE_RATE       = 10.0    # kW  (max charge/discharge per step; added to the kWh level as-is, i.e. 1-hour steps)
+    CHARGE_EFFICIENCY = 0.90    # charging loss only: grid draw = energy stored / 0.90 (discharge is lossless)
+    FEED_IN_TARIFF    = 0.50    # sell surplus energy at 50% of grid price
+    SOC_PENALTY_COEF  = 0.005   # small penalty for extreme battery levels
+    # Observation high limits (battery 0-100 %, solar 0-10 kW,
+    # demand 0-10 kW, price 0-1.5 $/kWh to cover double-peak)
+    OBS_HIGH = np.array([100.0, 10.0, 10.0, 1.5], dtype=np.float32)
+    OBS_LOW  = np.zeros(4, dtype=np.float32)
+    def __init__(self, render_mode=None):
+        """Create the env; render_mode="human" makes step() print each hour via render()."""
+        super().__init__()
+        self.render_mode = render_mode
+        # --- Action / Observation Spaces ---
+        self.action_space = spaces.Discrete(3)
+        self.observation_space = spaces.Box(
+            low=self.OBS_LOW,
+            high=self.OBS_HIGH,
+            shape=(4,),
+            dtype=np.float32,
+        )
+        # Internal state (battery starts half full)
+        self.current_step    = 0
+        self.current_battery = self.BATTERY_CAPACITY * 0.5
+        self._state          = self._make_initial_state()
+        # For correlated time-series generation: smoothed levels that
+        # _generate_next_state() pulls toward the time-of-day mean each step
+        self._price_base   = 0.2   # $/kWh (drifts each step)
+        self._demand_base  = 3.0   # kW
+        self._solar_base   = 0.0   # kW
+    # ------------------------------------------------------------------
+    # Gymnasium API
+    # ------------------------------------------------------------------
+    def reset(self, seed=None, options=None):
+        """
+        Start a new episode: step counter 0, battery at 50% of capacity,
+        smoothed price/demand/solar levels back at their defaults.
+        `seed` is forwarded to gym.Env.reset(); `options` is accepted but unused.
+        Returns (initial observation, empty info dict).
+        """
+        super().reset(seed=seed)
+        self.current_step    = 0
+        self.current_battery = self.BATTERY_CAPACITY * 0.5
+        self._price_base     = 0.2
+        self._demand_base    = 3.0
+        self._solar_base     = 0.0
+        self._state = self._make_initial_state()
+        return self._state.copy(), {}
+    def step(self, action):
+        """
+        Apply one battery action for the current hour and advance to the next.
+        Returns (observation, reward, terminated, truncated, info) where
+            reward     = -(grid_cost + health_penalty)
+            terminated = True once 24 steps have been taken
+            truncated  = always False
+            info       = cost of this step, battery level AFTER the action, the
+                         solar/demand/price that applied DURING this step, and
+                         the action taken.
+        The observation returned on the terminating step is all zeros.
+        Raises AssertionError if `action` is not in the action space.
+        """
+        assert self.action_space.contains(action), f"Invalid action {action}"
+        battery, solar, demand, price = self._state
+        # ---- 1. Compute net load BEFORE battery action ----
+        # Positive  → house needs more than solar provides (must buy or discharge)
+        # Negative  → solar surplus (can sell back or charge)
+        net_load = demand - solar
+        grid_cost = 0.0   # positive = paying, negative = earning
+        # ---- 2. Execute battery action ----
+        if action == 1:  # CHARGE from grid
+            # How much can we actually charge?
+            headroom = self.BATTERY_CAPACITY - battery
+            charge_requested = min(self.CHARGE_RATE, headroom)
+            # Efficiency: we buy more from grid than actually stored
+            grid_draw = charge_requested / self.CHARGE_EFFICIENCY
+            self.current_battery = np.clip(
+                battery + charge_requested,
+                0.0, self.BATTERY_CAPACITY
+            )
+            # Also cover net_load from grid
+            grid_cost = (max(0.0, net_load) + grid_draw) * price
+            # A solar surplus is not used for charging here: it is sold for
+            # sell-back credit while the charge itself is bought from the grid
+            grid_cost -= max(0.0, -net_load) * price * self.FEED_IN_TARIFF
+        elif action == 2:  # DISCHARGE battery to cover load
+            # How much battery can supply? (always the full rate if available,
+            # regardless of how large the load is)
+            discharge_requested = min(self.CHARGE_RATE, battery)
+            self.current_battery = np.clip(
+                battery - discharge_requested,
+                0.0, self.BATTERY_CAPACITY
+            )
+            # Remaining load after battery contribution
+            residual_load = net_load - discharge_requested
+            if residual_load > 0:
+                grid_cost = residual_load * price          # still need some grid
+            else:
+                grid_cost = residual_load * price * self.FEED_IN_TARIFF  # surplus → sell (negative cost)
+        else:  # HOLD — let solar + grid balance the load
+            if net_load > 0:
+                grid_cost = net_load * price               # buy deficit from grid
+            else:
+                grid_cost = net_load * price * self.FEED_IN_TARIFF  # sell surplus (negative cost)
+        # ---- 3. Battery health penalty (discourages extreme SoC) ----
+        # Linear in how far the post-action SoC fraction lies above 0.9 or below 0.1
+        soc_frac = self.current_battery / self.BATTERY_CAPACITY
+        health_penalty = self.SOC_PENALTY_COEF * (
+            max(0.0, soc_frac - 0.9) + max(0.0, 0.1 - soc_frac)
+        )
+        # ---- 4. Reward ----
+        reward = -(grid_cost + health_penalty)
+        # ---- 5. Advance time, generate next state ----
+        self.current_step += 1
+        terminated = self.current_step >= 24
+        truncated  = False
+        if not terminated:
+            self._state = self._generate_next_state()
+        else:
+            # Terminal observation is a zero placeholder (battery entry included)
+            self._state = np.zeros(4, dtype=np.float32)
+        # solar/demand/price are the values unpacked at the top of this step,
+        # not the freshly generated ones for the next hour
+        info = {
+            "cost":        float(grid_cost),
+            "battery_soc": float(self.current_battery),
+            "solar_kw":    float(solar),
+            "demand_kw":   float(demand),
+            "price":       float(price),
+            "action":      int(action),
+        }
+        if self.render_mode == "human":
+            self.render()
+        return self._state.copy(), float(reward), terminated, truncated, info
+    def render(self):
+        """
+        Print one status line. When called from step() this happens after the
+        state was advanced, so solar/demand/price are those of the upcoming hour.
+        """
+        b, s, d, p = self._state
+        print(
+            f"[Hour {self.current_step:02d}] "
+            f"Battery={self.current_battery:.1f}% | "
+            f"Solar={s:.2f}kW | Demand={d:.2f}kW | Price=${p:.3f}/kWh"
+        )
+    def close(self):
+        """Nothing to release."""
+        pass
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+    def _make_initial_state(self) -> np.ndarray:
+        """Hour-0 state: current battery level, no solar, 2.5 kW demand, 0.15 $/kWh."""
+        return np.array(
+            [self.current_battery, 0.0, 2.5, 0.15],
+            dtype=np.float32
+        )
+    def _generate_next_state(self) -> np.ndarray:
+        """
+        Correlated time-series generation so successive hours are smooth.
+        Each variable keeps a smoothed base level that moves a fixed fraction
+        (0.3 for solar and demand, 0.4 for price) toward the time-of-day mean,
+        then Gaussian noise is added and the result is clipped.
+        Random draws are made in the order solar, demand, price.
+        """
+        hour = self.current_step  # 1–23 in practice: step() increments current_step before calling this
+        # --- Solar: bell-curve peaking at noon, zero at night ---
+        if 6 <= hour <= 18:
+            solar_mean = 5.0 * np.exp(-0.5 * ((hour - 12) / 3.5) ** 2)
+        else:
+            solar_mean = 0.0
+        self._solar_base += 0.3 * (solar_mean - self._solar_base)
+        next_solar = float(np.clip(
+            self._solar_base + self.np_random.normal(0, 0.4),
+            0.0, 10.0
+        ))
+        # --- Demand: morning and evening peaks ---
+        demand_mean = (
+            2.5
+            + 2.0 * np.exp(-0.5 * ((hour - 8)  / 1.5) ** 2)   # morning
+            + 3.0 * np.exp(-0.5 * ((hour - 19) / 2.0) ** 2)   # evening
+        )
+        self._demand_base += 0.3 * (demand_mean - self._demand_base)
+        next_demand = float(np.clip(
+            self._demand_base + self.np_random.normal(0, 0.5),
+            0.0, 10.0
+        ))
+        # --- Price: cheap at night, expensive at peak (17–21) ---
+        # 17–21 → 0.65, 6–9 → 0.30, 23 and 0–5 → 0.12, remaining hours (10–16, 22) → 0.22
+        if 17 <= hour <= 21:
+            price_mean = 0.65
+        elif 6 <= hour <= 9:
+            price_mean = 0.30
+        elif 23 <= hour or hour <= 5:
+            price_mean = 0.12
+        else:
+            price_mean = 0.22
+        self._price_base += 0.4 * (price_mean - self._price_base)
+        next_price = float(np.clip(
+            self._price_base + self.np_random.normal(0, 0.03),
+            0.05, 1.5
+        ))
+        # Battery entry carries the post-action level set by step()
+        return np.array(
+            [self.current_battery, next_solar, next_demand, next_price],
+            dtype=np.float32
+        )

train.py ADDED Viewed

	@@ -0,0 +1,133 @@

+"""
+train.py — PPO training script for SmartGridEnv
+Fixes vs. original:
+    - check_env() validates the environment before training starts
+    - VecNormalize auto-normalizes observations and rewards for stable gradients
+    - 500,000 timesteps (was 10,000 — far too few for PPO to learn anything)
+    - EvalCallback saves the best model checkpoint automatically
+    - Hyperparameters tuned for this problem (n_steps, batch_size, ent_coef)
+    - vec_normalize stats saved alongside model (required for correct inference)
+    - TensorBoard logging enabled (optional — run: tensorboard --logdir ./tb_logs)
+"""
+import os
+from stable_baselines3 import PPO
+from stable_baselines3.common.env_util import make_vec_env
+from stable_baselines3.common.vec_env import VecNormalize
+from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
+from stable_baselines3.common.env_checker import check_env
+from smart_grid_env import SmartGridEnv
+# ── 1. Validate environment ───────────────────────────────────────────────────
+print("Checking environment...")
+check_env(SmartGridEnv(), warn=True)
+print("Environment check passed.\n")
+# ── 2. Vectorised training environment (4 parallel workers) ───────────────────
+N_ENVS = 4
+train_env = make_vec_env(SmartGridEnv, n_envs=N_ENVS)
+train_env = VecNormalize(
+    train_env,
+    norm_obs=True,      # normalizes each obs dimension to ~N(0,1)
+    norm_reward=True,   # normalizes reward scale — critical for PPO stability
+    clip_obs=10.0,
+)
+# ── 3. Separate evaluation environment (no reward normalisation) ───────────────
+eval_env = make_vec_env(SmartGridEnv, n_envs=1)
+eval_env = VecNormalize(
+    eval_env,
+    norm_obs=True,
+    norm_reward=False,  # raw rewards for interpretable eval metrics
+    training=False,     # stats are copied from train_env, not updated
+    clip_obs=10.0,
+)
+# ── 4. Define the PPO model ────────────────────────────────────────────────────
+model = PPO(
+    policy          = "MlpPolicy",
+    env             = train_env,
+    verbose         = 1,
+    tensorboard_log = "./tb_logs",
+    # --- Core PPO hyperparameters ---
+    n_steps         = 1024,     # steps collected per env per rollout
+    batch_size      = 256,      # minibatch size for gradient update
+    n_epochs        = 10,       # number of passes over each rollout buffer
+    gamma           = 0.99,     # discount factor (long-horizon cost matters)
+    gae_lambda      = 0.95,     # GAE smoothing
+    clip_range      = 0.2,      # PPO clip parameter
+    learning_rate   = 3e-4,     # Adam lr
+    ent_coef        = 0.01,     # entropy bonus (encourages exploration early on)
+    vf_coef         = 0.5,
+    max_grad_norm   = 0.5,
+    # --- Policy network architecture ---
+    policy_kwargs   = dict(net_arch=[128, 128]),  # 2-layer MLP, 128 units each
+)
+# ── 5. Callbacks ───────────────────────────────────────────────────────────────
+os.makedirs("./best_model",   exist_ok=True)
+os.makedirs("./checkpoints",  exist_ok=True)
+eval_callback = EvalCallback(
+    eval_env,
+    best_model_save_path = "./best_model",
+    log_path             = "./eval_logs",
+    eval_freq            = max(5_000 // N_ENVS, 1),  # evaluate every ~5k env steps
+    n_eval_episodes      = 20,      # average over 20 full 24-hour episodes
+    deterministic        = True,
+    render               = False,
+)
+checkpoint_callback = CheckpointCallback(
+    save_freq  = max(50_000 // N_ENVS, 1),
+    save_path  = "./checkpoints",
+    name_prefix= "ppo_smart_grid",
+)
+# ── 6. Train ───────────────────────────────────────────────────────────────────
+TOTAL_TIMESTEPS = 500_000
+print(f"Training PPO for {TOTAL_TIMESTEPS:,} timesteps across {N_ENVS} parallel envs...")
+print("Tip: run `tensorboard --logdir ./tb_logs` to monitor training live.\n")
+model.learn(
+    total_timesteps = TOTAL_TIMESTEPS,
+    callback        = [eval_callback, checkpoint_callback],
+    progress_bar    = True,
+)
+# ── 7. Save final model + normalisation statistics ────────────────────────────
+model.save("ppo_smart_grid")
+train_env.save("vec_normalize.pkl")   # MUST be saved — needed for inference
+print("\nTraining complete!")
+print("  Saved: ppo_smart_grid.zip")
+print("  Saved: vec_normalize.pkl  (required alongside the model for inference)")
+print("  Best checkpoint: ./best_model/best_model.zip")
+# ── 8. Quick sanity-check: run one episode with the trained agent ──────────────
+print("\n--- Sanity check: one 24-hour episode ---")
+from stable_baselines3.common.vec_env import DummyVecEnv
+test_env = DummyVecEnv([SmartGridEnv])
+test_env = VecNormalize.load("vec_normalize.pkl", test_env)
+test_env.training = False
+test_env.norm_reward = False
+obs = test_env.reset()
+total_cost = 0.0
+for hour in range(24):
+    action, _ = model.predict(obs, deterministic=True)
+    obs, reward, done, info = test_env.step(action)
+    total_cost += info[0]["cost"]
+    action_label = ["Hold", "Charge", "Discharge"][int(action[0])]
+    print(
+        f"  Hour {hour+1:02d} | Action: {action_label:<10} | "
+        f"Battery: {info[0]['battery_soc']:5.1f}% | "
+        f"Price: ${info[0]['price']:.3f} | "
+        f"Step cost: ${info[0]['cost']:.3f}"
+    )
+print(f"\nTotal 24-hour cost: ${total_cost:.2f}")
+test_env.close()