Upload 30 files
Browse files- Other_algorithms/.DS_Store +0 -0
- Other_algorithms/Flat_System/PG/_init_.py +0 -0
- Other_algorithms/Flat_System/PG/pg_evaluation.py +520 -0
- Other_algorithms/Flat_System/PG/pg_train.py +373 -0
- Other_algorithms/Flat_System/PG/trainer/__init__.py +0 -0
- Other_algorithms/Flat_System/PG/trainer/pg.py +96 -0
- Other_algorithms/Flat_System/maddpg/__init__.py +0 -0
- Other_algorithms/Flat_System/maddpg/maddpg_evaluation.py +495 -0
- Other_algorithms/Flat_System/maddpg/maddpg_train.py +382 -0
- Other_algorithms/Flat_System/maddpg/trainer/__init__.py +0 -0
- Other_algorithms/Flat_System/maddpg/trainer/maddpg.py +196 -0
- Other_algorithms/Flat_System/mappo/_init_.py +0 -0
- Other_algorithms/Flat_System/mappo/mappo_evaluation.py +500 -0
- Other_algorithms/Flat_System/mappo/mappo_train.py +439 -0
- Other_algorithms/Flat_System/mappo/trainer/__init__.py +0 -0
- Other_algorithms/Flat_System/mappo/trainer/mappo.py +243 -0
- Other_algorithms/Flat_System/meanfield/_init_.py +0 -0
- Other_algorithms/Flat_System/meanfield/meanfield_evaluation.py +492 -0
- Other_algorithms/Flat_System/meanfield/meanfield_train.py +386 -0
- Other_algorithms/Flat_System/meanfield/trainer/__init__.py +0 -0
- Other_algorithms/Flat_System/meanfield/trainer/mfac.py +219 -0
- Other_algorithms/Flat_System/solar_sys_environment.py +635 -0
- Other_algorithms/HC_MAPPO/Environment/cluster_env_wrapper.py +164 -0
- Other_algorithms/HC_MAPPO/Environment/solar_sys_environment.py +673 -0
- Other_algorithms/HC_MAPPO/HC_MAPPO_evaluation.py +618 -0
- Other_algorithms/HC_MAPPO/HC_MAPPO_train.py +579 -0
- Other_algorithms/HC_MAPPO/cluster.py +140 -0
- Other_algorithms/HC_MAPPO/mappo/_init_.py +0 -0
- Other_algorithms/HC_MAPPO/mappo/trainer/__init__.py +0 -0
- Other_algorithms/HC_MAPPO/mappo/trainer/mappo.py +199 -0
Other_algorithms/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
Other_algorithms/Flat_System/PG/_init_.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/PG/pg_evaluation.py
ADDED
|
@@ -0,0 +1,520 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pg_evaluation.py
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import time
|
| 5 |
+
import re
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
|
| 12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 13 |
+
|
| 14 |
+
from solar_sys_environment import SolarSys
|
| 15 |
+
from PG.trainer.pg import PGAgent
|
| 16 |
+
|
| 17 |
+
# Pick the compute device once at import time: CUDA when torch reports it
# as available, otherwise the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
|
| 18 |
+
|
| 19 |
+
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of *values*.

    Args:
        values: Array-like of per-participant quantities. Lists and tuples
            are accepted as well as NumPy arrays.

    Returns:
        ``0.0`` for empty input, ``1.0`` when every entry is zero (all
        participants are treated identically), otherwise the index as a
        plain Python ``float``.
    """
    # Convert once so list/tuple input works and integer input is squared in
    # floating point instead of integer arithmetic.
    arr = np.asarray(values, dtype=float)
    count = arr.size
    if count == 0:
        return 0.0
    if not arr.any():
        # All-zero input would otherwise divide 0 by 0.
        return 1.0
    num = arr.sum() ** 2
    den = count * (arr ** 2).sum()
    return float(num / den)
|
| 27 |
+
|
| 28 |
+
def _load_pg_agents(env, model_path):
    """Create one PGAgent per environment agent and load its checkpoint.

    For agent ``i`` the file ``best_model_agent_{i}.pth`` in *model_path* is
    tried first, then the shared ``best_model.pth``. When neither exists the
    agent keeps its freshly constructed weights and a warning is printed.
    """
    state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]
    single_model_path = os.path.join(model_path, "best_model.pth")

    agents = []
    for i in range(env.num_agents):
        agent = PGAgent(
            state_dim=state_dim,
            action_dim=action_dim,
            lr=2e-4,
            gamma=0.95,
        )

        agent_model_path = os.path.join(model_path, f"best_model_agent_{i}.pth")
        if os.path.exists(agent_model_path):
            agent.load(agent_model_path)
            print(f"Loaded model for agent {i}")
        else:
            print(f"WARNING: Model file not found for agent {i}: {agent_model_path}")
            # Alternative: try loading a single model for all agents
            if os.path.exists(single_model_path):
                agent.load(single_model_path)
                print(f"Loaded single model for agent {i}")
            else:
                # Keep going (best effort), but make it obvious that the
                # results for this agent do not come from a trained policy.
                print(
                    f"WARNING: No checkpoint found for agent {i}; "
                    "evaluating it with freshly initialised weights."
                )

        agent.model.to(device).eval()
        agents.append(agent)
    return agents


def _select_actions(pg_agents, obs):
    """Return each agent's mean action, clipped to [0, 1], as a float32 array."""
    actions = []
    with torch.no_grad():
        for i, agent in enumerate(pg_agents):
            state = torch.FloatTensor(obs[i]).unsqueeze(0).to(device)
            # The model returns three values; only the first (the mean) is
            # used so that evaluation is deterministic.
            mean, _, _ = agent.model(state)
            action = mean.squeeze(0).cpu().numpy()
            actions.append(np.clip(action, 0.0, 1.0))
    return np.array(actions, dtype=np.float32)


def _build_step_rows(env, info, rewards, house_ids, num_agents, day_number, step_count):
    """Return one log row (dict) per house for the step that was just taken."""
    grid_price_now = env.get_grid_price(step_count)
    peer_price_now = info.get(
        "peer_price",
        env.get_peer_price(
            step_count,
            float(info["p2p_sell"].sum()),
            float(info["p2p_buy"].sum()),
        ),
    )

    # Optional info entries default to zero for every agent.
    zeros = [0] * num_agents
    charge_amounts = info.get("charge_amount", zeros)
    discharge_amounts = info.get("discharge_amount", zeros)
    grid_exports = info.get("grid_export", zeros)

    rows = []
    for i, hid in enumerate(house_ids):
        is_battery_house = hid in env.batteries
        charge_amount = float(charge_amounts[i])
        discharge_amount = float(discharge_amounts[i])

        if is_battery_house:
            battery = env.batteries[hid]
            soc = battery["soc"] / battery["max_capacity"]
            degradation_cost = (
                (charge_amount + discharge_amount) * battery["degradation_cost_per_kwh"]
            )
        else:
            soc = np.nan
            degradation_cost = 0.0

        grid_import_no_p2p = float(info["grid_import_no_p2p"][i])
        rows.append({
            "day": day_number,
            "step": step_count,
            "house": hid,
            "grid_import_no_p2p": grid_import_no_p2p,
            "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
            "grid_export": float(grid_exports[i]),
            "p2p_buy": float(info["p2p_buy"][i]),
            "p2p_sell": float(info["p2p_sell"][i]),
            "actual_cost": float(info["costs"][i]),
            "baseline_cost": grid_import_no_p2p * grid_price_now,
            "total_demand": float(env.demands[hid][step_count]),
            "total_solar": float(env.solars[hid][step_count]),
            "grid_price": grid_price_now,
            "peer_price": peer_price_now,
            "soc": soc,
            "degradation_cost": degradation_cost,
            "reward": float(rewards[i]),
        })
    return rows


def _summarize_day(day_number, day_df):
    """Aggregate one day's step-level rows into a single summary dict."""
    grouped_house = day_df.groupby("house").sum(numeric_only=True)
    grouped_step = day_df.groupby("step").sum(numeric_only=True)

    baseline_cost_per_house = grouped_house["baseline_cost"]
    cost_savings_per_house = baseline_cost_per_house - grouped_house["actual_cost"]
    day_total_cost_savings = cost_savings_per_house.sum()
    baseline_cost_total = baseline_cost_per_house.sum()
    if baseline_cost_total > 0:
        overall_cost_savings_pct = day_total_cost_savings / baseline_cost_total
    else:
        overall_cost_savings_pct = 0.0

    baseline_import_per_house = grouped_house["grid_import_no_p2p"]
    import_reduction_per_house = (
        baseline_import_per_house - grouped_house["grid_import_with_p2p"]
    )
    day_total_import_reduction = import_reduction_per_house.sum()
    baseline_import_total = baseline_import_per_house.sum()
    if baseline_import_total > 0:
        overall_import_reduction_pct = day_total_import_reduction / baseline_import_total
    else:
        overall_import_reduction_pct = 0.0

    p2p_total_per_house = grouped_house["p2p_buy"] + grouped_house["p2p_sell"]

    return {
        "day": day_number,
        "day_total_demand": grouped_step["total_demand"].sum(),
        "day_total_solar": grouped_step["total_solar"].sum(),
        "day_p2p_buy": grouped_house["p2p_buy"].sum(),
        "day_p2p_sell": grouped_house["p2p_sell"].sum(),
        "cost_savings_abs": day_total_cost_savings,
        "cost_savings_pct": overall_cost_savings_pct,
        "fairness_cost_savings": compute_jains_fairness(cost_savings_per_house.values),
        "grid_reduction_abs": day_total_import_reduction,
        "grid_reduction_pct": overall_import_reduction_pct,
        "fairness_grid_reduction": compute_jains_fairness(import_reduction_per_house.values),
        "fairness_reward": compute_jains_fairness(grouped_house["reward"].values),
        "fairness_p2p_buy": compute_jains_fairness(grouped_house["p2p_buy"].values),
        "fairness_p2p_sell": compute_jains_fairness(grouped_house["p2p_sell"].values),
        "fairness_p2p_total": compute_jains_fairness(p2p_total_per_house.values),
        "total_degradation_cost": grouped_house["degradation_cost"].sum(),
    }


def _save_plots(all_days_df, daily_summary_df, house_level_df, plots_dir,
                steps_per_day, days_to_evaluate):
    """Render every evaluation figure as a PNG inside *plots_dir*."""
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(plot_daily_df["day"])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    days = plot_daily_df["day"]
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * steps_per_day + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(plot_daily_df["day"])
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions (two y-axes sharing one x-axis)
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_df = all_days_df[all_days_df['day'] == 1]
    day1_prices = day1_df[['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses (rows without a battery have NaN soc)
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()


def main(
    model_path: str = "/path/to/project/pg_pennsylvania_10agents_10000eps/logs",
    data_path: str = "/path/to/project/testing/10houses_30days_TEST.csv",
    days_to_evaluate: int = 30,
    solar_threshold: float = 0.5,
) -> None:
    """Evaluate saved PG agents on SolarSys and write logs, summaries and plots.

    The state name is parsed from *model_path*, one agent per house is loaded,
    and ``days_to_evaluate`` episodes are rolled out with deterministic (mean)
    actions. Each reset-to-done rollout is logged as one "day". Results are
    written under ``runs_with_battery/<run_name>/`` in ``logs`` and ``plots``.

    Args:
        model_path: Directory holding the ``.pth`` checkpoints. Must contain
            ``pg_<state>_`` with state in oklahoma / colorado / pennsylvania.
        data_path: CSV file passed to ``SolarSys`` as ``data_path``.
        days_to_evaluate: Number of episodes to run; must be at least 1.
        solar_threshold: A step counts as "sunny" when the summed solar of all
            houses exceeds ``solar_threshold * num_agents``.

    Raises:
        ValueError: If ``days_to_evaluate`` is below 1 or the state cannot be
            detected from ``model_path``.
    """
    if days_to_evaluate < 1:
        # Without any logged step the summary code below has nothing to group.
        raise ValueError("days_to_evaluate must be at least 1")

    state_match = re.search(r"pg_(oklahoma|colorado|pennsylvania)_", model_path)
    if not state_match:
        raise ValueError(
            "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
            "from the model path. Please ensure your model's parent folder is named correctly, "
            "e.g., 'pg_oklahoma_...'"
        )
    detected_state = state_match.group(1)
    print(f"--- Detected state: {detected_state.upper()} ---")

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state,
        time_freq="15T"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_pg_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    pg_agents = _load_pg_agents(env, model_path)

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    # perf_counter is monotonic, so durations are not affected by clock changes.
    evaluation_start = time.perf_counter()

    for day_idx in range(days_to_evaluate):
        day_number = day_idx + 1
        obs = env.reset()
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.perf_counter()

            actions = _select_actions(pg_agents, obs)
            next_obs, rewards, done, info = env.step(actions)

            # The timed span covers action selection plus the environment step.
            step_duration = time.perf_counter() - step_start_time
            print(f"[Day {day_number}, Step {step_count}] Step time: {step_duration:.6f} seconds")
            step_timing_list.append({
                "day": day_number,
                "step": step_count,
                "step_time_s": step_duration
            })

            day_logs.extend(
                _build_step_rows(env, info, rewards, house_ids, num_agents, day_number, step_count)
            )

            obs = next_obs
            step_count += 1
            # Hard cap in case the environment never reports done.
            if step_count >= eval_steps:
                break

        all_logs.extend(day_logs)
        daily_summaries.append(_summarize_day(day_number, pd.DataFrame(day_logs)))

    # Final processing and saving
    total_eval_time = time.perf_counter() - evaluation_start
    print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
    print(f"Device used: {device}")

    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").agg({
        "baseline_cost": "sum",
        "actual_cost": "sum",
        "grid_import_no_p2p": "sum",
        "grid_import_with_p2p": "sum",
        "degradation_cost": "sum"
    })
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0

    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0

    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # Calculate alternative performance metrics

    # Sunny steps: transform("sum") broadcasts each (day, step) solar total
    # back onto its rows, so the boolean mask shares the frame's index and no
    # implicit reindexing is needed when filtering.
    solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].transform('sum')
    sunny_df = all_days_df[solar_per_step > (solar_threshold * num_agents)]

    # Grid Reduction During Solar Hours
    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = 0.0
    if baseline_import_sunny > 0:
        grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny

    # Cost Savings During Solar Hours
    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0

    # Community Sourcing Rate: share of procured energy bought from peers
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    total_procured_energy = total_p2p_buy + total_actual_grid_import
    community_sourcing_rate_pct = 0.0
    if total_procured_energy > 0:
        community_sourcing_rate_pct = total_p2p_buy / total_procured_energy

    # Solar Sharing Efficiency: share of exported energy sold to peers
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    total_excess_solar = total_p2p_sell + total_grid_export
    solar_sharing_efficiency_pct = 0.0
    if total_excess_solar > 0:
        solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar

    final_row = {
        "day": "ALL_DAYS_SUMMARY",
        "cost_savings_abs": total_cost_savings_all,
        "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all,
        "grid_reduction_pct": pct_grid_reduction_all,
        "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all,
        "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
        "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
        # Appended last so the positions of the existing columns do not move.
        "cost_savings_sunny_hours_pct": cost_savings_sunny_pct,
    }

    # Per-day columns that have no multi-day aggregate are left empty.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Final summary printout
    print("\n================== EVALUATION SUMMARY ==================")
    print(f"Evaluation finished for {days_to_evaluate} days.\n")

    print("--- Standard Metrics (24-Hour Average) ---")
    print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
    print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
    print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
    print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")

    print("--- Alternative Metrics (Highlighting Peak Performance) ---")
    print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
    print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
    print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
    print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")

    print("=========================================================")

    # Plots
    _save_plots(
        all_days_df,
        daily_summary_df,
        house_level_df,
        plots_dir,
        env.num_steps,
        days_to_evaluate,
    )

    print("All plots have been generated and saved. Evaluation complete.")
|
| 518 |
+
|
| 519 |
+
# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Other_algorithms/Flat_System/PG/pg_train.py
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import re
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import time
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 12 |
+
|
| 13 |
+
from solar_sys_environment import SolarSys
|
| 14 |
+
from PG.trainer.pg import PGAgent
|
| 15 |
+
|
| 16 |
+
def _centered_moving_average(series, window):
    """Return the centered rolling mean of ``series`` as a NumPy array."""
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()


def _save_moving_average_plot(x, y, *, label, ylabel, title, path, color=None):
    """Draw one line plot of ``y`` against ``x`` and save it to ``path``."""
    line_style = {} if color is None else {"color": color}
    plt.figure(figsize=(8, 5))
    plt.plot(x, y, linewidth=2, label=label, **line_style)
    plt.xlabel("Episode")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(path, dpi=200)
    plt.close()


def main():
    """Train one independent PGAgent per house on SolarSys.

    Builds the environment, runs ``num_episodes`` episodes (each agent is
    updated once at the end of every episode), saves best/final model
    weights, reward arrays, moving-average plots and a CSV performance log
    under a timestamped run directory.
    """
    STATE_TO_RUN = "pennsylvania"  # "pennsylvania" or "colorado" or "oklahoma"

    # Set the path to your training data
    DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
    num_episodes = 10000
    # NOTE: with these defaults the interval exceeds num_episodes, so no
    # periodic checkpoint is ever written; only best/final models are saved.
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="15T"
    )

    # Sanity check: env I/O shapes
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # Dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Build a unique run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"pg_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # One independent actor-critic PG agent per house
    pg_agents = [
        PGAgent(
            state_dim=local_state_dim,
            action_dim=action_dim,
            lr=2e-4,
            gamma=0.95,
            critic_loss_coef=0.5
        )
        for _ in range(num_agents)
    ]

    # Tracking / Logging Variables
    episode_rewards = []          # per-episode mean reward over agents
    episode_total_rewards = []    # per-episode summed reward over agents
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9

    training_start_time = time.time()
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # Training Loop
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Metrics of the previous episode are collected right after the reset
        # that follows it. NOTE(review): presumably the env finalises its
        # episode metrics inside reset(); confirm against SolarSys.
        if episode > 1:
            # Copy so that tagging the record cannot alias env-owned state.
            last_episode_metrics = dict(env.get_episode_metrics())
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        # Main training loop for a single episode
        while not done:
            # Action Selection: Each PG agent acts independently
            actions = np.array(
                [agent.select_action(agent_obs) for agent, agent_obs in zip(pg_agents, obs)],
                dtype=np.float32,
            )

            # Step the environment
            next_obs_list, rewards, done, info = env.step(actions)

            # Store Rewards: Each agent stores its own reward
            for i, agent in enumerate(pg_agents):
                agent.rewards.append(rewards[i])
                agent.dones.append(done)

            total_reward += rewards
            obs = np.array(next_obs_list, dtype=np.float32)
            step_count += 1

            # Read the optional battery fields once, with the same zero
            # fallback everywhere, so a missing key cannot raise KeyError.
            charge_amount = info.get("charge_amount", np.zeros(num_agents))
            discharge_amount = info.get("discharge_amount", np.zeros(num_agents))

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": charge_amount,
                "discharge_amount": discharge_amount
            })

            # Track actual charge/discharge actions from the environment
            for i in range(num_agents):
                episode_charges[i].append(charge_amount[i])
                episode_discharges[i].append(discharge_amount[i])

            if step_count >= max_steps:
                break

        # After each episode
        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)

        # Report aggregate statistics once per completed block of episodes
        if len(episode_rewards) % window_size == 0:
            block_sum = sum(episode_total_rewards[-window_size:])
            block_total_rewards.append(block_sum)

            block_mean = sum(episode_rewards[-window_size:]) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx - 1) * window_size + 1}–{block_idx * window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

        # Baseline: everything imported from the grid at the grid price
        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # Epsilon keeps the ratio finite when the baseline cost is zero
        cost_reduction = (baseline_cost - actual_cost) / (baseline_cost + 1e-8)

        # UPDATE STEP: Update each PG agent independently
        for agent in pg_agents:
            agent.update()

        # Save best models
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            for i, agent in enumerate(pg_agents):
                agent_path = os.path.join(logs_dir, f"best_model_agent_{i}.pth")
                agent.save(agent_path)

        if episode % checkpoint_interval == 0:
            for i, agent in enumerate(pg_agents):
                ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}_agent_{i}.pth")
                agent.save(ckpt_path)

        episode_duration = time.time() - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })

        # Periodic performance logging
        if episode % 100 == 0:
            avg_reward_last_100 = np.mean(episode_rewards[-100:])
            print(f" → Average reward (last 100 episodes): {avg_reward_last_100:.3f}")

    # Final episode metrics
    final_episode_metrics = dict(env.get_episode_metrics())
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    total_training_time = time.time() - training_start_time

    # Save final models
    print("\nSaving final models...")
    for i, agent in enumerate(pg_agents):
        final_path = os.path.join(logs_dir, f"final_model_agent_{i}.pth")
        agent.save(final_path)

    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Create DataFrames
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    # The per-step time-series metrics do not belong in a per-episode table;
    # errors="ignore" tolerates an environment that does not report them.
    df_final_log = pd.merge(
        df_rewards_log,
        df_perf_log.drop(
            columns=[
                'degradation_cost_over_time',
                'cost_savings_over_time',
                'grid_reduction_over_time'
            ],
            errors="ignore",
        ),
        on="Episode",
    )

    ma_window = 300
    # Take the x-axis from the merged frame so x and y always have equal
    # length, even if the merge dropped an episode lacking metrics.
    episodes = df_final_log["Episode"].to_numpy()

    # Mean Reward moving average
    _save_moving_average_plot(
        episodes,
        _centered_moving_average(df_final_log["Mean_Reward"], ma_window),
        label=f"Mean Reward MA (win={ma_window})",
        ylabel="Mean Reward",
        title="PG: Mean Reward Moving Average",
        path=os.path.join(plots_dir, "mean_reward_ma.png"),
    )

    # Total Reward moving average
    _save_moving_average_plot(
        episodes,
        _centered_moving_average(df_final_log["Total_Reward"], ma_window),
        label=f"Total Reward MA (win={ma_window})",
        ylabel="Total Reward",
        title="PG: Total Reward Moving Average",
        path=os.path.join(plots_dir, "total_reward_ma.png"),
    )

    # Cost Reduction (%) moving average
    _save_moving_average_plot(
        episodes,
        _centered_moving_average(df_final_log["Cost_Reduction_Pct"], ma_window),
        label="Cost Reduction MA (%)",
        ylabel="Cost Reduction (%)",
        title="PG: Cost Reduction Moving Average",
        path=os.path.join(plots_dir, "cost_reduction_ma.png"),
    )

    # Battery Degradation Cost moving average
    _save_moving_average_plot(
        episodes,
        _centered_moving_average(df_final_log["battery_degradation_cost_total"], ma_window),
        label=f"Degradation Cost MA (win={ma_window})",
        ylabel="Total Degradation Cost ($)",
        title="PG: Battery Degradation Cost Moving Average",
        path=os.path.join(plots_dir, "degradation_cost_ma.png"),
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Save Final Logs to CSV (the total wall-clock time goes in a trailing row)
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # Final Timings Printout
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print(f"Device used: {pg_agents[0].device}")
    print("="*50)
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
if __name__ == "__main__":
|
| 373 |
+
main()
|
Other_algorithms/Flat_System/PG/trainer/__init__.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/PG/trainer/pg.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torch.distributions import Normal
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
class SharedActorCritic(nn.Module):
    """Actor and critic heads on top of a shared two-layer MLP trunk.

    The actor head emits ``2 * action_dim`` numbers per state, which
    ``forward`` splits into the mean and log-std halves; the critic head
    emits one scalar value per state.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_dim = 128
        # Layers are created trunk -> actor -> critic; keeping that order
        # keeps seeded weight initialisation reproducible.
        trunk_layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        ]
        self.feature_extractor = nn.Sequential(*trunk_layers)
        self.actor_head = nn.Linear(hidden_dim, action_dim * 2)
        self.critic_head = nn.Linear(hidden_dim, 1)

    def forward(self, state):
        """Return ``(mean, log_std, value)`` for a batch of states."""
        hidden = self.feature_extractor(state)
        # First half of the actor output is the mean, second half the log-std.
        mean, log_std = self.actor_head(hidden).chunk(2, dim=-1)
        return mean, log_std, self.critic_head(hidden)
|
| 24 |
+
|
| 25 |
+
class PGAgent:
    """Single-agent actor-critic policy gradient learner with GAE advantages.

    Usage per rollout: call ``select_action`` once per step (it records the
    log-probability and value estimate), append the matching reward and done
    flag to ``rewards`` / ``dones``, then call ``update`` to take one
    optimizer step and clear the buffers.

    Note that every ``select_action`` call stores graph-attached tensors
    until ``update`` runs, so callers that only act (no learning) should
    clear ``log_probs`` / ``values`` themselves.
    """

    def __init__(self, state_dim, action_dim, lr=3e-4, gamma=0.95, gae_lambda=0.95, critic_loss_coef=0.5):
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.critic_loss_coef = critic_loss_coef
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = SharedActorCritic(state_dim, action_dim).to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        # Rollout buffers, filled step by step and emptied by update()
        self.log_probs = []
        self.rewards = []
        self.values = []
        self.dones = []
        # Clamp range applied to the predicted log-std before exponentiation
        self.log_std_min = -20
        self.log_std_max = 2

    def select_action(self, state):
        """Sample an action for ``state`` and record its log-prob and value.

        Returns the sampled action as a NumPy array clipped to ``[0, 1]``.
        The stored log-probability is that of the unclipped sample.
        """
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        mean, log_std, value = self.model(state_tensor)
        log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
        std = torch.exp(log_std)
        dist = Normal(mean, std)
        action = dist.sample()
        log_prob = dist.log_prob(action).sum(dim=-1)
        self.log_probs.append(log_prob)
        self.values.append(value)
        return np.clip(action.squeeze(0).cpu().detach().numpy(), 0.0, 1.0)

    def update(self):
        """Run one actor-critic gradient step on the buffered rollout.

        Does nothing when no rewards have been recorded. The buffers are
        cleared after the optimizer step.
        """
        if not self.rewards:
            return
        # The rollout is treated as ending at its last step (no bootstrap).
        next_value = 0
        # reshape(-1) rather than squeeze(): a one-step rollout must stay 1-D,
        # otherwise the GAE indexing and the MSE target shapes break.
        critic_values = torch.cat(self.values).reshape(-1)
        log_probs = torch.cat(self.log_probs).reshape(-1)
        values = critic_values.detach().cpu().numpy()
        advantages, returns = self._calculate_gae_advantages(self.rewards, values, self.dones, next_value)
        advantages = torch.tensor(advantages, dtype=torch.float32, device=self.device)
        returns = torch.tensor(returns, dtype=torch.float32, device=self.device)
        # The sample std of a single element is NaN, which would poison every
        # weight; normalise only when there is more than one advantage.
        if advantages.numel() > 1:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        actor_loss = -(log_probs * advantages).mean()
        critic_loss = nn.MSELoss()(critic_values, returns)
        total_loss = actor_loss + self.critic_loss_coef * critic_loss
        self.optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
        self.optimizer.step()
        self.rewards = []
        self.log_probs = []
        self.values = []
        self.dones = []

    def _calculate_gae_advantages(self, rewards, values, dones, next_value):
        """Compute GAE advantages and the matching value targets.

        Args:
            rewards: per-step rewards.
            values: per-step value estimates, same length as ``rewards``.
            dones: per-step termination flags; a true flag stops both the
                bootstrap and the advantage accumulation across that step.
            next_value: value estimate used after the final step.

        Returns:
            ``(advantages, returns)`` as float32 arrays, where
            ``returns = advantages + values``.
        """
        values = np.asarray(values, dtype=np.float32).reshape(-1)
        advantages = np.zeros(len(rewards), dtype=np.float32)
        last_advantage = 0
        last_index = len(rewards) - 1
        for t in reversed(range(len(rewards))):
            mask = 1.0 - float(dones[t])
            v_next = values[t + 1] if t < last_index else next_value
            delta = rewards[t] + self.gamma * v_next * mask - values[t]
            last_advantage = delta + self.gamma * self.gae_lambda * last_advantage * mask
            advantages[t] = last_advantage
        returns = advantages + values
        return advantages, returns

    def save(self, path):
        """Write the model and optimizer state dicts to ``path``."""
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
        }, path)

    def load(self, path):
        """Restore the model and optimizer state dicts saved by ``save``."""
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
|
Other_algorithms/Flat_System/maddpg/__init__.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/maddpg/maddpg_evaluation.py
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# maddpg_evaluate.py
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import time
|
| 5 |
+
import re
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
|
| 12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 13 |
+
|
| 14 |
+
from solar_sys_environment import SolarSys
|
| 15 |
+
from maddpg.trainer.maddpg import MADDPG
|
| 16 |
+
|
| 17 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 18 |
+
|
| 19 |
+
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of ``values``.

    Args:
        values: array-like of numbers (an ndarray or any plain sequence).

    Returns:
        ``0.0`` for empty input, ``1.0`` when every entry is zero, otherwise
        the index as a built-in ``float``.
    """
    # Work in float64 so integer inputs cannot overflow when squared and so
    # plain Python sequences are accepted as well as arrays.
    data = np.asarray(values, dtype=np.float64).ravel()
    if data.size == 0:
        return 0.0
    if np.all(data == 0):
        return 1.0
    total = data.sum()
    return float(total * total / (data.size * np.square(data).sum()))
|
| 27 |
+
|
| 28 |
+
def main():
    """Evaluate a trained parameter-sharing MADDPG model on the SolarSys environment.

    Pipeline:
      1. Detect the state (oklahoma / colorado / pennsylvania) from the model path.
      2. Build the environment and the MADDPG agent, then load the checkpoint.
      3. Roll out ``DAYS_TO_EVALUATE`` episodes, logging one row per house per step.
      4. Write step-level, timing, per-house and per-day CSV summaries under
         ``runs_with_battery/<run_name>/logs``.
      5. Save diagnostic plots under ``runs_with_battery/<run_name>/plots``.

    Raises:
        ValueError: if the state cannot be detected from ``MODEL_PATH``.
    """
    # User parameters
    MODEL_PATH = "/path/to/project/maddpg_para_sharing_oklahoma_5agents_10000eps/logs/best_model.pth"
    DATA_PATH = "/path/to/project/testing/5houses_30days_TEST.csv"
    DAYS_TO_EVALUATE = 30

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE
    # A step counts as "sunny" when community solar exceeds this value times the number of agents.
    SOLAR_THRESHOLD = 0.4

    state_match = re.search(r"maddpg_para_sharing_(oklahoma|colorado|pennsylvania)_", model_path)
    if not state_match:
        raise ValueError(
            "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
            "from the model path. Please ensure your model's parent folder is named correctly, "
            "e.g., 'maddpg_para_sharing_oklahoma_...'"
        )
    detected_state = state_match.group(1)
    print(f"--- Detected state: {detected_state.upper()} ---")

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state,
        time_freq="15T"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_maddpg_para_sharing_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Instantiate MADDPG agent
    maddpg = MADDPG(
        num_agents=num_agents,
        state_dim=local_state_dim,
        action_dim=action_dim
    )

    # Load MADDPG checkpoint
    maddpg.load(model_path)

    # Switch every network to inference mode.
    maddpg.actor.eval()
    maddpg.critic.eval()
    maddpg.target_actor.eval()
    maddpg.target_critic.eval()

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    # perf_counter is monotonic, so intervals are immune to system clock adjustments.
    evaluation_start = time.perf_counter()

    for day_idx in range(days_to_evaluate):
        obs = env.reset()
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.perf_counter()

            # Select actions with MADDPG; no gradients are needed during evaluation.
            with torch.no_grad():
                actions = maddpg.select_actions(obs, evaluate=True)

            next_obs, rewards, done, info = env.step(actions)

            # Consolidated Logging
            step_duration = time.perf_counter() - step_start_time

            print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")

            step_timing_list.append({
                "day": day_idx + 1,
                "step": step_count,
                "step_time_s": step_duration
            })

            grid_price_now = env.get_grid_price(step_count)
            peer_price_now = info.get("peer_price", env.get_peer_price(step_count,
                                      float(info["p2p_sell"].sum()),
                                      float(info["p2p_buy"].sum())))

            # Optional info entries: fetch once per step rather than once per house.
            zero_per_agent = [0] * num_agents
            charge_amounts = info.get("charge_amount", zero_per_agent)
            discharge_amounts = info.get("discharge_amount", zero_per_agent)
            grid_exports = info.get("grid_export", zero_per_agent)

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                p2p_buy = float(info["p2p_buy"][i])
                p2p_sell = float(info["p2p_sell"][i])
                charge_amount = float(charge_amounts[i])
                discharge_amount = float(discharge_amounts[i])

                day_logs.append({
                    "day": day_idx + 1,
                    "step": step_count,
                    "house": hid,
                    "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(grid_exports[i]),
                    "p2p_buy": p2p_buy,
                    "p2p_sell": p2p_sell,
                    "actual_cost": float(info["costs"][i]),
                    "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
                    "total_demand": float(env.demands[hid][step_count]),
                    "total_solar": float(env.solars[hid][step_count]),
                    "grid_price": grid_price_now,
                    "peer_price": peer_price_now,
                    "soc": (env.batteries[hid]["soc"] / env.batteries[hid]["max_capacity"]) if is_battery_house else np.nan,
                    "degradation_cost": ((charge_amount + discharge_amount) * env.batteries[hid]["degradation_cost_per_kwh"]) if is_battery_house else 0.0,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            # Safety stop in case the environment never reports `done`.
            if step_count >= eval_steps:
                break

        day_df = pd.DataFrame(day_logs)
        all_logs.extend(day_logs)

        # Consolidated daily summary calculation
        grouped_house = day_df.groupby("house").sum(numeric_only=True)
        grouped_step = day_df.groupby("step").sum(numeric_only=True)

        total_demand = grouped_step["total_demand"].sum()
        total_solar = grouped_step["total_solar"].sum()
        day_p2p_buy = grouped_house["p2p_buy"].sum()
        day_p2p_sell = grouped_house["p2p_sell"].sum()

        baseline_cost_per_house = grouped_house["baseline_cost"]
        actual_cost_per_house = grouped_house["actual_cost"]
        cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
        day_total_cost_savings = cost_savings_per_house.sum()

        if baseline_cost_per_house.sum() > 0:
            overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
        else:
            overall_cost_savings_pct = 0.0

        baseline_import_per_house = grouped_house["grid_import_no_p2p"]
        actual_import_per_house = grouped_house["grid_import_with_p2p"]
        import_reduction_per_house = baseline_import_per_house - actual_import_per_house
        day_total_import_reduction = import_reduction_per_house.sum()

        if baseline_import_per_house.sum() > 0:
            overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
        else:
            overall_import_reduction_pct = 0.0

        fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
        fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
        fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
        fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
        fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
        fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
        day_total_degradation_cost = grouped_house["degradation_cost"].sum()

        daily_summaries.append({
            "day": day_idx + 1,
            "day_total_demand": total_demand,
            "day_total_solar": total_solar,
            "day_p2p_buy": day_p2p_buy,
            "day_p2p_sell": day_p2p_sell,
            "cost_savings_abs": day_total_cost_savings,
            "cost_savings_pct": overall_cost_savings_pct,
            "fairness_cost_savings": fairness_cost_savings,
            "grid_reduction_abs": day_total_import_reduction,
            "grid_reduction_pct": overall_import_reduction_pct,
            "fairness_grid_reduction": fairness_import_reduction,
            "fairness_reward": fairness_rewards,
            "fairness_p2p_buy": fairness_p2p_buy,
            "fairness_p2p_sell": fairness_p2p_sell,
            "fairness_p2p_total": fairness_p2p_total,
            "total_degradation_cost": day_total_degradation_cost
        })

    # Final processing and saving
    total_eval_time = time.perf_counter() - evaluation_start
    print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")

    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").agg({
        "baseline_cost": "sum",
        "actual_cost": "sum",
        "grid_import_no_p2p": "sum",
        "grid_import_with_p2p": "sum",
        "degradation_cost": "sum"
    })
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0

    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0

    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # Calculate alternative performance metrics

    # Sunny-hour subset: rows whose (day, step) community solar exceeds the threshold.
    # `transform` broadcasts the per-step total back onto every house row, so the
    # boolean mask is already aligned with all_days_df (no implicit reindexing).
    community_solar_per_row = all_days_df.groupby(['day', 'step'])['total_solar'].transform('sum')
    sunny_df = all_days_df[community_solar_per_row > (SOLAR_THRESHOLD * num_agents)]

    # Grid Reduction During Solar Hours
    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = 0.0
    if baseline_import_sunny > 0:
        grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny

    # Cost savings in sunny hours
    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0

    # Community Sourcing Rate: share of procured energy that came from peers.
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    total_procured_energy = total_p2p_buy + total_actual_grid_import
    community_sourcing_rate_pct = total_p2p_buy / total_procured_energy if total_procured_energy > 0 else 0.0

    # Solar Sharing Efficiency: share of exported energy that went to peers.
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    total_excess_solar = total_p2p_sell + total_grid_export
    solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar if total_excess_solar > 0 else 0.0

    final_row = {
        "day": "ALL_DAYS_SUMMARY",
        "cost_savings_abs": total_cost_savings_all,
        "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all,
        "grid_reduction_pct": pct_grid_reduction_all,
        "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all,
        "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
        # Previously only printed; persisted so the CSV matches the console summary.
        "cost_savings_sunny_hours_pct": cost_savings_sunny_pct,
        "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
    }

    # Per-day-only columns have no multi-day aggregate; mark them as missing.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Final summary printout
    print("\n================== EVALUATION SUMMARY ==================")
    print(f"Evaluation finished for {days_to_evaluate} days.\n")

    print("--- Standard Metrics (24-Hour Average) ---")
    print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
    print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
    print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
    print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")

    print("--- Alternative Metrics (Highlighting Peak Performance) ---")
    print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
    print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
    print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
    print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")

    print("=========================================================")

    # Plots
    # Drop the multi-day summary row so the "day" column is purely numeric again.
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(plot_daily_df["day"])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    days = plot_daily_df["day"]
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(plot_daily_df["day"])
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    # Bar chart for cost savings
    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    # Second y-axis for grid import reduction
    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses
    day1_df = all_days_df[all_days_df['day'] == 1]
    # Houses without a battery were logged with soc = NaN, so dropna keeps battery houses only.
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()

    print("All plots have been generated and saved. Evaluation complete.")


if __name__ == "__main__":
    main()
|
Other_algorithms/Flat_System/maddpg/maddpg_train.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import re
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import time
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 12 |
+
|
| 13 |
+
from solar_sys_environment import SolarSys
|
| 14 |
+
from maddpg.trainer.maddpg import MADDPG
|
| 15 |
+
|
| 16 |
+
def _moving_average(series, window):
    """Return the centered rolling mean of *series* as a NumPy array.

    ``min_periods=1`` makes the edges average over whatever samples are
    available, so the result has the same length as the input.
    """
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()


def _save_ma_plot(x, y, label, ylabel, title, out_path, color=None):
    """Plot one smoothed training curve against episodes and save it to *out_path*."""
    # Only forward ``color`` when the caller asked for one so the default
    # matplotlib colour cycle is used otherwise.
    line_style = {"color": color} if color is not None else {}
    plt.figure(figsize=(8, 5))
    plt.plot(x, y, linewidth=2, label=label, **line_style)
    plt.xlabel("Episode")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path, dpi=200)
    plt.close()


def main():
    """Train a parameter-sharing MADDPG agent on the SolarSys environment.

    The routine:
      1. builds the environment and performs a one-step smoke test,
      2. creates a timestamped run directory with ``logs`` and ``plots``,
      3. runs the episode loop (act, step, store, train at every step),
      4. saves the best model, per-episode reward arrays, moving-average
         plots and a CSV performance log.
    """
    STATE_TO_RUN = "oklahoma"  # "pennsylvania" or "colorado" or "oklahoma"

    # Set the path to your training data
    DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
    num_episodes = 10000
    batch_size = 256
    # NOTE: larger than num_episodes, so with these values only the
    # best-model snapshot is ever written (no periodic checkpoints).
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="15T"
    )

    # Sanity check: env I/O shapes
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # Dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Build a unique run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"maddpg_para_sharing_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    for directory in (root_dir, logs_dir, plots_dir):
        os.makedirs(directory, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    # Create the MADDPG agent.  The constructor's parameter is named
    # ``local_state_dim``; passing ``state_dim=`` raises TypeError.
    maddpg = MADDPG(
        num_agents=num_agents,
        local_state_dim=local_state_dim,
        action_dim=action_dim,
        gamma=0.95,
        tau=0.01,
        lr_actor=1e-4,
        lr_critic=1e-3,
        buffer_size=1000000,
        noise_episodes=5000,
        init_sigma=0.3,
        final_sigma=0.01,
        batch_size=batch_size
    )

    # Tracking / Logging Variables
    episode_rewards = []        # mean reward over agents, one entry per episode
    episode_total_rewards = []  # summed reward over agents, one entry per episode
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    training_start_time = time.time()
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # Training Loop
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Collect metrics from the previous episode.
        # NOTE(review): this is read after reset(); presumably the env keeps
        # the finished episode's metrics available until queried - confirm.
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # Select actions using the MADDPG agent
            actions = maddpg.select_actions(obs)

            # Step environment
            next_obs_list, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs_list, dtype=np.float32)

            # Store the transition in the replay buffer
            maddpg.store_transition(obs, actions, rewards, next_obs, done)

            # Train the agent at every step
            maddpg.train()

            total_reward += rewards
            obs = next_obs
            step_count += 1

            for i in range(num_agents):
                episode_charges[i].append(info["charge_amount"][i])
                episode_discharges[i].append(info["discharge_amount"][i])

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
                "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
            })

            # Hard cap in case the env never reports done
            if step_count >= max_steps:
                break

        # After each episode: compute per-episode metrics
        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)

        # If we just finished a block of window_size episodes, aggregate
        if len(episode_rewards) % window_size == 0:
            block_sum = sum(episode_total_rewards[-window_size:])
            block_total_rewards.append(block_sum)

            block_mean = sum(episode_rewards[-window_size:]) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx-1)*window_size + 1}–{block_idx*window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        # Log agent-level rewards and battery usage exactly once per episode
        # (the means were previously appended a second time further down).
        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

        # Baseline = grid import without P2P priced at the grid tariff
        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # Guard the ratio: a zero baseline would otherwise yield nan/inf and
        # poison the logged values and the smoothed plots.
        if baseline_cost != 0:
            cost_reduction = (baseline_cost - actual_cost) / baseline_cost
        else:
            cost_reduction = 0.0

        # Call on_episode_end() for noise decay schedule
        maddpg.on_episode_end()

        # Save if best
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            maddpg.save(best_model_path)

        if episode % checkpoint_interval == 0:
            ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
            maddpg.save(ckpt_path)

        episode_duration = time.time() - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        # Record data in per-episode log
        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })

    # Capture the final episode's metrics
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    # End of all training
    total_training_time = time.time() - training_start_time

    # Save out per-episode agent rewards + mean rewards
    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Create Final DataFrame for Logging and Plotting
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)

    # Merge the two DataFrames on the 'Episode' column, dropping the
    # per-step series columns from the performance metrics first.
    df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
        'degradation_cost_over_time',
        'cost_savings_over_time',
        'grid_reduction_over_time'
    ]), on="Episode")

    # PLOTTING
    ma_window = 300  # smoothing window (in episodes)
    # Take the x-axis from the merged frame so it always matches the
    # length of the plotted series.
    episodes = df_final_log["Episode"].to_numpy()

    _save_ma_plot(
        episodes, _moving_average(df_final_log["Mean_Reward"], ma_window),
        label=f"Mean Reward MA (win={ma_window})",
        ylabel="Mean Reward",
        title="MADDPG: Mean Reward Moving Average",
        out_path=os.path.join(plots_dir, "mean_reward_ma.png"),
    )
    _save_ma_plot(
        episodes, _moving_average(df_final_log["Total_Reward"], ma_window),
        label=f"Total Reward MA (win={ma_window})",
        ylabel="Total Reward",
        title="MADDPG: Total Reward Moving Average",
        out_path=os.path.join(plots_dir, "total_reward_ma.png"),
    )
    _save_ma_plot(
        episodes, _moving_average(df_final_log["Cost_Reduction_Pct"], ma_window),
        label="Cost Reduction MA (%)",
        ylabel="Cost Reduction (%)",
        title="MADDPG: Cost Reduction Moving Average",
        out_path=os.path.join(plots_dir, "cost_reduction_ma.png"),
    )
    _save_ma_plot(
        episodes, _moving_average(df_final_log["battery_degradation_cost_total"], ma_window),
        label=f"Degradation Cost MA (win={ma_window})",
        ylabel="Total Degradation Cost ($)",
        title="MADDPG: Battery Degradation Cost Moving Average",
        out_path=os.path.join(plots_dir, "degradation_cost_ma.png"),
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Save Final Logs to CSV, with one trailing row carrying the total
    # wall-clock training time in the Episode_Duration column.
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Select and reorder columns for the final CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # Final Timings Printout
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print("="*50)
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
if __name__ == "__main__":
|
| 382 |
+
main()
|
Other_algorithms/Flat_System/maddpg/trainer/__init__.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/maddpg/trainer/maddpg.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
import numpy as np
|
| 5 |
+
import random
|
| 6 |
+
from collections import deque
|
| 7 |
+
from torch.utils.data import Dataset, DataLoader
|
| 8 |
+
|
| 9 |
+
class ReplayBufferDataset(Dataset):
    """Bounded FIFO replay buffer exposed as a ``torch.utils.data.Dataset``.

    Each item is one joint transition
    ``(states, actions, rewards, next_states, done)``; once ``max_size``
    transitions are stored, appending a new one drops the oldest.
    """

    def __init__(self, max_size=100000):
        self.buffer = deque(maxlen=max_size)

    def add(self, states, actions, rewards, next_states, done):
        """Append one transition, stored as independent float32 arrays.

        Every array-like field is copied and cast to float32 on insertion so
        that (a) ``__getitem__`` can always use ``torch.from_numpy``, even
        when the caller passed lists or float64 arrays, and (b) later
        in-place edits of the caller's arrays cannot corrupt stored data.
        """
        data = (
            np.array(states, dtype=np.float32),
            np.array(actions, dtype=np.float32),
            np.array(rewards, dtype=np.float32),
            np.array(next_states, dtype=np.float32),
            np.float32(done),
        )
        self.buffer.append(data)

    def __len__(self):
        return len(self.buffer)

    def __getitem__(self, idx):
        """Return transition *idx* as a tuple of float32 tensors."""
        states, actions, rewards, next_states, done = self.buffer[idx]
        return (
            torch.from_numpy(states),
            torch.from_numpy(actions),
            torch.from_numpy(rewards),
            torch.from_numpy(next_states),
            torch.tensor(done, dtype=torch.float32),
        )
|
| 35 |
+
|
| 36 |
+
class Actor(nn.Module):
    """Policy network: two ReLU hidden layers followed by a sigmoid head.

    The sigmoid keeps every action component inside ``[0, 1]``.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=64):
        super().__init__()
        # Layer order (and therefore the ``net.<idx>`` state-dict keys) must
        # stay fixed so saved checkpoints keep loading.
        layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_dim),
            nn.Sigmoid(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, state):
        """Map a local observation tensor to an action tensor."""
        action = self.net(state)
        return action
|
| 50 |
+
|
| 51 |
+
class SharedCritic(nn.Module):
    """Centralized critic over the joint state and joint action.

    The output layer has ``num_agents`` units, i.e. one value per agent for
    each row of the batch.
    """

    def __init__(self, global_state_dim, global_action_dim, hidden_dim=128, num_agents=1):
        super().__init__()
        joint_dim = global_state_dim + global_action_dim
        # Keep the layer order fixed: the ``net.<idx>`` keys are part of the
        # checkpoint format.
        layers = [
            nn.Linear(joint_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_agents),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, global_state, global_action):
        """Concatenate state and action along dim 1 and evaluate the network."""
        joint_input = torch.cat((global_state, global_action), dim=1)
        return self.net(joint_input)
|
| 65 |
+
|
| 66 |
+
class Agent:
    """An actor, its target copy and the actor's Adam optimizer for one agent."""

    def __init__(self, local_state_dim, action_dim, lr_actor=1e-3, device=torch.device('cpu')):
        self.device = device
        online_net = Actor(local_state_dim, action_dim).to(device)
        target_net = Actor(local_state_dim, action_dim).to(device)
        self.actor = online_net
        self.target_actor = target_net
        self.actor_optim = optim.Adam(online_net.parameters(), lr=lr_actor)
        # Start the target network as an exact copy of the online network.
        target_net.load_state_dict(online_net.state_dict())

    def sync_target(self, tau):
        """Soft-update the target actor: ``target <- tau*online + (1-tau)*target``."""
        param_pairs = zip(self.target_actor.parameters(), self.actor.parameters())
        for target_param, online_param in param_pairs:
            blended = tau * online_param.data + (1.0 - tau) * target_param.data
            target_param.data.copy_(blended)
|
| 77 |
+
|
| 78 |
+
class MADDPG:
    """Parameter-sharing MADDPG trainer.

    A single actor (with a target copy) is shared by all agents and is fed
    each agent's local observation; one centralized ``SharedCritic`` (with a
    target copy) sees the flattened joint state and joint action and outputs
    one value per agent.  Exploration uses Gaussian noise whose standard
    deviation is annealed linearly from ``init_sigma`` to ``final_sigma``
    over ``noise_episodes`` episodes.
    """

    def __init__(self, num_agents, local_state_dim, action_dim,
                 gamma=0.95, tau=0.01, lr_actor=1e-4, lr_critic=1e-3,
                 buffer_size=100000, noise_episodes=100, init_sigma=0.2, final_sigma=0.01,
                 batch_size=128, num_workers=0):

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.num_agents = num_agents
        self.gamma = gamma
        self.tau = tau
        self.init_sigma = init_sigma
        self.final_sigma = final_sigma
        self.noise_episodes = noise_episodes
        self.current_episode = 0

        # Shared policy and its target copy
        self.actor = Actor(local_state_dim, action_dim).to(self.device)
        self.target_actor = Actor(local_state_dim, action_dim).to(self.device)
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.actor_optim = optim.Adam(self.actor.parameters(), lr=lr_actor)

        # Centralized critic over the concatenated per-agent states/actions
        global_state_dim = num_agents * local_state_dim
        global_action_dim = num_agents * action_dim
        self.critic = SharedCritic(global_state_dim, global_action_dim, num_agents=num_agents).to(self.device)
        self.target_critic = SharedCritic(global_state_dim, global_action_dim, num_agents=num_agents).to(self.device)
        self.target_critic.load_state_dict(self.critic.state_dict())
        self.critic_optim = optim.Adam(self.critic.parameters(), lr=lr_critic)

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.memory = ReplayBufferDataset(max_size=buffer_size)
        # Created lazily on the first train() call that has enough samples
        self.dataloader = None
        self.loader_iter = None

    def _current_sigma(self):
        """Return the exploration-noise std for the current episode."""
        # A non-positive schedule length means "no annealing phase"; go
        # straight to the final value instead of dividing by zero.
        if self.noise_episodes <= 0:
            return self.final_sigma
        frac = min(float(self.current_episode) / self.noise_episodes, 1.0)
        return self.init_sigma - frac * (self.init_sigma - self.final_sigma)

    def select_actions(self, states, evaluate=False):
        """Return one action row per agent, clipped to ``[0, 1]``.

        Args:
            states: array-like whose first ``num_agents`` rows are the
                agents' local observations.
            evaluate: when true, no exploration noise is added.
        """
        states_t = torch.as_tensor(states, dtype=torch.float32, device=self.device)
        with torch.no_grad():
            # The actor is shared, so all agents go through in one batched pass.
            actions = self.actor(states_t[:self.num_agents]).cpu().numpy()

        if not evaluate:
            noise = np.random.normal(0, self._current_sigma(), size=actions.shape)
            # In-place add keeps the array float32, which is the dtype the
            # replay buffer and the networks work in.
            actions += noise
        return np.clip(actions, 0.0, 1.0)

    def store_transition(self, states, actions, rewards, next_states, done):
        """Append one joint transition to the replay buffer."""
        self.memory.add(states, actions, rewards, next_states, done)

    def _restart_loader(self):
        """(Re)build the shuffling DataLoader over the replay buffer."""
        self.dataloader = DataLoader(
            self.memory,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=self.device.type == 'cuda',
            drop_last=True,
        )
        self.loader_iter = iter(self.dataloader)

    def _next_batch(self):
        """Return the next mini-batch, starting a new pass when one is exhausted."""
        # NOTE(review): the shuffled order appears to be drawn when a pass
        # starts, so transitions stored mid-pass would only become eligible
        # on the next pass - confirm against the DataLoader/sampler docs.
        if self.loader_iter is None:
            self._restart_loader()
        try:
            return next(self.loader_iter)
        except StopIteration:
            self._restart_loader()
            return next(self.loader_iter)

    def _soft_update(self, target_net, online_net):
        """Polyak-average ``online_net`` into ``target_net`` using ``self.tau``."""
        for tp, p in zip(target_net.parameters(), online_net.parameters()):
            tp.data.copy_(self.tau * p.data + (1.0 - self.tau) * tp.data)

    def train(self):
        """Run one critic update, one actor update and the target soft updates.

        Returns immediately (doing nothing) while the buffer holds fewer
        than ``batch_size`` transitions.
        """
        if len(self.memory) < self.batch_size:
            return

        s, a, r, s2, d = self._next_batch()

        s_t, a_t, r_t, s2_t = s.to(self.device), a.to(self.device), r.to(self.device), s2.to(self.device)
        # (batch,) -> (batch, 1) so the done mask broadcasts over the
        # critic's per-agent outputs.
        d_t = d.to(self.device).unsqueeze(-1)
        # Standardize rewards across the whole mini-batch
        r_t = (r_t - r_t.mean()) / (r_t.std() + 1e-7)
        batch_len = s_t.shape[0]
        gs, ga, ns = s_t.reshape(batch_len, -1), a_t.reshape(batch_len, -1), s2_t.reshape(batch_len, -1)

        # ---- critic update ----
        with torch.no_grad():
            # One batched pass through the shared target actor; flattening
            # the (batch, agent, action) result gives the same layout as
            # concatenating per-agent outputs along dim 1.
            targ_actions = self.target_actor(s2_t).reshape(batch_len, -1)
            Q_prime = self.target_critic(ns, targ_actions)
            targets = r_t + self.gamma * (1 - d_t) * Q_prime
        Q = self.critic(gs, ga)
        critic_loss = nn.MSELoss()(Q, targets)
        self.critic_optim.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 1.0)
        self.critic_optim.step()

        # ---- actor update: ascend the critic's value of the current policy ----
        all_actions = self.actor(s_t).reshape(batch_len, -1)
        actor_loss = -self.critic(gs, all_actions).mean()

        self.actor_optim.zero_grad()
        actor_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.actor.parameters(), 1.0)
        self.actor_optim.step()

        # ---- target networks ----
        self._soft_update(self.target_actor, self.actor)
        self._soft_update(self.target_critic, self.critic)

    def on_episode_end(self):
        """Advance the episode counter that drives the noise schedule."""
        self.current_episode += 1

    def save(self, path: str):
        """Write networks, optimizers and the episode counter to *path*."""
        payload = {
            "critic": self.critic.state_dict(),
            "target_critic": self.target_critic.state_dict(),
            "critic_optim": self.critic_optim.state_dict(),
            "actor": self.actor.state_dict(),
            "target_actor": self.target_actor.state_dict(),
            "actor_optim": self.actor_optim.state_dict(),
            "current_episode": self.current_episode,
        }
        torch.save(payload, path)

    def load(self, path: str):
        """Restore the state written by :meth:`save`.

        ``current_episode`` falls back to 0 for checkpoints that lack it.
        """
        # NOTE: torch.load unpickles the file; only load checkpoints from
        # trusted sources.
        checkpoint = torch.load(path, map_location=self.device)
        self.critic.load_state_dict(checkpoint["critic"])
        self.target_critic.load_state_dict(checkpoint["target_critic"])
        self.critic_optim.load_state_dict(checkpoint["critic_optim"])
        self.actor.load_state_dict(checkpoint["actor"])
        self.target_actor.load_state_dict(checkpoint["target_actor"])
        self.actor_optim.load_state_dict(checkpoint["actor_optim"])
        self.current_episode = checkpoint.get("current_episode", 0)
|
Other_algorithms/Flat_System/mappo/_init_.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/mappo/mappo_evaluation.py
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mappo_evaluation.py
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import time
|
| 5 |
+
import re
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
|
| 12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 13 |
+
|
| 14 |
+
from solar_sys_environment import SolarSys
|
| 15 |
+
from mappo.trainer.mappo import MAPPO
|
| 16 |
+
|
| 17 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 18 |
+
|
| 19 |
+
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of *values*.

    Args:
        values: array-like of numbers (a NumPy array or any sequence).

    Returns:
        A builtin ``float``.  By convention an empty input gives ``0.0`` and
        an all-zero input gives ``1.0`` (everyone receives the same amount).
    """
    # Work on a flat float64 copy: this accepts plain lists and avoids
    # integer overflow when squaring large integer inputs.
    arr = np.asarray(values, dtype=np.float64).ravel()
    if arr.size == 0:
        return 0.0
    if not arr.any():
        return 1.0
    num = arr.sum() ** 2
    den = arr.size * np.square(arr).sum()
    return float(num / den)
|
| 27 |
+
|
| 28 |
+
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0.0


def _summarize_day(day_number, day_df):
    """Aggregate one day's per-house step rows into a single summary record.

    Args:
        day_number: 1-based day label stored in the ``"day"`` field.
        day_df: DataFrame holding the step rows logged for that day (one row
            per house per step, with the columns written by ``main``).

    Returns:
        A dict with the day's totals, savings/reduction ratios, and Jain's
        fairness indices; key order defines the CSV column order.
    """
    per_house = day_df.groupby("house").sum(numeric_only=True)
    per_step = day_df.groupby("step").sum(numeric_only=True)

    baseline_cost = per_house["baseline_cost"]
    cost_savings = baseline_cost - per_house["actual_cost"]
    total_cost_savings = cost_savings.sum()

    baseline_import = per_house["grid_import_no_p2p"]
    import_reduction = baseline_import - per_house["grid_import_with_p2p"]
    total_import_reduction = import_reduction.sum()

    p2p_buy = per_house["p2p_buy"]
    p2p_sell = per_house["p2p_sell"]

    return {
        "day": day_number,
        "day_total_demand": per_step["total_demand"].sum(),
        "day_total_solar": per_step["total_solar"].sum(),
        "day_p2p_buy": p2p_buy.sum(),
        "day_p2p_sell": p2p_sell.sum(),
        "cost_savings_abs": total_cost_savings,
        "cost_savings_pct": _safe_ratio(total_cost_savings, baseline_cost.sum()),
        "fairness_cost_savings": compute_jains_fairness(cost_savings.values),
        "grid_reduction_abs": total_import_reduction,
        "grid_reduction_pct": _safe_ratio(total_import_reduction, baseline_import.sum()),
        "fairness_grid_reduction": compute_jains_fairness(import_reduction.values),
        "fairness_reward": compute_jains_fairness(per_house["reward"].values),
        "fairness_p2p_buy": compute_jains_fairness(p2p_buy.values),
        "fairness_p2p_sell": compute_jains_fairness(p2p_sell.values),
        "fairness_p2p_total": compute_jains_fairness((p2p_buy + p2p_sell).values),
        "total_degradation_cost": per_house["degradation_cost"].sum(),
    }


def _save_plots(plots_dir, daily_summary_df, all_days_df, house_level_df,
                steps_per_day, days_to_evaluate):
    """Render every evaluation figure to PNG files inside *plots_dir*.

    Args:
        plots_dir: Existing directory that receives the PNG files.
        daily_summary_df: Per-day summary table; the ``"ALL_DAYS_SUMMARY"``
            row is filtered out before plotting.
        all_days_df: Step-level log (one row per day/step/house).
        house_level_df: Per-house totals indexed by house id, including the
            ``cost_savings`` and ``import_reduction`` columns.
        steps_per_day: Number of steps per day, used to build a global
            timestep axis.
        days_to_evaluate: Number of evaluated days (used in a plot title).
    """
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)
    days = plot_daily_df["day"]

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(days, plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(days)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width,
            label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width,
            label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * steps_per_day + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"],
             label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"],
            label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"],
            label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"],
             label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(days, plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(days, plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(days, plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(days)
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    # Bar chart for cost savings
    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width,
            label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    # Second y-axis for grid import reduction
    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width,
            label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_df = all_days_df[all_days_df['day'] == 1]
    day1_prices = day1_df[['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses
    # Houses without a battery are logged with soc = NaN, so dropping NaN
    # leaves only the battery-equipped houses.
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()


def main():
    """Evaluate a saved MAPPO checkpoint on the SolarSys environment.

    The function loads the checkpoint at ``MODEL_PATH``, rolls the policy out
    for ``DAYS_TO_EVALUATE`` episodes (one ``env.reset()`` per episode), logs
    per-house/per-step quantities, and writes CSV summaries and PNG plots
    under ``runs_with_battery/<run_name>/``.

    Raises:
        ValueError: If the state name cannot be parsed from the model path.
        RuntimeError: If the rollout produced no step logs at all.
    """
    # User parameters
    MODEL_PATH = "/path/to/project/mappo_pennsylvania_100agents_10000eps/logs/best_model.pth"
    DATA_PATH = "/path/to/project/testing/100houses_30days_TEST.csv"
    DAYS_TO_EVALUATE = 30

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE
    # Per-agent solar level above which a (day, step) counts as "sunny";
    # it is scaled by num_agents and compared with the community total.
    SOLAR_THRESHOLD = 0.1

    # The state is encoded in the checkpoint's folder name.
    state_match = re.search(r"mappo_(oklahoma|colorado|pennsylvania)_", model_path)
    if not state_match:
        raise ValueError(
            "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
            "from the model path. Please ensure your model's parent folder is named correctly, "
            "e.g., 'mappo_oklahoma_...'"
        )
    detected_state = state_match.group(1)
    print(f"--- Detected state: {detected_state.upper()} ---")

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state,
        time_freq="3H"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_mappo_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_dim = env.observation_space.shape[1]
    global_dim = num_agents * local_dim
    act_dim = env.action_space.shape[1]

    mappo = MAPPO(
        n_agents=num_agents,
        local_dim=local_dim,
        global_dim=global_dim,
        act_dim=act_dim,
        lr=2e-4,
        gamma=0.95,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024
    )

    # Load MAPPO checkpoint
    mappo.load(model_path)
    mappo.actor.to(device).eval()
    mappo.critic.to(device).eval()

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    # perf_counter is monotonic, so durations are not distorted by
    # wall-clock adjustments.
    evaluation_start = time.perf_counter()

    for day_idx in range(days_to_evaluate):
        obs = env.reset()
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.perf_counter()
            global_obs = np.array(obs).flatten()

            # Select actions with MAPPO
            actions, _ = mappo.select_action(obs, global_obs)

            next_obs, rewards, done, info = env.step(actions)

            # Consolidated Logging
            step_duration = time.perf_counter() - step_start_time

            print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")

            step_timing_list.append({
                "day": day_idx + 1,
                "step": step_count,
                "step_time_s": step_duration
            })

            grid_price_now = env.get_grid_price(step_count)
            peer_price_now = info.get("peer_price", env.get_peer_price(step_count,
                                                                       float(info["p2p_sell"].sum()),
                                                                       float(info["p2p_buy"].sum())))

            # Optional info entries fall back to zeros; look them up once per
            # step instead of once per house.
            zero_per_agent = [0] * num_agents
            charge_amounts = info.get("charge_amount", zero_per_agent)
            discharge_amounts = info.get("discharge_amount", zero_per_agent)
            grid_exports = info.get("grid_export", zero_per_agent)

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                p2p_buy = float(info["p2p_buy"][i])
                p2p_sell = float(info["p2p_sell"][i])
                charge_amount = float(charge_amounts[i])
                discharge_amount = float(discharge_amounts[i])

                day_logs.append({
                    "day": day_idx + 1,
                    "step": step_count,
                    "house": hid,
                    "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(grid_exports[i]),
                    "p2p_buy": p2p_buy,
                    "p2p_sell": p2p_sell,
                    "actual_cost": float(info["costs"][i]),
                    "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
                    "total_demand": float(env.demands[hid][step_count]),
                    "total_solar": float(env.solars[hid][step_count]),
                    "grid_price": grid_price_now,
                    "peer_price": peer_price_now,
                    "soc": (env.batteries[hid]["soc"] / env.batteries[hid]["max_capacity"]) if is_battery_house else np.nan,
                    "degradation_cost": ((charge_amount + discharge_amount) * env.batteries[hid]["degradation_cost_per_kwh"]) if is_battery_house else 0.0,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            # Hard stop in case the environment never reports done.
            if step_count >= eval_steps:
                break

        if not day_logs:
            # Without any rows the groupby-based summary would fail with a
            # KeyError; skip the day and keep evaluating.
            print(f"[Day {day_idx+1}] No steps were executed; skipping daily summary.")
            continue

        all_logs.extend(day_logs)

        # Consolidated daily summary calculation
        daily_summaries.append(_summarize_day(day_idx + 1, pd.DataFrame(day_logs)))

    # Final processing and saving
    total_eval_time = time.perf_counter() - evaluation_start
    print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")

    if not all_logs:
        raise RuntimeError("Evaluation produced no step logs; nothing to summarize or plot.")

    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").agg({
        "baseline_cost": "sum",
        "actual_cost": "sum",
        "grid_import_no_p2p": "sum",
        "grid_import_with_p2p": "sum",
        "degradation_cost": "sum"
    })
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = _safe_ratio(total_cost_savings_all, total_baseline_cost_all)

    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = _safe_ratio(total_grid_reduction_all, total_baseline_import_all)

    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # Calculate alternative performance metrics

    # Grid reduction / cost savings during solar hours.
    # transform("sum") broadcasts each (day, step) community solar total back
    # onto its rows, giving a row-aligned mask with no implicit reindexing.
    solar_per_row = all_days_df.groupby(['day', 'step'])['total_solar'].transform('sum')
    sunny_df = all_days_df[solar_per_row > (SOLAR_THRESHOLD * num_agents)]

    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = _safe_ratio(baseline_import_sunny - actual_import_sunny,
                                           baseline_import_sunny)

    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = _safe_ratio(baseline_cost_sunny - actual_cost_sunny,
                                         baseline_cost_sunny)

    # Community Sourcing Rate: share of procured energy bought from peers.
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    community_sourcing_rate_pct = _safe_ratio(total_p2p_buy,
                                              total_p2p_buy + total_actual_grid_import)

    # Solar Sharing Efficiency: share of exported energy sold to peers.
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    solar_sharing_efficiency_pct = _safe_ratio(total_p2p_sell,
                                               total_p2p_sell + total_grid_export)

    final_row = {
        "day": "ALL_DAYS_SUMMARY",
        "cost_savings_abs": total_cost_savings_all,
        "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all,
        "grid_reduction_pct": pct_grid_reduction_all,
        "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all,
        "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
        "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
        # Appended last so the positions of the existing CSV columns stay put.
        "cost_savings_sunny_hours_pct": cost_savings_sunny_pct,
    }

    # Per-day-only columns have no multi-day value; fill them with NaN.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Final summary printout
    print("\n================== EVALUATION SUMMARY ==================")
    print(f"Evaluation finished for {days_to_evaluate} days.\n")

    print("--- Standard Metrics (24-Hour Average) ---")
    print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
    print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
    print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
    print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")

    print("--- Alternative Metrics (Highlighting Peak Performance) ---")
    print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
    print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
    print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
    print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")

    print("=========================================================")

    # Plots
    _save_plots(plots_dir, daily_summary_df, all_days_df, house_level_df,
                env.num_steps, days_to_evaluate)

    print("All plots have been generated and saved. Evaluation complete.")
|
| 498 |
+
|
| 499 |
+
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Other_algorithms/Flat_System/mappo/mappo_train.py
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import re  # ← add this
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import time
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 12 |
+
|
| 13 |
+
from solar_sharer_battery_env import SolarSharer
|
| 14 |
+
from mappo.trainer.mappo import MAPPO
|
| 15 |
+
|
| 16 |
+
def main():
    """Train MAPPO on the SolarSharer environment and persist logs and plots.

    Steps performed:
      1. Build the environment and run a one-step sanity check.
      2. Create a timestamped run directory with ``logs`` and ``plots``.
      3. Train for ``num_episodes`` episodes, calling ``mappo.update()`` once
         at the end of every episode.
      4. Save reward arrays, moving-average plots and
         ``training_performance_log.csv``.
    """
    STATE_TO_RUN = "pennsylvania"  # "pennsylvania" or "colorado" or "oklahoma"

    # --- Set the path to your training data ---
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    DATA_FILE_PATH = "/Users/ananygupta/Desktop/Final_revision/Australia_data/processed_data_ausgrid_100_houses.csv"
    num_episodes = 10000  # total number of episodes to run
    batch_size = 256  # e.g. 512, 1024, 2048
    checkpoint_interval = 100000
    window_size = 32  # number of episodes aggregated into one reporting block

    env = SolarSharer(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="30T",
    )

    # ─── Sanity check: env I/O shapes ───
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1),
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # ─── Build a unique run directory ───
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"mappo_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("Testing_with_australia_data", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # Create the MAPPO agent; the critic sees all local observations concatenated.
    mappo = MAPPO(
        n_agents=num_agents,
        local_dim=local_state_dim,
        global_dim=num_agents * local_state_dim,
        act_dim=action_dim,
        lr=2e-4,
        gamma=0.95,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=4,
        batch_size=batch_size,
    )

    # ─── Tracking / logging variables ───
    episode_rewards = []         # mean reward per episode (averaged across agents)
    episode_total_rewards = []   # total reward per episode (sum across agents)
    block_mean_rewards = []      # mean of mean-episode-rewards per block of window_size
    block_total_rewards = []     # sum of total-episode-rewards per block of window_size

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    daily_rewards = []  # same values as episode_rewards; drives the block aggregation

    training_start_time = time.time()
    total_steps_global = 0
    episode_log_data = []
    performance_metrics_log = []  # per-episode metric dicts returned by the environment

    agent_charge_log = [[] for _ in range(num_agents)]     # per-episode charge-action summary
    agent_discharge_log = [[] for _ in range(num_agents)]  # per-episode discharge-action summary

    # ─── Training loop ───
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Metrics of the episode that just finished are fetched right after the
        # reset that follows it; nothing to fetch before the first episode.
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            # Tag with the episode number so the two logs can be merged later.
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # obs has one row per agent; the flattened copy is the critic input.
            global_obs = obs.flatten()
            actions, logps = mappo.select_action(obs, global_obs)

            # step environment
            next_obs_list, rewards, done, info = env.step(actions)

            next_obs = np.array(next_obs_list, dtype=np.float32)
            next_global_obs = next_obs.flatten()

            # store transition (np.array makes an independent float32 copy)
            local_obs_arr = np.array(obs, dtype=np.float32)
            mappo.store(
                local_obs_arr,
                global_obs,
                actions,
                logps,
                rewards,
                done,
                next_global_obs,
            )
            total_reward += rewards
            obs = next_obs
            step_count += 1
            total_steps_global += 1

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
                "discharge_amount": info.get("discharge_amount", np.zeros(num_agents)),
            })

            # Hard cap in case the environment never reports done.
            if step_count >= max_steps:
                break

        # ─── After each episode ───
        # 1) Per-episode reward metrics
        sum_ep_reward = float(np.sum(total_reward))    # total across all agents
        mean_ep_reward = float(np.mean(total_reward))  # mean across agents

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)
        daily_rewards.append(mean_ep_reward)

        # 2) Aggregate every time a block of window_size episodes completes
        if len(daily_rewards) % window_size == 0:
            last_totals = episode_total_rewards[-window_size:]
            block_sum = sum(last_totals)
            block_total_rewards.append(block_sum)

            last_means = daily_rewards[-window_size:]
            block_mean = sum(last_means) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes { (block_idx-1)*window_size + 1 }–{ block_idx*window_size } "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        # 3) Log agent-level rewards
        # NOTE(review): only the actions of the LAST step of the episode are
        # recorded here (columns 4 and 5), so the per-episode "mean" below is
        # the mean of a single value — confirm whether per-step logging was intended.
        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            episode_charges[i].append(actions[i][4])
            episode_discharges[i].append(actions[i][5])

        # 4) Cost of the episode versus the grid-only baseline
        baseline_cost = np.sum([
            np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
            for entry in day_logs
        ])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # With a zero baseline there is nothing to reduce; report 0 instead of inf/nan.
        if baseline_cost:
            cost_reduction = (baseline_cost - actual_cost) / baseline_cost
        else:
            cost_reduction = 0.0

        # One PPO update per episode (the trainer clears its buffer afterwards).
        mappo.update()

        # Save if best. The weights saved are the ones AFTER this episode's update.
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            mappo.save(best_model_path)

        if episode % checkpoint_interval == 0:
            ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
            mappo.save(ckpt_path)

        episode_end_time = time.time()
        episode_duration = episode_end_time - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        # Record data in the per-episode log
        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs]),
        })
        for i in range(num_agents):
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

    # Metrics of the final episode: no further reset happens, so fetch them here.
    # NOTE(review): earlier episodes are read after env.reset(); confirm that
    # get_episode_metrics() is also complete without that reset.
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    # ─── End of all training ───
    training_end_time = time.time()
    total_training_time = training_end_time - training_start_time

    # Save out per-episode agent rewards + mean rewards
    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # ─── Build the final DataFrame used for plotting and the CSV ───
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)

    # Merge on 'Episode'; the three per-step series columns are dropped first.
    df_final_log = pd.merge(
        df_rewards_log,
        df_perf_log.drop(columns=[
            'degradation_cost_over_time',
            'cost_savings_over_time',
            'grid_reduction_over_time',
        ]),
        on="Episode",
    )

    # ─── Plotting ───
    os.makedirs(plots_dir, exist_ok=True)

    def moving_avg(series, window):
        """Return the centered rolling mean of *series* (partial windows allowed)."""
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    ma_window = 300  # smoothing window, in episodes
    # Take the x-axis from the merged frame so x and y always have equal length.
    episodes = df_final_log["Episode"].to_numpy()

    def save_ma_plot(column, label, ylabel, title, filename, color=None):
        """Plot the moving average of one log column and save it under plots_dir."""
        smoothed = moving_avg(df_final_log[column], ma_window)
        line_kwargs = {} if color is None else {"color": color}
        plt.figure(figsize=(8, 5))
        plt.plot(episodes, smoothed, linewidth=2, label=label, **line_kwargs)
        plt.xlabel("Episode")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(plots_dir, filename), dpi=200)
        plt.close()

    # 1. Mean Reward moving average
    save_ma_plot(
        "Mean_Reward",
        f"Mean Reward MA (win={ma_window})",
        "Mean Reward",
        "MAPPO: Mean Reward Moving Average",
        "mean_reward_ma.png",
    )
    # 2. Total Reward moving average
    save_ma_plot(
        "Total_Reward",
        f"Total Reward MA (win={ma_window})",
        "Total Reward",
        "MAPPO: Total Reward Moving Average",
        "total_reward_ma.png",
    )
    # 3. Cost Reduction (%) moving average
    save_ma_plot(
        "Cost_Reduction_Pct",
        "Cost Reduction MA (%)",
        "Cost Reduction (%)",
        "MAPPO: Cost Reduction Moving Average",
        "cost_reduction_ma.png",
    )
    # 4. Battery Degradation Cost moving average
    save_ma_plot(
        "battery_degradation_cost_total",
        f"Degradation Cost MA (win={ma_window})",
        "Total Degradation Cost ($)",
        "MAPPO: Battery Degradation Cost Moving Average",
        "degradation_cost_ma.png",
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # ─── Save final logs to CSV ───
    # The total training time is appended as one extra row.
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time,
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Select and order the columns written to the CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # ─── Final timings printout ───
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print("="*50)
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
# Run training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Other_algorithms/Flat_System/mappo/trainer/__init__.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/mappo/trainer/mappo.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mappo.py
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import random
|
| 5 |
+
import numpy as np
|
| 6 |
+
from torch.distributions import Normal
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def set_global_seed(seed: int):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Value passed to every random number generator.
    """
    random.seed(seed)        # Python
    np.random.seed(seed)     # NumPy
    torch.manual_seed(seed)  # PyTorch CPU
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # PyTorch GPU
    # Make cuDNN deterministic (may slow things down a bit).
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Universal device selection
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using CUDA (NVIDIA GPU)")
# elif torch.backends.mps.is_available():
#     device = torch.device("mps")
#     print("Using MPS (Apple Silicon GPU)")
else:
    device = torch.device("cpu")
    print("Using CPU")

# Fix every RNG once, at import time.
SEED = 42
set_global_seed(SEED)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class MLP(nn.Module):
    """Fully connected network: ``Linear -> ReLU`` per hidden size, then a final ``Linear``.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dims: Iterable of hidden layer widths (may be empty).
        output_dim: Size of the last dimension of the output.
    """

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        layers = []
        last_dim = input_dim
        for h in hidden_dims:
            layers += [nn.Linear(last_dim, h), nn.ReLU()]
            last_dim = h
        # No activation on the output layer.
        layers.append(nn.Linear(last_dim, output_dim))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to *x*."""
        return self.net(x)
|
| 49 |
+
|
| 50 |
+
class Actor(nn.Module):
    """Gaussian policy head: an MLP produces the mean, a learned vector the log-std.

    Args:
        obs_dim: Size of one observation vector.
        act_dim: Size of one action vector.
        hidden: Hidden layer widths passed to ``MLP``.
    """

    def __init__(self, obs_dim, act_dim, hidden=(64,64)):
        super().__init__()
        self.net = MLP(obs_dim, hidden, act_dim)
        # State-independent log standard deviation, initialised to 0 (std = 1).
        self.log_std = nn.Parameter(torch.zeros(act_dim))

    def forward(self, x):
        """Return ``(mean, std)``; ``std`` has shape ``[act_dim]`` and does not depend on *x*."""
        mean = self.net(x)
        std = torch.exp(self.log_std)
        return mean, std
|
| 60 |
+
|
| 61 |
+
class Critic(nn.Module):
    """State-value network mapping a state vector to one scalar.

    Args:
        state_dim: Size of one state vector.
        hidden: Hidden layer widths passed to ``MLP``.
    """

    def __init__(self, state_dim, hidden=(128,128)):
        super().__init__()
        self.net = MLP(state_dim, hidden, 1)

    def forward(self, x):
        """Return the value estimate with the trailing size-1 dimension removed."""
        return self.net(x).squeeze(-1)
|
| 68 |
+
|
| 69 |
+
class MAPPO:
    """Multi-agent PPO with one actor shared by all agents and one centralized critic.

    The actor maps each agent's local observation to a Gaussian over actions;
    the critic maps the global observation to a single value per timestep.
    Transitions are collected with ``store`` and consumed by ``update``, which
    clears the buffer when it finishes.

    Args:
        n_agents: Number of agents acting at every timestep.
        local_dim: Size of one agent's observation.
        global_dim: Size of the global observation given to the critic.
        act_dim: Size of one agent's action.
        lr: Adam learning rate used for both actor and critic.
        gamma: Discount factor.
        lam: GAE lambda.
        clip_eps: PPO ratio clipping range.
        k_epochs: Passes over the buffer per ``update`` call.
        batch_size: Mini-batch size of the PPO update.
    """

    def __init__(
        self,
        n_agents,
        local_dim,
        global_dim,
        act_dim,
        lr=3e-4,
        gamma=0.99,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024
    ):
        self.n_agents = n_agents
        self.gamma = gamma
        self.lam = lam
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self.batch_size = batch_size

        self.actor = Actor(local_dim, act_dim).to(device)
        self.critic = Critic(global_dim).to(device)

        self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)

        self.local_dim = local_dim
        self.global_dim = global_dim
        self.act_dim = act_dim

        self.clear_buffer()

    @staticmethod
    def _as_float_tensor(data):
        """Convert (a list of) arrays to a float32 tensor via one contiguous NumPy array."""
        # Stacking in NumPy first avoids PyTorch's slow element-wise path for
        # lists of ndarrays.
        return torch.as_tensor(np.asarray(data, dtype=np.float32))

    def clear_buffer(self):
        """Reset every rollout list to empty."""
        self.ls = []       # local observations
        self.gs = []       # global observations
        self.ac = []       # actions
        self.lp = []       # log-probs
        self.rw = []       # rewards
        self.done = []     # done flags
        self.next_gs = []  # next global observations

    @torch.no_grad()
    def select_action(self, local_obs, global_obs):
        """Sample actions for all agents from the current policy.

        Args:
            local_obs: Local observations, one row per agent.
            global_obs: Accepted for interface symmetry; not used here.

        Returns:
            Tuple ``(actions, log_probs)`` as NumPy arrays; each log-prob is
            summed over the action dimensions.
        """
        l = self._as_float_tensor(local_obs).to(device)
        mean, std = self.actor(l)
        dist = Normal(mean, std)
        a = dist.sample()
        return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()

    def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
        """Append one timestep of experience to the rollout buffer."""
        self.ls.append(local_obs)
        self.gs.append(global_obs)
        self.ac.append(action)
        self.lp.append(logp)
        self.rw.append(reward)
        self.done.append(done)
        self.next_gs.append(next_global_obs)

    def compute_gae(self, values):
        """Compute per-agent GAE advantages and returns for the stored rollout.

        Args:
            values: torch.Tensor of shape [T], one central V(s) per timestep.

        Returns:
            adv_flat: torch.Tensor of shape [T * n_agents].
            ret_flat: torch.Tensor of shape [T * n_agents].
        """
        # 1) raw arrays
        vals_1d = values.detach().cpu().numpy()  # [T]
        T = len(vals_1d)
        N = self.n_agents

        # 2) broadcast to per-agent: vals_agent[t, i] = V(state_t)
        vals_agent = np.tile(vals_1d[:, None], (1, N))  # [T, N]

        # 3) next-step values; the final row stays 0 unless bootstrapped below
        next_vals = np.zeros_like(vals_agent)  # [T, N]
        next_vals[:-1] = vals_agent[1:]
        # If the rollout was cut before the episode ended, bootstrap from the
        # critic's estimate of the last next-state.
        if not self.done[-1]:
            with torch.no_grad():
                v_last = self.critic(
                    self._as_float_tensor(self.next_gs[-1]).to(device)
                ).cpu().item()
            next_vals[-1, :] = v_last

        # 4) backward GAE recursion over (T, N)
        adv = np.zeros_like(vals_agent, dtype=np.float32)
        prev_adv = np.zeros(N, dtype=np.float32)
        for t in reversed(range(T)):
            mask = 1.0 - float(self.done[t])                # scalar 0/1
            rew_t = np.array(self.rw[t], dtype=np.float32)  # [N]
            delta = rew_t + self.gamma * next_vals[t] * mask - vals_agent[t]
            prev_adv = delta + self.gamma * self.lam * mask * prev_adv
            adv[t] = prev_adv

        # 5) returns = advantages + values, then flatten row-major (time, agent)
        ret = adv + vals_agent  # [T, N]
        adv_flat = torch.from_numpy(adv.flatten()).to(device)
        ret_flat = torch.from_numpy(ret.flatten()).to(device)
        return adv_flat, ret_flat

    def update(self):
        """Run ``k_epochs`` of clipped-PPO updates on the buffer, then clear it.

        Does nothing when the buffer is empty.
        """
        if not self.gs:
            return

        # 1) Raw global states tensor [T, G]
        raw_gs = self._as_float_tensor(self.gs).to(device)

        # 2) One value V(s_t) per timestep
        with torch.no_grad():
            vals = self.critic(raw_gs).cpu()  # [T]

        # 3) Advantages and returns via GAE, both flattened to [T * N]
        adv_flat, ret_flat = self.compute_gae(vals)

        # 4) Per-agent flattened training tensors
        ls = self._as_float_tensor(self.ls).reshape(-1, self.local_dim).to(device)  # [T*N, local_dim]
        ac = self._as_float_tensor(self.ac).reshape(-1, self.act_dim).to(device)    # [T*N, act_dim]
        old_lp = self._as_float_tensor(self.lp).reshape(-1).to(device)              # [T*N]

        # Repeat each global state once per agent: [T, G] -> [T, N, G] -> [T*N, G]
        gs = raw_gs.unsqueeze(1).expand(-1, self.n_agents, -1)
        gs = gs.reshape(-1, self.global_dim).to(device)

        dataset = torch.utils.data.TensorDataset(
            ls, gs, ac, old_lp, adv_flat, ret_flat
        )
        # NOTE(review): the generator is re-seeded with SEED on every call, so
        # the shuffle order is identical across updates of equal buffer size.
        gen = torch.Generator()
        gen.manual_seed(SEED)
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=0,
            generator=gen
        )

        value_loss_fn = nn.MSELoss()

        # 5) PPO update loop
        for _ in range(self.k_epochs):
            for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
                # Actor update: clipped surrogate objective
                mean, std = self.actor(b_ls)
                dist = Normal(mean, std)
                lp_new = dist.log_prob(b_ac).sum(-1)
                ratio = torch.exp(lp_new - b_lp)
                surr1 = ratio * b_adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
                actor_loss = -torch.min(surr1, surr2).mean()

                self.opt_a.zero_grad()
                actor_loss.backward()
                self.opt_a.step()

                # Critic update: regress V(s) onto the GAE returns
                val_pred = self.critic(b_gs)
                critic_loss = value_loss_fn(val_pred, b_ret)

                self.opt_c.zero_grad()
                critic_loss.backward()
                self.opt_c.step()

        # 6) Clear buffers for next rollout
        self.clear_buffer()

    def save(self, path):
        """Write the actor and critic state dicts to *path*."""
        torch.save({'actor': self.actor.state_dict(),
                    'critic': self.critic.state_dict()}, path)

    def load(self, path):
        """Load actor and critic state dicts from *path* onto the module-level device."""
        data = torch.load(path, map_location=device)
        self.actor.load_state_dict(data['actor'])
        self.critic.load_state_dict(data['critic'])
|
Other_algorithms/Flat_System/meanfield/_init_.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/meanfield/meanfield_evaluation.py
ADDED
|
@@ -0,0 +1,492 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# meanfield_evaluation.py: evaluate a trained MeanField (mfac) checkpoint on the SolarSys environment.
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import time
|
| 5 |
+
import re
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
import torch
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
|
| 12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 13 |
+
|
| 14 |
+
from solar_sys_environment import SolarSys
|
| 15 |
+
from meanfield.trainer.mfac import MeanField
|
| 16 |
+
|
| 17 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 18 |
+
|
| 19 |
+
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index of *values*.

    The index is ``(sum(x))**2 / (n * sum(x**2))``; it equals 1.0 when every
    entry is identical.

    Args:
        values: Array-like of numbers (NumPy array, list, tuple, ...). It is
            converted to a flat float array, so integer inputs cannot
            overflow when squared.

    Returns:
        0.0 for an empty input, 1.0 when the sum of squares is zero (all
        entries are zero, i.e. a perfectly equal allocation), otherwise the
        index as a built-in ``float``.
    """
    arr = np.asarray(values, dtype=float).ravel()
    if arr.size == 0:
        return 0.0

    sum_of_squares = float(np.sum(arr ** 2))
    # Guard on the denominator itself rather than on ``values == 0`` so that
    # inputs whose squares underflow to zero cannot cause a division by zero.
    if sum_of_squares == 0.0:
        return 1.0

    return float(arr.sum() ** 2 / (arr.size * sum_of_squares))
|
| 27 |
+
|
| 28 |
+
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0.0


def _summarize_day(day_number, day_df):
    """Build the community-level summary row for one evaluated day.

    Args:
        day_number: 1-based day index stored in the ``"day"`` field.
        day_df: Step-level log rows of that day (one row per house and step),
            with the columns written by ``main``.

    Returns:
        A dict with the day's energy totals, absolute/relative cost savings
        and grid-import reduction, Jain's fairness indices over houses, and
        the total degradation cost.
    """
    per_house = day_df.groupby("house").sum(numeric_only=True)

    baseline_cost = per_house["baseline_cost"]
    cost_savings = baseline_cost - per_house["actual_cost"]
    baseline_import = per_house["grid_import_no_p2p"]
    import_reduction = baseline_import - per_house["grid_import_with_p2p"]
    p2p_buy = per_house["p2p_buy"]
    p2p_sell = per_house["p2p_sell"]

    # Key order defines the column order of summary_per_day.csv.
    return {
        "day": day_number,
        "day_total_demand": per_house["total_demand"].sum(),
        "day_total_solar": per_house["total_solar"].sum(),
        "day_p2p_buy": p2p_buy.sum(),
        "day_p2p_sell": p2p_sell.sum(),
        "cost_savings_abs": cost_savings.sum(),
        "cost_savings_pct": _safe_ratio(cost_savings.sum(), baseline_cost.sum()),
        "fairness_cost_savings": compute_jains_fairness(cost_savings.values),
        "grid_reduction_abs": import_reduction.sum(),
        "grid_reduction_pct": _safe_ratio(import_reduction.sum(), baseline_import.sum()),
        "fairness_grid_reduction": compute_jains_fairness(import_reduction.values),
        "fairness_reward": compute_jains_fairness(per_house["reward"].values),
        "fairness_p2p_buy": compute_jains_fairness(p2p_buy.values),
        "fairness_p2p_sell": compute_jains_fairness(p2p_sell.values),
        "fairness_p2p_total": compute_jains_fairness((p2p_buy + p2p_sell).values),
        "total_degradation_cost": per_house["degradation_cost"].sum(),
    }


def _save_plots(all_days_df, daily_summary_df, house_level_df, plots_dir,
                steps_per_day, days_to_evaluate):
    """Render all evaluation figures as PNG files into *plots_dir*.

    Args:
        all_days_df: Step-level logs of every day (one row per house and step).
        daily_summary_df: Per-day summaries, including the final
            ``"ALL_DAYS_SUMMARY"`` row (which is excluded from the plots).
        house_level_df: Per-house totals with ``cost_savings`` and
            ``import_reduction`` columns, indexed by house id.
        plots_dir: Existing directory the figures are written to.
        steps_per_day: Number of environment steps per day, used to build a
            global timestep axis.
        days_to_evaluate: Number of evaluated days (only used in a title).
    """
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)
    days = plot_daily_df["day"]

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(days, plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(days)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * steps_per_day + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(days, plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(days, plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(days, plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(days)
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    # Bar chart for cost savings
    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    # Second y-axis for grid import reduction
    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_df = all_days_df[all_days_df['day'] == 1]
    day1_prices = day1_df[['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses
    # Houses without a battery are logged with soc = NaN, so dropna keeps battery houses only.
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()


def main():
    """Evaluate a trained MeanField checkpoint and write logs, summaries and plots.

    The function builds a ``SolarSys`` environment from the hard-coded test
    CSV, loads the hard-coded checkpoint into a ``MeanField`` trainer, runs
    ``DAYS_TO_EVALUATE`` episodes (one ``env.reset()`` per day), and records
    one log row per house and step. Outputs go to
    ``runs_with_battery/<run_name>/logs`` (step logs, step timings, per-house
    and per-day summaries as CSV) and ``.../plots`` (PNG figures).
    """
    # User parameters
    MODEL_PATH = "/path/to/project/meanfield_pennsylvania_100agents_10000eps/logs/best_model.pth"
    DATA_PATH = "/path/to/project/testing/100houses_30days_TEST.csv"
    DAYS_TO_EVALUATE = 30
    # Per-agent solar level above which a step counts as "sunny"; it is scaled
    # by the number of agents below. NOTE(review): unit presumably kWh per
    # step, confirm against the environment data.
    SOLAR_THRESHOLD = 0.1

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state="pennsylvania",
        time_freq="3H"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder. The prefix names the algorithm that
    # is actually evaluated here (MeanField), not MAPPO.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_meanfield_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_dim = env.observation_space.shape[1]
    global_dim = num_agents * local_dim
    act_dim = env.action_space.shape[1]

    # NOTE(review): the optimisation hyperparameters are only needed to
    # construct the trainer; presumably they must match the architecture used
    # at training time for the checkpoint to load, verify against mfac.py.
    mfac = MeanField(
        n_agents=num_agents,
        local_dim=local_dim,
        global_dim=global_dim,
        act_dim=act_dim,
        lr=2e-4,
        gamma=0.95,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024
    )

    # Load the mfac checkpoint and switch both networks to eval mode
    mfac.load(model_path)
    mfac.actor.to(device).eval()
    mfac.critic.to(device).eval()

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    evaluation_start = time.time()

    for day_idx in range(days_to_evaluate):
        day_number = day_idx + 1
        obs = np.array(env.reset(), dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.time()
            global_obs = obs.flatten()

            # Select actions with mfac
            actions, _ = mfac.select_action(obs, global_obs)

            next_obs, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs, dtype=np.float32)

            # Consolidated Logging
            step_duration = time.time() - step_start_time

            print(f"[Day {day_number}, Step {step_count}] Step time: {step_duration:.6f} seconds")

            step_timing_list.append({
                "day": day_number,
                "step": step_count,
                "step_time_s": step_duration
            })

            grid_price_now = env.get_grid_price(step_count)
            # Only query the environment for the peer price when the step info
            # does not already provide it (a dict.get default is evaluated
            # eagerly on every call).
            if "peer_price" in info:
                peer_price_now = info["peer_price"]
            else:
                peer_price_now = env.get_peer_price(
                    step_count,
                    float(info["p2p_sell"].sum()),
                    float(info["p2p_buy"].sum()),
                )

            # Optional per-agent info arrays default to zeros when absent.
            zeros = [0] * num_agents
            charge_all = info.get("charge_amount", zeros)
            discharge_all = info.get("discharge_amount", zeros)
            grid_export_all = info.get("grid_export", zeros)

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                charge_amount = float(charge_all[i])
                discharge_amount = float(discharge_all[i])
                grid_import_no_p2p = float(info["grid_import_no_p2p"][i])

                if is_battery_house:
                    battery = env.batteries[hid]
                    soc = battery["soc"] / battery["max_capacity"]
                    degradation_cost = (charge_amount + discharge_amount) * battery["degradation_cost_per_kwh"]
                else:
                    soc = np.nan
                    degradation_cost = 0.0

                day_logs.append({
                    "day": day_number,
                    "step": step_count,
                    "house": hid,
                    "grid_import_no_p2p": grid_import_no_p2p,
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(grid_export_all[i]),
                    "p2p_buy": float(info["p2p_buy"][i]),
                    "p2p_sell": float(info["p2p_sell"][i]),
                    "actual_cost": float(info["costs"][i]),
                    "baseline_cost": grid_import_no_p2p * grid_price_now,
                    "total_demand": float(env.demands[hid][step_count]),
                    "total_solar": float(env.solars[hid][step_count]),
                    "grid_price": grid_price_now,
                    "peer_price": peer_price_now,
                    "soc": soc,
                    "degradation_cost": degradation_cost,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            # Safety stop in case the environment does not flag `done` itself.
            if step_count >= eval_steps:
                break

        all_logs.extend(day_logs)
        daily_summaries.append(_summarize_day(day_number, pd.DataFrame(day_logs)))

    # Final processing and saving
    total_eval_time = time.time() - evaluation_start
    print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")

    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").agg({
        "baseline_cost": "sum",
        "actual_cost": "sum",
        "grid_import_no_p2p": "sum",
        "grid_import_with_p2p": "sum",
        "degradation_cost": "sum"
    })
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    pct_cost_savings_all = _safe_ratio(total_cost_savings_all, all_days_df["baseline_cost"].sum())

    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    pct_grid_reduction_all = _safe_ratio(total_grid_reduction_all, all_days_df["grid_import_no_p2p"].sum())

    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # Calculate alternative performance metrics

    # Grid reduction / cost savings during solar hours. `transform` broadcasts
    # the community solar total of each (day, step) back onto every row, so
    # the boolean mask is aligned with all_days_df row by row.
    community_solar = all_days_df.groupby(["day", "step"])["total_solar"].transform("sum")
    sunny_df = all_days_df[community_solar > (SOLAR_THRESHOLD * num_agents)]

    baseline_import_sunny = sunny_df["grid_import_no_p2p"].sum()
    grid_reduction_sunny_pct = _safe_ratio(
        baseline_import_sunny - sunny_df["grid_import_with_p2p"].sum(),
        baseline_import_sunny,
    )
    baseline_cost_sunny = sunny_df["baseline_cost"].sum()
    cost_savings_sunny_pct = _safe_ratio(
        baseline_cost_sunny - sunny_df["actual_cost"].sum(),
        baseline_cost_sunny,
    )

    # Community Sourcing Rate: share of procured energy that came from peers
    total_p2p_buy = all_days_df["p2p_buy"].sum()
    total_actual_grid_import = all_days_df["grid_import_with_p2p"].sum()
    community_sourcing_rate_pct = _safe_ratio(total_p2p_buy, total_p2p_buy + total_actual_grid_import)

    # Solar Sharing Efficiency: share of exported energy that went to peers
    total_p2p_sell = all_days_df["p2p_sell"].sum()
    total_grid_export = all_days_df["grid_export"].sum()
    solar_sharing_efficiency_pct = _safe_ratio(total_p2p_sell, total_p2p_sell + total_grid_export)

    final_row = {
        "day": "ALL_DAYS_SUMMARY",
        "cost_savings_abs": total_cost_savings_all,
        "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all,
        "grid_reduction_pct": pct_grid_reduction_all,
        "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all,
        "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
        "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
    }

    # Per-day-only columns have no multi-day value; fill them with NaN.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Final summary printout
    print("\n================== EVALUATION SUMMARY ==================")
    print(f"Evaluation finished for {days_to_evaluate} days.\n")

    print("--- Standard Metrics (24-Hour Average) ---")
    print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
    print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
    print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
    print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")

    print("--- Alternative Metrics (Highlighting Peak Performance) ---")
    print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
    print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
    print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
    print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")

    print("=========================================================")

    # Plots
    _save_plots(
        all_days_df,
        daily_summary_df,
        house_level_df,
        plots_dir,
        env.num_steps,
        days_to_evaluate,
    )

    print("All plots have been generated and saved. Evaluation complete.")
|
| 490 |
+
|
| 491 |
+
if __name__ == "__main__":
|
| 492 |
+
main()
|
Other_algorithms/Flat_System/meanfield/meanfield_train.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import re
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import time
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 12 |
+
|
| 13 |
+
from solar_sys_environment import SolarSys
|
| 14 |
+
from meanfield.trainer.mfac import MeanField
|
| 15 |
+
|
| 16 |
+
def main():
    """Train a MeanField agent on the SolarSys environment and persist the results.

    The function:
      1. builds the environment and runs a one-step sanity check with random actions,
      2. creates a timestamped run directory with ``logs`` and ``plots`` sub-folders,
      3. runs ``num_episodes`` episodes, calling ``meanfield.update()`` after each one,
      4. saves the best model (by mean per-agent episode reward) and periodic checkpoints,
      5. writes reward arrays (``.npy``), moving-average plots (``.png``) and a CSV log.

    All configuration is hard-coded at the top of the function body.
    """

    STATE_TO_RUN = "pennsylvania" # "pennsylvania" or "colorado" or "oklahoma"

    # Set the path to your training data
    DATA_FILE_PATH = "/path/to/project/training/100houses_152days_TRAIN.csv"
    num_episodes = 10000
    batch_size = 256
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="3H"
    )

    # Sanity check: env I/O shapes
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # Dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Build a unique run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"meanfield_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("Training_for_granularity", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # Create the MeanField agent.
    # episode_len is tied to the environment's step budget: the agent's rollout
    # buffer ignores transitions beyond episode_len, so relying on its default
    # would silently truncate episodes longer than that default.
    meanfield = MeanField(
        n_agents=num_agents,
        local_dim=local_state_dim,
        global_dim=num_agents * local_state_dim,
        act_dim=action_dim,
        lr=2e-4,
        gamma=0.95,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=4,
        batch_size=batch_size,
        episode_len=int(max_steps)
    )

    # Tracking / Logging Variables
    episode_rewards = []
    episode_total_rewards = []
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    daily_rewards = []

    training_start_time = time.time()
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # Training Loop
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Collect metrics from the previous episode (queried right after the reset)
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # Build global state and pick actions
            global_obs = obs.flatten()
            actions, logps = meanfield.select_action(obs, global_obs)

            # Step environment
            next_obs_list, rewards, done, info = env.step(actions)

            # Convert next observations to NumPy array
            next_obs = np.array(next_obs_list, dtype=np.float32)
            next_global_obs = next_obs.flatten()

            # Store transition
            local_obs_arr = np.array(obs, dtype=np.float32)

            meanfield.store(
                local_obs_arr,
                global_obs,
                actions,
                logps,
                rewards,
                done,
                next_global_obs
            )
            total_reward += rewards
            obs = next_obs
            step_count += 1

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
                "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
            })

            # Hard cap on episode length even if the env never reports done
            if step_count >= max_steps:
                break

        # After each episode
        # Compute per-episode metrics
        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)
        daily_rewards.append(mean_ep_reward)

        # If we just finished a block of window_size episodes, aggregate
        if len(daily_rewards) % window_size == 0:
            last_totals = episode_total_rewards[-window_size:]
            block_sum = sum(last_totals)
            block_total_rewards.append(block_sum)

            last_means = daily_rewards[-window_size:]
            block_mean = sum(last_means) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx-1)*window_size + 1}–{block_idx*window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        # Log agent-level rewards
        # NOTE(review): `actions` here is the action array of the episode's final
        # step only, so the charge/discharge lists hold a single sample each —
        # confirm whether per-step logging was intended.
        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            episode_charges[i].append(actions[i][4])
            episode_discharges[i].append(actions[i][5])

        # Cost with grid-only import (priced per step) versus the cost actually incurred
        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # The relative reduction is undefined for a zero baseline; record NaN
        # rather than dividing by zero (inf would corrupt the moving averages).
        if baseline_cost != 0:
            cost_reduction = (baseline_cost - actual_cost) / baseline_cost
        else:
            cost_reduction = float("nan")

        # Update the meanfield agent
        meanfield.update()

        # Save if best
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            meanfield.save(best_model_path)

        if episode % checkpoint_interval == 0:
            ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
            meanfield.save(ckpt_path)

        episode_end_time = time.time()
        episode_duration = episode_end_time - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        # Record data in per-episode log
        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })

        for i in range(num_agents):
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

    # Capture the final episode's metrics
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    # End of all training
    training_end_time = time.time()
    total_training_time = training_end_time - training_start_time

    # Save out per-episode agent rewards + mean rewards
    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Create Final DataFrame for Logging and Plotting
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)

    # Merge the two DataFrames on the 'Episode' column
    df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
        'degradation_cost_over_time',
        'cost_savings_over_time',
        'grid_reduction_over_time'
    ]), on="Episode")

    # PLOTTING

    # Helper: centered moving average
    def moving_avg(series, window):
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    # Smoothing window (in episodes)
    ma_window = 300
    # Use the merged frame's own Episode column so x and y always have equal length
    episodes = df_final_log["Episode"].to_numpy()

    # Helper: smooth one column and save it as a single-line figure
    def save_ma_plot(column, label, ylabel, title, filename, **line_kwargs):
        smoothed = moving_avg(df_final_log[column], ma_window)
        plt.figure(figsize=(8, 5))
        plt.plot(episodes, smoothed, linewidth=2, label=label, **line_kwargs)
        plt.xlabel("Episode")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(plots_dir, filename), dpi=200)
        plt.close()

    # Mean Reward moving average
    save_ma_plot(
        "Mean_Reward",
        f"Mean Reward MA (win={ma_window})",
        "Mean Reward",
        "meanfield: Mean Reward Moving Average",
        "mean_reward_ma.png",
    )

    # Total Reward moving average
    save_ma_plot(
        "Total_Reward",
        f"Total Reward MA (win={ma_window})",
        "Total Reward",
        "meanfield: Total Reward Moving Average",
        "total_reward_ma.png",
    )

    # Cost Reduction (%) moving average
    save_ma_plot(
        "Cost_Reduction_Pct",
        "Cost Reduction MA (%)",
        "Cost Reduction (%)",
        "meanfield: Cost Reduction Moving Average",
        "cost_reduction_ma.png",
    )

    # Battery Degradation Cost moving average
    save_ma_plot(
        "battery_degradation_cost_total",
        f"Degradation Cost MA (win={ma_window})",
        "Total Degradation Cost ($)",
        "meanfield: Battery Degradation Cost Moving Average",
        "degradation_cost_ma.png",
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Save Final Logs to CSV
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Select and reorder columns for the final CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # Final Timings Printout
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print("="*50)
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
# Script entry point: start training only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Other_algorithms/Flat_System/meanfield/trainer/__init__.py
ADDED
|
File without changes
|
Other_algorithms/Flat_System/meanfield/trainer/mfac.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mfac.py
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import random
|
| 5 |
+
import numpy as np
|
| 6 |
+
from torch.distributions import Normal
|
| 7 |
+
from torch.amp import autocast
|
| 8 |
+
from torch.cuda.amp import GradScaler
|
| 9 |
+
|
| 10 |
+
# Device selection: use the NVIDIA GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using CUDA (NVIDIA GPU)" if device.type == "cuda" else "Using CPU")
|
| 17 |
+
|
| 18 |
+
def set_global_seed(seed: int):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, all CUDA devices).

    When CUDA is available the cuDNN backend is additionally switched to
    non-deterministic mode with autotuning (benchmark) enabled, so GPU runs
    favour speed over bit-exact reproducibility.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = False
    cudnn.benchmark = True


# Module-wide seed, applied once at import time.
SEED = 42
set_global_seed(SEED)
|
| 30 |
+
|
| 31 |
+
class MLP(nn.Module):
    """Feed-forward network: one Linear+ReLU pair per hidden size, then a final Linear.

    Args:
        input_dim: size of the input feature vector.
        hidden_dims: iterable of hidden layer widths (may be empty).
        output_dim: size of the output vector (no activation is applied to it).
    """

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        widths = [input_dim, *hidden_dims]
        modules = []
        # Consecutive width pairs give the (in, out) sizes of each hidden layer.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        modules.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the result."""
        return self.net(x)
|
| 44 |
+
|
| 45 |
+
class Actor(nn.Module):
    """Gaussian policy head: an MLP produces the mean, a learned vector the log-std.

    The log standard deviation is a free parameter of length ``act_dim``
    (initialised to zeros) and does not depend on the input.
    """

    def __init__(self, obs_dim, act_dim, hidden=(64,64)):
        super().__init__()
        self.net = MLP(obs_dim, hidden, act_dim)
        initial_log_std = torch.zeros(act_dim)
        self.log_std = nn.Parameter(initial_log_std)

    def forward(self, x):
        """Return ``(mean, std)`` of the action distribution for input ``x``."""
        return self.net(x), torch.exp(self.log_std)
|
| 55 |
+
|
| 56 |
+
class Critic(nn.Module):
    """State-value network: an MLP with a single output unit."""

    def __init__(self, state_dim, hidden=(128,128)):
        super().__init__()
        self.net = MLP(state_dim, hidden, 1)

    def forward(self, x):
        """Return the value estimate for ``x`` with the trailing size-1 dim removed."""
        value = self.net(x)
        return value.squeeze(-1)
|
| 63 |
+
|
| 64 |
+
class MeanField:
    """Clipped-surrogate (PPO-style) actor-critic trainer with one shared actor.

    A single ``Actor`` is shared by all agents and receives each agent's local
    observation concatenated with the global state; a single ``Critic`` scores
    the global state. Transitions for one rollout of at most ``episode_len``
    steps are kept in pre-allocated NumPy buffers and consumed by ``update()``.

    Args:
        n_agents: number of agents acting at every step.
        local_dim: size of one agent's local observation.
        global_dim: size of the global state vector.
        act_dim: size of one agent's action vector.
        lr: Adam learning rate used for both actor and critic.
        gamma: discount factor.
        lam: GAE lambda.
        clip_eps: clipping range of the probability ratio.
        k_epochs: number of passes over the rollout per ``update()``.
        batch_size: minibatch size used in ``update()``.
        episode_len: capacity (in steps) of the rollout buffer.
    """

    def __init__(
        self,
        n_agents,
        local_dim,
        global_dim,
        act_dim,
        lr=3e-4,
        gamma=0.99,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024,
        episode_len=96
    ):
        self.n_agents = n_agents
        self.local_dim = local_dim
        self.global_dim = global_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.lam = lam
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self.batch_size = batch_size
        self.episode_len = episode_len

        self.actor = Actor(local_dim + global_dim, act_dim).to(device)
        self.critic = Critic(global_dim).to(device)

        self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)

        # One persistent, seeded generator drives minibatch shuffling. Seeding a
        # fresh generator inside every update() would replay the exact same
        # permutations on each call; keeping it here stays reproducible while
        # letting the shuffle order advance from update to update.
        self._shuffle_gen = torch.Generator()
        self._shuffle_gen.manual_seed(SEED)

        print("MeanField CUDA AMP is disabled for stability.")

        self.init_buffer()

    def init_buffer(self):
        """Allocate zeroed rollout buffers and reset the write position."""
        self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
        self.gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float32)
        self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float32)
        self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
        self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
        self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
        self.next_gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float32)
        self.step_idx = 0
        self._overflow_warned = False

    @torch.no_grad()
    def select_action(self, local_obs, global_obs):
        """Sample one action per agent from the current policy.

        Args:
            local_obs: NumPy array of per-agent observations, one row per agent.
            global_obs: 1-D NumPy array with the global state; it is repeated
                for every agent and appended to that agent's local observation.

        Returns:
            ``(actions, log_probs)`` as NumPy arrays; the log-probability is
            summed over the action dimensions of each agent.
        """
        l = torch.from_numpy(local_obs).float().to(device)
        g = torch.from_numpy(global_obs).float().to(device).unsqueeze(0).expand(self.n_agents, -1)
        input_x = torch.cat([l, g], dim=-1)
        mean, std = self.actor(input_x)
        dist = Normal(mean, std)
        a = dist.sample()
        return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()

    def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
        """Append one transition to the rollout buffer.

        Transitions arriving after the buffer is full (``episode_len`` steps)
        are not stored; a warning is printed the first time this happens so the
        truncation does not go unnoticed.
        """
        if self.step_idx >= self.episode_len:
            if not self._overflow_warned:
                print(
                    f"MeanField.store: rollout buffer is full (episode_len={self.episode_len}); "
                    "further transitions are dropped until update() is called."
                )
                self._overflow_warned = True
            return

        self.ls_buf[self.step_idx] = local_obs
        self.gs_buf[self.step_idx] = global_obs
        self.ac_buf[self.step_idx] = action
        self.lp_buf[self.step_idx] = logp
        self.rw_buf[self.step_idx] = reward
        self.done_buf[self.step_idx] = done  # scalar flag is broadcast to every agent
        self.next_gs_buf[self.step_idx] = next_global_obs
        self.step_idx += 1

    def compute_gae(self, T, vals):
        """
        Computes Generalized Advantage Estimation (GAE).

        Args:
            T: number of stored steps to use (the first ``T`` buffer rows).
            vals: tensor of critic values for the ``T`` stored global states.

        Returns:
            ``(adv_flat, ret_flat)``: advantages and returns as 1-D tensors of
            length ``T * n_agents`` on ``device``, flattened from ``(T, n_agents)``.
        """
        N = self.n_agents
        adv_buf = np.zeros_like(self.rw_buf[:T])

        # Bootstrap from the critic's value of the last next-state unless the
        # final stored step is marked done for every agent.
        if not self.done_buf[T-1].all():
            with torch.no_grad():
                v_last = self.critic(
                    torch.from_numpy(self.next_gs_buf[T-1]).float().to(device)
                ).cpu().numpy()
        else:
            v_last = 0.0
        # The critic is centralized: every agent shares the same value per step.
        vals_agent = vals.unsqueeze(1).expand(-1, N).cpu().numpy()
        rewards = self.rw_buf[:T]
        masks = 1.0 - self.done_buf[:T]
        gae = 0
        for t in reversed(range(T)):
            v_next = vals_agent[t+1] if t < T - 1 else v_last
            delta = rewards[t] + self.gamma * v_next * masks[t] - vals_agent[t]
            adv_buf[t] = gae = delta + self.gamma * self.lam * masks[t] * gae
        ret_buf = adv_buf + vals_agent
        adv_flat = torch.from_numpy(adv_buf.flatten()).float().to(device)
        ret_flat = torch.from_numpy(ret_buf.flatten()).float().to(device)
        return adv_flat, ret_flat

    def update(self):
        """Run ``k_epochs`` of clipped-surrogate updates on the stored rollout.

        Does nothing when no transition has been stored. On completion the
        write position is reset so the next rollout overwrites the buffers.
        """
        T = self.step_idx
        if T == 0:
            return

        gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
        ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
        ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
        lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)

        with torch.no_grad():
            vals = self.critic(gs_tensor)

        adv_flat, ret_flat = self.compute_gae(T, vals)
        # Standardize advantages. The sample std of a single element is NaN,
        # which would poison every parameter, so skip the step in that case.
        if adv_flat.numel() > 1:
            adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)

        # Repeat each step's global state once per agent to align with the
        # flattened (step, agent) ordering of the other tensors.
        gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)

        dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=self.batch_size, shuffle=True, generator=self._shuffle_gen
        )

        for _ in range(self.k_epochs):
            for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
                # Actor step: clipped surrogate objective with an entropy bonus
                input_a = torch.cat([b_ls, b_gs], dim=-1)
                mean, std = self.actor(input_a)
                dist = Normal(mean, std)

                entropy = dist.entropy().mean()

                lp_new = dist.log_prob(b_ac).sum(-1)
                ratio = torch.exp(lp_new - b_lp)
                surr1 = ratio * b_adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv

                actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy

                self.opt_a.zero_grad()
                actor_loss.backward()
                nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=0.5)
                self.opt_a.step()

                # Critic step: regress the value estimate onto the GAE returns
                val_pred = self.critic(b_gs)
                critic_loss = nn.MSELoss()(val_pred, b_ret)

                self.opt_c.zero_grad()
                critic_loss.backward()
                nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=0.5)
                self.opt_c.step()

        self.step_idx = 0
        self._overflow_warned = False

    def save(self, path):
        """Write the actor and critic state dicts to ``path``."""
        torch.save({'actor': self.actor.state_dict(),
                    'critic': self.critic.state_dict()}, path)

    def load(self, path):
        """Load actor and critic state dicts from ``path`` onto the active device."""
        data = torch.load(path, map_location=device)
        self.actor.load_state_dict(data['actor'])
        self.critic.load_state_dict(data['critic'])
|
Other_algorithms/Flat_System/solar_sys_environment.py
ADDED
|
@@ -0,0 +1,635 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gym
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from collections import deque
|
| 5 |
+
import random
|
| 6 |
+
random.seed(42)
|
| 7 |
+
np.random.seed(42)
|
| 8 |
+
|
| 9 |
+
class SolarSys(gym.Env):
    """Multi-agent household energy-trading environment backed by a CSV dataset.

    Every ``grid_<id>`` column of the dataset defines one house (agent); a
    matching ``total_solar_<id>`` column is expected for each of them.  An
    episode replays one randomly chosen day of the resampled data.  At each
    step every house submits six fractions in [0, 1]; the environment settles
    battery use, peer-to-peer (P2P) trades and grid trades in that order and
    returns per-house observations and rewards.

    Energy quantities are presumably in kWh (the battery specs use a
    ``degradation_cost_per_kwh`` key) -- confirm against the dataset.
    """

    def __init__(
        self,
        data_path="/path/to/project/training/200houses_152days_TRAIN.csv",
        state="oklahoma",  # for Oklahoma (example)
        time_freq="15T",  # "15T", "30T", "1H", "3H", "6H"
    ):
        """Load and resample the dataset, then build spaces, batteries and metrics.

        Args:
            data_path: CSV file with a ``local_15min`` timestamp column plus
                ``grid_<id>`` / ``total_solar_<id>`` columns.
            state: Tariff to use; one of the keys of ``self._pricing_info``
                (case-insensitive).
            time_freq: pandas offset alias used to resample the data.
                NOTE(review): recent pandas releases deprecate the "T"/"H"
                aliases in favour of "min"/"h"; the default is kept for
                backward compatibility.

        Raises:
            ValueError: unsupported ``state``, unreadable/empty data, or fewer
                rows than one full day after resampling.
            FileNotFoundError: ``data_path`` does not exist.
        """
        super().__init__()
        # Store config
        self.data_path = data_path
        self.time_freq = time_freq
        self.state = state.lower()

        # Centralized Pricing Configuration
        self._pricing_info = {
            "oklahoma": {
                "max_grid_price": 0.2112,
                "feed_in_tariff": 0.04,
                "price_function": self._get_oklahoma_price
            },
            "colorado": {
                "max_grid_price": 0.32,
                "feed_in_tariff": 0.055,
                "price_function": self._get_colorado_price
            },
            "pennsylvania": {
                "max_grid_price": 0.12505,
                "feed_in_tariff": 0.06,
                "price_function": self._get_pennsylvania_price
            }
        }

        if self.state not in self._pricing_info:
            raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")

        state_config = self._pricing_info[self.state]
        self.max_grid_price = state_config["max_grid_price"]
        self.feed_in_tariff = state_config["feed_in_tariff"]
        self._get_price_function = state_config["price_function"]

        try:
            all_data = pd.read_csv(data_path)
            all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
            all_data.set_index("local_15min", inplace=True)
            all_data = all_data.resample(time_freq).mean()
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"Data file {data_path} not found.") from exc
        except pd.errors.EmptyDataError as exc:
            raise ValueError(f"Data file {data_path} is empty.") from exc
        except Exception as e:
            # Any other failure (missing column, bad timestamps, ...) is
            # reported as ValueError; the original exception stays chained.
            raise ValueError(f"Error loading data: {e}") from e

        # Compute global maxima for normalization
        grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
        solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
        all_grid = all_data[grid_cols].values
        all_solar = all_data[solar_cols].values

        # max total demand = max(grid + solar) over all time & agents.
        # nanmax: resampling can leave empty (NaN) bins, which reset() treats
        # as 0; a plain max() would turn the whole statistic into NaN.
        self.global_max_demand = float(np.nanmax(all_grid + all_solar)) + 1e-8

        # max solar generation alone
        self.global_max_solar = float(np.nanmax(all_solar)) + 1e-8

        # Store the resampled dataset
        self.all_data = all_data

        freq_offset = pd.tseries.frequencies.to_offset(time_freq)
        minutes_per_step = freq_offset.nanos / 1e9 / 60.0
        self.steps_per_day = int(24 * 60 // minutes_per_step)

        total_rows = len(self.all_data)
        self.total_days = total_rows // self.steps_per_day
        if self.total_days < 1:
            raise ValueError(
                f"After resampling, dataset has {total_rows} rows, which is "
                f"less than a single day of {self.steps_per_day} steps."
            )

        # House id = everything after the "grid_" prefix, so ids that contain
        # underscores still map back to their own columns.
        self.house_ids = [
            col[len("grid_"):] for col in self.all_data.columns
            if col.startswith("grid_")
        ]
        self.num_agents = len(self.house_ids)

        # Baseline grid import of each house without any P2P trading.
        # NaNs are zero-filled exactly like reset() does for the same columns,
        # otherwise they would propagate into every episode metric.
        self.original_no_p2p_import = {}
        for hid in self.house_ids:
            col_grid = f"grid_{hid}"
            self.original_no_p2p_import[hid] = (
                self.all_data[col_grid].fillna(0.0).clip(lower=0.0).values
            )

        # Determine population groups
        # group 1 = has any solar; group 0 = never solar
        solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
        solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
        self.agent_groups = [
            1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
            for hid in self.house_ids
        ]

        # Count the number of houses in each group
        self.group_counts = {
            0: self.agent_groups.count(0),
            1: self.agent_groups.count(1)
        }
        print(f"Number of houses in each group: {self.group_counts}")

        # Battery logic
        self.battery_options = {
            "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
            "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
            "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
        }

        # Identify which houses actually have solar
        self.solar_houses = [
            hid for hid in self.house_ids
            if (self.all_data[f"total_solar_{hid}"] > 0).any()
        ]

        # Assign a random battery type to each solar-equipped house
        self.batteries = {}
        for hid in self.solar_houses:
            choice = random.choice(list(self.battery_options))
            specs = self.battery_options[choice]
            self.batteries[hid] = {"soc": 0.0, **specs}

        # Observation & Action Spaces
        # Per-agent observation, in the order produced by _get_obs():
        # [own_demand, own_solar, SOC, grid_price, peer_price,
        #  total_demand_others, total_solar_others, time_of_day]
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.num_agents, 8),
            dtype=np.float32
        )

        # [sell_to_grid, buy_from_grid, sell_to_peers, buy_from_peers, charge_battery, discharge_battery]
        self.action_space = gym.spaces.Box(
            low=0.0,
            high=1.0,
            shape=(self.num_agents, 6),
            dtype=np.float32
        )

        self.episode_metrics = {}
        self._initialize_episode_metrics()

        # Initialize episode variables
        self.data = None
        self.env_log = []
        self.day_index = -1
        self.current_step = 0
        self.num_steps = self.steps_per_day
        self.demands = {}
        self.solars = {}
        self.previous_actions = {
            hid: np.zeros(6) for hid in self.house_ids
        }

    def _initialize_episode_metrics(self):
        """Initialize or reset all metrics tracked over a single episode."""
        self.cumulative_grid_reduction = 0.0
        self.cumulative_grid_reduction_peak = 0.0
        self.cumulative_degradation_cost = 0.0
        self.agent_cost_savings = np.zeros(self.num_agents)
        self.degradation_cost_timeseries = []
        self.cost_savings_timeseries = []
        self.grid_reduction_timeseries = []

    # Price Functions
    def get_grid_price(self, step_idx):
        """Return grid price for the current step based on selected state."""
        return self._get_price_function(step_idx)

    def _hour_of_step(self, step_idx):
        """Map a within-day step index to its hour of day (0-23).

        The hour is derived from the step index alone, i.e. step 0 is treated
        as midnight regardless of the dataset's timestamps.
        """
        minutes_per_step = 24 * 60 / self.steps_per_day
        return int((step_idx * minutes_per_step) // 60) % 24

    def _get_oklahoma_price(self, step_idx):
        """Oklahoma Gas & Electric (OG&E) time-of-use price for ``step_idx``."""
        hour = self._hour_of_step(step_idx)
        # Peak: 2 pm to 7 pm
        if 14 <= hour < 19:
            return 0.2112
        # Off-peak: All other times
        return 0.0434

    def _get_colorado_price(self, step_idx):
        """Xcel Energy Colorado time-of-use price for ``step_idx``."""
        hour = self._hour_of_step(step_idx)
        # On-peak: 3 pm to 7 pm
        if 15 <= hour < 19:
            return 0.32
        # Mid-peak: 1 pm to 3 pm
        if 13 <= hour < 15:
            return 0.22
        # Off-peak: Before 1 pm and after 7 pm
        return 0.12

    def _get_pennsylvania_price(self, step_idx):
        """Duquesne Light (Pennsylvania) EV time-of-use price for ``step_idx``."""
        hour = self._hour_of_step(step_idx)
        # Peak: 1 pm to 9 pm
        if 13 <= hour < 21:
            return 0.125048
        # Super Off-Peak: 11 pm to 6 am
        if hour >= 23 or hour < 6:
            return 0.057014
        # Off-Peak: 6 am to 1 pm and 9 pm to 11 pm
        return 0.079085

    def get_peer_price(self, step_idx, total_surplus, total_shortfall):
        """Return the P2P price for a step from aggregate surplus and shortfall.

        The price starts at 90% of the grid price and is scaled by
        ``exp(0.3 * (shortfall - surplus) / (shortfall + surplus))``, then
        bounded to ``[feed_in_tariff, grid_price]``.

        Args:
            step_idx: Within-day step index used to look up the grid price.
            total_surplus: Sum of all houses' excess solar at this step.
            total_shortfall: Sum of all houses' unmet demand at this step.

        Returns:
            The bounded peer price as a Python float.
        """
        grid_price = self.get_grid_price(step_idx)
        feed_in_tariff = self.feed_in_tariff

        base_price = grid_price * 0.90
        net_demand = total_shortfall - total_surplus
        # Small epsilon keeps the ratio finite when nobody trades.
        total_potential_trade = total_shortfall + total_surplus + 1e-6
        elasticity_factor = 0.3
        price_multiplier = np.exp(elasticity_factor * (net_demand / total_potential_trade))
        peer_price = base_price * price_multiplier

        # The feed-in tariff can exceed the grid price (Pennsylvania super
        # off-peak: 0.06 vs 0.057014).  The grid price stays the hard cap in
        # that case; making the lower bound explicit avoids relying on
        # np.clip's behaviour for inverted bounds (same result).
        lower_bound = min(feed_in_tariff, grid_price)
        final_price = float(np.clip(peer_price, lower_bound, grid_price))

        return final_price

    def reset(self):
        """Finalize the previous episode's metrics and start a new random day.

        Returns:
            A list with one length-8 float32 observation per house, in
            ``self.house_ids`` order.
        """
        # Finalize and store metrics from completed episode before resetting
        if self.current_step > 0:
            positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
            if len(positive_savings) > 1:
                fairness_on_savings = self._compute_jains_index(positive_savings)
            else:
                fairness_on_savings = 0.0

            # Store all final metrics
            self.episode_metrics = {
                "grid_reduction_entire_day": self.cumulative_grid_reduction,
                "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
                "total_cost_savings": np.sum(self.agent_cost_savings),
                "fairness_on_cost_savings": fairness_on_savings,
                "battery_degradation_cost_total": self.cumulative_degradation_cost,
                "degradation_cost_over_time": self.degradation_cost_timeseries,
                "cost_savings_over_time": self.cost_savings_timeseries,
                "grid_reduction_over_time": self.grid_reduction_timeseries,
            }

        self.day_index = np.random.randint(0, self.total_days)

        start_row = self.day_index * self.steps_per_day
        end_row = start_row + self.steps_per_day
        day_data = self.all_data.iloc[start_row:end_row].copy()
        self.data = day_data

        self.no_p2p_import_day = {}
        for hid in self.house_ids:
            self.no_p2p_import_day[hid] = self.original_no_p2p_import[hid][start_row:end_row]

        self.demands = {}
        self.solars = {}

        for hid in self.house_ids:
            col_grid = f"grid_{hid}"
            col_solar = f"total_solar_{hid}"

            grid_series = day_data[col_grid].fillna(0.0)
            solar_series = day_data[col_solar].fillna(0.0).clip(lower=0.0)

            # Demand is reconstructed as net grid flow plus own solar output.
            demand_array = grid_series.values + solar_series.values
            demand_array = np.clip(demand_array, 0.0, None)

            self.demands[hid] = demand_array
            self.solars[hid] = solar_series.values

        self.current_step = 0
        self.env_log = []

        # Reset previous_actions to 6 zeros
        for hid in self.house_ids:
            self.previous_actions[hid] = np.zeros(6)

        self._initialize_episode_metrics()

        # Randomize battery SOC between 30%–70% of capacity
        for hid, batt in self.batteries.items():
            low = 0.30 * batt["max_capacity"]
            high = 0.70 * batt["max_capacity"]
            batt["soc"] = random.uniform(low, high)

        obs = self._get_obs()
        obs_list = [obs[i] for i in range(self.num_agents)]
        return obs_list

    def step(self, actions):
        """Advance the environment by one time step.

        Settlement order: battery discharge against own shortfall, battery
        charge from own surplus, proportional P2P matching, then grid trades
        (any shortfall still uncovered is imported from the grid).

        Args:
            actions: Array-like of shape ``(num_agents, 6)`` holding
                ``[sell_to_grid, buy_from_grid, sell_to_peers, buy_from_peers,
                charge_battery, discharge_battery]`` per house; values are
                clipped to [0, 1].

        Returns:
            ``(obs_list, rewards_list, done, info)`` where the two lists have
            one entry per house, ``done`` is True after the last step of the
            day and ``info`` holds per-house arrays and per-step totals.

        Raises:
            ValueError: ``actions`` does not have shape ``(num_agents, 6)``.
        """
        # Validate & clamp actions
        actions = np.array(actions, dtype=np.float32)
        if actions.shape != (self.num_agents, 6):
            raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
        actions = np.clip(actions, 0.0, 1.0)

        a_sellGrid = actions[:, 0]
        a_buyGrid = actions[:, 1]
        a_sellPeers = actions[:, 2]
        a_buyPeers = actions[:, 3]
        a_chargeBatt = actions[:, 4]
        a_dischargeBatt = actions[:, 5]

        # Gather current demand & solar
        demands = np.array(
            [self.demands[hid][self.current_step] for hid in self.house_ids],
            dtype=np.float32,
        )
        solars = np.array(
            [self.solars[hid][self.current_step] for hid in self.house_ids],
            dtype=np.float32,
        )

        # Calculations for peer_price and grid_price
        total_surplus = np.maximum(solars - demands, 0.0).sum()
        total_shortfall = np.maximum(demands - solars, 0.0).sum()
        peer_price = self.get_peer_price(self.current_step, total_surplus, total_shortfall)
        grid_price = self.get_grid_price(self.current_step)

        # Enforce "self-use first"
        shortfall = np.maximum(demands - solars, 0.0)
        surplus = np.maximum(solars - demands, 0.0)

        final_shortfall = shortfall.copy()
        final_surplus = surplus.copy()
        grid_import = np.zeros(self.num_agents, dtype=np.float32)
        grid_export = np.zeros(self.num_agents, dtype=np.float32)

        # Battery discharge
        discharge_amount = np.zeros(self.num_agents, dtype=np.float32)
        for i, hid in enumerate(self.house_ids):
            if hid in self.batteries:
                batt = self.batteries[hid]
                max_dis = batt["max_discharge_rate"]
                # Deliverable energy after discharge losses.
                available = batt["soc"] * batt["discharge_efficiency"]
                desired = a_dischargeBatt[i] * max_dis
                actual = min(desired, available, final_shortfall[i])
                batt["soc"] -= actual / batt["discharge_efficiency"]
                final_shortfall[i] -= actual
                discharge_amount[i] = actual

        # Battery charge
        charge_amount = np.zeros(self.num_agents, dtype=np.float32)
        for i, hid in enumerate(self.house_ids):
            if hid in self.batteries:
                batt = self.batteries[hid]
                max_ch = batt["max_charge_rate"]
                cap_left = batt["max_capacity"] - batt["soc"]
                desired = a_chargeBatt[i] * max_ch
                # Input energy is limited so the stored amount fits cap_left.
                actual = min(desired, cap_left / batt["charge_efficiency"], final_surplus[i])
                batt["soc"] += actual * batt["charge_efficiency"]
                final_surplus[i] -= actual
                charge_amount[i] = actual

        # P2P matching
        battery_offer = np.zeros(self.num_agents, dtype=np.float32)
        for i, hid in enumerate(self.house_ids):
            if hid in self.batteries:
                battery_offer[i] = self.batteries[hid]["soc"] * self.batteries[hid]["discharge_efficiency"]
        effective_surplus = final_surplus + battery_offer

        # A house is a net buyer or a net seller on the peer market, never both.
        netPeer = a_buyPeers - a_sellPeers
        p2p_buy_request = np.zeros(self.num_agents, dtype=np.float32)
        p2p_sell_offer = np.zeros(self.num_agents, dtype=np.float32)
        for i in range(self.num_agents):
            if netPeer[i] > 0:
                p2p_buy_request[i] = netPeer[i] * final_shortfall[i]
            elif netPeer[i] < 0:
                p2p_sell_offer[i] = -netPeer[i] * effective_surplus[i]

        total_sell = np.sum(p2p_sell_offer)
        total_buy = np.sum(p2p_buy_request)
        matched = min(total_sell, total_buy)

        if matched > 1e-9:
            # The matched volume is shared pro rata among sellers and buyers.
            sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
            buy_fraction = p2p_buy_request / (total_buy + 1e-12)
            actual_sold = matched * sell_fraction
            actual_bought = matched * buy_fraction
        else:
            actual_sold = np.zeros(self.num_agents, dtype=np.float32)
            actual_bought = np.zeros(self.num_agents, dtype=np.float32)

        # Peer sales are served from the battery first, then from solar surplus.
        from_batt_p2p = np.minimum(actual_sold, battery_offer)
        from_solar_p2p = actual_sold - from_batt_p2p

        # Update balances
        final_surplus -= from_solar_p2p
        final_shortfall -= actual_bought

        # Deduct peer battery sales from SOC
        for i, hid in enumerate(self.house_ids):
            if hid in self.batteries:
                from_batt = min(actual_sold[i], battery_offer[i])
                self.batteries[hid]["soc"] -= from_batt / self.batteries[hid]["discharge_efficiency"]
                self.batteries[hid]["soc"] = max(0.0, self.batteries[hid]["soc"])

        # Grid trades
        netGrid = a_buyGrid - a_sellGrid
        for i in range(self.num_agents):
            if netGrid[i] > 0:
                grid_import[i] = netGrid[i] * final_shortfall[i]
            elif netGrid[i] < 0:
                grid_export[i] = -netGrid[i] * final_surplus[i]
        # Demand must always be met: the remainder is imported regardless of
        # the chosen action.
        forced = np.maximum(final_shortfall - grid_import, 0.0)
        grid_import += forced

        # Calculate costs
        costs = (grid_import * grid_price) - (grid_export * self.feed_in_tariff) + \
                (actual_bought * peer_price) - (actual_sold * peer_price)

        # Calculate rewards
        final_rewards = self._compute_rewards(
            grid_import=grid_import, grid_export=grid_export,
            actual_sold=actual_sold, actual_bought=actual_bought,
            charge_amount=charge_amount, discharge_amount=discharge_amount,
            costs=costs, grid_price=grid_price, peer_price=peer_price
        )

        # Metric calculations for the current step
        no_p2p_import_this_step = np.array([
            self.no_p2p_import_day[hid][self.current_step] for hid in self.house_ids
        ], dtype=np.float32)

        # Grid Reduction metrics
        step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
        self.cumulative_grid_reduction += step_grid_reduction
        self.grid_reduction_timeseries.append(step_grid_reduction)

        # Check if current grid price corresponds to peak hour
        if grid_price >= self.max_grid_price * 0.99:
            self.cumulative_grid_reduction_peak += step_grid_reduction

        # Cost Savings
        cost_no_p2p = no_p2p_import_this_step * grid_price
        step_cost_savings_per_agent = cost_no_p2p - costs
        self.agent_cost_savings += step_cost_savings_per_agent
        self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))

        # Battery Degradation Cost
        step_degradation_cost = 0.0
        for i, hid in enumerate(self.house_ids):
            if hid in self.batteries:
                batt = self.batteries[hid]
                degradation_cost_agent = (charge_amount[i] + discharge_amount[i]) * batt["degradation_cost_per_kwh"]
                step_degradation_cost += degradation_cost_agent

        self.cumulative_degradation_cost += step_degradation_cost
        self.degradation_cost_timeseries.append(step_degradation_cost)

        info = {
            "p2p_buy": actual_bought,
            "p2p_sell": actual_sold,
            "grid_import_with_p2p": grid_import,
            "grid_import_no_p2p": no_p2p_import_this_step,
            "grid_export": grid_export,
            "costs": costs,
            "charge_amount": charge_amount,
            "discharge_amount": discharge_amount,
            "step": self.current_step,
            "step_grid_reduction": step_grid_reduction,
            "step_cost_savings": np.sum(step_cost_savings_per_agent),
            "step_degradation_cost": step_degradation_cost,
        }

        # Record the step totals so save_log() has rows to write; the order
        # matches the column list used there.
        self.env_log.append([
            self.current_step,
            float(np.sum(grid_import)),
            float(np.sum(grid_export)),
            float(np.sum(actual_bought)),
            float(np.sum(actual_sold)),
            float(np.sum(costs)),
        ])

        # Increment step & decide "done"
        self.current_step += 1
        done = (self.current_step >= self.num_steps)

        # Return next obs, reward list, done, info
        obs_next = self._get_obs()
        obs_next_list = [obs_next[i] for i in range(self.num_agents)]
        rewards_list = [final_rewards[i] for i in range(self.num_agents)]

        return obs_next_list, rewards_list, done, info

    def _get_obs(self):
        """Build the observation matrix for the current step.

        Returns:
            float32 array of shape ``(num_agents, 8)`` whose columns are
            ``[own_demand, own_solar, soc_fraction, grid_price, peer_price,
            demand_of_others, solar_of_others, hour_of_day]``.
            ``soc_fraction`` is -1 for houses without a battery.
        """
        # After the final step current_step == num_steps; reuse the last row.
        step = min(self.current_step, self.num_steps - 1)

        # Gather per-agent demand/solar into arrays
        demands = np.array([self.demands[hid][step] for hid in self.house_ids], dtype=np.float32)
        solars = np.array([self.solars[hid][step] for hid in self.house_ids], dtype=np.float32)

        # Compute market aggregates for dynamic pricing
        surplus = np.maximum(solars - demands, 0.0)
        shortfall = np.maximum(demands - solars, 0.0)
        total_surplus = float(surplus.sum())
        total_shortfall = float(shortfall.sum())

        grid_price = self.get_grid_price(step)
        peer_price = self.get_peer_price(step, total_surplus, total_shortfall)

        # Compute time-of-day feature (fractional hour of the row's timestamp)
        ts = self.data.index[step]
        hour = ts.hour + ts.minute / 60.0

        # Build per-agent obs
        obs = []
        for i, hid in enumerate(self.house_ids):
            own_demand = demands[i]
            own_solar = solars[i]

            # Compute state-of-charge fraction (0–1), -1 for non-battery agents
            if hid in self.batteries:
                soc_frac = self.batteries[hid]["soc"] / self.batteries[hid]["max_capacity"]
            else:
                soc_frac = -1.0

            obs.append([
                own_demand,
                own_solar,
                soc_frac,
                grid_price,
                peer_price,
                float(demands.sum() - own_demand),
                float(solars.sum() - own_solar),
                hour
            ])

        return np.array(obs, dtype=np.float32)

    def _compute_jains_index(self, usage_array):
        """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of the input.

        A small epsilon in the denominator makes an all-zero input yield 0
        instead of dividing by zero.
        """
        x = np.array(usage_array, dtype=np.float32)
        numerator = (np.sum(x))**2
        denominator = len(x) * np.sum(x**2) + 1e-8
        return numerator / denominator

    def _compute_rewards(
        self,
        grid_import,
        grid_export,
        actual_sold,
        actual_bought,
        charge_amount,
        discharge_amount,
        costs,
        grid_price,
        peer_price
    ):
        """Compute the per-house reward for one settled step.

        Reward = -cost - grid-import penalty + P2P sell bonus + P2P buy bonus
        - SOC-deviation penalty - degradation penalty + shared fairness bonus.
        ``grid_export`` is accepted for interface symmetry but is not used
        here (its revenue is already part of ``costs``).

        Returns:
            float32 array with one reward per house.
        """
        # Weights for each component
        w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0

        # Jain's index on total P2P volume
        jfi = self._compute_jains_index(actual_bought + actual_sold)

        # Normalize prices
        p_grid_norm = grid_price / self.max_grid_price
        p_peer_norm = peer_price / self.max_grid_price

        rewards = np.zeros(self.num_agents, dtype=np.float32)
        for i, hid in enumerate(self.house_ids):
            # Base reward is negative cost
            reward = - costs[i] * w7

            # Grid import penalty
            grid_penalty = w1 * grid_import[i] * p_grid_norm

            # P2P sell & buy bonuses
            p2p_sell_bonus = w2 * actual_sold[i] * p_peer_norm
            if peer_price < grid_price:
                p2p_buy_bonus = w3 * actual_bought[i] * ((grid_price - peer_price) / self.max_grid_price)
            else:
                p2p_buy_bonus = 0.0

            # Battery penalties (only solar houses have entries)
            if hid in self.batteries:
                batt = self.batteries[hid]
                soc_frac = batt["soc"] / batt["max_capacity"]
                # Quadratic pull of the state of charge towards 50%.
                soc_penalty = w4 * (soc_frac - 0.5) ** 2
                degradation_penalty = w5 * (charge_amount[i] + discharge_amount[i]) * batt["degradation_cost_per_kwh"]
            else:
                soc_penalty = degradation_penalty = 0.0

            # Fairness
            fairness_bonus = w6 * jfi

            # Combine
            reward += (
                - grid_penalty
                + p2p_sell_bonus
                + p2p_buy_bonus
                - soc_penalty
                - degradation_penalty
                + fairness_bonus
            )
            rewards[i] = reward

        return rewards

    def get_episode_metrics(self):
        """
        Return performance metrics for the last completed episode.
        Call after episode finishes (after env.reset()).
        """
        return self.episode_metrics

    def save_log(self, filename="env_log.csv"):
        """Save the current episode's per-step totals (``self.env_log``) to CSV."""
        columns = [
            "Step", "Total_Grid_Import", "Total_Grid_Export",
            "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
        ]
        df = pd.DataFrame(self.env_log, columns=columns)
        df.to_csv(filename, index=False)
        print(f"Environment log saved to {filename}")
|
Other_algorithms/HC_MAPPO/Environment/cluster_env_wrapper.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gym
|
| 2 |
+
import numpy as np
|
| 3 |
+
import math
|
| 4 |
+
import sys
|
| 5 |
+
import os
|
| 6 |
+
import functools
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
# Ensure the SolarSys Environment package is on the Python path
|
| 11 |
+
# Please ensure you follow proper directory structure for running this code
|
| 12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 13 |
+
from Environment.solar_sys_environment import SolarSys
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def form_clusters(metrics: dict, size: int) -> list:
    """
    Forms balanced, heterogeneous clusters by categorizing houses based on their
    energy profile and distributing them evenly in a round-robin fashion.

    Args:
        metrics: Maps house id -> dict with numeric ``'solar'`` and
            ``'consumption'`` entries.
        size: Target number of houses per cluster; must be positive.

    Returns:
        A list of ``ceil(len(metrics) / size)`` clusters (lists of house ids).
        An empty ``metrics`` yields an empty list.

    Raises:
        ValueError: ``size`` is zero or negative while ``metrics`` is non-empty.
    """
    house_ids = list(metrics)
    if not house_ids:
        return []
    if size <= 0:
        # Previously surfaced as ZeroDivisionError (size == 0) or IndexError
        # (size < 0) further down.
        raise ValueError(f"Cluster size must be positive, got {size!r}.")

    median_consumption = np.median([m['consumption'] for m in metrics.values()])
    median_solar = np.median([m['solar'] for m in metrics.values()])

    # Categorize each house based on its profile relative to the median.
    # Houses matching no category (e.g. NaN metrics, for which every
    # comparison is False) land in `uncategorized` so none are dropped.
    producers, consumers, prosumers, neutrals, uncategorized = [], [], [], [], []
    for house_id in house_ids:
        solar = metrics[house_id]['solar']
        consumption = metrics[house_id]['consumption']
        if solar >= median_solar and consumption < median_consumption:
            producers.append(house_id)
        elif solar < median_solar and consumption >= median_consumption:
            consumers.append(house_id)
        elif solar >= median_solar and consumption >= median_consumption:
            prosumers.append(house_id)
        elif solar < median_solar and consumption < median_consumption:
            neutrals.append(house_id)
        else:
            uncategorized.append(house_id)

    # Master list ordered by category, so that dealing it out round-robin
    # spreads each category across the clusters.
    final_house_list = producers + consumers + prosumers + neutrals + uncategorized

    num_clusters = math.ceil(len(house_ids) / size)
    clusters = [[] for _ in range(num_clusters)]
    for i, house_id in enumerate(final_house_list):
        clusters[i % num_clusters].append(house_id)

    return clusters
|
| 53 |
+
|
| 54 |
+
class GlobalPriceVecEnvWrapper(gym.vector.VectorEnvWrapper):
    """Vector-env wrapper that adds inter-cluster transfers to each step.

    Wraps a vector environment whose ``envs`` attribute lists one
    sub-environment per cluster, and exposes small helpers for querying and
    moving energy between those clusters.
    """

    def __init__(self, env, clusters: list):
        """Wrap ``env`` and remember the cluster membership list."""
        super().__init__(env)
        self.clusters = clusters
        # Direct handle on the per-cluster sub-environments (``env.envs``) so
        # a coordinator can inspect them.
        self.cluster_envs = self.env.envs

    def step(self, actions: np.ndarray, exports: np.ndarray = None, imports: np.ndarray = None):
        """Step every cluster, packing actions with net transfers and a price slot.

        Net transfers are ``imports - exports`` when both are given, otherwise
        zero for every cluster.  The price slot is always filled with -1.0.

        Returns:
            ``(obs, rewards, done_all, info)`` where ``done_all`` is True only
            when every cluster terminated or truncated, and ``info`` carries
            ``"cluster_dones"`` and ``"cluster_infos"``.
        """
        n_clusters = len(self.cluster_envs)

        have_both = exports is not None and imports is not None
        net = imports - exports if have_both else np.zeros(n_clusters)

        transfers_col = net.reshape(-1, 1).astype(np.float32)
        prices_col = np.full((n_clusters, 1), -1.0, dtype=np.float32)
        packed = (actions, transfers_col, prices_col)

        obs_next, rewards, terminateds, truncateds, infos = self.env.step(packed)

        dones = terminateds | truncateds
        done_all = dones.all()

        if done_all:
            # When all clusters finish, stack each key of the per-cluster
            # final infos into one array.
            per_cluster = infos['final_info']
            infos = {
                key: np.stack([entry[key] for entry in per_cluster])
                for key in per_cluster[0].keys()
            }

        aggregated = {"cluster_dones": dones, "cluster_infos": infos}
        return obs_next, rewards, done_all, aggregated

    def get_export_capacity(self, cluster_idx: int) -> float:
        """Returns the total physically exportable energy from a cluster's batteries and solar in kWh."""
        target = self.cluster_envs[cluster_idx]
        deliverable = target.battery_soc * target.battery_discharge_efficiency
        return float(np.sum(deliverable) + target.current_solar)

    def get_import_capacity(self, cluster_idx: int) -> float:
        """Returns the total physically importable space in a cluster's batteries in kWh."""
        target = self.cluster_envs[cluster_idx]
        headroom = target.battery_max_capacity - target.battery_soc
        return float(np.sum(headroom))

    def send_energy(self, from_cluster_idx: int, amount: float) -> float:
        """Ask the given cluster to give up ``amount`` of energy; returns its result."""
        return self.cluster_envs[from_cluster_idx].send_energy(amount)

    def receive_energy(self, to_cluster_idx: int, amount: float) -> float:
        """Ask the given cluster to absorb ``amount`` of energy; returns its result."""
        return self.cluster_envs[to_cluster_idx].receive_energy(amount)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def make_vec_env(data_path: str, time_freq: str, cluster_size: int, state: str):
    """Build the wrapped vector environment with one ``SolarSys`` per cluster.

    The CSV is read and resampled once, then the same DataFrame is handed to
    every environment so the file is not re-parsed per cluster.

    Args:
        data_path: CSV file with a ``local_15min`` timestamp column.
        time_freq: Pandas frequency string the data is resampled to.
        cluster_size: Target cluster size passed to ``form_clusters``.
        state: Pricing state name forwarded to ``SolarSys``.

    Returns:
        A ``GlobalPriceVecEnvWrapper`` around a ``SyncVectorEnv``.

    Raises:
        ValueError: If the dataset cannot be loaded or resampled; the original
            exception is attached as ``__cause__``.
    """
    print("--- Pre-loading shared dataset for all environments ---")
    try:
        shared_df = pd.read_csv(data_path)
        shared_df["local_15min"] = pd.to_datetime(shared_df["local_15min"], utc=True)
        shared_df.set_index("local_15min", inplace=True)
        # Resample once here: environments given preloaded data use it as-is.
        shared_df = shared_df.resample(time_freq).mean()
    except Exception as e:
        # Chain the cause so the real traceback is not lost.
        raise ValueError(f"Failed to pre-load data in make_vec_env: {e}") from e

    # A throw-away environment over ALL houses, used only to read per-house data.
    base_env_for_metrics = SolarSys(
        data_path=data_path,
        time_freq=time_freq,
        preloaded_data=shared_df,  # Pass the shared DataFrame here
        state=state
    )

    # Per-house totals that drive cluster formation.
    metrics = {}
    for hid in base_env_for_metrics.house_ids:
        total_consumption = float(
            np.clip(base_env_for_metrics.original_no_p2p_import[hid], 0.0, None).sum()
        )
        total_solar = float(
            base_env_for_metrics.all_data[f"total_solar_{hid}"].clip(lower=0.0).sum()
        )
        metrics[hid] = {'consumption': total_consumption, 'solar': total_solar}

    clusters = form_clusters(metrics, cluster_size)
    print(f"Formed {len(clusters)} clusters of size up to {cluster_size}.")

    # One deferred constructor per cluster; SyncVectorEnv calls them itself.
    env_fns = []
    for cluster_house_ids in clusters:
        preset_env_fn = functools.partial(
            SolarSys,
            data_path=data_path,
            time_freq=time_freq,
            house_ids_in_cluster=cluster_house_ids,
            preloaded_data=shared_df,
            state=state
        )
        env_fns.append(preset_env_fn)
    sync_vec_env = gym.vector.SyncVectorEnv(env_fns)
    wrapped_vec_env = GlobalPriceVecEnvWrapper(sync_vec_env, clusters=clusters)

    return wrapped_vec_env
|
Other_algorithms/HC_MAPPO/Environment/solar_sys_environment.py
ADDED
|
@@ -0,0 +1,673 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gym
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from collections import deque
|
| 5 |
+
import random
|
| 6 |
+
from gym.spaces import Tuple, Box
|
| 7 |
+
|
| 8 |
+
random.seed(42)
|
| 9 |
+
np.random.seed(42)
|
| 10 |
+
|
| 11 |
+
class SolarSys(gym.Env):
|
| 12 |
+
|
| 13 |
+
def __init__(
    self,
    data_path="DATA/training/25houses_152days_TRAIN.csv",
    state="",  # Select from 'oklahoma', 'colorado', 'pennsylvania'
    time_freq="15T",
    house_ids_in_cluster=None,
    preloaded_data=None

):
    """Set up pricing, data, batteries and spaces for one group of houses.

    Args:
        data_path: CSV to load when ``preloaded_data`` is not supplied.
        state: Pricing state; one of the keys of ``self._pricing_info``
            (case-insensitive).
        time_freq: Pandas frequency string defining the step length.
        house_ids_in_cluster: Optional subset of house ids to simulate;
            ids not present in the data are dropped.
        preloaded_data: Optional DataFrame used as-is instead of reading
            ``data_path`` (the caller is expected to have resampled it).

    Raises:
        ValueError: Unsupported state, unreadable/empty data, no valid house
            ids, or less than one full day of data.
        FileNotFoundError: ``data_path`` does not exist.
    """
    super().__init__()  # initialize parent gym.Env
    self.state = state.lower()

    # --- Centralized Pricing Configuration ---
    self._pricing_info = {
        "oklahoma": {
            "max_grid_price": 0.2112,
            "feed_in_tariff": 0.04,
            "price_function": self._get_oklahoma_price
        },
        "colorado": {
            "max_grid_price": 0.32,
            "feed_in_tariff": 0.055,
            "price_function": self._get_colorado_price
        },
        "pennsylvania": {
            # NOTE(review): _get_pennsylvania_price never returns more than
            # 0.125048, so checks against 0.99 * max_grid_price cannot be
            # satisfied for this state -- confirm the intended value.
            "max_grid_price": 0.5505,
            "feed_in_tariff": 0.06,
            "price_function": self._get_pennsylvania_price
        }
    }

    if self.state not in self._pricing_info:
        raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")

    state_config = self._pricing_info[self.state]
    self.max_grid_price = state_config["max_grid_price"]
    self.feed_in_tariff = state_config["feed_in_tariff"]
    self._get_price_function = state_config["price_function"]
    self.data_path = data_path
    self.time_freq = time_freq
    if preloaded_data is not None:
        all_data = preloaded_data
        if house_ids_in_cluster:
            print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
    else:
        print(f"Loading data from {data_path}...")
        try:
            all_data = pd.read_csv(data_path)
            all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
            all_data.set_index("local_15min", inplace=True)
            # Bring the data to the requested step length.  steps_per_day
            # below is derived from time_freq, so un-resampled data would
            # be sliced into wrongly sized "days".
            all_data = all_data.resample(time_freq).mean(numeric_only=True)

        except FileNotFoundError as e:
            raise FileNotFoundError(f"Data file {data_path} not found.") from e
        except pd.errors.EmptyDataError as e:
            raise ValueError(f"Data file {data_path} is empty.") from e
        except Exception as e:
            raise ValueError(f"Error loading data: {e}") from e

    # Compute global maxima for normalization
    grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
    solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
    all_grid = all_data[grid_cols].values
    all_solar = all_data[solar_cols].values

    # max total demand = max(grid + solar) over all time & agents
    self.global_max_demand = float((all_grid + all_solar).max()) + 1e-8

    # max solar generation alone
    self.global_max_solar = float(all_solar.max()) + 1e-8

    # Store the resampled dataset
    self.all_data = all_data
    all_house_ids_in_file = [
        col.split("_")[1] for col in self.all_data.columns
        if col.startswith("grid_")
    ]
    if house_ids_in_cluster:
        self.house_ids = [hid for hid in house_ids_in_cluster if hid in all_house_ids_in_file]
    else:
        self.house_ids = all_house_ids_in_file

    if not self.house_ids:
        raise ValueError("No valid house_ids found for this environment instance.")

    self.env_log_infos = []

    # Number of steps in a day follows from the step length.
    freq_offset = pd.tseries.frequencies.to_offset(time_freq)
    minutes_per_step = freq_offset.nanos / 1e9 / 60.0
    self.steps_per_day = int(24 * 60 // minutes_per_step)

    total_rows = len(self.all_data)
    self.total_days = total_rows // self.steps_per_day
    if self.total_days < 1:
        raise ValueError(
            f"After resampling, dataset has {total_rows} rows, which is "
            f"less than a single day of {self.steps_per_day} steps."
        )

    self.num_agents = len(self.house_ids)
    # Baseline grid import per house (negative readings clipped to zero).
    self.original_no_p2p_import = {}
    for hid in self.house_ids:
        col_grid = f"grid_{hid}"
        self.original_no_p2p_import[hid] = self.all_data[col_grid].clip(lower=0.0).values
    solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
    solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
    # Group 1: houses whose summed solar column is positive; group 0: the rest.
    self.agent_groups = [
        1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
        for hid in self.house_ids
    ]

    self.group_counts = {
        0: self.agent_groups.count(0),
        1: self.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {self.group_counts}")

    # battery logic
    self.battery_options = {
        "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
        "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
        "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
    }
    # Houses with any positive solar reading get a battery.
    self.solar_houses = [
        hid for hid in self.house_ids
        if (self.all_data[f"total_solar_{hid}"] > 0).any()
    ]

    self.batteries = {}
    for hid in self.solar_houses:
        # Battery model is drawn at random from the options above.
        choice = random.choice(list(self.battery_options))
        specs = self.battery_options[choice]
        self.batteries[hid] = {"soc": 0.0, **specs}

    self.battery_charge_history = {hid: [] for hid in self.batteries}
    self.battery_discharge_history = {hid: [] for hid in self.batteries}
    self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
    self.battery_level = sum(b["soc"] for b in self.batteries.values())
    self.current_solar = 0.0
    self.has_battery = np.array([1 if hid in self.batteries else 0 for hid in self.house_ids], dtype=np.float32)

    # Initialize arrays for all agents, with zeros for non-battery agents
    self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)

    # Populate the arrays using the created battery dictionary
    for i, hid in enumerate(self.house_ids):
        if hid in self.batteries:
            batt = self.batteries[hid]
            self.battery_max_capacity[i] = batt["max_capacity"]
            self.battery_charge_efficiency[i] = batt["charge_efficiency"]
            self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
            self.battery_max_charge_rate[i] = batt["max_charge_rate"]
            self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
            self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]

    # ========== SPACES (Observation & Action) ===================================
    self.observation_space = gym.spaces.Box(
        low=-np.inf, high=np.inf,
        shape=(self.num_agents, 8),
        dtype=np.float32
    )
    # Action tuple: per-agent actions, inter-cluster transfer, peer-price override.
    self.action_space = Tuple((
        Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
        Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
        Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32)
    ))

    # ========== EPISODE STATE ===================================================
    self.data = None
    self.env_log = []
    self.day_index = -1
    self.current_step = 0
    self.num_steps = self.steps_per_day
    self.demands = {}
    self.solars = {}
    self.previous_actions = {
        hid: np.zeros(6) for hid in self.house_ids
    }
    self._initialize_episode_metrics()
|
| 201 |
+
|
| 202 |
+
def get_grid_price(self, step_idx):
    """Return the grid price at ``step_idx`` from the price function chosen for this state."""
    price_fn = self._get_price_function
    return price_fn(step_idx)
|
| 207 |
+
|
| 208 |
+
def _get_oklahoma_price(self, step_idx):
|
| 209 |
+
minutes_per_step = 24 * 60 / self.steps_per_day
|
| 210 |
+
hour = int((step_idx * minutes_per_step) // 60) % 24
|
| 211 |
+
if 14 <= hour < 19:
|
| 212 |
+
return 0.2112
|
| 213 |
+
else:
|
| 214 |
+
return 0.0434
|
| 215 |
+
|
| 216 |
+
def _get_colorado_price(self, step_idx):
|
| 217 |
+
minutes_per_step = 24 * 60 / self.steps_per_day
|
| 218 |
+
hour = int((step_idx * minutes_per_step) // 60) % 24
|
| 219 |
+
if 15 <= hour < 19:
|
| 220 |
+
return 0.32
|
| 221 |
+
elif 13 <= hour < 15:
|
| 222 |
+
return 0.22
|
| 223 |
+
else:
|
| 224 |
+
return 0.12
|
| 225 |
+
|
| 226 |
+
def _get_pennsylvania_price(self, step_idx):
|
| 227 |
+
minutes_per_step = 24 * 60 / self.steps_per_day
|
| 228 |
+
hour = int((step_idx * minutes_per_step) // 60) % 24
|
| 229 |
+
if 13 <= hour < 21:
|
| 230 |
+
return 0.125048
|
| 231 |
+
elif hour >= 23 or hour < 6:
|
| 232 |
+
return 0.057014
|
| 233 |
+
else:
|
| 234 |
+
return 0.079085
|
| 235 |
+
|
| 236 |
+
def get_peer_price(self, step_idx, total_surplus, total_shortfall):
    """Return the peer-to-peer price for a step from the demand/supply ratio.

    The price starts at a balance point (80% grid price + 20% feed-in
    tariff), is shifted by an arctangent of the signed, powered log of
    ``shortfall / surplus``, and is finally clipped to the interval
    ``[feed_in_tariff, grid_price]``.
    """
    grid_price = self.get_grid_price(step_idx)
    fit = self.feed_in_tariff

    # Parameters for arctangent-log pricing
    balance_price = (grid_price * 0.80) + (fit * 0.20)
    spread_scale = (grid_price - fit) / (1.5 * np.pi)
    exponent = 1.5
    eps = 1e-6  # keeps the ratio and its log finite when either side is zero

    log_ratio = np.log((total_shortfall + eps) / (total_surplus + eps))

    # Signed power: raise the magnitude, then restore the sign.
    magnitude = np.abs(log_ratio) ** exponent
    power_term = -magnitude if log_ratio < 0 else magnitude

    shifted = balance_price + 2 * np.pi * spread_scale * np.arctan(power_term)
    return float(np.clip(shifted, fit, grid_price))
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def _initialize_episode_metrics(self):
|
| 265 |
+
"""Initializes or resets all metrics tracked over a single episode (day)."""
|
| 266 |
+
self.cumulative_grid_reduction = 0.0
|
| 267 |
+
self.cumulative_grid_reduction_peak = 0.0
|
| 268 |
+
self.cumulative_degradation_cost = 0.0
|
| 269 |
+
self.agent_cost_savings = np.zeros(self.num_agents)
|
| 270 |
+
self.degradation_cost_timeseries = []
|
| 271 |
+
self.cost_savings_timeseries = []
|
| 272 |
+
self.grid_reduction_timeseries = []
|
| 273 |
+
|
| 274 |
+
def get_episode_metrics(self):
    """Return the metrics dictionary of the last completed episode.

    ``episode_metrics`` is only assigned by ``reset`` once at least one step
    has been taken, so an empty dict is returned when no episode has been
    summarised yet instead of raising ``AttributeError``.
    """
    return getattr(self, "episode_metrics", {})
|
| 279 |
+
|
| 280 |
+
##########################################################################
|
| 281 |
+
# Gym Required Methods
|
| 282 |
+
|
| 283 |
+
def reset(self, *, seed=None, options=None):
    """Finalize the previous episode's metrics and start a new random day.

    Args:
        seed: Optional seed.  This environment draws from the global NumPy
            RNG, so a given seed re-seeds ``np.random`` before the day and
            the initial battery levels are drawn.  ``None`` (default) leaves
            the RNG untouched, exactly as before.
        options: Accepted for compatibility with the Gym reset API; unused.

    Returns:
        ``(obs, info)`` with the first observation of the day and an empty
        info dict.
    """
    if seed is not None:
        np.random.seed(seed)

    # Summarize the episode that just ended (skipped before the first step).
    if self.current_step > 0:
        positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
        if len(positive_savings) > 1:
            fairness_on_savings = self._compute_jains_index(positive_savings)
        else:
            fairness_on_savings = 0.0

        self.episode_metrics = {
            "grid_reduction_entire_day": self.cumulative_grid_reduction,
            "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
            "total_cost_savings": np.sum(self.agent_cost_savings),
            "fairness_on_cost_savings": fairness_on_savings,
            "battery_degradation_cost_total": self.cumulative_degradation_cost,
            "degradation_cost_over_time": self.degradation_cost_timeseries,
            "cost_savings_over_time": self.cost_savings_timeseries,
            "grid_reduction_over_time": self.grid_reduction_timeseries,
        }

    # Pick a random day and slice its rows out of the full dataset.
    self.day_index = np.random.randint(0, self.total_days)
    start_row = self.day_index * self.steps_per_day
    end_row = start_row + self.steps_per_day
    day_data = self.all_data.iloc[start_row:end_row].copy()
    self.data = day_data

    self.no_p2p_import_day = {
        hid: self.original_no_p2p_import[hid][start_row:end_row]
        for hid in self.house_ids
    }

    # Demand is reconstructed as grid + solar, floored at zero; one column per house.
    demand_list = []
    solar_list = []
    for hid in self.house_ids:
        grid_series = day_data[f"grid_{hid}"].fillna(0.0)
        solar_series = day_data[f"total_solar_{hid}"].fillna(0.0).clip(lower=0.0)

        demand_array = np.clip(grid_series.values + solar_series.values, 0.0, None)

        demand_list.append(demand_array)
        solar_list.append(solar_series.values)

    self.demands_day = np.stack(demand_list, axis=1).astype(np.float32)
    self.solars_day = np.stack(solar_list, axis=1).astype(np.float32)

    # Fractional hour of day for every row of the selected day.
    self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values

    self.current_step = 0
    self.env_log = []
    for hid in self.house_ids:
        self.previous_actions[hid] = np.zeros(6)

    # Batteries start between 30% and 70% of capacity; agents without a
    # battery are forced to zero.
    lows = 0.30 * self.battery_max_capacity
    highs = 0.70 * self.battery_max_capacity
    self.battery_soc = np.random.uniform(low=lows, high=highs)
    self.battery_soc *= self.has_battery

    # Peer price for the first observation, from the first step's balance.
    initial_demands = self.demands_day[0]
    initial_solars = self.solars_day[0]
    initial_surplus = np.maximum(initial_solars - initial_demands, 0.0).sum()
    initial_shortfall = np.maximum(initial_demands - initial_solars, 0.0).sum()
    initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)

    obs = self._get_obs(peer_price=initial_peer_price)

    self._initialize_episode_metrics()

    return obs, {}
|
| 354 |
+
|
| 355 |
+
def step(self, packed_action):
    """Advance the cluster by one time step.

    Args:
        packed_action: Tuple ``(actions, transfer_kwh_arr, peer_price_arr)``.
            ``actions`` has shape ``(num_agents, 6)`` with columns
            sell-grid, buy-grid, sell-peers, buy-peers, charge, discharge
            (clipped to [0, 1]).  ``transfer_kwh_arr[0]`` is the
            inter-cluster transfer (positive = received, negative = sent).
            ``peer_price_arr[0]`` overrides the peer price when it is >= 0.

    Returns:
        ``(obs_next, summed_reward, terminated, truncated, info)``.

    Raises:
        ValueError: If ``actions`` does not have shape ``(num_agents, 6)``.

    Processing order: battery discharge, battery charge, P2P matching,
    inter-cluster transfer, grid settlement, then costs, rewards and metrics.
    """
    actions, transfer_kwh_arr, peer_price_arr = packed_action
    inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
    price_override = float(peer_price_arr[0])

    actions = np.array(actions, dtype=np.float32)
    if actions.shape != (self.num_agents, 6):
        raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
    actions = np.clip(actions, 0.0, 1.0)

    a_sell_grid = actions[:, 0]
    a_buy_grid = actions[:, 1]
    a_sell_peers = actions[:, 2]
    a_buy_peers = actions[:, 3]
    a_charge = actions[:, 4]
    a_discharge = actions[:, 5]

    demands = self.demands_day[self.current_step]
    solars = self.solars_day[self.current_step]

    # Per-agent imbalance; these arrays are consumed in place below.
    final_shortfall = np.maximum(demands - solars, 0.0)
    final_surplus = np.maximum(solars - demands, 0.0)
    total_surplus = final_surplus.sum()
    total_shortfall = final_shortfall.sum()
    self.current_solar = total_surplus

    # A negative override means "compute the peer price here".
    if price_override >= 0:
        peer_price = price_override
    else:
        peer_price = self.get_peer_price(
            self.current_step,
            total_surplus,
            total_shortfall
        )

    grid_price = self.get_grid_price(self.current_step)

    # ### VECTORIZED BATTERY DISCHARGE ###
    available_from_batt = self.battery_soc * self.battery_discharge_efficiency
    desired_discharge = a_discharge * self.battery_max_discharge_rate
    discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
    discharge_amount *= self.has_battery  # Ensure only batteries discharge

    # Update SOC (energy drawn from battery before efficiency loss)
    self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
    self.battery_soc = np.maximum(0.0, self.battery_soc)
    final_shortfall -= discharge_amount

    # ### VECTORIZED BATTERY CHARGE (from own surplus) ###
    cap_left = self.battery_max_capacity - self.battery_soc
    desired_charge = a_charge * self.battery_max_charge_rate
    charge_amount = np.minimum.reduce([
        desired_charge,
        cap_left / (self.battery_charge_efficiency + 1e-9),
        final_surplus
    ])
    charge_amount *= self.has_battery

    # Update SOC
    self.battery_soc += charge_amount * self.battery_charge_efficiency
    final_surplus -= charge_amount

    # ### VECTORIZED P2P TRADING ###
    battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
    effective_surplus = final_surplus + battery_offer

    net_peer = a_buy_peers - a_sell_peers
    p2p_buy_request = np.maximum(0, net_peer) * final_shortfall
    p2p_sell_offer = np.maximum(0, -net_peer) * effective_surplus

    total_sell = np.sum(p2p_sell_offer)
    total_buy = np.sum(p2p_buy_request)
    matched = min(total_sell, total_buy)

    if matched > 1e-9:
        # The matched volume is shared pro rata among sellers and buyers.
        actual_sold = matched * (p2p_sell_offer / (total_sell + 1e-12))
        actual_bought = matched * (p2p_buy_request / (total_buy + 1e-12))
    else:
        actual_sold = np.zeros(self.num_agents, dtype=np.float32)
        actual_bought = np.zeros(self.num_agents, dtype=np.float32)

    # Sold energy is attributed to the battery first, the rest to solar.
    from_batt = np.minimum(actual_sold, battery_offer)
    from_solar = actual_sold - from_batt

    final_surplus -= from_solar
    final_shortfall -= actual_bought
    soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
    self.battery_soc -= soc_reduction
    self.battery_soc = np.maximum(0.0, self.battery_soc)

    # ### INTER-CLUSTER TRANSFER ###
    if inter_cluster_transfer_kwh > 0:
        # Receiving: cover remaining shortfall first, then store the rest.
        amount_received = inter_cluster_transfer_kwh

        total_shortfall_in_cluster = np.sum(final_shortfall)
        if total_shortfall_in_cluster > 1e-6:
            to_cover_shortfall = min(amount_received, total_shortfall_in_cluster)
            distribution_ratio = final_shortfall / total_shortfall_in_cluster
            final_shortfall -= distribution_ratio * to_cover_shortfall
            amount_received -= to_cover_shortfall

        if amount_received > 1e-6:
            cap_left = self.battery_max_capacity - self.battery_soc
            storable_energy = cap_left / (self.battery_charge_efficiency + 1e-9)
            total_storable_in_cluster = np.sum(storable_energy * self.has_battery)

            if total_storable_in_cluster > 1e-6:
                to_store = min(amount_received, total_storable_in_cluster)
                storage_ratio = storable_energy / total_storable_in_cluster
                energy_to_store_per_batt = storage_ratio * to_store
                self.battery_soc += (energy_to_store_per_batt * self.battery_charge_efficiency) * self.has_battery

    elif inter_cluster_transfer_kwh < 0:
        # Sending: draw from remaining surplus first, then from batteries.
        amount_to_send = abs(inter_cluster_transfer_kwh)

        total_surplus_in_cluster = np.sum(final_surplus)
        if total_surplus_in_cluster > 1e-6:
            sent_from_surplus = min(amount_to_send, total_surplus_in_cluster)
            draw_ratio = final_surplus / total_surplus_in_cluster
            final_surplus -= draw_ratio * sent_from_surplus
            amount_to_send -= sent_from_surplus

        if amount_to_send > 1e-6:
            available_from_batt = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
            total_available_from_batt = np.sum(available_from_batt)

            if total_available_from_batt > 1e-6:
                # Discharge a maximum of 'amount_to_send' from batteries
                to_discharge = min(amount_to_send, total_available_from_batt)

                # Draw this amount proportionally from each available battery
                discharge_ratio = available_from_batt / total_available_from_batt
                discharged_per_batt = discharge_ratio * to_discharge  # This is effective energy

                # Update SoC (energy drawn from battery before efficiency loss)
                soc_reduction = (discharged_per_batt / (self.battery_discharge_efficiency + 1e-9))
                self.battery_soc -= soc_reduction * self.has_battery
                self.battery_soc = np.maximum(0.0, self.battery_soc)

    # ### GRID SETTLEMENT ###
    net_grid = a_buy_grid - a_sell_grid
    grid_import = np.maximum(0, net_grid) * final_shortfall
    grid_export = np.maximum(0, -net_grid) * final_surplus

    # Any shortfall the agent did not choose to import is imported anyway.
    forced = np.maximum(final_shortfall - grid_import, 0.0)
    grid_import += forced
    final_shortfall -= forced

    costs = (
        (grid_import * grid_price)
        - (grid_export * self.feed_in_tariff)
        + (actual_bought * peer_price)
        - (actual_sold * peer_price)
    )

    final_rewards = self._compute_rewards(
        grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
        actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
        costs=costs, grid_price=grid_price, peer_price=peer_price
    )

    no_p2p_import_this_step = np.array([
        self.no_p2p_import_day[hid][self.current_step]
        for hid in self.house_ids
    ], dtype=np.float32)

    # --- Metric 1 & 2: Grid Reduction (Entire Day & Peak Hours) ---
    step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
    self.cumulative_grid_reduction += step_grid_reduction
    self.grid_reduction_timeseries.append(step_grid_reduction)

    # NOTE(review): peak hours are detected via max_grid_price; confirm each
    # state's configured maximum is actually reached by its price function.
    if grid_price >= self.max_grid_price * 0.99:
        self.cumulative_grid_reduction_peak += step_grid_reduction

    # --- Metric 3: Total Cost Savings ---
    step_cost_savings_per_agent = (no_p2p_import_this_step * grid_price) - costs
    self.agent_cost_savings += step_cost_savings_per_agent
    self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))

    # --- Metric 5 & 6: Battery Degradation Cost (Total and Over Time) ---
    degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
    step_degradation_cost = np.sum(degradation_cost_agent)

    self.cumulative_degradation_cost += step_degradation_cost
    self.degradation_cost_timeseries.append(step_degradation_cost)

    info = {
        "p2p_buy": actual_bought,
        "p2p_sell": actual_sold,
        "grid_import_with_p2p": grid_import,
        "grid_import_no_p2p": no_p2p_import_this_step,
        "grid_export": grid_export,
        "costs": costs,
        "charge_amount": charge_amount,
        "discharge_amount": discharge_amount,
        "step": self.current_step,
        "step_grid_reduction": step_grid_reduction,
        "step_cost_savings": np.sum(step_cost_savings_per_agent),
        "step_degradation_cost": step_degradation_cost,
    }

    self.env_log.append([
        self.current_step, np.sum(grid_import), np.sum(grid_export),
        np.sum(actual_bought), np.sum(actual_sold), np.sum(costs)
    ])

    self.current_step += 1

    # The episode only ever ends by reaching the end of the day.
    terminated = False
    truncated = (self.current_step >= self.num_steps)

    obs_next = self._get_obs(peer_price=peer_price)
    info['agent_rewards'] = final_rewards
    self.last_info = info
    self.env_log_infos.append(info)
    return obs_next, final_rewards.sum(), terminated, truncated, info
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
def _get_obs(self, peer_price: float):
|
| 607 |
+
step = min(self.current_step, self.num_steps - 1)
|
| 608 |
+
demands = self.demands_day[step]
|
| 609 |
+
solars = self.solars_day[step]
|
| 610 |
+
grid_price = self.get_grid_price(step)
|
| 611 |
+
hour = self.hours_day[step]
|
| 612 |
+
soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
|
| 613 |
+
soc_frac = np.where(self.has_battery == 1, soc_frac, -1.0)
|
| 614 |
+
total_demand_others = demands.sum() - demands
|
| 615 |
+
total_solar_others = solars.sum() - solars
|
| 616 |
+
|
| 617 |
+
obs = np.stack([
|
| 618 |
+
demands,
|
| 619 |
+
solars,
|
| 620 |
+
soc_frac,
|
| 621 |
+
np.full(self.num_agents, grid_price),
|
| 622 |
+
np.full(self.num_agents, peer_price),
|
| 623 |
+
total_demand_others,
|
| 624 |
+
total_solar_others,
|
| 625 |
+
np.full(self.num_agents, hour)
|
| 626 |
+
], axis=1).astype(np.float32)
|
| 627 |
+
|
| 628 |
+
return obs
|
| 629 |
+
|
| 630 |
+
|
| 631 |
+
def _compute_jains_index(self, usage_array):
|
| 632 |
+
x = np.array(usage_array, dtype=np.float32)
|
| 633 |
+
numerator = (np.sum(x))**2
|
| 634 |
+
denominator = len(x) * np.sum(x**2) + 1e-8
|
| 635 |
+
return numerator / denominator
|
| 636 |
+
|
| 637 |
+
|
| 638 |
+
def _compute_rewards(
|
| 639 |
+
self, grid_import, grid_export, actual_sold, actual_bought,
|
| 640 |
+
charge_amount, discharge_amount, costs, grid_price, peer_price
|
| 641 |
+
):
|
| 642 |
+
|
| 643 |
+
w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0
|
| 644 |
+
|
| 645 |
+
p_grid_norm = grid_price / self.max_grid_price
|
| 646 |
+
p_peer_norm = peer_price / self.max_grid_price
|
| 647 |
+
|
| 648 |
+
rewards = -costs * w7
|
| 649 |
+
rewards -= w1 * grid_import * p_grid_norm
|
| 650 |
+
rewards += w2 * actual_sold * p_peer_norm
|
| 651 |
+
buy_bonus = w3 * actual_bought * ((grid_price - peer_price) / self.max_grid_price)
|
| 652 |
+
rewards += np.where(peer_price < grid_price, buy_bonus, 0.0)
|
| 653 |
+
|
| 654 |
+
# ### VECTORIZED REWARD PENALTIES ###
|
| 655 |
+
soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
|
| 656 |
+
soc_penalties = w4 * ((soc_frac - 0.5) ** 2) * self.has_battery
|
| 657 |
+
degrad_penalties = w5 * (charge_amount + discharge_amount) * self.battery_degradation_cost
|
| 658 |
+
|
| 659 |
+
rewards -= soc_penalties
|
| 660 |
+
rewards -= degrad_penalties
|
| 661 |
+
|
| 662 |
+
jfi = self._compute_jains_index(actual_bought + actual_sold)
|
| 663 |
+
rewards += w6 * jfi
|
| 664 |
+
return rewards
|
| 665 |
+
|
| 666 |
+
def save_log(self, filename="env_log.csv"):
    """Write the accumulated per-step environment log to a CSV file.

    Args:
        filename: Destination path for the CSV (written without an index
            column).
    """
    # Column order matches the order of the values in each env_log row.
    header = [
        "Step", "Total_Grid_Import", "Total_Grid_Export",
        "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
    ]
    pd.DataFrame(self.env_log, columns=header).to_csv(filename, index=False)
    print(f"Environment log saved to {filename}")
|
Other_algorithms/HC_MAPPO/HC_MAPPO_evaluation.py
ADDED
|
@@ -0,0 +1,618 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import time
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
import re
|
| 6 |
+
import numpy as np
|
| 7 |
+
import torch
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
import glob
|
| 11 |
+
|
| 12 |
+
# Allow imports from project root
|
| 13 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 14 |
+
|
| 15 |
+
from cluster import InterClusterCoordinator, InterClusterLedger
|
| 16 |
+
from Environment.cluster_env_wrapper import make_vec_env
|
| 17 |
+
from mappo.trainer.mappo import MAPPO
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def compute_jains_fairness(values: np.ndarray) -> float:
    """
    Compute Jain's fairness index ``(sum x)^2 / (n * sum x^2)``.

    Args:
        values: One-dimensional collection of per-participant quantities.
            Any array-like is accepted (NumPy array, list, tuple, pandas
            Series); it is converted with ``np.asarray`` so plain Python
            sequences no longer fail on ``.sum()``.

    Returns:
        ``0.0`` for an empty input, ``1.0`` when every entry is zero,
        otherwise the index as a Python float, where 1 indicates perfect
        fairness.
    """
    arr = np.asarray(values)
    count = len(arr)
    if count == 0:
        return 0.0
    # All-zero input is treated as perfectly fair; this also avoids 0/0.
    if np.all(arr == 0):
        return 1.0
    num = (arr.sum()) ** 2
    # The epsilon keeps the division finite for degenerate inputs.
    den = count * (arr ** 2).sum() + 1e-8
    return float(num / den)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def main():
|
| 35 |
+
# Configuration Parameters
|
| 36 |
+
DATA_PATH = "data/testing/500houses_30days_TEST.csv"
|
| 37 |
+
MODEL_DIR = "models/hierarchical_oklahoma_500agents_10size_10000eps_latest/models"
|
| 38 |
+
|
| 39 |
+
# Auto-detect state from model path
|
| 40 |
+
state_match = re.search(r"hierarchical_(oklahoma|colorado|pennsylvania)_", MODEL_DIR)
|
| 41 |
+
if not state_match:
|
| 42 |
+
# Fallback to searching the parent directory name if the first pattern fails
|
| 43 |
+
state_match = re.search(r"mappo_(oklahoma|colorado|pennsylvania)_", MODEL_DIR)
|
| 44 |
+
|
| 45 |
+
if not state_match:
|
| 46 |
+
raise ValueError(
|
| 47 |
+
"Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
|
| 48 |
+
"from the model directory path. Please ensure the path contains the state name."
|
| 49 |
+
)
|
| 50 |
+
detected_state = state_match.group(1)
|
| 51 |
+
print(f"--- Detected state: {detected_state.upper()} ---")
|
| 52 |
+
|
| 53 |
+
# Auto-detect cluster size from model path
|
| 54 |
+
cluster_size_match = re.search(r'(\d+)size_', MODEL_DIR)
|
| 55 |
+
if not cluster_size_match:
|
| 56 |
+
raise ValueError(
|
| 57 |
+
"Could not automatically detect the cluster size from the model directory path. "
|
| 58 |
+
"Please ensure the path contains a pattern like '5size_' or '10size_'."
|
| 59 |
+
)
|
| 60 |
+
detected_cluster_size = int(cluster_size_match.group(1))
|
| 61 |
+
print(f"--- Detected cluster size: {detected_cluster_size} ---")
|
| 62 |
+
|
| 63 |
+
DAYS_TO_EVALUATE = 30
|
| 64 |
+
SOLAR_THRESHOLD = 0.1
|
| 65 |
+
MAX_TRANSFER_KWH = 1000000.0
|
| 66 |
+
|
| 67 |
+
W_COST_SAVINGS = 1.0
|
| 68 |
+
W_GRID_PENALTY = 0.5
|
| 69 |
+
W_P2P_BONUS = 0.2
|
| 70 |
+
|
| 71 |
+
# Environment Initialization
|
| 72 |
+
cluster_env = make_vec_env(
|
| 73 |
+
data_path=DATA_PATH,
|
| 74 |
+
time_freq="15T",
|
| 75 |
+
cluster_size=detected_cluster_size,
|
| 76 |
+
state=detected_state
|
| 77 |
+
)
|
| 78 |
+
n_clusters = cluster_env.num_envs
|
| 79 |
+
sample_subenv = cluster_env.cluster_envs[0]
|
| 80 |
+
eval_num_steps = sample_subenv.num_steps
|
| 81 |
+
print(f"Number of steps per day: {eval_num_steps}")
|
| 82 |
+
|
| 83 |
+
# Load intra-cluster MAPPO agents
|
| 84 |
+
n_agents_per_cluster = sample_subenv.num_agents
|
| 85 |
+
local_dim = sample_subenv.observation_space.shape[-1]
|
| 86 |
+
global_dim = n_agents_per_cluster * local_dim
|
| 87 |
+
act_dim = sample_subenv.action_space[0].shape[-1]
|
| 88 |
+
|
| 89 |
+
print(f"Creating and loading {n_clusters} independent low-level MAPPO agents...")
|
| 90 |
+
low_agents = []
|
| 91 |
+
for i in range(n_clusters):
|
| 92 |
+
agent = MAPPO(
|
| 93 |
+
n_agents=n_agents_per_cluster,
|
| 94 |
+
local_dim=local_dim,
|
| 95 |
+
global_dim=global_dim,
|
| 96 |
+
act_dim=act_dim,
|
| 97 |
+
lr=2e-4,
|
| 98 |
+
gamma=0.95,
|
| 99 |
+
lam=0.95,
|
| 100 |
+
clip_eps=0.2,
|
| 101 |
+
k_epochs=4,
|
| 102 |
+
batch_size=512,
|
| 103 |
+
episode_len=96
|
| 104 |
+
)
|
| 105 |
+
ckpt_pattern = os.path.join(MODEL_DIR, f"low_cluster{i}_ep*.pth")
|
| 106 |
+
ckpts_low = glob.glob(ckpt_pattern)
|
| 107 |
+
if not ckpts_low:
|
| 108 |
+
raise FileNotFoundError(f"No checkpoint found for cluster {i} with pattern: {ckpt_pattern}")
|
| 109 |
+
latest_low = sorted(ckpts_low, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
|
| 110 |
+
print(f"Loading low-level policy for cluster {i} from: {latest_low}")
|
| 111 |
+
agent.load(latest_low)
|
| 112 |
+
agent.actor.eval()
|
| 113 |
+
agent.critic.eval()
|
| 114 |
+
low_agents.append(agent)
|
| 115 |
+
|
| 116 |
+
# Output Folder Setup
|
| 117 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 118 |
+
num_agents = sum(subenv.num_agents for subenv in cluster_env.cluster_envs)
|
| 119 |
+
run_name = f"eval_vectorized_{num_agents}agents_{DAYS_TO_EVALUATE}days_{timestamp}"
|
| 120 |
+
output_folder = os.path.join("runs_final_vectorized_eval", run_name)
|
| 121 |
+
logs_dir = os.path.join(output_folder, "logs")
|
| 122 |
+
plots_dir = os.path.join(output_folder, "plots")
|
| 123 |
+
for d in (logs_dir, plots_dir):
|
| 124 |
+
os.makedirs(d, exist_ok=True)
|
| 125 |
+
print(f"Saving evaluation outputs to: {output_folder}")
|
| 126 |
+
|
| 127 |
+
# Load inter-cluster MAPPO agent
|
| 128 |
+
OBS_DIM_HI_LOCAL = 7
|
| 129 |
+
act_dim_inter = 2
|
| 130 |
+
|
| 131 |
+
# Define the global dimension for the high-level agent
|
| 132 |
+
OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL
|
| 133 |
+
|
| 134 |
+
print(f"Initializing evaluation inter-agent (MAPPO): n_agents={n_clusters}, "
|
| 135 |
+
f"local_dim={OBS_DIM_HI_LOCAL}, global_dim={OBS_DIM_HI_GLOBAL}, act_dim={act_dim_inter}")
|
| 136 |
+
|
| 137 |
+
# Instantiate MAPPO for inter-cluster coordination
|
| 138 |
+
inter_agent = MAPPO(
|
| 139 |
+
n_agents=n_clusters,
|
| 140 |
+
local_dim=OBS_DIM_HI_LOCAL,
|
| 141 |
+
global_dim=OBS_DIM_HI_GLOBAL,
|
| 142 |
+
act_dim=act_dim_inter,
|
| 143 |
+
lr=2e-4,
|
| 144 |
+
gamma=0.95,
|
| 145 |
+
lam=0.95,
|
| 146 |
+
clip_eps=0.2,
|
| 147 |
+
k_epochs=4,
|
| 148 |
+
batch_size=512,
|
| 149 |
+
episode_len=96
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
ckpts_inter = glob.glob(os.path.join(MODEL_DIR, "inter_ep*.pth"))
|
| 153 |
+
if not ckpts_inter:
|
| 154 |
+
raise FileNotFoundError(f"No high-level checkpoints (inter_ep*.pth) in {MODEL_DIR}")
|
| 155 |
+
latest_inter = sorted(ckpts_inter, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
|
| 156 |
+
print("Loading inter-cluster policy from", latest_inter)
|
| 157 |
+
inter_agent.load(latest_inter)
|
| 158 |
+
inter_agent.actor.eval()
|
| 159 |
+
inter_agent.critic.eval()
|
| 160 |
+
|
| 161 |
+
# Instantiate Coordinator
|
| 162 |
+
ledger = InterClusterLedger()
|
| 163 |
+
coordinator = InterClusterCoordinator(
|
| 164 |
+
cluster_env,
|
| 165 |
+
inter_agent,
|
| 166 |
+
ledger,
|
| 167 |
+
max_transfer_kwh=MAX_TRANSFER_KWH,
|
| 168 |
+
w_cost_savings=W_COST_SAVINGS,
|
| 169 |
+
w_grid_penalty=W_GRID_PENALTY,
|
| 170 |
+
w_p2p_bonus=W_P2P_BONUS
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
# Data collectors
|
| 174 |
+
all_logs = []
|
| 175 |
+
daily_summaries = []
|
| 176 |
+
step_timing_list = []
|
| 177 |
+
|
| 178 |
+
# Per-day evaluation
|
| 179 |
+
evaluation_start = time.time()
|
| 180 |
+
for day in range(1, DAYS_TO_EVALUATE + 1):
|
| 181 |
+
obs_clusters, _ = cluster_env.reset()
|
| 182 |
+
done_all = False
|
| 183 |
+
step_count = 0
|
| 184 |
+
day_logs = []
|
| 185 |
+
|
| 186 |
+
while not done_all and step_count < eval_num_steps:
|
| 187 |
+
step_start_time = time.time()
|
| 188 |
+
step_count += 1
|
| 189 |
+
|
| 190 |
+
# Get high-level actions
|
| 191 |
+
inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
|
| 192 |
+
inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
|
| 193 |
+
|
| 194 |
+
# Create the global state for the high-level agent
|
| 195 |
+
inter_cluster_obs_global = inter_cluster_obs_local.flatten()
|
| 196 |
+
|
| 197 |
+
with torch.no_grad():
|
| 198 |
+
# Call select_action with both local and global states
|
| 199 |
+
high_level_action, _ = inter_agent.select_action(
|
| 200 |
+
inter_cluster_obs_local,
|
| 201 |
+
inter_cluster_obs_global
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
# Build transfers
|
| 205 |
+
current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
|
| 206 |
+
exports, imports = coordinator.build_transfers(high_level_action, current_reports)
|
| 207 |
+
|
| 208 |
+
# Get low-level actions
|
| 209 |
+
batch_global_obs = obs_clusters.reshape(n_clusters, -1)
|
| 210 |
+
with torch.no_grad():
|
| 211 |
+
low_level_actions_list = []
|
| 212 |
+
for c_idx in range(n_clusters):
|
| 213 |
+
agent = low_agents[c_idx]
|
| 214 |
+
local_obs_cluster = obs_clusters[c_idx]
|
| 215 |
+
global_obs_cluster = batch_global_obs[c_idx]
|
| 216 |
+
actions, _ = agent.select_action(local_obs_cluster, global_obs_cluster)
|
| 217 |
+
low_level_actions_list.append(actions)
|
| 218 |
+
low_level_actions = np.stack(low_level_actions_list)
|
| 219 |
+
|
| 220 |
+
# Step the environment
|
| 221 |
+
next_obs, rewards, done_all, step_info = cluster_env.step(
|
| 222 |
+
low_level_actions, exports=exports, imports=imports
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
# Advance the state
|
| 226 |
+
obs_clusters = next_obs
|
| 227 |
+
|
| 228 |
+
# Timing and console printout
|
| 229 |
+
step_duration = time.time() - step_start_time
|
| 230 |
+
print(f"[Day {day}, Step {step_count}] Step time: {step_duration:.6f} seconds")
|
| 231 |
+
step_timing_list.append({"day": day, "step": step_count, "step_time_s": step_duration})
|
| 232 |
+
|
| 233 |
+
# Consolidated Logging
|
| 234 |
+
infos = step_info.get("cluster_infos")
|
| 235 |
+
for c_idx, subenv in enumerate(cluster_env.cluster_envs):
|
| 236 |
+
grid_price_now = subenv.get_grid_price(step_count - 1)
|
| 237 |
+
peer_price_now = step_info.get("peer_price_global")
|
| 238 |
+
if peer_price_now is None:
|
| 239 |
+
demands_step = subenv.demands_day[step_count-1]
|
| 240 |
+
solars_step = subenv.solars_day[step_count-1]
|
| 241 |
+
surplus = np.maximum(solars_step - demands_step, 0.0).sum()
|
| 242 |
+
shortfall = np.maximum(demands_step - solars_step, 0.0).sum()
|
| 243 |
+
peer_price_now = subenv.get_peer_price(step_count -1, surplus, shortfall)
|
| 244 |
+
|
| 245 |
+
for i, hid in enumerate(subenv.house_ids):
|
| 246 |
+
is_battery_house = hid in subenv.batteries
|
| 247 |
+
charge = infos["charge_amount"][c_idx][i]
|
| 248 |
+
discharge = infos["discharge_amount"][c_idx][i]
|
| 249 |
+
day_logs.append({
|
| 250 |
+
"day": day,
|
| 251 |
+
"step": step_count - 1,
|
| 252 |
+
"house": hid,
|
| 253 |
+
"cluster": c_idx,
|
| 254 |
+
"grid_import_no_p2p": infos["grid_import_no_p2p"][c_idx][i],
|
| 255 |
+
"grid_import_with_p2p": infos["grid_import_with_p2p"][c_idx][i],
|
| 256 |
+
"grid_export": infos["grid_export"][c_idx][i],
|
| 257 |
+
"p2p_buy": infos["p2p_buy"][c_idx][i],
|
| 258 |
+
"p2p_sell": infos["p2p_sell"][c_idx][i],
|
| 259 |
+
"actual_cost": infos["costs"][c_idx][i],
|
| 260 |
+
"baseline_cost": infos["grid_import_no_p2p"][c_idx][i] * grid_price_now,
|
| 261 |
+
"total_demand": subenv.demands_day[step_count-1, i],
|
| 262 |
+
"total_solar": subenv.solars_day[step_count-1, i],
|
| 263 |
+
"grid_price": grid_price_now,
|
| 264 |
+
"peer_price": peer_price_now,
|
| 265 |
+
"soc": (subenv.battery_soc[i] / subenv.battery_max_capacity[i]) if is_battery_house and subenv.battery_max_capacity[i] > 0 else np.nan,
|
| 266 |
+
"degradation_cost": (charge + discharge) * subenv.battery_degradation_cost[i] if is_battery_house else 0.0,
|
| 267 |
+
"reward": infos["agent_rewards"][c_idx][i],
|
| 268 |
+
})
|
| 269 |
+
|
| 270 |
+
step_duration = time.time() - step_start_time
|
| 271 |
+
|
| 272 |
+
# End of day: aggregate & summarize
|
| 273 |
+
df_day = pd.DataFrame(day_logs)
|
| 274 |
+
if df_day.empty:
|
| 275 |
+
continue
|
| 276 |
+
all_logs.extend(day_logs)
|
| 277 |
+
|
| 278 |
+
# Consolidated Daily Summary Calculation
|
| 279 |
+
# Correctly count solar houses from the daily data
|
| 280 |
+
num_solar_houses = df_day[df_day['total_solar'] > 0]['house'].nunique()
|
| 281 |
+
|
| 282 |
+
if num_solar_houses > 0:
|
| 283 |
+
# Get the total number of agents for scaling the threshold
|
| 284 |
+
num_agents_in_day = df_day['house'].nunique()
|
| 285 |
+
|
| 286 |
+
# Calculate aggregate solar generation per step
|
| 287 |
+
agg_solar_per_step = df_day.groupby("step")["total_solar"].sum()
|
| 288 |
+
|
| 289 |
+
# Find steps where aggregate solar exceeds the scaled threshold
|
| 290 |
+
sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_in_day)
|
| 291 |
+
sunny_steps = sunny_steps_mask[sunny_steps_mask].index
|
| 292 |
+
|
| 293 |
+
# The rest of the calculation remains the same
|
| 294 |
+
trade_df = df_day[df_day["step"].isin(sunny_steps)]
|
| 295 |
+
|
| 296 |
+
grouped_house = df_day.groupby("house").sum(numeric_only=True)
|
| 297 |
+
grouped_step = df_day.groupby("step").sum(numeric_only=True)
|
| 298 |
+
|
| 299 |
+
total_demand = grouped_step["total_demand"].sum()
|
| 300 |
+
total_solar = grouped_step["total_solar"].sum()
|
| 301 |
+
total_p2p_buy = df_day['p2p_buy'].sum()
|
| 302 |
+
total_p2p_sell = df_day['p2p_sell'].sum()
|
| 303 |
+
total_actual_grid_import = df_day['grid_import_with_p2p'].sum()
|
| 304 |
+
|
| 305 |
+
baseline_cost_per_house = grouped_house["baseline_cost"]
|
| 306 |
+
actual_cost_per_house = grouped_house["actual_cost"]
|
| 307 |
+
cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
|
| 308 |
+
day_total_cost_savings = cost_savings_per_house.sum()
|
| 309 |
+
|
| 310 |
+
if baseline_cost_per_house.sum() > 0:
|
| 311 |
+
overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
|
| 312 |
+
else:
|
| 313 |
+
overall_cost_savings_pct = 0.0
|
| 314 |
+
|
| 315 |
+
baseline_import_per_house = grouped_house["grid_import_no_p2p"]
|
| 316 |
+
actual_import_per_house = grouped_house["grid_import_with_p2p"]
|
| 317 |
+
import_reduction_per_house = baseline_import_per_house - actual_import_per_house
|
| 318 |
+
day_total_import_reduction = import_reduction_per_house.sum()
|
| 319 |
+
|
| 320 |
+
if baseline_import_per_house.sum() > 0:
|
| 321 |
+
overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
|
| 322 |
+
else:
|
| 323 |
+
overall_import_reduction_pct = 0.0
|
| 324 |
+
|
| 325 |
+
fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
|
| 326 |
+
fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
|
| 327 |
+
fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
|
| 328 |
+
fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
|
| 329 |
+
fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
|
| 330 |
+
fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
|
| 331 |
+
|
| 332 |
+
daily_summaries.append({
|
| 333 |
+
"day": day,
|
| 334 |
+
"day_total_demand": total_demand,
|
| 335 |
+
"day_total_solar": total_solar,
|
| 336 |
+
"day_p2p_buy": total_p2p_buy,
|
| 337 |
+
"day_p2p_sell": total_p2p_sell,
|
| 338 |
+
"cost_savings_abs": day_total_cost_savings,
|
| 339 |
+
"cost_savings_pct": overall_cost_savings_pct,
|
| 340 |
+
"fairness_cost_savings": fairness_cost_savings,
|
| 341 |
+
"grid_reduction_abs": day_total_import_reduction,
|
| 342 |
+
"grid_reduction_pct": overall_import_reduction_pct,
|
| 343 |
+
"fairness_grid_reduction": fairness_import_reduction,
|
| 344 |
+
"fairness_reward": fairness_rewards,
|
| 345 |
+
"fairness_p2p_buy": fairness_p2p_buy,
|
| 346 |
+
"fairness_p2p_sell": fairness_p2p_sell,
|
| 347 |
+
"fairness_p2p_total": fairness_p2p_total,
|
| 348 |
+
})
|
| 349 |
+
|
| 350 |
+
# Final Processing and Saving
|
| 351 |
+
evaluation_end = time.time()
|
| 352 |
+
total_eval_time = evaluation_end - evaluation_start
|
| 353 |
+
print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
|
| 354 |
+
|
| 355 |
+
all_days_df = pd.DataFrame(all_logs)
|
| 356 |
+
if not all_days_df.empty:
|
| 357 |
+
# Save step-level logs
|
| 358 |
+
combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
|
| 359 |
+
all_days_df.to_csv(combined_csv_path, index=False)
|
| 360 |
+
print(f"Saved combined step-level logs to: {combined_csv_path}")
|
| 361 |
+
|
| 362 |
+
# Save timing logs
|
| 363 |
+
step_timing_df = pd.DataFrame(step_timing_list)
|
| 364 |
+
timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
|
| 365 |
+
step_timing_df.to_csv(timing_csv_path, index=False)
|
| 366 |
+
print(f"Saved step timing logs to: {timing_csv_path}")
|
| 367 |
+
|
| 368 |
+
# Save house-level summary
|
| 369 |
+
house_level_df = all_days_df.groupby("house").agg({
|
| 370 |
+
"baseline_cost": "sum",
|
| 371 |
+
"actual_cost": "sum",
|
| 372 |
+
"grid_import_no_p2p": "sum",
|
| 373 |
+
"grid_import_with_p2p": "sum",
|
| 374 |
+
"degradation_cost": "sum"
|
| 375 |
+
})
|
| 376 |
+
house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
|
| 377 |
+
house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
|
| 378 |
+
house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
|
| 379 |
+
house_level_df.to_csv(house_summary_csv)
|
| 380 |
+
print(f"Saved final summary per house to: {house_summary_csv}")
|
| 381 |
+
|
| 382 |
+
# Calculate Final Summary Metrics
|
| 383 |
+
daily_summary_df = pd.DataFrame(daily_summaries)
|
| 384 |
+
|
| 385 |
+
fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
|
| 386 |
+
fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
|
| 387 |
+
|
| 388 |
+
total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
|
| 389 |
+
total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
|
| 390 |
+
pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
|
| 391 |
+
|
| 392 |
+
total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
|
| 393 |
+
total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
|
| 394 |
+
pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
|
| 395 |
+
|
| 396 |
+
total_degradation_cost_all = all_days_df["degradation_cost"].sum()
|
| 397 |
+
|
| 398 |
+
# Calculate Alternative Performance Metrics
|
| 399 |
+
agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
|
| 400 |
+
num_agents_total = len(all_days_df['house'].unique())
|
| 401 |
+
sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_total)
|
| 402 |
+
sunny_df = all_days_df[all_days_df.set_index(['day', 'step']).index.isin(sunny_steps_mask[sunny_steps_mask].index)]
|
| 403 |
+
|
| 404 |
+
baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
|
| 405 |
+
actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
|
| 406 |
+
grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
|
| 407 |
+
|
| 408 |
+
total_p2p_buy = all_days_df['p2p_buy'].sum()
|
| 409 |
+
total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
|
| 410 |
+
community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
|
| 411 |
+
|
| 412 |
+
total_p2p_sell = all_days_df['p2p_sell'].sum()
|
| 413 |
+
total_grid_export = all_days_df['grid_export'].sum()
|
| 414 |
+
solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0
|
| 415 |
+
|
| 416 |
+
# Calculate cost savings in sunny hours
|
| 417 |
+
baseline_cost_sunny = sunny_df['baseline_cost'].sum()
|
| 418 |
+
actual_cost_sunny = sunny_df['actual_cost'].sum()
|
| 419 |
+
cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
|
| 420 |
+
|
| 421 |
+
# Create and Save Final Summary CSV
|
| 422 |
+
final_row = {
|
| 423 |
+
"day": "ALL_DAYS_SUMMARY",
|
| 424 |
+
"cost_savings_abs": total_cost_savings_all,
|
| 425 |
+
"cost_savings_pct": pct_cost_savings_all,
|
| 426 |
+
"grid_reduction_abs": total_grid_reduction_all,
|
| 427 |
+
"grid_reduction_pct": pct_grid_reduction_all,
|
| 428 |
+
"fairness_cost_savings": fairness_cost_all,
|
| 429 |
+
"fairness_grid_reduction": fairness_grid_all,
|
| 430 |
+
"total_degradation_cost": total_degradation_cost_all,
|
| 431 |
+
"grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
|
| 432 |
+
"community_sourcing_rate_pct": community_sourcing_rate_pct,
|
| 433 |
+
"solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
|
| 434 |
+
}
|
| 435 |
+
final_row_df = pd.DataFrame([final_row])
|
| 436 |
+
|
| 437 |
+
# Ensure daily summary has columns before concatenating
|
| 438 |
+
if not daily_summary_df.empty:
|
| 439 |
+
daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
|
| 440 |
+
|
| 441 |
+
summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
|
| 442 |
+
daily_summary_df.to_csv(summary_csv, index=False)
|
| 443 |
+
print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
|
| 444 |
+
|
| 445 |
+
# Final Printout
|
| 446 |
+
print("\n================== EVALUATION SUMMARY ==================")
|
| 447 |
+
print(f"Evaluation finished for {DAYS_TO_EVALUATE} days.\n")
|
| 448 |
+
print("--- Standard Metrics (24-Hour Average) ---")
|
| 449 |
+
print(f"Total grid reduction: {total_grid_reduction_all:.2f} kWh ({pct_grid_reduction_all:.2%})")
|
| 450 |
+
print(f"Total cost savings: ${total_cost_savings_all:.2f} ({pct_cost_savings_all:.2%})")
|
| 451 |
+
print(f"Jain's fairness on grid reduction: {fairness_grid_all:.3f}")
|
| 452 |
+
print(f"Jain's fairness on cost savings: {fairness_cost_all:.3f}\n")
|
| 453 |
+
print("--- Alternative Metrics (Highlighting Peak Performance) ---")
|
| 454 |
+
print(f"Grid reduction during solar hours: {grid_reduction_sunny_pct:.2%}")
|
| 455 |
+
print(f"Cost savings during solar hours: {cost_savings_sunny_pct:.2%}")
|
| 456 |
+
print(f"Community sourcing rate: {community_sourcing_rate_pct:.2%}")
|
| 457 |
+
print(f"Solar sharing efficiency: {solar_sharing_efficiency_pct:.2%}")
|
| 458 |
+
print("=========================================================")
|
| 459 |
+
|
| 460 |
+
# Generate Plots
|
| 461 |
+
# Create a clean version of the daily summary for plotting, excluding the final summary row
|
| 462 |
+
plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
|
| 463 |
+
plot_daily_df["day"] = plot_daily_df["day"].astype(int)
|
| 464 |
+
|
| 465 |
+
# Plot 1: Daily Cost Savings Percentage
|
| 466 |
+
plt.figure(figsize=(12, 6))
|
| 467 |
+
plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
|
| 468 |
+
plt.xlabel("Day")
|
| 469 |
+
plt.ylabel("Cost Savings (%)")
|
| 470 |
+
plt.title("Daily Community Cost Savings Percentage")
|
| 471 |
+
plt.xticks(plot_daily_df["day"])
|
| 472 |
+
plt.grid(axis='y', linestyle='--', alpha=0.7)
|
| 473 |
+
plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
|
| 474 |
+
plt.close()
|
| 475 |
+
|
| 476 |
+
# Plot 2: Daily Total Demand vs. Solar
|
| 477 |
+
plt.figure(figsize=(12, 6))
|
| 478 |
+
bar_width = 0.4
|
| 479 |
+
days = plot_daily_df["day"]
|
| 480 |
+
plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
|
| 481 |
+
plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
|
| 482 |
+
plt.xlabel("Day")
|
| 483 |
+
plt.ylabel("Energy (kWh)")
|
| 484 |
+
plt.title("Total Community Demand vs. Solar Generation Per Day")
|
| 485 |
+
plt.xticks(days)
|
| 486 |
+
plt.legend()
|
| 487 |
+
plt.grid(axis='y', linestyle='--', alpha=0.7)
|
| 488 |
+
plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
|
| 489 |
+
plt.close()
|
| 490 |
+
|
| 491 |
+
# Plot 3: Combined Time Series of Energy Flows
|
| 492 |
+
# Aggregate data by global step across all days
|
| 493 |
+
step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
|
| 494 |
+
step_group["global_step"] = (step_group["day"] - 1) * eval_num_steps + step_group["step"]
|
| 495 |
+
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
|
| 496 |
+
|
| 497 |
+
# Subplot 1: Grid Import vs P2P Buy
|
| 498 |
+
ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
|
| 499 |
+
ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
|
| 500 |
+
ax1.set_ylabel("Energy (kWh)")
|
| 501 |
+
ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
|
| 502 |
+
ax1.legend()
|
| 503 |
+
ax1.grid(True, linestyle='--', alpha=0.6)
|
| 504 |
+
|
| 505 |
+
# Subplot 2: Grid Export vs P2P Sell
|
| 506 |
+
ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
|
| 507 |
+
ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
|
| 508 |
+
ax2.set_xlabel("Global Timestep")
|
| 509 |
+
ax2.set_ylabel("Energy (kWh)")
|
| 510 |
+
ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
|
| 511 |
+
ax2.legend()
|
| 512 |
+
ax2.grid(True, linestyle='--', alpha=0.6)
|
| 513 |
+
|
| 514 |
+
plt.tight_layout()
|
| 515 |
+
plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
|
| 516 |
+
plt.close()
|
| 517 |
+
|
| 518 |
+
# Plot 4: Stacked Bar of Daily Energy Sources
|
| 519 |
+
# Shows how the community's baseline grid import is met by actual grid import vs. P2P trading
|
| 520 |
+
daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
|
| 521 |
+
|
| 522 |
+
plt.figure(figsize=(12, 7))
|
| 523 |
+
plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
|
| 524 |
+
plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
|
| 525 |
+
plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
|
| 526 |
+
|
| 527 |
+
plt.xlabel("Day")
|
| 528 |
+
plt.ylabel("Energy (kWh)")
|
| 529 |
+
plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
|
| 530 |
+
plt.xticks(daily_agg.index)
|
| 531 |
+
plt.legend()
|
| 532 |
+
plt.grid(axis='y', linestyle='--', alpha=0.7)
|
| 533 |
+
plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
|
| 534 |
+
plt.close()
|
| 535 |
+
|
| 536 |
+
# Plot 5: Fairness Metrics Over Time
|
| 537 |
+
plt.figure(figsize=(12, 6))
|
| 538 |
+
plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
|
| 539 |
+
plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
|
| 540 |
+
plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
|
| 541 |
+
plt.xlabel("Day")
|
| 542 |
+
plt.ylabel("Jain's Fairness Index")
|
| 543 |
+
plt.title("Daily Fairness Metrics")
|
| 544 |
+
plt.xticks(plot_daily_df["day"])
|
| 545 |
+
plt.ylim(0, 1.05)
|
| 546 |
+
plt.legend()
|
| 547 |
+
plt.grid(True, linestyle='--', alpha=0.7)
|
| 548 |
+
plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
|
| 549 |
+
plt.close()
|
| 550 |
+
|
| 551 |
+
# Plot 6: Per-House Savings and Reductions
|
| 552 |
+
# Uses the house_level_df which summarizes stats over all evaluated days
|
| 553 |
+
fig, ax1 = plt.subplots(figsize=(15, 7))
|
| 554 |
+
|
| 555 |
+
house_ids_str = house_level_df.index.astype(str)
|
| 556 |
+
bar_width = 0.4
|
| 557 |
+
index = np.arange(len(house_ids_str))
|
| 558 |
+
|
| 559 |
+
# Bar chart for cost savings
|
| 560 |
+
color1 = 'tab:green'
|
| 561 |
+
ax1.set_xlabel('House ID')
|
| 562 |
+
ax1.set_ylabel('Total Cost Savings ($)', color=color1)
|
| 563 |
+
ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
|
| 564 |
+
ax1.tick_params(axis='y', labelcolor=color1)
|
| 565 |
+
ax1.set_xticks(index)
|
| 566 |
+
ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
|
| 567 |
+
|
| 568 |
+
# Instantiate a second y-axis for grid import reduction
|
| 569 |
+
ax2 = ax1.twinx()
|
| 570 |
+
color2 = 'tab:blue'
|
| 571 |
+
ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
|
| 572 |
+
ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
|
| 573 |
+
ax2.tick_params(axis='y', labelcolor=color2)
|
| 574 |
+
|
| 575 |
+
plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {DAYS_TO_EVALUATE} days)')
|
| 576 |
+
|
| 577 |
+
fig.tight_layout()
|
| 578 |
+
plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
|
| 579 |
+
plt.close()
|
| 580 |
+
|
| 581 |
+
# Plot 7: Price Dynamics for a Single Day
|
| 582 |
+
# Visualize the prices the agents see on the first day of evaluation
|
| 583 |
+
day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
|
| 584 |
+
plt.figure(figsize=(12, 6))
|
| 585 |
+
plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
|
| 586 |
+
plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
|
| 587 |
+
plt.xlabel("Timestep of Day")
|
| 588 |
+
plt.ylabel("Price ($/kWh)")
|
| 589 |
+
plt.title("Price Dynamics on Day 1")
|
| 590 |
+
plt.legend()
|
| 591 |
+
plt.grid(True, linestyle='--', alpha=0.6)
|
| 592 |
+
plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
|
| 593 |
+
plt.close()
|
| 594 |
+
|
| 595 |
+
# Plot 8: Battery State of Charge (SoC) for a Sample of Houses
|
| 596 |
+
day1_df = all_days_df[all_days_df['day'] == 1]
|
| 597 |
+
battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
|
| 598 |
+
|
| 599 |
+
if len(battery_houses) > 0:
|
| 600 |
+
sample_houses = battery_houses[:min(4, len(battery_houses))] # Plot up to 4 houses
|
| 601 |
+
plt.figure(figsize=(12, 6))
|
| 602 |
+
for house in sample_houses:
|
| 603 |
+
house_df = day1_df[day1_df['house'] == house]
|
| 604 |
+
plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
|
| 605 |
+
|
| 606 |
+
plt.xlabel("Timestep of Day")
|
| 607 |
+
plt.ylabel("State of Charge (%)")
|
| 608 |
+
plt.title("Battery SoC on Day 1 for Sample Houses")
|
| 609 |
+
plt.legend()
|
| 610 |
+
plt.grid(True, linestyle='--', alpha=0.6)
|
| 611 |
+
plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
|
| 612 |
+
plt.close()
|
| 613 |
+
|
| 614 |
+
print("All plots have been generated and saved. Evaluation complete.")
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Other_algorithms/HC_MAPPO/HC_MAPPO_train.py
ADDED
|
@@ -0,0 +1,579 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import time
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import torch
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
|
| 12 |
+
# Allow imports from project root
|
| 13 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 14 |
+
|
| 15 |
+
from cluster import InterClusterCoordinator, InterClusterLedger
|
| 16 |
+
from Environment.cluster_env_wrapper import make_vec_env
|
| 17 |
+
from mappo.trainer.mappo import MAPPO
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def recursive_sum(item):
    """Return the sum of all real numbers inside an arbitrarily nested structure.

    Iterables (lists, tuples, arrays, ...) are walked recursively; strings are
    never iterated. Anything that is neither iterable nor a real number
    (``None``, strings, other objects) contributes nothing to the result.

    Args:
        item: A number, or a (possibly ragged / nested) iterable of numbers.

    Returns:
        The accumulated sum; ``0`` when no numeric value is found.
    """
    # 0-d arrays/tensors expose __iter__ but raise TypeError when iterated,
    # so unwrap them to a plain scalar before deciding how to handle them.
    if hasattr(item, '__iter__') and getattr(item, 'ndim', None) == 0 and hasattr(item, 'item'):
        item = item.item()

    total = 0
    if isinstance(item, str):
        # Strings are iterable but never numeric data here.
        return total

    if hasattr(item, '__iter__'):
        for sub_item in item:
            total += recursive_sum(sub_item)
    elif isinstance(item, (int, float, np.integer, np.floating, np.bool_)):
        # Explicit type check instead of np.isreal(): np.isreal(None) is True,
        # which made the following addition raise TypeError.
        total += item
    elif isinstance(item, (complex, np.complexfloating)) and item.imag == 0:
        # Complex values with a zero imaginary part were accepted before; keep that.
        total += item
    # Every other non-numeric, non-iterable item is ignored.
    return total
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _aggregate_system_metrics(cluster_metrics, episode):
    """Combine per-cluster episode metrics into one system-wide record.

    Args:
        cluster_metrics: List of metric dicts, one per cluster environment, as
            returned by ``cluster_env.call('get_episode_metrics')``.
        episode: Episode identifier stored under the ``"Episode"`` key.

    Returns:
        A dict with additive metrics summed over clusters, the fairness index
        averaged over clusters, and the episode identifier.
    """
    return {
        "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in cluster_metrics),
        "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in cluster_metrics),
        "total_cost_savings": sum(m["total_cost_savings"] for m in cluster_metrics),
        "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in cluster_metrics),
        # Fairness is an index, so it is averaged across clusters rather than summed
        "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in cluster_metrics]),
        "Episode": episode,
    }


def main():
    """Train the hierarchical MAPPO system and write models, logs and plots.

    Phase 1 (episodes before ``JOINT_TRAINING_START_EPISODE``) trains only the
    per-cluster low-level agents. Phase 2 adds the inter-cluster (high-level)
    agent and alternates which level is updated: low-level agents for
    ``FREEZE_HIGH_FOR_EPISODES`` episodes, then the high-level agent for
    ``FREEZE_LOW_FOR_EPISODES`` episodes, repeating.

    Raises:
        ValueError: If the number of houses cannot be parsed from ``DATA_PATH``.
    """
    overall_start_time = time.time()

    # Training Configuration Parameters
    STATE_TO_RUN = "oklahoma"  # e.g. "oklahoma" or "colorado"
    DATA_PATH = "data/training/1000houses_152days_TRAIN.csv"

    # Dynamically extract the number of agents from the file path
    match = re.search(r'(\d+)houses', DATA_PATH)
    if not match:
        raise ValueError("Could not extract the number of houses from DATA_PATH.")
    NUMBER_OF_AGENTS = int(match.group(1))

    CLUSTER_SIZE = 10
    NUM_EPISODES = 10000
    BATCH_SIZE = 256
    # Larger than NUM_EPISODES, so only the final episode is checkpointed.
    # Lower this value to save intermediate checkpoints.
    CHECKPOINT_INTERVAL = 100000
    MAX_TRANSFER_KWH = 100000

    LR = 2e-4
    GAMMA = 0.95
    LAMBDA = 0.95
    CLIP_EPS = 0.2
    K_EPOCHS = 4

    JOINT_TRAINING_START_EPISODE = 2000
    FREEZE_HIGH_FOR_EPISODES = 20
    FREEZE_LOW_FOR_EPISODES = 10

    # Build run directories
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = (
        f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_"
        f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
    )
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)  # New folder for new runs
    models_dir = os.path.join(root_dir, "models")
    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")

    for d in (models_dir, logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Logging to: {root_dir}")

    # Environment & Agent Initialization

    # Instantiate the environment using the vectorized environment factory function.
    # This single call replaces the manual creation of base_env and ClusterEnvWrapper.
    cluster_env = make_vec_env(
        data_path=DATA_PATH,
        time_freq="15T",
        cluster_size=CLUSTER_SIZE,
        state=STATE_TO_RUN
    )

    # Get environment parameters from the vectorized environment object
    n_clusters = cluster_env.num_envs
    sample_subenv = cluster_env.cluster_envs[0]  # Access a sample sub-env
    n_agents_per_cluster = sample_subenv.num_agents

    local_dim = sample_subenv.observation_space.shape[-1]
    global_dim = n_agents_per_cluster * local_dim
    # Access the action dim from the first part of the Tuple action space
    act_dim = sample_subenv.action_space[0].shape[-1]
    # The total number of transitions collected each episode is (steps_per_day * num_clusters)
    total_buffer_size = sample_subenv.num_steps * n_clusters
    # NOTE(review): this message reports the system-wide transition count, but each
    # low-level agent below is created with episode_len = sample_subenv.num_steps.
    print(f"Low-level agent buffer size set to: {total_buffer_size}")

    print(f"Created {n_clusters} clusters.")
    print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
          f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")

    print(f"Creating {n_clusters} independent low-level MAPPO agents...")
    # Each agent's buffer only needs to be as long as one episode day
    agent_buffer_size = sample_subenv.num_steps
    low_agents = []
    for _ in range(n_clusters):
        agent = MAPPO(
            n_agents=n_agents_per_cluster,
            local_dim=local_dim,
            global_dim=global_dim,
            act_dim=act_dim,
            lr=LR,
            gamma=GAMMA,
            lam=LAMBDA,
            clip_eps=CLIP_EPS,
            k_epochs=K_EPOCHS,
            batch_size=BATCH_SIZE,
            episode_len=agent_buffer_size
        )
        low_agents.append(agent)

    # Define dimensions for the high-level MAPPO agent
    OBS_DIM_HI_LOCAL = 7  # Each cluster has 7 features for its local state
    act_dim_inter = 2  # Export/Import preference for each cluster

    # The global state for the high-level agent is the concatenation
    # of all high-level local states
    OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL

    print(f"Inter-cluster agent (MAPPO): n_agents={n_clusters}, "
          f"local_dim={OBS_DIM_HI_LOCAL}, global_dim={OBS_DIM_HI_GLOBAL}, act_dim={act_dim_inter}")

    # Instantiate MAPPO for the inter-cluster agent
    inter_agent = MAPPO(
        n_agents=n_clusters,
        local_dim=OBS_DIM_HI_LOCAL,
        global_dim=OBS_DIM_HI_GLOBAL,
        act_dim=act_dim_inter,
        lr=LR,
        gamma=GAMMA,
        lam=LAMBDA,
        clip_eps=CLIP_EPS,
        k_epochs=K_EPOCHS,
        batch_size=BATCH_SIZE,
        episode_len=sample_subenv.num_steps
    )

    ledger = InterClusterLedger()
    coordinator = InterClusterCoordinator(
        cluster_env,
        inter_agent,
        ledger,
        max_transfer_kwh=MAX_TRANSFER_KWH
    )

    # Training loop
    episode_log_data = []
    performance_metrics_log = []
    intra_log = {}
    inter_log = {}
    total_log = {}
    cost_log = {}

    for ep in range(1, NUM_EPISODES + 1):
        step_count = 0
        start_time = time.time()
        ep_total_inter_cluster_reward = 0.0
        day_logs = []

        obs_clusters, _ = cluster_env.reset()

        if ep > 1:
            # For vectorized envs, call() invokes a method on all sub-envs and
            # returns a list of dictionaries, one from each cluster env.
            # NOTE(review): the metrics of the previous episode are read *after*
            # reset(), whereas the final episode's metrics (after the loop) are
            # read without a reset -- confirm get_episode_metrics() returns the
            # intended episode in both cases.
            all_cluster_metrics = cluster_env.call('get_episode_metrics')

            # Aggregate into a system-wide summary associated with the episode
            # that just finished, and append it to the log.
            performance_metrics_log.append(
                _aggregate_system_metrics(all_cluster_metrics, ep - 1)
            )

        # Use a single 'done' flag for the episode
        done_all = False

        # Per-agent reward accumulator for this episode
        cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)

        # Determine training phase
        is_phase_1 = ep < JOINT_TRAINING_START_EPISODE

        if ep == 1:
            print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
        if ep == JOINT_TRAINING_START_EPISODE:
            print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")

        # The main loop continues as long as the episode is not done
        while not done_all:
            step_count += 1

            # Action Selection (Low-Level)
            # Each cluster's global observation is its agents' observations flattened.
            batch_global_obs = obs_clusters.reshape(n_clusters, -1)
            low_level_actions_list = []
            low_level_logps_list = []

            for c_idx in range(n_clusters):
                agent = low_agents[c_idx]
                local_obs_cluster = obs_clusters[c_idx]
                global_obs_cluster = batch_global_obs[c_idx]
                actions, logps = agent.select_action(local_obs_cluster, global_obs_cluster)
                low_level_actions_list.append(actions)
                low_level_logps_list.append(logps)

            low_level_actions = np.stack(low_level_actions_list)
            low_level_logps = np.stack(low_level_logps_list)

            # Action Selection & Transfers (High-Level, Phase 2 only)
            if is_phase_1:
                exports, imports = None, None
            else:
                inter_cluster_obs_local_list = [
                    coordinator.get_cluster_state(se, step_count)
                    for se in cluster_env.cluster_envs
                ]
                inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)

                # Create the global state for the high-level agent
                inter_cluster_obs_global = inter_cluster_obs_local.flatten()

                # Call select_action with local and global states
                high_level_action, high_level_logp = inter_agent.select_action(
                    inter_cluster_obs_local,
                    inter_cluster_obs_global
                )

                current_reports = {
                    i: {
                        'export_capacity': cluster_env.get_export_capacity(i),
                        'import_capacity': cluster_env.get_import_capacity(i),
                    }
                    for i in range(n_clusters)
                }
                exports, imports = coordinator.build_transfers(high_level_action, current_reports)

            # Environment Step
            next_obs_clusters, _, done_all, step_info = cluster_env.step(
                low_level_actions, exports=exports, imports=imports
            )
            cluster_infos = step_info.get("cluster_infos")
            day_logs.append({
                "costs": cluster_infos["costs"],
                "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
                "charge_amount": cluster_infos.get("charge_amount"),
                "discharge_amount": cluster_infos.get("discharge_amount")
            })

            # Reward Calculation and Data Storage
            per_agent_rewards = np.stack(cluster_infos['agent_rewards'])
            rewards_for_buffer = per_agent_rewards

            if not is_phase_1:
                high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
                    all_cluster_infos=cluster_infos,
                    actual_transfers=(exports, imports),
                    step_count=step_count
                )
                ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster)

                # Get next state for high-level agent's buffer
                next_inter_cluster_obs_local_list = [
                    coordinator.get_cluster_state(se, step_count + 1)
                    for se in cluster_env.cluster_envs
                ]
                next_inter_cluster_obs_local = np.array(next_inter_cluster_obs_local_list)

                # Create the next global state
                next_inter_cluster_obs_global = next_inter_cluster_obs_local.flatten()

                # Store the transition in the high-level MAPPO agent's buffer
                inter_agent.store(
                    inter_cluster_obs_local,         # s_local
                    inter_cluster_obs_global,        # s_global
                    high_level_action,               # action
                    high_level_logp,                 # log_prob
                    high_level_rewards_per_cluster,  # reward
                    [done_all] * n_clusters,         # done
                    next_inter_cluster_obs_global    # s'_global
                )

                # Share each cluster's high-level reward equally among its agents
                # as a bonus on top of their own low-level rewards.
                bonus_per_agent = np.zeros_like(per_agent_rewards)
                for c_idx in range(n_clusters):
                    num_agents_in_cluster = per_agent_rewards.shape[1]
                    if num_agents_in_cluster > 0:
                        bonus = high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
                        bonus_per_agent[c_idx, :] = bonus
                rewards_for_buffer = per_agent_rewards + bonus_per_agent

            # Data Storage (Low-Level)
            dones_list = step_info.get("cluster_dones")
            for idx in range(n_clusters):
                low_agents[idx].store(
                    obs_clusters[idx],
                    batch_global_obs[idx],
                    low_level_actions[idx],
                    low_level_logps[idx],
                    rewards_for_buffer[idx],
                    dones_list[idx],
                    next_obs_clusters[idx].reshape(-1)
                )

            # Logged rewards exclude the high-level bonus added to the buffer rewards.
            cluster_rewards += per_agent_rewards
            obs_clusters = next_obs_clusters

        # Agent Updates (End of Episode)
        if is_phase_1:
            for agent in low_agents:
                agent.update()
        else:
            # NOTE(review): in Phase 2 both levels store transitions every episode,
            # but only one level calls update() per episode. Whether the other
            # level's buffer is cleared or keeps stale data depends on MAPPO's
            # buffer implementation -- confirm.
            CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
            phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
            position_in_cycle = phase2_episode_num % CYCLE_LENGTH

            if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
                print("Updating ALL LOW-LEVEL agents (High-level is frozen).")
                for agent in low_agents:
                    agent.update()
            else:
                print("Updating HIGH-LEVEL agent (Low-level is frozen).")
                inter_agent.update()

        # Unified End-of-Episode Logging
        duration = time.time() - start_time
        num_low_level_agents = n_clusters * n_agents_per_cluster
        get_price_fn = cluster_env.cluster_envs[0].get_grid_price

        # Calculate Costs & Cost Reduction
        # recursive_sum collapses the (possibly nested/ragged) per-step data
        # into a single number for each step; the price is looked up by the
        # 0-based index of the step within the episode.
        baseline_costs_per_step = [
            recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
            for i, entry in enumerate(day_logs)
        ]
        total_baseline_cost = sum(baseline_costs_per_step)

        # Apply the same robust method to the actual costs
        actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
        total_actual_cost = sum(actual_costs_per_step)

        cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0

        # Calculate All Reward Metrics
        # Intra-Cluster (Low-Level) Rewards
        total_reward_intra = cluster_rewards.sum()
        mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0

        # Inter-Cluster (High-Level) Rewards (mean is per environment step)
        total_reward_inter = ep_total_inter_cluster_reward
        mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0

        # Total System Rewards
        total_reward_system = total_reward_intra + total_reward_inter
        mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0

        # Populate Logs for Plotting (to keep generate_plots working)
        intra_log.setdefault('total', []).append(total_reward_intra)
        intra_log.setdefault('mean', []).append(mean_reward_intra)
        inter_log.setdefault('total', []).append(total_reward_inter)
        inter_log.setdefault('mean', []).append(mean_reward_inter)
        total_log.setdefault('total', []).append(total_reward_system)
        total_log.setdefault('mean', []).append(mean_reward_system)
        cost_log.setdefault('total_cost', []).append(total_actual_cost)
        cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)

        # Populate the Main Log for the Final CSV File
        episode_log_data.append({
            "Episode": ep,
            "Mean_Reward_System": mean_reward_system,
            "Mean_Reward_Intra": mean_reward_intra,
            "Mean_Reward_Inter": mean_reward_inter,
            "Total_Reward_System": total_reward_system,
            "Total_Reward_Intra": total_reward_intra,
            "Total_Reward_Inter": total_reward_inter,
            "Cost_Reduction_Pct": cost_reduction_pct,
            "Episode_Duration": duration,
        })

        # Print Final Episode Summary
        print(f"Ep {ep}/{NUM_EPISODES} | "
              f"Mean System R: {mean_reward_system:.3f} | "
              f"Cost Red: {cost_reduction_pct:.1f}% | "
              f"Time: {duration:.2f}s")

        if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
            for c_idx, agent in enumerate(low_agents):
                agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
            inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
            print(f"Saved checkpoint at episode {ep}")

    print("Training completed! Aggregating final logs...")

    # Capture the metrics for the very last episode
    final_cluster_metrics = cluster_env.call('get_episode_metrics')
    performance_metrics_log.append(
        _aggregate_system_metrics(final_cluster_metrics, NUM_EPISODES)
    )

    # Create, Merge, and Save Final DataFrame
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Add total training time to the dataframe before saving
    overall_end_time = time.time()
    total_duration_seconds = overall_end_time - overall_start_time
    total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    # Reorder and select columns for the final CSV
    # (grid_reduction_peak_hours is aggregated above but not part of this list)
    columns_to_save = [
        "Episode",
        "Mean_Reward_System",
        "Mean_Reward_Intra",
        "Mean_Reward_Inter",
        "Total_Reward_System",
        "Total_Reward_Intra",
        "Total_Reward_Inter",
        "Cost_Reduction_Pct",
        "battery_degradation_cost_total",
        "Episode_Duration",
        "total_cost_savings",
        "grid_reduction_entire_day",
        "fairness_on_cost_savings"
    ]
    df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
    df_to_save.to_csv(log_csv_path, index=False)
    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    generate_plots(
        plots_dir=plots_dir,
        num_episodes=NUM_EPISODES,
        intra_log=intra_log,
        inter_log=inter_log,
        total_log=total_log,
        cost_log=cost_log,
        df_final_log=df_final_log
    )

    # Re-measure so the printed total also includes plot generation
    overall_end_time = time.time()
    total_duration_seconds = overall_end_time - overall_start_time
    # Format into hours, minutes, seconds
    total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))

    print("\n" + "="*50)
    print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
    print("="*50)
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
def generate_plots(
    plots_dir: str,
    num_episodes: int,
    intra_log: dict,
    inter_log: dict,
    total_log: dict,
    cost_log: list,
    df_final_log: pd.DataFrame
):
    """
    Generates and saves all final plots after training is complete.

    Five PNG files are written into ``plots_dir``: intra-cluster rewards,
    inter-cluster rewards, total system rewards, cost reduction and battery
    degradation cost. Every curve is smoothed with a centered moving average.

    Args:
        plots_dir: Directory the PNG files are written to.
        num_episodes: Number of episodes; the x axis runs from 1 to this value.
        intra_log: Mapping whose ``'total'`` and ``'mean'`` entries are plotted.
        inter_log: Same layout as ``intra_log``.
        total_log: Same layout as ``intra_log``.
        cost_log: Records convertible to a DataFrame with ``'total_cost'`` and
            ``'cost_without_p2p'`` columns.
        df_final_log: DataFrame with ``'Episode'`` and
            ``'battery_degradation_cost_total'`` columns. Rows whose
            ``'Episode'`` value is not numeric are ignored.
    """
    print("Training completed! Generating plots…")

    ma_window = 120
    episodes = np.arange(1, num_episodes + 1)

    # Plot 1: Intra-cluster (Low-Level) Rewards
    _save_dual_axis_reward_plot(
        os.path.join(plots_dir, "1_intra_cluster_rewards.png"),
        episodes,
        intra_log,
        ma_window,
        title="Intra-Cluster (Low-Level Agent) Rewards",
        total_label=f'Total Reward (MA {ma_window})',
        total_axis_label="Total Intra-Cluster Reward",
        total_axis_color='tab:blue',
        total_line_color=None,  # this curve uses matplotlib's default colour cycle
        mean_label=f'Mean Reward (MA {ma_window})',
        mean_axis_label="Mean Intra-Cluster Reward",
        mean_color='tab:cyan',
    )

    # Plot 2: Inter-cluster (High-Level) Rewards
    _save_dual_axis_reward_plot(
        os.path.join(plots_dir, "2_inter_cluster_rewards.png"),
        episodes,
        inter_log,
        ma_window,
        title="Inter-Cluster (High-Level Agent) Rewards",
        total_label=f'Total Reward (MA {ma_window})',
        total_axis_label="Total Inter-Cluster Reward",
        total_axis_color='tab:green',
        total_line_color='tab:green',
        mean_label=f'Mean Reward (MA {ma_window})',
        mean_axis_label="Mean Inter-Cluster Reward",
        mean_color='mediumseagreen',
    )

    # Plot 3: Total System Rewards
    _save_dual_axis_reward_plot(
        os.path.join(plots_dir, "3_total_system_rewards.png"),
        episodes,
        total_log,
        ma_window,
        title="Total System Rewards (Intra + Inter)",
        total_label=f'Total System Reward (MA {ma_window})',
        total_axis_label="Total System Reward",
        total_axis_color='tab:red',
        total_line_color='tab:red',
        mean_label=f'Mean System Reward (MA {ma_window})',
        mean_axis_label="Mean System Reward per Agent",
        mean_color='salmon',
    )

    # Plot 4: Cost Reduction
    cost_df = pd.DataFrame(cost_log)
    # A zero baseline would give +/-inf and poison the moving average; turn
    # those episodes into NaN so the rolling mean simply skips them.
    baseline_cost = cost_df['cost_without_p2p'].replace(0, np.nan)
    # Scale to percent BEFORE clipping so the cap really is 100 %.
    cost_df['cost_reduction_pct'] = (
        100 * (1 - (cost_df['total_cost'] / baseline_cost))
    ).clip(lower=-np.inf, upper=100)
    _save_single_series_plot(
        os.path.join(plots_dir, "4_cost_reduction.png"),
        episodes,
        _moving_avg(cost_df['cost_reduction_pct'], ma_window),
        label=f'Cost Reduction % (MA {ma_window})',
        color='purple',
        ylabel="Cost Reduction (%)",
        title="Total System-Wide Cost Reduction",
    )

    # Keep only rows whose 'Episode' entry parses as a number.
    df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
    df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])

    # Plot 5: Battery Degradation Cost
    _save_single_series_plot(
        os.path.join(plots_dir, "5_battery_degradation_cost.png"),
        df_plot["Episode"],
        _moving_avg(df_plot["battery_degradation_cost_total"], ma_window),
        label=f'Degradation Cost (MA {ma_window})',
        color='darkgreen',
        ylabel="Total Degradation Cost ($)",
        title="Total Battery Degradation Cost",
    )

    print(f"All plots have been saved to: {plots_dir}")


def _moving_avg(series, window):
    """Return the centered rolling mean of ``series`` as a NumPy array."""
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()


def _save_dual_axis_reward_plot(
    out_path,
    episodes,
    log,
    ma_window,
    *,
    title,
    total_label,
    total_axis_label,
    total_axis_color,
    mean_label,
    mean_axis_label,
    mean_color,
    total_line_color=None,
):
    """Plot smoothed ``log['total']`` (left axis) and ``log['mean']`` (right axis) and save to ``out_path``."""
    fig, ax = plt.subplots(figsize=(12, 7))
    # Only pass an explicit colour when one was requested, so the default
    # colour cycle is used otherwise.
    line_kwargs = {} if total_line_color is None else {"color": total_line_color}
    ax.plot(episodes, _moving_avg(log['total'], ma_window), label=total_label, linewidth=2, **line_kwargs)
    ax.set_xlabel("Episode")
    ax.set_ylabel(total_axis_label, color=total_axis_color)
    ax.tick_params(axis='y', labelcolor=total_axis_color)
    ax.grid(True)

    ax2 = ax.twinx()
    ax2.plot(episodes, _moving_avg(log['mean'], ma_window), label=mean_label, linewidth=2, linestyle='--', color=mean_color)
    ax2.set_ylabel(mean_axis_label, color=mean_color)
    ax2.tick_params(axis='y', labelcolor=mean_color)

    fig.suptitle(title)
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    fig.savefig(out_path, dpi=200)
    plt.close(fig)


def _save_single_series_plot(out_path, x, y, *, label, color, ylabel, title):
    """Plot one already-smoothed series against ``x`` and save it to ``out_path``."""
    fig = plt.figure(figsize=(12, 7))
    plt.plot(x, y, label=label, color=color, linewidth=2)
    plt.xlabel("Episode")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    fig.savefig(out_path, dpi=200)
    plt.close(fig)
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
if __name__ == "__main__":
|
| 579 |
+
main()
|
Other_algorithms/HC_MAPPO/cluster.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
# Ensure project root is on the Python path
|
| 7 |
+
# Please ensure you follow proper directory structure for running this code
|
| 8 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 9 |
+
|
| 10 |
+
from Environment.solar_sys_environment import SolarSys
|
| 11 |
+
from Environment.cluster_env_wrapper import GlobalPriceVecEnvWrapper
|
| 12 |
+
from Environment.cluster_env_wrapper import make_vec_env
|
| 13 |
+
class InterClusterLedger:
    """
    Pairwise ledger of amounts moved between clusters.

    ``balances[a][b]`` holds the running balance of ``a`` with respect to
    ``b``: it goes down when ``a`` sends to ``b`` and up when ``a`` receives
    from ``b``.
    """

    def __init__(self):
        # Nested mapping: cluster id -> {counterparty id -> signed balance}.
        self.balances = {}

    def record_transfer(self, from_id: str, to_id: str, amount: float):
        """Book ``amount`` moving from ``from_id`` to ``to_id``; self-transfers are ignored."""
        if from_id != to_id:
            sender_row = self.balances.setdefault(from_id, {})
            receiver_row = self.balances.setdefault(to_id, {})
            sender_row[to_id] = sender_row.get(to_id, 0.0) - amount
            receiver_row[from_id] = receiver_row.get(from_id, 0.0) + amount

    def get_balance(self, a_id: str, b_id: str) -> float:
        """Return the balance of ``a_id`` towards ``b_id`` (0.0 if nothing was recorded)."""
        if a_id not in self.balances:
            return 0.0
        return self.balances[a_id].get(b_id, 0.0)

    def net_balances(self) -> dict:
        """Return the ledger's nested balance mapping (the live object, not a copy)."""
        return self.balances
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class InterClusterCoordinator:
    """
    Coordinates energy exchange between clusters.

    It summarises per-cluster state, turns the high-level agent's raw action
    vector into matched export/import volumes, and scores each cluster.
    """

    def __init__(
        self,
        cluster_env,
        high_level_agent,
        ledger,
        max_transfer_kwh: float = 1000000.0,
        w_cost_savings: float = 2.0,
        w_grid_penalty: float = 0.3,
        w_p2p_bonus: float = 0.3
    ):
        # Collaborators.
        self.cluster_env = cluster_env
        self.agent = high_level_agent
        self.ledger = ledger
        # Upper bound that a softmax preference is scaled by.
        self.max_transfer_kwh = max_transfer_kwh
        # Reward weights.
        self.w_cost_savings = w_cost_savings
        self.w_grid_penalty = w_grid_penalty
        self.w_p2p_bonus = w_p2p_bonus

    def get_cluster_state(self, env, step_count: int) -> np.ndarray:
        """
        Build a 7-element float32 summary of one cluster environment.

        The vector is ``[total SoC, total battery capacity, SoC fraction,
        total demand, total solar, grid price, normalised time]``. The step
        index is capped at ``env.num_steps - 1``.
        """
        t = min(step_count, env.num_steps - 1)

        stored = np.sum(env.battery_soc)
        capacity = np.sum(env.battery_max_capacity)
        if capacity > 0:
            soc_fraction = stored / capacity
        else:
            # No usable capacity: report an empty fleet instead of dividing by zero.
            soc_fraction = 0.0

        demand = np.sum(env.demands_day[t])
        solar = np.sum(env.solars_day[t])
        grid_price = env.get_grid_price(t)
        day_fraction = t / float(env.steps_per_day)

        features = (stored, capacity, soc_fraction, demand, solar, grid_price, day_fraction)
        return np.array(features, dtype=np.float32)

    def _capped_offers(self, prefs, reports: dict, n: int, capacity_key: str):
        """Scale each preference by ``max_transfer_kwh``, cap it by the reported capacity, and total the result."""
        offers = np.zeros(n)
        running_total = 0.0
        for i in range(n):
            cap = reports[i][capacity_key]
            share = float(prefs[i])
            offers[i] = min(share * self.max_transfer_kwh, cap)
            running_total += offers[i]
        return offers, running_total

    def build_transfers(self, agent_action_vector: np.ndarray, reports: dict) -> tuple[np.ndarray, np.ndarray]:
        """
        Match cluster export offers against import requests.

        Column 0 of ``agent_action_vector`` holds raw export preferences and
        column 1 raw import preferences; each column is softmax-normalised
        across clusters. ``reports[i]`` supplies ``'export_capacity'`` and
        ``'import_capacity'`` for cluster ``i``. The matched volume is the
        smaller of total offers and total requests, split proportionally.

        Returns:
            ``(actual_exports, actual_imports)``, one entry per cluster.
        """
        # NOTE(review): this method reads `cluster_env.clusters` while
        # compute_inter_cluster_reward reads `cluster_env.cluster_envs` -
        # confirm that both attributes exist and have the same length.
        n = len(self.cluster_env.clusters)

        export_prefs = torch.tensor(agent_action_vector[:, 0]).softmax(dim=-1).numpy()
        import_prefs = torch.tensor(agent_action_vector[:, 1]).softmax(dim=-1).numpy()

        potential_exports, supply = self._capped_offers(export_prefs, reports, n, 'export_capacity')
        potential_imports, demand = self._capped_offers(import_prefs, reports, n, 'import_capacity')

        matched = min(supply, demand)
        actual_exports = np.zeros(n)
        actual_imports = np.zeros(n)

        # Below this threshold nothing is traded at all.
        if not matched > 1e-6:
            return actual_exports, actual_imports

        if supply > 0:
            actual_exports = (potential_exports / supply) * matched
        if demand > 0:
            actual_imports = (potential_imports / demand) * matched
        return actual_exports, actual_imports

    def compute_inter_cluster_reward(self, all_cluster_infos: dict, actual_transfers: tuple, step_count: int) -> np.ndarray:
        """
        Compute one reward per cluster (individual credit assignment).

        ``reward_i = w_cost_savings * (baseline_import_i * price - cost_i)
        + w_p2p_bonus * (export_i + import_i) - w_grid_penalty * grid_import_i``

        ``all_cluster_infos`` must provide per-cluster entries under
        ``'costs'``, ``'grid_import_no_p2p'`` and ``'grid_import_with_p2p'``.
        Returns a float32 array.
        """
        exports, imports = actual_transfers
        envs = self.cluster_env.cluster_envs
        num_clusters = len(envs)

        # Collapse each cluster's per-agent values into a single number.
        cluster_costs = [np.sum(entry) for entry in all_cluster_infos['costs']]
        baseline_grid = [np.sum(entry) for entry in all_cluster_infos['grid_import_no_p2p']]
        actual_grid = [np.sum(entry) for entry in all_cluster_infos['grid_import_with_p2p']]

        # One price for all clusters, taken from the first cluster env.
        # NOTE(review): step_count is not capped here, unlike in
        # get_cluster_state - confirm get_grid_price accepts the final step.
        grid_price = envs[0].get_grid_price(step_count)

        rewards = np.zeros(num_clusters, dtype=np.float32)
        for k in range(num_clusters):
            cost_saved = baseline_grid[k] * grid_price - cluster_costs[k]
            savings_term = self.w_cost_savings * cost_saved
            grid_term = self.w_grid_penalty * actual_grid[k]
            p2p_term = self.w_p2p_bonus * (exports[k] + imports[k])
            rewards[k] = savings_term + p2p_term - grid_term

        return rewards
|
Other_algorithms/HC_MAPPO/mappo/_init_.py
ADDED
|
File without changes
|
Other_algorithms/HC_MAPPO/mappo/trainer/__init__.py
ADDED
|
File without changes
|
Other_algorithms/HC_MAPPO/mappo/trainer/mappo.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import random
|
| 4 |
+
import numpy as np
|
| 5 |
+
from torch.distributions import Normal
|
| 6 |
+
|
| 7 |
+
# Pick the compute device once at import time: CUDA when present, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using CUDA (NVIDIA GPU)" if device.type == "cuda" else "Using CPU")
|
| 13 |
+
|
| 14 |
+
def set_global_seed(seed: int, deterministic: bool = False):
    """
    Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Args:
        seed: Seed applied to every random number generator.
        deterministic: When ``False`` (the default, matching the previous
            hard-coded behaviour) cuDNN autotuning is enabled and
            deterministic kernels are not requested, which favours speed over
            bit-exact reproducibility on GPU. Pass ``True`` to request
            deterministic cuDNN kernels and disable autotuning.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # These are plain global switches and are set regardless of whether CUDA
    # is present.
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic


# Module-wide seed; applied once at import time.
SEED = 42
set_global_seed(SEED)
|
| 25 |
+
|
| 26 |
+
class MLP(nn.Module):
    """Fully connected network: Linear+ReLU per hidden width, then a final Linear."""

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        # Consecutive pairs of widths give each hidden layer's (in, out) sizes.
        widths = [input_dim, *hidden_dims]
        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        # Output layer has no activation.
        modules.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the layer stack."""
        return self.net(x)
|
| 39 |
+
|
| 40 |
+
class Actor(nn.Module):
    """Gaussian policy head: an MLP produces the mean, a learned vector the log-std."""

    def __init__(self, obs_dim, act_dim, hidden=(64,64)):
        super().__init__()
        # Mean network, mapping observation to per-dimension action mean.
        self.net = MLP(obs_dim, hidden, act_dim)
        # Log standard deviation, one learnable value per action dimension
        # and independent of the observation; starts at 0 (std = 1).
        self.log_std = nn.Parameter(torch.zeros(act_dim))

    def forward(self, x):
        """Return ``(mean, std)`` of the action distribution for ``x``."""
        return self.net(x), self.log_std.exp()
|
| 50 |
+
|
| 51 |
+
class Critic(nn.Module):
    """State-value network: an MLP with a single output unit."""

    def __init__(self, state_dim, hidden=(128,128)):
        super().__init__()
        self.net = MLP(state_dim, hidden, 1)

    def forward(self, x):
        """Return the value estimate for ``x`` with the trailing size-1 axis removed."""
        value = self.net(x)
        return torch.squeeze(value, dim=-1)
|
| 58 |
+
|
| 59 |
+
class MAPPO:
    """
    Multi-agent PPO with one shared actor (local observations) and one
    centralised critic (global state).

    A fixed-size rollout buffer holds up to ``episode_len`` steps; ``update``
    consumes whatever has been stored and then resets the write position.
    """

    def __init__(
        self,
        n_agents,
        local_dim,
        global_dim,
        act_dim,
        lr=3e-4,
        gamma=0.99,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024,
        episode_len=96
    ):
        """
        Args:
            n_agents: Number of agents sharing the actor.
            local_dim: Size of one agent's local observation.
            global_dim: Size of the global state fed to the critic.
            act_dim: Size of one agent's action vector.
            lr: Adam learning rate for both actor and critic.
            gamma: Discount factor.
            lam: GAE lambda.
            clip_eps: PPO ratio clipping range.
            k_epochs: Optimisation epochs per ``update`` call.
            batch_size: Mini-batch size used during ``update``.
            episode_len: Capacity of the rollout buffer in steps.
        """
        self.n_agents = n_agents
        self.local_dim = local_dim
        self.global_dim = global_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.lam = lam
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self.batch_size = batch_size
        self.episode_len = episode_len

        self.actor = Actor(local_dim, act_dim).to(device)
        self.critic = Critic(global_dim).to(device)

        self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)

        print("MAPPO CUDA AMP is disabled for stability.")

        self.init_buffer()

    def init_buffer(self):
        """Allocate empty rollout buffers and reset the write position."""
        steps, agents = self.episode_len, self.n_agents
        # float32 rather than float16: half precision overflows to inf above
        # ~65504 and keeps only ~3 significant digits, which corrupts rewards
        # and the log-probabilities used in the PPO ratio.
        self.ls_buf = np.zeros((steps, agents, self.local_dim), dtype=np.float32)
        self.gs_buf = np.zeros((steps, self.global_dim), dtype=np.float32)
        self.ac_buf = np.zeros((steps, agents, self.act_dim), dtype=np.float32)
        self.lp_buf = np.zeros((steps, agents), dtype=np.float32)
        self.rw_buf = np.zeros((steps, agents), dtype=np.float32)
        self.done_buf = np.zeros((steps, agents), dtype=np.float32)
        self.next_gs_buf = np.zeros((steps, self.global_dim), dtype=np.float32)
        self.step_idx = 0

    @torch.no_grad()
    def select_action(self, local_obs, global_obs):
        """
        Sample actions from the current policy.

        Args:
            local_obs: NumPy array of local observations fed to the actor.
            global_obs: Accepted for interface symmetry; not used here.

        Returns:
            ``(actions, log_probs)`` as NumPy arrays; each log-prob is summed
            over the action dimensions.
        """
        l = torch.from_numpy(local_obs).float().to(device)
        mean, std = self.actor(l)
        dist = Normal(mean, std)
        a = dist.sample()
        return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()

    def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
        """
        Append one transition to the rollout buffer.

        Transitions arriving after the buffer holds ``episode_len`` steps are
        dropped silently.
        """
        if self.step_idx < self.episode_len:
            self.ls_buf[self.step_idx] = local_obs
            self.gs_buf[self.step_idx] = global_obs
            self.ac_buf[self.step_idx] = action
            self.lp_buf[self.step_idx] = logp
            self.rw_buf[self.step_idx] = reward
            self.done_buf[self.step_idx] = done
            self.next_gs_buf[self.step_idx] = next_global_obs
            self.step_idx += 1

    def compute_gae(self, T, vals):
        """
        Compute GAE(lambda) advantages and returns for the first ``T`` steps.

        Args:
            T: Number of stored steps to use.
            vals: Critic values for ``gs_buf[:T]``, shape ``(T,)``.

        Returns:
            ``(advantages, returns)`` as flat float32 tensors of length
            ``T * n_agents`` on the same device as ``vals``.
        """
        N = self.n_agents
        out_device = vals.device

        # The centralised value is shared by all agents at a given step.
        vals_step = vals.detach().cpu().numpy().astype(np.float32)
        vals_agent = np.repeat(vals_step[:, None], N, axis=1)

        next_vals_agent = np.zeros_like(vals_agent)
        next_vals_agent[:-1] = vals_agent[1:]
        if not self.done_buf[T - 1].all():
            # Rollout was cut off mid-episode: bootstrap from the critic's
            # value of the state that follows the last stored step.
            with torch.no_grad():
                v_last = self.critic(
                    torch.from_numpy(self.next_gs_buf[T - 1]).float().to(out_device)
                ).cpu().item()
            next_vals_agent[T - 1, :] = v_last

        masks = (1.0 - self.done_buf[:T]).astype(np.float32)
        rewards = self.rw_buf[:T].astype(np.float32)

        # One-step TD errors.
        deltas = rewards + self.gamma * next_vals_agent * masks - vals_agent

        # Backward recursion A_t = delta_t + gamma * lam * mask_t * A_{t+1}.
        # The mask stops advantage from leaking across episode boundaries.
        adv = np.zeros_like(deltas)
        running = np.zeros(N, dtype=np.float32)
        decay = self.gamma * self.lam
        for t in range(T - 1, -1, -1):
            running = deltas[t] + decay * masks[t] * running
            adv[t] = running

        ret = adv + vals_agent
        adv_flat = torch.from_numpy(adv.flatten()).to(out_device)
        ret_flat = torch.from_numpy(ret.flatten()).to(out_device)
        return adv_flat, ret_flat

    def update(self):
        """
        Run ``k_epochs`` of clipped PPO updates on the stored rollout, then
        reset the buffer write position. Does nothing if the buffer is empty.
        """
        T = self.step_idx
        if T == 0:
            return

        gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
        ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
        ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
        lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)

        with torch.no_grad():
            vals = self.critic(gs_tensor)

        adv_flat, ret_flat = self.compute_gae(T, vals)
        # The sample std of a single element is NaN, so skip normalisation
        # in that case.
        if adv_flat.numel() > 1:
            adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)

        # Repeat each global state once per agent so rows line up with the
        # flattened per-agent tensors.
        gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)

        dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
        # Re-seeded on every call, so the shuffle order is reproducible.
        gen = torch.Generator()
        gen.manual_seed(SEED)
        loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)

        value_loss_fn = nn.MSELoss()

        for _ in range(self.k_epochs):
            for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
                mean, std = self.actor(b_ls)
                dist = Normal(mean, std)

                entropy = dist.entropy().mean()

                lp_new = dist.log_prob(b_ac).sum(-1)
                ratio = torch.exp(lp_new - b_lp)
                surr1 = ratio * b_adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv

                # Clipped surrogate objective plus a small entropy bonus.
                actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy

                self.opt_a.zero_grad()
                actor_loss.backward()
                self.opt_a.step()

                val_pred = self.critic(b_gs)
                critic_loss = value_loss_fn(val_pred, b_ret)

                self.opt_c.zero_grad()
                critic_loss.backward()
                self.opt_c.step()

        self.step_idx = 0

    def save(self, path):
        """Write the actor and critic state dicts to ``path``."""
        torch.save({'actor': self.actor.state_dict(),
                    'critic': self.critic.state_dict()}, path)

    def load(self, path):
        """Load actor and critic state dicts previously written by ``save``."""
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources (or pass weights_only=True where the installed
        # torch version supports it).
        data = torch.load(path, map_location=device)
        self.actor.load_state_dict(data['actor'])
        self.critic.load_state_dict(data['critic'])
|