SolarSys2026 committed on
Commit
4a171f0
·
verified ·
1 Parent(s): ade9746

Upload 30 files

Browse files
Files changed (30) hide show
  1. Other_algorithms/.DS_Store +0 -0
  2. Other_algorithms/Flat_System/PG/_init_.py +0 -0
  3. Other_algorithms/Flat_System/PG/pg_evaluation.py +464 -0
  4. Other_algorithms/Flat_System/PG/pg_train.py +373 -0
  5. Other_algorithms/Flat_System/PG/trainer/__init__.py +0 -0
  6. Other_algorithms/Flat_System/PG/trainer/pg.py +96 -0
  7. Other_algorithms/Flat_System/maddpg/__init__.py +0 -0
  8. Other_algorithms/Flat_System/maddpg/maddpg_evaluation.py +428 -0
  9. Other_algorithms/Flat_System/maddpg/maddpg_train.py +382 -0
  10. Other_algorithms/Flat_System/maddpg/trainer/__init__.py +0 -0
  11. Other_algorithms/Flat_System/maddpg/trainer/maddpg.py +196 -0
  12. Other_algorithms/Flat_System/mappo/_init_.py +0 -0
  13. Other_algorithms/Flat_System/mappo/mappo_evaluation.py +430 -0
  14. Other_algorithms/Flat_System/mappo/mappo_train.py +439 -0
  15. Other_algorithms/Flat_System/mappo/trainer/__init__.py +0 -0
  16. Other_algorithms/Flat_System/mappo/trainer/mappo.py +243 -0
  17. Other_algorithms/Flat_System/meanfield/_init_.py +0 -0
  18. Other_algorithms/Flat_System/meanfield/meanfield_evaluation.py +429 -0
  19. Other_algorithms/Flat_System/meanfield/meanfield_train.py +386 -0
  20. Other_algorithms/Flat_System/meanfield/trainer/__init__.py +0 -0
  21. Other_algorithms/Flat_System/meanfield/trainer/mfac.py +219 -0
  22. Other_algorithms/Flat_System/solar_sys_environment.py +523 -0
  23. Other_algorithms/HC_MAPPO/Environment/cluster_env_wrapper.py +164 -0
  24. Other_algorithms/HC_MAPPO/Environment/solar_sys_environment.py +673 -0
  25. Other_algorithms/HC_MAPPO/HC_MAPPO_evaluation.py +496 -0
  26. Other_algorithms/HC_MAPPO/HC_MAPPO_train.py +579 -0
  27. Other_algorithms/HC_MAPPO/cluster.py +140 -0
  28. Other_algorithms/HC_MAPPO/mappo/_init_.py +0 -0
  29. Other_algorithms/HC_MAPPO/mappo/trainer/__init__.py +0 -0
  30. Other_algorithms/HC_MAPPO/mappo/trainer/mappo.py +199 -0
Other_algorithms/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Other_algorithms/Flat_System/PG/_init_.py ADDED
File without changes
Other_algorithms/Flat_System/PG/pg_evaluation.py ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pg_evaluation.py
2
+ import os
3
+ import sys
4
+ import time
5
+ import re
6
+ import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import torch
10
+ from datetime import datetime
11
+
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ from solar_sys_environment import SolarSys
15
+ from PG.trainer.pg import PGAgent
16
+
17
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+
19
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of *values*.

    Args:
        values: Per-participant quantities. Any array-like is accepted (it is
            coerced to a float array), so plain lists work as well as arrays.

    Returns:
        ``0.0`` for an empty input, ``1.0`` when every entry is zero (nobody
        received anything, which is treated as perfectly even), otherwise the
        index as a built-in ``float``.
    """
    # Coerce once so list inputs behave like arrays and integer dtypes cannot
    # overflow when squared.
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return 0.0
    if np.all(values == 0):
        return 1.0
    num = values.sum() ** 2
    den = values.size * (values ** 2).sum()
    return float(num / den)
27
+
28
def _unpack_reset_obs(reset_result):
    """Return the observations from ``env.reset()`` for either return convention.

    Accepts both a bare observation collection and an ``(obs, info_dict)``
    pair, so the script does not depend on which signature the environment
    currently exposes.
    """
    if (
        isinstance(reset_result, tuple)
        and len(reset_result) == 2
        and isinstance(reset_result[1], dict)
    ):
        return reset_result[0]
    return reset_result


def _load_agent_weights(agent, model_dir, agent_idx):
    """Load weights for one agent, preferring its own checkpoint; return success."""
    per_agent_path = os.path.join(model_dir, f"best_model_agent_{agent_idx}.pth")
    if os.path.exists(per_agent_path):
        agent.load(per_agent_path)
        print(f"Loaded model for agent {agent_idx}")
        return True

    print(f"WARNING: Model file not found for agent {agent_idx}: {per_agent_path}")
    # Alternative: try loading a single model shared by all agents.
    shared_path = os.path.join(model_dir, "best_model.pth")
    if os.path.exists(shared_path):
        agent.load(shared_path)
        print(f"Loaded single model for agent {agent_idx}")
        return True

    # Make the failure visible: without this message the run would silently
    # report metrics for an agent that never had a checkpoint loaded.
    print(
        f"WARNING: No checkpoint found for agent {agent_idx} in {model_dir}; "
        "the agent keeps its freshly constructed weights."
    )
    return False


def _info_array(info, key, num_agents):
    """Return ``info[key]``, or a zero vector when the key is absent or None."""
    values = info.get(key)
    if values is None:
        return np.zeros(num_agents)
    return values


def _save_evaluation_plots(daily_summary_df, all_days_df, house_level_df,
                           plots_dir, steps_per_day, days_to_evaluate):
    """Render every evaluation figure as a PNG inside *plots_dir*."""
    # The summary frame carries a trailing "ALL_DAYS_SUMMARY" row; drop it so
    # the day column can be treated as integers on the x axis.
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(plot_daily_df["day"])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    days = plot_daily_df["day"]
    plt.bar(days - bar_width / 2, plot_daily_df["day_total_demand"], width=bar_width,
            label="Total Demand", color='coral')
    plt.bar(days + bar_width / 2, plot_daily_df["day_total_solar"], width=bar_width,
            label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    # Flatten (day, step) into one running index across the whole evaluation.
    step_group["global_step"] = (step_group["day"] - 1) * steps_per_day + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"],
             label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"],
             label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"],
            label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"],
            label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"],
             label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"],
             label="Cost Savings Fairness", marker='o')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"],
             label="Grid Reduction Fairness", marker='s')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"],
             label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(plot_daily_df["day"])
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions (two y axes sharing the house axis)
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width / 2, house_level_df["cost_savings"], bar_width,
            label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width / 2, house_level_df["import_reduction"], bar_width,
            label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_df = all_days_df[all_days_df['day'] == 1]
    # Prices are logged once per house, so collapse to one row per step.
    day1_prices = day1_df[['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses
    # Houses without a battery are logged with soc = NaN and drop out here.
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()


def main():
    """Evaluate trained PG agents on a test dataset and save logs and plots.

    The state name is parsed from ``MODEL_PATH``, one ``PGAgent`` is built per
    environment agent and its checkpoint loaded, and the environment is rolled
    out for ``DAYS_TO_EVALUATE`` episodes using each policy's mean action.
    Step-level logs, step timings, per-house and per-day summaries are written
    as CSV files under ``runs_with_battery/<run_name>/logs`` and figures under
    ``.../plots``.

    Raises:
        ValueError: If no supported state name can be found in ``MODEL_PATH``.
    """
    # User parameters
    MODEL_PATH = "/path/to/project/pg_pennsylvania_10agents_10000eps/logs"
    DATA_PATH = "/path/to/project/testing/10houses_30days_TEST.csv"
    DAYS_TO_EVALUATE = 30

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE
    # Per-agent solar level above which a step counts as "sunny"; it is scaled
    # by the number of agents and compared with the community-wide solar sum.
    SOLAR_THRESHOLD = 0.5

    state_match = re.search(r"pg_(oklahoma|colorado|pennsylvania)_", model_path)
    if not state_match:
        raise ValueError(
            "Could not automatically detect the state (oklahoma, colorado, or pennsylvania) "
            "from the model path. Please ensure your model's parent folder is named correctly, "
            "e.g., 'pg_oklahoma_...'"
        )
    detected_state = state_match.group(1)
    print(f"--- Detected state: {detected_state.upper()} ---")

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state,
        time_freq="15T"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_pg_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_dim = env.observation_space.shape[1]
    act_dim = env.action_space.shape[1]

    # Initialize PG agents
    pg_agents = []
    for i in range(num_agents):
        agent = PGAgent(
            state_dim=local_dim,
            action_dim=act_dim,
            lr=2e-4,
            gamma=0.95,
        )
        _load_agent_weights(agent, model_path, i)
        agent.model.to(device).eval()
        pg_agents.append(agent)

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    for day_idx in range(days_to_evaluate):
        # Tolerate both `obs` and `(obs, info)` return conventions of reset().
        obs = _unpack_reset_obs(env.reset())
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.time()

            # Select actions with PG
            actions = []
            with torch.no_grad():
                for i in range(num_agents):
                    # Convert observation to a batched tensor on the target device
                    state = torch.FloatTensor(obs[i]).unsqueeze(0).to(device)

                    # The model returns three values; only the first (mean) is
                    # used so evaluation is deterministic.
                    mean, _, _ = pg_agents[i].model(state)

                    # Clip to [0, 1] range
                    action = np.clip(mean.squeeze(0).cpu().numpy(), 0.0, 1.0)
                    actions.append(action)

            actions = np.array(actions, dtype=np.float32)

            next_obs, rewards, done, info = env.step(actions)

            # Timing covers action selection plus the environment step.
            step_timing_list.append({
                "day": day_idx + 1,
                "step": step_count,
                "step_time_s": time.time() - step_start_time
            })

            grid_price_now = env.get_grid_price(step_count)
            # Use the environment's current total surplus/shortfall to re-calculate peer price
            current_demands = env.demands_day[step_count]
            current_solars = env.solars_day[step_count]
            current_total_surplus = float(np.maximum(current_solars - current_demands, 0.0).sum())
            current_total_shortfall = float(np.maximum(current_demands - current_solars, 0.0).sum())
            peer_price_now = env.get_peer_price(step_count, current_total_surplus, current_total_shortfall)

            # Optional info entries default to zeros instead of failing on None.
            charge_amounts = _info_array(info, "charge_amount", num_agents)
            discharge_amounts = _info_array(info, "discharge_amount", num_agents)
            grid_exports = _info_array(info, "grid_export", num_agents)

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                charge_amount = float(charge_amounts[i])
                discharge_amount = float(discharge_amounts[i])
                baseline_import = float(info["grid_import_no_p2p"][i])

                day_logs.append({
                    "day": day_idx + 1,
                    "step": step_count,
                    "house": hid,
                    "grid_import_no_p2p": baseline_import,
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(grid_exports[i]),
                    "p2p_buy": float(info["p2p_buy"][i]),
                    "p2p_sell": float(info["p2p_sell"][i]),
                    "actual_cost": float(info["costs"][i]),
                    # Baseline: the no-P2P import priced at the grid tariff.
                    "baseline_cost": baseline_import * grid_price_now,
                    "total_demand": float(env.demands_day[step_count, i]),
                    "total_solar": float(env.solars_day[step_count, i]),
                    "grid_price": grid_price_now,
                    "peer_price": peer_price_now,
                    "soc": (env.battery_soc[i] / env.battery_max_capacity[i]) if is_battery_house else np.nan,
                    "degradation_cost": ((charge_amount + discharge_amount) * env.battery_degradation_cost[i]) if is_battery_house else 0.0,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            if step_count >= eval_steps:
                break

        day_df = pd.DataFrame(day_logs)
        all_logs.extend(day_logs)

        # Daily summary
        grouped_house = day_df.groupby("house").sum(numeric_only=True)
        grouped_step = day_df.groupby("step").sum(numeric_only=True)

        total_demand = grouped_step["total_demand"].sum()
        total_solar = grouped_step["total_solar"].sum()
        total_p2p_buy = grouped_house["p2p_buy"].sum()
        total_p2p_sell = grouped_house["p2p_sell"].sum()

        baseline_cost_per_house = grouped_house["baseline_cost"]
        actual_cost_per_house = grouped_house["actual_cost"]
        cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
        day_total_cost_savings = cost_savings_per_house.sum()
        day_baseline_cost = baseline_cost_per_house.sum()
        overall_cost_savings_pct = day_total_cost_savings / day_baseline_cost if day_baseline_cost > 0 else 0.0

        baseline_import_per_house = grouped_house["grid_import_no_p2p"]
        actual_import_per_house = grouped_house["grid_import_with_p2p"]
        import_reduction_per_house = baseline_import_per_house - actual_import_per_house
        day_total_import_reduction = import_reduction_per_house.sum()
        day_baseline_import = baseline_import_per_house.sum()
        overall_import_reduction_pct = day_total_import_reduction / day_baseline_import if day_baseline_import > 0 else 0.0

        fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
        fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
        fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
        fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
        fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
        fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
        day_total_degradation_cost = grouped_house["degradation_cost"].sum()

        daily_summaries.append({
            "day": day_idx + 1, "day_total_demand": total_demand, "day_total_solar": total_solar,
            "day_p2p_buy": total_p2p_buy, "day_p2p_sell": total_p2p_sell,
            "cost_savings_abs": day_total_cost_savings, "cost_savings_pct": overall_cost_savings_pct,
            "fairness_cost_savings": fairness_cost_savings,
            "grid_reduction_abs": day_total_import_reduction, "grid_reduction_pct": overall_import_reduction_pct,
            "fairness_grid_reduction": fairness_import_reduction,
            "fairness_reward": fairness_rewards, "fairness_p2p_buy": fairness_p2p_buy,
            "fairness_p2p_sell": fairness_p2p_sell,
            "fairness_p2p_total": fairness_p2p_total, "total_degradation_cost": day_total_degradation_cost
        })

    # Final processing and saving
    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").sum(numeric_only=True)
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # "Sunny" rows: rows whose (day, step) community solar sum exceeds the
    # threshold. transform("sum") broadcasts that per-step total back onto
    # every house row, so the mask is aligned with the frame by construction.
    community_solar = all_days_df.groupby(['day', 'step'])['total_solar'].transform('sum')
    sunny_df = all_days_df[community_solar > (SOLAR_THRESHOLD * num_agents)]
    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = 0.0
    if baseline_import_sunny > 0:
        grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny

    # Share of procured energy (P2P purchases + grid import) bought from peers.
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    total_procured_energy = total_p2p_buy + total_actual_grid_import
    community_sourcing_rate_pct = 0.0
    if total_procured_energy > 0:
        community_sourcing_rate_pct = total_p2p_buy / total_procured_energy

    # Share of outgoing energy (P2P sales + grid export) sold to peers.
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    total_excess_solar = total_p2p_sell + total_grid_export
    solar_sharing_efficiency_pct = 0.0
    if total_excess_solar > 0:
        solar_sharing_efficiency_pct = total_p2p_sell / total_excess_solar

    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0

    final_row = {
        "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all,
        "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all,
        "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all, "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
        "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
        "cost_savings_sunny_hours_pct": cost_savings_sunny_pct
    }

    # Per-day columns with no multi-day aggregate are left empty in the final row.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Plots
    _save_evaluation_plots(
        daily_summary_df,
        all_days_df,
        house_level_df,
        plots_dir,
        env.num_steps,
        days_to_evaluate,
    )

    print("All plots have been generated and saved. Evaluation complete.")


if __name__ == "__main__":
    main()
Other_algorithms/Flat_System/PG/pg_train.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import re
4
+ import numpy as np
5
+ import torch
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ import time
9
+ from datetime import datetime
10
+
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
+
13
+ from solar_sys_environment import SolarSys
14
+ from PG.trainer.pg import PGAgent
15
+
16
+ def main():
17
+ STATE_TO_RUN = "pennsylvania" # "pennsylvania" or "colorado" or "oklahoma"
18
+
19
+ # Set the path to your training data
20
+ DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
21
+ num_episodes = 10000
22
+ batch_size = 256
23
+ checkpoint_interval = 100000
24
+ window_size = 32
25
+
26
+ env = SolarSys(
27
+ data_path=DATA_FILE_PATH,
28
+ state=STATE_TO_RUN,
29
+ time_freq="15T"
30
+ )
31
+
32
+ # Sanity check: env I/O shapes
33
+ print("Observation space:", env.observation_space)
34
+ print("Action space :", env.action_space)
35
+
36
+ # Reset and inspect obs
37
+ obs = env.reset()
38
+ print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")
39
+
40
+ # Sample random actions and do one step
41
+ dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
42
+ next_obs, rewards, done, info = env.step(dummy_actions)
43
+ print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
44
+ f"rewards: {len(rewards)}, done: {done}")
45
+ print("Info keys:", list(info.keys()))
46
+
47
+ # Count the number of houses in each group
48
+ env.group_counts = {
49
+ 0: env.agent_groups.count(0),
50
+ 1: env.agent_groups.count(1)
51
+ }
52
+ print(f"Number of houses in each group: {env.group_counts}")
53
+
54
+ max_steps = env.num_steps
55
+
56
+ # Dims from the env
57
+ num_agents = env.num_agents
58
+ local_state_dim = env.observation_space.shape[1]
59
+ action_dim = env.action_space.shape[1]
60
+
61
+ # Build a unique run directory
62
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
63
+ run_name = f"pg_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
64
+ root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
65
+ os.makedirs(root_dir, exist_ok=True)
66
+ print(f"Saving training outputs to: {root_dir}")
67
+
68
+ logs_dir = os.path.join(root_dir, "logs")
69
+ plots_dir = os.path.join(root_dir, "plots")
70
+ os.makedirs(logs_dir, exist_ok=True)
71
+ os.makedirs(plots_dir, exist_ok=True)
72
+
73
+ # Create PG agents with use_baseline parameter
74
+ pg_agents = [
75
+ PGAgent(
76
+ state_dim=local_state_dim,
77
+ action_dim=action_dim,
78
+ lr=2e-4,
79
+ gamma=0.95,
80
+ critic_loss_coef=0.5
81
+ )
82
+ for _ in range(num_agents)
83
+ ]
84
+
85
+ # Tracking / Logging Variables
86
+ episode_rewards = []
87
+ episode_total_rewards = []
88
+ block_mean_rewards = []
89
+ block_total_rewards = []
90
+
91
+ agent_rewards_log = [[] for _ in range(num_agents)]
92
+ best_mean_reward = -1e9
93
+ best_model_path = os.path.join(logs_dir, "best_model.pth")
94
+
95
+ daily_rewards = []
96
+ monthly_rewards = []
97
+
98
+ training_start_time = time.time()
99
+ episode_durations = []
100
+ total_steps_global = 0
101
+ episode_log_data = []
102
+ performance_metrics_log = []
103
+
104
+ agent_charge_log = [[] for _ in range(num_agents)]
105
+ agent_discharge_log = [[] for _ in range(num_agents)]
106
+
107
+ # Training Loop
108
+ for episode in range(1, num_episodes + 1):
109
+ episode_start_time = time.time()
110
+
111
+ obs = np.array(env.reset(), dtype=np.float32)
112
+
113
+ if episode > 1:
114
+ last_episode_metrics = env.get_episode_metrics()
115
+ last_episode_metrics['Episode'] = episode - 1
116
+ performance_metrics_log.append(last_episode_metrics)
117
+
118
+ total_reward = np.zeros(num_agents, dtype=np.float32)
119
+ done = False
120
+ step_count = 0
121
+ day_logs = []
122
+ episode_charges = [[] for _ in range(num_agents)]
123
+ episode_discharges = [[] for _ in range(num_agents)]
124
+
125
+ # Main training loop for a single episode
126
+ while not done:
127
+ # Action Selection: Each PG agent acts independently
128
+ actions = []
129
+ for i, agent in enumerate(pg_agents):
130
+ agent_action = agent.select_action(obs[i])
131
+ actions.append(agent_action)
132
+ actions = np.array(actions, dtype=np.float32)
133
+
134
+ # Step the environment
135
+ next_obs_list, rewards, done, info = env.step(actions)
136
+ next_obs = np.array(next_obs_list, dtype=np.float32)
137
+
138
+ # Store Rewards: Each agent stores its own reward
139
+ for i, agent in enumerate(pg_agents):
140
+ agent.rewards.append(rewards[i])
141
+ agent.dones.append(done)
142
+
143
+ total_reward += rewards
144
+ obs = next_obs
145
+ step_count += 1
146
+ total_steps_global += 1
147
+
148
+ day_logs.append({
149
+ "step": step_count - 1,
150
+ "grid_import_no_p2p": info["grid_import_no_p2p"],
151
+ "grid_import_with_p2p": info["grid_import_with_p2p"],
152
+ "p2p_buy": info["p2p_buy"],
153
+ "p2p_sell": info["p2p_sell"],
154
+ "costs": info["costs"],
155
+ "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
156
+ "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
157
+ })
158
+
159
+ # Track actual charge/discharge actions from the environment
160
+ for i in range(num_agents):
161
+ episode_charges[i].append(info["charge_amount"][i])
162
+ episode_discharges[i].append(info["discharge_amount"][i])
163
+
164
+ if step_count >= max_steps:
165
+ break
166
+
167
+ # After each episode
168
+ sum_ep_reward = float(np.sum(total_reward))
169
+ mean_ep_reward = float(np.mean(total_reward))
170
+
171
+ episode_total_rewards.append(sum_ep_reward)
172
+ episode_rewards.append(mean_ep_reward)
173
+ daily_rewards.append(mean_ep_reward)
174
+
175
+ if len(daily_rewards) % window_size == 0:
176
+ last_totals = episode_total_rewards[-window_size:]
177
+ block_sum = sum(last_totals)
178
+ block_total_rewards.append(block_sum)
179
+
180
+ last_means = daily_rewards[-window_size:]
181
+ block_mean = sum(last_means) / window_size
182
+ block_mean_rewards.append(block_mean)
183
+
184
+ block_idx = len(block_mean_rewards)
185
+ print(
186
+ f"→ Completed Block {block_idx} "
187
+ f"| Episodes {(block_idx - 1) * window_size + 1}–{block_idx * window_size} "
188
+ f"| Block Total Reward: {block_sum:.3f} "
189
+ f"| Block Mean Reward: {block_mean:.3f}"
190
+ )
191
+
192
+ for i in range(num_agents):
193
+ agent_rewards_log[i].append(total_reward[i])
194
+ agent_charge_log[i].append(np.mean(episode_charges[i]))
195
+ agent_discharge_log[i].append(np.mean(episode_discharges[i]))
196
+
197
+ steps_data = []
198
+ for entry in day_logs:
199
+ steps_data.append({
200
+ "step": entry["step"],
201
+ "p2p_buy_sum": float(np.sum(entry["p2p_buy"])),
202
+ "p2p_sell_sum": float(np.sum(entry["p2p_sell"])),
203
+ "grid_import_no_p2p_sum": float(np.sum(entry["grid_import_no_p2p"])),
204
+ "grid_import_with_p2p_sum": float(np.sum(entry["grid_import_with_p2p"]))
205
+ })
206
+
207
+ baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
208
+ for entry in day_logs])
209
+ actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
210
+ cost_reduction = (baseline_cost - actual_cost) / (baseline_cost + 1e-8)
211
+
212
+ # UPDATE STEP: Update each PG agent independently
213
+ for agent in pg_agents:
214
+ agent.update()
215
+
216
+ # Save best models
217
+ if mean_ep_reward > best_mean_reward:
218
+ best_mean_reward = mean_ep_reward
219
+ for i, agent in enumerate(pg_agents):
220
+ agent_path = os.path.join(logs_dir, f"best_model_agent_{i}.pth")
221
+ agent.save(agent_path)
222
+
223
+ if episode % checkpoint_interval == 0:
224
+ for i, agent in enumerate(pg_agents):
225
+ ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}_agent_{i}.pth")
226
+ agent.save(ckpt_path)
227
+
228
+ episode_end_time = time.time()
229
+ episode_duration = episode_end_time - episode_start_time
230
+
231
+ print(
232
+ f"Episode {episode}/{num_episodes} "
233
+ f"| Time per Episode: {episode_duration:.2f}s "
234
+ f"| Steps: {step_count} "
235
+ f"| Mean Reward: {mean_ep_reward:.3f} "
236
+ f"| Cost Reduction: {cost_reduction:.2%}"
237
+ )
238
+
239
+ episode_log_data.append({
240
+ "Episode": episode,
241
+ "Steps": step_count,
242
+ "Mean_Reward": mean_ep_reward,
243
+ "Total_Reward": sum_ep_reward,
244
+ "Cost_Reduction_Pct": cost_reduction * 100,
245
+ "Baseline_Cost": baseline_cost,
246
+ "Actual_Cost": actual_cost,
247
+ "Episode_Duration": episode_duration,
248
+ "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
249
+ "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
250
+ })
251
+
252
+ # Periodic performance logging
253
+ if episode % 100 == 0:
254
+ avg_reward_last_100 = np.mean(daily_rewards[-100:]) if len(daily_rewards) >= 100 else np.mean(daily_rewards)
255
+ print(f" → Average reward (last 100 episodes): {avg_reward_last_100:.3f}")
256
+
257
+ # Final episode metrics
258
+ final_episode_metrics = env.get_episode_metrics()
259
+ final_episode_metrics['Episode'] = num_episodes
260
+ performance_metrics_log.append(final_episode_metrics)
261
+
262
+ training_end_time = time.time()
263
+ total_training_time = training_end_time - training_start_time
264
+
265
+ # Save final models
266
+ print("\nSaving final models...")
267
+ for i, agent in enumerate(pg_agents):
268
+ final_path = os.path.join(logs_dir, f"final_model_agent_{i}.pth")
269
+ agent.save(final_path)
270
+
271
+ np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
272
+ np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
273
+ np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))
274
+
275
+ # Create DataFrames
276
+ df_rewards_log = pd.DataFrame(episode_log_data)
277
+ df_perf_log = pd.DataFrame(performance_metrics_log)
278
+ df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
279
+ 'degradation_cost_over_time',
280
+ 'cost_savings_over_time',
281
+ 'grid_reduction_over_time'
282
+ ]), on="Episode")
283
+
284
+ # Helper: centered moving average
285
+ def moving_avg(series, window):
286
+ return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
287
+
288
+ ma_window = 300
289
+ episodes = np.arange(1, num_episodes + 1)
290
+
291
+ # Mean Reward moving average
292
+ reward_ma = moving_avg(df_final_log["Mean_Reward"], ma_window)
293
+ plt.figure(figsize=(8, 5))
294
+ plt.plot(episodes, reward_ma, linewidth=2, label=f"Mean Reward MA (win={ma_window})")
295
+ plt.xlabel("Episode")
296
+ plt.ylabel("Mean Reward")
297
+ plt.title("PG: Mean Reward Moving Average")
298
+ plt.legend()
299
+ plt.grid(True)
300
+ plt.savefig(os.path.join(plots_dir, "mean_reward_ma.png"), dpi=200)
301
+ plt.close()
302
+
303
+ # Total Reward moving average
304
+ total_ma = moving_avg(df_final_log["Total_Reward"], ma_window)
305
+ plt.figure(figsize=(8, 5))
306
+ plt.plot(episodes, total_ma, linewidth=2, label=f"Total Reward MA (win={ma_window})")
307
+ plt.xlabel("Episode")
308
+ plt.ylabel("Total Reward")
309
+ plt.title("PG: Total Reward Moving Average")
310
+ plt.legend()
311
+ plt.grid(True)
312
+ plt.savefig(os.path.join(plots_dir, "total_reward_ma.png"), dpi=200)
313
+ plt.close()
314
+
315
+ # Cost Reduction (%) moving average
316
+ cost_ma = moving_avg(df_final_log["Cost_Reduction_Pct"], ma_window)
317
+ plt.figure(figsize=(8, 5))
318
+ plt.plot(episodes, cost_ma, linewidth=2, label="Cost Reduction MA (%)")
319
+ plt.xlabel("Episode")
320
+ plt.ylabel("Cost Reduction (%)")
321
+ plt.title("PG: Cost Reduction Moving Average")
322
+ plt.legend()
323
+ plt.grid(True)
324
+ plt.savefig(os.path.join(plots_dir, "cost_reduction_ma.png"), dpi=200)
325
+ plt.close()
326
+
327
+ # Battery Degradation Cost moving average
328
+ degradation_ma = moving_avg(df_final_log["battery_degradation_cost_total"], ma_window)
329
+ plt.figure(figsize=(8, 5))
330
+ plt.plot(episodes, degradation_ma, linewidth=2, label=f"Degradation Cost MA (win={ma_window})", color='purple')
331
+ plt.xlabel("Episode")
332
+ plt.ylabel("Total Degradation Cost ($)")
333
+ plt.title("PG: Battery Degradation Cost Moving Average")
334
+ plt.legend()
335
+ plt.grid(True)
336
+ plt.savefig(os.path.join(plots_dir, "degradation_cost_ma.png"), dpi=200)
337
+ plt.close()
338
+
339
+ print(f"\nAll moving-average plots saved to: {plots_dir}")
340
+
341
+ # Save Final Logs to CSV
342
+ total_time_row = pd.DataFrame([{
343
+ "Episode": "Total_Training_Time",
344
+ "Episode_Duration": total_training_time
345
+ }])
346
+ df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
347
+
348
+ log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
349
+
350
+ columns_to_save = [
351
+ "Episode",
352
+ "Mean_Reward",
353
+ "Total_Reward",
354
+ "Cost_Reduction_Pct",
355
+ "Episode_Duration",
356
+ "battery_degradation_cost_total",
357
+ ]
358
+ df_to_save = df_to_save[columns_to_save]
359
+
360
+ df_to_save.to_csv(log_csv_path, index=False)
361
+
362
+ print(f"Saved comprehensive training performance log to: {log_csv_path}")
363
+
364
+ # Final Timings Printout
365
+ print("\n" + "="*50)
366
+ print("TRAINING COMPLETE".center(50))
367
+ print(f"Total training time: {total_training_time:.2f} seconds")
368
+ print(f"Device used: {pg_agents[0].device}")
369
+ print("="*50)
370
+
371
+
372
+ if __name__ == "__main__":
373
+ main()
Other_algorithms/Flat_System/PG/trainer/__init__.py ADDED
File without changes
Other_algorithms/Flat_System/PG/trainer/pg.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.distributions import Normal
4
+ import numpy as np
5
+
6
class SharedActorCritic(nn.Module):
    """Actor-critic network whose two heads share one MLP trunk.

    The trunk is two 128-unit linear layers with ReLU activations.  The actor
    head emits ``2 * action_dim`` numbers per state (a mean and a log standard
    deviation for each action dimension); the critic head emits one scalar
    value estimate per state.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_units = 128
        trunk_layers = [
            nn.Linear(state_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
        ]
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.feature_extractor = nn.Sequential(*trunk_layers)
        self.actor_head = nn.Linear(hidden_units, 2 * action_dim)
        self.critic_head = nn.Linear(hidden_units, 1)

    def forward(self, state):
        """Return ``(mean, log_std, value)`` for a batch of states.

        The actor output is split in half along the last dimension: the first
        half is the action mean, the second half the (unclamped) log std.
        """
        hidden = self.feature_extractor(state)
        mean, log_std = self.actor_head(hidden).chunk(2, dim=-1)
        return mean, log_std, self.critic_head(hidden)
24
+
25
+ class PGAgent:
26
+ def __init__(self, state_dim, action_dim, lr=3e-4, gamma=0.95, gae_lambda=0.95, critic_loss_coef=0.5):
27
+ self.gamma = gamma
28
+ self.gae_lambda = gae_lambda
29
+ self.critic_loss_coef = critic_loss_coef
30
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
31
+ self.model = SharedActorCritic(state_dim, action_dim).to(self.device)
32
+ self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
33
+ self.log_probs = []
34
+ self.rewards = []
35
+ self.values = []
36
+ self.dones = []
37
+ self.log_std_min = -20
38
+ self.log_std_max = 2
39
+
40
+ def select_action(self, state):
41
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
42
+ mean, log_std, value = self.model(state_tensor)
43
+ log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
44
+ std = torch.exp(log_std)
45
+ dist = Normal(mean, std)
46
+ action = dist.sample()
47
+ log_prob = dist.log_prob(action).sum(dim=-1)
48
+ self.log_probs.append(log_prob)
49
+ self.values.append(value)
50
+ return np.clip(action.squeeze(0).cpu().detach().numpy(), 0.0, 1.0)
51
+
52
+ def update(self):
53
+ if not self.rewards:
54
+ return
55
+ next_value = 0
56
+ values = torch.cat(self.values).squeeze().detach().cpu().numpy()
57
+ advantages, returns = self._calculate_gae_advantages(self.rewards, values, self.dones, next_value)
58
+ log_probs = torch.cat(self.log_probs)
59
+ advantages = torch.tensor(advantages, dtype=torch.float32, device=self.device)
60
+ returns = torch.tensor(returns, dtype=torch.float32, device=self.device)
61
+ advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
62
+ actor_loss = -(log_probs * advantages).mean()
63
+ critic_values = torch.cat(self.values).squeeze()
64
+ critic_loss = nn.MSELoss()(critic_values, returns)
65
+ total_loss = actor_loss + self.critic_loss_coef * critic_loss
66
+ self.optimizer.zero_grad()
67
+ total_loss.backward()
68
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
69
+ self.optimizer.step()
70
+ self.rewards = []
71
+ self.log_probs = []
72
+ self.values = []
73
+ self.dones = []
74
+
75
+ def _calculate_gae_advantages(self, rewards, values, dones, next_value):
76
+ advantages = np.zeros_like(rewards, dtype=np.float32)
77
+ last_advantage = 0
78
+ for t in reversed(range(len(rewards))):
79
+ mask = 1.0 - dones[t]
80
+ v_next = values[t + 1] if t < len(rewards) - 1 else next_value
81
+ delta = rewards[t] + self.gamma * v_next * mask - values[t]
82
+ last_advantage = delta + self.gamma * self.gae_lambda * last_advantage * mask
83
+ advantages[t] = last_advantage
84
+ returns = advantages + values
85
+ return advantages, returns
86
+
87
+ def save(self, path):
88
+ torch.save({
89
+ 'model_state_dict': self.model.state_dict(),
90
+ 'optimizer_state_dict': self.optimizer.state_dict(),
91
+ }, path)
92
+
93
+ def load(self, path):
94
+ checkpoint = torch.load(path, map_location=self.device)
95
+ self.model.load_state_dict(checkpoint['model_state_dict'])
96
+ self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
Other_algorithms/Flat_System/maddpg/__init__.py ADDED
File without changes
Other_algorithms/Flat_System/maddpg/maddpg_evaluation.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # maddpg_evaluation.py
2
+ import os
3
+ import sys
4
+ import time
5
+ import re
6
+ import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import torch
10
+ from datetime import datetime
11
+
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ from solar_sys_environment import SolarSys
15
+ from maddpg.trainer.maddpg import MADDPG
16
+
17
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+
19
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of ``values``.

    Special cases: an empty array yields 0.0 and an all-zero array yields 1.0
    (every participant received the same, namely nothing).
    """
    count = len(values)
    if count == 0:
        return 0.0
    if np.count_nonzero(values) == 0:
        return 1.0
    squared_total = values.sum() ** 2
    total_of_squares = np.square(values).sum()
    return squared_total / (count * total_of_squares)
27
+
28
def main():
    """Evaluate a trained MADDPG checkpoint and write logs and plots to disk.

    For each evaluated day the environment is reset and stepped with the
    policy's evaluation-mode actions until it reports ``done`` (or
    ``env.num_steps`` steps have been taken).  Per-house/per-step records are
    collected, aggregated into per-day and per-house summaries (cost savings,
    grid-import reduction, Jain's fairness indices, degradation cost), saved as
    CSV files under ``runs_with_battery/<run_name>/logs`` and visualised under
    ``.../plots``.
    """
    # User parameters
    # --- GENERALIZED PATHS ---
    MODEL_PATH = "./models/maddpg_para_sharing_region_a_5agents_final/logs/best_model.pth"
    DATA_PATH = "./data/testing/test_data.csv"
    DAYS_TO_EVALUATE = 30

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE
    # Per-house solar level above which a timestep counts as "sunny".
    SOLAR_THRESHOLD = 0.4

    # --- ANONYMITY: Implicitly detect and generalize state ---
    # Accept both the original state names and the anonymised region keys in
    # the checkpoint path; anything unrecognised falls back to "region_a".
    region_keys = {
        "oklahoma": "region_a",
        "colorado": "region_b",
        "pennsylvania": "region_c",
        "region_a": "region_a",
        "region_b": "region_b",
        "region_c": "region_c",
    }
    state_match = re.search(
        r"maddpg_para_sharing_(oklahoma|colorado|pennsylvania|region_[abc])_",
        model_path,
    )
    detected_state_key = region_keys[state_match.group(1)] if state_match else "region_a"

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state_key,  # Use anonymous key
        time_freq="15T"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_maddpg_para_sharing_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Instantiate MADDPG agent
    maddpg = MADDPG(
        num_agents=num_agents,
        state_dim=local_state_dim,
        action_dim=action_dim
    )

    # Load MADDPG checkpoint and switch every network to inference mode
    maddpg.load(model_path)

    maddpg.actor.eval()
    maddpg.critic.eval()
    maddpg.target_actor.eval()
    maddpg.target_critic.eval()

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    # Fallback used when the environment omits battery activity from `info`
    # (the training logger applies the same default).
    no_battery_activity = np.zeros(num_agents)

    for day_idx in range(days_to_evaluate):
        obs, _ = env.reset()  # Use new reset signature
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.time()

            # Select actions with MADDPG
            actions = maddpg.select_actions(obs, evaluate=True)

            next_obs, rewards, done, info = env.step(actions)

            # Consolidated Logging
            step_duration = time.time() - step_start_time

            step_timing_list.append({
                "day": day_idx + 1, "step": step_count, "step_time_s": step_duration
            })

            grid_price_now = env.get_grid_price(step_count)
            # Re-calculate peer price from current env state
            current_demands = env.demands_day[step_count]
            current_solars = env.solars_day[step_count]
            current_total_surplus = float(np.maximum(current_solars - current_demands, 0.0).sum())
            current_total_shortfall = float(np.maximum(current_demands - current_solars, 0.0).sum())
            peer_price_now = env.get_peer_price(step_count, current_total_surplus, current_total_shortfall)

            # Look the per-agent arrays up once per step.  A missing
            # "grid_export" is reported as a KeyError naming the key instead of
            # an opaque "NoneType is not subscriptable".
            charge_amounts = info.get("charge_amount", no_battery_activity)
            discharge_amounts = info.get("discharge_amount", no_battery_activity)
            grid_exports = info["grid_export"]

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                p2p_buy = float(info["p2p_buy"][i])
                p2p_sell = float(info["p2p_sell"][i])
                charge_amount = float(charge_amounts[i])
                discharge_amount = float(discharge_amounts[i])

                day_logs.append({
                    "day": day_idx + 1, "step": step_count, "house": hid,
                    "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(grid_exports[i]),
                    "p2p_buy": p2p_buy, "p2p_sell": p2p_sell, "actual_cost": float(info["costs"][i]),
                    "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
                    "total_demand": float(env.demands_day[step_count, i]),
                    "total_solar": float(env.solars_day[step_count, i]),
                    "grid_price": grid_price_now, "peer_price": peer_price_now,
                    "soc": (env.battery_soc[i] / env.battery_max_capacity[i]) if is_battery_house else np.nan,
                    "degradation_cost": ((charge_amount + discharge_amount) * env.battery_degradation_cost[i]) if is_battery_house else 0.0,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            if step_count >= eval_steps:
                break

        day_df = pd.DataFrame(day_logs)
        all_logs.extend(day_logs)

        # Consolidated daily summary calculation
        grouped_house = day_df.groupby("house").sum(numeric_only=True)
        grouped_step = day_df.groupby("step").sum(numeric_only=True)

        total_demand = grouped_step["total_demand"].sum()
        total_solar = grouped_step["total_solar"].sum()
        total_p2p_buy = grouped_house["p2p_buy"].sum()
        total_p2p_sell = grouped_house["p2p_sell"].sum()

        baseline_cost_per_house = grouped_house["baseline_cost"]
        actual_cost_per_house = grouped_house["actual_cost"]
        cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
        day_total_cost_savings = cost_savings_per_house.sum()

        overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum() if baseline_cost_per_house.sum() > 0 else 0.0

        baseline_import_per_house = grouped_house["grid_import_no_p2p"]
        actual_import_per_house = grouped_house["grid_import_with_p2p"]
        import_reduction_per_house = baseline_import_per_house - actual_import_per_house
        day_total_import_reduction = import_reduction_per_house.sum()

        overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum() if baseline_import_per_house.sum() > 0 else 0.0

        fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
        fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
        fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
        fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
        fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
        fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
        day_total_degradation_cost = grouped_house["degradation_cost"].sum()

        daily_summaries.append({
            "day": day_idx + 1, "day_total_demand": total_demand, "day_total_solar": total_solar,
            "day_p2p_buy": total_p2p_buy, "day_p2p_sell": total_p2p_sell,
            "cost_savings_abs": day_total_cost_savings, "cost_savings_pct": overall_cost_savings_pct,
            "fairness_cost_savings": fairness_cost_savings, "grid_reduction_abs": day_total_import_reduction,
            "grid_reduction_pct": overall_import_reduction_pct, "fairness_grid_reduction": fairness_import_reduction,
            "fairness_reward": fairness_rewards, "fairness_p2p_buy": fairness_p2p_buy, "fairness_p2p_sell": fairness_p2p_sell,
            "fairness_p2p_total": fairness_p2p_total, "total_degradation_cost": day_total_degradation_cost
        })

    # Final processing and saving
    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").sum(numeric_only=True)
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # Calculate alternative performance metrics
    # Community solar per (day, step), broadcast back onto every house row so
    # the boolean mask is aligned with all_days_df without implicit reindexing.
    agg_solar_per_row = all_days_df.groupby(['day', 'step'])['total_solar'].transform('sum')
    num_agents_total = len(all_days_df['house'].unique())
    sunny_df = all_days_df[agg_solar_per_row > (SOLAR_THRESHOLD * num_agents_total)]
    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0

    final_row = {
        "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all, "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all, "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all, "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct, "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct, "cost_savings_sunny_hours_pct": cost_savings_sunny_pct
    }

    # Columns that only make sense per day are left empty in the summary row.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    # Final success message (replacing the numerical summary printout)
    print("\nEvaluation run completed. All data logs (CSVs) and plots saved to disk.")

    # Plots
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(plot_daily_df["day"])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    days = plot_daily_df["day"]
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(plot_daily_df["day"])
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    # Bar chart for cost savings
    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    # Second y-axis for grid import reduction
    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses
    day1_df = all_days_df[all_days_df['day'] == 1]
    # Houses without a battery were logged with soc = NaN and drop out here.
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()

    print("All plots have been generated and saved. Evaluation complete.")
426
+
427
+ if __name__ == "__main__":
428
+ main()
Other_algorithms/Flat_System/maddpg/maddpg_train.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import re
4
+ import numpy as np
5
+ import torch
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ import time
9
+ from datetime import datetime
10
+
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
+
13
+ from solar_sys_environment import SolarSys
14
+ from maddpg.trainer.maddpg import MADDPG
15
+
16
def _save_moving_average_plot(x, values, window, ylabel, title, label, out_path, color=None):
    """Plot a centered moving average of *values* against *x* and save it.

    Args:
        x: X-axis values (episode numbers); must have the same length as *values*.
        values: Per-episode series to smooth.
        window: Rolling-window width in episodes (centered, ``min_periods=1``).
        ylabel: Y-axis label.
        title: Figure title.
        label: Legend label for the curve.
        out_path: Destination file for the PNG.
        color: Optional matplotlib colour; ``None`` keeps the default cycle.
    """
    smoothed = (
        pd.Series(values)
        .rolling(window=window, center=True, min_periods=1)
        .mean()
        .to_numpy()
    )
    plt.figure(figsize=(8, 5))
    plt.plot(x, smoothed, linewidth=2, label=label, color=color)
    plt.xlabel("Episode")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path, dpi=200)
    plt.close()


def main():
    """Train a parameter-sharing MADDPG agent on the SolarSys environment.

    The function builds the environment, runs a one-step sanity check,
    trains for ``num_episodes`` episodes (one gradient update per
    environment step), keeps the best model by mean episode reward, and
    finally writes ``.npy`` reward arrays, moving-average plots and a CSV
    performance log under a timestamped run directory.
    """
    STATE_TO_RUN = "oklahoma"  # "pennsylvania" or "colorado" or "oklahoma"

    # Set the path to your training data
    DATA_FILE_PATH = "/path/to/project/training/5houses_152days_TRAIN.csv"
    num_episodes = 10000
    batch_size = 256
    # NOTE(review): larger than num_episodes, so no periodic checkpoint is
    # ever written with these settings -- confirm this is intended.
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="15T",
    )

    # Sanity check: env I/O shapes
    print("Observation space:", env.observation_space)
    print("Action space     :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1),
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # Dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Build a unique run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"maddpg_para_sharing_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # Create the MADDPG agent.
    # The trainer's constructor names the observation-size parameter
    # `local_state_dim`; passing `state_dim=` raises TypeError.
    maddpg = MADDPG(
        num_agents=num_agents,
        local_state_dim=local_state_dim,
        action_dim=action_dim,
        gamma=0.95,
        tau=0.01,
        lr_actor=1e-4,
        lr_critic=1e-3,
        buffer_size=1000000,
        noise_episodes=5000,
        init_sigma=0.3,
        final_sigma=0.01,
        batch_size=batch_size,
    )

    # Tracking / Logging Variables
    episode_rewards = []          # mean reward across agents, per episode
    episode_total_rewards = []    # summed reward across agents, per episode
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    training_start_time = time.time()
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # Training Loop
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Collect metrics from the previous episode (queried after reset(),
        # exactly as the environment is used throughout this script).
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # Select actions using the MADDPG agent
            actions = maddpg.select_actions(obs)

            # Step environment
            next_obs_list, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs_list, dtype=np.float32)

            # Store the transition in the replay buffer
            maddpg.store_transition(obs, actions, rewards, next_obs, done)

            # Train the agent at every step
            maddpg.train()

            total_reward += rewards
            obs = next_obs
            step_count += 1

            # Read battery flows once, with the same zero fallback for both
            # the per-agent lists and the step log.
            charge = info.get("charge_amount", np.zeros(num_agents))
            discharge = info.get("discharge_amount", np.zeros(num_agents))
            for i in range(num_agents):
                episode_charges[i].append(charge[i])
                episode_discharges[i].append(discharge[i])

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": charge,
                "discharge_amount": discharge,
            })

            if step_count >= max_steps:
                break

        # After each episode: compute per-episode metrics
        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)

        # If we just finished a block of window_size episodes, aggregate
        if len(episode_rewards) % window_size == 0:
            block_sum = sum(episode_total_rewards[-window_size:])
            block_total_rewards.append(block_sum)

            block_mean = sum(episode_rewards[-window_size:]) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx-1)*window_size + 1}–{block_idx*window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        # Log agent-level rewards and battery usage (once per episode)
        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

        baseline_cost = np.sum([
            np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
            for entry in day_logs
        ])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # Guard against a zero baseline (no grid import at all in the episode).
        if baseline_cost > 0:
            cost_reduction = (baseline_cost - actual_cost) / baseline_cost
        else:
            cost_reduction = 0.0

        # Call on_episode_end() for noise decay schedule
        maddpg.on_episode_end()

        # Save if best
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            maddpg.save(best_model_path)

        if episode % checkpoint_interval == 0:
            ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
            maddpg.save(ckpt_path)

        episode_duration = time.time() - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        # Record data in per-episode log
        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs]),
        })

    # Capture the final episode's metrics
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    # End of all training
    total_training_time = time.time() - training_start_time

    # Save out per-episode agent rewards + mean rewards
    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Create Final DataFrame for Logging and Plotting
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)

    # Merge the two DataFrames on the 'Episode' column. The time-series
    # metric columns are dropped when present; errors="ignore" keeps the
    # merge working if the environment does not report one of them.
    df_final_log = pd.merge(
        df_rewards_log,
        df_perf_log.drop(
            columns=[
                'degradation_cost_over_time',
                'cost_savings_over_time',
                'grid_reduction_over_time',
            ],
            errors="ignore",
        ),
        on="Episode",
    )

    # PLOTTING
    ma_window = 300  # smoothing window (in episodes)
    # Take the x-axis from the merged frame so x and y always match in length.
    episodes = df_final_log["Episode"].to_numpy()

    _save_moving_average_plot(
        episodes, df_final_log["Mean_Reward"], ma_window,
        ylabel="Mean Reward",
        title="MADDPG: Mean Reward Moving Average",
        label=f"Mean Reward MA (win={ma_window})",
        out_path=os.path.join(plots_dir, "mean_reward_ma.png"),
    )
    _save_moving_average_plot(
        episodes, df_final_log["Total_Reward"], ma_window,
        ylabel="Total Reward",
        title="MADDPG: Total Reward Moving Average",
        label=f"Total Reward MA (win={ma_window})",
        out_path=os.path.join(plots_dir, "total_reward_ma.png"),
    )
    _save_moving_average_plot(
        episodes, df_final_log["Cost_Reduction_Pct"], ma_window,
        ylabel="Cost Reduction (%)",
        title="MADDPG: Cost Reduction Moving Average",
        label="Cost Reduction MA (%)",
        out_path=os.path.join(plots_dir, "cost_reduction_ma.png"),
    )
    _save_moving_average_plot(
        episodes, df_final_log["battery_degradation_cost_total"], ma_window,
        ylabel="Total Degradation Cost ($)",
        title="MADDPG: Battery Degradation Cost Moving Average",
        label=f"Degradation Cost MA (win={ma_window})",
        out_path=os.path.join(plots_dir, "degradation_cost_ma.png"),
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Save Final Logs to CSV (with a trailing row holding the total time)
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time,
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Select and reorder columns for the final CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # Final Timings Printout
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print("="*50)


if __name__ == "__main__":
    main()
Other_algorithms/Flat_System/maddpg/trainer/__init__.py ADDED
File without changes
Other_algorithms/Flat_System/maddpg/trainer/maddpg.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import numpy as np
5
+ import random
6
+ from collections import deque
7
+ from torch.utils.data import Dataset, DataLoader
8
+
9
class ReplayBufferDataset(Dataset):
    """Fixed-capacity FIFO replay buffer exposed as a torch ``Dataset``.

    Each item is one joint transition
    ``(states, actions, rewards, next_states, done)``. Once ``max_size``
    transitions are stored, adding a new one evicts the oldest.
    """

    def __init__(self, max_size=100000):
        # deque(maxlen=...) drops the oldest entry automatically when full.
        self.buffer = deque(maxlen=max_size)

    def add(self, states, actions, rewards, next_states, done):
        """Append one transition, coercing every array to float32.

        Inputs may be NumPy arrays or nested sequences. Each array is
        copied on insertion so that later in-place changes by the caller
        cannot alter stored data, and so every sampled tensor has the
        float32 dtype used by the networks.
        """
        self.buffer.append((
            np.array(states, dtype=np.float32),
            np.array(actions, dtype=np.float32),
            np.array(rewards, dtype=np.float32),
            np.array(next_states, dtype=np.float32),
            np.float32(done),
        ))

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)

    def __getitem__(self, idx):
        """Return transition *idx* as a tuple of float32 tensors."""
        states, actions, rewards, next_states, done = self.buffer[idx]
        return (
            torch.from_numpy(states),
            torch.from_numpy(actions),
            torch.from_numpy(rewards),
            torch.from_numpy(next_states),
            torch.tensor(done, dtype=torch.float32),
        )
35
+
36
class Actor(nn.Module):
    """Deterministic policy network: local observation -> action in (0, 1).

    Two hidden ReLU layers of width ``hidden_dim`` followed by a sigmoid
    output layer of width ``action_dim``.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=64):
        super().__init__()
        # Layer order (and therefore state_dict keys "net.0", "net.2",
        # "net.4") must stay fixed so saved checkpoints keep loading.
        layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, action_dim),
            nn.Sigmoid(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, state):
        """Map *state* (last dimension ``state_dim``) to an action tensor."""
        action = self.net(state)
        return action
50
+
51
class SharedCritic(nn.Module):
    """Centralised critic producing one Q-value per agent.

    The input is the concatenation of the global state and the global
    (joint) action; the output has ``num_agents`` columns.
    """

    def __init__(self, global_state_dim, global_action_dim, hidden_dim=128, num_agents=1):
        super().__init__()
        input_dim = global_state_dim + global_action_dim
        # Keep the Sequential layout so checkpoint keys ("net.0", "net.2",
        # "net.4") are unchanged.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_agents),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, global_state, global_action):
        """Return Q-values for a batch of (global_state, global_action) rows."""
        joint_input = torch.cat((global_state, global_action), dim=1)
        q_values = self.net(joint_input)
        return q_values
65
+
66
class Agent:
    """Container for one actor, its target copy and the actor's optimizer."""

    def __init__(self, local_state_dim, action_dim, lr_actor=1e-3, device=torch.device('cpu')):
        self.device = device

        # Online and target policies share the same architecture.
        online_actor = Actor(local_state_dim, action_dim).to(device)
        target_actor = Actor(local_state_dim, action_dim).to(device)
        self.actor = online_actor
        self.target_actor = target_actor

        self.actor_optim = optim.Adam(online_actor.parameters(), lr=lr_actor)

        # Start the target network as an exact copy of the online network.
        target_actor.load_state_dict(online_actor.state_dict())

    def sync_target(self, tau):
        """Soft-update the target actor: target <- tau*online + (1-tau)*target."""
        param_pairs = zip(self.target_actor.parameters(), self.actor.parameters())
        for target_param, online_param in param_pairs:
            blended = tau * online_param.data + (1.0 - tau) * target_param.data
            target_param.data.copy_(blended)
77
+
78
class MADDPG:
    """Parameter-sharing MADDPG trainer.

    One actor (plus target) is shared by all agents and applied to each
    agent's local observation. One centralised critic (plus target) maps
    the flattened joint state and joint action to ``num_agents`` Q-values.
    Exploration uses Gaussian action noise whose standard deviation is
    annealed linearly from ``init_sigma`` to ``final_sigma`` over
    ``noise_episodes`` episodes.

    Args:
        num_agents: Number of agents sharing the actor.
        local_state_dim: Size of one agent's observation vector.
        action_dim: Size of one agent's action vector.
        gamma: Discount factor.
        tau: Soft-update rate for the target networks.
        lr_actor: Actor learning rate.
        lr_critic: Critic learning rate.
        buffer_size: Replay-buffer capacity (transitions).
        noise_episodes: Episodes over which the noise sigma is annealed;
            a value <= 0 means the final sigma is used immediately.
        init_sigma: Initial exploration-noise standard deviation.
        final_sigma: Final exploration-noise standard deviation.
        batch_size: Mini-batch size for each update.
        num_workers: Worker count passed to the ``DataLoader``.
    """

    def __init__(self, num_agents, local_state_dim, action_dim,
                 gamma=0.95, tau=0.01, lr_actor=1e-4, lr_critic=1e-3,
                 buffer_size=100000, noise_episodes=100, init_sigma=0.2, final_sigma=0.01,
                 batch_size=128, num_workers=0):

        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.num_agents = num_agents
        self.gamma = gamma
        self.tau = tau
        self.init_sigma = init_sigma
        self.final_sigma = final_sigma
        self.noise_episodes = noise_episodes
        self.current_episode = 0

        # Shared actor and its target copy.
        self.actor = Actor(local_state_dim, action_dim).to(self.device)
        self.target_actor = Actor(local_state_dim, action_dim).to(self.device)
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.actor_optim = optim.Adam(self.actor.parameters(), lr=lr_actor)

        # Centralised critic over the flattened joint state/action.
        global_state_dim = num_agents * local_state_dim
        global_action_dim = num_agents * action_dim
        self.critic = SharedCritic(global_state_dim, global_action_dim, num_agents=num_agents).to(self.device)
        self.target_critic = SharedCritic(global_state_dim, global_action_dim, num_agents=num_agents).to(self.device)
        self.target_critic.load_state_dict(self.critic.state_dict())
        self.critic_optim = optim.Adam(self.critic.parameters(), lr=lr_critic)

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.memory = ReplayBufferDataset(max_size=buffer_size)
        self.dataloader = None
        self.loader_iter = None

    def _current_sigma(self):
        """Return the exploration-noise sigma for the current episode."""
        if self.noise_episodes <= 0:
            # No annealing period: avoid dividing by zero and use the final value.
            frac = 1.0
        else:
            frac = min(float(self.current_episode) / self.noise_episodes, 1.0)
        return self.init_sigma - frac * (self.init_sigma - self.final_sigma)

    def _reset_loader(self):
        """(Re)build the shuffled DataLoader over the replay buffer."""
        self.dataloader = DataLoader(
            self.memory,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=self.device.type == 'cuda',
            drop_last=True,
        )
        self.loader_iter = iter(self.dataloader)

    def _next_batch(self):
        """Return the next mini-batch, starting a new pass when one is exhausted."""
        if self.dataloader is None:
            self._reset_loader()
        try:
            return next(self.loader_iter)
        except StopIteration:
            # A new loader picks up transitions added since the last pass began.
            self._reset_loader()
            return next(self.loader_iter)

    @staticmethod
    def _soft_update(target_net, online_net, tau):
        """In-place Polyak update: target <- tau*online + (1-tau)*target."""
        for tp, p in zip(target_net.parameters(), online_net.parameters()):
            tp.data.copy_(tau * p.data + (1.0 - tau) * tp.data)

    def select_actions(self, states, evaluate=False):
        """Return one action row per agent, clipped to [0, 1].

        Args:
            states: Array-like whose first ``num_agents`` rows are the
                agents' local observations.
            evaluate: If True, no exploration noise is added.
        """
        states_t = torch.as_tensor(states, dtype=torch.float32, device=self.device)
        with torch.no_grad():
            # The actor is shared, so all agents go through one batched pass.
            actions_t = self.actor(states_t[: self.num_agents])
        actions = actions_t.cpu().numpy()

        if not evaluate:
            noise = np.random.normal(0, self._current_sigma(), size=actions.shape)
            actions += noise
        return np.clip(actions, 0.0, 1.0)

    def store_transition(self, states, actions, rewards, next_states, done):
        """Add one joint transition to the replay buffer."""
        self.memory.add(states, actions, rewards, next_states, done)

    def train(self):
        """Run one critic update, one actor update and the target soft-updates.

        Does nothing until the buffer holds at least ``batch_size`` transitions.
        """
        if len(self.memory) < self.batch_size:
            return

        s, a, r, s2, d = self._next_batch()

        s_t = s.to(self.device)
        a_t = a.to(self.device)
        r_t = r.to(self.device)
        s2_t = s2.to(self.device)
        d_t = d.to(self.device).unsqueeze(-1)  # (batch, 1), broadcasts over agents

        # Standardise rewards within the batch.
        r_t = (r_t - r_t.mean()) / (r_t.std() + 1e-7)
        batch_len = s_t.shape[0]
        gs = s_t.reshape(batch_len, -1)
        ga = a_t.reshape(batch_len, -1)
        ns = s2_t.reshape(batch_len, -1)

        # Critic update against the bootstrapped target.
        with torch.no_grad():
            # Shared target actor applied to every agent's next observation,
            # then flattened agent-major into the joint action.
            targ_actions = self.target_actor(s2_t[:, : self.num_agents, :]).reshape(batch_len, -1)
            Q_prime = self.target_critic(ns, targ_actions)
            targets = r_t + self.gamma * (1 - d_t) * Q_prime
        Q = self.critic(gs, ga)
        critic_loss = nn.MSELoss()(Q, targets)
        self.critic_optim.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic.parameters(), 1.0)
        self.critic_optim.step()

        # Actor update: maximise the critic's value of the actor's joint action.
        all_actions = self.actor(s_t[:, : self.num_agents, :]).reshape(batch_len, -1)
        actor_loss = -self.critic(gs, all_actions).mean()

        self.actor_optim.zero_grad()
        actor_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.actor.parameters(), 1.0)
        self.actor_optim.step()

        self._soft_update(self.target_actor, self.actor, self.tau)
        self._soft_update(self.target_critic, self.critic, self.tau)

    def on_episode_end(self):
        """Advance the episode counter that drives the noise schedule."""
        self.current_episode += 1

    def save(self, path: str):
        """Write networks, optimizers and the episode counter to *path*."""
        payload = {
            "critic": self.critic.state_dict(),
            "target_critic": self.target_critic.state_dict(),
            "critic_optim": self.critic_optim.state_dict(),
            "actor": self.actor.state_dict(),
            "target_actor": self.target_actor.state_dict(),
            "actor_optim": self.actor_optim.state_dict(),
            "current_episode": self.current_episode,
        }
        torch.save(payload, path)

    def load(self, path: str):
        """Restore the state written by :meth:`save` from *path*."""
        checkpoint = torch.load(path, map_location=self.device)
        self.critic.load_state_dict(checkpoint["critic"])
        self.target_critic.load_state_dict(checkpoint["target_critic"])
        self.critic_optim.load_state_dict(checkpoint["critic_optim"])
        self.actor.load_state_dict(checkpoint["actor"])
        self.target_actor.load_state_dict(checkpoint["target_actor"])
        self.actor_optim.load_state_dict(checkpoint["actor_optim"])
        # Older checkpoints may lack the counter; restart the schedule then.
        self.current_episode = checkpoint.get("current_episode", 0)
Other_algorithms/Flat_System/mappo/_init_.py ADDED
File without changes
Other_algorithms/Flat_System/mappo/mappo_evaluation.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mappo_evaluate.py
2
+ import os
3
+ import sys
4
+ import time
5
+ import re
6
+ import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import torch
10
+ from datetime import datetime
11
+
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ from solar_sys_environment import SolarSys
15
+ from mappo.trainer.mappo import MAPPO
16
+
17
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+
19
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of *values*.

    Args:
        values: Per-house quantities; any array-like is accepted and is
            flattened to one dimension.

    Returns:
        The index as a Python ``float``. An empty input yields 0.0 and an
        all-zero input yields 1.0 (treated as perfectly even).
    """
    arr = np.asarray(values, dtype=float).ravel()
    if arr.size == 0:
        return 0.0
    if np.all(arr == 0):
        # Avoid 0/0: everyone receiving nothing counts as perfectly fair.
        return 1.0
    num = arr.sum() ** 2
    den = arr.size * (arr ** 2).sum()
    return float(num / den)
27
+
28
+ def main():
29
+ # User parameters
30
+ # --- GENERALIZED PATHS ---
31
+ MODEL_PATH = "./models/mappo_region_c_100agents_final/best_model.pth"
32
+ DATA_PATH = "./data/testing/test_data.csv"
33
+ DAYS_TO_EVALUATE = 30
34
+
35
+ model_path = MODEL_PATH
36
+ data_path = DATA_PATH
37
+ days_to_evaluate = DAYS_TO_EVALUATE
38
+ SOLAR_THRESHOLD = 0.1
39
+
40
+ # --- ANONYMITY: Implicitly detect and generalize state ---
41
+ state_match = re.search(r"mappo_(oklahoma|colorado|pennsylvania)_", model_path)
42
+ if not state_match:
43
+ # Default to a generic region if the pattern isn't in the path
44
+ detected_state_key = "region_c"
45
+ else:
46
+ original_state = state_match.group(1)
47
+ if original_state == "oklahoma": detected_state_key = "region_a"
48
+ elif original_state == "colorado": detected_state_key = "region_b"
49
+ else: detected_state_key = "region_c"
50
+
51
+ # REMOVED: print(f"--- Detected state: {detected_state.upper()} ---")
52
+
53
+ # Env setup
54
+ env = SolarSys(
55
+ data_path=data_path,
56
+ state=detected_state_key, # Use anonymous key
57
+ time_freq="3H"
58
+ )
59
+ eval_steps = env.num_steps
60
+ house_ids = env.house_ids
61
+ num_agents = env.num_agents
62
+
63
+ # Generate a unique eval run folder
64
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
65
+ run_name = f"eval_mappo_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
66
+ output_folder = os.path.join("runs_with_battery", run_name)
67
+ logs_dir = os.path.join(output_folder, "logs")
68
+ plots_dir = os.path.join(output_folder, "plots")
69
+ for d in (logs_dir, plots_dir):
70
+ os.makedirs(d, exist_ok=True)
71
+ print(f"Saving evaluation outputs to: {output_folder}")
72
+
73
+ local_dim = env.observation_space.shape[1]
74
+ global_dim = num_agents * local_dim
75
+ act_dim = env.action_space.shape[1]
76
+
77
+ mappo = MAPPO(
78
+ n_agents=num_agents,
79
+ local_dim=local_dim,
80
+ global_dim=global_dim,
81
+ act_dim=act_dim,
82
+ lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=10, batch_size=1024
83
+ )
84
+
85
+ # Load MAPPO checkpoint
86
+ mappo.load(model_path)
87
+ mappo.actor.to(device).eval()
88
+ mappo.critic.to(device).eval()
89
+
90
+ # Prepare logs
91
+ all_logs = []
92
+ daily_summaries = []
93
+ step_timing_list = []
94
+
95
+ evaluation_start = time.time()
96
+
97
+ for day_idx in range(days_to_evaluate):
98
+ obs, _ = env.reset() # Use new reset signature
99
+ done = False
100
+ step_count = 0
101
+ day_logs = []
102
+
103
+ while not done:
104
+ step_start_time = time.time()
105
+ global_obs = np.array(obs).flatten()
106
+
107
+ # Select actions with MAPPO
108
+ with torch.no_grad():
109
+ actions, _ = mappo.select_action(obs, global_obs)
110
+
111
+ next_obs, rewards, done, info = env.step(actions)
112
+
113
+ # Consolidated Logging
114
+ step_end_time = time.time()
115
+ step_duration = step_end_time - step_start_time
116
+
117
+ # REMOVED: print(f"[Day {day_idx+1}, Step {step_count}] Step time: {step_duration:.6f} seconds")
118
+
119
+ step_timing_list.append({
120
+ "day": day_idx + 1, "step": step_count, "step_time_s": step_duration
121
+ })
122
+
123
+ grid_price_now = env.get_grid_price(step_count)
124
+ # Re-calculate peer price from current env state
125
+ current_demands = env.demands_day[step_count]
126
+ current_solars = env.solars_day[step_count]
127
+ current_total_surplus = float(np.maximum(current_solars - current_demands, 0.0).sum())
128
+ current_total_shortfall = float(np.maximum(current_demands - current_solars, 0.0).sum())
129
+ peer_price_now = env.get_peer_price(step_count, current_total_surplus, current_total_shortfall)
130
+
131
+
132
+ for i, hid in enumerate(house_ids):
133
+ is_battery_house = hid in env.batteries
134
+ p2p_buy = float(info["p2p_buy"][i])
135
+ p2p_sell = float(info["p2p_sell"][i])
136
+ charge_amount = float(info.get("charge_amount")[i])
137
+ discharge_amount = float(info.get("discharge_amount")[i])
138
+
139
+ day_logs.append({
140
+ "day": day_idx + 1, "step": step_count, "house": hid,
141
+ "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
142
+ "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
143
+ "grid_export": float(info.get("grid_export")[i]),
144
+ "p2p_buy": p2p_buy, "p2p_sell": p2p_sell, "actual_cost": float(info["costs"][i]),
145
+ "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
146
+ "total_demand": float(env.demands_day[step_count, i]),
147
+ "total_solar": float(env.solars_day[step_count, i]),
148
+ "grid_price": grid_price_now, "peer_price": peer_price_now,
149
+ "soc": (env.battery_soc[i] / env.battery_max_capacity[i]) if is_battery_house else np.nan,
150
+ "degradation_cost": ((charge_amount + discharge_amount) * env.battery_degradation_cost[i]) if is_battery_house else 0.0,
151
+ "reward": float(rewards[i]),
152
+ })
153
+
154
+ obs = next_obs
155
+ step_count += 1
156
+ if step_count >= eval_steps:
157
+ break
158
+
159
+ day_df = pd.DataFrame(day_logs)
160
+ all_logs.extend(day_logs)
161
+
162
+ # Consolidated daily summary calculation (Kept math)
163
+ grouped_house = day_df.groupby("house").sum(numeric_only=True)
164
+ grouped_step = day_df.groupby("step").sum(numeric_only=True)
165
+
166
+ total_demand = grouped_step["total_demand"].sum()
167
+ total_solar = grouped_step["total_solar"].sum()
168
+ total_p2p_buy = grouped_house["p2p_buy"].sum()
169
+ total_p2p_sell = grouped_house["p2p_sell"].sum()
170
+
171
+ baseline_cost_per_house = grouped_house["baseline_cost"]
172
+ actual_cost_per_house = grouped_house["actual_cost"]
173
+ cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
174
+ day_total_cost_savings = cost_savings_per_house.sum()
175
+
176
+ overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum() if baseline_cost_per_house.sum() > 0 else 0.0
177
+
178
+ baseline_import_per_house = grouped_house["grid_import_no_p2p"]
179
+ actual_import_per_house = grouped_house["grid_import_with_p2p"]
180
+ import_reduction_per_house = baseline_import_per_house - actual_import_per_house
181
+ day_total_import_reduction = import_reduction_per_house.sum()
182
+
183
+ overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum() if baseline_import_per_house.sum() > 0 else 0.0
184
+
185
+ fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
186
+ fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
187
+ fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
188
+ fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
189
+ fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
190
+ fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
191
+ day_total_degradation_cost = grouped_house["degradation_cost"].sum()
192
+
193
+ daily_summaries.append({
194
+ "day": day_idx + 1, "day_total_demand": total_demand, "day_total_solar": total_solar,
195
+ "day_p2p_buy": total_p2p_buy, "day_p2p_sell": total_p2p_sell,
196
+ "cost_savings_abs": day_total_cost_savings, "cost_savings_pct": overall_cost_savings_pct,
197
+ "fairness_cost_savings": fairness_cost_savings, "grid_reduction_abs": day_total_import_reduction,
198
+ "grid_reduction_pct": overall_import_reduction_pct, "fairness_grid_reduction": fairness_import_reduction,
199
+ "fairness_reward": fairness_rewards, "fairness_p2p_buy": fairness_p2p_buy, "fairness_p2p_sell": fairness_p2p_sell,
200
+ "fairness_p2p_total": fairness_p2p_total, "total_degradation_cost": day_total_degradation_cost
201
+ })
202
+
203
+ # Final processing and saving
204
+ evaluation_end = time.time()
205
+ total_eval_time = evaluation_end - evaluation_start
206
+ # REMOVED: print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
207
+
208
+ all_days_df = pd.DataFrame(all_logs)
209
+ combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
210
+ all_days_df.to_csv(combined_csv_path, index=False)
211
+ print(f"Saved combined step-level logs to: {combined_csv_path}")
212
+
213
+ step_timing_df = pd.DataFrame(step_timing_list)
214
+ timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
215
+ step_timing_df.to_csv(timing_csv_path, index=False)
216
+ print(f"Saved step timing logs to: {timing_csv_path}")
217
+
218
+ house_level_df = all_days_df.groupby("house").sum(numeric_only=True)
219
+ house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
220
+ house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
221
+
222
+ house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
223
+ house_level_df.to_csv(house_summary_csv)
224
+ print(f"Saved final summary per house to: {house_summary_csv}")
225
+
226
+ fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
227
+ fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
228
+
229
+ daily_summary_df = pd.DataFrame(daily_summaries)
230
+
231
+ total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
232
+ total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
233
+ pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
234
+ total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
235
+ total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
236
+ pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
237
+ total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()
238
+
239
+ # Calculate alternative performance metrics
240
+ agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
241
+ num_agents_total = len(all_days_df['house'].unique())
242
+ sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_total)
243
+ sunny_df = all_days_df.set_index(['day', 'step'])[sunny_steps_mask].reset_index()
244
+ baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
245
+ actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
246
+ grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
247
+ baseline_cost_sunny = sunny_df['baseline_cost'].sum()
248
+ actual_cost_sunny = sunny_df['actual_cost'].sum()
249
+ cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
250
+ total_p2p_buy = all_days_df['p2p_buy'].sum()
251
+ total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
252
+ community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
253
+ total_p2p_sell = all_days_df['p2p_sell'].sum()
254
+ total_grid_export = all_days_df['grid_export'].sum()
255
+ solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0
256
+
257
+ final_row = {
258
+ "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all, "cost_savings_pct": pct_cost_savings_all,
259
+ "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all, "fairness_cost_savings": fairness_cost_all,
260
+ "fairness_grid_reduction": fairness_grid_all, "total_degradation_cost": total_degradation_cost_all,
261
+ "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct, "community_sourcing_rate_pct": community_sourcing_rate_pct,
262
+ "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct, "cost_savings_sunny_hours_pct": cost_savings_sunny_pct
263
+ }
264
+
265
+ for col in daily_summary_df.columns:
266
+ if col not in final_row:
267
+ final_row[col] = np.nan
268
+ final_row_df = pd.DataFrame([final_row])
269
+
270
+ daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
271
+ summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
272
+ daily_summary_df.to_csv(summary_csv, index=False)
273
+ print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
274
+
275
+ # Final success message (replacing the numerical summary printout)
276
+ print("\nEvaluation run completed. All data logs (CSVs) and plots saved to disk.")
277
+
278
+ # Plots
279
+ plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
280
+ plot_daily_df["day"] = plot_daily_df["day"].astype(int)
281
+
282
+ # Daily Cost Savings Percentage
283
+ plt.figure(figsize=(12, 6))
284
+ plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
285
+ plt.xlabel("Day")
286
+ plt.ylabel("Cost Savings (%)")
287
+ plt.title("Daily Community Cost Savings Percentage")
288
+ plt.xticks(plot_daily_df["day"])
289
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
290
+ plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
291
+ plt.close()
292
+
293
+ # Daily Total Demand vs. Solar
294
+ plt.figure(figsize=(12, 6))
295
+ bar_width = 0.4
296
+ days = plot_daily_df["day"]
297
+ plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
298
+ plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
299
+ plt.xlabel("Day")
300
+ plt.ylabel("Energy (kWh)")
301
+ plt.title("Total Community Demand vs. Solar Generation Per Day")
302
+ plt.xticks(days)
303
+ plt.legend()
304
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
305
+ plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
306
+ plt.close()
307
+
308
+ # Combined Time Series of Energy Flows
309
+ step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
310
+ step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]
311
+
312
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
313
+
314
+ # Subplot 1: Grid Import vs P2P Buy
315
+ ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
316
+ ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
317
+ ax1.set_ylabel("Energy (kWh)")
318
+ ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
319
+ ax1.legend()
320
+ ax1.grid(True, linestyle='--', alpha=0.6)
321
+
322
+ # Subplot 2: Grid Export vs P2P Sell
323
+ ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
324
+ ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
325
+ ax2.set_xlabel("Global Timestep")
326
+ ax2.set_ylabel("Energy (kWh)")
327
+ ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
328
+ ax2.legend()
329
+ ax2.grid(True, linestyle='--', alpha=0.6)
330
+
331
+ plt.tight_layout()
332
+ plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
333
+ plt.close()
334
+
335
+ # Stacked Bar of Daily Energy Sources
336
+ daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
337
+
338
+ plt.figure(figsize=(12, 7))
339
+ plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
340
+ plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
341
+ plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
342
+
343
+ plt.xlabel("Day")
344
+ plt.ylabel("Energy (kWh)")
345
+ plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
346
+ plt.xticks(daily_agg.index)
347
+ plt.legend()
348
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
349
+ plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
350
+ plt.close()
351
+
352
+ # Fairness Metrics Over Time
353
+ plt.figure(figsize=(12, 6))
354
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
355
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
356
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
357
+ plt.xlabel("Day")
358
+ plt.ylabel("Jain's Fairness Index")
359
+ plt.title("Daily Fairness Metrics")
360
+ plt.xticks(plot_daily_df["day"])
361
+ plt.ylim(0, 1.05)
362
+ plt.legend()
363
+ plt.grid(True, linestyle='--', alpha=0.7)
364
+ plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
365
+ plt.close()
366
+
367
+ # Per-House Savings and Reductions
368
+ fig, ax1 = plt.subplots(figsize=(15, 7))
369
+
370
+ house_ids_str = house_level_df.index.astype(str)
371
+ bar_width = 0.4
372
+ index = np.arange(len(house_ids_str))
373
+
374
+ # Bar chart for cost savings
375
+ color1 = 'tab:green'
376
+ ax1.set_xlabel('House ID')
377
+ ax1.set_ylabel('Total Cost Savings ($)', color=color1)
378
+ ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
379
+ ax1.tick_params(axis='y', labelcolor=color1)
380
+ ax1.set_xticks(index)
381
+ ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
382
+
383
+ # Second y-axis for grid import reduction
384
+ ax2 = ax1.twinx()
385
+ color2 = 'tab:blue'
386
+ ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
387
+ ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
388
+ ax2.tick_params(axis='y', labelcolor=color2)
389
+
390
+ plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
391
+ fig.tight_layout()
392
+ plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
393
+ plt.close()
394
+
395
+ # Price Dynamics for a Single Day
396
+ day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
397
+ plt.figure(figsize=(12, 6))
398
+ plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
399
+ plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
400
+ plt.xlabel("Timestep of Day")
401
+ plt.ylabel("Price ($/kWh)")
402
+ plt.title("Price Dynamics on Day 1")
403
+ plt.legend()
404
+ plt.grid(True, linestyle='--', alpha=0.6)
405
+ plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
406
+ plt.close()
407
+
408
+ # Battery State of Charge for Sample Houses
409
+ day1_df = all_days_df[all_days_df['day'] == 1]
410
+ battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
411
+
412
+ if len(battery_houses) > 0:
413
+ sample_houses = battery_houses[:min(4, len(battery_houses))]
414
+ plt.figure(figsize=(12, 6))
415
+ for house in sample_houses:
416
+ house_df = day1_df[day1_df['house'] == house]
417
+ plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
418
+
419
+ plt.xlabel("Timestep of Day")
420
+ plt.ylabel("State of Charge (%)")
421
+ plt.title("Battery SoC on Day 1 for Sample Houses")
422
+ plt.legend()
423
+ plt.grid(True, linestyle='--', alpha=0.6)
424
+ plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
425
+ plt.close()
426
+
427
+ print("All plots have been generated and saved. Evaluation complete.")
428
+
429
+ if __name__ == "__main__":
430
+ main()
Other_algorithms/Flat_System/mappo/mappo_train.py ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import re # ← add thist
4
+ import numpy as np
5
+ import torch
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ import time
9
+ from datetime import datetime
10
+
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
+
13
+ from solar_sharer_battery_env import SolarSharer
14
+ from mappo.trainer.mappo import MAPPO
15
+
16
def main():
    """Train a flat MAPPO policy on the SolarSharer environment and save its outputs.

    The routine, in order:

    1. Builds the environment and performs one random step as an I/O sanity check.
    2. Creates a timestamped run directory containing ``logs/`` and ``plots/``.
    3. Runs ``num_episodes`` episodes. Every transition is pushed into the MAPPO
       buffer and ``mappo.update()`` is called once at the end of each episode.
    4. Saves the best model (highest mean per-agent episode reward), the reward
       arrays, four moving-average plots and a per-episode CSV log.

    All configuration lives in the constants at the top of the body; the function
    takes no arguments and returns ``None``.
    """
    STATE_TO_RUN = "pennsylvania"  # "pennsylvania" or "colorado" or "oklahoma"

    # --- Set the path to your training data ---
    DATA_FILE_PATH = "/Users/ananygupta/Desktop/Final_revision/Australia_data/processed_data_ausgrid_100_houses.csv"
    num_episodes = 10000          # total number of episodes to run
    batch_size = 256              # e.g. 512, 1024, 2048
    # NOTE: larger than num_episodes, so no periodic checkpoint is written with
    # these settings; only the best model is saved.
    checkpoint_interval = 100000
    window_size = 32              # number of episodes aggregated into one reporting block

    env = SolarSharer(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="30T"
    )

    # ─── Sanity check: env I/O shapes ─────────────────────────────────────
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = env.reset()
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))
    # ────────────────────────────────────────────────────────────────

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # ─── Build a unique run directory ───────────────────────────
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"mappo_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("Testing_with_australia_data", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # Create the MAPPO agent; the centralized input is the concatenation of all
    # agents' local observations (see ``obs.flatten()`` in the loop below).
    mappo = MAPPO(
        n_agents=num_agents,
        local_dim=local_state_dim,
        global_dim=num_agents * local_state_dim,
        act_dim=action_dim,
        lr=2e-4,
        gamma=0.95,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=4,
        batch_size=batch_size
    )

    # ─────────────── Tracking / Logging Variables ───────────────
    episode_rewards = []         # mean reward per episode (averaged across agents)
    episode_total_rewards = []   # total reward per episode (sum across agents)
    block_mean_rewards = []      # mean of mean-episode-rewards for each block of window_size
    block_total_rewards = []     # sum of total-episode-rewards for each block of window_size

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    daily_rewards = []           # same values as episode_rewards (one episode == one day)

    training_start_time = time.time()
    episode_log_data = []
    performance_metrics_log = []  # detailed per-episode metrics returned by the environment

    agent_charge_log = [[] for _ in range(num_agents)]      # Track charge actions
    agent_discharge_log = [[] for _ in range(num_agents)]   # Track discharge actions

    # ──────────── Training Loop ────────────
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(env.reset(), dtype=np.float32)

        # Per the original author's note, the env.reset() call above finalizes the
        # metrics of the episode that just finished, so they are fetched here and
        # tagged with the previous episode number. Nothing exists before episode 1.
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # obs has one row per agent; the global state is the flattened joint observation
            global_obs = obs.flatten()
            actions, logps = mappo.select_action(obs, global_obs)

            # step environment
            next_obs_list, rewards, done, info = env.step(actions)

            # convert next observations to NumPy array too
            next_obs = np.array(next_obs_list, dtype=np.float32)
            next_global_obs = next_obs.flatten()

            # store transition (a float32 copy of the local observations)
            local_obs_arr = np.array(obs, dtype=np.float32)
            mappo.store(
                local_obs_arr,
                global_obs,
                actions,
                logps,
                rewards,
                done,
                next_global_obs
            )
            total_reward += rewards
            obs = next_obs
            step_count += 1

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],  # Capture costs for analysis
                "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
                "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
            })

            # Hard cap on episode length in case the env never reports done
            if step_count >= max_steps:
                break

        # ─── After each episode ───
        # 1) Compute per-episode metrics
        sum_ep_reward = float(np.sum(total_reward))    # total reward across all agents
        mean_ep_reward = float(np.mean(total_reward))  # mean reward across agents

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)
        daily_rewards.append(mean_ep_reward)

        # 2) If we just finished a block of window_size episodes, aggregate
        if len(daily_rewards) % window_size == 0:
            # Sum of total rewards over the last window_size episodes
            last_totals = episode_total_rewards[-window_size:]
            block_sum = sum(last_totals)
            block_total_rewards.append(block_sum)

            # Mean of mean-episode-rewards over the last window_size episodes
            last_means = daily_rewards[-window_size:]
            block_mean = sum(last_means) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes { (block_idx-1)*window_size + 1 }–{ block_idx*window_size } "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        # 3) Log agent-level rewards.
        # NOTE: `actions` is the joint action of the episode's final step only, so
        # the charge/discharge entries sample that single step (indices 4 and 5).
        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            episode_charges[i].append(actions[i][4])
            episode_discharges[i].append(actions[i][5])

        # 4) Episode cost summary: baseline is the no-P2P grid import priced at the
        # grid price of each step; actual is the sum of the costs reported by the env.
        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # Guard the ratio the same way the evaluation script does: a day without any
        # baseline cost reports 0 % reduction instead of dividing by zero.
        cost_reduction = (baseline_cost - actual_cost) / baseline_cost if baseline_cost > 0 else 0.0

        # at end of episode
        mappo.update()  # Update the MAPPO agent

        # save if best
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            mappo.save(best_model_path)

        if episode % checkpoint_interval == 0:
            ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
            mappo.save(ckpt_path)

        # Episode duration covers rollout, update and model saving
        episode_end_time = time.time()
        episode_duration = episode_end_time - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        # Record data in our per-episode log
        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })
        for i in range(num_agents):
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

    # The loop only collects metrics for episodes 1..num_episodes-1 (each fetched
    # after the following reset), so the last episode's metrics are fetched here.
    # NOTE(review): no env.reset() precedes this call — confirm the environment
    # has finalized the last episode's metrics at this point.
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    # ─── End of all training ───
    training_end_time = time.time()
    total_training_time = training_end_time - training_start_time

    # Save out per-episode agent rewards + mean rewards
    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # ─────────── Create Final DataFrame for Logging and Plotting ───────────
    # 1. Rewards / costs collected by this script
    df_rewards_log = pd.DataFrame(episode_log_data)

    # 2. Performance metrics returned by the environment
    df_perf_log = pd.DataFrame(performance_metrics_log)

    # 3. Merge on 'Episode'. The per-step series are dropped so the table stays
    #    one scalar row per episode; errors="ignore" tolerates an environment that
    #    does not report some of them.
    df_final_log = pd.merge(
        df_rewards_log,
        df_perf_log.drop(
            columns=[
                'degradation_cost_over_time',
                'cost_savings_over_time',
                'grid_reduction_over_time'
            ],
            errors="ignore"
        ),
        on="Episode"
    )

    # ─────────── PLOTTING ───────────
    # Helper: centered moving average
    def moving_avg(series, window):
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    # Smoothing window (in episodes)
    ma_window = 300
    # Use the merged log's own episode numbers as the x-axis so x and y always
    # have the same length, even if the merge did not keep every episode.
    episodes = df_final_log["Episode"].to_numpy()

    # Helper: draw and save one moving-average curve for a column of df_final_log
    def save_ma_plot(column, label, ylabel, title, filename, **line_kwargs):
        smoothed = moving_avg(df_final_log[column], ma_window)
        plt.figure(figsize=(8,5))
        plt.plot(episodes, smoothed, linewidth=2, label=label, **line_kwargs)
        plt.xlabel("Episode")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(plots_dir, filename), dpi=200)
        plt.close()

    # 1. Mean Reward moving average
    save_ma_plot("Mean_Reward", f"Mean Reward MA (win={ma_window})",
                 "Mean Reward", "MAPPO: Mean Reward Moving Average", "mean_reward_ma.png")

    # 2. Total Reward moving average
    save_ma_plot("Total_Reward", f"Total Reward MA (win={ma_window})",
                 "Total Reward", "MAPPO: Total Reward Moving Average", "total_reward_ma.png")

    # 3. Cost Reduction (%) moving average
    save_ma_plot("Cost_Reduction_Pct", "Cost Reduction MA (%)",
                 "Cost Reduction (%)", "MAPPO: Cost Reduction Moving Average", "cost_reduction_ma.png")

    # 4. Battery Degradation Cost moving average
    save_ma_plot("battery_degradation_cost_total", f"Degradation Cost MA (win={ma_window})",
                 "Total Degradation Cost ($)", "MAPPO: Battery Degradation Cost Moving Average",
                 "degradation_cost_ma.png", color='purple')

    # Final confirmation message
    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # ─── Save Final Logs to CSV ───
    # 1. Append the total training time as an extra row (other columns stay NaN)
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    # 2. Define the path for the final CSV file.
    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # 3. Select and reorder columns for the final CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    # 4. Save the comprehensive DataFrame to CSV.
    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # ─── Final Timings Printout ───
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print("="*50)
436
+
437
+
438
+ if __name__ == "__main__":
439
+ main()
Other_algorithms/Flat_System/mappo/trainer/__init__.py ADDED
File without changes
Other_algorithms/Flat_System/mappo/trainer/mappo.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mappo.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import random
5
+ import numpy as np
6
+ from torch.distributions import Normal
7
+
8
+
9
def set_global_seed(seed: int):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer passed unchanged to every seeding call below.
    """
    # Python, NumPy and PyTorch (CPU) generators, in that order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Seed all GPU generators only when a CUDA device is present.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner
    # (may slow things down a bit).
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
18
+
19
+
20
# Universal device selection: CUDA when available, otherwise CPU.
# NOTE: an MPS (Apple Silicon) branch was commented out in the original and
# stays disabled here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using CUDA (NVIDIA GPU)" if device.type == "cuda" else "Using CPU")

# Fix every RNG at import time so runs are repeatable.
SEED = 42
set_global_seed(SEED)
34
+
35
+
36
class MLP(nn.Module):
    """Fully connected network: ``Linear -> ReLU`` per hidden width, then a final ``Linear``.

    Args:
        input_dim: Size of the input feature vector.
        hidden_dims: Iterable of hidden layer widths (may be empty).
        output_dim: Size of the output vector.
    """

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        widths = [input_dim, *hidden_dims]
        modules = []
        # Consecutive width pairs define each hidden Linear layer.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        # Output projection has no activation.
        modules.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        return self.net(x)
49
+
50
class Actor(nn.Module):
    """Policy network returning the mean and std of a diagonal Gaussian.

    The mean comes from an ``MLP`` over the observation; the standard
    deviation is ``exp(log_std)`` where ``log_std`` is a learned parameter of
    length ``act_dim`` that does not depend on the input.

    Args:
        obs_dim: Observation size fed to the MLP.
        act_dim: Action size (MLP output size and length of ``log_std``).
        hidden: Hidden layer widths for the MLP.
    """

    def __init__(self, obs_dim, act_dim, hidden=(64,64)):
        super().__init__()
        self.net = MLP(obs_dim, hidden, act_dim)
        # Initialised to zeros, i.e. an initial std of 1 per action dimension.
        self.log_std = nn.Parameter(torch.zeros(act_dim))

    def forward(self, x):
        """Return ``(mean, std)`` for observation(s) ``x``."""
        return self.net(x), self.log_std.exp()
60
+
61
class Critic(nn.Module):
    """Value network: an ``MLP`` with a single output, squeezed on the last axis.

    Args:
        state_dim: Size of the state vector fed to the MLP.
        hidden: Hidden layer widths for the MLP.
    """

    def __init__(self, state_dim, hidden=(128,128)):
        super().__init__()
        self.net = MLP(state_dim, hidden, 1)

    def forward(self, x):
        """Return the value estimate(s) for ``x`` with the trailing size-1 axis removed."""
        value = self.net(x)
        return value.squeeze(-1)
68
+
69
class MAPPO:
    """PPO learner with one shared actor and one centralized critic.

    The actor maps each agent's local observation to a Gaussian action
    distribution; the critic scores the global observation. Transitions are
    accumulated with :meth:`store` and consumed (then discarded) by
    :meth:`update`.
    """

    def __init__(
        self,
        n_agents,
        local_dim,
        global_dim,
        act_dim,
        lr=3e-4,
        gamma=0.99,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024
    ):
        """Build the networks, optimizers and an empty rollout buffer.

        Args:
            n_agents: Number of agents sharing the actor.
            local_dim: Size of one agent's local observation.
            global_dim: Size of the global observation fed to the critic.
            act_dim: Size of one agent's action.
            lr: Adam learning rate used for both actor and critic.
            gamma: Discount factor.
            lam: GAE lambda.
            clip_eps: PPO ratio clipping range.
            k_epochs: Number of passes over the buffer per update.
            batch_size: Minibatch size for the update loop.
        """
        self.n_agents = n_agents
        self.gamma = gamma
        self.lam = lam
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self.batch_size = batch_size

        # Remember the module-level device on the instance so every method
        # places tensors consistently.
        self.device = device

        self.actor = Actor(local_dim, act_dim).to(self.device)
        self.critic = Critic(global_dim).to(self.device)

        self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)

        self.local_dim = local_dim
        self.global_dim = global_dim
        self.act_dim = act_dim

        self.clear_buffer()

    def clear_buffer(self):
        """Reset all rollout storage lists to empty."""
        self.ls = []       # local observations
        self.gs = []       # global observations
        self.ac = []       # actions
        self.lp = []       # log-probs
        self.rw = []       # rewards
        self.done = []     # done flags
        self.next_gs = []  # next global observations

    @torch.no_grad()
    def select_action(self, local_obs, global_obs):
        """Sample actions from the current policy.

        Args:
            local_obs: Array-like of local observations fed to the actor.
            global_obs: Unused here; kept for interface compatibility.

        Returns:
            Tuple ``(actions, log_probs)`` as NumPy arrays, where the
            log-probabilities are summed over the last (action) axis.
        """
        obs_t = torch.as_tensor(
            np.asarray(local_obs, dtype=np.float32), device=self.device
        )
        mean, std = self.actor(obs_t)
        dist = Normal(mean, std)
        action = dist.sample()
        return action.cpu().numpy(), dist.log_prob(action).sum(-1).cpu().numpy()

    def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
        """Append one timestep of experience to the rollout buffer."""
        self.ls.append(local_obs)
        self.gs.append(global_obs)
        self.ac.append(action)
        self.lp.append(logp)
        self.rw.append(reward)
        self.done.append(done)
        self.next_gs.append(next_global_obs)

    def compute_gae(self, values):
        """Compute per-agent GAE advantages and returns for the stored rollout.

        Args:
            values: torch.Tensor of shape [T], one central V(s) per timestep.

        Returns:
            ``(adv_flat, ret_flat)``: float32 tensors of shape [T * n_agents]
            on ``self.device``. Both are empty when ``values`` is empty.
        """
        # 1) get raw arrays (detach so a graph-attached tensor is accepted too)
        vals_1d = values.detach().cpu().numpy().astype(np.float32, copy=False)  # [T]
        T = len(vals_1d)
        N = self.n_agents

        # Nothing collected: return empty results instead of indexing done[-1].
        if T == 0:
            return (
                torch.zeros(0, dtype=torch.float32, device=self.device),
                torch.zeros(0, dtype=torch.float32, device=self.device),
            )

        # 2) broadcast to per-agent: vals_agent[t, i] = V(state_t)
        vals_agent = np.tile(vals_1d[:, None], (1, N))  # [T, N]

        # 3) build next_vals likewise
        next_vals = np.zeros_like(vals_agent)  # [T, N]
        next_vals[:-1] = vals_agent[1:]
        # If the rollout did not end on a terminal step, bootstrap the last
        # value from the critic's estimate of the final next state.
        if not self.done[-1]:
            with torch.no_grad():
                last_state = torch.as_tensor(
                    np.asarray(self.next_gs[-1], dtype=np.float32),
                    device=self.device,
                )
                v_last = self.critic(last_state).cpu().item()
            next_vals[-1, :] = v_last

        # 4) GAE recursion, backwards over time for all agents at once
        adv = np.zeros_like(vals_agent, dtype=np.float32)
        prev_adv = np.zeros(N, dtype=np.float32)
        for t in reversed(range(T)):
            mask = 1.0 - float(self.done[t])  # 0 cuts the recursion at episode ends
            rew_t = np.array(self.rw[t], dtype=np.float32)  # [N]
            delta = rew_t + self.gamma * next_vals[t] * mask - vals_agent[t]
            prev_adv = delta + self.gamma * self.lam * mask * prev_adv
            adv[t] = prev_adv

        # 5) compute returns & flatten
        ret = (adv + vals_agent).astype(np.float32, copy=False)  # [T, N]
        adv_flat = torch.from_numpy(adv.flatten()).to(self.device)
        ret_flat = torch.from_numpy(ret.flatten()).to(self.device)
        return adv_flat, ret_flat

    def update(self):
        """Run the clipped-PPO update on the stored rollout, then clear it.

        Does nothing when the buffer is empty.
        """
        if not self.gs:
            return

        # 1) Raw global states tensor [T, G]. Stacking through NumPy first
        #    avoids the slow list-of-ndarrays tensor constructor.
        raw_gs = torch.as_tensor(
            np.asarray(self.gs, dtype=np.float32), device=self.device
        )

        # 2) One value V(s_t) per timestep
        with torch.no_grad():
            vals = self.critic(raw_gs).cpu()  # [T]

        # 3) Advantages and returns via GAE, flattened to [T * N]
        adv_flat, ret_flat = self.compute_gae(vals)

        # 4) Per-agent flattened training tensors
        ls = torch.as_tensor(np.asarray(self.ls, dtype=np.float32)) \
            .reshape(-1, self.local_dim).to(self.device)   # [T*N, local_dim]
        ac = torch.as_tensor(np.asarray(self.ac, dtype=np.float32)) \
            .reshape(-1, self.act_dim).to(self.device)     # [T*N, act_dim]
        old_lp = torch.as_tensor(np.asarray(self.lp, dtype=np.float32)) \
            .reshape(-1).to(self.device)                   # [T*N]

        # Broadcast global states to per-agent: [T, G] -> [T, N, G] -> [T*N, G]
        gs = raw_gs.unsqueeze(1).expand(-1, self.n_agents, -1)
        gs = gs.reshape(-1, self.global_dim).to(self.device)

        dataset = torch.utils.data.TensorDataset(
            ls, gs, ac, old_lp, adv_flat, ret_flat
        )
        # Freshly seeded generator: the shuffle order is reproducible.
        gen = torch.Generator()
        gen.manual_seed(SEED)
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=0,
            generator=gen
        )

        # 5) PPO update loop
        for _ in range(self.k_epochs):
            for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
                # Actor update: clipped surrogate objective
                mean, std = self.actor(b_ls)
                dist = Normal(mean, std)
                lp_new = dist.log_prob(b_ac).sum(-1)
                ratio = torch.exp(lp_new - b_lp)
                surr1 = ratio * b_adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
                actor_loss = -torch.min(surr1, surr2).mean()

                self.opt_a.zero_grad()
                actor_loss.backward()
                self.opt_a.step()

                # Critic update: mean-squared error against the GAE returns
                val_pred = self.critic(b_gs)
                critic_loss = nn.functional.mse_loss(val_pred, b_ret)

                self.opt_c.zero_grad()
                critic_loss.backward()
                self.opt_c.step()

        # 6) Clear buffers for next rollout
        self.clear_buffer()

    def save(self, path):
        """Write the actor and critic state dicts to ``path``."""
        torch.save({'actor': self.actor.state_dict(),
                    'critic': self.critic.state_dict()}, path)

    def load(self, path):
        """Load actor and critic state dicts previously written by :meth:`save`."""
        data = torch.load(path, map_location=self.device)
        self.actor.load_state_dict(data['actor'])
        self.critic.load_state_dict(data['critic'])
Other_algorithms/Flat_System/meanfield/_init_.py ADDED
File without changes
Other_algorithms/Flat_System/meanfield/meanfield_evaluation.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mfac_evaluate.py
2
+ import os
3
+ import sys
4
+ import time
5
+ import re
6
+ import numpy as np
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import torch
10
+ from datetime import datetime
11
+
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ from solar_sys_environment import SolarSys
15
+ from meanfield.trainer.mfac import MeanField
16
+
17
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+
19
def compute_jains_fairness(values: np.ndarray) -> float:
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of *values*.

    Args:
        values: 1-D array-like of per-entity quantities.

    Returns:
        ``0.0`` for empty input, ``1.0`` when every entry is zero, otherwise
        the index as a Python float.
    """
    # Work in float64 so integer inputs cannot overflow when squared, and so
    # plain lists/tuples are accepted as well as arrays.
    x = np.asarray(values, dtype=np.float64)
    n = x.size
    if n == 0:
        return 0.0
    if not np.any(x):
        return 1.0
    total = float(x.sum())
    sum_of_squares = float((x * x).sum())
    return (total * total) / (n * sum_of_squares)
27
+
28
def main():
    """Evaluate a saved MeanField checkpoint day by day and write logs and plots.

    For each evaluation day the environment is reset and stepped with actions
    from the loaded policy. Per-house/per-step records, per-step wall-clock
    timings, per-day summaries and a final multi-day summary row are written
    as CSV files under ``runs_with_battery/<run_name>/logs``; figures are
    written under ``.../plots``.
    """
    # User parameters
    # --- GENERALIZED PATHS ---
    MODEL_PATH = "./models/meanfield_region_c_100agents_final/best_model.pth"
    DATA_PATH = "./data/testing/test_data.csv"
    DAYS_TO_EVALUATE = 30

    model_path = MODEL_PATH
    data_path = DATA_PATH
    days_to_evaluate = DAYS_TO_EVALUATE
    SOLAR_THRESHOLD = 0.1

    # Map a state name embedded in the checkpoint path to its anonymous region
    # key; fall back to "region_c" when the path carries no such name.
    region_by_state = {
        "oklahoma": "region_a",
        "colorado": "region_b",
        "pennsylvania": "region_c",
    }
    state_match = re.search(r"meanfield_(oklahoma|colorado|pennsylvania)_", model_path)
    if state_match:
        detected_state_key = region_by_state[state_match.group(1)]
    else:
        detected_state_key = "region_c"

    # Env setup
    env = SolarSys(
        data_path=data_path,
        state=detected_state_key,  # Use anonymous key
        time_freq="3H"
    )
    eval_steps = env.num_steps
    house_ids = env.house_ids
    num_agents = env.num_agents

    # Generate a unique eval run folder
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"eval_mfac_{num_agents}agents_{days_to_evaluate}days_{timestamp}"
    output_folder = os.path.join("runs_with_battery", run_name)
    logs_dir = os.path.join(output_folder, "logs")
    plots_dir = os.path.join(output_folder, "plots")
    for d in (logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Saving evaluation outputs to: {output_folder}")

    local_dim = env.observation_space.shape[1]
    global_dim = num_agents * local_dim
    act_dim = env.action_space.shape[1]

    mfac = MeanField(
        n_agents=num_agents,
        local_dim=local_dim,
        global_dim=global_dim,
        act_dim=act_dim,
        lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=10, batch_size=1024
    )

    # Load mfac checkpoint and switch both networks to eval mode
    mfac.load(model_path)
    mfac.actor.to(device).eval()
    mfac.critic.to(device).eval()

    # Prepare logs
    all_logs = []
    daily_summaries = []
    step_timing_list = []

    for day_idx in range(days_to_evaluate):
        # NOTE(review): assumes env.reset() returns an (obs, info) pair — confirm
        # against SolarSys; the training script unpacks it differently.
        obs, _ = env.reset()
        obs = np.array(obs, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []

        while not done:
            step_start_time = time.time()
            global_obs = np.array(obs).flatten()

            # Select actions with mfac
            actions, _ = mfac.select_action(obs, global_obs)

            next_obs, rewards, done, info = env.step(actions)
            next_obs = np.array(next_obs, dtype=np.float32)

            # Wall-clock time for action selection plus environment step
            step_duration = time.time() - step_start_time
            step_timing_list.append({
                "day": day_idx + 1, "step": step_count, "step_time_s": step_duration
            })

            grid_price_now = env.get_grid_price(step_count)
            # Re-calculate peer price from current env state
            current_demands = env.demands_day[step_count]
            current_solars = env.solars_day[step_count]
            current_total_surplus = float(np.maximum(current_solars - current_demands, 0.0).sum())
            current_total_shortfall = float(np.maximum(current_demands - current_solars, 0.0).sum())
            peer_price_now = env.get_peer_price(step_count, current_total_surplus, current_total_shortfall)

            for i, hid in enumerate(house_ids):
                is_battery_house = hid in env.batteries
                p2p_buy = float(info["p2p_buy"][i])
                p2p_sell = float(info["p2p_sell"][i])
                # Direct indexing: a missing key fails with a clear KeyError
                # instead of "NoneType is not subscriptable".
                charge_amount = float(info["charge_amount"][i])
                discharge_amount = float(info["discharge_amount"][i])

                day_logs.append({
                    "day": day_idx + 1, "step": step_count, "house": hid,
                    "grid_import_no_p2p": float(info["grid_import_no_p2p"][i]),
                    "grid_import_with_p2p": float(info["grid_import_with_p2p"][i]),
                    "grid_export": float(info["grid_export"][i]),
                    "p2p_buy": p2p_buy, "p2p_sell": p2p_sell, "actual_cost": float(info["costs"][i]),
                    "baseline_cost": float(info["grid_import_no_p2p"][i]) * grid_price_now,
                    "total_demand": float(env.demands_day[step_count, i]),
                    "total_solar": float(env.solars_day[step_count, i]),
                    "grid_price": grid_price_now, "peer_price": peer_price_now,
                    "soc": (env.battery_soc[i] / env.battery_max_capacity[i]) if is_battery_house else np.nan,
                    "degradation_cost": ((charge_amount + discharge_amount) * env.battery_degradation_cost[i]) if is_battery_house else 0.0,
                    "reward": float(rewards[i]),
                })

            obs = next_obs
            step_count += 1
            # Hard stop in case the environment never reports done
            if step_count >= eval_steps:
                break

        day_df = pd.DataFrame(day_logs)
        all_logs.extend(day_logs)

        # Consolidated daily summary calculation
        grouped_house = day_df.groupby("house").sum(numeric_only=True)
        grouped_step = day_df.groupby("step").sum(numeric_only=True)

        total_demand = grouped_step["total_demand"].sum()
        total_solar = grouped_step["total_solar"].sum()
        total_p2p_buy = grouped_house["p2p_buy"].sum()
        total_p2p_sell = grouped_house["p2p_sell"].sum()

        baseline_cost_per_house = grouped_house["baseline_cost"]
        actual_cost_per_house = grouped_house["actual_cost"]
        cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
        day_total_cost_savings = cost_savings_per_house.sum()

        overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum() if baseline_cost_per_house.sum() > 0 else 0.0

        baseline_import_per_house = grouped_house["grid_import_no_p2p"]
        actual_import_per_house = grouped_house["grid_import_with_p2p"]
        import_reduction_per_house = baseline_import_per_house - actual_import_per_house
        day_total_import_reduction = import_reduction_per_house.sum()

        overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum() if baseline_import_per_house.sum() > 0 else 0.0

        fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
        fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
        fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
        fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
        day_total_degradation_cost = grouped_house["degradation_cost"].sum()

        daily_summaries.append({
            "day": day_idx + 1, "day_total_demand": total_demand, "day_total_solar": total_solar,
            "day_p2p_buy": total_p2p_buy, "day_p2p_sell": total_p2p_sell,
            "cost_savings_abs": day_total_cost_savings, "cost_savings_pct": overall_cost_savings_pct,
            "fairness_cost_savings": fairness_cost_savings, "grid_reduction_abs": day_total_import_reduction,
            "grid_reduction_pct": overall_import_reduction_pct,
            "fairness_grid_reduction": fairness_import_reduction, "fairness_reward": fairness_rewards,
            "fairness_p2p_buy": compute_jains_fairness(grouped_house["p2p_buy"].values),
            "fairness_p2p_sell": compute_jains_fairness(grouped_house["p2p_sell"].values),
            "fairness_p2p_total": fairness_p2p_total, "total_degradation_cost": day_total_degradation_cost
        })

    # Final processing and saving
    all_days_df = pd.DataFrame(all_logs)
    combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
    all_days_df.to_csv(combined_csv_path, index=False)
    print(f"Saved combined step-level logs to: {combined_csv_path}")

    step_timing_df = pd.DataFrame(step_timing_list)
    timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
    step_timing_df.to_csv(timing_csv_path, index=False)
    print(f"Saved step timing logs to: {timing_csv_path}")

    house_level_df = all_days_df.groupby("house").sum(numeric_only=True)
    house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
    house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]

    house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
    house_level_df.to_csv(house_summary_csv)
    print(f"Saved final summary per house to: {house_summary_csv}")

    fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
    fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)

    daily_summary_df = pd.DataFrame(daily_summaries)

    total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
    total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
    pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
    total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
    total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
    pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
    total_degradation_cost_all = daily_summary_df["total_degradation_cost"].sum()

    # "Sunny" steps: community solar at that (day, step) exceeds the threshold
    # scaled by the number of houses. transform("sum") yields a row-aligned
    # Series, so the boolean mask shares the frame's index and needs no
    # implicit reindexing.
    step_solar_total = all_days_df.groupby(['day', 'step'])['total_solar'].transform('sum')
    num_agents_total = len(all_days_df['house'].unique())
    sunny_df = all_days_df[step_solar_total > (SOLAR_THRESHOLD * num_agents_total)]
    baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
    actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
    grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
    baseline_cost_sunny = sunny_df['baseline_cost'].sum()
    actual_cost_sunny = sunny_df['actual_cost'].sum()
    cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
    total_p2p_buy = all_days_df['p2p_buy'].sum()
    total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
    community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
    total_p2p_sell = all_days_df['p2p_sell'].sum()
    total_grid_export = all_days_df['grid_export'].sum()
    solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0

    final_row = {
        "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all, "cost_savings_pct": pct_cost_savings_all,
        "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all, "fairness_cost_savings": fairness_cost_all,
        "fairness_grid_reduction": fairness_grid_all, "total_degradation_cost": total_degradation_cost_all,
        "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct, "community_sourcing_rate_pct": community_sourcing_rate_pct,
        "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct, "cost_savings_sunny_hours_pct": cost_savings_sunny_pct
    }

    # Columns that only exist per day are left as NaN in the summary row.
    for col in daily_summary_df.columns:
        if col not in final_row:
            final_row[col] = np.nan
    final_row_df = pd.DataFrame([final_row])

    daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
    summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
    daily_summary_df.to_csv(summary_csv, index=False)
    print(f"Saved day-level summary with final multi-day row to: {summary_csv}")

    print("\nEvaluation run completed. All data logs (CSVs) and plots saved to disk.")

    # Plots (the summary row is excluded so "day" can be cast back to int)
    plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
    plot_daily_df["day"] = plot_daily_df["day"].astype(int)

    # Daily Cost Savings Percentage
    plt.figure(figsize=(12, 6))
    plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
    plt.xlabel("Day")
    plt.ylabel("Cost Savings (%)")
    plt.title("Daily Community Cost Savings Percentage")
    plt.xticks(plot_daily_df["day"])
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
    plt.close()

    # Daily Total Demand vs. Solar
    plt.figure(figsize=(12, 6))
    bar_width = 0.4
    days = plot_daily_df["day"]
    plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
    plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Total Community Demand vs. Solar Generation Per Day")
    plt.xticks(days)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
    plt.close()

    # Combined Time Series of Energy Flows
    step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
    step_group["global_step"] = (step_group["day"] - 1) * env.num_steps + step_group["step"]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)

    # Subplot 1: Grid Import vs P2P Buy
    ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
    ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
    ax1.set_ylabel("Energy (kWh)")
    ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.6)

    # Subplot 2: Grid Export vs P2P Sell
    ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
    ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
    ax2.set_xlabel("Global Timestep")
    ax2.set_ylabel("Energy (kWh)")
    ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
    ax2.legend()
    ax2.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
    plt.close()

    # Stacked Bar of Daily Energy Sources
    daily_agg = all_days_df.groupby("day").sum(numeric_only=True)

    plt.figure(figsize=(12, 7))
    plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
    plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
    plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')

    plt.xlabel("Day")
    plt.ylabel("Energy (kWh)")
    plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
    plt.xticks(daily_agg.index)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
    plt.close()

    # Fairness Metrics Over Time
    plt.figure(figsize=(12, 6))
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
    plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
    plt.xlabel("Day")
    plt.ylabel("Jain's Fairness Index")
    plt.title("Daily Fairness Metrics")
    plt.xticks(plot_daily_df["day"])
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
    plt.close()

    # Per-House Savings and Reductions
    fig, ax1 = plt.subplots(figsize=(15, 7))

    house_ids_str = house_level_df.index.astype(str)
    bar_width = 0.4
    index = np.arange(len(house_ids_str))

    # Bar chart for cost savings
    color1 = 'tab:green'
    ax1.set_xlabel('House ID')
    ax1.set_ylabel('Total Cost Savings ($)', color=color1)
    ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
    ax1.tick_params(axis='y', labelcolor=color1)
    ax1.set_xticks(index)
    ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")

    # Second y-axis for grid import reduction
    ax2 = ax1.twinx()
    color2 = 'tab:blue'
    ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
    ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
    ax2.tick_params(axis='y', labelcolor=color2)

    plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {days_to_evaluate} days)')
    fig.tight_layout()
    plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
    plt.close()

    # Price Dynamics for a Single Day
    day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
    plt.figure(figsize=(12, 6))
    plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
    plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
    plt.xlabel("Timestep of Day")
    plt.ylabel("Price ($/kWh)")
    plt.title("Price Dynamics on Day 1")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
    plt.close()

    # Battery State of Charge for Sample Houses (houses without a battery log NaN)
    day1_df = all_days_df[all_days_df['day'] == 1]
    battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()

    if len(battery_houses) > 0:
        sample_houses = battery_houses[:min(4, len(battery_houses))]
        plt.figure(figsize=(12, 6))
        for house in sample_houses:
            house_df = day1_df[day1_df['house'] == house]
            plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')

        plt.xlabel("Timestep of Day")
        plt.ylabel("State of Charge (%)")
        plt.title("Battery SoC on Day 1 for Sample Houses")
        plt.legend()
        plt.grid(True, linestyle='--', alpha=0.6)
        plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
        plt.close()

    print("All plots have been generated and saved. Evaluation complete.")
426
+
427
+
428
+ if __name__ == "__main__":
429
+ main()
Other_algorithms/Flat_System/meanfield/meanfield_train.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import re
4
+ import numpy as np
5
+ import torch
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ import time
9
+ from datetime import datetime
10
+
11
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
12
+
13
+ from solar_sys_environment import SolarSys
14
+ from meanfield.trainer.mfac import MeanField
15
+
16
def _reset_env(env):
    """Reset *env* and return only the per-agent observations.

    The SolarSys environment shipped next to this script returns
    ``(obs, info)`` from ``reset()``, while older Gym-style environments
    return ``obs`` alone. Both shapes are accepted so the training loop does
    not try to build an array out of an ``(obs, dict)`` tuple.
    """
    result = env.reset()
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result[0]
    return result


def main():
    """Train a MeanField agent on the SolarSys environment and write logs/plots.

    The run creates ``Training_for_granularity/<run_name>/{logs,plots}``,
    trains for ``num_episodes`` episodes (one policy update per episode),
    keeps the best model by mean episode reward, and finally saves reward
    arrays, a per-episode CSV log and moving-average plots.
    """
    # NOTE(review): the SolarSys class in solar_sys_environment.py validates
    # `state` against its own pricing table; confirm this value is one of the
    # keys that environment accepts before launching a run.
    STATE_TO_RUN = "pennsylvania"  # "pennsylvania" or "colorado" or "oklahoma"

    # Set the path to your training data
    DATA_FILE_PATH = "/path/to/project/training/100houses_152days_TRAIN.csv"
    num_episodes = 10000
    batch_size = 256
    checkpoint_interval = 100000
    window_size = 32

    env = SolarSys(
        data_path=DATA_FILE_PATH,
        state=STATE_TO_RUN,
        time_freq="3H"
    )

    # Sanity check: env I/O shapes
    print("Observation space:", env.observation_space)
    print("Action space :", env.action_space)

    # Reset and inspect obs
    obs = _reset_env(env)
    print(f"Reset returned {len(obs)} agent observations; each obs shape: {np.array(obs).shape}")

    # Sample random actions and do one step
    dummy_actions = np.random.rand(env.num_agents, env.action_space.shape[1]).astype(np.float32)
    next_obs, rewards, done, info = env.step(dummy_actions)
    print(f"Step outputs → next_obs: {len(next_obs)}×{np.array(next_obs).shape[1]}, "
          f"rewards: {len(rewards)}, done: {done}")
    print("Info keys:", list(info.keys()))

    # Count the number of houses in each group
    env.group_counts = {
        0: env.agent_groups.count(0),
        1: env.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {env.group_counts}")

    max_steps = env.num_steps

    # Dims from the env
    num_agents = env.num_agents
    local_state_dim = env.observation_space.shape[1]
    action_dim = env.action_space.shape[1]

    # Build a unique run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"meanfield_{STATE_TO_RUN}_{num_agents}agents_{num_episodes}eps_{timestamp}"
    root_dir = os.path.join("Training_for_granularity", run_name)
    os.makedirs(root_dir, exist_ok=True)
    print(f"Saving training outputs to: {root_dir}")

    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")
    os.makedirs(logs_dir, exist_ok=True)
    os.makedirs(plots_dir, exist_ok=True)

    # Create the MeanField agent. episode_len is tied to the environment's
    # episode length so the rollout buffer can hold every step of an episode
    # (the trainer's default buffer length is independent of the env).
    meanfield = MeanField(
        n_agents=num_agents,
        local_dim=local_state_dim,
        global_dim=num_agents * local_state_dim,
        act_dim=action_dim,
        lr=2e-4,
        gamma=0.95,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=4,
        batch_size=batch_size,
        episode_len=max_steps
    )

    # Tracking / Logging Variables
    episode_rewards = []
    episode_total_rewards = []
    block_mean_rewards = []
    block_total_rewards = []

    agent_rewards_log = [[] for _ in range(num_agents)]
    best_mean_reward = -1e9
    best_model_path = os.path.join(logs_dir, "best_model.pth")

    daily_rewards = []

    training_start_time = time.time()
    total_steps_global = 0
    episode_log_data = []
    performance_metrics_log = []

    agent_charge_log = [[] for _ in range(num_agents)]
    agent_discharge_log = [[] for _ in range(num_agents)]

    # Training Loop
    for episode in range(1, num_episodes + 1):
        episode_start_time = time.time()

        obs = np.array(_reset_env(env), dtype=np.float32)

        # Collect metrics from the previous episode (the env publishes them
        # when it is reset, so they are only available after this reset).
        if episode > 1:
            last_episode_metrics = env.get_episode_metrics()
            last_episode_metrics['Episode'] = episode - 1
            performance_metrics_log.append(last_episode_metrics)

        total_reward = np.zeros(num_agents, dtype=np.float32)
        done = False
        step_count = 0
        day_logs = []
        episode_charges = [[] for _ in range(num_agents)]
        episode_discharges = [[] for _ in range(num_agents)]

        while not done:
            # Build global state and pick actions
            global_obs = obs.flatten()
            actions, logps = meanfield.select_action(obs, global_obs)

            # Step environment
            next_obs_list, rewards, done, info = env.step(actions)

            # Convert next observations to NumPy array
            next_obs = np.array(next_obs_list, dtype=np.float32)
            next_global_obs = next_obs.flatten()

            # Store transition
            local_obs_arr = np.array(obs, dtype=np.float32)

            meanfield.store(
                local_obs_arr,
                global_obs,
                actions,
                logps,
                rewards,
                done,
                next_global_obs
            )
            total_reward += rewards
            obs = next_obs
            step_count += 1
            total_steps_global += 1

            day_logs.append({
                "step": step_count - 1,
                "grid_import_no_p2p": info["grid_import_no_p2p"],
                "grid_import_with_p2p": info["grid_import_with_p2p"],
                "p2p_buy": info["p2p_buy"],
                "p2p_sell": info["p2p_sell"],
                "costs": info["costs"],
                "charge_amount": info.get("charge_amount", np.zeros(num_agents)),
                "discharge_amount": info.get("discharge_amount", np.zeros(num_agents))
            })

            if step_count >= max_steps:
                break

        # After each episode
        # Compute per-episode metrics
        sum_ep_reward = float(np.sum(total_reward))
        mean_ep_reward = float(np.mean(total_reward))

        episode_total_rewards.append(sum_ep_reward)
        episode_rewards.append(mean_ep_reward)
        daily_rewards.append(mean_ep_reward)

        # If we just finished a block of window_size episodes, aggregate
        if len(daily_rewards) % window_size == 0:
            last_totals = episode_total_rewards[-window_size:]
            block_sum = sum(last_totals)
            block_total_rewards.append(block_sum)

            last_means = daily_rewards[-window_size:]
            block_mean = sum(last_means) / window_size
            block_mean_rewards.append(block_mean)

            block_idx = len(block_mean_rewards)
            print(
                f"→ Completed Block {block_idx} "
                f"| Episodes {(block_idx-1)*window_size + 1}–{block_idx*window_size} "
                f"| Block Total Reward: {block_sum:.3f} "
                f"| Block Mean Reward: {block_mean:.3f}"
            )

        # Log agent-level rewards. The charge/discharge entries record the
        # action taken at the final step of the episode.
        for i in range(num_agents):
            agent_rewards_log[i].append(total_reward[i])
            episode_charges[i].append(actions[i][4])
            episode_discharges[i].append(actions[i][5])

        baseline_cost = np.sum([np.sum(entry["grid_import_no_p2p"]) * env.get_grid_price(entry["step"])
                                for entry in day_logs])
        actual_cost = np.sum([np.sum(entry["costs"]) for entry in day_logs])
        # A day with no baseline grid import has nothing to reduce; report 0
        # instead of dividing by zero and logging NaN/inf.
        if baseline_cost:
            cost_reduction = (baseline_cost - actual_cost) / baseline_cost
        else:
            cost_reduction = 0.0

        # Update the meanfield agent
        meanfield.update()

        # Save if best
        if mean_ep_reward > best_mean_reward:
            best_mean_reward = mean_ep_reward
            meanfield.save(best_model_path)

        if episode % checkpoint_interval == 0:
            ckpt_path = os.path.join(logs_dir, f"checkpoint_{episode}.pth")
            meanfield.save(ckpt_path)

        episode_end_time = time.time()
        episode_duration = episode_end_time - episode_start_time

        print(
            f"Episode {episode}/{num_episodes} "
            f"| Time per Episode: {episode_duration:.2f}s "
            f"| Steps: {step_count} "
            f"| Mean Reward: {mean_ep_reward:.3f} "
            f"| Cost Reduction: {cost_reduction:.2%}"
        )

        # Record data in per-episode log
        episode_log_data.append({
            "Episode": episode,
            "Steps": step_count,
            "Mean_Reward": mean_ep_reward,
            "Total_Reward": sum_ep_reward,
            "Cost_Reduction_Pct": cost_reduction * 100,
            "Baseline_Cost": baseline_cost,
            "Actual_Cost": actual_cost,
            "Episode_Duration": episode_duration,
            "Total_Charge": np.sum([np.sum(entry["charge_amount"]) for entry in day_logs]),
            "Total_Discharge": np.sum([np.sum(entry["discharge_amount"]) for entry in day_logs])
        })

        for i in range(num_agents):
            agent_charge_log[i].append(np.mean(episode_charges[i]))
            agent_discharge_log[i].append(np.mean(episode_discharges[i]))

    # Capture the final episode's metrics
    final_episode_metrics = env.get_episode_metrics()
    final_episode_metrics['Episode'] = num_episodes
    performance_metrics_log.append(final_episode_metrics)

    # End of all training
    training_end_time = time.time()
    total_training_time = training_end_time - training_start_time

    # Save out per-episode agent rewards + mean rewards
    np.save(os.path.join(logs_dir, "agent_rewards.npy"), np.array(agent_rewards_log))
    np.save(os.path.join(logs_dir, "mean_rewards.npy"), np.array(episode_rewards))
    np.save(os.path.join(logs_dir, "total_rewards.npy"), np.array(episode_total_rewards))

    # Create Final DataFrame for Logging and Plotting
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)

    # Merge the two DataFrames on the 'Episode' column. The time-series
    # columns are dropped only if the environment actually reported them.
    df_final_log = pd.merge(df_rewards_log, df_perf_log.drop(columns=[
        'degradation_cost_over_time',
        'cost_savings_over_time',
        'grid_reduction_over_time'
    ], errors="ignore"), on="Episode")

    # PLOTTING
    os.makedirs(plots_dir, exist_ok=True)

    # Helper: centered moving average
    def moving_avg(series, window):
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    # Smoothing window (in episodes)
    ma_window = 300
    # Use the episodes present in the merged log so x and y always have the
    # same length, even if the merge dropped rows.
    episodes = df_final_log["Episode"].to_numpy()

    def save_ma_plot(column, ylabel, title, filename, label, **plot_kwargs):
        """Plot the moving average of one log column and save it as a PNG."""
        smoothed = moving_avg(df_final_log[column], ma_window)
        plt.figure(figsize=(8, 5))
        plt.plot(episodes, smoothed, linewidth=2, label=label, **plot_kwargs)
        plt.xlabel("Episode")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(plots_dir, filename), dpi=200)
        plt.close()

    # Mean Reward moving average
    save_ma_plot(
        "Mean_Reward", "Mean Reward",
        "meanfield: Mean Reward Moving Average",
        "mean_reward_ma.png",
        f"Mean Reward MA (win={ma_window})",
    )

    # Total Reward moving average
    save_ma_plot(
        "Total_Reward", "Total Reward",
        "meanfield: Total Reward Moving Average",
        "total_reward_ma.png",
        f"Total Reward MA (win={ma_window})",
    )

    # Cost Reduction (%) moving average
    save_ma_plot(
        "Cost_Reduction_Pct", "Cost Reduction (%)",
        "meanfield: Cost Reduction Moving Average",
        "cost_reduction_ma.png",
        "Cost Reduction MA (%)",
    )

    # Battery Degradation Cost moving average
    save_ma_plot(
        "battery_degradation_cost_total", "Total Degradation Cost ($)",
        "meanfield: Battery Degradation Cost Moving Average",
        "degradation_cost_ma.png",
        f"Degradation Cost MA (win={ma_window})",
        color='purple',
    )

    print(f"\nAll moving-average plots saved to: {plots_dir}")

    # Save Final Logs to CSV
    total_time_row = pd.DataFrame([{
        "Episode": "Total_Training_Time",
        "Episode_Duration": total_training_time
    }])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Select and reorder columns for the final CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward",
        "Total_Reward",
        "Cost_Reduction_Pct",
        "Episode_Duration",
        "battery_degradation_cost_total",
    ]
    df_to_save = df_to_save[columns_to_save]

    df_to_save.to_csv(log_csv_path, index=False)

    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    # Final Timings Printout
    print("\n" + "="*50)
    print("TRAINING COMPLETE".center(50))
    print(f"Total training time: {total_training_time:.2f} seconds")
    print("="*50)


if __name__ == "__main__":
    main()
Other_algorithms/Flat_System/meanfield/trainer/__init__.py ADDED
File without changes
Other_algorithms/Flat_System/meanfield/trainer/mfac.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # meanfield.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import random
5
+ import numpy as np
6
+ from torch.distributions import Normal
7
+ from torch.amp import autocast
8
+ from torch.cuda.amp import GradScaler
9
+
10
# Device selection: resolved once at import time, CUDA when present, else CPU.
_cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_available else "cpu")
print("Using CUDA (NVIDIA GPU)" if _cuda_available else "Using CPU")
17
+
18
def set_global_seed(seed: int):
    """Seed Python's ``random``, NumPy and PyTorch RNGs with ``seed``.

    When CUDA is available, all CUDA generators are seeded as well and cuDNN
    is put in benchmark (non-deterministic) mode.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = False
    cudnn.benchmark = True


# Module-wide seed, applied once at import time.
SEED = 42
set_global_seed(SEED)
30
+
31
class MLP(nn.Module):
    """Fully connected network: Linear+ReLU per hidden width, then a Linear head."""

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        # Consecutive width pairs describe each hidden Linear layer.
        widths = [input_dim, *hidden_dims]
        modules = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        # Output head has no activation.
        modules.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the stacked layers."""
        return self.net(x)
44
+
45
class Actor(nn.Module):
    """Gaussian policy head: an MLP produces the mean, a learned vector the log-std."""

    def __init__(self, obs_dim, act_dim, hidden=(64,64)):
        super().__init__()
        self.net = MLP(obs_dim, hidden, act_dim)
        # State-independent log standard deviation, one entry per action dim.
        self.log_std = nn.Parameter(torch.zeros(act_dim))

    def forward(self, x):
        """Return ``(mean, std)`` where ``std = exp(log_std)``."""
        return self.net(x), torch.exp(self.log_std)
55
+
56
class Critic(nn.Module):
    """State-value network: an MLP with a single output unit."""

    def __init__(self, state_dim, hidden=(128,128)):
        super().__init__()
        self.net = MLP(state_dim, hidden, 1)

    def forward(self, x):
        """Return the value estimate with the trailing size-1 dimension removed."""
        values = self.net(x)
        return values.squeeze(-1)
63
+
64
class MeanField:
    """Shared-actor / central-critic trainer with a clipped-ratio policy loss.

    One ``Actor`` is shared by all agents and sees each agent's local
    observation concatenated with the global observation; one ``Critic``
    scores the global observation. Transitions for a single episode are kept
    in fixed-size NumPy buffers and consumed by ``update()``.

    Args:
        n_agents: number of agents acting per step.
        local_dim: size of one agent's observation vector.
        global_dim: size of the flattened global observation.
        act_dim: size of one agent's action vector.
        lr: Adam learning rate used for both actor and critic.
        gamma: discount factor.
        lam: GAE lambda.
        clip_eps: clipping range for the probability ratio.
        k_epochs: passes over the episode data per ``update()``.
        batch_size: minibatch size used in ``update()``.
        episode_len: capacity (in steps) of the rollout buffer.
    """

    def __init__(
        self,
        n_agents,
        local_dim,
        global_dim,
        act_dim,
        lr=3e-4,
        gamma=0.99,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024,
        episode_len=96
    ):
        self.n_agents = n_agents
        self.local_dim = local_dim
        self.global_dim = global_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.lam = lam
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self.batch_size = batch_size
        self.episode_len = episode_len

        self.actor = Actor(local_dim + global_dim, act_dim).to(device)
        self.critic = Critic(global_dim).to(device)

        self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)

        # Built once instead of once per minibatch.
        self._critic_loss_fn = nn.MSELoss()

        # One seeded generator for the whole run: minibatch order stays
        # reproducible, but successive updates no longer replay the exact
        # same permutation sequence.
        self._shuffle_gen = torch.Generator()
        self._shuffle_gen.manual_seed(SEED)

        print("MeanField CUDA AMP is disabled for stability.")

        self.init_buffer()

    def init_buffer(self):
        """Allocate zeroed rollout buffers for ``episode_len`` steps and rewind the write index."""
        self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
        self.gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float32)
        self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float32)
        self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
        self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
        self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
        self.next_gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float32)
        self.step_idx = 0
        self._overflow_warned = False

    @torch.no_grad()
    def select_action(self, local_obs, global_obs):
        """Sample one action per agent.

        Args:
            local_obs: NumPy array with one row of local observations per agent.
            global_obs: 1-D NumPy array, repeated for every agent.

        Returns:
            ``(actions, log_probs)`` as NumPy arrays; ``log_probs`` is the
            per-agent sum of log-probabilities over action dimensions.
        """
        l = torch.from_numpy(local_obs).float().to(device)
        g = torch.from_numpy(global_obs).float().to(device).unsqueeze(0).expand(self.n_agents, -1)
        input_x = torch.cat([l, g], dim=-1)
        mean, std = self.actor(input_x)
        dist = Normal(mean, std)
        a = dist.sample()
        return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()

    def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
        """Append one transition to the rollout buffer.

        Transitions arriving after the buffer is full are discarded; the
        first such drop emits a ``RuntimeWarning`` so a too-small
        ``episode_len`` does not go unnoticed.
        """
        if self.step_idx >= self.episode_len:
            if not self._overflow_warned:
                import warnings

                warnings.warn(
                    f"MeanField rollout buffer is full (episode_len={self.episode_len}); "
                    "further transitions are dropped until update() is called.",
                    RuntimeWarning,
                    stacklevel=2,
                )
                self._overflow_warned = True
            return

        idx = self.step_idx
        self.ls_buf[idx] = local_obs
        self.gs_buf[idx] = global_obs
        self.ac_buf[idx] = action
        self.lp_buf[idx] = logp
        self.rw_buf[idx] = reward
        self.done_buf[idx] = done
        self.next_gs_buf[idx] = next_global_obs
        self.step_idx = idx + 1

    def compute_gae(self, T, vals):
        """
        Computes Generalized Advantage Estimation (GAE).

        Args:
            T: number of stored steps to use.
            vals: critic values for the first ``T`` stored global states
                (one value per step, shared by all agents).

        Returns:
            ``(advantages, returns)`` as flat tensors on ``device``, laid out
            step-major then agent.
        """
        N = self.n_agents
        adv_buf = np.zeros_like(self.rw_buf[:T])

        # Bootstrap from the critic unless every agent was done at the last step.
        if not self.done_buf[T-1].all():
            with torch.no_grad():
                v_last = self.critic(
                    torch.from_numpy(self.next_gs_buf[T-1]).float().to(device)
                ).cpu().numpy()
        else:
            v_last = 0.0
        vals_agent = vals.unsqueeze(1).expand(-1, N).cpu().numpy()
        rewards = self.rw_buf[:T]
        masks = 1.0 - self.done_buf[:T]
        gae = 0
        for t in reversed(range(T)):
            v_next = vals_agent[t+1] if t < T - 1 else v_last
            delta = rewards[t] + self.gamma * v_next * masks[t] - vals_agent[t]
            adv_buf[t] = gae = delta + self.gamma * self.lam * masks[t] * gae
        ret_buf = adv_buf + vals_agent
        adv_flat = torch.from_numpy(adv_buf.flatten()).float().to(device)
        ret_flat = torch.from_numpy(ret_buf.flatten()).float().to(device)
        return adv_flat, ret_flat

    def update(self):
        """Run ``k_epochs`` of minibatch actor/critic updates on the stored episode.

        Does nothing when the buffer is empty. The write index is rewound
        afterwards so the next episode overwrites the buffer.
        """
        T = self.step_idx
        if T == 0:
            return

        gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
        ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
        ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
        lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)

        with torch.no_grad():
            vals = self.critic(gs_tensor)

        adv_flat, ret_flat = self.compute_gae(T, vals)
        # std() of a single element is NaN, which would poison the actor
        # loss; only normalise when there is more than one sample.
        if adv_flat.numel() > 1:
            adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)

        # Repeat each step's global state once per agent to line up with the
        # flattened per-agent tensors.
        gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)

        dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=self.batch_size, shuffle=True, generator=self._shuffle_gen
        )

        for _ in range(self.k_epochs):
            for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
                input_a = torch.cat([b_ls, b_gs], dim=-1)
                mean, std = self.actor(input_a)
                dist = Normal(mean, std)

                entropy = dist.entropy().mean()

                lp_new = dist.log_prob(b_ac).sum(-1)
                ratio = torch.exp(lp_new - b_lp)
                surr1 = ratio * b_adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv

                # Clipped surrogate objective with a small entropy bonus.
                actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy

                self.opt_a.zero_grad()
                actor_loss.backward()
                nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=0.5)
                self.opt_a.step()

                val_pred = self.critic(b_gs)
                critic_loss = self._critic_loss_fn(val_pred, b_ret)

                self.opt_c.zero_grad()
                critic_loss.backward()
                nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=0.5)
                self.opt_c.step()

        self.step_idx = 0

    def save(self, path):
        """Write the actor and critic state dicts to ``path``."""
        torch.save({'actor': self.actor.state_dict(),
                    'critic': self.critic.state_dict()}, path)

    def load(self, path):
        """Load actor and critic state dicts previously written by ``save``."""
        data = torch.load(path, map_location=device)
        self.actor.load_state_dict(data['actor'])
        self.critic.load_state_dict(data['critic'])
Other_algorithms/Flat_System/solar_sys_environment.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import pandas as pd
3
+ import numpy as np
4
+ import random
5
+ from gym.spaces import Box
6
+
7
+ random.seed(42)
8
+ np.random.seed(42)
9
+
10
+ class SolarSys(gym.Env):
11
+ """
12
+ Flat (non-hierarchical) OpenAI Gym Environment for Multi-Agent energy management
13
+ in a residential cluster, featuring complex P2P pricing and reward structures
14
+ similar to the low-level agents in the Hierarchical model.
15
+ """
16
+
17
def __init__(
    self,
    data_path: str = "./data/training/simulated_data.csv",
    state: str = "region_a",  # Generalized: region_a, region_b, region_c
    time_freq: str = "15T",
):
    """Load the dataset, configure pricing and batteries, and declare spaces.

    Args:
        data_path: CSV file with a ``local_15min`` timestamp column plus
            ``grid_<id>`` and ``total_solar_<id>`` columns per house.
        state: pricing region key (case-insensitive); must be one of the
            keys of the internal pricing table.
        time_freq: pandas offset alias the data is resampled to (mean).

    Raises:
        FileNotFoundError: if ``data_path`` does not exist.
        ValueError: for an unsupported ``state``, an empty or unreadable
            data file, or a dataset shorter than one day.
    """
    super().__init__()
    self.data_path = data_path
    self.time_freq = time_freq
    self.state = state.lower()

    # --- Generalized Pricing Configuration ---
    self._pricing_info = {
        "region_a": {
            "max_grid_price": 0.2112,
            "feed_in_tariff": 0.04,
            "price_function": self._get_region_a_price
        },
        "region_b": {
            "max_grid_price": 0.32,
            "feed_in_tariff": 0.055,
            "price_function": self._get_region_b_price
        },
        "region_c": {
            "max_grid_price": 0.12505,
            "feed_in_tariff": 0.06,
            "price_function": self._get_region_c_price
        }
    }

    if self.state not in self._pricing_info:
        raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")

    state_config = self._pricing_info[self.state]
    self.max_grid_price = state_config["max_grid_price"]
    self.feed_in_tariff = state_config["feed_in_tariff"]
    self._get_price_function = state_config["price_function"]

    # --- Data Loading ---
    try:
        all_data = pd.read_csv(data_path)
        all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
        all_data.set_index("local_15min", inplace=True)
        all_data = all_data.resample(time_freq).mean()

    except FileNotFoundError as e:
        raise FileNotFoundError(f"Data file {data_path} not found.") from e
    except pd.errors.EmptyDataError as e:
        raise ValueError(f"Data file {data_path} is empty.") from e
    except Exception as e:
        raise ValueError(f"Error loading data: {e}") from e

    # Compute global maxima for normalization. nanmax is used because
    # resampling can leave NaN rows (empty bins), and a plain max() would
    # turn both normalisers into NaN.
    grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
    solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
    all_grid = all_data[grid_cols].values
    all_solar = all_data[solar_cols].values

    self.global_max_demand = float(np.nanmax(all_grid + all_solar)) + 1e-8
    self.global_max_solar = float(np.nanmax(all_solar)) + 1e-8

    self.all_data = all_data

    # Calculate time steps
    freq_offset = pd.tseries.frequencies.to_offset(time_freq)
    minutes_per_step = freq_offset.nanos / 1e9 / 60.0
    self.steps_per_day = int(24 * 60 // minutes_per_step)

    total_rows = len(self.all_data)
    self.total_days = total_rows // self.steps_per_day
    if self.total_days < 1:
        raise ValueError("Dataset has less than a single day of data.")

    # Strip the prefix rather than splitting on "_" so house ids that
    # themselves contain underscores stay intact.
    self.house_ids = [col[len("grid_"):] for col in grid_cols]
    self.num_agents = len(self.house_ids)
    self.original_no_p2p_import = {}
    for hid in self.house_ids:
        col_grid = f"grid_{hid}"
        self.original_no_p2p_import[hid] = self.all_data[col_grid].clip(lower=0.0).values

    # Determine population groups and battery assignments:
    # group 1 = house with any solar production, group 0 = none.
    solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
    self.agent_groups = [
        1 if solar_sums[f"total_solar_{hid}"] > 0 else 0 for hid in self.house_ids
    ]
    self.solar_houses = [
        hid for hid, group in zip(self.house_ids, self.agent_groups) if group == 1
    ]

    self.battery_options = {
        "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
        "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
        "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
    }

    # Initialize battery specs as vectorized arrays (Crucial for speed)
    self.batteries = {}
    self.has_battery = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)

    # Only solar houses get a battery; the model is drawn at random, one
    # draw per solar house in house order.
    battery_names = list(self.battery_options)
    for i, (hid, group) in enumerate(zip(self.house_ids, self.agent_groups)):
        if group != 1:
            continue
        choice = random.choice(battery_names)
        specs = self.battery_options[choice]
        self.batteries[hid] = specs

        self.has_battery[i] = 1.0
        self.battery_max_capacity[i] = specs["max_capacity"]
        self.battery_charge_efficiency[i] = specs["charge_efficiency"]
        self.battery_discharge_efficiency[i] = specs["discharge_efficiency"]
        self.battery_max_charge_rate[i] = specs["max_charge_rate"]
        self.battery_max_discharge_rate[i] = specs["max_discharge_rate"]
        self.battery_degradation_cost[i] = specs["degradation_cost_per_kwh"]

    # Observation & Action Spaces
    # [demand, solar, SOC_frac, grid_price, peer_price, total_demand_others, total_solar_others, hour]
    self.observation_space = Box(
        low=-np.inf, high=np.inf,
        shape=(self.num_agents, 8),
        dtype=np.float32
    )

    # Action: [sell_grid, buy_grid, sell_peers, buy_peers, charge_batt, discharge_batt]
    self.action_space = Box(
        low=0.0,
        high=1.0,
        shape=(self.num_agents, 6),
        dtype=np.float32
    )

    self.episode_metrics = {}
    self._initialize_episode_metrics()

    # Initialize episode variables
    self.data = None
    self.demands_day = None
    self.solars_day = None
    self.hours_day = None
    self.current_step = 0
    self.num_steps = self.steps_per_day
    self.previous_actions = np.zeros((self.num_agents, 6), dtype=np.float32)
168
+
169
+
170
+ def _initialize_episode_metrics(self):
171
+ """Initialize or reset all metrics tracked over a single episode."""
172
+ self.cumulative_grid_reduction = 0.0
173
+ self.cumulative_grid_reduction_peak = 0.0
174
+ self.cumulative_degradation_cost = 0.0
175
+ self.agent_cost_savings = np.zeros(self.num_agents, dtype=np.float32)
176
+ self.degradation_cost_timeseries = []
177
+ self.cost_savings_timeseries = []
178
+ self.grid_reduction_timeseries = []
179
+
180
+
181
+ # --- Price Functions (Generalized) ---
182
def get_grid_price(self, step_idx):
    """Return the grid price at ``step_idx`` using the price function chosen at construction."""
    price_fn = self._get_price_function
    return price_fn(step_idx)
185
+
186
+ def _get_region_a_price(self, step_idx):
187
+ minutes_per_step = 24 * 60 / self.steps_per_day
188
+ hour = int((step_idx * minutes_per_step) // 60) % 24
189
+ if 14 <= hour < 19:
190
+ return 0.2112
191
+ else:
192
+ return 0.0434
193
+
194
+ def _get_region_b_price(self, step_idx):
195
+ minutes_per_step = 24 * 60 / self.steps_per_day
196
+ hour = int((step_idx * minutes_per_step) // 60) % 24
197
+ if 15 <= hour < 19:
198
+ return 0.32
199
+ elif 13 <= hour < 15:
200
+ return 0.22
201
+ else:
202
+ return 0.12
203
+
204
+ def _get_region_c_price(self, step_idx):
205
+ minutes_per_step = 24 * 60 / self.steps_per_day
206
+ hour = int((step_idx * minutes_per_step) // 60) % 24
207
+ if 13 <= hour < 21:
208
+ return 0.125048
209
+ elif hour >= 23 or hour < 6:
210
+ return 0.057014
211
+ else:
212
+ return 0.079085
213
+
214
def get_peer_price(self, step_idx, total_surplus, total_shortfall):
    """
    Peer-to-peer price from the shortfall/surplus ratio (arctangent-log curve).

    The price starts at a balance point weighted 80% grid price / 20%
    feed-in tariff, is shifted up when shortfall exceeds surplus and down
    otherwise, and is finally clipped to ``[feed_in_tariff, grid_price]``.
    Returns a Python float.
    """
    ceiling = self.get_grid_price(step_idx)
    floor = self.feed_in_tariff

    # Curve parameters.
    exponent = 1.5
    tiny = 1e-6  # keeps the ratio finite when either side is zero
    midpoint = (ceiling * 0.80) + (floor * 0.20)
    swing = (ceiling - floor) / (1.5 * np.pi)

    log_ratio = np.log((total_shortfall + tiny) / (total_surplus + tiny))

    # Signed power of the log-ratio: magnitude raised to the exponent, sign kept.
    magnitude = np.abs(log_ratio) ** exponent
    signed_power = -magnitude if log_ratio < 0 else magnitude

    shift = 2 * np.pi * swing * np.arctan(signed_power)
    return float(np.clip(midpoint + shift, floor, ceiling))
244
+
245
+
246
def reset(self):
    """Start a new episode on a randomly chosen day.

    If the previous episode advanced at least one step, its summary is
    published to ``self.episode_metrics`` first. Returns ``(obs, {})``.
    """
    # 1. Publish the summary of the episode that just ended.
    if self.current_step > 0:
        savings = self.agent_cost_savings
        gains = savings[savings > 0]
        if len(gains) > 1:
            fairness = self._compute_jains_index(gains)
        else:
            fairness = 0.0
        self.episode_metrics = {
            "total_cost_savings": np.sum(savings),
            "fairness_on_cost_savings": fairness,
            "battery_degradation_cost_total": self.cumulative_degradation_cost,
            # ... other metrics ...
        }

    # 2. Draw a day and slice its rows out of the full dataset.
    self.day_index = np.random.randint(0, self.total_days)
    first_row = self.day_index * self.steps_per_day
    last_row = first_row + self.steps_per_day
    self.data = self.all_data.iloc[first_row:last_row].copy()

    # 3. Per-house demand (grid + solar, floored at 0) and solar (floored at 0).
    per_house_demand = []
    per_house_solar = []
    for hid in self.house_ids:
        grid = self.data[f"grid_{hid}"].fillna(0.0).values
        solar = self.data[f"total_solar_{hid}"].fillna(0.0).clip(lower=0.0).values
        per_house_demand.append(np.clip(grid + solar, 0.0, None))
        per_house_solar.append(solar)

    self.demands_day = np.stack(per_house_demand, axis=1).astype(np.float32)
    self.solars_day = np.stack(per_house_solar, axis=1).astype(np.float32)

    stamps = self.data.index
    self.hours_day = (stamps.hour + stamps.minute / 60.0).values

    baseline_columns = [
        self.original_no_p2p_import[hid][first_row:last_row] for hid in self.house_ids
    ]
    self.no_p2p_import_day = np.stack(baseline_columns, axis=1)

    # 4. Fresh counters and metrics for the new episode.
    self.current_step = 0
    self._initialize_episode_metrics()
    self.previous_actions = np.zeros((self.num_agents, 6), dtype=np.float32)

    # 5. Random starting charge between 30% and 70% of capacity;
    #    houses without a battery are forced back to zero.
    capacity = self.battery_max_capacity
    self.battery_soc = np.random.uniform(low=0.30 * capacity, high=0.70 * capacity)
    self.battery_soc *= self.has_battery

    # 6. First observation of the episode.
    return self._get_obs(), {}
300
+
301
+
302
def step(self, actions):
    """Advance the environment by one time step.

    Args:
        actions: Array-like of shape ``(num_agents, 6)``. Columns are, in
            order: sell-to-grid, buy-from-grid, sell-to-peers, buy-from-peers,
            charge-battery, discharge-battery. Values are clipped to [0, 1].

    Returns:
        ``(obs_next, rewards_list, done, info)``: the next observation, a list
        with one reward per agent, the episode-finished flag, and a dict of
        per-agent arrays describing the trades made during this step.

    Raises:
        ValueError: If ``actions`` does not have shape ``(num_agents, 6)``.
    """
    actions = np.array(actions, dtype=np.float32)
    # Fail early with a clear message instead of a cryptic unpack/broadcast error.
    if actions.shape != (self.num_agents, 6):
        raise ValueError(
            f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}"
        )
    actions = np.clip(actions, 0.0, 1.0)

    a_sellGrid, a_buyGrid, a_sellPeers, a_buyPeers, a_chargeBatt, a_dischargeBatt = actions.T

    demands = self.demands_day[self.current_step]
    solars = self.solars_day[self.current_step]

    # --- 1. PRICING ---
    total_surplus = np.maximum(solars - demands, 0.0).sum()
    total_shortfall = np.maximum(demands - solars, 0.0).sum()
    peer_price = self.get_peer_price(self.current_step, total_surplus, total_shortfall)
    grid_price = self.get_grid_price(self.current_step)
    feed_in_tariff = self.feed_in_tariff

    # Initial balances: each home's own solar is applied to its own demand first.
    final_shortfall = np.maximum(demands - solars, 0.0)
    final_surplus = np.maximum(solars - demands, 0.0)

    # --- 2. BATTERY DISCHARGE (covers the home's own shortfall) ---
    available_from_batt = self.battery_soc * self.battery_discharge_efficiency
    desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
    discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
    discharge_amount *= self.has_battery

    # SOC drops by the delivered energy divided by the discharge efficiency.
    self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
    self.battery_soc = np.maximum(0.0, self.battery_soc)
    final_shortfall -= discharge_amount

    # --- 3. BATTERY CHARGE (from the home's own surplus) ---
    cap_left = self.battery_max_capacity - self.battery_soc
    desired_charge = a_chargeBatt * self.battery_max_charge_rate
    charge_limit = cap_left / (self.battery_charge_efficiency + 1e-9)
    charge_amount = np.minimum.reduce([desired_charge, charge_limit, final_surplus])
    charge_amount *= self.has_battery

    self.battery_soc += charge_amount * self.battery_charge_efficiency
    final_surplus -= charge_amount

    # --- 4. P2P TRADING ---
    # Sellers may offer remaining solar surplus plus whatever the battery can deliver.
    battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
    effective_surplus = final_surplus + battery_offer

    netPeer = a_buyPeers - a_sellPeers
    p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
    p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus

    total_sell = np.sum(p2p_sell_offer)
    total_buy = np.sum(p2p_buy_request)
    matched = min(total_sell, total_buy)

    if matched > 1e-9:
        # The matched volume is shared pro rata among sellers and among buyers.
        sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
        buy_fraction = p2p_buy_request / (total_buy + 1e-12)
        actual_sold = matched * sell_fraction
        actual_bought = matched * buy_fraction
    else:
        actual_sold = np.zeros(self.num_agents, dtype=np.float32)
        actual_bought = np.zeros(self.num_agents, dtype=np.float32)

    # Attribute each sale to its source: battery energy first, solar for the rest.
    from_batt_p2p = np.minimum(actual_sold, battery_offer)
    from_solar_p2p = actual_sold - from_batt_p2p

    final_surplus -= from_solar_p2p
    final_shortfall -= actual_bought

    # Deduct battery-sourced peer sales from SOC.
    soc_reduction_p2p = (from_batt_p2p / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
    self.battery_soc -= soc_reduction_p2p
    self.battery_soc = np.maximum(0.0, self.battery_soc)

    # --- 5. GRID TRADES ---
    netGrid = a_buyGrid - a_sellGrid
    grid_import = np.maximum(0, netGrid) * final_shortfall
    grid_export = np.maximum(0, -netGrid) * final_surplus

    # Any shortfall the agent did not choose to import is imported anyway.
    forced_import = np.maximum(final_shortfall - grid_import, 0.0)
    grid_import += forced_import

    # --- 6. COSTS AND REWARDS ---
    costs = (
        (grid_import * grid_price)
        - (grid_export * feed_in_tariff)
        + (actual_bought * peer_price)
        - (actual_sold * peer_price)
    )

    final_rewards = self._compute_rewards(
        grid_import, grid_export, actual_sold, actual_bought,
        charge_amount, discharge_amount, costs, grid_price, peer_price
    )

    # --- 7. METRIC LOGGING ---
    no_p2p_import_this_step = self.no_p2p_import_day[self.current_step]

    step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
    self.cumulative_grid_reduction += step_grid_reduction
    self.grid_reduction_timeseries.append(step_grid_reduction)
    # A step counts as "peak" when the grid price is within 1% of the maximum.
    if grid_price >= self.max_grid_price * 0.99:
        self.cumulative_grid_reduction_peak += step_grid_reduction

    cost_no_p2p = no_p2p_import_this_step * grid_price
    step_cost_savings_per_agent = cost_no_p2p - costs
    self.agent_cost_savings += step_cost_savings_per_agent
    self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))

    degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
    step_degradation_cost = np.sum(degradation_cost_agent)
    self.cumulative_degradation_cost += step_degradation_cost
    self.degradation_cost_timeseries.append(step_degradation_cost)

    info = {
        "p2p_buy": actual_bought, "p2p_sell": actual_sold,
        "grid_import_with_p2p": grid_import, "grid_import_no_p2p": no_p2p_import_this_step,
        "grid_export": grid_export, "costs": costs,
        "charge_amount": charge_amount, "discharge_amount": discharge_amount,
        "step": self.current_step, "agent_rewards": final_rewards,
    }

    # --- 8. FINALIZE STEP ---
    self.current_step += 1
    done = (self.current_step >= self.num_steps)
    obs_next = self._get_obs()

    rewards_list = list(final_rewards)
    return obs_next, rewards_list, done, info
434
+
435
+
436
def _get_obs(self):
    """Assemble the per-agent observation matrix for the current step.

    The step index is clamped to the last step of the day, so calling this
    after the final step still yields a valid observation. Each row holds:
    own demand, own solar, SOC fraction (-1 when the agent has no battery),
    grid price, peer price, others' total demand, others' total solar, and
    the hour of day.

    Returns:
        ``float32`` array of shape ``(num_agents, 8)``.
    """
    idx = min(self.current_step, self.num_steps - 1)
    load = self.demands_day[idx]
    pv = self.solars_day[idx]
    n = self.num_agents

    # Market-wide aggregates feed the peer-price formula.
    market_surplus = float(np.maximum(pv - load, 0.0).sum())
    market_shortfall = float(np.maximum(load - pv, 0.0).sum())

    price_grid = self.get_grid_price(idx)
    price_peer = self.get_peer_price(idx, market_surplus, market_shortfall)
    hour_now = self.hours_day[idx]

    # State of charge as a fraction of capacity; -1 marks "no battery".
    fill = self.battery_soc / (self.battery_max_capacity + 1e-9)
    fill = np.where(self.has_battery == 1, fill, -1.0)

    columns = (
        load,
        pv,
        fill,
        np.full(n, price_grid),
        np.full(n, price_peer),
        load.sum() - load,   # total demand of the other agents
        pv.sum() - pv,       # total solar of the other agents
        np.full(n, hour_now),
    )
    return np.column_stack(columns).astype(np.float32)
466
+
467
+
468
def _compute_jains_index(self, usage_array):
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of the input.

    The input is converted to ``float32``. A small constant (1e-8) is added
    to the denominator, so an all-zero or empty input gives 0 instead of a
    division error.
    """
    values = np.array(usage_array, dtype=np.float32)
    squared_total = values.sum() ** 2
    scaled_energy = len(values) * np.square(values).sum() + 1e-8
    return squared_total / scaled_energy
474
+
475
+
476
def _compute_rewards(
    self, grid_import, grid_export, actual_sold, actual_bought,
    charge_amount, discharge_amount, costs, grid_price, peer_price
):
    """Combine cost and shaping terms into one reward per agent (vectorized).

    The reward starts at the negative cost, then subtracts penalties for grid
    import, SOC deviation from 50% and battery throughput, and adds bonuses
    for peer sales, for peer purchases made below the grid price, and for the
    fairness (Jain's index) of the P2P volume. ``grid_export`` is accepted
    but not used by the formula.

    Returns:
        Array with one reward per agent.
    """
    # Term weights; the original note says these must match the hierarchical model.
    w_import, w_sell, w_buy, w_soc, w_degrad, w_fair, w_cost = (
        0.3, 0.5, 0.5, 0.1, 0.05, 0.4, 1.0
    )
    price_cap = self.max_grid_price

    # Fairness is measured on each agent's total P2P volume (bought + sold).
    fairness = self._compute_jains_index(actual_bought + actual_sold)

    # Prices expressed relative to the maximum grid price.
    grid_rel = grid_price / price_cap
    peer_rel = peer_price / price_cap

    # Base term: spending less is better.
    total = -costs * w_cost

    # Grid import penalty.
    total -= w_import * grid_import * grid_rel

    # Peer sale bonus.
    total += w_sell * actual_sold * peer_rel

    # Peer purchase bonus, granted only while peers are cheaper than the grid.
    discount = (grid_price - peer_price) / price_cap
    purchase_bonus = w_buy * actual_bought * discount
    total += np.where(peer_price < grid_price, purchase_bonus, 0.0)

    # Penalty for SOC drifting from 50%; masked out for agents without a battery.
    fill = self.battery_soc / (self.battery_max_capacity + 1e-9)
    total -= w_soc * ((fill - 0.5) ** 2) * self.has_battery

    # Battery throughput (degradation) penalty.
    total -= w_degrad * (charge_amount + discharge_amount) * self.battery_degradation_cost

    # Fairness bonus, identical for every agent.
    total += w_fair * fairness

    return total
519
+
520
+
521
def get_episode_metrics(self):
    """Return performance metrics for the last completed episode.

    ``reset`` only stores metrics once an episode has actually run, so an
    empty dict is returned (instead of raising AttributeError) when no
    episode has completed yet.
    """
    return getattr(self, "episode_metrics", {})
Other_algorithms/HC_MAPPO/Environment/cluster_env_wrapper.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import numpy as np
3
+ import math
4
+ import sys
5
+ import os
6
+ import functools
7
+
8
+ import pandas as pd
9
+
10
+ # Ensure the SolarSys Environment package is on the Python path
11
+ # Please ensure you follow proper directory structure for running this code
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+ from Environment.solar_sys_environment import SolarSys
14
+
15
+
16
def form_clusters(metrics: dict, size: int) -> list:
    """Form balanced, heterogeneous clusters of houses.

    Houses are categorized by their energy profile relative to the median
    (producers, consumers, prosumers, neutrals) and then dealt out to the
    clusters in round-robin order so each cluster gets a mix of categories.

    Args:
        metrics: Mapping ``house_id -> {'consumption': ..., 'solar': ...}``.
        size: Maximum number of houses per cluster; must be at least 1.

    Returns:
        List of clusters, each a list of house ids. Empty when ``metrics``
        is empty.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    house_ids = list(metrics)
    if not house_ids:
        return []
    if size < 1:
        raise ValueError(f"Cluster size must be a positive integer, got {size}.")

    median_consumption = np.median([m['consumption'] for m in metrics.values()])
    median_solar = np.median([m['solar'] for m in metrics.values()])

    def high_solar(h):
        return metrics[h]['solar'] >= median_solar

    def low_solar(h):
        return metrics[h]['solar'] < median_solar

    def high_use(h):
        return metrics[h]['consumption'] >= median_consumption

    def low_use(h):
        return metrics[h]['consumption'] < median_consumption

    # Categorize each house based on its profile relative to the median.
    producers = [h for h in house_ids if high_solar(h) and low_use(h)]
    consumers = [h for h in house_ids if low_solar(h) and high_use(h)]
    prosumers = [h for h in house_ids if high_solar(h) and high_use(h)]
    neutrals = [h for h in house_ids if low_solar(h) and low_use(h)]

    ordered = producers + consumers + prosumers + neutrals

    # Houses whose metrics compare as neither >= nor < the median (e.g. NaN)
    # fall through every category; append them so none are dropped.
    seen = set(ordered)
    ordered += [h for h in house_ids if h not in seen]

    num_clusters = math.ceil(len(house_ids) / size)

    # Round-robin deal: cluster c receives items c, c + k, c + 2k, ...
    return [ordered[c::num_clusters] for c in range(num_clusters)]
53
+
54
class GlobalPriceVecEnvWrapper(gym.vector.VectorEnvWrapper):
    """Vector-env wrapper that steps all cluster environments together.

    It packs the low-level agent actions with per-cluster inter-cluster
    transfers and a peer-price override slot, and exposes helpers that let a
    coordinator inspect or move energy in individual clusters.
    """

    def __init__(self, env, clusters: list):
        """Wrap ``env`` and remember which house ids belong to each cluster."""
        super().__init__(env)
        self.clusters = clusters
        # Expose the underlying SolarSys environments for inspection by the coordinator.
        # self.env.envs is the list of individual envs held by the SyncVectorEnv.
        self.cluster_envs = self.env.envs

    @staticmethod
    def _collate_final_infos(final_infos):
        """Merge per-cluster final info dicts into one dict of arrays.

        Values are stacked along a new leading cluster axis. When clusters
        hold different numbers of agents their arrays differ in shape and
        cannot be stacked; those keys are returned as an object array with
        one entry per cluster instead.
        """
        collated = {}
        for key in final_infos[0].keys():
            values = [info[key] for info in final_infos]
            try:
                collated[key] = np.stack(values)
            except ValueError:
                ragged = np.empty(len(values), dtype=object)
                for i, value in enumerate(values):
                    ragged[i] = value
                collated[key] = ragged
        return collated

    def step(self, actions: np.ndarray, exports: np.ndarray = None, imports: np.ndarray = None):
        """Step every cluster environment once.

        Args:
            actions: Batched low-level actions, one entry per cluster.
            exports: Optional per-cluster exported energy.
            imports: Optional per-cluster imported energy. Net transfers
                (``imports - exports``) are applied only when both arrays are
                given; otherwise they are zero.

        Returns:
            ``(obs_next, rewards, done_all, info_agg)`` where ``done_all`` is
            true once every cluster is done and ``info_agg`` holds the
            per-cluster done flags and infos.
        """
        num_clusters = len(self.cluster_envs)
        net_transfers = np.zeros(num_clusters)
        if exports is not None and imports is not None:
            net_transfers = imports - exports

        batched_transfers = net_transfers.reshape(-1, 1).astype(np.float32)
        # The environment treats a negative price as "no override".
        batched_prices = np.full((num_clusters, 1), -1.0, dtype=np.float32)
        packed_actions = (actions, batched_transfers, batched_prices)

        obs_next, rewards, terminateds, truncateds, infos = self.env.step(packed_actions)
        dones = terminateds | truncateds
        done_all = dones.all()

        if done_all:
            # After auto-reset the last step's infos live under 'final_info'.
            infos = self._collate_final_infos(infos['final_info'])

        info_agg = {
            "cluster_dones": dones,
            "cluster_infos": infos,
        }

        return obs_next, rewards, done_all, info_agg

    def get_export_capacity(self, cluster_idx: int) -> float:
        """Returns the total physically exportable energy from a cluster's batteries and solar in kWh."""
        cluster_env = self.cluster_envs[cluster_idx]
        available_from_batt = cluster_env.battery_soc * cluster_env.battery_discharge_efficiency
        total_exportable = np.sum(available_from_batt) + cluster_env.current_solar
        return float(total_exportable)

    def get_import_capacity(self, cluster_idx: int) -> float:
        """Returns the total physically importable space in a cluster's batteries in kWh."""
        cluster_env = self.cluster_envs[cluster_idx]
        free_space = cluster_env.battery_max_capacity - cluster_env.battery_soc
        return float(np.sum(free_space))

    def send_energy(self, from_cluster_idx: int, amount: float) -> float:
        """Drains 'amount' of energy from the specified cluster (batteries first, then solar)."""
        return self.cluster_envs[from_cluster_idx].send_energy(amount)

    def receive_energy(self, to_cluster_idx: int, amount: float) -> float:
        """Charges batteries in the specified cluster with 'amount' of energy."""
        return self.cluster_envs[to_cluster_idx].receive_energy(amount)
112
+
113
+
114
def make_vec_env(data_path: str, time_freq: str, cluster_size: int, state: str):
    """Build the wrapped vector environment with one SolarSys env per cluster.

    The CSV is read and resampled once, then shared by every environment.
    A throw-away SolarSys instance over all houses supplies the per-house
    totals used to form the clusters.

    Args:
        data_path: Path to the CSV with a ``local_15min`` timestamp column.
        time_freq: Pandas frequency string to resample the data to.
        cluster_size: Maximum number of houses per cluster.
        state: Pricing state forwarded to ``SolarSys``.

    Returns:
        A ``GlobalPriceVecEnvWrapper`` around a ``SyncVectorEnv``.

    Raises:
        ValueError: If the dataset cannot be loaded or resampled.
    """
    print("--- Pre-loading shared dataset for all environments ---")
    try:
        shared_df = pd.read_csv(data_path)
        shared_df["local_15min"] = pd.to_datetime(shared_df["local_15min"], utc=True)
        shared_df.set_index("local_15min", inplace=True)
        # Resample once here so every environment shares data at the requested step length.
        shared_df = shared_df.resample(time_freq).mean()
    except Exception as e:
        # Chain the original error so the real cause stays in the traceback.
        raise ValueError(f"Failed to pre-load data in make_vec_env: {e}") from e

    base_env_for_metrics = SolarSys(
        data_path=data_path,
        time_freq=time_freq,
        preloaded_data=shared_df,  # Pass the shared DataFrame here
        state=state
    )

    # Per-house totals over the whole dataset drive the clustering.
    metrics = {}
    for hid in base_env_for_metrics.house_ids:
        # nansum keeps missing rows from turning the total into NaN, matching
        # how pandas' .sum() already skips NaN for the solar total below.
        total_consumption = float(
            np.nansum(np.clip(base_env_for_metrics.original_no_p2p_import[hid], 0.0, None))
        )
        total_solar = float(
            base_env_for_metrics.all_data[f"total_solar_{hid}"].clip(lower=0.0).sum()
        )
        metrics[hid] = {'consumption': total_consumption, 'solar': total_solar}

    clusters = form_clusters(metrics, cluster_size)
    print(f"Formed {len(clusters)} clusters of size up to {cluster_size}.")

    # One zero-argument factory per cluster, as SyncVectorEnv expects.
    env_fns = [
        functools.partial(
            SolarSys,
            data_path=data_path,
            time_freq=time_freq,
            house_ids_in_cluster=cluster_house_ids,
            preloaded_data=shared_df,
            state=state
        )
        for cluster_house_ids in clusters
    ]
    sync_vec_env = gym.vector.SyncVectorEnv(env_fns)
    wrapped_vec_env = GlobalPriceVecEnvWrapper(sync_vec_env, clusters=clusters)

    return wrapped_vec_env
Other_algorithms/HC_MAPPO/Environment/solar_sys_environment.py ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import pandas as pd
3
+ import numpy as np
4
+ from collections import deque
5
+ import random
6
+ from gym.spaces import Tuple, Box
7
+
8
+ random.seed(42)
9
+ np.random.seed(42)
10
+
11
+ class SolarSys(gym.Env):
12
+
13
def __init__(
    self,
    data_path="DATA/training/25houses_152days_TRAIN.csv",
    state="",  # Select from 'oklahoma', 'colorado', 'pennsylvania'
    time_freq="15T",
    house_ids_in_cluster=None,
    preloaded_data=None,
):
    """Set up the environment: pricing, data, agents, batteries and spaces.

    Args:
        data_path: CSV to load when ``preloaded_data`` is not given. It must
            contain a ``local_15min`` timestamp column plus ``grid_<id>`` and
            ``total_solar_<id>`` columns.
        state: Pricing scheme; one of 'oklahoma', 'colorado', 'pennsylvania'
            (case-insensitive).
        time_freq: Pandas frequency string giving the length of one step.
        house_ids_in_cluster: Optional subset of house ids to simulate; ids
            not present in the data are ignored.
        preloaded_data: Optional already-indexed DataFrame used instead of
            reading ``data_path``.

    Raises:
        ValueError: Unsupported ``state``, unreadable or empty data, no valid
            house ids, or less than one full day of rows.
        FileNotFoundError: ``data_path`` does not exist.
    """
    super().__init__()  # initialize parent gym.Env
    self.state = state.lower()

    # --- Centralized Pricing Configuration ---
    # NOTE(review): Pennsylvania's max_grid_price (0.5505) is higher than any
    # value _get_pennsylvania_price returns (at most 0.125048) - confirm.
    self._pricing_info = {
        "oklahoma": {
            "max_grid_price": 0.2112,
            "feed_in_tariff": 0.04,
            "price_function": self._get_oklahoma_price
        },
        "colorado": {
            "max_grid_price": 0.32,
            "feed_in_tariff": 0.055,
            "price_function": self._get_colorado_price
        },
        "pennsylvania": {
            "max_grid_price": 0.5505,
            "feed_in_tariff": 0.06,
            "price_function": self._get_pennsylvania_price
        }
    }

    if self.state not in self._pricing_info:
        raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")

    state_config = self._pricing_info[self.state]
    self.max_grid_price = state_config["max_grid_price"]
    self.feed_in_tariff = state_config["feed_in_tariff"]
    self._get_price_function = state_config["price_function"]
    self.data_path = data_path
    self.time_freq = time_freq

    if preloaded_data is not None:
        all_data = preloaded_data
        if house_ids_in_cluster:
            print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
    else:
        print(f"Loading data from {data_path}...")
        # NOTE(review): data read here is not resampled to time_freq; the CSV
        # is presumably already at that frequency - confirm.
        try:
            all_data = pd.read_csv(data_path)
            all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
            all_data.set_index("local_15min", inplace=True)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Data file {data_path} not found.") from e
        except pd.errors.EmptyDataError as e:
            raise ValueError(f"Data file {data_path} is empty.") from e
        except Exception as e:
            raise ValueError(f"Error loading data: {e}") from e

    # Compute global maxima for normalization
    grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
    solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
    all_grid = all_data[grid_cols].values
    all_solar = all_data[solar_cols].values

    # max total demand = max(grid + solar) over all time & agents.
    # nanmax ignores missing readings, which would otherwise make the maximum NaN.
    self.global_max_demand = float(np.nanmax(all_grid + all_solar)) + 1e-8

    # max solar generation alone
    self.global_max_solar = float(np.nanmax(all_solar)) + 1e-8

    # Store the dataset
    self.all_data = all_data
    all_house_ids_in_file = [
        col.split("_")[1] for col in self.all_data.columns
        if col.startswith("grid_")
    ]
    if house_ids_in_cluster:
        self.house_ids = [hid for hid in house_ids_in_cluster if hid in all_house_ids_in_file]
    else:
        self.house_ids = all_house_ids_in_file

    if not self.house_ids:
        raise ValueError("No valid house_ids found for this environment instance.")

    self.env_log_infos = []

    # Derive the number of steps in a day from the step length.
    freq_offset = pd.tseries.frequencies.to_offset(time_freq)
    minutes_per_step = freq_offset.nanos / 1e9 / 60.0
    self.steps_per_day = int(24 * 60 // minutes_per_step)

    total_rows = len(self.all_data)
    self.total_days = total_rows // self.steps_per_day
    if self.total_days < 1:
        raise ValueError(
            f"After resampling, dataset has {total_rows} rows, which is "
            f"less than a single day of {self.steps_per_day} steps."
        )

    self.num_agents = len(self.house_ids)

    # Baseline grid import per house (no P2P, no battery). Missing readings
    # are treated as zero, as reset() does for demand, so a single NaN cannot
    # poison the cumulative savings metrics.
    self.original_no_p2p_import = {}
    for hid in self.house_ids:
        col_grid = f"grid_{hid}"
        self.original_no_p2p_import[hid] = (
            self.all_data[col_grid].fillna(0.0).clip(lower=0.0).values
        )

    # Group 1 = houses with any net solar production, group 0 = the rest.
    solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
    solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
    self.agent_groups = [
        1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
        for hid in self.house_ids
    ]

    self.group_counts = {
        0: self.agent_groups.count(0),
        1: self.agent_groups.count(1)
    }
    print(f"Number of houses in each group: {self.group_counts}")

    # battery logic
    self.battery_options = {
        "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
        "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
        "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
    }
    self.solar_houses = [
        hid for hid in self.house_ids
        if (self.all_data[f"total_solar_{hid}"] > 0).any()
    ]

    # Every solar house gets one randomly chosen battery model.
    self.batteries = {}
    for hid in self.solar_houses:
        choice = random.choice(list(self.battery_options))
        specs = self.battery_options[choice]
        self.batteries[hid] = {"soc": 0.0, **specs}

    self.battery_charge_history = {hid: [] for hid in self.batteries}
    self.battery_discharge_history = {hid: [] for hid in self.batteries}
    self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
    self.battery_level = sum(b["soc"] for b in self.batteries.values())
    self.current_solar = 0.0
    self.has_battery = np.array([1 if hid in self.batteries else 0 for hid in self.house_ids], dtype=np.float32)

    # Initialize arrays for all agents, with zeros for non-battery agents
    self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
    self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)

    # Populate the arrays using the created battery dictionary
    for i, hid in enumerate(self.house_ids):
        if hid in self.batteries:
            batt = self.batteries[hid]
            self.battery_max_capacity[i] = batt["max_capacity"]
            self.battery_charge_efficiency[i] = batt["charge_efficiency"]
            self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
            self.battery_max_charge_rate[i] = batt["max_charge_rate"]
            self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
            self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]

    # ========== SPACES (Observation & Action) ===================================
    self.observation_space = gym.spaces.Box(
        low=-np.inf, high=np.inf,
        shape=(self.num_agents, 8),
        dtype=np.float32
    )
    # Action = (per-agent actions, inter-cluster transfer, peer-price override).
    self.action_space = Tuple((
        Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
        Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
        Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32)
    ))

    # ========== EPISODE STATE ====================================================
    self.data = None
    self.env_log = []
    self.day_index = -1
    self.current_step = 0
    self.num_steps = self.steps_per_day
    self.demands = {}
    self.solars = {}
    self.previous_actions = {
        hid: np.zeros(6) for hid in self.house_ids
    }
    # Defined up front so get_episode_metrics() works before any episode completes.
    self.episode_metrics = {}
    self._initialize_episode_metrics()
201
+
202
def get_grid_price(self, step_idx):
    """Look up the grid price at ``step_idx`` using the price function
    selected for the configured state."""
    price_fn = self._get_price_function
    return price_fn(step_idx)
207
+
208
def _get_oklahoma_price(self, step_idx):
    """Oklahoma tariff: 0.2112 during hours 14-18 (inclusive), 0.0434 otherwise."""
    step_minutes = 24 * 60 / self.steps_per_day
    hour_of_day = int((step_idx * step_minutes) // 60) % 24
    on_peak = 14 <= hour_of_day < 19
    return 0.2112 if on_peak else 0.0434
215
+
216
def _get_colorado_price(self, step_idx):
    """Colorado tariff: 0.32 for hours 15-18, 0.22 for hours 13-14, 0.12 otherwise."""
    step_minutes = 24 * 60 / self.steps_per_day
    hour_of_day = int((step_idx * step_minutes) // 60) % 24
    # Guard clauses, highest tier first.
    if 15 <= hour_of_day < 19:
        return 0.32
    if 13 <= hour_of_day < 15:
        return 0.22
    return 0.12
225
+
226
def _get_pennsylvania_price(self, step_idx):
    """Pennsylvania tariff: 0.125048 for hours 13-20, 0.057014 from hour 23
    through hour 5, and 0.079085 for the remaining hours."""
    step_minutes = 24 * 60 / self.steps_per_day
    hour_of_day = int((step_idx * step_minutes) // 60) % 24
    if 13 <= hour_of_day < 21:
        return 0.125048
    overnight = hour_of_day >= 23 or hour_of_day < 6
    return 0.057014 if overnight else 0.079085
235
+
236
def get_peer_price(self, step_idx, total_surplus, total_shortfall):
    """Compute the peer-to-peer price from the demand/supply balance.

    The price starts at a balance point (80% grid price, 20% feed-in tariff)
    and is shifted by an arctangent of the signed, power-scaled log of
    ``demand / supply``. The result is clipped to lie between the feed-in
    tariff and the grid price.

    Args:
        step_idx: Step used to look up the grid price.
        total_surplus: Aggregate surplus (supply) as a scalar.
        total_shortfall: Aggregate shortfall (demand) as a scalar.

    Returns:
        The clipped peer price as a Python float.
    """
    retail = self.get_grid_price(step_idx)
    floor = self.feed_in_tariff

    # Parameters of the arctangent-log pricing curve
    exponent = 1.5
    eps = 1e-6  # keeps the ratio finite when either side is zero
    midpoint = (retail * 0.80) + (floor * 0.20)
    spread = (retail - floor) / (1.5 * np.pi)

    log_ratio = np.log((total_shortfall + eps) / (total_surplus + eps))
    # Signed power: raise the magnitude to the exponent, keep the sign.
    shaped = np.sign(log_ratio) * (np.abs(log_ratio) ** exponent)

    unclipped = midpoint + 2 * np.pi * spread * np.arctan(shaped)
    return float(np.clip(unclipped, floor, retail))
262
+
263
+
264
def _initialize_episode_metrics(self):
    """Reset every accumulator tracked over one episode (day) to its empty state."""
    # Scalar running totals
    for total_name in (
        "cumulative_grid_reduction",
        "cumulative_grid_reduction_peak",
        "cumulative_degradation_cost",
    ):
        setattr(self, total_name, 0.0)

    # Per-agent running cost savings
    self.agent_cost_savings = np.zeros(self.num_agents)

    # Per-step histories; each gets its own fresh list
    for series_name in (
        "degradation_cost_timeseries",
        "cost_savings_timeseries",
        "grid_reduction_timeseries",
    ):
        setattr(self, series_name, [])
273
+
274
def get_episode_metrics(self):
    """Return the performance metrics of the last completed episode.

    ``reset`` stores the metrics only after an episode has actually run, so
    an empty dict is returned (instead of raising AttributeError) when no
    episode has completed yet.
    """
    return getattr(self, "episode_metrics", {})
279
+
280
+ ##########################################################################
281
+ # Gym Required Methods
282
+
283
def reset(self, *, seed=None, options=None):
    """Close out the previous episode and start a new one on a random day.

    Args:
        seed: Optional integer. When given, NumPy's global RNG (the generator
            this method draws from) is re-seeded so the day choice and the
            initial battery levels are reproducible.
        options: Accepted for Gym API compatibility; currently unused.

    Returns:
        ``(obs, info)`` where ``obs`` comes from ``_get_obs`` evaluated with
        the first step's peer price and ``info`` is an empty dict.
    """
    if seed is not None:
        np.random.seed(seed)

    # Snapshot the finished episode's metrics (only if at least one step ran).
    if self.current_step > 0:
        positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
        if len(positive_savings) > 1:
            fairness_on_savings = self._compute_jains_index(positive_savings)
        else:
            fairness_on_savings = 0.0

        self.episode_metrics = {
            "grid_reduction_entire_day": self.cumulative_grid_reduction,
            "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
            "total_cost_savings": np.sum(self.agent_cost_savings),
            "fairness_on_cost_savings": fairness_on_savings,
            "battery_degradation_cost_total": self.cumulative_degradation_cost,
            "degradation_cost_over_time": self.degradation_cost_timeseries,
            "cost_savings_over_time": self.cost_savings_timeseries,
            "grid_reduction_over_time": self.grid_reduction_timeseries,
        }

    # Pick a random day and slice its rows out of the full dataset.
    self.day_index = np.random.randint(0, self.total_days)
    start_row = self.day_index * self.steps_per_day
    end_row = start_row + self.steps_per_day
    day_data = self.all_data.iloc[start_row:end_row].copy()
    self.data = day_data

    # Baseline (no-P2P) grid import for the chosen day, per house.
    self.no_p2p_import_day = {
        hid: self.original_no_p2p_import[hid][start_row:end_row]
        for hid in self.house_ids
    }

    # Per-step demand and solar arrays, one column per house.
    demand_list = []
    solar_list = []
    for hid in self.house_ids:
        grid_series = day_data[f"grid_{hid}"].fillna(0.0)
        solar_series = day_data[f"total_solar_{hid}"].fillna(0.0).clip(lower=0.0)

        # Demand is reconstructed as grid reading plus (non-negative) solar.
        demand_array = np.clip(grid_series.values + solar_series.values, 0.0, None)

        demand_list.append(demand_array)
        solar_list.append(solar_series.values)

    self.demands_day = np.stack(demand_list, axis=1).astype(np.float32)
    self.solars_day = np.stack(solar_list, axis=1).astype(np.float32)

    self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values

    self.current_step = 0
    self.env_log = []
    for hid in self.house_ids:
        self.previous_actions[hid] = np.zeros(6)

    # Randomize battery SOC between 30% and 70% of capacity.
    lows = 0.30 * self.battery_max_capacity
    highs = 0.70 * self.battery_max_capacity
    self.battery_soc = np.random.uniform(low=lows, high=highs)
    self.battery_soc *= self.has_battery  # homes without a battery stay at zero

    # The first observation needs the peer price implied by step 0's balance.
    initial_demands = self.demands_day[0]
    initial_solars = self.solars_day[0]
    initial_surplus = np.maximum(initial_solars - initial_demands, 0.0).sum()
    initial_shortfall = np.maximum(initial_demands - initial_solars, 0.0).sum()
    initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)

    obs = self._get_obs(peer_price=initial_peer_price)

    self._initialize_episode_metrics()

    return obs, {}
354
+
355
def step(self, packed_action):
    """Advance the environment by one timestep and settle all energy flows.

    Stages run in a fixed order, each one operating on what the previous
    stage left over: battery discharge -> battery charge -> intra-cluster
    P2P matching -> inter-cluster transfer -> grid settlement -> cost,
    reward and metric bookkeeping.

    Args:
        packed_action: A 3-item sequence
            ``(actions, transfer_kwh_arr, peer_price_arr)``.

            * ``actions`` must convert to an array of shape
              ``(self.num_agents, 6)``. Values are clipped to ``[0, 1]`` and
              the columns are read, in order, as sell-to-grid, buy-from-grid,
              sell-to-peers, buy-from-peers, charge-battery and
              discharge-battery.
            * ``transfer_kwh_arr[0]`` is the inter-cluster transfer. A
              positive value is handled as energy received by this cluster,
              a negative value as energy sent out, zero as no transfer.
            * ``peer_price_arr[0]`` overrides the peer price when it is
              ``>= 0``; any negative value means "no override" and the price
              is taken from ``self.get_peer_price``.

    Returns:
        A 5-tuple ``(obs_next, reward_sum, terminated, truncated, info)``.
        ``reward_sum`` is the sum of the per-agent rewards; the per-agent
        values are available as ``info['agent_rewards']``. ``terminated`` is
        always ``False``; ``truncated`` becomes ``True`` once
        ``self.current_step`` reaches ``self.num_steps``.

    Raises:
        ValueError: If ``actions`` does not have shape
            ``(self.num_agents, 6)``.
    """
    actions, transfer_kwh_arr, peer_price_arr = packed_action
    inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
    override_peer_price_val = float(peer_price_arr[0])

    # A negative price is the sentinel for "no override supplied".
    override_peer_price = override_peer_price_val if override_peer_price_val >= 0 else None

    actions = np.array(actions, dtype=np.float32)
    if actions.shape != (self.num_agents, 6):
        raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
    actions = np.clip(actions, 0.0, 1.0)

    # One column per decision; every entry is a fraction in [0, 1].
    a_sellGrid = actions[:, 0]
    a_buyGrid = actions[:, 1]
    a_sellPeers = actions[:, 2]
    a_buyPeers = actions[:, 3]
    a_chargeBatt = actions[:, 4]
    a_dischargeBatt = actions[:, 5]

    # Per-agent demand and solar generation for the current timestep.
    demands = self.demands_day[self.current_step]
    solars = self.solars_day[self.current_step]

    # Cluster-wide totals; these feed the peer-price computation below.
    total_surplus = np.maximum(solars - demands, 0.0).sum()
    total_shortfall = np.maximum(demands - solars, 0.0).sum()
    self.current_solar = total_surplus

    if override_peer_price is not None:
        peer_price = override_peer_price
    else:
        peer_price = self.get_peer_price(
            self.current_step,
            total_surplus,
            total_shortfall
        )

    grid_price = self.get_grid_price(self.current_step)

    # Per-agent net position before any battery, P2P or grid action.
    shortfall = np.maximum(demands - solars, 0.0)
    surplus = np.maximum(solars - demands, 0.0)

    # Working copies that every later stage decrements in place.
    final_shortfall = shortfall.copy()
    final_surplus = surplus.copy()
    # Placeholders only: both arrays are recomputed in the grid-settlement
    # stage further down.
    grid_import = np.zeros(self.num_agents, dtype=np.float32)
    grid_export = np.zeros(self.num_agents, dtype=np.float32)

    # ### VECTORIZED BATTERY DISCHARGE ###
    # Delivered energy is capped by the requested rate, by what the battery
    # can deliver after discharge losses, and by the agent's own shortfall.
    available_from_batt = self.battery_soc * self.battery_discharge_efficiency
    desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
    discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
    discharge_amount *= self.has_battery  # Ensure only batteries discharge

    # Update SOC (energy drawn from battery before efficiency loss).
    # The 1e-9 term guards against a zero efficiency.
    self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
    self.battery_soc = np.maximum(0.0, self.battery_soc)
    final_shortfall -= discharge_amount

    # ### VECTORIZED BATTERY CHARGE ###
    # Charging is capped by the requested rate, by the input energy that
    # would fill the remaining capacity after charge losses, and by the
    # agent's own surplus.
    cap_left = self.battery_max_capacity - self.battery_soc
    desired_charge = a_chargeBatt * self.battery_max_charge_rate
    charge_amount = np.minimum.reduce([
        desired_charge,
        cap_left / (self.battery_charge_efficiency + 1e-9),
        final_surplus
    ])
    charge_amount *= self.has_battery

    # Update SOC
    self.battery_soc += charge_amount * self.battery_charge_efficiency
    final_surplus -= charge_amount

    # ### VECTORIZED P2P TRADING ###
    # Sellers may offer remaining solar surplus plus whatever their battery
    # can still deliver after discharge losses.
    battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
    effective_surplus = final_surplus + battery_offer

    # The sign of (buy - sell) decides whether the agent buys or sells this
    # step; the magnitude scales the request or offer.
    netPeer = a_buyPeers - a_sellPeers
    p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
    p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus

    total_sell = np.sum(p2p_sell_offer)
    total_buy = np.sum(p2p_buy_request)
    matched = min(total_sell, total_buy)

    if matched > 1e-9:
        # Pro-rata clearing: the matched volume is split in proportion to
        # each agent's share of total offers and total requests.
        sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
        buy_fraction = p2p_buy_request / ( total_buy + 1e-12)
        actual_sold = matched * sell_fraction
        actual_bought = matched * buy_fraction
    else:
        actual_sold = np.zeros(self.num_agents, dtype=np.float32)
        actual_bought = np.zeros(self.num_agents, dtype=np.float32)

    # Sold energy is attributed to the battery first (up to its offer);
    # only the remainder is taken from the solar surplus.
    from_batt = np.minimum(actual_sold, battery_offer)
    from_solar = actual_sold - from_batt

    final_surplus -= from_solar

    final_shortfall -= actual_bought
    soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
    self.battery_soc -= soc_reduction
    self.battery_soc = np.maximum(0.0, self.battery_soc)

    # ### INTER-CLUSTER TRANSFER ###
    # NOTE(review): the two sub-steps in each branch are laid out here as
    # siblings (the second runs even when the first is skipped); confirm
    # this nesting against the upstream file.
    if inter_cluster_transfer_kwh > 0:
        amount_received = inter_cluster_transfer_kwh

        # First use received energy to cover remaining shortfall, shared in
        # proportion to each agent's shortfall.
        total_shortfall_in_cluster = np.sum(final_shortfall)
        if total_shortfall_in_cluster > 1e-6:

            to_cover_shortfall = min(amount_received, total_shortfall_in_cluster)
            distribution_ratio = final_shortfall / total_shortfall_in_cluster
            shortfall_reduction = distribution_ratio * to_cover_shortfall
            final_shortfall -= shortfall_reduction

            amount_received -= to_cover_shortfall

        # Whatever is left is stored in batteries, shared in proportion to
        # each battery's free capacity. Energy that still does not fit is
        # not tracked any further in this method.
        if amount_received > 1e-6:

            cap_left = self.battery_max_capacity - self.battery_soc
            storable_energy = cap_left / (self.battery_charge_efficiency + 1e-9)
            total_storable_in_cluster = np.sum(storable_energy * self.has_battery)

            if total_storable_in_cluster > 1e-6:

                to_store = min(amount_received, total_storable_in_cluster)

                storage_ratio = storable_energy / total_storable_in_cluster
                energy_to_store_per_batt = storage_ratio * to_store

                self.battery_soc += (energy_to_store_per_batt * self.battery_charge_efficiency) * self.has_battery

    elif inter_cluster_transfer_kwh < 0:
        amount_to_send = abs(inter_cluster_transfer_kwh)

        # Outgoing energy is drawn from remaining solar surplus first,
        # in proportion to each agent's surplus.
        total_surplus_in_cluster = np.sum(final_surplus)
        if total_surplus_in_cluster > 1e-6:

            sent_from_surplus = min(amount_to_send, total_surplus_in_cluster)
            draw_ratio = final_surplus / total_surplus_in_cluster
            surplus_reduction = draw_ratio * sent_from_surplus
            final_surplus -= surplus_reduction
            amount_to_send -= sent_from_surplus

        # Any remainder is drawn from the batteries.
        if amount_to_send > 1e-6:

            available_from_batt = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
            total_available_from_batt = np.sum(available_from_batt)

            if total_available_from_batt > 1e-6:
                # Discharge a maximum of 'amount_to_send' from batteries
                to_discharge = min(amount_to_send, total_available_from_batt)

                # Draw this amount proportionally from each available battery
                discharge_ratio = available_from_batt / total_available_from_batt
                discharged_per_batt = discharge_ratio * to_discharge  # This is effective energy

                # Update SoC (energy drawn from battery before efficiency loss)
                soc_reduction = (discharged_per_batt / (self.battery_discharge_efficiency + 1e-9))
                self.battery_soc -= soc_reduction * self.has_battery
                self.battery_soc = np.maximum(0.0, self.battery_soc)
    # =======================================================================

    # ### GRID SETTLEMENT ###
    # As with P2P, the sign of (buy - sell) selects import or export.
    netGrid = a_buyGrid - a_sellGrid
    grid_import = np.maximum(0, netGrid) * final_shortfall
    grid_export = np.maximum(0, -netGrid) * final_surplus

    # Demand must always be met: shortfall the agent did not choose to
    # import is imported from the grid anyway.
    forced = np.maximum(final_shortfall - grid_import, 0.0)
    grid_import += forced
    final_shortfall -= forced

    # Net per-agent cost: purchases are positive, sales are negative.
    feed_in_tariff = self.feed_in_tariff
    costs = (
        (grid_import * grid_price)
        - (grid_export * feed_in_tariff)
        + (actual_bought * peer_price)
        - (actual_sold * peer_price)
    )

    final_rewards = self._compute_rewards(
        grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
        actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
        costs=costs, grid_price=grid_price, peer_price=peer_price
    )

    # Baseline grid import for this step, read from the per-house
    # no-P2P series selected for the current day.
    no_p2p_import_this_step = np.array([
        self.no_p2p_import_day[hid][self.current_step]
        for hid in self.house_ids
    ], dtype=np.float32)

    # --- Metric 1 & 2: Grid Reduction (Entire Day & Peak Hours) ---
    step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
    self.cumulative_grid_reduction += step_grid_reduction
    self.grid_reduction_timeseries.append(step_grid_reduction)

    # A step counts as "peak" when the grid price is within 1% of the
    # maximum grid price.
    if grid_price >= self.max_grid_price * 0.99:
        self.cumulative_grid_reduction_peak += step_grid_reduction

    # --- Metric 3: Total Cost Savings ---
    cost_no_p2p = no_p2p_import_this_step * grid_price
    step_cost_savings_per_agent = cost_no_p2p - costs
    self.agent_cost_savings += step_cost_savings_per_agent
    self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))

    # --- Metric 5 & 6: Battery Degradation Cost (Total and Over Time) ---
    # NOTE(review): only the self-consumption charge/discharge amounts are
    # counted here; battery energy sold via P2P or moved by the
    # inter-cluster transfer is not included -- confirm this is intended.
    degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
    step_degradation_cost = np.sum(degradation_cost_agent)

    self.cumulative_degradation_cost += step_degradation_cost
    self.degradation_cost_timeseries.append(step_degradation_cost)

    # Per-agent arrays plus step-level aggregates returned to the caller.
    info = {
        "p2p_buy": actual_bought,
        "p2p_sell": actual_sold,
        "grid_import_with_p2p": grid_import,
        "grid_import_no_p2p": no_p2p_import_this_step,
        "grid_export": grid_export,
        "costs": costs,
        "charge_amount": charge_amount,
        "discharge_amount": discharge_amount,
        "step": self.current_step,
        "step_grid_reduction": step_grid_reduction,
        "step_cost_savings": np.sum(step_cost_savings_per_agent),
        "step_degradation_cost": step_degradation_cost,
    }

    # Row layout matches the column list used by save_log().
    self.env_log.append([
        self.current_step, np.sum(grid_import), np.sum(grid_export),
        np.sum(actual_bought), np.sum(actual_sold), np.sum(costs)
    ])

    self.current_step += 1

    # The episode never terminates early; it is truncated at the end of
    # the configured number of steps.
    terminated = False
    truncated = (self.current_step >= self.num_steps)

    # The next observation reuses the peer price of the step just settled.
    obs_next = self._get_obs(peer_price=peer_price)
    info['agent_rewards'] = final_rewards
    self.last_info = info
    self.env_log_infos.append(info)
    return obs_next, final_rewards.sum(), terminated, truncated, info
603
+
604
+
605
+
606
def _get_obs(self, peer_price: float):
    """Build the per-agent observation matrix for the current timestep.

    The timestep index is clamped to the last valid step so that the method
    can still be called after the final step of an episode.

    Args:
        peer_price: Peer-to-peer price to expose to every agent.

    Returns:
        A ``float32`` array with one row per agent and eight columns, in
        this order: own demand, own solar, battery state-of-charge fraction
        (``-1.0`` for agents whose ``has_battery`` flag is not 1), grid
        price, peer price, summed demand of all other agents, summed solar
        of all other agents, and the hour of day.
    """
    t = min(self.current_step, self.num_steps - 1)
    n_agents = self.num_agents

    load = self.demands_day[t]
    generation = self.solars_day[t]

    # Agents without a battery get a sentinel instead of a real SOC value.
    soc_feature = np.where(
        self.has_battery == 1,
        self.battery_soc / (self.battery_max_capacity + 1e-9),
        -1.0,
    )

    columns = [
        load,
        generation,
        soc_feature,
        np.full(n_agents, self.get_grid_price(t)),
        np.full(n_agents, peer_price),
        load.sum() - load,              # everyone else's demand
        generation.sum() - generation,  # everyone else's solar
        np.full(n_agents, self.hours_day[t]),
    ]
    return np.stack(columns, axis=1).astype(np.float32)
629
+
630
+
631
def _compute_jains_index(self, usage_array):
    """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of the input.

    The values are rescaled by their largest magnitude before the index is
    evaluated. The index itself is scale-invariant, so this does not change
    the mathematical result, but it removes the bias that an additive
    epsilon in the denominator introduces for small-magnitude inputs
    (e.g. ``[1e-5, 1e-5]`` must score 1.0) and avoids overflow/underflow
    when squaring.

    Args:
        usage_array: Array-like of per-agent quantities.

    Returns:
        The index as a Python ``float``. Empty or all-zero input yields
        ``0.0``, which is what callers of this method have always received
        for those cases.
    """
    x = np.asarray(usage_array, dtype=np.float64).ravel()

    # Keep the historical convention: nothing to compare -> index of 0.
    if x.size == 0 or not np.any(x):
        return 0.0

    # After this division the largest |x| is 1, so sum(x^2) >= 1 and the
    # quotient below needs no epsilon guard.
    x = x / np.max(np.abs(x))
    return float(x.sum() ** 2 / (x.size * np.dot(x, x)))
636
+
637
+
638
def _compute_rewards(
    self, grid_import, grid_export, actual_sold, actual_bought,
    charge_amount, discharge_amount, costs, grid_price, peer_price
):
    """Combine cost, trading, battery and fairness terms into per-agent rewards.

    Starting from the negated cost, the reward is reduced by a
    price-weighted grid-import penalty, increased by a price-weighted P2P
    selling bonus and (only when the peer price is below the grid price) a
    P2P buying bonus, reduced by a penalty on the battery state of charge
    deviating from 50% and by a battery-throughput penalty, and finally
    increased by a shared bonus proportional to Jain's index of each
    agent's total P2P volume.

    Args:
        grid_import: Per-agent grid import.
        grid_export: Per-agent grid export (accepted but not used here).
        actual_sold: Per-agent energy sold to peers.
        actual_bought: Per-agent energy bought from peers.
        charge_amount: Per-agent battery charge.
        discharge_amount: Per-agent battery discharge.
        costs: Per-agent net cost.
        grid_price: Grid price for the step.
        peer_price: Peer price for the step.

    Returns:
        Array of per-agent rewards.
    """
    weight_grid = 0.3
    weight_sell = 0.5
    weight_buy = 0.5
    weight_soc = 0.1
    weight_degradation = 0.05
    weight_fairness = 0.4
    weight_cost = 1.0

    # Prices are expressed relative to the maximum grid price.
    grid_price_rel = grid_price / self.max_grid_price
    peer_price_rel = peer_price / self.max_grid_price

    rewards = -costs * weight_cost
    rewards -= weight_grid * grid_import * grid_price_rel
    rewards += weight_sell * actual_sold * peer_price_rel

    # Buying from peers is only rewarded when it undercuts the grid.
    price_gap_rel = (grid_price - peer_price) / self.max_grid_price
    rewards += np.where(
        peer_price < grid_price,
        weight_buy * actual_bought * price_gap_rel,
        0.0,
    )

    # ### VECTORIZED REWARD PENALTIES ###
    soc_level = self.battery_soc / (self.battery_max_capacity + 1e-9)
    rewards -= weight_soc * ((soc_level - 0.5) ** 2) * self.has_battery
    rewards -= weight_degradation * (charge_amount + discharge_amount) * self.battery_degradation_cost

    # Same fairness bonus for every agent, based on total P2P volume.
    fairness = self._compute_jains_index(actual_bought + actual_sold)
    rewards += weight_fairness * fairness
    return rewards
665
+
666
def save_log(self, filename="env_log.csv"):
    """Write the accumulated per-step aggregate log to a CSV file.

    Args:
        filename: Destination path of the CSV file.
    """
    header = ["Step"]
    header += ["Total_Grid_Import", "Total_Grid_Export"]
    header += ["Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost"]

    # Each entry of self.env_log is one row in the order given by header.
    log_frame = pd.DataFrame(self.env_log, columns=header)
    log_frame.to_csv(filename, index=False)
    print(f"Environment log saved to {filename}")
Other_algorithms/HC_MAPPO/HC_MAPPO_evaluation.py ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ from datetime import datetime
5
+ import re
6
+ import numpy as np
7
+ import torch
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+ import glob
11
+
12
+ # Allow imports from project root
13
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
14
+
15
+ from cluster import InterClusterCoordinator, InterClusterLedger
16
+ from Environment.cluster_env_wrapper import make_vec_env
17
+ from mappo.trainer.mappo import MAPPO
18
+ # Removed: from meanfield.trainer.meanfield import MFAC (Assuming you switched Inter-Agent to MAPPO)
19
+
20
def compute_jains_fairness(values: np.ndarray) -> float:
    """Compute Jain's fairness index ``(sum x)^2 / (n * sum x^2)``.

    The input is rescaled by its largest magnitude before the index is
    evaluated. The index is scale-invariant, so the mathematical result is
    unchanged, but this removes the bias an additive epsilon in the
    denominator causes for small-magnitude inputs and avoids
    overflow/underflow when squaring.

    Args:
        values: Array-like of per-participant quantities (NumPy array, list,
            tuple, ...). The index is normally defined for non-negative
            values; negative entries are accepted and simply enter the
            formula as they are.

    Returns:
        The index as a Python ``float``: ``0.0`` for empty input, ``1.0``
        when every entry is zero (all participants are treated identically),
        otherwise the value of the formula above.
    """
    x = np.asarray(values, dtype=np.float64).ravel()
    if x.size == 0:
        return 0.0
    if not np.any(x):
        return 1.0

    # After this division the largest |x| is 1, so sum(x^2) >= 1 and the
    # quotient below needs no epsilon guard.
    x = x / np.max(np.abs(x))
    return float(x.sum() ** 2 / (x.size * np.dot(x, x)))
29
+
30
+
31
+ def main():
32
+ # Configuration Parameters
33
+ # --- GENERALIZED PATHS AND NAMES ---
34
+ DATA_PATH = "./data/testing/test_data.csv"
35
+ MODEL_DIR = "./training_models/hierarchical_region_a_500agents_10size_final/models"
36
+
37
+ # Auto-detect state from model path
38
+ state_match = re.search(r"hierarchical_(region_a|region_b|region_c)_", MODEL_DIR)
39
+ if not state_match:
40
+ state_match = re.search(r"mappo_(region_a|region_b|region_c)_", MODEL_DIR)
41
+
42
+ if not state_match:
43
+ raise ValueError(
44
+ "Could not detect the state (region_a, region_b, or region_c) "
45
+ "from the model directory path."
46
+ )
47
+ detected_state = state_match.group(1)
48
+ # REMOVED: print(f"--- Detected state: {detected_state.upper()} ---")
49
+
50
+ # Auto-detect cluster size from model path
51
+ cluster_size_match = re.search(r'(\d+)size_', MODEL_DIR)
52
+ if not cluster_size_match:
53
+ raise ValueError("Could not detect the cluster size from the model directory path.")
54
+ detected_cluster_size = int(cluster_size_match.group(1))
55
+ # REMOVED: print(f"--- Detected cluster size: {detected_cluster_size} ---")
56
+
57
+ DAYS_TO_EVALUATE = 30
58
+ SOLAR_THRESHOLD = 0.1
59
+ MAX_TRANSFER_KWH = 1000000.0
60
+
61
+ W_COST_SAVINGS = 1.0
62
+ W_GRID_PENALTY = 0.5
63
+ W_P2P_BONUS = 0.2
64
+
65
+ # Environment Initialization
66
+ cluster_env = make_vec_env(
67
+ data_path=DATA_PATH,
68
+ time_freq="15T",
69
+ cluster_size=detected_cluster_size,
70
+ state=detected_state
71
+ )
72
+ n_clusters = cluster_env.num_envs
73
+ sample_subenv = cluster_env.cluster_envs[0]
74
+ eval_num_steps = sample_subenv.num_steps
75
+ # REMOVED: print(f"Number of steps per day: {eval_num_steps}")
76
+
77
+ # Load intra-cluster MAPPO agents
78
+ n_agents_per_cluster = sample_subenv.num_agents
79
+ local_dim = sample_subenv.observation_space.shape[-1]
80
+ global_dim = n_agents_per_cluster * local_dim
81
+ act_dim = sample_subenv.action_space[0].shape[-1]
82
+
83
+ # REMOVED: print(f"Creating and loading {n_clusters} independent low-level MAPPO agents...")
84
+ low_agents = []
85
+ for i in range(n_clusters):
86
+ agent = MAPPO(
87
+ n_agents=n_agents_per_cluster,
88
+ local_dim=local_dim,
89
+ global_dim=global_dim,
90
+ act_dim=act_dim,
91
+ lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=4, batch_size=512, episode_len=96
92
+ )
93
+ ckpt_pattern = os.path.join(MODEL_DIR, f"low_cluster{i}_ep*.pth")
94
+ ckpts_low = glob.glob(ckpt_pattern)
95
+ if not ckpts_low:
96
+ raise FileNotFoundError(f"No checkpoint found for cluster {i}.")
97
+ latest_low = sorted(ckpts_low, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
98
+ # REMOVED: print(f"Loading low-level policy for cluster {i} from: {latest_low}")
99
+ agent.load(latest_low)
100
+ agent.actor.eval()
101
+ agent.critic.eval()
102
+ low_agents.append(agent)
103
+
104
+ # Output Folder Setup
105
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
106
+ num_agents = sum(subenv.num_agents for subenv in cluster_env.cluster_envs)
107
+ run_name = f"eval_vectorized_{num_agents}agents_{DAYS_TO_EVALUATE}days_{timestamp}"
108
+ output_folder = os.path.join("runs_final_vectorized_eval", run_name)
109
+ logs_dir = os.path.join(output_folder, "logs")
110
+ plots_dir = os.path.join(output_folder, "plots")
111
+ for d in (logs_dir, plots_dir):
112
+ os.makedirs(d, exist_ok=True)
113
+ # REMOVED: print(f"Saving evaluation outputs to: {output_folder}")
114
+
115
+ # Load inter-cluster MAPPO agent
116
+ OBS_DIM_HI_LOCAL = 7
117
+ act_dim_inter = 2
118
+ OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL
119
+
120
+ # REMOVED: print(f"Initializing evaluation inter-agent (MAPPO): n_agents={n_clusters}, ...")
121
+
122
+ inter_agent = MAPPO(
123
+ n_agents=n_clusters,
124
+ local_dim=OBS_DIM_HI_LOCAL,
125
+ global_dim=OBS_DIM_HI_GLOBAL,
126
+ act_dim=act_dim_inter,
127
+ lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=4, batch_size=512, episode_len=96
128
+ )
129
+
130
+ ckpts_inter = glob.glob(os.path.join(MODEL_DIR, "inter_ep*.pth"))
131
+ if not ckpts_inter:
132
+ raise FileNotFoundError(f"No high-level checkpoints in {MODEL_DIR}")
133
+ latest_inter = sorted(ckpts_inter, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
134
+ # REMOVED: print("Loading inter-cluster policy from", latest_inter)
135
+ inter_agent.load(latest_inter)
136
+ inter_agent.actor.eval()
137
+ inter_agent.critic.eval()
138
+
139
+ # Instantiate Coordinator
140
+ ledger = InterClusterLedger()
141
+ coordinator = InterClusterCoordinator(
142
+ cluster_env, inter_agent, ledger, max_transfer_kwh=MAX_TRANSFER_KWH,
143
+ w_cost_savings=W_COST_SAVINGS, w_grid_penalty=W_GRID_PENALTY, w_p2p_bonus=W_P2P_BONUS
144
+ )
145
+
146
+ # Data collectors
147
+ all_logs = []
148
+ daily_summaries = []
149
+ step_timing_list = []
150
+
151
+ # Per-day evaluation
152
+ evaluation_start = time.time()
153
+ for day in range(1, DAYS_TO_EVALUATE + 1):
154
+ obs_clusters, _ = cluster_env.reset()
155
+ done_all = False
156
+ step_count = 0
157
+ day_logs = []
158
+
159
+ while not done_all and step_count < eval_num_steps:
160
+ step_start_time = time.time()
161
+ step_count += 1
162
+
163
+ # Get high-level actions
164
+ inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
165
+ inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
166
+ inter_cluster_obs_global = inter_cluster_obs_local.flatten()
167
+
168
+ with torch.no_grad():
169
+ high_level_action, _ = inter_agent.select_action(inter_cluster_obs_local, inter_cluster_obs_global)
170
+
171
+ # Build transfers
172
+ current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
173
+ exports, imports = coordinator.build_transfers(high_level_action, current_reports)
174
+
175
+ # Get low-level actions
176
+ batch_global_obs = obs_clusters.reshape(n_clusters, -1)
177
+ with torch.no_grad():
178
+ low_level_actions_list = []
179
+ for c_idx in range(n_clusters):
180
+ agent = low_agents[c_idx]
181
+ local_obs_cluster = obs_clusters[c_idx]
182
+ global_obs_cluster = batch_global_obs[c_idx]
183
+ actions, _ = agent.select_action(local_obs_cluster, global_obs_cluster)
184
+ low_level_actions_list.append(actions)
185
+ low_level_actions = np.stack(low_level_actions_list)
186
+
187
+ # Step the environment
188
+ next_obs, rewards, done_all, step_info = cluster_env.step(
189
+ low_level_actions, exports=exports, imports=imports
190
+ )
191
+
192
+ # Advance the state
193
+ obs_clusters = next_obs
194
+
195
+ # Timing and console printout
196
+ step_duration = time.time() - step_start_time
197
+ # REMOVED: print(f"[Day {day}, Step {step_count}] Step time: {step_duration:.6f} seconds")
198
+ step_timing_list.append({"day": day, "step": step_count, "step_time_s": step_duration})
199
+
200
+ # Consolidated Logging (Keep math)
201
+ infos = step_info.get("cluster_infos")
202
+ for c_idx, subenv in enumerate(cluster_env.cluster_envs):
203
+ grid_price_now = subenv.get_grid_price(step_count - 1)
204
+ peer_price_now = step_info.get("peer_price_global")
205
+ if peer_price_now is None:
206
+ demands_step = subenv.demands_day[step_count-1]
207
+ solars_step = subenv.solars_day[step_count-1]
208
+ surplus = np.maximum(solars_step - demands_step, 0.0).sum()
209
+ shortfall = np.maximum(demands_step - solars_step, 0.0).sum()
210
+ peer_price_now = subenv.get_peer_price(step_count -1, surplus, shortfall)
211
+
212
+ for i, hid in enumerate(subenv.house_ids):
213
+ is_battery_house = hid in subenv.batteries
214
+ charge = infos["charge_amount"][c_idx][i]
215
+ discharge = infos["discharge_amount"][c_idx][i]
216
+ day_logs.append({
217
+ "day": day, "step": step_count - 1, "house": hid, "cluster": c_idx,
218
+ "grid_import_no_p2p": infos["grid_import_no_p2p"][c_idx][i],
219
+ "grid_import_with_p2p": infos["grid_import_with_p2p"][c_idx][i],
220
+ "grid_export": infos["grid_export"][c_idx][i],
221
+ "p2p_buy": infos["p2p_buy"][c_idx][i], "p2p_sell": infos["p2p_sell"][c_idx][i],
222
+ "actual_cost": infos["costs"][c_idx][i],
223
+ "baseline_cost": infos["grid_import_no_p2p"][c_idx][i] * grid_price_now,
224
+ "total_demand": subenv.demands_day[step_count-1, i],
225
+ "total_solar": subenv.solars_day[step_count-1, i],
226
+ "grid_price": grid_price_now, "peer_price": peer_price_now,
227
+ "soc": (subenv.battery_soc[i] / subenv.battery_max_capacity[i]) if is_battery_house and subenv.battery_max_capacity[i] > 0 else np.nan,
228
+ "degradation_cost": (charge + discharge) * subenv.battery_degradation_cost[i] if is_battery_house else 0.0,
229
+ "reward": infos["agent_rewards"][c_idx][i],
230
+ })
231
+
232
+ step_duration = time.time() - step_start_time
233
+
234
+ # End of day: aggregate & summarize (Keep math)
235
+ df_day = pd.DataFrame(day_logs)
236
+ if df_day.empty: continue
237
+ all_logs.extend(day_logs)
238
+
239
+ grouped_house = df_day.groupby("house").sum(numeric_only=True)
240
+ grouped_step = df_day.groupby("step").sum(numeric_only=True)
241
+
242
+ total_demand = grouped_step["total_demand"].sum()
243
+ total_solar = grouped_step["total_solar"].sum()
244
+ total_p2p_buy = df_day['p2p_buy'].sum()
245
+ total_p2p_sell = df_day['p2p_sell'].sum()
246
+
247
+ baseline_cost_per_house = grouped_house["baseline_cost"]
248
+ actual_cost_per_house = grouped_house["actual_cost"]
249
+ cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
250
+ day_total_cost_savings = cost_savings_per_house.sum()
251
+
252
+ overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum() if baseline_cost_per_house.sum() > 0 else 0.0
253
+
254
+ baseline_import_per_house = grouped_house["grid_import_no_p2p"]
255
+ actual_import_per_house = grouped_house["grid_import_with_p2p"]
256
+ import_reduction_per_house = baseline_import_per_house - actual_import_per_house
257
+ day_total_import_reduction = import_reduction_per_house.sum()
258
+
259
+ overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum() if baseline_import_per_house.sum() > 0 else 0.0
260
+
261
+ fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
262
+ fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
263
+ fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
264
+ fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
265
+
266
+ daily_summaries.append({
267
+ "day": day, "day_total_demand": total_demand, "day_total_solar": total_solar,
268
+ "day_p2p_buy": total_p2p_buy, "day_p2p_sell": total_p2p_sell,
269
+ "cost_savings_abs": day_total_cost_savings, "cost_savings_pct": overall_cost_savings_pct,
270
+ "fairness_cost_savings": fairness_cost_savings, "grid_reduction_abs": day_total_import_reduction,
271
+ "grid_reduction_pct": overall_import_reduction_pct,
272
+ "fairness_grid_reduction": fairness_import_reduction, "fairness_reward": fairness_rewards,
273
+ "fairness_p2p_buy": compute_jains_fairness(grouped_house["p2p_buy"].values),
274
+ "fairness_p2p_sell": compute_jains_fairness(grouped_house["p2p_sell"].values),
275
+ "fairness_p2p_total": fairness_p2p_total,
276
+ })
277
+
278
+ # Final Processing and Saving (Keep saving, remove print summary)
279
+ evaluation_end = time.time()
280
+ total_eval_time = evaluation_end - evaluation_start
281
+
282
+ all_days_df = pd.DataFrame(all_logs)
283
+ if not all_days_df.empty:
284
+ # Save step-level logs
285
+ combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
286
+ all_days_df.to_csv(combined_csv_path, index=False)
287
+
288
+ # Save timing logs
289
+ step_timing_df = pd.DataFrame(step_timing_list)
290
+ timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
291
+ step_timing_df.to_csv(timing_csv_path, index=False)
292
+
293
+ # Save house-level summary
294
+ house_level_df = all_days_df.groupby("house").agg({
295
+ "baseline_cost": "sum", "actual_cost": "sum", "grid_import_no_p2p": "sum",
296
+ "grid_import_with_p2p": "sum", "degradation_cost": "sum"
297
+ })
298
+ house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
299
+ house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
300
+ house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
301
+ house_level_df.to_csv(house_summary_csv)
302
+
303
+ # Calculate Final Summary Metrics (For saving to the final row)
304
+ daily_summary_df = pd.DataFrame(daily_summaries)
305
+ fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
306
+ fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
307
+ total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
308
+ total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
309
+ pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
310
+ total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
311
+ total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
312
+ pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
313
+ total_degradation_cost_all = all_days_df["degradation_cost"].sum()
314
+ agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
315
+ num_agents_total = len(all_days_df['house'].unique())
316
+ sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_total)
317
+ sunny_df = all_days_df[all_days_df.set_index(['day', 'step']).index.isin(sunny_steps_mask[sunny_steps_mask].index)]
318
+ baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
319
+ actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
320
+ grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
321
+ baseline_cost_sunny = sunny_df['baseline_cost'].sum()
322
+ actual_cost_sunny = sunny_df['actual_cost'].sum()
323
+ cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
324
+ total_p2p_buy = all_days_df['p2p_buy'].sum()
325
+ total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
326
+ community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
327
+ total_p2p_sell = all_days_df['p2p_sell'].sum()
328
+ total_grid_export = all_days_df['grid_export'].sum()
329
+ solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0
330
+
331
+
332
+ final_row = {
333
+ "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all, "cost_savings_pct": pct_cost_savings_all,
334
+ "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all,
335
+ "fairness_cost_savings": fairness_cost_all, "fairness_grid_reduction": fairness_grid_all,
336
+ "total_degradation_cost": total_degradation_cost_all,
337
+ "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
338
+ "cost_savings_sunny_hours_pct": cost_savings_sunny_pct,
339
+ "community_sourcing_rate_pct": community_sourcing_rate_pct,
340
+ "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
341
+ }
342
+ final_row_df = pd.DataFrame([final_row])
343
+
344
+ if not daily_summary_df.empty:
345
+ daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
346
+
347
+ summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
348
+ daily_summary_df.to_csv(summary_csv, index=False)
349
+ print("Evaluation run completed. All data logs (CSVs) and plots saved to disk.")
350
+
351
+ # --- Plots follow (no changes needed here, as the previous request already cleaned them up) ---
352
+
353
+ plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
354
+ plot_daily_df["day"] = plot_daily_df["day"].astype(int)
355
+
356
+ # 1) Daily Cost Savings Percentage
357
+ plt.figure(figsize=(12, 6))
358
+ plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
359
+ plt.xlabel("Day")
360
+ plt.ylabel("Cost Savings (%)")
361
+ plt.title("Daily Community Cost Savings Percentage")
362
+ plt.xticks(plot_daily_df["day"])
363
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
364
+ plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
365
+ plt.close()
366
+
367
+ # 2) Daily Total Demand vs. Solar
368
+ plt.figure(figsize=(12, 6))
369
+ bar_width = 0.4
370
+ days = plot_daily_df["day"]
371
+ plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
372
+ plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
373
+ plt.xlabel("Day")
374
+ plt.ylabel("Energy (kWh)")
375
+ plt.title("Total Community Demand vs. Solar Generation Per Day")
376
+ plt.xticks(days)
377
+ plt.legend()
378
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
379
+ plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
380
+ plt.close()
381
+
382
+ # 3) Combined Time Series of Energy Flows
383
+ step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
384
+ step_group["global_step"] = (step_group["day"] - 1) * eval_num_steps + step_group["step"]
385
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
386
+
387
+ ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
388
+ ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
389
+ ax1.set_ylabel("Energy (kWh)")
390
+ ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
391
+ ax1.legend()
392
+ ax1.grid(True, linestyle='--', alpha=0.6)
393
+
394
+ ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
395
+ ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
396
+ ax2.set_xlabel("Global Timestep")
397
+ ax2.set_ylabel("Energy (kWh)")
398
+ ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
399
+ ax2.legend()
400
+ ax2.grid(True, linestyle='--', alpha=0.6)
401
+
402
+ plt.tight_layout()
403
+ plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
404
+ plt.close()
405
+
406
+ # 4) Stacked Bar of Daily Energy Sources
407
+ daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
408
+
409
+ plt.figure(figsize=(12, 7))
410
+ plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
411
+ plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
412
+ plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
413
+
414
+ plt.xlabel("Day")
415
+ plt.ylabel("Energy (kWh)")
416
+ plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
417
+ plt.xticks(daily_agg.index)
418
+ plt.legend()
419
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
420
+ plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
421
+ plt.close()
422
+
423
+ # 5) Fairness Metrics Over Time
424
+ plt.figure(figsize=(12, 6))
425
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
426
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
427
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
428
+ plt.xlabel("Day")
429
+ plt.ylabel("Jain's Fairness Index")
430
+ plt.title("Daily Fairness Metrics")
431
+ plt.xticks(plot_daily_df["day"])
432
+ plt.ylim(0, 1.05)
433
+ plt.legend()
434
+ plt.grid(True, linestyle='--', alpha=0.7)
435
+ plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
436
+ plt.close()
437
+
438
+ # 6) Per-House Savings and Reductions
439
+ fig, ax1 = plt.subplots(figsize=(15, 7))
440
+ house_ids_str = house_level_df.index.astype(str)
441
+ bar_width = 0.4
442
+ index = np.arange(len(house_ids_str))
443
+ color1 = 'tab:green'
444
+ ax1.set_xlabel('House ID')
445
+ ax1.set_ylabel('Total Cost Savings ($)', color=color1)
446
+ ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
447
+ ax1.tick_params(axis='y', labelcolor=color1)
448
+ ax1.set_xticks(index)
449
+ ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
450
+ ax2 = ax1.twinx()
451
+ color2 = 'tab:blue'
452
+ ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
453
+ ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
454
+ ax2.tick_params(axis='y', labelcolor=color2)
455
+ plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {DAYS_TO_EVALUATE} days)')
456
+ fig.tight_layout()
457
+ plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
458
+ plt.close()
459
+
460
+ # 7) Price Dynamics for a Single Day
461
+ day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
462
+ plt.figure(figsize=(12, 6))
463
+ plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
464
+ plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
465
+ plt.xlabel("Timestep of Day")
466
+ plt.ylabel("Price ($/kWh)")
467
+ plt.title("Price Dynamics on Day 1")
468
+ plt.legend()
469
+ plt.grid(True, linestyle='--', alpha=0.6)
470
+ plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
471
+ plt.close()
472
+
473
+ # 8) Battery State of Charge (SoC) for a Sample of Houses
474
+ day1_df = all_days_df[all_days_df['day'] == 1]
475
+ battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
476
+
477
+ if len(battery_houses) > 0:
478
+ sample_houses = battery_houses[:min(4, len(battery_houses))]
479
+ plt.figure(figsize=(12, 6))
480
+ for house in sample_houses:
481
+ house_df = day1_df[day1_df['house'] == house]
482
+ plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
483
+
484
+ plt.xlabel("Timestep of Day")
485
+ plt.ylabel("State of Charge (%)")
486
+ plt.title("Battery SoC on Day 1 for Sample Houses")
487
+ plt.legend()
488
+ plt.grid(True, linestyle='--', alpha=0.6)
489
+ plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
490
+ plt.close()
491
+
492
+ print("Evaluation run completed. All data logs (CSVs) and plots saved to disk.")
493
+
494
+
495
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Other_algorithms/HC_MAPPO/HC_MAPPO_train.py ADDED
@@ -0,0 +1,579 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ from datetime import datetime, timedelta
5
+ import re
6
+
7
+ import numpy as np
8
+ import torch
9
+ import pandas as pd
10
+ import matplotlib.pyplot as plt
11
+
12
+ # Allow imports from project root
13
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
14
+
15
+ from cluster import InterClusterCoordinator, InterClusterLedger
16
+ from Environment.cluster_env_wrapper import make_vec_env
17
+ from mappo.trainer.mappo import MAPPO
18
+
19
+
20
def recursive_sum(item):
    """Return the sum of every real number found in an arbitrarily nested structure.

    Lists, tuples, NumPy arrays and any other non-string iterable are walked
    recursively. Leaves that are not real numbers (``None``, strings, complex
    values with a non-zero imaginary part, arbitrary objects) contribute 0.

    Args:
        item: A number, a (possibly ragged) nesting of iterables of numbers,
            or any other object.

    Returns:
        The accumulated total; ``0`` when nothing numeric is found.
    """
    # A 0-d ndarray exposes __iter__ but raises TypeError when iterated, so
    # unwrap it into the plain scalar/object it holds before dispatching.
    if isinstance(item, np.ndarray) and item.ndim == 0:
        item = item.item()

    # Strings are iterable but never numeric.
    if isinstance(item, str):
        return 0

    if hasattr(item, '__iter__'):
        total = 0
        for sub_item in item:
            total += recursive_sum(sub_item)
        return total

    # np.isreal() alone is not a safe numeric test: it reports True for
    # arbitrary objects such as None, which would then break the addition.
    # Require an actual numeric type first.
    if isinstance(item, (int, float, complex, np.number, np.bool_)) and np.isreal(item):
        return 0 + item

    # Non-numeric, non-iterable leaves are ignored.
    return 0
31
+
32
+
33
def _aggregate_cluster_metrics(cluster_metrics, episode):
    """Collapse the per-cluster episode metrics into one system-wide row.

    Args:
        cluster_metrics: List of metric dicts, one per cluster environment,
            as returned by ``cluster_env.call('get_episode_metrics')``.
        episode: Episode number the metrics belong to.

    Returns:
        Dict with summed reductions/savings/degradation cost, the mean of the
        per-cluster fairness index, and the ``"Episode"`` key.
    """
    return {
        "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in cluster_metrics),
        "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in cluster_metrics),
        "total_cost_savings": sum(m["total_cost_savings"] for m in cluster_metrics),
        "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in cluster_metrics),
        # For fairness, we average the fairness index across clusters
        "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in cluster_metrics]),
        "Episode": episode,
    }


def main():
    """Train the hierarchical MAPPO system and write models, logs and plots.

    One low-level MAPPO agent is created per cluster, plus a single high-level
    (inter-cluster) MAPPO agent whose "agents" are the clusters themselves.

    Training runs in two phases:

    * Phase 1 (``ep < JOINT_TRAINING_START_EPISODE``): only the low-level
      agents act on transfers-free steps and are updated.
    * Phase 2: the high-level agent proposes inter-cluster transfers each
      step; updates alternate in cycles of ``FREEZE_HIGH_FOR_EPISODES``
      episodes of low-level updates followed by ``FREEZE_LOW_FOR_EPISODES``
      episodes of high-level updates.

    Side effects: creates a timestamped run directory containing model
    checkpoints, ``training_performance_log.csv`` and the plots produced by
    ``generate_plots``.

    Raises:
        ValueError: If the number of houses cannot be parsed from DATA_PATH.
    """
    overall_start_time = time.time()

    # Training Configuration Parameters
    STATE_TO_RUN = "oklahoma"  # the original note lists "colorado" and "oklahoma" as options
    DATA_PATH = "data/training/1000houses_152days_TRAIN.csv"

    # Dynamically extract the number of agents from the file path
    match = re.search(r'(\d+)houses', DATA_PATH)
    if not match:
        raise ValueError("Could not extract the number of houses from DATA_PATH.")
    NUMBER_OF_AGENTS = int(match.group(1))

    CLUSTER_SIZE = 10
    NUM_EPISODES = 10000
    BATCH_SIZE = 256
    # NOTE: this interval is larger than NUM_EPISODES, so with the current
    # values only the final episode is checkpointed. Lower it to get
    # intermediate checkpoints.
    CHECKPOINT_INTERVAL = 100000
    MAX_TRANSFER_KWH = 100000

    LR = 2e-4
    GAMMA = 0.95
    LAMBDA = 0.95
    CLIP_EPS = 0.2
    K_EPOCHS = 4

    JOINT_TRAINING_START_EPISODE = 2000
    FREEZE_HIGH_FOR_EPISODES = 20
    FREEZE_LOW_FOR_EPISODES = 10
    # Length of one alternating-update cycle in phase 2
    CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES

    # Build run directories
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = (
        f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_"
        f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
    )
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    models_dir = os.path.join(root_dir, "models")
    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")

    for d in (models_dir, logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Logging to: {root_dir}")

    # Environment & Agent Initialization

    # Instantiate the environment using the vectorized environment factory.
    cluster_env = make_vec_env(
        data_path=DATA_PATH,
        time_freq="15T",
        cluster_size=CLUSTER_SIZE,
        state=STATE_TO_RUN
    )

    # Get environment parameters from the vectorized environment object
    n_clusters = cluster_env.num_envs
    sample_subenv = cluster_env.cluster_envs[0]  # Access a sample sub-env
    n_agents_per_cluster = sample_subenv.num_agents
    steps_per_episode = sample_subenv.num_steps

    local_dim = sample_subenv.observation_space.shape[-1]
    global_dim = n_agents_per_cluster * local_dim
    # Access the action dim from the first part of the Tuple action space
    act_dim = sample_subenv.action_space[0].shape[-1]
    # Total number of transitions collected each episode across all clusters.
    # NOTE(review): this value is only printed; each agent below is built with
    # episode_len=steps_per_episode, so the message wording looks stale.
    total_buffer_size = steps_per_episode * n_clusters
    print(f"Low-level agent buffer size set to: {total_buffer_size}")

    print(f"Created {n_clusters} clusters.")
    print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
          f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")

    print(f"Creating {n_clusters} independent low-level MAPPO agents...")
    # One independent agent per cluster; each buffer is sized to one episode.
    low_agents = [
        MAPPO(
            n_agents=n_agents_per_cluster,
            local_dim=local_dim,
            global_dim=global_dim,
            act_dim=act_dim,
            lr=LR,
            gamma=GAMMA,
            lam=LAMBDA,
            clip_eps=CLIP_EPS,
            k_epochs=K_EPOCHS,
            batch_size=BATCH_SIZE,
            episode_len=steps_per_episode
        )
        for _ in range(n_clusters)
    ]

    # Define dimensions for the high-level MAPPO agent
    OBS_DIM_HI_LOCAL = 7  # Each cluster has 7 features for its local state
    act_dim_inter = 2     # Export/Import preference for each cluster

    # The global state for the high-level agent is the concatenation
    # of all high-level local states
    OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL

    print(f"Inter-cluster agent (MAPPO): n_agents={n_clusters}, "
          f"local_dim={OBS_DIM_HI_LOCAL}, global_dim={OBS_DIM_HI_GLOBAL}, act_dim={act_dim_inter}")

    # Instantiate MAPPO for the inter-cluster agent
    inter_agent = MAPPO(
        n_agents=n_clusters,
        local_dim=OBS_DIM_HI_LOCAL,
        global_dim=OBS_DIM_HI_GLOBAL,
        act_dim=act_dim_inter,
        lr=LR,
        gamma=GAMMA,
        lam=LAMBDA,
        clip_eps=CLIP_EPS,
        k_epochs=K_EPOCHS,
        batch_size=BATCH_SIZE,
        episode_len=steps_per_episode
    )

    ledger = InterClusterLedger()
    coordinator = InterClusterCoordinator(
        cluster_env,
        inter_agent,
        ledger,
        max_transfer_kwh=MAX_TRANSFER_KWH
    )

    # Training loop
    total_steps = 0
    episode_log_data = []
    performance_metrics_log = []
    intra_log = {}
    inter_log = {}
    total_log = {}
    cost_log = {}

    num_low_level_agents = n_clusters * n_agents_per_cluster
    get_price_fn = cluster_env.cluster_envs[0].get_grid_price

    for ep in range(1, NUM_EPISODES + 1):
        step_count = 0
        start_time = time.time()
        ep_total_inter_cluster_reward = 0.0
        day_logs = []

        obs_clusters, _ = cluster_env.reset()

        if ep > 1:
            # Collect the metrics of the episode that just finished from every
            # cluster env and aggregate them into a system-wide summary.
            # NOTE(review): this is read *after* reset(); it presumably relies
            # on the env keeping the previous episode's metrics across reset —
            # confirm against the environment implementation.
            all_cluster_metrics = cluster_env.call('get_episode_metrics')
            performance_metrics_log.append(
                _aggregate_cluster_metrics(all_cluster_metrics, ep - 1)
            )

        # Use a single 'done' flag for the episode
        done_all = False

        # Per-agent reward accumulator for the episode
        cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)

        # Determine training phase
        is_phase_1 = ep < JOINT_TRAINING_START_EPISODE

        if ep == 1:
            print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
        if ep == JOINT_TRAINING_START_EPISODE:
            print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")

        # The main loop continues as long as the episode is not done
        while not done_all:
            total_steps += 1
            step_count += 1

            # Action Selection (Low-Level)
            # Each cluster's global observation is its agents' local
            # observations flattened into one vector.
            batch_global_obs = obs_clusters.reshape(n_clusters, -1)
            low_level_actions_list = []
            low_level_logps_list = []

            for c_idx in range(n_clusters):
                actions, logps = low_agents[c_idx].select_action(
                    obs_clusters[c_idx], batch_global_obs[c_idx]
                )
                low_level_actions_list.append(actions)
                low_level_logps_list.append(logps)

            low_level_actions = np.stack(low_level_actions_list)
            low_level_logps = np.stack(low_level_logps_list)

            # Action Selection & Transfers (High-Level, Phase 2 only)
            if is_phase_1:
                exports, imports = None, None
            else:
                inter_cluster_obs_local = np.array(
                    [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
                )

                # Create the global state for the high-level agent
                inter_cluster_obs_global = inter_cluster_obs_local.flatten()

                # Call select_action with local and global states
                high_level_action, high_level_logp = inter_agent.select_action(
                    inter_cluster_obs_local,
                    inter_cluster_obs_global
                )

                current_reports = {
                    i: {
                        'export_capacity': cluster_env.get_export_capacity(i),
                        'import_capacity': cluster_env.get_import_capacity(i),
                    }
                    for i in range(n_clusters)
                }
                exports, imports = coordinator.build_transfers(high_level_action, current_reports)

            # Environment Step (the aggregate reward return value is unused;
            # per-agent rewards are taken from the info dict instead)
            next_obs_clusters, _, done_all, step_info = cluster_env.step(
                low_level_actions, exports=exports, imports=imports
            )
            cluster_infos = step_info.get("cluster_infos")
            day_logs.append({
                "costs": cluster_infos["costs"],
                "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
                "charge_amount": cluster_infos.get("charge_amount"),
                "discharge_amount": cluster_infos.get("discharge_amount")
            })

            # Reward Calculation and Data Storage
            per_agent_rewards = np.stack(cluster_infos['agent_rewards'])
            rewards_for_buffer = per_agent_rewards

            if not is_phase_1:
                high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
                    all_cluster_infos=cluster_infos,
                    actual_transfers=(exports, imports),
                    step_count=step_count
                )
                ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster)

                # Get next state for high-level agent's buffer
                next_inter_cluster_obs_local = np.array(
                    [coordinator.get_cluster_state(se, step_count + 1) for se in cluster_env.cluster_envs]
                )

                # Create the next global state
                next_inter_cluster_obs_global = next_inter_cluster_obs_local.flatten()

                # Store the transition in the high-level MAPPO agent's buffer
                inter_agent.store(
                    inter_cluster_obs_local,         # s_local
                    inter_cluster_obs_global,        # s_global
                    high_level_action,               # action
                    high_level_logp,                 # log_prob
                    high_level_rewards_per_cluster,  # reward
                    [done_all] * n_clusters,         # done
                    next_inter_cluster_obs_global    # s'_global
                )

                # Share each cluster's high-level reward equally among its
                # agents as a bonus on top of their own rewards.
                bonus_per_agent = np.zeros_like(per_agent_rewards)
                num_agents_in_cluster = per_agent_rewards.shape[1]
                if num_agents_in_cluster > 0:
                    for c_idx in range(n_clusters):
                        bonus_per_agent[c_idx, :] = (
                            high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
                        )
                rewards_for_buffer = per_agent_rewards + bonus_per_agent

            # Data Storage (Low-Level)
            # NOTE(review): store() is called for both levels every step, even
            # for the side that is frozen in the current phase-2 cycle; whether
            # the MAPPO buffer tolerates this without an update() depends on
            # its implementation — confirm.
            dones_list = step_info.get("cluster_dones")
            for idx in range(n_clusters):
                low_agents[idx].store(
                    obs_clusters[idx],
                    batch_global_obs[idx],
                    low_level_actions[idx],
                    low_level_logps[idx],
                    rewards_for_buffer[idx],
                    dones_list[idx],
                    next_obs_clusters[idx].reshape(-1)
                )

            # Logged reward excludes the high-level bonus
            cluster_rewards += per_agent_rewards
            obs_clusters = next_obs_clusters

        # Agent Updates (End of Episode)
        if is_phase_1:
            for agent in low_agents:
                agent.update()
        else:
            phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
            position_in_cycle = phase2_episode_num % CYCLE_LENGTH

            if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
                print("Updating ALL LOW-LEVEL agents (High-level is frozen).")
                for agent in low_agents:
                    agent.update()
            else:
                print("Updating HIGH-LEVEL agent (Low-level is frozen).")
                inter_agent.update()

        # Unified End-of-Episode Logging
        duration = time.time() - start_time

        # Calculate Costs & Cost Reduction
        # recursive_sum flattens whatever nesting the env returns into a
        # single number per step.
        # NOTE(review): the price lookup uses the 0-based enumerate index,
        # whereas step_count handed to the coordinator is 1-based — confirm
        # which convention get_grid_price expects.
        baseline_costs_per_step = [
            recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
            for i, entry in enumerate(day_logs)
        ]
        total_baseline_cost = sum(baseline_costs_per_step)

        # Apply the same robust method to the actual costs
        actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
        total_actual_cost = sum(actual_costs_per_step)

        cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0

        # Calculate All Reward Metrics
        # Intra-Cluster (Low-Level) Rewards
        total_reward_intra = cluster_rewards.sum()
        mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0

        # Inter-Cluster (High-Level) Rewards
        total_reward_inter = ep_total_inter_cluster_reward
        mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0

        # Total System Rewards
        total_reward_system = total_reward_intra + total_reward_inter
        mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0

        # Populate Logs for Plotting (consumed by generate_plots)
        intra_log.setdefault('total', []).append(total_reward_intra)
        intra_log.setdefault('mean', []).append(mean_reward_intra)
        inter_log.setdefault('total', []).append(total_reward_inter)
        inter_log.setdefault('mean', []).append(mean_reward_inter)
        total_log.setdefault('total', []).append(total_reward_system)
        total_log.setdefault('mean', []).append(mean_reward_system)
        cost_log.setdefault('total_cost', []).append(total_actual_cost)
        cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)

        # Populate the Main Log for the Final CSV File
        episode_log_data.append({
            "Episode": ep,
            "Mean_Reward_System": mean_reward_system,
            "Mean_Reward_Intra": mean_reward_intra,
            "Mean_Reward_Inter": mean_reward_inter,
            "Total_Reward_System": total_reward_system,
            "Total_Reward_Intra": total_reward_intra,
            "Total_Reward_Inter": total_reward_inter,
            "Cost_Reduction_Pct": cost_reduction_pct,
            "Episode_Duration": duration,
        })

        # Print Final Episode Summary
        print(f"Ep {ep}/{NUM_EPISODES} | "
              f"Mean System R: {mean_reward_system:.3f} | "
              f"Cost Red: {cost_reduction_pct:.1f}% | "
              f"Time: {duration:.2f}s")

        if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
            for c_idx, agent in enumerate(low_agents):
                agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
            inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
            print(f"Saved checkpoint at episode {ep}")

    print("Training completed! Aggregating final logs...")

    # Capture the metrics for the very last episode (the in-loop collection
    # only covers episodes 1..NUM_EPISODES-1).
    final_cluster_metrics = cluster_env.call('get_episode_metrics')
    performance_metrics_log.append(
        _aggregate_cluster_metrics(final_cluster_metrics, NUM_EPISODES)
    )

    # Create, Merge, and Save Final DataFrame
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Add total training time to the dataframe before saving. This extra row
    # carries a string in the "Episode" column.
    total_duration_seconds = time.time() - overall_start_time
    total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    # Reorder and select columns for the final CSV
    columns_to_save = [
        "Episode",
        "Mean_Reward_System",
        "Mean_Reward_Intra",
        "Mean_Reward_Inter",
        "Total_Reward_System",
        "Total_Reward_Intra",
        "Total_Reward_Inter",
        "Cost_Reduction_Pct",
        "battery_degradation_cost_total",
        "Episode_Duration",
        "total_cost_savings",
        "grid_reduction_entire_day",
        "fairness_on_cost_savings"
    ]
    df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
    df_to_save.to_csv(log_csv_path, index=False)
    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    generate_plots(
        plots_dir=plots_dir,
        num_episodes=NUM_EPISODES,
        intra_log=intra_log,
        inter_log=inter_log,
        total_log=total_log,
        cost_log=cost_log,
        df_final_log=df_final_log
    )

    # Re-measure so the printed total also includes plot generation.
    total_duration_seconds = time.time() - overall_start_time
    # Format into hours, minutes, seconds
    total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))

    print("\n" + "="*50)
    print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
    print("="*50)
470
+
471
+
472
def _moving_avg(series, window):
    """Return a centered rolling mean of *series*; short edges use fewer points."""
    return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()


def _save_total_and_mean_plot(out_path, episodes, log, ma_window, *,
                              total_label, mean_label,
                              total_ylabel, mean_ylabel,
                              total_color, mean_color, title,
                              color_total_line=True):
    """Save a twin-axis figure with smoothed log['total'] (left) and log['mean'] (right)."""
    fig, ax = plt.subplots(figsize=(12, 7))

    total_style = {"label": total_label, "linewidth": 2}
    if color_total_line:
        total_style["color"] = total_color
    ax.plot(episodes, _moving_avg(log['total'], ma_window), **total_style)
    ax.set_xlabel("Episode")
    ax.set_ylabel(total_ylabel, color=total_color)
    ax.tick_params(axis='y', labelcolor=total_color)
    ax.grid(True)

    ax2 = ax.twinx()
    ax2.plot(episodes, _moving_avg(log['mean'], ma_window), label=mean_label,
             linewidth=2, linestyle='--', color=mean_color)
    ax2.set_ylabel(mean_ylabel, color=mean_color)
    ax2.tick_params(axis='y', labelcolor=mean_color)

    fig.suptitle(title)
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    fig.savefig(out_path, dpi=200)
    plt.close(fig)


def generate_plots(
    plots_dir: str,
    num_episodes: int,
    intra_log: dict,
    inter_log: dict,
    total_log: dict,
    cost_log: dict,
    df_final_log: pd.DataFrame
):
    """
    Generates and saves all final plots after training is complete.

    Args:
        plots_dir: Directory the PNG files are written to.
        num_episodes: Number of episodes; the x-axis is 1..num_episodes, so
            every logged series must have exactly this many entries.
        intra_log: Dict with 'total' and 'mean' per-episode low-level rewards.
        inter_log: Dict with 'total' and 'mean' per-episode high-level rewards.
        total_log: Dict with 'total' and 'mean' per-episode system rewards.
        cost_log: Dict with 'total_cost' and 'cost_without_p2p' per-episode lists.
        df_final_log: Merged per-episode log; must contain the 'Episode' and
            'battery_degradation_cost_total' columns.
    """
    print("Training completed! Generating plots…")

    ma_window = 120
    episodes = np.arange(1, num_episodes + 1)

    # Plot 1: Intra-cluster (Low-Level) Rewards
    # The total line keeps matplotlib's default cycle colour, as before.
    _save_total_and_mean_plot(
        os.path.join(plots_dir, "1_intra_cluster_rewards.png"),
        episodes, intra_log, ma_window,
        total_label=f'Total Reward (MA {ma_window})',
        mean_label=f'Mean Reward (MA {ma_window})',
        total_ylabel="Total Intra-Cluster Reward",
        mean_ylabel="Mean Intra-Cluster Reward",
        total_color='tab:blue',
        mean_color='tab:cyan',
        title="Intra-Cluster (Low-Level Agent) Rewards",
        color_total_line=False,
    )

    # Plot 2: Inter-cluster (High-Level) Rewards
    _save_total_and_mean_plot(
        os.path.join(plots_dir, "2_inter_cluster_rewards.png"),
        episodes, inter_log, ma_window,
        total_label=f'Total Reward (MA {ma_window})',
        mean_label=f'Mean Reward (MA {ma_window})',
        total_ylabel="Total Inter-Cluster Reward",
        mean_ylabel="Mean Inter-Cluster Reward",
        total_color='tab:green',
        mean_color='mediumseagreen',
        title="Inter-Cluster (High-Level Agent) Rewards",
    )

    # Plot 3: Total System Rewards
    _save_total_and_mean_plot(
        os.path.join(plots_dir, "3_total_system_rewards.png"),
        episodes, total_log, ma_window,
        total_label=f'Total System Reward (MA {ma_window})',
        mean_label=f'Mean System Reward (MA {ma_window})',
        total_ylabel="Total System Reward",
        mean_ylabel="Mean System Reward per Agent",
        total_color='tab:red',
        mean_color='salmon',
        title="Total System Rewards (Intra + Inter)",
    )

    # Plot 4: Cost Reduction
    cost_df = pd.DataFrame(cost_log)
    baseline = cost_df['cost_without_p2p']
    # Episodes without a positive baseline cost are reported as 0 % (the same
    # convention the per-episode log uses) instead of inf/NaN, which would
    # otherwise contaminate the moving average.
    ratio = cost_df['total_cost'] / baseline.where(baseline > 0)
    # Cap the *percentage* at 100; clipping must happen after scaling.
    cost_df['cost_reduction_pct'] = (100 * (1 - ratio)).fillna(0.0).clip(upper=100)
    plt.figure(figsize=(12, 7))
    plt.plot(episodes, _moving_avg(cost_df['cost_reduction_pct'], ma_window),
             label=f'Cost Reduction % (MA {ma_window})', color='purple', linewidth=2)
    plt.xlabel("Episode")
    plt.ylabel("Cost Reduction (%)")
    plt.title("Total System-Wide Cost Reduction")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "4_cost_reduction.png"), dpi=200)
    plt.close()

    # Keep only rows whose 'Episode' is numeric (drops any summary rows).
    df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
    df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])

    # Plot 5: Battery Degradation Cost
    plt.figure(figsize=(12, 7))
    plt.plot(df_plot["Episode"], _moving_avg(df_plot["battery_degradation_cost_total"], ma_window),
             label=f'Degradation Cost (MA {ma_window})', color='darkgreen', linewidth=2)
    plt.xlabel("Episode")
    plt.ylabel("Total Degradation Cost ($)")
    plt.title("Total Battery Degradation Cost")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "5_battery_degradation_cost.png"), dpi=200)
    plt.close()

    print(f"All plots have been saved to: {plots_dir}")
576
+
577
+
578
# Script entry point: start training only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Other_algorithms/HC_MAPPO/cluster.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ import torch
5
+
6
+ # Ensure project root is on the Python path
7
+ # Please ensure you follow proper directory structure for running this code
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
+
10
+ from Environment.solar_sys_environment import SolarSys
11
+ from Environment.cluster_env_wrapper import GlobalPriceVecEnvWrapper
12
+ from Environment.cluster_env_wrapper import make_vec_env
13
class InterClusterLedger:
    """Bookkeeping of pairwise transfers between clusters.

    ``balances[x][y]`` is the running signed amount cluster ``x`` holds
    against cluster ``y``: it decreases when ``x`` sends to ``y`` and
    increases when ``x`` receives from ``y``.
    """

    def __init__(self):
        # Nested mapping: cluster id -> counterparty id -> signed running total.
        self.balances = {}

    def record_transfer(self, from_id: str, to_id: str, amount: float):
        """Book ``amount`` moving from ``from_id`` to ``to_id``.

        A transfer from a cluster to itself is ignored.
        """
        if from_id == to_id:
            return

        # setdefault returns the (possibly freshly created) inner dict, so both
        # sides of the ledger can be updated without a second lookup.
        sender_book = self.balances.setdefault(from_id, {})
        receiver_book = self.balances.setdefault(to_id, {})

        sender_book[to_id] = sender_book.get(to_id, 0.0) - amount
        receiver_book[from_id] = receiver_book.get(from_id, 0.0) + amount

    def get_balance(self, a_id: str, b_id: str) -> float:
        """Return ``a_id``'s balance against ``b_id`` (0.0 when none is recorded)."""
        counterparties = self.balances.get(a_id)
        if counterparties is None:
            return 0.0
        return counterparties.get(b_id, 0.0)

    def net_balances(self) -> dict:
        """Return the live nested balance mapping (not a copy)."""
        return self.balances
32
+
33
+
34
class InterClusterCoordinator:
    """High-level coordinator that matches energy transfers between clusters.

    It summarises each cluster's state, converts the high-level agent's raw
    export/import preferences into capacity-feasible matched transfers, and
    computes one reward per cluster.
    """

    def __init__(
        self,
        cluster_env,
        high_level_agent,
        ledger,
        max_transfer_kwh: float = 1000000.0,
        w_cost_savings: float = 2.0,
        w_grid_penalty: float = 0.3,
        w_p2p_bonus: float = 0.3
    ):
        """Store collaborators and reward weights.

        Args:
            cluster_env: Container of the per-cluster environments; the reward
                computation reads its ``cluster_envs`` sequence.
            high_level_agent: Policy producing the inter-cluster actions
                (stored only; not called by this class).
            ledger: Inter-cluster ledger (stored only; not called by this class).
            max_transfer_kwh: Scale applied to a softmax preference before it
                is capped by the reported capacity.
            w_cost_savings: Weight of the cost-saving reward term.
            w_grid_penalty: Weight of the grid-import penalty term.
            w_p2p_bonus: Weight of the inter-cluster trade-volume bonus term.
        """
        self.cluster_env = cluster_env
        self.agent = high_level_agent
        self.ledger = ledger
        self.max_transfer_kwh = max_transfer_kwh
        self.w_cost_savings = w_cost_savings
        self.w_grid_penalty = w_grid_penalty
        self.w_p2p_bonus = w_p2p_bonus

    def get_cluster_state(self, env, step_count: int) -> np.ndarray:
        """Summarise one cluster environment as a 7-element float32 vector.

        Args:
            env: A single cluster environment exposing ``num_steps``,
                ``steps_per_day``, ``battery_soc``, ``battery_max_capacity``,
                ``demands_day``, ``solars_day`` and ``get_grid_price``
                (presumably a ``SolarSys`` instance -- confirm against caller).
            step_count: Current step; clamped to the last valid index.

        Returns:
            ``[total SoC, total capacity, SoC fraction, total demand,
            total solar, grid price, step / steps_per_day]``.
        """
        # Clamp so a call made after the final step still reads valid data.
        idx = min(step_count, env.num_steps - 1)

        agg_soc = np.sum(env.battery_soc)
        agg_max_capacity = np.sum(env.battery_max_capacity)
        # Guard against a cluster without any battery capacity.
        agg_soc_fraction = agg_soc / agg_max_capacity if agg_max_capacity > 0 else 0.0

        agg_demand = np.sum(env.demands_day[idx])
        agg_solar = np.sum(env.solars_day[idx])

        price = env.get_grid_price(idx)
        t_norm = idx / float(env.steps_per_day)

        return np.array(
            [agg_soc, agg_max_capacity, agg_soc_fraction,
             agg_demand, agg_solar, price, t_norm],
            dtype=np.float32,
        )

    def build_transfers(self, agent_action_vector: np.ndarray, reports: dict) -> tuple[np.ndarray, np.ndarray]:
        """Turn raw agent preferences into matched export/import volumes.

        Column 0 of ``agent_action_vector`` holds export preferences and
        column 1 import preferences, one row per cluster.  Each column is
        softmax-normalised across clusters, scaled by ``max_transfer_kwh`` and
        capped by the capacity the cluster reported.  The smaller of the two
        totals is the matched energy, which is then split pro rata on each
        side.

        Args:
            agent_action_vector: Array with one row per cluster and at least
                two columns.
            reports: Indexable by cluster position ``0..n-1``; every entry
                provides ``'export_capacity'`` and ``'import_capacity'``.

        Returns:
            ``(actual_exports, actual_imports)``, two float arrays of length
            ``n``.  Both are all zeros when the matched energy does not
            exceed 1e-6.
        """
        # The cluster count comes from the action rows themselves, so this
        # method does not depend on which attribute of ``cluster_env`` holds
        # the cluster list (the reward path uses ``cluster_envs``).
        n = len(agent_action_vector)

        export_prefs = torch.softmax(torch.tensor(agent_action_vector[:, 0]), dim=-1).numpy()
        import_prefs = torch.softmax(torch.tensor(agent_action_vector[:, 1]), dim=-1).numpy()

        export_caps = np.array(
            [float(reports[i]['export_capacity']) for i in range(n)], dtype=np.float64
        )
        import_caps = np.array(
            [float(reports[i]['import_capacity']) for i in range(n)], dtype=np.float64
        )

        # Promote to float64 before scaling so the product is not computed in
        # the (possibly float32) softmax dtype.
        potential_exports = np.minimum(
            export_prefs.astype(np.float64) * self.max_transfer_kwh, export_caps
        )
        potential_imports = np.minimum(
            import_prefs.astype(np.float64) * self.max_transfer_kwh, import_caps
        )

        total_available_for_export = float(potential_exports.sum())
        total_requested_for_import = float(potential_imports.sum())
        total_matched_energy = min(total_available_for_export, total_requested_for_import)

        # Written as "not >" so a NaN total also falls back to "no trade".
        if not total_matched_energy > 1e-6:
            return np.zeros(n), np.zeros(n)

        # Both totals are >= the matched energy > 1e-6 here, so the divisions
        # are safe without further checks.
        actual_exports = (potential_exports / total_available_for_export) * total_matched_energy
        actual_imports = (potential_imports / total_requested_for_import) * total_matched_energy
        return actual_exports, actual_imports

    def compute_inter_cluster_reward(self, all_cluster_infos: dict, actual_transfers: tuple, step_count: int) -> np.ndarray:
        """Compute an individual reward for every cluster agent.

        ``reward_i = w_cost_savings * (baseline_cost_i - actual_cost_i)
        + w_p2p_bonus * (exports_i + imports_i)
        - w_grid_penalty * grid_import_with_p2p_i``, where the baseline cost
        is the no-P2P grid import priced at the current grid price.

        Args:
            all_cluster_infos: Batched info dict with per-cluster entries under
                ``'costs'``, ``'grid_import_no_p2p'`` and
                ``'grid_import_with_p2p'``.
            actual_transfers: ``(actual_exports, actual_imports)`` as returned
                by :meth:`build_transfers`.
            step_count: Step used to look up the grid price.

        Returns:
            float32 array with one reward per cluster.
        """
        actual_exports, actual_imports = actual_transfers
        cluster_envs = self.cluster_env.cluster_envs
        num_clusters = len(cluster_envs)
        cluster_rewards = np.zeros(num_clusters, dtype=np.float32)

        # Collapse each cluster's per-agent arrays into one scalar.
        costs_per_cluster = [np.sum(c) for c in all_cluster_infos['costs']]
        baseline_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_no_p2p']]
        actual_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_with_p2p']]

        # A single grid price is read from the first cluster and used for all.
        # NOTE(review): step_count is not clamped here, unlike in
        # get_cluster_state -- confirm get_grid_price tolerates the final step.
        grid_price = cluster_envs[0].get_grid_price(step_count)

        for i in range(num_clusters):
            cost_saved = baseline_imports_per_cluster[i] * grid_price - costs_per_cluster[i]
            r_savings = self.w_cost_savings * cost_saved
            r_grid = self.w_grid_penalty * actual_imports_per_cluster[i]
            r_p2p = self.w_p2p_bonus * (actual_exports[i] + actual_imports[i])
            cluster_rewards[i] = r_savings + r_p2p - r_grid

        return cluster_rewards
Other_algorithms/HC_MAPPO/mappo/_init_.py ADDED
File without changes
Other_algorithms/HC_MAPPO/mappo/trainer/__init__.py ADDED
File without changes
Other_algorithms/HC_MAPPO/mappo/trainer/mappo.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import random
4
+ import numpy as np
5
+ from torch.distributions import Normal
6
+
7
# Choose the compute device once at import time: CUDA when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using CUDA (NVIDIA GPU)" if device.type == "cuda" else "Using CPU")
13
+
14
def set_global_seed(seed: int, *, deterministic: bool = False):
    """Seed Python, NumPy and PyTorch RNGs and configure cuDNN.

    Args:
        seed: Seed applied to ``random``, ``numpy.random`` and ``torch``
            (including every CUDA device when CUDA is available).
        deterministic: When False (the default, matching the previous
            hard-coded behaviour) cuDNN autotuning is enabled and
            deterministic kernels are not enforced, which favours speed over
            bit-exact reproducibility on GPU.  When True the two flags are
            flipped so cuDNN uses deterministic algorithms.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    deterministic = bool(deterministic)
    torch.backends.cudnn.deterministic = deterministic
    # Autotuning may select different kernels between runs, so it is only
    # enabled when determinism was not requested.
    torch.backends.cudnn.benchmark = not deterministic


# Module-wide seed, applied once at import time.
SEED = 42
set_global_seed(SEED)
25
+
26
class MLP(nn.Module):
    """Fully connected network: a ReLU follows every hidden layer, the output layer is linear."""

    def __init__(self, input_dim, hidden_dims, output_dim):
        """Build the layer stack.

        Args:
            input_dim: Size of the input features.
            hidden_dims: Iterable of hidden-layer widths (may be empty).
            output_dim: Size of the output.
        """
        super().__init__()
        widths = [input_dim, *hidden_dims]
        modules = []
        # Walk consecutive (in, out) width pairs to create the hidden layers.
        for fan_in, fan_out in zip(widths, widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.ReLU())
        modules.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the network to ``x``."""
        return self.net(x)
39
+
40
class Actor(nn.Module):
    """Gaussian policy head: an MLP yields the mean, a learned parameter the log-std."""

    def __init__(self, obs_dim, act_dim, hidden=(64,64)):
        """Create the mean network and a zero-initialised log-std of size ``act_dim``."""
        super().__init__()
        self.net = MLP(obs_dim, hidden, act_dim)
        # The log-std is a free parameter and does not depend on the observation.
        self.log_std = nn.Parameter(torch.zeros(act_dim))

    def forward(self, x):
        """Return ``(mean, std)`` for observation ``x``."""
        return self.net(x), self.log_std.exp()
50
+
51
class Critic(nn.Module):
    """State-value network that maps a state vector to a single scalar."""

    def __init__(self, state_dim, hidden=(128,128)):
        """Create an MLP with one output unit."""
        super().__init__()
        self.net = MLP(state_dim, hidden, 1)

    def forward(self, x):
        """Return the value estimate with the trailing size-1 axis removed."""
        value = self.net(x)
        return torch.squeeze(value, dim=-1)
58
+
59
class MAPPO:
    """Multi-agent PPO trainer with one shared actor and one centralised critic.

    The same ``Actor`` is evaluated on every agent's local observation, while
    the ``Critic`` scores the global state.  Transitions of one episode (at
    most ``episode_len`` steps) are written into pre-allocated NumPy buffers
    and consumed by :meth:`update`.
    """

    def __init__(
        self,
        n_agents,
        local_dim,
        global_dim,
        act_dim,
        lr=3e-4,
        gamma=0.99,
        lam=0.95,
        clip_eps=0.2,
        k_epochs=10,
        batch_size=1024,
        episode_len=96
    ):
        """Create networks, optimisers and rollout buffers.

        Args:
            n_agents: Number of agents sharing the actor.
            local_dim: Size of one agent's local observation.
            global_dim: Size of the global state fed to the critic.
            act_dim: Size of one agent's action.
            lr: Adam learning rate for both actor and critic.
            gamma: Discount factor.
            lam: GAE lambda (0 reduces to one-step TD advantages).
            clip_eps: PPO ratio clipping range.
            k_epochs: Passes over the rollout per :meth:`update`.
            batch_size: Minibatch size used in :meth:`update`.
            episode_len: Capacity of the rollout buffers in steps.
        """
        self.n_agents = n_agents
        self.local_dim = local_dim
        self.global_dim = global_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.lam = lam
        self.clip_eps = clip_eps
        self.k_epochs = k_epochs
        self.batch_size = batch_size
        self.episode_len = episode_len

        self.actor = Actor(local_dim, act_dim).to(device)
        self.critic = Critic(global_dim).to(device)

        self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
        self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)

        print("MAPPO CUDA AMP is disabled for stability.")

        self.init_buffer()

    def init_buffer(self):
        """(Re)allocate the per-episode rollout buffers and rewind the write index."""
        steps, agents = self.episode_len, self.n_agents
        # float32 instead of float16: half precision rounds stored log-probs
        # coarsely (which distorts the PPO ratio) and overflows to inf above
        # 65504.  The buffers are small, so the extra memory is negligible.
        dtype = np.float32
        self.ls_buf = np.zeros((steps, agents, self.local_dim), dtype=dtype)
        self.gs_buf = np.zeros((steps, self.global_dim), dtype=dtype)
        self.ac_buf = np.zeros((steps, agents, self.act_dim), dtype=dtype)
        self.lp_buf = np.zeros((steps, agents), dtype=dtype)
        self.rw_buf = np.zeros((steps, agents), dtype=dtype)
        self.done_buf = np.zeros((steps, agents), dtype=dtype)
        self.next_gs_buf = np.zeros((steps, self.global_dim), dtype=dtype)
        self.step_idx = 0

    @torch.no_grad()
    def select_action(self, local_obs, global_obs):
        """Sample actions from the current policy.

        Args:
            local_obs: NumPy array of local observations (last axis
                ``local_dim``).
            global_obs: Accepted for interface symmetry; not used here.

        Returns:
            ``(actions, log_probs)`` as NumPy arrays; the log-probabilities
            are summed over the action dimension.
        """
        obs = torch.from_numpy(local_obs).float().to(device)
        mean, std = self.actor(obs)
        dist = Normal(mean, std)
        action = dist.sample()
        return action.cpu().numpy(), dist.log_prob(action).sum(-1).cpu().numpy()

    def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
        """Append one transition; transitions beyond ``episode_len`` are dropped."""
        if self.step_idx >= self.episode_len:
            return
        t = self.step_idx
        self.ls_buf[t] = local_obs
        self.gs_buf[t] = global_obs
        self.ac_buf[t] = action
        self.lp_buf[t] = logp
        self.rw_buf[t] = reward
        self.done_buf[t] = done
        self.next_gs_buf[t] = next_global_obs
        self.step_idx = t + 1

    def compute_gae(self, T, vals):
        """Compute GAE(lambda) advantages and value targets for the rollout.

        The shared state value ``V(s_t)`` is broadcast to every agent and
        combined with the per-agent rewards::

            delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
            A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}

        If the last stored step is not terminal for all agents, ``V`` of the
        last stored next-state is used as the bootstrap value.

        Args:
            T: Number of valid steps in the buffers.
            vals: Tensor of shape ``(T,)`` with critic values of the stored
                global states.

        Returns:
            ``(advantages, returns)``: flat float32 tensors of length
            ``T * n_agents`` in (step, agent) order, on ``vals``'s device.
        """
        N = self.n_agents
        out_device = vals.device

        values = vals.detach().unsqueeze(1).expand(-1, N).cpu().numpy().astype(np.float32)
        next_values = np.zeros_like(values)
        next_values[:-1] = values[1:]
        if not self.done_buf[T - 1].all():
            # Episode was cut off rather than terminated: bootstrap from the critic.
            with torch.no_grad():
                last_state = torch.from_numpy(self.next_gs_buf[T - 1]).float().to(out_device)
                next_values[T - 1, :] = self.critic(last_state).cpu().item()

        masks = 1.0 - self.done_buf[:T]
        rewards = self.rw_buf[:T]
        deltas = (rewards + self.gamma * next_values * masks - values).astype(np.float32)

        # Backward recursion; the mask stops credit flowing across episode ends.
        adv = np.zeros_like(deltas)
        running = np.zeros(N, dtype=np.float32)
        decay = self.gamma * self.lam
        for t in reversed(range(T)):
            running = deltas[t] + decay * masks[t] * running
            adv[t] = running

        ret = adv + values
        adv_flat = torch.from_numpy(adv.flatten()).to(out_device)
        ret_flat = torch.from_numpy(ret.flatten()).to(out_device)
        return adv_flat, ret_flat

    def update(self):
        """Run ``k_epochs`` of clipped-PPO updates on the stored rollout, then reset it."""
        T = self.step_idx
        if T == 0:
            return

        n_rows = T * self.n_agents
        gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
        ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(n_rows, -1)
        ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(n_rows, -1)
        lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)

        with torch.no_grad():
            vals = self.critic(gs_tensor)

        adv_flat, ret_flat = self.compute_gae(T, vals)
        # std() of a single sample is NaN, so only normalise with 2+ samples.
        if adv_flat.numel() > 1:
            adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)

        # Repeat each global state once per agent so rows line up with ls_tensor.
        gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(n_rows, self.global_dim)

        dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
        # The generator is re-seeded on every call, so the minibatch order is
        # the same for every update over a rollout of the same size.
        gen = torch.Generator()
        gen.manual_seed(SEED)
        loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)

        for _ in range(self.k_epochs):
            for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
                mean, std = self.actor(b_ls)
                dist = Normal(mean, std)

                entropy = dist.entropy().mean()

                lp_new = dist.log_prob(b_ac).sum(-1)
                ratio = torch.exp(lp_new - b_lp)
                surr1 = ratio * b_adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv

                # Clipped surrogate objective with a 0.01 entropy bonus.
                actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy

                self.opt_a.zero_grad()
                actor_loss.backward()
                self.opt_a.step()

                val_pred = self.critic(b_gs)
                critic_loss = nn.functional.mse_loss(val_pred, b_ret)

                self.opt_c.zero_grad()
                critic_loss.backward()
                self.opt_c.step()

        # Rewind the buffer; old entries are overwritten by the next episode.
        self.step_idx = 0

    def save(self, path):
        """Write the actor and critic state dicts to ``path``."""
        torch.save({'actor': self.actor.state_dict(),
                    'critic': self.critic.state_dict()}, path)

    def load(self, path):
        """Load actor and critic weights from a checkpoint written by :meth:`save`."""
        data = torch.load(path, map_location=device)
        self.actor.load_state_dict(data['actor'])
        self.critic.load_state_dict(data['critic'])