SolarSys2025 commited on
Commit
c8c676e
·
verified ·
1 Parent(s): 0638815

Upload 12 files

Browse files
SolarSys/.DS_Store ADDED
Binary file (6.15 kB). View file
 
SolarSys/Environment/cluster_env_wrapper.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import numpy as np
3
+ import math
4
+ import sys
5
+ import os
6
+ import functools
7
+
8
+ import pandas as pd
9
+
10
+ # Ensure SolarSys Environement is on the Python path
11
+ # Please ensure you follow proper directory structure for running this code
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+ from Environment.solar_sys_environment import SolarSys
14
+
15
+
16
+ def form_clusters(metrics: dict, size: int) -> list:
17
+ """
18
+ Forms balanced, heterogeneous clusters by categorizing houses based on their
19
+ energy profile and distributing them evenly in a round-robin fashion.
20
+ """
21
+ house_ids = list(metrics.keys())
22
+ if not house_ids:
23
+ return []
24
+ all_consumption = [m['consumption'] for m in metrics.values()]
25
+ all_solar = [m['solar'] for m in metrics.values()]
26
+
27
+ median_consumption = np.median(all_consumption) if all_consumption else 0
28
+ median_solar = np.median(all_solar) if all_solar else 0
29
+
30
+ #Categorize each house based on its profile relative to the median
31
+ producers = [h for h in house_ids if metrics[h]['solar'] >= median_solar and metrics[h]['consumption'] < median_consumption]
32
+ consumers = [h for h in house_ids if metrics[h]['solar'] < median_solar and metrics[h]['consumption'] >= median_consumption]
33
+ prosumers = [h for h in house_ids if metrics[h]['solar'] >= median_solar and metrics[h]['consumption'] >= median_consumption]
34
+ neutrals = [h for h in house_ids if metrics[h]['solar'] < median_solar and metrics[h]['consumption'] < median_consumption]
35
+
36
+ # Create a master list ordered by category
37
+ sorted_categorized_houses = producers + consumers + prosumers + neutrals
38
+
39
+ # Add any houses that weren't categorized to ensure none are missed
40
+ categorized_set = set(sorted_categorized_houses)
41
+ uncategorized = [h for h in house_ids if h not in categorized_set]
42
+ final_house_list = sorted_categorized_houses + uncategorized
43
+ num_houses = len(house_ids)
44
+ num_clusters = math.ceil(num_houses / size)
45
+
46
+ clusters = [[] for _ in range(num_clusters)]
47
+
48
+ for i, house_id in enumerate(final_house_list):
49
+ target_cluster_idx = i % num_clusters
50
+ clusters[target_cluster_idx].append(house_id)
51
+
52
+ return clusters
53
+
54
+ class GlobalPriceVecEnvWrapper(gym.vector.VectorEnvWrapper):
55
+ def __init__(self, env, clusters: list):
56
+ super().__init__(env)
57
+ self.clusters = clusters
58
+ # Expose the underlying SolarSys environments for inspection by the coordinator
59
+ # self.env.envs gets the list of individual envs from the SyncVectorEnv
60
+ self.cluster_envs = self.env.envs
61
+
62
+ def step(self, actions: np.ndarray, exports: np.ndarray = None, imports: np.ndarray = None):
63
+ num_clusters = len(self.cluster_envs)
64
+ net_transfers = np.zeros(num_clusters)
65
+ if exports is not None and imports is not None:
66
+ net_transfers = imports - exports
67
+ batched_low_level_actions = actions
68
+ batched_transfers = net_transfers.reshape(-1, 1).astype(np.float32)
69
+ batched_prices = np.full((num_clusters, 1), -1.0, dtype=np.float32)
70
+ final_packed_actions_tuple = (batched_low_level_actions, batched_transfers, batched_prices)
71
+ obs_next, rewards, terminateds, truncateds, infos = self.env.step(final_packed_actions_tuple)
72
+ dones = terminateds | truncateds
73
+ done_all = dones.all()
74
+
75
+
76
+
77
+ if done_all:
78
+ final_infos = infos['final_info']
79
+ keys = final_infos[0].keys()
80
+ infos = {k: np.stack([info[k] for info in final_infos]) for k in keys}
81
+
82
+ info_agg = {
83
+ "cluster_dones": dones,
84
+ "cluster_infos": infos,
85
+ }
86
+
87
+ return obs_next, rewards, done_all, info_agg
88
+
89
+ def get_export_capacity(self, cluster_idx: int) -> float:
90
+ """Returns the total physically exportable energy from a cluster's batteries and solar in kWh."""
91
+ cluster_env = self.cluster_envs[cluster_idx]
92
+ available_from_batt = cluster_env.battery_soc * cluster_env.battery_discharge_efficiency
93
+ total_exportable = np.sum(available_from_batt) + cluster_env.current_solar
94
+ return float(total_exportable)
95
+
96
+ def get_import_capacity(self, cluster_idx: int) -> float:
97
+ """Returns the total physically importable space in a cluster's batteries in kWh."""
98
+ cluster_env = self.cluster_envs[cluster_idx]
99
+ free_space = cluster_env.battery_max_capacity - cluster_env.battery_soc
100
+ total_storable = np.sum(free_space)
101
+ return float(total_storable)
102
+
103
+ def send_energy(self, from_cluster_idx: int, amount: float) -> float:
104
+ """Drains 'amount' of energy from the specified cluster (batteries first, then solar)."""
105
+ cluster_env = self.cluster_envs[from_cluster_idx]
106
+ return cluster_env.send_energy(amount)
107
+
108
+ def receive_energy(self, to_cluster_idx: int, amount: float) -> float:
109
+ """Charges batteries in the specified cluster with 'amount' of energy."""
110
+ cluster_env = self.cluster_envs[to_cluster_idx]
111
+ return cluster_env.receive_energy(amount)
112
+
113
+
114
+ def make_vec_env(data_path: str, time_freq: str, cluster_size: int, state: str):
115
+ print("--- Pre-loading shared dataset for all environments ---")
116
+ try:
117
+ shared_df = pd.read_csv(data_path)
118
+ shared_df["local_15min"] = pd.to_datetime(shared_df["local_15min"], utc=True)
119
+ shared_df.set_index("local_15min", inplace=True)
120
+
121
+ # ADD THIS LINE
122
+ shared_df = shared_df.resample(time_freq).mean()
123
+ # ADD THIS LINE
124
+
125
+ except Exception as e:
126
+ raise ValueError(f"Failed to pre-load data in make_vec_env: {e}")
127
+
128
+ base_env_for_metrics = SolarSys(
129
+ data_path=data_path,
130
+ time_freq=time_freq,
131
+ preloaded_data=shared_df, # Pass the shared DataFrame here
132
+ state=state
133
+ )
134
+
135
+ # This part for calculating metrics and forming clusters
136
+ metrics = {}
137
+ for hid in base_env_for_metrics.house_ids:
138
+ total_consumption = float(
139
+ np.clip(base_env_for_metrics.original_no_p2p_import[hid], 0.0, None).sum()
140
+ )
141
+ total_solar = float(
142
+ base_env_for_metrics.all_data[f"total_solar_{hid}"].clip(lower=0.0).sum()
143
+ )
144
+ metrics[hid] = {'consumption': total_consumption, 'solar': total_solar}
145
+
146
+ clusters = form_clusters(metrics, cluster_size)
147
+ print(f"Formed {len(clusters)} clusters of size up to {cluster_size}.")
148
+
149
+ # functools.partial to create environment
150
+ env_fns = []
151
+ for cluster_house_ids in clusters:
152
+ preset_env_fn = functools.partial(
153
+ SolarSys,
154
+ data_path=data_path,
155
+ time_freq=time_freq,
156
+ house_ids_in_cluster=cluster_house_ids,
157
+ preloaded_data=shared_df,
158
+ state=state
159
+ )
160
+ env_fns.append(preset_env_fn)
161
+ sync_vec_env = gym.vector.SyncVectorEnv(env_fns)
162
+ wrapped_vec_env = GlobalPriceVecEnvWrapper(sync_vec_env, clusters=clusters)
163
+
164
+ return wrapped_vec_env
SolarSys/Environment/solar_sys_environment.py ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import pandas as pd
3
+ import numpy as np
4
+ from collections import deque
5
+ import random
6
+ from gym.spaces import Tuple, Box
7
+
8
+ random.seed(42)
9
+ np.random.seed(42)
10
+
11
+ class SolarSys(gym.Env):
12
+
13
+ def __init__(
14
+ self,
15
+ data_path="DATA/training/25houses_152days_TRAIN.csv",
16
+ state="", # Select from 'oklahoma', 'colorado', 'pennsylvania'
17
+ time_freq="15T",
18
+ house_ids_in_cluster=None,
19
+ preloaded_data=None
20
+
21
+ ):
22
+
23
+ super().__init__() # initialize parent gym.Env
24
+ self.state = state.lower()
25
+
26
+ # --- Centralized Pricing Configuration ---
27
+ self._pricing_info = {
28
+ "oklahoma": {
29
+ "max_grid_price": 0.2112,
30
+ "feed_in_tariff": 0.04,
31
+ "price_function": self._get_oklahoma_price
32
+ },
33
+ "colorado": {
34
+ "max_grid_price": 0.32,
35
+ "feed_in_tariff": 0.055,
36
+ "price_function": self._get_colorado_price
37
+ },
38
+ "pennsylvania": {
39
+ "max_grid_price": 0.5505,
40
+ "feed_in_tariff": 0.06,
41
+ "price_function": self._get_pennsylvania_price
42
+ }
43
+ }
44
+
45
+ if self.state not in self._pricing_info:
46
+ raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")
47
+
48
+ state_config = self._pricing_info[self.state]
49
+ self.max_grid_price = state_config["max_grid_price"]
50
+ self.feed_in_tariff = state_config["feed_in_tariff"]
51
+ self._get_price_function = state_config["price_function"]
52
+ self.data_path = data_path
53
+ self.time_freq = time_freq
54
+ if preloaded_data is not None:
55
+ all_data = preloaded_data
56
+ if house_ids_in_cluster:
57
+ print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
58
+ else:
59
+ print(f"Loading data from {data_path}...")
60
+ try:
61
+ all_data = pd.read_csv(data_path)
62
+ all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
63
+ all_data.set_index("local_15min", inplace=True)
64
+
65
+ except FileNotFoundError:
66
+ raise FileNotFoundError(f"Data file {data_path} not found.")
67
+ except pd.errors.EmptyDataError:
68
+ raise ValueError(f"Data file {data_path} is empty.")
69
+ except Exception as e:
70
+ raise ValueError(f"Error loading data: {e}")
71
+
72
+
73
+ # Compute global maxima for normalization
74
+ grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
75
+ solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
76
+ all_grid = all_data[grid_cols].values
77
+ all_solar = all_data[solar_cols].values
78
+
79
+ # max total demand = max(grid + solar) over all time & agents
80
+ self.global_max_demand = float((all_grid + all_solar).max()) + 1e-8
81
+
82
+ # max solar generation alone
83
+ self.global_max_solar = float(all_solar.max()) + 1e-8
84
+
85
+ # Store the resampled dataset
86
+ self.all_data = all_data
87
+ all_house_ids_in_file = [
88
+ col.split("_")[1] for col in self.all_data.columns
89
+ if col.startswith("grid_")
90
+ ]
91
+ if house_ids_in_cluster:
92
+ self.house_ids = [hid for hid in house_ids_in_cluster if hid in all_house_ids_in_file]
93
+ else:
94
+ self.house_ids = all_house_ids_in_file
95
+
96
+ if not self.house_ids:
97
+ raise ValueError("No valid house_ids found for this environment instance.")
98
+
99
+ self.env_log_infos = []
100
+
101
+ self.time_freq = time_freq
102
+ freq_offset = pd.tseries.frequencies.to_offset(time_freq)
103
+ minutes_per_step = freq_offset.nanos / 1e9 / 60.0
104
+ self.steps_per_day = int(24 * 60 // minutes_per_step)
105
+
106
+ total_rows = len(self.all_data)
107
+ self.total_days = total_rows // self.steps_per_day
108
+ if self.total_days < 1:
109
+ raise ValueError(
110
+ f"After resampling, dataset has {total_rows} rows, which is "
111
+ f"less than a single day of {self.steps_per_day} steps."
112
+ )
113
+
114
+ self.num_agents = len(self.house_ids)
115
+ self.original_no_p2p_import = {}
116
+ for hid in self.house_ids:
117
+ col_grid = f"grid_{hid}"
118
+ self.original_no_p2p_import[hid] = self.all_data[col_grid].clip(lower=0.0).values
119
+ solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
120
+ solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
121
+ self.agent_groups = [
122
+ 1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
123
+ for hid in self.house_ids
124
+ ]
125
+
126
+ self.group_counts = {
127
+ 0: self.agent_groups.count(0),
128
+ 1: self.agent_groups.count(1)
129
+ }
130
+ print(f"Number of houses in each group: {self.group_counts}")
131
+
132
+ #battery logic
133
+ self.battery_options = {
134
+ "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
135
+ "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
136
+ "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
137
+ }
138
+ self.solar_houses = [
139
+ hid for hid in self.house_ids
140
+ if (self.all_data[f"total_solar_{hid}"] > 0).any()
141
+ ]
142
+
143
+ self.batteries = {}
144
+ for hid in self.solar_houses:
145
+ choice = random.choice(list(self.battery_options))
146
+ specs = self.battery_options[choice]
147
+ self.batteries[hid] = {"soc": 0.0, **specs}
148
+
149
+ self.battery_charge_history = {hid: [] for hid in self.batteries}
150
+ self.battery_discharge_history = {hid: [] for hid in self.batteries}
151
+ self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
152
+ self.battery_level = sum(b["soc"] for b in self.batteries.values())
153
+ self.current_solar = 0.0
154
+ self.has_battery = np.array([1 if hid in self.batteries else 0 for hid in self.house_ids], dtype=np.float32)
155
+
156
+ # Initialize arrays for all agents, with zeros for non-battery agents
157
+ self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)
158
+ self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
159
+ self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
160
+ self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
161
+ self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
162
+ self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
163
+ self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)
164
+
165
+ # Populate the arrays using the created battery dictionary
166
+ for i, hid in enumerate(self.house_ids):
167
+ if hid in self.batteries:
168
+ batt = self.batteries[hid]
169
+ self.battery_max_capacity[i] = batt["max_capacity"]
170
+ self.battery_charge_efficiency[i] = batt["charge_efficiency"]
171
+ self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
172
+ self.battery_max_charge_rate[i] = batt["max_charge_rate"]
173
+ self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
174
+ self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]
175
+
176
+
177
+ # ========== SPACES (Observation & Action) ===================================
178
+ self.observation_space = gym.spaces.Box(
179
+ low=-np.inf, high=np.inf,
180
+ shape=(self.num_agents, 8),
181
+ dtype=np.float32
182
+ )
183
+ self.action_space = Tuple((
184
+ Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
185
+ Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
186
+ Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32)
187
+ ))
188
+
189
+ # ========== REWARD FUNCTION PARAMETERS ======================================
190
+ self.data = None
191
+ self.env_log = []
192
+ self.day_index = -1
193
+ self.current_step = 0
194
+ self.num_steps = self.steps_per_day
195
+ self.demands = {}
196
+ self.solars = {}
197
+ self.previous_actions = {
198
+ hid: np.zeros(6) for hid in self.house_ids
199
+ }
200
+ self._initialize_episode_metrics()
201
+
202
+ def get_grid_price(self, step_idx):
203
+ """
204
+ Returns the grid price for the current step based on the selected state.
205
+ """
206
+ return self._get_price_function(step_idx)
207
+
208
+ def _get_oklahoma_price(self, step_idx):
209
+ minutes_per_step = 24 * 60 / self.steps_per_day
210
+ hour = int((step_idx * minutes_per_step) // 60) % 24
211
+ if 14 <= hour < 19:
212
+ return 0.2112
213
+ else:
214
+ return 0.0434
215
+
216
+ def _get_colorado_price(self, step_idx):
217
+ minutes_per_step = 24 * 60 / self.steps_per_day
218
+ hour = int((step_idx * minutes_per_step) // 60) % 24
219
+ if 15 <= hour < 19:
220
+ return 0.32
221
+ elif 13 <= hour < 15:
222
+ return 0.22
223
+ else:
224
+ return 0.12
225
+
226
+ def _get_pennsylvania_price(self, step_idx):
227
+ minutes_per_step = 24 * 60 / self.steps_per_day
228
+ hour = int((step_idx * minutes_per_step) // 60) % 24
229
+ if 13 <= hour < 21:
230
+ return 0.125048
231
+ elif hour >= 23 or hour < 6:
232
+ return 0.057014
233
+ else:
234
+ return 0.079085
235
+
236
+ def get_peer_price(self, step_idx, total_surplus, total_shortfall):
237
+ grid_price = self.get_grid_price(step_idx)
238
+ feed_in_tariff = self.feed_in_tariff
239
+
240
+ # Parameters for arctangent-log pricing
241
+ p_balance = (grid_price * 0.80) + (feed_in_tariff * 0.20)
242
+ p_con = (grid_price - feed_in_tariff) / (1.5 * np.pi)
243
+ k = 1.5
244
+ epsilon = 1e-6
245
+ supply = total_surplus + epsilon
246
+ demand = total_shortfall + epsilon
247
+
248
+ ratio = demand / supply
249
+ log_ratio = np.log(ratio)
250
+ if log_ratio < 0:
251
+ power_term = - (np.abs(log_ratio) ** k)
252
+ else:
253
+ power_term = log_ratio ** k
254
+
255
+ price_offset = 2 * np.pi * p_con * np.arctan(power_term)
256
+
257
+ peer_price = p_balance + price_offset
258
+
259
+ final_price = float(np.clip(peer_price, feed_in_tariff, grid_price))
260
+
261
+ return final_price
262
+
263
+
264
+ def _initialize_episode_metrics(self):
265
+ """Initializes or resets all metrics tracked over a single episode (day)."""
266
+ self.cumulative_grid_reduction = 0.0
267
+ self.cumulative_grid_reduction_peak = 0.0
268
+ self.cumulative_degradation_cost = 0.0
269
+ self.agent_cost_savings = np.zeros(self.num_agents)
270
+ self.degradation_cost_timeseries = []
271
+ self.cost_savings_timeseries = []
272
+ self.grid_reduction_timeseries = []
273
+
274
+ def get_episode_metrics(self):
275
+ """
276
+ Returns a dictionary of performance metrics for the last completed episode.
277
+ """
278
+ return self.episode_metrics
279
+
280
+ ##########################################################################
281
+ # Gym Required Methods
282
+
283
+ def reset(self):
284
+ if self.current_step > 0:
285
+ positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
286
+ if len(positive_savings) > 1:
287
+ fairness_on_savings = self._compute_jains_index(positive_savings)
288
+ else:
289
+ fairness_on_savings = 0.0
290
+
291
+ self.episode_metrics = {
292
+ "grid_reduction_entire_day": self.cumulative_grid_reduction,
293
+ "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
294
+ "total_cost_savings": np.sum(self.agent_cost_savings),
295
+ "fairness_on_cost_savings": fairness_on_savings,
296
+ "battery_degradation_cost_total": self.cumulative_degradation_cost,
297
+ "degradation_cost_over_time": self.degradation_cost_timeseries,
298
+ "cost_savings_over_time": self.cost_savings_timeseries,
299
+ "grid_reduction_over_time": self.grid_reduction_timeseries,
300
+ }
301
+ self.day_index = np.random.randint(0, self.total_days)
302
+
303
+ start_row = self.day_index * self.steps_per_day
304
+ end_row = start_row + self.steps_per_day
305
+ day_data = self.all_data.iloc[start_row:end_row].copy()
306
+ self.data = day_data
307
+
308
+ self.no_p2p_import_day = {}
309
+ for hid in self.house_ids:
310
+ self.no_p2p_import_day[hid] = self.original_no_p2p_import[hid][start_row:end_row]
311
+
312
+ demand_list = []
313
+ solar_list = []
314
+ for hid in self.house_ids:
315
+ col_grid = f"grid_{hid}"
316
+ col_solar = f"total_solar_{hid}"
317
+
318
+ grid_series = day_data[col_grid].fillna(0.0)
319
+ solar_series = day_data[col_solar].fillna(0.0).clip(lower=0.0)
320
+
321
+ demand_array = grid_series.values + solar_series.values
322
+ demand_array = np.clip(demand_array, 0.0, None)
323
+
324
+ demand_list.append(demand_array)
325
+ solar_list.append(solar_series.values)
326
+
327
+ self.demands_day = np.stack(demand_list, axis=1).astype(np.float32)
328
+ self.solars_day = np.stack(solar_list, axis=1).astype(np.float32)
329
+
330
+ self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values
331
+
332
+ self.current_step = 0
333
+ self.env_log = []
334
+ for hid in self.house_ids:
335
+ self.previous_actions[hid] = np.zeros(6)
336
+
337
+ lows = 0.30 * self.battery_max_capacity
338
+ highs = 0.70 * self.battery_max_capacity
339
+
340
+ self.battery_soc = np.random.uniform(low=lows, high=highs)
341
+ self.battery_soc *= self.has_battery
342
+
343
+ initial_demands = self.demands_day[0]
344
+ initial_solars = self.solars_day[0]
345
+ initial_surplus = np.maximum(initial_solars - initial_demands, 0.0).sum()
346
+ initial_shortfall = np.maximum(initial_demands - initial_solars, 0.0).sum()
347
+ initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)
348
+
349
+ obs = self._get_obs(peer_price=initial_peer_price)
350
+
351
+ self._initialize_episode_metrics()
352
+
353
+ return obs, {}
354
+
355
+ def step(self, packed_action):
356
+ actions, transfer_kwh_arr, peer_price_arr = packed_action
357
+ inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
358
+ override_peer_price_val = float(peer_price_arr[0])
359
+
360
+ override_peer_price = override_peer_price_val if override_peer_price_val >= 0 else None
361
+
362
+ actions = np.array(actions, dtype=np.float32)
363
+ if actions.shape != (self.num_agents, 6):
364
+ raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
365
+ actions = np.clip(actions, 0.0, 1.0)
366
+
367
+ a_sellGrid = actions[:, 0]
368
+ a_buyGrid = actions[:, 1]
369
+ a_sellPeers = actions[:, 2]
370
+ a_buyPeers = actions[:, 3]
371
+ a_chargeBatt = actions[:, 4]
372
+ a_dischargeBatt = actions[:, 5]
373
+
374
+
375
+ demands = self.demands_day[self.current_step]
376
+ solars = self.solars_day[self.current_step]
377
+
378
+ total_surplus = np.maximum(solars - demands, 0.0).sum()
379
+ total_shortfall = np.maximum(demands - solars, 0.0).sum()
380
+ self.current_solar = total_surplus
381
+
382
+ if override_peer_price is not None:
383
+ peer_price = override_peer_price
384
+ else:
385
+ peer_price = self.get_peer_price(
386
+ self.current_step,
387
+ total_surplus,
388
+ total_shortfall
389
+ )
390
+
391
+ grid_price = self.get_grid_price(self.current_step)
392
+
393
+ shortfall = np.maximum(demands - solars, 0.0)
394
+ surplus = np.maximum(solars - demands, 0.0)
395
+
396
+ final_shortfall = shortfall.copy()
397
+ final_surplus = surplus.copy()
398
+ grid_import = np.zeros(self.num_agents, dtype=np.float32)
399
+ grid_export = np.zeros(self.num_agents, dtype=np.float32)
400
+
401
+ # ### VECTORIZED BATTERY DISCHARGE ###
402
+ available_from_batt = self.battery_soc * self.battery_discharge_efficiency
403
+ desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
404
+ discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
405
+ discharge_amount *= self.has_battery # Ensure only batteries discharge
406
+
407
+ # Update SOC (energy drawn from battery before efficiency loss)
408
+ self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
409
+ self.battery_soc = np.maximum(0.0, self.battery_soc)
410
+ final_shortfall -= discharge_amount
411
+
412
+ cap_left = self.battery_max_capacity - self.battery_soc
413
+ desired_charge = a_chargeBatt * self.battery_max_charge_rate
414
+ charge_amount = np.minimum.reduce([
415
+ desired_charge,
416
+ cap_left / (self.battery_charge_efficiency + 1e-9),
417
+ final_surplus
418
+ ])
419
+ charge_amount *= self.has_battery
420
+
421
+ # Update SOC
422
+ self.battery_soc += charge_amount * self.battery_charge_efficiency
423
+ final_surplus -= charge_amount
424
+
425
+
426
+
427
+ # ### VECTORIZED P2P TRADING ###
428
+ battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
429
+ effective_surplus = final_surplus + battery_offer
430
+
431
+ netPeer = a_buyPeers - a_sellPeers
432
+ p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
433
+ p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus
434
+
435
+ total_sell = np.sum(p2p_sell_offer)
436
+ total_buy = np.sum(p2p_buy_request)
437
+ matched = min(total_sell, total_buy)
438
+
439
+ if matched > 1e-9:
440
+ sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
441
+ buy_fraction = p2p_buy_request / ( total_buy + 1e-12)
442
+ actual_sold = matched * sell_fraction
443
+ actual_bought = matched * buy_fraction
444
+ else:
445
+ actual_sold = np.zeros(self.num_agents, dtype=np.float32)
446
+ actual_bought = np.zeros(self.num_agents, dtype=np.float32)
447
+
448
+
449
+ from_batt = np.minimum(actual_sold, battery_offer)
450
+ from_solar = actual_sold - from_batt
451
+
452
+ final_surplus -= from_solar
453
+
454
+ final_shortfall -= actual_bought
455
+ soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
456
+ self.battery_soc -= soc_reduction
457
+ self.battery_soc = np.maximum(0.0, self.battery_soc)
458
+
459
+
460
+ if inter_cluster_transfer_kwh > 0:
461
+ amount_received = inter_cluster_transfer_kwh
462
+
463
+
464
+ total_shortfall_in_cluster = np.sum(final_shortfall)
465
+ if total_shortfall_in_cluster > 1e-6:
466
+
467
+ to_cover_shortfall = min(amount_received, total_shortfall_in_cluster)
468
+ distribution_ratio = final_shortfall / total_shortfall_in_cluster
469
+ shortfall_reduction = distribution_ratio * to_cover_shortfall
470
+ final_shortfall -= shortfall_reduction
471
+
472
+ amount_received -= to_cover_shortfall
473
+
474
+ if amount_received > 1e-6:
475
+
476
+ cap_left = self.battery_max_capacity - self.battery_soc
477
+ storable_energy = cap_left / (self.battery_charge_efficiency + 1e-9)
478
+ total_storable_in_cluster = np.sum(storable_energy * self.has_battery)
479
+
480
+ if total_storable_in_cluster > 1e-6:
481
+
482
+ to_store = min(amount_received, total_storable_in_cluster)
483
+
484
+
485
+ storage_ratio = storable_energy / total_storable_in_cluster
486
+ energy_to_store_per_batt = storage_ratio * to_store
487
+
488
+
489
+ self.battery_soc += (energy_to_store_per_batt * self.battery_charge_efficiency) * self.has_battery
490
+
491
+ elif inter_cluster_transfer_kwh < 0:
492
+ amount_to_send = abs(inter_cluster_transfer_kwh)
493
+
494
+
495
+ total_surplus_in_cluster = np.sum(final_surplus)
496
+ if total_surplus_in_cluster > 1e-6:
497
+
498
+ sent_from_surplus = min(amount_to_send, total_surplus_in_cluster)
499
+ draw_ratio = final_surplus / total_surplus_in_cluster
500
+ surplus_reduction = draw_ratio * sent_from_surplus
501
+ final_surplus -= surplus_reduction
502
+ amount_to_send -= sent_from_surplus
503
+
504
+
505
+ if amount_to_send > 1e-6:
506
+
507
+ available_from_batt = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
508
+ total_available_from_batt = np.sum(available_from_batt)
509
+
510
+ if total_available_from_batt > 1e-6:
511
+ # Discharge a maximum of 'amount_to_send' from batteries
512
+ to_discharge = min(amount_to_send, total_available_from_batt)
513
+
514
+ # Draw this amount proportionally from each available battery
515
+ discharge_ratio = available_from_batt / total_available_from_batt
516
+ discharged_per_batt = discharge_ratio * to_discharge # This is effective energy
517
+
518
+ # Update SoC (energy drawn from battery before efficiency loss)
519
+ soc_reduction = (discharged_per_batt / (self.battery_discharge_efficiency + 1e-9))
520
+ self.battery_soc -= soc_reduction * self.has_battery
521
+ self.battery_soc = np.maximum(0.0, self.battery_soc)
522
+ # =======================================================================
523
+
524
+ netGrid = a_buyGrid - a_sellGrid
525
+ grid_import = np.maximum(0, netGrid) * final_shortfall
526
+ grid_export = np.maximum(0, -netGrid) * final_surplus
527
+
528
+ forced = np.maximum(final_shortfall - grid_import, 0.0)
529
+ grid_import += forced
530
+ final_shortfall -= forced
531
+
532
+ feed_in_tariff = self.feed_in_tariff
533
+ costs = (
534
+ (grid_import * grid_price)
535
+ - (grid_export * feed_in_tariff)
536
+ + (actual_bought * peer_price)
537
+ - (actual_sold * peer_price)
538
+ )
539
+
540
+ final_rewards = self._compute_rewards(
541
+ grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
542
+ actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
543
+ costs=costs, grid_price=grid_price, peer_price=peer_price
544
+ )
545
+
546
+ no_p2p_import_this_step = np.array([
547
+ self.no_p2p_import_day[hid][self.current_step]
548
+ for hid in self.house_ids
549
+ ], dtype=np.float32)
550
+
551
+
552
+ # --- Metric 1 & 2: Grid Reduction (Entire Day & Peak Hours) ---
553
+ step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
554
+ self.cumulative_grid_reduction += step_grid_reduction
555
+ self.grid_reduction_timeseries.append(step_grid_reduction)
556
+
557
+ if grid_price >= self.max_grid_price * 0.99:
558
+ self.cumulative_grid_reduction_peak += step_grid_reduction
559
+
560
+ # --- Metric 3: Total Cost Savings ---
561
+ cost_no_p2p = no_p2p_import_this_step * grid_price
562
+ step_cost_savings_per_agent = cost_no_p2p - costs
563
+ self.agent_cost_savings += step_cost_savings_per_agent
564
+ self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))
565
+
566
+ # --- Metric 5 & 6: Battery Degradation Cost (Total and Over Time) ---
567
+ degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
568
+ step_degradation_cost = np.sum(degradation_cost_agent)
569
+
570
+ self.cumulative_degradation_cost += step_degradation_cost
571
+ self.degradation_cost_timeseries.append(step_degradation_cost)
572
+
573
+ info = {
574
+ "p2p_buy": actual_bought,
575
+ "p2p_sell": actual_sold,
576
+ "grid_import_with_p2p": grid_import,
577
+ "grid_import_no_p2p": no_p2p_import_this_step,
578
+ "grid_export": grid_export,
579
+ "costs": costs,
580
+ "charge_amount": charge_amount,
581
+ "discharge_amount": discharge_amount,
582
+ "step": self.current_step,
583
+ "step_grid_reduction": step_grid_reduction,
584
+ "step_cost_savings": np.sum(step_cost_savings_per_agent),
585
+ "step_degradation_cost": step_degradation_cost,
586
+ }
587
+
588
+ self.env_log.append([
589
+ self.current_step, np.sum(grid_import), np.sum(grid_export),
590
+ np.sum(actual_bought), np.sum(actual_sold), np.sum(costs)
591
+ ])
592
+
593
+ self.current_step += 1
594
+
595
+ terminated = False
596
+ truncated = (self.current_step >= self.num_steps)
597
+
598
+ obs_next = self._get_obs(peer_price=peer_price)
599
+ info['agent_rewards'] = final_rewards
600
+ self.last_info = info
601
+ self.env_log_infos.append(info)
602
+ return obs_next, final_rewards.sum(), terminated, truncated, info
603
+
604
+
605
+
606
+ def _get_obs(self, peer_price: float):
607
+ step = min(self.current_step, self.num_steps - 1)
608
+ demands = self.demands_day[step]
609
+ solars = self.solars_day[step]
610
+ grid_price = self.get_grid_price(step)
611
+ hour = self.hours_day[step]
612
+ soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
613
+ soc_frac = np.where(self.has_battery == 1, soc_frac, -1.0)
614
+ total_demand_others = demands.sum() - demands
615
+ total_solar_others = solars.sum() - solars
616
+
617
+ obs = np.stack([
618
+ demands,
619
+ solars,
620
+ soc_frac,
621
+ np.full(self.num_agents, grid_price),
622
+ np.full(self.num_agents, peer_price),
623
+ total_demand_others,
624
+ total_solar_others,
625
+ np.full(self.num_agents, hour)
626
+ ], axis=1).astype(np.float32)
627
+
628
+ return obs
629
+
630
+
631
+ def _compute_jains_index(self, usage_array):
632
+ x = np.array(usage_array, dtype=np.float32)
633
+ numerator = (np.sum(x))**2
634
+ denominator = len(x) * np.sum(x**2) + 1e-8
635
+ return numerator / denominator
636
+
637
+
638
+ def _compute_rewards(
639
+ self, grid_import, grid_export, actual_sold, actual_bought,
640
+ charge_amount, discharge_amount, costs, grid_price, peer_price
641
+ ):
642
+
643
+ w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0
644
+
645
+ p_grid_norm = grid_price / self.max_grid_price
646
+ p_peer_norm = peer_price / self.max_grid_price
647
+
648
+ rewards = -costs * w7
649
+ rewards -= w1 * grid_import * p_grid_norm
650
+ rewards += w2 * actual_sold * p_peer_norm
651
+ buy_bonus = w3 * actual_bought * ((grid_price - peer_price) / self.max_grid_price)
652
+ rewards += np.where(peer_price < grid_price, buy_bonus, 0.0)
653
+
654
+ # ### VECTORIZED REWARD PENALTIES ###
655
+ soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
656
+ soc_penalties = w4 * ((soc_frac - 0.5) ** 2) * self.has_battery
657
+ degrad_penalties = w5 * (charge_amount + discharge_amount) * self.battery_degradation_cost
658
+
659
+ rewards -= soc_penalties
660
+ rewards -= degrad_penalties
661
+
662
+ jfi = self._compute_jains_index(actual_bought + actual_sold)
663
+ rewards += w6 * jfi
664
+ return rewards
665
+
666
+ def save_log(self, filename="env_log.csv"):
667
+ columns = [
668
+ "Step", "Total_Grid_Import", "Total_Grid_Export",
669
+ "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
670
+ ]
671
+ df = pd.DataFrame(self.env_log, columns=columns)
672
+ df.to_csv(filename, index=False)
673
+ print(f"Environment log saved to {filename}")
SolarSys/cluster.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ import torch
5
+
6
+ # Ensure project root is on the Python path
7
+ # Please ensure you follow proper directory structure for running this code
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
9
+
10
+ from Environment.solar_sys_environment import SolarSys
11
+ from Environment.cluster_env_wrapper import GlobalPriceVecEnvWrapper
12
+ from Environment.cluster_env_wrapper import make_vec_env
13
+ class InterClusterLedger:
14
+ """
15
+ Tracks inter-cluster debts/transfers.
16
+ """
17
+ def __init__(self):
18
+ self.balances = {}
19
+
20
+ def record_transfer(self, from_id: str, to_id: str, amount: float):
21
+ if from_id == to_id: return
22
+ self.balances.setdefault(from_id, {})
23
+ self.balances.setdefault(to_id, {})
24
+ self.balances[from_id][to_id] = self.balances[from_id].get(to_id, 0.0) - amount
25
+ self.balances[to_id][from_id] = self.balances[to_id].get(from_id, 0.0) + amount
26
+
27
+ def get_balance(self, a_id: str, b_id: str) -> float:
28
+ return self.balances.get(a_id, {}).get(b_id, 0.0)
29
+
30
+ def net_balances(self) -> dict:
31
+ return self.balances
32
+
33
+
34
+ class InterClusterCoordinator:
35
+ def __init__(
36
+ self,
37
+ cluster_env,
38
+ high_level_agent,
39
+ ledger,
40
+ max_transfer_kwh: float = 1000000.0,
41
+ w_cost_savings: float = 2.0,
42
+ w_grid_penalty: float = 0.3,
43
+ w_p2p_bonus: float = 0.3
44
+ ):
45
+ self.cluster_env = cluster_env
46
+ self.agent = high_level_agent
47
+ self.ledger = ledger
48
+ self.max_transfer_kwh = max_transfer_kwh
49
+ self.w_cost_savings = w_cost_savings
50
+ self.w_grid_penalty = w_grid_penalty
51
+ self.w_p2p_bonus = w_p2p_bonus
52
+
53
+ def get_cluster_state(self, env, step_count: int) -> np.ndarray:
54
+ """
55
+ array summarizing a single cluster's state by reading from its vectorized attributes.
56
+ """
57
+ solar_env = env # This is one of the vectorized SolarSys envs
58
+ idx = min(step_count, solar_env.num_steps - 1)
59
+ agg_soc = np.sum(solar_env.battery_soc)
60
+ agg_max_capacity = np.sum(solar_env.battery_max_capacity)
61
+ agg_soc_fraction = agg_soc / agg_max_capacity if agg_max_capacity > 0 else 0.0
62
+
63
+ agg_demand = np.sum(solar_env.demands_day[idx])
64
+ agg_solar = np.sum(solar_env.solars_day[idx])
65
+
66
+ price = solar_env.get_grid_price(idx)
67
+ t_norm = idx / float(solar_env.steps_per_day)
68
+
69
+ return np.array([
70
+ agg_soc, agg_max_capacity, agg_soc_fraction,
71
+ agg_demand, agg_solar, price, t_norm
72
+ ], dtype=np.float32)
73
+
74
+ def build_transfers(self, agent_action_vector: np.ndarray, reports: dict) -> tuple[np.ndarray, np.ndarray]:
75
+ """
76
+ Acts as a centralized market maker based on agent actions and LIVE capacity reports.
77
+ """
78
+ n = len(self.cluster_env.clusters)
79
+ raw_export_prefs = agent_action_vector[:, 0]
80
+ raw_import_prefs = agent_action_vector[:, 1]
81
+
82
+ export_prefs = torch.softmax(torch.tensor(raw_export_prefs), dim=-1).numpy()
83
+ import_prefs = torch.softmax(torch.tensor(raw_import_prefs), dim=-1).numpy()
84
+
85
+ total_available_for_export = 0.0
86
+ potential_exports = np.zeros(n)
87
+ for i in range(n):
88
+ export_capacity = reports[i]['export_capacity']
89
+ pref = float(export_prefs[i])
90
+ potential_exports[i] = min(pref * self.max_transfer_kwh, export_capacity)
91
+ total_available_for_export += potential_exports[i]
92
+
93
+ total_requested_for_import = 0.0
94
+ potential_imports = np.zeros(n)
95
+ for i in range(n):
96
+ import_capacity = reports[i]['import_capacity']
97
+ pref = float(import_prefs[i])
98
+ potential_imports[i] = min(pref * self.max_transfer_kwh, import_capacity)
99
+ total_requested_for_import += potential_imports[i]
100
+
101
+ total_matched_energy = min(total_available_for_export, total_requested_for_import)
102
+ actual_exports = np.zeros(n)
103
+ actual_imports = np.zeros(n)
104
+
105
+ if total_matched_energy > 1e-6:
106
+ if total_available_for_export > 0:
107
+ actual_exports = (potential_exports / total_available_for_export) * total_matched_energy
108
+ if total_requested_for_import > 0:
109
+ actual_imports = (potential_imports / total_requested_for_import) * total_matched_energy
110
+
111
+ return actual_exports, actual_imports
112
+
113
+ def compute_inter_cluster_reward(self, all_cluster_infos: dict, actual_transfers: tuple, step_count: int) -> np.ndarray:
114
+ """
115
+ Computes an INDIVIDUAL reward for each cluster agent to solve
116
+ the credit assignment problem.
117
+ """
118
+ actual_exports, actual_imports = actual_transfers
119
+ num_clusters = len(self.cluster_env.cluster_envs)
120
+ cluster_rewards = np.zeros(num_clusters, dtype=np.float32)
121
+
122
+ # Extract per-cluster cost and import data from the batched info dict
123
+ costs_per_cluster = [np.sum(c) for c in all_cluster_infos['costs']]
124
+ baseline_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_no_p2p']]
125
+ actual_imports_per_cluster = [np.sum(imp) for imp in all_cluster_infos['grid_import_with_p2p']]
126
+
127
+ # Get the single grid price for the current step
128
+ grid_price = self.cluster_env.cluster_envs[0].get_grid_price(step_count)
129
+
130
+ for i in range(num_clusters):
131
+ baseline_cost_this_cluster = baseline_imports_per_cluster[i] * grid_price
132
+ actual_cost_this_cluster = costs_per_cluster[i]
133
+ cost_saved = baseline_cost_this_cluster - actual_cost_this_cluster
134
+ r_savings = self.w_cost_savings * cost_saved
135
+ r_grid = self.w_grid_penalty * actual_imports_per_cluster[i]
136
+ p2p_volume_this_cluster = actual_exports[i] + actual_imports[i]
137
+ r_p2p = self.w_p2p_bonus * p2p_volume_this_cluster
138
+ cluster_rewards[i] = r_savings + r_p2p - r_grid
139
+
140
+ return cluster_rewards
SolarSys/cluster_evaluation.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ from datetime import datetime
5
+ import re
6
+ import numpy as np
7
+ import torch
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+ import glob
11
+
12
+ # Allow imports from project root
13
+ # REMOVED: Specific path comments
14
+
15
+ # NOTE: Ensure the directory structure and module names are generalized (e.g., 'hierarchical_diffusion_model' not 'Hidiff_energy.hierarchial_diffusion_model')
16
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
17
+ from cluster import InterClusterCoordinator, InterClusterLedger
18
+ from Environment.cluster_env_wrapper import make_vec_env
19
+ from mappo.trainer.mappo import MAPPO
20
+ from meanfield.trainer.meanfield import MFAC
21
+
22
+ # ─── Jain's fairness index ────────────────────────────────────
23
+ def compute_jains_fairness(values: np.ndarray) -> float:
24
+ # Minimal comments
25
+ if len(values) == 0:
26
+ return 0.0
27
+ if np.all(values == 0):
28
+ return 1.0
29
+ num = (values.sum())**2
30
+ den = len(values) * (values**2).sum() + 1e-8
31
+ return float(num / den)
32
+
33
+
34
+ def main():
35
+ # ─── Configuration ─────────────────────────────────────────
36
+ # GENERALIZED PATHS
37
+ DATA_PATH = "./data/testing/test_data.csv"
38
+ MODEL_DIR = "./training_models/hierarchical_region_c_100agents_10size_final/models"
39
+
40
+ # --- Auto-detect state from model path ---
41
+ # GENERALIZING STATE NAMES
42
+ state_match = re.search(r"hierarchical_(region_a|region_b|region_c)_", MODEL_DIR)
43
+ if not state_match:
44
+ state_match = re.search(r"mappo_(region_a|region_b|region_c)_", MODEL_DIR)
45
+ if not state_match:
46
+ raise ValueError(
47
+ "Could not detect state (region_a, region_b, or region_c) "
48
+ "from the model directory path."
49
+ )
50
+ detected_state = state_match.group(1)
51
+ # REMOVED: print(f"--- Detected state: {detected_state.upper()} ---")
52
+
53
+ cluster_size_match = re.search(r'(\d+)size_', MODEL_DIR)
54
+ if not cluster_size_match:
55
+ raise ValueError("Could not detect the cluster size from the model directory path.")
56
+ detected_cluster_size = int(cluster_size_match.group(1))
57
+ # REMOVED: print(f"--- Detected cluster size: {detected_cluster_size} ---")
58
+
59
+ DAYS_TO_EVALUATE = 30
60
+ SOLAR_THRESHOLD = 0.1
61
+ MAX_TRANSFER_KWH = 1000000.0
62
+ W_COST_SAVINGS = 1.0
63
+ W_GRID_PENALTY = 0.5
64
+ W_P2P_BONUS = 0.2
65
+
66
+ # ─── Environment Setup ──────────────────────────────────────
67
+
68
+ cluster_env = make_vec_env(
69
+ data_path=DATA_PATH,
70
+ time_freq="15T",
71
+ cluster_size=detected_cluster_size,
72
+ state=detected_state
73
+ )
74
+ n_clusters = cluster_env.num_envs
75
+ sample_subenv = cluster_env.cluster_envs[0]
76
+ eval_num_steps = sample_subenv.num_steps
77
+ # REMOVED: print(f"Number of steps per day: {eval_num_steps}")
78
+
79
+ # Get dimensions
80
+ n_agents_per_cluster = sample_subenv.num_agents
81
+ local_dim = sample_subenv.observation_space.shape[-1]
82
+ global_dim = n_agents_per_cluster * local_dim
83
+ act_dim = sample_subenv.action_space[0].shape[-1]
84
+
85
+ # REMOVED: print(f"Creating and loading {n_clusters} independent low-level MAPPO agents...")
86
+ low_agents = []
87
+ for i in range(n_clusters):
88
+ agent = MAPPO(
89
+ n_agents = n_agents_per_cluster,
90
+ local_dim = local_dim,
91
+ global_dim = global_dim,
92
+ act_dim = act_dim,
93
+ lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=4, batch_size=512, episode_len=96
94
+ )
95
+ ckpt_pattern = os.path.join(MODEL_DIR, f"low_cluster{i}_ep*.pth")
96
+ ckpts_low = glob.glob(ckpt_pattern)
97
+ if not ckpts_low:
98
+ raise FileNotFoundError(f"No checkpoint found for cluster {i}.")
99
+
100
+ latest_low = sorted(ckpts_low, key=lambda x: int(re.search(r'ep(\d+)\.pth$', x).group(1)))[-1]
101
+ # REMOVED: print(f"Loading low-level policy for cluster {i} from: {latest_low}")
102
+ agent.load(latest_low)
103
+ agent.actor.eval()
104
+ agent.critic.eval()
105
+
106
+ low_agents.append(agent)
107
+
108
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
109
+ num_agents = sum(subenv.num_agents for subenv in cluster_env.cluster_envs)
110
+ run_name = f"eval_vectorized_{num_agents}agents_{DAYS_TO_EVALUATE}days_{timestamp}"
111
+ output_folder = os.path.join("runs_final_vectorized_eval", run_name)
112
+ logs_dir = os.path.join(output_folder, "logs")
113
+ plots_dir = os.path.join(output_folder, "plots")
114
+ for d in (logs_dir, plots_dir):
115
+ os.makedirs(d, exist_ok=True)
116
+ # REMOVED: print(f"Saving evaluation outputs to: {output_folder}")
117
+
118
+ OBS_DIM_HI_LOCAL = 7
119
+ act_dim_inter = 2
120
+ # REMOVED: print(f"Initializing evaluation inter-agent...")
121
+ inter_agent = MFAC(
122
+ n_agents=n_clusters, local_dim=OBS_DIM_HI_LOCAL, act_dim=act_dim_inter,
123
+ lr=2e-4, gamma=0.95, lam=0.95, clip_eps=0.2, k_epochs=4, batch_size=512, episode_len= 96
124
+ )
125
+ ckpts_inter = glob.glob(os.path.join(MODEL_DIR, "inter_ep*.pth"))
126
+ if not ckpts_inter:
127
+ raise FileNotFoundError(f"No high-level checkpoints in {MODEL_DIR}")
128
+ latest_inter = sorted(ckpts_inter)[-1]
129
+ # REMOVED: print("Loading inter-cluster policy from", latest_inter)
130
+ inter_agent.load(latest_inter)
131
+ inter_agent.actor.eval()
132
+ inter_agent.critic.eval()
133
+
134
+ ledger = InterClusterLedger()
135
+ coordinator = InterClusterCoordinator(
136
+ cluster_env, inter_agent, ledger, max_transfer_kwh=MAX_TRANSFER_KWH,
137
+ w_cost_savings=W_COST_SAVINGS, w_grid_penalty=W_GRID_PENALTY, w_p2p_bonus=W_P2P_BONUS
138
+ )
139
+
140
+ # ─── Data collectors ───────────────────────────────────────
141
+ all_logs = []
142
+ daily_summaries = []
143
+ step_timing_list = []
144
+
145
+ # === Per-day evaluation ===
146
+ evaluation_start = time.time()
147
+ for day in range(1, DAYS_TO_EVALUATE + 1):
148
+ obs_clusters, _ = cluster_env.reset()
149
+ done_all = False
150
+ step_count = 0
151
+ day_logs = []
152
+
153
+ while not done_all and step_count < eval_num_steps:
154
+ step_start_time = time.time()
155
+ step_count += 1
156
+
157
+ # 1) Get high-level actions
158
+ inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
159
+ inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
160
+ with torch.no_grad():
161
+ high_level_action, _ = inter_agent.select_action(inter_cluster_obs_local)
162
+
163
+ # 2) Build transfers
164
+ current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
165
+ exports, imports = coordinator.build_transfers(high_level_action, current_reports)
166
+
167
+ # 3) Get low-level actions
168
+ batch_global_obs = obs_clusters.reshape(n_clusters, -1)
169
+ with torch.no_grad():
170
+ low_level_actions_list = []
171
+ # Loop through each cluster
172
+ for c_idx in range(n_clusters):
173
+ agent = low_agents[c_idx]
174
+ local_obs_cluster = obs_clusters[c_idx]
175
+ global_obs_cluster = batch_global_obs[c_idx]
176
+
177
+ actions, _ = agent.select_action(local_obs_cluster, global_obs_cluster)
178
+ low_level_actions_list.append(actions)
179
+ low_level_actions = np.stack(low_level_actions_list)
180
+ next_obs, rewards, done_all, step_info = cluster_env.step(
181
+ low_level_actions,
182
+ exports=exports,
183
+ imports=imports
184
+ )
185
+ obs_clusters = next_obs
186
+ # 4) Log step timing
187
+ step_duration = time.time() - step_start_time
188
+ # REMOVED: print(f"[Day {day}, Step {step_count}] Step time: {step_duration:.6f} seconds")
189
+ step_timing_list.append({"day": day, "step": step_count, "step_time_s": step_duration})
190
+
191
+ # --- Consolidated Logging ---
192
+ infos = step_info.get("cluster_infos")
193
+
194
+ for c_idx, subenv in enumerate(cluster_env.cluster_envs):
195
+ grid_price_now = subenv.get_grid_price(step_count - 1)
196
+
197
+ peer_price_now = step_info.get("peer_price_global")
198
+ if peer_price_now is None:
199
+ demands_step = subenv.demands_day[step_count-1]
200
+ solars_step = subenv.solars_day[step_count-1]
201
+ surplus = np.maximum(solars_step - demands_step, 0.0).sum()
202
+ shortfall = np.maximum(demands_step - solars_step, 0.0).sum()
203
+ peer_price_now = subenv.get_peer_price(step_count -1, surplus, shortfall)
204
+
205
+ for i, hid in enumerate(subenv.house_ids):
206
+ is_battery_house = hid in subenv.batteries
207
+ charge = infos["charge_amount"][c_idx][i]
208
+ discharge = infos["discharge_amount"][c_idx][i]
209
+
210
+ day_logs.append({
211
+ "day": day,
212
+ "step": step_count - 1,
213
+ "house": hid,
214
+ "cluster": c_idx,
215
+ "grid_import_no_p2p": infos["grid_import_no_p2p"][c_idx][i],
216
+ "grid_import_with_p2p": infos["grid_import_with_p2p"][c_idx][i],
217
+ "grid_export": infos["grid_export"][c_idx][i],
218
+ "p2p_buy": infos["p2p_buy"][c_idx][i],
219
+ "p2p_sell": infos["p2p_sell"][c_idx][i],
220
+ "actual_cost": infos["costs"][c_idx][i],
221
+ "baseline_cost": infos["grid_import_no_p2p"][c_idx][i] * grid_price_now,
222
+ "total_demand": subenv.demands_day[step_count-1, i],
223
+ "total_solar": subenv.solars_day[step_count-1, i],
224
+ "grid_price": grid_price_now,
225
+ "peer_price": peer_price_now,
226
+ "soc": (subenv.battery_soc[i] / subenv.battery_max_capacity[i]) if is_battery_house and subenv.battery_max_capacity[i] > 0 else np.nan,
227
+ "degradation_cost": (charge + discharge) * subenv.battery_degradation_cost[i] if is_battery_house else 0.0,
228
+ "reward": infos["agent_rewards"][c_idx][i],
229
+ })
230
+
231
+ step_duration = time.time() - step_start_time
232
+
233
+ # ── End of day: aggregate & summarize ────────
234
+ df_day = pd.DataFrame(day_logs)
235
+ if df_day.empty:
236
+ continue
237
+ all_logs.extend(day_logs)
238
+
239
+ # === CONSOLIDATED DAILY SUMMARY CALCULATION (Keep math, remove prints) ======
240
+
241
+ num_solar_houses = df_day[df_day['total_solar'] > 0]['house'].nunique()
242
+
243
+ if num_solar_houses > 0:
244
+ num_agents_in_day = df_day['house'].nunique()
245
+ agg_solar_per_step = df_day.groupby("step")["total_solar"].sum()
246
+ sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_in_day)
247
+ sunny_steps = sunny_steps_mask[sunny_steps_mask].index
248
+ trade_df = df_day[df_day["step"].isin(sunny_steps)]
249
+
250
+ grouped_house = df_day.groupby("house").sum(numeric_only=True)
251
+ grouped_step = df_day.groupby("step").sum(numeric_only=True)
252
+
253
+ total_demand = grouped_step["total_demand"].sum()
254
+ total_solar = grouped_step["total_solar"].sum()
255
+ total_p2p_buy = df_day['p2p_buy'].sum()
256
+ total_p2p_sell = df_day['p2p_sell'].sum()
257
+ total_actual_grid_import = df_day['grid_import_with_p2p'].sum()
258
+
259
+
260
+ baseline_cost_per_house = grouped_house["baseline_cost"]
261
+ actual_cost_per_house = grouped_house["actual_cost"]
262
+ cost_savings_per_house = baseline_cost_per_house - actual_cost_per_house
263
+ day_total_cost_savings = cost_savings_per_house.sum()
264
+
265
+ if baseline_cost_per_house.sum() > 0:
266
+ overall_cost_savings_pct = day_total_cost_savings / baseline_cost_per_house.sum()
267
+ else:
268
+ overall_cost_savings_pct = 0.0
269
+
270
+ baseline_import_per_house = grouped_house["grid_import_no_p2p"]
271
+ actual_import_per_house = grouped_house["grid_import_with_p2p"]
272
+ import_reduction_per_house = baseline_import_per_house - actual_import_per_house
273
+ day_total_import_reduction = import_reduction_per_house.sum()
274
+
275
+ if baseline_import_per_house.sum() > 0:
276
+ overall_import_reduction_pct = day_total_import_reduction / baseline_import_per_house.sum()
277
+ else:
278
+ overall_import_reduction_pct = 0.0
279
+
280
+ fairness_cost_savings = compute_jains_fairness(cost_savings_per_house.values)
281
+ fairness_import_reduction = compute_jains_fairness(import_reduction_per_house.values)
282
+ fairness_rewards = compute_jains_fairness(grouped_house["reward"].values)
283
+ fairness_p2p_buy = compute_jains_fairness(grouped_house["p2p_buy"].values)
284
+ fairness_p2p_sell = compute_jains_fairness(grouped_house["p2p_sell"].values)
285
+ fairness_p2p_total = compute_jains_fairness((grouped_house["p2p_buy"] + grouped_house["p2p_sell"]).values)
286
+
287
+ daily_summaries.append({
288
+ "day": day,
289
+ "day_total_demand": total_demand,
290
+ "day_total_solar": total_solar,
291
+ "day_p2p_buy": total_p2p_buy,
292
+ "day_p2p_sell": total_p2p_sell,
293
+ "cost_savings_abs": day_total_cost_savings,
294
+ "cost_savings_pct": overall_cost_savings_pct,
295
+ "fairness_cost_savings": fairness_cost_savings,
296
+ "grid_reduction_abs": day_total_import_reduction,
297
+ "grid_reduction_pct": overall_import_reduction_pct,
298
+ "fairness_grid_reduction": fairness_import_reduction,
299
+ "fairness_reward": fairness_rewards,
300
+ "fairness_p2p_buy": fairness_p2p_buy,
301
+ "fairness_p2p_sell": fairness_p2p_sell,
302
+ "fairness_p2p_total": fairness_p2p_total,
303
+ })
304
+
305
+
306
+ # === FINAL PROCESSING AND SAVING (Keep saving, remove print summary) =======
307
+ evaluation_end = time.time()
308
+ total_eval_time = evaluation_end - evaluation_start
309
+ # REMOVED: print(f"\nEvaluation loop finished. Total time: {total_eval_time:.2f} seconds.")
310
+
311
+ all_days_df = pd.DataFrame(all_logs)
312
+ if not all_days_df.empty:
313
+ # Save step-level logs
314
+ combined_csv_path = os.path.join(logs_dir, "step_logs_all_days.csv")
315
+ all_days_df.to_csv(combined_csv_path, index=False)
316
+ # REMOVED: print(f"Saved combined step-level logs to: {combined_csv_path}")
317
+
318
+ # Save timing logs
319
+ step_timing_df = pd.DataFrame(step_timing_list)
320
+ timing_csv_path = os.path.join(logs_dir, "step_timing_log.csv")
321
+ step_timing_df.to_csv(timing_csv_path, index=False)
322
+ # REMOVED: print(f"Saved step timing logs to: {timing_csv_path}")
323
+
324
+ # Save house-level summary
325
+ house_level_df = all_days_df.groupby("house").agg({
326
+ "baseline_cost": "sum",
327
+ "actual_cost": "sum",
328
+ "grid_import_no_p2p": "sum",
329
+ "grid_import_with_p2p": "sum",
330
+ "degradation_cost": "sum"
331
+ })
332
+ house_level_df["cost_savings"] = house_level_df["baseline_cost"] - house_level_df["actual_cost"]
333
+ house_level_df["import_reduction"] = house_level_df["grid_import_no_p2p"] - house_level_df["grid_import_with_p2p"]
334
+ house_summary_csv = os.path.join(logs_dir, "summary_per_house.csv")
335
+ house_level_df.to_csv(house_summary_csv)
336
+ # REMOVED: print(f"Saved final summary per house to: {house_summary_csv}")
337
+
338
+ # --- Calculate Final Summary Metrics (Keeping calculations for saving) ---
339
+ daily_summary_df = pd.DataFrame(daily_summaries)
340
+
341
+ fairness_grid_all = compute_jains_fairness(house_level_df["import_reduction"].values)
342
+ fairness_cost_all = compute_jains_fairness(house_level_df["cost_savings"].values)
343
+
344
+ total_cost_savings_all = daily_summary_df["cost_savings_abs"].sum()
345
+ total_baseline_cost_all = all_days_df.groupby('day')['baseline_cost'].sum().sum()
346
+ pct_cost_savings_all = total_cost_savings_all / total_baseline_cost_all if total_baseline_cost_all > 0 else 0.0
347
+
348
+ total_grid_reduction_all = daily_summary_df["grid_reduction_abs"].sum()
349
+ total_baseline_import_all = all_days_df.groupby('day')['grid_import_no_p2p'].sum().sum()
350
+ pct_grid_reduction_all = total_grid_reduction_all / total_baseline_import_all if total_baseline_import_all > 0 else 0.0
351
+
352
+ total_degradation_cost_all = all_days_df["degradation_cost"].sum()
353
+
354
+ # --- Calculate Alternative Performance Metrics ---
355
+ agg_solar_per_step = all_days_df.groupby(['day', 'step'])['total_solar'].sum()
356
+ num_agents_total = len(all_days_df['house'].unique())
357
+ sunny_steps_mask = agg_solar_per_step > (SOLAR_THRESHOLD * num_agents_total)
358
+ sunny_df = all_days_df[all_days_df.set_index(['day', 'step']).index.isin(sunny_steps_mask[sunny_steps_mask].index)]
359
+
360
+ baseline_import_sunny = sunny_df['grid_import_no_p2p'].sum()
361
+ actual_import_sunny = sunny_df['grid_import_with_p2p'].sum()
362
+ grid_reduction_sunny_pct = (baseline_import_sunny - actual_import_sunny) / baseline_import_sunny if baseline_import_sunny > 0 else 0.0
363
+ baseline_cost_sunny = sunny_df['baseline_cost'].sum()
364
+ actual_cost_sunny = sunny_df['actual_cost'].sum()
365
+ cost_savings_sunny_pct = (baseline_cost_sunny - actual_cost_sunny) / baseline_cost_sunny if baseline_cost_sunny > 0 else 0.0
366
+
367
+ total_p2p_buy = all_days_df['p2p_buy'].sum()
368
+ total_actual_grid_import = all_days_df['grid_import_with_p2p'].sum()
369
+ community_sourcing_rate_pct = total_p2p_buy / (total_p2p_buy + total_actual_grid_import) if (total_p2p_buy + total_actual_grid_import) > 0 else 0.0
370
+
371
+ total_p2p_sell = all_days_df['p2p_sell'].sum()
372
+ total_grid_export = all_days_df['grid_export'].sum()
373
+ solar_sharing_efficiency_pct = total_p2p_sell / (total_p2p_sell + total_grid_export) if (total_p2p_sell + total_grid_export) > 0 else 0.0
374
+
375
+ final_row = {
376
+ "day": "ALL_DAYS_SUMMARY", "cost_savings_abs": total_cost_savings_all, "cost_savings_pct": pct_cost_savings_all,
377
+ "grid_reduction_abs": total_grid_reduction_all, "grid_reduction_pct": pct_grid_reduction_all,
378
+ "fairness_cost_savings": fairness_cost_all, "fairness_grid_reduction": fairness_grid_all,
379
+ "total_degradation_cost": total_degradation_cost_all,
380
+ "grid_reduction_sunny_hours_pct": grid_reduction_sunny_pct,
381
+ "cost_savings_sunny_hours_pct": cost_savings_sunny_pct,
382
+ "community_sourcing_rate_pct": community_sourcing_rate_pct,
383
+ "solar_sharing_efficiency_pct": solar_sharing_efficiency_pct,
384
+ }
385
+ final_row_df = pd.DataFrame([final_row])
386
+
387
+ if not daily_summary_df.empty:
388
+ daily_summary_df = pd.concat([daily_summary_df, final_row_df], ignore_index=True)
389
+
390
+ summary_csv = os.path.join(logs_dir, "summary_per_day.csv")
391
+ daily_summary_df.to_csv(summary_csv, index=False)
392
+ # REMOVED: print(f"Saved day-level summary with final multi-day row to: {summary_csv}")
393
+
394
+ # REMOVED: Final Printout Summary (the entire block)
395
+
396
+ # ─── Plots ───────────────────────────────────────────────────
397
+
398
+ plot_daily_df = daily_summary_df[daily_summary_df["day"] != "ALL_DAYS_SUMMARY"].copy()
399
+ plot_daily_df["day"] = plot_daily_df["day"].astype(int)
400
+
401
+ # 1) Daily Cost Savings Percentage
402
+ plt.figure(figsize=(12, 6))
403
+ plt.bar(plot_daily_df["day"], plot_daily_df["cost_savings_pct"] * 100, color='skyblue')
404
+ plt.xlabel("Day")
405
+ plt.ylabel("Cost Savings (%)")
406
+ plt.title("Daily Community Cost Savings Percentage")
407
+ plt.xticks(plot_daily_df["day"])
408
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
409
+ plt.savefig(os.path.join(plots_dir, "daily_cost_savings_percentage.png"))
410
+ plt.close()
411
+
412
+ # 2) Daily Total Demand vs. Solar
413
+ plt.figure(figsize=(12, 6))
414
+ bar_width = 0.4
415
+ days = plot_daily_df["day"]
416
+ plt.bar(days - bar_width/2, plot_daily_df["day_total_demand"], width=bar_width, label="Total Demand", color='coral')
417
+ plt.bar(days + bar_width/2, plot_daily_df["day_total_solar"], width=bar_width, label="Total Solar Generation", color='gold')
418
+ plt.xlabel("Day")
419
+ plt.ylabel("Energy (kWh)")
420
+ plt.title("Total Community Demand vs. Solar Generation Per Day")
421
+ plt.xticks(days)
422
+ plt.legend()
423
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
424
+ plt.savefig(os.path.join(plots_dir, "daily_demand_vs_solar.png"))
425
+ plt.close()
426
+
427
+ # 3) Combined Time Series of Energy Flows
428
+ step_group = all_days_df.groupby(["day", "step"]).sum(numeric_only=True).reset_index()
429
+ step_group["global_step"] = (step_group["day"] - 1) * eval_num_steps + step_group["step"]
430
+ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12), sharex=True)
431
+
432
+ # Subplot 1: Grid Import vs P2P Buy
433
+ ax1.plot(step_group["global_step"], step_group["grid_import_with_p2p"], label="Grid Import (with P2P)", color='r')
434
+ ax1.plot(step_group["global_step"], step_group["p2p_buy"], label="P2P Buy", color='g')
435
+ ax1.set_ylabel("Energy (kWh)")
436
+ ax1.set_title("Community Energy Consumption: Grid Import vs. P2P Buy")
437
+ ax1.legend()
438
+ ax1.grid(True, linestyle='--', alpha=0.6)
439
+
440
+ # Subplot 2: Grid Export vs P2P Sell
441
+ ax2.plot(step_group["global_step"], step_group["grid_export"], label="Grid Export", color='orange')
442
+ ax2.plot(step_group["global_step"], step_group["p2p_sell"], label="P2P Sell", color='b')
443
+ ax2.set_xlabel("Global Timestep")
444
+ ax2.set_ylabel("Energy (kWh)")
445
+ ax2.set_title("Community Energy Generation: Grid Export vs. P2P Sell")
446
+ ax2.legend()
447
+ ax2.grid(True, linestyle='--', alpha=0.6)
448
+
449
+ plt.tight_layout()
450
+ plt.savefig(os.path.join(plots_dir, "combined_energy_flows_timeseries.png"))
451
+ plt.close()
452
+
453
+ # 4)Stacked Bar of Daily Energy Sources
454
+ daily_agg = all_days_df.groupby("day").sum(numeric_only=True)
455
+
456
+ plt.figure(figsize=(12, 7))
457
+ plt.bar(daily_agg.index, daily_agg["grid_import_with_p2p"], label="Grid Import (with P2P)", color='crimson')
458
+ plt.bar(daily_agg.index, daily_agg["p2p_buy"], bottom=daily_agg["grid_import_with_p2p"], label="P2P Buy", color='limegreen')
459
+ plt.plot(daily_agg.index, daily_agg["grid_import_no_p2p"], label="Baseline Grid Import (No P2P)", color='blue', linestyle='--', marker='o')
460
+
461
+ plt.xlabel("Day")
462
+ plt.ylabel("Energy (kWh)")
463
+ plt.title("Daily Energy Procurement: Baseline vs. P2P+Grid")
464
+ plt.xticks(daily_agg.index)
465
+ plt.legend()
466
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
467
+ plt.savefig(os.path.join(plots_dir, "daily_energy_procurement_stacked.png"))
468
+ plt.close()
469
+
470
+ # 5) NEW: Fairness Metrics Over Time
471
+ plt.figure(figsize=(12, 6))
472
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_cost_savings"], label="Cost Savings Fairness", marker='o')
473
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_grid_reduction"], label="Grid Reduction Fairness", marker='s')
474
+ plt.plot(plot_daily_df["day"], plot_daily_df["fairness_reward"], label="Reward Fairness", marker='^')
475
+ plt.xlabel("Day")
476
+ plt.ylabel("Jain's Fairness Index")
477
+ plt.title("Daily Fairness Metrics")
478
+ plt.xticks(plot_daily_df["day"])
479
+ plt.ylim(0, 1.05)
480
+ plt.legend()
481
+ plt.grid(True, linestyle='--', alpha=0.7)
482
+ plt.savefig(os.path.join(plots_dir, "daily_fairness_metrics.png"))
483
+ plt.close()
484
+
485
+ # 6) NEW: Per-House Summary
486
+ fig, ax1 = plt.subplots(figsize=(15, 7))
487
+
488
+ house_ids_str = house_level_df.index.astype(str)
489
+ bar_width = 0.4
490
+ index = np.arange(len(house_ids_str))
491
+ color1 = 'tab:green'
492
+ ax1.set_xlabel('House ID')
493
+ ax1.set_ylabel('Total Cost Savings ($)', color=color1)
494
+ ax1.bar(index - bar_width/2, house_level_df["cost_savings"], bar_width, label='Cost Savings', color=color1)
495
+ ax1.tick_params(axis='y', labelcolor=color1)
496
+ ax1.set_xticks(index)
497
+ ax1.set_xticklabels(house_ids_str, rotation=45, ha="right")
498
+ ax2 = ax1.twinx()
499
+ color2 = 'tab:blue'
500
+ ax2.set_ylabel('Total Grid Import Reduction (kWh)', color=color2)
501
+ ax2.bar(index + bar_width/2, house_level_df["import_reduction"], bar_width, label='Import Reduction', color=color2)
502
+ ax2.tick_params(axis='y', labelcolor=color2)
503
+
504
+ plt.title(f'Total Cost Savings & Grid Import Reduction Per House (over {DAYS_TO_EVALUATE} days)')
505
+
506
+ fig.tight_layout()
507
+ plt.savefig(os.path.join(plots_dir, "per_house_summary.png"))
508
+ plt.close()
509
+
510
+ # 7) Price Dynamics for a Single Day
511
+ day1_prices = all_days_df[all_days_df['day'] == 1][['step', 'grid_price', 'peer_price']].drop_duplicates()
512
+ plt.figure(figsize=(12, 6))
513
+ plt.plot(day1_prices['step'], day1_prices['grid_price'], label='Grid Price', color='darkorange')
514
+ plt.plot(day1_prices['step'], day1_prices['peer_price'], label='P2P Price', color='teal')
515
+ plt.xlabel("Timestep of Day")
516
+ plt.ylabel("Price ($/kWh)")
517
+ plt.title("Price Dynamics on Day 1")
518
+ plt.legend()
519
+ plt.grid(True, linestyle='--', alpha=0.6)
520
+ plt.savefig(os.path.join(plots_dir, "price_dynamics_day1.png"))
521
+ plt.close()
522
+
523
+ # 8)Battery State of Charge (SoC) for a Sample of Houses
524
+ day1_df = all_days_df[all_days_df['day'] == 1]
525
+ battery_houses = day1_df.dropna(subset=['soc'])['house'].unique()
526
+
527
+ if len(battery_houses) > 0:
528
+ sample_houses = battery_houses[:min(4, len(battery_houses))]
529
+ plt.figure(figsize=(12, 6))
530
+ for house in sample_houses:
531
+ house_df = day1_df[day1_df['house'] == house]
532
+ plt.plot(house_df['step'], house_df['soc'] * 100, label=f'House {house}')
533
+
534
+ plt.xlabel("Timestep of Day")
535
+ plt.ylabel("State of Charge (%)")
536
+ plt.title("Battery SoC on Day 1 for Sample Houses")
537
+ plt.legend()
538
+ plt.grid(True, linestyle='--', alpha=0.6)
539
+ plt.savefig(os.path.join(plots_dir, "soc_dynamics_day1.png"))
540
+ plt.close()
541
+
542
+ # Final success message
543
+ print("Evaluation run completed. All logs and plots saved to disk.")
544
+
545
+ if __name__ == "__main__":
546
+ main()
SolarSys/mappo/_init_.py ADDED
File without changes
SolarSys/mappo/trainer/__init__.py ADDED
File without changes
SolarSys/mappo/trainer/mappo.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mappo.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import random
5
+ import numpy as np
6
+ from torch.distributions import Normal
7
+ from torch.amp import autocast
8
+ from torch.cuda.amp import GradScaler
9
+
10
+
11
+
12
+ #device selection
13
+ if torch.cuda.is_available():
14
+ device = torch.device("cuda")
15
+ print("MAPPO using CUDA (NVIDIA GPU)")
16
+ else:
17
+ device = torch.device("cpu")
18
+ print("MAPPO using CPU")
19
+ # elif torch.backends.mps.is_available():
20
+ # device = torch.device("mps")
21
+ # print("Using MPS (Apple Silicon GPU)")
22
+
23
+ # device = torch.device("cpu")
24
+
25
+ def set_global_seed(seed: int):
26
+ random.seed(seed)
27
+ np.random.seed(seed)
28
+ torch.manual_seed(seed)
29
+
30
+ if torch.cuda.is_available():
31
+ torch.cuda.manual_seed_all(seed)
32
+ torch.backends.cudnn.deterministic = False
33
+ torch.backends.cudnn.benchmark = True
34
+
35
+ SEED = 50 #please try run with different seeds to get desired results, we tried with 42, 1,10,20,50.
36
+ set_global_seed(SEED)
37
+
38
+ class MLP(nn.Module):
39
+ def __init__(self, input_dim, hidden_dims, output_dim):
40
+ super().__init__()
41
+ layers = []
42
+ last_dim = input_dim
43
+ for h in hidden_dims:
44
+ layers += [nn.Linear(last_dim, h), nn.ReLU()]
45
+ last_dim = h
46
+ layers.append(nn.Linear(last_dim, output_dim))
47
+ self.net = nn.Sequential(*layers)
48
+
49
+ def forward(self, x):
50
+ return self.net(x)
51
+
52
+ class Actor(nn.Module):
53
+ def __init__(self, obs_dim, act_dim, hidden=(64,64)):
54
+ super().__init__()
55
+ self.net = MLP(obs_dim, hidden, act_dim)
56
+ self.log_std = nn.Parameter(torch.zeros(act_dim))
57
+
58
+ def forward(self, x):
59
+ mean = self.net(x)
60
+ std = torch.exp(self.log_std)
61
+ return mean, std
62
+
63
+ class Critic(nn.Module):
64
+ def __init__(self, state_dim, hidden=(128,128)):
65
+ super().__init__()
66
+ self.net = MLP(state_dim, hidden, 1)
67
+
68
+ def forward(self, x):
69
+ return self.net(x).squeeze(-1)
70
+
71
+ class MAPPO:
72
+ def __init__(
73
+ self,
74
+ n_agents,
75
+ local_dim,
76
+ global_dim,
77
+ act_dim,
78
+ lr=3e-4,
79
+ gamma=0.99,
80
+ lam=0.95,
81
+ clip_eps=0.2,
82
+ k_epochs=10,
83
+ batch_size=1024,
84
+ episode_len=96
85
+ ):
86
+ self.n_agents = n_agents
87
+ self.local_dim = local_dim
88
+ self.global_dim = global_dim
89
+ self.act_dim = act_dim
90
+ self.gamma = gamma
91
+ self.lam = lam
92
+ self.clip_eps = clip_eps
93
+ self.k_epochs = k_epochs
94
+ self.batch_size = batch_size
95
+ self.episode_len = episode_len
96
+
97
+ self.actor = Actor(local_dim, act_dim).to(device)
98
+ self.critic = Critic(global_dim).to(device)
99
+
100
+ self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
101
+ self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
102
+
103
+ print("MAPPO CUDA AMP is disabled for stability.")
104
+
105
+ self.init_buffer()
106
+
107
+ def init_buffer(self):
108
+ self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float16)
109
+ self.gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float16)
110
+ self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float16)
111
+ self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
112
+ self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
113
+ self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float16)
114
+ self.next_gs_buf = np.zeros((self.episode_len, self.global_dim), dtype=np.float16)
115
+ self.step_idx = 0
116
+
117
+ @torch.no_grad()
118
+ def select_action(self, local_obs, global_obs):
119
+ l = torch.from_numpy(local_obs).float().to(device)
120
+ mean, std = self.actor(l)
121
+ dist = Normal(mean, std)
122
+ a = dist.sample()
123
+ return a.cpu().numpy(), dist.log_prob(a).sum(-1).cpu().numpy()
124
+
125
+ def store(self, local_obs, global_obs, action, logp, reward, done, next_global_obs):
126
+ if self.step_idx < self.episode_len:
127
+ self.ls_buf[self.step_idx] = local_obs
128
+ self.gs_buf[self.step_idx] = global_obs
129
+ self.ac_buf[self.step_idx] = action
130
+ self.lp_buf[self.step_idx] = logp
131
+ self.rw_buf[self.step_idx] = reward
132
+ self.done_buf[self.step_idx] = done
133
+ self.next_gs_buf[self.step_idx] = next_global_obs
134
+ self.step_idx += 1
135
+
136
+ def compute_gae(self, T, vals):
137
+ N = self.n_agents
138
+ vals_agent = vals.unsqueeze(1).expand(-1, N).cpu().numpy()
139
+
140
+ next_vals_agent = np.zeros_like(vals_agent)
141
+ next_vals_agent[:-1] = vals_agent[1:]
142
+
143
+ if not self.done_buf[T-1].all():
144
+ with torch.no_grad():
145
+ v_last = self.critic(
146
+ torch.from_numpy(self.next_gs_buf[T-1]).float().to(device)
147
+ ).cpu().item()
148
+ next_vals_agent[T-1, :] = v_last
149
+
150
+ adv = np.zeros_like(vals_agent, dtype=np.float16)
151
+ gae_lambda = 0.0
152
+ for t in reversed(range(T)):
153
+ masks = 1.0 - self.done_buf[t]
154
+ rewards = self.rw_buf[t]
155
+
156
+ delta = rewards + self.gamma * next_vals_agent[t] * masks - vals_agent[t]
157
+ gae_lambda = delta + self.gamma * self.lam * masks * gae_lambda
158
+ adv[t] = gae_lambda
159
+
160
+ ret = adv + vals_agent
161
+ adv_flat = torch.from_numpy(adv.flatten()).to(device)
162
+ ret_flat = torch.from_numpy(ret.flatten()).to(device)
163
+ return adv_flat, ret_flat
164
+
165
+ def update(self):
166
+ T = self.step_idx
167
+ if T == 0: return
168
+
169
+ gs_tensor = torch.from_numpy(self.gs_buf[:T]).float().to(device)
170
+ ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device).view(T * self.n_agents, -1)
171
+ ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device).view(T * self.n_agents, -1)
172
+ lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device).view(-1)
173
+
174
+ with torch.no_grad():
175
+ vals = self.critic(gs_tensor)
176
+
177
+ adv_flat, ret_flat = self.compute_gae(T, vals)
178
+ adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)
179
+
180
+ gs_for_batch = gs_tensor.unsqueeze(1).expand(-1, self.n_agents, -1).reshape(T * self.n_agents, self.global_dim)
181
+
182
+ dataset = torch.utils.data.TensorDataset(ls_tensor, gs_for_batch, ac_tensor, lp_tensor, adv_flat, ret_flat)
183
+ gen = torch.Generator()
184
+ gen.manual_seed(SEED)
185
+ loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)
186
+
187
+ for _ in range(self.k_epochs):
188
+ for b_ls, b_gs, b_ac, b_lp, b_adv, b_ret in loader:
189
+ mean, std = self.actor(b_ls)
190
+ dist = Normal(mean, std)
191
+ entropy = dist.entropy().mean()
192
+ lp_new = dist.log_prob(b_ac).sum(-1)
193
+ ratio = torch.exp(lp_new - b_lp)
194
+ surr1 = ratio * b_adv
195
+ surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
196
+ actor_loss = -torch.min(surr1, surr2).mean() - 0.01 * entropy
197
+ self.opt_a.zero_grad()
198
+ actor_loss.backward()
199
+ self.opt_a.step()
200
+ val_pred = self.critic(b_gs)
201
+ critic_loss = nn.MSELoss()(val_pred, b_ret)
202
+ self.opt_c.zero_grad()
203
+ critic_loss.backward()
204
+ self.opt_c.step()
205
+ self.step_idx = 0
206
+
207
+ def save(self, path):
208
+ torch.save({'actor': self.actor.state_dict(),
209
+ 'critic': self.critic.state_dict()}, path)
210
+
211
+ def load(self, path):
212
+ data = torch.load(path, map_location=device)
213
+ self.actor.load_state_dict(data['actor'])
214
+ self.critic.load_state_dict(data['critic'])
SolarSys/meanfield/_init_.py ADDED
File without changes
SolarSys/meanfield/trainer/__init__.py ADDED
File without changes
SolarSys/meanfield/trainer/meanfield.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # meanfield.py
2
+ import torch
3
+ import torch.nn as nn
4
+ import numpy as np
5
+ import random
6
+ from torch.distributions import Normal
7
+ from torch.amp import autocast
8
+ from torch.cuda.amp import GradScaler
9
+
10
+ #device selection
11
+ if torch.cuda.is_available():
12
+ device = torch.device("cuda")
13
+ print("Using CUDA (NVIDIA GPU)")
14
+ else:
15
+ device = torch.device("cpu")
16
+ print("Using CPU")
17
+
18
+ def set_global_seed(seed: int):
19
+ random.seed(seed)
20
+ np.random.seed(seed)
21
+ torch.manual_seed(seed)
22
+ if torch.cuda.is_available():
23
+ torch.cuda.manual_seed_all(seed)
24
+ torch.backends.cudnn.deterministic = False
25
+ torch.backends.cudnn.benchmark = True
26
+
27
+ SEED = 42 #please try run with different seeds to get desired results, we tried with 42, 1,10,20,50.
28
+ set_global_seed(SEED)
29
+
30
+ class MLP(nn.Module):
31
+ def __init__(self, input_dim, hidden_dims, output_dim):
32
+ super().__init__()
33
+ layers = []
34
+ last_dim = input_dim
35
+ for h in hidden_dims:
36
+ layers += [nn.Linear(last_dim, h), nn.ReLU()]
37
+ last_dim = h
38
+ layers.append(nn.Linear(last_dim, output_dim))
39
+ self.net = nn.Sequential(*layers)
40
+
41
+ def forward(self, x):
42
+ return self.net(x)
43
+
44
+ class Actor(nn.Module):
45
+ def __init__(self, obs_dim, mean_field_dim, act_dim, hidden=(64, 64)):
46
+ super().__init__()
47
+ input_dim = obs_dim + mean_field_dim
48
+ self.net = MLP(input_dim, hidden, act_dim)
49
+ self.log_std = nn.Parameter(torch.zeros(act_dim))
50
+
51
+ def forward(self, local_obs, mean_field):
52
+ x = torch.cat([local_obs, mean_field], dim=-1)
53
+ mean = self.net(x)
54
+ LOG_STD_MIN = -5
55
+ LOG_STD_MAX = 2
56
+ clamped_log_std = torch.clamp(self.log_std, LOG_STD_MIN, LOG_STD_MAX)
57
+ std = torch.exp(clamped_log_std)
58
+
59
+ return Normal(mean, std)
60
+
61
+ class Critic(nn.Module):
62
+ def __init__(self, obs_dim, mean_field_dim, hidden=(128, 128)):
63
+ super().__init__()
64
+ input_dim = obs_dim + mean_field_dim
65
+ self.net = MLP(input_dim, hidden, 1)
66
+
67
+ def forward(self, local_obs, mean_field):
68
+ x = torch.cat([local_obs, mean_field], dim=-1)
69
+ return self.net(x).squeeze(-1)
70
+
71
+ class MFAC:
72
+ def __init__(
73
+ self,
74
+ n_agents,
75
+ local_dim,
76
+ act_dim,
77
+ lr=3e-4,
78
+ gamma=0.99,
79
+ lam=0.95,
80
+ clip_eps=0.2,
81
+ k_epochs=10,
82
+ batch_size=1024,
83
+ entropy_coeff=0.01,
84
+ episode_len=96
85
+ ):
86
+ self.n_agents = n_agents
87
+ self.local_dim = local_dim
88
+ self.mean_field_dim = local_dim
89
+ self.act_dim = act_dim
90
+ self.gamma = gamma
91
+ self.lam = lam
92
+ self.clip_eps = clip_eps
93
+ self.k_epochs = k_epochs
94
+ self.batch_size = batch_size
95
+ self.entropy_coeff = entropy_coeff
96
+ self.episode_len = episode_len
97
+
98
+ self.actor = Actor(self.local_dim, self.mean_field_dim, self.act_dim).to(device)
99
+ self.critic = Critic(self.local_dim, self.mean_field_dim).to(device)
100
+
101
+ self.opt_a = torch.optim.Adam(self.actor.parameters(), lr=lr)
102
+ self.opt_c = torch.optim.Adam(self.critic.parameters(), lr=lr)
103
+
104
+ self.use_cuda_amp = (device.type == 'cuda')
105
+ self.scaler = GradScaler(enabled=self.use_cuda_amp)
106
+ print(f"MFAC CUDA AMP Enabled: {self.use_cuda_amp}")
107
+
108
+ self.init_buffer()
109
+
110
+ def init_buffer(self):
111
+ self.ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
112
+ self.ac_buf = np.zeros((self.episode_len, self.n_agents, self.act_dim), dtype=np.float32)
113
+ self.lp_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
114
+ self.rw_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
115
+ self.done_buf = np.zeros((self.episode_len, self.n_agents), dtype=np.float32)
116
+ self.next_ls_buf = np.zeros((self.episode_len, self.n_agents, self.local_dim), dtype=np.float32)
117
+ self.step_idx = 0
118
+
119
+ def clear_buffer(self):
120
+ pass
121
+
122
+ def _get_mean_field(self, obs_batch):
123
+ if self.n_agents <= 1:
124
+ return torch.zeros(*obs_batch.shape[:-1], self.mean_field_dim, device=obs_batch.device)
125
+ total_obs = torch.sum(obs_batch, dim=-2, keepdim=True)
126
+ mean_field = (total_obs - obs_batch) / (self.n_agents - 1)
127
+ return mean_field
128
+
129
+ @torch.no_grad()
130
+ def select_action(self, local_obs, evaluate=False):
131
+ obs_tensor = torch.from_numpy(local_obs).float().to(device)
132
+ with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
133
+ mean_field = self._get_mean_field(obs_tensor)
134
+ dist = self.actor(obs_tensor, mean_field)
135
+ if evaluate:
136
+ action = dist.mean
137
+ else:
138
+ action = dist.sample()
139
+
140
+ log_prob = dist.log_prob(action).sum(-1)
141
+ return action.cpu().numpy(), log_prob.cpu().numpy()
142
+
143
+ def store(self, local_obs, action, logp, reward, done, next_local_obs):
144
+ if self.step_idx < self.episode_len:
145
+ self.ls_buf[self.step_idx] = local_obs
146
+ self.ac_buf[self.step_idx] = action
147
+ self.lp_buf[self.step_idx] = logp
148
+ self.rw_buf[self.step_idx] = np.array(reward, dtype=np.float32)
149
+ self.done_buf[self.step_idx] = np.array(done, dtype=np.float32)
150
+ self.next_ls_buf[self.step_idx] = next_local_obs
151
+ self.step_idx += 1
152
+
153
+ def update(self):
154
+ T = self.step_idx
155
+ if T == 0: return
156
+
157
+ ls_tensor = torch.from_numpy(self.ls_buf[:T]).float().to(device)
158
+ ac_tensor = torch.from_numpy(self.ac_buf[:T]).float().to(device)
159
+ lp_tensor = torch.from_numpy(self.lp_buf[:T]).float().to(device)
160
+ rw_tensor = torch.from_numpy(self.rw_buf[:T]).float().to(device)
161
+ done_tensor = torch.from_numpy(self.done_buf[:T]).float().to(device)
162
+ next_ls_tensor = torch.from_numpy(self.next_ls_buf[:T]).float().to(device)
163
+
164
+ with torch.no_grad():
165
+ with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
166
+ mf_all = self._get_mean_field(ls_tensor)
167
+ vals = self.critic(ls_tensor, mf_all)
168
+ next_mf_all = self._get_mean_field(next_ls_tensor)
169
+ next_vals = self.critic(next_ls_tensor, next_mf_all)
170
+ adv = torch.zeros_like(rw_tensor)
171
+ gae = 0
172
+ masks = 1.0 - done_tensor
173
+ for t in reversed(range(T)):
174
+ delta = rw_tensor[t] + self.gamma * next_vals[t] * masks[t] - vals[t]
175
+ gae = delta + self.gamma * self.lam * masks[t] * gae
176
+ adv[t] = gae
177
+ ret = adv + vals
178
+
179
+ N, D_l = self.n_agents, self.local_dim
180
+
181
+ ls_flat = ls_tensor.view(T * N, D_l)
182
+ mf_flat = mf_all.view(T * N, self.mean_field_dim)
183
+ ac_flat = ac_tensor.view(T * N, self.act_dim)
184
+ lp_flat = lp_tensor.view(-1)
185
+ adv_flat = adv.view(-1)
186
+ ret_flat = ret.view(-1)
187
+
188
+ adv_flat = (adv_flat - adv_flat.mean()) / (adv_flat.std() + 1e-8)
189
+ ret_flat = (ret_flat - ret_flat.mean()) / (ret_flat.std() + 1e-8)
190
+
191
+ dataset = torch.utils.data.TensorDataset(ls_flat, mf_flat, ac_flat, lp_flat, adv_flat, ret_flat)
192
+ gen = torch.Generator()
193
+ gen.manual_seed(SEED)
194
+ loader = torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True, generator=gen)
195
+
196
+ for _ in range(self.k_epochs):
197
+ for b_ls, b_mf, b_ac, b_lp, b_adv, b_ret in loader:
198
+
199
+ self.opt_a.zero_grad(set_to_none=True)
200
+ with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
201
+ dist_new = self.actor(b_ls, b_mf)
202
+ lp_new = dist_new.log_prob(b_ac).sum(-1)
203
+ entropy = dist_new.entropy().sum(-1).mean()
204
+ log_ratio = torch.clamp(lp_new - b_lp, -20.0, 20.0)
205
+ ratio = torch.exp(log_ratio)
206
+ surr1 = ratio * b_adv
207
+ surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * b_adv
208
+ actor_loss = -torch.min(surr1, surr2).mean() - self.entropy_coeff * entropy
209
+
210
+ self.scaler.scale(actor_loss).backward()
211
+ self.scaler.unscale_(self.opt_a)
212
+ torch.nn.utils.clip_grad_norm_(self.actor.parameters(), max_norm=0.5)
213
+ self.scaler.step(self.opt_a)
214
+
215
+ self.opt_c.zero_grad(set_to_none=True)
216
+ with autocast(device_type=device.type, dtype=torch.float16, enabled=self.use_cuda_amp):
217
+ val_pred = self.critic(b_ls, b_mf)
218
+ critic_loss = nn.MSELoss()(val_pred, b_ret)
219
+
220
+ self.scaler.scale(critic_loss).backward()
221
+ self.scaler.unscale_(self.opt_c)
222
+ torch.nn.utils.clip_grad_norm_(self.critic.parameters(), max_norm=0.5)
223
+ self.scaler.step(self.opt_c)
224
+
225
+ self.scaler.update()
226
+
227
+ self.step_idx = 0
228
+
229
+ def save(self, path):
230
+ torch.save({
231
+ 'actor': self.actor.state_dict(),
232
+ 'critic': self.critic.state_dict()
233
+ }, path)
234
+
235
+ def load(self, path):
236
+ data = torch.load(path, map_location=device)
237
+ self.actor.load_state_dict(data['actor'])
238
+ self.critic.load_state_dict(data['critic'])
SolarSys/training_freezing.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ from datetime import datetime, timedelta
5
+ import re
6
+ import numpy as np
7
+ import torch
8
+ import pandas as pd
9
+ import matplotlib.pyplot as plt
10
+
11
+ # Allow imports from project root
12
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
13
+
14
+ #This is important for running the file, please make sure to follow the same directory structure as listed in the zip file
15
+ from cluster import InterClusterCoordinator, InterClusterLedger
16
+ from Environment.cluster_env_wrapper import make_vec_env
17
+ from mappo.trainer.mappo import MAPPO
18
+ from meanfield.trainer.meanfield import MFAC
19
+
20
+ def recursive_sum(item):
21
+ total = 0
22
+ if hasattr(item, '__iter__') and not isinstance(item, str):
23
+ for sub_item in item:
24
+ total += recursive_sum(sub_item)
25
+ elif np.isreal(item):
26
+ total += item
27
+ return total
28
+
29
+
30
+ def main():
31
+ overall_start_time = time.time()
32
+ # ─── Hyperparameters ───────────────────────
33
+ STATE_TO_RUN = "pennsylvania" # or "colorado", "oklahoma"
34
+ DATA_PATH = ""
35
+ # Dynamically extract the number of agents from the file path
36
+ match = re.search(r'(\d+)houses', DATA_PATH)
37
+ if not match:
38
+ raise ValueError("Could not extract the number of houses from DATA_PATH.")
39
+ NUMBER_OF_AGENTS = int(match.group(1))
40
+ NUM_EPISODES = 10000
41
+ CLUSTER_SIZE = 10
42
+ BATCH_SIZE = 256
43
+ CHECKPOINT_INTERVAL= 1000
44
+ WINDOW_SIZE = 80
45
+ MAX_TRANSFER_KWH = 100000
46
+ LR = 2e-4
47
+ GAMMA = 0.95
48
+ LAMBDA = 0.95
49
+ CLIP_EPS = 0.2
50
+ K_EPOCHS = 4
51
+ JOINT_TRAINING_START_EPISODE = 2000
52
+ FREEZE_HIGH_FOR_EPISODES = 20
53
+ FREEZE_LOW_FOR_EPISODES = 10
54
+
55
+ # ─── Build directories ─────────────────
56
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
57
+ run_name = f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_" \
58
+ f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
59
+ root_dir = os.path.join("Training", run_name) # New folder for new runs
60
+ models_dir= os.path.join(root_dir, "models")
61
+ logs_dir = os.path.join(root_dir, "logs")
62
+ plots_dir = os.path.join(root_dir, "plots")
63
+
64
+ for d in (models_dir, logs_dir, plots_dir):
65
+ os.makedirs(d, exist_ok=True)
66
+ print(f"Logging to: {root_dir}")
67
+
68
+ # ─── Environment & Agent Initialization ─────────────────
69
+ cluster_env = make_vec_env(
70
+ data_path=DATA_PATH,
71
+ time_freq="15T",
72
+ cluster_size=CLUSTER_SIZE,
73
+ state=STATE_TO_RUN # <-- Use the state variable here
74
+ )
75
+
76
+ #Get env parameters from the new vectorized environment object.
77
+ n_clusters = cluster_env.num_envs
78
+ sample_subenv = cluster_env.cluster_envs[0]
79
+ n_agents_per_cluster = sample_subenv.num_agents
80
+
81
+ local_dim = sample_subenv.observation_space.shape[-1]
82
+ global_dim = n_agents_per_cluster * local_dim
83
+ act_dim = sample_subenv.action_space[0].shape[-1]
84
+ total_buffer_size = sample_subenv.num_steps * n_clusters
85
+ print(f"Low-level agent buffer size set to: {total_buffer_size}")
86
+ print(f"Created {n_clusters} clusters.")
87
+ print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
88
+ f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")
89
+ print(f"Creating {n_clusters} independent low-level MAPPO agents...")
90
+ low_agents = []
91
+ for i in range(n_clusters):
92
+ agent_buffer_size = sample_subenv.num_steps
93
+
94
+ agent = MAPPO(
95
+ n_agents = n_agents_per_cluster,
96
+ local_dim = local_dim,
97
+ global_dim = global_dim,
98
+ act_dim = act_dim,
99
+ lr = LR,
100
+ gamma = GAMMA,
101
+ lam = LAMBDA,
102
+ clip_eps = CLIP_EPS,
103
+ k_epochs = K_EPOCHS,
104
+ batch_size = BATCH_SIZE,
105
+ episode_len = agent_buffer_size
106
+ )
107
+ low_agents.append(agent)
108
+
109
+ OBS_DIM_HI_LOCAL = 7
110
+ act_dim_inter = 2
111
+ print(f"Inter-cluster agent (MFAC): n_agents={n_clusters}, "
112
+ f"local_dim={OBS_DIM_HI_LOCAL}, act_dim={act_dim_inter}")
113
+ inter_agent = MFAC(
114
+ n_agents = n_clusters,
115
+ local_dim = OBS_DIM_HI_LOCAL,
116
+ act_dim = act_dim_inter,
117
+ lr = LR,
118
+ gamma = GAMMA,
119
+ lam = LAMBDA,
120
+ clip_eps = CLIP_EPS,
121
+ k_epochs = K_EPOCHS,
122
+ batch_size = BATCH_SIZE,
123
+ episode_len=96
124
+ )
125
+ ledger = InterClusterLedger()
126
+ coordinator = InterClusterCoordinator(
127
+ cluster_env,
128
+ inter_agent,
129
+ ledger,
130
+ max_transfer_kwh=MAX_TRANSFER_KWH
131
+ )
132
+
133
+ # ─── Training loop ─────────────────────────────────────
134
+ total_steps = 0
135
+ inter_episode_rewards = []
136
+ episode_log_data = []
137
+ performance_metrics_log = []
138
+ agent_rewards_log = [[] for _ in range(NUMBER_OF_AGENTS)]
139
+ intra_log = {}
140
+ inter_log = {}
141
+ total_log = {}
142
+ cost_log = {}
143
+
144
+ for ep in range(1, NUM_EPISODES + 1):
145
+ inter_episode_rewards_this_ep = []
146
+ step_count = 0
147
+ start_time = time.time()
148
+ ep_total_inter_cluster_reward = 0.0
149
+ day_logs = []
150
+ obs_clusters, _ = cluster_env.reset()
151
+ # This runs after an episode is done (triggered by reset), but before the new one starts.
152
+ if ep > 1:
153
+ all_cluster_metrics = cluster_env.call('get_episode_metrics')
154
+
155
+ # Aggregate the metrics from all clusters into a single system-wide summary
156
+ system_metrics = {
157
+ "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in all_cluster_metrics),
158
+ "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in all_cluster_metrics),
159
+ "total_cost_savings": sum(m["total_cost_savings"] for m in all_cluster_metrics),
160
+ "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in all_cluster_metrics),
161
+ # For fairness, we average the fairness index across clusters
162
+ "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in all_cluster_metrics]),
163
+ "Episode": ep - 1
164
+ }
165
+
166
+ performance_metrics_log.append(system_metrics)
167
+
168
+
169
+ # =================================================================
170
+
171
+ done_all = False
172
+ cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)
173
+ total_cost = 0.0
174
+ total_grid_import = 0.0
175
+
176
+ # Determine training phase
177
+ is_phase_1 = ep < JOINT_TRAINING_START_EPISODE
178
+
179
+ if ep == 1: print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
180
+ if ep == JOINT_TRAINING_START_EPISODE: print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")
181
+
182
+ # The main loop continues as long as the episode is not done.
183
+ while not done_all:
184
+ total_steps += 1
185
+ step_count += 1
186
+ # --- Action Selection (Low-Level) ---
187
+ batch_global_obs = obs_clusters.reshape(n_clusters, -1)
188
+
189
+ # Loop through each cluster to get actions from its dedicated agent
190
+ low_level_actions_list = []
191
+ low_level_logps_list = []
192
+ for c_idx in range(n_clusters):
193
+ agent = low_agents[c_idx]
194
+ local_obs_cluster = obs_clusters[c_idx]
195
+ global_obs_cluster = batch_global_obs[c_idx]
196
+
197
+ actions, logps = agent.select_action(local_obs_cluster, global_obs_cluster)
198
+
199
+ low_level_actions_list.append(actions)
200
+ low_level_logps_list.append(logps)
201
+ low_level_actions = np.stack(low_level_actions_list)
202
+ low_level_logps = np.stack(low_level_logps_list)
203
+
204
+ # --- Action Selection & Transfers (High-Level, Phase 2 only) ---
205
+ if is_phase_1:
206
+ exports, imports = None, None
207
+ else:
208
+ # Get high-level observations
209
+ inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
210
+ inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
211
+
212
+ # Get high-level actions
213
+ high_level_action, high_level_logp = inter_agent.select_action(inter_cluster_obs_local)
214
+
215
+ # Build transfers
216
+ current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
217
+ exports, imports = coordinator.build_transfers(high_level_action, current_reports)
218
+
219
+ # --- Environment Step ---
220
+ next_obs_clusters, rewards, done_all, step_info = cluster_env.step(
221
+ low_level_actions, exports=exports, imports=imports
222
+ )
223
+ cluster_infos = step_info.get("cluster_infos")
224
+
225
+ day_logs.append({
226
+ "costs": cluster_infos["costs"],
227
+ "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
228
+ "charge_amount": cluster_infos.get("charge_amount"),
229
+ "discharge_amount": cluster_infos.get("discharge_amount")
230
+ })
231
+ per_agent_rewards = np.stack(cluster_infos['agent_rewards'])
232
+
233
+ rewards_for_buffer = per_agent_rewards
234
+ if not is_phase_1:
235
+ transfers_for_logging = (exports, imports)
236
+ high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
237
+ all_cluster_infos=cluster_infos,
238
+ actual_transfers=transfers_for_logging,
239
+ step_count=step_count
240
+ )
241
+ ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster) # Log the sum for the plot
242
+ next_inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count + 1) for se in cluster_env.cluster_envs]
243
+ next_inter_cluster_obs_local = np.array(next_inter_cluster_obs_local_list)
244
+
245
+ inter_agent.store(
246
+ inter_cluster_obs_local,
247
+ high_level_action,
248
+ high_level_logp,
249
+ high_level_rewards_per_cluster,
250
+ [done_all]*n_clusters,
251
+ next_inter_cluster_obs_local
252
+ )
253
+ bonus_per_agent = np.zeros_like(per_agent_rewards)
254
+ for c_idx in range(n_clusters):
255
+ num_agents_in_cluster = per_agent_rewards.shape[1]
256
+ if num_agents_in_cluster > 0:
257
+ bonus = high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
258
+ bonus_per_agent[c_idx, :] = bonus
259
+
260
+ rewards_for_buffer = per_agent_rewards + bonus_per_agent
261
+
262
+ # --- Data Storage (Low-Level) ---
263
+ dones_list = step_info.get("cluster_dones")
264
+ for idx in range(n_clusters):
265
+ low_agents[idx].store(
266
+ obs_clusters[idx],
267
+ batch_global_obs[idx],
268
+ low_level_actions[idx],
269
+ low_level_logps[idx],
270
+ rewards_for_buffer[idx],
271
+ dones_list[idx],
272
+ next_obs_clusters[idx].reshape(-1)
273
+ )
274
+
275
+ # --- Logging and State Update ---
276
+ cluster_rewards += per_agent_rewards
277
+ total_cost += np.sum(cluster_infos['costs'])
278
+ total_grid_import += np.sum(cluster_infos['grid_import_with_p2p'])
279
+
280
+ obs_clusters = next_obs_clusters
281
+ if is_phase_1:
282
+ for agent in low_agents:
283
+ agent.update()
284
+ else:
285
+ CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
286
+ phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
287
+ position_in_cycle = phase2_episode_num % CYCLE_LENGTH
288
+
289
+ if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
290
+ print(f"Updating ALL LOW-LEVEL agents (High-level is frozen).")
291
+ for agent in low_agents:
292
+ agent.update()
293
+ else:
294
+ print(f"Updating HIGH-LEVEL agent (Low-level is frozen).")
295
+ inter_agent.update()
296
+
297
+ # =================================================================
298
+ duration = time.time() - start_time
299
+ num_low_level_agents = n_clusters * n_agents_per_cluster
300
+ get_price_fn = cluster_env.cluster_envs[0].get_grid_price
301
+
302
+
303
+
304
+ baseline_costs_per_step = [
305
+ recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
306
+ for i, entry in enumerate(day_logs)
307
+ ]
308
+ total_baseline_cost = sum(baseline_costs_per_step)
309
+ actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
310
+ total_actual_cost = sum(actual_costs_per_step)
311
+ cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0
312
+ total_reward_intra = cluster_rewards.sum()
313
+ mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0
314
+ total_reward_inter = ep_total_inter_cluster_reward
315
+ mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0
316
+ total_reward_system = total_reward_intra + total_reward_inter
317
+ mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0
318
+
319
+
320
+ intra_log.setdefault('total', []).append(total_reward_intra)
321
+ intra_log.setdefault('mean', []).append(mean_reward_intra)
322
+ inter_log.setdefault('total', []).append(total_reward_inter)
323
+ inter_log.setdefault('mean', []).append(mean_reward_inter)
324
+ total_log.setdefault('total', []).append(total_reward_system)
325
+ total_log.setdefault('mean', []).append(mean_reward_system)
326
+ cost_log.setdefault('total_cost', []).append(total_actual_cost)
327
+ cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)
328
+
329
+
330
+ episode_log_data.append({
331
+ "Episode": ep,
332
+ "Mean_Reward_System": mean_reward_system,
333
+ "Mean_Reward_Intra": mean_reward_intra,
334
+ "Mean_Reward_Inter": mean_reward_inter,
335
+ "Total_Reward_System": total_reward_system,
336
+ "Total_Reward_Intra": total_reward_intra,
337
+ "Total_Reward_Inter": total_reward_inter,
338
+ "Cost_Reduction_Pct": cost_reduction_pct,
339
+ "Episode_Duration": duration,
340
+ })
341
+
342
+
343
+ print(f"Ep {ep}/{NUM_EPISODES} | "
344
+ f"Mean System R: {mean_reward_system:.3f} | "
345
+ f"Cost Red: {cost_reduction_pct:.1f}% | "
346
+ f"Time: {duration:.2f}s")
347
+
348
+
349
+ if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
350
+ for c_idx, agent in enumerate(low_agents):
351
+ agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
352
+ inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
353
+ print(f"Saved checkpoint at episode {ep}")
354
+
355
+ print("Training completed! Aggregating final logs...")
356
+ # --- Final Episode Metrics ---
357
+ final_cluster_metrics = cluster_env.call('get_episode_metrics')
358
+ final_system_metrics = {
359
+ "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in final_cluster_metrics),
360
+ "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in final_cluster_metrics),
361
+ "total_cost_savings": sum(m["total_cost_savings"] for m in final_cluster_metrics),
362
+ "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in final_cluster_metrics),
363
+ "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in final_cluster_metrics]),
364
+ "Episode": NUM_EPISODES
365
+ }
366
+ performance_metrics_log.append(final_system_metrics)
367
+
368
+ df_rewards_log = pd.DataFrame(episode_log_data)
369
+ df_perf_log = pd.DataFrame(performance_metrics_log)
370
+ df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")
371
+
372
+ log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
373
+ overall_end_time = time.time()
374
+ total_duration_seconds = overall_end_time - overall_start_time
375
+ total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
376
+ df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)
377
+
378
+ columns_to_save = [
379
+ "Episode",
380
+ "Mean_Reward_System",
381
+ "Mean_Reward_Intra",
382
+ "Mean_Reward_Inter",
383
+ "Total_Reward_System",
384
+ "Total_Reward_Intra",
385
+ "Total_Reward_Inter",
386
+ "Cost_Reduction_Pct",
387
+ "battery_degradation_cost_total",
388
+ "Episode_Duration",
389
+ "total_cost_savings",
390
+ "grid_reduction_entire_day",
391
+ "fairness_on_cost_savings"
392
+ ]
393
+ df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
394
+ df_to_save.to_csv(log_csv_path, index=False)
395
+ print(f"Saved comprehensive training performance log to: {log_csv_path}")
396
+
397
+ generate_plots(
398
+ plots_dir=plots_dir,
399
+ num_episodes=NUM_EPISODES,
400
+ intra_log=intra_log,
401
+ inter_log=inter_log,
402
+ total_log=total_log,
403
+ cost_log=cost_log,
404
+ df_final_log=df_final_log
405
+ )
406
+ overall_end_time = time.time()
407
+ total_duration_seconds = overall_end_time - overall_start_time
408
+ total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))
409
+
410
+
411
+ print("\n" + "="*50)
412
+ print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
413
+ print("="*50)
414
+
415
+ ################################# PLOTING & LOGGING ##################################################################
416
+ def generate_plots(
417
+ plots_dir: str,
418
+ num_episodes: int,
419
+ intra_log: dict,
420
+ inter_log: dict,
421
+ total_log: dict,
422
+ cost_log: list,
423
+ df_final_log: pd.DataFrame
424
+ ):
425
+ """
426
+ Generates and saves all final plots after training is complete.
427
+ """
428
+ print("Training completed! Generating plots…")
429
+ def moving_avg(series, window):
430
+ return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()
431
+
432
+ ma_window = 120
433
+ episodes = np.arange(1, num_episodes + 1)
434
+
435
+ # Plot 1: Intra-cluster (Low-Level) Rewards
436
+ fig, ax = plt.subplots(figsize=(12, 7))
437
+ ax.plot(episodes, moving_avg(intra_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2)
438
+ ax.set_xlabel("Episode")
439
+ ax.set_ylabel("Total Intra-Cluster Reward", color='tab:blue')
440
+ ax.tick_params(axis='y', labelcolor='tab:blue')
441
+ ax.grid(True)
442
+
443
+ ax2 = ax.twinx()
444
+ ax2.plot(episodes, moving_avg(intra_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='tab:cyan')
445
+ ax2.set_ylabel("Mean Intra-Cluster Reward", color='tab:cyan')
446
+ ax2.tick_params(axis='y', labelcolor='tab:cyan')
447
+
448
+ fig.suptitle("Intra-Cluster (Low-Level Agent) Rewards")
449
+ fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
450
+ plt.savefig(os.path.join(plots_dir, "1_intra_cluster_rewards.png"), dpi=200)
451
+ plt.close()
452
+
453
+ # Plot 2: Inter-cluster (High-Level) Rewards
454
+ fig, ax = plt.subplots(figsize=(12, 7))
455
+ ax.plot(episodes, moving_avg(inter_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2, color='tab:green')
456
+ ax.set_xlabel("Episode")
457
+ ax.set_ylabel("Total Inter-Cluster Reward", color='tab:green')
458
+ ax.tick_params(axis='y', labelcolor='tab:green')
459
+ ax.grid(True)
460
+
461
+ ax2 = ax.twinx()
462
+ ax2.plot(episodes, moving_avg(inter_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='mediumseagreen')
463
+ ax2.set_ylabel("Mean Inter-Cluster Reward", color='mediumseagreen')
464
+ ax2.tick_params(axis='y', labelcolor='mediumseagreen')
465
+
466
+ fig.suptitle("Inter-Cluster (High-Level Agent) Rewards")
467
+ fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
468
+ plt.savefig(os.path.join(plots_dir, "2_inter_cluster_rewards.png"), dpi=200)
469
+ plt.close()
470
+
471
+ # Plot 3: Total System Rewards
472
+ fig, ax = plt.subplots(figsize=(12, 7))
473
+ ax.plot(episodes, moving_avg(total_log['total'], ma_window), label=f'Total System Reward (MA {ma_window})', linewidth=2, color='tab:red')
474
+ ax.set_xlabel("Episode")
475
+ ax.set_ylabel("Total System Reward", color='tab:red')
476
+ ax.tick_params(axis='y', labelcolor='tab:red')
477
+ ax.grid(True)
478
+
479
+ ax2 = ax.twinx()
480
+ ax2.plot(episodes, moving_avg(total_log['mean'], ma_window), label=f'Mean System Reward (MA {ma_window})', linewidth=2, linestyle='--', color='salmon')
481
+ ax2.set_ylabel("Mean System Reward per Agent", color='salmon')
482
+ ax2.tick_params(axis='y', labelcolor='salmon')
483
+
484
+ fig.suptitle("Total System Rewards (Intra + Inter)")
485
+ fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
486
+ plt.savefig(os.path.join(plots_dir, "3_total_system_rewards.png"), dpi=200)
487
+ plt.close()
488
+
489
+ # Plot 4: Cost Reduction
490
+ cost_df = pd.DataFrame(cost_log)
491
+ cost_df['cost_reduction_pct'] = 100 * (1 - (cost_df['total_cost'] / cost_df['cost_without_p2p'])).clip(lower=-np.inf, upper=100)
492
+ plt.figure(figsize=(12, 7))
493
+ plt.plot(episodes, moving_avg(cost_df['cost_reduction_pct'], ma_window), label=f'Cost Reduction % (MA {ma_window})', color='purple', linewidth=2)
494
+ plt.xlabel("Episode")
495
+ plt.ylabel("Cost Reduction (%)")
496
+ plt.title("Total System-Wide Cost Reduction")
497
+ plt.legend()
498
+ plt.grid(True)
499
+ plt.savefig(os.path.join(plots_dir, "4_cost_reduction.png"), dpi=200)
500
+ plt.close()
501
+
502
+
503
+ df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
504
+ df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])
505
+
506
+ # 5. Battery Degradation Cost
507
+ plt.figure(figsize=(12, 7))
508
+ plt.plot(df_plot["Episode"], moving_avg(df_plot["battery_degradation_cost_total"], ma_window),
509
+ label=f'Degradation Cost (MA {ma_window})', color='darkgreen', linewidth=2)
510
+ plt.xlabel("Episode")
511
+ plt.ylabel("Total Degradation Cost ($)")
512
+ plt.title("Total Battery Degradation Cost")
513
+ plt.legend()
514
+ plt.grid(True)
515
+ plt.savefig(os.path.join(plots_dir, "5_battery_degradation_cost.png"), dpi=200)
516
+ plt.close()
517
+
518
+
519
+ print(f"All plots have been saved to: {plots_dir}")
520
+
521
+
522
+ if __name__ == "__main__":
523
+ main()