|
|
import gym |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from collections import deque |
|
|
import random |
|
|
from gym.spaces import Tuple, Box |
|
|
|
|
|
# Pin both global random streams (NumPy first, then the stdlib) so that
# everything drawn from them later in this module is repeatable run to run.
np.random.seed(42)
random.seed(42)
|
|
|
|
|
class SolarSys(gym.Env):
    """Gym environment for peer-to-peer energy trading within one cluster of houses.

    One episode is one randomly chosen day of the dataset, stepped at
    ``time_freq`` resolution.  Every house is an agent.

    Observation: ``(num_agents, 8)`` float32 array with the columns
    ``[demand, solar, soc_fraction (-1 when the house has no battery),
    grid_price, peer_price, demand_of_others, solar_of_others, hour]``.

    Action: a 3-tuple ``(actions, transfer, peer_price_override)`` where

    * ``actions`` is ``(num_agents, 6)`` in ``[0, 1]`` with the columns
      ``[sell_grid, buy_grid, sell_peers, buy_peers, charge, discharge]``;
    * ``transfer[0]`` is the energy exchanged with other clusters
      (positive = received, negative = sent);
    * ``peer_price_override[0]`` replaces the internally computed peer price
      when it is ``>= 0``.

    The dataset must expose one ``grid_<id>`` and one ``total_solar_<id>``
    column per house and, because ``reset`` reads ``index.hour``, a
    datetime-like index.
    """

    def __init__(
        self,
        data_path="DATA/training/25houses_152days_TRAIN.csv",
        state="",
        time_freq="15T",
        house_ids_in_cluster=None,
        preloaded_data=None,
    ):
        """Build the environment.

        Args:
            data_path: CSV file to load when ``preloaded_data`` is not given.
            state: Tariff region; one of ``oklahoma``, ``colorado`` or
                ``pennsylvania`` (case-insensitive).
            time_freq: Pandas frequency string giving the step length.  It is
                only used to derive the number of steps per day; the data is
                not resampled.
            house_ids_in_cluster: Optional subset of house ids to simulate.
                Ids are compared as strings against the ``grid_<id>`` columns.
            preloaded_data: Optional already-indexed DataFrame used instead of
                reading ``data_path``.

        Raises:
            ValueError: Unsupported state, unreadable/empty data, no matching
                house ids, or less than one day of data.
            FileNotFoundError: ``data_path`` does not exist.
        """
        super().__init__()
        self.state = state.lower()

        self._pricing_info = {
            "oklahoma": {
                "max_grid_price": 0.2112,
                "feed_in_tariff": 0.04,
                "price_function": self._get_oklahoma_price,
            },
            "colorado": {
                "max_grid_price": 0.32,
                "feed_in_tariff": 0.055,
                "price_function": self._get_colorado_price,
            },
            "pennsylvania": {
                "max_grid_price": 0.5505,
                "feed_in_tariff": 0.06,
                "price_function": self._get_pennsylvania_price,
            },
        }
        if self.state not in self._pricing_info:
            raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")

        state_config = self._pricing_info[self.state]
        self.max_grid_price = state_config["max_grid_price"]
        self.feed_in_tariff = state_config["feed_in_tariff"]
        self._get_price_function = state_config["price_function"]
        self.data_path = data_path
        self.time_freq = time_freq

        if preloaded_data is not None:
            all_data = preloaded_data
            if house_ids_in_cluster:
                print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
        else:
            print(f"Loading data from {data_path}...")
            all_data = self._load_data(data_path)

        # Dataset-wide maxima over ALL houses in the file (not only this
        # cluster).  nanmax keeps them finite when the data contains gaps.
        grid_cols = [c for c in all_data.columns if c.startswith("grid_")]
        solar_cols = [c for c in all_data.columns if c.startswith("total_solar_")]
        all_grid = all_data[grid_cols].values
        all_solar = all_data[solar_cols].values
        self.global_max_demand = float(np.nanmax(all_grid + all_solar)) + 1e-8
        self.global_max_solar = float(np.nanmax(all_solar)) + 1e-8

        self.all_data = all_data
        all_house_ids_in_file = [col.split("_")[1] for col in grid_cols]
        if house_ids_in_cluster:
            # Column-derived ids are strings, so normalise the requested ids
            # before matching (callers may pass ints).
            known_ids = set(all_house_ids_in_file)
            self.house_ids = [
                str(hid) for hid in house_ids_in_cluster if str(hid) in known_ids
            ]
        else:
            self.house_ids = all_house_ids_in_file
        if not self.house_ids:
            raise ValueError("No valid house_ids found for this environment instance.")

        # NOTE(review): this list is appended to on every step and is never
        # cleared by reset(), so it grows for the lifetime of the instance.
        self.env_log_infos = []

        freq_offset = pd.tseries.frequencies.to_offset(time_freq)
        minutes_per_step = freq_offset.nanos / 1e9 / 60.0
        self.steps_per_day = int(24 * 60 // minutes_per_step)
        if self.steps_per_day < 1:
            raise ValueError(
                f"time_freq '{time_freq}' is longer than one day; "
                "at least one step per day is required."
            )

        total_rows = len(self.all_data)
        self.total_days = total_rows // self.steps_per_day
        if self.total_days < 1:
            raise ValueError(
                f"Dataset has {total_rows} rows, which is "
                f"less than a single day of {self.steps_per_day} steps."
            )

        # Highest price the selected tariff actually charges during a day.
        # Used for peak-hour accounting; max_grid_price stays the reward
        # normaliser.  NOTE(review): for Pennsylvania max_grid_price (0.5505)
        # is far above every tariff tier - confirm which value is intended.
        self.peak_grid_price = max(
            self._get_price_function(s) for s in range(self.steps_per_day)
        )

        self.num_agents = len(self.house_ids)

        # Baseline grid import (no trading, no battery) per house; gaps are
        # zero-filled the same way reset() treats the demand data.
        self.original_no_p2p_import = {
            hid: self.all_data[f"grid_{hid}"].fillna(0.0).clip(lower=0.0).values
            for hid in self.house_ids
        }

        # Group 1 = houses whose total solar production is positive.
        solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
        solar_sums = self.all_data[solar_cols].sum(axis=0).to_dict()
        self.agent_groups = [
            1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
            for hid in self.house_ids
        ]
        self.group_counts = {
            0: self.agent_groups.count(0),
            1: self.agent_groups.count(1),
        }
        print(f"Number of houses in each group: {self.group_counts}")

        self.battery_options = {
            "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
            "enphase": {"max_capacity": 5.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
            "franklin": {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
        }
        self.solar_houses = [
            hid for hid in self.house_ids
            if (self.all_data[f"total_solar_{hid}"] > 0).any()
        ]

        # Every solar house gets one randomly chosen battery model (drawn from
        # the stdlib ``random`` stream, in house order).
        self.batteries = {}
        for hid in self.solar_houses:
            choice = random.choice(list(self.battery_options))
            self.batteries[hid] = {"soc": 0.0, **self.battery_options[choice]}

        self.battery_charge_history = {hid: [] for hid in self.batteries}
        self.battery_discharge_history = {hid: [] for hid in self.batteries}
        self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
        self.battery_level = sum(b["soc"] for b in self.batteries.values())
        self.current_solar = 0.0
        self.has_battery = np.array(
            [1 if hid in self.batteries else 0 for hid in self.house_ids],
            dtype=np.float32,
        )

        # Per-agent battery parameters as flat arrays (zeros for houses
        # without a battery) so step() can stay vectorised.
        n = self.num_agents
        self.battery_soc = np.zeros(n, dtype=np.float32)
        self.battery_max_capacity = np.zeros(n, dtype=np.float32)
        self.battery_charge_efficiency = np.zeros(n, dtype=np.float32)
        self.battery_discharge_efficiency = np.zeros(n, dtype=np.float32)
        self.battery_max_charge_rate = np.zeros(n, dtype=np.float32)
        self.battery_max_discharge_rate = np.zeros(n, dtype=np.float32)
        self.battery_degradation_cost = np.zeros(n, dtype=np.float32)
        for i, hid in enumerate(self.house_ids):
            batt = self.batteries.get(hid)
            if batt is None:
                continue
            self.battery_max_capacity[i] = batt["max_capacity"]
            self.battery_charge_efficiency[i] = batt["charge_efficiency"]
            self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
            self.battery_max_charge_rate[i] = batt["max_charge_rate"]
            self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
            self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]

        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.num_agents, 8),
            dtype=np.float32,
        )
        self.action_space = Tuple((
            Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
            Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
            Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32),
        ))

        # Per-episode state; populated by reset().
        self.data = None
        self.env_log = []
        self.day_index = -1
        self.current_step = 0
        self.num_steps = self.steps_per_day
        self.demands = {}
        self.solars = {}
        self.previous_actions = {hid: np.zeros(6) for hid in self.house_ids}
        # Initialised here so the accessors work before any episode finished.
        self.episode_metrics = {}
        self.last_info = {}
        self._initialize_episode_metrics()

    @staticmethod
    def _load_data(data_path):
        """Read the CSV and index it by its UTC-parsed ``local_15min`` column."""
        try:
            frame = pd.read_csv(data_path)
            frame["local_15min"] = pd.to_datetime(frame["local_15min"], utc=True)
            frame.set_index("local_15min", inplace=True)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"Data file {data_path} not found.") from err
        except pd.errors.EmptyDataError as err:
            raise ValueError(f"Data file {data_path} is empty.") from err
        except Exception as err:
            # Anything else (e.g. a missing timestamp column) is reported as
            # a data problem, with the original error kept as the cause.
            raise ValueError(f"Error loading data: {err}") from err
        return frame

    def get_grid_price(self, step_idx):
        """Return the grid price at ``step_idx`` under the selected state's tariff."""
        return self._get_price_function(step_idx)

    def _hour_of_step(self, step_idx):
        """Map a step index to an hour of day (0-23), with step 0 at hour 0."""
        minutes_per_step = 24 * 60 / self.steps_per_day
        return int((step_idx * minutes_per_step) // 60) % 24

    def _get_oklahoma_price(self, step_idx):
        """Two-tier tariff: 0.2112 from hour 14 up to 19, otherwise 0.0434."""
        hour = self._hour_of_step(step_idx)
        return 0.2112 if 14 <= hour < 19 else 0.0434

    def _get_colorado_price(self, step_idx):
        """Three-tier tariff: 0.32 for hours 15-18, 0.22 for 13-14, else 0.12."""
        hour = self._hour_of_step(step_idx)
        if 15 <= hour < 19:
            return 0.32
        if 13 <= hour < 15:
            return 0.22
        return 0.12

    def _get_pennsylvania_price(self, step_idx):
        """Three-tier tariff: 0.125048 for hours 13-20, 0.057014 for 23-5, else 0.079085."""
        hour = self._hour_of_step(step_idx)
        if 13 <= hour < 21:
            return 0.125048
        if hour >= 23 or hour < 6:
            return 0.057014
        return 0.079085

    def get_peer_price(self, step_idx, total_surplus, total_shortfall):
        """Compute the peer-to-peer price from the cluster's supply/demand balance.

        The price starts at a balance point (80 % grid price, 20 % feed-in
        tariff) and is shifted by an arctan of the signed, power-scaled
        log demand/supply ratio.  The result is clipped to
        ``[feed_in_tariff, grid_price]``; when the grid price is below the
        feed-in tariff ``np.clip`` yields the grid price.

        Args:
            step_idx: Step whose grid price anchors the computation.
            total_surplus: Scalar cluster-wide surplus.
            total_shortfall: Scalar cluster-wide shortfall.

        Returns:
            The peer price as a Python float.
        """
        grid_price = self.get_grid_price(step_idx)
        feed_in_tariff = self.feed_in_tariff

        p_balance = (grid_price * 0.80) + (feed_in_tariff * 0.20)
        p_con = (grid_price - feed_in_tariff) / (1.5 * np.pi)
        k = 1.5
        epsilon = 1e-6  # keeps the ratio and its logarithm finite

        supply = total_surplus + epsilon
        demand = total_shortfall + epsilon
        log_ratio = np.log(demand / supply)
        # Sign-preserving power: a negative base cannot take a fractional exponent.
        power_term = np.sign(log_ratio) * (np.abs(log_ratio) ** k)

        peer_price = p_balance + 2 * np.pi * p_con * np.arctan(power_term)
        return float(np.clip(peer_price, feed_in_tariff, grid_price))

    def _initialize_episode_metrics(self):
        """Reset every accumulator that is tracked over a single episode (day)."""
        self.cumulative_grid_reduction = 0.0
        self.cumulative_grid_reduction_peak = 0.0
        self.cumulative_degradation_cost = 0.0
        self.agent_cost_savings = np.zeros(self.num_agents)
        self.degradation_cost_timeseries = []
        self.cost_savings_timeseries = []
        self.grid_reduction_timeseries = []

    def get_episode_metrics(self):
        """Return the metrics of the last completed episode.

        The dictionary is filled by ``reset()`` when it closes an episode in
        which at least one step was taken; it is empty before that.
        """
        return self.episode_metrics

    def _finalize_episode_metrics(self):
        """Snapshot the accumulators of the episode that just ended."""
        positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
        # Fairness is only meaningful across at least two benefiting agents.
        if len(positive_savings) > 1:
            fairness_on_savings = self._compute_jains_index(positive_savings)
        else:
            fairness_on_savings = 0.0

        self.episode_metrics = {
            "grid_reduction_entire_day": self.cumulative_grid_reduction,
            "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
            "total_cost_savings": np.sum(self.agent_cost_savings),
            "fairness_on_cost_savings": fairness_on_savings,
            "battery_degradation_cost_total": self.cumulative_degradation_cost,
            "degradation_cost_over_time": self.degradation_cost_timeseries,
            "cost_savings_over_time": self.cost_savings_timeseries,
            "grid_reduction_over_time": self.grid_reduction_timeseries,
        }

    def reset(self, seed=None, options=None):
        """Start a new episode on a randomly chosen day.

        Args:
            seed: Optional seed.  The environment draws from NumPy's global
                random stream, so that stream is re-seeded when given.
            options: Accepted for Gym API compatibility; ignored.

        Returns:
            ``(observation, info)`` with an empty ``info`` dict.
        """
        if seed is not None:
            np.random.seed(seed)

        if self.current_step > 0:
            self._finalize_episode_metrics()

        self.day_index = np.random.randint(0, self.total_days)
        start_row = self.day_index * self.steps_per_day
        end_row = start_row + self.steps_per_day
        day_data = self.all_data.iloc[start_row:end_row].copy()
        self.data = day_data

        self.no_p2p_import_day = {
            hid: self.original_no_p2p_import[hid][start_row:end_row]
            for hid in self.house_ids
        }

        # Demand is reconstructed as net grid flow plus solar production;
        # gaps count as zero and negative values are clipped away.
        grid_cols = [f"grid_{hid}" for hid in self.house_ids]
        solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
        grid_day = day_data[grid_cols].fillna(0.0).to_numpy()
        solar_day = day_data[solar_cols].fillna(0.0).clip(lower=0.0).to_numpy()
        self.demands_day = np.clip(grid_day + solar_day, 0.0, None).astype(np.float32)
        self.solars_day = solar_day.astype(np.float32)

        # NOTE(review): taken from the index timestamps, whereas tariffs use
        # the step index - confirm both refer to the same clock.
        self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values

        self.current_step = 0
        self.env_log = []
        for hid in self.house_ids:
            self.previous_actions[hid] = np.zeros(6)

        # Batteries start between 30 % and 70 % of capacity.
        lows = 0.30 * self.battery_max_capacity
        highs = 0.70 * self.battery_max_capacity
        self.battery_soc = np.random.uniform(low=lows, high=highs)
        self.battery_soc *= self.has_battery

        first_demands = self.demands_day[0]
        first_solars = self.solars_day[0]
        initial_surplus = np.maximum(first_solars - first_demands, 0.0).sum()
        initial_shortfall = np.maximum(first_demands - first_solars, 0.0).sum()
        initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)

        obs = self._get_obs(peer_price=initial_peer_price)
        self._initialize_episode_metrics()
        return obs, {}

    def _apply_inter_cluster_transfer(self, transfer_kwh, final_shortfall, final_surplus):
        """Settle energy exchanged with other clusters.

        ``final_shortfall`` / ``final_surplus`` are modified in place and
        ``self.battery_soc`` is updated.  Received energy first covers
        remaining shortfall, then charges batteries; sent energy is drawn
        from remaining surplus first, then from batteries.  Both are shared
        proportionally.
        """
        if transfer_kwh > 0:
            remaining = transfer_kwh
            cluster_shortfall = np.sum(final_shortfall)
            if cluster_shortfall > 1e-6:
                covered = min(remaining, cluster_shortfall)
                final_shortfall -= (final_shortfall / cluster_shortfall) * covered
                remaining -= covered

            if remaining > 1e-6:
                cap_left = self.battery_max_capacity - self.battery_soc
                storable = cap_left / (self.battery_charge_efficiency + 1e-9)
                cluster_storable = np.sum(storable * self.has_battery)
                if cluster_storable > 1e-6:
                    to_store = min(remaining, cluster_storable)
                    per_batt = (storable / cluster_storable) * to_store
                    self.battery_soc += (per_batt * self.battery_charge_efficiency) * self.has_battery

        elif transfer_kwh < 0:
            remaining = abs(transfer_kwh)
            cluster_surplus = np.sum(final_surplus)
            if cluster_surplus > 1e-6:
                sent = min(remaining, cluster_surplus)
                final_surplus -= (final_surplus / cluster_surplus) * sent
                remaining -= sent

            if remaining > 1e-6:
                available = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
                cluster_available = np.sum(available)
                if cluster_available > 1e-6:
                    to_discharge = min(remaining, cluster_available)
                    per_batt = (available / cluster_available) * to_discharge
                    soc_drop = per_batt / (self.battery_discharge_efficiency + 1e-9)
                    self.battery_soc -= soc_drop * self.has_battery
                    self.battery_soc = np.maximum(0.0, self.battery_soc)

    def step(self, packed_action):
        """Advance the simulation by one time step.

        Order of operations: battery discharge against own shortfall, battery
        charge from own surplus, proportional P2P matching, inter-cluster
        transfer, then grid settlement (any shortfall still uncovered is
        imported from the grid regardless of the action).

        Args:
            packed_action: ``(actions, transfer_kwh_arr, peer_price_arr)`` as
                described on the class.

        Returns:
            ``(obs, total_reward, terminated, truncated, info)``;
            ``terminated`` is always ``False`` and ``truncated`` becomes
            ``True`` after the last step of the day.  Per-agent rewards are in
            ``info['agent_rewards']``.

        Raises:
            RuntimeError: Called before ``reset()`` or after the episode ended.
            ValueError: ``actions`` does not have shape ``(num_agents, 6)``.
        """
        if self.data is None or self.current_step >= self.num_steps:
            raise RuntimeError(
                "step() called before reset() or after the episode ended; "
                "call reset() first."
            )

        actions, transfer_kwh_arr, peer_price_arr = packed_action
        inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
        override_peer_price_val = float(peer_price_arr[0])
        # A negative value means "no override".
        override_peer_price = override_peer_price_val if override_peer_price_val >= 0 else None

        actions = np.array(actions, dtype=np.float32)
        if actions.shape != (self.num_agents, 6):
            raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
        actions = np.clip(actions, 0.0, 1.0)
        (a_sellGrid, a_buyGrid, a_sellPeers,
         a_buyPeers, a_chargeBatt, a_dischargeBatt) = actions.T

        demands = self.demands_day[self.current_step]
        solars = self.solars_day[self.current_step]
        shortfall = np.maximum(demands - solars, 0.0)
        surplus = np.maximum(solars - demands, 0.0)
        total_surplus = surplus.sum()
        total_shortfall = shortfall.sum()
        self.current_solar = total_surplus

        if override_peer_price is not None:
            peer_price = override_peer_price
        else:
            peer_price = self.get_peer_price(self.current_step, total_surplus, total_shortfall)
        grid_price = self.get_grid_price(self.current_step)

        final_shortfall = shortfall.copy()
        final_surplus = surplus.copy()

        # --- 1. Discharge own battery against own shortfall -----------------
        available_from_batt = self.battery_soc * self.battery_discharge_efficiency
        desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
        discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
        discharge_amount *= self.has_battery
        # Delivering X drains X / efficiency from the cell.
        self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
        self.battery_soc = np.maximum(0.0, self.battery_soc)
        final_shortfall -= discharge_amount

        # --- 2. Charge own battery from own surplus -------------------------
        cap_left = self.battery_max_capacity - self.battery_soc
        desired_charge = a_chargeBatt * self.battery_max_charge_rate
        charge_amount = np.minimum.reduce([
            desired_charge,
            cap_left / (self.battery_charge_efficiency + 1e-9),
            final_surplus,
        ])
        charge_amount *= self.has_battery
        self.battery_soc += charge_amount * self.battery_charge_efficiency
        final_surplus -= charge_amount

        # --- 3. Peer-to-peer market -----------------------------------------
        # Sellers may offer their remaining surplus plus what the battery can deliver.
        battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
        effective_surplus = final_surplus + battery_offer

        netPeer = a_buyPeers - a_sellPeers
        p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
        p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus

        total_sell = np.sum(p2p_sell_offer)
        total_buy = np.sum(p2p_buy_request)
        matched = min(total_sell, total_buy)
        if matched > 1e-9:
            # The matched volume is shared pro rata on both sides.
            actual_sold = matched * (p2p_sell_offer / (total_sell + 1e-12))
            actual_bought = matched * (p2p_buy_request / (total_buy + 1e-12))
        else:
            actual_sold = np.zeros(self.num_agents, dtype=np.float32)
            actual_bought = np.zeros(self.num_agents, dtype=np.float32)

        # Sold energy is taken from the battery offer first, the rest from solar surplus.
        from_batt = np.minimum(actual_sold, battery_offer)
        from_solar = actual_sold - from_batt
        final_surplus -= from_solar
        final_shortfall -= actual_bought
        soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
        self.battery_soc -= soc_reduction
        self.battery_soc = np.maximum(0.0, self.battery_soc)

        # --- 4. Inter-cluster transfer --------------------------------------
        self._apply_inter_cluster_transfer(inter_cluster_transfer_kwh, final_shortfall, final_surplus)

        # --- 5. Grid settlement ---------------------------------------------
        netGrid = a_buyGrid - a_sellGrid
        grid_import = np.maximum(0, netGrid) * final_shortfall
        grid_export = np.maximum(0, -netGrid) * final_surplus
        # Demand must be met: what the agent did not buy is imported anyway.
        grid_import += np.maximum(final_shortfall - grid_import, 0.0)

        costs = (
            (grid_import * grid_price)
            - (grid_export * self.feed_in_tariff)
            + (actual_bought * peer_price)
            - (actual_sold * peer_price)
        )

        final_rewards = self._compute_rewards(
            grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
            actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
            costs=costs, grid_price=grid_price, peer_price=peer_price,
        )

        # --- 6. Episode metrics ---------------------------------------------
        no_p2p_import_this_step = np.array(
            [self.no_p2p_import_day[hid][self.current_step] for hid in self.house_ids],
            dtype=np.float32,
        )

        step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
        self.cumulative_grid_reduction += step_grid_reduction
        self.grid_reduction_timeseries.append(step_grid_reduction)
        # Peak hours = steps priced at the tariff's own top tier.
        if grid_price >= self.peak_grid_price * 0.99:
            self.cumulative_grid_reduction_peak += step_grid_reduction

        cost_no_p2p = no_p2p_import_this_step * grid_price
        step_cost_savings_per_agent = cost_no_p2p - costs
        self.agent_cost_savings += step_cost_savings_per_agent
        step_cost_savings = np.sum(step_cost_savings_per_agent)
        self.cost_savings_timeseries.append(step_cost_savings)

        # Only the explicit charge/discharge of steps 1-2 is counted here.
        degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
        step_degradation_cost = np.sum(degradation_cost_agent)
        self.cumulative_degradation_cost += step_degradation_cost
        self.degradation_cost_timeseries.append(step_degradation_cost)

        info = {
            "p2p_buy": actual_bought,
            "p2p_sell": actual_sold,
            "grid_import_with_p2p": grid_import,
            "grid_import_no_p2p": no_p2p_import_this_step,
            "grid_export": grid_export,
            "costs": costs,
            "charge_amount": charge_amount,
            "discharge_amount": discharge_amount,
            "step": self.current_step,
            "step_grid_reduction": step_grid_reduction,
            "step_cost_savings": step_cost_savings,
            "step_degradation_cost": step_degradation_cost,
        }

        self.env_log.append([
            self.current_step, np.sum(grid_import), np.sum(grid_export),
            np.sum(actual_bought), np.sum(actual_sold), np.sum(costs),
        ])

        self.current_step += 1
        terminated = False
        truncated = (self.current_step >= self.num_steps)

        # The next observation carries the peer price used in this step.
        obs_next = self._get_obs(peer_price=peer_price)
        info['agent_rewards'] = final_rewards
        self.last_info = info
        self.env_log_infos.append(info)
        return obs_next, final_rewards.sum(), terminated, truncated, info

    def _get_obs(self, peer_price: float):
        """Assemble the ``(num_agents, 8)`` float32 observation for the current step.

        After the final step the index is clamped to the last row of the day.
        """
        step = min(self.current_step, self.num_steps - 1)
        demands = self.demands_day[step]
        solars = self.solars_day[step]
        grid_price = self.get_grid_price(step)
        hour = self.hours_day[step]

        soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
        # -1 marks "no battery" so it cannot be mistaken for an empty one.
        soc_frac = np.where(self.has_battery == 1, soc_frac, -1.0)

        columns = [
            demands,
            solars,
            soc_frac,
            np.full(self.num_agents, grid_price),
            np.full(self.num_agents, peer_price),
            demands.sum() - demands,   # demand of all other agents
            solars.sum() - solars,     # solar of all other agents
            np.full(self.num_agents, hour),
        ]
        return np.stack(columns, axis=1).astype(np.float32)

    def _compute_jains_index(self, usage_array):
        """Return Jain's fairness index ``(sum x)^2 / (n * sum x^2)`` of ``usage_array``.

        A small constant in the denominator makes an all-zero (or empty)
        input evaluate to 0 instead of dividing by zero.
        """
        x = np.array(usage_array, dtype=np.float32)
        numerator = np.sum(x) ** 2
        denominator = len(x) * np.sum(x ** 2) + 1e-8
        return numerator / denominator

    def _compute_rewards(
        self, grid_import, grid_export, actual_sold, actual_bought,
        charge_amount, discharge_amount, costs, grid_price, peer_price
    ):
        """Compute the per-agent reward vector for one step.

        The reward is the negated cost, minus a price-weighted grid-import
        penalty, plus bonuses for P2P selling and for buying from peers below
        the grid price, minus penalties for state of charge far from 50 % and
        for battery degradation, plus a shared fairness bonus (Jain's index
        of traded volumes).  ``grid_export`` is accepted but not used.
        """
        w_import, w_sell, w_buy = 0.3, 0.5, 0.5
        w_soc, w_degrad, w_fair, w_cost = 0.1, 0.05, 0.4, 1.0

        p_grid_norm = grid_price / self.max_grid_price
        p_peer_norm = peer_price / self.max_grid_price

        rewards = -costs * w_cost
        rewards -= w_import * grid_import * p_grid_norm
        rewards += w_sell * actual_sold * p_peer_norm
        buy_bonus = w_buy * actual_bought * ((grid_price - peer_price) / self.max_grid_price)
        rewards += np.where(peer_price < grid_price, buy_bonus, 0.0)

        soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
        rewards -= w_soc * ((soc_frac - 0.5) ** 2) * self.has_battery
        rewards -= w_degrad * (charge_amount + discharge_amount) * self.battery_degradation_cost

        rewards += w_fair * self._compute_jains_index(actual_bought + actual_sold)
        return rewards

    def save_log(self, filename="env_log.csv"):
        """Write the current episode's per-step totals to ``filename`` as CSV."""
        columns = [
            "Step", "Total_Grid_Import", "Total_Grid_Export",
            "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
        ]
        pd.DataFrame(self.env_log, columns=columns).to_csv(filename, index=False)
        print(f"Environment log saved to {filename}")