import gym
import pandas as pd
import numpy as np
from collections import deque
import random
from gym.spaces import Tuple, Box

# Seed Python's and NumPy's global RNGs at import time. The environment draws
# from both: `random.choice` picks each house's battery model, and `np.random`
# samples the simulated day and the initial state of charge.
random.seed(42)
np.random.seed(42)

class SolarSys(gym.Env):

    def __init__(
        self,
        data_path="DATA/training/25houses_152days_TRAIN.csv",
        state="", # Select from 'oklahoma', 'colorado', 'pennsylvania'
        time_freq="15T",  
        house_ids_in_cluster=None,
        preloaded_data=None
        
    ):
        """Configure pricing, load household data and initialise battery state.

        Args:
            data_path: CSV file read when ``preloaded_data`` is None. It must
                contain a ``local_15min`` timestamp column (used as the index)
                plus ``grid_<id>`` and ``total_solar_<id>`` columns per house.
            state: Pricing regime; one of 'oklahoma', 'colorado' or
                'pennsylvania' (case-insensitive).
            time_freq: Pandas frequency string. It is only used to derive the
                number of steps per day; the data is not resampled here.
            house_ids_in_cluster: Optional collection of house ids to keep.
                Ids that are not present in the data are dropped.
            preloaded_data: Optional DataFrame used instead of reading
                ``data_path``. NOTE(review): ``reset()`` reads
                ``index.hour``/``index.minute``, so the frame presumably needs
                a DatetimeIndex - confirm with callers.

        Raises:
            FileNotFoundError: ``data_path`` does not exist.
            ValueError: unsupported ``state``, unreadable or empty data, a
                house without a matching solar column, no usable house ids,
                or fewer rows than one full day.
        """
        super().__init__()  # initialize parent gym.Env
        self.state = state.lower()

        # --- Centralized Pricing Configuration ---
        # NOTE(review): 'pennsylvania' declares max_grid_price 0.5505 while
        # _get_pennsylvania_price never returns more than 0.125048, so the
        # peak-hour test in step() (price >= 0.99 * max_grid_price) can never
        # be true for that state - confirm which number is intended.
        self._pricing_info = {
            "oklahoma": {
                "max_grid_price": 0.2112,
                "feed_in_tariff": 0.04,
                "price_function": self._get_oklahoma_price
            },
            "colorado": {
                "max_grid_price": 0.32,
                "feed_in_tariff": 0.055,
                "price_function": self._get_colorado_price
            },
            "pennsylvania": {
                "max_grid_price": 0.5505,
                "feed_in_tariff": 0.06,
                "price_function": self._get_pennsylvania_price
            }
        }

        if self.state not in self._pricing_info:
            raise ValueError(f"State '{self.state}' is not supported. Available states: {list(self._pricing_info.keys())}")

        state_config = self._pricing_info[self.state]
        self.max_grid_price = state_config["max_grid_price"]
        self.feed_in_tariff = state_config["feed_in_tariff"]
        self._get_price_function = state_config["price_function"]
        self.data_path = data_path
        self.time_freq = time_freq

        if preloaded_data is not None:
            all_data = preloaded_data
            if house_ids_in_cluster:
                print(f"Using pre-loaded data for cluster with {len(house_ids_in_cluster)} houses.")
        else:
            print(f"Loading data from {data_path}...")
            try:
                all_data = pd.read_csv(data_path)
                all_data["local_15min"] = pd.to_datetime(all_data["local_15min"], utc=True)
                all_data.set_index("local_15min", inplace=True)
            except FileNotFoundError as exc:
                raise FileNotFoundError(f"Data file {data_path} not found.") from exc
            except pd.errors.EmptyDataError as exc:
                raise ValueError(f"Data file {data_path} is empty.") from exc
            except Exception as e:
                # Keep the original cause attached for debugging.
                raise ValueError(f"Error loading data: {e}") from e

        # Store the dataset as loaded (no resampling is performed).
        self.all_data = all_data

        # House ids are whatever follows the "grid_" prefix, so ids that
        # themselves contain underscores stay intact.
        all_house_ids_in_file = [
            col[len("grid_"):] for col in self.all_data.columns
            if col.startswith("grid_")
        ]
        if house_ids_in_cluster:
            self.house_ids = [hid for hid in house_ids_in_cluster if hid in all_house_ids_in_file]
        else:
            self.house_ids = all_house_ids_in_file

        if not self.house_ids:
            raise ValueError("No valid house_ids found for this environment instance.")

        self.env_log_infos = []

        freq_offset = pd.tseries.frequencies.to_offset(time_freq)
        minutes_per_step = freq_offset.nanos / 1e9 / 60.0
        self.steps_per_day = int(24 * 60 // minutes_per_step)

        total_rows = len(self.all_data)
        self.total_days = total_rows // self.steps_per_day
        if self.total_days < 1:
            raise ValueError(
                f"After resampling, dataset has {total_rows} rows, which is "
                f"less than a single day of {self.steps_per_day} steps."
            )

        # Compute global maxima for normalization over every house in the
        # file. Columns are paired by house id (not by position) and NaNs are
        # treated as 0, matching how reset() builds the per-day demand.
        grid_cols = [f"grid_{hid}" for hid in all_house_ids_in_file]
        solar_cols = [f"total_solar_{hid}" for hid in all_house_ids_in_file]
        missing_solar = [c for c in solar_cols if c not in self.all_data.columns]
        if missing_solar:
            raise ValueError(f"Data is missing solar columns: {missing_solar}")
        all_grid = self.all_data[grid_cols].fillna(0.0).values
        all_solar = self.all_data[solar_cols].fillna(0.0).values

        # max total demand = max(grid + solar) over all time & agents
        self.global_max_demand = float((all_grid + all_solar).max()) + 1e-8

        # max solar generation alone
        self.global_max_solar = float(all_solar.max()) + 1e-8

        self.num_agents = len(self.house_ids)

        # Baseline grid import per house when no trading happens: the
        # positive part of the metered grid series, with gaps counted as 0 so
        # a single NaN cannot poison the cumulative episode metrics.
        self.original_no_p2p_import = {}
        for hid in self.house_ids:
            col_grid = f"grid_{hid}"
            self.original_no_p2p_import[hid] = (
                self.all_data[col_grid].fillna(0.0).clip(lower=0.0).values
            )

        # Group 1 = houses whose summed solar output is positive, 0 = others.
        cluster_solar_cols = [f"total_solar_{hid}" for hid in self.house_ids]
        solar_sums = self.all_data[cluster_solar_cols].sum(axis=0).to_dict()
        self.agent_groups = [
            1 if solar_sums[f"total_solar_{hid}"] > 0 else 0
            for hid in self.house_ids
        ]

        self.group_counts = {
            0: self.agent_groups.count(0),
            1: self.agent_groups.count(1)
        }
        print(f"Number of houses in each group: {self.group_counts}")

        #battery logic
        self.battery_options = {
            "teslapowerwall": {"max_capacity": 13.5, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 5.0, "max_discharge_rate": 5.0, "degradation_cost_per_kwh": 0.005},
            "enphase":         {"max_capacity": 5.0,  "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 2.0, "max_discharge_rate": 2.0, "degradation_cost_per_kwh": 0.005},
            "franklin":        {"max_capacity": 15.0, "charge_efficiency": 0.95, "discharge_efficiency": 0.90, "max_charge_rate": 6.0, "max_discharge_rate": 6.0, "degradation_cost_per_kwh": 0.005},
        }
        # Every house with at least one positive solar reading gets a battery.
        self.solar_houses = [
            hid for hid in self.house_ids
            if (self.all_data[f"total_solar_{hid}"] > 0).any()
        ]

        # The battery model is drawn from Python's global `random` state.
        self.batteries = {}
        for hid in self.solar_houses:
            choice = random.choice(list(self.battery_options))
            specs = self.battery_options[choice]
            self.batteries[hid] = {"soc": 0.0, **specs}

        self.battery_charge_history = {hid: [] for hid in self.batteries}
        self.battery_discharge_history = {hid: [] for hid in self.batteries}
        self.battery_capacity = sum(b["max_capacity"] for b in self.batteries.values())
        self.battery_level = sum(b["soc"] for b in self.batteries.values())
        self.current_solar = 0.0
        self.has_battery = np.array([1 if hid in self.batteries else 0 for hid in self.house_ids], dtype=np.float32)

        # Initialize arrays for all agents, with zeros for non-battery agents
        self.battery_soc = np.zeros(self.num_agents, dtype=np.float32)
        self.battery_max_capacity = np.zeros(self.num_agents, dtype=np.float32)
        self.battery_charge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
        self.battery_discharge_efficiency = np.zeros(self.num_agents, dtype=np.float32)
        self.battery_max_charge_rate = np.zeros(self.num_agents, dtype=np.float32)
        self.battery_max_discharge_rate = np.zeros(self.num_agents, dtype=np.float32)
        self.battery_degradation_cost = np.zeros(self.num_agents, dtype=np.float32)

        # Populate the arrays using the created battery dictionary
        for i, hid in enumerate(self.house_ids):
            if hid in self.batteries:
                batt = self.batteries[hid]
                self.battery_max_capacity[i] = batt["max_capacity"]
                self.battery_charge_efficiency[i] = batt["charge_efficiency"]
                self.battery_discharge_efficiency[i] = batt["discharge_efficiency"]
                self.battery_max_charge_rate[i] = batt["max_charge_rate"]
                self.battery_max_discharge_rate[i] = batt["max_discharge_rate"]
                self.battery_degradation_cost[i] = batt["degradation_cost_per_kwh"]

        # ========== SPACES (Observation & Action) ===================================
        # One row of 8 features per agent (see _get_obs for the column order).
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.num_agents, 8),
            dtype=np.float32
        )
        # (per-agent action fractions, inter-cluster transfer, peer-price override)
        self.action_space = Tuple((
            Box(low=0.0, high=1.0, shape=(self.num_agents, 6), dtype=np.float32),
            Box(low=-np.inf, high=np.inf, shape=(1,), dtype=np.float32),
            Box(low=-1.0, high=np.inf, shape=(1,), dtype=np.float32)
        ))

        # ========== EPISODE STATE ===================================================
        self.data = None
        self.env_log = []
        self.day_index = -1
        self.current_step = 0
        self.num_steps = self.steps_per_day
        self.demands = {}
        self.solars = {}
        self.previous_actions = {
            hid: np.zeros(6) for hid in self.house_ids
        }
        # Defined up front so get_episode_metrics() and readers of `last_info`
        # do not hit AttributeError before the first episode completes.
        self.episode_metrics = {}
        self.last_info = {}
        self._initialize_episode_metrics()

    def get_grid_price(self, step_idx):
        """Look up the grid tariff for ``step_idx``.

        Delegates to the state-specific pricing callable that ``__init__``
        stored in ``self._get_price_function``.
        """
        price_fn = self._get_price_function
        return price_fn(step_idx)

    def _get_oklahoma_price(self, step_idx):
        """Return the Oklahoma tariff for ``step_idx``.

        The step index is mapped to an hour of day (wrapping every 24 h);
        hours 14 through 18 are billed at 0.2112, all others at 0.0434.
        """
        step_minutes = 24 * 60 / self.steps_per_day
        hour_of_day = int((step_idx * step_minutes) // 60) % 24
        in_peak_window = 14 <= hour_of_day < 19
        return 0.2112 if in_peak_window else 0.0434

    def _get_colorado_price(self, step_idx):
        """Return the Colorado tariff for ``step_idx``.

        Three tiers keyed on the hour of day derived from the step index:
        0.32 for hours 15-18, 0.22 for hours 13-14, and 0.12 otherwise.
        """
        step_minutes = 24 * 60 / self.steps_per_day
        hour_of_day = int((step_idx * step_minutes) // 60) % 24

        # Everything outside 13:00-19:00 is the base rate.
        if hour_of_day < 13 or hour_of_day >= 19:
            return 0.12
        # Inside the window, the last four hours are the top tier.
        return 0.22 if hour_of_day < 15 else 0.32

    def _get_pennsylvania_price(self, step_idx):
        """Return the Pennsylvania tariff for ``step_idx``.

        Tiers by hour of day derived from the step index: 0.125048 for hours
        13-20, 0.057014 overnight (hour 23 and hours 0-5), 0.079085 otherwise.
        """
        step_minutes = 24 * 60 / self.steps_per_day
        hour_of_day = int((step_idx * step_minutes) // 60) % 24

        if 13 <= hour_of_day < 21:
            return 0.125048
        # Overnight is the complement of the 06:00-23:00 daytime span.
        is_overnight = not (6 <= hour_of_day < 23)
        return 0.057014 if is_overnight else 0.079085
        
    def get_peer_price(self, step_idx, total_surplus, total_shortfall):
        grid_price = self.get_grid_price(step_idx)
        feed_in_tariff = self.feed_in_tariff
        
        # Parameters for arctangent-log pricing
        p_balance = (grid_price * 0.80) + (feed_in_tariff * 0.20)
        p_con = (grid_price - feed_in_tariff) / (1.5 * np.pi)
        k = 1.5 
        epsilon = 1e-6
        supply = total_surplus + epsilon
        demand = total_shortfall + epsilon
        
        ratio = demand / supply
        log_ratio = np.log(ratio)
        if log_ratio < 0:
            power_term = - (np.abs(log_ratio) ** k)
        else:
            power_term = log_ratio ** k
        
        price_offset = 2 * np.pi * p_con * np.arctan(power_term)
        
        peer_price = p_balance + price_offset
        
        final_price = float(np.clip(peer_price, feed_in_tariff, grid_price))
        
        return final_price

 
    def _initialize_episode_metrics(self):
        """Initializes or resets all metrics tracked over a single episode (day)."""
        self.cumulative_grid_reduction = 0.0
        self.cumulative_grid_reduction_peak = 0.0
        self.cumulative_degradation_cost = 0.0
        self.agent_cost_savings = np.zeros(self.num_agents)
        self.degradation_cost_timeseries = []
        self.cost_savings_timeseries = []
        self.grid_reduction_timeseries = []

    def get_episode_metrics(self):
        """Return the metrics dictionary of the last completed episode.

        ``reset()`` stores the dictionary only when the previous episode
        advanced at least one step, so nothing exists before that point. In
        that case an empty dict is returned instead of raising
        ``AttributeError``.
        """
        return getattr(self, "episode_metrics", {})
        
   ##########################################################################
    # Gym Required Methods
 
    def reset(self):
        if self.current_step > 0:
            positive_savings = self.agent_cost_savings[self.agent_cost_savings > 0]
            if len(positive_savings) > 1:
                fairness_on_savings = self._compute_jains_index(positive_savings)
            else:
                fairness_on_savings = 0.0

            self.episode_metrics = {
                "grid_reduction_entire_day": self.cumulative_grid_reduction,
                "grid_reduction_peak_hours": self.cumulative_grid_reduction_peak,
                "total_cost_savings": np.sum(self.agent_cost_savings),
                "fairness_on_cost_savings": fairness_on_savings,
                "battery_degradation_cost_total": self.cumulative_degradation_cost,
                "degradation_cost_over_time": self.degradation_cost_timeseries,
                "cost_savings_over_time": self.cost_savings_timeseries,
                "grid_reduction_over_time": self.grid_reduction_timeseries,
            }
        self.day_index = np.random.randint(0, self.total_days)

        start_row = self.day_index * self.steps_per_day
        end_row = start_row + self.steps_per_day
        day_data = self.all_data.iloc[start_row:end_row].copy()
        self.data = day_data  

        self.no_p2p_import_day = {}
        for hid in self.house_ids:
            self.no_p2p_import_day[hid] = self.original_no_p2p_import[hid][start_row:end_row]

        demand_list = []
        solar_list = []
        for hid in self.house_ids:
            col_grid = f"grid_{hid}"
            col_solar = f"total_solar_{hid}"

            grid_series = day_data[col_grid].fillna(0.0)
            solar_series = day_data[col_solar].fillna(0.0).clip(lower=0.0)

            demand_array = grid_series.values + solar_series.values
            demand_array = np.clip(demand_array, 0.0, None)

            demand_list.append(demand_array)
            solar_list.append(solar_series.values)

        self.demands_day = np.stack(demand_list, axis=1).astype(np.float32)
        self.solars_day = np.stack(solar_list, axis=1).astype(np.float32)

        self.hours_day = (self.data.index.hour + self.data.index.minute / 60.0).values

        self.current_step = 0
        self.env_log     = []
        for hid in self.house_ids:
            self.previous_actions[hid] = np.zeros(6)

        lows = 0.30 * self.battery_max_capacity
        highs = 0.70 * self.battery_max_capacity

        self.battery_soc = np.random.uniform(low=lows, high=highs)
        self.battery_soc *= self.has_battery

        initial_demands = self.demands_day[0]
        initial_solars = self.solars_day[0]
        initial_surplus = np.maximum(initial_solars - initial_demands, 0.0).sum()
        initial_shortfall = np.maximum(initial_demands - initial_solars, 0.0).sum()
        initial_peer_price = self.get_peer_price(0, initial_surplus, initial_shortfall)

        obs = self._get_obs(peer_price=initial_peer_price)

        self._initialize_episode_metrics()

        return obs, {}

    def step(self, packed_action):
        """Advance the simulation by one time step.

        Energy is settled in this order: own-battery discharge, own-battery
        charge, peer-to-peer matching, inter-cluster transfer, and finally the
        grid, which also absorbs whatever shortfall is still open.

        Args:
            packed_action: A 3-item sequence
                ``(actions, transfer_kwh_arr, peer_price_arr)``.
                ``actions`` must convert to shape ``(num_agents, 6)`` and is
                clipped to [0, 1]; its columns are sell-to-grid,
                buy-from-grid, sell-to-peers, buy-from-peers, charge-battery
                and discharge-battery fractions. ``transfer_kwh_arr[0]`` is
                the inter-cluster transfer (positive: energy received by this
                cluster, negative: energy sent). ``peer_price_arr[0]``
                overrides the peer price when it is >= 0; a negative value
                means the price is computed with ``get_peer_price``.

        Returns:
            ``(obs_next, reward, terminated, truncated, info)``. ``reward`` is
            the sum of the per-agent rewards (also available as
            ``info['agent_rewards']``), ``terminated`` is always False and
            ``truncated`` becomes True once ``current_step`` reaches
            ``num_steps``.

        Raises:
            ValueError: if ``actions`` does not have shape ``(num_agents, 6)``.
        """
        actions, transfer_kwh_arr, peer_price_arr = packed_action
        inter_cluster_transfer_kwh = float(transfer_kwh_arr[0])
        override_peer_price_val = float(peer_price_arr[0])
        
        # A negative override value means "no override".
        override_peer_price = override_peer_price_val if override_peer_price_val >= 0 else None

        actions = np.array(actions, dtype=np.float32)
        if actions.shape != (self.num_agents, 6):
            raise ValueError(f"Actions shape mismatch: got {actions.shape}, expected {(self.num_agents, 6)}")
        actions = np.clip(actions, 0.0, 1.0)

        a_sellGrid      = actions[:, 0]
        a_buyGrid       = actions[:, 1]
        a_sellPeers     = actions[:, 2]
        a_buyPeers      = actions[:, 3]
        a_chargeBatt    = actions[:, 4]
        a_dischargeBatt = actions[:, 5]
        

        demands = self.demands_day[self.current_step]
        solars  = self.solars_day[self.current_step]

        # Cluster-wide totals feed the peer-price formula.
        total_surplus   = np.maximum(solars  - demands, 0.0).sum()
        total_shortfall = np.maximum(demands - solars,  0.0).sum()
        self.current_solar = total_surplus

        if override_peer_price is not None:
            peer_price = override_peer_price
        else:
            peer_price = self.get_peer_price(
                self.current_step,
                total_surplus,
                total_shortfall
            )
        
        grid_price = self.get_grid_price(self.current_step)
    
        # Per-agent balances; the `final_*` copies are whittled down by each
        # settlement stage below.
        shortfall = np.maximum(demands - solars, 0.0)
        surplus   = np.maximum(solars  - demands, 0.0)
        
        final_shortfall = shortfall.copy()
        final_surplus   = surplus.copy()
        # NOTE(review): these two zero arrays are overwritten further down
        # before they are ever read.
        grid_import     = np.zeros(self.num_agents, dtype=np.float32)
        grid_export     = np.zeros(self.num_agents, dtype=np.float32)

        # ### VECTORIZED BATTERY DISCHARGE ###
        # Delivered energy is capped by the requested rate, by what the
        # battery can deliver after discharge losses, and by the agent's own
        # shortfall.
        available_from_batt = self.battery_soc * self.battery_discharge_efficiency
        desired_discharge = a_dischargeBatt * self.battery_max_discharge_rate
        discharge_amount = np.minimum.reduce([desired_discharge, available_from_batt, final_shortfall])
        discharge_amount *= self.has_battery # Ensure only batteries discharge

        # Update SOC (energy drawn from battery before efficiency loss)
        self.battery_soc -= (discharge_amount / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
        self.battery_soc = np.maximum(0.0, self.battery_soc)
        final_shortfall -= discharge_amount

        # ### VECTORIZED BATTERY CHARGE ###
        # Charging is capped by the requested rate, by the remaining capacity
        # (expressed as input energy before charge losses), and by the agent's
        # own surplus.
        cap_left = self.battery_max_capacity - self.battery_soc
        desired_charge = a_chargeBatt * self.battery_max_charge_rate
        charge_amount = np.minimum.reduce([
            desired_charge,
            cap_left / (self.battery_charge_efficiency + 1e-9),
            final_surplus
        ])
        charge_amount *= self.has_battery 

        # Update SOC 
        self.battery_soc += charge_amount * self.battery_charge_efficiency
        final_surplus -= charge_amount

        
        # ### VECTORIZED P2P TRADING ###
        # Sellers may offer leftover solar surplus plus whatever their battery
        # can still deliver.
        battery_offer = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
        effective_surplus = final_surplus + battery_offer

        # The sign of (buy - sell) decides the agent's role; the magnitude is
        # the fraction of its shortfall requested / surplus offered.
        netPeer = a_buyPeers - a_sellPeers
        p2p_buy_request = np.maximum(0, netPeer) * final_shortfall
        p2p_sell_offer = np.maximum(0, -netPeer) * effective_surplus

        # The smaller side of the market clears fully; the larger side is
        # rationed in proportion to each agent's bid.
        total_sell = np.sum(p2p_sell_offer)
        total_buy  = np.sum(p2p_buy_request)
        matched    = min(total_sell, total_buy)

        if matched > 1e-9:
            sell_fraction = p2p_sell_offer / (total_sell + 1e-12)
            buy_fraction  = p2p_buy_request / ( total_buy + 1e-12)
            actual_sold   = matched * sell_fraction
            actual_bought = matched * buy_fraction
        else:
            actual_sold   = np.zeros(self.num_agents, dtype=np.float32)
            actual_bought = np.zeros(self.num_agents, dtype=np.float32)
        

        # Sold energy is attributed to the battery first; only the remainder
        # is taken from the solar surplus.
        from_batt = np.minimum(actual_sold, battery_offer)
        from_solar = actual_sold - from_batt

        final_surplus -= from_solar
        
        final_shortfall -= actual_bought
        soc_reduction = (from_batt / (self.battery_discharge_efficiency + 1e-9)) * self.has_battery
        self.battery_soc -= soc_reduction
        self.battery_soc = np.maximum(0.0, self.battery_soc) 


        # ### INTER-CLUSTER TRANSFER ###
        if inter_cluster_transfer_kwh > 0:
            # Energy received: cover remaining shortfalls first (pro rata),
            # then store the rest in batteries (pro rata to free capacity).
            amount_received = inter_cluster_transfer_kwh
            
           
            total_shortfall_in_cluster = np.sum(final_shortfall)
            if total_shortfall_in_cluster > 1e-6:
                
                to_cover_shortfall = min(amount_received, total_shortfall_in_cluster)
                distribution_ratio = final_shortfall / total_shortfall_in_cluster
                shortfall_reduction = distribution_ratio * to_cover_shortfall
                final_shortfall -= shortfall_reduction
                
                amount_received -= to_cover_shortfall

            if amount_received > 1e-6:
           
                cap_left = self.battery_max_capacity - self.battery_soc
                storable_energy = cap_left / (self.battery_charge_efficiency + 1e-9)
                total_storable_in_cluster = np.sum(storable_energy * self.has_battery)

                if total_storable_in_cluster > 1e-6:
                    
                    # Anything beyond the storable total is not used here.
                    to_store = min(amount_received, total_storable_in_cluster)
                    
                    
                    storage_ratio = storable_energy / total_storable_in_cluster
                    energy_to_store_per_batt = storage_ratio * to_store
                    
                    
                    self.battery_soc += (energy_to_store_per_batt * self.battery_charge_efficiency) * self.has_battery

        elif inter_cluster_transfer_kwh < 0:  
            # Energy sent: draw from remaining solar surplus first (pro rata),
            # then from batteries (pro rata to deliverable energy).
            amount_to_send = abs(inter_cluster_transfer_kwh)
            
            
            total_surplus_in_cluster = np.sum(final_surplus)
            if total_surplus_in_cluster > 1e-6:
                
                sent_from_surplus = min(amount_to_send, total_surplus_in_cluster)           
                draw_ratio = final_surplus / total_surplus_in_cluster
                surplus_reduction = draw_ratio * sent_from_surplus
                final_surplus -= surplus_reduction   
                amount_to_send -= sent_from_surplus

            
            if amount_to_send > 1e-6:
                
                available_from_batt = (self.battery_soc * self.battery_discharge_efficiency) * self.has_battery
                total_available_from_batt = np.sum(available_from_batt)

                if total_available_from_batt > 1e-6:
                    # Discharge a maximum of 'amount_to_send' from batteries
                    to_discharge = min(amount_to_send, total_available_from_batt)
                    
                    # Draw this amount proportionally from each available battery
                    discharge_ratio = available_from_batt / total_available_from_batt
                    discharged_per_batt = discharge_ratio * to_discharge # This is effective energy
                    
                    # Update SoC (energy drawn from battery before efficiency loss)
                    soc_reduction = (discharged_per_batt / (self.battery_discharge_efficiency + 1e-9))
                    self.battery_soc -= soc_reduction * self.has_battery
                    self.battery_soc = np.maximum(0.0, self.battery_soc)
        # =======================================================================
        
        # ### GRID SETTLEMENT ###
        # The sign of (buy - sell) selects import or export; the magnitude is
        # the fraction of the remaining shortfall / surplus sent to the grid.
        netGrid = a_buyGrid - a_sellGrid
        grid_import = np.maximum(0, netGrid) * final_shortfall
        grid_export = np.maximum(0, -netGrid) * final_surplus

        # Demand must be met: any shortfall the agent chose not to import is
        # imported anyway.
        # NOTE(review): surplus that is not exported is not used again in this
        # method.
        forced = np.maximum(final_shortfall - grid_import, 0.0)
        grid_import += forced
        final_shortfall -= forced       

        # Per-agent net cost: grid purchases and peer purchases minus feed-in
        # and peer sales revenue.
        feed_in_tariff = self.feed_in_tariff
        costs = (
            (grid_import * grid_price)
            - (grid_export * feed_in_tariff)
            + (actual_bought * peer_price)
            - (actual_sold * peer_price)
        )
       
        final_rewards = self._compute_rewards(
            grid_import=grid_import, grid_export=grid_export, actual_sold=actual_sold,
            actual_bought=actual_bought, charge_amount=charge_amount, discharge_amount=discharge_amount,
            costs=costs, grid_price=grid_price, peer_price=peer_price
        )

        # Baseline import of each house for this step (no trading, no battery).
        no_p2p_import_this_step = np.array([
            self.no_p2p_import_day[hid][self.current_step] 
            for hid in self.house_ids
        ], dtype=np.float32)


        # --- Metric 1 & 2: Grid Reduction (Entire Day & Peak Hours) ---
        step_grid_reduction = np.sum(no_p2p_import_this_step - grid_import)
        self.cumulative_grid_reduction += step_grid_reduction
        self.grid_reduction_timeseries.append(step_grid_reduction)

        # A step counts as peak when the grid price is within 1 % of the
        # configured maximum for the state.
        if grid_price >= self.max_grid_price * 0.99:
            self.cumulative_grid_reduction_peak += step_grid_reduction

        # --- Metric 3: Total Cost Savings ---
        cost_no_p2p = no_p2p_import_this_step * grid_price
        step_cost_savings_per_agent = cost_no_p2p - costs
        self.agent_cost_savings += step_cost_savings_per_agent
        self.cost_savings_timeseries.append(np.sum(step_cost_savings_per_agent))

        # --- Metric 5 & 6: Battery Degradation Cost (Total and Over Time) ---
        # Only the own-use charge/discharge of this step is counted here.
        degradation_cost_agent = (charge_amount + discharge_amount) * self.battery_degradation_cost
        step_degradation_cost = np.sum(degradation_cost_agent)

        self.cumulative_degradation_cost += step_degradation_cost
        self.degradation_cost_timeseries.append(step_degradation_cost)

        info = {
            "p2p_buy": actual_bought,
            "p2p_sell": actual_sold,
            "grid_import_with_p2p": grid_import,
            "grid_import_no_p2p": no_p2p_import_this_step,
            "grid_export": grid_export,
            "costs": costs,
            "charge_amount": charge_amount,
            "discharge_amount": discharge_amount,
            "step": self.current_step,
            "step_grid_reduction": step_grid_reduction,
            "step_cost_savings": np.sum(step_cost_savings_per_agent),
            "step_degradation_cost": step_degradation_cost,
        }

        # One summary row per step; save_log() writes these rows to CSV.
        self.env_log.append([
            self.current_step, np.sum(grid_import), np.sum(grid_export),
            np.sum(actual_bought), np.sum(actual_sold), np.sum(costs)
        ])

        self.current_step += 1
       
        # Episodes only end by running out of steps, never by termination.
        terminated = False 
        truncated = (self.current_step >= self.num_steps)

        # The next observation reuses the peer price of the step just settled.
        obs_next = self._get_obs(peer_price=peer_price)
        info['agent_rewards'] = final_rewards
        self.last_info = info
        # NOTE(review): this list grows on every step and reset() in this file
        # does not clear it.
        self.env_log_infos.append(info)
        return obs_next, final_rewards.sum(), terminated, truncated, info
        
    
    def _get_obs(self, peer_price: float):
        step = min(self.current_step, self.num_steps - 1)
        demands = self.demands_day[step]
        solars  = self.solars_day[step]
        grid_price = self.get_grid_price(step)
        hour = self.hours_day[step]
        soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
        soc_frac = np.where(self.has_battery == 1, soc_frac, -1.0)
        total_demand_others = demands.sum() - demands
        total_solar_others = solars.sum() - solars

        obs = np.stack([
            demands,
            solars,
            soc_frac,
            np.full(self.num_agents, grid_price),
            np.full(self.num_agents, peer_price),
            total_demand_others,
            total_solar_others,
            np.full(self.num_agents, hour)
        ], axis=1).astype(np.float32)

        return obs


    def _compute_jains_index(self, usage_array):
        x = np.array(usage_array, dtype=np.float32)
        numerator = (np.sum(x))**2
        denominator = len(x) * np.sum(x**2) + 1e-8
        return numerator / denominator


    def _compute_rewards(
        self, grid_import, grid_export, actual_sold, actual_bought,
        charge_amount, discharge_amount, costs, grid_price, peer_price
    ):
        
        w1 = 0.3; w2 = 0.5; w3 = 0.5; w4 = 0.1; w5 = 0.05; w6 = 0.4; w7 = 1.0

        p_grid_norm = grid_price / self.max_grid_price
        p_peer_norm = peer_price / self.max_grid_price

        rewards = -costs * w7
        rewards -= w1 * grid_import * p_grid_norm
        rewards += w2 * actual_sold * p_peer_norm
        buy_bonus = w3 * actual_bought * ((grid_price - peer_price) / self.max_grid_price)
        rewards += np.where(peer_price < grid_price, buy_bonus, 0.0)

        # ### VECTORIZED REWARD PENALTIES ###
        soc_frac = self.battery_soc / (self.battery_max_capacity + 1e-9)
        soc_penalties = w4 * ((soc_frac - 0.5) ** 2) * self.has_battery
        degrad_penalties = w5 * (charge_amount + discharge_amount) * self.battery_degradation_cost

        rewards -= soc_penalties
        rewards -= degrad_penalties

        jfi = self._compute_jains_index(actual_bought + actual_sold)
        rewards += w6 * jfi
        return rewards
        
    def save_log(self, filename="env_log.csv"):
        columns = [
            "Step", "Total_Grid_Import", "Total_Grid_Export",
            "Total_P2P_Buy", "Total_P2P_Sell", "Total_Cost",
        ]
        df = pd.DataFrame(self.env_log, columns=columns)
        df.to_csv(filename, index=False)
        print(f"Environment log saved to {filename}")