import os
import sys
import time
from datetime import datetime, timedelta
import re
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt

# Allow imports from project root
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

#This is important for running the file, please make sure to follow the same directory structure as listed in the zip file
from cluster import InterClusterCoordinator, InterClusterLedger
from Environment.cluster_env_wrapper import make_vec_env 
from mappo.trainer.mappo import MAPPO
from meanfield.trainer.meanfield import MFAC

def recursive_sum(item):
    total = 0
    if hasattr(item, '__iter__') and not isinstance(item, str):
        for sub_item in item:
            total += recursive_sum(sub_item)
    elif np.isreal(item):
        total += item
    return total


def main():
    overall_start_time = time.time()
    # ─── Hyperparameters ───────────────────────
    STATE_TO_RUN = "pennsylvania" # or "colorado", "oklahoma"
    DATA_PATH = ""
    # Dynamically extract the number of agents from the file path
    match = re.search(r'(\d+)houses', DATA_PATH)
    if not match:
        raise ValueError("Could not extract the number of houses from DATA_PATH.")
    NUMBER_OF_AGENTS = int(match.group(1))
    NUM_EPISODES       = 10000
    CLUSTER_SIZE       = 10 
    BATCH_SIZE         = 256
    CHECKPOINT_INTERVAL= 1000
    WINDOW_SIZE        = 80
    MAX_TRANSFER_KWH   = 100000
    LR       = 2e-4
    GAMMA    = 0.95
    LAMBDA   = 0.95
    CLIP_EPS = 0.2
    K_EPOCHS = 4
    JOINT_TRAINING_START_EPISODE = 2000
    FREEZE_HIGH_FOR_EPISODES = 20
    FREEZE_LOW_FOR_EPISODES  = 10

    # ─── Build directories ─────────────────
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name  = f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_" \
            f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
    root_dir  = os.path.join("Training", run_name) # New folder for new runs
    models_dir= os.path.join(root_dir, "models")
    logs_dir  = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")

    for d in (models_dir, logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Logging to: {root_dir}")

    # ─── Environment & Agent Initialization ─────────────────
    cluster_env = make_vec_env(
        data_path=DATA_PATH,
        time_freq="15T",
        cluster_size=CLUSTER_SIZE,
        state=STATE_TO_RUN  # <-- Use the state variable here
    )

    #Get env parameters from the new vectorized environment object.
    n_clusters = cluster_env.num_envs
    sample_subenv = cluster_env.cluster_envs[0]
    n_agents_per_cluster = sample_subenv.num_agents

    local_dim  = sample_subenv.observation_space.shape[-1]
    global_dim = n_agents_per_cluster * local_dim
    act_dim    = sample_subenv.action_space[0].shape[-1] 
    total_buffer_size = sample_subenv.num_steps * n_clusters 
    print(f"Low-level agent buffer size set to: {total_buffer_size}")
    print(f"Created {n_clusters} clusters.")
    print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
          f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")
    print(f"Creating {n_clusters} independent low-level MAPPO agents...")
    low_agents = []
    for i in range(n_clusters):
        agent_buffer_size = sample_subenv.num_steps 

        agent = MAPPO(
            n_agents   = n_agents_per_cluster,
            local_dim  = local_dim,
            global_dim = global_dim,
            act_dim    = act_dim,
            lr         = LR,
            gamma      = GAMMA,
            lam        = LAMBDA,
            clip_eps   = CLIP_EPS,
            k_epochs   = K_EPOCHS,
            batch_size = BATCH_SIZE,
            episode_len = agent_buffer_size 
        )
        low_agents.append(agent)

    OBS_DIM_HI_LOCAL = 7 
    act_dim_inter = 2    
    print(f"Inter-cluster agent (MFAC): n_agents={n_clusters}, "
        f"local_dim={OBS_DIM_HI_LOCAL}, act_dim={act_dim_inter}")
    inter_agent = MFAC(
        n_agents   = n_clusters,
        local_dim  = OBS_DIM_HI_LOCAL,
        act_dim    = act_dim_inter,
        lr         = LR,
        gamma      = GAMMA,
        lam        = LAMBDA,
        clip_eps   = CLIP_EPS,
        k_epochs   = K_EPOCHS,
        batch_size = BATCH_SIZE,
        episode_len=96
    )
    ledger      = InterClusterLedger()
    coordinator = InterClusterCoordinator(
        cluster_env,
        inter_agent,
        ledger,
        max_transfer_kwh=MAX_TRANSFER_KWH
    )

    # ─── Training loop ─────────────────────────────────────
    total_steps = 0
    inter_episode_rewards = []
    episode_log_data = []
    performance_metrics_log = [] 
    agent_rewards_log = [[] for _ in range(NUMBER_OF_AGENTS)]
    intra_log = {}
    inter_log = {}
    total_log = {}
    cost_log = {}

    for ep in range(1, NUM_EPISODES + 1):
        inter_episode_rewards_this_ep = []
        step_count     = 0
        start_time     = time.time()
        ep_total_inter_cluster_reward = 0.0
        day_logs = []
        obs_clusters, _ = cluster_env.reset()
        # This runs after an episode is done (triggered by reset), but before the new one starts.
        if ep > 1:
            all_cluster_metrics = cluster_env.call('get_episode_metrics')

            # Aggregate the metrics from all clusters into a single system-wide summary
            system_metrics = {
                "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in all_cluster_metrics),
                "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in all_cluster_metrics),
                "total_cost_savings": sum(m["total_cost_savings"] for m in all_cluster_metrics),
                "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in all_cluster_metrics),
                # For fairness, we average the fairness index across clusters
                "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in all_cluster_metrics]),
                "Episode": ep - 1
            }

            performance_metrics_log.append(system_metrics)


        # =================================================================
        
        done_all = False
        cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)
        total_cost = 0.0
        total_grid_import = 0.0
        
        # Determine training phase
        is_phase_1 = ep < JOINT_TRAINING_START_EPISODE

        if ep == 1: print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
        if ep == JOINT_TRAINING_START_EPISODE: print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")
        
        # The main loop continues as long as the episode is not done.
        while not done_all:
            total_steps += 1
            step_count += 1
            # --- Action Selection (Low-Level) ---
            batch_global_obs = obs_clusters.reshape(n_clusters, -1)

            # Loop through each cluster to get actions from its dedicated agent
            low_level_actions_list = []
            low_level_logps_list = []
            for c_idx in range(n_clusters):
                agent = low_agents[c_idx]
                local_obs_cluster = obs_clusters[c_idx]
                global_obs_cluster = batch_global_obs[c_idx]

                actions, logps = agent.select_action(local_obs_cluster, global_obs_cluster)

                low_level_actions_list.append(actions)
                low_level_logps_list.append(logps)
            low_level_actions = np.stack(low_level_actions_list)
            low_level_logps = np.stack(low_level_logps_list)
            
            # --- Action Selection & Transfers (High-Level, Phase 2 only) ---
            if is_phase_1:
                exports, imports = None, None
            else:
                # Get high-level observations
                inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
                inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)
                
                # Get high-level actions
                high_level_action, high_level_logp = inter_agent.select_action(inter_cluster_obs_local)

                # Build transfers
                current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
                exports, imports = coordinator.build_transfers(high_level_action, current_reports)

            # --- Environment Step ---
            next_obs_clusters, rewards, done_all, step_info = cluster_env.step(
                low_level_actions, exports=exports, imports=imports
            )
            cluster_infos = step_info.get("cluster_infos")

            day_logs.append({
                "costs": cluster_infos["costs"],
                "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
                "charge_amount": cluster_infos.get("charge_amount"),
                "discharge_amount": cluster_infos.get("discharge_amount")
            })
            per_agent_rewards = np.stack(cluster_infos['agent_rewards'])

            rewards_for_buffer = per_agent_rewards
            if not is_phase_1:
                transfers_for_logging = (exports, imports)
                high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
                    all_cluster_infos=cluster_infos,
                    actual_transfers=transfers_for_logging,
                    step_count=step_count
                )
                ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster) # Log the sum for the plot
                next_inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count + 1) for se in cluster_env.cluster_envs]
                next_inter_cluster_obs_local = np.array(next_inter_cluster_obs_local_list)

                inter_agent.store(
                    inter_cluster_obs_local, 
                    high_level_action, 
                    high_level_logp,
                    high_level_rewards_per_cluster,
                    [done_all]*n_clusters, 
                    next_inter_cluster_obs_local 
                )
                bonus_per_agent = np.zeros_like(per_agent_rewards)
                for c_idx in range(n_clusters):
                    num_agents_in_cluster = per_agent_rewards.shape[1]
                    if num_agents_in_cluster > 0:
                        bonus = high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
                        bonus_per_agent[c_idx, :] = bonus

                rewards_for_buffer = per_agent_rewards + bonus_per_agent

            # --- Data Storage (Low-Level) ---
            dones_list = step_info.get("cluster_dones")
            for idx in range(n_clusters):
                low_agents[idx].store(
                    obs_clusters[idx],
                    batch_global_obs[idx],
                    low_level_actions[idx],
                    low_level_logps[idx],
                    rewards_for_buffer[idx],
                    dones_list[idx],
                    next_obs_clusters[idx].reshape(-1)
                )

            # --- Logging and State Update ---
            cluster_rewards += per_agent_rewards
            total_cost += np.sum(cluster_infos['costs'])
            total_grid_import += np.sum(cluster_infos['grid_import_with_p2p'])

            obs_clusters = next_obs_clusters         
        if is_phase_1:
            for agent in low_agents:
                agent.update()
        else:
            CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
            phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
            position_in_cycle = phase2_episode_num % CYCLE_LENGTH

            if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
                print(f"Updating ALL LOW-LEVEL agents (High-level is frozen).")
                for agent in low_agents:
                    agent.update()
            else:
                print(f"Updating HIGH-LEVEL agent (Low-level is frozen).")
                inter_agent.update()

        # =================================================================
        duration = time.time() - start_time
        num_low_level_agents = n_clusters * n_agents_per_cluster
        get_price_fn = cluster_env.cluster_envs[0].get_grid_price


        baseline_costs_per_step = [
            recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
            for i, entry in enumerate(day_logs)
        ]
        total_baseline_cost = sum(baseline_costs_per_step)
        actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
        total_actual_cost = sum(actual_costs_per_step)
        cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0
        total_reward_intra = cluster_rewards.sum()
        mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0
        total_reward_inter = ep_total_inter_cluster_reward
        mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0
        total_reward_system = total_reward_intra + total_reward_inter
        mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0


        intra_log.setdefault('total', []).append(total_reward_intra)
        intra_log.setdefault('mean', []).append(mean_reward_intra)
        inter_log.setdefault('total', []).append(total_reward_inter)
        inter_log.setdefault('mean', []).append(mean_reward_inter)
        total_log.setdefault('total', []).append(total_reward_system)
        total_log.setdefault('mean', []).append(mean_reward_system)
        cost_log.setdefault('total_cost', []).append(total_actual_cost)
        cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)


        episode_log_data.append({
            "Episode": ep,
            "Mean_Reward_System": mean_reward_system,
            "Mean_Reward_Intra": mean_reward_intra,
            "Mean_Reward_Inter": mean_reward_inter,
            "Total_Reward_System": total_reward_system,
            "Total_Reward_Intra": total_reward_intra,
            "Total_Reward_Inter": total_reward_inter,
            "Cost_Reduction_Pct": cost_reduction_pct,
            "Episode_Duration": duration,
        })


        print(f"Ep {ep}/{NUM_EPISODES} | "
              f"Mean System R: {mean_reward_system:.3f} | "
              f"Cost Red: {cost_reduction_pct:.1f}% | "
              f"Time: {duration:.2f}s")


        if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
            for c_idx, agent in enumerate(low_agents):
                agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
            inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
            print(f"Saved checkpoint at episode {ep}")

    print("Training completed! Aggregating final logs...")
    # --- Final Episode Metrics ---
    final_cluster_metrics = cluster_env.call('get_episode_metrics')
    final_system_metrics = {
        "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in final_cluster_metrics),
        "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in final_cluster_metrics),
        "total_cost_savings": sum(m["total_cost_savings"] for m in final_cluster_metrics),
        "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in final_cluster_metrics),
        "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in final_cluster_metrics]),
        "Episode": NUM_EPISODES
    }
    performance_metrics_log.append(final_system_metrics)

    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")
    overall_end_time = time.time()
    total_duration_seconds = overall_end_time - overall_start_time
    total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    columns_to_save = [
        "Episode",
        "Mean_Reward_System",
        "Mean_Reward_Intra",
        "Mean_Reward_Inter",
        "Total_Reward_System",
        "Total_Reward_Intra",
        "Total_Reward_Inter",
        "Cost_Reduction_Pct",
        "battery_degradation_cost_total",
        "Episode_Duration",
        "total_cost_savings",
        "grid_reduction_entire_day",
        "fairness_on_cost_savings"
    ]
    df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
    df_to_save.to_csv(log_csv_path, index=False)
    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    generate_plots(
        plots_dir=plots_dir,
        num_episodes=NUM_EPISODES,
        intra_log=intra_log,
        inter_log=inter_log,
        total_log=total_log,
        cost_log=cost_log,
        df_final_log=df_final_log
    )
    overall_end_time = time.time()
    total_duration_seconds = overall_end_time - overall_start_time
    total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))

    
    print("\n" + "="*50)
    print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
    print("="*50)

################################# PLOTING & LOGGING ##################################################################
def generate_plots(
        plots_dir: str,
        num_episodes: int,
        intra_log: dict,
        inter_log: dict,
        total_log: dict,
        cost_log: list,
        df_final_log: pd.DataFrame
    ):
        """
        Generates and saves all final plots after training is complete.
        """
        print("Training completed! Generating plots…")
        def moving_avg(series, window):
            return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

        ma_window = 120
        episodes = np.arange(1, num_episodes + 1)

        # Plot 1: Intra-cluster (Low-Level) Rewards
        fig, ax = plt.subplots(figsize=(12, 7))
        ax.plot(episodes, moving_avg(intra_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2)
        ax.set_xlabel("Episode")
        ax.set_ylabel("Total Intra-Cluster Reward", color='tab:blue')
        ax.tick_params(axis='y', labelcolor='tab:blue')
        ax.grid(True)
        
        ax2 = ax.twinx()
        ax2.plot(episodes, moving_avg(intra_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='tab:cyan')
        ax2.set_ylabel("Mean Intra-Cluster Reward", color='tab:cyan')
        ax2.tick_params(axis='y', labelcolor='tab:cyan')
        
        fig.suptitle("Intra-Cluster (Low-Level Agent) Rewards")
        fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
        plt.savefig(os.path.join(plots_dir, "1_intra_cluster_rewards.png"), dpi=200)
        plt.close()

        # Plot 2: Inter-cluster (High-Level) Rewards
        fig, ax = plt.subplots(figsize=(12, 7))
        ax.plot(episodes, moving_avg(inter_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2, color='tab:green')
        ax.set_xlabel("Episode")
        ax.set_ylabel("Total Inter-Cluster Reward", color='tab:green')
        ax.tick_params(axis='y', labelcolor='tab:green')
        ax.grid(True)
        
        ax2 = ax.twinx()
        ax2.plot(episodes, moving_avg(inter_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='mediumseagreen')
        ax2.set_ylabel("Mean Inter-Cluster Reward", color='mediumseagreen')
        ax2.tick_params(axis='y', labelcolor='mediumseagreen')
        
        fig.suptitle("Inter-Cluster (High-Level Agent) Rewards")
        fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
        plt.savefig(os.path.join(plots_dir, "2_inter_cluster_rewards.png"), dpi=200)
        plt.close()

        # Plot 3: Total System Rewards
        fig, ax = plt.subplots(figsize=(12, 7))
        ax.plot(episodes, moving_avg(total_log['total'], ma_window), label=f'Total System Reward (MA {ma_window})', linewidth=2, color='tab:red')
        ax.set_xlabel("Episode")
        ax.set_ylabel("Total System Reward", color='tab:red')
        ax.tick_params(axis='y', labelcolor='tab:red')
        ax.grid(True)
        
        ax2 = ax.twinx()
        ax2.plot(episodes, moving_avg(total_log['mean'], ma_window), label=f'Mean System Reward (MA {ma_window})', linewidth=2, linestyle='--', color='salmon')
        ax2.set_ylabel("Mean System Reward per Agent", color='salmon')
        ax2.tick_params(axis='y', labelcolor='salmon')
        
        fig.suptitle("Total System Rewards (Intra + Inter)")
        fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
        plt.savefig(os.path.join(plots_dir, "3_total_system_rewards.png"), dpi=200)
        plt.close()

        # Plot 4: Cost Reduction
        cost_df = pd.DataFrame(cost_log)
        cost_df['cost_reduction_pct'] = 100 * (1 - (cost_df['total_cost'] / cost_df['cost_without_p2p'])).clip(lower=-np.inf, upper=100)
        plt.figure(figsize=(12, 7))
        plt.plot(episodes, moving_avg(cost_df['cost_reduction_pct'], ma_window), label=f'Cost Reduction % (MA {ma_window})', color='purple', linewidth=2)
        plt.xlabel("Episode")
        plt.ylabel("Cost Reduction (%)")
        plt.title("Total System-Wide Cost Reduction")
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(plots_dir, "4_cost_reduction.png"), dpi=200)
        plt.close()

        
        df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
        df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])

        # 5. Battery Degradation Cost
        plt.figure(figsize=(12, 7))
        plt.plot(df_plot["Episode"], moving_avg(df_plot["battery_degradation_cost_total"], ma_window), 
                label=f'Degradation Cost (MA {ma_window})', color='darkgreen', linewidth=2)
        plt.xlabel("Episode")
        plt.ylabel("Total Degradation Cost ($)")
        plt.title("Total Battery Degradation Cost")
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(plots_dir, "5_battery_degradation_cost.png"), dpi=200)
        plt.close()


        print(f"All plots have been saved to: {plots_dir}")


if __name__ == "__main__":
    main()