| | import os |
| | import sys |
| | import time |
| | from datetime import datetime, timedelta |
| | import re |
| |
|
| | import numpy as np |
| | import torch |
| | import pandas as pd |
| | import matplotlib.pyplot as plt |
| |
|
| | |
| | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) |
| |
|
| | from cluster import InterClusterCoordinator, InterClusterLedger |
| | from Environment.cluster_env_wrapper import make_vec_env |
| | from mappo.trainer.mappo import MAPPO |
| |
|
| |
|
def recursive_sum(item):
    """Recursively sum all real-valued scalars in an arbitrarily nested iterable.

    Strings are treated as scalars (never iterated into) and, like any other
    non-real leaf, contribute 0 to the total.
    """
    is_nested = hasattr(item, '__iter__') and not isinstance(item, str)
    if not is_nested:
        # Leaf: count it only when numpy considers it a real number.
        return 0 + item if np.isreal(item) else 0
    # Branch: fold the recursive totals of every element.
    return sum(recursive_sum(element) for element in item)
| |
|
| |
|
def main():
    """Run hierarchical MAPPO training for P2P energy trading.

    One low-level MAPPO agent is trained per cluster of houses, plus a single
    high-level MAPPO agent that coordinates energy transfers between clusters.
    Training has two phases: a low-level-only warm-up, then alternating joint
    training where low- and high-level updates are interleaved in fixed cycles.
    Results (CSV logs, model checkpoints, plots) are written under a
    timestamped run directory.
    """
    overall_start_time = time.time()

    # --- Run configuration --------------------------------------------------
    STATE_TO_RUN = "oklahoma"
    DATA_PATH = "data/training/1000houses_152days_TRAIN.csv"

    # The agent count is encoded in the dataset filename (e.g. "1000houses").
    match = re.search(r'(\d+)houses', DATA_PATH)
    if not match:
        raise ValueError("Could not extract the number of houses from DATA_PATH.")
    NUMBER_OF_AGENTS = int(match.group(1))

    CLUSTER_SIZE = 10
    NUM_EPISODES = 10000
    BATCH_SIZE = 256
    # Larger than NUM_EPISODES, so only the final "ep == NUM_EPISODES" save fires.
    CHECKPOINT_INTERVAL = 100000
    WINDOW_SIZE = 80  # NOTE(review): unused in this function — confirm before removing
    MAX_TRANSFER_KWH = 100000

    # PPO hyper-parameters shared by the low- and high-level agents.
    LR = 2e-4
    GAMMA = 0.95
    LAMBDA = 0.95
    CLIP_EPS = 0.2
    K_EPOCHS = 4

    # Phase schedule: episodes before JOINT_TRAINING_START_EPISODE train only
    # the low-level agents; afterwards updates alternate in cycles of
    # FREEZE_HIGH + FREEZE_LOW episodes.
    JOINT_TRAINING_START_EPISODE = 2000
    FREEZE_HIGH_FOR_EPISODES = 20
    FREEZE_LOW_FOR_EPISODES = 10

    # --- Output directories -------------------------------------------------
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_name = f"hierarchical_{STATE_TO_RUN}_{NUMBER_OF_AGENTS}agents_" \
               f"{CLUSTER_SIZE}size_{NUM_EPISODES}eps_{timestamp}"
    root_dir = os.path.join("FINALE_FINALE_FINALE", run_name)
    models_dir = os.path.join(root_dir, "models")
    logs_dir = os.path.join(root_dir, "logs")
    plots_dir = os.path.join(root_dir, "plots")

    for d in (models_dir, logs_dir, plots_dir):
        os.makedirs(d, exist_ok=True)
    print(f"Logging to: {root_dir}")

    # --- Vectorized cluster environment -------------------------------------
    cluster_env = make_vec_env(
        data_path=DATA_PATH,
        time_freq="15T",
        cluster_size=CLUSTER_SIZE,
        state=STATE_TO_RUN
    )

    # Dimensions are read from the first sub-environment; presumably all
    # clusters are homogeneous — TODO confirm against make_vec_env.
    n_clusters = cluster_env.num_envs
    sample_subenv = cluster_env.cluster_envs[0]
    n_agents_per_cluster = sample_subenv.num_agents

    local_dim = sample_subenv.observation_space.shape[-1]
    # Centralized-critic input: concatenation of all agents' local observations.
    global_dim = n_agents_per_cluster * local_dim

    act_dim = sample_subenv.action_space[0].shape[-1]

    total_buffer_size = sample_subenv.num_steps * n_clusters
    print(f"Low-level agent buffer size set to: {total_buffer_size}")

    print(f"Created {n_clusters} clusters.")
    print(f"Shared low-level agent: {n_agents_per_cluster} agents per cluster, "
          f"obs_dim={local_dim}, global_dim={global_dim}, act_dim={act_dim}")

    # --- One independent low-level MAPPO agent per cluster -------------------
    print(f"Creating {n_clusters} independent low-level MAPPO agents...")
    low_agents = []
    for i in range(n_clusters):
        # Each agent buffers one episode's worth of steps for its own cluster.
        agent_buffer_size = sample_subenv.num_steps

        agent = MAPPO(
            n_agents=n_agents_per_cluster,
            local_dim=local_dim,
            global_dim=global_dim,
            act_dim=act_dim,
            lr=LR,
            gamma=GAMMA,
            lam=LAMBDA,
            clip_eps=CLIP_EPS,
            k_epochs=K_EPOCHS,
            batch_size=BATCH_SIZE,
            episode_len=agent_buffer_size
        )
        low_agents.append(agent)

    # --- High-level (inter-cluster) agent ------------------------------------
    # Each cluster is one "agent" of the high-level MAPPO; its local state has
    # 7 hand-crafted features and its action is 2-dimensional (export/import).
    OBS_DIM_HI_LOCAL = 7
    act_dim_inter = 2

    OBS_DIM_HI_GLOBAL = n_clusters * OBS_DIM_HI_LOCAL

    print(f"Inter-cluster agent (MAPPO): n_agents={n_clusters}, "
          f"local_dim={OBS_DIM_HI_LOCAL}, global_dim={OBS_DIM_HI_GLOBAL}, act_dim={act_dim_inter}")

    inter_agent = MAPPO(
        n_agents=n_clusters,
        local_dim=OBS_DIM_HI_LOCAL,
        global_dim=OBS_DIM_HI_GLOBAL,
        act_dim=act_dim_inter,
        lr=LR,
        gamma=GAMMA,
        lam=LAMBDA,
        clip_eps=CLIP_EPS,
        k_epochs=K_EPOCHS,
        batch_size=BATCH_SIZE,
        episode_len=sample_subenv.num_steps
    )

    ledger = InterClusterLedger()
    coordinator = InterClusterCoordinator(
        cluster_env,
        inter_agent,
        ledger,
        max_transfer_kwh=MAX_TRANSFER_KWH
    )

    # --- Logging containers ---------------------------------------------------
    total_steps = 0
    episode_log_data = []
    performance_metrics_log = []
    intra_log = {}   # low-level reward curves  ('total' / 'mean')
    inter_log = {}   # high-level reward curves ('total' / 'mean')
    total_log = {}   # combined system reward curves
    cost_log = {}    # per-episode actual cost vs no-P2P baseline cost

    for ep in range(1, NUM_EPISODES + 1):
        step_count = 0
        start_time = time.time()
        ep_total_inter_cluster_reward = 0.0
        day_logs = []

        obs_clusters, _ = cluster_env.reset()

        if ep > 1:
            # Collect the metrics of the episode that just FINISHED (reset()
            # above started a new one), hence "Episode": ep - 1 below.
            all_cluster_metrics = cluster_env.call('get_episode_metrics')

            # Aggregate per-cluster metrics into system-wide figures.
            system_metrics = {
                "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in all_cluster_metrics),
                "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in all_cluster_metrics),
                "total_cost_savings": sum(m["total_cost_savings"] for m in all_cluster_metrics),
                "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in all_cluster_metrics),
                # Fairness is averaged (not summed) across clusters.
                "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in all_cluster_metrics]),
                "Episode": ep - 1
            }

            performance_metrics_log.append(system_metrics)

        done_all = False

        cluster_rewards = np.zeros((n_clusters, n_agents_per_cluster), dtype=np.float32)
        total_cost = 0.0
        total_grid_import = 0.0  # NOTE(review): accumulated below but never read — confirm intent

        is_phase_1 = ep < JOINT_TRAINING_START_EPISODE

        if ep == 1:
            print(f"\n--- Starting Phase 1: Training Low-Level Agent Only (up to ep {JOINT_TRAINING_START_EPISODE-1}) ---")
        if ep == JOINT_TRAINING_START_EPISODE:
            print(f"\n--- Starting Phase 2: Joint Hierarchical Training (from ep {JOINT_TRAINING_START_EPISODE}) ---")

        while not done_all:
            total_steps += 1
            step_count += 1

            # Flatten each cluster's agent observations into one critic input.
            batch_global_obs = obs_clusters.reshape(n_clusters, -1)
            low_level_actions_list = []
            low_level_logps_list = []

            for c_idx in range(n_clusters):
                agent = low_agents[c_idx]
                local_obs_cluster = obs_clusters[c_idx]
                global_obs_cluster = batch_global_obs[c_idx]
                actions, logps = agent.select_action(local_obs_cluster, global_obs_cluster)
                low_level_actions_list.append(actions)
                low_level_logps_list.append(logps)

            low_level_actions = np.stack(low_level_actions_list)
            low_level_logps = np.stack(low_level_logps_list)

            # High-level decision: disabled during phase 1 (no inter-cluster
            # transfers), otherwise query the inter-cluster agent.
            if is_phase_1:
                exports, imports = None, None
            else:
                inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count) for se in cluster_env.cluster_envs]
                inter_cluster_obs_local = np.array(inter_cluster_obs_local_list)

                inter_cluster_obs_global = inter_cluster_obs_local.flatten()

                high_level_action, high_level_logp = inter_agent.select_action(
                    inter_cluster_obs_local,
                    inter_cluster_obs_global
                )

                # Capacity reports bound how much each cluster may export/import.
                current_reports = {i: {'export_capacity': cluster_env.get_export_capacity(i), 'import_capacity': cluster_env.get_import_capacity(i)} for i in range(n_clusters)}
                exports, imports = coordinator.build_transfers(high_level_action, current_reports)

            next_obs_clusters, rewards, done_all, step_info = cluster_env.step(
                low_level_actions, exports=exports, imports=imports
            )
            cluster_infos = step_info.get("cluster_infos")
            day_logs.append({
                "costs": cluster_infos["costs"],
                "grid_import_no_p2p": cluster_infos["grid_import_no_p2p"],
                "charge_amount": cluster_infos.get("charge_amount"),
                "discharge_amount": cluster_infos.get("discharge_amount")
            })

            per_agent_rewards = np.stack(cluster_infos['agent_rewards'])
            rewards_for_buffer = per_agent_rewards

            if not is_phase_1:
                transfers_for_logging = (exports, imports)
                high_level_rewards_per_cluster = coordinator.compute_inter_cluster_reward(
                    all_cluster_infos=cluster_infos,
                    actual_transfers=transfers_for_logging,
                    step_count=step_count
                )
                ep_total_inter_cluster_reward += np.sum(high_level_rewards_per_cluster)

                # Next high-level observation (state at step_count + 1) for the
                # inter-agent's transition buffer.
                next_inter_cluster_obs_local_list = [coordinator.get_cluster_state(se, step_count + 1) for se in cluster_env.cluster_envs]
                next_inter_cluster_obs_local = np.array(next_inter_cluster_obs_local_list)

                next_inter_cluster_obs_global = next_inter_cluster_obs_local.flatten()

                inter_agent.store(
                    inter_cluster_obs_local,
                    inter_cluster_obs_global,
                    high_level_action,
                    high_level_logp,
                    high_level_rewards_per_cluster,
                    [done_all] * n_clusters,
                    next_inter_cluster_obs_global
                )

                # Distribute each cluster's high-level reward evenly across its
                # agents as a bonus added to their individual rewards.
                bonus_per_agent = np.zeros_like(per_agent_rewards)
                for c_idx in range(n_clusters):
                    num_agents_in_cluster = per_agent_rewards.shape[1]
                    if num_agents_in_cluster > 0:
                        bonus = high_level_rewards_per_cluster[c_idx] / num_agents_in_cluster
                        bonus_per_agent[c_idx, :] = bonus
                rewards_for_buffer = per_agent_rewards + bonus_per_agent

            dones_list = step_info.get("cluster_dones")
            for idx in range(n_clusters):
                # NOTE(review): current obs is stored unflattened while next obs
                # is reshape(-1)-flattened — confirm against MAPPO.store.
                low_agents[idx].store(
                    obs_clusters[idx],
                    batch_global_obs[idx],
                    low_level_actions[idx],
                    low_level_logps[idx],
                    rewards_for_buffer[idx],
                    dones_list[idx],
                    next_obs_clusters[idx].reshape(-1)
                )

            cluster_rewards += per_agent_rewards
            total_cost += np.sum(cluster_infos['costs'])
            total_grid_import += np.sum(cluster_infos['grid_import_with_p2p'])
            obs_clusters = next_obs_clusters

        # --- End-of-episode policy updates ----------------------------------
        if is_phase_1:
            for agent in low_agents:
                agent.update()
        else:
            # Alternate: first FREEZE_HIGH episodes of each cycle update the
            # low-level agents, the remaining FREEZE_LOW update the high-level.
            CYCLE_LENGTH = FREEZE_HIGH_FOR_EPISODES + FREEZE_LOW_FOR_EPISODES
            phase2_episode_num = ep - JOINT_TRAINING_START_EPISODE
            position_in_cycle = phase2_episode_num % CYCLE_LENGTH

            if position_in_cycle < FREEZE_HIGH_FOR_EPISODES:
                print(f"Updating ALL LOW-LEVEL agents (High-level is frozen).")
                for agent in low_agents:
                    agent.update()
            else:
                print(f"Updating HIGH-LEVEL agent (Low-level is frozen).")
                inter_agent.update()

        # --- Episode statistics ----------------------------------------------
        duration = time.time() - start_time
        num_low_level_agents = n_clusters * n_agents_per_cluster
        get_price_fn = cluster_env.cluster_envs[0].get_grid_price

        # Baseline: what the whole system would have paid importing from the
        # grid with no P2P trading, priced per step via the first sub-env.
        baseline_costs_per_step = [
            recursive_sum(entry["grid_import_no_p2p"]) * get_price_fn(i)
            for i, entry in enumerate(day_logs)
        ]
        total_baseline_cost = sum(baseline_costs_per_step)

        actual_costs_per_step = [recursive_sum(entry["costs"]) for entry in day_logs]
        total_actual_cost = sum(actual_costs_per_step)

        cost_reduction_pct = (1 - (total_actual_cost / total_baseline_cost)) * 100 if total_baseline_cost > 0 else 0.0

        # Intra-cluster (low-level) rewards.
        total_reward_intra = cluster_rewards.sum()
        mean_reward_intra = total_reward_intra / num_low_level_agents if num_low_level_agents > 0 else 0.0

        # Inter-cluster (high-level) rewards, averaged per step.
        total_reward_inter = ep_total_inter_cluster_reward
        mean_reward_inter = total_reward_inter / step_count if step_count > 0 else 0.0

        # Combined system reward.
        total_reward_system = total_reward_intra + total_reward_inter
        mean_reward_system = total_reward_system / num_low_level_agents if num_low_level_agents > 0 else 0.0

        intra_log.setdefault('total', []).append(total_reward_intra)
        intra_log.setdefault('mean', []).append(mean_reward_intra)
        inter_log.setdefault('total', []).append(total_reward_inter)
        inter_log.setdefault('mean', []).append(mean_reward_inter)
        total_log.setdefault('total', []).append(total_reward_system)
        total_log.setdefault('mean', []).append(mean_reward_system)
        cost_log.setdefault('total_cost', []).append(total_actual_cost)
        cost_log.setdefault('cost_without_p2p', []).append(total_baseline_cost)

        episode_log_data.append({
            "Episode": ep,
            "Mean_Reward_System": mean_reward_system,
            "Mean_Reward_Intra": mean_reward_intra,
            "Mean_Reward_Inter": mean_reward_inter,
            "Total_Reward_System": total_reward_system,
            "Total_Reward_Intra": total_reward_intra,
            "Total_Reward_Inter": total_reward_inter,
            "Cost_Reduction_Pct": cost_reduction_pct,
            "Episode_Duration": duration,
        })

        print(f"Ep {ep}/{NUM_EPISODES} | "
              f"Mean System R: {mean_reward_system:.3f} | "
              f"Cost Red: {cost_reduction_pct:.1f}% | "
              f"Time: {duration:.2f}s")

        if ep % CHECKPOINT_INTERVAL == 0 or ep == NUM_EPISODES:
            for c_idx, agent in enumerate(low_agents):
                agent.save(os.path.join(models_dir, f"low_cluster{c_idx}_ep{ep}.pth"))
            inter_agent.save(os.path.join(models_dir, f"inter_ep{ep}.pth"))
            print(f"Saved checkpoint at episode {ep}")

    print("Training completed! Aggregating final logs...")

    # Metrics of the very last episode were never flushed inside the loop
    # (the "ep > 1" branch only fires after a reset), so collect them here.
    final_cluster_metrics = cluster_env.call('get_episode_metrics')
    final_system_metrics = {
        "grid_reduction_entire_day": sum(m["grid_reduction_entire_day"] for m in final_cluster_metrics),
        "grid_reduction_peak_hours": sum(m["grid_reduction_peak_hours"] for m in final_cluster_metrics),
        "total_cost_savings": sum(m["total_cost_savings"] for m in final_cluster_metrics),
        "battery_degradation_cost_total": sum(m["battery_degradation_cost_total"] for m in final_cluster_metrics),
        "fairness_on_cost_savings": np.mean([m["fairness_on_cost_savings"] for m in final_cluster_metrics]),
        "Episode": NUM_EPISODES
    }
    performance_metrics_log.append(final_system_metrics)

    # --- Persist combined CSV log --------------------------------------------
    df_rewards_log = pd.DataFrame(episode_log_data)
    df_perf_log = pd.DataFrame(performance_metrics_log)
    df_final_log = pd.merge(df_rewards_log, df_perf_log, on="Episode")

    log_csv_path = os.path.join(logs_dir, "training_performance_log.csv")

    # Append a pseudo-row holding the total wall-clock training time.
    overall_end_time = time.time()
    total_duration_seconds = overall_end_time - overall_start_time
    total_time_row = pd.DataFrame([{"Episode": "Total_Training_Time", "Episode_Duration": total_duration_seconds}])
    df_to_save = pd.concat([df_final_log, total_time_row], ignore_index=True)

    columns_to_save = [
        "Episode",
        "Mean_Reward_System",
        "Mean_Reward_Intra",
        "Mean_Reward_Inter",
        "Total_Reward_System",
        "Total_Reward_Intra",
        "Total_Reward_Inter",
        "Cost_Reduction_Pct",
        "battery_degradation_cost_total",
        "Episode_Duration",
        "total_cost_savings",
        "grid_reduction_entire_day",
        "fairness_on_cost_savings"
    ]
    # Keep only columns that actually exist, in the preferred order.
    df_to_save = df_to_save[[col for col in columns_to_save if col in df_to_save.columns]]
    df_to_save.to_csv(log_csv_path, index=False)
    print(f"Saved comprehensive training performance log to: {log_csv_path}")

    generate_plots(
        plots_dir=plots_dir,
        num_episodes=NUM_EPISODES,
        intra_log=intra_log,
        inter_log=inter_log,
        total_log=total_log,
        cost_log=cost_log,
        df_final_log=df_final_log
    )

    # NOTE(review): total duration is recomputed here (now including plotting
    # time), shadowing the figure written to the CSV above — confirm intended.
    overall_end_time = time.time()
    total_duration_seconds = overall_end_time - overall_start_time

    total_duration_formatted = str(timedelta(seconds=int(total_duration_seconds)))

    print("\n" + "="*50)
    print(f"Total Training Time: {total_duration_formatted} (HH:MM:SS)")
    print("="*50)
| |
|
| |
|
def generate_plots(
    plots_dir: str,
    num_episodes: int,
    intra_log: dict,
    inter_log: dict,
    total_log: dict,
    # FIX: was annotated `list`; main() passes a dict of lists and the body
    # builds pd.DataFrame(cost_log) then accesses named columns, which
    # requires the dict-of-lists shape.
    cost_log: dict,
    df_final_log: pd.DataFrame
):
    """
    Generates and saves all final plots after training is complete.

    Args:
        plots_dir: Directory the PNG files are written to.
        num_episodes: Number of training episodes (x-axis length).
        intra_log / inter_log / total_log: Dicts with 'total' and 'mean'
            per-episode reward series for low-level, high-level and combined
            rewards respectively.
        cost_log: Dict with 'total_cost' and 'cost_without_p2p' per-episode
            series, used to derive the cost-reduction percentage.
        df_final_log: Merged per-episode log; must contain 'Episode' and
            'battery_degradation_cost_total' columns.
    """
    print("Training completed! Generating plots…")

    def moving_avg(series, window):
        # Centered rolling mean; min_periods=1 keeps the curve defined at the edges.
        return pd.Series(series).rolling(window=window, center=True, min_periods=1).mean().to_numpy()

    ma_window = 120
    episodes = np.arange(1, num_episodes + 1)

    # --- Plot 1: intra-cluster (low-level) rewards, total vs mean on twin axes
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.plot(episodes, moving_avg(intra_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2)
    ax.set_xlabel("Episode")
    ax.set_ylabel("Total Intra-Cluster Reward", color='tab:blue')
    ax.tick_params(axis='y', labelcolor='tab:blue')
    ax.grid(True)

    ax2 = ax.twinx()
    ax2.plot(episodes, moving_avg(intra_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='tab:cyan')
    ax2.set_ylabel("Mean Intra-Cluster Reward", color='tab:cyan')
    ax2.tick_params(axis='y', labelcolor='tab:cyan')

    fig.suptitle("Intra-Cluster (Low-Level Agent) Rewards")
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    plt.savefig(os.path.join(plots_dir, "1_intra_cluster_rewards.png"), dpi=200)
    plt.close()

    # --- Plot 2: inter-cluster (high-level) rewards ---------------------------
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.plot(episodes, moving_avg(inter_log['total'], ma_window), label=f'Total Reward (MA {ma_window})', linewidth=2, color='tab:green')
    ax.set_xlabel("Episode")
    ax.set_ylabel("Total Inter-Cluster Reward", color='tab:green')
    ax.tick_params(axis='y', labelcolor='tab:green')
    ax.grid(True)

    ax2 = ax.twinx()
    ax2.plot(episodes, moving_avg(inter_log['mean'], ma_window), label=f'Mean Reward (MA {ma_window})', linewidth=2, linestyle='--', color='mediumseagreen')
    ax2.set_ylabel("Mean Inter-Cluster Reward", color='mediumseagreen')
    ax2.tick_params(axis='y', labelcolor='mediumseagreen')

    fig.suptitle("Inter-Cluster (High-Level Agent) Rewards")
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    plt.savefig(os.path.join(plots_dir, "2_inter_cluster_rewards.png"), dpi=200)
    plt.close()

    # --- Plot 3: combined system rewards --------------------------------------
    fig, ax = plt.subplots(figsize=(12, 7))
    ax.plot(episodes, moving_avg(total_log['total'], ma_window), label=f'Total System Reward (MA {ma_window})', linewidth=2, color='tab:red')
    ax.set_xlabel("Episode")
    ax.set_ylabel("Total System Reward", color='tab:red')
    ax.tick_params(axis='y', labelcolor='tab:red')
    ax.grid(True)

    ax2 = ax.twinx()
    ax2.plot(episodes, moving_avg(total_log['mean'], ma_window), label=f'Mean System Reward (MA {ma_window})', linewidth=2, linestyle='--', color='salmon')
    ax2.set_ylabel("Mean System Reward per Agent", color='salmon')
    ax2.tick_params(axis='y', labelcolor='salmon')

    fig.suptitle("Total System Rewards (Intra + Inter)")
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    plt.savefig(os.path.join(plots_dir, "3_total_system_rewards.png"), dpi=200)
    plt.close()

    # --- Plot 4: cost reduction vs the no-P2P baseline ------------------------
    cost_df = pd.DataFrame(cost_log)
    # Cap the reduction at 100%; negative values (worse than baseline) pass through.
    cost_df['cost_reduction_pct'] = 100 * (1 - (cost_df['total_cost'] / cost_df['cost_without_p2p'])).clip(lower=-np.inf, upper=100)
    plt.figure(figsize=(12, 7))
    plt.plot(episodes, moving_avg(cost_df['cost_reduction_pct'], ma_window), label=f'Cost Reduction % (MA {ma_window})', color='purple', linewidth=2)
    plt.xlabel("Episode")
    plt.ylabel("Cost Reduction (%)")
    plt.title("Total System-Wide Cost Reduction")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "4_cost_reduction.png"), dpi=200)
    plt.close()

    # Drop the non-numeric "Total_Training_Time" pseudo-row before plotting.
    df_plot = df_final_log[pd.to_numeric(df_final_log['Episode'], errors='coerce').notna()].copy()
    df_plot['Episode'] = pd.to_numeric(df_plot['Episode'])

    # --- Plot 5: battery degradation cost -------------------------------------
    plt.figure(figsize=(12, 7))
    plt.plot(df_plot["Episode"], moving_avg(df_plot["battery_degradation_cost_total"], ma_window),
             label=f'Degradation Cost (MA {ma_window})', color='darkgreen', linewidth=2)
    plt.xlabel("Episode")
    plt.ylabel("Total Degradation Cost ($)")
    plt.title("Total Battery Degradation Cost")
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(plots_dir, "5_battery_degradation_cost.png"), dpi=200)
    plt.close()

    print(f"All plots have been saved to: {plots_dir}")
| |
|
| |
|
# Script entry point: run the full hierarchical training pipeline.
if __name__ == "__main__":
    main()