|
|
| """
|
| Train an RL agent on the Energy Optimization Environment.
|
| """
|
|
|
| import sys
|
| import os
|
| sys.path.insert(0, os.path.dirname(__file__))
|
|
|
|
|
import types

from models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary

# Register an in-memory stand-in for the 'he_demo' package so that anything
# (e.g. pickled objects or lazy imports) resolving 'he_demo' or
# 'he_demo.models' gets the local model classes instead of failing.
_he_demo_stub = types.ModuleType('he_demo')
vars(_he_demo_stub).update(
    EnergyOptimizationAction=EnergyOptimizationAction,
    EnergyOptimizationObservation=EnergyOptimizationObservation,
    Task=Task,
    TaskSummary=TaskSummary,
)
# Both the package and its 'models' submodule alias the same stub on purpose.
sys.modules['he_demo'] = _he_demo_stub
sys.modules['he_demo.models'] = _he_demo_stub
|
|
|
| from gym_wrapper import EnergyOptimizationGymEnv
|
| from stable_baselines3 import PPO
|
| from stable_baselines3.common.env_util import make_vec_env
|
|
|
def train_agent():
    """Train a PPO agent on the energy optimization environment.

    Trains for 10,000 timesteps across 4 vectorized environments, saves the
    model to 'energy_optimization_ppo.zip', then runs a short deterministic
    rollout on a fresh environment and prints per-step diagnostics plus a
    final reward summary.
    """
    print("🚀 Training PPO Agent on Energy Optimization Environment")
    print("=" * 60)

    def make_env():
        return EnergyOptimizationGymEnv()

    # Four parallel copies speed up PPO rollout collection.
    env = make_vec_env(make_env, n_envs=4)

    model = PPO(
        "MlpPolicy",
        env,
        verbose=1,
        learning_rate=3e-4,
        n_steps=2048,
        batch_size=64,
        n_epochs=10,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.2,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
    )

    print("Training for 10,000 timesteps...")
    model.learn(total_timesteps=10000)

    model.save("energy_optimization_ppo")
    print("✅ Model saved as 'energy_optimization_ppo.zip'")
    env.close()  # release the vectorized training envs

    print("\n🧪 Testing trained agent...")
    test_env = EnergyOptimizationGymEnv()
    obs, _ = test_env.reset()

    action_types = ["reduce_ram", "optimize_energy", "balance_resources", "monitor_system"]
    total_reward = 0.0
    steps = 0

    while steps < 50:
        action, _ = model.predict(obs, deterministic=True)

        # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
        # The original discarded `truncated`, so a time-limit episode end would
        # never break the loop; treat either flag as episode end.
        obs, reward, terminated, truncated, _ = test_env.step(action)

        total_reward += reward
        steps += 1

        # Assumes action is [type_index, intensity] — TODO confirm against
        # EnergyOptimizationGymEnv's action space. Clamp the index because a
        # Box space with an inclusive high bound could produce int(4.0).
        action_type_index = min(int(action[0]), len(action_types) - 1)
        intensity = float(action[1])
        action_type = action_types[action_type_index]

        print(f"Step {steps}: {action_type}({intensity:.1f}) -> RAM={obs[0]:.1f}%, Energy={obs[1]:.1f}kWh, Reward={reward:.2f}")

        if terminated or truncated:
            break

    # total_reward was previously accumulated but never reported.
    print(f"\n📊 Test rollout finished: {steps} steps, total reward {total_reward:.2f}")
    test_env.close()
|
|
|
# Script entry point: train, save, and smoke-test the agent when run directly.
if __name__ == "__main__":
    train_agent()