import os

import gymnasium as gym
import numpy as np
import tensorflow as tf

import config
from utilities import init_gpu
from agent import PPOAgent
from trainer import PPOTrainer
|
|
def _run_training(agent, trainer, summary_writer):
    """Restore the latest checkpoint, if any, and run the remaining PPO iterations.

    Args:
        agent: Object exposing ``ac`` (with a ``log_std`` variable and
            ``save_weights``), ``actor_opt`` and ``critic_opt``.
        trainer: Object exposing ``collect_rollouts()`` and ``train_epoch(...)``.
        summary_writer: TensorFlow summary writer that receives the
            per-iteration scalars.
    """
    # The iteration counter is tracked by the checkpoint so that a restarted
    # process can tell how far the previous run got.
    global_iteration = tf.Variable(1, dtype=tf.int64)
    checkpoint = tf.train.Checkpoint(
        actor_critic=agent.ac,
        actor_optimizer=agent.actor_opt,
        critic_optimizer=agent.critic_opt,
        iteration=global_iteration,
    )
    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint, directory=config.CHECKPOINT_DIR, max_to_keep=1000
    )

    # NOTE(review): the leading glyphs of the console messages in this function
    # look like mis-decoded emoji; they are kept as found -- confirm the
    # intended characters.
    if checkpoint_manager.latest_checkpoint:
        checkpoint.restore(checkpoint_manager.latest_checkpoint)
        print(f"β‘ Resuming pipeline safely from Checkpoint Iteration: {global_iteration.numpy()}")
        # A checkpoint is written only after its iteration has been trained,
        # so the restored iteration is already complete. Continue with the
        # next one instead of repeating it (and re-logging its summary step).
        start_iter = int(global_iteration.numpy()) + 1
    else:
        print("π± No active checkpoint located. Initializing new optimization cycle...")
        start_iter = int(global_iteration.numpy())

    # NOTE(review): best_score is not stored in the checkpoint, so after a
    # resume the first positive mean score overwrites the saved best weights.
    best_score = -float('inf')

    for itr in range(start_iter, config.TOTAL_ITERATIONS + 1):
        global_iteration.assign(itr)

        states, actions, log_probs, returns, advantages, ep_scores = trainer.collect_rollouts()
        actor_loss, critic_loss = trainer.train_epoch(states, actions, log_probs, returns, advantages)

        # -100.0 is the fallback used when ep_scores is empty.
        avg_score = np.mean(ep_scores) if ep_scores else -100.0
        # Read once per iteration; used for both TensorBoard and the console.
        log_std = agent.ac.log_std.numpy()[0]

        with summary_writer.as_default():
            tf.summary.scalar("Loss/Actor_Loss", actor_loss, step=itr)
            tf.summary.scalar("Loss/Critic_Loss", critic_loss, step=itr)
            tf.summary.scalar("Parameters/Exploration_Log_Std", log_std, step=itr)
            # The fallback score is never written to the reward curve.
            if ep_scores:
                tf.summary.scalar("Metrics/Mean_Reward_Raw", avg_score, step=itr)

        if itr % 5 == 0:
            # The value printed under "Variance" is the first log_std entry.
            print(f"Iteration: {itr:3d}/{config.TOTAL_ITERATIONS} | Mean Env Score: {avg_score:6.2f} | Variance: {log_std:.3f}")
            # NOTE(review): the original indentation was lost; the checkpoint
            # save is assumed to share the every-5-iterations cadence of the
            # console report -- confirm.
            checkpoint_manager.save()

        # Best weights are exported only for a strictly positive mean score
        # that beats the best score seen so far in this process.
        if avg_score > best_score and avg_score > 0.0:
            best_score = avg_score
            print(f"π New performance milestone! Saving best weights with score: {best_score:.2f}")
            agent.ac.save_weights("ppo_mountain_car_weights.weights.h5")

    print(f"\nπ Finished all {config.TOTAL_ITERATIONS} training iterations successfully.")


def main():
    """Train the PPO agent on ``config.ENV_NAME``, resuming from a checkpoint if one exists.

    Initialises the GPU, reads the action bound from a temporary environment,
    builds the agent, trainer and summary writer, and then runs the training
    loop. The trainer and the summary writer are closed even when training
    raises or is interrupted.
    """
    init_gpu()

    # A short-lived environment is created only to read the first component of
    # the action space's upper bound; it is closed even if that read fails.
    temp_env = gym.make(config.ENV_NAME)
    try:
        action_bounds = temp_env.action_space.high[0]
    finally:
        temp_env.close()

    agent = PPOAgent(action_bounds)
    trainer = PPOTrainer(agent)
    try:
        summary_writer = tf.summary.create_file_writer(config.LOG_DIR)
        try:
            _run_training(agent, trainer, summary_writer)
        finally:
            # Flush pending scalars to disk before the process exits.
            summary_writer.close()
    finally:
        trainer.close()
|
|
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()