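# NOTE: the next six lines appear to be web file-viewer residue captured along
# with the source; they are not program code and are kept only as comments.
# privateboss's picture
# Upload 8 files
# a063d15 verified
# Raw
# History Blame Contribute Delete
# 4.3 kB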
import gymnasium as gym
import numpy as np
import tensorflow as tf
import os
import config
from utilities import init_gpu
from agent import PPOAgent
from trainer import PPOTrainer
def main():
    """Run the PPO training loop with TensorBoard logging and checkpointing.

    Builds a ``PPOAgent``/``PPOTrainer`` pair for ``config.ENV_NAME``, restores
    the newest checkpoint found in ``config.CHECKPOINT_DIR`` (if any), and then
    iterates up to ``config.TOTAL_ITERATIONS``. Every iteration collects
    rollouts, calls ``trainer.train_epoch`` and writes scalars to
    ``config.LOG_DIR``. Every fifth iteration a progress line is printed and a
    checkpoint is written. Whenever the mean episode score is positive and
    beats the best score seen in this process, the actor-critic weights are
    saved to ``ppo_mountain_car_weights.weights.h5``.

    The trainer is always closed on exit, including when an iteration raises.
    """
    report_every = 5  # iterations between console reports / checkpoints
    best_weights_path = "ppo_mountain_car_weights.weights.h5"

    init_gpu()

    # Dynamic lookup to establish environment dimensions: a throwaway env is
    # created only to read the action bound, and is closed even on failure.
    temp_env = gym.make(config.ENV_NAME)
    try:
        action_bounds = temp_env.action_space.high[0]
    finally:
        temp_env.close()

    agent = PPOAgent(action_bounds)
    trainer = PPOTrainer(agent)
    try:
        summary_writer = tf.summary.create_file_writer(config.LOG_DIR)

        # Checkpoint covers the model, both optimizers and the iteration
        # counter so an interrupted run can be resumed.
        global_iteration = tf.Variable(1, dtype=tf.int64)
        checkpoint = tf.train.Checkpoint(
            actor_critic=agent.ac,
            actor_optimizer=agent.actor_opt,
            critic_optimizer=agent.critic_opt,
            iteration=global_iteration,
        )
        checkpoint_manager = tf.train.CheckpointManager(
            checkpoint, directory=config.CHECKPOINT_DIR, max_to_keep=1000
        )

        # Automatically check for existing weights to resume execution.
        latest_checkpoint = checkpoint_manager.latest_checkpoint
        if latest_checkpoint:
            checkpoint.restore(latest_checkpoint)
            print(f"⚑ Resuming pipeline safely from Checkpoint Iteration: {global_iteration.numpy()}")
            # The stored counter is the iteration that had already finished
            # when the checkpoint was written, so continue with the next one
            # instead of repeating it (and double-logging its step).
            start_iter = int(global_iteration.numpy()) + 1
        else:
            print("🌱 No active checkpoint located. Initializing new optimization cycle...")
            start_iter = int(global_iteration.numpy())

        # NOTE(review): best_score is not part of the checkpoint, so after a
        # resume the first positive score overwrites the best-weights file
        # even if an earlier run had scored higher.
        best_score = -float('inf')

        for itr in range(start_iter, config.TOTAL_ITERATIONS + 1):
            global_iteration.assign(itr)

            # 1. Gather rollouts from the trainer.
            states, actions, log_probs, returns, advantages, ep_scores = trainer.collect_rollouts()

            # 2. Perform optimization on the collected data.
            actor_loss, critic_loss = trainer.train_epoch(states, actions, log_probs, returns, advantages)

            # 3. Log values to TensorBoard.
            # Explicit emptiness test so both lists and arrays are handled;
            # -100.0 is the placeholder score when no episode finished.
            has_scores = ep_scores is not None and len(ep_scores) > 0
            avg_score = np.mean(ep_scores) if has_scores else -100.0
            log_std_value = agent.ac.log_std.numpy()[0]

            with summary_writer.as_default():
                tf.summary.scalar("Loss/Actor_Loss", actor_loss, step=itr)
                tf.summary.scalar("Loss/Critic_Loss", critic_loss, step=itr)
                tf.summary.scalar("Parameters/Exploration_Log_Std", log_std_value, step=itr)
                if has_scores:
                    tf.summary.scalar("Metrics/Mean_Reward_Raw", avg_score, step=itr)

            if itr % report_every == 0:
                # The "Variance" column shows the first log-std entry.
                print(f"Iteration: {itr:3d}/{config.TOTAL_ITERATIONS} | Mean Env Score: {avg_score:6.2f} | Variance: {log_std_value:.3f}")
                # Save periodic checkpoint defensively for power-loss protection.
                checkpoint_manager.save()

            # Best-weight tracking: save deployment weights if performance improves.
            if avg_score > best_score and avg_score > 0.0:
                best_score = avg_score
                print(f"🌟 New performance milestone! Saving best weights with score: {best_score:.2f}")
                # The ".weights.h5" suffix replaced an earlier plain ".h5"
                # name; newer Keras releases require it for save_weights.
                agent.ac.save_weights(best_weights_path)

        print(f"\n🏁 Finished all {config.TOTAL_ITERATIONS} training iterations successfully.")
    finally:
        # Always release the trainer's resources, even if training failed.
        trainer.close()
# Script entry point: start training only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()