File size: 4,301 Bytes
a063d15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gymnasium as gym
import numpy as np
import tensorflow as tf
import os
import config
from utilities import init_gpu
from agent import PPOAgent
from trainer import PPOTrainer

def main():
    """Train the PPO agent with periodic checkpoints and automatic resume.

    The function:

    1. Calls ``init_gpu()``.
    2. Builds a throwaway ``config.ENV_NAME`` environment only to read the
       first component of ``action_space.high``, which is handed to
       ``PPOAgent``.
    3. Restores the newest checkpoint found in ``config.CHECKPOINT_DIR``
       (model, both optimizers and the iteration counter) if one exists.
    4. Runs iterations up to and including ``config.TOTAL_ITERATIONS``,
       writing losses, ``log_std`` and the mean episode score to TensorBoard
       under ``config.LOG_DIR``.
    5. Every 5th iteration prints progress, saves a checkpoint, and saves the
       model weights whenever the mean score is a new positive best.

    ``trainer.close()`` is always invoked, including when an iteration raises,
    so the trainer's resources are not leaked on failure.
    """
    init_gpu()

    # Probe environment: used solely to discover the action bound. Closed in
    # ``finally`` so it is released even if the action space lookup fails.
    temp_env = gym.make(config.ENV_NAME)
    try:
        action_bounds = temp_env.action_space.high[0]
    finally:
        temp_env.close()

    agent = PPOAgent(action_bounds)
    trainer = PPOTrainer(agent)

    try:
        summary_writer = tf.summary.create_file_writer(config.LOG_DIR)

        # The checkpoint tracks the model, both optimizers and the iteration
        # counter so that a resumed run continues with optimizer state intact.
        global_iteration = tf.Variable(1, dtype=tf.int64)
        checkpoint = tf.train.Checkpoint(
            actor_critic=agent.ac,
            actor_optimizer=agent.actor_opt,
            critic_optimizer=agent.critic_opt,
            iteration=global_iteration,
        )
        checkpoint_manager = tf.train.CheckpointManager(
            checkpoint, directory=config.CHECKPOINT_DIR, max_to_keep=1000
        )

        latest_checkpoint = checkpoint_manager.latest_checkpoint
        if latest_checkpoint:
            checkpoint.restore(latest_checkpoint)
            completed_iter = int(global_iteration.numpy())
            print(f"โšก Resuming pipeline safely from Checkpoint Iteration: {completed_iter}")
            # The stored counter is the iteration that had already finished
            # training when the checkpoint was written, so continue with the
            # next one instead of repeating it (and re-logging its step).
            start_iter = completed_iter + 1
        else:
            print("๐ŸŒฑ No active checkpoint located. Initializing new optimization cycle...")
            start_iter = int(global_iteration.numpy())

        # Best mean score seen in THIS process; it is not part of the
        # checkpoint, so it starts over after a resume.
        best_score = -float('inf')

        for itr in range(start_iter, config.TOTAL_ITERATIONS + 1):
            global_iteration.assign(itr)

            # 1. Gather rollouts (the original author notes these are
            #    collected concurrently via multiprocessing).
            states, actions, log_probs, returns, advantages, ep_scores = trainer.collect_rollouts()

            # 2. Optimize on the collected batch.
            actor_loss, critic_loss = trainer.train_epoch(
                states, actions, log_probs, returns, advantages
            )

            # 3. Log to TensorBoard. -100.0 is the placeholder score used when
            #    no episode scores were returned; it is only printed, never
            #    written to TensorBoard.
            avg_score = np.mean(ep_scores) if ep_scores else -100.0
            log_std = agent.ac.log_std.numpy()[0]
            with summary_writer.as_default():
                tf.summary.scalar("Loss/Actor_Loss", actor_loss, step=itr)
                tf.summary.scalar("Loss/Critic_Loss", critic_loss, step=itr)
                tf.summary.scalar("Parameters/Exploration_Log_Std", log_std, step=itr)
                if ep_scores:
                    tf.summary.scalar("Metrics/Mean_Reward_Raw", avg_score, step=itr)

            if itr % 5 != 0:
                continue

            # NOTE: the value printed under "Variance" is the first log_std entry.
            print(f"Iteration: {itr:3d}/{config.TOTAL_ITERATIONS} | Mean Env Score: {avg_score:6.2f} | Variance: {log_std:.3f}")

            # Periodic checkpoint so an interrupted run can be resumed.
            checkpoint_manager.save()

            # Keep a standalone copy of the weights whenever the mean score
            # reaches a new positive best.
            if avg_score > best_score and avg_score > 0.0:
                best_score = avg_score
                print(f"๐ŸŒŸ New performance milestone! Saving best weights with score: {best_score:.2f}")
                # The ".weights.h5" suffix is the form newer Keras releases
                # expect from save_weights().
                agent.ac.save_weights("ppo_mountain_car_weights.weights.h5")

        print(f"\n๐Ÿ Finished all {config.TOTAL_ITERATIONS} training iterations successfully.")
    finally:
        # Always release the trainer, even when an iteration raised.
        trainer.close()

# Start training only when this file is executed as a script; importing it
# as a module must not trigger a training run.
if __name__ == "__main__":
    main()