Create Trained_PPO_Agent.py
Trained_PPO_Agent.py  (ADDED, +103 -0)
from Snake_EnvAndAgent import SnakeGameEnv
from PPO_Model import PPOAgent
import os
import time
import numpy as np
from plot_utility_Trained_Agent import init_live_plot, update_live_plot, save_live_plot_final, smooth_curve

# Playback settings for the trained agent.
PLAY_CONFIG = {
    'grid_size': 30,
    'model_path_prefix': 'snake_ppo_models/ppo_snake_final',
    'num_episodes_to_play': 100,
    'render_fps': 10,
    'live_plot_interval_episodes': 1,
    'plot_smoothing_factor': 0.8
}

PLOT_SAVE_DIR = 'snake_ppo_plots'
os.makedirs(PLOT_SAVE_DIR, exist_ok=True)


def play_agent():
    print("Initializing environment for playback...")
    env = SnakeGameEnv(render_mode='human')

    # Slow rendering down to a watchable speed.
    if PLAY_CONFIG['render_fps'] > 0:
        env.metadata["render_fps"] = PLAY_CONFIG['render_fps']

    obs_shape = env.observation_space.shape
    action_size = env.action_space.n

    # The network architecture must match the one used during training,
    # otherwise the saved weights cannot be loaded.
    agent = PPOAgent(
        observation_space_shape=obs_shape,
        action_space_size=action_size,
        actor_lr=3e-4,
        critic_lr=3e-4,
        hidden_layer_sizes=[512, 512, 512]
    )

    print(f"Loading models from: {PLAY_CONFIG['model_path_prefix']}")
    load_success = agent.load_models(PLAY_CONFIG['model_path_prefix'])

    if not load_success:
        print("\nFATAL ERROR: Failed to load trained models from disk. The agent cannot perform as trained. Exiting playback.")
        env.close()
        return

    print("--- Trained models loaded successfully. ---")
    print("Starting agent playback...")

    episode_rewards_playback = []

    # Live reward plot, refreshed as episodes complete.
    fig, ax, line = init_live_plot(PLOT_SAVE_DIR, filename="live_playback_rewards_plot.png")
    ax.set_title('Live Playback Progress (Episode Rewards)')
    ax.set_xlabel('Episode')
    ax.set_ylabel('Total Reward')

    for episode in range(PLAY_CONFIG['num_episodes_to_play']):
        state, info = env.reset()
        done = False
        episode_reward = 0
        steps = 0

        while not done:
            # Sample an action from the policy; the log-prob and value
            # outputs are only needed during training and are ignored here.
            action, _, _ = agent.choose_action(state)

            next_state, reward, terminated, truncated, info = env.step(action)
            episode_reward += reward
            state = next_state
            steps += 1
            done = terminated or truncated

        episode_rewards_playback.append(episode_reward)

        print(f"Episode {episode + 1}: Total Reward = {episode_reward:.2f}, Score = {info['score']}, Steps = {steps}")

        if (episode + 1) % PLAY_CONFIG['live_plot_interval_episodes'] == 0:
            current_episodes = list(range(1, len(episode_rewards_playback) + 1))
            smoothed_rewards = smooth_curve(episode_rewards_playback, factor=PLAY_CONFIG['plot_smoothing_factor'])
            update_live_plot(fig, ax, line, current_episodes, smoothed_rewards,
                             current_timestep=episode + 1,
                             total_timesteps=PLAY_CONFIG['num_episodes_to_play'])

        # Short pause between episodes so the rendered game is easier to follow.
        time.sleep(0.5)

    env.close()
    print("\nPlayback finished.")

    # Final plot update and save.
    current_episodes = list(range(1, len(episode_rewards_playback) + 1))
    smoothed_rewards = smooth_curve(episode_rewards_playback, factor=PLAY_CONFIG['plot_smoothing_factor'])
    update_live_plot(fig, ax, line, current_episodes, smoothed_rewards,
                     current_timestep=PLAY_CONFIG['num_episodes_to_play'],
                     total_timesteps=PLAY_CONFIG['num_episodes_to_play'])
    save_live_plot_final(fig, ax)

    avg_playback_reward = np.mean(episode_rewards_playback)
    print(f"Average Reward over {PLAY_CONFIG['num_episodes_to_play']} playback episodes: {avg_playback_reward:.2f}")


if __name__ == "__main__":
    play_agent()
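
Note: smooth_curve is imported from plot_utility_Trained_Agent, which is not part of this commit. Given how it is called above (a list of episode rewards and a factor of 0.8), it is presumably an exponential moving average; a minimal sketch under that assumption:

def smooth_curve(values, factor=0.8):
    """Exponentially smooth a sequence of values (higher factor = smoother)."""
    smoothed = []
    last = None
    for v in values:
        # Blend the previous smoothed value with the new raw value.
        last = v if last is None else last * factor + v * (1 - factor)
        smoothed.append(last)
    return smoothed

With factor=0.8, each plotted point carries roughly 80% of the previous smoothed value, which damps the large episode-to-episode variance typical of Snake rewards.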
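The live-plot helpers (init_live_plot, update_live_plot, save_live_plot_final) are likewise defined outside this file. The names and call sites above suggest a matplotlib figure updated in interactive mode; a rough sketch assuming those signatures (the fig._save_path attribute is an invention of this sketch, not the author's API):

import os
import matplotlib.pyplot as plt

def init_live_plot(save_dir, filename="live_plot.png"):
    plt.ion()                          # interactive mode: draw without blocking
    fig, ax = plt.subplots()
    line, = ax.plot([], [])            # empty line, populated on each update
    fig._save_path = os.path.join(save_dir, filename)
    return fig, ax, line

def update_live_plot(fig, ax, line, xs, ys, current_timestep, total_timesteps):
    line.set_data(xs, ys)
    ax.relim()                         # recompute data limits for the new points
    ax.autoscale_view()
    fig.canvas.draw_idle()
    fig.canvas.flush_events()
    fig.savefig(fig._save_path)        # keep the on-disk snapshot current

def save_live_plot_final(fig, ax):
    plt.ioff()
    fig.savefig(fig._save_path)
    plt.close(fig)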