ledmands committed on
Commit ·
c37ff18
1
Parent(s): ebb75df
Modified watch_agent.py to accept a command-line argument that adjusts the repeat action probability.
Browse files- agents/version_2/watch_agent.py +16 -10
agents/version_2/watch_agent.py
CHANGED
|
@@ -3,20 +3,26 @@ from stable_baselines3.common.evaluation import evaluate_policy
|
|
| 3 |
from stable_baselines3.common.monitor import Monitor
|
| 4 |
import gymnasium as gym
|
| 5 |
|
| 6 |
-
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
|
| 10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
|
|
|
| 15 |
|
| 16 |
# Retrieve the environment
|
| 17 |
-
eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="
|
| 18 |
|
| 19 |
# Evaluate the policy
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
print(
|
|
|
|
| 3 |
from stable_baselines3.common.monitor import Monitor
|
| 4 |
import gymnasium as gym
|
| 5 |
|
| 6 |
+
import argparse
|
| 7 |
|
| 8 |
+
# This script should have some options
|
| 9 |
+
# 1. Turn off the stochasticity as determined by the ALEv5
|
| 10 |
+
# Even if deterministic is set to true in evaluate policy, the environment will ignore this 25% of the time
|
| 11 |
+
# To compensate for this, we can set the repeat action probability to 0
|
| 12 |
+
|
| 13 |
+
parser = argparse.ArgumentParser()
|
| 14 |
+
parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability", type=float, default=0.25)
|
| 15 |
+
args = parser.parse_args()
|
| 16 |
|
| 17 |
+
MODEL_NAME = "ALE-Pacman-v5"
|
| 18 |
+
rpt_act_prob = args.repeat_action_probability
|
| 19 |
|
| 20 |
+
loaded_model = DQN.load(MODEL_NAME)
|
| 21 |
|
| 22 |
# Retrieve the environment
|
| 23 |
+
eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="rgb_array", repeat_action_probability=rpt_act_prob))
|
| 24 |
|
| 25 |
# Evaluate the policy
|
| 26 |
+
mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=1)
|
| 27 |
+
print("mean rwd: ", mean_rwd)
|
| 28 |
+
print("std rwd: ", std_rwd)
|