ledmands
committed on
Commit
·
cbe0575
1
Parent(s):
c37ff18
Added option flags to the watch_agent.py file.
Browse files
- agents/version_2/{evaluations_v1.npz → evals/evaluations_v0.npz} +0 -0
- agents/version_2/{evaluations_v2.npz → evals/evaluations_v1.npz} +0 -0
- agents/version_2/{evaluations_v3.npz → evals/evaluations_v2.npz} +0 -0
- agents/version_2/evals/evaluations_v3.npz +3 -0
- agents/version_2/version_2-3/events.out.tfevents.1715694677.5da7e309893b.24.0 +3 -0
- agents/version_2/watch_agent.py +26 -5
agents/version_2/{evaluations_v1.npz → evals/evaluations_v0.npz}
RENAMED
|
File without changes
|
agents/version_2/{evaluations_v2.npz → evals/evaluations_v1.npz}
RENAMED
|
File without changes
|
agents/version_2/{evaluations_v3.npz → evals/evaluations_v2.npz}
RENAMED
|
File without changes
|
agents/version_2/evals/evaluations_v3.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:456a8cf62175f6e78ef93d29a63a88d5b7d577fc7c34e62bd2ec4390b3cbb86f
|
| 3 |
+
size 4130
|
agents/version_2/version_2-3/events.out.tfevents.1715694677.5da7e309893b.24.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3a00c50b58bf881d2e40e8df0ac03b0a16d0b27b4373f9f8c37d954a5cc8dba
|
| 3 |
+
size 2797090
|
agents/version_2/watch_agent.py
CHANGED
|
@@ -5,23 +5,44 @@ import gymnasium as gym
|
|
| 5 |
|
| 6 |
import argparse
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
# This script should have some options
|
| 9 |
# 1. Turn off the stochasticity as determined by the ALEv5
|
| 10 |
# Even if deterministic is set to true in evaluate policy, the environment will ignore this 25% of the time
|
| 11 |
# To compensate for this, we can set the repeat action probability to 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
parser = argparse.ArgumentParser()
|
| 14 |
parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability", type=float, default=0.25)
|
|
|
|
|
|
|
|
|
|
| 15 |
args = parser.parse_args()
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
# Retrieve the environment
|
| 23 |
-
eval_env = Monitor(gym.make("ALE/Pacman-v5",
|
|
|
|
|
|
|
|
|
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# Evaluate the policy
|
| 26 |
mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=1)
|
| 27 |
print("mean rwd: ", mean_rwd)
|
|
|
|
import argparse

# Saved-model file (without extension) to load for evaluation.
MODEL_NAME = "ALE-Pacman-v5"
loaded_model = DQN.load(MODEL_NAME)

# This script should have some options:
# 1. Turn off the stochasticity as determined by the ALEv5.
#    Even if deterministic is set to true in evaluate_policy, the environment
#    will ignore this 25% of the time.  To compensate, we can set the repeat
#    action probability to 0.                                        # DONE
# 2. Print out the evaluation metrics or save to file.
# 3. Render in the ALE or not.                                       # DONE
# 4. Print the keyword args for the environment? I think this might
#    be helpful...                                                   # IN PROGRESS
# 5.

parser = argparse.ArgumentParser()
parser.add_argument("-r", "--repeat_action_probability",
                    help="repeat action probability", type=float, default=0.25)
parser.add_argument("-f", "--frameskip", help="frameskip", type=int, default=4)
# store_true is the idiomatic replacement for store_const/const=True: it also
# defaults the attribute to False (instead of None) when the flag is absent,
# which makes the truthiness checks below unambiguous.
parser.add_argument("-o", "--observe", help="observe agent", action="store_true")
parser.add_argument("-p", "--print", help="print environment information",
                    action="store_true")
args = parser.parse_args()

# Toggle the render mode based on the -o flag.
mode = "human" if args.observe else "rgb_array"

# Retrieve the environment.
eval_env = Monitor(gym.make(
    "ALE/Pacman-v5",
    render_mode=mode,
    repeat_action_probability=args.repeat_action_probability,
    frameskip=args.frameskip,
))

if args.print:
    # NOTE(review): splitting the spec's repr on ", " is best-effort — nested
    # kwargs containing ", " will be split mid-value; confirm acceptable.
    env_info = str(eval_env.spec).split(", ")
    for item in env_info:
        print(item)

# Evaluate the policy.
mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=1)
print("mean rwd: ", mean_rwd)