ledmands
committed on
Commit
·
ca16748
1
Parent(s):
baeb5f8
Moved the config and watch scripts to the root directory. Split the watch script into two scripts: watch and evaluate.
Browse files- agents/dqn_v2-6/evals.txt +6 -0
- evaluate_agent.py +73 -0
- agents/watch_agent.py → watch_agent.py +12 -10
agents/dqn_v2-6/evals.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----
|
| 2 |
+
Evaluation of agents/dqn_v2-6/ALE-Pacman-v5 on 16 May 2024 at 11:24:26 AM
|
| 3 |
+
Episodes evaluated: 1
|
| 4 |
+
mean_rwd: 399.0
|
| 5 |
+
std_rwd: 0.0
|
| 6 |
+
|
evaluate_agent.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Evaluate a trained DQN agent on ALE/Pacman-v5 and log the results.

Loads a stable-baselines3 DQN model from --agent_filepath, evaluates its
policy for --num_episodes episodes, and appends the mean/std reward to an
``evals.txt`` file stored next to the model.
"""
from pathlib import Path

from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import gymnasium as gym

import argparse
from datetime import datetime


# This script should have some options
# 1. Turn off the stochasticity as determined by the ALEv5
#    Even if deterministic is set to true in evaluate_policy, the environment
#    will ignore this 25% of the time. To compensate, set the repeat action
#    probability to 0.
#    DONE
# 2. Print out the evaluation metrics or save to file
#    DONE
# 4. Print the keyword args for the environment? I think this might be helpful...
#    DONE (ish), printing the environment specifications.
# 5. Add option flag to accept file path for model
#    DONE
# 6. Add option flag to accept number of episodes
#    DONE
# 7. Save evaluations in a log file
#    DONE
# 8. Add option flag for mean rewards/length or discrete rewards/lengths
#    IN PROGRESS

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability, default 0.25", type=float, default=0.25)
parser.add_argument("-f", "--frameskip", help="frameskip, default 4", type=int, default=4)
parser.add_argument("-p", "--print", help="print environment information", action="store_true")
parser.add_argument("-e", "--num_episodes", help="specify the number of episodes to evaluate, default 1", type=int, default=1)
parser.add_argument("-a", "--agent_filepath", help="file path to agent to evaluate, minus the .zip extension", type=str, required=True)
# parser.add_argument("-s", "--savefile", help="Specify a filepath to save the evaluation metrics.", type=str, default="evals")
args = parser.parse_args()

model_name = args.agent_filepath
model = DQN.load(model_name)

# Directory containing the model. Path.parent copes with any mix of
# separators and, crucially, with a bare filename: "model" -> ".", so the
# log lands in the current directory instead of the old split("/") logic's
# "" -> "/evals" (an attempted write to the filesystem root).
model_dir = Path(model_name).parent

# Retrieve the environment. rgb_array keeps evaluation headless; the
# stochasticity knobs come straight from the CLI flags above.
eval_env = Monitor(gym.make("ALE/Pacman-v5",
                            render_mode="rgb_array",
                            repeat_action_probability=args.repeat_action_probability,
                            frameskip=args.frameskip))

if args.print:
    # The EnvSpec repr is one long comma-separated line; print one field per
    # line for readability.
    for item in str(eval_env.spec).split(", "):
        print(item)

# Evaluate the policy.
# Toggle the mean or discrete evaluations here.
mean_rwd, std_rwd = evaluate_policy(model.policy, eval_env, n_eval_episodes=args.num_episodes)

# savefile = args.savefile
savefile = model_dir / "evals.txt"
now = datetime.now()
date = now.strftime("%d %b %Y")
time = now.strftime("%I:%M:%S %p")

# Append (not overwrite) so successive evaluations accumulate in one log.
with open(savefile, "a") as file:
    file.write("-----\n")
    file.write(f"Evaluation of {model_name} on {date} at {time}\n")
    file.write(f"Episodes evaluated: {args.num_episodes}\n")
    file.write(f"mean_rwd: {mean_rwd}\n")
    file.write(f"std_rwd: {std_rwd}\n\n")
agents/watch_agent.py → watch_agent.py
RENAMED
|
@@ -24,7 +24,7 @@ import argparse
|
|
| 24 |
parser = argparse.ArgumentParser()
|
| 25 |
parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability, default 0.25", type=float, default=0.25)
|
| 26 |
parser.add_argument("-f", "--frameskip", help="frameskip, default 4", type=int, default=4)
|
| 27 |
-
parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
|
| 28 |
parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
|
| 29 |
parser.add_argument("-e", "--num_episodes", help="specify the number of episodes to evaluate, default 1", type=int, default=1)
|
| 30 |
parser.add_argument("-a", "--agent_filepath", help="file path to agent to watch, minus the .zip extension", type=str, required=True)
|
|
@@ -34,14 +34,14 @@ MODEL_NAME = args.agent_filepath
|
|
| 34 |
loaded_model = DQN.load(MODEL_NAME)
|
| 35 |
|
| 36 |
# Toggle the render mode based on the -o flag
|
| 37 |
-
if args.observe == True:
|
| 38 |
-
|
| 39 |
-
else:
|
| 40 |
-
|
| 41 |
|
| 42 |
# Retrieve the environment
|
| 43 |
eval_env = Monitor(gym.make("ALE/Pacman-v5",
|
| 44 |
-
render_mode=
|
| 45 |
repeat_action_probability=args.repeat_action_probability,
|
| 46 |
frameskip=args.frameskip,))
|
| 47 |
|
|
@@ -50,7 +50,9 @@ if args.print == True:
|
|
| 50 |
for item in env_info:
|
| 51 |
print(item)
|
| 52 |
# Evaluate the policy
|
| 53 |
-
mean_rwd, std_rwd =
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
print("
|
|
|
|
|
|
|
|
|
| 24 |
parser = argparse.ArgumentParser()
|
| 25 |
parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability, default 0.25", type=float, default=0.25)
|
| 26 |
parser.add_argument("-f", "--frameskip", help="frameskip, default 4", type=int, default=4)
|
| 27 |
+
# parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
|
| 28 |
parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
|
| 29 |
parser.add_argument("-e", "--num_episodes", help="specify the number of episodes to evaluate, default 1", type=int, default=1)
|
| 30 |
parser.add_argument("-a", "--agent_filepath", help="file path to agent to watch, minus the .zip extension", type=str, required=True)
|
|
|
|
| 34 |
loaded_model = DQN.load(MODEL_NAME)
|
| 35 |
|
| 36 |
# Toggle the render mode based on the -o flag
|
| 37 |
+
# if args.observe == True:
|
| 38 |
+
# mode = "human"
|
| 39 |
+
# else:
|
| 40 |
+
# mode = "rgb_array"
|
| 41 |
|
| 42 |
# Retrieve the environment
|
| 43 |
eval_env = Monitor(gym.make("ALE/Pacman-v5",
|
| 44 |
+
render_mode="human",
|
| 45 |
repeat_action_probability=args.repeat_action_probability,
|
| 46 |
frameskip=args.frameskip,))
|
| 47 |
|
|
|
|
| 50 |
for item in env_info:
|
| 51 |
print(item)
|
| 52 |
# Evaluate the policy
|
| 53 |
+
# mean_rwd, std_rwd =
|
| 54 |
+
|
| 55 |
+
evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=args.num_episodes)
|
| 56 |
+
# print("eval episodes: ", args.num_episodes)
|
| 57 |
+
# print("mean rwd: ", mean_rwd)
|
| 58 |
+
# print("std rwd: ", std_rwd)
|