semantic_rl / train_procgen /single_graph.py
leonepson's picture
Upload 254 files
5960497 verified
from single_graph_util import plot_experiment, switch_to_outer_plot
from constants import ENV_NAMES, NAME_TO_CASE, EASY_GAME_RANGES, HARD_GAME_RANGES
import matplotlib
import matplotlib.pyplot as plt
import argparse
from utils import str2bool
import os
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--env_name', type=str, default='coinrun')
parser.add_argument('--distribution_mode', type=str, default='easy',
help="Environment distribution_mode ('easy' or 'hard')")
parser.add_argument('--normalize_and_reduce', dest='normalize_and_reduce', action='store_true')
parser.add_argument('--restrict_training_set', type=str2bool, default=False,
help='True for 200 levels; False for all distribution')
parser.add_argument('--save', dest='save', action='store_false')
args = parser.parse_args()
run_directory_prefix = main_pcg_sample_entry(args.env_name, args.distribution_mode, args.normalize_and_reduce,
args.restrict_training_set)
plt.rcParams.update({'font.size': 20})
plt.tight_layout()
if args.save:
if not os.path.isdir('figures'):
os.makedirs('figures')
suffix = '-mean' if args.normalize_and_reduce else ''
plt.savefig(f'figures/{run_directory_prefix}{suffix}.png')
plt.show()
else:
plt.show()
def main_pcg_sample_entry(env_name, distribution_mode, normalize_and_reduce, restrict_training_set):
params = {
'xtick.labelsize': 12,
'ytick.labelsize': 12,
'axes.titlesize': 16,
'axes.labelsize': 24,
'legend.fontsize': 18,
'figure.figsize': [9, 9]
}
matplotlib.rcParams.update(params)
kwargs = {'smoothing': .9}
if distribution_mode == 'easy':
kwargs[
'x_scale'] = 1 * 256 * 64 / 1e6 # num_workers * num_steps_per_rollout * num_envs_per_worker / graph_scaling
num_train_levels = 200
normalization_ranges = EASY_GAME_RANGES
elif distribution_mode == 'hard':
kwargs[
'x_scale'] = 4 * 256 * 64 / 1e6 # num_workers * num_steps_per_rollout * num_envs_per_worker / graph_scaling
num_train_levels = 500
normalization_ranges = HARD_GAME_RANGES
else:
assert False, "specify distribution_mode as 'easy' or 'hard'"
y_label = 'Score'
x_label = 'Timesteps (Millions)'
run_directory_prefix = f"{env_name}_{distribution_mode}_{num_train_levels if restrict_training_set else '0'}"
kwargs['run_directory_prefix'] = f"{run_directory_prefix}"
# We throw out the first few datapoints to give the episodic reward buffers time to fill up
# Otherwise, there could be a short-episode bias
kwargs['env_name'] = env_name
kwargs['first_valid'] = 10
if restrict_training_set:
kwargs['suffixes'] = ['', 'rank001']
else:
kwargs['suffixes'] = ['']
if normalize_and_reduce:
kwargs['normalization_ranges'] = normalization_ranges
y_label = 'Mean Normalized Score'
fig, axarr = plot_experiment(**kwargs)
if normalize_and_reduce:
axarr.set_xlabel(x_label, labelpad=10)
axarr.set_ylabel(y_label, labelpad=10)
else:
ax0 = switch_to_outer_plot(fig)
ax0.set_xlabel(x_label, labelpad=40)
ax0.set_ylabel(y_label, labelpad=35)
return run_directory_prefix
if __name__ == '__main__':
main()