leonepson
/

semantic_rl

Reinforcement Learning

interpretability

Model card Files Files and versions

semantic_rl / train_procgen /single_graph.py

leonepson's picture

Upload 254 files

5960497 verified 3 months ago

history blame contribute delete

3.48 kB

	from single_graph_util import plot_experiment, switch_to_outer_plot
	from constants import ENV_NAMES, NAME_TO_CASE, EASY_GAME_RANGES, HARD_GAME_RANGES

	import matplotlib
	import matplotlib.pyplot as plt

	import argparse
	from utils import str2bool
	import os


	def main():
	    """Command-line entry point: plot one experiment, then save and show it.

	    Options:
	        --env_name: Environment name forwarded to the plot (default
	            'coinrun').
	        --distribution_mode: 'easy' or 'hard' (default 'easy'); any other
	            value is rejected by argparse.
	        --normalize_and_reduce: Flag forwarded to the plot; also appends
	            '-mean' to the saved file name.
	        --restrict_training_set: Parsed by ``str2bool`` (default False) and
	            forwarded to the plot.
	        --save / --no_save: Saving is ON by default; passing either spelling
	            turns it OFF. ``--save`` keeps its historical (inverted)
	            meaning so existing invocations behave the same.

	    When saving is on, the figure is written to
	    ``figures/<run_directory_prefix>[-mean].png`` (the directory is created
	    if needed). The figure is always displayed with ``plt.show()``.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--env_name', type=str, default='coinrun')
	    parser.add_argument('--distribution_mode', type=str, default='easy',
	                        choices=('easy', 'hard'),
	                        help="Environment distribution_mode ('easy' or 'hard')")
	    parser.add_argument('--normalize_and_reduce', dest='normalize_and_reduce', action='store_true')
	    parser.add_argument('--restrict_training_set', type=str2bool, default=False,
	                        help="True for the restricted training set (200 levels for 'easy', "
	                             "500 for 'hard'); False for all distribution")
	    # store_false: the flag's presence disables saving. '--no_save' is the
	    # self-describing spelling; '--save' is kept for backward compatibility.
	    parser.add_argument('--save', '--no_save', dest='save', action='store_false',
	                        help='Do not write the figure to figures/ (it is saved by default)')
	    args = parser.parse_args()

	    run_directory_prefix = main_pcg_sample_entry(
	        args.env_name,
	        args.distribution_mode,
	        args.normalize_and_reduce,
	        args.restrict_training_set,
	    )
	    # NOTE(review): this runs after the figure has been built; rcParams are
	    # normally read when artists are created, so it may not restyle the
	    # existing text - confirm whether it was meant to run before plotting.
	    plt.rcParams.update({'font.size': 20})
	    plt.tight_layout()

	    if args.save:
	        # exist_ok avoids the check-then-create race of isdir() + makedirs().
	        os.makedirs('figures', exist_ok=True)

	        suffix = '-mean' if args.normalize_and_reduce else ''
	        plt.savefig(f'figures/{run_directory_prefix}{suffix}.png')

	    # The figure is shown whether or not it was saved.
	    plt.show()


	def main_pcg_sample_entry(env_name, distribution_mode, normalize_and_reduce, restrict_training_set):
	    """Configure matplotlib, plot one experiment, and label its axes.

	    Args:
	        env_name: Environment name; forwarded to ``plot_experiment`` and used
	            as the first component of the run-directory prefix.
	        distribution_mode: ``'easy'`` or ``'hard'``. Selects the x-axis
	            scale, the training-level count (200 or 500) and the
	            normalization ranges (``EASY_GAME_RANGES`` / ``HARD_GAME_RANGES``).
	        normalize_and_reduce: When truthy, the mode's normalization ranges
	            are passed to ``plot_experiment``, the y-axis is labelled
	            'Mean Normalized Score', and the labels are set directly on the
	            object returned as ``axarr``. Otherwise the y-axis is labelled
	            'Score' and the labels are set on the axes returned by
	            ``switch_to_outer_plot(fig)``.
	        restrict_training_set: When truthy, the prefix ends with the
	            training-level count and the suffixes ``['', 'rank001']`` are
	            requested; otherwise the prefix ends with ``0`` and only ``['']``
	            is requested.

	    Returns:
	        The run-directory prefix ``"<env_name>_<distribution_mode>_<N>"``.

	    Raises:
	        ValueError: If ``distribution_mode`` is neither 'easy' nor 'hard'.
	    """
	    # Validate up front with a real exception: an ``assert`` is stripped
	    # under ``python -O``, and failing early leaves global rcParams untouched.
	    if distribution_mode not in ('easy', 'hard'):
	        raise ValueError("specify distribution_mode as 'easy' or 'hard'")

	    matplotlib.rcParams.update({
	        'xtick.labelsize': 12,
	        'ytick.labelsize': 12,
	        'axes.titlesize': 16,
	        'axes.labelsize': 24,
	        'legend.fontsize': 18,
	        'figure.figsize': [9, 9],
	    })

	    if distribution_mode == 'easy':
	        num_workers = 1
	        num_train_levels = 200
	        normalization_ranges = EASY_GAME_RANGES
	    else:  # 'hard' (validated above)
	        num_workers = 4
	        num_train_levels = 500
	        normalization_ranges = HARD_GAME_RANGES

	    level_tag = num_train_levels if restrict_training_set else '0'
	    run_directory_prefix = f"{env_name}_{distribution_mode}_{level_tag}"

	    kwargs = {
	        'smoothing': .9,
	        # num_workers * num_steps_per_rollout * num_envs_per_worker / graph_scaling
	        'x_scale': num_workers * 256 * 64 / 1e6,
	        'run_directory_prefix': run_directory_prefix,
	        'env_name': env_name,
	        # We throw out the first few datapoints to give the episodic reward
	        # buffers time to fill up. Otherwise, there could be a short-episode bias.
	        'first_valid': 10,
	        'suffixes': ['', 'rank001'] if restrict_training_set else [''],
	    }

	    x_label = 'Timesteps (Millions)'
	    y_label = 'Score'
	    if normalize_and_reduce:
	        kwargs['normalization_ranges'] = normalization_ranges
	        y_label = 'Mean Normalized Score'

	    fig, axarr = plot_experiment(**kwargs)

	    if normalize_and_reduce:
	        # axarr is used as a single axes object in this mode.
	        axarr.set_xlabel(x_label, labelpad=10)
	        axarr.set_ylabel(y_label, labelpad=10)
	    else:
	        # Shared labels go on the axes returned by switch_to_outer_plot.
	        ax0 = switch_to_outer_plot(fig)
	        ax0.set_xlabel(x_label, labelpad=40)
	        ax0.set_ylabel(y_label, labelpad=35)

	    return run_directory_prefix


	# Run the command-line entry point only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == '__main__':
	    main()