File size: 3,478 Bytes
from single_graph_util import plot_experiment, switch_to_outer_plot
from constants import ENV_NAMES, NAME_TO_CASE, EASY_GAME_RANGES, HARD_GAME_RANGES

import matplotlib
import matplotlib.pyplot as plt

import argparse
from utils import str2bool
import os


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default='coinrun')
    parser.add_argument('--distribution_mode', type=str, default='easy',
                        help="Environment distribution_mode ('easy' or 'hard')")
    parser.add_argument('--normalize_and_reduce', dest='normalize_and_reduce', action='store_true')
    parser.add_argument('--restrict_training_set', type=str2bool, default=False,
                        help='True for 200 levels; False for all distribution')
    parser.add_argument('--save', dest='save', action='store_false')
    args = parser.parse_args()

    run_directory_prefix = main_pcg_sample_entry(args.env_name, args.distribution_mode, args.normalize_and_reduce,
                                                 args.restrict_training_set)
    plt.rcParams.update({'font.size': 20})
    plt.tight_layout()

    if args.save:
        if not os.path.isdir('figures'):
            os.makedirs('figures')

        suffix = '-mean' if args.normalize_and_reduce else ''
        plt.savefig(f'figures/{run_directory_prefix}{suffix}.png')
        plt.show()
    else:
        plt.show()


def main_pcg_sample_entry(env_name, distribution_mode, normalize_and_reduce, restrict_training_set):
    params = {
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'axes.titlesize': 16,
        'axes.labelsize': 24,
        'legend.fontsize': 18,
        'figure.figsize': [9, 9]
    }
    matplotlib.rcParams.update(params)

    kwargs = {'smoothing': .9}

    if distribution_mode == 'easy':
        kwargs[
            'x_scale'] = 1 * 256 * 64 / 1e6  # num_workers * num_steps_per_rollout * num_envs_per_worker / graph_scaling
        num_train_levels = 200
        normalization_ranges = EASY_GAME_RANGES
    elif distribution_mode == 'hard':
        kwargs[
            'x_scale'] = 4 * 256 * 64 / 1e6  # num_workers * num_steps_per_rollout * num_envs_per_worker / graph_scaling
        num_train_levels = 500
        normalization_ranges = HARD_GAME_RANGES
    else:
        assert False, "specify distribution_mode as 'easy' or 'hard'"

    y_label = 'Score'
    x_label = 'Timesteps (Millions)'

    run_directory_prefix = f"{env_name}_{distribution_mode}_{num_train_levels if restrict_training_set else '0'}"
    kwargs['run_directory_prefix'] = f"{run_directory_prefix}"

    # We throw out the first few datapoints to give the episodic reward buffers time to fill up
    # Otherwise, there could be a short-episode bias
    kwargs['env_name'] = env_name
    kwargs['first_valid'] = 10

    if restrict_training_set:
        kwargs['suffixes'] = ['', 'rank001']
    else:
        kwargs['suffixes'] = ['']

    if normalize_and_reduce:
        kwargs['normalization_ranges'] = normalization_ranges
        y_label = 'Mean Normalized Score'

    fig, axarr = plot_experiment(**kwargs)

    if normalize_and_reduce:
        axarr.set_xlabel(x_label, labelpad=10)
        axarr.set_ylabel(y_label, labelpad=10)
    else:
        ax0 = switch_to_outer_plot(fig)
        ax0.set_xlabel(x_label, labelpad=40)
        ax0.set_ylabel(y_label, labelpad=35)

    return run_directory_prefix


if __name__ == '__main__':
    main()