# Hyperparameter sets keyed by environment / preset name.
#
# Layout of each entry:
#   n_timesteps, env_id, device   - scalar settings on the entry itself
#   env_hyperparams               - environment settings
#   policy_hyperparams            - policy network settings
#   algo_hyperparams              - training algorithm settings
#   eval_params                   - evaluation settings
#
# Sharing of defaults uses YAML anchors (`&name`) and merge keys
# (`<<: *name`). A merge key copies only the top level of the referenced
# mapping, and keys written explicitly in the entry take precedence. An entry
# that redefines a nested mapping therefore replaces it wholesale; that is why
# such entries re-merge the matching nested anchor before adding overrides.
#
# The explicit `!!float` tag is kept on exponent-form numbers (e.g. 1e5).
# NOTE(review): presumably required because the loader would otherwise read
# dot-less exponents as strings - confirm against the loader in use.
#
# NOTE(review): entries whose names start with `_` carry no env_id and are
# only referenced through their anchors here; presumably templates that are
# never run directly - confirm how the loader treats them.
#
# NOTE(review): nesting below was restored from the anchor/alias usage in
# this file after indentation was lost; verify against the consuming code.

# Anchored as `cartpole-defaults`; CartPole-v0 below reuses it.
CartPole-v1: &cartpole-defaults
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 8
  algo_hyperparams:
    n_steps: 32
    batch_size: 256
    n_epochs: 20
    gae_lambda: 0.8
    gamma: 0.98
    ent_coef: 0.0
    learning_rate: 0.001
    learning_rate_decay: linear
    clip_range: 0.2
    clip_range_decay: linear
  eval_params:
    step_freq: !!float 2.5e4

# Same as CartPole-v1 except for a shorter run.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 5e4

MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    normalize: true
    n_envs: 16
  algo_hyperparams:
    n_steps: 16
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.99
    ent_coef: 0.0

MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    normalize: true
    n_envs: 4
  algo_hyperparams:
    n_steps: 512
    batch_size: 256
    n_epochs: 10
    learning_rate: !!float 7.77e-5
    ent_coef: 0.01
    ent_coef_decay: linear
    clip_range: 0.1
    gae_lambda: 0.9
    max_grad_norm: 5
    vf_coef: 0.19
  eval_params:
    step_freq: 5000

Acrobot-v1:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 256
    n_epochs: 4
    gae_lambda: 0.94
    gamma: 0.99
    ent_coef: 0.0

LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
  algo_hyperparams:
    n_steps: 1024
    batch_size: 64
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.999
    ent_coef: 0.01
    ent_coef_decay: linear
    normalize_advantage: false

# Anchored as `carracing-defaults`; its policy block is anchored separately so
# impala-CarRacing-v0 can extend it.
CarRacing-v0: &carracing-defaults
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams: &carracing-policy-defaults
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 512
    batch_size: 128
    n_epochs: 10
    learning_rate: !!float 1e-4
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    clip_range: 0.2

# CarRacing-v0 settings with the policy switched to `cnn_style: impala`.
# env_id is given explicitly because the entry name is not the environment id.
impala-CarRacing-v0:
  <<: *carracing-defaults
  env_id: CarRacing-v0
  policy_hyperparams:
    <<: *carracing-policy-defaults
    cnn_style: impala
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false
    hidden_sizes: []

# Shared Atari defaults; the env and policy sub-mappings are anchored
# separately so derived entries can override individual keys.
_atari: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 8
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: subproc
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    n_steps: 128
    batch_size: 256
    n_epochs: 4
    learning_rate: !!float 2.5e-4
    learning_rate_decay: linear
    clip_range: 0.1
    clip_range_decay: linear
    vf_coef: 0.5
    ent_coef: 0.01
  eval_params:
    deterministic: false

# Atari defaults on CPU with `vec_env_class: dummy` instead of `subproc`.
debug-PongNoFrameskip-v4:
  <<: *atari-defaults
  device: cpu
  env_id: PongNoFrameskip-v4
  env_hyperparams:
    <<: *atari-env-defaults
    vec_env_class: dummy

# Atari defaults with the policy switched to `cnn_style: impala`.
_impala-atari: &impala-atari-defaults
  <<: *atari-defaults
  policy_hyperparams:
    <<: *atari-policy-defaults
    cnn_style: impala
    cnn_feature_dim: 256
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false

# The following four entries differ from `_impala-atari` only in env_id.
impala-PongNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: PongNoFrameskip-v4

impala-BreakoutNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: BreakoutNoFrameskip-v4

impala-SpaceInvadersNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: SpaceInvadersNoFrameskip-v4

impala-QbertNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: QbertNoFrameskip-v4

# Anchored as `pybullet-defaults`; env, policy and algo sub-mappings each
# carry their own anchor for reuse by the other *BulletEnv entries.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    n_envs: 16
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    pi_hidden_sizes: [256, 256]
    v_hidden_sizes: [256, 256]
    activation_fn: relu
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 512
    batch_size: 128
    n_epochs: 20
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0.0
    max_grad_norm: 0.5
    vf_coef: 0.5
    learning_rate: !!float 3e-5
    clip_range: 0.4

# The nested merges below add no overrides, so the resolved values equal the
# HalfCheetahBulletEnv-v0 ones.
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults

# PyBullet defaults plus `clip_range_decay: linear`.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear

# PyBullet defaults plus `clip_range_decay: linear`.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear

# PyBullet defaults with a longer run, fewer envs and several algo overrides.
HumanoidBulletEnv-v0:
  <<: *pybullet-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    <<: *pybullet-env-defaults
    n_envs: 8
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gae_lambda: 0.95
    learning_rate: !!float 2.5e-4
    clip_range: 0.2

# Shared procgen defaults; every sub-mapping is anchored for reuse by the
# easy/hard variants below. No n_timesteps here - the variants set it.
_procgen: &procgen-defaults
  env_hyperparams: &procgen-env-defaults
    is_procgen: true
    n_envs: 64
    normalize: true
  policy_hyperparams: &procgen-policy-defaults
    activation_fn: relu
    cnn_style: impala
    cnn_feature_dim: 256
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false
  algo_hyperparams: &procgen-algo-defaults
    gamma: 0.999
    gae_lambda: 0.95
    n_steps: 256
    batch_size: 2048
    n_epochs: 3
    ent_coef: 0.01
    clip_range: 0.2
    clip_range_vf: 0.2
    learning_rate: !!float 5e-4
    vf_coef: 0.5
  eval_params: &procgen-eval-defaults
    ignore_first_episode: true
    step_freq: !!float 1e5

# Procgen defaults with `distribution_mode: easy`.
_procgen-easy: &procgen-easy-defaults
  <<: *procgen-defaults
  n_timesteps: !!float 25e6
  env_hyperparams: &procgen-easy-env-defaults
    <<: *procgen-env-defaults
    make_kwargs:
      distribution_mode: easy

procgen-coinrun-easy: &coinrun-easy-defaults
  <<: *procgen-easy-defaults
  env_id: coinrun

# procgen-coinrun-easy on CPU.
debug-procgen-coinrun:
  <<: *coinrun-easy-defaults
  device: cpu

procgen-starpilot-easy:
  <<: *procgen-easy-defaults
  env_id: starpilot

procgen-bossfight-easy:
  <<: *procgen-easy-defaults
  env_id: bossfight

procgen-bigfish-easy:
  <<: *procgen-easy-defaults
  env_id: bigfish

# Procgen defaults with `distribution_mode: hard`, more envs, a larger batch
# and a larger evaluation step_freq.
_procgen-hard: &procgen-hard-defaults
  <<: *procgen-defaults
  n_timesteps: !!float 200e6
  env_hyperparams: &procgen-hard-env-defaults
    <<: *procgen-env-defaults
    n_envs: 256
    make_kwargs:
      distribution_mode: hard
  algo_hyperparams:
    <<: *procgen-algo-defaults
    batch_size: 8192
  eval_params:
    <<: *procgen-eval-defaults
    step_freq: !!float 5e5

procgen-starpilot-hard: &procgen-starpilot-hard-defaults
  <<: *procgen-hard-defaults
  env_id: starpilot

# The variants below change only policy_hyperparams relative to
# procgen-starpilot-hard.
procgen-starpilot-hard-2xIMPALA:
  <<: *procgen-starpilot-hard-defaults
  policy_hyperparams:
    <<: *procgen-policy-defaults
    impala_channels: [32, 64, 64]

procgen-starpilot-hard-2xIMPALA-fat:
  <<: *procgen-starpilot-hard-defaults
  policy_hyperparams:
    <<: *procgen-policy-defaults
    impala_channels: [32, 64, 64]
    cnn_feature_dim: 512

procgen-starpilot-hard-4xIMPALA:
  <<: *procgen-starpilot-hard-defaults
  policy_hyperparams:
    <<: *procgen-policy-defaults
    impala_channels: [64, 128, 128]