{
  "---- Shared parameters ---": "----------------",
  "gamma": 0.99,
  "replay_buffer_size": 1000000,
  "n_initial_samples": 20000,
  "n_epochs": 200,
  "n_training_steps_per_epoch": 250000,
  "n_training_steps_per_online_update": 4,
  "horizon": 27000,
  "starting_eps": 1,
  "ending_eps": 0.01,
  "duration_eps": 250000,
  "batch_size": 32,
  "---- i-DQN ---": "----------------------------",
  "idqn_learning_rate": 6.25e-5,
  "idqn_optimizer_eps": 1.5e-4,
  "idqn_n_step_return": 1,
  "idqn_n_training_steps_per_target_update": 30,
  "idqn_n_training_steps_per_window_shift": 6000,
  "idqn_head_behaviorial_policy": "uniform",
  "idqn_shared_network": true,
  "---- i-IQN ---": "----------------------------",
  "iiqn_learning_rate": 0.00005,
  "iiqn_optimizer_eps": 0.0003125,
  "iiqn_n_step_return": 3,
  "iiqn_n_training_steps_per_target_update": 30,
  "iiqn_n_training_steps_per_window_shift": 6000,
  "iiqn_head_behaviorial_policy": "uniform",
  "iiqn_n_quantiles_policy": 32,
  "iiqn_n_quantiles": 64,
  "iiqn_n_quantiles_target": 64,
  "iiqn_shared_network": true
}