| { | |
| "env_config": { | |
| "env_id": "Reacher-v5", | |
| "env_kwargs": {}, | |
| "max_steps": null, | |
| "normalize_obs": false, | |
| "use_image": false, | |
| "vector_env_num": 6, | |
| "use_multi_processing": true, | |
| "image_shape": null, | |
| "frame_stack": 1, | |
| "frame_skip": 1, | |
| "training_render_mode": null | |
| }, | |
| "device": "cpu", | |
| "learning_rate": 0.0003, | |
| "gamma": 0.995, | |
| "checkpoint_pathname": "", | |
| "max_grad_norm": 0.5, | |
| "log_interval": 100, | |
| "eval_episodes": 50, | |
| "eval_random_seed": 42, | |
| "eval_video_num": 10, | |
| "total_steps": 60000, | |
| "hidden_sizes": [ | |
| 64, | |
| 64 | |
| ], | |
| "use_layer_norm": false, | |
| "critic_lr": 0.0003, | |
| "replay_buffer_capacity": 12000, | |
| "batch_size": 256, | |
| "update_start_step": 5000, | |
| "alpha": 0.2, | |
| "auto_tune_alpha": true, | |
| "alpha_lr": 0.0003, | |
| "target_entropy": -2.0, | |
| "tau": 0.005, | |
| "max_action": 1.0, | |
| "log_std_min": -7.0, | |
| "log_std_max": 2.0, | |
| "sac_update_interval": 1, | |
| "model_update_interval": 250, | |
| "update_num_per_epoch": 1, | |
| "model_based_config": { | |
| "num_models": 3, | |
| "model_hidden_sizes": [ | |
| 256, | |
| 256 | |
| ], | |
| "done_threshold": 0.5, | |
| "log_std_bounds": [ | |
| -5.0, | |
| 2.0 | |
| ], | |
| "eps": 1e-06, | |
| "train": { | |
| "epochs": 20, | |
| "batch_size": 256, | |
| "lr": 0.001, | |
| "weight_decay": 1e-06, | |
| "loss_weight_delta": 1.0, | |
| "loss_weight_reward": 1.0, | |
| "loss_weight_done": 1.0, | |
| "bootstrap": true, | |
| "buffer_ratio_for_val": 0.1, | |
| "early_stop_patience": 6, | |
| "dataloader_num_workers": 0, | |
| "dataloader_pin_memory": false | |
| } | |
| }, | |
| "model_rollout_config": { | |
| "rollout_num": 10, | |
| "rollout_len": { | |
| "_type": "LinearSchedule", | |
| "_module": "practice.utils_for_coding.scheduler_utils", | |
| "_v0": 1, | |
| "_v1": 4, | |
| "_t0": 0, | |
| "_t1": 48000, | |
| "_duration": 48000, | |
| "_v_diff": 3 | |
| }, | |
| "replay_buffer_capacity": 24000, | |
| "batch_rate_of_sample": { | |
| "_type": "LinearSchedule", | |
| "_module": "practice.utils_for_coding.scheduler_utils", | |
| "_v0": 0.15, | |
| "_v1": 0.3, | |
| "_t0": 0, | |
| "_t1": 48000, | |
| "_duration": 48000, | |
| "_v_diff": 0.15 | |
| } | |
| } | |
| } |