{
  "env_config": {
    "env_id": "Reacher-v5",
    "env_kwargs": {},
    "max_steps": null,
    "normalize_obs": false,
    "use_image": false,
    "vector_env_num": 6,
    "use_multi_processing": true,
    "image_shape": null,
    "frame_stack": 1,
    "frame_skip": 1,
    "training_render_mode": null
  },
  "device": "cpu",
  "learning_rate": 0.0003,
  "gamma": 0.995,
  "checkpoint_pathname": "",
  "max_grad_norm": 0.5,
  "log_interval": 100,
  "eval_episodes": 50,
  "eval_random_seed": 42,
  "eval_video_num": 10,
  "total_steps": 60000,
  "hidden_sizes": [64, 64],
  "use_layer_norm": false,
  "critic_lr": 0.0003,
  "replay_buffer_capacity": 12000,
  "batch_size": 256,
  "update_start_step": 5000,
  "alpha": 0.2,
  "auto_tune_alpha": true,
  "alpha_lr": 0.0003,
  "target_entropy": -2.0,
  "tau": 0.005,
  "max_action": 1.0,
  "log_std_min": -7.0,
  "log_std_max": 2.0,
  "sac_update_interval": 1,
  "model_update_interval": 250,
  "update_num_per_epoch": 1,
  "model_based_config": {
    "num_models": 3,
    "model_hidden_sizes": [256, 256],
    "done_threshold": 0.5,
    "log_std_bounds": [-5.0, 2.0],
    "eps": 1e-06,
    "train": {
      "epochs": 20,
      "batch_size": 256,
      "lr": 0.001,
      "weight_decay": 1e-06,
      "loss_weight_delta": 1.0,
      "loss_weight_reward": 1.0,
      "loss_weight_done": 1.0,
      "bootstrap": true,
      "buffer_ratio_for_val": 0.1,
      "early_stop_patience": 6,
      "dataloader_num_workers": 0,
      "dataloader_pin_memory": false
    }
  },
  "model_rollout_config": {
    "rollout_num": 10,
    "rollout_len": {
      "_type": "LinearSchedule",
      "_module": "practice.utils_for_coding.scheduler_utils",
      "_v0": 1,
      "_v1": 4,
      "_t0": 0,
      "_t1": 48000,
      "_duration": 48000,
      "_v_diff": 3
    },
    "replay_buffer_capacity": 24000,
    "batch_rate_of_sample": {
      "_type": "LinearSchedule",
      "_module": "practice.utils_for_coding.scheduler_utils",
      "_v0": 0.15,
      "_v1": 0.3,
      "_t0": 0,
      "_t1": 48000,
      "_duration": 48000,
      "_v_diff": 0.15
    }
  }
}