{
    "env_config": {
        "env_id": "Reacher-v5",
        "env_kwargs": {},
        "max_steps": null,
        "normalize_obs": false,
        "use_image": false,
        "vector_env_num": 6,
        "use_multi_processing": true,
        "image_shape": null,
        "frame_stack": 1,
        "frame_skip": 1,
        "training_render_mode": null
    },
    "device": "cpu",
    "learning_rate": 0.0003,
    "gamma": 0.995,
    "checkpoint_pathname": "",
    "max_grad_norm": 0.5,
    "log_interval": 100,
    "eval_episodes": 50,
    "eval_random_seed": 42,
    "eval_video_num": 10,
    "total_steps": 60000,
    "hidden_sizes": [
        64,
        64
    ],
    "use_layer_norm": false,
    "critic_lr": 0.0003,
    "replay_buffer_capacity": 12000,
    "batch_size": 256,
    "update_start_step": 5000,
    "alpha": 0.2,
    "auto_tune_alpha": true,
    "alpha_lr": 0.0003,
    "target_entropy": -2.0,
    "tau": 0.005,
    "max_action": 1.0,
    "log_std_min": -7.0,
    "log_std_max": 2.0,
    "sac_update_interval": 1,
    "model_update_interval": 250,
    "update_num_per_epoch": 1,
    "model_based_config": {
        "num_models": 3,
        "model_hidden_sizes": [
            256,
            256
        ],
        "done_threshold": 0.5,
        "log_std_bounds": [
            -5.0,
            2.0
        ],
        "eps": 1e-06,
        "train": {
            "epochs": 20,
            "batch_size": 256,
            "lr": 0.001,
            "weight_decay": 1e-06,
            "loss_weight_delta": 1.0,
            "loss_weight_reward": 1.0,
            "loss_weight_done": 1.0,
            "bootstrap": true,
            "buffer_ratio_for_val": 0.1,
            "early_stop_patience": 6,
            "dataloader_num_workers": 0,
            "dataloader_pin_memory": false
        }
    },
    "model_rollout_config": {
        "rollout_num": 10,
        "rollout_len": {
            "_type": "LinearSchedule",
            "_module": "practice.utils_for_coding.scheduler_utils",
            "_v0": 1,
            "_v1": 4,
            "_t0": 0,
            "_t1": 48000,
            "_duration": 48000,
            "_v_diff": 3
        },
        "replay_buffer_capacity": 24000,
        "batch_rate_of_sample": {
            "_type": "LinearSchedule",
            "_module": "practice.utils_for_coding.scheduler_utils",
            "_v0": 0.15,
            "_v1": 0.3,
            "_t0": 0,
            "_t1": 48000,
            "_duration": 48000,
            "_v_diff": 0.15
        }
    }
}