| action_optimizer: |
| _target_: mbrl.planning.CEMOptimizer |
| alpha: 0.1 |
| clipped_normal: false |
| device: cpu:0 |
| elite_ratio: 0.1 |
| lower_bound: ??? |
| num_iterations: 5 |
| population_size: 350 |
| return_mean_elites: true |
| upper_bound: ??? |
| algorithm: |
| agent: |
| _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC |
| action_space: |
| _target_: gym.spaces.Box |
| high: |
| - 1.0 |
| low: |
| - -1.0 |
| shape: |
| - 1 |
| args: |
| alpha: 0.2 |
| automatic_entropy_tuning: true |
| device: cpu:0 |
| gamma: 0.99 |
| hidden_size: 256 |
| lr: 0.0003 |
| policy: Gaussian |
| target_entropy: -0.05 |
| target_update_interval: 4 |
| tau: 0.005 |
| num_inputs: 4 |
| freq_train_model: 200 |
| initial_exploration_steps: 5000 |
| learned_rewards: true |
| name: mbpo |
| normalize: true |
| normalize_double_precision: true |
| num_eval_episodes: 1 |
| random_initial_explore: false |
| real_data_ratio: 0.0 |
| sac_samples_action: true |
| target_is_delta: true |
| debug_mode: false |
| device: cpu:0 |
| dynamics_model: |
| _target_: mbrl.models.GaussianMLP |
| activation_fn_cfg: |
| _target_: torch.nn.SiLU |
| deterministic: false |
| device: cpu:0 |
| ensemble_size: 7 |
| hid_size: 200 |
| in_size: 5 |
| learn_logvar_bounds: false |
| num_layers: 4 |
| out_size: 5 |
| propagation_method: random_model |
| experiment: default |
| log_frequency_agent: 1000 |
| overrides: |
| cem_alpha: 0.1 |
| cem_clipped_normal: false |
| cem_elite_ratio: 0.1 |
| cem_num_iters: 5 |
| cem_population_size: 350 |
| effective_model_rollouts_per_step: 400 |
| env: cartpole_continuous |
| epoch_length: 200 |
| freq_train_model: 200 |
| model_batch_size: 256 |
| model_lr: 0.001 |
| model_wd: 5.0e-05 |
| num_elites: 5 |
| num_epochs_to_retain_sac_buffer: 1 |
| num_sac_updates_per_step: 20 |
| num_steps: 5000 |
| patience: 5 |
| planning_horizon: 15 |
| rollout_schedule: |
| - 1 |
| - 15 |
| - 1 |
| - 1 |
| sac_alpha: 0.2 |
| sac_automatic_entropy_tuning: true |
| sac_batch_size: 256 |
| sac_gamma: 0.99 |
| sac_hidden_size: 256 |
| sac_lr: 0.0003 |
| sac_policy: Gaussian |
| sac_target_entropy: -0.05 |
| sac_target_update_interval: 4 |
| sac_tau: 0.005 |
| sac_updates_every_steps: 1 |
| trial_length: 200 |
| validation_ratio: 0.2 |
| root_dir: ./logs |
| save_video: false |
| seed: 0 |
|
|