| algo: | |
| ddpg: | |
| params: | |
| target_update_tau: 0.01 | |
| policy: | |
| exploration: | |
| sigma: 0.3 | |
| theta: 0.15 | |
| deterministic_params: | |
| buffer_batch_size: 32 | |
| min_buffer_size: 10000 | |
| n_train_steps: 500 | |
| qf_lr: 0.0001 | |
| steps_per_epoch: 1 | |
| dqn: | |
| params: | |
| clip_gradient: 10 | |
| deterministic_eval: true | |
| double_q: false | |
| target_update_freq: 2 | |
| policy: | |
| exploration: | |
| decay_ratio: 0.5 | |
| max_epsilon: 1.0 | |
| min_epsilon: 0.05 | |
| general_params: | |
| discount: 0.99 | |
| package: garage | |
| policy: | |
| hidden_sizes: | |
| - 128 | |
| - 128 | |
| pretrained_policy: null | |
| ppo: | |
| params: | |
| center_adv: false | |
| num_train_per_epoch: 1 | |
| tanhnormal: false | |
| pretrain: | |
| additional_config: null | |
| algo_to_pretrain: null | |
| params: | |
| episodes_per_batch: 10 | |
| loss: log_prob | |
| policy_lr: 0.01 | |
| pretrain_algo: rbc | |
| replay_buffer: | |
| buffer_size: 200000 | |
| rnd: | |
| batch_size: 64 | |
| bound_reward_weight: cosine | |
| bound_reward_weight_initial_ratio: 0.999999 | |
| bound_reward_weight_transient_epochs: 10 | |
| hidden_sizes: | |
| - 64 | |
| - 64 | |
| intrinsic_reward_weight: 0.0001 | |
| n_train_steps: 32 | |
| output_dim: 128 | |
| predictor_lr: 0.001 | |
| standardize_extrinsic_reward: true | |
| standardize_intrinsic_reward: true | |
| sampler: | |
| n_workers: 16 | |
| type: ray | |
| train: | |
| batch_size: 50000 | |
| gpu_id: 0 | |
| n_epochs: 100 | |
| steps_per_epoch: 32 | |
| use_gpu: false | |
| type: ppo | |
| context: | |
| disable_logging: false | |
| experiment_name: null | |
| log_dir: | |
| from_keys: | |
| - microgrid.config.scenario | |
| - microgrid.methods.set_forecaster.forecaster | |
| - microgrid.methods.set_module_attrs.battery_transition_model | |
| - context.seed | |
| - env.domain_randomization.noise_std | |
| - algo.ppo.tanhnormal | |
| - algo.rnd.intrinsic_reward_weight | |
| parent: /home/ahalev/data/GridRL/paper_experiments | |
| use_existing_dir: false | |
| seed: 42 | |
| snapshot_gap: 10 | |
| verbose: 0 | |
| wandb: | |
| api_key_file: ../../local/wandb_api_key.txt | |
| group: null | |
| log_density: 1 | |
| plot_baseline: | |
| - mpc | |
| - rbc | |
| username: ahalev | |
| env: | |
| cls: DiscreteMicrogridEnv | |
| domain_randomization: | |
| noise_std: 0.01 | |
| relative_noise: true | |
| forced_genset: null | |
| net_load: | |
| slack_module: grid | |
| use: true | |
| observation_keys: | |
| - soc | |
| - net_load | |
| - import_price_current | |
| - import_price_forecast_0 | |
| - import_price_forecast_1 | |
| - import_price_forecast_2 | |
| - import_price_forecast_3 | |
| - import_price_forecast_4 | |
| - import_price_forecast_5 | |
| - import_price_forecast_6 | |
| - import_price_forecast_7 | |
| - import_price_forecast_8 | |
| - import_price_forecast_9 | |
| - import_price_forecast_10 | |
| - import_price_forecast_11 | |
| - import_price_forecast_12 | |
| - import_price_forecast_13 | |
| - import_price_forecast_14 | |
| - import_price_forecast_15 | |
| - import_price_forecast_16 | |
| - import_price_forecast_17 | |
| - import_price_forecast_18 | |
| - import_price_forecast_19 | |
| - import_price_forecast_20 | |
| - import_price_forecast_21 | |
| - import_price_forecast_22 | |
| microgrid: | |
| attributes: | |
| reward_shaping_func: !BaselineShaper | |
| baseline_module: false | |
| module: | |
| - grid | |
| - 0 | |
| config: | |
| scenario: 6 | |
| methods: | |
| set_forecaster: | |
| forecast_horizon: 23 | |
| forecaster: 0.0 | |
| forecaster_increase_uncertainty: true | |
| forecaster_relative_noise: true | |
| set_module_attrs: | |
| battery_transition_model: null | |
| normalized_action_bounds: | |
| - 0.0 | |
| - 1.0 | |
| trajectory: | |
| evaluate: | |
| final_step: -1 | |
| initial_step: 5840 | |
| trajectory_func: null | |
| train: | |
| final_step: 5840 | |
| initial_step: 0 | |
| trajectory_func: !FixedLengthStochasticTrajectory | |
| trajectory_length: 720 | |
| verbose: 1 | |