| algo: |
| ddpg: |
| params: |
| target_update_tau: 0.01 |
| policy: |
| exploration: |
| sigma: 0.3 |
| theta: 0.15 |
| deterministic_params: |
| buffer_batch_size: 32 |
| min_buffer_size: 10000 |
| n_train_steps: 500 |
| qf_lr: 0.0001 |
| steps_per_epoch: 1 |
| dqn: |
| params: |
| clip_gradient: 10 |
| deterministic_eval: true |
| double_q: false |
| target_update_freq: 2 |
| policy: |
| exploration: |
| decay_ratio: 0.5 |
| max_epsilon: 1.0 |
| min_epsilon: 0.05 |
| general_params: |
| discount: 0.99 |
| package: garage |
| policy: |
| hidden_sizes: |
| - 128 |
| - 128 |
| pretrained_policy: null |
| ppo: |
| params: |
| center_adv: false |
| tanhnormal: false |
| pretrain: |
| additional_config: null |
| algo_to_pretrain: null |
| params: |
| episodes_per_batch: 10 |
| loss: log_prob |
| policy_lr: 0.01 |
| pretrain_algo: rbc |
| replay_buffer: |
| buffer_size: 200000 |
| rnd: |
| batch_size: 64 |
| bound_reward_weight: cosine |
| bound_reward_weight_initial_ratio: 0.999999 |
| bound_reward_weight_transient_epochs: 10 |
| hidden_sizes: |
| - 64 |
| - 64 |
| intrinsic_reward_weight: 0.0001 |
| n_train_steps: 32 |
| output_dim: 128 |
| predictor_lr: 0.001 |
| standardize_extrinsic_reward: true |
| standardize_intrinsic_reward: true |
| sampler: |
| n_workers: 16 |
| type: ray |
| train: |
| batch_size: 50000 |
| n_epochs: 100 |
| steps_per_epoch: 32 |
| type: ppo |
| context: |
| disable_logging: false |
| experiment_name: null |
| log_dir: |
| from_keys: |
| - microgrid.config.scenario |
| - microgrid.methods.set_forecaster.forecaster |
| - microgrid.methods.set_module_attrs.battery_transition_model |
| - context.seed |
| - env.domain_randomization.noise_std |
| - algo.ppo.tanhnormal |
| - algo.rnd.intrinsic_reward_weight |
| parent: /home/ahalev/data/GridRL/paper_experiments |
| use_existing_dir: false |
| seed: 42 |
| snapshot_gap: 10 |
| verbose: 0 |
| wandb: |
| api_key_file: ../../local/wandb_api_key.txt |
| group: null |
| log_density: 1 |
| plot_baseline: |
| - mpc |
| - rbc |
| username: ahalev |
| env: |
| cls: DiscreteMicrogridEnv |
| domain_randomization: |
| noise_std: 0.01 |
| relative_noise: true |
| forced_genset: null |
| net_load: |
| slack_module: genset |
| use: true |
| observation_keys: |
| - soc |
| - net_load |
| - import_price_current |
| - import_price_forecast_0 |
| - import_price_forecast_1 |
| - import_price_forecast_2 |
| - import_price_forecast_3 |
| - import_price_forecast_4 |
| - import_price_forecast_5 |
| - import_price_forecast_6 |
| - import_price_forecast_7 |
| - import_price_forecast_8 |
| - import_price_forecast_9 |
| - import_price_forecast_10 |
| - import_price_forecast_11 |
| - import_price_forecast_12 |
| - import_price_forecast_13 |
| - import_price_forecast_14 |
| - import_price_forecast_15 |
| - import_price_forecast_16 |
| - import_price_forecast_17 |
| - import_price_forecast_18 |
| - import_price_forecast_19 |
| - import_price_forecast_20 |
| - import_price_forecast_21 |
| - import_price_forecast_22 |
| microgrid: |
| attributes: |
| reward_shaping_func: !BaselineShaper |
| baseline_module: false |
| module: |
| - genset |
| - 0 |
| config: |
| scenario: 3 |
| methods: |
| set_forecaster: |
| forecast_horizon: 23 |
| forecaster: 0.0 |
| forecaster_increase_uncertainty: true |
| forecaster_relative_noise: true |
| set_module_attrs: |
| battery_transition_model: null |
| normalized_action_bounds: |
| - 0.0 |
| - 1.0 |
| trajectory: |
| evaluate: |
| final_step: -1 |
| initial_step: 5840 |
| trajectory_func: null |
| train: |
| final_step: 5840 |
| initial_step: 0 |
| trajectory_func: !FixedLengthStochasticTrajectory |
| trajectory_length: 720 |
| verbose: 1 |
|
|