"""Experiment specification: REINFORCE sweep over seed, ``c`` and ``goal_x``.

Builds a base configuration, a parameter grid, and hands both to
``create_experiments_helper``; the result is exposed as ``experiments_list``.
"""
import numpy as np

from mrunner.helpers.specification_helper import create_experiments_helper
from mrunner_exps.utils import combine_config_with_defaults

# Experiment name: the value stored under "script" in this module's globals
# with its last three characters removed.
# NOTE(review): presumably "script" is injected by the launcher and the slice
# strips a ".py" suffix -- confirm against the runner.
name = globals()["script"][:-3]

# params for all exps
config = {
    "exp_tag": "reinforce_goal_c3",
    "run_kind": "reinforce",
    "log_to_wandb": True,
    "pretrain_steps": 1000,
    "steps": 2000,
    "log_every": 1,
    "num_eval_eps": 10,
    "verbose": False,
    "lr": 0.001,
    "c": 1.0,
    "start_x": 0.0,
    "goal_x": 50.0,
    "bias_in_state": True,
    "position_in_state": False,
    "time_limit": 100,
    "gamma": 0.99,
    "wandbcommit": 1000,
    "pretrain": "phase2",
    "finetune": "full",
    "update_every": 10,  # for good definition of gradient
}
config = combine_config_with_defaults(config)

# params different between exps
# NOTE(review): "c" and "goal_x" also appear in the base config above;
# presumably the grid values take precedence -- confirm with the helper.
params_grid = [
    {
        # Ten seeds: 0..9.
        "seed": list(range(10)),
        # Ten values 0.1, 0.2, ..., 1.0.  linspace with explicit endpoints
        # avoids the length/rounding pitfalls of a float-step arange, and
        # round(1) + tolist() yields clean native Python floats (0.3 rather
        # than 0.30000000000000004) instead of NumPy scalars.
        "c": np.linspace(0.1, 1.0, 10).round(1).tolist(),
        # Nine values 5, 10, ..., 45 as native Python ints.
        # NOTE(review): the stop value 50 is exclusive, so the sweep never
        # reaches the base-config goal_x of 50.0 -- confirm this is intended.
        "goal_x": np.arange(5, 50, 5).tolist(),
    }
]

experiments_list = create_experiments_helper(
    experiment_name=name,
    project_name="apple",
    with_neptune=False,
    script="python3 mrunner_run.py",
    python_path=".",
    tags=[name],
    exclude=["logs", "wandb"],
    base_config=config,
    params_grid=params_grid,
)