---
# Sweep configuration: a set of fixed settings (`parameters`) plus a list of
# per-run overrides (`sweep`).
#
# NOTE(review): this layout was reconstructed from a source in which the whole
# document had been collapsed onto one line. The nesting chosen here (all
# scalar settings under `parameters`, `sweep` as a top-level list containing
# one list of five mappings) is the most plausible reading; confirm it against
# the schema the sweep loader expects.

parameters:
  project_name: jaxgmg2_3phase_fast_tight
  action: rl
  rl_action: train

  # Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
  # PyYAML resolve a bare `5e-5` as a *string*, not a float.
  lr: 5.0e-5

  cheese_loc: any
  env_layout: open
  mask_type: first_episode
  use_prev_action: false

  # 10 billion. Spelled without `_` separators because underscores in integers
  # are only recognised by YAML 1.1; YAML 1.2 core-schema loaders would read
  # `10_000_000_000` as a string.
  num_total_env_steps: 10000000000
  num_levels: 9600
  grad_acc_per_chunk: 4
  num_rollout_steps: 64

  ckpt_dir: jaxgmg2_3phase_fast_tight
  # Kept quoted: the value is made of digit/colon pairs and must stay a string.
  eval_schedule: "0:1,250:2,500:5,2000:10"
  # The {alpha}, {discount_rate} and {seed} placeholders share their names with
  # the keys of each `sweep` entry below (presumably filled in per run —
  # verify against the loader). Quoted because the value contains `{`.
  f_str_ckpt: "al_{alpha}_g_{discount_rate}_seed_{seed}_pa_1"

  use_wandb: true
  use_hf: true
  wandb_project: jaxgmg2_3phase_fast_tight
  ntfy: david_jaxgmg

# A list of lists: the single inner list holds one mapping per run.
sweep:
  - - alpha: 0.0
      discount_rate: 0.97
      seed: 100
    - alpha: 0.0
      discount_rate: 0.99
      seed: 100
    - alpha: 0.45
      discount_rate: 0.97
      seed: 103
    - alpha: 0.45
      discount_rate: 0.98
      seed: 100
    - alpha: 0.4
      discount_rate: 0.99
      seed: 104