| parameters: | |
| project_name: jaxgmg2_3phase_fast_tight | |
| action: rl | |
| rl_action: train | |
| lr: 5e-5 | |
| cheese_loc: any | |
| env_layout: open | |
| mask_type: first_episode | |
| use_prev_action: false | |
| num_total_env_steps: 10_000_000_000 | |
| num_levels: 9600 | |
| grad_acc_per_chunk: 4 | |
| num_rollout_steps: 64 | |
| ckpt_dir: jaxgmg2_3phase_fast_tight | |
| eval_schedule: "0:1,250:2,500:5,2000:10" | |
| f_str_ckpt: "al_{alpha}_g_{discount_rate}_seed_{seed}_pa_1" | |
| use_wandb: true | |
| use_hf: true | |
| wandb_project: jaxgmg2_3phase_fast_tight | |
| ntfy: david_jaxgmg | |
| sweep: | |
| - - alpha: 0.0 | |
| discount_rate: 0.97 | |
| seed: 100 | |
| - alpha: 0.0 | |
| discount_rate: 0.99 | |
| seed: 100 | |
| - alpha: 0.45 | |
| discount_rate: 0.97 | |
| seed: 103 | |
| - alpha: 0.45 | |
| discount_rate: 0.98 | |
| seed: 100 | |
| - alpha: 0.4 | |
| discount_rate: 0.99 | |
| seed: 104 | |