| arch: | |
| H_layers: 2 | |
| L_layers: 6 | |
| dep_rank: 64 | |
| dep_topk: 12 | |
| expansion: 4.0 | |
| forward_dtype: bfloat16 | |
| glps_dep_graph: true | |
| glps_enabled: true | |
| glps_fill_obvious: true | |
| glps_global_propagate_on_low_conf: true | |
| glps_max_targeted_iters: 4 | |
| glps_tau_halt: 0.92 | |
| glps_tau_uncertain: 0.8 | |
| glps_token_masking: true | |
| halt_exploration_prob: 0.1 | |
| halt_max_steps: 16 | |
| hidden_size: 512 | |
| loss: | |
| loss_type: stablemax_cross_entropy | |
| name: losses@ACTLossHead | |
| mlp_t: false | |
| name: recursive_reasoning.glps@GLPS_ACTV1 | |
| num_heads: 8 | |
| pos_encodings: rope | |
| puzzle_emb_ndim: 512 | |
| rms_norm_eps: 1.0e-05 | |
| rope_theta: 10000.0 | |
| share_levels: true | |
| shared_layers: 9 | |
| beta1: 0.9 | |
| beta2: 0.95 | |
| checkpoint_every_eval: true | |
| checkpoint_path: checkpoints/Arc2concept-aug-1000-ACT-torch/pretrain_att_arc2concept_4 | |
| data_paths: | |
| - data/arc2concept-aug-1000 | |
| data_paths_test: [] | |
| ema: true | |
| ema_rate: 0.999 | |
| epochs: 100000 | |
| eval_glps_max_targeted_iters: null | |
| eval_glps_tau_halt: null | |
| eval_halt_max_steps: null | |
| eval_interval: 10000 | |
| eval_only: false | |
| eval_save_outputs: [] | |
| evaluators: | |
| - name: arc@ARC | |
| freeze_weights: false | |
| global_batch_size: 768 | |
| load_checkpoint: null | |
| lr: 0.0001 | |
| lr_min_ratio: 0.1 | |
| lr_warmup_steps: 2000 | |
| min_eval_interval: 0 | |
| project_name: Arc2concept-aug-1000-ACT-torch | |
| puzzle_emb_lr: 0.01 | |
| puzzle_emb_weight_decay: 0.1 | |
| run_name: pretrain_att_arc2concept_4 | |
| seed: 0 | |
| weight_decay: 0.1 | |