---
# Run configuration: model architecture (`arch`) plus top-level training,
# evaluation, and checkpointing settings.
#
# NOTE(review): the original file had every key collapsed onto one line, which
# is not parseable YAML. The nesting below was reconstructed from the sorted
# key order within each mapping — confirm against the consumer's schema.

# Model definition. Keys are passed through as-is; their semantics are defined
# by the class referenced in `arch.name` (not visible from this file).
arch:
  H_layers: 2
  L_layers: 6
  dep_rank: 64
  dep_topk: 12
  expansion: 4.0
  forward_dtype: bfloat16
  glps_dep_graph: true
  glps_enabled: true
  glps_fill_obvious: true
  glps_global_propagate_on_low_conf: true
  glps_max_targeted_iters: 4
  glps_tau_halt: 0.92
  glps_tau_uncertain: 0.8
  glps_token_masking: true
  halt_exploration_prob: 0.1
  halt_max_steps: 16
  hidden_size: 512
  # Loss head; `name` looks like a `module@Class` reference — TODO confirm
  # how the loader resolves it.
  loss:
    loss_type: stablemax_cross_entropy
    name: losses@ACTLossHead
  mlp_t: false
  name: recursive_reasoning.glps@GLPS_ACTV1
  num_heads: 8
  pos_encodings: rope
  puzzle_emb_ndim: 512
  rms_norm_eps: 1.0e-05
  rope_theta: 10000.0
  share_levels: true
  shared_layers: 9

# Optimizer moment coefficients (presumably Adam-style betas — verify).
beta1: 0.9
beta2: 0.95

# Checkpointing.
checkpoint_every_eval: true
checkpoint_path: checkpoints/Arc2concept-aug-1000-ACT-torch/pretrain_att_arc2concept_4

# Datasets. `data_paths_test` is an explicit empty list, not null.
data_paths:
  - data/arc2concept-aug-1000
data_paths_test: []

# Exponential moving average of weights (presumably — verify).
ema: true
ema_rate: 0.999

epochs: 100000

# Evaluation. The `eval_*` overrides are null here; presumably the consumer
# then falls back to the corresponding `arch` values — TODO confirm.
eval_glps_max_targeted_iters: null
eval_glps_tau_halt: null
eval_halt_max_steps: null
eval_interval: 10000
eval_only: false
eval_save_outputs: []
evaluators:
  - name: arc@ARC

freeze_weights: false
global_batch_size: 768
load_checkpoint: null

# Learning-rate schedule.
lr: 0.0001
lr_min_ratio: 0.1
lr_warmup_steps: 2000

min_eval_interval: 0
project_name: Arc2concept-aug-1000-ACT-torch

# Separate optimizer settings for the puzzle embedding.
puzzle_emb_lr: 0.01
puzzle_emb_weight_decay: 0.1

run_name: pretrain_att_arc2concept_4
seed: 0
weight_decay: 0.1