File size: 1,382 Bytes
63dc939 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# Model architecture section.
# Every key below is nested under `arch`, and `loss` is a sub-mapping of it.
# Written flat, `arch` and `loss` would parse as null and the two `name` keys
# (the loss head's and the model's) would collide in a single mapping.
arch:
  H_layers: 2
  L_layers: 6
  dep_rank: 64
  dep_topk: 12
  expansion: 4.0
  forward_dtype: bfloat16
  # Settings sharing the `glps_` prefix.
  glps_dep_graph: true
  glps_enabled: true
  glps_fill_obvious: true
  glps_global_propagate_on_low_conf: true
  glps_max_targeted_iters: 4
  glps_tau_halt: 0.92
  glps_tau_uncertain: 0.8
  glps_token_masking: true
  # Settings sharing the `halt_` prefix.
  halt_exploration_prob: 0.1
  halt_max_steps: 16
  hidden_size: 512
  # Loss head configuration; its `name` is distinct from the model `name` below.
  loss:
    loss_type: stablemax_cross_entropy
    name: losses@ACTLossHead
  mlp_t: false
  # NOTE(review): looks like a `module@Class` identifier resolved by the
  # loader -- confirm against the code that consumes this file.
  name: recursive_reasoning.glps@GLPS_ACTV1
  num_heads: 8
  pos_encodings: rope
  puzzle_emb_ndim: 512
  rms_norm_eps: 1.0e-05
  rope_theta: 10000.0
  share_levels: true
  shared_layers: 9
# Top-level run settings.
# Keys are grouped by apparent purpose, judged from their names only; the
# loader that consumes this file is not visible here. Mapping order carries
# no meaning, so the grouping does not change the parsed result.

# Run identity and output location.
project_name: Arc2concept-aug-1000-ACT-torch
run_name: pretrain_att_arc2concept_4
checkpoint_path: checkpoints/Arc2concept-aug-1000-ACT-torch/pretrain_att_arc2concept_4
seed: 0

# Datasets.
data_paths:
  - data/arc2concept-aug-1000
data_paths_test: []

# Batch size and run length.
global_batch_size: 768
epochs: 100000

# Learning rate and optimizer values.
lr: 0.0001
lr_min_ratio: 0.1
lr_warmup_steps: 2000
beta1: 0.9
beta2: 0.95
weight_decay: 0.1
puzzle_emb_lr: 0.01
puzzle_emb_weight_decay: 0.1

# EMA settings.
ema: true
ema_rate: 0.999

# Checkpoint loading and saving.
load_checkpoint: null
freeze_weights: false
checkpoint_every_eval: true

# Evaluation.
eval_only: false
eval_interval: 10000
min_eval_interval: 0
evaluators:
  - name: arc@ARC
eval_save_outputs: []
# `eval_`-prefixed counterparts of arch keys, all null here.
# NOTE(review): presumably null means "use the arch value" -- confirm.
eval_glps_max_targeted_iters: null
eval_glps_tau_halt: null
eval_halt_max_steps: null
|