---
# Training run configuration.
#
# NOTE(review): this file was recovered from a flattened listing in which all
# indentation had been lost. The nesting below was reconstructed from the
# alphabetical key order inside each mapping (the children of `arch` and
# `data` sort independently of the top-level keys). Confirm against the
# consuming loader's schema.

# Model architecture settings.
arch:
  # NOTE(review): presumably the high-level / low-level cycle counts of the
  # model named below -- confirm against the model code.
  H_cycles: 2
  L_cycles: 6
  bptt: true
  forward_dtype: bfloat16
  head_dim: 64
  hidden_size: 512
  intermediate_size: 2048
  # NOTE(review): looks like a `<module>@<class>` style identifier -- confirm.
  name: hrm@HRM
  norm_eps: 1.0e-06
  num_layers: 2
  rope_theta: 10000.0

# Optimizer moment coefficients.
# NOTE(review): presumably Adam-style betas -- confirm against the trainer.
beta1: 0.9
beta2: 0.95

cycles_per_data: 16

# Dataset selection and preprocessing.
data:
  augment: true
  dataset_name: /sg-pretrain/datasets/sudoku-extreme-1k
  name: sudoku
  repeat: 200

ema: 0.999
epochs: 20
local_batch_size: 96
log_interval: 5

# Learning-rate settings.
lr: 0.0001
# NOTE(review): a ratio of 1.0 presumably keeps the learning rate constant
# after warmup -- confirm against the scheduler.
lr_min_ratio: 1.0
lr_warmup_steps: 2000

weight_decay: 1.0