File size: 485 Bytes
4cd2f3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Run configuration: a nested `arch` section, a nested `data` section, and
# scalar hyperparameters as top-level keys.
# Keys at every level are in byte-wise sorted order (uppercase before
# lowercase), which suggests this file was machine-generated.
# NOTE(review): if so, hand-written comments may be lost on regeneration.
# NOTE(review): the consuming code is not visible here; every meaning given
# below is inferred from the key name alone and must be confirmed against the
# loader before being relied upon.

# Model architecture settings.
arch:
  # NOTE(review): H_cycles / L_cycles look like two nested iteration counts
  # (high-level / low-level) — confirm which one is the outer loop.
  H_cycles: 2
  L_cycles: 6
  # NOTE(review): presumably toggles backpropagation through time — confirm.
  bptt: true
  # NOTE(review): presumably the dtype used for the forward pass — confirm.
  forward_dtype: bfloat16
  # hidden_size / head_dim = 512 / 64 = 8.
  # NOTE(review): presumably the attention head count is derived this way —
  # confirm.
  head_dim: 64
  hidden_size: 512
  # Equal to 4 * hidden_size (4 * 512 = 2048).
  intermediate_size: 2048
  # NOTE(review): the `<left>@<right>` form looks like a module/class locator —
  # confirm how the loader resolves it.
  name: hrm@HRM
  # Written with a dot and a signed exponent, so it resolves as a float (not a
  # string) under both YAML 1.1 and YAML 1.2 loaders.
  norm_eps: 1.0e-06
  num_layers: 2
  # NOTE(review): presumably the rotary position embedding base — confirm.
  rope_theta: 10000.0
# NOTE(review): beta1 / beta2 are presumably Adam-style optimizer moment
# coefficients — confirm which optimizer consumes them.
beta1: 0.9
beta2: 0.95
# NOTE(review): meaning not evident from this file — confirm against the
# training loop.
cycles_per_data: 16
# Dataset selection and preprocessing settings.
data:
  # NOTE(review): presumably enables data augmentation — confirm.
  augment: true
  # Absolute path; this config is only portable to machines where the same
  # path exists.
  dataset_name: /sg-pretrain/datasets/sudoku-extreme-1k
  name: sudoku
  # NOTE(review): presumably a dataset repetition factor — confirm how it
  # interacts with `epochs`.
  repeat: 200
# NOTE(review): presumably the decay rate for an exponential moving average of
# model weights — confirm.
ema: 0.999
epochs: 20
# NOTE(review): the `local_` prefix suggests a per-process batch size rather
# than a global one — confirm.
local_batch_size: 96
# NOTE(review): presumably measured in training steps — confirm the unit.
log_interval: 5
lr: 0.0001
# NOTE(review): if this is the ratio of the minimum learning rate to `lr`, a
# value of 1.0 would mean the rate never decays below `lr` — confirm that this
# is intended.
lr_min_ratio: 1.0
lr_warmup_steps: 2000
# NOTE(review): 1.0 is much larger than commonly seen weight-decay values —
# confirm that this is intentional and not a typo.
weight_decay: 1.0