File size: 274 Bytes
cb30831
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
{
  "d_model": 2048,
  "d_sae": 16384,
  "initial_threshold": 0.001,
  "bandwidth": 0.001,
  "learning_rate": 0.0003,
  "warmup_steps": 500,
  "batch_size": 2048,
  "target_l0": 50.0,
  "sparsity_coef": 0.0001,
  "num_steps": 5000,
  "seq_length": 128,
  "gemma_layer": 12
}