File size: 274 Bytes
cb30831 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
{
"d_model": 2048,
"d_sae": 16384,
"initial_threshold": 0.001,
"bandwidth": 0.001,
"learning_rate": 0.0003,
"warmup_steps": 500,
"batch_size": 2048,
"target_l0": 50.0,
"sparsity_coef": 0.0001,
"num_steps": 5000,
"seq_length": 128,
"gemma_layer": 12
} |