{
  "d_model": 2048,
  "d_sae": 16384,
  "initial_threshold": 0.001,
  "bandwidth": 0.001,
  "learning_rate": 0.0003,
  "warmup_steps": 500,
  "batch_size": 2048,
  "target_l0": 50.0,
  "sparsity_coef": 0.0001,
  "num_steps": 5000,
  "seq_length": 128,
  "gemma_layer": 12
}