{ "d_model": 2048, "d_sae": 16384, "initial_threshold": 0.001, "bandwidth": 0.001, "learning_rate": 0.0003, "warmup_steps": 500, "batch_size": 2048, "target_l0": 50.0, "sparsity_coef": 0.0001, "num_steps": 5000, "seq_length": 128, "gemma_layer": 12 }