{ "n_layers": 3, "d_model": 768, "d_latent": 6144, "k": 48, "auxk": 192, "dead_steps_threshold": 2441, "auxk_coef": 0.03125, "alpha_spec": 0.01 }