{
  "n_layers": 3,
  "d_model": 768,
  "d_latent": 6144,
  "k": 48,
  "auxk": 192,
  "dead_steps_threshold": 2441,
  "auxk_coef": 0.03125,
  "alpha_spec": 0.01
}
{
  "n_layers": 3,
  "d_model": 768,
  "d_latent": 6144,
  "k": 48,
  "auxk": 192,
  "dead_steps_threshold": 2441,
  "auxk_coef": 0.03125,
  "alpha_spec": 0.01
}