| { | |
| "n_layers": 6, | |
| "d_model": 128, | |
| "d_mlp": 512, | |
| "d_head": 64, | |
| "n_heads": 8, | |
| "n_ctx": 59, | |
| "d_vocab": 61, | |
| "act_fn": "gelu", | |
| "attn_only": false, | |
| "normalization_type": "LNPre" | |
| } |
| { | |
| "n_layers": 6, | |
| "d_model": 128, | |
| "d_mlp": 512, | |
| "d_head": 64, | |
| "n_heads": 8, | |
| "n_ctx": 59, | |
| "d_vocab": 61, | |
| "act_fn": "gelu", | |
| "attn_only": false, | |
| "normalization_type": "LNPre" | |
| } |