| { | |
| "n_layers": 6, | |
| "d_model": 128, | |
| "d_head": 64, | |
| "n_heads": 8, | |
| "d_mlp": 512, | |
| "d_vocab": 61, | |
| "n_ctx": 59, | |
| "act_fn": "gelu", | |
| "normalization_type": "LN", | |
| "att_only": false, | |
| "architecture": "mingpt" | |
| } |
| { | |
| "n_layers": 6, | |
| "d_model": 128, | |
| "d_head": 64, | |
| "n_heads": 8, | |
| "d_mlp": 512, | |
| "d_vocab": 61, | |
| "n_ctx": 59, | |
| "act_fn": "gelu", | |
| "normalization_type": "LN", | |
| "att_only": false, | |
| "architecture": "mingpt" | |
| } |