{
  "d_model": 8,
  "d_head": 8,
  "n_layers": 2,
  "n_ctx": 32,
  "n_heads": 1,
  "d_vocab": 10,
  "normalization_type": "LN",
  "attn_only": true,
  "positional_embedding_type": "shortformer",
  "seed": 0
}
{
  "d_model": 8,
  "d_head": 8,
  "n_layers": 2,
  "n_ctx": 32,
  "n_heads": 1,
  "d_vocab": 10,
  "normalization_type": "LN",
  "attn_only": true,
  "positional_embedding_type": "shortformer",
  "seed": 0
}