{
  "dim": 4096,
  "n_layers": 32,
  "n_heads": 32,
  "vocab_size": 50432,
  "norm_eps": 1e-5,
  "seq_len": 2048,
  "weight_tying": false,
  "apply_qk_norm": false,
  "norm_type": "gain_only_layer_norm",
  "positional_embedding_type": "rotary",
  "ffn_type": "swiglu"
}
{
  "dim": 4096,
  "n_layers": 32,
  "n_heads": 32,
  "vocab_size": 50432,
  "norm_eps": 1e-5,
  "seq_len": 2048,
  "weight_tying": false,
  "apply_qk_norm": false,
  "norm_type": "gain_only_layer_norm",
  "positional_embedding_type": "rotary",
  "ffn_type": "swiglu"
}