{
  "LanguageModel": {
    "input": {
      "vocab_size": 51200,
      "pad_vocab_size_multiple": 8
    }
  },
  "MixerModel": {
    "input": {
      "d_model": 2048,
      "n_layer": 24,
      "lm_head_prenorm": "layer"
    }
  },
  "Block1": {
    "n_layers": 24,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  }
}
|