| { |
| "hidden_size": 768, |
| "num_hidden_layers": 18, |
| "intermediate_size": 2416, |
| "vocab_size": 32000, |
| "num_attention_heads": 12, |
| "num_key_value_heads": 4, |
| "attention_type": "gqa", |
| "attention_dropout": 0.0, |
| "use_qk_norm": true, |
| "sliding_window": null, |
| "max_position_embeddings": 2048, |
| "rope_theta": 10000.0, |
| "rope_type": "standard", |
| "mlp_type": "swiglu", |
| "hidden_act": "silu", |
| "num_experts": 1, |
| "token_frequencies": null, |
| "use_mu_guidance": false, |
| "use_mu_projection": false, |
| "disable_mu_guidance": false, |
| "norm_type": "rmsnorm", |
| "norm_eps": 1e-06, |
| "tie_word_embeddings": true, |
| "use_sdpa": true, |
| "use_cache": true, |
| "initializer_range": 0.02, |
| "extra_config": {} |
| } |