{
  "variant": "gptoss_phase1",
  "model_type": "adamba-moe",
  "architecture": "HybridMoEGPT (Attention + MoE + Mamba)",
  "base_model": "gpt-oss-20b",
  "parameters": "21.9B",
  "n_embd": 2880,
  "features": [
    "mamba_integration",
    "moe_32experts"
  ],
  "n_layers": 36,
  "vocab_size": 201088,
  "num_experts": 32,
  "experts_per_token": 4
}
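
The fields above are plain JSON, so the variant config can be loaded and sanity-checked before building the model. The sketch below is a minimal example, not the project's actual loader; the path `configs/gptoss_phase1.json` and the helper `load_variant_config` are hypothetical names chosen for illustration.

```python
import json

# Hypothetical path; adjust to wherever this variant config is stored.
CONFIG_PATH = "configs/gptoss_phase1.json"


def load_variant_config(path: str) -> dict:
    """Load the variant config and run a few basic consistency checks."""
    with open(path, "r", encoding="utf-8") as f:
        cfg = json.load(f)

    # Routing: top-k experts per token must not exceed the expert pool.
    assert cfg["experts_per_token"] <= cfg["num_experts"]

    # The feature list should agree with the declared expert count.
    if cfg["num_experts"] == 32:
        assert "moe_32experts" in cfg["features"]

    return cfg


if __name__ == "__main__":
    cfg = load_variant_config(CONFIG_PATH)
    print(
        f'{cfg["variant"]}: {cfg["n_layers"]} layers, '
        f'{cfg["num_experts"]} experts (top-{cfg["experts_per_token"]} routing)'
    )
```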