{ "architectures": [ "NRM" ], "model_type": "nrm", "dim": 2048, "n_layers": 1, "n_heads": 16, "n_mem_tokens": 64, "vocab_size": 32000, "inner_loops": 8, "outer_loops": 16, "truncation_loops": 2, "moe_experts": 8, "experts_per_token": 2, "num_shared_experts": 2, "use_mla": true, "kv_latent_dim": 512, "rope_head_dim": 64, "rope_base": 10000.0, "mtp_num_heads": 4, "use_conv_swiglu": true, "p_exit": 0.1, "tokenizer_class": "LlamaTokenizer" }