{ "_name_or_path": "BeardedMonster/MOE", "architectures": [ "GPTJXMoEForCausalLM" ], "auto_map": { "AutoConfig": "configuration.GPTJXMoEConfig", "AutoModelForCausalLM": "modeling.GPTJXMoEForCausalLM" }, "bias": false, "block_size": 32768, "dropout": 0.0, "kv_cache_dtype": "float32", "max_batch_size": 1, "model_type": "sabiyarn", "moe_dim": 3072, "n_embd": 768, "n_heads": 12, "n_layer": 12, "num_experts": 7, "num_experts_per_tok": 2, "torch_dtype": "float32", "transformers_version": "4.41.2", "use_kv_cache": true, "use_moe": true, "vocab_size": 52050 }