{
  "architectures": [
    "TinyMixtralForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "modeling_tiny_mixtral.TinyMixtralConfig",
    "AutoModelForCausalLM": "modeling_tiny_mixtral.TinyMixtralForCausalLM"
  },
  "attn_dropout": 0.0,
  "attn_eps": 1e-06,
  "d_head": 64,
  "d_model": 768,
  "device": "cpu",
  "dropout": 0.0,
  "ffn_eps": 1e-06,
  "max_seq_len": 1024,
  "model_type": "tiny_mixtral_5l_active",
  "n_experts": 8,
  "n_heads": 12,
  "n_layers": 5,
  "norm_eps": 1e-06,
  "top_k": null,
  "top_k_experts": 2,
  "torch_dtype": "float32",
  "transformers_version": "4.47.1",
  "vocab_size": 50257
}