gpt2_201718 / config.json
{
  "architectures": [
    "MoEGPTForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration.MoEGPTConfig",
    "AutoModelForCausalLM": "modeling.MoEGPTForCausalLM",
    "AutoTokenizer": "GPT2TokenizerFast"
  },
  "bias": false,
  "dropout": 0.0,
  "mlp_dim_exp_factor": 1.0,
  "model_type": "moegpt",
  "moe": false,
  "moe_aux_loss_factor": 0.01,
  "moe_num_experts": 1,
  "moe_num_experts_per_tok": 2,
  "moe_router_loss": "load_balancing_z_loss",
  "moe_routing": "None",
  "moe_softmax_order": "softmax_topk",
  "moe_z_loss_factor": 1.0,
  "n_embd": 1152,
  "n_head": 16,
  "n_layer": 24,
  "sequence_length": 1024,
  "torch_dtype": "float32",
  "transformers_version": "4.51.1",
  "vocab_size": 50304
}
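Because "auto_map" points at custom code in the repository (configuration.MoEGPTConfig and modeling.MoEGPTForCausalLM) rather than classes built into transformers, loading this checkpoint requires trust_remote_code=True. A minimal loading sketch, assuming the Hub repository id is talphaidze/gpt2_201718 (the actual id may differ):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "talphaidze/gpt2_201718"  # assumed repo id; adjust to the actual Hub path

# trust_remote_code is needed so that AutoConfig / AutoModelForCausalLM resolve the
# custom MoEGPT classes listed under "auto_map" in this config.json.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # matches "torch_dtype" in the config
)

prompt = "The quick brown fox"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))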