mohammadmahdinouri
/

mol-trained

ModernALBERT_MoL

Model card Files Files and versions

mol-trained / config.json

mohammadmahdinouri's picture

mohammadmahdinouri

Upload ModernALBERTForMaskedLM

235846e verified 8 months ago

history blame contribute delete

602 Bytes

	{
	"architectures": [
	"ModernALBERTForMaskedLM"
	],
	"attention_probs_dropout_prob": 0.1,
	"embedding_size": 128,
	"hidden_act": "gelu",
	"hidden_dropout_prob": 0.1,
	"hidden_size": 1024,
	"initializer_range": 0.02,
	"intermediate_size": 2048,
	"layer_norm_eps": 1e-12,
	"max_position_embeddings": 8192,
	"model_type": "ModernALBERT_MoL",
	"num_attention_heads": 16,
	"num_expert_layers": 6,
	"num_hidden_layers": 18,
	"pad_token_id": 0,
	"router_aux_loss_coef": 0.001,
	"top_k_experts": 1,
	"torch_dtype": "bfloat16",
	"transformers_version": "4.55.0",
	"vocab_size": 50368
	}