mohammadmahdinouri
/

mol-final

ModernALBERT_MoL

Model card Files Files and versions

mol-final / config.json

mohammadmahdinouri's picture

mohammadmahdinouri

Upload ModernALBERTForMaskedLM

fe723ac verified 8 months ago

history blame contribute delete

867 Bytes

	{
	"adapter_reduction_factor": 16,
	"architectures": [
	"ModernALBERTForMaskedLM"
	],
	"attention_probs_dropout_prob": 0.1,
	"concat_routing_inputs": true,
	"embedding_size": 128,
	"hidden_act": "gelu",
	"hidden_dropout_prob": 0.1,
	"hidden_size": 1024,
	"initializer_range": 0.02,
	"intermediate_size": 2048,
	"layer_norm_eps": 1e-12,
	"max_position_embeddings": 8192,
	"model_type": "ModernALBERT_MoL",
	"num_attention_heads": 16,
	"num_expert_layers": 6,
	"num_hidden_layers": 18,
	"num_static_groups": 2,
	"pad_token_id": 0,
	"router_aux_loss_coef": 0.01,
	"router_warmup_steps": 7000,
	"routing_noise_std": 1.0,
	"routing_noise_type": "gumbel",
	"static_group_depth": 3,
	"top_k_experts": 1,
	"torch_dtype": "float32",
	"transformers_version": "4.55.2",
	"use_adapters": true,
	"use_gated_residual": true,
	"vocab_size": 50368
	}