mohammadmahdinouri committed on
Commit
3560193
·
verified ·
1 Parent(s): c8a7252

Upload ModernALBERTForMaskedLM

Browse files
Files changed (2) hide show
  1. config.json +17 -15
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,31 +1,33 @@
1
  {
 
2
  "architectures": [
3
  "ModernALBERTForMaskedLM"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
- "concat_routing_inputs": true,
7
  "embedding_size": 128,
 
 
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
- "intermediate_size": 2048,
13
- "layer_norm_eps": 1e-12,
 
 
14
  "max_position_embeddings": 8192,
15
- "model_type": "ModernALBERT_MoL",
16
  "num_attention_heads": 16,
17
- "num_expert_layers": 6,
18
- "num_hidden_layers": 18,
19
- "num_static_groups": 2,
20
  "pad_token_id": 0,
21
- "router_aux_loss_coef": 0.01,
22
- "router_warmup_steps": 2000,
23
- "routing_noise_std": 1.0,
24
- "routing_noise_type": "gumbel",
25
- "static_group_depth": 3,
26
- "top_k_experts": 1,
27
  "torch_dtype": "float32",
28
- "transformers_version": "4.55.2",
29
- "use_gated_residual": true,
 
 
30
  "vocab_size": 50368
31
  }
 
1
  {
2
+ "adapter_reduction": 16,
3
  "architectures": [
4
  "ModernALBERTForMaskedLM"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
  "embedding_size": 128,
9
+ "expert_intermediate_size": 2624,
10
+ "group_depth": 4,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 1024,
14
  "initializer_range": 0.02,
15
+ "intermediate_size": 2624,
16
+ "layer_norm_eps": 1e-06,
17
+ "load_balancing_loss_coef": 0.0,
18
+ "lora_rank": 16,
19
  "max_position_embeddings": 8192,
20
+ "model_type": "ModernALBERT",
21
  "num_attention_heads": 16,
22
+ "num_experts": 1,
23
+ "num_hidden_layers": 16,
 
24
  "pad_token_id": 0,
25
+ "router_jitter_noise": 0.01,
26
+ "top_k": 1,
 
 
 
 
27
  "torch_dtype": "float32",
28
+ "transformers_version": "4.55.4",
29
+ "use_adapter": false,
30
+ "use_cache": true,
31
+ "use_moa": true,
32
  "vocab_size": 50368
33
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1812cf0c1b7951357f2edf6a19d991163fc9fa711c1aaf72846f7cfa21002698
3
- size 278716128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e2861841a5d6d5aa9160ebdf65eac049a17608fbef39680f3f9fe9d83c99e56
3
+ size 293240464