ainewtrend07 committed on
Commit
471b01c
·
verified ·
1 Parent(s): 1a98128

Upload ModernBertForSequenceClassification

Browse files
Files changed (2) hide show
  1. config.json +38 -18
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,14 +1,25 @@
1
  {
2
- "activation": "gelu",
3
  "architectures": [
4
- "DistilBertForSequenceClassification"
5
  ],
6
- "attention_dropout": 0.1,
7
- "dim": 768,
8
- "dropout": 0.1,
 
 
 
 
 
 
 
9
  "dtype": "float32",
10
- "finetuning_task": "sst-2",
11
- "hidden_dim": 3072,
 
 
 
 
 
12
  "id2label": {
13
  "0": "LABEL_0",
14
  "1": "LABEL_1",
@@ -25,7 +36,9 @@
25
  "12": "LABEL_12",
26
  "13": "LABEL_13"
27
  },
 
28
  "initializer_range": 0.02,
 
29
  "label2id": {
30
  "LABEL_0": 0,
31
  "LABEL_1": 1,
@@ -42,16 +55,23 @@
42
  "LABEL_8": 8,
43
  "LABEL_9": 9
44
  },
45
- "max_position_embeddings": 512,
46
- "model_type": "distilbert",
47
- "n_heads": 12,
48
- "n_layers": 6,
49
- "output_past": true,
50
- "pad_token_id": 0,
51
- "qa_dropout": 0.1,
52
- "seq_classif_dropout": 0.2,
53
- "sinusoidal_pos_embds": false,
54
- "tie_weights_": true,
 
 
 
 
 
 
 
55
  "transformers_version": "4.56.1",
56
- "vocab_size": 30522
57
  }
 
1
  {
 
2
  "architectures": [
3
+ "ModernBertForSequenceClassification"
4
  ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
  "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 1024,
23
  "id2label": {
24
  "0": "LABEL_0",
25
  "1": "LABEL_1",
 
36
  "12": "LABEL_12",
37
  "13": "LABEL_13"
38
  },
39
+ "initializer_cutoff_factor": 2.0,
40
  "initializer_range": 0.02,
41
+ "intermediate_size": 2624,
42
  "label2id": {
43
  "LABEL_0": 0,
44
  "LABEL_1": 1,
 
55
  "LABEL_8": 8,
56
  "LABEL_9": 9
57
  },
58
+ "layer_norm_eps": 1e-05,
59
+ "local_attention": 128,
60
+ "local_rope_theta": 10000.0,
61
+ "max_position_embeddings": 8192,
62
+ "mlp_bias": false,
63
+ "mlp_dropout": 0.0,
64
+ "model_type": "modernbert",
65
+ "norm_bias": false,
66
+ "norm_eps": 1e-05,
67
+ "num_attention_heads": 16,
68
+ "num_hidden_layers": 28,
69
+ "pad_token_id": 50283,
70
+ "position_embedding_type": "absolute",
71
+ "repad_logits_with_grad": false,
72
+ "sep_token_id": 50282,
73
+ "sparse_pred_ignore_index": -100,
74
+ "sparse_prediction": false,
75
  "transformers_version": "4.56.1",
76
+ "vocab_size": 50368
77
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d23e6beaca44e57cc4cdd14c9259eaa91ad62a392d65213dd941396f881767
3
- size 267869480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171183e718f70b99486a2f4ab5a2df10698296792aa8780e17d0d08b5bcec4d3
3
+ size 1583400840