Shuu12121 committed on
Commit
8d1b033
·
verified ·
1 Parent(s): 022994d

Upload ModernBERT model

Browse files
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForMaskedLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": null,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "cls",
13
+ "cls_token_id": 2,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": null,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "hidden_activation": "gelu",
21
+ "hidden_dropout_prob": 0.1,
22
+ "hidden_size": 1024,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1536,
26
+ "local_attention": 128,
27
+ "local_attention_rope_theta": 10000,
28
+ "local_attention_window": 128,
29
+ "local_rope_theta": 10000.0,
30
+ "max_position_embeddings": 8192,
31
+ "mlp_bias": false,
32
+ "mlp_dropout": 0.0,
33
+ "model_type": "modernbert",
34
+ "norm_bias": false,
35
+ "norm_eps": 1e-05,
36
+ "num_attention_heads": 16,
37
+ "num_hidden_layers": 28,
38
+ "pad_token_id": 1,
39
+ "repad_logits_with_grad": false,
40
+ "sep_token_id": 3,
41
+ "sparse_pred_ignore_index": -100,
42
+ "sparse_prediction": false,
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.55.3",
45
+ "type_vocab_size": 2,
46
+ "vocab_size": 50368
47
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5095d7c534ec40a7902fbe36498a1b121860bd11ca61ccff391f30f7fd581f8
3
+ size 1209203136
special_tokens_map.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[TYPE]",
4
+ "[TYPE_START]",
5
+ "[TYPE_END]",
6
+ "[TYPE_PAD]",
7
+ "[VAR]",
8
+ "[VAR_START]",
9
+ "[VAR_END]",
10
+ "[NAME_START]",
11
+ "[NAME_END]",
12
+ "[VAR_PAD]",
13
+ "[NAME_PAD]",
14
+ "[COMMIT_MSG]",
15
+ "[COMMIT_MSG_START]",
16
+ "[COMMIT_MSG_END]",
17
+ "[COMMIT_MSG_PAD]",
18
+ "[PATCH]",
19
+ "[PATCH_START]",
20
+ "[PATCH_END]",
21
+ "[PATCH_PAD]"
22
+ ],
23
+ "cls_token": {
24
+ "content": "[CLS]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "mask_token": {
31
+ "content": "[MASK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "pad_token": {
38
+ "content": "[PAD]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "sep_token": {
45
+ "content": "[SEP]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ },
51
+ "unk_token": {
52
+ "content": "[UNK]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false
57
+ }
58
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[TYPE]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "[TYPE_START]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "[TYPE_END]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "[TYPE_PAD]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "[VAR]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "[VAR_START]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "[VAR_END]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "[NAME_START]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "[NAME_END]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "14": {
116
+ "content": "[VAR_PAD]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "15": {
124
+ "content": "[NAME_PAD]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "16": {
132
+ "content": "[COMMIT_MSG]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "17": {
140
+ "content": "[COMMIT_MSG_START]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "18": {
148
+ "content": "[COMMIT_MSG_END]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "19": {
156
+ "content": "[COMMIT_MSG_PAD]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "20": {
164
+ "content": "[PATCH]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "21": {
172
+ "content": "[PATCH_START]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "22": {
180
+ "content": "[PATCH_END]",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "23": {
188
+ "content": "[PATCH_PAD]",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ }
195
+ },
196
+ "additional_special_tokens": [
197
+ "[TYPE]",
198
+ "[TYPE_START]",
199
+ "[TYPE_END]",
200
+ "[TYPE_PAD]",
201
+ "[VAR]",
202
+ "[VAR_START]",
203
+ "[VAR_END]",
204
+ "[NAME_START]",
205
+ "[NAME_END]",
206
+ "[VAR_PAD]",
207
+ "[NAME_PAD]",
208
+ "[COMMIT_MSG]",
209
+ "[COMMIT_MSG_START]",
210
+ "[COMMIT_MSG_END]",
211
+ "[COMMIT_MSG_PAD]",
212
+ "[PATCH]",
213
+ "[PATCH_START]",
214
+ "[PATCH_END]",
215
+ "[PATCH_PAD]"
216
+ ],
217
+ "clean_up_tokenization_spaces": false,
218
+ "cls_token": "[CLS]",
219
+ "extra_special_tokens": {},
220
+ "mask_token": "[MASK]",
221
+ "model_input_names": [
222
+ "input_ids",
223
+ "attention_mask"
224
+ ],
225
+ "model_max_length": 8192,
226
+ "pad_token": "[PAD]",
227
+ "sep_token": "[SEP]",
228
+ "tokenizer_class": "PreTrainedTokenizerFast",
229
+ "unk_token": "[UNK]"
230
+ }