Allanxu committed on
Commit
3d02714
·
verified ·
1 Parent(s): 37f0b0c

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RoFormerForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "embedding_size": 512,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 512,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 2048,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 514,
14
+ "model_type": "roformer",
15
+ "num_attention_heads": 8,
16
+ "num_hidden_layers": 8,
17
+ "output_attentions": true,
18
+ "pad_token_id": 0,
19
+ "rotary_value": true,
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.45.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 261
25
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "output_attentions": true,
4
+ "pad_token_id": 0,
5
+ "return_dict_in_generate": true,
6
+ "transformers_version": "4.45.2"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e8d9a54236d933fc1430a87ba2f020b5b4bb33d956960620a7031b1a3c3ff7
3
+ size 102622716
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[PAD]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[UNK]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[CLS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[SEP]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "BertNormalizer",
54
+ "clean_text": true,
55
+ "handle_chinese_chars": true,
56
+ "strip_accents": null,
57
+ "lowercase": false
58
+ },
59
+ "pre_tokenizer": {
60
+ "type": "BertPreTokenizer"
61
+ },
62
+ "post_processor": {
63
+ "type": "TemplateProcessing",
64
+ "single": [
65
+ {
66
+ "SpecialToken": {
67
+ "id": "[CLS]",
68
+ "type_id": 0
69
+ }
70
+ },
71
+ {
72
+ "Sequence": {
73
+ "id": "A",
74
+ "type_id": 0
75
+ }
76
+ },
77
+ {
78
+ "SpecialToken": {
79
+ "id": "[SEP]",
80
+ "type_id": 0
81
+ }
82
+ }
83
+ ],
84
+ "pair": [
85
+ {
86
+ "SpecialToken": {
87
+ "id": "[CLS]",
88
+ "type_id": 0
89
+ }
90
+ },
91
+ {
92
+ "Sequence": {
93
+ "id": "A",
94
+ "type_id": 0
95
+ }
96
+ },
97
+ {
98
+ "SpecialToken": {
99
+ "id": "[SEP]",
100
+ "type_id": 0
101
+ }
102
+ },
103
+ {
104
+ "Sequence": {
105
+ "id": "B",
106
+ "type_id": 1
107
+ }
108
+ },
109
+ {
110
+ "SpecialToken": {
111
+ "id": "[SEP]",
112
+ "type_id": 1
113
+ }
114
+ }
115
+ ],
116
+ "special_tokens": {
117
+ "[CLS]": {
118
+ "id": "[CLS]",
119
+ "ids": [
120
+ 2
121
+ ],
122
+ "tokens": [
123
+ "[CLS]"
124
+ ]
125
+ },
126
+ "[SEP]": {
127
+ "id": "[SEP]",
128
+ "ids": [
129
+ 3
130
+ ],
131
+ "tokens": [
132
+ "[SEP]"
133
+ ]
134
+ }
135
+ }
136
+ },
137
+ "decoder": {
138
+ "type": "WordPiece",
139
+ "prefix": "##",
140
+ "cleanup": true
141
+ },
142
+ "model": {
143
+ "type": "WordPiece",
144
+ "unk_token": "[UNK]",
145
+ "continuing_subword_prefix": "##",
146
+ "max_input_chars_per_word": 100,
147
+ "vocab": {
148
+ "[PAD]": 0,
149
+ "[UNK]": 1,
150
+ "[CLS]": 2,
151
+ "[SEP]": 3,
152
+ "[MASK]": 4,
153
+ "AAAA": 5,
154
+ "AAAT": 6,
155
+ "AAAC": 7,
156
+ "AAAG": 8,
157
+ "AATA": 9,
158
+ "AATT": 10,
159
+ "AATC": 11,
160
+ "AATG": 12,
161
+ "AACA": 13,
162
+ "AACT": 14,
163
+ "AACC": 15,
164
+ "AACG": 16,
165
+ "AAGA": 17,
166
+ "AAGT": 18,
167
+ "AAGC": 19,
168
+ "AAGG": 20,
169
+ "ATAA": 21,
170
+ "ATAT": 22,
171
+ "ATAC": 23,
172
+ "ATAG": 24,
173
+ "ATTA": 25,
174
+ "ATTT": 26,
175
+ "ATTC": 27,
176
+ "ATTG": 28,
177
+ "ATCA": 29,
178
+ "ATCT": 30,
179
+ "ATCC": 31,
180
+ "ATCG": 32,
181
+ "ATGA": 33,
182
+ "ATGT": 34,
183
+ "ATGC": 35,
184
+ "ATGG": 36,
185
+ "ACAA": 37,
186
+ "ACAT": 38,
187
+ "ACAC": 39,
188
+ "ACAG": 40,
189
+ "ACTA": 41,
190
+ "ACTT": 42,
191
+ "ACTC": 43,
192
+ "ACTG": 44,
193
+ "ACCA": 45,
194
+ "ACCT": 46,
195
+ "ACCC": 47,
196
+ "ACCG": 48,
197
+ "ACGA": 49,
198
+ "ACGT": 50,
199
+ "ACGC": 51,
200
+ "ACGG": 52,
201
+ "AGAA": 53,
202
+ "AGAT": 54,
203
+ "AGAC": 55,
204
+ "AGAG": 56,
205
+ "AGTA": 57,
206
+ "AGTT": 58,
207
+ "AGTC": 59,
208
+ "AGTG": 60,
209
+ "AGCA": 61,
210
+ "AGCT": 62,
211
+ "AGCC": 63,
212
+ "AGCG": 64,
213
+ "AGGA": 65,
214
+ "AGGT": 66,
215
+ "AGGC": 67,
216
+ "AGGG": 68,
217
+ "TAAA": 69,
218
+ "TAAT": 70,
219
+ "TAAC": 71,
220
+ "TAAG": 72,
221
+ "TATA": 73,
222
+ "TATT": 74,
223
+ "TATC": 75,
224
+ "TATG": 76,
225
+ "TACA": 77,
226
+ "TACT": 78,
227
+ "TACC": 79,
228
+ "TACG": 80,
229
+ "TAGA": 81,
230
+ "TAGT": 82,
231
+ "TAGC": 83,
232
+ "TAGG": 84,
233
+ "TTAA": 85,
234
+ "TTAT": 86,
235
+ "TTAC": 87,
236
+ "TTAG": 88,
237
+ "TTTA": 89,
238
+ "TTTT": 90,
239
+ "TTTC": 91,
240
+ "TTTG": 92,
241
+ "TTCA": 93,
242
+ "TTCT": 94,
243
+ "TTCC": 95,
244
+ "TTCG": 96,
245
+ "TTGA": 97,
246
+ "TTGT": 98,
247
+ "TTGC": 99,
248
+ "TTGG": 100,
249
+ "TCAA": 101,
250
+ "TCAT": 102,
251
+ "TCAC": 103,
252
+ "TCAG": 104,
253
+ "TCTA": 105,
254
+ "TCTT": 106,
255
+ "TCTC": 107,
256
+ "TCTG": 108,
257
+ "TCCA": 109,
258
+ "TCCT": 110,
259
+ "TCCC": 111,
260
+ "TCCG": 112,
261
+ "TCGA": 113,
262
+ "TCGT": 114,
263
+ "TCGC": 115,
264
+ "TCGG": 116,
265
+ "TGAA": 117,
266
+ "TGAT": 118,
267
+ "TGAC": 119,
268
+ "TGAG": 120,
269
+ "TGTA": 121,
270
+ "TGTT": 122,
271
+ "TGTC": 123,
272
+ "TGTG": 124,
273
+ "TGCA": 125,
274
+ "TGCT": 126,
275
+ "TGCC": 127,
276
+ "TGCG": 128,
277
+ "TGGA": 129,
278
+ "TGGT": 130,
279
+ "TGGC": 131,
280
+ "TGGG": 132,
281
+ "CAAA": 133,
282
+ "CAAT": 134,
283
+ "CAAC": 135,
284
+ "CAAG": 136,
285
+ "CATA": 137,
286
+ "CATT": 138,
287
+ "CATC": 139,
288
+ "CATG": 140,
289
+ "CACA": 141,
290
+ "CACT": 142,
291
+ "CACC": 143,
292
+ "CACG": 144,
293
+ "CAGA": 145,
294
+ "CAGT": 146,
295
+ "CAGC": 147,
296
+ "CAGG": 148,
297
+ "CTAA": 149,
298
+ "CTAT": 150,
299
+ "CTAC": 151,
300
+ "CTAG": 152,
301
+ "CTTA": 153,
302
+ "CTTT": 154,
303
+ "CTTC": 155,
304
+ "CTTG": 156,
305
+ "CTCA": 157,
306
+ "CTCT": 158,
307
+ "CTCC": 159,
308
+ "CTCG": 160,
309
+ "CTGA": 161,
310
+ "CTGT": 162,
311
+ "CTGC": 163,
312
+ "CTGG": 164,
313
+ "CCAA": 165,
314
+ "CCAT": 166,
315
+ "CCAC": 167,
316
+ "CCAG": 168,
317
+ "CCTA": 169,
318
+ "CCTT": 170,
319
+ "CCTC": 171,
320
+ "CCTG": 172,
321
+ "CCCA": 173,
322
+ "CCCT": 174,
323
+ "CCCC": 175,
324
+ "CCCG": 176,
325
+ "CCGA": 177,
326
+ "CCGT": 178,
327
+ "CCGC": 179,
328
+ "CCGG": 180,
329
+ "CGAA": 181,
330
+ "CGAT": 182,
331
+ "CGAC": 183,
332
+ "CGAG": 184,
333
+ "CGTA": 185,
334
+ "CGTT": 186,
335
+ "CGTC": 187,
336
+ "CGTG": 188,
337
+ "CGCA": 189,
338
+ "CGCT": 190,
339
+ "CGCC": 191,
340
+ "CGCG": 192,
341
+ "CGGA": 193,
342
+ "CGGT": 194,
343
+ "CGGC": 195,
344
+ "CGGG": 196,
345
+ "GAAA": 197,
346
+ "GAAT": 198,
347
+ "GAAC": 199,
348
+ "GAAG": 200,
349
+ "GATA": 201,
350
+ "GATT": 202,
351
+ "GATC": 203,
352
+ "GATG": 204,
353
+ "GACA": 205,
354
+ "GACT": 206,
355
+ "GACC": 207,
356
+ "GACG": 208,
357
+ "GAGA": 209,
358
+ "GAGT": 210,
359
+ "GAGC": 211,
360
+ "GAGG": 212,
361
+ "GTAA": 213,
362
+ "GTAT": 214,
363
+ "GTAC": 215,
364
+ "GTAG": 216,
365
+ "GTTA": 217,
366
+ "GTTT": 218,
367
+ "GTTC": 219,
368
+ "GTTG": 220,
369
+ "GTCA": 221,
370
+ "GTCT": 222,
371
+ "GTCC": 223,
372
+ "GTCG": 224,
373
+ "GTGA": 225,
374
+ "GTGT": 226,
375
+ "GTGC": 227,
376
+ "GTGG": 228,
377
+ "GCAA": 229,
378
+ "GCAT": 230,
379
+ "GCAC": 231,
380
+ "GCAG": 232,
381
+ "GCTA": 233,
382
+ "GCTT": 234,
383
+ "GCTC": 235,
384
+ "GCTG": 236,
385
+ "GCCA": 237,
386
+ "GCCT": 238,
387
+ "GCCC": 239,
388
+ "GCCG": 240,
389
+ "GCGA": 241,
390
+ "GCGT": 242,
391
+ "GCGC": 243,
392
+ "GCGG": 244,
393
+ "GGAA": 245,
394
+ "GGAT": 246,
395
+ "GGAC": 247,
396
+ "GGAG": 248,
397
+ "GGTA": 249,
398
+ "GGTT": 250,
399
+ "GGTC": 251,
400
+ "GGTG": 252,
401
+ "GGCA": 253,
402
+ "GGCT": 254,
403
+ "GGCC": 255,
404
+ "GGCG": 256,
405
+ "GGGA": 257,
406
+ "GGGT": 258,
407
+ "GGGC": 259,
408
+ "GGGG": 260
409
+ }
410
+ }
411
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 514,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }