permutans committed on
Commit
32c2c50
·
verified ·
1 Parent(s): 7004a5f

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. config.json +66 -18
  2. model.safetensors +2 -2
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +7 -5
config.json CHANGED
@@ -1,16 +1,23 @@
1
  {
2
- "add_cross_attention": false,
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": null,
8
- "classifier_dropout": null,
 
 
 
 
 
 
 
9
  "dtype": "float32",
10
- "eos_token_id": null,
 
 
11
  "gradient_checkpointing": false,
12
- "hidden_act": "gelu",
13
- "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
15
  "id2label": {
16
  "0": "LABEL_0",
@@ -32,9 +39,9 @@
32
  "16": "LABEL_16",
33
  "17": "LABEL_17"
34
  },
 
35
  "initializer_range": 0.02,
36
- "intermediate_size": 3072,
37
- "is_decoder": false,
38
  "label2id": {
39
  "LABEL_0": 0,
40
  "LABEL_1": 1,
@@ -55,16 +62,57 @@
55
  "LABEL_8": 8,
56
  "LABEL_9": 9
57
  },
58
- "layer_norm_eps": 1e-12,
59
- "max_position_embeddings": 512,
60
- "model_type": "bert",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  "num_attention_heads": 12,
62
- "num_hidden_layers": 12,
63
- "pad_token_id": 0,
64
  "position_embedding_type": "absolute",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  "tie_word_embeddings": true,
66
  "transformers_version": "5.0.0",
67
- "type_vocab_size": 2,
68
- "use_cache": true,
69
- "vocab_size": 30522
70
  }
 
1
  {
 
2
  "architectures": [
3
+ "ModernBertForSequenceClassification"
4
  ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
  "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
  "gradient_checkpointing": false,
20
+ "hidden_activation": "gelu",
 
21
  "hidden_size": 768,
22
  "id2label": {
23
  "0": "LABEL_0",
 
39
  "16": "LABEL_16",
40
  "17": "LABEL_17"
41
  },
42
+ "initializer_cutoff_factor": 2.0,
43
  "initializer_range": 0.02,
44
+ "intermediate_size": 1152,
 
45
  "label2id": {
46
  "LABEL_0": 0,
47
  "LABEL_1": 1,
 
62
  "LABEL_8": 8,
63
  "LABEL_9": 9
64
  },
65
+ "layer_norm_eps": 1e-05,
66
+ "layer_types": [
67
+ "full_attention",
68
+ "sliding_attention",
69
+ "sliding_attention",
70
+ "full_attention",
71
+ "sliding_attention",
72
+ "sliding_attention",
73
+ "full_attention",
74
+ "sliding_attention",
75
+ "sliding_attention",
76
+ "full_attention",
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "full_attention",
80
+ "sliding_attention",
81
+ "sliding_attention",
82
+ "full_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "full_attention",
86
+ "sliding_attention",
87
+ "sliding_attention",
88
+ "full_attention"
89
+ ],
90
+ "local_attention": 128,
91
+ "max_position_embeddings": 8192,
92
+ "mlp_bias": false,
93
+ "mlp_dropout": 0.0,
94
+ "model_type": "modernbert",
95
+ "norm_bias": false,
96
+ "norm_eps": 1e-05,
97
  "num_attention_heads": 12,
98
+ "num_hidden_layers": 22,
99
+ "pad_token_id": 50283,
100
  "position_embedding_type": "absolute",
101
+ "repad_logits_with_grad": false,
102
+ "rope_parameters": {
103
+ "full_attention": {
104
+ "rope_theta": 160000.0,
105
+ "rope_type": "default"
106
+ },
107
+ "sliding_attention": {
108
+ "rope_theta": 10000.0,
109
+ "rope_type": "default"
110
+ }
111
+ },
112
+ "sep_token_id": 50282,
113
+ "sparse_pred_ignore_index": -100,
114
+ "sparse_prediction": false,
115
  "tie_word_embeddings": true,
116
  "transformers_version": "5.0.0",
117
+ "vocab_size": 50368
 
 
118
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5ff8e76f6215435a90012ec97f2922326711d0a074051bfa2cd8cdb6bafbad0
3
- size 780114704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bed6fd51b2762d3399f60d589ebd1c4c59bc05293f60b94d973a4908f3512d9b
3
+ size 1039686728
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,14 +1,16 @@
1
  {
2
  "backend": "tokenizers",
 
3
  "cls_token": "[CLS]",
4
- "do_lower_case": true,
5
  "is_local": false,
6
  "mask_token": "[MASK]",
7
- "model_max_length": 512,
 
 
 
 
8
  "pad_token": "[PAD]",
9
  "sep_token": "[SEP]",
10
- "strip_accents": null,
11
- "tokenize_chinese_chars": true,
12
- "tokenizer_class": "BertTokenizer",
13
  "unk_token": "[UNK]"
14
  }
 
1
  {
2
  "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": true,
4
  "cls_token": "[CLS]",
 
5
  "is_local": false,
6
  "mask_token": "[MASK]",
7
+ "model_input_names": [
8
+ "input_ids",
9
+ "attention_mask"
10
+ ],
11
+ "model_max_length": 8192,
12
  "pad_token": "[PAD]",
13
  "sep_token": "[SEP]",
14
+ "tokenizer_class": "TokenizersBackend",
 
 
15
  "unk_token": "[UNK]"
16
  }