iolimat482 committed on
Commit
c44639c
·
verified ·
1 Parent(s): 39a316d

Training in progress, epoch 1

Browse files
config.json ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "answerdotai/ModernBERT-base",
3
+ "architectures": [
4
+ "ModernBertForSequenceClassification"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 50281,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "mean",
13
+ "cls_token_id": 50281,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "1.OA.A.1",
25
+ "1": "1.OA.A.2",
26
+ "10": "3.NBT.A.2",
27
+ "11": "3.OA.A.3",
28
+ "12": "3.OA.A.4",
29
+ "13": "3.OA.C.7",
30
+ "14": "3.OA.D.8",
31
+ "15": "4.MD.A.2",
32
+ "16": "4.MD.A.3",
33
+ "17": "4.NBT.B.4",
34
+ "18": "4.NBT.B.5",
35
+ "19": "4.NBT.B.6",
36
+ "2": "1.OA.D.8",
37
+ "20": "4.NF.A.2",
38
+ "21": "4.OA.A.3",
39
+ "22": "4.OA.B.4",
40
+ "23": "5.NBT.B.5",
41
+ "24": "5.NBT.B.6",
42
+ "25": "5.NBT.B.7",
43
+ "26": "5.NF.A.1",
44
+ "27": "5.NF.A.2",
45
+ "28": "5.NF.B.4",
46
+ "29": "5.OA.A.1",
47
+ "3": "2.MD.B.5",
48
+ "30": "6.EE.A.1",
49
+ "31": "6.EE.B.7",
50
+ "32": "6.NS.B.2",
51
+ "33": "6.NS.B.3",
52
+ "34": "7.NS.A.1",
53
+ "35": "7.NS.A.2",
54
+ "36": "7.NS.A.3",
55
+ "37": "8.EE.A.2",
56
+ "38": "8.EE.C.7",
57
+ "39": "8.EE.C.8",
58
+ "4": "2.MD.C.8",
59
+ "40": "K.CC.C.7",
60
+ "41": "K.NBT.A.1",
61
+ "42": "K.OA.A.4",
62
+ "43": "K.OA.A.5",
63
+ "5": "2.NBT.B.5",
64
+ "6": "2.NBT.B.6",
65
+ "7": "2.NBT.B.7",
66
+ "8": "2.OA.A.1",
67
+ "9": "3.MD.D.8"
68
+ },
69
+ "initializer_cutoff_factor": 2.0,
70
+ "initializer_range": 0.02,
71
+ "intermediate_size": 1152,
72
+ "label2id": {
73
+ "1.OA.A.1": "0",
74
+ "1.OA.A.2": "1",
75
+ "1.OA.D.8": "2",
76
+ "2.MD.B.5": "3",
77
+ "2.MD.C.8": "4",
78
+ "2.NBT.B.5": "5",
79
+ "2.NBT.B.6": "6",
80
+ "2.NBT.B.7": "7",
81
+ "2.OA.A.1": "8",
82
+ "3.MD.D.8": "9",
83
+ "3.NBT.A.2": "10",
84
+ "3.OA.A.3": "11",
85
+ "3.OA.A.4": "12",
86
+ "3.OA.C.7": "13",
87
+ "3.OA.D.8": "14",
88
+ "4.MD.A.2": "15",
89
+ "4.MD.A.3": "16",
90
+ "4.NBT.B.4": "17",
91
+ "4.NBT.B.5": "18",
92
+ "4.NBT.B.6": "19",
93
+ "4.NF.A.2": "20",
94
+ "4.OA.A.3": "21",
95
+ "4.OA.B.4": "22",
96
+ "5.NBT.B.5": "23",
97
+ "5.NBT.B.6": "24",
98
+ "5.NBT.B.7": "25",
99
+ "5.NF.A.1": "26",
100
+ "5.NF.A.2": "27",
101
+ "5.NF.B.4": "28",
102
+ "5.OA.A.1": "29",
103
+ "6.EE.A.1": "30",
104
+ "6.EE.B.7": "31",
105
+ "6.NS.B.2": "32",
106
+ "6.NS.B.3": "33",
107
+ "7.NS.A.1": "34",
108
+ "7.NS.A.2": "35",
109
+ "7.NS.A.3": "36",
110
+ "8.EE.A.2": "37",
111
+ "8.EE.C.7": "38",
112
+ "8.EE.C.8": "39",
113
+ "K.CC.C.7": "40",
114
+ "K.NBT.A.1": "41",
115
+ "K.OA.A.4": "42",
116
+ "K.OA.A.5": "43"
117
+ },
118
+ "layer_norm_eps": 1e-05,
119
+ "local_attention": 128,
120
+ "local_rope_theta": 10000.0,
121
+ "max_position_embeddings": 8192,
122
+ "mlp_bias": false,
123
+ "mlp_dropout": 0.0,
124
+ "model_type": "modernbert",
125
+ "norm_bias": false,
126
+ "norm_eps": 1e-05,
127
+ "num_attention_heads": 12,
128
+ "num_hidden_layers": 22,
129
+ "pad_token_id": 50283,
130
+ "position_embedding_type": "absolute",
131
+ "problem_type": "single_label_classification",
132
+ "reference_compile": true,
133
+ "sep_token_id": 50282,
134
+ "sparse_pred_ignore_index": -100,
135
+ "sparse_prediction": false,
136
+ "torch_dtype": "bfloat16",
137
+ "transformers_version": "4.48.0.dev0",
138
+ "vocab_size": 50368
139
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d065923597987c530134e22ad2cc6f678d214b2bf37846ca63ed1fbc3c33cc
3
+ size 299291688
runs/Jan09_07-45-57_ip-10-192-12-117/events.out.tfevents.1736408758.ip-10-192-12-117.6654.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed3e1d9cca40a565dc29f64596f34f3d4dad348e93fe3fe3f2d058b0e65e97a7
3
+ size 7725
runs/Jan09_07-48-53_ip-10-192-12-117/events.out.tfevents.1736408934.ip-10-192-12-117.12854.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d26f6b2dc21a2fd32814ad14c0c749dbc0bb35f35434eb0f6c8235701ce7f3a5
3
+ size 7725
runs/Jan09_08-03-12_ip-10-192-12-117/events.out.tfevents.1736409792.ip-10-192-12-117.30120.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98e0beb19e285e239c4ca9fbb02998d7bf35d75d79301a5c10f393195ccda0b
3
+ size 7725
runs/Jan09_08-03-38_ip-10-192-12-117/events.out.tfevents.1736409818.ip-10-192-12-117.30903.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f55a7c4d0e47c94b8022df9ae41d7d44f78bfd7576fff0334dad570a7a9e41bd
3
+ size 8243
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c93029b0723ef784cabdbdd7655c3d33bb7ef4720c38b9e410efbdf59e7c32
3
+ size 5432