Rypsor commited on
Commit
7696981
·
verified ·
1 Parent(s): 2ea8272
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "cardiovascular",
12
+ "1": "hepatorenal",
13
+ "2": "neurological",
14
+ "3": "oncological"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "cardiovascular": 0,
20
+ "hepatorenal": 1,
21
+ "neurological": 2,
22
+ "oncological": 3
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "problem_type": "multi_label_classification",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.52.4",
34
+ "type_vocab_size": 2,
35
+ "use_cache": true,
36
+ "vocab_size": 31090
37
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e26268954bd9906cd275473e97d8c93e2c09d0825bb294150faea71fd9c26ac
3
+ size 439709696
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "101": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "102": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "103": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
trainer_state.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1092,
3
+ "best_metric": 0.9566188454020108,
4
+ "best_model_checkpoint": "/kaggle/working/hpo_scibert_uncased/trial_6/checkpoint-1092",
5
+ "epoch": 7.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1092,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.32051282051282054,
14
+ "grad_norm": 121545.390625,
15
+ "learning_rate": 4.628257110973899e-05,
16
+ "loss": 0.6602,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.6410256410256411,
21
+ "grad_norm": 165566.34375,
22
+ "learning_rate": 4.406384766248401e-05,
23
+ "loss": 0.2825,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.9615384615384616,
28
+ "grad_norm": 183398.71875,
29
+ "learning_rate": 4.184512421522902e-05,
30
+ "loss": 0.2105,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_f1_macro": 0.9405513571748276,
36
+ "eval_f1_micro": 0.9374337221633087,
37
+ "eval_f1_weighted": 0.9373994117527249,
38
+ "eval_loss": 0.19546222686767578,
39
+ "eval_roc_auc_macro": 0.9773648844952434,
40
+ "eval_runtime": 6.6101,
41
+ "eval_samples_per_second": 54.008,
42
+ "eval_steps_per_second": 0.908,
43
+ "step": 156
44
+ },
45
+ {
46
+ "epoch": 1.282051282051282,
47
+ "grad_norm": 119565.84375,
48
+ "learning_rate": 3.962640076797404e-05,
49
+ "loss": 0.1367,
50
+ "step": 200
51
+ },
52
+ {
53
+ "epoch": 1.6025641025641026,
54
+ "grad_norm": 162723.625,
55
+ "learning_rate": 3.740767732071905e-05,
56
+ "loss": 0.1247,
57
+ "step": 250
58
+ },
59
+ {
60
+ "epoch": 1.9230769230769231,
61
+ "grad_norm": 39087.80859375,
62
+ "learning_rate": 3.5188953873464064e-05,
63
+ "loss": 0.1182,
64
+ "step": 300
65
+ },
66
+ {
67
+ "epoch": 2.0,
68
+ "eval_f1_macro": 0.9436427931387608,
69
+ "eval_f1_micro": 0.94375,
70
+ "eval_f1_weighted": 0.9438667437399432,
71
+ "eval_loss": 0.16677653789520264,
72
+ "eval_roc_auc_macro": 0.9820117282613596,
73
+ "eval_runtime": 6.6121,
74
+ "eval_samples_per_second": 53.992,
75
+ "eval_steps_per_second": 0.907,
76
+ "step": 312
77
+ },
78
+ {
79
+ "epoch": 2.2435897435897436,
80
+ "grad_norm": 125978.8359375,
81
+ "learning_rate": 3.297023042620908e-05,
82
+ "loss": 0.0894,
83
+ "step": 350
84
+ },
85
+ {
86
+ "epoch": 2.564102564102564,
87
+ "grad_norm": 264165.40625,
88
+ "learning_rate": 3.0751506978954095e-05,
89
+ "loss": 0.0687,
90
+ "step": 400
91
+ },
92
+ {
93
+ "epoch": 2.8846153846153846,
94
+ "grad_norm": 68308.296875,
95
+ "learning_rate": 2.853278353169911e-05,
96
+ "loss": 0.0651,
97
+ "step": 450
98
+ },
99
+ {
100
+ "epoch": 3.0,
101
+ "eval_f1_macro": 0.9445835024754138,
102
+ "eval_f1_micro": 0.948024948024948,
103
+ "eval_f1_weighted": 0.9482797012589066,
104
+ "eval_loss": 0.15806053578853607,
105
+ "eval_roc_auc_macro": 0.9879374351632613,
106
+ "eval_runtime": 6.6786,
107
+ "eval_samples_per_second": 53.454,
108
+ "eval_steps_per_second": 0.898,
109
+ "step": 468
110
+ },
111
+ {
112
+ "epoch": 3.2051282051282053,
113
+ "grad_norm": 15062.0087890625,
114
+ "learning_rate": 2.6314060084444123e-05,
115
+ "loss": 0.0452,
116
+ "step": 500
117
+ },
118
+ {
119
+ "epoch": 3.5256410256410255,
120
+ "grad_norm": 10435.2138671875,
121
+ "learning_rate": 2.409533663718914e-05,
122
+ "loss": 0.0429,
123
+ "step": 550
124
+ },
125
+ {
126
+ "epoch": 3.8461538461538463,
127
+ "grad_norm": 240173.984375,
128
+ "learning_rate": 2.1876613189934154e-05,
129
+ "loss": 0.0495,
130
+ "step": 600
131
+ },
132
+ {
133
+ "epoch": 4.0,
134
+ "eval_f1_macro": 0.9468900491419501,
135
+ "eval_f1_micro": 0.9495798319327731,
136
+ "eval_f1_weighted": 0.9496287427368676,
137
+ "eval_loss": 0.19388458132743835,
138
+ "eval_roc_auc_macro": 0.9843316823250352,
139
+ "eval_runtime": 6.5837,
140
+ "eval_samples_per_second": 54.225,
141
+ "eval_steps_per_second": 0.911,
142
+ "step": 624
143
+ },
144
+ {
145
+ "epoch": 4.166666666666667,
146
+ "grad_norm": 3220.84521484375,
147
+ "learning_rate": 1.965788974267917e-05,
148
+ "loss": 0.0278,
149
+ "step": 650
150
+ },
151
+ {
152
+ "epoch": 4.487179487179487,
153
+ "grad_norm": 77989.6796875,
154
+ "learning_rate": 1.7439166295424185e-05,
155
+ "loss": 0.0328,
156
+ "step": 700
157
+ },
158
+ {
159
+ "epoch": 4.8076923076923075,
160
+ "grad_norm": 17635.943359375,
161
+ "learning_rate": 1.5220442848169199e-05,
162
+ "loss": 0.0194,
163
+ "step": 750
164
+ },
165
+ {
166
+ "epoch": 5.0,
167
+ "eval_f1_macro": 0.9542678396871944,
168
+ "eval_f1_micro": 0.9544513457556936,
169
+ "eval_f1_weighted": 0.954747368183765,
170
+ "eval_loss": 0.17806702852249146,
171
+ "eval_roc_auc_macro": 0.9850715189437066,
172
+ "eval_runtime": 6.6153,
173
+ "eval_samples_per_second": 53.966,
174
+ "eval_steps_per_second": 0.907,
175
+ "step": 780
176
+ },
177
+ {
178
+ "epoch": 5.128205128205128,
179
+ "grad_norm": 7326.7744140625,
180
+ "learning_rate": 1.3001719400914214e-05,
181
+ "loss": 0.0209,
182
+ "step": 800
183
+ },
184
+ {
185
+ "epoch": 5.448717948717949,
186
+ "grad_norm": 42033.6640625,
187
+ "learning_rate": 1.0782995953659228e-05,
188
+ "loss": 0.0112,
189
+ "step": 850
190
+ },
191
+ {
192
+ "epoch": 5.769230769230769,
193
+ "grad_norm": 20092.216796875,
194
+ "learning_rate": 8.564272506404243e-06,
195
+ "loss": 0.0179,
196
+ "step": 900
197
+ },
198
+ {
199
+ "epoch": 6.0,
200
+ "eval_f1_macro": 0.9521823752294121,
201
+ "eval_f1_micro": 0.955067920585162,
202
+ "eval_f1_weighted": 0.9553084318062802,
203
+ "eval_loss": 0.22233474254608154,
204
+ "eval_roc_auc_macro": 0.9832222195348375,
205
+ "eval_runtime": 6.606,
206
+ "eval_samples_per_second": 54.042,
207
+ "eval_steps_per_second": 0.908,
208
+ "step": 936
209
+ },
210
+ {
211
+ "epoch": 6.089743589743589,
212
+ "grad_norm": 630.0228881835938,
213
+ "learning_rate": 6.345549059149257e-06,
214
+ "loss": 0.0115,
215
+ "step": 950
216
+ },
217
+ {
218
+ "epoch": 6.410256410256411,
219
+ "grad_norm": 469.14447021484375,
220
+ "learning_rate": 4.126825611894273e-06,
221
+ "loss": 0.005,
222
+ "step": 1000
223
+ },
224
+ {
225
+ "epoch": 6.730769230769231,
226
+ "grad_norm": 564.0679931640625,
227
+ "learning_rate": 1.9081021646392873e-06,
228
+ "loss": 0.0125,
229
+ "step": 1050
230
+ },
231
+ {
232
+ "epoch": 7.0,
233
+ "eval_f1_macro": 0.9548856387728133,
234
+ "eval_f1_micro": 0.9563409563409563,
235
+ "eval_f1_weighted": 0.9566188454020108,
236
+ "eval_loss": 0.2219453752040863,
237
+ "eval_roc_auc_macro": 0.9854251710108592,
238
+ "eval_runtime": 6.6485,
239
+ "eval_samples_per_second": 53.697,
240
+ "eval_steps_per_second": 0.902,
241
+ "step": 1092
242
+ }
243
+ ],
244
+ "logging_steps": 50,
245
+ "max_steps": 1092,
246
+ "num_input_tokens_seen": 0,
247
+ "num_train_epochs": 7,
248
+ "save_steps": 500,
249
+ "stateful_callbacks": {
250
+ "EarlyStoppingCallback": {
251
+ "args": {
252
+ "early_stopping_patience": 1,
253
+ "early_stopping_threshold": 0.0
254
+ },
255
+ "attributes": {
256
+ "early_stopping_patience_counter": 0
257
+ }
258
+ },
259
+ "TrainerControl": {
260
+ "args": {
261
+ "should_epoch_stop": false,
262
+ "should_evaluate": false,
263
+ "should_log": false,
264
+ "should_save": true,
265
+ "should_training_stop": true
266
+ },
267
+ "attributes": {}
268
+ }
269
+ },
270
+ "total_flos": 9155639800000512.0,
271
+ "train_batch_size": 32,
272
+ "trial_name": null,
273
+ "trial_params": null
274
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff