dlsmallw committed on
Commit
302654d
·
verified ·
1 Parent(s): abe1854

Delete checkpoint-2500

Browse files
checkpoint-2500/config.json DELETED
@@ -1,42 +0,0 @@
1
- {
2
- "architectures": [
3
- "BertForSequenceClassification"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "classifier_dropout": null,
7
- "gradient_checkpointing": false,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "id2label": {
12
- "0": "LABEL_0",
13
- "1": "LABEL_1",
14
- "2": "LABEL_2",
15
- "3": "LABEL_3",
16
- "4": "LABEL_4",
17
- "5": "LABEL_5"
18
- },
19
- "initializer_range": 0.02,
20
- "intermediate_size": 3072,
21
- "label2id": {
22
- "LABEL_0": 0,
23
- "LABEL_1": 1,
24
- "LABEL_2": 2,
25
- "LABEL_3": 3,
26
- "LABEL_4": 4,
27
- "LABEL_5": 5
28
- },
29
- "layer_norm_eps": 1e-12,
30
- "max_position_embeddings": 512,
31
- "model_type": "bert",
32
- "num_attention_heads": 12,
33
- "num_hidden_layers": 12,
34
- "pad_token_id": 0,
35
- "position_embedding_type": "absolute",
36
- "problem_type": "multi_label_classification",
37
- "torch_dtype": "float32",
38
- "transformers_version": "4.50.2",
39
- "type_vocab_size": 2,
40
- "use_cache": true,
41
- "vocab_size": 30522
42
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:483e74f6a884462e3d8482e4c0ed5116900e5000c6164d9da5b1745dd399fb95
3
- size 437970952
 
 
 
 
checkpoint-2500/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f62a2936594740dae41575092b145a7f7507c978e24206568bfa02cb1871b98
3
- size 876057338
 
 
 
 
checkpoint-2500/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b68a4cc1137de98be5b543bf8df031d2d231b841376ad9d88990abda4f842d6
3
- size 13990
 
 
 
 
checkpoint-2500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1705a7c11aa908addc84d5fd7ef2c7e81922a331e53011688220f858f888dc32
3
- size 1064
 
 
 
 
checkpoint-2500/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
checkpoint-2500/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "100": {
12
- "content": "[UNK]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "101": {
20
- "content": "[CLS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "102": {
28
- "content": "[SEP]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "103": {
36
- "content": "[MASK]",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- }
43
- },
44
- "clean_up_tokenization_spaces": false,
45
- "cls_token": "[CLS]",
46
- "do_lower_case": true,
47
- "extra_special_tokens": {},
48
- "mask_token": "[MASK]",
49
- "model_max_length": 512,
50
- "pad_token": "[PAD]",
51
- "sep_token": "[SEP]",
52
- "strip_accents": null,
53
- "tokenize_chinese_chars": true,
54
- "tokenizer_class": "BertTokenizer",
55
- "unk_token": "[UNK]"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/trainer_state.json DELETED
@@ -1,289 +0,0 @@
1
- {
2
- "best_global_step": 2500,
3
- "best_metric": 0.07224167883396149,
4
- "best_model_checkpoint": "C:\\Users\\Daniel\\Desktop\\GitHub\\NLPinitiative\\models\\dlsmallw\\NLPinitiative-Multilabel-Regression\\checkpoint-2500",
5
- "epoch": 2.7203482045701852,
6
- "eval_steps": 500,
7
- "global_step": 2500,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.544069640914037,
14
- "grad_norm": 0.5715875625610352,
15
- "learning_rate": 1.637286906057309e-05,
16
- "loss": 0.0289,
17
- "step": 500
18
- },
19
- {
20
- "epoch": 0.544069640914037,
21
- "eval_loss": 0.011422036215662956,
22
- "eval_mae_per_cat": [
23
- 0.0535765178501606,
24
- 0.059359531849622726,
25
- 0.05057797580957413,
26
- 0.031064841896295547,
27
- 0.06608528643846512,
28
- 0.015492343343794346
29
- ],
30
- "eval_mean_mae": 0.0460260808467865,
31
- "eval_mean_pearson": 0.9106294274330139,
32
- "eval_mean_r2": 0.6782520413398743,
33
- "eval_mean_rmse": 0.09810739755630493,
34
- "eval_pearson_per_cat": [
35
- 0.9014391899108887,
36
- 0.923698365688324,
37
- 0.8990985155105591,
38
- 0.9024374485015869,
39
- 0.9264736175537109,
40
- NaN
41
- ],
42
- "eval_r2_per_cat": [
43
- 0.7889423370361328,
44
- 0.8516361713409424,
45
- 0.7870068550109863,
46
- 0.8109977841377258,
47
- 0.8309295177459717,
48
- 0.0
49
- ],
50
- "eval_rmse_per_cat": [
51
- 0.11551735550165176,
52
- 0.1352391391992569,
53
- 0.10884607583284378,
54
- 0.0673733726143837,
55
- 0.1418396383523941,
56
- 0.019828785210847855
57
- ],
58
- "eval_runtime": 100.9453,
59
- "eval_samples_per_second": 31.225,
60
- "eval_steps_per_second": 3.903,
61
- "step": 500
62
- },
63
- {
64
- "epoch": 1.088139281828074,
65
- "grad_norm": 0.3862350583076477,
66
- "learning_rate": 1.2745738121146174e-05,
67
- "loss": 0.0108,
68
- "step": 1000
69
- },
70
- {
71
- "epoch": 1.088139281828074,
72
- "eval_loss": 0.008166518062353134,
73
- "eval_mae_per_cat": [
74
- 0.0379190668463707,
75
- 0.0694158673286438,
76
- 0.032430507242679596,
77
- 0.025210702791810036,
78
- 0.03987590968608856,
79
- 0.0133794154971838
80
- ],
81
- "eval_mean_mae": 0.03637191280722618,
82
- "eval_mean_pearson": 0.9361262321472168,
83
- "eval_mean_r2": 0.7225764393806458,
84
- "eval_mean_rmse": 0.0826454609632492,
85
- "eval_pearson_per_cat": [
86
- 0.9333711266517639,
87
- 0.9291742444038391,
88
- 0.9329087138175964,
89
- 0.9267426133155823,
90
- 0.9584344625473022,
91
- NaN
92
- ],
93
- "eval_r2_per_cat": [
94
- 0.8588278889656067,
95
- 0.8505284190177917,
96
- 0.8572311997413635,
97
- 0.8514397144317627,
98
- 0.9174313545227051,
99
- 0.0
100
- ],
101
- "eval_rmse_per_cat": [
102
- 0.09447590261697769,
103
- 0.13574306666851044,
104
- 0.08911412954330444,
105
- 0.05973187834024429,
106
- 0.09912225604057312,
107
- 0.01768551953136921
108
- ],
109
- "eval_runtime": 99.9899,
110
- "eval_samples_per_second": 31.523,
111
- "eval_steps_per_second": 3.94,
112
- "step": 1000
113
- },
114
- {
115
- "epoch": 1.632208922742111,
116
- "grad_norm": 0.17565102875232697,
117
- "learning_rate": 9.11860718171926e-06,
118
- "loss": 0.0059,
119
- "step": 1500
120
- },
121
- {
122
- "epoch": 1.632208922742111,
123
- "eval_loss": 0.007027682848274708,
124
- "eval_mae_per_cat": [
125
- 0.02809344045817852,
126
- 0.039010003209114075,
127
- 0.03197040781378746,
128
- 0.01924479752779007,
129
- 0.033096104860305786,
130
- 0.00918651558458805
131
- ],
132
- "eval_mean_mae": 0.026766879484057426,
133
- "eval_mean_pearson": 0.9401901483535766,
134
- "eval_mean_r2": 0.7350945472717285,
135
- "eval_mean_rmse": 0.07674583047628403,
136
- "eval_pearson_per_cat": [
137
- 0.9324471950531006,
138
- 0.9467915296554565,
139
- 0.9339689612388611,
140
- 0.9317143559455872,
141
- 0.9560286998748779,
142
- NaN
143
- ],
144
- "eval_r2_per_cat": [
145
- 0.8691833019256592,
146
- 0.8961314558982849,
147
- 0.8661218285560608,
148
- 0.8654465675354004,
149
- 0.9136843681335449,
150
- 0.0
151
- ],
152
- "eval_rmse_per_cat": [
153
- 0.09094487875699997,
154
- 0.11315672099590302,
155
- 0.08629484474658966,
156
- 0.05684630945324898,
157
- 0.10134641826152802,
158
- 0.01188575103878975
159
- ],
160
- "eval_runtime": 102.2428,
161
- "eval_samples_per_second": 30.829,
162
- "eval_steps_per_second": 3.854,
163
- "step": 1500
164
- },
165
- {
166
- "epoch": 2.176278563656148,
167
- "grad_norm": 0.19192808866500854,
168
- "learning_rate": 5.491476242292347e-06,
169
- "loss": 0.005,
170
- "step": 2000
171
- },
172
- {
173
- "epoch": 2.176278563656148,
174
- "eval_loss": 0.006472242530435324,
175
- "eval_mae_per_cat": [
176
- 0.02706627920269966,
177
- 0.04395332559943199,
178
- 0.024061929434537888,
179
- 0.016889315098524094,
180
- 0.033904846757650375,
181
- 0.007554503157734871
182
- ],
183
- "eval_mean_mae": 0.025571702048182487,
184
- "eval_mean_pearson": 0.9477012753486633,
185
- "eval_mean_r2": 0.7445929050445557,
186
- "eval_mean_rmse": 0.07305774837732315,
187
- "eval_pearson_per_cat": [
188
- 0.9431683421134949,
189
- 0.9483680725097656,
190
- 0.9359709024429321,
191
- 0.9474267959594727,
192
- 0.9635722637176514,
193
- NaN
194
- ],
195
- "eval_r2_per_cat": [
196
- 0.8886928558349609,
197
- 0.8949806690216064,
198
- 0.8702350854873657,
199
- 0.8879827260971069,
200
- 0.9256656765937805,
201
- 0.0
202
- ],
203
- "eval_rmse_per_cat": [
204
- 0.08388959616422653,
205
- 0.11378183960914612,
206
- 0.08495883643627167,
207
- 0.051867734640836716,
208
- 0.09404993057250977,
209
- 0.009798564948141575
210
- ],
211
- "eval_runtime": 103.1288,
212
- "eval_samples_per_second": 30.564,
213
- "eval_steps_per_second": 3.82,
214
- "step": 2000
215
- },
216
- {
217
- "epoch": 2.7203482045701852,
218
- "grad_norm": 0.12346106767654419,
219
- "learning_rate": 1.8643453028654336e-06,
220
- "loss": 0.0034,
221
- "step": 2500
222
- },
223
- {
224
- "epoch": 2.7203482045701852,
225
- "eval_loss": 0.006370748393237591,
226
- "eval_mae_per_cat": [
227
- 0.025086652487516403,
228
- 0.03752453625202179,
229
- 0.02304081991314888,
230
- 0.013712005689740181,
231
- 0.030296988785266876,
232
- 0.006659933365881443
233
- ],
234
- "eval_mean_mae": 0.022720156237483025,
235
- "eval_mean_pearson": 0.9476440191268921,
236
- "eval_mean_r2": 0.7471505999565125,
237
- "eval_mean_rmse": 0.07224167883396149,
238
- "eval_pearson_per_cat": [
239
- 0.9419375658035278,
240
- 0.9502590894699097,
241
- 0.9368577599525452,
242
- 0.948063850402832,
243
- 0.9611018300056458,
244
- NaN
245
- ],
246
- "eval_r2_per_cat": [
247
- 0.8866956233978271,
248
- 0.8990016579627991,
249
- 0.8755217790603638,
250
- 0.8987970352172852,
251
- 0.9228875637054443,
252
- 0.0
253
- ],
254
- "eval_rmse_per_cat": [
255
- 0.08463889360427856,
256
- 0.11158231645822525,
257
- 0.08321022242307663,
258
- 0.04930051788687706,
259
- 0.09579125046730042,
260
- 0.008926868438720703
261
- ],
262
- "eval_runtime": 104.2098,
263
- "eval_samples_per_second": 30.247,
264
- "eval_steps_per_second": 3.781,
265
- "step": 2500
266
- }
267
- ],
268
- "logging_steps": 500,
269
- "max_steps": 2757,
270
- "num_input_tokens_seen": 0,
271
- "num_train_epochs": 3,
272
- "save_steps": 500,
273
- "stateful_callbacks": {
274
- "TrainerControl": {
275
- "args": {
276
- "should_epoch_stop": false,
277
- "should_evaluate": false,
278
- "should_log": false,
279
- "should_save": true,
280
- "should_training_stop": false
281
- },
282
- "attributes": {}
283
- }
284
- },
285
- "total_flos": 1315602524160000.0,
286
- "train_batch_size": 8,
287
- "trial_name": null,
288
- "trial_params": null
289
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a4da5aebe6b89fd7e07911bd4c7fa268b697325176b9687d4960d3c59a2a59b
3
- size 5560
 
 
 
 
checkpoint-2500/vocab.txt DELETED
The diff for this file is too large to render. See raw diff