kms-engineer commited on
Commit
d823dd8
·
1 Parent(s): b81e2b4

Remove training checkpoints to reduce model size

Browse files
checkpoint-440/config.json DELETED
@@ -1,68 +0,0 @@
1
- {
2
- "architectures": [
3
- "RobertaForTokenClassification"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
- "classifier_dropout": null,
8
- "dtype": "float32",
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "O",
15
- "1": "B-NAME",
16
- "2": "I-NAME",
17
- "3": "B-PHONE",
18
- "4": "I-PHONE",
19
- "5": "B-EMAIL",
20
- "6": "I-EMAIL",
21
- "7": "B-ADDRESS",
22
- "8": "I-ADDRESS",
23
- "9": "B-BIRTHDAY",
24
- "10": "I-BIRTHDAY",
25
- "11": "B-TAG",
26
- "12": "I-TAG",
27
- "13": "B-NOTE_TEXT",
28
- "14": "I-NOTE_TEXT",
29
- "15": "B-ID",
30
- "16": "I-ID",
31
- "17": "B-DAYS",
32
- "18": "I-DAYS"
33
- },
34
- "initializer_range": 0.02,
35
- "intermediate_size": 3072,
36
- "label2id": {
37
- "B-ADDRESS": 7,
38
- "B-BIRTHDAY": 9,
39
- "B-DAYS": 17,
40
- "B-EMAIL": 5,
41
- "B-ID": 15,
42
- "B-NAME": 1,
43
- "B-NOTE_TEXT": 13,
44
- "B-PHONE": 3,
45
- "B-TAG": 11,
46
- "I-ADDRESS": 8,
47
- "I-BIRTHDAY": 10,
48
- "I-DAYS": 18,
49
- "I-EMAIL": 6,
50
- "I-ID": 16,
51
- "I-NAME": 2,
52
- "I-NOTE_TEXT": 14,
53
- "I-PHONE": 4,
54
- "I-TAG": 12,
55
- "O": 0
56
- },
57
- "layer_norm_eps": 1e-05,
58
- "max_position_embeddings": 514,
59
- "model_type": "roberta",
60
- "num_attention_heads": 12,
61
- "num_hidden_layers": 12,
62
- "pad_token_id": 1,
63
- "position_embedding_type": "absolute",
64
- "transformers_version": "4.57.0",
65
- "type_vocab_size": 1,
66
- "use_cache": true,
67
- "vocab_size": 50265
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-440/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-440/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e6925d4464558c4fa2b499c0649d19e82b2051fa2648e86740bd0377a33b38b
3
- size 496302532
 
 
 
 
checkpoint-440/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dde9e5ce97a8e1b19da745994c3d70b89b7553cbcfa4d2d39e5671779e1b1d74
3
- size 992718539
 
 
 
 
checkpoint-440/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67627e3b026c4c5d776980914bd7f99f2f9814ae6ac5a3bd1d93ee8d2ff6784f
3
- size 14455
 
 
 
 
checkpoint-440/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c22bfc28a19135b4e6445d20b99bbb370fc9d3030a0d53133fe99a0bffe1765d
3
- size 1465
 
 
 
 
checkpoint-440/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-440/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-440/tokenizer_config.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": false,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "extra_special_tokens": {},
51
- "mask_token": "<mask>",
52
- "model_max_length": 512,
53
- "pad_token": "<pad>",
54
- "sep_token": "</s>",
55
- "tokenizer_class": "RobertaTokenizer",
56
- "trim_offsets": true,
57
- "unk_token": "<unk>"
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-440/trainer_state.json DELETED
@@ -1,403 +0,0 @@
1
- {
2
- "best_global_step": 440,
3
- "best_metric": 0.940876969962022,
4
- "best_model_checkpoint": "models/ner_model/checkpoint-440",
5
- "epoch": 4.0,
6
- "eval_steps": 500,
7
- "global_step": 440,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.09090909090909091,
14
- "grad_norm": 6.7610626220703125,
15
- "learning_rate": 2.7e-06,
16
- "loss": 3.1763,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.18181818181818182,
21
- "grad_norm": 8.019376754760742,
22
- "learning_rate": 5.7000000000000005e-06,
23
- "loss": 3.0538,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.2727272727272727,
28
- "grad_norm": 8.582132339477539,
29
- "learning_rate": 8.7e-06,
30
- "loss": 2.7102,
31
- "step": 30
32
- },
33
- {
34
- "epoch": 0.36363636363636365,
35
- "grad_norm": 7.020857810974121,
36
- "learning_rate": 1.1700000000000001e-05,
37
- "loss": 1.9306,
38
- "step": 40
39
- },
40
- {
41
- "epoch": 0.45454545454545453,
42
- "grad_norm": 6.591127872467041,
43
- "learning_rate": 1.47e-05,
44
- "loss": 1.5462,
45
- "step": 50
46
- },
47
- {
48
- "epoch": 0.5454545454545454,
49
- "grad_norm": 6.085566520690918,
50
- "learning_rate": 1.77e-05,
51
- "loss": 1.158,
52
- "step": 60
53
- },
54
- {
55
- "epoch": 0.6363636363636364,
56
- "grad_norm": 10.360540390014648,
57
- "learning_rate": 2.07e-05,
58
- "loss": 0.88,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.7272727272727273,
63
- "grad_norm": 4.375982761383057,
64
- "learning_rate": 2.37e-05,
65
- "loss": 0.6575,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.8181818181818182,
70
- "grad_norm": 10.760860443115234,
71
- "learning_rate": 2.6700000000000002e-05,
72
- "loss": 0.6622,
73
- "step": 90
74
- },
75
- {
76
- "epoch": 0.9090909090909091,
77
- "grad_norm": 8.98079776763916,
78
- "learning_rate": 2.97e-05,
79
- "loss": 0.6058,
80
- "step": 100
81
- },
82
- {
83
- "epoch": 1.0,
84
- "grad_norm": 7.706189155578613,
85
- "learning_rate": 2.94e-05,
86
- "loss": 0.509,
87
- "step": 110
88
- },
89
- {
90
- "epoch": 1.0,
91
- "eval_accuracy": 0.8916666666666667,
92
- "eval_entity_accuracy": 0.8543662742950999,
93
- "eval_f1": 0.8670505036976651,
94
- "eval_loss": 0.38678058981895447,
95
- "eval_precision": 0.861471336118019,
96
- "eval_recall": 0.8916666666666667,
97
- "eval_runtime": 2.6176,
98
- "eval_samples_per_second": 166.949,
99
- "eval_steps_per_second": 10.697,
100
- "step": 110
101
- },
102
- {
103
- "epoch": 1.0909090909090908,
104
- "grad_norm": 4.655617713928223,
105
- "learning_rate": 2.873333333333333e-05,
106
- "loss": 0.3582,
107
- "step": 120
108
- },
109
- {
110
- "epoch": 1.1818181818181819,
111
- "grad_norm": 6.469715118408203,
112
- "learning_rate": 2.8066666666666667e-05,
113
- "loss": 0.4039,
114
- "step": 130
115
- },
116
- {
117
- "epoch": 1.2727272727272727,
118
- "grad_norm": 6.508236408233643,
119
- "learning_rate": 2.7400000000000002e-05,
120
- "loss": 0.3456,
121
- "step": 140
122
- },
123
- {
124
- "epoch": 1.3636363636363638,
125
- "grad_norm": 7.3737874031066895,
126
- "learning_rate": 2.6733333333333334e-05,
127
- "loss": 0.4672,
128
- "step": 150
129
- },
130
- {
131
- "epoch": 1.4545454545454546,
132
- "grad_norm": 6.929003715515137,
133
- "learning_rate": 2.606666666666667e-05,
134
- "loss": 0.3472,
135
- "step": 160
136
- },
137
- {
138
- "epoch": 1.5454545454545454,
139
- "grad_norm": 5.1204986572265625,
140
- "learning_rate": 2.54e-05,
141
- "loss": 0.3808,
142
- "step": 170
143
- },
144
- {
145
- "epoch": 1.6363636363636362,
146
- "grad_norm": 3.2450709342956543,
147
- "learning_rate": 2.4733333333333333e-05,
148
- "loss": 0.2377,
149
- "step": 180
150
- },
151
- {
152
- "epoch": 1.7272727272727273,
153
- "grad_norm": 3.943157196044922,
154
- "learning_rate": 2.4066666666666664e-05,
155
- "loss": 0.34,
156
- "step": 190
157
- },
158
- {
159
- "epoch": 1.8181818181818183,
160
- "grad_norm": 4.091476917266846,
161
- "learning_rate": 2.3400000000000003e-05,
162
- "loss": 0.3028,
163
- "step": 200
164
- },
165
- {
166
- "epoch": 1.9090909090909092,
167
- "grad_norm": 7.115076541900635,
168
- "learning_rate": 2.2733333333333335e-05,
169
- "loss": 0.4381,
170
- "step": 210
171
- },
172
- {
173
- "epoch": 2.0,
174
- "grad_norm": 17.910232543945312,
175
- "learning_rate": 2.2066666666666667e-05,
176
- "loss": 0.2634,
177
- "step": 220
178
- },
179
- {
180
- "epoch": 2.0,
181
- "eval_accuracy": 0.9202830188679245,
182
- "eval_entity_accuracy": 0.8863947440459896,
183
- "eval_f1": 0.9039013364488941,
184
- "eval_loss": 0.2899700701236725,
185
- "eval_precision": 0.9016844893810626,
186
- "eval_recall": 0.9202830188679245,
187
- "eval_runtime": 2.6518,
188
- "eval_samples_per_second": 164.792,
189
- "eval_steps_per_second": 10.559,
190
- "step": 220
191
- },
192
- {
193
- "epoch": 2.090909090909091,
194
- "grad_norm": 11.105402946472168,
195
- "learning_rate": 2.1400000000000002e-05,
196
- "loss": 0.2834,
197
- "step": 230
198
- },
199
- {
200
- "epoch": 2.1818181818181817,
201
- "grad_norm": 6.341246604919434,
202
- "learning_rate": 2.0733333333333334e-05,
203
- "loss": 0.2477,
204
- "step": 240
205
- },
206
- {
207
- "epoch": 2.2727272727272725,
208
- "grad_norm": 4.475722312927246,
209
- "learning_rate": 2.0066666666666665e-05,
210
- "loss": 0.2042,
211
- "step": 250
212
- },
213
- {
214
- "epoch": 2.3636363636363638,
215
- "grad_norm": 3.8562307357788086,
216
- "learning_rate": 1.9399999999999997e-05,
217
- "loss": 0.2935,
218
- "step": 260
219
- },
220
- {
221
- "epoch": 2.4545454545454546,
222
- "grad_norm": 3.3338725566864014,
223
- "learning_rate": 1.8733333333333336e-05,
224
- "loss": 0.2232,
225
- "step": 270
226
- },
227
- {
228
- "epoch": 2.5454545454545454,
229
- "grad_norm": 3.610917568206787,
230
- "learning_rate": 1.8066666666666668e-05,
231
- "loss": 0.2578,
232
- "step": 280
233
- },
234
- {
235
- "epoch": 2.6363636363636362,
236
- "grad_norm": 6.411995887756348,
237
- "learning_rate": 1.74e-05,
238
- "loss": 0.2617,
239
- "step": 290
240
- },
241
- {
242
- "epoch": 2.7272727272727275,
243
- "grad_norm": 18.01243782043457,
244
- "learning_rate": 1.6733333333333335e-05,
245
- "loss": 0.2323,
246
- "step": 300
247
- },
248
- {
249
- "epoch": 2.8181818181818183,
250
- "grad_norm": 6.243415832519531,
251
- "learning_rate": 1.6066666666666666e-05,
252
- "loss": 0.2226,
253
- "step": 310
254
- },
255
- {
256
- "epoch": 2.909090909090909,
257
- "grad_norm": 3.8026018142700195,
258
- "learning_rate": 1.5399999999999998e-05,
259
- "loss": 0.1841,
260
- "step": 320
261
- },
262
- {
263
- "epoch": 3.0,
264
- "grad_norm": 4.6072797775268555,
265
- "learning_rate": 1.4733333333333333e-05,
266
- "loss": 0.2261,
267
- "step": 330
268
- },
269
- {
270
- "epoch": 3.0,
271
- "eval_accuracy": 0.9429245283018868,
272
- "eval_entity_accuracy": 0.9280043799616753,
273
- "eval_f1": 0.9367311510359461,
274
- "eval_loss": 0.22553826868534088,
275
- "eval_precision": 0.9404566874612201,
276
- "eval_recall": 0.9429245283018868,
277
- "eval_runtime": 2.6044,
278
- "eval_samples_per_second": 167.791,
279
- "eval_steps_per_second": 10.751,
280
- "step": 330
281
- },
282
- {
283
- "epoch": 3.090909090909091,
284
- "grad_norm": 3.598348617553711,
285
- "learning_rate": 1.4066666666666667e-05,
286
- "loss": 0.2008,
287
- "step": 340
288
- },
289
- {
290
- "epoch": 3.1818181818181817,
291
- "grad_norm": 3.3810553550720215,
292
- "learning_rate": 1.34e-05,
293
- "loss": 0.1801,
294
- "step": 350
295
- },
296
- {
297
- "epoch": 3.2727272727272725,
298
- "grad_norm": 2.5113697052001953,
299
- "learning_rate": 1.2733333333333334e-05,
300
- "loss": 0.1917,
301
- "step": 360
302
- },
303
- {
304
- "epoch": 3.3636363636363638,
305
- "grad_norm": 5.911487102508545,
306
- "learning_rate": 1.2066666666666666e-05,
307
- "loss": 0.1861,
308
- "step": 370
309
- },
310
- {
311
- "epoch": 3.4545454545454546,
312
- "grad_norm": 4.358926296234131,
313
- "learning_rate": 1.1400000000000001e-05,
314
- "loss": 0.1568,
315
- "step": 380
316
- },
317
- {
318
- "epoch": 3.5454545454545454,
319
- "grad_norm": 3.0283701419830322,
320
- "learning_rate": 1.0733333333333333e-05,
321
- "loss": 0.2127,
322
- "step": 390
323
- },
324
- {
325
- "epoch": 3.6363636363636362,
326
- "grad_norm": 8.457649230957031,
327
- "learning_rate": 1.0066666666666666e-05,
328
- "loss": 0.1752,
329
- "step": 400
330
- },
331
- {
332
- "epoch": 3.7272727272727275,
333
- "grad_norm": 2.7831077575683594,
334
- "learning_rate": 9.400000000000001e-06,
335
- "loss": 0.2328,
336
- "step": 410
337
- },
338
- {
339
- "epoch": 3.8181818181818183,
340
- "grad_norm": 2.8275463581085205,
341
- "learning_rate": 8.733333333333333e-06,
342
- "loss": 0.1345,
343
- "step": 420
344
- },
345
- {
346
- "epoch": 3.909090909090909,
347
- "grad_norm": 2.621518850326538,
348
- "learning_rate": 8.066666666666667e-06,
349
- "loss": 0.1343,
350
- "step": 430
351
- },
352
- {
353
- "epoch": 4.0,
354
- "grad_norm": 12.00428295135498,
355
- "learning_rate": 7.4e-06,
356
- "loss": 0.1415,
357
- "step": 440
358
- },
359
- {
360
- "epoch": 4.0,
361
- "eval_accuracy": 0.9476415094339623,
362
- "eval_entity_accuracy": 0.9277306323569668,
363
- "eval_f1": 0.940876969962022,
364
- "eval_loss": 0.20769040286540985,
365
- "eval_precision": 0.9463580459003759,
366
- "eval_recall": 0.9476415094339623,
367
- "eval_runtime": 2.6645,
368
- "eval_samples_per_second": 164.008,
369
- "eval_steps_per_second": 10.509,
370
- "step": 440
371
- }
372
- ],
373
- "logging_steps": 10,
374
- "max_steps": 550,
375
- "num_input_tokens_seen": 0,
376
- "num_train_epochs": 5,
377
- "save_steps": 500,
378
- "stateful_callbacks": {
379
- "EarlyStoppingCallback": {
380
- "args": {
381
- "early_stopping_patience": 3,
382
- "early_stopping_threshold": 0.0
383
- },
384
- "attributes": {
385
- "early_stopping_patience_counter": 0
386
- }
387
- },
388
- "TrainerControl": {
389
- "args": {
390
- "should_epoch_stop": false,
391
- "should_evaluate": false,
392
- "should_log": false,
393
- "should_save": true,
394
- "should_training_stop": false
395
- },
396
- "attributes": {}
397
- }
398
- },
399
- "total_flos": 456816930902016.0,
400
- "train_batch_size": 16,
401
- "trial_name": null,
402
- "trial_params": null
403
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-440/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b10c00a9a1c9932d65673b77c6d1b79c2c5dff3551979c1efd6bd5406bf6626
3
- size 5777
 
 
 
 
checkpoint-440/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-550/config.json DELETED
@@ -1,68 +0,0 @@
1
- {
2
- "architectures": [
3
- "RobertaForTokenClassification"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
- "classifier_dropout": null,
8
- "dtype": "float32",
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "O",
15
- "1": "B-NAME",
16
- "2": "I-NAME",
17
- "3": "B-PHONE",
18
- "4": "I-PHONE",
19
- "5": "B-EMAIL",
20
- "6": "I-EMAIL",
21
- "7": "B-ADDRESS",
22
- "8": "I-ADDRESS",
23
- "9": "B-BIRTHDAY",
24
- "10": "I-BIRTHDAY",
25
- "11": "B-TAG",
26
- "12": "I-TAG",
27
- "13": "B-NOTE_TEXT",
28
- "14": "I-NOTE_TEXT",
29
- "15": "B-ID",
30
- "16": "I-ID",
31
- "17": "B-DAYS",
32
- "18": "I-DAYS"
33
- },
34
- "initializer_range": 0.02,
35
- "intermediate_size": 3072,
36
- "label2id": {
37
- "B-ADDRESS": 7,
38
- "B-BIRTHDAY": 9,
39
- "B-DAYS": 17,
40
- "B-EMAIL": 5,
41
- "B-ID": 15,
42
- "B-NAME": 1,
43
- "B-NOTE_TEXT": 13,
44
- "B-PHONE": 3,
45
- "B-TAG": 11,
46
- "I-ADDRESS": 8,
47
- "I-BIRTHDAY": 10,
48
- "I-DAYS": 18,
49
- "I-EMAIL": 6,
50
- "I-ID": 16,
51
- "I-NAME": 2,
52
- "I-NOTE_TEXT": 14,
53
- "I-PHONE": 4,
54
- "I-TAG": 12,
55
- "O": 0
56
- },
57
- "layer_norm_eps": 1e-05,
58
- "max_position_embeddings": 514,
59
- "model_type": "roberta",
60
- "num_attention_heads": 12,
61
- "num_hidden_layers": 12,
62
- "pad_token_id": 1,
63
- "position_embedding_type": "absolute",
64
- "transformers_version": "4.57.0",
65
- "type_vocab_size": 1,
66
- "use_cache": true,
67
- "vocab_size": 50265
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-550/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-550/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:957b3bb302656a94b3d310dbb314687051d3ec5466f0007ec9935e7cbb2c3dca
3
- size 496302532
 
 
 
 
checkpoint-550/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c93dd40f9e9461168846f2f3a281b115fb0ade484d2c1ad57ebf3053dc98d49
3
- size 992718539
 
 
 
 
checkpoint-550/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b652c6269b998b96ab924b2734c0818fab436c642524e13fc6cd4d9082e62b5
3
- size 14455
 
 
 
 
checkpoint-550/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8347c87874a9ce917c0e044950f2dad7926c4a36308ea197e55b33fd6803a815
3
- size 1465
 
 
 
 
checkpoint-550/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-550/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-550/tokenizer_config.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": false,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "extra_special_tokens": {},
51
- "mask_token": "<mask>",
52
- "model_max_length": 512,
53
- "pad_token": "<pad>",
54
- "sep_token": "</s>",
55
- "tokenizer_class": "RobertaTokenizer",
56
- "trim_offsets": true,
57
- "unk_token": "<unk>"
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-550/trainer_state.json DELETED
@@ -1,493 +0,0 @@
1
- {
2
- "best_global_step": 550,
3
- "best_metric": 0.9458296266430395,
4
- "best_model_checkpoint": "models/ner_model/checkpoint-550",
5
- "epoch": 5.0,
6
- "eval_steps": 500,
7
- "global_step": 550,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.09090909090909091,
14
- "grad_norm": 6.7610626220703125,
15
- "learning_rate": 2.7e-06,
16
- "loss": 3.1763,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.18181818181818182,
21
- "grad_norm": 8.019376754760742,
22
- "learning_rate": 5.7000000000000005e-06,
23
- "loss": 3.0538,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.2727272727272727,
28
- "grad_norm": 8.582132339477539,
29
- "learning_rate": 8.7e-06,
30
- "loss": 2.7102,
31
- "step": 30
32
- },
33
- {
34
- "epoch": 0.36363636363636365,
35
- "grad_norm": 7.020857810974121,
36
- "learning_rate": 1.1700000000000001e-05,
37
- "loss": 1.9306,
38
- "step": 40
39
- },
40
- {
41
- "epoch": 0.45454545454545453,
42
- "grad_norm": 6.591127872467041,
43
- "learning_rate": 1.47e-05,
44
- "loss": 1.5462,
45
- "step": 50
46
- },
47
- {
48
- "epoch": 0.5454545454545454,
49
- "grad_norm": 6.085566520690918,
50
- "learning_rate": 1.77e-05,
51
- "loss": 1.158,
52
- "step": 60
53
- },
54
- {
55
- "epoch": 0.6363636363636364,
56
- "grad_norm": 10.360540390014648,
57
- "learning_rate": 2.07e-05,
58
- "loss": 0.88,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.7272727272727273,
63
- "grad_norm": 4.375982761383057,
64
- "learning_rate": 2.37e-05,
65
- "loss": 0.6575,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.8181818181818182,
70
- "grad_norm": 10.760860443115234,
71
- "learning_rate": 2.6700000000000002e-05,
72
- "loss": 0.6622,
73
- "step": 90
74
- },
75
- {
76
- "epoch": 0.9090909090909091,
77
- "grad_norm": 8.98079776763916,
78
- "learning_rate": 2.97e-05,
79
- "loss": 0.6058,
80
- "step": 100
81
- },
82
- {
83
- "epoch": 1.0,
84
- "grad_norm": 7.706189155578613,
85
- "learning_rate": 2.94e-05,
86
- "loss": 0.509,
87
- "step": 110
88
- },
89
- {
90
- "epoch": 1.0,
91
- "eval_accuracy": 0.8916666666666667,
92
- "eval_entity_accuracy": 0.8543662742950999,
93
- "eval_f1": 0.8670505036976651,
94
- "eval_loss": 0.38678058981895447,
95
- "eval_precision": 0.861471336118019,
96
- "eval_recall": 0.8916666666666667,
97
- "eval_runtime": 2.6176,
98
- "eval_samples_per_second": 166.949,
99
- "eval_steps_per_second": 10.697,
100
- "step": 110
101
- },
102
- {
103
- "epoch": 1.0909090909090908,
104
- "grad_norm": 4.655617713928223,
105
- "learning_rate": 2.873333333333333e-05,
106
- "loss": 0.3582,
107
- "step": 120
108
- },
109
- {
110
- "epoch": 1.1818181818181819,
111
- "grad_norm": 6.469715118408203,
112
- "learning_rate": 2.8066666666666667e-05,
113
- "loss": 0.4039,
114
- "step": 130
115
- },
116
- {
117
- "epoch": 1.2727272727272727,
118
- "grad_norm": 6.508236408233643,
119
- "learning_rate": 2.7400000000000002e-05,
120
- "loss": 0.3456,
121
- "step": 140
122
- },
123
- {
124
- "epoch": 1.3636363636363638,
125
- "grad_norm": 7.3737874031066895,
126
- "learning_rate": 2.6733333333333334e-05,
127
- "loss": 0.4672,
128
- "step": 150
129
- },
130
- {
131
- "epoch": 1.4545454545454546,
132
- "grad_norm": 6.929003715515137,
133
- "learning_rate": 2.606666666666667e-05,
134
- "loss": 0.3472,
135
- "step": 160
136
- },
137
- {
138
- "epoch": 1.5454545454545454,
139
- "grad_norm": 5.1204986572265625,
140
- "learning_rate": 2.54e-05,
141
- "loss": 0.3808,
142
- "step": 170
143
- },
144
- {
145
- "epoch": 1.6363636363636362,
146
- "grad_norm": 3.2450709342956543,
147
- "learning_rate": 2.4733333333333333e-05,
148
- "loss": 0.2377,
149
- "step": 180
150
- },
151
- {
152
- "epoch": 1.7272727272727273,
153
- "grad_norm": 3.943157196044922,
154
- "learning_rate": 2.4066666666666664e-05,
155
- "loss": 0.34,
156
- "step": 190
157
- },
158
- {
159
- "epoch": 1.8181818181818183,
160
- "grad_norm": 4.091476917266846,
161
- "learning_rate": 2.3400000000000003e-05,
162
- "loss": 0.3028,
163
- "step": 200
164
- },
165
- {
166
- "epoch": 1.9090909090909092,
167
- "grad_norm": 7.115076541900635,
168
- "learning_rate": 2.2733333333333335e-05,
169
- "loss": 0.4381,
170
- "step": 210
171
- },
172
- {
173
- "epoch": 2.0,
174
- "grad_norm": 17.910232543945312,
175
- "learning_rate": 2.2066666666666667e-05,
176
- "loss": 0.2634,
177
- "step": 220
178
- },
179
- {
180
- "epoch": 2.0,
181
- "eval_accuracy": 0.9202830188679245,
182
- "eval_entity_accuracy": 0.8863947440459896,
183
- "eval_f1": 0.9039013364488941,
184
- "eval_loss": 0.2899700701236725,
185
- "eval_precision": 0.9016844893810626,
186
- "eval_recall": 0.9202830188679245,
187
- "eval_runtime": 2.6518,
188
- "eval_samples_per_second": 164.792,
189
- "eval_steps_per_second": 10.559,
190
- "step": 220
191
- },
192
- {
193
- "epoch": 2.090909090909091,
194
- "grad_norm": 11.105402946472168,
195
- "learning_rate": 2.1400000000000002e-05,
196
- "loss": 0.2834,
197
- "step": 230
198
- },
199
- {
200
- "epoch": 2.1818181818181817,
201
- "grad_norm": 6.341246604919434,
202
- "learning_rate": 2.0733333333333334e-05,
203
- "loss": 0.2477,
204
- "step": 240
205
- },
206
- {
207
- "epoch": 2.2727272727272725,
208
- "grad_norm": 4.475722312927246,
209
- "learning_rate": 2.0066666666666665e-05,
210
- "loss": 0.2042,
211
- "step": 250
212
- },
213
- {
214
- "epoch": 2.3636363636363638,
215
- "grad_norm": 3.8562307357788086,
216
- "learning_rate": 1.9399999999999997e-05,
217
- "loss": 0.2935,
218
- "step": 260
219
- },
220
- {
221
- "epoch": 2.4545454545454546,
222
- "grad_norm": 3.3338725566864014,
223
- "learning_rate": 1.8733333333333336e-05,
224
- "loss": 0.2232,
225
- "step": 270
226
- },
227
- {
228
- "epoch": 2.5454545454545454,
229
- "grad_norm": 3.610917568206787,
230
- "learning_rate": 1.8066666666666668e-05,
231
- "loss": 0.2578,
232
- "step": 280
233
- },
234
- {
235
- "epoch": 2.6363636363636362,
236
- "grad_norm": 6.411995887756348,
237
- "learning_rate": 1.74e-05,
238
- "loss": 0.2617,
239
- "step": 290
240
- },
241
- {
242
- "epoch": 2.7272727272727275,
243
- "grad_norm": 18.01243782043457,
244
- "learning_rate": 1.6733333333333335e-05,
245
- "loss": 0.2323,
246
- "step": 300
247
- },
248
- {
249
- "epoch": 2.8181818181818183,
250
- "grad_norm": 6.243415832519531,
251
- "learning_rate": 1.6066666666666666e-05,
252
- "loss": 0.2226,
253
- "step": 310
254
- },
255
- {
256
- "epoch": 2.909090909090909,
257
- "grad_norm": 3.8026018142700195,
258
- "learning_rate": 1.5399999999999998e-05,
259
- "loss": 0.1841,
260
- "step": 320
261
- },
262
- {
263
- "epoch": 3.0,
264
- "grad_norm": 4.6072797775268555,
265
- "learning_rate": 1.4733333333333333e-05,
266
- "loss": 0.2261,
267
- "step": 330
268
- },
269
- {
270
- "epoch": 3.0,
271
- "eval_accuracy": 0.9429245283018868,
272
- "eval_entity_accuracy": 0.9280043799616753,
273
- "eval_f1": 0.9367311510359461,
274
- "eval_loss": 0.22553826868534088,
275
- "eval_precision": 0.9404566874612201,
276
- "eval_recall": 0.9429245283018868,
277
- "eval_runtime": 2.6044,
278
- "eval_samples_per_second": 167.791,
279
- "eval_steps_per_second": 10.751,
280
- "step": 330
281
- },
282
- {
283
- "epoch": 3.090909090909091,
284
- "grad_norm": 3.598348617553711,
285
- "learning_rate": 1.4066666666666667e-05,
286
- "loss": 0.2008,
287
- "step": 340
288
- },
289
- {
290
- "epoch": 3.1818181818181817,
291
- "grad_norm": 3.3810553550720215,
292
- "learning_rate": 1.34e-05,
293
- "loss": 0.1801,
294
- "step": 350
295
- },
296
- {
297
- "epoch": 3.2727272727272725,
298
- "grad_norm": 2.5113697052001953,
299
- "learning_rate": 1.2733333333333334e-05,
300
- "loss": 0.1917,
301
- "step": 360
302
- },
303
- {
304
- "epoch": 3.3636363636363638,
305
- "grad_norm": 5.911487102508545,
306
- "learning_rate": 1.2066666666666666e-05,
307
- "loss": 0.1861,
308
- "step": 370
309
- },
310
- {
311
- "epoch": 3.4545454545454546,
312
- "grad_norm": 4.358926296234131,
313
- "learning_rate": 1.1400000000000001e-05,
314
- "loss": 0.1568,
315
- "step": 380
316
- },
317
- {
318
- "epoch": 3.5454545454545454,
319
- "grad_norm": 3.0283701419830322,
320
- "learning_rate": 1.0733333333333333e-05,
321
- "loss": 0.2127,
322
- "step": 390
323
- },
324
- {
325
- "epoch": 3.6363636363636362,
326
- "grad_norm": 8.457649230957031,
327
- "learning_rate": 1.0066666666666666e-05,
328
- "loss": 0.1752,
329
- "step": 400
330
- },
331
- {
332
- "epoch": 3.7272727272727275,
333
- "grad_norm": 2.7831077575683594,
334
- "learning_rate": 9.400000000000001e-06,
335
- "loss": 0.2328,
336
- "step": 410
337
- },
338
- {
339
- "epoch": 3.8181818181818183,
340
- "grad_norm": 2.8275463581085205,
341
- "learning_rate": 8.733333333333333e-06,
342
- "loss": 0.1345,
343
- "step": 420
344
- },
345
- {
346
- "epoch": 3.909090909090909,
347
- "grad_norm": 2.621518850326538,
348
- "learning_rate": 8.066666666666667e-06,
349
- "loss": 0.1343,
350
- "step": 430
351
- },
352
- {
353
- "epoch": 4.0,
354
- "grad_norm": 12.00428295135498,
355
- "learning_rate": 7.4e-06,
356
- "loss": 0.1415,
357
- "step": 440
358
- },
359
- {
360
- "epoch": 4.0,
361
- "eval_accuracy": 0.9476415094339623,
362
- "eval_entity_accuracy": 0.9277306323569668,
363
- "eval_f1": 0.940876969962022,
364
- "eval_loss": 0.20769040286540985,
365
- "eval_precision": 0.9463580459003759,
366
- "eval_recall": 0.9476415094339623,
367
- "eval_runtime": 2.6645,
368
- "eval_samples_per_second": 164.008,
369
- "eval_steps_per_second": 10.509,
370
- "step": 440
371
- },
372
- {
373
- "epoch": 4.090909090909091,
374
- "grad_norm": 4.293393611907959,
375
- "learning_rate": 6.733333333333334e-06,
376
- "loss": 0.1635,
377
- "step": 450
378
- },
379
- {
380
- "epoch": 4.181818181818182,
381
- "grad_norm": 6.984195709228516,
382
- "learning_rate": 6.0666666666666665e-06,
383
- "loss": 0.1462,
384
- "step": 460
385
- },
386
- {
387
- "epoch": 4.2727272727272725,
388
- "grad_norm": 2.344493865966797,
389
- "learning_rate": 5.4e-06,
390
- "loss": 0.1356,
391
- "step": 470
392
- },
393
- {
394
- "epoch": 4.363636363636363,
395
- "grad_norm": 2.468249797821045,
396
- "learning_rate": 4.7333333333333335e-06,
397
- "loss": 0.1248,
398
- "step": 480
399
- },
400
- {
401
- "epoch": 4.454545454545454,
402
- "grad_norm": 1.7062819004058838,
403
- "learning_rate": 4.066666666666667e-06,
404
- "loss": 0.0904,
405
- "step": 490
406
- },
407
- {
408
- "epoch": 4.545454545454545,
409
- "grad_norm": 1.412717580795288,
410
- "learning_rate": 3.4e-06,
411
- "loss": 0.1255,
412
- "step": 500
413
- },
414
- {
415
- "epoch": 4.636363636363637,
416
- "grad_norm": 2.9533627033233643,
417
- "learning_rate": 2.7333333333333336e-06,
418
- "loss": 0.1249,
419
- "step": 510
420
- },
421
- {
422
- "epoch": 4.7272727272727275,
423
- "grad_norm": 11.376725196838379,
424
- "learning_rate": 2.0666666666666666e-06,
425
- "loss": 0.1459,
426
- "step": 520
427
- },
428
- {
429
- "epoch": 4.818181818181818,
430
- "grad_norm": 24.830320358276367,
431
- "learning_rate": 1.4000000000000001e-06,
432
- "loss": 0.1765,
433
- "step": 530
434
- },
435
- {
436
- "epoch": 4.909090909090909,
437
- "grad_norm": 3.52287220954895,
438
- "learning_rate": 7.333333333333334e-07,
439
- "loss": 0.192,
440
- "step": 540
441
- },
442
- {
443
- "epoch": 5.0,
444
- "grad_norm": 28.654787063598633,
445
- "learning_rate": 6.666666666666667e-08,
446
- "loss": 0.1539,
447
- "step": 550
448
- },
449
- {
450
- "epoch": 5.0,
451
- "eval_accuracy": 0.9511006289308176,
452
- "eval_entity_accuracy": 0.9370380509170545,
453
- "eval_f1": 0.9458296266430395,
454
- "eval_loss": 0.18345355987548828,
455
- "eval_precision": 0.9486871211363513,
456
- "eval_recall": 0.9511006289308176,
457
- "eval_runtime": 2.7757,
458
- "eval_samples_per_second": 157.438,
459
- "eval_steps_per_second": 10.088,
460
- "step": 550
461
- }
462
- ],
463
- "logging_steps": 10,
464
- "max_steps": 550,
465
- "num_input_tokens_seen": 0,
466
- "num_train_epochs": 5,
467
- "save_steps": 500,
468
- "stateful_callbacks": {
469
- "EarlyStoppingCallback": {
470
- "args": {
471
- "early_stopping_patience": 3,
472
- "early_stopping_threshold": 0.0
473
- },
474
- "attributes": {
475
- "early_stopping_patience_counter": 0
476
- }
477
- },
478
- "TrainerControl": {
479
- "args": {
480
- "should_epoch_stop": false,
481
- "should_evaluate": false,
482
- "should_log": false,
483
- "should_save": true,
484
- "should_training_stop": true
485
- },
486
- "attributes": {}
487
- }
488
- },
489
- "total_flos": 571021163627520.0,
490
- "train_batch_size": 16,
491
- "trial_name": null,
492
- "trial_params": null
493
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-550/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b10c00a9a1c9932d65673b77c6d1b79c2c5dff3551979c1efd6bd5406bf6626
3
- size 5777
 
 
 
 
checkpoint-550/vocab.json DELETED
The diff for this file is too large to render. See raw diff