Trisham97 commited on
Commit
fb6c332
·
verified ·
1 Parent(s): 38b2add

Add fine-tuned energy NER model

Browse files
checkpoint-1144/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForTokenClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": null,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": "O",
15
+ "1": "B-PERSON",
16
+ "2": "I-PERSON",
17
+ "3": "B-ORGANIZATION",
18
+ "4": "I-ORGANIZATION",
19
+ "5": "B-LOCATION",
20
+ "6": "I-LOCATION",
21
+ "7": "B-COMMODITY",
22
+ "8": "I-COMMODITY",
23
+ "9": "B-EVENT",
24
+ "10": "I-EVENT",
25
+ "11": "B-INFRASTRUCTURE",
26
+ "12": "I-INFRASTRUCTURE",
27
+ "13": "B-MARKET",
28
+ "14": "I-MARKET",
29
+ "15": "B-COUNTRY",
30
+ "16": "I-COUNTRY",
31
+ "17": "B-COMPANY",
32
+ "18": "I-COMPANY"
33
+ },
34
+ "initializer_range": 0.02,
35
+ "label2id": {
36
+ "B-COMMODITY": 7,
37
+ "B-COMPANY": 17,
38
+ "B-COUNTRY": 15,
39
+ "B-EVENT": 9,
40
+ "B-INFRASTRUCTURE": 11,
41
+ "B-LOCATION": 5,
42
+ "B-MARKET": 13,
43
+ "B-ORGANIZATION": 3,
44
+ "B-PERSON": 1,
45
+ "I-COMMODITY": 8,
46
+ "I-COMPANY": 18,
47
+ "I-COUNTRY": 16,
48
+ "I-EVENT": 10,
49
+ "I-INFRASTRUCTURE": 12,
50
+ "I-LOCATION": 6,
51
+ "I-MARKET": 14,
52
+ "I-ORGANIZATION": 4,
53
+ "I-PERSON": 2,
54
+ "O": 0
55
+ },
56
+ "max_position_embeddings": 512,
57
+ "model_type": "distilbert",
58
+ "n_heads": 12,
59
+ "n_layers": 6,
60
+ "pad_token_id": 0,
61
+ "qa_dropout": 0.1,
62
+ "seq_classif_dropout": 0.2,
63
+ "sinusoidal_pos_embds": false,
64
+ "tie_weights_": true,
65
+ "tie_word_embeddings": true,
66
+ "transformers_version": "5.4.0",
67
+ "use_cache": false,
68
+ "vocab_size": 30522
69
+ }
checkpoint-1144/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d25632e9a1e3cfe7e93b0c3c0c42b0d2f2db5bf07485dc3f5fa8c8d40f525e03
3
+ size 265522308
checkpoint-1144/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6338cb422558aa2586651e7eca2eca6797a5a7cd223eac58e15cc14d1886648
3
+ size 531107339
checkpoint-1144/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17fd235f35f59614815cb2fd468d6f337af5f89f9394ed6d053ec8f4e83ff456
3
+ size 14645
checkpoint-1144/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:395d5cc08190d91379a1dd262d1d334180bd662bb74097683f2c0352601a5661
3
+ size 1383
checkpoint-1144/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12176d1c5282642ec71dd52f5a5b4573ece3ec4f62352bfff4065a8df1f35177
3
+ size 1465
checkpoint-1144/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1144/tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "max_length": 128,
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "stride": 0,
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "truncation_side": "right",
16
+ "truncation_strategy": "longest_first",
17
+ "unk_token": "[UNK]"
18
+ }
checkpoint-1144/trainer_state.json ADDED
@@ -0,0 +1,517 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1144,
3
+ "best_metric": 0.6738893476465732,
4
+ "best_model_checkpoint": "energy_intelligence_multitask_custom_ner/checkpoint-1144",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1144,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06993006993006994,
14
+ "grad_norm": 14.66324520111084,
15
+ "learning_rate": 2.6573426573426574e-06,
16
+ "loss": 2.297128105163574,
17
+ "step": 20
18
+ },
19
+ {
20
+ "epoch": 0.13986013986013987,
21
+ "grad_norm": 8.70977783203125,
22
+ "learning_rate": 5.4545454545454545e-06,
23
+ "loss": 1.5782511711120606,
24
+ "step": 40
25
+ },
26
+ {
27
+ "epoch": 0.2097902097902098,
28
+ "grad_norm": 0.9631044268608093,
29
+ "learning_rate": 8.251748251748254e-06,
30
+ "loss": 0.7814332008361816,
31
+ "step": 60
32
+ },
33
+ {
34
+ "epoch": 0.27972027972027974,
35
+ "grad_norm": 0.513141393661499,
36
+ "learning_rate": 1.1048951048951048e-05,
37
+ "loss": 0.4949788570404053,
38
+ "step": 80
39
+ },
40
+ {
41
+ "epoch": 0.34965034965034963,
42
+ "grad_norm": 0.3357048034667969,
43
+ "learning_rate": 1.3846153846153847e-05,
44
+ "loss": 0.4088569164276123,
45
+ "step": 100
46
+ },
47
+ {
48
+ "epoch": 0.4195804195804196,
49
+ "grad_norm": 0.44831985235214233,
50
+ "learning_rate": 1.6643356643356645e-05,
51
+ "loss": 0.3331140518188477,
52
+ "step": 120
53
+ },
54
+ {
55
+ "epoch": 0.48951048951048953,
56
+ "grad_norm": 0.38962382078170776,
57
+ "learning_rate": 1.944055944055944e-05,
58
+ "loss": 0.28518569469451904,
59
+ "step": 140
60
+ },
61
+ {
62
+ "epoch": 0.5594405594405595,
63
+ "grad_norm": 0.550012469291687,
64
+ "learning_rate": 1.9751359751359752e-05,
65
+ "loss": 0.255126428604126,
66
+ "step": 160
67
+ },
68
+ {
69
+ "epoch": 0.6293706293706294,
70
+ "grad_norm": 0.4113660752773285,
71
+ "learning_rate": 1.944055944055944e-05,
72
+ "loss": 0.23595545291900635,
73
+ "step": 180
74
+ },
75
+ {
76
+ "epoch": 0.6993006993006993,
77
+ "grad_norm": 0.5528178811073303,
78
+ "learning_rate": 1.912975912975913e-05,
79
+ "loss": 0.23385438919067383,
80
+ "step": 200
81
+ },
82
+ {
83
+ "epoch": 0.7692307692307693,
84
+ "grad_norm": 0.5938523411750793,
85
+ "learning_rate": 1.8818958818958822e-05,
86
+ "loss": 0.217702054977417,
87
+ "step": 220
88
+ },
89
+ {
90
+ "epoch": 0.8391608391608392,
91
+ "grad_norm": 0.5627780556678772,
92
+ "learning_rate": 1.850815850815851e-05,
93
+ "loss": 0.20575783252716065,
94
+ "step": 240
95
+ },
96
+ {
97
+ "epoch": 0.9090909090909091,
98
+ "grad_norm": 0.6800552606582642,
99
+ "learning_rate": 1.81973581973582e-05,
100
+ "loss": 0.20057928562164307,
101
+ "step": 260
102
+ },
103
+ {
104
+ "epoch": 0.9790209790209791,
105
+ "grad_norm": 0.5200193524360657,
106
+ "learning_rate": 1.7886557886557888e-05,
107
+ "loss": 0.1952407717704773,
108
+ "step": 280
109
+ },
110
+ {
111
+ "epoch": 1.0,
112
+ "eval_COMMODITY_f1": 0.4243,
113
+ "eval_COMPANY_f1": 0.6975,
114
+ "eval_COUNTRY_f1": 0.8103,
115
+ "eval_EVENT_f1": 0.0509,
116
+ "eval_INFRASTRUCTURE_f1": 0.0265,
117
+ "eval_LOCATION_f1": 0.5881,
118
+ "eval_MARKET_f1": 0.3048,
119
+ "eval_ORGANIZATION_f1": 0.5435,
120
+ "eval_PERSON_f1": 0.6313,
121
+ "eval_accuracy": 0.9383044022343244,
122
+ "eval_f1": 0.5853641863485661,
123
+ "eval_loss": 0.18643628060817719,
124
+ "eval_precision": 0.5841462582713691,
125
+ "eval_recall": 0.5865872037265378,
126
+ "eval_runtime": 4.3741,
127
+ "eval_samples_per_second": 261.541,
128
+ "eval_steps_per_second": 4.115,
129
+ "step": 286
130
+ },
131
+ {
132
+ "epoch": 1.048951048951049,
133
+ "grad_norm": 0.45493006706237793,
134
+ "learning_rate": 1.7575757575757576e-05,
135
+ "loss": 0.18901759386062622,
136
+ "step": 300
137
+ },
138
+ {
139
+ "epoch": 1.118881118881119,
140
+ "grad_norm": 0.4867892861366272,
141
+ "learning_rate": 1.7264957264957267e-05,
142
+ "loss": 0.18016949892044068,
143
+ "step": 320
144
+ },
145
+ {
146
+ "epoch": 1.1888111888111887,
147
+ "grad_norm": 0.5517728328704834,
148
+ "learning_rate": 1.6954156954156954e-05,
149
+ "loss": 0.18962303400039673,
150
+ "step": 340
151
+ },
152
+ {
153
+ "epoch": 1.2587412587412588,
154
+ "grad_norm": 0.5174335241317749,
155
+ "learning_rate": 1.6643356643356645e-05,
156
+ "loss": 0.18070975542068482,
157
+ "step": 360
158
+ },
159
+ {
160
+ "epoch": 1.3286713286713288,
161
+ "grad_norm": 0.5536178350448608,
162
+ "learning_rate": 1.6332556332556333e-05,
163
+ "loss": 0.17582471370697023,
164
+ "step": 380
165
+ },
166
+ {
167
+ "epoch": 1.3986013986013985,
168
+ "grad_norm": 0.49411725997924805,
169
+ "learning_rate": 1.6021756021756024e-05,
170
+ "loss": 0.17660335302352906,
171
+ "step": 400
172
+ },
173
+ {
174
+ "epoch": 1.4685314685314685,
175
+ "grad_norm": 0.37996432185173035,
176
+ "learning_rate": 1.5710955710955715e-05,
177
+ "loss": 0.1685216546058655,
178
+ "step": 420
179
+ },
180
+ {
181
+ "epoch": 1.5384615384615383,
182
+ "grad_norm": 0.5632461905479431,
183
+ "learning_rate": 1.5400155400155402e-05,
184
+ "loss": 0.1708904027938843,
185
+ "step": 440
186
+ },
187
+ {
188
+ "epoch": 1.6083916083916083,
189
+ "grad_norm": 0.4930890202522278,
190
+ "learning_rate": 1.5089355089355091e-05,
191
+ "loss": 0.16708383560180665,
192
+ "step": 460
193
+ },
194
+ {
195
+ "epoch": 1.6783216783216783,
196
+ "grad_norm": 0.5667280554771423,
197
+ "learning_rate": 1.4778554778554779e-05,
198
+ "loss": 0.16816866397857666,
199
+ "step": 480
200
+ },
201
+ {
202
+ "epoch": 1.7482517482517483,
203
+ "grad_norm": 0.5394991040229797,
204
+ "learning_rate": 1.4467754467754468e-05,
205
+ "loss": 0.15965052843093872,
206
+ "step": 500
207
+ },
208
+ {
209
+ "epoch": 1.8181818181818183,
210
+ "grad_norm": 0.4395284354686737,
211
+ "learning_rate": 1.415695415695416e-05,
212
+ "loss": 0.16719096899032593,
213
+ "step": 520
214
+ },
215
+ {
216
+ "epoch": 1.8881118881118881,
217
+ "grad_norm": 0.44819605350494385,
218
+ "learning_rate": 1.3846153846153847e-05,
219
+ "loss": 0.16433268785476685,
220
+ "step": 540
221
+ },
222
+ {
223
+ "epoch": 1.958041958041958,
224
+ "grad_norm": 0.630233645439148,
225
+ "learning_rate": 1.3535353535353538e-05,
226
+ "loss": 0.1701101541519165,
227
+ "step": 560
228
+ },
229
+ {
230
+ "epoch": 2.0,
231
+ "eval_COMMODITY_f1": 0.5098,
232
+ "eval_COMPANY_f1": 0.7415,
233
+ "eval_COUNTRY_f1": 0.8307,
234
+ "eval_EVENT_f1": 0.2393,
235
+ "eval_INFRASTRUCTURE_f1": 0.2385,
236
+ "eval_LOCATION_f1": 0.6418,
237
+ "eval_MARKET_f1": 0.3517,
238
+ "eval_ORGANIZATION_f1": 0.6229,
239
+ "eval_PERSON_f1": 0.6953,
240
+ "eval_accuracy": 0.9454946483875253,
241
+ "eval_f1": 0.6355833913245186,
242
+ "eval_loss": 0.15933312475681305,
243
+ "eval_precision": 0.6155722446883626,
244
+ "eval_recall": 0.6569393067543499,
245
+ "eval_runtime": 4.404,
246
+ "eval_samples_per_second": 259.765,
247
+ "eval_steps_per_second": 4.087,
248
+ "step": 572
249
+ },
250
+ {
251
+ "epoch": 2.027972027972028,
252
+ "grad_norm": 0.6636055111885071,
253
+ "learning_rate": 1.3224553224553225e-05,
254
+ "loss": 0.16211626529693604,
255
+ "step": 580
256
+ },
257
+ {
258
+ "epoch": 2.097902097902098,
259
+ "grad_norm": 0.5422558188438416,
260
+ "learning_rate": 1.2913752913752915e-05,
261
+ "loss": 0.1504289388656616,
262
+ "step": 600
263
+ },
264
+ {
265
+ "epoch": 2.167832167832168,
266
+ "grad_norm": 0.5456656813621521,
267
+ "learning_rate": 1.2602952602952606e-05,
268
+ "loss": 0.15346094369888305,
269
+ "step": 620
270
+ },
271
+ {
272
+ "epoch": 2.237762237762238,
273
+ "grad_norm": 0.5587140321731567,
274
+ "learning_rate": 1.2292152292152293e-05,
275
+ "loss": 0.15093343257904052,
276
+ "step": 640
277
+ },
278
+ {
279
+ "epoch": 2.3076923076923075,
280
+ "grad_norm": 0.4914584457874298,
281
+ "learning_rate": 1.1981351981351982e-05,
282
+ "loss": 0.14887770414352416,
283
+ "step": 660
284
+ },
285
+ {
286
+ "epoch": 2.3776223776223775,
287
+ "grad_norm": 0.42390987277030945,
288
+ "learning_rate": 1.1670551670551672e-05,
289
+ "loss": 0.15100462436676027,
290
+ "step": 680
291
+ },
292
+ {
293
+ "epoch": 2.4475524475524475,
294
+ "grad_norm": 0.42798611521720886,
295
+ "learning_rate": 1.1359751359751361e-05,
296
+ "loss": 0.14843168258666992,
297
+ "step": 700
298
+ },
299
+ {
300
+ "epoch": 2.5174825174825175,
301
+ "grad_norm": 0.559911847114563,
302
+ "learning_rate": 1.1048951048951048e-05,
303
+ "loss": 0.14720556735992432,
304
+ "step": 720
305
+ },
306
+ {
307
+ "epoch": 2.5874125874125875,
308
+ "grad_norm": 0.5178841948509216,
309
+ "learning_rate": 1.073815073815074e-05,
310
+ "loss": 0.14619035720825196,
311
+ "step": 740
312
+ },
313
+ {
314
+ "epoch": 2.6573426573426575,
315
+ "grad_norm": 0.6083295941352844,
316
+ "learning_rate": 1.0427350427350429e-05,
317
+ "loss": 0.14750727415084838,
318
+ "step": 760
319
+ },
320
+ {
321
+ "epoch": 2.7272727272727275,
322
+ "grad_norm": 0.46528416872024536,
323
+ "learning_rate": 1.0116550116550116e-05,
324
+ "loss": 0.14334226846694947,
325
+ "step": 780
326
+ },
327
+ {
328
+ "epoch": 2.797202797202797,
329
+ "grad_norm": 0.48938772082328796,
330
+ "learning_rate": 9.805749805749807e-06,
331
+ "loss": 0.14555764198303223,
332
+ "step": 800
333
+ },
334
+ {
335
+ "epoch": 2.867132867132867,
336
+ "grad_norm": 0.4275546371936798,
337
+ "learning_rate": 9.494949494949497e-06,
338
+ "loss": 0.14947701692581178,
339
+ "step": 820
340
+ },
341
+ {
342
+ "epoch": 2.937062937062937,
343
+ "grad_norm": 0.5199507474899292,
344
+ "learning_rate": 9.184149184149184e-06,
345
+ "loss": 0.14035249948501588,
346
+ "step": 840
347
+ },
348
+ {
349
+ "epoch": 3.0,
350
+ "eval_COMMODITY_f1": 0.4887,
351
+ "eval_COMPANY_f1": 0.7755,
352
+ "eval_COUNTRY_f1": 0.8455,
353
+ "eval_EVENT_f1": 0.3252,
354
+ "eval_INFRASTRUCTURE_f1": 0.3,
355
+ "eval_LOCATION_f1": 0.6741,
356
+ "eval_MARKET_f1": 0.4189,
357
+ "eval_ORGANIZATION_f1": 0.6693,
358
+ "eval_PERSON_f1": 0.7555,
359
+ "eval_accuracy": 0.9481756978768325,
360
+ "eval_f1": 0.6681568653232964,
361
+ "eval_loss": 0.15031008422374725,
362
+ "eval_precision": 0.6422928648170385,
363
+ "eval_recall": 0.6961912590765859,
364
+ "eval_runtime": 4.4312,
365
+ "eval_samples_per_second": 258.167,
366
+ "eval_steps_per_second": 4.062,
367
+ "step": 858
368
+ },
369
+ {
370
+ "epoch": 3.006993006993007,
371
+ "grad_norm": 0.5143063068389893,
372
+ "learning_rate": 8.873348873348873e-06,
373
+ "loss": 0.13901138305664062,
374
+ "step": 860
375
+ },
376
+ {
377
+ "epoch": 3.076923076923077,
378
+ "grad_norm": 0.46927395462989807,
379
+ "learning_rate": 8.562548562548563e-06,
380
+ "loss": 0.13868144750595093,
381
+ "step": 880
382
+ },
383
+ {
384
+ "epoch": 3.1468531468531467,
385
+ "grad_norm": 0.4429858922958374,
386
+ "learning_rate": 8.251748251748254e-06,
387
+ "loss": 0.13494281768798827,
388
+ "step": 900
389
+ },
390
+ {
391
+ "epoch": 3.2167832167832167,
392
+ "grad_norm": 0.4369621276855469,
393
+ "learning_rate": 7.940947940947941e-06,
394
+ "loss": 0.13719457387924194,
395
+ "step": 920
396
+ },
397
+ {
398
+ "epoch": 3.2867132867132867,
399
+ "grad_norm": 0.5295616388320923,
400
+ "learning_rate": 7.63014763014763e-06,
401
+ "loss": 0.13230640888214112,
402
+ "step": 940
403
+ },
404
+ {
405
+ "epoch": 3.3566433566433567,
406
+ "grad_norm": 0.7585546374320984,
407
+ "learning_rate": 7.31934731934732e-06,
408
+ "loss": 0.13142707347869872,
409
+ "step": 960
410
+ },
411
+ {
412
+ "epoch": 3.4265734265734267,
413
+ "grad_norm": 0.7057129740715027,
414
+ "learning_rate": 7.008547008547009e-06,
415
+ "loss": 0.13272271156311036,
416
+ "step": 980
417
+ },
418
+ {
419
+ "epoch": 3.4965034965034967,
420
+ "grad_norm": 0.4822175204753876,
421
+ "learning_rate": 6.697746697746699e-06,
422
+ "loss": 0.13051105737686158,
423
+ "step": 1000
424
+ },
425
+ {
426
+ "epoch": 3.5664335664335667,
427
+ "grad_norm": 0.6353693008422852,
428
+ "learning_rate": 6.3869463869463875e-06,
429
+ "loss": 0.1364367723464966,
430
+ "step": 1020
431
+ },
432
+ {
433
+ "epoch": 3.6363636363636362,
434
+ "grad_norm": 0.6322542428970337,
435
+ "learning_rate": 6.076146076146077e-06,
436
+ "loss": 0.13394793272018432,
437
+ "step": 1040
438
+ },
439
+ {
440
+ "epoch": 3.7062937062937062,
441
+ "grad_norm": 0.6142822504043579,
442
+ "learning_rate": 5.765345765345766e-06,
443
+ "loss": 0.13053072690963746,
444
+ "step": 1060
445
+ },
446
+ {
447
+ "epoch": 3.7762237762237763,
448
+ "grad_norm": 0.4618857800960541,
449
+ "learning_rate": 5.4545454545454545e-06,
450
+ "loss": 0.13272134065628052,
451
+ "step": 1080
452
+ },
453
+ {
454
+ "epoch": 3.8461538461538463,
455
+ "grad_norm": 0.5411708354949951,
456
+ "learning_rate": 5.1437451437451446e-06,
457
+ "loss": 0.1273587703704834,
458
+ "step": 1100
459
+ },
460
+ {
461
+ "epoch": 3.916083916083916,
462
+ "grad_norm": 0.438820481300354,
463
+ "learning_rate": 4.832944832944833e-06,
464
+ "loss": 0.13514026403427123,
465
+ "step": 1120
466
+ },
467
+ {
468
+ "epoch": 3.986013986013986,
469
+ "grad_norm": 0.580314040184021,
470
+ "learning_rate": 4.522144522144522e-06,
471
+ "loss": 0.13724528551101683,
472
+ "step": 1140
473
+ },
474
+ {
475
+ "epoch": 4.0,
476
+ "eval_COMMODITY_f1": 0.5015,
477
+ "eval_COMPANY_f1": 0.7911,
478
+ "eval_COUNTRY_f1": 0.8487,
479
+ "eval_EVENT_f1": 0.3324,
480
+ "eval_INFRASTRUCTURE_f1": 0.3305,
481
+ "eval_LOCATION_f1": 0.6716,
482
+ "eval_MARKET_f1": 0.4217,
483
+ "eval_ORGANIZATION_f1": 0.68,
484
+ "eval_PERSON_f1": 0.7632,
485
+ "eval_accuracy": 0.9506155577771408,
486
+ "eval_f1": 0.6738893476465732,
487
+ "eval_loss": 0.1432034969329834,
488
+ "eval_precision": 0.6506984754736238,
489
+ "eval_recall": 0.6987943553911494,
490
+ "eval_runtime": 4.3982,
491
+ "eval_samples_per_second": 260.109,
492
+ "eval_steps_per_second": 4.093,
493
+ "step": 1144
494
+ }
495
+ ],
496
+ "logging_steps": 20,
497
+ "max_steps": 1430,
498
+ "num_input_tokens_seen": 0,
499
+ "num_train_epochs": 5,
500
+ "save_steps": 500,
501
+ "stateful_callbacks": {
502
+ "TrainerControl": {
503
+ "args": {
504
+ "should_epoch_stop": false,
505
+ "should_evaluate": false,
506
+ "should_log": false,
507
+ "should_save": true,
508
+ "should_training_stop": false
509
+ },
510
+ "attributes": {}
511
+ }
512
+ },
513
+ "total_flos": 2392209436311552.0,
514
+ "train_batch_size": 32,
515
+ "trial_name": null,
516
+ "trial_params": null
517
+ }
checkpoint-1144/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8204d5b5b5d20a8a826cfd288410ff08f7b4fb17860aaa84560ac542fcc5ed98
3
+ size 5265
checkpoint-1430/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForTokenClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": null,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": "O",
15
+ "1": "B-PERSON",
16
+ "2": "I-PERSON",
17
+ "3": "B-ORGANIZATION",
18
+ "4": "I-ORGANIZATION",
19
+ "5": "B-LOCATION",
20
+ "6": "I-LOCATION",
21
+ "7": "B-COMMODITY",
22
+ "8": "I-COMMODITY",
23
+ "9": "B-EVENT",
24
+ "10": "I-EVENT",
25
+ "11": "B-INFRASTRUCTURE",
26
+ "12": "I-INFRASTRUCTURE",
27
+ "13": "B-MARKET",
28
+ "14": "I-MARKET",
29
+ "15": "B-COUNTRY",
30
+ "16": "I-COUNTRY",
31
+ "17": "B-COMPANY",
32
+ "18": "I-COMPANY"
33
+ },
34
+ "initializer_range": 0.02,
35
+ "label2id": {
36
+ "B-COMMODITY": 7,
37
+ "B-COMPANY": 17,
38
+ "B-COUNTRY": 15,
39
+ "B-EVENT": 9,
40
+ "B-INFRASTRUCTURE": 11,
41
+ "B-LOCATION": 5,
42
+ "B-MARKET": 13,
43
+ "B-ORGANIZATION": 3,
44
+ "B-PERSON": 1,
45
+ "I-COMMODITY": 8,
46
+ "I-COMPANY": 18,
47
+ "I-COUNTRY": 16,
48
+ "I-EVENT": 10,
49
+ "I-INFRASTRUCTURE": 12,
50
+ "I-LOCATION": 6,
51
+ "I-MARKET": 14,
52
+ "I-ORGANIZATION": 4,
53
+ "I-PERSON": 2,
54
+ "O": 0
55
+ },
56
+ "max_position_embeddings": 512,
57
+ "model_type": "distilbert",
58
+ "n_heads": 12,
59
+ "n_layers": 6,
60
+ "pad_token_id": 0,
61
+ "qa_dropout": 0.1,
62
+ "seq_classif_dropout": 0.2,
63
+ "sinusoidal_pos_embds": false,
64
+ "tie_weights_": true,
65
+ "tie_word_embeddings": true,
66
+ "transformers_version": "5.4.0",
67
+ "use_cache": false,
68
+ "vocab_size": 30522
69
+ }
checkpoint-1430/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ec2f9a7b2be096b72416ac4621e438efe83b6fe557ab5f2cf2ce0cf02dc026
3
+ size 265522308
checkpoint-1430/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:911e7b31330ed5368ef8736cfc3ee9f74dce027ee9f80f916f547e9d5f7396ef
3
+ size 531107339
checkpoint-1430/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d80aee595ed1eee38394577792f7e96fb5e130d74dd64e897732e1d818d8c6
3
+ size 14645
checkpoint-1430/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:257a2a904def3cf63a0812c7134928c8eefda9fd8c5577afaa60eb81d61073d3
3
+ size 1383
checkpoint-1430/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:622bdd6c2187356192d2050258720e8bd0fed29be8320778a28cdd5879315dfc
3
+ size 1465
checkpoint-1430/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1430/tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "max_length": 128,
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "stride": 0,
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "truncation_side": "right",
16
+ "truncation_strategy": "longest_first",
17
+ "unk_token": "[UNK]"
18
+ }
checkpoint-1430/trainer_state.json ADDED
@@ -0,0 +1,636 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1430,
3
+ "best_metric": 0.6789528523052879,
4
+ "best_model_checkpoint": "energy_intelligence_multitask_custom_ner/checkpoint-1430",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1430,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06993006993006994,
14
+ "grad_norm": 14.66324520111084,
15
+ "learning_rate": 2.6573426573426574e-06,
16
+ "loss": 2.297128105163574,
17
+ "step": 20
18
+ },
19
+ {
20
+ "epoch": 0.13986013986013987,
21
+ "grad_norm": 8.70977783203125,
22
+ "learning_rate": 5.4545454545454545e-06,
23
+ "loss": 1.5782511711120606,
24
+ "step": 40
25
+ },
26
+ {
27
+ "epoch": 0.2097902097902098,
28
+ "grad_norm": 0.9631044268608093,
29
+ "learning_rate": 8.251748251748254e-06,
30
+ "loss": 0.7814332008361816,
31
+ "step": 60
32
+ },
33
+ {
34
+ "epoch": 0.27972027972027974,
35
+ "grad_norm": 0.513141393661499,
36
+ "learning_rate": 1.1048951048951048e-05,
37
+ "loss": 0.4949788570404053,
38
+ "step": 80
39
+ },
40
+ {
41
+ "epoch": 0.34965034965034963,
42
+ "grad_norm": 0.3357048034667969,
43
+ "learning_rate": 1.3846153846153847e-05,
44
+ "loss": 0.4088569164276123,
45
+ "step": 100
46
+ },
47
+ {
48
+ "epoch": 0.4195804195804196,
49
+ "grad_norm": 0.44831985235214233,
50
+ "learning_rate": 1.6643356643356645e-05,
51
+ "loss": 0.3331140518188477,
52
+ "step": 120
53
+ },
54
+ {
55
+ "epoch": 0.48951048951048953,
56
+ "grad_norm": 0.38962382078170776,
57
+ "learning_rate": 1.944055944055944e-05,
58
+ "loss": 0.28518569469451904,
59
+ "step": 140
60
+ },
61
+ {
62
+ "epoch": 0.5594405594405595,
63
+ "grad_norm": 0.550012469291687,
64
+ "learning_rate": 1.9751359751359752e-05,
65
+ "loss": 0.255126428604126,
66
+ "step": 160
67
+ },
68
+ {
69
+ "epoch": 0.6293706293706294,
70
+ "grad_norm": 0.4113660752773285,
71
+ "learning_rate": 1.944055944055944e-05,
72
+ "loss": 0.23595545291900635,
73
+ "step": 180
74
+ },
75
+ {
76
+ "epoch": 0.6993006993006993,
77
+ "grad_norm": 0.5528178811073303,
78
+ "learning_rate": 1.912975912975913e-05,
79
+ "loss": 0.23385438919067383,
80
+ "step": 200
81
+ },
82
+ {
83
+ "epoch": 0.7692307692307693,
84
+ "grad_norm": 0.5938523411750793,
85
+ "learning_rate": 1.8818958818958822e-05,
86
+ "loss": 0.217702054977417,
87
+ "step": 220
88
+ },
89
+ {
90
+ "epoch": 0.8391608391608392,
91
+ "grad_norm": 0.5627780556678772,
92
+ "learning_rate": 1.850815850815851e-05,
93
+ "loss": 0.20575783252716065,
94
+ "step": 240
95
+ },
96
+ {
97
+ "epoch": 0.9090909090909091,
98
+ "grad_norm": 0.6800552606582642,
99
+ "learning_rate": 1.81973581973582e-05,
100
+ "loss": 0.20057928562164307,
101
+ "step": 260
102
+ },
103
+ {
104
+ "epoch": 0.9790209790209791,
105
+ "grad_norm": 0.5200193524360657,
106
+ "learning_rate": 1.7886557886557888e-05,
107
+ "loss": 0.1952407717704773,
108
+ "step": 280
109
+ },
110
+ {
111
+ "epoch": 1.0,
112
+ "eval_COMMODITY_f1": 0.4243,
113
+ "eval_COMPANY_f1": 0.6975,
114
+ "eval_COUNTRY_f1": 0.8103,
115
+ "eval_EVENT_f1": 0.0509,
116
+ "eval_INFRASTRUCTURE_f1": 0.0265,
117
+ "eval_LOCATION_f1": 0.5881,
118
+ "eval_MARKET_f1": 0.3048,
119
+ "eval_ORGANIZATION_f1": 0.5435,
120
+ "eval_PERSON_f1": 0.6313,
121
+ "eval_accuracy": 0.9383044022343244,
122
+ "eval_f1": 0.5853641863485661,
123
+ "eval_loss": 0.18643628060817719,
124
+ "eval_precision": 0.5841462582713691,
125
+ "eval_recall": 0.5865872037265378,
126
+ "eval_runtime": 4.3741,
127
+ "eval_samples_per_second": 261.541,
128
+ "eval_steps_per_second": 4.115,
129
+ "step": 286
130
+ },
131
+ {
132
+ "epoch": 1.048951048951049,
133
+ "grad_norm": 0.45493006706237793,
134
+ "learning_rate": 1.7575757575757576e-05,
135
+ "loss": 0.18901759386062622,
136
+ "step": 300
137
+ },
138
+ {
139
+ "epoch": 1.118881118881119,
140
+ "grad_norm": 0.4867892861366272,
141
+ "learning_rate": 1.7264957264957267e-05,
142
+ "loss": 0.18016949892044068,
143
+ "step": 320
144
+ },
145
+ {
146
+ "epoch": 1.1888111888111887,
147
+ "grad_norm": 0.5517728328704834,
148
+ "learning_rate": 1.6954156954156954e-05,
149
+ "loss": 0.18962303400039673,
150
+ "step": 340
151
+ },
152
+ {
153
+ "epoch": 1.2587412587412588,
154
+ "grad_norm": 0.5174335241317749,
155
+ "learning_rate": 1.6643356643356645e-05,
156
+ "loss": 0.18070975542068482,
157
+ "step": 360
158
+ },
159
+ {
160
+ "epoch": 1.3286713286713288,
161
+ "grad_norm": 0.5536178350448608,
162
+ "learning_rate": 1.6332556332556333e-05,
163
+ "loss": 0.17582471370697023,
164
+ "step": 380
165
+ },
166
+ {
167
+ "epoch": 1.3986013986013985,
168
+ "grad_norm": 0.49411725997924805,
169
+ "learning_rate": 1.6021756021756024e-05,
170
+ "loss": 0.17660335302352906,
171
+ "step": 400
172
+ },
173
+ {
174
+ "epoch": 1.4685314685314685,
175
+ "grad_norm": 0.37996432185173035,
176
+ "learning_rate": 1.5710955710955715e-05,
177
+ "loss": 0.1685216546058655,
178
+ "step": 420
179
+ },
180
+ {
181
+ "epoch": 1.5384615384615383,
182
+ "grad_norm": 0.5632461905479431,
183
+ "learning_rate": 1.5400155400155402e-05,
184
+ "loss": 0.1708904027938843,
185
+ "step": 440
186
+ },
187
+ {
188
+ "epoch": 1.6083916083916083,
189
+ "grad_norm": 0.4930890202522278,
190
+ "learning_rate": 1.5089355089355091e-05,
191
+ "loss": 0.16708383560180665,
192
+ "step": 460
193
+ },
194
+ {
195
+ "epoch": 1.6783216783216783,
196
+ "grad_norm": 0.5667280554771423,
197
+ "learning_rate": 1.4778554778554779e-05,
198
+ "loss": 0.16816866397857666,
199
+ "step": 480
200
+ },
201
+ {
202
+ "epoch": 1.7482517482517483,
203
+ "grad_norm": 0.5394991040229797,
204
+ "learning_rate": 1.4467754467754468e-05,
205
+ "loss": 0.15965052843093872,
206
+ "step": 500
207
+ },
208
+ {
209
+ "epoch": 1.8181818181818183,
210
+ "grad_norm": 0.4395284354686737,
211
+ "learning_rate": 1.415695415695416e-05,
212
+ "loss": 0.16719096899032593,
213
+ "step": 520
214
+ },
215
+ {
216
+ "epoch": 1.8881118881118881,
217
+ "grad_norm": 0.44819605350494385,
218
+ "learning_rate": 1.3846153846153847e-05,
219
+ "loss": 0.16433268785476685,
220
+ "step": 540
221
+ },
222
+ {
223
+ "epoch": 1.958041958041958,
224
+ "grad_norm": 0.630233645439148,
225
+ "learning_rate": 1.3535353535353538e-05,
226
+ "loss": 0.1701101541519165,
227
+ "step": 560
228
+ },
229
+ {
230
+ "epoch": 2.0,
231
+ "eval_COMMODITY_f1": 0.5098,
232
+ "eval_COMPANY_f1": 0.7415,
233
+ "eval_COUNTRY_f1": 0.8307,
234
+ "eval_EVENT_f1": 0.2393,
235
+ "eval_INFRASTRUCTURE_f1": 0.2385,
236
+ "eval_LOCATION_f1": 0.6418,
237
+ "eval_MARKET_f1": 0.3517,
238
+ "eval_ORGANIZATION_f1": 0.6229,
239
+ "eval_PERSON_f1": 0.6953,
240
+ "eval_accuracy": 0.9454946483875253,
241
+ "eval_f1": 0.6355833913245186,
242
+ "eval_loss": 0.15933312475681305,
243
+ "eval_precision": 0.6155722446883626,
244
+ "eval_recall": 0.6569393067543499,
245
+ "eval_runtime": 4.404,
246
+ "eval_samples_per_second": 259.765,
247
+ "eval_steps_per_second": 4.087,
248
+ "step": 572
249
+ },
250
+ {
251
+ "epoch": 2.027972027972028,
252
+ "grad_norm": 0.6636055111885071,
253
+ "learning_rate": 1.3224553224553225e-05,
254
+ "loss": 0.16211626529693604,
255
+ "step": 580
256
+ },
257
+ {
258
+ "epoch": 2.097902097902098,
259
+ "grad_norm": 0.5422558188438416,
260
+ "learning_rate": 1.2913752913752915e-05,
261
+ "loss": 0.1504289388656616,
262
+ "step": 600
263
+ },
264
+ {
265
+ "epoch": 2.167832167832168,
266
+ "grad_norm": 0.5456656813621521,
267
+ "learning_rate": 1.2602952602952606e-05,
268
+ "loss": 0.15346094369888305,
269
+ "step": 620
270
+ },
271
+ {
272
+ "epoch": 2.237762237762238,
273
+ "grad_norm": 0.5587140321731567,
274
+ "learning_rate": 1.2292152292152293e-05,
275
+ "loss": 0.15093343257904052,
276
+ "step": 640
277
+ },
278
+ {
279
+ "epoch": 2.3076923076923075,
280
+ "grad_norm": 0.4914584457874298,
281
+ "learning_rate": 1.1981351981351982e-05,
282
+ "loss": 0.14887770414352416,
283
+ "step": 660
284
+ },
285
+ {
286
+ "epoch": 2.3776223776223775,
287
+ "grad_norm": 0.42390987277030945,
288
+ "learning_rate": 1.1670551670551672e-05,
289
+ "loss": 0.15100462436676027,
290
+ "step": 680
291
+ },
292
+ {
293
+ "epoch": 2.4475524475524475,
294
+ "grad_norm": 0.42798611521720886,
295
+ "learning_rate": 1.1359751359751361e-05,
296
+ "loss": 0.14843168258666992,
297
+ "step": 700
298
+ },
299
+ {
300
+ "epoch": 2.5174825174825175,
301
+ "grad_norm": 0.559911847114563,
302
+ "learning_rate": 1.1048951048951048e-05,
303
+ "loss": 0.14720556735992432,
304
+ "step": 720
305
+ },
306
+ {
307
+ "epoch": 2.5874125874125875,
308
+ "grad_norm": 0.5178841948509216,
309
+ "learning_rate": 1.073815073815074e-05,
310
+ "loss": 0.14619035720825196,
311
+ "step": 740
312
+ },
313
+ {
314
+ "epoch": 2.6573426573426575,
315
+ "grad_norm": 0.6083295941352844,
316
+ "learning_rate": 1.0427350427350429e-05,
317
+ "loss": 0.14750727415084838,
318
+ "step": 760
319
+ },
320
+ {
321
+ "epoch": 2.7272727272727275,
322
+ "grad_norm": 0.46528416872024536,
323
+ "learning_rate": 1.0116550116550116e-05,
324
+ "loss": 0.14334226846694947,
325
+ "step": 780
326
+ },
327
+ {
328
+ "epoch": 2.797202797202797,
329
+ "grad_norm": 0.48938772082328796,
330
+ "learning_rate": 9.805749805749807e-06,
331
+ "loss": 0.14555764198303223,
332
+ "step": 800
333
+ },
334
+ {
335
+ "epoch": 2.867132867132867,
336
+ "grad_norm": 0.4275546371936798,
337
+ "learning_rate": 9.494949494949497e-06,
338
+ "loss": 0.14947701692581178,
339
+ "step": 820
340
+ },
341
+ {
342
+ "epoch": 2.937062937062937,
343
+ "grad_norm": 0.5199507474899292,
344
+ "learning_rate": 9.184149184149184e-06,
345
+ "loss": 0.14035249948501588,
346
+ "step": 840
347
+ },
348
+ {
349
+ "epoch": 3.0,
350
+ "eval_COMMODITY_f1": 0.4887,
351
+ "eval_COMPANY_f1": 0.7755,
352
+ "eval_COUNTRY_f1": 0.8455,
353
+ "eval_EVENT_f1": 0.3252,
354
+ "eval_INFRASTRUCTURE_f1": 0.3,
355
+ "eval_LOCATION_f1": 0.6741,
356
+ "eval_MARKET_f1": 0.4189,
357
+ "eval_ORGANIZATION_f1": 0.6693,
358
+ "eval_PERSON_f1": 0.7555,
359
+ "eval_accuracy": 0.9481756978768325,
360
+ "eval_f1": 0.6681568653232964,
361
+ "eval_loss": 0.15031008422374725,
362
+ "eval_precision": 0.6422928648170385,
363
+ "eval_recall": 0.6961912590765859,
364
+ "eval_runtime": 4.4312,
365
+ "eval_samples_per_second": 258.167,
366
+ "eval_steps_per_second": 4.062,
367
+ "step": 858
368
+ },
369
+ {
370
+ "epoch": 3.006993006993007,
371
+ "grad_norm": 0.5143063068389893,
372
+ "learning_rate": 8.873348873348873e-06,
373
+ "loss": 0.13901138305664062,
374
+ "step": 860
375
+ },
376
+ {
377
+ "epoch": 3.076923076923077,
378
+ "grad_norm": 0.46927395462989807,
379
+ "learning_rate": 8.562548562548563e-06,
380
+ "loss": 0.13868144750595093,
381
+ "step": 880
382
+ },
383
+ {
384
+ "epoch": 3.1468531468531467,
385
+ "grad_norm": 0.4429858922958374,
386
+ "learning_rate": 8.251748251748254e-06,
387
+ "loss": 0.13494281768798827,
388
+ "step": 900
389
+ },
390
+ {
391
+ "epoch": 3.2167832167832167,
392
+ "grad_norm": 0.4369621276855469,
393
+ "learning_rate": 7.940947940947941e-06,
394
+ "loss": 0.13719457387924194,
395
+ "step": 920
396
+ },
397
+ {
398
+ "epoch": 3.2867132867132867,
399
+ "grad_norm": 0.5295616388320923,
400
+ "learning_rate": 7.63014763014763e-06,
401
+ "loss": 0.13230640888214112,
402
+ "step": 940
403
+ },
404
+ {
405
+ "epoch": 3.3566433566433567,
406
+ "grad_norm": 0.7585546374320984,
407
+ "learning_rate": 7.31934731934732e-06,
408
+ "loss": 0.13142707347869872,
409
+ "step": 960
410
+ },
411
+ {
412
+ "epoch": 3.4265734265734267,
413
+ "grad_norm": 0.7057129740715027,
414
+ "learning_rate": 7.008547008547009e-06,
415
+ "loss": 0.13272271156311036,
416
+ "step": 980
417
+ },
418
+ {
419
+ "epoch": 3.4965034965034967,
420
+ "grad_norm": 0.4822175204753876,
421
+ "learning_rate": 6.697746697746699e-06,
422
+ "loss": 0.13051105737686158,
423
+ "step": 1000
424
+ },
425
+ {
426
+ "epoch": 3.5664335664335667,
427
+ "grad_norm": 0.6353693008422852,
428
+ "learning_rate": 6.3869463869463875e-06,
429
+ "loss": 0.1364367723464966,
430
+ "step": 1020
431
+ },
432
+ {
433
+ "epoch": 3.6363636363636362,
434
+ "grad_norm": 0.6322542428970337,
435
+ "learning_rate": 6.076146076146077e-06,
436
+ "loss": 0.13394793272018432,
437
+ "step": 1040
438
+ },
439
+ {
440
+ "epoch": 3.7062937062937062,
441
+ "grad_norm": 0.6142822504043579,
442
+ "learning_rate": 5.765345765345766e-06,
443
+ "loss": 0.13053072690963746,
444
+ "step": 1060
445
+ },
446
+ {
447
+ "epoch": 3.7762237762237763,
448
+ "grad_norm": 0.4618857800960541,
449
+ "learning_rate": 5.4545454545454545e-06,
450
+ "loss": 0.13272134065628052,
451
+ "step": 1080
452
+ },
453
+ {
454
+ "epoch": 3.8461538461538463,
455
+ "grad_norm": 0.5411708354949951,
456
+ "learning_rate": 5.1437451437451446e-06,
457
+ "loss": 0.1273587703704834,
458
+ "step": 1100
459
+ },
460
+ {
461
+ "epoch": 3.916083916083916,
462
+ "grad_norm": 0.438820481300354,
463
+ "learning_rate": 4.832944832944833e-06,
464
+ "loss": 0.13514026403427123,
465
+ "step": 1120
466
+ },
467
+ {
468
+ "epoch": 3.986013986013986,
469
+ "grad_norm": 0.580314040184021,
470
+ "learning_rate": 4.522144522144522e-06,
471
+ "loss": 0.13724528551101683,
472
+ "step": 1140
473
+ },
474
+ {
475
+ "epoch": 4.0,
476
+ "eval_COMMODITY_f1": 0.5015,
477
+ "eval_COMPANY_f1": 0.7911,
478
+ "eval_COUNTRY_f1": 0.8487,
479
+ "eval_EVENT_f1": 0.3324,
480
+ "eval_INFRASTRUCTURE_f1": 0.3305,
481
+ "eval_LOCATION_f1": 0.6716,
482
+ "eval_MARKET_f1": 0.4217,
483
+ "eval_ORGANIZATION_f1": 0.68,
484
+ "eval_PERSON_f1": 0.7632,
485
+ "eval_accuracy": 0.9506155577771408,
486
+ "eval_f1": 0.6738893476465732,
487
+ "eval_loss": 0.1432034969329834,
488
+ "eval_precision": 0.6506984754736238,
489
+ "eval_recall": 0.6987943553911494,
490
+ "eval_runtime": 4.3982,
491
+ "eval_samples_per_second": 260.109,
492
+ "eval_steps_per_second": 4.093,
493
+ "step": 1144
494
+ },
495
+ {
496
+ "epoch": 4.055944055944056,
497
+ "grad_norm": 0.5337245464324951,
498
+ "learning_rate": 4.2113442113442115e-06,
499
+ "loss": 0.1314162254333496,
500
+ "step": 1160
501
+ },
502
+ {
503
+ "epoch": 4.125874125874126,
504
+ "grad_norm": 0.5353516936302185,
505
+ "learning_rate": 3.900543900543901e-06,
506
+ "loss": 0.1259661316871643,
507
+ "step": 1180
508
+ },
509
+ {
510
+ "epoch": 4.195804195804196,
511
+ "grad_norm": 0.5923385620117188,
512
+ "learning_rate": 3.58974358974359e-06,
513
+ "loss": 0.12636299133300782,
514
+ "step": 1200
515
+ },
516
+ {
517
+ "epoch": 4.265734265734266,
518
+ "grad_norm": 0.4649389982223511,
519
+ "learning_rate": 3.278943278943279e-06,
520
+ "loss": 0.12732148170471191,
521
+ "step": 1220
522
+ },
523
+ {
524
+ "epoch": 4.335664335664336,
525
+ "grad_norm": 0.44835126399993896,
526
+ "learning_rate": 2.9681429681429686e-06,
527
+ "loss": 0.12263227701187134,
528
+ "step": 1240
529
+ },
530
+ {
531
+ "epoch": 4.405594405594406,
532
+ "grad_norm": 0.48421064019203186,
533
+ "learning_rate": 2.6573426573426574e-06,
534
+ "loss": 0.12957746982574464,
535
+ "step": 1260
536
+ },
537
+ {
538
+ "epoch": 4.475524475524476,
539
+ "grad_norm": 0.5995394587516785,
540
+ "learning_rate": 2.3465423465423467e-06,
541
+ "loss": 0.12977392673492433,
542
+ "step": 1280
543
+ },
544
+ {
545
+ "epoch": 4.545454545454545,
546
+ "grad_norm": 0.49631068110466003,
547
+ "learning_rate": 2.035742035742036e-06,
548
+ "loss": 0.12345067262649537,
549
+ "step": 1300
550
+ },
551
+ {
552
+ "epoch": 4.615384615384615,
553
+ "grad_norm": 0.5985969305038452,
554
+ "learning_rate": 1.724941724941725e-06,
555
+ "loss": 0.12002362012863159,
556
+ "step": 1320
557
+ },
558
+ {
559
+ "epoch": 4.685314685314685,
560
+ "grad_norm": 0.5093550086021423,
561
+ "learning_rate": 1.4141414141414143e-06,
562
+ "loss": 0.12667241096496581,
563
+ "step": 1340
564
+ },
565
+ {
566
+ "epoch": 4.755244755244755,
567
+ "grad_norm": 0.5784407258033752,
568
+ "learning_rate": 1.1033411033411034e-06,
569
+ "loss": 0.12439815998077393,
570
+ "step": 1360
571
+ },
572
+ {
573
+ "epoch": 4.825174825174825,
574
+ "grad_norm": 0.4653092920780182,
575
+ "learning_rate": 7.925407925407925e-07,
576
+ "loss": 0.11679568290710449,
577
+ "step": 1380
578
+ },
579
+ {
580
+ "epoch": 4.895104895104895,
581
+ "grad_norm": 0.7826379537582397,
582
+ "learning_rate": 4.817404817404818e-07,
583
+ "loss": 0.12205266952514648,
584
+ "step": 1400
585
+ },
586
+ {
587
+ "epoch": 4.965034965034965,
588
+ "grad_norm": 0.5184548497200012,
589
+ "learning_rate": 1.7094017094017097e-07,
590
+ "loss": 0.12192339897155761,
591
+ "step": 1420
592
+ },
593
+ {
594
+ "epoch": 5.0,
595
+ "eval_COMMODITY_f1": 0.5255,
596
+ "eval_COMPANY_f1": 0.7943,
597
+ "eval_COUNTRY_f1": 0.849,
598
+ "eval_EVENT_f1": 0.3433,
599
+ "eval_INFRASTRUCTURE_f1": 0.3473,
600
+ "eval_LOCATION_f1": 0.6793,
601
+ "eval_MARKET_f1": 0.4206,
602
+ "eval_ORGANIZATION_f1": 0.6913,
603
+ "eval_PERSON_f1": 0.7696,
604
+ "eval_accuracy": 0.950874224872589,
605
+ "eval_f1": 0.6789528523052879,
606
+ "eval_loss": 0.14190641045570374,
607
+ "eval_precision": 0.647015017996773,
608
+ "eval_recall": 0.7142074256747499,
609
+ "eval_runtime": 4.3789,
610
+ "eval_samples_per_second": 261.252,
611
+ "eval_steps_per_second": 4.111,
612
+ "step": 1430
613
+ }
614
+ ],
615
+ "logging_steps": 20,
616
+ "max_steps": 1430,
617
+ "num_input_tokens_seen": 0,
618
+ "num_train_epochs": 5,
619
+ "save_steps": 500,
620
+ "stateful_callbacks": {
621
+ "TrainerControl": {
622
+ "args": {
623
+ "should_epoch_stop": false,
624
+ "should_evaluate": false,
625
+ "should_log": false,
626
+ "should_save": true,
627
+ "should_training_stop": true
628
+ },
629
+ "attributes": {}
630
+ }
631
+ },
632
+ "total_flos": 2990261795389440.0,
633
+ "train_batch_size": 32,
634
+ "trial_name": null,
635
+ "trial_params": null
636
+ }
checkpoint-1430/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8204d5b5b5d20a8a826cfd288410ff08f7b4fb17860aaa84560ac542fcc5ed98
3
+ size 5265
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForTokenClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": null,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": "O",
15
+ "1": "B-PERSON",
16
+ "2": "I-PERSON",
17
+ "3": "B-ORGANIZATION",
18
+ "4": "I-ORGANIZATION",
19
+ "5": "B-LOCATION",
20
+ "6": "I-LOCATION",
21
+ "7": "B-COMMODITY",
22
+ "8": "I-COMMODITY",
23
+ "9": "B-EVENT",
24
+ "10": "I-EVENT",
25
+ "11": "B-INFRASTRUCTURE",
26
+ "12": "I-INFRASTRUCTURE",
27
+ "13": "B-MARKET",
28
+ "14": "I-MARKET",
29
+ "15": "B-COUNTRY",
30
+ "16": "I-COUNTRY",
31
+ "17": "B-COMPANY",
32
+ "18": "I-COMPANY"
33
+ },
34
+ "initializer_range": 0.02,
35
+ "label2id": {
36
+ "B-COMMODITY": 7,
37
+ "B-COMPANY": 17,
38
+ "B-COUNTRY": 15,
39
+ "B-EVENT": 9,
40
+ "B-INFRASTRUCTURE": 11,
41
+ "B-LOCATION": 5,
42
+ "B-MARKET": 13,
43
+ "B-ORGANIZATION": 3,
44
+ "B-PERSON": 1,
45
+ "I-COMMODITY": 8,
46
+ "I-COMPANY": 18,
47
+ "I-COUNTRY": 16,
48
+ "I-EVENT": 10,
49
+ "I-INFRASTRUCTURE": 12,
50
+ "I-LOCATION": 6,
51
+ "I-MARKET": 14,
52
+ "I-ORGANIZATION": 4,
53
+ "I-PERSON": 2,
54
+ "O": 0
55
+ },
56
+ "max_position_embeddings": 512,
57
+ "model_type": "distilbert",
58
+ "n_heads": 12,
59
+ "n_layers": 6,
60
+ "pad_token_id": 0,
61
+ "qa_dropout": 0.1,
62
+ "seq_classif_dropout": 0.2,
63
+ "sinusoidal_pos_embds": false,
64
+ "tie_weights_": true,
65
+ "tie_word_embeddings": true,
66
+ "transformers_version": "5.4.0",
67
+ "use_cache": false,
68
+ "vocab_size": 30522
69
+ }
label_map.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label2id": {
3
+ "O": 0,
4
+ "B-PERSON": 1,
5
+ "I-PERSON": 2,
6
+ "B-ORGANIZATION": 3,
7
+ "I-ORGANIZATION": 4,
8
+ "B-LOCATION": 5,
9
+ "I-LOCATION": 6,
10
+ "B-COMMODITY": 7,
11
+ "I-COMMODITY": 8,
12
+ "B-EVENT": 9,
13
+ "I-EVENT": 10,
14
+ "B-INFRASTRUCTURE": 11,
15
+ "I-INFRASTRUCTURE": 12,
16
+ "B-MARKET": 13,
17
+ "I-MARKET": 14,
18
+ "B-COUNTRY": 15,
19
+ "I-COUNTRY": 16,
20
+ "B-COMPANY": 17,
21
+ "I-COMPANY": 18
22
+ },
23
+ "id2label": {
24
+ "0": "O",
25
+ "1": "B-PERSON",
26
+ "2": "I-PERSON",
27
+ "3": "B-ORGANIZATION",
28
+ "4": "I-ORGANIZATION",
29
+ "5": "B-LOCATION",
30
+ "6": "I-LOCATION",
31
+ "7": "B-COMMODITY",
32
+ "8": "I-COMMODITY",
33
+ "9": "B-EVENT",
34
+ "10": "I-EVENT",
35
+ "11": "B-INFRASTRUCTURE",
36
+ "12": "I-INFRASTRUCTURE",
37
+ "13": "B-MARKET",
38
+ "14": "I-MARKET",
39
+ "15": "B-COUNTRY",
40
+ "16": "I-COUNTRY",
41
+ "17": "B-COMPANY",
42
+ "18": "I-COMPANY"
43
+ }
44
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ec2f9a7b2be096b72416ac4621e438efe83b6fe557ab5f2cf2ce0cf02dc026
3
+ size 265522308
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "max_length": 128,
8
+ "model_max_length": 512,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "stride": 0,
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "truncation_side": "right",
16
+ "truncation_strategy": "longest_first",
17
+ "unk_token": "[UNK]"
18
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8204d5b5b5d20a8a826cfd288410ff08f7b4fb17860aaa84560ac542fcc5ed98
3
+ size 5265