MoyYuan commited on
Commit
4583025
·
verified ·
1 Parent(s): 03cc7b1

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. config.json +30 -0
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +3 -0
  4. scheduler.pt +3 -0
  5. trainer_state.json +510 -0
  6. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 12,
24
+ "pad_token_id": 0,
25
+ "position_embedding_type": "absolute",
26
+ "transformers_version": "4.6.0.dev0",
27
+ "type_vocab_size": 2,
28
+ "use_cache": true,
29
+ "vocab_size": 30522
30
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f33afe3b1d6cda469e758a7a94a438e0c364345deeb750320779499a50ec93f7
3
+ size 875990225
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af05cbe48f08400e7d12faabe4d1a6205d33142c1b598ceee71996fc8acbb27e
3
+ size 438021385
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:665ccd583a8492bb1b433db6ea2c5a85fb0ab94a21940a2445d098659b89e706
3
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.06226002424955368,
3
+ "best_model_checkpoint": "training_output/reranker_focal_answer/checkpoint-26182",
4
+ "epoch": 26.0,
5
+ "global_step": 26182,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.5,
12
+ "learning_rate": 4.950347567030785e-05,
13
+ "loss": 0.0273,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.99,
18
+ "learning_rate": 4.900695134061569e-05,
19
+ "loss": 0.013,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_loss": 0.06812480837106705,
25
+ "eval_runtime": 56.2831,
26
+ "eval_samples_per_second": 556.508,
27
+ "step": 1007
28
+ },
29
+ {
30
+ "epoch": 1.49,
31
+ "learning_rate": 4.8510427010923536e-05,
32
+ "loss": 0.0109,
33
+ "step": 1500
34
+ },
35
+ {
36
+ "epoch": 1.99,
37
+ "learning_rate": 4.801390268123138e-05,
38
+ "loss": 0.0106,
39
+ "step": 2000
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_loss": 0.07386605441570282,
44
+ "eval_runtime": 56.2854,
45
+ "eval_samples_per_second": 556.486,
46
+ "step": 2014
47
+ },
48
+ {
49
+ "epoch": 2.48,
50
+ "learning_rate": 4.7517378351539225e-05,
51
+ "loss": 0.0095,
52
+ "step": 2500
53
+ },
54
+ {
55
+ "epoch": 2.98,
56
+ "learning_rate": 4.702085402184707e-05,
57
+ "loss": 0.0095,
58
+ "step": 3000
59
+ },
60
+ {
61
+ "epoch": 3.0,
62
+ "eval_loss": 0.06689883023500443,
63
+ "eval_runtime": 56.2997,
64
+ "eval_samples_per_second": 556.344,
65
+ "step": 3021
66
+ },
67
+ {
68
+ "epoch": 3.48,
69
+ "learning_rate": 4.6524329692154915e-05,
70
+ "loss": 0.0083,
71
+ "step": 3500
72
+ },
73
+ {
74
+ "epoch": 3.97,
75
+ "learning_rate": 4.602780536246276e-05,
76
+ "loss": 0.0085,
77
+ "step": 4000
78
+ },
79
+ {
80
+ "epoch": 4.0,
81
+ "eval_loss": 0.06948165595531464,
82
+ "eval_runtime": 56.3068,
83
+ "eval_samples_per_second": 556.274,
84
+ "step": 4028
85
+ },
86
+ {
87
+ "epoch": 4.47,
88
+ "learning_rate": 4.5531281032770604e-05,
89
+ "loss": 0.0072,
90
+ "step": 4500
91
+ },
92
+ {
93
+ "epoch": 4.97,
94
+ "learning_rate": 4.503475670307845e-05,
95
+ "loss": 0.0073,
96
+ "step": 5000
97
+ },
98
+ {
99
+ "epoch": 5.0,
100
+ "eval_loss": 0.06620071828365326,
101
+ "eval_runtime": 56.3226,
102
+ "eval_samples_per_second": 556.118,
103
+ "step": 5035
104
+ },
105
+ {
106
+ "epoch": 5.46,
107
+ "learning_rate": 4.453823237338629e-05,
108
+ "loss": 0.0062,
109
+ "step": 5500
110
+ },
111
+ {
112
+ "epoch": 5.96,
113
+ "learning_rate": 4.404170804369414e-05,
114
+ "loss": 0.0064,
115
+ "step": 6000
116
+ },
117
+ {
118
+ "epoch": 6.0,
119
+ "eval_loss": 0.06671101599931717,
120
+ "eval_runtime": 56.334,
121
+ "eval_samples_per_second": 556.005,
122
+ "step": 6042
123
+ },
124
+ {
125
+ "epoch": 6.45,
126
+ "learning_rate": 4.354518371400199e-05,
127
+ "loss": 0.0054,
128
+ "step": 6500
129
+ },
130
+ {
131
+ "epoch": 6.95,
132
+ "learning_rate": 4.3048659384309834e-05,
133
+ "loss": 0.0056,
134
+ "step": 7000
135
+ },
136
+ {
137
+ "epoch": 7.0,
138
+ "eval_loss": 0.06806950271129608,
139
+ "eval_runtime": 56.3386,
140
+ "eval_samples_per_second": 555.96,
141
+ "step": 7049
142
+ },
143
+ {
144
+ "epoch": 7.45,
145
+ "learning_rate": 4.255213505461768e-05,
146
+ "loss": 0.0049,
147
+ "step": 7500
148
+ },
149
+ {
150
+ "epoch": 7.94,
151
+ "learning_rate": 4.205561072492552e-05,
152
+ "loss": 0.0049,
153
+ "step": 8000
154
+ },
155
+ {
156
+ "epoch": 8.0,
157
+ "eval_loss": 0.06494650989770889,
158
+ "eval_runtime": 56.3526,
159
+ "eval_samples_per_second": 555.821,
160
+ "step": 8056
161
+ },
162
+ {
163
+ "epoch": 8.44,
164
+ "learning_rate": 4.155908639523337e-05,
165
+ "loss": 0.0043,
166
+ "step": 8500
167
+ },
168
+ {
169
+ "epoch": 8.94,
170
+ "learning_rate": 4.106256206554122e-05,
171
+ "loss": 0.0044,
172
+ "step": 9000
173
+ },
174
+ {
175
+ "epoch": 9.0,
176
+ "eval_loss": 0.06944319605827332,
177
+ "eval_runtime": 56.3534,
178
+ "eval_samples_per_second": 555.814,
179
+ "step": 9063
180
+ },
181
+ {
182
+ "epoch": 9.43,
183
+ "learning_rate": 4.0566037735849064e-05,
184
+ "loss": 0.0038,
185
+ "step": 9500
186
+ },
187
+ {
188
+ "epoch": 9.93,
189
+ "learning_rate": 4.006951340615691e-05,
190
+ "loss": 0.0039,
191
+ "step": 10000
192
+ },
193
+ {
194
+ "epoch": 10.0,
195
+ "eval_loss": 0.06448082625865936,
196
+ "eval_runtime": 56.3612,
197
+ "eval_samples_per_second": 555.737,
198
+ "step": 10070
199
+ },
200
+ {
201
+ "epoch": 10.43,
202
+ "learning_rate": 3.957298907646475e-05,
203
+ "loss": 0.0035,
204
+ "step": 10500
205
+ },
206
+ {
207
+ "epoch": 10.92,
208
+ "learning_rate": 3.90764647467726e-05,
209
+ "loss": 0.0035,
210
+ "step": 11000
211
+ },
212
+ {
213
+ "epoch": 11.0,
214
+ "eval_loss": 0.06930028647184372,
215
+ "eval_runtime": 56.3501,
216
+ "eval_samples_per_second": 555.846,
217
+ "step": 11077
218
+ },
219
+ {
220
+ "epoch": 11.42,
221
+ "learning_rate": 3.857994041708044e-05,
222
+ "loss": 0.0031,
223
+ "step": 11500
224
+ },
225
+ {
226
+ "epoch": 11.92,
227
+ "learning_rate": 3.8083416087388287e-05,
228
+ "loss": 0.0032,
229
+ "step": 12000
230
+ },
231
+ {
232
+ "epoch": 12.0,
233
+ "eval_loss": 0.06855376809835434,
234
+ "eval_runtime": 56.3685,
235
+ "eval_samples_per_second": 555.665,
236
+ "step": 12084
237
+ },
238
+ {
239
+ "epoch": 12.41,
240
+ "learning_rate": 3.758689175769613e-05,
241
+ "loss": 0.0028,
242
+ "step": 12500
243
+ },
244
+ {
245
+ "epoch": 12.91,
246
+ "learning_rate": 3.7090367428003976e-05,
247
+ "loss": 0.003,
248
+ "step": 13000
249
+ },
250
+ {
251
+ "epoch": 13.0,
252
+ "eval_loss": 0.06912727653980255,
253
+ "eval_runtime": 56.3545,
254
+ "eval_samples_per_second": 555.803,
255
+ "step": 13091
256
+ },
257
+ {
258
+ "epoch": 13.41,
259
+ "learning_rate": 3.659384309831182e-05,
260
+ "loss": 0.0027,
261
+ "step": 13500
262
+ },
263
+ {
264
+ "epoch": 13.9,
265
+ "learning_rate": 3.6097318768619665e-05,
266
+ "loss": 0.0027,
267
+ "step": 14000
268
+ },
269
+ {
270
+ "epoch": 14.0,
271
+ "eval_loss": 0.06820587813854218,
272
+ "eval_runtime": 56.3582,
273
+ "eval_samples_per_second": 555.767,
274
+ "step": 14098
275
+ },
276
+ {
277
+ "epoch": 14.4,
278
+ "learning_rate": 3.560079443892751e-05,
279
+ "loss": 0.0025,
280
+ "step": 14500
281
+ },
282
+ {
283
+ "epoch": 14.9,
284
+ "learning_rate": 3.5104270109235354e-05,
285
+ "loss": 0.0025,
286
+ "step": 15000
287
+ },
288
+ {
289
+ "epoch": 15.0,
290
+ "eval_loss": 0.06939652562141418,
291
+ "eval_runtime": 56.3589,
292
+ "eval_samples_per_second": 555.76,
293
+ "step": 15105
294
+ },
295
+ {
296
+ "epoch": 15.39,
297
+ "learning_rate": 3.46077457795432e-05,
298
+ "loss": 0.0023,
299
+ "step": 15500
300
+ },
301
+ {
302
+ "epoch": 15.89,
303
+ "learning_rate": 3.4111221449851043e-05,
304
+ "loss": 0.0023,
305
+ "step": 16000
306
+ },
307
+ {
308
+ "epoch": 16.0,
309
+ "eval_loss": 0.06309447437524796,
310
+ "eval_runtime": 56.3564,
311
+ "eval_samples_per_second": 555.784,
312
+ "step": 16112
313
+ },
314
+ {
315
+ "epoch": 16.39,
316
+ "learning_rate": 3.361469712015889e-05,
317
+ "loss": 0.0022,
318
+ "step": 16500
319
+ },
320
+ {
321
+ "epoch": 16.88,
322
+ "learning_rate": 3.311817279046673e-05,
323
+ "loss": 0.0023,
324
+ "step": 17000
325
+ },
326
+ {
327
+ "epoch": 17.0,
328
+ "eval_loss": 0.06566380709409714,
329
+ "eval_runtime": 56.4037,
330
+ "eval_samples_per_second": 555.318,
331
+ "step": 17119
332
+ },
333
+ {
334
+ "epoch": 17.38,
335
+ "learning_rate": 3.262164846077458e-05,
336
+ "loss": 0.0021,
337
+ "step": 17500
338
+ },
339
+ {
340
+ "epoch": 17.87,
341
+ "learning_rate": 3.212512413108242e-05,
342
+ "loss": 0.002,
343
+ "step": 18000
344
+ },
345
+ {
346
+ "epoch": 18.0,
347
+ "eval_loss": 0.06591986119747162,
348
+ "eval_runtime": 56.3845,
349
+ "eval_samples_per_second": 555.507,
350
+ "step": 18126
351
+ },
352
+ {
353
+ "epoch": 18.37,
354
+ "learning_rate": 3.1628599801390267e-05,
355
+ "loss": 0.002,
356
+ "step": 18500
357
+ },
358
+ {
359
+ "epoch": 18.87,
360
+ "learning_rate": 3.113207547169811e-05,
361
+ "loss": 0.002,
362
+ "step": 19000
363
+ },
364
+ {
365
+ "epoch": 19.0,
366
+ "eval_loss": 0.0679154321551323,
367
+ "eval_runtime": 56.3981,
368
+ "eval_samples_per_second": 555.373,
369
+ "step": 19133
370
+ },
371
+ {
372
+ "epoch": 19.36,
373
+ "learning_rate": 3.0635551142005956e-05,
374
+ "loss": 0.0018,
375
+ "step": 19500
376
+ },
377
+ {
378
+ "epoch": 19.86,
379
+ "learning_rate": 3.0139026812313804e-05,
380
+ "loss": 0.0019,
381
+ "step": 20000
382
+ },
383
+ {
384
+ "epoch": 20.0,
385
+ "eval_loss": 0.06612461805343628,
386
+ "eval_runtime": 56.3877,
387
+ "eval_samples_per_second": 555.476,
388
+ "step": 20140
389
+ },
390
+ {
391
+ "epoch": 20.36,
392
+ "learning_rate": 2.964250248262165e-05,
393
+ "loss": 0.0019,
394
+ "step": 20500
395
+ },
396
+ {
397
+ "epoch": 20.85,
398
+ "learning_rate": 2.9145978152929493e-05,
399
+ "loss": 0.0017,
400
+ "step": 21000
401
+ },
402
+ {
403
+ "epoch": 21.0,
404
+ "eval_loss": 0.06515755504369736,
405
+ "eval_runtime": 56.3934,
406
+ "eval_samples_per_second": 555.42,
407
+ "step": 21147
408
+ },
409
+ {
410
+ "epoch": 21.35,
411
+ "learning_rate": 2.8649453823237338e-05,
412
+ "loss": 0.0017,
413
+ "step": 21500
414
+ },
415
+ {
416
+ "epoch": 21.85,
417
+ "learning_rate": 2.8152929493545182e-05,
418
+ "loss": 0.0018,
419
+ "step": 22000
420
+ },
421
+ {
422
+ "epoch": 22.0,
423
+ "eval_loss": 0.062369268387556076,
424
+ "eval_runtime": 56.3907,
425
+ "eval_samples_per_second": 555.446,
426
+ "step": 22154
427
+ },
428
+ {
429
+ "epoch": 22.34,
430
+ "learning_rate": 2.7656405163853027e-05,
431
+ "loss": 0.0017,
432
+ "step": 22500
433
+ },
434
+ {
435
+ "epoch": 22.84,
436
+ "learning_rate": 2.7159880834160878e-05,
437
+ "loss": 0.0017,
438
+ "step": 23000
439
+ },
440
+ {
441
+ "epoch": 23.0,
442
+ "eval_loss": 0.064911849796772,
443
+ "eval_runtime": 56.3943,
444
+ "eval_samples_per_second": 555.411,
445
+ "step": 23161
446
+ },
447
+ {
448
+ "epoch": 23.34,
449
+ "learning_rate": 2.6663356504468723e-05,
450
+ "loss": 0.0017,
451
+ "step": 23500
452
+ },
453
+ {
454
+ "epoch": 23.83,
455
+ "learning_rate": 2.6166832174776567e-05,
456
+ "loss": 0.0016,
457
+ "step": 24000
458
+ },
459
+ {
460
+ "epoch": 24.0,
461
+ "eval_loss": 0.06586817651987076,
462
+ "eval_runtime": 56.3912,
463
+ "eval_samples_per_second": 555.441,
464
+ "step": 24168
465
+ },
466
+ {
467
+ "epoch": 24.33,
468
+ "learning_rate": 2.5670307845084412e-05,
469
+ "loss": 0.0016,
470
+ "step": 24500
471
+ },
472
+ {
473
+ "epoch": 24.83,
474
+ "learning_rate": 2.5173783515392257e-05,
475
+ "loss": 0.0015,
476
+ "step": 25000
477
+ },
478
+ {
479
+ "epoch": 25.0,
480
+ "eval_loss": 0.06666699051856995,
481
+ "eval_runtime": 56.3936,
482
+ "eval_samples_per_second": 555.418,
483
+ "step": 25175
484
+ },
485
+ {
486
+ "epoch": 25.32,
487
+ "learning_rate": 2.46772591857001e-05,
488
+ "loss": 0.0015,
489
+ "step": 25500
490
+ },
491
+ {
492
+ "epoch": 25.82,
493
+ "learning_rate": 2.4180734856007946e-05,
494
+ "loss": 0.0016,
495
+ "step": 26000
496
+ },
497
+ {
498
+ "epoch": 26.0,
499
+ "eval_loss": 0.06226002424955368,
500
+ "eval_runtime": 56.4017,
501
+ "eval_samples_per_second": 555.338,
502
+ "step": 26182
503
+ }
504
+ ],
505
+ "max_steps": 50350,
506
+ "num_train_epochs": 50,
507
+ "total_flos": 1.4079942379067136e+17,
508
+ "trial_name": null,
509
+ "trial_params": null
510
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f5f4eaa6716b95dd342e62939c6f1cb03103ed92637fb120d53f34759682a1b
3
+ size 2351