File size: 14,063 Bytes
0169065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
{
  "best_metric": 0.017690911889076233,
  "best_model_checkpoint": "grey-multilabel-classification-3/checkpoint-10720",
  "epoch": 18.0,
  "eval_steps": 500,
  "global_step": 19296,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4664179104477612,
      "grad_norm": 0.3367554545402527,
      "learning_rate": 1.953358208955224e-05,
      "loss": 0.0801,
      "step": 500
    },
    {
      "epoch": 0.9328358208955224,
      "grad_norm": 0.2535526752471924,
      "learning_rate": 1.9067164179104477e-05,
      "loss": 0.0452,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9954128111840032,
      "eval_f1": 0.0,
      "eval_loss": 0.021581802517175674,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_runtime": 13.6235,
      "eval_samples_per_second": 39.344,
      "eval_steps_per_second": 39.344,
      "step": 1072
    },
    {
      "epoch": 1.3992537313432836,
      "grad_norm": 0.39555880427360535,
      "learning_rate": 1.860074626865672e-05,
      "loss": 0.0429,
      "step": 1500
    },
    {
      "epoch": 1.8656716417910446,
      "grad_norm": 0.2884560227394104,
      "learning_rate": 1.8134328358208956e-05,
      "loss": 0.0433,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9955238204371616,
      "eval_f1": 0.0713224368499257,
      "eval_loss": 0.020623503252863884,
      "eval_precision": 0.7384615384615385,
      "eval_recall": 0.03747072599531616,
      "eval_runtime": 13.1268,
      "eval_samples_per_second": 40.832,
      "eval_steps_per_second": 40.832,
      "step": 2144
    },
    {
      "epoch": 2.332089552238806,
      "grad_norm": 0.31138914823532104,
      "learning_rate": 1.7667910447761197e-05,
      "loss": 0.0414,
      "step": 2500
    },
    {
      "epoch": 2.798507462686567,
      "grad_norm": 0.28297460079193115,
      "learning_rate": 1.7201492537313432e-05,
      "loss": 0.0401,
      "step": 3000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9957243532815767,
      "eval_f1": 0.19973190348525469,
      "eval_loss": 0.019770797342061996,
      "eval_precision": 0.7061611374407583,
      "eval_recall": 0.11631537861046058,
      "eval_runtime": 13.0974,
      "eval_samples_per_second": 40.924,
      "eval_steps_per_second": 40.924,
      "step": 3216
    },
    {
      "epoch": 3.264925373134328,
      "grad_norm": 0.17224232852458954,
      "learning_rate": 1.6735074626865673e-05,
      "loss": 0.0388,
      "step": 3500
    },
    {
      "epoch": 3.7313432835820897,
      "grad_norm": 0.23225364089012146,
      "learning_rate": 1.626865671641791e-05,
      "loss": 0.0381,
      "step": 4000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9958532672529865,
      "eval_f1": 0.28606658446362515,
      "eval_loss": 0.01913285069167614,
      "eval_precision": 0.6803519061583577,
      "eval_recall": 0.18110850897736144,
      "eval_runtime": 13.086,
      "eval_samples_per_second": 40.96,
      "eval_steps_per_second": 40.96,
      "step": 4288
    },
    {
      "epoch": 4.197761194029851,
      "grad_norm": 0.2774255573749542,
      "learning_rate": 1.5802238805970152e-05,
      "loss": 0.0362,
      "step": 4500
    },
    {
      "epoch": 4.664179104477612,
      "grad_norm": 0.34336161613464355,
      "learning_rate": 1.533582089552239e-05,
      "loss": 0.0348,
      "step": 5000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9959105623513909,
      "eval_f1": 0.29244114002478316,
      "eval_loss": 0.01866454817354679,
      "eval_precision": 0.7087087087087087,
      "eval_recall": 0.18423106947697113,
      "eval_runtime": 13.0537,
      "eval_samples_per_second": 41.061,
      "eval_steps_per_second": 41.061,
      "step": 5360
    },
    {
      "epoch": 5.130597014925373,
      "grad_norm": 0.2382318079471588,
      "learning_rate": 1.4869402985074627e-05,
      "loss": 0.0349,
      "step": 5500
    },
    {
      "epoch": 5.597014925373134,
      "grad_norm": 0.20123334228992462,
      "learning_rate": 1.4402985074626867e-05,
      "loss": 0.0334,
      "step": 6000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9959177242386914,
      "eval_f1": 0.30317848410757947,
      "eval_loss": 0.018318546935915947,
      "eval_precision": 0.6985915492957746,
      "eval_recall": 0.19359875097580015,
      "eval_runtime": 13.2299,
      "eval_samples_per_second": 40.514,
      "eval_steps_per_second": 40.514,
      "step": 6432
    },
    {
      "epoch": 6.063432835820896,
      "grad_norm": 0.2717812657356262,
      "learning_rate": 1.3936567164179106e-05,
      "loss": 0.0334,
      "step": 6500
    },
    {
      "epoch": 6.529850746268656,
      "grad_norm": 0.2352936565876007,
      "learning_rate": 1.3470149253731344e-05,
      "loss": 0.0314,
      "step": 7000
    },
    {
      "epoch": 6.996268656716418,
      "grad_norm": 0.3104737401008606,
      "learning_rate": 1.3003731343283584e-05,
      "loss": 0.0314,
      "step": 7500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9959606955624947,
      "eval_f1": 0.3079754601226994,
      "eval_loss": 0.018033917993307114,
      "eval_precision": 0.7191977077363897,
      "eval_recall": 0.1959406713505074,
      "eval_runtime": 13.8378,
      "eval_samples_per_second": 38.734,
      "eval_steps_per_second": 38.734,
      "step": 7504
    },
    {
      "epoch": 7.462686567164179,
      "grad_norm": 0.23265156149864197,
      "learning_rate": 1.2537313432835823e-05,
      "loss": 0.0295,
      "step": 8000
    },
    {
      "epoch": 7.92910447761194,
      "grad_norm": 0.2608203887939453,
      "learning_rate": 1.207089552238806e-05,
      "loss": 0.0292,
      "step": 8500
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9960358953791503,
      "eval_f1": 0.351493848857645,
      "eval_loss": 0.0177922360599041,
      "eval_precision": 0.704225352112676,
      "eval_recall": 0.234192037470726,
      "eval_runtime": 12.9508,
      "eval_samples_per_second": 41.387,
      "eval_steps_per_second": 41.387,
      "step": 8576
    },
    {
      "epoch": 8.395522388059701,
      "grad_norm": 0.28066638112068176,
      "learning_rate": 1.1604477611940299e-05,
      "loss": 0.0285,
      "step": 9000
    },
    {
      "epoch": 8.861940298507463,
      "grad_norm": 0.3340807259082794,
      "learning_rate": 1.1138059701492538e-05,
      "loss": 0.0284,
      "step": 9500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9960573810410519,
      "eval_f1": 0.3697767601602748,
      "eval_loss": 0.017844857648015022,
      "eval_precision": 0.6931330472103004,
      "eval_recall": 0.25214676034348166,
      "eval_runtime": 13.5821,
      "eval_samples_per_second": 39.464,
      "eval_steps_per_second": 39.464,
      "step": 9648
    },
    {
      "epoch": 9.328358208955224,
      "grad_norm": 0.1388923078775406,
      "learning_rate": 1.0671641791044778e-05,
      "loss": 0.0275,
      "step": 10000
    },
    {
      "epoch": 9.794776119402986,
      "grad_norm": 0.200577512383461,
      "learning_rate": 1.0205223880597015e-05,
      "loss": 0.0265,
      "step": 10500
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9961039333085054,
      "eval_f1": 0.3652275379229872,
      "eval_loss": 0.017690911889076233,
      "eval_precision": 0.7228637413394919,
      "eval_recall": 0.24434035909445745,
      "eval_runtime": 13.8518,
      "eval_samples_per_second": 38.695,
      "eval_steps_per_second": 38.695,
      "step": 10720
    },
    {
      "epoch": 10.261194029850746,
      "grad_norm": 0.1815371811389923,
      "learning_rate": 9.738805970149255e-06,
      "loss": 0.0261,
      "step": 11000
    },
    {
      "epoch": 10.727611940298507,
      "grad_norm": 0.21693415939807892,
      "learning_rate": 9.272388059701494e-06,
      "loss": 0.0251,
      "step": 11500
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9959857621680466,
      "eval_f1": 0.37825845812534664,
      "eval_loss": 0.018003830686211586,
      "eval_precision": 0.6532567049808429,
      "eval_recall": 0.26619828259172523,
      "eval_runtime": 13.2669,
      "eval_samples_per_second": 40.401,
      "eval_steps_per_second": 40.401,
      "step": 11792
    },
    {
      "epoch": 11.194029850746269,
      "grad_norm": 0.2108106017112732,
      "learning_rate": 8.805970149253732e-06,
      "loss": 0.024,
      "step": 12000
    },
    {
      "epoch": 11.66044776119403,
      "grad_norm": 0.3812670409679413,
      "learning_rate": 8.339552238805972e-06,
      "loss": 0.0245,
      "step": 12500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9960215716045492,
      "eval_f1": 0.3984840281537629,
      "eval_loss": 0.017995502799749374,
      "eval_precision": 0.6501766784452296,
      "eval_recall": 0.28727556596409054,
      "eval_runtime": 14.2572,
      "eval_samples_per_second": 37.595,
      "eval_steps_per_second": 37.595,
      "step": 12864
    },
    {
      "epoch": 12.126865671641792,
      "grad_norm": 0.1880696564912796,
      "learning_rate": 7.87313432835821e-06,
      "loss": 0.0235,
      "step": 13000
    },
    {
      "epoch": 12.593283582089553,
      "grad_norm": 0.1416264921426773,
      "learning_rate": 7.406716417910448e-06,
      "loss": 0.0227,
      "step": 13500
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.996071704815653,
      "eval_f1": 0.3989041095890411,
      "eval_loss": 0.018045414239168167,
      "eval_precision": 0.6691176470588235,
      "eval_recall": 0.28415300546448086,
      "eval_runtime": 14.1382,
      "eval_samples_per_second": 37.912,
      "eval_steps_per_second": 37.912,
      "step": 13936
    },
    {
      "epoch": 13.059701492537313,
      "grad_norm": 0.10282892733812332,
      "learning_rate": 6.9402985074626876e-06,
      "loss": 0.0231,
      "step": 14000
    },
    {
      "epoch": 13.526119402985074,
      "grad_norm": 0.22511619329452515,
      "learning_rate": 6.473880597014925e-06,
      "loss": 0.0225,
      "step": 14500
    },
    {
      "epoch": 13.992537313432836,
      "grad_norm": 0.2805801331996918,
      "learning_rate": 6.007462686567165e-06,
      "loss": 0.0223,
      "step": 15000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9960108287735984,
      "eval_f1": 0.40171858216971,
      "eval_loss": 0.018198266625404358,
      "eval_precision": 0.6437177280550774,
      "eval_recall": 0.29195940671350507,
      "eval_runtime": 13.2494,
      "eval_samples_per_second": 40.455,
      "eval_steps_per_second": 40.455,
      "step": 15008
    },
    {
      "epoch": 14.458955223880597,
      "grad_norm": 0.2002527117729187,
      "learning_rate": 5.5410447761194035e-06,
      "loss": 0.0211,
      "step": 15500
    },
    {
      "epoch": 14.925373134328359,
      "grad_norm": 0.050380606204271317,
      "learning_rate": 5.074626865671642e-06,
      "loss": 0.0214,
      "step": 16000
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9960860285902541,
      "eval_f1": 0.40630092341118956,
      "eval_loss": 0.018185345456004143,
      "eval_precision": 0.6678571428571428,
      "eval_recall": 0.29195940671350507,
      "eval_runtime": 13.1752,
      "eval_samples_per_second": 40.683,
      "eval_steps_per_second": 40.683,
      "step": 16080
    },
    {
      "epoch": 15.39179104477612,
      "grad_norm": 0.29372188448905945,
      "learning_rate": 4.6082089552238816e-06,
      "loss": 0.0205,
      "step": 16500
    },
    {
      "epoch": 15.85820895522388,
      "grad_norm": 0.145236998796463,
      "learning_rate": 4.141791044776119e-06,
      "loss": 0.0213,
      "step": 17000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9960573810410519,
      "eval_f1": 0.40582838640043173,
      "eval_loss": 0.018394598737359047,
      "eval_precision": 0.6573426573426573,
      "eval_recall": 0.2935206869633099,
      "eval_runtime": 13.885,
      "eval_samples_per_second": 38.603,
      "eval_steps_per_second": 38.603,
      "step": 17152
    },
    {
      "epoch": 16.324626865671643,
      "grad_norm": 0.3503226637840271,
      "learning_rate": 3.6753731343283584e-06,
      "loss": 0.0205,
      "step": 17500
    },
    {
      "epoch": 16.791044776119403,
      "grad_norm": 0.22795088589191437,
      "learning_rate": 3.208955223880597e-06,
      "loss": 0.0198,
      "step": 18000
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.9960108287735984,
      "eval_f1": 0.40807651434643993,
      "eval_loss": 0.018566885963082314,
      "eval_precision": 0.6389351081530782,
      "eval_recall": 0.2997658079625293,
      "eval_runtime": 13.6531,
      "eval_samples_per_second": 39.259,
      "eval_steps_per_second": 39.259,
      "step": 18224
    },
    {
      "epoch": 17.257462686567163,
      "grad_norm": 0.09603070467710495,
      "learning_rate": 2.742537313432836e-06,
      "loss": 0.0198,
      "step": 18500
    },
    {
      "epoch": 17.723880597014926,
      "grad_norm": 0.3136885166168213,
      "learning_rate": 2.2761194029850747e-06,
      "loss": 0.0197,
      "step": 19000
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.9960538000974016,
      "eval_f1": 0.42,
      "eval_loss": 0.018600279465317726,
      "eval_precision": 0.6445880452342488,
      "eval_recall": 0.3114754098360656,
      "eval_runtime": 12.9973,
      "eval_samples_per_second": 41.239,
      "eval_steps_per_second": 41.239,
      "step": 19296
    }
  ],
  "logging_steps": 500,
  "max_steps": 21440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2579840201687040.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}