ChrisLalk commited on
Commit
eea1e06
·
verified ·
1 Parent(s): 6c58f61

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -850
trainer_state.json DELETED
@@ -1,850 +0,0 @@
1
- {
2
- "best_metric": 0.11314285546541214,
3
- "best_model_checkpoint": "/content/drive/MyDrive/Emotionen/multilingual-e5-large/multilingual_e5_large_2024-05-28/checkpoint-10126",
4
- "epoch": 10.0,
5
- "eval_steps": 1,
6
- "global_step": 50630,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.09875567845151097,
13
- "grad_norm": 0.31362369656562805,
14
- "learning_rate": 1.980248864309698e-05,
15
- "loss": 0.1658,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.19751135690302193,
20
- "grad_norm": 0.3713989555835724,
21
- "learning_rate": 1.9604977286193957e-05,
22
- "loss": 0.1306,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.29626703535453286,
27
- "grad_norm": 0.33911290764808655,
28
- "learning_rate": 1.9407465929290937e-05,
29
- "loss": 0.1229,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.39502271380604387,
34
- "grad_norm": 0.41552165150642395,
35
- "learning_rate": 1.9209954572387913e-05,
36
- "loss": 0.1199,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.4937783922575548,
41
- "grad_norm": 0.5132314562797546,
42
- "learning_rate": 1.9012443215484893e-05,
43
- "loss": 0.1189,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.5925340707090657,
48
- "grad_norm": 0.460574209690094,
49
- "learning_rate": 1.8814931858581872e-05,
50
- "loss": 0.118,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.6912897491605767,
55
- "grad_norm": 0.4870443642139435,
56
- "learning_rate": 1.8617420501678848e-05,
57
- "loss": 0.117,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.7900454276120877,
62
- "grad_norm": 0.39810240268707275,
63
- "learning_rate": 1.8419909144775828e-05,
64
- "loss": 0.1164,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.8888011060635986,
69
- "grad_norm": 0.3674732744693756,
70
- "learning_rate": 1.8222397787872804e-05,
71
- "loss": 0.1153,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.9875567845151096,
76
- "grad_norm": 0.419662743806839,
77
- "learning_rate": 1.802488643096978e-05,
78
- "loss": 0.1158,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 1.0,
83
- "eval_accuracy": 0.18166666666666667,
84
- "eval_f1_micro": 0.30011781927237674,
85
- "eval_loss": 0.11470416933298111,
86
- "eval_roc_auc": 0.5961780161730722,
87
- "eval_runtime": 39.4359,
88
- "eval_samples_per_second": 456.437,
89
- "eval_steps_per_second": 14.276,
90
- "step": 5063
91
- },
92
- {
93
- "epoch": 1.0863124629666205,
94
- "grad_norm": 0.41265037655830383,
95
- "learning_rate": 1.782737507406676e-05,
96
- "loss": 0.1126,
97
- "step": 5500
98
- },
99
- {
100
- "epoch": 1.1850681414181317,
101
- "grad_norm": 0.38658422231674194,
102
- "learning_rate": 1.762986371716374e-05,
103
- "loss": 0.1106,
104
- "step": 6000
105
- },
106
- {
107
- "epoch": 1.2838238198696426,
108
- "grad_norm": 0.45577454566955566,
109
- "learning_rate": 1.7432352360260715e-05,
110
- "loss": 0.1114,
111
- "step": 6500
112
- },
113
- {
114
- "epoch": 1.3825794983211535,
115
- "grad_norm": 0.5785081386566162,
116
- "learning_rate": 1.7234841003357695e-05,
117
- "loss": 0.1117,
118
- "step": 7000
119
- },
120
- {
121
- "epoch": 1.4813351767726644,
122
- "grad_norm": 0.42792847752571106,
123
- "learning_rate": 1.703732964645467e-05,
124
- "loss": 0.1113,
125
- "step": 7500
126
- },
127
- {
128
- "epoch": 1.5800908552241753,
129
- "grad_norm": 0.5515544414520264,
130
- "learning_rate": 1.683981828955165e-05,
131
- "loss": 0.1107,
132
- "step": 8000
133
- },
134
- {
135
- "epoch": 1.6788465336756864,
136
- "grad_norm": 0.4049302637577057,
137
- "learning_rate": 1.664230693264863e-05,
138
- "loss": 0.1113,
139
- "step": 8500
140
- },
141
- {
142
- "epoch": 1.7776022121271973,
143
- "grad_norm": 0.4182845652103424,
144
- "learning_rate": 1.6444795575745606e-05,
145
- "loss": 0.1112,
146
- "step": 9000
147
- },
148
- {
149
- "epoch": 1.8763578905787082,
150
- "grad_norm": 0.4281316101551056,
151
- "learning_rate": 1.6247284218842586e-05,
152
- "loss": 0.1109,
153
- "step": 9500
154
- },
155
- {
156
- "epoch": 1.9751135690302193,
157
- "grad_norm": 0.4670790433883667,
158
- "learning_rate": 1.6049772861939562e-05,
159
- "loss": 0.1102,
160
- "step": 10000
161
- },
162
- {
163
- "epoch": 2.0,
164
- "eval_accuracy": 0.21488888888888888,
165
- "eval_f1_micro": 0.3304371622083875,
166
- "eval_loss": 0.11314285546541214,
167
- "eval_roc_auc": 0.6102983856871789,
168
- "eval_runtime": 39.4597,
169
- "eval_samples_per_second": 456.162,
170
- "eval_steps_per_second": 14.268,
171
- "step": 10126
172
- },
173
- {
174
- "epoch": 2.0738692474817304,
175
- "grad_norm": 0.46797189116477966,
176
- "learning_rate": 1.585226150503654e-05,
177
- "loss": 0.1071,
178
- "step": 10500
179
- },
180
- {
181
- "epoch": 2.172624925933241,
182
- "grad_norm": 0.5314794778823853,
183
- "learning_rate": 1.565475014813352e-05,
184
- "loss": 0.1066,
185
- "step": 11000
186
- },
187
- {
188
- "epoch": 2.271380604384752,
189
- "grad_norm": 0.6094779372215271,
190
- "learning_rate": 1.5457238791230497e-05,
191
- "loss": 0.1048,
192
- "step": 11500
193
- },
194
- {
195
- "epoch": 2.3701362828362633,
196
- "grad_norm": 0.46292218565940857,
197
- "learning_rate": 1.5259727434327476e-05,
198
- "loss": 0.1063,
199
- "step": 12000
200
- },
201
- {
202
- "epoch": 2.468891961287774,
203
- "grad_norm": 0.4565483033657074,
204
- "learning_rate": 1.5062216077424454e-05,
205
- "loss": 0.1052,
206
- "step": 12500
207
- },
208
- {
209
- "epoch": 2.567647639739285,
210
- "grad_norm": 0.5294711589813232,
211
- "learning_rate": 1.486470472052143e-05,
212
- "loss": 0.107,
213
- "step": 13000
214
- },
215
- {
216
- "epoch": 2.6664033181907962,
217
- "grad_norm": 0.5409520864486694,
218
- "learning_rate": 1.4667193363618408e-05,
219
- "loss": 0.1061,
220
- "step": 13500
221
- },
222
- {
223
- "epoch": 2.765158996642307,
224
- "grad_norm": 0.572907567024231,
225
- "learning_rate": 1.4469682006715386e-05,
226
- "loss": 0.1062,
227
- "step": 14000
228
- },
229
- {
230
- "epoch": 2.863914675093818,
231
- "grad_norm": 0.4891163110733032,
232
- "learning_rate": 1.4272170649812364e-05,
233
- "loss": 0.1065,
234
- "step": 14500
235
- },
236
- {
237
- "epoch": 2.9626703535453287,
238
- "grad_norm": 0.49211814999580383,
239
- "learning_rate": 1.4074659292909343e-05,
240
- "loss": 0.1059,
241
- "step": 15000
242
- },
243
- {
244
- "epoch": 3.0,
245
- "eval_accuracy": 0.2378888888888889,
246
- "eval_f1_micro": 0.35730189914865756,
247
- "eval_loss": 0.11334217339754105,
248
- "eval_roc_auc": 0.6243103133779553,
249
- "eval_runtime": 39.4216,
250
- "eval_samples_per_second": 456.603,
251
- "eval_steps_per_second": 14.282,
252
- "step": 15189
253
- },
254
- {
255
- "epoch": 3.06142603199684,
256
- "grad_norm": 0.7181198596954346,
257
- "learning_rate": 1.3877147936006321e-05,
258
- "loss": 0.1024,
259
- "step": 15500
260
- },
261
- {
262
- "epoch": 3.160181710448351,
263
- "grad_norm": 0.57438725233078,
264
- "learning_rate": 1.3679636579103299e-05,
265
- "loss": 0.0994,
266
- "step": 16000
267
- },
268
- {
269
- "epoch": 3.2589373888998616,
270
- "grad_norm": 0.7449280619621277,
271
- "learning_rate": 1.3482125222200277e-05,
272
- "loss": 0.1003,
273
- "step": 16500
274
- },
275
- {
276
- "epoch": 3.3576930673513727,
277
- "grad_norm": 0.6857854723930359,
278
- "learning_rate": 1.3284613865297255e-05,
279
- "loss": 0.1019,
280
- "step": 17000
281
- },
282
- {
283
- "epoch": 3.456448745802884,
284
- "grad_norm": 0.6422079801559448,
285
- "learning_rate": 1.3087102508394234e-05,
286
- "loss": 0.1011,
287
- "step": 17500
288
- },
289
- {
290
- "epoch": 3.5552044242543945,
291
- "grad_norm": 0.6514602899551392,
292
- "learning_rate": 1.2889591151491212e-05,
293
- "loss": 0.1012,
294
- "step": 18000
295
- },
296
- {
297
- "epoch": 3.6539601027059057,
298
- "grad_norm": 0.5365302562713623,
299
- "learning_rate": 1.269207979458819e-05,
300
- "loss": 0.1017,
301
- "step": 18500
302
- },
303
- {
304
- "epoch": 3.7527157811574163,
305
- "grad_norm": 0.6385504603385925,
306
- "learning_rate": 1.2494568437685168e-05,
307
- "loss": 0.1009,
308
- "step": 19000
309
- },
310
- {
311
- "epoch": 3.8514714596089275,
312
- "grad_norm": 0.6212201714515686,
313
- "learning_rate": 1.2297057080782147e-05,
314
- "loss": 0.1014,
315
- "step": 19500
316
- },
317
- {
318
- "epoch": 3.9502271380604386,
319
- "grad_norm": 0.7167210578918457,
320
- "learning_rate": 1.2099545723879125e-05,
321
- "loss": 0.102,
322
- "step": 20000
323
- },
324
- {
325
- "epoch": 4.0,
326
- "eval_accuracy": 0.24166666666666667,
327
- "eval_f1_micro": 0.3643195453110199,
328
- "eval_loss": 0.11566581577062607,
329
- "eval_roc_auc": 0.63082221324154,
330
- "eval_runtime": 39.4388,
331
- "eval_samples_per_second": 456.403,
332
- "eval_steps_per_second": 14.275,
333
- "step": 20252
334
- },
335
- {
336
- "epoch": 4.048982816511949,
337
- "grad_norm": 0.7785666584968567,
338
- "learning_rate": 1.1902034366976103e-05,
339
- "loss": 0.099,
340
- "step": 20500
341
- },
342
- {
343
- "epoch": 4.147738494963461,
344
- "grad_norm": 0.6057512760162354,
345
- "learning_rate": 1.1704523010073081e-05,
346
- "loss": 0.0951,
347
- "step": 21000
348
- },
349
- {
350
- "epoch": 4.2464941734149715,
351
- "grad_norm": 0.7943652868270874,
352
- "learning_rate": 1.1507011653170059e-05,
353
- "loss": 0.0956,
354
- "step": 21500
355
- },
356
- {
357
- "epoch": 4.345249851866482,
358
- "grad_norm": 0.8373062014579773,
359
- "learning_rate": 1.1309500296267035e-05,
360
- "loss": 0.0949,
361
- "step": 22000
362
- },
363
- {
364
- "epoch": 4.444005530317993,
365
- "grad_norm": 0.6966761946678162,
366
- "learning_rate": 1.1111988939364013e-05,
367
- "loss": 0.0962,
368
- "step": 22500
369
- },
370
- {
371
- "epoch": 4.542761208769504,
372
- "grad_norm": 0.7649087309837341,
373
- "learning_rate": 1.0914477582460992e-05,
374
- "loss": 0.0964,
375
- "step": 23000
376
- },
377
- {
378
- "epoch": 4.641516887221015,
379
- "grad_norm": 0.695787787437439,
380
- "learning_rate": 1.071696622555797e-05,
381
- "loss": 0.0967,
382
- "step": 23500
383
- },
384
- {
385
- "epoch": 4.740272565672527,
386
- "grad_norm": 0.6631536483764648,
387
- "learning_rate": 1.0519454868654948e-05,
388
- "loss": 0.0968,
389
- "step": 24000
390
- },
391
- {
392
- "epoch": 4.839028244124037,
393
- "grad_norm": 0.6649767160415649,
394
- "learning_rate": 1.0321943511751926e-05,
395
- "loss": 0.0966,
396
- "step": 24500
397
- },
398
- {
399
- "epoch": 4.937783922575548,
400
- "grad_norm": 0.6164096593856812,
401
- "learning_rate": 1.0124432154848904e-05,
402
- "loss": 0.0969,
403
- "step": 25000
404
- },
405
- {
406
- "epoch": 5.0,
407
- "eval_accuracy": 0.25194444444444447,
408
- "eval_f1_micro": 0.37071333765472114,
409
- "eval_loss": 0.11759943515062332,
410
- "eval_roc_auc": 0.6358616290591664,
411
- "eval_runtime": 39.4749,
412
- "eval_samples_per_second": 455.985,
413
- "eval_steps_per_second": 14.262,
414
- "step": 25315
415
- },
416
- {
417
- "epoch": 5.036539601027059,
418
- "grad_norm": 0.8620449304580688,
419
- "learning_rate": 9.926920797945883e-06,
420
- "loss": 0.0955,
421
- "step": 25500
422
- },
423
- {
424
- "epoch": 5.13529527947857,
425
- "grad_norm": 0.7160885334014893,
426
- "learning_rate": 9.729409441042861e-06,
427
- "loss": 0.0897,
428
- "step": 26000
429
- },
430
- {
431
- "epoch": 5.234050957930081,
432
- "grad_norm": 0.8834371566772461,
433
- "learning_rate": 9.531898084139839e-06,
434
- "loss": 0.0897,
435
- "step": 26500
436
- },
437
- {
438
- "epoch": 5.332806636381592,
439
- "grad_norm": 0.7659549117088318,
440
- "learning_rate": 9.334386727236817e-06,
441
- "loss": 0.0915,
442
- "step": 27000
443
- },
444
- {
445
- "epoch": 5.431562314833103,
446
- "grad_norm": 0.9961947202682495,
447
- "learning_rate": 9.136875370333796e-06,
448
- "loss": 0.0917,
449
- "step": 27500
450
- },
451
- {
452
- "epoch": 5.530317993284614,
453
- "grad_norm": 0.8744510412216187,
454
- "learning_rate": 8.939364013430774e-06,
455
- "loss": 0.0911,
456
- "step": 28000
457
- },
458
- {
459
- "epoch": 5.6290736717361245,
460
- "grad_norm": 0.8435235023498535,
461
- "learning_rate": 8.74185265652775e-06,
462
- "loss": 0.0935,
463
- "step": 28500
464
- },
465
- {
466
- "epoch": 5.727829350187636,
467
- "grad_norm": 0.848759114742279,
468
- "learning_rate": 8.544341299624728e-06,
469
- "loss": 0.0924,
470
- "step": 29000
471
- },
472
- {
473
- "epoch": 5.826585028639147,
474
- "grad_norm": 0.7850985527038574,
475
- "learning_rate": 8.346829942721707e-06,
476
- "loss": 0.0935,
477
- "step": 29500
478
- },
479
- {
480
- "epoch": 5.925340707090657,
481
- "grad_norm": 0.8016532063484192,
482
- "learning_rate": 8.149318585818685e-06,
483
- "loss": 0.0917,
484
- "step": 30000
485
- },
486
- {
487
- "epoch": 6.0,
488
- "eval_accuracy": 0.25655555555555554,
489
- "eval_f1_micro": 0.3737782371179684,
490
- "eval_loss": 0.12158573418855667,
491
- "eval_roc_auc": 0.6422934232904187,
492
- "eval_runtime": 39.5217,
493
- "eval_samples_per_second": 455.446,
494
- "eval_steps_per_second": 14.245,
495
- "step": 30378
496
- },
497
- {
498
- "epoch": 6.024096385542169,
499
- "grad_norm": 0.782853364944458,
500
- "learning_rate": 7.951807228915663e-06,
501
- "loss": 0.0914,
502
- "step": 30500
503
- },
504
- {
505
- "epoch": 6.12285206399368,
506
- "grad_norm": 0.8463727235794067,
507
- "learning_rate": 7.754295872012641e-06,
508
- "loss": 0.0862,
509
- "step": 31000
510
- },
511
- {
512
- "epoch": 6.22160774244519,
513
- "grad_norm": 0.9679527282714844,
514
- "learning_rate": 7.55678451510962e-06,
515
- "loss": 0.0868,
516
- "step": 31500
517
- },
518
- {
519
- "epoch": 6.320363420896702,
520
- "grad_norm": 0.8114763498306274,
521
- "learning_rate": 7.3592731582065975e-06,
522
- "loss": 0.0865,
523
- "step": 32000
524
- },
525
- {
526
- "epoch": 6.419119099348213,
527
- "grad_norm": 0.8869866132736206,
528
- "learning_rate": 7.161761801303575e-06,
529
- "loss": 0.0881,
530
- "step": 32500
531
- },
532
- {
533
- "epoch": 6.517874777799723,
534
- "grad_norm": 0.9731593132019043,
535
- "learning_rate": 6.964250444400553e-06,
536
- "loss": 0.0866,
537
- "step": 33000
538
- },
539
- {
540
- "epoch": 6.616630456251235,
541
- "grad_norm": 0.8720415830612183,
542
- "learning_rate": 6.766739087497531e-06,
543
- "loss": 0.0881,
544
- "step": 33500
545
- },
546
- {
547
- "epoch": 6.7153861347027455,
548
- "grad_norm": 0.8622246980667114,
549
- "learning_rate": 6.56922773059451e-06,
550
- "loss": 0.0883,
551
- "step": 34000
552
- },
553
- {
554
- "epoch": 6.814141813154256,
555
- "grad_norm": 0.8367873430252075,
556
- "learning_rate": 6.3717163736914875e-06,
557
- "loss": 0.089,
558
- "step": 34500
559
- },
560
- {
561
- "epoch": 6.912897491605768,
562
- "grad_norm": 0.8973580598831177,
563
- "learning_rate": 6.174205016788466e-06,
564
- "loss": 0.088,
565
- "step": 35000
566
- },
567
- {
568
- "epoch": 7.0,
569
- "eval_accuracy": 0.2574444444444444,
570
- "eval_f1_micro": 0.37320491873763867,
571
- "eval_loss": 0.12590783834457397,
572
- "eval_roc_auc": 0.6456773567793932,
573
- "eval_runtime": 39.5146,
574
- "eval_samples_per_second": 455.528,
575
- "eval_steps_per_second": 14.248,
576
- "step": 35441
577
- },
578
- {
579
- "epoch": 7.011653170057278,
580
- "grad_norm": 0.9826680421829224,
581
- "learning_rate": 5.976693659885444e-06,
582
- "loss": 0.0876,
583
- "step": 35500
584
- },
585
- {
586
- "epoch": 7.110408848508789,
587
- "grad_norm": 1.1457393169403076,
588
- "learning_rate": 5.779182302982423e-06,
589
- "loss": 0.0813,
590
- "step": 36000
591
- },
592
- {
593
- "epoch": 7.209164526960301,
594
- "grad_norm": 0.9791042804718018,
595
- "learning_rate": 5.5816709460794006e-06,
596
- "loss": 0.0834,
597
- "step": 36500
598
- },
599
- {
600
- "epoch": 7.307920205411811,
601
- "grad_norm": 0.996573805809021,
602
- "learning_rate": 5.3841595891763776e-06,
603
- "loss": 0.0834,
604
- "step": 37000
605
- },
606
- {
607
- "epoch": 7.406675883863322,
608
- "grad_norm": 1.0137947797775269,
609
- "learning_rate": 5.186648232273355e-06,
610
- "loss": 0.0836,
611
- "step": 37500
612
- },
613
- {
614
- "epoch": 7.505431562314833,
615
- "grad_norm": 0.9656222462654114,
616
- "learning_rate": 4.989136875370334e-06,
617
- "loss": 0.0838,
618
- "step": 38000
619
- },
620
- {
621
- "epoch": 7.604187240766344,
622
- "grad_norm": 0.9735883474349976,
623
- "learning_rate": 4.791625518467312e-06,
624
- "loss": 0.0832,
625
- "step": 38500
626
- },
627
- {
628
- "epoch": 7.702942919217855,
629
- "grad_norm": 0.9456019997596741,
630
- "learning_rate": 4.594114161564291e-06,
631
- "loss": 0.0845,
632
- "step": 39000
633
- },
634
- {
635
- "epoch": 7.8016985976693665,
636
- "grad_norm": 0.98697429895401,
637
- "learning_rate": 4.3966028046612684e-06,
638
- "loss": 0.0842,
639
- "step": 39500
640
- },
641
- {
642
- "epoch": 7.900454276120877,
643
- "grad_norm": 0.9538049697875977,
644
- "learning_rate": 4.199091447758246e-06,
645
- "loss": 0.0843,
646
- "step": 40000
647
- },
648
- {
649
- "epoch": 7.999209954572388,
650
- "grad_norm": 0.9173344373703003,
651
- "learning_rate": 4.001580090855224e-06,
652
- "loss": 0.0848,
653
- "step": 40500
654
- },
655
- {
656
- "epoch": 8.0,
657
- "eval_accuracy": 0.25755555555555554,
658
- "eval_f1_micro": 0.370994086172909,
659
- "eval_loss": 0.12952594459056854,
660
- "eval_roc_auc": 0.6469219706450647,
661
- "eval_runtime": 39.7032,
662
- "eval_samples_per_second": 453.364,
663
- "eval_steps_per_second": 14.18,
664
- "step": 40504
665
- },
666
- {
667
- "epoch": 8.097965633023898,
668
- "grad_norm": 0.9694753885269165,
669
- "learning_rate": 3.8040687339522024e-06,
670
- "loss": 0.0784,
671
- "step": 41000
672
- },
673
- {
674
- "epoch": 8.19672131147541,
675
- "grad_norm": 1.1527094841003418,
676
- "learning_rate": 3.6065573770491806e-06,
677
- "loss": 0.0798,
678
- "step": 41500
679
- },
680
- {
681
- "epoch": 8.295476989926922,
682
- "grad_norm": 1.3963054418563843,
683
- "learning_rate": 3.409046020146159e-06,
684
- "loss": 0.0791,
685
- "step": 42000
686
- },
687
- {
688
- "epoch": 8.394232668378432,
689
- "grad_norm": 1.226542353630066,
690
- "learning_rate": 3.2115346632431367e-06,
691
- "loss": 0.0808,
692
- "step": 42500
693
- },
694
- {
695
- "epoch": 8.492988346829943,
696
- "grad_norm": 1.0286864042282104,
697
- "learning_rate": 3.0140233063401146e-06,
698
- "loss": 0.0803,
699
- "step": 43000
700
- },
701
- {
702
- "epoch": 8.591744025281454,
703
- "grad_norm": 1.1959236860275269,
704
- "learning_rate": 2.816511949437093e-06,
705
- "loss": 0.0808,
706
- "step": 43500
707
- },
708
- {
709
- "epoch": 8.690499703732964,
710
- "grad_norm": 0.9853003025054932,
711
- "learning_rate": 2.619000592534071e-06,
712
- "loss": 0.0809,
713
- "step": 44000
714
- },
715
- {
716
- "epoch": 8.789255382184475,
717
- "grad_norm": 0.8722342252731323,
718
- "learning_rate": 2.421489235631049e-06,
719
- "loss": 0.0812,
720
- "step": 44500
721
- },
722
- {
723
- "epoch": 8.888011060635986,
724
- "grad_norm": 0.9437873959541321,
725
- "learning_rate": 2.2239778787280267e-06,
726
- "loss": 0.0806,
727
- "step": 45000
728
- },
729
- {
730
- "epoch": 8.986766739087498,
731
- "grad_norm": 1.0116231441497803,
732
- "learning_rate": 2.026466521825005e-06,
733
- "loss": 0.0805,
734
- "step": 45500
735
- },
736
- {
737
- "epoch": 9.0,
738
- "eval_accuracy": 0.2628333333333333,
739
- "eval_f1_micro": 0.3753562815172111,
740
- "eval_loss": 0.13347476720809937,
741
- "eval_roc_auc": 0.6523150114535268,
742
- "eval_runtime": 39.603,
743
- "eval_samples_per_second": 454.511,
744
- "eval_steps_per_second": 14.216,
745
- "step": 45567
746
- },
747
- {
748
- "epoch": 9.085522417539009,
749
- "grad_norm": 0.9861115217208862,
750
- "learning_rate": 1.828955164921983e-06,
751
- "loss": 0.0768,
752
- "step": 46000
753
- },
754
- {
755
- "epoch": 9.18427809599052,
756
- "grad_norm": 0.7694704532623291,
757
- "learning_rate": 1.6314438080189613e-06,
758
- "loss": 0.0771,
759
- "step": 46500
760
- },
761
- {
762
- "epoch": 9.28303377444203,
763
- "grad_norm": 1.1088521480560303,
764
- "learning_rate": 1.4339324511159391e-06,
765
- "loss": 0.0775,
766
- "step": 47000
767
- },
768
- {
769
- "epoch": 9.38178945289354,
770
- "grad_norm": 1.0436288118362427,
771
- "learning_rate": 1.2364210942129174e-06,
772
- "loss": 0.0769,
773
- "step": 47500
774
- },
775
- {
776
- "epoch": 9.480545131345052,
777
- "grad_norm": 1.055021047592163,
778
- "learning_rate": 1.0389097373098954e-06,
779
- "loss": 0.0781,
780
- "step": 48000
781
- },
782
- {
783
- "epoch": 9.579300809796564,
784
- "grad_norm": 1.1790095567703247,
785
- "learning_rate": 8.413983804068734e-07,
786
- "loss": 0.077,
787
- "step": 48500
788
- },
789
- {
790
- "epoch": 9.678056488248075,
791
- "grad_norm": 0.8629137873649597,
792
- "learning_rate": 6.438870235038514e-07,
793
- "loss": 0.0783,
794
- "step": 49000
795
- },
796
- {
797
- "epoch": 9.776812166699585,
798
- "grad_norm": 1.006084680557251,
799
- "learning_rate": 4.463756666008296e-07,
800
- "loss": 0.0766,
801
- "step": 49500
802
- },
803
- {
804
- "epoch": 9.875567845151096,
805
- "grad_norm": 1.0732314586639404,
806
- "learning_rate": 2.4886430969780764e-07,
807
- "loss": 0.0774,
808
- "step": 50000
809
- },
810
- {
811
- "epoch": 9.974323523602607,
812
- "grad_norm": 1.122566819190979,
813
- "learning_rate": 5.1352952794785706e-08,
814
- "loss": 0.0778,
815
- "step": 50500
816
- },
817
- {
818
- "epoch": 10.0,
819
- "eval_accuracy": 0.2629444444444444,
820
- "eval_f1_micro": 0.3750875491622218,
821
- "eval_loss": 0.13676531612873077,
822
- "eval_roc_auc": 0.6544563659537705,
823
- "eval_runtime": 39.7687,
824
- "eval_samples_per_second": 452.617,
825
- "eval_steps_per_second": 14.157,
826
- "step": 50630
827
- }
828
- ],
829
- "logging_steps": 500,
830
- "max_steps": 50630,
831
- "num_input_tokens_seen": 0,
832
- "num_train_epochs": 10,
833
- "save_steps": 500,
834
- "stateful_callbacks": {
835
- "TrainerControl": {
836
- "args": {
837
- "should_epoch_stop": false,
838
- "should_evaluate": false,
839
- "should_log": false,
840
- "should_save": true,
841
- "should_training_stop": true
842
- },
843
- "attributes": {}
844
- }
845
- },
846
- "total_flos": 1.3843807083022118e+17,
847
- "train_batch_size": 32,
848
- "trial_name": null,
849
- "trial_params": null
850
- }