RobertoSonic's picture
End of training
fcc25d7 verified
{
"best_metric": 0.9314285714285714,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV68/checkpoint-720",
"epoch": 42.52173913043478,
"eval_steps": 500,
"global_step": 765,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5797101449275363,
"grad_norm": 3.7087650299072266,
"learning_rate": 3.896103896103896e-06,
"loss": 1.0651,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.4685714285714286,
"eval_loss": 1.0769113302230835,
"eval_runtime": 1.9723,
"eval_samples_per_second": 88.729,
"eval_steps_per_second": 5.577,
"step": 18
},
{
"epoch": 1.1159420289855073,
"grad_norm": 3.6832127571105957,
"learning_rate": 7.792207792207792e-06,
"loss": 0.9566,
"step": 20
},
{
"epoch": 1.6956521739130435,
"grad_norm": 3.8521084785461426,
"learning_rate": 1.168831168831169e-05,
"loss": 0.9503,
"step": 30
},
{
"epoch": 2.0,
"eval_accuracy": 0.7257142857142858,
"eval_loss": 0.8111104965209961,
"eval_runtime": 1.9367,
"eval_samples_per_second": 90.359,
"eval_steps_per_second": 5.68,
"step": 36
},
{
"epoch": 2.2318840579710146,
"grad_norm": 5.586236476898193,
"learning_rate": 1.5584415584415583e-05,
"loss": 0.7219,
"step": 40
},
{
"epoch": 2.8115942028985508,
"grad_norm": 8.486871719360352,
"learning_rate": 1.948051948051948e-05,
"loss": 0.5745,
"step": 50
},
{
"epoch": 3.0,
"eval_accuracy": 0.7314285714285714,
"eval_loss": 0.4972275197505951,
"eval_runtime": 1.904,
"eval_samples_per_second": 91.91,
"eval_steps_per_second": 5.777,
"step": 54
},
{
"epoch": 3.3478260869565215,
"grad_norm": 10.05865478515625,
"learning_rate": 2.337662337662338e-05,
"loss": 0.4438,
"step": 60
},
{
"epoch": 3.927536231884058,
"grad_norm": 12.097763061523438,
"learning_rate": 2.6883116883116883e-05,
"loss": 0.4746,
"step": 70
},
{
"epoch": 4.0,
"eval_accuracy": 0.7485714285714286,
"eval_loss": 0.478755921125412,
"eval_runtime": 1.9296,
"eval_samples_per_second": 90.693,
"eval_steps_per_second": 5.701,
"step": 72
},
{
"epoch": 4.463768115942029,
"grad_norm": 5.598774433135986,
"learning_rate": 2.991279069767442e-05,
"loss": 0.3793,
"step": 80
},
{
"epoch": 5.0,
"grad_norm": 3.355344295501709,
"learning_rate": 2.947674418604651e-05,
"loss": 0.4363,
"step": 90
},
{
"epoch": 5.0,
"eval_accuracy": 0.7314285714285714,
"eval_loss": 0.5427026748657227,
"eval_runtime": 1.9262,
"eval_samples_per_second": 90.853,
"eval_steps_per_second": 5.711,
"step": 90
},
{
"epoch": 5.579710144927536,
"grad_norm": 10.868864059448242,
"learning_rate": 2.9040697674418607e-05,
"loss": 0.4362,
"step": 100
},
{
"epoch": 6.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.3581157624721527,
"eval_runtime": 1.8969,
"eval_samples_per_second": 92.257,
"eval_steps_per_second": 5.799,
"step": 108
},
{
"epoch": 6.115942028985507,
"grad_norm": 18.57970428466797,
"learning_rate": 2.86046511627907e-05,
"loss": 0.3911,
"step": 110
},
{
"epoch": 6.695652173913043,
"grad_norm": 11.689884185791016,
"learning_rate": 2.8168604651162793e-05,
"loss": 0.3476,
"step": 120
},
{
"epoch": 7.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.3572000563144684,
"eval_runtime": 1.9174,
"eval_samples_per_second": 91.27,
"eval_steps_per_second": 5.737,
"step": 126
},
{
"epoch": 7.231884057971015,
"grad_norm": 7.682909965515137,
"learning_rate": 2.7776162790697673e-05,
"loss": 0.3861,
"step": 130
},
{
"epoch": 7.811594202898551,
"grad_norm": 9.239214897155762,
"learning_rate": 2.7340116279069766e-05,
"loss": 0.3113,
"step": 140
},
{
"epoch": 8.0,
"eval_accuracy": 0.7885714285714286,
"eval_loss": 0.4334910809993744,
"eval_runtime": 2.4972,
"eval_samples_per_second": 70.078,
"eval_steps_per_second": 4.405,
"step": 144
},
{
"epoch": 8.347826086956522,
"grad_norm": 10.808152198791504,
"learning_rate": 2.6904069767441863e-05,
"loss": 0.2685,
"step": 150
},
{
"epoch": 8.927536231884059,
"grad_norm": 11.47179889678955,
"learning_rate": 2.6468023255813956e-05,
"loss": 0.3943,
"step": 160
},
{
"epoch": 9.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.27819445729255676,
"eval_runtime": 2.4012,
"eval_samples_per_second": 72.88,
"eval_steps_per_second": 4.581,
"step": 162
},
{
"epoch": 9.46376811594203,
"grad_norm": 6.136239051818848,
"learning_rate": 2.6031976744186046e-05,
"loss": 0.2876,
"step": 170
},
{
"epoch": 10.0,
"grad_norm": 1.0991929769515991,
"learning_rate": 2.559593023255814e-05,
"loss": 0.2574,
"step": 180
},
{
"epoch": 10.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.3320414125919342,
"eval_runtime": 1.9305,
"eval_samples_per_second": 90.652,
"eval_steps_per_second": 5.698,
"step": 180
},
{
"epoch": 10.579710144927537,
"grad_norm": 9.614068984985352,
"learning_rate": 2.5159883720930236e-05,
"loss": 0.2345,
"step": 190
},
{
"epoch": 11.0,
"eval_accuracy": 0.8342857142857143,
"eval_loss": 0.4383019506931305,
"eval_runtime": 1.9283,
"eval_samples_per_second": 90.753,
"eval_steps_per_second": 5.704,
"step": 198
},
{
"epoch": 11.115942028985508,
"grad_norm": 10.854966163635254,
"learning_rate": 2.4723837209302326e-05,
"loss": 0.2437,
"step": 200
},
{
"epoch": 11.695652173913043,
"grad_norm": 8.31966495513916,
"learning_rate": 2.428779069767442e-05,
"loss": 0.3002,
"step": 210
},
{
"epoch": 12.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.3052798807621002,
"eval_runtime": 1.9211,
"eval_samples_per_second": 91.096,
"eval_steps_per_second": 5.726,
"step": 216
},
{
"epoch": 12.231884057971014,
"grad_norm": 12.32070541381836,
"learning_rate": 2.3851744186046512e-05,
"loss": 0.2268,
"step": 220
},
{
"epoch": 12.81159420289855,
"grad_norm": 5.423739433288574,
"learning_rate": 2.3415697674418605e-05,
"loss": 0.2038,
"step": 230
},
{
"epoch": 13.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.3189180791378021,
"eval_runtime": 1.9021,
"eval_samples_per_second": 92.005,
"eval_steps_per_second": 5.783,
"step": 234
},
{
"epoch": 13.347826086956522,
"grad_norm": 6.77400016784668,
"learning_rate": 2.29796511627907e-05,
"loss": 0.2148,
"step": 240
},
{
"epoch": 13.927536231884059,
"grad_norm": 7.098783016204834,
"learning_rate": 2.2543604651162792e-05,
"loss": 0.2244,
"step": 250
},
{
"epoch": 14.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.27661752700805664,
"eval_runtime": 1.9403,
"eval_samples_per_second": 90.191,
"eval_steps_per_second": 5.669,
"step": 252
},
{
"epoch": 14.46376811594203,
"grad_norm": 8.359834671020508,
"learning_rate": 2.2107558139534885e-05,
"loss": 0.1859,
"step": 260
},
{
"epoch": 15.0,
"grad_norm": 25.05173683166504,
"learning_rate": 2.1671511627906975e-05,
"loss": 0.2277,
"step": 270
},
{
"epoch": 15.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.2637348771095276,
"eval_runtime": 2.2383,
"eval_samples_per_second": 78.185,
"eval_steps_per_second": 4.914,
"step": 270
},
{
"epoch": 15.579710144927537,
"grad_norm": 6.100148677825928,
"learning_rate": 2.123546511627907e-05,
"loss": 0.2318,
"step": 280
},
{
"epoch": 16.0,
"eval_accuracy": 0.8114285714285714,
"eval_loss": 0.46118730306625366,
"eval_runtime": 2.6144,
"eval_samples_per_second": 66.937,
"eval_steps_per_second": 4.207,
"step": 288
},
{
"epoch": 16.115942028985508,
"grad_norm": 6.950288772583008,
"learning_rate": 2.0799418604651165e-05,
"loss": 0.1786,
"step": 290
},
{
"epoch": 16.695652173913043,
"grad_norm": 14.06019401550293,
"learning_rate": 2.0363372093023254e-05,
"loss": 0.1908,
"step": 300
},
{
"epoch": 17.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.31668975949287415,
"eval_runtime": 4.7899,
"eval_samples_per_second": 36.535,
"eval_steps_per_second": 2.297,
"step": 306
},
{
"epoch": 17.231884057971016,
"grad_norm": 14.938794136047363,
"learning_rate": 1.9927325581395348e-05,
"loss": 0.2174,
"step": 310
},
{
"epoch": 17.81159420289855,
"grad_norm": 10.554563522338867,
"learning_rate": 1.9491279069767444e-05,
"loss": 0.1932,
"step": 320
},
{
"epoch": 18.0,
"eval_accuracy": 0.9028571428571428,
"eval_loss": 0.29490649700164795,
"eval_runtime": 1.9334,
"eval_samples_per_second": 90.514,
"eval_steps_per_second": 5.689,
"step": 324
},
{
"epoch": 18.347826086956523,
"grad_norm": 11.751145362854004,
"learning_rate": 1.9055232558139538e-05,
"loss": 0.1432,
"step": 330
},
{
"epoch": 18.92753623188406,
"grad_norm": 9.6201171875,
"learning_rate": 1.8619186046511627e-05,
"loss": 0.1676,
"step": 340
},
{
"epoch": 19.0,
"eval_accuracy": 0.9085714285714286,
"eval_loss": 0.26273345947265625,
"eval_runtime": 2.1044,
"eval_samples_per_second": 83.159,
"eval_steps_per_second": 5.227,
"step": 342
},
{
"epoch": 19.463768115942027,
"grad_norm": 5.4566731452941895,
"learning_rate": 1.822674418604651e-05,
"loss": 0.1472,
"step": 350
},
{
"epoch": 20.0,
"grad_norm": 0.2691696584224701,
"learning_rate": 1.7790697674418608e-05,
"loss": 0.1442,
"step": 360
},
{
"epoch": 20.0,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.2584407329559326,
"eval_runtime": 4.6506,
"eval_samples_per_second": 37.629,
"eval_steps_per_second": 2.365,
"step": 360
},
{
"epoch": 20.579710144927535,
"grad_norm": 14.349730491638184,
"learning_rate": 1.7354651162790697e-05,
"loss": 0.1606,
"step": 370
},
{
"epoch": 21.0,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.2625867426395416,
"eval_runtime": 2.0381,
"eval_samples_per_second": 85.864,
"eval_steps_per_second": 5.397,
"step": 378
},
{
"epoch": 21.115942028985508,
"grad_norm": 6.261329650878906,
"learning_rate": 1.691860465116279e-05,
"loss": 0.1291,
"step": 380
},
{
"epoch": 21.695652173913043,
"grad_norm": 11.669342994689941,
"learning_rate": 1.6482558139534884e-05,
"loss": 0.1624,
"step": 390
},
{
"epoch": 22.0,
"eval_accuracy": 0.9257142857142857,
"eval_loss": 0.2351078987121582,
"eval_runtime": 2.7162,
"eval_samples_per_second": 64.427,
"eval_steps_per_second": 4.05,
"step": 396
},
{
"epoch": 22.231884057971016,
"grad_norm": 9.660877227783203,
"learning_rate": 1.6046511627906977e-05,
"loss": 0.1447,
"step": 400
},
{
"epoch": 22.81159420289855,
"grad_norm": 15.922120094299316,
"learning_rate": 1.561046511627907e-05,
"loss": 0.1735,
"step": 410
},
{
"epoch": 23.0,
"eval_accuracy": 0.9257142857142857,
"eval_loss": 0.27460697293281555,
"eval_runtime": 2.0043,
"eval_samples_per_second": 87.311,
"eval_steps_per_second": 5.488,
"step": 414
},
{
"epoch": 23.347826086956523,
"grad_norm": 7.466541767120361,
"learning_rate": 1.5174418604651163e-05,
"loss": 0.0943,
"step": 420
},
{
"epoch": 23.92753623188406,
"grad_norm": 7.257338047027588,
"learning_rate": 1.4738372093023255e-05,
"loss": 0.1604,
"step": 430
},
{
"epoch": 24.0,
"eval_accuracy": 0.8914285714285715,
"eval_loss": 0.3236704170703888,
"eval_runtime": 1.9055,
"eval_samples_per_second": 91.842,
"eval_steps_per_second": 5.773,
"step": 432
},
{
"epoch": 24.463768115942027,
"grad_norm": 8.80977725982666,
"learning_rate": 1.430232558139535e-05,
"loss": 0.1495,
"step": 440
},
{
"epoch": 25.0,
"grad_norm": 1.1711186170578003,
"learning_rate": 1.3866279069767441e-05,
"loss": 0.122,
"step": 450
},
{
"epoch": 25.0,
"eval_accuracy": 0.8914285714285715,
"eval_loss": 0.28520700335502625,
"eval_runtime": 1.9027,
"eval_samples_per_second": 91.974,
"eval_steps_per_second": 5.781,
"step": 450
},
{
"epoch": 25.579710144927535,
"grad_norm": 10.047913551330566,
"learning_rate": 1.3430232558139536e-05,
"loss": 0.1447,
"step": 460
},
{
"epoch": 26.0,
"eval_accuracy": 0.92,
"eval_loss": 0.2593800127506256,
"eval_runtime": 1.9095,
"eval_samples_per_second": 91.646,
"eval_steps_per_second": 5.761,
"step": 468
},
{
"epoch": 26.115942028985508,
"grad_norm": 4.829050064086914,
"learning_rate": 1.2994186046511628e-05,
"loss": 0.1318,
"step": 470
},
{
"epoch": 26.695652173913043,
"grad_norm": 3.806666851043701,
"learning_rate": 1.2558139534883723e-05,
"loss": 0.1265,
"step": 480
},
{
"epoch": 27.0,
"eval_accuracy": 0.9028571428571428,
"eval_loss": 0.28569361567497253,
"eval_runtime": 1.9576,
"eval_samples_per_second": 89.395,
"eval_steps_per_second": 5.619,
"step": 486
},
{
"epoch": 27.231884057971016,
"grad_norm": 13.416851043701172,
"learning_rate": 1.2122093023255814e-05,
"loss": 0.1198,
"step": 490
},
{
"epoch": 27.81159420289855,
"grad_norm": 16.207693099975586,
"learning_rate": 1.1686046511627907e-05,
"loss": 0.1265,
"step": 500
},
{
"epoch": 28.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.32380279898643494,
"eval_runtime": 2.636,
"eval_samples_per_second": 66.388,
"eval_steps_per_second": 4.173,
"step": 504
},
{
"epoch": 28.347826086956523,
"grad_norm": 6.766117572784424,
"learning_rate": 1.125e-05,
"loss": 0.0857,
"step": 510
},
{
"epoch": 28.92753623188406,
"grad_norm": 2.6189181804656982,
"learning_rate": 1.0813953488372092e-05,
"loss": 0.122,
"step": 520
},
{
"epoch": 29.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.30293118953704834,
"eval_runtime": 2.1444,
"eval_samples_per_second": 81.609,
"eval_steps_per_second": 5.13,
"step": 522
},
{
"epoch": 29.463768115942027,
"grad_norm": 7.471499443054199,
"learning_rate": 1.0377906976744187e-05,
"loss": 0.1182,
"step": 530
},
{
"epoch": 30.0,
"grad_norm": 0.14097607135772705,
"learning_rate": 9.941860465116279e-06,
"loss": 0.0929,
"step": 540
},
{
"epoch": 30.0,
"eval_accuracy": 0.9028571428571428,
"eval_loss": 0.2936091423034668,
"eval_runtime": 1.9297,
"eval_samples_per_second": 90.69,
"eval_steps_per_second": 5.7,
"step": 540
},
{
"epoch": 30.579710144927535,
"grad_norm": 9.875027656555176,
"learning_rate": 9.505813953488372e-06,
"loss": 0.1276,
"step": 550
},
{
"epoch": 31.0,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.2777470052242279,
"eval_runtime": 1.9462,
"eval_samples_per_second": 89.918,
"eval_steps_per_second": 5.652,
"step": 558
},
{
"epoch": 31.115942028985508,
"grad_norm": 10.335461616516113,
"learning_rate": 9.069767441860465e-06,
"loss": 0.1278,
"step": 560
},
{
"epoch": 31.695652173913043,
"grad_norm": 5.649544715881348,
"learning_rate": 8.633720930232558e-06,
"loss": 0.1118,
"step": 570
},
{
"epoch": 32.0,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.2812146544456482,
"eval_runtime": 1.9255,
"eval_samples_per_second": 90.885,
"eval_steps_per_second": 5.713,
"step": 576
},
{
"epoch": 32.231884057971016,
"grad_norm": 3.817037582397461,
"learning_rate": 8.197674418604652e-06,
"loss": 0.1021,
"step": 580
},
{
"epoch": 32.81159420289855,
"grad_norm": 8.079113960266113,
"learning_rate": 7.761627906976745e-06,
"loss": 0.1058,
"step": 590
},
{
"epoch": 33.0,
"eval_accuracy": 0.92,
"eval_loss": 0.2924804389476776,
"eval_runtime": 1.9242,
"eval_samples_per_second": 90.947,
"eval_steps_per_second": 5.717,
"step": 594
},
{
"epoch": 33.34782608695652,
"grad_norm": 7.013218402862549,
"learning_rate": 7.325581395348837e-06,
"loss": 0.0822,
"step": 600
},
{
"epoch": 33.927536231884055,
"grad_norm": 5.116663455963135,
"learning_rate": 6.88953488372093e-06,
"loss": 0.0824,
"step": 610
},
{
"epoch": 34.0,
"eval_accuracy": 0.8914285714285715,
"eval_loss": 0.35194164514541626,
"eval_runtime": 2.0353,
"eval_samples_per_second": 85.982,
"eval_steps_per_second": 5.405,
"step": 612
},
{
"epoch": 34.46376811594203,
"grad_norm": 7.844626426696777,
"learning_rate": 6.453488372093024e-06,
"loss": 0.0959,
"step": 620
},
{
"epoch": 35.0,
"grad_norm": 0.001561504672281444,
"learning_rate": 6.017441860465116e-06,
"loss": 0.1084,
"step": 630
},
{
"epoch": 35.0,
"eval_accuracy": 0.92,
"eval_loss": 0.28469112515449524,
"eval_runtime": 2.7076,
"eval_samples_per_second": 64.634,
"eval_steps_per_second": 4.063,
"step": 630
},
{
"epoch": 35.57971014492754,
"grad_norm": 6.455016136169434,
"learning_rate": 5.581395348837209e-06,
"loss": 0.1074,
"step": 640
},
{
"epoch": 36.0,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.27351856231689453,
"eval_runtime": 1.9489,
"eval_samples_per_second": 89.796,
"eval_steps_per_second": 5.644,
"step": 648
},
{
"epoch": 36.11594202898551,
"grad_norm": 4.332115650177002,
"learning_rate": 5.145348837209302e-06,
"loss": 0.0896,
"step": 650
},
{
"epoch": 36.69565217391305,
"grad_norm": 17.33997917175293,
"learning_rate": 4.709302325581396e-06,
"loss": 0.1415,
"step": 660
},
{
"epoch": 37.0,
"eval_accuracy": 0.9257142857142857,
"eval_loss": 0.27236348390579224,
"eval_runtime": 1.9276,
"eval_samples_per_second": 90.787,
"eval_steps_per_second": 5.707,
"step": 666
},
{
"epoch": 37.231884057971016,
"grad_norm": 5.769250392913818,
"learning_rate": 4.273255813953489e-06,
"loss": 0.0783,
"step": 670
},
{
"epoch": 37.81159420289855,
"grad_norm": 8.946736335754395,
"learning_rate": 3.837209302325582e-06,
"loss": 0.0702,
"step": 680
},
{
"epoch": 38.0,
"eval_accuracy": 0.92,
"eval_loss": 0.28733909130096436,
"eval_runtime": 1.9313,
"eval_samples_per_second": 90.614,
"eval_steps_per_second": 5.696,
"step": 684
},
{
"epoch": 38.34782608695652,
"grad_norm": 3.792422294616699,
"learning_rate": 3.4011627906976744e-06,
"loss": 0.0708,
"step": 690
},
{
"epoch": 38.927536231884055,
"grad_norm": 10.4917573928833,
"learning_rate": 2.965116279069767e-06,
"loss": 0.0987,
"step": 700
},
{
"epoch": 39.0,
"eval_accuracy": 0.92,
"eval_loss": 0.292362242937088,
"eval_runtime": 1.9062,
"eval_samples_per_second": 91.808,
"eval_steps_per_second": 5.771,
"step": 702
},
{
"epoch": 39.46376811594203,
"grad_norm": 5.309926509857178,
"learning_rate": 2.5290697674418604e-06,
"loss": 0.0749,
"step": 710
},
{
"epoch": 40.0,
"grad_norm": 0.004375269636511803,
"learning_rate": 2.0930232558139536e-06,
"loss": 0.0637,
"step": 720
},
{
"epoch": 40.0,
"eval_accuracy": 0.9314285714285714,
"eval_loss": 0.2867558002471924,
"eval_runtime": 1.9415,
"eval_samples_per_second": 90.138,
"eval_steps_per_second": 5.666,
"step": 720
},
{
"epoch": 40.57971014492754,
"grad_norm": 8.937047004699707,
"learning_rate": 1.6569767441860467e-06,
"loss": 0.1183,
"step": 730
},
{
"epoch": 41.0,
"eval_accuracy": 0.92,
"eval_loss": 0.28917399048805237,
"eval_runtime": 1.9784,
"eval_samples_per_second": 88.457,
"eval_steps_per_second": 5.56,
"step": 738
},
{
"epoch": 41.11594202898551,
"grad_norm": 2.839174747467041,
"learning_rate": 1.2209302325581397e-06,
"loss": 0.0732,
"step": 740
},
{
"epoch": 41.69565217391305,
"grad_norm": 9.90665054321289,
"learning_rate": 7.848837209302327e-07,
"loss": 0.096,
"step": 750
},
{
"epoch": 42.0,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.29103556275367737,
"eval_runtime": 2.0391,
"eval_samples_per_second": 85.824,
"eval_steps_per_second": 5.395,
"step": 756
},
{
"epoch": 42.231884057971016,
"grad_norm": 10.19823932647705,
"learning_rate": 3.4883720930232557e-07,
"loss": 0.0719,
"step": 760
},
{
"epoch": 42.52173913043478,
"eval_accuracy": 0.9142857142857143,
"eval_loss": 0.28974097967147827,
"eval_runtime": 2.2909,
"eval_samples_per_second": 76.39,
"eval_steps_per_second": 4.802,
"step": 765
},
{
"epoch": 42.52173913043478,
"step": 765,
"total_flos": 1.5068369042520146e+18,
"train_loss": 0.22573306322876924,
"train_runtime": 1056.0428,
"train_samples_per_second": 46.404,
"train_steps_per_second": 0.724
}
],
"logging_steps": 10,
"max_steps": 765,
"num_input_tokens_seen": 0,
"num_train_epochs": 45,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5068369042520146e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}