RobertoSonic's picture
End of training
e3fa80a verified
{
"best_global_step": 750,
"best_metric": 0.9028571428571428,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV71/checkpoint-750",
"epoch": 56.0,
"eval_steps": 500,
"global_step": 840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6779661016949152,
"grad_norm": 3.3897862434387207,
"learning_rate": 6.428571428571429e-06,
"loss": 1.1246,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.41714285714285715,
"eval_loss": 1.05587899684906,
"eval_runtime": 2.0285,
"eval_samples_per_second": 86.269,
"eval_steps_per_second": 5.423,
"step": 15
},
{
"epoch": 1.3389830508474576,
"grad_norm": 4.0562825202941895,
"learning_rate": 1.3571428571428572e-05,
"loss": 1.0075,
"step": 20
},
{
"epoch": 2.0,
"grad_norm": 4.147009372711182,
"learning_rate": 2.0714285714285715e-05,
"loss": 0.8728,
"step": 30
},
{
"epoch": 2.0,
"eval_accuracy": 0.6971428571428572,
"eval_loss": 0.746044933795929,
"eval_runtime": 2.3392,
"eval_samples_per_second": 74.811,
"eval_steps_per_second": 4.702,
"step": 30
},
{
"epoch": 2.6779661016949152,
"grad_norm": 8.447574615478516,
"learning_rate": 2.7857142857142858e-05,
"loss": 0.6663,
"step": 40
},
{
"epoch": 3.0,
"eval_accuracy": 0.8057142857142857,
"eval_loss": 0.4562816619873047,
"eval_runtime": 1.9074,
"eval_samples_per_second": 91.749,
"eval_steps_per_second": 5.767,
"step": 45
},
{
"epoch": 3.3389830508474576,
"grad_norm": 11.307677268981934,
"learning_rate": 3.5000000000000004e-05,
"loss": 0.5432,
"step": 50
},
{
"epoch": 4.0,
"grad_norm": 8.582294464111328,
"learning_rate": 4.214285714285714e-05,
"loss": 0.4632,
"step": 60
},
{
"epoch": 4.0,
"eval_accuracy": 0.8285714285714286,
"eval_loss": 0.40758973360061646,
"eval_runtime": 1.8904,
"eval_samples_per_second": 92.573,
"eval_steps_per_second": 5.819,
"step": 60
},
{
"epoch": 4.677966101694915,
"grad_norm": 9.387711524963379,
"learning_rate": 4.928571428571428e-05,
"loss": 0.4278,
"step": 70
},
{
"epoch": 5.0,
"eval_accuracy": 0.84,
"eval_loss": 0.3669876456260681,
"eval_runtime": 2.6198,
"eval_samples_per_second": 66.799,
"eval_steps_per_second": 4.199,
"step": 75
},
{
"epoch": 5.338983050847458,
"grad_norm": 8.6371431350708,
"learning_rate": 5.642857142857143e-05,
"loss": 0.3608,
"step": 80
},
{
"epoch": 6.0,
"grad_norm": 11.995199203491211,
"learning_rate": 5.96031746031746e-05,
"loss": 0.361,
"step": 90
},
{
"epoch": 6.0,
"eval_accuracy": 0.8457142857142858,
"eval_loss": 0.36241406202316284,
"eval_runtime": 1.9339,
"eval_samples_per_second": 90.49,
"eval_steps_per_second": 5.688,
"step": 90
},
{
"epoch": 6.677966101694915,
"grad_norm": 10.142115592956543,
"learning_rate": 5.880952380952381e-05,
"loss": 0.3742,
"step": 100
},
{
"epoch": 7.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.3504450023174286,
"eval_runtime": 1.8843,
"eval_samples_per_second": 92.872,
"eval_steps_per_second": 5.838,
"step": 105
},
{
"epoch": 7.338983050847458,
"grad_norm": 11.923420906066895,
"learning_rate": 5.801587301587302e-05,
"loss": 0.2351,
"step": 110
},
{
"epoch": 8.0,
"grad_norm": 4.905794620513916,
"learning_rate": 5.722222222222223e-05,
"loss": 0.3313,
"step": 120
},
{
"epoch": 8.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.2962282598018646,
"eval_runtime": 2.4213,
"eval_samples_per_second": 72.274,
"eval_steps_per_second": 4.543,
"step": 120
},
{
"epoch": 8.677966101694915,
"grad_norm": 8.893570899963379,
"learning_rate": 5.642857142857143e-05,
"loss": 0.2977,
"step": 130
},
{
"epoch": 9.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.33206191658973694,
"eval_runtime": 1.9246,
"eval_samples_per_second": 90.929,
"eval_steps_per_second": 5.716,
"step": 135
},
{
"epoch": 9.338983050847457,
"grad_norm": 12.26343822479248,
"learning_rate": 5.563492063492064e-05,
"loss": 0.3156,
"step": 140
},
{
"epoch": 10.0,
"grad_norm": 6.5488362312316895,
"learning_rate": 5.4841269841269845e-05,
"loss": 0.2589,
"step": 150
},
{
"epoch": 10.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.34251242876052856,
"eval_runtime": 1.9199,
"eval_samples_per_second": 91.152,
"eval_steps_per_second": 5.73,
"step": 150
},
{
"epoch": 10.677966101694915,
"grad_norm": 5.538010120391846,
"learning_rate": 5.404761904761905e-05,
"loss": 0.2477,
"step": 160
},
{
"epoch": 11.0,
"eval_accuracy": 0.8457142857142858,
"eval_loss": 0.39819851517677307,
"eval_runtime": 1.9172,
"eval_samples_per_second": 91.281,
"eval_steps_per_second": 5.738,
"step": 165
},
{
"epoch": 11.338983050847457,
"grad_norm": 6.982760906219482,
"learning_rate": 5.333333333333333e-05,
"loss": 0.2527,
"step": 170
},
{
"epoch": 12.0,
"grad_norm": 8.375303268432617,
"learning_rate": 5.253968253968254e-05,
"loss": 0.2187,
"step": 180
},
{
"epoch": 12.0,
"eval_accuracy": 0.8514285714285714,
"eval_loss": 0.5953956246376038,
"eval_runtime": 2.0286,
"eval_samples_per_second": 86.268,
"eval_steps_per_second": 5.423,
"step": 180
},
{
"epoch": 12.677966101694915,
"grad_norm": 7.4971723556518555,
"learning_rate": 5.174603174603175e-05,
"loss": 0.2342,
"step": 190
},
{
"epoch": 13.0,
"eval_accuracy": 0.8514285714285714,
"eval_loss": 0.3745245933532715,
"eval_runtime": 1.8701,
"eval_samples_per_second": 93.58,
"eval_steps_per_second": 5.882,
"step": 195
},
{
"epoch": 13.338983050847457,
"grad_norm": 6.758434772491455,
"learning_rate": 5.095238095238095e-05,
"loss": 0.2354,
"step": 200
},
{
"epoch": 14.0,
"grad_norm": 4.655900001525879,
"learning_rate": 5.015873015873016e-05,
"loss": 0.2444,
"step": 210
},
{
"epoch": 14.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.5219993591308594,
"eval_runtime": 2.5324,
"eval_samples_per_second": 69.106,
"eval_steps_per_second": 4.344,
"step": 210
},
{
"epoch": 14.677966101694915,
"grad_norm": 8.788654327392578,
"learning_rate": 4.936507936507937e-05,
"loss": 0.2067,
"step": 220
},
{
"epoch": 15.0,
"eval_accuracy": 0.8457142857142858,
"eval_loss": 0.44333723187446594,
"eval_runtime": 1.9312,
"eval_samples_per_second": 90.617,
"eval_steps_per_second": 5.696,
"step": 225
},
{
"epoch": 15.338983050847457,
"grad_norm": 8.221491813659668,
"learning_rate": 4.8571428571428576e-05,
"loss": 0.2097,
"step": 230
},
{
"epoch": 16.0,
"grad_norm": 5.007316589355469,
"learning_rate": 4.777777777777778e-05,
"loss": 0.1882,
"step": 240
},
{
"epoch": 16.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.3937312960624695,
"eval_runtime": 1.922,
"eval_samples_per_second": 91.052,
"eval_steps_per_second": 5.723,
"step": 240
},
{
"epoch": 16.677966101694913,
"grad_norm": 9.630002975463867,
"learning_rate": 4.6984126984126986e-05,
"loss": 0.199,
"step": 250
},
{
"epoch": 17.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.5102602243423462,
"eval_runtime": 2.5902,
"eval_samples_per_second": 67.563,
"eval_steps_per_second": 4.247,
"step": 255
},
{
"epoch": 17.338983050847457,
"grad_norm": 5.763312339782715,
"learning_rate": 4.6190476190476194e-05,
"loss": 0.174,
"step": 260
},
{
"epoch": 18.0,
"grad_norm": 2.7853012084960938,
"learning_rate": 4.53968253968254e-05,
"loss": 0.1565,
"step": 270
},
{
"epoch": 18.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.36082425713539124,
"eval_runtime": 1.8955,
"eval_samples_per_second": 92.326,
"eval_steps_per_second": 5.803,
"step": 270
},
{
"epoch": 18.677966101694913,
"grad_norm": 7.840061187744141,
"learning_rate": 4.4603174603174604e-05,
"loss": 0.2068,
"step": 280
},
{
"epoch": 19.0,
"eval_accuracy": 0.88,
"eval_loss": 0.3678865134716034,
"eval_runtime": 1.914,
"eval_samples_per_second": 91.43,
"eval_steps_per_second": 5.747,
"step": 285
},
{
"epoch": 19.338983050847457,
"grad_norm": 10.269192695617676,
"learning_rate": 4.3809523809523805e-05,
"loss": 0.1742,
"step": 290
},
{
"epoch": 20.0,
"grad_norm": 11.602302551269531,
"learning_rate": 4.301587301587302e-05,
"loss": 0.194,
"step": 300
},
{
"epoch": 20.0,
"eval_accuracy": 0.8457142857142858,
"eval_loss": 0.5581231117248535,
"eval_runtime": 2.5904,
"eval_samples_per_second": 67.556,
"eval_steps_per_second": 4.246,
"step": 300
},
{
"epoch": 20.677966101694913,
"grad_norm": 4.199820518493652,
"learning_rate": 4.222222222222222e-05,
"loss": 0.1654,
"step": 310
},
{
"epoch": 21.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.5074398517608643,
"eval_runtime": 1.9301,
"eval_samples_per_second": 90.667,
"eval_steps_per_second": 5.699,
"step": 315
},
{
"epoch": 21.338983050847457,
"grad_norm": 8.24092960357666,
"learning_rate": 4.142857142857143e-05,
"loss": 0.1393,
"step": 320
},
{
"epoch": 22.0,
"grad_norm": 6.02392053604126,
"learning_rate": 4.063492063492063e-05,
"loss": 0.1986,
"step": 330
},
{
"epoch": 22.0,
"eval_accuracy": 0.88,
"eval_loss": 0.4395482540130615,
"eval_runtime": 1.9058,
"eval_samples_per_second": 91.826,
"eval_steps_per_second": 5.772,
"step": 330
},
{
"epoch": 22.677966101694913,
"grad_norm": 5.646173000335693,
"learning_rate": 3.9841269841269846e-05,
"loss": 0.1257,
"step": 340
},
{
"epoch": 23.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.42931947112083435,
"eval_runtime": 2.3278,
"eval_samples_per_second": 75.179,
"eval_steps_per_second": 4.726,
"step": 345
},
{
"epoch": 23.338983050847457,
"grad_norm": 7.199140548706055,
"learning_rate": 3.904761904761905e-05,
"loss": 0.1364,
"step": 350
},
{
"epoch": 24.0,
"grad_norm": 2.1807098388671875,
"learning_rate": 3.8253968253968256e-05,
"loss": 0.1976,
"step": 360
},
{
"epoch": 24.0,
"eval_accuracy": 0.8571428571428571,
"eval_loss": 0.4932045638561249,
"eval_runtime": 1.9503,
"eval_samples_per_second": 89.731,
"eval_steps_per_second": 5.64,
"step": 360
},
{
"epoch": 24.677966101694913,
"grad_norm": 3.774115562438965,
"learning_rate": 3.746031746031746e-05,
"loss": 0.1563,
"step": 370
},
{
"epoch": 25.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.42544516921043396,
"eval_runtime": 1.9489,
"eval_samples_per_second": 89.793,
"eval_steps_per_second": 5.644,
"step": 375
},
{
"epoch": 25.338983050847457,
"grad_norm": 5.813924789428711,
"learning_rate": 3.666666666666667e-05,
"loss": 0.1537,
"step": 380
},
{
"epoch": 26.0,
"grad_norm": 5.670418739318848,
"learning_rate": 3.5873015873015874e-05,
"loss": 0.0985,
"step": 390
},
{
"epoch": 26.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.5096610188484192,
"eval_runtime": 2.2632,
"eval_samples_per_second": 77.323,
"eval_steps_per_second": 4.86,
"step": 390
},
{
"epoch": 26.677966101694913,
"grad_norm": 7.973656177520752,
"learning_rate": 3.507936507936508e-05,
"loss": 0.1238,
"step": 400
},
{
"epoch": 27.0,
"eval_accuracy": 0.8514285714285714,
"eval_loss": 0.7264113426208496,
"eval_runtime": 2.2954,
"eval_samples_per_second": 76.239,
"eval_steps_per_second": 4.792,
"step": 405
},
{
"epoch": 27.338983050847457,
"grad_norm": 5.4732866287231445,
"learning_rate": 3.4285714285714284e-05,
"loss": 0.1385,
"step": 410
},
{
"epoch": 28.0,
"grad_norm": 4.48883056640625,
"learning_rate": 3.34920634920635e-05,
"loss": 0.1577,
"step": 420
},
{
"epoch": 28.0,
"eval_accuracy": 0.8571428571428571,
"eval_loss": 0.4826878011226654,
"eval_runtime": 1.9183,
"eval_samples_per_second": 91.227,
"eval_steps_per_second": 5.734,
"step": 420
},
{
"epoch": 28.677966101694913,
"grad_norm": 4.5706787109375,
"learning_rate": 3.26984126984127e-05,
"loss": 0.1271,
"step": 430
},
{
"epoch": 29.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.530450165271759,
"eval_runtime": 2.147,
"eval_samples_per_second": 81.509,
"eval_steps_per_second": 5.123,
"step": 435
},
{
"epoch": 29.338983050847457,
"grad_norm": 8.417387962341309,
"learning_rate": 3.190476190476191e-05,
"loss": 0.1171,
"step": 440
},
{
"epoch": 30.0,
"grad_norm": 1.3924190998077393,
"learning_rate": 3.111111111111111e-05,
"loss": 0.1002,
"step": 450
},
{
"epoch": 30.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.5888301134109497,
"eval_runtime": 1.8837,
"eval_samples_per_second": 92.902,
"eval_steps_per_second": 5.84,
"step": 450
},
{
"epoch": 30.677966101694913,
"grad_norm": 4.39676570892334,
"learning_rate": 3.031746031746032e-05,
"loss": 0.1268,
"step": 460
},
{
"epoch": 31.0,
"eval_accuracy": 0.8571428571428571,
"eval_loss": 0.6432701945304871,
"eval_runtime": 1.9204,
"eval_samples_per_second": 91.126,
"eval_steps_per_second": 5.728,
"step": 465
},
{
"epoch": 31.338983050847457,
"grad_norm": 6.017373561859131,
"learning_rate": 2.9523809523809523e-05,
"loss": 0.1077,
"step": 470
},
{
"epoch": 32.0,
"grad_norm": 5.33542013168335,
"learning_rate": 2.873015873015873e-05,
"loss": 0.1153,
"step": 480
},
{
"epoch": 32.0,
"eval_accuracy": 0.8342857142857143,
"eval_loss": 0.8394165635108948,
"eval_runtime": 2.2924,
"eval_samples_per_second": 76.34,
"eval_steps_per_second": 4.798,
"step": 480
},
{
"epoch": 32.67796610169491,
"grad_norm": 13.854134559631348,
"learning_rate": 2.7936507936507936e-05,
"loss": 0.1191,
"step": 490
},
{
"epoch": 33.0,
"eval_accuracy": 0.84,
"eval_loss": 0.747542142868042,
"eval_runtime": 1.9178,
"eval_samples_per_second": 91.249,
"eval_steps_per_second": 5.736,
"step": 495
},
{
"epoch": 33.33898305084746,
"grad_norm": 8.244441986083984,
"learning_rate": 2.7142857142857144e-05,
"loss": 0.1271,
"step": 500
},
{
"epoch": 34.0,
"grad_norm": 2.1750755310058594,
"learning_rate": 2.634920634920635e-05,
"loss": 0.1184,
"step": 510
},
{
"epoch": 34.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.4883846938610077,
"eval_runtime": 1.8782,
"eval_samples_per_second": 93.173,
"eval_steps_per_second": 5.857,
"step": 510
},
{
"epoch": 34.67796610169491,
"grad_norm": 9.822646141052246,
"learning_rate": 2.5555555555555557e-05,
"loss": 0.1332,
"step": 520
},
{
"epoch": 35.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.5834174752235413,
"eval_runtime": 2.1992,
"eval_samples_per_second": 79.573,
"eval_steps_per_second": 5.002,
"step": 525
},
{
"epoch": 35.33898305084746,
"grad_norm": 5.659489154815674,
"learning_rate": 2.4761904761904762e-05,
"loss": 0.0804,
"step": 530
},
{
"epoch": 36.0,
"grad_norm": 5.710267066955566,
"learning_rate": 2.396825396825397e-05,
"loss": 0.1071,
"step": 540
},
{
"epoch": 36.0,
"eval_accuracy": 0.8571428571428571,
"eval_loss": 0.627878725528717,
"eval_runtime": 1.9363,
"eval_samples_per_second": 90.377,
"eval_steps_per_second": 5.681,
"step": 540
},
{
"epoch": 36.67796610169491,
"grad_norm": 6.440234661102295,
"learning_rate": 2.3174603174603175e-05,
"loss": 0.0886,
"step": 550
},
{
"epoch": 37.0,
"eval_accuracy": 0.8628571428571429,
"eval_loss": 0.6998600363731384,
"eval_runtime": 1.9136,
"eval_samples_per_second": 91.451,
"eval_steps_per_second": 5.748,
"step": 555
},
{
"epoch": 37.33898305084746,
"grad_norm": 4.174771785736084,
"learning_rate": 2.238095238095238e-05,
"loss": 0.0845,
"step": 560
},
{
"epoch": 38.0,
"grad_norm": 0.6355146765708923,
"learning_rate": 2.1587301587301585e-05,
"loss": 0.0744,
"step": 570
},
{
"epoch": 38.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.729452908039093,
"eval_runtime": 2.1953,
"eval_samples_per_second": 79.716,
"eval_steps_per_second": 5.011,
"step": 570
},
{
"epoch": 38.67796610169491,
"grad_norm": 12.643016815185547,
"learning_rate": 2.0793650793650793e-05,
"loss": 0.1274,
"step": 580
},
{
"epoch": 39.0,
"eval_accuracy": 0.8914285714285715,
"eval_loss": 0.6137147545814514,
"eval_runtime": 1.9072,
"eval_samples_per_second": 91.758,
"eval_steps_per_second": 5.768,
"step": 585
},
{
"epoch": 39.33898305084746,
"grad_norm": 8.217287063598633,
"learning_rate": 1.9999999999999998e-05,
"loss": 0.0905,
"step": 590
},
{
"epoch": 40.0,
"grad_norm": 2.896934986114502,
"learning_rate": 1.9206349206349206e-05,
"loss": 0.0795,
"step": 600
},
{
"epoch": 40.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.5706229209899902,
"eval_runtime": 2.3574,
"eval_samples_per_second": 74.235,
"eval_steps_per_second": 4.666,
"step": 600
},
{
"epoch": 40.67796610169491,
"grad_norm": 8.571166038513184,
"learning_rate": 1.841269841269841e-05,
"loss": 0.0962,
"step": 610
},
{
"epoch": 41.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.6100188493728638,
"eval_runtime": 2.0136,
"eval_samples_per_second": 86.908,
"eval_steps_per_second": 5.463,
"step": 615
},
{
"epoch": 41.33898305084746,
"grad_norm": 4.700484752655029,
"learning_rate": 1.761904761904762e-05,
"loss": 0.0749,
"step": 620
},
{
"epoch": 42.0,
"grad_norm": 2.4115490913391113,
"learning_rate": 1.6825396825396824e-05,
"loss": 0.094,
"step": 630
},
{
"epoch": 42.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.6148616075515747,
"eval_runtime": 1.9168,
"eval_samples_per_second": 91.298,
"eval_steps_per_second": 5.739,
"step": 630
},
{
"epoch": 42.67796610169491,
"grad_norm": 4.732550144195557,
"learning_rate": 1.6031746031746033e-05,
"loss": 0.0945,
"step": 640
},
{
"epoch": 43.0,
"eval_accuracy": 0.88,
"eval_loss": 0.5688998699188232,
"eval_runtime": 2.0595,
"eval_samples_per_second": 84.974,
"eval_steps_per_second": 5.341,
"step": 645
},
{
"epoch": 43.33898305084746,
"grad_norm": 4.677188396453857,
"learning_rate": 1.5238095238095238e-05,
"loss": 0.0851,
"step": 650
},
{
"epoch": 44.0,
"grad_norm": 6.458128452301025,
"learning_rate": 1.4444444444444444e-05,
"loss": 0.0584,
"step": 660
},
{
"epoch": 44.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.7018650770187378,
"eval_runtime": 2.3309,
"eval_samples_per_second": 75.079,
"eval_steps_per_second": 4.719,
"step": 660
},
{
"epoch": 44.67796610169491,
"grad_norm": 4.308237552642822,
"learning_rate": 1.365079365079365e-05,
"loss": 0.0676,
"step": 670
},
{
"epoch": 45.0,
"eval_accuracy": 0.88,
"eval_loss": 0.6934124231338501,
"eval_runtime": 1.9251,
"eval_samples_per_second": 90.902,
"eval_steps_per_second": 5.714,
"step": 675
},
{
"epoch": 45.33898305084746,
"grad_norm": 2.8312790393829346,
"learning_rate": 1.2857142857142857e-05,
"loss": 0.0893,
"step": 680
},
{
"epoch": 46.0,
"grad_norm": 7.0031328201293945,
"learning_rate": 1.2063492063492064e-05,
"loss": 0.0763,
"step": 690
},
{
"epoch": 46.0,
"eval_accuracy": 0.8914285714285715,
"eval_loss": 0.6047118902206421,
"eval_runtime": 2.0296,
"eval_samples_per_second": 86.224,
"eval_steps_per_second": 5.42,
"step": 690
},
{
"epoch": 46.67796610169491,
"grad_norm": 8.401297569274902,
"learning_rate": 1.126984126984127e-05,
"loss": 0.0762,
"step": 700
},
{
"epoch": 47.0,
"eval_accuracy": 0.88,
"eval_loss": 0.6063617467880249,
"eval_runtime": 1.8566,
"eval_samples_per_second": 94.259,
"eval_steps_per_second": 5.925,
"step": 705
},
{
"epoch": 47.33898305084746,
"grad_norm": 1.462274432182312,
"learning_rate": 1.0476190476190475e-05,
"loss": 0.0563,
"step": 710
},
{
"epoch": 48.0,
"grad_norm": 1.8739376068115234,
"learning_rate": 9.682539682539682e-06,
"loss": 0.0696,
"step": 720
},
{
"epoch": 48.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.7335702776908875,
"eval_runtime": 1.911,
"eval_samples_per_second": 91.576,
"eval_steps_per_second": 5.756,
"step": 720
},
{
"epoch": 48.67796610169491,
"grad_norm": 2.9189000129699707,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0555,
"step": 730
},
{
"epoch": 49.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.6598544120788574,
"eval_runtime": 1.9333,
"eval_samples_per_second": 90.519,
"eval_steps_per_second": 5.69,
"step": 735
},
{
"epoch": 49.33898305084746,
"grad_norm": 3.1225035190582275,
"learning_rate": 8.095238095238095e-06,
"loss": 0.1129,
"step": 740
},
{
"epoch": 50.0,
"grad_norm": 2.0588467121124268,
"learning_rate": 7.301587301587301e-06,
"loss": 0.0572,
"step": 750
},
{
"epoch": 50.0,
"eval_accuracy": 0.9028571428571428,
"eval_loss": 0.597748875617981,
"eval_runtime": 2.541,
"eval_samples_per_second": 68.87,
"eval_steps_per_second": 4.329,
"step": 750
},
{
"epoch": 50.67796610169491,
"grad_norm": 2.578906536102295,
"learning_rate": 6.507936507936508e-06,
"loss": 0.0648,
"step": 760
},
{
"epoch": 51.0,
"eval_accuracy": 0.88,
"eval_loss": 0.6257001757621765,
"eval_runtime": 1.8911,
"eval_samples_per_second": 92.541,
"eval_steps_per_second": 5.817,
"step": 765
},
{
"epoch": 51.33898305084746,
"grad_norm": 3.871882677078247,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.0521,
"step": 770
},
{
"epoch": 52.0,
"grad_norm": 3.6807923316955566,
"learning_rate": 4.92063492063492e-06,
"loss": 0.0705,
"step": 780
},
{
"epoch": 52.0,
"eval_accuracy": 0.8857142857142857,
"eval_loss": 0.6653619408607483,
"eval_runtime": 1.8778,
"eval_samples_per_second": 93.193,
"eval_steps_per_second": 5.858,
"step": 780
},
{
"epoch": 52.67796610169491,
"grad_norm": 6.3525519371032715,
"learning_rate": 4.126984126984127e-06,
"loss": 0.0646,
"step": 790
},
{
"epoch": 53.0,
"eval_accuracy": 0.8685714285714285,
"eval_loss": 0.6813338994979858,
"eval_runtime": 1.8783,
"eval_samples_per_second": 93.171,
"eval_steps_per_second": 5.856,
"step": 795
},
{
"epoch": 53.33898305084746,
"grad_norm": 5.389460563659668,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0463,
"step": 800
},
{
"epoch": 54.0,
"grad_norm": 0.5001619458198547,
"learning_rate": 2.5396825396825395e-06,
"loss": 0.0795,
"step": 810
},
{
"epoch": 54.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.6209337711334229,
"eval_runtime": 2.6137,
"eval_samples_per_second": 66.955,
"eval_steps_per_second": 4.209,
"step": 810
},
{
"epoch": 54.67796610169491,
"grad_norm": 9.643752098083496,
"learning_rate": 1.746031746031746e-06,
"loss": 0.0828,
"step": 820
},
{
"epoch": 55.0,
"eval_accuracy": 0.8742857142857143,
"eval_loss": 0.6456648707389832,
"eval_runtime": 1.8674,
"eval_samples_per_second": 93.711,
"eval_steps_per_second": 5.89,
"step": 825
},
{
"epoch": 55.33898305084746,
"grad_norm": 7.717844486236572,
"learning_rate": 9.523809523809523e-07,
"loss": 0.0916,
"step": 830
},
{
"epoch": 56.0,
"grad_norm": 3.563279390335083,
"learning_rate": 1.5873015873015872e-07,
"loss": 0.0674,
"step": 840
},
{
"epoch": 56.0,
"eval_accuracy": 0.88,
"eval_loss": 0.6521316766738892,
"eval_runtime": 1.8761,
"eval_samples_per_second": 93.278,
"eval_steps_per_second": 5.863,
"step": 840
},
{
"epoch": 56.0,
"step": 840,
"total_flos": 1.7108328318259692e+18,
"train_loss": 0.19382851386354083,
"train_runtime": 1219.347,
"train_samples_per_second": 46.205,
"train_steps_per_second": 0.689
}
],
"logging_steps": 10,
"max_steps": 840,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7108328318259692e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}