Shawon16's picture
End of training
118cc9a verified
{
"best_metric": 0.9929411764705882,
"best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_BdSLW60_FrameRateCorrected_withAug_100/checkpoint-13466",
"epoch": 19.040078125,
"eval_steps": 500,
"global_step": 17955,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004464285714285714,
"grad_norm": 11.12140941619873,
"learning_rate": 2.2321428571428573e-06,
"loss": 4.1557,
"step": 100
},
{
"epoch": 0.008928571428571428,
"grad_norm": 10.578296661376953,
"learning_rate": 4.464285714285715e-06,
"loss": 4.1159,
"step": 200
},
{
"epoch": 0.013392857142857142,
"grad_norm": 9.035299301147461,
"learning_rate": 6.696428571428572e-06,
"loss": 4.0848,
"step": 300
},
{
"epoch": 0.017857142857142856,
"grad_norm": 9.214325904846191,
"learning_rate": 8.92857142857143e-06,
"loss": 4.0703,
"step": 400
},
{
"epoch": 0.022321428571428572,
"grad_norm": 8.834626197814941,
"learning_rate": 1.1160714285714287e-05,
"loss": 4.0688,
"step": 500
},
{
"epoch": 0.026785714285714284,
"grad_norm": 10.655806541442871,
"learning_rate": 1.3392857142857144e-05,
"loss": 3.8577,
"step": 600
},
{
"epoch": 0.03125,
"grad_norm": 11.894658088684082,
"learning_rate": 1.5625e-05,
"loss": 3.4927,
"step": 700
},
{
"epoch": 0.03571428571428571,
"grad_norm": 13.875555992126465,
"learning_rate": 1.785714285714286e-05,
"loss": 3.0699,
"step": 800
},
{
"epoch": 0.040044642857142855,
"eval_accuracy": 0.4752941176470588,
"eval_loss": 2.454066514968872,
"eval_runtime": 290.1198,
"eval_samples_per_second": 2.93,
"eval_steps_per_second": 1.465,
"step": 897
},
{
"epoch": 1.0001004464285714,
"grad_norm": 11.632246017456055,
"learning_rate": 2.0089285714285717e-05,
"loss": 2.5881,
"step": 900
},
{
"epoch": 1.0045647321428572,
"grad_norm": 15.39003849029541,
"learning_rate": 2.2321428571428575e-05,
"loss": 2.2052,
"step": 1000
},
{
"epoch": 1.0090290178571428,
"grad_norm": 17.561227798461914,
"learning_rate": 2.455357142857143e-05,
"loss": 1.8017,
"step": 1100
},
{
"epoch": 1.0134933035714286,
"grad_norm": 16.368633270263672,
"learning_rate": 2.6785714285714288e-05,
"loss": 1.5213,
"step": 1200
},
{
"epoch": 1.0179575892857142,
"grad_norm": 18.419261932373047,
"learning_rate": 2.9017857142857146e-05,
"loss": 1.1462,
"step": 1300
},
{
"epoch": 1.022421875,
"grad_norm": 14.493526458740234,
"learning_rate": 3.125e-05,
"loss": 1.0545,
"step": 1400
},
{
"epoch": 1.0268861607142856,
"grad_norm": 15.404373168945312,
"learning_rate": 3.348214285714286e-05,
"loss": 0.7972,
"step": 1500
},
{
"epoch": 1.0313504464285714,
"grad_norm": 7.37654972076416,
"learning_rate": 3.571428571428572e-05,
"loss": 0.6743,
"step": 1600
},
{
"epoch": 1.0358147321428572,
"grad_norm": 17.836456298828125,
"learning_rate": 3.794642857142857e-05,
"loss": 0.6366,
"step": 1700
},
{
"epoch": 1.0400558035714287,
"eval_accuracy": 0.84,
"eval_loss": 0.6831679344177246,
"eval_runtime": 295.5183,
"eval_samples_per_second": 2.876,
"eval_steps_per_second": 1.438,
"step": 1795
},
{
"epoch": 2.000200892857143,
"grad_norm": 27.33871078491211,
"learning_rate": 4.017857142857143e-05,
"loss": 0.6165,
"step": 1800
},
{
"epoch": 2.0046651785714285,
"grad_norm": 1.4543864727020264,
"learning_rate": 4.2410714285714285e-05,
"loss": 0.4179,
"step": 1900
},
{
"epoch": 2.0091294642857145,
"grad_norm": 7.2733659744262695,
"learning_rate": 4.464285714285715e-05,
"loss": 0.4156,
"step": 2000
},
{
"epoch": 2.01359375,
"grad_norm": 21.995115280151367,
"learning_rate": 4.6875e-05,
"loss": 0.3666,
"step": 2100
},
{
"epoch": 2.0180580357142857,
"grad_norm": 19.265806198120117,
"learning_rate": 4.910714285714286e-05,
"loss": 0.3751,
"step": 2200
},
{
"epoch": 2.0225223214285712,
"grad_norm": 26.048490524291992,
"learning_rate": 4.985119047619048e-05,
"loss": 0.3401,
"step": 2300
},
{
"epoch": 2.0269866071428573,
"grad_norm": 26.414731979370117,
"learning_rate": 4.960317460317461e-05,
"loss": 0.2955,
"step": 2400
},
{
"epoch": 2.031450892857143,
"grad_norm": 17.34372901916504,
"learning_rate": 4.9355158730158735e-05,
"loss": 0.2859,
"step": 2500
},
{
"epoch": 2.0359151785714285,
"grad_norm": 3.029252767562866,
"learning_rate": 4.910714285714286e-05,
"loss": 0.2253,
"step": 2600
},
{
"epoch": 2.0400669642857143,
"eval_accuracy": 0.9023529411764706,
"eval_loss": 0.3464316725730896,
"eval_runtime": 282.6757,
"eval_samples_per_second": 3.007,
"eval_steps_per_second": 1.503,
"step": 2693
},
{
"epoch": 3.000301339285714,
"grad_norm": 11.130131721496582,
"learning_rate": 4.8859126984126984e-05,
"loss": 0.232,
"step": 2700
},
{
"epoch": 3.004765625,
"grad_norm": 3.47011661529541,
"learning_rate": 4.8611111111111115e-05,
"loss": 0.1247,
"step": 2800
},
{
"epoch": 3.0092299107142857,
"grad_norm": 18.701496124267578,
"learning_rate": 4.836309523809524e-05,
"loss": 0.1293,
"step": 2900
},
{
"epoch": 3.0136941964285713,
"grad_norm": 0.7256734371185303,
"learning_rate": 4.811507936507937e-05,
"loss": 0.1291,
"step": 3000
},
{
"epoch": 3.0181584821428573,
"grad_norm": 24.983957290649414,
"learning_rate": 4.7867063492063496e-05,
"loss": 0.195,
"step": 3100
},
{
"epoch": 3.022622767857143,
"grad_norm": 0.1959875524044037,
"learning_rate": 4.761904761904762e-05,
"loss": 0.0969,
"step": 3200
},
{
"epoch": 3.0270870535714285,
"grad_norm": 1.1051886081695557,
"learning_rate": 4.7371031746031745e-05,
"loss": 0.1691,
"step": 3300
},
{
"epoch": 3.031551339285714,
"grad_norm": 0.48205551505088806,
"learning_rate": 4.7123015873015876e-05,
"loss": 0.1297,
"step": 3400
},
{
"epoch": 3.036015625,
"grad_norm": 0.8840370774269104,
"learning_rate": 4.6875e-05,
"loss": 0.1229,
"step": 3500
},
{
"epoch": 3.040078125,
"eval_accuracy": 0.9647058823529412,
"eval_loss": 0.14670781791210175,
"eval_runtime": 285.475,
"eval_samples_per_second": 2.977,
"eval_steps_per_second": 1.489,
"step": 3591
},
{
"epoch": 4.000401785714286,
"grad_norm": 0.21204273402690887,
"learning_rate": 4.662698412698413e-05,
"loss": 0.1337,
"step": 3600
},
{
"epoch": 4.004866071428571,
"grad_norm": 2.2111618518829346,
"learning_rate": 4.637896825396826e-05,
"loss": 0.0821,
"step": 3700
},
{
"epoch": 4.009330357142857,
"grad_norm": 2.208402395248413,
"learning_rate": 4.613095238095239e-05,
"loss": 0.098,
"step": 3800
},
{
"epoch": 4.0137946428571425,
"grad_norm": 3.035139560699463,
"learning_rate": 4.5882936507936506e-05,
"loss": 0.0828,
"step": 3900
},
{
"epoch": 4.018258928571429,
"grad_norm": 0.06664509326219559,
"learning_rate": 4.563492063492064e-05,
"loss": 0.0705,
"step": 4000
},
{
"epoch": 4.0227232142857146,
"grad_norm": 0.049911659210920334,
"learning_rate": 4.538690476190476e-05,
"loss": 0.0506,
"step": 4100
},
{
"epoch": 4.0271875,
"grad_norm": 6.9254374504089355,
"learning_rate": 4.5138888888888894e-05,
"loss": 0.0895,
"step": 4200
},
{
"epoch": 4.031651785714286,
"grad_norm": 0.6636308431625366,
"learning_rate": 4.489087301587302e-05,
"loss": 0.0762,
"step": 4300
},
{
"epoch": 4.036116071428571,
"grad_norm": 0.07036083936691284,
"learning_rate": 4.464285714285715e-05,
"loss": 0.1045,
"step": 4400
},
{
"epoch": 4.040044642857143,
"eval_accuracy": 0.9635294117647059,
"eval_loss": 0.1458999365568161,
"eval_runtime": 292.1403,
"eval_samples_per_second": 2.91,
"eval_steps_per_second": 1.455,
"step": 4488
},
{
"epoch": 5.000502232142857,
"grad_norm": 25.948030471801758,
"learning_rate": 4.439484126984127e-05,
"loss": 0.1201,
"step": 4500
},
{
"epoch": 5.0049665178571425,
"grad_norm": 4.851236343383789,
"learning_rate": 4.41468253968254e-05,
"loss": 0.0751,
"step": 4600
},
{
"epoch": 5.009430803571429,
"grad_norm": 2.069117307662964,
"learning_rate": 4.3898809523809523e-05,
"loss": 0.06,
"step": 4700
},
{
"epoch": 5.013895089285715,
"grad_norm": 0.02893979474902153,
"learning_rate": 4.3650793650793655e-05,
"loss": 0.0583,
"step": 4800
},
{
"epoch": 5.018359375,
"grad_norm": 38.84079360961914,
"learning_rate": 4.340277777777778e-05,
"loss": 0.0854,
"step": 4900
},
{
"epoch": 5.022823660714286,
"grad_norm": 0.01713498868048191,
"learning_rate": 4.315476190476191e-05,
"loss": 0.1064,
"step": 5000
},
{
"epoch": 5.027287946428571,
"grad_norm": 2.2113935947418213,
"learning_rate": 4.290674603174603e-05,
"loss": 0.0534,
"step": 5100
},
{
"epoch": 5.031752232142857,
"grad_norm": 0.030846355482935905,
"learning_rate": 4.265873015873016e-05,
"loss": 0.0812,
"step": 5200
},
{
"epoch": 5.0362165178571425,
"grad_norm": 63.66303253173828,
"learning_rate": 4.2410714285714285e-05,
"loss": 0.0631,
"step": 5300
},
{
"epoch": 5.040055803571429,
"eval_accuracy": 0.971764705882353,
"eval_loss": 0.13126207888126373,
"eval_runtime": 282.9661,
"eval_samples_per_second": 3.004,
"eval_steps_per_second": 1.502,
"step": 5386
},
{
"epoch": 6.000602678571428,
"grad_norm": 0.01721133291721344,
"learning_rate": 4.2162698412698416e-05,
"loss": 0.1066,
"step": 5400
},
{
"epoch": 6.005066964285715,
"grad_norm": 0.06797400861978531,
"learning_rate": 4.191468253968254e-05,
"loss": 0.0751,
"step": 5500
},
{
"epoch": 6.00953125,
"grad_norm": 0.22653132677078247,
"learning_rate": 4.166666666666667e-05,
"loss": 0.0417,
"step": 5600
},
{
"epoch": 6.013995535714286,
"grad_norm": 0.07131924480199814,
"learning_rate": 4.14186507936508e-05,
"loss": 0.0158,
"step": 5700
},
{
"epoch": 6.018459821428571,
"grad_norm": 40.63113784790039,
"learning_rate": 4.117063492063492e-05,
"loss": 0.0522,
"step": 5800
},
{
"epoch": 6.022924107142857,
"grad_norm": 0.09443258494138718,
"learning_rate": 4.0922619047619046e-05,
"loss": 0.072,
"step": 5900
},
{
"epoch": 6.027388392857143,
"grad_norm": 0.5265907049179077,
"learning_rate": 4.067460317460318e-05,
"loss": 0.0318,
"step": 6000
},
{
"epoch": 6.031852678571428,
"grad_norm": 0.03210202232003212,
"learning_rate": 4.04265873015873e-05,
"loss": 0.0877,
"step": 6100
},
{
"epoch": 6.036316964285715,
"grad_norm": 0.34825244545936584,
"learning_rate": 4.017857142857143e-05,
"loss": 0.0736,
"step": 6200
},
{
"epoch": 6.040066964285714,
"eval_accuracy": 0.9635294117647059,
"eval_loss": 0.18067213892936707,
"eval_runtime": 285.3373,
"eval_samples_per_second": 2.979,
"eval_steps_per_second": 1.489,
"step": 6284
},
{
"epoch": 7.000703125,
"grad_norm": 0.006914912257343531,
"learning_rate": 3.993055555555556e-05,
"loss": 0.0283,
"step": 6300
},
{
"epoch": 7.005167410714286,
"grad_norm": 0.0338265560567379,
"learning_rate": 3.968253968253968e-05,
"loss": 0.0499,
"step": 6400
},
{
"epoch": 7.009631696428571,
"grad_norm": 10.877938270568848,
"learning_rate": 3.943452380952381e-05,
"loss": 0.0082,
"step": 6500
},
{
"epoch": 7.014095982142857,
"grad_norm": 0.10941223055124283,
"learning_rate": 3.918650793650794e-05,
"loss": 0.0657,
"step": 6600
},
{
"epoch": 7.018560267857143,
"grad_norm": 12.054357528686523,
"learning_rate": 3.893849206349206e-05,
"loss": 0.0609,
"step": 6700
},
{
"epoch": 7.023024553571428,
"grad_norm": 0.006210957653820515,
"learning_rate": 3.8690476190476195e-05,
"loss": 0.0486,
"step": 6800
},
{
"epoch": 7.027488839285715,
"grad_norm": 0.013958507217466831,
"learning_rate": 3.844246031746032e-05,
"loss": 0.0747,
"step": 6900
},
{
"epoch": 7.031953125,
"grad_norm": 14.515870094299316,
"learning_rate": 3.8194444444444444e-05,
"loss": 0.0343,
"step": 7000
},
{
"epoch": 7.036417410714286,
"grad_norm": 0.007723964750766754,
"learning_rate": 3.794642857142857e-05,
"loss": 0.0673,
"step": 7100
},
{
"epoch": 7.040078125,
"eval_accuracy": 0.9694117647058823,
"eval_loss": 0.14643678069114685,
"eval_runtime": 288.72,
"eval_samples_per_second": 2.944,
"eval_steps_per_second": 1.472,
"step": 7182
},
{
"epoch": 8.000803571428571,
"grad_norm": 45.418617248535156,
"learning_rate": 3.76984126984127e-05,
"loss": 0.0476,
"step": 7200
},
{
"epoch": 8.005267857142858,
"grad_norm": 0.008381331339478493,
"learning_rate": 3.7450396825396824e-05,
"loss": 0.0421,
"step": 7300
},
{
"epoch": 8.009732142857143,
"grad_norm": 0.7666055560112,
"learning_rate": 3.7202380952380956e-05,
"loss": 0.0832,
"step": 7400
},
{
"epoch": 8.014196428571429,
"grad_norm": 0.09307380765676498,
"learning_rate": 3.695436507936508e-05,
"loss": 0.0875,
"step": 7500
},
{
"epoch": 8.018660714285714,
"grad_norm": 0.012713397853076458,
"learning_rate": 3.6706349206349205e-05,
"loss": 0.0441,
"step": 7600
},
{
"epoch": 8.023125,
"grad_norm": 0.021006299182772636,
"learning_rate": 3.6458333333333336e-05,
"loss": 0.054,
"step": 7700
},
{
"epoch": 8.027589285714285,
"grad_norm": 0.1419028341770172,
"learning_rate": 3.621031746031746e-05,
"loss": 0.0608,
"step": 7800
},
{
"epoch": 8.032053571428571,
"grad_norm": 0.025018220767378807,
"learning_rate": 3.5962301587301586e-05,
"loss": 0.0479,
"step": 7900
},
{
"epoch": 8.036517857142858,
"grad_norm": 0.5912023186683655,
"learning_rate": 3.571428571428572e-05,
"loss": 0.0239,
"step": 8000
},
{
"epoch": 8.040044642857143,
"eval_accuracy": 0.9576470588235294,
"eval_loss": 0.193200945854187,
"eval_runtime": 279.9813,
"eval_samples_per_second": 3.036,
"eval_steps_per_second": 1.518,
"step": 8079
},
{
"epoch": 9.000904017857144,
"grad_norm": 0.0350213348865509,
"learning_rate": 3.546626984126984e-05,
"loss": 0.067,
"step": 8100
},
{
"epoch": 9.005368303571428,
"grad_norm": 2.537632465362549,
"learning_rate": 3.521825396825397e-05,
"loss": 0.0245,
"step": 8200
},
{
"epoch": 9.009832589285715,
"grad_norm": 2.564781665802002,
"learning_rate": 3.49702380952381e-05,
"loss": 0.0262,
"step": 8300
},
{
"epoch": 9.014296875,
"grad_norm": 0.00803827028721571,
"learning_rate": 3.472222222222222e-05,
"loss": 0.0559,
"step": 8400
},
{
"epoch": 9.018761160714286,
"grad_norm": 0.005816516932100058,
"learning_rate": 3.4474206349206354e-05,
"loss": 0.0519,
"step": 8500
},
{
"epoch": 9.02322544642857,
"grad_norm": 0.021420830860733986,
"learning_rate": 3.422619047619048e-05,
"loss": 0.032,
"step": 8600
},
{
"epoch": 9.027689732142857,
"grad_norm": 0.028336547315120697,
"learning_rate": 3.397817460317461e-05,
"loss": 0.0227,
"step": 8700
},
{
"epoch": 9.032154017857144,
"grad_norm": 0.02300655096769333,
"learning_rate": 3.3730158730158734e-05,
"loss": 0.0392,
"step": 8800
},
{
"epoch": 9.036618303571428,
"grad_norm": 0.05427232384681702,
"learning_rate": 3.348214285714286e-05,
"loss": 0.0868,
"step": 8900
},
{
"epoch": 9.040055803571429,
"eval_accuracy": 0.9882352941176471,
"eval_loss": 0.05633905157446861,
"eval_runtime": 285.433,
"eval_samples_per_second": 2.978,
"eval_steps_per_second": 1.489,
"step": 8977
},
{
"epoch": 10.001004464285714,
"grad_norm": 0.0491323284804821,
"learning_rate": 3.3234126984126983e-05,
"loss": 0.0618,
"step": 9000
},
{
"epoch": 10.00546875,
"grad_norm": 1.0003972053527832,
"learning_rate": 3.2986111111111115e-05,
"loss": 0.0202,
"step": 9100
},
{
"epoch": 10.009933035714285,
"grad_norm": 0.00252954987809062,
"learning_rate": 3.273809523809524e-05,
"loss": 0.0531,
"step": 9200
},
{
"epoch": 10.014397321428572,
"grad_norm": 9.270633697509766,
"learning_rate": 3.249007936507937e-05,
"loss": 0.035,
"step": 9300
},
{
"epoch": 10.018861607142858,
"grad_norm": 0.014138671569526196,
"learning_rate": 3.2242063492063495e-05,
"loss": 0.0392,
"step": 9400
},
{
"epoch": 10.023325892857143,
"grad_norm": 0.01277222577482462,
"learning_rate": 3.199404761904762e-05,
"loss": 0.059,
"step": 9500
},
{
"epoch": 10.02779017857143,
"grad_norm": 0.0034905134234577417,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.0664,
"step": 9600
},
{
"epoch": 10.032254464285714,
"grad_norm": 0.0024051007349044085,
"learning_rate": 3.1498015873015876e-05,
"loss": 0.0286,
"step": 9700
},
{
"epoch": 10.03671875,
"grad_norm": 0.002095526549965143,
"learning_rate": 3.125e-05,
"loss": 0.0016,
"step": 9800
},
{
"epoch": 10.040066964285714,
"eval_accuracy": 0.9776470588235294,
"eval_loss": 0.08437661826610565,
"eval_runtime": 280.7764,
"eval_samples_per_second": 3.027,
"eval_steps_per_second": 1.514,
"step": 9875
},
{
"epoch": 11.001104910714286,
"grad_norm": 0.0019545548129826784,
"learning_rate": 3.100198412698413e-05,
"loss": 0.0109,
"step": 9900
},
{
"epoch": 11.00556919642857,
"grad_norm": 0.005866718012839556,
"learning_rate": 3.075396825396826e-05,
"loss": 0.0479,
"step": 10000
},
{
"epoch": 11.010033482142857,
"grad_norm": 0.012244959361851215,
"learning_rate": 3.0505952380952385e-05,
"loss": 0.0116,
"step": 10100
},
{
"epoch": 11.014497767857144,
"grad_norm": 0.004522031173110008,
"learning_rate": 3.0257936507936506e-05,
"loss": 0.025,
"step": 10200
},
{
"epoch": 11.018962053571428,
"grad_norm": 0.010159791447222233,
"learning_rate": 3.0009920634920634e-05,
"loss": 0.0036,
"step": 10300
},
{
"epoch": 11.023426339285715,
"grad_norm": 0.40824609994888306,
"learning_rate": 2.9761904761904762e-05,
"loss": 0.0933,
"step": 10400
},
{
"epoch": 11.027890625,
"grad_norm": 0.11058317124843597,
"learning_rate": 2.951388888888889e-05,
"loss": 0.0161,
"step": 10500
},
{
"epoch": 11.032354910714286,
"grad_norm": 1.2187433242797852,
"learning_rate": 2.9265873015873018e-05,
"loss": 0.0329,
"step": 10600
},
{
"epoch": 11.03681919642857,
"grad_norm": 0.020026879385113716,
"learning_rate": 2.9017857142857146e-05,
"loss": 0.0318,
"step": 10700
},
{
"epoch": 11.040078125,
"eval_accuracy": 0.9752941176470589,
"eval_loss": 0.11233757436275482,
"eval_runtime": 279.6949,
"eval_samples_per_second": 3.039,
"eval_steps_per_second": 1.52,
"step": 10773
},
{
"epoch": 12.001205357142856,
"grad_norm": 0.004233605694025755,
"learning_rate": 2.876984126984127e-05,
"loss": 0.0145,
"step": 10800
},
{
"epoch": 12.005669642857143,
"grad_norm": 0.0020020680967718363,
"learning_rate": 2.8521825396825395e-05,
"loss": 0.0022,
"step": 10900
},
{
"epoch": 12.01013392857143,
"grad_norm": 0.0010592287871986628,
"learning_rate": 2.8273809523809523e-05,
"loss": 0.0029,
"step": 11000
},
{
"epoch": 12.014598214285714,
"grad_norm": 0.01872986927628517,
"learning_rate": 2.802579365079365e-05,
"loss": 0.0352,
"step": 11100
},
{
"epoch": 12.0190625,
"grad_norm": 0.05156349390745163,
"learning_rate": 2.777777777777778e-05,
"loss": 0.0047,
"step": 11200
},
{
"epoch": 12.023526785714285,
"grad_norm": 0.00894691701978445,
"learning_rate": 2.7529761904761907e-05,
"loss": 0.0303,
"step": 11300
},
{
"epoch": 12.027991071428572,
"grad_norm": 0.004200028255581856,
"learning_rate": 2.7281746031746032e-05,
"loss": 0.0782,
"step": 11400
},
{
"epoch": 12.032455357142856,
"grad_norm": 0.008372528478503227,
"learning_rate": 2.703373015873016e-05,
"loss": 0.0154,
"step": 11500
},
{
"epoch": 12.036919642857143,
"grad_norm": 0.010021534748375416,
"learning_rate": 2.6785714285714288e-05,
"loss": 0.0144,
"step": 11600
},
{
"epoch": 12.040044642857143,
"eval_accuracy": 0.9894117647058823,
"eval_loss": 0.04987098649144173,
"eval_runtime": 331.781,
"eval_samples_per_second": 2.562,
"eval_steps_per_second": 1.281,
"step": 11670
},
{
"epoch": 13.001305803571428,
"grad_norm": 0.3831511437892914,
"learning_rate": 2.6537698412698416e-05,
"loss": 0.0175,
"step": 11700
},
{
"epoch": 13.005770089285715,
"grad_norm": 0.0010712681105360389,
"learning_rate": 2.628968253968254e-05,
"loss": 0.0281,
"step": 11800
},
{
"epoch": 13.010234375,
"grad_norm": 0.004961916245520115,
"learning_rate": 2.604166666666667e-05,
"loss": 0.0162,
"step": 11900
},
{
"epoch": 13.014698660714286,
"grad_norm": 0.3577312231063843,
"learning_rate": 2.5793650793650796e-05,
"loss": 0.0133,
"step": 12000
},
{
"epoch": 13.01916294642857,
"grad_norm": 0.0016846248181536794,
"learning_rate": 2.554563492063492e-05,
"loss": 0.0456,
"step": 12100
},
{
"epoch": 13.023627232142857,
"grad_norm": 0.005252454895526171,
"learning_rate": 2.529761904761905e-05,
"loss": 0.0043,
"step": 12200
},
{
"epoch": 13.028091517857144,
"grad_norm": 65.35294342041016,
"learning_rate": 2.5049603174603177e-05,
"loss": 0.0248,
"step": 12300
},
{
"epoch": 13.032555803571428,
"grad_norm": 0.0010413563577458262,
"learning_rate": 2.4801587301587305e-05,
"loss": 0.033,
"step": 12400
},
{
"epoch": 13.037020089285715,
"grad_norm": 28.086708068847656,
"learning_rate": 2.455357142857143e-05,
"loss": 0.0028,
"step": 12500
},
{
"epoch": 13.040055803571429,
"eval_accuracy": 0.9870588235294118,
"eval_loss": 0.08093971014022827,
"eval_runtime": 287.2538,
"eval_samples_per_second": 2.959,
"eval_steps_per_second": 1.48,
"step": 12568
},
{
"epoch": 14.00140625,
"grad_norm": 0.011327456682920456,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.0203,
"step": 12600
},
{
"epoch": 14.005870535714285,
"grad_norm": 0.006360394414514303,
"learning_rate": 2.4057539682539686e-05,
"loss": 0.0009,
"step": 12700
},
{
"epoch": 14.010334821428572,
"grad_norm": 1.3321506977081299,
"learning_rate": 2.380952380952381e-05,
"loss": 0.0186,
"step": 12800
},
{
"epoch": 14.014799107142856,
"grad_norm": 0.0009386364254169166,
"learning_rate": 2.3561507936507938e-05,
"loss": 0.0048,
"step": 12900
},
{
"epoch": 14.019263392857143,
"grad_norm": 0.0016534485621377826,
"learning_rate": 2.3313492063492066e-05,
"loss": 0.037,
"step": 13000
},
{
"epoch": 14.02372767857143,
"grad_norm": 0.001421699533239007,
"learning_rate": 2.3065476190476194e-05,
"loss": 0.0111,
"step": 13100
},
{
"epoch": 14.028191964285714,
"grad_norm": 0.0014466221909970045,
"learning_rate": 2.281746031746032e-05,
"loss": 0.0169,
"step": 13200
},
{
"epoch": 14.03265625,
"grad_norm": 0.0036468463949859142,
"learning_rate": 2.2569444444444447e-05,
"loss": 0.019,
"step": 13300
},
{
"epoch": 14.037120535714285,
"grad_norm": 0.0012320175301283598,
"learning_rate": 2.2321428571428575e-05,
"loss": 0.0074,
"step": 13400
},
{
"epoch": 14.040066964285714,
"eval_accuracy": 0.9929411764705882,
"eval_loss": 0.045501772314310074,
"eval_runtime": 285.3107,
"eval_samples_per_second": 2.979,
"eval_steps_per_second": 1.49,
"step": 13466
},
{
"epoch": 15.00150669642857,
"grad_norm": 0.0006422046571969986,
"learning_rate": 2.20734126984127e-05,
"loss": 0.0202,
"step": 13500
},
{
"epoch": 15.005970982142857,
"grad_norm": 0.0008420124650001526,
"learning_rate": 2.1825396825396827e-05,
"loss": 0.0116,
"step": 13600
},
{
"epoch": 15.010435267857142,
"grad_norm": 0.018089979887008667,
"learning_rate": 2.1577380952380955e-05,
"loss": 0.0099,
"step": 13700
},
{
"epoch": 15.014899553571428,
"grad_norm": 0.0031337908003479242,
"learning_rate": 2.132936507936508e-05,
"loss": 0.0566,
"step": 13800
},
{
"epoch": 15.019363839285715,
"grad_norm": 0.0016157528152689338,
"learning_rate": 2.1081349206349208e-05,
"loss": 0.0212,
"step": 13900
},
{
"epoch": 15.023828125,
"grad_norm": 0.01456926204264164,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.0003,
"step": 14000
},
{
"epoch": 15.028292410714286,
"grad_norm": 0.001924099400639534,
"learning_rate": 2.058531746031746e-05,
"loss": 0.0149,
"step": 14100
},
{
"epoch": 15.03275669642857,
"grad_norm": 0.0008741599158383906,
"learning_rate": 2.033730158730159e-05,
"loss": 0.0168,
"step": 14200
},
{
"epoch": 15.037220982142857,
"grad_norm": 0.06954433768987656,
"learning_rate": 2.0089285714285717e-05,
"loss": 0.0002,
"step": 14300
},
{
"epoch": 15.040078125,
"eval_accuracy": 0.9905882352941177,
"eval_loss": 0.058066971600055695,
"eval_runtime": 289.1743,
"eval_samples_per_second": 2.939,
"eval_steps_per_second": 1.47,
"step": 14364
},
{
"epoch": 16.001607142857143,
"grad_norm": 0.0014486366417258978,
"learning_rate": 1.984126984126984e-05,
"loss": 0.0063,
"step": 14400
},
{
"epoch": 16.006071428571428,
"grad_norm": 0.0007301854784600437,
"learning_rate": 1.959325396825397e-05,
"loss": 0.0186,
"step": 14500
},
{
"epoch": 16.010535714285716,
"grad_norm": 0.003457231679931283,
"learning_rate": 1.9345238095238097e-05,
"loss": 0.0236,
"step": 14600
},
{
"epoch": 16.015,
"grad_norm": 0.005807195790112019,
"learning_rate": 1.9097222222222222e-05,
"loss": 0.0183,
"step": 14700
},
{
"epoch": 16.019464285714285,
"grad_norm": 0.002843959955498576,
"learning_rate": 1.884920634920635e-05,
"loss": 0.0242,
"step": 14800
},
{
"epoch": 16.02392857142857,
"grad_norm": 0.37613585591316223,
"learning_rate": 1.8601190476190478e-05,
"loss": 0.0101,
"step": 14900
},
{
"epoch": 16.028392857142858,
"grad_norm": 0.0005575509858317673,
"learning_rate": 1.8353174603174602e-05,
"loss": 0.0109,
"step": 15000
},
{
"epoch": 16.032857142857143,
"grad_norm": 0.0006386680179275572,
"learning_rate": 1.810515873015873e-05,
"loss": 0.0013,
"step": 15100
},
{
"epoch": 16.037321428571428,
"grad_norm": 0.0010088573908433318,
"learning_rate": 1.785714285714286e-05,
"loss": 0.0077,
"step": 15200
},
{
"epoch": 16.040044642857143,
"eval_accuracy": 0.9894117647058823,
"eval_loss": 0.05021252483129501,
"eval_runtime": 284.0094,
"eval_samples_per_second": 2.993,
"eval_steps_per_second": 1.496,
"step": 15261
},
{
"epoch": 17.001707589285715,
"grad_norm": 0.0006336846854537725,
"learning_rate": 1.7609126984126986e-05,
"loss": 0.0212,
"step": 15300
},
{
"epoch": 17.006171875,
"grad_norm": 0.0005883209523744881,
"learning_rate": 1.736111111111111e-05,
"loss": 0.0095,
"step": 15400
},
{
"epoch": 17.010636160714284,
"grad_norm": 0.0021267228294163942,
"learning_rate": 1.711309523809524e-05,
"loss": 0.0113,
"step": 15500
},
{
"epoch": 17.015100446428573,
"grad_norm": 0.0009908992797136307,
"learning_rate": 1.6865079365079367e-05,
"loss": 0.0118,
"step": 15600
},
{
"epoch": 17.019564732142857,
"grad_norm": 0.000644190120510757,
"learning_rate": 1.6617063492063492e-05,
"loss": 0.0007,
"step": 15700
},
{
"epoch": 17.024029017857142,
"grad_norm": 0.0005113797378726304,
"learning_rate": 1.636904761904762e-05,
"loss": 0.01,
"step": 15800
},
{
"epoch": 17.02849330357143,
"grad_norm": 0.0008760132477618754,
"learning_rate": 1.6121031746031748e-05,
"loss": 0.0026,
"step": 15900
},
{
"epoch": 17.032957589285715,
"grad_norm": 0.00030510194483213127,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.0015,
"step": 16000
},
{
"epoch": 17.037421875,
"grad_norm": 0.0004963899846188724,
"learning_rate": 1.5625e-05,
"loss": 0.0005,
"step": 16100
},
{
"epoch": 17.040055803571427,
"eval_accuracy": 0.9929411764705882,
"eval_loss": 0.04069099575281143,
"eval_runtime": 285.9614,
"eval_samples_per_second": 2.972,
"eval_steps_per_second": 1.486,
"step": 16159
},
{
"epoch": 18.001808035714287,
"grad_norm": 0.0015891814837232232,
"learning_rate": 1.537698412698413e-05,
"loss": 0.0376,
"step": 16200
},
{
"epoch": 18.006272321428572,
"grad_norm": 0.008500500582158566,
"learning_rate": 1.5128968253968253e-05,
"loss": 0.0203,
"step": 16300
},
{
"epoch": 18.010736607142857,
"grad_norm": 0.0030595629941672087,
"learning_rate": 1.4880952380952381e-05,
"loss": 0.0042,
"step": 16400
},
{
"epoch": 18.01520089285714,
"grad_norm": 1.0810060501098633,
"learning_rate": 1.4632936507936509e-05,
"loss": 0.017,
"step": 16500
},
{
"epoch": 18.01966517857143,
"grad_norm": 0.0005325720412656665,
"learning_rate": 1.4384920634920635e-05,
"loss": 0.0036,
"step": 16600
},
{
"epoch": 18.024129464285714,
"grad_norm": 0.0014920306857675314,
"learning_rate": 1.4136904761904762e-05,
"loss": 0.0236,
"step": 16700
},
{
"epoch": 18.02859375,
"grad_norm": 0.00048302882350981236,
"learning_rate": 1.388888888888889e-05,
"loss": 0.0127,
"step": 16800
},
{
"epoch": 18.033058035714287,
"grad_norm": 0.002715888200327754,
"learning_rate": 1.3640873015873016e-05,
"loss": 0.0146,
"step": 16900
},
{
"epoch": 18.037522321428572,
"grad_norm": 0.0004213691863697022,
"learning_rate": 1.3392857142857144e-05,
"loss": 0.0004,
"step": 17000
},
{
"epoch": 18.040066964285714,
"eval_accuracy": 0.9905882352941177,
"eval_loss": 0.05496314540505409,
"eval_runtime": 318.6642,
"eval_samples_per_second": 2.667,
"eval_steps_per_second": 1.334,
"step": 17057
},
{
"epoch": 19.001908482142856,
"grad_norm": 0.00044045469257980585,
"learning_rate": 1.314484126984127e-05,
"loss": 0.0143,
"step": 17100
},
{
"epoch": 19.006372767857144,
"grad_norm": 0.0004946400295011699,
"learning_rate": 1.2896825396825398e-05,
"loss": 0.0002,
"step": 17200
},
{
"epoch": 19.01083705357143,
"grad_norm": 0.014897634275257587,
"learning_rate": 1.2648809523809524e-05,
"loss": 0.0011,
"step": 17300
},
{
"epoch": 19.015301339285713,
"grad_norm": 0.015875551849603653,
"learning_rate": 1.2400793650793652e-05,
"loss": 0.0007,
"step": 17400
},
{
"epoch": 19.019765625,
"grad_norm": 0.0004391854163259268,
"learning_rate": 1.2152777777777779e-05,
"loss": 0.0068,
"step": 17500
},
{
"epoch": 19.024229910714286,
"grad_norm": 0.00046034177648834884,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.0001,
"step": 17600
},
{
"epoch": 19.02869419642857,
"grad_norm": 0.0017288514645770192,
"learning_rate": 1.1656746031746033e-05,
"loss": 0.0001,
"step": 17700
},
{
"epoch": 19.033158482142856,
"grad_norm": 0.0026627290062606335,
"learning_rate": 1.140873015873016e-05,
"loss": 0.0001,
"step": 17800
},
{
"epoch": 19.037622767857144,
"grad_norm": 0.0004681396530941129,
"learning_rate": 1.1160714285714287e-05,
"loss": 0.0001,
"step": 17900
},
{
"epoch": 19.040078125,
"eval_accuracy": 0.9929411764705882,
"eval_loss": 0.05834496021270752,
"eval_runtime": 239.7594,
"eval_samples_per_second": 3.545,
"eval_steps_per_second": 1.773,
"step": 17955
},
{
"epoch": 19.040078125,
"step": 17955,
"total_flos": 1.7905236367909847e+20,
"train_loss": 0.295465733557037,
"train_runtime": 68646.4816,
"train_samples_per_second": 2.61,
"train_steps_per_second": 0.326
},
{
"epoch": 19.040078125,
"eval_accuracy": 0.9929411764705882,
"eval_loss": 0.04550177976489067,
"eval_runtime": 230.0122,
"eval_samples_per_second": 3.695,
"eval_steps_per_second": 1.848,
"step": 17955
},
{
"epoch": 19.040078125,
"eval_accuracy": 0.8973354231974922,
"eval_loss": 0.5587517619132996,
"eval_runtime": 352.6874,
"eval_samples_per_second": 3.618,
"eval_steps_per_second": 1.809,
"step": 17955
}
],
"logging_steps": 100,
"max_steps": 22400,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7905236367909847e+20,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}