{ "best_metric": 0.9929411764705882, "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_BdSLW60_FrameRateCorrected_withAug_100/checkpoint-13466", "epoch": 19.040078125, "eval_steps": 500, "global_step": 17955, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004464285714285714, "grad_norm": 11.12140941619873, "learning_rate": 2.2321428571428573e-06, "loss": 4.1557, "step": 100 }, { "epoch": 0.008928571428571428, "grad_norm": 10.578296661376953, "learning_rate": 4.464285714285715e-06, "loss": 4.1159, "step": 200 }, { "epoch": 0.013392857142857142, "grad_norm": 9.035299301147461, "learning_rate": 6.696428571428572e-06, "loss": 4.0848, "step": 300 }, { "epoch": 0.017857142857142856, "grad_norm": 9.214325904846191, "learning_rate": 8.92857142857143e-06, "loss": 4.0703, "step": 400 }, { "epoch": 0.022321428571428572, "grad_norm": 8.834626197814941, "learning_rate": 1.1160714285714287e-05, "loss": 4.0688, "step": 500 }, { "epoch": 0.026785714285714284, "grad_norm": 10.655806541442871, "learning_rate": 1.3392857142857144e-05, "loss": 3.8577, "step": 600 }, { "epoch": 0.03125, "grad_norm": 11.894658088684082, "learning_rate": 1.5625e-05, "loss": 3.4927, "step": 700 }, { "epoch": 0.03571428571428571, "grad_norm": 13.875555992126465, "learning_rate": 1.785714285714286e-05, "loss": 3.0699, "step": 800 }, { "epoch": 0.040044642857142855, "eval_accuracy": 0.4752941176470588, "eval_loss": 2.454066514968872, "eval_runtime": 290.1198, "eval_samples_per_second": 2.93, "eval_steps_per_second": 1.465, "step": 897 }, { "epoch": 1.0001004464285714, "grad_norm": 11.632246017456055, "learning_rate": 2.0089285714285717e-05, "loss": 2.5881, "step": 900 }, { "epoch": 1.0045647321428572, "grad_norm": 15.39003849029541, "learning_rate": 2.2321428571428575e-05, "loss": 2.2052, "step": 1000 }, { "epoch": 1.0090290178571428, "grad_norm": 17.561227798461914, "learning_rate": 2.455357142857143e-05, "loss": 1.8017, "step": 1100 }, { "epoch": 1.0134933035714286, "grad_norm": 16.368633270263672, "learning_rate": 2.6785714285714288e-05, "loss": 1.5213, "step": 1200 }, { "epoch": 1.0179575892857142, "grad_norm": 18.419261932373047, "learning_rate": 2.9017857142857146e-05, "loss": 1.1462, "step": 1300 }, { "epoch": 1.022421875, "grad_norm": 14.493526458740234, "learning_rate": 3.125e-05, "loss": 1.0545, "step": 1400 }, { "epoch": 1.0268861607142856, "grad_norm": 15.404373168945312, "learning_rate": 3.348214285714286e-05, "loss": 0.7972, "step": 1500 }, { "epoch": 1.0313504464285714, "grad_norm": 7.37654972076416, "learning_rate": 3.571428571428572e-05, "loss": 0.6743, "step": 1600 }, { "epoch": 1.0358147321428572, "grad_norm": 17.836456298828125, "learning_rate": 3.794642857142857e-05, "loss": 0.6366, "step": 1700 }, { "epoch": 1.0400558035714287, "eval_accuracy": 0.84, "eval_loss": 0.6831679344177246, "eval_runtime": 295.5183, "eval_samples_per_second": 2.876, "eval_steps_per_second": 1.438, "step": 1795 }, { "epoch": 2.000200892857143, "grad_norm": 27.33871078491211, "learning_rate": 4.017857142857143e-05, "loss": 0.6165, "step": 1800 }, { "epoch": 2.0046651785714285, "grad_norm": 1.4543864727020264, "learning_rate": 4.2410714285714285e-05, "loss": 0.4179, "step": 1900 }, { "epoch": 2.0091294642857145, "grad_norm": 7.2733659744262695, "learning_rate": 4.464285714285715e-05, "loss": 0.4156, "step": 2000 }, { "epoch": 2.01359375, "grad_norm": 21.995115280151367, "learning_rate": 4.6875e-05, "loss": 0.3666, "step": 2100 }, { "epoch": 2.0180580357142857, "grad_norm": 19.265806198120117, "learning_rate": 4.910714285714286e-05, "loss": 0.3751, "step": 2200 }, { "epoch": 2.0225223214285712, "grad_norm": 26.048490524291992, "learning_rate": 4.985119047619048e-05, "loss": 0.3401, "step": 2300 }, { "epoch": 2.0269866071428573, "grad_norm": 26.414731979370117, "learning_rate": 4.960317460317461e-05, "loss": 0.2955, "step": 2400 }, { "epoch": 2.031450892857143, "grad_norm": 17.34372901916504, "learning_rate": 4.9355158730158735e-05, "loss": 0.2859, "step": 2500 }, { "epoch": 2.0359151785714285, "grad_norm": 3.029252767562866, "learning_rate": 4.910714285714286e-05, "loss": 0.2253, "step": 2600 }, { "epoch": 2.0400669642857143, "eval_accuracy": 0.9023529411764706, "eval_loss": 0.3464316725730896, "eval_runtime": 282.6757, "eval_samples_per_second": 3.007, "eval_steps_per_second": 1.503, "step": 2693 }, { "epoch": 3.000301339285714, "grad_norm": 11.130131721496582, "learning_rate": 4.8859126984126984e-05, "loss": 0.232, "step": 2700 }, { "epoch": 3.004765625, "grad_norm": 3.47011661529541, "learning_rate": 4.8611111111111115e-05, "loss": 0.1247, "step": 2800 }, { "epoch": 3.0092299107142857, "grad_norm": 18.701496124267578, "learning_rate": 4.836309523809524e-05, "loss": 0.1293, "step": 2900 }, { "epoch": 3.0136941964285713, "grad_norm": 0.7256734371185303, "learning_rate": 4.811507936507937e-05, "loss": 0.1291, "step": 3000 }, { "epoch": 3.0181584821428573, "grad_norm": 24.983957290649414, "learning_rate": 4.7867063492063496e-05, "loss": 0.195, "step": 3100 }, { "epoch": 3.022622767857143, "grad_norm": 0.1959875524044037, "learning_rate": 4.761904761904762e-05, "loss": 0.0969, "step": 3200 }, { "epoch": 3.0270870535714285, "grad_norm": 1.1051886081695557, "learning_rate": 4.7371031746031745e-05, "loss": 0.1691, "step": 3300 }, { "epoch": 3.031551339285714, "grad_norm": 0.48205551505088806, "learning_rate": 4.7123015873015876e-05, "loss": 0.1297, "step": 3400 }, { "epoch": 3.036015625, "grad_norm": 0.8840370774269104, "learning_rate": 4.6875e-05, "loss": 0.1229, "step": 3500 }, { "epoch": 3.040078125, "eval_accuracy": 0.9647058823529412, "eval_loss": 0.14670781791210175, "eval_runtime": 285.475, "eval_samples_per_second": 2.977, "eval_steps_per_second": 1.489, "step": 3591 }, { "epoch": 4.000401785714286, "grad_norm": 0.21204273402690887, "learning_rate": 4.662698412698413e-05, "loss": 0.1337, "step": 3600 }, { "epoch": 4.004866071428571, "grad_norm": 2.2111618518829346, "learning_rate": 4.637896825396826e-05, "loss": 0.0821, "step": 3700 }, { "epoch": 4.009330357142857, "grad_norm": 2.208402395248413, "learning_rate": 4.613095238095239e-05, "loss": 0.098, "step": 3800 }, { "epoch": 4.0137946428571425, "grad_norm": 3.035139560699463, "learning_rate": 4.5882936507936506e-05, "loss": 0.0828, "step": 3900 }, { "epoch": 4.018258928571429, "grad_norm": 0.06664509326219559, "learning_rate": 4.563492063492064e-05, "loss": 0.0705, "step": 4000 }, { "epoch": 4.0227232142857146, "grad_norm": 0.049911659210920334, "learning_rate": 4.538690476190476e-05, "loss": 0.0506, "step": 4100 }, { "epoch": 4.0271875, "grad_norm": 6.9254374504089355, "learning_rate": 4.5138888888888894e-05, "loss": 0.0895, "step": 4200 }, { "epoch": 4.031651785714286, "grad_norm": 0.6636308431625366, "learning_rate": 4.489087301587302e-05, "loss": 0.0762, "step": 4300 }, { "epoch": 4.036116071428571, "grad_norm": 0.07036083936691284, "learning_rate": 4.464285714285715e-05, "loss": 0.1045, "step": 4400 }, { "epoch": 4.040044642857143, "eval_accuracy": 0.9635294117647059, "eval_loss": 0.1458999365568161, "eval_runtime": 292.1403, "eval_samples_per_second": 2.91, "eval_steps_per_second": 1.455, "step": 4488 }, { "epoch": 5.000502232142857, "grad_norm": 25.948030471801758, "learning_rate": 4.439484126984127e-05, "loss": 0.1201, "step": 4500 }, { "epoch": 5.0049665178571425, "grad_norm": 4.851236343383789, "learning_rate": 4.41468253968254e-05, "loss": 0.0751, "step": 4600 }, { "epoch": 5.009430803571429, "grad_norm": 2.069117307662964, "learning_rate": 4.3898809523809523e-05, "loss": 0.06, "step": 4700 }, { "epoch": 5.013895089285715, "grad_norm": 0.02893979474902153, "learning_rate": 4.3650793650793655e-05, "loss": 0.0583, "step": 4800 }, { "epoch": 5.018359375, "grad_norm": 38.84079360961914, "learning_rate": 4.340277777777778e-05, "loss": 0.0854, "step": 4900 }, { "epoch": 5.022823660714286, "grad_norm": 0.01713498868048191, "learning_rate": 4.315476190476191e-05, "loss": 0.1064, "step": 5000 }, { "epoch": 5.027287946428571, "grad_norm": 2.2113935947418213, "learning_rate": 4.290674603174603e-05, "loss": 0.0534, "step": 5100 }, { "epoch": 5.031752232142857, "grad_norm": 0.030846355482935905, "learning_rate": 4.265873015873016e-05, "loss": 0.0812, "step": 5200 }, { "epoch": 5.0362165178571425, "grad_norm": 63.66303253173828, "learning_rate": 4.2410714285714285e-05, "loss": 0.0631, "step": 5300 }, { "epoch": 5.040055803571429, "eval_accuracy": 0.971764705882353, "eval_loss": 0.13126207888126373, "eval_runtime": 282.9661, "eval_samples_per_second": 3.004, "eval_steps_per_second": 1.502, "step": 5386 }, { "epoch": 6.000602678571428, "grad_norm": 0.01721133291721344, "learning_rate": 4.2162698412698416e-05, "loss": 0.1066, "step": 5400 }, { "epoch": 6.005066964285715, "grad_norm": 0.06797400861978531, "learning_rate": 4.191468253968254e-05, "loss": 0.0751, "step": 5500 }, { "epoch": 6.00953125, "grad_norm": 0.22653132677078247, "learning_rate": 4.166666666666667e-05, "loss": 0.0417, "step": 5600 }, { "epoch": 6.013995535714286, "grad_norm": 0.07131924480199814, "learning_rate": 4.14186507936508e-05, "loss": 0.0158, "step": 5700 }, { "epoch": 6.018459821428571, "grad_norm": 40.63113784790039, "learning_rate": 4.117063492063492e-05, "loss": 0.0522, "step": 5800 }, { "epoch": 6.022924107142857, "grad_norm": 0.09443258494138718, "learning_rate": 4.0922619047619046e-05, "loss": 0.072, "step": 5900 }, { "epoch": 6.027388392857143, "grad_norm": 0.5265907049179077, "learning_rate": 4.067460317460318e-05, "loss": 0.0318, "step": 6000 }, { "epoch": 6.031852678571428, "grad_norm": 0.03210202232003212, "learning_rate": 4.04265873015873e-05, "loss": 0.0877, "step": 6100 }, { "epoch": 6.036316964285715, "grad_norm": 0.34825244545936584, "learning_rate": 4.017857142857143e-05, "loss": 0.0736, "step": 6200 }, { "epoch": 6.040066964285714, "eval_accuracy": 0.9635294117647059, "eval_loss": 0.18067213892936707, "eval_runtime": 285.3373, "eval_samples_per_second": 2.979, "eval_steps_per_second": 1.489, "step": 6284 }, { "epoch": 7.000703125, "grad_norm": 0.006914912257343531, "learning_rate": 3.993055555555556e-05, "loss": 0.0283, "step": 6300 }, { "epoch": 7.005167410714286, "grad_norm": 0.0338265560567379, "learning_rate": 3.968253968253968e-05, "loss": 0.0499, "step": 6400 }, { "epoch": 7.009631696428571, "grad_norm": 10.877938270568848, "learning_rate": 3.943452380952381e-05, "loss": 0.0082, "step": 6500 }, { "epoch": 7.014095982142857, "grad_norm": 0.10941223055124283, "learning_rate": 3.918650793650794e-05, "loss": 0.0657, "step": 6600 }, { "epoch": 7.018560267857143, "grad_norm": 12.054357528686523, "learning_rate": 3.893849206349206e-05, "loss": 0.0609, "step": 6700 }, { "epoch": 7.023024553571428, "grad_norm": 0.006210957653820515, "learning_rate": 3.8690476190476195e-05, "loss": 0.0486, "step": 6800 }, { "epoch": 7.027488839285715, "grad_norm": 0.013958507217466831, "learning_rate": 3.844246031746032e-05, "loss": 0.0747, "step": 6900 }, { "epoch": 7.031953125, "grad_norm": 14.515870094299316, "learning_rate": 3.8194444444444444e-05, "loss": 0.0343, "step": 7000 }, { "epoch": 7.036417410714286, "grad_norm": 0.007723964750766754, "learning_rate": 3.794642857142857e-05, "loss": 0.0673, "step": 7100 }, { "epoch": 7.040078125, "eval_accuracy": 0.9694117647058823, "eval_loss": 0.14643678069114685, "eval_runtime": 288.72, "eval_samples_per_second": 2.944, "eval_steps_per_second": 1.472, "step": 7182 }, { "epoch": 8.000803571428571, "grad_norm": 45.418617248535156, "learning_rate": 3.76984126984127e-05, "loss": 0.0476, "step": 7200 }, { "epoch": 8.005267857142858, "grad_norm": 0.008381331339478493, "learning_rate": 3.7450396825396824e-05, "loss": 0.0421, "step": 7300 }, { "epoch": 8.009732142857143, "grad_norm": 0.7666055560112, "learning_rate": 3.7202380952380956e-05, "loss": 0.0832, "step": 7400 }, { "epoch": 8.014196428571429, "grad_norm": 0.09307380765676498, "learning_rate": 3.695436507936508e-05, "loss": 0.0875, "step": 7500 }, { "epoch": 8.018660714285714, "grad_norm": 0.012713397853076458, "learning_rate": 3.6706349206349205e-05, "loss": 0.0441, "step": 7600 }, { "epoch": 8.023125, "grad_norm": 0.021006299182772636, "learning_rate": 3.6458333333333336e-05, "loss": 0.054, "step": 7700 }, { "epoch": 8.027589285714285, "grad_norm": 0.1419028341770172, "learning_rate": 3.621031746031746e-05, "loss": 0.0608, "step": 7800 }, { "epoch": 8.032053571428571, "grad_norm": 0.025018220767378807, "learning_rate": 3.5962301587301586e-05, "loss": 0.0479, "step": 7900 }, { "epoch": 8.036517857142858, "grad_norm": 0.5912023186683655, "learning_rate": 3.571428571428572e-05, "loss": 0.0239, "step": 8000 }, { "epoch": 8.040044642857143, "eval_accuracy": 0.9576470588235294, "eval_loss": 0.193200945854187, "eval_runtime": 279.9813, "eval_samples_per_second": 3.036, "eval_steps_per_second": 1.518, "step": 8079 }, { "epoch": 9.000904017857144, "grad_norm": 0.0350213348865509, "learning_rate": 3.546626984126984e-05, "loss": 0.067, "step": 8100 }, { "epoch": 9.005368303571428, "grad_norm": 2.537632465362549, "learning_rate": 3.521825396825397e-05, "loss": 0.0245, "step": 8200 }, { "epoch": 9.009832589285715, "grad_norm": 2.564781665802002, "learning_rate": 3.49702380952381e-05, "loss": 0.0262, "step": 8300 }, { "epoch": 9.014296875, "grad_norm": 0.00803827028721571, "learning_rate": 3.472222222222222e-05, "loss": 0.0559, "step": 8400 }, { "epoch": 9.018761160714286, "grad_norm": 0.005816516932100058, "learning_rate": 3.4474206349206354e-05, "loss": 0.0519, "step": 8500 }, { "epoch": 9.02322544642857, "grad_norm": 0.021420830860733986, "learning_rate": 3.422619047619048e-05, "loss": 0.032, "step": 8600 }, { "epoch": 9.027689732142857, "grad_norm": 0.028336547315120697, "learning_rate": 3.397817460317461e-05, "loss": 0.0227, "step": 8700 }, { "epoch": 9.032154017857144, "grad_norm": 0.02300655096769333, "learning_rate": 3.3730158730158734e-05, "loss": 0.0392, "step": 8800 }, { "epoch": 9.036618303571428, "grad_norm": 0.05427232384681702, "learning_rate": 3.348214285714286e-05, "loss": 0.0868, "step": 8900 }, { "epoch": 9.040055803571429, "eval_accuracy": 0.9882352941176471, "eval_loss": 0.05633905157446861, "eval_runtime": 285.433, "eval_samples_per_second": 2.978, "eval_steps_per_second": 1.489, "step": 8977 }, { "epoch": 10.001004464285714, "grad_norm": 0.0491323284804821, "learning_rate": 3.3234126984126983e-05, "loss": 0.0618, "step": 9000 }, { "epoch": 10.00546875, "grad_norm": 1.0003972053527832, "learning_rate": 3.2986111111111115e-05, "loss": 0.0202, "step": 9100 }, { "epoch": 10.009933035714285, "grad_norm": 0.00252954987809062, "learning_rate": 3.273809523809524e-05, "loss": 0.0531, "step": 9200 }, { "epoch": 10.014397321428572, "grad_norm": 9.270633697509766, "learning_rate": 3.249007936507937e-05, "loss": 0.035, "step": 9300 }, { "epoch": 10.018861607142858, "grad_norm": 0.014138671569526196, "learning_rate": 3.2242063492063495e-05, "loss": 0.0392, "step": 9400 }, { "epoch": 10.023325892857143, "grad_norm": 0.01277222577482462, "learning_rate": 3.199404761904762e-05, "loss": 0.059, "step": 9500 }, { "epoch": 10.02779017857143, "grad_norm": 0.0034905134234577417, "learning_rate": 3.1746031746031745e-05, "loss": 0.0664, "step": 9600 }, { "epoch": 10.032254464285714, "grad_norm": 0.0024051007349044085, "learning_rate": 3.1498015873015876e-05, "loss": 0.0286, "step": 9700 }, { "epoch": 10.03671875, "grad_norm": 0.002095526549965143, "learning_rate": 3.125e-05, "loss": 0.0016, "step": 9800 }, { "epoch": 10.040066964285714, "eval_accuracy": 0.9776470588235294, "eval_loss": 0.08437661826610565, "eval_runtime": 280.7764, "eval_samples_per_second": 3.027, "eval_steps_per_second": 1.514, "step": 9875 }, { "epoch": 11.001104910714286, "grad_norm": 0.0019545548129826784, "learning_rate": 3.100198412698413e-05, "loss": 0.0109, "step": 9900 }, { "epoch": 11.00556919642857, "grad_norm": 0.005866718012839556, "learning_rate": 3.075396825396826e-05, "loss": 0.0479, "step": 10000 }, { "epoch": 11.010033482142857, "grad_norm": 0.012244959361851215, "learning_rate": 3.0505952380952385e-05, "loss": 0.0116, "step": 10100 }, { "epoch": 11.014497767857144, "grad_norm": 0.004522031173110008, "learning_rate": 3.0257936507936506e-05, "loss": 0.025, "step": 10200 }, { "epoch": 11.018962053571428, "grad_norm": 0.010159791447222233, "learning_rate": 3.0009920634920634e-05, "loss": 0.0036, "step": 10300 }, { "epoch": 11.023426339285715, "grad_norm": 0.40824609994888306, "learning_rate": 2.9761904761904762e-05, "loss": 0.0933, "step": 10400 }, { "epoch": 11.027890625, "grad_norm": 0.11058317124843597, "learning_rate": 2.951388888888889e-05, "loss": 0.0161, "step": 10500 }, { "epoch": 11.032354910714286, "grad_norm": 1.2187433242797852, "learning_rate": 2.9265873015873018e-05, "loss": 0.0329, "step": 10600 }, { "epoch": 11.03681919642857, "grad_norm": 0.020026879385113716, "learning_rate": 2.9017857142857146e-05, "loss": 0.0318, "step": 10700 }, { "epoch": 11.040078125, "eval_accuracy": 0.9752941176470589, "eval_loss": 0.11233757436275482, "eval_runtime": 279.6949, "eval_samples_per_second": 3.039, "eval_steps_per_second": 1.52, "step": 10773 }, { "epoch": 12.001205357142856, "grad_norm": 0.004233605694025755, "learning_rate": 2.876984126984127e-05, "loss": 0.0145, "step": 10800 }, { "epoch": 12.005669642857143, "grad_norm": 0.0020020680967718363, "learning_rate": 2.8521825396825395e-05, "loss": 0.0022, "step": 10900 }, { "epoch": 12.01013392857143, "grad_norm": 0.0010592287871986628, "learning_rate": 2.8273809523809523e-05, "loss": 0.0029, "step": 11000 }, { "epoch": 12.014598214285714, "grad_norm": 0.01872986927628517, "learning_rate": 2.802579365079365e-05, "loss": 0.0352, "step": 11100 }, { "epoch": 12.0190625, "grad_norm": 0.05156349390745163, "learning_rate": 2.777777777777778e-05, "loss": 0.0047, "step": 11200 }, { "epoch": 12.023526785714285, "grad_norm": 0.00894691701978445, "learning_rate": 2.7529761904761907e-05, "loss": 0.0303, "step": 11300 }, { "epoch": 12.027991071428572, "grad_norm": 0.004200028255581856, "learning_rate": 2.7281746031746032e-05, "loss": 0.0782, "step": 11400 }, { "epoch": 12.032455357142856, "grad_norm": 0.008372528478503227, "learning_rate": 2.703373015873016e-05, "loss": 0.0154, "step": 11500 }, { "epoch": 12.036919642857143, "grad_norm": 0.010021534748375416, "learning_rate": 2.6785714285714288e-05, "loss": 0.0144, "step": 11600 }, { "epoch": 12.040044642857143, "eval_accuracy": 0.9894117647058823, "eval_loss": 0.04987098649144173, "eval_runtime": 331.781, "eval_samples_per_second": 2.562, "eval_steps_per_second": 1.281, "step": 11670 }, { "epoch": 13.001305803571428, "grad_norm": 0.3831511437892914, "learning_rate": 2.6537698412698416e-05, "loss": 0.0175, "step": 11700 }, { "epoch": 13.005770089285715, "grad_norm": 0.0010712681105360389, "learning_rate": 2.628968253968254e-05, "loss": 0.0281, "step": 11800 }, { "epoch": 13.010234375, "grad_norm": 0.004961916245520115, "learning_rate": 2.604166666666667e-05, "loss": 0.0162, "step": 11900 }, { "epoch": 13.014698660714286, "grad_norm": 0.3577312231063843, "learning_rate": 2.5793650793650796e-05, "loss": 0.0133, "step": 12000 }, { "epoch": 13.01916294642857, "grad_norm": 0.0016846248181536794, "learning_rate": 2.554563492063492e-05, "loss": 0.0456, "step": 12100 }, { "epoch": 13.023627232142857, "grad_norm": 0.005252454895526171, "learning_rate": 2.529761904761905e-05, "loss": 0.0043, "step": 12200 }, { "epoch": 13.028091517857144, "grad_norm": 65.35294342041016, "learning_rate": 2.5049603174603177e-05, "loss": 0.0248, "step": 12300 }, { "epoch": 13.032555803571428, "grad_norm": 0.0010413563577458262, "learning_rate": 2.4801587301587305e-05, "loss": 0.033, "step": 12400 }, { "epoch": 13.037020089285715, "grad_norm": 28.086708068847656, "learning_rate": 2.455357142857143e-05, "loss": 0.0028, "step": 12500 }, { "epoch": 13.040055803571429, "eval_accuracy": 0.9870588235294118, "eval_loss": 0.08093971014022827, "eval_runtime": 287.2538, "eval_samples_per_second": 2.959, "eval_steps_per_second": 1.48, "step": 12568 }, { "epoch": 14.00140625, "grad_norm": 0.011327456682920456, "learning_rate": 2.4305555555555558e-05, "loss": 0.0203, "step": 12600 }, { "epoch": 14.005870535714285, "grad_norm": 0.006360394414514303, "learning_rate": 2.4057539682539686e-05, "loss": 0.0009, "step": 12700 }, { "epoch": 14.010334821428572, "grad_norm": 1.3321506977081299, "learning_rate": 2.380952380952381e-05, "loss": 0.0186, "step": 12800 }, { "epoch": 14.014799107142856, "grad_norm": 0.0009386364254169166, "learning_rate": 2.3561507936507938e-05, "loss": 0.0048, "step": 12900 }, { "epoch": 14.019263392857143, "grad_norm": 0.0016534485621377826, "learning_rate": 2.3313492063492066e-05, "loss": 0.037, "step": 13000 }, { "epoch": 14.02372767857143, "grad_norm": 0.001421699533239007, "learning_rate": 2.3065476190476194e-05, "loss": 0.0111, "step": 13100 }, { "epoch": 14.028191964285714, "grad_norm": 0.0014466221909970045, "learning_rate": 2.281746031746032e-05, "loss": 0.0169, "step": 13200 }, { "epoch": 14.03265625, "grad_norm": 0.0036468463949859142, "learning_rate": 2.2569444444444447e-05, "loss": 0.019, "step": 13300 }, { "epoch": 14.037120535714285, "grad_norm": 0.0012320175301283598, "learning_rate": 2.2321428571428575e-05, "loss": 0.0074, "step": 13400 }, { "epoch": 14.040066964285714, "eval_accuracy": 0.9929411764705882, "eval_loss": 0.045501772314310074, "eval_runtime": 285.3107, "eval_samples_per_second": 2.979, "eval_steps_per_second": 1.49, "step": 13466 }, { "epoch": 15.00150669642857, "grad_norm": 0.0006422046571969986, "learning_rate": 2.20734126984127e-05, "loss": 0.0202, "step": 13500 }, { "epoch": 15.005970982142857, "grad_norm": 0.0008420124650001526, "learning_rate": 2.1825396825396827e-05, "loss": 0.0116, "step": 13600 }, { "epoch": 15.010435267857142, "grad_norm": 0.018089979887008667, "learning_rate": 2.1577380952380955e-05, "loss": 0.0099, "step": 13700 }, { "epoch": 15.014899553571428, "grad_norm": 0.0031337908003479242, "learning_rate": 2.132936507936508e-05, "loss": 0.0566, "step": 13800 }, { "epoch": 15.019363839285715, "grad_norm": 0.0016157528152689338, "learning_rate": 2.1081349206349208e-05, "loss": 0.0212, "step": 13900 }, { "epoch": 15.023828125, "grad_norm": 0.01456926204264164, "learning_rate": 2.0833333333333336e-05, "loss": 0.0003, "step": 14000 }, { "epoch": 15.028292410714286, "grad_norm": 0.001924099400639534, "learning_rate": 2.058531746031746e-05, "loss": 0.0149, "step": 14100 }, { "epoch": 15.03275669642857, "grad_norm": 0.0008741599158383906, "learning_rate": 2.033730158730159e-05, "loss": 0.0168, "step": 14200 }, { "epoch": 15.037220982142857, "grad_norm": 0.06954433768987656, "learning_rate": 2.0089285714285717e-05, "loss": 0.0002, "step": 14300 }, { "epoch": 15.040078125, "eval_accuracy": 0.9905882352941177, "eval_loss": 0.058066971600055695, "eval_runtime": 289.1743, "eval_samples_per_second": 2.939, "eval_steps_per_second": 1.47, "step": 14364 }, { "epoch": 16.001607142857143, "grad_norm": 0.0014486366417258978, "learning_rate": 1.984126984126984e-05, "loss": 0.0063, "step": 14400 }, { "epoch": 16.006071428571428, "grad_norm": 0.0007301854784600437, "learning_rate": 1.959325396825397e-05, "loss": 0.0186, "step": 14500 }, { "epoch": 16.010535714285716, "grad_norm": 0.003457231679931283, "learning_rate": 1.9345238095238097e-05, "loss": 0.0236, "step": 14600 }, { "epoch": 16.015, "grad_norm": 0.005807195790112019, "learning_rate": 1.9097222222222222e-05, "loss": 0.0183, "step": 14700 }, { "epoch": 16.019464285714285, "grad_norm": 0.002843959955498576, "learning_rate": 1.884920634920635e-05, "loss": 0.0242, "step": 14800 }, { "epoch": 16.02392857142857, "grad_norm": 0.37613585591316223, "learning_rate": 1.8601190476190478e-05, "loss": 0.0101, "step": 14900 }, { "epoch": 16.028392857142858, "grad_norm": 0.0005575509858317673, "learning_rate": 1.8353174603174602e-05, "loss": 0.0109, "step": 15000 }, { "epoch": 16.032857142857143, "grad_norm": 0.0006386680179275572, "learning_rate": 1.810515873015873e-05, "loss": 0.0013, "step": 15100 }, { "epoch": 16.037321428571428, "grad_norm": 0.0010088573908433318, "learning_rate": 1.785714285714286e-05, "loss": 0.0077, "step": 15200 }, { "epoch": 16.040044642857143, "eval_accuracy": 0.9894117647058823, "eval_loss": 0.05021252483129501, "eval_runtime": 284.0094, "eval_samples_per_second": 2.993, "eval_steps_per_second": 1.496, "step": 15261 }, { "epoch": 17.001707589285715, "grad_norm": 0.0006336846854537725, "learning_rate": 1.7609126984126986e-05, "loss": 0.0212, "step": 15300 }, { "epoch": 17.006171875, "grad_norm": 0.0005883209523744881, "learning_rate": 1.736111111111111e-05, "loss": 0.0095, "step": 15400 }, { "epoch": 17.010636160714284, "grad_norm": 0.0021267228294163942, "learning_rate": 1.711309523809524e-05, "loss": 0.0113, "step": 15500 }, { "epoch": 17.015100446428573, "grad_norm": 0.0009908992797136307, "learning_rate": 1.6865079365079367e-05, "loss": 0.0118, "step": 15600 }, { "epoch": 17.019564732142857, "grad_norm": 0.000644190120510757, "learning_rate": 1.6617063492063492e-05, "loss": 0.0007, "step": 15700 }, { "epoch": 17.024029017857142, "grad_norm": 0.0005113797378726304, "learning_rate": 1.636904761904762e-05, "loss": 0.01, "step": 15800 }, { "epoch": 17.02849330357143, "grad_norm": 0.0008760132477618754, "learning_rate": 1.6121031746031748e-05, "loss": 0.0026, "step": 15900 }, { "epoch": 17.032957589285715, "grad_norm": 0.00030510194483213127, "learning_rate": 1.5873015873015872e-05, "loss": 0.0015, "step": 16000 }, { "epoch": 17.037421875, "grad_norm": 0.0004963899846188724, "learning_rate": 1.5625e-05, "loss": 0.0005, "step": 16100 }, { "epoch": 17.040055803571427, "eval_accuracy": 0.9929411764705882, "eval_loss": 0.04069099575281143, "eval_runtime": 285.9614, "eval_samples_per_second": 2.972, "eval_steps_per_second": 1.486, "step": 16159 }, { "epoch": 18.001808035714287, "grad_norm": 0.0015891814837232232, "learning_rate": 1.537698412698413e-05, "loss": 0.0376, "step": 16200 }, { "epoch": 18.006272321428572, "grad_norm": 0.008500500582158566, "learning_rate": 1.5128968253968253e-05, "loss": 0.0203, "step": 16300 }, { "epoch": 18.010736607142857, "grad_norm": 0.0030595629941672087, "learning_rate": 1.4880952380952381e-05, "loss": 0.0042, "step": 16400 }, { "epoch": 18.01520089285714, "grad_norm": 1.0810060501098633, "learning_rate": 1.4632936507936509e-05, "loss": 0.017, "step": 16500 }, { "epoch": 18.01966517857143, "grad_norm": 0.0005325720412656665, "learning_rate": 1.4384920634920635e-05, "loss": 0.0036, "step": 16600 }, { "epoch": 18.024129464285714, "grad_norm": 0.0014920306857675314, "learning_rate": 1.4136904761904762e-05, "loss": 0.0236, "step": 16700 }, { "epoch": 18.02859375, "grad_norm": 0.00048302882350981236, "learning_rate": 1.388888888888889e-05, "loss": 0.0127, "step": 16800 }, { "epoch": 18.033058035714287, "grad_norm": 0.002715888200327754, "learning_rate": 1.3640873015873016e-05, "loss": 0.0146, "step": 16900 }, { "epoch": 18.037522321428572, "grad_norm": 0.0004213691863697022, "learning_rate": 1.3392857142857144e-05, "loss": 0.0004, "step": 17000 }, { "epoch": 18.040066964285714, "eval_accuracy": 0.9905882352941177, "eval_loss": 0.05496314540505409, "eval_runtime": 318.6642, "eval_samples_per_second": 2.667, "eval_steps_per_second": 1.334, "step": 17057 }, { "epoch": 19.001908482142856, "grad_norm": 0.00044045469257980585, "learning_rate": 1.314484126984127e-05, "loss": 0.0143, "step": 17100 }, { "epoch": 19.006372767857144, "grad_norm": 0.0004946400295011699, "learning_rate": 1.2896825396825398e-05, "loss": 0.0002, "step": 17200 }, { "epoch": 19.01083705357143, "grad_norm": 0.014897634275257587, "learning_rate": 1.2648809523809524e-05, "loss": 0.0011, "step": 17300 }, { "epoch": 19.015301339285713, "grad_norm": 0.015875551849603653, "learning_rate": 1.2400793650793652e-05, "loss": 0.0007, "step": 17400 }, { "epoch": 19.019765625, "grad_norm": 0.0004391854163259268, "learning_rate": 1.2152777777777779e-05, "loss": 0.0068, "step": 17500 }, { "epoch": 19.024229910714286, "grad_norm": 0.00046034177648834884, "learning_rate": 1.1904761904761905e-05, "loss": 0.0001, "step": 17600 }, { "epoch": 19.02869419642857, "grad_norm": 0.0017288514645770192, "learning_rate": 1.1656746031746033e-05, "loss": 0.0001, "step": 17700 }, { "epoch": 19.033158482142856, "grad_norm": 0.0026627290062606335, "learning_rate": 1.140873015873016e-05, "loss": 0.0001, "step": 17800 }, { "epoch": 19.037622767857144, "grad_norm": 0.0004681396530941129, "learning_rate": 1.1160714285714287e-05, "loss": 0.0001, "step": 17900 }, { "epoch": 19.040078125, "eval_accuracy": 0.9929411764705882, "eval_loss": 0.05834496021270752, "eval_runtime": 239.7594, "eval_samples_per_second": 3.545, "eval_steps_per_second": 1.773, "step": 17955 }, { "epoch": 19.040078125, "step": 17955, "total_flos": 1.7905236367909847e+20, "train_loss": 0.295465733557037, "train_runtime": 68646.4816, "train_samples_per_second": 2.61, "train_steps_per_second": 0.326 }, { "epoch": 19.040078125, "eval_accuracy": 0.9929411764705882, "eval_loss": 0.04550177976489067, "eval_runtime": 230.0122, "eval_samples_per_second": 3.695, "eval_steps_per_second": 1.848, "step": 17955 }, { "epoch": 19.040078125, "eval_accuracy": 0.8973354231974922, "eval_loss": 0.5587517619132996, "eval_runtime": 352.6874, "eval_samples_per_second": 3.618, "eval_steps_per_second": 1.809, "step": 17955 } ], "logging_steps": 100, "max_steps": 22400, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7905236367909847e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }