{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.48780487804878,
  "eval_steps": 500,
  "global_step": 15000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02032520325203252,
      "grad_norm": 13.226722717285156,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.9641,
      "step": 10
    },
    {
      "epoch": 0.04065040650406504,
      "grad_norm": 6.338687896728516,
      "learning_rate": 2.5333333333333334e-06,
      "loss": 0.855,
      "step": 20
    },
    {
      "epoch": 0.06097560975609756,
      "grad_norm": 2.033618211746216,
      "learning_rate": 3.866666666666667e-06,
      "loss": 0.5057,
      "step": 30
    },
    {
      "epoch": 0.08130081300813008,
      "grad_norm": 1.127258539199829,
      "learning_rate": 5.2e-06,
      "loss": 0.3473,
      "step": 40
    },
    {
      "epoch": 0.1016260162601626,
      "grad_norm": 1.834140419960022,
      "learning_rate": 6.533333333333333e-06,
      "loss": 0.2456,
      "step": 50
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 2.0608911514282227,
      "learning_rate": 7.866666666666667e-06,
      "loss": 0.1905,
      "step": 60
    },
    {
      "epoch": 0.14227642276422764,
      "grad_norm": 1.4218802452087402,
      "learning_rate": 9.2e-06,
      "loss": 0.1439,
      "step": 70
    },
    {
      "epoch": 0.16260162601626016,
      "grad_norm": 0.7227293848991394,
      "learning_rate": 1.0533333333333335e-05,
      "loss": 0.1313,
      "step": 80
    },
    {
      "epoch": 0.18292682926829268,
      "grad_norm": 0.9034302830696106,
      "learning_rate": 1.1866666666666668e-05,
      "loss": 0.1073,
      "step": 90
    },
    {
      "epoch": 0.2032520325203252,
      "grad_norm": 1.4675464630126953,
      "learning_rate": 1.32e-05,
      "loss": 0.1265,
      "step": 100
    },
    {
      "epoch": 0.22357723577235772,
      "grad_norm": 1.0163527727127075,
      "learning_rate": 1.4533333333333335e-05,
      "loss": 0.1173,
      "step": 110
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.5881352424621582,
      "learning_rate": 1.586666666666667e-05,
      "loss": 0.0945,
      "step": 120
    },
    {
      "epoch": 0.26422764227642276,
      "grad_norm": 1.0253976583480835,
      "learning_rate": 1.7199999999999998e-05,
      "loss": 0.0968,
      "step": 130
    },
    {
      "epoch": 0.2845528455284553,
      "grad_norm": 0.696062445640564,
      "learning_rate": 1.8533333333333334e-05,
      "loss": 0.0893,
      "step": 140
    },
    {
      "epoch": 0.3048780487804878,
      "grad_norm": 0.6593964099884033,
      "learning_rate": 1.9866666666666667e-05,
      "loss": 0.0863,
      "step": 150
    },
    {
      "epoch": 0.3252032520325203,
      "grad_norm": 0.9947993159294128,
      "learning_rate": 2.12e-05,
      "loss": 0.0906,
      "step": 160
    },
    {
      "epoch": 0.34552845528455284,
      "grad_norm": 1.0011829137802124,
      "learning_rate": 2.2533333333333333e-05,
      "loss": 0.0846,
      "step": 170
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 0.9370313286781311,
      "learning_rate": 2.3866666666666666e-05,
      "loss": 0.0927,
      "step": 180
    },
    {
      "epoch": 0.3861788617886179,
      "grad_norm": 0.5689650774002075,
      "learning_rate": 2.5200000000000003e-05,
      "loss": 0.0768,
      "step": 190
    },
    {
      "epoch": 0.4065040650406504,
      "grad_norm": 0.4828910529613495,
      "learning_rate": 2.6533333333333332e-05,
      "loss": 0.0811,
      "step": 200
    },
    {
      "epoch": 0.4268292682926829,
      "grad_norm": 0.62357097864151,
      "learning_rate": 2.786666666666667e-05,
      "loss": 0.0885,
      "step": 210
    },
    {
      "epoch": 0.44715447154471544,
      "grad_norm": 0.5117200016975403,
      "learning_rate": 2.9199999999999998e-05,
      "loss": 0.0866,
      "step": 220
    },
    {
      "epoch": 0.46747967479674796,
      "grad_norm": 0.791079580783844,
      "learning_rate": 3.0533333333333335e-05,
      "loss": 0.0827,
      "step": 230
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.5069907307624817,
      "learning_rate": 3.1866666666666664e-05,
      "loss": 0.0843,
      "step": 240
    },
    {
      "epoch": 0.508130081300813,
      "grad_norm": 0.7022382616996765,
      "learning_rate": 3.32e-05,
      "loss": 0.0837,
      "step": 250
    },
    {
      "epoch": 0.5284552845528455,
      "grad_norm": 0.7299018502235413,
      "learning_rate": 3.453333333333334e-05,
      "loss": 0.0773,
      "step": 260
    },
    {
      "epoch": 0.5487804878048781,
      "grad_norm": 0.532425582408905,
      "learning_rate": 3.586666666666667e-05,
      "loss": 0.0881,
      "step": 270
    },
    {
      "epoch": 0.5691056910569106,
      "grad_norm": 1.002967357635498,
      "learning_rate": 3.72e-05,
      "loss": 0.0768,
      "step": 280
    },
    {
      "epoch": 0.5894308943089431,
      "grad_norm": 0.891633152961731,
      "learning_rate": 3.853333333333334e-05,
      "loss": 0.0913,
      "step": 290
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 1.0027371644973755,
      "learning_rate": 3.986666666666667e-05,
      "loss": 0.0776,
      "step": 300
    },
    {
      "epoch": 0.6300813008130082,
      "grad_norm": 0.8202036619186401,
      "learning_rate": 4.12e-05,
      "loss": 0.0849,
      "step": 310
    },
    {
      "epoch": 0.6504065040650406,
      "grad_norm": 0.7221282720565796,
      "learning_rate": 4.2533333333333335e-05,
      "loss": 0.0787,
      "step": 320
    },
    {
      "epoch": 0.6707317073170732,
      "grad_norm": 0.5890057682991028,
      "learning_rate": 4.3866666666666665e-05,
      "loss": 0.0836,
      "step": 330
    },
    {
      "epoch": 0.6910569105691057,
      "grad_norm": 0.43137913942337036,
      "learning_rate": 4.52e-05,
      "loss": 0.0726,
      "step": 340
    },
    {
      "epoch": 0.7113821138211383,
      "grad_norm": 0.59128737449646,
      "learning_rate": 4.653333333333334e-05,
      "loss": 0.0673,
      "step": 350
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 0.5243335366249084,
      "learning_rate": 4.7866666666666674e-05,
      "loss": 0.0737,
      "step": 360
    },
    {
      "epoch": 0.7520325203252033,
      "grad_norm": 0.4521591067314148,
      "learning_rate": 4.92e-05,
      "loss": 0.0712,
      "step": 370
    },
    {
      "epoch": 0.7723577235772358,
      "grad_norm": 0.5582333207130432,
      "learning_rate": 5.053333333333333e-05,
      "loss": 0.0797,
      "step": 380
    },
    {
      "epoch": 0.7926829268292683,
      "grad_norm": 0.4571887254714966,
      "learning_rate": 5.1866666666666676e-05,
      "loss": 0.0698,
      "step": 390
    },
    {
      "epoch": 0.8130081300813008,
      "grad_norm": 0.8901530504226685,
      "learning_rate": 5.3200000000000006e-05,
      "loss": 0.0715,
      "step": 400
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.7452099919319153,
      "learning_rate": 5.4533333333333335e-05,
      "loss": 0.0775,
      "step": 410
    },
    {
      "epoch": 0.8536585365853658,
      "grad_norm": 0.5491011142730713,
      "learning_rate": 5.5866666666666665e-05,
      "loss": 0.0753,
      "step": 420
    },
    {
      "epoch": 0.8739837398373984,
      "grad_norm": 0.6520193219184875,
      "learning_rate": 5.72e-05,
      "loss": 0.0757,
      "step": 430
    },
    {
      "epoch": 0.8943089430894309,
      "grad_norm": 0.6066526770591736,
      "learning_rate": 5.853333333333334e-05,
      "loss": 0.0733,
      "step": 440
    },
    {
      "epoch": 0.9146341463414634,
      "grad_norm": 0.633453905582428,
      "learning_rate": 5.9866666666666674e-05,
      "loss": 0.072,
      "step": 450
    },
    {
      "epoch": 0.9349593495934959,
      "grad_norm": 0.5940499901771545,
      "learning_rate": 6.12e-05,
      "loss": 0.0808,
      "step": 460
    },
    {
      "epoch": 0.9552845528455285,
      "grad_norm": 0.6593416929244995,
      "learning_rate": 6.253333333333333e-05,
      "loss": 0.0752,
      "step": 470
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.8113526701927185,
      "learning_rate": 6.386666666666667e-05,
      "loss": 0.072,
      "step": 480
    },
    {
      "epoch": 0.9959349593495935,
      "grad_norm": 0.6880797147750854,
      "learning_rate": 6.52e-05,
      "loss": 0.0733,
      "step": 490
    },
    {
      "epoch": 1.016260162601626,
      "grad_norm": 0.6124709248542786,
      "learning_rate": 6.653333333333334e-05,
      "loss": 0.0682,
      "step": 500
    },
    {
      "epoch": 1.0365853658536586,
      "grad_norm": 0.6620608568191528,
      "learning_rate": 6.786666666666667e-05,
      "loss": 0.075,
      "step": 510
    },
    {
      "epoch": 1.056910569105691,
      "grad_norm": 0.44583699107170105,
      "learning_rate": 6.92e-05,
      "loss": 0.0649,
      "step": 520
    },
    {
      "epoch": 1.0772357723577235,
      "grad_norm": 0.5196840763092041,
      "learning_rate": 7.053333333333334e-05,
      "loss": 0.0698,
      "step": 530
    },
    {
      "epoch": 1.0975609756097562,
      "grad_norm": 0.43396124243736267,
      "learning_rate": 7.186666666666667e-05,
      "loss": 0.0643,
      "step": 540
    },
    {
      "epoch": 1.1178861788617886,
      "grad_norm": 0.6352965831756592,
      "learning_rate": 7.32e-05,
      "loss": 0.0656,
      "step": 550
    },
    {
      "epoch": 1.1382113821138211,
      "grad_norm": 0.6376074552536011,
      "learning_rate": 7.453333333333333e-05,
      "loss": 0.0639,
      "step": 560
    },
    {
      "epoch": 1.1585365853658536,
      "grad_norm": 0.7378827929496765,
      "learning_rate": 7.586666666666668e-05,
      "loss": 0.0707,
      "step": 570
    },
    {
      "epoch": 1.1788617886178863,
      "grad_norm": 0.7607502341270447,
      "learning_rate": 7.72e-05,
      "loss": 0.0667,
      "step": 580
    },
    {
      "epoch": 1.1991869918699187,
      "grad_norm": 0.4571978747844696,
      "learning_rate": 7.853333333333334e-05,
      "loss": 0.0727,
      "step": 590
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 0.5919597148895264,
      "learning_rate": 7.986666666666667e-05,
      "loss": 0.0673,
      "step": 600
    },
    {
      "epoch": 1.2398373983739837,
      "grad_norm": 0.46557170152664185,
      "learning_rate": 8.120000000000001e-05,
      "loss": 0.0738,
      "step": 610
    },
    {
      "epoch": 1.2601626016260163,
      "grad_norm": 0.5049855709075928,
      "learning_rate": 8.253333333333334e-05,
      "loss": 0.062,
      "step": 620
    },
    {
      "epoch": 1.2804878048780488,
      "grad_norm": 0.5219482779502869,
      "learning_rate": 8.386666666666667e-05,
      "loss": 0.0609,
      "step": 630
    },
    {
      "epoch": 1.3008130081300813,
      "grad_norm": 0.5961984395980835,
      "learning_rate": 8.52e-05,
      "loss": 0.062,
      "step": 640
    },
    {
      "epoch": 1.321138211382114,
      "grad_norm": 0.5225047469139099,
      "learning_rate": 8.653333333333333e-05,
      "loss": 0.0712,
      "step": 650
    },
    {
      "epoch": 1.3414634146341464,
      "grad_norm": 0.5839872360229492,
      "learning_rate": 8.786666666666667e-05,
      "loss": 0.0625,
      "step": 660
    },
    {
      "epoch": 1.3617886178861789,
      "grad_norm": 0.71927809715271,
      "learning_rate": 8.92e-05,
      "loss": 0.0755,
      "step": 670
    },
    {
      "epoch": 1.3821138211382114,
      "grad_norm": 0.4162643551826477,
      "learning_rate": 9.053333333333334e-05,
      "loss": 0.0643,
      "step": 680
    },
    {
      "epoch": 1.4024390243902438,
      "grad_norm": 0.514099657535553,
      "learning_rate": 9.186666666666667e-05,
      "loss": 0.0622,
      "step": 690
    },
    {
      "epoch": 1.4227642276422765,
      "grad_norm": 0.5861640572547913,
      "learning_rate": 9.320000000000002e-05,
      "loss": 0.0682,
      "step": 700
    },
    {
      "epoch": 1.443089430894309,
      "grad_norm": 0.484695702791214,
      "learning_rate": 9.453333333333335e-05,
      "loss": 0.0753,
      "step": 710
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 0.3829363286495209,
      "learning_rate": 9.586666666666667e-05,
      "loss": 0.0615,
      "step": 720
    },
    {
      "epoch": 1.4837398373983741,
      "grad_norm": 0.5547038912773132,
      "learning_rate": 9.72e-05,
      "loss": 0.0717,
      "step": 730
    },
    {
      "epoch": 1.5040650406504064,
      "grad_norm": 0.4040966033935547,
      "learning_rate": 9.853333333333333e-05,
      "loss": 0.063,
      "step": 740
    },
    {
      "epoch": 1.524390243902439,
      "grad_norm": 0.36058497428894043,
      "learning_rate": 9.986666666666668e-05,
      "loss": 0.0648,
      "step": 750
    },
    {
      "epoch": 1.5447154471544715,
      "grad_norm": 0.5361801981925964,
      "learning_rate": 9.999990157738453e-05,
      "loss": 0.0653,
      "step": 760
    },
    {
      "epoch": 1.565040650406504,
      "grad_norm": 0.4126802980899811,
      "learning_rate": 9.999956135155687e-05,
      "loss": 0.0608,
      "step": 770
    },
    {
      "epoch": 1.5853658536585367,
      "grad_norm": 0.47281134128570557,
      "learning_rate": 9.99989781090763e-05,
      "loss": 0.0641,
      "step": 780
    },
    {
      "epoch": 1.6056910569105691,
      "grad_norm": 0.682732105255127,
      "learning_rate": 9.999815185277755e-05,
      "loss": 0.0621,
      "step": 790
    },
    {
      "epoch": 1.6260162601626016,
      "grad_norm": 0.5980079770088196,
      "learning_rate": 9.999708258667652e-05,
      "loss": 0.0718,
      "step": 800
    },
    {
      "epoch": 1.6463414634146343,
      "grad_norm": 0.5243200659751892,
      "learning_rate": 9.999577031597029e-05,
      "loss": 0.0672,
      "step": 810
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.7068093419075012,
      "learning_rate": 9.999421504703696e-05,
      "loss": 0.0758,
      "step": 820
    },
    {
      "epoch": 1.6869918699186992,
      "grad_norm": 0.3850698173046112,
      "learning_rate": 9.999241678743574e-05,
      "loss": 0.0663,
      "step": 830
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 0.5258147716522217,
      "learning_rate": 9.999037554590683e-05,
      "loss": 0.0567,
      "step": 840
    },
    {
      "epoch": 1.7276422764227641,
      "grad_norm": 0.6071597933769226,
      "learning_rate": 9.998809133237143e-05,
      "loss": 0.0707,
      "step": 850
    },
    {
      "epoch": 1.7479674796747968,
      "grad_norm": 0.48662713170051575,
      "learning_rate": 9.998556415793169e-05,
      "loss": 0.0627,
      "step": 860
    },
    {
      "epoch": 1.7682926829268293,
      "grad_norm": 0.47866326570510864,
      "learning_rate": 9.998279403487062e-05,
      "loss": 0.0683,
      "step": 870
    },
    {
      "epoch": 1.7886178861788617,
      "grad_norm": 0.5103737115859985,
      "learning_rate": 9.997978097665205e-05,
      "loss": 0.0599,
      "step": 880
    },
    {
      "epoch": 1.8089430894308944,
      "grad_norm": 0.5845749378204346,
      "learning_rate": 9.99765249979206e-05,
      "loss": 0.06,
      "step": 890
    },
    {
      "epoch": 1.8292682926829267,
      "grad_norm": 0.28062498569488525,
      "learning_rate": 9.997302611450154e-05,
      "loss": 0.065,
      "step": 900
    },
    {
      "epoch": 1.8495934959349594,
      "grad_norm": 0.5082411766052246,
      "learning_rate": 9.996928434340073e-05,
      "loss": 0.0527,
      "step": 910
    },
    {
      "epoch": 1.8699186991869918,
      "grad_norm": 0.3810727298259735,
      "learning_rate": 9.996529970280462e-05,
      "loss": 0.0579,
      "step": 920
    },
    {
      "epoch": 1.8902439024390243,
      "grad_norm": 0.6368547677993774,
      "learning_rate": 9.996107221208004e-05,
      "loss": 0.0584,
      "step": 930
    },
    {
      "epoch": 1.910569105691057,
      "grad_norm": 0.3547976613044739,
      "learning_rate": 9.995660189177419e-05,
      "loss": 0.0534,
      "step": 940
    },
    {
      "epoch": 1.9308943089430894,
      "grad_norm": 0.40976643562316895,
      "learning_rate": 9.995188876361451e-05,
      "loss": 0.0594,
      "step": 950
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.33262038230895996,
      "learning_rate": 9.994693285050857e-05,
      "loss": 0.0499,
      "step": 960
    },
    {
      "epoch": 1.9715447154471546,
      "grad_norm": 0.3865192234516144,
      "learning_rate": 9.994173417654395e-05,
      "loss": 0.0549,
      "step": 970
    },
    {
      "epoch": 1.9918699186991868,
      "grad_norm": 0.36267194151878357,
      "learning_rate": 9.993629276698821e-05,
      "loss": 0.0508,
      "step": 980
    },
    {
      "epoch": 2.0121951219512195,
      "grad_norm": 0.53200763463974,
      "learning_rate": 9.993060864828858e-05,
      "loss": 0.058,
      "step": 990
    },
    {
      "epoch": 2.032520325203252,
      "grad_norm": 0.3534744381904602,
      "learning_rate": 9.992468184807206e-05,
      "loss": 0.0555,
      "step": 1000
    },
    {
      "epoch": 2.0528455284552845,
      "grad_norm": 0.4305236041545868,
      "learning_rate": 9.991851239514511e-05,
      "loss": 0.0554,
      "step": 1010
    },
    {
      "epoch": 2.073170731707317,
      "grad_norm": 0.45501708984375,
      "learning_rate": 9.991210031949359e-05,
      "loss": 0.0574,
      "step": 1020
    },
    {
      "epoch": 2.0934959349593494,
      "grad_norm": 0.38146859407424927,
      "learning_rate": 9.990544565228259e-05,
      "loss": 0.0522,
      "step": 1030
    },
    {
      "epoch": 2.113821138211382,
      "grad_norm": 0.3539182245731354,
      "learning_rate": 9.989854842585631e-05,
      "loss": 0.0557,
      "step": 1040
    },
    {
      "epoch": 2.1341463414634148,
      "grad_norm": 0.3897205889225006,
      "learning_rate": 9.989140867373783e-05,
      "loss": 0.0534,
      "step": 1050
    },
    {
      "epoch": 2.154471544715447,
      "grad_norm": 0.44654178619384766,
      "learning_rate": 9.988402643062907e-05,
      "loss": 0.0626,
      "step": 1060
    },
    {
      "epoch": 2.1747967479674797,
      "grad_norm": 0.43859946727752686,
      "learning_rate": 9.987640173241046e-05,
      "loss": 0.0554,
      "step": 1070
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 0.299344003200531,
      "learning_rate": 9.986853461614093e-05,
      "loss": 0.051,
      "step": 1080
    },
    {
      "epoch": 2.2154471544715446,
      "grad_norm": 0.28412163257598877,
      "learning_rate": 9.986042512005763e-05,
      "loss": 0.0537,
      "step": 1090
    },
    {
      "epoch": 2.2357723577235773,
      "grad_norm": 0.501313328742981,
      "learning_rate": 9.985207328357573e-05,
      "loss": 0.056,
      "step": 1100
    },
    {
      "epoch": 2.2560975609756095,
      "grad_norm": 0.37945660948753357,
      "learning_rate": 9.984347914728829e-05,
      "loss": 0.052,
      "step": 1110
    },
    {
      "epoch": 2.2764227642276422,
      "grad_norm": 0.5372085571289062,
      "learning_rate": 9.983464275296605e-05,
      "loss": 0.0528,
      "step": 1120
    },
    {
      "epoch": 2.296747967479675,
      "grad_norm": 0.33128976821899414,
      "learning_rate": 9.982556414355724e-05,
      "loss": 0.0565,
      "step": 1130
    },
    {
      "epoch": 2.317073170731707,
      "grad_norm": 0.2715194523334503,
      "learning_rate": 9.981624336318726e-05,
      "loss": 0.0573,
      "step": 1140
    },
    {
      "epoch": 2.33739837398374,
      "grad_norm": 0.48618754744529724,
      "learning_rate": 9.980668045715864e-05,
      "loss": 0.0541,
      "step": 1150
    },
    {
      "epoch": 2.3577235772357725,
      "grad_norm": 0.4522000253200531,
      "learning_rate": 9.979687547195066e-05,
      "loss": 0.0554,
      "step": 1160
    },
    {
      "epoch": 2.3780487804878048,
      "grad_norm": 0.4867287576198578,
      "learning_rate": 9.978682845521927e-05,
      "loss": 0.053,
      "step": 1170
    },
    {
      "epoch": 2.3983739837398375,
      "grad_norm": 0.35228079557418823,
      "learning_rate": 9.977653945579673e-05,
      "loss": 0.05,
      "step": 1180
    },
    {
      "epoch": 2.41869918699187,
      "grad_norm": 0.35814642906188965,
      "learning_rate": 9.976600852369144e-05,
      "loss": 0.0534,
      "step": 1190
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 0.5843226909637451,
      "learning_rate": 9.975523571008769e-05,
      "loss": 0.0524,
      "step": 1200
    },
    {
      "epoch": 2.459349593495935,
      "grad_norm": 0.5159884691238403,
      "learning_rate": 9.97442210673454e-05,
      "loss": 0.05,
      "step": 1210
    },
    {
      "epoch": 2.4796747967479673,
      "grad_norm": 0.5046380162239075,
      "learning_rate": 9.973296464899988e-05,
      "loss": 0.0486,
      "step": 1220
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.3927498757839203,
      "learning_rate": 9.972146650976154e-05,
      "loss": 0.0521,
      "step": 1230
    },
    {
      "epoch": 2.5203252032520327,
      "grad_norm": 0.36474692821502686,
      "learning_rate": 9.970972670551566e-05,
      "loss": 0.0549,
      "step": 1240
    },
    {
      "epoch": 2.540650406504065,
      "grad_norm": 0.35434460639953613,
      "learning_rate": 9.969774529332212e-05,
      "loss": 0.0604,
      "step": 1250
    },
    {
      "epoch": 2.5609756097560976,
      "grad_norm": 0.3699084520339966,
      "learning_rate": 9.968552233141504e-05,
      "loss": 0.0547,
      "step": 1260
    },
    {
      "epoch": 2.58130081300813,
      "grad_norm": 0.3049577474594116,
      "learning_rate": 9.967305787920264e-05,
      "loss": 0.0543,
      "step": 1270
    },
    {
      "epoch": 2.6016260162601625,
      "grad_norm": 0.3516198992729187,
      "learning_rate": 9.966035199726684e-05,
      "loss": 0.0508,
      "step": 1280
    },
    {
      "epoch": 2.6219512195121952,
      "grad_norm": 0.37977465987205505,
      "learning_rate": 9.9647404747363e-05,
      "loss": 0.0558,
      "step": 1290
    },
    {
      "epoch": 2.642276422764228,
      "grad_norm": 0.4862557053565979,
      "learning_rate": 9.96342161924196e-05,
      "loss": 0.0527,
      "step": 1300
    },
    {
      "epoch": 2.66260162601626,
      "grad_norm": 0.4220629334449768,
      "learning_rate": 9.962078639653797e-05,
      "loss": 0.0537,
      "step": 1310
    },
    {
      "epoch": 2.682926829268293,
      "grad_norm": 0.29033005237579346,
      "learning_rate": 9.960711542499202e-05,
      "loss": 0.0542,
      "step": 1320
    },
    {
      "epoch": 2.703252032520325,
      "grad_norm": 0.29148346185684204,
      "learning_rate": 9.959320334422772e-05,
      "loss": 0.046,
      "step": 1330
    },
    {
      "epoch": 2.7235772357723578,
      "grad_norm": 0.260502427816391,
      "learning_rate": 9.957905022186309e-05,
      "loss": 0.0527,
      "step": 1340
    },
    {
      "epoch": 2.7439024390243905,
      "grad_norm": 0.4924025535583496,
      "learning_rate": 9.956465612668757e-05,
      "loss": 0.0483,
      "step": 1350
    },
    {
      "epoch": 2.7642276422764227,
      "grad_norm": 0.4993506968021393,
      "learning_rate": 9.95500211286619e-05,
      "loss": 0.0508,
      "step": 1360
    },
    {
      "epoch": 2.7845528455284554,
      "grad_norm": 0.4157140552997589,
      "learning_rate": 9.953514529891763e-05,
      "loss": 0.0448,
      "step": 1370
    },
    {
      "epoch": 2.8048780487804876,
      "grad_norm": 0.2626568377017975,
      "learning_rate": 9.952002870975693e-05,
      "loss": 0.0527,
      "step": 1380
    },
    {
      "epoch": 2.8252032520325203,
      "grad_norm": 0.3102031946182251,
      "learning_rate": 9.950467143465207e-05,
      "loss": 0.0416,
      "step": 1390
    },
    {
      "epoch": 2.845528455284553,
      "grad_norm": 0.32706791162490845,
      "learning_rate": 9.94890735482452e-05,
      "loss": 0.0511,
      "step": 1400
    },
    {
      "epoch": 2.8658536585365852,
      "grad_norm": 0.2708079516887665,
      "learning_rate": 9.947323512634788e-05,
      "loss": 0.0486,
      "step": 1410
    },
    {
      "epoch": 2.886178861788618,
      "grad_norm": 0.3637838363647461,
      "learning_rate": 9.945715624594081e-05,
      "loss": 0.0488,
      "step": 1420
    },
    {
      "epoch": 2.90650406504065,
      "grad_norm": 0.3679792582988739,
      "learning_rate": 9.944083698517339e-05,
      "loss": 0.0476,
      "step": 1430
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 0.29563894867897034,
      "learning_rate": 9.942427742336334e-05,
      "loss": 0.0481,
      "step": 1440
    },
    {
      "epoch": 2.9471544715447155,
      "grad_norm": 0.2425679713487625,
      "learning_rate": 9.940747764099638e-05,
      "loss": 0.0456,
      "step": 1450
    },
    {
      "epoch": 2.9674796747967482,
      "grad_norm": 0.404369980096817,
      "learning_rate": 9.939043771972574e-05,
      "loss": 0.0463,
      "step": 1460
    },
    {
      "epoch": 2.9878048780487805,
      "grad_norm": 0.3223326802253723,
      "learning_rate": 9.937315774237186e-05,
      "loss": 0.0468,
      "step": 1470
    },
    {
      "epoch": 3.008130081300813,
      "grad_norm": 0.35848402976989746,
      "learning_rate": 9.93556377929219e-05,
      "loss": 0.0518,
      "step": 1480
    },
    {
      "epoch": 3.0284552845528454,
      "grad_norm": 0.2589901089668274,
      "learning_rate": 9.933787795652942e-05,
      "loss": 0.0437,
      "step": 1490
    },
    {
      "epoch": 3.048780487804878,
      "grad_norm": 0.4505438506603241,
      "learning_rate": 9.931987831951386e-05,
      "loss": 0.054,
      "step": 1500
    },
    {
      "epoch": 3.069105691056911,
      "grad_norm": 0.41765499114990234,
      "learning_rate": 9.930163896936027e-05,
      "loss": 0.0515,
      "step": 1510
    },
    {
      "epoch": 3.089430894308943,
      "grad_norm": 0.33510351181030273,
      "learning_rate": 9.92831599947187e-05,
      "loss": 0.0502,
      "step": 1520
    },
    {
      "epoch": 3.1097560975609757,
      "grad_norm": 0.35163936018943787,
      "learning_rate": 9.926444148540393e-05,
      "loss": 0.0458,
      "step": 1530
    },
    {
      "epoch": 3.130081300813008,
      "grad_norm": 0.393381267786026,
      "learning_rate": 9.924548353239495e-05,
      "loss": 0.053,
      "step": 1540
    },
    {
      "epoch": 3.1504065040650406,
      "grad_norm": 0.27485236525535583,
      "learning_rate": 9.922628622783451e-05,
      "loss": 0.0479,
      "step": 1550
    },
    {
      "epoch": 3.1707317073170733,
      "grad_norm": 0.23032088577747345,
      "learning_rate": 9.920684966502878e-05,
      "loss": 0.0465,
      "step": 1560
    },
    {
      "epoch": 3.1910569105691056,
      "grad_norm": 0.36205798387527466,
      "learning_rate": 9.918717393844669e-05,
      "loss": 0.046,
      "step": 1570
    },
    {
      "epoch": 3.2113821138211383,
      "grad_norm": 0.30545753240585327,
      "learning_rate": 9.916725914371969e-05,
      "loss": 0.0484,
      "step": 1580
    },
    {
      "epoch": 3.231707317073171,
      "grad_norm": 0.3029427230358124,
      "learning_rate": 9.914710537764117e-05,
      "loss": 0.049,
      "step": 1590
    },
    {
      "epoch": 3.252032520325203,
      "grad_norm": 0.21751540899276733,
      "learning_rate": 9.912671273816601e-05,
      "loss": 0.0437,
      "step": 1600
    },
    {
      "epoch": 3.272357723577236,
      "grad_norm": 0.4366188943386078,
      "learning_rate": 9.910608132441008e-05,
      "loss": 0.0442,
      "step": 1610
    },
    {
      "epoch": 3.292682926829268,
      "grad_norm": 0.25756415724754333,
      "learning_rate": 9.908521123664981e-05,
      "loss": 0.0398,
      "step": 1620
    },
    {
      "epoch": 3.313008130081301,
      "grad_norm": 0.34685221314430237,
      "learning_rate": 9.906410257632168e-05,
      "loss": 0.0453,
      "step": 1630
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.3353132903575897,
      "learning_rate": 9.904275544602169e-05,
      "loss": 0.0468,
      "step": 1640
    },
    {
      "epoch": 3.3536585365853657,
      "grad_norm": 0.3785768747329712,
      "learning_rate": 9.902116994950493e-05,
      "loss": 0.0468,
      "step": 1650
    },
    {
      "epoch": 3.3739837398373984,
      "grad_norm": 0.23636843264102936,
      "learning_rate": 9.899934619168501e-05,
      "loss": 0.0434,
      "step": 1660
    },
    {
      "epoch": 3.394308943089431,
      "grad_norm": 0.25570765137672424,
      "learning_rate": 9.89772842786336e-05,
      "loss": 0.0476,
      "step": 1670
    },
    {
      "epoch": 3.4146341463414633,
      "grad_norm": 0.389314740896225,
      "learning_rate": 9.895498431757989e-05,
      "loss": 0.0536,
      "step": 1680
    },
    {
      "epoch": 3.434959349593496,
      "grad_norm": 0.35203060507774353,
      "learning_rate": 9.893244641691006e-05,
      "loss": 0.05,
      "step": 1690
    },
    {
      "epoch": 3.4552845528455283,
      "grad_norm": 0.33974689245224,
      "learning_rate": 9.890967068616677e-05,
      "loss": 0.0479,
      "step": 1700
    },
    {
      "epoch": 3.475609756097561,
      "grad_norm": 0.3649257719516754,
      "learning_rate": 9.888665723604864e-05,
      "loss": 0.0493,
      "step": 1710
    },
    {
      "epoch": 3.4959349593495936,
      "grad_norm": 0.3207753002643585,
      "learning_rate": 9.886340617840968e-05,
      "loss": 0.0451,
      "step": 1720
    },
    {
      "epoch": 3.516260162601626,
      "grad_norm": 0.40109390020370483,
      "learning_rate": 9.883991762625876e-05,
      "loss": 0.052,
      "step": 1730
    },
    {
      "epoch": 3.5365853658536586,
      "grad_norm": 0.3837567865848541,
      "learning_rate": 9.881619169375908e-05,
      "loss": 0.0455,
      "step": 1740
    },
    {
      "epoch": 3.556910569105691,
      "grad_norm": 0.327057421207428,
      "learning_rate": 9.879222849622758e-05,
      "loss": 0.0487,
      "step": 1750
    },
    {
      "epoch": 3.5772357723577235,
      "grad_norm": 0.34808802604675293,
      "learning_rate": 9.876802815013439e-05,
      "loss": 0.049,
      "step": 1760
    },
    {
      "epoch": 3.597560975609756,
      "grad_norm": 0.28107383847236633,
      "learning_rate": 9.87435907731023e-05,
      "loss": 0.0449,
      "step": 1770
    },
    {
      "epoch": 3.617886178861789,
      "grad_norm": 0.41478756070137024,
      "learning_rate": 9.871891648390614e-05,
      "loss": 0.0465,
      "step": 1780
    },
    {
      "epoch": 3.638211382113821,
      "grad_norm": 0.40074285864830017,
      "learning_rate": 9.869400540247223e-05,
      "loss": 0.0507,
      "step": 1790
    },
    {
      "epoch": 3.658536585365854,
      "grad_norm": 0.45639923214912415,
      "learning_rate": 9.866885764987776e-05,
      "loss": 0.0483,
      "step": 1800
    },
    {
      "epoch": 3.678861788617886,
      "grad_norm": 0.36190950870513916,
      "learning_rate": 9.86434733483503e-05,
      "loss": 0.0479,
      "step": 1810
    },
    {
      "epoch": 3.6991869918699187,
      "grad_norm": 0.24315151572227478,
      "learning_rate": 9.861785262126705e-05,
      "loss": 0.0432,
      "step": 1820
    },
    {
      "epoch": 3.7195121951219514,
      "grad_norm": 0.391323983669281,
      "learning_rate": 9.85919955931544e-05,
      "loss": 0.0439,
      "step": 1830
    },
    {
      "epoch": 3.7398373983739837,
      "grad_norm": 0.3237808346748352,
      "learning_rate": 9.856590238968721e-05,
      "loss": 0.0488,
      "step": 1840
    },
    {
      "epoch": 3.7601626016260163,
      "grad_norm": 0.3073022663593292,
      "learning_rate": 9.853957313768824e-05,
      "loss": 0.0442,
      "step": 1850
    },
    {
      "epoch": 3.7804878048780486,
      "grad_norm": 0.30783456563949585,
      "learning_rate": 9.851300796512755e-05,
      "loss": 0.0515,
      "step": 1860
    },
    {
      "epoch": 3.8008130081300813,
      "grad_norm": 0.2766580879688263,
      "learning_rate": 9.848620700112188e-05,
      "loss": 0.0484,
      "step": 1870
    },
    {
      "epoch": 3.821138211382114,
      "grad_norm": 0.4244973063468933,
      "learning_rate": 9.845917037593396e-05,
      "loss": 0.0405,
      "step": 1880
    },
    {
      "epoch": 3.841463414634146,
      "grad_norm": 0.29562023282051086,
      "learning_rate": 9.843189822097196e-05,
      "loss": 0.039,
      "step": 1890
    },
    {
      "epoch": 3.861788617886179,
      "grad_norm": 0.29623448848724365,
      "learning_rate": 9.84043906687888e-05,
      "loss": 0.044,
      "step": 1900
    },
    {
      "epoch": 3.882113821138211,
      "grad_norm": 0.3283941149711609,
      "learning_rate": 9.837664785308149e-05,
      "loss": 0.0449,
      "step": 1910
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 0.32358860969543457,
      "learning_rate": 9.834866990869059e-05,
      "loss": 0.0447,
      "step": 1920
    },
    {
      "epoch": 3.9227642276422765,
      "grad_norm": 0.18848779797554016,
      "learning_rate": 9.832045697159938e-05,
      "loss": 0.0466,
      "step": 1930
    },
    {
      "epoch": 3.943089430894309,
      "grad_norm": 0.38725459575653076,
      "learning_rate": 9.829200917893334e-05,
      "loss": 0.0446,
      "step": 1940
    },
    {
      "epoch": 3.9634146341463414,
      "grad_norm": 0.3182893991470337,
      "learning_rate": 9.826332666895944e-05,
      "loss": 0.0422,
      "step": 1950
    },
    {
      "epoch": 3.983739837398374,
      "grad_norm": 0.27023324370384216,
      "learning_rate": 9.823440958108545e-05,
      "loss": 0.0374,
      "step": 1960
    },
    {
      "epoch": 4.004065040650406,
      "grad_norm": 0.47869518399238586,
      "learning_rate": 9.820525805585927e-05,
      "loss": 0.0466,
      "step": 1970
    },
    {
      "epoch": 4.024390243902439,
      "grad_norm": 0.3792267441749573,
      "learning_rate": 9.81758722349683e-05,
      "loss": 0.0469,
      "step": 1980
    },
    {
      "epoch": 4.044715447154472,
      "grad_norm": 0.3714332580566406,
      "learning_rate": 9.814625226123862e-05,
      "loss": 0.0398,
      "step": 1990
    },
    {
      "epoch": 4.065040650406504,
      "grad_norm": 0.35444778203964233,
      "learning_rate": 9.811639827863449e-05,
      "loss": 0.0406,
      "step": 2000
    },
    {
      "epoch": 4.085365853658536,
      "grad_norm": 0.23252159357070923,
      "learning_rate": 9.808631043225741e-05,
      "loss": 0.0378,
      "step": 2010
    },
    {
      "epoch": 4.105691056910569,
      "grad_norm": 0.3677350580692291,
      "learning_rate": 9.805598886834567e-05,
      "loss": 0.041,
      "step": 2020
    },
    {
      "epoch": 4.126016260162602,
      "grad_norm": 0.30878278613090515,
      "learning_rate": 9.802543373427344e-05,
      "loss": 0.0427,
      "step": 2030
    },
    {
      "epoch": 4.146341463414634,
      "grad_norm": 0.24736954271793365,
      "learning_rate": 9.799464517855018e-05,
      "loss": 0.0475,
      "step": 2040
    },
    {
      "epoch": 4.166666666666667,
      "grad_norm": 0.3511386215686798,
      "learning_rate": 9.79636233508198e-05,
      "loss": 0.045,
      "step": 2050
    },
    {
      "epoch": 4.186991869918699,
      "grad_norm": 0.41636860370635986,
      "learning_rate": 9.793236840186005e-05,
      "loss": 0.0392,
      "step": 2060
    },
    {
      "epoch": 4.2073170731707314,
      "grad_norm": 0.4612429141998291,
      "learning_rate": 9.790088048358175e-05,
      "loss": 0.0462,
      "step": 2070
    },
    {
      "epoch": 4.227642276422764,
      "grad_norm": 0.30272865295410156,
      "learning_rate": 9.786915974902798e-05,
      "loss": 0.0436,
      "step": 2080
    },
    {
      "epoch": 4.247967479674797,
      "grad_norm": 0.39128997921943665,
      "learning_rate": 9.783720635237343e-05,
      "loss": 0.0366,
      "step": 2090
    },
    {
      "epoch": 4.2682926829268295,
      "grad_norm": 0.2508282959461212,
      "learning_rate": 9.780502044892362e-05,
      "loss": 0.0383,
      "step": 2100
    },
    {
      "epoch": 4.288617886178862,
      "grad_norm": 0.39258867502212524,
      "learning_rate": 9.777260219511415e-05,
      "loss": 0.0387,
      "step": 2110
    },
    {
      "epoch": 4.308943089430894,
      "grad_norm": 0.25197938084602356,
      "learning_rate": 9.773995174850989e-05,
      "loss": 0.0361,
      "step": 2120
    },
    {
      "epoch": 4.329268292682927,
      "grad_norm": 0.32829299569129944,
      "learning_rate": 9.770706926780428e-05,
      "loss": 0.0406,
      "step": 2130
    },
    {
      "epoch": 4.349593495934959,
      "grad_norm": 0.33921363949775696,
      "learning_rate": 9.767395491281855e-05,
      "loss": 0.0389,
      "step": 2140
    },
    {
      "epoch": 4.369918699186992,
      "grad_norm": 0.23240751028060913,
      "learning_rate": 9.764060884450086e-05,
      "loss": 0.0471,
      "step": 2150
    },
    {
      "epoch": 4.390243902439025,
      "grad_norm": 0.3543383777141571,
      "learning_rate": 9.76070312249257e-05,
      "loss": 0.0397,
      "step": 2160
    },
    {
      "epoch": 4.4105691056910565,
      "grad_norm": 0.2758557200431824,
      "learning_rate": 9.757322221729283e-05,
      "loss": 0.0361,
      "step": 2170
    },
    {
      "epoch": 4.430894308943089,
      "grad_norm": 0.37993648648262024,
      "learning_rate": 9.753918198592682e-05,
      "loss": 0.0417,
      "step": 2180
    },
    {
      "epoch": 4.451219512195122,
      "grad_norm": 0.2048022300004959,
      "learning_rate": 9.750491069627593e-05,
      "loss": 0.0374,
      "step": 2190
    },
    {
      "epoch": 4.471544715447155,
      "grad_norm": 0.3218154013156891,
      "learning_rate": 9.747040851491149e-05,
      "loss": 0.037,
      "step": 2200
    },
    {
      "epoch": 4.491869918699187,
      "grad_norm": 0.20155727863311768,
      "learning_rate": 9.743567560952711e-05,
      "loss": 0.0388,
      "step": 2210
    },
    {
      "epoch": 4.512195121951219,
      "grad_norm": 0.2601895034313202,
      "learning_rate": 9.740071214893773e-05,
      "loss": 0.038,
      "step": 2220
    },
    {
      "epoch": 4.532520325203252,
      "grad_norm": 0.18895843625068665,
      "learning_rate": 9.736551830307892e-05,
      "loss": 0.0402,
      "step": 2230
    },
    {
      "epoch": 4.5528455284552845,
      "grad_norm": 0.25968900322914124,
      "learning_rate": 9.733009424300597e-05,
      "loss": 0.036,
      "step": 2240
    },
    {
      "epoch": 4.573170731707317,
      "grad_norm": 0.22050248086452484,
      "learning_rate": 9.729444014089314e-05,
      "loss": 0.0399,
      "step": 2250
    },
    {
      "epoch": 4.59349593495935,
      "grad_norm": 0.3891421854496002,
      "learning_rate": 9.725855617003275e-05,
      "loss": 0.0418,
      "step": 2260
    },
    {
      "epoch": 4.613821138211382,
      "grad_norm": 0.2956850528717041,
      "learning_rate": 9.72224425048344e-05,
      "loss": 0.0378,
      "step": 2270
    },
    {
      "epoch": 4.634146341463414,
      "grad_norm": 0.2368791252374649,
      "learning_rate": 9.718609932082405e-05,
      "loss": 0.041,
      "step": 2280
    },
    {
      "epoch": 4.654471544715447,
      "grad_norm": 0.3309593200683594,
      "learning_rate": 9.714952679464323e-05,
      "loss": 0.0397,
      "step": 2290
    },
    {
      "epoch": 4.67479674796748,
      "grad_norm": 0.32733872532844543,
      "learning_rate": 9.711272510404816e-05,
      "loss": 0.0382,
      "step": 2300
    },
    {
      "epoch": 4.695121951219512,
      "grad_norm": 0.30426713824272156,
      "learning_rate": 9.70756944279089e-05,
      "loss": 0.0443,
      "step": 2310
    },
    {
      "epoch": 4.715447154471545,
      "grad_norm": 0.23480118811130524,
      "learning_rate": 9.70384349462084e-05,
      "loss": 0.0346,
      "step": 2320
    },
    {
      "epoch": 4.735772357723577,
      "grad_norm": 0.45616215467453003,
      "learning_rate": 9.700094684004182e-05,
      "loss": 0.0452,
      "step": 2330
    },
    {
      "epoch": 4.7560975609756095,
      "grad_norm": 0.3060383200645447,
      "learning_rate": 9.696323029161535e-05,
      "loss": 0.0399,
      "step": 2340
    },
    {
      "epoch": 4.776422764227642,
      "grad_norm": 0.410624235868454,
      "learning_rate": 9.692528548424567e-05,
      "loss": 0.0443,
      "step": 2350
    },
    {
      "epoch": 4.796747967479675,
      "grad_norm": 0.38253292441368103,
      "learning_rate": 9.688711260235872e-05,
      "loss": 0.0377,
      "step": 2360
    },
    {
      "epoch": 4.817073170731708,
      "grad_norm": 0.2503706216812134,
      "learning_rate": 9.684871183148912e-05,
      "loss": 0.0356,
      "step": 2370
    },
    {
      "epoch": 4.83739837398374,
      "grad_norm": 0.2619319260120392,
      "learning_rate": 9.681008335827898e-05,
      "loss": 0.0361,
      "step": 2380
    },
    {
      "epoch": 4.857723577235772,
      "grad_norm": 0.18718576431274414,
      "learning_rate": 9.677122737047724e-05,
      "loss": 0.0363,
      "step": 2390
    },
    {
      "epoch": 4.878048780487805,
      "grad_norm": 0.27881988883018494,
      "learning_rate": 9.673214405693857e-05,
      "loss": 0.0361,
      "step": 2400
    },
    {
      "epoch": 4.8983739837398375,
      "grad_norm": 0.22834157943725586,
      "learning_rate": 9.669283360762258e-05,
      "loss": 0.0303,
      "step": 2410
    },
    {
      "epoch": 4.91869918699187,
      "grad_norm": 0.23578988015651703,
      "learning_rate": 9.66532962135928e-05,
      "loss": 0.0348,
      "step": 2420
    },
    {
      "epoch": 4.939024390243903,
      "grad_norm": 0.3880244195461273,
      "learning_rate": 9.661353206701582e-05,
      "loss": 0.0328,
      "step": 2430
    },
    {
      "epoch": 4.959349593495935,
      "grad_norm": 0.26335278153419495,
      "learning_rate": 9.657354136116035e-05,
      "loss": 0.0358,
      "step": 2440
    },
    {
      "epoch": 4.979674796747967,
      "grad_norm": 0.37027862668037415,
      "learning_rate": 9.653332429039625e-05,
      "loss": 0.039,
      "step": 2450
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.29535719752311707,
      "learning_rate": 9.649288105019356e-05,
      "loss": 0.0394,
      "step": 2460
    },
    {
      "epoch": 5.020325203252033,
      "grad_norm": 0.32351961731910706,
      "learning_rate": 9.645221183712165e-05,
      "loss": 0.0358,
      "step": 2470
    },
    {
      "epoch": 5.040650406504065,
      "grad_norm": 0.2990739345550537,
      "learning_rate": 9.641131684884817e-05,
      "loss": 0.0349,
      "step": 2480
    },
    {
      "epoch": 5.060975609756097,
      "grad_norm": 0.4529745876789093,
      "learning_rate": 9.637019628413813e-05,
      "loss": 0.0351,
      "step": 2490
    },
    {
      "epoch": 5.08130081300813,
      "grad_norm": 0.2561455965042114,
      "learning_rate": 9.632885034285291e-05,
      "loss": 0.0368,
      "step": 2500
    },
    {
      "epoch": 5.1016260162601625,
      "grad_norm": 0.3451431691646576,
      "learning_rate": 9.628727922594931e-05,
      "loss": 0.0353,
      "step": 2510
    },
    {
      "epoch": 5.121951219512195,
      "grad_norm": 0.25922685861587524,
      "learning_rate": 9.624548313547862e-05,
      "loss": 0.0351,
      "step": 2520
    },
    {
      "epoch": 5.142276422764228,
      "grad_norm": 0.29846322536468506,
      "learning_rate": 9.620346227458547e-05,
      "loss": 0.0327,
      "step": 2530
    },
    {
      "epoch": 5.16260162601626,
      "grad_norm": 0.2221139669418335,
      "learning_rate": 9.616121684750712e-05,
      "loss": 0.0381,
      "step": 2540
    },
    {
      "epoch": 5.182926829268292,
      "grad_norm": 0.26518428325653076,
      "learning_rate": 9.611874705957215e-05,
      "loss": 0.0381,
      "step": 2550
    },
    {
      "epoch": 5.203252032520325,
      "grad_norm": 0.20282354950904846,
      "learning_rate": 9.607605311719972e-05,
      "loss": 0.0381,
      "step": 2560
    },
    {
      "epoch": 5.223577235772358,
      "grad_norm": 0.25701841711997986,
      "learning_rate": 9.603313522789841e-05,
      "loss": 0.0355,
      "step": 2570
    },
    {
      "epoch": 5.2439024390243905,
      "grad_norm": 0.33082619309425354,
      "learning_rate": 9.598999360026529e-05,
      "loss": 0.036,
      "step": 2580
    },
    {
      "epoch": 5.264227642276423,
      "grad_norm": 0.23188990354537964,
      "learning_rate": 9.59466284439849e-05,
      "loss": 0.0368,
      "step": 2590
    },
    {
      "epoch": 5.284552845528455,
      "grad_norm": 0.3070012927055359,
      "learning_rate": 9.590303996982815e-05,
      "loss": 0.0318,
      "step": 2600
    },
    {
      "epoch": 5.304878048780488,
      "grad_norm": 0.2157573103904724,
      "learning_rate": 9.585922838965145e-05,
      "loss": 0.0311,
      "step": 2610
    },
    {
      "epoch": 5.32520325203252,
      "grad_norm": 0.16244249045848846,
      "learning_rate": 9.581519391639549e-05,
      "loss": 0.0335,
      "step": 2620
    },
    {
      "epoch": 5.345528455284553,
      "grad_norm": 0.23334026336669922,
      "learning_rate": 9.577093676408439e-05,
      "loss": 0.0293,
      "step": 2630
    },
    {
      "epoch": 5.365853658536586,
      "grad_norm": 0.27338385581970215,
      "learning_rate": 9.572645714782453e-05,
      "loss": 0.0307,
      "step": 2640
    },
    {
      "epoch": 5.3861788617886175,
      "grad_norm": 0.2784765958786011,
      "learning_rate": 9.568175528380354e-05,
      "loss": 0.0366,
      "step": 2650
    },
    {
      "epoch": 5.40650406504065,
      "grad_norm": 0.3815922439098358,
      "learning_rate": 9.56368313892893e-05,
      "loss": 0.0393,
      "step": 2660
    },
    {
      "epoch": 5.426829268292683,
      "grad_norm": 0.431367427110672,
      "learning_rate": 9.55916856826288e-05,
      "loss": 0.0372,
      "step": 2670
    },
    {
      "epoch": 5.4471544715447155,
      "grad_norm": 0.3358304798603058,
      "learning_rate": 9.554631838324713e-05,
      "loss": 0.0398,
      "step": 2680
    },
    {
      "epoch": 5.467479674796748,
      "grad_norm": 0.24542434513568878,
      "learning_rate": 9.55007297116464e-05,
      "loss": 0.0346,
      "step": 2690
    },
    {
      "epoch": 5.487804878048781,
      "grad_norm": 0.35996758937835693,
      "learning_rate": 9.545491988940472e-05,
      "loss": 0.0389,
      "step": 2700
    },
    {
      "epoch": 5.508130081300813,
      "grad_norm": 0.34098583459854126,
      "learning_rate": 9.540888913917501e-05,
      "loss": 0.0399,
      "step": 2710
    },
    {
      "epoch": 5.528455284552845,
      "grad_norm": 0.4673711955547333,
      "learning_rate": 9.536263768468401e-05,
      "loss": 0.033,
      "step": 2720
    },
    {
      "epoch": 5.548780487804878,
      "grad_norm": 0.24956433475017548,
      "learning_rate": 9.531616575073117e-05,
      "loss": 0.0364,
      "step": 2730
    },
    {
      "epoch": 5.569105691056911,
      "grad_norm": 0.3244880139827728,
      "learning_rate": 9.526947356318754e-05,
      "loss": 0.0365,
      "step": 2740
    },
    {
      "epoch": 5.5894308943089435,
      "grad_norm": 0.28384968638420105,
      "learning_rate": 9.52225613489947e-05,
      "loss": 0.0331,
      "step": 2750
    },
    {
      "epoch": 5.609756097560975,
      "grad_norm": 0.20069807767868042,
      "learning_rate": 9.517542933616365e-05,
      "loss": 0.0335,
      "step": 2760
    },
    {
      "epoch": 5.630081300813008,
      "grad_norm": 0.28787484765052795,
      "learning_rate": 9.512807775377366e-05,
      "loss": 0.0376,
      "step": 2770
    },
    {
      "epoch": 5.650406504065041,
      "grad_norm": 0.3660148084163666,
      "learning_rate": 9.508050683197121e-05,
      "loss": 0.0411,
      "step": 2780
    },
    {
      "epoch": 5.670731707317073,
      "grad_norm": 0.2562386095523834,
      "learning_rate": 9.503271680196888e-05,
      "loss": 0.0394,
      "step": 2790
    },
    {
      "epoch": 5.691056910569106,
      "grad_norm": 0.3705272674560547,
      "learning_rate": 9.498470789604413e-05,
      "loss": 0.0393,
      "step": 2800
    },
    {
      "epoch": 5.711382113821138,
      "grad_norm": 0.23900169134140015,
      "learning_rate": 9.49364803475383e-05,
      "loss": 0.0362,
      "step": 2810
    },
    {
      "epoch": 5.7317073170731705,
      "grad_norm": 0.3208228349685669,
      "learning_rate": 9.48880343908554e-05,
      "loss": 0.0334,
      "step": 2820
    },
    {
      "epoch": 5.752032520325203,
      "grad_norm": 0.22293923795223236,
      "learning_rate": 9.4839370261461e-05,
      "loss": 0.0334,
      "step": 2830
    },
    {
      "epoch": 5.772357723577236,
      "grad_norm": 0.24014882743358612,
      "learning_rate": 9.479048819588098e-05,
      "loss": 0.0375,
      "step": 2840
    },
    {
      "epoch": 5.7926829268292686,
      "grad_norm": 0.2373969554901123,
      "learning_rate": 9.474138843170063e-05,
      "loss": 0.0361,
      "step": 2850
    },
    {
      "epoch": 5.8130081300813,
      "grad_norm": 0.22191618382930756,
      "learning_rate": 9.46920712075632e-05,
      "loss": 0.0327,
      "step": 2860
    },
    {
      "epoch": 5.833333333333333,
      "grad_norm": 0.21625275909900665,
      "learning_rate": 9.464253676316893e-05,
      "loss": 0.03,
      "step": 2870
    },
    {
      "epoch": 5.853658536585366,
      "grad_norm": 0.21678130328655243,
      "learning_rate": 9.459278533927384e-05,
      "loss": 0.0314,
      "step": 2880
    },
    {
      "epoch": 5.873983739837398,
      "grad_norm": 0.26047956943511963,
      "learning_rate": 9.454281717768854e-05,
      "loss": 0.0329,
      "step": 2890
    },
    {
      "epoch": 5.894308943089431,
      "grad_norm": 0.24275684356689453,
      "learning_rate": 9.449263252127708e-05,
      "loss": 0.0352,
      "step": 2900
    },
    {
      "epoch": 5.914634146341464,
      "grad_norm": 0.3038444519042969,
      "learning_rate": 9.444223161395573e-05,
      "loss": 0.0405,
      "step": 2910
    },
    {
      "epoch": 5.934959349593496,
      "grad_norm": 0.21918287873268127,
      "learning_rate": 9.439161470069184e-05,
      "loss": 0.0322,
      "step": 2920
    },
    {
      "epoch": 5.955284552845528,
      "grad_norm": 0.3447254002094269,
      "learning_rate": 9.43407820275026e-05,
      "loss": 0.0302,
      "step": 2930
    },
    {
      "epoch": 5.975609756097561,
      "grad_norm": 0.23503296077251434,
      "learning_rate": 9.428973384145396e-05,
      "loss": 0.0322,
      "step": 2940
    },
    {
      "epoch": 5.995934959349594,
      "grad_norm": 0.21576885879039764,
      "learning_rate": 9.423847039065922e-05,
      "loss": 0.0353,
      "step": 2950
    },
    {
      "epoch": 6.016260162601626,
      "grad_norm": 0.23675313591957092,
      "learning_rate": 9.418699192427805e-05,
      "loss": 0.0378,
      "step": 2960
    },
    {
      "epoch": 6.036585365853658,
      "grad_norm": 0.16742359101772308,
      "learning_rate": 9.41352986925151e-05,
      "loss": 0.0372,
      "step": 2970
    },
    {
      "epoch": 6.056910569105691,
      "grad_norm": 0.21645894646644592,
      "learning_rate": 9.408339094661895e-05,
      "loss": 0.0329,
      "step": 2980
    },
    {
      "epoch": 6.0772357723577235,
      "grad_norm": 0.2730005383491516,
      "learning_rate": 9.40312689388807e-05,
      "loss": 0.0348,
      "step": 2990
    },
    {
      "epoch": 6.097560975609756,
      "grad_norm": 0.2974849343299866,
      "learning_rate": 9.397893292263292e-05,
      "loss": 0.0351,
      "step": 3000
    },
    {
      "epoch": 6.117886178861789,
      "grad_norm": 0.2710169851779938,
      "learning_rate": 9.392638315224829e-05,
      "loss": 0.0342,
      "step": 3010
    },
    {
      "epoch": 6.138211382113822,
      "grad_norm": 0.2089296132326126,
      "learning_rate": 9.387361988313846e-05,
      "loss": 0.0388,
      "step": 3020
    },
    {
      "epoch": 6.158536585365853,
      "grad_norm": 0.2190776765346527,
      "learning_rate": 9.38206433717527e-05,
      "loss": 0.0313,
      "step": 3030
    },
    {
      "epoch": 6.178861788617886,
      "grad_norm": 0.29918304085731506,
      "learning_rate": 9.376745387557681e-05,
      "loss": 0.0289,
      "step": 3040
    },
    {
      "epoch": 6.199186991869919,
      "grad_norm": 0.39586132764816284,
      "learning_rate": 9.371405165313169e-05,
      "loss": 0.0392,
      "step": 3050
    },
    {
      "epoch": 6.219512195121951,
      "grad_norm": 0.2960090935230255,
      "learning_rate": 9.366043696397222e-05,
      "loss": 0.0374,
      "step": 3060
    },
    {
      "epoch": 6.239837398373984,
      "grad_norm": 0.2631392478942871,
      "learning_rate": 9.360661006868592e-05,
      "loss": 0.0336,
      "step": 3070
    },
    {
      "epoch": 6.260162601626016,
      "grad_norm": 0.2667069137096405,
      "learning_rate": 9.355257122889173e-05,
      "loss": 0.0308,
      "step": 3080
    },
    {
      "epoch": 6.280487804878049,
      "grad_norm": 0.1516672670841217,
      "learning_rate": 9.349832070723871e-05,
      "loss": 0.0316,
      "step": 3090
    },
    {
      "epoch": 6.300813008130081,
      "grad_norm": 0.22945436835289001,
      "learning_rate": 9.34438587674048e-05,
      "loss": 0.0329,
      "step": 3100
    },
    {
      "epoch": 6.321138211382114,
      "grad_norm": 0.19599388539791107,
      "learning_rate": 9.338918567409545e-05,
      "loss": 0.0349,
      "step": 3110
    },
    {
      "epoch": 6.341463414634147,
      "grad_norm": 0.19596216082572937,
      "learning_rate": 9.333430169304247e-05,
      "loss": 0.0285,
      "step": 3120
    },
    {
      "epoch": 6.361788617886178,
      "grad_norm": 0.3172271251678467,
      "learning_rate": 9.327920709100259e-05,
      "loss": 0.035,
      "step": 3130
    },
    {
      "epoch": 6.382113821138211,
      "grad_norm": 0.29143592715263367,
      "learning_rate": 9.322390213575631e-05,
      "loss": 0.0339,
      "step": 3140
    },
    {
      "epoch": 6.402439024390244,
      "grad_norm": 0.21700842678546906,
      "learning_rate": 9.316838709610648e-05,
      "loss": 0.0307,
      "step": 3150
    },
    {
      "epoch": 6.4227642276422765,
      "grad_norm": 0.33240920305252075,
      "learning_rate": 9.311266224187706e-05,
      "loss": 0.0317,
      "step": 3160
    },
    {
      "epoch": 6.443089430894309,
      "grad_norm": 0.25290894508361816,
      "learning_rate": 9.305672784391175e-05,
      "loss": 0.0333,
      "step": 3170
    },
    {
      "epoch": 6.463414634146342,
      "grad_norm": 0.30824029445648193,
      "learning_rate": 9.300058417407276e-05,
      "loss": 0.0284,
      "step": 3180
    },
    {
      "epoch": 6.483739837398374,
      "grad_norm": 0.24259643256664276,
      "learning_rate": 9.29442315052394e-05,
      "loss": 0.0306,
      "step": 3190
    },
    {
      "epoch": 6.504065040650406,
      "grad_norm": 0.2715694010257721,
      "learning_rate": 9.288767011130684e-05,
      "loss": 0.0372,
      "step": 3200
    },
    {
      "epoch": 6.524390243902439,
      "grad_norm": 0.22602279484272003,
      "learning_rate": 9.283090026718466e-05,
      "loss": 0.0332,
      "step": 3210
    },
    {
      "epoch": 6.544715447154472,
      "grad_norm": 0.24035237729549408,
      "learning_rate": 9.277392224879568e-05,
      "loss": 0.0301,
      "step": 3220
    },
    {
      "epoch": 6.565040650406504,
      "grad_norm": 0.2930043637752533,
      "learning_rate": 9.271673633307445e-05,
      "loss": 0.0343,
      "step": 3230
    },
    {
      "epoch": 6.585365853658536,
      "grad_norm": 0.27956822514533997,
      "learning_rate": 9.265934279796602e-05,
      "loss": 0.0266,
      "step": 3240
    },
    {
      "epoch": 6.605691056910569,
      "grad_norm": 0.2060442715883255,
      "learning_rate": 9.260174192242453e-05,
      "loss": 0.0282,
      "step": 3250
    },
    {
      "epoch": 6.626016260162602,
      "grad_norm": 0.18642476201057434,
      "learning_rate": 9.254393398641185e-05,
      "loss": 0.0384,
      "step": 3260
    },
    {
      "epoch": 6.646341463414634,
      "grad_norm": 0.28357410430908203,
      "learning_rate": 9.248591927089628e-05,
      "loss": 0.0362,
      "step": 3270
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 0.23892942070960999,
      "learning_rate": 9.242769805785115e-05,
      "loss": 0.032,
      "step": 3280
    },
    {
      "epoch": 6.6869918699187,
      "grad_norm": 0.3044309616088867,
      "learning_rate": 9.236927063025342e-05,
      "loss": 0.0327,
      "step": 3290
    },
    {
      "epoch": 6.7073170731707314,
      "grad_norm": 0.300341933965683,
      "learning_rate": 9.231063727208234e-05,
      "loss": 0.03,
      "step": 3300
    },
    {
      "epoch": 6.727642276422764,
      "grad_norm": 0.32395273447036743,
      "learning_rate": 9.225179826831807e-05,
      "loss": 0.0282,
      "step": 3310
    },
    {
      "epoch": 6.747967479674797,
      "grad_norm": 0.2567685842514038,
      "learning_rate": 9.219275390494024e-05,
      "loss": 0.0302,
      "step": 3320
    },
    {
      "epoch": 6.7682926829268295,
      "grad_norm": 0.2030506134033203,
      "learning_rate": 9.213350446892668e-05,
      "loss": 0.0318,
      "step": 3330
    },
    {
      "epoch": 6.788617886178862,
      "grad_norm": 0.22847704589366913,
      "learning_rate": 9.207405024825186e-05,
      "loss": 0.0311,
      "step": 3340
    },
    {
      "epoch": 6.808943089430894,
      "grad_norm": 0.18336626887321472,
      "learning_rate": 9.201439153188569e-05,
      "loss": 0.032,
      "step": 3350
    },
    {
      "epoch": 6.829268292682927,
      "grad_norm": 0.21650265157222748,
      "learning_rate": 9.19545286097919e-05,
      "loss": 0.0271,
      "step": 3360
    },
    {
      "epoch": 6.849593495934959,
      "grad_norm": 0.2715393304824829,
      "learning_rate": 9.189446177292679e-05,
      "loss": 0.042,
      "step": 3370
    },
    {
      "epoch": 6.869918699186992,
      "grad_norm": 0.3490676283836365,
      "learning_rate": 9.183419131323778e-05,
      "loss": 0.0332,
      "step": 3380
    },
    {
      "epoch": 6.890243902439025,
      "grad_norm": 0.22679398953914642,
      "learning_rate": 9.177371752366191e-05,
      "loss": 0.0354,
      "step": 3390
    },
    {
      "epoch": 6.9105691056910565,
      "grad_norm": 0.22772999107837677,
      "learning_rate": 9.171304069812454e-05,
      "loss": 0.0338,
      "step": 3400
    },
    {
      "epoch": 6.930894308943089,
      "grad_norm": 0.21342279016971588,
      "learning_rate": 9.165216113153782e-05,
      "loss": 0.0342,
      "step": 3410
    },
    {
      "epoch": 6.951219512195122,
      "grad_norm": 0.22726966440677643,
      "learning_rate": 9.159107911979936e-05,
      "loss": 0.0315,
      "step": 3420
    },
    {
      "epoch": 6.971544715447155,
      "grad_norm": 0.2902209758758545,
      "learning_rate": 9.152979495979063e-05,
      "loss": 0.0303,
      "step": 3430
    },
    {
      "epoch": 6.991869918699187,
      "grad_norm": 0.24953265488147736,
      "learning_rate": 9.146830894937571e-05,
      "loss": 0.0315,
      "step": 3440
    },
    {
      "epoch": 7.012195121951219,
      "grad_norm": 0.21846653521060944,
      "learning_rate": 9.140662138739969e-05,
      "loss": 0.0263,
      "step": 3450
    },
    {
      "epoch": 7.032520325203252,
      "grad_norm": 0.212110698223114,
      "learning_rate": 9.134473257368732e-05,
      "loss": 0.0301,
      "step": 3460
    },
    {
      "epoch": 7.0528455284552845,
      "grad_norm": 0.2702498733997345,
      "learning_rate": 9.128264280904145e-05,
      "loss": 0.0255,
      "step": 3470
    },
    {
      "epoch": 7.073170731707317,
      "grad_norm": 0.30093133449554443,
      "learning_rate": 9.122035239524169e-05,
      "loss": 0.0349,
      "step": 3480
    },
    {
      "epoch": 7.09349593495935,
      "grad_norm": 0.2671796977519989,
      "learning_rate": 9.115786163504285e-05,
      "loss": 0.026,
      "step": 3490
    },
    {
      "epoch": 7.1138211382113825,
      "grad_norm": 0.25653183460235596,
      "learning_rate": 9.10951708321735e-05,
      "loss": 0.0346,
      "step": 3500
    },
    {
      "epoch": 7.134146341463414,
      "grad_norm": 0.3087276518344879,
      "learning_rate": 9.10322802913345e-05,
      "loss": 0.032,
      "step": 3510
    },
    {
      "epoch": 7.154471544715447,
      "grad_norm": 0.21461734175682068,
      "learning_rate": 9.096919031819751e-05,
      "loss": 0.0317,
      "step": 3520
    },
    {
      "epoch": 7.17479674796748,
      "grad_norm": 0.18722882866859436,
      "learning_rate": 9.090590121940348e-05,
      "loss": 0.0262,
      "step": 3530
    },
    {
      "epoch": 7.195121951219512,
      "grad_norm": 0.2623171806335449,
      "learning_rate": 9.084241330256121e-05,
      "loss": 0.0329,
      "step": 3540
    },
    {
      "epoch": 7.215447154471545,
      "grad_norm": 0.15801429748535156,
      "learning_rate": 9.077872687624586e-05,
      "loss": 0.0314,
      "step": 3550
    },
    {
      "epoch": 7.235772357723577,
      "grad_norm": 0.3193058371543884,
      "learning_rate": 9.071484224999735e-05,
      "loss": 0.0336,
      "step": 3560
    },
    {
      "epoch": 7.2560975609756095,
      "grad_norm": 0.22909541428089142,
      "learning_rate": 9.0650759734319e-05,
      "loss": 0.0324,
      "step": 3570
    },
    {
      "epoch": 7.276422764227642,
      "grad_norm": 0.2797752320766449,
      "learning_rate": 9.05864796406759e-05,
      "loss": 0.0322,
      "step": 3580
    },
    {
      "epoch": 7.296747967479675,
      "grad_norm": 0.19688351452350616,
      "learning_rate": 9.052200228149343e-05,
      "loss": 0.0272,
      "step": 3590
    },
    {
      "epoch": 7.317073170731708,
      "grad_norm": 0.30110496282577515,
      "learning_rate": 9.04573279701558e-05,
      "loss": 0.0308,
      "step": 3600
    },
    {
      "epoch": 7.33739837398374,
      "grad_norm": 0.19201114773750305,
      "learning_rate": 9.039245702100448e-05,
      "loss": 0.0257,
      "step": 3610
    },
    {
      "epoch": 7.357723577235772,
      "grad_norm": 0.2802717387676239,
      "learning_rate": 9.032738974933664e-05,
      "loss": 0.0289,
      "step": 3620
    },
    {
      "epoch": 7.378048780487805,
      "grad_norm": 0.2547508180141449,
      "learning_rate": 9.026212647140365e-05,
      "loss": 0.0285,
      "step": 3630
    },
    {
      "epoch": 7.3983739837398375,
      "grad_norm": 0.34926745295524597,
      "learning_rate": 9.019666750440956e-05,
      "loss": 0.0263,
      "step": 3640
    },
    {
      "epoch": 7.41869918699187,
      "grad_norm": 0.1898447424173355,
      "learning_rate": 9.013101316650956e-05,
      "loss": 0.0351,
      "step": 3650
    },
    {
      "epoch": 7.439024390243903,
      "grad_norm": 0.33788618445396423,
      "learning_rate": 9.00651637768084e-05,
      "loss": 0.0243,
      "step": 3660
    },
    {
      "epoch": 7.459349593495935,
      "grad_norm": 0.2711202800273895,
      "learning_rate": 8.999911965535885e-05,
      "loss": 0.0289,
      "step": 3670
    },
    {
      "epoch": 7.479674796747967,
      "grad_norm": 0.22120773792266846,
      "learning_rate": 8.993288112316014e-05,
      "loss": 0.0313,
      "step": 3680
    },
    {
      "epoch": 7.5,
      "grad_norm": 0.17247223854064941,
      "learning_rate": 8.986644850215644e-05,
      "loss": 0.0317,
      "step": 3690
    },
    {
      "epoch": 7.520325203252033,
      "grad_norm": 0.2290961742401123,
      "learning_rate": 8.979982211523523e-05,
      "loss": 0.0308,
      "step": 3700
    },
    {
      "epoch": 7.540650406504065,
| "grad_norm": 0.2302488088607788, |
| "learning_rate": 8.97330022862258e-05, |
| "loss": 0.0375, |
| "step": 3710 |
| }, |
| { |
| "epoch": 7.560975609756097, |
| "grad_norm": 0.2837287485599518, |
| "learning_rate": 8.96659893398976e-05, |
| "loss": 0.0364, |
| "step": 3720 |
| }, |
| { |
| "epoch": 7.58130081300813, |
| "grad_norm": 0.19260552525520325, |
| "learning_rate": 8.959878360195876e-05, |
| "loss": 0.0254, |
| "step": 3730 |
| }, |
| { |
| "epoch": 7.6016260162601625, |
| "grad_norm": 0.1930834949016571, |
| "learning_rate": 8.953138539905438e-05, |
| "loss": 0.0306, |
| "step": 3740 |
| }, |
| { |
| "epoch": 7.621951219512195, |
| "grad_norm": 0.2278723120689392, |
| "learning_rate": 8.946379505876506e-05, |
| "loss": 0.0325, |
| "step": 3750 |
| }, |
| { |
| "epoch": 7.642276422764228, |
| "grad_norm": 0.3849963843822479, |
| "learning_rate": 8.939601290960527e-05, |
| "loss": 0.0296, |
| "step": 3760 |
| }, |
| { |
| "epoch": 7.66260162601626, |
| "grad_norm": 0.34938275814056396, |
| "learning_rate": 8.932803928102167e-05, |
| "loss": 0.0415, |
| "step": 3770 |
| }, |
| { |
| "epoch": 7.682926829268292, |
| "grad_norm": 0.2133302241563797, |
| "learning_rate": 8.925987450339168e-05, |
| "loss": 0.0309, |
| "step": 3780 |
| }, |
| { |
| "epoch": 7.703252032520325, |
| "grad_norm": 0.1982925832271576, |
| "learning_rate": 8.919151890802172e-05, |
| "loss": 0.0261, |
| "step": 3790 |
| }, |
| { |
| "epoch": 7.723577235772358, |
| "grad_norm": 0.21407748758792877, |
| "learning_rate": 8.912297282714564e-05, |
| "loss": 0.0315, |
| "step": 3800 |
| }, |
| { |
| "epoch": 7.7439024390243905, |
| "grad_norm": 0.2122614085674286, |
| "learning_rate": 8.905423659392316e-05, |
| "loss": 0.0268, |
| "step": 3810 |
| }, |
| { |
| "epoch": 7.764227642276423, |
| "grad_norm": 0.2770064175128937, |
| "learning_rate": 8.898531054243822e-05, |
| "loss": 0.0331, |
| "step": 3820 |
| }, |
| { |
| "epoch": 7.784552845528455, |
| "grad_norm": 0.25917738676071167, |
| "learning_rate": 8.891619500769729e-05, |
| "loss": 0.0339, |
| "step": 3830 |
| }, |
| { |
| "epoch": 7.804878048780488, |
| "grad_norm": 0.20752471685409546, |
| "learning_rate": 8.884689032562785e-05, |
| "loss": 0.0282, |
| "step": 3840 |
| }, |
| { |
| "epoch": 7.82520325203252, |
| "grad_norm": 0.1928926408290863, |
| "learning_rate": 8.87773968330767e-05, |
| "loss": 0.0257, |
| "step": 3850 |
| }, |
| { |
| "epoch": 7.845528455284553, |
| "grad_norm": 0.18641775846481323, |
| "learning_rate": 8.870771486780832e-05, |
| "loss": 0.0288, |
| "step": 3860 |
| }, |
| { |
| "epoch": 7.865853658536586, |
| "grad_norm": 0.2024519294500351, |
| "learning_rate": 8.863784476850322e-05, |
| "loss": 0.0264, |
| "step": 3870 |
| }, |
| { |
| "epoch": 7.886178861788618, |
| "grad_norm": 0.37722542881965637, |
| "learning_rate": 8.856778687475635e-05, |
| "loss": 0.0265, |
| "step": 3880 |
| }, |
| { |
| "epoch": 7.90650406504065, |
| "grad_norm": 0.20428268611431122, |
| "learning_rate": 8.849754152707541e-05, |
| "loss": 0.0269, |
| "step": 3890 |
| }, |
| { |
| "epoch": 7.926829268292683, |
| "grad_norm": 0.20872460305690765, |
| "learning_rate": 8.842710906687916e-05, |
| "loss": 0.0294, |
| "step": 3900 |
| }, |
| { |
| "epoch": 7.9471544715447155, |
| "grad_norm": 0.24264350533485413, |
| "learning_rate": 8.83564898364958e-05, |
| "loss": 0.0302, |
| "step": 3910 |
| }, |
| { |
| "epoch": 7.967479674796748, |
| "grad_norm": 0.3859758973121643, |
| "learning_rate": 8.828568417916136e-05, |
| "loss": 0.0294, |
| "step": 3920 |
| }, |
| { |
| "epoch": 7.987804878048781, |
| "grad_norm": 0.4600236415863037, |
| "learning_rate": 8.821469243901794e-05, |
| "loss": 0.0316, |
| "step": 3930 |
| }, |
| { |
| "epoch": 8.008130081300813, |
| "grad_norm": 0.30919063091278076, |
| "learning_rate": 8.814351496111201e-05, |
| "loss": 0.0335, |
| "step": 3940 |
| }, |
| { |
| "epoch": 8.028455284552846, |
| "grad_norm": 0.22191697359085083, |
| "learning_rate": 8.807215209139293e-05, |
| "loss": 0.0315, |
| "step": 3950 |
| }, |
| { |
| "epoch": 8.048780487804878, |
| "grad_norm": 0.30846142768859863, |
| "learning_rate": 8.8000604176711e-05, |
| "loss": 0.0311, |
| "step": 3960 |
| }, |
| { |
| "epoch": 8.06910569105691, |
| "grad_norm": 0.195384681224823, |
| "learning_rate": 8.792887156481598e-05, |
| "loss": 0.0313, |
| "step": 3970 |
| }, |
| { |
| "epoch": 8.089430894308943, |
| "grad_norm": 0.19313912093639374, |
| "learning_rate": 8.785695460435534e-05, |
| "loss": 0.0263, |
| "step": 3980 |
| }, |
| { |
| "epoch": 8.109756097560975, |
| "grad_norm": 0.17431680858135223, |
| "learning_rate": 8.778485364487248e-05, |
| "loss": 0.0247, |
| "step": 3990 |
| }, |
| { |
| "epoch": 8.130081300813009, |
| "grad_norm": 0.3186069428920746, |
| "learning_rate": 8.771256903680519e-05, |
| "loss": 0.031, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.15040650406504, |
| "grad_norm": 0.31058987975120544, |
| "learning_rate": 8.764010113148382e-05, |
| "loss": 0.0306, |
| "step": 4010 |
| }, |
| { |
| "epoch": 8.170731707317072, |
| "grad_norm": 0.18163661658763885, |
| "learning_rate": 8.756745028112959e-05, |
| "loss": 0.0281, |
| "step": 4020 |
| }, |
| { |
| "epoch": 8.191056910569106, |
| "grad_norm": 0.22458802163600922, |
| "learning_rate": 8.749461683885296e-05, |
| "loss": 0.0312, |
| "step": 4030 |
| }, |
| { |
| "epoch": 8.211382113821138, |
| "grad_norm": 0.2205217480659485, |
| "learning_rate": 8.742160115865179e-05, |
| "loss": 0.0318, |
| "step": 4040 |
| }, |
| { |
| "epoch": 8.231707317073171, |
| "grad_norm": 0.23504704236984253, |
| "learning_rate": 8.734840359540974e-05, |
| "loss": 0.0298, |
| "step": 4050 |
| }, |
| { |
| "epoch": 8.252032520325203, |
| "grad_norm": 0.2698604166507721, |
| "learning_rate": 8.727502450489446e-05, |
| "loss": 0.0279, |
| "step": 4060 |
| }, |
| { |
| "epoch": 8.272357723577235, |
| "grad_norm": 0.25933247804641724, |
| "learning_rate": 8.720146424375591e-05, |
| "loss": 0.0292, |
| "step": 4070 |
| }, |
| { |
| "epoch": 8.292682926829269, |
| "grad_norm": 0.2248280793428421, |
| "learning_rate": 8.712772316952458e-05, |
| "loss": 0.021, |
| "step": 4080 |
| }, |
| { |
| "epoch": 8.3130081300813, |
| "grad_norm": 0.14027050137519836, |
| "learning_rate": 8.705380164060982e-05, |
| "loss": 0.0257, |
| "step": 4090 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.17492854595184326, |
| "learning_rate": 8.697970001629799e-05, |
| "loss": 0.0309, |
| "step": 4100 |
| }, |
| { |
| "epoch": 8.353658536585366, |
| "grad_norm": 0.20115551352500916, |
| "learning_rate": 8.690541865675084e-05, |
| "loss": 0.0272, |
| "step": 4110 |
| }, |
| { |
| "epoch": 8.373983739837398, |
| "grad_norm": 0.2150316685438156, |
| "learning_rate": 8.68309579230037e-05, |
| "loss": 0.0267, |
| "step": 4120 |
| }, |
| { |
| "epoch": 8.394308943089431, |
| "grad_norm": 0.26918819546699524, |
| "learning_rate": 8.675631817696372e-05, |
| "loss": 0.0248, |
| "step": 4130 |
| }, |
| { |
| "epoch": 8.414634146341463, |
| "grad_norm": 0.26900121569633484, |
| "learning_rate": 8.668149978140808e-05, |
| "loss": 0.03, |
| "step": 4140 |
| }, |
| { |
| "epoch": 8.434959349593496, |
| "grad_norm": 0.2637103199958801, |
| "learning_rate": 8.66065030999823e-05, |
| "loss": 0.0251, |
| "step": 4150 |
| }, |
| { |
| "epoch": 8.455284552845528, |
| "grad_norm": 0.19074967503547668, |
| "learning_rate": 8.653132849719845e-05, |
| "loss": 0.0231, |
| "step": 4160 |
| }, |
| { |
| "epoch": 8.475609756097562, |
| "grad_norm": 0.2522426247596741, |
| "learning_rate": 8.64559763384333e-05, |
| "loss": 0.0297, |
| "step": 4170 |
| }, |
| { |
| "epoch": 8.495934959349594, |
| "grad_norm": 0.2497992217540741, |
| "learning_rate": 8.638044698992669e-05, |
| "loss": 0.0277, |
| "step": 4180 |
| }, |
| { |
| "epoch": 8.516260162601625, |
| "grad_norm": 0.2424316704273224, |
| "learning_rate": 8.630474081877959e-05, |
| "loss": 0.0313, |
| "step": 4190 |
| }, |
| { |
| "epoch": 8.536585365853659, |
| "grad_norm": 0.23851478099822998, |
| "learning_rate": 8.62288581929525e-05, |
| "loss": 0.0267, |
| "step": 4200 |
| }, |
| { |
| "epoch": 8.55691056910569, |
| "grad_norm": 0.2154664844274521, |
| "learning_rate": 8.615279948126343e-05, |
| "loss": 0.028, |
| "step": 4210 |
| }, |
| { |
| "epoch": 8.577235772357724, |
| "grad_norm": 0.19693239033222198, |
| "learning_rate": 8.60765650533863e-05, |
| "loss": 0.0257, |
| "step": 4220 |
| }, |
| { |
| "epoch": 8.597560975609756, |
| "grad_norm": 0.2548753321170807, |
| "learning_rate": 8.60001552798491e-05, |
| "loss": 0.023, |
| "step": 4230 |
| }, |
| { |
| "epoch": 8.617886178861788, |
| "grad_norm": 0.23749999701976776, |
| "learning_rate": 8.592357053203202e-05, |
| "loss": 0.0273, |
| "step": 4240 |
| }, |
| { |
| "epoch": 8.638211382113822, |
| "grad_norm": 0.23216712474822998, |
| "learning_rate": 8.58468111821657e-05, |
| "loss": 0.0284, |
| "step": 4250 |
| }, |
| { |
| "epoch": 8.658536585365853, |
| "grad_norm": 0.17603376507759094, |
| "learning_rate": 8.576987760332943e-05, |
| "loss": 0.0296, |
| "step": 4260 |
| }, |
| { |
| "epoch": 8.678861788617887, |
| "grad_norm": 0.21435929834842682, |
| "learning_rate": 8.56927701694493e-05, |
| "loss": 0.0269, |
| "step": 4270 |
| }, |
| { |
| "epoch": 8.699186991869919, |
| "grad_norm": 0.20830167829990387, |
| "learning_rate": 8.561548925529643e-05, |
| "loss": 0.0257, |
| "step": 4280 |
| }, |
| { |
| "epoch": 8.71951219512195, |
| "grad_norm": 0.17705325782299042, |
| "learning_rate": 8.553803523648506e-05, |
| "loss": 0.0241, |
| "step": 4290 |
| }, |
| { |
| "epoch": 8.739837398373984, |
| "grad_norm": 0.2524889409542084, |
| "learning_rate": 8.546040848947086e-05, |
| "loss": 0.0264, |
| "step": 4300 |
| }, |
| { |
| "epoch": 8.760162601626016, |
| "grad_norm": 0.27627524733543396, |
| "learning_rate": 8.538260939154894e-05, |
| "loss": 0.0268, |
| "step": 4310 |
| }, |
| { |
| "epoch": 8.78048780487805, |
| "grad_norm": 0.20441976189613342, |
| "learning_rate": 8.530463832085218e-05, |
| "loss": 0.0255, |
| "step": 4320 |
| }, |
| { |
| "epoch": 8.800813008130081, |
| "grad_norm": 0.2210913449525833, |
| "learning_rate": 8.522649565634927e-05, |
| "loss": 0.0242, |
| "step": 4330 |
| }, |
| { |
| "epoch": 8.821138211382113, |
| "grad_norm": 0.2106960266828537, |
| "learning_rate": 8.51481817778429e-05, |
| "loss": 0.0224, |
| "step": 4340 |
| }, |
| { |
| "epoch": 8.841463414634147, |
| "grad_norm": 0.17626696825027466, |
| "learning_rate": 8.506969706596797e-05, |
| "loss": 0.0232, |
| "step": 4350 |
| }, |
| { |
| "epoch": 8.861788617886178, |
| "grad_norm": 0.17255190014839172, |
| "learning_rate": 8.499104190218964e-05, |
| "loss": 0.0238, |
| "step": 4360 |
| }, |
| { |
| "epoch": 8.882113821138212, |
| "grad_norm": 0.22408510744571686, |
| "learning_rate": 8.49122166688016e-05, |
| "loss": 0.0233, |
| "step": 4370 |
| }, |
| { |
| "epoch": 8.902439024390244, |
| "grad_norm": 0.14295317232608795, |
| "learning_rate": 8.483322174892404e-05, |
| "loss": 0.0228, |
| "step": 4380 |
| }, |
| { |
| "epoch": 8.922764227642276, |
| "grad_norm": 0.20528960227966309, |
| "learning_rate": 8.475405752650199e-05, |
| "loss": 0.0301, |
| "step": 4390 |
| }, |
| { |
| "epoch": 8.94308943089431, |
| "grad_norm": 0.2917337417602539, |
| "learning_rate": 8.467472438630328e-05, |
| "loss": 0.0309, |
| "step": 4400 |
| }, |
| { |
| "epoch": 8.963414634146341, |
| "grad_norm": 0.2525673806667328, |
| "learning_rate": 8.459522271391682e-05, |
| "loss": 0.0248, |
| "step": 4410 |
| }, |
| { |
| "epoch": 8.983739837398375, |
| "grad_norm": 0.2192648947238922, |
| "learning_rate": 8.451555289575057e-05, |
| "loss": 0.0293, |
| "step": 4420 |
| }, |
| { |
| "epoch": 9.004065040650406, |
| "grad_norm": 0.2079612761735916, |
| "learning_rate": 8.443571531902981e-05, |
| "loss": 0.0287, |
| "step": 4430 |
| }, |
| { |
| "epoch": 9.024390243902438, |
| "grad_norm": 0.27692246437072754, |
| "learning_rate": 8.435571037179512e-05, |
| "loss": 0.0329, |
| "step": 4440 |
| }, |
| { |
| "epoch": 9.044715447154472, |
| "grad_norm": 0.21819686889648438, |
| "learning_rate": 8.427553844290062e-05, |
| "loss": 0.0248, |
| "step": 4450 |
| }, |
| { |
| "epoch": 9.065040650406504, |
| "grad_norm": 0.14863276481628418, |
| "learning_rate": 8.419519992201201e-05, |
| "loss": 0.027, |
| "step": 4460 |
| }, |
| { |
| "epoch": 9.085365853658537, |
| "grad_norm": 0.269767701625824, |
| "learning_rate": 8.411469519960469e-05, |
| "loss": 0.0246, |
| "step": 4470 |
| }, |
| { |
| "epoch": 9.105691056910569, |
| "grad_norm": 0.1592557430267334, |
| "learning_rate": 8.403402466696182e-05, |
| "loss": 0.0242, |
| "step": 4480 |
| }, |
| { |
| "epoch": 9.126016260162602, |
| "grad_norm": 0.21700291335582733, |
| "learning_rate": 8.395318871617255e-05, |
| "loss": 0.0245, |
| "step": 4490 |
| }, |
| { |
| "epoch": 9.146341463414634, |
| "grad_norm": 0.16219979524612427, |
| "learning_rate": 8.387218774012992e-05, |
| "loss": 0.0231, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.166666666666666, |
| "grad_norm": 0.19992409646511078, |
| "learning_rate": 8.379102213252915e-05, |
| "loss": 0.0294, |
| "step": 4510 |
| }, |
| { |
| "epoch": 9.1869918699187, |
| "grad_norm": 0.22747237980365753, |
| "learning_rate": 8.370969228786556e-05, |
| "loss": 0.024, |
| "step": 4520 |
| }, |
| { |
| "epoch": 9.207317073170731, |
| "grad_norm": 0.1960316151380539, |
| "learning_rate": 8.362819860143275e-05, |
| "loss": 0.0292, |
| "step": 4530 |
| }, |
| { |
| "epoch": 9.227642276422765, |
| "grad_norm": 0.20597191154956818, |
| "learning_rate": 8.354654146932066e-05, |
| "loss": 0.0258, |
| "step": 4540 |
| }, |
| { |
| "epoch": 9.247967479674797, |
| "grad_norm": 0.2013852894306183, |
| "learning_rate": 8.346472128841364e-05, |
| "loss": 0.0211, |
| "step": 4550 |
| }, |
| { |
| "epoch": 9.268292682926829, |
| "grad_norm": 0.20590724050998688, |
| "learning_rate": 8.338273845638848e-05, |
| "loss": 0.0244, |
| "step": 4560 |
| }, |
| { |
| "epoch": 9.288617886178862, |
| "grad_norm": 0.2510227859020233, |
| "learning_rate": 8.330059337171258e-05, |
| "loss": 0.0228, |
| "step": 4570 |
| }, |
| { |
| "epoch": 9.308943089430894, |
| "grad_norm": 0.22839580476284027, |
| "learning_rate": 8.32182864336419e-05, |
| "loss": 0.0247, |
| "step": 4580 |
| }, |
| { |
| "epoch": 9.329268292682928, |
| "grad_norm": 0.2431192696094513, |
| "learning_rate": 8.313581804221908e-05, |
| "loss": 0.0246, |
| "step": 4590 |
| }, |
| { |
| "epoch": 9.34959349593496, |
| "grad_norm": 0.17871959507465363, |
| "learning_rate": 8.305318859827147e-05, |
| "loss": 0.0218, |
| "step": 4600 |
| }, |
| { |
| "epoch": 9.369918699186991, |
| "grad_norm": 0.18869948387145996, |
| "learning_rate": 8.297039850340923e-05, |
| "loss": 0.0286, |
| "step": 4610 |
| }, |
| { |
| "epoch": 9.390243902439025, |
| "grad_norm": 0.24030964076519012, |
| "learning_rate": 8.288744816002331e-05, |
| "loss": 0.0264, |
| "step": 4620 |
| }, |
| { |
| "epoch": 9.410569105691057, |
| "grad_norm": 0.25945621728897095, |
| "learning_rate": 8.280433797128357e-05, |
| "loss": 0.0258, |
| "step": 4630 |
| }, |
| { |
| "epoch": 9.43089430894309, |
| "grad_norm": 0.28410178422927856, |
| "learning_rate": 8.272106834113674e-05, |
| "loss": 0.0264, |
| "step": 4640 |
| }, |
| { |
| "epoch": 9.451219512195122, |
| "grad_norm": 0.26565882563591003, |
| "learning_rate": 8.26376396743045e-05, |
| "loss": 0.0262, |
| "step": 4650 |
| }, |
| { |
| "epoch": 9.471544715447154, |
| "grad_norm": 0.27305731177330017, |
| "learning_rate": 8.25540523762815e-05, |
| "loss": 0.0337, |
| "step": 4660 |
| }, |
| { |
| "epoch": 9.491869918699187, |
| "grad_norm": 0.25521767139434814, |
| "learning_rate": 8.247030685333346e-05, |
| "loss": 0.0273, |
| "step": 4670 |
| }, |
| { |
| "epoch": 9.512195121951219, |
| "grad_norm": 0.22723345458507538, |
| "learning_rate": 8.238640351249503e-05, |
| "loss": 0.0209, |
| "step": 4680 |
| }, |
| { |
| "epoch": 9.532520325203253, |
| "grad_norm": 0.23032833635807037, |
| "learning_rate": 8.2302342761568e-05, |
| "loss": 0.0259, |
| "step": 4690 |
| }, |
| { |
| "epoch": 9.552845528455284, |
| "grad_norm": 0.26375725865364075, |
| "learning_rate": 8.221812500911919e-05, |
| "loss": 0.0295, |
| "step": 4700 |
| }, |
| { |
| "epoch": 9.573170731707316, |
| "grad_norm": 0.1927800178527832, |
| "learning_rate": 8.213375066447853e-05, |
| "loss": 0.0251, |
| "step": 4710 |
| }, |
| { |
| "epoch": 9.59349593495935, |
| "grad_norm": 0.2849136292934418, |
| "learning_rate": 8.204922013773702e-05, |
| "loss": 0.0258, |
| "step": 4720 |
| }, |
| { |
| "epoch": 9.613821138211382, |
| "grad_norm": 0.287185937166214, |
| "learning_rate": 8.196453383974478e-05, |
| "loss": 0.0265, |
| "step": 4730 |
| }, |
| { |
| "epoch": 9.634146341463415, |
| "grad_norm": 0.20117615163326263, |
| "learning_rate": 8.187969218210904e-05, |
| "loss": 0.0245, |
| "step": 4740 |
| }, |
| { |
| "epoch": 9.654471544715447, |
| "grad_norm": 0.2523585557937622, |
| "learning_rate": 8.179469557719213e-05, |
| "loss": 0.0265, |
| "step": 4750 |
| }, |
| { |
| "epoch": 9.67479674796748, |
| "grad_norm": 0.17117203772068024, |
| "learning_rate": 8.170954443810948e-05, |
| "loss": 0.0264, |
| "step": 4760 |
| }, |
| { |
| "epoch": 9.695121951219512, |
| "grad_norm": 0.17058758437633514, |
| "learning_rate": 8.162423917872764e-05, |
| "loss": 0.0277, |
| "step": 4770 |
| }, |
| { |
| "epoch": 9.715447154471544, |
| "grad_norm": 0.19005297124385834, |
| "learning_rate": 8.153878021366217e-05, |
| "loss": 0.0243, |
| "step": 4780 |
| }, |
| { |
| "epoch": 9.735772357723578, |
| "grad_norm": 0.2171137034893036, |
| "learning_rate": 8.14531679582758e-05, |
| "loss": 0.025, |
| "step": 4790 |
| }, |
| { |
| "epoch": 9.75609756097561, |
| "grad_norm": 0.21392473578453064, |
| "learning_rate": 8.136740282867621e-05, |
| "loss": 0.0296, |
| "step": 4800 |
| }, |
| { |
| "epoch": 9.776422764227643, |
| "grad_norm": 0.2003464698791504, |
| "learning_rate": 8.128148524171418e-05, |
| "loss": 0.0252, |
| "step": 4810 |
| }, |
| { |
| "epoch": 9.796747967479675, |
| "grad_norm": 0.17530317604541779, |
| "learning_rate": 8.119541561498146e-05, |
| "loss": 0.0221, |
| "step": 4820 |
| }, |
| { |
| "epoch": 9.817073170731707, |
| "grad_norm": 0.16956830024719238, |
| "learning_rate": 8.110919436680877e-05, |
| "loss": 0.0233, |
| "step": 4830 |
| }, |
| { |
| "epoch": 9.83739837398374, |
| "grad_norm": 0.1363949030637741, |
| "learning_rate": 8.102282191626378e-05, |
| "loss": 0.0305, |
| "step": 4840 |
| }, |
| { |
| "epoch": 9.857723577235772, |
| "grad_norm": 0.1618584543466568, |
| "learning_rate": 8.0936298683149e-05, |
| "loss": 0.0225, |
| "step": 4850 |
| }, |
| { |
| "epoch": 9.878048780487806, |
| "grad_norm": 0.18399667739868164, |
| "learning_rate": 8.084962508799991e-05, |
| "loss": 0.0234, |
| "step": 4860 |
| }, |
| { |
| "epoch": 9.898373983739837, |
| "grad_norm": 0.24996976554393768, |
| "learning_rate": 8.076280155208273e-05, |
| "loss": 0.021, |
| "step": 4870 |
| }, |
| { |
| "epoch": 9.91869918699187, |
| "grad_norm": 0.25863030552864075, |
| "learning_rate": 8.067582849739245e-05, |
| "loss": 0.0288, |
| "step": 4880 |
| }, |
| { |
| "epoch": 9.939024390243903, |
| "grad_norm": 0.22144946455955505, |
| "learning_rate": 8.058870634665079e-05, |
| "loss": 0.0232, |
| "step": 4890 |
| }, |
| { |
| "epoch": 9.959349593495935, |
| "grad_norm": 0.16196279227733612, |
| "learning_rate": 8.050143552330414e-05, |
| "loss": 0.021, |
| "step": 4900 |
| }, |
| { |
| "epoch": 9.979674796747968, |
| "grad_norm": 0.2325410395860672, |
| "learning_rate": 8.041401645152151e-05, |
| "loss": 0.0248, |
| "step": 4910 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.20593298971652985, |
| "learning_rate": 8.032644955619239e-05, |
| "loss": 0.0229, |
| "step": 4920 |
| }, |
| { |
| "epoch": 10.020325203252032, |
| "grad_norm": 0.19367890059947968, |
| "learning_rate": 8.023873526292483e-05, |
| "loss": 0.0256, |
| "step": 4930 |
| }, |
| { |
| "epoch": 10.040650406504065, |
| "grad_norm": 0.2022327035665512, |
| "learning_rate": 8.015087399804322e-05, |
| "loss": 0.0227, |
| "step": 4940 |
| }, |
| { |
| "epoch": 10.060975609756097, |
| "grad_norm": 0.17880947887897491, |
| "learning_rate": 8.006286618858635e-05, |
| "loss": 0.0278, |
| "step": 4950 |
| }, |
| { |
| "epoch": 10.08130081300813, |
| "grad_norm": 0.1883138120174408, |
| "learning_rate": 7.99747122623052e-05, |
| "loss": 0.0208, |
| "step": 4960 |
| }, |
| { |
| "epoch": 10.101626016260163, |
| "grad_norm": 0.22545097768306732, |
| "learning_rate": 7.988641264766097e-05, |
| "loss": 0.0255, |
| "step": 4970 |
| }, |
| { |
| "epoch": 10.121951219512194, |
| "grad_norm": 0.14307765662670135, |
| "learning_rate": 7.9797967773823e-05, |
| "loss": 0.0299, |
| "step": 4980 |
| }, |
| { |
| "epoch": 10.142276422764228, |
| "grad_norm": 0.1591491848230362, |
| "learning_rate": 7.970937807066659e-05, |
| "loss": 0.0255, |
| "step": 4990 |
| }, |
| { |
| "epoch": 10.16260162601626, |
| "grad_norm": 0.26083746552467346, |
| "learning_rate": 7.962064396877098e-05, |
| "loss": 0.0259, |
| "step": 5000 |
| }, |
| { |
| "epoch": 10.182926829268293, |
| "grad_norm": 0.22072488069534302, |
| "learning_rate": 7.953176589941722e-05, |
| "loss": 0.0228, |
| "step": 5010 |
| }, |
| { |
| "epoch": 10.203252032520325, |
| "grad_norm": 0.2204706072807312, |
| "learning_rate": 7.944274429458614e-05, |
| "loss": 0.0237, |
| "step": 5020 |
| }, |
| { |
| "epoch": 10.223577235772357, |
| "grad_norm": 0.3005303740501404, |
| "learning_rate": 7.93535795869562e-05, |
| "loss": 0.0243, |
| "step": 5030 |
| }, |
| { |
| "epoch": 10.24390243902439, |
| "grad_norm": 0.15468287467956543, |
| "learning_rate": 7.926427220990134e-05, |
| "loss": 0.0236, |
| "step": 5040 |
| }, |
| { |
| "epoch": 10.264227642276422, |
| "grad_norm": 0.23432765901088715, |
| "learning_rate": 7.9174822597489e-05, |
| "loss": 0.0231, |
| "step": 5050 |
| }, |
| { |
| "epoch": 10.284552845528456, |
| "grad_norm": 0.212994784116745, |
| "learning_rate": 7.908523118447789e-05, |
| "loss": 0.0213, |
| "step": 5060 |
| }, |
| { |
| "epoch": 10.304878048780488, |
| "grad_norm": 0.2055157721042633, |
| "learning_rate": 7.89954984063159e-05, |
| "loss": 0.0194, |
| "step": 5070 |
| }, |
| { |
| "epoch": 10.32520325203252, |
| "grad_norm": 0.1862349510192871, |
| "learning_rate": 7.890562469913811e-05, |
| "loss": 0.0251, |
| "step": 5080 |
| }, |
| { |
| "epoch": 10.345528455284553, |
| "grad_norm": 0.18946106731891632, |
| "learning_rate": 7.881561049976447e-05, |
| "loss": 0.025, |
| "step": 5090 |
| }, |
| { |
| "epoch": 10.365853658536585, |
| "grad_norm": 0.12222699075937271, |
| "learning_rate": 7.872545624569779e-05, |
| "loss": 0.0273, |
| "step": 5100 |
| }, |
| { |
| "epoch": 10.386178861788618, |
| "grad_norm": 0.19618262350559235, |
| "learning_rate": 7.863516237512164e-05, |
| "loss": 0.0224, |
| "step": 5110 |
| }, |
| { |
| "epoch": 10.40650406504065, |
| "grad_norm": 0.2092956155538559, |
| "learning_rate": 7.854472932689815e-05, |
| "loss": 0.0269, |
| "step": 5120 |
| }, |
| { |
| "epoch": 10.426829268292684, |
| "grad_norm": 0.14048688113689423, |
| "learning_rate": 7.845415754056591e-05, |
| "loss": 0.0195, |
| "step": 5130 |
| }, |
| { |
| "epoch": 10.447154471544716, |
| "grad_norm": 0.24474139511585236, |
| "learning_rate": 7.836344745633783e-05, |
| "loss": 0.021, |
| "step": 5140 |
| }, |
| { |
| "epoch": 10.467479674796747, |
| "grad_norm": 0.20247799158096313, |
| "learning_rate": 7.8272599515099e-05, |
| "loss": 0.0202, |
| "step": 5150 |
| }, |
| { |
| "epoch": 10.487804878048781, |
| "grad_norm": 0.1718481332063675, |
| "learning_rate": 7.818161415840453e-05, |
| "loss": 0.0247, |
| "step": 5160 |
| }, |
| { |
| "epoch": 10.508130081300813, |
| "grad_norm": 0.1697309911251068, |
| "learning_rate": 7.809049182847745e-05, |
| "loss": 0.0182, |
| "step": 5170 |
| }, |
| { |
| "epoch": 10.528455284552846, |
| "grad_norm": 0.23599614202976227, |
| "learning_rate": 7.799923296820653e-05, |
| "loss": 0.0222, |
| "step": 5180 |
| }, |
| { |
| "epoch": 10.548780487804878, |
| "grad_norm": 0.15558257699012756, |
| "learning_rate": 7.790783802114408e-05, |
| "loss": 0.0262, |
| "step": 5190 |
| }, |
| { |
| "epoch": 10.56910569105691, |
| "grad_norm": 0.21534393727779388, |
| "learning_rate": 7.781630743150392e-05, |
| "loss": 0.028, |
| "step": 5200 |
| }, |
| { |
| "epoch": 10.589430894308943, |
| "grad_norm": 0.22029508650302887, |
| "learning_rate": 7.772464164415907e-05, |
| "loss": 0.0233, |
| "step": 5210 |
| }, |
| { |
| "epoch": 10.609756097560975, |
| "grad_norm": 0.1564619094133377, |
| "learning_rate": 7.763284110463973e-05, |
| "loss": 0.0221, |
| "step": 5220 |
| }, |
| { |
| "epoch": 10.630081300813009, |
| "grad_norm": 0.17775817215442657, |
| "learning_rate": 7.754090625913099e-05, |
| "loss": 0.0218, |
| "step": 5230 |
| }, |
| { |
| "epoch": 10.65040650406504, |
| "grad_norm": 0.3057042360305786, |
| "learning_rate": 7.744883755447075e-05, |
| "loss": 0.0235, |
| "step": 5240 |
| }, |
| { |
| "epoch": 10.670731707317072, |
| "grad_norm": 0.2097449004650116, |
| "learning_rate": 7.735663543814749e-05, |
| "loss": 0.0229, |
| "step": 5250 |
| }, |
| { |
| "epoch": 10.691056910569106, |
| "grad_norm": 0.2134266197681427, |
| "learning_rate": 7.726430035829813e-05, |
| "loss": 0.0239, |
| "step": 5260 |
| }, |
| { |
| "epoch": 10.711382113821138, |
| "grad_norm": 0.16942216455936432, |
| "learning_rate": 7.717183276370586e-05, |
| "loss": 0.0247, |
| "step": 5270 |
| }, |
| { |
| "epoch": 10.731707317073171, |
| "grad_norm": 0.20789335668087006, |
| "learning_rate": 7.707923310379794e-05, |
| "loss": 0.022, |
| "step": 5280 |
| }, |
| { |
| "epoch": 10.752032520325203, |
| "grad_norm": 0.17640726268291473, |
| "learning_rate": 7.698650182864351e-05, |
| "loss": 0.0219, |
| "step": 5290 |
| }, |
| { |
| "epoch": 10.772357723577235, |
| "grad_norm": 0.16752611100673676, |
| "learning_rate": 7.689363938895138e-05, |
| "loss": 0.0259, |
| "step": 5300 |
| }, |
| { |
| "epoch": 10.792682926829269, |
| "grad_norm": 0.2684347331523895, |
| "learning_rate": 7.680064623606791e-05, |
| "loss": 0.021, |
| "step": 5310 |
| }, |
| { |
| "epoch": 10.8130081300813, |
| "grad_norm": 0.238786518573761, |
| "learning_rate": 7.670752282197476e-05, |
| "loss": 0.0237, |
| "step": 5320 |
| }, |
| { |
| "epoch": 10.833333333333334, |
| "grad_norm": 0.21161924302577972, |
| "learning_rate": 7.66142695992867e-05, |
| "loss": 0.0195, |
| "step": 5330 |
| }, |
| { |
| "epoch": 10.853658536585366, |
| "grad_norm": 0.2526039183139801, |
| "learning_rate": 7.652088702124944e-05, |
| "loss": 0.0245, |
| "step": 5340 |
| }, |
| { |
| "epoch": 10.8739837398374, |
| "grad_norm": 0.25313472747802734, |
| "learning_rate": 7.64273755417374e-05, |
| "loss": 0.0195, |
| "step": 5350 |
| }, |
| { |
| "epoch": 10.894308943089431, |
| "grad_norm": 0.19629882276058197, |
| "learning_rate": 7.633373561525148e-05, |
| "loss": 0.0224, |
| "step": 5360 |
| }, |
| { |
| "epoch": 10.914634146341463, |
| "grad_norm": 0.3054946959018707, |
| "learning_rate": 7.623996769691691e-05, |
| "loss": 0.0238, |
| "step": 5370 |
| }, |
| { |
| "epoch": 10.934959349593496, |
| "grad_norm": 0.19822648167610168, |
| "learning_rate": 7.614607224248103e-05, |
| "loss": 0.0209, |
| "step": 5380 |
| }, |
| { |
| "epoch": 10.955284552845528, |
| "grad_norm": 0.15420231223106384, |
| "learning_rate": 7.605204970831096e-05, |
| "loss": 0.0197, |
| "step": 5390 |
| }, |
| { |
| "epoch": 10.975609756097562, |
| "grad_norm": 0.18817901611328125, |
| "learning_rate": 7.595790055139163e-05, |
| "loss": 0.0215, |
| "step": 5400 |
| }, |
| { |
| "epoch": 10.995934959349594, |
| "grad_norm": 0.20940722525119781, |
| "learning_rate": 7.586362522932323e-05, |
| "loss": 0.0225, |
| "step": 5410 |
| }, |
| { |
| "epoch": 11.016260162601625, |
| "grad_norm": 0.1898982971906662, |
| "learning_rate": 7.576922420031929e-05, |
| "loss": 0.0186, |
| "step": 5420 |
| }, |
| { |
| "epoch": 11.036585365853659, |
| "grad_norm": 0.28592678904533386, |
| "learning_rate": 7.567469792320428e-05, |
| "loss": 0.0238, |
| "step": 5430 |
| }, |
| { |
| "epoch": 11.05691056910569, |
| "grad_norm": 0.2053905427455902, |
| "learning_rate": 7.558004685741137e-05, |
| "loss": 0.0207, |
| "step": 5440 |
| }, |
| { |
| "epoch": 11.077235772357724, |
| "grad_norm": 0.2409668266773224, |
| "learning_rate": 7.548527146298036e-05, |
| "loss": 0.0267, |
| "step": 5450 |
| }, |
| { |
| "epoch": 11.097560975609756, |
| "grad_norm": 0.16952195763587952, |
| "learning_rate": 7.539037220055527e-05, |
| "loss": 0.0165, |
| "step": 5460 |
| }, |
| { |
| "epoch": 11.117886178861788, |
| "grad_norm": 0.1960296481847763, |
| "learning_rate": 7.529534953138213e-05, |
| "loss": 0.0225, |
| "step": 5470 |
| }, |
| { |
| "epoch": 11.138211382113822, |
| "grad_norm": 0.17874296009540558, |
| "learning_rate": 7.520020391730684e-05, |
| "loss": 0.0212, |
| "step": 5480 |
| }, |
| { |
| "epoch": 11.158536585365853, |
| "grad_norm": 0.16215310990810394, |
| "learning_rate": 7.510493582077281e-05, |
| "loss": 0.0197, |
| "step": 5490 |
| }, |
| { |
| "epoch": 11.178861788617887, |
| "grad_norm": 0.1989363729953766, |
| "learning_rate": 7.500954570481882e-05, |
| "loss": 0.0176, |
| "step": 5500 |
| }, |
| { |
| "epoch": 11.199186991869919, |
| "grad_norm": 0.16380657255649567, |
| "learning_rate": 7.491403403307662e-05, |
| "loss": 0.0199, |
| "step": 5510 |
| }, |
| { |
| "epoch": 11.21951219512195, |
| "grad_norm": 0.25316646695137024, |
| "learning_rate": 7.481840126976885e-05, |
| "loss": 0.0187, |
| "step": 5520 |
| }, |
| { |
| "epoch": 11.239837398373984, |
| "grad_norm": 0.19226357340812683, |
| "learning_rate": 7.472264787970666e-05, |
| "loss": 0.0297, |
| "step": 5530 |
| }, |
| { |
| "epoch": 11.260162601626016, |
| "grad_norm": 0.16172873973846436, |
| "learning_rate": 7.462677432828751e-05, |
| "loss": 0.0178, |
| "step": 5540 |
| }, |
| { |
| "epoch": 11.28048780487805, |
| "grad_norm": 0.217548206448555, |
| "learning_rate": 7.453078108149287e-05, |
| "loss": 0.0191, |
| "step": 5550 |
| }, |
| { |
| "epoch": 11.300813008130081, |
| "grad_norm": 0.1852748543024063, |
| "learning_rate": 7.443466860588599e-05, |
| "loss": 0.0207, |
| "step": 5560 |
| }, |
| { |
| "epoch": 11.321138211382113, |
| "grad_norm": 0.212760791182518, |
| "learning_rate": 7.43384373686096e-05, |
| "loss": 0.018, |
| "step": 5570 |
| }, |
| { |
| "epoch": 11.341463414634147, |
| "grad_norm": 0.146515354514122, |
| "learning_rate": 7.424208783738367e-05, |
| "loss": 0.0219, |
| "step": 5580 |
| }, |
| { |
| "epoch": 11.361788617886178, |
| "grad_norm": 0.18900711834430695, |
| "learning_rate": 7.414562048050315e-05, |
| "loss": 0.0216, |
| "step": 5590 |
| }, |
| { |
| "epoch": 11.382113821138212, |
| "grad_norm": 0.2096913754940033, |
| "learning_rate": 7.404903576683559e-05, |
| "loss": 0.0242, |
| "step": 5600 |
| }, |
| { |
| "epoch": 11.402439024390244, |
| "grad_norm": 0.17401063442230225, |
| "learning_rate": 7.3952334165819e-05, |
| "loss": 0.0219, |
| "step": 5610 |
| }, |
| { |
| "epoch": 11.422764227642276, |
| "grad_norm": 0.18080481886863708, |
| "learning_rate": 7.385551614745952e-05, |
| "loss": 0.0211, |
| "step": 5620 |
| }, |
| { |
| "epoch": 11.44308943089431, |
| "grad_norm": 0.20653265714645386, |
| "learning_rate": 7.375858218232905e-05, |
| "loss": 0.0212, |
| "step": 5630 |
| }, |
| { |
| "epoch": 11.463414634146341, |
| "grad_norm": 0.24207787215709686, |
| "learning_rate": 7.366153274156312e-05, |
| "loss": 0.0262, |
| "step": 5640 |
| }, |
| { |
| "epoch": 11.483739837398375, |
| "grad_norm": 0.2646133303642273, |
| "learning_rate": 7.356436829685844e-05, |
| "loss": 0.0159, |
| "step": 5650 |
| }, |
| { |
| "epoch": 11.504065040650406, |
| "grad_norm": 0.1713922768831253, |
| "learning_rate": 7.346708932047074e-05, |
| "loss": 0.0215, |
| "step": 5660 |
| }, |
| { |
| "epoch": 11.524390243902438, |
| "grad_norm": 0.18752823770046234, |
| "learning_rate": 7.336969628521237e-05, |
| "loss": 0.0221, |
| "step": 5670 |
| }, |
| { |
| "epoch": 11.544715447154472, |
| "grad_norm": 0.1929113268852234, |
| "learning_rate": 7.32721896644501e-05, |
| "loss": 0.0219, |
| "step": 5680 |
| }, |
| { |
| "epoch": 11.565040650406504, |
| "grad_norm": 0.17113937437534332, |
| "learning_rate": 7.317456993210272e-05, |
| "loss": 0.0271, |
| "step": 5690 |
| }, |
| { |
| "epoch": 11.585365853658537, |
| "grad_norm": 0.21283072233200073, |
| "learning_rate": 7.307683756263881e-05, |
| "loss": 0.0176, |
| "step": 5700 |
| }, |
| { |
| "epoch": 11.605691056910569, |
| "grad_norm": 0.19880950450897217, |
| "learning_rate": 7.297899303107441e-05, |
| "loss": 0.0214, |
| "step": 5710 |
| }, |
| { |
| "epoch": 11.6260162601626, |
| "grad_norm": 0.14940781891345978, |
| "learning_rate": 7.288103681297068e-05, |
| "loss": 0.0231, |
| "step": 5720 |
| }, |
| { |
| "epoch": 11.646341463414634, |
| "grad_norm": 0.16071899235248566, |
| "learning_rate": 7.278296938443166e-05, |
| "loss": 0.026, |
| "step": 5730 |
| }, |
| { |
| "epoch": 11.666666666666666, |
| "grad_norm": 0.11081783473491669, |
| "learning_rate": 7.26847912221019e-05, |
| "loss": 0.0202, |
| "step": 5740 |
| }, |
| { |
| "epoch": 11.6869918699187, |
| "grad_norm": 0.1508558690547943, |
| "learning_rate": 7.258650280316415e-05, |
| "loss": 0.0207, |
| "step": 5750 |
| }, |
| { |
| "epoch": 11.707317073170731, |
| "grad_norm": 0.15217100083827972, |
| "learning_rate": 7.248810460533706e-05, |
| "loss": 0.0205, |
| "step": 5760 |
| }, |
| { |
| "epoch": 11.727642276422765, |
| "grad_norm": 0.20388838648796082, |
| "learning_rate": 7.238959710687282e-05, |
| "loss": 0.0206, |
| "step": 5770 |
| }, |
| { |
| "epoch": 11.747967479674797, |
| "grad_norm": 0.16872060298919678, |
| "learning_rate": 7.229098078655489e-05, |
| "loss": 0.0228, |
| "step": 5780 |
| }, |
| { |
| "epoch": 11.768292682926829, |
| "grad_norm": 0.16713246703147888, |
| "learning_rate": 7.219225612369565e-05, |
| "loss": 0.0242, |
| "step": 5790 |
| }, |
| { |
| "epoch": 11.788617886178862, |
| "grad_norm": 0.14406579732894897, |
| "learning_rate": 7.209342359813404e-05, |
| "loss": 0.0195, |
| "step": 5800 |
| }, |
| { |
| "epoch": 11.808943089430894, |
| "grad_norm": 0.1978255659341812, |
| "learning_rate": 7.199448369023327e-05, |
| "loss": 0.0193, |
| "step": 5810 |
| }, |
| { |
| "epoch": 11.829268292682928, |
| "grad_norm": 0.24132715165615082, |
| "learning_rate": 7.189543688087845e-05, |
| "loss": 0.0195, |
| "step": 5820 |
| }, |
| { |
| "epoch": 11.84959349593496, |
| "grad_norm": 0.2294216752052307, |
| "learning_rate": 7.17962836514743e-05, |
| "loss": 0.0219, |
| "step": 5830 |
| }, |
| { |
| "epoch": 11.869918699186991, |
| "grad_norm": 0.2080947607755661, |
| "learning_rate": 7.169702448394279e-05, |
| "loss": 0.0203, |
| "step": 5840 |
| }, |
| { |
| "epoch": 11.890243902439025, |
| "grad_norm": 0.16323475539684296, |
| "learning_rate": 7.159765986072071e-05, |
| "loss": 0.0277, |
| "step": 5850 |
| }, |
| { |
| "epoch": 11.910569105691057, |
| "grad_norm": 0.2618698179721832, |
| "learning_rate": 7.149819026475751e-05, |
| "loss": 0.0214, |
| "step": 5860 |
| }, |
| { |
| "epoch": 11.93089430894309, |
| "grad_norm": 0.22444747388362885, |
| "learning_rate": 7.139861617951275e-05, |
| "loss": 0.0197, |
| "step": 5870 |
| }, |
| { |
| "epoch": 11.951219512195122, |
| "grad_norm": 0.22842451930046082, |
| "learning_rate": 7.129893808895395e-05, |
| "loss": 0.0279, |
| "step": 5880 |
| }, |
| { |
| "epoch": 11.971544715447154, |
| "grad_norm": 0.1826316863298416, |
| "learning_rate": 7.119915647755404e-05, |
| "loss": 0.0195, |
| "step": 5890 |
| }, |
| { |
| "epoch": 11.991869918699187, |
| "grad_norm": 0.2494143843650818, |
| "learning_rate": 7.109927183028914e-05, |
| "loss": 0.0222, |
| "step": 5900 |
| }, |
| { |
| "epoch": 12.012195121951219, |
| "grad_norm": 0.13060876727104187, |
| "learning_rate": 7.099928463263619e-05, |
| "loss": 0.0215, |
| "step": 5910 |
| }, |
| { |
| "epoch": 12.032520325203253, |
| "grad_norm": 0.13381272554397583, |
| "learning_rate": 7.08991953705705e-05, |
| "loss": 0.0205, |
| "step": 5920 |
| }, |
| { |
| "epoch": 12.052845528455284, |
| "grad_norm": 0.23280222713947296, |
| "learning_rate": 7.07990045305635e-05, |
| "loss": 0.0183, |
| "step": 5930 |
| }, |
| { |
| "epoch": 12.073170731707316, |
| "grad_norm": 0.13017290830612183, |
| "learning_rate": 7.069871259958034e-05, |
| "loss": 0.0192, |
| "step": 5940 |
| }, |
| { |
| "epoch": 12.09349593495935, |
| "grad_norm": 0.17966489493846893, |
| "learning_rate": 7.059832006507745e-05, |
| "loss": 0.0191, |
| "step": 5950 |
| }, |
| { |
| "epoch": 12.113821138211382, |
| "grad_norm": 0.15518441796302795, |
| "learning_rate": 7.049782741500028e-05, |
| "loss": 0.0193, |
| "step": 5960 |
| }, |
| { |
| "epoch": 12.134146341463415, |
| "grad_norm": 0.13540403544902802, |
| "learning_rate": 7.039723513778087e-05, |
| "loss": 0.0169, |
| "step": 5970 |
| }, |
| { |
| "epoch": 12.154471544715447, |
| "grad_norm": 0.19922709465026855, |
| "learning_rate": 7.029654372233544e-05, |
| "loss": 0.0206, |
| "step": 5980 |
| }, |
| { |
| "epoch": 12.17479674796748, |
| "grad_norm": 0.23210512101650238, |
| "learning_rate": 7.019575365806215e-05, |
| "loss": 0.0205, |
| "step": 5990 |
| }, |
| { |
| "epoch": 12.195121951219512, |
| "grad_norm": 0.12691457569599152, |
| "learning_rate": 7.009486543483858e-05, |
| "loss": 0.0214, |
| "step": 6000 |
| }, |
| { |
| "epoch": 12.215447154471544, |
| "grad_norm": 0.1701710820198059, |
| "learning_rate": 6.999387954301934e-05, |
| "loss": 0.0204, |
| "step": 6010 |
| }, |
| { |
| "epoch": 12.235772357723578, |
| "grad_norm": 0.15799103677272797, |
| "learning_rate": 6.989279647343388e-05, |
| "loss": 0.0228, |
| "step": 6020 |
| }, |
| { |
| "epoch": 12.25609756097561, |
| "grad_norm": 0.14731904864311218, |
| "learning_rate": 6.979161671738382e-05, |
| "loss": 0.0166, |
| "step": 6030 |
| }, |
| { |
| "epoch": 12.276422764227643, |
| "grad_norm": 0.1335950791835785, |
| "learning_rate": 6.969034076664085e-05, |
| "loss": 0.0188, |
| "step": 6040 |
| }, |
| { |
| "epoch": 12.296747967479675, |
| "grad_norm": 0.18221591413021088, |
| "learning_rate": 6.958896911344411e-05, |
| "loss": 0.0146, |
| "step": 6050 |
| }, |
| { |
| "epoch": 12.317073170731707, |
| "grad_norm": 0.18732091784477234, |
| "learning_rate": 6.948750225049791e-05, |
| "loss": 0.0183, |
| "step": 6060 |
| }, |
| { |
| "epoch": 12.33739837398374, |
| "grad_norm": 0.1735702008008957, |
| "learning_rate": 6.938594067096936e-05, |
| "loss": 0.0174, |
| "step": 6070 |
| }, |
| { |
| "epoch": 12.357723577235772, |
| "grad_norm": 0.14985719323158264, |
| "learning_rate": 6.928428486848587e-05, |
| "loss": 0.0228, |
| "step": 6080 |
| }, |
| { |
| "epoch": 12.378048780487806, |
| "grad_norm": 0.14891484379768372, |
| "learning_rate": 6.918253533713282e-05, |
| "loss": 0.0183, |
| "step": 6090 |
| }, |
| { |
| "epoch": 12.398373983739837, |
| "grad_norm": 0.18387916684150696, |
| "learning_rate": 6.908069257145118e-05, |
| "loss": 0.0163, |
| "step": 6100 |
| }, |
| { |
| "epoch": 12.41869918699187, |
| "grad_norm": 0.15331916511058807, |
| "learning_rate": 6.897875706643506e-05, |
| "loss": 0.0196, |
| "step": 6110 |
| }, |
| { |
| "epoch": 12.439024390243903, |
| "grad_norm": 0.22543849050998688, |
| "learning_rate": 6.887672931752927e-05, |
| "loss": 0.0201, |
| "step": 6120 |
| }, |
| { |
| "epoch": 12.459349593495935, |
| "grad_norm": 0.15449005365371704, |
| "learning_rate": 6.877460982062706e-05, |
| "loss": 0.0192, |
| "step": 6130 |
| }, |
| { |
| "epoch": 12.479674796747968, |
| "grad_norm": 0.12795570492744446, |
| "learning_rate": 6.86723990720675e-05, |
| "loss": 0.0152, |
| "step": 6140 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.14221183955669403, |
| "learning_rate": 6.857009756863326e-05, |
| "loss": 0.0167, |
| "step": 6150 |
| }, |
| { |
| "epoch": 12.520325203252032, |
| "grad_norm": 0.17970189452171326, |
| "learning_rate": 6.846770580754807e-05, |
| "loss": 0.0236, |
| "step": 6160 |
| }, |
| { |
| "epoch": 12.540650406504065, |
| "grad_norm": 0.2241237312555313, |
| "learning_rate": 6.836522428647438e-05, |
| "loss": 0.0195, |
| "step": 6170 |
| }, |
| { |
| "epoch": 12.560975609756097, |
| "grad_norm": 0.23520494997501373, |
| "learning_rate": 6.826265350351083e-05, |
| "loss": 0.021, |
| "step": 6180 |
| }, |
| { |
| "epoch": 12.58130081300813, |
| "grad_norm": 0.22116564214229584, |
| "learning_rate": 6.815999395719e-05, |
| "loss": 0.0214, |
| "step": 6190 |
| }, |
| { |
| "epoch": 12.601626016260163, |
| "grad_norm": 0.212169349193573, |
| "learning_rate": 6.805724614647586e-05, |
| "loss": 0.0194, |
| "step": 6200 |
| }, |
| { |
| "epoch": 12.621951219512194, |
| "grad_norm": 0.2118973284959793, |
| "learning_rate": 6.795441057076136e-05, |
| "loss": 0.0207, |
| "step": 6210 |
| }, |
| { |
| "epoch": 12.642276422764228, |
| "grad_norm": 0.21665440499782562, |
| "learning_rate": 6.785148772986603e-05, |
| "loss": 0.0191, |
| "step": 6220 |
| }, |
| { |
| "epoch": 12.66260162601626, |
| "grad_norm": 0.1934552788734436, |
| "learning_rate": 6.774847812403355e-05, |
| "loss": 0.0194, |
| "step": 6230 |
| }, |
| { |
| "epoch": 12.682926829268293, |
| "grad_norm": 0.22638703882694244, |
| "learning_rate": 6.76453822539293e-05, |
| "loss": 0.0219, |
| "step": 6240 |
| }, |
| { |
| "epoch": 12.703252032520325, |
| "grad_norm": 0.20078861713409424, |
| "learning_rate": 6.754220062063793e-05, |
| "loss": 0.0213, |
| "step": 6250 |
| }, |
| { |
| "epoch": 12.723577235772357, |
| "grad_norm": 0.18275423347949982, |
| "learning_rate": 6.743893372566099e-05, |
| "loss": 0.0218, |
| "step": 6260 |
| }, |
| { |
| "epoch": 12.74390243902439, |
| "grad_norm": 0.15998774766921997, |
| "learning_rate": 6.733558207091434e-05, |
| "loss": 0.0194, |
| "step": 6270 |
| }, |
| { |
| "epoch": 12.764227642276422, |
| "grad_norm": 0.14701031148433685, |
| "learning_rate": 6.723214615872585e-05, |
| "loss": 0.0221, |
| "step": 6280 |
| }, |
| { |
| "epoch": 12.784552845528456, |
| "grad_norm": 0.310570627450943, |
| "learning_rate": 6.712862649183295e-05, |
| "loss": 0.0223, |
| "step": 6290 |
| }, |
| { |
| "epoch": 12.804878048780488, |
| "grad_norm": 0.1784200817346573, |
| "learning_rate": 6.70250235733801e-05, |
| "loss": 0.0194, |
| "step": 6300 |
| }, |
| { |
| "epoch": 12.82520325203252, |
| "grad_norm": 0.2551976144313812, |
| "learning_rate": 6.692133790691639e-05, |
| "loss": 0.024, |
| "step": 6310 |
| }, |
| { |
| "epoch": 12.845528455284553, |
| "grad_norm": 0.14004677534103394, |
| "learning_rate": 6.681756999639311e-05, |
| "loss": 0.0202, |
| "step": 6320 |
| }, |
| { |
| "epoch": 12.865853658536585, |
| "grad_norm": 0.16402773559093475, |
| "learning_rate": 6.671372034616132e-05, |
| "loss": 0.0165, |
| "step": 6330 |
| }, |
| { |
| "epoch": 12.886178861788618, |
| "grad_norm": 0.18575695157051086, |
| "learning_rate": 6.660978946096933e-05, |
| "loss": 0.0192, |
| "step": 6340 |
| }, |
| { |
| "epoch": 12.90650406504065, |
| "grad_norm": 0.1559947431087494, |
| "learning_rate": 6.650577784596026e-05, |
| "loss": 0.0181, |
| "step": 6350 |
| }, |
| { |
| "epoch": 12.926829268292684, |
| "grad_norm": 0.21637220680713654, |
| "learning_rate": 6.640168600666967e-05, |
| "loss": 0.0215, |
| "step": 6360 |
| }, |
| { |
| "epoch": 12.947154471544716, |
| "grad_norm": 0.17190884053707123, |
| "learning_rate": 6.629751444902299e-05, |
| "loss": 0.0194, |
| "step": 6370 |
| }, |
| { |
| "epoch": 12.967479674796747, |
| "grad_norm": 0.1417016088962555, |
| "learning_rate": 6.619326367933312e-05, |
| "loss": 0.0189, |
| "step": 6380 |
| }, |
| { |
| "epoch": 12.987804878048781, |
| "grad_norm": 0.19975803792476654, |
| "learning_rate": 6.608893420429798e-05, |
| "loss": 0.0208, |
| "step": 6390 |
| }, |
| { |
| "epoch": 13.008130081300813, |
| "grad_norm": 0.1410190463066101, |
| "learning_rate": 6.598452653099803e-05, |
| "loss": 0.0206, |
| "step": 6400 |
| }, |
| { |
| "epoch": 13.028455284552846, |
| "grad_norm": 0.12187732011079788, |
| "learning_rate": 6.588004116689375e-05, |
| "loss": 0.0189, |
| "step": 6410 |
| }, |
| { |
| "epoch": 13.048780487804878, |
| "grad_norm": 0.19220778346061707, |
| "learning_rate": 6.57754786198233e-05, |
| "loss": 0.0199, |
| "step": 6420 |
| }, |
| { |
| "epoch": 13.06910569105691, |
| "grad_norm": 0.11318925768136978, |
| "learning_rate": 6.567083939799992e-05, |
| "loss": 0.0164, |
| "step": 6430 |
| }, |
| { |
| "epoch": 13.089430894308943, |
| "grad_norm": 0.20521433651447296, |
| "learning_rate": 6.556612401000954e-05, |
| "loss": 0.0192, |
| "step": 6440 |
| }, |
| { |
| "epoch": 13.109756097560975, |
| "grad_norm": 0.17373771965503693, |
| "learning_rate": 6.54613329648083e-05, |
| "loss": 0.0151, |
| "step": 6450 |
| }, |
| { |
| "epoch": 13.130081300813009, |
| "grad_norm": 0.13978348672389984, |
| "learning_rate": 6.535646677172005e-05, |
| "loss": 0.0175, |
| "step": 6460 |
| }, |
| { |
| "epoch": 13.15040650406504, |
| "grad_norm": 0.1641884446144104, |
| "learning_rate": 6.52515259404339e-05, |
| "loss": 0.0211, |
| "step": 6470 |
| }, |
| { |
| "epoch": 13.170731707317072, |
| "grad_norm": 0.25421059131622314, |
| "learning_rate": 6.514651098100167e-05, |
| "loss": 0.0169, |
| "step": 6480 |
| }, |
| { |
| "epoch": 13.191056910569106, |
| "grad_norm": 0.18321838974952698, |
| "learning_rate": 6.504142240383555e-05, |
| "loss": 0.017, |
| "step": 6490 |
| }, |
| { |
| "epoch": 13.211382113821138, |
| "grad_norm": 0.24143821001052856, |
| "learning_rate": 6.493626071970549e-05, |
| "loss": 0.0192, |
| "step": 6500 |
| }, |
| { |
| "epoch": 13.231707317073171, |
| "grad_norm": 0.18711033463478088, |
| "learning_rate": 6.483102643973682e-05, |
| "loss": 0.0208, |
| "step": 6510 |
| }, |
| { |
| "epoch": 13.252032520325203, |
| "grad_norm": 0.17849287390708923, |
| "learning_rate": 6.472572007540764e-05, |
| "loss": 0.0233, |
| "step": 6520 |
| }, |
| { |
| "epoch": 13.272357723577235, |
| "grad_norm": 0.21863636374473572, |
| "learning_rate": 6.462034213854645e-05, |
| "loss": 0.0207, |
| "step": 6530 |
| }, |
| { |
| "epoch": 13.292682926829269, |
| "grad_norm": 0.12835566699504852, |
| "learning_rate": 6.451489314132962e-05, |
| "loss": 0.0159, |
| "step": 6540 |
| }, |
| { |
| "epoch": 13.3130081300813, |
| "grad_norm": 0.24241842329502106, |
| "learning_rate": 6.440937359627893e-05, |
| "loss": 0.0198, |
| "step": 6550 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.1560855656862259, |
| "learning_rate": 6.430378401625894e-05, |
| "loss": 0.0187, |
| "step": 6560 |
| }, |
| { |
| "epoch": 13.353658536585366, |
| "grad_norm": 0.17956890165805817, |
| "learning_rate": 6.419812491447472e-05, |
| "loss": 0.0224, |
| "step": 6570 |
| }, |
| { |
| "epoch": 13.373983739837398, |
| "grad_norm": 0.23864564299583435, |
| "learning_rate": 6.409239680446919e-05, |
| "loss": 0.024, |
| "step": 6580 |
| }, |
| { |
| "epoch": 13.394308943089431, |
| "grad_norm": 0.19811518490314484, |
| "learning_rate": 6.398660020012072e-05, |
| "loss": 0.0175, |
| "step": 6590 |
| }, |
| { |
| "epoch": 13.414634146341463, |
| "grad_norm": 0.2658018469810486, |
| "learning_rate": 6.38807356156405e-05, |
| "loss": 0.0195, |
| "step": 6600 |
| }, |
| { |
| "epoch": 13.434959349593496, |
| "grad_norm": 0.20414608716964722, |
| "learning_rate": 6.377480356557022e-05, |
| "loss": 0.019, |
| "step": 6610 |
| }, |
| { |
| "epoch": 13.455284552845528, |
| "grad_norm": 0.2046128660440445, |
| "learning_rate": 6.366880456477942e-05, |
| "loss": 0.0168, |
| "step": 6620 |
| }, |
| { |
| "epoch": 13.475609756097562, |
| "grad_norm": 0.19554445147514343, |
| "learning_rate": 6.356273912846312e-05, |
| "loss": 0.0197, |
| "step": 6630 |
| }, |
| { |
| "epoch": 13.495934959349594, |
| "grad_norm": 0.2673018276691437, |
| "learning_rate": 6.34566077721391e-05, |
| "loss": 0.0194, |
| "step": 6640 |
| }, |
| { |
| "epoch": 13.516260162601625, |
| "grad_norm": 0.17518429458141327, |
| "learning_rate": 6.335041101164569e-05, |
| "loss": 0.0185, |
| "step": 6650 |
| }, |
| { |
| "epoch": 13.536585365853659, |
| "grad_norm": 0.19595539569854736, |
| "learning_rate": 6.324414936313904e-05, |
| "loss": 0.0193, |
| "step": 6660 |
| }, |
| { |
| "epoch": 13.55691056910569, |
| "grad_norm": 0.19745945930480957, |
| "learning_rate": 6.313782334309066e-05, |
| "loss": 0.0178, |
| "step": 6670 |
| }, |
| { |
| "epoch": 13.577235772357724, |
| "grad_norm": 0.11477528512477875, |
| "learning_rate": 6.303143346828499e-05, |
| "loss": 0.0205, |
| "step": 6680 |
| }, |
| { |
| "epoch": 13.597560975609756, |
| "grad_norm": 0.18739905953407288, |
| "learning_rate": 6.292498025581674e-05, |
| "loss": 0.0189, |
| "step": 6690 |
| }, |
| { |
| "epoch": 13.617886178861788, |
| "grad_norm": 0.22458316385746002, |
| "learning_rate": 6.281846422308857e-05, |
| "loss": 0.0186, |
| "step": 6700 |
| }, |
| { |
| "epoch": 13.638211382113822, |
| "grad_norm": 0.11343209445476532, |
| "learning_rate": 6.271188588780839e-05, |
| "loss": 0.0173, |
| "step": 6710 |
| }, |
| { |
| "epoch": 13.658536585365853, |
| "grad_norm": 0.2310679703950882, |
| "learning_rate": 6.260524576798694e-05, |
| "loss": 0.0188, |
| "step": 6720 |
| }, |
| { |
| "epoch": 13.678861788617887, |
| "grad_norm": 0.2177576720714569, |
| "learning_rate": 6.249854438193528e-05, |
| "loss": 0.0218, |
| "step": 6730 |
| }, |
| { |
| "epoch": 13.699186991869919, |
| "grad_norm": 0.18544794619083405, |
| "learning_rate": 6.239178224826224e-05, |
| "loss": 0.0223, |
| "step": 6740 |
| }, |
| { |
| "epoch": 13.71951219512195, |
| "grad_norm": 0.22832602262496948, |
| "learning_rate": 6.228495988587188e-05, |
| "loss": 0.0159, |
| "step": 6750 |
| }, |
| { |
| "epoch": 13.739837398373984, |
| "grad_norm": 0.24159882962703705, |
| "learning_rate": 6.217807781396106e-05, |
| "loss": 0.0197, |
| "step": 6760 |
| }, |
| { |
| "epoch": 13.760162601626016, |
| "grad_norm": 0.22287015616893768, |
| "learning_rate": 6.207113655201676e-05, |
| "loss": 0.0163, |
| "step": 6770 |
| }, |
| { |
| "epoch": 13.78048780487805, |
| "grad_norm": 0.1738264560699463, |
| "learning_rate": 6.196413661981368e-05, |
| "loss": 0.0171, |
| "step": 6780 |
| }, |
| { |
| "epoch": 13.800813008130081, |
| "grad_norm": 0.22115349769592285, |
| "learning_rate": 6.185707853741175e-05, |
| "loss": 0.018, |
| "step": 6790 |
| }, |
| { |
| "epoch": 13.821138211382113, |
| "grad_norm": 0.18646171689033508, |
| "learning_rate": 6.174996282515344e-05, |
| "loss": 0.024, |
| "step": 6800 |
| }, |
| { |
| "epoch": 13.841463414634147, |
| "grad_norm": 0.18444843590259552, |
| "learning_rate": 6.164279000366131e-05, |
| "loss": 0.0146, |
| "step": 6810 |
| }, |
| { |
| "epoch": 13.861788617886178, |
| "grad_norm": 0.14431391656398773, |
| "learning_rate": 6.153556059383561e-05, |
| "loss": 0.015, |
| "step": 6820 |
| }, |
| { |
| "epoch": 13.882113821138212, |
| "grad_norm": 0.15836864709854126, |
| "learning_rate": 6.142827511685152e-05, |
| "loss": 0.0143, |
| "step": 6830 |
| }, |
| { |
| "epoch": 13.902439024390244, |
| "grad_norm": 0.24831633269786835, |
| "learning_rate": 6.132093409415678e-05, |
| "loss": 0.018, |
| "step": 6840 |
| }, |
| { |
| "epoch": 13.922764227642276, |
| "grad_norm": 0.17718598246574402, |
| "learning_rate": 6.121353804746907e-05, |
| "loss": 0.0157, |
| "step": 6850 |
| }, |
| { |
| "epoch": 13.94308943089431, |
| "grad_norm": 0.19644102454185486, |
| "learning_rate": 6.110608749877352e-05, |
| "loss": 0.0195, |
| "step": 6860 |
| }, |
| { |
| "epoch": 13.963414634146341, |
| "grad_norm": 0.16622968018054962, |
| "learning_rate": 6.0998582970320205e-05, |
| "loss": 0.0198, |
| "step": 6870 |
| }, |
| { |
| "epoch": 13.983739837398375, |
| "grad_norm": 0.2506231367588043, |
| "learning_rate": 6.0891024984621506e-05, |
| "loss": 0.0247, |
| "step": 6880 |
| }, |
| { |
| "epoch": 14.004065040650406, |
| "grad_norm": 0.21900992095470428, |
| "learning_rate": 6.078341406444961e-05, |
| "loss": 0.02, |
| "step": 6890 |
| }, |
| { |
| "epoch": 14.024390243902438, |
| "grad_norm": 0.1864655762910843, |
| "learning_rate": 6.067575073283405e-05, |
| "loss": 0.0191, |
| "step": 6900 |
| }, |
| { |
| "epoch": 14.044715447154472, |
| "grad_norm": 0.22156104445457458, |
| "learning_rate": 6.0568035513059073e-05, |
| "loss": 0.0198, |
| "step": 6910 |
| }, |
| { |
| "epoch": 14.065040650406504, |
| "grad_norm": 0.19245333969593048, |
| "learning_rate": 6.046026892866109e-05, |
| "loss": 0.0268, |
| "step": 6920 |
| }, |
| { |
| "epoch": 14.085365853658537, |
| "grad_norm": 0.16246770322322845, |
| "learning_rate": 6.0352451503426214e-05, |
| "loss": 0.0163, |
| "step": 6930 |
| }, |
| { |
| "epoch": 14.105691056910569, |
| "grad_norm": 0.1798103153705597, |
| "learning_rate": 6.024458376138762e-05, |
| "loss": 0.015, |
| "step": 6940 |
| }, |
| { |
| "epoch": 14.126016260162602, |
| "grad_norm": 0.14961481094360352, |
| "learning_rate": 6.013666622682306e-05, |
| "loss": 0.0201, |
| "step": 6950 |
| }, |
| { |
| "epoch": 14.146341463414634, |
| "grad_norm": 0.17163357138633728, |
| "learning_rate": 6.002869942425231e-05, |
| "loss": 0.0173, |
| "step": 6960 |
| }, |
| { |
| "epoch": 14.166666666666666, |
| "grad_norm": 0.22165465354919434, |
| "learning_rate": 5.992068387843459e-05, |
| "loss": 0.0174, |
| "step": 6970 |
| }, |
| { |
| "epoch": 14.1869918699187, |
| "grad_norm": 0.15428420901298523, |
| "learning_rate": 5.981262011436603e-05, |
| "loss": 0.0175, |
| "step": 6980 |
| }, |
| { |
| "epoch": 14.207317073170731, |
| "grad_norm": 0.16303707659244537, |
| "learning_rate": 5.970450865727712e-05, |
| "loss": 0.0196, |
| "step": 6990 |
| }, |
| { |
| "epoch": 14.227642276422765, |
| "grad_norm": 0.1545059233903885, |
| "learning_rate": 5.9596350032630156e-05, |
| "loss": 0.0182, |
| "step": 7000 |
| }, |
| { |
| "epoch": 14.247967479674797, |
| "grad_norm": 0.20550492405891418, |
| "learning_rate": 5.9488144766116714e-05, |
| "loss": 0.0185, |
| "step": 7010 |
| }, |
| { |
| "epoch": 14.268292682926829, |
| "grad_norm": 0.1110968366265297, |
| "learning_rate": 5.9379893383655006e-05, |
| "loss": 0.0155, |
| "step": 7020 |
| }, |
| { |
| "epoch": 14.288617886178862, |
| "grad_norm": 0.12301554530858994, |
| "learning_rate": 5.927159641138744e-05, |
| "loss": 0.018, |
| "step": 7030 |
| }, |
| { |
| "epoch": 14.308943089430894, |
| "grad_norm": 0.12619097530841827, |
| "learning_rate": 5.916325437567799e-05, |
| "loss": 0.0183, |
| "step": 7040 |
| }, |
| { |
| "epoch": 14.329268292682928, |
| "grad_norm": 0.1619870513677597, |
| "learning_rate": 5.905486780310966e-05, |
| "loss": 0.0222, |
| "step": 7050 |
| }, |
| { |
| "epoch": 14.34959349593496, |
| "grad_norm": 0.20483283698558807, |
| "learning_rate": 5.8946437220481887e-05, |
| "loss": 0.0149, |
| "step": 7060 |
| }, |
| { |
| "epoch": 14.369918699186991, |
| "grad_norm": 0.11128581315279007, |
| "learning_rate": 5.883796315480805e-05, |
| "loss": 0.0204, |
| "step": 7070 |
| }, |
| { |
| "epoch": 14.390243902439025, |
| "grad_norm": 0.12385226041078568, |
| "learning_rate": 5.872944613331288e-05, |
| "loss": 0.0137, |
| "step": 7080 |
| }, |
| { |
| "epoch": 14.410569105691057, |
| "grad_norm": 0.12931764125823975, |
| "learning_rate": 5.862088668342986e-05, |
| "loss": 0.0141, |
| "step": 7090 |
| }, |
| { |
| "epoch": 14.43089430894309, |
| "grad_norm": 0.1460898071527481, |
| "learning_rate": 5.8512285332798714e-05, |
| "loss": 0.0172, |
| "step": 7100 |
| }, |
| { |
| "epoch": 14.451219512195122, |
| "grad_norm": 0.1483217179775238, |
| "learning_rate": 5.840364260926277e-05, |
| "loss": 0.0166, |
| "step": 7110 |
| }, |
| { |
| "epoch": 14.471544715447154, |
| "grad_norm": 0.19570297002792358, |
| "learning_rate": 5.8294959040866505e-05, |
| "loss": 0.0181, |
| "step": 7120 |
| }, |
| { |
| "epoch": 14.491869918699187, |
| "grad_norm": 0.2213359773159027, |
| "learning_rate": 5.818623515585292e-05, |
| "loss": 0.017, |
| "step": 7130 |
| }, |
| { |
| "epoch": 14.512195121951219, |
| "grad_norm": 0.16189566254615784, |
| "learning_rate": 5.8077471482660896e-05, |
| "loss": 0.0206, |
| "step": 7140 |
| }, |
| { |
| "epoch": 14.532520325203253, |
| "grad_norm": 0.1307971179485321, |
| "learning_rate": 5.796866854992276e-05, |
| "loss": 0.0188, |
| "step": 7150 |
| }, |
| { |
| "epoch": 14.552845528455284, |
| "grad_norm": 0.12432897835969925, |
| "learning_rate": 5.7859826886461676e-05, |
| "loss": 0.0218, |
| "step": 7160 |
| }, |
| { |
| "epoch": 14.573170731707316, |
| "grad_norm": 0.20361848175525665, |
| "learning_rate": 5.775094702128899e-05, |
| "loss": 0.0177, |
| "step": 7170 |
| }, |
| { |
| "epoch": 14.59349593495935, |
| "grad_norm": 0.18532606959342957, |
| "learning_rate": 5.7642029483601746e-05, |
| "loss": 0.0187, |
| "step": 7180 |
| }, |
| { |
| "epoch": 14.613821138211382, |
| "grad_norm": 0.15062542259693146, |
| "learning_rate": 5.753307480278012e-05, |
| "loss": 0.0231, |
| "step": 7190 |
| }, |
| { |
| "epoch": 14.634146341463415, |
| "grad_norm": 0.17582647502422333, |
| "learning_rate": 5.742408350838478e-05, |
| "loss": 0.0192, |
| "step": 7200 |
| }, |
| { |
| "epoch": 14.654471544715447, |
| "grad_norm": 0.14718493819236755, |
| "learning_rate": 5.7315056130154374e-05, |
| "loss": 0.017, |
| "step": 7210 |
| }, |
| { |
| "epoch": 14.67479674796748, |
| "grad_norm": 0.25164714455604553, |
| "learning_rate": 5.720599319800292e-05, |
| "loss": 0.0262, |
| "step": 7220 |
| }, |
| { |
| "epoch": 14.695121951219512, |
| "grad_norm": 0.17039872705936432, |
| "learning_rate": 5.709689524201722e-05, |
| "loss": 0.0203, |
| "step": 7230 |
| }, |
| { |
| "epoch": 14.715447154471544, |
| "grad_norm": 0.21887938678264618, |
| "learning_rate": 5.698776279245437e-05, |
| "loss": 0.0201, |
| "step": 7240 |
| }, |
| { |
| "epoch": 14.735772357723578, |
| "grad_norm": 0.19460436701774597, |
| "learning_rate": 5.6878596379739036e-05, |
| "loss": 0.0249, |
| "step": 7250 |
| }, |
| { |
| "epoch": 14.75609756097561, |
| "grad_norm": 0.25877246260643005, |
| "learning_rate": 5.676939653446103e-05, |
| "loss": 0.0208, |
| "step": 7260 |
| }, |
| { |
| "epoch": 14.776422764227643, |
| "grad_norm": 0.22199095785617828, |
| "learning_rate": 5.666016378737261e-05, |
| "loss": 0.022, |
| "step": 7270 |
| }, |
| { |
| "epoch": 14.796747967479675, |
| "grad_norm": 0.14779351651668549, |
| "learning_rate": 5.655089866938596e-05, |
| "loss": 0.0168, |
| "step": 7280 |
| }, |
| { |
| "epoch": 14.817073170731707, |
| "grad_norm": 0.2230490893125534, |
| "learning_rate": 5.6441601711570615e-05, |
| "loss": 0.0189, |
| "step": 7290 |
| }, |
| { |
| "epoch": 14.83739837398374, |
| "grad_norm": 0.17648757994174957, |
| "learning_rate": 5.633227344515085e-05, |
| "loss": 0.0204, |
| "step": 7300 |
| }, |
| { |
| "epoch": 14.857723577235772, |
| "grad_norm": 0.1408594250679016, |
| "learning_rate": 5.6222914401503116e-05, |
| "loss": 0.0192, |
| "step": 7310 |
| }, |
| { |
| "epoch": 14.878048780487806, |
| "grad_norm": 0.14772988855838776, |
| "learning_rate": 5.611352511215343e-05, |
| "loss": 0.0176, |
| "step": 7320 |
| }, |
| { |
| "epoch": 14.898373983739837, |
| "grad_norm": 0.14210304617881775, |
| "learning_rate": 5.600410610877488e-05, |
| "loss": 0.019, |
| "step": 7330 |
| }, |
| { |
| "epoch": 14.91869918699187, |
| "grad_norm": 0.16473866999149323, |
| "learning_rate": 5.58946579231849e-05, |
| "loss": 0.0183, |
| "step": 7340 |
| }, |
| { |
| "epoch": 14.939024390243903, |
| "grad_norm": 0.15072952210903168, |
| "learning_rate": 5.578518108734279e-05, |
| "loss": 0.0149, |
| "step": 7350 |
| }, |
| { |
| "epoch": 14.959349593495935, |
| "grad_norm": 0.17104724049568176, |
| "learning_rate": 5.5675676133347096e-05, |
| "loss": 0.0169, |
| "step": 7360 |
| }, |
| { |
| "epoch": 14.979674796747968, |
| "grad_norm": 0.09811348468065262, |
| "learning_rate": 5.556614359343307e-05, |
| "loss": 0.0168, |
| "step": 7370 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.18304497003555298, |
| "learning_rate": 5.545658399996999e-05, |
| "loss": 0.0158, |
| "step": 7380 |
| }, |
| { |
| "epoch": 15.020325203252032, |
| "grad_norm": 0.1639220118522644, |
| "learning_rate": 5.534699788545862e-05, |
| "loss": 0.0183, |
| "step": 7390 |
| }, |
| { |
| "epoch": 15.040650406504065, |
| "grad_norm": 0.13093116879463196, |
| "learning_rate": 5.523738578252867e-05, |
| "loss": 0.0193, |
| "step": 7400 |
| }, |
| { |
| "epoch": 15.060975609756097, |
| "grad_norm": 0.1492796391248703, |
| "learning_rate": 5.512774822393614e-05, |
| "loss": 0.0148, |
| "step": 7410 |
| }, |
| { |
| "epoch": 15.08130081300813, |
| "grad_norm": 0.23351886868476868, |
| "learning_rate": 5.5018085742560744e-05, |
| "loss": 0.0191, |
| "step": 7420 |
| }, |
| { |
| "epoch": 15.101626016260163, |
| "grad_norm": 0.19075217843055725, |
| "learning_rate": 5.4908398871403365e-05, |
| "loss": 0.0168, |
| "step": 7430 |
| }, |
| { |
| "epoch": 15.121951219512194, |
| "grad_norm": 0.1332959085702896, |
| "learning_rate": 5.4798688143583375e-05, |
| "loss": 0.0173, |
| "step": 7440 |
| }, |
| { |
| "epoch": 15.142276422764228, |
| "grad_norm": 0.11717317998409271, |
| "learning_rate": 5.468895409233615e-05, |
| "loss": 0.0143, |
| "step": 7450 |
| }, |
| { |
| "epoch": 15.16260162601626, |
| "grad_norm": 0.17847301065921783, |
| "learning_rate": 5.4579197251010414e-05, |
| "loss": 0.0172, |
| "step": 7460 |
| }, |
| { |
| "epoch": 15.182926829268293, |
| "grad_norm": 0.18227623403072357, |
| "learning_rate": 5.446941815306563e-05, |
| "loss": 0.0152, |
| "step": 7470 |
| }, |
| { |
| "epoch": 15.203252032520325, |
| "grad_norm": 0.1648562103509903, |
| "learning_rate": 5.435961733206947e-05, |
| "loss": 0.0184, |
| "step": 7480 |
| }, |
| { |
| "epoch": 15.223577235772357, |
| "grad_norm": 0.21375487744808197, |
| "learning_rate": 5.424979532169516e-05, |
| "loss": 0.0179, |
| "step": 7490 |
| }, |
| { |
| "epoch": 15.24390243902439, |
| "grad_norm": 0.16193552315235138, |
| "learning_rate": 5.413995265571895e-05, |
| "loss": 0.0156, |
| "step": 7500 |
| }, |
| { |
| "epoch": 15.264227642276422, |
| "grad_norm": 0.20847506821155548, |
| "learning_rate": 5.403008986801746e-05, |
| "loss": 0.013, |
| "step": 7510 |
| }, |
| { |
| "epoch": 15.284552845528456, |
| "grad_norm": 0.11255639046430588, |
| "learning_rate": 5.3920207492565114e-05, |
| "loss": 0.0177, |
| "step": 7520 |
| }, |
| { |
| "epoch": 15.304878048780488, |
| "grad_norm": 0.221723273396492, |
| "learning_rate": 5.381030606343154e-05, |
| "loss": 0.0192, |
| "step": 7530 |
| }, |
| { |
| "epoch": 15.32520325203252, |
| "grad_norm": 0.24689878523349762, |
| "learning_rate": 5.370038611477894e-05, |
| "loss": 0.0164, |
| "step": 7540 |
| }, |
| { |
| "epoch": 15.345528455284553, |
| "grad_norm": 0.17949624359607697, |
| "learning_rate": 5.359044818085963e-05, |
| "loss": 0.0166, |
| "step": 7550 |
| }, |
| { |
| "epoch": 15.365853658536585, |
| "grad_norm": 0.23291277885437012, |
| "learning_rate": 5.3480492796013214e-05, |
| "loss": 0.0168, |
| "step": 7560 |
| }, |
| { |
| "epoch": 15.386178861788618, |
| "grad_norm": 0.20025502145290375, |
| "learning_rate": 5.33705204946642e-05, |
| "loss": 0.0164, |
| "step": 7570 |
| }, |
| { |
| "epoch": 15.40650406504065, |
| "grad_norm": 0.23018495738506317, |
| "learning_rate": 5.326053181131927e-05, |
| "loss": 0.0199, |
| "step": 7580 |
| }, |
| { |
| "epoch": 15.426829268292684, |
| "grad_norm": 0.1864101141691208, |
| "learning_rate": 5.3150527280564776e-05, |
| "loss": 0.0171, |
| "step": 7590 |
| }, |
| { |
| "epoch": 15.447154471544716, |
| "grad_norm": 0.14866892993450165, |
| "learning_rate": 5.3040507437064034e-05, |
| "loss": 0.0171, |
| "step": 7600 |
| }, |
| { |
| "epoch": 15.467479674796747, |
| "grad_norm": 0.16208836436271667, |
| "learning_rate": 5.293047281555482e-05, |
| "loss": 0.0141, |
| "step": 7610 |
| }, |
| { |
| "epoch": 15.487804878048781, |
| "grad_norm": 0.12760621309280396, |
| "learning_rate": 5.2820423950846765e-05, |
| "loss": 0.0154, |
| "step": 7620 |
| }, |
| { |
| "epoch": 15.508130081300813, |
| "grad_norm": 0.17664365470409393, |
| "learning_rate": 5.2710361377818696e-05, |
| "loss": 0.0165, |
| "step": 7630 |
| }, |
| { |
| "epoch": 15.528455284552846, |
| "grad_norm": 0.17898857593536377, |
| "learning_rate": 5.2600285631416026e-05, |
| "loss": 0.016, |
| "step": 7640 |
| }, |
| { |
| "epoch": 15.548780487804878, |
| "grad_norm": 0.09963621944189072, |
| "learning_rate": 5.249019724664826e-05, |
| "loss": 0.0189, |
| "step": 7650 |
| }, |
| { |
| "epoch": 15.56910569105691, |
| "grad_norm": 0.15128383040428162, |
| "learning_rate": 5.2380096758586315e-05, |
| "loss": 0.0158, |
| "step": 7660 |
| }, |
| { |
| "epoch": 15.589430894308943, |
| "grad_norm": 0.12781989574432373, |
| "learning_rate": 5.226998470235993e-05, |
| "loss": 0.0158, |
| "step": 7670 |
| }, |
| { |
| "epoch": 15.609756097560975, |
| "grad_norm": 0.17681658267974854, |
| "learning_rate": 5.215986161315507e-05, |
| "loss": 0.0141, |
| "step": 7680 |
| }, |
| { |
| "epoch": 15.630081300813009, |
| "grad_norm": 0.1291031688451767, |
| "learning_rate": 5.20497280262113e-05, |
| "loss": 0.0154, |
| "step": 7690 |
| }, |
| { |
| "epoch": 15.65040650406504, |
| "grad_norm": 0.1151236891746521, |
| "learning_rate": 5.193958447681924e-05, |
| "loss": 0.0142, |
| "step": 7700 |
| }, |
| { |
| "epoch": 15.670731707317072, |
| "grad_norm": 0.16700609028339386, |
| "learning_rate": 5.182943150031793e-05, |
| "loss": 0.0156, |
| "step": 7710 |
| }, |
| { |
| "epoch": 15.691056910569106, |
| "grad_norm": 0.22444726526737213, |
| "learning_rate": 5.1719269632092204e-05, |
| "loss": 0.0161, |
| "step": 7720 |
| }, |
| { |
| "epoch": 15.711382113821138, |
| "grad_norm": 0.1340688318014145, |
| "learning_rate": 5.160909940757015e-05, |
| "loss": 0.0193, |
| "step": 7730 |
| }, |
| { |
| "epoch": 15.731707317073171, |
| "grad_norm": 0.10712575912475586, |
| "learning_rate": 5.149892136222043e-05, |
| "loss": 0.0133, |
| "step": 7740 |
| }, |
| { |
| "epoch": 15.752032520325203, |
| "grad_norm": 0.1398649662733078, |
| "learning_rate": 5.1388736031549744e-05, |
| "loss": 0.0149, |
| "step": 7750 |
| }, |
| { |
| "epoch": 15.772357723577235, |
| "grad_norm": 0.1141858771443367, |
| "learning_rate": 5.127854395110021e-05, |
| "loss": 0.0155, |
| "step": 7760 |
| }, |
| { |
| "epoch": 15.792682926829269, |
| "grad_norm": 0.16649234294891357, |
| "learning_rate": 5.116834565644671e-05, |
| "loss": 0.0122, |
| "step": 7770 |
| }, |
| { |
| "epoch": 15.8130081300813, |
| "grad_norm": 0.14712487161159515, |
| "learning_rate": 5.10581416831944e-05, |
| "loss": 0.0169, |
| "step": 7780 |
| }, |
| { |
| "epoch": 15.833333333333334, |
| "grad_norm": 0.1849357783794403, |
| "learning_rate": 5.094793256697593e-05, |
| "loss": 0.0141, |
| "step": 7790 |
| }, |
| { |
| "epoch": 15.853658536585366, |
| "grad_norm": 0.20242075622081757, |
| "learning_rate": 5.0837718843449075e-05, |
| "loss": 0.0151, |
| "step": 7800 |
| }, |
| { |
| "epoch": 15.8739837398374, |
| "grad_norm": 0.16380542516708374, |
| "learning_rate": 5.07275010482939e-05, |
| "loss": 0.0143, |
| "step": 7810 |
| }, |
| { |
| "epoch": 15.894308943089431, |
| "grad_norm": 0.20087213814258575, |
| "learning_rate": 5.061727971721032e-05, |
| "loss": 0.0179, |
| "step": 7820 |
| }, |
| { |
| "epoch": 15.914634146341463, |
| "grad_norm": 0.12002623826265335, |
| "learning_rate": 5.050705538591538e-05, |
| "loss": 0.0213, |
| "step": 7830 |
| }, |
| { |
| "epoch": 15.934959349593496, |
| "grad_norm": 0.16061867773532867, |
| "learning_rate": 5.0396828590140785e-05, |
| "loss": 0.0159, |
| "step": 7840 |
| }, |
| { |
| "epoch": 15.955284552845528, |
| "grad_norm": 0.2045939415693283, |
| "learning_rate": 5.0286599865630157e-05, |
| "loss": 0.0136, |
| "step": 7850 |
| }, |
| { |
| "epoch": 15.975609756097562, |
| "grad_norm": 0.11727229505777359, |
| "learning_rate": 5.017636974813649e-05, |
| "loss": 0.0125, |
| "step": 7860 |
| }, |
| { |
| "epoch": 15.995934959349594, |
| "grad_norm": 0.1615324318408966, |
| "learning_rate": 5.006613877341959e-05, |
| "loss": 0.0141, |
| "step": 7870 |
| }, |
| { |
| "epoch": 16.016260162601625, |
| "grad_norm": 0.17539259791374207, |
| "learning_rate": 4.99559074772434e-05, |
| "loss": 0.0166, |
| "step": 7880 |
| }, |
| { |
| "epoch": 16.036585365853657, |
| "grad_norm": 0.1632055640220642, |
| "learning_rate": 4.9845676395373455e-05, |
| "loss": 0.0163, |
| "step": 7890 |
| }, |
| { |
| "epoch": 16.056910569105693, |
| "grad_norm": 0.1939256489276886, |
| "learning_rate": 4.9735446063574184e-05, |
| "loss": 0.0148, |
| "step": 7900 |
| }, |
| { |
| "epoch": 16.077235772357724, |
| "grad_norm": 0.22934892773628235, |
| "learning_rate": 4.962521701760645e-05, |
| "loss": 0.0199, |
| "step": 7910 |
| }, |
| { |
| "epoch": 16.097560975609756, |
| "grad_norm": 0.09549865126609802, |
| "learning_rate": 4.951498979322482e-05, |
| "loss": 0.0146, |
| "step": 7920 |
| }, |
| { |
| "epoch": 16.117886178861788, |
| "grad_norm": 0.12365490198135376, |
| "learning_rate": 4.9404764926174996e-05, |
| "loss": 0.019, |
| "step": 7930 |
| }, |
| { |
| "epoch": 16.13821138211382, |
| "grad_norm": 0.1805732101202011, |
| "learning_rate": 4.929454295219127e-05, |
| "loss": 0.0163, |
| "step": 7940 |
| }, |
| { |
| "epoch": 16.158536585365855, |
| "grad_norm": 0.11624112725257874, |
| "learning_rate": 4.9184324406993844e-05, |
| "loss": 0.0203, |
| "step": 7950 |
| }, |
| { |
| "epoch": 16.178861788617887, |
| "grad_norm": 0.11672668159008026, |
| "learning_rate": 4.907410982628623e-05, |
| "loss": 0.0174, |
| "step": 7960 |
| }, |
| { |
| "epoch": 16.19918699186992, |
| "grad_norm": 0.2546161115169525, |
| "learning_rate": 4.896389974575273e-05, |
| "loss": 0.0174, |
| "step": 7970 |
| }, |
| { |
| "epoch": 16.21951219512195, |
| "grad_norm": 0.16103267669677734, |
| "learning_rate": 4.885369470105571e-05, |
| "loss": 0.0231, |
| "step": 7980 |
| }, |
| { |
| "epoch": 16.239837398373982, |
| "grad_norm": 0.14676164090633392, |
| "learning_rate": 4.874349522783313e-05, |
| "loss": 0.0167, |
| "step": 7990 |
| }, |
| { |
| "epoch": 16.260162601626018, |
| "grad_norm": 0.1342374086380005, |
| "learning_rate": 4.863330186169581e-05, |
| "loss": 0.0135, |
| "step": 8000 |
| }, |
| { |
| "epoch": 16.28048780487805, |
| "grad_norm": 0.1830085813999176, |
| "learning_rate": 4.8523115138224885e-05, |
| "loss": 0.0159, |
| "step": 8010 |
| }, |
| { |
| "epoch": 16.30081300813008, |
| "grad_norm": 0.12788920104503632, |
| "learning_rate": 4.841293559296928e-05, |
| "loss": 0.0133, |
| "step": 8020 |
| }, |
| { |
| "epoch": 16.321138211382113, |
| "grad_norm": 0.13414596021175385, |
| "learning_rate": 4.830276376144295e-05, |
| "loss": 0.0157, |
| "step": 8030 |
| }, |
| { |
| "epoch": 16.341463414634145, |
| "grad_norm": 0.15788240730762482, |
| "learning_rate": 4.819260017912237e-05, |
| "loss": 0.0128, |
| "step": 8040 |
| }, |
| { |
| "epoch": 16.36178861788618, |
| "grad_norm": 0.20657338201999664, |
| "learning_rate": 4.808244538144396e-05, |
| "loss": 0.0147, |
| "step": 8050 |
| }, |
| { |
| "epoch": 16.382113821138212, |
| "grad_norm": 0.1241159588098526, |
| "learning_rate": 4.797229990380142e-05, |
| "loss": 0.014, |
| "step": 8060 |
| }, |
| { |
| "epoch": 16.402439024390244, |
| "grad_norm": 0.14420954883098602, |
| "learning_rate": 4.786216428154317e-05, |
| "loss": 0.0139, |
| "step": 8070 |
| }, |
| { |
| "epoch": 16.422764227642276, |
| "grad_norm": 0.15971283614635468, |
| "learning_rate": 4.7752039049969685e-05, |
| "loss": 0.0148, |
| "step": 8080 |
| }, |
| { |
| "epoch": 16.443089430894307, |
| "grad_norm": 0.11482447385787964, |
| "learning_rate": 4.7641924744330956e-05, |
| "loss": 0.0133, |
| "step": 8090 |
| }, |
| { |
| "epoch": 16.463414634146343, |
| "grad_norm": 0.17538700997829437, |
| "learning_rate": 4.7531821899823925e-05, |
| "loss": 0.0122, |
| "step": 8100 |
| }, |
| { |
| "epoch": 16.483739837398375, |
| "grad_norm": 0.13359621167182922, |
| "learning_rate": 4.742173105158973e-05, |
| "loss": 0.0141, |
| "step": 8110 |
| }, |
| { |
| "epoch": 16.504065040650406, |
| "grad_norm": 0.13688933849334717, |
| "learning_rate": 4.731165273471129e-05, |
| "loss": 0.0146, |
| "step": 8120 |
| }, |
| { |
| "epoch": 16.524390243902438, |
| "grad_norm": 0.11756113171577454, |
| "learning_rate": 4.720158748421057e-05, |
| "loss": 0.011, |
| "step": 8130 |
| }, |
| { |
| "epoch": 16.54471544715447, |
| "grad_norm": 0.1685679405927658, |
| "learning_rate": 4.709153583504602e-05, |
| "loss": 0.0172, |
| "step": 8140 |
| }, |
| { |
| "epoch": 16.565040650406505, |
| "grad_norm": 0.12983818352222443, |
| "learning_rate": 4.6981498322110027e-05, |
| "loss": 0.0157, |
| "step": 8150 |
| }, |
| { |
| "epoch": 16.585365853658537, |
| "grad_norm": 0.13751214742660522, |
| "learning_rate": 4.6871475480226256e-05, |
| "loss": 0.0135, |
| "step": 8160 |
| }, |
| { |
| "epoch": 16.60569105691057, |
| "grad_norm": 0.14502060413360596, |
| "learning_rate": 4.6761467844147004e-05, |
| "loss": 0.0126, |
| "step": 8170 |
| }, |
| { |
| "epoch": 16.6260162601626, |
| "grad_norm": 0.14088159799575806, |
| "learning_rate": 4.665147594855076e-05, |
| "loss": 0.0158, |
| "step": 8180 |
| }, |
| { |
| "epoch": 16.646341463414632, |
| "grad_norm": 0.12108345329761505, |
| "learning_rate": 4.654150032803943e-05, |
| "loss": 0.0142, |
| "step": 8190 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.12457619607448578, |
| "learning_rate": 4.643154151713588e-05, |
| "loss": 0.0162, |
| "step": 8200 |
| }, |
| { |
| "epoch": 16.6869918699187, |
| "grad_norm": 0.19529499113559723, |
| "learning_rate": 4.6321600050281225e-05, |
| "loss": 0.0155, |
| "step": 8210 |
| }, |
| { |
| "epoch": 16.70731707317073, |
| "grad_norm": 0.10389215499162674, |
| "learning_rate": 4.6211676461832264e-05, |
| "loss": 0.016, |
| "step": 8220 |
| }, |
| { |
| "epoch": 16.727642276422763, |
| "grad_norm": 0.18743805587291718, |
| "learning_rate": 4.610177128605899e-05, |
| "loss": 0.0162, |
| "step": 8230 |
| }, |
| { |
| "epoch": 16.747967479674795, |
| "grad_norm": 0.14482690393924713, |
| "learning_rate": 4.599188505714184e-05, |
| "loss": 0.0142, |
| "step": 8240 |
| }, |
| { |
| "epoch": 16.76829268292683, |
| "grad_norm": 0.08358287066221237, |
| "learning_rate": 4.588201830916912e-05, |
| "loss": 0.0142, |
| "step": 8250 |
| }, |
| { |
| "epoch": 16.788617886178862, |
| "grad_norm": 0.11201161891222, |
| "learning_rate": 4.577217157613456e-05, |
| "loss": 0.0125, |
| "step": 8260 |
| }, |
| { |
| "epoch": 16.808943089430894, |
| "grad_norm": 0.18672020733356476, |
| "learning_rate": 4.566234539193452e-05, |
| "loss": 0.021, |
| "step": 8270 |
| }, |
| { |
| "epoch": 16.829268292682926, |
| "grad_norm": 0.11721731722354889, |
| "learning_rate": 4.555254029036555e-05, |
| "loss": 0.013, |
| "step": 8280 |
| }, |
| { |
| "epoch": 16.84959349593496, |
| "grad_norm": 0.16724960505962372, |
| "learning_rate": 4.544275680512165e-05, |
| "loss": 0.0143, |
| "step": 8290 |
| }, |
| { |
| "epoch": 16.869918699186993, |
| "grad_norm": 0.17072100937366486, |
| "learning_rate": 4.5332995469791836e-05, |
| "loss": 0.0125, |
| "step": 8300 |
| }, |
| { |
| "epoch": 16.890243902439025, |
| "grad_norm": 0.16732802987098694, |
| "learning_rate": 4.522325681785744e-05, |
| "loss": 0.0161, |
| "step": 8310 |
| }, |
| { |
| "epoch": 16.910569105691057, |
| "grad_norm": 0.15675050020217896, |
| "learning_rate": 4.511354138268952e-05, |
| "loss": 0.0147, |
| "step": 8320 |
| }, |
| { |
| "epoch": 16.93089430894309, |
| "grad_norm": 0.1608588695526123, |
| "learning_rate": 4.50038496975463e-05, |
| "loss": 0.0145, |
| "step": 8330 |
| }, |
| { |
| "epoch": 16.951219512195124, |
| "grad_norm": 0.1279238760471344, |
| "learning_rate": 4.489418229557063e-05, |
| "loss": 0.0113, |
| "step": 8340 |
| }, |
| { |
| "epoch": 16.971544715447155, |
| "grad_norm": 0.11838237196207047, |
| "learning_rate": 4.478453970978722e-05, |
| "loss": 0.0162, |
| "step": 8350 |
| }, |
| { |
| "epoch": 16.991869918699187, |
| "grad_norm": 0.11877836287021637, |
| "learning_rate": 4.4674922473100286e-05, |
| "loss": 0.0139, |
| "step": 8360 |
| }, |
| { |
| "epoch": 17.01219512195122, |
| "grad_norm": 0.20165173709392548, |
| "learning_rate": 4.4565331118290756e-05, |
| "loss": 0.0113, |
| "step": 8370 |
| }, |
| { |
| "epoch": 17.03252032520325, |
| "grad_norm": 0.09803508222103119, |
| "learning_rate": 4.4455766178013775e-05, |
| "loss": 0.0139, |
| "step": 8380 |
| }, |
| { |
| "epoch": 17.052845528455286, |
| "grad_norm": 0.15660974383354187, |
| "learning_rate": 4.434622818479615e-05, |
| "loss": 0.0155, |
| "step": 8390 |
| }, |
| { |
| "epoch": 17.073170731707318, |
| "grad_norm": 0.1324063241481781, |
| "learning_rate": 4.4236717671033646e-05, |
| "loss": 0.0134, |
| "step": 8400 |
| }, |
| { |
| "epoch": 17.09349593495935, |
| "grad_norm": 0.15221671760082245, |
| "learning_rate": 4.412723516898853e-05, |
| "loss": 0.0124, |
| "step": 8410 |
| }, |
| { |
| "epoch": 17.11382113821138, |
| "grad_norm": 0.10603677481412888, |
| "learning_rate": 4.40177812107869e-05, |
| "loss": 0.0131, |
| "step": 8420 |
| }, |
| { |
| "epoch": 17.134146341463413, |
| "grad_norm": 0.12352833151817322, |
| "learning_rate": 4.390835632841606e-05, |
| "loss": 0.0121, |
| "step": 8430 |
| }, |
| { |
| "epoch": 17.15447154471545, |
| "grad_norm": 0.16352899372577667, |
| "learning_rate": 4.3798961053722115e-05, |
| "loss": 0.0128, |
| "step": 8440 |
| }, |
| { |
| "epoch": 17.17479674796748, |
| "grad_norm": 0.128566175699234, |
| "learning_rate": 4.368959591840718e-05, |
| "loss": 0.0116, |
| "step": 8450 |
| }, |
| { |
| "epoch": 17.195121951219512, |
| "grad_norm": 0.13584142923355103, |
| "learning_rate": 4.3580261454026865e-05, |
| "loss": 0.0117, |
| "step": 8460 |
| }, |
| { |
| "epoch": 17.215447154471544, |
| "grad_norm": 0.17808644473552704, |
| "learning_rate": 4.3470958191987786e-05, |
| "loss": 0.0125, |
| "step": 8470 |
| }, |
| { |
| "epoch": 17.235772357723576, |
| "grad_norm": 0.13361839950084686, |
| "learning_rate": 4.336168666354484e-05, |
| "loss": 0.019, |
| "step": 8480 |
| }, |
| { |
| "epoch": 17.25609756097561, |
| "grad_norm": 0.13176748156547546, |
| "learning_rate": 4.325244739979873e-05, |
| "loss": 0.0153, |
| "step": 8490 |
| }, |
| { |
| "epoch": 17.276422764227643, |
| "grad_norm": 0.135588139295578, |
| "learning_rate": 4.314324093169332e-05, |
| "loss": 0.0118, |
| "step": 8500 |
| }, |
| { |
| "epoch": 17.296747967479675, |
| "grad_norm": 0.1549520641565323, |
| "learning_rate": 4.303406779001302e-05, |
| "loss": 0.0133, |
| "step": 8510 |
| }, |
| { |
| "epoch": 17.317073170731707, |
| "grad_norm": 0.16039688885211945, |
| "learning_rate": 4.292492850538038e-05, |
| "loss": 0.0145, |
| "step": 8520 |
| }, |
| { |
| "epoch": 17.33739837398374, |
| "grad_norm": 0.1547240912914276, |
| "learning_rate": 4.28158236082533e-05, |
| "loss": 0.0116, |
| "step": 8530 |
| }, |
| { |
| "epoch": 17.357723577235774, |
| "grad_norm": 0.15895530581474304, |
| "learning_rate": 4.270675362892256e-05, |
| "loss": 0.0119, |
| "step": 8540 |
| }, |
| { |
| "epoch": 17.378048780487806, |
| "grad_norm": 0.10659079998731613, |
| "learning_rate": 4.2597719097509246e-05, |
| "loss": 0.0147, |
| "step": 8550 |
| }, |
| { |
| "epoch": 17.398373983739837, |
| "grad_norm": 0.1645200252532959, |
| "learning_rate": 4.2488720543962146e-05, |
| "loss": 0.0118, |
| "step": 8560 |
| }, |
| { |
| "epoch": 17.41869918699187, |
| "grad_norm": 0.14108268916606903, |
| "learning_rate": 4.23797584980552e-05, |
| "loss": 0.0158, |
| "step": 8570 |
| }, |
| { |
| "epoch": 17.4390243902439, |
| "grad_norm": 0.1322506219148636, |
| "learning_rate": 4.227083348938486e-05, |
| "loss": 0.012, |
| "step": 8580 |
| }, |
| { |
| "epoch": 17.459349593495936, |
| "grad_norm": 0.1606353223323822, |
| "learning_rate": 4.2161946047367586e-05, |
| "loss": 0.0131, |
| "step": 8590 |
| }, |
| { |
| "epoch": 17.479674796747968, |
| "grad_norm": 0.20726698637008667, |
| "learning_rate": 4.2053096701237294e-05, |
| "loss": 0.0168, |
| "step": 8600 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.18475304543972015, |
| "learning_rate": 4.1944285980042656e-05, |
| "loss": 0.0144, |
| "step": 8610 |
| }, |
| { |
| "epoch": 17.520325203252032, |
| "grad_norm": 0.13266246020793915, |
| "learning_rate": 4.183551441264469e-05, |
| "loss": 0.0153, |
| "step": 8620 |
| }, |
| { |
| "epoch": 17.540650406504064, |
| "grad_norm": 0.1916080266237259, |
| "learning_rate": 4.172678252771408e-05, |
| "loss": 0.015, |
| "step": 8630 |
| }, |
| { |
| "epoch": 17.5609756097561, |
| "grad_norm": 0.1388619840145111, |
| "learning_rate": 4.16180908537286e-05, |
| "loss": 0.0122, |
| "step": 8640 |
| }, |
| { |
| "epoch": 17.58130081300813, |
| "grad_norm": 0.19394637644290924, |
| "learning_rate": 4.150943991897065e-05, |
| "loss": 0.0169, |
| "step": 8650 |
| }, |
| { |
| "epoch": 17.601626016260163, |
| "grad_norm": 0.1618029922246933, |
| "learning_rate": 4.1400830251524605e-05, |
| "loss": 0.0116, |
| "step": 8660 |
| }, |
| { |
| "epoch": 17.621951219512194, |
| "grad_norm": 0.11678081750869751, |
| "learning_rate": 4.1292262379274215e-05, |
| "loss": 0.0144, |
| "step": 8670 |
| }, |
| { |
| "epoch": 17.642276422764226, |
| "grad_norm": 0.11671017855405807, |
| "learning_rate": 4.118373682990016e-05, |
| "loss": 0.0159, |
| "step": 8680 |
| }, |
| { |
| "epoch": 17.66260162601626, |
| "grad_norm": 0.1848084032535553, |
| "learning_rate": 4.107525413087737e-05, |
| "loss": 0.0139, |
| "step": 8690 |
| }, |
| { |
| "epoch": 17.682926829268293, |
| "grad_norm": 0.17534123361110687, |
| "learning_rate": 4.096681480947252e-05, |
| "loss": 0.0143, |
| "step": 8700 |
| }, |
| { |
| "epoch": 17.703252032520325, |
| "grad_norm": 0.12786678969860077, |
| "learning_rate": 4.085841939274146e-05, |
| "loss": 0.0109, |
| "step": 8710 |
| }, |
| { |
| "epoch": 17.723577235772357, |
| "grad_norm": 0.1237667053937912, |
| "learning_rate": 4.075006840752662e-05, |
| "loss": 0.0139, |
| "step": 8720 |
| }, |
| { |
| "epoch": 17.74390243902439, |
| "grad_norm": 0.15702199935913086, |
| "learning_rate": 4.0641762380454515e-05, |
| "loss": 0.0132, |
| "step": 8730 |
| }, |
| { |
| "epoch": 17.764227642276424, |
| "grad_norm": 0.10557420551776886, |
| "learning_rate": 4.0533501837933134e-05, |
| "loss": 0.0132, |
| "step": 8740 |
| }, |
| { |
| "epoch": 17.784552845528456, |
| "grad_norm": 0.17406034469604492, |
| "learning_rate": 4.042528730614936e-05, |
| "loss": 0.0151, |
| "step": 8750 |
| }, |
| { |
| "epoch": 17.804878048780488, |
| "grad_norm": 0.13269108533859253, |
| "learning_rate": 4.0317119311066486e-05, |
| "loss": 0.0159, |
| "step": 8760 |
| }, |
| { |
| "epoch": 17.82520325203252, |
| "grad_norm": 0.12828195095062256, |
| "learning_rate": 4.02089983784216e-05, |
| "loss": 0.0132, |
| "step": 8770 |
| }, |
| { |
| "epoch": 17.84552845528455, |
| "grad_norm": 0.17932254076004028, |
| "learning_rate": 4.010092503372309e-05, |
| "loss": 0.0152, |
| "step": 8780 |
| }, |
| { |
| "epoch": 17.865853658536587, |
| "grad_norm": 0.2130482792854309, |
| "learning_rate": 3.999289980224797e-05, |
| "loss": 0.0145, |
| "step": 8790 |
| }, |
| { |
| "epoch": 17.88617886178862, |
| "grad_norm": 0.20235665142536163, |
| "learning_rate": 3.9884923209039455e-05, |
| "loss": 0.0128, |
| "step": 8800 |
| }, |
| { |
| "epoch": 17.90650406504065, |
| "grad_norm": 0.09971121698617935, |
| "learning_rate": 3.977699577890439e-05, |
| "loss": 0.0148, |
| "step": 8810 |
| }, |
| { |
| "epoch": 17.926829268292682, |
| "grad_norm": 0.14858904480934143, |
| "learning_rate": 3.96691180364106e-05, |
| "loss": 0.0158, |
| "step": 8820 |
| }, |
| { |
| "epoch": 17.947154471544714, |
| "grad_norm": 0.15000762045383453, |
| "learning_rate": 3.956129050588446e-05, |
| "loss": 0.0132, |
| "step": 8830 |
| }, |
| { |
| "epoch": 17.96747967479675, |
| "grad_norm": 0.15528957545757294, |
| "learning_rate": 3.9453513711408275e-05, |
| "loss": 0.0127, |
| "step": 8840 |
| }, |
| { |
| "epoch": 17.98780487804878, |
| "grad_norm": 0.162289097905159, |
| "learning_rate": 3.934578817681774e-05, |
| "loss": 0.0136, |
| "step": 8850 |
| }, |
| { |
| "epoch": 18.008130081300813, |
| "grad_norm": 0.16899584233760834, |
| "learning_rate": 3.9238114425699465e-05, |
| "loss": 0.0139, |
| "step": 8860 |
| }, |
| { |
| "epoch": 18.028455284552845, |
| "grad_norm": 0.13523997366428375, |
| "learning_rate": 3.91304929813883e-05, |
| "loss": 0.0115, |
| "step": 8870 |
| }, |
| { |
| "epoch": 18.048780487804876, |
| "grad_norm": 0.15300580859184265, |
| "learning_rate": 3.902292436696489e-05, |
| "loss": 0.0159, |
| "step": 8880 |
| }, |
| { |
| "epoch": 18.06910569105691, |
| "grad_norm": 0.16981197893619537, |
| "learning_rate": 3.891540910525316e-05, |
| "loss": 0.0115, |
| "step": 8890 |
| }, |
| { |
| "epoch": 18.089430894308943, |
| "grad_norm": 0.15623369812965393, |
| "learning_rate": 3.8807947718817624e-05, |
| "loss": 0.0138, |
| "step": 8900 |
| }, |
| { |
| "epoch": 18.109756097560975, |
| "grad_norm": 0.16787086427211761, |
| "learning_rate": 3.870054072996103e-05, |
| "loss": 0.012, |
| "step": 8910 |
| }, |
| { |
| "epoch": 18.130081300813007, |
| "grad_norm": 0.12681931257247925, |
| "learning_rate": 3.859318866072168e-05, |
| "loss": 0.0115, |
| "step": 8920 |
| }, |
| { |
| "epoch": 18.150406504065042, |
| "grad_norm": 0.12948468327522278, |
| "learning_rate": 3.8485892032870965e-05, |
| "loss": 0.0111, |
| "step": 8930 |
| }, |
| { |
| "epoch": 18.170731707317074, |
| "grad_norm": 0.11079384386539459, |
| "learning_rate": 3.83786513679108e-05, |
| "loss": 0.0119, |
| "step": 8940 |
| }, |
| { |
| "epoch": 18.191056910569106, |
| "grad_norm": 0.1111801415681839, |
| "learning_rate": 3.8271467187071134e-05, |
| "loss": 0.0123, |
| "step": 8950 |
| }, |
| { |
| "epoch": 18.211382113821138, |
| "grad_norm": 0.1817050576210022, |
| "learning_rate": 3.816434001130732e-05, |
| "loss": 0.015, |
| "step": 8960 |
| }, |
| { |
| "epoch": 18.23170731707317, |
| "grad_norm": 0.16134339570999146, |
| "learning_rate": 3.8057270361297706e-05, |
| "loss": 0.0119, |
| "step": 8970 |
| }, |
| { |
| "epoch": 18.252032520325205, |
| "grad_norm": 0.07816947251558304, |
| "learning_rate": 3.7950258757440985e-05, |
| "loss": 0.015, |
| "step": 8980 |
| }, |
| { |
| "epoch": 18.272357723577237, |
| "grad_norm": 0.13936541974544525, |
| "learning_rate": 3.78433057198538e-05, |
| "loss": 0.0129, |
| "step": 8990 |
| }, |
| { |
| "epoch": 18.29268292682927, |
| "grad_norm": 0.10490652918815613, |
| "learning_rate": 3.773641176836807e-05, |
| "loss": 0.015, |
| "step": 9000 |
| }, |
| { |
| "epoch": 18.3130081300813, |
| "grad_norm": 0.0925993099808693, |
| "learning_rate": 3.7629577422528555e-05, |
| "loss": 0.0118, |
| "step": 9010 |
| }, |
| { |
| "epoch": 18.333333333333332, |
| "grad_norm": 0.15427745878696442, |
| "learning_rate": 3.7522803201590325e-05, |
| "loss": 0.0093, |
| "step": 9020 |
| }, |
| { |
| "epoch": 18.353658536585368, |
| "grad_norm": 0.07753149420022964, |
| "learning_rate": 3.741608962451621e-05, |
| "loss": 0.0103, |
| "step": 9030 |
| }, |
| { |
| "epoch": 18.3739837398374, |
| "grad_norm": 0.08675969392061234, |
| "learning_rate": 3.730943720997427e-05, |
| "loss": 0.0117, |
| "step": 9040 |
| }, |
| { |
| "epoch": 18.39430894308943, |
| "grad_norm": 0.06992247700691223, |
| "learning_rate": 3.720284647633532e-05, |
| "loss": 0.0108, |
| "step": 9050 |
| }, |
| { |
| "epoch": 18.414634146341463, |
| "grad_norm": 0.10455995798110962, |
| "learning_rate": 3.7096317941670365e-05, |
| "loss": 0.0145, |
| "step": 9060 |
| }, |
| { |
| "epoch": 18.434959349593495, |
| "grad_norm": 0.13448111712932587, |
| "learning_rate": 3.698985212374814e-05, |
| "loss": 0.0127, |
| "step": 9070 |
| }, |
| { |
| "epoch": 18.45528455284553, |
| "grad_norm": 0.13202343881130219, |
| "learning_rate": 3.6883449540032477e-05, |
| "loss": 0.012, |
| "step": 9080 |
| }, |
| { |
| "epoch": 18.475609756097562, |
| "grad_norm": 0.1117219552397728, |
| "learning_rate": 3.6777110707679905e-05, |
| "loss": 0.0119, |
| "step": 9090 |
| }, |
| { |
| "epoch": 18.495934959349594, |
| "grad_norm": 0.12106060236692429, |
| "learning_rate": 3.667083614353715e-05, |
| "loss": 0.012, |
| "step": 9100 |
| }, |
| { |
| "epoch": 18.516260162601625, |
| "grad_norm": 0.14695453643798828, |
| "learning_rate": 3.6564626364138465e-05, |
| "loss": 0.0169, |
| "step": 9110 |
| }, |
| { |
| "epoch": 18.536585365853657, |
| "grad_norm": 0.12978288531303406, |
| "learning_rate": 3.645848188570331e-05, |
| "loss": 0.0145, |
| "step": 9120 |
| }, |
| { |
| "epoch": 18.556910569105693, |
| "grad_norm": 0.10005785524845123, |
| "learning_rate": 3.635240322413374e-05, |
| "loss": 0.0095, |
| "step": 9130 |
| }, |
| { |
| "epoch": 18.577235772357724, |
| "grad_norm": 0.13557754456996918, |
| "learning_rate": 3.624639089501187e-05, |
| "loss": 0.0115, |
| "step": 9140 |
| }, |
| { |
| "epoch": 18.597560975609756, |
| "grad_norm": 0.12689892947673798, |
| "learning_rate": 3.614044541359749e-05, |
| "loss": 0.0152, |
| "step": 9150 |
| }, |
| { |
| "epoch": 18.617886178861788, |
| "grad_norm": 0.10084543377161026, |
| "learning_rate": 3.603456729482541e-05, |
| "loss": 0.0133, |
| "step": 9160 |
| }, |
| { |
| "epoch": 18.63821138211382, |
| "grad_norm": 0.15835894644260406, |
| "learning_rate": 3.5928757053303055e-05, |
| "loss": 0.0163, |
| "step": 9170 |
| }, |
| { |
| "epoch": 18.658536585365855, |
| "grad_norm": 0.1506408303976059, |
| "learning_rate": 3.5823015203308e-05, |
| "loss": 0.0146, |
| "step": 9180 |
| }, |
| { |
| "epoch": 18.678861788617887, |
| "grad_norm": 0.10163554549217224, |
| "learning_rate": 3.57173422587853e-05, |
| "loss": 0.0123, |
| "step": 9190 |
| }, |
| { |
| "epoch": 18.69918699186992, |
| "grad_norm": 0.09243937581777573, |
| "learning_rate": 3.561173873334522e-05, |
| "loss": 0.0145, |
| "step": 9200 |
| }, |
| { |
| "epoch": 18.71951219512195, |
| "grad_norm": 0.128352090716362, |
| "learning_rate": 3.550620514026056e-05, |
| "loss": 0.0111, |
| "step": 9210 |
| }, |
| { |
| "epoch": 18.739837398373982, |
| "grad_norm": 0.14999257028102875, |
| "learning_rate": 3.54007419924642e-05, |
| "loss": 0.0156, |
| "step": 9220 |
| }, |
| { |
| "epoch": 18.760162601626018, |
| "grad_norm": 0.18344224989414215, |
| "learning_rate": 3.52953498025467e-05, |
| "loss": 0.0135, |
| "step": 9230 |
| }, |
| { |
| "epoch": 18.78048780487805, |
| "grad_norm": 0.12680011987686157, |
| "learning_rate": 3.519002908275368e-05, |
| "loss": 0.0107, |
| "step": 9240 |
| }, |
| { |
| "epoch": 18.80081300813008, |
| "grad_norm": 0.20007681846618652, |
| "learning_rate": 3.508478034498339e-05, |
| "loss": 0.0158, |
| "step": 9250 |
| }, |
| { |
| "epoch": 18.821138211382113, |
| "grad_norm": 0.11284324526786804, |
| "learning_rate": 3.497960410078427e-05, |
| "loss": 0.0114, |
| "step": 9260 |
| }, |
| { |
| "epoch": 18.841463414634145, |
| "grad_norm": 0.17493268847465515, |
| "learning_rate": 3.487450086135236e-05, |
| "loss": 0.0148, |
| "step": 9270 |
| }, |
| { |
| "epoch": 18.86178861788618, |
| "grad_norm": 0.1810353547334671, |
| "learning_rate": 3.476947113752891e-05, |
| "loss": 0.0096, |
| "step": 9280 |
| }, |
| { |
| "epoch": 18.882113821138212, |
| "grad_norm": 0.18835191428661346, |
| "learning_rate": 3.4664515439797823e-05, |
| "loss": 0.0114, |
| "step": 9290 |
| }, |
| { |
| "epoch": 18.902439024390244, |
| "grad_norm": 0.1354275792837143, |
| "learning_rate": 3.45596342782832e-05, |
| "loss": 0.0146, |
| "step": 9300 |
| }, |
| { |
| "epoch": 18.922764227642276, |
| "grad_norm": 0.12778376042842865, |
| "learning_rate": 3.4454828162746936e-05, |
| "loss": 0.0122, |
| "step": 9310 |
| }, |
| { |
| "epoch": 18.943089430894307, |
| "grad_norm": 0.13828104734420776, |
| "learning_rate": 3.435009760258608e-05, |
| "loss": 0.0123, |
| "step": 9320 |
| }, |
| { |
| "epoch": 18.963414634146343, |
| "grad_norm": 0.1193520799279213, |
| "learning_rate": 3.424544310683057e-05, |
| "loss": 0.0133, |
| "step": 9330 |
| }, |
| { |
| "epoch": 18.983739837398375, |
| "grad_norm": 0.12786108255386353, |
| "learning_rate": 3.41408651841405e-05, |
| "loss": 0.0126, |
| "step": 9340 |
| }, |
| { |
| "epoch": 19.004065040650406, |
| "grad_norm": 0.0895795002579689, |
| "learning_rate": 3.403636434280388e-05, |
| "loss": 0.0096, |
| "step": 9350 |
| }, |
| { |
| "epoch": 19.024390243902438, |
| "grad_norm": 0.14265698194503784, |
| "learning_rate": 3.393194109073411e-05, |
| "loss": 0.0134, |
| "step": 9360 |
| }, |
| { |
| "epoch": 19.04471544715447, |
| "grad_norm": 0.1201973482966423, |
| "learning_rate": 3.3827595935467376e-05, |
| "loss": 0.0148, |
| "step": 9370 |
| }, |
| { |
| "epoch": 19.065040650406505, |
| "grad_norm": 0.14567726850509644, |
| "learning_rate": 3.3723329384160344e-05, |
| "loss": 0.0149, |
| "step": 9380 |
| }, |
| { |
| "epoch": 19.085365853658537, |
| "grad_norm": 0.13860636949539185, |
| "learning_rate": 3.3619141943587646e-05, |
| "loss": 0.0141, |
| "step": 9390 |
| }, |
| { |
| "epoch": 19.10569105691057, |
| "grad_norm": 0.09485989809036255, |
| "learning_rate": 3.351503412013935e-05, |
| "loss": 0.0087, |
| "step": 9400 |
| }, |
| { |
| "epoch": 19.1260162601626, |
| "grad_norm": 0.14823415875434875, |
| "learning_rate": 3.341100641981863e-05, |
| "loss": 0.0115, |
| "step": 9410 |
| }, |
| { |
| "epoch": 19.146341463414632, |
| "grad_norm": 0.17353856563568115, |
| "learning_rate": 3.330705934823919e-05, |
| "loss": 0.0124, |
| "step": 9420 |
| }, |
| { |
| "epoch": 19.166666666666668, |
| "grad_norm": 0.1345943808555603, |
| "learning_rate": 3.3203193410622804e-05, |
| "loss": 0.0121, |
| "step": 9430 |
| }, |
| { |
| "epoch": 19.1869918699187, |
| "grad_norm": 0.19979223608970642, |
| "learning_rate": 3.309940911179701e-05, |
| "loss": 0.0138, |
| "step": 9440 |
| }, |
| { |
| "epoch": 19.20731707317073, |
| "grad_norm": 0.15768824517726898, |
| "learning_rate": 3.2995706956192465e-05, |
| "loss": 0.0124, |
| "step": 9450 |
| }, |
| { |
| "epoch": 19.227642276422763, |
| "grad_norm": 0.15080216526985168, |
| "learning_rate": 3.289208744784059e-05, |
| "loss": 0.0145, |
| "step": 9460 |
| }, |
| { |
| "epoch": 19.247967479674795, |
| "grad_norm": 0.11168956756591797, |
| "learning_rate": 3.2788551090371164e-05, |
| "loss": 0.012, |
| "step": 9470 |
| }, |
| { |
| "epoch": 19.26829268292683, |
| "grad_norm": 0.10030027478933334, |
| "learning_rate": 3.268509838700974e-05, |
| "loss": 0.0109, |
| "step": 9480 |
| }, |
| { |
| "epoch": 19.288617886178862, |
| "grad_norm": 0.15326887369155884, |
| "learning_rate": 3.258172984057535e-05, |
| "loss": 0.0137, |
| "step": 9490 |
| }, |
| { |
| "epoch": 19.308943089430894, |
| "grad_norm": 0.16193649172782898, |
| "learning_rate": 3.247844595347798e-05, |
| "loss": 0.013, |
| "step": 9500 |
| }, |
| { |
| "epoch": 19.329268292682926, |
| "grad_norm": 0.1177881509065628, |
| "learning_rate": 3.2375247227716077e-05, |
| "loss": 0.0122, |
| "step": 9510 |
| }, |
| { |
| "epoch": 19.34959349593496, |
| "grad_norm": 0.11959271878004074, |
| "learning_rate": 3.2272134164874264e-05, |
| "loss": 0.0141, |
| "step": 9520 |
| }, |
| { |
| "epoch": 19.369918699186993, |
| "grad_norm": 0.13897369801998138, |
| "learning_rate": 3.216910726612073e-05, |
| "loss": 0.012, |
| "step": 9530 |
| }, |
| { |
| "epoch": 19.390243902439025, |
| "grad_norm": 0.13143090903759003, |
| "learning_rate": 3.2066167032204956e-05, |
| "loss": 0.014, |
| "step": 9540 |
| }, |
| { |
| "epoch": 19.410569105691057, |
| "grad_norm": 0.15241993963718414, |
| "learning_rate": 3.196331396345512e-05, |
| "loss": 0.0145, |
| "step": 9550 |
| }, |
| { |
| "epoch": 19.43089430894309, |
| "grad_norm": 0.17542560398578644, |
| "learning_rate": 3.186054855977577e-05, |
| "loss": 0.0144, |
| "step": 9560 |
| }, |
| { |
| "epoch": 19.451219512195124, |
| "grad_norm": 0.1463475078344345, |
| "learning_rate": 3.175787132064542e-05, |
| "loss": 0.014, |
| "step": 9570 |
| }, |
| { |
| "epoch": 19.471544715447155, |
| "grad_norm": 0.12420643121004105, |
| "learning_rate": 3.165528274511397e-05, |
| "loss": 0.0125, |
| "step": 9580 |
| }, |
| { |
| "epoch": 19.491869918699187, |
| "grad_norm": 0.11292680352926254, |
| "learning_rate": 3.155278333180047e-05, |
| "loss": 0.0094, |
| "step": 9590 |
| }, |
| { |
| "epoch": 19.51219512195122, |
| "grad_norm": 0.11575955152511597, |
| "learning_rate": 3.14503735788906e-05, |
| "loss": 0.0124, |
| "step": 9600 |
| }, |
| { |
| "epoch": 19.53252032520325, |
| "grad_norm": 0.1290699690580368, |
| "learning_rate": 3.134805398413419e-05, |
| "loss": 0.0135, |
| "step": 9610 |
| }, |
| { |
| "epoch": 19.552845528455286, |
| "grad_norm": 0.12234389036893845, |
| "learning_rate": 3.1245825044842954e-05, |
| "loss": 0.0136, |
| "step": 9620 |
| }, |
| { |
| "epoch": 19.573170731707318, |
| "grad_norm": 0.13453805446624756, |
| "learning_rate": 3.114368725788791e-05, |
| "loss": 0.0132, |
| "step": 9630 |
| }, |
| { |
| "epoch": 19.59349593495935, |
| "grad_norm": 0.15979017317295074, |
| "learning_rate": 3.1041641119697075e-05, |
| "loss": 0.0112, |
| "step": 9640 |
| }, |
| { |
| "epoch": 19.61382113821138, |
| "grad_norm": 0.12622177600860596, |
| "learning_rate": 3.093968712625306e-05, |
| "loss": 0.0137, |
| "step": 9650 |
| }, |
| { |
| "epoch": 19.634146341463413, |
| "grad_norm": 0.1539682000875473, |
| "learning_rate": 3.0837825773090535e-05, |
| "loss": 0.0115, |
| "step": 9660 |
| }, |
| { |
| "epoch": 19.65447154471545, |
| "grad_norm": 0.11947452276945114, |
| "learning_rate": 3.073605755529395e-05, |
| "loss": 0.0141, |
| "step": 9670 |
| }, |
| { |
| "epoch": 19.67479674796748, |
| "grad_norm": 0.14298219978809357, |
| "learning_rate": 3.063438296749511e-05, |
| "loss": 0.014, |
| "step": 9680 |
| }, |
| { |
| "epoch": 19.695121951219512, |
| "grad_norm": 0.11294319480657578, |
| "learning_rate": 3.053280250387067e-05, |
| "loss": 0.0096, |
| "step": 9690 |
| }, |
| { |
| "epoch": 19.715447154471544, |
| "grad_norm": 0.13358741998672485, |
| "learning_rate": 3.043131665813988e-05, |
| "loss": 0.0103, |
| "step": 9700 |
| }, |
| { |
| "epoch": 19.735772357723576, |
| "grad_norm": 0.08656350523233414, |
| "learning_rate": 3.0329925923562073e-05, |
| "loss": 0.0132, |
| "step": 9710 |
| }, |
| { |
| "epoch": 19.75609756097561, |
| "grad_norm": 0.2658238410949707, |
| "learning_rate": 3.0228630792934277e-05, |
| "loss": 0.0129, |
| "step": 9720 |
| }, |
| { |
| "epoch": 19.776422764227643, |
| "grad_norm": 0.1336117833852768, |
| "learning_rate": 3.0127431758588918e-05, |
| "loss": 0.0142, |
| "step": 9730 |
| }, |
| { |
| "epoch": 19.796747967479675, |
| "grad_norm": 0.15740816295146942, |
| "learning_rate": 3.002632931239133e-05, |
| "loss": 0.0113, |
| "step": 9740 |
| }, |
| { |
| "epoch": 19.817073170731707, |
| "grad_norm": 0.14007125794887543, |
| "learning_rate": 2.992532394573735e-05, |
| "loss": 0.0107, |
| "step": 9750 |
| }, |
| { |
| "epoch": 19.83739837398374, |
| "grad_norm": 0.11406854540109634, |
| "learning_rate": 2.982441614955105e-05, |
| "loss": 0.01, |
| "step": 9760 |
| }, |
| { |
| "epoch": 19.857723577235774, |
| "grad_norm": 0.07972941547632217, |
| "learning_rate": 2.972360641428218e-05, |
| "loss": 0.01, |
| "step": 9770 |
| }, |
| { |
| "epoch": 19.878048780487806, |
| "grad_norm": 0.11841104924678802, |
| "learning_rate": 2.9622895229903973e-05, |
| "loss": 0.0094, |
| "step": 9780 |
| }, |
| { |
| "epoch": 19.898373983739837, |
| "grad_norm": 0.12949933111667633, |
| "learning_rate": 2.9522283085910612e-05, |
| "loss": 0.0124, |
| "step": 9790 |
| }, |
| { |
| "epoch": 19.91869918699187, |
| "grad_norm": 0.1191362589597702, |
| "learning_rate": 2.942177047131489e-05, |
| "loss": 0.0093, |
| "step": 9800 |
| }, |
| { |
| "epoch": 19.9390243902439, |
| "grad_norm": 0.11803624778985977, |
| "learning_rate": 2.9321357874645905e-05, |
| "loss": 0.0129, |
| "step": 9810 |
| }, |
| { |
| "epoch": 19.959349593495936, |
| "grad_norm": 0.13236430287361145, |
| "learning_rate": 2.9221045783946577e-05, |
| "loss": 0.0092, |
| "step": 9820 |
| }, |
| { |
| "epoch": 19.979674796747968, |
| "grad_norm": 0.09027262032032013, |
| "learning_rate": 2.9120834686771394e-05, |
| "loss": 0.0138, |
| "step": 9830 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.1077914610505104, |
| "learning_rate": 2.902072507018392e-05, |
| "loss": 0.0153, |
| "step": 9840 |
| }, |
| { |
| "epoch": 20.020325203252032, |
| "grad_norm": 0.1104595884680748, |
| "learning_rate": 2.892071742075446e-05, |
| "loss": 0.0124, |
| "step": 9850 |
| }, |
| { |
| "epoch": 20.040650406504064, |
| "grad_norm": 0.1770085245370865, |
| "learning_rate": 2.8820812224557812e-05, |
| "loss": 0.0138, |
| "step": 9860 |
| }, |
| { |
| "epoch": 20.0609756097561, |
| "grad_norm": 0.16476783156394958, |
| "learning_rate": 2.8721009967170764e-05, |
| "loss": 0.0106, |
| "step": 9870 |
| }, |
| { |
| "epoch": 20.08130081300813, |
| "grad_norm": 0.1422470360994339, |
| "learning_rate": 2.8621311133669748e-05, |
| "loss": 0.0127, |
| "step": 9880 |
| }, |
| { |
| "epoch": 20.101626016260163, |
| "grad_norm": 0.14497612416744232, |
| "learning_rate": 2.8521716208628595e-05, |
| "loss": 0.0155, |
| "step": 9890 |
| }, |
| { |
| "epoch": 20.121951219512194, |
| "grad_norm": 0.1829124540090561, |
| "learning_rate": 2.8422225676116015e-05, |
| "loss": 0.0135, |
| "step": 9900 |
| }, |
| { |
| "epoch": 20.142276422764226, |
| "grad_norm": 0.1418256163597107, |
| "learning_rate": 2.832284001969342e-05, |
| "loss": 0.0122, |
| "step": 9910 |
| }, |
| { |
| "epoch": 20.16260162601626, |
| "grad_norm": 0.14839938282966614, |
| "learning_rate": 2.8223559722412408e-05, |
| "loss": 0.0144, |
| "step": 9920 |
| }, |
| { |
| "epoch": 20.182926829268293, |
| "grad_norm": 0.16532327234745026, |
| "learning_rate": 2.8124385266812516e-05, |
| "loss": 0.0085, |
| "step": 9930 |
| }, |
| { |
| "epoch": 20.203252032520325, |
| "grad_norm": 0.09330819547176361, |
| "learning_rate": 2.802531713491886e-05, |
| "loss": 0.0151, |
| "step": 9940 |
| }, |
| { |
| "epoch": 20.223577235772357, |
| "grad_norm": 0.17031468451023102, |
| "learning_rate": 2.7926355808239822e-05, |
| "loss": 0.0146, |
| "step": 9950 |
| }, |
| { |
| "epoch": 20.24390243902439, |
| "grad_norm": 0.11071918159723282, |
| "learning_rate": 2.782750176776458e-05, |
| "loss": 0.0112, |
| "step": 9960 |
| }, |
| { |
| "epoch": 20.264227642276424, |
| "grad_norm": 0.13747528195381165, |
| "learning_rate": 2.7728755493960946e-05, |
| "loss": 0.0122, |
| "step": 9970 |
| }, |
| { |
| "epoch": 20.284552845528456, |
| "grad_norm": 0.11134276539087296, |
| "learning_rate": 2.7630117466772876e-05, |
| "loss": 0.0098, |
| "step": 9980 |
| }, |
| { |
| "epoch": 20.304878048780488, |
| "grad_norm": 0.11619052290916443, |
| "learning_rate": 2.7531588165618278e-05, |
| "loss": 0.0119, |
| "step": 9990 |
| }, |
| { |
| "epoch": 20.32520325203252, |
| "grad_norm": 0.1301652193069458, |
| "learning_rate": 2.7433168069386533e-05, |
| "loss": 0.0116, |
| "step": 10000 |
| }, |
| { |
| "epoch": 20.34552845528455, |
| "grad_norm": 0.1226324513554573, |
| "learning_rate": 2.7334857656436308e-05, |
| "loss": 0.0116, |
| "step": 10010 |
| }, |
| { |
| "epoch": 20.365853658536587, |
| "grad_norm": 0.11929962784051895, |
| "learning_rate": 2.7236657404593157e-05, |
| "loss": 0.0107, |
| "step": 10020 |
| }, |
| { |
| "epoch": 20.38617886178862, |
| "grad_norm": 0.11318682879209518, |
| "learning_rate": 2.713856779114716e-05, |
| "loss": 0.011, |
| "step": 10030 |
| }, |
| { |
| "epoch": 20.40650406504065, |
| "grad_norm": 0.18702766299247742, |
| "learning_rate": 2.704058929285074e-05, |
| "loss": 0.0138, |
| "step": 10040 |
| }, |
| { |
| "epoch": 20.426829268292682, |
| "grad_norm": 0.11912211030721664, |
| "learning_rate": 2.6942722385916175e-05, |
| "loss": 0.0109, |
| "step": 10050 |
| }, |
| { |
| "epoch": 20.447154471544714, |
| "grad_norm": 0.12498341500759125, |
| "learning_rate": 2.6844967546013394e-05, |
| "loss": 0.0131, |
| "step": 10060 |
| }, |
| { |
| "epoch": 20.46747967479675, |
| "grad_norm": 0.13249894976615906, |
| "learning_rate": 2.6747325248267673e-05, |
| "loss": 0.0126, |
| "step": 10070 |
| }, |
| { |
| "epoch": 20.48780487804878, |
| "grad_norm": 0.09908751398324966, |
| "learning_rate": 2.664979596725724e-05, |
| "loss": 0.0121, |
| "step": 10080 |
| }, |
| { |
| "epoch": 20.508130081300813, |
| "grad_norm": 0.12584403157234192, |
| "learning_rate": 2.655238017701105e-05, |
| "loss": 0.011, |
| "step": 10090 |
| }, |
| { |
| "epoch": 20.528455284552845, |
| "grad_norm": 0.14452055096626282, |
| "learning_rate": 2.6455078351006455e-05, |
| "loss": 0.0105, |
| "step": 10100 |
| }, |
| { |
| "epoch": 20.548780487804876, |
| "grad_norm": 0.10302133113145828, |
| "learning_rate": 2.6357890962166866e-05, |
| "loss": 0.0125, |
| "step": 10110 |
| }, |
| { |
| "epoch": 20.56910569105691, |
| "grad_norm": 0.12645643949508667, |
| "learning_rate": 2.6260818482859534e-05, |
| "loss": 0.0108, |
| "step": 10120 |
| }, |
| { |
| "epoch": 20.589430894308943, |
| "grad_norm": 0.08891498297452927, |
| "learning_rate": 2.6163861384893156e-05, |
| "loss": 0.0096, |
| "step": 10130 |
| }, |
| { |
| "epoch": 20.609756097560975, |
| "grad_norm": 0.12570464611053467, |
| "learning_rate": 2.606702013951564e-05, |
| "loss": 0.0087, |
| "step": 10140 |
| }, |
| { |
| "epoch": 20.630081300813007, |
| "grad_norm": 0.07609464228153229, |
| "learning_rate": 2.5970295217411844e-05, |
| "loss": 0.0094, |
| "step": 10150 |
| }, |
| { |
| "epoch": 20.65040650406504, |
| "grad_norm": 0.10837171971797943, |
| "learning_rate": 2.5873687088701236e-05, |
| "loss": 0.0107, |
| "step": 10160 |
| }, |
| { |
| "epoch": 20.670731707317074, |
| "grad_norm": 0.12332095205783844, |
| "learning_rate": 2.5777196222935596e-05, |
| "loss": 0.0137, |
| "step": 10170 |
| }, |
| { |
| "epoch": 20.691056910569106, |
| "grad_norm": 0.11973092705011368, |
| "learning_rate": 2.5680823089096807e-05, |
| "loss": 0.0122, |
| "step": 10180 |
| }, |
| { |
| "epoch": 20.711382113821138, |
| "grad_norm": 0.07781854271888733, |
| "learning_rate": 2.558456815559448e-05, |
| "loss": 0.0107, |
| "step": 10190 |
| }, |
| { |
| "epoch": 20.73170731707317, |
| "grad_norm": 0.08621246367692947, |
| "learning_rate": 2.548843189026378e-05, |
| "loss": 0.0109, |
| "step": 10200 |
| }, |
| { |
| "epoch": 20.752032520325205, |
| "grad_norm": 0.10767322778701782, |
| "learning_rate": 2.5392414760363048e-05, |
| "loss": 0.0117, |
| "step": 10210 |
| }, |
| { |
| "epoch": 20.772357723577237, |
| "grad_norm": 0.15958262979984283, |
| "learning_rate": 2.529651723257162e-05, |
| "loss": 0.0136, |
| "step": 10220 |
| }, |
| { |
| "epoch": 20.79268292682927, |
| "grad_norm": 0.08153461664915085, |
| "learning_rate": 2.5200739772987537e-05, |
| "loss": 0.0118, |
| "step": 10230 |
| }, |
| { |
| "epoch": 20.8130081300813, |
| "grad_norm": 0.10004610568284988, |
| "learning_rate": 2.5105082847125184e-05, |
| "loss": 0.009, |
| "step": 10240 |
| }, |
| { |
| "epoch": 20.833333333333332, |
| "grad_norm": 0.13535667955875397, |
| "learning_rate": 2.5009546919913218e-05, |
| "loss": 0.0097, |
| "step": 10250 |
| }, |
| { |
| "epoch": 20.853658536585368, |
| "grad_norm": 0.11622301489114761, |
| "learning_rate": 2.4914132455692098e-05, |
| "loss": 0.0105, |
| "step": 10260 |
| }, |
| { |
| "epoch": 20.8739837398374, |
| "grad_norm": 0.12948839366436005, |
| "learning_rate": 2.4818839918211962e-05, |
| "loss": 0.009, |
| "step": 10270 |
| }, |
| { |
| "epoch": 20.89430894308943, |
| "grad_norm": 0.15754957497119904, |
| "learning_rate": 2.4723669770630376e-05, |
| "loss": 0.0143, |
| "step": 10280 |
| }, |
| { |
| "epoch": 20.914634146341463, |
| "grad_norm": 0.14372849464416504, |
| "learning_rate": 2.4628622475509972e-05, |
| "loss": 0.012, |
| "step": 10290 |
| }, |
| { |
| "epoch": 20.934959349593495, |
| "grad_norm": 0.1215457022190094, |
| "learning_rate": 2.4533698494816342e-05, |
| "loss": 0.0095, |
| "step": 10300 |
| }, |
| { |
| "epoch": 20.95528455284553, |
| "grad_norm": 0.08143052458763123, |
| "learning_rate": 2.44388982899157e-05, |
| "loss": 0.0102, |
| "step": 10310 |
| }, |
| { |
| "epoch": 20.975609756097562, |
| "grad_norm": 0.08094421029090881, |
| "learning_rate": 2.4344222321572636e-05, |
| "loss": 0.0157, |
| "step": 10320 |
| }, |
| { |
| "epoch": 20.995934959349594, |
| "grad_norm": 0.11086557060480118, |
| "learning_rate": 2.4249671049947954e-05, |
| "loss": 0.0096, |
| "step": 10330 |
| }, |
| { |
| "epoch": 21.016260162601625, |
| "grad_norm": 0.09536684304475784, |
| "learning_rate": 2.4155244934596333e-05, |
| "loss": 0.0084, |
| "step": 10340 |
| }, |
| { |
| "epoch": 21.036585365853657, |
| "grad_norm": 0.10263389348983765, |
| "learning_rate": 2.406094443446416e-05, |
| "loss": 0.0096, |
| "step": 10350 |
| }, |
| { |
| "epoch": 21.056910569105693, |
| "grad_norm": 0.12646539509296417, |
| "learning_rate": 2.3966770007887317e-05, |
| "loss": 0.0078, |
| "step": 10360 |
| }, |
| { |
| "epoch": 21.077235772357724, |
| "grad_norm": 0.15264266729354858, |
| "learning_rate": 2.3872722112588903e-05, |
| "loss": 0.0118, |
| "step": 10370 |
| }, |
| { |
| "epoch": 21.097560975609756, |
| "grad_norm": 0.16815584897994995, |
| "learning_rate": 2.3778801205676997e-05, |
| "loss": 0.0106, |
| "step": 10380 |
| }, |
| { |
| "epoch": 21.117886178861788, |
| "grad_norm": 0.1217648908495903, |
| "learning_rate": 2.3685007743642524e-05, |
| "loss": 0.0081, |
| "step": 10390 |
| }, |
| { |
| "epoch": 21.13821138211382, |
| "grad_norm": 0.11896563321352005, |
| "learning_rate": 2.3591342182356914e-05, |
| "loss": 0.0125, |
| "step": 10400 |
| }, |
| { |
| "epoch": 21.158536585365855, |
| "grad_norm": 0.11437961459159851, |
| "learning_rate": 2.3497804977070016e-05, |
| "loss": 0.0102, |
| "step": 10410 |
| }, |
| { |
| "epoch": 21.178861788617887, |
| "grad_norm": 0.10344325751066208, |
| "learning_rate": 2.3404396582407777e-05, |
| "loss": 0.0094, |
| "step": 10420 |
| }, |
| { |
| "epoch": 21.19918699186992, |
| "grad_norm": 0.1586674153804779, |
| "learning_rate": 2.331111745237007e-05, |
| "loss": 0.0105, |
| "step": 10430 |
| }, |
| { |
| "epoch": 21.21951219512195, |
| "grad_norm": 0.15335488319396973, |
| "learning_rate": 2.3217968040328526e-05, |
| "loss": 0.0126, |
| "step": 10440 |
| }, |
| { |
| "epoch": 21.239837398373982, |
| "grad_norm": 0.16755497455596924, |
| "learning_rate": 2.3124948799024286e-05, |
| "loss": 0.0125, |
| "step": 10450 |
| }, |
| { |
| "epoch": 21.260162601626018, |
| "grad_norm": 0.1016865074634552, |
| "learning_rate": 2.3032060180565828e-05, |
| "loss": 0.0094, |
| "step": 10460 |
| }, |
| { |
| "epoch": 21.28048780487805, |
| "grad_norm": 0.13073071837425232, |
| "learning_rate": 2.2939302636426724e-05, |
| "loss": 0.0105, |
| "step": 10470 |
| }, |
| { |
| "epoch": 21.30081300813008, |
| "grad_norm": 0.13936924934387207, |
| "learning_rate": 2.2846676617443458e-05, |
| "loss": 0.0096, |
| "step": 10480 |
| }, |
| { |
| "epoch": 21.321138211382113, |
| "grad_norm": 0.1346622258424759, |
| "learning_rate": 2.275418257381332e-05, |
| "loss": 0.0102, |
| "step": 10490 |
| }, |
| { |
| "epoch": 21.341463414634145, |
| "grad_norm": 0.13846242427825928, |
| "learning_rate": 2.2661820955092083e-05, |
| "loss": 0.009, |
| "step": 10500 |
| }, |
| { |
| "epoch": 21.36178861788618, |
| "grad_norm": 0.10490627586841583, |
| "learning_rate": 2.256959221019193e-05, |
| "loss": 0.0136, |
| "step": 10510 |
| }, |
| { |
| "epoch": 21.382113821138212, |
| "grad_norm": 0.159734308719635, |
| "learning_rate": 2.2477496787379227e-05, |
| "loss": 0.0127, |
| "step": 10520 |
| }, |
| { |
| "epoch": 21.402439024390244, |
| "grad_norm": 0.10712343454360962, |
| "learning_rate": 2.238553513427229e-05, |
| "loss": 0.0116, |
| "step": 10530 |
| }, |
| { |
| "epoch": 21.422764227642276, |
| "grad_norm": 0.1423291116952896, |
| "learning_rate": 2.2293707697839344e-05, |
| "loss": 0.0104, |
| "step": 10540 |
| }, |
| { |
| "epoch": 21.443089430894307, |
| "grad_norm": 0.11537821590900421, |
| "learning_rate": 2.2202014924396214e-05, |
| "loss": 0.0088, |
| "step": 10550 |
| }, |
| { |
| "epoch": 21.463414634146343, |
| "grad_norm": 0.10486509650945663, |
| "learning_rate": 2.21104572596042e-05, |
| "loss": 0.0091, |
| "step": 10560 |
| }, |
| { |
| "epoch": 21.483739837398375, |
| "grad_norm": 0.0840308740735054, |
| "learning_rate": 2.2019035148468e-05, |
| "loss": 0.0097, |
| "step": 10570 |
| }, |
| { |
| "epoch": 21.504065040650406, |
| "grad_norm": 0.1811951994895935, |
| "learning_rate": 2.1927749035333374e-05, |
| "loss": 0.0095, |
| "step": 10580 |
| }, |
| { |
| "epoch": 21.524390243902438, |
| "grad_norm": 0.13960686326026917, |
| "learning_rate": 2.1836599363885152e-05, |
| "loss": 0.0093, |
| "step": 10590 |
| }, |
| { |
| "epoch": 21.54471544715447, |
| "grad_norm": 0.09911748021841049, |
| "learning_rate": 2.1745586577144993e-05, |
| "loss": 0.0109, |
| "step": 10600 |
| }, |
| { |
| "epoch": 21.565040650406505, |
| "grad_norm": 0.1075323298573494, |
| "learning_rate": 2.1654711117469207e-05, |
| "loss": 0.0092, |
| "step": 10610 |
| }, |
| { |
| "epoch": 21.585365853658537, |
| "grad_norm": 0.08886104822158813, |
| "learning_rate": 2.1563973426546702e-05, |
| "loss": 0.0092, |
| "step": 10620 |
| }, |
| { |
| "epoch": 21.60569105691057, |
| "grad_norm": 0.09648662060499191, |
| "learning_rate": 2.1473373945396728e-05, |
| "loss": 0.0096, |
| "step": 10630 |
| }, |
| { |
| "epoch": 21.6260162601626, |
| "grad_norm": 0.1114833727478981, |
| "learning_rate": 2.138291311436679e-05, |
| "loss": 0.0086, |
| "step": 10640 |
| }, |
| { |
| "epoch": 21.646341463414632, |
| "grad_norm": 0.07100456207990646, |
| "learning_rate": 2.1292591373130518e-05, |
| "loss": 0.0082, |
| "step": 10650 |
| }, |
| { |
| "epoch": 21.666666666666668, |
| "grad_norm": 0.08392198383808136, |
| "learning_rate": 2.1202409160685528e-05, |
| "loss": 0.0113, |
| "step": 10660 |
| }, |
| { |
| "epoch": 21.6869918699187, |
| "grad_norm": 0.12464640289545059, |
| "learning_rate": 2.1112366915351228e-05, |
| "loss": 0.0098, |
| "step": 10670 |
| }, |
| { |
| "epoch": 21.70731707317073, |
| "grad_norm": 0.0857594907283783, |
| "learning_rate": 2.102246507476679e-05, |
| "loss": 0.0112, |
| "step": 10680 |
| }, |
| { |
| "epoch": 21.727642276422763, |
| "grad_norm": 0.12353429198265076, |
| "learning_rate": 2.09327040758889e-05, |
| "loss": 0.0096, |
| "step": 10690 |
| }, |
| { |
| "epoch": 21.747967479674795, |
| "grad_norm": 0.14459079504013062, |
| "learning_rate": 2.0843084354989767e-05, |
| "loss": 0.0084, |
| "step": 10700 |
| }, |
| { |
| "epoch": 21.76829268292683, |
| "grad_norm": 0.2441745102405548, |
| "learning_rate": 2.0753606347654892e-05, |
| "loss": 0.0137, |
| "step": 10710 |
| }, |
| { |
| "epoch": 21.788617886178862, |
| "grad_norm": 0.09381800144910812, |
| "learning_rate": 2.0664270488780985e-05, |
| "loss": 0.0074, |
| "step": 10720 |
| }, |
| { |
| "epoch": 21.808943089430894, |
| "grad_norm": 0.085075244307518, |
| "learning_rate": 2.0575077212573905e-05, |
| "loss": 0.0081, |
| "step": 10730 |
| }, |
| { |
| "epoch": 21.829268292682926, |
| "grad_norm": 0.1668519526720047, |
| "learning_rate": 2.0486026952546484e-05, |
| "loss": 0.0116, |
| "step": 10740 |
| }, |
| { |
| "epoch": 21.84959349593496, |
| "grad_norm": 0.10299837589263916, |
| "learning_rate": 2.0397120141516457e-05, |
| "loss": 0.0094, |
| "step": 10750 |
| }, |
| { |
| "epoch": 21.869918699186993, |
| "grad_norm": 0.09836846590042114, |
| "learning_rate": 2.0308357211604313e-05, |
| "loss": 0.0097, |
| "step": 10760 |
| }, |
| { |
| "epoch": 21.890243902439025, |
| "grad_norm": 0.10448900610208511, |
| "learning_rate": 2.0219738594231224e-05, |
| "loss": 0.0092, |
| "step": 10770 |
| }, |
| { |
| "epoch": 21.910569105691057, |
| "grad_norm": 0.0666520744562149, |
| "learning_rate": 2.0131264720116993e-05, |
| "loss": 0.0079, |
| "step": 10780 |
| }, |
| { |
| "epoch": 21.93089430894309, |
| "grad_norm": 0.08662780374288559, |
| "learning_rate": 2.0042936019277853e-05, |
| "loss": 0.0117, |
| "step": 10790 |
| }, |
| { |
| "epoch": 21.951219512195124, |
| "grad_norm": 0.11221914738416672, |
| "learning_rate": 1.99547529210245e-05, |
| "loss": 0.0088, |
| "step": 10800 |
| }, |
| { |
| "epoch": 21.971544715447155, |
| "grad_norm": 0.08087283372879028, |
| "learning_rate": 1.9866715853959934e-05, |
| "loss": 0.0097, |
| "step": 10810 |
| }, |
| { |
| "epoch": 21.991869918699187, |
| "grad_norm": 0.12656255066394806, |
| "learning_rate": 1.977882524597734e-05, |
| "loss": 0.0131, |
| "step": 10820 |
| }, |
| { |
| "epoch": 22.01219512195122, |
| "grad_norm": 0.12034684419631958, |
| "learning_rate": 1.969108152425813e-05, |
| "loss": 0.0102, |
| "step": 10830 |
| }, |
| { |
| "epoch": 22.03252032520325, |
| "grad_norm": 0.08809592574834824, |
| "learning_rate": 1.9603485115269744e-05, |
| "loss": 0.0113, |
| "step": 10840 |
| }, |
| { |
| "epoch": 22.052845528455286, |
| "grad_norm": 0.07460938394069672, |
| "learning_rate": 1.9516036444763613e-05, |
| "loss": 0.0102, |
| "step": 10850 |
| }, |
| { |
| "epoch": 22.073170731707318, |
| "grad_norm": 0.08779294788837433, |
| "learning_rate": 1.9428735937773173e-05, |
| "loss": 0.0109, |
| "step": 10860 |
| }, |
| { |
| "epoch": 22.09349593495935, |
| "grad_norm": 0.09611085057258606, |
| "learning_rate": 1.9341584018611646e-05, |
| "loss": 0.0084, |
| "step": 10870 |
| }, |
| { |
| "epoch": 22.11382113821138, |
| "grad_norm": 0.1048993468284607, |
| "learning_rate": 1.9254581110870123e-05, |
| "loss": 0.009, |
| "step": 10880 |
| }, |
| { |
| "epoch": 22.134146341463413, |
| "grad_norm": 0.11739665269851685, |
| "learning_rate": 1.916772763741544e-05, |
| "loss": 0.0095, |
| "step": 10890 |
| }, |
| { |
| "epoch": 22.15447154471545, |
| "grad_norm": 0.1125992089509964, |
| "learning_rate": 1.908102402038807e-05, |
| "loss": 0.0095, |
| "step": 10900 |
| }, |
| { |
| "epoch": 22.17479674796748, |
| "grad_norm": 0.10314033180475235, |
| "learning_rate": 1.8994470681200204e-05, |
| "loss": 0.0086, |
| "step": 10910 |
| }, |
| { |
| "epoch": 22.195121951219512, |
| "grad_norm": 0.07227867096662521, |
| "learning_rate": 1.8908068040533578e-05, |
| "loss": 0.0075, |
| "step": 10920 |
| }, |
| { |
| "epoch": 22.215447154471544, |
| "grad_norm": 0.0778256207704544, |
| "learning_rate": 1.8821816518337455e-05, |
| "loss": 0.0113, |
| "step": 10930 |
| }, |
| { |
| "epoch": 22.235772357723576, |
| "grad_norm": 0.11512289941310883, |
| "learning_rate": 1.8735716533826663e-05, |
| "loss": 0.0071, |
| "step": 10940 |
| }, |
| { |
| "epoch": 22.25609756097561, |
| "grad_norm": 0.1081552803516388, |
| "learning_rate": 1.8649768505479476e-05, |
| "loss": 0.0089, |
| "step": 10950 |
| }, |
| { |
| "epoch": 22.276422764227643, |
| "grad_norm": 0.12834273278713226, |
| "learning_rate": 1.8563972851035616e-05, |
| "loss": 0.0082, |
| "step": 10960 |
| }, |
| { |
| "epoch": 22.296747967479675, |
| "grad_norm": 0.11064130812883377, |
| "learning_rate": 1.847832998749418e-05, |
| "loss": 0.0081, |
| "step": 10970 |
| }, |
| { |
| "epoch": 22.317073170731707, |
| "grad_norm": 0.11904542148113251, |
| "learning_rate": 1.8392840331111644e-05, |
| "loss": 0.0084, |
| "step": 10980 |
| }, |
| { |
| "epoch": 22.33739837398374, |
| "grad_norm": 0.08828697353601456, |
| "learning_rate": 1.830750429739989e-05, |
| "loss": 0.0107, |
| "step": 10990 |
| }, |
| { |
| "epoch": 22.357723577235774, |
| "grad_norm": 0.13146309554576874, |
| "learning_rate": 1.822232230112409e-05, |
| "loss": 0.0135, |
| "step": 11000 |
| }, |
| { |
| "epoch": 22.378048780487806, |
| "grad_norm": 0.12254566699266434, |
| "learning_rate": 1.813729475630071e-05, |
| "loss": 0.0078, |
| "step": 11010 |
| }, |
| { |
| "epoch": 22.398373983739837, |
| "grad_norm": 0.10865466296672821, |
| "learning_rate": 1.8052422076195635e-05, |
| "loss": 0.0084, |
| "step": 11020 |
| }, |
| { |
| "epoch": 22.41869918699187, |
| "grad_norm": 0.1604381948709488, |
| "learning_rate": 1.7967704673321918e-05, |
| "loss": 0.0127, |
| "step": 11030 |
| }, |
| { |
| "epoch": 22.4390243902439, |
| "grad_norm": 0.12828870117664337, |
| "learning_rate": 1.7883142959438004e-05, |
| "loss": 0.0071, |
| "step": 11040 |
| }, |
| { |
| "epoch": 22.459349593495936, |
| "grad_norm": 0.1228393092751503, |
| "learning_rate": 1.779873734554558e-05, |
| "loss": 0.0101, |
| "step": 11050 |
| }, |
| { |
| "epoch": 22.479674796747968, |
| "grad_norm": 0.07193803787231445, |
| "learning_rate": 1.771448824188761e-05, |
| "loss": 0.0073, |
| "step": 11060 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 0.10907436907291412, |
| "learning_rate": 1.763039605794644e-05, |
| "loss": 0.0092, |
| "step": 11070 |
| }, |
| { |
| "epoch": 22.520325203252032, |
| "grad_norm": 0.10717453062534332, |
| "learning_rate": 1.754646120244164e-05, |
| "loss": 0.0097, |
| "step": 11080 |
| }, |
| { |
| "epoch": 22.540650406504064, |
| "grad_norm": 0.09861317276954651, |
| "learning_rate": 1.7462684083328144e-05, |
| "loss": 0.0093, |
| "step": 11090 |
| }, |
| { |
| "epoch": 22.5609756097561, |
| "grad_norm": 0.13518258929252625, |
| "learning_rate": 1.7379065107794262e-05, |
| "loss": 0.0106, |
| "step": 11100 |
| }, |
| { |
| "epoch": 22.58130081300813, |
| "grad_norm": 0.08812890946865082, |
| "learning_rate": 1.7295604682259586e-05, |
| "loss": 0.0128, |
| "step": 11110 |
| }, |
| { |
| "epoch": 22.601626016260163, |
| "grad_norm": 0.11440128833055496, |
| "learning_rate": 1.7212303212373175e-05, |
| "loss": 0.0091, |
| "step": 11120 |
| }, |
| { |
| "epoch": 22.621951219512194, |
| "grad_norm": 0.11984211951494217, |
| "learning_rate": 1.712916110301146e-05, |
| "loss": 0.0098, |
| "step": 11130 |
| }, |
| { |
| "epoch": 22.642276422764226, |
| "grad_norm": 0.11255427449941635, |
| "learning_rate": 1.7046178758276298e-05, |
| "loss": 0.0142, |
| "step": 11140 |
| }, |
| { |
| "epoch": 22.66260162601626, |
| "grad_norm": 0.1485426425933838, |
| "learning_rate": 1.696335658149309e-05, |
| "loss": 0.0153, |
| "step": 11150 |
| }, |
| { |
| "epoch": 22.682926829268293, |
| "grad_norm": 0.1576770395040512, |
| "learning_rate": 1.6880694975208727e-05, |
| "loss": 0.0115, |
| "step": 11160 |
| }, |
| { |
| "epoch": 22.703252032520325, |
| "grad_norm": 0.07304835319519043, |
| "learning_rate": 1.6798194341189687e-05, |
| "loss": 0.0096, |
| "step": 11170 |
| }, |
| { |
| "epoch": 22.723577235772357, |
| "grad_norm": 0.0978945642709732, |
| "learning_rate": 1.671585508042003e-05, |
| "loss": 0.0087, |
| "step": 11180 |
| }, |
| { |
| "epoch": 22.74390243902439, |
| "grad_norm": 0.11587736010551453, |
| "learning_rate": 1.6633677593099483e-05, |
| "loss": 0.0142, |
| "step": 11190 |
| }, |
| { |
| "epoch": 22.764227642276424, |
| "grad_norm": 0.08535769581794739, |
| "learning_rate": 1.655166227864154e-05, |
| "loss": 0.0121, |
| "step": 11200 |
| }, |
| { |
| "epoch": 22.784552845528456, |
| "grad_norm": 0.07661209255456924, |
| "learning_rate": 1.6469809535671426e-05, |
| "loss": 0.0136, |
| "step": 11210 |
| }, |
| { |
| "epoch": 22.804878048780488, |
| "grad_norm": 0.10255829989910126, |
| "learning_rate": 1.638811976202421e-05, |
| "loss": 0.0094, |
| "step": 11220 |
| }, |
| { |
| "epoch": 22.82520325203252, |
| "grad_norm": 0.11505745351314545, |
| "learning_rate": 1.6306593354742895e-05, |
| "loss": 0.0131, |
| "step": 11230 |
| }, |
| { |
| "epoch": 22.84552845528455, |
| "grad_norm": 0.07341789454221725, |
| "learning_rate": 1.6225230710076455e-05, |
| "loss": 0.008, |
| "step": 11240 |
| }, |
| { |
| "epoch": 22.865853658536587, |
| "grad_norm": 0.11690469831228256, |
| "learning_rate": 1.6144032223477924e-05, |
| "loss": 0.0125, |
| "step": 11250 |
| }, |
| { |
| "epoch": 22.88617886178862, |
| "grad_norm": 0.1037013903260231, |
| "learning_rate": 1.606299828960243e-05, |
| "loss": 0.008, |
| "step": 11260 |
| }, |
| { |
| "epoch": 22.90650406504065, |
| "grad_norm": 0.06866220384836197, |
| "learning_rate": 1.5982129302305337e-05, |
| "loss": 0.008, |
| "step": 11270 |
| }, |
| { |
| "epoch": 22.926829268292682, |
| "grad_norm": 0.06975048035383224, |
| "learning_rate": 1.590142565464032e-05, |
| "loss": 0.0085, |
| "step": 11280 |
| }, |
| { |
| "epoch": 22.947154471544714, |
| "grad_norm": 0.13513271510601044, |
| "learning_rate": 1.5820887738857408e-05, |
| "loss": 0.0086, |
| "step": 11290 |
| }, |
| { |
| "epoch": 22.96747967479675, |
| "grad_norm": 0.08723549544811249, |
| "learning_rate": 1.5740515946401134e-05, |
| "loss": 0.0102, |
| "step": 11300 |
| }, |
| { |
| "epoch": 22.98780487804878, |
| "grad_norm": 0.09078790247440338, |
| "learning_rate": 1.5660310667908634e-05, |
| "loss": 0.0093, |
| "step": 11310 |
| }, |
| { |
| "epoch": 23.008130081300813, |
| "grad_norm": 0.10816732048988342, |
| "learning_rate": 1.5580272293207655e-05, |
| "loss": 0.009, |
| "step": 11320 |
| }, |
| { |
| "epoch": 23.028455284552845, |
| "grad_norm": 0.09973511844873428, |
| "learning_rate": 1.5500401211314796e-05, |
| "loss": 0.0122, |
| "step": 11330 |
| }, |
| { |
| "epoch": 23.048780487804876, |
| "grad_norm": 0.13049659132957458, |
| "learning_rate": 1.542069781043351e-05, |
| "loss": 0.0129, |
| "step": 11340 |
| }, |
| { |
| "epoch": 23.06910569105691, |
| "grad_norm": 0.11349806189537048, |
| "learning_rate": 1.534116247795226e-05, |
| "loss": 0.0102, |
| "step": 11350 |
| }, |
| { |
| "epoch": 23.089430894308943, |
| "grad_norm": 0.09644364565610886, |
| "learning_rate": 1.526179560044267e-05, |
| "loss": 0.0073, |
| "step": 11360 |
| }, |
| { |
| "epoch": 23.109756097560975, |
| "grad_norm": 0.0995858833193779, |
| "learning_rate": 1.5182597563657552e-05, |
| "loss": 0.0122, |
| "step": 11370 |
| }, |
| { |
| "epoch": 23.130081300813007, |
| "grad_norm": 0.07347644865512848, |
| "learning_rate": 1.5103568752529135e-05, |
| "loss": 0.0081, |
| "step": 11380 |
| }, |
| { |
| "epoch": 23.150406504065042, |
| "grad_norm": 0.07966236770153046, |
| "learning_rate": 1.5024709551167142e-05, |
| "loss": 0.0072, |
| "step": 11390 |
| }, |
| { |
| "epoch": 23.170731707317074, |
| "grad_norm": 0.08882099390029907, |
| "learning_rate": 1.4946020342856898e-05, |
| "loss": 0.0063, |
| "step": 11400 |
| }, |
| { |
| "epoch": 23.191056910569106, |
| "grad_norm": 0.11498501151800156, |
| "learning_rate": 1.4867501510057546e-05, |
| "loss": 0.0089, |
| "step": 11410 |
| }, |
| { |
| "epoch": 23.211382113821138, |
| "grad_norm": 0.09351756423711777, |
| "learning_rate": 1.4789153434400094e-05, |
| "loss": 0.0122, |
| "step": 11420 |
| }, |
| { |
| "epoch": 23.23170731707317, |
| "grad_norm": 0.13294996321201324, |
| "learning_rate": 1.4710976496685614e-05, |
| "loss": 0.0097, |
| "step": 11430 |
| }, |
| { |
| "epoch": 23.252032520325205, |
| "grad_norm": 0.11491288244724274, |
| "learning_rate": 1.4632971076883406e-05, |
| "loss": 0.0084, |
| "step": 11440 |
| }, |
| { |
| "epoch": 23.272357723577237, |
| "grad_norm": 0.08280199766159058, |
| "learning_rate": 1.4555137554129117e-05, |
| "loss": 0.0108, |
| "step": 11450 |
| }, |
| { |
| "epoch": 23.29268292682927, |
| "grad_norm": 0.13369178771972656, |
| "learning_rate": 1.4477476306722925e-05, |
| "loss": 0.0121, |
| "step": 11460 |
| }, |
| { |
| "epoch": 23.3130081300813, |
| "grad_norm": 0.06917362660169601, |
| "learning_rate": 1.439998771212766e-05, |
| "loss": 0.0101, |
| "step": 11470 |
| }, |
| { |
| "epoch": 23.333333333333332, |
| "grad_norm": 0.06669972836971283, |
| "learning_rate": 1.4322672146966982e-05, |
| "loss": 0.009, |
| "step": 11480 |
| }, |
| { |
| "epoch": 23.353658536585368, |
| "grad_norm": 0.10002262890338898, |
| "learning_rate": 1.4245529987023621e-05, |
| "loss": 0.0068, |
| "step": 11490 |
| }, |
| { |
| "epoch": 23.3739837398374, |
| "grad_norm": 0.09954614192247391, |
| "learning_rate": 1.4168561607237436e-05, |
| "loss": 0.0094, |
| "step": 11500 |
| }, |
| { |
| "epoch": 23.39430894308943, |
| "grad_norm": 0.11286766827106476, |
| "learning_rate": 1.4091767381703657e-05, |
| "loss": 0.0078, |
| "step": 11510 |
| }, |
| { |
| "epoch": 23.414634146341463, |
| "grad_norm": 0.06297627091407776, |
| "learning_rate": 1.4015147683671087e-05, |
| "loss": 0.0112, |
| "step": 11520 |
| }, |
| { |
| "epoch": 23.434959349593495, |
| "grad_norm": 0.09290836751461029, |
| "learning_rate": 1.3938702885540239e-05, |
| "loss": 0.0101, |
| "step": 11530 |
| }, |
| { |
| "epoch": 23.45528455284553, |
| "grad_norm": 0.09421167522668839, |
| "learning_rate": 1.3862433358861576e-05, |
| "loss": 0.008, |
| "step": 11540 |
| }, |
| { |
| "epoch": 23.475609756097562, |
| "grad_norm": 0.10376714915037155, |
| "learning_rate": 1.3786339474333636e-05, |
| "loss": 0.0102, |
| "step": 11550 |
| }, |
| { |
| "epoch": 23.495934959349594, |
| "grad_norm": 0.11214353144168854, |
| "learning_rate": 1.3710421601801265e-05, |
| "loss": 0.0089, |
| "step": 11560 |
| }, |
| { |
| "epoch": 23.516260162601625, |
| "grad_norm": 0.05647756904363632, |
| "learning_rate": 1.3634680110253883e-05, |
| "loss": 0.0085, |
| "step": 11570 |
| }, |
| { |
| "epoch": 23.536585365853657, |
| "grad_norm": 0.08150362223386765, |
| "learning_rate": 1.3559115367823556e-05, |
| "loss": 0.0075, |
| "step": 11580 |
| }, |
| { |
| "epoch": 23.556910569105693, |
| "grad_norm": 0.07136379182338715, |
| "learning_rate": 1.3483727741783342e-05, |
| "loss": 0.0102, |
| "step": 11590 |
| }, |
| { |
| "epoch": 23.577235772357724, |
| "grad_norm": 0.11355695873498917, |
| "learning_rate": 1.3408517598545444e-05, |
| "loss": 0.0083, |
| "step": 11600 |
| }, |
| { |
| "epoch": 23.597560975609756, |
| "grad_norm": 0.0908714309334755, |
| "learning_rate": 1.3333485303659381e-05, |
| "loss": 0.0091, |
| "step": 11610 |
| }, |
| { |
| "epoch": 23.617886178861788, |
| "grad_norm": 0.10554931312799454, |
| "learning_rate": 1.3258631221810331e-05, |
| "loss": 0.0116, |
| "step": 11620 |
| }, |
| { |
| "epoch": 23.63821138211382, |
| "grad_norm": 0.10275840759277344, |
| "learning_rate": 1.3183955716817232e-05, |
| "loss": 0.009, |
| "step": 11630 |
| }, |
| { |
| "epoch": 23.658536585365855, |
| "grad_norm": 0.09203638881444931, |
| "learning_rate": 1.3109459151631076e-05, |
| "loss": 0.0106, |
| "step": 11640 |
| }, |
| { |
| "epoch": 23.678861788617887, |
| "grad_norm": 0.08367224037647247, |
| "learning_rate": 1.3035141888333202e-05, |
| "loss": 0.0081, |
| "step": 11650 |
| }, |
| { |
| "epoch": 23.69918699186992, |
| "grad_norm": 0.09575346112251282, |
| "learning_rate": 1.2961004288133388e-05, |
| "loss": 0.0069, |
| "step": 11660 |
| }, |
| { |
| "epoch": 23.71951219512195, |
| "grad_norm": 0.134243905544281, |
| "learning_rate": 1.2887046711368245e-05, |
| "loss": 0.0091, |
| "step": 11670 |
| }, |
| { |
| "epoch": 23.739837398373982, |
| "grad_norm": 0.111112080514431, |
| "learning_rate": 1.2813269517499399e-05, |
| "loss": 0.0076, |
| "step": 11680 |
| }, |
| { |
| "epoch": 23.760162601626018, |
| "grad_norm": 0.12154891341924667, |
| "learning_rate": 1.273967306511169e-05, |
| "loss": 0.0089, |
| "step": 11690 |
| }, |
| { |
| "epoch": 23.78048780487805, |
| "grad_norm": 0.15236243605613708, |
| "learning_rate": 1.2666257711911566e-05, |
| "loss": 0.0094, |
| "step": 11700 |
| }, |
| { |
| "epoch": 23.80081300813008, |
| "grad_norm": 0.0687737986445427, |
| "learning_rate": 1.2593023814725214e-05, |
| "loss": 0.0127, |
| "step": 11710 |
| }, |
| { |
| "epoch": 23.821138211382113, |
| "grad_norm": 0.10326932370662689, |
| "learning_rate": 1.251997172949686e-05, |
| "loss": 0.007, |
| "step": 11720 |
| }, |
| { |
| "epoch": 23.841463414634145, |
| "grad_norm": 0.1114373728632927, |
| "learning_rate": 1.2447101811287109e-05, |
| "loss": 0.0091, |
| "step": 11730 |
| }, |
| { |
| "epoch": 23.86178861788618, |
| "grad_norm": 0.08497825264930725, |
| "learning_rate": 1.237441441427114e-05, |
| "loss": 0.0082, |
| "step": 11740 |
| }, |
| { |
| "epoch": 23.882113821138212, |
| "grad_norm": 0.10188353061676025, |
| "learning_rate": 1.2301909891737018e-05, |
| "loss": 0.0067, |
| "step": 11750 |
| }, |
| { |
| "epoch": 23.902439024390244, |
| "grad_norm": 0.12289886176586151, |
| "learning_rate": 1.2229588596083957e-05, |
| "loss": 0.0089, |
| "step": 11760 |
| }, |
| { |
| "epoch": 23.922764227642276, |
| "grad_norm": 0.11247570812702179, |
| "learning_rate": 1.2157450878820608e-05, |
| "loss": 0.0064, |
| "step": 11770 |
| }, |
| { |
| "epoch": 23.943089430894307, |
| "grad_norm": 0.11704237014055252, |
| "learning_rate": 1.2085497090563407e-05, |
| "loss": 0.0066, |
| "step": 11780 |
| }, |
| { |
| "epoch": 23.963414634146343, |
| "grad_norm": 0.055815473198890686, |
| "learning_rate": 1.2013727581034783e-05, |
| "loss": 0.0084, |
| "step": 11790 |
| }, |
| { |
| "epoch": 23.983739837398375, |
| "grad_norm": 0.07490572333335876, |
| "learning_rate": 1.1942142699061498e-05, |
| "loss": 0.0075, |
| "step": 11800 |
| }, |
| { |
| "epoch": 24.004065040650406, |
| "grad_norm": 0.10884711891412735, |
| "learning_rate": 1.1870742792572992e-05, |
| "loss": 0.0079, |
| "step": 11810 |
| }, |
| { |
| "epoch": 24.024390243902438, |
| "grad_norm": 0.0868370532989502, |
| "learning_rate": 1.1799528208599637e-05, |
| "loss": 0.0114, |
| "step": 11820 |
| }, |
| { |
| "epoch": 24.04471544715447, |
| "grad_norm": 0.09366288781166077, |
| "learning_rate": 1.1728499293271079e-05, |
| "loss": 0.0092, |
| "step": 11830 |
| }, |
| { |
| "epoch": 24.065040650406505, |
| "grad_norm": 0.10993952304124832, |
| "learning_rate": 1.1657656391814509e-05, |
| "loss": 0.0071, |
| "step": 11840 |
| }, |
| { |
| "epoch": 24.085365853658537, |
| "grad_norm": 0.06420900672674179, |
| "learning_rate": 1.1586999848553043e-05, |
| "loss": 0.0108, |
| "step": 11850 |
| }, |
| { |
| "epoch": 24.10569105691057, |
| "grad_norm": 0.10133112967014313, |
| "learning_rate": 1.1516530006904053e-05, |
| "loss": 0.012, |
| "step": 11860 |
| }, |
| { |
| "epoch": 24.1260162601626, |
| "grad_norm": 0.15871313214302063, |
| "learning_rate": 1.1446247209377403e-05, |
| "loss": 0.0099, |
| "step": 11870 |
| }, |
| { |
| "epoch": 24.146341463414632, |
| "grad_norm": 0.12604136765003204, |
| "learning_rate": 1.1376151797573925e-05, |
| "loss": 0.0062, |
| "step": 11880 |
| }, |
| { |
| "epoch": 24.166666666666668, |
| "grad_norm": 0.13042505085468292, |
| "learning_rate": 1.1306244112183662e-05, |
| "loss": 0.0112, |
| "step": 11890 |
| }, |
| { |
| "epoch": 24.1869918699187, |
| "grad_norm": 0.1565147340297699, |
| "learning_rate": 1.1236524492984203e-05, |
| "loss": 0.0113, |
| "step": 11900 |
| }, |
| { |
| "epoch": 24.20731707317073, |
| "grad_norm": 0.09882476925849915, |
| "learning_rate": 1.116699327883911e-05, |
| "loss": 0.0099, |
| "step": 11910 |
| }, |
| { |
| "epoch": 24.227642276422763, |
| "grad_norm": 0.1394849717617035, |
| "learning_rate": 1.1097650807696209e-05, |
| "loss": 0.0073, |
| "step": 11920 |
| }, |
| { |
| "epoch": 24.247967479674795, |
| "grad_norm": 0.07821723818778992, |
| "learning_rate": 1.1028497416585931e-05, |
| "loss": 0.006, |
| "step": 11930 |
| }, |
| { |
| "epoch": 24.26829268292683, |
| "grad_norm": 0.06742383539676666, |
| "learning_rate": 1.0959533441619762e-05, |
| "loss": 0.009, |
| "step": 11940 |
| }, |
| { |
| "epoch": 24.288617886178862, |
| "grad_norm": 0.06799096614122391, |
| "learning_rate": 1.0890759217988527e-05, |
| "loss": 0.007, |
| "step": 11950 |
| }, |
| { |
| "epoch": 24.308943089430894, |
| "grad_norm": 0.08466274291276932, |
| "learning_rate": 1.0822175079960806e-05, |
| "loss": 0.0078, |
| "step": 11960 |
| }, |
| { |
| "epoch": 24.329268292682926, |
| "grad_norm": 0.07107431441545486, |
| "learning_rate": 1.0753781360881265e-05, |
| "loss": 0.0111, |
| "step": 11970 |
| }, |
| { |
| "epoch": 24.34959349593496, |
| "grad_norm": 0.09816905856132507, |
| "learning_rate": 1.0685578393169055e-05, |
| "loss": 0.0075, |
| "step": 11980 |
| }, |
| { |
| "epoch": 24.369918699186993, |
| "grad_norm": 0.100242480635643, |
| "learning_rate": 1.061756650831625e-05, |
| "loss": 0.0081, |
| "step": 11990 |
| }, |
| { |
| "epoch": 24.390243902439025, |
| "grad_norm": 0.07489926367998123, |
| "learning_rate": 1.054974603688616e-05, |
| "loss": 0.007, |
| "step": 12000 |
| }, |
| { |
| "epoch": 24.410569105691057, |
| "grad_norm": 0.10150320082902908, |
| "learning_rate": 1.048211730851173e-05, |
| "loss": 0.0087, |
| "step": 12010 |
| }, |
| { |
| "epoch": 24.43089430894309, |
| "grad_norm": 0.11625931411981583, |
| "learning_rate": 1.0414680651894004e-05, |
| "loss": 0.0111, |
| "step": 12020 |
| }, |
| { |
| "epoch": 24.451219512195124, |
| "grad_norm": 0.11474552005529404, |
| "learning_rate": 1.034743639480047e-05, |
| "loss": 0.0093, |
| "step": 12030 |
| }, |
| { |
| "epoch": 24.471544715447155, |
| "grad_norm": 0.0563468337059021, |
| "learning_rate": 1.0280384864063497e-05, |
| "loss": 0.0061, |
| "step": 12040 |
| }, |
| { |
| "epoch": 24.491869918699187, |
| "grad_norm": 0.08342014253139496, |
| "learning_rate": 1.0213526385578704e-05, |
| "loss": 0.0085, |
| "step": 12050 |
| }, |
| { |
| "epoch": 24.51219512195122, |
| "grad_norm": 0.1682083010673523, |
| "learning_rate": 1.0146861284303394e-05, |
| "loss": 0.0103, |
| "step": 12060 |
| }, |
| { |
| "epoch": 24.53252032520325, |
| "grad_norm": 0.11061355471611023, |
| "learning_rate": 1.0080389884255037e-05, |
| "loss": 0.0071, |
| "step": 12070 |
| }, |
| { |
| "epoch": 24.552845528455286, |
| "grad_norm": 0.11372610926628113, |
| "learning_rate": 1.0014112508509588e-05, |
| "loss": 0.0068, |
| "step": 12080 |
| }, |
| { |
| "epoch": 24.573170731707318, |
| "grad_norm": 0.06454111635684967, |
| "learning_rate": 9.948029479199994e-06, |
| "loss": 0.0068, |
| "step": 12090 |
| }, |
| { |
| "epoch": 24.59349593495935, |
| "grad_norm": 0.1143406480550766, |
| "learning_rate": 9.882141117514632e-06, |
| "loss": 0.0092, |
| "step": 12100 |
| }, |
| { |
| "epoch": 24.61382113821138, |
| "grad_norm": 0.09725458174943924, |
| "learning_rate": 9.816447743695656e-06, |
| "loss": 0.0064, |
| "step": 12110 |
| }, |
| { |
| "epoch": 24.634146341463413, |
| "grad_norm": 0.10150446742773056, |
| "learning_rate": 9.75094967703758e-06, |
| "loss": 0.0082, |
| "step": 12120 |
| }, |
| { |
| "epoch": 24.65447154471545, |
| "grad_norm": 0.07894540578126907, |
| "learning_rate": 9.685647235885597e-06, |
| "loss": 0.0077, |
| "step": 12130 |
| }, |
| { |
| "epoch": 24.67479674796748, |
| "grad_norm": 0.07765673100948334, |
| "learning_rate": 9.620540737634087e-06, |
| "loss": 0.0082, |
| "step": 12140 |
| }, |
| { |
| "epoch": 24.695121951219512, |
| "grad_norm": 0.1181579977273941, |
| "learning_rate": 9.555630498725133e-06, |
| "loss": 0.009, |
| "step": 12150 |
| }, |
| { |
| "epoch": 24.715447154471544, |
| "grad_norm": 0.09069491177797318, |
| "learning_rate": 9.49091683464684e-06, |
| "loss": 0.0123, |
| "step": 12160 |
| }, |
| { |
| "epoch": 24.735772357723576, |
| "grad_norm": 0.12418906390666962, |
| "learning_rate": 9.426400059931955e-06, |
| "loss": 0.0072, |
| "step": 12170 |
| }, |
| { |
| "epoch": 24.75609756097561, |
| "grad_norm": 0.09817170351743698, |
| "learning_rate": 9.362080488156245e-06, |
| "loss": 0.0101, |
| "step": 12180 |
| }, |
| { |
| "epoch": 24.776422764227643, |
| "grad_norm": 0.15471002459526062, |
| "learning_rate": 9.29795843193697e-06, |
| "loss": 0.0098, |
| "step": 12190 |
| }, |
| { |
| "epoch": 24.796747967479675, |
| "grad_norm": 0.10904843360185623, |
| "learning_rate": 9.234034202931447e-06, |
| "loss": 0.0052, |
| "step": 12200 |
| }, |
| { |
| "epoch": 24.817073170731707, |
| "grad_norm": 0.1243189200758934, |
| "learning_rate": 9.170308111835418e-06, |
| "loss": 0.0103, |
| "step": 12210 |
| }, |
| { |
| "epoch": 24.83739837398374, |
| "grad_norm": 0.18022632598876953, |
| "learning_rate": 9.106780468381631e-06, |
| "loss": 0.0085, |
| "step": 12220 |
| }, |
| { |
| "epoch": 24.857723577235774, |
| "grad_norm": 0.12732474505901337, |
| "learning_rate": 9.043451581338302e-06, |
| "loss": 0.0107, |
| "step": 12230 |
| }, |
| { |
| "epoch": 24.878048780487806, |
| "grad_norm": 0.09132570028305054, |
| "learning_rate": 8.980321758507615e-06, |
| "loss": 0.0067, |
| "step": 12240 |
| }, |
| { |
| "epoch": 24.898373983739837, |
| "grad_norm": 0.08355541527271271, |
| "learning_rate": 8.91739130672425e-06, |
| "loss": 0.0084, |
| "step": 12250 |
| }, |
| { |
| "epoch": 24.91869918699187, |
| "grad_norm": 0.11257751286029816, |
| "learning_rate": 8.85466053185382e-06, |
| "loss": 0.0082, |
| "step": 12260 |
| }, |
| { |
| "epoch": 24.9390243902439, |
| "grad_norm": 0.09904129058122635, |
| "learning_rate": 8.792129738791455e-06, |
| "loss": 0.0087, |
| "step": 12270 |
| }, |
| { |
| "epoch": 24.959349593495936, |
| "grad_norm": 0.13384602963924408, |
| "learning_rate": 8.729799231460318e-06, |
| "loss": 0.008, |
| "step": 12280 |
| }, |
| { |
| "epoch": 24.979674796747968, |
| "grad_norm": 0.0904664397239685, |
| "learning_rate": 8.66766931281009e-06, |
| "loss": 0.0072, |
| "step": 12290 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.09332982450723648, |
| "learning_rate": 8.6057402848155e-06, |
| "loss": 0.0071, |
| "step": 12300 |
| }, |
| { |
| "epoch": 25.020325203252032, |
| "grad_norm": 0.10007297992706299, |
| "learning_rate": 8.544012448474904e-06, |
| "loss": 0.008, |
| "step": 12310 |
| }, |
| { |
| "epoch": 25.040650406504064, |
| "grad_norm": 0.08375833183526993, |
| "learning_rate": 8.482486103808779e-06, |
| "loss": 0.0075, |
| "step": 12320 |
| }, |
| { |
| "epoch": 25.0609756097561, |
| "grad_norm": 0.08920438587665558, |
| "learning_rate": 8.42116154985828e-06, |
| "loss": 0.0067, |
| "step": 12330 |
| }, |
| { |
| "epoch": 25.08130081300813, |
| "grad_norm": 0.08444127440452576, |
| "learning_rate": 8.360039084683779e-06, |
| "loss": 0.0071, |
| "step": 12340 |
| }, |
| { |
| "epoch": 25.101626016260163, |
| "grad_norm": 0.09630052745342255, |
| "learning_rate": 8.299119005363404e-06, |
| "loss": 0.0071, |
| "step": 12350 |
| }, |
| { |
| "epoch": 25.121951219512194, |
| "grad_norm": 0.06598120927810669, |
| "learning_rate": 8.238401607991647e-06, |
| "loss": 0.0066, |
| "step": 12360 |
| }, |
| { |
| "epoch": 25.142276422764226, |
| "grad_norm": 0.0607173927128315, |
| "learning_rate": 8.177887187677847e-06, |
| "loss": 0.0058, |
| "step": 12370 |
| }, |
| { |
| "epoch": 25.16260162601626, |
| "grad_norm": 0.09175746887922287, |
| "learning_rate": 8.117576038544838e-06, |
| "loss": 0.0104, |
| "step": 12380 |
| }, |
| { |
| "epoch": 25.182926829268293, |
| "grad_norm": 0.07160919904708862, |
| "learning_rate": 8.057468453727479e-06, |
| "loss": 0.0052, |
| "step": 12390 |
| }, |
| { |
| "epoch": 25.203252032520325, |
| "grad_norm": 0.08971722424030304, |
| "learning_rate": 7.997564725371182e-06, |
| "loss": 0.0089, |
| "step": 12400 |
| }, |
| { |
| "epoch": 25.223577235772357, |
| "grad_norm": 0.15288154780864716, |
| "learning_rate": 7.937865144630601e-06, |
| "loss": 0.0095, |
| "step": 12410 |
| }, |
| { |
| "epoch": 25.24390243902439, |
| "grad_norm": 0.0708981305360794, |
| "learning_rate": 7.878370001668116e-06, |
| "loss": 0.0091, |
| "step": 12420 |
| }, |
| { |
| "epoch": 25.264227642276424, |
| "grad_norm": 0.11280938237905502, |
| "learning_rate": 7.819079585652461e-06, |
| "loss": 0.0083, |
| "step": 12430 |
| }, |
| { |
| "epoch": 25.284552845528456, |
| "grad_norm": 0.0850948765873909, |
| "learning_rate": 7.759994184757358e-06, |
| "loss": 0.0073, |
| "step": 12440 |
| }, |
| { |
| "epoch": 25.304878048780488, |
| "grad_norm": 0.0783085823059082, |
| "learning_rate": 7.701114086160027e-06, |
| "loss": 0.0062, |
| "step": 12450 |
| }, |
| { |
| "epoch": 25.32520325203252, |
| "grad_norm": 0.12934930622577667, |
| "learning_rate": 7.642439576039884e-06, |
| "loss": 0.0102, |
| "step": 12460 |
| }, |
| { |
| "epoch": 25.34552845528455, |
| "grad_norm": 0.11071004718542099, |
| "learning_rate": 7.583970939577101e-06, |
| "loss": 0.0074, |
| "step": 12470 |
| }, |
| { |
| "epoch": 25.365853658536587, |
| "grad_norm": 0.11486160755157471, |
| "learning_rate": 7.525708460951197e-06, |
| "loss": 0.009, |
| "step": 12480 |
| }, |
| { |
| "epoch": 25.38617886178862, |
| "grad_norm": 0.10318905860185623, |
| "learning_rate": 7.467652423339733e-06, |
| "loss": 0.0079, |
| "step": 12490 |
| }, |
| { |
| "epoch": 25.40650406504065, |
| "grad_norm": 0.13283970952033997, |
| "learning_rate": 7.409803108916841e-06, |
| "loss": 0.0072, |
| "step": 12500 |
| }, |
| { |
| "epoch": 25.426829268292682, |
| "grad_norm": 0.09182985872030258, |
| "learning_rate": 7.35216079885192e-06, |
| "loss": 0.0062, |
| "step": 12510 |
| }, |
| { |
| "epoch": 25.447154471544714, |
| "grad_norm": 0.12830130755901337, |
| "learning_rate": 7.29472577330827e-06, |
| "loss": 0.0078, |
| "step": 12520 |
| }, |
| { |
| "epoch": 25.46747967479675, |
| "grad_norm": 0.06168400123715401, |
| "learning_rate": 7.237498311441676e-06, |
| "loss": 0.007, |
| "step": 12530 |
| }, |
| { |
| "epoch": 25.48780487804878, |
| "grad_norm": 0.10279642045497894, |
| "learning_rate": 7.180478691399134e-06, |
| "loss": 0.0064, |
| "step": 12540 |
| }, |
| { |
| "epoch": 25.508130081300813, |
| "grad_norm": 0.138838529586792, |
| "learning_rate": 7.123667190317396e-06, |
| "loss": 0.0092, |
| "step": 12550 |
| }, |
| { |
| "epoch": 25.528455284552845, |
| "grad_norm": 0.07453944534063339, |
| "learning_rate": 7.06706408432169e-06, |
| "loss": 0.0055, |
| "step": 12560 |
| }, |
| { |
| "epoch": 25.548780487804876, |
| "grad_norm": 0.14314241707324982, |
| "learning_rate": 7.010669648524404e-06, |
| "loss": 0.0094, |
| "step": 12570 |
| }, |
| { |
| "epoch": 25.56910569105691, |
| "grad_norm": 0.12263060361146927, |
| "learning_rate": 6.954484157023661e-06, |
| "loss": 0.007, |
| "step": 12580 |
| }, |
| { |
| "epoch": 25.589430894308943, |
| "grad_norm": 0.047071073204278946, |
| "learning_rate": 6.898507882902078e-06, |
| "loss": 0.0057, |
| "step": 12590 |
| }, |
| { |
| "epoch": 25.609756097560975, |
| "grad_norm": 0.12850365042686462, |
| "learning_rate": 6.842741098225358e-06, |
| "loss": 0.0095, |
| "step": 12600 |
| }, |
| { |
| "epoch": 25.630081300813007, |
| "grad_norm": 0.06848274171352386, |
| "learning_rate": 6.787184074041031e-06, |
| "loss": 0.0072, |
| "step": 12610 |
| }, |
| { |
| "epoch": 25.65040650406504, |
| "grad_norm": 0.06645460426807404, |
| "learning_rate": 6.731837080377129e-06, |
| "loss": 0.0064, |
| "step": 12620 |
| }, |
| { |
| "epoch": 25.670731707317074, |
| "grad_norm": 0.08997969329357147, |
| "learning_rate": 6.676700386240814e-06, |
| "loss": 0.0094, |
| "step": 12630 |
| }, |
| { |
| "epoch": 25.691056910569106, |
| "grad_norm": 0.06717319041490555, |
| "learning_rate": 6.621774259617125e-06, |
| "loss": 0.006, |
| "step": 12640 |
| }, |
| { |
| "epoch": 25.711382113821138, |
| "grad_norm": 0.08344772458076477, |
| "learning_rate": 6.567058967467704e-06, |
| "loss": 0.0064, |
| "step": 12650 |
| }, |
| { |
| "epoch": 25.73170731707317, |
| "grad_norm": 0.11553539335727692, |
| "learning_rate": 6.51255477572939e-06, |
| "loss": 0.0083, |
| "step": 12660 |
| }, |
| { |
| "epoch": 25.752032520325205, |
| "grad_norm": 0.12962360680103302, |
| "learning_rate": 6.45826194931306e-06, |
| "loss": 0.0079, |
| "step": 12670 |
| }, |
| { |
| "epoch": 25.772357723577237, |
| "grad_norm": 0.10802538692951202, |
| "learning_rate": 6.4041807521022454e-06, |
| "loss": 0.006, |
| "step": 12680 |
| }, |
| { |
| "epoch": 25.79268292682927, |
| "grad_norm": 0.08937724679708481, |
| "learning_rate": 6.350311446951868e-06, |
| "loss": 0.01, |
| "step": 12690 |
| }, |
| { |
| "epoch": 25.8130081300813, |
| "grad_norm": 0.09280110895633698, |
| "learning_rate": 6.29665429568701e-06, |
| "loss": 0.0091, |
| "step": 12700 |
| }, |
| { |
| "epoch": 25.833333333333332, |
| "grad_norm": 0.055351488292217255, |
| "learning_rate": 6.2432095591015705e-06, |
| "loss": 0.0058, |
| "step": 12710 |
| }, |
| { |
| "epoch": 25.853658536585368, |
| "grad_norm": 0.09830985963344574, |
| "learning_rate": 6.1899774969570444e-06, |
| "loss": 0.0083, |
| "step": 12720 |
| }, |
| { |
| "epoch": 25.8739837398374, |
| "grad_norm": 0.06478270888328552, |
| "learning_rate": 6.136958367981272e-06, |
| "loss": 0.0061, |
| "step": 12730 |
| }, |
| { |
| "epoch": 25.89430894308943, |
| "grad_norm": 0.05733104050159454, |
| "learning_rate": 6.084152429867113e-06, |
| "loss": 0.009, |
| "step": 12740 |
| }, |
| { |
| "epoch": 25.914634146341463, |
| "grad_norm": 0.08511028438806534, |
| "learning_rate": 6.0315599392712865e-06, |
| "loss": 0.0062, |
| "step": 12750 |
| }, |
| { |
| "epoch": 25.934959349593495, |
| "grad_norm": 0.09071079641580582, |
| "learning_rate": 5.979181151813057e-06, |
| "loss": 0.0068, |
| "step": 12760 |
| }, |
| { |
| "epoch": 25.95528455284553, |
| "grad_norm": 0.06143470108509064, |
| "learning_rate": 5.927016322072992e-06, |
| "loss": 0.0058, |
| "step": 12770 |
| }, |
| { |
| "epoch": 25.975609756097562, |
| "grad_norm": 0.06430324912071228, |
| "learning_rate": 5.875065703591787e-06, |
| "loss": 0.0113, |
| "step": 12780 |
| }, |
| { |
| "epoch": 25.995934959349594, |
| "grad_norm": 0.10490266978740692, |
| "learning_rate": 5.823329548868939e-06, |
| "loss": 0.0108, |
| "step": 12790 |
| }, |
| { |
| "epoch": 26.016260162601625, |
| "grad_norm": 0.05621851235628128, |
| "learning_rate": 5.77180810936162e-06, |
| "loss": 0.0091, |
| "step": 12800 |
| }, |
| { |
| "epoch": 26.036585365853657, |
| "grad_norm": 0.10055341571569443, |
| "learning_rate": 5.720501635483366e-06, |
| "loss": 0.0096, |
| "step": 12810 |
| }, |
| { |
| "epoch": 26.056910569105693, |
| "grad_norm": 0.06854842603206635, |
| "learning_rate": 5.669410376602918e-06, |
| "loss": 0.0085, |
| "step": 12820 |
| }, |
| { |
| "epoch": 26.077235772357724, |
| "grad_norm": 0.13524769246578217, |
| "learning_rate": 5.618534581043011e-06, |
| "loss": 0.0077, |
| "step": 12830 |
| }, |
| { |
| "epoch": 26.097560975609756, |
| "grad_norm": 0.09193708002567291, |
| "learning_rate": 5.5678744960791005e-06, |
| "loss": 0.0073, |
| "step": 12840 |
| }, |
| { |
| "epoch": 26.117886178861788, |
| "grad_norm": 0.08699709177017212, |
| "learning_rate": 5.517430367938237e-06, |
| "loss": 0.0051, |
| "step": 12850 |
| }, |
| { |
| "epoch": 26.13821138211382, |
| "grad_norm": 0.11282419413328171, |
| "learning_rate": 5.467202441797842e-06, |
| "loss": 0.0065, |
| "step": 12860 |
| }, |
| { |
| "epoch": 26.158536585365855, |
| "grad_norm": 0.053357355296611786, |
| "learning_rate": 5.417190961784497e-06, |
| "loss": 0.008, |
| "step": 12870 |
| }, |
| { |
| "epoch": 26.178861788617887, |
| "grad_norm": 0.125547856092453, |
| "learning_rate": 5.3673961709727885e-06, |
| "loss": 0.0077, |
| "step": 12880 |
| }, |
| { |
| "epoch": 26.19918699186992, |
| "grad_norm": 0.076795294880867, |
| "learning_rate": 5.317818311384115e-06, |
| "loss": 0.0069, |
| "step": 12890 |
| }, |
| { |
| "epoch": 26.21951219512195, |
| "grad_norm": 0.05759301036596298, |
| "learning_rate": 5.2684576239854895e-06, |
| "loss": 0.0071, |
| "step": 12900 |
| }, |
| { |
| "epoch": 26.239837398373982, |
| "grad_norm": 0.08413347601890564, |
| "learning_rate": 5.219314348688414e-06, |
| "loss": 0.0073, |
| "step": 12910 |
| }, |
| { |
| "epoch": 26.260162601626018, |
| "grad_norm": 0.10227837413549423, |
| "learning_rate": 5.170388724347658e-06, |
| "loss": 0.0087, |
| "step": 12920 |
| }, |
| { |
| "epoch": 26.28048780487805, |
| "grad_norm": 0.08494935929775238, |
| "learning_rate": 5.1216809887601245e-06, |
| "loss": 0.0066, |
| "step": 12930 |
| }, |
| { |
| "epoch": 26.30081300813008, |
| "grad_norm": 0.10262715071439743, |
| "learning_rate": 5.073191378663733e-06, |
| "loss": 0.0046, |
| "step": 12940 |
| }, |
| { |
| "epoch": 26.321138211382113, |
| "grad_norm": 0.09187835454940796, |
| "learning_rate": 5.024920129736188e-06, |
| "loss": 0.0063, |
| "step": 12950 |
| }, |
| { |
| "epoch": 26.341463414634145, |
| "grad_norm": 0.1465609073638916, |
| "learning_rate": 4.976867476593894e-06, |
| "loss": 0.008, |
| "step": 12960 |
| }, |
| { |
| "epoch": 26.36178861788618, |
| "grad_norm": 0.08398236334323883, |
| "learning_rate": 4.929033652790821e-06, |
| "loss": 0.0084, |
| "step": 12970 |
| }, |
| { |
| "epoch": 26.382113821138212, |
| "grad_norm": 0.08007702231407166, |
| "learning_rate": 4.881418890817296e-06, |
| "loss": 0.0093, |
| "step": 12980 |
| }, |
| { |
| "epoch": 26.402439024390244, |
| "grad_norm": 0.13101662695407867, |
| "learning_rate": 4.834023422098971e-06, |
| "loss": 0.0075, |
| "step": 12990 |
| }, |
| { |
| "epoch": 26.422764227642276, |
| "grad_norm": 0.07166078686714172, |
| "learning_rate": 4.7868474769956266e-06, |
| "loss": 0.0094, |
| "step": 13000 |
| }, |
| { |
| "epoch": 26.443089430894307, |
| "grad_norm": 0.0517122708261013, |
| "learning_rate": 4.7398912848000636e-06, |
| "loss": 0.0098, |
| "step": 13010 |
| }, |
| { |
| "epoch": 26.463414634146343, |
| "grad_norm": 0.06409566849470139, |
| "learning_rate": 4.6931550737370264e-06, |
| "loss": 0.0082, |
| "step": 13020 |
| }, |
| { |
| "epoch": 26.483739837398375, |
| "grad_norm": 0.07286877185106277, |
| "learning_rate": 4.646639070962067e-06, |
| "loss": 0.0072, |
| "step": 13030 |
| }, |
| { |
| "epoch": 26.504065040650406, |
| "grad_norm": 0.05960990488529205, |
| "learning_rate": 4.600343502560439e-06, |
| "loss": 0.0064, |
| "step": 13040 |
| }, |
| { |
| "epoch": 26.524390243902438, |
| "grad_norm": 0.07247988879680634, |
| "learning_rate": 4.55426859354599e-06, |
| "loss": 0.0084, |
| "step": 13050 |
| }, |
| { |
| "epoch": 26.54471544715447, |
| "grad_norm": 0.06973347067832947, |
| "learning_rate": 4.5084145678600805e-06, |
| "loss": 0.0081, |
| "step": 13060 |
| }, |
| { |
| "epoch": 26.565040650406505, |
| "grad_norm": 0.1406828910112381, |
| "learning_rate": 4.462781648370518e-06, |
| "loss": 0.0055, |
| "step": 13070 |
| }, |
| { |
| "epoch": 26.585365853658537, |
| "grad_norm": 0.08069416135549545, |
| "learning_rate": 4.417370056870418e-06, |
| "loss": 0.006, |
| "step": 13080 |
| }, |
| { |
| "epoch": 26.60569105691057, |
| "grad_norm": 0.10259624570608139, |
| "learning_rate": 4.372180014077193e-06, |
| "loss": 0.0091, |
| "step": 13090 |
| }, |
| { |
| "epoch": 26.6260162601626, |
| "grad_norm": 0.08780638128519058, |
| "learning_rate": 4.327211739631415e-06, |
| "loss": 0.0067, |
| "step": 13100 |
| }, |
| { |
| "epoch": 26.646341463414632, |
| "grad_norm": 0.0880453810095787, |
| "learning_rate": 4.282465452095802e-06, |
| "loss": 0.0071, |
| "step": 13110 |
| }, |
| { |
| "epoch": 26.666666666666668, |
| "grad_norm": 0.09189548343420029, |
| "learning_rate": 4.237941368954124e-06, |
| "loss": 0.0094, |
| "step": 13120 |
| }, |
| { |
| "epoch": 26.6869918699187, |
| "grad_norm": 0.05421117693185806, |
| "learning_rate": 4.193639706610147e-06, |
| "loss": 0.0092, |
| "step": 13130 |
| }, |
| { |
| "epoch": 26.70731707317073, |
| "grad_norm": 0.10190868377685547, |
| "learning_rate": 4.149560680386588e-06, |
| "loss": 0.0074, |
| "step": 13140 |
| }, |
| { |
| "epoch": 26.727642276422763, |
| "grad_norm": 0.11536475270986557, |
| "learning_rate": 4.105704504524094e-06, |
| "loss": 0.0068, |
| "step": 13150 |
| }, |
| { |
| "epoch": 26.747967479674795, |
| "grad_norm": 0.08702490478754044, |
| "learning_rate": 4.0620713921801334e-06, |
| "loss": 0.0047, |
| "step": 13160 |
| }, |
| { |
| "epoch": 26.76829268292683, |
| "grad_norm": 0.0775069072842598, |
| "learning_rate": 4.0186615554280385e-06, |
| "loss": 0.0084, |
| "step": 13170 |
| }, |
| { |
| "epoch": 26.788617886178862, |
| "grad_norm": 0.06977381557226181, |
| "learning_rate": 3.975475205255929e-06, |
| "loss": 0.0067, |
| "step": 13180 |
| }, |
| { |
| "epoch": 26.808943089430894, |
| "grad_norm": 0.05994332954287529, |
| "learning_rate": 3.932512551565676e-06, |
| "loss": 0.0051, |
| "step": 13190 |
| }, |
| { |
| "epoch": 26.829268292682926, |
| "grad_norm": 0.06960119307041168, |
| "learning_rate": 3.889773803171936e-06, |
| "loss": 0.0105, |
| "step": 13200 |
| }, |
| { |
| "epoch": 26.84959349593496, |
| "grad_norm": 0.05765342339873314, |
| "learning_rate": 3.847259167801076e-06, |
| "loss": 0.0066, |
| "step": 13210 |
| }, |
| { |
| "epoch": 26.869918699186993, |
| "grad_norm": 0.11678333580493927, |
| "learning_rate": 3.804968852090185e-06, |
| "loss": 0.0074, |
| "step": 13220 |
| }, |
| { |
| "epoch": 26.890243902439025, |
| "grad_norm": 0.06083038076758385, |
| "learning_rate": 3.762903061586104e-06, |
| "loss": 0.0072, |
| "step": 13230 |
| }, |
| { |
| "epoch": 26.910569105691057, |
| "grad_norm": 0.04766000062227249, |
| "learning_rate": 3.721062000744363e-06, |
| "loss": 0.0098, |
| "step": 13240 |
| }, |
| { |
| "epoch": 26.93089430894309, |
| "grad_norm": 0.06560375541448593, |
| "learning_rate": 3.679445872928244e-06, |
| "loss": 0.0094, |
| "step": 13250 |
| }, |
| { |
| "epoch": 26.951219512195124, |
| "grad_norm": 0.08765765279531479, |
| "learning_rate": 3.6380548804077707e-06, |
| "loss": 0.0093, |
| "step": 13260 |
| }, |
| { |
| "epoch": 26.971544715447155, |
| "grad_norm": 0.12069438397884369, |
| "learning_rate": 3.5968892243587016e-06, |
| "loss": 0.0091, |
| "step": 13270 |
| }, |
| { |
| "epoch": 26.991869918699187, |
| "grad_norm": 0.07621457427740097, |
| "learning_rate": 3.555949104861611e-06, |
| "loss": 0.0077, |
| "step": 13280 |
| }, |
| { |
| "epoch": 27.01219512195122, |
| "grad_norm": 0.1240905299782753, |
| "learning_rate": 3.5152347209008394e-06, |
| "loss": 0.0087, |
| "step": 13290 |
| }, |
| { |
| "epoch": 27.03252032520325, |
| "grad_norm": 0.167019784450531, |
| "learning_rate": 3.4747462703636104e-06, |
| "loss": 0.0093, |
| "step": 13300 |
| }, |
| { |
| "epoch": 27.052845528455286, |
| "grad_norm": 0.06322633475065231, |
| "learning_rate": 3.434483950038986e-06, |
| "loss": 0.006, |
| "step": 13310 |
| }, |
| { |
| "epoch": 27.073170731707318, |
| "grad_norm": 0.05990873649716377, |
| "learning_rate": 3.3944479556169806e-06, |
| "loss": 0.0076, |
| "step": 13320 |
| }, |
| { |
| "epoch": 27.09349593495935, |
| "grad_norm": 0.057164546102285385, |
| "learning_rate": 3.3546384816875665e-06, |
| "loss": 0.0078, |
| "step": 13330 |
| }, |
| { |
| "epoch": 27.11382113821138, |
| "grad_norm": 0.08408369868993759, |
| "learning_rate": 3.315055721739746e-06, |
| "loss": 0.007, |
| "step": 13340 |
| }, |
| { |
| "epoch": 27.134146341463413, |
| "grad_norm": 0.05896256864070892, |
| "learning_rate": 3.275699868160592e-06, |
| "loss": 0.005, |
| "step": 13350 |
| }, |
| { |
| "epoch": 27.15447154471545, |
| "grad_norm": 0.09455864876508713, |
| "learning_rate": 3.23657111223436e-06, |
| "loss": 0.0072, |
| "step": 13360 |
| }, |
| { |
| "epoch": 27.17479674796748, |
| "grad_norm": 0.13212212920188904, |
| "learning_rate": 3.1976696441414764e-06, |
| "loss": 0.0069, |
| "step": 13370 |
| }, |
| { |
| "epoch": 27.195121951219512, |
| "grad_norm": 0.04661380499601364, |
| "learning_rate": 3.158995652957719e-06, |
| "loss": 0.0072, |
| "step": 13380 |
| }, |
| { |
| "epoch": 27.215447154471544, |
| "grad_norm": 0.06701108068227768, |
| "learning_rate": 3.1205493266531937e-06, |
| "loss": 0.0092, |
| "step": 13390 |
| }, |
| { |
| "epoch": 27.235772357723576, |
| "grad_norm": 0.07119470834732056, |
| "learning_rate": 3.082330852091497e-06, |
| "loss": 0.0062, |
| "step": 13400 |
| }, |
| { |
| "epoch": 27.25609756097561, |
| "grad_norm": 0.11395665258169174, |
| "learning_rate": 3.0443404150287847e-06, |
| "loss": 0.0065, |
| "step": 13410 |
| }, |
| { |
| "epoch": 27.276422764227643, |
| "grad_norm": 0.08765079081058502, |
| "learning_rate": 3.0065782001128475e-06, |
| "loss": 0.0064, |
| "step": 13420 |
| }, |
| { |
| "epoch": 27.296747967479675, |
| "grad_norm": 0.05511057376861572, |
| "learning_rate": 2.9690443908822252e-06, |
| "loss": 0.007, |
| "step": 13430 |
| }, |
| { |
| "epoch": 27.317073170731707, |
| "grad_norm": 0.04316573217511177, |
| "learning_rate": 2.9317391697653518e-06, |
| "loss": 0.0098, |
| "step": 13440 |
| }, |
| { |
| "epoch": 27.33739837398374, |
| "grad_norm": 0.0791875571012497, |
| "learning_rate": 2.8946627180795936e-06, |
| "loss": 0.0054, |
| "step": 13450 |
| }, |
| { |
| "epoch": 27.357723577235774, |
| "grad_norm": 0.07902947813272476, |
| "learning_rate": 2.8578152160304573e-06, |
| "loss": 0.0079, |
| "step": 13460 |
| }, |
| { |
| "epoch": 27.378048780487806, |
| "grad_norm": 0.06220165267586708, |
| "learning_rate": 2.821196842710638e-06, |
| "loss": 0.0086, |
| "step": 13470 |
| }, |
| { |
| "epoch": 27.398373983739837, |
| "grad_norm": 0.09437467157840729, |
| "learning_rate": 2.7848077760991853e-06, |
| "loss": 0.0061, |
| "step": 13480 |
| }, |
| { |
| "epoch": 27.41869918699187, |
| "grad_norm": 0.06286446005105972, |
| "learning_rate": 2.7486481930606434e-06, |
| "loss": 0.0127, |
| "step": 13490 |
| }, |
| { |
| "epoch": 27.4390243902439, |
| "grad_norm": 0.05507444962859154, |
| "learning_rate": 2.712718269344161e-06, |
| "loss": 0.0099, |
| "step": 13500 |
| }, |
| { |
| "epoch": 27.459349593495936, |
| "grad_norm": 0.09279751032590866, |
| "learning_rate": 2.677018179582669e-06, |
| "loss": 0.0116, |
| "step": 13510 |
| }, |
| { |
| "epoch": 27.479674796747968, |
| "grad_norm": 0.06787155568599701, |
| "learning_rate": 2.641548097292024e-06, |
| "loss": 0.005, |
| "step": 13520 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 0.061754241585731506, |
| "learning_rate": 2.606308194870133e-06, |
| "loss": 0.007, |
| "step": 13530 |
| }, |
| { |
| "epoch": 27.520325203252032, |
| "grad_norm": 0.10742611438035965, |
| "learning_rate": 2.5712986435961707e-06, |
| "loss": 0.0124, |
| "step": 13540 |
| }, |
| { |
| "epoch": 27.540650406504064, |
| "grad_norm": 0.12080876529216766, |
| "learning_rate": 2.536519613629723e-06, |
| "loss": 0.0086, |
| "step": 13550 |
| }, |
| { |
| "epoch": 27.5609756097561, |
| "grad_norm": 0.10064790397882462, |
| "learning_rate": 2.501971274009923e-06, |
| "loss": 0.0088, |
| "step": 13560 |
| }, |
| { |
| "epoch": 27.58130081300813, |
| "grad_norm": 0.07646507024765015, |
| "learning_rate": 2.467653792654695e-06, |
| "loss": 0.0049, |
| "step": 13570 |
| }, |
| { |
| "epoch": 27.601626016260163, |
| "grad_norm": 0.06533602625131607, |
| "learning_rate": 2.4335673363598822e-06, |
| "loss": 0.0092, |
| "step": 13580 |
| }, |
| { |
| "epoch": 27.621951219512194, |
| "grad_norm": 0.10205673426389694, |
| "learning_rate": 2.399712070798471e-06, |
| "loss": 0.0086, |
| "step": 13590 |
| }, |
| { |
| "epoch": 27.642276422764226, |
| "grad_norm": 0.05475059896707535, |
| "learning_rate": 2.3660881605197694e-06, |
| "loss": 0.0089, |
| "step": 13600 |
| }, |
| { |
| "epoch": 27.66260162601626, |
| "grad_norm": 0.06472466886043549, |
| "learning_rate": 2.332695768948617e-06, |
| "loss": 0.0087, |
| "step": 13610 |
| }, |
| { |
| "epoch": 27.682926829268293, |
| "grad_norm": 0.07751797884702682, |
| "learning_rate": 2.299535058384583e-06, |
| "loss": 0.0069, |
| "step": 13620 |
| }, |
| { |
| "epoch": 27.703252032520325, |
| "grad_norm": 0.06442529708147049, |
| "learning_rate": 2.266606190001186e-06, |
| "loss": 0.0074, |
| "step": 13630 |
| }, |
| { |
| "epoch": 27.723577235772357, |
| "grad_norm": 0.06449927389621735, |
| "learning_rate": 2.2339093238450737e-06, |
| "loss": 0.0085, |
| "step": 13640 |
| }, |
| { |
| "epoch": 27.74390243902439, |
| "grad_norm": 0.08560163527727127, |
| "learning_rate": 2.20144461883533e-06, |
| "loss": 0.0063, |
| "step": 13650 |
| }, |
| { |
| "epoch": 27.764227642276424, |
| "grad_norm": 0.06769910454750061, |
| "learning_rate": 2.1692122327625908e-06, |
| "loss": 0.0096, |
| "step": 13660 |
| }, |
| { |
| "epoch": 27.784552845528456, |
| "grad_norm": 0.03535304218530655, |
| "learning_rate": 2.137212322288379e-06, |
| "loss": 0.0043, |
| "step": 13670 |
| }, |
| { |
| "epoch": 27.804878048780488, |
| "grad_norm": 0.09477891027927399, |
| "learning_rate": 2.105445042944282e-06, |
| "loss": 0.0109, |
| "step": 13680 |
| }, |
| { |
| "epoch": 27.82520325203252, |
| "grad_norm": 0.3004438281059265, |
| "learning_rate": 2.0739105491312027e-06, |
| "loss": 0.0066, |
| "step": 13690 |
| }, |
| { |
| "epoch": 27.84552845528455, |
| "grad_norm": 0.05074850842356682, |
| "learning_rate": 2.0426089941186443e-06, |
| "loss": 0.0064, |
| "step": 13700 |
| }, |
| { |
| "epoch": 27.865853658536587, |
| "grad_norm": 0.06132441386580467, |
| "learning_rate": 2.0115405300439093e-06, |
| "loss": 0.009, |
| "step": 13710 |
| }, |
| { |
| "epoch": 27.88617886178862, |
| "grad_norm": 0.09876362234354019, |
| "learning_rate": 1.9807053079114013e-06, |
| "loss": 0.0057, |
| "step": 13720 |
| }, |
| { |
| "epoch": 27.90650406504065, |
| "grad_norm": 0.13496215641498566, |
| "learning_rate": 1.9501034775919024e-06, |
| "loss": 0.0068, |
| "step": 13730 |
| }, |
| { |
| "epoch": 27.926829268292682, |
| "grad_norm": 0.07970809936523438, |
| "learning_rate": 1.9197351878217917e-06, |
| "loss": 0.0058, |
| "step": 13740 |
| }, |
| { |
| "epoch": 27.947154471544714, |
| "grad_norm": 0.10741535574197769, |
| "learning_rate": 1.8896005862023669e-06, |
| "loss": 0.0093, |
| "step": 13750 |
| }, |
| { |
| "epoch": 27.96747967479675, |
| "grad_norm": 0.09508837759494781, |
| "learning_rate": 1.8596998191991288e-06, |
| "loss": 0.007, |
| "step": 13760 |
| }, |
| { |
| "epoch": 27.98780487804878, |
| "grad_norm": 0.07169695198535919, |
| "learning_rate": 1.8300330321410208e-06, |
| "loss": 0.0052, |
| "step": 13770 |
| }, |
| { |
| "epoch": 28.008130081300813, |
| "grad_norm": 0.07175235450267792, |
| "learning_rate": 1.8006003692197794e-06, |
| "loss": 0.0064, |
| "step": 13780 |
| }, |
| { |
| "epoch": 28.028455284552845, |
| "grad_norm": 0.12354432791471481, |
| "learning_rate": 1.7714019734892062e-06, |
| "loss": 0.0077, |
| "step": 13790 |
| }, |
| { |
| "epoch": 28.048780487804876, |
| "grad_norm": 0.10336112231016159, |
| "learning_rate": 1.7424379868644759e-06, |
| "loss": 0.0082, |
| "step": 13800 |
| }, |
| { |
| "epoch": 28.06910569105691, |
| "grad_norm": 0.08332820981740952, |
| "learning_rate": 1.71370855012144e-06, |
| "loss": 0.0097, |
| "step": 13810 |
| }, |
| { |
| "epoch": 28.089430894308943, |
| "grad_norm": 0.047899335622787476, |
| "learning_rate": 1.6852138028959574e-06, |
| "loss": 0.0055, |
| "step": 13820 |
| }, |
| { |
| "epoch": 28.109756097560975, |
| "grad_norm": 0.13389968872070312, |
| "learning_rate": 1.6569538836832044e-06, |
| "loss": 0.0097, |
| "step": 13830 |
| }, |
| { |
| "epoch": 28.130081300813007, |
| "grad_norm": 0.09461624920368195, |
| "learning_rate": 1.6289289298370147e-06, |
| "loss": 0.0071, |
| "step": 13840 |
| }, |
| { |
| "epoch": 28.150406504065042, |
| "grad_norm": 0.046392664313316345, |
| "learning_rate": 1.6011390775691748e-06, |
| "loss": 0.0062, |
| "step": 13850 |
| }, |
| { |
| "epoch": 28.170731707317074, |
| "grad_norm": 0.09244752675294876, |
| "learning_rate": 1.5735844619488238e-06, |
| "loss": 0.0089, |
| "step": 13860 |
| }, |
| { |
| "epoch": 28.191056910569106, |
| "grad_norm": 0.08714314550161362, |
| "learning_rate": 1.5462652169017322e-06, |
| "loss": 0.0044, |
| "step": 13870 |
| }, |
| { |
| "epoch": 28.211382113821138, |
| "grad_norm": 0.06532461196184158, |
| "learning_rate": 1.5191814752097023e-06, |
| "loss": 0.0065, |
| "step": 13880 |
| }, |
| { |
| "epoch": 28.23170731707317, |
| "grad_norm": 0.07722274214029312, |
| "learning_rate": 1.492333368509896e-06, |
| "loss": 0.0066, |
| "step": 13890 |
| }, |
| { |
| "epoch": 28.252032520325205, |
| "grad_norm": 0.1046539843082428, |
| "learning_rate": 1.4657210272941923e-06, |
| "loss": 0.0084, |
| "step": 13900 |
| }, |
| { |
| "epoch": 28.272357723577237, |
| "grad_norm": 0.05809272080659866, |
| "learning_rate": 1.4393445809085748e-06, |
| "loss": 0.0052, |
| "step": 13910 |
| }, |
| { |
| "epoch": 28.29268292682927, |
| "grad_norm": 0.06860511004924774, |
| "learning_rate": 1.4132041575524834e-06, |
| "loss": 0.0078, |
| "step": 13920 |
| }, |
| { |
| "epoch": 28.3130081300813, |
| "grad_norm": 0.051914114505052567, |
| "learning_rate": 1.387299884278187e-06, |
| "loss": 0.0062, |
| "step": 13930 |
| }, |
| { |
| "epoch": 28.333333333333332, |
| "grad_norm": 0.07846979796886444, |
| "learning_rate": 1.3616318869901945e-06, |
| "loss": 0.0059, |
| "step": 13940 |
| }, |
| { |
| "epoch": 28.353658536585368, |
| "grad_norm": 0.08878767490386963, |
| "learning_rate": 1.336200290444606e-06, |
| "loss": 0.0091, |
| "step": 13950 |
| }, |
| { |
| "epoch": 28.3739837398374, |
| "grad_norm": 0.04697742313146591, |
| "learning_rate": 1.3110052182485454e-06, |
| "loss": 0.0071, |
| "step": 13960 |
| }, |
| { |
| "epoch": 28.39430894308943, |
| "grad_norm": 0.05483054742217064, |
| "learning_rate": 1.2860467928595298e-06, |
| "loss": 0.0048, |
| "step": 13970 |
| }, |
| { |
| "epoch": 28.414634146341463, |
| "grad_norm": 0.04687541350722313, |
| "learning_rate": 1.2613251355848732e-06, |
| "loss": 0.0065, |
| "step": 13980 |
| }, |
| { |
| "epoch": 28.434959349593495, |
| "grad_norm": 0.06474806368350983, |
| "learning_rate": 1.2368403665811324e-06, |
| "loss": 0.0064, |
| "step": 13990 |
| }, |
| { |
| "epoch": 28.45528455284553, |
| "grad_norm": 0.053517088294029236, |
| "learning_rate": 1.2125926048534686e-06, |
| "loss": 0.006, |
| "step": 14000 |
| }, |
| { |
| "epoch": 28.475609756097562, |
| "grad_norm": 0.0952361673116684, |
| "learning_rate": 1.1885819682551259e-06, |
| "loss": 0.0075, |
| "step": 14010 |
| }, |
| { |
| "epoch": 28.495934959349594, |
| "grad_norm": 0.11012918502092361, |
| "learning_rate": 1.164808573486814e-06, |
| "loss": 0.0079, |
| "step": 14020 |
| }, |
| { |
| "epoch": 28.516260162601625, |
| "grad_norm": 0.06327813118696213, |
| "learning_rate": 1.1412725360961608e-06, |
| "loss": 0.0062, |
| "step": 14030 |
| }, |
| { |
| "epoch": 28.536585365853657, |
| "grad_norm": 0.13205966353416443, |
| "learning_rate": 1.1179739704771486e-06, |
| "loss": 0.0064, |
| "step": 14040 |
| }, |
| { |
| "epoch": 28.556910569105693, |
| "grad_norm": 0.06284066289663315, |
| "learning_rate": 1.0949129898695675e-06, |
| "loss": 0.006, |
| "step": 14050 |
| }, |
| { |
| "epoch": 28.577235772357724, |
| "grad_norm": 0.10052363574504852, |
| "learning_rate": 1.0720897063584423e-06, |
| "loss": 0.0066, |
| "step": 14060 |
| }, |
| { |
| "epoch": 28.597560975609756, |
| "grad_norm": 0.09994717687368393, |
| "learning_rate": 1.0495042308735103e-06, |
| "loss": 0.0072, |
| "step": 14070 |
| }, |
| { |
| "epoch": 28.617886178861788, |
| "grad_norm": 0.06892990320920944, |
| "learning_rate": 1.0271566731886617e-06, |
| "loss": 0.0058, |
| "step": 14080 |
| }, |
| { |
| "epoch": 28.63821138211382, |
| "grad_norm": 0.061333317309617996, |
| "learning_rate": 1.005047141921428e-06, |
| "loss": 0.0054, |
| "step": 14090 |
| }, |
| { |
| "epoch": 28.658536585365855, |
| "grad_norm": 0.1620459407567978, |
| "learning_rate": 9.831757445324274e-07, |
| "loss": 0.0084, |
| "step": 14100 |
| }, |
| { |
| "epoch": 28.678861788617887, |
| "grad_norm": 0.09121565520763397, |
| "learning_rate": 9.615425873248761e-07, |
| "loss": 0.0053, |
| "step": 14110 |
| }, |
| { |
| "epoch": 28.69918699186992, |
| "grad_norm": 0.08609441667795181, |
| "learning_rate": 9.401477754440502e-07, |
| "loss": 0.0058, |
| "step": 14120 |
| }, |
| { |
| "epoch": 28.71951219512195, |
| "grad_norm": 0.11729393154382706, |
| "learning_rate": 9.189914128767684e-07, |
| "loss": 0.0086, |
| "step": 14130 |
| }, |
| { |
| "epoch": 28.739837398373982, |
| "grad_norm": 0.063324473798275, |
| "learning_rate": 8.980736024508996e-07, |
| "loss": 0.0097, |
| "step": 14140 |
| }, |
| { |
| "epoch": 28.760162601626018, |
| "grad_norm": 0.07590149343013763, |
| "learning_rate": 8.77394445834867e-07, |
| "loss": 0.006, |
| "step": 14150 |
| }, |
| { |
| "epoch": 28.78048780487805, |
| "grad_norm": 0.09050539880990982, |
| "learning_rate": 8.569540435371281e-07, |
| "loss": 0.0063, |
| "step": 14160 |
| }, |
| { |
| "epoch": 28.80081300813008, |
| "grad_norm": 0.06578807532787323, |
| "learning_rate": 8.367524949057348e-07, |
| "loss": 0.0095, |
| "step": 14170 |
| }, |
| { |
| "epoch": 28.821138211382113, |
| "grad_norm": 0.11855498701334, |
| "learning_rate": 8.167898981277844e-07, |
| "loss": 0.007, |
| "step": 14180 |
| }, |
| { |
| "epoch": 28.841463414634145, |
| "grad_norm": 0.059526070952415466, |
| "learning_rate": 7.970663502290143e-07, |
| "loss": 0.0053, |
| "step": 14190 |
| }, |
| { |
| "epoch": 28.86178861788618, |
| "grad_norm": 0.09736321866512299, |
| "learning_rate": 7.775819470732692e-07, |
| "loss": 0.0053, |
| "step": 14200 |
| }, |
| { |
| "epoch": 28.882113821138212, |
| "grad_norm": 0.13699515163898468, |
| "learning_rate": 7.583367833620681e-07, |
| "loss": 0.0079, |
| "step": 14210 |
| }, |
| { |
| "epoch": 28.902439024390244, |
| "grad_norm": 0.08840407431125641, |
| "learning_rate": 7.39330952634143e-07, |
| "loss": 0.0081, |
| "step": 14220 |
| }, |
| { |
| "epoch": 28.922764227642276, |
| "grad_norm": 0.040457312017679214, |
| "learning_rate": 7.205645472649681e-07, |
| "loss": 0.0074, |
| "step": 14230 |
| }, |
| { |
| "epoch": 28.943089430894307, |
| "grad_norm": 0.1015058159828186, |
| "learning_rate": 7.020376584663202e-07, |
| "loss": 0.0058, |
| "step": 14240 |
| }, |
| { |
| "epoch": 28.963414634146343, |
| "grad_norm": 0.0492464154958725, |
| "learning_rate": 6.83750376285841e-07, |
| "loss": 0.0066, |
| "step": 14250 |
| }, |
| { |
| "epoch": 28.983739837398375, |
| "grad_norm": 0.07046113163232803, |
| "learning_rate": 6.657027896065982e-07, |
| "loss": 0.0075, |
| "step": 14260 |
| }, |
| { |
| "epoch": 29.004065040650406, |
| "grad_norm": 0.08203638345003128, |
| "learning_rate": 6.478949861466355e-07, |
| "loss": 0.0057, |
| "step": 14270 |
| }, |
| { |
| "epoch": 29.024390243902438, |
| "grad_norm": 0.07042611390352249, |
| "learning_rate": 6.303270524585736e-07, |
| "loss": 0.0057, |
| "step": 14280 |
| }, |
| { |
| "epoch": 29.04471544715447, |
| "grad_norm": 0.10965394973754883, |
| "learning_rate": 6.129990739291713e-07, |
| "loss": 0.0043, |
| "step": 14290 |
| }, |
| { |
| "epoch": 29.065040650406505, |
| "grad_norm": 0.04560219496488571, |
| "learning_rate": 5.959111347789093e-07, |
| "loss": 0.0062, |
| "step": 14300 |
| }, |
| { |
| "epoch": 29.085365853658537, |
| "grad_norm": 0.1140056774020195, |
| "learning_rate": 5.790633180615956e-07, |
| "loss": 0.0053, |
| "step": 14310 |
| }, |
| { |
| "epoch": 29.10569105691057, |
| "grad_norm": 0.0836968868970871, |
| "learning_rate": 5.624557056639446e-07, |
| "loss": 0.0094, |
| "step": 14320 |
| }, |
| { |
| "epoch": 29.1260162601626, |
| "grad_norm": 0.06501860171556473, |
| "learning_rate": 5.460883783051984e-07, |
| "loss": 0.0073, |
| "step": 14330 |
| }, |
| { |
| "epoch": 29.146341463414632, |
| "grad_norm": 0.08121626824140549, |
| "learning_rate": 5.299614155367171e-07, |
| "loss": 0.0075, |
| "step": 14340 |
| }, |
| { |
| "epoch": 29.166666666666668, |
| "grad_norm": 0.05819685012102127, |
| "learning_rate": 5.140748957415897e-07, |
| "loss": 0.0061, |
| "step": 14350 |
| }, |
| { |
| "epoch": 29.1869918699187, |
| "grad_norm": 0.1377357393503189, |
| "learning_rate": 4.984288961342787e-07, |
| "loss": 0.0075, |
| "step": 14360 |
| }, |
| { |
| "epoch": 29.20731707317073, |
| "grad_norm": 0.10657922178506851, |
| "learning_rate": 4.830234927602206e-07, |
| "loss": 0.0064, |
| "step": 14370 |
| }, |
| { |
| "epoch": 29.227642276422763, |
| "grad_norm": 0.05665343627333641, |
| "learning_rate": 4.6785876049545986e-07, |
| "loss": 0.0084, |
| "step": 14380 |
| }, |
| { |
| "epoch": 29.247967479674795, |
| "grad_norm": 0.07270264625549316, |
| "learning_rate": 4.5293477304629297e-07, |
| "loss": 0.0063, |
| "step": 14390 |
| }, |
| { |
| "epoch": 29.26829268292683, |
| "grad_norm": 0.050934601575136185, |
| "learning_rate": 4.382516029489081e-07, |
| "loss": 0.0083, |
| "step": 14400 |
| }, |
| { |
| "epoch": 29.288617886178862, |
| "grad_norm": 0.06483236700296402, |
| "learning_rate": 4.2380932156902975e-07, |
| "loss": 0.0083, |
| "step": 14410 |
| }, |
| { |
| "epoch": 29.308943089430894, |
| "grad_norm": 0.07827263325452805, |
| "learning_rate": 4.0960799910156335e-07, |
| "loss": 0.006, |
| "step": 14420 |
| }, |
| { |
| "epoch": 29.329268292682926, |
| "grad_norm": 0.07869955897331238, |
| "learning_rate": 3.956477045702844e-07, |
| "loss": 0.0047, |
| "step": 14430 |
| }, |
| { |
| "epoch": 29.34959349593496, |
| "grad_norm": 0.07201409339904785, |
| "learning_rate": 3.819285058274613e-07, |
| "loss": 0.005, |
| "step": 14440 |
| }, |
| { |
| "epoch": 29.369918699186993, |
| "grad_norm": 0.12948809564113617, |
| "learning_rate": 3.684504695535496e-07, |
| "loss": 0.0115, |
| "step": 14450 |
| }, |
| { |
| "epoch": 29.390243902439025, |
| "grad_norm": 0.06162776052951813, |
| "learning_rate": 3.552136612568813e-07, |
| "loss": 0.0054, |
| "step": 14460 |
| }, |
| { |
| "epoch": 29.410569105691057, |
| "grad_norm": 0.11850345879793167, |
| "learning_rate": 3.422181452733042e-07, |
| "loss": 0.0093, |
| "step": 14470 |
| }, |
| { |
| "epoch": 29.43089430894309, |
| "grad_norm": 0.0406278520822525, |
| "learning_rate": 3.294639847659209e-07, |
| "loss": 0.0106, |
| "step": 14480 |
| }, |
| { |
| "epoch": 29.451219512195124, |
| "grad_norm": 0.1252788007259369, |
| "learning_rate": 3.169512417247389e-07, |
| "loss": 0.0073, |
| "step": 14490 |
| }, |
| { |
| "epoch": 29.471544715447155, |
| "grad_norm": 0.049834635108709335, |
| "learning_rate": 3.046799769663822e-07, |
| "loss": 0.0044, |
| "step": 14500 |
| }, |
| { |
| "epoch": 29.491869918699187, |
| "grad_norm": 0.04352590814232826, |
| "learning_rate": 2.926502501338191e-07, |
| "loss": 0.0058, |
| "step": 14510 |
| }, |
| { |
| "epoch": 29.51219512195122, |
| "grad_norm": 0.08490035682916641, |
| "learning_rate": 2.808621196960404e-07, |
| "loss": 0.0059, |
| "step": 14520 |
| }, |
| { |
| "epoch": 29.53252032520325, |
| "grad_norm": 0.09383466094732285, |
| "learning_rate": 2.6931564294778164e-07, |
| "loss": 0.0082, |
| "step": 14530 |
| }, |
| { |
| "epoch": 29.552845528455286, |
| "grad_norm": 0.06676391512155533, |
| "learning_rate": 2.58010876009257e-07, |
| "loss": 0.0072, |
| "step": 14540 |
| }, |
| { |
| "epoch": 29.573170731707318, |
| "grad_norm": 0.08328359574079514, |
| "learning_rate": 2.4694787382589237e-07, |
| "loss": 0.0074, |
| "step": 14550 |
| }, |
| { |
| "epoch": 29.59349593495935, |
| "grad_norm": 0.08633271604776382, |
| "learning_rate": 2.3612669016802592e-07, |
| "loss": 0.0063, |
| "step": 14560 |
| }, |
| { |
| "epoch": 29.61382113821138, |
| "grad_norm": 0.0577889010310173, |
| "learning_rate": 2.2554737763068045e-07, |
| "loss": 0.0085, |
| "step": 14570 |
| }, |
| { |
| "epoch": 29.634146341463413, |
| "grad_norm": 0.05909036099910736, |
| "learning_rate": 2.152099876332858e-07, |
| "loss": 0.0069, |
| "step": 14580 |
| }, |
| { |
| "epoch": 29.65447154471545, |
| "grad_norm": 0.07131492346525192, |
| "learning_rate": 2.051145704194457e-07, |
| "loss": 0.0063, |
| "step": 14590 |
| }, |
| { |
| "epoch": 29.67479674796748, |
| "grad_norm": 0.04138827696442604, |
| "learning_rate": 1.9526117505667129e-07, |
| "loss": 0.0058, |
| "step": 14600 |
| }, |
| { |
| "epoch": 29.695121951219512, |
| "grad_norm": 0.08429524302482605, |
| "learning_rate": 1.856498494361758e-07, |
| "loss": 0.0076, |
| "step": 14610 |
| }, |
| { |
| "epoch": 29.715447154471544, |
| "grad_norm": 0.0781969353556633, |
| "learning_rate": 1.7628064027260803e-07, |
| "loss": 0.0053, |
| "step": 14620 |
| }, |
| { |
| "epoch": 29.735772357723576, |
| "grad_norm": 0.09103638678789139, |
| "learning_rate": 1.671535931038415e-07, |
| "loss": 0.0115, |
| "step": 14630 |
| }, |
| { |
| "epoch": 29.75609756097561, |
| "grad_norm": 0.09687261283397675, |
| "learning_rate": 1.5826875229076333e-07, |
| "loss": 0.0066, |
| "step": 14640 |
| }, |
| { |
| "epoch": 29.776422764227643, |
| "grad_norm": 0.07173122465610504, |
| "learning_rate": 1.496261610170302e-07, |
| "loss": 0.0068, |
| "step": 14650 |
| }, |
| { |
| "epoch": 29.796747967479675, |
| "grad_norm": 0.08791932463645935, |
| "learning_rate": 1.4122586128888503e-07, |
| "loss": 0.0054, |
| "step": 14660 |
| }, |
| { |
| "epoch": 29.817073170731707, |
| "grad_norm": 0.04476994648575783, |
| "learning_rate": 1.3306789393494612e-07, |
| "loss": 0.0039, |
| "step": 14670 |
| }, |
| { |
| "epoch": 29.83739837398374, |
| "grad_norm": 0.07385668158531189, |
| "learning_rate": 1.2515229860599054e-07, |
| "loss": 0.0064, |
| "step": 14680 |
| }, |
| { |
| "epoch": 29.857723577235774, |
| "grad_norm": 0.06950011104345322, |
| "learning_rate": 1.1747911377478771e-07, |
| "loss": 0.0057, |
| "step": 14690 |
| }, |
| { |
| "epoch": 29.878048780487806, |
| "grad_norm": 0.036131761968135834, |
| "learning_rate": 1.1004837673589952e-07, |
| "loss": 0.0067, |
| "step": 14700 |
| }, |
| { |
| "epoch": 29.898373983739837, |
| "grad_norm": 0.044530197978019714, |
| "learning_rate": 1.0286012360550267e-07, |
| "loss": 0.0149, |
| "step": 14710 |
| }, |
| { |
| "epoch": 29.91869918699187, |
| "grad_norm": 0.0911911204457283, |
| "learning_rate": 9.591438932121111e-08, |
| "loss": 0.0073, |
| "step": 14720 |
| }, |
| { |
| "epoch": 29.9390243902439, |
| "grad_norm": 0.058300457894802094, |
| "learning_rate": 8.921120764189272e-08, |
| "loss": 0.007, |
| "step": 14730 |
| }, |
| { |
| "epoch": 29.959349593495936, |
| "grad_norm": 0.09375248104333878, |
| "learning_rate": 8.275061114753068e-08, |
| "loss": 0.0103, |
| "step": 14740 |
| }, |
| { |
| "epoch": 29.979674796747968, |
| "grad_norm": 0.08694697916507721, |
| "learning_rate": 7.65326312390624e-08, |
| "loss": 0.0079, |
| "step": 14750 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.04056360200047493, |
| "learning_rate": 7.055729813819079e-08, |
| "loss": 0.0071, |
| "step": 14760 |
| }, |
| { |
| "epoch": 30.020325203252032, |
| "grad_norm": 0.1188066303730011, |
| "learning_rate": 6.48246408872899e-08, |
| "loss": 0.0094, |
| "step": 14770 |
| }, |
| { |
| "epoch": 30.040650406504064, |
| "grad_norm": 0.07885562628507614, |
| "learning_rate": 5.9334687349227314e-08, |
| "loss": 0.0068, |
| "step": 14780 |
| }, |
| { |
| "epoch": 30.0609756097561, |
| "grad_norm": 0.07324660569429398, |
| "learning_rate": 5.4087464207236426e-08, |
| "loss": 0.0084, |
| "step": 14790 |
| }, |
| { |
| "epoch": 30.08130081300813, |
| "grad_norm": 0.031079620122909546, |
| "learning_rate": 4.9082996964794345e-08, |
| "loss": 0.0047, |
| "step": 14800 |
| }, |
| { |
| "epoch": 30.101626016260163, |
| "grad_norm": 0.08347532153129578, |
| "learning_rate": 4.432130994548866e-08, |
| "loss": 0.0061, |
| "step": 14810 |
| }, |
| { |
| "epoch": 30.121951219512194, |
| "grad_norm": 0.10173796117305756, |
| "learning_rate": 3.980242629291198e-08, |
| "loss": 0.0067, |
| "step": 14820 |
| }, |
| { |
| "epoch": 30.142276422764226, |
| "grad_norm": 0.07320091128349304, |
| "learning_rate": 3.5526367970539765e-08, |
| "loss": 0.0073, |
| "step": 14830 |
| }, |
| { |
| "epoch": 30.16260162601626, |
| "grad_norm": 0.09657250344753265, |
| "learning_rate": 3.1493155761613826e-08, |
| "loss": 0.0059, |
| "step": 14840 |
| }, |
| { |
| "epoch": 30.182926829268293, |
| "grad_norm": 0.0755375325679779, |
| "learning_rate": 2.7702809269058992e-08, |
| "loss": 0.0057, |
| "step": 14850 |
| }, |
| { |
| "epoch": 30.203252032520325, |
| "grad_norm": 0.11463439464569092, |
| "learning_rate": 2.4155346915394337e-08, |
| "loss": 0.008, |
| "step": 14860 |
| }, |
| { |
| "epoch": 30.223577235772357, |
| "grad_norm": 0.0628993809223175, |
| "learning_rate": 2.085078594261103e-08, |
| "loss": 0.0077, |
| "step": 14870 |
| }, |
| { |
| "epoch": 30.24390243902439, |
| "grad_norm": 0.050431057810783386, |
| "learning_rate": 1.7789142412122372e-08, |
| "loss": 0.006, |
| "step": 14880 |
| }, |
| { |
| "epoch": 30.264227642276424, |
| "grad_norm": 0.05110840126872063, |
| "learning_rate": 1.4970431204663905e-08, |
| "loss": 0.0066, |
| "step": 14890 |
| }, |
| { |
| "epoch": 30.284552845528456, |
| "grad_norm": 0.05618211254477501, |
| "learning_rate": 1.2394666020226764e-08, |
| "loss": 0.0057, |
| "step": 14900 |
| }, |
| { |
| "epoch": 30.304878048780488, |
| "grad_norm": 0.06732738763093948, |
| "learning_rate": 1.0061859378007743e-08, |
| "loss": 0.0047, |
| "step": 14910 |
| }, |
| { |
| "epoch": 30.32520325203252, |
| "grad_norm": 0.11842504143714905, |
| "learning_rate": 7.97202261630936e-09, |
| "loss": 0.0107, |
| "step": 14920 |
| }, |
| { |
| "epoch": 30.34552845528455, |
| "grad_norm": 0.039801180362701416, |
| "learning_rate": 6.125165892539863e-09, |
| "loss": 0.0066, |
| "step": 14930 |
| }, |
| { |
| "epoch": 30.365853658536587, |
| "grad_norm": 0.08215242624282837, |
| "learning_rate": 4.5212981831022076e-09, |
| "loss": 0.006, |
| "step": 14940 |
| }, |
| { |
| "epoch": 30.38617886178862, |
| "grad_norm": 0.043269477784633636, |
| "learning_rate": 3.1604272834051542e-09, |
| "loss": 0.0056, |
| "step": 14950 |
| }, |
| { |
| "epoch": 30.40650406504065, |
| "grad_norm": 0.09183301776647568, |
| "learning_rate": 2.04255980778556e-09, |
| "loss": 0.0049, |
| "step": 14960 |
| }, |
| { |
| "epoch": 30.426829268292682, |
| "grad_norm": 0.02949368581175804, |
| "learning_rate": 1.1677011895028234e-09, |
| "loss": 0.0047, |
| "step": 14970 |
| }, |
| { |
| "epoch": 30.447154471544714, |
| "grad_norm": 0.06685309112071991, |
| "learning_rate": 5.358556807000259e-10, |
| "loss": 0.0052, |
| "step": 14980 |
| }, |
| { |
| "epoch": 30.46747967479675, |
| "grad_norm": 0.07415076345205307, |
| "learning_rate": 1.4702635238728058e-10, |
| "loss": 0.007, |
| "step": 14990 |
| }, |
| { |
| "epoch": 30.48780487804878, |
| "grad_norm": 0.06969624757766724, |
| "learning_rate": 1.2150944139754927e-12, |
| "loss": 0.0058, |
| "step": 15000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 15000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 31, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |