{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 35.0, "eval_steps": 500, "global_step": 47775, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007326007326007326, "grad_norm": 12.75, "learning_rate": 3.767266638760988e-07, "loss": 3.0794, "step": 10 }, { "epoch": 0.014652014652014652, "grad_norm": 12.5625, "learning_rate": 7.953118459606531e-07, "loss": 3.0738, "step": 20 }, { "epoch": 0.02197802197802198, "grad_norm": 12.625, "learning_rate": 1.2138970280452072e-06, "loss": 3.0732, "step": 30 }, { "epoch": 0.029304029304029304, "grad_norm": 12.4375, "learning_rate": 1.6324822101297615e-06, "loss": 3.0657, "step": 40 }, { "epoch": 0.03663003663003663, "grad_norm": 12.75, "learning_rate": 2.051067392214316e-06, "loss": 3.0529, "step": 50 }, { "epoch": 0.04395604395604396, "grad_norm": 12.5, "learning_rate": 2.46965257429887e-06, "loss": 3.0187, "step": 60 }, { "epoch": 0.05128205128205128, "grad_norm": 12.1875, "learning_rate": 2.888237756383424e-06, "loss": 2.9708, "step": 70 }, { "epoch": 0.05860805860805861, "grad_norm": 11.8125, "learning_rate": 3.3068229384679787e-06, "loss": 2.9156, "step": 80 }, { "epoch": 0.06593406593406594, "grad_norm": 11.375, "learning_rate": 3.725408120552533e-06, "loss": 2.8708, "step": 90 }, { "epoch": 0.07326007326007326, "grad_norm": 11.3125, "learning_rate": 4.1439933026370875e-06, "loss": 2.8174, "step": 100 }, { "epoch": 0.08058608058608059, "grad_norm": 10.125, "learning_rate": 4.562578484721641e-06, "loss": 2.7002, "step": 110 }, { "epoch": 0.08791208791208792, "grad_norm": 9.25, "learning_rate": 4.981163666806196e-06, "loss": 2.5764, "step": 120 }, { "epoch": 0.09523809523809523, "grad_norm": 8.25, "learning_rate": 5.3997488488907495e-06, "loss": 2.4532, "step": 130 }, { "epoch": 0.10256410256410256, "grad_norm": 7.40625, "learning_rate": 5.818334030975304e-06, "loss": 2.3577, "step": 140 }, { "epoch": 0.10989010989010989, "grad_norm": 6.375, "learning_rate": 6.236919213059858e-06, "loss": 2.2577, "step": 150 }, { "epoch": 0.11721611721611722, "grad_norm": 5.40625, "learning_rate": 6.6555043951444115e-06, "loss": 2.1509, "step": 160 }, { "epoch": 0.12454212454212454, "grad_norm": 4.625, "learning_rate": 7.074089577228966e-06, "loss": 2.0424, "step": 170 }, { "epoch": 0.13186813186813187, "grad_norm": 3.71875, "learning_rate": 7.4926747593135215e-06, "loss": 1.9498, "step": 180 }, { "epoch": 0.1391941391941392, "grad_norm": 2.734375, "learning_rate": 7.911259941398075e-06, "loss": 1.8733, "step": 190 }, { "epoch": 0.14652014652014653, "grad_norm": 2.21875, "learning_rate": 8.329845123482629e-06, "loss": 1.7909, "step": 200 }, { "epoch": 0.15384615384615385, "grad_norm": 1.9375, "learning_rate": 8.748430305567183e-06, "loss": 1.7241, "step": 210 }, { "epoch": 0.16117216117216118, "grad_norm": 1.671875, "learning_rate": 9.167015487651738e-06, "loss": 1.6787, "step": 220 }, { "epoch": 0.1684981684981685, "grad_norm": 1.515625, "learning_rate": 9.585600669736292e-06, "loss": 1.6364, "step": 230 }, { "epoch": 0.17582417582417584, "grad_norm": 1.3984375, "learning_rate": 1.0004185851820847e-05, "loss": 1.6088, "step": 240 }, { "epoch": 0.18315018315018314, "grad_norm": 1.4140625, "learning_rate": 1.04227710339054e-05, "loss": 1.5654, "step": 250 }, { "epoch": 0.19047619047619047, "grad_norm": 1.34375, "learning_rate": 1.0841356215989955e-05, "loss": 1.5427, "step": 260 }, { "epoch": 0.1978021978021978, "grad_norm": 1.40625, "learning_rate": 1.1259941398074508e-05, "loss": 1.5195, "step": 270 }, { "epoch": 0.20512820512820512, "grad_norm": 1.3203125, "learning_rate": 1.1678526580159064e-05, "loss": 1.4793, "step": 280 }, { "epoch": 0.21245421245421245, "grad_norm": 1.1953125, "learning_rate": 1.2097111762243616e-05, "loss": 1.4661, "step": 290 }, { "epoch": 0.21978021978021978, "grad_norm": 1.6640625, "learning_rate": 1.251569694432817e-05, "loss": 1.457, "step": 300 }, { "epoch": 0.2271062271062271, "grad_norm": 1.1953125, "learning_rate": 1.2934282126412725e-05, "loss": 1.4157, "step": 310 }, { "epoch": 0.23443223443223443, "grad_norm": 1.203125, "learning_rate": 1.335286730849728e-05, "loss": 1.4044, "step": 320 }, { "epoch": 0.24175824175824176, "grad_norm": 1.328125, "learning_rate": 1.3771452490581834e-05, "loss": 1.3967, "step": 330 }, { "epoch": 0.2490842490842491, "grad_norm": 1.46875, "learning_rate": 1.4190037672666386e-05, "loss": 1.392, "step": 340 }, { "epoch": 0.2564102564102564, "grad_norm": 1.3125, "learning_rate": 1.4608622854750942e-05, "loss": 1.3806, "step": 350 }, { "epoch": 0.26373626373626374, "grad_norm": 1.296875, "learning_rate": 1.5027208036835497e-05, "loss": 1.3626, "step": 360 }, { "epoch": 0.27106227106227104, "grad_norm": 1.3125, "learning_rate": 1.544579321892005e-05, "loss": 1.3656, "step": 370 }, { "epoch": 0.2783882783882784, "grad_norm": 1.21875, "learning_rate": 1.5864378401004608e-05, "loss": 1.347, "step": 380 }, { "epoch": 0.2857142857142857, "grad_norm": 1.2109375, "learning_rate": 1.6282963583089158e-05, "loss": 1.3393, "step": 390 }, { "epoch": 0.29304029304029305, "grad_norm": 1.234375, "learning_rate": 1.6701548765173712e-05, "loss": 1.3178, "step": 400 }, { "epoch": 0.30036630036630035, "grad_norm": 1.2734375, "learning_rate": 1.712013394725827e-05, "loss": 1.3293, "step": 410 }, { "epoch": 0.3076923076923077, "grad_norm": 1.4609375, "learning_rate": 1.7538719129342823e-05, "loss": 1.3226, "step": 420 }, { "epoch": 0.315018315018315, "grad_norm": 1.1640625, "learning_rate": 1.7957304311427376e-05, "loss": 1.3059, "step": 430 }, { "epoch": 0.32234432234432236, "grad_norm": 1.2890625, "learning_rate": 1.837588949351193e-05, "loss": 1.2929, "step": 440 }, { "epoch": 0.32967032967032966, "grad_norm": 1.3828125, "learning_rate": 1.8794474675596484e-05, "loss": 1.2792, "step": 450 }, { "epoch": 0.336996336996337, "grad_norm": 1.2890625, "learning_rate": 1.921305985768104e-05, "loss": 1.2809, "step": 460 }, { "epoch": 0.3443223443223443, "grad_norm": 1.5, "learning_rate": 1.963164503976559e-05, "loss": 1.2765, "step": 470 }, { "epoch": 0.3516483516483517, "grad_norm": 1.28125, "learning_rate": 2.005023022185015e-05, "loss": 1.2529, "step": 480 }, { "epoch": 0.358974358974359, "grad_norm": 1.2578125, "learning_rate": 2.0468815403934702e-05, "loss": 1.2296, "step": 490 }, { "epoch": 0.3663003663003663, "grad_norm": 1.4453125, "learning_rate": 2.0887400586019256e-05, "loss": 1.2246, "step": 500 }, { "epoch": 0.37362637362637363, "grad_norm": 1.796875, "learning_rate": 2.130598576810381e-05, "loss": 1.2169, "step": 510 }, { "epoch": 0.38095238095238093, "grad_norm": 1.765625, "learning_rate": 2.1724570950188363e-05, "loss": 1.1674, "step": 520 }, { "epoch": 0.3882783882783883, "grad_norm": 1.4765625, "learning_rate": 2.2143156132272917e-05, "loss": 1.1379, "step": 530 }, { "epoch": 0.3956043956043956, "grad_norm": 1.6015625, "learning_rate": 2.2561741314357474e-05, "loss": 1.1004, "step": 540 }, { "epoch": 0.40293040293040294, "grad_norm": 1.5703125, "learning_rate": 2.2980326496442028e-05, "loss": 1.0548, "step": 550 }, { "epoch": 0.41025641025641024, "grad_norm": 1.5078125, "learning_rate": 2.339891167852658e-05, "loss": 1.0101, "step": 560 }, { "epoch": 0.4175824175824176, "grad_norm": 1.90625, "learning_rate": 2.3817496860611135e-05, "loss": 0.9664, "step": 570 }, { "epoch": 0.4249084249084249, "grad_norm": 1.703125, "learning_rate": 2.423608204269569e-05, "loss": 0.8978, "step": 580 }, { "epoch": 0.43223443223443225, "grad_norm": 1.828125, "learning_rate": 2.4654667224780246e-05, "loss": 0.8869, "step": 590 }, { "epoch": 0.43956043956043955, "grad_norm": 1.9921875, "learning_rate": 2.50732524068648e-05, "loss": 0.8278, "step": 600 }, { "epoch": 0.4468864468864469, "grad_norm": 2.140625, "learning_rate": 2.5491837588949354e-05, "loss": 0.8243, "step": 610 }, { "epoch": 0.4542124542124542, "grad_norm": 2.046875, "learning_rate": 2.5910422771033904e-05, "loss": 0.7612, "step": 620 }, { "epoch": 0.46153846153846156, "grad_norm": 1.859375, "learning_rate": 2.632900795311846e-05, "loss": 0.7003, "step": 630 }, { "epoch": 0.46886446886446886, "grad_norm": 1.8203125, "learning_rate": 2.6747593135203015e-05, "loss": 0.6927, "step": 640 }, { "epoch": 0.47619047619047616, "grad_norm": 1.6953125, "learning_rate": 2.7166178317287572e-05, "loss": 0.7216, "step": 650 }, { "epoch": 0.4835164835164835, "grad_norm": 1.90625, "learning_rate": 2.7584763499372122e-05, "loss": 0.685, "step": 660 }, { "epoch": 0.4908424908424908, "grad_norm": 1.90625, "learning_rate": 2.8003348681456676e-05, "loss": 0.6776, "step": 670 }, { "epoch": 0.4981684981684982, "grad_norm": 1.8515625, "learning_rate": 2.8421933863541233e-05, "loss": 0.6532, "step": 680 }, { "epoch": 0.5054945054945055, "grad_norm": 1.90625, "learning_rate": 2.8840519045625787e-05, "loss": 0.6281, "step": 690 }, { "epoch": 0.5128205128205128, "grad_norm": 2.046875, "learning_rate": 2.9259104227710337e-05, "loss": 0.6221, "step": 700 }, { "epoch": 0.5201465201465202, "grad_norm": 2.109375, "learning_rate": 2.9677689409794894e-05, "loss": 0.6164, "step": 710 }, { "epoch": 0.5274725274725275, "grad_norm": 1.5234375, "learning_rate": 3.0096274591879448e-05, "loss": 0.6025, "step": 720 }, { "epoch": 0.5347985347985348, "grad_norm": 1.9140625, "learning_rate": 3.0514859773964005e-05, "loss": 0.5781, "step": 730 }, { "epoch": 0.5421245421245421, "grad_norm": 1.703125, "learning_rate": 3.093344495604856e-05, "loss": 0.5298, "step": 740 }, { "epoch": 0.5494505494505495, "grad_norm": 1.40625, "learning_rate": 3.135203013813311e-05, "loss": 0.5548, "step": 750 }, { "epoch": 0.5567765567765568, "grad_norm": 1.6953125, "learning_rate": 3.1770615320217666e-05, "loss": 0.5252, "step": 760 }, { "epoch": 0.5641025641025641, "grad_norm": 2.03125, "learning_rate": 3.218920050230222e-05, "loss": 0.5463, "step": 770 }, { "epoch": 0.5714285714285714, "grad_norm": 1.4375, "learning_rate": 3.2607785684386774e-05, "loss": 0.5315, "step": 780 }, { "epoch": 0.5787545787545788, "grad_norm": 2.0625, "learning_rate": 3.302637086647133e-05, "loss": 0.5121, "step": 790 }, { "epoch": 0.5860805860805861, "grad_norm": 1.578125, "learning_rate": 3.344495604855588e-05, "loss": 0.5087, "step": 800 }, { "epoch": 0.5934065934065934, "grad_norm": 1.46875, "learning_rate": 3.386354123064044e-05, "loss": 0.547, "step": 810 }, { "epoch": 0.6007326007326007, "grad_norm": 1.8828125, "learning_rate": 3.428212641272499e-05, "loss": 0.5097, "step": 820 }, { "epoch": 0.608058608058608, "grad_norm": 2.015625, "learning_rate": 3.470071159480954e-05, "loss": 0.5026, "step": 830 }, { "epoch": 0.6153846153846154, "grad_norm": 1.8828125, "learning_rate": 3.51192967768941e-05, "loss": 0.4773, "step": 840 }, { "epoch": 0.6227106227106227, "grad_norm": 1.78125, "learning_rate": 3.553788195897865e-05, "loss": 0.5086, "step": 850 }, { "epoch": 0.63003663003663, "grad_norm": 1.8359375, "learning_rate": 3.595646714106321e-05, "loss": 0.4942, "step": 860 }, { "epoch": 0.6373626373626373, "grad_norm": 2.453125, "learning_rate": 3.6375052323147764e-05, "loss": 0.4723, "step": 870 }, { "epoch": 0.6446886446886447, "grad_norm": 1.71875, "learning_rate": 3.679363750523232e-05, "loss": 0.463, "step": 880 }, { "epoch": 0.652014652014652, "grad_norm": 1.609375, "learning_rate": 3.721222268731687e-05, "loss": 0.4276, "step": 890 }, { "epoch": 0.6593406593406593, "grad_norm": 1.46875, "learning_rate": 3.7630807869401425e-05, "loss": 0.436, "step": 900 }, { "epoch": 0.6666666666666666, "grad_norm": 1.5859375, "learning_rate": 3.804939305148598e-05, "loss": 0.3618, "step": 910 }, { "epoch": 0.673992673992674, "grad_norm": 1.6953125, "learning_rate": 3.846797823357053e-05, "loss": 0.3845, "step": 920 }, { "epoch": 0.6813186813186813, "grad_norm": 1.2734375, "learning_rate": 3.8886563415655086e-05, "loss": 0.3486, "step": 930 }, { "epoch": 0.6886446886446886, "grad_norm": 1.171875, "learning_rate": 3.930514859773965e-05, "loss": 0.3237, "step": 940 }, { "epoch": 0.6959706959706959, "grad_norm": 1.390625, "learning_rate": 3.9723733779824194e-05, "loss": 0.3623, "step": 950 }, { "epoch": 0.7032967032967034, "grad_norm": 1.3515625, "learning_rate": 4.014231896190875e-05, "loss": 0.3173, "step": 960 }, { "epoch": 0.7106227106227107, "grad_norm": 1.3125, "learning_rate": 4.056090414399331e-05, "loss": 0.3203, "step": 970 }, { "epoch": 0.717948717948718, "grad_norm": 1.5390625, "learning_rate": 4.0979489326077855e-05, "loss": 0.3435, "step": 980 }, { "epoch": 0.7252747252747253, "grad_norm": 1.1015625, "learning_rate": 4.1398074508162416e-05, "loss": 0.3269, "step": 990 }, { "epoch": 0.7326007326007326, "grad_norm": 1.3359375, "learning_rate": 4.181665969024697e-05, "loss": 0.3215, "step": 1000 }, { "epoch": 0.73992673992674, "grad_norm": 1.328125, "learning_rate": 4.223524487233152e-05, "loss": 0.2918, "step": 1010 }, { "epoch": 0.7472527472527473, "grad_norm": 1.5390625, "learning_rate": 4.265383005441608e-05, "loss": 0.31, "step": 1020 }, { "epoch": 0.7545787545787546, "grad_norm": 1.2265625, "learning_rate": 4.307241523650063e-05, "loss": 0.3111, "step": 1030 }, { "epoch": 0.7619047619047619, "grad_norm": 1.2578125, "learning_rate": 4.3491000418585184e-05, "loss": 0.317, "step": 1040 }, { "epoch": 0.7692307692307693, "grad_norm": 1.4609375, "learning_rate": 4.390958560066974e-05, "loss": 0.2891, "step": 1050 }, { "epoch": 0.7765567765567766, "grad_norm": 1.40625, "learning_rate": 4.432817078275429e-05, "loss": 0.3115, "step": 1060 }, { "epoch": 0.7838827838827839, "grad_norm": 1.109375, "learning_rate": 4.474675596483885e-05, "loss": 0.3009, "step": 1070 }, { "epoch": 0.7912087912087912, "grad_norm": 1.3828125, "learning_rate": 4.51653411469234e-05, "loss": 0.2859, "step": 1080 }, { "epoch": 0.7985347985347986, "grad_norm": 1.125, "learning_rate": 4.558392632900795e-05, "loss": 0.2615, "step": 1090 }, { "epoch": 0.8058608058608059, "grad_norm": 1.3671875, "learning_rate": 4.600251151109251e-05, "loss": 0.2835, "step": 1100 }, { "epoch": 0.8131868131868132, "grad_norm": 1.4375, "learning_rate": 4.642109669317706e-05, "loss": 0.2885, "step": 1110 }, { "epoch": 0.8205128205128205, "grad_norm": 1.2890625, "learning_rate": 4.683968187526162e-05, "loss": 0.275, "step": 1120 }, { "epoch": 0.8278388278388278, "grad_norm": 1.1640625, "learning_rate": 4.7258267057346174e-05, "loss": 0.2876, "step": 1130 }, { "epoch": 0.8351648351648352, "grad_norm": 1.0703125, "learning_rate": 4.767685223943073e-05, "loss": 0.2835, "step": 1140 }, { "epoch": 0.8424908424908425, "grad_norm": 1.140625, "learning_rate": 4.809543742151528e-05, "loss": 0.2885, "step": 1150 }, { "epoch": 0.8498168498168498, "grad_norm": 1.4921875, "learning_rate": 4.8514022603599836e-05, "loss": 0.2779, "step": 1160 }, { "epoch": 0.8571428571428571, "grad_norm": 1.078125, "learning_rate": 4.893260778568439e-05, "loss": 0.2585, "step": 1170 }, { "epoch": 0.8644688644688645, "grad_norm": 1.71875, "learning_rate": 4.935119296776894e-05, "loss": 0.2673, "step": 1180 }, { "epoch": 0.8717948717948718, "grad_norm": 1.28125, "learning_rate": 4.97697781498535e-05, "loss": 0.2575, "step": 1190 }, { "epoch": 0.8791208791208791, "grad_norm": 1.1328125, "learning_rate": 5.018836333193805e-05, "loss": 0.2736, "step": 1200 }, { "epoch": 0.8864468864468864, "grad_norm": 0.98828125, "learning_rate": 5.06069485140226e-05, "loss": 0.268, "step": 1210 }, { "epoch": 0.8937728937728938, "grad_norm": 1.1875, "learning_rate": 5.1025533696107165e-05, "loss": 0.2505, "step": 1220 }, { "epoch": 0.9010989010989011, "grad_norm": 1.453125, "learning_rate": 5.144411887819172e-05, "loss": 0.2618, "step": 1230 }, { "epoch": 0.9084249084249084, "grad_norm": 1.125, "learning_rate": 5.1862704060276265e-05, "loss": 0.2358, "step": 1240 }, { "epoch": 0.9157509157509157, "grad_norm": 1.0625, "learning_rate": 5.228128924236082e-05, "loss": 0.2579, "step": 1250 }, { "epoch": 0.9230769230769231, "grad_norm": 1.234375, "learning_rate": 5.269987442444537e-05, "loss": 0.2509, "step": 1260 }, { "epoch": 0.9304029304029304, "grad_norm": 1.4921875, "learning_rate": 5.3118459606529933e-05, "loss": 0.2599, "step": 1270 }, { "epoch": 0.9377289377289377, "grad_norm": 1.34375, "learning_rate": 5.353704478861449e-05, "loss": 0.2441, "step": 1280 }, { "epoch": 0.945054945054945, "grad_norm": 1.2734375, "learning_rate": 5.395562997069904e-05, "loss": 0.2574, "step": 1290 }, { "epoch": 0.9523809523809523, "grad_norm": 1.0625, "learning_rate": 5.4374215152783595e-05, "loss": 0.2289, "step": 1300 }, { "epoch": 0.9597069597069597, "grad_norm": 1.1171875, "learning_rate": 5.479280033486814e-05, "loss": 0.2363, "step": 1310 }, { "epoch": 0.967032967032967, "grad_norm": 1.453125, "learning_rate": 5.521138551695271e-05, "loss": 0.2471, "step": 1320 }, { "epoch": 0.9743589743589743, "grad_norm": 1.125, "learning_rate": 5.562997069903726e-05, "loss": 0.2207, "step": 1330 }, { "epoch": 0.9816849816849816, "grad_norm": 1.25, "learning_rate": 5.604855588112181e-05, "loss": 0.2354, "step": 1340 }, { "epoch": 0.989010989010989, "grad_norm": 1.171875, "learning_rate": 5.646714106320636e-05, "loss": 0.2169, "step": 1350 }, { "epoch": 0.9963369963369964, "grad_norm": 1.4453125, "learning_rate": 5.688572624529092e-05, "loss": 0.2279, "step": 1360 }, { "epoch": 1.0036630036630036, "grad_norm": 1.4375, "learning_rate": 5.730431142737548e-05, "loss": 0.2111, "step": 1370 }, { "epoch": 1.010989010989011, "grad_norm": 1.0078125, "learning_rate": 5.772289660946003e-05, "loss": 0.2161, "step": 1380 }, { "epoch": 1.0183150183150182, "grad_norm": 1.03125, "learning_rate": 5.8141481791544585e-05, "loss": 0.2049, "step": 1390 }, { "epoch": 1.0256410256410255, "grad_norm": 1.15625, "learning_rate": 5.856006697362913e-05, "loss": 0.2289, "step": 1400 }, { "epoch": 1.032967032967033, "grad_norm": 1.203125, "learning_rate": 5.8978652155713686e-05, "loss": 0.2086, "step": 1410 }, { "epoch": 1.0402930402930404, "grad_norm": 1.296875, "learning_rate": 5.939723733779824e-05, "loss": 0.2235, "step": 1420 }, { "epoch": 1.0476190476190477, "grad_norm": 1.34375, "learning_rate": 5.98158225198828e-05, "loss": 0.2127, "step": 1430 }, { "epoch": 1.054945054945055, "grad_norm": 1.1171875, "learning_rate": 6.0234407701967354e-05, "loss": 0.2174, "step": 1440 }, { "epoch": 1.0622710622710623, "grad_norm": 1.0859375, "learning_rate": 6.065299288405191e-05, "loss": 0.2162, "step": 1450 }, { "epoch": 1.0695970695970696, "grad_norm": 1.03125, "learning_rate": 6.107157806613646e-05, "loss": 0.2048, "step": 1460 }, { "epoch": 1.0769230769230769, "grad_norm": 1.703125, "learning_rate": 6.149016324822101e-05, "loss": 0.2029, "step": 1470 }, { "epoch": 1.0842490842490842, "grad_norm": 1.3359375, "learning_rate": 6.190874843030557e-05, "loss": 0.2231, "step": 1480 }, { "epoch": 1.0915750915750915, "grad_norm": 1.59375, "learning_rate": 6.232733361239013e-05, "loss": 0.2099, "step": 1490 }, { "epoch": 1.098901098901099, "grad_norm": 1.671875, "learning_rate": 6.274591879447468e-05, "loss": 0.223, "step": 1500 }, { "epoch": 1.1062271062271063, "grad_norm": 1.4609375, "learning_rate": 6.316450397655924e-05, "loss": 0.1957, "step": 1510 }, { "epoch": 1.1135531135531136, "grad_norm": 1.21875, "learning_rate": 6.358308915864378e-05, "loss": 0.2159, "step": 1520 }, { "epoch": 1.120879120879121, "grad_norm": 1.21875, "learning_rate": 6.400167434072834e-05, "loss": 0.2019, "step": 1530 }, { "epoch": 1.1282051282051282, "grad_norm": 1.640625, "learning_rate": 6.44202595228129e-05, "loss": 0.2065, "step": 1540 }, { "epoch": 1.1355311355311355, "grad_norm": 1.0546875, "learning_rate": 6.483884470489745e-05, "loss": 0.1978, "step": 1550 }, { "epoch": 1.1428571428571428, "grad_norm": 1.0234375, "learning_rate": 6.5257429886982e-05, "loss": 0.1939, "step": 1560 }, { "epoch": 1.15018315018315, "grad_norm": 1.265625, "learning_rate": 6.567601506906656e-05, "loss": 0.1854, "step": 1570 }, { "epoch": 1.1575091575091574, "grad_norm": 1.34375, "learning_rate": 6.609460025115112e-05, "loss": 0.1853, "step": 1580 }, { "epoch": 1.164835164835165, "grad_norm": 1.15625, "learning_rate": 6.651318543323567e-05, "loss": 0.1905, "step": 1590 }, { "epoch": 1.1721611721611722, "grad_norm": 1.9375, "learning_rate": 6.693177061532023e-05, "loss": 0.2015, "step": 1600 }, { "epoch": 1.1794871794871795, "grad_norm": 1.2421875, "learning_rate": 6.735035579740477e-05, "loss": 0.1749, "step": 1610 }, { "epoch": 1.1868131868131868, "grad_norm": 1.03125, "learning_rate": 6.776894097948932e-05, "loss": 0.1886, "step": 1620 }, { "epoch": 1.1941391941391941, "grad_norm": 1.1015625, "learning_rate": 6.818752616157388e-05, "loss": 0.1817, "step": 1630 }, { "epoch": 1.2014652014652014, "grad_norm": 1.3671875, "learning_rate": 6.860611134365844e-05, "loss": 0.1879, "step": 1640 }, { "epoch": 1.2087912087912087, "grad_norm": 1.5546875, "learning_rate": 6.902469652574299e-05, "loss": 0.1973, "step": 1650 }, { "epoch": 1.2161172161172162, "grad_norm": 1.078125, "learning_rate": 6.944328170782755e-05, "loss": 0.1728, "step": 1660 }, { "epoch": 1.2234432234432235, "grad_norm": 1.6953125, "learning_rate": 6.98618668899121e-05, "loss": 0.1878, "step": 1670 }, { "epoch": 1.2307692307692308, "grad_norm": 1.2109375, "learning_rate": 7.028045207199666e-05, "loss": 0.2021, "step": 1680 }, { "epoch": 1.2380952380952381, "grad_norm": 1.4140625, "learning_rate": 7.069903725408122e-05, "loss": 0.1919, "step": 1690 }, { "epoch": 1.2454212454212454, "grad_norm": 1.9296875, "learning_rate": 7.111762243616576e-05, "loss": 0.1743, "step": 1700 }, { "epoch": 1.2527472527472527, "grad_norm": 1.53125, "learning_rate": 7.153620761825031e-05, "loss": 0.1735, "step": 1710 }, { "epoch": 1.26007326007326, "grad_norm": 2.015625, "learning_rate": 7.195479280033487e-05, "loss": 0.1884, "step": 1720 }, { "epoch": 1.2673992673992673, "grad_norm": 1.9609375, "learning_rate": 7.237337798241942e-05, "loss": 0.1882, "step": 1730 }, { "epoch": 1.2747252747252746, "grad_norm": 1.203125, "learning_rate": 7.279196316450398e-05, "loss": 0.174, "step": 1740 }, { "epoch": 1.282051282051282, "grad_norm": 1.671875, "learning_rate": 7.321054834658854e-05, "loss": 0.1757, "step": 1750 }, { "epoch": 1.2893772893772895, "grad_norm": 1.3203125, "learning_rate": 7.362913352867309e-05, "loss": 0.1754, "step": 1760 }, { "epoch": 1.2967032967032968, "grad_norm": 1.1015625, "learning_rate": 7.404771871075765e-05, "loss": 0.1767, "step": 1770 }, { "epoch": 1.304029304029304, "grad_norm": 1.078125, "learning_rate": 7.44663038928422e-05, "loss": 0.1733, "step": 1780 }, { "epoch": 1.3113553113553114, "grad_norm": 1.15625, "learning_rate": 7.488488907492675e-05, "loss": 0.1829, "step": 1790 }, { "epoch": 1.3186813186813187, "grad_norm": 1.078125, "learning_rate": 7.530347425701131e-05, "loss": 0.1686, "step": 1800 }, { "epoch": 1.326007326007326, "grad_norm": 1.3671875, "learning_rate": 7.572205943909586e-05, "loss": 0.1731, "step": 1810 }, { "epoch": 1.3333333333333333, "grad_norm": 1.46875, "learning_rate": 7.614064462118041e-05, "loss": 0.1661, "step": 1820 }, { "epoch": 1.3406593406593408, "grad_norm": 1.2890625, "learning_rate": 7.655922980326497e-05, "loss": 0.1672, "step": 1830 }, { "epoch": 1.347985347985348, "grad_norm": 1.109375, "learning_rate": 7.697781498534953e-05, "loss": 0.1639, "step": 1840 }, { "epoch": 1.3553113553113554, "grad_norm": 1.3515625, "learning_rate": 7.739640016743408e-05, "loss": 0.1671, "step": 1850 }, { "epoch": 1.3626373626373627, "grad_norm": 1.1484375, "learning_rate": 7.781498534951864e-05, "loss": 0.1733, "step": 1860 }, { "epoch": 1.36996336996337, "grad_norm": 1.46875, "learning_rate": 7.823357053160318e-05, "loss": 0.1757, "step": 1870 }, { "epoch": 1.3772893772893773, "grad_norm": 1.2734375, "learning_rate": 7.865215571368773e-05, "loss": 0.1636, "step": 1880 }, { "epoch": 1.3846153846153846, "grad_norm": 1.0390625, "learning_rate": 7.907074089577229e-05, "loss": 0.1493, "step": 1890 }, { "epoch": 1.3919413919413919, "grad_norm": 1.484375, "learning_rate": 7.948932607785685e-05, "loss": 0.1746, "step": 1900 }, { "epoch": 1.3992673992673992, "grad_norm": 1.90625, "learning_rate": 7.99079112599414e-05, "loss": 0.1581, "step": 1910 }, { "epoch": 1.4065934065934065, "grad_norm": 1.3515625, "learning_rate": 8.032649644202596e-05, "loss": 0.1594, "step": 1920 }, { "epoch": 1.4139194139194138, "grad_norm": 1.609375, "learning_rate": 8.07450816241105e-05, "loss": 0.1583, "step": 1930 }, { "epoch": 1.4212454212454213, "grad_norm": 1.4921875, "learning_rate": 8.116366680619505e-05, "loss": 0.1644, "step": 1940 }, { "epoch": 1.4285714285714286, "grad_norm": 1.46875, "learning_rate": 8.158225198827963e-05, "loss": 0.1696, "step": 1950 }, { "epoch": 1.435897435897436, "grad_norm": 1.4921875, "learning_rate": 8.200083717036417e-05, "loss": 0.1511, "step": 1960 }, { "epoch": 1.4432234432234432, "grad_norm": 1.125, "learning_rate": 8.241942235244872e-05, "loss": 0.1628, "step": 1970 }, { "epoch": 1.4505494505494505, "grad_norm": 1.5234375, "learning_rate": 8.283800753453328e-05, "loss": 0.1728, "step": 1980 }, { "epoch": 1.4578754578754578, "grad_norm": 1.375, "learning_rate": 8.325659271661783e-05, "loss": 0.1521, "step": 1990 }, { "epoch": 1.4652014652014653, "grad_norm": 2.09375, "learning_rate": 8.367517789870239e-05, "loss": 0.1505, "step": 2000 }, { "epoch": 1.4725274725274726, "grad_norm": 1.484375, "learning_rate": 8.409376308078695e-05, "loss": 0.1385, "step": 2010 }, { "epoch": 1.47985347985348, "grad_norm": 1.390625, "learning_rate": 8.45123482628715e-05, "loss": 0.1534, "step": 2020 }, { "epoch": 1.4871794871794872, "grad_norm": 1.1953125, "learning_rate": 8.493093344495606e-05, "loss": 0.1509, "step": 2030 }, { "epoch": 1.4945054945054945, "grad_norm": 1.1796875, "learning_rate": 8.53495186270406e-05, "loss": 0.1556, "step": 2040 }, { "epoch": 1.5018315018315018, "grad_norm": 1.28125, "learning_rate": 8.576810380912516e-05, "loss": 0.1448, "step": 2050 }, { "epoch": 1.5091575091575091, "grad_norm": 1.1953125, "learning_rate": 8.618668899120973e-05, "loss": 0.1472, "step": 2060 }, { "epoch": 1.5164835164835164, "grad_norm": 1.203125, "learning_rate": 8.660527417329427e-05, "loss": 0.139, "step": 2070 }, { "epoch": 1.5238095238095237, "grad_norm": 1.328125, "learning_rate": 8.702385935537882e-05, "loss": 0.1348, "step": 2080 }, { "epoch": 1.531135531135531, "grad_norm": 1.546875, "learning_rate": 8.744244453746338e-05, "loss": 0.1432, "step": 2090 }, { "epoch": 1.5384615384615383, "grad_norm": 1.1015625, "learning_rate": 8.786102971954793e-05, "loss": 0.1381, "step": 2100 }, { "epoch": 1.5457875457875456, "grad_norm": 1.09375, "learning_rate": 8.827961490163249e-05, "loss": 0.1373, "step": 2110 }, { "epoch": 1.5531135531135531, "grad_norm": 1.4296875, "learning_rate": 8.869820008371705e-05, "loss": 0.1336, "step": 2120 }, { "epoch": 1.5604395604395604, "grad_norm": 0.98046875, "learning_rate": 8.91167852658016e-05, "loss": 0.1435, "step": 2130 }, { "epoch": 1.5677655677655677, "grad_norm": 1.390625, "learning_rate": 8.953537044788614e-05, "loss": 0.1462, "step": 2140 }, { "epoch": 1.575091575091575, "grad_norm": 1.421875, "learning_rate": 8.99539556299707e-05, "loss": 0.1618, "step": 2150 }, { "epoch": 1.5824175824175826, "grad_norm": 1.453125, "learning_rate": 9.037254081205526e-05, "loss": 0.1342, "step": 2160 }, { "epoch": 1.5897435897435899, "grad_norm": 1.625, "learning_rate": 9.079112599413981e-05, "loss": 0.1372, "step": 2170 }, { "epoch": 1.5970695970695972, "grad_norm": 1.8671875, "learning_rate": 9.120971117622437e-05, "loss": 0.1502, "step": 2180 }, { "epoch": 1.6043956043956045, "grad_norm": 1.59375, "learning_rate": 9.162829635830892e-05, "loss": 0.1314, "step": 2190 }, { "epoch": 1.6117216117216118, "grad_norm": 1.1484375, "learning_rate": 9.204688154039346e-05, "loss": 0.1301, "step": 2200 }, { "epoch": 1.619047619047619, "grad_norm": 1.453125, "learning_rate": 9.246546672247804e-05, "loss": 0.1352, "step": 2210 }, { "epoch": 1.6263736263736264, "grad_norm": 1.015625, "learning_rate": 9.288405190456258e-05, "loss": 0.127, "step": 2220 }, { "epoch": 1.6336996336996337, "grad_norm": 1.953125, "learning_rate": 9.330263708664713e-05, "loss": 0.1407, "step": 2230 }, { "epoch": 1.641025641025641, "grad_norm": 1.4765625, "learning_rate": 9.372122226873169e-05, "loss": 0.1257, "step": 2240 }, { "epoch": 1.6483516483516483, "grad_norm": 1.5234375, "learning_rate": 9.413980745081624e-05, "loss": 0.1403, "step": 2250 }, { "epoch": 1.6556776556776556, "grad_norm": 1.171875, "learning_rate": 9.45583926329008e-05, "loss": 0.1401, "step": 2260 }, { "epoch": 1.6630036630036629, "grad_norm": 1.15625, "learning_rate": 9.497697781498536e-05, "loss": 0.127, "step": 2270 }, { "epoch": 1.6703296703296702, "grad_norm": 1.421875, "learning_rate": 9.539556299706991e-05, "loss": 0.1437, "step": 2280 }, { "epoch": 1.6776556776556777, "grad_norm": 0.90234375, "learning_rate": 9.581414817915447e-05, "loss": 0.1246, "step": 2290 }, { "epoch": 1.684981684981685, "grad_norm": 1.1484375, "learning_rate": 9.623273336123901e-05, "loss": 0.1204, "step": 2300 }, { "epoch": 1.6923076923076923, "grad_norm": 1.484375, "learning_rate": 9.665131854332356e-05, "loss": 0.1401, "step": 2310 }, { "epoch": 1.6996336996336996, "grad_norm": 1.296875, "learning_rate": 9.706990372540814e-05, "loss": 0.1281, "step": 2320 }, { "epoch": 1.7069597069597071, "grad_norm": 1.875, "learning_rate": 9.748848890749268e-05, "loss": 0.1379, "step": 2330 }, { "epoch": 1.7142857142857144, "grad_norm": 1.3359375, "learning_rate": 9.790707408957723e-05, "loss": 0.1339, "step": 2340 }, { "epoch": 1.7216117216117217, "grad_norm": 1.203125, "learning_rate": 9.832565927166179e-05, "loss": 0.1285, "step": 2350 }, { "epoch": 1.728937728937729, "grad_norm": 1.1015625, "learning_rate": 9.874424445374634e-05, "loss": 0.118, "step": 2360 }, { "epoch": 1.7362637362637363, "grad_norm": 0.89453125, "learning_rate": 9.91628296358309e-05, "loss": 0.1271, "step": 2370 }, { "epoch": 1.7435897435897436, "grad_norm": 1.25, "learning_rate": 9.958141481791546e-05, "loss": 0.1162, "step": 2380 }, { "epoch": 1.750915750915751, "grad_norm": 1.2734375, "learning_rate": 0.0001, "loss": 0.131, "step": 2390 }, { "epoch": 1.7582417582417582, "grad_norm": 1.984375, "learning_rate": 9.999998814146298e-05, "loss": 0.1338, "step": 2400 }, { "epoch": 1.7655677655677655, "grad_norm": 1.1875, "learning_rate": 9.999995256585762e-05, "loss": 0.1126, "step": 2410 }, { "epoch": 1.7728937728937728, "grad_norm": 0.93359375, "learning_rate": 9.999989327320091e-05, "loss": 0.1235, "step": 2420 }, { "epoch": 1.7802197802197801, "grad_norm": 1.6640625, "learning_rate": 9.999981026352132e-05, "loss": 0.1162, "step": 2430 }, { "epoch": 1.7875457875457874, "grad_norm": 1.46875, "learning_rate": 9.999970353685857e-05, "loss": 0.1269, "step": 2440 }, { "epoch": 1.7948717948717947, "grad_norm": 1.5234375, "learning_rate": 9.999957309326385e-05, "loss": 0.1207, "step": 2450 }, { "epoch": 1.8021978021978022, "grad_norm": 1.3125, "learning_rate": 9.999941893279963e-05, "loss": 0.1283, "step": 2460 }, { "epoch": 1.8095238095238095, "grad_norm": 1.5078125, "learning_rate": 9.999924105553978e-05, "loss": 0.1201, "step": 2470 }, { "epoch": 1.8168498168498168, "grad_norm": 1.4375, "learning_rate": 9.999903946156951e-05, "loss": 0.1206, "step": 2480 }, { "epoch": 1.8241758241758241, "grad_norm": 1.15625, "learning_rate": 9.999881415098543e-05, "loss": 0.102, "step": 2490 }, { "epoch": 1.8315018315018317, "grad_norm": 1.890625, "learning_rate": 9.99985651238955e-05, "loss": 0.1228, "step": 2500 }, { "epoch": 1.838827838827839, "grad_norm": 1.28125, "learning_rate": 9.999829238041902e-05, "loss": 0.1202, "step": 2510 }, { "epoch": 1.8461538461538463, "grad_norm": 1.1796875, "learning_rate": 9.999799592068666e-05, "loss": 0.1098, "step": 2520 }, { "epoch": 1.8534798534798536, "grad_norm": 1.296875, "learning_rate": 9.99976757448405e-05, "loss": 0.107, "step": 2530 }, { "epoch": 1.8608058608058609, "grad_norm": 1.3515625, "learning_rate": 9.999733185303392e-05, "loss": 0.1233, "step": 2540 }, { "epoch": 1.8681318681318682, "grad_norm": 1.109375, "learning_rate": 9.999696424543167e-05, "loss": 0.1294, "step": 2550 }, { "epoch": 1.8754578754578755, "grad_norm": 1.3984375, "learning_rate": 9.999657292220994e-05, "loss": 0.116, "step": 2560 }, { "epoch": 1.8827838827838828, "grad_norm": 1.5625, "learning_rate": 9.999615788355619e-05, "loss": 0.1124, "step": 2570 }, { "epoch": 1.89010989010989, "grad_norm": 1.4296875, "learning_rate": 9.999571912966927e-05, "loss": 0.1177, "step": 2580 }, { "epoch": 1.8974358974358974, "grad_norm": 0.89453125, "learning_rate": 9.999525666075943e-05, "loss": 0.1191, "step": 2590 }, { "epoch": 1.9047619047619047, "grad_norm": 1.484375, "learning_rate": 9.999477047704822e-05, "loss": 0.1096, "step": 2600 }, { "epoch": 1.912087912087912, "grad_norm": 2.015625, "learning_rate": 9.999426057876861e-05, "loss": 0.108, "step": 2610 }, { "epoch": 1.9194139194139193, "grad_norm": 1.0390625, "learning_rate": 9.999372696616491e-05, "loss": 0.1154, "step": 2620 }, { "epoch": 1.9267399267399268, "grad_norm": 1.21875, "learning_rate": 9.999316963949279e-05, "loss": 0.1078, "step": 2630 }, { "epoch": 1.934065934065934, "grad_norm": 1.03125, "learning_rate": 9.999258859901927e-05, "loss": 0.0991, "step": 2640 }, { "epoch": 1.9413919413919414, "grad_norm": 1.09375, "learning_rate": 9.999198384502276e-05, "loss": 0.1119, "step": 2650 }, { "epoch": 1.9487179487179487, "grad_norm": 1.40625, "learning_rate": 9.9991355377793e-05, "loss": 0.1267, "step": 2660 }, { "epoch": 1.9560439560439562, "grad_norm": 1.1015625, "learning_rate": 9.999070319763113e-05, "loss": 0.115, "step": 2670 }, { "epoch": 1.9633699633699635, "grad_norm": 1.4921875, "learning_rate": 9.999002730484962e-05, "loss": 0.1152, "step": 2680 }, { "epoch": 1.9706959706959708, "grad_norm": 1.078125, "learning_rate": 9.998932769977231e-05, "loss": 0.1114, "step": 2690 }, { "epoch": 1.978021978021978, "grad_norm": 1.2578125, "learning_rate": 9.99886043827344e-05, "loss": 0.1139, "step": 2700 }, { "epoch": 1.9853479853479854, "grad_norm": 1.1015625, "learning_rate": 9.998785735408248e-05, "loss": 0.1107, "step": 2710 }, { "epoch": 1.9926739926739927, "grad_norm": 1.4375, "learning_rate": 9.998708661417447e-05, "loss": 0.112, "step": 2720 }, { "epoch": 2.0, "grad_norm": 1.25, "learning_rate": 9.998629216337962e-05, "loss": 0.1069, "step": 2730 }, { "epoch": 2.0073260073260073, "grad_norm": 0.984375, "learning_rate": 9.998547400207861e-05, "loss": 0.1059, "step": 2740 }, { "epoch": 2.0146520146520146, "grad_norm": 1.234375, "learning_rate": 9.998463213066344e-05, "loss": 0.0928, "step": 2750 }, { "epoch": 2.021978021978022, "grad_norm": 1.5546875, "learning_rate": 9.998376654953749e-05, "loss": 0.0965, "step": 2760 }, { "epoch": 2.029304029304029, "grad_norm": 1.25, "learning_rate": 9.998287725911547e-05, "loss": 0.0966, "step": 2770 }, { "epoch": 2.0366300366300365, "grad_norm": 1.4140625, "learning_rate": 9.998196425982348e-05, "loss": 0.0987, "step": 2780 }, { "epoch": 2.043956043956044, "grad_norm": 1.6640625, "learning_rate": 9.998102755209897e-05, "loss": 0.0898, "step": 2790 }, { "epoch": 2.051282051282051, "grad_norm": 1.4296875, "learning_rate": 9.998006713639073e-05, "loss": 0.0984, "step": 2800 }, { "epoch": 2.0586080586080584, "grad_norm": 1.1171875, "learning_rate": 9.997908301315893e-05, "loss": 0.0998, "step": 2810 }, { "epoch": 2.065934065934066, "grad_norm": 1.59375, "learning_rate": 9.997807518287511e-05, "loss": 0.0893, "step": 2820 }, { "epoch": 2.0732600732600734, "grad_norm": 1.3984375, "learning_rate": 9.997704364602215e-05, "loss": 0.0974, "step": 2830 }, { "epoch": 2.0805860805860807, "grad_norm": 1.2109375, "learning_rate": 9.99759884030943e-05, "loss": 0.0971, "step": 2840 }, { "epoch": 2.087912087912088, "grad_norm": 1.6796875, "learning_rate": 9.997490945459714e-05, "loss": 0.1006, "step": 2850 }, { "epoch": 2.0952380952380953, "grad_norm": 1.078125, "learning_rate": 9.997380680104766e-05, "loss": 0.0998, "step": 2860 }, { "epoch": 2.1025641025641026, "grad_norm": 0.8671875, "learning_rate": 9.997268044297413e-05, "loss": 0.0924, "step": 2870 }, { "epoch": 2.10989010989011, "grad_norm": 1.1640625, "learning_rate": 9.997153038091628e-05, "loss": 0.0912, "step": 2880 }, { "epoch": 2.1172161172161172, "grad_norm": 1.3203125, "learning_rate": 9.99703566154251e-05, "loss": 0.0922, "step": 2890 }, { "epoch": 2.1245421245421245, "grad_norm": 1.2578125, "learning_rate": 9.9969159147063e-05, "loss": 0.0973, "step": 2900 }, { "epoch": 2.131868131868132, "grad_norm": 1.234375, "learning_rate": 9.996793797640374e-05, "loss": 0.1174, "step": 2910 }, { "epoch": 2.139194139194139, "grad_norm": 1.25, "learning_rate": 9.996669310403239e-05, "loss": 0.0968, "step": 2920 }, { "epoch": 2.1465201465201464, "grad_norm": 1.2265625, "learning_rate": 9.996542453054544e-05, "loss": 0.0939, "step": 2930 }, { "epoch": 2.1538461538461537, "grad_norm": 1.1171875, "learning_rate": 9.996413225655068e-05, "loss": 0.0934, "step": 2940 }, { "epoch": 2.161172161172161, "grad_norm": 1.4921875, "learning_rate": 9.996281628266729e-05, "loss": 0.0974, "step": 2950 }, { "epoch": 2.1684981684981683, "grad_norm": 1.4609375, "learning_rate": 9.99614766095258e-05, "loss": 0.1074, "step": 2960 }, { "epoch": 2.1758241758241756, "grad_norm": 1.21875, "learning_rate": 9.996011323776809e-05, "loss": 0.0959, "step": 2970 }, { "epoch": 2.183150183150183, "grad_norm": 1.1484375, "learning_rate": 9.99587261680474e-05, "loss": 0.0872, "step": 2980 }, { "epoch": 2.1904761904761907, "grad_norm": 1.1015625, "learning_rate": 9.995731540102831e-05, "loss": 0.0834, "step": 2990 }, { "epoch": 2.197802197802198, "grad_norm": 1.453125, "learning_rate": 9.995588093738677e-05, "loss": 0.1022, "step": 3000 }, { "epoch": 2.2051282051282053, "grad_norm": 1.15625, "learning_rate": 9.995442277781009e-05, "loss": 0.0986, "step": 3010 }, { "epoch": 2.2124542124542126, "grad_norm": 1.1953125, "learning_rate": 9.99529409229969e-05, "loss": 0.1006, "step": 3020 }, { "epoch": 2.21978021978022, "grad_norm": 1.1484375, "learning_rate": 9.99514353736572e-05, "loss": 0.0887, "step": 3030 }, { "epoch": 2.227106227106227, "grad_norm": 1.25, "learning_rate": 9.994990613051238e-05, "loss": 0.0884, "step": 3040 }, { "epoch": 2.2344322344322345, "grad_norm": 1.0078125, "learning_rate": 9.994835319429515e-05, "loss": 0.097, "step": 3050 }, { "epoch": 2.241758241758242, "grad_norm": 1.4140625, "learning_rate": 9.994677656574954e-05, "loss": 0.0929, "step": 3060 }, { "epoch": 2.249084249084249, "grad_norm": 1.1328125, "learning_rate": 9.994517624563098e-05, "loss": 0.1004, "step": 3070 }, { "epoch": 2.2564102564102564, "grad_norm": 1.1796875, "learning_rate": 9.994355223470626e-05, "loss": 0.094, "step": 3080 }, { "epoch": 2.2637362637362637, "grad_norm": 1.125, "learning_rate": 9.994190453375347e-05, "loss": 0.1002, "step": 3090 }, { "epoch": 2.271062271062271, "grad_norm": 1.546875, "learning_rate": 9.994023314356207e-05, "loss": 0.0976, "step": 3100 }, { "epoch": 2.2783882783882783, "grad_norm": 1.03125, "learning_rate": 9.993853806493289e-05, "loss": 0.0961, "step": 3110 }, { "epoch": 2.2857142857142856, "grad_norm": 0.9296875, "learning_rate": 9.993681929867812e-05, "loss": 0.0903, "step": 3120 }, { "epoch": 2.293040293040293, "grad_norm": 1.3984375, "learning_rate": 9.993507684562124e-05, "loss": 0.0937, "step": 3130 }, { "epoch": 2.3003663003663, "grad_norm": 0.94921875, "learning_rate": 9.993331070659712e-05, "loss": 0.0932, "step": 3140 }, { "epoch": 2.3076923076923075, "grad_norm": 0.98828125, "learning_rate": 9.9931520882452e-05, "loss": 0.0879, "step": 3150 }, { "epoch": 2.315018315018315, "grad_norm": 1.1640625, "learning_rate": 9.992970737404344e-05, "loss": 0.0867, "step": 3160 }, { "epoch": 2.3223443223443225, "grad_norm": 0.94921875, "learning_rate": 9.992787018224032e-05, "loss": 0.0898, "step": 3170 }, { "epoch": 2.32967032967033, "grad_norm": 1.25, "learning_rate": 9.992600930792294e-05, "loss": 0.0873, "step": 3180 }, { "epoch": 2.336996336996337, "grad_norm": 1.6328125, "learning_rate": 9.992412475198287e-05, "loss": 0.0905, "step": 3190 }, { "epoch": 2.3443223443223444, "grad_norm": 1.6171875, "learning_rate": 9.992221651532311e-05, "loss": 0.1036, "step": 3200 }, { "epoch": 2.3516483516483517, "grad_norm": 1.0546875, "learning_rate": 9.99202845988579e-05, "loss": 0.0916, "step": 3210 }, { "epoch": 2.358974358974359, "grad_norm": 1.015625, "learning_rate": 9.991832900351291e-05, "loss": 0.0865, "step": 3220 }, { "epoch": 2.3663003663003663, "grad_norm": 0.9609375, "learning_rate": 9.991634973022515e-05, "loss": 0.0877, "step": 3230 }, { "epoch": 2.3736263736263736, "grad_norm": 0.81640625, "learning_rate": 9.991434677994293e-05, "loss": 0.0765, "step": 3240 }, { "epoch": 2.380952380952381, "grad_norm": 0.84765625, "learning_rate": 9.991232015362594e-05, "loss": 0.0781, "step": 3250 }, { "epoch": 2.3882783882783882, "grad_norm": 1.0078125, "learning_rate": 9.99102698522452e-05, "loss": 0.1029, "step": 3260 }, { "epoch": 2.3956043956043955, "grad_norm": 1.21875, "learning_rate": 9.990819587678308e-05, "loss": 0.0826, "step": 3270 }, { "epoch": 2.402930402930403, "grad_norm": 1.4140625, "learning_rate": 9.990609822823327e-05, "loss": 0.0811, "step": 3280 }, { "epoch": 2.41025641025641, "grad_norm": 1.03125, "learning_rate": 9.990397690760086e-05, "loss": 0.091, "step": 3290 }, { "epoch": 2.4175824175824174, "grad_norm": 1.078125, "learning_rate": 9.99018319159022e-05, "loss": 0.0785, "step": 3300 }, { "epoch": 2.4249084249084247, "grad_norm": 0.89453125, "learning_rate": 9.989966325416506e-05, "loss": 0.0786, "step": 3310 }, { "epoch": 2.4322344322344325, "grad_norm": 0.9609375, "learning_rate": 9.989747092342852e-05, "loss": 0.0817, "step": 3320 }, { "epoch": 2.4395604395604398, "grad_norm": 1.359375, "learning_rate": 9.989525492474297e-05, "loss": 0.0828, "step": 3330 }, { "epoch": 2.446886446886447, "grad_norm": 0.97265625, "learning_rate": 9.989301525917016e-05, "loss": 0.0801, "step": 3340 }, { "epoch": 2.4542124542124544, "grad_norm": 1.2890625, "learning_rate": 9.989075192778323e-05, "loss": 0.0926, "step": 3350 }, { "epoch": 2.4615384615384617, "grad_norm": 1.0546875, "learning_rate": 9.98884649316666e-05, "loss": 0.0766, "step": 3360 }, { "epoch": 2.468864468864469, "grad_norm": 1.09375, "learning_rate": 9.988615427191603e-05, "loss": 0.099, "step": 3370 }, { "epoch": 2.4761904761904763, "grad_norm": 1.1640625, "learning_rate": 9.988381994963864e-05, "loss": 0.0886, "step": 3380 }, { "epoch": 2.4835164835164836, "grad_norm": 1.0859375, "learning_rate": 9.988146196595289e-05, "loss": 0.0944, "step": 3390 }, { "epoch": 2.490842490842491, "grad_norm": 1.0234375, "learning_rate": 9.987908032198856e-05, "loss": 0.0923, "step": 3400 }, { "epoch": 2.498168498168498, "grad_norm": 1.015625, "learning_rate": 9.987667501888676e-05, "loss": 0.082, "step": 3410 }, { "epoch": 2.5054945054945055, "grad_norm": 0.9140625, "learning_rate": 9.987424605779998e-05, "loss": 0.0872, "step": 3420 }, { "epoch": 2.5128205128205128, "grad_norm": 0.984375, "learning_rate": 9.9871793439892e-05, "loss": 0.0866, "step": 3430 }, { "epoch": 2.52014652014652, "grad_norm": 1.515625, "learning_rate": 9.986931716633795e-05, "loss": 0.0816, "step": 3440 }, { "epoch": 2.5274725274725274, "grad_norm": 0.98046875, "learning_rate": 9.986681723832428e-05, "loss": 0.0814, "step": 3450 }, { "epoch": 2.5347985347985347, "grad_norm": 1.015625, "learning_rate": 9.98642936570488e-05, "loss": 0.0914, "step": 3460 }, { "epoch": 2.542124542124542, "grad_norm": 0.78125, "learning_rate": 9.986174642372064e-05, "loss": 0.0744, "step": 3470 }, { "epoch": 2.5494505494505493, "grad_norm": 1.1328125, "learning_rate": 9.985917553956028e-05, "loss": 0.0863, "step": 3480 }, { "epoch": 2.5567765567765566, "grad_norm": 0.87890625, "learning_rate": 9.985658100579948e-05, "loss": 0.0796, "step": 3490 }, { "epoch": 2.564102564102564, "grad_norm": 1.015625, "learning_rate": 9.985396282368139e-05, "loss": 0.0936, "step": 3500 }, { "epoch": 2.571428571428571, "grad_norm": 1.015625, "learning_rate": 9.985132099446045e-05, "loss": 0.0878, "step": 3510 }, { "epoch": 2.578754578754579, "grad_norm": 1.0390625, "learning_rate": 9.984865551940247e-05, "loss": 0.0778, "step": 3520 }, { "epoch": 2.586080586080586, "grad_norm": 1.0234375, "learning_rate": 9.984596639978455e-05, "loss": 0.0752, "step": 3530 }, { "epoch": 2.5934065934065935, "grad_norm": 1.1328125, "learning_rate": 9.984325363689514e-05, "loss": 0.0919, "step": 3540 }, { "epoch": 2.600732600732601, "grad_norm": 1.0390625, "learning_rate": 9.984051723203402e-05, "loss": 0.0847, "step": 3550 }, { "epoch": 2.608058608058608, "grad_norm": 1.015625, "learning_rate": 9.983775718651229e-05, "loss": 0.0823, "step": 3560 }, { "epoch": 2.6153846153846154, "grad_norm": 1.09375, "learning_rate": 9.983497350165236e-05, "loss": 0.0885, "step": 3570 }, { "epoch": 2.6227106227106227, "grad_norm": 0.96875, "learning_rate": 9.983216617878802e-05, "loss": 0.0796, "step": 3580 }, { "epoch": 2.63003663003663, "grad_norm": 0.9140625, "learning_rate": 9.98293352192643e-05, "loss": 0.0786, "step": 3590 }, { "epoch": 2.6373626373626373, "grad_norm": 1.0625, "learning_rate": 9.982648062443764e-05, "loss": 0.086, "step": 3600 }, { "epoch": 2.6446886446886446, "grad_norm": 0.88671875, "learning_rate": 9.982360239567579e-05, "loss": 0.0742, "step": 3610 }, { "epoch": 2.652014652014652, "grad_norm": 0.99609375, "learning_rate": 9.982070053435775e-05, "loss": 0.0808, "step": 3620 }, { "epoch": 2.659340659340659, "grad_norm": 0.9375, "learning_rate": 9.981777504187394e-05, "loss": 0.0837, "step": 3630 }, { "epoch": 2.6666666666666665, "grad_norm": 1.0390625, "learning_rate": 9.981482591962606e-05, "loss": 0.0789, "step": 3640 }, { "epoch": 2.6739926739926743, "grad_norm": 0.796875, "learning_rate": 9.98118531690271e-05, "loss": 0.0834, "step": 3650 }, { "epoch": 2.6813186813186816, "grad_norm": 0.6796875, "learning_rate": 9.98088567915014e-05, "loss": 0.072, "step": 3660 }, { "epoch": 2.688644688644689, "grad_norm": 0.85546875, "learning_rate": 9.980583678848467e-05, "loss": 0.0824, "step": 3670 }, { "epoch": 2.695970695970696, "grad_norm": 1.1640625, "learning_rate": 9.980279316142385e-05, "loss": 0.0864, "step": 3680 }, { "epoch": 2.7032967032967035, "grad_norm": 0.98828125, "learning_rate": 9.979972591177725e-05, "loss": 0.0866, "step": 3690 }, { "epoch": 2.7106227106227108, "grad_norm": 1.1640625, "learning_rate": 9.97966350410145e-05, "loss": 0.0835, "step": 3700 }, { "epoch": 2.717948717948718, "grad_norm": 0.86328125, "learning_rate": 9.979352055061654e-05, "loss": 0.0803, "step": 3710 }, { "epoch": 2.7252747252747254, "grad_norm": 0.96484375, "learning_rate": 9.979038244207561e-05, "loss": 0.0763, "step": 3720 }, { "epoch": 2.7326007326007327, "grad_norm": 0.9765625, "learning_rate": 9.97872207168953e-05, "loss": 0.0706, "step": 3730 }, { "epoch": 2.73992673992674, "grad_norm": 0.8984375, "learning_rate": 9.978403537659047e-05, "loss": 0.0784, "step": 3740 }, { "epoch": 2.7472527472527473, "grad_norm": 0.9375, "learning_rate": 9.978082642268733e-05, "loss": 0.0781, "step": 3750 }, { "epoch": 2.7545787545787546, "grad_norm": 0.89453125, "learning_rate": 9.977759385672342e-05, "loss": 0.0776, "step": 3760 }, { "epoch": 2.761904761904762, "grad_norm": 1.0390625, "learning_rate": 9.977433768024754e-05, "loss": 0.0912, "step": 3770 }, { "epoch": 2.769230769230769, "grad_norm": 0.85546875, "learning_rate": 9.977105789481984e-05, "loss": 0.0771, "step": 3780 }, { "epoch": 2.7765567765567765, "grad_norm": 1.078125, "learning_rate": 9.976775450201177e-05, "loss": 0.0733, "step": 3790 }, { "epoch": 2.7838827838827838, "grad_norm": 1.09375, "learning_rate": 9.97644275034061e-05, "loss": 0.0765, "step": 3800 }, { "epoch": 2.791208791208791, "grad_norm": 1.0234375, "learning_rate": 9.976107690059689e-05, "loss": 0.0726, "step": 3810 }, { "epoch": 2.7985347985347984, "grad_norm": 0.88671875, "learning_rate": 9.975770269518955e-05, "loss": 0.0814, "step": 3820 }, { "epoch": 2.8058608058608057, "grad_norm": 0.89453125, "learning_rate": 9.975430488880076e-05, "loss": 0.0846, "step": 3830 }, { "epoch": 2.813186813186813, "grad_norm": 0.8125, "learning_rate": 9.97508834830585e-05, "loss": 0.0721, "step": 3840 }, { "epoch": 2.8205128205128203, "grad_norm": 0.97265625, "learning_rate": 9.974743847960212e-05, "loss": 0.0787, "step": 3850 }, { "epoch": 2.8278388278388276, "grad_norm": 0.98046875, "learning_rate": 9.97439698800822e-05, "loss": 0.0781, "step": 3860 }, { "epoch": 2.8351648351648353, "grad_norm": 1.5078125, "learning_rate": 9.974047768616065e-05, "loss": 0.0809, "step": 3870 }, { "epoch": 2.8424908424908426, "grad_norm": 0.85546875, "learning_rate": 9.973696189951072e-05, "loss": 0.0779, "step": 3880 }, { "epoch": 2.84981684981685, "grad_norm": 0.8203125, "learning_rate": 9.973342252181696e-05, "loss": 0.0745, "step": 3890 }, { "epoch": 2.857142857142857, "grad_norm": 0.9765625, "learning_rate": 9.972985955477514e-05, "loss": 0.0748, "step": 3900 }, { "epoch": 2.8644688644688645, "grad_norm": 1.15625, "learning_rate": 9.972627300009246e-05, "loss": 0.0751, "step": 3910 }, { "epoch": 2.871794871794872, "grad_norm": 1.1328125, "learning_rate": 9.97226628594873e-05, "loss": 0.0755, "step": 3920 }, { "epoch": 2.879120879120879, "grad_norm": 0.83203125, "learning_rate": 9.971902913468944e-05, "loss": 0.0726, "step": 3930 }, { "epoch": 2.8864468864468864, "grad_norm": 1.15625, "learning_rate": 9.971537182743988e-05, "loss": 0.0786, "step": 3940 }, { "epoch": 2.8937728937728937, "grad_norm": 1.1328125, "learning_rate": 9.971169093949099e-05, "loss": 0.0812, "step": 3950 }, { "epoch": 2.901098901098901, "grad_norm": 0.9609375, "learning_rate": 9.970798647260639e-05, "loss": 0.0701, "step": 3960 }, { "epoch": 2.9084249084249083, "grad_norm": 0.9296875, "learning_rate": 9.970425842856099e-05, "loss": 0.0748, "step": 3970 }, { "epoch": 2.9157509157509156, "grad_norm": 0.94921875, "learning_rate": 9.970050680914105e-05, "loss": 0.0723, "step": 3980 }, { "epoch": 2.9230769230769234, "grad_norm": 0.69921875, "learning_rate": 9.96967316161441e-05, "loss": 0.0721, "step": 3990 }, { "epoch": 2.9304029304029307, "grad_norm": 0.984375, "learning_rate": 9.969293285137891e-05, "loss": 0.0798, "step": 4000 }, { "epoch": 2.937728937728938, "grad_norm": 1.2421875, "learning_rate": 9.968911051666566e-05, "loss": 0.0733, "step": 4010 }, { "epoch": 2.9450549450549453, "grad_norm": 1.09375, "learning_rate": 9.968526461383569e-05, "loss": 0.0693, "step": 4020 }, { "epoch": 2.9523809523809526, "grad_norm": 1.0390625, "learning_rate": 9.968139514473174e-05, "loss": 0.0814, "step": 4030 }, { "epoch": 2.95970695970696, "grad_norm": 0.921875, "learning_rate": 9.967750211120778e-05, "loss": 0.0834, "step": 4040 }, { "epoch": 2.967032967032967, "grad_norm": 0.75390625, "learning_rate": 9.96735855151291e-05, "loss": 0.0699, "step": 4050 }, { "epoch": 2.9743589743589745, "grad_norm": 1.1171875, "learning_rate": 9.966964535837229e-05, "loss": 0.0783, "step": 4060 }, { "epoch": 2.9816849816849818, "grad_norm": 0.8125, "learning_rate": 9.966568164282517e-05, "loss": 0.0795, "step": 4070 }, { "epoch": 2.989010989010989, "grad_norm": 0.890625, "learning_rate": 9.96616943703869e-05, "loss": 0.0681, "step": 4080 }, { "epoch": 2.9963369963369964, "grad_norm": 0.91796875, "learning_rate": 9.965768354296791e-05, "loss": 0.07, "step": 4090 }, { "epoch": 3.0036630036630036, "grad_norm": 0.89453125, "learning_rate": 9.965364916248993e-05, "loss": 0.0773, "step": 4100 }, { "epoch": 3.010989010989011, "grad_norm": 1.125, "learning_rate": 9.964959123088598e-05, "loss": 0.0727, "step": 4110 }, { "epoch": 3.0183150183150182, "grad_norm": 1.0234375, "learning_rate": 9.96455097501003e-05, "loss": 0.0703, "step": 4120 }, { "epoch": 3.0256410256410255, "grad_norm": 1.5546875, "learning_rate": 9.964140472208849e-05, "loss": 0.0756, "step": 4130 }, { "epoch": 3.032967032967033, "grad_norm": 1.2109375, "learning_rate": 9.963727614881742e-05, "loss": 0.0756, "step": 4140 }, { "epoch": 3.04029304029304, "grad_norm": 0.8671875, "learning_rate": 9.963312403226518e-05, "loss": 0.0656, "step": 4150 }, { "epoch": 3.0476190476190474, "grad_norm": 0.70703125, "learning_rate": 9.962894837442123e-05, "loss": 0.0585, "step": 4160 }, { "epoch": 3.0549450549450547, "grad_norm": 0.98046875, "learning_rate": 9.962474917728624e-05, "loss": 0.0732, "step": 4170 }, { "epoch": 3.062271062271062, "grad_norm": 0.95703125, "learning_rate": 9.96205264428722e-05, "loss": 0.0681, "step": 4180 }, { "epoch": 3.06959706959707, "grad_norm": 0.9140625, "learning_rate": 9.961628017320234e-05, "loss": 0.0701, "step": 4190 }, { "epoch": 3.076923076923077, "grad_norm": 0.75, "learning_rate": 9.961201037031119e-05, "loss": 0.0623, "step": 4200 }, { "epoch": 3.0842490842490844, "grad_norm": 0.97265625, "learning_rate": 9.960771703624457e-05, "loss": 0.0629, "step": 4210 }, { "epoch": 3.0915750915750917, "grad_norm": 0.87890625, "learning_rate": 9.960340017305954e-05, "loss": 0.0636, "step": 4220 }, { "epoch": 3.098901098901099, "grad_norm": 0.98828125, "learning_rate": 9.959905978282446e-05, "loss": 0.0623, "step": 4230 }, { "epoch": 3.1062271062271063, "grad_norm": 0.80859375, "learning_rate": 9.959469586761898e-05, "loss": 0.0707, "step": 4240 }, { "epoch": 3.1135531135531136, "grad_norm": 1.5, "learning_rate": 9.959030842953393e-05, "loss": 0.0719, "step": 4250 }, { "epoch": 3.120879120879121, "grad_norm": 0.7734375, "learning_rate": 9.958589747067151e-05, "loss": 0.0726, "step": 4260 }, { "epoch": 3.128205128205128, "grad_norm": 0.78125, "learning_rate": 9.958146299314519e-05, "loss": 0.0664, "step": 4270 }, { "epoch": 3.1355311355311355, "grad_norm": 0.86328125, "learning_rate": 9.957700499907961e-05, "loss": 0.067, "step": 4280 }, { "epoch": 3.142857142857143, "grad_norm": 0.984375, "learning_rate": 9.957252349061078e-05, "loss": 0.063, "step": 4290 }, { "epoch": 3.15018315018315, "grad_norm": 1.1796875, "learning_rate": 9.956801846988593e-05, "loss": 0.0729, "step": 4300 }, { "epoch": 3.1575091575091574, "grad_norm": 1.03125, "learning_rate": 9.956348993906358e-05, "loss": 0.0642, "step": 4310 }, { "epoch": 3.1648351648351647, "grad_norm": 0.80859375, "learning_rate": 9.955893790031346e-05, "loss": 0.0669, "step": 4320 }, { "epoch": 3.172161172161172, "grad_norm": 0.8828125, "learning_rate": 9.955436235581662e-05, "loss": 0.0639, "step": 4330 }, { "epoch": 3.1794871794871793, "grad_norm": 0.83984375, "learning_rate": 9.954976330776536e-05, "loss": 0.0666, "step": 4340 }, { "epoch": 3.186813186813187, "grad_norm": 1.0234375, "learning_rate": 9.954514075836323e-05, "loss": 0.065, "step": 4350 }, { "epoch": 3.1941391941391943, "grad_norm": 0.984375, "learning_rate": 9.954049470982505e-05, "loss": 0.0611, "step": 4360 }, { "epoch": 3.2014652014652016, "grad_norm": 0.7421875, "learning_rate": 9.953582516437688e-05, "loss": 0.0633, "step": 4370 }, { "epoch": 3.208791208791209, "grad_norm": 0.84375, "learning_rate": 9.953113212425608e-05, "loss": 0.0633, "step": 4380 }, { "epoch": 3.2161172161172162, "grad_norm": 1.046875, "learning_rate": 9.95264155917112e-05, "loss": 0.0708, "step": 4390 }, { "epoch": 3.2234432234432235, "grad_norm": 1.6796875, "learning_rate": 9.952167556900212e-05, "loss": 0.0792, "step": 4400 }, { "epoch": 3.230769230769231, "grad_norm": 0.953125, "learning_rate": 9.951691205839992e-05, "loss": 0.0674, "step": 4410 }, { "epoch": 3.238095238095238, "grad_norm": 1.03125, "learning_rate": 9.951212506218696e-05, "loss": 0.0759, "step": 4420 }, { "epoch": 3.2454212454212454, "grad_norm": 0.8046875, "learning_rate": 9.950731458265685e-05, "loss": 0.067, "step": 4430 }, { "epoch": 3.2527472527472527, "grad_norm": 1.5, "learning_rate": 9.950248062211445e-05, "loss": 0.0733, "step": 4440 }, { "epoch": 3.26007326007326, "grad_norm": 0.8984375, "learning_rate": 9.949762318287587e-05, "loss": 0.0628, "step": 4450 }, { "epoch": 3.2673992673992673, "grad_norm": 0.80859375, "learning_rate": 9.949274226726844e-05, "loss": 0.0684, "step": 4460 }, { "epoch": 3.2747252747252746, "grad_norm": 1.0859375, "learning_rate": 9.948783787763082e-05, "loss": 0.0659, "step": 4470 }, { "epoch": 3.282051282051282, "grad_norm": 0.87890625, "learning_rate": 9.948291001631283e-05, "loss": 0.0638, "step": 4480 }, { "epoch": 3.2893772893772892, "grad_norm": 0.90234375, "learning_rate": 9.947795868567556e-05, "loss": 0.0655, "step": 4490 }, { "epoch": 3.2967032967032965, "grad_norm": 0.68359375, "learning_rate": 9.947298388809139e-05, "loss": 0.0628, "step": 4500 }, { "epoch": 3.304029304029304, "grad_norm": 0.99609375, "learning_rate": 9.946798562594389e-05, "loss": 0.0764, "step": 4510 }, { "epoch": 3.311355311355311, "grad_norm": 1.03125, "learning_rate": 9.946296390162786e-05, "loss": 0.0635, "step": 4520 }, { "epoch": 3.3186813186813184, "grad_norm": 0.93359375, "learning_rate": 9.945791871754943e-05, "loss": 0.0709, "step": 4530 }, { "epoch": 3.326007326007326, "grad_norm": 0.8828125, "learning_rate": 9.945285007612586e-05, "loss": 0.0633, "step": 4540 }, { "epoch": 3.3333333333333335, "grad_norm": 0.75390625, "learning_rate": 9.944775797978575e-05, "loss": 0.0622, "step": 4550 }, { "epoch": 3.340659340659341, "grad_norm": 0.80859375, "learning_rate": 9.944264243096887e-05, "loss": 0.0602, "step": 4560 }, { "epoch": 3.347985347985348, "grad_norm": 0.8515625, "learning_rate": 9.943750343212624e-05, "loss": 0.0646, "step": 4570 }, { "epoch": 3.3553113553113554, "grad_norm": 0.97265625, "learning_rate": 9.943234098572013e-05, "loss": 0.0689, "step": 4580 }, { "epoch": 3.3626373626373627, "grad_norm": 0.76171875, "learning_rate": 9.942715509422404e-05, "loss": 0.0647, "step": 4590 }, { "epoch": 3.36996336996337, "grad_norm": 0.8984375, "learning_rate": 9.942194576012269e-05, "loss": 0.0682, "step": 4600 }, { "epoch": 3.3772893772893773, "grad_norm": 0.8515625, "learning_rate": 9.941671298591208e-05, "loss": 0.0671, "step": 4610 }, { "epoch": 3.3846153846153846, "grad_norm": 0.91015625, "learning_rate": 9.941145677409934e-05, "loss": 0.0714, "step": 4620 }, { "epoch": 3.391941391941392, "grad_norm": 0.97265625, "learning_rate": 9.940617712720294e-05, "loss": 0.068, "step": 4630 }, { "epoch": 3.399267399267399, "grad_norm": 0.8828125, "learning_rate": 9.940087404775251e-05, "loss": 0.0691, "step": 4640 }, { "epoch": 3.4065934065934065, "grad_norm": 0.94140625, "learning_rate": 9.939554753828895e-05, "loss": 0.0719, "step": 4650 }, { "epoch": 3.413919413919414, "grad_norm": 0.98828125, "learning_rate": 9.939019760136437e-05, "loss": 0.0773, "step": 4660 }, { "epoch": 3.421245421245421, "grad_norm": 1.0, "learning_rate": 9.938482423954207e-05, "loss": 0.0693, "step": 4670 }, { "epoch": 3.4285714285714284, "grad_norm": 0.6875, "learning_rate": 9.93794274553966e-05, "loss": 0.0572, "step": 4680 }, { "epoch": 3.435897435897436, "grad_norm": 0.7890625, "learning_rate": 9.937400725151376e-05, "loss": 0.0706, "step": 4690 }, { "epoch": 3.4432234432234434, "grad_norm": 0.97265625, "learning_rate": 9.936856363049057e-05, "loss": 0.0685, "step": 4700 }, { "epoch": 3.4505494505494507, "grad_norm": 0.95703125, "learning_rate": 9.93630965949352e-05, "loss": 0.0622, "step": 4710 }, { "epoch": 3.457875457875458, "grad_norm": 0.8359375, "learning_rate": 9.935760614746712e-05, "loss": 0.0644, "step": 4720 }, { "epoch": 3.4652014652014653, "grad_norm": 0.875, "learning_rate": 9.935209229071696e-05, "loss": 0.0631, "step": 4730 }, { "epoch": 3.4725274725274726, "grad_norm": 0.86328125, "learning_rate": 9.93465550273266e-05, "loss": 0.0606, "step": 4740 }, { "epoch": 3.47985347985348, "grad_norm": 0.921875, "learning_rate": 9.934099435994912e-05, "loss": 0.0627, "step": 4750 }, { "epoch": 3.4871794871794872, "grad_norm": 0.75390625, "learning_rate": 9.933541029124883e-05, "loss": 0.0646, "step": 4760 }, { "epoch": 3.4945054945054945, "grad_norm": 0.78125, "learning_rate": 9.932980282390123e-05, "loss": 0.0661, "step": 4770 }, { "epoch": 3.501831501831502, "grad_norm": 0.74609375, "learning_rate": 9.932417196059305e-05, "loss": 0.0682, "step": 4780 }, { "epoch": 3.509157509157509, "grad_norm": 0.921875, "learning_rate": 9.93185177040222e-05, "loss": 0.062, "step": 4790 }, { "epoch": 3.5164835164835164, "grad_norm": 0.8671875, "learning_rate": 9.931284005689784e-05, "loss": 0.066, "step": 4800 }, { "epoch": 3.5238095238095237, "grad_norm": 0.8671875, "learning_rate": 9.930713902194033e-05, "loss": 0.0603, "step": 4810 }, { "epoch": 3.531135531135531, "grad_norm": 0.81640625, "learning_rate": 9.930141460188119e-05, "loss": 0.0593, "step": 4820 }, { "epoch": 3.5384615384615383, "grad_norm": 1.15625, "learning_rate": 9.92956667994632e-05, "loss": 0.0686, "step": 4830 }, { "epoch": 3.5457875457875456, "grad_norm": 0.83203125, "learning_rate": 9.928989561744031e-05, "loss": 0.0748, "step": 4840 }, { "epoch": 3.553113553113553, "grad_norm": 0.9375, "learning_rate": 9.928410105857768e-05, "loss": 0.064, "step": 4850 }, { "epoch": 3.5604395604395602, "grad_norm": 0.95703125, "learning_rate": 9.927828312565167e-05, "loss": 0.0593, "step": 4860 }, { "epoch": 3.5677655677655675, "grad_norm": 0.77734375, "learning_rate": 9.927244182144988e-05, "loss": 0.0615, "step": 4870 }, { "epoch": 3.575091575091575, "grad_norm": 0.953125, "learning_rate": 9.926657714877103e-05, "loss": 0.0706, "step": 4880 }, { "epoch": 3.5824175824175826, "grad_norm": 1.203125, "learning_rate": 9.926068911042509e-05, "loss": 0.0642, "step": 4890 }, { "epoch": 3.58974358974359, "grad_norm": 1.078125, "learning_rate": 9.92547777092332e-05, "loss": 0.0655, "step": 4900 }, { "epoch": 3.597069597069597, "grad_norm": 0.6953125, "learning_rate": 9.924884294802773e-05, "loss": 0.0642, "step": 4910 }, { "epoch": 3.6043956043956045, "grad_norm": 0.67578125, "learning_rate": 9.924288482965221e-05, "loss": 0.0573, "step": 4920 }, { "epoch": 3.6117216117216118, "grad_norm": 1.2109375, "learning_rate": 9.923690335696136e-05, "loss": 0.0696, "step": 4930 }, { "epoch": 3.619047619047619, "grad_norm": 0.984375, "learning_rate": 9.923089853282112e-05, "loss": 0.0639, "step": 4940 }, { "epoch": 3.6263736263736264, "grad_norm": 0.7890625, "learning_rate": 9.922487036010858e-05, "loss": 0.0637, "step": 4950 }, { "epoch": 3.6336996336996337, "grad_norm": 0.70703125, "learning_rate": 9.921881884171203e-05, "loss": 0.0582, "step": 4960 }, { "epoch": 3.641025641025641, "grad_norm": 0.95703125, "learning_rate": 9.921274398053096e-05, "loss": 0.0655, "step": 4970 }, { "epoch": 3.6483516483516483, "grad_norm": 0.97265625, "learning_rate": 9.920664577947605e-05, "loss": 0.0727, "step": 4980 }, { "epoch": 3.6556776556776556, "grad_norm": 1.046875, "learning_rate": 9.920052424146914e-05, "loss": 0.0653, "step": 4990 }, { "epoch": 3.663003663003663, "grad_norm": 1.0234375, "learning_rate": 9.919437936944323e-05, "loss": 0.0644, "step": 5000 }, { "epoch": 3.67032967032967, "grad_norm": 0.77734375, "learning_rate": 9.918821116634258e-05, "loss": 0.0621, "step": 5010 }, { "epoch": 3.677655677655678, "grad_norm": 1.0078125, "learning_rate": 9.918201963512253e-05, "loss": 0.0634, "step": 5020 }, { "epoch": 3.684981684981685, "grad_norm": 0.7265625, "learning_rate": 9.917580477874969e-05, "loss": 0.0614, "step": 5030 }, { "epoch": 3.6923076923076925, "grad_norm": 0.8046875, "learning_rate": 9.916956660020177e-05, "loss": 0.0589, "step": 5040 }, { "epoch": 3.6996336996337, "grad_norm": 0.7421875, "learning_rate": 9.916330510246769e-05, "loss": 0.0628, "step": 5050 }, { "epoch": 3.706959706959707, "grad_norm": 1.015625, "learning_rate": 9.915702028854755e-05, "loss": 0.0597, "step": 5060 }, { "epoch": 3.7142857142857144, "grad_norm": 0.71875, "learning_rate": 9.915071216145262e-05, "loss": 0.0539, "step": 5070 }, { "epoch": 3.7216117216117217, "grad_norm": 0.9609375, "learning_rate": 9.91443807242053e-05, "loss": 0.0654, "step": 5080 }, { "epoch": 3.728937728937729, "grad_norm": 0.80859375, "learning_rate": 9.913802597983921e-05, "loss": 0.0654, "step": 5090 }, { "epoch": 3.7362637362637363, "grad_norm": 0.8046875, "learning_rate": 9.913164793139911e-05, "loss": 0.0638, "step": 5100 }, { "epoch": 3.7435897435897436, "grad_norm": 0.7421875, "learning_rate": 9.912524658194094e-05, "loss": 0.0666, "step": 5110 }, { "epoch": 3.750915750915751, "grad_norm": 0.8359375, "learning_rate": 9.911882193453178e-05, "loss": 0.0763, "step": 5120 }, { "epoch": 3.758241758241758, "grad_norm": 0.83984375, "learning_rate": 9.911237399224991e-05, "loss": 0.0623, "step": 5130 }, { "epoch": 3.7655677655677655, "grad_norm": 0.84375, "learning_rate": 9.910590275818476e-05, "loss": 0.0577, "step": 5140 }, { "epoch": 3.772893772893773, "grad_norm": 1.015625, "learning_rate": 9.909940823543686e-05, "loss": 0.0587, "step": 5150 }, { "epoch": 3.78021978021978, "grad_norm": 1.109375, "learning_rate": 9.909289042711799e-05, "loss": 0.0657, "step": 5160 }, { "epoch": 3.7875457875457874, "grad_norm": 0.8359375, "learning_rate": 9.908634933635103e-05, "loss": 0.0523, "step": 5170 }, { "epoch": 3.7948717948717947, "grad_norm": 0.70703125, "learning_rate": 9.907978496627005e-05, "loss": 0.0564, "step": 5180 }, { "epoch": 3.802197802197802, "grad_norm": 1.0859375, "learning_rate": 9.907319732002025e-05, "loss": 0.0619, "step": 5190 }, { "epoch": 3.8095238095238093, "grad_norm": 1.0546875, "learning_rate": 9.906658640075796e-05, "loss": 0.0691, "step": 5200 }, { "epoch": 3.8168498168498166, "grad_norm": 0.98046875, "learning_rate": 9.905995221165073e-05, "loss": 0.0618, "step": 5210 }, { "epoch": 3.824175824175824, "grad_norm": 0.6796875, "learning_rate": 9.905329475587718e-05, "loss": 0.0617, "step": 5220 }, { "epoch": 3.8315018315018317, "grad_norm": 0.8203125, "learning_rate": 9.904661403662713e-05, "loss": 0.0659, "step": 5230 }, { "epoch": 3.838827838827839, "grad_norm": 0.82421875, "learning_rate": 9.903991005710153e-05, "loss": 0.0625, "step": 5240 }, { "epoch": 3.8461538461538463, "grad_norm": 0.953125, "learning_rate": 9.90331828205125e-05, "loss": 0.0605, "step": 5250 }, { "epoch": 3.8534798534798536, "grad_norm": 0.69140625, "learning_rate": 9.902643233008324e-05, "loss": 0.064, "step": 5260 }, { "epoch": 3.860805860805861, "grad_norm": 0.70703125, "learning_rate": 9.901965858904816e-05, "loss": 0.0654, "step": 5270 }, { "epoch": 3.868131868131868, "grad_norm": 0.8203125, "learning_rate": 9.901286160065278e-05, "loss": 0.0642, "step": 5280 }, { "epoch": 3.8754578754578755, "grad_norm": 0.6953125, "learning_rate": 9.900604136815374e-05, "loss": 0.0643, "step": 5290 }, { "epoch": 3.8827838827838828, "grad_norm": 1.015625, "learning_rate": 9.899919789481886e-05, "loss": 0.0626, "step": 5300 }, { "epoch": 3.89010989010989, "grad_norm": 0.94921875, "learning_rate": 9.899233118392706e-05, "loss": 0.0597, "step": 5310 }, { "epoch": 3.8974358974358974, "grad_norm": 0.86328125, "learning_rate": 9.898544123876841e-05, "loss": 0.0644, "step": 5320 }, { "epoch": 3.9047619047619047, "grad_norm": 0.765625, "learning_rate": 9.89785280626441e-05, "loss": 0.0538, "step": 5330 }, { "epoch": 3.912087912087912, "grad_norm": 1.046875, "learning_rate": 9.897159165886648e-05, "loss": 0.0653, "step": 5340 }, { "epoch": 3.9194139194139193, "grad_norm": 0.8203125, "learning_rate": 9.896463203075898e-05, "loss": 0.0618, "step": 5350 }, { "epoch": 3.926739926739927, "grad_norm": 0.70703125, "learning_rate": 9.895764918165621e-05, "loss": 0.0624, "step": 5360 }, { "epoch": 3.9340659340659343, "grad_norm": 0.9296875, "learning_rate": 9.895064311490388e-05, "loss": 0.0672, "step": 5370 }, { "epoch": 3.9413919413919416, "grad_norm": 0.875, "learning_rate": 9.89436138338588e-05, "loss": 0.0611, "step": 5380 }, { "epoch": 3.948717948717949, "grad_norm": 0.7734375, "learning_rate": 9.893656134188896e-05, "loss": 0.0587, "step": 5390 }, { "epoch": 3.956043956043956, "grad_norm": 0.73046875, "learning_rate": 9.892948564237346e-05, "loss": 0.0589, "step": 5400 }, { "epoch": 3.9633699633699635, "grad_norm": 0.9140625, "learning_rate": 9.892238673870245e-05, "loss": 0.0638, "step": 5410 }, { "epoch": 3.970695970695971, "grad_norm": 0.91796875, "learning_rate": 9.891526463427724e-05, "loss": 0.057, "step": 5420 }, { "epoch": 3.978021978021978, "grad_norm": 1.0703125, "learning_rate": 9.890811933251033e-05, "loss": 0.0632, "step": 5430 }, { "epoch": 3.9853479853479854, "grad_norm": 0.78125, "learning_rate": 9.890095083682523e-05, "loss": 0.0563, "step": 5440 }, { "epoch": 3.9926739926739927, "grad_norm": 1.2734375, "learning_rate": 9.889375915065658e-05, "loss": 0.0701, "step": 5450 }, { "epoch": 4.0, "grad_norm": 0.58984375, "learning_rate": 9.888654427745016e-05, "loss": 0.0589, "step": 5460 }, { "epoch": 4.007326007326007, "grad_norm": 0.66015625, "learning_rate": 9.88793062206629e-05, "loss": 0.0558, "step": 5470 }, { "epoch": 4.014652014652015, "grad_norm": 0.7421875, "learning_rate": 9.887204498376275e-05, "loss": 0.0575, "step": 5480 }, { "epoch": 4.021978021978022, "grad_norm": 0.63671875, "learning_rate": 9.88647605702288e-05, "loss": 0.0494, "step": 5490 }, { "epoch": 4.029304029304029, "grad_norm": 0.86328125, "learning_rate": 9.885745298355128e-05, "loss": 0.0537, "step": 5500 }, { "epoch": 4.0366300366300365, "grad_norm": 0.8671875, "learning_rate": 9.885012222723147e-05, "loss": 0.0585, "step": 5510 }, { "epoch": 4.043956043956044, "grad_norm": 0.66796875, "learning_rate": 9.88427683047818e-05, "loss": 0.0515, "step": 5520 }, { "epoch": 4.051282051282051, "grad_norm": 0.8515625, "learning_rate": 9.883539121972573e-05, "loss": 0.0621, "step": 5530 }, { "epoch": 4.058608058608058, "grad_norm": 0.5, "learning_rate": 9.882799097559793e-05, "loss": 0.0538, "step": 5540 }, { "epoch": 4.065934065934066, "grad_norm": 0.86328125, "learning_rate": 9.882056757594405e-05, "loss": 0.0503, "step": 5550 }, { "epoch": 4.073260073260073, "grad_norm": 0.734375, "learning_rate": 9.88131210243209e-05, "loss": 0.054, "step": 5560 }, { "epoch": 4.08058608058608, "grad_norm": 0.9375, "learning_rate": 9.880565132429638e-05, "loss": 0.0562, "step": 5570 }, { "epoch": 4.087912087912088, "grad_norm": 0.7421875, "learning_rate": 9.879815847944944e-05, "loss": 0.0577, "step": 5580 }, { "epoch": 4.095238095238095, "grad_norm": 0.83203125, "learning_rate": 9.879064249337016e-05, "loss": 0.0571, "step": 5590 }, { "epoch": 4.102564102564102, "grad_norm": 1.078125, "learning_rate": 9.878310336965971e-05, "loss": 0.0606, "step": 5600 }, { "epoch": 4.1098901098901095, "grad_norm": 0.703125, "learning_rate": 9.87755411119303e-05, "loss": 0.0581, "step": 5610 }, { "epoch": 4.117216117216117, "grad_norm": 0.8828125, "learning_rate": 9.876795572380528e-05, "loss": 0.06, "step": 5620 }, { "epoch": 4.124542124542124, "grad_norm": 0.6171875, "learning_rate": 9.876034720891907e-05, "loss": 0.0555, "step": 5630 }, { "epoch": 4.131868131868132, "grad_norm": 0.734375, "learning_rate": 9.875271557091713e-05, "loss": 0.0628, "step": 5640 }, { "epoch": 4.13919413919414, "grad_norm": 0.7265625, "learning_rate": 9.874506081345604e-05, "loss": 0.0491, "step": 5650 }, { "epoch": 4.146520146520147, "grad_norm": 0.89453125, "learning_rate": 9.873738294020346e-05, "loss": 0.057, "step": 5660 }, { "epoch": 4.153846153846154, "grad_norm": 0.6796875, "learning_rate": 9.872968195483808e-05, "loss": 0.0546, "step": 5670 }, { "epoch": 4.1611721611721615, "grad_norm": 0.6328125, "learning_rate": 9.872195786104972e-05, "loss": 0.0521, "step": 5680 }, { "epoch": 4.168498168498169, "grad_norm": 0.953125, "learning_rate": 9.871421066253924e-05, "loss": 0.0628, "step": 5690 }, { "epoch": 4.175824175824176, "grad_norm": 0.5390625, "learning_rate": 9.870644036301857e-05, "loss": 0.0501, "step": 5700 }, { "epoch": 4.183150183150183, "grad_norm": 0.8984375, "learning_rate": 9.869864696621074e-05, "loss": 0.0615, "step": 5710 }, { "epoch": 4.190476190476191, "grad_norm": 1.078125, "learning_rate": 9.86908304758498e-05, "loss": 0.0666, "step": 5720 }, { "epoch": 4.197802197802198, "grad_norm": 0.8125, "learning_rate": 9.868299089568086e-05, "loss": 0.0633, "step": 5730 }, { "epoch": 4.205128205128205, "grad_norm": 0.7578125, "learning_rate": 9.867512822946017e-05, "loss": 0.0514, "step": 5740 }, { "epoch": 4.212454212454213, "grad_norm": 0.59375, "learning_rate": 9.866724248095497e-05, "loss": 0.0529, "step": 5750 }, { "epoch": 4.21978021978022, "grad_norm": 0.64453125, "learning_rate": 9.865933365394358e-05, "loss": 0.0542, "step": 5760 }, { "epoch": 4.227106227106227, "grad_norm": 0.734375, "learning_rate": 9.865140175221539e-05, "loss": 0.0596, "step": 5770 }, { "epoch": 4.2344322344322345, "grad_norm": 0.6640625, "learning_rate": 9.864344677957082e-05, "loss": 0.0613, "step": 5780 }, { "epoch": 4.241758241758242, "grad_norm": 0.78125, "learning_rate": 9.863546873982136e-05, "loss": 0.0553, "step": 5790 }, { "epoch": 4.249084249084249, "grad_norm": 0.7734375, "learning_rate": 9.862746763678956e-05, "loss": 0.0551, "step": 5800 }, { "epoch": 4.256410256410256, "grad_norm": 0.7265625, "learning_rate": 9.8619443474309e-05, "loss": 0.0548, "step": 5810 }, { "epoch": 4.263736263736264, "grad_norm": 0.98828125, "learning_rate": 9.861139625622433e-05, "loss": 0.0562, "step": 5820 }, { "epoch": 4.271062271062271, "grad_norm": 0.98828125, "learning_rate": 9.860332598639123e-05, "loss": 0.0604, "step": 5830 }, { "epoch": 4.278388278388278, "grad_norm": 1.2890625, "learning_rate": 9.859523266867643e-05, "loss": 0.0578, "step": 5840 }, { "epoch": 4.285714285714286, "grad_norm": 0.86328125, "learning_rate": 9.858711630695771e-05, "loss": 0.066, "step": 5850 }, { "epoch": 4.293040293040293, "grad_norm": 0.7109375, "learning_rate": 9.857897690512387e-05, "loss": 0.0584, "step": 5860 }, { "epoch": 4.3003663003663, "grad_norm": 0.625, "learning_rate": 9.85708144670748e-05, "loss": 0.0526, "step": 5870 }, { "epoch": 4.3076923076923075, "grad_norm": 1.09375, "learning_rate": 9.856262899672135e-05, "loss": 0.0718, "step": 5880 }, { "epoch": 4.315018315018315, "grad_norm": 0.88671875, "learning_rate": 9.855442049798545e-05, "loss": 0.0583, "step": 5890 }, { "epoch": 4.322344322344322, "grad_norm": 0.58203125, "learning_rate": 9.854618897480011e-05, "loss": 0.055, "step": 5900 }, { "epoch": 4.329670329670329, "grad_norm": 0.65234375, "learning_rate": 9.853793443110927e-05, "loss": 0.0594, "step": 5910 }, { "epoch": 4.336996336996337, "grad_norm": 0.83984375, "learning_rate": 9.852965687086797e-05, "loss": 0.062, "step": 5920 }, { "epoch": 4.344322344322344, "grad_norm": 0.7265625, "learning_rate": 9.852135629804227e-05, "loss": 0.0524, "step": 5930 }, { "epoch": 4.351648351648351, "grad_norm": 0.62890625, "learning_rate": 9.851303271660924e-05, "loss": 0.0615, "step": 5940 }, { "epoch": 4.358974358974359, "grad_norm": 0.87109375, "learning_rate": 9.850468613055698e-05, "loss": 0.0529, "step": 5950 }, { "epoch": 4.366300366300366, "grad_norm": 0.80078125, "learning_rate": 9.849631654388461e-05, "loss": 0.051, "step": 5960 }, { "epoch": 4.373626373626374, "grad_norm": 0.6015625, "learning_rate": 9.848792396060228e-05, "loss": 0.0498, "step": 5970 }, { "epoch": 4.380952380952381, "grad_norm": 0.921875, "learning_rate": 9.847950838473114e-05, "loss": 0.0566, "step": 5980 }, { "epoch": 4.388278388278389, "grad_norm": 0.75390625, "learning_rate": 9.847106982030337e-05, "loss": 0.0507, "step": 5990 }, { "epoch": 4.395604395604396, "grad_norm": 0.875, "learning_rate": 9.846260827136219e-05, "loss": 0.0586, "step": 6000 }, { "epoch": 4.402930402930403, "grad_norm": 0.8984375, "learning_rate": 9.845412374196178e-05, "loss": 0.0547, "step": 6010 }, { "epoch": 4.410256410256411, "grad_norm": 0.5703125, "learning_rate": 9.844561623616736e-05, "loss": 0.0544, "step": 6020 }, { "epoch": 4.417582417582418, "grad_norm": 0.6875, "learning_rate": 9.843708575805516e-05, "loss": 0.0501, "step": 6030 }, { "epoch": 4.424908424908425, "grad_norm": 0.52734375, "learning_rate": 9.842853231171241e-05, "loss": 0.0608, "step": 6040 }, { "epoch": 4.4322344322344325, "grad_norm": 0.80078125, "learning_rate": 9.841995590123734e-05, "loss": 0.0489, "step": 6050 }, { "epoch": 4.43956043956044, "grad_norm": 0.72265625, "learning_rate": 9.84113565307392e-05, "loss": 0.0542, "step": 6060 }, { "epoch": 4.446886446886447, "grad_norm": 0.75, "learning_rate": 9.840273420433822e-05, "loss": 0.053, "step": 6070 }, { "epoch": 4.454212454212454, "grad_norm": 0.82421875, "learning_rate": 9.839408892616566e-05, "loss": 0.0615, "step": 6080 }, { "epoch": 4.461538461538462, "grad_norm": 0.75, "learning_rate": 9.838542070036373e-05, "loss": 0.0564, "step": 6090 }, { "epoch": 4.468864468864469, "grad_norm": 0.83203125, "learning_rate": 9.837672953108568e-05, "loss": 0.048, "step": 6100 }, { "epoch": 4.476190476190476, "grad_norm": 0.5625, "learning_rate": 9.836801542249574e-05, "loss": 0.0503, "step": 6110 }, { "epoch": 4.483516483516484, "grad_norm": 0.68359375, "learning_rate": 9.835927837876909e-05, "loss": 0.0574, "step": 6120 }, { "epoch": 4.490842490842491, "grad_norm": 0.546875, "learning_rate": 9.835051840409198e-05, "loss": 0.0576, "step": 6130 }, { "epoch": 4.498168498168498, "grad_norm": 0.63671875, "learning_rate": 9.834173550266157e-05, "loss": 0.0497, "step": 6140 }, { "epoch": 4.5054945054945055, "grad_norm": 0.72265625, "learning_rate": 9.833292967868604e-05, "loss": 0.0539, "step": 6150 }, { "epoch": 4.512820512820513, "grad_norm": 0.6015625, "learning_rate": 9.832410093638455e-05, "loss": 0.0597, "step": 6160 }, { "epoch": 4.52014652014652, "grad_norm": 0.7421875, "learning_rate": 9.831524927998726e-05, "loss": 0.059, "step": 6170 }, { "epoch": 4.527472527472527, "grad_norm": 0.88671875, "learning_rate": 9.830637471373528e-05, "loss": 0.0575, "step": 6180 }, { "epoch": 4.534798534798535, "grad_norm": 1.1015625, "learning_rate": 9.829747724188068e-05, "loss": 0.0613, "step": 6190 }, { "epoch": 4.542124542124542, "grad_norm": 0.75, "learning_rate": 9.828855686868657e-05, "loss": 0.0564, "step": 6200 }, { "epoch": 4.549450549450549, "grad_norm": 0.91796875, "learning_rate": 9.827961359842695e-05, "loss": 0.0554, "step": 6210 }, { "epoch": 4.556776556776557, "grad_norm": 0.69921875, "learning_rate": 9.827064743538687e-05, "loss": 0.0539, "step": 6220 }, { "epoch": 4.564102564102564, "grad_norm": 0.921875, "learning_rate": 9.82616583838623e-05, "loss": 0.0548, "step": 6230 }, { "epoch": 4.571428571428571, "grad_norm": 1.3359375, "learning_rate": 9.825264644816017e-05, "loss": 0.057, "step": 6240 }, { "epoch": 4.5787545787545785, "grad_norm": 0.80859375, "learning_rate": 9.824361163259844e-05, "loss": 0.0561, "step": 6250 }, { "epoch": 4.586080586080586, "grad_norm": 0.76953125, "learning_rate": 9.823455394150597e-05, "loss": 0.0574, "step": 6260 }, { "epoch": 4.593406593406593, "grad_norm": 0.66796875, "learning_rate": 9.822547337922257e-05, "loss": 0.0551, "step": 6270 }, { "epoch": 4.6007326007326, "grad_norm": 0.796875, "learning_rate": 9.821636995009903e-05, "loss": 0.0548, "step": 6280 }, { "epoch": 4.608058608058608, "grad_norm": 0.56640625, "learning_rate": 9.820724365849718e-05, "loss": 0.0574, "step": 6290 }, { "epoch": 4.615384615384615, "grad_norm": 0.64453125, "learning_rate": 9.819809450878963e-05, "loss": 0.0538, "step": 6300 }, { "epoch": 4.622710622710622, "grad_norm": 0.8515625, "learning_rate": 9.81889225053601e-05, "loss": 0.0564, "step": 6310 }, { "epoch": 4.63003663003663, "grad_norm": 0.69140625, "learning_rate": 9.817972765260315e-05, "loss": 0.0487, "step": 6320 }, { "epoch": 4.637362637362637, "grad_norm": 0.58203125, "learning_rate": 9.817050995492439e-05, "loss": 0.0482, "step": 6330 }, { "epoch": 4.644688644688645, "grad_norm": 0.69140625, "learning_rate": 9.816126941674027e-05, "loss": 0.0535, "step": 6340 }, { "epoch": 4.652014652014652, "grad_norm": 0.609375, "learning_rate": 9.815200604247826e-05, "loss": 0.0496, "step": 6350 }, { "epoch": 4.65934065934066, "grad_norm": 0.76171875, "learning_rate": 9.814271983657675e-05, "loss": 0.0521, "step": 6360 }, { "epoch": 4.666666666666667, "grad_norm": 0.6171875, "learning_rate": 9.813341080348506e-05, "loss": 0.0558, "step": 6370 }, { "epoch": 4.673992673992674, "grad_norm": 0.76171875, "learning_rate": 9.812407894766344e-05, "loss": 0.0508, "step": 6380 }, { "epoch": 4.681318681318682, "grad_norm": 0.9375, "learning_rate": 9.811472427358314e-05, "loss": 0.0596, "step": 6390 }, { "epoch": 4.688644688644689, "grad_norm": 0.75, "learning_rate": 9.810534678572623e-05, "loss": 0.0537, "step": 6400 }, { "epoch": 4.695970695970696, "grad_norm": 0.65234375, "learning_rate": 9.809594648858579e-05, "loss": 0.056, "step": 6410 }, { "epoch": 4.7032967032967035, "grad_norm": 0.59375, "learning_rate": 9.808652338666582e-05, "loss": 0.0563, "step": 6420 }, { "epoch": 4.710622710622711, "grad_norm": 0.62890625, "learning_rate": 9.807707748448124e-05, "loss": 0.0471, "step": 6430 }, { "epoch": 4.717948717948718, "grad_norm": 0.6796875, "learning_rate": 9.806760878655788e-05, "loss": 0.0525, "step": 6440 }, { "epoch": 4.725274725274725, "grad_norm": 0.75, "learning_rate": 9.805811729743251e-05, "loss": 0.0464, "step": 6450 }, { "epoch": 4.732600732600733, "grad_norm": 0.60546875, "learning_rate": 9.80486030216528e-05, "loss": 0.049, "step": 6460 }, { "epoch": 4.73992673992674, "grad_norm": 0.8984375, "learning_rate": 9.803906596377738e-05, "loss": 0.058, "step": 6470 }, { "epoch": 4.747252747252747, "grad_norm": 0.8125, "learning_rate": 9.802950612837576e-05, "loss": 0.0537, "step": 6480 }, { "epoch": 4.754578754578755, "grad_norm": 0.64453125, "learning_rate": 9.801992352002834e-05, "loss": 0.0507, "step": 6490 }, { "epoch": 4.761904761904762, "grad_norm": 0.8046875, "learning_rate": 9.801031814332651e-05, "loss": 0.0508, "step": 6500 }, { "epoch": 4.769230769230769, "grad_norm": 0.8515625, "learning_rate": 9.800069000287246e-05, "loss": 0.055, "step": 6510 }, { "epoch": 4.7765567765567765, "grad_norm": 0.89453125, "learning_rate": 9.799103910327942e-05, "loss": 0.0591, "step": 6520 }, { "epoch": 4.783882783882784, "grad_norm": 0.79296875, "learning_rate": 9.798136544917141e-05, "loss": 0.056, "step": 6530 }, { "epoch": 4.791208791208791, "grad_norm": 0.62109375, "learning_rate": 9.797166904518338e-05, "loss": 0.0559, "step": 6540 }, { "epoch": 4.798534798534798, "grad_norm": 0.73046875, "learning_rate": 9.796194989596122e-05, "loss": 0.055, "step": 6550 }, { "epoch": 4.805860805860806, "grad_norm": 0.59765625, "learning_rate": 9.79522080061617e-05, "loss": 0.0546, "step": 6560 }, { "epoch": 4.813186813186813, "grad_norm": 0.68359375, "learning_rate": 9.794244338045248e-05, "loss": 0.0583, "step": 6570 }, { "epoch": 4.82051282051282, "grad_norm": 0.7578125, "learning_rate": 9.793265602351208e-05, "loss": 0.0722, "step": 6580 }, { "epoch": 4.827838827838828, "grad_norm": 1.078125, "learning_rate": 9.792284594002997e-05, "loss": 0.0554, "step": 6590 }, { "epoch": 4.835164835164835, "grad_norm": 0.73828125, "learning_rate": 9.79130131347065e-05, "loss": 0.055, "step": 6600 }, { "epoch": 4.842490842490842, "grad_norm": 0.62890625, "learning_rate": 9.790315761225284e-05, "loss": 0.0516, "step": 6610 }, { "epoch": 4.8498168498168495, "grad_norm": 0.71875, "learning_rate": 9.789327937739113e-05, "loss": 0.0543, "step": 6620 }, { "epoch": 4.857142857142857, "grad_norm": 0.96484375, "learning_rate": 9.788337843485435e-05, "loss": 0.0576, "step": 6630 }, { "epoch": 4.864468864468865, "grad_norm": 0.75, "learning_rate": 9.787345478938637e-05, "loss": 0.0556, "step": 6640 }, { "epoch": 4.871794871794872, "grad_norm": 0.9765625, "learning_rate": 9.786350844574191e-05, "loss": 0.0499, "step": 6650 }, { "epoch": 4.8791208791208796, "grad_norm": 0.62109375, "learning_rate": 9.785353940868662e-05, "loss": 0.0505, "step": 6660 }, { "epoch": 4.886446886446887, "grad_norm": 0.83203125, "learning_rate": 9.7843547682997e-05, "loss": 0.053, "step": 6670 }, { "epoch": 4.893772893772894, "grad_norm": 0.6953125, "learning_rate": 9.783353327346038e-05, "loss": 0.057, "step": 6680 }, { "epoch": 4.9010989010989015, "grad_norm": 0.9296875, "learning_rate": 9.7823496184875e-05, "loss": 0.0562, "step": 6690 }, { "epoch": 4.908424908424909, "grad_norm": 0.80078125, "learning_rate": 9.781343642204998e-05, "loss": 0.0508, "step": 6700 }, { "epoch": 4.915750915750916, "grad_norm": 0.86328125, "learning_rate": 9.780335398980524e-05, "loss": 0.0499, "step": 6710 }, { "epoch": 4.923076923076923, "grad_norm": 0.7421875, "learning_rate": 9.779324889297165e-05, "loss": 0.0494, "step": 6720 }, { "epoch": 4.930402930402931, "grad_norm": 1.1640625, "learning_rate": 9.778312113639087e-05, "loss": 0.0567, "step": 6730 }, { "epoch": 4.937728937728938, "grad_norm": 0.6953125, "learning_rate": 9.777297072491546e-05, "loss": 0.0619, "step": 6740 }, { "epoch": 4.945054945054945, "grad_norm": 0.7734375, "learning_rate": 9.776279766340878e-05, "loss": 0.0534, "step": 6750 }, { "epoch": 4.9523809523809526, "grad_norm": 0.75390625, "learning_rate": 9.775260195674513e-05, "loss": 0.0528, "step": 6760 }, { "epoch": 4.95970695970696, "grad_norm": 0.84765625, "learning_rate": 9.774238360980957e-05, "loss": 0.0495, "step": 6770 }, { "epoch": 4.967032967032967, "grad_norm": 0.66015625, "learning_rate": 9.773214262749802e-05, "loss": 0.0511, "step": 6780 }, { "epoch": 4.9743589743589745, "grad_norm": 0.84765625, "learning_rate": 9.772187901471732e-05, "loss": 0.054, "step": 6790 }, { "epoch": 4.981684981684982, "grad_norm": 1.265625, "learning_rate": 9.771159277638508e-05, "loss": 0.0648, "step": 6800 }, { "epoch": 4.989010989010989, "grad_norm": 0.8046875, "learning_rate": 9.770128391742979e-05, "loss": 0.0463, "step": 6810 }, { "epoch": 4.996336996336996, "grad_norm": 0.91015625, "learning_rate": 9.769095244279074e-05, "loss": 0.0497, "step": 6820 }, { "epoch": 5.003663003663004, "grad_norm": 0.64453125, "learning_rate": 9.76805983574181e-05, "loss": 0.045, "step": 6830 }, { "epoch": 5.010989010989011, "grad_norm": 1.1328125, "learning_rate": 9.767022166627284e-05, "loss": 0.0516, "step": 6840 }, { "epoch": 5.018315018315018, "grad_norm": 0.6171875, "learning_rate": 9.765982237432678e-05, "loss": 0.0454, "step": 6850 }, { "epoch": 5.0256410256410255, "grad_norm": 0.83203125, "learning_rate": 9.764940048656255e-05, "loss": 0.0552, "step": 6860 }, { "epoch": 5.032967032967033, "grad_norm": 1.1796875, "learning_rate": 9.763895600797363e-05, "loss": 0.0603, "step": 6870 }, { "epoch": 5.04029304029304, "grad_norm": 0.73828125, "learning_rate": 9.762848894356433e-05, "loss": 0.0611, "step": 6880 }, { "epoch": 5.0476190476190474, "grad_norm": 0.52734375, "learning_rate": 9.76179992983497e-05, "loss": 0.054, "step": 6890 }, { "epoch": 5.054945054945055, "grad_norm": 0.93359375, "learning_rate": 9.760748707735575e-05, "loss": 0.0507, "step": 6900 }, { "epoch": 5.062271062271062, "grad_norm": 0.94921875, "learning_rate": 9.759695228561918e-05, "loss": 0.0513, "step": 6910 }, { "epoch": 5.069597069597069, "grad_norm": 1.0234375, "learning_rate": 9.758639492818759e-05, "loss": 0.0598, "step": 6920 }, { "epoch": 5.076923076923077, "grad_norm": 0.78515625, "learning_rate": 9.757581501011932e-05, "loss": 0.0467, "step": 6930 }, { "epoch": 5.084249084249084, "grad_norm": 0.66796875, "learning_rate": 9.756521253648358e-05, "loss": 0.0488, "step": 6940 }, { "epoch": 5.091575091575091, "grad_norm": 0.94921875, "learning_rate": 9.755458751236033e-05, "loss": 0.0559, "step": 6950 }, { "epoch": 5.0989010989010985, "grad_norm": 0.9140625, "learning_rate": 9.754393994284041e-05, "loss": 0.0501, "step": 6960 }, { "epoch": 5.106227106227106, "grad_norm": 0.98046875, "learning_rate": 9.75332698330254e-05, "loss": 0.0499, "step": 6970 }, { "epoch": 5.113553113553113, "grad_norm": 0.7890625, "learning_rate": 9.752257718802771e-05, "loss": 0.0478, "step": 6980 }, { "epoch": 5.1208791208791204, "grad_norm": 0.703125, "learning_rate": 9.751186201297053e-05, "loss": 0.0554, "step": 6990 }, { "epoch": 5.128205128205128, "grad_norm": 0.7890625, "learning_rate": 9.750112431298784e-05, "loss": 0.0498, "step": 7000 }, { "epoch": 5.135531135531136, "grad_norm": 0.59375, "learning_rate": 9.749036409322445e-05, "loss": 0.0494, "step": 7010 }, { "epoch": 5.142857142857143, "grad_norm": 0.63671875, "learning_rate": 9.747958135883592e-05, "loss": 0.0439, "step": 7020 }, { "epoch": 5.1501831501831505, "grad_norm": 0.5625, "learning_rate": 9.74687761149886e-05, "loss": 0.0491, "step": 7030 }, { "epoch": 5.157509157509158, "grad_norm": 0.498046875, "learning_rate": 9.745794836685965e-05, "loss": 0.0533, "step": 7040 }, { "epoch": 5.164835164835165, "grad_norm": 0.88671875, "learning_rate": 9.744709811963701e-05, "loss": 0.054, "step": 7050 }, { "epoch": 5.172161172161172, "grad_norm": 0.671875, "learning_rate": 9.743622537851938e-05, "loss": 0.0499, "step": 7060 }, { "epoch": 5.17948717948718, "grad_norm": 0.75390625, "learning_rate": 9.742533014871624e-05, "loss": 0.0547, "step": 7070 }, { "epoch": 5.186813186813187, "grad_norm": 0.59375, "learning_rate": 9.741441243544783e-05, "loss": 0.0455, "step": 7080 }, { "epoch": 5.194139194139194, "grad_norm": 0.5078125, "learning_rate": 9.740347224394525e-05, "loss": 0.0463, "step": 7090 }, { "epoch": 5.201465201465202, "grad_norm": 0.6171875, "learning_rate": 9.739250957945023e-05, "loss": 0.048, "step": 7100 }, { "epoch": 5.208791208791209, "grad_norm": 0.69921875, "learning_rate": 9.73815244472154e-05, "loss": 0.0484, "step": 7110 }, { "epoch": 5.216117216117216, "grad_norm": 0.92578125, "learning_rate": 9.737051685250408e-05, "loss": 0.0518, "step": 7120 }, { "epoch": 5.2234432234432235, "grad_norm": 0.6796875, "learning_rate": 9.735948680059036e-05, "loss": 0.0456, "step": 7130 }, { "epoch": 5.230769230769231, "grad_norm": 0.73046875, "learning_rate": 9.734843429675911e-05, "loss": 0.0556, "step": 7140 }, { "epoch": 5.238095238095238, "grad_norm": 0.70703125, "learning_rate": 9.733735934630593e-05, "loss": 0.0529, "step": 7150 }, { "epoch": 5.245421245421245, "grad_norm": 0.5703125, "learning_rate": 9.732626195453723e-05, "loss": 0.052, "step": 7160 }, { "epoch": 5.252747252747253, "grad_norm": 0.4375, "learning_rate": 9.731514212677008e-05, "loss": 0.0508, "step": 7170 }, { "epoch": 5.26007326007326, "grad_norm": 0.63671875, "learning_rate": 9.730399986833239e-05, "loss": 0.0506, "step": 7180 }, { "epoch": 5.267399267399267, "grad_norm": 0.85546875, "learning_rate": 9.729283518456277e-05, "loss": 0.0604, "step": 7190 }, { "epoch": 5.274725274725275, "grad_norm": 0.6171875, "learning_rate": 9.728164808081061e-05, "loss": 0.0486, "step": 7200 }, { "epoch": 5.282051282051282, "grad_norm": 0.640625, "learning_rate": 9.727043856243598e-05, "loss": 0.0416, "step": 7210 }, { "epoch": 5.289377289377289, "grad_norm": 0.87109375, "learning_rate": 9.725920663480975e-05, "loss": 0.0547, "step": 7220 }, { "epoch": 5.2967032967032965, "grad_norm": 0.65234375, "learning_rate": 9.724795230331352e-05, "loss": 0.0463, "step": 7230 }, { "epoch": 5.304029304029304, "grad_norm": 0.64453125, "learning_rate": 9.723667557333957e-05, "loss": 0.0496, "step": 7240 }, { "epoch": 5.311355311355311, "grad_norm": 0.6171875, "learning_rate": 9.722537645029098e-05, "loss": 0.0456, "step": 7250 }, { "epoch": 5.318681318681318, "grad_norm": 0.84765625, "learning_rate": 9.721405493958152e-05, "loss": 0.0499, "step": 7260 }, { "epoch": 5.326007326007326, "grad_norm": 0.671875, "learning_rate": 9.720271104663572e-05, "loss": 0.0485, "step": 7270 }, { "epoch": 5.333333333333333, "grad_norm": 0.91796875, "learning_rate": 9.719134477688877e-05, "loss": 0.0489, "step": 7280 }, { "epoch": 5.34065934065934, "grad_norm": 0.75390625, "learning_rate": 9.717995613578666e-05, "loss": 0.0469, "step": 7290 }, { "epoch": 5.347985347985348, "grad_norm": 0.48046875, "learning_rate": 9.716854512878604e-05, "loss": 0.053, "step": 7300 }, { "epoch": 5.355311355311355, "grad_norm": 0.61328125, "learning_rate": 9.715711176135431e-05, "loss": 0.0547, "step": 7310 }, { "epoch": 5.362637362637362, "grad_norm": 0.63671875, "learning_rate": 9.714565603896956e-05, "loss": 0.0495, "step": 7320 }, { "epoch": 5.36996336996337, "grad_norm": 0.703125, "learning_rate": 9.713417796712061e-05, "loss": 0.0547, "step": 7330 }, { "epoch": 5.377289377289378, "grad_norm": 0.6953125, "learning_rate": 9.712267755130698e-05, "loss": 0.0495, "step": 7340 }, { "epoch": 5.384615384615385, "grad_norm": 0.61328125, "learning_rate": 9.711115479703889e-05, "loss": 0.047, "step": 7350 }, { "epoch": 5.391941391941392, "grad_norm": 0.76953125, "learning_rate": 9.709960970983728e-05, "loss": 0.0467, "step": 7360 }, { "epoch": 5.3992673992674, "grad_norm": 0.765625, "learning_rate": 9.708804229523378e-05, "loss": 0.05, "step": 7370 }, { "epoch": 5.406593406593407, "grad_norm": 0.77734375, "learning_rate": 9.707645255877071e-05, "loss": 0.0496, "step": 7380 }, { "epoch": 5.413919413919414, "grad_norm": 0.71875, "learning_rate": 9.70648405060011e-05, "loss": 0.0531, "step": 7390 }, { "epoch": 5.4212454212454215, "grad_norm": 0.63671875, "learning_rate": 9.705320614248867e-05, "loss": 0.051, "step": 7400 }, { "epoch": 5.428571428571429, "grad_norm": 0.6171875, "learning_rate": 9.70415494738078e-05, "loss": 0.0487, "step": 7410 }, { "epoch": 5.435897435897436, "grad_norm": 0.67578125, "learning_rate": 9.70298705055436e-05, "loss": 0.0512, "step": 7420 }, { "epoch": 5.443223443223443, "grad_norm": 1.0390625, "learning_rate": 9.701816924329185e-05, "loss": 0.0501, "step": 7430 }, { "epoch": 5.450549450549451, "grad_norm": 0.71875, "learning_rate": 9.700644569265901e-05, "loss": 0.0488, "step": 7440 }, { "epoch": 5.457875457875458, "grad_norm": 0.61328125, "learning_rate": 9.69946998592622e-05, "loss": 0.0477, "step": 7450 }, { "epoch": 5.465201465201465, "grad_norm": 0.7421875, "learning_rate": 9.698293174872928e-05, "loss": 0.0541, "step": 7460 }, { "epoch": 5.472527472527473, "grad_norm": 0.74609375, "learning_rate": 9.697114136669867e-05, "loss": 0.0494, "step": 7470 }, { "epoch": 5.47985347985348, "grad_norm": 0.51953125, "learning_rate": 9.695932871881957e-05, "loss": 0.0485, "step": 7480 }, { "epoch": 5.487179487179487, "grad_norm": 0.7578125, "learning_rate": 9.694749381075182e-05, "loss": 0.0529, "step": 7490 }, { "epoch": 5.4945054945054945, "grad_norm": 0.63671875, "learning_rate": 9.693563664816586e-05, "loss": 0.0474, "step": 7500 }, { "epoch": 5.501831501831502, "grad_norm": 0.75390625, "learning_rate": 9.69237572367429e-05, "loss": 0.0451, "step": 7510 }, { "epoch": 5.509157509157509, "grad_norm": 0.87890625, "learning_rate": 9.691185558217472e-05, "loss": 0.0534, "step": 7520 }, { "epoch": 5.516483516483516, "grad_norm": 0.66015625, "learning_rate": 9.689993169016383e-05, "loss": 0.0526, "step": 7530 }, { "epoch": 5.523809523809524, "grad_norm": 0.5625, "learning_rate": 9.688798556642334e-05, "loss": 0.0471, "step": 7540 }, { "epoch": 5.531135531135531, "grad_norm": 0.9921875, "learning_rate": 9.6876017216677e-05, "loss": 0.0477, "step": 7550 }, { "epoch": 5.538461538461538, "grad_norm": 0.828125, "learning_rate": 9.686402664665928e-05, "loss": 0.0464, "step": 7560 }, { "epoch": 5.545787545787546, "grad_norm": 0.98046875, "learning_rate": 9.685201386211523e-05, "loss": 0.05, "step": 7570 }, { "epoch": 5.553113553113553, "grad_norm": 0.66015625, "learning_rate": 9.683997886880059e-05, "loss": 0.0575, "step": 7580 }, { "epoch": 5.56043956043956, "grad_norm": 0.98046875, "learning_rate": 9.682792167248172e-05, "loss": 0.0525, "step": 7590 }, { "epoch": 5.5677655677655675, "grad_norm": 0.7890625, "learning_rate": 9.681584227893559e-05, "loss": 0.0539, "step": 7600 }, { "epoch": 5.575091575091575, "grad_norm": 0.96875, "learning_rate": 9.680374069394985e-05, "loss": 0.0474, "step": 7610 }, { "epoch": 5.582417582417582, "grad_norm": 0.7734375, "learning_rate": 9.679161692332277e-05, "loss": 0.0497, "step": 7620 }, { "epoch": 5.589743589743589, "grad_norm": 0.8203125, "learning_rate": 9.677947097286325e-05, "loss": 0.0503, "step": 7630 }, { "epoch": 5.597069597069597, "grad_norm": 0.6640625, "learning_rate": 9.67673028483908e-05, "loss": 0.0509, "step": 7640 }, { "epoch": 5.604395604395604, "grad_norm": 0.57421875, "learning_rate": 9.67551125557356e-05, "loss": 0.0538, "step": 7650 }, { "epoch": 5.611721611721611, "grad_norm": 0.7109375, "learning_rate": 9.674290010073836e-05, "loss": 0.0496, "step": 7660 }, { "epoch": 5.619047619047619, "grad_norm": 0.6875, "learning_rate": 9.673066548925052e-05, "loss": 0.0522, "step": 7670 }, { "epoch": 5.626373626373626, "grad_norm": 0.52734375, "learning_rate": 9.671840872713407e-05, "loss": 0.0492, "step": 7680 }, { "epoch": 5.633699633699633, "grad_norm": 0.6875, "learning_rate": 9.670612982026162e-05, "loss": 0.0476, "step": 7690 }, { "epoch": 5.641025641025641, "grad_norm": 0.51953125, "learning_rate": 9.66938287745164e-05, "loss": 0.0464, "step": 7700 }, { "epoch": 5.648351648351649, "grad_norm": 0.8828125, "learning_rate": 9.668150559579225e-05, "loss": 0.0494, "step": 7710 }, { "epoch": 5.655677655677656, "grad_norm": 0.59375, "learning_rate": 9.666916028999359e-05, "loss": 0.0448, "step": 7720 }, { "epoch": 5.663003663003663, "grad_norm": 0.64453125, "learning_rate": 9.665679286303548e-05, "loss": 0.054, "step": 7730 }, { "epoch": 5.670329670329671, "grad_norm": 0.70703125, "learning_rate": 9.664440332084355e-05, "loss": 0.0469, "step": 7740 }, { "epoch": 5.677655677655678, "grad_norm": 0.68359375, "learning_rate": 9.663199166935406e-05, "loss": 0.0469, "step": 7750 }, { "epoch": 5.684981684981685, "grad_norm": 0.765625, "learning_rate": 9.66195579145138e-05, "loss": 0.0506, "step": 7760 }, { "epoch": 5.6923076923076925, "grad_norm": 0.68359375, "learning_rate": 9.660710206228024e-05, "loss": 0.0409, "step": 7770 }, { "epoch": 5.6996336996337, "grad_norm": 0.6875, "learning_rate": 9.659462411862133e-05, "loss": 0.0522, "step": 7780 }, { "epoch": 5.706959706959707, "grad_norm": 0.66015625, "learning_rate": 9.65821240895157e-05, "loss": 0.0485, "step": 7790 }, { "epoch": 5.714285714285714, "grad_norm": 0.64453125, "learning_rate": 9.656960198095252e-05, "loss": 0.052, "step": 7800 }, { "epoch": 5.721611721611722, "grad_norm": 0.6953125, "learning_rate": 9.655705779893152e-05, "loss": 0.0515, "step": 7810 }, { "epoch": 5.728937728937729, "grad_norm": 0.79296875, "learning_rate": 9.654449154946307e-05, "loss": 0.0554, "step": 7820 }, { "epoch": 5.736263736263736, "grad_norm": 0.58984375, "learning_rate": 9.653190323856802e-05, "loss": 0.0604, "step": 7830 }, { "epoch": 5.743589743589744, "grad_norm": 1.765625, "learning_rate": 9.651929287227789e-05, "loss": 0.0554, "step": 7840 }, { "epoch": 5.750915750915751, "grad_norm": 0.671875, "learning_rate": 9.65066604566347e-05, "loss": 0.0467, "step": 7850 }, { "epoch": 5.758241758241758, "grad_norm": 0.609375, "learning_rate": 9.649400599769106e-05, "loss": 0.0426, "step": 7860 }, { "epoch": 5.7655677655677655, "grad_norm": 0.6640625, "learning_rate": 9.648132950151012e-05, "loss": 0.0504, "step": 7870 }, { "epoch": 5.772893772893773, "grad_norm": 0.68359375, "learning_rate": 9.646863097416564e-05, "loss": 0.0473, "step": 7880 }, { "epoch": 5.78021978021978, "grad_norm": 0.80078125, "learning_rate": 9.645591042174186e-05, "loss": 0.046, "step": 7890 }, { "epoch": 5.787545787545787, "grad_norm": 0.7265625, "learning_rate": 9.644316785033363e-05, "loss": 0.0476, "step": 7900 }, { "epoch": 5.794871794871795, "grad_norm": 0.9140625, "learning_rate": 9.643040326604637e-05, "loss": 0.0563, "step": 7910 }, { "epoch": 5.802197802197802, "grad_norm": 0.57421875, "learning_rate": 9.641761667499595e-05, "loss": 0.0476, "step": 7920 }, { "epoch": 5.809523809523809, "grad_norm": 1.3203125, "learning_rate": 9.640480808330888e-05, "loss": 0.0505, "step": 7930 }, { "epoch": 5.816849816849817, "grad_norm": 0.91796875, "learning_rate": 9.639197749712216e-05, "loss": 0.0509, "step": 7940 }, { "epoch": 5.824175824175824, "grad_norm": 0.71875, "learning_rate": 9.637912492258338e-05, "loss": 0.0582, "step": 7950 }, { "epoch": 5.831501831501831, "grad_norm": 0.8125, "learning_rate": 9.636625036585059e-05, "loss": 0.0559, "step": 7960 }, { "epoch": 5.8388278388278385, "grad_norm": 0.80859375, "learning_rate": 9.635335383309241e-05, "loss": 0.0513, "step": 7970 }, { "epoch": 5.846153846153846, "grad_norm": 0.80859375, "learning_rate": 9.634043533048805e-05, "loss": 0.0549, "step": 7980 }, { "epoch": 5.853479853479853, "grad_norm": 0.7421875, "learning_rate": 9.632749486422713e-05, "loss": 0.0498, "step": 7990 }, { "epoch": 5.860805860805861, "grad_norm": 0.53125, "learning_rate": 9.631453244050987e-05, "loss": 0.0519, "step": 8000 }, { "epoch": 5.868131868131869, "grad_norm": 0.62890625, "learning_rate": 9.630154806554698e-05, "loss": 0.0517, "step": 8010 }, { "epoch": 5.875457875457876, "grad_norm": 0.65234375, "learning_rate": 9.628854174555972e-05, "loss": 0.0493, "step": 8020 }, { "epoch": 5.882783882783883, "grad_norm": 0.546875, "learning_rate": 9.627551348677985e-05, "loss": 0.0514, "step": 8030 }, { "epoch": 5.8901098901098905, "grad_norm": 0.7578125, "learning_rate": 9.62624632954496e-05, "loss": 0.0497, "step": 8040 }, { "epoch": 5.897435897435898, "grad_norm": 0.7421875, "learning_rate": 9.62493911778218e-05, "loss": 0.0442, "step": 8050 }, { "epoch": 5.904761904761905, "grad_norm": 0.62890625, "learning_rate": 9.623629714015968e-05, "loss": 0.051, "step": 8060 }, { "epoch": 5.912087912087912, "grad_norm": 0.6328125, "learning_rate": 9.622318118873705e-05, "loss": 0.0462, "step": 8070 }, { "epoch": 5.91941391941392, "grad_norm": 0.58984375, "learning_rate": 9.621004332983817e-05, "loss": 0.0476, "step": 8080 }, { "epoch": 5.926739926739927, "grad_norm": 0.640625, "learning_rate": 9.619688356975784e-05, "loss": 0.044, "step": 8090 }, { "epoch": 5.934065934065934, "grad_norm": 0.7734375, "learning_rate": 9.618370191480134e-05, "loss": 0.0456, "step": 8100 }, { "epoch": 5.941391941391942, "grad_norm": 0.625, "learning_rate": 9.61704983712844e-05, "loss": 0.0486, "step": 8110 }, { "epoch": 5.948717948717949, "grad_norm": 0.6015625, "learning_rate": 9.61572729455333e-05, "loss": 0.0477, "step": 8120 }, { "epoch": 5.956043956043956, "grad_norm": 0.55859375, "learning_rate": 9.614402564388478e-05, "loss": 0.0472, "step": 8130 }, { "epoch": 5.9633699633699635, "grad_norm": 0.71484375, "learning_rate": 9.613075647268602e-05, "loss": 0.0495, "step": 8140 }, { "epoch": 5.970695970695971, "grad_norm": 0.64453125, "learning_rate": 9.611746543829475e-05, "loss": 0.0485, "step": 8150 }, { "epoch": 5.978021978021978, "grad_norm": 0.60546875, "learning_rate": 9.610415254707914e-05, "loss": 0.047, "step": 8160 }, { "epoch": 5.985347985347985, "grad_norm": 0.6171875, "learning_rate": 9.60908178054178e-05, "loss": 0.047, "step": 8170 }, { "epoch": 5.992673992673993, "grad_norm": 0.5, "learning_rate": 9.607746121969986e-05, "loss": 0.0494, "step": 8180 }, { "epoch": 6.0, "grad_norm": 0.482421875, "learning_rate": 9.606408279632492e-05, "loss": 0.047, "step": 8190 }, { "epoch": 6.007326007326007, "grad_norm": 0.60546875, "learning_rate": 9.605068254170301e-05, "loss": 0.0459, "step": 8200 }, { "epoch": 6.014652014652015, "grad_norm": 0.6484375, "learning_rate": 9.603726046225462e-05, "loss": 0.0468, "step": 8210 }, { "epoch": 6.021978021978022, "grad_norm": 0.51171875, "learning_rate": 9.60238165644107e-05, "loss": 0.0515, "step": 8220 }, { "epoch": 6.029304029304029, "grad_norm": 1.125, "learning_rate": 9.601035085461268e-05, "loss": 0.0509, "step": 8230 }, { "epoch": 6.0366300366300365, "grad_norm": 0.466796875, "learning_rate": 9.599686333931244e-05, "loss": 0.0405, "step": 8240 }, { "epoch": 6.043956043956044, "grad_norm": 0.5078125, "learning_rate": 9.598335402497227e-05, "loss": 0.0428, "step": 8250 }, { "epoch": 6.051282051282051, "grad_norm": 0.57421875, "learning_rate": 9.596982291806491e-05, "loss": 0.0429, "step": 8260 }, { "epoch": 6.058608058608058, "grad_norm": 0.765625, "learning_rate": 9.595627002507358e-05, "loss": 0.0584, "step": 8270 }, { "epoch": 6.065934065934066, "grad_norm": 0.55078125, "learning_rate": 9.594269535249192e-05, "loss": 0.0469, "step": 8280 }, { "epoch": 6.073260073260073, "grad_norm": 0.71875, "learning_rate": 9.592909890682398e-05, "loss": 0.0488, "step": 8290 }, { "epoch": 6.08058608058608, "grad_norm": 0.53515625, "learning_rate": 9.591548069458427e-05, "loss": 0.0478, "step": 8300 }, { "epoch": 6.087912087912088, "grad_norm": 0.8671875, "learning_rate": 9.590184072229773e-05, "loss": 0.05, "step": 8310 }, { "epoch": 6.095238095238095, "grad_norm": 0.73828125, "learning_rate": 9.588817899649974e-05, "loss": 0.0495, "step": 8320 }, { "epoch": 6.102564102564102, "grad_norm": 0.6796875, "learning_rate": 9.587449552373603e-05, "loss": 0.0453, "step": 8330 }, { "epoch": 6.1098901098901095, "grad_norm": 0.66796875, "learning_rate": 9.586079031056284e-05, "loss": 0.0451, "step": 8340 }, { "epoch": 6.117216117216117, "grad_norm": 0.494140625, "learning_rate": 9.584706336354674e-05, "loss": 0.0511, "step": 8350 }, { "epoch": 6.124542124542124, "grad_norm": 0.59765625, "learning_rate": 9.583331468926483e-05, "loss": 0.0445, "step": 8360 }, { "epoch": 6.131868131868132, "grad_norm": 0.640625, "learning_rate": 9.58195442943045e-05, "loss": 0.0387, "step": 8370 }, { "epoch": 6.13919413919414, "grad_norm": 0.60546875, "learning_rate": 9.580575218526363e-05, "loss": 0.0509, "step": 8380 }, { "epoch": 6.146520146520147, "grad_norm": 0.6796875, "learning_rate": 9.579193836875045e-05, "loss": 0.0511, "step": 8390 }, { "epoch": 6.153846153846154, "grad_norm": 0.6640625, "learning_rate": 9.577810285138361e-05, "loss": 0.0457, "step": 8400 }, { "epoch": 6.1611721611721615, "grad_norm": 0.6484375, "learning_rate": 9.576424563979217e-05, "loss": 0.0447, "step": 8410 }, { "epoch": 6.168498168498169, "grad_norm": 0.5390625, "learning_rate": 9.575036674061557e-05, "loss": 0.045, "step": 8420 }, { "epoch": 6.175824175824176, "grad_norm": 0.6328125, "learning_rate": 9.573646616050366e-05, "loss": 0.0445, "step": 8430 }, { "epoch": 6.183150183150183, "grad_norm": 1.1328125, "learning_rate": 9.572254390611664e-05, "loss": 0.0436, "step": 8440 }, { "epoch": 6.190476190476191, "grad_norm": 0.6875, "learning_rate": 9.570859998412515e-05, "loss": 0.0433, "step": 8450 }, { "epoch": 6.197802197802198, "grad_norm": 0.486328125, "learning_rate": 9.569463440121015e-05, "loss": 0.0432, "step": 8460 }, { "epoch": 6.205128205128205, "grad_norm": 0.890625, "learning_rate": 9.568064716406303e-05, "loss": 0.0442, "step": 8470 }, { "epoch": 6.212454212454213, "grad_norm": 0.921875, "learning_rate": 9.566663827938551e-05, "loss": 0.0536, "step": 8480 }, { "epoch": 6.21978021978022, "grad_norm": 0.56640625, "learning_rate": 9.565260775388975e-05, "loss": 0.0485, "step": 8490 }, { "epoch": 6.227106227106227, "grad_norm": 0.6953125, "learning_rate": 9.563855559429818e-05, "loss": 0.0441, "step": 8500 }, { "epoch": 6.2344322344322345, "grad_norm": 0.9453125, "learning_rate": 9.562448180734369e-05, "loss": 0.0481, "step": 8510 }, { "epoch": 6.241758241758242, "grad_norm": 0.5546875, "learning_rate": 9.561038639976947e-05, "loss": 0.0466, "step": 8520 }, { "epoch": 6.249084249084249, "grad_norm": 0.53125, "learning_rate": 9.559626937832912e-05, "loss": 0.0413, "step": 8530 }, { "epoch": 6.256410256410256, "grad_norm": 0.60546875, "learning_rate": 9.558213074978653e-05, "loss": 0.0398, "step": 8540 }, { "epoch": 6.263736263736264, "grad_norm": 0.6796875, "learning_rate": 9.556797052091602e-05, "loss": 0.0472, "step": 8550 }, { "epoch": 6.271062271062271, "grad_norm": 0.578125, "learning_rate": 9.555378869850219e-05, "loss": 0.0441, "step": 8560 }, { "epoch": 6.278388278388278, "grad_norm": 0.7421875, "learning_rate": 9.553958528934005e-05, "loss": 0.0505, "step": 8570 }, { "epoch": 6.285714285714286, "grad_norm": 0.60546875, "learning_rate": 9.552536030023488e-05, "loss": 0.0458, "step": 8580 }, { "epoch": 6.293040293040293, "grad_norm": 0.84765625, "learning_rate": 9.551111373800236e-05, "loss": 0.0455, "step": 8590 }, { "epoch": 6.3003663003663, "grad_norm": 1.0, "learning_rate": 9.549684560946848e-05, "loss": 0.0518, "step": 8600 }, { "epoch": 6.3076923076923075, "grad_norm": 1.1640625, "learning_rate": 9.548255592146959e-05, "loss": 0.0495, "step": 8610 }, { "epoch": 6.315018315018315, "grad_norm": 0.7734375, "learning_rate": 9.546824468085231e-05, "loss": 0.0503, "step": 8620 }, { "epoch": 6.322344322344322, "grad_norm": 0.80859375, "learning_rate": 9.545391189447364e-05, "loss": 0.0462, "step": 8630 }, { "epoch": 6.329670329670329, "grad_norm": 0.703125, "learning_rate": 9.543955756920088e-05, "loss": 0.0426, "step": 8640 }, { "epoch": 6.336996336996337, "grad_norm": 0.875, "learning_rate": 9.542518171191171e-05, "loss": 0.0525, "step": 8650 }, { "epoch": 6.344322344322344, "grad_norm": 1.015625, "learning_rate": 9.541078432949401e-05, "loss": 0.0467, "step": 8660 }, { "epoch": 6.351648351648351, "grad_norm": 0.6953125, "learning_rate": 9.539636542884608e-05, "loss": 0.0432, "step": 8670 }, { "epoch": 6.358974358974359, "grad_norm": 0.8046875, "learning_rate": 9.538192501687645e-05, "loss": 0.0478, "step": 8680 }, { "epoch": 6.366300366300366, "grad_norm": 0.62109375, "learning_rate": 9.536746310050404e-05, "loss": 0.0458, "step": 8690 }, { "epoch": 6.373626373626374, "grad_norm": 0.5859375, "learning_rate": 9.535297968665799e-05, "loss": 0.0452, "step": 8700 }, { "epoch": 6.380952380952381, "grad_norm": 0.58203125, "learning_rate": 9.53384747822778e-05, "loss": 0.0389, "step": 8710 }, { "epoch": 6.388278388278389, "grad_norm": 0.6796875, "learning_rate": 9.532394839431323e-05, "loss": 0.0457, "step": 8720 }, { "epoch": 6.395604395604396, "grad_norm": 0.59765625, "learning_rate": 9.530940052972438e-05, "loss": 0.0472, "step": 8730 }, { "epoch": 6.402930402930403, "grad_norm": 0.97265625, "learning_rate": 9.529483119548157e-05, "loss": 0.0448, "step": 8740 }, { "epoch": 6.410256410256411, "grad_norm": 0.625, "learning_rate": 9.528024039856548e-05, "loss": 0.0479, "step": 8750 }, { "epoch": 6.417582417582418, "grad_norm": 0.84765625, "learning_rate": 9.526562814596701e-05, "loss": 0.0476, "step": 8760 }, { "epoch": 6.424908424908425, "grad_norm": 0.6328125, "learning_rate": 9.525099444468739e-05, "loss": 0.0432, "step": 8770 }, { "epoch": 6.4322344322344325, "grad_norm": 0.69921875, "learning_rate": 9.52363393017381e-05, "loss": 0.0454, "step": 8780 }, { "epoch": 6.43956043956044, "grad_norm": 0.70703125, "learning_rate": 9.52216627241409e-05, "loss": 0.052, "step": 8790 }, { "epoch": 6.446886446886447, "grad_norm": 0.703125, "learning_rate": 9.520696471892783e-05, "loss": 0.0449, "step": 8800 }, { "epoch": 6.454212454212454, "grad_norm": 0.71875, "learning_rate": 9.519224529314115e-05, "loss": 0.0496, "step": 8810 }, { "epoch": 6.461538461538462, "grad_norm": 0.73828125, "learning_rate": 9.517750445383347e-05, "loss": 0.0483, "step": 8820 }, { "epoch": 6.468864468864469, "grad_norm": 0.8828125, "learning_rate": 9.516274220806757e-05, "loss": 0.046, "step": 8830 }, { "epoch": 6.476190476190476, "grad_norm": 0.55078125, "learning_rate": 9.514795856291654e-05, "loss": 0.0401, "step": 8840 }, { "epoch": 6.483516483516484, "grad_norm": 0.8046875, "learning_rate": 9.513315352546373e-05, "loss": 0.0512, "step": 8850 }, { "epoch": 6.490842490842491, "grad_norm": 0.828125, "learning_rate": 9.51183271028027e-05, "loss": 0.0484, "step": 8860 }, { "epoch": 6.498168498168498, "grad_norm": 0.546875, "learning_rate": 9.510347930203727e-05, "loss": 0.0415, "step": 8870 }, { "epoch": 6.5054945054945055, "grad_norm": 0.5390625, "learning_rate": 9.508861013028151e-05, "loss": 0.0486, "step": 8880 }, { "epoch": 6.512820512820513, "grad_norm": 0.875, "learning_rate": 9.507371959465973e-05, "loss": 0.0452, "step": 8890 }, { "epoch": 6.52014652014652, "grad_norm": 0.73828125, "learning_rate": 9.50588077023065e-05, "loss": 0.0521, "step": 8900 }, { "epoch": 6.527472527472527, "grad_norm": 0.74609375, "learning_rate": 9.504387446036655e-05, "loss": 0.0406, "step": 8910 }, { "epoch": 6.534798534798535, "grad_norm": 0.6015625, "learning_rate": 9.502891987599491e-05, "loss": 0.0387, "step": 8920 }, { "epoch": 6.542124542124542, "grad_norm": 0.4921875, "learning_rate": 9.501394395635683e-05, "loss": 0.0399, "step": 8930 }, { "epoch": 6.549450549450549, "grad_norm": 0.55078125, "learning_rate": 9.499894670862771e-05, "loss": 0.0447, "step": 8940 }, { "epoch": 6.556776556776557, "grad_norm": 0.56640625, "learning_rate": 9.49839281399933e-05, "loss": 0.0491, "step": 8950 }, { "epoch": 6.564102564102564, "grad_norm": 0.8046875, "learning_rate": 9.496888825764943e-05, "loss": 0.0479, "step": 8960 }, { "epoch": 6.571428571428571, "grad_norm": 0.59765625, "learning_rate": 9.495382706880222e-05, "loss": 0.0419, "step": 8970 }, { "epoch": 6.5787545787545785, "grad_norm": 0.52734375, "learning_rate": 9.493874458066799e-05, "loss": 0.0475, "step": 8980 }, { "epoch": 6.586080586080586, "grad_norm": 0.87109375, "learning_rate": 9.492364080047324e-05, "loss": 0.0458, "step": 8990 }, { "epoch": 6.593406593406593, "grad_norm": 0.5859375, "learning_rate": 9.490851573545469e-05, "loss": 0.0497, "step": 9000 }, { "epoch": 6.6007326007326, "grad_norm": 0.5, "learning_rate": 9.489336939285924e-05, "loss": 0.0435, "step": 9010 }, { "epoch": 6.608058608058608, "grad_norm": 0.83984375, "learning_rate": 9.487820177994404e-05, "loss": 0.0411, "step": 9020 }, { "epoch": 6.615384615384615, "grad_norm": 0.455078125, "learning_rate": 9.486301290397635e-05, "loss": 0.0414, "step": 9030 }, { "epoch": 6.622710622710622, "grad_norm": 0.80859375, "learning_rate": 9.484780277223368e-05, "loss": 0.0434, "step": 9040 }, { "epoch": 6.63003663003663, "grad_norm": 0.54296875, "learning_rate": 9.483257139200372e-05, "loss": 0.0449, "step": 9050 }, { "epoch": 6.637362637362637, "grad_norm": 0.6015625, "learning_rate": 9.481731877058428e-05, "loss": 0.0461, "step": 9060 }, { "epoch": 6.644688644688645, "grad_norm": 0.78515625, "learning_rate": 9.480204491528341e-05, "loss": 0.0448, "step": 9070 }, { "epoch": 6.652014652014652, "grad_norm": 0.58203125, "learning_rate": 9.478674983341932e-05, "loss": 0.0423, "step": 9080 }, { "epoch": 6.65934065934066, "grad_norm": 0.474609375, "learning_rate": 9.47714335323204e-05, "loss": 0.044, "step": 9090 }, { "epoch": 6.666666666666667, "grad_norm": 0.546875, "learning_rate": 9.475609601932517e-05, "loss": 0.0464, "step": 9100 }, { "epoch": 6.673992673992674, "grad_norm": 0.625, "learning_rate": 9.474073730178235e-05, "loss": 0.0495, "step": 9110 }, { "epoch": 6.681318681318682, "grad_norm": 0.69921875, "learning_rate": 9.472535738705078e-05, "loss": 0.047, "step": 9120 }, { "epoch": 6.688644688644689, "grad_norm": 0.51953125, "learning_rate": 9.470995628249951e-05, "loss": 0.0399, "step": 9130 }, { "epoch": 6.695970695970696, "grad_norm": 0.78125, "learning_rate": 9.469453399550771e-05, "loss": 0.0447, "step": 9140 }, { "epoch": 6.7032967032967035, "grad_norm": 0.91015625, "learning_rate": 9.467909053346467e-05, "loss": 0.0542, "step": 9150 }, { "epoch": 6.710622710622711, "grad_norm": 0.625, "learning_rate": 9.466362590376992e-05, "loss": 0.0438, "step": 9160 }, { "epoch": 6.717948717948718, "grad_norm": 1.015625, "learning_rate": 9.464814011383304e-05, "loss": 0.0538, "step": 9170 }, { "epoch": 6.725274725274725, "grad_norm": 0.8515625, "learning_rate": 9.463263317107377e-05, "loss": 0.0486, "step": 9180 }, { "epoch": 6.732600732600733, "grad_norm": 0.6875, "learning_rate": 9.4617105082922e-05, "loss": 0.0438, "step": 9190 }, { "epoch": 6.73992673992674, "grad_norm": 0.578125, "learning_rate": 9.460155585681775e-05, "loss": 0.0474, "step": 9200 }, { "epoch": 6.747252747252747, "grad_norm": 0.9296875, "learning_rate": 9.458598550021116e-05, "loss": 0.0447, "step": 9210 }, { "epoch": 6.754578754578755, "grad_norm": 0.61328125, "learning_rate": 9.45703940205625e-05, "loss": 0.0509, "step": 9220 }, { "epoch": 6.761904761904762, "grad_norm": 0.7890625, "learning_rate": 9.455478142534219e-05, "loss": 0.0444, "step": 9230 }, { "epoch": 6.769230769230769, "grad_norm": 0.82421875, "learning_rate": 9.453914772203069e-05, "loss": 0.0472, "step": 9240 }, { "epoch": 6.7765567765567765, "grad_norm": 0.84375, "learning_rate": 9.452349291811864e-05, "loss": 0.0499, "step": 9250 }, { "epoch": 6.783882783882784, "grad_norm": 0.8203125, "learning_rate": 9.450781702110677e-05, "loss": 0.0449, "step": 9260 }, { "epoch": 6.791208791208791, "grad_norm": 0.609375, "learning_rate": 9.449212003850593e-05, "loss": 0.0482, "step": 9270 }, { "epoch": 6.798534798534798, "grad_norm": 0.57421875, "learning_rate": 9.447640197783704e-05, "loss": 0.043, "step": 9280 }, { "epoch": 6.805860805860806, "grad_norm": 0.6328125, "learning_rate": 9.446066284663113e-05, "loss": 0.0459, "step": 9290 }, { "epoch": 6.813186813186813, "grad_norm": 0.625, "learning_rate": 9.444490265242937e-05, "loss": 0.0448, "step": 9300 }, { "epoch": 6.82051282051282, "grad_norm": 0.92578125, "learning_rate": 9.442912140278294e-05, "loss": 0.046, "step": 9310 }, { "epoch": 6.827838827838828, "grad_norm": 0.8671875, "learning_rate": 9.44133191052532e-05, "loss": 0.0489, "step": 9320 }, { "epoch": 6.835164835164835, "grad_norm": 1.0703125, "learning_rate": 9.439749576741151e-05, "loss": 0.0503, "step": 9330 }, { "epoch": 6.842490842490842, "grad_norm": 0.66796875, "learning_rate": 9.438165139683939e-05, "loss": 0.0481, "step": 9340 }, { "epoch": 6.8498168498168495, "grad_norm": 0.74609375, "learning_rate": 9.436578600112838e-05, "loss": 0.0432, "step": 9350 }, { "epoch": 6.857142857142857, "grad_norm": 0.703125, "learning_rate": 9.434989958788009e-05, "loss": 0.0463, "step": 9360 }, { "epoch": 6.864468864468865, "grad_norm": 0.54296875, "learning_rate": 9.433399216470623e-05, "loss": 0.0508, "step": 9370 }, { "epoch": 6.871794871794872, "grad_norm": 0.7421875, "learning_rate": 9.431806373922859e-05, "loss": 0.0518, "step": 9380 }, { "epoch": 6.8791208791208796, "grad_norm": 0.75390625, "learning_rate": 9.4302114319079e-05, "loss": 0.0494, "step": 9390 }, { "epoch": 6.886446886446887, "grad_norm": 0.66796875, "learning_rate": 9.428614391189932e-05, "loss": 0.044, "step": 9400 }, { "epoch": 6.893772893772894, "grad_norm": 0.66015625, "learning_rate": 9.427015252534155e-05, "loss": 0.0398, "step": 9410 }, { "epoch": 6.9010989010989015, "grad_norm": 0.609375, "learning_rate": 9.425414016706762e-05, "loss": 0.0506, "step": 9420 }, { "epoch": 6.908424908424909, "grad_norm": 0.62890625, "learning_rate": 9.423810684474963e-05, "loss": 0.0425, "step": 9430 }, { "epoch": 6.915750915750916, "grad_norm": 1.0859375, "learning_rate": 9.422205256606963e-05, "loss": 0.0529, "step": 9440 }, { "epoch": 6.923076923076923, "grad_norm": 0.62109375, "learning_rate": 9.42059773387198e-05, "loss": 0.042, "step": 9450 }, { "epoch": 6.930402930402931, "grad_norm": 0.66796875, "learning_rate": 9.418988117040226e-05, "loss": 0.0475, "step": 9460 }, { "epoch": 6.937728937728938, "grad_norm": 0.63671875, "learning_rate": 9.417376406882924e-05, "loss": 0.0445, "step": 9470 }, { "epoch": 6.945054945054945, "grad_norm": 0.671875, "learning_rate": 9.415762604172296e-05, "loss": 0.0437, "step": 9480 }, { "epoch": 6.9523809523809526, "grad_norm": 0.7890625, "learning_rate": 9.41414670968157e-05, "loss": 0.0488, "step": 9490 }, { "epoch": 6.95970695970696, "grad_norm": 1.3984375, "learning_rate": 9.412528724184973e-05, "loss": 0.0472, "step": 9500 }, { "epoch": 6.967032967032967, "grad_norm": 0.7421875, "learning_rate": 9.410908648457733e-05, "loss": 0.0449, "step": 9510 }, { "epoch": 6.9743589743589745, "grad_norm": 0.890625, "learning_rate": 9.40928648327608e-05, "loss": 0.0514, "step": 9520 }, { "epoch": 6.981684981684982, "grad_norm": 0.69140625, "learning_rate": 9.407662229417253e-05, "loss": 0.0468, "step": 9530 }, { "epoch": 6.989010989010989, "grad_norm": 0.58203125, "learning_rate": 9.406035887659483e-05, "loss": 0.0513, "step": 9540 }, { "epoch": 6.996336996336996, "grad_norm": 1.0234375, "learning_rate": 9.404407458781998e-05, "loss": 0.0514, "step": 9550 }, { "epoch": 7.003663003663004, "grad_norm": 0.80859375, "learning_rate": 9.40277694356504e-05, "loss": 0.0502, "step": 9560 }, { "epoch": 7.010989010989011, "grad_norm": 0.859375, "learning_rate": 9.401144342789835e-05, "loss": 0.0474, "step": 9570 }, { "epoch": 7.018315018315018, "grad_norm": 0.6875, "learning_rate": 9.399509657238623e-05, "loss": 0.0504, "step": 9580 }, { "epoch": 7.0256410256410255, "grad_norm": 0.4140625, "learning_rate": 9.39787288769463e-05, "loss": 0.0378, "step": 9590 }, { "epoch": 7.032967032967033, "grad_norm": 0.7734375, "learning_rate": 9.396234034942087e-05, "loss": 0.047, "step": 9600 }, { "epoch": 7.04029304029304, "grad_norm": 1.28125, "learning_rate": 9.394593099766222e-05, "loss": 0.0531, "step": 9610 }, { "epoch": 7.0476190476190474, "grad_norm": 0.87109375, "learning_rate": 9.392950082953262e-05, "loss": 0.0463, "step": 9620 }, { "epoch": 7.054945054945055, "grad_norm": 0.66796875, "learning_rate": 9.391304985290431e-05, "loss": 0.0499, "step": 9630 }, { "epoch": 7.062271062271062, "grad_norm": 0.47265625, "learning_rate": 9.389657807565947e-05, "loss": 0.0444, "step": 9640 }, { "epoch": 7.069597069597069, "grad_norm": 0.5625, "learning_rate": 9.388008550569027e-05, "loss": 0.0442, "step": 9650 }, { "epoch": 7.076923076923077, "grad_norm": 0.82421875, "learning_rate": 9.386357215089888e-05, "loss": 0.0409, "step": 9660 }, { "epoch": 7.084249084249084, "grad_norm": 0.474609375, "learning_rate": 9.384703801919733e-05, "loss": 0.0449, "step": 9670 }, { "epoch": 7.091575091575091, "grad_norm": 0.80078125, "learning_rate": 9.383048311850771e-05, "loss": 0.0501, "step": 9680 }, { "epoch": 7.0989010989010985, "grad_norm": 0.55078125, "learning_rate": 9.3813907456762e-05, "loss": 0.0388, "step": 9690 }, { "epoch": 7.106227106227106, "grad_norm": 0.51171875, "learning_rate": 9.379731104190215e-05, "loss": 0.0395, "step": 9700 }, { "epoch": 7.113553113553113, "grad_norm": 0.7421875, "learning_rate": 9.378069388188003e-05, "loss": 0.0465, "step": 9710 }, { "epoch": 7.1208791208791204, "grad_norm": 0.80078125, "learning_rate": 9.376405598465748e-05, "loss": 0.0427, "step": 9720 }, { "epoch": 7.128205128205128, "grad_norm": 0.99609375, "learning_rate": 9.374739735820628e-05, "loss": 0.0484, "step": 9730 }, { "epoch": 7.135531135531136, "grad_norm": 0.5859375, "learning_rate": 9.37307180105081e-05, "loss": 0.0453, "step": 9740 }, { "epoch": 7.142857142857143, "grad_norm": 0.546875, "learning_rate": 9.371401794955457e-05, "loss": 0.0418, "step": 9750 }, { "epoch": 7.1501831501831505, "grad_norm": 0.65234375, "learning_rate": 9.369729718334721e-05, "loss": 0.0393, "step": 9760 }, { "epoch": 7.157509157509158, "grad_norm": 0.5234375, "learning_rate": 9.368055571989753e-05, "loss": 0.0434, "step": 9770 }, { "epoch": 7.164835164835165, "grad_norm": 0.6875, "learning_rate": 9.366379356722689e-05, "loss": 0.0416, "step": 9780 }, { "epoch": 7.172161172161172, "grad_norm": 0.58203125, "learning_rate": 9.364701073336661e-05, "loss": 0.049, "step": 9790 }, { "epoch": 7.17948717948718, "grad_norm": 0.494140625, "learning_rate": 9.363020722635787e-05, "loss": 0.037, "step": 9800 }, { "epoch": 7.186813186813187, "grad_norm": 0.8046875, "learning_rate": 9.36133830542518e-05, "loss": 0.0456, "step": 9810 }, { "epoch": 7.194139194139194, "grad_norm": 0.70703125, "learning_rate": 9.35965382251094e-05, "loss": 0.0397, "step": 9820 }, { "epoch": 7.201465201465202, "grad_norm": 0.53125, "learning_rate": 9.357967274700159e-05, "loss": 0.0484, "step": 9830 }, { "epoch": 7.208791208791209, "grad_norm": 0.66015625, "learning_rate": 9.356278662800916e-05, "loss": 0.0502, "step": 9840 }, { "epoch": 7.216117216117216, "grad_norm": 0.9375, "learning_rate": 9.354587987622282e-05, "loss": 0.0491, "step": 9850 }, { "epoch": 7.2234432234432235, "grad_norm": 0.64453125, "learning_rate": 9.352895249974314e-05, "loss": 0.0451, "step": 9860 }, { "epoch": 7.230769230769231, "grad_norm": 1.046875, "learning_rate": 9.35120045066806e-05, "loss": 0.042, "step": 9870 }, { "epoch": 7.238095238095238, "grad_norm": 0.65234375, "learning_rate": 9.34950359051555e-05, "loss": 0.0426, "step": 9880 }, { "epoch": 7.245421245421245, "grad_norm": 0.9140625, "learning_rate": 9.347804670329809e-05, "loss": 0.052, "step": 9890 }, { "epoch": 7.252747252747253, "grad_norm": 0.96875, "learning_rate": 9.346103690924845e-05, "loss": 0.0486, "step": 9900 }, { "epoch": 7.26007326007326, "grad_norm": 0.68359375, "learning_rate": 9.34440065311565e-05, "loss": 0.0446, "step": 9910 }, { "epoch": 7.267399267399267, "grad_norm": 0.7421875, "learning_rate": 9.342695557718209e-05, "loss": 0.0487, "step": 9920 }, { "epoch": 7.274725274725275, "grad_norm": 0.46875, "learning_rate": 9.340988405549488e-05, "loss": 0.0377, "step": 9930 }, { "epoch": 7.282051282051282, "grad_norm": 0.5390625, "learning_rate": 9.339279197427437e-05, "loss": 0.0489, "step": 9940 }, { "epoch": 7.289377289377289, "grad_norm": 0.9609375, "learning_rate": 9.337567934170998e-05, "loss": 0.0495, "step": 9950 }, { "epoch": 7.2967032967032965, "grad_norm": 0.75390625, "learning_rate": 9.335854616600092e-05, "loss": 0.0438, "step": 9960 }, { "epoch": 7.304029304029304, "grad_norm": 0.91015625, "learning_rate": 9.334139245535623e-05, "loss": 0.0447, "step": 9970 }, { "epoch": 7.311355311355311, "grad_norm": 0.62890625, "learning_rate": 9.332421821799485e-05, "loss": 0.046, "step": 9980 }, { "epoch": 7.318681318681318, "grad_norm": 0.6640625, "learning_rate": 9.33070234621455e-05, "loss": 0.0441, "step": 9990 }, { "epoch": 7.326007326007326, "grad_norm": 0.88671875, "learning_rate": 9.328980819604676e-05, "loss": 0.0527, "step": 10000 }, { "epoch": 7.333333333333333, "grad_norm": 0.86328125, "learning_rate": 9.327257242794701e-05, "loss": 0.0445, "step": 10010 }, { "epoch": 7.34065934065934, "grad_norm": 0.609375, "learning_rate": 9.32553161661045e-05, "loss": 0.0442, "step": 10020 }, { "epoch": 7.347985347985348, "grad_norm": 0.859375, "learning_rate": 9.323803941878726e-05, "loss": 0.0468, "step": 10030 }, { "epoch": 7.355311355311355, "grad_norm": 1.0078125, "learning_rate": 9.322074219427314e-05, "loss": 0.0519, "step": 10040 }, { "epoch": 7.362637362637362, "grad_norm": 0.55859375, "learning_rate": 9.320342450084982e-05, "loss": 0.0429, "step": 10050 }, { "epoch": 7.36996336996337, "grad_norm": 0.5625, "learning_rate": 9.318608634681477e-05, "loss": 0.0485, "step": 10060 }, { "epoch": 7.377289377289378, "grad_norm": 0.55078125, "learning_rate": 9.316872774047526e-05, "loss": 0.0452, "step": 10070 }, { "epoch": 7.384615384615385, "grad_norm": 0.734375, "learning_rate": 9.315134869014836e-05, "loss": 0.0467, "step": 10080 }, { "epoch": 7.391941391941392, "grad_norm": 0.50390625, "learning_rate": 9.313394920416098e-05, "loss": 0.0426, "step": 10090 }, { "epoch": 7.3992673992674, "grad_norm": 0.7734375, "learning_rate": 9.311652929084978e-05, "loss": 0.0421, "step": 10100 }, { "epoch": 7.406593406593407, "grad_norm": 0.62109375, "learning_rate": 9.309908895856118e-05, "loss": 0.0446, "step": 10110 }, { "epoch": 7.413919413919414, "grad_norm": 0.56640625, "learning_rate": 9.30816282156514e-05, "loss": 0.0415, "step": 10120 }, { "epoch": 7.4212454212454215, "grad_norm": 0.78515625, "learning_rate": 9.306414707048652e-05, "loss": 0.0447, "step": 10130 }, { "epoch": 7.428571428571429, "grad_norm": 0.66796875, "learning_rate": 9.30466455314423e-05, "loss": 0.0413, "step": 10140 }, { "epoch": 7.435897435897436, "grad_norm": 0.51953125, "learning_rate": 9.302912360690427e-05, "loss": 0.0423, "step": 10150 }, { "epoch": 7.443223443223443, "grad_norm": 0.703125, "learning_rate": 9.301158130526779e-05, "loss": 0.0416, "step": 10160 }, { "epoch": 7.450549450549451, "grad_norm": 0.4921875, "learning_rate": 9.299401863493798e-05, "loss": 0.0451, "step": 10170 }, { "epoch": 7.457875457875458, "grad_norm": 0.83984375, "learning_rate": 9.297643560432961e-05, "loss": 0.0434, "step": 10180 }, { "epoch": 7.465201465201465, "grad_norm": 0.7265625, "learning_rate": 9.295883222186739e-05, "loss": 0.0452, "step": 10190 }, { "epoch": 7.472527472527473, "grad_norm": 0.51953125, "learning_rate": 9.294120849598558e-05, "loss": 0.0427, "step": 10200 }, { "epoch": 7.47985347985348, "grad_norm": 0.5, "learning_rate": 9.292356443512835e-05, "loss": 0.0476, "step": 10210 }, { "epoch": 7.487179487179487, "grad_norm": 0.93359375, "learning_rate": 9.290590004774951e-05, "loss": 0.0478, "step": 10220 }, { "epoch": 7.4945054945054945, "grad_norm": 0.62890625, "learning_rate": 9.288821534231269e-05, "loss": 0.045, "step": 10230 }, { "epoch": 7.501831501831502, "grad_norm": 0.69921875, "learning_rate": 9.287051032729118e-05, "loss": 0.0404, "step": 10240 }, { "epoch": 7.509157509157509, "grad_norm": 0.75390625, "learning_rate": 9.285278501116803e-05, "loss": 0.0435, "step": 10250 }, { "epoch": 7.516483516483516, "grad_norm": 0.71484375, "learning_rate": 9.283503940243605e-05, "loss": 0.0439, "step": 10260 }, { "epoch": 7.523809523809524, "grad_norm": 0.546875, "learning_rate": 9.281727350959772e-05, "loss": 0.0405, "step": 10270 }, { "epoch": 7.531135531135531, "grad_norm": 1.0859375, "learning_rate": 9.279948734116526e-05, "loss": 0.0532, "step": 10280 }, { "epoch": 7.538461538461538, "grad_norm": 0.578125, "learning_rate": 9.278168090566062e-05, "loss": 0.0466, "step": 10290 }, { "epoch": 7.545787545787546, "grad_norm": 0.62109375, "learning_rate": 9.276385421161544e-05, "loss": 0.0415, "step": 10300 }, { "epoch": 7.553113553113553, "grad_norm": 0.61328125, "learning_rate": 9.274600726757108e-05, "loss": 0.0425, "step": 10310 }, { "epoch": 7.56043956043956, "grad_norm": 0.5625, "learning_rate": 9.27281400820786e-05, "loss": 0.0469, "step": 10320 }, { "epoch": 7.5677655677655675, "grad_norm": 0.50390625, "learning_rate": 9.271025266369872e-05, "loss": 0.0408, "step": 10330 }, { "epoch": 7.575091575091575, "grad_norm": 0.66796875, "learning_rate": 9.269234502100193e-05, "loss": 0.0398, "step": 10340 }, { "epoch": 7.582417582417582, "grad_norm": 0.56640625, "learning_rate": 9.267441716256837e-05, "loss": 0.0424, "step": 10350 }, { "epoch": 7.589743589743589, "grad_norm": 0.546875, "learning_rate": 9.265646909698785e-05, "loss": 0.0373, "step": 10360 }, { "epoch": 7.597069597069597, "grad_norm": 0.73046875, "learning_rate": 9.263850083285985e-05, "loss": 0.0455, "step": 10370 }, { "epoch": 7.604395604395604, "grad_norm": 0.703125, "learning_rate": 9.262051237879361e-05, "loss": 0.0429, "step": 10380 }, { "epoch": 7.611721611721611, "grad_norm": 0.5546875, "learning_rate": 9.260250374340795e-05, "loss": 0.0411, "step": 10390 }, { "epoch": 7.619047619047619, "grad_norm": 0.5625, "learning_rate": 9.258447493533141e-05, "loss": 0.0465, "step": 10400 }, { "epoch": 7.626373626373626, "grad_norm": 0.55078125, "learning_rate": 9.256642596320219e-05, "loss": 0.0453, "step": 10410 }, { "epoch": 7.633699633699633, "grad_norm": 0.59375, "learning_rate": 9.254835683566812e-05, "loss": 0.0445, "step": 10420 }, { "epoch": 7.641025641025641, "grad_norm": 0.85546875, "learning_rate": 9.253026756138674e-05, "loss": 0.0408, "step": 10430 }, { "epoch": 7.648351648351649, "grad_norm": 0.6171875, "learning_rate": 9.251215814902519e-05, "loss": 0.0461, "step": 10440 }, { "epoch": 7.655677655677656, "grad_norm": 0.66015625, "learning_rate": 9.249402860726033e-05, "loss": 0.0457, "step": 10450 }, { "epoch": 7.663003663003663, "grad_norm": 0.65234375, "learning_rate": 9.247587894477856e-05, "loss": 0.0404, "step": 10460 }, { "epoch": 7.670329670329671, "grad_norm": 0.58203125, "learning_rate": 9.245770917027602e-05, "loss": 0.0445, "step": 10470 }, { "epoch": 7.677655677655678, "grad_norm": 0.5546875, "learning_rate": 9.243951929245843e-05, "loss": 0.0454, "step": 10480 }, { "epoch": 7.684981684981685, "grad_norm": 0.6953125, "learning_rate": 9.242130932004114e-05, "loss": 0.044, "step": 10490 }, { "epoch": 7.6923076923076925, "grad_norm": 0.484375, "learning_rate": 9.240307926174916e-05, "loss": 0.0414, "step": 10500 }, { "epoch": 7.6996336996337, "grad_norm": 0.66015625, "learning_rate": 9.238482912631713e-05, "loss": 0.0461, "step": 10510 }, { "epoch": 7.706959706959707, "grad_norm": 0.94921875, "learning_rate": 9.236655892248927e-05, "loss": 0.0493, "step": 10520 }, { "epoch": 7.714285714285714, "grad_norm": 0.69921875, "learning_rate": 9.234826865901942e-05, "loss": 0.0463, "step": 10530 }, { "epoch": 7.721611721611722, "grad_norm": 0.6171875, "learning_rate": 9.232995834467107e-05, "loss": 0.0413, "step": 10540 }, { "epoch": 7.728937728937729, "grad_norm": 0.6328125, "learning_rate": 9.231162798821726e-05, "loss": 0.0376, "step": 10550 }, { "epoch": 7.736263736263736, "grad_norm": 1.0625, "learning_rate": 9.229327759844071e-05, "loss": 0.0449, "step": 10560 }, { "epoch": 7.743589743589744, "grad_norm": 0.55078125, "learning_rate": 9.227490718413366e-05, "loss": 0.044, "step": 10570 }, { "epoch": 7.750915750915751, "grad_norm": 0.63671875, "learning_rate": 9.2256516754098e-05, "loss": 0.0378, "step": 10580 }, { "epoch": 7.758241758241758, "grad_norm": 0.8359375, "learning_rate": 9.223810631714517e-05, "loss": 0.045, "step": 10590 }, { "epoch": 7.7655677655677655, "grad_norm": 0.62890625, "learning_rate": 9.221967588209623e-05, "loss": 0.0444, "step": 10600 }, { "epoch": 7.772893772893773, "grad_norm": 0.462890625, "learning_rate": 9.220122545778176e-05, "loss": 0.0435, "step": 10610 }, { "epoch": 7.78021978021978, "grad_norm": 0.494140625, "learning_rate": 9.218275505304204e-05, "loss": 0.0399, "step": 10620 }, { "epoch": 7.787545787545787, "grad_norm": 0.63671875, "learning_rate": 9.216426467672679e-05, "loss": 0.0445, "step": 10630 }, { "epoch": 7.794871794871795, "grad_norm": 0.65625, "learning_rate": 9.21457543376954e-05, "loss": 0.0403, "step": 10640 }, { "epoch": 7.802197802197802, "grad_norm": 0.5546875, "learning_rate": 9.212722404481674e-05, "loss": 0.0403, "step": 10650 }, { "epoch": 7.809523809523809, "grad_norm": 0.72265625, "learning_rate": 9.210867380696929e-05, "loss": 0.0423, "step": 10660 }, { "epoch": 7.816849816849817, "grad_norm": 0.55859375, "learning_rate": 9.20901036330411e-05, "loss": 0.0445, "step": 10670 }, { "epoch": 7.824175824175824, "grad_norm": 0.6171875, "learning_rate": 9.207151353192971e-05, "loss": 0.0451, "step": 10680 }, { "epoch": 7.831501831501831, "grad_norm": 0.66015625, "learning_rate": 9.205290351254227e-05, "loss": 0.0414, "step": 10690 }, { "epoch": 7.8388278388278385, "grad_norm": 0.490234375, "learning_rate": 9.203427358379548e-05, "loss": 0.0404, "step": 10700 }, { "epoch": 7.846153846153846, "grad_norm": 0.953125, "learning_rate": 9.201562375461551e-05, "loss": 0.0448, "step": 10710 }, { "epoch": 7.853479853479853, "grad_norm": 0.53125, "learning_rate": 9.19969540339381e-05, "loss": 0.0396, "step": 10720 }, { "epoch": 7.860805860805861, "grad_norm": 0.60546875, "learning_rate": 9.197826443070855e-05, "loss": 0.038, "step": 10730 }, { "epoch": 7.868131868131869, "grad_norm": 0.7265625, "learning_rate": 9.195955495388167e-05, "loss": 0.0477, "step": 10740 }, { "epoch": 7.875457875457876, "grad_norm": 0.5234375, "learning_rate": 9.194082561242175e-05, "loss": 0.0415, "step": 10750 }, { "epoch": 7.882783882783883, "grad_norm": 0.6875, "learning_rate": 9.192207641530267e-05, "loss": 0.0423, "step": 10760 }, { "epoch": 7.8901098901098905, "grad_norm": 0.64453125, "learning_rate": 9.190330737150774e-05, "loss": 0.042, "step": 10770 }, { "epoch": 7.897435897435898, "grad_norm": 0.53125, "learning_rate": 9.188451849002987e-05, "loss": 0.0405, "step": 10780 }, { "epoch": 7.904761904761905, "grad_norm": 0.58984375, "learning_rate": 9.18657097798714e-05, "loss": 0.0427, "step": 10790 }, { "epoch": 7.912087912087912, "grad_norm": 0.83203125, "learning_rate": 9.184688125004421e-05, "loss": 0.0438, "step": 10800 }, { "epoch": 7.91941391941392, "grad_norm": 0.53125, "learning_rate": 9.182803290956967e-05, "loss": 0.0402, "step": 10810 }, { "epoch": 7.926739926739927, "grad_norm": 0.81640625, "learning_rate": 9.180916476747865e-05, "loss": 0.0471, "step": 10820 }, { "epoch": 7.934065934065934, "grad_norm": 0.70703125, "learning_rate": 9.179027683281146e-05, "loss": 0.0432, "step": 10830 }, { "epoch": 7.941391941391942, "grad_norm": 0.59765625, "learning_rate": 9.177136911461797e-05, "loss": 0.0442, "step": 10840 }, { "epoch": 7.948717948717949, "grad_norm": 0.55859375, "learning_rate": 9.175244162195745e-05, "loss": 0.0433, "step": 10850 }, { "epoch": 7.956043956043956, "grad_norm": 0.5546875, "learning_rate": 9.17334943638987e-05, "loss": 0.0457, "step": 10860 }, { "epoch": 7.9633699633699635, "grad_norm": 0.423828125, "learning_rate": 9.171452734951999e-05, "loss": 0.04, "step": 10870 }, { "epoch": 7.970695970695971, "grad_norm": 0.74609375, "learning_rate": 9.169554058790903e-05, "loss": 0.0446, "step": 10880 }, { "epoch": 7.978021978021978, "grad_norm": 0.8515625, "learning_rate": 9.167653408816297e-05, "loss": 0.0463, "step": 10890 }, { "epoch": 7.985347985347985, "grad_norm": 0.78515625, "learning_rate": 9.165750785938849e-05, "loss": 0.0421, "step": 10900 }, { "epoch": 7.992673992673993, "grad_norm": 0.4921875, "learning_rate": 9.163846191070163e-05, "loss": 0.0408, "step": 10910 }, { "epoch": 8.0, "grad_norm": 0.484375, "learning_rate": 9.1619396251228e-05, "loss": 0.0395, "step": 10920 }, { "epoch": 8.007326007326007, "grad_norm": 0.6953125, "learning_rate": 9.160031089010251e-05, "loss": 0.0453, "step": 10930 }, { "epoch": 8.014652014652015, "grad_norm": 0.578125, "learning_rate": 9.158120583646964e-05, "loss": 0.0403, "step": 10940 }, { "epoch": 8.021978021978022, "grad_norm": 0.69921875, "learning_rate": 9.156208109948321e-05, "loss": 0.0436, "step": 10950 }, { "epoch": 8.02930402930403, "grad_norm": 0.546875, "learning_rate": 9.154293668830653e-05, "loss": 0.0397, "step": 10960 }, { "epoch": 8.036630036630036, "grad_norm": 0.57421875, "learning_rate": 9.152377261211232e-05, "loss": 0.0398, "step": 10970 }, { "epoch": 8.043956043956044, "grad_norm": 0.55859375, "learning_rate": 9.150458888008269e-05, "loss": 0.0402, "step": 10980 }, { "epoch": 8.051282051282051, "grad_norm": 0.7578125, "learning_rate": 9.148538550140922e-05, "loss": 0.0472, "step": 10990 }, { "epoch": 8.058608058608058, "grad_norm": 0.546875, "learning_rate": 9.146616248529286e-05, "loss": 0.0486, "step": 11000 }, { "epoch": 8.065934065934066, "grad_norm": 0.59375, "learning_rate": 9.1446919840944e-05, "loss": 0.0396, "step": 11010 }, { "epoch": 8.073260073260073, "grad_norm": 0.515625, "learning_rate": 9.142765757758242e-05, "loss": 0.04, "step": 11020 }, { "epoch": 8.08058608058608, "grad_norm": 0.6640625, "learning_rate": 9.140837570443732e-05, "loss": 0.0463, "step": 11030 }, { "epoch": 8.087912087912088, "grad_norm": 0.5234375, "learning_rate": 9.138907423074724e-05, "loss": 0.0407, "step": 11040 }, { "epoch": 8.095238095238095, "grad_norm": 0.7421875, "learning_rate": 9.136975316576019e-05, "loss": 0.0457, "step": 11050 }, { "epoch": 8.102564102564102, "grad_norm": 0.625, "learning_rate": 9.135041251873349e-05, "loss": 0.037, "step": 11060 }, { "epoch": 8.10989010989011, "grad_norm": 0.67578125, "learning_rate": 9.133105229893388e-05, "loss": 0.0471, "step": 11070 }, { "epoch": 8.117216117216117, "grad_norm": 0.5859375, "learning_rate": 9.131167251563752e-05, "loss": 0.0443, "step": 11080 }, { "epoch": 8.124542124542124, "grad_norm": 0.6328125, "learning_rate": 9.129227317812986e-05, "loss": 0.0442, "step": 11090 }, { "epoch": 8.131868131868131, "grad_norm": 0.68359375, "learning_rate": 9.127285429570577e-05, "loss": 0.0419, "step": 11100 }, { "epoch": 8.139194139194139, "grad_norm": 0.68359375, "learning_rate": 9.125341587766946e-05, "loss": 0.0472, "step": 11110 }, { "epoch": 8.146520146520146, "grad_norm": 0.76953125, "learning_rate": 9.123395793333454e-05, "loss": 0.0443, "step": 11120 }, { "epoch": 8.153846153846153, "grad_norm": 0.4453125, "learning_rate": 9.121448047202395e-05, "loss": 0.0403, "step": 11130 }, { "epoch": 8.16117216117216, "grad_norm": 0.640625, "learning_rate": 9.119498350306995e-05, "loss": 0.0476, "step": 11140 }, { "epoch": 8.168498168498168, "grad_norm": 0.75390625, "learning_rate": 9.11754670358142e-05, "loss": 0.0526, "step": 11150 }, { "epoch": 8.175824175824175, "grad_norm": 0.62890625, "learning_rate": 9.115593107960769e-05, "loss": 0.0496, "step": 11160 }, { "epoch": 8.183150183150182, "grad_norm": 0.671875, "learning_rate": 9.11363756438107e-05, "loss": 0.0452, "step": 11170 }, { "epoch": 8.19047619047619, "grad_norm": 0.55078125, "learning_rate": 9.111680073779291e-05, "loss": 0.0388, "step": 11180 }, { "epoch": 8.197802197802197, "grad_norm": 0.87890625, "learning_rate": 9.109720637093331e-05, "loss": 0.049, "step": 11190 }, { "epoch": 8.205128205128204, "grad_norm": 0.69921875, "learning_rate": 9.107759255262017e-05, "loss": 0.0492, "step": 11200 }, { "epoch": 8.212454212454212, "grad_norm": 0.53515625, "learning_rate": 9.105795929225115e-05, "loss": 0.0496, "step": 11210 }, { "epoch": 8.219780219780219, "grad_norm": 0.671875, "learning_rate": 9.103830659923315e-05, "loss": 0.0455, "step": 11220 }, { "epoch": 8.227106227106226, "grad_norm": 0.90625, "learning_rate": 9.101863448298244e-05, "loss": 0.0466, "step": 11230 }, { "epoch": 8.234432234432234, "grad_norm": 0.5625, "learning_rate": 9.099894295292459e-05, "loss": 0.0391, "step": 11240 }, { "epoch": 8.241758241758241, "grad_norm": 0.609375, "learning_rate": 9.097923201849445e-05, "loss": 0.0487, "step": 11250 }, { "epoch": 8.249084249084248, "grad_norm": 0.44140625, "learning_rate": 9.095950168913616e-05, "loss": 0.0395, "step": 11260 }, { "epoch": 8.256410256410255, "grad_norm": 0.486328125, "learning_rate": 9.093975197430318e-05, "loss": 0.0384, "step": 11270 }, { "epoch": 8.263736263736265, "grad_norm": 0.419921875, "learning_rate": 9.091998288345824e-05, "loss": 0.039, "step": 11280 }, { "epoch": 8.271062271062272, "grad_norm": 0.75390625, "learning_rate": 9.090019442607336e-05, "loss": 0.0442, "step": 11290 }, { "epoch": 8.27838827838828, "grad_norm": 0.609375, "learning_rate": 9.088038661162985e-05, "loss": 0.0424, "step": 11300 }, { "epoch": 8.285714285714286, "grad_norm": 0.734375, "learning_rate": 9.08605594496183e-05, "loss": 0.0431, "step": 11310 }, { "epoch": 8.293040293040294, "grad_norm": 0.6796875, "learning_rate": 9.08407129495385e-05, "loss": 0.0426, "step": 11320 }, { "epoch": 8.300366300366301, "grad_norm": 0.5625, "learning_rate": 9.08208471208996e-05, "loss": 0.0462, "step": 11330 }, { "epoch": 8.307692307692308, "grad_norm": 0.98828125, "learning_rate": 9.080096197321997e-05, "loss": 0.0479, "step": 11340 }, { "epoch": 8.315018315018316, "grad_norm": 0.63671875, "learning_rate": 9.078105751602723e-05, "loss": 0.0408, "step": 11350 }, { "epoch": 8.322344322344323, "grad_norm": 0.57421875, "learning_rate": 9.076113375885823e-05, "loss": 0.0395, "step": 11360 }, { "epoch": 8.32967032967033, "grad_norm": 0.5703125, "learning_rate": 9.074119071125914e-05, "loss": 0.0495, "step": 11370 }, { "epoch": 8.336996336996338, "grad_norm": 0.57421875, "learning_rate": 9.072122838278531e-05, "loss": 0.0378, "step": 11380 }, { "epoch": 8.344322344322345, "grad_norm": 0.50390625, "learning_rate": 9.070124678300135e-05, "loss": 0.0377, "step": 11390 }, { "epoch": 8.351648351648352, "grad_norm": 0.7109375, "learning_rate": 9.068124592148109e-05, "loss": 0.0374, "step": 11400 }, { "epoch": 8.35897435897436, "grad_norm": 0.51171875, "learning_rate": 9.066122580780761e-05, "loss": 0.0433, "step": 11410 }, { "epoch": 8.366300366300367, "grad_norm": 0.71484375, "learning_rate": 9.064118645157322e-05, "loss": 0.0456, "step": 11420 }, { "epoch": 8.373626373626374, "grad_norm": 0.60546875, "learning_rate": 9.062112786237939e-05, "loss": 0.0407, "step": 11430 }, { "epoch": 8.380952380952381, "grad_norm": 0.59765625, "learning_rate": 9.060105004983687e-05, "loss": 0.0397, "step": 11440 }, { "epoch": 8.388278388278389, "grad_norm": 0.5859375, "learning_rate": 9.05809530235656e-05, "loss": 0.042, "step": 11450 }, { "epoch": 8.395604395604396, "grad_norm": 1.1875, "learning_rate": 9.056083679319473e-05, "loss": 0.0521, "step": 11460 }, { "epoch": 8.402930402930403, "grad_norm": 0.388671875, "learning_rate": 9.05407013683626e-05, "loss": 0.0452, "step": 11470 }, { "epoch": 8.41025641025641, "grad_norm": 0.65234375, "learning_rate": 9.052054675871675e-05, "loss": 0.0422, "step": 11480 }, { "epoch": 8.417582417582418, "grad_norm": 0.546875, "learning_rate": 9.050037297391391e-05, "loss": 0.036, "step": 11490 }, { "epoch": 8.424908424908425, "grad_norm": 0.91796875, "learning_rate": 9.048018002362001e-05, "loss": 0.0438, "step": 11500 }, { "epoch": 8.432234432234432, "grad_norm": 0.578125, "learning_rate": 9.045996791751016e-05, "loss": 0.0416, "step": 11510 }, { "epoch": 8.43956043956044, "grad_norm": 0.474609375, "learning_rate": 9.043973666526864e-05, "loss": 0.0401, "step": 11520 }, { "epoch": 8.446886446886447, "grad_norm": 0.60546875, "learning_rate": 9.041948627658892e-05, "loss": 0.0449, "step": 11530 }, { "epoch": 8.454212454212454, "grad_norm": 0.55078125, "learning_rate": 9.039921676117357e-05, "loss": 0.0377, "step": 11540 }, { "epoch": 8.461538461538462, "grad_norm": 0.74609375, "learning_rate": 9.037892812873444e-05, "loss": 0.044, "step": 11550 }, { "epoch": 8.468864468864469, "grad_norm": 0.78125, "learning_rate": 9.035862038899246e-05, "loss": 0.0422, "step": 11560 }, { "epoch": 8.476190476190476, "grad_norm": 0.578125, "learning_rate": 9.033829355167775e-05, "loss": 0.043, "step": 11570 }, { "epoch": 8.483516483516484, "grad_norm": 0.63671875, "learning_rate": 9.031794762652952e-05, "loss": 0.0377, "step": 11580 }, { "epoch": 8.49084249084249, "grad_norm": 0.51171875, "learning_rate": 9.029758262329621e-05, "loss": 0.0449, "step": 11590 }, { "epoch": 8.498168498168498, "grad_norm": 0.59765625, "learning_rate": 9.027719855173536e-05, "loss": 0.0396, "step": 11600 }, { "epoch": 8.505494505494505, "grad_norm": 0.431640625, "learning_rate": 9.025679542161361e-05, "loss": 0.0429, "step": 11610 }, { "epoch": 8.512820512820513, "grad_norm": 0.859375, "learning_rate": 9.023637324270682e-05, "loss": 0.0441, "step": 11620 }, { "epoch": 8.52014652014652, "grad_norm": 0.68359375, "learning_rate": 9.02159320247999e-05, "loss": 0.0402, "step": 11630 }, { "epoch": 8.527472527472527, "grad_norm": 0.5234375, "learning_rate": 9.019547177768692e-05, "loss": 0.044, "step": 11640 }, { "epoch": 8.534798534798535, "grad_norm": 0.55078125, "learning_rate": 9.017499251117104e-05, "loss": 0.0405, "step": 11650 }, { "epoch": 8.542124542124542, "grad_norm": 0.4453125, "learning_rate": 9.015449423506454e-05, "loss": 0.0384, "step": 11660 }, { "epoch": 8.54945054945055, "grad_norm": 0.7890625, "learning_rate": 9.013397695918884e-05, "loss": 0.0447, "step": 11670 }, { "epoch": 8.556776556776557, "grad_norm": 0.482421875, "learning_rate": 9.011344069337443e-05, "loss": 0.0375, "step": 11680 }, { "epoch": 8.564102564102564, "grad_norm": 0.69140625, "learning_rate": 9.009288544746091e-05, "loss": 0.0408, "step": 11690 }, { "epoch": 8.571428571428571, "grad_norm": 1.25, "learning_rate": 9.007231123129697e-05, "loss": 0.0473, "step": 11700 }, { "epoch": 8.578754578754578, "grad_norm": 0.5546875, "learning_rate": 9.005171805474039e-05, "loss": 0.0492, "step": 11710 }, { "epoch": 8.586080586080586, "grad_norm": 0.55078125, "learning_rate": 9.003110592765805e-05, "loss": 0.0378, "step": 11720 }, { "epoch": 8.593406593406593, "grad_norm": 0.65625, "learning_rate": 9.001047485992589e-05, "loss": 0.0403, "step": 11730 }, { "epoch": 8.6007326007326, "grad_norm": 0.59765625, "learning_rate": 8.998982486142891e-05, "loss": 0.0388, "step": 11740 }, { "epoch": 8.608058608058608, "grad_norm": 0.5625, "learning_rate": 8.996915594206124e-05, "loss": 0.0415, "step": 11750 }, { "epoch": 8.615384615384615, "grad_norm": 0.6796875, "learning_rate": 8.994846811172603e-05, "loss": 0.0432, "step": 11760 }, { "epoch": 8.622710622710622, "grad_norm": 0.5625, "learning_rate": 8.992776138033547e-05, "loss": 0.0362, "step": 11770 }, { "epoch": 8.63003663003663, "grad_norm": 0.5078125, "learning_rate": 8.990703575781085e-05, "loss": 0.036, "step": 11780 }, { "epoch": 8.637362637362637, "grad_norm": 0.50390625, "learning_rate": 8.988629125408248e-05, "loss": 0.0414, "step": 11790 }, { "epoch": 8.644688644688644, "grad_norm": 0.578125, "learning_rate": 8.986552787908977e-05, "loss": 0.04, "step": 11800 }, { "epoch": 8.652014652014651, "grad_norm": 0.58203125, "learning_rate": 8.984474564278111e-05, "loss": 0.0409, "step": 11810 }, { "epoch": 8.659340659340659, "grad_norm": 0.60546875, "learning_rate": 8.982394455511394e-05, "loss": 0.0453, "step": 11820 }, { "epoch": 8.666666666666666, "grad_norm": 0.5625, "learning_rate": 8.980312462605478e-05, "loss": 0.041, "step": 11830 }, { "epoch": 8.673992673992673, "grad_norm": 0.5859375, "learning_rate": 8.978228586557912e-05, "loss": 0.0403, "step": 11840 }, { "epoch": 8.68131868131868, "grad_norm": 0.455078125, "learning_rate": 8.976142828367148e-05, "loss": 0.044, "step": 11850 }, { "epoch": 8.688644688644688, "grad_norm": 0.52734375, "learning_rate": 8.974055189032542e-05, "loss": 0.0466, "step": 11860 }, { "epoch": 8.695970695970695, "grad_norm": 0.62890625, "learning_rate": 8.971965669554352e-05, "loss": 0.0471, "step": 11870 }, { "epoch": 8.703296703296703, "grad_norm": 0.8359375, "learning_rate": 8.969874270933734e-05, "loss": 0.0385, "step": 11880 }, { "epoch": 8.71062271062271, "grad_norm": 0.76953125, "learning_rate": 8.967780994172746e-05, "loss": 0.0426, "step": 11890 }, { "epoch": 8.717948717948717, "grad_norm": 0.828125, "learning_rate": 8.965685840274346e-05, "loss": 0.0454, "step": 11900 }, { "epoch": 8.725274725274724, "grad_norm": 0.53125, "learning_rate": 8.96358881024239e-05, "loss": 0.0421, "step": 11910 }, { "epoch": 8.732600732600732, "grad_norm": 0.625, "learning_rate": 8.961489905081634e-05, "loss": 0.0405, "step": 11920 }, { "epoch": 8.73992673992674, "grad_norm": 0.5, "learning_rate": 8.959389125797733e-05, "loss": 0.0361, "step": 11930 }, { "epoch": 8.747252747252748, "grad_norm": 0.51953125, "learning_rate": 8.957286473397239e-05, "loss": 0.0428, "step": 11940 }, { "epoch": 8.754578754578755, "grad_norm": 0.5078125, "learning_rate": 8.955181948887602e-05, "loss": 0.0384, "step": 11950 }, { "epoch": 8.761904761904763, "grad_norm": 0.50390625, "learning_rate": 8.95307555327717e-05, "loss": 0.0404, "step": 11960 }, { "epoch": 8.76923076923077, "grad_norm": 0.58984375, "learning_rate": 8.950967287575182e-05, "loss": 0.0452, "step": 11970 }, { "epoch": 8.776556776556777, "grad_norm": 0.6328125, "learning_rate": 8.948857152791781e-05, "loss": 0.04, "step": 11980 }, { "epoch": 8.783882783882785, "grad_norm": 0.515625, "learning_rate": 8.946745149938e-05, "loss": 0.0409, "step": 11990 }, { "epoch": 8.791208791208792, "grad_norm": 0.58203125, "learning_rate": 8.944631280025773e-05, "loss": 0.0371, "step": 12000 }, { "epoch": 8.7985347985348, "grad_norm": 0.484375, "learning_rate": 8.942515544067919e-05, "loss": 0.0412, "step": 12010 }, { "epoch": 8.805860805860807, "grad_norm": 1.1796875, "learning_rate": 8.940397943078157e-05, "loss": 0.0489, "step": 12020 }, { "epoch": 8.813186813186814, "grad_norm": 0.52734375, "learning_rate": 8.938278478071103e-05, "loss": 0.0405, "step": 12030 }, { "epoch": 8.820512820512821, "grad_norm": 0.53125, "learning_rate": 8.936157150062257e-05, "loss": 0.0389, "step": 12040 }, { "epoch": 8.827838827838828, "grad_norm": 0.7890625, "learning_rate": 8.934033960068024e-05, "loss": 0.0427, "step": 12050 }, { "epoch": 8.835164835164836, "grad_norm": 1.28125, "learning_rate": 8.931908909105688e-05, "loss": 0.0475, "step": 12060 }, { "epoch": 8.842490842490843, "grad_norm": 0.6796875, "learning_rate": 8.92978199819343e-05, "loss": 0.0419, "step": 12070 }, { "epoch": 8.84981684981685, "grad_norm": 0.5703125, "learning_rate": 8.927653228350329e-05, "loss": 0.0391, "step": 12080 }, { "epoch": 8.857142857142858, "grad_norm": 0.515625, "learning_rate": 8.925522600596343e-05, "loss": 0.0372, "step": 12090 }, { "epoch": 8.864468864468865, "grad_norm": 0.61328125, "learning_rate": 8.923390115952328e-05, "loss": 0.0437, "step": 12100 }, { "epoch": 8.871794871794872, "grad_norm": 0.5625, "learning_rate": 8.921255775440027e-05, "loss": 0.0411, "step": 12110 }, { "epoch": 8.87912087912088, "grad_norm": 0.63671875, "learning_rate": 8.91911958008207e-05, "loss": 0.0447, "step": 12120 }, { "epoch": 8.886446886446887, "grad_norm": 0.6953125, "learning_rate": 8.916981530901983e-05, "loss": 0.0379, "step": 12130 }, { "epoch": 8.893772893772894, "grad_norm": 0.55078125, "learning_rate": 8.914841628924172e-05, "loss": 0.0401, "step": 12140 }, { "epoch": 8.901098901098901, "grad_norm": 0.5234375, "learning_rate": 8.912699875173935e-05, "loss": 0.0417, "step": 12150 }, { "epoch": 8.908424908424909, "grad_norm": 0.609375, "learning_rate": 8.910556270677459e-05, "loss": 0.0488, "step": 12160 }, { "epoch": 8.915750915750916, "grad_norm": 0.5390625, "learning_rate": 8.908410816461811e-05, "loss": 0.0388, "step": 12170 }, { "epoch": 8.923076923076923, "grad_norm": 0.65234375, "learning_rate": 8.906263513554951e-05, "loss": 0.0425, "step": 12180 }, { "epoch": 8.93040293040293, "grad_norm": 0.9375, "learning_rate": 8.904114362985722e-05, "loss": 0.0442, "step": 12190 }, { "epoch": 8.937728937728938, "grad_norm": 0.64453125, "learning_rate": 8.901963365783852e-05, "loss": 0.0364, "step": 12200 }, { "epoch": 8.945054945054945, "grad_norm": 0.94140625, "learning_rate": 8.899810522979956e-05, "loss": 0.0462, "step": 12210 }, { "epoch": 8.952380952380953, "grad_norm": 0.51171875, "learning_rate": 8.897655835605529e-05, "loss": 0.043, "step": 12220 }, { "epoch": 8.95970695970696, "grad_norm": 0.88671875, "learning_rate": 8.895499304692953e-05, "loss": 0.0443, "step": 12230 }, { "epoch": 8.967032967032967, "grad_norm": 0.7578125, "learning_rate": 8.893340931275495e-05, "loss": 0.0394, "step": 12240 }, { "epoch": 8.974358974358974, "grad_norm": 0.69140625, "learning_rate": 8.891180716387302e-05, "loss": 0.0426, "step": 12250 }, { "epoch": 8.981684981684982, "grad_norm": 0.84765625, "learning_rate": 8.889018661063401e-05, "loss": 0.0449, "step": 12260 }, { "epoch": 8.989010989010989, "grad_norm": 0.89453125, "learning_rate": 8.886854766339706e-05, "loss": 0.0462, "step": 12270 }, { "epoch": 8.996336996336996, "grad_norm": 1.140625, "learning_rate": 8.88468903325301e-05, "loss": 0.0476, "step": 12280 }, { "epoch": 9.003663003663004, "grad_norm": 0.6953125, "learning_rate": 8.882521462840983e-05, "loss": 0.0423, "step": 12290 }, { "epoch": 9.010989010989011, "grad_norm": 0.52734375, "learning_rate": 8.880352056142187e-05, "loss": 0.037, "step": 12300 }, { "epoch": 9.018315018315018, "grad_norm": 0.75, "learning_rate": 8.878180814196047e-05, "loss": 0.0397, "step": 12310 }, { "epoch": 9.025641025641026, "grad_norm": 0.474609375, "learning_rate": 8.876007738042884e-05, "loss": 0.0378, "step": 12320 }, { "epoch": 9.032967032967033, "grad_norm": 0.58203125, "learning_rate": 8.873832828723885e-05, "loss": 0.0377, "step": 12330 }, { "epoch": 9.04029304029304, "grad_norm": 0.62109375, "learning_rate": 8.871656087281123e-05, "loss": 0.0382, "step": 12340 }, { "epoch": 9.047619047619047, "grad_norm": 0.4765625, "learning_rate": 8.869477514757543e-05, "loss": 0.039, "step": 12350 }, { "epoch": 9.054945054945055, "grad_norm": 0.54296875, "learning_rate": 8.867297112196974e-05, "loss": 0.0383, "step": 12360 }, { "epoch": 9.062271062271062, "grad_norm": 0.37109375, "learning_rate": 8.865114880644117e-05, "loss": 0.034, "step": 12370 }, { "epoch": 9.06959706959707, "grad_norm": 0.76953125, "learning_rate": 8.862930821144551e-05, "loss": 0.0459, "step": 12380 }, { "epoch": 9.076923076923077, "grad_norm": 0.84765625, "learning_rate": 8.86074493474473e-05, "loss": 0.0385, "step": 12390 }, { "epoch": 9.084249084249084, "grad_norm": 0.78125, "learning_rate": 8.858557222491984e-05, "loss": 0.0442, "step": 12400 }, { "epoch": 9.091575091575091, "grad_norm": 0.8359375, "learning_rate": 8.85636768543452e-05, "loss": 0.0427, "step": 12410 }, { "epoch": 9.098901098901099, "grad_norm": 0.435546875, "learning_rate": 8.854176324621413e-05, "loss": 0.0406, "step": 12420 }, { "epoch": 9.106227106227106, "grad_norm": 0.8828125, "learning_rate": 8.85198314110262e-05, "loss": 0.043, "step": 12430 }, { "epoch": 9.113553113553113, "grad_norm": 0.6328125, "learning_rate": 8.849788135928964e-05, "loss": 0.0381, "step": 12440 }, { "epoch": 9.12087912087912, "grad_norm": 0.5625, "learning_rate": 8.847591310152146e-05, "loss": 0.0351, "step": 12450 }, { "epoch": 9.128205128205128, "grad_norm": 0.91015625, "learning_rate": 8.845392664824737e-05, "loss": 0.0452, "step": 12460 }, { "epoch": 9.135531135531135, "grad_norm": 0.44140625, "learning_rate": 8.843192201000176e-05, "loss": 0.0393, "step": 12470 }, { "epoch": 9.142857142857142, "grad_norm": 0.671875, "learning_rate": 8.840989919732784e-05, "loss": 0.0426, "step": 12480 }, { "epoch": 9.15018315018315, "grad_norm": 0.6953125, "learning_rate": 8.838785822077742e-05, "loss": 0.0416, "step": 12490 }, { "epoch": 9.157509157509157, "grad_norm": 0.77734375, "learning_rate": 8.836579909091109e-05, "loss": 0.0411, "step": 12500 }, { "epoch": 9.164835164835164, "grad_norm": 0.58203125, "learning_rate": 8.834372181829806e-05, "loss": 0.0364, "step": 12510 }, { "epoch": 9.172161172161172, "grad_norm": 0.6953125, "learning_rate": 8.832162641351628e-05, "loss": 0.0413, "step": 12520 }, { "epoch": 9.179487179487179, "grad_norm": 0.470703125, "learning_rate": 8.829951288715243e-05, "loss": 0.0392, "step": 12530 }, { "epoch": 9.186813186813186, "grad_norm": 0.671875, "learning_rate": 8.827738124980178e-05, "loss": 0.0379, "step": 12540 }, { "epoch": 9.194139194139193, "grad_norm": 0.53125, "learning_rate": 8.825523151206835e-05, "loss": 0.0399, "step": 12550 }, { "epoch": 9.2014652014652, "grad_norm": 0.490234375, "learning_rate": 8.823306368456479e-05, "loss": 0.0354, "step": 12560 }, { "epoch": 9.208791208791208, "grad_norm": 0.875, "learning_rate": 8.821087777791242e-05, "loss": 0.042, "step": 12570 }, { "epoch": 9.216117216117215, "grad_norm": 0.39453125, "learning_rate": 8.818867380274127e-05, "loss": 0.0382, "step": 12580 }, { "epoch": 9.223443223443223, "grad_norm": 0.373046875, "learning_rate": 8.816645176968997e-05, "loss": 0.0362, "step": 12590 }, { "epoch": 9.23076923076923, "grad_norm": 0.62109375, "learning_rate": 8.814421168940585e-05, "loss": 0.04, "step": 12600 }, { "epoch": 9.238095238095237, "grad_norm": 0.4609375, "learning_rate": 8.812195357254481e-05, "loss": 0.0371, "step": 12610 }, { "epoch": 9.245421245421245, "grad_norm": 0.51953125, "learning_rate": 8.809967742977151e-05, "loss": 0.0389, "step": 12620 }, { "epoch": 9.252747252747252, "grad_norm": 0.9453125, "learning_rate": 8.807738327175913e-05, "loss": 0.0428, "step": 12630 }, { "epoch": 9.260073260073261, "grad_norm": 0.4921875, "learning_rate": 8.805507110918955e-05, "loss": 0.0389, "step": 12640 }, { "epoch": 9.267399267399268, "grad_norm": 0.72265625, "learning_rate": 8.803274095275328e-05, "loss": 0.0386, "step": 12650 }, { "epoch": 9.274725274725276, "grad_norm": 0.90234375, "learning_rate": 8.80103928131494e-05, "loss": 0.0427, "step": 12660 }, { "epoch": 9.282051282051283, "grad_norm": 0.8671875, "learning_rate": 8.798802670108565e-05, "loss": 0.0412, "step": 12670 }, { "epoch": 9.28937728937729, "grad_norm": 0.47265625, "learning_rate": 8.796564262727835e-05, "loss": 0.0362, "step": 12680 }, { "epoch": 9.296703296703297, "grad_norm": 0.7734375, "learning_rate": 8.794324060245248e-05, "loss": 0.0462, "step": 12690 }, { "epoch": 9.304029304029305, "grad_norm": 0.94140625, "learning_rate": 8.792082063734157e-05, "loss": 0.0468, "step": 12700 }, { "epoch": 9.311355311355312, "grad_norm": 0.52734375, "learning_rate": 8.789838274268774e-05, "loss": 0.0359, "step": 12710 }, { "epoch": 9.31868131868132, "grad_norm": 0.68359375, "learning_rate": 8.787592692924173e-05, "loss": 0.0423, "step": 12720 }, { "epoch": 9.326007326007327, "grad_norm": 0.66796875, "learning_rate": 8.785345320776289e-05, "loss": 0.0407, "step": 12730 }, { "epoch": 9.333333333333334, "grad_norm": 0.65234375, "learning_rate": 8.783096158901908e-05, "loss": 0.0438, "step": 12740 }, { "epoch": 9.340659340659341, "grad_norm": 0.83984375, "learning_rate": 8.780845208378678e-05, "loss": 0.0384, "step": 12750 }, { "epoch": 9.347985347985349, "grad_norm": 0.58984375, "learning_rate": 8.778592470285105e-05, "loss": 0.042, "step": 12760 }, { "epoch": 9.355311355311356, "grad_norm": 0.412109375, "learning_rate": 8.776337945700547e-05, "loss": 0.0389, "step": 12770 }, { "epoch": 9.362637362637363, "grad_norm": 0.49609375, "learning_rate": 8.774081635705223e-05, "loss": 0.0452, "step": 12780 }, { "epoch": 9.36996336996337, "grad_norm": 0.84375, "learning_rate": 8.771823541380202e-05, "loss": 0.0439, "step": 12790 }, { "epoch": 9.377289377289378, "grad_norm": 0.59765625, "learning_rate": 8.769563663807415e-05, "loss": 0.0421, "step": 12800 }, { "epoch": 9.384615384615385, "grad_norm": 0.71875, "learning_rate": 8.767302004069642e-05, "loss": 0.0458, "step": 12810 }, { "epoch": 9.391941391941392, "grad_norm": 0.6328125, "learning_rate": 8.765038563250519e-05, "loss": 0.0424, "step": 12820 }, { "epoch": 9.3992673992674, "grad_norm": 0.7265625, "learning_rate": 8.762773342434534e-05, "loss": 0.0421, "step": 12830 }, { "epoch": 9.406593406593407, "grad_norm": 0.75, "learning_rate": 8.760506342707028e-05, "loss": 0.0388, "step": 12840 }, { "epoch": 9.413919413919414, "grad_norm": 0.46875, "learning_rate": 8.758237565154195e-05, "loss": 0.0371, "step": 12850 }, { "epoch": 9.421245421245422, "grad_norm": 0.53515625, "learning_rate": 8.755967010863081e-05, "loss": 0.0382, "step": 12860 }, { "epoch": 9.428571428571429, "grad_norm": 0.7109375, "learning_rate": 8.753694680921585e-05, "loss": 0.0422, "step": 12870 }, { "epoch": 9.435897435897436, "grad_norm": 0.70703125, "learning_rate": 8.751420576418452e-05, "loss": 0.0418, "step": 12880 }, { "epoch": 9.443223443223443, "grad_norm": 0.53125, "learning_rate": 8.749144698443282e-05, "loss": 0.0373, "step": 12890 }, { "epoch": 9.45054945054945, "grad_norm": 0.49609375, "learning_rate": 8.74686704808652e-05, "loss": 0.0374, "step": 12900 }, { "epoch": 9.457875457875458, "grad_norm": 0.609375, "learning_rate": 8.744587626439468e-05, "loss": 0.0378, "step": 12910 }, { "epoch": 9.465201465201465, "grad_norm": 0.6328125, "learning_rate": 8.742306434594267e-05, "loss": 0.0434, "step": 12920 }, { "epoch": 9.472527472527473, "grad_norm": 0.5859375, "learning_rate": 8.740023473643912e-05, "loss": 0.0399, "step": 12930 }, { "epoch": 9.47985347985348, "grad_norm": 0.484375, "learning_rate": 8.737738744682247e-05, "loss": 0.0352, "step": 12940 }, { "epoch": 9.487179487179487, "grad_norm": 0.470703125, "learning_rate": 8.735452248803957e-05, "loss": 0.0368, "step": 12950 }, { "epoch": 9.494505494505495, "grad_norm": 0.76953125, "learning_rate": 8.73316398710458e-05, "loss": 0.0392, "step": 12960 }, { "epoch": 9.501831501831502, "grad_norm": 0.58984375, "learning_rate": 8.730873960680494e-05, "loss": 0.043, "step": 12970 }, { "epoch": 9.50915750915751, "grad_norm": 0.54296875, "learning_rate": 8.72858217062893e-05, "loss": 0.0404, "step": 12980 }, { "epoch": 9.516483516483516, "grad_norm": 0.86328125, "learning_rate": 8.72628861804796e-05, "loss": 0.0418, "step": 12990 }, { "epoch": 9.523809523809524, "grad_norm": 0.69140625, "learning_rate": 8.723993304036495e-05, "loss": 0.0402, "step": 13000 }, { "epoch": 9.531135531135531, "grad_norm": 0.47265625, "learning_rate": 8.721696229694299e-05, "loss": 0.0436, "step": 13010 }, { "epoch": 9.538461538461538, "grad_norm": 0.578125, "learning_rate": 8.719397396121976e-05, "loss": 0.0405, "step": 13020 }, { "epoch": 9.545787545787546, "grad_norm": 0.578125, "learning_rate": 8.71709680442097e-05, "loss": 0.04, "step": 13030 }, { "epoch": 9.553113553113553, "grad_norm": 0.52734375, "learning_rate": 8.714794455693572e-05, "loss": 0.0372, "step": 13040 }, { "epoch": 9.56043956043956, "grad_norm": 0.5078125, "learning_rate": 8.712490351042913e-05, "loss": 0.0399, "step": 13050 }, { "epoch": 9.567765567765568, "grad_norm": 0.60546875, "learning_rate": 8.710184491572963e-05, "loss": 0.0401, "step": 13060 }, { "epoch": 9.575091575091575, "grad_norm": 0.3828125, "learning_rate": 8.707876878388537e-05, "loss": 0.0324, "step": 13070 }, { "epoch": 9.582417582417582, "grad_norm": 0.703125, "learning_rate": 8.705567512595289e-05, "loss": 0.0388, "step": 13080 }, { "epoch": 9.58974358974359, "grad_norm": 0.5859375, "learning_rate": 8.703256395299708e-05, "loss": 0.0413, "step": 13090 }, { "epoch": 9.597069597069597, "grad_norm": 0.64453125, "learning_rate": 8.70094352760913e-05, "loss": 0.0386, "step": 13100 }, { "epoch": 9.604395604395604, "grad_norm": 0.5, "learning_rate": 8.698628910631725e-05, "loss": 0.0392, "step": 13110 }, { "epoch": 9.611721611721611, "grad_norm": 0.53125, "learning_rate": 8.696312545476503e-05, "loss": 0.0412, "step": 13120 }, { "epoch": 9.619047619047619, "grad_norm": 0.490234375, "learning_rate": 8.693994433253304e-05, "loss": 0.0425, "step": 13130 }, { "epoch": 9.626373626373626, "grad_norm": 0.6875, "learning_rate": 8.69167457507282e-05, "loss": 0.0366, "step": 13140 }, { "epoch": 9.633699633699633, "grad_norm": 0.73046875, "learning_rate": 8.689352972046568e-05, "loss": 0.0366, "step": 13150 }, { "epoch": 9.64102564102564, "grad_norm": 0.478515625, "learning_rate": 8.687029625286904e-05, "loss": 0.0354, "step": 13160 }, { "epoch": 9.648351648351648, "grad_norm": 0.6015625, "learning_rate": 8.684704535907017e-05, "loss": 0.0419, "step": 13170 }, { "epoch": 9.655677655677655, "grad_norm": 0.4296875, "learning_rate": 8.682377705020938e-05, "loss": 0.0385, "step": 13180 }, { "epoch": 9.663003663003662, "grad_norm": 0.4921875, "learning_rate": 8.680049133743525e-05, "loss": 0.0363, "step": 13190 }, { "epoch": 9.67032967032967, "grad_norm": 0.6640625, "learning_rate": 8.677718823190477e-05, "loss": 0.0458, "step": 13200 }, { "epoch": 9.677655677655677, "grad_norm": 0.65625, "learning_rate": 8.675386774478316e-05, "loss": 0.0419, "step": 13210 }, { "epoch": 9.684981684981684, "grad_norm": 0.66796875, "learning_rate": 8.673052988724406e-05, "loss": 0.0403, "step": 13220 }, { "epoch": 9.692307692307692, "grad_norm": 0.671875, "learning_rate": 8.670717467046942e-05, "loss": 0.0435, "step": 13230 }, { "epoch": 9.699633699633699, "grad_norm": 0.427734375, "learning_rate": 8.668380210564949e-05, "loss": 0.0453, "step": 13240 }, { "epoch": 9.706959706959706, "grad_norm": 0.5234375, "learning_rate": 8.666041220398282e-05, "loss": 0.0434, "step": 13250 }, { "epoch": 9.714285714285714, "grad_norm": 0.72265625, "learning_rate": 8.663700497667625e-05, "loss": 0.0466, "step": 13260 }, { "epoch": 9.72161172161172, "grad_norm": 0.96484375, "learning_rate": 8.661358043494501e-05, "loss": 0.0446, "step": 13270 }, { "epoch": 9.728937728937728, "grad_norm": 0.50390625, "learning_rate": 8.659013859001252e-05, "loss": 0.0423, "step": 13280 }, { "epoch": 9.736263736263737, "grad_norm": 0.6015625, "learning_rate": 8.656667945311055e-05, "loss": 0.0416, "step": 13290 }, { "epoch": 9.743589743589745, "grad_norm": 0.61328125, "learning_rate": 8.654320303547916e-05, "loss": 0.0455, "step": 13300 }, { "epoch": 9.750915750915752, "grad_norm": 0.6328125, "learning_rate": 8.651970934836666e-05, "loss": 0.0351, "step": 13310 }, { "epoch": 9.758241758241759, "grad_norm": 0.58203125, "learning_rate": 8.649619840302963e-05, "loss": 0.0399, "step": 13320 }, { "epoch": 9.765567765567766, "grad_norm": 0.57421875, "learning_rate": 8.647267021073295e-05, "loss": 0.0464, "step": 13330 }, { "epoch": 9.772893772893774, "grad_norm": 0.53515625, "learning_rate": 8.644912478274975e-05, "loss": 0.039, "step": 13340 }, { "epoch": 9.780219780219781, "grad_norm": 0.5234375, "learning_rate": 8.642556213036142e-05, "loss": 0.0346, "step": 13350 }, { "epoch": 9.787545787545788, "grad_norm": 0.8203125, "learning_rate": 8.640198226485759e-05, "loss": 0.0373, "step": 13360 }, { "epoch": 9.794871794871796, "grad_norm": 0.8984375, "learning_rate": 8.637838519753615e-05, "loss": 0.0386, "step": 13370 }, { "epoch": 9.802197802197803, "grad_norm": 0.640625, "learning_rate": 8.635477093970325e-05, "loss": 0.0391, "step": 13380 }, { "epoch": 9.80952380952381, "grad_norm": 0.45703125, "learning_rate": 8.633113950267319e-05, "loss": 0.0397, "step": 13390 }, { "epoch": 9.816849816849818, "grad_norm": 0.578125, "learning_rate": 8.630749089776865e-05, "loss": 0.0364, "step": 13400 }, { "epoch": 9.824175824175825, "grad_norm": 0.7265625, "learning_rate": 8.628382513632042e-05, "loss": 0.039, "step": 13410 }, { "epoch": 9.831501831501832, "grad_norm": 0.796875, "learning_rate": 8.626014222966753e-05, "loss": 0.0393, "step": 13420 }, { "epoch": 9.83882783882784, "grad_norm": 0.6015625, "learning_rate": 8.623644218915726e-05, "loss": 0.0417, "step": 13430 }, { "epoch": 9.846153846153847, "grad_norm": 0.58984375, "learning_rate": 8.621272502614503e-05, "loss": 0.0469, "step": 13440 }, { "epoch": 9.853479853479854, "grad_norm": 0.53125, "learning_rate": 8.618899075199457e-05, "loss": 0.0407, "step": 13450 }, { "epoch": 9.860805860805861, "grad_norm": 0.5234375, "learning_rate": 8.616523937807772e-05, "loss": 0.0405, "step": 13460 }, { "epoch": 9.868131868131869, "grad_norm": 0.486328125, "learning_rate": 8.614147091577453e-05, "loss": 0.0452, "step": 13470 }, { "epoch": 9.875457875457876, "grad_norm": 0.486328125, "learning_rate": 8.611768537647328e-05, "loss": 0.0346, "step": 13480 }, { "epoch": 9.882783882783883, "grad_norm": 0.578125, "learning_rate": 8.609388277157039e-05, "loss": 0.0535, "step": 13490 }, { "epoch": 9.89010989010989, "grad_norm": 0.6640625, "learning_rate": 8.607006311247047e-05, "loss": 0.0434, "step": 13500 }, { "epoch": 9.897435897435898, "grad_norm": 0.75, "learning_rate": 8.604622641058628e-05, "loss": 0.0476, "step": 13510 }, { "epoch": 9.904761904761905, "grad_norm": 0.6015625, "learning_rate": 8.60223726773388e-05, "loss": 0.0364, "step": 13520 }, { "epoch": 9.912087912087912, "grad_norm": 0.52734375, "learning_rate": 8.599850192415713e-05, "loss": 0.0361, "step": 13530 }, { "epoch": 9.91941391941392, "grad_norm": 0.93359375, "learning_rate": 8.597461416247851e-05, "loss": 0.0497, "step": 13540 }, { "epoch": 9.926739926739927, "grad_norm": 0.6875, "learning_rate": 8.595070940374835e-05, "loss": 0.0431, "step": 13550 }, { "epoch": 9.934065934065934, "grad_norm": 0.703125, "learning_rate": 8.592678765942022e-05, "loss": 0.0424, "step": 13560 }, { "epoch": 9.941391941391942, "grad_norm": 0.498046875, "learning_rate": 8.590284894095581e-05, "loss": 0.0338, "step": 13570 }, { "epoch": 9.948717948717949, "grad_norm": 0.52734375, "learning_rate": 8.587889325982495e-05, "loss": 0.0435, "step": 13580 }, { "epoch": 9.956043956043956, "grad_norm": 0.49609375, "learning_rate": 8.585492062750558e-05, "loss": 0.0423, "step": 13590 }, { "epoch": 9.963369963369964, "grad_norm": 0.5625, "learning_rate": 8.583093105548378e-05, "loss": 0.0411, "step": 13600 }, { "epoch": 9.97069597069597, "grad_norm": 0.58203125, "learning_rate": 8.580692455525375e-05, "loss": 0.0417, "step": 13610 }, { "epoch": 9.978021978021978, "grad_norm": 0.609375, "learning_rate": 8.578290113831779e-05, "loss": 0.0405, "step": 13620 }, { "epoch": 9.985347985347985, "grad_norm": 0.69140625, "learning_rate": 8.575886081618628e-05, "loss": 0.0433, "step": 13630 }, { "epoch": 9.992673992673993, "grad_norm": 0.40234375, "learning_rate": 8.573480360037776e-05, "loss": 0.0427, "step": 13640 }, { "epoch": 10.0, "grad_norm": 0.578125, "learning_rate": 8.57107295024188e-05, "loss": 0.0358, "step": 13650 }, { "epoch": 10.007326007326007, "grad_norm": 0.56640625, "learning_rate": 8.568663853384415e-05, "loss": 0.0411, "step": 13660 }, { "epoch": 10.014652014652015, "grad_norm": 0.515625, "learning_rate": 8.56625307061965e-05, "loss": 0.0332, "step": 13670 }, { "epoch": 10.021978021978022, "grad_norm": 0.85546875, "learning_rate": 8.563840603102678e-05, "loss": 0.0469, "step": 13680 }, { "epoch": 10.02930402930403, "grad_norm": 0.79296875, "learning_rate": 8.561426451989385e-05, "loss": 0.037, "step": 13690 }, { "epoch": 10.036630036630036, "grad_norm": 0.70703125, "learning_rate": 8.559010618436474e-05, "loss": 0.039, "step": 13700 }, { "epoch": 10.043956043956044, "grad_norm": 0.65234375, "learning_rate": 8.556593103601449e-05, "loss": 0.0408, "step": 13710 }, { "epoch": 10.051282051282051, "grad_norm": 0.75390625, "learning_rate": 8.554173908642622e-05, "loss": 0.035, "step": 13720 }, { "epoch": 10.058608058608058, "grad_norm": 0.9140625, "learning_rate": 8.551753034719104e-05, "loss": 0.04, "step": 13730 }, { "epoch": 10.065934065934066, "grad_norm": 0.62890625, "learning_rate": 8.54933048299082e-05, "loss": 0.0366, "step": 13740 }, { "epoch": 10.073260073260073, "grad_norm": 0.58984375, "learning_rate": 8.546906254618492e-05, "loss": 0.0413, "step": 13750 }, { "epoch": 10.08058608058608, "grad_norm": 0.640625, "learning_rate": 8.544480350763649e-05, "loss": 0.0386, "step": 13760 }, { "epoch": 10.087912087912088, "grad_norm": 0.6171875, "learning_rate": 8.542052772588618e-05, "loss": 0.036, "step": 13770 }, { "epoch": 10.095238095238095, "grad_norm": 0.50390625, "learning_rate": 8.539623521256535e-05, "loss": 0.0374, "step": 13780 }, { "epoch": 10.102564102564102, "grad_norm": 0.62890625, "learning_rate": 8.53719259793133e-05, "loss": 0.0384, "step": 13790 }, { "epoch": 10.10989010989011, "grad_norm": 0.85546875, "learning_rate": 8.534760003777743e-05, "loss": 0.0419, "step": 13800 }, { "epoch": 10.117216117216117, "grad_norm": 0.65625, "learning_rate": 8.532325739961304e-05, "loss": 0.0437, "step": 13810 }, { "epoch": 10.124542124542124, "grad_norm": 0.5625, "learning_rate": 8.529889807648355e-05, "loss": 0.0371, "step": 13820 }, { "epoch": 10.131868131868131, "grad_norm": 0.97265625, "learning_rate": 8.527452208006025e-05, "loss": 0.0399, "step": 13830 }, { "epoch": 10.139194139194139, "grad_norm": 0.63671875, "learning_rate": 8.525012942202252e-05, "loss": 0.0377, "step": 13840 }, { "epoch": 10.146520146520146, "grad_norm": 0.40625, "learning_rate": 8.522572011405766e-05, "loss": 0.0384, "step": 13850 }, { "epoch": 10.153846153846153, "grad_norm": 0.80859375, "learning_rate": 8.5201294167861e-05, "loss": 0.0445, "step": 13860 }, { "epoch": 10.16117216117216, "grad_norm": 0.5390625, "learning_rate": 8.517685159513577e-05, "loss": 0.034, "step": 13870 }, { "epoch": 10.168498168498168, "grad_norm": 0.4609375, "learning_rate": 8.515239240759325e-05, "loss": 0.0383, "step": 13880 }, { "epoch": 10.175824175824175, "grad_norm": 0.5625, "learning_rate": 8.512791661695261e-05, "loss": 0.0399, "step": 13890 }, { "epoch": 10.183150183150182, "grad_norm": 0.71484375, "learning_rate": 8.510342423494101e-05, "loss": 0.0438, "step": 13900 }, { "epoch": 10.19047619047619, "grad_norm": 0.55859375, "learning_rate": 8.507891527329356e-05, "loss": 0.0363, "step": 13910 }, { "epoch": 10.197802197802197, "grad_norm": 0.6484375, "learning_rate": 8.505438974375333e-05, "loss": 0.0415, "step": 13920 }, { "epoch": 10.205128205128204, "grad_norm": 0.52734375, "learning_rate": 8.502984765807125e-05, "loss": 0.0364, "step": 13930 }, { "epoch": 10.212454212454212, "grad_norm": 0.50390625, "learning_rate": 8.500528902800628e-05, "loss": 0.0383, "step": 13940 }, { "epoch": 10.219780219780219, "grad_norm": 0.66015625, "learning_rate": 8.498071386532525e-05, "loss": 0.0432, "step": 13950 }, { "epoch": 10.227106227106226, "grad_norm": 0.5390625, "learning_rate": 8.495612218180296e-05, "loss": 0.0383, "step": 13960 }, { "epoch": 10.234432234432234, "grad_norm": 0.5546875, "learning_rate": 8.493151398922204e-05, "loss": 0.0366, "step": 13970 }, { "epoch": 10.241758241758241, "grad_norm": 0.4921875, "learning_rate": 8.49068892993731e-05, "loss": 0.0394, "step": 13980 }, { "epoch": 10.249084249084248, "grad_norm": 0.58984375, "learning_rate": 8.488224812405466e-05, "loss": 0.0391, "step": 13990 }, { "epoch": 10.256410256410255, "grad_norm": 0.59375, "learning_rate": 8.48575904750731e-05, "loss": 0.038, "step": 14000 }, { "epoch": 10.263736263736265, "grad_norm": 0.79296875, "learning_rate": 8.48329163642427e-05, "loss": 0.0407, "step": 14010 }, { "epoch": 10.271062271062272, "grad_norm": 0.71875, "learning_rate": 8.480822580338564e-05, "loss": 0.0432, "step": 14020 }, { "epoch": 10.27838827838828, "grad_norm": 0.58203125, "learning_rate": 8.478351880433199e-05, "loss": 0.0367, "step": 14030 }, { "epoch": 10.285714285714286, "grad_norm": 0.82421875, "learning_rate": 8.475879537891969e-05, "loss": 0.0394, "step": 14040 }, { "epoch": 10.293040293040294, "grad_norm": 0.478515625, "learning_rate": 8.473405553899451e-05, "loss": 0.0371, "step": 14050 }, { "epoch": 10.300366300366301, "grad_norm": 0.7109375, "learning_rate": 8.470929929641014e-05, "loss": 0.0361, "step": 14060 }, { "epoch": 10.307692307692308, "grad_norm": 0.73046875, "learning_rate": 8.468452666302813e-05, "loss": 0.0402, "step": 14070 }, { "epoch": 10.315018315018316, "grad_norm": 0.546875, "learning_rate": 8.465973765071781e-05, "loss": 0.0353, "step": 14080 }, { "epoch": 10.322344322344323, "grad_norm": 0.6171875, "learning_rate": 8.463493227135646e-05, "loss": 0.0367, "step": 14090 }, { "epoch": 10.32967032967033, "grad_norm": 1.0546875, "learning_rate": 8.461011053682911e-05, "loss": 0.0397, "step": 14100 }, { "epoch": 10.336996336996338, "grad_norm": 0.63671875, "learning_rate": 8.458527245902869e-05, "loss": 0.034, "step": 14110 }, { "epoch": 10.344322344322345, "grad_norm": 0.408203125, "learning_rate": 8.456041804985594e-05, "loss": 0.037, "step": 14120 }, { "epoch": 10.351648351648352, "grad_norm": 0.93359375, "learning_rate": 8.453554732121942e-05, "loss": 0.0417, "step": 14130 }, { "epoch": 10.35897435897436, "grad_norm": 0.57421875, "learning_rate": 8.45106602850355e-05, "loss": 0.0427, "step": 14140 }, { "epoch": 10.366300366300367, "grad_norm": 0.91015625, "learning_rate": 8.448575695322838e-05, "loss": 0.0382, "step": 14150 }, { "epoch": 10.373626373626374, "grad_norm": 0.6875, "learning_rate": 8.446083733773009e-05, "loss": 0.0369, "step": 14160 }, { "epoch": 10.380952380952381, "grad_norm": 0.42578125, "learning_rate": 8.44359014504804e-05, "loss": 0.0443, "step": 14170 }, { "epoch": 10.388278388278389, "grad_norm": 0.5234375, "learning_rate": 8.441094930342692e-05, "loss": 0.038, "step": 14180 }, { "epoch": 10.395604395604396, "grad_norm": 0.451171875, "learning_rate": 8.438598090852506e-05, "loss": 0.0355, "step": 14190 }, { "epoch": 10.402930402930403, "grad_norm": 0.61328125, "learning_rate": 8.436099627773799e-05, "loss": 0.037, "step": 14200 }, { "epoch": 10.41025641025641, "grad_norm": 0.62109375, "learning_rate": 8.433599542303664e-05, "loss": 0.0406, "step": 14210 }, { "epoch": 10.417582417582418, "grad_norm": 0.57421875, "learning_rate": 8.431097835639976e-05, "loss": 0.0418, "step": 14220 }, { "epoch": 10.424908424908425, "grad_norm": 0.54296875, "learning_rate": 8.428594508981386e-05, "loss": 0.0399, "step": 14230 }, { "epoch": 10.432234432234432, "grad_norm": 0.64453125, "learning_rate": 8.426089563527319e-05, "loss": 0.0366, "step": 14240 }, { "epoch": 10.43956043956044, "grad_norm": 0.58984375, "learning_rate": 8.423583000477975e-05, "loss": 0.0395, "step": 14250 }, { "epoch": 10.446886446886447, "grad_norm": 0.609375, "learning_rate": 8.421074821034333e-05, "loss": 0.0417, "step": 14260 }, { "epoch": 10.454212454212454, "grad_norm": 0.51171875, "learning_rate": 8.418565026398142e-05, "loss": 0.0351, "step": 14270 }, { "epoch": 10.461538461538462, "grad_norm": 0.515625, "learning_rate": 8.416053617771928e-05, "loss": 0.0384, "step": 14280 }, { "epoch": 10.468864468864469, "grad_norm": 0.42578125, "learning_rate": 8.41354059635899e-05, "loss": 0.0358, "step": 14290 }, { "epoch": 10.476190476190476, "grad_norm": 0.42578125, "learning_rate": 8.411025963363397e-05, "loss": 0.039, "step": 14300 }, { "epoch": 10.483516483516484, "grad_norm": 0.87109375, "learning_rate": 8.408509719989995e-05, "loss": 0.0375, "step": 14310 }, { "epoch": 10.49084249084249, "grad_norm": 0.80078125, "learning_rate": 8.405991867444397e-05, "loss": 0.0448, "step": 14320 }, { "epoch": 10.498168498168498, "grad_norm": 0.69140625, "learning_rate": 8.403472406932987e-05, "loss": 0.0448, "step": 14330 }, { "epoch": 10.505494505494505, "grad_norm": 0.6015625, "learning_rate": 8.400951339662923e-05, "loss": 0.041, "step": 14340 }, { "epoch": 10.512820512820513, "grad_norm": 0.5546875, "learning_rate": 8.398428666842133e-05, "loss": 0.0373, "step": 14350 }, { "epoch": 10.52014652014652, "grad_norm": 0.62109375, "learning_rate": 8.39590438967931e-05, "loss": 0.0442, "step": 14360 }, { "epoch": 10.527472527472527, "grad_norm": 0.51953125, "learning_rate": 8.393378509383918e-05, "loss": 0.0414, "step": 14370 }, { "epoch": 10.534798534798535, "grad_norm": 0.5546875, "learning_rate": 8.39085102716619e-05, "loss": 0.0404, "step": 14380 }, { "epoch": 10.542124542124542, "grad_norm": 0.578125, "learning_rate": 8.388321944237124e-05, "loss": 0.0398, "step": 14390 }, { "epoch": 10.54945054945055, "grad_norm": 0.53125, "learning_rate": 8.38579126180849e-05, "loss": 0.0392, "step": 14400 }, { "epoch": 10.556776556776557, "grad_norm": 0.6015625, "learning_rate": 8.383258981092817e-05, "loss": 0.0425, "step": 14410 }, { "epoch": 10.564102564102564, "grad_norm": 0.65625, "learning_rate": 8.380725103303405e-05, "loss": 0.0377, "step": 14420 }, { "epoch": 10.571428571428571, "grad_norm": 0.84375, "learning_rate": 8.37818962965432e-05, "loss": 0.0354, "step": 14430 }, { "epoch": 10.578754578754578, "grad_norm": 0.73046875, "learning_rate": 8.37565256136039e-05, "loss": 0.0374, "step": 14440 }, { "epoch": 10.586080586080586, "grad_norm": 0.8671875, "learning_rate": 8.373113899637204e-05, "loss": 0.0452, "step": 14450 }, { "epoch": 10.593406593406593, "grad_norm": 0.609375, "learning_rate": 8.370573645701123e-05, "loss": 0.036, "step": 14460 }, { "epoch": 10.6007326007326, "grad_norm": 0.61328125, "learning_rate": 8.368031800769263e-05, "loss": 0.0436, "step": 14470 }, { "epoch": 10.608058608058608, "grad_norm": 0.65234375, "learning_rate": 8.365488366059505e-05, "loss": 0.0414, "step": 14480 }, { "epoch": 10.615384615384615, "grad_norm": 0.828125, "learning_rate": 8.362943342790494e-05, "loss": 0.0405, "step": 14490 }, { "epoch": 10.622710622710622, "grad_norm": 0.404296875, "learning_rate": 8.360396732181636e-05, "loss": 0.034, "step": 14500 }, { "epoch": 10.63003663003663, "grad_norm": 0.45703125, "learning_rate": 8.357848535453092e-05, "loss": 0.0395, "step": 14510 }, { "epoch": 10.637362637362637, "grad_norm": 0.65625, "learning_rate": 8.355298753825786e-05, "loss": 0.036, "step": 14520 }, { "epoch": 10.644688644688644, "grad_norm": 0.91015625, "learning_rate": 8.352747388521406e-05, "loss": 0.0411, "step": 14530 }, { "epoch": 10.652014652014651, "grad_norm": 0.75390625, "learning_rate": 8.35019444076239e-05, "loss": 0.0427, "step": 14540 }, { "epoch": 10.659340659340659, "grad_norm": 0.57421875, "learning_rate": 8.347639911771944e-05, "loss": 0.0398, "step": 14550 }, { "epoch": 10.666666666666666, "grad_norm": 0.39453125, "learning_rate": 8.345083802774024e-05, "loss": 0.0357, "step": 14560 }, { "epoch": 10.673992673992673, "grad_norm": 0.69921875, "learning_rate": 8.342526114993345e-05, "loss": 0.0414, "step": 14570 }, { "epoch": 10.68131868131868, "grad_norm": 0.5546875, "learning_rate": 8.33996684965538e-05, "loss": 0.0416, "step": 14580 }, { "epoch": 10.688644688644688, "grad_norm": 0.8671875, "learning_rate": 8.337406007986357e-05, "loss": 0.0382, "step": 14590 }, { "epoch": 10.695970695970695, "grad_norm": 0.546875, "learning_rate": 8.33484359121326e-05, "loss": 0.0378, "step": 14600 }, { "epoch": 10.703296703296703, "grad_norm": 0.6875, "learning_rate": 8.332279600563826e-05, "loss": 0.0413, "step": 14610 }, { "epoch": 10.71062271062271, "grad_norm": 0.83203125, "learning_rate": 8.329714037266548e-05, "loss": 0.0371, "step": 14620 }, { "epoch": 10.717948717948717, "grad_norm": 0.7109375, "learning_rate": 8.327146902550672e-05, "loss": 0.0452, "step": 14630 }, { "epoch": 10.725274725274724, "grad_norm": 0.66796875, "learning_rate": 8.324578197646193e-05, "loss": 0.0389, "step": 14640 }, { "epoch": 10.732600732600732, "grad_norm": 0.57421875, "learning_rate": 8.322007923783863e-05, "loss": 0.0393, "step": 14650 }, { "epoch": 10.73992673992674, "grad_norm": 0.54296875, "learning_rate": 8.319436082195188e-05, "loss": 0.0369, "step": 14660 }, { "epoch": 10.747252747252748, "grad_norm": 0.416015625, "learning_rate": 8.31686267411242e-05, "loss": 0.0343, "step": 14670 }, { "epoch": 10.754578754578755, "grad_norm": 0.54296875, "learning_rate": 8.31428770076856e-05, "loss": 0.0376, "step": 14680 }, { "epoch": 10.761904761904763, "grad_norm": 0.73046875, "learning_rate": 8.311711163397368e-05, "loss": 0.0344, "step": 14690 }, { "epoch": 10.76923076923077, "grad_norm": 0.64453125, "learning_rate": 8.309133063233344e-05, "loss": 0.0404, "step": 14700 }, { "epoch": 10.776556776556777, "grad_norm": 0.6328125, "learning_rate": 8.306553401511737e-05, "loss": 0.0548, "step": 14710 }, { "epoch": 10.783882783882785, "grad_norm": 0.6796875, "learning_rate": 8.303972179468556e-05, "loss": 0.0422, "step": 14720 }, { "epoch": 10.791208791208792, "grad_norm": 0.66015625, "learning_rate": 8.301389398340541e-05, "loss": 0.0412, "step": 14730 }, { "epoch": 10.7985347985348, "grad_norm": 1.265625, "learning_rate": 8.298805059365192e-05, "loss": 0.0493, "step": 14740 }, { "epoch": 10.805860805860807, "grad_norm": 0.87109375, "learning_rate": 8.296219163780751e-05, "loss": 0.0409, "step": 14750 }, { "epoch": 10.813186813186814, "grad_norm": 0.55078125, "learning_rate": 8.293631712826203e-05, "loss": 0.042, "step": 14760 }, { "epoch": 10.820512820512821, "grad_norm": 1.1953125, "learning_rate": 8.29104270774128e-05, "loss": 0.0424, "step": 14770 }, { "epoch": 10.827838827838828, "grad_norm": 0.5546875, "learning_rate": 8.288452149766462e-05, "loss": 0.0391, "step": 14780 }, { "epoch": 10.835164835164836, "grad_norm": 0.5078125, "learning_rate": 8.285860040142966e-05, "loss": 0.0411, "step": 14790 }, { "epoch": 10.842490842490843, "grad_norm": 0.59375, "learning_rate": 8.283266380112763e-05, "loss": 0.0345, "step": 14800 }, { "epoch": 10.84981684981685, "grad_norm": 0.625, "learning_rate": 8.280671170918554e-05, "loss": 0.038, "step": 14810 }, { "epoch": 10.857142857142858, "grad_norm": 0.66796875, "learning_rate": 8.278074413803796e-05, "loss": 0.0374, "step": 14820 }, { "epoch": 10.864468864468865, "grad_norm": 0.640625, "learning_rate": 8.275476110012677e-05, "loss": 0.0387, "step": 14830 }, { "epoch": 10.871794871794872, "grad_norm": 0.73046875, "learning_rate": 8.272876260790125e-05, "loss": 0.0373, "step": 14840 }, { "epoch": 10.87912087912088, "grad_norm": 0.52734375, "learning_rate": 8.270274867381821e-05, "loss": 0.0373, "step": 14850 }, { "epoch": 10.886446886446887, "grad_norm": 0.61328125, "learning_rate": 8.267671931034174e-05, "loss": 0.0395, "step": 14860 }, { "epoch": 10.893772893772894, "grad_norm": 0.546875, "learning_rate": 8.265067452994335e-05, "loss": 0.0371, "step": 14870 }, { "epoch": 10.901098901098901, "grad_norm": 0.578125, "learning_rate": 8.262461434510199e-05, "loss": 0.0355, "step": 14880 }, { "epoch": 10.908424908424909, "grad_norm": 0.39453125, "learning_rate": 8.259853876830388e-05, "loss": 0.0371, "step": 14890 }, { "epoch": 10.915750915750916, "grad_norm": 0.51171875, "learning_rate": 8.257244781204276e-05, "loss": 0.0357, "step": 14900 }, { "epoch": 10.923076923076923, "grad_norm": 0.56640625, "learning_rate": 8.254634148881962e-05, "loss": 0.0343, "step": 14910 }, { "epoch": 10.93040293040293, "grad_norm": 1.078125, "learning_rate": 8.252021981114287e-05, "loss": 0.0478, "step": 14920 }, { "epoch": 10.937728937728938, "grad_norm": 0.384765625, "learning_rate": 8.249408279152827e-05, "loss": 0.0361, "step": 14930 }, { "epoch": 10.945054945054945, "grad_norm": 0.62109375, "learning_rate": 8.24679304424989e-05, "loss": 0.0389, "step": 14940 }, { "epoch": 10.952380952380953, "grad_norm": 0.6015625, "learning_rate": 8.244176277658524e-05, "loss": 0.0408, "step": 14950 }, { "epoch": 10.95970695970696, "grad_norm": 0.4921875, "learning_rate": 8.241557980632504e-05, "loss": 0.0346, "step": 14960 }, { "epoch": 10.967032967032967, "grad_norm": 0.498046875, "learning_rate": 8.238938154426344e-05, "loss": 0.0327, "step": 14970 }, { "epoch": 10.974358974358974, "grad_norm": 0.421875, "learning_rate": 8.23631680029529e-05, "loss": 0.0368, "step": 14980 }, { "epoch": 10.981684981684982, "grad_norm": 0.44921875, "learning_rate": 8.233693919495315e-05, "loss": 0.0427, "step": 14990 }, { "epoch": 10.989010989010989, "grad_norm": 0.5859375, "learning_rate": 8.23106951328313e-05, "loss": 0.0374, "step": 15000 }, { "epoch": 10.996336996336996, "grad_norm": 1.046875, "learning_rate": 8.228443582916178e-05, "loss": 0.039, "step": 15010 }, { "epoch": 11.003663003663004, "grad_norm": 0.50390625, "learning_rate": 8.225816129652621e-05, "loss": 0.0343, "step": 15020 }, { "epoch": 11.010989010989011, "grad_norm": 0.55078125, "learning_rate": 8.223187154751361e-05, "loss": 0.0388, "step": 15030 }, { "epoch": 11.018315018315018, "grad_norm": 0.5625, "learning_rate": 8.220556659472027e-05, "loss": 0.0378, "step": 15040 }, { "epoch": 11.025641025641026, "grad_norm": 0.373046875, "learning_rate": 8.217924645074974e-05, "loss": 0.0385, "step": 15050 }, { "epoch": 11.032967032967033, "grad_norm": 0.51953125, "learning_rate": 8.215291112821287e-05, "loss": 0.0354, "step": 15060 }, { "epoch": 11.04029304029304, "grad_norm": 0.7109375, "learning_rate": 8.212656063972779e-05, "loss": 0.0387, "step": 15070 }, { "epoch": 11.047619047619047, "grad_norm": 0.53125, "learning_rate": 8.210019499791989e-05, "loss": 0.0355, "step": 15080 }, { "epoch": 11.054945054945055, "grad_norm": 0.474609375, "learning_rate": 8.207381421542177e-05, "loss": 0.0355, "step": 15090 }, { "epoch": 11.062271062271062, "grad_norm": 0.546875, "learning_rate": 8.204741830487337e-05, "loss": 0.0363, "step": 15100 }, { "epoch": 11.06959706959707, "grad_norm": 0.859375, "learning_rate": 8.202100727892183e-05, "loss": 0.0433, "step": 15110 }, { "epoch": 11.076923076923077, "grad_norm": 0.61328125, "learning_rate": 8.199458115022152e-05, "loss": 0.0458, "step": 15120 }, { "epoch": 11.084249084249084, "grad_norm": 0.65234375, "learning_rate": 8.196813993143409e-05, "loss": 0.0379, "step": 15130 }, { "epoch": 11.091575091575091, "grad_norm": 0.4765625, "learning_rate": 8.194168363522839e-05, "loss": 0.0352, "step": 15140 }, { "epoch": 11.098901098901099, "grad_norm": 0.3515625, "learning_rate": 8.191521227428046e-05, "loss": 0.0373, "step": 15150 }, { "epoch": 11.106227106227106, "grad_norm": 0.53515625, "learning_rate": 8.188872586127365e-05, "loss": 0.0354, "step": 15160 }, { "epoch": 11.113553113553113, "grad_norm": 0.75, "learning_rate": 8.186222440889846e-05, "loss": 0.0359, "step": 15170 }, { "epoch": 11.12087912087912, "grad_norm": 0.54296875, "learning_rate": 8.183570792985258e-05, "loss": 0.0335, "step": 15180 }, { "epoch": 11.128205128205128, "grad_norm": 0.57421875, "learning_rate": 8.180917643684094e-05, "loss": 0.0394, "step": 15190 }, { "epoch": 11.135531135531135, "grad_norm": 0.427734375, "learning_rate": 8.178262994257567e-05, "loss": 0.0392, "step": 15200 }, { "epoch": 11.142857142857142, "grad_norm": 0.48828125, "learning_rate": 8.175606845977602e-05, "loss": 0.0339, "step": 15210 }, { "epoch": 11.15018315018315, "grad_norm": 0.69140625, "learning_rate": 8.17294920011685e-05, "loss": 0.0357, "step": 15220 }, { "epoch": 11.157509157509157, "grad_norm": 0.474609375, "learning_rate": 8.170290057948677e-05, "loss": 0.0359, "step": 15230 }, { "epoch": 11.164835164835164, "grad_norm": 0.59765625, "learning_rate": 8.167629420747163e-05, "loss": 0.0379, "step": 15240 }, { "epoch": 11.172161172161172, "grad_norm": 0.76171875, "learning_rate": 8.164967289787107e-05, "loss": 0.04, "step": 15250 }, { "epoch": 11.179487179487179, "grad_norm": 0.59765625, "learning_rate": 8.162303666344026e-05, "loss": 0.0352, "step": 15260 }, { "epoch": 11.186813186813186, "grad_norm": 0.5078125, "learning_rate": 8.159638551694145e-05, "loss": 0.0393, "step": 15270 }, { "epoch": 11.194139194139193, "grad_norm": 0.66796875, "learning_rate": 8.15697194711441e-05, "loss": 0.0358, "step": 15280 }, { "epoch": 11.2014652014652, "grad_norm": 0.5234375, "learning_rate": 8.154303853882479e-05, "loss": 0.0394, "step": 15290 }, { "epoch": 11.208791208791208, "grad_norm": 0.5546875, "learning_rate": 8.151634273276724e-05, "loss": 0.0367, "step": 15300 }, { "epoch": 11.216117216117215, "grad_norm": 0.48046875, "learning_rate": 8.148963206576226e-05, "loss": 0.04, "step": 15310 }, { "epoch": 11.223443223443223, "grad_norm": 0.8046875, "learning_rate": 8.146290655060782e-05, "loss": 0.0464, "step": 15320 }, { "epoch": 11.23076923076923, "grad_norm": 0.53125, "learning_rate": 8.1436166200109e-05, "loss": 0.0413, "step": 15330 }, { "epoch": 11.238095238095237, "grad_norm": 0.6328125, "learning_rate": 8.140941102707797e-05, "loss": 0.0397, "step": 15340 }, { "epoch": 11.245421245421245, "grad_norm": 0.5, "learning_rate": 8.138264104433399e-05, "loss": 0.0361, "step": 15350 }, { "epoch": 11.252747252747252, "grad_norm": 0.349609375, "learning_rate": 8.13558562647035e-05, "loss": 0.0367, "step": 15360 }, { "epoch": 11.260073260073261, "grad_norm": 0.41796875, "learning_rate": 8.13290567010199e-05, "loss": 0.0359, "step": 15370 }, { "epoch": 11.267399267399268, "grad_norm": 0.5390625, "learning_rate": 8.130224236612378e-05, "loss": 0.0345, "step": 15380 }, { "epoch": 11.274725274725276, "grad_norm": 0.6640625, "learning_rate": 8.127541327286276e-05, "loss": 0.048, "step": 15390 }, { "epoch": 11.282051282051283, "grad_norm": 0.94921875, "learning_rate": 8.124856943409152e-05, "loss": 0.0417, "step": 15400 }, { "epoch": 11.28937728937729, "grad_norm": 0.439453125, "learning_rate": 8.122171086267183e-05, "loss": 0.0369, "step": 15410 }, { "epoch": 11.296703296703297, "grad_norm": 0.73828125, "learning_rate": 8.119483757147254e-05, "loss": 0.0482, "step": 15420 }, { "epoch": 11.304029304029305, "grad_norm": 0.4921875, "learning_rate": 8.116794957336947e-05, "loss": 0.0344, "step": 15430 }, { "epoch": 11.311355311355312, "grad_norm": 0.482421875, "learning_rate": 8.11410468812456e-05, "loss": 0.0362, "step": 15440 }, { "epoch": 11.31868131868132, "grad_norm": 0.625, "learning_rate": 8.111412950799089e-05, "loss": 0.0407, "step": 15450 }, { "epoch": 11.326007326007327, "grad_norm": 0.5390625, "learning_rate": 8.108719746650229e-05, "loss": 0.0387, "step": 15460 }, { "epoch": 11.333333333333334, "grad_norm": 0.62890625, "learning_rate": 8.106025076968385e-05, "loss": 0.0369, "step": 15470 }, { "epoch": 11.340659340659341, "grad_norm": 0.81640625, "learning_rate": 8.103328943044663e-05, "loss": 0.0339, "step": 15480 }, { "epoch": 11.347985347985349, "grad_norm": 0.55859375, "learning_rate": 8.100631346170869e-05, "loss": 0.0409, "step": 15490 }, { "epoch": 11.355311355311356, "grad_norm": 0.73828125, "learning_rate": 8.097932287639507e-05, "loss": 0.0373, "step": 15500 }, { "epoch": 11.362637362637363, "grad_norm": 0.515625, "learning_rate": 8.095231768743789e-05, "loss": 0.0352, "step": 15510 }, { "epoch": 11.36996336996337, "grad_norm": 0.51953125, "learning_rate": 8.092529790777617e-05, "loss": 0.0379, "step": 15520 }, { "epoch": 11.377289377289378, "grad_norm": 0.5234375, "learning_rate": 8.089826355035601e-05, "loss": 0.0451, "step": 15530 }, { "epoch": 11.384615384615385, "grad_norm": 0.66015625, "learning_rate": 8.087121462813046e-05, "loss": 0.0427, "step": 15540 }, { "epoch": 11.391941391941392, "grad_norm": 0.5078125, "learning_rate": 8.084415115405952e-05, "loss": 0.0364, "step": 15550 }, { "epoch": 11.3992673992674, "grad_norm": 0.49609375, "learning_rate": 8.081707314111022e-05, "loss": 0.0366, "step": 15560 }, { "epoch": 11.406593406593407, "grad_norm": 0.5, "learning_rate": 8.078998060225649e-05, "loss": 0.0335, "step": 15570 }, { "epoch": 11.413919413919414, "grad_norm": 0.69921875, "learning_rate": 8.076287355047929e-05, "loss": 0.0362, "step": 15580 }, { "epoch": 11.421245421245422, "grad_norm": 0.58984375, "learning_rate": 8.073575199876647e-05, "loss": 0.037, "step": 15590 }, { "epoch": 11.428571428571429, "grad_norm": 0.421875, "learning_rate": 8.070861596011287e-05, "loss": 0.0339, "step": 15600 }, { "epoch": 11.435897435897436, "grad_norm": 0.486328125, "learning_rate": 8.068146544752024e-05, "loss": 0.038, "step": 15610 }, { "epoch": 11.443223443223443, "grad_norm": 0.78125, "learning_rate": 8.065430047399732e-05, "loss": 0.0383, "step": 15620 }, { "epoch": 11.45054945054945, "grad_norm": 0.470703125, "learning_rate": 8.062712105255969e-05, "loss": 0.0327, "step": 15630 }, { "epoch": 11.457875457875458, "grad_norm": 0.63671875, "learning_rate": 8.059992719622991e-05, "loss": 0.0364, "step": 15640 }, { "epoch": 11.465201465201465, "grad_norm": 0.4765625, "learning_rate": 8.05727189180375e-05, "loss": 0.0407, "step": 15650 }, { "epoch": 11.472527472527473, "grad_norm": 0.4296875, "learning_rate": 8.05454962310188e-05, "loss": 0.0375, "step": 15660 }, { "epoch": 11.47985347985348, "grad_norm": 0.48828125, "learning_rate": 8.051825914821706e-05, "loss": 0.0371, "step": 15670 }, { "epoch": 11.487179487179487, "grad_norm": 0.78515625, "learning_rate": 8.049100768268253e-05, "loss": 0.0364, "step": 15680 }, { "epoch": 11.494505494505495, "grad_norm": 0.474609375, "learning_rate": 8.046374184747224e-05, "loss": 0.0435, "step": 15690 }, { "epoch": 11.501831501831502, "grad_norm": 0.62109375, "learning_rate": 8.043646165565014e-05, "loss": 0.0345, "step": 15700 }, { "epoch": 11.50915750915751, "grad_norm": 0.61328125, "learning_rate": 8.04091671202871e-05, "loss": 0.0392, "step": 15710 }, { "epoch": 11.516483516483516, "grad_norm": 0.390625, "learning_rate": 8.038185825446079e-05, "loss": 0.0404, "step": 15720 }, { "epoch": 11.523809523809524, "grad_norm": 0.84375, "learning_rate": 8.03545350712558e-05, "loss": 0.0335, "step": 15730 }, { "epoch": 11.531135531135531, "grad_norm": 0.435546875, "learning_rate": 8.032719758376356e-05, "loss": 0.0383, "step": 15740 }, { "epoch": 11.538461538461538, "grad_norm": 0.70703125, "learning_rate": 8.029984580508237e-05, "loss": 0.0346, "step": 15750 }, { "epoch": 11.545787545787546, "grad_norm": 0.53515625, "learning_rate": 8.027247974831735e-05, "loss": 0.0386, "step": 15760 }, { "epoch": 11.553113553113553, "grad_norm": 0.58203125, "learning_rate": 8.024509942658048e-05, "loss": 0.039, "step": 15770 }, { "epoch": 11.56043956043956, "grad_norm": 0.53515625, "learning_rate": 8.021770485299056e-05, "loss": 0.0391, "step": 15780 }, { "epoch": 11.567765567765568, "grad_norm": 0.50390625, "learning_rate": 8.019029604067325e-05, "loss": 0.0361, "step": 15790 }, { "epoch": 11.575091575091575, "grad_norm": 0.8046875, "learning_rate": 8.016287300276098e-05, "loss": 0.0387, "step": 15800 }, { "epoch": 11.582417582417582, "grad_norm": 0.5234375, "learning_rate": 8.013543575239309e-05, "loss": 0.0374, "step": 15810 }, { "epoch": 11.58974358974359, "grad_norm": 0.52734375, "learning_rate": 8.010798430271558e-05, "loss": 0.045, "step": 15820 }, { "epoch": 11.597069597069597, "grad_norm": 0.71875, "learning_rate": 8.008051866688144e-05, "loss": 0.0387, "step": 15830 }, { "epoch": 11.604395604395604, "grad_norm": 0.48828125, "learning_rate": 8.005303885805024e-05, "loss": 0.0367, "step": 15840 }, { "epoch": 11.611721611721611, "grad_norm": 0.439453125, "learning_rate": 8.002554488938855e-05, "loss": 0.0398, "step": 15850 }, { "epoch": 11.619047619047619, "grad_norm": 0.51953125, "learning_rate": 7.99980367740696e-05, "loss": 0.0363, "step": 15860 }, { "epoch": 11.626373626373626, "grad_norm": 0.5390625, "learning_rate": 7.997051452527344e-05, "loss": 0.0374, "step": 15870 }, { "epoch": 11.633699633699633, "grad_norm": 0.640625, "learning_rate": 7.994297815618687e-05, "loss": 0.0458, "step": 15880 }, { "epoch": 11.64102564102564, "grad_norm": 0.796875, "learning_rate": 7.991542768000347e-05, "loss": 0.0369, "step": 15890 }, { "epoch": 11.648351648351648, "grad_norm": 0.69921875, "learning_rate": 7.988786310992358e-05, "loss": 0.0417, "step": 15900 }, { "epoch": 11.655677655677655, "grad_norm": 0.53125, "learning_rate": 7.98602844591543e-05, "loss": 0.0437, "step": 15910 }, { "epoch": 11.663003663003662, "grad_norm": 0.5, "learning_rate": 7.983269174090944e-05, "loss": 0.0393, "step": 15920 }, { "epoch": 11.67032967032967, "grad_norm": 0.703125, "learning_rate": 7.98050849684096e-05, "loss": 0.047, "step": 15930 }, { "epoch": 11.677655677655677, "grad_norm": 0.6796875, "learning_rate": 7.977746415488208e-05, "loss": 0.0329, "step": 15940 }, { "epoch": 11.684981684981684, "grad_norm": 0.7265625, "learning_rate": 7.974982931356094e-05, "loss": 0.04, "step": 15950 }, { "epoch": 11.692307692307692, "grad_norm": 0.546875, "learning_rate": 7.972218045768691e-05, "loss": 0.0435, "step": 15960 }, { "epoch": 11.699633699633699, "grad_norm": 0.55859375, "learning_rate": 7.969451760050747e-05, "loss": 0.0384, "step": 15970 }, { "epoch": 11.706959706959706, "grad_norm": 0.8984375, "learning_rate": 7.966684075527682e-05, "loss": 0.0393, "step": 15980 }, { "epoch": 11.714285714285714, "grad_norm": 0.60546875, "learning_rate": 7.963914993525581e-05, "loss": 0.0382, "step": 15990 }, { "epoch": 11.72161172161172, "grad_norm": 0.453125, "learning_rate": 7.961144515371205e-05, "loss": 0.0357, "step": 16000 }, { "epoch": 11.728937728937728, "grad_norm": 0.53125, "learning_rate": 7.958372642391981e-05, "loss": 0.0356, "step": 16010 }, { "epoch": 11.736263736263737, "grad_norm": 0.50390625, "learning_rate": 7.955599375916004e-05, "loss": 0.0337, "step": 16020 }, { "epoch": 11.743589743589745, "grad_norm": 0.384765625, "learning_rate": 7.952824717272035e-05, "loss": 0.0326, "step": 16030 }, { "epoch": 11.750915750915752, "grad_norm": 0.578125, "learning_rate": 7.950048667789505e-05, "loss": 0.0405, "step": 16040 }, { "epoch": 11.758241758241759, "grad_norm": 0.439453125, "learning_rate": 7.947271228798513e-05, "loss": 0.0377, "step": 16050 }, { "epoch": 11.765567765567766, "grad_norm": 0.4453125, "learning_rate": 7.944492401629818e-05, "loss": 0.0356, "step": 16060 }, { "epoch": 11.772893772893774, "grad_norm": 0.9296875, "learning_rate": 7.941712187614847e-05, "loss": 0.0373, "step": 16070 }, { "epoch": 11.780219780219781, "grad_norm": 0.6875, "learning_rate": 7.938930588085693e-05, "loss": 0.0397, "step": 16080 }, { "epoch": 11.787545787545788, "grad_norm": 0.78515625, "learning_rate": 7.936147604375113e-05, "loss": 0.0387, "step": 16090 }, { "epoch": 11.794871794871796, "grad_norm": 0.41796875, "learning_rate": 7.933363237816526e-05, "loss": 0.0371, "step": 16100 }, { "epoch": 11.802197802197803, "grad_norm": 0.640625, "learning_rate": 7.930577489744008e-05, "loss": 0.0356, "step": 16110 }, { "epoch": 11.80952380952381, "grad_norm": 0.52734375, "learning_rate": 7.927790361492309e-05, "loss": 0.0391, "step": 16120 }, { "epoch": 11.816849816849818, "grad_norm": 0.72265625, "learning_rate": 7.925001854396827e-05, "loss": 0.0346, "step": 16130 }, { "epoch": 11.824175824175825, "grad_norm": 0.52734375, "learning_rate": 7.922211969793634e-05, "loss": 0.0403, "step": 16140 }, { "epoch": 11.831501831501832, "grad_norm": 0.5078125, "learning_rate": 7.919420709019452e-05, "loss": 0.0347, "step": 16150 }, { "epoch": 11.83882783882784, "grad_norm": 0.51953125, "learning_rate": 7.916628073411665e-05, "loss": 0.044, "step": 16160 }, { "epoch": 11.846153846153847, "grad_norm": 0.48046875, "learning_rate": 7.913834064308317e-05, "loss": 0.0361, "step": 16170 }, { "epoch": 11.853479853479854, "grad_norm": 0.56640625, "learning_rate": 7.911038683048109e-05, "loss": 0.0413, "step": 16180 }, { "epoch": 11.860805860805861, "grad_norm": 0.50390625, "learning_rate": 7.908241930970401e-05, "loss": 0.0376, "step": 16190 }, { "epoch": 11.868131868131869, "grad_norm": 0.66015625, "learning_rate": 7.90544380941521e-05, "loss": 0.0447, "step": 16200 }, { "epoch": 11.875457875457876, "grad_norm": 0.478515625, "learning_rate": 7.902644319723203e-05, "loss": 0.0336, "step": 16210 }, { "epoch": 11.882783882783883, "grad_norm": 0.71484375, "learning_rate": 7.899843463235713e-05, "loss": 0.0402, "step": 16220 }, { "epoch": 11.89010989010989, "grad_norm": 0.578125, "learning_rate": 7.897041241294717e-05, "loss": 0.0371, "step": 16230 }, { "epoch": 11.897435897435898, "grad_norm": 0.703125, "learning_rate": 7.894237655242854e-05, "loss": 0.038, "step": 16240 }, { "epoch": 11.904761904761905, "grad_norm": 0.66015625, "learning_rate": 7.891432706423416e-05, "loss": 0.0358, "step": 16250 }, { "epoch": 11.912087912087912, "grad_norm": 0.6328125, "learning_rate": 7.888626396180341e-05, "loss": 0.0392, "step": 16260 }, { "epoch": 11.91941391941392, "grad_norm": 0.419921875, "learning_rate": 7.88581872585823e-05, "loss": 0.0348, "step": 16270 }, { "epoch": 11.926739926739927, "grad_norm": 0.98828125, "learning_rate": 7.883009696802327e-05, "loss": 0.0354, "step": 16280 }, { "epoch": 11.934065934065934, "grad_norm": 0.46875, "learning_rate": 7.88019931035853e-05, "loss": 0.0406, "step": 16290 }, { "epoch": 11.941391941391942, "grad_norm": 0.5234375, "learning_rate": 7.877387567873387e-05, "loss": 0.0386, "step": 16300 }, { "epoch": 11.948717948717949, "grad_norm": 0.66015625, "learning_rate": 7.874574470694096e-05, "loss": 0.0403, "step": 16310 }, { "epoch": 11.956043956043956, "grad_norm": 0.400390625, "learning_rate": 7.871760020168508e-05, "loss": 0.0351, "step": 16320 }, { "epoch": 11.963369963369964, "grad_norm": 0.73046875, "learning_rate": 7.868944217645113e-05, "loss": 0.0405, "step": 16330 }, { "epoch": 11.97069597069597, "grad_norm": 0.546875, "learning_rate": 7.866127064473057e-05, "loss": 0.0372, "step": 16340 }, { "epoch": 11.978021978021978, "grad_norm": 0.6015625, "learning_rate": 7.86330856200213e-05, "loss": 0.0359, "step": 16350 }, { "epoch": 11.985347985347985, "grad_norm": 0.482421875, "learning_rate": 7.860488711582769e-05, "loss": 0.0387, "step": 16360 }, { "epoch": 11.992673992673993, "grad_norm": 0.73046875, "learning_rate": 7.857667514566061e-05, "loss": 0.0364, "step": 16370 }, { "epoch": 12.0, "grad_norm": 0.49609375, "learning_rate": 7.854844972303726e-05, "loss": 0.0416, "step": 16380 }, { "epoch": 12.007326007326007, "grad_norm": 0.54296875, "learning_rate": 7.852021086148142e-05, "loss": 0.0379, "step": 16390 }, { "epoch": 12.014652014652015, "grad_norm": 0.392578125, "learning_rate": 7.849195857452326e-05, "loss": 0.038, "step": 16400 }, { "epoch": 12.021978021978022, "grad_norm": 0.7265625, "learning_rate": 7.846369287569934e-05, "loss": 0.0426, "step": 16410 }, { "epoch": 12.02930402930403, "grad_norm": 0.55859375, "learning_rate": 7.843541377855271e-05, "loss": 0.0322, "step": 16420 }, { "epoch": 12.036630036630036, "grad_norm": 0.64453125, "learning_rate": 7.840712129663282e-05, "loss": 0.0352, "step": 16430 }, { "epoch": 12.043956043956044, "grad_norm": 0.5859375, "learning_rate": 7.83788154434955e-05, "loss": 0.0361, "step": 16440 }, { "epoch": 12.051282051282051, "grad_norm": 0.6484375, "learning_rate": 7.835049623270303e-05, "loss": 0.0353, "step": 16450 }, { "epoch": 12.058608058608058, "grad_norm": 0.5390625, "learning_rate": 7.832216367782406e-05, "loss": 0.0352, "step": 16460 }, { "epoch": 12.065934065934066, "grad_norm": 0.51953125, "learning_rate": 7.829381779243368e-05, "loss": 0.0368, "step": 16470 }, { "epoch": 12.073260073260073, "grad_norm": 0.474609375, "learning_rate": 7.826545859011331e-05, "loss": 0.0383, "step": 16480 }, { "epoch": 12.08058608058608, "grad_norm": 0.65625, "learning_rate": 7.823708608445076e-05, "loss": 0.0412, "step": 16490 }, { "epoch": 12.087912087912088, "grad_norm": 0.8125, "learning_rate": 7.820870028904025e-05, "loss": 0.0383, "step": 16500 }, { "epoch": 12.095238095238095, "grad_norm": 0.6484375, "learning_rate": 7.818030121748233e-05, "loss": 0.036, "step": 16510 }, { "epoch": 12.102564102564102, "grad_norm": 0.494140625, "learning_rate": 7.815188888338395e-05, "loss": 0.0351, "step": 16520 }, { "epoch": 12.10989010989011, "grad_norm": 0.4453125, "learning_rate": 7.812346330035837e-05, "loss": 0.0361, "step": 16530 }, { "epoch": 12.117216117216117, "grad_norm": 0.5, "learning_rate": 7.809502448202523e-05, "loss": 0.0365, "step": 16540 }, { "epoch": 12.124542124542124, "grad_norm": 0.4609375, "learning_rate": 7.80665724420105e-05, "loss": 0.04, "step": 16550 }, { "epoch": 12.131868131868131, "grad_norm": 0.6484375, "learning_rate": 7.803810719394647e-05, "loss": 0.0346, "step": 16560 }, { "epoch": 12.139194139194139, "grad_norm": 0.59375, "learning_rate": 7.800962875147179e-05, "loss": 0.0366, "step": 16570 }, { "epoch": 12.146520146520146, "grad_norm": 0.73046875, "learning_rate": 7.798113712823144e-05, "loss": 0.038, "step": 16580 }, { "epoch": 12.153846153846153, "grad_norm": 0.62109375, "learning_rate": 7.795263233787666e-05, "loss": 0.0346, "step": 16590 }, { "epoch": 12.16117216117216, "grad_norm": 0.51171875, "learning_rate": 7.792411439406502e-05, "loss": 0.037, "step": 16600 }, { "epoch": 12.168498168498168, "grad_norm": 0.66796875, "learning_rate": 7.789558331046042e-05, "loss": 0.0392, "step": 16610 }, { "epoch": 12.175824175824175, "grad_norm": 0.6015625, "learning_rate": 7.786703910073305e-05, "loss": 0.037, "step": 16620 }, { "epoch": 12.183150183150182, "grad_norm": 0.390625, "learning_rate": 7.783848177855934e-05, "loss": 0.0352, "step": 16630 }, { "epoch": 12.19047619047619, "grad_norm": 0.48046875, "learning_rate": 7.780991135762207e-05, "loss": 0.036, "step": 16640 }, { "epoch": 12.197802197802197, "grad_norm": 0.419921875, "learning_rate": 7.778132785161027e-05, "loss": 0.0381, "step": 16650 }, { "epoch": 12.205128205128204, "grad_norm": 0.49609375, "learning_rate": 7.775273127421924e-05, "loss": 0.0355, "step": 16660 }, { "epoch": 12.212454212454212, "grad_norm": 0.73046875, "learning_rate": 7.772412163915051e-05, "loss": 0.0355, "step": 16670 }, { "epoch": 12.219780219780219, "grad_norm": 0.5390625, "learning_rate": 7.76954989601119e-05, "loss": 0.0361, "step": 16680 }, { "epoch": 12.227106227106226, "grad_norm": 0.67578125, "learning_rate": 7.766686325081748e-05, "loss": 0.0371, "step": 16690 }, { "epoch": 12.234432234432234, "grad_norm": 0.62890625, "learning_rate": 7.763821452498757e-05, "loss": 0.043, "step": 16700 }, { "epoch": 12.241758241758241, "grad_norm": 0.59375, "learning_rate": 7.76095527963487e-05, "loss": 0.0325, "step": 16710 }, { "epoch": 12.249084249084248, "grad_norm": 0.52734375, "learning_rate": 7.758087807863367e-05, "loss": 0.0375, "step": 16720 }, { "epoch": 12.256410256410255, "grad_norm": 0.466796875, "learning_rate": 7.755219038558145e-05, "loss": 0.0362, "step": 16730 }, { "epoch": 12.263736263736265, "grad_norm": 0.44921875, "learning_rate": 7.752348973093725e-05, "loss": 0.0317, "step": 16740 }, { "epoch": 12.271062271062272, "grad_norm": 0.478515625, "learning_rate": 7.749477612845252e-05, "loss": 0.0342, "step": 16750 }, { "epoch": 12.27838827838828, "grad_norm": 0.6171875, "learning_rate": 7.746604959188487e-05, "loss": 0.0373, "step": 16760 }, { "epoch": 12.285714285714286, "grad_norm": 0.53125, "learning_rate": 7.743731013499814e-05, "loss": 0.0391, "step": 16770 }, { "epoch": 12.293040293040294, "grad_norm": 0.515625, "learning_rate": 7.740855777156235e-05, "loss": 0.0341, "step": 16780 }, { "epoch": 12.300366300366301, "grad_norm": 0.5703125, "learning_rate": 7.737979251535367e-05, "loss": 0.0361, "step": 16790 }, { "epoch": 12.307692307692308, "grad_norm": 0.3828125, "learning_rate": 7.735101438015451e-05, "loss": 0.0356, "step": 16800 }, { "epoch": 12.315018315018316, "grad_norm": 0.68359375, "learning_rate": 7.732222337975342e-05, "loss": 0.0403, "step": 16810 }, { "epoch": 12.322344322344323, "grad_norm": 0.59765625, "learning_rate": 7.729341952794509e-05, "loss": 0.0381, "step": 16820 }, { "epoch": 12.32967032967033, "grad_norm": 0.71875, "learning_rate": 7.726460283853038e-05, "loss": 0.0335, "step": 16830 }, { "epoch": 12.336996336996338, "grad_norm": 0.6328125, "learning_rate": 7.723577332531637e-05, "loss": 0.035, "step": 16840 }, { "epoch": 12.344322344322345, "grad_norm": 0.49609375, "learning_rate": 7.720693100211617e-05, "loss": 0.0373, "step": 16850 }, { "epoch": 12.351648351648352, "grad_norm": 0.65234375, "learning_rate": 7.717807588274908e-05, "loss": 0.0357, "step": 16860 }, { "epoch": 12.35897435897436, "grad_norm": 0.50390625, "learning_rate": 7.714920798104058e-05, "loss": 0.0345, "step": 16870 }, { "epoch": 12.366300366300367, "grad_norm": 0.57421875, "learning_rate": 7.712032731082221e-05, "loss": 0.0404, "step": 16880 }, { "epoch": 12.373626373626374, "grad_norm": 0.625, "learning_rate": 7.709143388593164e-05, "loss": 0.0442, "step": 16890 }, { "epoch": 12.380952380952381, "grad_norm": 0.7734375, "learning_rate": 7.706252772021264e-05, "loss": 0.049, "step": 16900 }, { "epoch": 12.388278388278389, "grad_norm": 0.482421875, "learning_rate": 7.703360882751514e-05, "loss": 0.0363, "step": 16910 }, { "epoch": 12.395604395604396, "grad_norm": 0.55078125, "learning_rate": 7.700467722169511e-05, "loss": 0.0366, "step": 16920 }, { "epoch": 12.402930402930403, "grad_norm": 0.58203125, "learning_rate": 7.697573291661463e-05, "loss": 0.0341, "step": 16930 }, { "epoch": 12.41025641025641, "grad_norm": 0.6953125, "learning_rate": 7.694677592614185e-05, "loss": 0.0368, "step": 16940 }, { "epoch": 12.417582417582418, "grad_norm": 0.439453125, "learning_rate": 7.691780626415106e-05, "loss": 0.0359, "step": 16950 }, { "epoch": 12.424908424908425, "grad_norm": 0.4921875, "learning_rate": 7.688882394452255e-05, "loss": 0.0427, "step": 16960 }, { "epoch": 12.432234432234432, "grad_norm": 0.546875, "learning_rate": 7.685982898114269e-05, "loss": 0.041, "step": 16970 }, { "epoch": 12.43956043956044, "grad_norm": 0.62890625, "learning_rate": 7.683082138790394e-05, "loss": 0.0334, "step": 16980 }, { "epoch": 12.446886446886447, "grad_norm": 0.63671875, "learning_rate": 7.680180117870477e-05, "loss": 0.0409, "step": 16990 }, { "epoch": 12.454212454212454, "grad_norm": 0.91015625, "learning_rate": 7.677276836744969e-05, "loss": 0.0417, "step": 17000 }, { "epoch": 12.461538461538462, "grad_norm": 0.53515625, "learning_rate": 7.674372296804935e-05, "loss": 0.0381, "step": 17010 }, { "epoch": 12.468864468864469, "grad_norm": 0.486328125, "learning_rate": 7.671466499442029e-05, "loss": 0.0391, "step": 17020 }, { "epoch": 12.476190476190476, "grad_norm": 0.58203125, "learning_rate": 7.668559446048515e-05, "loss": 0.0388, "step": 17030 }, { "epoch": 12.483516483516484, "grad_norm": 0.6171875, "learning_rate": 7.66565113801726e-05, "loss": 0.0352, "step": 17040 }, { "epoch": 12.49084249084249, "grad_norm": 0.44140625, "learning_rate": 7.662741576741726e-05, "loss": 0.0344, "step": 17050 }, { "epoch": 12.498168498168498, "grad_norm": 0.59375, "learning_rate": 7.659830763615985e-05, "loss": 0.0359, "step": 17060 }, { "epoch": 12.505494505494505, "grad_norm": 0.69140625, "learning_rate": 7.656918700034695e-05, "loss": 0.037, "step": 17070 }, { "epoch": 12.512820512820513, "grad_norm": 0.546875, "learning_rate": 7.654005387393129e-05, "loss": 0.0377, "step": 17080 }, { "epoch": 12.52014652014652, "grad_norm": 0.49609375, "learning_rate": 7.651090827087143e-05, "loss": 0.0355, "step": 17090 }, { "epoch": 12.527472527472527, "grad_norm": 0.474609375, "learning_rate": 7.64817502051321e-05, "loss": 0.036, "step": 17100 }, { "epoch": 12.534798534798535, "grad_norm": 1.1328125, "learning_rate": 7.645257969068375e-05, "loss": 0.0389, "step": 17110 }, { "epoch": 12.542124542124542, "grad_norm": 0.80078125, "learning_rate": 7.642339674150302e-05, "loss": 0.0384, "step": 17120 }, { "epoch": 12.54945054945055, "grad_norm": 0.60546875, "learning_rate": 7.63942013715724e-05, "loss": 0.0442, "step": 17130 }, { "epoch": 12.556776556776557, "grad_norm": 0.6171875, "learning_rate": 7.636499359488032e-05, "loss": 0.0409, "step": 17140 }, { "epoch": 12.564102564102564, "grad_norm": 0.50390625, "learning_rate": 7.633577342542122e-05, "loss": 0.0362, "step": 17150 }, { "epoch": 12.571428571428571, "grad_norm": 0.67578125, "learning_rate": 7.630654087719542e-05, "loss": 0.0369, "step": 17160 }, { "epoch": 12.578754578754578, "grad_norm": 0.478515625, "learning_rate": 7.627729596420921e-05, "loss": 0.0346, "step": 17170 }, { "epoch": 12.586080586080586, "grad_norm": 0.6171875, "learning_rate": 7.624803870047477e-05, "loss": 0.0363, "step": 17180 }, { "epoch": 12.593406593406593, "grad_norm": 0.61328125, "learning_rate": 7.621876910001024e-05, "loss": 0.035, "step": 17190 }, { "epoch": 12.6007326007326, "grad_norm": 0.72265625, "learning_rate": 7.61894871768396e-05, "loss": 0.0371, "step": 17200 }, { "epoch": 12.608058608058608, "grad_norm": 0.515625, "learning_rate": 7.616019294499283e-05, "loss": 0.0408, "step": 17210 }, { "epoch": 12.615384615384615, "grad_norm": 0.5234375, "learning_rate": 7.613088641850573e-05, "loss": 0.0365, "step": 17220 }, { "epoch": 12.622710622710622, "grad_norm": 0.56640625, "learning_rate": 7.610156761142005e-05, "loss": 0.0339, "step": 17230 }, { "epoch": 12.63003663003663, "grad_norm": 0.58984375, "learning_rate": 7.607223653778334e-05, "loss": 0.0368, "step": 17240 }, { "epoch": 12.637362637362637, "grad_norm": 0.431640625, "learning_rate": 7.604289321164911e-05, "loss": 0.0388, "step": 17250 }, { "epoch": 12.644688644688644, "grad_norm": 0.7578125, "learning_rate": 7.601353764707671e-05, "loss": 0.0373, "step": 17260 }, { "epoch": 12.652014652014651, "grad_norm": 0.625, "learning_rate": 7.598416985813135e-05, "loss": 0.0362, "step": 17270 }, { "epoch": 12.659340659340659, "grad_norm": 0.64453125, "learning_rate": 7.595478985888411e-05, "loss": 0.0398, "step": 17280 }, { "epoch": 12.666666666666666, "grad_norm": 0.5078125, "learning_rate": 7.592539766341191e-05, "loss": 0.034, "step": 17290 }, { "epoch": 12.673992673992673, "grad_norm": 0.50390625, "learning_rate": 7.589599328579749e-05, "loss": 0.0425, "step": 17300 }, { "epoch": 12.68131868131868, "grad_norm": 0.734375, "learning_rate": 7.586657674012949e-05, "loss": 0.0382, "step": 17310 }, { "epoch": 12.688644688644688, "grad_norm": 0.41015625, "learning_rate": 7.583714804050231e-05, "loss": 0.0336, "step": 17320 }, { "epoch": 12.695970695970695, "grad_norm": 0.453125, "learning_rate": 7.580770720101623e-05, "loss": 0.0394, "step": 17330 }, { "epoch": 12.703296703296703, "grad_norm": 0.625, "learning_rate": 7.57782542357773e-05, "loss": 0.0391, "step": 17340 }, { "epoch": 12.71062271062271, "grad_norm": 0.578125, "learning_rate": 7.574878915889743e-05, "loss": 0.0397, "step": 17350 }, { "epoch": 12.717948717948717, "grad_norm": 0.7578125, "learning_rate": 7.57193119844943e-05, "loss": 0.0374, "step": 17360 }, { "epoch": 12.725274725274724, "grad_norm": 0.71875, "learning_rate": 7.568982272669135e-05, "loss": 0.0352, "step": 17370 }, { "epoch": 12.732600732600732, "grad_norm": 0.443359375, "learning_rate": 7.566032139961789e-05, "loss": 0.0354, "step": 17380 }, { "epoch": 12.73992673992674, "grad_norm": 0.640625, "learning_rate": 7.563080801740896e-05, "loss": 0.0345, "step": 17390 }, { "epoch": 12.747252747252748, "grad_norm": 0.99609375, "learning_rate": 7.56012825942054e-05, "loss": 0.034, "step": 17400 }, { "epoch": 12.754578754578755, "grad_norm": 0.5625, "learning_rate": 7.557174514415382e-05, "loss": 0.0373, "step": 17410 }, { "epoch": 12.761904761904763, "grad_norm": 0.62109375, "learning_rate": 7.554219568140655e-05, "loss": 0.0369, "step": 17420 }, { "epoch": 12.76923076923077, "grad_norm": 0.48828125, "learning_rate": 7.551263422012172e-05, "loss": 0.0383, "step": 17430 }, { "epoch": 12.776556776556777, "grad_norm": 0.65234375, "learning_rate": 7.548306077446319e-05, "loss": 0.0335, "step": 17440 }, { "epoch": 12.783882783882785, "grad_norm": 0.375, "learning_rate": 7.545347535860058e-05, "loss": 0.0341, "step": 17450 }, { "epoch": 12.791208791208792, "grad_norm": 0.44140625, "learning_rate": 7.542387798670924e-05, "loss": 0.0361, "step": 17460 }, { "epoch": 12.7985347985348, "grad_norm": 0.9140625, "learning_rate": 7.539426867297024e-05, "loss": 0.0369, "step": 17470 }, { "epoch": 12.805860805860807, "grad_norm": 0.546875, "learning_rate": 7.536464743157034e-05, "loss": 0.0378, "step": 17480 }, { "epoch": 12.813186813186814, "grad_norm": 0.453125, "learning_rate": 7.533501427670209e-05, "loss": 0.0335, "step": 17490 }, { "epoch": 12.820512820512821, "grad_norm": 0.36328125, "learning_rate": 7.530536922256367e-05, "loss": 0.0298, "step": 17500 }, { "epoch": 12.827838827838828, "grad_norm": 0.466796875, "learning_rate": 7.527571228335903e-05, "loss": 0.0327, "step": 17510 }, { "epoch": 12.835164835164836, "grad_norm": 0.4453125, "learning_rate": 7.524604347329777e-05, "loss": 0.0343, "step": 17520 }, { "epoch": 12.842490842490843, "grad_norm": 0.7734375, "learning_rate": 7.52163628065952e-05, "loss": 0.0377, "step": 17530 }, { "epoch": 12.84981684981685, "grad_norm": 0.494140625, "learning_rate": 7.518667029747228e-05, "loss": 0.0419, "step": 17540 }, { "epoch": 12.857142857142858, "grad_norm": 0.578125, "learning_rate": 7.515696596015569e-05, "loss": 0.0424, "step": 17550 }, { "epoch": 12.864468864468865, "grad_norm": 0.609375, "learning_rate": 7.512724980887774e-05, "loss": 0.0372, "step": 17560 }, { "epoch": 12.871794871794872, "grad_norm": 0.375, "learning_rate": 7.509752185787641e-05, "loss": 0.034, "step": 17570 }, { "epoch": 12.87912087912088, "grad_norm": 0.58203125, "learning_rate": 7.506778212139532e-05, "loss": 0.0393, "step": 17580 }, { "epoch": 12.886446886446887, "grad_norm": 0.3828125, "learning_rate": 7.503803061368381e-05, "loss": 0.0346, "step": 17590 }, { "epoch": 12.893772893772894, "grad_norm": 0.447265625, "learning_rate": 7.500826734899674e-05, "loss": 0.0322, "step": 17600 }, { "epoch": 12.901098901098901, "grad_norm": 0.388671875, "learning_rate": 7.497849234159471e-05, "loss": 0.033, "step": 17610 }, { "epoch": 12.908424908424909, "grad_norm": 0.65234375, "learning_rate": 7.494870560574388e-05, "loss": 0.0368, "step": 17620 }, { "epoch": 12.915750915750916, "grad_norm": 0.76171875, "learning_rate": 7.491890715571605e-05, "loss": 0.0364, "step": 17630 }, { "epoch": 12.923076923076923, "grad_norm": 0.5859375, "learning_rate": 7.488909700578865e-05, "loss": 0.037, "step": 17640 }, { "epoch": 12.93040293040293, "grad_norm": 0.431640625, "learning_rate": 7.485927517024469e-05, "loss": 0.0346, "step": 17650 }, { "epoch": 12.937728937728938, "grad_norm": 0.42578125, "learning_rate": 7.48294416633728e-05, "loss": 0.0386, "step": 17660 }, { "epoch": 12.945054945054945, "grad_norm": 0.53125, "learning_rate": 7.47995964994672e-05, "loss": 0.0472, "step": 17670 }, { "epoch": 12.952380952380953, "grad_norm": 0.54296875, "learning_rate": 7.476973969282765e-05, "loss": 0.0349, "step": 17680 }, { "epoch": 12.95970695970696, "grad_norm": 0.61328125, "learning_rate": 7.473987125775956e-05, "loss": 0.0315, "step": 17690 }, { "epoch": 12.967032967032967, "grad_norm": 0.5625, "learning_rate": 7.470999120857387e-05, "loss": 0.0448, "step": 17700 }, { "epoch": 12.974358974358974, "grad_norm": 0.482421875, "learning_rate": 7.468009955958708e-05, "loss": 0.0368, "step": 17710 }, { "epoch": 12.981684981684982, "grad_norm": 0.86328125, "learning_rate": 7.465019632512127e-05, "loss": 0.0372, "step": 17720 }, { "epoch": 12.989010989010989, "grad_norm": 0.86328125, "learning_rate": 7.462028151950407e-05, "loss": 0.0402, "step": 17730 }, { "epoch": 12.996336996336996, "grad_norm": 0.484375, "learning_rate": 7.459035515706862e-05, "loss": 0.0396, "step": 17740 }, { "epoch": 13.003663003663004, "grad_norm": 0.66015625, "learning_rate": 7.456041725215364e-05, "loss": 0.0338, "step": 17750 }, { "epoch": 13.010989010989011, "grad_norm": 0.85546875, "learning_rate": 7.453046781910335e-05, "loss": 0.038, "step": 17760 }, { "epoch": 13.018315018315018, "grad_norm": 0.50390625, "learning_rate": 7.450050687226753e-05, "loss": 0.0423, "step": 17770 }, { "epoch": 13.025641025641026, "grad_norm": 0.53125, "learning_rate": 7.447053442600144e-05, "loss": 0.0371, "step": 17780 }, { "epoch": 13.032967032967033, "grad_norm": 0.55078125, "learning_rate": 7.444055049466586e-05, "loss": 0.0372, "step": 17790 }, { "epoch": 13.04029304029304, "grad_norm": 0.470703125, "learning_rate": 7.441055509262706e-05, "loss": 0.04, "step": 17800 }, { "epoch": 13.047619047619047, "grad_norm": 0.56640625, "learning_rate": 7.438054823425684e-05, "loss": 0.0354, "step": 17810 }, { "epoch": 13.054945054945055, "grad_norm": 1.1171875, "learning_rate": 7.435052993393246e-05, "loss": 0.0346, "step": 17820 }, { "epoch": 13.062271062271062, "grad_norm": 0.625, "learning_rate": 7.432050020603669e-05, "loss": 0.0379, "step": 17830 }, { "epoch": 13.06959706959707, "grad_norm": 0.8515625, "learning_rate": 7.429045906495774e-05, "loss": 0.0409, "step": 17840 }, { "epoch": 13.076923076923077, "grad_norm": 0.55859375, "learning_rate": 7.426040652508932e-05, "loss": 0.0507, "step": 17850 }, { "epoch": 13.084249084249084, "grad_norm": 0.58203125, "learning_rate": 7.423034260083057e-05, "loss": 0.0329, "step": 17860 }, { "epoch": 13.091575091575091, "grad_norm": 0.478515625, "learning_rate": 7.420026730658613e-05, "loss": 0.0356, "step": 17870 }, { "epoch": 13.098901098901099, "grad_norm": 0.84375, "learning_rate": 7.417018065676603e-05, "loss": 0.0411, "step": 17880 }, { "epoch": 13.106227106227106, "grad_norm": 0.69921875, "learning_rate": 7.414008266578577e-05, "loss": 0.0343, "step": 17890 }, { "epoch": 13.113553113553113, "grad_norm": 0.66796875, "learning_rate": 7.410997334806632e-05, "loss": 0.0373, "step": 17900 }, { "epoch": 13.12087912087912, "grad_norm": 0.65234375, "learning_rate": 7.407985271803402e-05, "loss": 0.0338, "step": 17910 }, { "epoch": 13.128205128205128, "grad_norm": 0.462890625, "learning_rate": 7.404972079012066e-05, "loss": 0.038, "step": 17920 }, { "epoch": 13.135531135531135, "grad_norm": 0.578125, "learning_rate": 7.401957757876342e-05, "loss": 0.0367, "step": 17930 }, { "epoch": 13.142857142857142, "grad_norm": 0.609375, "learning_rate": 7.39894230984049e-05, "loss": 0.0379, "step": 17940 }, { "epoch": 13.15018315018315, "grad_norm": 0.53515625, "learning_rate": 7.395925736349313e-05, "loss": 0.0338, "step": 17950 }, { "epoch": 13.157509157509157, "grad_norm": 0.99609375, "learning_rate": 7.392908038848148e-05, "loss": 0.0396, "step": 17960 }, { "epoch": 13.164835164835164, "grad_norm": 0.51953125, "learning_rate": 7.389889218782872e-05, "loss": 0.0436, "step": 17970 }, { "epoch": 13.172161172161172, "grad_norm": 0.48828125, "learning_rate": 7.386869277599902e-05, "loss": 0.0356, "step": 17980 }, { "epoch": 13.179487179487179, "grad_norm": 0.515625, "learning_rate": 7.383848216746193e-05, "loss": 0.0389, "step": 17990 }, { "epoch": 13.186813186813186, "grad_norm": 0.734375, "learning_rate": 7.38082603766923e-05, "loss": 0.0404, "step": 18000 }, { "epoch": 13.194139194139193, "grad_norm": 0.5546875, "learning_rate": 7.377802741817043e-05, "loss": 0.0367, "step": 18010 }, { "epoch": 13.2014652014652, "grad_norm": 0.494140625, "learning_rate": 7.374778330638189e-05, "loss": 0.0385, "step": 18020 }, { "epoch": 13.208791208791208, "grad_norm": 0.404296875, "learning_rate": 7.371752805581762e-05, "loss": 0.0402, "step": 18030 }, { "epoch": 13.216117216117215, "grad_norm": 0.5078125, "learning_rate": 7.368726168097393e-05, "loss": 0.0387, "step": 18040 }, { "epoch": 13.223443223443223, "grad_norm": 0.56640625, "learning_rate": 7.365698419635241e-05, "loss": 0.0416, "step": 18050 }, { "epoch": 13.23076923076923, "grad_norm": 0.396484375, "learning_rate": 7.362669561646001e-05, "loss": 0.0404, "step": 18060 }, { "epoch": 13.238095238095237, "grad_norm": 0.3203125, "learning_rate": 7.359639595580898e-05, "loss": 0.0355, "step": 18070 }, { "epoch": 13.245421245421245, "grad_norm": 0.5234375, "learning_rate": 7.356608522891687e-05, "loss": 0.0359, "step": 18080 }, { "epoch": 13.252747252747252, "grad_norm": 0.470703125, "learning_rate": 7.353576345030657e-05, "loss": 0.0336, "step": 18090 }, { "epoch": 13.260073260073261, "grad_norm": 0.55078125, "learning_rate": 7.350543063450623e-05, "loss": 0.0357, "step": 18100 }, { "epoch": 13.267399267399268, "grad_norm": 0.4609375, "learning_rate": 7.347508679604927e-05, "loss": 0.0348, "step": 18110 }, { "epoch": 13.274725274725276, "grad_norm": 0.63671875, "learning_rate": 7.344473194947444e-05, "loss": 0.0358, "step": 18120 }, { "epoch": 13.282051282051283, "grad_norm": 0.49609375, "learning_rate": 7.341436610932574e-05, "loss": 0.0311, "step": 18130 }, { "epoch": 13.28937728937729, "grad_norm": 0.375, "learning_rate": 7.338398929015244e-05, "loss": 0.0399, "step": 18140 }, { "epoch": 13.296703296703297, "grad_norm": 0.5, "learning_rate": 7.335360150650908e-05, "loss": 0.0323, "step": 18150 }, { "epoch": 13.304029304029305, "grad_norm": 0.59765625, "learning_rate": 7.332320277295544e-05, "loss": 0.0401, "step": 18160 }, { "epoch": 13.311355311355312, "grad_norm": 0.53515625, "learning_rate": 7.329279310405654e-05, "loss": 0.0351, "step": 18170 }, { "epoch": 13.31868131868132, "grad_norm": 0.65625, "learning_rate": 7.326237251438267e-05, "loss": 0.0397, "step": 18180 }, { "epoch": 13.326007326007327, "grad_norm": 0.80078125, "learning_rate": 7.32319410185093e-05, "loss": 0.0374, "step": 18190 }, { "epoch": 13.333333333333334, "grad_norm": 0.53515625, "learning_rate": 7.320149863101719e-05, "loss": 0.0324, "step": 18200 }, { "epoch": 13.340659340659341, "grad_norm": 0.46875, "learning_rate": 7.317104536649227e-05, "loss": 0.0305, "step": 18210 }, { "epoch": 13.347985347985349, "grad_norm": 0.6015625, "learning_rate": 7.31405812395257e-05, "loss": 0.0356, "step": 18220 }, { "epoch": 13.355311355311356, "grad_norm": 0.466796875, "learning_rate": 7.311010626471385e-05, "loss": 0.0357, "step": 18230 }, { "epoch": 13.362637362637363, "grad_norm": 0.90234375, "learning_rate": 7.307962045665827e-05, "loss": 0.0378, "step": 18240 }, { "epoch": 13.36996336996337, "grad_norm": 0.9296875, "learning_rate": 7.304912382996573e-05, "loss": 0.0389, "step": 18250 }, { "epoch": 13.377289377289378, "grad_norm": 0.375, "learning_rate": 7.301861639924814e-05, "loss": 0.0332, "step": 18260 }, { "epoch": 13.384615384615385, "grad_norm": 0.5234375, "learning_rate": 7.298809817912261e-05, "loss": 0.0368, "step": 18270 }, { "epoch": 13.391941391941392, "grad_norm": 0.416015625, "learning_rate": 7.295756918421145e-05, "loss": 0.0336, "step": 18280 }, { "epoch": 13.3992673992674, "grad_norm": 0.7265625, "learning_rate": 7.292702942914208e-05, "loss": 0.0402, "step": 18290 }, { "epoch": 13.406593406593407, "grad_norm": 0.5703125, "learning_rate": 7.28964789285471e-05, "loss": 0.0395, "step": 18300 }, { "epoch": 13.413919413919414, "grad_norm": 0.56640625, "learning_rate": 7.286591769706428e-05, "loss": 0.0373, "step": 18310 }, { "epoch": 13.421245421245422, "grad_norm": 0.4609375, "learning_rate": 7.283534574933649e-05, "loss": 0.0305, "step": 18320 }, { "epoch": 13.428571428571429, "grad_norm": 0.69140625, "learning_rate": 7.280476310001172e-05, "loss": 0.0366, "step": 18330 }, { "epoch": 13.435897435897436, "grad_norm": 0.64453125, "learning_rate": 7.277416976374317e-05, "loss": 0.0348, "step": 18340 }, { "epoch": 13.443223443223443, "grad_norm": 0.4921875, "learning_rate": 7.274356575518911e-05, "loss": 0.0335, "step": 18350 }, { "epoch": 13.45054945054945, "grad_norm": 0.625, "learning_rate": 7.271295108901289e-05, "loss": 0.0388, "step": 18360 }, { "epoch": 13.457875457875458, "grad_norm": 0.9765625, "learning_rate": 7.268232577988304e-05, "loss": 0.0426, "step": 18370 }, { "epoch": 13.465201465201465, "grad_norm": 0.67578125, "learning_rate": 7.265168984247311e-05, "loss": 0.038, "step": 18380 }, { "epoch": 13.472527472527473, "grad_norm": 0.494140625, "learning_rate": 7.262104329146181e-05, "loss": 0.0362, "step": 18390 }, { "epoch": 13.47985347985348, "grad_norm": 0.451171875, "learning_rate": 7.25903861415329e-05, "loss": 0.0405, "step": 18400 }, { "epoch": 13.487179487179487, "grad_norm": 0.4375, "learning_rate": 7.255971840737524e-05, "loss": 0.0381, "step": 18410 }, { "epoch": 13.494505494505495, "grad_norm": 0.412109375, "learning_rate": 7.252904010368273e-05, "loss": 0.0361, "step": 18420 }, { "epoch": 13.501831501831502, "grad_norm": 0.59765625, "learning_rate": 7.249835124515435e-05, "loss": 0.0377, "step": 18430 }, { "epoch": 13.50915750915751, "grad_norm": 0.7890625, "learning_rate": 7.246765184649415e-05, "loss": 0.0366, "step": 18440 }, { "epoch": 13.516483516483516, "grad_norm": 0.431640625, "learning_rate": 7.243694192241121e-05, "loss": 0.0361, "step": 18450 }, { "epoch": 13.523809523809524, "grad_norm": 0.56640625, "learning_rate": 7.240622148761969e-05, "loss": 0.0393, "step": 18460 }, { "epoch": 13.531135531135531, "grad_norm": 0.5546875, "learning_rate": 7.23754905568387e-05, "loss": 0.0327, "step": 18470 }, { "epoch": 13.538461538461538, "grad_norm": 1.015625, "learning_rate": 7.234474914479252e-05, "loss": 0.0333, "step": 18480 }, { "epoch": 13.545787545787546, "grad_norm": 0.5703125, "learning_rate": 7.231399726621028e-05, "loss": 0.0358, "step": 18490 }, { "epoch": 13.553113553113553, "grad_norm": 0.8671875, "learning_rate": 7.228323493582629e-05, "loss": 0.0387, "step": 18500 }, { "epoch": 13.56043956043956, "grad_norm": 0.65625, "learning_rate": 7.225246216837974e-05, "loss": 0.0403, "step": 18510 }, { "epoch": 13.567765567765568, "grad_norm": 0.408203125, "learning_rate": 7.222167897861488e-05, "loss": 0.0377, "step": 18520 }, { "epoch": 13.575091575091575, "grad_norm": 0.58203125, "learning_rate": 7.219088538128097e-05, "loss": 0.038, "step": 18530 }, { "epoch": 13.582417582417582, "grad_norm": 0.6875, "learning_rate": 7.216008139113221e-05, "loss": 0.047, "step": 18540 }, { "epoch": 13.58974358974359, "grad_norm": 0.435546875, "learning_rate": 7.212926702292783e-05, "loss": 0.0343, "step": 18550 }, { "epoch": 13.597069597069597, "grad_norm": 0.48828125, "learning_rate": 7.209844229143198e-05, "loss": 0.0389, "step": 18560 }, { "epoch": 13.604395604395604, "grad_norm": 0.466796875, "learning_rate": 7.20676072114138e-05, "loss": 0.0378, "step": 18570 }, { "epoch": 13.611721611721611, "grad_norm": 0.470703125, "learning_rate": 7.203676179764738e-05, "loss": 0.0345, "step": 18580 }, { "epoch": 13.619047619047619, "grad_norm": 0.828125, "learning_rate": 7.200590606491182e-05, "loss": 0.0407, "step": 18590 }, { "epoch": 13.626373626373626, "grad_norm": 0.77734375, "learning_rate": 7.197504002799104e-05, "loss": 0.0381, "step": 18600 }, { "epoch": 13.633699633699633, "grad_norm": 0.69921875, "learning_rate": 7.194416370167404e-05, "loss": 0.0379, "step": 18610 }, { "epoch": 13.64102564102564, "grad_norm": 0.5234375, "learning_rate": 7.191327710075464e-05, "loss": 0.0356, "step": 18620 }, { "epoch": 13.648351648351648, "grad_norm": 0.5234375, "learning_rate": 7.188238024003164e-05, "loss": 0.0353, "step": 18630 }, { "epoch": 13.655677655677655, "grad_norm": 0.435546875, "learning_rate": 7.185147313430875e-05, "loss": 0.0333, "step": 18640 }, { "epoch": 13.663003663003662, "grad_norm": 0.3984375, "learning_rate": 7.182055579839453e-05, "loss": 0.0336, "step": 18650 }, { "epoch": 13.67032967032967, "grad_norm": 0.70703125, "learning_rate": 7.178962824710253e-05, "loss": 0.0344, "step": 18660 }, { "epoch": 13.677655677655677, "grad_norm": 0.578125, "learning_rate": 7.175869049525115e-05, "loss": 0.0455, "step": 18670 }, { "epoch": 13.684981684981684, "grad_norm": 0.390625, "learning_rate": 7.17277425576637e-05, "loss": 0.0369, "step": 18680 }, { "epoch": 13.692307692307692, "grad_norm": 0.50390625, "learning_rate": 7.169678444916831e-05, "loss": 0.0361, "step": 18690 }, { "epoch": 13.699633699633699, "grad_norm": 0.455078125, "learning_rate": 7.166581618459806e-05, "loss": 0.0393, "step": 18700 }, { "epoch": 13.706959706959706, "grad_norm": 0.466796875, "learning_rate": 7.163483777879082e-05, "loss": 0.037, "step": 18710 }, { "epoch": 13.714285714285714, "grad_norm": 0.71484375, "learning_rate": 7.160384924658942e-05, "loss": 0.0337, "step": 18720 }, { "epoch": 13.72161172161172, "grad_norm": 0.41796875, "learning_rate": 7.157285060284144e-05, "loss": 0.0364, "step": 18730 }, { "epoch": 13.728937728937728, "grad_norm": 0.77734375, "learning_rate": 7.154184186239936e-05, "loss": 0.0358, "step": 18740 }, { "epoch": 13.736263736263737, "grad_norm": 0.484375, "learning_rate": 7.151082304012048e-05, "loss": 0.033, "step": 18750 }, { "epoch": 13.743589743589745, "grad_norm": 0.396484375, "learning_rate": 7.147979415086695e-05, "loss": 0.034, "step": 18760 }, { "epoch": 13.750915750915752, "grad_norm": 0.671875, "learning_rate": 7.144875520950571e-05, "loss": 0.0353, "step": 18770 }, { "epoch": 13.758241758241759, "grad_norm": 0.6953125, "learning_rate": 7.141770623090855e-05, "loss": 0.0364, "step": 18780 }, { "epoch": 13.765567765567766, "grad_norm": 0.71875, "learning_rate": 7.138664722995204e-05, "loss": 0.0388, "step": 18790 }, { "epoch": 13.772893772893774, "grad_norm": 0.5625, "learning_rate": 7.135557822151757e-05, "loss": 0.0341, "step": 18800 }, { "epoch": 13.780219780219781, "grad_norm": 0.396484375, "learning_rate": 7.132449922049133e-05, "loss": 0.0383, "step": 18810 }, { "epoch": 13.787545787545788, "grad_norm": 0.5859375, "learning_rate": 7.129341024176429e-05, "loss": 0.0383, "step": 18820 }, { "epoch": 13.794871794871796, "grad_norm": 0.67578125, "learning_rate": 7.126231130023218e-05, "loss": 0.0346, "step": 18830 }, { "epoch": 13.802197802197803, "grad_norm": 0.47265625, "learning_rate": 7.123120241079555e-05, "loss": 0.0341, "step": 18840 }, { "epoch": 13.80952380952381, "grad_norm": 0.486328125, "learning_rate": 7.120008358835967e-05, "loss": 0.0345, "step": 18850 }, { "epoch": 13.816849816849818, "grad_norm": 0.5625, "learning_rate": 7.116895484783461e-05, "loss": 0.0319, "step": 18860 }, { "epoch": 13.824175824175825, "grad_norm": 0.408203125, "learning_rate": 7.113781620413514e-05, "loss": 0.0344, "step": 18870 }, { "epoch": 13.831501831501832, "grad_norm": 0.5078125, "learning_rate": 7.110666767218084e-05, "loss": 0.0386, "step": 18880 }, { "epoch": 13.83882783882784, "grad_norm": 0.462890625, "learning_rate": 7.107550926689595e-05, "loss": 0.0325, "step": 18890 }, { "epoch": 13.846153846153847, "grad_norm": 0.53515625, "learning_rate": 7.104434100320952e-05, "loss": 0.0362, "step": 18900 }, { "epoch": 13.853479853479854, "grad_norm": 0.80078125, "learning_rate": 7.10131628960553e-05, "loss": 0.0353, "step": 18910 }, { "epoch": 13.860805860805861, "grad_norm": 0.78125, "learning_rate": 7.098197496037169e-05, "loss": 0.0414, "step": 18920 }, { "epoch": 13.868131868131869, "grad_norm": 0.41015625, "learning_rate": 7.095077721110189e-05, "loss": 0.0383, "step": 18930 }, { "epoch": 13.875457875457876, "grad_norm": 0.416015625, "learning_rate": 7.091956966319379e-05, "loss": 0.0345, "step": 18940 }, { "epoch": 13.882783882783883, "grad_norm": 0.48828125, "learning_rate": 7.088835233159988e-05, "loss": 0.0327, "step": 18950 }, { "epoch": 13.89010989010989, "grad_norm": 0.6328125, "learning_rate": 7.085712523127749e-05, "loss": 0.0359, "step": 18960 }, { "epoch": 13.897435897435898, "grad_norm": 0.52734375, "learning_rate": 7.082588837718846e-05, "loss": 0.0357, "step": 18970 }, { "epoch": 13.904761904761905, "grad_norm": 0.416015625, "learning_rate": 7.079464178429946e-05, "loss": 0.0347, "step": 18980 }, { "epoch": 13.912087912087912, "grad_norm": 0.65234375, "learning_rate": 7.076338546758175e-05, "loss": 0.0309, "step": 18990 }, { "epoch": 13.91941391941392, "grad_norm": 0.42578125, "learning_rate": 7.073211944201123e-05, "loss": 0.0327, "step": 19000 }, { "epoch": 13.926739926739927, "grad_norm": 0.609375, "learning_rate": 7.070084372256849e-05, "loss": 0.034, "step": 19010 }, { "epoch": 13.934065934065934, "grad_norm": 0.52734375, "learning_rate": 7.066955832423875e-05, "loss": 0.0375, "step": 19020 }, { "epoch": 13.941391941391942, "grad_norm": 0.625, "learning_rate": 7.063826326201188e-05, "loss": 0.0425, "step": 19030 }, { "epoch": 13.948717948717949, "grad_norm": 0.57421875, "learning_rate": 7.060695855088238e-05, "loss": 0.0328, "step": 19040 }, { "epoch": 13.956043956043956, "grad_norm": 0.390625, "learning_rate": 7.057564420584936e-05, "loss": 0.0336, "step": 19050 }, { "epoch": 13.963369963369964, "grad_norm": 0.609375, "learning_rate": 7.054432024191649e-05, "loss": 0.0328, "step": 19060 }, { "epoch": 13.97069597069597, "grad_norm": 0.54296875, "learning_rate": 7.05129866740922e-05, "loss": 0.0362, "step": 19070 }, { "epoch": 13.978021978021978, "grad_norm": 0.734375, "learning_rate": 7.048164351738939e-05, "loss": 0.0365, "step": 19080 }, { "epoch": 13.985347985347985, "grad_norm": 0.416015625, "learning_rate": 7.045029078682559e-05, "loss": 0.0326, "step": 19090 }, { "epoch": 13.992673992673993, "grad_norm": 0.65234375, "learning_rate": 7.041892849742292e-05, "loss": 0.0362, "step": 19100 }, { "epoch": 14.0, "grad_norm": 0.427734375, "learning_rate": 7.038755666420809e-05, "loss": 0.0329, "step": 19110 }, { "epoch": 14.007326007326007, "grad_norm": 0.392578125, "learning_rate": 7.035617530221236e-05, "loss": 0.0329, "step": 19120 }, { "epoch": 14.014652014652015, "grad_norm": 0.56640625, "learning_rate": 7.032478442647161e-05, "loss": 0.0363, "step": 19130 }, { "epoch": 14.021978021978022, "grad_norm": 0.50390625, "learning_rate": 7.02933840520262e-05, "loss": 0.0366, "step": 19140 }, { "epoch": 14.02930402930403, "grad_norm": 0.6015625, "learning_rate": 7.026197419392109e-05, "loss": 0.0379, "step": 19150 }, { "epoch": 14.036630036630036, "grad_norm": 0.62890625, "learning_rate": 7.023055486720578e-05, "loss": 0.0373, "step": 19160 }, { "epoch": 14.043956043956044, "grad_norm": 0.80859375, "learning_rate": 7.019912608693429e-05, "loss": 0.0376, "step": 19170 }, { "epoch": 14.051282051282051, "grad_norm": 0.46484375, "learning_rate": 7.016768786816517e-05, "loss": 0.032, "step": 19180 }, { "epoch": 14.058608058608058, "grad_norm": 0.44140625, "learning_rate": 7.013624022596155e-05, "loss": 0.0333, "step": 19190 }, { "epoch": 14.065934065934066, "grad_norm": 0.56640625, "learning_rate": 7.010478317539096e-05, "loss": 0.0365, "step": 19200 }, { "epoch": 14.073260073260073, "grad_norm": 0.6015625, "learning_rate": 7.007331673152557e-05, "loss": 0.034, "step": 19210 }, { "epoch": 14.08058608058608, "grad_norm": 0.474609375, "learning_rate": 7.004184090944194e-05, "loss": 0.0302, "step": 19220 }, { "epoch": 14.087912087912088, "grad_norm": 0.62890625, "learning_rate": 7.001035572422117e-05, "loss": 0.0375, "step": 19230 }, { "epoch": 14.095238095238095, "grad_norm": 0.6328125, "learning_rate": 6.997886119094888e-05, "loss": 0.0342, "step": 19240 }, { "epoch": 14.102564102564102, "grad_norm": 0.52734375, "learning_rate": 6.994735732471512e-05, "loss": 0.038, "step": 19250 }, { "epoch": 14.10989010989011, "grad_norm": 0.38671875, "learning_rate": 6.99158441406144e-05, "loss": 0.0353, "step": 19260 }, { "epoch": 14.117216117216117, "grad_norm": 0.451171875, "learning_rate": 6.988432165374575e-05, "loss": 0.0334, "step": 19270 }, { "epoch": 14.124542124542124, "grad_norm": 0.87890625, "learning_rate": 6.98527898792126e-05, "loss": 0.038, "step": 19280 }, { "epoch": 14.131868131868131, "grad_norm": 0.65625, "learning_rate": 6.982124883212288e-05, "loss": 0.0365, "step": 19290 }, { "epoch": 14.139194139194139, "grad_norm": 0.4140625, "learning_rate": 6.978969852758894e-05, "loss": 0.0332, "step": 19300 }, { "epoch": 14.146520146520146, "grad_norm": 0.5, "learning_rate": 6.975813898072757e-05, "loss": 0.0337, "step": 19310 }, { "epoch": 14.153846153846153, "grad_norm": 0.423828125, "learning_rate": 6.972657020665997e-05, "loss": 0.0315, "step": 19320 }, { "epoch": 14.16117216117216, "grad_norm": 0.4140625, "learning_rate": 6.96949922205118e-05, "loss": 0.0352, "step": 19330 }, { "epoch": 14.168498168498168, "grad_norm": 0.470703125, "learning_rate": 6.966340503741305e-05, "loss": 0.0362, "step": 19340 }, { "epoch": 14.175824175824175, "grad_norm": 0.70703125, "learning_rate": 6.963180867249824e-05, "loss": 0.0375, "step": 19350 }, { "epoch": 14.183150183150182, "grad_norm": 0.458984375, "learning_rate": 6.96002031409062e-05, "loss": 0.0332, "step": 19360 }, { "epoch": 14.19047619047619, "grad_norm": 0.52734375, "learning_rate": 6.956858845778017e-05, "loss": 0.0358, "step": 19370 }, { "epoch": 14.197802197802197, "grad_norm": 0.458984375, "learning_rate": 6.953696463826779e-05, "loss": 0.0346, "step": 19380 }, { "epoch": 14.205128205128204, "grad_norm": 0.703125, "learning_rate": 6.950533169752109e-05, "loss": 0.0365, "step": 19390 }, { "epoch": 14.212454212454212, "grad_norm": 0.390625, "learning_rate": 6.94736896506964e-05, "loss": 0.0344, "step": 19400 }, { "epoch": 14.219780219780219, "grad_norm": 0.8515625, "learning_rate": 6.944203851295452e-05, "loss": 0.0409, "step": 19410 }, { "epoch": 14.227106227106226, "grad_norm": 0.70703125, "learning_rate": 6.94103782994605e-05, "loss": 0.0332, "step": 19420 }, { "epoch": 14.234432234432234, "grad_norm": 0.482421875, "learning_rate": 6.937870902538381e-05, "loss": 0.0324, "step": 19430 }, { "epoch": 14.241758241758241, "grad_norm": 0.67578125, "learning_rate": 6.934703070589824e-05, "loss": 0.0364, "step": 19440 }, { "epoch": 14.249084249084248, "grad_norm": 0.55859375, "learning_rate": 6.931534335618193e-05, "loss": 0.0349, "step": 19450 }, { "epoch": 14.256410256410255, "grad_norm": 0.39453125, "learning_rate": 6.928364699141729e-05, "loss": 0.0319, "step": 19460 }, { "epoch": 14.263736263736265, "grad_norm": 0.42578125, "learning_rate": 6.92519416267911e-05, "loss": 0.0383, "step": 19470 }, { "epoch": 14.271062271062272, "grad_norm": 0.546875, "learning_rate": 6.922022727749444e-05, "loss": 0.0422, "step": 19480 }, { "epoch": 14.27838827838828, "grad_norm": 0.62109375, "learning_rate": 6.918850395872272e-05, "loss": 0.0434, "step": 19490 }, { "epoch": 14.285714285714286, "grad_norm": 0.8671875, "learning_rate": 6.915677168567558e-05, "loss": 0.0375, "step": 19500 }, { "epoch": 14.293040293040294, "grad_norm": 0.5, "learning_rate": 6.912503047355703e-05, "loss": 0.0375, "step": 19510 }, { "epoch": 14.300366300366301, "grad_norm": 0.47265625, "learning_rate": 6.909328033757532e-05, "loss": 0.0353, "step": 19520 }, { "epoch": 14.307692307692308, "grad_norm": 0.50390625, "learning_rate": 6.906152129294297e-05, "loss": 0.0375, "step": 19530 }, { "epoch": 14.315018315018316, "grad_norm": 0.37890625, "learning_rate": 6.902975335487678e-05, "loss": 0.0323, "step": 19540 }, { "epoch": 14.322344322344323, "grad_norm": 0.484375, "learning_rate": 6.899797653859782e-05, "loss": 0.0379, "step": 19550 }, { "epoch": 14.32967032967033, "grad_norm": 0.734375, "learning_rate": 6.89661908593314e-05, "loss": 0.0378, "step": 19560 }, { "epoch": 14.336996336996338, "grad_norm": 0.494140625, "learning_rate": 6.893439633230709e-05, "loss": 0.0397, "step": 19570 }, { "epoch": 14.344322344322345, "grad_norm": 0.369140625, "learning_rate": 6.890259297275869e-05, "loss": 0.0365, "step": 19580 }, { "epoch": 14.351648351648352, "grad_norm": 0.455078125, "learning_rate": 6.887078079592422e-05, "loss": 0.0322, "step": 19590 }, { "epoch": 14.35897435897436, "grad_norm": 0.41015625, "learning_rate": 6.883895981704593e-05, "loss": 0.0343, "step": 19600 }, { "epoch": 14.366300366300367, "grad_norm": 0.41015625, "learning_rate": 6.880713005137033e-05, "loss": 0.0315, "step": 19610 }, { "epoch": 14.373626373626374, "grad_norm": 0.64453125, "learning_rate": 6.877529151414807e-05, "loss": 0.0341, "step": 19620 }, { "epoch": 14.380952380952381, "grad_norm": 0.46484375, "learning_rate": 6.874344422063406e-05, "loss": 0.033, "step": 19630 }, { "epoch": 14.388278388278389, "grad_norm": 0.37890625, "learning_rate": 6.87115881860874e-05, "loss": 0.0307, "step": 19640 }, { "epoch": 14.395604395604396, "grad_norm": 0.373046875, "learning_rate": 6.86797234257713e-05, "loss": 0.0303, "step": 19650 }, { "epoch": 14.402930402930403, "grad_norm": 0.482421875, "learning_rate": 6.864784995495327e-05, "loss": 0.038, "step": 19660 }, { "epoch": 14.41025641025641, "grad_norm": 0.54296875, "learning_rate": 6.86159677889049e-05, "loss": 0.0364, "step": 19670 }, { "epoch": 14.417582417582418, "grad_norm": 0.3828125, "learning_rate": 6.858407694290202e-05, "loss": 0.0349, "step": 19680 }, { "epoch": 14.424908424908425, "grad_norm": 0.45703125, "learning_rate": 6.855217743222454e-05, "loss": 0.0341, "step": 19690 }, { "epoch": 14.432234432234432, "grad_norm": 0.8671875, "learning_rate": 6.852026927215659e-05, "loss": 0.0407, "step": 19700 }, { "epoch": 14.43956043956044, "grad_norm": 0.42578125, "learning_rate": 6.84883524779864e-05, "loss": 0.0331, "step": 19710 }, { "epoch": 14.446886446886447, "grad_norm": 0.447265625, "learning_rate": 6.845642706500636e-05, "loss": 0.0359, "step": 19720 }, { "epoch": 14.454212454212454, "grad_norm": 0.412109375, "learning_rate": 6.842449304851301e-05, "loss": 0.037, "step": 19730 }, { "epoch": 14.461538461538462, "grad_norm": 0.73828125, "learning_rate": 6.839255044380691e-05, "loss": 0.0352, "step": 19740 }, { "epoch": 14.468864468864469, "grad_norm": 0.380859375, "learning_rate": 6.836059926619288e-05, "loss": 0.0383, "step": 19750 }, { "epoch": 14.476190476190476, "grad_norm": 0.6171875, "learning_rate": 6.832863953097977e-05, "loss": 0.0357, "step": 19760 }, { "epoch": 14.483516483516484, "grad_norm": 0.640625, "learning_rate": 6.82966712534805e-05, "loss": 0.0345, "step": 19770 }, { "epoch": 14.49084249084249, "grad_norm": 0.5625, "learning_rate": 6.826469444901214e-05, "loss": 0.0354, "step": 19780 }, { "epoch": 14.498168498168498, "grad_norm": 0.71875, "learning_rate": 6.823270913289583e-05, "loss": 0.0365, "step": 19790 }, { "epoch": 14.505494505494505, "grad_norm": 0.61328125, "learning_rate": 6.82007153204568e-05, "loss": 0.0366, "step": 19800 }, { "epoch": 14.512820512820513, "grad_norm": 0.49609375, "learning_rate": 6.816871302702431e-05, "loss": 0.0353, "step": 19810 }, { "epoch": 14.52014652014652, "grad_norm": 0.9453125, "learning_rate": 6.813670226793173e-05, "loss": 0.0341, "step": 19820 }, { "epoch": 14.527472527472527, "grad_norm": 0.447265625, "learning_rate": 6.810468305851644e-05, "loss": 0.0399, "step": 19830 }, { "epoch": 14.534798534798535, "grad_norm": 0.375, "learning_rate": 6.807265541411989e-05, "loss": 0.0421, "step": 19840 }, { "epoch": 14.542124542124542, "grad_norm": 1.0546875, "learning_rate": 6.80406193500876e-05, "loss": 0.0416, "step": 19850 }, { "epoch": 14.54945054945055, "grad_norm": 0.625, "learning_rate": 6.800857488176912e-05, "loss": 0.0403, "step": 19860 }, { "epoch": 14.556776556776557, "grad_norm": 0.462890625, "learning_rate": 6.797652202451795e-05, "loss": 0.0367, "step": 19870 }, { "epoch": 14.564102564102564, "grad_norm": 0.58203125, "learning_rate": 6.79444607936917e-05, "loss": 0.033, "step": 19880 }, { "epoch": 14.571428571428571, "grad_norm": 0.42578125, "learning_rate": 6.791239120465193e-05, "loss": 0.0374, "step": 19890 }, { "epoch": 14.578754578754578, "grad_norm": 0.703125, "learning_rate": 6.788031327276429e-05, "loss": 0.0358, "step": 19900 }, { "epoch": 14.586080586080586, "grad_norm": 0.5, "learning_rate": 6.784822701339832e-05, "loss": 0.0327, "step": 19910 }, { "epoch": 14.593406593406593, "grad_norm": 0.36328125, "learning_rate": 6.781613244192759e-05, "loss": 0.0343, "step": 19920 }, { "epoch": 14.6007326007326, "grad_norm": 0.515625, "learning_rate": 6.77840295737297e-05, "loss": 0.035, "step": 19930 }, { "epoch": 14.608058608058608, "grad_norm": 0.578125, "learning_rate": 6.775191842418617e-05, "loss": 0.0344, "step": 19940 }, { "epoch": 14.615384615384615, "grad_norm": 0.453125, "learning_rate": 6.77197990086825e-05, "loss": 0.0373, "step": 19950 }, { "epoch": 14.622710622710622, "grad_norm": 0.4453125, "learning_rate": 6.768767134260818e-05, "loss": 0.0349, "step": 19960 }, { "epoch": 14.63003663003663, "grad_norm": 0.6171875, "learning_rate": 6.765553544135659e-05, "loss": 0.0374, "step": 19970 }, { "epoch": 14.637362637362637, "grad_norm": 0.490234375, "learning_rate": 6.762339132032512e-05, "loss": 0.0332, "step": 19980 }, { "epoch": 14.644688644688644, "grad_norm": 0.77734375, "learning_rate": 6.759123899491507e-05, "loss": 0.0394, "step": 19990 }, { "epoch": 14.652014652014651, "grad_norm": 0.5078125, "learning_rate": 6.755907848053168e-05, "loss": 0.0331, "step": 20000 }, { "epoch": 14.659340659340659, "grad_norm": 0.703125, "learning_rate": 6.752690979258409e-05, "loss": 0.0346, "step": 20010 }, { "epoch": 14.666666666666666, "grad_norm": 0.62890625, "learning_rate": 6.749473294648538e-05, "loss": 0.0345, "step": 20020 }, { "epoch": 14.673992673992673, "grad_norm": 0.39453125, "learning_rate": 6.746254795765255e-05, "loss": 0.0345, "step": 20030 }, { "epoch": 14.68131868131868, "grad_norm": 0.515625, "learning_rate": 6.743035484150648e-05, "loss": 0.0361, "step": 20040 }, { "epoch": 14.688644688644688, "grad_norm": 0.462890625, "learning_rate": 6.73981536134719e-05, "loss": 0.0337, "step": 20050 }, { "epoch": 14.695970695970695, "grad_norm": 0.453125, "learning_rate": 6.736594428897754e-05, "loss": 0.0383, "step": 20060 }, { "epoch": 14.703296703296703, "grad_norm": 0.515625, "learning_rate": 6.733372688345592e-05, "loss": 0.0371, "step": 20070 }, { "epoch": 14.71062271062271, "grad_norm": 0.72265625, "learning_rate": 6.730150141234346e-05, "loss": 0.0344, "step": 20080 }, { "epoch": 14.717948717948717, "grad_norm": 0.53515625, "learning_rate": 6.726926789108042e-05, "loss": 0.0322, "step": 20090 }, { "epoch": 14.725274725274724, "grad_norm": 0.61328125, "learning_rate": 6.723702633511097e-05, "loss": 0.0329, "step": 20100 }, { "epoch": 14.732600732600732, "grad_norm": 0.400390625, "learning_rate": 6.720477675988305e-05, "loss": 0.0359, "step": 20110 }, { "epoch": 14.73992673992674, "grad_norm": 0.466796875, "learning_rate": 6.717251918084853e-05, "loss": 0.0327, "step": 20120 }, { "epoch": 14.747252747252748, "grad_norm": 0.546875, "learning_rate": 6.714025361346307e-05, "loss": 0.0327, "step": 20130 }, { "epoch": 14.754578754578755, "grad_norm": 1.0703125, "learning_rate": 6.710798007318615e-05, "loss": 0.0354, "step": 20140 }, { "epoch": 14.761904761904763, "grad_norm": 0.42578125, "learning_rate": 6.707569857548109e-05, "loss": 0.0385, "step": 20150 }, { "epoch": 14.76923076923077, "grad_norm": 0.859375, "learning_rate": 6.704340913581502e-05, "loss": 0.0386, "step": 20160 }, { "epoch": 14.776556776556777, "grad_norm": 0.73046875, "learning_rate": 6.701111176965885e-05, "loss": 0.0326, "step": 20170 }, { "epoch": 14.783882783882785, "grad_norm": 0.45703125, "learning_rate": 6.697880649248732e-05, "loss": 0.0378, "step": 20180 }, { "epoch": 14.791208791208792, "grad_norm": 0.5, "learning_rate": 6.694649331977894e-05, "loss": 0.035, "step": 20190 }, { "epoch": 14.7985347985348, "grad_norm": 0.6640625, "learning_rate": 6.691417226701603e-05, "loss": 0.0402, "step": 20200 }, { "epoch": 14.805860805860807, "grad_norm": 0.55859375, "learning_rate": 6.688184334968466e-05, "loss": 0.0361, "step": 20210 }, { "epoch": 14.813186813186814, "grad_norm": 0.51171875, "learning_rate": 6.684950658327467e-05, "loss": 0.0364, "step": 20220 }, { "epoch": 14.820512820512821, "grad_norm": 0.515625, "learning_rate": 6.681716198327966e-05, "loss": 0.0331, "step": 20230 }, { "epoch": 14.827838827838828, "grad_norm": 0.91015625, "learning_rate": 6.6784809565197e-05, "loss": 0.0357, "step": 20240 }, { "epoch": 14.835164835164836, "grad_norm": 0.52734375, "learning_rate": 6.675244934452779e-05, "loss": 0.031, "step": 20250 }, { "epoch": 14.842490842490843, "grad_norm": 0.51953125, "learning_rate": 6.672008133677687e-05, "loss": 0.0363, "step": 20260 }, { "epoch": 14.84981684981685, "grad_norm": 0.4453125, "learning_rate": 6.668770555745282e-05, "loss": 0.036, "step": 20270 }, { "epoch": 14.857142857142858, "grad_norm": 0.51171875, "learning_rate": 6.665532202206794e-05, "loss": 0.0311, "step": 20280 }, { "epoch": 14.864468864468865, "grad_norm": 0.5625, "learning_rate": 6.662293074613823e-05, "loss": 0.0309, "step": 20290 }, { "epoch": 14.871794871794872, "grad_norm": 0.4921875, "learning_rate": 6.659053174518343e-05, "loss": 0.0342, "step": 20300 }, { "epoch": 14.87912087912088, "grad_norm": 0.5, "learning_rate": 6.655812503472697e-05, "loss": 0.0329, "step": 20310 }, { "epoch": 14.886446886446887, "grad_norm": 0.388671875, "learning_rate": 6.652571063029595e-05, "loss": 0.0338, "step": 20320 }, { "epoch": 14.893772893772894, "grad_norm": 0.47265625, "learning_rate": 6.649328854742116e-05, "loss": 0.035, "step": 20330 }, { "epoch": 14.901098901098901, "grad_norm": 0.90625, "learning_rate": 6.646085880163713e-05, "loss": 0.0435, "step": 20340 }, { "epoch": 14.908424908424909, "grad_norm": 0.66796875, "learning_rate": 6.642842140848198e-05, "loss": 0.035, "step": 20350 }, { "epoch": 14.915750915750916, "grad_norm": 0.765625, "learning_rate": 6.639597638349755e-05, "loss": 0.0325, "step": 20360 }, { "epoch": 14.923076923076923, "grad_norm": 0.4296875, "learning_rate": 6.636352374222929e-05, "loss": 0.0318, "step": 20370 }, { "epoch": 14.93040293040293, "grad_norm": 0.69140625, "learning_rate": 6.633106350022634e-05, "loss": 0.0305, "step": 20380 }, { "epoch": 14.937728937728938, "grad_norm": 0.478515625, "learning_rate": 6.629859567304146e-05, "loss": 0.0357, "step": 20390 }, { "epoch": 14.945054945054945, "grad_norm": 0.62890625, "learning_rate": 6.626612027623108e-05, "loss": 0.0306, "step": 20400 }, { "epoch": 14.952380952380953, "grad_norm": 0.4765625, "learning_rate": 6.62336373253552e-05, "loss": 0.0385, "step": 20410 }, { "epoch": 14.95970695970696, "grad_norm": 0.61328125, "learning_rate": 6.620114683597747e-05, "loss": 0.0337, "step": 20420 }, { "epoch": 14.967032967032967, "grad_norm": 0.357421875, "learning_rate": 6.616864882366516e-05, "loss": 0.0331, "step": 20430 }, { "epoch": 14.974358974358974, "grad_norm": 0.78515625, "learning_rate": 6.613614330398911e-05, "loss": 0.0425, "step": 20440 }, { "epoch": 14.981684981684982, "grad_norm": 0.54296875, "learning_rate": 6.610363029252381e-05, "loss": 0.033, "step": 20450 }, { "epoch": 14.989010989010989, "grad_norm": 0.427734375, "learning_rate": 6.60711098048473e-05, "loss": 0.0368, "step": 20460 }, { "epoch": 14.996336996336996, "grad_norm": 0.400390625, "learning_rate": 6.603858185654121e-05, "loss": 0.0367, "step": 20470 }, { "epoch": 15.003663003663004, "grad_norm": 0.447265625, "learning_rate": 6.600604646319073e-05, "loss": 0.0324, "step": 20480 }, { "epoch": 15.010989010989011, "grad_norm": 0.7578125, "learning_rate": 6.597350364038469e-05, "loss": 0.0325, "step": 20490 }, { "epoch": 15.018315018315018, "grad_norm": 0.4375, "learning_rate": 6.594095340371536e-05, "loss": 0.0311, "step": 20500 }, { "epoch": 15.025641025641026, "grad_norm": 0.474609375, "learning_rate": 6.590839576877868e-05, "loss": 0.0373, "step": 20510 }, { "epoch": 15.032967032967033, "grad_norm": 0.58203125, "learning_rate": 6.587583075117402e-05, "loss": 0.0351, "step": 20520 }, { "epoch": 15.04029304029304, "grad_norm": 0.427734375, "learning_rate": 6.58432583665044e-05, "loss": 0.0325, "step": 20530 }, { "epoch": 15.047619047619047, "grad_norm": 0.453125, "learning_rate": 6.58106786303763e-05, "loss": 0.0369, "step": 20540 }, { "epoch": 15.054945054945055, "grad_norm": 0.6015625, "learning_rate": 6.577809155839974e-05, "loss": 0.0352, "step": 20550 }, { "epoch": 15.062271062271062, "grad_norm": 0.66796875, "learning_rate": 6.574549716618825e-05, "loss": 0.0386, "step": 20560 }, { "epoch": 15.06959706959707, "grad_norm": 0.392578125, "learning_rate": 6.57128954693589e-05, "loss": 0.0362, "step": 20570 }, { "epoch": 15.076923076923077, "grad_norm": 0.81640625, "learning_rate": 6.56802864835322e-05, "loss": 0.0368, "step": 20580 }, { "epoch": 15.084249084249084, "grad_norm": 0.56640625, "learning_rate": 6.564767022433219e-05, "loss": 0.0326, "step": 20590 }, { "epoch": 15.091575091575091, "grad_norm": 0.7734375, "learning_rate": 6.561504670738641e-05, "loss": 0.0392, "step": 20600 }, { "epoch": 15.098901098901099, "grad_norm": 0.58203125, "learning_rate": 6.558241594832583e-05, "loss": 0.036, "step": 20610 }, { "epoch": 15.106227106227106, "grad_norm": 0.6953125, "learning_rate": 6.554977796278494e-05, "loss": 0.0316, "step": 20620 }, { "epoch": 15.113553113553113, "grad_norm": 0.69140625, "learning_rate": 6.551713276640166e-05, "loss": 0.032, "step": 20630 }, { "epoch": 15.12087912087912, "grad_norm": 0.310546875, "learning_rate": 6.548448037481737e-05, "loss": 0.0316, "step": 20640 }, { "epoch": 15.128205128205128, "grad_norm": 0.5625, "learning_rate": 6.54518208036769e-05, "loss": 0.0341, "step": 20650 }, { "epoch": 15.135531135531135, "grad_norm": 0.97265625, "learning_rate": 6.541915406862856e-05, "loss": 0.0405, "step": 20660 }, { "epoch": 15.142857142857142, "grad_norm": 0.4921875, "learning_rate": 6.538648018532403e-05, "loss": 0.0351, "step": 20670 }, { "epoch": 15.15018315018315, "grad_norm": 0.6796875, "learning_rate": 6.535379916941841e-05, "loss": 0.0332, "step": 20680 }, { "epoch": 15.157509157509157, "grad_norm": 0.55078125, "learning_rate": 6.532111103657026e-05, "loss": 0.0361, "step": 20690 }, { "epoch": 15.164835164835164, "grad_norm": 0.74609375, "learning_rate": 6.528841580244155e-05, "loss": 0.035, "step": 20700 }, { "epoch": 15.172161172161172, "grad_norm": 0.458984375, "learning_rate": 6.525571348269766e-05, "loss": 0.0317, "step": 20710 }, { "epoch": 15.179487179487179, "grad_norm": 0.51171875, "learning_rate": 6.522300409300733e-05, "loss": 0.0336, "step": 20720 }, { "epoch": 15.186813186813186, "grad_norm": 0.47265625, "learning_rate": 6.519028764904265e-05, "loss": 0.0356, "step": 20730 }, { "epoch": 15.194139194139193, "grad_norm": 0.384765625, "learning_rate": 6.515756416647921e-05, "loss": 0.0329, "step": 20740 }, { "epoch": 15.2014652014652, "grad_norm": 0.388671875, "learning_rate": 6.512483366099588e-05, "loss": 0.0325, "step": 20750 }, { "epoch": 15.208791208791208, "grad_norm": 0.52734375, "learning_rate": 6.509209614827491e-05, "loss": 0.0341, "step": 20760 }, { "epoch": 15.216117216117215, "grad_norm": 0.4375, "learning_rate": 6.505935164400192e-05, "loss": 0.0336, "step": 20770 }, { "epoch": 15.223443223443223, "grad_norm": 0.7890625, "learning_rate": 6.50266001638659e-05, "loss": 0.0398, "step": 20780 }, { "epoch": 15.23076923076923, "grad_norm": 0.40234375, "learning_rate": 6.499384172355912e-05, "loss": 0.0349, "step": 20790 }, { "epoch": 15.238095238095237, "grad_norm": 0.671875, "learning_rate": 6.496107633877726e-05, "loss": 0.0392, "step": 20800 }, { "epoch": 15.245421245421245, "grad_norm": 0.65234375, "learning_rate": 6.492830402521928e-05, "loss": 0.0342, "step": 20810 }, { "epoch": 15.252747252747252, "grad_norm": 0.455078125, "learning_rate": 6.489552479858745e-05, "loss": 0.0369, "step": 20820 }, { "epoch": 15.260073260073261, "grad_norm": 0.54296875, "learning_rate": 6.486273867458738e-05, "loss": 0.0319, "step": 20830 }, { "epoch": 15.267399267399268, "grad_norm": 0.6875, "learning_rate": 6.482994566892798e-05, "loss": 0.036, "step": 20840 }, { "epoch": 15.274725274725276, "grad_norm": 0.427734375, "learning_rate": 6.47971457973215e-05, "loss": 0.0346, "step": 20850 }, { "epoch": 15.282051282051283, "grad_norm": 0.40625, "learning_rate": 6.476433907548335e-05, "loss": 0.0331, "step": 20860 }, { "epoch": 15.28937728937729, "grad_norm": 0.49609375, "learning_rate": 6.473152551913238e-05, "loss": 0.0316, "step": 20870 }, { "epoch": 15.296703296703297, "grad_norm": 0.494140625, "learning_rate": 6.46987051439906e-05, "loss": 0.0325, "step": 20880 }, { "epoch": 15.304029304029305, "grad_norm": 0.439453125, "learning_rate": 6.466587796578335e-05, "loss": 0.0328, "step": 20890 }, { "epoch": 15.311355311355312, "grad_norm": 0.4453125, "learning_rate": 6.463304400023919e-05, "loss": 0.0344, "step": 20900 }, { "epoch": 15.31868131868132, "grad_norm": 0.46484375, "learning_rate": 6.460020326308998e-05, "loss": 0.037, "step": 20910 }, { "epoch": 15.326007326007327, "grad_norm": 0.376953125, "learning_rate": 6.456735577007075e-05, "loss": 0.0363, "step": 20920 }, { "epoch": 15.333333333333334, "grad_norm": 0.6875, "learning_rate": 6.453450153691984e-05, "loss": 0.0335, "step": 20930 }, { "epoch": 15.340659340659341, "grad_norm": 0.466796875, "learning_rate": 6.450164057937879e-05, "loss": 0.0339, "step": 20940 }, { "epoch": 15.347985347985349, "grad_norm": 0.4609375, "learning_rate": 6.446877291319237e-05, "loss": 0.0336, "step": 20950 }, { "epoch": 15.355311355311356, "grad_norm": 0.5390625, "learning_rate": 6.443589855410852e-05, "loss": 0.0373, "step": 20960 }, { "epoch": 15.362637362637363, "grad_norm": 0.3515625, "learning_rate": 6.440301751787848e-05, "loss": 0.0397, "step": 20970 }, { "epoch": 15.36996336996337, "grad_norm": 0.46484375, "learning_rate": 6.437012982025659e-05, "loss": 0.0333, "step": 20980 }, { "epoch": 15.377289377289378, "grad_norm": 0.4140625, "learning_rate": 6.433723547700045e-05, "loss": 0.0398, "step": 20990 }, { "epoch": 15.384615384615385, "grad_norm": 0.56640625, "learning_rate": 6.43043345038708e-05, "loss": 0.0399, "step": 21000 }, { "epoch": 15.391941391941392, "grad_norm": 0.4609375, "learning_rate": 6.427142691663158e-05, "loss": 0.0309, "step": 21010 }, { "epoch": 15.3992673992674, "grad_norm": 0.55078125, "learning_rate": 6.42385127310499e-05, "loss": 0.0322, "step": 21020 }, { "epoch": 15.406593406593407, "grad_norm": 0.46484375, "learning_rate": 6.420559196289603e-05, "loss": 0.0341, "step": 21030 }, { "epoch": 15.413919413919414, "grad_norm": 0.466796875, "learning_rate": 6.41726646279434e-05, "loss": 0.0337, "step": 21040 }, { "epoch": 15.421245421245422, "grad_norm": 0.515625, "learning_rate": 6.413973074196854e-05, "loss": 0.034, "step": 21050 }, { "epoch": 15.428571428571429, "grad_norm": 0.8046875, "learning_rate": 6.410679032075117e-05, "loss": 0.0364, "step": 21060 }, { "epoch": 15.435897435897436, "grad_norm": 0.76953125, "learning_rate": 6.407384338007413e-05, "loss": 0.0317, "step": 21070 }, { "epoch": 15.443223443223443, "grad_norm": 0.609375, "learning_rate": 6.404088993572342e-05, "loss": 0.0441, "step": 21080 }, { "epoch": 15.45054945054945, "grad_norm": 0.40625, "learning_rate": 6.400793000348805e-05, "loss": 0.0327, "step": 21090 }, { "epoch": 15.457875457875458, "grad_norm": 0.84765625, "learning_rate": 6.397496359916026e-05, "loss": 0.038, "step": 21100 }, { "epoch": 15.465201465201465, "grad_norm": 0.427734375, "learning_rate": 6.394199073853528e-05, "loss": 0.032, "step": 21110 }, { "epoch": 15.472527472527473, "grad_norm": 0.58203125, "learning_rate": 6.390901143741155e-05, "loss": 0.0335, "step": 21120 }, { "epoch": 15.47985347985348, "grad_norm": 0.404296875, "learning_rate": 6.387602571159049e-05, "loss": 0.0327, "step": 21130 }, { "epoch": 15.487179487179487, "grad_norm": 0.48046875, "learning_rate": 6.384303357687665e-05, "loss": 0.0353, "step": 21140 }, { "epoch": 15.494505494505495, "grad_norm": 0.62890625, "learning_rate": 6.381003504907765e-05, "loss": 0.0364, "step": 21150 }, { "epoch": 15.501831501831502, "grad_norm": 0.48828125, "learning_rate": 6.377703014400419e-05, "loss": 0.0323, "step": 21160 }, { "epoch": 15.50915750915751, "grad_norm": 0.52734375, "learning_rate": 6.374401887746999e-05, "loss": 0.0364, "step": 21170 }, { "epoch": 15.516483516483516, "grad_norm": 0.47265625, "learning_rate": 6.371100126529179e-05, "loss": 0.0342, "step": 21180 }, { "epoch": 15.523809523809524, "grad_norm": 0.4765625, "learning_rate": 6.367797732328946e-05, "loss": 0.0337, "step": 21190 }, { "epoch": 15.531135531135531, "grad_norm": 0.71875, "learning_rate": 6.364494706728584e-05, "loss": 0.0343, "step": 21200 }, { "epoch": 15.538461538461538, "grad_norm": 0.392578125, "learning_rate": 6.361191051310683e-05, "loss": 0.0336, "step": 21210 }, { "epoch": 15.545787545787546, "grad_norm": 0.37890625, "learning_rate": 6.357886767658128e-05, "loss": 0.0325, "step": 21220 }, { "epoch": 15.553113553113553, "grad_norm": 0.376953125, "learning_rate": 6.354581857354113e-05, "loss": 0.0377, "step": 21230 }, { "epoch": 15.56043956043956, "grad_norm": 0.416015625, "learning_rate": 6.351276321982128e-05, "loss": 0.0369, "step": 21240 }, { "epoch": 15.567765567765568, "grad_norm": 0.390625, "learning_rate": 6.347970163125965e-05, "loss": 0.0365, "step": 21250 }, { "epoch": 15.575091575091575, "grad_norm": 0.6953125, "learning_rate": 6.344663382369712e-05, "loss": 0.039, "step": 21260 }, { "epoch": 15.582417582417582, "grad_norm": 0.51953125, "learning_rate": 6.341355981297753e-05, "loss": 0.0398, "step": 21270 }, { "epoch": 15.58974358974359, "grad_norm": 0.447265625, "learning_rate": 6.338047961494777e-05, "loss": 0.0372, "step": 21280 }, { "epoch": 15.597069597069597, "grad_norm": 0.435546875, "learning_rate": 6.334739324545762e-05, "loss": 0.0337, "step": 21290 }, { "epoch": 15.604395604395604, "grad_norm": 0.353515625, "learning_rate": 6.331430072035988e-05, "loss": 0.0388, "step": 21300 }, { "epoch": 15.611721611721611, "grad_norm": 0.453125, "learning_rate": 6.328120205551024e-05, "loss": 0.0294, "step": 21310 }, { "epoch": 15.619047619047619, "grad_norm": 0.8359375, "learning_rate": 6.324809726676734e-05, "loss": 0.0353, "step": 21320 }, { "epoch": 15.626373626373626, "grad_norm": 0.69921875, "learning_rate": 6.32149863699928e-05, "loss": 0.0334, "step": 21330 }, { "epoch": 15.633699633699633, "grad_norm": 0.5703125, "learning_rate": 6.318186938105113e-05, "loss": 0.0341, "step": 21340 }, { "epoch": 15.64102564102564, "grad_norm": 0.4453125, "learning_rate": 6.314874631580976e-05, "loss": 0.0332, "step": 21350 }, { "epoch": 15.648351648351648, "grad_norm": 0.4765625, "learning_rate": 6.311561719013904e-05, "loss": 0.0394, "step": 21360 }, { "epoch": 15.655677655677655, "grad_norm": 0.421875, "learning_rate": 6.308248201991219e-05, "loss": 0.0366, "step": 21370 }, { "epoch": 15.663003663003662, "grad_norm": 0.46484375, "learning_rate": 6.304934082100541e-05, "loss": 0.034, "step": 21380 }, { "epoch": 15.67032967032967, "grad_norm": 0.55078125, "learning_rate": 6.301619360929772e-05, "loss": 0.0332, "step": 21390 }, { "epoch": 15.677655677655677, "grad_norm": 0.39453125, "learning_rate": 6.298304040067103e-05, "loss": 0.0346, "step": 21400 }, { "epoch": 15.684981684981684, "grad_norm": 0.640625, "learning_rate": 6.294988121101011e-05, "loss": 0.0354, "step": 21410 }, { "epoch": 15.692307692307692, "grad_norm": 0.439453125, "learning_rate": 6.291671605620265e-05, "loss": 0.0337, "step": 21420 }, { "epoch": 15.699633699633699, "grad_norm": 0.52734375, "learning_rate": 6.288354495213913e-05, "loss": 0.0357, "step": 21430 }, { "epoch": 15.706959706959706, "grad_norm": 0.6484375, "learning_rate": 6.285036791471296e-05, "loss": 0.0375, "step": 21440 }, { "epoch": 15.714285714285714, "grad_norm": 0.6953125, "learning_rate": 6.28171849598203e-05, "loss": 0.0354, "step": 21450 }, { "epoch": 15.72161172161172, "grad_norm": 0.49609375, "learning_rate": 6.278399610336021e-05, "loss": 0.0309, "step": 21460 }, { "epoch": 15.728937728937728, "grad_norm": 0.53125, "learning_rate": 6.275080136123458e-05, "loss": 0.036, "step": 21470 }, { "epoch": 15.736263736263737, "grad_norm": 0.51171875, "learning_rate": 6.271760074934808e-05, "loss": 0.0356, "step": 21480 }, { "epoch": 15.743589743589745, "grad_norm": 0.578125, "learning_rate": 6.26843942836082e-05, "loss": 0.0365, "step": 21490 }, { "epoch": 15.750915750915752, "grad_norm": 0.478515625, "learning_rate": 6.265118197992528e-05, "loss": 0.0355, "step": 21500 }, { "epoch": 15.758241758241759, "grad_norm": 0.361328125, "learning_rate": 6.261796385421239e-05, "loss": 0.0326, "step": 21510 }, { "epoch": 15.765567765567766, "grad_norm": 0.470703125, "learning_rate": 6.258473992238544e-05, "loss": 0.0397, "step": 21520 }, { "epoch": 15.772893772893774, "grad_norm": 0.392578125, "learning_rate": 6.255151020036312e-05, "loss": 0.0315, "step": 21530 }, { "epoch": 15.780219780219781, "grad_norm": 0.38671875, "learning_rate": 6.251827470406685e-05, "loss": 0.0327, "step": 21540 }, { "epoch": 15.787545787545788, "grad_norm": 0.455078125, "learning_rate": 6.248503344942087e-05, "loss": 0.0309, "step": 21550 }, { "epoch": 15.794871794871796, "grad_norm": 0.388671875, "learning_rate": 6.245178645235215e-05, "loss": 0.0305, "step": 21560 }, { "epoch": 15.802197802197803, "grad_norm": 0.51953125, "learning_rate": 6.241853372879042e-05, "loss": 0.0334, "step": 21570 }, { "epoch": 15.80952380952381, "grad_norm": 0.458984375, "learning_rate": 6.238527529466814e-05, "loss": 0.0361, "step": 21580 }, { "epoch": 15.816849816849818, "grad_norm": 0.70703125, "learning_rate": 6.235201116592053e-05, "loss": 0.0407, "step": 21590 }, { "epoch": 15.824175824175825, "grad_norm": 0.462890625, "learning_rate": 6.231874135848551e-05, "loss": 0.0329, "step": 21600 }, { "epoch": 15.831501831501832, "grad_norm": 0.75390625, "learning_rate": 6.228546588830376e-05, "loss": 0.0355, "step": 21610 }, { "epoch": 15.83882783882784, "grad_norm": 0.498046875, "learning_rate": 6.225218477131862e-05, "loss": 0.036, "step": 21620 }, { "epoch": 15.846153846153847, "grad_norm": 0.462890625, "learning_rate": 6.221889802347618e-05, "loss": 0.0343, "step": 21630 }, { "epoch": 15.853479853479854, "grad_norm": 0.71875, "learning_rate": 6.218560566072521e-05, "loss": 0.0329, "step": 21640 }, { "epoch": 15.860805860805861, "grad_norm": 0.484375, "learning_rate": 6.215230769901718e-05, "loss": 0.0316, "step": 21650 }, { "epoch": 15.868131868131869, "grad_norm": 0.4140625, "learning_rate": 6.211900415430625e-05, "loss": 0.0319, "step": 21660 }, { "epoch": 15.875457875457876, "grad_norm": 0.80078125, "learning_rate": 6.20856950425492e-05, "loss": 0.0346, "step": 21670 }, { "epoch": 15.882783882783883, "grad_norm": 0.48828125, "learning_rate": 6.205238037970554e-05, "loss": 0.0329, "step": 21680 }, { "epoch": 15.89010989010989, "grad_norm": 0.37109375, "learning_rate": 6.201906018173742e-05, "loss": 0.0333, "step": 21690 }, { "epoch": 15.897435897435898, "grad_norm": 0.53125, "learning_rate": 6.198573446460965e-05, "loss": 0.038, "step": 21700 }, { "epoch": 15.904761904761905, "grad_norm": 0.5546875, "learning_rate": 6.195240324428966e-05, "loss": 0.0339, "step": 21710 }, { "epoch": 15.912087912087912, "grad_norm": 0.478515625, "learning_rate": 6.191906653674753e-05, "loss": 0.0323, "step": 21720 }, { "epoch": 15.91941391941392, "grad_norm": 0.7421875, "learning_rate": 6.188572435795598e-05, "loss": 0.0374, "step": 21730 }, { "epoch": 15.926739926739927, "grad_norm": 0.341796875, "learning_rate": 6.185237672389034e-05, "loss": 0.0329, "step": 21740 }, { "epoch": 15.934065934065934, "grad_norm": 0.65625, "learning_rate": 6.181902365052854e-05, "loss": 0.0347, "step": 21750 }, { "epoch": 15.941391941391942, "grad_norm": 0.5390625, "learning_rate": 6.178566515385115e-05, "loss": 0.0355, "step": 21760 }, { "epoch": 15.948717948717949, "grad_norm": 0.3828125, "learning_rate": 6.175230124984133e-05, "loss": 0.0332, "step": 21770 }, { "epoch": 15.956043956043956, "grad_norm": 0.3984375, "learning_rate": 6.171893195448478e-05, "loss": 0.0343, "step": 21780 }, { "epoch": 15.963369963369964, "grad_norm": 0.5, "learning_rate": 6.168555728376986e-05, "loss": 0.0327, "step": 21790 }, { "epoch": 15.97069597069597, "grad_norm": 0.7734375, "learning_rate": 6.165217725368749e-05, "loss": 0.0348, "step": 21800 }, { "epoch": 15.978021978021978, "grad_norm": 0.71875, "learning_rate": 6.161879188023106e-05, "loss": 0.039, "step": 21810 }, { "epoch": 15.985347985347985, "grad_norm": 0.447265625, "learning_rate": 6.158540117939667e-05, "loss": 0.0324, "step": 21820 }, { "epoch": 15.992673992673993, "grad_norm": 0.54296875, "learning_rate": 6.155200516718285e-05, "loss": 0.0358, "step": 21830 }, { "epoch": 16.0, "grad_norm": 0.75390625, "learning_rate": 6.151860385959078e-05, "loss": 0.0328, "step": 21840 }, { "epoch": 16.007326007326007, "grad_norm": 0.53515625, "learning_rate": 6.148519727262407e-05, "loss": 0.036, "step": 21850 }, { "epoch": 16.014652014652015, "grad_norm": 0.59765625, "learning_rate": 6.145178542228894e-05, "loss": 0.0319, "step": 21860 }, { "epoch": 16.021978021978022, "grad_norm": 0.9140625, "learning_rate": 6.141836832459412e-05, "loss": 0.0356, "step": 21870 }, { "epoch": 16.02930402930403, "grad_norm": 0.50390625, "learning_rate": 6.13849459955508e-05, "loss": 0.034, "step": 21880 }, { "epoch": 16.036630036630036, "grad_norm": 0.431640625, "learning_rate": 6.135151845117274e-05, "loss": 0.0334, "step": 21890 }, { "epoch": 16.043956043956044, "grad_norm": 0.49609375, "learning_rate": 6.131808570747613e-05, "loss": 0.0336, "step": 21900 }, { "epoch": 16.05128205128205, "grad_norm": 0.337890625, "learning_rate": 6.128464778047976e-05, "loss": 0.0341, "step": 21910 }, { "epoch": 16.05860805860806, "grad_norm": 0.341796875, "learning_rate": 6.12512046862048e-05, "loss": 0.0316, "step": 21920 }, { "epoch": 16.065934065934066, "grad_norm": 0.6796875, "learning_rate": 6.121775644067493e-05, "loss": 0.0417, "step": 21930 }, { "epoch": 16.073260073260073, "grad_norm": 0.6875, "learning_rate": 6.118430305991633e-05, "loss": 0.038, "step": 21940 }, { "epoch": 16.08058608058608, "grad_norm": 0.51171875, "learning_rate": 6.115084455995757e-05, "loss": 0.0356, "step": 21950 }, { "epoch": 16.087912087912088, "grad_norm": 0.49609375, "learning_rate": 6.111738095682975e-05, "loss": 0.0343, "step": 21960 }, { "epoch": 16.095238095238095, "grad_norm": 0.69921875, "learning_rate": 6.108391226656635e-05, "loss": 0.0301, "step": 21970 }, { "epoch": 16.102564102564102, "grad_norm": 0.60546875, "learning_rate": 6.105043850520336e-05, "loss": 0.0412, "step": 21980 }, { "epoch": 16.10989010989011, "grad_norm": 0.330078125, "learning_rate": 6.101695968877915e-05, "loss": 0.0338, "step": 21990 }, { "epoch": 16.117216117216117, "grad_norm": 0.48828125, "learning_rate": 6.098347583333447e-05, "loss": 0.0338, "step": 22000 }, { "epoch": 16.124542124542124, "grad_norm": 0.439453125, "learning_rate": 6.0949986954912574e-05, "loss": 0.0351, "step": 22010 }, { "epoch": 16.13186813186813, "grad_norm": 0.5625, "learning_rate": 6.091649306955908e-05, "loss": 0.0368, "step": 22020 }, { "epoch": 16.13919413919414, "grad_norm": 0.609375, "learning_rate": 6.0882994193322006e-05, "loss": 0.0344, "step": 22030 }, { "epoch": 16.146520146520146, "grad_norm": 0.58984375, "learning_rate": 6.084949034225176e-05, "loss": 0.0339, "step": 22040 }, { "epoch": 16.153846153846153, "grad_norm": 0.419921875, "learning_rate": 6.081598153240111e-05, "loss": 0.0322, "step": 22050 }, { "epoch": 16.16117216117216, "grad_norm": 0.390625, "learning_rate": 6.0782467779825293e-05, "loss": 0.0326, "step": 22060 }, { "epoch": 16.168498168498168, "grad_norm": 0.515625, "learning_rate": 6.0748949100581786e-05, "loss": 0.0308, "step": 22070 }, { "epoch": 16.175824175824175, "grad_norm": 0.447265625, "learning_rate": 6.07154255107305e-05, "loss": 0.0356, "step": 22080 }, { "epoch": 16.183150183150182, "grad_norm": 0.427734375, "learning_rate": 6.06818970263337e-05, "loss": 0.0353, "step": 22090 }, { "epoch": 16.19047619047619, "grad_norm": 0.7734375, "learning_rate": 6.064836366345598e-05, "loss": 0.0358, "step": 22100 }, { "epoch": 16.197802197802197, "grad_norm": 0.447265625, "learning_rate": 6.061482543816428e-05, "loss": 0.0365, "step": 22110 }, { "epoch": 16.205128205128204, "grad_norm": 0.365234375, "learning_rate": 6.058128236652786e-05, "loss": 0.0337, "step": 22120 }, { "epoch": 16.21245421245421, "grad_norm": 0.431640625, "learning_rate": 6.0547734464618276e-05, "loss": 0.0309, "step": 22130 }, { "epoch": 16.21978021978022, "grad_norm": 0.48046875, "learning_rate": 6.051418174850947e-05, "loss": 0.0309, "step": 22140 }, { "epoch": 16.227106227106226, "grad_norm": 0.43359375, "learning_rate": 6.048062423427763e-05, "loss": 0.0347, "step": 22150 }, { "epoch": 16.234432234432234, "grad_norm": 0.59375, "learning_rate": 6.044706193800128e-05, "loss": 0.036, "step": 22160 }, { "epoch": 16.24175824175824, "grad_norm": 0.435546875, "learning_rate": 6.041349487576119e-05, "loss": 0.0298, "step": 22170 }, { "epoch": 16.249084249084248, "grad_norm": 0.70703125, "learning_rate": 6.037992306364046e-05, "loss": 0.0333, "step": 22180 }, { "epoch": 16.256410256410255, "grad_norm": 0.4375, "learning_rate": 6.034634651772443e-05, "loss": 0.031, "step": 22190 }, { "epoch": 16.263736263736263, "grad_norm": 0.453125, "learning_rate": 6.0312765254100744e-05, "loss": 0.0349, "step": 22200 }, { "epoch": 16.27106227106227, "grad_norm": 0.388671875, "learning_rate": 6.0279179288859275e-05, "loss": 0.0314, "step": 22210 }, { "epoch": 16.278388278388277, "grad_norm": 0.396484375, "learning_rate": 6.0245588638092164e-05, "loss": 0.0335, "step": 22220 }, { "epoch": 16.285714285714285, "grad_norm": 0.74609375, "learning_rate": 6.02119933178938e-05, "loss": 0.0357, "step": 22230 }, { "epoch": 16.293040293040292, "grad_norm": 0.6328125, "learning_rate": 6.0178393344360784e-05, "loss": 0.0344, "step": 22240 }, { "epoch": 16.3003663003663, "grad_norm": 0.84375, "learning_rate": 6.014478873359201e-05, "loss": 0.0346, "step": 22250 }, { "epoch": 16.307692307692307, "grad_norm": 0.384765625, "learning_rate": 6.01111795016885e-05, "loss": 0.0358, "step": 22260 }, { "epoch": 16.315018315018314, "grad_norm": 0.5546875, "learning_rate": 6.0077565664753544e-05, "loss": 0.0308, "step": 22270 }, { "epoch": 16.32234432234432, "grad_norm": 0.40234375, "learning_rate": 6.0043947238892664e-05, "loss": 0.0325, "step": 22280 }, { "epoch": 16.32967032967033, "grad_norm": 0.443359375, "learning_rate": 6.001032424021353e-05, "loss": 0.0319, "step": 22290 }, { "epoch": 16.336996336996336, "grad_norm": 0.48828125, "learning_rate": 5.997669668482603e-05, "loss": 0.0307, "step": 22300 }, { "epoch": 16.344322344322343, "grad_norm": 0.388671875, "learning_rate": 5.9943064588842235e-05, "loss": 0.0322, "step": 22310 }, { "epoch": 16.35164835164835, "grad_norm": 0.62890625, "learning_rate": 5.990942796837637e-05, "loss": 0.0385, "step": 22320 }, { "epoch": 16.358974358974358, "grad_norm": 0.39453125, "learning_rate": 5.9875786839544846e-05, "loss": 0.0359, "step": 22330 }, { "epoch": 16.366300366300365, "grad_norm": 0.408203125, "learning_rate": 5.984214121846625e-05, "loss": 0.0327, "step": 22340 }, { "epoch": 16.373626373626372, "grad_norm": 0.373046875, "learning_rate": 5.980849112126129e-05, "loss": 0.034, "step": 22350 }, { "epoch": 16.38095238095238, "grad_norm": 0.578125, "learning_rate": 5.977483656405283e-05, "loss": 0.0301, "step": 22360 }, { "epoch": 16.388278388278387, "grad_norm": 0.55859375, "learning_rate": 5.9741177562965866e-05, "loss": 0.0353, "step": 22370 }, { "epoch": 16.395604395604394, "grad_norm": 0.40234375, "learning_rate": 5.970751413412756e-05, "loss": 0.038, "step": 22380 }, { "epoch": 16.4029304029304, "grad_norm": 0.6953125, "learning_rate": 5.967384629366713e-05, "loss": 0.0334, "step": 22390 }, { "epoch": 16.41025641025641, "grad_norm": 0.470703125, "learning_rate": 5.964017405771596e-05, "loss": 0.0382, "step": 22400 }, { "epoch": 16.417582417582416, "grad_norm": 0.4921875, "learning_rate": 5.9606497442407515e-05, "loss": 0.0392, "step": 22410 }, { "epoch": 16.424908424908423, "grad_norm": 0.357421875, "learning_rate": 5.957281646387738e-05, "loss": 0.0313, "step": 22420 }, { "epoch": 16.43223443223443, "grad_norm": 0.68359375, "learning_rate": 5.9539131138263206e-05, "loss": 0.0317, "step": 22430 }, { "epoch": 16.439560439560438, "grad_norm": 0.41796875, "learning_rate": 5.950544148170474e-05, "loss": 0.0318, "step": 22440 }, { "epoch": 16.446886446886445, "grad_norm": 0.49609375, "learning_rate": 5.94717475103438e-05, "loss": 0.0413, "step": 22450 }, { "epoch": 16.454212454212453, "grad_norm": 0.365234375, "learning_rate": 5.943804924032426e-05, "loss": 0.038, "step": 22460 }, { "epoch": 16.46153846153846, "grad_norm": 0.421875, "learning_rate": 5.9404346687792104e-05, "loss": 0.0331, "step": 22470 }, { "epoch": 16.468864468864467, "grad_norm": 0.46875, "learning_rate": 5.937063986889529e-05, "loss": 0.033, "step": 22480 }, { "epoch": 16.476190476190474, "grad_norm": 0.73828125, "learning_rate": 5.9336928799783885e-05, "loss": 0.0389, "step": 22490 }, { "epoch": 16.483516483516482, "grad_norm": 0.3984375, "learning_rate": 5.930321349660997e-05, "loss": 0.0314, "step": 22500 }, { "epoch": 16.49084249084249, "grad_norm": 0.4921875, "learning_rate": 5.9269493975527635e-05, "loss": 0.032, "step": 22510 }, { "epoch": 16.498168498168496, "grad_norm": 0.640625, "learning_rate": 5.9235770252693026e-05, "loss": 0.0316, "step": 22520 }, { "epoch": 16.505494505494504, "grad_norm": 0.56640625, "learning_rate": 5.920204234426428e-05, "loss": 0.0349, "step": 22530 }, { "epoch": 16.51282051282051, "grad_norm": 0.828125, "learning_rate": 5.9168310266401526e-05, "loss": 0.0307, "step": 22540 }, { "epoch": 16.520146520146522, "grad_norm": 0.328125, "learning_rate": 5.913457403526692e-05, "loss": 0.033, "step": 22550 }, { "epoch": 16.52747252747253, "grad_norm": 0.306640625, "learning_rate": 5.9100833667024635e-05, "loss": 0.0299, "step": 22560 }, { "epoch": 16.534798534798536, "grad_norm": 0.70703125, "learning_rate": 5.906708917784074e-05, "loss": 0.0366, "step": 22570 }, { "epoch": 16.542124542124544, "grad_norm": 0.408203125, "learning_rate": 5.903334058388334e-05, "loss": 0.0291, "step": 22580 }, { "epoch": 16.54945054945055, "grad_norm": 0.51953125, "learning_rate": 5.8999587901322495e-05, "loss": 0.0327, "step": 22590 }, { "epoch": 16.55677655677656, "grad_norm": 0.3359375, "learning_rate": 5.896583114633023e-05, "loss": 0.0306, "step": 22600 }, { "epoch": 16.564102564102566, "grad_norm": 0.9609375, "learning_rate": 5.8932070335080504e-05, "loss": 0.0335, "step": 22610 }, { "epoch": 16.571428571428573, "grad_norm": 0.458984375, "learning_rate": 5.8898305483749236e-05, "loss": 0.0326, "step": 22620 }, { "epoch": 16.57875457875458, "grad_norm": 0.365234375, "learning_rate": 5.8864536608514274e-05, "loss": 0.0305, "step": 22630 }, { "epoch": 16.586080586080588, "grad_norm": 0.6328125, "learning_rate": 5.883076372555538e-05, "loss": 0.0341, "step": 22640 }, { "epoch": 16.593406593406595, "grad_norm": 0.91796875, "learning_rate": 5.879698685105428e-05, "loss": 0.0343, "step": 22650 }, { "epoch": 16.600732600732602, "grad_norm": 0.439453125, "learning_rate": 5.876320600119455e-05, "loss": 0.0357, "step": 22660 }, { "epoch": 16.60805860805861, "grad_norm": 0.369140625, "learning_rate": 5.8729421192161714e-05, "loss": 0.0344, "step": 22670 }, { "epoch": 16.615384615384617, "grad_norm": 0.52734375, "learning_rate": 5.86956324401432e-05, "loss": 0.0356, "step": 22680 }, { "epoch": 16.622710622710624, "grad_norm": 0.494140625, "learning_rate": 5.86618397613283e-05, "loss": 0.0341, "step": 22690 }, { "epoch": 16.63003663003663, "grad_norm": 0.56640625, "learning_rate": 5.862804317190816e-05, "loss": 0.0386, "step": 22700 }, { "epoch": 16.63736263736264, "grad_norm": 0.515625, "learning_rate": 5.859424268807591e-05, "loss": 0.0364, "step": 22710 }, { "epoch": 16.644688644688646, "grad_norm": 0.64453125, "learning_rate": 5.8560438326026376e-05, "loss": 0.037, "step": 22720 }, { "epoch": 16.652014652014653, "grad_norm": 0.79296875, "learning_rate": 5.852663010195641e-05, "loss": 0.0379, "step": 22730 }, { "epoch": 16.65934065934066, "grad_norm": 0.376953125, "learning_rate": 5.849281803206462e-05, "loss": 0.0369, "step": 22740 }, { "epoch": 16.666666666666668, "grad_norm": 0.4140625, "learning_rate": 5.8459002132551464e-05, "loss": 0.0333, "step": 22750 }, { "epoch": 16.673992673992675, "grad_norm": 0.7734375, "learning_rate": 5.842518241961927e-05, "loss": 0.0352, "step": 22760 }, { "epoch": 16.681318681318682, "grad_norm": 0.45703125, "learning_rate": 5.839135890947215e-05, "loss": 0.0366, "step": 22770 }, { "epoch": 16.68864468864469, "grad_norm": 0.53125, "learning_rate": 5.835753161831608e-05, "loss": 0.0319, "step": 22780 }, { "epoch": 16.695970695970697, "grad_norm": 0.62890625, "learning_rate": 5.8323700562358826e-05, "loss": 0.0372, "step": 22790 }, { "epoch": 16.703296703296704, "grad_norm": 0.71875, "learning_rate": 5.828986575780992e-05, "loss": 0.0374, "step": 22800 }, { "epoch": 16.71062271062271, "grad_norm": 0.7734375, "learning_rate": 5.825602722088076e-05, "loss": 0.037, "step": 22810 }, { "epoch": 16.71794871794872, "grad_norm": 0.609375, "learning_rate": 5.822218496778451e-05, "loss": 0.0359, "step": 22820 }, { "epoch": 16.725274725274726, "grad_norm": 0.462890625, "learning_rate": 5.818833901473606e-05, "loss": 0.0326, "step": 22830 }, { "epoch": 16.732600732600734, "grad_norm": 0.68359375, "learning_rate": 5.815448937795216e-05, "loss": 0.0355, "step": 22840 }, { "epoch": 16.73992673992674, "grad_norm": 0.373046875, "learning_rate": 5.8120636073651255e-05, "loss": 0.0326, "step": 22850 }, { "epoch": 16.747252747252748, "grad_norm": 0.5078125, "learning_rate": 5.808677911805358e-05, "loss": 0.0353, "step": 22860 }, { "epoch": 16.754578754578755, "grad_norm": 0.462890625, "learning_rate": 5.8052918527381114e-05, "loss": 0.0313, "step": 22870 }, { "epoch": 16.761904761904763, "grad_norm": 0.314453125, "learning_rate": 5.801905431785759e-05, "loss": 0.0332, "step": 22880 }, { "epoch": 16.76923076923077, "grad_norm": 0.41796875, "learning_rate": 5.7985186505708455e-05, "loss": 0.0352, "step": 22890 }, { "epoch": 16.776556776556777, "grad_norm": 0.40625, "learning_rate": 5.795131510716087e-05, "loss": 0.033, "step": 22900 }, { "epoch": 16.783882783882785, "grad_norm": 0.41796875, "learning_rate": 5.7917440138443746e-05, "loss": 0.0381, "step": 22910 }, { "epoch": 16.791208791208792, "grad_norm": 0.48828125, "learning_rate": 5.788356161578768e-05, "loss": 0.0345, "step": 22920 }, { "epoch": 16.7985347985348, "grad_norm": 0.6484375, "learning_rate": 5.784967955542504e-05, "loss": 0.0342, "step": 22930 }, { "epoch": 16.805860805860807, "grad_norm": 0.380859375, "learning_rate": 5.781579397358976e-05, "loss": 0.0308, "step": 22940 }, { "epoch": 16.813186813186814, "grad_norm": 0.5625, "learning_rate": 5.778190488651758e-05, "loss": 0.0356, "step": 22950 }, { "epoch": 16.82051282051282, "grad_norm": 0.5, "learning_rate": 5.7748012310445845e-05, "loss": 0.0336, "step": 22960 }, { "epoch": 16.82783882783883, "grad_norm": 0.412109375, "learning_rate": 5.7714116261613614e-05, "loss": 0.0304, "step": 22970 }, { "epoch": 16.835164835164836, "grad_norm": 0.51953125, "learning_rate": 5.76802167562616e-05, "loss": 0.0342, "step": 22980 }, { "epoch": 16.842490842490843, "grad_norm": 0.43359375, "learning_rate": 5.764631381063216e-05, "loss": 0.0315, "step": 22990 }, { "epoch": 16.84981684981685, "grad_norm": 0.8125, "learning_rate": 5.761240744096929e-05, "loss": 0.0338, "step": 23000 }, { "epoch": 16.857142857142858, "grad_norm": 0.55859375, "learning_rate": 5.7578497663518695e-05, "loss": 0.0356, "step": 23010 }, { "epoch": 16.864468864468865, "grad_norm": 0.609375, "learning_rate": 5.754458449452762e-05, "loss": 0.0345, "step": 23020 }, { "epoch": 16.871794871794872, "grad_norm": 0.359375, "learning_rate": 5.751066795024499e-05, "loss": 0.036, "step": 23030 }, { "epoch": 16.87912087912088, "grad_norm": 0.55078125, "learning_rate": 5.747674804692134e-05, "loss": 0.0363, "step": 23040 }, { "epoch": 16.886446886446887, "grad_norm": 0.73046875, "learning_rate": 5.744282480080878e-05, "loss": 0.0395, "step": 23050 }, { "epoch": 16.893772893772894, "grad_norm": 0.5078125, "learning_rate": 5.740889822816109e-05, "loss": 0.0359, "step": 23060 }, { "epoch": 16.9010989010989, "grad_norm": 0.6171875, "learning_rate": 5.737496834523359e-05, "loss": 0.0382, "step": 23070 }, { "epoch": 16.90842490842491, "grad_norm": 0.427734375, "learning_rate": 5.734103516828319e-05, "loss": 0.0367, "step": 23080 }, { "epoch": 16.915750915750916, "grad_norm": 0.421875, "learning_rate": 5.730709871356837e-05, "loss": 0.0351, "step": 23090 }, { "epoch": 16.923076923076923, "grad_norm": 0.703125, "learning_rate": 5.727315899734924e-05, "loss": 0.0398, "step": 23100 }, { "epoch": 16.93040293040293, "grad_norm": 0.5078125, "learning_rate": 5.723921603588742e-05, "loss": 0.0326, "step": 23110 }, { "epoch": 16.937728937728938, "grad_norm": 0.5859375, "learning_rate": 5.720526984544608e-05, "loss": 0.0344, "step": 23120 }, { "epoch": 16.945054945054945, "grad_norm": 0.6171875, "learning_rate": 5.717132044228998e-05, "loss": 0.0313, "step": 23130 }, { "epoch": 16.952380952380953, "grad_norm": 0.44140625, "learning_rate": 5.7137367842685374e-05, "loss": 0.0357, "step": 23140 }, { "epoch": 16.95970695970696, "grad_norm": 0.337890625, "learning_rate": 5.7103412062900056e-05, "loss": 0.0368, "step": 23150 }, { "epoch": 16.967032967032967, "grad_norm": 0.50390625, "learning_rate": 5.7069453119203385e-05, "loss": 0.0384, "step": 23160 }, { "epoch": 16.974358974358974, "grad_norm": 0.57421875, "learning_rate": 5.7035491027866186e-05, "loss": 0.0331, "step": 23170 }, { "epoch": 16.98168498168498, "grad_norm": 0.3984375, "learning_rate": 5.7001525805160796e-05, "loss": 0.0362, "step": 23180 }, { "epoch": 16.98901098901099, "grad_norm": 0.38671875, "learning_rate": 5.696755746736109e-05, "loss": 0.0305, "step": 23190 }, { "epoch": 16.996336996336996, "grad_norm": 0.50390625, "learning_rate": 5.693358603074244e-05, "loss": 0.0344, "step": 23200 }, { "epoch": 17.003663003663004, "grad_norm": 0.462890625, "learning_rate": 5.68996115115816e-05, "loss": 0.0396, "step": 23210 }, { "epoch": 17.01098901098901, "grad_norm": 0.50390625, "learning_rate": 5.6865633926156934e-05, "loss": 0.0338, "step": 23220 }, { "epoch": 17.01831501831502, "grad_norm": 0.3984375, "learning_rate": 5.683165329074821e-05, "loss": 0.046, "step": 23230 }, { "epoch": 17.025641025641026, "grad_norm": 0.470703125, "learning_rate": 5.679766962163666e-05, "loss": 0.0363, "step": 23240 }, { "epoch": 17.032967032967033, "grad_norm": 0.7265625, "learning_rate": 5.6763682935104964e-05, "loss": 0.0421, "step": 23250 }, { "epoch": 17.04029304029304, "grad_norm": 0.6640625, "learning_rate": 5.67296932474373e-05, "loss": 0.0391, "step": 23260 }, { "epoch": 17.047619047619047, "grad_norm": 0.443359375, "learning_rate": 5.6695700574919174e-05, "loss": 0.0313, "step": 23270 }, { "epoch": 17.054945054945055, "grad_norm": 0.7265625, "learning_rate": 5.666170493383765e-05, "loss": 0.037, "step": 23280 }, { "epoch": 17.062271062271062, "grad_norm": 0.609375, "learning_rate": 5.6627706340481125e-05, "loss": 0.0374, "step": 23290 }, { "epoch": 17.06959706959707, "grad_norm": 0.39453125, "learning_rate": 5.659370481113945e-05, "loss": 0.0308, "step": 23300 }, { "epoch": 17.076923076923077, "grad_norm": 0.51171875, "learning_rate": 5.6559700362103874e-05, "loss": 0.0333, "step": 23310 }, { "epoch": 17.084249084249084, "grad_norm": 0.4140625, "learning_rate": 5.6525693009667025e-05, "loss": 0.0321, "step": 23320 }, { "epoch": 17.09157509157509, "grad_norm": 0.376953125, "learning_rate": 5.649168277012299e-05, "loss": 0.0331, "step": 23330 }, { "epoch": 17.0989010989011, "grad_norm": 0.640625, "learning_rate": 5.645766965976715e-05, "loss": 0.0327, "step": 23340 }, { "epoch": 17.106227106227106, "grad_norm": 0.80859375, "learning_rate": 5.642365369489631e-05, "loss": 0.0344, "step": 23350 }, { "epoch": 17.113553113553113, "grad_norm": 0.416015625, "learning_rate": 5.6389634891808626e-05, "loss": 0.0282, "step": 23360 }, { "epoch": 17.12087912087912, "grad_norm": 0.396484375, "learning_rate": 5.635561326680363e-05, "loss": 0.034, "step": 23370 }, { "epoch": 17.128205128205128, "grad_norm": 0.7734375, "learning_rate": 5.632158883618222e-05, "loss": 0.0342, "step": 23380 }, { "epoch": 17.135531135531135, "grad_norm": 0.466796875, "learning_rate": 5.6287561616246596e-05, "loss": 0.0373, "step": 23390 }, { "epoch": 17.142857142857142, "grad_norm": 0.373046875, "learning_rate": 5.6253531623300305e-05, "loss": 0.0327, "step": 23400 }, { "epoch": 17.15018315018315, "grad_norm": 0.3203125, "learning_rate": 5.621949887364825e-05, "loss": 0.0352, "step": 23410 }, { "epoch": 17.157509157509157, "grad_norm": 0.431640625, "learning_rate": 5.618546338359663e-05, "loss": 0.0333, "step": 23420 }, { "epoch": 17.164835164835164, "grad_norm": 0.40625, "learning_rate": 5.615142516945299e-05, "loss": 0.032, "step": 23430 }, { "epoch": 17.17216117216117, "grad_norm": 0.3671875, "learning_rate": 5.611738424752611e-05, "loss": 0.0351, "step": 23440 }, { "epoch": 17.17948717948718, "grad_norm": 0.48046875, "learning_rate": 5.6083340634126135e-05, "loss": 0.0351, "step": 23450 }, { "epoch": 17.186813186813186, "grad_norm": 0.5, "learning_rate": 5.6049294345564495e-05, "loss": 0.0369, "step": 23460 }, { "epoch": 17.194139194139193, "grad_norm": 0.81640625, "learning_rate": 5.601524539815387e-05, "loss": 0.0346, "step": 23470 }, { "epoch": 17.2014652014652, "grad_norm": 0.546875, "learning_rate": 5.598119380820822e-05, "loss": 0.037, "step": 23480 }, { "epoch": 17.208791208791208, "grad_norm": 0.62109375, "learning_rate": 5.594713959204278e-05, "loss": 0.0353, "step": 23490 }, { "epoch": 17.216117216117215, "grad_norm": 0.3828125, "learning_rate": 5.5913082765974045e-05, "loss": 0.0323, "step": 23500 }, { "epoch": 17.223443223443223, "grad_norm": 0.486328125, "learning_rate": 5.587902334631977e-05, "loss": 0.033, "step": 23510 }, { "epoch": 17.23076923076923, "grad_norm": 0.5703125, "learning_rate": 5.584496134939893e-05, "loss": 0.0369, "step": 23520 }, { "epoch": 17.238095238095237, "grad_norm": 0.515625, "learning_rate": 5.581089679153173e-05, "loss": 0.0315, "step": 23530 }, { "epoch": 17.245421245421245, "grad_norm": 0.6171875, "learning_rate": 5.577682968903964e-05, "loss": 0.0352, "step": 23540 }, { "epoch": 17.252747252747252, "grad_norm": 0.455078125, "learning_rate": 5.5742760058245314e-05, "loss": 0.03, "step": 23550 }, { "epoch": 17.26007326007326, "grad_norm": 0.3515625, "learning_rate": 5.5708687915472656e-05, "loss": 0.0328, "step": 23560 }, { "epoch": 17.267399267399266, "grad_norm": 0.54296875, "learning_rate": 5.56746132770467e-05, "loss": 0.033, "step": 23570 }, { "epoch": 17.274725274725274, "grad_norm": 0.62109375, "learning_rate": 5.564053615929378e-05, "loss": 0.0352, "step": 23580 }, { "epoch": 17.28205128205128, "grad_norm": 0.82421875, "learning_rate": 5.560645657854129e-05, "loss": 0.0379, "step": 23590 }, { "epoch": 17.28937728937729, "grad_norm": 0.5625, "learning_rate": 5.557237455111795e-05, "loss": 0.0357, "step": 23600 }, { "epoch": 17.296703296703296, "grad_norm": 0.5234375, "learning_rate": 5.553829009335353e-05, "loss": 0.035, "step": 23610 }, { "epoch": 17.304029304029303, "grad_norm": 0.77734375, "learning_rate": 5.550420322157902e-05, "loss": 0.0363, "step": 23620 }, { "epoch": 17.31135531135531, "grad_norm": 0.44921875, "learning_rate": 5.5470113952126566e-05, "loss": 0.0317, "step": 23630 }, { "epoch": 17.318681318681318, "grad_norm": 0.984375, "learning_rate": 5.543602230132946e-05, "loss": 0.0419, "step": 23640 }, { "epoch": 17.326007326007325, "grad_norm": 0.59765625, "learning_rate": 5.540192828552214e-05, "loss": 0.0352, "step": 23650 }, { "epoch": 17.333333333333332, "grad_norm": 0.458984375, "learning_rate": 5.536783192104013e-05, "loss": 0.0359, "step": 23660 }, { "epoch": 17.34065934065934, "grad_norm": 0.58203125, "learning_rate": 5.533373322422013e-05, "loss": 0.0348, "step": 23670 }, { "epoch": 17.347985347985347, "grad_norm": 0.5390625, "learning_rate": 5.5299632211399965e-05, "loss": 0.0332, "step": 23680 }, { "epoch": 17.355311355311354, "grad_norm": 0.48828125, "learning_rate": 5.526552889891855e-05, "loss": 0.0345, "step": 23690 }, { "epoch": 17.36263736263736, "grad_norm": 0.53125, "learning_rate": 5.523142330311586e-05, "loss": 0.035, "step": 23700 }, { "epoch": 17.36996336996337, "grad_norm": 0.4609375, "learning_rate": 5.5197315440333054e-05, "loss": 0.0305, "step": 23710 }, { "epoch": 17.377289377289376, "grad_norm": 0.37890625, "learning_rate": 5.5163205326912306e-05, "loss": 0.0355, "step": 23720 }, { "epoch": 17.384615384615383, "grad_norm": 0.380859375, "learning_rate": 5.512909297919687e-05, "loss": 0.0405, "step": 23730 }, { "epoch": 17.39194139194139, "grad_norm": 0.451171875, "learning_rate": 5.509497841353113e-05, "loss": 0.0314, "step": 23740 }, { "epoch": 17.399267399267398, "grad_norm": 0.3984375, "learning_rate": 5.506086164626048e-05, "loss": 0.0346, "step": 23750 }, { "epoch": 17.406593406593405, "grad_norm": 0.44140625, "learning_rate": 5.502674269373136e-05, "loss": 0.0381, "step": 23760 }, { "epoch": 17.413919413919412, "grad_norm": 0.56640625, "learning_rate": 5.4992621572291316e-05, "loss": 0.0344, "step": 23770 }, { "epoch": 17.42124542124542, "grad_norm": 0.3671875, "learning_rate": 5.495849829828886e-05, "loss": 0.0318, "step": 23780 }, { "epoch": 17.428571428571427, "grad_norm": 0.91796875, "learning_rate": 5.4924372888073614e-05, "loss": 0.035, "step": 23790 }, { "epoch": 17.435897435897434, "grad_norm": 0.416015625, "learning_rate": 5.489024535799615e-05, "loss": 0.0339, "step": 23800 }, { "epoch": 17.44322344322344, "grad_norm": 0.95703125, "learning_rate": 5.485611572440809e-05, "loss": 0.037, "step": 23810 }, { "epoch": 17.45054945054945, "grad_norm": 0.427734375, "learning_rate": 5.482198400366206e-05, "loss": 0.0328, "step": 23820 }, { "epoch": 17.457875457875456, "grad_norm": 0.73828125, "learning_rate": 5.4787850212111715e-05, "loss": 0.0368, "step": 23830 }, { "epoch": 17.465201465201464, "grad_norm": 0.66796875, "learning_rate": 5.4753714366111644e-05, "loss": 0.0345, "step": 23840 }, { "epoch": 17.47252747252747, "grad_norm": 0.4296875, "learning_rate": 5.471957648201745e-05, "loss": 0.0408, "step": 23850 }, { "epoch": 17.479853479853478, "grad_norm": 0.45703125, "learning_rate": 5.468543657618574e-05, "loss": 0.0342, "step": 23860 }, { "epoch": 17.487179487179485, "grad_norm": 0.51953125, "learning_rate": 5.465129466497404e-05, "loss": 0.0388, "step": 23870 }, { "epoch": 17.494505494505496, "grad_norm": 1.1640625, "learning_rate": 5.461715076474086e-05, "loss": 0.0404, "step": 23880 }, { "epoch": 17.501831501831504, "grad_norm": 0.42578125, "learning_rate": 5.4583004891845666e-05, "loss": 0.0372, "step": 23890 }, { "epoch": 17.50915750915751, "grad_norm": 0.37890625, "learning_rate": 5.454885706264889e-05, "loss": 0.03, "step": 23900 }, { "epoch": 17.516483516483518, "grad_norm": 0.86328125, "learning_rate": 5.4514707293511845e-05, "loss": 0.038, "step": 23910 }, { "epoch": 17.523809523809526, "grad_norm": 0.61328125, "learning_rate": 5.4480555600796836e-05, "loss": 0.0434, "step": 23920 }, { "epoch": 17.531135531135533, "grad_norm": 0.7578125, "learning_rate": 5.4446402000867016e-05, "loss": 0.0369, "step": 23930 }, { "epoch": 17.53846153846154, "grad_norm": 0.66015625, "learning_rate": 5.441224651008652e-05, "loss": 0.0359, "step": 23940 }, { "epoch": 17.545787545787547, "grad_norm": 0.41796875, "learning_rate": 5.4378089144820364e-05, "loss": 0.0347, "step": 23950 }, { "epoch": 17.553113553113555, "grad_norm": 0.8046875, "learning_rate": 5.434392992143448e-05, "loss": 0.0339, "step": 23960 }, { "epoch": 17.560439560439562, "grad_norm": 0.50390625, "learning_rate": 5.430976885629565e-05, "loss": 0.0309, "step": 23970 }, { "epoch": 17.56776556776557, "grad_norm": 0.4140625, "learning_rate": 5.427560596577156e-05, "loss": 0.0326, "step": 23980 }, { "epoch": 17.575091575091577, "grad_norm": 0.44140625, "learning_rate": 5.4241441266230774e-05, "loss": 0.0379, "step": 23990 }, { "epoch": 17.582417582417584, "grad_norm": 0.396484375, "learning_rate": 5.420727477404273e-05, "loss": 0.0364, "step": 24000 }, { "epoch": 17.58974358974359, "grad_norm": 0.57421875, "learning_rate": 5.417310650557772e-05, "loss": 0.0356, "step": 24010 }, { "epoch": 17.5970695970696, "grad_norm": 0.7734375, "learning_rate": 5.413893647720686e-05, "loss": 0.0388, "step": 24020 }, { "epoch": 17.604395604395606, "grad_norm": 0.48046875, "learning_rate": 5.4104764705302155e-05, "loss": 0.0314, "step": 24030 }, { "epoch": 17.611721611721613, "grad_norm": 0.482421875, "learning_rate": 5.40705912062364e-05, "loss": 0.034, "step": 24040 }, { "epoch": 17.61904761904762, "grad_norm": 0.408203125, "learning_rate": 5.403641599638326e-05, "loss": 0.0352, "step": 24050 }, { "epoch": 17.626373626373628, "grad_norm": 0.392578125, "learning_rate": 5.400223909211718e-05, "loss": 0.0328, "step": 24060 }, { "epoch": 17.633699633699635, "grad_norm": 0.48828125, "learning_rate": 5.3968060509813444e-05, "loss": 0.0354, "step": 24070 }, { "epoch": 17.641025641025642, "grad_norm": 0.48828125, "learning_rate": 5.393388026584815e-05, "loss": 0.0312, "step": 24080 }, { "epoch": 17.64835164835165, "grad_norm": 0.51171875, "learning_rate": 5.389969837659815e-05, "loss": 0.0384, "step": 24090 }, { "epoch": 17.655677655677657, "grad_norm": 0.484375, "learning_rate": 5.3865514858441125e-05, "loss": 0.0286, "step": 24100 }, { "epoch": 17.663003663003664, "grad_norm": 0.4453125, "learning_rate": 5.383132972775551e-05, "loss": 0.0321, "step": 24110 }, { "epoch": 17.67032967032967, "grad_norm": 0.48828125, "learning_rate": 5.379714300092051e-05, "loss": 0.0346, "step": 24120 }, { "epoch": 17.67765567765568, "grad_norm": 0.66015625, "learning_rate": 5.376295469431613e-05, "loss": 0.0336, "step": 24130 }, { "epoch": 17.684981684981686, "grad_norm": 0.3671875, "learning_rate": 5.372876482432312e-05, "loss": 0.0323, "step": 24140 }, { "epoch": 17.692307692307693, "grad_norm": 0.7109375, "learning_rate": 5.369457340732294e-05, "loss": 0.034, "step": 24150 }, { "epoch": 17.6996336996337, "grad_norm": 0.388671875, "learning_rate": 5.3660380459697815e-05, "loss": 0.0316, "step": 24160 }, { "epoch": 17.706959706959708, "grad_norm": 0.5, "learning_rate": 5.3626185997830735e-05, "loss": 0.032, "step": 24170 }, { "epoch": 17.714285714285715, "grad_norm": 0.50390625, "learning_rate": 5.359199003810538e-05, "loss": 0.0307, "step": 24180 }, { "epoch": 17.721611721611723, "grad_norm": 0.361328125, "learning_rate": 5.355779259690614e-05, "loss": 0.0359, "step": 24190 }, { "epoch": 17.72893772893773, "grad_norm": 0.470703125, "learning_rate": 5.3523593690618154e-05, "loss": 0.0311, "step": 24200 }, { "epoch": 17.736263736263737, "grad_norm": 0.75, "learning_rate": 5.3489393335627235e-05, "loss": 0.0335, "step": 24210 }, { "epoch": 17.743589743589745, "grad_norm": 0.63671875, "learning_rate": 5.3455191548319885e-05, "loss": 0.0345, "step": 24220 }, { "epoch": 17.750915750915752, "grad_norm": 0.65234375, "learning_rate": 5.34209883450833e-05, "loss": 0.0326, "step": 24230 }, { "epoch": 17.75824175824176, "grad_norm": 0.443359375, "learning_rate": 5.338678374230538e-05, "loss": 0.0371, "step": 24240 }, { "epoch": 17.765567765567766, "grad_norm": 0.365234375, "learning_rate": 5.3352577756374643e-05, "loss": 0.0307, "step": 24250 }, { "epoch": 17.772893772893774, "grad_norm": 0.53515625, "learning_rate": 5.3318370403680316e-05, "loss": 0.0368, "step": 24260 }, { "epoch": 17.78021978021978, "grad_norm": 0.470703125, "learning_rate": 5.328416170061226e-05, "loss": 0.0334, "step": 24270 }, { "epoch": 17.78754578754579, "grad_norm": 0.8203125, "learning_rate": 5.3249951663560996e-05, "loss": 0.0359, "step": 24280 }, { "epoch": 17.794871794871796, "grad_norm": 0.431640625, "learning_rate": 5.3215740308917675e-05, "loss": 0.0322, "step": 24290 }, { "epoch": 17.802197802197803, "grad_norm": 0.62109375, "learning_rate": 5.318152765307405e-05, "loss": 0.0335, "step": 24300 }, { "epoch": 17.80952380952381, "grad_norm": 0.39453125, "learning_rate": 5.314731371242255e-05, "loss": 0.031, "step": 24310 }, { "epoch": 17.816849816849818, "grad_norm": 0.40234375, "learning_rate": 5.311309850335619e-05, "loss": 0.0401, "step": 24320 }, { "epoch": 17.824175824175825, "grad_norm": 0.40625, "learning_rate": 5.3078882042268626e-05, "loss": 0.0346, "step": 24330 }, { "epoch": 17.831501831501832, "grad_norm": 0.392578125, "learning_rate": 5.304466434555406e-05, "loss": 0.034, "step": 24340 }, { "epoch": 17.83882783882784, "grad_norm": 0.36328125, "learning_rate": 5.301044542960731e-05, "loss": 0.0355, "step": 24350 }, { "epoch": 17.846153846153847, "grad_norm": 0.56640625, "learning_rate": 5.297622531082379e-05, "loss": 0.0357, "step": 24360 }, { "epoch": 17.853479853479854, "grad_norm": 0.5078125, "learning_rate": 5.29420040055995e-05, "loss": 0.0337, "step": 24370 }, { "epoch": 17.86080586080586, "grad_norm": 0.48046875, "learning_rate": 5.290778153033094e-05, "loss": 0.0324, "step": 24380 }, { "epoch": 17.86813186813187, "grad_norm": 0.396484375, "learning_rate": 5.287355790141527e-05, "loss": 0.038, "step": 24390 }, { "epoch": 17.875457875457876, "grad_norm": 0.466796875, "learning_rate": 5.283933313525013e-05, "loss": 0.0312, "step": 24400 }, { "epoch": 17.882783882783883, "grad_norm": 0.515625, "learning_rate": 5.280510724823375e-05, "loss": 0.0334, "step": 24410 }, { "epoch": 17.89010989010989, "grad_norm": 0.95703125, "learning_rate": 5.277088025676484e-05, "loss": 0.041, "step": 24420 }, { "epoch": 17.897435897435898, "grad_norm": 0.46484375, "learning_rate": 5.273665217724271e-05, "loss": 0.0312, "step": 24430 }, { "epoch": 17.904761904761905, "grad_norm": 0.4453125, "learning_rate": 5.270242302606713e-05, "loss": 0.031, "step": 24440 }, { "epoch": 17.912087912087912, "grad_norm": 0.69921875, "learning_rate": 5.266819281963841e-05, "loss": 0.037, "step": 24450 }, { "epoch": 17.91941391941392, "grad_norm": 0.373046875, "learning_rate": 5.263396157435738e-05, "loss": 0.0341, "step": 24460 }, { "epoch": 17.926739926739927, "grad_norm": 0.466796875, "learning_rate": 5.259972930662537e-05, "loss": 0.0391, "step": 24470 }, { "epoch": 17.934065934065934, "grad_norm": 0.396484375, "learning_rate": 5.256549603284413e-05, "loss": 0.0325, "step": 24480 }, { "epoch": 17.94139194139194, "grad_norm": 0.392578125, "learning_rate": 5.2531261769415974e-05, "loss": 0.0333, "step": 24490 }, { "epoch": 17.94871794871795, "grad_norm": 0.33984375, "learning_rate": 5.249702653274366e-05, "loss": 0.0307, "step": 24500 }, { "epoch": 17.956043956043956, "grad_norm": 0.48046875, "learning_rate": 5.246279033923044e-05, "loss": 0.0313, "step": 24510 }, { "epoch": 17.963369963369964, "grad_norm": 0.3359375, "learning_rate": 5.242855320527996e-05, "loss": 0.0351, "step": 24520 }, { "epoch": 17.97069597069597, "grad_norm": 0.9453125, "learning_rate": 5.239431514729635e-05, "loss": 0.0398, "step": 24530 }, { "epoch": 17.978021978021978, "grad_norm": 0.490234375, "learning_rate": 5.2360076181684225e-05, "loss": 0.0349, "step": 24540 }, { "epoch": 17.985347985347985, "grad_norm": 0.49609375, "learning_rate": 5.232583632484855e-05, "loss": 0.0306, "step": 24550 }, { "epoch": 17.992673992673993, "grad_norm": 0.423828125, "learning_rate": 5.229159559319481e-05, "loss": 0.0318, "step": 24560 }, { "epoch": 18.0, "grad_norm": 0.515625, "learning_rate": 5.225735400312883e-05, "loss": 0.0284, "step": 24570 }, { "epoch": 18.007326007326007, "grad_norm": 0.412109375, "learning_rate": 5.2223111571056896e-05, "loss": 0.0281, "step": 24580 }, { "epoch": 18.014652014652015, "grad_norm": 0.421875, "learning_rate": 5.218886831338567e-05, "loss": 0.0363, "step": 24590 }, { "epoch": 18.021978021978022, "grad_norm": 0.353515625, "learning_rate": 5.2154624246522234e-05, "loss": 0.0399, "step": 24600 }, { "epoch": 18.02930402930403, "grad_norm": 0.50390625, "learning_rate": 5.212037938687403e-05, "loss": 0.0374, "step": 24610 }, { "epoch": 18.036630036630036, "grad_norm": 0.53125, "learning_rate": 5.2086133750848886e-05, "loss": 0.035, "step": 24620 }, { "epoch": 18.043956043956044, "grad_norm": 0.46484375, "learning_rate": 5.205188735485502e-05, "loss": 0.0345, "step": 24630 }, { "epoch": 18.05128205128205, "grad_norm": 0.357421875, "learning_rate": 5.2017640215301034e-05, "loss": 0.0291, "step": 24640 }, { "epoch": 18.05860805860806, "grad_norm": 0.57421875, "learning_rate": 5.1983392348595805e-05, "loss": 0.0325, "step": 24650 }, { "epoch": 18.065934065934066, "grad_norm": 0.53125, "learning_rate": 5.194914377114866e-05, "loss": 0.0302, "step": 24660 }, { "epoch": 18.073260073260073, "grad_norm": 0.376953125, "learning_rate": 5.191489449936916e-05, "loss": 0.0318, "step": 24670 }, { "epoch": 18.08058608058608, "grad_norm": 0.44921875, "learning_rate": 5.18806445496673e-05, "loss": 0.0358, "step": 24680 }, { "epoch": 18.087912087912088, "grad_norm": 0.6171875, "learning_rate": 5.184639393845333e-05, "loss": 0.0371, "step": 24690 }, { "epoch": 18.095238095238095, "grad_norm": 0.38671875, "learning_rate": 5.1812142682137865e-05, "loss": 0.0358, "step": 24700 }, { "epoch": 18.102564102564102, "grad_norm": 0.5625, "learning_rate": 5.177789079713177e-05, "loss": 0.0294, "step": 24710 }, { "epoch": 18.10989010989011, "grad_norm": 0.41015625, "learning_rate": 5.1743638299846276e-05, "loss": 0.0369, "step": 24720 }, { "epoch": 18.117216117216117, "grad_norm": 0.41015625, "learning_rate": 5.170938520669289e-05, "loss": 0.0322, "step": 24730 }, { "epoch": 18.124542124542124, "grad_norm": 0.369140625, "learning_rate": 5.1675131534083344e-05, "loss": 0.03, "step": 24740 }, { "epoch": 18.13186813186813, "grad_norm": 0.55078125, "learning_rate": 5.1640877298429744e-05, "loss": 0.0345, "step": 24750 }, { "epoch": 18.13919413919414, "grad_norm": 0.369140625, "learning_rate": 5.160662251614439e-05, "loss": 0.029, "step": 24760 }, { "epoch": 18.146520146520146, "grad_norm": 0.486328125, "learning_rate": 5.157236720363987e-05, "loss": 0.0325, "step": 24770 }, { "epoch": 18.153846153846153, "grad_norm": 0.51953125, "learning_rate": 5.1538111377329065e-05, "loss": 0.0351, "step": 24780 }, { "epoch": 18.16117216117216, "grad_norm": 0.486328125, "learning_rate": 5.1503855053625036e-05, "loss": 0.0359, "step": 24790 }, { "epoch": 18.168498168498168, "grad_norm": 0.51953125, "learning_rate": 5.14695982489411e-05, "loss": 0.0349, "step": 24800 }, { "epoch": 18.175824175824175, "grad_norm": 0.578125, "learning_rate": 5.143534097969084e-05, "loss": 0.0351, "step": 24810 }, { "epoch": 18.183150183150182, "grad_norm": 0.66015625, "learning_rate": 5.140108326228803e-05, "loss": 0.029, "step": 24820 }, { "epoch": 18.19047619047619, "grad_norm": 0.55078125, "learning_rate": 5.136682511314667e-05, "loss": 0.0344, "step": 24830 }, { "epoch": 18.197802197802197, "grad_norm": 0.40625, "learning_rate": 5.133256654868095e-05, "loss": 0.0325, "step": 24840 }, { "epoch": 18.205128205128204, "grad_norm": 0.447265625, "learning_rate": 5.1298307585305285e-05, "loss": 0.0364, "step": 24850 }, { "epoch": 18.21245421245421, "grad_norm": 0.466796875, "learning_rate": 5.126404823943426e-05, "loss": 0.0385, "step": 24860 }, { "epoch": 18.21978021978022, "grad_norm": 0.3984375, "learning_rate": 5.1229788527482655e-05, "loss": 0.0297, "step": 24870 }, { "epoch": 18.227106227106226, "grad_norm": 0.32421875, "learning_rate": 5.119552846586543e-05, "loss": 0.0343, "step": 24880 }, { "epoch": 18.234432234432234, "grad_norm": 0.37890625, "learning_rate": 5.116126807099769e-05, "loss": 0.0324, "step": 24890 }, { "epoch": 18.24175824175824, "grad_norm": 0.41015625, "learning_rate": 5.112700735929472e-05, "loss": 0.0307, "step": 24900 }, { "epoch": 18.249084249084248, "grad_norm": 0.330078125, "learning_rate": 5.109274634717197e-05, "loss": 0.0308, "step": 24910 }, { "epoch": 18.256410256410255, "grad_norm": 0.466796875, "learning_rate": 5.1058485051045e-05, "loss": 0.0309, "step": 24920 }, { "epoch": 18.263736263736263, "grad_norm": 0.6796875, "learning_rate": 5.102422348732949e-05, "loss": 0.0331, "step": 24930 }, { "epoch": 18.27106227106227, "grad_norm": 0.33984375, "learning_rate": 5.098996167244132e-05, "loss": 0.0396, "step": 24940 }, { "epoch": 18.278388278388277, "grad_norm": 0.47265625, "learning_rate": 5.095569962279644e-05, "loss": 0.0328, "step": 24950 }, { "epoch": 18.285714285714285, "grad_norm": 0.59375, "learning_rate": 5.092143735481093e-05, "loss": 0.0313, "step": 24960 }, { "epoch": 18.293040293040292, "grad_norm": 0.484375, "learning_rate": 5.088717488490094e-05, "loss": 0.0416, "step": 24970 }, { "epoch": 18.3003663003663, "grad_norm": 0.474609375, "learning_rate": 5.0852912229482776e-05, "loss": 0.0334, "step": 24980 }, { "epoch": 18.307692307692307, "grad_norm": 0.427734375, "learning_rate": 5.081864940497276e-05, "loss": 0.0328, "step": 24990 }, { "epoch": 18.315018315018314, "grad_norm": 0.5703125, "learning_rate": 5.078438642778738e-05, "loss": 0.0324, "step": 25000 }, { "epoch": 18.32234432234432, "grad_norm": 0.33984375, "learning_rate": 5.075012331434313e-05, "loss": 0.0367, "step": 25010 }, { "epoch": 18.32967032967033, "grad_norm": 0.4296875, "learning_rate": 5.0715860081056585e-05, "loss": 0.036, "step": 25020 }, { "epoch": 18.336996336996336, "grad_norm": 0.55078125, "learning_rate": 5.0681596744344394e-05, "loss": 0.0308, "step": 25030 }, { "epoch": 18.344322344322343, "grad_norm": 0.39453125, "learning_rate": 5.064733332062326e-05, "loss": 0.0313, "step": 25040 }, { "epoch": 18.35164835164835, "grad_norm": 0.43359375, "learning_rate": 5.06130698263099e-05, "loss": 0.0329, "step": 25050 }, { "epoch": 18.358974358974358, "grad_norm": 0.84765625, "learning_rate": 5.0578806277821054e-05, "loss": 0.0362, "step": 25060 }, { "epoch": 18.366300366300365, "grad_norm": 0.345703125, "learning_rate": 5.054454269157354e-05, "loss": 0.0308, "step": 25070 }, { "epoch": 18.373626373626372, "grad_norm": 0.55859375, "learning_rate": 5.0510279083984156e-05, "loss": 0.0305, "step": 25080 }, { "epoch": 18.38095238095238, "grad_norm": 0.3359375, "learning_rate": 5.0476015471469736e-05, "loss": 0.033, "step": 25090 }, { "epoch": 18.388278388278387, "grad_norm": 0.390625, "learning_rate": 5.044175187044711e-05, "loss": 0.0331, "step": 25100 }, { "epoch": 18.395604395604394, "grad_norm": 0.482421875, "learning_rate": 5.040748829733305e-05, "loss": 0.0377, "step": 25110 }, { "epoch": 18.4029304029304, "grad_norm": 0.462890625, "learning_rate": 5.037322476854438e-05, "loss": 0.0375, "step": 25120 }, { "epoch": 18.41025641025641, "grad_norm": 0.6015625, "learning_rate": 5.033896130049788e-05, "loss": 0.0312, "step": 25130 }, { "epoch": 18.417582417582416, "grad_norm": 0.7109375, "learning_rate": 5.030469790961032e-05, "loss": 0.0365, "step": 25140 }, { "epoch": 18.424908424908423, "grad_norm": 0.5234375, "learning_rate": 5.027043461229839e-05, "loss": 0.0398, "step": 25150 }, { "epoch": 18.43223443223443, "grad_norm": 0.55078125, "learning_rate": 5.023617142497877e-05, "loss": 0.0358, "step": 25160 }, { "epoch": 18.439560439560438, "grad_norm": 0.4140625, "learning_rate": 5.0201908364068076e-05, "loss": 0.0339, "step": 25170 }, { "epoch": 18.446886446886445, "grad_norm": 0.400390625, "learning_rate": 5.016764544598287e-05, "loss": 0.0313, "step": 25180 }, { "epoch": 18.454212454212453, "grad_norm": 0.373046875, "learning_rate": 5.0133382687139616e-05, "loss": 0.0325, "step": 25190 }, { "epoch": 18.46153846153846, "grad_norm": 0.6953125, "learning_rate": 5.0099120103954786e-05, "loss": 0.0374, "step": 25200 }, { "epoch": 18.468864468864467, "grad_norm": 0.5390625, "learning_rate": 5.006485771284464e-05, "loss": 0.0299, "step": 25210 }, { "epoch": 18.476190476190474, "grad_norm": 0.40625, "learning_rate": 5.0030595530225435e-05, "loss": 0.0306, "step": 25220 }, { "epoch": 18.483516483516482, "grad_norm": 0.3984375, "learning_rate": 4.9996333572513314e-05, "loss": 0.0316, "step": 25230 }, { "epoch": 18.49084249084249, "grad_norm": 0.5390625, "learning_rate": 4.996207185612432e-05, "loss": 0.0313, "step": 25240 }, { "epoch": 18.498168498168496, "grad_norm": 0.337890625, "learning_rate": 4.992781039747433e-05, "loss": 0.0389, "step": 25250 }, { "epoch": 18.505494505494504, "grad_norm": 0.380859375, "learning_rate": 4.989354921297915e-05, "loss": 0.0335, "step": 25260 }, { "epoch": 18.51282051282051, "grad_norm": 0.6015625, "learning_rate": 4.985928831905444e-05, "loss": 0.0368, "step": 25270 }, { "epoch": 18.520146520146522, "grad_norm": 0.421875, "learning_rate": 4.9825027732115735e-05, "loss": 0.0311, "step": 25280 }, { "epoch": 18.52747252747253, "grad_norm": 0.5, "learning_rate": 4.979076746857837e-05, "loss": 0.0296, "step": 25290 }, { "epoch": 18.534798534798536, "grad_norm": 0.58984375, "learning_rate": 4.975650754485759e-05, "loss": 0.0331, "step": 25300 }, { "epoch": 18.542124542124544, "grad_norm": 0.62890625, "learning_rate": 4.972224797736843e-05, "loss": 0.0335, "step": 25310 }, { "epoch": 18.54945054945055, "grad_norm": 0.396484375, "learning_rate": 4.968798878252579e-05, "loss": 0.0349, "step": 25320 }, { "epoch": 18.55677655677656, "grad_norm": 0.392578125, "learning_rate": 4.965372997674438e-05, "loss": 0.0352, "step": 25330 }, { "epoch": 18.564102564102566, "grad_norm": 0.375, "learning_rate": 4.9619471576438686e-05, "loss": 0.0326, "step": 25340 }, { "epoch": 18.571428571428573, "grad_norm": 0.4296875, "learning_rate": 4.958521359802305e-05, "loss": 0.0371, "step": 25350 }, { "epoch": 18.57875457875458, "grad_norm": 0.5390625, "learning_rate": 4.955095605791158e-05, "loss": 0.0316, "step": 25360 }, { "epoch": 18.586080586080588, "grad_norm": 0.470703125, "learning_rate": 4.951669897251822e-05, "loss": 0.0382, "step": 25370 }, { "epoch": 18.593406593406595, "grad_norm": 0.625, "learning_rate": 4.948244235825663e-05, "loss": 0.0364, "step": 25380 }, { "epoch": 18.600732600732602, "grad_norm": 0.34375, "learning_rate": 4.944818623154029e-05, "loss": 0.0306, "step": 25390 }, { "epoch": 18.60805860805861, "grad_norm": 0.392578125, "learning_rate": 4.941393060878244e-05, "loss": 0.031, "step": 25400 }, { "epoch": 18.615384615384617, "grad_norm": 0.671875, "learning_rate": 4.9379675506396075e-05, "loss": 0.0336, "step": 25410 }, { "epoch": 18.622710622710624, "grad_norm": 0.56640625, "learning_rate": 4.9345420940793916e-05, "loss": 0.0369, "step": 25420 }, { "epoch": 18.63003663003663, "grad_norm": 0.474609375, "learning_rate": 4.931116692838847e-05, "loss": 0.0352, "step": 25430 }, { "epoch": 18.63736263736264, "grad_norm": 0.48828125, "learning_rate": 4.927691348559195e-05, "loss": 0.0391, "step": 25440 }, { "epoch": 18.644688644688646, "grad_norm": 0.46875, "learning_rate": 4.924266062881633e-05, "loss": 0.0365, "step": 25450 }, { "epoch": 18.652014652014653, "grad_norm": 0.482421875, "learning_rate": 4.920840837447324e-05, "loss": 0.0379, "step": 25460 }, { "epoch": 18.65934065934066, "grad_norm": 0.39453125, "learning_rate": 4.917415673897409e-05, "loss": 0.0308, "step": 25470 }, { "epoch": 18.666666666666668, "grad_norm": 0.515625, "learning_rate": 4.913990573872992e-05, "loss": 0.0305, "step": 25480 }, { "epoch": 18.673992673992675, "grad_norm": 0.9140625, "learning_rate": 4.910565539015154e-05, "loss": 0.0339, "step": 25490 }, { "epoch": 18.681318681318682, "grad_norm": 0.5859375, "learning_rate": 4.907140570964941e-05, "loss": 0.0357, "step": 25500 }, { "epoch": 18.68864468864469, "grad_norm": 0.37109375, "learning_rate": 4.903715671363368e-05, "loss": 0.0411, "step": 25510 }, { "epoch": 18.695970695970697, "grad_norm": 0.361328125, "learning_rate": 4.900290841851415e-05, "loss": 0.0344, "step": 25520 }, { "epoch": 18.703296703296704, "grad_norm": 0.48046875, "learning_rate": 4.896866084070033e-05, "loss": 0.0395, "step": 25530 }, { "epoch": 18.71062271062271, "grad_norm": 0.478515625, "learning_rate": 4.8934413996601326e-05, "loss": 0.0328, "step": 25540 }, { "epoch": 18.71794871794872, "grad_norm": 0.431640625, "learning_rate": 4.890016790262596e-05, "loss": 0.0307, "step": 25550 }, { "epoch": 18.725274725274726, "grad_norm": 0.8125, "learning_rate": 4.8865922575182625e-05, "loss": 0.0372, "step": 25560 }, { "epoch": 18.732600732600734, "grad_norm": 0.61328125, "learning_rate": 4.88316780306794e-05, "loss": 0.0381, "step": 25570 }, { "epoch": 18.73992673992674, "grad_norm": 0.470703125, "learning_rate": 4.879743428552399e-05, "loss": 0.0317, "step": 25580 }, { "epoch": 18.747252747252748, "grad_norm": 0.609375, "learning_rate": 4.876319135612365e-05, "loss": 0.0332, "step": 25590 }, { "epoch": 18.754578754578755, "grad_norm": 0.53515625, "learning_rate": 4.872894925888533e-05, "loss": 0.0339, "step": 25600 }, { "epoch": 18.761904761904763, "grad_norm": 0.59375, "learning_rate": 4.8694708010215504e-05, "loss": 0.033, "step": 25610 }, { "epoch": 18.76923076923077, "grad_norm": 0.5078125, "learning_rate": 4.866046762652031e-05, "loss": 0.0306, "step": 25620 }, { "epoch": 18.776556776556777, "grad_norm": 0.55859375, "learning_rate": 4.8626228124205424e-05, "loss": 0.0374, "step": 25630 }, { "epoch": 18.783882783882785, "grad_norm": 0.341796875, "learning_rate": 4.8591989519676134e-05, "loss": 0.033, "step": 25640 }, { "epoch": 18.791208791208792, "grad_norm": 0.58984375, "learning_rate": 4.855775182933725e-05, "loss": 0.0321, "step": 25650 }, { "epoch": 18.7985347985348, "grad_norm": 0.333984375, "learning_rate": 4.852351506959319e-05, "loss": 0.0344, "step": 25660 }, { "epoch": 18.805860805860807, "grad_norm": 0.435546875, "learning_rate": 4.848927925684791e-05, "loss": 0.0332, "step": 25670 }, { "epoch": 18.813186813186814, "grad_norm": 0.404296875, "learning_rate": 4.8455044407504905e-05, "loss": 0.0334, "step": 25680 }, { "epoch": 18.82051282051282, "grad_norm": 0.54296875, "learning_rate": 4.842081053796722e-05, "loss": 0.0338, "step": 25690 }, { "epoch": 18.82783882783883, "grad_norm": 0.55859375, "learning_rate": 4.8386577664637425e-05, "loss": 0.0376, "step": 25700 }, { "epoch": 18.835164835164836, "grad_norm": 0.421875, "learning_rate": 4.8352345803917626e-05, "loss": 0.0331, "step": 25710 }, { "epoch": 18.842490842490843, "grad_norm": 0.98828125, "learning_rate": 4.8318114972209386e-05, "loss": 0.0342, "step": 25720 }, { "epoch": 18.84981684981685, "grad_norm": 0.640625, "learning_rate": 4.828388518591386e-05, "loss": 0.0357, "step": 25730 }, { "epoch": 18.857142857142858, "grad_norm": 0.390625, "learning_rate": 4.8249656461431647e-05, "loss": 0.0347, "step": 25740 }, { "epoch": 18.864468864468865, "grad_norm": 0.93359375, "learning_rate": 4.821542881516285e-05, "loss": 0.0313, "step": 25750 }, { "epoch": 18.871794871794872, "grad_norm": 0.349609375, "learning_rate": 4.818120226350706e-05, "loss": 0.034, "step": 25760 }, { "epoch": 18.87912087912088, "grad_norm": 0.341796875, "learning_rate": 4.814697682286336e-05, "loss": 0.0309, "step": 25770 }, { "epoch": 18.886446886446887, "grad_norm": 0.75, "learning_rate": 4.8112752509630254e-05, "loss": 0.0397, "step": 25780 }, { "epoch": 18.893772893772894, "grad_norm": 0.39453125, "learning_rate": 4.807852934020574e-05, "loss": 0.0313, "step": 25790 }, { "epoch": 18.9010989010989, "grad_norm": 0.44921875, "learning_rate": 4.804430733098727e-05, "loss": 0.0314, "step": 25800 }, { "epoch": 18.90842490842491, "grad_norm": 0.50390625, "learning_rate": 4.801008649837173e-05, "loss": 0.0307, "step": 25810 }, { "epoch": 18.915750915750916, "grad_norm": 0.345703125, "learning_rate": 4.7975866858755425e-05, "loss": 0.0299, "step": 25820 }, { "epoch": 18.923076923076923, "grad_norm": 0.7578125, "learning_rate": 4.794164842853413e-05, "loss": 0.0353, "step": 25830 }, { "epoch": 18.93040293040293, "grad_norm": 0.73828125, "learning_rate": 4.790743122410303e-05, "loss": 0.0339, "step": 25840 }, { "epoch": 18.937728937728938, "grad_norm": 0.470703125, "learning_rate": 4.7873215261856667e-05, "loss": 0.0357, "step": 25850 }, { "epoch": 18.945054945054945, "grad_norm": 0.5234375, "learning_rate": 4.7839000558189055e-05, "loss": 0.0355, "step": 25860 }, { "epoch": 18.952380952380953, "grad_norm": 0.404296875, "learning_rate": 4.7804787129493595e-05, "loss": 0.0346, "step": 25870 }, { "epoch": 18.95970695970696, "grad_norm": 0.451171875, "learning_rate": 4.777057499216304e-05, "loss": 0.0343, "step": 25880 }, { "epoch": 18.967032967032967, "grad_norm": 0.6015625, "learning_rate": 4.773636416258954e-05, "loss": 0.0328, "step": 25890 }, { "epoch": 18.974358974358974, "grad_norm": 0.77734375, "learning_rate": 4.770215465716466e-05, "loss": 0.0345, "step": 25900 }, { "epoch": 18.98168498168498, "grad_norm": 0.35546875, "learning_rate": 4.766794649227927e-05, "loss": 0.0347, "step": 25910 }, { "epoch": 18.98901098901099, "grad_norm": 0.333984375, "learning_rate": 4.763373968432363e-05, "loss": 0.0297, "step": 25920 }, { "epoch": 18.996336996336996, "grad_norm": 0.361328125, "learning_rate": 4.759953424968735e-05, "loss": 0.0296, "step": 25930 }, { "epoch": 19.003663003663004, "grad_norm": 0.32421875, "learning_rate": 4.7565330204759365e-05, "loss": 0.0333, "step": 25940 }, { "epoch": 19.01098901098901, "grad_norm": 0.55859375, "learning_rate": 4.753112756592798e-05, "loss": 0.0342, "step": 25950 }, { "epoch": 19.01831501831502, "grad_norm": 0.80078125, "learning_rate": 4.749692634958078e-05, "loss": 0.032, "step": 25960 }, { "epoch": 19.025641025641026, "grad_norm": 0.4765625, "learning_rate": 4.7462726572104694e-05, "loss": 0.0344, "step": 25970 }, { "epoch": 19.032967032967033, "grad_norm": 0.703125, "learning_rate": 4.742852824988595e-05, "loss": 0.0313, "step": 25980 }, { "epoch": 19.04029304029304, "grad_norm": 0.53125, "learning_rate": 4.739433139931009e-05, "loss": 0.0374, "step": 25990 }, { "epoch": 19.047619047619047, "grad_norm": 0.59765625, "learning_rate": 4.736013603676196e-05, "loss": 0.0371, "step": 26000 }, { "epoch": 19.054945054945055, "grad_norm": 0.435546875, "learning_rate": 4.732594217862566e-05, "loss": 0.0339, "step": 26010 }, { "epoch": 19.062271062271062, "grad_norm": 0.59375, "learning_rate": 4.7291749841284596e-05, "loss": 0.0325, "step": 26020 }, { "epoch": 19.06959706959707, "grad_norm": 0.419921875, "learning_rate": 4.7257559041121445e-05, "loss": 0.0364, "step": 26030 }, { "epoch": 19.076923076923077, "grad_norm": 0.34375, "learning_rate": 4.7223369794518146e-05, "loss": 0.0319, "step": 26040 }, { "epoch": 19.084249084249084, "grad_norm": 0.63671875, "learning_rate": 4.7189182117855876e-05, "loss": 0.0331, "step": 26050 }, { "epoch": 19.09157509157509, "grad_norm": 0.63671875, "learning_rate": 4.715499602751508e-05, "loss": 0.0344, "step": 26060 }, { "epoch": 19.0989010989011, "grad_norm": 0.546875, "learning_rate": 4.7120811539875424e-05, "loss": 0.0303, "step": 26070 }, { "epoch": 19.106227106227106, "grad_norm": 0.400390625, "learning_rate": 4.708662867131585e-05, "loss": 0.0369, "step": 26080 }, { "epoch": 19.113553113553113, "grad_norm": 0.5, "learning_rate": 4.705244743821445e-05, "loss": 0.0314, "step": 26090 }, { "epoch": 19.12087912087912, "grad_norm": 0.546875, "learning_rate": 4.701826785694861e-05, "loss": 0.0382, "step": 26100 }, { "epoch": 19.128205128205128, "grad_norm": 0.703125, "learning_rate": 4.698408994389486e-05, "loss": 0.0316, "step": 26110 }, { "epoch": 19.135531135531135, "grad_norm": 0.53125, "learning_rate": 4.694991371542897e-05, "loss": 0.0348, "step": 26120 }, { "epoch": 19.142857142857142, "grad_norm": 0.3828125, "learning_rate": 4.6915739187925876e-05, "loss": 0.0318, "step": 26130 }, { "epoch": 19.15018315018315, "grad_norm": 0.384765625, "learning_rate": 4.688156637775976e-05, "loss": 0.0312, "step": 26140 }, { "epoch": 19.157509157509157, "grad_norm": 0.4140625, "learning_rate": 4.68473953013039e-05, "loss": 0.036, "step": 26150 }, { "epoch": 19.164835164835164, "grad_norm": 0.431640625, "learning_rate": 4.681322597493078e-05, "loss": 0.0359, "step": 26160 }, { "epoch": 19.17216117216117, "grad_norm": 0.73828125, "learning_rate": 4.677905841501206e-05, "loss": 0.0334, "step": 26170 }, { "epoch": 19.17948717948718, "grad_norm": 0.63671875, "learning_rate": 4.674489263791854e-05, "loss": 0.0383, "step": 26180 }, { "epoch": 19.186813186813186, "grad_norm": 0.5625, "learning_rate": 4.671072866002015e-05, "loss": 0.0307, "step": 26190 }, { "epoch": 19.194139194139193, "grad_norm": 0.78515625, "learning_rate": 4.6676566497685985e-05, "loss": 0.0385, "step": 26200 }, { "epoch": 19.2014652014652, "grad_norm": 0.41796875, "learning_rate": 4.664240616728425e-05, "loss": 0.0346, "step": 26210 }, { "epoch": 19.208791208791208, "grad_norm": 0.63671875, "learning_rate": 4.6608247685182306e-05, "loss": 0.0347, "step": 26220 }, { "epoch": 19.216117216117215, "grad_norm": 0.470703125, "learning_rate": 4.657409106774657e-05, "loss": 0.0357, "step": 26230 }, { "epoch": 19.223443223443223, "grad_norm": 0.5390625, "learning_rate": 4.6539936331342596e-05, "loss": 0.033, "step": 26240 }, { "epoch": 19.23076923076923, "grad_norm": 0.388671875, "learning_rate": 4.6505783492335046e-05, "loss": 0.0342, "step": 26250 }, { "epoch": 19.238095238095237, "grad_norm": 0.44921875, "learning_rate": 4.6471632567087676e-05, "loss": 0.0341, "step": 26260 }, { "epoch": 19.245421245421245, "grad_norm": 0.439453125, "learning_rate": 4.6437483571963325e-05, "loss": 0.0337, "step": 26270 }, { "epoch": 19.252747252747252, "grad_norm": 0.458984375, "learning_rate": 4.6403336523323865e-05, "loss": 0.0318, "step": 26280 }, { "epoch": 19.26007326007326, "grad_norm": 0.5703125, "learning_rate": 4.636919143753029e-05, "loss": 0.0348, "step": 26290 }, { "epoch": 19.267399267399266, "grad_norm": 0.58984375, "learning_rate": 4.633504833094262e-05, "loss": 0.0331, "step": 26300 }, { "epoch": 19.274725274725274, "grad_norm": 0.345703125, "learning_rate": 4.630090721991994e-05, "loss": 0.0324, "step": 26310 }, { "epoch": 19.28205128205128, "grad_norm": 0.408203125, "learning_rate": 4.626676812082039e-05, "loss": 0.0294, "step": 26320 }, { "epoch": 19.28937728937729, "grad_norm": 0.515625, "learning_rate": 4.623263105000111e-05, "loss": 0.0352, "step": 26330 }, { "epoch": 19.296703296703296, "grad_norm": 0.474609375, "learning_rate": 4.619849602381829e-05, "loss": 0.0307, "step": 26340 }, { "epoch": 19.304029304029303, "grad_norm": 0.44140625, "learning_rate": 4.6164363058627196e-05, "loss": 0.0359, "step": 26350 }, { "epoch": 19.31135531135531, "grad_norm": 0.376953125, "learning_rate": 4.6130232170781976e-05, "loss": 0.0317, "step": 26360 }, { "epoch": 19.318681318681318, "grad_norm": 0.427734375, "learning_rate": 4.60961033766359e-05, "loss": 0.0318, "step": 26370 }, { "epoch": 19.326007326007325, "grad_norm": 0.70703125, "learning_rate": 4.606197669254118e-05, "loss": 0.0379, "step": 26380 }, { "epoch": 19.333333333333332, "grad_norm": 0.408203125, "learning_rate": 4.602785213484903e-05, "loss": 0.0285, "step": 26390 }, { "epoch": 19.34065934065934, "grad_norm": 0.35546875, "learning_rate": 4.5993729719909645e-05, "loss": 0.0303, "step": 26400 }, { "epoch": 19.347985347985347, "grad_norm": 0.7578125, "learning_rate": 4.5959609464072205e-05, "loss": 0.0346, "step": 26410 }, { "epoch": 19.355311355311354, "grad_norm": 0.380859375, "learning_rate": 4.592549138368483e-05, "loss": 0.0299, "step": 26420 }, { "epoch": 19.36263736263736, "grad_norm": 0.458984375, "learning_rate": 4.589137549509462e-05, "loss": 0.0331, "step": 26430 }, { "epoch": 19.36996336996337, "grad_norm": 0.79296875, "learning_rate": 4.585726181464761e-05, "loss": 0.0327, "step": 26440 }, { "epoch": 19.377289377289376, "grad_norm": 0.765625, "learning_rate": 4.58231503586888e-05, "loss": 0.0345, "step": 26450 }, { "epoch": 19.384615384615383, "grad_norm": 0.431640625, "learning_rate": 4.5789041143562087e-05, "loss": 0.0358, "step": 26460 }, { "epoch": 19.39194139194139, "grad_norm": 0.3359375, "learning_rate": 4.575493418561035e-05, "loss": 0.0296, "step": 26470 }, { "epoch": 19.399267399267398, "grad_norm": 0.45703125, "learning_rate": 4.572082950117529e-05, "loss": 0.031, "step": 26480 }, { "epoch": 19.406593406593405, "grad_norm": 0.37890625, "learning_rate": 4.5686727106597624e-05, "loss": 0.0292, "step": 26490 }, { "epoch": 19.413919413919412, "grad_norm": 0.35546875, "learning_rate": 4.5652627018216925e-05, "loss": 0.0367, "step": 26500 }, { "epoch": 19.42124542124542, "grad_norm": 0.5078125, "learning_rate": 4.5618529252371654e-05, "loss": 0.0342, "step": 26510 }, { "epoch": 19.428571428571427, "grad_norm": 0.396484375, "learning_rate": 4.558443382539918e-05, "loss": 0.0393, "step": 26520 }, { "epoch": 19.435897435897434, "grad_norm": 0.38671875, "learning_rate": 4.555034075363572e-05, "loss": 0.0302, "step": 26530 }, { "epoch": 19.44322344322344, "grad_norm": 0.421875, "learning_rate": 4.551625005341641e-05, "loss": 0.0361, "step": 26540 }, { "epoch": 19.45054945054945, "grad_norm": 0.486328125, "learning_rate": 4.548216174107521e-05, "loss": 0.0354, "step": 26550 }, { "epoch": 19.457875457875456, "grad_norm": 0.5546875, "learning_rate": 4.544807583294494e-05, "loss": 0.0318, "step": 26560 }, { "epoch": 19.465201465201464, "grad_norm": 0.484375, "learning_rate": 4.5413992345357295e-05, "loss": 0.0334, "step": 26570 }, { "epoch": 19.47252747252747, "grad_norm": 0.474609375, "learning_rate": 4.537991129464279e-05, "loss": 0.0386, "step": 26580 }, { "epoch": 19.479853479853478, "grad_norm": 0.365234375, "learning_rate": 4.534583269713076e-05, "loss": 0.0337, "step": 26590 }, { "epoch": 19.487179487179485, "grad_norm": 0.6953125, "learning_rate": 4.5311756569149405e-05, "loss": 0.0374, "step": 26600 }, { "epoch": 19.494505494505496, "grad_norm": 0.341796875, "learning_rate": 4.5277682927025674e-05, "loss": 0.0351, "step": 26610 }, { "epoch": 19.501831501831504, "grad_norm": 0.396484375, "learning_rate": 4.52436117870854e-05, "loss": 0.0308, "step": 26620 }, { "epoch": 19.50915750915751, "grad_norm": 0.8125, "learning_rate": 4.5209543165653175e-05, "loss": 0.0349, "step": 26630 }, { "epoch": 19.516483516483518, "grad_norm": 0.5546875, "learning_rate": 4.517547707905239e-05, "loss": 0.0404, "step": 26640 }, { "epoch": 19.523809523809526, "grad_norm": 0.41015625, "learning_rate": 4.514141354360523e-05, "loss": 0.0374, "step": 26650 }, { "epoch": 19.531135531135533, "grad_norm": 0.345703125, "learning_rate": 4.510735257563265e-05, "loss": 0.033, "step": 26660 }, { "epoch": 19.53846153846154, "grad_norm": 0.65234375, "learning_rate": 4.507329419145438e-05, "loss": 0.0335, "step": 26670 }, { "epoch": 19.545787545787547, "grad_norm": 0.8515625, "learning_rate": 4.503923840738891e-05, "loss": 0.0332, "step": 26680 }, { "epoch": 19.553113553113555, "grad_norm": 0.546875, "learning_rate": 4.5005185239753475e-05, "loss": 0.0344, "step": 26690 }, { "epoch": 19.560439560439562, "grad_norm": 0.416015625, "learning_rate": 4.497113470486407e-05, "loss": 0.0323, "step": 26700 }, { "epoch": 19.56776556776557, "grad_norm": 0.419921875, "learning_rate": 4.493708681903542e-05, "loss": 0.0383, "step": 26710 }, { "epoch": 19.575091575091577, "grad_norm": 0.287109375, "learning_rate": 4.4903041598581005e-05, "loss": 0.0359, "step": 26720 }, { "epoch": 19.582417582417584, "grad_norm": 0.63671875, "learning_rate": 4.4868999059813e-05, "loss": 0.0318, "step": 26730 }, { "epoch": 19.58974358974359, "grad_norm": 0.443359375, "learning_rate": 4.4834959219042274e-05, "loss": 0.0308, "step": 26740 }, { "epoch": 19.5970695970696, "grad_norm": 0.388671875, "learning_rate": 4.480092209257844e-05, "loss": 0.0326, "step": 26750 }, { "epoch": 19.604395604395606, "grad_norm": 0.384765625, "learning_rate": 4.476688769672982e-05, "loss": 0.0354, "step": 26760 }, { "epoch": 19.611721611721613, "grad_norm": 0.359375, "learning_rate": 4.473285604780341e-05, "loss": 0.0319, "step": 26770 }, { "epoch": 19.61904761904762, "grad_norm": 0.462890625, "learning_rate": 4.469882716210487e-05, "loss": 0.0335, "step": 26780 }, { "epoch": 19.626373626373628, "grad_norm": 0.45703125, "learning_rate": 4.4664801055938567e-05, "loss": 0.0319, "step": 26790 }, { "epoch": 19.633699633699635, "grad_norm": 0.291015625, "learning_rate": 4.4630777745607525e-05, "loss": 0.0353, "step": 26800 }, { "epoch": 19.641025641025642, "grad_norm": 0.515625, "learning_rate": 4.459675724741344e-05, "loss": 0.0363, "step": 26810 }, { "epoch": 19.64835164835165, "grad_norm": 0.59765625, "learning_rate": 4.456273957765663e-05, "loss": 0.0321, "step": 26820 }, { "epoch": 19.655677655677657, "grad_norm": 0.421875, "learning_rate": 4.452872475263607e-05, "loss": 0.0334, "step": 26830 }, { "epoch": 19.663003663003664, "grad_norm": 0.37890625, "learning_rate": 4.44947127886494e-05, "loss": 0.0363, "step": 26840 }, { "epoch": 19.67032967032967, "grad_norm": 0.8125, "learning_rate": 4.446070370199288e-05, "loss": 0.0343, "step": 26850 }, { "epoch": 19.67765567765568, "grad_norm": 0.578125, "learning_rate": 4.442669750896134e-05, "loss": 0.0394, "step": 26860 }, { "epoch": 19.684981684981686, "grad_norm": 0.48828125, "learning_rate": 4.4392694225848294e-05, "loss": 0.0343, "step": 26870 }, { "epoch": 19.692307692307693, "grad_norm": 0.45703125, "learning_rate": 4.43586938689458e-05, "loss": 0.0387, "step": 26880 }, { "epoch": 19.6996336996337, "grad_norm": 0.3671875, "learning_rate": 4.432469645454457e-05, "loss": 0.0282, "step": 26890 }, { "epoch": 19.706959706959708, "grad_norm": 0.57421875, "learning_rate": 4.4290701998933875e-05, "loss": 0.0345, "step": 26900 }, { "epoch": 19.714285714285715, "grad_norm": 0.3828125, "learning_rate": 4.425671051840158e-05, "loss": 0.0344, "step": 26910 }, { "epoch": 19.721611721611723, "grad_norm": 0.3125, "learning_rate": 4.42227220292341e-05, "loss": 0.0304, "step": 26920 }, { "epoch": 19.72893772893773, "grad_norm": 0.60546875, "learning_rate": 4.4188736547716445e-05, "loss": 0.0305, "step": 26930 }, { "epoch": 19.736263736263737, "grad_norm": 0.6015625, "learning_rate": 4.415475409013218e-05, "loss": 0.0403, "step": 26940 }, { "epoch": 19.743589743589745, "grad_norm": 0.326171875, "learning_rate": 4.4120774672763415e-05, "loss": 0.0346, "step": 26950 }, { "epoch": 19.750915750915752, "grad_norm": 0.396484375, "learning_rate": 4.40867983118908e-05, "loss": 0.0331, "step": 26960 }, { "epoch": 19.75824175824176, "grad_norm": 0.60546875, "learning_rate": 4.4052825023793516e-05, "loss": 0.0316, "step": 26970 }, { "epoch": 19.765567765567766, "grad_norm": 0.3359375, "learning_rate": 4.4018854824749307e-05, "loss": 0.0326, "step": 26980 }, { "epoch": 19.772893772893774, "grad_norm": 0.4921875, "learning_rate": 4.398488773103437e-05, "loss": 0.0363, "step": 26990 }, { "epoch": 19.78021978021978, "grad_norm": 0.31640625, "learning_rate": 4.39509237589235e-05, "loss": 0.0332, "step": 27000 }, { "epoch": 19.78754578754579, "grad_norm": 0.5703125, "learning_rate": 4.39169629246899e-05, "loss": 0.0374, "step": 27010 }, { "epoch": 19.794871794871796, "grad_norm": 0.337890625, "learning_rate": 4.388300524460535e-05, "loss": 0.0323, "step": 27020 }, { "epoch": 19.802197802197803, "grad_norm": 0.3828125, "learning_rate": 4.384905073494008e-05, "loss": 0.0317, "step": 27030 }, { "epoch": 19.80952380952381, "grad_norm": 0.71875, "learning_rate": 4.3815099411962826e-05, "loss": 0.0316, "step": 27040 }, { "epoch": 19.816849816849818, "grad_norm": 0.74609375, "learning_rate": 4.378115129194076e-05, "loss": 0.0313, "step": 27050 }, { "epoch": 19.824175824175825, "grad_norm": 0.671875, "learning_rate": 4.3747206391139544e-05, "loss": 0.03, "step": 27060 }, { "epoch": 19.831501831501832, "grad_norm": 0.53515625, "learning_rate": 4.37132647258233e-05, "loss": 0.0325, "step": 27070 }, { "epoch": 19.83882783882784, "grad_norm": 0.404296875, "learning_rate": 4.36793263122546e-05, "loss": 0.035, "step": 27080 }, { "epoch": 19.846153846153847, "grad_norm": 0.4140625, "learning_rate": 4.364539116669443e-05, "loss": 0.0369, "step": 27090 }, { "epoch": 19.853479853479854, "grad_norm": 0.578125, "learning_rate": 4.3611459305402244e-05, "loss": 0.0356, "step": 27100 }, { "epoch": 19.86080586080586, "grad_norm": 0.47265625, "learning_rate": 4.3577530744635944e-05, "loss": 0.0343, "step": 27110 }, { "epoch": 19.86813186813187, "grad_norm": 0.5, "learning_rate": 4.3543605500651754e-05, "loss": 0.0308, "step": 27120 }, { "epoch": 19.875457875457876, "grad_norm": 0.3671875, "learning_rate": 4.3509683589704393e-05, "loss": 0.0366, "step": 27130 }, { "epoch": 19.882783882783883, "grad_norm": 0.458984375, "learning_rate": 4.347576502804699e-05, "loss": 0.0341, "step": 27140 }, { "epoch": 19.89010989010989, "grad_norm": 0.490234375, "learning_rate": 4.3441849831931e-05, "loss": 0.036, "step": 27150 }, { "epoch": 19.897435897435898, "grad_norm": 0.40234375, "learning_rate": 4.340793801760633e-05, "loss": 0.0341, "step": 27160 }, { "epoch": 19.904761904761905, "grad_norm": 0.5390625, "learning_rate": 4.3374029601321254e-05, "loss": 0.0332, "step": 27170 }, { "epoch": 19.912087912087912, "grad_norm": 0.59375, "learning_rate": 4.334012459932237e-05, "loss": 0.041, "step": 27180 }, { "epoch": 19.91941391941392, "grad_norm": 0.40234375, "learning_rate": 4.330622302785471e-05, "loss": 0.0363, "step": 27190 }, { "epoch": 19.926739926739927, "grad_norm": 0.51953125, "learning_rate": 4.327232490316161e-05, "loss": 0.0349, "step": 27200 }, { "epoch": 19.934065934065934, "grad_norm": 0.6640625, "learning_rate": 4.3238430241484795e-05, "loss": 0.0306, "step": 27210 }, { "epoch": 19.94139194139194, "grad_norm": 0.4765625, "learning_rate": 4.320453905906429e-05, "loss": 0.0314, "step": 27220 }, { "epoch": 19.94871794871795, "grad_norm": 0.326171875, "learning_rate": 4.3170651372138466e-05, "loss": 0.0332, "step": 27230 }, { "epoch": 19.956043956043956, "grad_norm": 0.49609375, "learning_rate": 4.3136767196944064e-05, "loss": 0.0326, "step": 27240 }, { "epoch": 19.963369963369964, "grad_norm": 0.345703125, "learning_rate": 4.310288654971605e-05, "loss": 0.0346, "step": 27250 }, { "epoch": 19.97069597069597, "grad_norm": 0.431640625, "learning_rate": 4.3069009446687794e-05, "loss": 0.0366, "step": 27260 }, { "epoch": 19.978021978021978, "grad_norm": 0.490234375, "learning_rate": 4.3035135904090916e-05, "loss": 0.0313, "step": 27270 }, { "epoch": 19.985347985347985, "grad_norm": 0.310546875, "learning_rate": 4.300126593815533e-05, "loss": 0.0341, "step": 27280 }, { "epoch": 19.992673992673993, "grad_norm": 0.37890625, "learning_rate": 4.296739956510927e-05, "loss": 0.034, "step": 27290 }, { "epoch": 20.0, "grad_norm": 0.859375, "learning_rate": 4.29335368011792e-05, "loss": 0.0364, "step": 27300 }, { "epoch": 20.007326007326007, "grad_norm": 0.390625, "learning_rate": 4.289967766258992e-05, "loss": 0.0329, "step": 27310 }, { "epoch": 20.014652014652015, "grad_norm": 0.6796875, "learning_rate": 4.2865822165564406e-05, "loss": 0.0347, "step": 27320 }, { "epoch": 20.021978021978022, "grad_norm": 0.41796875, "learning_rate": 4.283197032632397e-05, "loss": 0.0308, "step": 27330 }, { "epoch": 20.02930402930403, "grad_norm": 0.578125, "learning_rate": 4.2798122161088136e-05, "loss": 0.0345, "step": 27340 }, { "epoch": 20.036630036630036, "grad_norm": 0.326171875, "learning_rate": 4.276427768607466e-05, "loss": 0.0384, "step": 27350 }, { "epoch": 20.043956043956044, "grad_norm": 0.82421875, "learning_rate": 4.273043691749955e-05, "loss": 0.0375, "step": 27360 }, { "epoch": 20.05128205128205, "grad_norm": 0.38671875, "learning_rate": 4.269659987157703e-05, "loss": 0.0362, "step": 27370 }, { "epoch": 20.05860805860806, "grad_norm": 0.5625, "learning_rate": 4.2662766564519515e-05, "loss": 0.0313, "step": 27380 }, { "epoch": 20.065934065934066, "grad_norm": 0.419921875, "learning_rate": 4.2628937012537676e-05, "loss": 0.0368, "step": 27390 }, { "epoch": 20.073260073260073, "grad_norm": 0.4296875, "learning_rate": 4.259511123184037e-05, "loss": 0.0347, "step": 27400 }, { "epoch": 20.08058608058608, "grad_norm": 0.40625, "learning_rate": 4.25612892386346e-05, "loss": 0.0332, "step": 27410 }, { "epoch": 20.087912087912088, "grad_norm": 0.33984375, "learning_rate": 4.252747104912563e-05, "loss": 0.037, "step": 27420 }, { "epoch": 20.095238095238095, "grad_norm": 0.37890625, "learning_rate": 4.2493656679516836e-05, "loss": 0.0297, "step": 27430 }, { "epoch": 20.102564102564102, "grad_norm": 0.298828125, "learning_rate": 4.245984614600983e-05, "loss": 0.0303, "step": 27440 }, { "epoch": 20.10989010989011, "grad_norm": 0.66796875, "learning_rate": 4.2426039464804305e-05, "loss": 0.0349, "step": 27450 }, { "epoch": 20.117216117216117, "grad_norm": 0.376953125, "learning_rate": 4.2392236652098176e-05, "loss": 0.0403, "step": 27460 }, { "epoch": 20.124542124542124, "grad_norm": 0.40625, "learning_rate": 4.235843772408747e-05, "loss": 0.0308, "step": 27470 }, { "epoch": 20.13186813186813, "grad_norm": 0.640625, "learning_rate": 4.2324642696966376e-05, "loss": 0.0315, "step": 27480 }, { "epoch": 20.13919413919414, "grad_norm": 0.421875, "learning_rate": 4.229085158692719e-05, "loss": 0.0383, "step": 27490 }, { "epoch": 20.146520146520146, "grad_norm": 0.703125, "learning_rate": 4.2257064410160343e-05, "loss": 0.037, "step": 27500 }, { "epoch": 20.153846153846153, "grad_norm": 0.42578125, "learning_rate": 4.222328118285437e-05, "loss": 0.0337, "step": 27510 }, { "epoch": 20.16117216117216, "grad_norm": 0.392578125, "learning_rate": 4.2189501921195936e-05, "loss": 0.0366, "step": 27520 }, { "epoch": 20.168498168498168, "grad_norm": 0.3125, "learning_rate": 4.215572664136979e-05, "loss": 0.0294, "step": 27530 }, { "epoch": 20.175824175824175, "grad_norm": 0.455078125, "learning_rate": 4.212195535955878e-05, "loss": 0.0329, "step": 27540 }, { "epoch": 20.183150183150182, "grad_norm": 0.76953125, "learning_rate": 4.208818809194382e-05, "loss": 0.031, "step": 27550 }, { "epoch": 20.19047619047619, "grad_norm": 0.392578125, "learning_rate": 4.2054424854703936e-05, "loss": 0.0324, "step": 27560 }, { "epoch": 20.197802197802197, "grad_norm": 0.443359375, "learning_rate": 4.202066566401619e-05, "loss": 0.0325, "step": 27570 }, { "epoch": 20.205128205128204, "grad_norm": 0.6171875, "learning_rate": 4.198691053605573e-05, "loss": 0.0305, "step": 27580 }, { "epoch": 20.21245421245421, "grad_norm": 0.353515625, "learning_rate": 4.195315948699573e-05, "loss": 0.0295, "step": 27590 }, { "epoch": 20.21978021978022, "grad_norm": 0.7578125, "learning_rate": 4.191941253300743e-05, "loss": 0.0338, "step": 27600 }, { "epoch": 20.227106227106226, "grad_norm": 0.44921875, "learning_rate": 4.188566969026011e-05, "loss": 0.0346, "step": 27610 }, { "epoch": 20.234432234432234, "grad_norm": 0.4453125, "learning_rate": 4.1851930974921085e-05, "loss": 0.0308, "step": 27620 }, { "epoch": 20.24175824175824, "grad_norm": 0.3515625, "learning_rate": 4.181819640315565e-05, "loss": 0.033, "step": 27630 }, { "epoch": 20.249084249084248, "grad_norm": 0.5234375, "learning_rate": 4.178446599112716e-05, "loss": 0.0355, "step": 27640 }, { "epoch": 20.256410256410255, "grad_norm": 0.73828125, "learning_rate": 4.1750739754996956e-05, "loss": 0.0397, "step": 27650 }, { "epoch": 20.263736263736263, "grad_norm": 0.427734375, "learning_rate": 4.171701771092438e-05, "loss": 0.0334, "step": 27660 }, { "epoch": 20.27106227106227, "grad_norm": 0.494140625, "learning_rate": 4.16832998750668e-05, "loss": 0.0345, "step": 27670 }, { "epoch": 20.278388278388277, "grad_norm": 0.54296875, "learning_rate": 4.16495862635795e-05, "loss": 0.034, "step": 27680 }, { "epoch": 20.285714285714285, "grad_norm": 0.427734375, "learning_rate": 4.16158768926158e-05, "loss": 0.0308, "step": 27690 }, { "epoch": 20.293040293040292, "grad_norm": 0.458984375, "learning_rate": 4.158217177832695e-05, "loss": 0.0323, "step": 27700 }, { "epoch": 20.3003663003663, "grad_norm": 0.373046875, "learning_rate": 4.15484709368622e-05, "loss": 0.0297, "step": 27710 }, { "epoch": 20.307692307692307, "grad_norm": 0.328125, "learning_rate": 4.1514774384368705e-05, "loss": 0.0321, "step": 27720 }, { "epoch": 20.315018315018314, "grad_norm": 0.65234375, "learning_rate": 4.1481082136991596e-05, "loss": 0.0406, "step": 27730 }, { "epoch": 20.32234432234432, "grad_norm": 0.39453125, "learning_rate": 4.144739421087396e-05, "loss": 0.0366, "step": 27740 }, { "epoch": 20.32967032967033, "grad_norm": 0.486328125, "learning_rate": 4.141371062215674e-05, "loss": 0.0337, "step": 27750 }, { "epoch": 20.336996336996336, "grad_norm": 0.47265625, "learning_rate": 4.1380031386978866e-05, "loss": 0.0346, "step": 27760 }, { "epoch": 20.344322344322343, "grad_norm": 0.357421875, "learning_rate": 4.134635652147719e-05, "loss": 0.0329, "step": 27770 }, { "epoch": 20.35164835164835, "grad_norm": 0.453125, "learning_rate": 4.131268604178642e-05, "loss": 0.0339, "step": 27780 }, { "epoch": 20.358974358974358, "grad_norm": 0.52734375, "learning_rate": 4.1279019964039186e-05, "loss": 0.0334, "step": 27790 }, { "epoch": 20.366300366300365, "grad_norm": 0.373046875, "learning_rate": 4.124535830436601e-05, "loss": 0.0291, "step": 27800 }, { "epoch": 20.373626373626372, "grad_norm": 0.373046875, "learning_rate": 4.121170107889533e-05, "loss": 0.0362, "step": 27810 }, { "epoch": 20.38095238095238, "grad_norm": 0.470703125, "learning_rate": 4.117804830375338e-05, "loss": 0.0337, "step": 27820 }, { "epoch": 20.388278388278387, "grad_norm": 0.44921875, "learning_rate": 4.114439999506434e-05, "loss": 0.032, "step": 27830 }, { "epoch": 20.395604395604394, "grad_norm": 0.40625, "learning_rate": 4.111075616895021e-05, "loss": 0.0311, "step": 27840 }, { "epoch": 20.4029304029304, "grad_norm": 0.384765625, "learning_rate": 4.107711684153085e-05, "loss": 0.032, "step": 27850 }, { "epoch": 20.41025641025641, "grad_norm": 0.34375, "learning_rate": 4.104348202892396e-05, "loss": 0.0308, "step": 27860 }, { "epoch": 20.417582417582416, "grad_norm": 0.40234375, "learning_rate": 4.100985174724511e-05, "loss": 0.0319, "step": 27870 }, { "epoch": 20.424908424908423, "grad_norm": 0.423828125, "learning_rate": 4.0976226012607625e-05, "loss": 0.0337, "step": 27880 }, { "epoch": 20.43223443223443, "grad_norm": 0.27734375, "learning_rate": 4.0942604841122725e-05, "loss": 0.0347, "step": 27890 }, { "epoch": 20.439560439560438, "grad_norm": 0.4296875, "learning_rate": 4.090898824889943e-05, "loss": 0.0322, "step": 27900 }, { "epoch": 20.446886446886445, "grad_norm": 0.44140625, "learning_rate": 4.087537625204452e-05, "loss": 0.0352, "step": 27910 }, { "epoch": 20.454212454212453, "grad_norm": 0.3359375, "learning_rate": 4.084176886666262e-05, "loss": 0.0347, "step": 27920 }, { "epoch": 20.46153846153846, "grad_norm": 0.890625, "learning_rate": 4.0808166108856135e-05, "loss": 0.0395, "step": 27930 }, { "epoch": 20.468864468864467, "grad_norm": 0.373046875, "learning_rate": 4.077456799472525e-05, "loss": 0.0328, "step": 27940 }, { "epoch": 20.476190476190474, "grad_norm": 0.349609375, "learning_rate": 4.074097454036792e-05, "loss": 0.0352, "step": 27950 }, { "epoch": 20.483516483516482, "grad_norm": 0.4140625, "learning_rate": 4.070738576187986e-05, "loss": 0.0329, "step": 27960 }, { "epoch": 20.49084249084249, "grad_norm": 0.4375, "learning_rate": 4.067380167535457e-05, "loss": 0.0331, "step": 27970 }, { "epoch": 20.498168498168496, "grad_norm": 0.8359375, "learning_rate": 4.06402222968833e-05, "loss": 0.0337, "step": 27980 }, { "epoch": 20.505494505494504, "grad_norm": 0.3046875, "learning_rate": 4.060664764255499e-05, "loss": 0.0313, "step": 27990 }, { "epoch": 20.51282051282051, "grad_norm": 0.369140625, "learning_rate": 4.057307772845642e-05, "loss": 0.031, "step": 28000 }, { "epoch": 20.520146520146522, "grad_norm": 0.3515625, "learning_rate": 4.0539512570671975e-05, "loss": 0.0282, "step": 28010 }, { "epoch": 20.52747252747253, "grad_norm": 0.58203125, "learning_rate": 4.050595218528385e-05, "loss": 0.0413, "step": 28020 }, { "epoch": 20.534798534798536, "grad_norm": 0.4140625, "learning_rate": 4.047239658837193e-05, "loss": 0.0337, "step": 28030 }, { "epoch": 20.542124542124544, "grad_norm": 0.392578125, "learning_rate": 4.0438845796013815e-05, "loss": 0.036, "step": 28040 }, { "epoch": 20.54945054945055, "grad_norm": 0.427734375, "learning_rate": 4.0405299824284754e-05, "loss": 0.0307, "step": 28050 }, { "epoch": 20.55677655677656, "grad_norm": 0.357421875, "learning_rate": 4.037175868925775e-05, "loss": 0.0321, "step": 28060 }, { "epoch": 20.564102564102566, "grad_norm": 0.392578125, "learning_rate": 4.033822240700345e-05, "loss": 0.0288, "step": 28070 }, { "epoch": 20.571428571428573, "grad_norm": 0.3671875, "learning_rate": 4.030469099359021e-05, "loss": 0.0357, "step": 28080 }, { "epoch": 20.57875457875458, "grad_norm": 0.64453125, "learning_rate": 4.027116446508399e-05, "loss": 0.0348, "step": 28090 }, { "epoch": 20.586080586080588, "grad_norm": 0.412109375, "learning_rate": 4.023764283754848e-05, "loss": 0.0374, "step": 28100 }, { "epoch": 20.593406593406595, "grad_norm": 0.439453125, "learning_rate": 4.0204126127044976e-05, "loss": 0.0291, "step": 28110 }, { "epoch": 20.600732600732602, "grad_norm": 0.330078125, "learning_rate": 4.017061434963247e-05, "loss": 0.032, "step": 28120 }, { "epoch": 20.60805860805861, "grad_norm": 0.36328125, "learning_rate": 4.0137107521367513e-05, "loss": 0.0281, "step": 28130 }, { "epoch": 20.615384615384617, "grad_norm": 0.498046875, "learning_rate": 4.010360565830433e-05, "loss": 0.029, "step": 28140 }, { "epoch": 20.622710622710624, "grad_norm": 0.314453125, "learning_rate": 4.007010877649476e-05, "loss": 0.0315, "step": 28150 }, { "epoch": 20.63003663003663, "grad_norm": 0.373046875, "learning_rate": 4.003661689198827e-05, "loss": 0.0318, "step": 28160 }, { "epoch": 20.63736263736264, "grad_norm": 0.3828125, "learning_rate": 4.000313002083194e-05, "loss": 0.0315, "step": 28170 }, { "epoch": 20.644688644688646, "grad_norm": 0.60546875, "learning_rate": 3.996964817907039e-05, "loss": 0.0312, "step": 28180 }, { "epoch": 20.652014652014653, "grad_norm": 0.359375, "learning_rate": 3.993617138274589e-05, "loss": 0.0301, "step": 28190 }, { "epoch": 20.65934065934066, "grad_norm": 0.416015625, "learning_rate": 3.990269964789827e-05, "loss": 0.0355, "step": 28200 }, { "epoch": 20.666666666666668, "grad_norm": 0.4765625, "learning_rate": 3.986923299056494e-05, "loss": 0.0328, "step": 28210 }, { "epoch": 20.673992673992675, "grad_norm": 0.419921875, "learning_rate": 3.9835771426780854e-05, "loss": 0.0346, "step": 28220 }, { "epoch": 20.681318681318682, "grad_norm": 0.51953125, "learning_rate": 3.9802314972578566e-05, "loss": 0.0369, "step": 28230 }, { "epoch": 20.68864468864469, "grad_norm": 0.44140625, "learning_rate": 3.9768863643988155e-05, "loss": 0.0326, "step": 28240 }, { "epoch": 20.695970695970697, "grad_norm": 0.4375, "learning_rate": 3.9735417457037266e-05, "loss": 0.0332, "step": 28250 }, { "epoch": 20.703296703296704, "grad_norm": 0.4453125, "learning_rate": 3.970197642775103e-05, "loss": 0.033, "step": 28260 }, { "epoch": 20.71062271062271, "grad_norm": 0.275390625, "learning_rate": 3.9668540572152166e-05, "loss": 0.0344, "step": 28270 }, { "epoch": 20.71794871794872, "grad_norm": 0.5234375, "learning_rate": 3.963510990626087e-05, "loss": 0.032, "step": 28280 }, { "epoch": 20.725274725274726, "grad_norm": 0.431640625, "learning_rate": 3.9601684446094896e-05, "loss": 0.0299, "step": 28290 }, { "epoch": 20.732600732600734, "grad_norm": 0.45703125, "learning_rate": 3.9568264207669446e-05, "loss": 0.0304, "step": 28300 }, { "epoch": 20.73992673992674, "grad_norm": 0.470703125, "learning_rate": 3.95348492069973e-05, "loss": 0.032, "step": 28310 }, { "epoch": 20.747252747252748, "grad_norm": 0.470703125, "learning_rate": 3.950143946008863e-05, "loss": 0.0344, "step": 28320 }, { "epoch": 20.754578754578755, "grad_norm": 0.400390625, "learning_rate": 3.946803498295118e-05, "loss": 0.0395, "step": 28330 }, { "epoch": 20.761904761904763, "grad_norm": 0.48046875, "learning_rate": 3.943463579159011e-05, "loss": 0.0317, "step": 28340 }, { "epoch": 20.76923076923077, "grad_norm": 0.32421875, "learning_rate": 3.940124190200808e-05, "loss": 0.0323, "step": 28350 }, { "epoch": 20.776556776556777, "grad_norm": 0.5234375, "learning_rate": 3.936785333020519e-05, "loss": 0.0303, "step": 28360 }, { "epoch": 20.783882783882785, "grad_norm": 0.42578125, "learning_rate": 3.9334470092178996e-05, "loss": 0.0349, "step": 28370 }, { "epoch": 20.791208791208792, "grad_norm": 0.376953125, "learning_rate": 3.930109220392454e-05, "loss": 0.0336, "step": 28380 }, { "epoch": 20.7985347985348, "grad_norm": 0.5625, "learning_rate": 3.92677196814342e-05, "loss": 0.0317, "step": 28390 }, { "epoch": 20.805860805860807, "grad_norm": 0.5546875, "learning_rate": 3.923435254069789e-05, "loss": 0.0352, "step": 28400 }, { "epoch": 20.813186813186814, "grad_norm": 0.435546875, "learning_rate": 3.92009907977029e-05, "loss": 0.0354, "step": 28410 }, { "epoch": 20.82051282051282, "grad_norm": 0.5234375, "learning_rate": 3.9167634468433906e-05, "loss": 0.0358, "step": 28420 }, { "epoch": 20.82783882783883, "grad_norm": 0.298828125, "learning_rate": 3.913428356887305e-05, "loss": 0.0323, "step": 28430 }, { "epoch": 20.835164835164836, "grad_norm": 0.53125, "learning_rate": 3.910093811499985e-05, "loss": 0.0394, "step": 28440 }, { "epoch": 20.842490842490843, "grad_norm": 0.71875, "learning_rate": 3.906759812279116e-05, "loss": 0.0318, "step": 28450 }, { "epoch": 20.84981684981685, "grad_norm": 0.478515625, "learning_rate": 3.903426360822131e-05, "loss": 0.0367, "step": 28460 }, { "epoch": 20.857142857142858, "grad_norm": 0.49609375, "learning_rate": 3.9000934587261936e-05, "loss": 0.0306, "step": 28470 }, { "epoch": 20.864468864468865, "grad_norm": 0.5078125, "learning_rate": 3.896761107588209e-05, "loss": 0.0334, "step": 28480 }, { "epoch": 20.871794871794872, "grad_norm": 0.43359375, "learning_rate": 3.8934293090048125e-05, "loss": 0.037, "step": 28490 }, { "epoch": 20.87912087912088, "grad_norm": 0.48046875, "learning_rate": 3.890098064572381e-05, "loss": 0.0327, "step": 28500 }, { "epoch": 20.886446886446887, "grad_norm": 0.44140625, "learning_rate": 3.8867673758870234e-05, "loss": 0.0317, "step": 28510 }, { "epoch": 20.893772893772894, "grad_norm": 0.404296875, "learning_rate": 3.8834372445445776e-05, "loss": 0.0332, "step": 28520 }, { "epoch": 20.9010989010989, "grad_norm": 0.57421875, "learning_rate": 3.880107672140623e-05, "loss": 0.0321, "step": 28530 }, { "epoch": 20.90842490842491, "grad_norm": 0.671875, "learning_rate": 3.8767786602704654e-05, "loss": 0.0377, "step": 28540 }, { "epoch": 20.915750915750916, "grad_norm": 0.5703125, "learning_rate": 3.8734502105291425e-05, "loss": 0.0331, "step": 28550 }, { "epoch": 20.923076923076923, "grad_norm": 0.353515625, "learning_rate": 3.870122324511426e-05, "loss": 0.0292, "step": 28560 }, { "epoch": 20.93040293040293, "grad_norm": 0.361328125, "learning_rate": 3.866795003811815e-05, "loss": 0.0306, "step": 28570 }, { "epoch": 20.937728937728938, "grad_norm": 0.41796875, "learning_rate": 3.863468250024536e-05, "loss": 0.0347, "step": 28580 }, { "epoch": 20.945054945054945, "grad_norm": 0.3984375, "learning_rate": 3.860142064743548e-05, "loss": 0.0323, "step": 28590 }, { "epoch": 20.952380952380953, "grad_norm": 0.4140625, "learning_rate": 3.8568164495625347e-05, "loss": 0.0341, "step": 28600 }, { "epoch": 20.95970695970696, "grad_norm": 0.5078125, "learning_rate": 3.8534914060749075e-05, "loss": 0.0326, "step": 28610 }, { "epoch": 20.967032967032967, "grad_norm": 0.48828125, "learning_rate": 3.850166935873804e-05, "loss": 0.0406, "step": 28620 }, { "epoch": 20.974358974358974, "grad_norm": 0.42578125, "learning_rate": 3.846843040552088e-05, "loss": 0.0335, "step": 28630 }, { "epoch": 20.98168498168498, "grad_norm": 0.5703125, "learning_rate": 3.8435197217023434e-05, "loss": 0.0337, "step": 28640 }, { "epoch": 20.98901098901099, "grad_norm": 0.45703125, "learning_rate": 3.8401969809168844e-05, "loss": 0.0301, "step": 28650 }, { "epoch": 20.996336996336996, "grad_norm": 0.359375, "learning_rate": 3.836874819787743e-05, "loss": 0.0291, "step": 28660 }, { "epoch": 21.003663003663004, "grad_norm": 0.5703125, "learning_rate": 3.833553239906678e-05, "loss": 0.0353, "step": 28670 }, { "epoch": 21.01098901098901, "grad_norm": 0.375, "learning_rate": 3.8302322428651656e-05, "loss": 0.0336, "step": 28680 }, { "epoch": 21.01831501831502, "grad_norm": 0.439453125, "learning_rate": 3.8269118302544046e-05, "loss": 0.0316, "step": 28690 }, { "epoch": 21.025641025641026, "grad_norm": 0.42578125, "learning_rate": 3.823592003665314e-05, "loss": 0.0297, "step": 28700 }, { "epoch": 21.032967032967033, "grad_norm": 0.38671875, "learning_rate": 3.820272764688533e-05, "loss": 0.0332, "step": 28710 }, { "epoch": 21.04029304029304, "grad_norm": 0.318359375, "learning_rate": 3.8169541149144145e-05, "loss": 0.0313, "step": 28720 }, { "epoch": 21.047619047619047, "grad_norm": 0.46484375, "learning_rate": 3.8136360559330346e-05, "loss": 0.0317, "step": 28730 }, { "epoch": 21.054945054945055, "grad_norm": 0.4765625, "learning_rate": 3.810318589334185e-05, "loss": 0.0299, "step": 28740 }, { "epoch": 21.062271062271062, "grad_norm": 0.419921875, "learning_rate": 3.807001716707372e-05, "loss": 0.0296, "step": 28750 }, { "epoch": 21.06959706959707, "grad_norm": 0.6796875, "learning_rate": 3.803685439641818e-05, "loss": 0.0362, "step": 28760 }, { "epoch": 21.076923076923077, "grad_norm": 0.318359375, "learning_rate": 3.8003697597264603e-05, "loss": 0.0336, "step": 28770 }, { "epoch": 21.084249084249084, "grad_norm": 0.75, "learning_rate": 3.7970546785499484e-05, "loss": 0.0352, "step": 28780 }, { "epoch": 21.09157509157509, "grad_norm": 0.431640625, "learning_rate": 3.7937401977006483e-05, "loss": 0.0312, "step": 28790 }, { "epoch": 21.0989010989011, "grad_norm": 0.39453125, "learning_rate": 3.7904263187666364e-05, "loss": 0.0323, "step": 28800 }, { "epoch": 21.106227106227106, "grad_norm": 0.44921875, "learning_rate": 3.7871130433357e-05, "loss": 0.0304, "step": 28810 }, { "epoch": 21.113553113553113, "grad_norm": 0.328125, "learning_rate": 3.783800372995338e-05, "loss": 0.0307, "step": 28820 }, { "epoch": 21.12087912087912, "grad_norm": 0.30859375, "learning_rate": 3.78048830933276e-05, "loss": 0.0289, "step": 28830 }, { "epoch": 21.128205128205128, "grad_norm": 0.359375, "learning_rate": 3.7771768539348855e-05, "loss": 0.0337, "step": 28840 }, { "epoch": 21.135531135531135, "grad_norm": 0.58203125, "learning_rate": 3.773866008388339e-05, "loss": 0.0338, "step": 28850 }, { "epoch": 21.142857142857142, "grad_norm": 0.35546875, "learning_rate": 3.770555774279457e-05, "loss": 0.0354, "step": 28860 }, { "epoch": 21.15018315018315, "grad_norm": 0.61328125, "learning_rate": 3.7672461531942805e-05, "loss": 0.0374, "step": 28870 }, { "epoch": 21.157509157509157, "grad_norm": 0.369140625, "learning_rate": 3.763937146718559e-05, "loss": 0.0309, "step": 28880 }, { "epoch": 21.164835164835164, "grad_norm": 0.41015625, "learning_rate": 3.760628756437744e-05, "loss": 0.0304, "step": 28890 }, { "epoch": 21.17216117216117, "grad_norm": 0.384765625, "learning_rate": 3.757320983936995e-05, "loss": 0.035, "step": 28900 }, { "epoch": 21.17948717948718, "grad_norm": 0.43359375, "learning_rate": 3.7540138308011734e-05, "loss": 0.0319, "step": 28910 }, { "epoch": 21.186813186813186, "grad_norm": 0.466796875, "learning_rate": 3.750707298614845e-05, "loss": 0.0353, "step": 28920 }, { "epoch": 21.194139194139193, "grad_norm": 0.59375, "learning_rate": 3.7474013889622776e-05, "loss": 0.0369, "step": 28930 }, { "epoch": 21.2014652014652, "grad_norm": 0.359375, "learning_rate": 3.744096103427443e-05, "loss": 0.0285, "step": 28940 }, { "epoch": 21.208791208791208, "grad_norm": 0.392578125, "learning_rate": 3.7407914435940105e-05, "loss": 0.0303, "step": 28950 }, { "epoch": 21.216117216117215, "grad_norm": 0.404296875, "learning_rate": 3.73748741104535e-05, "loss": 0.0319, "step": 28960 }, { "epoch": 21.223443223443223, "grad_norm": 0.36328125, "learning_rate": 3.734184007364534e-05, "loss": 0.0329, "step": 28970 }, { "epoch": 21.23076923076923, "grad_norm": 0.37890625, "learning_rate": 3.7308812341343304e-05, "loss": 0.0328, "step": 28980 }, { "epoch": 21.238095238095237, "grad_norm": 0.62109375, "learning_rate": 3.727579092937206e-05, "loss": 0.0406, "step": 28990 }, { "epoch": 21.245421245421245, "grad_norm": 0.39453125, "learning_rate": 3.724277585355326e-05, "loss": 0.0305, "step": 29000 }, { "epoch": 21.252747252747252, "grad_norm": 0.396484375, "learning_rate": 3.720976712970551e-05, "loss": 0.0387, "step": 29010 }, { "epoch": 21.26007326007326, "grad_norm": 0.3671875, "learning_rate": 3.7176764773644354e-05, "loss": 0.0322, "step": 29020 }, { "epoch": 21.267399267399266, "grad_norm": 0.515625, "learning_rate": 3.714376880118232e-05, "loss": 0.0288, "step": 29030 }, { "epoch": 21.274725274725274, "grad_norm": 0.6484375, "learning_rate": 3.7110779228128856e-05, "loss": 0.0369, "step": 29040 }, { "epoch": 21.28205128205128, "grad_norm": 0.421875, "learning_rate": 3.7077796070290346e-05, "loss": 0.0296, "step": 29050 }, { "epoch": 21.28937728937729, "grad_norm": 0.478515625, "learning_rate": 3.7044819343470105e-05, "loss": 0.036, "step": 29060 }, { "epoch": 21.296703296703296, "grad_norm": 0.3671875, "learning_rate": 3.701184906346837e-05, "loss": 0.0318, "step": 29070 }, { "epoch": 21.304029304029303, "grad_norm": 0.6640625, "learning_rate": 3.697888524608228e-05, "loss": 0.0322, "step": 29080 }, { "epoch": 21.31135531135531, "grad_norm": 0.5078125, "learning_rate": 3.6945927907105874e-05, "loss": 0.0388, "step": 29090 }, { "epoch": 21.318681318681318, "grad_norm": 0.46875, "learning_rate": 3.691297706233012e-05, "loss": 0.032, "step": 29100 }, { "epoch": 21.326007326007325, "grad_norm": 0.47265625, "learning_rate": 3.688003272754282e-05, "loss": 0.0392, "step": 29110 }, { "epoch": 21.333333333333332, "grad_norm": 0.40234375, "learning_rate": 3.684709491852869e-05, "loss": 0.0309, "step": 29120 }, { "epoch": 21.34065934065934, "grad_norm": 0.412109375, "learning_rate": 3.681416365106933e-05, "loss": 0.0299, "step": 29130 }, { "epoch": 21.347985347985347, "grad_norm": 0.6328125, "learning_rate": 3.67812389409432e-05, "loss": 0.0326, "step": 29140 }, { "epoch": 21.355311355311354, "grad_norm": 0.546875, "learning_rate": 3.674832080392557e-05, "loss": 0.029, "step": 29150 }, { "epoch": 21.36263736263736, "grad_norm": 0.41796875, "learning_rate": 3.671540925578862e-05, "loss": 0.0323, "step": 29160 }, { "epoch": 21.36996336996337, "grad_norm": 0.6171875, "learning_rate": 3.6682504312301364e-05, "loss": 0.0328, "step": 29170 }, { "epoch": 21.377289377289376, "grad_norm": 0.443359375, "learning_rate": 3.6649605989229616e-05, "loss": 0.0343, "step": 29180 }, { "epoch": 21.384615384615383, "grad_norm": 0.365234375, "learning_rate": 3.661671430233606e-05, "loss": 0.0292, "step": 29190 }, { "epoch": 21.39194139194139, "grad_norm": 0.37890625, "learning_rate": 3.658382926738017e-05, "loss": 0.0366, "step": 29200 }, { "epoch": 21.399267399267398, "grad_norm": 0.431640625, "learning_rate": 3.655095090011826e-05, "loss": 0.0332, "step": 29210 }, { "epoch": 21.406593406593405, "grad_norm": 0.6875, "learning_rate": 3.6518079216303424e-05, "loss": 0.0398, "step": 29220 }, { "epoch": 21.413919413919412, "grad_norm": 0.43359375, "learning_rate": 3.648521423168557e-05, "loss": 0.0305, "step": 29230 }, { "epoch": 21.42124542124542, "grad_norm": 0.375, "learning_rate": 3.645235596201138e-05, "loss": 0.0299, "step": 29240 }, { "epoch": 21.428571428571427, "grad_norm": 0.34375, "learning_rate": 3.6419504423024344e-05, "loss": 0.0301, "step": 29250 }, { "epoch": 21.435897435897434, "grad_norm": 0.4765625, "learning_rate": 3.638665963046471e-05, "loss": 0.0358, "step": 29260 }, { "epoch": 21.44322344322344, "grad_norm": 0.71484375, "learning_rate": 3.63538216000695e-05, "loss": 0.0338, "step": 29270 }, { "epoch": 21.45054945054945, "grad_norm": 0.408203125, "learning_rate": 3.632099034757246e-05, "loss": 0.0295, "step": 29280 }, { "epoch": 21.457875457875456, "grad_norm": 0.33984375, "learning_rate": 3.628816588870414e-05, "loss": 0.0315, "step": 29290 }, { "epoch": 21.465201465201464, "grad_norm": 0.408203125, "learning_rate": 3.625534823919184e-05, "loss": 0.03, "step": 29300 }, { "epoch": 21.47252747252747, "grad_norm": 0.380859375, "learning_rate": 3.622253741475954e-05, "loss": 0.0326, "step": 29310 }, { "epoch": 21.479853479853478, "grad_norm": 0.443359375, "learning_rate": 3.618973343112798e-05, "loss": 0.0319, "step": 29320 }, { "epoch": 21.487179487179485, "grad_norm": 0.50390625, "learning_rate": 3.6156936304014644e-05, "loss": 0.0366, "step": 29330 }, { "epoch": 21.494505494505496, "grad_norm": 0.6015625, "learning_rate": 3.612414604913372e-05, "loss": 0.0307, "step": 29340 }, { "epoch": 21.501831501831504, "grad_norm": 0.349609375, "learning_rate": 3.6091362682196054e-05, "loss": 0.036, "step": 29350 }, { "epoch": 21.50915750915751, "grad_norm": 0.35546875, "learning_rate": 3.605858621890928e-05, "loss": 0.0322, "step": 29360 }, { "epoch": 21.516483516483518, "grad_norm": 0.388671875, "learning_rate": 3.602581667497764e-05, "loss": 0.0299, "step": 29370 }, { "epoch": 21.523809523809526, "grad_norm": 0.357421875, "learning_rate": 3.5993054066102126e-05, "loss": 0.0352, "step": 29380 }, { "epoch": 21.531135531135533, "grad_norm": 0.455078125, "learning_rate": 3.596029840798036e-05, "loss": 0.0316, "step": 29390 }, { "epoch": 21.53846153846154, "grad_norm": 0.5078125, "learning_rate": 3.592754971630668e-05, "loss": 0.0365, "step": 29400 }, { "epoch": 21.545787545787547, "grad_norm": 0.37109375, "learning_rate": 3.589480800677201e-05, "loss": 0.0327, "step": 29410 }, { "epoch": 21.553113553113555, "grad_norm": 0.416015625, "learning_rate": 3.586207329506401e-05, "loss": 0.0318, "step": 29420 }, { "epoch": 21.560439560439562, "grad_norm": 0.81640625, "learning_rate": 3.5829345596866946e-05, "loss": 0.035, "step": 29430 }, { "epoch": 21.56776556776557, "grad_norm": 0.26171875, "learning_rate": 3.5796624927861745e-05, "loss": 0.0297, "step": 29440 }, { "epoch": 21.575091575091577, "grad_norm": 0.453125, "learning_rate": 3.5763911303725925e-05, "loss": 0.0333, "step": 29450 }, { "epoch": 21.582417582417584, "grad_norm": 0.53515625, "learning_rate": 3.573120474013369e-05, "loss": 0.0348, "step": 29460 }, { "epoch": 21.58974358974359, "grad_norm": 0.5625, "learning_rate": 3.56985052527558e-05, "loss": 0.0316, "step": 29470 }, { "epoch": 21.5970695970696, "grad_norm": 0.404296875, "learning_rate": 3.5665812857259684e-05, "loss": 0.0343, "step": 29480 }, { "epoch": 21.604395604395606, "grad_norm": 0.515625, "learning_rate": 3.5633127569309306e-05, "loss": 0.0302, "step": 29490 }, { "epoch": 21.611721611721613, "grad_norm": 0.3984375, "learning_rate": 3.560044940456528e-05, "loss": 0.0299, "step": 29500 }, { "epoch": 21.61904761904762, "grad_norm": 0.66796875, "learning_rate": 3.5567778378684776e-05, "loss": 0.0392, "step": 29510 }, { "epoch": 21.626373626373628, "grad_norm": 0.328125, "learning_rate": 3.5535114507321584e-05, "loss": 0.0347, "step": 29520 }, { "epoch": 21.633699633699635, "grad_norm": 0.322265625, "learning_rate": 3.5502457806126e-05, "loss": 0.0316, "step": 29530 }, { "epoch": 21.641025641025642, "grad_norm": 0.40234375, "learning_rate": 3.546980829074493e-05, "loss": 0.0316, "step": 29540 }, { "epoch": 21.64835164835165, "grad_norm": 0.326171875, "learning_rate": 3.543716597682183e-05, "loss": 0.0306, "step": 29550 }, { "epoch": 21.655677655677657, "grad_norm": 0.5078125, "learning_rate": 3.54045308799967e-05, "loss": 0.0366, "step": 29560 }, { "epoch": 21.663003663003664, "grad_norm": 0.416015625, "learning_rate": 3.537190301590611e-05, "loss": 0.03, "step": 29570 }, { "epoch": 21.67032967032967, "grad_norm": 0.80859375, "learning_rate": 3.533928240018311e-05, "loss": 0.0335, "step": 29580 }, { "epoch": 21.67765567765568, "grad_norm": 0.470703125, "learning_rate": 3.530666904845731e-05, "loss": 0.036, "step": 29590 }, { "epoch": 21.684981684981686, "grad_norm": 0.5390625, "learning_rate": 3.527406297635485e-05, "loss": 0.0376, "step": 29600 }, { "epoch": 21.692307692307693, "grad_norm": 0.388671875, "learning_rate": 3.524146419949838e-05, "loss": 0.0338, "step": 29610 }, { "epoch": 21.6996336996337, "grad_norm": 0.3671875, "learning_rate": 3.5208872733507016e-05, "loss": 0.0303, "step": 29620 }, { "epoch": 21.706959706959708, "grad_norm": 0.46875, "learning_rate": 3.517628859399641e-05, "loss": 0.0308, "step": 29630 }, { "epoch": 21.714285714285715, "grad_norm": 0.435546875, "learning_rate": 3.514371179657869e-05, "loss": 0.0312, "step": 29640 }, { "epoch": 21.721611721611723, "grad_norm": 0.490234375, "learning_rate": 3.5111142356862494e-05, "loss": 0.0343, "step": 29650 }, { "epoch": 21.72893772893773, "grad_norm": 0.439453125, "learning_rate": 3.507858029045287e-05, "loss": 0.0325, "step": 29660 }, { "epoch": 21.736263736263737, "grad_norm": 0.322265625, "learning_rate": 3.5046025612951385e-05, "loss": 0.035, "step": 29670 }, { "epoch": 21.743589743589745, "grad_norm": 0.392578125, "learning_rate": 3.501347833995606e-05, "loss": 0.0333, "step": 29680 }, { "epoch": 21.750915750915752, "grad_norm": 0.6953125, "learning_rate": 3.498093848706136e-05, "loss": 0.0296, "step": 29690 }, { "epoch": 21.75824175824176, "grad_norm": 0.341796875, "learning_rate": 3.4948406069858184e-05, "loss": 0.0308, "step": 29700 }, { "epoch": 21.765567765567766, "grad_norm": 0.4296875, "learning_rate": 3.491588110393391e-05, "loss": 0.0329, "step": 29710 }, { "epoch": 21.772893772893774, "grad_norm": 0.373046875, "learning_rate": 3.488336360487228e-05, "loss": 0.0338, "step": 29720 }, { "epoch": 21.78021978021978, "grad_norm": 0.609375, "learning_rate": 3.485085358825351e-05, "loss": 0.0352, "step": 29730 }, { "epoch": 21.78754578754579, "grad_norm": 0.62890625, "learning_rate": 3.481835106965423e-05, "loss": 0.0353, "step": 29740 }, { "epoch": 21.794871794871796, "grad_norm": 0.30078125, "learning_rate": 3.4785856064647447e-05, "loss": 0.0303, "step": 29750 }, { "epoch": 21.802197802197803, "grad_norm": 0.482421875, "learning_rate": 3.4753368588802584e-05, "loss": 0.0363, "step": 29760 }, { "epoch": 21.80952380952381, "grad_norm": 0.3828125, "learning_rate": 3.472088865768546e-05, "loss": 0.0327, "step": 29770 }, { "epoch": 21.816849816849818, "grad_norm": 0.7421875, "learning_rate": 3.46884162868583e-05, "loss": 0.0332, "step": 29780 }, { "epoch": 21.824175824175825, "grad_norm": 0.76953125, "learning_rate": 3.465595149187963e-05, "loss": 0.0352, "step": 29790 }, { "epoch": 21.831501831501832, "grad_norm": 0.474609375, "learning_rate": 3.462349428830444e-05, "loss": 0.0367, "step": 29800 }, { "epoch": 21.83882783882784, "grad_norm": 0.4609375, "learning_rate": 3.459104469168401e-05, "loss": 0.0359, "step": 29810 }, { "epoch": 21.846153846153847, "grad_norm": 0.50390625, "learning_rate": 3.455860271756604e-05, "loss": 0.037, "step": 29820 }, { "epoch": 21.853479853479854, "grad_norm": 0.5859375, "learning_rate": 3.452616838149451e-05, "loss": 0.0309, "step": 29830 }, { "epoch": 21.86080586080586, "grad_norm": 0.5, "learning_rate": 3.449374169900981e-05, "loss": 0.0346, "step": 29840 }, { "epoch": 21.86813186813187, "grad_norm": 0.4609375, "learning_rate": 3.44613226856486e-05, "loss": 0.0337, "step": 29850 }, { "epoch": 21.875457875457876, "grad_norm": 1.03125, "learning_rate": 3.44289113569439e-05, "loss": 0.0351, "step": 29860 }, { "epoch": 21.882783882783883, "grad_norm": 0.55859375, "learning_rate": 3.4396507728425046e-05, "loss": 0.0394, "step": 29870 }, { "epoch": 21.89010989010989, "grad_norm": 0.2890625, "learning_rate": 3.436411181561769e-05, "loss": 0.0301, "step": 29880 }, { "epoch": 21.897435897435898, "grad_norm": 0.3515625, "learning_rate": 3.433172363404375e-05, "loss": 0.0295, "step": 29890 }, { "epoch": 21.904761904761905, "grad_norm": 0.46875, "learning_rate": 3.429934319922151e-05, "loss": 0.0361, "step": 29900 }, { "epoch": 21.912087912087912, "grad_norm": 0.365234375, "learning_rate": 3.4266970526665436e-05, "loss": 0.0313, "step": 29910 }, { "epoch": 21.91941391941392, "grad_norm": 0.37109375, "learning_rate": 3.423460563188638e-05, "loss": 0.0294, "step": 29920 }, { "epoch": 21.926739926739927, "grad_norm": 0.43359375, "learning_rate": 3.4202248530391405e-05, "loss": 0.0351, "step": 29930 }, { "epoch": 21.934065934065934, "grad_norm": 0.41796875, "learning_rate": 3.416989923768389e-05, "loss": 0.0312, "step": 29940 }, { "epoch": 21.94139194139194, "grad_norm": 0.52734375, "learning_rate": 3.413755776926341e-05, "loss": 0.031, "step": 29950 }, { "epoch": 21.94871794871795, "grad_norm": 0.373046875, "learning_rate": 3.410522414062584e-05, "loss": 0.0359, "step": 29960 }, { "epoch": 21.956043956043956, "grad_norm": 0.47265625, "learning_rate": 3.407289836726328e-05, "loss": 0.0333, "step": 29970 }, { "epoch": 21.963369963369964, "grad_norm": 0.4140625, "learning_rate": 3.404058046466407e-05, "loss": 0.0323, "step": 29980 }, { "epoch": 21.97069597069597, "grad_norm": 0.4453125, "learning_rate": 3.4008270448312776e-05, "loss": 0.0331, "step": 29990 }, { "epoch": 21.978021978021978, "grad_norm": 0.36328125, "learning_rate": 3.3975968333690175e-05, "loss": 0.0362, "step": 30000 }, { "epoch": 21.985347985347985, "grad_norm": 0.47265625, "learning_rate": 3.394367413627329e-05, "loss": 0.037, "step": 30010 }, { "epoch": 21.992673992673993, "grad_norm": 0.5625, "learning_rate": 3.3911387871535327e-05, "loss": 0.0338, "step": 30020 }, { "epoch": 22.0, "grad_norm": 0.388671875, "learning_rate": 3.3879109554945694e-05, "loss": 0.0346, "step": 30030 }, { "epoch": 22.007326007326007, "grad_norm": 0.5234375, "learning_rate": 3.3846839201969974e-05, "loss": 0.0345, "step": 30040 }, { "epoch": 22.014652014652015, "grad_norm": 0.40234375, "learning_rate": 3.381457682806996e-05, "loss": 0.0306, "step": 30050 }, { "epoch": 22.021978021978022, "grad_norm": 0.37109375, "learning_rate": 3.3782322448703625e-05, "loss": 0.0301, "step": 30060 }, { "epoch": 22.02930402930403, "grad_norm": 0.353515625, "learning_rate": 3.375007607932509e-05, "loss": 0.0321, "step": 30070 }, { "epoch": 22.036630036630036, "grad_norm": 0.322265625, "learning_rate": 3.371783773538465e-05, "loss": 0.0275, "step": 30080 }, { "epoch": 22.043956043956044, "grad_norm": 0.474609375, "learning_rate": 3.368560743232876e-05, "loss": 0.0345, "step": 30090 }, { "epoch": 22.05128205128205, "grad_norm": 0.337890625, "learning_rate": 3.36533851856e-05, "loss": 0.0328, "step": 30100 }, { "epoch": 22.05860805860806, "grad_norm": 0.76171875, "learning_rate": 3.362117101063714e-05, "loss": 0.033, "step": 30110 }, { "epoch": 22.065934065934066, "grad_norm": 0.30859375, "learning_rate": 3.358896492287501e-05, "loss": 0.0325, "step": 30120 }, { "epoch": 22.073260073260073, "grad_norm": 0.484375, "learning_rate": 3.355676693774462e-05, "loss": 0.0392, "step": 30130 }, { "epoch": 22.08058608058608, "grad_norm": 0.48828125, "learning_rate": 3.352457707067307e-05, "loss": 0.0355, "step": 30140 }, { "epoch": 22.087912087912088, "grad_norm": 0.5078125, "learning_rate": 3.34923953370836e-05, "loss": 0.0387, "step": 30150 }, { "epoch": 22.095238095238095, "grad_norm": 0.466796875, "learning_rate": 3.346022175239554e-05, "loss": 0.0358, "step": 30160 }, { "epoch": 22.102564102564102, "grad_norm": 0.326171875, "learning_rate": 3.342805633202429e-05, "loss": 0.034, "step": 30170 }, { "epoch": 22.10989010989011, "grad_norm": 0.8671875, "learning_rate": 3.3395899091381344e-05, "loss": 0.039, "step": 30180 }, { "epoch": 22.117216117216117, "grad_norm": 0.40234375, "learning_rate": 3.336375004587431e-05, "loss": 0.034, "step": 30190 }, { "epoch": 22.124542124542124, "grad_norm": 0.41015625, "learning_rate": 3.3331609210906876e-05, "loss": 0.0359, "step": 30200 }, { "epoch": 22.13186813186813, "grad_norm": 0.48828125, "learning_rate": 3.329947660187872e-05, "loss": 0.034, "step": 30210 }, { "epoch": 22.13919413919414, "grad_norm": 0.466796875, "learning_rate": 3.3267352234185665e-05, "loss": 0.0353, "step": 30220 }, { "epoch": 22.146520146520146, "grad_norm": 0.349609375, "learning_rate": 3.323523612321954e-05, "loss": 0.0345, "step": 30230 }, { "epoch": 22.153846153846153, "grad_norm": 0.359375, "learning_rate": 3.320312828436822e-05, "loss": 0.0328, "step": 30240 }, { "epoch": 22.16117216117216, "grad_norm": 0.2431640625, "learning_rate": 3.317102873301564e-05, "loss": 0.0284, "step": 30250 }, { "epoch": 22.168498168498168, "grad_norm": 0.3828125, "learning_rate": 3.3138937484541725e-05, "loss": 0.0339, "step": 30260 }, { "epoch": 22.175824175824175, "grad_norm": 0.318359375, "learning_rate": 3.310685455432247e-05, "loss": 0.0337, "step": 30270 }, { "epoch": 22.183150183150182, "grad_norm": 0.474609375, "learning_rate": 3.307477995772986e-05, "loss": 0.0355, "step": 30280 }, { "epoch": 22.19047619047619, "grad_norm": 0.37109375, "learning_rate": 3.3042713710131854e-05, "loss": 0.0311, "step": 30290 }, { "epoch": 22.197802197802197, "grad_norm": 0.427734375, "learning_rate": 3.301065582689248e-05, "loss": 0.0326, "step": 30300 }, { "epoch": 22.205128205128204, "grad_norm": 0.5078125, "learning_rate": 3.297860632337169e-05, "loss": 0.0366, "step": 30310 }, { "epoch": 22.21245421245421, "grad_norm": 0.6953125, "learning_rate": 3.2946565214925474e-05, "loss": 0.032, "step": 30320 }, { "epoch": 22.21978021978022, "grad_norm": 0.9375, "learning_rate": 3.2914532516905765e-05, "loss": 0.0359, "step": 30330 }, { "epoch": 22.227106227106226, "grad_norm": 0.51953125, "learning_rate": 3.2882508244660504e-05, "loss": 0.0286, "step": 30340 }, { "epoch": 22.234432234432234, "grad_norm": 0.31640625, "learning_rate": 3.2850492413533545e-05, "loss": 0.0305, "step": 30350 }, { "epoch": 22.24175824175824, "grad_norm": 0.55078125, "learning_rate": 3.2818485038864735e-05, "loss": 0.0322, "step": 30360 }, { "epoch": 22.249084249084248, "grad_norm": 0.375, "learning_rate": 3.278648613598985e-05, "loss": 0.0348, "step": 30370 }, { "epoch": 22.256410256410255, "grad_norm": 0.44921875, "learning_rate": 3.275449572024062e-05, "loss": 0.0378, "step": 30380 }, { "epoch": 22.263736263736263, "grad_norm": 0.4765625, "learning_rate": 3.272251380694469e-05, "loss": 0.0344, "step": 30390 }, { "epoch": 22.27106227106227, "grad_norm": 0.4296875, "learning_rate": 3.2690540411425665e-05, "loss": 0.0331, "step": 30400 }, { "epoch": 22.278388278388277, "grad_norm": 0.453125, "learning_rate": 3.265857554900306e-05, "loss": 0.0316, "step": 30410 }, { "epoch": 22.285714285714285, "grad_norm": 0.62890625, "learning_rate": 3.262661923499224e-05, "loss": 0.0325, "step": 30420 }, { "epoch": 22.293040293040292, "grad_norm": 0.455078125, "learning_rate": 3.259467148470456e-05, "loss": 0.0324, "step": 30430 }, { "epoch": 22.3003663003663, "grad_norm": 0.46875, "learning_rate": 3.2562732313447226e-05, "loss": 0.0325, "step": 30440 }, { "epoch": 22.307692307692307, "grad_norm": 0.40625, "learning_rate": 3.253080173652333e-05, "loss": 0.0303, "step": 30450 }, { "epoch": 22.315018315018314, "grad_norm": 0.4609375, "learning_rate": 3.2498879769231884e-05, "loss": 0.0363, "step": 30460 }, { "epoch": 22.32234432234432, "grad_norm": 0.3359375, "learning_rate": 3.246696642686774e-05, "loss": 0.031, "step": 30470 }, { "epoch": 22.32967032967033, "grad_norm": 0.53125, "learning_rate": 3.243506172472162e-05, "loss": 0.0328, "step": 30480 }, { "epoch": 22.336996336996336, "grad_norm": 0.46484375, "learning_rate": 3.240316567808011e-05, "loss": 0.0328, "step": 30490 }, { "epoch": 22.344322344322343, "grad_norm": 0.466796875, "learning_rate": 3.2371278302225665e-05, "loss": 0.0321, "step": 30500 }, { "epoch": 22.35164835164835, "grad_norm": 0.306640625, "learning_rate": 3.233939961243659e-05, "loss": 0.0348, "step": 30510 }, { "epoch": 22.358974358974358, "grad_norm": 0.66015625, "learning_rate": 3.230752962398697e-05, "loss": 0.033, "step": 30520 }, { "epoch": 22.366300366300365, "grad_norm": 0.57421875, "learning_rate": 3.227566835214678e-05, "loss": 0.0404, "step": 30530 }, { "epoch": 22.373626373626372, "grad_norm": 0.474609375, "learning_rate": 3.224381581218182e-05, "loss": 0.0322, "step": 30540 }, { "epoch": 22.38095238095238, "grad_norm": 0.50390625, "learning_rate": 3.2211972019353665e-05, "loss": 0.0349, "step": 30550 }, { "epoch": 22.388278388278387, "grad_norm": 0.369140625, "learning_rate": 3.218013698891971e-05, "loss": 0.0333, "step": 30560 }, { "epoch": 22.395604395604394, "grad_norm": 0.419921875, "learning_rate": 3.21483107361332e-05, "loss": 0.0351, "step": 30570 }, { "epoch": 22.4029304029304, "grad_norm": 0.52734375, "learning_rate": 3.2116493276243096e-05, "loss": 0.0357, "step": 30580 }, { "epoch": 22.41025641025641, "grad_norm": 0.275390625, "learning_rate": 3.2084684624494215e-05, "loss": 0.0321, "step": 30590 }, { "epoch": 22.417582417582416, "grad_norm": 0.3125, "learning_rate": 3.205288479612711e-05, "loss": 0.0361, "step": 30600 }, { "epoch": 22.424908424908423, "grad_norm": 0.41015625, "learning_rate": 3.202109380637814e-05, "loss": 0.0323, "step": 30610 }, { "epoch": 22.43223443223443, "grad_norm": 0.400390625, "learning_rate": 3.198931167047939e-05, "loss": 0.0299, "step": 30620 }, { "epoch": 22.439560439560438, "grad_norm": 0.33984375, "learning_rate": 3.1957538403658746e-05, "loss": 0.0293, "step": 30630 }, { "epoch": 22.446886446886445, "grad_norm": 0.458984375, "learning_rate": 3.1925774021139795e-05, "loss": 0.0281, "step": 30640 }, { "epoch": 22.454212454212453, "grad_norm": 0.337890625, "learning_rate": 3.189401853814193e-05, "loss": 0.0309, "step": 30650 }, { "epoch": 22.46153846153846, "grad_norm": 0.359375, "learning_rate": 3.186227196988021e-05, "loss": 0.0346, "step": 30660 }, { "epoch": 22.468864468864467, "grad_norm": 0.33984375, "learning_rate": 3.183053433156549e-05, "loss": 0.0298, "step": 30670 }, { "epoch": 22.476190476190474, "grad_norm": 0.421875, "learning_rate": 3.1798805638404285e-05, "loss": 0.0336, "step": 30680 }, { "epoch": 22.483516483516482, "grad_norm": 0.369140625, "learning_rate": 3.176708590559886e-05, "loss": 0.0322, "step": 30690 }, { "epoch": 22.49084249084249, "grad_norm": 0.384765625, "learning_rate": 3.1735375148347195e-05, "loss": 0.0317, "step": 30700 }, { "epoch": 22.498168498168496, "grad_norm": 0.466796875, "learning_rate": 3.170367338184293e-05, "loss": 0.0348, "step": 30710 }, { "epoch": 22.505494505494504, "grad_norm": 0.33984375, "learning_rate": 3.167198062127544e-05, "loss": 0.0349, "step": 30720 }, { "epoch": 22.51282051282051, "grad_norm": 0.33984375, "learning_rate": 3.1640296881829764e-05, "loss": 0.0376, "step": 30730 }, { "epoch": 22.520146520146522, "grad_norm": 0.5625, "learning_rate": 3.160862217868662e-05, "loss": 0.0357, "step": 30740 }, { "epoch": 22.52747252747253, "grad_norm": 0.310546875, "learning_rate": 3.1576956527022394e-05, "loss": 0.0341, "step": 30750 }, { "epoch": 22.534798534798536, "grad_norm": 0.326171875, "learning_rate": 3.154529994200914e-05, "loss": 0.0302, "step": 30760 }, { "epoch": 22.542124542124544, "grad_norm": 0.466796875, "learning_rate": 3.1513652438814564e-05, "loss": 0.0412, "step": 30770 }, { "epoch": 22.54945054945055, "grad_norm": 0.40625, "learning_rate": 3.148201403260203e-05, "loss": 0.0303, "step": 30780 }, { "epoch": 22.55677655677656, "grad_norm": 0.625, "learning_rate": 3.1450384738530536e-05, "loss": 0.0363, "step": 30790 }, { "epoch": 22.564102564102566, "grad_norm": 0.30859375, "learning_rate": 3.141876457175471e-05, "loss": 0.0309, "step": 30800 }, { "epoch": 22.571428571428573, "grad_norm": 0.326171875, "learning_rate": 3.1387153547424805e-05, "loss": 0.0291, "step": 30810 }, { "epoch": 22.57875457875458, "grad_norm": 0.396484375, "learning_rate": 3.135555168068671e-05, "loss": 0.0299, "step": 30820 }, { "epoch": 22.586080586080588, "grad_norm": 0.53515625, "learning_rate": 3.132395898668191e-05, "loss": 0.0359, "step": 30830 }, { "epoch": 22.593406593406595, "grad_norm": 0.33984375, "learning_rate": 3.129237548054752e-05, "loss": 0.0362, "step": 30840 }, { "epoch": 22.600732600732602, "grad_norm": 0.34375, "learning_rate": 3.12608011774162e-05, "loss": 0.0342, "step": 30850 }, { "epoch": 22.60805860805861, "grad_norm": 0.4140625, "learning_rate": 3.1229236092416256e-05, "loss": 0.0307, "step": 30860 }, { "epoch": 22.615384615384617, "grad_norm": 0.5078125, "learning_rate": 3.1197680240671544e-05, "loss": 0.0395, "step": 30870 }, { "epoch": 22.622710622710624, "grad_norm": 0.380859375, "learning_rate": 3.116613363730154e-05, "loss": 0.034, "step": 30880 }, { "epoch": 22.63003663003663, "grad_norm": 0.40625, "learning_rate": 3.1134596297421214e-05, "loss": 0.0327, "step": 30890 }, { "epoch": 22.63736263736264, "grad_norm": 0.4140625, "learning_rate": 3.1103068236141174e-05, "loss": 0.0314, "step": 30900 }, { "epoch": 22.644688644688646, "grad_norm": 0.283203125, "learning_rate": 3.1071549468567516e-05, "loss": 0.0325, "step": 30910 }, { "epoch": 22.652014652014653, "grad_norm": 0.35546875, "learning_rate": 3.104004000980197e-05, "loss": 0.0306, "step": 30920 }, { "epoch": 22.65934065934066, "grad_norm": 0.51953125, "learning_rate": 3.100853987494169e-05, "loss": 0.0293, "step": 30930 }, { "epoch": 22.666666666666668, "grad_norm": 0.40234375, "learning_rate": 3.097704907907946e-05, "loss": 0.0302, "step": 30940 }, { "epoch": 22.673992673992675, "grad_norm": 0.59375, "learning_rate": 3.094556763730353e-05, "loss": 0.0376, "step": 30950 }, { "epoch": 22.681318681318682, "grad_norm": 0.44140625, "learning_rate": 3.09140955646977e-05, "loss": 0.0327, "step": 30960 }, { "epoch": 22.68864468864469, "grad_norm": 0.73828125, "learning_rate": 3.08826328763413e-05, "loss": 0.0334, "step": 30970 }, { "epoch": 22.695970695970697, "grad_norm": 0.45703125, "learning_rate": 3.0851179587309094e-05, "loss": 0.0353, "step": 30980 }, { "epoch": 22.703296703296704, "grad_norm": 0.392578125, "learning_rate": 3.0819735712671406e-05, "loss": 0.0351, "step": 30990 }, { "epoch": 22.71062271062271, "grad_norm": 0.458984375, "learning_rate": 3.078830126749403e-05, "loss": 0.0369, "step": 31000 }, { "epoch": 22.71794871794872, "grad_norm": 0.330078125, "learning_rate": 3.075687626683824e-05, "loss": 0.0319, "step": 31010 }, { "epoch": 22.725274725274726, "grad_norm": 0.4140625, "learning_rate": 3.0725460725760767e-05, "loss": 0.0346, "step": 31020 }, { "epoch": 22.732600732600734, "grad_norm": 0.458984375, "learning_rate": 3.069405465931386e-05, "loss": 0.0347, "step": 31030 }, { "epoch": 22.73992673992674, "grad_norm": 0.4609375, "learning_rate": 3.066265808254516e-05, "loss": 0.032, "step": 31040 }, { "epoch": 22.747252747252748, "grad_norm": 0.45703125, "learning_rate": 3.063127101049784e-05, "loss": 0.0314, "step": 31050 }, { "epoch": 22.754578754578755, "grad_norm": 0.50390625, "learning_rate": 3.059989345821043e-05, "loss": 0.034, "step": 31060 }, { "epoch": 22.761904761904763, "grad_norm": 0.54296875, "learning_rate": 3.056852544071698e-05, "loss": 0.0345, "step": 31070 }, { "epoch": 22.76923076923077, "grad_norm": 0.408203125, "learning_rate": 3.0537166973046896e-05, "loss": 0.0333, "step": 31080 }, { "epoch": 22.776556776556777, "grad_norm": 0.37109375, "learning_rate": 3.050581807022509e-05, "loss": 0.0374, "step": 31090 }, { "epoch": 22.783882783882785, "grad_norm": 0.48828125, "learning_rate": 3.047447874727183e-05, "loss": 0.0331, "step": 31100 }, { "epoch": 22.791208791208792, "grad_norm": 0.34375, "learning_rate": 3.0443149019202827e-05, "loss": 0.0398, "step": 31110 }, { "epoch": 22.7985347985348, "grad_norm": 0.291015625, "learning_rate": 3.041182890102916e-05, "loss": 0.032, "step": 31120 }, { "epoch": 22.805860805860807, "grad_norm": 0.65234375, "learning_rate": 3.0380518407757342e-05, "loss": 0.0348, "step": 31130 }, { "epoch": 22.813186813186814, "grad_norm": 0.390625, "learning_rate": 3.0349217554389255e-05, "loss": 0.033, "step": 31140 }, { "epoch": 22.82051282051282, "grad_norm": 0.546875, "learning_rate": 3.031792635592216e-05, "loss": 0.0396, "step": 31150 }, { "epoch": 22.82783882783883, "grad_norm": 0.296875, "learning_rate": 3.0286644827348697e-05, "loss": 0.0343, "step": 31160 }, { "epoch": 22.835164835164836, "grad_norm": 0.375, "learning_rate": 3.02553729836569e-05, "loss": 0.0302, "step": 31170 }, { "epoch": 22.842490842490843, "grad_norm": 0.388671875, "learning_rate": 3.0224110839830082e-05, "loss": 0.0324, "step": 31180 }, { "epoch": 22.84981684981685, "grad_norm": 0.3828125, "learning_rate": 3.0192858410846997e-05, "loss": 0.031, "step": 31190 }, { "epoch": 22.857142857142858, "grad_norm": 0.388671875, "learning_rate": 3.0161615711681718e-05, "loss": 0.0354, "step": 31200 }, { "epoch": 22.864468864468865, "grad_norm": 0.36328125, "learning_rate": 3.0130382757303616e-05, "loss": 0.0288, "step": 31210 }, { "epoch": 22.871794871794872, "grad_norm": 0.427734375, "learning_rate": 3.0099159562677437e-05, "loss": 0.0372, "step": 31220 }, { "epoch": 22.87912087912088, "grad_norm": 0.35546875, "learning_rate": 3.0067946142763236e-05, "loss": 0.0287, "step": 31230 }, { "epoch": 22.886446886446887, "grad_norm": 0.3984375, "learning_rate": 3.0036742512516412e-05, "loss": 0.0343, "step": 31240 }, { "epoch": 22.893772893772894, "grad_norm": 0.44921875, "learning_rate": 3.000554868688762e-05, "loss": 0.0312, "step": 31250 }, { "epoch": 22.9010989010989, "grad_norm": 0.322265625, "learning_rate": 2.997436468082284e-05, "loss": 0.0286, "step": 31260 }, { "epoch": 22.90842490842491, "grad_norm": 0.578125, "learning_rate": 2.9943190509263375e-05, "loss": 0.033, "step": 31270 }, { "epoch": 22.915750915750916, "grad_norm": 0.55078125, "learning_rate": 2.9912026187145786e-05, "loss": 0.0339, "step": 31280 }, { "epoch": 22.923076923076923, "grad_norm": 0.61328125, "learning_rate": 2.9880871729401922e-05, "loss": 0.0351, "step": 31290 }, { "epoch": 22.93040293040293, "grad_norm": 0.64453125, "learning_rate": 2.98497271509589e-05, "loss": 0.0321, "step": 31300 }, { "epoch": 22.937728937728938, "grad_norm": 0.310546875, "learning_rate": 2.981859246673912e-05, "loss": 0.0293, "step": 31310 }, { "epoch": 22.945054945054945, "grad_norm": 0.75390625, "learning_rate": 2.9787467691660225e-05, "loss": 0.0373, "step": 31320 }, { "epoch": 22.952380952380953, "grad_norm": 0.435546875, "learning_rate": 2.9756352840635095e-05, "loss": 0.0274, "step": 31330 }, { "epoch": 22.95970695970696, "grad_norm": 0.4453125, "learning_rate": 2.972524792857191e-05, "loss": 0.0321, "step": 31340 }, { "epoch": 22.967032967032967, "grad_norm": 0.578125, "learning_rate": 2.969415297037402e-05, "loss": 0.0298, "step": 31350 }, { "epoch": 22.974358974358974, "grad_norm": 0.384765625, "learning_rate": 2.9663067980940085e-05, "loss": 0.0342, "step": 31360 }, { "epoch": 22.98168498168498, "grad_norm": 0.37109375, "learning_rate": 2.9631992975163914e-05, "loss": 0.0323, "step": 31370 }, { "epoch": 22.98901098901099, "grad_norm": 0.396484375, "learning_rate": 2.9600927967934544e-05, "loss": 0.0306, "step": 31380 }, { "epoch": 22.996336996336996, "grad_norm": 0.46875, "learning_rate": 2.956987297413628e-05, "loss": 0.0307, "step": 31390 }, { "epoch": 23.003663003663004, "grad_norm": 0.37890625, "learning_rate": 2.9538828008648568e-05, "loss": 0.0315, "step": 31400 }, { "epoch": 23.01098901098901, "grad_norm": 0.365234375, "learning_rate": 2.9507793086346057e-05, "loss": 0.0363, "step": 31410 }, { "epoch": 23.01831501831502, "grad_norm": 0.435546875, "learning_rate": 2.9476768222098616e-05, "loss": 0.0295, "step": 31420 }, { "epoch": 23.025641025641026, "grad_norm": 0.326171875, "learning_rate": 2.944575343077129e-05, "loss": 0.0309, "step": 31430 }, { "epoch": 23.032967032967033, "grad_norm": 0.404296875, "learning_rate": 2.9414748727224246e-05, "loss": 0.0327, "step": 31440 }, { "epoch": 23.04029304029304, "grad_norm": 0.412109375, "learning_rate": 2.9383754126312872e-05, "loss": 0.0344, "step": 31450 }, { "epoch": 23.047619047619047, "grad_norm": 0.330078125, "learning_rate": 2.9352769642887683e-05, "loss": 0.0311, "step": 31460 }, { "epoch": 23.054945054945055, "grad_norm": 0.375, "learning_rate": 2.9321795291794398e-05, "loss": 0.0347, "step": 31470 }, { "epoch": 23.062271062271062, "grad_norm": 0.77734375, "learning_rate": 2.9290831087873792e-05, "loss": 0.0315, "step": 31480 }, { "epoch": 23.06959706959707, "grad_norm": 0.41796875, "learning_rate": 2.9259877045961893e-05, "loss": 0.036, "step": 31490 }, { "epoch": 23.076923076923077, "grad_norm": 0.55078125, "learning_rate": 2.9228933180889762e-05, "loss": 0.0362, "step": 31500 }, { "epoch": 23.084249084249084, "grad_norm": 0.328125, "learning_rate": 2.9197999507483596e-05, "loss": 0.0363, "step": 31510 }, { "epoch": 23.09157509157509, "grad_norm": 0.6796875, "learning_rate": 2.9167076040564796e-05, "loss": 0.0303, "step": 31520 }, { "epoch": 23.0989010989011, "grad_norm": 0.51953125, "learning_rate": 2.913616279494977e-05, "loss": 0.037, "step": 31530 }, { "epoch": 23.106227106227106, "grad_norm": 0.302734375, "learning_rate": 2.9105259785450045e-05, "loss": 0.0313, "step": 31540 }, { "epoch": 23.113553113553113, "grad_norm": 0.73046875, "learning_rate": 2.9074367026872324e-05, "loss": 0.0314, "step": 31550 }, { "epoch": 23.12087912087912, "grad_norm": 0.486328125, "learning_rate": 2.9043484534018312e-05, "loss": 0.0331, "step": 31560 }, { "epoch": 23.128205128205128, "grad_norm": 0.53515625, "learning_rate": 2.9012612321684824e-05, "loss": 0.0377, "step": 31570 }, { "epoch": 23.135531135531135, "grad_norm": 0.376953125, "learning_rate": 2.898175040466372e-05, "loss": 0.0326, "step": 31580 }, { "epoch": 23.142857142857142, "grad_norm": 0.5390625, "learning_rate": 2.8950898797742025e-05, "loss": 0.03, "step": 31590 }, { "epoch": 23.15018315018315, "grad_norm": 0.53515625, "learning_rate": 2.892005751570171e-05, "loss": 0.0432, "step": 31600 }, { "epoch": 23.157509157509157, "grad_norm": 0.31640625, "learning_rate": 2.8889226573319826e-05, "loss": 0.0348, "step": 31610 }, { "epoch": 23.164835164835164, "grad_norm": 0.345703125, "learning_rate": 2.8858405985368547e-05, "loss": 0.0308, "step": 31620 }, { "epoch": 23.17216117216117, "grad_norm": 0.341796875, "learning_rate": 2.8827595766615e-05, "loss": 0.0316, "step": 31630 }, { "epoch": 23.17948717948718, "grad_norm": 0.609375, "learning_rate": 2.879679593182134e-05, "loss": 0.0357, "step": 31640 }, { "epoch": 23.186813186813186, "grad_norm": 0.546875, "learning_rate": 2.8766006495744835e-05, "loss": 0.0333, "step": 31650 }, { "epoch": 23.194139194139193, "grad_norm": 0.451171875, "learning_rate": 2.8735227473137676e-05, "loss": 0.0328, "step": 31660 }, { "epoch": 23.2014652014652, "grad_norm": 0.423828125, "learning_rate": 2.8704458878747102e-05, "loss": 0.0334, "step": 31670 }, { "epoch": 23.208791208791208, "grad_norm": 0.376953125, "learning_rate": 2.867370072731541e-05, "loss": 0.0302, "step": 31680 }, { "epoch": 23.216117216117215, "grad_norm": 0.3125, "learning_rate": 2.8642953033579752e-05, "loss": 0.0367, "step": 31690 }, { "epoch": 23.223443223443223, "grad_norm": 0.32421875, "learning_rate": 2.861221581227242e-05, "loss": 0.0319, "step": 31700 }, { "epoch": 23.23076923076923, "grad_norm": 0.41796875, "learning_rate": 2.858148907812059e-05, "loss": 0.0353, "step": 31710 }, { "epoch": 23.238095238095237, "grad_norm": 0.421875, "learning_rate": 2.855077284584648e-05, "loss": 0.0387, "step": 31720 }, { "epoch": 23.245421245421245, "grad_norm": 0.625, "learning_rate": 2.8520067130167237e-05, "loss": 0.0329, "step": 31730 }, { "epoch": 23.252747252747252, "grad_norm": 0.267578125, "learning_rate": 2.8489371945794945e-05, "loss": 0.031, "step": 31740 }, { "epoch": 23.26007326007326, "grad_norm": 0.361328125, "learning_rate": 2.8458687307436727e-05, "loss": 0.0322, "step": 31750 }, { "epoch": 23.267399267399266, "grad_norm": 0.451171875, "learning_rate": 2.842801322979456e-05, "loss": 0.0337, "step": 31760 }, { "epoch": 23.274725274725274, "grad_norm": 0.37890625, "learning_rate": 2.839734972756539e-05, "loss": 0.0321, "step": 31770 }, { "epoch": 23.28205128205128, "grad_norm": 0.484375, "learning_rate": 2.8366696815441163e-05, "loss": 0.0429, "step": 31780 }, { "epoch": 23.28937728937729, "grad_norm": 0.65625, "learning_rate": 2.8336054508108633e-05, "loss": 0.0312, "step": 31790 }, { "epoch": 23.296703296703296, "grad_norm": 0.484375, "learning_rate": 2.8305422820249584e-05, "loss": 0.0332, "step": 31800 }, { "epoch": 23.304029304029303, "grad_norm": 0.271484375, "learning_rate": 2.8274801766540665e-05, "loss": 0.0296, "step": 31810 }, { "epoch": 23.31135531135531, "grad_norm": 0.404296875, "learning_rate": 2.824419136165336e-05, "loss": 0.0341, "step": 31820 }, { "epoch": 23.318681318681318, "grad_norm": 0.42578125, "learning_rate": 2.8213591620254186e-05, "loss": 0.0313, "step": 31830 }, { "epoch": 23.326007326007325, "grad_norm": 0.353515625, "learning_rate": 2.8183002557004434e-05, "loss": 0.0347, "step": 31840 }, { "epoch": 23.333333333333332, "grad_norm": 0.349609375, "learning_rate": 2.8152424186560383e-05, "loss": 0.0344, "step": 31850 }, { "epoch": 23.34065934065934, "grad_norm": 0.61328125, "learning_rate": 2.8121856523573097e-05, "loss": 0.0346, "step": 31860 }, { "epoch": 23.347985347985347, "grad_norm": 0.37890625, "learning_rate": 2.809129958268853e-05, "loss": 0.0353, "step": 31870 }, { "epoch": 23.355311355311354, "grad_norm": 0.51171875, "learning_rate": 2.806075337854756e-05, "loss": 0.029, "step": 31880 }, { "epoch": 23.36263736263736, "grad_norm": 0.412109375, "learning_rate": 2.8030217925785857e-05, "loss": 0.0318, "step": 31890 }, { "epoch": 23.36996336996337, "grad_norm": 0.458984375, "learning_rate": 2.7999693239033938e-05, "loss": 0.0363, "step": 31900 }, { "epoch": 23.377289377289376, "grad_norm": 0.419921875, "learning_rate": 2.7969179332917212e-05, "loss": 0.0303, "step": 31910 }, { "epoch": 23.384615384615383, "grad_norm": 0.326171875, "learning_rate": 2.7938676222055865e-05, "loss": 0.0311, "step": 31920 }, { "epoch": 23.39194139194139, "grad_norm": 0.36328125, "learning_rate": 2.7908183921064967e-05, "loss": 0.032, "step": 31930 }, { "epoch": 23.399267399267398, "grad_norm": 0.41796875, "learning_rate": 2.7877702444554364e-05, "loss": 0.0369, "step": 31940 }, { "epoch": 23.406593406593405, "grad_norm": 0.443359375, "learning_rate": 2.784723180712874e-05, "loss": 0.0312, "step": 31950 }, { "epoch": 23.413919413919412, "grad_norm": 0.4765625, "learning_rate": 2.781677202338756e-05, "loss": 0.0414, "step": 31960 }, { "epoch": 23.42124542124542, "grad_norm": 0.353515625, "learning_rate": 2.778632310792511e-05, "loss": 0.0311, "step": 31970 }, { "epoch": 23.428571428571427, "grad_norm": 0.5390625, "learning_rate": 2.7755885075330484e-05, "loss": 0.0367, "step": 31980 }, { "epoch": 23.435897435897434, "grad_norm": 0.33203125, "learning_rate": 2.772545794018752e-05, "loss": 0.0287, "step": 31990 }, { "epoch": 23.44322344322344, "grad_norm": 0.37890625, "learning_rate": 2.7695041717074854e-05, "loss": 0.0345, "step": 32000 }, { "epoch": 23.45054945054945, "grad_norm": 0.412109375, "learning_rate": 2.766463642056592e-05, "loss": 0.0286, "step": 32010 }, { "epoch": 23.457875457875456, "grad_norm": 0.6796875, "learning_rate": 2.7634242065228864e-05, "loss": 0.0321, "step": 32020 }, { "epoch": 23.465201465201464, "grad_norm": 0.494140625, "learning_rate": 2.760385866562666e-05, "loss": 0.0344, "step": 32030 }, { "epoch": 23.47252747252747, "grad_norm": 0.765625, "learning_rate": 2.7573486236316966e-05, "loss": 0.0321, "step": 32040 }, { "epoch": 23.479853479853478, "grad_norm": 0.45703125, "learning_rate": 2.7543124791852186e-05, "loss": 0.0303, "step": 32050 }, { "epoch": 23.487179487179485, "grad_norm": 0.5390625, "learning_rate": 2.7512774346779545e-05, "loss": 0.0348, "step": 32060 }, { "epoch": 23.494505494505496, "grad_norm": 0.353515625, "learning_rate": 2.7482434915640864e-05, "loss": 0.0291, "step": 32070 }, { "epoch": 23.501831501831504, "grad_norm": 0.34375, "learning_rate": 2.745210651297282e-05, "loss": 0.037, "step": 32080 }, { "epoch": 23.50915750915751, "grad_norm": 0.59765625, "learning_rate": 2.7421789153306715e-05, "loss": 0.0363, "step": 32090 }, { "epoch": 23.516483516483518, "grad_norm": 0.490234375, "learning_rate": 2.7391482851168583e-05, "loss": 0.0317, "step": 32100 }, { "epoch": 23.523809523809526, "grad_norm": 0.51953125, "learning_rate": 2.7361187621079203e-05, "loss": 0.0335, "step": 32110 }, { "epoch": 23.531135531135533, "grad_norm": 0.65234375, "learning_rate": 2.733090347755399e-05, "loss": 0.0402, "step": 32120 }, { "epoch": 23.53846153846154, "grad_norm": 0.302734375, "learning_rate": 2.7300630435103054e-05, "loss": 0.0276, "step": 32130 }, { "epoch": 23.545787545787547, "grad_norm": 0.62890625, "learning_rate": 2.7270368508231242e-05, "loss": 0.0341, "step": 32140 }, { "epoch": 23.553113553113555, "grad_norm": 0.61328125, "learning_rate": 2.7240117711437996e-05, "loss": 0.032, "step": 32150 }, { "epoch": 23.560439560439562, "grad_norm": 0.369140625, "learning_rate": 2.72098780592175e-05, "loss": 0.0337, "step": 32160 }, { "epoch": 23.56776556776557, "grad_norm": 0.79296875, "learning_rate": 2.7179649566058563e-05, "loss": 0.0394, "step": 32170 }, { "epoch": 23.575091575091577, "grad_norm": 0.59765625, "learning_rate": 2.714943224644461e-05, "loss": 0.0359, "step": 32180 }, { "epoch": 23.582417582417584, "grad_norm": 0.60546875, "learning_rate": 2.7119226114853813e-05, "loss": 0.0299, "step": 32190 }, { "epoch": 23.58974358974359, "grad_norm": 0.384765625, "learning_rate": 2.708903118575884e-05, "loss": 0.0284, "step": 32200 }, { "epoch": 23.5970695970696, "grad_norm": 0.478515625, "learning_rate": 2.7058847473627137e-05, "loss": 0.0328, "step": 32210 }, { "epoch": 23.604395604395606, "grad_norm": 0.259765625, "learning_rate": 2.7028674992920688e-05, "loss": 0.0327, "step": 32220 }, { "epoch": 23.611721611721613, "grad_norm": 0.69140625, "learning_rate": 2.69985137580961e-05, "loss": 0.0349, "step": 32230 }, { "epoch": 23.61904761904762, "grad_norm": 0.58203125, "learning_rate": 2.6968363783604643e-05, "loss": 0.0337, "step": 32240 }, { "epoch": 23.626373626373628, "grad_norm": 0.47265625, "learning_rate": 2.6938225083892133e-05, "loss": 0.0355, "step": 32250 }, { "epoch": 23.633699633699635, "grad_norm": 0.66796875, "learning_rate": 2.6908097673399036e-05, "loss": 0.0288, "step": 32260 }, { "epoch": 23.641025641025642, "grad_norm": 0.412109375, "learning_rate": 2.6877981566560363e-05, "loss": 0.0312, "step": 32270 }, { "epoch": 23.64835164835165, "grad_norm": 0.5, "learning_rate": 2.684787677780572e-05, "loss": 0.035, "step": 32280 }, { "epoch": 23.655677655677657, "grad_norm": 0.3359375, "learning_rate": 2.6817783321559337e-05, "loss": 0.0367, "step": 32290 }, { "epoch": 23.663003663003664, "grad_norm": 0.4375, "learning_rate": 2.6787701212239945e-05, "loss": 0.0311, "step": 32300 }, { "epoch": 23.67032967032967, "grad_norm": 0.419921875, "learning_rate": 2.6757630464260864e-05, "loss": 0.0329, "step": 32310 }, { "epoch": 23.67765567765568, "grad_norm": 0.37109375, "learning_rate": 2.6727571092030023e-05, "loss": 0.0307, "step": 32320 }, { "epoch": 23.684981684981686, "grad_norm": 0.41796875, "learning_rate": 2.6697523109949785e-05, "loss": 0.0344, "step": 32330 }, { "epoch": 23.692307692307693, "grad_norm": 0.3984375, "learning_rate": 2.6667486532417175e-05, "loss": 0.034, "step": 32340 }, { "epoch": 23.6996336996337, "grad_norm": 0.42578125, "learning_rate": 2.6637461373823687e-05, "loss": 0.0332, "step": 32350 }, { "epoch": 23.706959706959708, "grad_norm": 0.50390625, "learning_rate": 2.660744764855534e-05, "loss": 0.033, "step": 32360 }, { "epoch": 23.714285714285715, "grad_norm": 0.75, "learning_rate": 2.6577445370992727e-05, "loss": 0.0365, "step": 32370 }, { "epoch": 23.721611721611723, "grad_norm": 0.47265625, "learning_rate": 2.6547454555510902e-05, "loss": 0.0311, "step": 32380 }, { "epoch": 23.72893772893773, "grad_norm": 0.359375, "learning_rate": 2.651747521647946e-05, "loss": 0.0379, "step": 32390 }, { "epoch": 23.736263736263737, "grad_norm": 0.439453125, "learning_rate": 2.6487507368262498e-05, "loss": 0.0376, "step": 32400 }, { "epoch": 23.743589743589745, "grad_norm": 0.71875, "learning_rate": 2.645755102521855e-05, "loss": 0.0318, "step": 32410 }, { "epoch": 23.750915750915752, "grad_norm": 0.376953125, "learning_rate": 2.6427606201700734e-05, "loss": 0.0332, "step": 32420 }, { "epoch": 23.75824175824176, "grad_norm": 0.41796875, "learning_rate": 2.639767291205658e-05, "loss": 0.0352, "step": 32430 }, { "epoch": 23.765567765567766, "grad_norm": 0.57421875, "learning_rate": 2.6367751170628097e-05, "loss": 0.0363, "step": 32440 }, { "epoch": 23.772893772893774, "grad_norm": 0.404296875, "learning_rate": 2.633784099175179e-05, "loss": 0.0319, "step": 32450 }, { "epoch": 23.78021978021978, "grad_norm": 0.3125, "learning_rate": 2.6307942389758573e-05, "loss": 0.0321, "step": 32460 }, { "epoch": 23.78754578754579, "grad_norm": 0.36328125, "learning_rate": 2.6278055378973893e-05, "loss": 0.0324, "step": 32470 }, { "epoch": 23.794871794871796, "grad_norm": 0.376953125, "learning_rate": 2.6248179973717556e-05, "loss": 0.0279, "step": 32480 }, { "epoch": 23.802197802197803, "grad_norm": 0.66015625, "learning_rate": 2.6218316188303894e-05, "loss": 0.0352, "step": 32490 }, { "epoch": 23.80952380952381, "grad_norm": 0.7890625, "learning_rate": 2.6188464037041594e-05, "loss": 0.035, "step": 32500 }, { "epoch": 23.816849816849818, "grad_norm": 0.515625, "learning_rate": 2.6158623534233794e-05, "loss": 0.0338, "step": 32510 }, { "epoch": 23.824175824175825, "grad_norm": 0.5390625, "learning_rate": 2.6128794694178093e-05, "loss": 0.0325, "step": 32520 }, { "epoch": 23.831501831501832, "grad_norm": 0.482421875, "learning_rate": 2.6098977531166446e-05, "loss": 0.0353, "step": 32530 }, { "epoch": 23.83882783882784, "grad_norm": 0.439453125, "learning_rate": 2.606917205948522e-05, "loss": 0.0373, "step": 32540 }, { "epoch": 23.846153846153847, "grad_norm": 0.62890625, "learning_rate": 2.6039378293415218e-05, "loss": 0.0336, "step": 32550 }, { "epoch": 23.853479853479854, "grad_norm": 0.5, "learning_rate": 2.600959624723159e-05, "loss": 0.0332, "step": 32560 }, { "epoch": 23.86080586080586, "grad_norm": 0.41796875, "learning_rate": 2.5979825935203934e-05, "loss": 0.0372, "step": 32570 }, { "epoch": 23.86813186813187, "grad_norm": 0.439453125, "learning_rate": 2.5950067371596143e-05, "loss": 0.0434, "step": 32580 }, { "epoch": 23.875457875457876, "grad_norm": 0.44140625, "learning_rate": 2.592032057066651e-05, "loss": 0.0358, "step": 32590 }, { "epoch": 23.882783882783883, "grad_norm": 0.455078125, "learning_rate": 2.589058554666774e-05, "loss": 0.0382, "step": 32600 }, { "epoch": 23.89010989010989, "grad_norm": 0.4453125, "learning_rate": 2.5860862313846823e-05, "loss": 0.0335, "step": 32610 }, { "epoch": 23.897435897435898, "grad_norm": 0.7109375, "learning_rate": 2.5831150886445178e-05, "loss": 0.0341, "step": 32620 }, { "epoch": 23.904761904761905, "grad_norm": 0.44921875, "learning_rate": 2.5801451278698498e-05, "loss": 0.033, "step": 32630 }, { "epoch": 23.912087912087912, "grad_norm": 0.40234375, "learning_rate": 2.5771763504836822e-05, "loss": 0.0338, "step": 32640 }, { "epoch": 23.91941391941392, "grad_norm": 0.71875, "learning_rate": 2.5742087579084584e-05, "loss": 0.0381, "step": 32650 }, { "epoch": 23.926739926739927, "grad_norm": 0.45703125, "learning_rate": 2.5712423515660462e-05, "loss": 0.0321, "step": 32660 }, { "epoch": 23.934065934065934, "grad_norm": 0.73046875, "learning_rate": 2.5682771328777466e-05, "loss": 0.0356, "step": 32670 }, { "epoch": 23.94139194139194, "grad_norm": 0.431640625, "learning_rate": 2.5653131032642974e-05, "loss": 0.032, "step": 32680 }, { "epoch": 23.94871794871795, "grad_norm": 0.328125, "learning_rate": 2.562350264145859e-05, "loss": 0.0301, "step": 32690 }, { "epoch": 23.956043956043956, "grad_norm": 0.435546875, "learning_rate": 2.559388616942029e-05, "loss": 0.0301, "step": 32700 }, { "epoch": 23.963369963369964, "grad_norm": 0.578125, "learning_rate": 2.556428163071824e-05, "loss": 0.0343, "step": 32710 }, { "epoch": 23.97069597069597, "grad_norm": 0.59765625, "learning_rate": 2.5534689039536985e-05, "loss": 0.0322, "step": 32720 }, { "epoch": 23.978021978021978, "grad_norm": 0.41015625, "learning_rate": 2.55051084100553e-05, "loss": 0.0293, "step": 32730 }, { "epoch": 23.985347985347985, "grad_norm": 0.439453125, "learning_rate": 2.547553975644621e-05, "loss": 0.033, "step": 32740 }, { "epoch": 23.992673992673993, "grad_norm": 0.640625, "learning_rate": 2.5445983092877068e-05, "loss": 0.0342, "step": 32750 }, { "epoch": 24.0, "grad_norm": 0.376953125, "learning_rate": 2.541643843350942e-05, "loss": 0.0331, "step": 32760 }, { "epoch": 24.007326007326007, "grad_norm": 0.7109375, "learning_rate": 2.5386905792499065e-05, "loss": 0.0383, "step": 32770 }, { "epoch": 24.014652014652015, "grad_norm": 0.53125, "learning_rate": 2.5357385183996103e-05, "loss": 0.0355, "step": 32780 }, { "epoch": 24.021978021978022, "grad_norm": 0.474609375, "learning_rate": 2.5327876622144785e-05, "loss": 0.0304, "step": 32790 }, { "epoch": 24.02930402930403, "grad_norm": 0.43359375, "learning_rate": 2.529838012108364e-05, "loss": 0.036, "step": 32800 }, { "epoch": 24.036630036630036, "grad_norm": 0.3828125, "learning_rate": 2.5268895694945438e-05, "loss": 0.0331, "step": 32810 }, { "epoch": 24.043956043956044, "grad_norm": 0.408203125, "learning_rate": 2.523942335785709e-05, "loss": 0.0288, "step": 32820 }, { "epoch": 24.05128205128205, "grad_norm": 0.42578125, "learning_rate": 2.5209963123939822e-05, "loss": 0.0304, "step": 32830 }, { "epoch": 24.05860805860806, "grad_norm": 0.435546875, "learning_rate": 2.5180515007308924e-05, "loss": 0.0328, "step": 32840 }, { "epoch": 24.065934065934066, "grad_norm": 0.453125, "learning_rate": 2.515107902207401e-05, "loss": 0.039, "step": 32850 }, { "epoch": 24.073260073260073, "grad_norm": 0.3828125, "learning_rate": 2.5121655182338805e-05, "loss": 0.0319, "step": 32860 }, { "epoch": 24.08058608058608, "grad_norm": 0.49609375, "learning_rate": 2.5092243502201217e-05, "loss": 0.0332, "step": 32870 }, { "epoch": 24.087912087912088, "grad_norm": 0.3515625, "learning_rate": 2.5062843995753394e-05, "loss": 0.0302, "step": 32880 }, { "epoch": 24.095238095238095, "grad_norm": 0.43359375, "learning_rate": 2.503345667708157e-05, "loss": 0.0345, "step": 32890 }, { "epoch": 24.102564102564102, "grad_norm": 0.423828125, "learning_rate": 2.5004081560266167e-05, "loss": 0.035, "step": 32900 }, { "epoch": 24.10989010989011, "grad_norm": 0.3671875, "learning_rate": 2.49747186593818e-05, "loss": 0.0313, "step": 32910 }, { "epoch": 24.117216117216117, "grad_norm": 0.3671875, "learning_rate": 2.494536798849716e-05, "loss": 0.0312, "step": 32920 }, { "epoch": 24.124542124542124, "grad_norm": 0.42578125, "learning_rate": 2.491602956167516e-05, "loss": 0.0296, "step": 32930 }, { "epoch": 24.13186813186813, "grad_norm": 0.73046875, "learning_rate": 2.4886703392972772e-05, "loss": 0.0336, "step": 32940 }, { "epoch": 24.13919413919414, "grad_norm": 0.431640625, "learning_rate": 2.485738949644114e-05, "loss": 0.0365, "step": 32950 }, { "epoch": 24.146520146520146, "grad_norm": 0.306640625, "learning_rate": 2.4828087886125498e-05, "loss": 0.033, "step": 32960 }, { "epoch": 24.153846153846153, "grad_norm": 0.58984375, "learning_rate": 2.47987985760652e-05, "loss": 0.0373, "step": 32970 }, { "epoch": 24.16117216117216, "grad_norm": 0.60546875, "learning_rate": 2.476952158029376e-05, "loss": 0.0349, "step": 32980 }, { "epoch": 24.168498168498168, "grad_norm": 0.443359375, "learning_rate": 2.4740256912838717e-05, "loss": 0.0278, "step": 32990 }, { "epoch": 24.175824175824175, "grad_norm": 0.498046875, "learning_rate": 2.471100458772171e-05, "loss": 0.0302, "step": 33000 }, { "epoch": 24.183150183150182, "grad_norm": 0.400390625, "learning_rate": 2.4681764618958538e-05, "loss": 0.0337, "step": 33010 }, { "epoch": 24.19047619047619, "grad_norm": 0.44140625, "learning_rate": 2.4652537020559004e-05, "loss": 0.0301, "step": 33020 }, { "epoch": 24.197802197802197, "grad_norm": 0.54296875, "learning_rate": 2.4623321806526994e-05, "loss": 0.0313, "step": 33030 }, { "epoch": 24.205128205128204, "grad_norm": 0.34765625, "learning_rate": 2.4594118990860512e-05, "loss": 0.0327, "step": 33040 }, { "epoch": 24.21245421245421, "grad_norm": 0.31640625, "learning_rate": 2.4564928587551545e-05, "loss": 0.0334, "step": 33050 }, { "epoch": 24.21978021978022, "grad_norm": 0.396484375, "learning_rate": 2.4535750610586207e-05, "loss": 0.0319, "step": 33060 }, { "epoch": 24.227106227106226, "grad_norm": 0.75390625, "learning_rate": 2.450658507394462e-05, "loss": 0.0291, "step": 33070 }, { "epoch": 24.234432234432234, "grad_norm": 0.470703125, "learning_rate": 2.447743199160093e-05, "loss": 0.0307, "step": 33080 }, { "epoch": 24.24175824175824, "grad_norm": 0.3671875, "learning_rate": 2.444829137752335e-05, "loss": 0.0327, "step": 33090 }, { "epoch": 24.249084249084248, "grad_norm": 0.310546875, "learning_rate": 2.4419163245674077e-05, "loss": 0.0324, "step": 33100 }, { "epoch": 24.256410256410255, "grad_norm": 0.4609375, "learning_rate": 2.43900476100094e-05, "loss": 0.0337, "step": 33110 }, { "epoch": 24.263736263736263, "grad_norm": 0.341796875, "learning_rate": 2.436094448447955e-05, "loss": 0.0295, "step": 33120 }, { "epoch": 24.27106227106227, "grad_norm": 0.291015625, "learning_rate": 2.4331853883028772e-05, "loss": 0.0322, "step": 33130 }, { "epoch": 24.278388278388277, "grad_norm": 0.52734375, "learning_rate": 2.430277581959537e-05, "loss": 0.0335, "step": 33140 }, { "epoch": 24.285714285714285, "grad_norm": 0.4296875, "learning_rate": 2.427371030811155e-05, "loss": 0.0358, "step": 33150 }, { "epoch": 24.293040293040292, "grad_norm": 0.408203125, "learning_rate": 2.42446573625036e-05, "loss": 0.0323, "step": 33160 }, { "epoch": 24.3003663003663, "grad_norm": 0.5390625, "learning_rate": 2.421561699669172e-05, "loss": 0.0361, "step": 33170 }, { "epoch": 24.307692307692307, "grad_norm": 0.6328125, "learning_rate": 2.4186589224590078e-05, "loss": 0.0336, "step": 33180 }, { "epoch": 24.315018315018314, "grad_norm": 0.79296875, "learning_rate": 2.415757406010687e-05, "loss": 0.0401, "step": 33190 }, { "epoch": 24.32234432234432, "grad_norm": 0.349609375, "learning_rate": 2.4128571517144194e-05, "loss": 0.035, "step": 33200 }, { "epoch": 24.32967032967033, "grad_norm": 0.330078125, "learning_rate": 2.4099581609598127e-05, "loss": 0.0329, "step": 33210 }, { "epoch": 24.336996336996336, "grad_norm": 0.396484375, "learning_rate": 2.4070604351358672e-05, "loss": 0.0321, "step": 33220 }, { "epoch": 24.344322344322343, "grad_norm": 0.40234375, "learning_rate": 2.404163975630978e-05, "loss": 0.0309, "step": 33230 }, { "epoch": 24.35164835164835, "grad_norm": 0.455078125, "learning_rate": 2.401268783832937e-05, "loss": 0.0302, "step": 33240 }, { "epoch": 24.358974358974358, "grad_norm": 0.31640625, "learning_rate": 2.398374861128923e-05, "loss": 0.0314, "step": 33250 }, { "epoch": 24.366300366300365, "grad_norm": 0.37890625, "learning_rate": 2.395482208905509e-05, "loss": 0.0314, "step": 33260 }, { "epoch": 24.373626373626372, "grad_norm": 0.625, "learning_rate": 2.392590828548662e-05, "loss": 0.0343, "step": 33270 }, { "epoch": 24.38095238095238, "grad_norm": 0.400390625, "learning_rate": 2.389700721443734e-05, "loss": 0.0358, "step": 33280 }, { "epoch": 24.388278388278387, "grad_norm": 0.326171875, "learning_rate": 2.3868118889754733e-05, "loss": 0.0279, "step": 33290 }, { "epoch": 24.395604395604394, "grad_norm": 0.50390625, "learning_rate": 2.3839243325280135e-05, "loss": 0.0347, "step": 33300 }, { "epoch": 24.4029304029304, "grad_norm": 0.375, "learning_rate": 2.3810380534848746e-05, "loss": 0.0331, "step": 33310 }, { "epoch": 24.41025641025641, "grad_norm": 0.404296875, "learning_rate": 2.3781530532289725e-05, "loss": 0.0347, "step": 33320 }, { "epoch": 24.417582417582416, "grad_norm": 0.69921875, "learning_rate": 2.3752693331426026e-05, "loss": 0.0377, "step": 33330 }, { "epoch": 24.424908424908423, "grad_norm": 0.58203125, "learning_rate": 2.3723868946074505e-05, "loss": 0.0316, "step": 33340 }, { "epoch": 24.43223443223443, "grad_norm": 0.318359375, "learning_rate": 2.3695057390045864e-05, "loss": 0.03, "step": 33350 }, { "epoch": 24.439560439560438, "grad_norm": 0.3984375, "learning_rate": 2.366625867714465e-05, "loss": 0.0353, "step": 33360 }, { "epoch": 24.446886446886445, "grad_norm": 0.51953125, "learning_rate": 2.363747282116931e-05, "loss": 0.0422, "step": 33370 }, { "epoch": 24.454212454212453, "grad_norm": 0.578125, "learning_rate": 2.3608699835912056e-05, "loss": 0.0362, "step": 33380 }, { "epoch": 24.46153846153846, "grad_norm": 0.5, "learning_rate": 2.3579939735158998e-05, "loss": 0.0336, "step": 33390 }, { "epoch": 24.468864468864467, "grad_norm": 0.287109375, "learning_rate": 2.355119253269003e-05, "loss": 0.0319, "step": 33400 }, { "epoch": 24.476190476190474, "grad_norm": 0.4375, "learning_rate": 2.3522458242278866e-05, "loss": 0.0296, "step": 33410 }, { "epoch": 24.483516483516482, "grad_norm": 0.73828125, "learning_rate": 2.349373687769308e-05, "loss": 0.034, "step": 33420 }, { "epoch": 24.49084249084249, "grad_norm": 0.380859375, "learning_rate": 2.3465028452694e-05, "loss": 0.0344, "step": 33430 }, { "epoch": 24.498168498168496, "grad_norm": 0.427734375, "learning_rate": 2.3436332981036757e-05, "loss": 0.0314, "step": 33440 }, { "epoch": 24.505494505494504, "grad_norm": 0.3984375, "learning_rate": 2.3407650476470337e-05, "loss": 0.0296, "step": 33450 }, { "epoch": 24.51282051282051, "grad_norm": 0.43359375, "learning_rate": 2.337898095273744e-05, "loss": 0.0341, "step": 33460 }, { "epoch": 24.520146520146522, "grad_norm": 0.458984375, "learning_rate": 2.335032442357458e-05, "loss": 0.0336, "step": 33470 }, { "epoch": 24.52747252747253, "grad_norm": 0.435546875, "learning_rate": 2.3321680902712037e-05, "loss": 0.0315, "step": 33480 }, { "epoch": 24.534798534798536, "grad_norm": 0.353515625, "learning_rate": 2.329305040387385e-05, "loss": 0.034, "step": 33490 }, { "epoch": 24.542124542124544, "grad_norm": 0.53125, "learning_rate": 2.3264432940777863e-05, "loss": 0.0379, "step": 33500 }, { "epoch": 24.54945054945055, "grad_norm": 0.359375, "learning_rate": 2.3235828527135605e-05, "loss": 0.0346, "step": 33510 }, { "epoch": 24.55677655677656, "grad_norm": 0.359375, "learning_rate": 2.3207237176652426e-05, "loss": 0.0295, "step": 33520 }, { "epoch": 24.564102564102566, "grad_norm": 0.5546875, "learning_rate": 2.317865890302736e-05, "loss": 0.0343, "step": 33530 }, { "epoch": 24.571428571428573, "grad_norm": 0.357421875, "learning_rate": 2.3150093719953178e-05, "loss": 0.0334, "step": 33540 }, { "epoch": 24.57875457875458, "grad_norm": 0.4296875, "learning_rate": 2.3121541641116437e-05, "loss": 0.0327, "step": 33550 }, { "epoch": 24.586080586080588, "grad_norm": 0.37109375, "learning_rate": 2.309300268019735e-05, "loss": 0.0313, "step": 33560 }, { "epoch": 24.593406593406595, "grad_norm": 0.376953125, "learning_rate": 2.306447685086986e-05, "loss": 0.03, "step": 33570 }, { "epoch": 24.600732600732602, "grad_norm": 0.3125, "learning_rate": 2.303596416680165e-05, "loss": 0.0364, "step": 33580 }, { "epoch": 24.60805860805861, "grad_norm": 0.640625, "learning_rate": 2.300746464165408e-05, "loss": 0.0348, "step": 33590 }, { "epoch": 24.615384615384617, "grad_norm": 0.357421875, "learning_rate": 2.29789782890822e-05, "loss": 0.0366, "step": 33600 }, { "epoch": 24.622710622710624, "grad_norm": 0.37890625, "learning_rate": 2.2950505122734732e-05, "loss": 0.0342, "step": 33610 }, { "epoch": 24.63003663003663, "grad_norm": 0.52734375, "learning_rate": 2.292204515625415e-05, "loss": 0.0348, "step": 33620 }, { "epoch": 24.63736263736264, "grad_norm": 0.328125, "learning_rate": 2.2893598403276542e-05, "loss": 0.0322, "step": 33630 }, { "epoch": 24.644688644688646, "grad_norm": 0.3125, "learning_rate": 2.2865164877431653e-05, "loss": 0.0336, "step": 33640 }, { "epoch": 24.652014652014653, "grad_norm": 0.72265625, "learning_rate": 2.2836744592342955e-05, "loss": 0.0381, "step": 33650 }, { "epoch": 24.65934065934066, "grad_norm": 0.58203125, "learning_rate": 2.280833756162753e-05, "loss": 0.0321, "step": 33660 }, { "epoch": 24.666666666666668, "grad_norm": 0.306640625, "learning_rate": 2.2779943798896088e-05, "loss": 0.0315, "step": 33670 }, { "epoch": 24.673992673992675, "grad_norm": 0.6328125, "learning_rate": 2.275156331775306e-05, "loss": 0.0311, "step": 33680 }, { "epoch": 24.681318681318682, "grad_norm": 0.453125, "learning_rate": 2.272319613179642e-05, "loss": 0.033, "step": 33690 }, { "epoch": 24.68864468864469, "grad_norm": 0.55859375, "learning_rate": 2.269484225461786e-05, "loss": 0.0326, "step": 33700 }, { "epoch": 24.695970695970697, "grad_norm": 0.4765625, "learning_rate": 2.2666501699802636e-05, "loss": 0.0317, "step": 33710 }, { "epoch": 24.703296703296704, "grad_norm": 0.3515625, "learning_rate": 2.263817448092963e-05, "loss": 0.0306, "step": 33720 }, { "epoch": 24.71062271062271, "grad_norm": 0.4140625, "learning_rate": 2.260986061157135e-05, "loss": 0.0293, "step": 33730 }, { "epoch": 24.71794871794872, "grad_norm": 0.640625, "learning_rate": 2.2581560105293878e-05, "loss": 0.0355, "step": 33740 }, { "epoch": 24.725274725274726, "grad_norm": 0.283203125, "learning_rate": 2.2553272975656947e-05, "loss": 0.0294, "step": 33750 }, { "epoch": 24.732600732600734, "grad_norm": 0.361328125, "learning_rate": 2.2524999236213836e-05, "loss": 0.0334, "step": 33760 }, { "epoch": 24.73992673992674, "grad_norm": 0.38671875, "learning_rate": 2.24967389005114e-05, "loss": 0.0329, "step": 33770 }, { "epoch": 24.747252747252748, "grad_norm": 0.3125, "learning_rate": 2.246849198209014e-05, "loss": 0.0344, "step": 33780 }, { "epoch": 24.754578754578755, "grad_norm": 0.294921875, "learning_rate": 2.244025849448405e-05, "loss": 0.0291, "step": 33790 }, { "epoch": 24.761904761904763, "grad_norm": 0.703125, "learning_rate": 2.2412038451220708e-05, "loss": 0.0391, "step": 33800 }, { "epoch": 24.76923076923077, "grad_norm": 0.421875, "learning_rate": 2.2383831865821292e-05, "loss": 0.0305, "step": 33810 }, { "epoch": 24.776556776556777, "grad_norm": 0.41015625, "learning_rate": 2.2355638751800478e-05, "loss": 0.0319, "step": 33820 }, { "epoch": 24.783882783882785, "grad_norm": 0.734375, "learning_rate": 2.232745912266654e-05, "loss": 0.0323, "step": 33830 }, { "epoch": 24.791208791208792, "grad_norm": 0.328125, "learning_rate": 2.2299292991921246e-05, "loss": 0.0356, "step": 33840 }, { "epoch": 24.7985347985348, "grad_norm": 0.388671875, "learning_rate": 2.2271140373059914e-05, "loss": 0.0341, "step": 33850 }, { "epoch": 24.805860805860807, "grad_norm": 0.6171875, "learning_rate": 2.2243001279571393e-05, "loss": 0.0322, "step": 33860 }, { "epoch": 24.813186813186814, "grad_norm": 0.322265625, "learning_rate": 2.2214875724938018e-05, "loss": 0.0289, "step": 33870 }, { "epoch": 24.82051282051282, "grad_norm": 0.419921875, "learning_rate": 2.2186763722635708e-05, "loss": 0.0318, "step": 33880 }, { "epoch": 24.82783882783883, "grad_norm": 0.40234375, "learning_rate": 2.215866528613382e-05, "loss": 0.0329, "step": 33890 }, { "epoch": 24.835164835164836, "grad_norm": 0.322265625, "learning_rate": 2.2130580428895224e-05, "loss": 0.0339, "step": 33900 }, { "epoch": 24.842490842490843, "grad_norm": 0.37890625, "learning_rate": 2.2102509164376337e-05, "loss": 0.0324, "step": 33910 }, { "epoch": 24.84981684981685, "grad_norm": 0.451171875, "learning_rate": 2.207445150602698e-05, "loss": 0.03, "step": 33920 }, { "epoch": 24.857142857142858, "grad_norm": 0.4140625, "learning_rate": 2.2046407467290532e-05, "loss": 0.0284, "step": 33930 }, { "epoch": 24.864468864468865, "grad_norm": 0.326171875, "learning_rate": 2.20183770616038e-05, "loss": 0.0294, "step": 33940 }, { "epoch": 24.871794871794872, "grad_norm": 0.58984375, "learning_rate": 2.1990360302397053e-05, "loss": 0.0357, "step": 33950 }, { "epoch": 24.87912087912088, "grad_norm": 0.37109375, "learning_rate": 2.1962357203094076e-05, "loss": 0.0315, "step": 33960 }, { "epoch": 24.886446886446887, "grad_norm": 0.66015625, "learning_rate": 2.1934367777112045e-05, "loss": 0.0359, "step": 33970 }, { "epoch": 24.893772893772894, "grad_norm": 0.41796875, "learning_rate": 2.190639203786162e-05, "loss": 0.0324, "step": 33980 }, { "epoch": 24.9010989010989, "grad_norm": 0.345703125, "learning_rate": 2.18784299987469e-05, "loss": 0.0303, "step": 33990 }, { "epoch": 24.90842490842491, "grad_norm": 0.94140625, "learning_rate": 2.1850481673165393e-05, "loss": 0.042, "step": 34000 }, { "epoch": 24.915750915750916, "grad_norm": 0.400390625, "learning_rate": 2.1822547074508086e-05, "loss": 0.0374, "step": 34010 }, { "epoch": 24.923076923076923, "grad_norm": 0.373046875, "learning_rate": 2.1794626216159356e-05, "loss": 0.0374, "step": 34020 }, { "epoch": 24.93040293040293, "grad_norm": 0.396484375, "learning_rate": 2.1766719111496985e-05, "loss": 0.0343, "step": 34030 }, { "epoch": 24.937728937728938, "grad_norm": 0.55859375, "learning_rate": 2.173882577389221e-05, "loss": 0.0325, "step": 34040 }, { "epoch": 24.945054945054945, "grad_norm": 0.4609375, "learning_rate": 2.171094621670961e-05, "loss": 0.0308, "step": 34050 }, { "epoch": 24.952380952380953, "grad_norm": 0.48828125, "learning_rate": 2.168308045330723e-05, "loss": 0.0308, "step": 34060 }, { "epoch": 24.95970695970696, "grad_norm": 0.7421875, "learning_rate": 2.165522849703646e-05, "loss": 0.0379, "step": 34070 }, { "epoch": 24.967032967032967, "grad_norm": 0.50390625, "learning_rate": 2.1627390361242064e-05, "loss": 0.0302, "step": 34080 }, { "epoch": 24.974358974358974, "grad_norm": 0.291015625, "learning_rate": 2.1599566059262246e-05, "loss": 0.0283, "step": 34090 }, { "epoch": 24.98168498168498, "grad_norm": 0.4140625, "learning_rate": 2.1571755604428518e-05, "loss": 0.029, "step": 34100 }, { "epoch": 24.98901098901099, "grad_norm": 0.57421875, "learning_rate": 2.1543959010065784e-05, "loss": 0.0349, "step": 34110 }, { "epoch": 24.996336996336996, "grad_norm": 0.302734375, "learning_rate": 2.1516176289492304e-05, "loss": 0.0338, "step": 34120 }, { "epoch": 25.003663003663004, "grad_norm": 0.57421875, "learning_rate": 2.1488407456019677e-05, "loss": 0.0291, "step": 34130 }, { "epoch": 25.01098901098901, "grad_norm": 0.34375, "learning_rate": 2.146065252295289e-05, "loss": 0.0301, "step": 34140 }, { "epoch": 25.01831501831502, "grad_norm": 0.34375, "learning_rate": 2.1432911503590214e-05, "loss": 0.0379, "step": 34150 }, { "epoch": 25.025641025641026, "grad_norm": 0.353515625, "learning_rate": 2.140518441122331e-05, "loss": 0.0293, "step": 34160 }, { "epoch": 25.032967032967033, "grad_norm": 0.376953125, "learning_rate": 2.1377471259137127e-05, "loss": 0.0323, "step": 34170 }, { "epoch": 25.04029304029304, "grad_norm": 0.33984375, "learning_rate": 2.1349772060609924e-05, "loss": 0.0323, "step": 34180 }, { "epoch": 25.047619047619047, "grad_norm": 0.34375, "learning_rate": 2.1322086828913332e-05, "loss": 0.0318, "step": 34190 }, { "epoch": 25.054945054945055, "grad_norm": 0.50390625, "learning_rate": 2.129441557731224e-05, "loss": 0.0325, "step": 34200 }, { "epoch": 25.062271062271062, "grad_norm": 0.41796875, "learning_rate": 2.126675831906483e-05, "loss": 0.0349, "step": 34210 }, { "epoch": 25.06959706959707, "grad_norm": 0.45703125, "learning_rate": 2.1239115067422643e-05, "loss": 0.0334, "step": 34220 }, { "epoch": 25.076923076923077, "grad_norm": 0.423828125, "learning_rate": 2.1211485835630424e-05, "loss": 0.0307, "step": 34230 }, { "epoch": 25.084249084249084, "grad_norm": 0.4453125, "learning_rate": 2.1183870636926276e-05, "loss": 0.0311, "step": 34240 }, { "epoch": 25.09157509157509, "grad_norm": 0.349609375, "learning_rate": 2.1156269484541536e-05, "loss": 0.033, "step": 34250 }, { "epoch": 25.0989010989011, "grad_norm": 0.44140625, "learning_rate": 2.112868239170081e-05, "loss": 0.0321, "step": 34260 }, { "epoch": 25.106227106227106, "grad_norm": 0.70703125, "learning_rate": 2.1101109371622002e-05, "loss": 0.0377, "step": 34270 }, { "epoch": 25.113553113553113, "grad_norm": 0.6640625, "learning_rate": 2.1073550437516225e-05, "loss": 0.0322, "step": 34280 }, { "epoch": 25.12087912087912, "grad_norm": 0.45703125, "learning_rate": 2.10460056025879e-05, "loss": 0.035, "step": 34290 }, { "epoch": 25.128205128205128, "grad_norm": 0.5859375, "learning_rate": 2.101847488003465e-05, "loss": 0.0306, "step": 34300 }, { "epoch": 25.135531135531135, "grad_norm": 0.578125, "learning_rate": 2.0990958283047316e-05, "loss": 0.0356, "step": 34310 }, { "epoch": 25.142857142857142, "grad_norm": 0.3671875, "learning_rate": 2.0963455824810052e-05, "loss": 0.0332, "step": 34320 }, { "epoch": 25.15018315018315, "grad_norm": 0.453125, "learning_rate": 2.0935967518500157e-05, "loss": 0.0323, "step": 34330 }, { "epoch": 25.157509157509157, "grad_norm": 0.50390625, "learning_rate": 2.0908493377288176e-05, "loss": 0.0332, "step": 34340 }, { "epoch": 25.164835164835164, "grad_norm": 0.81640625, "learning_rate": 2.0881033414337922e-05, "loss": 0.0372, "step": 34350 }, { "epoch": 25.17216117216117, "grad_norm": 0.462890625, "learning_rate": 2.0853587642806284e-05, "loss": 0.0344, "step": 34360 }, { "epoch": 25.17948717948718, "grad_norm": 0.390625, "learning_rate": 2.082615607584349e-05, "loss": 0.0324, "step": 34370 }, { "epoch": 25.186813186813186, "grad_norm": 0.71875, "learning_rate": 2.079873872659287e-05, "loss": 0.035, "step": 34380 }, { "epoch": 25.194139194139193, "grad_norm": 0.427734375, "learning_rate": 2.077133560819101e-05, "loss": 0.0315, "step": 34390 }, { "epoch": 25.2014652014652, "grad_norm": 0.41015625, "learning_rate": 2.0743946733767626e-05, "loss": 0.0335, "step": 34400 }, { "epoch": 25.208791208791208, "grad_norm": 0.7734375, "learning_rate": 2.0716572116445605e-05, "loss": 0.0329, "step": 34410 }, { "epoch": 25.216117216117215, "grad_norm": 0.39453125, "learning_rate": 2.0689211769341064e-05, "loss": 0.0329, "step": 34420 }, { "epoch": 25.223443223443223, "grad_norm": 0.6015625, "learning_rate": 2.066186570556323e-05, "loss": 0.0378, "step": 34430 }, { "epoch": 25.23076923076923, "grad_norm": 0.435546875, "learning_rate": 2.0634533938214466e-05, "loss": 0.0337, "step": 34440 }, { "epoch": 25.238095238095237, "grad_norm": 0.58203125, "learning_rate": 2.060721648039037e-05, "loss": 0.0337, "step": 34450 }, { "epoch": 25.245421245421245, "grad_norm": 0.515625, "learning_rate": 2.0579913345179605e-05, "loss": 0.0301, "step": 34460 }, { "epoch": 25.252747252747252, "grad_norm": 0.49609375, "learning_rate": 2.055262454566398e-05, "loss": 0.0323, "step": 34470 }, { "epoch": 25.26007326007326, "grad_norm": 0.447265625, "learning_rate": 2.052535009491852e-05, "loss": 0.0311, "step": 34480 }, { "epoch": 25.267399267399266, "grad_norm": 0.59765625, "learning_rate": 2.0498090006011226e-05, "loss": 0.0356, "step": 34490 }, { "epoch": 25.274725274725274, "grad_norm": 0.38671875, "learning_rate": 2.0470844292003358e-05, "loss": 0.031, "step": 34500 }, { "epoch": 25.28205128205128, "grad_norm": 0.3984375, "learning_rate": 2.0443612965949195e-05, "loss": 0.031, "step": 34510 }, { "epoch": 25.28937728937729, "grad_norm": 0.4765625, "learning_rate": 2.0416396040896196e-05, "loss": 0.0333, "step": 34520 }, { "epoch": 25.296703296703296, "grad_norm": 0.353515625, "learning_rate": 2.038919352988486e-05, "loss": 0.0313, "step": 34530 }, { "epoch": 25.304029304029303, "grad_norm": 0.45703125, "learning_rate": 2.0362005445948802e-05, "loss": 0.0301, "step": 34540 }, { "epoch": 25.31135531135531, "grad_norm": 0.3515625, "learning_rate": 2.033483180211473e-05, "loss": 0.0341, "step": 34550 }, { "epoch": 25.318681318681318, "grad_norm": 0.36328125, "learning_rate": 2.030767261140244e-05, "loss": 0.0333, "step": 34560 }, { "epoch": 25.326007326007325, "grad_norm": 0.33984375, "learning_rate": 2.028052788682476e-05, "loss": 0.0297, "step": 34570 }, { "epoch": 25.333333333333332, "grad_norm": 0.30078125, "learning_rate": 2.0253397641387652e-05, "loss": 0.0303, "step": 34580 }, { "epoch": 25.34065934065934, "grad_norm": 1.0078125, "learning_rate": 2.0226281888090072e-05, "loss": 0.0384, "step": 34590 }, { "epoch": 25.347985347985347, "grad_norm": 0.2890625, "learning_rate": 2.0199180639924132e-05, "loss": 0.0319, "step": 34600 }, { "epoch": 25.355311355311354, "grad_norm": 0.412109375, "learning_rate": 2.0172093909874844e-05, "loss": 0.0345, "step": 34610 }, { "epoch": 25.36263736263736, "grad_norm": 0.5078125, "learning_rate": 2.0145021710920403e-05, "loss": 0.0343, "step": 34620 }, { "epoch": 25.36996336996337, "grad_norm": 0.39453125, "learning_rate": 2.0117964056031976e-05, "loss": 0.0316, "step": 34630 }, { "epoch": 25.377289377289376, "grad_norm": 0.45703125, "learning_rate": 2.0090920958173755e-05, "loss": 0.0329, "step": 34640 }, { "epoch": 25.384615384615383, "grad_norm": 0.341796875, "learning_rate": 2.006389243030301e-05, "loss": 0.0345, "step": 34650 }, { "epoch": 25.39194139194139, "grad_norm": 0.412109375, "learning_rate": 2.0036878485369978e-05, "loss": 0.0326, "step": 34660 }, { "epoch": 25.399267399267398, "grad_norm": 0.375, "learning_rate": 2.0009879136317903e-05, "loss": 0.0283, "step": 34670 }, { "epoch": 25.406593406593405, "grad_norm": 0.416015625, "learning_rate": 1.9982894396083108e-05, "loss": 0.0326, "step": 34680 }, { "epoch": 25.413919413919412, "grad_norm": 0.39453125, "learning_rate": 1.9955924277594834e-05, "loss": 0.0344, "step": 34690 }, { "epoch": 25.42124542124542, "grad_norm": 0.76171875, "learning_rate": 1.992896879377534e-05, "loss": 0.0336, "step": 34700 }, { "epoch": 25.428571428571427, "grad_norm": 0.46875, "learning_rate": 1.990202795753992e-05, "loss": 0.0296, "step": 34710 }, { "epoch": 25.435897435897434, "grad_norm": 0.419921875, "learning_rate": 1.987510178179677e-05, "loss": 0.0331, "step": 34720 }, { "epoch": 25.44322344322344, "grad_norm": 0.58984375, "learning_rate": 1.9848190279447164e-05, "loss": 0.0332, "step": 34730 }, { "epoch": 25.45054945054945, "grad_norm": 0.60546875, "learning_rate": 1.9821293463385207e-05, "loss": 0.0349, "step": 34740 }, { "epoch": 25.457875457875456, "grad_norm": 0.35546875, "learning_rate": 1.9794411346498114e-05, "loss": 0.0333, "step": 34750 }, { "epoch": 25.465201465201464, "grad_norm": 0.58203125, "learning_rate": 1.9767543941665953e-05, "loss": 0.0317, "step": 34760 }, { "epoch": 25.47252747252747, "grad_norm": 0.447265625, "learning_rate": 1.9740691261761777e-05, "loss": 0.0318, "step": 34770 }, { "epoch": 25.479853479853478, "grad_norm": 0.6953125, "learning_rate": 1.9713853319651606e-05, "loss": 0.0345, "step": 34780 }, { "epoch": 25.487179487179485, "grad_norm": 0.408203125, "learning_rate": 1.9687030128194376e-05, "loss": 0.0345, "step": 34790 }, { "epoch": 25.494505494505496, "grad_norm": 0.734375, "learning_rate": 1.9660221700241934e-05, "loss": 0.0413, "step": 34800 }, { "epoch": 25.501831501831504, "grad_norm": 0.384765625, "learning_rate": 1.963342804863911e-05, "loss": 0.0292, "step": 34810 }, { "epoch": 25.50915750915751, "grad_norm": 0.48046875, "learning_rate": 1.9606649186223592e-05, "loss": 0.0317, "step": 34820 }, { "epoch": 25.516483516483518, "grad_norm": 0.431640625, "learning_rate": 1.957988512582604e-05, "loss": 0.0275, "step": 34830 }, { "epoch": 25.523809523809526, "grad_norm": 0.453125, "learning_rate": 1.955313588026998e-05, "loss": 0.0328, "step": 34840 }, { "epoch": 25.531135531135533, "grad_norm": 0.259765625, "learning_rate": 1.952640146237184e-05, "loss": 0.0306, "step": 34850 }, { "epoch": 25.53846153846154, "grad_norm": 0.41015625, "learning_rate": 1.9499681884941003e-05, "loss": 0.0346, "step": 34860 }, { "epoch": 25.545787545787547, "grad_norm": 0.330078125, "learning_rate": 1.9472977160779625e-05, "loss": 0.0296, "step": 34870 }, { "epoch": 25.553113553113555, "grad_norm": 0.318359375, "learning_rate": 1.9446287302682865e-05, "loss": 0.0307, "step": 34880 }, { "epoch": 25.560439560439562, "grad_norm": 0.53125, "learning_rate": 1.9419612323438703e-05, "loss": 0.0294, "step": 34890 }, { "epoch": 25.56776556776557, "grad_norm": 0.4140625, "learning_rate": 1.9392952235827973e-05, "loss": 0.0291, "step": 34900 }, { "epoch": 25.575091575091577, "grad_norm": 0.376953125, "learning_rate": 1.936630705262443e-05, "loss": 0.0347, "step": 34910 }, { "epoch": 25.582417582417584, "grad_norm": 0.89453125, "learning_rate": 1.9339676786594644e-05, "loss": 0.0374, "step": 34920 }, { "epoch": 25.58974358974359, "grad_norm": 0.421875, "learning_rate": 1.931306145049802e-05, "loss": 0.0369, "step": 34930 }, { "epoch": 25.5970695970696, "grad_norm": 0.3828125, "learning_rate": 1.9286461057086892e-05, "loss": 0.031, "step": 34940 }, { "epoch": 25.604395604395606, "grad_norm": 0.462890625, "learning_rate": 1.9259875619106332e-05, "loss": 0.031, "step": 34950 }, { "epoch": 25.611721611721613, "grad_norm": 0.42578125, "learning_rate": 1.9233305149294336e-05, "loss": 0.0332, "step": 34960 }, { "epoch": 25.61904761904762, "grad_norm": 0.466796875, "learning_rate": 1.920674966038167e-05, "loss": 0.0311, "step": 34970 }, { "epoch": 25.626373626373628, "grad_norm": 0.3828125, "learning_rate": 1.9180209165091916e-05, "loss": 0.0345, "step": 34980 }, { "epoch": 25.633699633699635, "grad_norm": 0.287109375, "learning_rate": 1.9153683676141566e-05, "loss": 0.0309, "step": 34990 }, { "epoch": 25.641025641025642, "grad_norm": 0.4453125, "learning_rate": 1.912717320623976e-05, "loss": 0.0345, "step": 35000 }, { "epoch": 25.64835164835165, "grad_norm": 0.46484375, "learning_rate": 1.9100677768088598e-05, "loss": 0.0308, "step": 35010 }, { "epoch": 25.655677655677657, "grad_norm": 0.359375, "learning_rate": 1.9074197374382897e-05, "loss": 0.0313, "step": 35020 }, { "epoch": 25.663003663003664, "grad_norm": 0.6328125, "learning_rate": 1.9047732037810265e-05, "loss": 0.0376, "step": 35030 }, { "epoch": 25.67032967032967, "grad_norm": 0.3671875, "learning_rate": 1.9021281771051143e-05, "loss": 0.0368, "step": 35040 }, { "epoch": 25.67765567765568, "grad_norm": 0.333984375, "learning_rate": 1.899484658677869e-05, "loss": 0.0368, "step": 35050 }, { "epoch": 25.684981684981686, "grad_norm": 0.400390625, "learning_rate": 1.8968426497658905e-05, "loss": 0.0346, "step": 35060 }, { "epoch": 25.692307692307693, "grad_norm": 0.41015625, "learning_rate": 1.89420215163505e-05, "loss": 0.0355, "step": 35070 }, { "epoch": 25.6996336996337, "grad_norm": 0.453125, "learning_rate": 1.8915631655504958e-05, "loss": 0.032, "step": 35080 }, { "epoch": 25.706959706959708, "grad_norm": 0.66015625, "learning_rate": 1.8889256927766546e-05, "loss": 0.0333, "step": 35090 }, { "epoch": 25.714285714285715, "grad_norm": 0.384765625, "learning_rate": 1.8862897345772263e-05, "loss": 0.0338, "step": 35100 }, { "epoch": 25.721611721611723, "grad_norm": 0.380859375, "learning_rate": 1.883655292215183e-05, "loss": 0.0334, "step": 35110 }, { "epoch": 25.72893772893773, "grad_norm": 0.349609375, "learning_rate": 1.8810223669527745e-05, "loss": 0.033, "step": 35120 }, { "epoch": 25.736263736263737, "grad_norm": 0.3984375, "learning_rate": 1.8783909600515188e-05, "loss": 0.0351, "step": 35130 }, { "epoch": 25.743589743589745, "grad_norm": 0.384765625, "learning_rate": 1.875761072772213e-05, "loss": 0.036, "step": 35140 }, { "epoch": 25.750915750915752, "grad_norm": 0.33203125, "learning_rate": 1.8731327063749202e-05, "loss": 0.0359, "step": 35150 }, { "epoch": 25.75824175824176, "grad_norm": 0.40625, "learning_rate": 1.870505862118976e-05, "loss": 0.0311, "step": 35160 }, { "epoch": 25.765567765567766, "grad_norm": 0.53515625, "learning_rate": 1.8678805412629907e-05, "loss": 0.0387, "step": 35170 }, { "epoch": 25.772893772893774, "grad_norm": 0.8359375, "learning_rate": 1.8652567450648377e-05, "loss": 0.0307, "step": 35180 }, { "epoch": 25.78021978021978, "grad_norm": 0.51953125, "learning_rate": 1.8626344747816683e-05, "loss": 0.0331, "step": 35190 }, { "epoch": 25.78754578754579, "grad_norm": 0.412109375, "learning_rate": 1.860013731669896e-05, "loss": 0.0353, "step": 35200 }, { "epoch": 25.794871794871796, "grad_norm": 0.33203125, "learning_rate": 1.857394516985203e-05, "loss": 0.0338, "step": 35210 }, { "epoch": 25.802197802197803, "grad_norm": 0.32421875, "learning_rate": 1.8547768319825446e-05, "loss": 0.0275, "step": 35220 }, { "epoch": 25.80952380952381, "grad_norm": 0.4453125, "learning_rate": 1.8521606779161375e-05, "loss": 0.0275, "step": 35230 }, { "epoch": 25.816849816849818, "grad_norm": 0.38671875, "learning_rate": 1.8495460560394677e-05, "loss": 0.029, "step": 35240 }, { "epoch": 25.824175824175825, "grad_norm": 0.3203125, "learning_rate": 1.8469329676052854e-05, "loss": 0.0354, "step": 35250 }, { "epoch": 25.831501831501832, "grad_norm": 0.62109375, "learning_rate": 1.8443214138656063e-05, "loss": 0.0386, "step": 35260 }, { "epoch": 25.83882783882784, "grad_norm": 0.34375, "learning_rate": 1.8417113960717138e-05, "loss": 0.0334, "step": 35270 }, { "epoch": 25.846153846153847, "grad_norm": 0.51171875, "learning_rate": 1.8391029154741508e-05, "loss": 0.0373, "step": 35280 }, { "epoch": 25.853479853479854, "grad_norm": 0.3515625, "learning_rate": 1.8364959733227283e-05, "loss": 0.0356, "step": 35290 }, { "epoch": 25.86080586080586, "grad_norm": 0.353515625, "learning_rate": 1.8338905708665167e-05, "loss": 0.0341, "step": 35300 }, { "epoch": 25.86813186813187, "grad_norm": 0.376953125, "learning_rate": 1.8312867093538484e-05, "loss": 0.0377, "step": 35310 }, { "epoch": 25.875457875457876, "grad_norm": 0.6171875, "learning_rate": 1.828684390032321e-05, "loss": 0.0326, "step": 35320 }, { "epoch": 25.882783882783883, "grad_norm": 0.423828125, "learning_rate": 1.8260836141487904e-05, "loss": 0.0306, "step": 35330 }, { "epoch": 25.89010989010989, "grad_norm": 0.296875, "learning_rate": 1.8234843829493722e-05, "loss": 0.0287, "step": 35340 }, { "epoch": 25.897435897435898, "grad_norm": 0.43359375, "learning_rate": 1.8208866976794456e-05, "loss": 0.0323, "step": 35350 }, { "epoch": 25.904761904761905, "grad_norm": 0.51171875, "learning_rate": 1.818290559583645e-05, "loss": 0.0304, "step": 35360 }, { "epoch": 25.912087912087912, "grad_norm": 0.265625, "learning_rate": 1.815695969905869e-05, "loss": 0.04, "step": 35370 }, { "epoch": 25.91941391941392, "grad_norm": 0.322265625, "learning_rate": 1.8131029298892657e-05, "loss": 0.0351, "step": 35380 }, { "epoch": 25.926739926739927, "grad_norm": 0.7421875, "learning_rate": 1.810511440776247e-05, "loss": 0.0367, "step": 35390 }, { "epoch": 25.934065934065934, "grad_norm": 0.55859375, "learning_rate": 1.807921503808482e-05, "loss": 0.0302, "step": 35400 }, { "epoch": 25.94139194139194, "grad_norm": 0.419921875, "learning_rate": 1.8053331202268927e-05, "loss": 0.0315, "step": 35410 }, { "epoch": 25.94871794871795, "grad_norm": 0.3515625, "learning_rate": 1.8027462912716604e-05, "loss": 0.0306, "step": 35420 }, { "epoch": 25.956043956043956, "grad_norm": 0.314453125, "learning_rate": 1.80016101818222e-05, "loss": 0.0299, "step": 35430 }, { "epoch": 25.963369963369964, "grad_norm": 0.3515625, "learning_rate": 1.7975773021972568e-05, "loss": 0.0312, "step": 35440 }, { "epoch": 25.97069597069597, "grad_norm": 0.3515625, "learning_rate": 1.7949951445547186e-05, "loss": 0.0361, "step": 35450 }, { "epoch": 25.978021978021978, "grad_norm": 0.375, "learning_rate": 1.7924145464917994e-05, "loss": 0.03, "step": 35460 }, { "epoch": 25.985347985347985, "grad_norm": 0.333984375, "learning_rate": 1.789835509244947e-05, "loss": 0.0308, "step": 35470 }, { "epoch": 25.992673992673993, "grad_norm": 0.3671875, "learning_rate": 1.7872580340498667e-05, "loss": 0.0312, "step": 35480 }, { "epoch": 26.0, "grad_norm": 0.361328125, "learning_rate": 1.7846821221415098e-05, "loss": 0.0296, "step": 35490 }, { "epoch": 26.007326007326007, "grad_norm": 0.337890625, "learning_rate": 1.782107774754079e-05, "loss": 0.033, "step": 35500 }, { "epoch": 26.014652014652015, "grad_norm": 0.34375, "learning_rate": 1.7795349931210275e-05, "loss": 0.0342, "step": 35510 }, { "epoch": 26.021978021978022, "grad_norm": 0.404296875, "learning_rate": 1.7769637784750633e-05, "loss": 0.0272, "step": 35520 }, { "epoch": 26.02930402930403, "grad_norm": 0.578125, "learning_rate": 1.7743941320481374e-05, "loss": 0.0315, "step": 35530 }, { "epoch": 26.036630036630036, "grad_norm": 0.384765625, "learning_rate": 1.7718260550714503e-05, "loss": 0.0327, "step": 35540 }, { "epoch": 26.043956043956044, "grad_norm": 0.50390625, "learning_rate": 1.7692595487754555e-05, "loss": 0.0301, "step": 35550 }, { "epoch": 26.05128205128205, "grad_norm": 0.66796875, "learning_rate": 1.7666946143898478e-05, "loss": 0.0317, "step": 35560 }, { "epoch": 26.05860805860806, "grad_norm": 0.75390625, "learning_rate": 1.7641312531435708e-05, "loss": 0.0333, "step": 35570 }, { "epoch": 26.065934065934066, "grad_norm": 0.373046875, "learning_rate": 1.761569466264818e-05, "loss": 0.0337, "step": 35580 }, { "epoch": 26.073260073260073, "grad_norm": 0.41796875, "learning_rate": 1.759009254981022e-05, "loss": 0.0322, "step": 35590 }, { "epoch": 26.08058608058608, "grad_norm": 0.494140625, "learning_rate": 1.756450620518867e-05, "loss": 0.0341, "step": 35600 }, { "epoch": 26.087912087912088, "grad_norm": 0.4296875, "learning_rate": 1.7538935641042776e-05, "loss": 0.0354, "step": 35610 }, { "epoch": 26.095238095238095, "grad_norm": 0.3203125, "learning_rate": 1.7513380869624237e-05, "loss": 0.0324, "step": 35620 }, { "epoch": 26.102564102564102, "grad_norm": 0.388671875, "learning_rate": 1.7487841903177175e-05, "loss": 0.0315, "step": 35630 }, { "epoch": 26.10989010989011, "grad_norm": 0.369140625, "learning_rate": 1.7462318753938134e-05, "loss": 0.037, "step": 35640 }, { "epoch": 26.117216117216117, "grad_norm": 0.43359375, "learning_rate": 1.7436811434136124e-05, "loss": 0.0322, "step": 35650 }, { "epoch": 26.124542124542124, "grad_norm": 0.435546875, "learning_rate": 1.7411319955992524e-05, "loss": 0.0288, "step": 35660 }, { "epoch": 26.13186813186813, "grad_norm": 0.447265625, "learning_rate": 1.7385844331721117e-05, "loss": 0.0348, "step": 35670 }, { "epoch": 26.13919413919414, "grad_norm": 0.333984375, "learning_rate": 1.7360384573528157e-05, "loss": 0.0327, "step": 35680 }, { "epoch": 26.146520146520146, "grad_norm": 0.4765625, "learning_rate": 1.7334940693612216e-05, "loss": 0.0331, "step": 35690 }, { "epoch": 26.153846153846153, "grad_norm": 0.37109375, "learning_rate": 1.7309512704164283e-05, "loss": 0.0368, "step": 35700 }, { "epoch": 26.16117216117216, "grad_norm": 0.373046875, "learning_rate": 1.728410061736777e-05, "loss": 0.0318, "step": 35710 }, { "epoch": 26.168498168498168, "grad_norm": 0.357421875, "learning_rate": 1.7258704445398413e-05, "loss": 0.0346, "step": 35720 }, { "epoch": 26.175824175824175, "grad_norm": 0.53515625, "learning_rate": 1.7233324200424386e-05, "loss": 0.0352, "step": 35730 }, { "epoch": 26.183150183150182, "grad_norm": 0.451171875, "learning_rate": 1.7207959894606176e-05, "loss": 0.0318, "step": 35740 }, { "epoch": 26.19047619047619, "grad_norm": 0.353515625, "learning_rate": 1.7182611540096654e-05, "loss": 0.0318, "step": 35750 }, { "epoch": 26.197802197802197, "grad_norm": 0.359375, "learning_rate": 1.715727914904106e-05, "loss": 0.0351, "step": 35760 }, { "epoch": 26.205128205128204, "grad_norm": 0.37890625, "learning_rate": 1.7131962733576937e-05, "loss": 0.0342, "step": 35770 }, { "epoch": 26.21245421245421, "grad_norm": 0.431640625, "learning_rate": 1.7106662305834262e-05, "loss": 0.0294, "step": 35780 }, { "epoch": 26.21978021978022, "grad_norm": 0.447265625, "learning_rate": 1.708137787793527e-05, "loss": 0.0307, "step": 35790 }, { "epoch": 26.227106227106226, "grad_norm": 0.515625, "learning_rate": 1.705610946199455e-05, "loss": 0.0312, "step": 35800 }, { "epoch": 26.234432234432234, "grad_norm": 0.50390625, "learning_rate": 1.703085707011906e-05, "loss": 0.0339, "step": 35810 }, { "epoch": 26.24175824175824, "grad_norm": 0.28515625, "learning_rate": 1.700562071440804e-05, "loss": 0.0311, "step": 35820 }, { "epoch": 26.249084249084248, "grad_norm": 0.50390625, "learning_rate": 1.6980400406953023e-05, "loss": 0.0374, "step": 35830 }, { "epoch": 26.256410256410255, "grad_norm": 0.88671875, "learning_rate": 1.695519615983794e-05, "loss": 0.0347, "step": 35840 }, { "epoch": 26.263736263736263, "grad_norm": 0.515625, "learning_rate": 1.693000798513892e-05, "loss": 0.0335, "step": 35850 }, { "epoch": 26.27106227106227, "grad_norm": 0.359375, "learning_rate": 1.6904835894924492e-05, "loss": 0.0315, "step": 35860 }, { "epoch": 26.278388278388277, "grad_norm": 0.416015625, "learning_rate": 1.6879679901255402e-05, "loss": 0.0305, "step": 35870 }, { "epoch": 26.285714285714285, "grad_norm": 0.419921875, "learning_rate": 1.6854540016184725e-05, "loss": 0.0303, "step": 35880 }, { "epoch": 26.293040293040292, "grad_norm": 0.353515625, "learning_rate": 1.6829416251757788e-05, "loss": 0.0309, "step": 35890 }, { "epoch": 26.3003663003663, "grad_norm": 0.35546875, "learning_rate": 1.6804308620012213e-05, "loss": 0.0315, "step": 35900 }, { "epoch": 26.307692307692307, "grad_norm": 0.35546875, "learning_rate": 1.6779217132977905e-05, "loss": 0.0298, "step": 35910 }, { "epoch": 26.315018315018314, "grad_norm": 0.5859375, "learning_rate": 1.6754141802677018e-05, "loss": 0.0337, "step": 35920 }, { "epoch": 26.32234432234432, "grad_norm": 0.87890625, "learning_rate": 1.672908264112394e-05, "loss": 0.0362, "step": 35930 }, { "epoch": 26.32967032967033, "grad_norm": 0.353515625, "learning_rate": 1.670403966032537e-05, "loss": 0.0344, "step": 35940 }, { "epoch": 26.336996336996336, "grad_norm": 0.33203125, "learning_rate": 1.6679012872280187e-05, "loss": 0.0317, "step": 35950 }, { "epoch": 26.344322344322343, "grad_norm": 0.376953125, "learning_rate": 1.665400228897959e-05, "loss": 0.0271, "step": 35960 }, { "epoch": 26.35164835164835, "grad_norm": 0.44140625, "learning_rate": 1.6629007922406942e-05, "loss": 0.0297, "step": 35970 }, { "epoch": 26.358974358974358, "grad_norm": 0.375, "learning_rate": 1.6604029784537855e-05, "loss": 0.0337, "step": 35980 }, { "epoch": 26.366300366300365, "grad_norm": 0.353515625, "learning_rate": 1.6579067887340204e-05, "loss": 0.0293, "step": 35990 }, { "epoch": 26.373626373626372, "grad_norm": 0.416015625, "learning_rate": 1.655412224277404e-05, "loss": 0.0343, "step": 36000 }, { "epoch": 26.38095238095238, "grad_norm": 0.34375, "learning_rate": 1.6529192862791635e-05, "loss": 0.0326, "step": 36010 }, { "epoch": 26.388278388278387, "grad_norm": 0.419921875, "learning_rate": 1.650427975933747e-05, "loss": 0.032, "step": 36020 }, { "epoch": 26.395604395604394, "grad_norm": 0.32421875, "learning_rate": 1.6479382944348225e-05, "loss": 0.0341, "step": 36030 }, { "epoch": 26.4029304029304, "grad_norm": 0.41796875, "learning_rate": 1.6454502429752803e-05, "loss": 0.0313, "step": 36040 }, { "epoch": 26.41025641025641, "grad_norm": 0.33203125, "learning_rate": 1.6429638227472263e-05, "loss": 0.0332, "step": 36050 }, { "epoch": 26.417582417582416, "grad_norm": 0.7265625, "learning_rate": 1.6404790349419846e-05, "loss": 0.0341, "step": 36060 }, { "epoch": 26.424908424908423, "grad_norm": 0.298828125, "learning_rate": 1.6379958807501016e-05, "loss": 0.032, "step": 36070 }, { "epoch": 26.43223443223443, "grad_norm": 0.546875, "learning_rate": 1.6355143613613347e-05, "loss": 0.032, "step": 36080 }, { "epoch": 26.439560439560438, "grad_norm": 0.43359375, "learning_rate": 1.633034477964664e-05, "loss": 0.0379, "step": 36090 }, { "epoch": 26.446886446886445, "grad_norm": 0.51953125, "learning_rate": 1.630556231748282e-05, "loss": 0.033, "step": 36100 }, { "epoch": 26.454212454212453, "grad_norm": 0.5703125, "learning_rate": 1.6280796238995964e-05, "loss": 0.0317, "step": 36110 }, { "epoch": 26.46153846153846, "grad_norm": 0.41015625, "learning_rate": 1.6256046556052333e-05, "loss": 0.0287, "step": 36120 }, { "epoch": 26.468864468864467, "grad_norm": 0.375, "learning_rate": 1.623131328051031e-05, "loss": 0.0362, "step": 36130 }, { "epoch": 26.476190476190474, "grad_norm": 0.515625, "learning_rate": 1.620659642422041e-05, "loss": 0.0324, "step": 36140 }, { "epoch": 26.483516483516482, "grad_norm": 0.375, "learning_rate": 1.6181895999025296e-05, "loss": 0.0326, "step": 36150 }, { "epoch": 26.49084249084249, "grad_norm": 0.4453125, "learning_rate": 1.615721201675973e-05, "loss": 0.0315, "step": 36160 }, { "epoch": 26.498168498168496, "grad_norm": 0.3203125, "learning_rate": 1.6132544489250654e-05, "loss": 0.0313, "step": 36170 }, { "epoch": 26.505494505494504, "grad_norm": 0.416015625, "learning_rate": 1.610789342831705e-05, "loss": 0.0368, "step": 36180 }, { "epoch": 26.51282051282051, "grad_norm": 0.328125, "learning_rate": 1.608325884577009e-05, "loss": 0.0332, "step": 36190 }, { "epoch": 26.520146520146522, "grad_norm": 0.439453125, "learning_rate": 1.6058640753413e-05, "loss": 0.0287, "step": 36200 }, { "epoch": 26.52747252747253, "grad_norm": 0.375, "learning_rate": 1.603403916304109e-05, "loss": 0.0334, "step": 36210 }, { "epoch": 26.534798534798536, "grad_norm": 0.51953125, "learning_rate": 1.600945408644182e-05, "loss": 0.0318, "step": 36220 }, { "epoch": 26.542124542124544, "grad_norm": 0.3984375, "learning_rate": 1.59848855353947e-05, "loss": 0.03, "step": 36230 }, { "epoch": 26.54945054945055, "grad_norm": 0.41796875, "learning_rate": 1.5960333521671303e-05, "loss": 0.0353, "step": 36240 }, { "epoch": 26.55677655677656, "grad_norm": 0.470703125, "learning_rate": 1.5935798057035338e-05, "loss": 0.0295, "step": 36250 }, { "epoch": 26.564102564102566, "grad_norm": 0.48828125, "learning_rate": 1.591127915324254e-05, "loss": 0.0345, "step": 36260 }, { "epoch": 26.571428571428573, "grad_norm": 0.59375, "learning_rate": 1.5886776822040718e-05, "loss": 0.0361, "step": 36270 }, { "epoch": 26.57875457875458, "grad_norm": 0.375, "learning_rate": 1.5862291075169747e-05, "loss": 0.0403, "step": 36280 }, { "epoch": 26.586080586080588, "grad_norm": 0.515625, "learning_rate": 1.5837821924361526e-05, "loss": 0.0314, "step": 36290 }, { "epoch": 26.593406593406595, "grad_norm": 0.4140625, "learning_rate": 1.5813369381340066e-05, "loss": 0.0277, "step": 36300 }, { "epoch": 26.600732600732602, "grad_norm": 0.388671875, "learning_rate": 1.5788933457821353e-05, "loss": 0.0309, "step": 36310 }, { "epoch": 26.60805860805861, "grad_norm": 0.306640625, "learning_rate": 1.5764514165513466e-05, "loss": 0.0309, "step": 36320 }, { "epoch": 26.615384615384617, "grad_norm": 0.486328125, "learning_rate": 1.574011151611648e-05, "loss": 0.0402, "step": 36330 }, { "epoch": 26.622710622710624, "grad_norm": 0.32421875, "learning_rate": 1.571572552132248e-05, "loss": 0.0335, "step": 36340 }, { "epoch": 26.63003663003663, "grad_norm": 0.43359375, "learning_rate": 1.569135619281564e-05, "loss": 0.0356, "step": 36350 }, { "epoch": 26.63736263736264, "grad_norm": 0.326171875, "learning_rate": 1.5667003542272074e-05, "loss": 0.0317, "step": 36360 }, { "epoch": 26.644688644688646, "grad_norm": 0.333984375, "learning_rate": 1.564266758135993e-05, "loss": 0.0353, "step": 36370 }, { "epoch": 26.652014652014653, "grad_norm": 0.515625, "learning_rate": 1.561834832173941e-05, "loss": 0.0332, "step": 36380 }, { "epoch": 26.65934065934066, "grad_norm": 0.3984375, "learning_rate": 1.5594045775062607e-05, "loss": 0.0325, "step": 36390 }, { "epoch": 26.666666666666668, "grad_norm": 0.337890625, "learning_rate": 1.556975995297372e-05, "loss": 0.0289, "step": 36400 }, { "epoch": 26.673992673992675, "grad_norm": 0.43359375, "learning_rate": 1.5545490867108843e-05, "loss": 0.0322, "step": 36410 }, { "epoch": 26.681318681318682, "grad_norm": 0.51953125, "learning_rate": 1.5521238529096126e-05, "loss": 0.0326, "step": 36420 }, { "epoch": 26.68864468864469, "grad_norm": 0.62109375, "learning_rate": 1.5497002950555643e-05, "loss": 0.0388, "step": 36430 }, { "epoch": 26.695970695970697, "grad_norm": 0.380859375, "learning_rate": 1.5472784143099445e-05, "loss": 0.0358, "step": 36440 }, { "epoch": 26.703296703296704, "grad_norm": 0.3671875, "learning_rate": 1.544858211833158e-05, "loss": 0.0334, "step": 36450 }, { "epoch": 26.71062271062271, "grad_norm": 0.333984375, "learning_rate": 1.5424396887848026e-05, "loss": 0.0331, "step": 36460 }, { "epoch": 26.71794871794872, "grad_norm": 0.38671875, "learning_rate": 1.5400228463236697e-05, "loss": 0.0327, "step": 36470 }, { "epoch": 26.725274725274726, "grad_norm": 0.61328125, "learning_rate": 1.5376076856077512e-05, "loss": 0.0336, "step": 36480 }, { "epoch": 26.732600732600734, "grad_norm": 0.38671875, "learning_rate": 1.535194207794227e-05, "loss": 0.0316, "step": 36490 }, { "epoch": 26.73992673992674, "grad_norm": 0.3203125, "learning_rate": 1.532782414039476e-05, "loss": 0.0316, "step": 36500 }, { "epoch": 26.747252747252748, "grad_norm": 0.328125, "learning_rate": 1.5303723054990683e-05, "loss": 0.034, "step": 36510 }, { "epoch": 26.754578754578755, "grad_norm": 0.451171875, "learning_rate": 1.527963883327761e-05, "loss": 0.0319, "step": 36520 }, { "epoch": 26.761904761904763, "grad_norm": 0.37890625, "learning_rate": 1.5255571486795131e-05, "loss": 0.0334, "step": 36530 }, { "epoch": 26.76923076923077, "grad_norm": 0.373046875, "learning_rate": 1.5231521027074668e-05, "loss": 0.0348, "step": 36540 }, { "epoch": 26.776556776556777, "grad_norm": 0.8515625, "learning_rate": 1.5207487465639616e-05, "loss": 0.0358, "step": 36550 }, { "epoch": 26.783882783882785, "grad_norm": 0.6484375, "learning_rate": 1.518347081400523e-05, "loss": 0.0308, "step": 36560 }, { "epoch": 26.791208791208792, "grad_norm": 0.30078125, "learning_rate": 1.5159471083678658e-05, "loss": 0.0329, "step": 36570 }, { "epoch": 26.7985347985348, "grad_norm": 0.390625, "learning_rate": 1.5135488286158983e-05, "loss": 0.0338, "step": 36580 }, { "epoch": 26.805860805860807, "grad_norm": 0.357421875, "learning_rate": 1.5111522432937153e-05, "loss": 0.036, "step": 36590 }, { "epoch": 26.813186813186814, "grad_norm": 0.361328125, "learning_rate": 1.5087573535495961e-05, "loss": 0.0319, "step": 36600 }, { "epoch": 26.82051282051282, "grad_norm": 0.32421875, "learning_rate": 1.5063641605310144e-05, "loss": 0.0316, "step": 36610 }, { "epoch": 26.82783882783883, "grad_norm": 0.54296875, "learning_rate": 1.5039726653846247e-05, "loss": 0.0403, "step": 36620 }, { "epoch": 26.835164835164836, "grad_norm": 0.4765625, "learning_rate": 1.5015828692562746e-05, "loss": 0.0305, "step": 36630 }, { "epoch": 26.842490842490843, "grad_norm": 0.37890625, "learning_rate": 1.4991947732909908e-05, "loss": 0.0331, "step": 36640 }, { "epoch": 26.84981684981685, "grad_norm": 0.66796875, "learning_rate": 1.4968083786329897e-05, "loss": 0.0358, "step": 36650 }, { "epoch": 26.857142857142858, "grad_norm": 0.765625, "learning_rate": 1.4944236864256703e-05, "loss": 0.0353, "step": 36660 }, { "epoch": 26.864468864468865, "grad_norm": 0.466796875, "learning_rate": 1.4920406978116161e-05, "loss": 0.0324, "step": 36670 }, { "epoch": 26.871794871794872, "grad_norm": 0.361328125, "learning_rate": 1.4896594139325975e-05, "loss": 0.0365, "step": 36680 }, { "epoch": 26.87912087912088, "grad_norm": 0.453125, "learning_rate": 1.4872798359295652e-05, "loss": 0.0306, "step": 36690 }, { "epoch": 26.886446886446887, "grad_norm": 0.32421875, "learning_rate": 1.4849019649426505e-05, "loss": 0.0304, "step": 36700 }, { "epoch": 26.893772893772894, "grad_norm": 0.357421875, "learning_rate": 1.4825258021111734e-05, "loss": 0.0338, "step": 36710 }, { "epoch": 26.9010989010989, "grad_norm": 0.287109375, "learning_rate": 1.4801513485736274e-05, "loss": 0.0295, "step": 36720 }, { "epoch": 26.90842490842491, "grad_norm": 0.50390625, "learning_rate": 1.4777786054676945e-05, "loss": 0.0357, "step": 36730 }, { "epoch": 26.915750915750916, "grad_norm": 0.35546875, "learning_rate": 1.4754075739302333e-05, "loss": 0.0313, "step": 36740 }, { "epoch": 26.923076923076923, "grad_norm": 0.484375, "learning_rate": 1.4730382550972799e-05, "loss": 0.0405, "step": 36750 }, { "epoch": 26.93040293040293, "grad_norm": 0.357421875, "learning_rate": 1.4706706501040577e-05, "loss": 0.0301, "step": 36760 }, { "epoch": 26.937728937728938, "grad_norm": 0.3515625, "learning_rate": 1.4683047600849579e-05, "loss": 0.031, "step": 36770 }, { "epoch": 26.945054945054945, "grad_norm": 0.46484375, "learning_rate": 1.4659405861735606e-05, "loss": 0.0343, "step": 36780 }, { "epoch": 26.952380952380953, "grad_norm": 0.44140625, "learning_rate": 1.4635781295026174e-05, "loss": 0.0328, "step": 36790 }, { "epoch": 26.95970695970696, "grad_norm": 0.42578125, "learning_rate": 1.4612173912040578e-05, "loss": 0.0345, "step": 36800 }, { "epoch": 26.967032967032967, "grad_norm": 0.5234375, "learning_rate": 1.4588583724089911e-05, "loss": 0.0375, "step": 36810 }, { "epoch": 26.974358974358974, "grad_norm": 0.474609375, "learning_rate": 1.4565010742477e-05, "loss": 0.0344, "step": 36820 }, { "epoch": 26.98168498168498, "grad_norm": 0.380859375, "learning_rate": 1.4541454978496417e-05, "loss": 0.0311, "step": 36830 }, { "epoch": 26.98901098901099, "grad_norm": 0.390625, "learning_rate": 1.4517916443434529e-05, "loss": 0.0347, "step": 36840 }, { "epoch": 26.996336996336996, "grad_norm": 0.8515625, "learning_rate": 1.4494395148569387e-05, "loss": 0.0422, "step": 36850 }, { "epoch": 27.003663003663004, "grad_norm": 0.828125, "learning_rate": 1.4470891105170856e-05, "loss": 0.0327, "step": 36860 }, { "epoch": 27.01098901098901, "grad_norm": 0.52734375, "learning_rate": 1.4447404324500463e-05, "loss": 0.0286, "step": 36870 }, { "epoch": 27.01831501831502, "grad_norm": 0.359375, "learning_rate": 1.4423934817811499e-05, "loss": 0.0317, "step": 36880 }, { "epoch": 27.025641025641026, "grad_norm": 0.6640625, "learning_rate": 1.4400482596349004e-05, "loss": 0.0353, "step": 36890 }, { "epoch": 27.032967032967033, "grad_norm": 0.90625, "learning_rate": 1.4377047671349652e-05, "loss": 0.0313, "step": 36900 }, { "epoch": 27.04029304029304, "grad_norm": 0.412109375, "learning_rate": 1.4353630054041937e-05, "loss": 0.0336, "step": 36910 }, { "epoch": 27.047619047619047, "grad_norm": 0.390625, "learning_rate": 1.4330229755645983e-05, "loss": 0.0347, "step": 36920 }, { "epoch": 27.054945054945055, "grad_norm": 0.431640625, "learning_rate": 1.4306846787373624e-05, "loss": 0.0349, "step": 36930 }, { "epoch": 27.062271062271062, "grad_norm": 0.294921875, "learning_rate": 1.4283481160428453e-05, "loss": 0.0264, "step": 36940 }, { "epoch": 27.06959706959707, "grad_norm": 0.318359375, "learning_rate": 1.4260132886005658e-05, "loss": 0.0315, "step": 36950 }, { "epoch": 27.076923076923077, "grad_norm": 0.515625, "learning_rate": 1.4236801975292208e-05, "loss": 0.0327, "step": 36960 }, { "epoch": 27.084249084249084, "grad_norm": 0.66015625, "learning_rate": 1.4213488439466683e-05, "loss": 0.0328, "step": 36970 }, { "epoch": 27.09157509157509, "grad_norm": 0.318359375, "learning_rate": 1.4190192289699356e-05, "loss": 0.0315, "step": 36980 }, { "epoch": 27.0989010989011, "grad_norm": 0.384765625, "learning_rate": 1.4166913537152205e-05, "loss": 0.0331, "step": 36990 }, { "epoch": 27.106227106227106, "grad_norm": 0.4453125, "learning_rate": 1.414365219297882e-05, "loss": 0.0306, "step": 37000 }, { "epoch": 27.113553113553113, "grad_norm": 0.6328125, "learning_rate": 1.4120408268324476e-05, "loss": 0.0343, "step": 37010 }, { "epoch": 27.12087912087912, "grad_norm": 0.314453125, "learning_rate": 1.4097181774326128e-05, "loss": 0.0347, "step": 37020 }, { "epoch": 27.128205128205128, "grad_norm": 0.310546875, "learning_rate": 1.4073972722112302e-05, "loss": 0.0364, "step": 37030 }, { "epoch": 27.135531135531135, "grad_norm": 0.33984375, "learning_rate": 1.4050781122803261e-05, "loss": 0.0318, "step": 37040 }, { "epoch": 27.142857142857142, "grad_norm": 0.49609375, "learning_rate": 1.4027606987510842e-05, "loss": 0.0306, "step": 37050 }, { "epoch": 27.15018315018315, "grad_norm": 0.4765625, "learning_rate": 1.400445032733852e-05, "loss": 0.0329, "step": 37060 }, { "epoch": 27.157509157509157, "grad_norm": 0.59765625, "learning_rate": 1.3981311153381437e-05, "loss": 0.033, "step": 37070 }, { "epoch": 27.164835164835164, "grad_norm": 0.8125, "learning_rate": 1.3958189476726305e-05, "loss": 0.036, "step": 37080 }, { "epoch": 27.17216117216117, "grad_norm": 0.353515625, "learning_rate": 1.3935085308451509e-05, "loss": 0.0333, "step": 37090 }, { "epoch": 27.17948717948718, "grad_norm": 0.427734375, "learning_rate": 1.3911998659626999e-05, "loss": 0.0326, "step": 37100 }, { "epoch": 27.186813186813186, "grad_norm": 0.443359375, "learning_rate": 1.388892954131432e-05, "loss": 0.0336, "step": 37110 }, { "epoch": 27.194139194139193, "grad_norm": 0.369140625, "learning_rate": 1.386587796456669e-05, "loss": 0.0309, "step": 37120 }, { "epoch": 27.2014652014652, "grad_norm": 0.296875, "learning_rate": 1.3842843940428848e-05, "loss": 0.0323, "step": 37130 }, { "epoch": 27.208791208791208, "grad_norm": 0.62109375, "learning_rate": 1.3819827479937142e-05, "loss": 0.0386, "step": 37140 }, { "epoch": 27.216117216117215, "grad_norm": 0.455078125, "learning_rate": 1.3796828594119563e-05, "loss": 0.0329, "step": 37150 }, { "epoch": 27.223443223443223, "grad_norm": 0.310546875, "learning_rate": 1.3773847293995553e-05, "loss": 0.0286, "step": 37160 }, { "epoch": 27.23076923076923, "grad_norm": 0.33203125, "learning_rate": 1.3750883590576278e-05, "loss": 0.0336, "step": 37170 }, { "epoch": 27.238095238095237, "grad_norm": 0.4375, "learning_rate": 1.3727937494864352e-05, "loss": 0.0345, "step": 37180 }, { "epoch": 27.245421245421245, "grad_norm": 0.4453125, "learning_rate": 1.3705009017854035e-05, "loss": 0.0331, "step": 37190 }, { "epoch": 27.252747252747252, "grad_norm": 0.51171875, "learning_rate": 1.368209817053111e-05, "loss": 0.0315, "step": 37200 }, { "epoch": 27.26007326007326, "grad_norm": 0.40234375, "learning_rate": 1.3659204963872883e-05, "loss": 0.0351, "step": 37210 }, { "epoch": 27.267399267399266, "grad_norm": 0.37890625, "learning_rate": 1.3636329408848283e-05, "loss": 0.0327, "step": 37220 }, { "epoch": 27.274725274725274, "grad_norm": 0.482421875, "learning_rate": 1.3613471516417726e-05, "loss": 0.0334, "step": 37230 }, { "epoch": 27.28205128205128, "grad_norm": 0.23046875, "learning_rate": 1.3590631297533147e-05, "loss": 0.0306, "step": 37240 }, { "epoch": 27.28937728937729, "grad_norm": 0.328125, "learning_rate": 1.3567808763138092e-05, "loss": 0.0344, "step": 37250 }, { "epoch": 27.296703296703296, "grad_norm": 0.4296875, "learning_rate": 1.3545003924167557e-05, "loss": 0.031, "step": 37260 }, { "epoch": 27.304029304029303, "grad_norm": 0.328125, "learning_rate": 1.3522216791548101e-05, "loss": 0.0291, "step": 37270 }, { "epoch": 27.31135531135531, "grad_norm": 0.380859375, "learning_rate": 1.3499447376197776e-05, "loss": 0.0347, "step": 37280 }, { "epoch": 27.318681318681318, "grad_norm": 0.3515625, "learning_rate": 1.3476695689026156e-05, "loss": 0.0293, "step": 37290 }, { "epoch": 27.326007326007325, "grad_norm": 0.306640625, "learning_rate": 1.3453961740934332e-05, "loss": 0.0318, "step": 37300 }, { "epoch": 27.333333333333332, "grad_norm": 0.447265625, "learning_rate": 1.3431245542814863e-05, "loss": 0.0333, "step": 37310 }, { "epoch": 27.34065934065934, "grad_norm": 0.310546875, "learning_rate": 1.3408547105551856e-05, "loss": 0.0295, "step": 37320 }, { "epoch": 27.347985347985347, "grad_norm": 0.49609375, "learning_rate": 1.3385866440020862e-05, "loss": 0.0301, "step": 37330 }, { "epoch": 27.355311355311354, "grad_norm": 0.375, "learning_rate": 1.3363203557088915e-05, "loss": 0.0277, "step": 37340 }, { "epoch": 27.36263736263736, "grad_norm": 0.6015625, "learning_rate": 1.3340558467614583e-05, "loss": 0.0384, "step": 37350 }, { "epoch": 27.36996336996337, "grad_norm": 0.42578125, "learning_rate": 1.331793118244784e-05, "loss": 0.0328, "step": 37360 }, { "epoch": 27.377289377289376, "grad_norm": 0.5390625, "learning_rate": 1.3295321712430161e-05, "loss": 0.032, "step": 37370 }, { "epoch": 27.384615384615383, "grad_norm": 0.384765625, "learning_rate": 1.3272730068394512e-05, "loss": 0.0383, "step": 37380 }, { "epoch": 27.39194139194139, "grad_norm": 0.412109375, "learning_rate": 1.3250156261165256e-05, "loss": 0.0291, "step": 37390 }, { "epoch": 27.399267399267398, "grad_norm": 0.46484375, "learning_rate": 1.322760030155829e-05, "loss": 0.0354, "step": 37400 }, { "epoch": 27.406593406593405, "grad_norm": 0.50390625, "learning_rate": 1.320506220038086e-05, "loss": 0.0299, "step": 37410 }, { "epoch": 27.413919413919412, "grad_norm": 0.427734375, "learning_rate": 1.3182541968431749e-05, "loss": 0.031, "step": 37420 }, { "epoch": 27.42124542124542, "grad_norm": 0.427734375, "learning_rate": 1.3160039616501127e-05, "loss": 0.0352, "step": 37430 }, { "epoch": 27.428571428571427, "grad_norm": 0.37109375, "learning_rate": 1.313755515537059e-05, "loss": 0.0337, "step": 37440 }, { "epoch": 27.435897435897434, "grad_norm": 0.427734375, "learning_rate": 1.3115088595813219e-05, "loss": 0.0319, "step": 37450 }, { "epoch": 27.44322344322344, "grad_norm": 0.400390625, "learning_rate": 1.3092639948593457e-05, "loss": 0.0298, "step": 37460 }, { "epoch": 27.45054945054945, "grad_norm": 0.6015625, "learning_rate": 1.3070209224467175e-05, "loss": 0.0388, "step": 37470 }, { "epoch": 27.457875457875456, "grad_norm": 0.44921875, "learning_rate": 1.3047796434181702e-05, "loss": 0.0327, "step": 37480 }, { "epoch": 27.465201465201464, "grad_norm": 0.439453125, "learning_rate": 1.3025401588475722e-05, "loss": 0.036, "step": 37490 }, { "epoch": 27.47252747252747, "grad_norm": 0.50390625, "learning_rate": 1.3003024698079325e-05, "loss": 0.0334, "step": 37500 }, { "epoch": 27.479853479853478, "grad_norm": 0.341796875, "learning_rate": 1.2980665773714052e-05, "loss": 0.0303, "step": 37510 }, { "epoch": 27.487179487179485, "grad_norm": 0.470703125, "learning_rate": 1.2958324826092754e-05, "loss": 0.0307, "step": 37520 }, { "epoch": 27.494505494505496, "grad_norm": 0.46875, "learning_rate": 1.2936001865919762e-05, "loss": 0.0318, "step": 37530 }, { "epoch": 27.501831501831504, "grad_norm": 0.482421875, "learning_rate": 1.2913696903890685e-05, "loss": 0.0334, "step": 37540 }, { "epoch": 27.50915750915751, "grad_norm": 0.388671875, "learning_rate": 1.2891409950692602e-05, "loss": 0.0348, "step": 37550 }, { "epoch": 27.516483516483518, "grad_norm": 0.37109375, "learning_rate": 1.2869141017003902e-05, "loss": 0.0322, "step": 37560 }, { "epoch": 27.523809523809526, "grad_norm": 0.34375, "learning_rate": 1.2846890113494358e-05, "loss": 0.0356, "step": 37570 }, { "epoch": 27.531135531135533, "grad_norm": 0.4140625, "learning_rate": 1.2824657250825136e-05, "loss": 0.0282, "step": 37580 }, { "epoch": 27.53846153846154, "grad_norm": 0.330078125, "learning_rate": 1.2802442439648707e-05, "loss": 0.0341, "step": 37590 }, { "epoch": 27.545787545787547, "grad_norm": 0.578125, "learning_rate": 1.2780245690608904e-05, "loss": 0.0343, "step": 37600 }, { "epoch": 27.553113553113555, "grad_norm": 0.41015625, "learning_rate": 1.2758067014340961e-05, "loss": 0.0338, "step": 37610 }, { "epoch": 27.560439560439562, "grad_norm": 0.4375, "learning_rate": 1.2735906421471358e-05, "loss": 0.0308, "step": 37620 }, { "epoch": 27.56776556776557, "grad_norm": 0.376953125, "learning_rate": 1.2713763922618006e-05, "loss": 0.0289, "step": 37630 }, { "epoch": 27.575091575091577, "grad_norm": 0.703125, "learning_rate": 1.2691639528390085e-05, "loss": 0.0373, "step": 37640 }, { "epoch": 27.582417582417584, "grad_norm": 0.34375, "learning_rate": 1.2669533249388116e-05, "loss": 0.0316, "step": 37650 }, { "epoch": 27.58974358974359, "grad_norm": 0.6171875, "learning_rate": 1.2647445096203942e-05, "loss": 0.0326, "step": 37660 }, { "epoch": 27.5970695970696, "grad_norm": 0.51953125, "learning_rate": 1.2625375079420709e-05, "loss": 0.0332, "step": 37670 }, { "epoch": 27.604395604395606, "grad_norm": 0.408203125, "learning_rate": 1.2603323209612919e-05, "loss": 0.0334, "step": 37680 }, { "epoch": 27.611721611721613, "grad_norm": 0.494140625, "learning_rate": 1.2581289497346321e-05, "loss": 0.031, "step": 37690 }, { "epoch": 27.61904761904762, "grad_norm": 0.77734375, "learning_rate": 1.2559273953177988e-05, "loss": 0.0369, "step": 37700 }, { "epoch": 27.626373626373628, "grad_norm": 0.6171875, "learning_rate": 1.2537276587656311e-05, "loss": 0.0324, "step": 37710 }, { "epoch": 27.633699633699635, "grad_norm": 0.45703125, "learning_rate": 1.251529741132094e-05, "loss": 0.0349, "step": 37720 }, { "epoch": 27.641025641025642, "grad_norm": 0.41796875, "learning_rate": 1.2493336434702794e-05, "loss": 0.0313, "step": 37730 }, { "epoch": 27.64835164835165, "grad_norm": 0.36328125, "learning_rate": 1.2471393668324138e-05, "loss": 0.0305, "step": 37740 }, { "epoch": 27.655677655677657, "grad_norm": 0.3984375, "learning_rate": 1.2449469122698437e-05, "loss": 0.0314, "step": 37750 }, { "epoch": 27.663003663003664, "grad_norm": 0.318359375, "learning_rate": 1.2427562808330494e-05, "loss": 0.0329, "step": 37760 }, { "epoch": 27.67032967032967, "grad_norm": 0.283203125, "learning_rate": 1.2405674735716322e-05, "loss": 0.0336, "step": 37770 }, { "epoch": 27.67765567765568, "grad_norm": 0.4375, "learning_rate": 1.2383804915343216e-05, "loss": 0.0293, "step": 37780 }, { "epoch": 27.684981684981686, "grad_norm": 0.431640625, "learning_rate": 1.2361953357689726e-05, "loss": 0.0323, "step": 37790 }, { "epoch": 27.692307692307693, "grad_norm": 0.330078125, "learning_rate": 1.234012007322564e-05, "loss": 0.0289, "step": 37800 }, { "epoch": 27.6996336996337, "grad_norm": 0.423828125, "learning_rate": 1.2318305072412024e-05, "loss": 0.0298, "step": 37810 }, { "epoch": 27.706959706959708, "grad_norm": 0.412109375, "learning_rate": 1.2296508365701149e-05, "loss": 0.0331, "step": 37820 }, { "epoch": 27.714285714285715, "grad_norm": 0.30078125, "learning_rate": 1.2274729963536514e-05, "loss": 0.0365, "step": 37830 }, { "epoch": 27.721611721611723, "grad_norm": 0.65234375, "learning_rate": 1.2252969876352894e-05, "loss": 0.0353, "step": 37840 }, { "epoch": 27.72893772893773, "grad_norm": 0.69140625, "learning_rate": 1.2231228114576243e-05, "loss": 0.0327, "step": 37850 }, { "epoch": 27.736263736263737, "grad_norm": 0.310546875, "learning_rate": 1.220950468862377e-05, "loss": 0.0323, "step": 37860 }, { "epoch": 27.743589743589745, "grad_norm": 0.443359375, "learning_rate": 1.2187799608903867e-05, "loss": 0.0321, "step": 37870 }, { "epoch": 27.750915750915752, "grad_norm": 0.361328125, "learning_rate": 1.2166112885816143e-05, "loss": 0.027, "step": 37880 }, { "epoch": 27.75824175824176, "grad_norm": 0.375, "learning_rate": 1.2144444529751441e-05, "loss": 0.0333, "step": 37890 }, { "epoch": 27.765567765567766, "grad_norm": 0.75390625, "learning_rate": 1.2122794551091767e-05, "loss": 0.0397, "step": 37900 }, { "epoch": 27.772893772893774, "grad_norm": 0.5625, "learning_rate": 1.2101162960210342e-05, "loss": 0.0289, "step": 37910 }, { "epoch": 27.78021978021978, "grad_norm": 0.44921875, "learning_rate": 1.2079549767471562e-05, "loss": 0.0341, "step": 37920 }, { "epoch": 27.78754578754579, "grad_norm": 0.33984375, "learning_rate": 1.2057954983231013e-05, "loss": 0.0367, "step": 37930 }, { "epoch": 27.794871794871796, "grad_norm": 0.3515625, "learning_rate": 1.2036378617835485e-05, "loss": 0.0338, "step": 37940 }, { "epoch": 27.802197802197803, "grad_norm": 0.359375, "learning_rate": 1.2014820681622909e-05, "loss": 0.0331, "step": 37950 }, { "epoch": 27.80952380952381, "grad_norm": 0.57421875, "learning_rate": 1.1993281184922389e-05, "loss": 0.0363, "step": 37960 }, { "epoch": 27.816849816849818, "grad_norm": 0.4296875, "learning_rate": 1.1971760138054232e-05, "loss": 0.0348, "step": 37970 }, { "epoch": 27.824175824175825, "grad_norm": 0.76171875, "learning_rate": 1.1950257551329848e-05, "loss": 0.0348, "step": 37980 }, { "epoch": 27.831501831501832, "grad_norm": 0.5625, "learning_rate": 1.1928773435051859e-05, "loss": 0.0314, "step": 37990 }, { "epoch": 27.83882783882784, "grad_norm": 0.470703125, "learning_rate": 1.1907307799513997e-05, "loss": 0.0319, "step": 38000 }, { "epoch": 27.846153846153847, "grad_norm": 0.435546875, "learning_rate": 1.188586065500114e-05, "loss": 0.0308, "step": 38010 }, { "epoch": 27.853479853479854, "grad_norm": 0.365234375, "learning_rate": 1.1864432011789345e-05, "loss": 0.0299, "step": 38020 }, { "epoch": 27.86080586080586, "grad_norm": 0.498046875, "learning_rate": 1.1843021880145765e-05, "loss": 0.0303, "step": 38030 }, { "epoch": 27.86813186813187, "grad_norm": 0.39453125, "learning_rate": 1.1821630270328692e-05, "loss": 0.0323, "step": 38040 }, { "epoch": 27.875457875457876, "grad_norm": 0.365234375, "learning_rate": 1.1800257192587552e-05, "loss": 0.0315, "step": 38050 }, { "epoch": 27.882783882783883, "grad_norm": 0.357421875, "learning_rate": 1.1778902657162876e-05, "loss": 0.0378, "step": 38060 }, { "epoch": 27.89010989010989, "grad_norm": 0.373046875, "learning_rate": 1.1757566674286349e-05, "loss": 0.035, "step": 38070 }, { "epoch": 27.897435897435898, "grad_norm": 0.61328125, "learning_rate": 1.173624925418071e-05, "loss": 0.0386, "step": 38080 }, { "epoch": 27.904761904761905, "grad_norm": 0.5078125, "learning_rate": 1.1714950407059867e-05, "loss": 0.0316, "step": 38090 }, { "epoch": 27.912087912087912, "grad_norm": 0.36328125, "learning_rate": 1.1693670143128777e-05, "loss": 0.0353, "step": 38100 }, { "epoch": 27.91941391941392, "grad_norm": 0.34765625, "learning_rate": 1.1672408472583495e-05, "loss": 0.0373, "step": 38110 }, { "epoch": 27.926739926739927, "grad_norm": 0.357421875, "learning_rate": 1.1651165405611228e-05, "loss": 0.0357, "step": 38120 }, { "epoch": 27.934065934065934, "grad_norm": 0.412109375, "learning_rate": 1.1629940952390196e-05, "loss": 0.035, "step": 38130 }, { "epoch": 27.94139194139194, "grad_norm": 0.796875, "learning_rate": 1.160873512308973e-05, "loss": 0.0326, "step": 38140 }, { "epoch": 27.94871794871795, "grad_norm": 0.359375, "learning_rate": 1.1587547927870254e-05, "loss": 0.0389, "step": 38150 }, { "epoch": 27.956043956043956, "grad_norm": 0.310546875, "learning_rate": 1.1566379376883247e-05, "loss": 0.0328, "step": 38160 }, { "epoch": 27.963369963369964, "grad_norm": 0.44140625, "learning_rate": 1.1545229480271247e-05, "loss": 0.0295, "step": 38170 }, { "epoch": 27.97069597069597, "grad_norm": 0.365234375, "learning_rate": 1.152409824816787e-05, "loss": 0.0334, "step": 38180 }, { "epoch": 27.978021978021978, "grad_norm": 0.478515625, "learning_rate": 1.1502985690697764e-05, "loss": 0.0313, "step": 38190 }, { "epoch": 27.985347985347985, "grad_norm": 0.5390625, "learning_rate": 1.1481891817976676e-05, "loss": 0.0367, "step": 38200 }, { "epoch": 27.992673992673993, "grad_norm": 0.7109375, "learning_rate": 1.1460816640111342e-05, "loss": 0.0439, "step": 38210 }, { "epoch": 28.0, "grad_norm": 0.357421875, "learning_rate": 1.1439760167199607e-05, "loss": 0.0334, "step": 38220 }, { "epoch": 28.007326007326007, "grad_norm": 0.703125, "learning_rate": 1.14187224093303e-05, "loss": 0.0366, "step": 38230 }, { "epoch": 28.014652014652015, "grad_norm": 0.39453125, "learning_rate": 1.139770337658329e-05, "loss": 0.0317, "step": 38240 }, { "epoch": 28.021978021978022, "grad_norm": 0.37890625, "learning_rate": 1.1376703079029508e-05, "loss": 0.0329, "step": 38250 }, { "epoch": 28.02930402930403, "grad_norm": 0.39453125, "learning_rate": 1.135572152673087e-05, "loss": 0.035, "step": 38260 }, { "epoch": 28.036630036630036, "grad_norm": 0.70703125, "learning_rate": 1.133475872974032e-05, "loss": 0.0309, "step": 38270 }, { "epoch": 28.043956043956044, "grad_norm": 0.51171875, "learning_rate": 1.1313814698101844e-05, "loss": 0.036, "step": 38280 }, { "epoch": 28.05128205128205, "grad_norm": 0.419921875, "learning_rate": 1.1292889441850398e-05, "loss": 0.0342, "step": 38290 }, { "epoch": 28.05860805860806, "grad_norm": 0.44921875, "learning_rate": 1.1271982971011973e-05, "loss": 0.033, "step": 38300 }, { "epoch": 28.065934065934066, "grad_norm": 0.40234375, "learning_rate": 1.1251095295603514e-05, "loss": 0.0362, "step": 38310 }, { "epoch": 28.073260073260073, "grad_norm": 0.388671875, "learning_rate": 1.1230226425633023e-05, "loss": 0.0298, "step": 38320 }, { "epoch": 28.08058608058608, "grad_norm": 0.51171875, "learning_rate": 1.1209376371099457e-05, "loss": 0.0303, "step": 38330 }, { "epoch": 28.087912087912088, "grad_norm": 0.251953125, "learning_rate": 1.1188545141992739e-05, "loss": 0.0321, "step": 38340 }, { "epoch": 28.095238095238095, "grad_norm": 0.56640625, "learning_rate": 1.1167732748293826e-05, "loss": 0.0359, "step": 38350 }, { "epoch": 28.102564102564102, "grad_norm": 0.3984375, "learning_rate": 1.1146939199974594e-05, "loss": 0.0296, "step": 38360 }, { "epoch": 28.10989010989011, "grad_norm": 0.61328125, "learning_rate": 1.1126164506997917e-05, "loss": 0.0344, "step": 38370 }, { "epoch": 28.117216117216117, "grad_norm": 0.51171875, "learning_rate": 1.1105408679317646e-05, "loss": 0.0352, "step": 38380 }, { "epoch": 28.124542124542124, "grad_norm": 0.373046875, "learning_rate": 1.1084671726878564e-05, "loss": 0.0376, "step": 38390 }, { "epoch": 28.13186813186813, "grad_norm": 0.337890625, "learning_rate": 1.1063953659616438e-05, "loss": 0.034, "step": 38400 }, { "epoch": 28.13919413919414, "grad_norm": 0.5, "learning_rate": 1.1043254487457967e-05, "loss": 0.0307, "step": 38410 }, { "epoch": 28.146520146520146, "grad_norm": 0.37109375, "learning_rate": 1.1022574220320809e-05, "loss": 0.0329, "step": 38420 }, { "epoch": 28.153846153846153, "grad_norm": 0.326171875, "learning_rate": 1.1001912868113547e-05, "loss": 0.0347, "step": 38430 }, { "epoch": 28.16117216117216, "grad_norm": 0.34765625, "learning_rate": 1.0981270440735716e-05, "loss": 0.0341, "step": 38440 }, { "epoch": 28.168498168498168, "grad_norm": 0.640625, "learning_rate": 1.096064694807779e-05, "loss": 0.0341, "step": 38450 }, { "epoch": 28.175824175824175, "grad_norm": 0.7265625, "learning_rate": 1.0940042400021154e-05, "loss": 0.0326, "step": 38460 }, { "epoch": 28.183150183150182, "grad_norm": 0.326171875, "learning_rate": 1.0919456806438105e-05, "loss": 0.0295, "step": 38470 }, { "epoch": 28.19047619047619, "grad_norm": 0.439453125, "learning_rate": 1.089889017719192e-05, "loss": 0.0343, "step": 38480 }, { "epoch": 28.197802197802197, "grad_norm": 0.40234375, "learning_rate": 1.0878342522136713e-05, "loss": 0.032, "step": 38490 }, { "epoch": 28.205128205128204, "grad_norm": 0.6484375, "learning_rate": 1.0857813851117535e-05, "loss": 0.0386, "step": 38500 }, { "epoch": 28.21245421245421, "grad_norm": 0.400390625, "learning_rate": 1.083730417397037e-05, "loss": 0.0343, "step": 38510 }, { "epoch": 28.21978021978022, "grad_norm": 0.328125, "learning_rate": 1.0816813500522055e-05, "loss": 0.0363, "step": 38520 }, { "epoch": 28.227106227106226, "grad_norm": 0.546875, "learning_rate": 1.0796341840590366e-05, "loss": 0.0296, "step": 38530 }, { "epoch": 28.234432234432234, "grad_norm": 0.52734375, "learning_rate": 1.0775889203983934e-05, "loss": 0.0301, "step": 38540 }, { "epoch": 28.24175824175824, "grad_norm": 0.4140625, "learning_rate": 1.0755455600502288e-05, "loss": 0.0364, "step": 38550 }, { "epoch": 28.249084249084248, "grad_norm": 0.5703125, "learning_rate": 1.073504103993584e-05, "loss": 0.0379, "step": 38560 }, { "epoch": 28.256410256410255, "grad_norm": 0.421875, "learning_rate": 1.0714645532065864e-05, "loss": 0.0334, "step": 38570 }, { "epoch": 28.263736263736263, "grad_norm": 0.3046875, "learning_rate": 1.0694269086664537e-05, "loss": 0.0322, "step": 38580 }, { "epoch": 28.27106227106227, "grad_norm": 0.33984375, "learning_rate": 1.067391171349488e-05, "loss": 0.036, "step": 38590 }, { "epoch": 28.278388278388277, "grad_norm": 0.337890625, "learning_rate": 1.0653573422310755e-05, "loss": 0.0327, "step": 38600 }, { "epoch": 28.285714285714285, "grad_norm": 0.78515625, "learning_rate": 1.0633254222856933e-05, "loss": 0.0356, "step": 38610 }, { "epoch": 28.293040293040292, "grad_norm": 0.359375, "learning_rate": 1.0612954124868985e-05, "loss": 0.0318, "step": 38620 }, { "epoch": 28.3003663003663, "grad_norm": 0.447265625, "learning_rate": 1.059267313807338e-05, "loss": 0.0383, "step": 38630 }, { "epoch": 28.307692307692307, "grad_norm": 0.69921875, "learning_rate": 1.0572411272187387e-05, "loss": 0.0371, "step": 38640 }, { "epoch": 28.315018315018314, "grad_norm": 0.37109375, "learning_rate": 1.0552168536919126e-05, "loss": 0.0327, "step": 38650 }, { "epoch": 28.32234432234432, "grad_norm": 0.37109375, "learning_rate": 1.0531944941967579e-05, "loss": 0.0285, "step": 38660 }, { "epoch": 28.32967032967033, "grad_norm": 0.369140625, "learning_rate": 1.0511740497022514e-05, "loss": 0.0329, "step": 38670 }, { "epoch": 28.336996336996336, "grad_norm": 0.451171875, "learning_rate": 1.0491555211764549e-05, "loss": 0.0317, "step": 38680 }, { "epoch": 28.344322344322343, "grad_norm": 0.314453125, "learning_rate": 1.0471389095865115e-05, "loss": 0.0306, "step": 38690 }, { "epoch": 28.35164835164835, "grad_norm": 0.3359375, "learning_rate": 1.0451242158986447e-05, "loss": 0.0312, "step": 38700 }, { "epoch": 28.358974358974358, "grad_norm": 0.345703125, "learning_rate": 1.043111441078163e-05, "loss": 0.0321, "step": 38710 }, { "epoch": 28.366300366300365, "grad_norm": 0.322265625, "learning_rate": 1.0411005860894516e-05, "loss": 0.0294, "step": 38720 }, { "epoch": 28.373626373626372, "grad_norm": 0.546875, "learning_rate": 1.039091651895976e-05, "loss": 0.0359, "step": 38730 }, { "epoch": 28.38095238095238, "grad_norm": 0.75390625, "learning_rate": 1.0370846394602845e-05, "loss": 0.0413, "step": 38740 }, { "epoch": 28.388278388278387, "grad_norm": 0.63671875, "learning_rate": 1.0350795497440008e-05, "loss": 0.0326, "step": 38750 }, { "epoch": 28.395604395604394, "grad_norm": 0.4375, "learning_rate": 1.0330763837078308e-05, "loss": 0.0343, "step": 38760 }, { "epoch": 28.4029304029304, "grad_norm": 1.0703125, "learning_rate": 1.0310751423115562e-05, "loss": 0.0376, "step": 38770 }, { "epoch": 28.41025641025641, "grad_norm": 0.6015625, "learning_rate": 1.0290758265140362e-05, "loss": 0.0341, "step": 38780 }, { "epoch": 28.417582417582416, "grad_norm": 0.396484375, "learning_rate": 1.0270784372732115e-05, "loss": 0.0405, "step": 38790 }, { "epoch": 28.424908424908423, "grad_norm": 0.373046875, "learning_rate": 1.0250829755460952e-05, "loss": 0.0311, "step": 38800 }, { "epoch": 28.43223443223443, "grad_norm": 0.6171875, "learning_rate": 1.0230894422887778e-05, "loss": 0.0337, "step": 38810 }, { "epoch": 28.439560439560438, "grad_norm": 0.275390625, "learning_rate": 1.0210978384564279e-05, "loss": 0.0339, "step": 38820 }, { "epoch": 28.446886446886445, "grad_norm": 0.365234375, "learning_rate": 1.019108165003286e-05, "loss": 0.0306, "step": 38830 }, { "epoch": 28.454212454212453, "grad_norm": 0.396484375, "learning_rate": 1.0171204228826728e-05, "loss": 0.0346, "step": 38840 }, { "epoch": 28.46153846153846, "grad_norm": 0.6015625, "learning_rate": 1.0151346130469789e-05, "loss": 0.031, "step": 38850 }, { "epoch": 28.468864468864467, "grad_norm": 0.5546875, "learning_rate": 1.0131507364476709e-05, "loss": 0.032, "step": 38860 }, { "epoch": 28.476190476190474, "grad_norm": 0.353515625, "learning_rate": 1.0111687940352901e-05, "loss": 0.0385, "step": 38870 }, { "epoch": 28.483516483516482, "grad_norm": 0.443359375, "learning_rate": 1.0091887867594493e-05, "loss": 0.0344, "step": 38880 }, { "epoch": 28.49084249084249, "grad_norm": 0.3125, "learning_rate": 1.0072107155688361e-05, "loss": 0.0311, "step": 38890 }, { "epoch": 28.498168498168496, "grad_norm": 0.30859375, "learning_rate": 1.0052345814112089e-05, "loss": 0.0292, "step": 38900 }, { "epoch": 28.505494505494504, "grad_norm": 0.3203125, "learning_rate": 1.0032603852333967e-05, "loss": 0.031, "step": 38910 }, { "epoch": 28.51282051282051, "grad_norm": 0.404296875, "learning_rate": 1.0012881279813053e-05, "loss": 0.0337, "step": 38920 }, { "epoch": 28.520146520146522, "grad_norm": 0.455078125, "learning_rate": 9.993178105999039e-06, "loss": 0.0318, "step": 38930 }, { "epoch": 28.52747252747253, "grad_norm": 0.322265625, "learning_rate": 9.973494340332382e-06, "loss": 0.0344, "step": 38940 }, { "epoch": 28.534798534798536, "grad_norm": 0.40234375, "learning_rate": 9.953829992244226e-06, "loss": 0.0317, "step": 38950 }, { "epoch": 28.542124542124544, "grad_norm": 0.384765625, "learning_rate": 9.934185071156376e-06, "loss": 0.0315, "step": 38960 }, { "epoch": 28.54945054945055, "grad_norm": 0.41015625, "learning_rate": 9.914559586481397e-06, "loss": 0.0304, "step": 38970 }, { "epoch": 28.55677655677656, "grad_norm": 0.384765625, "learning_rate": 9.894953547622468e-06, "loss": 0.0322, "step": 38980 }, { "epoch": 28.564102564102566, "grad_norm": 0.421875, "learning_rate": 9.875366963973517e-06, "loss": 0.0298, "step": 38990 }, { "epoch": 28.571428571428573, "grad_norm": 0.369140625, "learning_rate": 9.855799844919104e-06, "loss": 0.0286, "step": 39000 }, { "epoch": 28.57875457875458, "grad_norm": 0.71875, "learning_rate": 9.836252199834462e-06, "loss": 0.0389, "step": 39010 }, { "epoch": 28.586080586080588, "grad_norm": 0.37109375, "learning_rate": 9.816724038085542e-06, "loss": 0.0334, "step": 39020 }, { "epoch": 28.593406593406595, "grad_norm": 0.4140625, "learning_rate": 9.797215369028905e-06, "loss": 0.0306, "step": 39030 }, { "epoch": 28.600732600732602, "grad_norm": 0.7109375, "learning_rate": 9.777726202011787e-06, "loss": 0.0334, "step": 39040 }, { "epoch": 28.60805860805861, "grad_norm": 0.37109375, "learning_rate": 9.758256546372124e-06, "loss": 0.0363, "step": 39050 }, { "epoch": 28.615384615384617, "grad_norm": 0.462890625, "learning_rate": 9.738806411438414e-06, "loss": 0.0331, "step": 39060 }, { "epoch": 28.622710622710624, "grad_norm": 0.390625, "learning_rate": 9.7193758065299e-06, "loss": 0.0313, "step": 39070 }, { "epoch": 28.63003663003663, "grad_norm": 0.3984375, "learning_rate": 9.699964740956403e-06, "loss": 0.0296, "step": 39080 }, { "epoch": 28.63736263736264, "grad_norm": 0.67578125, "learning_rate": 9.680573224018391e-06, "loss": 0.0358, "step": 39090 }, { "epoch": 28.644688644688646, "grad_norm": 0.482421875, "learning_rate": 9.661201265007005e-06, "loss": 0.03, "step": 39100 }, { "epoch": 28.652014652014653, "grad_norm": 0.65234375, "learning_rate": 9.64184887320395e-06, "loss": 0.039, "step": 39110 }, { "epoch": 28.65934065934066, "grad_norm": 0.359375, "learning_rate": 9.622516057881627e-06, "loss": 0.0305, "step": 39120 }, { "epoch": 28.666666666666668, "grad_norm": 0.6171875, "learning_rate": 9.60320282830301e-06, "loss": 0.0322, "step": 39130 }, { "epoch": 28.673992673992675, "grad_norm": 0.328125, "learning_rate": 9.58390919372168e-06, "loss": 0.0367, "step": 39140 }, { "epoch": 28.681318681318682, "grad_norm": 0.388671875, "learning_rate": 9.564635163381882e-06, "loss": 0.0329, "step": 39150 }, { "epoch": 28.68864468864469, "grad_norm": 0.53125, "learning_rate": 9.545380746518421e-06, "loss": 0.032, "step": 39160 }, { "epoch": 28.695970695970697, "grad_norm": 0.41015625, "learning_rate": 9.526145952356712e-06, "loss": 0.0309, "step": 39170 }, { "epoch": 28.703296703296704, "grad_norm": 0.40234375, "learning_rate": 9.506930790112805e-06, "loss": 0.0279, "step": 39180 }, { "epoch": 28.71062271062271, "grad_norm": 0.5390625, "learning_rate": 9.487735268993268e-06, "loss": 0.0332, "step": 39190 }, { "epoch": 28.71794871794872, "grad_norm": 0.4453125, "learning_rate": 9.468559398195348e-06, "loss": 0.0328, "step": 39200 }, { "epoch": 28.725274725274726, "grad_norm": 0.388671875, "learning_rate": 9.449403186906808e-06, "loss": 0.0337, "step": 39210 }, { "epoch": 28.732600732600734, "grad_norm": 0.388671875, "learning_rate": 9.430266644306032e-06, "loss": 0.0361, "step": 39220 }, { "epoch": 28.73992673992674, "grad_norm": 0.41796875, "learning_rate": 9.411149779561965e-06, "loss": 0.0301, "step": 39230 }, { "epoch": 28.747252747252748, "grad_norm": 0.333984375, "learning_rate": 9.392052601834105e-06, "loss": 0.033, "step": 39240 }, { "epoch": 28.754578754578755, "grad_norm": 0.51171875, "learning_rate": 9.372975120272561e-06, "loss": 0.0343, "step": 39250 }, { "epoch": 28.761904761904763, "grad_norm": 0.419921875, "learning_rate": 9.353917344017976e-06, "loss": 0.0316, "step": 39260 }, { "epoch": 28.76923076923077, "grad_norm": 0.3515625, "learning_rate": 9.33487928220153e-06, "loss": 0.0285, "step": 39270 }, { "epoch": 28.776556776556777, "grad_norm": 0.361328125, "learning_rate": 9.315860943945021e-06, "loss": 0.0296, "step": 39280 }, { "epoch": 28.783882783882785, "grad_norm": 0.6953125, "learning_rate": 9.296862338360724e-06, "loss": 0.0369, "step": 39290 }, { "epoch": 28.791208791208792, "grad_norm": 0.369140625, "learning_rate": 9.277883474551523e-06, "loss": 0.0318, "step": 39300 }, { "epoch": 28.7985347985348, "grad_norm": 0.5625, "learning_rate": 9.258924361610818e-06, "loss": 0.0348, "step": 39310 }, { "epoch": 28.805860805860807, "grad_norm": 0.37890625, "learning_rate": 9.2399850086225e-06, "loss": 0.0363, "step": 39320 }, { "epoch": 28.813186813186814, "grad_norm": 0.408203125, "learning_rate": 9.221065424661072e-06, "loss": 0.0319, "step": 39330 }, { "epoch": 28.82051282051282, "grad_norm": 0.640625, "learning_rate": 9.202165618791504e-06, "loss": 0.0412, "step": 39340 }, { "epoch": 28.82783882783883, "grad_norm": 0.58203125, "learning_rate": 9.183285600069339e-06, "loss": 0.0309, "step": 39350 }, { "epoch": 28.835164835164836, "grad_norm": 0.5390625, "learning_rate": 9.164425377540596e-06, "loss": 0.0304, "step": 39360 }, { "epoch": 28.842490842490843, "grad_norm": 0.349609375, "learning_rate": 9.145584960241822e-06, "loss": 0.0297, "step": 39370 }, { "epoch": 28.84981684981685, "grad_norm": 0.3671875, "learning_rate": 9.126764357200094e-06, "loss": 0.0317, "step": 39380 }, { "epoch": 28.857142857142858, "grad_norm": 0.31640625, "learning_rate": 9.107963577432977e-06, "loss": 0.0298, "step": 39390 }, { "epoch": 28.864468864468865, "grad_norm": 0.419921875, "learning_rate": 9.089182629948527e-06, "loss": 0.0301, "step": 39400 }, { "epoch": 28.871794871794872, "grad_norm": 0.39453125, "learning_rate": 9.070421523745335e-06, "loss": 0.0298, "step": 39410 }, { "epoch": 28.87912087912088, "grad_norm": 0.357421875, "learning_rate": 9.051680267812433e-06, "loss": 0.0296, "step": 39420 }, { "epoch": 28.886446886446887, "grad_norm": 0.76171875, "learning_rate": 9.032958871129418e-06, "loss": 0.0317, "step": 39430 }, { "epoch": 28.893772893772894, "grad_norm": 0.62109375, "learning_rate": 9.014257342666271e-06, "loss": 0.0335, "step": 39440 }, { "epoch": 28.9010989010989, "grad_norm": 0.38671875, "learning_rate": 8.99557569138354e-06, "loss": 0.0313, "step": 39450 }, { "epoch": 28.90842490842491, "grad_norm": 0.326171875, "learning_rate": 8.97691392623222e-06, "loss": 0.033, "step": 39460 }, { "epoch": 28.915750915750916, "grad_norm": 0.76953125, "learning_rate": 8.958272056153748e-06, "loss": 0.0335, "step": 39470 }, { "epoch": 28.923076923076923, "grad_norm": 0.330078125, "learning_rate": 8.939650090080084e-06, "loss": 0.0334, "step": 39480 }, { "epoch": 28.93040293040293, "grad_norm": 0.4140625, "learning_rate": 8.92104803693361e-06, "loss": 0.0365, "step": 39490 }, { "epoch": 28.937728937728938, "grad_norm": 0.2177734375, "learning_rate": 8.902465905627176e-06, "loss": 0.0296, "step": 39500 }, { "epoch": 28.945054945054945, "grad_norm": 0.294921875, "learning_rate": 8.883903705064103e-06, "loss": 0.0354, "step": 39510 }, { "epoch": 28.952380952380953, "grad_norm": 0.392578125, "learning_rate": 8.865361444138136e-06, "loss": 0.0365, "step": 39520 }, { "epoch": 28.95970695970696, "grad_norm": 0.40625, "learning_rate": 8.8468391317335e-06, "loss": 0.032, "step": 39530 }, { "epoch": 28.967032967032967, "grad_norm": 0.42578125, "learning_rate": 8.828336776724828e-06, "loss": 0.0333, "step": 39540 }, { "epoch": 28.974358974358974, "grad_norm": 0.388671875, "learning_rate": 8.809854387977198e-06, "loss": 0.0321, "step": 39550 }, { "epoch": 28.98168498168498, "grad_norm": 0.4375, "learning_rate": 8.79139197434616e-06, "loss": 0.0299, "step": 39560 }, { "epoch": 28.98901098901099, "grad_norm": 0.55078125, "learning_rate": 8.77294954467762e-06, "loss": 0.0377, "step": 39570 }, { "epoch": 28.996336996336996, "grad_norm": 0.3671875, "learning_rate": 8.754527107807985e-06, "loss": 0.0331, "step": 39580 }, { "epoch": 29.003663003663004, "grad_norm": 0.392578125, "learning_rate": 8.736124672564037e-06, "loss": 0.0361, "step": 39590 }, { "epoch": 29.01098901098901, "grad_norm": 0.34375, "learning_rate": 8.71774224776297e-06, "loss": 0.0307, "step": 39600 }, { "epoch": 29.01831501831502, "grad_norm": 0.71875, "learning_rate": 8.699379842212438e-06, "loss": 0.0346, "step": 39610 }, { "epoch": 29.025641025641026, "grad_norm": 0.380859375, "learning_rate": 8.681037464710452e-06, "loss": 0.0326, "step": 39620 }, { "epoch": 29.032967032967033, "grad_norm": 0.51171875, "learning_rate": 8.662715124045443e-06, "loss": 0.0312, "step": 39630 }, { "epoch": 29.04029304029304, "grad_norm": 0.3828125, "learning_rate": 8.644412828996264e-06, "loss": 0.0312, "step": 39640 }, { "epoch": 29.047619047619047, "grad_norm": 0.474609375, "learning_rate": 8.626130588332128e-06, "loss": 0.0331, "step": 39650 }, { "epoch": 29.054945054945055, "grad_norm": 0.392578125, "learning_rate": 8.60786841081267e-06, "loss": 0.0361, "step": 39660 }, { "epoch": 29.062271062271062, "grad_norm": 0.4296875, "learning_rate": 8.589626305187893e-06, "loss": 0.0346, "step": 39670 }, { "epoch": 29.06959706959707, "grad_norm": 0.296875, "learning_rate": 8.571404280198175e-06, "loss": 0.0351, "step": 39680 }, { "epoch": 29.076923076923077, "grad_norm": 0.3828125, "learning_rate": 8.55320234457432e-06, "loss": 0.0336, "step": 39690 }, { "epoch": 29.084249084249084, "grad_norm": 0.375, "learning_rate": 8.535020507037432e-06, "loss": 0.0323, "step": 39700 }, { "epoch": 29.09157509157509, "grad_norm": 0.515625, "learning_rate": 8.51685877629905e-06, "loss": 0.0348, "step": 39710 }, { "epoch": 29.0989010989011, "grad_norm": 0.3984375, "learning_rate": 8.498717161061055e-06, "loss": 0.0348, "step": 39720 }, { "epoch": 29.106227106227106, "grad_norm": 0.392578125, "learning_rate": 8.48059567001567e-06, "loss": 0.0338, "step": 39730 }, { "epoch": 29.113553113553113, "grad_norm": 0.296875, "learning_rate": 8.46249431184552e-06, "loss": 0.0323, "step": 39740 }, { "epoch": 29.12087912087912, "grad_norm": 0.4296875, "learning_rate": 8.444413095223544e-06, "loss": 0.0343, "step": 39750 }, { "epoch": 29.128205128205128, "grad_norm": 0.384765625, "learning_rate": 8.426352028813062e-06, "loss": 0.0322, "step": 39760 }, { "epoch": 29.135531135531135, "grad_norm": 0.306640625, "learning_rate": 8.40831112126771e-06, "loss": 0.0319, "step": 39770 }, { "epoch": 29.142857142857142, "grad_norm": 0.427734375, "learning_rate": 8.390290381231473e-06, "loss": 0.032, "step": 39780 }, { "epoch": 29.15018315018315, "grad_norm": 0.4140625, "learning_rate": 8.3722898173387e-06, "loss": 0.0353, "step": 39790 }, { "epoch": 29.157509157509157, "grad_norm": 0.54296875, "learning_rate": 8.354309438214037e-06, "loss": 0.0319, "step": 39800 }, { "epoch": 29.164835164835164, "grad_norm": 0.5234375, "learning_rate": 8.336349252472479e-06, "loss": 0.0379, "step": 39810 }, { "epoch": 29.17216117216117, "grad_norm": 0.302734375, "learning_rate": 8.31840926871934e-06, "loss": 0.0312, "step": 39820 }, { "epoch": 29.17948717948718, "grad_norm": 0.328125, "learning_rate": 8.300489495550239e-06, "loss": 0.0314, "step": 39830 }, { "epoch": 29.186813186813186, "grad_norm": 0.609375, "learning_rate": 8.282589941551152e-06, "loss": 0.0352, "step": 39840 }, { "epoch": 29.194139194139193, "grad_norm": 0.90625, "learning_rate": 8.264710615298329e-06, "loss": 0.0391, "step": 39850 }, { "epoch": 29.2014652014652, "grad_norm": 0.9765625, "learning_rate": 8.246851525358335e-06, "loss": 0.0408, "step": 39860 }, { "epoch": 29.208791208791208, "grad_norm": 0.435546875, "learning_rate": 8.229012680288064e-06, "loss": 0.0331, "step": 39870 }, { "epoch": 29.216117216117215, "grad_norm": 0.376953125, "learning_rate": 8.211194088634677e-06, "loss": 0.0331, "step": 39880 }, { "epoch": 29.223443223443223, "grad_norm": 0.29296875, "learning_rate": 8.193395758935659e-06, "loss": 0.0302, "step": 39890 }, { "epoch": 29.23076923076923, "grad_norm": 0.337890625, "learning_rate": 8.175617699718768e-06, "loss": 0.0342, "step": 39900 }, { "epoch": 29.238095238095237, "grad_norm": 0.375, "learning_rate": 8.157859919502043e-06, "loss": 0.0382, "step": 39910 }, { "epoch": 29.245421245421245, "grad_norm": 0.703125, "learning_rate": 8.140122426793841e-06, "loss": 0.0379, "step": 39920 }, { "epoch": 29.252747252747252, "grad_norm": 0.470703125, "learning_rate": 8.122405230092765e-06, "loss": 0.031, "step": 39930 }, { "epoch": 29.26007326007326, "grad_norm": 0.421875, "learning_rate": 8.104708337887706e-06, "loss": 0.0288, "step": 39940 }, { "epoch": 29.267399267399266, "grad_norm": 0.271484375, "learning_rate": 8.087031758657826e-06, "loss": 0.0333, "step": 39950 }, { "epoch": 29.274725274725274, "grad_norm": 0.341796875, "learning_rate": 8.069375500872543e-06, "loss": 0.0312, "step": 39960 }, { "epoch": 29.28205128205128, "grad_norm": 0.349609375, "learning_rate": 8.051739572991573e-06, "loss": 0.0295, "step": 39970 }, { "epoch": 29.28937728937729, "grad_norm": 0.359375, "learning_rate": 8.034123983464847e-06, "loss": 0.0329, "step": 39980 }, { "epoch": 29.296703296703296, "grad_norm": 0.322265625, "learning_rate": 8.016528740732583e-06, "loss": 0.0361, "step": 39990 }, { "epoch": 29.304029304029303, "grad_norm": 0.4375, "learning_rate": 7.99895385322524e-06, "loss": 0.0317, "step": 40000 }, { "epoch": 29.31135531135531, "grad_norm": 0.365234375, "learning_rate": 7.981399329363507e-06, "loss": 0.0284, "step": 40010 }, { "epoch": 29.318681318681318, "grad_norm": 0.431640625, "learning_rate": 7.963865177558354e-06, "loss": 0.032, "step": 40020 }, { "epoch": 29.326007326007325, "grad_norm": 0.357421875, "learning_rate": 7.946351406210962e-06, "loss": 0.0339, "step": 40030 }, { "epoch": 29.333333333333332, "grad_norm": 0.443359375, "learning_rate": 7.928858023712733e-06, "loss": 0.0316, "step": 40040 }, { "epoch": 29.34065934065934, "grad_norm": 0.416015625, "learning_rate": 7.911385038445352e-06, "loss": 0.0324, "step": 40050 }, { "epoch": 29.347985347985347, "grad_norm": 0.53125, "learning_rate": 7.893932458780688e-06, "loss": 0.0351, "step": 40060 }, { "epoch": 29.355311355311354, "grad_norm": 0.376953125, "learning_rate": 7.876500293080843e-06, "loss": 0.033, "step": 40070 }, { "epoch": 29.36263736263736, "grad_norm": 0.38671875, "learning_rate": 7.859088549698133e-06, "loss": 0.0328, "step": 40080 }, { "epoch": 29.36996336996337, "grad_norm": 0.41015625, "learning_rate": 7.84169723697509e-06, "loss": 0.032, "step": 40090 }, { "epoch": 29.377289377289376, "grad_norm": 0.50390625, "learning_rate": 7.82432636324449e-06, "loss": 0.0308, "step": 40100 }, { "epoch": 29.384615384615383, "grad_norm": 0.412109375, "learning_rate": 7.806975936829258e-06, "loss": 0.0299, "step": 40110 }, { "epoch": 29.39194139194139, "grad_norm": 0.41015625, "learning_rate": 7.789645966042574e-06, "loss": 0.0306, "step": 40120 }, { "epoch": 29.399267399267398, "grad_norm": 0.3515625, "learning_rate": 7.772336459187793e-06, "loss": 0.0289, "step": 40130 }, { "epoch": 29.406593406593405, "grad_norm": 0.67578125, "learning_rate": 7.75504742455845e-06, "loss": 0.0353, "step": 40140 }, { "epoch": 29.413919413919412, "grad_norm": 0.326171875, "learning_rate": 7.73777887043831e-06, "loss": 0.03, "step": 40150 }, { "epoch": 29.42124542124542, "grad_norm": 0.380859375, "learning_rate": 7.720530805101291e-06, "loss": 0.0346, "step": 40160 }, { "epoch": 29.428571428571427, "grad_norm": 0.73828125, "learning_rate": 7.703303236811509e-06, "loss": 0.0357, "step": 40170 }, { "epoch": 29.435897435897434, "grad_norm": 0.47265625, "learning_rate": 7.686096173823265e-06, "loss": 0.029, "step": 40180 }, { "epoch": 29.44322344322344, "grad_norm": 0.34765625, "learning_rate": 7.668909624381015e-06, "loss": 0.0308, "step": 40190 }, { "epoch": 29.45054945054945, "grad_norm": 0.47265625, "learning_rate": 7.651743596719423e-06, "loss": 0.0326, "step": 40200 }, { "epoch": 29.457875457875456, "grad_norm": 0.37109375, "learning_rate": 7.634598099063264e-06, "loss": 0.0325, "step": 40210 }, { "epoch": 29.465201465201464, "grad_norm": 0.703125, "learning_rate": 7.617473139627523e-06, "loss": 0.036, "step": 40220 }, { "epoch": 29.47252747252747, "grad_norm": 0.4296875, "learning_rate": 7.600368726617339e-06, "loss": 0.0308, "step": 40230 }, { "epoch": 29.479853479853478, "grad_norm": 0.37109375, "learning_rate": 7.583284868227976e-06, "loss": 0.0323, "step": 40240 }, { "epoch": 29.487179487179485, "grad_norm": 0.451171875, "learning_rate": 7.566221572644892e-06, "loss": 0.0348, "step": 40250 }, { "epoch": 29.494505494505496, "grad_norm": 0.3359375, "learning_rate": 7.549178848043661e-06, "loss": 0.0316, "step": 40260 }, { "epoch": 29.501831501831504, "grad_norm": 0.462890625, "learning_rate": 7.532156702590005e-06, "loss": 0.0302, "step": 40270 }, { "epoch": 29.50915750915751, "grad_norm": 0.404296875, "learning_rate": 7.515155144439813e-06, "loss": 0.0325, "step": 40280 }, { "epoch": 29.516483516483518, "grad_norm": 0.3671875, "learning_rate": 7.4981741817390785e-06, "loss": 0.0315, "step": 40290 }, { "epoch": 29.523809523809526, "grad_norm": 0.4765625, "learning_rate": 7.481213822623924e-06, "loss": 0.0353, "step": 40300 }, { "epoch": 29.531135531135533, "grad_norm": 0.921875, "learning_rate": 7.464274075220632e-06, "loss": 0.036, "step": 40310 }, { "epoch": 29.53846153846154, "grad_norm": 0.388671875, "learning_rate": 7.447354947645595e-06, "loss": 0.0333, "step": 40320 }, { "epoch": 29.545787545787547, "grad_norm": 0.455078125, "learning_rate": 7.430456448005307e-06, "loss": 0.0312, "step": 40330 }, { "epoch": 29.553113553113555, "grad_norm": 0.359375, "learning_rate": 7.413578584396393e-06, "loss": 0.0383, "step": 40340 }, { "epoch": 29.560439560439562, "grad_norm": 0.375, "learning_rate": 7.396721364905604e-06, "loss": 0.0349, "step": 40350 }, { "epoch": 29.56776556776557, "grad_norm": 0.453125, "learning_rate": 7.379884797609786e-06, "loss": 0.0322, "step": 40360 }, { "epoch": 29.575091575091577, "grad_norm": 0.353515625, "learning_rate": 7.363068890575872e-06, "loss": 0.0333, "step": 40370 }, { "epoch": 29.582417582417584, "grad_norm": 0.32421875, "learning_rate": 7.346273651860938e-06, "loss": 0.0311, "step": 40380 }, { "epoch": 29.58974358974359, "grad_norm": 0.703125, "learning_rate": 7.3294990895121266e-06, "loss": 0.0325, "step": 40390 }, { "epoch": 29.5970695970696, "grad_norm": 0.40625, "learning_rate": 7.3127452115666685e-06, "loss": 0.0312, "step": 40400 }, { "epoch": 29.604395604395606, "grad_norm": 0.326171875, "learning_rate": 7.296012026051912e-06, "loss": 0.0351, "step": 40410 }, { "epoch": 29.611721611721613, "grad_norm": 0.42578125, "learning_rate": 7.2792995409852665e-06, "loss": 0.0371, "step": 40420 }, { "epoch": 29.61904761904762, "grad_norm": 0.34375, "learning_rate": 7.262607764374239e-06, "loss": 0.0353, "step": 40430 }, { "epoch": 29.626373626373628, "grad_norm": 0.64453125, "learning_rate": 7.245936704216408e-06, "loss": 0.036, "step": 40440 }, { "epoch": 29.633699633699635, "grad_norm": 0.38671875, "learning_rate": 7.229286368499416e-06, "loss": 0.0322, "step": 40450 }, { "epoch": 29.641025641025642, "grad_norm": 0.470703125, "learning_rate": 7.212656765200998e-06, "loss": 0.0326, "step": 40460 }, { "epoch": 29.64835164835165, "grad_norm": 0.458984375, "learning_rate": 7.196047902288915e-06, "loss": 0.0305, "step": 40470 }, { "epoch": 29.655677655677657, "grad_norm": 0.45703125, "learning_rate": 7.179459787721051e-06, "loss": 0.0362, "step": 40480 }, { "epoch": 29.663003663003664, "grad_norm": 0.361328125, "learning_rate": 7.162892429445305e-06, "loss": 0.0313, "step": 40490 }, { "epoch": 29.67032967032967, "grad_norm": 0.462890625, "learning_rate": 7.1463458353996285e-06, "loss": 0.0323, "step": 40500 }, { "epoch": 29.67765567765568, "grad_norm": 0.326171875, "learning_rate": 7.129820013512056e-06, "loss": 0.033, "step": 40510 }, { "epoch": 29.684981684981686, "grad_norm": 0.451171875, "learning_rate": 7.113314971700648e-06, "loss": 0.0325, "step": 40520 }, { "epoch": 29.692307692307693, "grad_norm": 0.36328125, "learning_rate": 7.096830717873492e-06, "loss": 0.0333, "step": 40530 }, { "epoch": 29.6996336996337, "grad_norm": 0.435546875, "learning_rate": 7.080367259928763e-06, "loss": 0.0317, "step": 40540 }, { "epoch": 29.706959706959708, "grad_norm": 0.3515625, "learning_rate": 7.063924605754618e-06, "loss": 0.0317, "step": 40550 }, { "epoch": 29.714285714285715, "grad_norm": 0.5703125, "learning_rate": 7.047502763229299e-06, "loss": 0.0318, "step": 40560 }, { "epoch": 29.721611721611723, "grad_norm": 0.5859375, "learning_rate": 7.03110174022103e-06, "loss": 0.0312, "step": 40570 }, { "epoch": 29.72893772893773, "grad_norm": 0.6015625, "learning_rate": 7.0147215445880875e-06, "loss": 0.0352, "step": 40580 }, { "epoch": 29.736263736263737, "grad_norm": 0.302734375, "learning_rate": 6.998362184178761e-06, "loss": 0.037, "step": 40590 }, { "epoch": 29.743589743589745, "grad_norm": 0.46875, "learning_rate": 6.982023666831339e-06, "loss": 0.0372, "step": 40600 }, { "epoch": 29.750915750915752, "grad_norm": 0.5078125, "learning_rate": 6.965706000374168e-06, "loss": 0.0296, "step": 40610 }, { "epoch": 29.75824175824176, "grad_norm": 0.376953125, "learning_rate": 6.949409192625563e-06, "loss": 0.0345, "step": 40620 }, { "epoch": 29.765567765567766, "grad_norm": 0.416015625, "learning_rate": 6.93313325139385e-06, "loss": 0.0306, "step": 40630 }, { "epoch": 29.772893772893774, "grad_norm": 0.423828125, "learning_rate": 6.916878184477392e-06, "loss": 0.0285, "step": 40640 }, { "epoch": 29.78021978021978, "grad_norm": 0.369140625, "learning_rate": 6.900643999664497e-06, "loss": 0.0317, "step": 40650 }, { "epoch": 29.78754578754579, "grad_norm": 0.36328125, "learning_rate": 6.884430704733523e-06, "loss": 0.0313, "step": 40660 }, { "epoch": 29.794871794871796, "grad_norm": 0.34765625, "learning_rate": 6.868238307452776e-06, "loss": 0.0313, "step": 40670 }, { "epoch": 29.802197802197803, "grad_norm": 0.443359375, "learning_rate": 6.852066815580552e-06, "loss": 0.0337, "step": 40680 }, { "epoch": 29.80952380952381, "grad_norm": 0.53515625, "learning_rate": 6.8359162368651765e-06, "loss": 0.0317, "step": 40690 }, { "epoch": 29.816849816849818, "grad_norm": 0.291015625, "learning_rate": 6.819786579044899e-06, "loss": 0.0319, "step": 40700 }, { "epoch": 29.824175824175825, "grad_norm": 0.341796875, "learning_rate": 6.8036778498479725e-06, "loss": 0.0321, "step": 40710 }, { "epoch": 29.831501831501832, "grad_norm": 0.41015625, "learning_rate": 6.787590056992617e-06, "loss": 0.0375, "step": 40720 }, { "epoch": 29.83882783882784, "grad_norm": 0.89453125, "learning_rate": 6.7715232081870086e-06, "loss": 0.0349, "step": 40730 }, { "epoch": 29.846153846153847, "grad_norm": 0.443359375, "learning_rate": 6.755477311129324e-06, "loss": 0.0368, "step": 40740 }, { "epoch": 29.853479853479854, "grad_norm": 0.73046875, "learning_rate": 6.739452373507676e-06, "loss": 0.0372, "step": 40750 }, { "epoch": 29.86080586080586, "grad_norm": 0.5390625, "learning_rate": 6.72344840300012e-06, "loss": 0.0363, "step": 40760 }, { "epoch": 29.86813186813187, "grad_norm": 0.328125, "learning_rate": 6.7074654072747005e-06, "loss": 0.0286, "step": 40770 }, { "epoch": 29.875457875457876, "grad_norm": 0.54296875, "learning_rate": 6.691503393989383e-06, "loss": 0.0351, "step": 40780 }, { "epoch": 29.882783882783883, "grad_norm": 0.455078125, "learning_rate": 6.67556237079211e-06, "loss": 0.0375, "step": 40790 }, { "epoch": 29.89010989010989, "grad_norm": 0.75390625, "learning_rate": 6.659642345320738e-06, "loss": 0.0302, "step": 40800 }, { "epoch": 29.897435897435898, "grad_norm": 0.400390625, "learning_rate": 6.643743325203058e-06, "loss": 0.0344, "step": 40810 }, { "epoch": 29.904761904761905, "grad_norm": 0.3828125, "learning_rate": 6.627865318056843e-06, "loss": 0.0281, "step": 40820 }, { "epoch": 29.912087912087912, "grad_norm": 0.34765625, "learning_rate": 6.612008331489744e-06, "loss": 0.0316, "step": 40830 }, { "epoch": 29.91941391941392, "grad_norm": 0.74609375, "learning_rate": 6.59617237309938e-06, "loss": 0.0318, "step": 40840 }, { "epoch": 29.926739926739927, "grad_norm": 0.494140625, "learning_rate": 6.5803574504732595e-06, "loss": 0.0307, "step": 40850 }, { "epoch": 29.934065934065934, "grad_norm": 0.48046875, "learning_rate": 6.564563571188834e-06, "loss": 0.0322, "step": 40860 }, { "epoch": 29.94139194139194, "grad_norm": 0.609375, "learning_rate": 6.5487907428134874e-06, "loss": 0.037, "step": 40870 }, { "epoch": 29.94871794871795, "grad_norm": 0.50390625, "learning_rate": 6.533038972904479e-06, "loss": 0.0294, "step": 40880 }, { "epoch": 29.956043956043956, "grad_norm": 0.54296875, "learning_rate": 6.517308269009008e-06, "loss": 0.0342, "step": 40890 }, { "epoch": 29.963369963369964, "grad_norm": 0.294921875, "learning_rate": 6.501598638664177e-06, "loss": 0.0318, "step": 40900 }, { "epoch": 29.97069597069597, "grad_norm": 0.35546875, "learning_rate": 6.485910089396967e-06, "loss": 0.0344, "step": 40910 }, { "epoch": 29.978021978021978, "grad_norm": 0.291015625, "learning_rate": 6.470242628724297e-06, "loss": 0.0282, "step": 40920 }, { "epoch": 29.985347985347985, "grad_norm": 0.64453125, "learning_rate": 6.45459626415295e-06, "loss": 0.0365, "step": 40930 }, { "epoch": 29.992673992673993, "grad_norm": 0.47265625, "learning_rate": 6.4389710031796096e-06, "loss": 0.031, "step": 40940 }, { "epoch": 30.0, "grad_norm": 0.337890625, "learning_rate": 6.42336685329086e-06, "loss": 0.0378, "step": 40950 }, { "epoch": 30.007326007326007, "grad_norm": 0.302734375, "learning_rate": 6.407783821963165e-06, "loss": 0.0373, "step": 40960 }, { "epoch": 30.014652014652015, "grad_norm": 0.3671875, "learning_rate": 6.39222191666285e-06, "loss": 0.0324, "step": 40970 }, { "epoch": 30.021978021978022, "grad_norm": 0.416015625, "learning_rate": 6.376681144846146e-06, "loss": 0.033, "step": 40980 }, { "epoch": 30.02930402930403, "grad_norm": 0.396484375, "learning_rate": 6.3611615139591376e-06, "loss": 0.0285, "step": 40990 }, { "epoch": 30.036630036630036, "grad_norm": 0.384765625, "learning_rate": 6.345663031437799e-06, "loss": 0.0334, "step": 41000 }, { "epoch": 30.043956043956044, "grad_norm": 0.310546875, "learning_rate": 6.330185704707954e-06, "loss": 0.032, "step": 41010 }, { "epoch": 30.05128205128205, "grad_norm": 0.37890625, "learning_rate": 6.3147295411853064e-06, "loss": 0.0304, "step": 41020 }, { "epoch": 30.05860805860806, "grad_norm": 0.435546875, "learning_rate": 6.299294548275407e-06, "loss": 0.0311, "step": 41030 }, { "epoch": 30.065934065934066, "grad_norm": 0.2470703125, "learning_rate": 6.283880733373658e-06, "loss": 0.0272, "step": 41040 }, { "epoch": 30.073260073260073, "grad_norm": 0.67578125, "learning_rate": 6.2684881038653415e-06, "loss": 0.0343, "step": 41050 }, { "epoch": 30.08058608058608, "grad_norm": 0.349609375, "learning_rate": 6.253116667125565e-06, "loss": 0.031, "step": 41060 }, { "epoch": 30.087912087912088, "grad_norm": 0.482421875, "learning_rate": 6.237766430519274e-06, "loss": 0.0346, "step": 41070 }, { "epoch": 30.095238095238095, "grad_norm": 0.65234375, "learning_rate": 6.2224374014013026e-06, "loss": 0.0328, "step": 41080 }, { "epoch": 30.102564102564102, "grad_norm": 0.5078125, "learning_rate": 6.20712958711625e-06, "loss": 0.0295, "step": 41090 }, { "epoch": 30.10989010989011, "grad_norm": 0.4765625, "learning_rate": 6.1918429949986184e-06, "loss": 0.0308, "step": 41100 }, { "epoch": 30.117216117216117, "grad_norm": 0.44140625, "learning_rate": 6.1765776323727085e-06, "loss": 0.0335, "step": 41110 }, { "epoch": 30.124542124542124, "grad_norm": 0.287109375, "learning_rate": 6.161333506552661e-06, "loss": 0.0313, "step": 41120 }, { "epoch": 30.13186813186813, "grad_norm": 0.5078125, "learning_rate": 6.1461106248424385e-06, "loss": 0.0319, "step": 41130 }, { "epoch": 30.13919413919414, "grad_norm": 0.5546875, "learning_rate": 6.13090899453581e-06, "loss": 0.0331, "step": 41140 }, { "epoch": 30.146520146520146, "grad_norm": 0.3984375, "learning_rate": 6.115728622916392e-06, "loss": 0.0349, "step": 41150 }, { "epoch": 30.153846153846153, "grad_norm": 0.515625, "learning_rate": 6.100569517257587e-06, "loss": 0.0316, "step": 41160 }, { "epoch": 30.16117216117216, "grad_norm": 0.458984375, "learning_rate": 6.085431684822613e-06, "loss": 0.0321, "step": 41170 }, { "epoch": 30.168498168498168, "grad_norm": 0.314453125, "learning_rate": 6.070315132864529e-06, "loss": 0.0333, "step": 41180 }, { "epoch": 30.175824175824175, "grad_norm": 0.400390625, "learning_rate": 6.05521986862614e-06, "loss": 0.0329, "step": 41190 }, { "epoch": 30.183150183150182, "grad_norm": 0.46875, "learning_rate": 6.0401458993401106e-06, "loss": 0.0338, "step": 41200 }, { "epoch": 30.19047619047619, "grad_norm": 0.4296875, "learning_rate": 6.0250932322288625e-06, "loss": 0.0348, "step": 41210 }, { "epoch": 30.197802197802197, "grad_norm": 0.404296875, "learning_rate": 6.010061874504606e-06, "loss": 0.0331, "step": 41220 }, { "epoch": 30.205128205128204, "grad_norm": 0.29296875, "learning_rate": 5.995051833369382e-06, "loss": 0.0295, "step": 41230 }, { "epoch": 30.21245421245421, "grad_norm": 0.5, "learning_rate": 5.980063116014972e-06, "loss": 0.0326, "step": 41240 }, { "epoch": 30.21978021978022, "grad_norm": 0.4296875, "learning_rate": 5.9650957296229884e-06, "loss": 0.0356, "step": 41250 }, { "epoch": 30.227106227106226, "grad_norm": 0.435546875, "learning_rate": 5.950149681364775e-06, "loss": 0.0333, "step": 41260 }, { "epoch": 30.234432234432234, "grad_norm": 0.41015625, "learning_rate": 5.935224978401474e-06, "loss": 0.0291, "step": 41270 }, { "epoch": 30.24175824175824, "grad_norm": 0.318359375, "learning_rate": 5.920321627884019e-06, "loss": 0.0319, "step": 41280 }, { "epoch": 30.249084249084248, "grad_norm": 0.404296875, "learning_rate": 5.905439636953086e-06, "loss": 0.0293, "step": 41290 }, { "epoch": 30.256410256410255, "grad_norm": 0.75, "learning_rate": 5.89057901273911e-06, "loss": 0.0344, "step": 41300 }, { "epoch": 30.263736263736263, "grad_norm": 1.0078125, "learning_rate": 5.875739762362321e-06, "loss": 0.0333, "step": 41310 }, { "epoch": 30.27106227106227, "grad_norm": 0.46484375, "learning_rate": 5.86092189293268e-06, "loss": 0.0305, "step": 41320 }, { "epoch": 30.278388278388277, "grad_norm": 0.37109375, "learning_rate": 5.846125411549926e-06, "loss": 0.0326, "step": 41330 }, { "epoch": 30.285714285714285, "grad_norm": 0.318359375, "learning_rate": 5.831350325303533e-06, "loss": 0.0288, "step": 41340 }, { "epoch": 30.293040293040292, "grad_norm": 0.8359375, "learning_rate": 5.816596641272733e-06, "loss": 0.0356, "step": 41350 }, { "epoch": 30.3003663003663, "grad_norm": 0.318359375, "learning_rate": 5.801864366526491e-06, "loss": 0.0325, "step": 41360 }, { "epoch": 30.307692307692307, "grad_norm": 0.4296875, "learning_rate": 5.787153508123522e-06, "loss": 0.0323, "step": 41370 }, { "epoch": 30.315018315018314, "grad_norm": 0.478515625, "learning_rate": 5.7724640731123e-06, "loss": 0.0331, "step": 41380 }, { "epoch": 30.32234432234432, "grad_norm": 0.39453125, "learning_rate": 5.757796068530996e-06, "loss": 0.0376, "step": 41390 }, { "epoch": 30.32967032967033, "grad_norm": 0.390625, "learning_rate": 5.74314950140753e-06, "loss": 0.0313, "step": 41400 }, { "epoch": 30.336996336996336, "grad_norm": 0.337890625, "learning_rate": 5.7285243787595755e-06, "loss": 0.0365, "step": 41410 }, { "epoch": 30.344322344322343, "grad_norm": 0.498046875, "learning_rate": 5.713920707594478e-06, "loss": 0.0345, "step": 41420 }, { "epoch": 30.35164835164835, "grad_norm": 0.4375, "learning_rate": 5.699338494909365e-06, "loss": 0.0387, "step": 41430 }, { "epoch": 30.358974358974358, "grad_norm": 0.330078125, "learning_rate": 5.684777747691042e-06, "loss": 0.0353, "step": 41440 }, { "epoch": 30.366300366300365, "grad_norm": 0.484375, "learning_rate": 5.670238472916028e-06, "loss": 0.0348, "step": 41450 }, { "epoch": 30.373626373626372, "grad_norm": 0.392578125, "learning_rate": 5.655720677550585e-06, "loss": 0.03, "step": 41460 }, { "epoch": 30.38095238095238, "grad_norm": 0.431640625, "learning_rate": 5.641224368550658e-06, "loss": 0.0307, "step": 41470 }, { "epoch": 30.388278388278387, "grad_norm": 0.451171875, "learning_rate": 5.626749552861911e-06, "loss": 0.0294, "step": 41480 }, { "epoch": 30.395604395604394, "grad_norm": 0.373046875, "learning_rate": 5.61229623741969e-06, "loss": 0.0334, "step": 41490 }, { "epoch": 30.4029304029304, "grad_norm": 0.51171875, "learning_rate": 5.597864429149044e-06, "loss": 0.0296, "step": 41500 }, { "epoch": 30.41025641025641, "grad_norm": 0.51953125, "learning_rate": 5.583454134964757e-06, "loss": 0.0298, "step": 41510 }, { "epoch": 30.417582417582416, "grad_norm": 0.55859375, "learning_rate": 5.569065361771253e-06, "loss": 0.0294, "step": 41520 }, { "epoch": 30.424908424908423, "grad_norm": 0.6171875, "learning_rate": 5.554698116462663e-06, "loss": 0.0376, "step": 41530 }, { "epoch": 30.43223443223443, "grad_norm": 0.75, "learning_rate": 5.540352405922817e-06, "loss": 0.0306, "step": 41540 }, { "epoch": 30.439560439560438, "grad_norm": 0.53515625, "learning_rate": 5.526028237025203e-06, "loss": 0.0351, "step": 41550 }, { "epoch": 30.446886446886445, "grad_norm": 0.357421875, "learning_rate": 5.511725616633019e-06, "loss": 0.033, "step": 41560 }, { "epoch": 30.454212454212453, "grad_norm": 0.4140625, "learning_rate": 5.4974445515991085e-06, "loss": 0.0331, "step": 41570 }, { "epoch": 30.46153846153846, "grad_norm": 0.39453125, "learning_rate": 5.483185048765987e-06, "loss": 0.0289, "step": 41580 }, { "epoch": 30.468864468864467, "grad_norm": 0.5859375, "learning_rate": 5.4689471149658775e-06, "loss": 0.0318, "step": 41590 }, { "epoch": 30.476190476190474, "grad_norm": 0.33984375, "learning_rate": 5.454730757020611e-06, "loss": 0.0306, "step": 41600 }, { "epoch": 30.483516483516482, "grad_norm": 0.46484375, "learning_rate": 5.4405359817417305e-06, "loss": 0.0317, "step": 41610 }, { "epoch": 30.49084249084249, "grad_norm": 0.435546875, "learning_rate": 5.426362795930419e-06, "loss": 0.0377, "step": 41620 }, { "epoch": 30.498168498168496, "grad_norm": 0.328125, "learning_rate": 5.412211206377496e-06, "loss": 0.0295, "step": 41630 }, { "epoch": 30.505494505494504, "grad_norm": 0.349609375, "learning_rate": 5.398081219863474e-06, "loss": 0.0355, "step": 41640 }, { "epoch": 30.51282051282051, "grad_norm": 0.419921875, "learning_rate": 5.383972843158473e-06, "loss": 0.0336, "step": 41650 }, { "epoch": 30.520146520146522, "grad_norm": 0.408203125, "learning_rate": 5.3698860830223046e-06, "loss": 0.0327, "step": 41660 }, { "epoch": 30.52747252747253, "grad_norm": 0.34375, "learning_rate": 5.355820946204383e-06, "loss": 0.0312, "step": 41670 }, { "epoch": 30.534798534798536, "grad_norm": 0.3125, "learning_rate": 5.341777439443765e-06, "loss": 0.0315, "step": 41680 }, { "epoch": 30.542124542124544, "grad_norm": 0.515625, "learning_rate": 5.327755569469181e-06, "loss": 0.0372, "step": 41690 }, { "epoch": 30.54945054945055, "grad_norm": 0.546875, "learning_rate": 5.313755342998955e-06, "loss": 0.0298, "step": 41700 }, { "epoch": 30.55677655677656, "grad_norm": 0.30078125, "learning_rate": 5.299776766741044e-06, "loss": 0.0317, "step": 41710 }, { "epoch": 30.564102564102566, "grad_norm": 0.369140625, "learning_rate": 5.285819847393079e-06, "loss": 0.0353, "step": 41720 }, { "epoch": 30.571428571428573, "grad_norm": 0.41015625, "learning_rate": 5.271884591642226e-06, "loss": 0.0382, "step": 41730 }, { "epoch": 30.57875457875458, "grad_norm": 0.373046875, "learning_rate": 5.257971006165368e-06, "loss": 0.0318, "step": 41740 }, { "epoch": 30.586080586080588, "grad_norm": 0.50390625, "learning_rate": 5.244079097628937e-06, "loss": 0.0323, "step": 41750 }, { "epoch": 30.593406593406595, "grad_norm": 0.419921875, "learning_rate": 5.230208872689001e-06, "loss": 0.0295, "step": 41760 }, { "epoch": 30.600732600732602, "grad_norm": 0.3359375, "learning_rate": 5.216360337991255e-06, "loss": 0.0339, "step": 41770 }, { "epoch": 30.60805860805861, "grad_norm": 0.4765625, "learning_rate": 5.20253350017097e-06, "loss": 0.0331, "step": 41780 }, { "epoch": 30.615384615384617, "grad_norm": 0.482421875, "learning_rate": 5.188728365853055e-06, "loss": 0.0325, "step": 41790 }, { "epoch": 30.622710622710624, "grad_norm": 0.4765625, "learning_rate": 5.174944941651993e-06, "loss": 0.0347, "step": 41800 }, { "epoch": 30.63003663003663, "grad_norm": 0.4765625, "learning_rate": 5.161183234171865e-06, "loss": 0.0348, "step": 41810 }, { "epoch": 30.63736263736264, "grad_norm": 0.30859375, "learning_rate": 5.147443250006374e-06, "loss": 0.0323, "step": 41820 }, { "epoch": 30.644688644688646, "grad_norm": 0.4765625, "learning_rate": 5.133724995738796e-06, "loss": 0.0384, "step": 41830 }, { "epoch": 30.652014652014653, "grad_norm": 0.265625, "learning_rate": 5.120028477941979e-06, "loss": 0.032, "step": 41840 }, { "epoch": 30.65934065934066, "grad_norm": 0.734375, "learning_rate": 5.106353703178399e-06, "loss": 0.0319, "step": 41850 }, { "epoch": 30.666666666666668, "grad_norm": 0.4140625, "learning_rate": 5.0927006780000615e-06, "loss": 0.0318, "step": 41860 }, { "epoch": 30.673992673992675, "grad_norm": 0.37890625, "learning_rate": 5.079069408948602e-06, "loss": 0.0349, "step": 41870 }, { "epoch": 30.681318681318682, "grad_norm": 0.431640625, "learning_rate": 5.065459902555202e-06, "loss": 0.0291, "step": 41880 }, { "epoch": 30.68864468864469, "grad_norm": 0.341796875, "learning_rate": 5.051872165340611e-06, "loss": 0.0337, "step": 41890 }, { "epoch": 30.695970695970697, "grad_norm": 0.341796875, "learning_rate": 5.0383062038151755e-06, "loss": 0.0347, "step": 41900 }, { "epoch": 30.703296703296704, "grad_norm": 0.373046875, "learning_rate": 5.0247620244787795e-06, "loss": 0.0315, "step": 41910 }, { "epoch": 30.71062271062271, "grad_norm": 0.33984375, "learning_rate": 5.011239633820899e-06, "loss": 0.0361, "step": 41920 }, { "epoch": 30.71794871794872, "grad_norm": 0.427734375, "learning_rate": 4.997739038320549e-06, "loss": 0.0364, "step": 41930 }, { "epoch": 30.725274725274726, "grad_norm": 0.58984375, "learning_rate": 4.984260244446296e-06, "loss": 0.0324, "step": 41940 }, { "epoch": 30.732600732600734, "grad_norm": 0.6484375, "learning_rate": 4.970803258656291e-06, "loss": 0.0355, "step": 41950 }, { "epoch": 30.73992673992674, "grad_norm": 0.291015625, "learning_rate": 4.957368087398206e-06, "loss": 0.0332, "step": 41960 }, { "epoch": 30.747252747252748, "grad_norm": 0.36328125, "learning_rate": 4.943954737109276e-06, "loss": 0.0324, "step": 41970 }, { "epoch": 30.754578754578755, "grad_norm": 0.66796875, "learning_rate": 4.930563214216276e-06, "loss": 0.0361, "step": 41980 }, { "epoch": 30.761904761904763, "grad_norm": 0.416015625, "learning_rate": 4.9171935251355145e-06, "loss": 0.0306, "step": 41990 }, { "epoch": 30.76923076923077, "grad_norm": 0.388671875, "learning_rate": 4.903845676272865e-06, "loss": 0.0327, "step": 42000 }, { "epoch": 30.776556776556777, "grad_norm": 0.80859375, "learning_rate": 4.8905196740237025e-06, "loss": 0.0357, "step": 42010 }, { "epoch": 30.783882783882785, "grad_norm": 0.34765625, "learning_rate": 4.877215524772976e-06, "loss": 0.0307, "step": 42020 }, { "epoch": 30.791208791208792, "grad_norm": 0.37890625, "learning_rate": 4.86393323489512e-06, "loss": 0.0331, "step": 42030 }, { "epoch": 30.7985347985348, "grad_norm": 0.462890625, "learning_rate": 4.8506728107541145e-06, "loss": 0.0355, "step": 42040 }, { "epoch": 30.805860805860807, "grad_norm": 0.427734375, "learning_rate": 4.837434258703477e-06, "loss": 0.0304, "step": 42050 }, { "epoch": 30.813186813186814, "grad_norm": 0.45703125, "learning_rate": 4.824217585086225e-06, "loss": 0.031, "step": 42060 }, { "epoch": 30.82051282051282, "grad_norm": 0.38671875, "learning_rate": 4.811022796234894e-06, "loss": 0.0376, "step": 42070 }, { "epoch": 30.82783882783883, "grad_norm": 0.6875, "learning_rate": 4.797849898471554e-06, "loss": 0.0318, "step": 42080 }, { "epoch": 30.835164835164836, "grad_norm": 0.5078125, "learning_rate": 4.784698898107763e-06, "loss": 0.0318, "step": 42090 }, { "epoch": 30.842490842490843, "grad_norm": 0.55859375, "learning_rate": 4.771569801444609e-06, "loss": 0.0315, "step": 42100 }, { "epoch": 30.84981684981685, "grad_norm": 0.404296875, "learning_rate": 4.758462614772657e-06, "loss": 0.0295, "step": 42110 }, { "epoch": 30.857142857142858, "grad_norm": 0.75390625, "learning_rate": 4.7453773443719875e-06, "loss": 0.0333, "step": 42120 }, { "epoch": 30.864468864468865, "grad_norm": 0.3046875, "learning_rate": 4.732313996512204e-06, "loss": 0.0295, "step": 42130 }, { "epoch": 30.871794871794872, "grad_norm": 0.43359375, "learning_rate": 4.7192725774523594e-06, "loss": 0.0319, "step": 42140 }, { "epoch": 30.87912087912088, "grad_norm": 0.65625, "learning_rate": 4.706253093441046e-06, "loss": 0.0389, "step": 42150 }, { "epoch": 30.886446886446887, "grad_norm": 0.41015625, "learning_rate": 4.693255550716322e-06, "loss": 0.0314, "step": 42160 }, { "epoch": 30.893772893772894, "grad_norm": 0.37890625, "learning_rate": 4.680279955505727e-06, "loss": 0.0301, "step": 42170 }, { "epoch": 30.9010989010989, "grad_norm": 0.376953125, "learning_rate": 4.667326314026304e-06, "loss": 0.0302, "step": 42180 }, { "epoch": 30.90842490842491, "grad_norm": 0.6953125, "learning_rate": 4.654394632484568e-06, "loss": 0.0318, "step": 42190 }, { "epoch": 30.915750915750916, "grad_norm": 0.8984375, "learning_rate": 4.641484917076497e-06, "loss": 0.0335, "step": 42200 }, { "epoch": 30.923076923076923, "grad_norm": 0.765625, "learning_rate": 4.628597173987581e-06, "loss": 0.0317, "step": 42210 }, { "epoch": 30.93040293040293, "grad_norm": 0.3671875, "learning_rate": 4.615731409392742e-06, "loss": 0.0294, "step": 42220 }, { "epoch": 30.937728937728938, "grad_norm": 0.412109375, "learning_rate": 4.602887629456406e-06, "loss": 0.0272, "step": 42230 }, { "epoch": 30.945054945054945, "grad_norm": 0.30078125, "learning_rate": 4.590065840332436e-06, "loss": 0.0361, "step": 42240 }, { "epoch": 30.952380952380953, "grad_norm": 0.455078125, "learning_rate": 4.577266048164176e-06, "loss": 0.0373, "step": 42250 }, { "epoch": 30.95970695970696, "grad_norm": 0.287109375, "learning_rate": 4.564488259084437e-06, "loss": 0.033, "step": 42260 }, { "epoch": 30.967032967032967, "grad_norm": 0.3828125, "learning_rate": 4.551732479215451e-06, "loss": 0.0307, "step": 42270 }, { "epoch": 30.974358974358974, "grad_norm": 0.349609375, "learning_rate": 4.538998714668959e-06, "loss": 0.0348, "step": 42280 }, { "epoch": 30.98168498168498, "grad_norm": 0.28515625, "learning_rate": 4.5262869715461106e-06, "loss": 0.0321, "step": 42290 }, { "epoch": 30.98901098901099, "grad_norm": 0.384765625, "learning_rate": 4.513597255937513e-06, "loss": 0.0332, "step": 42300 }, { "epoch": 30.996336996336996, "grad_norm": 0.333984375, "learning_rate": 4.5009295739232456e-06, "loss": 0.033, "step": 42310 }, { "epoch": 31.003663003663004, "grad_norm": 0.357421875, "learning_rate": 4.488283931572787e-06, "loss": 0.0301, "step": 42320 }, { "epoch": 31.01098901098901, "grad_norm": 0.439453125, "learning_rate": 4.4756603349451e-06, "loss": 0.033, "step": 42330 }, { "epoch": 31.01831501831502, "grad_norm": 0.37109375, "learning_rate": 4.46305879008856e-06, "loss": 0.0328, "step": 42340 }, { "epoch": 31.025641025641026, "grad_norm": 0.5390625, "learning_rate": 4.450479303040961e-06, "loss": 0.0281, "step": 42350 }, { "epoch": 31.032967032967033, "grad_norm": 0.478515625, "learning_rate": 4.437921879829583e-06, "loss": 0.0352, "step": 42360 }, { "epoch": 31.04029304029304, "grad_norm": 0.48828125, "learning_rate": 4.425386526471061e-06, "loss": 0.0305, "step": 42370 }, { "epoch": 31.047619047619047, "grad_norm": 0.412109375, "learning_rate": 4.412873248971521e-06, "loss": 0.0346, "step": 42380 }, { "epoch": 31.054945054945055, "grad_norm": 0.34375, "learning_rate": 4.400382053326474e-06, "loss": 0.032, "step": 42390 }, { "epoch": 31.062271062271062, "grad_norm": 0.376953125, "learning_rate": 4.3879129455208615e-06, "loss": 0.0295, "step": 42400 }, { "epoch": 31.06959706959707, "grad_norm": 0.2890625, "learning_rate": 4.375465931529045e-06, "loss": 0.0323, "step": 42410 }, { "epoch": 31.076923076923077, "grad_norm": 0.34375, "learning_rate": 4.363041017314799e-06, "loss": 0.0322, "step": 42420 }, { "epoch": 31.084249084249084, "grad_norm": 0.392578125, "learning_rate": 4.350638208831299e-06, "loss": 0.0292, "step": 42430 }, { "epoch": 31.09157509157509, "grad_norm": 0.72265625, "learning_rate": 4.33825751202115e-06, "loss": 0.0337, "step": 42440 }, { "epoch": 31.0989010989011, "grad_norm": 0.5234375, "learning_rate": 4.3258989328163385e-06, "loss": 0.0318, "step": 42450 }, { "epoch": 31.106227106227106, "grad_norm": 0.609375, "learning_rate": 4.313562477138274e-06, "loss": 0.0351, "step": 42460 }, { "epoch": 31.113553113553113, "grad_norm": 0.53515625, "learning_rate": 4.301248150897753e-06, "loss": 0.032, "step": 42470 }, { "epoch": 31.12087912087912, "grad_norm": 0.70703125, "learning_rate": 4.288955959994978e-06, "loss": 0.0375, "step": 42480 }, { "epoch": 31.128205128205128, "grad_norm": 0.5078125, "learning_rate": 4.276685910319532e-06, "loss": 0.0315, "step": 42490 }, { "epoch": 31.135531135531135, "grad_norm": 0.458984375, "learning_rate": 4.264438007750397e-06, "loss": 0.0296, "step": 42500 }, { "epoch": 31.142857142857142, "grad_norm": 0.7734375, "learning_rate": 4.2522122581559616e-06, "loss": 0.0338, "step": 42510 }, { "epoch": 31.15018315018315, "grad_norm": 0.421875, "learning_rate": 4.240008667393969e-06, "loss": 0.0295, "step": 42520 }, { "epoch": 31.157509157509157, "grad_norm": 0.49609375, "learning_rate": 4.227827241311558e-06, "loss": 0.0341, "step": 42530 }, { "epoch": 31.164835164835164, "grad_norm": 0.42578125, "learning_rate": 4.215667985745264e-06, "loss": 0.0271, "step": 42540 }, { "epoch": 31.17216117216117, "grad_norm": 0.361328125, "learning_rate": 4.2035309065209665e-06, "loss": 0.0301, "step": 42550 }, { "epoch": 31.17948717948718, "grad_norm": 0.625, "learning_rate": 4.191416009453959e-06, "loss": 0.0333, "step": 42560 }, { "epoch": 31.186813186813186, "grad_norm": 0.330078125, "learning_rate": 4.179323300348877e-06, "loss": 0.0334, "step": 42570 }, { "epoch": 31.194139194139193, "grad_norm": 0.333984375, "learning_rate": 4.16725278499972e-06, "loss": 0.033, "step": 42580 }, { "epoch": 31.2014652014652, "grad_norm": 0.322265625, "learning_rate": 4.15520446918989e-06, "loss": 0.0338, "step": 42590 }, { "epoch": 31.208791208791208, "grad_norm": 0.4921875, "learning_rate": 4.143178358692121e-06, "loss": 0.0298, "step": 42600 }, { "epoch": 31.216117216117215, "grad_norm": 0.36328125, "learning_rate": 4.131174459268521e-06, "loss": 0.0398, "step": 42610 }, { "epoch": 31.223443223443223, "grad_norm": 0.32421875, "learning_rate": 4.119192776670543e-06, "loss": 0.0344, "step": 42620 }, { "epoch": 31.23076923076923, "grad_norm": 0.3203125, "learning_rate": 4.107233316639009e-06, "loss": 0.0297, "step": 42630 }, { "epoch": 31.238095238095237, "grad_norm": 0.458984375, "learning_rate": 4.095296084904092e-06, "loss": 0.0335, "step": 42640 }, { "epoch": 31.245421245421245, "grad_norm": 0.337890625, "learning_rate": 4.083381087185303e-06, "loss": 0.0366, "step": 42650 }, { "epoch": 31.252747252747252, "grad_norm": 0.361328125, "learning_rate": 4.071488329191517e-06, "loss": 0.0298, "step": 42660 }, { "epoch": 31.26007326007326, "grad_norm": 0.478515625, "learning_rate": 4.059617816620941e-06, "loss": 0.0316, "step": 42670 }, { "epoch": 31.267399267399266, "grad_norm": 0.4453125, "learning_rate": 4.047769555161124e-06, "loss": 0.0328, "step": 42680 }, { "epoch": 31.274725274725274, "grad_norm": 0.51953125, "learning_rate": 4.035943550488966e-06, "loss": 0.0326, "step": 42690 }, { "epoch": 31.28205128205128, "grad_norm": 0.53125, "learning_rate": 4.024139808270693e-06, "loss": 0.0407, "step": 42700 }, { "epoch": 31.28937728937729, "grad_norm": 0.3046875, "learning_rate": 4.012358334161846e-06, "loss": 0.0283, "step": 42710 }, { "epoch": 31.296703296703296, "grad_norm": 0.62109375, "learning_rate": 4.00059913380734e-06, "loss": 0.0389, "step": 42720 }, { "epoch": 31.304029304029303, "grad_norm": 0.365234375, "learning_rate": 3.988862212841384e-06, "loss": 0.0289, "step": 42730 }, { "epoch": 31.31135531135531, "grad_norm": 0.390625, "learning_rate": 3.977147576887525e-06, "loss": 0.0331, "step": 42740 }, { "epoch": 31.318681318681318, "grad_norm": 0.3828125, "learning_rate": 3.965455231558625e-06, "loss": 0.0336, "step": 42750 }, { "epoch": 31.326007326007325, "grad_norm": 0.408203125, "learning_rate": 3.953785182456861e-06, "loss": 0.0277, "step": 42760 }, { "epoch": 31.333333333333332, "grad_norm": 0.58984375, "learning_rate": 3.9421374351737555e-06, "loss": 0.0321, "step": 42770 }, { "epoch": 31.34065934065934, "grad_norm": 0.64453125, "learning_rate": 3.9305119952901066e-06, "loss": 0.0322, "step": 42780 }, { "epoch": 31.347985347985347, "grad_norm": 0.466796875, "learning_rate": 3.918908868376066e-06, "loss": 0.0371, "step": 42790 }, { "epoch": 31.355311355311354, "grad_norm": 0.73046875, "learning_rate": 3.907328059991063e-06, "loss": 0.0316, "step": 42800 }, { "epoch": 31.36263736263736, "grad_norm": 0.62890625, "learning_rate": 3.895769575683834e-06, "loss": 0.0316, "step": 42810 }, { "epoch": 31.36996336996337, "grad_norm": 0.38671875, "learning_rate": 3.88423342099244e-06, "loss": 0.0343, "step": 42820 }, { "epoch": 31.377289377289376, "grad_norm": 0.421875, "learning_rate": 3.872719601444224e-06, "loss": 0.0308, "step": 42830 }, { "epoch": 31.384615384615383, "grad_norm": 0.326171875, "learning_rate": 3.861228122555839e-06, "loss": 0.0351, "step": 42840 }, { "epoch": 31.39194139194139, "grad_norm": 0.37109375, "learning_rate": 3.849758989833229e-06, "loss": 0.032, "step": 42850 }, { "epoch": 31.399267399267398, "grad_norm": 0.37890625, "learning_rate": 3.838312208771631e-06, "loss": 0.0349, "step": 42860 }, { "epoch": 31.406593406593405, "grad_norm": 0.298828125, "learning_rate": 3.8268877848555764e-06, "loss": 0.0323, "step": 42870 }, { "epoch": 31.413919413919412, "grad_norm": 0.40625, "learning_rate": 3.815485723558872e-06, "loss": 0.0332, "step": 42880 }, { "epoch": 31.42124542124542, "grad_norm": 0.49609375, "learning_rate": 3.8041060303446202e-06, "loss": 0.0299, "step": 42890 }, { "epoch": 31.428571428571427, "grad_norm": 0.3359375, "learning_rate": 3.792748710665221e-06, "loss": 0.033, "step": 42900 }, { "epoch": 31.435897435897434, "grad_norm": 0.55859375, "learning_rate": 3.7814137699623207e-06, "loss": 0.0307, "step": 42910 }, { "epoch": 31.44322344322344, "grad_norm": 0.67578125, "learning_rate": 3.770101213666874e-06, "loss": 0.038, "step": 42920 }, { "epoch": 31.45054945054945, "grad_norm": 0.34375, "learning_rate": 3.758811047199096e-06, "loss": 0.0291, "step": 42930 }, { "epoch": 31.457875457875456, "grad_norm": 0.478515625, "learning_rate": 3.7475432759684624e-06, "loss": 0.0378, "step": 42940 }, { "epoch": 31.465201465201464, "grad_norm": 0.361328125, "learning_rate": 3.7362979053737563e-06, "loss": 0.0282, "step": 42950 }, { "epoch": 31.47252747252747, "grad_norm": 0.498046875, "learning_rate": 3.725074940802984e-06, "loss": 0.0317, "step": 42960 }, { "epoch": 31.479853479853478, "grad_norm": 0.421875, "learning_rate": 3.713874387633434e-06, "loss": 0.0336, "step": 42970 }, { "epoch": 31.487179487179485, "grad_norm": 0.328125, "learning_rate": 3.7026962512316785e-06, "loss": 0.0311, "step": 42980 }, { "epoch": 31.494505494505496, "grad_norm": 0.423828125, "learning_rate": 3.6915405369535145e-06, "loss": 0.0305, "step": 42990 }, { "epoch": 31.501831501831504, "grad_norm": 0.38671875, "learning_rate": 3.680407250144015e-06, "loss": 0.0312, "step": 43000 }, { "epoch": 31.50915750915751, "grad_norm": 0.390625, "learning_rate": 3.6692963961374936e-06, "loss": 0.0318, "step": 43010 }, { "epoch": 31.516483516483518, "grad_norm": 0.3515625, "learning_rate": 3.658207980257542e-06, "loss": 0.034, "step": 43020 }, { "epoch": 31.523809523809526, "grad_norm": 0.40234375, "learning_rate": 3.6471420078169754e-06, "loss": 0.033, "step": 43030 }, { "epoch": 31.531135531135533, "grad_norm": 0.8984375, "learning_rate": 3.6360984841178572e-06, "loss": 0.0375, "step": 43040 }, { "epoch": 31.53846153846154, "grad_norm": 0.302734375, "learning_rate": 3.6250774144515107e-06, "loss": 0.0293, "step": 43050 }, { "epoch": 31.545787545787547, "grad_norm": 0.451171875, "learning_rate": 3.6140788040984973e-06, "loss": 0.0334, "step": 43060 }, { "epoch": 31.553113553113555, "grad_norm": 0.263671875, "learning_rate": 3.603102658328594e-06, "loss": 0.0322, "step": 43070 }, { "epoch": 31.560439560439562, "grad_norm": 0.39453125, "learning_rate": 3.5921489824008523e-06, "loss": 0.0313, "step": 43080 }, { "epoch": 31.56776556776557, "grad_norm": 0.6796875, "learning_rate": 3.5812177815635303e-06, "loss": 0.0337, "step": 43090 }, { "epoch": 31.575091575091577, "grad_norm": 0.40234375, "learning_rate": 3.57030906105411e-06, "loss": 0.0308, "step": 43100 }, { "epoch": 31.582417582417584, "grad_norm": 0.451171875, "learning_rate": 3.5594228260993427e-06, "loss": 0.0312, "step": 43110 }, { "epoch": 31.58974358974359, "grad_norm": 0.32421875, "learning_rate": 3.548559081915168e-06, "loss": 0.0311, "step": 43120 }, { "epoch": 31.5970695970696, "grad_norm": 0.279296875, "learning_rate": 3.537717833706764e-06, "loss": 0.0348, "step": 43130 }, { "epoch": 31.604395604395606, "grad_norm": 0.380859375, "learning_rate": 3.52689908666852e-06, "loss": 0.0303, "step": 43140 }, { "epoch": 31.611721611721613, "grad_norm": 0.36328125, "learning_rate": 3.5161028459840657e-06, "loss": 0.034, "step": 43150 }, { "epoch": 31.61904761904762, "grad_norm": 0.5390625, "learning_rate": 3.505329116826226e-06, "loss": 0.0323, "step": 43160 }, { "epoch": 31.626373626373628, "grad_norm": 0.482421875, "learning_rate": 3.4945779043570492e-06, "loss": 0.0338, "step": 43170 }, { "epoch": 31.633699633699635, "grad_norm": 0.37890625, "learning_rate": 3.4838492137277967e-06, "loss": 0.0321, "step": 43180 }, { "epoch": 31.641025641025642, "grad_norm": 0.37890625, "learning_rate": 3.473143050078939e-06, "loss": 0.0268, "step": 43190 }, { "epoch": 31.64835164835165, "grad_norm": 0.3984375, "learning_rate": 3.462459418540136e-06, "loss": 0.0324, "step": 43200 }, { "epoch": 31.655677655677657, "grad_norm": 0.412109375, "learning_rate": 3.451798324230282e-06, "loss": 0.0309, "step": 43210 }, { "epoch": 31.663003663003664, "grad_norm": 0.451171875, "learning_rate": 3.441159772257449e-06, "loss": 0.0363, "step": 43220 }, { "epoch": 31.67032967032967, "grad_norm": 0.54296875, "learning_rate": 3.4305437677189244e-06, "loss": 0.035, "step": 43230 }, { "epoch": 31.67765567765568, "grad_norm": 0.384765625, "learning_rate": 3.4199503157011757e-06, "loss": 0.0273, "step": 43240 }, { "epoch": 31.684981684981686, "grad_norm": 0.5859375, "learning_rate": 3.409379421279877e-06, "loss": 0.0357, "step": 43250 }, { "epoch": 31.692307692307693, "grad_norm": 0.388671875, "learning_rate": 3.3988310895198923e-06, "loss": 0.032, "step": 43260 }, { "epoch": 31.6996336996337, "grad_norm": 0.404296875, "learning_rate": 3.3883053254752583e-06, "loss": 0.0327, "step": 43270 }, { "epoch": 31.706959706959708, "grad_norm": 0.3515625, "learning_rate": 3.377802134189237e-06, "loss": 0.0342, "step": 43280 }, { "epoch": 31.714285714285715, "grad_norm": 0.41015625, "learning_rate": 3.367321520694241e-06, "loss": 0.0315, "step": 43290 }, { "epoch": 31.721611721611723, "grad_norm": 0.39453125, "learning_rate": 3.356863490011866e-06, "loss": 0.0315, "step": 43300 }, { "epoch": 31.72893772893773, "grad_norm": 0.3828125, "learning_rate": 3.3464280471529225e-06, "loss": 0.0343, "step": 43310 }, { "epoch": 31.736263736263737, "grad_norm": 0.455078125, "learning_rate": 3.336015197117355e-06, "loss": 0.0332, "step": 43320 }, { "epoch": 31.743589743589745, "grad_norm": 0.578125, "learning_rate": 3.3256249448942968e-06, "loss": 0.0304, "step": 43330 }, { "epoch": 31.750915750915752, "grad_norm": 0.5859375, "learning_rate": 3.315257295462075e-06, "loss": 0.0335, "step": 43340 }, { "epoch": 31.75824175824176, "grad_norm": 0.32421875, "learning_rate": 3.304912253788158e-06, "loss": 0.0319, "step": 43350 }, { "epoch": 31.765567765567766, "grad_norm": 0.4609375, "learning_rate": 3.294589824829203e-06, "loss": 0.0316, "step": 43360 }, { "epoch": 31.772893772893774, "grad_norm": 0.296875, "learning_rate": 3.2842900135310216e-06, "loss": 0.0317, "step": 43370 }, { "epoch": 31.78021978021978, "grad_norm": 0.353515625, "learning_rate": 3.2740128248285963e-06, "loss": 0.0285, "step": 43380 }, { "epoch": 31.78754578754579, "grad_norm": 0.447265625, "learning_rate": 3.2637582636460586e-06, "loss": 0.0357, "step": 43390 }, { "epoch": 31.794871794871796, "grad_norm": 0.5625, "learning_rate": 3.2535263348966927e-06, "loss": 0.0335, "step": 43400 }, { "epoch": 31.802197802197803, "grad_norm": 0.3046875, "learning_rate": 3.243317043482979e-06, "loss": 0.0294, "step": 43410 }, { "epoch": 31.80952380952381, "grad_norm": 0.51171875, "learning_rate": 3.23313039429651e-06, "loss": 0.0337, "step": 43420 }, { "epoch": 31.816849816849818, "grad_norm": 0.30078125, "learning_rate": 3.2229663922180313e-06, "loss": 0.0351, "step": 43430 }, { "epoch": 31.824175824175825, "grad_norm": 0.5390625, "learning_rate": 3.212825042117472e-06, "loss": 0.0341, "step": 43440 }, { "epoch": 31.831501831501832, "grad_norm": 0.41796875, "learning_rate": 3.202706348853875e-06, "loss": 0.0318, "step": 43450 }, { "epoch": 31.83882783882784, "grad_norm": 0.5703125, "learning_rate": 3.1926103172754392e-06, "loss": 0.0315, "step": 43460 }, { "epoch": 31.846153846153847, "grad_norm": 0.703125, "learning_rate": 3.1825369522195016e-06, "loss": 0.0343, "step": 43470 }, { "epoch": 31.853479853479854, "grad_norm": 0.53125, "learning_rate": 3.1724862585125464e-06, "loss": 0.0313, "step": 43480 }, { "epoch": 31.86080586080586, "grad_norm": 0.609375, "learning_rate": 3.162458240970192e-06, "loss": 0.0401, "step": 43490 }, { "epoch": 31.86813186813187, "grad_norm": 0.71484375, "learning_rate": 3.1524529043971832e-06, "loss": 0.0334, "step": 43500 }, { "epoch": 31.875457875457876, "grad_norm": 0.359375, "learning_rate": 3.142470253587413e-06, "loss": 0.0341, "step": 43510 }, { "epoch": 31.882783882783883, "grad_norm": 0.3515625, "learning_rate": 3.1325102933238927e-06, "loss": 0.0343, "step": 43520 }, { "epoch": 31.89010989010989, "grad_norm": 0.470703125, "learning_rate": 3.1225730283787618e-06, "loss": 0.0312, "step": 43530 }, { "epoch": 31.897435897435898, "grad_norm": 0.43359375, "learning_rate": 3.1126584635133025e-06, "loss": 0.0316, "step": 43540 }, { "epoch": 31.904761904761905, "grad_norm": 0.4140625, "learning_rate": 3.1027666034778984e-06, "loss": 0.0327, "step": 43550 }, { "epoch": 31.912087912087912, "grad_norm": 0.37890625, "learning_rate": 3.0928974530120576e-06, "loss": 0.0345, "step": 43560 }, { "epoch": 31.91941391941392, "grad_norm": 0.443359375, "learning_rate": 3.0830510168444333e-06, "loss": 0.0292, "step": 43570 }, { "epoch": 31.926739926739927, "grad_norm": 0.427734375, "learning_rate": 3.0732272996927545e-06, "loss": 0.03, "step": 43580 }, { "epoch": 31.934065934065934, "grad_norm": 0.439453125, "learning_rate": 3.0634263062639068e-06, "loss": 0.0308, "step": 43590 }, { "epoch": 31.94139194139194, "grad_norm": 0.388671875, "learning_rate": 3.053648041253857e-06, "loss": 0.0341, "step": 43600 }, { "epoch": 31.94871794871795, "grad_norm": 0.40625, "learning_rate": 3.0438925093476893e-06, "loss": 0.0333, "step": 43610 }, { "epoch": 31.956043956043956, "grad_norm": 0.6484375, "learning_rate": 3.0341597152196075e-06, "loss": 0.0326, "step": 43620 }, { "epoch": 31.963369963369964, "grad_norm": 0.44140625, "learning_rate": 3.024449663532912e-06, "loss": 0.0317, "step": 43630 }, { "epoch": 31.97069597069597, "grad_norm": 0.279296875, "learning_rate": 3.0147623589399994e-06, "loss": 0.0322, "step": 43640 }, { "epoch": 31.978021978021978, "grad_norm": 0.76953125, "learning_rate": 3.0050978060823855e-06, "loss": 0.0342, "step": 43650 }, { "epoch": 31.985347985347985, "grad_norm": 0.328125, "learning_rate": 2.9954560095906555e-06, "loss": 0.031, "step": 43660 }, { "epoch": 31.992673992673993, "grad_norm": 0.447265625, "learning_rate": 2.9858369740845347e-06, "loss": 0.0287, "step": 43670 }, { "epoch": 32.0, "grad_norm": 0.80859375, "learning_rate": 2.9762407041728017e-06, "loss": 0.0347, "step": 43680 }, { "epoch": 32.00732600732601, "grad_norm": 0.33984375, "learning_rate": 2.966667204453353e-06, "loss": 0.0343, "step": 43690 }, { "epoch": 32.014652014652015, "grad_norm": 0.40625, "learning_rate": 2.9571164795131666e-06, "loss": 0.0339, "step": 43700 }, { "epoch": 32.02197802197802, "grad_norm": 0.380859375, "learning_rate": 2.9475885339282996e-06, "loss": 0.033, "step": 43710 }, { "epoch": 32.02930402930403, "grad_norm": 0.380859375, "learning_rate": 2.938083372263912e-06, "loss": 0.0333, "step": 43720 }, { "epoch": 32.03663003663004, "grad_norm": 0.3046875, "learning_rate": 2.928600999074238e-06, "loss": 0.0342, "step": 43730 }, { "epoch": 32.043956043956044, "grad_norm": 0.2890625, "learning_rate": 2.919141418902587e-06, "loss": 0.0316, "step": 43740 }, { "epoch": 32.05128205128205, "grad_norm": 0.228515625, "learning_rate": 2.909704636281365e-06, "loss": 0.0325, "step": 43750 }, { "epoch": 32.05860805860806, "grad_norm": 0.5546875, "learning_rate": 2.900290655732031e-06, "loss": 0.0317, "step": 43760 }, { "epoch": 32.065934065934066, "grad_norm": 0.3046875, "learning_rate": 2.8908994817651395e-06, "loss": 0.0338, "step": 43770 }, { "epoch": 32.07326007326007, "grad_norm": 0.6640625, "learning_rate": 2.8815311188803106e-06, "loss": 0.0319, "step": 43780 }, { "epoch": 32.08058608058608, "grad_norm": 0.3828125, "learning_rate": 2.8721855715662268e-06, "loss": 0.0302, "step": 43790 }, { "epoch": 32.08791208791209, "grad_norm": 0.267578125, "learning_rate": 2.862862844300662e-06, "loss": 0.0324, "step": 43800 }, { "epoch": 32.095238095238095, "grad_norm": 0.3125, "learning_rate": 2.8535629415504217e-06, "loss": 0.0305, "step": 43810 }, { "epoch": 32.1025641025641, "grad_norm": 0.50390625, "learning_rate": 2.844285867771413e-06, "loss": 0.0349, "step": 43820 }, { "epoch": 32.10989010989011, "grad_norm": 0.453125, "learning_rate": 2.8350316274085736e-06, "loss": 0.0302, "step": 43830 }, { "epoch": 32.11721611721612, "grad_norm": 0.4296875, "learning_rate": 2.8258002248959106e-06, "loss": 0.0337, "step": 43840 }, { "epoch": 32.124542124542124, "grad_norm": 0.369140625, "learning_rate": 2.8165916646565057e-06, "loss": 0.035, "step": 43850 }, { "epoch": 32.13186813186813, "grad_norm": 0.392578125, "learning_rate": 2.807405951102478e-06, "loss": 0.0383, "step": 43860 }, { "epoch": 32.13919413919414, "grad_norm": 0.396484375, "learning_rate": 2.7982430886349916e-06, "loss": 0.0375, "step": 43870 }, { "epoch": 32.146520146520146, "grad_norm": 0.345703125, "learning_rate": 2.7891030816442987e-06, "loss": 0.0297, "step": 43880 }, { "epoch": 32.15384615384615, "grad_norm": 0.390625, "learning_rate": 2.779985934509648e-06, "loss": 0.0302, "step": 43890 }, { "epoch": 32.16117216117216, "grad_norm": 0.6171875, "learning_rate": 2.770891651599385e-06, "loss": 0.0339, "step": 43900 }, { "epoch": 32.16849816849817, "grad_norm": 0.423828125, "learning_rate": 2.7618202372708697e-06, "loss": 0.0298, "step": 43910 }, { "epoch": 32.175824175824175, "grad_norm": 0.35546875, "learning_rate": 2.752771695870515e-06, "loss": 0.0319, "step": 43920 }, { "epoch": 32.18315018315018, "grad_norm": 0.59765625, "learning_rate": 2.7437460317337807e-06, "loss": 0.0322, "step": 43930 }, { "epoch": 32.19047619047619, "grad_norm": 0.3203125, "learning_rate": 2.7347432491851465e-06, "loss": 0.0354, "step": 43940 }, { "epoch": 32.1978021978022, "grad_norm": 0.6171875, "learning_rate": 2.7257633525381512e-06, "loss": 0.0361, "step": 43950 }, { "epoch": 32.205128205128204, "grad_norm": 0.36328125, "learning_rate": 2.7168063460953568e-06, "loss": 0.0301, "step": 43960 }, { "epoch": 32.21245421245421, "grad_norm": 0.333984375, "learning_rate": 2.707872234148353e-06, "loss": 0.0337, "step": 43970 }, { "epoch": 32.21978021978022, "grad_norm": 0.44140625, "learning_rate": 2.6989610209777695e-06, "loss": 0.0301, "step": 43980 }, { "epoch": 32.227106227106226, "grad_norm": 0.294921875, "learning_rate": 2.6900727108532567e-06, "loss": 0.0372, "step": 43990 }, { "epoch": 32.234432234432234, "grad_norm": 0.8046875, "learning_rate": 2.681207308033512e-06, "loss": 0.0314, "step": 44000 }, { "epoch": 32.24175824175824, "grad_norm": 0.365234375, "learning_rate": 2.672364816766225e-06, "loss": 0.0269, "step": 44010 }, { "epoch": 32.24908424908425, "grad_norm": 0.400390625, "learning_rate": 2.663545241288122e-06, "loss": 0.0351, "step": 44020 }, { "epoch": 32.256410256410255, "grad_norm": 0.326171875, "learning_rate": 2.6547485858249594e-06, "loss": 0.0312, "step": 44030 }, { "epoch": 32.26373626373626, "grad_norm": 0.43359375, "learning_rate": 2.645974854591497e-06, "loss": 0.0338, "step": 44040 }, { "epoch": 32.27106227106227, "grad_norm": 0.77734375, "learning_rate": 2.6372240517915206e-06, "loss": 0.0353, "step": 44050 }, { "epoch": 32.27838827838828, "grad_norm": 0.3515625, "learning_rate": 2.6284961816178296e-06, "loss": 0.0309, "step": 44060 }, { "epoch": 32.285714285714285, "grad_norm": 0.48828125, "learning_rate": 2.619791248252227e-06, "loss": 0.0316, "step": 44070 }, { "epoch": 32.29304029304029, "grad_norm": 0.52734375, "learning_rate": 2.6111092558655356e-06, "loss": 0.0299, "step": 44080 }, { "epoch": 32.3003663003663, "grad_norm": 0.451171875, "learning_rate": 2.6024502086175864e-06, "loss": 0.0303, "step": 44090 }, { "epoch": 32.30769230769231, "grad_norm": 0.69140625, "learning_rate": 2.5938141106571982e-06, "loss": 0.0327, "step": 44100 }, { "epoch": 32.315018315018314, "grad_norm": 0.53515625, "learning_rate": 2.585200966122231e-06, "loss": 0.0302, "step": 44110 }, { "epoch": 32.32234432234432, "grad_norm": 0.345703125, "learning_rate": 2.5766107791395036e-06, "loss": 0.03, "step": 44120 }, { "epoch": 32.32967032967033, "grad_norm": 0.408203125, "learning_rate": 2.568043553824878e-06, "loss": 0.0291, "step": 44130 }, { "epoch": 32.336996336996336, "grad_norm": 0.314453125, "learning_rate": 2.559499294283174e-06, "loss": 0.0331, "step": 44140 }, { "epoch": 32.34432234432234, "grad_norm": 0.314453125, "learning_rate": 2.5509780046082324e-06, "loss": 0.033, "step": 44150 }, { "epoch": 32.35164835164835, "grad_norm": 0.46484375, "learning_rate": 2.542479688882891e-06, "loss": 0.0329, "step": 44160 }, { "epoch": 32.35897435897436, "grad_norm": 0.546875, "learning_rate": 2.5340043511789527e-06, "loss": 0.0333, "step": 44170 }, { "epoch": 32.366300366300365, "grad_norm": 0.4140625, "learning_rate": 2.5255519955572507e-06, "loss": 0.0313, "step": 44180 }, { "epoch": 32.37362637362637, "grad_norm": 0.40234375, "learning_rate": 2.5171226260675794e-06, "loss": 0.033, "step": 44190 }, { "epoch": 32.38095238095238, "grad_norm": 0.412109375, "learning_rate": 2.508716246748717e-06, "loss": 0.031, "step": 44200 }, { "epoch": 32.38827838827839, "grad_norm": 0.359375, "learning_rate": 2.5003328616284473e-06, "loss": 0.032, "step": 44210 }, { "epoch": 32.395604395604394, "grad_norm": 0.4375, "learning_rate": 2.4919724747235134e-06, "loss": 0.0338, "step": 44220 }, { "epoch": 32.4029304029304, "grad_norm": 0.39453125, "learning_rate": 2.483635090039665e-06, "loss": 0.0292, "step": 44230 }, { "epoch": 32.41025641025641, "grad_norm": 0.3359375, "learning_rate": 2.4753207115716084e-06, "loss": 0.0292, "step": 44240 }, { "epoch": 32.417582417582416, "grad_norm": 0.40234375, "learning_rate": 2.467029343303035e-06, "loss": 0.0333, "step": 44250 }, { "epoch": 32.42490842490842, "grad_norm": 0.3828125, "learning_rate": 2.4587609892066184e-06, "loss": 0.0366, "step": 44260 }, { "epoch": 32.43223443223443, "grad_norm": 0.55859375, "learning_rate": 2.450515653243985e-06, "loss": 0.0317, "step": 44270 }, { "epoch": 32.43956043956044, "grad_norm": 0.58984375, "learning_rate": 2.4422933393657602e-06, "loss": 0.0347, "step": 44280 }, { "epoch": 32.446886446886445, "grad_norm": 0.4140625, "learning_rate": 2.4340940515115167e-06, "loss": 0.0314, "step": 44290 }, { "epoch": 32.45421245421245, "grad_norm": 0.33984375, "learning_rate": 2.4259177936097985e-06, "loss": 0.0324, "step": 44300 }, { "epoch": 32.46153846153846, "grad_norm": 0.4296875, "learning_rate": 2.417764569578124e-06, "loss": 0.0387, "step": 44310 }, { "epoch": 32.46886446886447, "grad_norm": 0.3359375, "learning_rate": 2.4096343833229733e-06, "loss": 0.0321, "step": 44320 }, { "epoch": 32.476190476190474, "grad_norm": 0.408203125, "learning_rate": 2.4015272387397775e-06, "loss": 0.0339, "step": 44330 }, { "epoch": 32.48351648351648, "grad_norm": 0.443359375, "learning_rate": 2.3934431397129456e-06, "loss": 0.0315, "step": 44340 }, { "epoch": 32.49084249084249, "grad_norm": 0.45703125, "learning_rate": 2.385382090115823e-06, "loss": 0.0295, "step": 44350 }, { "epoch": 32.498168498168496, "grad_norm": 0.87109375, "learning_rate": 2.377344093810737e-06, "loss": 0.0343, "step": 44360 }, { "epoch": 32.505494505494504, "grad_norm": 0.40625, "learning_rate": 2.369329154648946e-06, "loss": 0.0297, "step": 44370 }, { "epoch": 32.51282051282051, "grad_norm": 0.4296875, "learning_rate": 2.3613372764706626e-06, "loss": 0.0318, "step": 44380 }, { "epoch": 32.52014652014652, "grad_norm": 0.419921875, "learning_rate": 2.3533684631050866e-06, "loss": 0.034, "step": 44390 }, { "epoch": 32.527472527472526, "grad_norm": 0.458984375, "learning_rate": 2.345422718370299e-06, "loss": 0.0322, "step": 44400 }, { "epoch": 32.53479853479853, "grad_norm": 0.57421875, "learning_rate": 2.337500046073398e-06, "loss": 0.0346, "step": 44410 }, { "epoch": 32.54212454212454, "grad_norm": 0.296875, "learning_rate": 2.329600450010382e-06, "loss": 0.0402, "step": 44420 }, { "epoch": 32.54945054945055, "grad_norm": 0.4296875, "learning_rate": 2.3217239339662004e-06, "loss": 0.0338, "step": 44430 }, { "epoch": 32.556776556776555, "grad_norm": 0.341796875, "learning_rate": 2.3138705017147675e-06, "loss": 0.03, "step": 44440 }, { "epoch": 32.56410256410256, "grad_norm": 0.3203125, "learning_rate": 2.3060401570189065e-06, "loss": 0.0304, "step": 44450 }, { "epoch": 32.57142857142857, "grad_norm": 0.51953125, "learning_rate": 2.2982329036304054e-06, "loss": 0.0321, "step": 44460 }, { "epoch": 32.57875457875458, "grad_norm": 0.6796875, "learning_rate": 2.290448745289963e-06, "loss": 0.0359, "step": 44470 }, { "epoch": 32.586080586080584, "grad_norm": 0.3671875, "learning_rate": 2.2826876857272255e-06, "loss": 0.0308, "step": 44480 }, { "epoch": 32.59340659340659, "grad_norm": 0.62109375, "learning_rate": 2.2749497286607823e-06, "loss": 0.0333, "step": 44490 }, { "epoch": 32.6007326007326, "grad_norm": 0.466796875, "learning_rate": 2.267234877798139e-06, "loss": 0.0328, "step": 44500 }, { "epoch": 32.608058608058606, "grad_norm": 0.369140625, "learning_rate": 2.2595431368357203e-06, "loss": 0.0296, "step": 44510 }, { "epoch": 32.61538461538461, "grad_norm": 0.326171875, "learning_rate": 2.2518745094589116e-06, "loss": 0.0325, "step": 44520 }, { "epoch": 32.62271062271062, "grad_norm": 0.57421875, "learning_rate": 2.244228999341986e-06, "loss": 0.0329, "step": 44530 }, { "epoch": 32.63003663003663, "grad_norm": 0.396484375, "learning_rate": 2.2366066101481697e-06, "loss": 0.0306, "step": 44540 }, { "epoch": 32.637362637362635, "grad_norm": 0.498046875, "learning_rate": 2.2290073455295943e-06, "loss": 0.0332, "step": 44550 }, { "epoch": 32.64468864468864, "grad_norm": 0.462890625, "learning_rate": 2.2214312091273163e-06, "loss": 0.0358, "step": 44560 }, { "epoch": 32.65201465201465, "grad_norm": 0.318359375, "learning_rate": 2.213878204571315e-06, "loss": 0.0284, "step": 44570 }, { "epoch": 32.65934065934066, "grad_norm": 0.341796875, "learning_rate": 2.206348335480479e-06, "loss": 0.0315, "step": 44580 }, { "epoch": 32.666666666666664, "grad_norm": 0.35546875, "learning_rate": 2.198841605462622e-06, "loss": 0.0411, "step": 44590 }, { "epoch": 32.67399267399267, "grad_norm": 0.44921875, "learning_rate": 2.191358018114463e-06, "loss": 0.0281, "step": 44600 }, { "epoch": 32.68131868131868, "grad_norm": 0.3046875, "learning_rate": 2.183897577021625e-06, "loss": 0.0319, "step": 44610 }, { "epoch": 32.688644688644686, "grad_norm": 0.37109375, "learning_rate": 2.1764602857586587e-06, "loss": 0.0348, "step": 44620 }, { "epoch": 32.69597069597069, "grad_norm": 0.484375, "learning_rate": 2.1690461478890116e-06, "loss": 0.0314, "step": 44630 }, { "epoch": 32.7032967032967, "grad_norm": 0.6171875, "learning_rate": 2.161655166965048e-06, "loss": 0.0362, "step": 44640 }, { "epoch": 32.71062271062271, "grad_norm": 0.40625, "learning_rate": 2.1542873465280144e-06, "loss": 0.0365, "step": 44650 }, { "epoch": 32.717948717948715, "grad_norm": 0.330078125, "learning_rate": 2.146942690108083e-06, "loss": 0.0341, "step": 44660 }, { "epoch": 32.72527472527472, "grad_norm": 0.328125, "learning_rate": 2.1396212012243256e-06, "loss": 0.0317, "step": 44670 }, { "epoch": 32.73260073260073, "grad_norm": 0.4453125, "learning_rate": 2.1323228833846965e-06, "loss": 0.0337, "step": 44680 }, { "epoch": 32.73992673992674, "grad_norm": 0.6171875, "learning_rate": 2.1250477400860695e-06, "loss": 0.0331, "step": 44690 }, { "epoch": 32.747252747252745, "grad_norm": 0.7421875, "learning_rate": 2.1177957748141977e-06, "loss": 0.0381, "step": 44700 }, { "epoch": 32.75457875457875, "grad_norm": 0.291015625, "learning_rate": 2.110566991043736e-06, "loss": 0.0296, "step": 44710 }, { "epoch": 32.76190476190476, "grad_norm": 0.31640625, "learning_rate": 2.1033613922382337e-06, "loss": 0.0299, "step": 44720 }, { "epoch": 32.76923076923077, "grad_norm": 0.35546875, "learning_rate": 2.0961789818501346e-06, "loss": 0.0334, "step": 44730 }, { "epoch": 32.776556776556774, "grad_norm": 0.443359375, "learning_rate": 2.089019763320752e-06, "loss": 0.0348, "step": 44740 }, { "epoch": 32.78388278388278, "grad_norm": 0.4375, "learning_rate": 2.0818837400803222e-06, "loss": 0.0326, "step": 44750 }, { "epoch": 32.79120879120879, "grad_norm": 0.392578125, "learning_rate": 2.0747709155479398e-06, "loss": 0.0313, "step": 44760 }, { "epoch": 32.798534798534796, "grad_norm": 0.91796875, "learning_rate": 2.0676812931315827e-06, "loss": 0.0334, "step": 44770 }, { "epoch": 32.8058608058608, "grad_norm": 0.37109375, "learning_rate": 2.0606148762281386e-06, "loss": 0.0363, "step": 44780 }, { "epoch": 32.81318681318681, "grad_norm": 0.60546875, "learning_rate": 2.05357166822334e-06, "loss": 0.0366, "step": 44790 }, { "epoch": 32.82051282051282, "grad_norm": 0.3671875, "learning_rate": 2.0465516724918378e-06, "loss": 0.0344, "step": 44800 }, { "epoch": 32.827838827838825, "grad_norm": 0.37109375, "learning_rate": 2.0395548923971252e-06, "loss": 0.032, "step": 44810 }, { "epoch": 32.83516483516483, "grad_norm": 0.380859375, "learning_rate": 2.0325813312916067e-06, "loss": 0.0375, "step": 44820 }, { "epoch": 32.84249084249084, "grad_norm": 0.341796875, "learning_rate": 2.0256309925165392e-06, "loss": 0.0399, "step": 44830 }, { "epoch": 32.84981684981685, "grad_norm": 0.55078125, "learning_rate": 2.0187038794020442e-06, "loss": 0.0337, "step": 44840 }, { "epoch": 32.857142857142854, "grad_norm": 0.37890625, "learning_rate": 2.011799995267147e-06, "loss": 0.0322, "step": 44850 }, { "epoch": 32.86446886446886, "grad_norm": 0.48046875, "learning_rate": 2.004919343419713e-06, "loss": 0.0316, "step": 44860 }, { "epoch": 32.87179487179487, "grad_norm": 0.3671875, "learning_rate": 1.998061927156489e-06, "loss": 0.0402, "step": 44870 }, { "epoch": 32.879120879120876, "grad_norm": 0.259765625, "learning_rate": 1.9912277497630978e-06, "loss": 0.0311, "step": 44880 }, { "epoch": 32.88644688644688, "grad_norm": 0.30078125, "learning_rate": 1.9844168145140078e-06, "loss": 0.0363, "step": 44890 }, { "epoch": 32.89377289377289, "grad_norm": 0.466796875, "learning_rate": 1.9776291246725748e-06, "loss": 0.0368, "step": 44900 }, { "epoch": 32.9010989010989, "grad_norm": 0.341796875, "learning_rate": 1.970864683490987e-06, "loss": 0.0344, "step": 44910 }, { "epoch": 32.908424908424905, "grad_norm": 0.353515625, "learning_rate": 1.9641234942103156e-06, "loss": 0.0345, "step": 44920 }, { "epoch": 32.91575091575091, "grad_norm": 0.41015625, "learning_rate": 1.9574055600604912e-06, "loss": 0.0295, "step": 44930 }, { "epoch": 32.92307692307692, "grad_norm": 0.4296875, "learning_rate": 1.9507108842602897e-06, "loss": 0.0309, "step": 44940 }, { "epoch": 32.93040293040293, "grad_norm": 0.333984375, "learning_rate": 1.944039470017358e-06, "loss": 0.0307, "step": 44950 }, { "epoch": 32.937728937728934, "grad_norm": 0.31640625, "learning_rate": 1.9373913205281802e-06, "loss": 0.0296, "step": 44960 }, { "epoch": 32.94505494505494, "grad_norm": 0.396484375, "learning_rate": 1.9307664389781026e-06, "loss": 0.0276, "step": 44970 }, { "epoch": 32.95238095238095, "grad_norm": 0.4296875, "learning_rate": 1.9241648285413363e-06, "loss": 0.0299, "step": 44980 }, { "epoch": 32.959706959706956, "grad_norm": 0.51171875, "learning_rate": 1.9175864923809207e-06, "loss": 0.0321, "step": 44990 }, { "epoch": 32.967032967032964, "grad_norm": 0.412109375, "learning_rate": 1.911031433648748e-06, "loss": 0.0334, "step": 45000 }, { "epoch": 32.97435897435897, "grad_norm": 0.380859375, "learning_rate": 1.904499655485569e-06, "loss": 0.0309, "step": 45010 }, { "epoch": 32.98168498168498, "grad_norm": 0.35546875, "learning_rate": 1.897991161020969e-06, "loss": 0.0338, "step": 45020 }, { "epoch": 32.98901098901099, "grad_norm": 0.3984375, "learning_rate": 1.8915059533733854e-06, "loss": 0.037, "step": 45030 }, { "epoch": 32.99633699633699, "grad_norm": 0.66015625, "learning_rate": 1.8850440356500875e-06, "loss": 0.0337, "step": 45040 }, { "epoch": 33.00366300366301, "grad_norm": 0.330078125, "learning_rate": 1.878605410947199e-06, "loss": 0.0317, "step": 45050 }, { "epoch": 33.010989010989015, "grad_norm": 0.3671875, "learning_rate": 1.872190082349673e-06, "loss": 0.0317, "step": 45060 }, { "epoch": 33.01831501831502, "grad_norm": 0.55859375, "learning_rate": 1.8657980529313044e-06, "loss": 0.0367, "step": 45070 }, { "epoch": 33.02564102564103, "grad_norm": 0.470703125, "learning_rate": 1.859429325754722e-06, "loss": 0.0311, "step": 45080 }, { "epoch": 33.032967032967036, "grad_norm": 0.5078125, "learning_rate": 1.8530839038713977e-06, "loss": 0.0364, "step": 45090 }, { "epoch": 33.040293040293044, "grad_norm": 0.2421875, "learning_rate": 1.8467617903216217e-06, "loss": 0.0318, "step": 45100 }, { "epoch": 33.04761904761905, "grad_norm": 0.4296875, "learning_rate": 1.8404629881345372e-06, "loss": 0.0321, "step": 45110 }, { "epoch": 33.05494505494506, "grad_norm": 0.359375, "learning_rate": 1.834187500328094e-06, "loss": 0.0302, "step": 45120 }, { "epoch": 33.062271062271066, "grad_norm": 0.412109375, "learning_rate": 1.8279353299091014e-06, "loss": 0.0344, "step": 45130 }, { "epoch": 33.06959706959707, "grad_norm": 0.427734375, "learning_rate": 1.8217064798731639e-06, "loss": 0.0359, "step": 45140 }, { "epoch": 33.07692307692308, "grad_norm": 0.33984375, "learning_rate": 1.8155009532047408e-06, "loss": 0.0328, "step": 45150 }, { "epoch": 33.08424908424909, "grad_norm": 0.400390625, "learning_rate": 1.8093187528770914e-06, "loss": 0.0352, "step": 45160 }, { "epoch": 33.091575091575095, "grad_norm": 0.62890625, "learning_rate": 1.8031598818523127e-06, "loss": 0.0328, "step": 45170 }, { "epoch": 33.0989010989011, "grad_norm": 0.287109375, "learning_rate": 1.79702434308133e-06, "loss": 0.0315, "step": 45180 }, { "epoch": 33.10622710622711, "grad_norm": 0.357421875, "learning_rate": 1.7909121395038738e-06, "loss": 0.0303, "step": 45190 }, { "epoch": 33.11355311355312, "grad_norm": 0.41015625, "learning_rate": 1.7848232740485024e-06, "loss": 0.0299, "step": 45200 }, { "epoch": 33.120879120879124, "grad_norm": 0.455078125, "learning_rate": 1.7787577496325949e-06, "loss": 0.0405, "step": 45210 }, { "epoch": 33.12820512820513, "grad_norm": 0.490234375, "learning_rate": 1.7727155691623369e-06, "loss": 0.0297, "step": 45220 }, { "epoch": 33.13553113553114, "grad_norm": 0.427734375, "learning_rate": 1.7666967355327412e-06, "loss": 0.0323, "step": 45230 }, { "epoch": 33.142857142857146, "grad_norm": 0.400390625, "learning_rate": 1.7607012516276261e-06, "loss": 0.0329, "step": 45240 }, { "epoch": 33.15018315018315, "grad_norm": 0.54296875, "learning_rate": 1.7547291203196258e-06, "loss": 0.0337, "step": 45250 }, { "epoch": 33.15750915750916, "grad_norm": 0.337890625, "learning_rate": 1.7487803444701807e-06, "loss": 0.0332, "step": 45260 }, { "epoch": 33.16483516483517, "grad_norm": 0.53125, "learning_rate": 1.7428549269295526e-06, "loss": 0.032, "step": 45270 }, { "epoch": 33.172161172161175, "grad_norm": 0.34375, "learning_rate": 1.7369528705367986e-06, "loss": 0.0298, "step": 45280 }, { "epoch": 33.17948717948718, "grad_norm": 0.419921875, "learning_rate": 1.73107417811978e-06, "loss": 0.0369, "step": 45290 }, { "epoch": 33.18681318681319, "grad_norm": 0.314453125, "learning_rate": 1.7252188524951808e-06, "loss": 0.0364, "step": 45300 }, { "epoch": 33.1941391941392, "grad_norm": 0.400390625, "learning_rate": 1.7193868964684792e-06, "loss": 0.0362, "step": 45310 }, { "epoch": 33.201465201465204, "grad_norm": 0.328125, "learning_rate": 1.7135783128339487e-06, "loss": 0.031, "step": 45320 }, { "epoch": 33.20879120879121, "grad_norm": 0.37109375, "learning_rate": 1.7077931043746835e-06, "loss": 0.0391, "step": 45330 }, { "epoch": 33.21611721611722, "grad_norm": 0.3203125, "learning_rate": 1.7020312738625561e-06, "loss": 0.0312, "step": 45340 }, { "epoch": 33.223443223443226, "grad_norm": 0.37890625, "learning_rate": 1.6962928240582564e-06, "loss": 0.0288, "step": 45350 }, { "epoch": 33.23076923076923, "grad_norm": 0.38671875, "learning_rate": 1.6905777577112619e-06, "loss": 0.0351, "step": 45360 }, { "epoch": 33.23809523809524, "grad_norm": 0.62890625, "learning_rate": 1.684886077559851e-06, "loss": 0.035, "step": 45370 }, { "epoch": 33.24542124542125, "grad_norm": 0.443359375, "learning_rate": 1.6792177863310856e-06, "loss": 0.0343, "step": 45380 }, { "epoch": 33.252747252747255, "grad_norm": 0.380859375, "learning_rate": 1.6735728867408373e-06, "loss": 0.0332, "step": 45390 }, { "epoch": 33.26007326007326, "grad_norm": 0.4140625, "learning_rate": 1.6679513814937631e-06, "loss": 0.0349, "step": 45400 }, { "epoch": 33.26739926739927, "grad_norm": 0.8359375, "learning_rate": 1.6623532732833077e-06, "loss": 0.0377, "step": 45410 }, { "epoch": 33.27472527472528, "grad_norm": 0.7265625, "learning_rate": 1.6567785647917098e-06, "loss": 0.0344, "step": 45420 }, { "epoch": 33.282051282051285, "grad_norm": 0.453125, "learning_rate": 1.651227258689988e-06, "loss": 0.0355, "step": 45430 }, { "epoch": 33.28937728937729, "grad_norm": 0.40625, "learning_rate": 1.6456993576379694e-06, "loss": 0.0325, "step": 45440 }, { "epoch": 33.2967032967033, "grad_norm": 0.640625, "learning_rate": 1.640194864284239e-06, "loss": 0.0311, "step": 45450 }, { "epoch": 33.30402930402931, "grad_norm": 0.35546875, "learning_rate": 1.634713781266187e-06, "loss": 0.03, "step": 45460 }, { "epoch": 33.311355311355314, "grad_norm": 0.7421875, "learning_rate": 1.6292561112099862e-06, "loss": 0.0355, "step": 45470 }, { "epoch": 33.31868131868132, "grad_norm": 0.53125, "learning_rate": 1.6238218567305713e-06, "loss": 0.0343, "step": 45480 }, { "epoch": 33.32600732600733, "grad_norm": 0.40234375, "learning_rate": 1.6184110204316827e-06, "loss": 0.0336, "step": 45490 }, { "epoch": 33.333333333333336, "grad_norm": 0.828125, "learning_rate": 1.6130236049058218e-06, "loss": 0.0335, "step": 45500 }, { "epoch": 33.34065934065934, "grad_norm": 0.71484375, "learning_rate": 1.6076596127342854e-06, "loss": 0.0345, "step": 45510 }, { "epoch": 33.34798534798535, "grad_norm": 0.5, "learning_rate": 1.602319046487131e-06, "loss": 0.0308, "step": 45520 }, { "epoch": 33.35531135531136, "grad_norm": 0.31640625, "learning_rate": 1.5970019087232046e-06, "loss": 0.0379, "step": 45530 }, { "epoch": 33.362637362637365, "grad_norm": 0.4453125, "learning_rate": 1.591708201990115e-06, "loss": 0.0346, "step": 45540 }, { "epoch": 33.36996336996337, "grad_norm": 0.37109375, "learning_rate": 1.5864379288242532e-06, "loss": 0.034, "step": 45550 }, { "epoch": 33.37728937728938, "grad_norm": 0.337890625, "learning_rate": 1.5811910917507788e-06, "loss": 0.0307, "step": 45560 }, { "epoch": 33.38461538461539, "grad_norm": 0.373046875, "learning_rate": 1.5759676932836275e-06, "loss": 0.031, "step": 45570 }, { "epoch": 33.391941391941394, "grad_norm": 0.51171875, "learning_rate": 1.5707677359254974e-06, "loss": 0.033, "step": 45580 }, { "epoch": 33.3992673992674, "grad_norm": 0.341796875, "learning_rate": 1.565591222167859e-06, "loss": 0.0299, "step": 45590 }, { "epoch": 33.40659340659341, "grad_norm": 0.67578125, "learning_rate": 1.5604381544909558e-06, "loss": 0.0304, "step": 45600 }, { "epoch": 33.413919413919416, "grad_norm": 0.4375, "learning_rate": 1.5553085353637809e-06, "loss": 0.0307, "step": 45610 }, { "epoch": 33.42124542124542, "grad_norm": 0.423828125, "learning_rate": 1.5502023672441115e-06, "loss": 0.0326, "step": 45620 }, { "epoch": 33.42857142857143, "grad_norm": 0.453125, "learning_rate": 1.5451196525784805e-06, "loss": 0.0319, "step": 45630 }, { "epoch": 33.43589743589744, "grad_norm": 0.296875, "learning_rate": 1.5400603938021712e-06, "loss": 0.0287, "step": 45640 }, { "epoch": 33.443223443223445, "grad_norm": 0.28125, "learning_rate": 1.5350245933392563e-06, "loss": 0.0271, "step": 45650 }, { "epoch": 33.45054945054945, "grad_norm": 0.283203125, "learning_rate": 1.5300122536025473e-06, "loss": 0.0321, "step": 45660 }, { "epoch": 33.45787545787546, "grad_norm": 0.6171875, "learning_rate": 1.5250233769936182e-06, "loss": 0.0319, "step": 45670 }, { "epoch": 33.46520146520147, "grad_norm": 0.439453125, "learning_rate": 1.5200579659028096e-06, "loss": 0.0292, "step": 45680 }, { "epoch": 33.472527472527474, "grad_norm": 0.404296875, "learning_rate": 1.5151160227092006e-06, "loss": 0.0336, "step": 45690 }, { "epoch": 33.47985347985348, "grad_norm": 0.34765625, "learning_rate": 1.5101975497806496e-06, "loss": 0.0322, "step": 45700 }, { "epoch": 33.48717948717949, "grad_norm": 0.34765625, "learning_rate": 1.5053025494737485e-06, "loss": 0.0334, "step": 45710 }, { "epoch": 33.494505494505496, "grad_norm": 0.291015625, "learning_rate": 1.5004310241338672e-06, "loss": 0.0281, "step": 45720 }, { "epoch": 33.501831501831504, "grad_norm": 0.376953125, "learning_rate": 1.495582976095104e-06, "loss": 0.0329, "step": 45730 }, { "epoch": 33.50915750915751, "grad_norm": 0.56640625, "learning_rate": 1.4907584076803136e-06, "loss": 0.0343, "step": 45740 }, { "epoch": 33.51648351648352, "grad_norm": 0.390625, "learning_rate": 1.4859573212011114e-06, "loss": 0.0338, "step": 45750 }, { "epoch": 33.523809523809526, "grad_norm": 0.431640625, "learning_rate": 1.4811797189578588e-06, "loss": 0.0342, "step": 45760 }, { "epoch": 33.53113553113553, "grad_norm": 0.431640625, "learning_rate": 1.4764256032396504e-06, "loss": 0.0331, "step": 45770 }, { "epoch": 33.53846153846154, "grad_norm": 0.7421875, "learning_rate": 1.4716949763243482e-06, "loss": 0.0326, "step": 45780 }, { "epoch": 33.54578754578755, "grad_norm": 0.431640625, "learning_rate": 1.466987840478548e-06, "loss": 0.0301, "step": 45790 }, { "epoch": 33.553113553113555, "grad_norm": 0.33203125, "learning_rate": 1.4623041979575966e-06, "loss": 0.0311, "step": 45800 }, { "epoch": 33.56043956043956, "grad_norm": 0.333984375, "learning_rate": 1.457644051005574e-06, "loss": 0.0335, "step": 45810 }, { "epoch": 33.56776556776557, "grad_norm": 0.3671875, "learning_rate": 1.4530074018553112e-06, "loss": 0.0301, "step": 45820 }, { "epoch": 33.57509157509158, "grad_norm": 0.609375, "learning_rate": 1.4483942527283843e-06, "loss": 0.0328, "step": 45830 }, { "epoch": 33.582417582417584, "grad_norm": 0.36328125, "learning_rate": 1.4438046058350973e-06, "loss": 0.0381, "step": 45840 }, { "epoch": 33.58974358974359, "grad_norm": 0.400390625, "learning_rate": 1.4392384633745114e-06, "loss": 0.0328, "step": 45850 }, { "epoch": 33.5970695970696, "grad_norm": 0.28515625, "learning_rate": 1.4346958275344039e-06, "loss": 0.0354, "step": 45860 }, { "epoch": 33.604395604395606, "grad_norm": 0.52734375, "learning_rate": 1.430176700491298e-06, "loss": 0.0356, "step": 45870 }, { "epoch": 33.61172161172161, "grad_norm": 0.416015625, "learning_rate": 1.4256810844104728e-06, "loss": 0.0304, "step": 45880 }, { "epoch": 33.61904761904762, "grad_norm": 0.69921875, "learning_rate": 1.421208981445908e-06, "loss": 0.0317, "step": 45890 }, { "epoch": 33.62637362637363, "grad_norm": 0.36328125, "learning_rate": 1.416760393740345e-06, "loss": 0.0324, "step": 45900 }, { "epoch": 33.633699633699635, "grad_norm": 0.310546875, "learning_rate": 1.412335323425249e-06, "loss": 0.0323, "step": 45910 }, { "epoch": 33.64102564102564, "grad_norm": 0.28125, "learning_rate": 1.4079337726208068e-06, "loss": 0.0316, "step": 45920 }, { "epoch": 33.64835164835165, "grad_norm": 0.36328125, "learning_rate": 1.4035557434359512e-06, "loss": 0.032, "step": 45930 }, { "epoch": 33.65567765567766, "grad_norm": 0.73046875, "learning_rate": 1.3992012379683376e-06, "loss": 0.0357, "step": 45940 }, { "epoch": 33.663003663003664, "grad_norm": 0.59765625, "learning_rate": 1.3948702583043613e-06, "loss": 0.0327, "step": 45950 }, { "epoch": 33.67032967032967, "grad_norm": 0.6484375, "learning_rate": 1.3905628065191292e-06, "loss": 0.0333, "step": 45960 }, { "epoch": 33.67765567765568, "grad_norm": 0.38671875, "learning_rate": 1.3862788846764772e-06, "loss": 0.034, "step": 45970 }, { "epoch": 33.684981684981686, "grad_norm": 0.453125, "learning_rate": 1.3820184948289852e-06, "loss": 0.0332, "step": 45980 }, { "epoch": 33.69230769230769, "grad_norm": 0.3828125, "learning_rate": 1.3777816390179412e-06, "loss": 0.0323, "step": 45990 }, { "epoch": 33.6996336996337, "grad_norm": 0.404296875, "learning_rate": 1.373568319273355e-06, "loss": 0.0289, "step": 46000 }, { "epoch": 33.70695970695971, "grad_norm": 0.41796875, "learning_rate": 1.3693785376139758e-06, "loss": 0.0326, "step": 46010 }, { "epoch": 33.714285714285715, "grad_norm": 0.45703125, "learning_rate": 1.3652122960472608e-06, "loss": 0.0343, "step": 46020 }, { "epoch": 33.72161172161172, "grad_norm": 0.41796875, "learning_rate": 1.3610695965693938e-06, "loss": 0.0336, "step": 46030 }, { "epoch": 33.72893772893773, "grad_norm": 0.27734375, "learning_rate": 1.3569504411652775e-06, "loss": 0.0295, "step": 46040 }, { "epoch": 33.73626373626374, "grad_norm": 0.62890625, "learning_rate": 1.3528548318085371e-06, "loss": 0.0349, "step": 46050 }, { "epoch": 33.743589743589745, "grad_norm": 0.3515625, "learning_rate": 1.3487827704615098e-06, "loss": 0.0342, "step": 46060 }, { "epoch": 33.75091575091575, "grad_norm": 0.490234375, "learning_rate": 1.3447342590752505e-06, "loss": 0.032, "step": 46070 }, { "epoch": 33.75824175824176, "grad_norm": 0.4140625, "learning_rate": 1.3407092995895427e-06, "loss": 0.0345, "step": 46080 }, { "epoch": 33.765567765567766, "grad_norm": 0.373046875, "learning_rate": 1.336707893932876e-06, "loss": 0.0327, "step": 46090 }, { "epoch": 33.772893772893774, "grad_norm": 0.64453125, "learning_rate": 1.3327300440224465e-06, "loss": 0.036, "step": 46100 }, { "epoch": 33.78021978021978, "grad_norm": 0.69921875, "learning_rate": 1.3287757517641744e-06, "loss": 0.0316, "step": 46110 }, { "epoch": 33.78754578754579, "grad_norm": 0.5234375, "learning_rate": 1.3248450190527017e-06, "loss": 0.0306, "step": 46120 }, { "epoch": 33.794871794871796, "grad_norm": 0.38671875, "learning_rate": 1.3209378477713608e-06, "loss": 0.0303, "step": 46130 }, { "epoch": 33.8021978021978, "grad_norm": 0.484375, "learning_rate": 1.3170542397922074e-06, "loss": 0.0332, "step": 46140 }, { "epoch": 33.80952380952381, "grad_norm": 1.0078125, "learning_rate": 1.3131941969760033e-06, "loss": 0.0322, "step": 46150 }, { "epoch": 33.81684981684982, "grad_norm": 0.39453125, "learning_rate": 1.3093577211722289e-06, "loss": 0.0363, "step": 46160 }, { "epoch": 33.824175824175825, "grad_norm": 0.322265625, "learning_rate": 1.3055448142190637e-06, "loss": 0.0364, "step": 46170 }, { "epoch": 33.83150183150183, "grad_norm": 0.38671875, "learning_rate": 1.3017554779433954e-06, "loss": 0.029, "step": 46180 }, { "epoch": 33.83882783882784, "grad_norm": 0.482421875, "learning_rate": 1.2979897141608175e-06, "loss": 0.031, "step": 46190 }, { "epoch": 33.84615384615385, "grad_norm": 0.451171875, "learning_rate": 1.2942475246756302e-06, "loss": 0.0376, "step": 46200 }, { "epoch": 33.853479853479854, "grad_norm": 0.45703125, "learning_rate": 1.2905289112808405e-06, "loss": 0.0379, "step": 46210 }, { "epoch": 33.86080586080586, "grad_norm": 0.5, "learning_rate": 1.2868338757581615e-06, "loss": 0.0321, "step": 46220 }, { "epoch": 33.86813186813187, "grad_norm": 0.458984375, "learning_rate": 1.2831624198779964e-06, "loss": 0.0356, "step": 46230 }, { "epoch": 33.875457875457876, "grad_norm": 0.4453125, "learning_rate": 1.2795145453994719e-06, "loss": 0.0315, "step": 46240 }, { "epoch": 33.88278388278388, "grad_norm": 0.478515625, "learning_rate": 1.2758902540703985e-06, "loss": 0.035, "step": 46250 }, { "epoch": 33.89010989010989, "grad_norm": 0.498046875, "learning_rate": 1.2722895476272937e-06, "loss": 0.0293, "step": 46260 }, { "epoch": 33.8974358974359, "grad_norm": 0.365234375, "learning_rate": 1.2687124277953763e-06, "loss": 0.0311, "step": 46270 }, { "epoch": 33.904761904761905, "grad_norm": 0.310546875, "learning_rate": 1.2651588962885601e-06, "loss": 0.03, "step": 46280 }, { "epoch": 33.91208791208791, "grad_norm": 0.2353515625, "learning_rate": 1.2616289548094602e-06, "loss": 0.0386, "step": 46290 }, { "epoch": 33.91941391941392, "grad_norm": 0.357421875, "learning_rate": 1.258122605049376e-06, "loss": 0.038, "step": 46300 }, { "epoch": 33.92673992673993, "grad_norm": 0.33984375, "learning_rate": 1.2546398486883303e-06, "loss": 0.0353, "step": 46310 }, { "epoch": 33.934065934065934, "grad_norm": 0.453125, "learning_rate": 1.2511806873950247e-06, "loss": 0.0336, "step": 46320 }, { "epoch": 33.94139194139194, "grad_norm": 0.42578125, "learning_rate": 1.2477451228268453e-06, "loss": 0.0309, "step": 46330 }, { "epoch": 33.94871794871795, "grad_norm": 0.322265625, "learning_rate": 1.2443331566298908e-06, "loss": 0.0311, "step": 46340 }, { "epoch": 33.956043956043956, "grad_norm": 0.4765625, "learning_rate": 1.240944790438949e-06, "loss": 0.0346, "step": 46350 }, { "epoch": 33.96336996336996, "grad_norm": 0.59375, "learning_rate": 1.2375800258774877e-06, "loss": 0.031, "step": 46360 }, { "epoch": 33.97069597069597, "grad_norm": 0.43359375, "learning_rate": 1.2342388645576915e-06, "loss": 0.0303, "step": 46370 }, { "epoch": 33.97802197802198, "grad_norm": 0.349609375, "learning_rate": 1.2309213080804022e-06, "loss": 0.0335, "step": 46380 }, { "epoch": 33.985347985347985, "grad_norm": 0.38671875, "learning_rate": 1.2276273580351856e-06, "loss": 0.0331, "step": 46390 }, { "epoch": 33.99267399267399, "grad_norm": 0.359375, "learning_rate": 1.2243570160002736e-06, "loss": 0.0306, "step": 46400 }, { "epoch": 34.0, "grad_norm": 0.5, "learning_rate": 1.2211102835425955e-06, "loss": 0.0304, "step": 46410 }, { "epoch": 34.00732600732601, "grad_norm": 0.44140625, "learning_rate": 1.2178871622177642e-06, "loss": 0.0309, "step": 46420 }, { "epoch": 34.014652014652015, "grad_norm": 0.388671875, "learning_rate": 1.2146876535700885e-06, "loss": 0.0306, "step": 46430 }, { "epoch": 34.02197802197802, "grad_norm": 0.4765625, "learning_rate": 1.2115117591325564e-06, "loss": 0.0347, "step": 46440 }, { "epoch": 34.02930402930403, "grad_norm": 0.404296875, "learning_rate": 1.208359480426845e-06, "loss": 0.0301, "step": 46450 }, { "epoch": 34.03663003663004, "grad_norm": 0.4453125, "learning_rate": 1.2052308189633061e-06, "loss": 0.0325, "step": 46460 }, { "epoch": 34.043956043956044, "grad_norm": 0.36328125, "learning_rate": 1.202125776240992e-06, "loss": 0.0313, "step": 46470 }, { "epoch": 34.05128205128205, "grad_norm": 0.373046875, "learning_rate": 1.1990443537476234e-06, "loss": 0.035, "step": 46480 }, { "epoch": 34.05860805860806, "grad_norm": 0.365234375, "learning_rate": 1.1959865529596214e-06, "loss": 0.0317, "step": 46490 }, { "epoch": 34.065934065934066, "grad_norm": 0.408203125, "learning_rate": 1.19295237534207e-06, "loss": 0.0319, "step": 46500 }, { "epoch": 34.07326007326007, "grad_norm": 0.330078125, "learning_rate": 1.1899418223487492e-06, "loss": 0.032, "step": 46510 }, { "epoch": 34.08058608058608, "grad_norm": 0.33984375, "learning_rate": 1.1869548954221064e-06, "loss": 0.0362, "step": 46520 }, { "epoch": 34.08791208791209, "grad_norm": 0.45703125, "learning_rate": 1.1839915959932847e-06, "loss": 0.0333, "step": 46530 }, { "epoch": 34.095238095238095, "grad_norm": 0.33984375, "learning_rate": 1.1810519254820903e-06, "loss": 0.0311, "step": 46540 }, { "epoch": 34.1025641025641, "grad_norm": 0.27734375, "learning_rate": 1.178135885297024e-06, "loss": 0.0357, "step": 46550 }, { "epoch": 34.10989010989011, "grad_norm": 0.57421875, "learning_rate": 1.17524347683525e-06, "loss": 0.0336, "step": 46560 }, { "epoch": 34.11721611721612, "grad_norm": 0.400390625, "learning_rate": 1.1723747014826167e-06, "loss": 0.0309, "step": 46570 }, { "epoch": 34.124542124542124, "grad_norm": 0.3828125, "learning_rate": 1.1695295606136517e-06, "loss": 0.0311, "step": 46580 }, { "epoch": 34.13186813186813, "grad_norm": 0.361328125, "learning_rate": 1.1667080555915501e-06, "loss": 0.0322, "step": 46590 }, { "epoch": 34.13919413919414, "grad_norm": 0.37109375, "learning_rate": 1.1639101877681926e-06, "loss": 0.0356, "step": 46600 }, { "epoch": 34.146520146520146, "grad_norm": 0.3984375, "learning_rate": 1.161135958484127e-06, "loss": 0.0364, "step": 46610 }, { "epoch": 34.15384615384615, "grad_norm": 0.37109375, "learning_rate": 1.1583853690685806e-06, "loss": 0.0322, "step": 46620 }, { "epoch": 34.16117216117216, "grad_norm": 0.353515625, "learning_rate": 1.1556584208394486e-06, "loss": 0.0315, "step": 46630 }, { "epoch": 34.16849816849817, "grad_norm": 0.423828125, "learning_rate": 1.1529551151033003e-06, "loss": 0.0318, "step": 46640 }, { "epoch": 34.175824175824175, "grad_norm": 0.4140625, "learning_rate": 1.1502754531553774e-06, "loss": 0.0333, "step": 46650 }, { "epoch": 34.18315018315018, "grad_norm": 0.46875, "learning_rate": 1.1476194362795965e-06, "loss": 0.0292, "step": 46660 }, { "epoch": 34.19047619047619, "grad_norm": 0.423828125, "learning_rate": 1.144987065748541e-06, "loss": 0.0377, "step": 46670 }, { "epoch": 34.1978021978022, "grad_norm": 0.462890625, "learning_rate": 1.1423783428234694e-06, "loss": 0.0344, "step": 46680 }, { "epoch": 34.205128205128204, "grad_norm": 0.435546875, "learning_rate": 1.1397932687543013e-06, "loss": 0.0331, "step": 46690 }, { "epoch": 34.21245421245421, "grad_norm": 0.3828125, "learning_rate": 1.13723184477963e-06, "loss": 0.0293, "step": 46700 }, { "epoch": 34.21978021978022, "grad_norm": 0.515625, "learning_rate": 1.1346940721267238e-06, "loss": 0.034, "step": 46710 }, { "epoch": 34.227106227106226, "grad_norm": 0.2890625, "learning_rate": 1.1321799520115119e-06, "loss": 0.0294, "step": 46720 }, { "epoch": 34.234432234432234, "grad_norm": 0.359375, "learning_rate": 1.1296894856385866e-06, "loss": 0.0319, "step": 46730 }, { "epoch": 34.24175824175824, "grad_norm": 0.44140625, "learning_rate": 1.1272226742012144e-06, "loss": 0.036, "step": 46740 }, { "epoch": 34.24908424908425, "grad_norm": 0.6328125, "learning_rate": 1.1247795188813291e-06, "loss": 0.0317, "step": 46750 }, { "epoch": 34.256410256410255, "grad_norm": 0.453125, "learning_rate": 1.1223600208495218e-06, "loss": 0.0306, "step": 46760 }, { "epoch": 34.26373626373626, "grad_norm": 0.546875, "learning_rate": 1.119964181265057e-06, "loss": 0.0363, "step": 46770 }, { "epoch": 34.27106227106227, "grad_norm": 0.54296875, "learning_rate": 1.117592001275857e-06, "loss": 0.0308, "step": 46780 }, { "epoch": 34.27838827838828, "grad_norm": 0.431640625, "learning_rate": 1.115243482018511e-06, "loss": 0.0283, "step": 46790 }, { "epoch": 34.285714285714285, "grad_norm": 0.41796875, "learning_rate": 1.1129186246182718e-06, "loss": 0.035, "step": 46800 }, { "epoch": 34.29304029304029, "grad_norm": 0.408203125, "learning_rate": 1.1106174301890596e-06, "loss": 0.0351, "step": 46810 }, { "epoch": 34.3003663003663, "grad_norm": 0.310546875, "learning_rate": 1.1083398998334468e-06, "loss": 0.0281, "step": 46820 }, { "epoch": 34.30769230769231, "grad_norm": 0.66796875, "learning_rate": 1.106086034642673e-06, "loss": 0.0377, "step": 46830 }, { "epoch": 34.315018315018314, "grad_norm": 0.37109375, "learning_rate": 1.1038558356966468e-06, "loss": 0.0316, "step": 46840 }, { "epoch": 34.32234432234432, "grad_norm": 0.361328125, "learning_rate": 1.1016493040639166e-06, "loss": 0.0367, "step": 46850 }, { "epoch": 34.32967032967033, "grad_norm": 0.39453125, "learning_rate": 1.099466440801716e-06, "loss": 0.0328, "step": 46860 }, { "epoch": 34.336996336996336, "grad_norm": 0.296875, "learning_rate": 1.0973072469559187e-06, "loss": 0.0321, "step": 46870 }, { "epoch": 34.34432234432234, "grad_norm": 0.44140625, "learning_rate": 1.0951717235610667e-06, "loss": 0.0353, "step": 46880 }, { "epoch": 34.35164835164835, "grad_norm": 0.369140625, "learning_rate": 1.0930598716403647e-06, "loss": 0.0296, "step": 46890 }, { "epoch": 34.35897435897436, "grad_norm": 0.419921875, "learning_rate": 1.0909716922056573e-06, "loss": 0.0333, "step": 46900 }, { "epoch": 34.366300366300365, "grad_norm": 0.400390625, "learning_rate": 1.0889071862574803e-06, "loss": 0.0306, "step": 46910 }, { "epoch": 34.37362637362637, "grad_norm": 0.431640625, "learning_rate": 1.086866354784987e-06, "loss": 0.036, "step": 46920 }, { "epoch": 34.38095238095238, "grad_norm": 0.4609375, "learning_rate": 1.084849198766016e-06, "loss": 0.0355, "step": 46930 }, { "epoch": 34.38827838827839, "grad_norm": 0.2412109375, "learning_rate": 1.0828557191670517e-06, "loss": 0.0366, "step": 46940 }, { "epoch": 34.395604395604394, "grad_norm": 0.5390625, "learning_rate": 1.0808859169432352e-06, "loss": 0.0342, "step": 46950 }, { "epoch": 34.4029304029304, "grad_norm": 0.33203125, "learning_rate": 1.078939793038365e-06, "loss": 0.033, "step": 46960 }, { "epoch": 34.41025641025641, "grad_norm": 0.369140625, "learning_rate": 1.0770173483848863e-06, "loss": 0.0321, "step": 46970 }, { "epoch": 34.417582417582416, "grad_norm": 0.3984375, "learning_rate": 1.075118583903917e-06, "loss": 0.0299, "step": 46980 }, { "epoch": 34.42490842490842, "grad_norm": 0.455078125, "learning_rate": 1.0732435005052047e-06, "loss": 0.035, "step": 46990 }, { "epoch": 34.43223443223443, "grad_norm": 0.279296875, "learning_rate": 1.071392099087171e-06, "loss": 0.0348, "step": 47000 }, { "epoch": 34.43956043956044, "grad_norm": 0.326171875, "learning_rate": 1.0695643805368832e-06, "loss": 0.0317, "step": 47010 }, { "epoch": 34.446886446886445, "grad_norm": 0.3125, "learning_rate": 1.0677603457300602e-06, "loss": 0.0347, "step": 47020 }, { "epoch": 34.45421245421245, "grad_norm": 0.22265625, "learning_rate": 1.0659799955310727e-06, "loss": 0.0335, "step": 47030 }, { "epoch": 34.46153846153846, "grad_norm": 0.69140625, "learning_rate": 1.0642233307929485e-06, "loss": 0.0324, "step": 47040 }, { "epoch": 34.46886446886447, "grad_norm": 0.5546875, "learning_rate": 1.0624903523573555e-06, "loss": 0.0351, "step": 47050 }, { "epoch": 34.476190476190474, "grad_norm": 0.3359375, "learning_rate": 1.0607810610546301e-06, "loss": 0.0288, "step": 47060 }, { "epoch": 34.48351648351648, "grad_norm": 0.259765625, "learning_rate": 1.0590954577037384e-06, "loss": 0.0345, "step": 47070 }, { "epoch": 34.49084249084249, "grad_norm": 0.404296875, "learning_rate": 1.0574335431123194e-06, "loss": 0.0371, "step": 47080 }, { "epoch": 34.498168498168496, "grad_norm": 0.4375, "learning_rate": 1.0557953180766483e-06, "loss": 0.0389, "step": 47090 }, { "epoch": 34.505494505494504, "grad_norm": 0.345703125, "learning_rate": 1.0541807833816507e-06, "loss": 0.0314, "step": 47100 }, { "epoch": 34.51282051282051, "grad_norm": 0.95703125, "learning_rate": 1.0525899398009045e-06, "loss": 0.0365, "step": 47110 }, { "epoch": 34.52014652014652, "grad_norm": 0.462890625, "learning_rate": 1.051022788096628e-06, "loss": 0.0318, "step": 47120 }, { "epoch": 34.527472527472526, "grad_norm": 0.412109375, "learning_rate": 1.0494793290197074e-06, "loss": 0.0371, "step": 47130 }, { "epoch": 34.53479853479853, "grad_norm": 0.6796875, "learning_rate": 1.0479595633096536e-06, "loss": 0.0343, "step": 47140 }, { "epoch": 34.54212454212454, "grad_norm": 0.31640625, "learning_rate": 1.0464634916946446e-06, "loss": 0.0291, "step": 47150 }, { "epoch": 34.54945054945055, "grad_norm": 0.53125, "learning_rate": 1.0449911148914885e-06, "loss": 0.0333, "step": 47160 }, { "epoch": 34.556776556776555, "grad_norm": 0.330078125, "learning_rate": 1.0435424336056564e-06, "loss": 0.0316, "step": 47170 }, { "epoch": 34.56410256410256, "grad_norm": 0.384765625, "learning_rate": 1.0421174485312592e-06, "loss": 0.0305, "step": 47180 }, { "epoch": 34.57142857142857, "grad_norm": 0.58984375, "learning_rate": 1.040716160351049e-06, "loss": 0.0358, "step": 47190 }, { "epoch": 34.57875457875458, "grad_norm": 0.4453125, "learning_rate": 1.0393385697364348e-06, "loss": 0.0281, "step": 47200 }, { "epoch": 34.586080586080584, "grad_norm": 0.53515625, "learning_rate": 1.0379846773474602e-06, "loss": 0.0334, "step": 47210 }, { "epoch": 34.59340659340659, "grad_norm": 0.353515625, "learning_rate": 1.0366544838328213e-06, "loss": 0.0318, "step": 47220 }, { "epoch": 34.6007326007326, "grad_norm": 0.443359375, "learning_rate": 1.0353479898298593e-06, "loss": 0.0363, "step": 47230 }, { "epoch": 34.608058608058606, "grad_norm": 0.384765625, "learning_rate": 1.0340651959645567e-06, "loss": 0.0304, "step": 47240 }, { "epoch": 34.61538461538461, "grad_norm": 0.515625, "learning_rate": 1.0328061028515421e-06, "loss": 0.0325, "step": 47250 }, { "epoch": 34.62271062271062, "grad_norm": 0.88671875, "learning_rate": 1.0315707110940841e-06, "loss": 0.032, "step": 47260 }, { "epoch": 34.63003663003663, "grad_norm": 0.380859375, "learning_rate": 1.0303590212841086e-06, "loss": 0.0304, "step": 47270 }, { "epoch": 34.637362637362635, "grad_norm": 0.380859375, "learning_rate": 1.0291710340021716e-06, "loss": 0.0321, "step": 47280 }, { "epoch": 34.64468864468864, "grad_norm": 0.54296875, "learning_rate": 1.0280067498174744e-06, "loss": 0.0316, "step": 47290 }, { "epoch": 34.65201465201465, "grad_norm": 0.81640625, "learning_rate": 1.026866169287865e-06, "loss": 0.0347, "step": 47300 }, { "epoch": 34.65934065934066, "grad_norm": 0.373046875, "learning_rate": 1.0257492929598373e-06, "loss": 0.0367, "step": 47310 }, { "epoch": 34.666666666666664, "grad_norm": 0.34375, "learning_rate": 1.0246561213685147e-06, "loss": 0.0404, "step": 47320 }, { "epoch": 34.67399267399267, "grad_norm": 0.4453125, "learning_rate": 1.023586655037683e-06, "loss": 0.0334, "step": 47330 }, { "epoch": 34.68131868131868, "grad_norm": 0.46484375, "learning_rate": 1.022540894479747e-06, "loss": 0.0344, "step": 47340 }, { "epoch": 34.688644688644686, "grad_norm": 0.4453125, "learning_rate": 1.0215188401957682e-06, "loss": 0.0292, "step": 47350 }, { "epoch": 34.69597069597069, "grad_norm": 0.39453125, "learning_rate": 1.020520492675455e-06, "loss": 0.0296, "step": 47360 }, { "epoch": 34.7032967032967, "grad_norm": 0.37890625, "learning_rate": 1.0195458523971384e-06, "loss": 0.0294, "step": 47370 }, { "epoch": 34.71062271062271, "grad_norm": 0.388671875, "learning_rate": 1.0185949198278025e-06, "loss": 0.0318, "step": 47380 }, { "epoch": 34.717948717948715, "grad_norm": 0.6875, "learning_rate": 1.017667695423071e-06, "loss": 0.0309, "step": 47390 }, { "epoch": 34.72527472527472, "grad_norm": 0.31640625, "learning_rate": 1.0167641796272088e-06, "loss": 0.0329, "step": 47400 }, { "epoch": 34.73260073260073, "grad_norm": 0.318359375, "learning_rate": 1.0158843728731207e-06, "loss": 0.0322, "step": 47410 }, { "epoch": 34.73992673992674, "grad_norm": 0.40234375, "learning_rate": 1.0150282755823472e-06, "loss": 0.0346, "step": 47420 }, { "epoch": 34.747252747252745, "grad_norm": 0.515625, "learning_rate": 1.0141958881650805e-06, "loss": 0.0369, "step": 47430 }, { "epoch": 34.75457875457875, "grad_norm": 0.462890625, "learning_rate": 1.013387211020136e-06, "loss": 0.0277, "step": 47440 }, { "epoch": 34.76190476190476, "grad_norm": 0.302734375, "learning_rate": 1.0126022445349764e-06, "loss": 0.0305, "step": 47450 }, { "epoch": 34.76923076923077, "grad_norm": 0.7578125, "learning_rate": 1.011840989085715e-06, "loss": 0.0387, "step": 47460 }, { "epoch": 34.776556776556774, "grad_norm": 0.5625, "learning_rate": 1.0111034450370893e-06, "loss": 0.0322, "step": 47470 }, { "epoch": 34.78388278388278, "grad_norm": 0.33203125, "learning_rate": 1.0103896127424726e-06, "loss": 0.033, "step": 47480 }, { "epoch": 34.79120879120879, "grad_norm": 0.52734375, "learning_rate": 1.0096994925438999e-06, "loss": 0.0378, "step": 47490 }, { "epoch": 34.798534798534796, "grad_norm": 0.39453125, "learning_rate": 1.00903308477202e-06, "loss": 0.0316, "step": 47500 }, { "epoch": 34.8058608058608, "grad_norm": 0.50390625, "learning_rate": 1.0083903897461328e-06, "loss": 0.0341, "step": 47510 }, { "epoch": 34.81318681318681, "grad_norm": 0.353515625, "learning_rate": 1.007771407774179e-06, "loss": 0.0351, "step": 47520 }, { "epoch": 34.82051282051282, "grad_norm": 0.337890625, "learning_rate": 1.007176139152729e-06, "loss": 0.0283, "step": 47530 }, { "epoch": 34.827838827838825, "grad_norm": 0.408203125, "learning_rate": 1.0066045841669937e-06, "loss": 0.0349, "step": 47540 }, { "epoch": 34.83516483516483, "grad_norm": 0.33984375, "learning_rate": 1.0060567430908298e-06, "loss": 0.0335, "step": 47550 }, { "epoch": 34.84249084249084, "grad_norm": 0.45703125, "learning_rate": 1.0055326161867239e-06, "loss": 0.0317, "step": 47560 }, { "epoch": 34.84981684981685, "grad_norm": 0.390625, "learning_rate": 1.0050322037057978e-06, "loss": 0.0326, "step": 47570 }, { "epoch": 34.857142857142854, "grad_norm": 0.43359375, "learning_rate": 1.0045555058878188e-06, "loss": 0.0305, "step": 47580 }, { "epoch": 34.86446886446886, "grad_norm": 0.412109375, "learning_rate": 1.0041025229611904e-06, "loss": 0.0318, "step": 47590 }, { "epoch": 34.87179487179487, "grad_norm": 0.56640625, "learning_rate": 1.0036732551429496e-06, "loss": 0.0355, "step": 47600 }, { "epoch": 34.879120879120876, "grad_norm": 0.337890625, "learning_rate": 1.0032677026387756e-06, "loss": 0.0338, "step": 47610 }, { "epoch": 34.88644688644688, "grad_norm": 0.384765625, "learning_rate": 1.0028858656429762e-06, "loss": 0.0347, "step": 47620 }, { "epoch": 34.89377289377289, "grad_norm": 0.451171875, "learning_rate": 1.0025277443385055e-06, "loss": 0.0319, "step": 47630 }, { "epoch": 34.9010989010989, "grad_norm": 0.3984375, "learning_rate": 1.0021933388969472e-06, "loss": 0.0323, "step": 47640 }, { "epoch": 34.908424908424905, "grad_norm": 0.330078125, "learning_rate": 1.0018826494785367e-06, "loss": 0.0341, "step": 47650 }, { "epoch": 34.91575091575091, "grad_norm": 0.296875, "learning_rate": 1.0015956762321227e-06, "loss": 0.0311, "step": 47660 }, { "epoch": 34.92307692307692, "grad_norm": 0.31640625, "learning_rate": 1.0013324192952103e-06, "loss": 0.0334, "step": 47670 }, { "epoch": 34.93040293040293, "grad_norm": 0.392578125, "learning_rate": 1.0010928787939348e-06, "loss": 0.0324, "step": 47680 }, { "epoch": 34.937728937728934, "grad_norm": 0.26171875, "learning_rate": 1.0008770548430663e-06, "loss": 0.0321, "step": 47690 }, { "epoch": 34.94505494505494, "grad_norm": 0.29296875, "learning_rate": 1.0006849475460103e-06, "loss": 0.036, "step": 47700 }, { "epoch": 34.95238095238095, "grad_norm": 0.310546875, "learning_rate": 1.0005165569948179e-06, "loss": 0.0339, "step": 47710 }, { "epoch": 34.959706959706956, "grad_norm": 0.427734375, "learning_rate": 1.0003718832701645e-06, "loss": 0.0325, "step": 47720 }, { "epoch": 34.967032967032964, "grad_norm": 0.30859375, "learning_rate": 1.0002509264413722e-06, "loss": 0.0277, "step": 47730 }, { "epoch": 34.97435897435897, "grad_norm": 0.3125, "learning_rate": 1.000153686566397e-06, "loss": 0.0326, "step": 47740 }, { "epoch": 34.98168498168498, "grad_norm": 0.427734375, "learning_rate": 1.00008016369182e-06, "loss": 0.0303, "step": 47750 }, { "epoch": 34.98901098901099, "grad_norm": 0.66015625, "learning_rate": 1.0000303578528843e-06, "loss": 0.0323, "step": 47760 }, { "epoch": 34.99633699633699, "grad_norm": 0.404296875, "learning_rate": 1.0000042690734357e-06, "loss": 0.0304, "step": 47770 }, { "epoch": 35.0, "step": 47775, "total_flos": 0.0, "train_loss": 0.06935119654544669, "train_runtime": 33452.7167, "train_samples_per_second": 365.64, "train_steps_per_second": 1.428 } ], "logging_steps": 10, "max_steps": 47775, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }