| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.8119891008174385, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006811989100817439, |
| "grad_norm": 11.914588928222656, |
| "learning_rate": 3.6e-06, |
| "loss": 0.9281, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013623978201634877, |
| "grad_norm": 2.2081298828125, |
| "learning_rate": 7.6e-06, |
| "loss": 0.4782, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020435967302452316, |
| "grad_norm": 1.648837924003601, |
| "learning_rate": 1.16e-05, |
| "loss": 0.3026, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.027247956403269755, |
| "grad_norm": 1.3150748014450073, |
| "learning_rate": 1.56e-05, |
| "loss": 0.2283, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0340599455040872, |
| "grad_norm": 1.7804750204086304, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 0.2138, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04087193460490463, |
| "grad_norm": 1.8128317594528198, |
| "learning_rate": 2.36e-05, |
| "loss": 0.2035, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.047683923705722074, |
| "grad_norm": 1.4426037073135376, |
| "learning_rate": 2.7600000000000003e-05, |
| "loss": 0.1941, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05449591280653951, |
| "grad_norm": 2.272278070449829, |
| "learning_rate": 3.16e-05, |
| "loss": 0.179, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06130790190735695, |
| "grad_norm": 1.347985863685608, |
| "learning_rate": 3.56e-05, |
| "loss": 0.148, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0681198910081744, |
| "grad_norm": 1.5960944890975952, |
| "learning_rate": 3.960000000000001e-05, |
| "loss": 0.1421, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07493188010899182, |
| "grad_norm": 0.8870837688446045, |
| "learning_rate": 4.36e-05, |
| "loss": 0.1422, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08174386920980926, |
| "grad_norm": 1.381057858467102, |
| "learning_rate": 4.76e-05, |
| "loss": 0.1392, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0885558583106267, |
| "grad_norm": 1.140463948249817, |
| "learning_rate": 5.16e-05, |
| "loss": 0.13, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09536784741144415, |
| "grad_norm": 1.0376285314559937, |
| "learning_rate": 5.560000000000001e-05, |
| "loss": 0.1254, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10217983651226158, |
| "grad_norm": 0.6157049536705017, |
| "learning_rate": 5.96e-05, |
| "loss": 0.1216, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10899182561307902, |
| "grad_norm": 1.6728339195251465, |
| "learning_rate": 6.36e-05, |
| "loss": 0.1169, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11580381471389646, |
| "grad_norm": 1.3726643323898315, |
| "learning_rate": 6.76e-05, |
| "loss": 0.1201, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1226158038147139, |
| "grad_norm": 1.2212395668029785, |
| "learning_rate": 7.16e-05, |
| "loss": 0.12, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12942779291553133, |
| "grad_norm": 1.034472107887268, |
| "learning_rate": 7.560000000000001e-05, |
| "loss": 0.098, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1362397820163488, |
| "grad_norm": 0.7338757514953613, |
| "learning_rate": 7.960000000000001e-05, |
| "loss": 0.0965, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.14305177111716622, |
| "grad_norm": 0.62514328956604, |
| "learning_rate": 8.36e-05, |
| "loss": 0.1108, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.14986376021798364, |
| "grad_norm": 0.8058353662490845, |
| "learning_rate": 8.76e-05, |
| "loss": 0.0948, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1566757493188011, |
| "grad_norm": 0.837098240852356, |
| "learning_rate": 9.16e-05, |
| "loss": 0.0948, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.16348773841961853, |
| "grad_norm": 1.094609260559082, |
| "learning_rate": 9.56e-05, |
| "loss": 0.0935, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.17029972752043596, |
| "grad_norm": 1.555716872215271, |
| "learning_rate": 9.960000000000001e-05, |
| "loss": 0.0949, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1771117166212534, |
| "grad_norm": 0.8324354290962219, |
| "learning_rate": 9.999911419878559e-05, |
| "loss": 0.0894, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.18392370572207084, |
| "grad_norm": 1.3196247816085815, |
| "learning_rate": 9.999605221019081e-05, |
| "loss": 0.1098, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1907356948228883, |
| "grad_norm": 0.6212723255157471, |
| "learning_rate": 9.999080323230761e-05, |
| "loss": 0.0848, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.19754768392370572, |
| "grad_norm": 0.9073509573936462, |
| "learning_rate": 9.998336749474329e-05, |
| "loss": 0.099, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.20435967302452315, |
| "grad_norm": 0.8732384443283081, |
| "learning_rate": 9.997374532276107e-05, |
| "loss": 0.0838, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2111716621253406, |
| "grad_norm": 1.0056062936782837, |
| "learning_rate": 9.996193713726596e-05, |
| "loss": 0.0818, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.21798365122615804, |
| "grad_norm": 0.7375788688659668, |
| "learning_rate": 9.994794345478624e-05, |
| "loss": 0.0768, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.22479564032697547, |
| "grad_norm": 1.0606452226638794, |
| "learning_rate": 9.99317648874509e-05, |
| "loss": 0.0845, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.23160762942779292, |
| "grad_norm": 0.699203610420227, |
| "learning_rate": 9.991340214296292e-05, |
| "loss": 0.0767, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.23841961852861035, |
| "grad_norm": 1.3575586080551147, |
| "learning_rate": 9.989285602456819e-05, |
| "loss": 0.075, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2452316076294278, |
| "grad_norm": 0.7841135263442993, |
| "learning_rate": 9.98701274310205e-05, |
| "loss": 0.0728, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.25204359673024523, |
| "grad_norm": 0.7767183184623718, |
| "learning_rate": 9.984521735654218e-05, |
| "loss": 0.0769, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.25885558583106266, |
| "grad_norm": 0.483733594417572, |
| "learning_rate": 9.981812689078057e-05, |
| "loss": 0.0669, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2656675749318801, |
| "grad_norm": 0.4933801591396332, |
| "learning_rate": 9.978885721876041e-05, |
| "loss": 0.0696, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2724795640326976, |
| "grad_norm": 0.6362014412879944, |
| "learning_rate": 9.975740962083198e-05, |
| "loss": 0.0678, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.279291553133515, |
| "grad_norm": 0.7683391571044922, |
| "learning_rate": 9.972378547261504e-05, |
| "loss": 0.0735, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.28610354223433243, |
| "grad_norm": 0.8926170468330383, |
| "learning_rate": 9.968798624493885e-05, |
| "loss": 0.0656, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.29291553133514986, |
| "grad_norm": 0.6009325385093689, |
| "learning_rate": 9.965001350377753e-05, |
| "loss": 0.074, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2997275204359673, |
| "grad_norm": 0.45287570357322693, |
| "learning_rate": 9.960986891018183e-05, |
| "loss": 0.0602, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3065395095367847, |
| "grad_norm": 0.47310397028923035, |
| "learning_rate": 9.95675542202063e-05, |
| "loss": 0.0619, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3133514986376022, |
| "grad_norm": 1.0169494152069092, |
| "learning_rate": 9.952307128483256e-05, |
| "loss": 0.0709, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3201634877384196, |
| "grad_norm": 0.7056031227111816, |
| "learning_rate": 9.947642204988835e-05, |
| "loss": 0.0666, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.32697547683923706, |
| "grad_norm": 0.817714512348175, |
| "learning_rate": 9.942760855596226e-05, |
| "loss": 0.0734, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3337874659400545, |
| "grad_norm": 0.5847324728965759, |
| "learning_rate": 9.937663293831471e-05, |
| "loss": 0.0643, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3405994550408719, |
| "grad_norm": 0.8371860384941101, |
| "learning_rate": 9.932349742678433e-05, |
| "loss": 0.0617, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3474114441416894, |
| "grad_norm": 0.47964030504226685, |
| "learning_rate": 9.926820434569051e-05, |
| "loss": 0.0649, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3542234332425068, |
| "grad_norm": 0.6083551645278931, |
| "learning_rate": 9.921075611373179e-05, |
| "loss": 0.0634, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.36103542234332425, |
| "grad_norm": 0.6717512011528015, |
| "learning_rate": 9.915115524387988e-05, |
| "loss": 0.0637, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3678474114441417, |
| "grad_norm": 0.7015753984451294, |
| "learning_rate": 9.908940434326997e-05, |
| "loss": 0.0633, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3746594005449591, |
| "grad_norm": 0.6212232708930969, |
| "learning_rate": 9.902550611308645e-05, |
| "loss": 0.0593, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3814713896457766, |
| "grad_norm": 0.6970530152320862, |
| "learning_rate": 9.895946334844494e-05, |
| "loss": 0.0579, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.388283378746594, |
| "grad_norm": 0.5176441669464111, |
| "learning_rate": 9.889127893826989e-05, |
| "loss": 0.0559, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.39509536784741145, |
| "grad_norm": 0.44634121656417847, |
| "learning_rate": 9.882095586516831e-05, |
| "loss": 0.0576, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4019073569482289, |
| "grad_norm": 0.6069617867469788, |
| "learning_rate": 9.874849720529921e-05, |
| "loss": 0.0608, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4087193460490463, |
| "grad_norm": 0.610893726348877, |
| "learning_rate": 9.867390612823914e-05, |
| "loss": 0.0592, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.41553133514986373, |
| "grad_norm": 0.3479655683040619, |
| "learning_rate": 9.859718589684344e-05, |
| "loss": 0.0658, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4223433242506812, |
| "grad_norm": 0.7216345071792603, |
| "learning_rate": 9.851833986710353e-05, |
| "loss": 0.056, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.42915531335149865, |
| "grad_norm": 0.5811245441436768, |
| "learning_rate": 9.843737148800023e-05, |
| "loss": 0.0489, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4359673024523161, |
| "grad_norm": 0.6808714866638184, |
| "learning_rate": 9.835428430135271e-05, |
| "loss": 0.0489, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4427792915531335, |
| "grad_norm": 0.53304123878479, |
| "learning_rate": 9.82690819416637e-05, |
| "loss": 0.0551, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.44959128065395093, |
| "grad_norm": 0.5133969783782959, |
| "learning_rate": 9.818176813596041e-05, |
| "loss": 0.0523, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4564032697547684, |
| "grad_norm": 0.42300981283187866, |
| "learning_rate": 9.809234670363159e-05, |
| "loss": 0.0635, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.46321525885558584, |
| "grad_norm": 0.5513554811477661, |
| "learning_rate": 9.800082155626034e-05, |
| "loss": 0.0571, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.47002724795640327, |
| "grad_norm": 0.4101255238056183, |
| "learning_rate": 9.790719669745312e-05, |
| "loss": 0.0571, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4768392370572207, |
| "grad_norm": 0.7928181886672974, |
| "learning_rate": 9.781147622266455e-05, |
| "loss": 0.0576, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.48365122615803813, |
| "grad_norm": 0.6665974855422974, |
| "learning_rate": 9.771366431901831e-05, |
| "loss": 0.0626, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4904632152588556, |
| "grad_norm": 0.43793386220932007, |
| "learning_rate": 9.761376526512394e-05, |
| "loss": 0.0567, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.49727520435967304, |
| "grad_norm": 0.4338440001010895, |
| "learning_rate": 9.751178343088963e-05, |
| "loss": 0.0519, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5040871934604905, |
| "grad_norm": 0.47942614555358887, |
| "learning_rate": 9.740772327733123e-05, |
| "loss": 0.0446, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.510899182561308, |
| "grad_norm": 0.3280750811100006, |
| "learning_rate": 9.730158935637697e-05, |
| "loss": 0.052, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5177111716621253, |
| "grad_norm": 0.6672011017799377, |
| "learning_rate": 9.719338631066834e-05, |
| "loss": 0.0417, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5245231607629428, |
| "grad_norm": 0.386070191860199, |
| "learning_rate": 9.708311887335713e-05, |
| "loss": 0.0436, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5313351498637602, |
| "grad_norm": 0.428362637758255, |
| "learning_rate": 9.697079186789823e-05, |
| "loss": 0.0539, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5381471389645777, |
| "grad_norm": 0.4888722896575928, |
| "learning_rate": 9.685641020783876e-05, |
| "loss": 0.0517, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5449591280653951, |
| "grad_norm": 0.4673832952976227, |
| "learning_rate": 9.67399788966031e-05, |
| "loss": 0.0523, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5517711171662125, |
| "grad_norm": 0.29115697741508484, |
| "learning_rate": 9.662150302727395e-05, |
| "loss": 0.0521, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.55858310626703, |
| "grad_norm": 0.827384352684021, |
| "learning_rate": 9.650098778236968e-05, |
| "loss": 0.0477, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5653950953678474, |
| "grad_norm": 0.33872804045677185, |
| "learning_rate": 9.637843843361749e-05, |
| "loss": 0.0471, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5722070844686649, |
| "grad_norm": 0.3877025842666626, |
| "learning_rate": 9.62538603417229e-05, |
| "loss": 0.0426, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5790190735694822, |
| "grad_norm": 0.42489877343177795, |
| "learning_rate": 9.612725895613526e-05, |
| "loss": 0.0551, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5858310626702997, |
| "grad_norm": 0.3988380432128906, |
| "learning_rate": 9.599863981480926e-05, |
| "loss": 0.0487, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5926430517711172, |
| "grad_norm": 0.5466487407684326, |
| "learning_rate": 9.586800854396283e-05, |
| "loss": 0.0467, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5994550408719346, |
| "grad_norm": 0.37913820147514343, |
| "learning_rate": 9.573537085783095e-05, |
| "loss": 0.0399, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6062670299727521, |
| "grad_norm": 0.47171854972839355, |
| "learning_rate": 9.560073255841571e-05, |
| "loss": 0.0402, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6130790190735694, |
| "grad_norm": 0.5175816416740417, |
| "learning_rate": 9.546409953523247e-05, |
| "loss": 0.0514, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6198910081743869, |
| "grad_norm": 0.3573410212993622, |
| "learning_rate": 9.532547776505229e-05, |
| "loss": 0.0384, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6267029972752044, |
| "grad_norm": 0.4385380148887634, |
| "learning_rate": 9.518487331164048e-05, |
| "loss": 0.0478, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6335149863760218, |
| "grad_norm": 0.49251607060432434, |
| "learning_rate": 9.504229232549134e-05, |
| "loss": 0.0421, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6403269754768393, |
| "grad_norm": 0.33070334792137146, |
| "learning_rate": 9.489774104355909e-05, |
| "loss": 0.0402, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6471389645776566, |
| "grad_norm": 0.616314709186554, |
| "learning_rate": 9.475122578898507e-05, |
| "loss": 0.0561, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6539509536784741, |
| "grad_norm": 0.7195887565612793, |
| "learning_rate": 9.460275297082119e-05, |
| "loss": 0.0392, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6607629427792916, |
| "grad_norm": 0.6277886033058167, |
| "learning_rate": 9.445232908374948e-05, |
| "loss": 0.0433, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.667574931880109, |
| "grad_norm": 0.7239585518836975, |
| "learning_rate": 9.429996070779808e-05, |
| "loss": 0.0513, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6743869209809265, |
| "grad_norm": 0.8502191305160522, |
| "learning_rate": 9.414565450805333e-05, |
| "loss": 0.052, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6811989100817438, |
| "grad_norm": 0.26333141326904297, |
| "learning_rate": 9.398941723436831e-05, |
| "loss": 0.0467, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6880108991825613, |
| "grad_norm": 0.6643750071525574, |
| "learning_rate": 9.383125572106752e-05, |
| "loss": 0.0416, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6948228882833788, |
| "grad_norm": 0.4409726858139038, |
| "learning_rate": 9.367117688664791e-05, |
| "loss": 0.0484, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7016348773841962, |
| "grad_norm": 0.575430154800415, |
| "learning_rate": 9.35091877334763e-05, |
| "loss": 0.0495, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7084468664850136, |
| "grad_norm": 0.4284694492816925, |
| "learning_rate": 9.334529534748297e-05, |
| "loss": 0.0442, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.715258855585831, |
| "grad_norm": 0.4911642372608185, |
| "learning_rate": 9.317950689785188e-05, |
| "loss": 0.0405, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7220708446866485, |
| "grad_norm": 0.7064844965934753, |
| "learning_rate": 9.301182963670688e-05, |
| "loss": 0.054, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.728882833787466, |
| "grad_norm": 0.7730808258056641, |
| "learning_rate": 9.284227089879456e-05, |
| "loss": 0.048, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7356948228882834, |
| "grad_norm": 0.3709051012992859, |
| "learning_rate": 9.26708381011634e-05, |
| "loss": 0.0505, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7425068119891008, |
| "grad_norm": 0.5168929100036621, |
| "learning_rate": 9.249753874283937e-05, |
| "loss": 0.0367, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7493188010899182, |
| "grad_norm": 0.4022100269794464, |
| "learning_rate": 9.232238040449779e-05, |
| "loss": 0.0421, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7561307901907357, |
| "grad_norm": 0.4457673728466034, |
| "learning_rate": 9.214537074813181e-05, |
| "loss": 0.0417, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7629427792915532, |
| "grad_norm": 0.5297082662582397, |
| "learning_rate": 9.196651751671724e-05, |
| "loss": 0.0429, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7697547683923706, |
| "grad_norm": 0.2837148904800415, |
| "learning_rate": 9.178582853387384e-05, |
| "loss": 0.0435, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.776566757493188, |
| "grad_norm": 0.49884119629859924, |
| "learning_rate": 9.160331170352304e-05, |
| "loss": 0.0428, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7833787465940054, |
| "grad_norm": 0.3587488532066345, |
| "learning_rate": 9.141897500954229e-05, |
| "loss": 0.0403, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7901907356948229, |
| "grad_norm": 0.4518432319164276, |
| "learning_rate": 9.123282651541576e-05, |
| "loss": 0.0376, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7970027247956403, |
| "grad_norm": 0.48306331038475037, |
| "learning_rate": 9.104487436388161e-05, |
| "loss": 0.0414, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8038147138964578, |
| "grad_norm": 0.44562119245529175, |
| "learning_rate": 9.085512677657582e-05, |
| "loss": 0.039, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8106267029972752, |
| "grad_norm": 0.5168417692184448, |
| "learning_rate": 9.066359205367258e-05, |
| "loss": 0.0433, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8174386920980926, |
| "grad_norm": 0.3128747344017029, |
| "learning_rate": 9.047027857352112e-05, |
| "loss": 0.0459, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8242506811989101, |
| "grad_norm": 0.43080934882164, |
| "learning_rate": 9.027519479227935e-05, |
| "loss": 0.0418, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8310626702997275, |
| "grad_norm": 0.39959844946861267, |
| "learning_rate": 9.007834924354383e-05, |
| "loss": 0.0382, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.837874659400545, |
| "grad_norm": 0.46520474553108215, |
| "learning_rate": 8.987975053797655e-05, |
| "loss": 0.0435, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8446866485013624, |
| "grad_norm": 0.500769317150116, |
| "learning_rate": 8.967940736292825e-05, |
| "loss": 0.0376, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8514986376021798, |
| "grad_norm": 0.7235398888587952, |
| "learning_rate": 8.947732848205846e-05, |
| "loss": 0.0495, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8583106267029973, |
| "grad_norm": 0.3423875570297241, |
| "learning_rate": 8.927352273495204e-05, |
| "loss": 0.0382, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.8651226158038147, |
| "grad_norm": 0.5364619493484497, |
| "learning_rate": 8.906799903673265e-05, |
| "loss": 0.0417, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8719346049046321, |
| "grad_norm": 0.6217823028564453, |
| "learning_rate": 8.88607663776726e-05, |
| "loss": 0.0436, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8787465940054496, |
| "grad_norm": 0.3814774751663208, |
| "learning_rate": 8.865183382279978e-05, |
| "loss": 0.0456, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.885558583106267, |
| "grad_norm": 0.5894232988357544, |
| "learning_rate": 8.844121051150096e-05, |
| "loss": 0.0413, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8923705722070845, |
| "grad_norm": 0.4752817153930664, |
| "learning_rate": 8.822890565712211e-05, |
| "loss": 0.0365, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8991825613079019, |
| "grad_norm": 0.3989897072315216, |
| "learning_rate": 8.801492854656536e-05, |
| "loss": 0.0397, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9059945504087193, |
| "grad_norm": 0.3819690942764282, |
| "learning_rate": 8.779928853988268e-05, |
| "loss": 0.0351, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9128065395095368, |
| "grad_norm": 0.487627774477005, |
| "learning_rate": 8.758199506986655e-05, |
| "loss": 0.038, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9196185286103542, |
| "grad_norm": 0.40310102701187134, |
| "learning_rate": 8.73630576416373e-05, |
| "loss": 0.0365, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9264305177111717, |
| "grad_norm": 0.30654123425483704, |
| "learning_rate": 8.714248583222726e-05, |
| "loss": 0.04, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9332425068119891, |
| "grad_norm": 0.5350182056427002, |
| "learning_rate": 8.692028929016196e-05, |
| "loss": 0.0377, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9400544959128065, |
| "grad_norm": 0.39081400632858276, |
| "learning_rate": 8.669647773503797e-05, |
| "loss": 0.0363, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.946866485013624, |
| "grad_norm": 0.4992840886116028, |
| "learning_rate": 8.647106095709773e-05, |
| "loss": 0.0355, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.9536784741144414, |
| "grad_norm": 0.3442818224430084, |
| "learning_rate": 8.624404881680139e-05, |
| "loss": 0.0452, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9604904632152589, |
| "grad_norm": 0.5338506102561951, |
| "learning_rate": 8.601545124439535e-05, |
| "loss": 0.0358, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9673024523160763, |
| "grad_norm": 0.3899770975112915, |
| "learning_rate": 8.5785278239478e-05, |
| "loss": 0.0422, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9741144414168937, |
| "grad_norm": 0.5235274434089661, |
| "learning_rate": 8.555353987056224e-05, |
| "loss": 0.0411, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.9809264305177112, |
| "grad_norm": 0.4164868891239166, |
| "learning_rate": 8.532024627463505e-05, |
| "loss": 0.0351, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.9877384196185286, |
| "grad_norm": 0.3429436683654785, |
| "learning_rate": 8.508540765671407e-05, |
| "loss": 0.0396, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.9945504087193461, |
| "grad_norm": 0.45483162999153137, |
| "learning_rate": 8.484903428940121e-05, |
| "loss": 0.0388, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.0013623978201636, |
| "grad_norm": 0.4117540419101715, |
| "learning_rate": 8.461113651243334e-05, |
| "loss": 0.0396, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.008174386920981, |
| "grad_norm": 0.44719594717025757, |
| "learning_rate": 8.437172473222987e-05, |
| "loss": 0.0411, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.0149863760217983, |
| "grad_norm": 0.5068361759185791, |
| "learning_rate": 8.413080942143767e-05, |
| "loss": 0.0343, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.021798365122616, |
| "grad_norm": 0.43941476941108704, |
| "learning_rate": 8.388840111847288e-05, |
| "loss": 0.045, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0286103542234333, |
| "grad_norm": 0.4756196141242981, |
| "learning_rate": 8.364451042705998e-05, |
| "loss": 0.0337, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.0354223433242506, |
| "grad_norm": 0.3626450002193451, |
| "learning_rate": 8.33991480157679e-05, |
| "loss": 0.0379, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.042234332425068, |
| "grad_norm": 0.5754261016845703, |
| "learning_rate": 8.315232461754338e-05, |
| "loss": 0.0374, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.0490463215258856, |
| "grad_norm": 0.45411282777786255, |
| "learning_rate": 8.290405102924144e-05, |
| "loss": 0.0404, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.055858310626703, |
| "grad_norm": 0.5540292263031006, |
| "learning_rate": 8.265433811115316e-05, |
| "loss": 0.0406, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.0626702997275204, |
| "grad_norm": 0.4548736810684204, |
| "learning_rate": 8.240319678653049e-05, |
| "loss": 0.0353, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.069482288828338, |
| "grad_norm": 0.3220965564250946, |
| "learning_rate": 8.215063804110857e-05, |
| "loss": 0.0395, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.0762942779291553, |
| "grad_norm": 0.33744776248931885, |
| "learning_rate": 8.189667292262512e-05, |
| "loss": 0.0327, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.0831062670299727, |
| "grad_norm": 0.34971827268600464, |
| "learning_rate": 8.164131254033716e-05, |
| "loss": 0.0382, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.0899182561307903, |
| "grad_norm": 0.3128986060619354, |
| "learning_rate": 8.138456806453503e-05, |
| "loss": 0.0322, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.0967302452316077, |
| "grad_norm": 0.2257993221282959, |
| "learning_rate": 8.112645072605386e-05, |
| "loss": 0.0271, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.103542234332425, |
| "grad_norm": 0.30597376823425293, |
| "learning_rate": 8.086697181578222e-05, |
| "loss": 0.0278, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.1103542234332424, |
| "grad_norm": 0.31509286165237427, |
| "learning_rate": 8.060614268416823e-05, |
| "loss": 0.0301, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.11716621253406, |
| "grad_norm": 0.4431317150592804, |
| "learning_rate": 8.034397474072309e-05, |
| "loss": 0.0309, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.1239782016348774, |
| "grad_norm": 0.4654938578605652, |
| "learning_rate": 8.008047945352193e-05, |
| "loss": 0.0406, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.1307901907356948, |
| "grad_norm": 0.42640626430511475, |
| "learning_rate": 7.981566834870225e-05, |
| "loss": 0.0299, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.1376021798365124, |
| "grad_norm": 0.41219788789749146, |
| "learning_rate": 7.954955300995961e-05, |
| "loss": 0.0318, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.1444141689373297, |
| "grad_norm": 0.3845755159854889, |
| "learning_rate": 7.928214507804104e-05, |
| "loss": 0.0338, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.151226158038147, |
| "grad_norm": 0.31636008620262146, |
| "learning_rate": 7.901345625023576e-05, |
| "loss": 0.0352, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.1580381471389645, |
| "grad_norm": 0.34709426760673523, |
| "learning_rate": 7.874349827986354e-05, |
| "loss": 0.0331, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.164850136239782, |
| "grad_norm": 0.4313192665576935, |
| "learning_rate": 7.847228297576053e-05, |
| "loss": 0.0326, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.1716621253405994, |
| "grad_norm": 0.4032236933708191, |
| "learning_rate": 7.819982220176276e-05, |
| "loss": 0.0355, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.1784741144414168, |
| "grad_norm": 0.3324613571166992, |
| "learning_rate": 7.792612787618714e-05, |
| "loss": 0.0355, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.1852861035422344, |
| "grad_norm": 0.44290757179260254, |
| "learning_rate": 7.765121197131009e-05, |
| "loss": 0.0327, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.1920980926430518, |
| "grad_norm": 0.28540492057800293, |
| "learning_rate": 7.737508651284391e-05, |
| "loss": 0.0367, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.1989100817438691, |
| "grad_norm": 0.38834914565086365, |
| "learning_rate": 7.709776357941069e-05, |
| "loss": 0.0373, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.2057220708446867, |
| "grad_norm": 0.34177857637405396, |
| "learning_rate": 7.681925530201392e-05, |
| "loss": 0.0368, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.2125340599455041, |
| "grad_norm": 0.45681893825531006, |
| "learning_rate": 7.65395738635079e-05, |
| "loss": 0.0318, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.2193460490463215, |
| "grad_norm": 0.32232654094696045, |
| "learning_rate": 7.62587314980648e-05, |
| "loss": 0.0365, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.226158038147139, |
| "grad_norm": 0.2634826898574829, |
| "learning_rate": 7.597674049063947e-05, |
| "loss": 0.0327, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.2329700272479565, |
| "grad_norm": 0.4753483235836029, |
| "learning_rate": 7.569361317643211e-05, |
| "loss": 0.0337, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.2397820163487738, |
| "grad_norm": 0.3038065433502197, |
| "learning_rate": 7.540936194034865e-05, |
| "loss": 0.0309, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.2465940054495912, |
| "grad_norm": 0.32555919885635376, |
| "learning_rate": 7.512399921645901e-05, |
| "loss": 0.0313, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.2534059945504088, |
| "grad_norm": 0.3383468985557556, |
| "learning_rate": 7.483753748745317e-05, |
| "loss": 0.032, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.2602179836512262, |
| "grad_norm": 0.26944777369499207, |
| "learning_rate": 7.454998928409516e-05, |
| "loss": 0.0308, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.2670299727520435, |
| "grad_norm": 0.2938184142112732, |
| "learning_rate": 7.426136718467493e-05, |
| "loss": 0.0324, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.273841961852861, |
| "grad_norm": 0.276143878698349, |
| "learning_rate": 7.397168381445812e-05, |
| "loss": 0.0325, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.2806539509536785, |
| "grad_norm": 0.3054909408092499, |
| "learning_rate": 7.368095184513377e-05, |
| "loss": 0.03, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.2874659400544959, |
| "grad_norm": 0.24084536731243134, |
| "learning_rate": 7.338918399426005e-05, |
| "loss": 0.0274, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.2942779291553133, |
| "grad_norm": 0.41324862837791443, |
| "learning_rate": 7.309639302470801e-05, |
| "loss": 0.0348, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.3010899182561309, |
| "grad_norm": 0.29731935262680054, |
| "learning_rate": 7.280259174410312e-05, |
| "loss": 0.0312, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.3079019073569482, |
| "grad_norm": 0.22514300048351288, |
| "learning_rate": 7.250779300426517e-05, |
| "loss": 0.0312, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.3147138964577656, |
| "grad_norm": 0.5704501271247864, |
| "learning_rate": 7.22120097006461e-05, |
| "loss": 0.0325, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.3215258855585832, |
| "grad_norm": 0.27702492475509644, |
| "learning_rate": 7.191525477176577e-05, |
| "loss": 0.0321, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.3283378746594006, |
| "grad_norm": 0.34598076343536377, |
| "learning_rate": 7.161754119864616e-05, |
| "loss": 0.0298, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.335149863760218, |
| "grad_norm": 0.24778622388839722, |
| "learning_rate": 7.131888200424339e-05, |
| "loss": 0.0277, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.3419618528610355, |
| "grad_norm": 0.2454395443201065, |
| "learning_rate": 7.101929025287816e-05, |
| "loss": 0.0357, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.348773841961853, |
| "grad_norm": 0.47679805755615234, |
| "learning_rate": 7.071877904966423e-05, |
| "loss": 0.0378, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.3555858310626703, |
| "grad_norm": 0.2696547210216522, |
| "learning_rate": 7.04173615399351e-05, |
| "loss": 0.0299, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.3623978201634879, |
| "grad_norm": 0.3305070698261261, |
| "learning_rate": 7.011505090866913e-05, |
| "loss": 0.0298, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.3692098092643052, |
| "grad_norm": 0.35810503363609314, |
| "learning_rate": 6.981186037991271e-05, |
| "loss": 0.0304, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.3760217983651226, |
| "grad_norm": 0.314117968082428, |
| "learning_rate": 6.950780321620174e-05, |
| "loss": 0.0352, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.38283378746594, |
| "grad_norm": 0.33775216341018677, |
| "learning_rate": 6.920289271798157e-05, |
| "loss": 0.0378, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.3896457765667574, |
| "grad_norm": 0.33370664715766907, |
| "learning_rate": 6.889714222302517e-05, |
| "loss": 0.0336, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.396457765667575, |
| "grad_norm": 0.48640260100364685, |
| "learning_rate": 6.85905651058497e-05, |
| "loss": 0.0323, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.4032697547683923, |
| "grad_norm": 0.3220215141773224, |
| "learning_rate": 6.82831747771314e-05, |
| "loss": 0.0276, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.4100817438692097, |
| "grad_norm": 0.32791373133659363, |
| "learning_rate": 6.797498468311907e-05, |
| "loss": 0.0287, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.4168937329700273, |
| "grad_norm": 0.36337828636169434, |
| "learning_rate": 6.766600830504585e-05, |
| "loss": 0.0291, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.4237057220708447, |
| "grad_norm": 0.3391413390636444, |
| "learning_rate": 6.735625915853942e-05, |
| "loss": 0.0284, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.430517711171662, |
| "grad_norm": 0.35755249857902527, |
| "learning_rate": 6.70457507930309e-05, |
| "loss": 0.0274, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.4373297002724796, |
| "grad_norm": 0.2682415843009949, |
| "learning_rate": 6.673449679116215e-05, |
| "loss": 0.0274, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.444141689373297, |
| "grad_norm": 0.475309818983078, |
| "learning_rate": 6.642251076819148e-05, |
| "loss": 0.0262, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.4509536784741144, |
| "grad_norm": 0.3676445186138153, |
| "learning_rate": 6.610980637139827e-05, |
| "loss": 0.0318, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.457765667574932, |
| "grad_norm": 0.45259350538253784, |
| "learning_rate": 6.579639727948583e-05, |
| "loss": 0.0296, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.4645776566757494, |
| "grad_norm": 0.38819339871406555, |
| "learning_rate": 6.548229720198315e-05, |
| "loss": 0.0334, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.4713896457765667, |
| "grad_norm": 0.4020323157310486, |
| "learning_rate": 6.516751987864517e-05, |
| "loss": 0.0273, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.4782016348773843, |
| "grad_norm": 0.1928047388792038, |
| "learning_rate": 6.485207907885175e-05, |
| "loss": 0.0266, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.4850136239782017, |
| "grad_norm": 0.442618727684021, |
| "learning_rate": 6.453598860100536e-05, |
| "loss": 0.0299, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.491825613079019, |
| "grad_norm": 0.36381062865257263, |
| "learning_rate": 6.421926227192749e-05, |
| "loss": 0.0252, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.4986376021798364, |
| "grad_norm": 0.4495033621788025, |
| "learning_rate": 6.390191394625381e-05, |
| "loss": 0.0265, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.5054495912806538, |
| "grad_norm": 0.3564695715904236, |
| "learning_rate": 6.358395750582817e-05, |
| "loss": 0.026, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.5122615803814714, |
| "grad_norm": 0.28276216983795166, |
| "learning_rate": 6.326540685909532e-05, |
| "loss": 0.0245, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.5190735694822888, |
| "grad_norm": 0.44450217485427856, |
| "learning_rate": 6.294627594049249e-05, |
| "loss": 0.0253, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.5258855585831061, |
| "grad_norm": 0.2726491391658783, |
| "learning_rate": 6.262657870983989e-05, |
| "loss": 0.0258, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.5326975476839237, |
| "grad_norm": 0.35235723853111267, |
| "learning_rate": 6.230632915173009e-05, |
| "loss": 0.0303, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.5395095367847411, |
| "grad_norm": 0.2119748741388321, |
| "learning_rate": 6.198554127491622e-05, |
| "loss": 0.029, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.5463215258855585, |
| "grad_norm": 0.34444141387939453, |
| "learning_rate": 6.166422911169923e-05, |
| "loss": 0.0269, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.553133514986376, |
| "grad_norm": 0.2883770763874054, |
| "learning_rate": 6.1342406717314e-05, |
| "loss": 0.0303, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.5599455040871935, |
| "grad_norm": 0.2837648093700409, |
| "learning_rate": 6.102008816931466e-05, |
| "loss": 0.0272, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.5667574931880108, |
| "grad_norm": 0.2236020863056183, |
| "learning_rate": 6.069728756695866e-05, |
| "loss": 0.0234, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.5735694822888284, |
| "grad_norm": 0.4470672607421875, |
| "learning_rate": 6.037401903059008e-05, |
| "loss": 0.032, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.5803814713896458, |
| "grad_norm": 0.3020336627960205, |
| "learning_rate": 6.005029670102195e-05, |
| "loss": 0.0227, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.5871934604904632, |
| "grad_norm": 0.27960023283958435, |
| "learning_rate": 5.972613473891766e-05, |
| "loss": 0.0335, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.5940054495912808, |
| "grad_norm": 0.308479368686676, |
| "learning_rate": 5.940154732417158e-05, |
| "loss": 0.0297, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.6008174386920981, |
| "grad_norm": 0.3311978876590729, |
| "learning_rate": 5.907654865528876e-05, |
| "loss": 0.0312, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.6076294277929155, |
| "grad_norm": 0.26757732033729553, |
| "learning_rate": 5.875115294876381e-05, |
| "loss": 0.0234, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.614441416893733, |
| "grad_norm": 0.40103888511657715, |
| "learning_rate": 5.842537443845908e-05, |
| "loss": 0.0274, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.6212534059945503, |
| "grad_norm": 0.17837531864643097, |
| "learning_rate": 5.809922737498198e-05, |
| "loss": 0.0225, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.6280653950953679, |
| "grad_norm": 0.42968425154685974, |
| "learning_rate": 5.777272602506165e-05, |
| "loss": 0.027, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.6348773841961854, |
| "grad_norm": 0.24213114380836487, |
| "learning_rate": 5.744588467092483e-05, |
| "loss": 0.0265, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.6416893732970026, |
| "grad_norm": 0.3060871660709381, |
| "learning_rate": 5.7118717609671194e-05, |
| "loss": 0.0235, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.6485013623978202, |
| "grad_norm": 0.20384085178375244, |
| "learning_rate": 5.679123915264786e-05, |
| "loss": 0.0261, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.6553133514986376, |
| "grad_norm": 0.3139786720275879, |
| "learning_rate": 5.646346362482342e-05, |
| "loss": 0.0225, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.662125340599455, |
| "grad_norm": 0.2353772073984146, |
| "learning_rate": 5.613540536416132e-05, |
| "loss": 0.0273, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.6689373297002725, |
| "grad_norm": 0.3663155436515808, |
| "learning_rate": 5.5807078720992645e-05, |
| "loss": 0.0237, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.67574931880109, |
| "grad_norm": 0.4667767882347107, |
| "learning_rate": 5.547849805738836e-05, |
| "loss": 0.0308, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.6825613079019073, |
| "grad_norm": 0.2913496792316437, |
| "learning_rate": 5.514967774653118e-05, |
| "loss": 0.0222, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.6893732970027249, |
| "grad_norm": 0.22617073357105255, |
| "learning_rate": 5.482063217208674e-05, |
| "loss": 0.0251, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.6961852861035422, |
| "grad_norm": 0.3499128222465515, |
| "learning_rate": 5.449137572757439e-05, |
| "loss": 0.0216, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.7029972752043596, |
| "grad_norm": 0.24365057051181793, |
| "learning_rate": 5.4161922815737696e-05, |
| "loss": 0.0268, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.7098092643051772, |
| "grad_norm": 0.21294479072093964, |
| "learning_rate": 5.3832287847914276e-05, |
| "loss": 0.0273, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.7166212534059946, |
| "grad_norm": 0.31520646810531616, |
| "learning_rate": 5.35024852434055e-05, |
| "loss": 0.0258, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.723433242506812, |
| "grad_norm": 0.4261656403541565, |
| "learning_rate": 5.317252942884567e-05, |
| "loss": 0.0231, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.7302452316076296, |
| "grad_norm": 0.29408591985702515, |
| "learning_rate": 5.284243483757109e-05, |
| "loss": 0.0304, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.7370572207084467, |
| "grad_norm": 0.333383172750473, |
| "learning_rate": 5.2512215908988484e-05, |
| "loss": 0.0295, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.7438692098092643, |
| "grad_norm": 0.2510589361190796, |
| "learning_rate": 5.218188708794357e-05, |
| "loss": 0.0254, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.750681198910082, |
| "grad_norm": 0.3071255385875702, |
| "learning_rate": 5.18514628240891e-05, |
| "loss": 0.0233, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.757493188010899, |
| "grad_norm": 0.3328297436237335, |
| "learning_rate": 5.1520957571252795e-05, |
| "loss": 0.0237, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.7643051771117166, |
| "grad_norm": 0.2048969864845276, |
| "learning_rate": 5.1190385786805106e-05, |
| "loss": 0.0278, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.771117166212534, |
| "grad_norm": 0.4445406496524811, |
| "learning_rate": 5.085976193102677e-05, |
| "loss": 0.0247, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.7779291553133514, |
| "grad_norm": 0.2530488967895508, |
| "learning_rate": 5.052910046647634e-05, |
| "loss": 0.0218, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.784741144414169, |
| "grad_norm": 0.31554245948791504, |
| "learning_rate": 5.0198415857357464e-05, |
| "loss": 0.0237, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.7915531335149864, |
| "grad_norm": 0.2431655079126358, |
| "learning_rate": 4.9867722568886223e-05, |
| "loss": 0.0214, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.7983651226158037, |
| "grad_norm": 0.28798162937164307, |
| "learning_rate": 4.9537035066658314e-05, |
| "loss": 0.0213, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.8051771117166213, |
| "grad_norm": 0.25857627391815186, |
| "learning_rate": 4.920636781601638e-05, |
| "loss": 0.0272, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.8119891008174387, |
| "grad_norm": 0.2804415225982666, |
| "learning_rate": 4.88757352814172e-05, |
| "loss": 0.0288, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.818801089918256, |
| "grad_norm": 0.23555926978588104, |
| "learning_rate": 4.8545151925798924e-05, |
| "loss": 0.0247, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.8256130790190737, |
| "grad_norm": 0.3501521050930023, |
| "learning_rate": 4.821463220994848e-05, |
| "loss": 0.026, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.832425068119891, |
| "grad_norm": 0.3100302517414093, |
| "learning_rate": 4.788419059186895e-05, |
| "loss": 0.021, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.8392370572207084, |
| "grad_norm": 0.28045013546943665, |
| "learning_rate": 4.7553841526147205e-05, |
| "loss": 0.0257, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.846049046321526, |
| "grad_norm": 0.17547450959682465, |
| "learning_rate": 4.722359946332156e-05, |
| "loss": 0.023, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.8528610354223434, |
| "grad_norm": 0.2572614550590515, |
| "learning_rate": 4.6893478849249654e-05, |
| "loss": 0.0226, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.8596730245231607, |
| "grad_norm": 0.42476364970207214, |
| "learning_rate": 4.656349412447664e-05, |
| "loss": 0.023, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.8664850136239783, |
| "grad_norm": 0.37075158953666687, |
| "learning_rate": 4.623365972360337e-05, |
| "loss": 0.0239, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.8732970027247955, |
| "grad_norm": 0.27569836378097534, |
| "learning_rate": 4.590399007465503e-05, |
| "loss": 0.0216, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.880108991825613, |
| "grad_norm": 0.25869858264923096, |
| "learning_rate": 4.557449959845005e-05, |
| "loss": 0.024, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.8869209809264307, |
| "grad_norm": 0.2198791801929474, |
| "learning_rate": 4.524520270796927e-05, |
| "loss": 0.0213, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.8937329700272478, |
| "grad_norm": 0.3058468997478485, |
| "learning_rate": 4.491611380772545e-05, |
| "loss": 0.0218, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.9005449591280654, |
| "grad_norm": 0.2228512316942215, |
| "learning_rate": 4.458724729313318e-05, |
| "loss": 0.0218, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.9073569482288828, |
| "grad_norm": 0.2506347894668579, |
| "learning_rate": 4.42586175498792e-05, |
| "loss": 0.023, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.9141689373297002, |
| "grad_norm": 0.28511497378349304, |
| "learning_rate": 4.3930238953293094e-05, |
| "loss": 0.0211, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.9209809264305178, |
| "grad_norm": 0.2836903929710388, |
| "learning_rate": 4.360212586771847e-05, |
| "loss": 0.0174, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.9277929155313351, |
| "grad_norm": 0.2694113254547119, |
| "learning_rate": 4.327429264588463e-05, |
| "loss": 0.024, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.9346049046321525, |
| "grad_norm": 0.25238320231437683, |
| "learning_rate": 4.2946753628278725e-05, |
| "loss": 0.022, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.94141689373297, |
| "grad_norm": 0.22233974933624268, |
| "learning_rate": 4.2619523142518474e-05, |
| "loss": 0.0218, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.9482288828337875, |
| "grad_norm": 0.22567766904830933, |
| "learning_rate": 4.229261550272539e-05, |
| "loss": 0.0211, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.9550408719346049, |
| "grad_norm": 0.21269120275974274, |
| "learning_rate": 4.196604500889868e-05, |
| "loss": 0.0207, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.9618528610354224, |
| "grad_norm": 0.25701943039894104, |
| "learning_rate": 4.163982594628969e-05, |
| "loss": 0.0218, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.9686648501362398, |
| "grad_norm": 0.2941311299800873, |
| "learning_rate": 4.131397258477702e-05, |
| "loss": 0.0222, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.9754768392370572, |
| "grad_norm": 0.20397907495498657, |
| "learning_rate": 4.0988499178242315e-05, |
| "loss": 0.0205, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.9822888283378748, |
| "grad_norm": 0.21562394499778748, |
| "learning_rate": 4.066341996394678e-05, |
| "loss": 0.0288, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.989100817438692, |
| "grad_norm": 0.25813037157058716, |
| "learning_rate": 4.033874916190833e-05, |
| "loss": 0.0215, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.9959128065395095, |
| "grad_norm": 0.1991417109966278, |
| "learning_rate": 4.001450097427966e-05, |
| "loss": 0.019, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.002724795640327, |
| "grad_norm": 0.21835818886756897, |
| "learning_rate": 3.9690689584726894e-05, |
| "loss": 0.0249, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.0095367847411443, |
| "grad_norm": 0.24195794761180878, |
| "learning_rate": 3.936732915780923e-05, |
| "loss": 0.0177, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.016348773841962, |
| "grad_norm": 0.3374285101890564, |
| "learning_rate": 3.904443383835929e-05, |
| "loss": 0.0247, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.0231607629427795, |
| "grad_norm": 0.2824082374572754, |
| "learning_rate": 3.872201775086437e-05, |
| "loss": 0.0216, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.0299727520435966, |
| "grad_norm": 0.29006993770599365, |
| "learning_rate": 3.8400094998848616e-05, |
| "loss": 0.0206, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.036784741144414, |
| "grad_norm": 0.3308681547641754, |
| "learning_rate": 3.807867966425611e-05, |
| "loss": 0.0178, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.043596730245232, |
| "grad_norm": 0.24560880661010742, |
| "learning_rate": 3.775778580683481e-05, |
| "loss": 0.0226, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.050408719346049, |
| "grad_norm": 0.2389586716890335, |
| "learning_rate": 3.743742746352156e-05, |
| "loss": 0.021, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.0572207084468666, |
| "grad_norm": 0.35238826274871826, |
| "learning_rate": 3.711761864782817e-05, |
| "loss": 0.0251, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.0640326975476837, |
| "grad_norm": 0.2502613365650177, |
| "learning_rate": 3.679837334922825e-05, |
| "loss": 0.0201, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.0708446866485013, |
| "grad_norm": 0.2527748942375183, |
| "learning_rate": 3.647970553254538e-05, |
| "loss": 0.0211, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.077656675749319, |
| "grad_norm": 0.3349742293357849, |
| "learning_rate": 3.61616291373422e-05, |
| "loss": 0.0243, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.084468664850136, |
| "grad_norm": 0.2768033444881439, |
| "learning_rate": 3.584415807731065e-05, |
| "loss": 0.0229, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.0912806539509536, |
| "grad_norm": 0.21673381328582764, |
| "learning_rate": 3.552730623966337e-05, |
| "loss": 0.0223, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.0980926430517712, |
| "grad_norm": 0.20745591819286346, |
| "learning_rate": 3.521108748452617e-05, |
| "loss": 0.0196, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.1049046321525884, |
| "grad_norm": 0.27668702602386475, |
| "learning_rate": 3.489551564433186e-05, |
| "loss": 0.024, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.111716621253406, |
| "grad_norm": 0.2564879357814789, |
| "learning_rate": 3.4580604523215006e-05, |
| "loss": 0.0194, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.1185286103542236, |
| "grad_norm": 0.21311357617378235, |
| "learning_rate": 3.4266367896408216e-05, |
| "loss": 0.0291, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.1253405994550407, |
| "grad_norm": 0.21265241503715515, |
| "learning_rate": 3.3952819509639534e-05, |
| "loss": 0.019, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.1321525885558583, |
| "grad_norm": 0.25450852513313293, |
| "learning_rate": 3.3639973078531165e-05, |
| "loss": 0.0207, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.138964577656676, |
| "grad_norm": 0.24124109745025635, |
| "learning_rate": 3.332784228799947e-05, |
| "loss": 0.0195, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.145776566757493, |
| "grad_norm": 0.3012523055076599, |
| "learning_rate": 3.301644079165638e-05, |
| "loss": 0.0206, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.1525885558583107, |
| "grad_norm": 0.2553965151309967, |
| "learning_rate": 3.27057822112122e-05, |
| "loss": 0.0169, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.1594005449591283, |
| "grad_norm": 0.28278952836990356, |
| "learning_rate": 3.239588013587958e-05, |
| "loss": 0.0222, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.1662125340599454, |
| "grad_norm": 0.2095153033733368, |
| "learning_rate": 3.208674812177926e-05, |
| "loss": 0.0189, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.173024523160763, |
| "grad_norm": 0.30485105514526367, |
| "learning_rate": 3.177839969134698e-05, |
| "loss": 0.0219, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.1798365122615806, |
| "grad_norm": 0.35161760449409485, |
| "learning_rate": 3.1470848332742e-05, |
| "loss": 0.0217, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.1866485013623977, |
| "grad_norm": 0.24349473416805267, |
| "learning_rate": 3.116410749925708e-05, |
| "loss": 0.0222, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.1934604904632153, |
| "grad_norm": 0.15715332329273224, |
| "learning_rate": 3.085819060872995e-05, |
| "loss": 0.0179, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.2002724795640325, |
| "grad_norm": 0.22666095197200775, |
| "learning_rate": 3.055311104295648e-05, |
| "loss": 0.0198, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.20708446866485, |
| "grad_norm": 0.22959241271018982, |
| "learning_rate": 3.024888214710517e-05, |
| "loss": 0.0162, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.2138964577656677, |
| "grad_norm": 0.22255851328372955, |
| "learning_rate": 2.994551722913349e-05, |
| "loss": 0.0159, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.220708446866485, |
| "grad_norm": 0.2214617133140564, |
| "learning_rate": 2.9643029559205727e-05, |
| "loss": 0.0225, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.2275204359673024, |
| "grad_norm": 0.1882133036851883, |
| "learning_rate": 2.934143236911248e-05, |
| "loss": 0.0179, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.23433242506812, |
| "grad_norm": 0.4131694436073303, |
| "learning_rate": 2.90407388516919e-05, |
| "loss": 0.0194, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.241144414168937, |
| "grad_norm": 0.3278559148311615, |
| "learning_rate": 2.8740962160252495e-05, |
| "loss": 0.02, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.2479564032697548, |
| "grad_norm": 0.21860350668430328, |
| "learning_rate": 2.844211540799797e-05, |
| "loss": 0.0177, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.2547683923705724, |
| "grad_norm": 0.2650901675224304, |
| "learning_rate": 2.8144211667453368e-05, |
| "loss": 0.0183, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.2615803814713895, |
| "grad_norm": 0.2598157823085785, |
| "learning_rate": 2.7847263969893344e-05, |
| "loss": 0.016, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.268392370572207, |
| "grad_norm": 0.21535956859588623, |
| "learning_rate": 2.7551285304772206e-05, |
| "loss": 0.0173, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.2752043596730247, |
| "grad_norm": 0.19479890167713165, |
| "learning_rate": 2.7256288619155567e-05, |
| "loss": 0.0181, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.282016348773842, |
| "grad_norm": 0.21761104464530945, |
| "learning_rate": 2.6962286817154158e-05, |
| "loss": 0.0208, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.2888283378746594, |
| "grad_norm": 0.18495774269104004, |
| "learning_rate": 2.6669292759359166e-05, |
| "loss": 0.0173, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.2956403269754766, |
| "grad_norm": 0.2476925402879715, |
| "learning_rate": 2.637731926227993e-05, |
| "loss": 0.0231, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.302452316076294, |
| "grad_norm": 0.3167796730995178, |
| "learning_rate": 2.6086379097783033e-05, |
| "loss": 0.0219, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.309264305177112, |
| "grad_norm": 0.3013063371181488, |
| "learning_rate": 2.579648499253377e-05, |
| "loss": 0.0183, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.316076294277929, |
| "grad_norm": 0.2609173357486725, |
| "learning_rate": 2.5507649627439466e-05, |
| "loss": 0.0214, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.3228882833787465, |
| "grad_norm": 0.1826580911874771, |
| "learning_rate": 2.5219885637094653e-05, |
| "loss": 0.0191, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.329700272479564, |
| "grad_norm": 0.21605326235294342, |
| "learning_rate": 2.4933205609228533e-05, |
| "loss": 0.0209, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.3365122615803813, |
| "grad_norm": 0.23476341366767883, |
| "learning_rate": 2.464762208415419e-05, |
| "loss": 0.018, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.343324250681199, |
| "grad_norm": 0.1948312371969223, |
| "learning_rate": 2.4363147554220213e-05, |
| "loss": 0.0145, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.3501362397820165, |
| "grad_norm": 0.20815841853618622, |
| "learning_rate": 2.407979446326411e-05, |
| "loss": 0.0196, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.3569482288828336, |
| "grad_norm": 0.23515887558460236, |
| "learning_rate": 2.379757520606799e-05, |
| "loss": 0.0203, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.363760217983651, |
| "grad_norm": 0.2154649794101715, |
| "learning_rate": 2.3516502127816455e-05, |
| "loss": 0.0175, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.370572207084469, |
| "grad_norm": 0.23456346988677979, |
| "learning_rate": 2.323658752355647e-05, |
| "loss": 0.0173, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.377384196185286, |
| "grad_norm": 0.21330733597278595, |
| "learning_rate": 2.2957843637659654e-05, |
| "loss": 0.0178, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.3841961852861036, |
| "grad_norm": 0.19244815409183502, |
| "learning_rate": 2.2680282663286552e-05, |
| "loss": 0.0229, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.391008174386921, |
| "grad_norm": 0.20745113492012024, |
| "learning_rate": 2.2403916741853364e-05, |
| "loss": 0.0173, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.3978201634877383, |
| "grad_norm": 0.19936102628707886, |
| "learning_rate": 2.2128757962500817e-05, |
| "loss": 0.0172, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.404632152588556, |
| "grad_norm": 0.2921135127544403, |
| "learning_rate": 2.1854818361565275e-05, |
| "loss": 0.0171, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.4114441416893735, |
| "grad_norm": 0.2126695066690445, |
| "learning_rate": 2.1582109922052364e-05, |
| "loss": 0.0199, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.4182561307901906, |
| "grad_norm": 0.161210298538208, |
| "learning_rate": 2.1310644573112635e-05, |
| "loss": 0.0202, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.4250681198910082, |
| "grad_norm": 0.1921418011188507, |
| "learning_rate": 2.1040434189519924e-05, |
| "loss": 0.0168, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.431880108991826, |
| "grad_norm": 0.17595872282981873, |
| "learning_rate": 2.0771490591151733e-05, |
| "loss": 0.0208, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.438692098092643, |
| "grad_norm": 0.18638396263122559, |
| "learning_rate": 2.0503825542472317e-05, |
| "loss": 0.0214, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.4455040871934606, |
| "grad_norm": 0.24000069499015808, |
| "learning_rate": 2.023745075201805e-05, |
| "loss": 0.0155, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.452316076294278, |
| "grad_norm": 0.13929104804992676, |
| "learning_rate": 1.9972377871885157e-05, |
| "loss": 0.0201, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.4591280653950953, |
| "grad_norm": 0.23332083225250244, |
| "learning_rate": 1.970861849722017e-05, |
| "loss": 0.0159, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.465940054495913, |
| "grad_norm": 0.2451397329568863, |
| "learning_rate": 1.9446184165712587e-05, |
| "loss": 0.0172, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.47275204359673, |
| "grad_norm": 0.1490626186132431, |
| "learning_rate": 1.9185086357090214e-05, |
| "loss": 0.018, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.4795640326975477, |
| "grad_norm": 0.16023452579975128, |
| "learning_rate": 1.8925336492617057e-05, |
| "loss": 0.0167, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.4863760217983653, |
| "grad_norm": 0.2159489542245865, |
| "learning_rate": 1.8666945934593666e-05, |
| "loss": 0.0185, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.4931880108991824, |
| "grad_norm": 0.18671192228794098, |
| "learning_rate": 1.8409925985860126e-05, |
| "loss": 0.0129, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.20349836349487305, |
| "learning_rate": 1.8154287889301603e-05, |
| "loss": 0.0177, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.5068119891008176, |
| "grad_norm": 0.18601705133914948, |
| "learning_rate": 1.7900042827356612e-05, |
| "loss": 0.0205, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.5136239782016347, |
| "grad_norm": 0.22594991326332092, |
| "learning_rate": 1.76472019215278e-05, |
| "loss": 0.0205, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.5204359673024523, |
| "grad_norm": 0.18238820135593414, |
| "learning_rate": 1.739577623189545e-05, |
| "loss": 0.0142, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.5272479564032695, |
| "grad_norm": 0.1694435328245163, |
| "learning_rate": 1.7145776756633768e-05, |
| "loss": 0.022, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.534059945504087, |
| "grad_norm": 0.2308904379606247, |
| "learning_rate": 1.6897214431529646e-05, |
| "loss": 0.0166, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.5408719346049047, |
| "grad_norm": 0.18409192562103271, |
| "learning_rate": 1.6650100129504475e-05, |
| "loss": 0.0132, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.547683923705722, |
| "grad_norm": 0.17650723457336426, |
| "learning_rate": 1.6404444660138335e-05, |
| "loss": 0.0197, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.5544959128065394, |
| "grad_norm": 0.24465468525886536, |
| "learning_rate": 1.616025876919725e-05, |
| "loss": 0.0163, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.561307901907357, |
| "grad_norm": 0.19395938515663147, |
| "learning_rate": 1.5917553138163172e-05, |
| "loss": 0.0176, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.568119891008174, |
| "grad_norm": 0.19339482486248016, |
| "learning_rate": 1.5676338383766632e-05, |
| "loss": 0.0196, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.5749318801089918, |
| "grad_norm": 0.18326933681964874, |
| "learning_rate": 1.5436625057522447e-05, |
| "loss": 0.0154, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.5817438692098094, |
| "grad_norm": 0.17008966207504272, |
| "learning_rate": 1.519842364526804e-05, |
| "loss": 0.0137, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.5885558583106265, |
| "grad_norm": 0.1793888807296753, |
| "learning_rate": 1.4961744566704855e-05, |
| "loss": 0.0165, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.595367847411444, |
| "grad_norm": 0.1575794667005539, |
| "learning_rate": 1.4726598174942551e-05, |
| "loss": 0.0147, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.6021798365122617, |
| "grad_norm": 0.24643422663211823, |
| "learning_rate": 1.4492994756046035e-05, |
| "loss": 0.0207, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.608991825613079, |
| "grad_norm": 0.1690363883972168, |
| "learning_rate": 1.4260944528585645e-05, |
| "loss": 0.0179, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.6158038147138964, |
| "grad_norm": 0.229860320687294, |
| "learning_rate": 1.4030457643190048e-05, |
| "loss": 0.0138, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.622615803814714, |
| "grad_norm": 0.1885327398777008, |
| "learning_rate": 1.3801544182102311e-05, |
| "loss": 0.016, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.629427792915531, |
| "grad_norm": 0.1853918582201004, |
| "learning_rate": 1.3574214158738763e-05, |
| "loss": 0.0178, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.636239782016349, |
| "grad_norm": 0.17312754690647125, |
| "learning_rate": 1.3348477517251101e-05, |
| "loss": 0.0159, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.6430517711171664, |
| "grad_norm": 0.14870062470436096, |
| "learning_rate": 1.312434413209131e-05, |
| "loss": 0.0179, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.6498637602179835, |
| "grad_norm": 0.35962745547294617, |
| "learning_rate": 1.2901823807579727e-05, |
| "loss": 0.0148, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.656675749318801, |
| "grad_norm": 0.14894793927669525, |
| "learning_rate": 1.2680926277476245e-05, |
| "loss": 0.017, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.6634877384196187, |
| "grad_norm": 0.20324669778347015, |
| "learning_rate": 1.2461661204554397e-05, |
| "loss": 0.0166, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.670299727520436, |
| "grad_norm": 0.2097160369157791, |
| "learning_rate": 1.2244038180178835e-05, |
| "loss": 0.0161, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.6771117166212535, |
| "grad_norm": 0.17441681027412415, |
| "learning_rate": 1.2028066723885612e-05, |
| "loss": 0.0163, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.683923705722071, |
| "grad_norm": 0.18608888983726501, |
| "learning_rate": 1.1813756282965888e-05, |
| "loss": 0.0176, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.690735694822888, |
| "grad_norm": 0.18648923933506012, |
| "learning_rate": 1.1601116232052638e-05, |
| "loss": 0.0168, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.697547683923706, |
| "grad_norm": 0.15261727571487427, |
| "learning_rate": 1.1390155872710517e-05, |
| "loss": 0.0149, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.7043596730245234, |
| "grad_norm": 0.2162063866853714, |
| "learning_rate": 1.1180884433029087e-05, |
| "loss": 0.0168, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.7111716621253406, |
| "grad_norm": 0.24533921480178833, |
| "learning_rate": 1.097331106721904e-05, |
| "loss": 0.0147, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.717983651226158, |
| "grad_norm": 0.20895080268383026, |
| "learning_rate": 1.0767444855211862e-05, |
| "loss": 0.015, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.7247956403269757, |
| "grad_norm": 0.2006479650735855, |
| "learning_rate": 1.0563294802262558e-05, |
| "loss": 0.0173, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.731607629427793, |
| "grad_norm": 0.16398422420024872, |
| "learning_rate": 1.0360869838555809e-05, |
| "loss": 0.0169, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.7384196185286105, |
| "grad_norm": 0.22024202346801758, |
| "learning_rate": 1.0160178818815313e-05, |
| "loss": 0.015, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.7452316076294276, |
| "grad_norm": 0.1872708946466446, |
| "learning_rate": 9.961230521916387e-06, |
| "loss": 0.0168, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.7520435967302452, |
| "grad_norm": 0.2346954643726349, |
| "learning_rate": 9.764033650502074e-06, |
| "loss": 0.0176, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.758855585831063, |
| "grad_norm": 0.15068836510181427, |
| "learning_rate": 9.568596830602344e-06, |
| "loss": 0.0137, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.76566757493188, |
| "grad_norm": 0.20182640850543976, |
| "learning_rate": 9.37492861125681e-06, |
| "loss": 0.0181, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.7724795640326976, |
| "grad_norm": 0.1375190019607544, |
| "learning_rate": 9.183037464140804e-06, |
| "loss": 0.0158, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.7792915531335147, |
| "grad_norm": 0.25182825326919556, |
| "learning_rate": 8.992931783194735e-06, |
| "loss": 0.0134, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.7861035422343323, |
| "grad_norm": 0.18647728860378265, |
| "learning_rate": 8.80461988425696e-06, |
| "loss": 0.0136, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.79291553133515, |
| "grad_norm": 0.16191458702087402, |
| "learning_rate": 8.618110004699974e-06, |
| "loss": 0.0164, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.799727520435967, |
| "grad_norm": 0.18361864984035492, |
| "learning_rate": 8.4334103030701e-06, |
| "loss": 0.0155, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.8065395095367847, |
| "grad_norm": 0.21431824564933777, |
| "learning_rate": 8.25052885873066e-06, |
| "loss": 0.0154, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.8133514986376023, |
| "grad_norm": 0.18994954228401184, |
| "learning_rate": 8.06947367150846e-06, |
| "loss": 0.016, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.8201634877384194, |
| "grad_norm": 0.21481618285179138, |
| "learning_rate": 7.890252661343938e-06, |
| "loss": 0.0166, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.826975476839237, |
| "grad_norm": 0.11670587211847305, |
| "learning_rate": 7.712873667944681e-06, |
| "loss": 0.0142, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.8337874659400546, |
| "grad_norm": 0.19601042568683624, |
| "learning_rate": 7.537344450442469e-06, |
| "loss": 0.0104, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.8405994550408717, |
| "grad_norm": 0.15036450326442719, |
| "learning_rate": 7.36367268705393e-06, |
| "loss": 0.0174, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.8474114441416893, |
| "grad_norm": 0.23941321671009064, |
| "learning_rate": 7.1918659747446e-06, |
| "loss": 0.0191, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.854223433242507, |
| "grad_norm": 0.1950898915529251, |
| "learning_rate": 7.021931828896666e-06, |
| "loss": 0.018, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.861035422343324, |
| "grad_norm": 0.23307918012142181, |
| "learning_rate": 6.8538776829801584e-06, |
| "loss": 0.0127, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.8678474114441417, |
| "grad_norm": 0.23717965185642242, |
| "learning_rate": 6.687710888227849e-06, |
| "loss": 0.0125, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.8746594005449593, |
| "grad_norm": 0.18568864464759827, |
| "learning_rate": 6.5234387133136565e-06, |
| "loss": 0.0132, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.8814713896457764, |
| "grad_norm": 0.18601331114768982, |
| "learning_rate": 6.361068344034665e-06, |
| "loss": 0.0156, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.888283378746594, |
| "grad_norm": 0.1895252913236618, |
| "learning_rate": 6.200606882996846e-06, |
| "loss": 0.0144, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.8950953678474116, |
| "grad_norm": 0.13856425881385803, |
| "learning_rate": 6.042061349304312e-06, |
| "loss": 0.0164, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.9019073569482288, |
| "grad_norm": 0.14244164526462555, |
| "learning_rate": 5.885438678252342e-06, |
| "loss": 0.0178, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.9087193460490464, |
| "grad_norm": 0.10831771790981293, |
| "learning_rate": 5.730745721023939e-06, |
| "loss": 0.0135, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.915531335149864, |
| "grad_norm": 0.2154112458229065, |
| "learning_rate": 5.577989244390192e-06, |
| "loss": 0.014, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.922343324250681, |
| "grad_norm": 0.1787579506635666, |
| "learning_rate": 5.4271759304142635e-06, |
| "loss": 0.0122, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.9291553133514987, |
| "grad_norm": 0.20089909434318542, |
| "learning_rate": 5.278312376159051e-06, |
| "loss": 0.0147, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.9359673024523163, |
| "grad_norm": 0.20588675141334534, |
| "learning_rate": 5.1314050933986944e-06, |
| "loss": 0.0134, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.9427792915531334, |
| "grad_norm": 0.15475359559059143, |
| "learning_rate": 4.986460508333634e-06, |
| "loss": 0.0171, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.949591280653951, |
| "grad_norm": 0.12290208041667938, |
| "learning_rate": 4.843484961309597e-06, |
| "loss": 0.0108, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.9564032697547686, |
| "grad_norm": 0.23685646057128906, |
| "learning_rate": 4.702484706540161e-06, |
| "loss": 0.015, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.963215258855586, |
| "grad_norm": 0.17012353241443634, |
| "learning_rate": 4.563465911833259e-06, |
| "loss": 0.0144, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.9700272479564034, |
| "grad_norm": 0.15839093923568726, |
| "learning_rate": 4.426434658321344e-06, |
| "loss": 0.0118, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.976839237057221, |
| "grad_norm": 0.14717762172222137, |
| "learning_rate": 4.2913969401953466e-06, |
| "loss": 0.0135, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.983651226158038, |
| "grad_norm": 0.16831554472446442, |
| "learning_rate": 4.15835866444253e-06, |
| "loss": 0.013, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.9904632152588557, |
| "grad_norm": 0.13316653668880463, |
| "learning_rate": 4.027325650588043e-06, |
| "loss": 0.0167, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.997275204359673, |
| "grad_norm": 0.2631996273994446, |
| "learning_rate": 3.898303630440419e-06, |
| "loss": 0.0178, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.0040871934604905, |
| "grad_norm": 0.16159358620643616, |
| "learning_rate": 3.7712982478407877e-06, |
| "loss": 0.0169, |
| "step": 4410 |
| }, |
| { |
| "epoch": 3.010899182561308, |
| "grad_norm": 0.16235774755477905, |
| "learning_rate": 3.6463150584160053e-06, |
| "loss": 0.0171, |
| "step": 4420 |
| }, |
| { |
| "epoch": 3.017711171662125, |
| "grad_norm": 0.11211927980184555, |
| "learning_rate": 3.5233595293356957e-06, |
| "loss": 0.0117, |
| "step": 4430 |
| }, |
| { |
| "epoch": 3.024523160762943, |
| "grad_norm": 0.18224704265594482, |
| "learning_rate": 3.4024370390730033e-06, |
| "loss": 0.017, |
| "step": 4440 |
| }, |
| { |
| "epoch": 3.0313351498637604, |
| "grad_norm": 0.18648995459079742, |
| "learning_rate": 3.2835528771693992e-06, |
| "loss": 0.0144, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.0381471389645776, |
| "grad_norm": 0.1381874680519104, |
| "learning_rate": 3.1667122440032505e-06, |
| "loss": 0.0131, |
| "step": 4460 |
| }, |
| { |
| "epoch": 3.044959128065395, |
| "grad_norm": 0.13673441112041473, |
| "learning_rate": 3.051920250562351e-06, |
| "loss": 0.0126, |
| "step": 4470 |
| }, |
| { |
| "epoch": 3.0517711171662127, |
| "grad_norm": 0.17434169352054596, |
| "learning_rate": 2.939181918220385e-06, |
| "loss": 0.0136, |
| "step": 4480 |
| }, |
| { |
| "epoch": 3.05858310626703, |
| "grad_norm": 0.17766402661800385, |
| "learning_rate": 2.8285021785172226e-06, |
| "loss": 0.0137, |
| "step": 4490 |
| }, |
| { |
| "epoch": 3.0653950953678475, |
| "grad_norm": 0.22053247690200806, |
| "learning_rate": 2.7198858729432288e-06, |
| "loss": 0.0145, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.0722070844686646, |
| "grad_norm": 0.19397376477718353, |
| "learning_rate": 2.6133377527274905e-06, |
| "loss": 0.0149, |
| "step": 4510 |
| }, |
| { |
| "epoch": 3.0790190735694822, |
| "grad_norm": 0.10889869183301926, |
| "learning_rate": 2.5088624786299366e-06, |
| "loss": 0.0123, |
| "step": 4520 |
| }, |
| { |
| "epoch": 3.0858310626703, |
| "grad_norm": 0.1636773943901062, |
| "learning_rate": 2.406464620737531e-06, |
| "loss": 0.0127, |
| "step": 4530 |
| }, |
| { |
| "epoch": 3.092643051771117, |
| "grad_norm": 0.13466012477874756, |
| "learning_rate": 2.3061486582642734e-06, |
| "loss": 0.0135, |
| "step": 4540 |
| }, |
| { |
| "epoch": 3.0994550408719346, |
| "grad_norm": 0.13705144822597504, |
| "learning_rate": 2.2079189793553667e-06, |
| "loss": 0.0126, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.106267029972752, |
| "grad_norm": 0.204204723238945, |
| "learning_rate": 2.111779880895165e-06, |
| "loss": 0.011, |
| "step": 4560 |
| }, |
| { |
| "epoch": 3.1130790190735693, |
| "grad_norm": 0.17932602763175964, |
| "learning_rate": 2.01773556831929e-06, |
| "loss": 0.0118, |
| "step": 4570 |
| }, |
| { |
| "epoch": 3.119891008174387, |
| "grad_norm": 0.18473263084888458, |
| "learning_rate": 1.9257901554306513e-06, |
| "loss": 0.0118, |
| "step": 4580 |
| }, |
| { |
| "epoch": 3.1267029972752045, |
| "grad_norm": 0.1656373143196106, |
| "learning_rate": 1.835947664219445e-06, |
| "loss": 0.0135, |
| "step": 4590 |
| }, |
| { |
| "epoch": 3.1335149863760217, |
| "grad_norm": 0.18078264594078064, |
| "learning_rate": 1.748212024687307e-06, |
| "loss": 0.0118, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.1403269754768393, |
| "grad_norm": 0.23627698421478271, |
| "learning_rate": 1.6625870746753147e-06, |
| "loss": 0.0151, |
| "step": 4610 |
| }, |
| { |
| "epoch": 3.147138964577657, |
| "grad_norm": 0.15048933029174805, |
| "learning_rate": 1.5790765596961853e-06, |
| "loss": 0.015, |
| "step": 4620 |
| }, |
| { |
| "epoch": 3.153950953678474, |
| "grad_norm": 0.2178574502468109, |
| "learning_rate": 1.4976841327703717e-06, |
| "loss": 0.0135, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.1607629427792916, |
| "grad_norm": 0.1818486899137497, |
| "learning_rate": 1.4184133542663014e-06, |
| "loss": 0.0122, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.167574931880109, |
| "grad_norm": 0.1654607504606247, |
| "learning_rate": 1.341267691744641e-06, |
| "loss": 0.0128, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.1743869209809263, |
| "grad_norm": 0.207754448056221, |
| "learning_rate": 1.2662505198065666e-06, |
| "loss": 0.0224, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.181198910081744, |
| "grad_norm": 0.16341248154640198, |
| "learning_rate": 1.193365119946216e-06, |
| "loss": 0.0153, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.1880108991825615, |
| "grad_norm": 0.1576090306043625, |
| "learning_rate": 1.1226146804070859e-06, |
| "loss": 0.0102, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.1948228882833787, |
| "grad_norm": 0.1799905151128769, |
| "learning_rate": 1.0540022960426111e-06, |
| "loss": 0.0134, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.2016348773841963, |
| "grad_norm": 0.23539473116397858, |
| "learning_rate": 9.875309681807443e-07, |
| "loss": 0.0171, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.2084468664850134, |
| "grad_norm": 0.1891935020685196, |
| "learning_rate": 9.232036044927061e-07, |
| "loss": 0.0136, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.215258855585831, |
| "grad_norm": 0.1502537727355957, |
| "learning_rate": 8.610230188657919e-07, |
| "loss": 0.0135, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.2220708446866486, |
| "grad_norm": 0.14308865368366241, |
| "learning_rate": 8.009919312802372e-07, |
| "loss": 0.0125, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.2288828337874658, |
| "grad_norm": 0.17500245571136475, |
| "learning_rate": 7.431129676902904e-07, |
| "loss": 0.01, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.2356948228882834, |
| "grad_norm": 0.12005341053009033, |
| "learning_rate": 6.873886599093215e-07, |
| "loss": 0.013, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.242506811989101, |
| "grad_norm": 0.22890767455101013, |
| "learning_rate": 6.338214454990776e-07, |
| "loss": 0.0165, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.249318801089918, |
| "grad_norm": 0.12232371419668198, |
| "learning_rate": 5.82413667663051e-07, |
| "loss": 0.0153, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.2561307901907357, |
| "grad_norm": 0.16289682686328888, |
| "learning_rate": 5.331675751439725e-07, |
| "loss": 0.0144, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.2629427792915533, |
| "grad_norm": 0.19280773401260376, |
| "learning_rate": 4.86085322125479e-07, |
| "loss": 0.012, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.2697547683923704, |
| "grad_norm": 0.12008260190486908, |
| "learning_rate": 4.411689681378284e-07, |
| "loss": 0.0148, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.276566757493188, |
| "grad_norm": 0.16363725066184998, |
| "learning_rate": 3.9842047796786466e-07, |
| "loss": 0.0125, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.2833787465940056, |
| "grad_norm": 0.16861975193023682, |
| "learning_rate": 3.578417215730323e-07, |
| "loss": 0.0114, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.290190735694823, |
| "grad_norm": 0.17795579135417938, |
| "learning_rate": 3.1943447399958027e-07, |
| "loss": 0.0136, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.2970027247956404, |
| "grad_norm": 0.07885803282260895, |
| "learning_rate": 2.8320041530495724e-07, |
| "loss": 0.0203, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.3038147138964575, |
| "grad_norm": 0.12531672418117523, |
| "learning_rate": 2.491411304842539e-07, |
| "loss": 0.0129, |
| "step": 4850 |
| }, |
| { |
| "epoch": 3.310626702997275, |
| "grad_norm": 0.17444051802158356, |
| "learning_rate": 2.1725810940094183e-07, |
| "loss": 0.012, |
| "step": 4860 |
| }, |
| { |
| "epoch": 3.3174386920980927, |
| "grad_norm": 0.14167378842830658, |
| "learning_rate": 1.8755274672164202e-07, |
| "loss": 0.0129, |
| "step": 4870 |
| }, |
| { |
| "epoch": 3.32425068119891, |
| "grad_norm": 0.12788553535938263, |
| "learning_rate": 1.600263418551573e-07, |
| "loss": 0.0148, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.3310626702997275, |
| "grad_norm": 0.2057434469461441, |
| "learning_rate": 1.346800988955954e-07, |
| "loss": 0.0154, |
| "step": 4890 |
| }, |
| { |
| "epoch": 3.337874659400545, |
| "grad_norm": 0.17330636084079742, |
| "learning_rate": 1.1151512656975005e-07, |
| "loss": 0.0116, |
| "step": 4900 |
| }, |
| { |
| "epoch": 3.344686648501362, |
| "grad_norm": 0.09420597553253174, |
| "learning_rate": 9.053243818853973e-08, |
| "loss": 0.0124, |
| "step": 4910 |
| }, |
| { |
| "epoch": 3.35149863760218, |
| "grad_norm": 0.15236696600914001, |
| "learning_rate": 7.173295160273763e-08, |
| "loss": 0.0124, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.3583106267029974, |
| "grad_norm": 0.15374703705310822, |
| "learning_rate": 5.511748916279258e-08, |
| "loss": 0.0132, |
| "step": 4930 |
| }, |
| { |
| "epoch": 3.3651226158038146, |
| "grad_norm": 0.273964524269104, |
| "learning_rate": 4.068677768285234e-08, |
| "loss": 0.0115, |
| "step": 4940 |
| }, |
| { |
| "epoch": 3.371934604904632, |
| "grad_norm": 0.11771193891763687, |
| "learning_rate": 2.844144840898344e-08, |
| "loss": 0.015, |
| "step": 4950 |
| }, |
| { |
| "epoch": 3.3787465940054497, |
| "grad_norm": 0.12935270369052887, |
| "learning_rate": 1.8382036991559936e-08, |
| "loss": 0.0112, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.385558583106267, |
| "grad_norm": 0.1538880318403244, |
| "learning_rate": 1.0508983461832156e-08, |
| "loss": 0.0166, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.3923705722070845, |
| "grad_norm": 0.10488380491733551, |
| "learning_rate": 4.822632212653222e-09, |
| "loss": 0.018, |
| "step": 4980 |
| }, |
| { |
| "epoch": 3.399182561307902, |
| "grad_norm": 0.18381240963935852, |
| "learning_rate": 1.3232319834632912e-09, |
| "loss": 0.0135, |
| "step": 4990 |
| }, |
| { |
| "epoch": 3.4059945504087192, |
| "grad_norm": 0.14481449127197266, |
| "learning_rate": 1.0935849353854721e-11, |
| "loss": 0.0137, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.412806539509537, |
| "grad_norm": 0.44966641068458557, |
| "learning_rate": 5.398064519110622e-05, |
| "loss": 0.0203, |
| "step": 5010 |
| }, |
| { |
| "epoch": 3.4196185286103544, |
| "grad_norm": 0.3312857747077942, |
| "learning_rate": 5.3815801579167394e-05, |
| "loss": 0.0175, |
| "step": 5020 |
| }, |
| { |
| "epoch": 3.4264305177111716, |
| "grad_norm": 0.5842679738998413, |
| "learning_rate": 5.365091623823382e-05, |
| "loss": 0.0213, |
| "step": 5030 |
| }, |
| { |
| "epoch": 3.433242506811989, |
| "grad_norm": 0.5707949995994568, |
| "learning_rate": 5.348599097146521e-05, |
| "loss": 0.0228, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.4400544959128068, |
| "grad_norm": 0.2389402836561203, |
| "learning_rate": 5.3321027582457836e-05, |
| "loss": 0.0237, |
| "step": 5050 |
| }, |
| { |
| "epoch": 3.446866485013624, |
| "grad_norm": 0.3142755329608917, |
| "learning_rate": 5.315602787522491e-05, |
| "loss": 0.0248, |
| "step": 5060 |
| }, |
| { |
| "epoch": 3.4536784741144415, |
| "grad_norm": 0.35478901863098145, |
| "learning_rate": 5.299099365417678e-05, |
| "loss": 0.0201, |
| "step": 5070 |
| }, |
| { |
| "epoch": 3.460490463215259, |
| "grad_norm": 0.4287269115447998, |
| "learning_rate": 5.2825926724101236e-05, |
| "loss": 0.026, |
| "step": 5080 |
| }, |
| { |
| "epoch": 3.4673024523160763, |
| "grad_norm": 0.5050956606864929, |
| "learning_rate": 5.26608288901438e-05, |
| "loss": 0.0295, |
| "step": 5090 |
| }, |
| { |
| "epoch": 3.474114441416894, |
| "grad_norm": 0.36942875385284424, |
| "learning_rate": 5.24957019577879e-05, |
| "loss": 0.0259, |
| "step": 5100 |
| }, |
| { |
| "epoch": 3.480926430517711, |
| "grad_norm": 0.40414538979530334, |
| "learning_rate": 5.2330547732835266e-05, |
| "loss": 0.0242, |
| "step": 5110 |
| }, |
| { |
| "epoch": 3.4877384196185286, |
| "grad_norm": 0.35221511125564575, |
| "learning_rate": 5.2165368021385996e-05, |
| "loss": 0.0304, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.494550408719346, |
| "grad_norm": 0.4094237685203552, |
| "learning_rate": 5.200016462981897e-05, |
| "loss": 0.0249, |
| "step": 5130 |
| }, |
| { |
| "epoch": 3.5013623978201633, |
| "grad_norm": 0.24707941710948944, |
| "learning_rate": 5.1834939364772015e-05, |
| "loss": 0.0219, |
| "step": 5140 |
| }, |
| { |
| "epoch": 3.508174386920981, |
| "grad_norm": 0.38713163137435913, |
| "learning_rate": 5.166969403312214e-05, |
| "loss": 0.0288, |
| "step": 5150 |
| }, |
| { |
| "epoch": 3.5149863760217985, |
| "grad_norm": 0.3290533721446991, |
| "learning_rate": 5.1504430441965844e-05, |
| "loss": 0.0262, |
| "step": 5160 |
| }, |
| { |
| "epoch": 3.5217983651226157, |
| "grad_norm": 0.3959462642669678, |
| "learning_rate": 5.133915039859923e-05, |
| "loss": 0.02, |
| "step": 5170 |
| }, |
| { |
| "epoch": 3.5286103542234333, |
| "grad_norm": 0.3446705937385559, |
| "learning_rate": 5.1173855710498444e-05, |
| "loss": 0.023, |
| "step": 5180 |
| }, |
| { |
| "epoch": 3.5354223433242504, |
| "grad_norm": 0.27180591225624084, |
| "learning_rate": 5.100854818529967e-05, |
| "loss": 0.0283, |
| "step": 5190 |
| }, |
| { |
| "epoch": 3.542234332425068, |
| "grad_norm": 0.39243829250335693, |
| "learning_rate": 5.084322963077951e-05, |
| "loss": 0.029, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.5490463215258856, |
| "grad_norm": 0.2588927149772644, |
| "learning_rate": 5.067790185483522e-05, |
| "loss": 0.0282, |
| "step": 5210 |
| }, |
| { |
| "epoch": 3.5558583106267028, |
| "grad_norm": 0.18376407027244568, |
| "learning_rate": 5.0512566665464844e-05, |
| "loss": 0.0272, |
| "step": 5220 |
| }, |
| { |
| "epoch": 3.5626702997275204, |
| "grad_norm": 0.29992175102233887, |
| "learning_rate": 5.034722587074755e-05, |
| "loss": 0.0257, |
| "step": 5230 |
| }, |
| { |
| "epoch": 3.569482288828338, |
| "grad_norm": 0.243015319108963, |
| "learning_rate": 5.018188127882375e-05, |
| "loss": 0.0229, |
| "step": 5240 |
| }, |
| { |
| "epoch": 3.576294277929155, |
| "grad_norm": 0.32886067032814026, |
| "learning_rate": 5.0016534697875417e-05, |
| "loss": 0.0211, |
| "step": 5250 |
| }, |
| { |
| "epoch": 3.5831062670299727, |
| "grad_norm": 0.4220637083053589, |
| "learning_rate": 4.9851187936106294e-05, |
| "loss": 0.0246, |
| "step": 5260 |
| }, |
| { |
| "epoch": 3.5899182561307903, |
| "grad_norm": 0.2974489629268646, |
| "learning_rate": 4.968584280172206e-05, |
| "loss": 0.0233, |
| "step": 5270 |
| }, |
| { |
| "epoch": 3.5967302452316074, |
| "grad_norm": 0.502668023109436, |
| "learning_rate": 4.95205011029106e-05, |
| "loss": 0.0285, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.603542234332425, |
| "grad_norm": 0.3639957904815674, |
| "learning_rate": 4.935516464782227e-05, |
| "loss": 0.0268, |
| "step": 5290 |
| }, |
| { |
| "epoch": 3.6103542234332426, |
| "grad_norm": 0.36707913875579834, |
| "learning_rate": 4.918983524455003e-05, |
| "loss": 0.0246, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.61716621253406, |
| "grad_norm": 0.22181017696857452, |
| "learning_rate": 4.9024514701109766e-05, |
| "loss": 0.0252, |
| "step": 5310 |
| }, |
| { |
| "epoch": 3.6239782016348774, |
| "grad_norm": 0.42766740918159485, |
| "learning_rate": 4.885920482542043e-05, |
| "loss": 0.0225, |
| "step": 5320 |
| }, |
| { |
| "epoch": 3.630790190735695, |
| "grad_norm": 0.26574602723121643, |
| "learning_rate": 4.869390742528438e-05, |
| "loss": 0.0208, |
| "step": 5330 |
| }, |
| { |
| "epoch": 3.637602179836512, |
| "grad_norm": 0.18494778871536255, |
| "learning_rate": 4.852862430836744e-05, |
| "loss": 0.0248, |
| "step": 5340 |
| }, |
| { |
| "epoch": 3.6444141689373297, |
| "grad_norm": 0.3686949610710144, |
| "learning_rate": 4.836335728217933e-05, |
| "loss": 0.0226, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.6512261580381473, |
| "grad_norm": 0.29411113262176514, |
| "learning_rate": 4.819810815405379e-05, |
| "loss": 0.0255, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.6580381471389645, |
| "grad_norm": 0.2379477620124817, |
| "learning_rate": 4.803287873112877e-05, |
| "loss": 0.0229, |
| "step": 5370 |
| }, |
| { |
| "epoch": 3.664850136239782, |
| "grad_norm": 0.3780541718006134, |
| "learning_rate": 4.786767082032681e-05, |
| "loss": 0.0234, |
| "step": 5380 |
| }, |
| { |
| "epoch": 3.6716621253405997, |
| "grad_norm": 0.24052190780639648, |
| "learning_rate": 4.77024862283351e-05, |
| "loss": 0.0229, |
| "step": 5390 |
| }, |
| { |
| "epoch": 3.678474114441417, |
| "grad_norm": 0.2713554799556732, |
| "learning_rate": 4.753732676158593e-05, |
| "loss": 0.0242, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.6852861035422344, |
| "grad_norm": 0.3661803603172302, |
| "learning_rate": 4.737219422623672e-05, |
| "loss": 0.0239, |
| "step": 5410 |
| }, |
| { |
| "epoch": 3.692098092643052, |
| "grad_norm": 0.4185531735420227, |
| "learning_rate": 4.720709042815044e-05, |
| "loss": 0.0204, |
| "step": 5420 |
| }, |
| { |
| "epoch": 3.698910081743869, |
| "grad_norm": 0.2620242238044739, |
| "learning_rate": 4.704201717287578e-05, |
| "loss": 0.0211, |
| "step": 5430 |
| }, |
| { |
| "epoch": 3.7057220708446867, |
| "grad_norm": 0.26090627908706665, |
| "learning_rate": 4.6876976265627404e-05, |
| "loss": 0.0224, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.7125340599455043, |
| "grad_norm": 0.2731458842754364, |
| "learning_rate": 4.671196951126626e-05, |
| "loss": 0.0269, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.7193460490463215, |
| "grad_norm": 0.31026485562324524, |
| "learning_rate": 4.654699871427971e-05, |
| "loss": 0.0218, |
| "step": 5460 |
| }, |
| { |
| "epoch": 3.726158038147139, |
| "grad_norm": 0.233415424823761, |
| "learning_rate": 4.6382065678762034e-05, |
| "loss": 0.0204, |
| "step": 5470 |
| }, |
| { |
| "epoch": 3.7329700272479567, |
| "grad_norm": 0.3344708979129791, |
| "learning_rate": 4.6217172208394424e-05, |
| "loss": 0.0197, |
| "step": 5480 |
| }, |
| { |
| "epoch": 3.739782016348774, |
| "grad_norm": 0.25975632667541504, |
| "learning_rate": 4.605232010642549e-05, |
| "loss": 0.0194, |
| "step": 5490 |
| }, |
| { |
| "epoch": 3.7465940054495914, |
| "grad_norm": 0.2950715720653534, |
| "learning_rate": 4.588751117565142e-05, |
| "loss": 0.0193, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.7534059945504086, |
| "grad_norm": 0.2784842252731323, |
| "learning_rate": 4.5722747218396214e-05, |
| "loss": 0.0251, |
| "step": 5510 |
| }, |
| { |
| "epoch": 3.760217983651226, |
| "grad_norm": 0.2176719456911087, |
| "learning_rate": 4.5558030036492194e-05, |
| "loss": 0.0204, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.7670299727520433, |
| "grad_norm": 0.28440573811531067, |
| "learning_rate": 4.539336143125999e-05, |
| "loss": 0.0265, |
| "step": 5530 |
| }, |
| { |
| "epoch": 3.773841961852861, |
| "grad_norm": 0.25604936480522156, |
| "learning_rate": 4.522874320348916e-05, |
| "loss": 0.0225, |
| "step": 5540 |
| }, |
| { |
| "epoch": 3.7806539509536785, |
| "grad_norm": 0.2565711438655853, |
| "learning_rate": 4.506417715341821e-05, |
| "loss": 0.019, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.7874659400544957, |
| "grad_norm": 0.2216968685388565, |
| "learning_rate": 4.489966508071511e-05, |
| "loss": 0.022, |
| "step": 5560 |
| }, |
| { |
| "epoch": 3.7942779291553133, |
| "grad_norm": 0.22490093111991882, |
| "learning_rate": 4.4735208784457575e-05, |
| "loss": 0.0197, |
| "step": 5570 |
| }, |
| { |
| "epoch": 3.801089918256131, |
| "grad_norm": 0.3565233647823334, |
| "learning_rate": 4.457081006311325e-05, |
| "loss": 0.0242, |
| "step": 5580 |
| }, |
| { |
| "epoch": 3.807901907356948, |
| "grad_norm": 0.270898699760437, |
| "learning_rate": 4.440647071452027e-05, |
| "loss": 0.0226, |
| "step": 5590 |
| }, |
| { |
| "epoch": 3.8147138964577656, |
| "grad_norm": 0.26380616426467896, |
| "learning_rate": 4.424219253586737e-05, |
| "loss": 0.0221, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.821525885558583, |
| "grad_norm": 0.3055083751678467, |
| "learning_rate": 4.407797732367443e-05, |
| "loss": 0.0251, |
| "step": 5610 |
| }, |
| { |
| "epoch": 3.8283378746594003, |
| "grad_norm": 0.2543126046657562, |
| "learning_rate": 4.391382687377268e-05, |
| "loss": 0.0248, |
| "step": 5620 |
| }, |
| { |
| "epoch": 3.835149863760218, |
| "grad_norm": 0.43203112483024597, |
| "learning_rate": 4.374974298128512e-05, |
| "loss": 0.0202, |
| "step": 5630 |
| }, |
| { |
| "epoch": 3.8419618528610355, |
| "grad_norm": 0.20501923561096191, |
| "learning_rate": 4.358572744060699e-05, |
| "loss": 0.0243, |
| "step": 5640 |
| }, |
| { |
| "epoch": 3.8487738419618527, |
| "grad_norm": 0.2543809413909912, |
| "learning_rate": 4.342178204538588e-05, |
| "loss": 0.0202, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.8555858310626703, |
| "grad_norm": 0.37627357244491577, |
| "learning_rate": 4.325790858850241e-05, |
| "loss": 0.0208, |
| "step": 5660 |
| }, |
| { |
| "epoch": 3.862397820163488, |
| "grad_norm": 0.19202812016010284, |
| "learning_rate": 4.309410886205043e-05, |
| "loss": 0.0228, |
| "step": 5670 |
| }, |
| { |
| "epoch": 3.869209809264305, |
| "grad_norm": 0.19026115536689758, |
| "learning_rate": 4.293038465731752e-05, |
| "loss": 0.0222, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.8760217983651226, |
| "grad_norm": 0.27928781509399414, |
| "learning_rate": 4.276673776476533e-05, |
| "loss": 0.0222, |
| "step": 5690 |
| }, |
| { |
| "epoch": 3.88283378746594, |
| "grad_norm": 0.30648189783096313, |
| "learning_rate": 4.260316997401007e-05, |
| "loss": 0.0186, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.8896457765667574, |
| "grad_norm": 0.2663455903530121, |
| "learning_rate": 4.243968307380293e-05, |
| "loss": 0.0237, |
| "step": 5710 |
| }, |
| { |
| "epoch": 3.896457765667575, |
| "grad_norm": 0.22592630982398987, |
| "learning_rate": 4.22762788520104e-05, |
| "loss": 0.0234, |
| "step": 5720 |
| }, |
| { |
| "epoch": 3.9032697547683926, |
| "grad_norm": 0.21950973570346832, |
| "learning_rate": 4.211295909559491e-05, |
| "loss": 0.0265, |
| "step": 5730 |
| }, |
| { |
| "epoch": 3.9100817438692097, |
| "grad_norm": 0.21050743758678436, |
| "learning_rate": 4.194972559059511e-05, |
| "loss": 0.0197, |
| "step": 5740 |
| }, |
| { |
| "epoch": 3.9168937329700273, |
| "grad_norm": 0.22975432872772217, |
| "learning_rate": 4.178658012210651e-05, |
| "loss": 0.0228, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.923705722070845, |
| "grad_norm": 0.349044531583786, |
| "learning_rate": 4.162352447426177e-05, |
| "loss": 0.0207, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.930517711171662, |
| "grad_norm": 0.22395232319831848, |
| "learning_rate": 4.146056043021135e-05, |
| "loss": 0.0203, |
| "step": 5770 |
| }, |
| { |
| "epoch": 3.9373297002724796, |
| "grad_norm": 0.24076318740844727, |
| "learning_rate": 4.1297689772103944e-05, |
| "loss": 0.0218, |
| "step": 5780 |
| }, |
| { |
| "epoch": 3.9441416893732972, |
| "grad_norm": 0.311708003282547, |
| "learning_rate": 4.113491428106694e-05, |
| "loss": 0.0247, |
| "step": 5790 |
| }, |
| { |
| "epoch": 3.9509536784741144, |
| "grad_norm": 0.265595406293869, |
| "learning_rate": 4.0972235737187055e-05, |
| "loss": 0.0181, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.957765667574932, |
| "grad_norm": 0.3528865873813629, |
| "learning_rate": 4.080965591949076e-05, |
| "loss": 0.0194, |
| "step": 5810 |
| }, |
| { |
| "epoch": 3.9645776566757496, |
| "grad_norm": 0.3113243877887726, |
| "learning_rate": 4.0647176605924924e-05, |
| "loss": 0.0225, |
| "step": 5820 |
| }, |
| { |
| "epoch": 3.9713896457765667, |
| "grad_norm": 0.3198659420013428, |
| "learning_rate": 4.0484799573337255e-05, |
| "loss": 0.0256, |
| "step": 5830 |
| }, |
| { |
| "epoch": 3.9782016348773843, |
| "grad_norm": 0.22167012095451355, |
| "learning_rate": 4.032252659745699e-05, |
| "loss": 0.0226, |
| "step": 5840 |
| }, |
| { |
| "epoch": 3.9850136239782015, |
| "grad_norm": 0.22256286442279816, |
| "learning_rate": 4.016035945287539e-05, |
| "loss": 0.0278, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.991825613079019, |
| "grad_norm": 0.2504684329032898, |
| "learning_rate": 3.999829991302635e-05, |
| "loss": 0.0245, |
| "step": 5860 |
| }, |
| { |
| "epoch": 3.9986376021798367, |
| "grad_norm": 0.2460675686597824, |
| "learning_rate": 3.983634975016707e-05, |
| "loss": 0.0214, |
| "step": 5870 |
| }, |
| { |
| "epoch": 4.005449591280654, |
| "grad_norm": 0.28262001276016235, |
| "learning_rate": 3.967451073535854e-05, |
| "loss": 0.0256, |
| "step": 5880 |
| }, |
| { |
| "epoch": 4.012261580381471, |
| "grad_norm": 0.3468887507915497, |
| "learning_rate": 3.951278463844633e-05, |
| "loss": 0.0251, |
| "step": 5890 |
| }, |
| { |
| "epoch": 4.0190735694822886, |
| "grad_norm": 0.3931543231010437, |
| "learning_rate": 3.935117322804111e-05, |
| "loss": 0.0234, |
| "step": 5900 |
| }, |
| { |
| "epoch": 4.025885558583107, |
| "grad_norm": 0.35787367820739746, |
| "learning_rate": 3.918967827149938e-05, |
| "loss": 0.0175, |
| "step": 5910 |
| }, |
| { |
| "epoch": 4.032697547683924, |
| "grad_norm": 0.24113652110099792, |
| "learning_rate": 3.9028301534904094e-05, |
| "loss": 0.0222, |
| "step": 5920 |
| }, |
| { |
| "epoch": 4.039509536784741, |
| "grad_norm": 0.265298068523407, |
| "learning_rate": 3.88670447830454e-05, |
| "loss": 0.0218, |
| "step": 5930 |
| }, |
| { |
| "epoch": 4.046321525885559, |
| "grad_norm": 0.3670673072338104, |
| "learning_rate": 3.870590977940132e-05, |
| "loss": 0.0195, |
| "step": 5940 |
| }, |
| { |
| "epoch": 4.053133514986376, |
| "grad_norm": 0.20872969925403595, |
| "learning_rate": 3.8544898286118404e-05, |
| "loss": 0.0181, |
| "step": 5950 |
| }, |
| { |
| "epoch": 4.059945504087193, |
| "grad_norm": 0.2174217849969864, |
| "learning_rate": 3.838401206399257e-05, |
| "loss": 0.0189, |
| "step": 5960 |
| }, |
| { |
| "epoch": 4.066757493188011, |
| "grad_norm": 0.25039082765579224, |
| "learning_rate": 3.822325287244975e-05, |
| "loss": 0.0203, |
| "step": 5970 |
| }, |
| { |
| "epoch": 4.073569482288828, |
| "grad_norm": 0.2662447690963745, |
| "learning_rate": 3.8062622469526725e-05, |
| "loss": 0.0195, |
| "step": 5980 |
| }, |
| { |
| "epoch": 4.080381471389646, |
| "grad_norm": 0.2717086374759674, |
| "learning_rate": 3.790212261185183e-05, |
| "loss": 0.0186, |
| "step": 5990 |
| }, |
| { |
| "epoch": 4.087193460490464, |
| "grad_norm": 0.2525738477706909, |
| "learning_rate": 3.7741755054625794e-05, |
| "loss": 0.0229, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.094005449591281, |
| "grad_norm": 0.20453284680843353, |
| "learning_rate": 3.758152155160255e-05, |
| "loss": 0.0178, |
| "step": 6010 |
| }, |
| { |
| "epoch": 4.100817438692098, |
| "grad_norm": 0.28254011273384094, |
| "learning_rate": 3.742142385506999e-05, |
| "loss": 0.0171, |
| "step": 6020 |
| }, |
| { |
| "epoch": 4.107629427792916, |
| "grad_norm": 0.4284875690937042, |
| "learning_rate": 3.72614637158309e-05, |
| "loss": 0.0196, |
| "step": 6030 |
| }, |
| { |
| "epoch": 4.114441416893733, |
| "grad_norm": 0.2086813747882843, |
| "learning_rate": 3.710164288318371e-05, |
| "loss": 0.0194, |
| "step": 6040 |
| }, |
| { |
| "epoch": 4.12125340599455, |
| "grad_norm": 0.20289340615272522, |
| "learning_rate": 3.694196310490345e-05, |
| "loss": 0.0152, |
| "step": 6050 |
| }, |
| { |
| "epoch": 4.128065395095367, |
| "grad_norm": 0.34958550333976746, |
| "learning_rate": 3.678242612722259e-05, |
| "loss": 0.0209, |
| "step": 6060 |
| }, |
| { |
| "epoch": 4.1348773841961854, |
| "grad_norm": 0.2462022453546524, |
| "learning_rate": 3.6623033694811953e-05, |
| "loss": 0.0186, |
| "step": 6070 |
| }, |
| { |
| "epoch": 4.141689373297003, |
| "grad_norm": 0.15042909979820251, |
| "learning_rate": 3.6463787550761665e-05, |
| "loss": 0.0198, |
| "step": 6080 |
| }, |
| { |
| "epoch": 4.14850136239782, |
| "grad_norm": 0.2676561176776886, |
| "learning_rate": 3.630468943656202e-05, |
| "loss": 0.0181, |
| "step": 6090 |
| }, |
| { |
| "epoch": 4.155313351498638, |
| "grad_norm": 0.2557305097579956, |
| "learning_rate": 3.6145741092084523e-05, |
| "loss": 0.0168, |
| "step": 6100 |
| }, |
| { |
| "epoch": 4.162125340599455, |
| "grad_norm": 0.28285613656044006, |
| "learning_rate": 3.598694425556278e-05, |
| "loss": 0.0174, |
| "step": 6110 |
| }, |
| { |
| "epoch": 4.168937329700272, |
| "grad_norm": 0.19794082641601562, |
| "learning_rate": 3.58283006635736e-05, |
| "loss": 0.0187, |
| "step": 6120 |
| }, |
| { |
| "epoch": 4.17574931880109, |
| "grad_norm": 0.3199867308139801, |
| "learning_rate": 3.566981205101781e-05, |
| "loss": 0.0188, |
| "step": 6130 |
| }, |
| { |
| "epoch": 4.182561307901907, |
| "grad_norm": 0.21557827293872833, |
| "learning_rate": 3.5511480151101556e-05, |
| "loss": 0.0179, |
| "step": 6140 |
| }, |
| { |
| "epoch": 4.189373297002724, |
| "grad_norm": 0.22134508192539215, |
| "learning_rate": 3.5353306695317104e-05, |
| "loss": 0.0148, |
| "step": 6150 |
| }, |
| { |
| "epoch": 4.1961852861035425, |
| "grad_norm": 0.2104470133781433, |
| "learning_rate": 3.519529341342402e-05, |
| "loss": 0.0193, |
| "step": 6160 |
| }, |
| { |
| "epoch": 4.20299727520436, |
| "grad_norm": 0.2785221040248871, |
| "learning_rate": 3.503744203343026e-05, |
| "loss": 0.0182, |
| "step": 6170 |
| }, |
| { |
| "epoch": 4.209809264305177, |
| "grad_norm": 0.27562573552131653, |
| "learning_rate": 3.487975428157318e-05, |
| "loss": 0.0197, |
| "step": 6180 |
| }, |
| { |
| "epoch": 4.216621253405995, |
| "grad_norm": 0.35668033361434937, |
| "learning_rate": 3.472223188230083e-05, |
| "loss": 0.0196, |
| "step": 6190 |
| }, |
| { |
| "epoch": 4.223433242506812, |
| "grad_norm": 0.2609441578388214, |
| "learning_rate": 3.4564876558252866e-05, |
| "loss": 0.0218, |
| "step": 6200 |
| }, |
| { |
| "epoch": 4.230245231607629, |
| "grad_norm": 0.28609734773635864, |
| "learning_rate": 3.440769003024195e-05, |
| "loss": 0.0169, |
| "step": 6210 |
| }, |
| { |
| "epoch": 4.237057220708447, |
| "grad_norm": 0.18339040875434875, |
| "learning_rate": 3.425067401723477e-05, |
| "loss": 0.0186, |
| "step": 6220 |
| }, |
| { |
| "epoch": 4.243869209809264, |
| "grad_norm": 0.138119637966156, |
| "learning_rate": 3.409383023633325e-05, |
| "loss": 0.0177, |
| "step": 6230 |
| }, |
| { |
| "epoch": 4.2506811989100814, |
| "grad_norm": 0.22962254285812378, |
| "learning_rate": 3.3937160402755894e-05, |
| "loss": 0.0172, |
| "step": 6240 |
| }, |
| { |
| "epoch": 4.2574931880108995, |
| "grad_norm": 0.2682797908782959, |
| "learning_rate": 3.378066622981885e-05, |
| "loss": 0.0189, |
| "step": 6250 |
| }, |
| { |
| "epoch": 4.264305177111717, |
| "grad_norm": 0.20227015018463135, |
| "learning_rate": 3.362434942891738e-05, |
| "loss": 0.022, |
| "step": 6260 |
| }, |
| { |
| "epoch": 4.271117166212534, |
| "grad_norm": 0.2518126368522644, |
| "learning_rate": 3.346821170950693e-05, |
| "loss": 0.0195, |
| "step": 6270 |
| }, |
| { |
| "epoch": 4.277929155313352, |
| "grad_norm": 0.2634272575378418, |
| "learning_rate": 3.3312254779084585e-05, |
| "loss": 0.0188, |
| "step": 6280 |
| }, |
| { |
| "epoch": 4.284741144414169, |
| "grad_norm": 0.25564995408058167, |
| "learning_rate": 3.315648034317039e-05, |
| "loss": 0.0215, |
| "step": 6290 |
| }, |
| { |
| "epoch": 4.291553133514986, |
| "grad_norm": 0.31350597739219666, |
| "learning_rate": 3.3000890105288564e-05, |
| "loss": 0.0181, |
| "step": 6300 |
| }, |
| { |
| "epoch": 4.298365122615804, |
| "grad_norm": 0.23671625554561615, |
| "learning_rate": 3.284548576694908e-05, |
| "loss": 0.0176, |
| "step": 6310 |
| }, |
| { |
| "epoch": 4.305177111716621, |
| "grad_norm": 0.2342391163110733, |
| "learning_rate": 3.2690269027628815e-05, |
| "loss": 0.0156, |
| "step": 6320 |
| }, |
| { |
| "epoch": 4.3119891008174385, |
| "grad_norm": 0.24075733125209808, |
| "learning_rate": 3.253524158475324e-05, |
| "loss": 0.0187, |
| "step": 6330 |
| }, |
| { |
| "epoch": 4.3188010899182565, |
| "grad_norm": 0.16117766499519348, |
| "learning_rate": 3.238040513367757e-05, |
| "loss": 0.019, |
| "step": 6340 |
| }, |
| { |
| "epoch": 4.325613079019074, |
| "grad_norm": 0.16016744077205658, |
| "learning_rate": 3.222576136766843e-05, |
| "loss": 0.0146, |
| "step": 6350 |
| }, |
| { |
| "epoch": 4.332425068119891, |
| "grad_norm": 0.236736461520195, |
| "learning_rate": 3.2071311977885324e-05, |
| "loss": 0.018, |
| "step": 6360 |
| }, |
| { |
| "epoch": 4.339237057220709, |
| "grad_norm": 0.182217076420784, |
| "learning_rate": 3.191705865336197e-05, |
| "loss": 0.0171, |
| "step": 6370 |
| }, |
| { |
| "epoch": 4.346049046321526, |
| "grad_norm": 0.19513262808322906, |
| "learning_rate": 3.1763003080988075e-05, |
| "loss": 0.0155, |
| "step": 6380 |
| }, |
| { |
| "epoch": 4.352861035422343, |
| "grad_norm": 0.19296902418136597, |
| "learning_rate": 3.160914694549063e-05, |
| "loss": 0.0191, |
| "step": 6390 |
| }, |
| { |
| "epoch": 4.359673024523161, |
| "grad_norm": 0.2901662588119507, |
| "learning_rate": 3.145549192941573e-05, |
| "loss": 0.0174, |
| "step": 6400 |
| }, |
| { |
| "epoch": 4.366485013623978, |
| "grad_norm": 0.21007436513900757, |
| "learning_rate": 3.130203971310999e-05, |
| "loss": 0.0194, |
| "step": 6410 |
| }, |
| { |
| "epoch": 4.3732970027247955, |
| "grad_norm": 0.19525951147079468, |
| "learning_rate": 3.114879197470225e-05, |
| "loss": 0.0181, |
| "step": 6420 |
| }, |
| { |
| "epoch": 4.3801089918256135, |
| "grad_norm": 0.22157281637191772, |
| "learning_rate": 3.0995750390085285e-05, |
| "loss": 0.0165, |
| "step": 6430 |
| }, |
| { |
| "epoch": 4.386920980926431, |
| "grad_norm": 0.2652972340583801, |
| "learning_rate": 3.084291663289728e-05, |
| "loss": 0.0171, |
| "step": 6440 |
| }, |
| { |
| "epoch": 4.393732970027248, |
| "grad_norm": 0.24139529466629028, |
| "learning_rate": 3.069029237450375e-05, |
| "loss": 0.0143, |
| "step": 6450 |
| }, |
| { |
| "epoch": 4.400544959128065, |
| "grad_norm": 0.17755670845508575, |
| "learning_rate": 3.053787928397911e-05, |
| "loss": 0.0145, |
| "step": 6460 |
| }, |
| { |
| "epoch": 4.407356948228883, |
| "grad_norm": 0.33576318621635437, |
| "learning_rate": 3.0385679028088526e-05, |
| "loss": 0.0179, |
| "step": 6470 |
| }, |
| { |
| "epoch": 4.4141689373297, |
| "grad_norm": 0.12500669062137604, |
| "learning_rate": 3.023369327126959e-05, |
| "loss": 0.0147, |
| "step": 6480 |
| }, |
| { |
| "epoch": 4.420980926430517, |
| "grad_norm": 0.17751501500606537, |
| "learning_rate": 3.0081923675614198e-05, |
| "loss": 0.016, |
| "step": 6490 |
| }, |
| { |
| "epoch": 4.427792915531335, |
| "grad_norm": 0.22551394999027252, |
| "learning_rate": 2.993037190085034e-05, |
| "loss": 0.0157, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.4346049046321525, |
| "grad_norm": 0.22585496306419373, |
| "learning_rate": 2.977903960432392e-05, |
| "loss": 0.0168, |
| "step": 6510 |
| }, |
| { |
| "epoch": 4.44141689373297, |
| "grad_norm": 0.21578261256217957, |
| "learning_rate": 2.9627928440980722e-05, |
| "loss": 0.0187, |
| "step": 6520 |
| }, |
| { |
| "epoch": 4.448228882833788, |
| "grad_norm": 0.22021692991256714, |
| "learning_rate": 2.9477040063348183e-05, |
| "loss": 0.0188, |
| "step": 6530 |
| }, |
| { |
| "epoch": 4.455040871934605, |
| "grad_norm": 0.28897762298583984, |
| "learning_rate": 2.9326376121517456e-05, |
| "loss": 0.0165, |
| "step": 6540 |
| }, |
| { |
| "epoch": 4.461852861035422, |
| "grad_norm": 0.25159355998039246, |
| "learning_rate": 2.9175938263125236e-05, |
| "loss": 0.0152, |
| "step": 6550 |
| }, |
| { |
| "epoch": 4.46866485013624, |
| "grad_norm": 0.23536550998687744, |
| "learning_rate": 2.9025728133335873e-05, |
| "loss": 0.0203, |
| "step": 6560 |
| }, |
| { |
| "epoch": 4.475476839237057, |
| "grad_norm": 0.2687535583972931, |
| "learning_rate": 2.8875747374823288e-05, |
| "loss": 0.0217, |
| "step": 6570 |
| }, |
| { |
| "epoch": 4.482288828337874, |
| "grad_norm": 0.28790390491485596, |
| "learning_rate": 2.872599762775298e-05, |
| "loss": 0.0228, |
| "step": 6580 |
| }, |
| { |
| "epoch": 4.489100817438692, |
| "grad_norm": 0.20599423348903656, |
| "learning_rate": 2.857648052976425e-05, |
| "loss": 0.0154, |
| "step": 6590 |
| }, |
| { |
| "epoch": 4.4959128065395095, |
| "grad_norm": 0.25862014293670654, |
| "learning_rate": 2.8427197715952047e-05, |
| "loss": 0.0169, |
| "step": 6600 |
| }, |
| { |
| "epoch": 4.502724795640327, |
| "grad_norm": 0.29005661606788635, |
| "learning_rate": 2.8278150818849393e-05, |
| "loss": 0.0176, |
| "step": 6610 |
| }, |
| { |
| "epoch": 4.509536784741145, |
| "grad_norm": 0.20444929599761963, |
| "learning_rate": 2.812934146840922e-05, |
| "loss": 0.0168, |
| "step": 6620 |
| }, |
| { |
| "epoch": 4.516348773841962, |
| "grad_norm": 0.16426856815814972, |
| "learning_rate": 2.7980771291986764e-05, |
| "loss": 0.0183, |
| "step": 6630 |
| }, |
| { |
| "epoch": 4.523160762942779, |
| "grad_norm": 0.2749025225639343, |
| "learning_rate": 2.783244191432167e-05, |
| "loss": 0.0175, |
| "step": 6640 |
| }, |
| { |
| "epoch": 4.529972752043597, |
| "grad_norm": 0.28311431407928467, |
| "learning_rate": 2.768435495752022e-05, |
| "loss": 0.0151, |
| "step": 6650 |
| }, |
| { |
| "epoch": 4.536784741144414, |
| "grad_norm": 0.24218498170375824, |
| "learning_rate": 2.753651204103771e-05, |
| "loss": 0.0175, |
| "step": 6660 |
| }, |
| { |
| "epoch": 4.543596730245231, |
| "grad_norm": 0.214820995926857, |
| "learning_rate": 2.7388914781660523e-05, |
| "loss": 0.0138, |
| "step": 6670 |
| }, |
| { |
| "epoch": 4.550408719346049, |
| "grad_norm": 0.2261001467704773, |
| "learning_rate": 2.7241564793488693e-05, |
| "loss": 0.0183, |
| "step": 6680 |
| }, |
| { |
| "epoch": 4.5572207084468666, |
| "grad_norm": 0.21669824421405792, |
| "learning_rate": 2.7094463687918037e-05, |
| "loss": 0.0161, |
| "step": 6690 |
| }, |
| { |
| "epoch": 4.564032697547684, |
| "grad_norm": 0.25731489062309265, |
| "learning_rate": 2.694761307362268e-05, |
| "loss": 0.0149, |
| "step": 6700 |
| }, |
| { |
| "epoch": 4.570844686648502, |
| "grad_norm": 0.29376113414764404, |
| "learning_rate": 2.6801014556537467e-05, |
| "loss": 0.0179, |
| "step": 6710 |
| }, |
| { |
| "epoch": 4.577656675749319, |
| "grad_norm": 0.2186402678489685, |
| "learning_rate": 2.6654669739840243e-05, |
| "loss": 0.0191, |
| "step": 6720 |
| }, |
| { |
| "epoch": 4.584468664850136, |
| "grad_norm": 0.21597842872142792, |
| "learning_rate": 2.650858022393451e-05, |
| "loss": 0.019, |
| "step": 6730 |
| }, |
| { |
| "epoch": 4.591280653950953, |
| "grad_norm": 0.20672723650932312, |
| "learning_rate": 2.6362747606431747e-05, |
| "loss": 0.0173, |
| "step": 6740 |
| }, |
| { |
| "epoch": 4.598092643051771, |
| "grad_norm": 0.16333183646202087, |
| "learning_rate": 2.6217173482134172e-05, |
| "loss": 0.0149, |
| "step": 6750 |
| }, |
| { |
| "epoch": 4.604904632152588, |
| "grad_norm": 0.3173683285713196, |
| "learning_rate": 2.6071859443017044e-05, |
| "loss": 0.0136, |
| "step": 6760 |
| }, |
| { |
| "epoch": 4.6117166212534055, |
| "grad_norm": 0.331967294216156, |
| "learning_rate": 2.5926807078211414e-05, |
| "loss": 0.0147, |
| "step": 6770 |
| }, |
| { |
| "epoch": 4.618528610354224, |
| "grad_norm": 0.26017463207244873, |
| "learning_rate": 2.5782017973986728e-05, |
| "loss": 0.0151, |
| "step": 6780 |
| }, |
| { |
| "epoch": 4.625340599455041, |
| "grad_norm": 0.17480212450027466, |
| "learning_rate": 2.5637493713733374e-05, |
| "loss": 0.0171, |
| "step": 6790 |
| }, |
| { |
| "epoch": 4.632152588555858, |
| "grad_norm": 0.20509187877178192, |
| "learning_rate": 2.549323587794559e-05, |
| "loss": 0.0203, |
| "step": 6800 |
| }, |
| { |
| "epoch": 4.638964577656676, |
| "grad_norm": 0.203098326921463, |
| "learning_rate": 2.5349246044203895e-05, |
| "loss": 0.0144, |
| "step": 6810 |
| }, |
| { |
| "epoch": 4.645776566757493, |
| "grad_norm": 0.25146251916885376, |
| "learning_rate": 2.520552578715808e-05, |
| "loss": 0.0159, |
| "step": 6820 |
| }, |
| { |
| "epoch": 4.65258855585831, |
| "grad_norm": 0.2880435585975647, |
| "learning_rate": 2.506207667850981e-05, |
| "loss": 0.0154, |
| "step": 6830 |
| }, |
| { |
| "epoch": 4.659400544959128, |
| "grad_norm": 0.1960860937833786, |
| "learning_rate": 2.4918900286995555e-05, |
| "loss": 0.0155, |
| "step": 6840 |
| }, |
| { |
| "epoch": 4.666212534059945, |
| "grad_norm": 0.1823454052209854, |
| "learning_rate": 2.4775998178369458e-05, |
| "loss": 0.0145, |
| "step": 6850 |
| }, |
| { |
| "epoch": 4.6730245231607626, |
| "grad_norm": 0.2692583203315735, |
| "learning_rate": 2.4633371915386017e-05, |
| "loss": 0.0161, |
| "step": 6860 |
| }, |
| { |
| "epoch": 4.679836512261581, |
| "grad_norm": 0.30107152462005615, |
| "learning_rate": 2.4491023057783235e-05, |
| "loss": 0.0184, |
| "step": 6870 |
| }, |
| { |
| "epoch": 4.686648501362398, |
| "grad_norm": 0.19429023563861847, |
| "learning_rate": 2.4348953162265375e-05, |
| "loss": 0.0179, |
| "step": 6880 |
| }, |
| { |
| "epoch": 4.693460490463215, |
| "grad_norm": 0.18987010419368744, |
| "learning_rate": 2.420716378248607e-05, |
| "loss": 0.0191, |
| "step": 6890 |
| }, |
| { |
| "epoch": 4.700272479564033, |
| "grad_norm": 0.21912752091884613, |
| "learning_rate": 2.4065656469031266e-05, |
| "loss": 0.0136, |
| "step": 6900 |
| }, |
| { |
| "epoch": 4.70708446866485, |
| "grad_norm": 0.17700830101966858, |
| "learning_rate": 2.3924432769402268e-05, |
| "loss": 0.0167, |
| "step": 6910 |
| }, |
| { |
| "epoch": 4.713896457765667, |
| "grad_norm": 0.14365394413471222, |
| "learning_rate": 2.3783494227998844e-05, |
| "loss": 0.0203, |
| "step": 6920 |
| }, |
| { |
| "epoch": 4.720708446866485, |
| "grad_norm": 0.2490224689245224, |
| "learning_rate": 2.3642842386102264e-05, |
| "loss": 0.0163, |
| "step": 6930 |
| }, |
| { |
| "epoch": 4.727520435967302, |
| "grad_norm": 0.3222252428531647, |
| "learning_rate": 2.3502478781858567e-05, |
| "loss": 0.0133, |
| "step": 6940 |
| }, |
| { |
| "epoch": 4.73433242506812, |
| "grad_norm": 0.2206520438194275, |
| "learning_rate": 2.3362404950261628e-05, |
| "loss": 0.0164, |
| "step": 6950 |
| }, |
| { |
| "epoch": 4.741144414168938, |
| "grad_norm": 0.21772713959217072, |
| "learning_rate": 2.3222622423136458e-05, |
| "loss": 0.0148, |
| "step": 6960 |
| }, |
| { |
| "epoch": 4.747956403269755, |
| "grad_norm": 0.18722061812877655, |
| "learning_rate": 2.3083132729122332e-05, |
| "loss": 0.014, |
| "step": 6970 |
| }, |
| { |
| "epoch": 4.754768392370572, |
| "grad_norm": 0.3535923659801483, |
| "learning_rate": 2.294393739365621e-05, |
| "loss": 0.0211, |
| "step": 6980 |
| }, |
| { |
| "epoch": 4.76158038147139, |
| "grad_norm": 0.1893048882484436, |
| "learning_rate": 2.2805037938956e-05, |
| "loss": 0.0167, |
| "step": 6990 |
| }, |
| { |
| "epoch": 4.768392370572207, |
| "grad_norm": 0.23466837406158447, |
| "learning_rate": 2.266643588400386e-05, |
| "loss": 0.0169, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.775204359673024, |
| "grad_norm": 0.1818532645702362, |
| "learning_rate": 2.252813274452969e-05, |
| "loss": 0.0174, |
| "step": 7010 |
| }, |
| { |
| "epoch": 4.782016348773842, |
| "grad_norm": 0.24044625461101532, |
| "learning_rate": 2.2390130032994427e-05, |
| "loss": 0.0146, |
| "step": 7020 |
| }, |
| { |
| "epoch": 4.7888283378746594, |
| "grad_norm": 0.19146227836608887, |
| "learning_rate": 2.2252429258573633e-05, |
| "loss": 0.0163, |
| "step": 7030 |
| }, |
| { |
| "epoch": 4.795640326975477, |
| "grad_norm": 0.2928459644317627, |
| "learning_rate": 2.2115031927140904e-05, |
| "loss": 0.0159, |
| "step": 7040 |
| }, |
| { |
| "epoch": 4.802452316076295, |
| "grad_norm": 0.26016002893447876, |
| "learning_rate": 2.1977939541251463e-05, |
| "loss": 0.0182, |
| "step": 7050 |
| }, |
| { |
| "epoch": 4.809264305177112, |
| "grad_norm": 0.2691255509853363, |
| "learning_rate": 2.1841153600125684e-05, |
| "loss": 0.0158, |
| "step": 7060 |
| }, |
| { |
| "epoch": 4.816076294277929, |
| "grad_norm": 0.21671241521835327, |
| "learning_rate": 2.170467559963267e-05, |
| "loss": 0.0167, |
| "step": 7070 |
| }, |
| { |
| "epoch": 4.822888283378747, |
| "grad_norm": 0.2578423023223877, |
| "learning_rate": 2.1568507032273982e-05, |
| "loss": 0.0131, |
| "step": 7080 |
| }, |
| { |
| "epoch": 4.829700272479564, |
| "grad_norm": 0.22187665104866028, |
| "learning_rate": 2.1432649387167264e-05, |
| "loss": 0.0147, |
| "step": 7090 |
| }, |
| { |
| "epoch": 4.836512261580381, |
| "grad_norm": 0.16120664775371552, |
| "learning_rate": 2.1297104150029973e-05, |
| "loss": 0.0146, |
| "step": 7100 |
| }, |
| { |
| "epoch": 4.843324250681199, |
| "grad_norm": 0.20281171798706055, |
| "learning_rate": 2.116187280316307e-05, |
| "loss": 0.0163, |
| "step": 7110 |
| }, |
| { |
| "epoch": 4.8501362397820165, |
| "grad_norm": 0.30870872735977173, |
| "learning_rate": 2.1026956825434908e-05, |
| "loss": 0.0137, |
| "step": 7120 |
| }, |
| { |
| "epoch": 4.856948228882834, |
| "grad_norm": 0.16448527574539185, |
| "learning_rate": 2.0892357692265017e-05, |
| "loss": 0.0135, |
| "step": 7130 |
| }, |
| { |
| "epoch": 4.863760217983652, |
| "grad_norm": 0.229940727353096, |
| "learning_rate": 2.0758076875607947e-05, |
| "loss": 0.0159, |
| "step": 7140 |
| }, |
| { |
| "epoch": 4.870572207084469, |
| "grad_norm": 0.1661119908094406, |
| "learning_rate": 2.0624115843937207e-05, |
| "loss": 0.0171, |
| "step": 7150 |
| }, |
| { |
| "epoch": 4.877384196185286, |
| "grad_norm": 0.17345386743545532, |
| "learning_rate": 2.0490476062229157e-05, |
| "loss": 0.0156, |
| "step": 7160 |
| }, |
| { |
| "epoch": 4.884196185286104, |
| "grad_norm": 0.30998191237449646, |
| "learning_rate": 2.035715899194704e-05, |
| "loss": 0.0151, |
| "step": 7170 |
| }, |
| { |
| "epoch": 4.891008174386921, |
| "grad_norm": 0.16312265396118164, |
| "learning_rate": 2.022416609102499e-05, |
| "loss": 0.0146, |
| "step": 7180 |
| }, |
| { |
| "epoch": 4.897820163487738, |
| "grad_norm": 0.19796396791934967, |
| "learning_rate": 2.009149881385205e-05, |
| "loss": 0.0197, |
| "step": 7190 |
| }, |
| { |
| "epoch": 4.904632152588556, |
| "grad_norm": 0.27385029196739197, |
| "learning_rate": 1.995915861125634e-05, |
| "loss": 0.0143, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.9114441416893735, |
| "grad_norm": 0.13566231727600098, |
| "learning_rate": 1.9827146930489065e-05, |
| "loss": 0.0131, |
| "step": 7210 |
| }, |
| { |
| "epoch": 4.918256130790191, |
| "grad_norm": 0.26954782009124756, |
| "learning_rate": 1.9695465215208848e-05, |
| "loss": 0.018, |
| "step": 7220 |
| }, |
| { |
| "epoch": 4.925068119891008, |
| "grad_norm": 0.20488935708999634, |
| "learning_rate": 1.9564114905465813e-05, |
| "loss": 0.0139, |
| "step": 7230 |
| }, |
| { |
| "epoch": 4.931880108991826, |
| "grad_norm": 0.25250253081321716, |
| "learning_rate": 1.9433097437685936e-05, |
| "loss": 0.014, |
| "step": 7240 |
| }, |
| { |
| "epoch": 4.938692098092643, |
| "grad_norm": 0.22722044587135315, |
| "learning_rate": 1.930241424465521e-05, |
| "loss": 0.0129, |
| "step": 7250 |
| }, |
| { |
| "epoch": 4.94550408719346, |
| "grad_norm": 0.12395540624856949, |
| "learning_rate": 1.9172066755504115e-05, |
| "loss": 0.0117, |
| "step": 7260 |
| }, |
| { |
| "epoch": 4.952316076294278, |
| "grad_norm": 0.1848660111427307, |
| "learning_rate": 1.9042056395691914e-05, |
| "loss": 0.0153, |
| "step": 7270 |
| }, |
| { |
| "epoch": 4.959128065395095, |
| "grad_norm": 0.1646895408630371, |
| "learning_rate": 1.8912384586991066e-05, |
| "loss": 0.0127, |
| "step": 7280 |
| }, |
| { |
| "epoch": 4.9659400544959125, |
| "grad_norm": 0.2536143958568573, |
| "learning_rate": 1.8783052747471717e-05, |
| "loss": 0.0145, |
| "step": 7290 |
| }, |
| { |
| "epoch": 4.9727520435967305, |
| "grad_norm": 0.17167410254478455, |
| "learning_rate": 1.865406229148611e-05, |
| "loss": 0.0138, |
| "step": 7300 |
| }, |
| { |
| "epoch": 4.979564032697548, |
| "grad_norm": 0.24148517847061157, |
| "learning_rate": 1.8525414629653233e-05, |
| "loss": 0.016, |
| "step": 7310 |
| }, |
| { |
| "epoch": 4.986376021798365, |
| "grad_norm": 0.2849847674369812, |
| "learning_rate": 1.8397111168843255e-05, |
| "loss": 0.0142, |
| "step": 7320 |
| }, |
| { |
| "epoch": 4.993188010899183, |
| "grad_norm": 0.19562356173992157, |
| "learning_rate": 1.8269153312162323e-05, |
| "loss": 0.0153, |
| "step": 7330 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.25182366371154785, |
| "learning_rate": 1.8141542458937054e-05, |
| "loss": 0.0128, |
| "step": 7340 |
| }, |
| { |
| "epoch": 5.006811989100817, |
| "grad_norm": 0.22833839058876038, |
| "learning_rate": 1.8014280004699268e-05, |
| "loss": 0.0127, |
| "step": 7350 |
| }, |
| { |
| "epoch": 5.013623978201635, |
| "grad_norm": 0.17050805687904358, |
| "learning_rate": 1.788736734117078e-05, |
| "loss": 0.0113, |
| "step": 7360 |
| }, |
| { |
| "epoch": 5.020435967302452, |
| "grad_norm": 0.2042902410030365, |
| "learning_rate": 1.7760805856248152e-05, |
| "loss": 0.0131, |
| "step": 7370 |
| }, |
| { |
| "epoch": 5.0272479564032695, |
| "grad_norm": 0.24889463186264038, |
| "learning_rate": 1.7634596933987518e-05, |
| "loss": 0.0164, |
| "step": 7380 |
| }, |
| { |
| "epoch": 5.0340599455040875, |
| "grad_norm": 0.2117907702922821, |
| "learning_rate": 1.7508741954589404e-05, |
| "loss": 0.0177, |
| "step": 7390 |
| }, |
| { |
| "epoch": 5.040871934604905, |
| "grad_norm": 0.17846384644508362, |
| "learning_rate": 1.7383242294383717e-05, |
| "loss": 0.0135, |
| "step": 7400 |
| }, |
| { |
| "epoch": 5.047683923705722, |
| "grad_norm": 0.17487211525440216, |
| "learning_rate": 1.7258099325814632e-05, |
| "loss": 0.0115, |
| "step": 7410 |
| }, |
| { |
| "epoch": 5.05449591280654, |
| "grad_norm": 0.18704567849636078, |
| "learning_rate": 1.7133314417425594e-05, |
| "loss": 0.0146, |
| "step": 7420 |
| }, |
| { |
| "epoch": 5.061307901907357, |
| "grad_norm": 0.24722889065742493, |
| "learning_rate": 1.7008888933844408e-05, |
| "loss": 0.0148, |
| "step": 7430 |
| }, |
| { |
| "epoch": 5.068119891008174, |
| "grad_norm": 0.19086501002311707, |
| "learning_rate": 1.6884824235768172e-05, |
| "loss": 0.0148, |
| "step": 7440 |
| }, |
| { |
| "epoch": 5.074931880108992, |
| "grad_norm": 0.18787351250648499, |
| "learning_rate": 1.6761121679948592e-05, |
| "loss": 0.0125, |
| "step": 7450 |
| }, |
| { |
| "epoch": 5.081743869209809, |
| "grad_norm": 0.28582966327667236, |
| "learning_rate": 1.663778261917695e-05, |
| "loss": 0.0148, |
| "step": 7460 |
| }, |
| { |
| "epoch": 5.0885558583106265, |
| "grad_norm": 0.1483089029788971, |
| "learning_rate": 1.651480840226952e-05, |
| "loss": 0.0123, |
| "step": 7470 |
| }, |
| { |
| "epoch": 5.0953678474114446, |
| "grad_norm": 0.24858340620994568, |
| "learning_rate": 1.639220037405258e-05, |
| "loss": 0.0148, |
| "step": 7480 |
| }, |
| { |
| "epoch": 5.102179836512262, |
| "grad_norm": 0.1595468819141388, |
| "learning_rate": 1.6269959875347906e-05, |
| "loss": 0.0137, |
| "step": 7490 |
| }, |
| { |
| "epoch": 5.108991825613079, |
| "grad_norm": 0.27670302987098694, |
| "learning_rate": 1.614808824295802e-05, |
| "loss": 0.0143, |
| "step": 7500 |
| }, |
| { |
| "epoch": 5.115803814713897, |
| "grad_norm": 0.13641585409641266, |
| "learning_rate": 1.602658680965152e-05, |
| "loss": 0.0103, |
| "step": 7510 |
| }, |
| { |
| "epoch": 5.122615803814714, |
| "grad_norm": 0.23924027383327484, |
| "learning_rate": 1.5905456904148686e-05, |
| "loss": 0.0146, |
| "step": 7520 |
| }, |
| { |
| "epoch": 5.129427792915531, |
| "grad_norm": 0.21490581333637238, |
| "learning_rate": 1.57846998511067e-05, |
| "loss": 0.0146, |
| "step": 7530 |
| }, |
| { |
| "epoch": 5.136239782016348, |
| "grad_norm": 0.28059524297714233, |
| "learning_rate": 1.566431697110538e-05, |
| "loss": 0.0117, |
| "step": 7540 |
| }, |
| { |
| "epoch": 5.143051771117166, |
| "grad_norm": 0.20346660912036896, |
| "learning_rate": 1.554430958063259e-05, |
| "loss": 0.0113, |
| "step": 7550 |
| }, |
| { |
| "epoch": 5.1498637602179835, |
| "grad_norm": 0.17303743958473206, |
| "learning_rate": 1.5424678992069912e-05, |
| "loss": 0.0123, |
| "step": 7560 |
| }, |
| { |
| "epoch": 5.156675749318801, |
| "grad_norm": 0.21222537755966187, |
| "learning_rate": 1.5305426513678362e-05, |
| "loss": 0.012, |
| "step": 7570 |
| }, |
| { |
| "epoch": 5.163487738419619, |
| "grad_norm": 0.22923482954502106, |
| "learning_rate": 1.518655344958388e-05, |
| "loss": 0.0136, |
| "step": 7580 |
| }, |
| { |
| "epoch": 5.170299727520436, |
| "grad_norm": 0.28508874773979187, |
| "learning_rate": 1.5068061099763275e-05, |
| "loss": 0.0124, |
| "step": 7590 |
| }, |
| { |
| "epoch": 5.177111716621253, |
| "grad_norm": 0.31998851895332336, |
| "learning_rate": 1.494995076002988e-05, |
| "loss": 0.0155, |
| "step": 7600 |
| }, |
| { |
| "epoch": 5.183923705722071, |
| "grad_norm": 0.20339614152908325, |
| "learning_rate": 1.4832223722019456e-05, |
| "loss": 0.0125, |
| "step": 7610 |
| }, |
| { |
| "epoch": 5.190735694822888, |
| "grad_norm": 0.19148500263690948, |
| "learning_rate": 1.4714881273176035e-05, |
| "loss": 0.0139, |
| "step": 7620 |
| }, |
| { |
| "epoch": 5.197547683923705, |
| "grad_norm": 0.17169404029846191, |
| "learning_rate": 1.4597924696737835e-05, |
| "loss": 0.0123, |
| "step": 7630 |
| }, |
| { |
| "epoch": 5.204359673024523, |
| "grad_norm": 0.17913302779197693, |
| "learning_rate": 1.4481355271723252e-05, |
| "loss": 0.0123, |
| "step": 7640 |
| }, |
| { |
| "epoch": 5.2111716621253406, |
| "grad_norm": 0.34465935826301575, |
| "learning_rate": 1.4365174272916809e-05, |
| "loss": 0.0154, |
| "step": 7650 |
| }, |
| { |
| "epoch": 5.217983651226158, |
| "grad_norm": 0.17733906209468842, |
| "learning_rate": 1.4249382970855319e-05, |
| "loss": 0.012, |
| "step": 7660 |
| }, |
| { |
| "epoch": 5.224795640326976, |
| "grad_norm": 0.12495987117290497, |
| "learning_rate": 1.4133982631813903e-05, |
| "loss": 0.014, |
| "step": 7670 |
| }, |
| { |
| "epoch": 5.231607629427793, |
| "grad_norm": 0.14085407555103302, |
| "learning_rate": 1.4018974517792194e-05, |
| "loss": 0.0108, |
| "step": 7680 |
| }, |
| { |
| "epoch": 5.23841961852861, |
| "grad_norm": 0.1950143575668335, |
| "learning_rate": 1.390435988650048e-05, |
| "loss": 0.0115, |
| "step": 7690 |
| }, |
| { |
| "epoch": 5.245231607629428, |
| "grad_norm": 0.2449447363615036, |
| "learning_rate": 1.3790139991346006e-05, |
| "loss": 0.0126, |
| "step": 7700 |
| }, |
| { |
| "epoch": 5.252043596730245, |
| "grad_norm": 0.148986354470253, |
| "learning_rate": 1.367631608141926e-05, |
| "loss": 0.0117, |
| "step": 7710 |
| }, |
| { |
| "epoch": 5.258855585831062, |
| "grad_norm": 0.2584574520587921, |
| "learning_rate": 1.3562889401480278e-05, |
| "loss": 0.0122, |
| "step": 7720 |
| }, |
| { |
| "epoch": 5.26566757493188, |
| "grad_norm": 0.19506706297397614, |
| "learning_rate": 1.3449861191945074e-05, |
| "loss": 0.0145, |
| "step": 7730 |
| }, |
| { |
| "epoch": 5.272479564032698, |
| "grad_norm": 0.15596213936805725, |
| "learning_rate": 1.3337232688872009e-05, |
| "loss": 0.0131, |
| "step": 7740 |
| }, |
| { |
| "epoch": 5.279291553133515, |
| "grad_norm": 0.227974072098732, |
| "learning_rate": 1.3225005123948364e-05, |
| "loss": 0.0128, |
| "step": 7750 |
| }, |
| { |
| "epoch": 5.286103542234333, |
| "grad_norm": 0.15332451462745667, |
| "learning_rate": 1.311317972447681e-05, |
| "loss": 0.0103, |
| "step": 7760 |
| }, |
| { |
| "epoch": 5.29291553133515, |
| "grad_norm": 0.14478209614753723, |
| "learning_rate": 1.3001757713361996e-05, |
| "loss": 0.0123, |
| "step": 7770 |
| }, |
| { |
| "epoch": 5.299727520435967, |
| "grad_norm": 0.2231355756521225, |
| "learning_rate": 1.2890740309097204e-05, |
| "loss": 0.0122, |
| "step": 7780 |
| }, |
| { |
| "epoch": 5.306539509536785, |
| "grad_norm": 0.15700560808181763, |
| "learning_rate": 1.2780128725750944e-05, |
| "loss": 0.0122, |
| "step": 7790 |
| }, |
| { |
| "epoch": 5.313351498637602, |
| "grad_norm": 0.1040923222899437, |
| "learning_rate": 1.266992417295379e-05, |
| "loss": 0.0122, |
| "step": 7800 |
| }, |
| { |
| "epoch": 5.320163487738419, |
| "grad_norm": 0.23007836937904358, |
| "learning_rate": 1.2560127855885073e-05, |
| "loss": 0.0125, |
| "step": 7810 |
| }, |
| { |
| "epoch": 5.3269754768392374, |
| "grad_norm": 0.19076195359230042, |
| "learning_rate": 1.2450740975259745e-05, |
| "loss": 0.0146, |
| "step": 7820 |
| }, |
| { |
| "epoch": 5.333787465940055, |
| "grad_norm": 0.1625741422176361, |
| "learning_rate": 1.234176472731517e-05, |
| "loss": 0.0206, |
| "step": 7830 |
| }, |
| { |
| "epoch": 5.340599455040872, |
| "grad_norm": 0.19957181811332703, |
| "learning_rate": 1.2233200303798158e-05, |
| "loss": 0.0111, |
| "step": 7840 |
| }, |
| { |
| "epoch": 5.34741144414169, |
| "grad_norm": 0.26022225618362427, |
| "learning_rate": 1.2125048891951846e-05, |
| "loss": 0.015, |
| "step": 7850 |
| }, |
| { |
| "epoch": 5.354223433242507, |
| "grad_norm": 0.19087891280651093, |
| "learning_rate": 1.2017311674502745e-05, |
| "loss": 0.0099, |
| "step": 7860 |
| }, |
| { |
| "epoch": 5.361035422343324, |
| "grad_norm": 0.20264586806297302, |
| "learning_rate": 1.1909989829647822e-05, |
| "loss": 0.0111, |
| "step": 7870 |
| }, |
| { |
| "epoch": 5.367847411444142, |
| "grad_norm": 0.15192349255084991, |
| "learning_rate": 1.1803084531041553e-05, |
| "loss": 0.0133, |
| "step": 7880 |
| }, |
| { |
| "epoch": 5.374659400544959, |
| "grad_norm": 0.1685389131307602, |
| "learning_rate": 1.1696596947783162e-05, |
| "loss": 0.0115, |
| "step": 7890 |
| }, |
| { |
| "epoch": 5.381471389645776, |
| "grad_norm": 0.16295170783996582, |
| "learning_rate": 1.1590528244403803e-05, |
| "loss": 0.012, |
| "step": 7900 |
| }, |
| { |
| "epoch": 5.3882833787465945, |
| "grad_norm": 0.21759124100208282, |
| "learning_rate": 1.148487958085382e-05, |
| "loss": 0.013, |
| "step": 7910 |
| }, |
| { |
| "epoch": 5.395095367847412, |
| "grad_norm": 0.14942030608654022, |
| "learning_rate": 1.1379652112490086e-05, |
| "loss": 0.0139, |
| "step": 7920 |
| }, |
| { |
| "epoch": 5.401907356948229, |
| "grad_norm": 0.22778572142124176, |
| "learning_rate": 1.1274846990063315e-05, |
| "loss": 0.0121, |
| "step": 7930 |
| }, |
| { |
| "epoch": 5.408719346049046, |
| "grad_norm": 0.18669773638248444, |
| "learning_rate": 1.117046535970554e-05, |
| "loss": 0.0147, |
| "step": 7940 |
| }, |
| { |
| "epoch": 5.415531335149864, |
| "grad_norm": 0.15506009757518768, |
| "learning_rate": 1.106650836291755e-05, |
| "loss": 0.0177, |
| "step": 7950 |
| }, |
| { |
| "epoch": 5.422343324250681, |
| "grad_norm": 0.2541573643684387, |
| "learning_rate": 1.0962977136556418e-05, |
| "loss": 0.0153, |
| "step": 7960 |
| }, |
| { |
| "epoch": 5.429155313351498, |
| "grad_norm": 0.1981164813041687, |
| "learning_rate": 1.0859872812823024e-05, |
| "loss": 0.0132, |
| "step": 7970 |
| }, |
| { |
| "epoch": 5.435967302452316, |
| "grad_norm": 0.2570594549179077, |
| "learning_rate": 1.0757196519249747e-05, |
| "loss": 0.016, |
| "step": 7980 |
| }, |
| { |
| "epoch": 5.4427792915531334, |
| "grad_norm": 0.11997192353010178, |
| "learning_rate": 1.0654949378688077e-05, |
| "loss": 0.014, |
| "step": 7990 |
| }, |
| { |
| "epoch": 5.449591280653951, |
| "grad_norm": 0.194411501288414, |
| "learning_rate": 1.0553132509296376e-05, |
| "loss": 0.013, |
| "step": 8000 |
| }, |
| { |
| "epoch": 5.456403269754769, |
| "grad_norm": 0.16398310661315918, |
| "learning_rate": 1.0451747024527613e-05, |
| "loss": 0.013, |
| "step": 8010 |
| }, |
| { |
| "epoch": 5.463215258855586, |
| "grad_norm": 0.2351941168308258, |
| "learning_rate": 1.0350794033117189e-05, |
| "loss": 0.0141, |
| "step": 8020 |
| }, |
| { |
| "epoch": 5.470027247956403, |
| "grad_norm": 0.254794180393219, |
| "learning_rate": 1.0250274639070856e-05, |
| "loss": 0.0115, |
| "step": 8030 |
| }, |
| { |
| "epoch": 5.476839237057221, |
| "grad_norm": 0.12862807512283325, |
| "learning_rate": 1.0150189941652599e-05, |
| "loss": 0.0104, |
| "step": 8040 |
| }, |
| { |
| "epoch": 5.483651226158038, |
| "grad_norm": 0.1112130880355835, |
| "learning_rate": 1.0050541035372635e-05, |
| "loss": 0.0105, |
| "step": 8050 |
| }, |
| { |
| "epoch": 5.490463215258855, |
| "grad_norm": 0.15486888587474823, |
| "learning_rate": 9.951329009975458e-06, |
| "loss": 0.0108, |
| "step": 8060 |
| }, |
| { |
| "epoch": 5.497275204359673, |
| "grad_norm": 0.13335685431957245, |
| "learning_rate": 9.852554950427845e-06, |
| "loss": 0.0128, |
| "step": 8070 |
| }, |
| { |
| "epoch": 5.5040871934604905, |
| "grad_norm": 0.16484335064888, |
| "learning_rate": 9.754219936907105e-06, |
| "loss": 0.0123, |
| "step": 8080 |
| }, |
| { |
| "epoch": 5.510899182561308, |
| "grad_norm": 0.13687945902347565, |
| "learning_rate": 9.656325044789194e-06, |
| "loss": 0.0096, |
| "step": 8090 |
| }, |
| { |
| "epoch": 5.517711171662126, |
| "grad_norm": 0.1303662657737732, |
| "learning_rate": 9.55887134463697e-06, |
| "loss": 0.0086, |
| "step": 8100 |
| }, |
| { |
| "epoch": 5.524523160762943, |
| "grad_norm": 0.1451333612203598, |
| "learning_rate": 9.461859902188475e-06, |
| "loss": 0.0119, |
| "step": 8110 |
| }, |
| { |
| "epoch": 5.53133514986376, |
| "grad_norm": 0.2170574814081192, |
| "learning_rate": 9.365291778345303e-06, |
| "loss": 0.0121, |
| "step": 8120 |
| }, |
| { |
| "epoch": 5.538147138964578, |
| "grad_norm": 0.16463404893875122, |
| "learning_rate": 9.269168029160991e-06, |
| "loss": 0.0089, |
| "step": 8130 |
| }, |
| { |
| "epoch": 5.544959128065395, |
| "grad_norm": 0.2275201976299286, |
| "learning_rate": 9.173489705829447e-06, |
| "loss": 0.0129, |
| "step": 8140 |
| }, |
| { |
| "epoch": 5.551771117166212, |
| "grad_norm": 0.11590515077114105, |
| "learning_rate": 9.078257854673516e-06, |
| "loss": 0.0124, |
| "step": 8150 |
| }, |
| { |
| "epoch": 5.55858310626703, |
| "grad_norm": 0.1156085953116417, |
| "learning_rate": 8.983473517133429e-06, |
| "loss": 0.0135, |
| "step": 8160 |
| }, |
| { |
| "epoch": 5.5653950953678475, |
| "grad_norm": 0.12850528955459595, |
| "learning_rate": 8.889137729755537e-06, |
| "loss": 0.0105, |
| "step": 8170 |
| }, |
| { |
| "epoch": 5.572207084468665, |
| "grad_norm": 0.2325507402420044, |
| "learning_rate": 8.79525152418087e-06, |
| "loss": 0.0136, |
| "step": 8180 |
| }, |
| { |
| "epoch": 5.579019073569482, |
| "grad_norm": 0.17301425337791443, |
| "learning_rate": 8.701815927133961e-06, |
| "loss": 0.0124, |
| "step": 8190 |
| }, |
| { |
| "epoch": 5.5858310626703, |
| "grad_norm": 0.13355191051959991, |
| "learning_rate": 8.608831960411534e-06, |
| "loss": 0.0114, |
| "step": 8200 |
| }, |
| { |
| "epoch": 5.592643051771117, |
| "grad_norm": 0.2157035917043686, |
| "learning_rate": 8.516300640871321e-06, |
| "loss": 0.0126, |
| "step": 8210 |
| }, |
| { |
| "epoch": 5.599455040871934, |
| "grad_norm": 0.16316112875938416, |
| "learning_rate": 8.424222980421038e-06, |
| "loss": 0.0133, |
| "step": 8220 |
| }, |
| { |
| "epoch": 5.606267029972752, |
| "grad_norm": 0.10164311528205872, |
| "learning_rate": 8.332599986007184e-06, |
| "loss": 0.0111, |
| "step": 8230 |
| }, |
| { |
| "epoch": 5.613079019073569, |
| "grad_norm": 0.13754205405712128, |
| "learning_rate": 8.241432659604203e-06, |
| "loss": 0.0098, |
| "step": 8240 |
| }, |
| { |
| "epoch": 5.6198910081743865, |
| "grad_norm": 0.17243002355098724, |
| "learning_rate": 8.150721998203331e-06, |
| "loss": 0.0092, |
| "step": 8250 |
| }, |
| { |
| "epoch": 5.6267029972752045, |
| "grad_norm": 0.14749637246131897, |
| "learning_rate": 8.06046899380184e-06, |
| "loss": 0.0098, |
| "step": 8260 |
| }, |
| { |
| "epoch": 5.633514986376022, |
| "grad_norm": 0.12213444709777832, |
| "learning_rate": 7.970674633392133e-06, |
| "loss": 0.0139, |
| "step": 8270 |
| }, |
| { |
| "epoch": 5.640326975476839, |
| "grad_norm": 0.1787102073431015, |
| "learning_rate": 7.881339898950924e-06, |
| "loss": 0.0142, |
| "step": 8280 |
| }, |
| { |
| "epoch": 5.647138964577657, |
| "grad_norm": 0.10263296216726303, |
| "learning_rate": 7.792465767428597e-06, |
| "loss": 0.0116, |
| "step": 8290 |
| }, |
| { |
| "epoch": 5.653950953678474, |
| "grad_norm": 0.11837161332368851, |
| "learning_rate": 7.704053210738376e-06, |
| "loss": 0.0095, |
| "step": 8300 |
| }, |
| { |
| "epoch": 5.660762942779291, |
| "grad_norm": 0.13488389551639557, |
| "learning_rate": 7.6161031957458494e-06, |
| "loss": 0.0138, |
| "step": 8310 |
| }, |
| { |
| "epoch": 5.667574931880109, |
| "grad_norm": 0.19569364190101624, |
| "learning_rate": 7.5286166842582605e-06, |
| "loss": 0.0121, |
| "step": 8320 |
| }, |
| { |
| "epoch": 5.674386920980926, |
| "grad_norm": 0.19341343641281128, |
| "learning_rate": 7.4415946330140814e-06, |
| "loss": 0.0115, |
| "step": 8330 |
| }, |
| { |
| "epoch": 5.6811989100817435, |
| "grad_norm": 0.15243728458881378, |
| "learning_rate": 7.3550379936725644e-06, |
| "loss": 0.0114, |
| "step": 8340 |
| }, |
| { |
| "epoch": 5.6880108991825615, |
| "grad_norm": 0.13914422690868378, |
| "learning_rate": 7.2689477128032035e-06, |
| "loss": 0.0125, |
| "step": 8350 |
| }, |
| { |
| "epoch": 5.694822888283379, |
| "grad_norm": 0.15893633663654327, |
| "learning_rate": 7.183324731875551e-06, |
| "loss": 0.0098, |
| "step": 8360 |
| }, |
| { |
| "epoch": 5.701634877384196, |
| "grad_norm": 0.16882383823394775, |
| "learning_rate": 7.098169987248782e-06, |
| "loss": 0.0089, |
| "step": 8370 |
| }, |
| { |
| "epoch": 5.708446866485014, |
| "grad_norm": 0.11707707494497299, |
| "learning_rate": 7.013484410161553e-06, |
| "loss": 0.0111, |
| "step": 8380 |
| }, |
| { |
| "epoch": 5.715258855585831, |
| "grad_norm": 0.15138401091098785, |
| "learning_rate": 6.92926892672176e-06, |
| "loss": 0.011, |
| "step": 8390 |
| }, |
| { |
| "epoch": 5.722070844686648, |
| "grad_norm": 0.15782202780246735, |
| "learning_rate": 6.845524457896446e-06, |
| "loss": 0.0087, |
| "step": 8400 |
| }, |
| { |
| "epoch": 5.728882833787466, |
| "grad_norm": 0.14364789426326752, |
| "learning_rate": 6.7622519195017165e-06, |
| "loss": 0.0099, |
| "step": 8410 |
| }, |
| { |
| "epoch": 5.735694822888283, |
| "grad_norm": 0.1990385502576828, |
| "learning_rate": 6.679452222192684e-06, |
| "loss": 0.0099, |
| "step": 8420 |
| }, |
| { |
| "epoch": 5.7425068119891005, |
| "grad_norm": 0.11444421857595444, |
| "learning_rate": 6.597126271453579e-06, |
| "loss": 0.0088, |
| "step": 8430 |
| }, |
| { |
| "epoch": 5.7493188010899186, |
| "grad_norm": 0.09519212692975998, |
| "learning_rate": 6.51527496758782e-06, |
| "loss": 0.0089, |
| "step": 8440 |
| }, |
| { |
| "epoch": 5.756130790190736, |
| "grad_norm": 0.1508159637451172, |
| "learning_rate": 6.433899205708155e-06, |
| "loss": 0.0097, |
| "step": 8450 |
| }, |
| { |
| "epoch": 5.762942779291553, |
| "grad_norm": 0.12732820212841034, |
| "learning_rate": 6.352999875726856e-06, |
| "loss": 0.0091, |
| "step": 8460 |
| }, |
| { |
| "epoch": 5.769754768392371, |
| "grad_norm": 0.09891568869352341, |
| "learning_rate": 6.272577862346052e-06, |
| "loss": 0.0113, |
| "step": 8470 |
| }, |
| { |
| "epoch": 5.776566757493188, |
| "grad_norm": 0.2046702355146408, |
| "learning_rate": 6.192634045047996e-06, |
| "loss": 0.0112, |
| "step": 8480 |
| }, |
| { |
| "epoch": 5.783378746594005, |
| "grad_norm": 0.2202032059431076, |
| "learning_rate": 6.113169298085458e-06, |
| "loss": 0.0166, |
| "step": 8490 |
| }, |
| { |
| "epoch": 5.790190735694823, |
| "grad_norm": 0.2339613288640976, |
| "learning_rate": 6.034184490472195e-06, |
| "loss": 0.0079, |
| "step": 8500 |
| }, |
| { |
| "epoch": 5.79700272479564, |
| "grad_norm": 0.20225585997104645, |
| "learning_rate": 5.955680485973386e-06, |
| "loss": 0.0131, |
| "step": 8510 |
| }, |
| { |
| "epoch": 5.8038147138964575, |
| "grad_norm": 0.2018497884273529, |
| "learning_rate": 5.877658143096265e-06, |
| "loss": 0.011, |
| "step": 8520 |
| }, |
| { |
| "epoch": 5.810626702997276, |
| "grad_norm": 0.13856525719165802, |
| "learning_rate": 5.800118315080661e-06, |
| "loss": 0.01, |
| "step": 8530 |
| }, |
| { |
| "epoch": 5.817438692098093, |
| "grad_norm": 0.1401432752609253, |
| "learning_rate": 5.723061849889716e-06, |
| "loss": 0.0084, |
| "step": 8540 |
| }, |
| { |
| "epoch": 5.82425068119891, |
| "grad_norm": 0.1731623411178589, |
| "learning_rate": 5.646489590200604e-06, |
| "loss": 0.0126, |
| "step": 8550 |
| }, |
| { |
| "epoch": 5.831062670299728, |
| "grad_norm": 0.12786374986171722, |
| "learning_rate": 5.570402373395256e-06, |
| "loss": 0.01, |
| "step": 8560 |
| }, |
| { |
| "epoch": 5.837874659400545, |
| "grad_norm": 0.2641719877719879, |
| "learning_rate": 5.494801031551305e-06, |
| "loss": 0.0111, |
| "step": 8570 |
| }, |
| { |
| "epoch": 5.844686648501362, |
| "grad_norm": 0.16163118183612823, |
| "learning_rate": 5.41968639143291e-06, |
| "loss": 0.0106, |
| "step": 8580 |
| }, |
| { |
| "epoch": 5.85149863760218, |
| "grad_norm": 0.1381234973669052, |
| "learning_rate": 5.345059274481751e-06, |
| "loss": 0.0093, |
| "step": 8590 |
| }, |
| { |
| "epoch": 5.858310626702997, |
| "grad_norm": 0.1420307159423828, |
| "learning_rate": 5.270920496808002e-06, |
| "loss": 0.0134, |
| "step": 8600 |
| }, |
| { |
| "epoch": 5.8651226158038146, |
| "grad_norm": 0.1673470139503479, |
| "learning_rate": 5.1972708691814695e-06, |
| "loss": 0.0109, |
| "step": 8610 |
| }, |
| { |
| "epoch": 5.871934604904633, |
| "grad_norm": 0.2173473834991455, |
| "learning_rate": 5.124111197022674e-06, |
| "loss": 0.0119, |
| "step": 8620 |
| }, |
| { |
| "epoch": 5.87874659400545, |
| "grad_norm": 0.11630476266145706, |
| "learning_rate": 5.051442280394081e-06, |
| "loss": 0.009, |
| "step": 8630 |
| }, |
| { |
| "epoch": 5.885558583106267, |
| "grad_norm": 0.0949091911315918, |
| "learning_rate": 4.979264913991322e-06, |
| "loss": 0.0119, |
| "step": 8640 |
| }, |
| { |
| "epoch": 5.892370572207085, |
| "grad_norm": 0.16577839851379395, |
| "learning_rate": 4.907579887134489e-06, |
| "loss": 0.0126, |
| "step": 8650 |
| }, |
| { |
| "epoch": 5.899182561307902, |
| "grad_norm": 0.17283402383327484, |
| "learning_rate": 4.836387983759572e-06, |
| "loss": 0.011, |
| "step": 8660 |
| }, |
| { |
| "epoch": 5.905994550408719, |
| "grad_norm": 0.1463468372821808, |
| "learning_rate": 4.765689982409816e-06, |
| "loss": 0.0102, |
| "step": 8670 |
| }, |
| { |
| "epoch": 5.912806539509537, |
| "grad_norm": 0.21168796718120575, |
| "learning_rate": 4.695486656227233e-06, |
| "loss": 0.0132, |
| "step": 8680 |
| }, |
| { |
| "epoch": 5.919618528610354, |
| "grad_norm": 0.24207310378551483, |
| "learning_rate": 4.625778772944156e-06, |
| "loss": 0.0119, |
| "step": 8690 |
| }, |
| { |
| "epoch": 5.926430517711172, |
| "grad_norm": 0.20093917846679688, |
| "learning_rate": 4.556567094874825e-06, |
| "loss": 0.0123, |
| "step": 8700 |
| }, |
| { |
| "epoch": 5.933242506811989, |
| "grad_norm": 0.11250998079776764, |
| "learning_rate": 4.487852378907059e-06, |
| "loss": 0.0076, |
| "step": 8710 |
| }, |
| { |
| "epoch": 5.940054495912807, |
| "grad_norm": 0.11169631779193878, |
| "learning_rate": 4.419635376493986e-06, |
| "loss": 0.0113, |
| "step": 8720 |
| }, |
| { |
| "epoch": 5.946866485013624, |
| "grad_norm": 0.17939099669456482, |
| "learning_rate": 4.351916833645825e-06, |
| "loss": 0.0105, |
| "step": 8730 |
| }, |
| { |
| "epoch": 5.953678474114441, |
| "grad_norm": 0.19434191286563873, |
| "learning_rate": 4.284697490921691e-06, |
| "loss": 0.0106, |
| "step": 8740 |
| }, |
| { |
| "epoch": 5.960490463215259, |
| "grad_norm": 0.16198799014091492, |
| "learning_rate": 4.2179780834215585e-06, |
| "loss": 0.0127, |
| "step": 8750 |
| }, |
| { |
| "epoch": 5.967302452316076, |
| "grad_norm": 0.22619812190532684, |
| "learning_rate": 4.151759340778178e-06, |
| "loss": 0.0117, |
| "step": 8760 |
| }, |
| { |
| "epoch": 5.974114441416893, |
| "grad_norm": 0.2598056495189667, |
| "learning_rate": 4.086041987149109e-06, |
| "loss": 0.01, |
| "step": 8770 |
| }, |
| { |
| "epoch": 5.9809264305177114, |
| "grad_norm": 0.18251881003379822, |
| "learning_rate": 4.020826741208811e-06, |
| "loss": 0.0102, |
| "step": 8780 |
| }, |
| { |
| "epoch": 5.987738419618529, |
| "grad_norm": 0.18505583703517914, |
| "learning_rate": 3.956114316140746e-06, |
| "loss": 0.0121, |
| "step": 8790 |
| }, |
| { |
| "epoch": 5.994550408719346, |
| "grad_norm": 0.14361293613910675, |
| "learning_rate": 3.891905419629643e-06, |
| "loss": 0.0099, |
| "step": 8800 |
| }, |
| { |
| "epoch": 6.001362397820164, |
| "grad_norm": 0.10514985024929047, |
| "learning_rate": 3.8282007538536946e-06, |
| "loss": 0.0127, |
| "step": 8810 |
| }, |
| { |
| "epoch": 6.008174386920981, |
| "grad_norm": 0.16004830598831177, |
| "learning_rate": 3.7650010154769265e-06, |
| "loss": 0.0089, |
| "step": 8820 |
| }, |
| { |
| "epoch": 6.014986376021798, |
| "grad_norm": 0.18699565529823303, |
| "learning_rate": 3.7023068956415608e-06, |
| "loss": 0.0123, |
| "step": 8830 |
| }, |
| { |
| "epoch": 6.021798365122616, |
| "grad_norm": 0.17017434537410736, |
| "learning_rate": 3.6401190799604303e-06, |
| "loss": 0.0084, |
| "step": 8840 |
| }, |
| { |
| "epoch": 6.028610354223433, |
| "grad_norm": 0.18797238171100616, |
| "learning_rate": 3.578438248509536e-06, |
| "loss": 0.012, |
| "step": 8850 |
| }, |
| { |
| "epoch": 6.03542234332425, |
| "grad_norm": 0.16716784238815308, |
| "learning_rate": 3.5172650758205583e-06, |
| "loss": 0.012, |
| "step": 8860 |
| }, |
| { |
| "epoch": 6.0422343324250685, |
| "grad_norm": 0.10475629568099976, |
| "learning_rate": 3.45660023087353e-06, |
| "loss": 0.008, |
| "step": 8870 |
| }, |
| { |
| "epoch": 6.049046321525886, |
| "grad_norm": 0.12020768970251083, |
| "learning_rate": 3.3964443770894528e-06, |
| "loss": 0.0087, |
| "step": 8880 |
| }, |
| { |
| "epoch": 6.055858310626703, |
| "grad_norm": 0.10397229343652725, |
| "learning_rate": 3.3367981723231245e-06, |
| "loss": 0.0091, |
| "step": 8890 |
| }, |
| { |
| "epoch": 6.062670299727521, |
| "grad_norm": 0.20012831687927246, |
| "learning_rate": 3.2776622688558746e-06, |
| "loss": 0.0099, |
| "step": 8900 |
| }, |
| { |
| "epoch": 6.069482288828338, |
| "grad_norm": 0.19983907043933868, |
| "learning_rate": 3.2190373133884677e-06, |
| "loss": 0.0102, |
| "step": 8910 |
| }, |
| { |
| "epoch": 6.076294277929155, |
| "grad_norm": 0.17271621525287628, |
| "learning_rate": 3.1609239470340446e-06, |
| "loss": 0.0104, |
| "step": 8920 |
| }, |
| { |
| "epoch": 6.083106267029973, |
| "grad_norm": 0.16302776336669922, |
| "learning_rate": 3.1033228053110373e-06, |
| "loss": 0.0078, |
| "step": 8930 |
| }, |
| { |
| "epoch": 6.08991825613079, |
| "grad_norm": 0.12263508886098862, |
| "learning_rate": 3.0462345181363314e-06, |
| "loss": 0.009, |
| "step": 8940 |
| }, |
| { |
| "epoch": 6.0967302452316074, |
| "grad_norm": 0.11456681787967682, |
| "learning_rate": 2.9896597098182654e-06, |
| "loss": 0.0109, |
| "step": 8950 |
| }, |
| { |
| "epoch": 6.1035422343324255, |
| "grad_norm": 0.08905057609081268, |
| "learning_rate": 2.933598999049891e-06, |
| "loss": 0.0112, |
| "step": 8960 |
| }, |
| { |
| "epoch": 6.110354223433243, |
| "grad_norm": 0.15491244196891785, |
| "learning_rate": 2.8780529989021697e-06, |
| "loss": 0.0095, |
| "step": 8970 |
| }, |
| { |
| "epoch": 6.11716621253406, |
| "grad_norm": 0.15372540056705475, |
| "learning_rate": 2.823022316817242e-06, |
| "loss": 0.0124, |
| "step": 8980 |
| }, |
| { |
| "epoch": 6.123978201634877, |
| "grad_norm": 0.20342043042182922, |
| "learning_rate": 2.7685075546018456e-06, |
| "loss": 0.0123, |
| "step": 8990 |
| }, |
| { |
| "epoch": 6.130790190735695, |
| "grad_norm": 0.12789203226566315, |
| "learning_rate": 2.7145093084206598e-06, |
| "loss": 0.0108, |
| "step": 9000 |
| }, |
| { |
| "epoch": 6.137602179836512, |
| "grad_norm": 0.19718892872333527, |
| "learning_rate": 2.661028168789892e-06, |
| "loss": 0.0094, |
| "step": 9010 |
| }, |
| { |
| "epoch": 6.144414168937329, |
| "grad_norm": 0.2571142911911011, |
| "learning_rate": 2.6080647205706855e-06, |
| "loss": 0.0091, |
| "step": 9020 |
| }, |
| { |
| "epoch": 6.151226158038147, |
| "grad_norm": 0.08045794069766998, |
| "learning_rate": 2.555619542962834e-06, |
| "loss": 0.0101, |
| "step": 9030 |
| }, |
| { |
| "epoch": 6.1580381471389645, |
| "grad_norm": 0.10921412706375122, |
| "learning_rate": 2.503693209498409e-06, |
| "loss": 0.0064, |
| "step": 9040 |
| }, |
| { |
| "epoch": 6.164850136239782, |
| "grad_norm": 0.14346344769001007, |
| "learning_rate": 2.452286288035449e-06, |
| "loss": 0.0091, |
| "step": 9050 |
| }, |
| { |
| "epoch": 6.1716621253406, |
| "grad_norm": 0.12146768718957901, |
| "learning_rate": 2.4013993407518363e-06, |
| "loss": 0.0127, |
| "step": 9060 |
| }, |
| { |
| "epoch": 6.178474114441417, |
| "grad_norm": 0.11175204068422318, |
| "learning_rate": 2.351032924139063e-06, |
| "loss": 0.0076, |
| "step": 9070 |
| }, |
| { |
| "epoch": 6.185286103542234, |
| "grad_norm": 0.1668560802936554, |
| "learning_rate": 2.30118758899619e-06, |
| "loss": 0.0112, |
| "step": 9080 |
| }, |
| { |
| "epoch": 6.192098092643052, |
| "grad_norm": 0.15498773753643036, |
| "learning_rate": 2.2518638804238157e-06, |
| "loss": 0.0084, |
| "step": 9090 |
| }, |
| { |
| "epoch": 6.198910081743869, |
| "grad_norm": 0.2582722306251526, |
| "learning_rate": 2.203062337818118e-06, |
| "loss": 0.0107, |
| "step": 9100 |
| }, |
| { |
| "epoch": 6.205722070844686, |
| "grad_norm": 0.14667384326457977, |
| "learning_rate": 2.1547834948649483e-06, |
| "loss": 0.0106, |
| "step": 9110 |
| }, |
| { |
| "epoch": 6.212534059945504, |
| "grad_norm": 0.08730646222829819, |
| "learning_rate": 2.1070278795340017e-06, |
| "loss": 0.011, |
| "step": 9120 |
| }, |
| { |
| "epoch": 6.2193460490463215, |
| "grad_norm": 0.19518472254276276, |
| "learning_rate": 2.059796014073029e-06, |
| "loss": 0.0078, |
| "step": 9130 |
| }, |
| { |
| "epoch": 6.226158038147139, |
| "grad_norm": 0.09343539923429489, |
| "learning_rate": 2.01308841500214e-06, |
| "loss": 0.0079, |
| "step": 9140 |
| }, |
| { |
| "epoch": 6.232970027247957, |
| "grad_norm": 0.2299136221408844, |
| "learning_rate": 1.9669055931081704e-06, |
| "loss": 0.0122, |
| "step": 9150 |
| }, |
| { |
| "epoch": 6.239782016348774, |
| "grad_norm": 0.16625770926475525, |
| "learning_rate": 1.9212480534390507e-06, |
| "loss": 0.0097, |
| "step": 9160 |
| }, |
| { |
| "epoch": 6.246594005449591, |
| "grad_norm": 0.18141430616378784, |
| "learning_rate": 1.8761162952983246e-06, |
| "loss": 0.011, |
| "step": 9170 |
| }, |
| { |
| "epoch": 6.253405994550409, |
| "grad_norm": 0.22686415910720825, |
| "learning_rate": 1.8315108122396618e-06, |
| "loss": 0.0114, |
| "step": 9180 |
| }, |
| { |
| "epoch": 6.260217983651226, |
| "grad_norm": 0.14493921399116516, |
| "learning_rate": 1.787432092061475e-06, |
| "loss": 0.0085, |
| "step": 9190 |
| }, |
| { |
| "epoch": 6.267029972752043, |
| "grad_norm": 0.12274694442749023, |
| "learning_rate": 1.743880616801602e-06, |
| "loss": 0.0113, |
| "step": 9200 |
| }, |
| { |
| "epoch": 6.273841961852861, |
| "grad_norm": 0.10201839357614517, |
| "learning_rate": 1.7008568627319865e-06, |
| "loss": 0.0096, |
| "step": 9210 |
| }, |
| { |
| "epoch": 6.2806539509536785, |
| "grad_norm": 0.20207750797271729, |
| "learning_rate": 1.6583613003535226e-06, |
| "loss": 0.0121, |
| "step": 9220 |
| }, |
| { |
| "epoch": 6.287465940054496, |
| "grad_norm": 0.13486947119235992, |
| "learning_rate": 1.6163943943908522e-06, |
| "loss": 0.0123, |
| "step": 9230 |
| }, |
| { |
| "epoch": 6.294277929155314, |
| "grad_norm": 0.14914485812187195, |
| "learning_rate": 1.5749566037873476e-06, |
| "loss": 0.01, |
| "step": 9240 |
| }, |
| { |
| "epoch": 6.301089918256131, |
| "grad_norm": 0.1396232694387436, |
| "learning_rate": 1.5340483817000428e-06, |
| "loss": 0.0113, |
| "step": 9250 |
| }, |
| { |
| "epoch": 6.307901907356948, |
| "grad_norm": 0.11976684629917145, |
| "learning_rate": 1.4936701754947101e-06, |
| "loss": 0.0096, |
| "step": 9260 |
| }, |
| { |
| "epoch": 6.314713896457766, |
| "grad_norm": 0.14177760481834412, |
| "learning_rate": 1.4538224267409361e-06, |
| "loss": 0.0116, |
| "step": 9270 |
| }, |
| { |
| "epoch": 6.321525885558583, |
| "grad_norm": 0.15875473618507385, |
| "learning_rate": 1.414505571207314e-06, |
| "loss": 0.0076, |
| "step": 9280 |
| }, |
| { |
| "epoch": 6.3283378746594, |
| "grad_norm": 0.10427635163068771, |
| "learning_rate": 1.3757200388566816e-06, |
| "loss": 0.0077, |
| "step": 9290 |
| }, |
| { |
| "epoch": 6.335149863760218, |
| "grad_norm": 0.11724657565355301, |
| "learning_rate": 1.3374662538414074e-06, |
| "loss": 0.0123, |
| "step": 9300 |
| }, |
| { |
| "epoch": 6.3419618528610355, |
| "grad_norm": 0.08624394983053207, |
| "learning_rate": 1.2997446344987617e-06, |
| "loss": 0.0099, |
| "step": 9310 |
| }, |
| { |
| "epoch": 6.348773841961853, |
| "grad_norm": 0.11943169683218002, |
| "learning_rate": 1.262555593346315e-06, |
| "loss": 0.0099, |
| "step": 9320 |
| }, |
| { |
| "epoch": 6.355585831062671, |
| "grad_norm": 0.22859704494476318, |
| "learning_rate": 1.2258995370774685e-06, |
| "loss": 0.0116, |
| "step": 9330 |
| }, |
| { |
| "epoch": 6.362397820163488, |
| "grad_norm": 0.20983096957206726, |
| "learning_rate": 1.1897768665569798e-06, |
| "loss": 0.0117, |
| "step": 9340 |
| }, |
| { |
| "epoch": 6.369209809264305, |
| "grad_norm": 0.13772162795066833, |
| "learning_rate": 1.1541879768165954e-06, |
| "loss": 0.0092, |
| "step": 9350 |
| }, |
| { |
| "epoch": 6.376021798365123, |
| "grad_norm": 0.15202628076076508, |
| "learning_rate": 1.1191332570507085e-06, |
| "loss": 0.0098, |
| "step": 9360 |
| }, |
| { |
| "epoch": 6.38283378746594, |
| "grad_norm": 0.18177203834056854, |
| "learning_rate": 1.0846130906121132e-06, |
| "loss": 0.0164, |
| "step": 9370 |
| }, |
| { |
| "epoch": 6.389645776566757, |
| "grad_norm": 0.17858490347862244, |
| "learning_rate": 1.0506278550078131e-06, |
| "loss": 0.0103, |
| "step": 9380 |
| }, |
| { |
| "epoch": 6.396457765667575, |
| "grad_norm": 0.18811877071857452, |
| "learning_rate": 1.0171779218949185e-06, |
| "loss": 0.0125, |
| "step": 9390 |
| }, |
| { |
| "epoch": 6.4032697547683926, |
| "grad_norm": 0.1804962009191513, |
| "learning_rate": 9.842636570765174e-07, |
| "loss": 0.0097, |
| "step": 9400 |
| }, |
| { |
| "epoch": 6.41008174386921, |
| "grad_norm": 0.20443765819072723, |
| "learning_rate": 9.518854204977612e-07, |
| "loss": 0.01, |
| "step": 9410 |
| }, |
| { |
| "epoch": 6.416893732970027, |
| "grad_norm": 0.11135527491569519, |
| "learning_rate": 9.200435662418349e-07, |
| "loss": 0.0101, |
| "step": 9420 |
| }, |
| { |
| "epoch": 6.423705722070845, |
| "grad_norm": 0.10986144840717316, |
| "learning_rate": 8.887384425261658e-07, |
| "loss": 0.008, |
| "step": 9430 |
| }, |
| { |
| "epoch": 6.430517711171662, |
| "grad_norm": 0.15490956604480743, |
| "learning_rate": 8.579703916985648e-07, |
| "loss": 0.0094, |
| "step": 9440 |
| }, |
| { |
| "epoch": 6.437329700272479, |
| "grad_norm": 0.12304934859275818, |
| "learning_rate": 8.277397502335194e-07, |
| "loss": 0.0134, |
| "step": 9450 |
| }, |
| { |
| "epoch": 6.444141689373297, |
| "grad_norm": 0.15748490393161774, |
| "learning_rate": 7.980468487284675e-07, |
| "loss": 0.0104, |
| "step": 9460 |
| }, |
| { |
| "epoch": 6.450953678474114, |
| "grad_norm": 0.15610432624816895, |
| "learning_rate": 7.688920119002297e-07, |
| "loss": 0.0089, |
| "step": 9470 |
| }, |
| { |
| "epoch": 6.4577656675749315, |
| "grad_norm": 0.1030815839767456, |
| "learning_rate": 7.402755585814269e-07, |
| "loss": 0.0099, |
| "step": 9480 |
| }, |
| { |
| "epoch": 6.46457765667575, |
| "grad_norm": 0.20818915963172913, |
| "learning_rate": 7.121978017170073e-07, |
| "loss": 0.0115, |
| "step": 9490 |
| }, |
| { |
| "epoch": 6.471389645776567, |
| "grad_norm": 0.1520918905735016, |
| "learning_rate": 6.846590483608306e-07, |
| "loss": 0.0084, |
| "step": 9500 |
| }, |
| { |
| "epoch": 6.478201634877384, |
| "grad_norm": 0.13606111705303192, |
| "learning_rate": 6.576595996722834e-07, |
| "loss": 0.0159, |
| "step": 9510 |
| }, |
| { |
| "epoch": 6.485013623978202, |
| "grad_norm": 0.1213141530752182, |
| "learning_rate": 6.311997509130141e-07, |
| "loss": 0.0093, |
| "step": 9520 |
| }, |
| { |
| "epoch": 6.491825613079019, |
| "grad_norm": 0.18930743634700775, |
| "learning_rate": 6.052797914436803e-07, |
| "loss": 0.0114, |
| "step": 9530 |
| }, |
| { |
| "epoch": 6.498637602179836, |
| "grad_norm": 0.2151637226343155, |
| "learning_rate": 5.799000047208181e-07, |
| "loss": 0.0133, |
| "step": 9540 |
| }, |
| { |
| "epoch": 6.505449591280654, |
| "grad_norm": 0.16114141047000885, |
| "learning_rate": 5.550606682937054e-07, |
| "loss": 0.0115, |
| "step": 9550 |
| }, |
| { |
| "epoch": 6.512261580381471, |
| "grad_norm": 0.1699608713388443, |
| "learning_rate": 5.307620538013481e-07, |
| "loss": 0.0114, |
| "step": 9560 |
| }, |
| { |
| "epoch": 6.5190735694822886, |
| "grad_norm": 0.15840139985084534, |
| "learning_rate": 5.070044269694874e-07, |
| "loss": 0.0101, |
| "step": 9570 |
| }, |
| { |
| "epoch": 6.525885558583107, |
| "grad_norm": 0.22831596434116364, |
| "learning_rate": 4.837880476077417e-07, |
| "loss": 0.0106, |
| "step": 9580 |
| }, |
| { |
| "epoch": 6.532697547683924, |
| "grad_norm": 0.118828684091568, |
| "learning_rate": 4.6111316960670835e-07, |
| "loss": 0.0098, |
| "step": 9590 |
| }, |
| { |
| "epoch": 6.539509536784741, |
| "grad_norm": 0.1655462384223938, |
| "learning_rate": 4.389800409352218e-07, |
| "loss": 0.0082, |
| "step": 9600 |
| }, |
| { |
| "epoch": 6.546321525885559, |
| "grad_norm": 0.1253342479467392, |
| "learning_rate": 4.173889036376277e-07, |
| "loss": 0.0111, |
| "step": 9610 |
| }, |
| { |
| "epoch": 6.553133514986376, |
| "grad_norm": 0.15380145609378815, |
| "learning_rate": 3.963399938311463e-07, |
| "loss": 0.0115, |
| "step": 9620 |
| }, |
| { |
| "epoch": 6.559945504087193, |
| "grad_norm": 0.13774822652339935, |
| "learning_rate": 3.7583354170328545e-07, |
| "loss": 0.012, |
| "step": 9630 |
| }, |
| { |
| "epoch": 6.566757493188011, |
| "grad_norm": 0.08887213468551636, |
| "learning_rate": 3.558697715093207e-07, |
| "loss": 0.0084, |
| "step": 9640 |
| }, |
| { |
| "epoch": 6.573569482288828, |
| "grad_norm": 0.2804868817329407, |
| "learning_rate": 3.3644890156983576e-07, |
| "loss": 0.0109, |
| "step": 9650 |
| }, |
| { |
| "epoch": 6.580381471389646, |
| "grad_norm": 0.12525686621665955, |
| "learning_rate": 3.175711442683638e-07, |
| "loss": 0.0084, |
| "step": 9660 |
| }, |
| { |
| "epoch": 6.587193460490463, |
| "grad_norm": 0.12775982916355133, |
| "learning_rate": 2.9923670604902197e-07, |
| "loss": 0.0097, |
| "step": 9670 |
| }, |
| { |
| "epoch": 6.594005449591281, |
| "grad_norm": 0.22419363260269165, |
| "learning_rate": 2.814457874143028e-07, |
| "loss": 0.0122, |
| "step": 9680 |
| }, |
| { |
| "epoch": 6.600817438692098, |
| "grad_norm": 0.16230632364749908, |
| "learning_rate": 2.641985829228366e-07, |
| "loss": 0.0102, |
| "step": 9690 |
| }, |
| { |
| "epoch": 6.607629427792915, |
| "grad_norm": 0.15815846621990204, |
| "learning_rate": 2.474952811872877e-07, |
| "loss": 0.0092, |
| "step": 9700 |
| }, |
| { |
| "epoch": 6.614441416893733, |
| "grad_norm": 0.13755181431770325, |
| "learning_rate": 2.3133606487228397e-07, |
| "loss": 0.0116, |
| "step": 9710 |
| }, |
| { |
| "epoch": 6.62125340599455, |
| "grad_norm": 0.09371072053909302, |
| "learning_rate": 2.157211106924295e-07, |
| "loss": 0.0087, |
| "step": 9720 |
| }, |
| { |
| "epoch": 6.628065395095367, |
| "grad_norm": 0.1671672761440277, |
| "learning_rate": 2.006505894103672e-07, |
| "loss": 0.0107, |
| "step": 9730 |
| }, |
| { |
| "epoch": 6.6348773841961854, |
| "grad_norm": 0.1295129358768463, |
| "learning_rate": 1.8612466583489696e-07, |
| "loss": 0.0098, |
| "step": 9740 |
| }, |
| { |
| "epoch": 6.641689373297003, |
| "grad_norm": 0.2207920253276825, |
| "learning_rate": 1.7214349881918834e-07, |
| "loss": 0.0097, |
| "step": 9750 |
| }, |
| { |
| "epoch": 6.64850136239782, |
| "grad_norm": 0.130056232213974, |
| "learning_rate": 1.5870724125904845e-07, |
| "loss": 0.0081, |
| "step": 9760 |
| }, |
| { |
| "epoch": 6.655313351498638, |
| "grad_norm": 0.12633217871189117, |
| "learning_rate": 1.4581604009124006e-07, |
| "loss": 0.0096, |
| "step": 9770 |
| }, |
| { |
| "epoch": 6.662125340599455, |
| "grad_norm": 0.16835469007492065, |
| "learning_rate": 1.334700362918717e-07, |
| "loss": 0.0091, |
| "step": 9780 |
| }, |
| { |
| "epoch": 6.668937329700272, |
| "grad_norm": 0.26601773500442505, |
| "learning_rate": 1.2166936487486015e-07, |
| "loss": 0.0104, |
| "step": 9790 |
| }, |
| { |
| "epoch": 6.67574931880109, |
| "grad_norm": 0.15718552470207214, |
| "learning_rate": 1.1041415489045914e-07, |
| "loss": 0.0089, |
| "step": 9800 |
| }, |
| { |
| "epoch": 6.682561307901907, |
| "grad_norm": 0.14041031897068024, |
| "learning_rate": 9.970452942384412e-08, |
| "loss": 0.0104, |
| "step": 9810 |
| }, |
| { |
| "epoch": 6.689373297002724, |
| "grad_norm": 0.10807531327009201, |
| "learning_rate": 8.954060559375754e-08, |
| "loss": 0.0087, |
| "step": 9820 |
| }, |
| { |
| "epoch": 6.6961852861035425, |
| "grad_norm": 0.13568098843097687, |
| "learning_rate": 7.99224945512489e-08, |
| "loss": 0.0159, |
| "step": 9830 |
| }, |
| { |
| "epoch": 6.70299727520436, |
| "grad_norm": 0.306471049785614, |
| "learning_rate": 7.085030147843675e-08, |
| "loss": 0.0124, |
| "step": 9840 |
| }, |
| { |
| "epoch": 6.709809264305177, |
| "grad_norm": 0.14044924080371857, |
| "learning_rate": 6.232412558736523e-08, |
| "loss": 0.0117, |
| "step": 9850 |
| }, |
| { |
| "epoch": 6.716621253405995, |
| "grad_norm": 0.14973674714565277, |
| "learning_rate": 5.434406011893822e-08, |
| "loss": 0.0139, |
| "step": 9860 |
| }, |
| { |
| "epoch": 6.723433242506812, |
| "grad_norm": 0.10210314393043518, |
| "learning_rate": 4.6910192341864664e-08, |
| "loss": 0.0078, |
| "step": 9870 |
| }, |
| { |
| "epoch": 6.730245231607629, |
| "grad_norm": 0.15292491018772125, |
| "learning_rate": 4.0022603551737035e-08, |
| "loss": 0.0099, |
| "step": 9880 |
| }, |
| { |
| "epoch": 6.737057220708447, |
| "grad_norm": 0.17868728935718536, |
| "learning_rate": 3.3681369070120985e-08, |
| "loss": 0.012, |
| "step": 9890 |
| }, |
| { |
| "epoch": 6.743869209809264, |
| "grad_norm": 0.17693090438842773, |
| "learning_rate": 2.7886558243744866e-08, |
| "loss": 0.0112, |
| "step": 9900 |
| }, |
| { |
| "epoch": 6.7506811989100814, |
| "grad_norm": 0.1320875883102417, |
| "learning_rate": 2.2638234443722596e-08, |
| "loss": 0.0096, |
| "step": 9910 |
| }, |
| { |
| "epoch": 6.7574931880108995, |
| "grad_norm": 0.1211492195725441, |
| "learning_rate": 1.7936455064887504e-08, |
| "loss": 0.013, |
| "step": 9920 |
| }, |
| { |
| "epoch": 6.764305177111717, |
| "grad_norm": 0.1284903734922409, |
| "learning_rate": 1.378127152514841e-08, |
| "loss": 0.0066, |
| "step": 9930 |
| }, |
| { |
| "epoch": 6.771117166212534, |
| "grad_norm": 0.12337515503168106, |
| "learning_rate": 1.0172729264917857e-08, |
| "loss": 0.0118, |
| "step": 9940 |
| }, |
| { |
| "epoch": 6.777929155313352, |
| "grad_norm": 0.15872040390968323, |
| "learning_rate": 7.1108677466458215e-09, |
| "loss": 0.0107, |
| "step": 9950 |
| }, |
| { |
| "epoch": 6.784741144414169, |
| "grad_norm": 0.13814593851566315, |
| "learning_rate": 4.595720454353414e-09, |
| "loss": 0.0119, |
| "step": 9960 |
| }, |
| { |
| "epoch": 6.791553133514986, |
| "grad_norm": 0.16548724472522736, |
| "learning_rate": 2.627314893294264e-09, |
| "loss": 0.008, |
| "step": 9970 |
| }, |
| { |
| "epoch": 6.798365122615804, |
| "grad_norm": 0.16446246206760406, |
| "learning_rate": 1.2056725896270048e-09, |
| "loss": 0.0097, |
| "step": 9980 |
| }, |
| { |
| "epoch": 6.805177111716621, |
| "grad_norm": 0.1332317590713501, |
| "learning_rate": 3.308090902098826e-10, |
| "loss": 0.0099, |
| "step": 9990 |
| }, |
| { |
| "epoch": 6.8119891008174385, |
| "grad_norm": 0.15192656219005585, |
| "learning_rate": 2.7339624120159555e-12, |
| "loss": 0.01, |
| "step": 10000 |
| }, |
| { |
| "epoch": 6.8119891008174385, |
| "step": 10000, |
| "total_flos": 0.0, |
| "train_loss": 0.0075618208244442936, |
| "train_runtime": 3810.2859, |
| "train_samples_per_second": 83.983, |
| "train_steps_per_second": 2.624 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 7, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|