| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 3651, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008221993833504625, |
| "grad_norm": 0.9403873682022095, |
| "learning_rate": 0.0, |
| "loss": 0.8484, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0041109969167523125, |
| "grad_norm": 0.8725608587265015, |
| "learning_rate": 4.3715846994535524e-07, |
| "loss": 0.9215, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.008221993833504625, |
| "grad_norm": 0.8453531861305237, |
| "learning_rate": 9.836065573770493e-07, |
| "loss": 0.9604, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012332990750256937, |
| "grad_norm": 0.7130156755447388, |
| "learning_rate": 1.5300546448087432e-06, |
| "loss": 0.8716, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01644398766700925, |
| "grad_norm": 1.148436427116394, |
| "learning_rate": 2.0765027322404376e-06, |
| "loss": 0.9847, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.020554984583761562, |
| "grad_norm": 1.012150764465332, |
| "learning_rate": 2.6229508196721314e-06, |
| "loss": 0.8876, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.024665981500513873, |
| "grad_norm": 0.9746986627578735, |
| "learning_rate": 3.1693989071038255e-06, |
| "loss": 0.9536, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02877697841726619, |
| "grad_norm": 1.1652287244796753, |
| "learning_rate": 3.7158469945355197e-06, |
| "loss": 0.9013, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0328879753340185, |
| "grad_norm": 0.9780025482177734, |
| "learning_rate": 4.2622950819672135e-06, |
| "loss": 0.8908, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03699897225077081, |
| "grad_norm": 0.9223105907440186, |
| "learning_rate": 4.808743169398907e-06, |
| "loss": 0.821, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.041109969167523124, |
| "grad_norm": 0.7905207276344299, |
| "learning_rate": 5.355191256830602e-06, |
| "loss": 0.8651, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.045220966084275435, |
| "grad_norm": 0.7562019228935242, |
| "learning_rate": 5.9016393442622956e-06, |
| "loss": 0.7358, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.04933196300102775, |
| "grad_norm": 0.38864654302597046, |
| "learning_rate": 6.44808743169399e-06, |
| "loss": 0.764, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05344295991778006, |
| "grad_norm": 0.6753324866294861, |
| "learning_rate": 6.994535519125684e-06, |
| "loss": 0.8409, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.05755395683453238, |
| "grad_norm": 0.4633980691432953, |
| "learning_rate": 7.540983606557377e-06, |
| "loss": 0.6562, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06166495375128469, |
| "grad_norm": 0.41575130820274353, |
| "learning_rate": 8.087431693989072e-06, |
| "loss": 0.7852, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.065775950668037, |
| "grad_norm": 0.6721888184547424, |
| "learning_rate": 8.633879781420765e-06, |
| "loss": 0.7728, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0698869475847893, |
| "grad_norm": 0.7128573060035706, |
| "learning_rate": 9.18032786885246e-06, |
| "loss": 0.6927, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.07399794450154162, |
| "grad_norm": 0.4412561058998108, |
| "learning_rate": 9.726775956284153e-06, |
| "loss": 0.6382, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07810894141829394, |
| "grad_norm": 0.501618504524231, |
| "learning_rate": 1.0273224043715849e-05, |
| "loss": 0.685, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08221993833504625, |
| "grad_norm": 0.3464237153530121, |
| "learning_rate": 1.0819672131147544e-05, |
| "loss": 0.6624, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08633093525179857, |
| "grad_norm": 0.5196655988693237, |
| "learning_rate": 1.1366120218579235e-05, |
| "loss": 0.6127, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.09044193216855087, |
| "grad_norm": 0.5325089693069458, |
| "learning_rate": 1.191256830601093e-05, |
| "loss": 0.5875, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09455292908530319, |
| "grad_norm": 0.30435436964035034, |
| "learning_rate": 1.2459016393442624e-05, |
| "loss": 0.5136, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0986639260020555, |
| "grad_norm": 0.7467198371887207, |
| "learning_rate": 1.3005464480874317e-05, |
| "loss": 0.5921, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.10277492291880781, |
| "grad_norm": 0.7135942578315735, |
| "learning_rate": 1.3551912568306011e-05, |
| "loss": 0.5228, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.10688591983556012, |
| "grad_norm": 0.663159966468811, |
| "learning_rate": 1.4098360655737706e-05, |
| "loss": 0.5076, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.11099691675231244, |
| "grad_norm": 0.5868617296218872, |
| "learning_rate": 1.46448087431694e-05, |
| "loss": 0.4551, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.11510791366906475, |
| "grad_norm": 0.6190376877784729, |
| "learning_rate": 1.5191256830601094e-05, |
| "loss": 0.5692, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11921891058581706, |
| "grad_norm": 0.8358873724937439, |
| "learning_rate": 1.5737704918032788e-05, |
| "loss": 0.5348, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.12332990750256938, |
| "grad_norm": 0.6471666693687439, |
| "learning_rate": 1.628415300546448e-05, |
| "loss": 0.4363, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12744090441932168, |
| "grad_norm": 0.5585145354270935, |
| "learning_rate": 1.6830601092896177e-05, |
| "loss": 0.4181, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.131551901336074, |
| "grad_norm": 0.5833807587623596, |
| "learning_rate": 1.737704918032787e-05, |
| "loss": 0.4133, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.13566289825282632, |
| "grad_norm": 0.7664275169372559, |
| "learning_rate": 1.7923497267759563e-05, |
| "loss": 0.5059, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1397738951695786, |
| "grad_norm": 0.7638784050941467, |
| "learning_rate": 1.846994535519126e-05, |
| "loss": 0.3536, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.14388489208633093, |
| "grad_norm": 0.5982478857040405, |
| "learning_rate": 1.9016393442622952e-05, |
| "loss": 0.4113, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.14799588900308325, |
| "grad_norm": 0.7887367606163025, |
| "learning_rate": 1.9562841530054645e-05, |
| "loss": 0.3836, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.15210688591983557, |
| "grad_norm": 0.6601587533950806, |
| "learning_rate": 1.9999995896905283e-05, |
| "loss": 0.4506, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.15621788283658788, |
| "grad_norm": 0.7067707180976868, |
| "learning_rate": 1.9999852288943748e-05, |
| "loss": 0.4673, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16032887975334018, |
| "grad_norm": 0.6842494606971741, |
| "learning_rate": 1.9999503529613444e-05, |
| "loss": 0.425, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.1644398766700925, |
| "grad_norm": 0.7092298865318298, |
| "learning_rate": 1.999894962606933e-05, |
| "loss": 0.3451, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1685508735868448, |
| "grad_norm": 0.9632850289344788, |
| "learning_rate": 1.999819058967497e-05, |
| "loss": 0.3734, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.17266187050359713, |
| "grad_norm": 0.8995895385742188, |
| "learning_rate": 1.999722643600234e-05, |
| "loss": 0.4026, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.17677286742034942, |
| "grad_norm": 1.071627140045166, |
| "learning_rate": 1.9996057184831475e-05, |
| "loss": 0.3483, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.18088386433710174, |
| "grad_norm": 0.7492429614067078, |
| "learning_rate": 1.9994682860150073e-05, |
| "loss": 0.3331, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.18499486125385406, |
| "grad_norm": 0.8149610161781311, |
| "learning_rate": 1.999310349015301e-05, |
| "loss": 0.3281, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.18910585817060638, |
| "grad_norm": 0.8723487257957458, |
| "learning_rate": 1.9991319107241766e-05, |
| "loss": 0.4638, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1932168550873587, |
| "grad_norm": 0.8918459415435791, |
| "learning_rate": 1.9989329748023728e-05, |
| "loss": 0.3805, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.197327852004111, |
| "grad_norm": 0.7838431596755981, |
| "learning_rate": 1.998713545331148e-05, |
| "loss": 0.3863, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2014388489208633, |
| "grad_norm": 0.8793269991874695, |
| "learning_rate": 1.9984736268121944e-05, |
| "loss": 0.3898, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.20554984583761562, |
| "grad_norm": 0.9354943633079529, |
| "learning_rate": 1.998213224167546e-05, |
| "loss": 0.371, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.20966084275436794, |
| "grad_norm": 0.8775899410247803, |
| "learning_rate": 1.997932342739478e-05, |
| "loss": 0.3388, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.21377183967112023, |
| "grad_norm": 1.0165460109710693, |
| "learning_rate": 1.9976309882903957e-05, |
| "loss": 0.3304, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.21788283658787255, |
| "grad_norm": 0.9298381805419922, |
| "learning_rate": 1.9973091670027184e-05, |
| "loss": 0.3581, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.22199383350462487, |
| "grad_norm": 0.853493332862854, |
| "learning_rate": 1.996966885478752e-05, |
| "loss": 0.3808, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2261048304213772, |
| "grad_norm": 1.078824520111084, |
| "learning_rate": 1.996604150740552e-05, |
| "loss": 0.3559, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.2302158273381295, |
| "grad_norm": 0.6552051305770874, |
| "learning_rate": 1.9962209702297807e-05, |
| "loss": 0.3194, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2343268242548818, |
| "grad_norm": 1.0376273393630981, |
| "learning_rate": 1.995817351807556e-05, |
| "loss": 0.4223, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.23843782117163412, |
| "grad_norm": 1.0463075637817383, |
| "learning_rate": 1.9953933037542864e-05, |
| "loss": 0.3439, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.24254881808838644, |
| "grad_norm": 0.997489869594574, |
| "learning_rate": 1.9949488347695044e-05, |
| "loss": 0.3264, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.24665981500513876, |
| "grad_norm": 1.2122714519500732, |
| "learning_rate": 1.994483953971687e-05, |
| "loss": 0.2846, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.25077081192189105, |
| "grad_norm": 1.008628010749817, |
| "learning_rate": 1.9939986708980686e-05, |
| "loss": 0.2891, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.25488180883864336, |
| "grad_norm": 0.8791748285293579, |
| "learning_rate": 1.993492995504444e-05, |
| "loss": 0.3377, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2589928057553957, |
| "grad_norm": 0.9464241862297058, |
| "learning_rate": 1.9929669381649673e-05, |
| "loss": 0.3894, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.263103802672148, |
| "grad_norm": 1.1983399391174316, |
| "learning_rate": 1.992420509671936e-05, |
| "loss": 0.3583, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2672147995889003, |
| "grad_norm": 1.1101964712142944, |
| "learning_rate": 1.9918537212355704e-05, |
| "loss": 0.2783, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.27132579650565264, |
| "grad_norm": 1.0709152221679688, |
| "learning_rate": 1.9912665844837855e-05, |
| "loss": 0.349, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.27543679342240496, |
| "grad_norm": 1.0815412998199463, |
| "learning_rate": 1.9906591114619494e-05, |
| "loss": 0.3074, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.2795477903391572, |
| "grad_norm": 1.0269906520843506, |
| "learning_rate": 1.9900313146326384e-05, |
| "loss": 0.2844, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28365878725590954, |
| "grad_norm": 1.0790280103683472, |
| "learning_rate": 1.989383206875381e-05, |
| "loss": 0.2912, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.28776978417266186, |
| "grad_norm": 0.8625520467758179, |
| "learning_rate": 1.988714801486393e-05, |
| "loss": 0.2805, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2918807810894142, |
| "grad_norm": 0.9437581896781921, |
| "learning_rate": 1.9880261121783046e-05, |
| "loss": 0.3312, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.2959917780061665, |
| "grad_norm": 0.8448299765586853, |
| "learning_rate": 1.98731715307988e-05, |
| "loss": 0.252, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3001027749229188, |
| "grad_norm": 0.7578240633010864, |
| "learning_rate": 1.9865879387357272e-05, |
| "loss": 0.3417, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.30421377183967113, |
| "grad_norm": 1.0512038469314575, |
| "learning_rate": 1.985838484105999e-05, |
| "loss": 0.3185, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.30832476875642345, |
| "grad_norm": 1.0025371313095093, |
| "learning_rate": 1.985068804566087e-05, |
| "loss": 0.3161, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.31243576567317577, |
| "grad_norm": 0.8572078347206116, |
| "learning_rate": 1.9842789159063056e-05, |
| "loss": 0.328, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.31654676258992803, |
| "grad_norm": 1.0248758792877197, |
| "learning_rate": 1.983468834331568e-05, |
| "loss": 0.3121, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.32065775950668035, |
| "grad_norm": 0.899937629699707, |
| "learning_rate": 1.9826385764610542e-05, |
| "loss": 0.3092, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.32476875642343267, |
| "grad_norm": 0.9734247922897339, |
| "learning_rate": 1.9817881593278695e-05, |
| "loss": 0.2803, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.328879753340185, |
| "grad_norm": 0.99278324842453, |
| "learning_rate": 1.9809176003786953e-05, |
| "loss": 0.3602, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3329907502569373, |
| "grad_norm": 0.9903774857521057, |
| "learning_rate": 1.980026917473432e-05, |
| "loss": 0.2851, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.3371017471736896, |
| "grad_norm": 0.9723519682884216, |
| "learning_rate": 1.979116128884831e-05, |
| "loss": 0.3119, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.34121274409044194, |
| "grad_norm": 1.1728214025497437, |
| "learning_rate": 1.9781852532981204e-05, |
| "loss": 0.2933, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.34532374100719426, |
| "grad_norm": 1.1374309062957764, |
| "learning_rate": 1.9772343098106227e-05, |
| "loss": 0.2955, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3494347379239466, |
| "grad_norm": 1.145560622215271, |
| "learning_rate": 1.9762633179313625e-05, |
| "loss": 0.3452, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.35354573484069884, |
| "grad_norm": 1.1968661546707153, |
| "learning_rate": 1.9752722975806643e-05, |
| "loss": 0.3625, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.35765673175745116, |
| "grad_norm": 1.1781413555145264, |
| "learning_rate": 1.9742612690897473e-05, |
| "loss": 0.2798, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.3617677286742035, |
| "grad_norm": 1.2856999635696411, |
| "learning_rate": 1.973230253200305e-05, |
| "loss": 0.265, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3658787255909558, |
| "grad_norm": 1.1101672649383545, |
| "learning_rate": 1.972179271064083e-05, |
| "loss": 0.2528, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.3699897225077081, |
| "grad_norm": 1.117969274520874, |
| "learning_rate": 1.971108344242441e-05, |
| "loss": 0.3159, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.37410071942446044, |
| "grad_norm": 1.3405277729034424, |
| "learning_rate": 1.9700174947059146e-05, |
| "loss": 0.2576, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.37821171634121276, |
| "grad_norm": 1.0911766290664673, |
| "learning_rate": 1.968906744833762e-05, |
| "loss": 0.2896, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3823227132579651, |
| "grad_norm": 0.8619614243507385, |
| "learning_rate": 1.967776117413505e-05, |
| "loss": 0.2734, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.3864337101747174, |
| "grad_norm": 0.9228631258010864, |
| "learning_rate": 1.9666256356404628e-05, |
| "loss": 0.295, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.39054470709146966, |
| "grad_norm": 1.2374449968338013, |
| "learning_rate": 1.9654553231172748e-05, |
| "loss": 0.2974, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.394655704008222, |
| "grad_norm": 1.177212119102478, |
| "learning_rate": 1.9642652038534174e-05, |
| "loss": 0.2841, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3987667009249743, |
| "grad_norm": 0.7735481262207031, |
| "learning_rate": 1.9630553022647113e-05, |
| "loss": 0.226, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.4028776978417266, |
| "grad_norm": 1.1202296018600464, |
| "learning_rate": 1.961825643172819e-05, |
| "loss": 0.3067, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.40698869475847893, |
| "grad_norm": 1.363702654838562, |
| "learning_rate": 1.9605762518047387e-05, |
| "loss": 0.3293, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.41109969167523125, |
| "grad_norm": 0.9886282682418823, |
| "learning_rate": 1.959307153792283e-05, |
| "loss": 0.2591, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.41521068859198357, |
| "grad_norm": 0.8387702703475952, |
| "learning_rate": 1.9580183751715563e-05, |
| "loss": 0.2815, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.4193216855087359, |
| "grad_norm": 1.0631368160247803, |
| "learning_rate": 1.956709942382419e-05, |
| "loss": 0.2227, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4234326824254882, |
| "grad_norm": 0.6852805614471436, |
| "learning_rate": 1.955381882267945e-05, |
| "loss": 0.2567, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.42754367934224047, |
| "grad_norm": 1.046186089515686, |
| "learning_rate": 1.9540342220738726e-05, |
| "loss": 0.2553, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4316546762589928, |
| "grad_norm": 0.8813861608505249, |
| "learning_rate": 1.952666989448043e-05, |
| "loss": 0.311, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.4357656731757451, |
| "grad_norm": 1.0005453824996948, |
| "learning_rate": 1.9512802124398348e-05, |
| "loss": 0.3037, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4398766700924974, |
| "grad_norm": 1.0475354194641113, |
| "learning_rate": 1.9498739194995885e-05, |
| "loss": 0.257, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.44398766700924974, |
| "grad_norm": 1.3572496175765991, |
| "learning_rate": 1.9484481394780225e-05, |
| "loss": 0.2658, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.44809866392600206, |
| "grad_norm": 1.3119100332260132, |
| "learning_rate": 1.9470029016256417e-05, |
| "loss": 0.3076, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4522096608427544, |
| "grad_norm": 1.0053207874298096, |
| "learning_rate": 1.945538235592135e-05, |
| "loss": 0.2648, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4563206577595067, |
| "grad_norm": 1.1991180181503296, |
| "learning_rate": 1.944054171425772e-05, |
| "loss": 0.2933, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.460431654676259, |
| "grad_norm": 0.9475321173667908, |
| "learning_rate": 1.942550739572781e-05, |
| "loss": 0.3323, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4645426515930113, |
| "grad_norm": 1.4173755645751953, |
| "learning_rate": 1.9410279708767282e-05, |
| "loss": 0.2799, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.4686536485097636, |
| "grad_norm": 1.1307871341705322, |
| "learning_rate": 1.939485896577883e-05, |
| "loss": 0.2934, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4727646454265159, |
| "grad_norm": 1.2060418128967285, |
| "learning_rate": 1.9379245483125783e-05, |
| "loss": 0.2336, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.47687564234326824, |
| "grad_norm": 1.452938199043274, |
| "learning_rate": 1.9363439581125603e-05, |
| "loss": 0.2929, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.48098663926002055, |
| "grad_norm": 1.2634321451187134, |
| "learning_rate": 1.9347441584043325e-05, |
| "loss": 0.2966, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.4850976361767729, |
| "grad_norm": 1.2690166234970093, |
| "learning_rate": 1.9331251820084897e-05, |
| "loss": 0.2765, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4892086330935252, |
| "grad_norm": 1.231868028640747, |
| "learning_rate": 1.931487062139045e-05, |
| "loss": 0.2978, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.4933196300102775, |
| "grad_norm": 1.2768539190292358, |
| "learning_rate": 1.9298298324027476e-05, |
| "loss": 0.2712, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.49743062692702983, |
| "grad_norm": 1.0039498805999756, |
| "learning_rate": 1.928153526798395e-05, |
| "loss": 0.2542, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5015416238437821, |
| "grad_norm": 0.9825944900512695, |
| "learning_rate": 1.9264581797161345e-05, |
| "loss": 0.2919, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5056526207605344, |
| "grad_norm": 1.2589720487594604, |
| "learning_rate": 1.9247438259367562e-05, |
| "loss": 0.2647, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5097636176772867, |
| "grad_norm": 0.9228754639625549, |
| "learning_rate": 1.923010500630983e-05, |
| "loss": 0.314, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.513874614594039, |
| "grad_norm": 1.0735732316970825, |
| "learning_rate": 1.921258239358746e-05, |
| "loss": 0.2404, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.5179856115107914, |
| "grad_norm": 1.3011178970336914, |
| "learning_rate": 1.919487078068455e-05, |
| "loss": 0.2818, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5220966084275437, |
| "grad_norm": 1.1417852640151978, |
| "learning_rate": 1.9176970530962644e-05, |
| "loss": 0.2779, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.526207605344296, |
| "grad_norm": 1.5373178720474243, |
| "learning_rate": 1.9158882011653233e-05, |
| "loss": 0.2582, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5303186022610483, |
| "grad_norm": 1.3577611446380615, |
| "learning_rate": 1.914060559385025e-05, |
| "loss": 0.3093, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.5344295991778006, |
| "grad_norm": 1.2170337438583374, |
| "learning_rate": 1.9122141652502442e-05, |
| "loss": 0.255, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.538540596094553, |
| "grad_norm": 1.4195996522903442, |
| "learning_rate": 1.9103490566405694e-05, |
| "loss": 0.2874, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5426515930113053, |
| "grad_norm": 1.1771873235702515, |
| "learning_rate": 1.9084652718195237e-05, |
| "loss": 0.2817, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5467625899280576, |
| "grad_norm": 1.158410668373108, |
| "learning_rate": 1.906562849433782e-05, |
| "loss": 0.2421, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5508735868448099, |
| "grad_norm": 1.0603723526000977, |
| "learning_rate": 1.9046418285123755e-05, |
| "loss": 0.2557, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5549845837615622, |
| "grad_norm": 1.2592222690582275, |
| "learning_rate": 1.9027022484658947e-05, |
| "loss": 0.2406, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.5590955806783144, |
| "grad_norm": 0.9237696528434753, |
| "learning_rate": 1.9007441490856764e-05, |
| "loss": 0.2735, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5632065775950668, |
| "grad_norm": 1.1776955127716064, |
| "learning_rate": 1.8987675705429916e-05, |
| "loss": 0.2444, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.5673175745118191, |
| "grad_norm": 1.037622332572937, |
| "learning_rate": 1.896772553388218e-05, |
| "loss": 0.2928, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.2918436527252197, |
| "learning_rate": 1.8947591385500104e-05, |
| "loss": 0.2912, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5755395683453237, |
| "grad_norm": 0.9499860405921936, |
| "learning_rate": 1.89272736733446e-05, |
| "loss": 0.2149, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.579650565262076, |
| "grad_norm": 0.8249284029006958, |
| "learning_rate": 1.8906772814242472e-05, |
| "loss": 0.2664, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5837615621788284, |
| "grad_norm": 0.9467989802360535, |
| "learning_rate": 1.8886089228777858e-05, |
| "loss": 0.2239, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5878725590955807, |
| "grad_norm": 0.9601849317550659, |
| "learning_rate": 1.8865223341283618e-05, |
| "loss": 0.2638, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.591983556012333, |
| "grad_norm": 1.234498381614685, |
| "learning_rate": 1.8844175579832613e-05, |
| "loss": 0.2486, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5960945529290853, |
| "grad_norm": 1.1814972162246704, |
| "learning_rate": 1.8822946376228926e-05, |
| "loss": 0.3067, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6002055498458376, |
| "grad_norm": 1.0824075937271118, |
| "learning_rate": 1.8801536165999008e-05, |
| "loss": 0.2663, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.60431654676259, |
| "grad_norm": 1.0348601341247559, |
| "learning_rate": 1.8779945388382742e-05, |
| "loss": 0.2127, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6084275436793423, |
| "grad_norm": 1.0171630382537842, |
| "learning_rate": 1.875817448632443e-05, |
| "loss": 0.2737, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6125385405960946, |
| "grad_norm": 0.9645372033119202, |
| "learning_rate": 1.8736223906463698e-05, |
| "loss": 0.1949, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6166495375128469, |
| "grad_norm": 1.3011705875396729, |
| "learning_rate": 1.8714094099126353e-05, |
| "loss": 0.2427, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6207605344295992, |
| "grad_norm": 1.6664115190505981, |
| "learning_rate": 1.8691785518315132e-05, |
| "loss": 0.2432, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6248715313463515, |
| "grad_norm": 1.0895479917526245, |
| "learning_rate": 1.866929862170038e-05, |
| "loss": 0.2512, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6289825282631039, |
| "grad_norm": 1.159765362739563, |
| "learning_rate": 1.864663387061067e-05, |
| "loss": 0.2373, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6330935251798561, |
| "grad_norm": 1.1278481483459473, |
| "learning_rate": 1.8623791730023347e-05, |
| "loss": 0.2669, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6372045220966084, |
| "grad_norm": 1.1040396690368652, |
| "learning_rate": 1.860077266855497e-05, |
| "loss": 0.2374, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6413155190133607, |
| "grad_norm": 1.3087905645370483, |
| "learning_rate": 1.8577577158451713e-05, |
| "loss": 0.2313, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.645426515930113, |
| "grad_norm": 1.0294170379638672, |
| "learning_rate": 1.8554205675579665e-05, |
| "loss": 0.2876, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.6495375128468653, |
| "grad_norm": 1.6003506183624268, |
| "learning_rate": 1.8530658699415088e-05, |
| "loss": 0.2334, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6536485097636177, |
| "grad_norm": 1.433346152305603, |
| "learning_rate": 1.850693671303455e-05, |
| "loss": 0.2283, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.65775950668037, |
| "grad_norm": 1.0867021083831787, |
| "learning_rate": 1.8483040203105038e-05, |
| "loss": 0.2255, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6618705035971223, |
| "grad_norm": 1.2296451330184937, |
| "learning_rate": 1.8458969659873966e-05, |
| "loss": 0.2523, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.6659815005138746, |
| "grad_norm": 1.2868030071258545, |
| "learning_rate": 1.843472557715912e-05, |
| "loss": 0.2133, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6700924974306269, |
| "grad_norm": 1.3351691961288452, |
| "learning_rate": 1.841030845233852e-05, |
| "loss": 0.205, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6742034943473793, |
| "grad_norm": 1.3833565711975098, |
| "learning_rate": 1.8385718786340216e-05, |
| "loss": 0.2426, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6783144912641316, |
| "grad_norm": 0.7566655278205872, |
| "learning_rate": 1.8360957083632037e-05, |
| "loss": 0.2464, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6824254881808839, |
| "grad_norm": 1.2757395505905151, |
| "learning_rate": 1.8336023852211197e-05, |
| "loss": 0.2916, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6865364850976362, |
| "grad_norm": 1.3110612630844116, |
| "learning_rate": 1.831091960359391e-05, |
| "loss": 0.2279, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.6906474820143885, |
| "grad_norm": 1.1281332969665527, |
| "learning_rate": 1.828564485280488e-05, |
| "loss": 0.2405, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6947584789311408, |
| "grad_norm": 0.971794068813324, |
| "learning_rate": 1.826020011836674e-05, |
| "loss": 0.2175, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.6988694758478932, |
| "grad_norm": 1.0543270111083984, |
| "learning_rate": 1.8234585922289408e-05, |
| "loss": 0.1993, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7029804727646455, |
| "grad_norm": 0.9650211334228516, |
| "learning_rate": 1.8208802790059383e-05, |
| "loss": 0.2252, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7070914696813977, |
| "grad_norm": 1.2143207788467407, |
| "learning_rate": 1.818285125062897e-05, |
| "loss": 0.2717, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.71120246659815, |
| "grad_norm": 1.3001195192337036, |
| "learning_rate": 1.815673183640541e-05, |
| "loss": 0.2537, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.7153134635149023, |
| "grad_norm": 1.3198888301849365, |
| "learning_rate": 1.8130445083239982e-05, |
| "loss": 0.247, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7194244604316546, |
| "grad_norm": 1.1122859716415405, |
| "learning_rate": 1.8103991530416992e-05, |
| "loss": 0.2735, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.723535457348407, |
| "grad_norm": 0.8387332558631897, |
| "learning_rate": 1.807737172064271e-05, |
| "loss": 0.2444, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7276464542651593, |
| "grad_norm": 0.7013024687767029, |
| "learning_rate": 1.805058620003424e-05, |
| "loss": 0.2819, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.7317574511819116, |
| "grad_norm": 0.9751718640327454, |
| "learning_rate": 1.802363551810833e-05, |
| "loss": 0.222, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7358684480986639, |
| "grad_norm": 1.452439785003662, |
| "learning_rate": 1.7996520227770067e-05, |
| "loss": 0.2552, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.7399794450154162, |
| "grad_norm": 1.1648454666137695, |
| "learning_rate": 1.7969240885301564e-05, |
| "loss": 0.2902, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7440904419321686, |
| "grad_norm": 0.932353138923645, |
| "learning_rate": 1.7941798050350535e-05, |
| "loss": 0.2491, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.7482014388489209, |
| "grad_norm": 1.0840809345245361, |
| "learning_rate": 1.7914192285918807e-05, |
| "loss": 0.2195, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7523124357656732, |
| "grad_norm": 1.059841513633728, |
| "learning_rate": 1.7886424158350784e-05, |
| "loss": 0.2218, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.7564234326824255, |
| "grad_norm": 1.077287197113037, |
| "learning_rate": 1.785849423732182e-05, |
| "loss": 0.2158, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7605344295991778, |
| "grad_norm": 1.2055879831314087, |
| "learning_rate": 1.7830403095826527e-05, |
| "loss": 0.2193, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.7646454265159301, |
| "grad_norm": 1.289574384689331, |
| "learning_rate": 1.7802151310167033e-05, |
| "loss": 0.2343, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7687564234326825, |
| "grad_norm": 1.0531070232391357, |
| "learning_rate": 1.777373945994115e-05, |
| "loss": 0.2344, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.7728674203494348, |
| "grad_norm": 1.2793794870376587, |
| "learning_rate": 1.7745168128030483e-05, |
| "loss": 0.2019, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7769784172661871, |
| "grad_norm": 1.2498109340667725, |
| "learning_rate": 1.7716437900588475e-05, |
| "loss": 0.2167, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.7810894141829393, |
| "grad_norm": 1.266842007637024, |
| "learning_rate": 1.7687549367028382e-05, |
| "loss": 0.2029, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7852004110996916, |
| "grad_norm": 1.0297633409500122, |
| "learning_rate": 1.7658503120011177e-05, |
| "loss": 0.2142, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.789311408016444, |
| "grad_norm": 1.0781053304672241, |
| "learning_rate": 1.7629299755433396e-05, |
| "loss": 0.2336, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7934224049331963, |
| "grad_norm": 1.2344133853912354, |
| "learning_rate": 1.759993987241491e-05, |
| "loss": 0.3145, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.7975334018499486, |
| "grad_norm": 1.3422881364822388, |
| "learning_rate": 1.7570424073286635e-05, |
| "loss": 0.2252, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8016443987667009, |
| "grad_norm": 1.1779459714889526, |
| "learning_rate": 1.7540752963578174e-05, |
| "loss": 0.2348, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8057553956834532, |
| "grad_norm": 0.9927631616592407, |
| "learning_rate": 1.7510927152005394e-05, |
| "loss": 0.2733, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8098663926002055, |
| "grad_norm": 1.211342215538025, |
| "learning_rate": 1.748094725045794e-05, |
| "loss": 0.2836, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.8139773895169579, |
| "grad_norm": 1.2209789752960205, |
| "learning_rate": 1.7450813873986692e-05, |
| "loss": 0.2094, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8180883864337102, |
| "grad_norm": 1.2045336961746216, |
| "learning_rate": 1.7420527640791108e-05, |
| "loss": 0.2166, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8221993833504625, |
| "grad_norm": 1.2108007669448853, |
| "learning_rate": 1.7390089172206594e-05, |
| "loss": 0.2397, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8263103802672148, |
| "grad_norm": 1.415350079536438, |
| "learning_rate": 1.735949909269172e-05, |
| "loss": 0.2734, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.8304213771839671, |
| "grad_norm": 1.2366716861724854, |
| "learning_rate": 1.7328758029815425e-05, |
| "loss": 0.2218, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8345323741007195, |
| "grad_norm": 1.1275813579559326, |
| "learning_rate": 1.7297866614244142e-05, |
| "loss": 0.1967, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.8386433710174718, |
| "grad_norm": 1.3222373723983765, |
| "learning_rate": 1.7266825479728843e-05, |
| "loss": 0.2229, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.8427543679342241, |
| "grad_norm": 1.0726741552352905, |
| "learning_rate": 1.7235635263092066e-05, |
| "loss": 0.2334, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.8468653648509764, |
| "grad_norm": 1.1619436740875244, |
| "learning_rate": 1.7204296604214818e-05, |
| "loss": 0.2326, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8509763617677287, |
| "grad_norm": 0.8942896723747253, |
| "learning_rate": 1.7172810146023476e-05, |
| "loss": 0.2356, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.8550873586844809, |
| "grad_norm": 1.0873944759368896, |
| "learning_rate": 1.7141176534476586e-05, |
| "loss": 0.219, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8591983556012333, |
| "grad_norm": 1.1331894397735596, |
| "learning_rate": 1.71093964185516e-05, |
| "loss": 0.2287, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.8633093525179856, |
| "grad_norm": 1.2221019268035889, |
| "learning_rate": 1.7077470450231573e-05, |
| "loss": 0.2129, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8674203494347379, |
| "grad_norm": 1.3943604230880737, |
| "learning_rate": 1.7045399284491796e-05, |
| "loss": 0.1951, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.8715313463514902, |
| "grad_norm": 1.3927587270736694, |
| "learning_rate": 1.701318357928634e-05, |
| "loss": 0.2304, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8756423432682425, |
| "grad_norm": 1.363983392715454, |
| "learning_rate": 1.698082399553457e-05, |
| "loss": 0.1764, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.8797533401849948, |
| "grad_norm": 0.9369764924049377, |
| "learning_rate": 1.694832119710758e-05, |
| "loss": 0.2145, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8838643371017472, |
| "grad_norm": 1.4691601991653442, |
| "learning_rate": 1.691567585081458e-05, |
| "loss": 0.2607, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.8879753340184995, |
| "grad_norm": 1.1497533321380615, |
| "learning_rate": 1.6882888626389214e-05, |
| "loss": 0.1688, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8920863309352518, |
| "grad_norm": 1.4440010786056519, |
| "learning_rate": 1.6849960196475808e-05, |
| "loss": 0.232, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.8961973278520041, |
| "grad_norm": 1.0219671726226807, |
| "learning_rate": 1.6816891236615588e-05, |
| "loss": 0.2776, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9003083247687564, |
| "grad_norm": 1.03767728805542, |
| "learning_rate": 1.678368242523282e-05, |
| "loss": 0.212, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.9044193216855088, |
| "grad_norm": 1.0109082460403442, |
| "learning_rate": 1.675033444362087e-05, |
| "loss": 0.2467, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9085303186022611, |
| "grad_norm": 1.099452257156372, |
| "learning_rate": 1.6716847975928256e-05, |
| "loss": 0.223, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.9126413155190134, |
| "grad_norm": 1.117277979850769, |
| "learning_rate": 1.668322370914459e-05, |
| "loss": 0.2235, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9167523124357657, |
| "grad_norm": 1.1694763898849487, |
| "learning_rate": 1.66494623330865e-05, |
| "loss": 0.2628, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.920863309352518, |
| "grad_norm": 1.2822755575180054, |
| "learning_rate": 1.6615564540383465e-05, |
| "loss": 0.172, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9249743062692704, |
| "grad_norm": 1.2870562076568604, |
| "learning_rate": 1.658153102646362e-05, |
| "loss": 0.2177, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.9290853031860226, |
| "grad_norm": 1.1713252067565918, |
| "learning_rate": 1.6547362489539473e-05, |
| "loss": 0.2219, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9331963001027749, |
| "grad_norm": 0.9934049248695374, |
| "learning_rate": 1.651305963059358e-05, |
| "loss": 0.2257, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.9373072970195272, |
| "grad_norm": 1.4836572408676147, |
| "learning_rate": 1.6478623153364197e-05, |
| "loss": 0.2625, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9414182939362795, |
| "grad_norm": 1.4694515466690063, |
| "learning_rate": 1.6444053764330794e-05, |
| "loss": 0.3092, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.9455292908530318, |
| "grad_norm": 0.8094273209571838, |
| "learning_rate": 1.64093521726996e-05, |
| "loss": 0.1948, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9496402877697842, |
| "grad_norm": 1.1239687204360962, |
| "learning_rate": 1.637451909038903e-05, |
| "loss": 0.3174, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.9537512846865365, |
| "grad_norm": 1.182337999343872, |
| "learning_rate": 1.6339555232015093e-05, |
| "loss": 0.223, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9578622816032888, |
| "grad_norm": 1.086424469947815, |
| "learning_rate": 1.6304461314876722e-05, |
| "loss": 0.2526, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.9619732785200411, |
| "grad_norm": 1.3039159774780273, |
| "learning_rate": 1.626923805894107e-05, |
| "loss": 0.2073, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9660842754367934, |
| "grad_norm": 1.206239938735962, |
| "learning_rate": 1.6233886186828718e-05, |
| "loss": 0.2323, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.9701952723535457, |
| "grad_norm": 1.1538772583007812, |
| "learning_rate": 1.619840642379888e-05, |
| "loss": 0.2259, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9743062692702981, |
| "grad_norm": 0.9316990375518799, |
| "learning_rate": 1.6162799497734508e-05, |
| "loss": 0.2018, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.9784172661870504, |
| "grad_norm": 1.386884093284607, |
| "learning_rate": 1.612706613912735e-05, |
| "loss": 0.2461, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9825282631038027, |
| "grad_norm": 1.1741526126861572, |
| "learning_rate": 1.6091207081062973e-05, |
| "loss": 0.2356, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.986639260020555, |
| "grad_norm": 1.0439505577087402, |
| "learning_rate": 1.605522305920573e-05, |
| "loss": 0.1756, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9907502569373073, |
| "grad_norm": 1.1836326122283936, |
| "learning_rate": 1.6019114811783663e-05, |
| "loss": 0.2192, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.9948612538540597, |
| "grad_norm": 1.37388014793396, |
| "learning_rate": 1.5982883079573354e-05, |
| "loss": 0.2178, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.998972250770812, |
| "grad_norm": 0.9809684157371521, |
| "learning_rate": 1.5946528605884717e-05, |
| "loss": 0.2096, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.0024665981500513, |
| "grad_norm": 1.4830330610275269, |
| "learning_rate": 1.5910052136545788e-05, |
| "loss": 0.1877, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.0065775950668037, |
| "grad_norm": 1.681783676147461, |
| "learning_rate": 1.5873454419887365e-05, |
| "loss": 0.2309, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.010688591983556, |
| "grad_norm": 1.1318143606185913, |
| "learning_rate": 1.5836736206727717e-05, |
| "loss": 0.2246, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.0147995889003083, |
| "grad_norm": 1.2023489475250244, |
| "learning_rate": 1.5799898250357134e-05, |
| "loss": 0.2398, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.0189105858170606, |
| "grad_norm": 1.1647961139678955, |
| "learning_rate": 1.5762941306522504e-05, |
| "loss": 0.2077, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.023021582733813, |
| "grad_norm": 1.2750186920166016, |
| "learning_rate": 1.5725866133411777e-05, |
| "loss": 0.2246, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.0271325796505653, |
| "grad_norm": 1.1721622943878174, |
| "learning_rate": 1.5688673491638452e-05, |
| "loss": 0.2273, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.0312435765673176, |
| "grad_norm": 0.9394551515579224, |
| "learning_rate": 1.565136414422592e-05, |
| "loss": 0.2112, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.03535457348407, |
| "grad_norm": 1.4058277606964111, |
| "learning_rate": 1.5613938856591867e-05, |
| "loss": 0.2574, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.0394655704008222, |
| "grad_norm": 1.388596534729004, |
| "learning_rate": 1.5576398396532538e-05, |
| "loss": 0.1818, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.0435765673175745, |
| "grad_norm": 1.1128935813903809, |
| "learning_rate": 1.5538743534206968e-05, |
| "loss": 0.1823, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.0476875642343269, |
| "grad_norm": 1.1479747295379639, |
| "learning_rate": 1.550097504212124e-05, |
| "loss": 0.2382, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.0517985611510792, |
| "grad_norm": 1.4205436706542969, |
| "learning_rate": 1.5463093695112572e-05, |
| "loss": 0.2548, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.0559095580678315, |
| "grad_norm": 0.9275251030921936, |
| "learning_rate": 1.542510027033347e-05, |
| "loss": 0.1469, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.0600205549845838, |
| "grad_norm": 1.2624469995498657, |
| "learning_rate": 1.5386995547235756e-05, |
| "loss": 0.2276, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.0641315519013361, |
| "grad_norm": 1.2979212999343872, |
| "learning_rate": 1.534878030755458e-05, |
| "loss": 0.1892, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.0682425488180884, |
| "grad_norm": 1.4504139423370361, |
| "learning_rate": 1.5310455335292404e-05, |
| "loss": 0.2041, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.0723535457348408, |
| "grad_norm": 1.2005679607391357, |
| "learning_rate": 1.527202141670289e-05, |
| "loss": 0.2174, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.076464542651593, |
| "grad_norm": 1.295580506324768, |
| "learning_rate": 1.5233479340274783e-05, |
| "loss": 0.1981, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.0805755395683454, |
| "grad_norm": 1.29360830783844, |
| "learning_rate": 1.5194829896715741e-05, |
| "loss": 0.2448, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.0846865364850977, |
| "grad_norm": 0.9235324263572693, |
| "learning_rate": 1.51560738789361e-05, |
| "loss": 0.2284, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.08879753340185, |
| "grad_norm": 1.3092626333236694, |
| "learning_rate": 1.5117212082032611e-05, |
| "loss": 0.2107, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.0929085303186024, |
| "grad_norm": 1.1513203382492065, |
| "learning_rate": 1.5078245303272133e-05, |
| "loss": 0.1805, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.0970195272353547, |
| "grad_norm": 1.2433505058288574, |
| "learning_rate": 1.5039174342075278e-05, |
| "loss": 0.2306, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.101130524152107, |
| "grad_norm": 1.0607112646102905, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.2046, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.105241521068859, |
| "grad_norm": 1.30000638961792, |
| "learning_rate": 1.4960723080725164e-05, |
| "loss": 0.2182, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.1093525179856114, |
| "grad_norm": 1.1230967044830322, |
| "learning_rate": 1.492134439003404e-05, |
| "loss": 0.1732, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.1134635149023637, |
| "grad_norm": 1.5389747619628906, |
| "learning_rate": 1.4881864735797798e-05, |
| "loss": 0.232, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.117574511819116, |
| "grad_norm": 1.6007003784179688, |
| "learning_rate": 1.4842284927958908e-05, |
| "loss": 0.2694, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.1216855087358684, |
| "grad_norm": 1.1094415187835693, |
| "learning_rate": 1.4802605778514541e-05, |
| "loss": 0.1816, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.1257965056526207, |
| "grad_norm": 1.5207327604293823, |
| "learning_rate": 1.4762828101499902e-05, |
| "loss": 0.2355, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.129907502569373, |
| "grad_norm": 1.3849849700927734, |
| "learning_rate": 1.4722952712971535e-05, |
| "loss": 0.2039, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.1340184994861253, |
| "grad_norm": 1.5623657703399658, |
| "learning_rate": 1.4682980430990577e-05, |
| "loss": 0.2081, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.1381294964028776, |
| "grad_norm": 1.3388752937316895, |
| "learning_rate": 1.4642912075605976e-05, |
| "loss": 0.2469, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.14224049331963, |
| "grad_norm": 1.5565121173858643, |
| "learning_rate": 1.4602748468837669e-05, |
| "loss": 0.2172, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.1463514902363823, |
| "grad_norm": 1.3776781558990479, |
| "learning_rate": 1.456249043465972e-05, |
| "loss": 0.1848, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.1504624871531346, |
| "grad_norm": 1.578230857849121, |
| "learning_rate": 1.4522138798983408e-05, |
| "loss": 0.1939, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.154573484069887, |
| "grad_norm": 1.3362317085266113, |
| "learning_rate": 1.4481694389640291e-05, |
| "loss": 0.1807, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.1586844809866392, |
| "grad_norm": 1.7067232131958008, |
| "learning_rate": 1.444115803636522e-05, |
| "loss": 0.2204, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.1627954779033916, |
| "grad_norm": 1.0936253070831299, |
| "learning_rate": 1.440053057077931e-05, |
| "loss": 0.1973, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.1669064748201439, |
| "grad_norm": 1.5256032943725586, |
| "learning_rate": 1.4359812826372894e-05, |
| "loss": 0.2216, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.1710174717368962, |
| "grad_norm": 1.19940185546875, |
| "learning_rate": 1.4319005638488413e-05, |
| "loss": 0.2439, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.1751284686536485, |
| "grad_norm": 0.9025769233703613, |
| "learning_rate": 1.4278109844303271e-05, |
| "loss": 0.2084, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.1792394655704008, |
| "grad_norm": 1.2088083028793335, |
| "learning_rate": 1.4237126282812684e-05, |
| "loss": 0.2359, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.1833504624871531, |
| "grad_norm": 1.2607866525650024, |
| "learning_rate": 1.4196055794812437e-05, |
| "loss": 0.2318, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.1874614594039055, |
| "grad_norm": 1.3030301332473755, |
| "learning_rate": 1.4154899222881666e-05, |
| "loss": 0.1825, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.1915724563206578, |
| "grad_norm": 1.9711260795593262, |
| "learning_rate": 1.4113657411365546e-05, |
| "loss": 0.2345, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.19568345323741, |
| "grad_norm": 1.4095484018325806, |
| "learning_rate": 1.4072331206357986e-05, |
| "loss": 0.2242, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.1997944501541624, |
| "grad_norm": 1.1082674264907837, |
| "learning_rate": 1.4030921455684255e-05, |
| "loss": 0.2083, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.2039054470709147, |
| "grad_norm": 1.8573055267333984, |
| "learning_rate": 1.3989429008883613e-05, |
| "loss": 0.2097, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.208016443987667, |
| "grad_norm": 1.4647445678710938, |
| "learning_rate": 1.3947854717191853e-05, |
| "loss": 0.2139, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.2121274409044194, |
| "grad_norm": 1.2996141910552979, |
| "learning_rate": 1.390619943352386e-05, |
| "loss": 0.244, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.2162384378211717, |
| "grad_norm": 1.5465834140777588, |
| "learning_rate": 1.3864464012456103e-05, |
| "loss": 0.1681, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.220349434737924, |
| "grad_norm": 1.2666230201721191, |
| "learning_rate": 1.3822649310209106e-05, |
| "loss": 0.1859, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.2244604316546763, |
| "grad_norm": 0.884738028049469, |
| "learning_rate": 1.378075618462988e-05, |
| "loss": 0.2195, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.2285714285714286, |
| "grad_norm": 1.7185105085372925, |
| "learning_rate": 1.3738785495174325e-05, |
| "loss": 0.2417, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.232682425488181, |
| "grad_norm": 1.2726584672927856, |
| "learning_rate": 1.36967381028896e-05, |
| "loss": 0.1806, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.2367934224049333, |
| "grad_norm": 1.3861750364303589, |
| "learning_rate": 1.3654614870396455e-05, |
| "loss": 0.1957, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.2409044193216856, |
| "grad_norm": 1.6232922077178955, |
| "learning_rate": 1.3612416661871532e-05, |
| "loss": 0.2038, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.245015416238438, |
| "grad_norm": 1.4664605855941772, |
| "learning_rate": 1.3570144343029644e-05, |
| "loss": 0.2569, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.24912641315519, |
| "grad_norm": 1.2884601354599, |
| "learning_rate": 1.352779878110601e-05, |
| "loss": 0.1872, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.2532374100719426, |
| "grad_norm": 1.3926196098327637, |
| "learning_rate": 1.3485380844838461e-05, |
| "loss": 0.2134, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.2573484069886947, |
| "grad_norm": 1.4619618654251099, |
| "learning_rate": 1.3442891404449615e-05, |
| "loss": 0.2232, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.2614594039054472, |
| "grad_norm": 1.2975983619689941, |
| "learning_rate": 1.3400331331629038e-05, |
| "loss": 0.1845, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.2655704008221993, |
| "grad_norm": 1.7390352487564087, |
| "learning_rate": 1.3357701499515345e-05, |
| "loss": 0.2343, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.2696813977389518, |
| "grad_norm": 1.1968086957931519, |
| "learning_rate": 1.3315002782678299e-05, |
| "loss": 0.2185, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.273792394655704, |
| "grad_norm": 0.9323089718818665, |
| "learning_rate": 1.3272236057100849e-05, |
| "loss": 0.1602, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.2779033915724562, |
| "grad_norm": 1.4192270040512085, |
| "learning_rate": 1.3229402200161197e-05, |
| "loss": 0.2131, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.2820143884892086, |
| "grad_norm": 1.3882615566253662, |
| "learning_rate": 1.3186502090614752e-05, |
| "loss": 0.1807, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.2861253854059609, |
| "grad_norm": 1.6137700080871582, |
| "learning_rate": 1.3143536608576141e-05, |
| "loss": 0.1716, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.2902363823227132, |
| "grad_norm": 1.1685277223587036, |
| "learning_rate": 1.310050663550112e-05, |
| "loss": 0.1609, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.2943473792394655, |
| "grad_norm": 1.3866482973098755, |
| "learning_rate": 1.3057413054168525e-05, |
| "loss": 0.1936, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.2984583761562178, |
| "grad_norm": 1.498495101928711, |
| "learning_rate": 1.3014256748662127e-05, |
| "loss": 0.1979, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.3025693730729702, |
| "grad_norm": 1.0271029472351074, |
| "learning_rate": 1.2971038604352521e-05, |
| "loss": 0.1659, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.3066803699897225, |
| "grad_norm": 1.0579352378845215, |
| "learning_rate": 1.2927759507878954e-05, |
| "loss": 0.1897, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.3107913669064748, |
| "grad_norm": 1.6076267957687378, |
| "learning_rate": 1.2884420347131123e-05, |
| "loss": 0.1919, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.3149023638232271, |
| "grad_norm": 1.143676519393921, |
| "learning_rate": 1.284102201123098e-05, |
| "loss": 0.1931, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.3190133607399794, |
| "grad_norm": 1.077234148979187, |
| "learning_rate": 1.2797565390514478e-05, |
| "loss": 0.2089, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.3231243576567318, |
| "grad_norm": 1.0523713827133179, |
| "learning_rate": 1.2754051376513304e-05, |
| "loss": 0.1682, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.327235354573484, |
| "grad_norm": 1.4638235569000244, |
| "learning_rate": 1.27104808619366e-05, |
| "loss": 0.1963, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.3313463514902364, |
| "grad_norm": 1.4082380533218384, |
| "learning_rate": 1.2666854740652641e-05, |
| "loss": 0.2477, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.3354573484069887, |
| "grad_norm": 0.8287607431411743, |
| "learning_rate": 1.2623173907670494e-05, |
| "loss": 0.2151, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.339568345323741, |
| "grad_norm": 1.617641568183899, |
| "learning_rate": 1.2579439259121665e-05, |
| "loss": 0.2654, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.3436793422404933, |
| "grad_norm": 0.7832649350166321, |
| "learning_rate": 1.253565169224171e-05, |
| "loss": 0.1956, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.3477903391572457, |
| "grad_norm": 1.3806982040405273, |
| "learning_rate": 1.2491812105351824e-05, |
| "loss": 0.2043, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.351901336073998, |
| "grad_norm": 1.2232375144958496, |
| "learning_rate": 1.2447921397840417e-05, |
| "loss": 0.1707, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.3560123329907503, |
| "grad_norm": 1.5429998636245728, |
| "learning_rate": 1.240398047014466e-05, |
| "loss": 0.1683, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.3601233299075026, |
| "grad_norm": 1.6056349277496338, |
| "learning_rate": 1.2359990223732023e-05, |
| "loss": 0.239, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.364234326824255, |
| "grad_norm": 1.1654701232910156, |
| "learning_rate": 1.2315951561081754e-05, |
| "loss": 0.2667, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.3683453237410073, |
| "grad_norm": 2.054358959197998, |
| "learning_rate": 1.2271865385666394e-05, |
| "loss": 0.2238, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.3724563206577596, |
| "grad_norm": 1.4064335823059082, |
| "learning_rate": 1.222773260193323e-05, |
| "loss": 0.211, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.376567317574512, |
| "grad_norm": 1.4539635181427002, |
| "learning_rate": 1.2183554115285726e-05, |
| "loss": 0.1958, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.3806783144912642, |
| "grad_norm": 1.3629333972930908, |
| "learning_rate": 1.2139330832064975e-05, |
| "loss": 0.2158, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.3847893114080163, |
| "grad_norm": 0.8606613874435425, |
| "learning_rate": 1.2095063659531087e-05, |
| "loss": 0.1822, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.3889003083247689, |
| "grad_norm": 1.137616753578186, |
| "learning_rate": 1.2050753505844581e-05, |
| "loss": 0.2088, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.393011305241521, |
| "grad_norm": 1.4008034467697144, |
| "learning_rate": 1.2006401280047753e-05, |
| "loss": 0.1917, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.3971223021582735, |
| "grad_norm": 1.5580987930297852, |
| "learning_rate": 1.1962007892046017e-05, |
| "loss": 0.2043, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.4012332990750256, |
| "grad_norm": 1.2816071510314941, |
| "learning_rate": 1.191757425258927e-05, |
| "loss": 0.2047, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.4053442959917781, |
| "grad_norm": 1.567262053489685, |
| "learning_rate": 1.1873101273253167e-05, |
| "loss": 0.1961, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.4094552929085302, |
| "grad_norm": 1.2683942317962646, |
| "learning_rate": 1.1828589866420441e-05, |
| "loss": 0.1941, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.4135662898252828, |
| "grad_norm": 1.3658758401870728, |
| "learning_rate": 1.1784040945262185e-05, |
| "loss": 0.2007, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.4176772867420349, |
| "grad_norm": 1.0757827758789062, |
| "learning_rate": 1.173945542371912e-05, |
| "loss": 0.181, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.4217882836587872, |
| "grad_norm": 1.5418345928192139, |
| "learning_rate": 1.1694834216482827e-05, |
| "loss": 0.2132, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.4258992805755395, |
| "grad_norm": 1.3307230472564697, |
| "learning_rate": 1.1650178238977004e-05, |
| "loss": 0.1859, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.4300102774922918, |
| "grad_norm": 1.0295861959457397, |
| "learning_rate": 1.1605488407338674e-05, |
| "loss": 0.2017, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.4341212744090441, |
| "grad_norm": 0.7575131058692932, |
| "learning_rate": 1.1560765638399398e-05, |
| "loss": 0.2239, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.4382322713257965, |
| "grad_norm": 1.2658188343048096, |
| "learning_rate": 1.1516010849666446e-05, |
| "loss": 0.1836, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.4423432682425488, |
| "grad_norm": 1.6164782047271729, |
| "learning_rate": 1.1471224959304003e-05, |
| "loss": 0.1773, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.446454265159301, |
| "grad_norm": 1.5609745979309082, |
| "learning_rate": 1.1426408886114309e-05, |
| "loss": 0.1787, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.4505652620760534, |
| "grad_norm": 1.4867188930511475, |
| "learning_rate": 1.1381563549518823e-05, |
| "loss": 0.1734, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.4546762589928057, |
| "grad_norm": 1.335847020149231, |
| "learning_rate": 1.1336689869539352e-05, |
| "loss": 0.1931, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.458787255909558, |
| "grad_norm": 1.2672595977783203, |
| "learning_rate": 1.1291788766779179e-05, |
| "loss": 0.2077, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.4628982528263104, |
| "grad_norm": 1.1603102684020996, |
| "learning_rate": 1.1246861162404184e-05, |
| "loss": 0.2657, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.4670092497430627, |
| "grad_norm": 0.9221446514129639, |
| "learning_rate": 1.1201907978123933e-05, |
| "loss": 0.1829, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.471120246659815, |
| "grad_norm": 1.1459940671920776, |
| "learning_rate": 1.1156930136172776e-05, |
| "loss": 0.2014, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.4752312435765673, |
| "grad_norm": 1.452026605606079, |
| "learning_rate": 1.1111928559290928e-05, |
| "loss": 0.2048, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.4793422404933196, |
| "grad_norm": 1.1732393503189087, |
| "learning_rate": 1.1066904170705533e-05, |
| "loss": 0.21, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.483453237410072, |
| "grad_norm": 1.3255653381347656, |
| "learning_rate": 1.1021857894111736e-05, |
| "loss": 0.2174, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.4875642343268243, |
| "grad_norm": 1.398779034614563, |
| "learning_rate": 1.097679065365371e-05, |
| "loss": 0.2079, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.4916752312435766, |
| "grad_norm": 1.8272420167922974, |
| "learning_rate": 1.0931703373905722e-05, |
| "loss": 0.1772, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.495786228160329, |
| "grad_norm": 1.989067792892456, |
| "learning_rate": 1.0886596979853152e-05, |
| "loss": 0.2067, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.4998972250770812, |
| "grad_norm": 1.9263696670532227, |
| "learning_rate": 1.0841472396873516e-05, |
| "loss": 0.251, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.5040082219938335, |
| "grad_norm": 1.8333402872085571, |
| "learning_rate": 1.0796330550717484e-05, |
| "loss": 0.234, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.5081192189105859, |
| "grad_norm": 0.866995096206665, |
| "learning_rate": 1.0751172367489886e-05, |
| "loss": 0.2539, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.512230215827338, |
| "grad_norm": 1.1297638416290283, |
| "learning_rate": 1.0705998773630722e-05, |
| "loss": 0.2017, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.5163412127440905, |
| "grad_norm": 1.5568816661834717, |
| "learning_rate": 1.066081069589614e-05, |
| "loss": 0.1947, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.5204522096608426, |
| "grad_norm": 0.9311817288398743, |
| "learning_rate": 1.0615609061339431e-05, |
| "loss": 0.2098, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.5245632065775951, |
| "grad_norm": 1.7587332725524902, |
| "learning_rate": 1.0570394797292015e-05, |
| "loss": 0.189, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.5286742034943472, |
| "grad_norm": 1.7202497720718384, |
| "learning_rate": 1.0525168831344408e-05, |
| "loss": 0.1959, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.5327852004110998, |
| "grad_norm": 1.744030475616455, |
| "learning_rate": 1.0479932091327198e-05, |
| "loss": 0.1934, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.5368961973278519, |
| "grad_norm": 1.5156835317611694, |
| "learning_rate": 1.0434685505292008e-05, |
| "loss": 0.1969, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.5410071942446044, |
| "grad_norm": 1.2871688604354858, |
| "learning_rate": 1.0389430001492453e-05, |
| "loss": 0.1926, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.5451181911613565, |
| "grad_norm": 0.996692419052124, |
| "learning_rate": 1.0344166508365101e-05, |
| "loss": 0.1972, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.549229188078109, |
| "grad_norm": 1.0069717168807983, |
| "learning_rate": 1.0298895954510426e-05, |
| "loss": 0.2209, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.5533401849948612, |
| "grad_norm": 1.5595433712005615, |
| "learning_rate": 1.025361926867376e-05, |
| "loss": 0.258, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.5574511819116137, |
| "grad_norm": 1.4904924631118774, |
| "learning_rate": 1.0208337379726225e-05, |
| "loss": 0.2009, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.5615621788283658, |
| "grad_norm": 1.2531158924102783, |
| "learning_rate": 1.0163051216645693e-05, |
| "loss": 0.1595, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.5656731757451183, |
| "grad_norm": 0.9231387972831726, |
| "learning_rate": 1.0117761708497727e-05, |
| "loss": 0.1923, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.5697841726618704, |
| "grad_norm": 1.3887051343917847, |
| "learning_rate": 1.0072469784416505e-05, |
| "loss": 0.2554, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.573895169578623, |
| "grad_norm": 1.1096982955932617, |
| "learning_rate": 1.0027176373585774e-05, |
| "loss": 0.227, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.578006166495375, |
| "grad_norm": 1.3331483602523804, |
| "learning_rate": 9.981882405219784e-06, |
| "loss": 0.2092, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.5821171634121276, |
| "grad_norm": 1.5959802865982056, |
| "learning_rate": 9.93658880854422e-06, |
| "loss": 0.215, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.5862281603288797, |
| "grad_norm": 1.377021312713623, |
| "learning_rate": 9.891296512777145e-06, |
| "loss": 0.1934, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.590339157245632, |
| "grad_norm": 1.6592588424682617, |
| "learning_rate": 9.846006447109934e-06, |
| "loss": 0.1759, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.5944501541623843, |
| "grad_norm": 1.525452971458435, |
| "learning_rate": 9.800719540688201e-06, |
| "loss": 0.1736, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.5985611510791367, |
| "grad_norm": 1.2749650478363037, |
| "learning_rate": 9.755436722592757e-06, |
| "loss": 0.2174, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.602672147995889, |
| "grad_norm": 1.2123613357543945, |
| "learning_rate": 9.710158921820535e-06, |
| "loss": 0.2202, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.6067831449126413, |
| "grad_norm": 1.1123170852661133, |
| "learning_rate": 9.664887067265533e-06, |
| "loss": 0.2006, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.6108941418293936, |
| "grad_norm": 1.1806175708770752, |
| "learning_rate": 9.619622087699774e-06, |
| "loss": 0.2497, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.615005138746146, |
| "grad_norm": 1.343797206878662, |
| "learning_rate": 9.574364911754212e-06, |
| "loss": 0.1791, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.6191161356628982, |
| "grad_norm": 1.6461807489395142, |
| "learning_rate": 9.52911646789973e-06, |
| "loss": 0.1587, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.6232271325796506, |
| "grad_norm": 1.6421260833740234, |
| "learning_rate": 9.483877684428059e-06, |
| "loss": 0.1854, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.6273381294964029, |
| "grad_norm": 1.1155070066452026, |
| "learning_rate": 9.438649489432737e-06, |
| "loss": 0.1602, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.6314491264131552, |
| "grad_norm": 1.255240559577942, |
| "learning_rate": 9.393432810790083e-06, |
| "loss": 0.1772, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.6355601233299075, |
| "grad_norm": 1.605877161026001, |
| "learning_rate": 9.348228576140159e-06, |
| "loss": 0.1797, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.6396711202466598, |
| "grad_norm": 1.8181346654891968, |
| "learning_rate": 9.303037712867709e-06, |
| "loss": 0.2196, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.6437821171634122, |
| "grad_norm": 2.2337422370910645, |
| "learning_rate": 9.25786114808319e-06, |
| "loss": 0.1711, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.6478931140801645, |
| "grad_norm": 1.1943351030349731, |
| "learning_rate": 9.212699808603687e-06, |
| "loss": 0.2107, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.6520041109969168, |
| "grad_norm": 1.4533287286758423, |
| "learning_rate": 9.167554620933956e-06, |
| "loss": 0.2321, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.6561151079136691, |
| "grad_norm": 1.109503149986267, |
| "learning_rate": 9.122426511247381e-06, |
| "loss": 0.2227, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.6602261048304214, |
| "grad_norm": 1.6231653690338135, |
| "learning_rate": 9.07731640536698e-06, |
| "loss": 0.2459, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.6643371017471735, |
| "grad_norm": 1.4417306184768677, |
| "learning_rate": 9.032225228746424e-06, |
| "loss": 0.1839, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.668448098663926, |
| "grad_norm": 1.4839571714401245, |
| "learning_rate": 8.98715390645104e-06, |
| "loss": 0.1573, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.6725590955806782, |
| "grad_norm": 1.40507972240448, |
| "learning_rate": 8.942103363138824e-06, |
| "loss": 0.186, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.6766700924974307, |
| "grad_norm": 1.316387414932251, |
| "learning_rate": 8.897074523041499e-06, |
| "loss": 0.1919, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.6807810894141828, |
| "grad_norm": 1.687321662902832, |
| "learning_rate": 8.852068309945519e-06, |
| "loss": 0.1488, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.6848920863309353, |
| "grad_norm": 1.2021576166152954, |
| "learning_rate": 8.807085647173151e-06, |
| "loss": 0.1845, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.6890030832476874, |
| "grad_norm": 1.2431976795196533, |
| "learning_rate": 8.762127457563511e-06, |
| "loss": 0.1868, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.69311408016444, |
| "grad_norm": 1.729856252670288, |
| "learning_rate": 8.717194663453634e-06, |
| "loss": 0.1878, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.697225077081192, |
| "grad_norm": 1.2510340213775635, |
| "learning_rate": 8.672288186659555e-06, |
| "loss": 0.1861, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.7013360739979446, |
| "grad_norm": 1.1471279859542847, |
| "learning_rate": 8.627408948457408e-06, |
| "loss": 0.2274, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.7054470709146967, |
| "grad_norm": 1.554674744606018, |
| "learning_rate": 8.582557869564498e-06, |
| "loss": 0.1864, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.7095580678314493, |
| "grad_norm": 1.0342234373092651, |
| "learning_rate": 8.537735870120447e-06, |
| "loss": 0.2297, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.7136690647482014, |
| "grad_norm": 1.476158857345581, |
| "learning_rate": 8.492943869668289e-06, |
| "loss": 0.2036, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.717780061664954, |
| "grad_norm": 1.3690853118896484, |
| "learning_rate": 8.448182787135614e-06, |
| "loss": 0.2029, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.721891058581706, |
| "grad_norm": 1.627550721168518, |
| "learning_rate": 8.403453540815729e-06, |
| "loss": 0.2098, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.7260020554984585, |
| "grad_norm": 1.4499547481536865, |
| "learning_rate": 8.35875704834879e-06, |
| "loss": 0.1992, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.7301130524152106, |
| "grad_norm": 1.2868211269378662, |
| "learning_rate": 8.314094226703007e-06, |
| "loss": 0.2087, |
| "step": 2105 |
| }, |
| { |
| "epoch": 1.734224049331963, |
| "grad_norm": 1.9403841495513916, |
| "learning_rate": 8.26946599215582e-06, |
| "loss": 0.2119, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.7383350462487153, |
| "grad_norm": 1.3112574815750122, |
| "learning_rate": 8.22487326027508e-06, |
| "loss": 0.1865, |
| "step": 2115 |
| }, |
| { |
| "epoch": 1.7424460431654676, |
| "grad_norm": 1.3602478504180908, |
| "learning_rate": 8.180316945900309e-06, |
| "loss": 0.1668, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.74655704008222, |
| "grad_norm": 1.7516307830810547, |
| "learning_rate": 8.135797963123894e-06, |
| "loss": 0.1785, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.7506680369989722, |
| "grad_norm": 1.3060181140899658, |
| "learning_rate": 8.091317225272347e-06, |
| "loss": 0.2075, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.7547790339157245, |
| "grad_norm": 0.9532691836357117, |
| "learning_rate": 8.04687564488758e-06, |
| "loss": 0.2036, |
| "step": 2135 |
| }, |
| { |
| "epoch": 1.7588900308324769, |
| "grad_norm": 1.5024046897888184, |
| "learning_rate": 8.002474133708163e-06, |
| "loss": 0.1785, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.7630010277492292, |
| "grad_norm": 1.2384905815124512, |
| "learning_rate": 7.958113602650623e-06, |
| "loss": 0.1807, |
| "step": 2145 |
| }, |
| { |
| "epoch": 1.7671120246659815, |
| "grad_norm": 1.592033863067627, |
| "learning_rate": 7.913794961790783e-06, |
| "loss": 0.2129, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.7712230215827338, |
| "grad_norm": 1.531943678855896, |
| "learning_rate": 7.869519120345042e-06, |
| "loss": 0.241, |
| "step": 2155 |
| }, |
| { |
| "epoch": 1.7753340184994861, |
| "grad_norm": 1.1052135229110718, |
| "learning_rate": 7.825286986651773e-06, |
| "loss": 0.1997, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.7794450154162385, |
| "grad_norm": 1.6296882629394531, |
| "learning_rate": 7.78109946815266e-06, |
| "loss": 0.1648, |
| "step": 2165 |
| }, |
| { |
| "epoch": 1.7835560123329908, |
| "grad_norm": 1.1843360662460327, |
| "learning_rate": 7.736957471374075e-06, |
| "loss": 0.2129, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.787667009249743, |
| "grad_norm": 1.4156749248504639, |
| "learning_rate": 7.692861901908506e-06, |
| "loss": 0.2118, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.7917780061664954, |
| "grad_norm": 1.6439249515533447, |
| "learning_rate": 7.64881366439596e-06, |
| "loss": 0.1809, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.7958890030832477, |
| "grad_norm": 1.2328163385391235, |
| "learning_rate": 7.6048136625054e-06, |
| "loss": 0.2138, |
| "step": 2185 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 1.3099247217178345, |
| "learning_rate": 7.560862798916229e-06, |
| "loss": 0.2181, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.8041109969167524, |
| "grad_norm": 1.3534038066864014, |
| "learning_rate": 7.516961975299744e-06, |
| "loss": 0.1762, |
| "step": 2195 |
| }, |
| { |
| "epoch": 1.8082219938335045, |
| "grad_norm": 1.7935746908187866, |
| "learning_rate": 7.473112092300654e-06, |
| "loss": 0.1943, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.812332990750257, |
| "grad_norm": 1.5042284727096558, |
| "learning_rate": 7.429314049518601e-06, |
| "loss": 0.2584, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.816443987667009, |
| "grad_norm": 1.3112459182739258, |
| "learning_rate": 7.3855687454896965e-06, |
| "loss": 0.208, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.8205549845837616, |
| "grad_norm": 0.9232465624809265, |
| "learning_rate": 7.341877077668098e-06, |
| "loss": 0.2193, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.8246659815005137, |
| "grad_norm": 1.4150248765945435, |
| "learning_rate": 7.298239942407594e-06, |
| "loss": 0.1792, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.8287769784172663, |
| "grad_norm": 1.4757970571517944, |
| "learning_rate": 7.254658234943206e-06, |
| "loss": 0.1641, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.8328879753340184, |
| "grad_norm": 1.5827895402908325, |
| "learning_rate": 7.211132849372838e-06, |
| "loss": 0.1959, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.836998972250771, |
| "grad_norm": 1.4939696788787842, |
| "learning_rate": 7.1676646786389246e-06, |
| "loss": 0.1984, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.841109969167523, |
| "grad_norm": 1.4789783954620361, |
| "learning_rate": 7.1242546145101066e-06, |
| "loss": 0.2264, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.8452209660842755, |
| "grad_norm": 1.1284780502319336, |
| "learning_rate": 7.080903547562949e-06, |
| "loss": 0.1928, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.8493319630010276, |
| "grad_norm": 1.3130507469177246, |
| "learning_rate": 7.037612367163657e-06, |
| "loss": 0.1793, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.8534429599177802, |
| "grad_norm": 1.4979199171066284, |
| "learning_rate": 6.9943819614498435e-06, |
| "loss": 0.1967, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.8575539568345323, |
| "grad_norm": 1.5476986169815063, |
| "learning_rate": 6.951213217312301e-06, |
| "loss": 0.2151, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.8616649537512848, |
| "grad_norm": 1.0267906188964844, |
| "learning_rate": 6.9081070203768e-06, |
| "loss": 0.2496, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.865775950668037, |
| "grad_norm": 1.5171856880187988, |
| "learning_rate": 6.865064254985938e-06, |
| "loss": 0.2162, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.8698869475847895, |
| "grad_norm": 1.5356831550598145, |
| "learning_rate": 6.822085804180985e-06, |
| "loss": 0.2015, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.8739979445015416, |
| "grad_norm": 1.261610984802246, |
| "learning_rate": 6.779172549683761e-06, |
| "loss": 0.196, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.878108941418294, |
| "grad_norm": 1.3029332160949707, |
| "learning_rate": 6.73632537187856e-06, |
| "loss": 0.1697, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.8822199383350462, |
| "grad_norm": 1.676698923110962, |
| "learning_rate": 6.69354514979409e-06, |
| "loss": 0.1755, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.8863309352517985, |
| "grad_norm": 2.004960298538208, |
| "learning_rate": 6.650832761085417e-06, |
| "loss": 0.1992, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.8904419321685508, |
| "grad_norm": 0.8850013017654419, |
| "learning_rate": 6.608189082015993e-06, |
| "loss": 0.221, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.8945529290853032, |
| "grad_norm": 1.1899011135101318, |
| "learning_rate": 6.565614987439648e-06, |
| "loss": 0.164, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.8986639260020555, |
| "grad_norm": 1.388355016708374, |
| "learning_rate": 6.523111350782664e-06, |
| "loss": 0.2236, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.9027749229188078, |
| "grad_norm": 1.3841829299926758, |
| "learning_rate": 6.480679044025846e-06, |
| "loss": 0.227, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.90688591983556, |
| "grad_norm": 1.0201807022094727, |
| "learning_rate": 6.438318937686631e-06, |
| "loss": 0.2162, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.9109969167523124, |
| "grad_norm": 1.0088194608688354, |
| "learning_rate": 6.396031900801238e-06, |
| "loss": 0.2136, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.9151079136690647, |
| "grad_norm": 1.3621116876602173, |
| "learning_rate": 6.3538188009068306e-06, |
| "loss": 0.1761, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.919218910585817, |
| "grad_norm": 1.4654209613800049, |
| "learning_rate": 6.311680504023718e-06, |
| "loss": 0.2004, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.9233299075025694, |
| "grad_norm": 1.19648015499115, |
| "learning_rate": 6.2696178746376035e-06, |
| "loss": 0.2066, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.9274409044193217, |
| "grad_norm": 1.38064706325531, |
| "learning_rate": 6.227631775681834e-06, |
| "loss": 0.1615, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.931551901336074, |
| "grad_norm": 1.5320490598678589, |
| "learning_rate": 6.1857230685196955e-06, |
| "loss": 0.1885, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.9356628982528263, |
| "grad_norm": 1.5025681257247925, |
| "learning_rate": 6.143892612926755e-06, |
| "loss": 0.182, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.9397738951695787, |
| "grad_norm": 1.413241982460022, |
| "learning_rate": 6.102141267073207e-06, |
| "loss": 0.2431, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.943884892086331, |
| "grad_norm": 1.4534951448440552, |
| "learning_rate": 6.060469887506282e-06, |
| "loss": 0.2318, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.9479958890030833, |
| "grad_norm": 1.328620433807373, |
| "learning_rate": 6.018879329132663e-06, |
| "loss": 0.2112, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.9521068859198356, |
| "grad_norm": 1.1238036155700684, |
| "learning_rate": 5.977370445200949e-06, |
| "loss": 0.2113, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.956217882836588, |
| "grad_norm": 1.2546100616455078, |
| "learning_rate": 5.935944087284155e-06, |
| "loss": 0.182, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.96032887975334, |
| "grad_norm": 1.6414473056793213, |
| "learning_rate": 5.894601105262241e-06, |
| "loss": 0.2252, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.9644398766700926, |
| "grad_norm": 1.3691917657852173, |
| "learning_rate": 5.853342347304665e-06, |
| "loss": 0.1921, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.9685508735868447, |
| "grad_norm": 1.4818812608718872, |
| "learning_rate": 5.812168659852998e-06, |
| "loss": 0.2008, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.9726618705035972, |
| "grad_norm": 1.7602829933166504, |
| "learning_rate": 5.7710808876035604e-06, |
| "loss": 0.1545, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.9767728674203493, |
| "grad_norm": 1.7359880208969116, |
| "learning_rate": 5.73007987349006e-06, |
| "loss": 0.1716, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.9808838643371018, |
| "grad_norm": 1.7533327341079712, |
| "learning_rate": 5.689166458666348e-06, |
| "loss": 0.1731, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.984994861253854, |
| "grad_norm": 1.7562685012817383, |
| "learning_rate": 5.64834148248912e-06, |
| "loss": 0.1772, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.9891058581706065, |
| "grad_norm": 1.2940082550048828, |
| "learning_rate": 5.6076057825007315e-06, |
| "loss": 0.1907, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.9932168550873586, |
| "grad_norm": 1.5690494775772095, |
| "learning_rate": 5.566960194411984e-06, |
| "loss": 0.2064, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.9973278520041111, |
| "grad_norm": 0.9809213876724243, |
| "learning_rate": 5.52640555208499e-06, |
| "loss": 0.1759, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.0008221993833506, |
| "grad_norm": 1.2464898824691772, |
| "learning_rate": 5.485942687516086e-06, |
| "loss": 0.1983, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.0049331963001027, |
| "grad_norm": 1.286004900932312, |
| "learning_rate": 5.445572430818744e-06, |
| "loss": 0.1692, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.0090441932168552, |
| "grad_norm": 1.5520018339157104, |
| "learning_rate": 5.405295610206525e-06, |
| "loss": 0.1707, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.0131551901336073, |
| "grad_norm": 1.2041336297988892, |
| "learning_rate": 5.3651130519761315e-06, |
| "loss": 0.1989, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.01726618705036, |
| "grad_norm": 1.3793076276779175, |
| "learning_rate": 5.3250255804904176e-06, |
| "loss": 0.2276, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.021377183967112, |
| "grad_norm": 1.8650401830673218, |
| "learning_rate": 5.285034018161503e-06, |
| "loss": 0.2138, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.0254881808838645, |
| "grad_norm": 1.3267054557800293, |
| "learning_rate": 5.245139185433875e-06, |
| "loss": 0.1427, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.0295991778006166, |
| "grad_norm": 1.0150337219238281, |
| "learning_rate": 5.205341900767575e-06, |
| "loss": 0.184, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.033710174717369, |
| "grad_norm": 1.5393043756484985, |
| "learning_rate": 5.165642980621413e-06, |
| "loss": 0.1722, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.0378211716341212, |
| "grad_norm": 1.7602789402008057, |
| "learning_rate": 5.1260432394362e-06, |
| "loss": 0.1736, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.041932168550874, |
| "grad_norm": 1.5538018941879272, |
| "learning_rate": 5.0865434896180385e-06, |
| "loss": 0.1915, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.046043165467626, |
| "grad_norm": 1.6677844524383545, |
| "learning_rate": 5.047144541521676e-06, |
| "loss": 0.2089, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.0501541623843784, |
| "grad_norm": 1.1350642442703247, |
| "learning_rate": 5.007847203433869e-06, |
| "loss": 0.226, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.0542651593011305, |
| "grad_norm": 1.5596694946289062, |
| "learning_rate": 4.968652281556794e-06, |
| "loss": 0.1517, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.058376156217883, |
| "grad_norm": 1.2307167053222656, |
| "learning_rate": 4.929560579991513e-06, |
| "loss": 0.1938, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.062487153134635, |
| "grad_norm": 1.1274449825286865, |
| "learning_rate": 4.890572900721479e-06, |
| "loss": 0.1995, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.0665981500513873, |
| "grad_norm": 1.5779458284378052, |
| "learning_rate": 4.851690043596086e-06, |
| "loss": 0.2166, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.07070914696814, |
| "grad_norm": 1.2458999156951904, |
| "learning_rate": 4.81291280631426e-06, |
| "loss": 0.1655, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.074820143884892, |
| "grad_norm": 1.380728006362915, |
| "learning_rate": 4.774241984408068e-06, |
| "loss": 0.1682, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.0789311408016444, |
| "grad_norm": 1.5582739114761353, |
| "learning_rate": 4.7356783712264405e-06, |
| "loss": 0.1587, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.0830421377183965, |
| "grad_norm": 1.5341260433197021, |
| "learning_rate": 4.697222757918872e-06, |
| "loss": 0.2258, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.087153134635149, |
| "grad_norm": 1.613232970237732, |
| "learning_rate": 4.65887593341918e-06, |
| "loss": 0.2103, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.091264131551901, |
| "grad_norm": 1.5128930807113647, |
| "learning_rate": 4.620638684429337e-06, |
| "loss": 0.2013, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.0953751284686537, |
| "grad_norm": 1.7848138809204102, |
| "learning_rate": 4.582511795403334e-06, |
| "loss": 0.2425, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.099486125385406, |
| "grad_norm": 1.4135546684265137, |
| "learning_rate": 4.544496048531062e-06, |
| "loss": 0.2001, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.1035971223021583, |
| "grad_norm": 1.6589088439941406, |
| "learning_rate": 4.506592223722306e-06, |
| "loss": 0.1777, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.1077081192189104, |
| "grad_norm": 2.0604190826416016, |
| "learning_rate": 4.46880109859069e-06, |
| "loss": 0.2061, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.111819116135663, |
| "grad_norm": 1.185441493988037, |
| "learning_rate": 4.431123448437778e-06, |
| "loss": 0.1852, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.115930113052415, |
| "grad_norm": 1.49964439868927, |
| "learning_rate": 4.393560046237143e-06, |
| "loss": 0.2298, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.1200411099691676, |
| "grad_norm": 1.7512156963348389, |
| "learning_rate": 4.3561116626185e-06, |
| "loss": 0.1768, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.1241521068859197, |
| "grad_norm": 1.6165354251861572, |
| "learning_rate": 4.31877906585191e-06, |
| "loss": 0.1517, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.1282631038026723, |
| "grad_norm": 1.2202645540237427, |
| "learning_rate": 4.281563021832027e-06, |
| "loss": 0.1552, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.1323741007194243, |
| "grad_norm": 1.716781497001648, |
| "learning_rate": 4.244464294062358e-06, |
| "loss": 0.1418, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.136485097636177, |
| "grad_norm": 1.5364643335342407, |
| "learning_rate": 4.207483643639629e-06, |
| "loss": 0.176, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.140596094552929, |
| "grad_norm": 1.496578574180603, |
| "learning_rate": 4.170621829238152e-06, |
| "loss": 0.2021, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.1447070914696815, |
| "grad_norm": 1.2782753705978394, |
| "learning_rate": 4.1338796070942576e-06, |
| "loss": 0.1705, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.1488180883864336, |
| "grad_norm": 1.102035641670227, |
| "learning_rate": 4.097257730990806e-06, |
| "loss": 0.1699, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.152929085303186, |
| "grad_norm": 1.4746476411819458, |
| "learning_rate": 4.060756952241691e-06, |
| "loss": 0.1852, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.1570400822199383, |
| "grad_norm": 1.3999295234680176, |
| "learning_rate": 4.024378019676444e-06, |
| "loss": 0.181, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.161151079136691, |
| "grad_norm": 1.6118948459625244, |
| "learning_rate": 3.988121679624874e-06, |
| "loss": 0.2021, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.165262076053443, |
| "grad_norm": 1.9151121377944946, |
| "learning_rate": 3.951988675901744e-06, |
| "loss": 0.2105, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.1693730729701954, |
| "grad_norm": 0.8960415124893188, |
| "learning_rate": 3.915979749791524e-06, |
| "loss": 0.1696, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.1734840698869475, |
| "grad_norm": 1.4472572803497314, |
| "learning_rate": 3.880095640033174e-06, |
| "loss": 0.1608, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.1775950668037, |
| "grad_norm": 1.544793725013733, |
| "learning_rate": 3.844337082804984e-06, |
| "loss": 0.1833, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.181706063720452, |
| "grad_norm": 1.5551151037216187, |
| "learning_rate": 3.8087048117094962e-06, |
| "loss": 0.1787, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.1858170606372047, |
| "grad_norm": 1.4573007822036743, |
| "learning_rate": 3.7731995577584224e-06, |
| "loss": 0.1301, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.189928057553957, |
| "grad_norm": 1.362298607826233, |
| "learning_rate": 3.737822049357662e-06, |
| "loss": 0.1516, |
| "step": 2665 |
| }, |
| { |
| "epoch": 2.1940390544707093, |
| "grad_norm": 1.576825499534607, |
| "learning_rate": 3.702573012292373e-06, |
| "loss": 0.146, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.1981500513874614, |
| "grad_norm": 1.3643438816070557, |
| "learning_rate": 3.6674531697120484e-06, |
| "loss": 0.1407, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.202261048304214, |
| "grad_norm": 1.7982343435287476, |
| "learning_rate": 3.6324632421157147e-06, |
| "loss": 0.1542, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.206372045220966, |
| "grad_norm": 1.5771673917770386, |
| "learning_rate": 3.5976039473371273e-06, |
| "loss": 0.1519, |
| "step": 2685 |
| }, |
| { |
| "epoch": 2.210483042137718, |
| "grad_norm": 1.6472742557525635, |
| "learning_rate": 3.562876000530048e-06, |
| "loss": 0.1784, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.2145940390544707, |
| "grad_norm": 1.4306912422180176, |
| "learning_rate": 3.5282801141535915e-06, |
| "loss": 0.1517, |
| "step": 2695 |
| }, |
| { |
| "epoch": 2.218705035971223, |
| "grad_norm": 1.7105224132537842, |
| "learning_rate": 3.493816997957582e-06, |
| "loss": 0.1869, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.2228160328879754, |
| "grad_norm": 1.3035056591033936, |
| "learning_rate": 3.4594873589680047e-06, |
| "loss": 0.1611, |
| "step": 2705 |
| }, |
| { |
| "epoch": 2.2269270298047275, |
| "grad_norm": 1.4090354442596436, |
| "learning_rate": 3.4252919014725137e-06, |
| "loss": 0.2124, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.23103802672148, |
| "grad_norm": 1.7435358762741089, |
| "learning_rate": 3.391231327005955e-06, |
| "loss": 0.1755, |
| "step": 2715 |
| }, |
| { |
| "epoch": 2.235149023638232, |
| "grad_norm": 1.137243390083313, |
| "learning_rate": 3.3573063343360048e-06, |
| "loss": 0.1713, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.2392600205549846, |
| "grad_norm": 1.4972217082977295, |
| "learning_rate": 3.3235176194488073e-06, |
| "loss": 0.2209, |
| "step": 2725 |
| }, |
| { |
| "epoch": 2.2433710174717367, |
| "grad_norm": 1.6285967826843262, |
| "learning_rate": 3.289865875534709e-06, |
| "loss": 0.1592, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.2474820143884893, |
| "grad_norm": 1.5644609928131104, |
| "learning_rate": 3.2563517929740484e-06, |
| "loss": 0.1639, |
| "step": 2735 |
| }, |
| { |
| "epoch": 2.2515930113052414, |
| "grad_norm": 1.5566542148590088, |
| "learning_rate": 3.2229760593229686e-06, |
| "loss": 0.1865, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.255704008221994, |
| "grad_norm": 1.6575636863708496, |
| "learning_rate": 3.1897393592993244e-06, |
| "loss": 0.1659, |
| "step": 2745 |
| }, |
| { |
| "epoch": 2.259815005138746, |
| "grad_norm": 1.5925614833831787, |
| "learning_rate": 3.1566423747686402e-06, |
| "loss": 0.1692, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.2639260020554985, |
| "grad_norm": 1.0502550601959229, |
| "learning_rate": 3.123685784730118e-06, |
| "loss": 0.1978, |
| "step": 2755 |
| }, |
| { |
| "epoch": 2.2680369989722506, |
| "grad_norm": 2.042742967605591, |
| "learning_rate": 3.090870265302697e-06, |
| "loss": 0.1601, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.272147995889003, |
| "grad_norm": 1.3015090227127075, |
| "learning_rate": 3.058196489711194e-06, |
| "loss": 0.1819, |
| "step": 2765 |
| }, |
| { |
| "epoch": 2.2762589928057553, |
| "grad_norm": 1.362306833267212, |
| "learning_rate": 3.0256651282724857e-06, |
| "loss": 0.2053, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.280369989722508, |
| "grad_norm": 1.5630079507827759, |
| "learning_rate": 2.993276848381769e-06, |
| "loss": 0.1844, |
| "step": 2775 |
| }, |
| { |
| "epoch": 2.28448098663926, |
| "grad_norm": 1.4856841564178467, |
| "learning_rate": 2.9610323144988505e-06, |
| "loss": 0.1983, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.2885919835560125, |
| "grad_norm": 1.6463249921798706, |
| "learning_rate": 2.9289321881345257e-06, |
| "loss": 0.1903, |
| "step": 2785 |
| }, |
| { |
| "epoch": 2.2927029804727646, |
| "grad_norm": 1.6934734582901, |
| "learning_rate": 2.8969771278370105e-06, |
| "loss": 0.196, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.296813977389517, |
| "grad_norm": 0.9253600239753723, |
| "learning_rate": 2.8651677891784267e-06, |
| "loss": 0.1458, |
| "step": 2795 |
| }, |
| { |
| "epoch": 2.300924974306269, |
| "grad_norm": 1.6479263305664062, |
| "learning_rate": 2.833504824741349e-06, |
| "loss": 0.179, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.3050359712230217, |
| "grad_norm": 1.1921919584274292, |
| "learning_rate": 2.8019888841054166e-06, |
| "loss": 0.1749, |
| "step": 2805 |
| }, |
| { |
| "epoch": 2.309146968139774, |
| "grad_norm": 1.347475290298462, |
| "learning_rate": 2.770620613834023e-06, |
| "loss": 0.1989, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.3132579650565264, |
| "grad_norm": 1.2929786443710327, |
| "learning_rate": 2.73940065746103e-06, |
| "loss": 0.1993, |
| "step": 2815 |
| }, |
| { |
| "epoch": 2.3173689619732785, |
| "grad_norm": 1.3094338178634644, |
| "learning_rate": 2.708329655477575e-06, |
| "loss": 0.2202, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.321479958890031, |
| "grad_norm": 1.7936643362045288, |
| "learning_rate": 2.6774082453189296e-06, |
| "loss": 0.176, |
| "step": 2825 |
| }, |
| { |
| "epoch": 2.325590955806783, |
| "grad_norm": 1.5653270483016968, |
| "learning_rate": 2.646637061351429e-06, |
| "loss": 0.1869, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.3297019527235356, |
| "grad_norm": 1.5138646364212036, |
| "learning_rate": 2.6160167348594534e-06, |
| "loss": 0.1846, |
| "step": 2835 |
| }, |
| { |
| "epoch": 2.3338129496402877, |
| "grad_norm": 1.4555552005767822, |
| "learning_rate": 2.585547894032465e-06, |
| "loss": 0.1993, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.33792394655704, |
| "grad_norm": 1.6724660396575928, |
| "learning_rate": 2.5552311639521376e-06, |
| "loss": 0.1799, |
| "step": 2845 |
| }, |
| { |
| "epoch": 2.3420349434737924, |
| "grad_norm": 1.3273122310638428, |
| "learning_rate": 2.525067166579528e-06, |
| "loss": 0.2047, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.346145940390545, |
| "grad_norm": 1.859212040901184, |
| "learning_rate": 2.4950565207423116e-06, |
| "loss": 0.1683, |
| "step": 2855 |
| }, |
| { |
| "epoch": 2.350256937307297, |
| "grad_norm": 1.3489201068878174, |
| "learning_rate": 2.4651998421220847e-06, |
| "loss": 0.1577, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.354367934224049, |
| "grad_norm": 1.8253015279769897, |
| "learning_rate": 2.43549774324175e-06, |
| "loss": 0.1904, |
| "step": 2865 |
| }, |
| { |
| "epoch": 2.3584789311408016, |
| "grad_norm": 1.5466407537460327, |
| "learning_rate": 2.405950833452928e-06, |
| "loss": 0.1754, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.362589928057554, |
| "grad_norm": 1.4899457693099976, |
| "learning_rate": 2.3765597189234756e-06, |
| "loss": 0.1667, |
| "step": 2875 |
| }, |
| { |
| "epoch": 2.3667009249743063, |
| "grad_norm": 1.3761615753173828, |
| "learning_rate": 2.347325002625034e-06, |
| "loss": 0.184, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.3708119218910584, |
| "grad_norm": 1.7067718505859375, |
| "learning_rate": 2.3182472843206647e-06, |
| "loss": 0.148, |
| "step": 2885 |
| }, |
| { |
| "epoch": 2.374922918807811, |
| "grad_norm": 1.401242971420288, |
| "learning_rate": 2.289327160552559e-06, |
| "loss": 0.215, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.379033915724563, |
| "grad_norm": 1.429301142692566, |
| "learning_rate": 2.2605652246297737e-06, |
| "loss": 0.1692, |
| "step": 2895 |
| }, |
| { |
| "epoch": 2.3831449126413156, |
| "grad_norm": 0.8169743418693542, |
| "learning_rate": 2.2319620666160735e-06, |
| "loss": 0.184, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.3872559095580677, |
| "grad_norm": 2.1176815032958984, |
| "learning_rate": 2.203518273317835e-06, |
| "loss": 0.2451, |
| "step": 2905 |
| }, |
| { |
| "epoch": 2.39136690647482, |
| "grad_norm": 1.482387661933899, |
| "learning_rate": 2.175234428271984e-06, |
| "loss": 0.1903, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.3954779033915723, |
| "grad_norm": 1.2381435632705688, |
| "learning_rate": 2.1471111117340505e-06, |
| "loss": 0.1905, |
| "step": 2915 |
| }, |
| { |
| "epoch": 2.399588900308325, |
| "grad_norm": 1.7667262554168701, |
| "learning_rate": 2.1191489006662415e-06, |
| "loss": 0.1952, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.403699897225077, |
| "grad_norm": 1.508764386177063, |
| "learning_rate": 2.091348368725614e-06, |
| "loss": 0.1693, |
| "step": 2925 |
| }, |
| { |
| "epoch": 2.4078108941418295, |
| "grad_norm": 1.4408055543899536, |
| "learning_rate": 2.0637100862523186e-06, |
| "loss": 0.1635, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.4119218910585816, |
| "grad_norm": 1.7050563097000122, |
| "learning_rate": 2.0362346202578753e-06, |
| "loss": 0.197, |
| "step": 2935 |
| }, |
| { |
| "epoch": 2.416032887975334, |
| "grad_norm": 1.7732040882110596, |
| "learning_rate": 2.008922534413551e-06, |
| "loss": 0.2079, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.420143884892086, |
| "grad_norm": 1.7007726430892944, |
| "learning_rate": 1.9817743890388098e-06, |
| "loss": 0.1562, |
| "step": 2945 |
| }, |
| { |
| "epoch": 2.4242548818088387, |
| "grad_norm": 1.3671354055404663, |
| "learning_rate": 1.9547907410897902e-06, |
| "loss": 0.1806, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.428365878725591, |
| "grad_norm": 1.6518571376800537, |
| "learning_rate": 1.927972144147905e-06, |
| "loss": 0.1814, |
| "step": 2955 |
| }, |
| { |
| "epoch": 2.4324768756423434, |
| "grad_norm": 1.8467329740524292, |
| "learning_rate": 1.901319148408467e-06, |
| "loss": 0.1511, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.4365878725590955, |
| "grad_norm": 1.5873874425888062, |
| "learning_rate": 1.8748323006694058e-06, |
| "loss": 0.1925, |
| "step": 2965 |
| }, |
| { |
| "epoch": 2.440698869475848, |
| "grad_norm": 1.7341911792755127, |
| "learning_rate": 1.8485121443200594e-06, |
| "loss": 0.1746, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.4448098663926, |
| "grad_norm": 1.740069031715393, |
| "learning_rate": 1.8223592193300111e-06, |
| "loss": 0.145, |
| "step": 2975 |
| }, |
| { |
| "epoch": 2.4489208633093527, |
| "grad_norm": 1.0969732999801636, |
| "learning_rate": 1.7963740622380199e-06, |
| "loss": 0.1566, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.4530318602261048, |
| "grad_norm": 1.5586555004119873, |
| "learning_rate": 1.7705572061410204e-06, |
| "loss": 0.151, |
| "step": 2985 |
| }, |
| { |
| "epoch": 2.4571428571428573, |
| "grad_norm": 1.4157530069351196, |
| "learning_rate": 1.7449091806831664e-06, |
| "loss": 0.2008, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.4612538540596094, |
| "grad_norm": 1.7738921642303467, |
| "learning_rate": 1.7194305120449895e-06, |
| "loss": 0.1865, |
| "step": 2995 |
| }, |
| { |
| "epoch": 2.465364850976362, |
| "grad_norm": 2.144289970397949, |
| "learning_rate": 1.6941217229325812e-06, |
| "loss": 0.1891, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.469475847893114, |
| "grad_norm": 1.4171119928359985, |
| "learning_rate": 1.6689833325668814e-06, |
| "loss": 0.1725, |
| "step": 3005 |
| }, |
| { |
| "epoch": 2.4735868448098666, |
| "grad_norm": 1.459351897239685, |
| "learning_rate": 1.6440158566730314e-06, |
| "loss": 0.1578, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.4776978417266187, |
| "grad_norm": 1.8061171770095825, |
| "learning_rate": 1.619219807469785e-06, |
| "loss": 0.1696, |
| "step": 3015 |
| }, |
| { |
| "epoch": 2.481808838643371, |
| "grad_norm": 1.492310643196106, |
| "learning_rate": 1.5945956936589924e-06, |
| "loss": 0.1902, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.4859198355601233, |
| "grad_norm": 1.3936516046524048, |
| "learning_rate": 1.5701440204151864e-06, |
| "loss": 0.171, |
| "step": 3025 |
| }, |
| { |
| "epoch": 2.490030832476876, |
| "grad_norm": 1.264660120010376, |
| "learning_rate": 1.5458652893751959e-06, |
| "loss": 0.1473, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.494141829393628, |
| "grad_norm": 1.69791841506958, |
| "learning_rate": 1.521759998627873e-06, |
| "loss": 0.1703, |
| "step": 3035 |
| }, |
| { |
| "epoch": 2.49825282631038, |
| "grad_norm": 1.560808539390564, |
| "learning_rate": 1.4978286427038602e-06, |
| "loss": 0.1656, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.5023638232271326, |
| "grad_norm": 1.5153892040252686, |
| "learning_rate": 1.4740717125654492e-06, |
| "loss": 0.1831, |
| "step": 3045 |
| }, |
| { |
| "epoch": 2.506474820143885, |
| "grad_norm": 1.5965219736099243, |
| "learning_rate": 1.4504896955965152e-06, |
| "loss": 0.1464, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.510585817060637, |
| "grad_norm": 1.9182418584823608, |
| "learning_rate": 1.4270830755925148e-06, |
| "loss": 0.1896, |
| "step": 3055 |
| }, |
| { |
| "epoch": 2.5146968139773893, |
| "grad_norm": 1.592840552330017, |
| "learning_rate": 1.403852332750545e-06, |
| "loss": 0.1584, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.518807810894142, |
| "grad_norm": 1.7259905338287354, |
| "learning_rate": 1.3807979436595187e-06, |
| "loss": 0.1739, |
| "step": 3065 |
| }, |
| { |
| "epoch": 2.5229188078108944, |
| "grad_norm": 1.8401083946228027, |
| "learning_rate": 1.357920381290374e-06, |
| "loss": 0.2003, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.5270298047276465, |
| "grad_norm": 1.3413900136947632, |
| "learning_rate": 1.3352201149863631e-06, |
| "loss": 0.17, |
| "step": 3075 |
| }, |
| { |
| "epoch": 2.5311408016443986, |
| "grad_norm": 1.2386707067489624, |
| "learning_rate": 1.3126976104534362e-06, |
| "loss": 0.2034, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.535251798561151, |
| "grad_norm": 1.4070823192596436, |
| "learning_rate": 1.2903533297506787e-06, |
| "loss": 0.1438, |
| "step": 3085 |
| }, |
| { |
| "epoch": 2.5393627954779037, |
| "grad_norm": 1.8814294338226318, |
| "learning_rate": 1.268187731280842e-06, |
| "loss": 0.1869, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.5434737923946558, |
| "grad_norm": 1.6609413623809814, |
| "learning_rate": 1.2462012697809333e-06, |
| "loss": 0.1698, |
| "step": 3095 |
| }, |
| { |
| "epoch": 2.547584789311408, |
| "grad_norm": 1.7581918239593506, |
| "learning_rate": 1.2243943963128735e-06, |
| "loss": 0.1803, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.5516957862281604, |
| "grad_norm": 1.710852861404419, |
| "learning_rate": 1.2027675582542698e-06, |
| "loss": 0.2221, |
| "step": 3105 |
| }, |
| { |
| "epoch": 2.5558067831449125, |
| "grad_norm": 1.390642523765564, |
| "learning_rate": 1.1813211992892204e-06, |
| "loss": 0.1467, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.559917780061665, |
| "grad_norm": 1.2157833576202393, |
| "learning_rate": 1.1600557593992135e-06, |
| "loss": 0.146, |
| "step": 3115 |
| }, |
| { |
| "epoch": 2.564028776978417, |
| "grad_norm": 1.5769902467727661, |
| "learning_rate": 1.138971674854099e-06, |
| "loss": 0.1735, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.5681397738951697, |
| "grad_norm": 1.6799767017364502, |
| "learning_rate": 1.1180693782031516e-06, |
| "loss": 0.1735, |
| "step": 3125 |
| }, |
| { |
| "epoch": 2.5722507708119218, |
| "grad_norm": 1.628298282623291, |
| "learning_rate": 1.0973492982661792e-06, |
| "loss": 0.1828, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.5763617677286743, |
| "grad_norm": 1.8699073791503906, |
| "learning_rate": 1.0768118601247413e-06, |
| "loss": 0.1723, |
| "step": 3135 |
| }, |
| { |
| "epoch": 2.5804727646454264, |
| "grad_norm": 1.7919323444366455, |
| "learning_rate": 1.056457485113408e-06, |
| "loss": 0.1913, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.584583761562179, |
| "grad_norm": 1.3920973539352417, |
| "learning_rate": 1.0362865908111418e-06, |
| "loss": 0.1601, |
| "step": 3145 |
| }, |
| { |
| "epoch": 2.588694758478931, |
| "grad_norm": 1.3566086292266846, |
| "learning_rate": 1.0162995910327145e-06, |
| "loss": 0.1667, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.5928057553956836, |
| "grad_norm": 1.5238653421401978, |
| "learning_rate": 9.964968958202171e-07, |
| "loss": 0.1973, |
| "step": 3155 |
| }, |
| { |
| "epoch": 2.5969167523124357, |
| "grad_norm": 1.5982362031936646, |
| "learning_rate": 9.7687891143465e-07, |
| "loss": 0.1626, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.6010277492291882, |
| "grad_norm": 1.6868717670440674, |
| "learning_rate": 9.574460403475993e-07, |
| "loss": 0.1749, |
| "step": 3165 |
| }, |
| { |
| "epoch": 2.6051387461459403, |
| "grad_norm": 1.9734680652618408, |
| "learning_rate": 9.381986812329579e-07, |
| "loss": 0.199, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.609249743062693, |
| "grad_norm": 1.6164628267288208, |
| "learning_rate": 9.19137228958773e-07, |
| "loss": 0.1394, |
| "step": 3175 |
| }, |
| { |
| "epoch": 2.613360739979445, |
| "grad_norm": 1.8937848806381226, |
| "learning_rate": 9.002620745791147e-07, |
| "loss": 0.1667, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.6174717368961975, |
| "grad_norm": 1.7090622186660767, |
| "learning_rate": 8.815736053260826e-07, |
| "loss": 0.2311, |
| "step": 3185 |
| }, |
| { |
| "epoch": 2.6215827338129496, |
| "grad_norm": 1.7733856439590454, |
| "learning_rate": 8.630722046018458e-07, |
| "loss": 0.1749, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.6256937307297017, |
| "grad_norm": 2.024850368499756, |
| "learning_rate": 8.447582519707786e-07, |
| "loss": 0.1662, |
| "step": 3195 |
| }, |
| { |
| "epoch": 2.6298047276464542, |
| "grad_norm": 1.9420026540756226, |
| "learning_rate": 8.266321231516727e-07, |
| "loss": 0.1871, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.6339157245632068, |
| "grad_norm": 1.433854341506958, |
| "learning_rate": 8.086941900100387e-07, |
| "loss": 0.1751, |
| "step": 3205 |
| }, |
| { |
| "epoch": 2.638026721479959, |
| "grad_norm": 2.0369086265563965, |
| "learning_rate": 7.909448205504633e-07, |
| "loss": 0.1296, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.642137718396711, |
| "grad_norm": 1.7490417957305908, |
| "learning_rate": 7.733843789090722e-07, |
| "loss": 0.1592, |
| "step": 3215 |
| }, |
| { |
| "epoch": 2.6462487153134635, |
| "grad_norm": 1.821723222732544, |
| "learning_rate": 7.560132253460484e-07, |
| "loss": 0.2148, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.650359712230216, |
| "grad_norm": 1.665130615234375, |
| "learning_rate": 7.388317162382475e-07, |
| "loss": 0.14, |
| "step": 3225 |
| }, |
| { |
| "epoch": 2.654470709146968, |
| "grad_norm": 1.7118257284164429, |
| "learning_rate": 7.218402040718908e-07, |
| "loss": 0.1748, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.6585817060637202, |
| "grad_norm": 2.0985677242279053, |
| "learning_rate": 7.050390374353244e-07, |
| "loss": 0.1789, |
| "step": 3235 |
| }, |
| { |
| "epoch": 2.662692702980473, |
| "grad_norm": 1.4616549015045166, |
| "learning_rate": 6.884285610118702e-07, |
| "loss": 0.1791, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.6668036998972253, |
| "grad_norm": 1.662348985671997, |
| "learning_rate": 6.720091155727626e-07, |
| "loss": 0.2191, |
| "step": 3245 |
| }, |
| { |
| "epoch": 2.6709146968139774, |
| "grad_norm": 1.784906029701233, |
| "learning_rate": 6.557810379701446e-07, |
| "loss": 0.1645, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.6750256937307295, |
| "grad_norm": 1.4837048053741455, |
| "learning_rate": 6.397446611301705e-07, |
| "loss": 0.1963, |
| "step": 3255 |
| }, |
| { |
| "epoch": 2.679136690647482, |
| "grad_norm": 1.8312329053878784, |
| "learning_rate": 6.239003140461641e-07, |
| "loss": 0.1987, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.6832476875642346, |
| "grad_norm": 1.5286824703216553, |
| "learning_rate": 6.082483217718737e-07, |
| "loss": 0.1833, |
| "step": 3265 |
| }, |
| { |
| "epoch": 2.6873586844809867, |
| "grad_norm": 1.5359361171722412, |
| "learning_rate": 5.927890054148111e-07, |
| "loss": 0.2086, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.691469681397739, |
| "grad_norm": 1.8544061183929443, |
| "learning_rate": 5.775226821296487e-07, |
| "loss": 0.1975, |
| "step": 3275 |
| }, |
| { |
| "epoch": 2.6955806783144913, |
| "grad_norm": 1.8769956827163696, |
| "learning_rate": 5.624496651117251e-07, |
| "loss": 0.1692, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.6996916752312434, |
| "grad_norm": 1.2962095737457275, |
| "learning_rate": 5.475702635906166e-07, |
| "loss": 0.1495, |
| "step": 3285 |
| }, |
| { |
| "epoch": 2.703802672147996, |
| "grad_norm": 1.6315231323242188, |
| "learning_rate": 5.328847828237882e-07, |
| "loss": 0.1544, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.707913669064748, |
| "grad_norm": 1.7691444158554077, |
| "learning_rate": 5.183935240903415e-07, |
| "loss": 0.1694, |
| "step": 3295 |
| }, |
| { |
| "epoch": 2.7120246659815006, |
| "grad_norm": 1.34203040599823, |
| "learning_rate": 5.040967846848232e-07, |
| "loss": 0.1705, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.7161356628982527, |
| "grad_norm": 1.6373629570007324, |
| "learning_rate": 4.899948579111291e-07, |
| "loss": 0.1534, |
| "step": 3305 |
| }, |
| { |
| "epoch": 2.7202466598150052, |
| "grad_norm": 1.2242546081542969, |
| "learning_rate": 4.760880330764939e-07, |
| "loss": 0.1516, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.7243576567317573, |
| "grad_norm": 1.08647620677948, |
| "learning_rate": 4.6237659548554636e-07, |
| "loss": 0.1986, |
| "step": 3315 |
| }, |
| { |
| "epoch": 2.72846865364851, |
| "grad_norm": 1.6842622756958008, |
| "learning_rate": 4.488608264344574e-07, |
| "loss": 0.2017, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.732579650565262, |
| "grad_norm": 1.7128268480300903, |
| "learning_rate": 4.3554100320517767e-07, |
| "loss": 0.1598, |
| "step": 3325 |
| }, |
| { |
| "epoch": 2.7366906474820145, |
| "grad_norm": 1.817187786102295, |
| "learning_rate": 4.2241739905974243e-07, |
| "loss": 0.2204, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.7408016443987666, |
| "grad_norm": 1.6121926307678223, |
| "learning_rate": 4.094902832346581e-07, |
| "loss": 0.1805, |
| "step": 3335 |
| }, |
| { |
| "epoch": 2.744912641315519, |
| "grad_norm": 1.8614606857299805, |
| "learning_rate": 3.9675992093539674e-07, |
| "loss": 0.2132, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.7490236382322712, |
| "grad_norm": 1.5935100317001343, |
| "learning_rate": 3.8422657333093916e-07, |
| "loss": 0.1775, |
| "step": 3345 |
| }, |
| { |
| "epoch": 2.753134635149024, |
| "grad_norm": 1.5736486911773682, |
| "learning_rate": 3.718904975484283e-07, |
| "loss": 0.1941, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.757245632065776, |
| "grad_norm": 1.0311506986618042, |
| "learning_rate": 3.5975194666788224e-07, |
| "loss": 0.1708, |
| "step": 3355 |
| }, |
| { |
| "epoch": 2.7613566289825284, |
| "grad_norm": 1.4687517881393433, |
| "learning_rate": 3.478111697170128e-07, |
| "loss": 0.1694, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.7654676258992805, |
| "grad_norm": 1.7468496561050415, |
| "learning_rate": 3.360684116661117e-07, |
| "loss": 0.2014, |
| "step": 3365 |
| }, |
| { |
| "epoch": 2.7695786228160326, |
| "grad_norm": 1.520976185798645, |
| "learning_rate": 3.245239134230305e-07, |
| "loss": 0.1587, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.773689619732785, |
| "grad_norm": 2.00669527053833, |
| "learning_rate": 3.131779118282219e-07, |
| "loss": 0.1857, |
| "step": 3375 |
| }, |
| { |
| "epoch": 2.7778006166495377, |
| "grad_norm": 1.321519374847412, |
| "learning_rate": 3.020306396499062e-07, |
| "loss": 0.128, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.78191161356629, |
| "grad_norm": 1.5371605157852173, |
| "learning_rate": 2.9108232557927164e-07, |
| "loss": 0.1696, |
| "step": 3385 |
| }, |
| { |
| "epoch": 2.786022610483042, |
| "grad_norm": 1.4579428434371948, |
| "learning_rate": 2.803331942258003e-07, |
| "loss": 0.1653, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.7901336073997944, |
| "grad_norm": 1.6946520805358887, |
| "learning_rate": 2.6978346611265083e-07, |
| "loss": 0.2164, |
| "step": 3395 |
| }, |
| { |
| "epoch": 2.794244604316547, |
| "grad_norm": 1.9009003639221191, |
| "learning_rate": 2.594333576721331e-07, |
| "loss": 0.1721, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.798355601233299, |
| "grad_norm": 1.7347790002822876, |
| "learning_rate": 2.492830812412783e-07, |
| "loss": 0.1625, |
| "step": 3405 |
| }, |
| { |
| "epoch": 2.802466598150051, |
| "grad_norm": 1.2449841499328613, |
| "learning_rate": 2.393328450574728e-07, |
| "loss": 0.1681, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.8065775950668037, |
| "grad_norm": 2.043942451477051, |
| "learning_rate": 2.295828532541855e-07, |
| "loss": 0.2155, |
| "step": 3415 |
| }, |
| { |
| "epoch": 2.8106885919835563, |
| "grad_norm": 1.5729506015777588, |
| "learning_rate": 2.200333058567905e-07, |
| "loss": 0.1848, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.8147995889003083, |
| "grad_norm": 1.1342136859893799, |
| "learning_rate": 2.1068439877845237e-07, |
| "loss": 0.1788, |
| "step": 3425 |
| }, |
| { |
| "epoch": 2.8189105858170604, |
| "grad_norm": 1.41780424118042, |
| "learning_rate": 2.01536323816115e-07, |
| "loss": 0.2229, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.823021582733813, |
| "grad_norm": 1.172594428062439, |
| "learning_rate": 1.9258926864655692e-07, |
| "loss": 0.1501, |
| "step": 3435 |
| }, |
| { |
| "epoch": 2.8271325796505655, |
| "grad_norm": 1.7016807794570923, |
| "learning_rate": 1.8384341682255225e-07, |
| "loss": 0.1886, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.8312435765673176, |
| "grad_norm": 1.7683440446853638, |
| "learning_rate": 1.7529894776909917e-07, |
| "loss": 0.1846, |
| "step": 3445 |
| }, |
| { |
| "epoch": 2.8353545734840697, |
| "grad_norm": 1.4340758323669434, |
| "learning_rate": 1.669560367797396e-07, |
| "loss": 0.1786, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.8394655704008223, |
| "grad_norm": 1.2617582082748413, |
| "learning_rate": 1.588148550129609e-07, |
| "loss": 0.2281, |
| "step": 3455 |
| }, |
| { |
| "epoch": 2.8435765673175744, |
| "grad_norm": 1.28896164894104, |
| "learning_rate": 1.5087556948868876e-07, |
| "loss": 0.165, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.847687564234327, |
| "grad_norm": 0.8633646368980408, |
| "learning_rate": 1.4313834308486097e-07, |
| "loss": 0.1673, |
| "step": 3465 |
| }, |
| { |
| "epoch": 2.851798561151079, |
| "grad_norm": 1.3453377485275269, |
| "learning_rate": 1.3560333453407682e-07, |
| "loss": 0.1887, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.8559095580678315, |
| "grad_norm": 1.9705557823181152, |
| "learning_rate": 1.2827069842035412e-07, |
| "loss": 0.1759, |
| "step": 3475 |
| }, |
| { |
| "epoch": 2.8600205549845836, |
| "grad_norm": 1.7831838130950928, |
| "learning_rate": 1.211405851759484e-07, |
| "loss": 0.1835, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.864131551901336, |
| "grad_norm": 1.3642257452011108, |
| "learning_rate": 1.1421314107826764e-07, |
| "loss": 0.2027, |
| "step": 3485 |
| }, |
| { |
| "epoch": 2.8682425488180883, |
| "grad_norm": 1.5517573356628418, |
| "learning_rate": 1.0748850824687795e-07, |
| "loss": 0.1419, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.872353545734841, |
| "grad_norm": 1.8059098720550537, |
| "learning_rate": 1.0096682464057706e-07, |
| "loss": 0.1744, |
| "step": 3495 |
| }, |
| { |
| "epoch": 2.876464542651593, |
| "grad_norm": 1.645180344581604, |
| "learning_rate": 9.46482240545743e-08, |
| "loss": 0.1418, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.8805755395683454, |
| "grad_norm": 1.353611946105957, |
| "learning_rate": 8.853283611774177e-08, |
| "loss": 0.1595, |
| "step": 3505 |
| }, |
| { |
| "epoch": 2.8846865364850975, |
| "grad_norm": 1.6561661958694458, |
| "learning_rate": 8.262078628995085e-08, |
| "loss": 0.1823, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.88879753340185, |
| "grad_norm": 1.1401209831237793, |
| "learning_rate": 7.691219585950538e-08, |
| "loss": 0.1914, |
| "step": 3515 |
| }, |
| { |
| "epoch": 2.892908530318602, |
| "grad_norm": 1.54417085647583, |
| "learning_rate": 7.140718194065033e-08, |
| "loss": 0.2103, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.8970195272353547, |
| "grad_norm": 1.6267364025115967, |
| "learning_rate": 6.610585747116705e-08, |
| "loss": 0.1746, |
| "step": 3525 |
| }, |
| { |
| "epoch": 2.901130524152107, |
| "grad_norm": 1.1939152479171753, |
| "learning_rate": 6.100833121005956e-08, |
| "loss": 0.1652, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.9052415210688594, |
| "grad_norm": 1.812522053718567, |
| "learning_rate": 5.6114707735320795e-08, |
| "loss": 0.1894, |
| "step": 3535 |
| }, |
| { |
| "epoch": 2.9093525179856115, |
| "grad_norm": 1.9067127704620361, |
| "learning_rate": 5.142508744178987e-08, |
| "loss": 0.1686, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.9134635149023635, |
| "grad_norm": 1.1483782529830933, |
| "learning_rate": 4.6939566539089265e-08, |
| "loss": 0.168, |
| "step": 3545 |
| }, |
| { |
| "epoch": 2.917574511819116, |
| "grad_norm": 2.0629680156707764, |
| "learning_rate": 4.2658237049655325e-08, |
| "loss": 0.1886, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.9216855087358686, |
| "grad_norm": 1.3101327419281006, |
| "learning_rate": 3.8581186806843086e-08, |
| "loss": 0.2038, |
| "step": 3555 |
| }, |
| { |
| "epoch": 2.9257965056526207, |
| "grad_norm": 1.8617042303085327, |
| "learning_rate": 3.470849945313548e-08, |
| "loss": 0.1801, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.929907502569373, |
| "grad_norm": 1.6082433462142944, |
| "learning_rate": 3.104025443841363e-08, |
| "loss": 0.1972, |
| "step": 3565 |
| }, |
| { |
| "epoch": 2.9340184994861254, |
| "grad_norm": 1.5044277906417847, |
| "learning_rate": 2.757652701834035e-08, |
| "loss": 0.1869, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.938129496402878, |
| "grad_norm": 1.6845835447311401, |
| "learning_rate": 2.431738825280583e-08, |
| "loss": 0.1965, |
| "step": 3575 |
| }, |
| { |
| "epoch": 2.94224049331963, |
| "grad_norm": 1.7309244871139526, |
| "learning_rate": 2.1262905004475477e-08, |
| "loss": 0.1399, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.946351490236382, |
| "grad_norm": 1.3211627006530762, |
| "learning_rate": 1.8413139937418776e-08, |
| "loss": 0.2233, |
| "step": 3585 |
| }, |
| { |
| "epoch": 2.9504624871531346, |
| "grad_norm": 1.3240909576416016, |
| "learning_rate": 1.5768151515818118e-08, |
| "loss": 0.1891, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.954573484069887, |
| "grad_norm": 1.5092717409133911, |
| "learning_rate": 1.332799400277418e-08, |
| "loss": 0.1894, |
| "step": 3595 |
| }, |
| { |
| "epoch": 2.9586844809866393, |
| "grad_norm": 1.5229946374893188, |
| "learning_rate": 1.1092717459192381e-08, |
| "loss": 0.1709, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.9627954779033914, |
| "grad_norm": 1.7171857357025146, |
| "learning_rate": 9.062367742754819e-09, |
| "loss": 0.2038, |
| "step": 3605 |
| }, |
| { |
| "epoch": 2.966906474820144, |
| "grad_norm": 1.5181477069854736, |
| "learning_rate": 7.236986506978793e-09, |
| "loss": 0.2173, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.9710174717368965, |
| "grad_norm": 1.5067954063415527, |
| "learning_rate": 5.616611200364164e-09, |
| "loss": 0.1744, |
| "step": 3615 |
| }, |
| { |
| "epoch": 2.9751284686536486, |
| "grad_norm": 1.9040520191192627, |
| "learning_rate": 4.201275065620625e-09, |
| "loss": 0.1684, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.9792394655704006, |
| "grad_norm": 1.7340034246444702, |
| "learning_rate": 2.991007138993807e-09, |
| "loss": 0.1679, |
| "step": 3625 |
| }, |
| { |
| "epoch": 2.983350462487153, |
| "grad_norm": 1.7730134725570679, |
| "learning_rate": 1.985832249662423e-09, |
| "loss": 0.2154, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.9874614594039053, |
| "grad_norm": 2.1094627380371094, |
| "learning_rate": 1.185771019230897e-09, |
| "loss": 0.1583, |
| "step": 3635 |
| }, |
| { |
| "epoch": 2.991572456320658, |
| "grad_norm": 1.5417195558547974, |
| "learning_rate": 5.908398613074795e-10, |
| "loss": 0.1622, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.99568345323741, |
| "grad_norm": 1.7417429685592651, |
| "learning_rate": 2.0105098116673938e-10, |
| "loss": 0.1632, |
| "step": 3645 |
| }, |
| { |
| "epoch": 2.9997944501541625, |
| "grad_norm": 1.8266246318817139, |
| "learning_rate": 1.64123754997636e-11, |
| "loss": 0.2147, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 3651, |
| "total_flos": 1.482904391886766e+18, |
| "train_loss": 0.2381349169034169, |
| "train_runtime": 7521.888, |
| "train_samples_per_second": 1.94, |
| "train_steps_per_second": 0.485 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3651, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.482904391886766e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |