{ "best_metric": 0.8927125930786133, "best_model_checkpoint": "/workspace/previous_works/MedBLIP/output/MedBLIP-0005/checkpoint-34364", "epoch": 1.80001047614059, "eval_steps": 17182, "global_step": 34364, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030380807710439473, "grad_norm": 119.46666717529297, "learning_rate": 1.3268156424581008e-07, "loss": 5.0724, "step": 58 }, { "epoch": 0.006076161542087895, "grad_norm": 34.45277786254883, "learning_rate": 3.3519553072625703e-07, "loss": 3.6479, "step": 116 }, { "epoch": 0.009114242313131841, "grad_norm": 14.232598304748535, "learning_rate": 5.37709497206704e-07, "loss": 2.2356, "step": 174 }, { "epoch": 0.01215232308417579, "grad_norm": 10.709288597106934, "learning_rate": 7.402234636871509e-07, "loss": 1.9953, "step": 232 }, { "epoch": 0.015190403855219737, "grad_norm": 7.259119987487793, "learning_rate": 9.427374301675979e-07, "loss": 1.8188, "step": 290 }, { "epoch": 0.018228484626263683, "grad_norm": 6.452203273773193, "learning_rate": 1.1452513966480447e-06, "loss": 1.5639, "step": 348 }, { "epoch": 0.021266565397307632, "grad_norm": 9.458000183105469, "learning_rate": 1.3477653631284918e-06, "loss": 1.504, "step": 406 }, { "epoch": 0.02430464616835158, "grad_norm": 6.162489414215088, "learning_rate": 1.5502793296089386e-06, "loss": 1.2581, "step": 464 }, { "epoch": 0.027342726939395528, "grad_norm": 6.684648513793945, "learning_rate": 1.7527932960893857e-06, "loss": 1.1713, "step": 522 }, { "epoch": 0.030380807710439474, "grad_norm": 6.966240882873535, "learning_rate": 1.9553072625698325e-06, "loss": 1.0504, "step": 580 }, { "epoch": 0.03341888848148342, "grad_norm": 6.379108905792236, "learning_rate": 2.1578212290502796e-06, "loss": 0.968, "step": 638 }, { "epoch": 0.036456969252527366, "grad_norm": 5.3792619705200195, "learning_rate": 2.3603351955307262e-06, "loss": 0.9209, "step": 696 }, { "epoch": 0.039495050023571315, "grad_norm": 6.413719177246094, "learning_rate": 2.5628491620111733e-06, "loss": 0.8723, "step": 754 }, { "epoch": 0.042533130794615265, "grad_norm": 8.826435089111328, "learning_rate": 2.7653631284916204e-06, "loss": 0.8652, "step": 812 }, { "epoch": 0.045571211565659214, "grad_norm": 6.294381618499756, "learning_rate": 2.9678770949720674e-06, "loss": 0.8099, "step": 870 }, { "epoch": 0.04860929233670316, "grad_norm": 7.355430603027344, "learning_rate": 3.170391061452514e-06, "loss": 0.7396, "step": 928 }, { "epoch": 0.051647373107747106, "grad_norm": 10.120753288269043, "learning_rate": 3.372905027932961e-06, "loss": 0.7511, "step": 986 }, { "epoch": 0.054685453878791056, "grad_norm": 5.653336048126221, "learning_rate": 3.575418994413408e-06, "loss": 0.6688, "step": 1044 }, { "epoch": 0.057723534649835, "grad_norm": 5.749114990234375, "learning_rate": 3.7779329608938552e-06, "loss": 0.677, "step": 1102 }, { "epoch": 0.06076161542087895, "grad_norm": 7.65744686126709, "learning_rate": 3.980446927374302e-06, "loss": 0.7195, "step": 1160 }, { "epoch": 0.0637996961919229, "grad_norm": 9.296794891357422, "learning_rate": 4.1829608938547485e-06, "loss": 0.7598, "step": 1218 }, { "epoch": 0.06683777696296685, "grad_norm": 6.070080757141113, "learning_rate": 4.385474860335196e-06, "loss": 0.7015, "step": 1276 }, { "epoch": 0.0698758577340108, "grad_norm": 6.638489723205566, "learning_rate": 4.5879888268156435e-06, "loss": 0.6778, "step": 1334 }, { "epoch": 0.07291393850505473, "grad_norm": 9.076967239379883, "learning_rate": 4.790502793296089e-06, "loss": 0.5551, "step": 1392 }, { "epoch": 0.07595201927609868, "grad_norm": 6.929805278778076, "learning_rate": 4.993016759776537e-06, "loss": 0.6792, "step": 1450 }, { "epoch": 0.07899010004714263, "grad_norm": 8.316506385803223, "learning_rate": 5.195530726256983e-06, "loss": 0.6379, "step": 1508 }, { "epoch": 0.08202818081818658, "grad_norm": 9.471745491027832, "learning_rate": 5.398044692737431e-06, "loss": 0.6242, "step": 1566 }, { "epoch": 0.08506626158923053, "grad_norm": 6.022659778594971, "learning_rate": 5.6005586592178775e-06, "loss": 0.6368, "step": 1624 }, { "epoch": 0.08810434236027448, "grad_norm": 7.15187406539917, "learning_rate": 5.803072625698325e-06, "loss": 0.5381, "step": 1682 }, { "epoch": 0.09114242313131843, "grad_norm": 7.10537052154541, "learning_rate": 6.005586592178772e-06, "loss": 0.6885, "step": 1740 }, { "epoch": 0.09418050390236236, "grad_norm": 5.685272216796875, "learning_rate": 6.208100558659218e-06, "loss": 0.6066, "step": 1798 }, { "epoch": 0.09721858467340631, "grad_norm": 6.733754634857178, "learning_rate": 6.410614525139666e-06, "loss": 0.6124, "step": 1856 }, { "epoch": 0.10025666544445026, "grad_norm": 6.112730026245117, "learning_rate": 6.613128491620112e-06, "loss": 0.6224, "step": 1914 }, { "epoch": 0.10329474621549421, "grad_norm": 5.784328460693359, "learning_rate": 6.815642458100559e-06, "loss": 0.6289, "step": 1972 }, { "epoch": 0.10633282698653816, "grad_norm": 9.69115924835205, "learning_rate": 7.0181564245810065e-06, "loss": 0.5952, "step": 2030 }, { "epoch": 0.10937090775758211, "grad_norm": 5.509926795959473, "learning_rate": 7.220670391061453e-06, "loss": 0.6288, "step": 2088 }, { "epoch": 0.11240898852862606, "grad_norm": 6.545931339263916, "learning_rate": 7.423184357541901e-06, "loss": 0.5243, "step": 2146 }, { "epoch": 0.11544706929967, "grad_norm": 7.0921173095703125, "learning_rate": 7.625698324022347e-06, "loss": 0.5527, "step": 2204 }, { "epoch": 0.11848515007071395, "grad_norm": 5.278844833374023, "learning_rate": 7.828212290502794e-06, "loss": 0.6739, "step": 2262 }, { "epoch": 0.1215232308417579, "grad_norm": 5.799619197845459, "learning_rate": 8.03072625698324e-06, "loss": 0.5913, "step": 2320 }, { "epoch": 0.12456131161280184, "grad_norm": 7.148493766784668, "learning_rate": 8.233240223463687e-06, "loss": 0.6117, "step": 2378 }, { "epoch": 0.1275993923838458, "grad_norm": 5.538400650024414, "learning_rate": 8.435754189944135e-06, "loss": 0.5512, "step": 2436 }, { "epoch": 0.13063747315488974, "grad_norm": 5.364485740661621, "learning_rate": 8.638268156424582e-06, "loss": 0.5458, "step": 2494 }, { "epoch": 0.1336755539259337, "grad_norm": 7.3765549659729, "learning_rate": 8.840782122905029e-06, "loss": 0.5847, "step": 2552 }, { "epoch": 0.13671363469697764, "grad_norm": 5.364510536193848, "learning_rate": 9.043296089385475e-06, "loss": 0.542, "step": 2610 }, { "epoch": 0.1397517154680216, "grad_norm": 3.656923770904541, "learning_rate": 9.245810055865922e-06, "loss": 0.5528, "step": 2668 }, { "epoch": 0.14278979623906554, "grad_norm": 3.117631435394287, "learning_rate": 9.448324022346369e-06, "loss": 0.5793, "step": 2726 }, { "epoch": 0.14582787701010946, "grad_norm": 6.822358131408691, "learning_rate": 9.650837988826817e-06, "loss": 0.5967, "step": 2784 }, { "epoch": 0.1488659577811534, "grad_norm": 4.629315376281738, "learning_rate": 9.853351955307264e-06, "loss": 0.5945, "step": 2842 }, { "epoch": 0.15190403855219736, "grad_norm": 3.6398866176605225, "learning_rate": 1.005586592178771e-05, "loss": 0.6005, "step": 2900 }, { "epoch": 0.1549421193232413, "grad_norm": 5.811204433441162, "learning_rate": 1.0258379888268157e-05, "loss": 0.5065, "step": 2958 }, { "epoch": 0.15798020009428526, "grad_norm": 3.8507301807403564, "learning_rate": 1.0460893854748604e-05, "loss": 0.5802, "step": 3016 }, { "epoch": 0.1610182808653292, "grad_norm": 5.666468143463135, "learning_rate": 1.066340782122905e-05, "loss": 0.5342, "step": 3074 }, { "epoch": 0.16405636163637316, "grad_norm": 2.025376558303833, "learning_rate": 1.0865921787709498e-05, "loss": 0.5271, "step": 3132 }, { "epoch": 0.1670944424074171, "grad_norm": 5.698912143707275, "learning_rate": 1.1068435754189945e-05, "loss": 0.538, "step": 3190 }, { "epoch": 0.17013252317846106, "grad_norm": 4.067931652069092, "learning_rate": 1.1270949720670392e-05, "loss": 0.5601, "step": 3248 }, { "epoch": 0.173170603949505, "grad_norm": 5.068817138671875, "learning_rate": 1.1473463687150838e-05, "loss": 0.5528, "step": 3306 }, { "epoch": 0.17620868472054896, "grad_norm": 7.116920471191406, "learning_rate": 1.1675977653631285e-05, "loss": 0.5418, "step": 3364 }, { "epoch": 0.1792467654915929, "grad_norm": 8.113608360290527, "learning_rate": 1.1878491620111732e-05, "loss": 0.5196, "step": 3422 }, { "epoch": 0.18228484626263686, "grad_norm": 4.820245265960693, "learning_rate": 1.208100558659218e-05, "loss": 0.6029, "step": 3480 }, { "epoch": 0.18532292703368078, "grad_norm": 1.1077276468276978, "learning_rate": 1.2283519553072627e-05, "loss": 0.5779, "step": 3538 }, { "epoch": 0.18836100780472473, "grad_norm": 3.024624824523926, "learning_rate": 1.2486033519553073e-05, "loss": 0.5091, "step": 3596 }, { "epoch": 0.19139908857576868, "grad_norm": 8.059369087219238, "learning_rate": 1.268854748603352e-05, "loss": 0.5012, "step": 3654 }, { "epoch": 0.19443716934681263, "grad_norm": 3.9895098209381104, "learning_rate": 1.2891061452513967e-05, "loss": 0.578, "step": 3712 }, { "epoch": 0.19747525011785658, "grad_norm": 7.111061096191406, "learning_rate": 1.3093575418994415e-05, "loss": 0.4958, "step": 3770 }, { "epoch": 0.20051333088890053, "grad_norm": 5.541796684265137, "learning_rate": 1.3296089385474861e-05, "loss": 0.589, "step": 3828 }, { "epoch": 0.20355141165994448, "grad_norm": 4.365527629852295, "learning_rate": 1.3498603351955308e-05, "loss": 0.6003, "step": 3886 }, { "epoch": 0.20658949243098843, "grad_norm": 4.486824035644531, "learning_rate": 1.3701117318435755e-05, "loss": 0.6135, "step": 3944 }, { "epoch": 0.20962757320203237, "grad_norm": 5.487951278686523, "learning_rate": 1.3903631284916201e-05, "loss": 0.6234, "step": 4002 }, { "epoch": 0.21266565397307632, "grad_norm": 2.3850884437561035, "learning_rate": 1.410614525139665e-05, "loss": 0.5536, "step": 4060 }, { "epoch": 0.21570373474412027, "grad_norm": 3.7957749366760254, "learning_rate": 1.4305167597765364e-05, "loss": 0.5211, "step": 4118 }, { "epoch": 0.21874181551516422, "grad_norm": 5.272162437438965, "learning_rate": 1.450768156424581e-05, "loss": 0.4992, "step": 4176 }, { "epoch": 0.22177989628620817, "grad_norm": 4.292142868041992, "learning_rate": 1.4710195530726259e-05, "loss": 0.5124, "step": 4234 }, { "epoch": 0.22481797705725212, "grad_norm": 4.948460102081299, "learning_rate": 1.4912709497206705e-05, "loss": 0.5575, "step": 4292 }, { "epoch": 0.22785605782829604, "grad_norm": 3.456590414047241, "learning_rate": 1.5115223463687152e-05, "loss": 0.5672, "step": 4350 }, { "epoch": 0.23089413859934, "grad_norm": 5.341044902801514, "learning_rate": 1.53177374301676e-05, "loss": 0.5106, "step": 4408 }, { "epoch": 0.23393221937038394, "grad_norm": 5.5106987953186035, "learning_rate": 1.5520251396648043e-05, "loss": 0.6037, "step": 4466 }, { "epoch": 0.2369703001414279, "grad_norm": 4.941389560699463, "learning_rate": 1.5722765363128495e-05, "loss": 0.5995, "step": 4524 }, { "epoch": 0.24000838091247184, "grad_norm": 3.719957113265991, "learning_rate": 1.592527932960894e-05, "loss": 0.5531, "step": 4582 }, { "epoch": 0.2430464616835158, "grad_norm": 4.435623645782471, "learning_rate": 1.612779329608939e-05, "loss": 0.5502, "step": 4640 }, { "epoch": 0.24608454245455974, "grad_norm": 4.688556671142578, "learning_rate": 1.6330307262569833e-05, "loss": 0.5056, "step": 4698 }, { "epoch": 0.2491226232256037, "grad_norm": 5.511931896209717, "learning_rate": 1.653282122905028e-05, "loss": 0.5242, "step": 4756 }, { "epoch": 0.25216070399664764, "grad_norm": 4.933206558227539, "learning_rate": 1.673533519553073e-05, "loss": 0.5115, "step": 4814 }, { "epoch": 0.2551987847676916, "grad_norm": 2.942838191986084, "learning_rate": 1.6937849162011175e-05, "loss": 0.5941, "step": 4872 }, { "epoch": 0.25823686553873554, "grad_norm": 4.0710625648498535, "learning_rate": 1.7140363128491623e-05, "loss": 0.4887, "step": 4930 }, { "epoch": 0.2612749463097795, "grad_norm": 3.212920665740967, "learning_rate": 1.7342877094972068e-05, "loss": 0.5051, "step": 4988 }, { "epoch": 0.26431302708082344, "grad_norm": 4.390661716461182, "learning_rate": 1.7545391061452513e-05, "loss": 0.5735, "step": 5046 }, { "epoch": 0.2673511078518674, "grad_norm": 3.784395217895508, "learning_rate": 1.7747905027932965e-05, "loss": 0.4847, "step": 5104 }, { "epoch": 0.27038918862291134, "grad_norm": 4.238777160644531, "learning_rate": 1.795041899441341e-05, "loss": 0.4303, "step": 5162 }, { "epoch": 0.2734272693939553, "grad_norm": 4.616554260253906, "learning_rate": 1.8152932960893855e-05, "loss": 0.53, "step": 5220 }, { "epoch": 0.27646535016499924, "grad_norm": 3.0670206546783447, "learning_rate": 1.8355446927374303e-05, "loss": 0.5089, "step": 5278 }, { "epoch": 0.2795034309360432, "grad_norm": 5.144998550415039, "learning_rate": 1.8557960893854748e-05, "loss": 0.5352, "step": 5336 }, { "epoch": 0.28254151170708713, "grad_norm": 3.9276976585388184, "learning_rate": 1.8760474860335196e-05, "loss": 0.5001, "step": 5394 }, { "epoch": 0.2855795924781311, "grad_norm": 4.67507266998291, "learning_rate": 1.8962988826815645e-05, "loss": 0.5106, "step": 5452 }, { "epoch": 0.288617673249175, "grad_norm": 11.027061462402344, "learning_rate": 1.916550279329609e-05, "loss": 0.5311, "step": 5510 }, { "epoch": 0.2916557540202189, "grad_norm": 4.6279802322387695, "learning_rate": 1.9368016759776538e-05, "loss": 0.4722, "step": 5568 }, { "epoch": 0.2946938347912629, "grad_norm": 3.2744059562683105, "learning_rate": 1.9570530726256983e-05, "loss": 0.5435, "step": 5626 }, { "epoch": 0.2977319155623068, "grad_norm": 4.361588478088379, "learning_rate": 1.977304469273743e-05, "loss": 0.5182, "step": 5684 }, { "epoch": 0.3007699963333508, "grad_norm": 4.725919246673584, "learning_rate": 1.997555865921788e-05, "loss": 0.5108, "step": 5742 }, { "epoch": 0.3038080771043947, "grad_norm": 4.126678943634033, "learning_rate": 1.999995169004151e-05, "loss": 0.4113, "step": 5800 }, { "epoch": 0.3068461578754387, "grad_norm": 4.924627780914307, "learning_rate": 1.999977932757864e-05, "loss": 0.5911, "step": 5858 }, { "epoch": 0.3098842386464826, "grad_norm": 3.836568832397461, "learning_rate": 1.9999482004657697e-05, "loss": 0.5589, "step": 5916 }, { "epoch": 0.3129223194175266, "grad_norm": 2.1909499168395996, "learning_rate": 1.999905972499412e-05, "loss": 0.5321, "step": 5974 }, { "epoch": 0.3159604001885705, "grad_norm": 4.15761661529541, "learning_rate": 1.9998512493864858e-05, "loss": 0.4898, "step": 6032 }, { "epoch": 0.31899848095961447, "grad_norm": 4.483209133148193, "learning_rate": 1.9997840318108285e-05, "loss": 0.5339, "step": 6090 }, { "epoch": 0.3220365617306584, "grad_norm": 4.208775997161865, "learning_rate": 1.9997058007847493e-05, "loss": 0.4381, "step": 6148 }, { "epoch": 0.32507464250170237, "grad_norm": 4.7916951179504395, "learning_rate": 1.999613812340473e-05, "loss": 0.5622, "step": 6206 }, { "epoch": 0.3281127232727463, "grad_norm": 3.041353702545166, "learning_rate": 1.999509332400555e-05, "loss": 0.5797, "step": 6264 }, { "epoch": 0.33115080404379027, "grad_norm": 2.720505714416504, "learning_rate": 1.999392362270611e-05, "loss": 0.5213, "step": 6322 }, { "epoch": 0.3341888848148342, "grad_norm": 4.793097496032715, "learning_rate": 1.999262903412336e-05, "loss": 0.4593, "step": 6380 }, { "epoch": 0.33722696558587817, "grad_norm": 3.890002489089966, "learning_rate": 1.999120957443491e-05, "loss": 0.4486, "step": 6438 }, { "epoch": 0.3402650463569221, "grad_norm": 3.537182569503784, "learning_rate": 1.9989665261378772e-05, "loss": 0.4879, "step": 6496 }, { "epoch": 0.34330312712796607, "grad_norm": 1.6273483037948608, "learning_rate": 1.998799611425319e-05, "loss": 0.483, "step": 6554 }, { "epoch": 0.34634120789901, "grad_norm": 1.544161081314087, "learning_rate": 1.9986202153916356e-05, "loss": 0.5295, "step": 6612 }, { "epoch": 0.34937928867005397, "grad_norm": 4.516360759735107, "learning_rate": 1.9984283402786177e-05, "loss": 0.5544, "step": 6670 }, { "epoch": 0.3524173694410979, "grad_norm": 3.9603912830352783, "learning_rate": 1.998223988483998e-05, "loss": 0.5005, "step": 6728 }, { "epoch": 0.35545545021214187, "grad_norm": 4.063785076141357, "learning_rate": 1.998007162561423e-05, "loss": 0.5339, "step": 6786 }, { "epoch": 0.3584935309831858, "grad_norm": 4.825593948364258, "learning_rate": 1.9977778652204192e-05, "loss": 0.4702, "step": 6844 }, { "epoch": 0.36153161175422976, "grad_norm": 0.5379557013511658, "learning_rate": 1.997536099326359e-05, "loss": 0.5397, "step": 6902 }, { "epoch": 0.3645696925252737, "grad_norm": 3.922156810760498, "learning_rate": 1.9972818679004273e-05, "loss": 0.5663, "step": 6960 }, { "epoch": 0.36760777329631766, "grad_norm": 3.376941442489624, "learning_rate": 1.9970198778515604e-05, "loss": 0.5321, "step": 7018 }, { "epoch": 0.37064585406736156, "grad_norm": 4.569897174835205, "learning_rate": 1.9967409398301135e-05, "loss": 0.4517, "step": 7076 }, { "epoch": 0.3736839348384055, "grad_norm": 4.267284393310547, "learning_rate": 1.9964495462133642e-05, "loss": 0.5225, "step": 7134 }, { "epoch": 0.37672201560944946, "grad_norm": 4.1275506019592285, "learning_rate": 1.9961457006426603e-05, "loss": 0.5007, "step": 7192 }, { "epoch": 0.3797600963804934, "grad_norm": 4.481261253356934, "learning_rate": 1.995829406914954e-05, "loss": 0.4754, "step": 7250 }, { "epoch": 0.38279817715153736, "grad_norm": 2.876922845840454, "learning_rate": 1.995500668982753e-05, "loss": 0.4729, "step": 7308 }, { "epoch": 0.3858362579225813, "grad_norm": 1.0541763305664062, "learning_rate": 1.9951594909540727e-05, "loss": 0.5697, "step": 7366 }, { "epoch": 0.38887433869362525, "grad_norm": 3.462268114089966, "learning_rate": 1.9948058770923837e-05, "loss": 0.4803, "step": 7424 }, { "epoch": 0.3919124194646692, "grad_norm": 3.209782123565674, "learning_rate": 1.9944398318165578e-05, "loss": 0.5239, "step": 7482 }, { "epoch": 0.39495050023571315, "grad_norm": 4.3836445808410645, "learning_rate": 1.994061359700815e-05, "loss": 0.5096, "step": 7540 }, { "epoch": 0.3979885810067571, "grad_norm": 2.8133575916290283, "learning_rate": 1.9936704654746642e-05, "loss": 0.4546, "step": 7598 }, { "epoch": 0.40102666177780105, "grad_norm": 3.709463596343994, "learning_rate": 1.9932671540228456e-05, "loss": 0.4882, "step": 7656 }, { "epoch": 0.404064742548845, "grad_norm": 3.9174060821533203, "learning_rate": 1.992851430385269e-05, "loss": 0.4311, "step": 7714 }, { "epoch": 0.40710282331988895, "grad_norm": 2.9282238483428955, "learning_rate": 1.99242329975695e-05, "loss": 0.5204, "step": 7772 }, { "epoch": 0.4101409040909329, "grad_norm": 4.139567852020264, "learning_rate": 1.9919827674879473e-05, "loss": 0.4739, "step": 7830 }, { "epoch": 0.41317898486197685, "grad_norm": 3.436636447906494, "learning_rate": 1.9915298390832935e-05, "loss": 0.4838, "step": 7888 }, { "epoch": 0.4162170656330208, "grad_norm": 3.512646198272705, "learning_rate": 1.9910645202029272e-05, "loss": 0.4594, "step": 7946 }, { "epoch": 0.41925514640406475, "grad_norm": 3.1627018451690674, "learning_rate": 1.9905868166616234e-05, "loss": 0.5628, "step": 8004 }, { "epoch": 0.4222932271751087, "grad_norm": 1.1955090761184692, "learning_rate": 1.990096734428919e-05, "loss": 0.4587, "step": 8062 }, { "epoch": 0.42533130794615265, "grad_norm": 3.0422959327697754, "learning_rate": 1.989594279629039e-05, "loss": 0.5523, "step": 8120 }, { "epoch": 0.4283693887171966, "grad_norm": 3.0934972763061523, "learning_rate": 1.98907945854082e-05, "loss": 0.4855, "step": 8178 }, { "epoch": 0.43140746948824055, "grad_norm": 3.9694907665252686, "learning_rate": 1.9885522775976324e-05, "loss": 0.543, "step": 8236 }, { "epoch": 0.4344455502592845, "grad_norm": 4.713873386383057, "learning_rate": 1.9880127433872983e-05, "loss": 0.4901, "step": 8294 }, { "epoch": 0.43748363103032845, "grad_norm": 2.3840503692626953, "learning_rate": 1.987460862652011e-05, "loss": 0.4265, "step": 8352 }, { "epoch": 0.4405217118013724, "grad_norm": 4.123522758483887, "learning_rate": 1.9868966422882496e-05, "loss": 0.4237, "step": 8410 }, { "epoch": 0.44355979257241634, "grad_norm": 3.1017978191375732, "learning_rate": 1.986320089346693e-05, "loss": 0.4106, "step": 8468 }, { "epoch": 0.4465978733434603, "grad_norm": 2.8059699535369873, "learning_rate": 1.9857414684867994e-05, "loss": 0.4641, "step": 8526 }, { "epoch": 0.44963595411450424, "grad_norm": 4.327667236328125, "learning_rate": 1.985140484474396e-05, "loss": 0.4337, "step": 8584 }, { "epoch": 0.45267403488554814, "grad_norm": 0.9626501798629761, "learning_rate": 1.9845271898297104e-05, "loss": 0.4932, "step": 8642 }, { "epoch": 0.4557121156565921, "grad_norm": 1.3852657079696655, "learning_rate": 1.9839015922166693e-05, "loss": 0.4866, "step": 8700 }, { "epoch": 0.45875019642763604, "grad_norm": 3.2711095809936523, "learning_rate": 1.983263699452942e-05, "loss": 0.4825, "step": 8758 }, { "epoch": 0.46178827719868, "grad_norm": 4.84442138671875, "learning_rate": 1.9826135195098416e-05, "loss": 0.4559, "step": 8816 }, { "epoch": 0.46482635796972394, "grad_norm": 1.2177191972732544, "learning_rate": 1.9819510605122255e-05, "loss": 0.4795, "step": 8874 }, { "epoch": 0.4678644387407679, "grad_norm": 3.1849379539489746, "learning_rate": 1.981276330738395e-05, "loss": 0.474, "step": 8932 }, { "epoch": 0.47090251951181183, "grad_norm": 4.420878887176514, "learning_rate": 1.9805893386199892e-05, "loss": 0.4876, "step": 8990 }, { "epoch": 0.4739406002828558, "grad_norm": 2.714984893798828, "learning_rate": 1.9798900927418835e-05, "loss": 0.4491, "step": 9048 }, { "epoch": 0.47697868105389973, "grad_norm": 2.185593843460083, "learning_rate": 1.9791786018420792e-05, "loss": 0.4808, "step": 9106 }, { "epoch": 0.4800167618249437, "grad_norm": 3.3326094150543213, "learning_rate": 1.9784548748115946e-05, "loss": 0.4502, "step": 9164 }, { "epoch": 0.48305484259598763, "grad_norm": 3.4437661170959473, "learning_rate": 1.977718920694356e-05, "loss": 0.5049, "step": 9222 }, { "epoch": 0.4860929233670316, "grad_norm": 5.456835746765137, "learning_rate": 1.9769707486870825e-05, "loss": 0.4791, "step": 9280 }, { "epoch": 0.48913100413807553, "grad_norm": 5.109498023986816, "learning_rate": 1.9762103681391724e-05, "loss": 0.5396, "step": 9338 }, { "epoch": 0.4921690849091195, "grad_norm": 4.347654342651367, "learning_rate": 1.9754377885525854e-05, "loss": 0.4433, "step": 9396 }, { "epoch": 0.49520716568016343, "grad_norm": 3.837158203125, "learning_rate": 1.9746530195817243e-05, "loss": 0.4791, "step": 9454 }, { "epoch": 0.4982452464512074, "grad_norm": 3.8552966117858887, "learning_rate": 1.9738699146560578e-05, "loss": 0.4979, "step": 9512 }, { "epoch": 0.5012833272222513, "grad_norm": 1.76126229763031, "learning_rate": 1.973061006224811e-05, "loss": 0.4716, "step": 9570 }, { "epoch": 0.5043214079932953, "grad_norm": 5.198726654052734, "learning_rate": 1.9722399381103267e-05, "loss": 0.4801, "step": 9628 }, { "epoch": 0.5073594887643392, "grad_norm": 3.2313361167907715, "learning_rate": 1.9714067205729356e-05, "loss": 0.4592, "step": 9686 }, { "epoch": 0.5103975695353832, "grad_norm": 0.9486598968505859, "learning_rate": 1.9705613640247928e-05, "loss": 0.4399, "step": 9744 }, { "epoch": 0.5134356503064271, "grad_norm": 3.271669864654541, "learning_rate": 1.9697038790297442e-05, "loss": 0.4722, "step": 9802 }, { "epoch": 0.5164737310774711, "grad_norm": 5.1848039627075195, "learning_rate": 1.9688342763031993e-05, "loss": 0.4336, "step": 9860 }, { "epoch": 0.519511811848515, "grad_norm": 4.134024620056152, "learning_rate": 1.967952566711993e-05, "loss": 0.4534, "step": 9918 }, { "epoch": 0.522549892619559, "grad_norm": 3.0904159545898438, "learning_rate": 1.9670587612742515e-05, "loss": 0.4461, "step": 9976 }, { "epoch": 0.5255879733906029, "grad_norm": 3.3785481452941895, "learning_rate": 1.9661528711592553e-05, "loss": 0.4906, "step": 10034 }, { "epoch": 0.5286260541616469, "grad_norm": 4.755141258239746, "learning_rate": 1.9652349076872986e-05, "loss": 0.4519, "step": 10092 }, { "epoch": 0.5316641349326908, "grad_norm": 4.502477645874023, "learning_rate": 1.9643048823295482e-05, "loss": 0.5454, "step": 10150 }, { "epoch": 0.5347022157037348, "grad_norm": 2.3361642360687256, "learning_rate": 1.9633628067078997e-05, "loss": 0.5069, "step": 10208 }, { "epoch": 0.5377402964747787, "grad_norm": 3.6974456310272217, "learning_rate": 1.9624086925948333e-05, "loss": 0.4604, "step": 10266 }, { "epoch": 0.5407783772458227, "grad_norm": 3.7012462615966797, "learning_rate": 1.9614425519132654e-05, "loss": 0.5368, "step": 10324 }, { "epoch": 0.5438164580168666, "grad_norm": 0.9825100898742676, "learning_rate": 1.9604643967364013e-05, "loss": 0.4917, "step": 10382 }, { "epoch": 0.5468545387879106, "grad_norm": 2.8980348110198975, "learning_rate": 1.959474239287582e-05, "loss": 0.4571, "step": 10440 }, { "epoch": 0.5498926195589545, "grad_norm": 6.615330696105957, "learning_rate": 1.9584720919401342e-05, "loss": 0.4949, "step": 10498 }, { "epoch": 0.5529307003299985, "grad_norm": 4.613067626953125, "learning_rate": 1.9574579672172126e-05, "loss": 0.4072, "step": 10556 }, { "epoch": 0.5559687811010424, "grad_norm": 3.3984858989715576, "learning_rate": 1.9564318777916456e-05, "loss": 0.412, "step": 10614 }, { "epoch": 0.5590068618720864, "grad_norm": 5.624422550201416, "learning_rate": 1.9553938364857775e-05, "loss": 0.4781, "step": 10672 }, { "epoch": 0.5620449426431303, "grad_norm": 4.486995697021484, "learning_rate": 1.954343856271306e-05, "loss": 0.4426, "step": 10730 }, { "epoch": 0.5650830234141743, "grad_norm": 2.862964391708374, "learning_rate": 1.953281950269121e-05, "loss": 0.506, "step": 10788 }, { "epoch": 0.5681211041852182, "grad_norm": 0.556151807308197, "learning_rate": 1.9522267467101615e-05, "loss": 0.4095, "step": 10846 }, { "epoch": 0.5711591849562622, "grad_norm": 3.035536527633667, "learning_rate": 1.9511412341335318e-05, "loss": 0.517, "step": 10904 }, { "epoch": 0.574197265727306, "grad_norm": 3.8603086471557617, "learning_rate": 1.950043835790185e-05, "loss": 0.4344, "step": 10962 }, { "epoch": 0.57723534649835, "grad_norm": 4.4469499588012695, "learning_rate": 1.9489345653935635e-05, "loss": 0.4774, "step": 11020 }, { "epoch": 0.5802734272693939, "grad_norm": 3.4457666873931885, "learning_rate": 1.9478134368054676e-05, "loss": 0.4274, "step": 11078 }, { "epoch": 0.5833115080404379, "grad_norm": 3.056290864944458, "learning_rate": 1.9466804640358798e-05, "loss": 0.4432, "step": 11136 }, { "epoch": 0.5863495888114818, "grad_norm": 4.071867942810059, "learning_rate": 1.9455356612427928e-05, "loss": 0.4344, "step": 11194 }, { "epoch": 0.5893876695825258, "grad_norm": 2.0395846366882324, "learning_rate": 1.9443790427320303e-05, "loss": 0.4714, "step": 11252 }, { "epoch": 0.5924257503535697, "grad_norm": 4.563007354736328, "learning_rate": 1.9432106229570685e-05, "loss": 0.5157, "step": 11310 }, { "epoch": 0.5954638311246137, "grad_norm": 3.7986621856689453, "learning_rate": 1.9420304165188574e-05, "loss": 0.4977, "step": 11368 }, { "epoch": 0.5985019118956576, "grad_norm": 5.301217555999756, "learning_rate": 1.9408384381656358e-05, "loss": 0.4662, "step": 11426 }, { "epoch": 0.6015399926667016, "grad_norm": 2.3288731575012207, "learning_rate": 1.939634702792749e-05, "loss": 0.4493, "step": 11484 }, { "epoch": 0.6045780734377455, "grad_norm": 3.7128169536590576, "learning_rate": 1.9384192254424606e-05, "loss": 0.4865, "step": 11542 }, { "epoch": 0.6076161542087894, "grad_norm": 4.314477920532227, "learning_rate": 1.9371920213037665e-05, "loss": 0.4715, "step": 11600 }, { "epoch": 0.6106542349798334, "grad_norm": 2.6989047527313232, "learning_rate": 1.935953105712205e-05, "loss": 0.4345, "step": 11658 }, { "epoch": 0.6136923157508773, "grad_norm": 3.5463671684265137, "learning_rate": 1.9347024941496628e-05, "loss": 0.4611, "step": 11716 }, { "epoch": 0.6167303965219213, "grad_norm": 4.914857387542725, "learning_rate": 1.9334402022441848e-05, "loss": 0.4952, "step": 11774 }, { "epoch": 0.6197684772929652, "grad_norm": 1.5133031606674194, "learning_rate": 1.932188309270537e-05, "loss": 0.4401, "step": 11832 }, { "epoch": 0.6228065580640092, "grad_norm": 2.30916428565979, "learning_rate": 1.9309029048500578e-05, "loss": 0.4177, "step": 11890 }, { "epoch": 0.6258446388350531, "grad_norm": 2.825598955154419, "learning_rate": 1.929605867567532e-05, "loss": 0.4529, "step": 11948 }, { "epoch": 0.6288827196060971, "grad_norm": 5.285458087921143, "learning_rate": 1.9282972136311554e-05, "loss": 0.4806, "step": 12006 }, { "epoch": 0.631920800377141, "grad_norm": 2.597923755645752, "learning_rate": 1.9269769593942872e-05, "loss": 0.4566, "step": 12064 }, { "epoch": 0.634958881148185, "grad_norm": 2.780212640762329, "learning_rate": 1.9256451213552497e-05, "loss": 0.4725, "step": 12122 }, { "epoch": 0.6379969619192289, "grad_norm": 4.1638031005859375, "learning_rate": 1.9243017161571194e-05, "loss": 0.463, "step": 12180 }, { "epoch": 0.6410350426902729, "grad_norm": 4.174670219421387, "learning_rate": 1.9229467605875196e-05, "loss": 0.5236, "step": 12238 }, { "epoch": 0.6440731234613168, "grad_norm": 1.9128369092941284, "learning_rate": 1.9215802715784096e-05, "loss": 0.4621, "step": 12296 }, { "epoch": 0.6471112042323608, "grad_norm": 4.490901947021484, "learning_rate": 1.9202022662058773e-05, "loss": 0.4517, "step": 12354 }, { "epoch": 0.6501492850034047, "grad_norm": 4.426553726196289, "learning_rate": 1.9188127616899202e-05, "loss": 0.488, "step": 12412 }, { "epoch": 0.6531873657744487, "grad_norm": 3.694254159927368, "learning_rate": 1.917411775394233e-05, "loss": 0.4705, "step": 12470 }, { "epoch": 0.6562254465454926, "grad_norm": 2.3134400844573975, "learning_rate": 1.9159993248259916e-05, "loss": 0.4402, "step": 12528 }, { "epoch": 0.6592635273165366, "grad_norm": 2.868987798690796, "learning_rate": 1.9145754276356323e-05, "loss": 0.4085, "step": 12586 }, { "epoch": 0.6623016080875805, "grad_norm": 3.815828323364258, "learning_rate": 1.9131401016166326e-05, "loss": 0.5569, "step": 12644 }, { "epoch": 0.6653396888586245, "grad_norm": 2.865863800048828, "learning_rate": 1.911693364705287e-05, "loss": 0.4515, "step": 12702 }, { "epoch": 0.6683777696296684, "grad_norm": 3.7862603664398193, "learning_rate": 1.9102352349804865e-05, "loss": 0.4685, "step": 12760 }, { "epoch": 0.6714158504007124, "grad_norm": 2.3399360179901123, "learning_rate": 1.9087657306634884e-05, "loss": 0.5087, "step": 12818 }, { "epoch": 0.6744539311717563, "grad_norm": 3.208674430847168, "learning_rate": 1.9072848701176905e-05, "loss": 0.4322, "step": 12876 }, { "epoch": 0.6774920119428003, "grad_norm": 1.2508207559585571, "learning_rate": 1.9057926718484036e-05, "loss": 0.39, "step": 12934 }, { "epoch": 0.6805300927138442, "grad_norm": 3.029885768890381, "learning_rate": 1.9042891545026164e-05, "loss": 0.4881, "step": 12992 }, { "epoch": 0.6835681734848882, "grad_norm": 1.723024606704712, "learning_rate": 1.9028005500450692e-05, "loss": 0.4016, "step": 13050 }, { "epoch": 0.6866062542559321, "grad_norm": 3.0587244033813477, "learning_rate": 1.9012746453978195e-05, "loss": 0.451, "step": 13108 }, { "epoch": 0.6896443350269761, "grad_norm": 3.979196548461914, "learning_rate": 1.899737478132781e-05, "loss": 0.4584, "step": 13166 }, { "epoch": 0.69268241579802, "grad_norm": 3.2428181171417236, "learning_rate": 1.8981890674588902e-05, "loss": 0.4419, "step": 13224 }, { "epoch": 0.695720496569064, "grad_norm": 1.9672743082046509, "learning_rate": 1.8966294327255843e-05, "loss": 0.4463, "step": 13282 }, { "epoch": 0.6987585773401079, "grad_norm": 3.543287754058838, "learning_rate": 1.895058593422561e-05, "loss": 0.5232, "step": 13340 }, { "epoch": 0.7017966581111519, "grad_norm": 2.751725435256958, "learning_rate": 1.8934765691795337e-05, "loss": 0.4627, "step": 13398 }, { "epoch": 0.7048347388821958, "grad_norm": 3.9089314937591553, "learning_rate": 1.8918833797659854e-05, "loss": 0.4701, "step": 13456 }, { "epoch": 0.7078728196532398, "grad_norm": 2.623382806777954, "learning_rate": 1.890279045090924e-05, "loss": 0.4627, "step": 13514 }, { "epoch": 0.7109109004242837, "grad_norm": 3.44734263420105, "learning_rate": 1.8886635852026307e-05, "loss": 0.5063, "step": 13572 }, { "epoch": 0.7139489811953277, "grad_norm": 4.096603870391846, "learning_rate": 1.887037020288412e-05, "loss": 0.4205, "step": 13630 }, { "epoch": 0.7169870619663716, "grad_norm": 3.9694747924804688, "learning_rate": 1.8853993706743465e-05, "loss": 0.479, "step": 13688 }, { "epoch": 0.7200251427374156, "grad_norm": 2.2461936473846436, "learning_rate": 1.88375065682503e-05, "loss": 0.4222, "step": 13746 }, { "epoch": 0.7230632235084595, "grad_norm": 4.268979549407959, "learning_rate": 1.882090899343321e-05, "loss": 0.4013, "step": 13804 }, { "epoch": 0.7261013042795035, "grad_norm": 2.9464776515960693, "learning_rate": 1.8804201189700833e-05, "loss": 0.5184, "step": 13862 }, { "epoch": 0.7291393850505474, "grad_norm": 3.1404519081115723, "learning_rate": 1.8787383365839248e-05, "loss": 0.4451, "step": 13920 }, { "epoch": 0.7321774658215914, "grad_norm": 3.048670530319214, "learning_rate": 1.8770455732009393e-05, "loss": 0.457, "step": 13978 }, { "epoch": 0.7352155465926353, "grad_norm": 3.074151039123535, "learning_rate": 1.8753418499744426e-05, "loss": 0.4711, "step": 14036 }, { "epoch": 0.7382536273636792, "grad_norm": 4.1698150634765625, "learning_rate": 1.873627188194708e-05, "loss": 0.4281, "step": 14094 }, { "epoch": 0.7412917081347231, "grad_norm": 2.6520071029663086, "learning_rate": 1.8719016092887e-05, "loss": 0.497, "step": 14152 }, { "epoch": 0.7443297889057671, "grad_norm": 1.3818339109420776, "learning_rate": 1.870165134819808e-05, "loss": 0.4234, "step": 14210 }, { "epoch": 0.747367869676811, "grad_norm": 3.5460736751556396, "learning_rate": 1.868417786487575e-05, "loss": 0.4444, "step": 14268 }, { "epoch": 0.750405950447855, "grad_norm": 2.8102331161499023, "learning_rate": 1.8666595861274283e-05, "loss": 0.4159, "step": 14326 }, { "epoch": 0.7534440312188989, "grad_norm": 3.3770508766174316, "learning_rate": 1.8648905557104046e-05, "loss": 0.4357, "step": 14384 }, { "epoch": 0.7564821119899429, "grad_norm": 2.95613169670105, "learning_rate": 1.863110717342876e-05, "loss": 0.4627, "step": 14442 }, { "epoch": 0.7595201927609868, "grad_norm": 2.80786395072937, "learning_rate": 1.8613200932662764e-05, "loss": 0.4331, "step": 14500 }, { "epoch": 0.7625582735320308, "grad_norm": 2.9433181285858154, "learning_rate": 1.8595187058568197e-05, "loss": 0.5087, "step": 14558 }, { "epoch": 0.7655963543030747, "grad_norm": 2.6625008583068848, "learning_rate": 1.8577065776252218e-05, "loss": 0.5018, "step": 14616 }, { "epoch": 0.7686344350741187, "grad_norm": 3.8713533878326416, "learning_rate": 1.8558837312164198e-05, "loss": 0.4454, "step": 14674 }, { "epoch": 0.7716725158451626, "grad_norm": 3.236130475997925, "learning_rate": 1.8540501894092894e-05, "loss": 0.4463, "step": 14732 }, { "epoch": 0.7747105966162066, "grad_norm": 1.9471006393432617, "learning_rate": 1.8522059751163578e-05, "loss": 0.4615, "step": 14790 }, { "epoch": 0.7777486773872505, "grad_norm": 1.1234129667282104, "learning_rate": 1.85035111138352e-05, "loss": 0.3841, "step": 14848 }, { "epoch": 0.7807867581582945, "grad_norm": 3.155194044113159, "learning_rate": 1.8484856213897496e-05, "loss": 0.4932, "step": 14906 }, { "epoch": 0.7838248389293384, "grad_norm": 1.2532273530960083, "learning_rate": 1.8466095284468103e-05, "loss": 0.427, "step": 14964 }, { "epoch": 0.7868629197003824, "grad_norm": 3.315812349319458, "learning_rate": 1.8447228559989618e-05, "loss": 0.4406, "step": 15022 }, { "epoch": 0.7899010004714263, "grad_norm": 2.3999452590942383, "learning_rate": 1.842858427754608e-05, "loss": 0.4413, "step": 15080 }, { "epoch": 0.7929390812424703, "grad_norm": 2.896650791168213, "learning_rate": 1.8409508485466538e-05, "loss": 0.4068, "step": 15138 }, { "epoch": 0.7959771620135142, "grad_norm": 3.3152272701263428, "learning_rate": 1.8390327605464747e-05, "loss": 0.4708, "step": 15196 }, { "epoch": 0.7990152427845582, "grad_norm": 2.573716163635254, "learning_rate": 1.8371041877231145e-05, "loss": 0.4506, "step": 15254 }, { "epoch": 0.8020533235556021, "grad_norm": 1.0098395347595215, "learning_rate": 1.8351651541766398e-05, "loss": 0.4614, "step": 15312 }, { "epoch": 0.805091404326646, "grad_norm": 2.7257494926452637, "learning_rate": 1.8332156841378376e-05, "loss": 0.481, "step": 15370 }, { "epoch": 0.80812948509769, "grad_norm": 3.291948080062866, "learning_rate": 1.8312558019679113e-05, "loss": 0.4872, "step": 15428 }, { "epoch": 0.811167565868734, "grad_norm": 0.6372181177139282, "learning_rate": 1.82928553215818e-05, "loss": 0.4664, "step": 15486 }, { "epoch": 0.8142056466397779, "grad_norm": 2.14487361907959, "learning_rate": 1.8273048993297682e-05, "loss": 0.4443, "step": 15544 }, { "epoch": 0.8172437274108219, "grad_norm": 1.6703099012374878, "learning_rate": 1.8253139282333005e-05, "loss": 0.4683, "step": 15602 }, { "epoch": 0.8202818081818658, "grad_norm": 3.7610647678375244, "learning_rate": 1.8233126437485925e-05, "loss": 0.4299, "step": 15660 }, { "epoch": 0.8233198889529098, "grad_norm": 3.429608106613159, "learning_rate": 1.821301070884338e-05, "loss": 0.3976, "step": 15718 }, { "epoch": 0.8263579697239537, "grad_norm": 3.0198211669921875, "learning_rate": 1.819279234777799e-05, "loss": 0.407, "step": 15776 }, { "epoch": 0.8293960504949976, "grad_norm": 3.1796703338623047, "learning_rate": 1.817247160694489e-05, "loss": 0.4235, "step": 15834 }, { "epoch": 0.8324341312660416, "grad_norm": 2.235328197479248, "learning_rate": 1.81520487402786e-05, "loss": 0.403, "step": 15892 }, { "epoch": 0.8354722120370855, "grad_norm": 3.7409324645996094, "learning_rate": 1.8131524002989816e-05, "loss": 0.4325, "step": 15950 }, { "epoch": 0.8385102928081295, "grad_norm": 2.7824206352233887, "learning_rate": 1.811089765156227e-05, "loss": 0.432, "step": 16008 }, { "epoch": 0.8415483735791734, "grad_norm": 1.6856441497802734, "learning_rate": 1.8090528175270648e-05, "loss": 0.4156, "step": 16066 }, { "epoch": 0.8445864543502174, "grad_norm": 2.9528396129608154, "learning_rate": 1.8069701110949214e-05, "loss": 0.4486, "step": 16124 }, { "epoch": 0.8476245351212613, "grad_norm": 1.5881803035736084, "learning_rate": 1.8048773205047752e-05, "loss": 0.4133, "step": 16182 }, { "epoch": 0.8506626158923053, "grad_norm": 5.50068473815918, "learning_rate": 1.8027744719088103e-05, "loss": 0.4553, "step": 16240 }, { "epoch": 0.8537006966633492, "grad_norm": 3.2943098545074463, "learning_rate": 1.800661591584899e-05, "loss": 0.4428, "step": 16298 }, { "epoch": 0.8567387774343932, "grad_norm": 1.3706376552581787, "learning_rate": 1.798538705936273e-05, "loss": 0.4779, "step": 16356 }, { "epoch": 0.8597768582054371, "grad_norm": 2.18271541595459, "learning_rate": 1.796405841491194e-05, "loss": 0.4687, "step": 16414 }, { "epoch": 0.8628149389764811, "grad_norm": 2.5106441974639893, "learning_rate": 1.794263024902622e-05, "loss": 0.4016, "step": 16472 }, { "epoch": 0.865853019747525, "grad_norm": 2.757732629776001, "learning_rate": 1.7921102829478832e-05, "loss": 0.4948, "step": 16530 }, { "epoch": 0.868891100518569, "grad_norm": 0.37621229887008667, "learning_rate": 1.7899476425283318e-05, "loss": 0.4304, "step": 16588 }, { "epoch": 0.8719291812896129, "grad_norm": 4.135168552398682, "learning_rate": 1.787775130669019e-05, "loss": 0.4195, "step": 16646 }, { "epoch": 0.8749672620606569, "grad_norm": 2.2052392959594727, "learning_rate": 1.7855927745183504e-05, "loss": 0.4449, "step": 16704 }, { "epoch": 0.8780053428317008, "grad_norm": 2.8733346462249756, "learning_rate": 1.7834006013477513e-05, "loss": 0.5016, "step": 16762 }, { "epoch": 0.8810434236027448, "grad_norm": 1.8927271366119385, "learning_rate": 1.7811986385513226e-05, "loss": 0.3793, "step": 16820 }, { "epoch": 0.8840815043737887, "grad_norm": 3.7612531185150146, "learning_rate": 1.7789869136454988e-05, "loss": 0.3601, "step": 16878 }, { "epoch": 0.8871195851448327, "grad_norm": 1.6613848209381104, "learning_rate": 1.7767654542687057e-05, "loss": 0.4772, "step": 16936 }, { "epoch": 0.8901576659158766, "grad_norm": 2.5755159854888916, "learning_rate": 1.7745342881810144e-05, "loss": 0.4475, "step": 16994 }, { "epoch": 0.8931957466869206, "grad_norm": 2.7520928382873535, "learning_rate": 1.7722934432637937e-05, "loss": 0.3942, "step": 17052 }, { "epoch": 0.8962338274579645, "grad_norm": 4.439705848693848, "learning_rate": 1.770042947519362e-05, "loss": 0.4361, "step": 17110 }, { "epoch": 0.8992719082290085, "grad_norm": 2.091926097869873, "learning_rate": 1.7677828290706382e-05, "loss": 0.42, "step": 17168 }, { "epoch": 0.9000052380702949, "eval_accuracy": 0.8844256401062012, "eval_loss": 0.44025254249572754, "eval_runtime": 5730.5358, "eval_samples_per_second": 0.835, "eval_steps_per_second": 0.835, "step": 17182 }, { "epoch": 0.9023099890000523, "grad_norm": 0.736885666847229, "learning_rate": 1.7655131161607887e-05, "loss": 0.4806, "step": 17226 }, { "epoch": 0.9053480697710963, "grad_norm": 2.1999900341033936, "learning_rate": 1.7632732159520203e-05, "loss": 0.4541, "step": 17284 }, { "epoch": 0.9083861505421402, "grad_norm": 2.4895384311676025, "learning_rate": 1.76098456352832e-05, "loss": 0.4643, "step": 17342 }, { "epoch": 0.9114242313131842, "grad_norm": 2.773494005203247, "learning_rate": 1.7586864015968063e-05, "loss": 0.4031, "step": 17400 }, { "epoch": 0.9144623120842281, "grad_norm": 3.805938720703125, "learning_rate": 1.7563787588760503e-05, "loss": 0.4756, "step": 17458 }, { "epoch": 0.9175003928552721, "grad_norm": 2.412860631942749, "learning_rate": 1.7540616642030974e-05, "loss": 0.4453, "step": 17516 }, { "epoch": 0.920538473626316, "grad_norm": 3.0736114978790283, "learning_rate": 1.751735146533107e-05, "loss": 0.4374, "step": 17574 }, { "epoch": 0.92357655439736, "grad_norm": 3.2423524856567383, "learning_rate": 1.7493992349389927e-05, "loss": 0.3971, "step": 17632 }, { "epoch": 0.9266146351684039, "grad_norm": 2.4448719024658203, "learning_rate": 1.7470539586110572e-05, "loss": 0.4407, "step": 17690 }, { "epoch": 0.9296527159394479, "grad_norm": 4.234783172607422, "learning_rate": 1.7446993468566268e-05, "loss": 0.4136, "step": 17748 }, { "epoch": 0.9326907967104918, "grad_norm": 2.7946712970733643, "learning_rate": 1.742335429099688e-05, "loss": 0.4021, "step": 17806 }, { "epoch": 0.9357288774815358, "grad_norm": 0.8968492746353149, "learning_rate": 1.7399622348805165e-05, "loss": 0.4591, "step": 17864 }, { "epoch": 0.9387669582525797, "grad_norm": 2.746527671813965, "learning_rate": 1.7375797938553108e-05, "loss": 0.3938, "step": 17922 }, { "epoch": 0.9418050390236237, "grad_norm": 1.0526750087738037, "learning_rate": 1.73518813579582e-05, "loss": 0.4577, "step": 17980 }, { "epoch": 0.9448431197946676, "grad_norm": 2.459176778793335, "learning_rate": 1.7327872905889727e-05, "loss": 0.395, "step": 18038 }, { "epoch": 0.9478812005657116, "grad_norm": 3.1182572841644287, "learning_rate": 1.7303772882365018e-05, "loss": 0.4536, "step": 18096 }, { "epoch": 0.9509192813367555, "grad_norm": 2.8542633056640625, "learning_rate": 1.7279581588545723e-05, "loss": 0.4448, "step": 18154 }, { "epoch": 0.9539573621077995, "grad_norm": 2.421351432800293, "learning_rate": 1.7255299326734026e-05, "loss": 0.4568, "step": 18212 }, { "epoch": 0.9569954428788434, "grad_norm": 1.9418818950653076, "learning_rate": 1.7230926400368878e-05, "loss": 0.4509, "step": 18270 }, { "epoch": 0.9600335236498874, "grad_norm": 2.297189950942993, "learning_rate": 1.720688565864609e-05, "loss": 0.4185, "step": 18328 }, { "epoch": 0.9630716044209313, "grad_norm": 3.155817747116089, "learning_rate": 1.7182333868082773e-05, "loss": 0.4724, "step": 18386 }, { "epoch": 0.9661096851919753, "grad_norm": 2.3270270824432373, "learning_rate": 1.715769232476584e-05, "loss": 0.4434, "step": 18444 }, { "epoch": 0.9691477659630192, "grad_norm": 2.4003474712371826, "learning_rate": 1.7132961336623944e-05, "loss": 0.4325, "step": 18502 }, { "epoch": 0.9721858467340632, "grad_norm": 4.449118614196777, "learning_rate": 1.710814121270346e-05, "loss": 0.4497, "step": 18560 }, { "epoch": 0.9752239275051071, "grad_norm": 4.001181125640869, "learning_rate": 1.7083232263164643e-05, "loss": 0.4133, "step": 18618 }, { "epoch": 0.9782620082761511, "grad_norm": 1.3908356428146362, "learning_rate": 1.7058234799277733e-05, "loss": 0.436, "step": 18676 }, { "epoch": 0.981300089047195, "grad_norm": 2.0721793174743652, "learning_rate": 1.703314913341908e-05, "loss": 0.327, "step": 18734 }, { "epoch": 0.984338169818239, "grad_norm": 1.8085274696350098, "learning_rate": 1.700797557906723e-05, "loss": 0.4782, "step": 18792 }, { "epoch": 0.9873762505892829, "grad_norm": 2.877991199493408, "learning_rate": 1.6982714450799006e-05, "loss": 0.4804, "step": 18850 }, { "epoch": 0.9904143313603269, "grad_norm": 2.9189906120300293, "learning_rate": 1.6957366064285604e-05, "loss": 0.4473, "step": 18908 }, { "epoch": 0.9934524121313708, "grad_norm": 2.6251885890960693, "learning_rate": 1.6931930736288605e-05, "loss": 0.4665, "step": 18966 }, { "epoch": 0.9964904929024148, "grad_norm": 2.457298517227173, "learning_rate": 1.6906408784656045e-05, "loss": 0.4931, "step": 19024 }, { "epoch": 0.9995285736734587, "grad_norm": 4.4407057762146, "learning_rate": 1.6880800528318443e-05, "loss": 0.4835, "step": 19082 }, { "epoch": 1.0025666544445027, "grad_norm": 0.4853041172027588, "learning_rate": 1.68551062872848e-05, "loss": 0.4143, "step": 19140 }, { "epoch": 1.0056047352155466, "grad_norm": 3.372084379196167, "learning_rate": 1.682932638263862e-05, "loss": 0.4104, "step": 19198 }, { "epoch": 1.0086428159865906, "grad_norm": 3.5660102367401123, "learning_rate": 1.6803461136533877e-05, "loss": 0.3998, "step": 19256 }, { "epoch": 1.0116808967576345, "grad_norm": 3.154710531234741, "learning_rate": 1.6777510872191012e-05, "loss": 0.4479, "step": 19314 }, { "epoch": 1.0147189775286785, "grad_norm": 2.6014859676361084, "learning_rate": 1.675147591389286e-05, "loss": 0.4407, "step": 19372 }, { "epoch": 1.0177570582997224, "grad_norm": 2.906419515609741, "learning_rate": 1.672535658698064e-05, "loss": 0.3687, "step": 19430 }, { "epoch": 1.0207951390707664, "grad_norm": 4.82797908782959, "learning_rate": 1.669915321784986e-05, "loss": 0.4467, "step": 19488 }, { "epoch": 1.0238332198418103, "grad_norm": 2.7285468578338623, "learning_rate": 1.6673320066929267e-05, "loss": 0.4124, "step": 19546 }, { "epoch": 1.0268713006128543, "grad_norm": 3.0213897228240967, "learning_rate": 1.664695103165033e-05, "loss": 0.3955, "step": 19604 }, { "epoch": 1.0299093813838982, "grad_norm": 1.75594961643219, "learning_rate": 1.662049893393386e-05, "loss": 0.3951, "step": 19662 }, { "epoch": 1.0329474621549422, "grad_norm": 4.119887828826904, "learning_rate": 1.659396410433378e-05, "loss": 0.405, "step": 19720 }, { "epoch": 1.035985542925986, "grad_norm": 2.607999801635742, "learning_rate": 1.6567346874437857e-05, "loss": 0.4512, "step": 19778 }, { "epoch": 1.03902362369703, "grad_norm": 2.761012077331543, "learning_rate": 1.6540647576863546e-05, "loss": 0.3692, "step": 19836 }, { "epoch": 1.042061704468074, "grad_norm": 1.8808788061141968, "learning_rate": 1.6513866545253866e-05, "loss": 0.4663, "step": 19894 }, { "epoch": 1.045099785239118, "grad_norm": 3.0192790031433105, "learning_rate": 1.648700411427319e-05, "loss": 0.4485, "step": 19952 }, { "epoch": 1.048137866010162, "grad_norm": 3.0104329586029053, "learning_rate": 1.64600606196031e-05, "loss": 0.4003, "step": 20010 }, { "epoch": 1.0511759467812058, "grad_norm": 2.610039472579956, "learning_rate": 1.6433036397938168e-05, "loss": 0.3967, "step": 20068 }, { "epoch": 1.0542140275522498, "grad_norm": 2.601706027984619, "learning_rate": 1.6405931786981753e-05, "loss": 0.3656, "step": 20126 }, { "epoch": 1.0572521083232937, "grad_norm": 1.4956645965576172, "learning_rate": 1.63787471254418e-05, "loss": 0.3829, "step": 20184 }, { "epoch": 1.0602901890943377, "grad_norm": 2.5245773792266846, "learning_rate": 1.635148275302657e-05, "loss": 0.4309, "step": 20242 }, { "epoch": 1.0633282698653816, "grad_norm": 2.844923734664917, "learning_rate": 1.6324139010440435e-05, "loss": 0.4478, "step": 20300 }, { "epoch": 1.0663663506364256, "grad_norm": 2.980348587036133, "learning_rate": 1.629671623937959e-05, "loss": 0.3524, "step": 20358 }, { "epoch": 1.0694044314074695, "grad_norm": 2.8273379802703857, "learning_rate": 1.626921478252781e-05, "loss": 0.4499, "step": 20416 }, { "epoch": 1.0724425121785135, "grad_norm": 3.1932373046875, "learning_rate": 1.624163498355213e-05, "loss": 0.3989, "step": 20474 }, { "epoch": 1.0754805929495574, "grad_norm": 3.9423575401306152, "learning_rate": 1.62139771870986e-05, "loss": 0.4338, "step": 20532 }, { "epoch": 1.0785186737206014, "grad_norm": 3.1419034004211426, "learning_rate": 1.618624173878793e-05, "loss": 0.4207, "step": 20590 }, { "epoch": 1.0815567544916453, "grad_norm": 0.7265225052833557, "learning_rate": 1.61584289852112e-05, "loss": 0.3271, "step": 20648 }, { "epoch": 1.0845948352626893, "grad_norm": 1.7413750886917114, "learning_rate": 1.613053927392553e-05, "loss": 0.3501, "step": 20706 }, { "epoch": 1.0876329160337332, "grad_norm": 4.131454944610596, "learning_rate": 1.6102572953449715e-05, "loss": 0.5085, "step": 20764 }, { "epoch": 1.0906709968047772, "grad_norm": 3.0570194721221924, "learning_rate": 1.6074530373259887e-05, "loss": 0.4154, "step": 20822 }, { "epoch": 1.0937090775758211, "grad_norm": 4.058136463165283, "learning_rate": 1.6046897326469475e-05, "loss": 0.4007, "step": 20880 }, { "epoch": 1.096747158346865, "grad_norm": 2.809772491455078, "learning_rate": 1.601870457882787e-05, "loss": 0.4188, "step": 20938 }, { "epoch": 1.099785239117909, "grad_norm": 2.9016387462615967, "learning_rate": 1.5990436619518428e-05, "loss": 0.3567, "step": 20996 }, { "epoch": 1.102823319888953, "grad_norm": 3.3889174461364746, "learning_rate": 1.5962093801786668e-05, "loss": 0.3956, "step": 21054 }, { "epoch": 1.105861400659997, "grad_norm": 1.6921156644821167, "learning_rate": 1.5933676479813547e-05, "loss": 0.352, "step": 21112 }, { "epoch": 1.108899481431041, "grad_norm": 3.156926393508911, "learning_rate": 1.5905185008711063e-05, "loss": 0.4026, "step": 21170 }, { "epoch": 1.1119375622020848, "grad_norm": 1.8040690422058105, "learning_rate": 1.58766197445178e-05, "loss": 0.4248, "step": 21228 }, { "epoch": 1.1149756429731288, "grad_norm": 3.1559841632843018, "learning_rate": 1.5848475435042218e-05, "loss": 0.3691, "step": 21286 }, { "epoch": 1.1180137237441727, "grad_norm": 2.4738497734069824, "learning_rate": 1.5820260539561704e-05, "loss": 0.367, "step": 21344 }, { "epoch": 1.1210518045152167, "grad_norm": 3.917677402496338, "learning_rate": 1.579147854311163e-05, "loss": 0.426, "step": 21402 }, { "epoch": 1.1240898852862606, "grad_norm": 2.7536489963531494, "learning_rate": 1.576262417448334e-05, "loss": 0.4269, "step": 21460 }, { "epoch": 1.1271279660573046, "grad_norm": 3.678410768508911, "learning_rate": 1.5733697794250292e-05, "loss": 0.3977, "step": 21518 }, { "epoch": 1.1301660468283483, "grad_norm": 3.2154643535614014, "learning_rate": 1.5704699763885845e-05, "loss": 0.3647, "step": 21576 }, { "epoch": 1.1332041275993925, "grad_norm": 2.71803617477417, "learning_rate": 1.5675630445758707e-05, "loss": 0.3774, "step": 21634 }, { "epoch": 1.1362422083704362, "grad_norm": 2.9909651279449463, "learning_rate": 1.5646490203128424e-05, "loss": 0.3707, "step": 21692 }, { "epoch": 1.1392802891414804, "grad_norm": 3.9027791023254395, "learning_rate": 1.561727940014084e-05, "loss": 0.3925, "step": 21750 }, { "epoch": 1.1423183699125241, "grad_norm": 2.427164316177368, "learning_rate": 1.558799840182354e-05, "loss": 0.3666, "step": 21808 }, { "epoch": 1.1453564506835683, "grad_norm": 4.000829219818115, "learning_rate": 1.55586475740813e-05, "loss": 0.4233, "step": 21866 }, { "epoch": 1.148394531454612, "grad_norm": 2.324054718017578, "learning_rate": 1.5529227283691498e-05, "loss": 0.4315, "step": 21924 }, { "epoch": 1.1514326122256562, "grad_norm": 2.127596378326416, "learning_rate": 1.549973789829954e-05, "loss": 0.3987, "step": 21982 }, { "epoch": 1.1544706929967, "grad_norm": 2.658895254135132, "learning_rate": 1.5470179786414278e-05, "loss": 0.3766, "step": 22040 }, { "epoch": 1.157508773767744, "grad_norm": 3.48286509513855, "learning_rate": 1.5440553317403375e-05, "loss": 0.414, "step": 22098 }, { "epoch": 1.1605468545387878, "grad_norm": 2.305206298828125, "learning_rate": 1.5410858861488717e-05, "loss": 0.3961, "step": 22156 }, { "epoch": 1.163584935309832, "grad_norm": 2.7347195148468018, "learning_rate": 1.5381096789741777e-05, "loss": 0.3038, "step": 22214 }, { "epoch": 1.1666230160808757, "grad_norm": 3.5154871940612793, "learning_rate": 1.5351267474078967e-05, "loss": 0.3442, "step": 22272 }, { "epoch": 1.1696610968519197, "grad_norm": 3.8937666416168213, "learning_rate": 1.532137128725701e-05, "loss": 0.396, "step": 22330 }, { "epoch": 1.1726991776229636, "grad_norm": 3.284895896911621, "learning_rate": 1.529140860286828e-05, "loss": 0.3605, "step": 22388 }, { "epoch": 1.1757372583940076, "grad_norm": 2.9780056476593018, "learning_rate": 1.5261379795336102e-05, "loss": 0.3908, "step": 22446 }, { "epoch": 1.1787753391650515, "grad_norm": 3.4775991439819336, "learning_rate": 1.5231285239910119e-05, "loss": 0.4244, "step": 22504 }, { "epoch": 1.1818134199360955, "grad_norm": 3.3921220302581787, "learning_rate": 1.520112531266157e-05, "loss": 0.4234, "step": 22562 }, { "epoch": 1.1848515007071394, "grad_norm": 4.370563507080078, "learning_rate": 1.5170900390478605e-05, "loss": 0.3605, "step": 22620 }, { "epoch": 1.1878895814781834, "grad_norm": 2.411207675933838, "learning_rate": 1.5140610851061573e-05, "loss": 0.4109, "step": 22678 }, { "epoch": 1.1909276622492273, "grad_norm": 1.8209956884384155, "learning_rate": 1.5110257072918297e-05, "loss": 0.3973, "step": 22736 }, { "epoch": 1.1939657430202713, "grad_norm": 1.9300421476364136, "learning_rate": 1.5079839435359347e-05, "loss": 0.385, "step": 22794 }, { "epoch": 1.1970038237913152, "grad_norm": 2.628262519836426, "learning_rate": 1.504935831849331e-05, "loss": 0.3439, "step": 22852 }, { "epoch": 1.2000419045623592, "grad_norm": 1.3332035541534424, "learning_rate": 1.5018814103222013e-05, "loss": 0.3466, "step": 22910 }, { "epoch": 1.203079985333403, "grad_norm": 0.541492223739624, "learning_rate": 1.4988207171235807e-05, "loss": 0.4193, "step": 22968 }, { "epoch": 1.206118066104447, "grad_norm": 3.0223543643951416, "learning_rate": 1.4957537905008744e-05, "loss": 0.3866, "step": 23026 }, { "epoch": 1.209156146875491, "grad_norm": 2.3658430576324463, "learning_rate": 1.492680668779384e-05, "loss": 0.4248, "step": 23084 }, { "epoch": 1.212194227646535, "grad_norm": 2.964507579803467, "learning_rate": 1.4896013903618272e-05, "loss": 0.3904, "step": 23142 }, { "epoch": 1.215232308417579, "grad_norm": 2.325568675994873, "learning_rate": 1.4865159937278566e-05, "loss": 0.3901, "step": 23200 }, { "epoch": 1.2182703891886228, "grad_norm": 2.5834431648254395, "learning_rate": 1.4834245174335812e-05, "loss": 0.386, "step": 23258 }, { "epoch": 1.2213084699596668, "grad_norm": 3.9499247074127197, "learning_rate": 1.480327000111083e-05, "loss": 0.4012, "step": 23316 }, { "epoch": 1.2243465507307107, "grad_norm": 3.858048439025879, "learning_rate": 1.477223480467934e-05, "loss": 0.4277, "step": 23374 }, { "epoch": 1.2273846315017547, "grad_norm": 2.3730807304382324, "learning_rate": 1.4741139972867137e-05, "loss": 0.3963, "step": 23432 }, { "epoch": 1.2304227122727986, "grad_norm": 3.197197914123535, "learning_rate": 1.4709985894245246e-05, "loss": 0.4269, "step": 23490 }, { "epoch": 1.2334607930438426, "grad_norm": 1.0639739036560059, "learning_rate": 1.4678772958125043e-05, "loss": 0.486, "step": 23548 }, { "epoch": 1.2364988738148865, "grad_norm": 4.4826202392578125, "learning_rate": 1.4647501554553417e-05, "loss": 0.3983, "step": 23606 }, { "epoch": 1.2395369545859305, "grad_norm": 3.2974660396575928, "learning_rate": 1.4616172074307886e-05, "loss": 0.3893, "step": 23664 }, { "epoch": 1.2425750353569744, "grad_norm": 4.817580223083496, "learning_rate": 1.4584784908891705e-05, "loss": 0.4044, "step": 23722 }, { "epoch": 1.2456131161280184, "grad_norm": 4.333406925201416, "learning_rate": 1.455334045052899e-05, "loss": 0.3787, "step": 23780 }, { "epoch": 1.2486511968990623, "grad_norm": 4.401366233825684, "learning_rate": 1.4521839092159802e-05, "loss": 0.3653, "step": 23838 }, { "epoch": 1.2516892776701063, "grad_norm": 1.6555136442184448, "learning_rate": 1.4490281227435248e-05, "loss": 0.3371, "step": 23896 }, { "epoch": 1.2547273584411502, "grad_norm": 3.3965320587158203, "learning_rate": 1.4459212792449709e-05, "loss": 0.3923, "step": 23954 }, { "epoch": 1.2577654392121942, "grad_norm": 1.8068640232086182, "learning_rate": 1.4427544056073314e-05, "loss": 0.4196, "step": 24012 }, { "epoch": 1.2608035199832381, "grad_norm": 2.8050827980041504, "learning_rate": 1.4395819991682645e-05, "loss": 0.448, "step": 24070 }, { "epoch": 1.263841600754282, "grad_norm": 3.339679002761841, "learning_rate": 1.4364040995711812e-05, "loss": 0.4015, "step": 24128 }, { "epoch": 1.266879681525326, "grad_norm": 1.8822154998779297, "learning_rate": 1.4332207465281365e-05, "loss": 0.3494, "step": 24186 }, { "epoch": 1.26991776229637, "grad_norm": 2.0527522563934326, "learning_rate": 1.4300319798193339e-05, "loss": 0.4453, "step": 24244 }, { "epoch": 1.272955843067414, "grad_norm": 2.963618516921997, "learning_rate": 1.4268378392926277e-05, "loss": 0.451, "step": 24302 }, { "epoch": 1.2759939238384579, "grad_norm": 2.991400957107544, "learning_rate": 1.4236383648630245e-05, "loss": 0.3719, "step": 24360 }, { "epoch": 1.2790320046095018, "grad_norm": 1.891254186630249, "learning_rate": 1.4204335965121862e-05, "loss": 0.334, "step": 24418 }, { "epoch": 1.2820700853805458, "grad_norm": 3.0473315715789795, "learning_rate": 1.4172235742879283e-05, "loss": 0.4188, "step": 24476 }, { "epoch": 1.2851081661515897, "grad_norm": 1.8583441972732544, "learning_rate": 1.414008338303721e-05, "loss": 0.4133, "step": 24534 }, { "epoch": 1.2881462469226337, "grad_norm": 2.5623059272766113, "learning_rate": 1.4107879287381872e-05, "loss": 0.4317, "step": 24592 }, { "epoch": 1.2911843276936776, "grad_norm": 1.2033942937850952, "learning_rate": 1.4075623858346e-05, "loss": 0.3494, "step": 24650 }, { "epoch": 1.2942224084647216, "grad_norm": 1.8575024604797363, "learning_rate": 1.404331749900381e-05, "loss": 0.4533, "step": 24708 }, { "epoch": 1.2972604892357655, "grad_norm": 2.617656946182251, "learning_rate": 1.4010960613065956e-05, "loss": 0.2822, "step": 24766 }, { "epoch": 1.3002985700068095, "grad_norm": 1.466433048248291, "learning_rate": 1.397855360487449e-05, "loss": 0.3965, "step": 24824 }, { "epoch": 1.3033366507778534, "grad_norm": 2.6398956775665283, "learning_rate": 1.3946096879397808e-05, "loss": 0.4247, "step": 24882 }, { "epoch": 1.3063747315488974, "grad_norm": 3.450256109237671, "learning_rate": 1.3913590842225589e-05, "loss": 0.3854, "step": 24940 }, { "epoch": 1.3094128123199413, "grad_norm": 3.451451063156128, "learning_rate": 1.388103589956372e-05, "loss": 0.3852, "step": 24998 }, { "epoch": 1.3124508930909853, "grad_norm": 3.278472900390625, "learning_rate": 1.3848432458229241e-05, "loss": 0.3999, "step": 25056 }, { "epoch": 1.3154889738620292, "grad_norm": 0.6535636782646179, "learning_rate": 1.381578092564524e-05, "loss": 0.4151, "step": 25114 }, { "epoch": 1.3185270546330732, "grad_norm": 3.161670207977295, "learning_rate": 1.378308170983576e-05, "loss": 0.3413, "step": 25172 }, { "epoch": 1.3215651354041171, "grad_norm": 3.4061434268951416, "learning_rate": 1.375033521942072e-05, "loss": 0.3902, "step": 25230 }, { "epoch": 1.324603216175161, "grad_norm": 3.568188428878784, "learning_rate": 1.3717541863610799e-05, "loss": 0.3391, "step": 25288 }, { "epoch": 1.327641296946205, "grad_norm": 1.4888482093811035, "learning_rate": 1.368470205220231e-05, "loss": 0.4018, "step": 25346 }, { "epoch": 1.330679377717249, "grad_norm": 3.4848194122314453, "learning_rate": 1.36518161955721e-05, "loss": 0.4045, "step": 25404 }, { "epoch": 1.333717458488293, "grad_norm": 2.341862916946411, "learning_rate": 1.3618884704672413e-05, "loss": 0.3615, "step": 25462 }, { "epoch": 1.3367555392593369, "grad_norm": 4.151137351989746, "learning_rate": 1.3585907991025737e-05, "loss": 0.4122, "step": 25520 }, { "epoch": 1.3397936200303808, "grad_norm": 3.4976377487182617, "learning_rate": 1.3552886466719696e-05, "loss": 0.3674, "step": 25578 }, { "epoch": 1.3428317008014248, "grad_norm": 3.3959484100341797, "learning_rate": 1.3519820544401882e-05, "loss": 0.3742, "step": 25636 }, { "epoch": 1.3458697815724687, "grad_norm": 1.3133649826049805, "learning_rate": 1.3486710637274687e-05, "loss": 0.3388, "step": 25694 }, { "epoch": 1.3489078623435127, "grad_norm": 3.167113780975342, "learning_rate": 1.3453557159090159e-05, "loss": 0.3886, "step": 25752 }, { "epoch": 1.3519459431145566, "grad_norm": 2.9960269927978516, "learning_rate": 1.342036052414482e-05, "loss": 0.3896, "step": 25810 }, { "epoch": 1.3549840238856006, "grad_norm": 3.482513427734375, "learning_rate": 1.3387121147274498e-05, "loss": 0.4203, "step": 25868 }, { "epoch": 1.3580221046566445, "grad_norm": 2.4168648719787598, "learning_rate": 1.3353839443849134e-05, "loss": 0.343, "step": 25926 }, { "epoch": 1.3610601854276885, "grad_norm": 0.6455244421958923, "learning_rate": 1.33205158297676e-05, "loss": 0.3872, "step": 25984 }, { "epoch": 1.3640982661987324, "grad_norm": 4.288425445556641, "learning_rate": 1.3287150721452488e-05, "loss": 0.3655, "step": 26042 }, { "epoch": 1.3671363469697764, "grad_norm": 2.454954147338867, "learning_rate": 1.3254320850234712e-05, "loss": 0.4089, "step": 26100 }, { "epoch": 1.3701744277408203, "grad_norm": 3.7248048782348633, "learning_rate": 1.322087470228127e-05, "loss": 0.3784, "step": 26158 }, { "epoch": 1.3732125085118643, "grad_norm": 1.3180829286575317, "learning_rate": 1.3187388305241823e-05, "loss": 0.3933, "step": 26216 }, { "epoch": 1.3762505892829082, "grad_norm": 1.625256061553955, "learning_rate": 1.3153862077573157e-05, "loss": 0.4252, "step": 26274 }, { "epoch": 1.3792886700539522, "grad_norm": 2.6077065467834473, "learning_rate": 1.312029643822979e-05, "loss": 0.385, "step": 26332 }, { "epoch": 1.3823267508249961, "grad_norm": 3.2308919429779053, "learning_rate": 1.3086691806658749e-05, "loss": 0.4137, "step": 26390 }, { "epoch": 1.38536483159604, "grad_norm": 3.302467107772827, "learning_rate": 1.3053048602794315e-05, "loss": 0.3842, "step": 26448 }, { "epoch": 1.388402912367084, "grad_norm": 2.4501612186431885, "learning_rate": 1.3019367247052781e-05, "loss": 0.3889, "step": 26506 }, { "epoch": 1.391440993138128, "grad_norm": 3.628817319869995, "learning_rate": 1.29856481603272e-05, "loss": 0.3531, "step": 26564 }, { "epoch": 1.394479073909172, "grad_norm": 2.43445086479187, "learning_rate": 1.2951891763982125e-05, "loss": 0.402, "step": 26622 }, { "epoch": 1.3975171546802159, "grad_norm": 0.9383435845375061, "learning_rate": 1.2918098479848336e-05, "loss": 0.445, "step": 26680 }, { "epoch": 1.4005552354512598, "grad_norm": 3.4569339752197266, "learning_rate": 1.2884268730217577e-05, "loss": 0.3929, "step": 26738 }, { "epoch": 1.4035933162223038, "grad_norm": 1.1176038980484009, "learning_rate": 1.2850402937837283e-05, "loss": 0.4028, "step": 26796 }, { "epoch": 1.4066313969933477, "grad_norm": 3.2979259490966797, "learning_rate": 1.2816501525905282e-05, "loss": 0.4184, "step": 26854 }, { "epoch": 1.4096694777643917, "grad_norm": 3.3147165775299072, "learning_rate": 1.2782564918064522e-05, "loss": 0.4289, "step": 26912 }, { "epoch": 1.4127075585354356, "grad_norm": 3.0880119800567627, "learning_rate": 1.2748593538397764e-05, "loss": 0.4247, "step": 26970 }, { "epoch": 1.4157456393064796, "grad_norm": 2.7575788497924805, "learning_rate": 1.27145878114223e-05, "loss": 0.3868, "step": 27028 }, { "epoch": 1.4187837200775235, "grad_norm": 2.9284110069274902, "learning_rate": 1.2680548162084614e-05, "loss": 0.4337, "step": 27086 }, { "epoch": 1.4218218008485675, "grad_norm": 2.6792969703674316, "learning_rate": 1.2646475015755124e-05, "loss": 0.4215, "step": 27144 }, { "epoch": 1.4248598816196114, "grad_norm": 2.336975574493408, "learning_rate": 1.261236879822282e-05, "loss": 0.407, "step": 27202 }, { "epoch": 1.4278979623906554, "grad_norm": 2.287140369415283, "learning_rate": 1.2578818810950262e-05, "loss": 0.3581, "step": 27260 }, { "epoch": 1.4309360431616993, "grad_norm": 3.0004403591156006, "learning_rate": 1.2544648281900015e-05, "loss": 0.412, "step": 27318 }, { "epoch": 1.4339741239327433, "grad_norm": 0.6953569650650024, "learning_rate": 1.2510445954106563e-05, "loss": 0.4086, "step": 27376 }, { "epoch": 1.437012204703787, "grad_norm": 1.8512517213821411, "learning_rate": 1.2476212254973198e-05, "loss": 0.367, "step": 27434 }, { "epoch": 1.4400502854748312, "grad_norm": 3.226573944091797, "learning_rate": 1.2441947612295222e-05, "loss": 0.4576, "step": 27492 }, { "epoch": 1.4430883662458749, "grad_norm": 2.18180251121521, "learning_rate": 1.2407652454254632e-05, "loss": 0.406, "step": 27550 }, { "epoch": 1.446126447016919, "grad_norm": 2.2790727615356445, "learning_rate": 1.2373327209414759e-05, "loss": 0.3834, "step": 27608 }, { "epoch": 1.4491645277879628, "grad_norm": 3.7362709045410156, "learning_rate": 1.2338972306714889e-05, "loss": 0.3668, "step": 27666 }, { "epoch": 1.452202608559007, "grad_norm": 3.6803033351898193, "learning_rate": 1.2304588175464941e-05, "loss": 0.3552, "step": 27724 }, { "epoch": 1.4552406893300507, "grad_norm": 4.011587142944336, "learning_rate": 1.2270175245340074e-05, "loss": 0.3629, "step": 27782 }, { "epoch": 1.4582787701010949, "grad_norm": 2.53393292427063, "learning_rate": 1.223573394637533e-05, "loss": 0.4304, "step": 27840 }, { "epoch": 1.4613168508721386, "grad_norm": 1.7455615997314453, "learning_rate": 1.2201264708960252e-05, "loss": 0.3834, "step": 27898 }, { "epoch": 1.4643549316431828, "grad_norm": 3.8155839443206787, "learning_rate": 1.2166767963833519e-05, "loss": 0.3604, "step": 27956 }, { "epoch": 1.4673930124142265, "grad_norm": 2.2630105018615723, "learning_rate": 1.213224414207755e-05, "loss": 0.3833, "step": 28014 }, { "epoch": 1.4704310931852707, "grad_norm": 2.6760213375091553, "learning_rate": 1.209769367511312e-05, "loss": 0.3574, "step": 28072 }, { "epoch": 1.4734691739563144, "grad_norm": 3.230642318725586, "learning_rate": 1.206311699469398e-05, "loss": 0.4048, "step": 28130 }, { "epoch": 1.4765072547273586, "grad_norm": 4.475136756896973, "learning_rate": 1.2028514532901445e-05, "loss": 0.3787, "step": 28188 }, { "epoch": 1.4795453354984023, "grad_norm": 3.1134748458862305, "learning_rate": 1.1993886722139004e-05, "loss": 0.4555, "step": 28246 }, { "epoch": 1.4825834162694465, "grad_norm": 1.694558024406433, "learning_rate": 1.19598316646051e-05, "loss": 0.3722, "step": 28304 }, { "epoch": 1.4856214970404902, "grad_norm": 2.7863929271698, "learning_rate": 1.1925154872829044e-05, "loss": 0.3436, "step": 28362 }, { "epoch": 1.4886595778115344, "grad_norm": 1.5842992067337036, "learning_rate": 1.189045402369863e-05, "loss": 0.4055, "step": 28420 }, { "epoch": 1.491697658582578, "grad_norm": 3.305001974105835, "learning_rate": 1.185572955084683e-05, "loss": 0.4177, "step": 28478 }, { "epoch": 1.4947357393536223, "grad_norm": 1.0446484088897705, "learning_rate": 1.1820981888201819e-05, "loss": 0.3333, "step": 28536 }, { "epoch": 1.497773820124666, "grad_norm": 2.8945727348327637, "learning_rate": 1.178621146998157e-05, "loss": 0.4184, "step": 28594 }, { "epoch": 1.5008119008957101, "grad_norm": 2.099456548690796, "learning_rate": 1.1751418730688405e-05, "loss": 0.3384, "step": 28652 }, { "epoch": 1.5038499816667539, "grad_norm": 2.770231246948242, "learning_rate": 1.1716604105103582e-05, "loss": 0.4045, "step": 28710 }, { "epoch": 1.506888062437798, "grad_norm": 2.9615721702575684, "learning_rate": 1.1681768028281859e-05, "loss": 0.358, "step": 28768 }, { "epoch": 1.5099261432088418, "grad_norm": 2.6768476963043213, "learning_rate": 1.1646910935546055e-05, "loss": 0.3421, "step": 28826 }, { "epoch": 1.512964223979886, "grad_norm": 2.7448065280914307, "learning_rate": 1.1612033262481607e-05, "loss": 0.3749, "step": 28884 }, { "epoch": 1.5160023047509297, "grad_norm": 2.4983344078063965, "learning_rate": 1.1577135444931136e-05, "loss": 0.2883, "step": 28942 }, { "epoch": 1.5190403855219738, "grad_norm": 3.426910400390625, "learning_rate": 1.1542217918988993e-05, "loss": 0.4035, "step": 29000 }, { "epoch": 1.5220784662930176, "grad_norm": 2.1255111694335938, "learning_rate": 1.1507281120995808e-05, "loss": 0.3872, "step": 29058 }, { "epoch": 1.5251165470640617, "grad_norm": 2.398613691329956, "learning_rate": 1.147232548753304e-05, "loss": 0.3485, "step": 29116 }, { "epoch": 1.5281546278351055, "grad_norm": 1.043369174003601, "learning_rate": 1.1437351455417533e-05, "loss": 0.3921, "step": 29174 }, { "epoch": 1.5311927086061496, "grad_norm": 0.4801720976829529, "learning_rate": 1.1402359461696034e-05, "loss": 0.3838, "step": 29232 }, { "epoch": 1.5342307893771934, "grad_norm": 1.7236027717590332, "learning_rate": 1.1367349943639748e-05, "loss": 0.4114, "step": 29290 }, { "epoch": 1.5372688701482375, "grad_norm": 2.06709885597229, "learning_rate": 1.1332323338738873e-05, "loss": 0.3896, "step": 29348 }, { "epoch": 1.5403069509192813, "grad_norm": 2.651384115219116, "learning_rate": 1.1297280084697126e-05, "loss": 0.3873, "step": 29406 }, { "epoch": 1.5433450316903254, "grad_norm": 1.6569814682006836, "learning_rate": 1.1262825227855019e-05, "loss": 0.3591, "step": 29464 }, { "epoch": 1.5463831124613692, "grad_norm": 1.6780743598937988, "learning_rate": 1.1227750257706836e-05, "loss": 0.314, "step": 29522 }, { "epoch": 1.5494211932324133, "grad_norm": 2.9294652938842773, "learning_rate": 1.1192659945196629e-05, "loss": 0.3737, "step": 29580 }, { "epoch": 1.552459274003457, "grad_norm": 2.5051159858703613, "learning_rate": 1.115755472882423e-05, "loss": 0.3653, "step": 29638 }, { "epoch": 1.5554973547745012, "grad_norm": 0.6437486410140991, "learning_rate": 1.1122435047275705e-05, "loss": 0.3389, "step": 29696 }, { "epoch": 1.558535435545545, "grad_norm": 2.644324541091919, "learning_rate": 1.1087301339417893e-05, "loss": 0.3918, "step": 29754 }, { "epoch": 1.561573516316589, "grad_norm": 2.6001169681549072, "learning_rate": 1.1052154044292904e-05, "loss": 0.3319, "step": 29812 }, { "epoch": 1.5646115970876329, "grad_norm": 1.9335983991622925, "learning_rate": 1.101699360111264e-05, "loss": 0.4825, "step": 29870 }, { "epoch": 1.5676496778586768, "grad_norm": 2.186579704284668, "learning_rate": 1.0981820449253304e-05, "loss": 0.3636, "step": 29928 }, { "epoch": 1.5706877586297208, "grad_norm": 1.987005591392517, "learning_rate": 1.0946635028249916e-05, "loss": 0.4214, "step": 29986 }, { "epoch": 1.5737258394007647, "grad_norm": 2.18890380859375, "learning_rate": 1.0911437777790807e-05, "loss": 0.3795, "step": 30044 }, { "epoch": 1.5767639201718087, "grad_norm": 2.2321784496307373, "learning_rate": 1.0876229137712135e-05, "loss": 0.3954, "step": 30102 }, { "epoch": 1.5798020009428526, "grad_norm": 3.391721725463867, "learning_rate": 1.0841009547992398e-05, "loss": 0.4151, "step": 30160 }, { "epoch": 1.5828400817138966, "grad_norm": 4.004073619842529, "learning_rate": 1.0805779448746907e-05, "loss": 0.3318, "step": 30218 }, { "epoch": 1.5858781624849405, "grad_norm": 2.895787477493286, "learning_rate": 1.0770539280222312e-05, "loss": 0.3688, "step": 30276 }, { "epoch": 1.5889162432559845, "grad_norm": 2.4140095710754395, "learning_rate": 1.073528948279109e-05, "loss": 0.4059, "step": 30334 }, { "epoch": 1.5919543240270284, "grad_norm": 3.6965787410736084, "learning_rate": 1.070003049694605e-05, "loss": 0.359, "step": 30392 }, { "epoch": 1.5949924047980724, "grad_norm": 2.7172248363494873, "learning_rate": 1.0664762763294812e-05, "loss": 0.4282, "step": 30450 }, { "epoch": 1.5980304855691163, "grad_norm": 3.0504825115203857, "learning_rate": 1.0629486722554316e-05, "loss": 0.3838, "step": 30508 }, { "epoch": 1.6010685663401603, "grad_norm": 1.9200332164764404, "learning_rate": 1.0594202815545319e-05, "loss": 0.3814, "step": 30566 }, { "epoch": 1.6041066471112042, "grad_norm": 3.9770891666412354, "learning_rate": 1.0558911483186856e-05, "loss": 0.3785, "step": 30624 }, { "epoch": 1.6071447278822482, "grad_norm": 2.1418545246124268, "learning_rate": 1.0523613166490776e-05, "loss": 0.3282, "step": 30682 }, { "epoch": 1.610182808653292, "grad_norm": 2.4927940368652344, "learning_rate": 1.0488308306556192e-05, "loss": 0.3979, "step": 30740 }, { "epoch": 1.613220889424336, "grad_norm": 1.7132062911987305, "learning_rate": 1.0452997344563982e-05, "loss": 0.371, "step": 30798 }, { "epoch": 1.61625897019538, "grad_norm": 2.9751667976379395, "learning_rate": 1.0417680721771288e-05, "loss": 0.3782, "step": 30856 }, { "epoch": 1.619297050966424, "grad_norm": 3.013737678527832, "learning_rate": 1.0382358879505982e-05, "loss": 0.3899, "step": 30914 }, { "epoch": 1.622335131737468, "grad_norm": 3.8316636085510254, "learning_rate": 1.0347032259161162e-05, "loss": 0.3942, "step": 30972 }, { "epoch": 1.6253732125085119, "grad_norm": 2.597729444503784, "learning_rate": 1.031170130218964e-05, "loss": 0.3941, "step": 31030 }, { "epoch": 1.6284112932795558, "grad_norm": 3.033374547958374, "learning_rate": 1.0276975702213507e-05, "loss": 0.3367, "step": 31088 }, { "epoch": 1.6314493740505998, "grad_norm": 3.6567564010620117, "learning_rate": 1.0241637452361323e-05, "loss": 0.3943, "step": 31146 }, { "epoch": 1.6344874548216437, "grad_norm": 2.558410167694092, "learning_rate": 1.0206296182929831e-05, "loss": 0.4043, "step": 31204 }, { "epoch": 1.6375255355926877, "grad_norm": 1.9596946239471436, "learning_rate": 1.01709523355549e-05, "loss": 0.3589, "step": 31262 }, { "epoch": 1.6405636163637316, "grad_norm": 3.5439186096191406, "learning_rate": 1.013560635190461e-05, "loss": 0.3769, "step": 31320 }, { "epoch": 1.6436016971347756, "grad_norm": 2.5455784797668457, "learning_rate": 1.010025867367374e-05, "loss": 0.3099, "step": 31378 }, { "epoch": 1.6466397779058195, "grad_norm": 2.672985792160034, "learning_rate": 1.0064909742578242e-05, "loss": 0.3384, "step": 31436 }, { "epoch": 1.6496778586768635, "grad_norm": 3.4179306030273438, "learning_rate": 1.002956000034973e-05, "loss": 0.418, "step": 31494 }, { "epoch": 1.6527159394479074, "grad_norm": 3.8588337898254395, "learning_rate": 9.99420988872995e-06, "loss": 0.4403, "step": 31552 }, { "epoch": 1.6557540202189514, "grad_norm": 2.2216336727142334, "learning_rate": 9.958859849465258e-06, "loss": 0.3876, "step": 31610 }, { "epoch": 1.6587921009899953, "grad_norm": 5.450603485107422, "learning_rate": 9.923510324301119e-06, "loss": 0.3786, "step": 31668 }, { "epoch": 1.6618301817610392, "grad_norm": 3.075505256652832, "learning_rate": 9.888161754976566e-06, "loss": 0.3599, "step": 31726 }, { "epoch": 1.6648682625320832, "grad_norm": 0.3762458562850952, "learning_rate": 9.852814583218681e-06, "loss": 0.3758, "step": 31784 }, { "epoch": 1.6679063433031271, "grad_norm": 4.629846572875977, "learning_rate": 9.817469250737098e-06, "loss": 0.3707, "step": 31842 }, { "epoch": 1.670944424074171, "grad_norm": 2.84089732170105, "learning_rate": 9.782126199218453e-06, "loss": 0.385, "step": 31900 }, { "epoch": 1.673982504845215, "grad_norm": 3.581580400466919, "learning_rate": 9.74678587032088e-06, "loss": 0.346, "step": 31958 }, { "epoch": 1.677020585616259, "grad_norm": 2.798215866088867, "learning_rate": 9.711448705668503e-06, "loss": 0.4142, "step": 32016 }, { "epoch": 1.680058666387303, "grad_norm": 2.810606002807617, "learning_rate": 9.676115146845887e-06, "loss": 0.3711, "step": 32074 }, { "epoch": 1.683096747158347, "grad_norm": 0.6601367592811584, "learning_rate": 9.640785635392543e-06, "loss": 0.4049, "step": 32132 }, { "epoch": 1.6861348279293908, "grad_norm": 2.879519462585449, "learning_rate": 9.60546061279741e-06, "loss": 0.385, "step": 32190 }, { "epoch": 1.6891729087004348, "grad_norm": 4.493093013763428, "learning_rate": 9.570140520493336e-06, "loss": 0.385, "step": 32248 }, { "epoch": 1.6922109894714787, "grad_norm": 1.8380403518676758, "learning_rate": 9.53482579985154e-06, "loss": 0.3804, "step": 32306 }, { "epoch": 1.6952490702425227, "grad_norm": 0.3858829736709595, "learning_rate": 9.499516892176139e-06, "loss": 0.3626, "step": 32364 }, { "epoch": 1.6982871510135666, "grad_norm": 3.132283926010132, "learning_rate": 9.464214238698589e-06, "loss": 0.3205, "step": 32422 }, { "epoch": 1.7013252317846106, "grad_norm": 2.6827683448791504, "learning_rate": 9.428918280572203e-06, "loss": 0.3605, "step": 32480 }, { "epoch": 1.7043633125556545, "grad_norm": 3.2188720703125, "learning_rate": 9.394237823897566e-06, "loss": 0.4137, "step": 32538 }, { "epoch": 1.7074013933266985, "grad_norm": 2.3144068717956543, "learning_rate": 9.35895644521292e-06, "loss": 0.4068, "step": 32596 }, { "epoch": 1.7104394740977424, "grad_norm": 2.736628293991089, "learning_rate": 9.323683077214672e-06, "loss": 0.3743, "step": 32654 }, { "epoch": 1.7134775548687864, "grad_norm": 3.6730895042419434, "learning_rate": 9.28841816069017e-06, "loss": 0.3719, "step": 32712 }, { "epoch": 1.7165156356398303, "grad_norm": 4.749345779418945, "learning_rate": 9.253162136321158e-06, "loss": 0.3911, "step": 32770 }, { "epoch": 1.7195537164108743, "grad_norm": 1.4852901697158813, "learning_rate": 9.217915444678246e-06, "loss": 0.4266, "step": 32828 }, { "epoch": 1.7225917971819182, "grad_norm": 2.9961495399475098, "learning_rate": 9.182678526215428e-06, "loss": 0.3772, "step": 32886 }, { "epoch": 1.725629877952962, "grad_norm": 2.647796154022217, "learning_rate": 9.147451821264571e-06, "loss": 0.3617, "step": 32944 }, { "epoch": 1.7286679587240061, "grad_norm": 1.1617308855056763, "learning_rate": 9.112235770029908e-06, "loss": 0.2908, "step": 33002 }, { "epoch": 1.7317060394950499, "grad_norm": 2.2398462295532227, "learning_rate": 9.077030812582535e-06, "loss": 0.3906, "step": 33060 }, { "epoch": 1.734744120266094, "grad_norm": 3.6835782527923584, "learning_rate": 9.041837388854928e-06, "loss": 0.4513, "step": 33118 }, { "epoch": 1.7377822010371378, "grad_norm": 2.344299077987671, "learning_rate": 9.006655938635422e-06, "loss": 0.4084, "step": 33176 }, { "epoch": 1.740820281808182, "grad_norm": 3.1176178455352783, "learning_rate": 8.971486901562728e-06, "loss": 0.3877, "step": 33234 }, { "epoch": 1.7438583625792257, "grad_norm": 1.6773039102554321, "learning_rate": 8.936330717120455e-06, "loss": 0.3969, "step": 33292 }, { "epoch": 1.7468964433502698, "grad_norm": 3.7852532863616943, "learning_rate": 8.901187824631575e-06, "loss": 0.3343, "step": 33350 }, { "epoch": 1.7499345241213136, "grad_norm": 2.9691007137298584, "learning_rate": 8.866058663252984e-06, "loss": 0.3555, "step": 33408 }, { "epoch": 1.7529726048923577, "grad_norm": 2.2175984382629395, "learning_rate": 8.830943671969973e-06, "loss": 0.3689, "step": 33466 }, { "epoch": 1.7560106856634015, "grad_norm": 4.200769901275635, "learning_rate": 8.795843289590765e-06, "loss": 0.3527, "step": 33524 }, { "epoch": 1.7590487664344456, "grad_norm": 0.7551002502441406, "learning_rate": 8.760757954741032e-06, "loss": 0.3522, "step": 33582 }, { "epoch": 1.7620868472054894, "grad_norm": 2.0418272018432617, "learning_rate": 8.725688105858394e-06, "loss": 0.4124, "step": 33640 }, { "epoch": 1.7651249279765335, "grad_norm": 2.172964096069336, "learning_rate": 8.690634181186958e-06, "loss": 0.4303, "step": 33698 }, { "epoch": 1.7681630087475773, "grad_norm": 1.0999021530151367, "learning_rate": 8.655596618771844e-06, "loss": 0.3028, "step": 33756 }, { "epoch": 1.7712010895186214, "grad_norm": 5.170861721038818, "learning_rate": 8.620575856453699e-06, "loss": 0.3741, "step": 33814 }, { "epoch": 1.7742391702896652, "grad_norm": 2.6932246685028076, "learning_rate": 8.585572331863224e-06, "loss": 0.4294, "step": 33872 }, { "epoch": 1.7772772510607093, "grad_norm": 0.71751469373703, "learning_rate": 8.551189534523404e-06, "loss": 0.338, "step": 33930 }, { "epoch": 1.780315331831753, "grad_norm": 2.928600788116455, "learning_rate": 8.516221481428949e-06, "loss": 0.4693, "step": 33988 }, { "epoch": 1.7833534126027972, "grad_norm": 3.5148239135742188, "learning_rate": 8.481271970107997e-06, "loss": 0.4074, "step": 34046 }, { "epoch": 1.786391493373841, "grad_norm": 2.133580207824707, "learning_rate": 8.446341437300874e-06, "loss": 0.4129, "step": 34104 }, { "epoch": 1.7894295741448851, "grad_norm": 2.5686025619506836, "learning_rate": 8.411430319510761e-06, "loss": 0.3526, "step": 34162 }, { "epoch": 1.7924676549159289, "grad_norm": 4.3522562980651855, "learning_rate": 8.376539052998205e-06, "loss": 0.3587, "step": 34220 }, { "epoch": 1.795505735686973, "grad_norm": 3.373617649078369, "learning_rate": 8.34166807377569e-06, "loss": 0.4069, "step": 34278 }, { "epoch": 1.7985438164580168, "grad_norm": 2.4383530616760254, "learning_rate": 8.306817817602193e-06, "loss": 0.4002, "step": 34336 }, { "epoch": 1.80001047614059, "eval_accuracy": 0.8927125930786133, "eval_loss": 0.4064957797527313, "eval_runtime": 5473.958, "eval_samples_per_second": 0.874, "eval_steps_per_second": 0.874, "step": 34364 } ], "logging_steps": 58, "max_steps": 57273, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 17182, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8389182403659694e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }