{ "best_metric": 0.8844256401062012, "best_model_checkpoint": "/workspace/previous_works/MedBLIP/output/MedBLIP-0005/checkpoint-17182", "epoch": 0.9000052380702949, "eval_steps": 17182, "global_step": 17182, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030380807710439473, "grad_norm": 119.46666717529297, "learning_rate": 1.3268156424581008e-07, "loss": 5.0724, "step": 58 }, { "epoch": 0.006076161542087895, "grad_norm": 34.45277786254883, "learning_rate": 3.3519553072625703e-07, "loss": 3.6479, "step": 116 }, { "epoch": 0.009114242313131841, "grad_norm": 14.232598304748535, "learning_rate": 5.37709497206704e-07, "loss": 2.2356, "step": 174 }, { "epoch": 0.01215232308417579, "grad_norm": 10.709288597106934, "learning_rate": 7.402234636871509e-07, "loss": 1.9953, "step": 232 }, { "epoch": 0.015190403855219737, "grad_norm": 7.259119987487793, "learning_rate": 9.427374301675979e-07, "loss": 1.8188, "step": 290 }, { "epoch": 0.018228484626263683, "grad_norm": 6.452203273773193, "learning_rate": 1.1452513966480447e-06, "loss": 1.5639, "step": 348 }, { "epoch": 0.021266565397307632, "grad_norm": 9.458000183105469, "learning_rate": 1.3477653631284918e-06, "loss": 1.504, "step": 406 }, { "epoch": 0.02430464616835158, "grad_norm": 6.162489414215088, "learning_rate": 1.5502793296089386e-06, "loss": 1.2581, "step": 464 }, { "epoch": 0.027342726939395528, "grad_norm": 6.684648513793945, "learning_rate": 1.7527932960893857e-06, "loss": 1.1713, "step": 522 }, { "epoch": 0.030380807710439474, "grad_norm": 6.966240882873535, "learning_rate": 1.9553072625698325e-06, "loss": 1.0504, "step": 580 }, { "epoch": 0.03341888848148342, "grad_norm": 6.379108905792236, "learning_rate": 2.1578212290502796e-06, "loss": 0.968, "step": 638 }, { "epoch": 0.036456969252527366, "grad_norm": 5.3792619705200195, "learning_rate": 2.3603351955307262e-06, "loss": 0.9209, "step": 696 }, { "epoch": 0.039495050023571315, "grad_norm": 6.413719177246094, "learning_rate": 2.5628491620111733e-06, "loss": 0.8723, "step": 754 }, { "epoch": 0.042533130794615265, "grad_norm": 8.826435089111328, "learning_rate": 2.7653631284916204e-06, "loss": 0.8652, "step": 812 }, { "epoch": 0.045571211565659214, "grad_norm": 6.294381618499756, "learning_rate": 2.9678770949720674e-06, "loss": 0.8099, "step": 870 }, { "epoch": 0.04860929233670316, "grad_norm": 7.355430603027344, "learning_rate": 3.170391061452514e-06, "loss": 0.7396, "step": 928 }, { "epoch": 0.051647373107747106, "grad_norm": 10.120753288269043, "learning_rate": 3.372905027932961e-06, "loss": 0.7511, "step": 986 }, { "epoch": 0.054685453878791056, "grad_norm": 5.653336048126221, "learning_rate": 3.575418994413408e-06, "loss": 0.6688, "step": 1044 }, { "epoch": 0.057723534649835, "grad_norm": 5.749114990234375, "learning_rate": 3.7779329608938552e-06, "loss": 0.677, "step": 1102 }, { "epoch": 0.06076161542087895, "grad_norm": 7.65744686126709, "learning_rate": 3.980446927374302e-06, "loss": 0.7195, "step": 1160 }, { "epoch": 0.0637996961919229, "grad_norm": 9.296794891357422, "learning_rate": 4.1829608938547485e-06, "loss": 0.7598, "step": 1218 }, { "epoch": 0.06683777696296685, "grad_norm": 6.070080757141113, "learning_rate": 4.385474860335196e-06, "loss": 0.7015, "step": 1276 }, { "epoch": 0.0698758577340108, "grad_norm": 6.638489723205566, "learning_rate": 4.5879888268156435e-06, "loss": 0.6778, "step": 1334 }, { "epoch": 0.07291393850505473, "grad_norm": 9.076967239379883, "learning_rate": 4.790502793296089e-06, "loss": 0.5551, "step": 1392 }, { "epoch": 0.07595201927609868, "grad_norm": 6.929805278778076, "learning_rate": 4.993016759776537e-06, "loss": 0.6792, "step": 1450 }, { "epoch": 0.07899010004714263, "grad_norm": 8.316506385803223, "learning_rate": 5.195530726256983e-06, "loss": 0.6379, "step": 1508 }, { "epoch": 0.08202818081818658, "grad_norm": 9.471745491027832, "learning_rate": 5.398044692737431e-06, "loss": 0.6242, "step": 1566 }, { "epoch": 0.08506626158923053, "grad_norm": 6.022659778594971, "learning_rate": 5.6005586592178775e-06, "loss": 0.6368, "step": 1624 }, { "epoch": 0.08810434236027448, "grad_norm": 7.15187406539917, "learning_rate": 5.803072625698325e-06, "loss": 0.5381, "step": 1682 }, { "epoch": 0.09114242313131843, "grad_norm": 7.10537052154541, "learning_rate": 6.005586592178772e-06, "loss": 0.6885, "step": 1740 }, { "epoch": 0.09418050390236236, "grad_norm": 5.685272216796875, "learning_rate": 6.208100558659218e-06, "loss": 0.6066, "step": 1798 }, { "epoch": 0.09721858467340631, "grad_norm": 6.733754634857178, "learning_rate": 6.410614525139666e-06, "loss": 0.6124, "step": 1856 }, { "epoch": 0.10025666544445026, "grad_norm": 6.112730026245117, "learning_rate": 6.613128491620112e-06, "loss": 0.6224, "step": 1914 }, { "epoch": 0.10329474621549421, "grad_norm": 5.784328460693359, "learning_rate": 6.815642458100559e-06, "loss": 0.6289, "step": 1972 }, { "epoch": 0.10633282698653816, "grad_norm": 9.69115924835205, "learning_rate": 7.0181564245810065e-06, "loss": 0.5952, "step": 2030 }, { "epoch": 0.10937090775758211, "grad_norm": 5.509926795959473, "learning_rate": 7.220670391061453e-06, "loss": 0.6288, "step": 2088 }, { "epoch": 0.11240898852862606, "grad_norm": 6.545931339263916, "learning_rate": 7.423184357541901e-06, "loss": 0.5243, "step": 2146 }, { "epoch": 0.11544706929967, "grad_norm": 7.0921173095703125, "learning_rate": 7.625698324022347e-06, "loss": 0.5527, "step": 2204 }, { "epoch": 0.11848515007071395, "grad_norm": 5.278844833374023, "learning_rate": 7.828212290502794e-06, "loss": 0.6739, "step": 2262 }, { "epoch": 0.1215232308417579, "grad_norm": 5.799619197845459, "learning_rate": 8.03072625698324e-06, "loss": 0.5913, "step": 2320 }, { "epoch": 0.12456131161280184, "grad_norm": 7.148493766784668, "learning_rate": 8.233240223463687e-06, "loss": 0.6117, "step": 2378 }, { "epoch": 0.1275993923838458, "grad_norm": 5.538400650024414, "learning_rate": 8.435754189944135e-06, "loss": 0.5512, "step": 2436 }, { "epoch": 0.13063747315488974, "grad_norm": 5.364485740661621, "learning_rate": 8.638268156424582e-06, "loss": 0.5458, "step": 2494 }, { "epoch": 0.1336755539259337, "grad_norm": 7.3765549659729, "learning_rate": 8.840782122905029e-06, "loss": 0.5847, "step": 2552 }, { "epoch": 0.13671363469697764, "grad_norm": 5.364510536193848, "learning_rate": 9.043296089385475e-06, "loss": 0.542, "step": 2610 }, { "epoch": 0.1397517154680216, "grad_norm": 3.656923770904541, "learning_rate": 9.245810055865922e-06, "loss": 0.5528, "step": 2668 }, { "epoch": 0.14278979623906554, "grad_norm": 3.117631435394287, "learning_rate": 9.448324022346369e-06, "loss": 0.5793, "step": 2726 }, { "epoch": 0.14582787701010946, "grad_norm": 6.822358131408691, "learning_rate": 9.650837988826817e-06, "loss": 0.5967, "step": 2784 }, { "epoch": 0.1488659577811534, "grad_norm": 4.629315376281738, "learning_rate": 9.853351955307264e-06, "loss": 0.5945, "step": 2842 }, { "epoch": 0.15190403855219736, "grad_norm": 3.6398866176605225, "learning_rate": 1.005586592178771e-05, "loss": 0.6005, "step": 2900 }, { "epoch": 0.1549421193232413, "grad_norm": 5.811204433441162, "learning_rate": 1.0258379888268157e-05, "loss": 0.5065, "step": 2958 }, { "epoch": 0.15798020009428526, "grad_norm": 3.8507301807403564, "learning_rate": 1.0460893854748604e-05, "loss": 0.5802, "step": 3016 }, { "epoch": 0.1610182808653292, "grad_norm": 5.666468143463135, "learning_rate": 1.066340782122905e-05, "loss": 0.5342, "step": 3074 }, { "epoch": 0.16405636163637316, "grad_norm": 2.025376558303833, "learning_rate": 1.0865921787709498e-05, "loss": 0.5271, "step": 3132 }, { "epoch": 0.1670944424074171, "grad_norm": 5.698912143707275, "learning_rate": 1.1068435754189945e-05, "loss": 0.538, "step": 3190 }, { "epoch": 0.17013252317846106, "grad_norm": 4.067931652069092, "learning_rate": 1.1270949720670392e-05, "loss": 0.5601, "step": 3248 }, { "epoch": 0.173170603949505, "grad_norm": 5.068817138671875, "learning_rate": 1.1473463687150838e-05, "loss": 0.5528, "step": 3306 }, { "epoch": 0.17620868472054896, "grad_norm": 7.116920471191406, "learning_rate": 1.1675977653631285e-05, "loss": 0.5418, "step": 3364 }, { "epoch": 0.1792467654915929, "grad_norm": 8.113608360290527, "learning_rate": 1.1878491620111732e-05, "loss": 0.5196, "step": 3422 }, { "epoch": 0.18228484626263686, "grad_norm": 4.820245265960693, "learning_rate": 1.208100558659218e-05, "loss": 0.6029, "step": 3480 }, { "epoch": 0.18532292703368078, "grad_norm": 1.1077276468276978, "learning_rate": 1.2283519553072627e-05, "loss": 0.5779, "step": 3538 }, { "epoch": 0.18836100780472473, "grad_norm": 3.024624824523926, "learning_rate": 1.2486033519553073e-05, "loss": 0.5091, "step": 3596 }, { "epoch": 0.19139908857576868, "grad_norm": 8.059369087219238, "learning_rate": 1.268854748603352e-05, "loss": 0.5012, "step": 3654 }, { "epoch": 0.19443716934681263, "grad_norm": 3.9895098209381104, "learning_rate": 1.2891061452513967e-05, "loss": 0.578, "step": 3712 }, { "epoch": 0.19747525011785658, "grad_norm": 7.111061096191406, "learning_rate": 1.3093575418994415e-05, "loss": 0.4958, "step": 3770 }, { "epoch": 0.20051333088890053, "grad_norm": 5.541796684265137, "learning_rate": 1.3296089385474861e-05, "loss": 0.589, "step": 3828 }, { "epoch": 0.20355141165994448, "grad_norm": 4.365527629852295, "learning_rate": 1.3498603351955308e-05, "loss": 0.6003, "step": 3886 }, { "epoch": 0.20658949243098843, "grad_norm": 4.486824035644531, "learning_rate": 1.3701117318435755e-05, "loss": 0.6135, "step": 3944 }, { "epoch": 0.20962757320203237, "grad_norm": 5.487951278686523, "learning_rate": 1.3903631284916201e-05, "loss": 0.6234, "step": 4002 }, { "epoch": 0.21266565397307632, "grad_norm": 2.3850884437561035, "learning_rate": 1.410614525139665e-05, "loss": 0.5536, "step": 4060 }, { "epoch": 0.21570373474412027, "grad_norm": 3.7957749366760254, "learning_rate": 1.4305167597765364e-05, "loss": 0.5211, "step": 4118 }, { "epoch": 0.21874181551516422, "grad_norm": 5.272162437438965, "learning_rate": 1.450768156424581e-05, "loss": 0.4992, "step": 4176 }, { "epoch": 0.22177989628620817, "grad_norm": 4.292142868041992, "learning_rate": 1.4710195530726259e-05, "loss": 0.5124, "step": 4234 }, { "epoch": 0.22481797705725212, "grad_norm": 4.948460102081299, "learning_rate": 1.4912709497206705e-05, "loss": 0.5575, "step": 4292 }, { "epoch": 0.22785605782829604, "grad_norm": 3.456590414047241, "learning_rate": 1.5115223463687152e-05, "loss": 0.5672, "step": 4350 }, { "epoch": 0.23089413859934, "grad_norm": 5.341044902801514, "learning_rate": 1.53177374301676e-05, "loss": 0.5106, "step": 4408 }, { "epoch": 0.23393221937038394, "grad_norm": 5.5106987953186035, "learning_rate": 1.5520251396648043e-05, "loss": 0.6037, "step": 4466 }, { "epoch": 0.2369703001414279, "grad_norm": 4.941389560699463, "learning_rate": 1.5722765363128495e-05, "loss": 0.5995, "step": 4524 }, { "epoch": 0.24000838091247184, "grad_norm": 3.719957113265991, "learning_rate": 1.592527932960894e-05, "loss": 0.5531, "step": 4582 }, { "epoch": 0.2430464616835158, "grad_norm": 4.435623645782471, "learning_rate": 1.612779329608939e-05, "loss": 0.5502, "step": 4640 }, { "epoch": 0.24608454245455974, "grad_norm": 4.688556671142578, "learning_rate": 1.6330307262569833e-05, "loss": 0.5056, "step": 4698 }, { "epoch": 0.2491226232256037, "grad_norm": 5.511931896209717, "learning_rate": 1.653282122905028e-05, "loss": 0.5242, "step": 4756 }, { "epoch": 0.25216070399664764, "grad_norm": 4.933206558227539, "learning_rate": 1.673533519553073e-05, "loss": 0.5115, "step": 4814 }, { "epoch": 0.2551987847676916, "grad_norm": 2.942838191986084, "learning_rate": 1.6937849162011175e-05, "loss": 0.5941, "step": 4872 }, { "epoch": 0.25823686553873554, "grad_norm": 4.0710625648498535, "learning_rate": 1.7140363128491623e-05, "loss": 0.4887, "step": 4930 }, { "epoch": 0.2612749463097795, "grad_norm": 3.212920665740967, "learning_rate": 1.7342877094972068e-05, "loss": 0.5051, "step": 4988 }, { "epoch": 0.26431302708082344, "grad_norm": 4.390661716461182, "learning_rate": 1.7545391061452513e-05, "loss": 0.5735, "step": 5046 }, { "epoch": 0.2673511078518674, "grad_norm": 3.784395217895508, "learning_rate": 1.7747905027932965e-05, "loss": 0.4847, "step": 5104 }, { "epoch": 0.27038918862291134, "grad_norm": 4.238777160644531, "learning_rate": 1.795041899441341e-05, "loss": 0.4303, "step": 5162 }, { "epoch": 0.2734272693939553, "grad_norm": 4.616554260253906, "learning_rate": 1.8152932960893855e-05, "loss": 0.53, "step": 5220 }, { "epoch": 0.27646535016499924, "grad_norm": 3.0670206546783447, "learning_rate": 1.8355446927374303e-05, "loss": 0.5089, "step": 5278 }, { "epoch": 0.2795034309360432, "grad_norm": 5.144998550415039, "learning_rate": 1.8557960893854748e-05, "loss": 0.5352, "step": 5336 }, { "epoch": 0.28254151170708713, "grad_norm": 3.9276976585388184, "learning_rate": 1.8760474860335196e-05, "loss": 0.5001, "step": 5394 }, { "epoch": 0.2855795924781311, "grad_norm": 4.67507266998291, "learning_rate": 1.8962988826815645e-05, "loss": 0.5106, "step": 5452 }, { "epoch": 0.288617673249175, "grad_norm": 11.027061462402344, "learning_rate": 1.916550279329609e-05, "loss": 0.5311, "step": 5510 }, { "epoch": 0.2916557540202189, "grad_norm": 4.6279802322387695, "learning_rate": 1.9368016759776538e-05, "loss": 0.4722, "step": 5568 }, { "epoch": 0.2946938347912629, "grad_norm": 3.2744059562683105, "learning_rate": 1.9570530726256983e-05, "loss": 0.5435, "step": 5626 }, { "epoch": 0.2977319155623068, "grad_norm": 4.361588478088379, "learning_rate": 1.977304469273743e-05, "loss": 0.5182, "step": 5684 }, { "epoch": 0.3007699963333508, "grad_norm": 4.725919246673584, "learning_rate": 1.997555865921788e-05, "loss": 0.5108, "step": 5742 }, { "epoch": 0.3038080771043947, "grad_norm": 4.126678943634033, "learning_rate": 1.999995169004151e-05, "loss": 0.4113, "step": 5800 }, { "epoch": 0.3068461578754387, "grad_norm": 4.924627780914307, "learning_rate": 1.999977932757864e-05, "loss": 0.5911, "step": 5858 }, { "epoch": 0.3098842386464826, "grad_norm": 3.836568832397461, "learning_rate": 1.9999482004657697e-05, "loss": 0.5589, "step": 5916 }, { "epoch": 0.3129223194175266, "grad_norm": 2.1909499168395996, "learning_rate": 1.999905972499412e-05, "loss": 0.5321, "step": 5974 }, { "epoch": 0.3159604001885705, "grad_norm": 4.15761661529541, "learning_rate": 1.9998512493864858e-05, "loss": 0.4898, "step": 6032 }, { "epoch": 0.31899848095961447, "grad_norm": 4.483209133148193, "learning_rate": 1.9997840318108285e-05, "loss": 0.5339, "step": 6090 }, { "epoch": 0.3220365617306584, "grad_norm": 4.208775997161865, "learning_rate": 1.9997058007847493e-05, "loss": 0.4381, "step": 6148 }, { "epoch": 0.32507464250170237, "grad_norm": 4.7916951179504395, "learning_rate": 1.999613812340473e-05, "loss": 0.5622, "step": 6206 }, { "epoch": 0.3281127232727463, "grad_norm": 3.041353702545166, "learning_rate": 1.999509332400555e-05, "loss": 0.5797, "step": 6264 }, { "epoch": 0.33115080404379027, "grad_norm": 2.720505714416504, "learning_rate": 1.999392362270611e-05, "loss": 0.5213, "step": 6322 }, { "epoch": 0.3341888848148342, "grad_norm": 4.793097496032715, "learning_rate": 1.999262903412336e-05, "loss": 0.4593, "step": 6380 }, { "epoch": 0.33722696558587817, "grad_norm": 3.890002489089966, "learning_rate": 1.999120957443491e-05, "loss": 0.4486, "step": 6438 }, { "epoch": 0.3402650463569221, "grad_norm": 3.537182569503784, "learning_rate": 1.9989665261378772e-05, "loss": 0.4879, "step": 6496 }, { "epoch": 0.34330312712796607, "grad_norm": 1.6273483037948608, "learning_rate": 1.998799611425319e-05, "loss": 0.483, "step": 6554 }, { "epoch": 0.34634120789901, "grad_norm": 1.544161081314087, "learning_rate": 1.9986202153916356e-05, "loss": 0.5295, "step": 6612 }, { "epoch": 0.34937928867005397, "grad_norm": 4.516360759735107, "learning_rate": 1.9984283402786177e-05, "loss": 0.5544, "step": 6670 }, { "epoch": 0.3524173694410979, "grad_norm": 3.9603912830352783, "learning_rate": 1.998223988483998e-05, "loss": 0.5005, "step": 6728 }, { "epoch": 0.35545545021214187, "grad_norm": 4.063785076141357, "learning_rate": 1.998007162561423e-05, "loss": 0.5339, "step": 6786 }, { "epoch": 0.3584935309831858, "grad_norm": 4.825593948364258, "learning_rate": 1.9977778652204192e-05, "loss": 0.4702, "step": 6844 }, { "epoch": 0.36153161175422976, "grad_norm": 0.5379557013511658, "learning_rate": 1.997536099326359e-05, "loss": 0.5397, "step": 6902 }, { "epoch": 0.3645696925252737, "grad_norm": 3.922156810760498, "learning_rate": 1.9972818679004273e-05, "loss": 0.5663, "step": 6960 }, { "epoch": 0.36760777329631766, "grad_norm": 3.376941442489624, "learning_rate": 1.9970198778515604e-05, "loss": 0.5321, "step": 7018 }, { "epoch": 0.37064585406736156, "grad_norm": 4.569897174835205, "learning_rate": 1.9967409398301135e-05, "loss": 0.4517, "step": 7076 }, { "epoch": 0.3736839348384055, "grad_norm": 4.267284393310547, "learning_rate": 1.9964495462133642e-05, "loss": 0.5225, "step": 7134 }, { "epoch": 0.37672201560944946, "grad_norm": 4.1275506019592285, "learning_rate": 1.9961457006426603e-05, "loss": 0.5007, "step": 7192 }, { "epoch": 0.3797600963804934, "grad_norm": 4.481261253356934, "learning_rate": 1.995829406914954e-05, "loss": 0.4754, "step": 7250 }, { "epoch": 0.38279817715153736, "grad_norm": 2.876922845840454, "learning_rate": 1.995500668982753e-05, "loss": 0.4729, "step": 7308 }, { "epoch": 0.3858362579225813, "grad_norm": 1.0541763305664062, "learning_rate": 1.9951594909540727e-05, "loss": 0.5697, "step": 7366 }, { "epoch": 0.38887433869362525, "grad_norm": 3.462268114089966, "learning_rate": 1.9948058770923837e-05, "loss": 0.4803, "step": 7424 }, { "epoch": 0.3919124194646692, "grad_norm": 3.209782123565674, "learning_rate": 1.9944398318165578e-05, "loss": 0.5239, "step": 7482 }, { "epoch": 0.39495050023571315, "grad_norm": 4.3836445808410645, "learning_rate": 1.994061359700815e-05, "loss": 0.5096, "step": 7540 }, { "epoch": 0.3979885810067571, "grad_norm": 2.8133575916290283, "learning_rate": 1.9936704654746642e-05, "loss": 0.4546, "step": 7598 }, { "epoch": 0.40102666177780105, "grad_norm": 3.709463596343994, "learning_rate": 1.9932671540228456e-05, "loss": 0.4882, "step": 7656 }, { "epoch": 0.404064742548845, "grad_norm": 3.9174060821533203, "learning_rate": 1.992851430385269e-05, "loss": 0.4311, "step": 7714 }, { "epoch": 0.40710282331988895, "grad_norm": 2.9282238483428955, "learning_rate": 1.99242329975695e-05, "loss": 0.5204, "step": 7772 }, { "epoch": 0.4101409040909329, "grad_norm": 4.139567852020264, "learning_rate": 1.9919827674879473e-05, "loss": 0.4739, "step": 7830 }, { "epoch": 0.41317898486197685, "grad_norm": 3.436636447906494, "learning_rate": 1.9915298390832935e-05, "loss": 0.4838, "step": 7888 }, { "epoch": 0.4162170656330208, "grad_norm": 3.512646198272705, "learning_rate": 1.9910645202029272e-05, "loss": 0.4594, "step": 7946 }, { "epoch": 0.41925514640406475, "grad_norm": 3.1627018451690674, "learning_rate": 1.9905868166616234e-05, "loss": 0.5628, "step": 8004 }, { "epoch": 0.4222932271751087, "grad_norm": 1.1955090761184692, "learning_rate": 1.990096734428919e-05, "loss": 0.4587, "step": 8062 }, { "epoch": 0.42533130794615265, "grad_norm": 3.0422959327697754, "learning_rate": 1.989594279629039e-05, "loss": 0.5523, "step": 8120 }, { "epoch": 0.4283693887171966, "grad_norm": 3.0934972763061523, "learning_rate": 1.98907945854082e-05, "loss": 0.4855, "step": 8178 }, { "epoch": 0.43140746948824055, "grad_norm": 3.9694907665252686, "learning_rate": 1.9885522775976324e-05, "loss": 0.543, "step": 8236 }, { "epoch": 0.4344455502592845, "grad_norm": 4.713873386383057, "learning_rate": 1.9880127433872983e-05, "loss": 0.4901, "step": 8294 }, { "epoch": 0.43748363103032845, "grad_norm": 2.3840503692626953, "learning_rate": 1.987460862652011e-05, "loss": 0.4265, "step": 8352 }, { "epoch": 0.4405217118013724, "grad_norm": 4.123522758483887, "learning_rate": 1.9868966422882496e-05, "loss": 0.4237, "step": 8410 }, { "epoch": 0.44355979257241634, "grad_norm": 3.1017978191375732, "learning_rate": 1.986320089346693e-05, "loss": 0.4106, "step": 8468 }, { "epoch": 0.4465978733434603, "grad_norm": 2.8059699535369873, "learning_rate": 1.9857414684867994e-05, "loss": 0.4641, "step": 8526 }, { "epoch": 0.44963595411450424, "grad_norm": 4.327667236328125, "learning_rate": 1.985140484474396e-05, "loss": 0.4337, "step": 8584 }, { "epoch": 0.45267403488554814, "grad_norm": 0.9626501798629761, "learning_rate": 1.9845271898297104e-05, "loss": 0.4932, "step": 8642 }, { "epoch": 0.4557121156565921, "grad_norm": 1.3852657079696655, "learning_rate": 1.9839015922166693e-05, "loss": 0.4866, "step": 8700 }, { "epoch": 0.45875019642763604, "grad_norm": 3.2711095809936523, "learning_rate": 1.983263699452942e-05, "loss": 0.4825, "step": 8758 }, { "epoch": 0.46178827719868, "grad_norm": 4.84442138671875, "learning_rate": 1.9826135195098416e-05, "loss": 0.4559, "step": 8816 }, { "epoch": 0.46482635796972394, "grad_norm": 1.2177191972732544, "learning_rate": 1.9819510605122255e-05, "loss": 0.4795, "step": 8874 }, { "epoch": 0.4678644387407679, "grad_norm": 3.1849379539489746, "learning_rate": 1.981276330738395e-05, "loss": 0.474, "step": 8932 }, { "epoch": 0.47090251951181183, "grad_norm": 4.420878887176514, "learning_rate": 1.9805893386199892e-05, "loss": 0.4876, "step": 8990 }, { "epoch": 0.4739406002828558, "grad_norm": 2.714984893798828, "learning_rate": 1.9798900927418835e-05, "loss": 0.4491, "step": 9048 }, { "epoch": 0.47697868105389973, "grad_norm": 2.185593843460083, "learning_rate": 1.9791786018420792e-05, "loss": 0.4808, "step": 9106 }, { "epoch": 0.4800167618249437, "grad_norm": 3.3326094150543213, "learning_rate": 1.9784548748115946e-05, "loss": 0.4502, "step": 9164 }, { "epoch": 0.48305484259598763, "grad_norm": 3.4437661170959473, "learning_rate": 1.977718920694356e-05, "loss": 0.5049, "step": 9222 }, { "epoch": 0.4860929233670316, "grad_norm": 5.456835746765137, "learning_rate": 1.9769707486870825e-05, "loss": 0.4791, "step": 9280 }, { "epoch": 0.48913100413807553, "grad_norm": 5.109498023986816, "learning_rate": 1.9762103681391724e-05, "loss": 0.5396, "step": 9338 }, { "epoch": 0.4921690849091195, "grad_norm": 4.347654342651367, "learning_rate": 1.9754377885525854e-05, "loss": 0.4433, "step": 9396 }, { "epoch": 0.49520716568016343, "grad_norm": 3.837158203125, "learning_rate": 1.9746530195817243e-05, "loss": 0.4791, "step": 9454 }, { "epoch": 0.4982452464512074, "grad_norm": 3.8552966117858887, "learning_rate": 1.9738699146560578e-05, "loss": 0.4979, "step": 9512 }, { "epoch": 0.5012833272222513, "grad_norm": 1.76126229763031, "learning_rate": 1.973061006224811e-05, "loss": 0.4716, "step": 9570 }, { "epoch": 0.5043214079932953, "grad_norm": 5.198726654052734, "learning_rate": 1.9722399381103267e-05, "loss": 0.4801, "step": 9628 }, { "epoch": 0.5073594887643392, "grad_norm": 3.2313361167907715, "learning_rate": 1.9714067205729356e-05, "loss": 0.4592, "step": 9686 }, { "epoch": 0.5103975695353832, "grad_norm": 0.9486598968505859, "learning_rate": 1.9705613640247928e-05, "loss": 0.4399, "step": 9744 }, { "epoch": 0.5134356503064271, "grad_norm": 3.271669864654541, "learning_rate": 1.9697038790297442e-05, "loss": 0.4722, "step": 9802 }, { "epoch": 0.5164737310774711, "grad_norm": 5.1848039627075195, "learning_rate": 1.9688342763031993e-05, "loss": 0.4336, "step": 9860 }, { "epoch": 0.519511811848515, "grad_norm": 4.134024620056152, "learning_rate": 1.967952566711993e-05, "loss": 0.4534, "step": 9918 }, { "epoch": 0.522549892619559, "grad_norm": 3.0904159545898438, "learning_rate": 1.9670587612742515e-05, "loss": 0.4461, "step": 9976 }, { "epoch": 0.5255879733906029, "grad_norm": 3.3785481452941895, "learning_rate": 1.9661528711592553e-05, "loss": 0.4906, "step": 10034 }, { "epoch": 0.5286260541616469, "grad_norm": 4.755141258239746, "learning_rate": 1.9652349076872986e-05, "loss": 0.4519, "step": 10092 }, { "epoch": 0.5316641349326908, "grad_norm": 4.502477645874023, "learning_rate": 1.9643048823295482e-05, "loss": 0.5454, "step": 10150 }, { "epoch": 0.5347022157037348, "grad_norm": 2.3361642360687256, "learning_rate": 1.9633628067078997e-05, "loss": 0.5069, "step": 10208 }, { "epoch": 0.5377402964747787, "grad_norm": 3.6974456310272217, "learning_rate": 1.9624086925948333e-05, "loss": 0.4604, "step": 10266 }, { "epoch": 0.5407783772458227, "grad_norm": 3.7012462615966797, "learning_rate": 1.9614425519132654e-05, "loss": 0.5368, "step": 10324 }, { "epoch": 0.5438164580168666, "grad_norm": 0.9825100898742676, "learning_rate": 1.9604643967364013e-05, "loss": 0.4917, "step": 10382 }, { "epoch": 0.5468545387879106, "grad_norm": 2.8980348110198975, "learning_rate": 1.959474239287582e-05, "loss": 0.4571, "step": 10440 }, { "epoch": 0.5498926195589545, "grad_norm": 6.615330696105957, "learning_rate": 1.9584720919401342e-05, "loss": 0.4949, "step": 10498 }, { "epoch": 0.5529307003299985, "grad_norm": 4.613067626953125, "learning_rate": 1.9574579672172126e-05, "loss": 0.4072, "step": 10556 }, { "epoch": 0.5559687811010424, "grad_norm": 3.3984858989715576, "learning_rate": 1.9564318777916456e-05, "loss": 0.412, "step": 10614 }, { "epoch": 0.5590068618720864, "grad_norm": 5.624422550201416, "learning_rate": 1.9553938364857775e-05, "loss": 0.4781, "step": 10672 }, { "epoch": 0.5620449426431303, "grad_norm": 4.486995697021484, "learning_rate": 1.954343856271306e-05, "loss": 0.4426, "step": 10730 }, { "epoch": 0.5650830234141743, "grad_norm": 2.862964391708374, "learning_rate": 1.953281950269121e-05, "loss": 0.506, "step": 10788 }, { "epoch": 0.5681211041852182, "grad_norm": 0.556151807308197, "learning_rate": 1.9522267467101615e-05, "loss": 0.4095, "step": 10846 }, { "epoch": 0.5711591849562622, "grad_norm": 3.035536527633667, "learning_rate": 1.9511412341335318e-05, "loss": 0.517, "step": 10904 }, { "epoch": 0.574197265727306, "grad_norm": 3.8603086471557617, "learning_rate": 1.950043835790185e-05, "loss": 0.4344, "step": 10962 }, { "epoch": 0.57723534649835, "grad_norm": 4.4469499588012695, "learning_rate": 1.9489345653935635e-05, "loss": 0.4774, "step": 11020 }, { "epoch": 0.5802734272693939, "grad_norm": 3.4457666873931885, "learning_rate": 1.9478134368054676e-05, "loss": 0.4274, "step": 11078 }, { "epoch": 0.5833115080404379, "grad_norm": 3.056290864944458, "learning_rate": 1.9466804640358798e-05, "loss": 0.4432, "step": 11136 }, { "epoch": 0.5863495888114818, "grad_norm": 4.071867942810059, "learning_rate": 1.9455356612427928e-05, "loss": 0.4344, "step": 11194 }, { "epoch": 0.5893876695825258, "grad_norm": 2.0395846366882324, "learning_rate": 1.9443790427320303e-05, "loss": 0.4714, "step": 11252 }, { "epoch": 0.5924257503535697, "grad_norm": 4.563007354736328, "learning_rate": 1.9432106229570685e-05, "loss": 0.5157, "step": 11310 }, { "epoch": 0.5954638311246137, "grad_norm": 3.7986621856689453, "learning_rate": 1.9420304165188574e-05, "loss": 0.4977, "step": 11368 }, { "epoch": 0.5985019118956576, "grad_norm": 5.301217555999756, "learning_rate": 1.9408384381656358e-05, "loss": 0.4662, "step": 11426 }, { "epoch": 0.6015399926667016, "grad_norm": 2.3288731575012207, "learning_rate": 1.939634702792749e-05, "loss": 0.4493, "step": 11484 }, { "epoch": 0.6045780734377455, "grad_norm": 3.7128169536590576, "learning_rate": 1.9384192254424606e-05, "loss": 0.4865, "step": 11542 }, { "epoch": 0.6076161542087894, "grad_norm": 4.314477920532227, "learning_rate": 1.9371920213037665e-05, "loss": 0.4715, "step": 11600 }, { "epoch": 0.6106542349798334, "grad_norm": 2.6989047527313232, "learning_rate": 1.935953105712205e-05, "loss": 0.4345, "step": 11658 }, { "epoch": 0.6136923157508773, "grad_norm": 3.5463671684265137, "learning_rate": 1.9347024941496628e-05, "loss": 0.4611, "step": 11716 }, { "epoch": 0.6167303965219213, "grad_norm": 4.914857387542725, "learning_rate": 1.9334402022441848e-05, "loss": 0.4952, "step": 11774 }, { "epoch": 0.6197684772929652, "grad_norm": 1.5133031606674194, "learning_rate": 1.932188309270537e-05, "loss": 0.4401, "step": 11832 }, { "epoch": 0.6228065580640092, "grad_norm": 2.30916428565979, "learning_rate": 1.9309029048500578e-05, "loss": 0.4177, "step": 11890 }, { "epoch": 0.6258446388350531, "grad_norm": 2.825598955154419, "learning_rate": 1.929605867567532e-05, "loss": 0.4529, "step": 11948 }, { "epoch": 0.6288827196060971, "grad_norm": 5.285458087921143, "learning_rate": 1.9282972136311554e-05, "loss": 0.4806, "step": 12006 }, { "epoch": 0.631920800377141, "grad_norm": 2.597923755645752, "learning_rate": 1.9269769593942872e-05, "loss": 0.4566, "step": 12064 }, { "epoch": 0.634958881148185, "grad_norm": 2.780212640762329, "learning_rate": 1.9256451213552497e-05, "loss": 0.4725, "step": 12122 }, { "epoch": 0.6379969619192289, "grad_norm": 4.1638031005859375, "learning_rate": 1.9243017161571194e-05, "loss": 0.463, "step": 12180 }, { "epoch": 0.6410350426902729, "grad_norm": 4.174670219421387, "learning_rate": 1.9229467605875196e-05, "loss": 0.5236, "step": 12238 }, { "epoch": 0.6440731234613168, "grad_norm": 1.9128369092941284, "learning_rate": 1.9215802715784096e-05, "loss": 0.4621, "step": 12296 }, { "epoch": 0.6471112042323608, "grad_norm": 4.490901947021484, "learning_rate": 1.9202022662058773e-05, "loss": 0.4517, "step": 12354 }, { "epoch": 0.6501492850034047, "grad_norm": 4.426553726196289, "learning_rate": 1.9188127616899202e-05, "loss": 0.488, "step": 12412 }, { "epoch": 0.6531873657744487, "grad_norm": 3.694254159927368, "learning_rate": 1.917411775394233e-05, "loss": 0.4705, "step": 12470 }, { "epoch": 0.6562254465454926, "grad_norm": 2.3134400844573975, "learning_rate": 1.9159993248259916e-05, "loss": 0.4402, "step": 12528 }, { "epoch": 0.6592635273165366, "grad_norm": 2.868987798690796, "learning_rate": 1.9145754276356323e-05, "loss": 0.4085, "step": 12586 }, { "epoch": 0.6623016080875805, "grad_norm": 3.815828323364258, "learning_rate": 1.9131401016166326e-05, "loss": 0.5569, "step": 12644 }, { "epoch": 0.6653396888586245, "grad_norm": 2.865863800048828, "learning_rate": 1.911693364705287e-05, "loss": 0.4515, "step": 12702 }, { "epoch": 0.6683777696296684, "grad_norm": 3.7862603664398193, "learning_rate": 1.9102352349804865e-05, "loss": 0.4685, "step": 12760 }, { "epoch": 0.6714158504007124, "grad_norm": 2.3399360179901123, "learning_rate": 1.9087657306634884e-05, "loss": 0.5087, "step": 12818 }, { "epoch": 0.6744539311717563, "grad_norm": 3.208674430847168, "learning_rate": 1.9072848701176905e-05, "loss": 0.4322, "step": 12876 }, { "epoch": 0.6774920119428003, "grad_norm": 1.2508207559585571, "learning_rate": 1.9057926718484036e-05, "loss": 0.39, "step": 12934 }, { "epoch": 0.6805300927138442, "grad_norm": 3.029885768890381, "learning_rate": 1.9042891545026164e-05, "loss": 0.4881, "step": 12992 }, { "epoch": 0.6835681734848882, "grad_norm": 1.723024606704712, "learning_rate": 1.9028005500450692e-05, "loss": 0.4016, "step": 13050 }, { "epoch": 0.6866062542559321, "grad_norm": 3.0587244033813477, "learning_rate": 1.9012746453978195e-05, "loss": 0.451, "step": 13108 }, { "epoch": 0.6896443350269761, "grad_norm": 3.979196548461914, "learning_rate": 1.899737478132781e-05, "loss": 0.4584, "step": 13166 }, { "epoch": 0.69268241579802, "grad_norm": 3.2428181171417236, "learning_rate": 1.8981890674588902e-05, "loss": 0.4419, "step": 13224 }, { "epoch": 0.695720496569064, "grad_norm": 1.9672743082046509, "learning_rate": 1.8966294327255843e-05, "loss": 0.4463, "step": 13282 }, { "epoch": 0.6987585773401079, "grad_norm": 3.543287754058838, "learning_rate": 1.895058593422561e-05, "loss": 0.5232, "step": 13340 }, { "epoch": 0.7017966581111519, "grad_norm": 2.751725435256958, "learning_rate": 1.8934765691795337e-05, "loss": 0.4627, "step": 13398 }, { "epoch": 0.7048347388821958, "grad_norm": 3.9089314937591553, "learning_rate": 1.8918833797659854e-05, "loss": 0.4701, "step": 13456 }, { "epoch": 0.7078728196532398, "grad_norm": 2.623382806777954, "learning_rate": 1.890279045090924e-05, "loss": 0.4627, "step": 13514 }, { "epoch": 0.7109109004242837, "grad_norm": 3.44734263420105, "learning_rate": 1.8886635852026307e-05, "loss": 0.5063, "step": 13572 }, { "epoch": 0.7139489811953277, "grad_norm": 4.096603870391846, "learning_rate": 1.887037020288412e-05, "loss": 0.4205, "step": 13630 }, { "epoch": 0.7169870619663716, "grad_norm": 3.9694747924804688, "learning_rate": 1.8853993706743465e-05, "loss": 0.479, "step": 13688 }, { "epoch": 0.7200251427374156, "grad_norm": 2.2461936473846436, "learning_rate": 1.88375065682503e-05, "loss": 0.4222, "step": 13746 }, { "epoch": 0.7230632235084595, "grad_norm": 4.268979549407959, "learning_rate": 1.882090899343321e-05, "loss": 0.4013, "step": 13804 }, { "epoch": 0.7261013042795035, "grad_norm": 2.9464776515960693, "learning_rate": 1.8804201189700833e-05, "loss": 0.5184, "step": 13862 }, { "epoch": 0.7291393850505474, "grad_norm": 3.1404519081115723, "learning_rate": 1.8787383365839248e-05, "loss": 0.4451, "step": 13920 }, { "epoch": 0.7321774658215914, "grad_norm": 3.048670530319214, "learning_rate": 1.8770455732009393e-05, "loss": 0.457, "step": 13978 }, { "epoch": 0.7352155465926353, "grad_norm": 3.074151039123535, "learning_rate": 1.8753418499744426e-05, "loss": 0.4711, "step": 14036 }, { "epoch": 0.7382536273636792, "grad_norm": 4.1698150634765625, "learning_rate": 1.873627188194708e-05, "loss": 0.4281, "step": 14094 }, { "epoch": 0.7412917081347231, "grad_norm": 2.6520071029663086, "learning_rate": 1.8719016092887e-05, "loss": 0.497, "step": 14152 }, { "epoch": 0.7443297889057671, "grad_norm": 1.3818339109420776, "learning_rate": 1.870165134819808e-05, "loss": 0.4234, "step": 14210 }, { "epoch": 0.747367869676811, "grad_norm": 3.5460736751556396, "learning_rate": 1.868417786487575e-05, "loss": 0.4444, "step": 14268 }, { "epoch": 0.750405950447855, "grad_norm": 2.8102331161499023, "learning_rate": 1.8666595861274283e-05, "loss": 0.4159, "step": 14326 }, { "epoch": 0.7534440312188989, "grad_norm": 3.3770508766174316, "learning_rate": 1.8648905557104046e-05, "loss": 0.4357, "step": 14384 }, { "epoch": 0.7564821119899429, "grad_norm": 2.95613169670105, "learning_rate": 1.863110717342876e-05, "loss": 0.4627, "step": 14442 }, { "epoch": 0.7595201927609868, "grad_norm": 2.80786395072937, "learning_rate": 1.8613200932662764e-05, "loss": 0.4331, "step": 14500 }, { "epoch": 0.7625582735320308, "grad_norm": 2.9433181285858154, "learning_rate": 1.8595187058568197e-05, "loss": 0.5087, "step": 14558 }, { "epoch": 0.7655963543030747, "grad_norm": 2.6625008583068848, "learning_rate": 1.8577065776252218e-05, "loss": 0.5018, "step": 14616 }, { "epoch": 0.7686344350741187, "grad_norm": 3.8713533878326416, "learning_rate": 1.8558837312164198e-05, "loss": 0.4454, "step": 14674 }, { "epoch": 0.7716725158451626, "grad_norm": 3.236130475997925, "learning_rate": 1.8540501894092894e-05, "loss": 0.4463, "step": 14732 }, { "epoch": 0.7747105966162066, "grad_norm": 1.9471006393432617, "learning_rate": 1.8522059751163578e-05, "loss": 0.4615, "step": 14790 }, { "epoch": 0.7777486773872505, "grad_norm": 1.1234129667282104, "learning_rate": 1.85035111138352e-05, "loss": 0.3841, "step": 14848 }, { "epoch": 0.7807867581582945, "grad_norm": 3.155194044113159, "learning_rate": 1.8484856213897496e-05, "loss": 0.4932, "step": 14906 }, { "epoch": 0.7838248389293384, "grad_norm": 1.2532273530960083, "learning_rate": 1.8466095284468103e-05, "loss": 0.427, "step": 14964 }, { "epoch": 0.7868629197003824, "grad_norm": 3.315812349319458, "learning_rate": 1.8447228559989618e-05, "loss": 0.4406, "step": 15022 }, { "epoch": 0.7899010004714263, "grad_norm": 2.3999452590942383, "learning_rate": 1.842858427754608e-05, "loss": 0.4413, "step": 15080 }, { "epoch": 0.7929390812424703, "grad_norm": 2.896650791168213, "learning_rate": 1.8409508485466538e-05, "loss": 0.4068, "step": 15138 }, { "epoch": 0.7959771620135142, "grad_norm": 3.3152272701263428, "learning_rate": 1.8390327605464747e-05, "loss": 0.4708, "step": 15196 }, { "epoch": 0.7990152427845582, "grad_norm": 2.573716163635254, "learning_rate": 1.8371041877231145e-05, "loss": 0.4506, "step": 15254 }, { "epoch": 0.8020533235556021, "grad_norm": 1.0098395347595215, "learning_rate": 1.8351651541766398e-05, "loss": 0.4614, "step": 15312 }, { "epoch": 0.805091404326646, "grad_norm": 2.7257494926452637, "learning_rate": 1.8332156841378376e-05, "loss": 0.481, "step": 15370 }, { "epoch": 0.80812948509769, "grad_norm": 3.291948080062866, "learning_rate": 1.8312558019679113e-05, "loss": 0.4872, "step": 15428 }, { "epoch": 0.811167565868734, "grad_norm": 0.6372181177139282, "learning_rate": 1.82928553215818e-05, "loss": 0.4664, "step": 15486 }, { "epoch": 0.8142056466397779, "grad_norm": 2.14487361907959, "learning_rate": 1.8273048993297682e-05, "loss": 0.4443, "step": 15544 }, { "epoch": 0.8172437274108219, "grad_norm": 1.6703099012374878, "learning_rate": 1.8253139282333005e-05, "loss": 0.4683, "step": 15602 }, { "epoch": 0.8202818081818658, "grad_norm": 3.7610647678375244, "learning_rate": 1.8233126437485925e-05, "loss": 0.4299, "step": 15660 }, { "epoch": 0.8233198889529098, "grad_norm": 3.429608106613159, "learning_rate": 1.821301070884338e-05, "loss": 0.3976, "step": 15718 }, { "epoch": 0.8263579697239537, "grad_norm": 3.0198211669921875, "learning_rate": 1.819279234777799e-05, "loss": 0.407, "step": 15776 }, { "epoch": 0.8293960504949976, "grad_norm": 3.1796703338623047, "learning_rate": 1.817247160694489e-05, "loss": 0.4235, "step": 15834 }, { "epoch": 0.8324341312660416, "grad_norm": 2.235328197479248, "learning_rate": 1.81520487402786e-05, "loss": 0.403, "step": 15892 }, { "epoch": 0.8354722120370855, "grad_norm": 3.7409324645996094, "learning_rate": 1.8131524002989816e-05, "loss": 0.4325, "step": 15950 }, { "epoch": 0.8385102928081295, "grad_norm": 2.7824206352233887, "learning_rate": 1.811089765156227e-05, "loss": 0.432, "step": 16008 }, { "epoch": 0.8415483735791734, "grad_norm": 1.6856441497802734, "learning_rate": 1.8090528175270648e-05, "loss": 0.4156, "step": 16066 }, { "epoch": 0.8445864543502174, "grad_norm": 2.9528396129608154, "learning_rate": 1.8069701110949214e-05, "loss": 0.4486, "step": 16124 }, { "epoch": 0.8476245351212613, "grad_norm": 1.5881803035736084, "learning_rate": 1.8048773205047752e-05, "loss": 0.4133, "step": 16182 }, { "epoch": 0.8506626158923053, "grad_norm": 5.50068473815918, "learning_rate": 1.8027744719088103e-05, "loss": 0.4553, "step": 16240 }, { "epoch": 0.8537006966633492, "grad_norm": 3.2943098545074463, "learning_rate": 1.800661591584899e-05, "loss": 0.4428, "step": 16298 }, { "epoch": 0.8567387774343932, "grad_norm": 1.3706376552581787, "learning_rate": 1.798538705936273e-05, "loss": 0.4779, "step": 16356 }, { "epoch": 0.8597768582054371, "grad_norm": 2.18271541595459, "learning_rate": 1.796405841491194e-05, "loss": 0.4687, "step": 16414 }, { "epoch": 0.8628149389764811, "grad_norm": 2.5106441974639893, "learning_rate": 1.794263024902622e-05, "loss": 0.4016, "step": 16472 }, { "epoch": 0.865853019747525, "grad_norm": 2.757732629776001, "learning_rate": 1.7921102829478832e-05, "loss": 0.4948, "step": 16530 }, { "epoch": 0.868891100518569, "grad_norm": 0.37621229887008667, "learning_rate": 1.7899476425283318e-05, "loss": 0.4304, "step": 16588 }, { "epoch": 0.8719291812896129, "grad_norm": 4.135168552398682, "learning_rate": 1.787775130669019e-05, "loss": 0.4195, "step": 16646 }, { "epoch": 0.8749672620606569, "grad_norm": 2.2052392959594727, "learning_rate": 1.7855927745183504e-05, "loss": 0.4449, "step": 16704 }, { "epoch": 0.8780053428317008, "grad_norm": 2.8733346462249756, "learning_rate": 1.7834006013477513e-05, "loss": 0.5016, "step": 16762 }, { "epoch": 0.8810434236027448, "grad_norm": 1.8927271366119385, "learning_rate": 1.7811986385513226e-05, "loss": 0.3793, "step": 16820 }, { "epoch": 0.8840815043737887, "grad_norm": 3.7612531185150146, "learning_rate": 1.7789869136454988e-05, "loss": 0.3601, "step": 16878 }, { "epoch": 0.8871195851448327, "grad_norm": 1.6613848209381104, "learning_rate": 1.7767654542687057e-05, "loss": 0.4772, "step": 16936 }, { "epoch": 0.8901576659158766, "grad_norm": 2.5755159854888916, "learning_rate": 1.7745342881810144e-05, "loss": 0.4475, "step": 16994 }, { "epoch": 0.8931957466869206, "grad_norm": 2.7520928382873535, "learning_rate": 1.7722934432637937e-05, "loss": 0.3942, "step": 17052 }, { "epoch": 0.8962338274579645, "grad_norm": 4.439705848693848, "learning_rate": 1.770042947519362e-05, "loss": 0.4361, "step": 17110 }, { "epoch": 0.8992719082290085, "grad_norm": 2.091926097869873, "learning_rate": 1.7677828290706382e-05, "loss": 0.42, "step": 17168 }, { "epoch": 0.9000052380702949, "eval_accuracy": 0.8844256401062012, "eval_loss": 0.44025254249572754, "eval_runtime": 5730.5358, "eval_samples_per_second": 0.835, "eval_steps_per_second": 0.835, "step": 17182 } ], "logging_steps": 58, "max_steps": 57273, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 17182, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.194591202500936e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }