{
  "best_metric": 0.8844256401062012,
  "best_model_checkpoint": "/workspace/previous_works/MedBLIP/output/MedBLIP-0005/checkpoint-17182",
  "epoch": 0.9000052380702949,
  "eval_steps": 17182,
  "global_step": 17182,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0030380807710439473,
      "grad_norm": 119.46666717529297,
      "learning_rate": 1.3268156424581008e-07,
      "loss": 5.0724,
      "step": 58
    },
    {
      "epoch": 0.006076161542087895,
      "grad_norm": 34.45277786254883,
      "learning_rate": 3.3519553072625703e-07,
      "loss": 3.6479,
      "step": 116
    },
    {
      "epoch": 0.009114242313131841,
      "grad_norm": 14.232598304748535,
      "learning_rate": 5.37709497206704e-07,
      "loss": 2.2356,
      "step": 174
    },
    {
      "epoch": 0.01215232308417579,
      "grad_norm": 10.709288597106934,
      "learning_rate": 7.402234636871509e-07,
      "loss": 1.9953,
      "step": 232
    },
    {
      "epoch": 0.015190403855219737,
      "grad_norm": 7.259119987487793,
      "learning_rate": 9.427374301675979e-07,
      "loss": 1.8188,
      "step": 290
    },
    {
      "epoch": 0.018228484626263683,
      "grad_norm": 6.452203273773193,
      "learning_rate": 1.1452513966480447e-06,
      "loss": 1.5639,
      "step": 348
    },
    {
      "epoch": 0.021266565397307632,
      "grad_norm": 9.458000183105469,
      "learning_rate": 1.3477653631284918e-06,
      "loss": 1.504,
      "step": 406
    },
    {
      "epoch": 0.02430464616835158,
      "grad_norm": 6.162489414215088,
      "learning_rate": 1.5502793296089386e-06,
      "loss": 1.2581,
      "step": 464
    },
    {
      "epoch": 0.027342726939395528,
      "grad_norm": 6.684648513793945,
      "learning_rate": 1.7527932960893857e-06,
      "loss": 1.1713,
      "step": 522
    },
    {
      "epoch": 0.030380807710439474,
      "grad_norm": 6.966240882873535,
      "learning_rate": 1.9553072625698325e-06,
      "loss": 1.0504,
      "step": 580
    },
    {
      "epoch": 0.03341888848148342,
      "grad_norm": 6.379108905792236,
      "learning_rate": 2.1578212290502796e-06,
      "loss": 0.968,
      "step": 638
    },
    {
      "epoch": 0.036456969252527366,
      "grad_norm": 5.3792619705200195,
      "learning_rate": 2.3603351955307262e-06,
      "loss": 0.9209,
      "step": 696
    },
    {
      "epoch": 0.039495050023571315,
      "grad_norm": 6.413719177246094,
      "learning_rate": 2.5628491620111733e-06,
      "loss": 0.8723,
      "step": 754
    },
    {
      "epoch": 0.042533130794615265,
      "grad_norm": 8.826435089111328,
      "learning_rate": 2.7653631284916204e-06,
      "loss": 0.8652,
      "step": 812
    },
    {
      "epoch": 0.045571211565659214,
      "grad_norm": 6.294381618499756,
      "learning_rate": 2.9678770949720674e-06,
      "loss": 0.8099,
      "step": 870
    },
    {
      "epoch": 0.04860929233670316,
      "grad_norm": 7.355430603027344,
      "learning_rate": 3.170391061452514e-06,
      "loss": 0.7396,
      "step": 928
    },
    {
      "epoch": 0.051647373107747106,
      "grad_norm": 10.120753288269043,
      "learning_rate": 3.372905027932961e-06,
      "loss": 0.7511,
      "step": 986
    },
    {
      "epoch": 0.054685453878791056,
      "grad_norm": 5.653336048126221,
      "learning_rate": 3.575418994413408e-06,
      "loss": 0.6688,
      "step": 1044
    },
    {
      "epoch": 0.057723534649835,
      "grad_norm": 5.749114990234375,
      "learning_rate": 3.7779329608938552e-06,
      "loss": 0.677,
      "step": 1102
    },
    {
      "epoch": 0.06076161542087895,
      "grad_norm": 7.65744686126709,
      "learning_rate": 3.980446927374302e-06,
      "loss": 0.7195,
      "step": 1160
    },
    {
      "epoch": 0.0637996961919229,
      "grad_norm": 9.296794891357422,
      "learning_rate": 4.1829608938547485e-06,
      "loss": 0.7598,
      "step": 1218
    },
    {
      "epoch": 0.06683777696296685,
      "grad_norm": 6.070080757141113,
      "learning_rate": 4.385474860335196e-06,
      "loss": 0.7015,
      "step": 1276
    },
    {
      "epoch": 0.0698758577340108,
      "grad_norm": 6.638489723205566,
      "learning_rate": 4.5879888268156435e-06,
      "loss": 0.6778,
      "step": 1334
    },
    {
      "epoch": 0.07291393850505473,
      "grad_norm": 9.076967239379883,
      "learning_rate": 4.790502793296089e-06,
      "loss": 0.5551,
      "step": 1392
    },
    {
      "epoch": 0.07595201927609868,
      "grad_norm": 6.929805278778076,
      "learning_rate": 4.993016759776537e-06,
      "loss": 0.6792,
      "step": 1450
    },
    {
      "epoch": 0.07899010004714263,
      "grad_norm": 8.316506385803223,
      "learning_rate": 5.195530726256983e-06,
      "loss": 0.6379,
      "step": 1508
    },
    {
      "epoch": 0.08202818081818658,
      "grad_norm": 9.471745491027832,
      "learning_rate": 5.398044692737431e-06,
      "loss": 0.6242,
      "step": 1566
    },
    {
      "epoch": 0.08506626158923053,
      "grad_norm": 6.022659778594971,
      "learning_rate": 5.6005586592178775e-06,
      "loss": 0.6368,
      "step": 1624
    },
    {
      "epoch": 0.08810434236027448,
      "grad_norm": 7.15187406539917,
      "learning_rate": 5.803072625698325e-06,
      "loss": 0.5381,
      "step": 1682
    },
    {
      "epoch": 0.09114242313131843,
      "grad_norm": 7.10537052154541,
      "learning_rate": 6.005586592178772e-06,
      "loss": 0.6885,
      "step": 1740
    },
    {
      "epoch": 0.09418050390236236,
      "grad_norm": 5.685272216796875,
      "learning_rate": 6.208100558659218e-06,
      "loss": 0.6066,
      "step": 1798
    },
    {
      "epoch": 0.09721858467340631,
      "grad_norm": 6.733754634857178,
      "learning_rate": 6.410614525139666e-06,
      "loss": 0.6124,
      "step": 1856
    },
    {
      "epoch": 0.10025666544445026,
      "grad_norm": 6.112730026245117,
      "learning_rate": 6.613128491620112e-06,
      "loss": 0.6224,
      "step": 1914
    },
    {
      "epoch": 0.10329474621549421,
      "grad_norm": 5.784328460693359,
      "learning_rate": 6.815642458100559e-06,
      "loss": 0.6289,
      "step": 1972
    },
    {
      "epoch": 0.10633282698653816,
      "grad_norm": 9.69115924835205,
      "learning_rate": 7.0181564245810065e-06,
      "loss": 0.5952,
      "step": 2030
    },
    {
      "epoch": 0.10937090775758211,
      "grad_norm": 5.509926795959473,
      "learning_rate": 7.220670391061453e-06,
      "loss": 0.6288,
      "step": 2088
    },
    {
      "epoch": 0.11240898852862606,
      "grad_norm": 6.545931339263916,
      "learning_rate": 7.423184357541901e-06,
      "loss": 0.5243,
      "step": 2146
    },
    {
      "epoch": 0.11544706929967,
      "grad_norm": 7.0921173095703125,
      "learning_rate": 7.625698324022347e-06,
      "loss": 0.5527,
      "step": 2204
    },
    {
      "epoch": 0.11848515007071395,
      "grad_norm": 5.278844833374023,
      "learning_rate": 7.828212290502794e-06,
      "loss": 0.6739,
      "step": 2262
    },
    {
      "epoch": 0.1215232308417579,
      "grad_norm": 5.799619197845459,
      "learning_rate": 8.03072625698324e-06,
      "loss": 0.5913,
      "step": 2320
    },
    {
      "epoch": 0.12456131161280184,
      "grad_norm": 7.148493766784668,
      "learning_rate": 8.233240223463687e-06,
      "loss": 0.6117,
      "step": 2378
    },
    {
      "epoch": 0.1275993923838458,
      "grad_norm": 5.538400650024414,
      "learning_rate": 8.435754189944135e-06,
      "loss": 0.5512,
      "step": 2436
    },
    {
      "epoch": 0.13063747315488974,
      "grad_norm": 5.364485740661621,
      "learning_rate": 8.638268156424582e-06,
      "loss": 0.5458,
      "step": 2494
    },
    {
      "epoch": 0.1336755539259337,
      "grad_norm": 7.3765549659729,
      "learning_rate": 8.840782122905029e-06,
      "loss": 0.5847,
      "step": 2552
    },
    {
      "epoch": 0.13671363469697764,
      "grad_norm": 5.364510536193848,
      "learning_rate": 9.043296089385475e-06,
      "loss": 0.542,
      "step": 2610
    },
    {
      "epoch": 0.1397517154680216,
      "grad_norm": 3.656923770904541,
      "learning_rate": 9.245810055865922e-06,
      "loss": 0.5528,
      "step": 2668
    },
    {
      "epoch": 0.14278979623906554,
      "grad_norm": 3.117631435394287,
      "learning_rate": 9.448324022346369e-06,
      "loss": 0.5793,
      "step": 2726
    },
    {
      "epoch": 0.14582787701010946,
      "grad_norm": 6.822358131408691,
      "learning_rate": 9.650837988826817e-06,
      "loss": 0.5967,
      "step": 2784
    },
    {
      "epoch": 0.1488659577811534,
      "grad_norm": 4.629315376281738,
      "learning_rate": 9.853351955307264e-06,
      "loss": 0.5945,
      "step": 2842
    },
    {
      "epoch": 0.15190403855219736,
      "grad_norm": 3.6398866176605225,
      "learning_rate": 1.005586592178771e-05,
      "loss": 0.6005,
      "step": 2900
    },
    {
      "epoch": 0.1549421193232413,
      "grad_norm": 5.811204433441162,
      "learning_rate": 1.0258379888268157e-05,
      "loss": 0.5065,
      "step": 2958
    },
    {
      "epoch": 0.15798020009428526,
      "grad_norm": 3.8507301807403564,
      "learning_rate": 1.0460893854748604e-05,
      "loss": 0.5802,
      "step": 3016
    },
    {
      "epoch": 0.1610182808653292,
      "grad_norm": 5.666468143463135,
      "learning_rate": 1.066340782122905e-05,
      "loss": 0.5342,
      "step": 3074
    },
    {
      "epoch": 0.16405636163637316,
      "grad_norm": 2.025376558303833,
      "learning_rate": 1.0865921787709498e-05,
      "loss": 0.5271,
      "step": 3132
    },
    {
      "epoch": 0.1670944424074171,
      "grad_norm": 5.698912143707275,
      "learning_rate": 1.1068435754189945e-05,
      "loss": 0.538,
      "step": 3190
    },
    {
      "epoch": 0.17013252317846106,
      "grad_norm": 4.067931652069092,
      "learning_rate": 1.1270949720670392e-05,
      "loss": 0.5601,
      "step": 3248
    },
    {
      "epoch": 0.173170603949505,
      "grad_norm": 5.068817138671875,
      "learning_rate": 1.1473463687150838e-05,
      "loss": 0.5528,
      "step": 3306
    },
    {
      "epoch": 0.17620868472054896,
      "grad_norm": 7.116920471191406,
      "learning_rate": 1.1675977653631285e-05,
      "loss": 0.5418,
      "step": 3364
    },
    {
      "epoch": 0.1792467654915929,
      "grad_norm": 8.113608360290527,
      "learning_rate": 1.1878491620111732e-05,
      "loss": 0.5196,
      "step": 3422
    },
    {
      "epoch": 0.18228484626263686,
      "grad_norm": 4.820245265960693,
      "learning_rate": 1.208100558659218e-05,
      "loss": 0.6029,
      "step": 3480
    },
    {
      "epoch": 0.18532292703368078,
      "grad_norm": 1.1077276468276978,
      "learning_rate": 1.2283519553072627e-05,
      "loss": 0.5779,
      "step": 3538
    },
    {
      "epoch": 0.18836100780472473,
      "grad_norm": 3.024624824523926,
      "learning_rate": 1.2486033519553073e-05,
      "loss": 0.5091,
      "step": 3596
    },
    {
      "epoch": 0.19139908857576868,
      "grad_norm": 8.059369087219238,
      "learning_rate": 1.268854748603352e-05,
      "loss": 0.5012,
      "step": 3654
    },
    {
      "epoch": 0.19443716934681263,
      "grad_norm": 3.9895098209381104,
      "learning_rate": 1.2891061452513967e-05,
      "loss": 0.578,
      "step": 3712
    },
    {
      "epoch": 0.19747525011785658,
      "grad_norm": 7.111061096191406,
      "learning_rate": 1.3093575418994415e-05,
      "loss": 0.4958,
      "step": 3770
    },
    {
      "epoch": 0.20051333088890053,
      "grad_norm": 5.541796684265137,
      "learning_rate": 1.3296089385474861e-05,
      "loss": 0.589,
      "step": 3828
    },
    {
      "epoch": 0.20355141165994448,
      "grad_norm": 4.365527629852295,
      "learning_rate": 1.3498603351955308e-05,
      "loss": 0.6003,
      "step": 3886
    },
    {
      "epoch": 0.20658949243098843,
      "grad_norm": 4.486824035644531,
      "learning_rate": 1.3701117318435755e-05,
      "loss": 0.6135,
      "step": 3944
    },
    {
      "epoch": 0.20962757320203237,
      "grad_norm": 5.487951278686523,
      "learning_rate": 1.3903631284916201e-05,
      "loss": 0.6234,
      "step": 4002
    },
    {
      "epoch": 0.21266565397307632,
      "grad_norm": 2.3850884437561035,
      "learning_rate": 1.410614525139665e-05,
      "loss": 0.5536,
      "step": 4060
    },
    {
      "epoch": 0.21570373474412027,
      "grad_norm": 3.7957749366760254,
      "learning_rate": 1.4305167597765364e-05,
      "loss": 0.5211,
      "step": 4118
    },
    {
      "epoch": 0.21874181551516422,
      "grad_norm": 5.272162437438965,
      "learning_rate": 1.450768156424581e-05,
      "loss": 0.4992,
      "step": 4176
    },
    {
      "epoch": 0.22177989628620817,
      "grad_norm": 4.292142868041992,
      "learning_rate": 1.4710195530726259e-05,
      "loss": 0.5124,
      "step": 4234
    },
    {
      "epoch": 0.22481797705725212,
      "grad_norm": 4.948460102081299,
      "learning_rate": 1.4912709497206705e-05,
      "loss": 0.5575,
      "step": 4292
    },
    {
      "epoch": 0.22785605782829604,
      "grad_norm": 3.456590414047241,
      "learning_rate": 1.5115223463687152e-05,
      "loss": 0.5672,
      "step": 4350
    },
    {
      "epoch": 0.23089413859934,
      "grad_norm": 5.341044902801514,
      "learning_rate": 1.53177374301676e-05,
      "loss": 0.5106,
      "step": 4408
    },
    {
      "epoch": 0.23393221937038394,
      "grad_norm": 5.5106987953186035,
      "learning_rate": 1.5520251396648043e-05,
      "loss": 0.6037,
      "step": 4466
    },
    {
      "epoch": 0.2369703001414279,
      "grad_norm": 4.941389560699463,
      "learning_rate": 1.5722765363128495e-05,
      "loss": 0.5995,
      "step": 4524
    },
    {
      "epoch": 0.24000838091247184,
      "grad_norm": 3.719957113265991,
      "learning_rate": 1.592527932960894e-05,
      "loss": 0.5531,
      "step": 4582
    },
    {
      "epoch": 0.2430464616835158,
      "grad_norm": 4.435623645782471,
      "learning_rate": 1.612779329608939e-05,
      "loss": 0.5502,
      "step": 4640
    },
    {
      "epoch": 0.24608454245455974,
      "grad_norm": 4.688556671142578,
      "learning_rate": 1.6330307262569833e-05,
      "loss": 0.5056,
      "step": 4698
    },
    {
      "epoch": 0.2491226232256037,
      "grad_norm": 5.511931896209717,
      "learning_rate": 1.653282122905028e-05,
      "loss": 0.5242,
      "step": 4756
    },
    {
      "epoch": 0.25216070399664764,
      "grad_norm": 4.933206558227539,
      "learning_rate": 1.673533519553073e-05,
      "loss": 0.5115,
      "step": 4814
    },
    {
      "epoch": 0.2551987847676916,
      "grad_norm": 2.942838191986084,
      "learning_rate": 1.6937849162011175e-05,
      "loss": 0.5941,
      "step": 4872
    },
    {
      "epoch": 0.25823686553873554,
      "grad_norm": 4.0710625648498535,
      "learning_rate": 1.7140363128491623e-05,
      "loss": 0.4887,
      "step": 4930
    },
    {
      "epoch": 0.2612749463097795,
      "grad_norm": 3.212920665740967,
      "learning_rate": 1.7342877094972068e-05,
      "loss": 0.5051,
      "step": 4988
    },
    {
      "epoch": 0.26431302708082344,
      "grad_norm": 4.390661716461182,
      "learning_rate": 1.7545391061452513e-05,
      "loss": 0.5735,
      "step": 5046
    },
    {
      "epoch": 0.2673511078518674,
      "grad_norm": 3.784395217895508,
      "learning_rate": 1.7747905027932965e-05,
      "loss": 0.4847,
      "step": 5104
    },
    {
      "epoch": 0.27038918862291134,
      "grad_norm": 4.238777160644531,
      "learning_rate": 1.795041899441341e-05,
      "loss": 0.4303,
      "step": 5162
    },
    {
      "epoch": 0.2734272693939553,
      "grad_norm": 4.616554260253906,
      "learning_rate": 1.8152932960893855e-05,
      "loss": 0.53,
      "step": 5220
    },
    {
      "epoch": 0.27646535016499924,
      "grad_norm": 3.0670206546783447,
      "learning_rate": 1.8355446927374303e-05,
      "loss": 0.5089,
      "step": 5278
    },
    {
      "epoch": 0.2795034309360432,
      "grad_norm": 5.144998550415039,
      "learning_rate": 1.8557960893854748e-05,
      "loss": 0.5352,
      "step": 5336
    },
    {
      "epoch": 0.28254151170708713,
      "grad_norm": 3.9276976585388184,
      "learning_rate": 1.8760474860335196e-05,
      "loss": 0.5001,
      "step": 5394
    },
    {
      "epoch": 0.2855795924781311,
      "grad_norm": 4.67507266998291,
      "learning_rate": 1.8962988826815645e-05,
      "loss": 0.5106,
      "step": 5452
    },
    {
      "epoch": 0.288617673249175,
      "grad_norm": 11.027061462402344,
      "learning_rate": 1.916550279329609e-05,
      "loss": 0.5311,
      "step": 5510
    },
    {
      "epoch": 0.2916557540202189,
      "grad_norm": 4.6279802322387695,
      "learning_rate": 1.9368016759776538e-05,
      "loss": 0.4722,
      "step": 5568
    },
    {
      "epoch": 0.2946938347912629,
      "grad_norm": 3.2744059562683105,
      "learning_rate": 1.9570530726256983e-05,
      "loss": 0.5435,
      "step": 5626
    },
    {
      "epoch": 0.2977319155623068,
      "grad_norm": 4.361588478088379,
      "learning_rate": 1.977304469273743e-05,
      "loss": 0.5182,
      "step": 5684
    },
    {
      "epoch": 0.3007699963333508,
      "grad_norm": 4.725919246673584,
      "learning_rate": 1.997555865921788e-05,
      "loss": 0.5108,
      "step": 5742
    },
    {
      "epoch": 0.3038080771043947,
      "grad_norm": 4.126678943634033,
      "learning_rate": 1.999995169004151e-05,
      "loss": 0.4113,
      "step": 5800
    },
    {
      "epoch": 0.3068461578754387,
      "grad_norm": 4.924627780914307,
      "learning_rate": 1.999977932757864e-05,
      "loss": 0.5911,
      "step": 5858
    },
    {
      "epoch": 0.3098842386464826,
      "grad_norm": 3.836568832397461,
      "learning_rate": 1.9999482004657697e-05,
      "loss": 0.5589,
      "step": 5916
    },
    {
      "epoch": 0.3129223194175266,
      "grad_norm": 2.1909499168395996,
      "learning_rate": 1.999905972499412e-05,
      "loss": 0.5321,
      "step": 5974
    },
    {
      "epoch": 0.3159604001885705,
      "grad_norm": 4.15761661529541,
      "learning_rate": 1.9998512493864858e-05,
      "loss": 0.4898,
      "step": 6032
    },
    {
      "epoch": 0.31899848095961447,
      "grad_norm": 4.483209133148193,
      "learning_rate": 1.9997840318108285e-05,
      "loss": 0.5339,
      "step": 6090
    },
    {
      "epoch": 0.3220365617306584,
      "grad_norm": 4.208775997161865,
      "learning_rate": 1.9997058007847493e-05,
      "loss": 0.4381,
      "step": 6148
    },
    {
      "epoch": 0.32507464250170237,
      "grad_norm": 4.7916951179504395,
      "learning_rate": 1.999613812340473e-05,
      "loss": 0.5622,
      "step": 6206
    },
    {
      "epoch": 0.3281127232727463,
      "grad_norm": 3.041353702545166,
      "learning_rate": 1.999509332400555e-05,
      "loss": 0.5797,
      "step": 6264
    },
    {
      "epoch": 0.33115080404379027,
      "grad_norm": 2.720505714416504,
      "learning_rate": 1.999392362270611e-05,
      "loss": 0.5213,
      "step": 6322
    },
    {
      "epoch": 0.3341888848148342,
      "grad_norm": 4.793097496032715,
      "learning_rate": 1.999262903412336e-05,
      "loss": 0.4593,
      "step": 6380
    },
    {
      "epoch": 0.33722696558587817,
      "grad_norm": 3.890002489089966,
      "learning_rate": 1.999120957443491e-05,
      "loss": 0.4486,
      "step": 6438
    },
    {
      "epoch": 0.3402650463569221,
      "grad_norm": 3.537182569503784,
      "learning_rate": 1.9989665261378772e-05,
      "loss": 0.4879,
      "step": 6496
    },
    {
      "epoch": 0.34330312712796607,
      "grad_norm": 1.6273483037948608,
      "learning_rate": 1.998799611425319e-05,
      "loss": 0.483,
      "step": 6554
    },
    {
      "epoch": 0.34634120789901,
      "grad_norm": 1.544161081314087,
      "learning_rate": 1.9986202153916356e-05,
      "loss": 0.5295,
      "step": 6612
    },
    {
      "epoch": 0.34937928867005397,
      "grad_norm": 4.516360759735107,
      "learning_rate": 1.9984283402786177e-05,
      "loss": 0.5544,
      "step": 6670
    },
    {
      "epoch": 0.3524173694410979,
      "grad_norm": 3.9603912830352783,
      "learning_rate": 1.998223988483998e-05,
      "loss": 0.5005,
      "step": 6728
    },
    {
      "epoch": 0.35545545021214187,
      "grad_norm": 4.063785076141357,
      "learning_rate": 1.998007162561423e-05,
      "loss": 0.5339,
      "step": 6786
    },
    {
      "epoch": 0.3584935309831858,
      "grad_norm": 4.825593948364258,
      "learning_rate": 1.9977778652204192e-05,
      "loss": 0.4702,
      "step": 6844
    },
    {
      "epoch": 0.36153161175422976,
      "grad_norm": 0.5379557013511658,
      "learning_rate": 1.997536099326359e-05,
      "loss": 0.5397,
      "step": 6902
    },
    {
      "epoch": 0.3645696925252737,
      "grad_norm": 3.922156810760498,
      "learning_rate": 1.9972818679004273e-05,
      "loss": 0.5663,
      "step": 6960
    },
    {
      "epoch": 0.36760777329631766,
      "grad_norm": 3.376941442489624,
      "learning_rate": 1.9970198778515604e-05,
      "loss": 0.5321,
      "step": 7018
    },
    {
      "epoch": 0.37064585406736156,
      "grad_norm": 4.569897174835205,
      "learning_rate": 1.9967409398301135e-05,
      "loss": 0.4517,
      "step": 7076
    },
    {
      "epoch": 0.3736839348384055,
      "grad_norm": 4.267284393310547,
      "learning_rate": 1.9964495462133642e-05,
      "loss": 0.5225,
      "step": 7134
    },
    {
      "epoch": 0.37672201560944946,
      "grad_norm": 4.1275506019592285,
      "learning_rate": 1.9961457006426603e-05,
      "loss": 0.5007,
      "step": 7192
    },
    {
      "epoch": 0.3797600963804934,
      "grad_norm": 4.481261253356934,
      "learning_rate": 1.995829406914954e-05,
      "loss": 0.4754,
      "step": 7250
    },
    {
      "epoch": 0.38279817715153736,
      "grad_norm": 2.876922845840454,
      "learning_rate": 1.995500668982753e-05,
      "loss": 0.4729,
      "step": 7308
    },
    {
      "epoch": 0.3858362579225813,
      "grad_norm": 1.0541763305664062,
      "learning_rate": 1.9951594909540727e-05,
      "loss": 0.5697,
      "step": 7366
    },
    {
      "epoch": 0.38887433869362525,
      "grad_norm": 3.462268114089966,
      "learning_rate": 1.9948058770923837e-05,
      "loss": 0.4803,
      "step": 7424
    },
    {
      "epoch": 0.3919124194646692,
      "grad_norm": 3.209782123565674,
      "learning_rate": 1.9944398318165578e-05,
      "loss": 0.5239,
      "step": 7482
    },
    {
      "epoch": 0.39495050023571315,
      "grad_norm": 4.3836445808410645,
      "learning_rate": 1.994061359700815e-05,
      "loss": 0.5096,
      "step": 7540
    },
    {
      "epoch": 0.3979885810067571,
      "grad_norm": 2.8133575916290283,
      "learning_rate": 1.9936704654746642e-05,
      "loss": 0.4546,
      "step": 7598
    },
    {
      "epoch": 0.40102666177780105,
      "grad_norm": 3.709463596343994,
      "learning_rate": 1.9932671540228456e-05,
      "loss": 0.4882,
      "step": 7656
    },
    {
      "epoch": 0.404064742548845,
      "grad_norm": 3.9174060821533203,
      "learning_rate": 1.992851430385269e-05,
      "loss": 0.4311,
      "step": 7714
    },
    {
      "epoch": 0.40710282331988895,
      "grad_norm": 2.9282238483428955,
      "learning_rate": 1.99242329975695e-05,
      "loss": 0.5204,
      "step": 7772
    },
    {
      "epoch": 0.4101409040909329,
      "grad_norm": 4.139567852020264,
      "learning_rate": 1.9919827674879473e-05,
      "loss": 0.4739,
      "step": 7830
    },
    {
      "epoch": 0.41317898486197685,
      "grad_norm": 3.436636447906494,
      "learning_rate": 1.9915298390832935e-05,
      "loss": 0.4838,
      "step": 7888
    },
    {
      "epoch": 0.4162170656330208,
      "grad_norm": 3.512646198272705,
      "learning_rate": 1.9910645202029272e-05,
      "loss": 0.4594,
      "step": 7946
    },
    {
      "epoch": 0.41925514640406475,
      "grad_norm": 3.1627018451690674,
      "learning_rate": 1.9905868166616234e-05,
      "loss": 0.5628,
      "step": 8004
    },
    {
      "epoch": 0.4222932271751087,
      "grad_norm": 1.1955090761184692,
      "learning_rate": 1.990096734428919e-05,
      "loss": 0.4587,
      "step": 8062
    },
    {
      "epoch": 0.42533130794615265,
      "grad_norm": 3.0422959327697754,
      "learning_rate": 1.989594279629039e-05,
      "loss": 0.5523,
      "step": 8120
    },
    {
      "epoch": 0.4283693887171966,
      "grad_norm": 3.0934972763061523,
      "learning_rate": 1.98907945854082e-05,
      "loss": 0.4855,
      "step": 8178
    },
    {
      "epoch": 0.43140746948824055,
      "grad_norm": 3.9694907665252686,
      "learning_rate": 1.9885522775976324e-05,
      "loss": 0.543,
      "step": 8236
    },
    {
      "epoch": 0.4344455502592845,
      "grad_norm": 4.713873386383057,
      "learning_rate": 1.9880127433872983e-05,
      "loss": 0.4901,
      "step": 8294
    },
    {
      "epoch": 0.43748363103032845,
      "grad_norm": 2.3840503692626953,
      "learning_rate": 1.987460862652011e-05,
      "loss": 0.4265,
      "step": 8352
    },
    {
      "epoch": 0.4405217118013724,
      "grad_norm": 4.123522758483887,
      "learning_rate": 1.9868966422882496e-05,
      "loss": 0.4237,
      "step": 8410
    },
    {
      "epoch": 0.44355979257241634,
      "grad_norm": 3.1017978191375732,
      "learning_rate": 1.986320089346693e-05,
      "loss": 0.4106,
      "step": 8468
    },
    {
      "epoch": 0.4465978733434603,
      "grad_norm": 2.8059699535369873,
      "learning_rate": 1.9857414684867994e-05,
      "loss": 0.4641,
      "step": 8526
    },
    {
      "epoch": 0.44963595411450424,
      "grad_norm": 4.327667236328125,
      "learning_rate": 1.985140484474396e-05,
      "loss": 0.4337,
      "step": 8584
    },
    {
      "epoch": 0.45267403488554814,
      "grad_norm": 0.9626501798629761,
      "learning_rate": 1.9845271898297104e-05,
      "loss": 0.4932,
      "step": 8642
    },
    {
      "epoch": 0.4557121156565921,
      "grad_norm": 1.3852657079696655,
      "learning_rate": 1.9839015922166693e-05,
      "loss": 0.4866,
      "step": 8700
    },
    {
      "epoch": 0.45875019642763604,
      "grad_norm": 3.2711095809936523,
      "learning_rate": 1.983263699452942e-05,
      "loss": 0.4825,
      "step": 8758
    },
    {
      "epoch": 0.46178827719868,
      "grad_norm": 4.84442138671875,
      "learning_rate": 1.9826135195098416e-05,
      "loss": 0.4559,
      "step": 8816
    },
    {
      "epoch": 0.46482635796972394,
      "grad_norm": 1.2177191972732544,
      "learning_rate": 1.9819510605122255e-05,
      "loss": 0.4795,
      "step": 8874
    },
    {
      "epoch": 0.4678644387407679,
      "grad_norm": 3.1849379539489746,
      "learning_rate": 1.981276330738395e-05,
      "loss": 0.474,
      "step": 8932
    },
    {
      "epoch": 0.47090251951181183,
      "grad_norm": 4.420878887176514,
      "learning_rate": 1.9805893386199892e-05,
      "loss": 0.4876,
      "step": 8990
    },
    {
      "epoch": 0.4739406002828558,
      "grad_norm": 2.714984893798828,
      "learning_rate": 1.9798900927418835e-05,
      "loss": 0.4491,
      "step": 9048
    },
    {
      "epoch": 0.47697868105389973,
      "grad_norm": 2.185593843460083,
      "learning_rate": 1.9791786018420792e-05,
      "loss": 0.4808,
      "step": 9106
    },
    {
      "epoch": 0.4800167618249437,
      "grad_norm": 3.3326094150543213,
      "learning_rate": 1.9784548748115946e-05,
      "loss": 0.4502,
      "step": 9164
    },
    {
      "epoch": 0.48305484259598763,
      "grad_norm": 3.4437661170959473,
      "learning_rate": 1.977718920694356e-05,
      "loss": 0.5049,
      "step": 9222
    },
    {
      "epoch": 0.4860929233670316,
      "grad_norm": 5.456835746765137,
      "learning_rate": 1.9769707486870825e-05,
      "loss": 0.4791,
      "step": 9280
    },
    {
      "epoch": 0.48913100413807553,
      "grad_norm": 5.109498023986816,
      "learning_rate": 1.9762103681391724e-05,
      "loss": 0.5396,
      "step": 9338
    },
    {
      "epoch": 0.4921690849091195,
      "grad_norm": 4.347654342651367,
      "learning_rate": 1.9754377885525854e-05,
      "loss": 0.4433,
      "step": 9396
    },
    {
      "epoch": 0.49520716568016343,
      "grad_norm": 3.837158203125,
      "learning_rate": 1.9746530195817243e-05,
      "loss": 0.4791,
      "step": 9454
    },
    {
      "epoch": 0.4982452464512074,
      "grad_norm": 3.8552966117858887,
      "learning_rate": 1.9738699146560578e-05,
      "loss": 0.4979,
      "step": 9512
    },
    {
      "epoch": 0.5012833272222513,
      "grad_norm": 1.76126229763031,
      "learning_rate": 1.973061006224811e-05,
      "loss": 0.4716,
      "step": 9570
    },
    {
      "epoch": 0.5043214079932953,
      "grad_norm": 5.198726654052734,
      "learning_rate": 1.9722399381103267e-05,
      "loss": 0.4801,
      "step": 9628
    },
    {
      "epoch": 0.5073594887643392,
      "grad_norm": 3.2313361167907715,
      "learning_rate": 1.9714067205729356e-05,
      "loss": 0.4592,
      "step": 9686
    },
    {
      "epoch": 0.5103975695353832,
      "grad_norm": 0.9486598968505859,
      "learning_rate": 1.9705613640247928e-05,
      "loss": 0.4399,
      "step": 9744
    },
    {
      "epoch": 0.5134356503064271,
      "grad_norm": 3.271669864654541,
      "learning_rate": 1.9697038790297442e-05,
      "loss": 0.4722,
      "step": 9802
    },
    {
      "epoch": 0.5164737310774711,
      "grad_norm": 5.1848039627075195,
      "learning_rate": 1.9688342763031993e-05,
      "loss": 0.4336,
      "step": 9860
    },
    {
      "epoch": 0.519511811848515,
      "grad_norm": 4.134024620056152,
      "learning_rate": 1.967952566711993e-05,
      "loss": 0.4534,
      "step": 9918
    },
    {
      "epoch": 0.522549892619559,
      "grad_norm": 3.0904159545898438,
      "learning_rate": 1.9670587612742515e-05,
      "loss": 0.4461,
      "step": 9976
    },
    {
      "epoch": 0.5255879733906029,
      "grad_norm": 3.3785481452941895,
      "learning_rate": 1.9661528711592553e-05,
      "loss": 0.4906,
      "step": 10034
    },
    {
      "epoch": 0.5286260541616469,
      "grad_norm": 4.755141258239746,
      "learning_rate": 1.9652349076872986e-05,
      "loss": 0.4519,
      "step": 10092
    },
    {
      "epoch": 0.5316641349326908,
      "grad_norm": 4.502477645874023,
      "learning_rate": 1.9643048823295482e-05,
      "loss": 0.5454,
      "step": 10150
    },
    {
      "epoch": 0.5347022157037348,
      "grad_norm": 2.3361642360687256,
      "learning_rate": 1.9633628067078997e-05,
      "loss": 0.5069,
      "step": 10208
    },
    {
      "epoch": 0.5377402964747787,
      "grad_norm": 3.6974456310272217,
      "learning_rate": 1.9624086925948333e-05,
      "loss": 0.4604,
      "step": 10266
    },
    {
      "epoch": 0.5407783772458227,
      "grad_norm": 3.7012462615966797,
      "learning_rate": 1.9614425519132654e-05,
      "loss": 0.5368,
      "step": 10324
    },
    {
      "epoch": 0.5438164580168666,
      "grad_norm": 0.9825100898742676,
      "learning_rate": 1.9604643967364013e-05,
      "loss": 0.4917,
      "step": 10382
    },
    {
      "epoch": 0.5468545387879106,
      "grad_norm": 2.8980348110198975,
      "learning_rate": 1.959474239287582e-05,
      "loss": 0.4571,
      "step": 10440
    },
    {
      "epoch": 0.5498926195589545,
      "grad_norm": 6.615330696105957,
      "learning_rate": 1.9584720919401342e-05,
      "loss": 0.4949,
      "step": 10498
    },
    {
      "epoch": 0.5529307003299985,
      "grad_norm": 4.613067626953125,
      "learning_rate": 1.9574579672172126e-05,
      "loss": 0.4072,
      "step": 10556
    },
    {
      "epoch": 0.5559687811010424,
      "grad_norm": 3.3984858989715576,
      "learning_rate": 1.9564318777916456e-05,
      "loss": 0.412,
      "step": 10614
    },
    {
      "epoch": 0.5590068618720864,
      "grad_norm": 5.624422550201416,
      "learning_rate": 1.9553938364857775e-05,
      "loss": 0.4781,
      "step": 10672
    },
    {
      "epoch": 0.5620449426431303,
      "grad_norm": 4.486995697021484,
      "learning_rate": 1.954343856271306e-05,
      "loss": 0.4426,
      "step": 10730
    },
    {
      "epoch": 0.5650830234141743,
      "grad_norm": 2.862964391708374,
      "learning_rate": 1.953281950269121e-05,
      "loss": 0.506,
      "step": 10788
    },
    {
      "epoch": 0.5681211041852182,
      "grad_norm": 0.556151807308197,
      "learning_rate": 1.9522267467101615e-05,
      "loss": 0.4095,
      "step": 10846
    },
    {
      "epoch": 0.5711591849562622,
      "grad_norm": 3.035536527633667,
      "learning_rate": 1.9511412341335318e-05,
      "loss": 0.517,
      "step": 10904
    },
    {
      "epoch": 0.574197265727306,
      "grad_norm": 3.8603086471557617,
      "learning_rate": 1.950043835790185e-05,
      "loss": 0.4344,
      "step": 10962
    },
    {
      "epoch": 0.57723534649835,
      "grad_norm": 4.4469499588012695,
      "learning_rate": 1.9489345653935635e-05,
      "loss": 0.4774,
      "step": 11020
    },
    {
      "epoch": 0.5802734272693939,
      "grad_norm": 3.4457666873931885,
      "learning_rate": 1.9478134368054676e-05,
      "loss": 0.4274,
      "step": 11078
    },
    {
      "epoch": 0.5833115080404379,
      "grad_norm": 3.056290864944458,
      "learning_rate": 1.9466804640358798e-05,
      "loss": 0.4432,
      "step": 11136
    },
    {
      "epoch": 0.5863495888114818,
      "grad_norm": 4.071867942810059,
      "learning_rate": 1.9455356612427928e-05,
      "loss": 0.4344,
      "step": 11194
    },
    {
      "epoch": 0.5893876695825258,
      "grad_norm": 2.0395846366882324,
      "learning_rate": 1.9443790427320303e-05,
      "loss": 0.4714,
      "step": 11252
    },
    {
      "epoch": 0.5924257503535697,
      "grad_norm": 4.563007354736328,
      "learning_rate": 1.9432106229570685e-05,
      "loss": 0.5157,
      "step": 11310
    },
    {
      "epoch": 0.5954638311246137,
      "grad_norm": 3.7986621856689453,
      "learning_rate": 1.9420304165188574e-05,
      "loss": 0.4977,
      "step": 11368
    },
    {
      "epoch": 0.5985019118956576,
      "grad_norm": 5.301217555999756,
      "learning_rate": 1.9408384381656358e-05,
      "loss": 0.4662,
      "step": 11426
    },
    {
      "epoch": 0.6015399926667016,
      "grad_norm": 2.3288731575012207,
      "learning_rate": 1.939634702792749e-05,
      "loss": 0.4493,
      "step": 11484
    },
    {
      "epoch": 0.6045780734377455,
      "grad_norm": 3.7128169536590576,
      "learning_rate": 1.9384192254424606e-05,
      "loss": 0.4865,
      "step": 11542
    },
    {
      "epoch": 0.6076161542087894,
      "grad_norm": 4.314477920532227,
      "learning_rate": 1.9371920213037665e-05,
      "loss": 0.4715,
      "step": 11600
    },
    {
      "epoch": 0.6106542349798334,
      "grad_norm": 2.6989047527313232,
      "learning_rate": 1.935953105712205e-05,
      "loss": 0.4345,
      "step": 11658
    },
    {
      "epoch": 0.6136923157508773,
      "grad_norm": 3.5463671684265137,
      "learning_rate": 1.9347024941496628e-05,
      "loss": 0.4611,
      "step": 11716
    },
    {
      "epoch": 0.6167303965219213,
      "grad_norm": 4.914857387542725,
      "learning_rate": 1.9334402022441848e-05,
      "loss": 0.4952,
      "step": 11774
    },
    {
      "epoch": 0.6197684772929652,
      "grad_norm": 1.5133031606674194,
      "learning_rate": 1.932188309270537e-05,
      "loss": 0.4401,
      "step": 11832
    },
    {
      "epoch": 0.6228065580640092,
      "grad_norm": 2.30916428565979,
      "learning_rate": 1.9309029048500578e-05,
      "loss": 0.4177,
      "step": 11890
    },
    {
      "epoch": 0.6258446388350531,
      "grad_norm": 2.825598955154419,
      "learning_rate": 1.929605867567532e-05,
      "loss": 0.4529,
      "step": 11948
    },
    {
      "epoch": 0.6288827196060971,
      "grad_norm": 5.285458087921143,
      "learning_rate": 1.9282972136311554e-05,
      "loss": 0.4806,
      "step": 12006
    },
    {
      "epoch": 0.631920800377141,
      "grad_norm": 2.597923755645752,
      "learning_rate": 1.9269769593942872e-05,
      "loss": 0.4566,
      "step": 12064
    },
    {
      "epoch": 0.634958881148185,
      "grad_norm": 2.780212640762329,
      "learning_rate": 1.9256451213552497e-05,
      "loss": 0.4725,
      "step": 12122
    },
    {
      "epoch": 0.6379969619192289,
      "grad_norm": 4.1638031005859375,
      "learning_rate": 1.9243017161571194e-05,
      "loss": 0.463,
      "step": 12180
    },
    {
      "epoch": 0.6410350426902729,
      "grad_norm": 4.174670219421387,
      "learning_rate": 1.9229467605875196e-05,
      "loss": 0.5236,
      "step": 12238
    },
    {
      "epoch": 0.6440731234613168,
      "grad_norm": 1.9128369092941284,
      "learning_rate": 1.9215802715784096e-05,
      "loss": 0.4621,
      "step": 12296
    },
    {
      "epoch": 0.6471112042323608,
      "grad_norm": 4.490901947021484,
      "learning_rate": 1.9202022662058773e-05,
      "loss": 0.4517,
      "step": 12354
    },
    {
      "epoch": 0.6501492850034047,
      "grad_norm": 4.426553726196289,
      "learning_rate": 1.9188127616899202e-05,
      "loss": 0.488,
      "step": 12412
    },
    {
      "epoch": 0.6531873657744487,
      "grad_norm": 3.694254159927368,
      "learning_rate": 1.917411775394233e-05,
      "loss": 0.4705,
      "step": 12470
    },
    {
      "epoch": 0.6562254465454926,
      "grad_norm": 2.3134400844573975,
      "learning_rate": 1.9159993248259916e-05,
      "loss": 0.4402,
      "step": 12528
    },
    {
      "epoch": 0.6592635273165366,
      "grad_norm": 2.868987798690796,
      "learning_rate": 1.9145754276356323e-05,
      "loss": 0.4085,
      "step": 12586
    },
    {
      "epoch": 0.6623016080875805,
      "grad_norm": 3.815828323364258,
      "learning_rate": 1.9131401016166326e-05,
      "loss": 0.5569,
      "step": 12644
    },
    {
      "epoch": 0.6653396888586245,
      "grad_norm": 2.865863800048828,
      "learning_rate": 1.911693364705287e-05,
      "loss": 0.4515,
      "step": 12702
    },
    {
      "epoch": 0.6683777696296684,
      "grad_norm": 3.7862603664398193,
      "learning_rate": 1.9102352349804865e-05,
      "loss": 0.4685,
      "step": 12760
    },
    {
      "epoch": 0.6714158504007124,
      "grad_norm": 2.3399360179901123,
      "learning_rate": 1.9087657306634884e-05,
      "loss": 0.5087,
      "step": 12818
    },
    {
      "epoch": 0.6744539311717563,
      "grad_norm": 3.208674430847168,
      "learning_rate": 1.9072848701176905e-05,
      "loss": 0.4322,
      "step": 12876
    },
    {
      "epoch": 0.6774920119428003,
      "grad_norm": 1.2508207559585571,
      "learning_rate": 1.9057926718484036e-05,
      "loss": 0.39,
      "step": 12934
    },
    {
      "epoch": 0.6805300927138442,
      "grad_norm": 3.029885768890381,
      "learning_rate": 1.9042891545026164e-05,
      "loss": 0.4881,
      "step": 12992
    },
    {
      "epoch": 0.6835681734848882,
      "grad_norm": 1.723024606704712,
      "learning_rate": 1.9028005500450692e-05,
      "loss": 0.4016,
      "step": 13050
    },
    {
      "epoch": 0.6866062542559321,
      "grad_norm": 3.0587244033813477,
      "learning_rate": 1.9012746453978195e-05,
      "loss": 0.451,
      "step": 13108
    },
    {
      "epoch": 0.6896443350269761,
      "grad_norm": 3.979196548461914,
      "learning_rate": 1.899737478132781e-05,
      "loss": 0.4584,
      "step": 13166
    },
    {
      "epoch": 0.69268241579802,
      "grad_norm": 3.2428181171417236,
      "learning_rate": 1.8981890674588902e-05,
      "loss": 0.4419,
      "step": 13224
    },
    {
      "epoch": 0.695720496569064,
      "grad_norm": 1.9672743082046509,
      "learning_rate": 1.8966294327255843e-05,
      "loss": 0.4463,
      "step": 13282
    },
    {
      "epoch": 0.6987585773401079,
      "grad_norm": 3.543287754058838,
      "learning_rate": 1.895058593422561e-05,
      "loss": 0.5232,
      "step": 13340
    },
    {
      "epoch": 0.7017966581111519,
      "grad_norm": 2.751725435256958,
      "learning_rate": 1.8934765691795337e-05,
      "loss": 0.4627,
      "step": 13398
    },
    {
      "epoch": 0.7048347388821958,
      "grad_norm": 3.9089314937591553,
      "learning_rate": 1.8918833797659854e-05,
      "loss": 0.4701,
      "step": 13456
    },
    {
      "epoch": 0.7078728196532398,
      "grad_norm": 2.623382806777954,
      "learning_rate": 1.890279045090924e-05,
      "loss": 0.4627,
      "step": 13514
    },
    {
      "epoch": 0.7109109004242837,
      "grad_norm": 3.44734263420105,
      "learning_rate": 1.8886635852026307e-05,
      "loss": 0.5063,
      "step": 13572
    },
    {
      "epoch": 0.7139489811953277,
      "grad_norm": 4.096603870391846,
      "learning_rate": 1.887037020288412e-05,
      "loss": 0.4205,
      "step": 13630
    },
    {
      "epoch": 0.7169870619663716,
      "grad_norm": 3.9694747924804688,
      "learning_rate": 1.8853993706743465e-05,
      "loss": 0.479,
      "step": 13688
    },
    {
      "epoch": 0.7200251427374156,
      "grad_norm": 2.2461936473846436,
      "learning_rate": 1.88375065682503e-05,
      "loss": 0.4222,
      "step": 13746
    },
    {
      "epoch": 0.7230632235084595,
      "grad_norm": 4.268979549407959,
      "learning_rate": 1.882090899343321e-05,
      "loss": 0.4013,
      "step": 13804
    },
    {
      "epoch": 0.7261013042795035,
      "grad_norm": 2.9464776515960693,
      "learning_rate": 1.8804201189700833e-05,
      "loss": 0.5184,
      "step": 13862
    },
    {
      "epoch": 0.7291393850505474,
      "grad_norm": 3.1404519081115723,
      "learning_rate": 1.8787383365839248e-05,
      "loss": 0.4451,
      "step": 13920
    },
    {
      "epoch": 0.7321774658215914,
      "grad_norm": 3.048670530319214,
      "learning_rate": 1.8770455732009393e-05,
      "loss": 0.457,
      "step": 13978
    },
    {
      "epoch": 0.7352155465926353,
      "grad_norm": 3.074151039123535,
      "learning_rate": 1.8753418499744426e-05,
      "loss": 0.4711,
      "step": 14036
    },
    {
      "epoch": 0.7382536273636792,
      "grad_norm": 4.1698150634765625,
      "learning_rate": 1.873627188194708e-05,
      "loss": 0.4281,
      "step": 14094
    },
    {
      "epoch": 0.7412917081347231,
      "grad_norm": 2.6520071029663086,
      "learning_rate": 1.8719016092887e-05,
      "loss": 0.497,
      "step": 14152
    },
    {
      "epoch": 0.7443297889057671,
      "grad_norm": 1.3818339109420776,
      "learning_rate": 1.870165134819808e-05,
      "loss": 0.4234,
      "step": 14210
    },
    {
      "epoch": 0.747367869676811,
      "grad_norm": 3.5460736751556396,
      "learning_rate": 1.868417786487575e-05,
      "loss": 0.4444,
      "step": 14268
    },
    {
      "epoch": 0.750405950447855,
      "grad_norm": 2.8102331161499023,
      "learning_rate": 1.8666595861274283e-05,
      "loss": 0.4159,
      "step": 14326
    },
    {
      "epoch": 0.7534440312188989,
      "grad_norm": 3.3770508766174316,
      "learning_rate": 1.8648905557104046e-05,
      "loss": 0.4357,
      "step": 14384
    },
    {
      "epoch": 0.7564821119899429,
      "grad_norm": 2.95613169670105,
      "learning_rate": 1.863110717342876e-05,
      "loss": 0.4627,
      "step": 14442
    },
    {
      "epoch": 0.7595201927609868,
      "grad_norm": 2.80786395072937,
      "learning_rate": 1.8613200932662764e-05,
      "loss": 0.4331,
      "step": 14500
    },
    {
      "epoch": 0.7625582735320308,
      "grad_norm": 2.9433181285858154,
      "learning_rate": 1.8595187058568197e-05,
      "loss": 0.5087,
      "step": 14558
    },
    {
      "epoch": 0.7655963543030747,
      "grad_norm": 2.6625008583068848,
      "learning_rate": 1.8577065776252218e-05,
      "loss": 0.5018,
      "step": 14616
    },
    {
      "epoch": 0.7686344350741187,
      "grad_norm": 3.8713533878326416,
      "learning_rate": 1.8558837312164198e-05,
      "loss": 0.4454,
      "step": 14674
    },
    {
      "epoch": 0.7716725158451626,
      "grad_norm": 3.236130475997925,
      "learning_rate": 1.8540501894092894e-05,
      "loss": 0.4463,
      "step": 14732
    },
    {
      "epoch": 0.7747105966162066,
      "grad_norm": 1.9471006393432617,
      "learning_rate": 1.8522059751163578e-05,
      "loss": 0.4615,
      "step": 14790
    },
    {
      "epoch": 0.7777486773872505,
      "grad_norm": 1.1234129667282104,
      "learning_rate": 1.85035111138352e-05,
      "loss": 0.3841,
      "step": 14848
    },
    {
      "epoch": 0.7807867581582945,
      "grad_norm": 3.155194044113159,
      "learning_rate": 1.8484856213897496e-05,
      "loss": 0.4932,
      "step": 14906
    },
    {
      "epoch": 0.7838248389293384,
      "grad_norm": 1.2532273530960083,
      "learning_rate": 1.8466095284468103e-05,
      "loss": 0.427,
      "step": 14964
    },
    {
      "epoch": 0.7868629197003824,
      "grad_norm": 3.315812349319458,
      "learning_rate": 1.8447228559989618e-05,
      "loss": 0.4406,
      "step": 15022
    },
    {
      "epoch": 0.7899010004714263,
      "grad_norm": 2.3999452590942383,
      "learning_rate": 1.842858427754608e-05,
      "loss": 0.4413,
      "step": 15080
    },
    {
      "epoch": 0.7929390812424703,
      "grad_norm": 2.896650791168213,
      "learning_rate": 1.8409508485466538e-05,
      "loss": 0.4068,
      "step": 15138
    },
    {
      "epoch": 0.7959771620135142,
      "grad_norm": 3.3152272701263428,
      "learning_rate": 1.8390327605464747e-05,
      "loss": 0.4708,
      "step": 15196
    },
    {
      "epoch": 0.7990152427845582,
      "grad_norm": 2.573716163635254,
      "learning_rate": 1.8371041877231145e-05,
      "loss": 0.4506,
      "step": 15254
    },
    {
      "epoch": 0.8020533235556021,
      "grad_norm": 1.0098395347595215,
      "learning_rate": 1.8351651541766398e-05,
      "loss": 0.4614,
      "step": 15312
    },
    {
      "epoch": 0.805091404326646,
      "grad_norm": 2.7257494926452637,
      "learning_rate": 1.8332156841378376e-05,
      "loss": 0.481,
      "step": 15370
    },
    {
      "epoch": 0.80812948509769,
      "grad_norm": 3.291948080062866,
      "learning_rate": 1.8312558019679113e-05,
      "loss": 0.4872,
      "step": 15428
    },
    {
      "epoch": 0.811167565868734,
      "grad_norm": 0.6372181177139282,
      "learning_rate": 1.82928553215818e-05,
      "loss": 0.4664,
      "step": 15486
    },
    {
      "epoch": 0.8142056466397779,
      "grad_norm": 2.14487361907959,
      "learning_rate": 1.8273048993297682e-05,
      "loss": 0.4443,
      "step": 15544
    },
    {
      "epoch": 0.8172437274108219,
      "grad_norm": 1.6703099012374878,
      "learning_rate": 1.8253139282333005e-05,
      "loss": 0.4683,
      "step": 15602
    },
    {
      "epoch": 0.8202818081818658,
      "grad_norm": 3.7610647678375244,
      "learning_rate": 1.8233126437485925e-05,
      "loss": 0.4299,
      "step": 15660
    },
    {
      "epoch": 0.8233198889529098,
      "grad_norm": 3.429608106613159,
      "learning_rate": 1.821301070884338e-05,
      "loss": 0.3976,
      "step": 15718
    },
    {
      "epoch": 0.8263579697239537,
      "grad_norm": 3.0198211669921875,
      "learning_rate": 1.819279234777799e-05,
      "loss": 0.407,
      "step": 15776
    },
    {
      "epoch": 0.8293960504949976,
      "grad_norm": 3.1796703338623047,
      "learning_rate": 1.817247160694489e-05,
      "loss": 0.4235,
      "step": 15834
    },
    {
      "epoch": 0.8324341312660416,
      "grad_norm": 2.235328197479248,
      "learning_rate": 1.81520487402786e-05,
      "loss": 0.403,
      "step": 15892
    },
    {
      "epoch": 0.8354722120370855,
      "grad_norm": 3.7409324645996094,
      "learning_rate": 1.8131524002989816e-05,
      "loss": 0.4325,
      "step": 15950
    },
    {
      "epoch": 0.8385102928081295,
      "grad_norm": 2.7824206352233887,
      "learning_rate": 1.811089765156227e-05,
      "loss": 0.432,
      "step": 16008
    },
    {
      "epoch": 0.8415483735791734,
      "grad_norm": 1.6856441497802734,
      "learning_rate": 1.8090528175270648e-05,
      "loss": 0.4156,
      "step": 16066
    },
    {
      "epoch": 0.8445864543502174,
      "grad_norm": 2.9528396129608154,
      "learning_rate": 1.8069701110949214e-05,
      "loss": 0.4486,
      "step": 16124
    },
    {
      "epoch": 0.8476245351212613,
      "grad_norm": 1.5881803035736084,
      "learning_rate": 1.8048773205047752e-05,
      "loss": 0.4133,
      "step": 16182
    },
    {
      "epoch": 0.8506626158923053,
      "grad_norm": 5.50068473815918,
      "learning_rate": 1.8027744719088103e-05,
      "loss": 0.4553,
      "step": 16240
    },
    {
      "epoch": 0.8537006966633492,
      "grad_norm": 3.2943098545074463,
      "learning_rate": 1.800661591584899e-05,
      "loss": 0.4428,
      "step": 16298
    },
    {
      "epoch": 0.8567387774343932,
      "grad_norm": 1.3706376552581787,
      "learning_rate": 1.798538705936273e-05,
      "loss": 0.4779,
      "step": 16356
    },
    {
      "epoch": 0.8597768582054371,
      "grad_norm": 2.18271541595459,
      "learning_rate": 1.796405841491194e-05,
      "loss": 0.4687,
      "step": 16414
    },
    {
      "epoch": 0.8628149389764811,
      "grad_norm": 2.5106441974639893,
      "learning_rate": 1.794263024902622e-05,
      "loss": 0.4016,
      "step": 16472
    },
    {
      "epoch": 0.865853019747525,
      "grad_norm": 2.757732629776001,
      "learning_rate": 1.7921102829478832e-05,
      "loss": 0.4948,
      "step": 16530
    },
    {
      "epoch": 0.868891100518569,
      "grad_norm": 0.37621229887008667,
      "learning_rate": 1.7899476425283318e-05,
      "loss": 0.4304,
      "step": 16588
    },
    {
      "epoch": 0.8719291812896129,
      "grad_norm": 4.135168552398682,
      "learning_rate": 1.787775130669019e-05,
      "loss": 0.4195,
      "step": 16646
    },
    {
      "epoch": 0.8749672620606569,
      "grad_norm": 2.2052392959594727,
      "learning_rate": 1.7855927745183504e-05,
      "loss": 0.4449,
      "step": 16704
    },
    {
      "epoch": 0.8780053428317008,
      "grad_norm": 2.8733346462249756,
      "learning_rate": 1.7834006013477513e-05,
      "loss": 0.5016,
      "step": 16762
    },
    {
      "epoch": 0.8810434236027448,
      "grad_norm": 1.8927271366119385,
      "learning_rate": 1.7811986385513226e-05,
      "loss": 0.3793,
      "step": 16820
    },
    {
      "epoch": 0.8840815043737887,
      "grad_norm": 3.7612531185150146,
      "learning_rate": 1.7789869136454988e-05,
      "loss": 0.3601,
      "step": 16878
    },
    {
      "epoch": 0.8871195851448327,
      "grad_norm": 1.6613848209381104,
      "learning_rate": 1.7767654542687057e-05,
      "loss": 0.4772,
      "step": 16936
    },
    {
      "epoch": 0.8901576659158766,
      "grad_norm": 2.5755159854888916,
      "learning_rate": 1.7745342881810144e-05,
      "loss": 0.4475,
      "step": 16994
    },
    {
      "epoch": 0.8931957466869206,
      "grad_norm": 2.7520928382873535,
      "learning_rate": 1.7722934432637937e-05,
      "loss": 0.3942,
      "step": 17052
    },
    {
      "epoch": 0.8962338274579645,
      "grad_norm": 4.439705848693848,
      "learning_rate": 1.770042947519362e-05,
      "loss": 0.4361,
      "step": 17110
    },
    {
      "epoch": 0.8992719082290085,
      "grad_norm": 2.091926097869873,
      "learning_rate": 1.7677828290706382e-05,
      "loss": 0.42,
      "step": 17168
    },
    {
      "epoch": 0.9000052380702949,
      "eval_accuracy": 0.8844256401062012,
      "eval_loss": 0.44025254249572754,
      "eval_runtime": 5730.5358,
      "eval_samples_per_second": 0.835,
      "eval_steps_per_second": 0.835,
      "step": 17182
    }
  ],
| "logging_steps": 58, | |
| "max_steps": 57273, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 17182, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.194591202500936e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |