{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0679668152024774,
  "eval_steps": 500,
  "global_step": 14000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00024273862572313354,
      "grad_norm": 3.890625,
      "learning_rate": 7.929444129783965e-08,
      "loss": 2.5681,
      "step": 50
    },
    {
      "epoch": 0.0004854772514462671,
      "grad_norm": 6.0625,
      "learning_rate": 1.6020713649971682e-07,
      "loss": 2.6107,
      "step": 100
    },
    {
      "epoch": 0.0007282158771694006,
      "grad_norm": 4.875,
      "learning_rate": 2.41119831701594e-07,
      "loss": 2.5511,
      "step": 150
    },
    {
      "epoch": 0.0009709545028925341,
      "grad_norm": 4.15625,
      "learning_rate": 3.2203252690347116e-07,
      "loss": 2.5521,
      "step": 200
    },
    {
      "epoch": 0.0012136931286156677,
      "grad_norm": 3.625,
      "learning_rate": 4.0294522210534837e-07,
      "loss": 2.5685,
      "step": 250
    },
    {
      "epoch": 0.0014564317543388013,
      "grad_norm": 4.03125,
      "learning_rate": 4.838579173072256e-07,
      "loss": 2.5193,
      "step": 300
    },
    {
      "epoch": 0.0016991703800619347,
      "grad_norm": 4.53125,
      "learning_rate": 5.647706125091027e-07,
      "loss": 2.5484,
      "step": 350
    },
    {
      "epoch": 0.0019419090057850683,
      "grad_norm": 4.59375,
      "learning_rate": 6.456833077109799e-07,
      "loss": 2.5319,
      "step": 400
    },
    {
      "epoch": 0.0021846476315082017,
      "grad_norm": 5.375,
      "learning_rate": 7.265960029128571e-07,
      "loss": 2.5113,
      "step": 450
    },
    {
      "epoch": 0.0024273862572313353,
      "grad_norm": 4.09375,
      "learning_rate": 8.075086981147342e-07,
      "loss": 2.5203,
      "step": 500
    },
    {
      "epoch": 0.002670124882954469,
      "grad_norm": 3.65625,
      "learning_rate": 8.884213933166115e-07,
      "loss": 2.5355,
      "step": 550
    },
    {
      "epoch": 0.0029128635086776026,
      "grad_norm": 5.40625,
      "learning_rate": 9.693340885184885e-07,
      "loss": 2.4935,
      "step": 600
    },
    {
      "epoch": 0.003155602134400736,
      "grad_norm": 4.03125,
      "learning_rate": 1.0502467837203658e-06,
      "loss": 2.5222,
      "step": 650
    },
    {
      "epoch": 0.0033983407601238694,
      "grad_norm": 3.359375,
      "learning_rate": 1.131159478922243e-06,
      "loss": 2.5488,
      "step": 700
    },
    {
      "epoch": 0.003641079385847003,
      "grad_norm": 4.3125,
      "learning_rate": 1.2120721741241203e-06,
      "loss": 2.5078,
      "step": 750
    },
    {
      "epoch": 0.0038838180115701366,
      "grad_norm": 3.3125,
      "learning_rate": 1.2929848693259976e-06,
      "loss": 2.4933,
      "step": 800
    },
    {
      "epoch": 0.00412655663729327,
      "grad_norm": 10.5625,
      "learning_rate": 1.3738975645278747e-06,
      "loss": 2.5072,
      "step": 850
    },
    {
      "epoch": 0.004369295263016403,
      "grad_norm": 3.6875,
      "learning_rate": 1.4548102597297518e-06,
      "loss": 2.4601,
      "step": 900
    },
    {
      "epoch": 0.0046120338887395375,
      "grad_norm": 3.953125,
      "learning_rate": 1.5357229549316289e-06,
      "loss": 2.4811,
      "step": 950
    },
    {
      "epoch": 0.004854772514462671,
      "grad_norm": 3.515625,
      "learning_rate": 1.6166356501335062e-06,
      "loss": 2.5233,
      "step": 1000
    },
    {
      "epoch": 0.005097511140185805,
      "grad_norm": 4.0625,
      "learning_rate": 1.6975483453353833e-06,
      "loss": 2.4694,
      "step": 1050
    },
    {
      "epoch": 0.005340249765908938,
      "grad_norm": 2.9375,
      "learning_rate": 1.7784610405372604e-06,
      "loss": 2.4865,
      "step": 1100
    },
    {
      "epoch": 0.005582988391632071,
      "grad_norm": 3.515625,
      "learning_rate": 1.8593737357391375e-06,
      "loss": 2.4639,
      "step": 1150
    },
    {
      "epoch": 0.005825727017355205,
      "grad_norm": 15.4375,
      "learning_rate": 1.940286430941015e-06,
      "loss": 2.4299,
      "step": 1200
    },
    {
      "epoch": 0.006068465643078338,
      "grad_norm": 3.390625,
      "learning_rate": 2.021199126142892e-06,
      "loss": 2.4692,
      "step": 1250
    },
    {
      "epoch": 0.006311204268801472,
      "grad_norm": 3.125,
      "learning_rate": 2.1021118213447693e-06,
      "loss": 2.4785,
      "step": 1300
    },
    {
      "epoch": 0.0065539428945246055,
      "grad_norm": 3.40625,
      "learning_rate": 2.1830245165466464e-06,
      "loss": 2.4727,
      "step": 1350
    },
    {
      "epoch": 0.006796681520247739,
      "grad_norm": 2.921875,
      "learning_rate": 2.2639372117485235e-06,
      "loss": 2.4517,
      "step": 1400
    },
    {
      "epoch": 0.007039420145970873,
      "grad_norm": 4.03125,
      "learning_rate": 2.3448499069504006e-06,
      "loss": 2.4343,
      "step": 1450
    },
    {
      "epoch": 0.007282158771694006,
      "grad_norm": 3.296875,
      "learning_rate": 2.4257626021522777e-06,
      "loss": 2.4255,
      "step": 1500
    },
    {
      "epoch": 0.00752489739741714,
      "grad_norm": 3.3125,
      "learning_rate": 2.506675297354155e-06,
      "loss": 2.4238,
      "step": 1550
    },
    {
      "epoch": 0.007767636023140273,
      "grad_norm": 4.34375,
      "learning_rate": 2.5875879925560323e-06,
      "loss": 2.4245,
      "step": 1600
    },
    {
      "epoch": 0.008010374648863406,
      "grad_norm": 3.765625,
      "learning_rate": 2.6685006877579094e-06,
      "loss": 2.4052,
      "step": 1650
    },
    {
      "epoch": 0.00825311327458654,
      "grad_norm": 2.875,
      "learning_rate": 2.749413382959787e-06,
      "loss": 2.4148,
      "step": 1700
    },
    {
      "epoch": 0.008495851900309674,
      "grad_norm": 3.09375,
      "learning_rate": 2.8303260781616636e-06,
      "loss": 2.4272,
      "step": 1750
    },
    {
      "epoch": 0.008738590526032807,
      "grad_norm": 3.65625,
      "learning_rate": 2.911238773363541e-06,
      "loss": 2.4219,
      "step": 1800
    },
    {
      "epoch": 0.00898132915175594,
      "grad_norm": 3.296875,
      "learning_rate": 2.992151468565418e-06,
      "loss": 2.4591,
      "step": 1850
    },
    {
      "epoch": 0.009224067777479075,
      "grad_norm": 3.671875,
      "learning_rate": 3.0730641637672954e-06,
      "loss": 2.4347,
      "step": 1900
    },
    {
      "epoch": 0.009466806403202207,
      "grad_norm": 3.484375,
      "learning_rate": 3.153976858969173e-06,
      "loss": 2.3938,
      "step": 1950
    },
    {
      "epoch": 0.009709545028925341,
      "grad_norm": 3.921875,
      "learning_rate": 3.2348895541710496e-06,
      "loss": 2.3816,
      "step": 2000
    },
    {
      "epoch": 0.009952283654648475,
      "grad_norm": 4.5625,
      "learning_rate": 3.315802249372927e-06,
      "loss": 2.4554,
      "step": 2050
    },
    {
      "epoch": 0.01019502228037161,
      "grad_norm": 2.46875,
      "learning_rate": 3.396714944574804e-06,
      "loss": 2.3965,
      "step": 2100
    },
    {
      "epoch": 0.010437760906094742,
      "grad_norm": 3.078125,
      "learning_rate": 3.4776276397766813e-06,
      "loss": 2.3747,
      "step": 2150
    },
    {
      "epoch": 0.010680499531817876,
      "grad_norm": 4.1875,
      "learning_rate": 3.5585403349785584e-06,
      "loss": 2.4146,
      "step": 2200
    },
    {
      "epoch": 0.01092323815754101,
      "grad_norm": 3.421875,
      "learning_rate": 3.6394530301804355e-06,
      "loss": 2.3946,
      "step": 2250
    },
    {
      "epoch": 0.011165976783264142,
      "grad_norm": 3.375,
      "learning_rate": 3.7203657253823126e-06,
      "loss": 2.4002,
      "step": 2300
    },
    {
      "epoch": 0.011408715408987276,
      "grad_norm": 4.28125,
      "learning_rate": 3.80127842058419e-06,
      "loss": 2.4002,
      "step": 2350
    },
    {
      "epoch": 0.01165145403471041,
      "grad_norm": 3.0,
      "learning_rate": 3.882191115786067e-06,
      "loss": 2.4003,
      "step": 2400
    },
    {
      "epoch": 0.011894192660433543,
      "grad_norm": 3.703125,
      "learning_rate": 3.963103810987945e-06,
      "loss": 2.4261,
      "step": 2450
    },
    {
      "epoch": 0.012136931286156677,
      "grad_norm": 3.25,
      "learning_rate": 4.044016506189822e-06,
      "loss": 2.342,
      "step": 2500
    },
    {
      "epoch": 0.01237966991187981,
      "grad_norm": 3.25,
      "learning_rate": 4.124929201391699e-06,
      "loss": 2.3825,
      "step": 2550
    },
    {
      "epoch": 0.012622408537602945,
      "grad_norm": 3.90625,
      "learning_rate": 4.205841896593576e-06,
      "loss": 2.3988,
      "step": 2600
    },
    {
      "epoch": 0.012865147163326077,
      "grad_norm": 3.34375,
      "learning_rate": 4.286754591795453e-06,
      "loss": 2.3655,
      "step": 2650
    },
    {
      "epoch": 0.013107885789049211,
      "grad_norm": 3.171875,
      "learning_rate": 4.36766728699733e-06,
      "loss": 2.3807,
      "step": 2700
    },
    {
      "epoch": 0.013350624414772345,
      "grad_norm": 4.5,
      "learning_rate": 4.448579982199207e-06,
      "loss": 2.3725,
      "step": 2750
    },
    {
      "epoch": 0.013593363040495477,
      "grad_norm": 2.5625,
      "learning_rate": 4.5294926774010845e-06,
      "loss": 2.3701,
      "step": 2800
    },
    {
      "epoch": 0.013836101666218611,
      "grad_norm": 3.203125,
      "learning_rate": 4.610405372602962e-06,
      "loss": 2.3917,
      "step": 2850
    },
    {
      "epoch": 0.014078840291941746,
      "grad_norm": 3.40625,
      "learning_rate": 4.6913180678048396e-06,
      "loss": 2.3837,
      "step": 2900
    },
    {
      "epoch": 0.014321578917664878,
      "grad_norm": 2.578125,
      "learning_rate": 4.772230763006716e-06,
      "loss": 2.3561,
      "step": 2950
    },
    {
      "epoch": 0.014564317543388012,
      "grad_norm": 5.5,
      "learning_rate": 4.853143458208594e-06,
      "loss": 2.3751,
      "step": 3000
    },
    {
      "epoch": 0.014807056169111146,
      "grad_norm": 3.046875,
      "learning_rate": 4.93405615341047e-06,
      "loss": 2.3576,
      "step": 3050
    },
    {
      "epoch": 0.01504979479483428,
      "grad_norm": 3.671875,
      "learning_rate": 5.014968848612348e-06,
      "loss": 2.3693,
      "step": 3100
    },
    {
      "epoch": 0.015292533420557412,
      "grad_norm": 3.359375,
      "learning_rate": 5.095881543814225e-06,
      "loss": 2.362,
      "step": 3150
    },
    {
      "epoch": 0.015535272046280546,
      "grad_norm": 3.71875,
      "learning_rate": 5.176794239016102e-06,
      "loss": 2.3633,
      "step": 3200
    },
    {
      "epoch": 0.01577801067200368,
      "grad_norm": 3.5625,
      "learning_rate": 5.2577069342179785e-06,
      "loss": 2.3481,
      "step": 3250
    },
    {
      "epoch": 0.016020749297726813,
      "grad_norm": 4.375,
      "learning_rate": 5.338619629419856e-06,
      "loss": 2.3511,
      "step": 3300
    },
    {
      "epoch": 0.01626348792344995,
      "grad_norm": 4.9375,
      "learning_rate": 5.4195323246217335e-06,
      "loss": 2.3887,
      "step": 3350
    },
    {
      "epoch": 0.01650622654917308,
      "grad_norm": 4.15625,
      "learning_rate": 5.500445019823611e-06,
      "loss": 2.3176,
      "step": 3400
    },
    {
      "epoch": 0.016748965174896213,
      "grad_norm": 3.5,
      "learning_rate": 5.5813577150254886e-06,
      "loss": 2.3562,
      "step": 3450
    },
    {
      "epoch": 0.01699170380061935,
      "grad_norm": 3.828125,
      "learning_rate": 5.662270410227365e-06,
      "loss": 2.3405,
      "step": 3500
    },
    {
      "epoch": 0.01723444242634248,
      "grad_norm": 4.96875,
      "learning_rate": 5.743183105429242e-06,
      "loss": 2.344,
      "step": 3550
    },
    {
      "epoch": 0.017477181052065614,
      "grad_norm": 2.734375,
      "learning_rate": 5.82409580063112e-06,
      "loss": 2.3372,
      "step": 3600
    },
    {
      "epoch": 0.01771991967778875,
      "grad_norm": 2.828125,
      "learning_rate": 5.905008495832997e-06,
      "loss": 2.3337,
      "step": 3650
    },
    {
      "epoch": 0.01796265830351188,
      "grad_norm": 4.09375,
      "learning_rate": 5.985921191034873e-06,
      "loss": 2.3394,
      "step": 3700
    },
    {
      "epoch": 0.018205396929235014,
      "grad_norm": 4.59375,
      "learning_rate": 6.066833886236751e-06,
      "loss": 2.3587,
      "step": 3750
    },
    {
      "epoch": 0.01844813555495815,
      "grad_norm": 2.875,
      "learning_rate": 6.147746581438628e-06,
      "loss": 2.3127,
      "step": 3800
    },
    {
      "epoch": 0.018690874180681282,
      "grad_norm": 4.15625,
      "learning_rate": 6.228659276640505e-06,
      "loss": 2.3136,
      "step": 3850
    },
    {
      "epoch": 0.018933612806404414,
      "grad_norm": 3.671875,
      "learning_rate": 6.309571971842383e-06,
      "loss": 2.3206,
      "step": 3900
    },
    {
      "epoch": 0.01917635143212755,
      "grad_norm": 3.546875,
      "learning_rate": 6.39048466704426e-06,
      "loss": 2.3229,
      "step": 3950
    },
    {
      "epoch": 0.019419090057850683,
      "grad_norm": 3.625,
      "learning_rate": 6.471397362246137e-06,
      "loss": 2.3229,
      "step": 4000
    },
    {
      "epoch": 0.019661828683573815,
      "grad_norm": 2.796875,
      "learning_rate": 6.552310057448014e-06,
      "loss": 2.3161,
      "step": 4050
    },
    {
      "epoch": 0.01990456730929695,
      "grad_norm": 3.953125,
      "learning_rate": 6.633222752649892e-06,
      "loss": 2.328,
      "step": 4100
    },
    {
      "epoch": 0.020147305935020083,
      "grad_norm": 3.96875,
      "learning_rate": 6.714135447851768e-06,
      "loss": 2.3185,
      "step": 4150
    },
    {
      "epoch": 0.02039004456074322,
      "grad_norm": 3.421875,
      "learning_rate": 6.795048143053645e-06,
      "loss": 2.3551,
      "step": 4200
    },
    {
      "epoch": 0.02063278318646635,
      "grad_norm": 3.828125,
      "learning_rate": 6.875960838255523e-06,
      "loss": 2.3208,
      "step": 4250
    },
    {
      "epoch": 0.020875521812189483,
      "grad_norm": 3.828125,
      "learning_rate": 6.9568735334574e-06,
      "loss": 2.3159,
      "step": 4300
    },
    {
      "epoch": 0.02111826043791262,
      "grad_norm": 3.96875,
      "learning_rate": 7.0377862286592764e-06,
      "loss": 2.2877,
      "step": 4350
    },
    {
      "epoch": 0.02136099906363575,
      "grad_norm": 3.53125,
      "learning_rate": 7.118698923861154e-06,
      "loss": 2.3062,
      "step": 4400
    },
    {
      "epoch": 0.021603737689358884,
      "grad_norm": 3.578125,
      "learning_rate": 7.1996116190630315e-06,
      "loss": 2.3151,
      "step": 4450
    },
    {
      "epoch": 0.02184647631508202,
      "grad_norm": 2.421875,
      "learning_rate": 7.280524314264909e-06,
      "loss": 2.3383,
      "step": 4500
    },
    {
      "epoch": 0.022089214940805152,
      "grad_norm": 3.046875,
      "learning_rate": 7.3614370094667866e-06,
      "loss": 2.3252,
      "step": 4550
    },
    {
      "epoch": 0.022331953566528284,
      "grad_norm": 3.40625,
      "learning_rate": 7.442349704668664e-06,
      "loss": 2.3223,
      "step": 4600
    },
    {
      "epoch": 0.02257469219225142,
      "grad_norm": 3.390625,
      "learning_rate": 7.52326239987054e-06,
      "loss": 2.3112,
      "step": 4650
    },
    {
      "epoch": 0.022817430817974552,
      "grad_norm": 4.46875,
      "learning_rate": 7.604175095072418e-06,
      "loss": 2.3034,
      "step": 4700
    },
    {
      "epoch": 0.023060169443697685,
      "grad_norm": 3.8125,
      "learning_rate": 7.685087790274294e-06,
      "loss": 2.3457,
      "step": 4750
    },
    {
      "epoch": 0.02330290806942082,
      "grad_norm": 4.53125,
      "learning_rate": 7.766000485476171e-06,
      "loss": 2.3064,
      "step": 4800
    },
    {
      "epoch": 0.023545646695143953,
      "grad_norm": 5.25,
      "learning_rate": 7.846913180678048e-06,
      "loss": 2.3134,
      "step": 4850
    },
    {
      "epoch": 0.023788385320867085,
      "grad_norm": 3.984375,
      "learning_rate": 7.927825875879927e-06,
      "loss": 2.2627,
      "step": 4900
    },
    {
      "epoch": 0.02403112394659022,
      "grad_norm": 4.0625,
      "learning_rate": 8.008738571081803e-06,
      "loss": 2.331,
      "step": 4950
    },
    {
      "epoch": 0.024273862572313353,
      "grad_norm": 4.78125,
      "learning_rate": 8.08965126628368e-06,
      "loss": 2.2673,
      "step": 5000
    },
    {
      "epoch": 0.02451660119803649,
      "grad_norm": 6.09375,
      "learning_rate": 8.170563961485558e-06,
      "loss": 2.2814,
      "step": 5050
    },
    {
      "epoch": 0.02475933982375962,
      "grad_norm": 3.671875,
      "learning_rate": 8.251476656687436e-06,
      "loss": 2.2875,
      "step": 5100
    },
    {
      "epoch": 0.025002078449482754,
      "grad_norm": 3.375,
      "learning_rate": 8.332389351889311e-06,
      "loss": 2.3154,
      "step": 5150
    },
    {
      "epoch": 0.02524481707520589,
      "grad_norm": 4.0,
      "learning_rate": 8.41330204709119e-06,
      "loss": 2.3052,
      "step": 5200
    },
    {
      "epoch": 0.02548755570092902,
      "grad_norm": 4.46875,
      "learning_rate": 8.494214742293067e-06,
      "loss": 2.2914,
      "step": 5250
    },
    {
      "epoch": 0.025730294326652154,
      "grad_norm": 3.15625,
      "learning_rate": 8.575127437494944e-06,
      "loss": 2.3334,
      "step": 5300
    },
    {
      "epoch": 0.02597303295237529,
      "grad_norm": 3.96875,
      "learning_rate": 8.656040132696821e-06,
      "loss": 2.2844,
      "step": 5350
    },
    {
      "epoch": 0.026215771578098422,
      "grad_norm": 4.46875,
      "learning_rate": 8.736952827898698e-06,
      "loss": 2.2983,
      "step": 5400
    },
    {
      "epoch": 0.026458510203821554,
      "grad_norm": 3.796875,
      "learning_rate": 8.817865523100575e-06,
      "loss": 2.2918,
      "step": 5450
    },
    {
      "epoch": 0.02670124882954469,
      "grad_norm": 4.6875,
      "learning_rate": 8.898778218302452e-06,
      "loss": 2.2442,
      "step": 5500
    },
    {
      "epoch": 0.026943987455267823,
      "grad_norm": 3.84375,
      "learning_rate": 8.97969091350433e-06,
      "loss": 2.2766,
      "step": 5550
    },
    {
      "epoch": 0.027186726080990955,
      "grad_norm": 4.3125,
      "learning_rate": 9.060603608706207e-06,
      "loss": 2.2699,
      "step": 5600
    },
    {
      "epoch": 0.02742946470671409,
      "grad_norm": 3.515625,
      "learning_rate": 9.141516303908084e-06,
      "loss": 2.2901,
      "step": 5650
    },
    {
      "epoch": 0.027672203332437223,
      "grad_norm": 3.265625,
      "learning_rate": 9.22242899910996e-06,
      "loss": 2.2585,
      "step": 5700
    },
    {
      "epoch": 0.027914941958160355,
      "grad_norm": 4.25,
      "learning_rate": 9.303341694311838e-06,
      "loss": 2.2746,
      "step": 5750
    },
    {
      "epoch": 0.02815768058388349,
      "grad_norm": 4.125,
      "learning_rate": 9.384254389513715e-06,
      "loss": 2.2992,
      "step": 5800
    },
    {
      "epoch": 0.028400419209606623,
      "grad_norm": 4.9375,
      "learning_rate": 9.465167084715594e-06,
      "loss": 2.2443,
      "step": 5850
    },
    {
      "epoch": 0.028643157835329756,
      "grad_norm": 5.5,
      "learning_rate": 9.54607977991747e-06,
      "loss": 2.301,
      "step": 5900
    },
    {
      "epoch": 0.02888589646105289,
      "grad_norm": 3.53125,
      "learning_rate": 9.626992475119346e-06,
      "loss": 2.2621,
      "step": 5950
    },
    {
      "epoch": 0.029128635086776024,
      "grad_norm": 4.0625,
      "learning_rate": 9.707905170321225e-06,
      "loss": 2.2469,
      "step": 6000
    },
    {
      "epoch": 0.02937137371249916,
      "grad_norm": 3.875,
      "learning_rate": 9.788817865523102e-06,
      "loss": 2.2495,
      "step": 6050
    },
    {
      "epoch": 0.029614112338222292,
      "grad_norm": 4.65625,
      "learning_rate": 9.869730560724978e-06,
      "loss": 2.2683,
      "step": 6100
    },
    {
      "epoch": 0.029856850963945424,
      "grad_norm": 4.4375,
      "learning_rate": 9.950643255926856e-06,
      "loss": 2.2682,
      "step": 6150
    },
    {
      "epoch": 0.03009958958966856,
      "grad_norm": 6.0,
      "learning_rate": 1.0031555951128734e-05,
      "loss": 2.2343,
      "step": 6200
    },
    {
      "epoch": 0.030342328215391692,
      "grad_norm": 3.203125,
      "learning_rate": 1.011246864633061e-05,
      "loss": 2.2693,
      "step": 6250
    },
    {
      "epoch": 0.030585066841114825,
      "grad_norm": 6.03125,
      "learning_rate": 1.0193381341532486e-05,
      "loss": 2.2727,
      "step": 6300
    },
    {
      "epoch": 0.03082780546683796,
      "grad_norm": 3.609375,
      "learning_rate": 1.0274294036734365e-05,
      "loss": 2.2442,
      "step": 6350
    },
    {
      "epoch": 0.031070544092561093,
      "grad_norm": 5.5625,
      "learning_rate": 1.0355206731936242e-05,
      "loss": 2.2315,
      "step": 6400
    },
    {
      "epoch": 0.03131328271828423,
      "grad_norm": 3.671875,
      "learning_rate": 1.0436119427138119e-05,
      "loss": 2.2454,
      "step": 6450
    },
    {
      "epoch": 0.03155602134400736,
      "grad_norm": 4.0625,
      "learning_rate": 1.0517032122339996e-05,
      "loss": 2.2689,
      "step": 6500
    },
    {
      "epoch": 0.03179875996973049,
      "grad_norm": 4.21875,
      "learning_rate": 1.0597944817541875e-05,
      "loss": 2.2182,
      "step": 6550
    },
    {
      "epoch": 0.032041498595453625,
      "grad_norm": 3.53125,
      "learning_rate": 1.067885751274375e-05,
      "loss": 2.2385,
      "step": 6600
    },
    {
      "epoch": 0.03228423722117676,
      "grad_norm": 3.953125,
      "learning_rate": 1.0759770207945627e-05,
      "loss": 2.2407,
      "step": 6650
    },
    {
      "epoch": 0.0325269758468999,
      "grad_norm": 4.3125,
      "learning_rate": 1.0840682903147506e-05,
      "loss": 2.2354,
      "step": 6700
    },
    {
      "epoch": 0.03276971447262303,
      "grad_norm": 3.953125,
      "learning_rate": 1.0921595598349382e-05,
      "loss": 2.2377,
      "step": 6750
    },
    {
      "epoch": 0.03301245309834616,
      "grad_norm": 4.3125,
      "learning_rate": 1.1002508293551259e-05,
      "loss": 2.2248,
      "step": 6800
    },
    {
      "epoch": 0.033255191724069294,
      "grad_norm": 2.75,
      "learning_rate": 1.1083420988753136e-05,
      "loss": 2.1842,
      "step": 6850
    },
    {
      "epoch": 0.033497930349792426,
      "grad_norm": 3.484375,
      "learning_rate": 1.1164333683955013e-05,
      "loss": 2.2184,
      "step": 6900
    },
    {
      "epoch": 0.03374066897551556,
      "grad_norm": 3.53125,
      "learning_rate": 1.1245246379156892e-05,
      "loss": 2.2141,
      "step": 6950
    },
    {
      "epoch": 0.0339834076012387,
      "grad_norm": 4.84375,
      "learning_rate": 1.1326159074358767e-05,
      "loss": 2.1841,
      "step": 7000
    },
    {
      "epoch": 0.03422614622696183,
      "grad_norm": 5.125,
      "learning_rate": 1.1407071769560644e-05,
      "loss": 2.2327,
      "step": 7050
    },
    {
      "epoch": 0.03446888485268496,
      "grad_norm": 4.0625,
      "learning_rate": 1.1487984464762523e-05,
      "loss": 2.2382,
      "step": 7100
    },
    {
      "epoch": 0.034711623478408095,
      "grad_norm": 4.84375,
      "learning_rate": 1.1568897159964399e-05,
      "loss": 2.2254,
      "step": 7150
    },
    {
      "epoch": 0.03495436210413123,
      "grad_norm": 4.625,
      "learning_rate": 1.1649809855166276e-05,
      "loss": 2.2133,
      "step": 7200
    },
    {
      "epoch": 0.03519710072985436,
      "grad_norm": 2.921875,
      "learning_rate": 1.1730722550368154e-05,
      "loss": 2.216,
      "step": 7250
    },
    {
      "epoch": 0.0354398393555775,
      "grad_norm": 5.1875,
      "learning_rate": 1.181163524557003e-05,
      "loss": 2.2227,
      "step": 7300
    },
    {
      "epoch": 0.03568257798130063,
      "grad_norm": 4.875,
      "learning_rate": 1.1892547940771909e-05,
      "loss": 2.2058,
      "step": 7350
    },
    {
      "epoch": 0.03592531660702376,
      "grad_norm": 4.59375,
      "learning_rate": 1.1973460635973786e-05,
      "loss": 2.207,
      "step": 7400
    },
    {
      "epoch": 0.036168055232746896,
      "grad_norm": 3.75,
      "learning_rate": 1.2054373331175661e-05,
      "loss": 2.2047,
      "step": 7450
    },
    {
      "epoch": 0.03641079385847003,
      "grad_norm": 3.828125,
      "learning_rate": 1.213528602637754e-05,
      "loss": 2.2141,
      "step": 7500
    },
    {
      "epoch": 0.03665353248419317,
      "grad_norm": 3.9375,
      "learning_rate": 1.2216198721579417e-05,
      "loss": 2.2137,
      "step": 7550
    },
    {
      "epoch": 0.0368962711099163,
      "grad_norm": 4.03125,
      "learning_rate": 1.2297111416781292e-05,
      "loss": 2.1956,
      "step": 7600
    },
    {
      "epoch": 0.03713900973563943,
      "grad_norm": 3.828125,
      "learning_rate": 1.2378024111983171e-05,
      "loss": 2.2152,
      "step": 7650
    },
    {
      "epoch": 0.037381748361362564,
      "grad_norm": 3.984375,
      "learning_rate": 1.2458936807185048e-05,
      "loss": 2.2039,
      "step": 7700
    },
    {
      "epoch": 0.0376244869870857,
      "grad_norm": 2.84375,
      "learning_rate": 1.2539849502386925e-05,
      "loss": 2.2054,
      "step": 7750
    },
    {
      "epoch": 0.03786722561280883,
      "grad_norm": 3.484375,
      "learning_rate": 1.2620762197588803e-05,
      "loss": 2.1559,
      "step": 7800
    },
    {
      "epoch": 0.03810996423853197,
      "grad_norm": 4.75,
      "learning_rate": 1.2701674892790681e-05,
      "loss": 2.1573,
      "step": 7850
    },
    {
      "epoch": 0.0383527028642551,
      "grad_norm": 2.5,
      "learning_rate": 1.2782587587992557e-05,
      "loss": 2.1676,
      "step": 7900
    },
    {
      "epoch": 0.03859544148997823,
      "grad_norm": 4.1875,
      "learning_rate": 1.2863500283194434e-05,
      "loss": 2.2064,
      "step": 7950
    },
    {
      "epoch": 0.038838180115701365,
      "grad_norm": 5.3125,
      "learning_rate": 1.2944412978396313e-05,
      "loss": 2.1678,
      "step": 8000
    },
    {
      "epoch": 0.0390809187414245,
      "grad_norm": 5.09375,
      "learning_rate": 1.3025325673598188e-05,
      "loss": 2.2002,
      "step": 8050
    },
    {
      "epoch": 0.03932365736714763,
      "grad_norm": 3.25,
      "learning_rate": 1.3106238368800065e-05,
      "loss": 2.1444,
      "step": 8100
    },
    {
      "epoch": 0.03956639599287077,
      "grad_norm": 3.65625,
      "learning_rate": 1.3187151064001944e-05,
      "loss": 2.2143,
      "step": 8150
    },
    {
      "epoch": 0.0398091346185939,
      "grad_norm": 3.5625,
      "learning_rate": 1.326806375920382e-05,
      "loss": 2.1851,
      "step": 8200
    },
    {
      "epoch": 0.040051873244317034,
      "grad_norm": 3.921875,
      "learning_rate": 1.3348976454405698e-05,
      "loss": 2.1899,
      "step": 8250
    },
    {
      "epoch": 0.040294611870040166,
      "grad_norm": 4.21875,
      "learning_rate": 1.3429889149607575e-05,
      "loss": 2.1567,
      "step": 8300
    },
    {
      "epoch": 0.0405373504957633,
      "grad_norm": 2.875,
      "learning_rate": 1.351080184480945e-05,
      "loss": 2.1853,
      "step": 8350
    },
    {
      "epoch": 0.04078008912148644,
      "grad_norm": 4.5,
      "learning_rate": 1.359171454001133e-05,
      "loss": 2.1475,
      "step": 8400
    },
    {
      "epoch": 0.04102282774720957,
      "grad_norm": 3.734375,
      "learning_rate": 1.3672627235213205e-05,
      "loss": 2.1564,
      "step": 8450
    },
    {
      "epoch": 0.0412655663729327,
      "grad_norm": 4.03125,
      "learning_rate": 1.3753539930415084e-05,
      "loss": 2.1772,
      "step": 8500
    },
    {
      "epoch": 0.041508304998655834,
      "grad_norm": 3.953125,
      "learning_rate": 1.3834452625616961e-05,
      "loss": 2.2006,
      "step": 8550
    },
    {
      "epoch": 0.04175104362437897,
      "grad_norm": 3.890625,
      "learning_rate": 1.3915365320818836e-05,
      "loss": 2.1684,
      "step": 8600
    },
    {
      "epoch": 0.0419937822501021,
      "grad_norm": 3.0625,
      "learning_rate": 1.3996278016020715e-05,
      "loss": 2.1765,
      "step": 8650
    },
    {
      "epoch": 0.04223652087582524,
      "grad_norm": 3.578125,
      "learning_rate": 1.4077190711222592e-05,
      "loss": 2.1336,
      "step": 8700
    },
    {
      "epoch": 0.04247925950154837,
      "grad_norm": 3.828125,
      "learning_rate": 1.4158103406424468e-05,
      "loss": 2.1553,
      "step": 8750
    },
    {
      "epoch": 0.0427219981272715,
      "grad_norm": 4.21875,
      "learning_rate": 1.4239016101626346e-05,
      "loss": 2.1563,
      "step": 8800
    },
    {
      "epoch": 0.042964736752994635,
      "grad_norm": 3.28125,
      "learning_rate": 1.4319928796828223e-05,
      "loss": 2.0883,
      "step": 8850
    },
    {
      "epoch": 0.04320747537871777,
      "grad_norm": 4.1875,
      "learning_rate": 1.44008414920301e-05,
      "loss": 2.1586,
      "step": 8900
    },
    {
      "epoch": 0.0434502140044409,
      "grad_norm": 3.640625,
      "learning_rate": 1.4481754187231978e-05,
      "loss": 2.1743,
      "step": 8950
    },
    {
      "epoch": 0.04369295263016404,
      "grad_norm": 4.5,
      "learning_rate": 1.4562666882433856e-05,
      "loss": 2.142,
      "step": 9000
    },
    {
      "epoch": 0.04393569125588717,
      "grad_norm": 3.8125,
      "learning_rate": 1.4643579577635732e-05,
      "loss": 2.1355,
      "step": 9050
    },
    {
      "epoch": 0.044178429881610304,
      "grad_norm": 3.515625,
      "learning_rate": 1.4724492272837609e-05,
      "loss": 2.1328,
      "step": 9100
    },
    {
      "epoch": 0.044421168507333436,
      "grad_norm": 3.125,
      "learning_rate": 1.4805404968039488e-05,
      "loss": 2.1121,
      "step": 9150
    },
    {
      "epoch": 0.04466390713305657,
      "grad_norm": 4.3125,
      "learning_rate": 1.4886317663241363e-05,
      "loss": 2.185,
      "step": 9200
    },
    {
      "epoch": 0.04490664575877971,
      "grad_norm": 3.421875,
      "learning_rate": 1.496723035844324e-05,
      "loss": 2.1548,
      "step": 9250
    },
    {
      "epoch": 0.04514938438450284,
      "grad_norm": 3.765625,
      "learning_rate": 1.5048143053645119e-05,
      "loss": 2.1517,
      "step": 9300
    },
    {
      "epoch": 0.04539212301022597,
      "grad_norm": 3.515625,
      "learning_rate": 1.5129055748846995e-05,
      "loss": 2.1235,
      "step": 9350
    },
    {
      "epoch": 0.045634861635949105,
      "grad_norm": 5.59375,
      "learning_rate": 1.5209968444048873e-05,
      "loss": 2.1636,
      "step": 9400
    },
    {
      "epoch": 0.04587760026167224,
      "grad_norm": 2.78125,
      "learning_rate": 1.529088113925075e-05,
      "loss": 2.1182,
      "step": 9450
    },
    {
      "epoch": 0.04612033888739537,
      "grad_norm": 3.234375,
      "learning_rate": 1.5371793834452626e-05,
      "loss": 2.1404,
      "step": 9500
    },
    {
      "epoch": 0.04636307751311851,
      "grad_norm": 2.984375,
      "learning_rate": 1.5452706529654505e-05,
      "loss": 2.1383,
      "step": 9550
    },
    {
      "epoch": 0.04660581613884164,
      "grad_norm": 3.125,
      "learning_rate": 1.5533619224856383e-05,
      "loss": 2.1135,
      "step": 9600
    },
    {
      "epoch": 0.04684855476456477,
      "grad_norm": 3.21875,
      "learning_rate": 1.561453192005826e-05,
      "loss": 2.1648,
      "step": 9650
    },
    {
      "epoch": 0.047091293390287906,
      "grad_norm": 2.71875,
      "learning_rate": 1.5695444615260134e-05,
      "loss": 2.1479,
      "step": 9700
    },
    {
      "epoch": 0.04733403201601104,
      "grad_norm": 4.90625,
      "learning_rate": 1.5776357310462013e-05,
      "loss": 2.1343,
      "step": 9750
    },
    {
      "epoch": 0.04757677064173417,
      "grad_norm": 3.875,
      "learning_rate": 1.585727000566389e-05,
      "loss": 2.1563,
      "step": 9800
    },
    {
      "epoch": 0.04781950926745731,
      "grad_norm": 2.90625,
      "learning_rate": 1.5938182700865767e-05,
      "loss": 2.1448,
      "step": 9850
    },
    {
      "epoch": 0.04806224789318044,
      "grad_norm": 3.578125,
      "learning_rate": 1.6019095396067646e-05,
      "loss": 2.1041,
      "step": 9900
    },
    {
      "epoch": 0.048304986518903574,
      "grad_norm": 3.546875,
      "learning_rate": 1.610000809126952e-05,
      "loss": 2.1272,
      "step": 9950
    },
    {
      "epoch": 0.048547725144626706,
      "grad_norm": 2.796875,
      "learning_rate": 1.61809207864714e-05,
      "loss": 2.1146,
      "step": 10000
    },
    {
      "epoch": 0.04879046377034984,
      "grad_norm": 3.65625,
      "learning_rate": 1.6261833481673276e-05,
      "loss": 2.1449,
      "step": 10050
    },
    {
      "epoch": 0.04903320239607298,
      "grad_norm": 3.734375,
      "learning_rate": 1.634274617687515e-05,
      "loss": 2.1108,
      "step": 10100
    },
    {
      "epoch": 0.04927594102179611,
      "grad_norm": 4.0,
      "learning_rate": 1.642365887207703e-05,
      "loss": 2.0933,
      "step": 10150
    },
    {
      "epoch": 0.04951867964751924,
      "grad_norm": 3.421875,
      "learning_rate": 1.6504571567278905e-05,
      "loss": 2.0981,
      "step": 10200
    },
    {
      "epoch": 0.049761418273242375,
      "grad_norm": 4.40625,
      "learning_rate": 1.6585484262480784e-05,
      "loss": 2.1161,
      "step": 10250
    },
    {
      "epoch": 0.05000415689896551,
      "grad_norm": 4.8125,
      "learning_rate": 1.6666396957682663e-05,
      "loss": 2.1318,
      "step": 10300
    },
    {
      "epoch": 0.05024689552468864,
      "grad_norm": 3.375,
      "learning_rate": 1.674730965288454e-05,
      "loss": 2.1162,
      "step": 10350
    },
    {
      "epoch": 0.05048963415041178,
      "grad_norm": 3.953125,
      "learning_rate": 1.6828222348086417e-05,
      "loss": 2.1361,
      "step": 10400
    },
    {
      "epoch": 0.05073237277613491,
      "grad_norm": 3.40625,
      "learning_rate": 1.6909135043288293e-05,
      "loss": 2.1388,
      "step": 10450
    },
    {
      "epoch": 0.05097511140185804,
      "grad_norm": 4.03125,
      "learning_rate": 1.699004773849017e-05,
      "loss": 2.1265,
      "step": 10500
    },
    {
      "epoch": 0.051217850027581176,
      "grad_norm": 3.546875,
      "learning_rate": 1.7070960433692047e-05,
      "loss": 2.0881,
      "step": 10550
    },
    {
      "epoch": 0.05146058865330431,
      "grad_norm": 2.96875,
      "learning_rate": 1.7151873128893926e-05,
      "loss": 2.0963,
      "step": 10600
    },
    {
      "epoch": 0.05170332727902744,
      "grad_norm": 3.640625,
      "learning_rate": 1.72327858240958e-05,
      "loss": 2.1338,
      "step": 10650
    },
    {
      "epoch": 0.05194606590475058,
      "grad_norm": 3.734375,
      "learning_rate": 1.731369851929768e-05,
      "loss": 2.1344,
      "step": 10700
    },
    {
      "epoch": 0.05218880453047371,
      "grad_norm": 3.1875,
      "learning_rate": 1.739461121449956e-05,
      "loss": 2.1065,
      "step": 10750
    },
    {
      "epoch": 0.052431543156196844,
      "grad_norm": 3.9375,
      "learning_rate": 1.7475523909701434e-05,
      "loss": 2.1218,
      "step": 10800
    },
    {
      "epoch": 0.05267428178191998,
      "grad_norm": 4.03125,
      "learning_rate": 1.755643660490331e-05,
      "loss": 2.1298,
      "step": 10850
    },
    {
      "epoch": 0.05291702040764311,
      "grad_norm": 3.609375,
      "learning_rate": 1.7637349300105188e-05,
      "loss": 2.1081,
      "step": 10900
    },
    {
      "epoch": 0.05315975903336624,
      "grad_norm": 3.703125,
      "learning_rate": 1.7718261995307064e-05,
      "loss": 2.12,
      "step": 10950
    },
    {
      "epoch": 0.05340249765908938,
      "grad_norm": 2.5625,
      "learning_rate": 1.7799174690508942e-05,
      "loss": 2.1092,
      "step": 11000
    },
    {
      "epoch": 0.05364523628481251,
      "grad_norm": 3.71875,
      "learning_rate": 1.788008738571082e-05,
      "loss": 2.1315,
      "step": 11050
    },
    {
      "epoch": 0.053887974910535645,
      "grad_norm": 3.6875,
      "learning_rate": 1.7961000080912697e-05,
      "loss": 2.0988,
      "step": 11100
    },
    {
      "epoch": 0.05413071353625878,
      "grad_norm": 2.625,
      "learning_rate": 1.8041912776114575e-05,
      "loss": 2.0957,
      "step": 11150
    },
    {
      "epoch": 0.05437345216198191,
      "grad_norm": 3.96875,
      "learning_rate": 1.812282547131645e-05,
      "loss": 2.1168,
      "step": 11200
    },
    {
      "epoch": 0.05461619078770505,
      "grad_norm": 3.5,
      "learning_rate": 1.8203738166518326e-05,
      "loss": 2.0957,
      "step": 11250
    },
    {
      "epoch": 0.05485892941342818,
      "grad_norm": 2.578125,
      "learning_rate": 1.8284650861720205e-05,
      "loss": 2.0774,
      "step": 11300
    },
    {
      "epoch": 0.055101668039151314,
      "grad_norm": 2.328125,
      "learning_rate": 1.8365563556922084e-05,
      "loss": 2.0846,
      "step": 11350
    },
    {
      "epoch": 0.055344406664874446,
      "grad_norm": 5.125,
      "learning_rate": 1.844647625212396e-05,
      "loss": 2.0767,
      "step": 11400
    },
    {
      "epoch": 0.05558714529059758,
      "grad_norm": 3.453125,
      "learning_rate": 1.8527388947325838e-05,
      "loss": 2.077,
      "step": 11450
    },
    {
      "epoch": 0.05582988391632071,
      "grad_norm": 3.890625,
      "learning_rate": 1.8608301642527717e-05,
      "loss": 2.0567,
      "step": 11500
    },
    {
      "epoch": 0.05607262254204385,
      "grad_norm": 3.890625,
      "learning_rate": 1.8689214337729592e-05,
      "loss": 2.0764,
      "step": 11550
    },
    {
      "epoch": 0.05631536116776698,
      "grad_norm": 2.84375,
      "learning_rate": 1.8770127032931468e-05,
      "loss": 2.091,
      "step": 11600
    },
    {
      "epoch": 0.056558099793490114,
      "grad_norm": 3.265625,
      "learning_rate": 1.8851039728133343e-05,
      "loss": 2.0924,
      "step": 11650
    },
    {
      "epoch": 0.05680083841921325,
      "grad_norm": 3.890625,
      "learning_rate": 1.8931952423335222e-05,
      "loss": 2.0802,
      "step": 11700
    },
    {
      "epoch": 0.05704357704493638,
      "grad_norm": 4.03125,
      "learning_rate": 1.90128651185371e-05,
      "loss": 2.0968,
      "step": 11750
    },
    {
      "epoch": 0.05728631567065951,
      "grad_norm": 5.03125,
      "learning_rate": 1.9093777813738976e-05,
      "loss": 2.0843,
      "step": 11800
    },
    {
      "epoch": 0.05752905429638265,
      "grad_norm": 3.59375,
      "learning_rate": 1.9174690508940855e-05,
      "loss": 2.0563,
      "step": 11850
    },
    {
      "epoch": 0.05777179292210578,
      "grad_norm": 2.953125,
      "learning_rate": 1.9255603204142734e-05,
      "loss": 2.0981,
      "step": 11900
    },
    {
      "epoch": 0.058014531547828915,
      "grad_norm": 3.171875,
      "learning_rate": 1.933651589934461e-05,
      "loss": 2.0896,
      "step": 11950
    },
    {
      "epoch": 0.05825727017355205,
      "grad_norm": 3.296875,
      "learning_rate": 1.9417428594546484e-05,
      "loss": 2.0678,
      "step": 12000
    },
    {
      "epoch": 0.05850000879927518,
      "grad_norm": 3.46875,
      "learning_rate": 1.9498341289748363e-05,
      "loss": 2.1155,
      "step": 12050
    },
    {
      "epoch": 0.05874274742499832,
      "grad_norm": 4.40625,
      "learning_rate": 1.957925398495024e-05,
      "loss": 2.0639,
      "step": 12100
    },
    {
      "epoch": 0.05898548605072145,
      "grad_norm": 4.09375,
      "learning_rate": 1.9660166680152117e-05,
      "loss": 2.0794,
      "step": 12150
    },
    {
      "epoch": 0.059228224676444584,
      "grad_norm": 3.359375,
      "learning_rate": 1.9741079375353996e-05,
      "loss": 2.0526,
      "step": 12200
    },
    {
      "epoch": 0.059470963302167716,
      "grad_norm": 2.90625,
      "learning_rate": 1.982199207055587e-05,
      "loss": 2.0403,
      "step": 12250
    },
    {
      "epoch": 0.05971370192789085,
      "grad_norm": 3.65625,
      "learning_rate": 1.990290476575775e-05,
      "loss": 2.0693,
      "step": 12300
    },
    {
      "epoch": 0.05995644055361398,
      "grad_norm": 4.3125,
      "learning_rate": 1.9983817460959626e-05,
      "loss": 2.0828,
      "step": 12350
    },
    {
      "epoch": 0.06019917917933712,
      "grad_norm": 3.78125,
      "learning_rate": 1.9999999505548666e-05,
      "loss": 2.0928,
      "step": 12400
    },
    {
      "epoch": 0.06044191780506025,
      "grad_norm": 4.59375,
      "learning_rate": 1.99999974968402e-05,
      "loss": 2.0529,
      "step": 12450
    },
    {
      "epoch": 0.060684656430783385,
      "grad_norm": 4.0,
      "learning_rate": 1.9999993942971703e-05,
      "loss": 2.0478,
      "step": 12500
    },
    {
      "epoch": 0.06092739505650652,
      "grad_norm": 3.359375,
      "learning_rate": 1.9999988843943725e-05,
      "loss": 2.0252,
      "step": 12550
    },
    {
      "epoch": 0.06117013368222965,
      "grad_norm": 3.5,
      "learning_rate": 1.9999982199757056e-05,
      "loss": 2.1012,
      "step": 12600
    },
    {
      "epoch": 0.06141287230795278,
      "grad_norm": 3.21875,
      "learning_rate": 1.999997401041272e-05,
      "loss": 2.0468,
      "step": 12650
    },
    {
      "epoch": 0.06165561093367592,
      "grad_norm": 2.5,
      "learning_rate": 1.9999964275911983e-05,
      "loss": 2.0217,
      "step": 12700
    },
    {
      "epoch": 0.06189834955939905,
      "grad_norm": 4.28125,
      "learning_rate": 1.9999952996256348e-05,
      "loss": 2.0882,
      "step": 12750
    },
    {
      "epoch": 0.062141088185122186,
      "grad_norm": 3.3125,
      "learning_rate": 1.9999940171447564e-05,
      "loss": 2.0498,
      "step": 12800
    },
    {
      "epoch": 0.06238382681084532,
      "grad_norm": 3.609375,
      "learning_rate": 1.9999925801487606e-05,
      "loss": 2.0406,
      "step": 12850
    },
    {
      "epoch": 0.06262656543656846,
      "grad_norm": 3.625,
      "learning_rate": 1.9999909886378694e-05,
      "loss": 2.1089,
      "step": 12900
    },
    {
      "epoch": 0.06286930406229159,
      "grad_norm": 3.84375,
      "learning_rate": 1.999989242612329e-05,
      "loss": 2.0344,
      "step": 12950
    },
    {
      "epoch": 0.06311204268801472,
      "grad_norm": 2.484375,
      "learning_rate": 1.999987342072409e-05,
      "loss": 2.0811,
      "step": 13000
    },
    {
      "epoch": 0.06335478131373785,
      "grad_norm": 4.46875,
      "learning_rate": 1.999985287018404e-05,
      "loss": 2.0513,
      "step": 13050
    },
    {
      "epoch": 0.06359751993946099,
      "grad_norm": 4.0,
      "learning_rate": 1.99998307745063e-05,
      "loss": 2.0702,
      "step": 13100
    },
    {
      "epoch": 0.06384025856518412,
      "grad_norm": 4.34375,
      "learning_rate": 1.9999807133694295e-05,
      "loss": 2.0912,
      "step": 13150
    },
    {
      "epoch": 0.06408299719090725,
      "grad_norm": 3.484375,
      "learning_rate": 1.9999781947751672e-05,
      "loss": 2.066,
      "step": 13200
    },
    {
      "epoch": 0.06432573581663038,
      "grad_norm": 3.953125,
      "learning_rate": 1.9999755216682325e-05,
      "loss": 2.0005,
      "step": 13250
    },
    {
      "epoch": 0.06456847444235352,
      "grad_norm": 2.921875,
      "learning_rate": 1.9999726940490384e-05,
      "loss": 2.0364,
      "step": 13300
    },
    {
      "epoch": 0.06481121306807665,
      "grad_norm": 3.359375,
      "learning_rate": 1.9999697119180222e-05,
      "loss": 2.0808,
      "step": 13350
    },
    {
      "epoch": 0.0650539516937998,
      "grad_norm": 3.21875,
      "learning_rate": 1.9999665752756444e-05,
      "loss": 2.0613,
      "step": 13400
    },
    {
      "epoch": 0.06529669031952293,
      "grad_norm": 3.140625,
      "learning_rate": 1.9999632841223892e-05,
      "loss": 2.0492,
      "step": 13450
    },
    {
      "epoch": 0.06553942894524606,
      "grad_norm": 3.203125,
      "learning_rate": 1.9999598384587658e-05,
      "loss": 2.0875,
      "step": 13500
    },
    {
      "epoch": 0.06578216757096919,
      "grad_norm": 4.25,
      "learning_rate": 1.999956238285306e-05,
      "loss": 2.0319,
      "step": 13550
    },
    {
      "epoch": 0.06602490619669232,
      "grad_norm": 4.15625,
      "learning_rate": 1.999952483602567e-05,
      "loss": 2.0415,
      "step": 13600
    },
    {
      "epoch": 0.06626764482241546,
      "grad_norm": 3.25,
      "learning_rate": 1.999948574411128e-05,
      "loss": 2.0247,
      "step": 13650
    },
    {
      "epoch": 0.06651038344813859,
      "grad_norm": 3.71875,
      "learning_rate": 1.9999445107115936e-05,
      "loss": 2.0294,
      "step": 13700
    },
    {
      "epoch": 0.06675312207386172,
      "grad_norm": 4.21875,
      "learning_rate": 1.9999402925045914e-05,
      "loss": 2.056,
      "step": 13750
    },
    {
      "epoch": 0.06699586069958485,
      "grad_norm": 3.0,
      "learning_rate": 1.9999359197907732e-05,
      "loss": 2.0527,
      "step": 13800
    },
    {
      "epoch": 0.06723859932530798,
      "grad_norm": 3.34375,
      "learning_rate": 1.999931392570815e-05,
      "loss": 2.0727,
      "step": 13850
    },
    {
      "epoch": 0.06748133795103112,
      "grad_norm": 3.4375,
      "learning_rate": 1.9999267108454163e-05,
      "loss": 2.0542,
      "step": 13900
    },
    {
      "epoch": 0.06772407657675426,
      "grad_norm": 3.109375,
      "learning_rate": 1.9999218746153e-05,
      "loss": 1.979,
      "step": 13950
    },
    {
      "epoch": 0.0679668152024774,
      "grad_norm": 3.765625,
      "learning_rate": 1.999916883881214e-05,
      "loss": 2.052,
      "step": 14000
    }
  ],
  "logging_steps": 50,
  "max_steps": 411966,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.52714505134465e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}