| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.1283208534208637, |
| "eval_steps": 500, |
| "global_step": 66000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9786302868670296e-05, |
| "loss": 0.5871, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.957260573734058e-05, |
| "loss": 0.5995, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9358908606010875e-05, |
| "loss": 0.6109, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.914521147468117e-05, |
| "loss": 0.5865, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.893151434335146e-05, |
| "loss": 0.5996, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.871781721202175e-05, |
| "loss": 0.614, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.850412008069204e-05, |
| "loss": 0.5985, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.829042294936233e-05, |
| "loss": 0.6062, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.8077153212295275e-05, |
| "loss": 0.6148, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.7863456080965575e-05, |
| "loss": 0.5964, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.764975894963586e-05, |
| "loss": 0.6037, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.7436061818306155e-05, |
| "loss": 0.5954, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.722236468697644e-05, |
| "loss": 0.6028, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.7008667555646735e-05, |
| "loss": 0.6213, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.679497042431703e-05, |
| "loss": 0.6029, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.6581273292987314e-05, |
| "loss": 0.6167, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.636757616165761e-05, |
| "loss": 0.59, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.61538790303279e-05, |
| "loss": 0.6135, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.5940181898998194e-05, |
| "loss": 0.602, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.572691216193114e-05, |
| "loss": 0.5866, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.551321503060143e-05, |
| "loss": 0.5948, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.529951789927172e-05, |
| "loss": 0.6134, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.5085820767942014e-05, |
| "loss": 0.5979, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.487212363661231e-05, |
| "loss": 0.5943, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.4658426505282594e-05, |
| "loss": 0.593, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.444472937395289e-05, |
| "loss": 0.5928, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.423103224262318e-05, |
| "loss": 0.6005, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.401776250555613e-05, |
| "loss": 0.5964, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.3804065374226414e-05, |
| "loss": 0.6005, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.359079563715937e-05, |
| "loss": 0.5931, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.337709850582966e-05, |
| "loss": 0.5928, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.316340137449995e-05, |
| "loss": 0.5994, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.294970424317024e-05, |
| "loss": 0.587, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.2736007111840534e-05, |
| "loss": 0.6039, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.252230998051082e-05, |
| "loss": 0.5943, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.230861284918112e-05, |
| "loss": 0.5791, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.209534311211407e-05, |
| "loss": 0.5816, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.1881645980784354e-05, |
| "loss": 0.5771, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.166794884945465e-05, |
| "loss": 0.5955, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.1454251718124934e-05, |
| "loss": 0.5802, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.124055458679523e-05, |
| "loss": 0.5824, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.102728484972818e-05, |
| "loss": 0.5897, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.0813587718398475e-05, |
| "loss": 0.5822, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.059989058706876e-05, |
| "loss": 0.5936, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.0386193455739054e-05, |
| "loss": 0.582, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.017249632440934e-05, |
| "loss": 0.593, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 3.9958799193079634e-05, |
| "loss": 0.5881, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 3.974552945601259e-05, |
| "loss": 0.5841, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 3.9531832324682874e-05, |
| "loss": 0.581, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 3.931813519335317e-05, |
| "loss": 0.5957, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.9104438062023454e-05, |
| "loss": 0.5887, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 3.889074093069375e-05, |
| "loss": 0.5718, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.867704379936404e-05, |
| "loss": 0.5952, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.8463774062296995e-05, |
| "loss": 0.5812, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3.825007693096728e-05, |
| "loss": 0.5717, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 3.8036379799637574e-05, |
| "loss": 0.5775, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 3.782268266830786e-05, |
| "loss": 0.5737, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.7609412931240815e-05, |
| "loss": 0.5904, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.73957157999111e-05, |
| "loss": 0.5953, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.7182018668581394e-05, |
| "loss": 0.5902, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.696832153725169e-05, |
| "loss": 0.5769, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.6755051800184635e-05, |
| "loss": 0.5876, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.654135466885493e-05, |
| "loss": 0.5965, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.6327657537525215e-05, |
| "loss": 0.5748, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.611396040619551e-05, |
| "loss": 0.568, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.5900263274865794e-05, |
| "loss": 0.5812, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.5686993537798755e-05, |
| "loss": 0.5785, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.547329640646904e-05, |
| "loss": 0.5764, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.5259599275139335e-05, |
| "loss": 0.5766, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.504590214380962e-05, |
| "loss": 0.5783, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.4832205012479914e-05, |
| "loss": 0.5794, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.46185078811502e-05, |
| "loss": 0.5741, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.44048107498205e-05, |
| "loss": 0.5766, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.419154101275345e-05, |
| "loss": 0.5694, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.3977843881423735e-05, |
| "loss": 0.5848, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.376414675009403e-05, |
| "loss": 0.577, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.355044961876432e-05, |
| "loss": 0.5645, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.333675248743461e-05, |
| "loss": 0.5869, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.31230553561049e-05, |
| "loss": 0.5811, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.2909358224775194e-05, |
| "loss": 0.5753, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.269566109344549e-05, |
| "loss": 0.5787, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.2482391356378434e-05, |
| "loss": 0.5823, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.226869422504872e-05, |
| "loss": 0.5708, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.2054997093719014e-05, |
| "loss": 0.5662, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.18412999623893e-05, |
| "loss": 0.5862, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.1628457619584915e-05, |
| "loss": 0.5784, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.141476048825521e-05, |
| "loss": 0.5724, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1201063356925495e-05, |
| "loss": 0.5693, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.098736622559579e-05, |
| "loss": 0.579, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.0773669094266075e-05, |
| "loss": 0.5761, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.0559971962936375e-05, |
| "loss": 0.5687, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.0346274831606665e-05, |
| "loss": 0.5803, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.0133005094539612e-05, |
| "loss": 0.5859, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.9919307963209902e-05, |
| "loss": 0.5792, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.970561083188019e-05, |
| "loss": 0.5788, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 2.949191370055048e-05, |
| "loss": 0.565, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.9278216569220778e-05, |
| "loss": 0.5819, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.9064519437891068e-05, |
| "loss": 0.5763, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.8850822306561358e-05, |
| "loss": 0.5721, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.863712517523165e-05, |
| "loss": 0.5793, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2.8423855438164598e-05, |
| "loss": 0.5522, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2.821058570109755e-05, |
| "loss": 0.5694, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.7996888569767842e-05, |
| "loss": 0.5727, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.7783191438438132e-05, |
| "loss": 0.5761, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.7569494307108422e-05, |
| "loss": 0.5789, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.7355797175778715e-05, |
| "loss": 0.5737, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.7142100044449005e-05, |
| "loss": 0.5587, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2.6928402913119295e-05, |
| "loss": 0.5739, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.6714705781789585e-05, |
| "loss": 0.5672, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.650100865045988e-05, |
| "loss": 0.5648, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.628731151913017e-05, |
| "loss": 0.561, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.6074041782063118e-05, |
| "loss": 0.5646, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.5860344650733408e-05, |
| "loss": 0.5705, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.5646647519403698e-05, |
| "loss": 0.5759, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.5432950388073988e-05, |
| "loss": 0.5605, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.5219680651006945e-05, |
| "loss": 0.5759, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.5005983519677235e-05, |
| "loss": 0.5665, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 2.4792286388347525e-05, |
| "loss": 0.5534, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 2.4578589257017815e-05, |
| "loss": 0.5565, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2.4364892125688105e-05, |
| "loss": 0.5519, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 2.4151194994358398e-05, |
| "loss": 0.57, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 2.3937497863028688e-05, |
| "loss": 0.5479, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2.3723800731698977e-05, |
| "loss": 0.5518, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 2.3510530994631928e-05, |
| "loss": 0.5466, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 2.329726125756488e-05, |
| "loss": 0.5427, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2.3083564126235172e-05, |
| "loss": 0.5503, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 2.2869866994905462e-05, |
| "loss": 0.557, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 2.265616986357575e-05, |
| "loss": 0.5615, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2.2442900126508702e-05, |
| "loss": 0.5535, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 2.2229202995178992e-05, |
| "loss": 0.5594, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 2.2015505863849285e-05, |
| "loss": 0.5467, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2.1802236126782236e-05, |
| "loss": 0.5505, |
| "step": 66000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 116988, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2000, |
| "total_flos": 9.807079511383081e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|