{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 588,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017006802721088437,
      "grad_norm": 1.1845334768295288,
      "learning_rate": 1.6216216216216219e-06,
      "loss": 1.3293,
      "step": 5
    },
    {
      "epoch": 0.034013605442176874,
      "grad_norm": 1.0181540250778198,
      "learning_rate": 3.648648648648649e-06,
      "loss": 1.2783,
      "step": 10
    },
    {
      "epoch": 0.05102040816326531,
      "grad_norm": 0.6886288523674011,
      "learning_rate": 5.675675675675676e-06,
      "loss": 1.2906,
      "step": 15
    },
    {
      "epoch": 0.06802721088435375,
      "grad_norm": 0.6959572434425354,
      "learning_rate": 7.702702702702703e-06,
      "loss": 1.2818,
      "step": 20
    },
    {
      "epoch": 0.08503401360544217,
      "grad_norm": 0.6249067187309265,
      "learning_rate": 9.72972972972973e-06,
      "loss": 1.2667,
      "step": 25
    },
    {
      "epoch": 0.10204081632653061,
      "grad_norm": 0.5868192911148071,
      "learning_rate": 1.1756756756756757e-05,
      "loss": 1.2477,
      "step": 30
    },
    {
      "epoch": 0.11904761904761904,
      "grad_norm": 0.5181043148040771,
      "learning_rate": 1.3783783783783784e-05,
      "loss": 1.2207,
      "step": 35
    },
    {
      "epoch": 0.1360544217687075,
      "grad_norm": 0.4317700266838074,
      "learning_rate": 1.5810810810810808e-05,
      "loss": 1.21,
      "step": 40
    },
    {
      "epoch": 0.15306122448979592,
      "grad_norm": 0.4549165964126587,
      "learning_rate": 1.783783783783784e-05,
      "loss": 1.1368,
      "step": 45
    },
    {
      "epoch": 0.17006802721088435,
      "grad_norm": 0.5308559536933899,
      "learning_rate": 1.9864864864864866e-05,
      "loss": 1.1875,
      "step": 50
    },
    {
      "epoch": 0.1870748299319728,
      "grad_norm": 0.5514539480209351,
      "learning_rate": 2.1891891891891892e-05,
      "loss": 1.1593,
      "step": 55
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 0.46228182315826416,
      "learning_rate": 2.3918918918918917e-05,
      "loss": 1.1345,
      "step": 60
    },
    {
      "epoch": 0.22108843537414966,
      "grad_norm": 0.5649062395095825,
      "learning_rate": 2.594594594594595e-05,
      "loss": 1.1065,
      "step": 65
    },
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 0.49446532130241394,
      "learning_rate": 2.7972972972972975e-05,
      "loss": 1.1328,
      "step": 70
    },
    {
      "epoch": 0.25510204081632654,
      "grad_norm": 0.5560067296028137,
      "learning_rate": 3e-05,
      "loss": 1.1697,
      "step": 75
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 0.4814499616622925,
      "learning_rate": 2.999905043303196e-05,
      "loss": 1.1659,
      "step": 80
    },
    {
      "epoch": 0.2891156462585034,
      "grad_norm": 0.5074186325073242,
      "learning_rate": 2.999620185235149e-05,
      "loss": 1.0924,
      "step": 85
    },
    {
      "epoch": 0.30612244897959184,
      "grad_norm": 0.592327892780304,
      "learning_rate": 2.9991454618614338e-05,
      "loss": 1.0793,
      "step": 90
    },
    {
      "epoch": 0.3231292517006803,
      "grad_norm": 0.5456128120422363,
      "learning_rate": 2.998480933286269e-05,
      "loss": 1.0836,
      "step": 95
    },
    {
      "epoch": 0.3401360544217687,
      "grad_norm": 0.6163952946662903,
      "learning_rate": 2.9976266836449057e-05,
      "loss": 1.0816,
      "step": 100
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.5601843595504761,
      "learning_rate": 2.9965828210929758e-05,
      "loss": 1.091,
      "step": 105
    },
    {
      "epoch": 0.3741496598639456,
      "grad_norm": 0.5945920944213867,
      "learning_rate": 2.9953494777927995e-05,
      "loss": 1.0349,
      "step": 110
    },
    {
      "epoch": 0.391156462585034,
      "grad_norm": 0.5780587792396545,
      "learning_rate": 2.993926809896651e-05,
      "loss": 1.0389,
      "step": 115
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.6240988969802856,
      "learning_rate": 2.9923149975269885e-05,
      "loss": 1.0521,
      "step": 120
    },
    {
      "epoch": 0.42517006802721086,
      "grad_norm": 0.6076016426086426,
      "learning_rate": 2.990514244753651e-05,
      "loss": 0.9944,
      "step": 125
    },
    {
      "epoch": 0.4421768707482993,
      "grad_norm": 0.6623475551605225,
      "learning_rate": 2.988524779568018e-05,
      "loss": 0.935,
      "step": 130
    },
    {
      "epoch": 0.45918367346938777,
      "grad_norm": 0.6462083458900452,
      "learning_rate": 2.9863468538541466e-05,
      "loss": 1.0017,
      "step": 135
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 0.6660473346710205,
      "learning_rate": 2.9839807433568787e-05,
      "loss": 0.8931,
      "step": 140
    },
    {
      "epoch": 0.4931972789115646,
      "grad_norm": 0.680072546005249,
      "learning_rate": 2.9814267476469304e-05,
      "loss": 0.9786,
      "step": 145
    },
    {
      "epoch": 0.5102040816326531,
      "grad_norm": 0.6932852268218994,
      "learning_rate": 2.9786851900829633e-05,
      "loss": 0.9335,
      "step": 150
    },
    {
      "epoch": 0.5272108843537415,
      "grad_norm": 0.7095908522605896,
      "learning_rate": 2.9757564177706448e-05,
      "loss": 0.9278,
      "step": 155
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 0.7165976762771606,
      "learning_rate": 2.972640801518701e-05,
      "loss": 0.9029,
      "step": 160
    },
    {
      "epoch": 0.5612244897959183,
      "grad_norm": 0.745604395866394,
      "learning_rate": 2.969338735791968e-05,
      "loss": 0.8675,
      "step": 165
    },
    {
      "epoch": 0.5782312925170068,
      "grad_norm": 0.6811717748641968,
      "learning_rate": 2.9658506386614525e-05,
      "loss": 0.9125,
      "step": 170
    },
    {
      "epoch": 0.5952380952380952,
      "grad_norm": 0.7211567163467407,
      "learning_rate": 2.962176951751396e-05,
      "loss": 0.8937,
      "step": 175
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 0.7217546701431274,
      "learning_rate": 2.958318140183364e-05,
      "loss": 0.9474,
      "step": 180
    },
    {
      "epoch": 0.6292517006802721,
      "grad_norm": 0.7711513042449951,
      "learning_rate": 2.9542746925173566e-05,
      "loss": 0.882,
      "step": 185
    },
    {
      "epoch": 0.6462585034013606,
      "grad_norm": 0.8753324747085571,
      "learning_rate": 2.9500471206899528e-05,
      "loss": 0.874,
      "step": 190
    },
    {
      "epoch": 0.6632653061224489,
      "grad_norm": 0.873315691947937,
      "learning_rate": 2.945635959949494e-05,
      "loss": 0.8252,
      "step": 195
    },
    {
      "epoch": 0.6802721088435374,
      "grad_norm": 1.0285780429840088,
      "learning_rate": 2.9410417687883173e-05,
      "loss": 0.885,
      "step": 200
    },
    {
      "epoch": 0.6972789115646258,
      "grad_norm": 0.8351210951805115,
      "learning_rate": 2.936265128872046e-05,
      "loss": 0.8756,
      "step": 205
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.8613854646682739,
      "learning_rate": 2.931306644965944e-05,
      "loss": 0.8489,
      "step": 210
    },
    {
      "epoch": 0.7312925170068028,
      "grad_norm": 0.9348616003990173,
      "learning_rate": 2.9261669448583492e-05,
      "loss": 0.8657,
      "step": 215
    },
    {
      "epoch": 0.7482993197278912,
      "grad_norm": 1.0146417617797852,
      "learning_rate": 2.9208466792811875e-05,
      "loss": 0.815,
      "step": 220
    },
    {
      "epoch": 0.7653061224489796,
      "grad_norm": 0.9285673499107361,
      "learning_rate": 2.915346521827586e-05,
      "loss": 0.8164,
      "step": 225
    },
    {
      "epoch": 0.782312925170068,
      "grad_norm": 0.951054573059082,
      "learning_rate": 2.9096671688665893e-05,
      "loss": 0.8515,
      "step": 230
    },
    {
      "epoch": 0.7993197278911565,
      "grad_norm": 0.9739980697631836,
      "learning_rate": 2.9038093394549946e-05,
      "loss": 0.7521,
      "step": 235
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 1.0542875528335571,
      "learning_rate": 2.8977737752463094e-05,
      "loss": 0.8105,
      "step": 240
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.9150990843772888,
      "learning_rate": 2.891561240396855e-05,
      "loss": 0.8032,
      "step": 245
    },
    {
      "epoch": 0.8503401360544217,
      "grad_norm": 1.0160540342330933,
      "learning_rate": 2.8851725214690155e-05,
      "loss": 0.7096,
      "step": 250
    },
    {
      "epoch": 0.8673469387755102,
      "grad_norm": 0.9452092051506042,
      "learning_rate": 2.8786084273316524e-05,
      "loss": 0.6958,
      "step": 255
    },
    {
      "epoch": 0.8843537414965986,
      "grad_norm": 0.8861579895019531,
      "learning_rate": 2.8718697890576944e-05,
      "loss": 0.7085,
      "step": 260
    },
    {
      "epoch": 0.9013605442176871,
      "grad_norm": 1.0629994869232178,
      "learning_rate": 2.864957459818918e-05,
      "loss": 0.6941,
      "step": 265
    },
    {
      "epoch": 0.9183673469387755,
      "grad_norm": 0.9347429275512695,
      "learning_rate": 2.8578723147779237e-05,
      "loss": 0.689,
      "step": 270
    },
    {
      "epoch": 0.935374149659864,
      "grad_norm": 1.0432939529418945,
      "learning_rate": 2.850615250977339e-05,
      "loss": 0.7042,
      "step": 275
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 0.948538601398468,
      "learning_rate": 2.843187187226239e-05,
      "loss": 0.7113,
      "step": 280
    },
    {
      "epoch": 0.9693877551020408,
      "grad_norm": 1.053505778312683,
      "learning_rate": 2.835589063983821e-05,
      "loss": 0.7173,
      "step": 285
    },
    {
      "epoch": 0.9863945578231292,
      "grad_norm": 0.9371920824050903,
      "learning_rate": 2.827821843240331e-05,
      "loss": 0.6272,
      "step": 290
    },
    {
      "epoch": 1.0034013605442176,
      "grad_norm": 1.1553608179092407,
      "learning_rate": 2.8198865083952694e-05,
      "loss": 0.6671,
      "step": 295
    },
    {
      "epoch": 1.0204081632653061,
      "grad_norm": 1.0597134828567505,
      "learning_rate": 2.811784064132883e-05,
      "loss": 0.6007,
      "step": 300
    },
    {
      "epoch": 1.0374149659863945,
      "grad_norm": 1.1430370807647705,
      "learning_rate": 2.803515536294963e-05,
      "loss": 0.6015,
      "step": 305
    },
    {
      "epoch": 1.054421768707483,
      "grad_norm": 1.0705440044403076,
      "learning_rate": 2.795081971750963e-05,
      "loss": 0.5987,
      "step": 310
    },
    {
      "epoch": 1.0714285714285714,
      "grad_norm": 1.1100271940231323,
      "learning_rate": 2.786484438265459e-05,
      "loss": 0.6058,
      "step": 315
    },
    {
      "epoch": 1.08843537414966,
      "grad_norm": 1.1145716905593872,
      "learning_rate": 2.7777240243629578e-05,
      "loss": 0.5264,
      "step": 320
    },
    {
      "epoch": 1.1054421768707483,
      "grad_norm": 1.0852850675582886,
      "learning_rate": 2.7688018391900826e-05,
      "loss": 0.549,
      "step": 325
    },
    {
      "epoch": 1.1224489795918366,
      "grad_norm": 1.1767306327819824,
      "learning_rate": 2.7597190123751422e-05,
      "loss": 0.5997,
      "step": 330
    },
    {
      "epoch": 1.1394557823129252,
      "grad_norm": 1.2128592729568481,
      "learning_rate": 2.750476693885113e-05,
      "loss": 0.5681,
      "step": 335
    },
    {
      "epoch": 1.1564625850340136,
      "grad_norm": 1.313896894454956,
      "learning_rate": 2.7410760538800408e-05,
      "loss": 0.5698,
      "step": 340
    },
    {
      "epoch": 1.1734693877551021,
      "grad_norm": 1.1703464984893799,
      "learning_rate": 2.7315182825648895e-05,
      "loss": 0.569,
      "step": 345
    },
    {
      "epoch": 1.1904761904761905,
      "grad_norm": 1.1567600965499878,
      "learning_rate": 2.7218045900388504e-05,
      "loss": 0.5139,
      "step": 350
    },
    {
      "epoch": 1.2074829931972788,
      "grad_norm": 1.097703456878662,
      "learning_rate": 2.7119362061421303e-05,
      "loss": 0.5093,
      "step": 355
    },
    {
      "epoch": 1.2244897959183674,
      "grad_norm": 1.0932475328445435,
      "learning_rate": 2.7019143803002465e-05,
      "loss": 0.5731,
      "step": 360
    },
    {
      "epoch": 1.2414965986394557,
      "grad_norm": 1.2174867391586304,
      "learning_rate": 2.6917403813658364e-05,
      "loss": 0.5077,
      "step": 365
    },
    {
      "epoch": 1.2585034013605443,
      "grad_norm": 1.0711535215377808,
      "learning_rate": 2.6814154974580092e-05,
      "loss": 0.5205,
      "step": 370
    },
    {
      "epoch": 1.2755102040816326,
      "grad_norm": 1.1222914457321167,
      "learning_rate": 2.67094103579926e-05,
      "loss": 0.5386,
      "step": 375
    },
    {
      "epoch": 1.2925170068027212,
      "grad_norm": 1.1327167749404907,
      "learning_rate": 2.6603183225499608e-05,
      "loss": 0.5446,
      "step": 380
    },
    {
      "epoch": 1.3095238095238095,
      "grad_norm": 1.2905243635177612,
      "learning_rate": 2.6495487026404607e-05,
      "loss": 0.5009,
      "step": 385
    },
    {
      "epoch": 1.3265306122448979,
      "grad_norm": 1.059859037399292,
      "learning_rate": 2.6386335396008033e-05,
      "loss": 0.4772,
      "step": 390
    },
    {
      "epoch": 1.3435374149659864,
      "grad_norm": 1.329107403755188,
      "learning_rate": 2.6275742153880907e-05,
      "loss": 0.5027,
      "step": 395
    },
    {
      "epoch": 1.3605442176870748,
      "grad_norm": 1.1252979040145874,
      "learning_rate": 2.6163721302115184e-05,
      "loss": 0.4714,
      "step": 400
    },
    {
      "epoch": 1.3775510204081631,
      "grad_norm": 1.1349155902862549,
      "learning_rate": 2.6050287023550936e-05,
      "loss": 0.4851,
      "step": 405
    },
    {
      "epoch": 1.3945578231292517,
      "grad_norm": 1.0614500045776367,
      "learning_rate": 2.59354536799807e-05,
      "loss": 0.4893,
      "step": 410
    },
    {
      "epoch": 1.4115646258503403,
      "grad_norm": 1.2005356550216675,
      "learning_rate": 2.5819235810331115e-05,
      "loss": 0.5233,
      "step": 415
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 1.0654675960540771,
      "learning_rate": 2.5701648128822205e-05,
      "loss": 0.4876,
      "step": 420
    },
    {
      "epoch": 1.445578231292517,
      "grad_norm": 1.3152744770050049,
      "learning_rate": 2.55827055231044e-05,
      "loss": 0.5113,
      "step": 425
    },
    {
      "epoch": 1.4625850340136055,
      "grad_norm": 1.0852030515670776,
      "learning_rate": 2.5462423052373628e-05,
      "loss": 0.4794,
      "step": 430
    },
    {
      "epoch": 1.4795918367346939,
      "grad_norm": 1.1766146421432495,
      "learning_rate": 2.534081594546469e-05,
      "loss": 0.4488,
      "step": 435
    },
    {
      "epoch": 1.4965986394557822,
      "grad_norm": 1.2981992959976196,
      "learning_rate": 2.5217899598923162e-05,
      "loss": 0.4567,
      "step": 440
    },
    {
      "epoch": 1.5136054421768708,
      "grad_norm": 1.0443862676620483,
      "learning_rate": 2.5093689575056045e-05,
      "loss": 0.4448,
      "step": 445
    },
    {
      "epoch": 1.5306122448979593,
      "grad_norm": 1.130210280418396,
      "learning_rate": 2.4968201599961445e-05,
      "loss": 0.439,
      "step": 450
    },
    {
      "epoch": 1.5476190476190477,
      "grad_norm": 1.3261239528656006,
      "learning_rate": 2.4841451561537496e-05,
      "loss": 0.4577,
      "step": 455
    },
    {
      "epoch": 1.564625850340136,
      "grad_norm": 1.1579346656799316,
      "learning_rate": 2.471345550747082e-05,
      "loss": 0.4915,
      "step": 460
    },
    {
      "epoch": 1.5816326530612246,
      "grad_norm": 1.1824404001235962,
      "learning_rate": 2.4584229643204755e-05,
      "loss": 0.4404,
      "step": 465
    },
    {
      "epoch": 1.598639455782313,
      "grad_norm": 1.175586223602295,
      "learning_rate": 2.4453790329887578e-05,
      "loss": 0.4649,
      "step": 470
    },
    {
      "epoch": 1.6156462585034013,
      "grad_norm": 1.5084452629089355,
      "learning_rate": 2.4322154082301065e-05,
      "loss": 0.4425,
      "step": 475
    },
    {
      "epoch": 1.6326530612244898,
      "grad_norm": 1.2946665287017822,
      "learning_rate": 2.4189337566769545e-05,
      "loss": 0.47,
      "step": 480
    },
    {
      "epoch": 1.6496598639455784,
      "grad_norm": 1.0975319147109985,
      "learning_rate": 2.4055357599049807e-05,
      "loss": 0.4747,
      "step": 485
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 1.0334219932556152,
      "learning_rate": 2.392023114220209e-05,
      "loss": 0.4076,
      "step": 490
    },
    {
      "epoch": 1.683673469387755,
      "grad_norm": 1.2870817184448242,
      "learning_rate": 2.378397530444238e-05,
      "loss": 0.4198,
      "step": 495
    },
    {
      "epoch": 1.7006802721088436,
      "grad_norm": 1.0723446607589722,
      "learning_rate": 2.3646607336976375e-05,
      "loss": 0.4536,
      "step": 500
    },
    {
      "epoch": 1.717687074829932,
      "grad_norm": 1.3169423341751099,
      "learning_rate": 2.3508144631815326e-05,
      "loss": 0.4213,
      "step": 505
    },
    {
      "epoch": 1.7346938775510203,
      "grad_norm": 1.2261208295822144,
      "learning_rate": 2.3368604719574055e-05,
      "loss": 0.4414,
      "step": 510
    },
    {
      "epoch": 1.751700680272109,
      "grad_norm": 1.2695997953414917,
      "learning_rate": 2.322800526725141e-05,
      "loss": 0.3781,
      "step": 515
    },
    {
      "epoch": 1.7687074829931972,
      "grad_norm": 1.421828269958496,
      "learning_rate": 2.308636407599347e-05,
      "loss": 0.4473,
      "step": 520
    },
    {
      "epoch": 1.7857142857142856,
      "grad_norm": 1.2838345766067505,
      "learning_rate": 2.2943699078839783e-05,
      "loss": 0.4126,
      "step": 525
    },
    {
      "epoch": 1.8027210884353742,
      "grad_norm": 1.290840983390808,
      "learning_rate": 2.2800028338452853e-05,
      "loss": 0.4035,
      "step": 530
    },
    {
      "epoch": 1.8197278911564627,
      "grad_norm": 1.3847336769104004,
      "learning_rate": 2.2655370044831253e-05,
      "loss": 0.378,
      "step": 535
    },
    {
      "epoch": 1.836734693877551,
      "grad_norm": 1.149859070777893,
      "learning_rate": 2.2509742513006633e-05,
      "loss": 0.3737,
      "step": 540
    },
    {
      "epoch": 1.8537414965986394,
      "grad_norm": 1.0945329666137695,
      "learning_rate": 2.2363164180724828e-05,
      "loss": 0.4016,
      "step": 545
    },
    {
      "epoch": 1.870748299319728,
      "grad_norm": 1.2481576204299927,
      "learning_rate": 2.2215653606111515e-05,
      "loss": 0.4054,
      "step": 550
    },
    {
      "epoch": 1.8877551020408163,
      "grad_norm": 1.065503478050232,
      "learning_rate": 2.2067229465322578e-05,
      "loss": 0.3944,
      "step": 555
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 1.2339270114898682,
      "learning_rate": 2.1917910550179527e-05,
      "loss": 0.3505,
      "step": 560
    },
    {
      "epoch": 1.9217687074829932,
      "grad_norm": 1.1712156534194946,
      "learning_rate": 2.1767715765790303e-05,
      "loss": 0.3497,
      "step": 565
    },
    {
      "epoch": 1.9387755102040818,
      "grad_norm": 1.4303723573684692,
      "learning_rate": 2.16166641281557e-05,
      "loss": 0.3772,
      "step": 570
    },
    {
      "epoch": 1.95578231292517,
      "grad_norm": 1.123719573020935,
      "learning_rate": 2.1464774761761805e-05,
      "loss": 0.3558,
      "step": 575
    },
    {
      "epoch": 1.9727891156462585,
      "grad_norm": 1.1126965284347534,
      "learning_rate": 2.131206689715863e-05,
      "loss": 0.3763,
      "step": 580
    },
    {
      "epoch": 1.989795918367347,
      "grad_norm": 1.0419843196868896,
      "learning_rate": 2.1158559868525374e-05,
      "loss": 0.3507,
      "step": 585
    }
  ],
  "logging_steps": 5,
  "max_steps": 1470,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.596227027789414e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}