| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 820, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012195121951219513, |
| "grad_norm": 1.184765100479126, |
| "learning_rate": 1.1650485436893204e-06, |
| "loss": 1.2972, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.024390243902439025, |
| "grad_norm": 0.9755042195320129, |
| "learning_rate": 2.621359223300971e-06, |
| "loss": 1.3303, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.036585365853658534, |
| "grad_norm": 0.8106493949890137, |
| "learning_rate": 4.0776699029126215e-06, |
| "loss": 1.2908, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04878048780487805, |
| "grad_norm": 0.6635167002677917, |
| "learning_rate": 5.533980582524272e-06, |
| "loss": 1.2413, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06097560975609756, |
| "grad_norm": 0.5763493180274963, |
| "learning_rate": 6.990291262135923e-06, |
| "loss": 1.2209, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07317073170731707, |
| "grad_norm": 0.49019569158554077, |
| "learning_rate": 8.446601941747573e-06, |
| "loss": 1.265, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08536585365853659, |
| "grad_norm": 0.5669918060302734, |
| "learning_rate": 9.902912621359224e-06, |
| "loss": 1.2391, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0975609756097561, |
| "grad_norm": 0.5410548448562622, |
| "learning_rate": 1.1359223300970873e-05, |
| "loss": 1.2694, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10975609756097561, |
| "grad_norm": 0.489359587430954, |
| "learning_rate": 1.2815533980582524e-05, |
| "loss": 1.2039, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12195121951219512, |
| "grad_norm": 0.6161925792694092, |
| "learning_rate": 1.4271844660194176e-05, |
| "loss": 1.1988, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13414634146341464, |
| "grad_norm": 0.5076792240142822, |
| "learning_rate": 1.5728155339805827e-05, |
| "loss": 1.128, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14634146341463414, |
| "grad_norm": 0.46671441197395325, |
| "learning_rate": 1.7184466019417476e-05, |
| "loss": 1.1529, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15853658536585366, |
| "grad_norm": 0.511127233505249, |
| "learning_rate": 1.8640776699029126e-05, |
| "loss": 1.1286, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17073170731707318, |
| "grad_norm": 0.5400530695915222, |
| "learning_rate": 2.009708737864078e-05, |
| "loss": 1.1555, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18292682926829268, |
| "grad_norm": 0.5800668597221375, |
| "learning_rate": 2.1553398058252428e-05, |
| "loss": 1.1579, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1951219512195122, |
| "grad_norm": 0.5733080506324768, |
| "learning_rate": 2.3009708737864078e-05, |
| "loss": 1.2164, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2073170731707317, |
| "grad_norm": 0.5242522954940796, |
| "learning_rate": 2.4466019417475727e-05, |
| "loss": 1.1548, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.21951219512195122, |
| "grad_norm": 0.5192080140113831, |
| "learning_rate": 2.5922330097087377e-05, |
| "loss": 1.1127, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23170731707317074, |
| "grad_norm": 0.6592035889625549, |
| "learning_rate": 2.737864077669903e-05, |
| "loss": 1.1187, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.24390243902439024, |
| "grad_norm": 0.5072050094604492, |
| "learning_rate": 2.8834951456310683e-05, |
| "loss": 1.1057, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25609756097560976, |
| "grad_norm": 0.530845582485199, |
| "learning_rate": 2.9999980473292916e-05, |
| "loss": 1.0943, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2682926829268293, |
| "grad_norm": 0.5525540113449097, |
| "learning_rate": 2.999929704388311e-05, |
| "loss": 1.0717, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2804878048780488, |
| "grad_norm": 0.5346278548240662, |
| "learning_rate": 2.999763732995757e-05, |
| "loss": 1.0477, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2926829268292683, |
| "grad_norm": 0.634670615196228, |
| "learning_rate": 2.999500143954488e-05, |
| "loss": 1.0227, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3048780487804878, |
| "grad_norm": 0.6343799829483032, |
| "learning_rate": 2.9991389544211688e-05, |
| "loss": 1.0359, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3170731707317073, |
| "grad_norm": 0.6552476286888123, |
| "learning_rate": 2.998680187905151e-05, |
| "loss": 1.0654, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.32926829268292684, |
| "grad_norm": 0.7905762195587158, |
| "learning_rate": 2.9981238742669432e-05, |
| "loss": 1.0706, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.34146341463414637, |
| "grad_norm": 0.6723063588142395, |
| "learning_rate": 2.997470049716269e-05, |
| "loss": 1.0488, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.35365853658536583, |
| "grad_norm": 0.6034112572669983, |
| "learning_rate": 2.9967187568097084e-05, |
| "loss": 1.0492, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.36585365853658536, |
| "grad_norm": 0.5780961513519287, |
| "learning_rate": 2.995870044447928e-05, |
| "loss": 0.9935, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3780487804878049, |
| "grad_norm": 0.6658617854118347, |
| "learning_rate": 2.9949239678724995e-05, |
| "loss": 1.046, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3902439024390244, |
| "grad_norm": 0.6619867086410522, |
| "learning_rate": 2.993880588662303e-05, |
| "loss": 0.9965, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4024390243902439, |
| "grad_norm": 0.7782027721405029, |
| "learning_rate": 2.9927399747295178e-05, |
| "loss": 0.9886, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4146341463414634, |
| "grad_norm": 0.6822337508201599, |
| "learning_rate": 2.9915022003152058e-05, |
| "loss": 0.9674, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4268292682926829, |
| "grad_norm": 0.5955122113227844, |
| "learning_rate": 2.990167345984475e-05, |
| "loss": 1.0358, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.43902439024390244, |
| "grad_norm": 0.7010889649391174, |
| "learning_rate": 2.988735498621238e-05, |
| "loss": 0.9443, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.45121951219512196, |
| "grad_norm": 0.651681661605835, |
| "learning_rate": 2.9872067514225565e-05, |
| "loss": 0.9144, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4634146341463415, |
| "grad_norm": 0.7048653364181519, |
| "learning_rate": 2.985581203892575e-05, |
| "loss": 0.9196, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.47560975609756095, |
| "grad_norm": 0.7128203511238098, |
| "learning_rate": 2.9838589618360434e-05, |
| "loss": 0.9225, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4878048780487805, |
| "grad_norm": 0.7625283002853394, |
| "learning_rate": 2.9820401373514333e-05, |
| "loss": 0.9858, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.7980352640151978, |
| "learning_rate": 2.9801248488236364e-05, |
| "loss": 0.9252, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5121951219512195, |
| "grad_norm": 0.6355564594268799, |
| "learning_rate": 2.978113220916265e-05, |
| "loss": 0.9262, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.524390243902439, |
| "grad_norm": 0.7836682200431824, |
| "learning_rate": 2.9760053845635324e-05, |
| "loss": 0.8971, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5365853658536586, |
| "grad_norm": 0.7409992218017578, |
| "learning_rate": 2.973801476961736e-05, |
| "loss": 0.8825, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5487804878048781, |
| "grad_norm": 0.6563221216201782, |
| "learning_rate": 2.9715016415603217e-05, |
| "loss": 0.8866, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5609756097560976, |
| "grad_norm": 0.6461041569709778, |
| "learning_rate": 2.9691060280525506e-05, |
| "loss": 0.8716, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.573170731707317, |
| "grad_norm": 0.6985337734222412, |
| "learning_rate": 2.9666147923657552e-05, |
| "loss": 0.8699, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5853658536585366, |
| "grad_norm": 0.8816692233085632, |
| "learning_rate": 2.9640280966511898e-05, |
| "loss": 0.8506, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5975609756097561, |
| "grad_norm": 0.7365345358848572, |
| "learning_rate": 2.961346109273476e-05, |
| "loss": 0.9093, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6097560975609756, |
| "grad_norm": 0.8134493827819824, |
| "learning_rate": 2.9585690047996444e-05, |
| "loss": 0.7961, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6219512195121951, |
| "grad_norm": 0.7763360738754272, |
| "learning_rate": 2.955696963987773e-05, |
| "loss": 0.8697, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6341463414634146, |
| "grad_norm": 0.7731444239616394, |
| "learning_rate": 2.9527301737752196e-05, |
| "loss": 0.8777, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6463414634146342, |
| "grad_norm": 0.9481649398803711, |
| "learning_rate": 2.9496688272664577e-05, |
| "loss": 0.8514, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6585365853658537, |
| "grad_norm": 0.827903151512146, |
| "learning_rate": 2.9465131237205043e-05, |
| "loss": 0.908, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6707317073170732, |
| "grad_norm": 0.8994118571281433, |
| "learning_rate": 2.943263268537953e-05, |
| "loss": 0.7373, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6829268292682927, |
| "grad_norm": 0.8990964889526367, |
| "learning_rate": 2.9399194732476032e-05, |
| "loss": 0.82, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6951219512195121, |
| "grad_norm": 0.8939409255981445, |
| "learning_rate": 2.9364819554926912e-05, |
| "loss": 0.8102, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7073170731707317, |
| "grad_norm": 0.9013278484344482, |
| "learning_rate": 2.932950939016727e-05, |
| "loss": 0.7655, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7195121951219512, |
| "grad_norm": 0.7653906941413879, |
| "learning_rate": 2.929326653648929e-05, |
| "loss": 0.7864, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7317073170731707, |
| "grad_norm": 0.9510161876678467, |
| "learning_rate": 2.9256093352892635e-05, |
| "loss": 0.7845, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7439024390243902, |
| "grad_norm": 0.9910411834716797, |
| "learning_rate": 2.921799225893094e-05, |
| "loss": 0.8002, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.7560975609756098, |
| "grad_norm": 0.8927392959594727, |
| "learning_rate": 2.917896573455429e-05, |
| "loss": 0.7693, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7682926829268293, |
| "grad_norm": 0.8335655927658081, |
| "learning_rate": 2.913901631994783e-05, |
| "loss": 0.7593, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7804878048780488, |
| "grad_norm": 0.9775434136390686, |
| "learning_rate": 2.909814661536641e-05, |
| "loss": 0.7371, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7926829268292683, |
| "grad_norm": 0.8624247908592224, |
| "learning_rate": 2.9056359280965345e-05, |
| "loss": 0.8004, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8048780487804879, |
| "grad_norm": 1.1061346530914307, |
| "learning_rate": 2.901365703662726e-05, |
| "loss": 0.7445, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8170731707317073, |
| "grad_norm": 0.8500701189041138, |
| "learning_rate": 2.897004266178508e-05, |
| "loss": 0.7437, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8292682926829268, |
| "grad_norm": 0.9865604639053345, |
| "learning_rate": 2.892551899524109e-05, |
| "loss": 0.701, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8414634146341463, |
| "grad_norm": 0.9660606384277344, |
| "learning_rate": 2.888008893498219e-05, |
| "loss": 0.7524, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.8536585365853658, |
| "grad_norm": 0.9367020726203918, |
| "learning_rate": 2.8833755437991242e-05, |
| "loss": 0.7132, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8658536585365854, |
| "grad_norm": 1.0071465969085693, |
| "learning_rate": 2.878652152005462e-05, |
| "loss": 0.7564, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8780487804878049, |
| "grad_norm": 1.0385185480117798, |
| "learning_rate": 2.8738390255565925e-05, |
| "loss": 0.6634, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8902439024390244, |
| "grad_norm": 0.9005842208862305, |
| "learning_rate": 2.8689364777325847e-05, |
| "loss": 0.6951, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9024390243902439, |
| "grad_norm": 0.9195963144302368, |
| "learning_rate": 2.863944827633828e-05, |
| "loss": 0.7013, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9146341463414634, |
| "grad_norm": 0.9449793100357056, |
| "learning_rate": 2.8588644001602623e-05, |
| "loss": 0.6901, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.926829268292683, |
| "grad_norm": 0.9308488368988037, |
| "learning_rate": 2.8536955259902283e-05, |
| "loss": 0.7064, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9390243902439024, |
| "grad_norm": 0.8601620197296143, |
| "learning_rate": 2.8484385415589485e-05, |
| "loss": 0.6913, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.9512195121951219, |
| "grad_norm": 0.8974642157554626, |
| "learning_rate": 2.843093789036624e-05, |
| "loss": 0.6692, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9634146341463414, |
| "grad_norm": 0.890214741230011, |
| "learning_rate": 2.837661616306166e-05, |
| "loss": 0.6839, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 0.8833136558532715, |
| "learning_rate": 2.8321423769405538e-05, |
| "loss": 0.742, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9878048780487805, |
| "grad_norm": 1.1129741668701172, |
| "learning_rate": 2.8265364301798175e-05, |
| "loss": 0.6696, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.957071840763092, |
| "learning_rate": 2.8208441409076576e-05, |
| "loss": 0.6266, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0121951219512195, |
| "grad_norm": 1.0043803453445435, |
| "learning_rate": 2.8150658796276964e-05, |
| "loss": 0.6141, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.024390243902439, |
| "grad_norm": 1.0592577457427979, |
| "learning_rate": 2.8092020224393603e-05, |
| "loss": 0.5861, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0365853658536586, |
| "grad_norm": 1.1829588413238525, |
| "learning_rate": 2.8032529510134008e-05, |
| "loss": 0.5592, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.048780487804878, |
| "grad_norm": 1.0533852577209473, |
| "learning_rate": 2.797219052567052e-05, |
| "loss": 0.6068, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.0609756097560976, |
| "grad_norm": 0.9526946544647217, |
| "learning_rate": 2.791100719838827e-05, |
| "loss": 0.5667, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.0731707317073171, |
| "grad_norm": 1.1012799739837646, |
| "learning_rate": 2.7848983510629554e-05, |
| "loss": 0.6413, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0853658536585367, |
| "grad_norm": 1.601194143295288, |
| "learning_rate": 2.7786123499434642e-05, |
| "loss": 0.5612, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.0975609756097562, |
| "grad_norm": 0.9721382856369019, |
| "learning_rate": 2.772243125627896e-05, |
| "loss": 0.5407, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1097560975609757, |
| "grad_norm": 1.0757757425308228, |
| "learning_rate": 2.7657910926806858e-05, |
| "loss": 0.505, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.1219512195121952, |
| "grad_norm": 1.2296000719070435, |
| "learning_rate": 2.7592566710561704e-05, |
| "loss": 0.559, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1341463414634148, |
| "grad_norm": 0.9386566877365112, |
| "learning_rate": 2.7526402860712597e-05, |
| "loss": 0.5553, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.146341463414634, |
| "grad_norm": 1.1901946067810059, |
| "learning_rate": 2.7459423683777484e-05, |
| "loss": 0.5844, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.1585365853658536, |
| "grad_norm": 1.1464612483978271, |
| "learning_rate": 2.73916335393429e-05, |
| "loss": 0.5492, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.170731707317073, |
| "grad_norm": 1.005790114402771, |
| "learning_rate": 2.732303683978018e-05, |
| "loss": 0.553, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.1829268292682926, |
| "grad_norm": 1.2415597438812256, |
| "learning_rate": 2.725363804995827e-05, |
| "loss": 0.5452, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.1951219512195121, |
| "grad_norm": 1.0382238626480103, |
| "learning_rate": 2.7183441686953122e-05, |
| "loss": 0.5413, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2073170731707317, |
| "grad_norm": 1.0460742712020874, |
| "learning_rate": 2.711245231975367e-05, |
| "loss": 0.5109, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.2195121951219512, |
| "grad_norm": 0.9935807585716248, |
| "learning_rate": 2.7040674568964454e-05, |
| "loss": 0.5572, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2317073170731707, |
| "grad_norm": 1.0190331935882568, |
| "learning_rate": 2.6968113106504865e-05, |
| "loss": 0.5075, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.2439024390243902, |
| "grad_norm": 0.9684451818466187, |
| "learning_rate": 2.689477265530504e-05, |
| "loss": 0.5295, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.2560975609756098, |
| "grad_norm": 1.0213184356689453, |
| "learning_rate": 2.68206579889985e-05, |
| "loss": 0.5119, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.2682926829268293, |
| "grad_norm": 0.9365750551223755, |
| "learning_rate": 2.674577393161137e-05, |
| "loss": 0.5111, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.2804878048780488, |
| "grad_norm": 1.0988898277282715, |
| "learning_rate": 2.667012535724847e-05, |
| "loss": 0.4914, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.2926829268292683, |
| "grad_norm": 1.0466548204421997, |
| "learning_rate": 2.6593717189775986e-05, |
| "loss": 0.4429, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3048780487804879, |
| "grad_norm": 1.141400694847107, |
| "learning_rate": 2.6516554402501048e-05, |
| "loss": 0.4968, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.3170731707317074, |
| "grad_norm": 0.9664748311042786, |
| "learning_rate": 2.6438642017847983e-05, |
| "loss": 0.4933, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.329268292682927, |
| "grad_norm": 1.1237977743148804, |
| "learning_rate": 2.635998510703143e-05, |
| "loss": 0.4822, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.3414634146341464, |
| "grad_norm": 1.1644905805587769, |
| "learning_rate": 2.6280588789726257e-05, |
| "loss": 0.5107, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.3536585365853657, |
| "grad_norm": 1.103337287902832, |
| "learning_rate": 2.6200458233734316e-05, |
| "loss": 0.4696, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.3658536585365852, |
| "grad_norm": 1.017683744430542, |
| "learning_rate": 2.611959865464811e-05, |
| "loss": 0.4719, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.3780487804878048, |
| "grad_norm": 1.2212704420089722, |
| "learning_rate": 2.6038015315511284e-05, |
| "loss": 0.4932, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.3902439024390243, |
| "grad_norm": 1.2683626413345337, |
| "learning_rate": 2.5955713526476068e-05, |
| "loss": 0.4822, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4024390243902438, |
| "grad_norm": 0.9710960984230042, |
| "learning_rate": 2.587269864445766e-05, |
| "loss": 0.4526, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.4146341463414633, |
| "grad_norm": 0.9223999977111816, |
| "learning_rate": 2.578897607278555e-05, |
| "loss": 0.473, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.4268292682926829, |
| "grad_norm": 1.176310658454895, |
| "learning_rate": 2.5704551260851786e-05, |
| "loss": 0.4636, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.4390243902439024, |
| "grad_norm": 1.1532803773880005, |
| "learning_rate": 2.5619429703756355e-05, |
| "loss": 0.4549, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.451219512195122, |
| "grad_norm": 0.9891874194145203, |
| "learning_rate": 2.5533616941949426e-05, |
| "loss": 0.4909, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.4634146341463414, |
| "grad_norm": 1.1279683113098145, |
| "learning_rate": 2.5447118560870803e-05, |
| "loss": 0.4825, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.475609756097561, |
| "grad_norm": 0.9699839353561401, |
| "learning_rate": 2.5359940190586337e-05, |
| "loss": 0.4478, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.4878048780487805, |
| "grad_norm": 0.970900297164917, |
| "learning_rate": 2.5272087505421482e-05, |
| "loss": 0.5015, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.0708060264587402, |
| "learning_rate": 2.5183566223591966e-05, |
| "loss": 0.4883, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5121951219512195, |
| "grad_norm": 1.0761215686798096, |
| "learning_rate": 2.509438210683158e-05, |
| "loss": 0.4213, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.524390243902439, |
| "grad_norm": 1.0248342752456665, |
| "learning_rate": 2.5004540960017185e-05, |
| "loss": 0.4311, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.5365853658536586, |
| "grad_norm": 1.1032685041427612, |
| "learning_rate": 2.4914048630790857e-05, |
| "loss": 0.4608, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.548780487804878, |
| "grad_norm": 1.1839253902435303, |
| "learning_rate": 2.482291100917928e-05, |
| "loss": 0.4604, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.5609756097560976, |
| "grad_norm": 1.2353657484054565, |
| "learning_rate": 2.473113402721036e-05, |
| "loss": 0.4499, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.5731707317073171, |
| "grad_norm": 1.177707552909851, |
| "learning_rate": 2.4638723658527152e-05, |
| "loss": 0.4172, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.5853658536585367, |
| "grad_norm": 1.0010558366775513, |
| "learning_rate": 2.4545685917998994e-05, |
| "loss": 0.4417, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.5975609756097562, |
| "grad_norm": 1.1996454000473022, |
| "learning_rate": 2.4452026861330054e-05, |
| "loss": 0.4586, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.6097560975609757, |
| "grad_norm": 1.1821777820587158, |
| "learning_rate": 2.4357752584665123e-05, |
| "loss": 0.4828, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.6219512195121952, |
| "grad_norm": 1.1734565496444702, |
| "learning_rate": 2.426286922419288e-05, |
| "loss": 0.4167, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.6341463414634148, |
| "grad_norm": 0.9794202446937561, |
| "learning_rate": 2.416738295574645e-05, |
| "loss": 0.3879, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.6463414634146343, |
| "grad_norm": 1.191879391670227, |
| "learning_rate": 2.4071299994401462e-05, |
| "loss": 0.4004, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.6585365853658538, |
| "grad_norm": 1.0724924802780151, |
| "learning_rate": 2.397462659407148e-05, |
| "loss": 0.4385, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.6707317073170733, |
| "grad_norm": 1.1953388452529907, |
| "learning_rate": 2.387736904710098e-05, |
| "loss": 0.4674, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.6829268292682928, |
| "grad_norm": 1.089429497718811, |
| "learning_rate": 2.377953368385577e-05, |
| "loss": 0.4316, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.6951219512195121, |
| "grad_norm": 1.1765036582946777, |
| "learning_rate": 2.368112687231095e-05, |
| "loss": 0.4237, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.7073170731707317, |
| "grad_norm": 1.0054248571395874, |
| "learning_rate": 2.3582155017636455e-05, |
| "loss": 0.4639, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7195121951219512, |
| "grad_norm": 1.0205795764923096, |
| "learning_rate": 2.3482624561780128e-05, |
| "loss": 0.3966, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.7317073170731707, |
| "grad_norm": 1.1560744047164917, |
| "learning_rate": 2.3382541983048425e-05, |
| "loss": 0.3839, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.7439024390243902, |
| "grad_norm": 1.079033613204956, |
| "learning_rate": 2.3281913795684757e-05, |
| "loss": 0.3966, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.7560975609756098, |
| "grad_norm": 1.0057902336120605, |
| "learning_rate": 2.318074654944547e-05, |
| "loss": 0.4309, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.7682926829268293, |
| "grad_norm": 1.2559707164764404, |
| "learning_rate": 2.307904682917357e-05, |
| "loss": 0.4111, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.7804878048780488, |
| "grad_norm": 1.2369558811187744, |
| "learning_rate": 2.2976821254370077e-05, |
| "loss": 0.4051, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.7926829268292683, |
| "grad_norm": 0.968970000743866, |
| "learning_rate": 2.2874076478763187e-05, |
| "loss": 0.3857, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.8048780487804879, |
| "grad_norm": 1.0662639141082764, |
| "learning_rate": 2.277081918987522e-05, |
| "loss": 0.3677, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.8170731707317072, |
| "grad_norm": 1.0119315385818481, |
| "learning_rate": 2.266705610858727e-05, |
| "loss": 0.3887, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.8292682926829267, |
| "grad_norm": 1.0961999893188477, |
| "learning_rate": 2.2562793988701838e-05, |
| "loss": 0.4059, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.8414634146341462, |
| "grad_norm": 1.0293822288513184, |
| "learning_rate": 2.245803961650316e-05, |
| "loss": 0.3473, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.8536585365853657, |
| "grad_norm": 1.076990008354187, |
| "learning_rate": 2.2352799810315534e-05, |
| "loss": 0.3493, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.8658536585365852, |
| "grad_norm": 1.1946849822998047, |
| "learning_rate": 2.2247081420059526e-05, |
| "loss": 0.3746, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.8780487804878048, |
| "grad_norm": 1.3098491430282593, |
| "learning_rate": 2.2140891326806106e-05, |
| "loss": 0.3596, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.8902439024390243, |
| "grad_norm": 1.1211118698120117, |
| "learning_rate": 2.203423644232877e-05, |
| "loss": 0.3514, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.9024390243902438, |
| "grad_norm": 1.046966314315796, |
| "learning_rate": 2.1927123708653658e-05, |
| "loss": 0.3954, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9146341463414633, |
| "grad_norm": 0.9153071045875549, |
| "learning_rate": 2.1819560097607717e-05, |
| "loss": 0.3866, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.9268292682926829, |
| "grad_norm": 1.07267427444458, |
| "learning_rate": 2.1711552610364914e-05, |
| "loss": 0.39, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.9390243902439024, |
| "grad_norm": 1.155359148979187, |
| "learning_rate": 2.1603108276990536e-05, |
| "loss": 0.3738, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.951219512195122, |
| "grad_norm": 1.2188688516616821, |
| "learning_rate": 2.1494234155983595e-05, |
| "loss": 0.3394, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9634146341463414, |
| "grad_norm": 1.0880542993545532, |
| "learning_rate": 2.1384937333817413e-05, |
| "loss": 0.3525, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.975609756097561, |
| "grad_norm": 1.1321754455566406, |
| "learning_rate": 2.1275224924478408e-05, |
| "loss": 0.324, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.9878048780487805, |
| "grad_norm": 0.9325311779975891, |
| "learning_rate": 2.1165104069002983e-05, |
| "loss": 0.3555, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9841716289520264, |
| "learning_rate": 2.1054581935012777e-05, |
| "loss": 0.3838, |
| "step": 820 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2050, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1934379878624788e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|