{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 722,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013850415512465374,
      "grad_norm": 1.1588482856750488,
      "learning_rate": 1.3186813186813187e-06,
      "loss": 1.3114,
      "step": 5
    },
    {
      "epoch": 0.027700831024930747,
      "grad_norm": 0.9670264720916748,
      "learning_rate": 2.967032967032967e-06,
      "loss": 1.2925,
      "step": 10
    },
    {
      "epoch": 0.04155124653739612,
      "grad_norm": 0.7651219964027405,
      "learning_rate": 4.615384615384616e-06,
      "loss": 1.3332,
      "step": 15
    },
    {
      "epoch": 0.055401662049861494,
      "grad_norm": 0.8753677606582642,
      "learning_rate": 6.2637362637362645e-06,
      "loss": 1.275,
      "step": 20
    },
    {
      "epoch": 0.06925207756232687,
      "grad_norm": 0.6068143844604492,
      "learning_rate": 7.912087912087913e-06,
      "loss": 1.2464,
      "step": 25
    },
    {
      "epoch": 0.08310249307479224,
      "grad_norm": 0.47094252705574036,
      "learning_rate": 9.56043956043956e-06,
      "loss": 1.2393,
      "step": 30
    },
    {
      "epoch": 0.09695290858725762,
      "grad_norm": 0.628544270992279,
      "learning_rate": 1.120879120879121e-05,
      "loss": 1.2263,
      "step": 35
    },
    {
      "epoch": 0.11080332409972299,
      "grad_norm": 0.5166797041893005,
      "learning_rate": 1.2857142857142857e-05,
      "loss": 1.2174,
      "step": 40
    },
    {
      "epoch": 0.12465373961218837,
      "grad_norm": 0.6519356966018677,
      "learning_rate": 1.4505494505494506e-05,
      "loss": 1.1587,
      "step": 45
    },
    {
      "epoch": 0.13850415512465375,
      "grad_norm": 0.4232255220413208,
      "learning_rate": 1.6153846153846154e-05,
      "loss": 1.21,
      "step": 50
    },
    {
      "epoch": 0.1523545706371191,
      "grad_norm": 0.43410590291023254,
      "learning_rate": 1.78021978021978e-05,
      "loss": 1.1839,
      "step": 55
    },
    {
      "epoch": 0.16620498614958448,
      "grad_norm": 0.44104069471359253,
      "learning_rate": 1.9450549450549452e-05,
      "loss": 1.1774,
      "step": 60
    },
    {
      "epoch": 0.18005540166204986,
      "grad_norm": 0.5313953757286072,
      "learning_rate": 2.10989010989011e-05,
      "loss": 1.1863,
      "step": 65
    },
    {
      "epoch": 0.19390581717451524,
      "grad_norm": 0.45843958854675293,
      "learning_rate": 2.2747252747252748e-05,
      "loss": 1.1288,
      "step": 70
    },
    {
      "epoch": 0.2077562326869806,
      "grad_norm": 0.5203155279159546,
      "learning_rate": 2.4395604395604395e-05,
      "loss": 1.1437,
      "step": 75
    },
    {
      "epoch": 0.22160664819944598,
      "grad_norm": 0.6226593255996704,
      "learning_rate": 2.6043956043956046e-05,
      "loss": 1.128,
      "step": 80
    },
    {
      "epoch": 0.23545706371191136,
      "grad_norm": 0.44689756631851196,
      "learning_rate": 2.7692307692307694e-05,
      "loss": 1.1561,
      "step": 85
    },
    {
      "epoch": 0.24930747922437674,
      "grad_norm": 0.5292763113975525,
      "learning_rate": 2.934065934065934e-05,
      "loss": 1.1052,
      "step": 90
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.534803569316864,
      "learning_rate": 2.9999773232512234e-05,
      "loss": 1.067,
      "step": 95
    },
    {
      "epoch": 0.2770083102493075,
      "grad_norm": 0.6602870225906372,
      "learning_rate": 2.9998387456028022e-05,
      "loss": 1.0975,
      "step": 100
    },
    {
      "epoch": 0.29085872576177285,
      "grad_norm": 0.5924050211906433,
      "learning_rate": 2.999574200124419e-05,
      "loss": 1.0857,
      "step": 105
    },
    {
      "epoch": 0.3047091412742382,
      "grad_norm": 0.6111776828765869,
      "learning_rate": 2.999183709034608e-05,
      "loss": 1.0855,
      "step": 110
    },
    {
      "epoch": 0.3185595567867036,
      "grad_norm": 0.8191071152687073,
      "learning_rate": 2.998667305129772e-05,
      "loss": 1.0163,
      "step": 115
    },
    {
      "epoch": 0.33240997229916897,
      "grad_norm": 0.52001953125,
      "learning_rate": 2.9980250317814265e-05,
      "loss": 1.0266,
      "step": 120
    },
    {
      "epoch": 0.3462603878116344,
      "grad_norm": 0.5309084057807922,
      "learning_rate": 2.9972569429325575e-05,
      "loss": 1.034,
      "step": 125
    },
    {
      "epoch": 0.3601108033240997,
      "grad_norm": 0.657573938369751,
      "learning_rate": 2.99636310309309e-05,
      "loss": 1.0783,
      "step": 130
    },
    {
      "epoch": 0.3739612188365651,
      "grad_norm": 0.6887980103492737,
      "learning_rate": 2.995343587334471e-05,
      "loss": 1.0554,
      "step": 135
    },
    {
      "epoch": 0.3878116343490305,
      "grad_norm": 0.6892874836921692,
      "learning_rate": 2.994198481283364e-05,
      "loss": 1.0056,
      "step": 140
    },
    {
      "epoch": 0.40166204986149584,
      "grad_norm": 0.6222119927406311,
      "learning_rate": 2.992927881114458e-05,
      "loss": 0.9764,
      "step": 145
    },
    {
      "epoch": 0.4155124653739612,
      "grad_norm": 0.6249090433120728,
      "learning_rate": 2.991531893542389e-05,
      "loss": 1.0027,
      "step": 150
    },
    {
      "epoch": 0.4293628808864266,
      "grad_norm": 0.6832554340362549,
      "learning_rate": 2.990010635812777e-05,
      "loss": 0.9389,
      "step": 155
    },
    {
      "epoch": 0.44321329639889195,
      "grad_norm": 0.6740282773971558,
      "learning_rate": 2.9883642356923822e-05,
      "loss": 0.93,
      "step": 160
    },
    {
      "epoch": 0.45706371191135736,
      "grad_norm": 0.6766054034233093,
      "learning_rate": 2.986592831458369e-05,
      "loss": 0.9612,
      "step": 165
    },
    {
      "epoch": 0.4709141274238227,
      "grad_norm": 0.6800487637519836,
      "learning_rate": 2.984696571886697e-05,
      "loss": 0.9498,
      "step": 170
    },
    {
      "epoch": 0.48476454293628807,
      "grad_norm": 0.6890595555305481,
      "learning_rate": 2.9826756162396226e-05,
      "loss": 0.8753,
      "step": 175
    },
    {
      "epoch": 0.4986149584487535,
      "grad_norm": 0.9015688300132751,
      "learning_rate": 2.9805301342523238e-05,
      "loss": 0.903,
      "step": 180
    },
    {
      "epoch": 0.5124653739612188,
      "grad_norm": 0.8003932237625122,
      "learning_rate": 2.9782603061186458e-05,
      "loss": 0.9057,
      "step": 185
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.7204357981681824,
      "learning_rate": 2.975866322475966e-05,
      "loss": 0.8683,
      "step": 190
    },
    {
      "epoch": 0.5401662049861495,
      "grad_norm": 0.7696855068206787,
      "learning_rate": 2.973348384389182e-05,
      "loss": 0.8554,
      "step": 195
    },
    {
      "epoch": 0.554016620498615,
      "grad_norm": 0.8631574511528015,
      "learning_rate": 2.9707067033338257e-05,
      "loss": 0.8987,
      "step": 200
    },
    {
      "epoch": 0.5678670360110804,
      "grad_norm": 0.8141370415687561,
      "learning_rate": 2.967941501178302e-05,
      "loss": 0.8551,
      "step": 205
    },
    {
      "epoch": 0.5817174515235457,
      "grad_norm": 0.7609331607818604,
      "learning_rate": 2.9650530101652548e-05,
      "loss": 0.8327,
      "step": 210
    },
    {
      "epoch": 0.5955678670360111,
      "grad_norm": 0.7858366966247559,
      "learning_rate": 2.9620414728920603e-05,
      "loss": 0.8254,
      "step": 215
    },
    {
      "epoch": 0.6094182825484764,
      "grad_norm": 0.8758653998374939,
      "learning_rate": 2.9589071422904525e-05,
      "loss": 0.833,
      "step": 220
    },
    {
      "epoch": 0.6232686980609419,
      "grad_norm": 0.7241504192352295,
      "learning_rate": 2.9556502816052805e-05,
      "loss": 0.8104,
      "step": 225
    },
    {
      "epoch": 0.6371191135734072,
      "grad_norm": 0.7951213717460632,
      "learning_rate": 2.9522711643723997e-05,
      "loss": 0.8526,
      "step": 230
    },
    {
      "epoch": 0.6509695290858726,
      "grad_norm": 0.8177146315574646,
      "learning_rate": 2.9487700743956955e-05,
      "loss": 0.7508,
      "step": 235
    },
    {
      "epoch": 0.6648199445983379,
      "grad_norm": 0.7762987017631531,
      "learning_rate": 2.945147305723251e-05,
      "loss": 0.8302,
      "step": 240
    },
    {
      "epoch": 0.6786703601108033,
      "grad_norm": 0.9242251515388489,
      "learning_rate": 2.9414031626226472e-05,
      "loss": 0.7728,
      "step": 245
    },
    {
      "epoch": 0.6925207756232687,
      "grad_norm": 0.8518105745315552,
      "learning_rate": 2.9375379595554113e-05,
      "loss": 0.7502,
      "step": 250
    },
    {
      "epoch": 0.7063711911357341,
      "grad_norm": 0.8808051943778992,
      "learning_rate": 2.9335520211506033e-05,
      "loss": 0.7775,
      "step": 255
    },
    {
      "epoch": 0.7202216066481995,
      "grad_norm": 0.9569631218910217,
      "learning_rate": 2.9294456821775524e-05,
      "loss": 0.7802,
      "step": 260
    },
    {
      "epoch": 0.7340720221606648,
      "grad_norm": 1.034891963005066,
      "learning_rate": 2.9252192875177415e-05,
      "loss": 0.7108,
      "step": 265
    },
    {
      "epoch": 0.7479224376731302,
      "grad_norm": 1.0093481540679932,
      "learning_rate": 2.920873192135839e-05,
      "loss": 0.7309,
      "step": 270
    },
    {
      "epoch": 0.7617728531855956,
      "grad_norm": 0.9309902787208557,
      "learning_rate": 2.9164077610498886e-05,
      "loss": 0.7685,
      "step": 275
    },
    {
      "epoch": 0.775623268698061,
      "grad_norm": 1.176249623298645,
      "learning_rate": 2.9118233693006503e-05,
      "loss": 0.7397,
      "step": 280
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.9168936610221863,
      "learning_rate": 2.907120401920103e-05,
      "loss": 0.7056,
      "step": 285
    },
    {
      "epoch": 0.8033240997229917,
      "grad_norm": 1.025532603263855,
      "learning_rate": 2.9022992538991067e-05,
      "loss": 0.7337,
      "step": 290
    },
    {
      "epoch": 0.817174515235457,
      "grad_norm": 0.8983604907989502,
      "learning_rate": 2.897360330154227e-05,
      "loss": 0.794,
      "step": 295
    },
    {
      "epoch": 0.8310249307479224,
      "grad_norm": 0.8976964354515076,
      "learning_rate": 2.892304045493728e-05,
      "loss": 0.7545,
      "step": 300
    },
    {
      "epoch": 0.8448753462603878,
      "grad_norm": 1.0854963064193726,
      "learning_rate": 2.8871308245827336e-05,
      "loss": 0.6853,
      "step": 305
    },
    {
      "epoch": 0.8587257617728532,
      "grad_norm": 0.9506078958511353,
      "learning_rate": 2.88184110190756e-05,
      "loss": 0.7233,
      "step": 310
    },
    {
      "epoch": 0.8725761772853186,
      "grad_norm": 0.9690659642219543,
      "learning_rate": 2.8764353217392253e-05,
      "loss": 0.7283,
      "step": 315
    },
    {
      "epoch": 0.8864265927977839,
      "grad_norm": 1.0485140085220337,
      "learning_rate": 2.870913938096136e-05,
      "loss": 0.729,
      "step": 320
    },
    {
      "epoch": 0.9002770083102493,
      "grad_norm": 1.066326379776001,
      "learning_rate": 2.865277414705955e-05,
      "loss": 0.665,
      "step": 325
    },
    {
      "epoch": 0.9141274238227147,
      "grad_norm": 0.9876983761787415,
      "learning_rate": 2.8595262249666536e-05,
      "loss": 0.6573,
      "step": 330
    },
    {
      "epoch": 0.9279778393351801,
      "grad_norm": 0.9734461903572083,
      "learning_rate": 2.8536608519067532e-05,
      "loss": 0.627,
      "step": 335
    },
    {
      "epoch": 0.9418282548476454,
      "grad_norm": 0.9485730528831482,
      "learning_rate": 2.8476817881447555e-05,
      "loss": 0.7214,
      "step": 340
    },
    {
      "epoch": 0.9556786703601108,
      "grad_norm": 1.0153250694274902,
      "learning_rate": 2.8415895358477702e-05,
      "loss": 0.6599,
      "step": 345
    },
    {
      "epoch": 0.9695290858725761,
      "grad_norm": 0.9076303243637085,
      "learning_rate": 2.8353846066893382e-05,
      "loss": 0.7043,
      "step": 350
    },
    {
      "epoch": 0.9833795013850416,
      "grad_norm": 1.0249273777008057,
      "learning_rate": 2.8290675218064565e-05,
      "loss": 0.6515,
      "step": 355
    },
    {
      "epoch": 0.997229916897507,
      "grad_norm": 1.1760298013687134,
      "learning_rate": 2.822638811755812e-05,
      "loss": 0.6412,
      "step": 360
    },
    {
      "epoch": 1.0110803324099722,
      "grad_norm": 0.9463870525360107,
      "learning_rate": 2.8160990164692183e-05,
      "loss": 0.6286,
      "step": 365
    },
    {
      "epoch": 1.0249307479224377,
      "grad_norm": 0.9781592488288879,
      "learning_rate": 2.8094486852082692e-05,
      "loss": 0.6016,
      "step": 370
    },
    {
      "epoch": 1.0387811634349031,
      "grad_norm": 0.9843015670776367,
      "learning_rate": 2.802688376518209e-05,
      "loss": 0.5999,
      "step": 375
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 1.0795265436172485,
      "learning_rate": 2.7958186581810195e-05,
      "loss": 0.5607,
      "step": 380
    },
    {
      "epoch": 1.0664819944598338,
      "grad_norm": 1.0073837041854858,
      "learning_rate": 2.7888401071677342e-05,
      "loss": 0.6205,
      "step": 385
    },
    {
      "epoch": 1.080332409972299,
      "grad_norm": 1.0322895050048828,
      "learning_rate": 2.7817533095899806e-05,
      "loss": 0.5441,
      "step": 390
    },
    {
      "epoch": 1.0941828254847645,
      "grad_norm": 1.0219157934188843,
      "learning_rate": 2.774558860650752e-05,
      "loss": 0.6074,
      "step": 395
    },
    {
      "epoch": 1.10803324099723,
      "grad_norm": 1.0589349269866943,
      "learning_rate": 2.767257364594421e-05,
      "loss": 0.5376,
      "step": 400
    },
    {
      "epoch": 1.1218836565096952,
      "grad_norm": 0.9884157180786133,
      "learning_rate": 2.759849434655987e-05,
      "loss": 0.5637,
      "step": 405
    },
    {
      "epoch": 1.1357340720221607,
      "grad_norm": 0.979081928730011,
      "learning_rate": 2.7523356930095764e-05,
      "loss": 0.5114,
      "step": 410
    },
    {
      "epoch": 1.149584487534626,
      "grad_norm": 1.188364863395691,
      "learning_rate": 2.744716770716182e-05,
      "loss": 0.5266,
      "step": 415
    },
    {
      "epoch": 1.1634349030470914,
      "grad_norm": 1.0489892959594727,
      "learning_rate": 2.736993307670667e-05,
      "loss": 0.5176,
      "step": 420
    },
    {
      "epoch": 1.1772853185595569,
      "grad_norm": 0.9718247652053833,
      "learning_rate": 2.7291659525480192e-05,
      "loss": 0.5572,
      "step": 425
    },
    {
      "epoch": 1.1911357340720221,
      "grad_norm": 1.092788815498352,
      "learning_rate": 2.72123536274887e-05,
      "loss": 0.5158,
      "step": 430
    },
    {
      "epoch": 1.2049861495844876,
      "grad_norm": 1.0184890031814575,
      "learning_rate": 2.713202204344282e-05,
      "loss": 0.4769,
      "step": 435
    },
    {
      "epoch": 1.2188365650969528,
      "grad_norm": 1.0743873119354248,
      "learning_rate": 2.705067152019808e-05,
      "loss": 0.524,
      "step": 440
    },
    {
      "epoch": 1.2326869806094183,
      "grad_norm": 1.143704891204834,
      "learning_rate": 2.6968308890188235e-05,
      "loss": 0.5016,
      "step": 445
    },
    {
      "epoch": 1.2465373961218837,
      "grad_norm": 1.0439151525497437,
      "learning_rate": 2.6884941070851443e-05,
      "loss": 0.5137,
      "step": 450
    },
    {
      "epoch": 1.260387811634349,
      "grad_norm": 1.0907272100448608,
      "learning_rate": 2.680057506404929e-05,
      "loss": 0.5049,
      "step": 455
    },
    {
      "epoch": 1.2742382271468145,
      "grad_norm": 1.1267155408859253,
      "learning_rate": 2.6715217955478705e-05,
      "loss": 0.504,
      "step": 460
    },
    {
      "epoch": 1.2880886426592797,
      "grad_norm": 1.0027132034301758,
      "learning_rate": 2.6628876914076865e-05,
      "loss": 0.4873,
      "step": 465
    },
    {
      "epoch": 1.3019390581717452,
      "grad_norm": 1.0440722703933716,
      "learning_rate": 2.654155919141908e-05,
      "loss": 0.4524,
      "step": 470
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.9812374114990234,
      "learning_rate": 2.645327212110976e-05,
      "loss": 0.482,
      "step": 475
    },
    {
      "epoch": 1.3296398891966759,
      "grad_norm": 0.9852293133735657,
      "learning_rate": 2.636402311816649e-05,
      "loss": 0.4817,
      "step": 480
    },
    {
      "epoch": 1.3434903047091413,
      "grad_norm": 0.9881157875061035,
      "learning_rate": 2.6273819678397233e-05,
      "loss": 0.5042,
      "step": 485
    },
    {
      "epoch": 1.3573407202216066,
      "grad_norm": 1.1235034465789795,
      "learning_rate": 2.6182669377770807e-05,
      "loss": 0.4982,
      "step": 490
    },
    {
      "epoch": 1.371191135734072,
      "grad_norm": 1.113957405090332,
      "learning_rate": 2.6090579871780583e-05,
      "loss": 0.4777,
      "step": 495
    },
    {
      "epoch": 1.3850415512465375,
      "grad_norm": 0.9710925221443176,
      "learning_rate": 2.5997558894801514e-05,
      "loss": 0.4601,
      "step": 500
    },
    {
      "epoch": 1.3988919667590027,
      "grad_norm": 1.091304063796997,
      "learning_rate": 2.5903614259440553e-05,
      "loss": 0.3797,
      "step": 505
    },
    {
      "epoch": 1.4127423822714682,
      "grad_norm": 1.0607125759124756,
      "learning_rate": 2.580875385588048e-05,
      "loss": 0.4758,
      "step": 510
    },
    {
      "epoch": 1.4265927977839334,
      "grad_norm": 0.9960602521896362,
      "learning_rate": 2.571298565121725e-05,
      "loss": 0.4651,
      "step": 515
    },
    {
      "epoch": 1.440443213296399,
      "grad_norm": 1.1779953241348267,
      "learning_rate": 2.561631768879082e-05,
      "loss": 0.433,
      "step": 520
    },
    {
      "epoch": 1.4542936288088644,
      "grad_norm": 1.397922158241272,
      "learning_rate": 2.551875808750963e-05,
      "loss": 0.433,
      "step": 525
    },
    {
      "epoch": 1.4681440443213296,
      "grad_norm": 1.1092729568481445,
      "learning_rate": 2.5420315041168717e-05,
      "loss": 0.4,
      "step": 530
    },
    {
      "epoch": 1.481994459833795,
      "grad_norm": 1.0799415111541748,
      "learning_rate": 2.5320996817761534e-05,
      "loss": 0.454,
      "step": 535
    },
    {
      "epoch": 1.4958448753462603,
      "grad_norm": 1.0249577760696411,
      "learning_rate": 2.5220811758785525e-05,
      "loss": 0.4557,
      "step": 540
    },
    {
      "epoch": 1.5096952908587258,
      "grad_norm": 0.9857805967330933,
      "learning_rate": 2.5119768278541576e-05,
      "loss": 0.4229,
      "step": 545
    },
    {
      "epoch": 1.5235457063711912,
      "grad_norm": 1.0294913053512573,
      "learning_rate": 2.5017874863427292e-05,
      "loss": 0.4494,
      "step": 550
    },
    {
      "epoch": 1.5373961218836565,
      "grad_norm": 1.123518705368042,
      "learning_rate": 2.4915140071224247e-05,
      "loss": 0.4268,
      "step": 555
    },
    {
      "epoch": 1.5512465373961217,
      "grad_norm": 1.096327543258667,
      "learning_rate": 2.4811572530379253e-05,
      "loss": 0.4339,
      "step": 560
    },
    {
      "epoch": 1.5650969529085872,
      "grad_norm": 1.1293649673461914,
      "learning_rate": 2.4707180939279658e-05,
      "loss": 0.4019,
      "step": 565
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 1.098160982131958,
      "learning_rate": 2.46019740655228e-05,
      "loss": 0.3848,
      "step": 570
    },
    {
      "epoch": 1.5927977839335181,
      "grad_norm": 1.0816259384155273,
      "learning_rate": 2.4495960745179646e-05,
      "loss": 0.3758,
      "step": 575
    },
    {
      "epoch": 1.6066481994459834,
      "grad_norm": 1.0269978046417236,
      "learning_rate": 2.4389149882052654e-05,
      "loss": 0.4255,
      "step": 580
    },
    {
      "epoch": 1.6204986149584486,
      "grad_norm": 1.0803477764129639,
      "learning_rate": 2.4281550446927968e-05,
      "loss": 0.3737,
      "step": 585
    },
    {
      "epoch": 1.634349030470914,
      "grad_norm": 1.121321678161621,
      "learning_rate": 2.4173171476821997e-05,
      "loss": 0.3985,
      "step": 590
    },
    {
      "epoch": 1.6481994459833795,
      "grad_norm": 1.1697264909744263,
      "learning_rate": 2.40640220742224e-05,
      "loss": 0.4194,
      "step": 595
    },
    {
      "epoch": 1.662049861495845,
      "grad_norm": 1.0884120464324951,
      "learning_rate": 2.3954111406323607e-05,
      "loss": 0.4341,
      "step": 600
    },
    {
      "epoch": 1.6759002770083102,
      "grad_norm": 1.0602376461029053,
      "learning_rate": 2.3843448704256868e-05,
      "loss": 0.4311,
      "step": 605
    },
    {
      "epoch": 1.6897506925207755,
      "grad_norm": 1.1031391620635986,
      "learning_rate": 2.3732043262314974e-05,
      "loss": 0.3749,
      "step": 610
    },
    {
      "epoch": 1.703601108033241,
      "grad_norm": 1.1139200925827026,
      "learning_rate": 2.3619904437171642e-05,
      "loss": 0.3423,
      "step": 615
    },
    {
      "epoch": 1.7174515235457064,
      "grad_norm": 1.2277401685714722,
      "learning_rate": 2.3507041647095668e-05,
      "loss": 0.4357,
      "step": 620
    },
    {
      "epoch": 1.7313019390581719,
      "grad_norm": 1.1853033304214478,
      "learning_rate": 2.3393464371159914e-05,
      "loss": 0.3869,
      "step": 625
    },
    {
      "epoch": 1.745152354570637,
      "grad_norm": 1.0660614967346191,
      "learning_rate": 2.327918214844518e-05,
      "loss": 0.3493,
      "step": 630
    },
    {
      "epoch": 1.7590027700831024,
      "grad_norm": 1.2714390754699707,
      "learning_rate": 2.3164204577239037e-05,
      "loss": 0.412,
      "step": 635
    },
    {
      "epoch": 1.7728531855955678,
      "grad_norm": 1.2650110721588135,
      "learning_rate": 2.304854131422971e-05,
      "loss": 0.36,
      "step": 640
    },
    {
      "epoch": 1.7867036011080333,
      "grad_norm": 1.1004222631454468,
      "learning_rate": 2.2932202073695003e-05,
      "loss": 0.3482,
      "step": 645
    },
    {
      "epoch": 1.8005540166204987,
      "grad_norm": 1.0999927520751953,
      "learning_rate": 2.281519662668645e-05,
      "loss": 0.3655,
      "step": 650
    },
    {
      "epoch": 1.814404432132964,
      "grad_norm": 1.0468759536743164,
      "learning_rate": 2.2697534800208655e-05,
      "loss": 0.3554,
      "step": 655
    },
    {
      "epoch": 1.8282548476454292,
      "grad_norm": 1.130560278892517,
      "learning_rate": 2.2579226476393932e-05,
      "loss": 0.3633,
      "step": 660
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.9949873089790344,
      "learning_rate": 2.2460281591672365e-05,
      "loss": 0.3747,
      "step": 665
    },
    {
      "epoch": 1.8559556786703602,
      "grad_norm": 1.043385624885559,
      "learning_rate": 2.2340710135937233e-05,
      "loss": 0.382,
      "step": 670
    },
    {
      "epoch": 1.8698060941828256,
      "grad_norm": 1.095567226409912,
      "learning_rate": 2.2220522151706012e-05,
      "loss": 0.4021,
      "step": 675
    },
    {
      "epoch": 1.8836565096952909,
      "grad_norm": 1.1817140579223633,
      "learning_rate": 2.2099727733276905e-05,
      "loss": 0.3553,
      "step": 680
    },
    {
      "epoch": 1.897506925207756,
      "grad_norm": 1.1485965251922607,
      "learning_rate": 2.1978337025881062e-05,
      "loss": 0.3613,
      "step": 685
    },
    {
      "epoch": 1.9113573407202216,
      "grad_norm": 1.130224585533142,
      "learning_rate": 2.18563602248305e-05,
      "loss": 0.3163,
      "step": 690
    },
    {
      "epoch": 1.925207756232687,
      "grad_norm": 1.0307577848434448,
      "learning_rate": 2.1733807574661823e-05,
      "loss": 0.34,
      "step": 695
    },
    {
      "epoch": 1.9390581717451525,
      "grad_norm": 1.173927664756775,
      "learning_rate": 2.1610689368275827e-05,
      "loss": 0.3666,
      "step": 700
    },
    {
      "epoch": 1.9529085872576177,
      "grad_norm": 1.1913485527038574,
      "learning_rate": 2.148701594607298e-05,
      "loss": 0.3224,
      "step": 705
    },
    {
      "epoch": 1.966759002770083,
      "grad_norm": 1.150925874710083,
      "learning_rate": 2.1362797695085e-05,
      "loss": 0.3357,
      "step": 710
    },
    {
      "epoch": 1.9806094182825484,
      "grad_norm": 1.0675455331802368,
      "learning_rate": 2.123804504810246e-05,
      "loss": 0.3574,
      "step": 715
    },
    {
      "epoch": 1.994459833795014,
      "grad_norm": 1.1154441833496094,
      "learning_rate": 2.1112768482798553e-05,
      "loss": 0.3491,
      "step": 720
    }
  ],
  "logging_steps": 5,
  "max_steps": 1805,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0889545362880594e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}