{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 722,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013850415512465374,
"grad_norm": 1.1588482856750488,
"learning_rate": 1.3186813186813187e-06,
"loss": 1.3114,
"step": 5
},
{
"epoch": 0.027700831024930747,
"grad_norm": 0.9670264720916748,
"learning_rate": 2.967032967032967e-06,
"loss": 1.2925,
"step": 10
},
{
"epoch": 0.04155124653739612,
"grad_norm": 0.7651219964027405,
"learning_rate": 4.615384615384616e-06,
"loss": 1.3332,
"step": 15
},
{
"epoch": 0.055401662049861494,
"grad_norm": 0.8753677606582642,
"learning_rate": 6.2637362637362645e-06,
"loss": 1.275,
"step": 20
},
{
"epoch": 0.06925207756232687,
"grad_norm": 0.6068143844604492,
"learning_rate": 7.912087912087913e-06,
"loss": 1.2464,
"step": 25
},
{
"epoch": 0.08310249307479224,
"grad_norm": 0.47094252705574036,
"learning_rate": 9.56043956043956e-06,
"loss": 1.2393,
"step": 30
},
{
"epoch": 0.09695290858725762,
"grad_norm": 0.628544270992279,
"learning_rate": 1.120879120879121e-05,
"loss": 1.2263,
"step": 35
},
{
"epoch": 0.11080332409972299,
"grad_norm": 0.5166797041893005,
"learning_rate": 1.2857142857142857e-05,
"loss": 1.2174,
"step": 40
},
{
"epoch": 0.12465373961218837,
"grad_norm": 0.6519356966018677,
"learning_rate": 1.4505494505494506e-05,
"loss": 1.1587,
"step": 45
},
{
"epoch": 0.13850415512465375,
"grad_norm": 0.4232255220413208,
"learning_rate": 1.6153846153846154e-05,
"loss": 1.21,
"step": 50
},
{
"epoch": 0.1523545706371191,
"grad_norm": 0.43410590291023254,
"learning_rate": 1.78021978021978e-05,
"loss": 1.1839,
"step": 55
},
{
"epoch": 0.16620498614958448,
"grad_norm": 0.44104069471359253,
"learning_rate": 1.9450549450549452e-05,
"loss": 1.1774,
"step": 60
},
{
"epoch": 0.18005540166204986,
"grad_norm": 0.5313953757286072,
"learning_rate": 2.10989010989011e-05,
"loss": 1.1863,
"step": 65
},
{
"epoch": 0.19390581717451524,
"grad_norm": 0.45843958854675293,
"learning_rate": 2.2747252747252748e-05,
"loss": 1.1288,
"step": 70
},
{
"epoch": 0.2077562326869806,
"grad_norm": 0.5203155279159546,
"learning_rate": 2.4395604395604395e-05,
"loss": 1.1437,
"step": 75
},
{
"epoch": 0.22160664819944598,
"grad_norm": 0.6226593255996704,
"learning_rate": 2.6043956043956046e-05,
"loss": 1.128,
"step": 80
},
{
"epoch": 0.23545706371191136,
"grad_norm": 0.44689756631851196,
"learning_rate": 2.7692307692307694e-05,
"loss": 1.1561,
"step": 85
},
{
"epoch": 0.24930747922437674,
"grad_norm": 0.5292763113975525,
"learning_rate": 2.934065934065934e-05,
"loss": 1.1052,
"step": 90
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.534803569316864,
"learning_rate": 2.9999773232512234e-05,
"loss": 1.067,
"step": 95
},
{
"epoch": 0.2770083102493075,
"grad_norm": 0.6602870225906372,
"learning_rate": 2.9998387456028022e-05,
"loss": 1.0975,
"step": 100
},
{
"epoch": 0.29085872576177285,
"grad_norm": 0.5924050211906433,
"learning_rate": 2.999574200124419e-05,
"loss": 1.0857,
"step": 105
},
{
"epoch": 0.3047091412742382,
"grad_norm": 0.6111776828765869,
"learning_rate": 2.999183709034608e-05,
"loss": 1.0855,
"step": 110
},
{
"epoch": 0.3185595567867036,
"grad_norm": 0.8191071152687073,
"learning_rate": 2.998667305129772e-05,
"loss": 1.0163,
"step": 115
},
{
"epoch": 0.33240997229916897,
"grad_norm": 0.52001953125,
"learning_rate": 2.9980250317814265e-05,
"loss": 1.0266,
"step": 120
},
{
"epoch": 0.3462603878116344,
"grad_norm": 0.5309084057807922,
"learning_rate": 2.9972569429325575e-05,
"loss": 1.034,
"step": 125
},
{
"epoch": 0.3601108033240997,
"grad_norm": 0.657573938369751,
"learning_rate": 2.99636310309309e-05,
"loss": 1.0783,
"step": 130
},
{
"epoch": 0.3739612188365651,
"grad_norm": 0.6887980103492737,
"learning_rate": 2.995343587334471e-05,
"loss": 1.0554,
"step": 135
},
{
"epoch": 0.3878116343490305,
"grad_norm": 0.6892874836921692,
"learning_rate": 2.994198481283364e-05,
"loss": 1.0056,
"step": 140
},
{
"epoch": 0.40166204986149584,
"grad_norm": 0.6222119927406311,
"learning_rate": 2.992927881114458e-05,
"loss": 0.9764,
"step": 145
},
{
"epoch": 0.4155124653739612,
"grad_norm": 0.6249090433120728,
"learning_rate": 2.991531893542389e-05,
"loss": 1.0027,
"step": 150
},
{
"epoch": 0.4293628808864266,
"grad_norm": 0.6832554340362549,
"learning_rate": 2.990010635812777e-05,
"loss": 0.9389,
"step": 155
},
{
"epoch": 0.44321329639889195,
"grad_norm": 0.6740282773971558,
"learning_rate": 2.9883642356923822e-05,
"loss": 0.93,
"step": 160
},
{
"epoch": 0.45706371191135736,
"grad_norm": 0.6766054034233093,
"learning_rate": 2.986592831458369e-05,
"loss": 0.9612,
"step": 165
},
{
"epoch": 0.4709141274238227,
"grad_norm": 0.6800487637519836,
"learning_rate": 2.984696571886697e-05,
"loss": 0.9498,
"step": 170
},
{
"epoch": 0.48476454293628807,
"grad_norm": 0.6890595555305481,
"learning_rate": 2.9826756162396226e-05,
"loss": 0.8753,
"step": 175
},
{
"epoch": 0.4986149584487535,
"grad_norm": 0.9015688300132751,
"learning_rate": 2.9805301342523238e-05,
"loss": 0.903,
"step": 180
},
{
"epoch": 0.5124653739612188,
"grad_norm": 0.8003932237625122,
"learning_rate": 2.9782603061186458e-05,
"loss": 0.9057,
"step": 185
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.7204357981681824,
"learning_rate": 2.975866322475966e-05,
"loss": 0.8683,
"step": 190
},
{
"epoch": 0.5401662049861495,
"grad_norm": 0.7696855068206787,
"learning_rate": 2.973348384389182e-05,
"loss": 0.8554,
"step": 195
},
{
"epoch": 0.554016620498615,
"grad_norm": 0.8631574511528015,
"learning_rate": 2.9707067033338257e-05,
"loss": 0.8987,
"step": 200
},
{
"epoch": 0.5678670360110804,
"grad_norm": 0.8141370415687561,
"learning_rate": 2.967941501178302e-05,
"loss": 0.8551,
"step": 205
},
{
"epoch": 0.5817174515235457,
"grad_norm": 0.7609331607818604,
"learning_rate": 2.9650530101652548e-05,
"loss": 0.8327,
"step": 210
},
{
"epoch": 0.5955678670360111,
"grad_norm": 0.7858366966247559,
"learning_rate": 2.9620414728920603e-05,
"loss": 0.8254,
"step": 215
},
{
"epoch": 0.6094182825484764,
"grad_norm": 0.8758653998374939,
"learning_rate": 2.9589071422904525e-05,
"loss": 0.833,
"step": 220
},
{
"epoch": 0.6232686980609419,
"grad_norm": 0.7241504192352295,
"learning_rate": 2.9556502816052805e-05,
"loss": 0.8104,
"step": 225
},
{
"epoch": 0.6371191135734072,
"grad_norm": 0.7951213717460632,
"learning_rate": 2.9522711643723997e-05,
"loss": 0.8526,
"step": 230
},
{
"epoch": 0.6509695290858726,
"grad_norm": 0.8177146315574646,
"learning_rate": 2.9487700743956955e-05,
"loss": 0.7508,
"step": 235
},
{
"epoch": 0.6648199445983379,
"grad_norm": 0.7762987017631531,
"learning_rate": 2.945147305723251e-05,
"loss": 0.8302,
"step": 240
},
{
"epoch": 0.6786703601108033,
"grad_norm": 0.9242251515388489,
"learning_rate": 2.9414031626226472e-05,
"loss": 0.7728,
"step": 245
},
{
"epoch": 0.6925207756232687,
"grad_norm": 0.8518105745315552,
"learning_rate": 2.9375379595554113e-05,
"loss": 0.7502,
"step": 250
},
{
"epoch": 0.7063711911357341,
"grad_norm": 0.8808051943778992,
"learning_rate": 2.9335520211506033e-05,
"loss": 0.7775,
"step": 255
},
{
"epoch": 0.7202216066481995,
"grad_norm": 0.9569631218910217,
"learning_rate": 2.9294456821775524e-05,
"loss": 0.7802,
"step": 260
},
{
"epoch": 0.7340720221606648,
"grad_norm": 1.034891963005066,
"learning_rate": 2.9252192875177415e-05,
"loss": 0.7108,
"step": 265
},
{
"epoch": 0.7479224376731302,
"grad_norm": 1.0093481540679932,
"learning_rate": 2.920873192135839e-05,
"loss": 0.7309,
"step": 270
},
{
"epoch": 0.7617728531855956,
"grad_norm": 0.9309902787208557,
"learning_rate": 2.9164077610498886e-05,
"loss": 0.7685,
"step": 275
},
{
"epoch": 0.775623268698061,
"grad_norm": 1.176249623298645,
"learning_rate": 2.9118233693006503e-05,
"loss": 0.7397,
"step": 280
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.9168936610221863,
"learning_rate": 2.907120401920103e-05,
"loss": 0.7056,
"step": 285
},
{
"epoch": 0.8033240997229917,
"grad_norm": 1.025532603263855,
"learning_rate": 2.9022992538991067e-05,
"loss": 0.7337,
"step": 290
},
{
"epoch": 0.817174515235457,
"grad_norm": 0.8983604907989502,
"learning_rate": 2.897360330154227e-05,
"loss": 0.794,
"step": 295
},
{
"epoch": 0.8310249307479224,
"grad_norm": 0.8976964354515076,
"learning_rate": 2.892304045493728e-05,
"loss": 0.7545,
"step": 300
},
{
"epoch": 0.8448753462603878,
"grad_norm": 1.0854963064193726,
"learning_rate": 2.8871308245827336e-05,
"loss": 0.6853,
"step": 305
},
{
"epoch": 0.8587257617728532,
"grad_norm": 0.9506078958511353,
"learning_rate": 2.88184110190756e-05,
"loss": 0.7233,
"step": 310
},
{
"epoch": 0.8725761772853186,
"grad_norm": 0.9690659642219543,
"learning_rate": 2.8764353217392253e-05,
"loss": 0.7283,
"step": 315
},
{
"epoch": 0.8864265927977839,
"grad_norm": 1.0485140085220337,
"learning_rate": 2.870913938096136e-05,
"loss": 0.729,
"step": 320
},
{
"epoch": 0.9002770083102493,
"grad_norm": 1.066326379776001,
"learning_rate": 2.865277414705955e-05,
"loss": 0.665,
"step": 325
},
{
"epoch": 0.9141274238227147,
"grad_norm": 0.9876983761787415,
"learning_rate": 2.8595262249666536e-05,
"loss": 0.6573,
"step": 330
},
{
"epoch": 0.9279778393351801,
"grad_norm": 0.9734461903572083,
"learning_rate": 2.8536608519067532e-05,
"loss": 0.627,
"step": 335
},
{
"epoch": 0.9418282548476454,
"grad_norm": 0.9485730528831482,
"learning_rate": 2.8476817881447555e-05,
"loss": 0.7214,
"step": 340
},
{
"epoch": 0.9556786703601108,
"grad_norm": 1.0153250694274902,
"learning_rate": 2.8415895358477702e-05,
"loss": 0.6599,
"step": 345
},
{
"epoch": 0.9695290858725761,
"grad_norm": 0.9076303243637085,
"learning_rate": 2.8353846066893382e-05,
"loss": 0.7043,
"step": 350
},
{
"epoch": 0.9833795013850416,
"grad_norm": 1.0249273777008057,
"learning_rate": 2.8290675218064565e-05,
"loss": 0.6515,
"step": 355
},
{
"epoch": 0.997229916897507,
"grad_norm": 1.1760298013687134,
"learning_rate": 2.822638811755812e-05,
"loss": 0.6412,
"step": 360
},
{
"epoch": 1.0110803324099722,
"grad_norm": 0.9463870525360107,
"learning_rate": 2.8160990164692183e-05,
"loss": 0.6286,
"step": 365
},
{
"epoch": 1.0249307479224377,
"grad_norm": 0.9781592488288879,
"learning_rate": 2.8094486852082692e-05,
"loss": 0.6016,
"step": 370
},
{
"epoch": 1.0387811634349031,
"grad_norm": 0.9843015670776367,
"learning_rate": 2.802688376518209e-05,
"loss": 0.5999,
"step": 375
},
{
"epoch": 1.0526315789473684,
"grad_norm": 1.0795265436172485,
"learning_rate": 2.7958186581810195e-05,
"loss": 0.5607,
"step": 380
},
{
"epoch": 1.0664819944598338,
"grad_norm": 1.0073837041854858,
"learning_rate": 2.7888401071677342e-05,
"loss": 0.6205,
"step": 385
},
{
"epoch": 1.080332409972299,
"grad_norm": 1.0322895050048828,
"learning_rate": 2.7817533095899806e-05,
"loss": 0.5441,
"step": 390
},
{
"epoch": 1.0941828254847645,
"grad_norm": 1.0219157934188843,
"learning_rate": 2.774558860650752e-05,
"loss": 0.6074,
"step": 395
},
{
"epoch": 1.10803324099723,
"grad_norm": 1.0589349269866943,
"learning_rate": 2.767257364594421e-05,
"loss": 0.5376,
"step": 400
},
{
"epoch": 1.1218836565096952,
"grad_norm": 0.9884157180786133,
"learning_rate": 2.759849434655987e-05,
"loss": 0.5637,
"step": 405
},
{
"epoch": 1.1357340720221607,
"grad_norm": 0.979081928730011,
"learning_rate": 2.7523356930095764e-05,
"loss": 0.5114,
"step": 410
},
{
"epoch": 1.149584487534626,
"grad_norm": 1.188364863395691,
"learning_rate": 2.744716770716182e-05,
"loss": 0.5266,
"step": 415
},
{
"epoch": 1.1634349030470914,
"grad_norm": 1.0489892959594727,
"learning_rate": 2.736993307670667e-05,
"loss": 0.5176,
"step": 420
},
{
"epoch": 1.1772853185595569,
"grad_norm": 0.9718247652053833,
"learning_rate": 2.7291659525480192e-05,
"loss": 0.5572,
"step": 425
},
{
"epoch": 1.1911357340720221,
"grad_norm": 1.092788815498352,
"learning_rate": 2.72123536274887e-05,
"loss": 0.5158,
"step": 430
},
{
"epoch": 1.2049861495844876,
"grad_norm": 1.0184890031814575,
"learning_rate": 2.713202204344282e-05,
"loss": 0.4769,
"step": 435
},
{
"epoch": 1.2188365650969528,
"grad_norm": 1.0743873119354248,
"learning_rate": 2.705067152019808e-05,
"loss": 0.524,
"step": 440
},
{
"epoch": 1.2326869806094183,
"grad_norm": 1.143704891204834,
"learning_rate": 2.6968308890188235e-05,
"loss": 0.5016,
"step": 445
},
{
"epoch": 1.2465373961218837,
"grad_norm": 1.0439151525497437,
"learning_rate": 2.6884941070851443e-05,
"loss": 0.5137,
"step": 450
},
{
"epoch": 1.260387811634349,
"grad_norm": 1.0907272100448608,
"learning_rate": 2.680057506404929e-05,
"loss": 0.5049,
"step": 455
},
{
"epoch": 1.2742382271468145,
"grad_norm": 1.1267155408859253,
"learning_rate": 2.6715217955478705e-05,
"loss": 0.504,
"step": 460
},
{
"epoch": 1.2880886426592797,
"grad_norm": 1.0027132034301758,
"learning_rate": 2.6628876914076865e-05,
"loss": 0.4873,
"step": 465
},
{
"epoch": 1.3019390581717452,
"grad_norm": 1.0440722703933716,
"learning_rate": 2.654155919141908e-05,
"loss": 0.4524,
"step": 470
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.9812374114990234,
"learning_rate": 2.645327212110976e-05,
"loss": 0.482,
"step": 475
},
{
"epoch": 1.3296398891966759,
"grad_norm": 0.9852293133735657,
"learning_rate": 2.636402311816649e-05,
"loss": 0.4817,
"step": 480
},
{
"epoch": 1.3434903047091413,
"grad_norm": 0.9881157875061035,
"learning_rate": 2.6273819678397233e-05,
"loss": 0.5042,
"step": 485
},
{
"epoch": 1.3573407202216066,
"grad_norm": 1.1235034465789795,
"learning_rate": 2.6182669377770807e-05,
"loss": 0.4982,
"step": 490
},
{
"epoch": 1.371191135734072,
"grad_norm": 1.113957405090332,
"learning_rate": 2.6090579871780583e-05,
"loss": 0.4777,
"step": 495
},
{
"epoch": 1.3850415512465375,
"grad_norm": 0.9710925221443176,
"learning_rate": 2.5997558894801514e-05,
"loss": 0.4601,
"step": 500
},
{
"epoch": 1.3988919667590027,
"grad_norm": 1.091304063796997,
"learning_rate": 2.5903614259440553e-05,
"loss": 0.3797,
"step": 505
},
{
"epoch": 1.4127423822714682,
"grad_norm": 1.0607125759124756,
"learning_rate": 2.580875385588048e-05,
"loss": 0.4758,
"step": 510
},
{
"epoch": 1.4265927977839334,
"grad_norm": 0.9960602521896362,
"learning_rate": 2.571298565121725e-05,
"loss": 0.4651,
"step": 515
},
{
"epoch": 1.440443213296399,
"grad_norm": 1.1779953241348267,
"learning_rate": 2.561631768879082e-05,
"loss": 0.433,
"step": 520
},
{
"epoch": 1.4542936288088644,
"grad_norm": 1.397922158241272,
"learning_rate": 2.551875808750963e-05,
"loss": 0.433,
"step": 525
},
{
"epoch": 1.4681440443213296,
"grad_norm": 1.1092729568481445,
"learning_rate": 2.5420315041168717e-05,
"loss": 0.4,
"step": 530
},
{
"epoch": 1.481994459833795,
"grad_norm": 1.0799415111541748,
"learning_rate": 2.5320996817761534e-05,
"loss": 0.454,
"step": 535
},
{
"epoch": 1.4958448753462603,
"grad_norm": 1.0249577760696411,
"learning_rate": 2.5220811758785525e-05,
"loss": 0.4557,
"step": 540
},
{
"epoch": 1.5096952908587258,
"grad_norm": 0.9857805967330933,
"learning_rate": 2.5119768278541576e-05,
"loss": 0.4229,
"step": 545
},
{
"epoch": 1.5235457063711912,
"grad_norm": 1.0294913053512573,
"learning_rate": 2.5017874863427292e-05,
"loss": 0.4494,
"step": 550
},
{
"epoch": 1.5373961218836565,
"grad_norm": 1.123518705368042,
"learning_rate": 2.4915140071224247e-05,
"loss": 0.4268,
"step": 555
},
{
"epoch": 1.5512465373961217,
"grad_norm": 1.096327543258667,
"learning_rate": 2.4811572530379253e-05,
"loss": 0.4339,
"step": 560
},
{
"epoch": 1.5650969529085872,
"grad_norm": 1.1293649673461914,
"learning_rate": 2.4707180939279658e-05,
"loss": 0.4019,
"step": 565
},
{
"epoch": 1.5789473684210527,
"grad_norm": 1.098160982131958,
"learning_rate": 2.46019740655228e-05,
"loss": 0.3848,
"step": 570
},
{
"epoch": 1.5927977839335181,
"grad_norm": 1.0816259384155273,
"learning_rate": 2.4495960745179646e-05,
"loss": 0.3758,
"step": 575
},
{
"epoch": 1.6066481994459834,
"grad_norm": 1.0269978046417236,
"learning_rate": 2.4389149882052654e-05,
"loss": 0.4255,
"step": 580
},
{
"epoch": 1.6204986149584486,
"grad_norm": 1.0803477764129639,
"learning_rate": 2.4281550446927968e-05,
"loss": 0.3737,
"step": 585
},
{
"epoch": 1.634349030470914,
"grad_norm": 1.121321678161621,
"learning_rate": 2.4173171476821997e-05,
"loss": 0.3985,
"step": 590
},
{
"epoch": 1.6481994459833795,
"grad_norm": 1.1697264909744263,
"learning_rate": 2.40640220742224e-05,
"loss": 0.4194,
"step": 595
},
{
"epoch": 1.662049861495845,
"grad_norm": 1.0884120464324951,
"learning_rate": 2.3954111406323607e-05,
"loss": 0.4341,
"step": 600
},
{
"epoch": 1.6759002770083102,
"grad_norm": 1.0602376461029053,
"learning_rate": 2.3843448704256868e-05,
"loss": 0.4311,
"step": 605
},
{
"epoch": 1.6897506925207755,
"grad_norm": 1.1031391620635986,
"learning_rate": 2.3732043262314974e-05,
"loss": 0.3749,
"step": 610
},
{
"epoch": 1.703601108033241,
"grad_norm": 1.1139200925827026,
"learning_rate": 2.3619904437171642e-05,
"loss": 0.3423,
"step": 615
},
{
"epoch": 1.7174515235457064,
"grad_norm": 1.2277401685714722,
"learning_rate": 2.3507041647095668e-05,
"loss": 0.4357,
"step": 620
},
{
"epoch": 1.7313019390581719,
"grad_norm": 1.1853033304214478,
"learning_rate": 2.3393464371159914e-05,
"loss": 0.3869,
"step": 625
},
{
"epoch": 1.745152354570637,
"grad_norm": 1.0660614967346191,
"learning_rate": 2.327918214844518e-05,
"loss": 0.3493,
"step": 630
},
{
"epoch": 1.7590027700831024,
"grad_norm": 1.2714390754699707,
"learning_rate": 2.3164204577239037e-05,
"loss": 0.412,
"step": 635
},
{
"epoch": 1.7728531855955678,
"grad_norm": 1.2650110721588135,
"learning_rate": 2.304854131422971e-05,
"loss": 0.36,
"step": 640
},
{
"epoch": 1.7867036011080333,
"grad_norm": 1.1004222631454468,
"learning_rate": 2.2932202073695003e-05,
"loss": 0.3482,
"step": 645
},
{
"epoch": 1.8005540166204987,
"grad_norm": 1.0999927520751953,
"learning_rate": 2.281519662668645e-05,
"loss": 0.3655,
"step": 650
},
{
"epoch": 1.814404432132964,
"grad_norm": 1.0468759536743164,
"learning_rate": 2.2697534800208655e-05,
"loss": 0.3554,
"step": 655
},
{
"epoch": 1.8282548476454292,
"grad_norm": 1.130560278892517,
"learning_rate": 2.2579226476393932e-05,
"loss": 0.3633,
"step": 660
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.9949873089790344,
"learning_rate": 2.2460281591672365e-05,
"loss": 0.3747,
"step": 665
},
{
"epoch": 1.8559556786703602,
"grad_norm": 1.043385624885559,
"learning_rate": 2.2340710135937233e-05,
"loss": 0.382,
"step": 670
},
{
"epoch": 1.8698060941828256,
"grad_norm": 1.095567226409912,
"learning_rate": 2.2220522151706012e-05,
"loss": 0.4021,
"step": 675
},
{
"epoch": 1.8836565096952909,
"grad_norm": 1.1817140579223633,
"learning_rate": 2.2099727733276905e-05,
"loss": 0.3553,
"step": 680
},
{
"epoch": 1.897506925207756,
"grad_norm": 1.1485965251922607,
"learning_rate": 2.1978337025881062e-05,
"loss": 0.3613,
"step": 685
},
{
"epoch": 1.9113573407202216,
"grad_norm": 1.130224585533142,
"learning_rate": 2.18563602248305e-05,
"loss": 0.3163,
"step": 690
},
{
"epoch": 1.925207756232687,
"grad_norm": 1.0307577848434448,
"learning_rate": 2.1733807574661823e-05,
"loss": 0.34,
"step": 695
},
{
"epoch": 1.9390581717451525,
"grad_norm": 1.173927664756775,
"learning_rate": 2.1610689368275827e-05,
"loss": 0.3666,
"step": 700
},
{
"epoch": 1.9529085872576177,
"grad_norm": 1.1913485527038574,
"learning_rate": 2.148701594607298e-05,
"loss": 0.3224,
"step": 705
},
{
"epoch": 1.966759002770083,
"grad_norm": 1.150925874710083,
"learning_rate": 2.1362797695085e-05,
"loss": 0.3357,
"step": 710
},
{
"epoch": 1.9806094182825484,
"grad_norm": 1.0675455331802368,
"learning_rate": 2.123804504810246e-05,
"loss": 0.3574,
"step": 715
},
{
"epoch": 1.994459833795014,
"grad_norm": 1.1154441833496094,
"learning_rate": 2.1112768482798553e-05,
"loss": 0.3491,
"step": 720
}
],
"logging_steps": 5,
"max_steps": 1805,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0889545362880594e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}