{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014104372355430184,
"grad_norm": 1.0829510688781738,
"learning_rate": 1.348314606741573e-06,
"loss": 1.3063,
"step": 5
},
{
"epoch": 0.028208744710860368,
"grad_norm": 0.91203373670578,
"learning_rate": 3.033707865168539e-06,
"loss": 1.2577,
"step": 10
},
{
"epoch": 0.04231311706629055,
"grad_norm": 0.6704057455062866,
"learning_rate": 4.719101123595506e-06,
"loss": 1.334,
"step": 15
},
{
"epoch": 0.056417489421720736,
"grad_norm": 0.5428482294082642,
"learning_rate": 6.404494382022472e-06,
"loss": 1.2117,
"step": 20
},
{
"epoch": 0.07052186177715092,
"grad_norm": 0.4570103585720062,
"learning_rate": 8.089887640449438e-06,
"loss": 1.1806,
"step": 25
},
{
"epoch": 0.0846262341325811,
"grad_norm": 0.5431040525436401,
"learning_rate": 9.775280898876405e-06,
"loss": 1.1791,
"step": 30
},
{
"epoch": 0.09873060648801128,
"grad_norm": 0.6045286059379578,
"learning_rate": 1.146067415730337e-05,
"loss": 1.1561,
"step": 35
},
{
"epoch": 0.11283497884344147,
"grad_norm": 0.816319465637207,
"learning_rate": 1.3146067415730338e-05,
"loss": 1.1815,
"step": 40
},
{
"epoch": 0.12693935119887165,
"grad_norm": 0.41476839780807495,
"learning_rate": 1.4831460674157303e-05,
"loss": 1.1569,
"step": 45
},
{
"epoch": 0.14104372355430184,
"grad_norm": 0.45171546936035156,
"learning_rate": 1.651685393258427e-05,
"loss": 1.1442,
"step": 50
},
{
"epoch": 0.15514809590973203,
"grad_norm": 0.5023919343948364,
"learning_rate": 1.8202247191011237e-05,
"loss": 1.0983,
"step": 55
},
{
"epoch": 0.1692524682651622,
"grad_norm": 0.54413241147995,
"learning_rate": 1.98876404494382e-05,
"loss": 1.1343,
"step": 60
},
{
"epoch": 0.18335684062059238,
"grad_norm": 0.40992623567581177,
"learning_rate": 2.1573033707865168e-05,
"loss": 1.1189,
"step": 65
},
{
"epoch": 0.19746121297602257,
"grad_norm": 0.5576924085617065,
"learning_rate": 2.3258426966292135e-05,
"loss": 1.1297,
"step": 70
},
{
"epoch": 0.21156558533145275,
"grad_norm": 0.439005047082901,
"learning_rate": 2.4943820224719103e-05,
"loss": 1.1228,
"step": 75
},
{
"epoch": 0.22566995768688294,
"grad_norm": 0.45460307598114014,
"learning_rate": 2.6629213483146066e-05,
"loss": 1.105,
"step": 80
},
{
"epoch": 0.2397743300423131,
"grad_norm": 0.5392889976501465,
"learning_rate": 2.8314606741573034e-05,
"loss": 1.0453,
"step": 85
},
{
"epoch": 0.2538787023977433,
"grad_norm": 0.49128258228302,
"learning_rate": 3e-05,
"loss": 1.0606,
"step": 90
},
{
"epoch": 0.2679830747531735,
"grad_norm": 0.574536144733429,
"learning_rate": 2.9999348997381465e-05,
"loss": 1.0693,
"step": 95
},
{
"epoch": 0.2820874471086037,
"grad_norm": 0.5417886972427368,
"learning_rate": 2.999739604603311e-05,
"loss": 1.0346,
"step": 100
},
{
"epoch": 0.29619181946403383,
"grad_norm": 0.6563856601715088,
"learning_rate": 2.9994141315471794e-05,
"loss": 0.9918,
"step": 105
},
{
"epoch": 0.31029619181946405,
"grad_norm": 0.5459888577461243,
"learning_rate": 2.998958508820927e-05,
"loss": 1.0295,
"step": 110
},
{
"epoch": 0.3244005641748942,
"grad_norm": 0.5069293975830078,
"learning_rate": 2.998372775972765e-05,
"loss": 1.0381,
"step": 115
},
{
"epoch": 0.3385049365303244,
"grad_norm": 0.5660542249679565,
"learning_rate": 2.9976569838445096e-05,
"loss": 0.9919,
"step": 120
},
{
"epoch": 0.3526093088857546,
"grad_norm": 0.5492711663246155,
"learning_rate": 2.9968111945671674e-05,
"loss": 1.0029,
"step": 125
},
{
"epoch": 0.36671368124118475,
"grad_norm": 0.6453714370727539,
"learning_rate": 2.9958354815555426e-05,
"loss": 0.9913,
"step": 130
},
{
"epoch": 0.38081805359661497,
"grad_norm": 0.534812867641449,
"learning_rate": 2.9947299295018656e-05,
"loss": 0.9343,
"step": 135
},
{
"epoch": 0.39492242595204513,
"grad_norm": 0.5858612060546875,
"learning_rate": 2.9934946343684404e-05,
"loss": 0.9805,
"step": 140
},
{
"epoch": 0.4090267983074753,
"grad_norm": 0.5869054198265076,
"learning_rate": 2.9921297033793158e-05,
"loss": 1.0044,
"step": 145
},
{
"epoch": 0.4231311706629055,
"grad_norm": 0.7152486443519592,
"learning_rate": 2.9906352550109787e-05,
"loss": 0.9804,
"step": 150
},
{
"epoch": 0.43723554301833567,
"grad_norm": 0.6668853163719177,
"learning_rate": 2.989011418982069e-05,
"loss": 0.8677,
"step": 155
},
{
"epoch": 0.4513399153737659,
"grad_norm": 0.7731665968894958,
"learning_rate": 2.9872583362421203e-05,
"loss": 0.9194,
"step": 160
},
{
"epoch": 0.46544428772919605,
"grad_norm": 0.7152626514434814,
"learning_rate": 2.985376158959328e-05,
"loss": 0.8777,
"step": 165
},
{
"epoch": 0.4795486600846262,
"grad_norm": 0.6192904114723206,
"learning_rate": 2.983365050507336e-05,
"loss": 0.9021,
"step": 170
},
{
"epoch": 0.4936530324400564,
"grad_norm": 0.6604310274124146,
"learning_rate": 2.9812251854510603e-05,
"loss": 0.9059,
"step": 175
},
{
"epoch": 0.5077574047954866,
"grad_norm": 0.7105734348297119,
"learning_rate": 2.9789567495315357e-05,
"loss": 0.9273,
"step": 180
},
{
"epoch": 0.5218617771509168,
"grad_norm": 0.7766591906547546,
"learning_rate": 2.976559939649791e-05,
"loss": 0.8934,
"step": 185
},
{
"epoch": 0.535966149506347,
"grad_norm": 0.9151214361190796,
"learning_rate": 2.9740349638497614e-05,
"loss": 0.9166,
"step": 190
},
{
"epoch": 0.5500705218617772,
"grad_norm": 0.8262761831283569,
"learning_rate": 2.971382041300228e-05,
"loss": 0.8532,
"step": 195
},
{
"epoch": 0.5641748942172073,
"grad_norm": 0.6876681447029114,
"learning_rate": 2.9686014022757937e-05,
"loss": 0.8347,
"step": 200
},
{
"epoch": 0.5782792665726375,
"grad_norm": 0.7912429571151733,
"learning_rate": 2.965693288136897e-05,
"loss": 0.8992,
"step": 205
},
{
"epoch": 0.5923836389280677,
"grad_norm": 0.7762657403945923,
"learning_rate": 2.9626579513088606e-05,
"loss": 0.85,
"step": 210
},
{
"epoch": 0.6064880112834978,
"grad_norm": 0.8784447312355042,
"learning_rate": 2.959495655259981e-05,
"loss": 0.7934,
"step": 215
},
{
"epoch": 0.6205923836389281,
"grad_norm": 0.8992597460746765,
"learning_rate": 2.9562066744786587e-05,
"loss": 0.8254,
"step": 220
},
{
"epoch": 0.6346967559943583,
"grad_norm": 0.7901438474655151,
"learning_rate": 2.9527912944495748e-05,
"loss": 0.8078,
"step": 225
},
{
"epoch": 0.6488011283497884,
"grad_norm": 0.8354968428611755,
"learning_rate": 2.9492498116289072e-05,
"loss": 0.856,
"step": 230
},
{
"epoch": 0.6629055007052186,
"grad_norm": 1.1308631896972656,
"learning_rate": 2.9455825334186023e-05,
"loss": 0.7691,
"step": 235
},
{
"epoch": 0.6770098730606487,
"grad_norm": 0.8235520124435425,
"learning_rate": 2.9417897781396883e-05,
"loss": 0.7801,
"step": 240
},
{
"epoch": 0.691114245416079,
"grad_norm": 0.7662566900253296,
"learning_rate": 2.937871875004648e-05,
"loss": 0.7438,
"step": 245
},
{
"epoch": 0.7052186177715092,
"grad_norm": 0.8254992961883545,
"learning_rate": 2.9338291640888413e-05,
"loss": 0.8526,
"step": 250
},
{
"epoch": 0.7193229901269393,
"grad_norm": 0.783316433429718,
"learning_rate": 2.9296619963009866e-05,
"loss": 0.782,
"step": 255
},
{
"epoch": 0.7334273624823695,
"grad_norm": 0.910808801651001,
"learning_rate": 2.925370733352704e-05,
"loss": 0.7768,
"step": 260
},
{
"epoch": 0.7475317348377997,
"grad_norm": 0.8336752653121948,
"learning_rate": 2.920955747727115e-05,
"loss": 0.7485,
"step": 265
},
{
"epoch": 0.7616361071932299,
"grad_norm": 0.9764995574951172,
"learning_rate": 2.9164174226465134e-05,
"loss": 0.7457,
"step": 270
},
{
"epoch": 0.7757404795486601,
"grad_norm": 0.9447649717330933,
"learning_rate": 2.9117561520391002e-05,
"loss": 0.7196,
"step": 275
},
{
"epoch": 0.7898448519040903,
"grad_norm": 0.8971619606018066,
"learning_rate": 2.9069723405047923e-05,
"loss": 0.7608,
"step": 280
},
{
"epoch": 0.8039492242595204,
"grad_norm": 0.9162759184837341,
"learning_rate": 2.902066403280101e-05,
"loss": 0.7038,
"step": 285
},
{
"epoch": 0.8180535966149506,
"grad_norm": 1.049428105354309,
"learning_rate": 2.8970387662020898e-05,
"loss": 0.705,
"step": 290
},
{
"epoch": 0.8321579689703809,
"grad_norm": 0.8928109407424927,
"learning_rate": 2.8918898656714127e-05,
"loss": 0.7331,
"step": 295
},
{
"epoch": 0.846262341325811,
"grad_norm": 0.928637683391571,
"learning_rate": 2.8866201486144336e-05,
"loss": 0.7034,
"step": 300
},
{
"epoch": 0.8603667136812412,
"grad_norm": 0.9384335279464722,
"learning_rate": 2.881230072444432e-05,
"loss": 0.735,
"step": 305
},
{
"epoch": 0.8744710860366713,
"grad_norm": 0.879400908946991,
"learning_rate": 2.8757201050219027e-05,
"loss": 0.7082,
"step": 310
},
{
"epoch": 0.8885754583921015,
"grad_norm": 0.9233940839767456,
"learning_rate": 2.8700907246139413e-05,
"loss": 0.6922,
"step": 315
},
{
"epoch": 0.9026798307475318,
"grad_norm": 1.0604772567749023,
"learning_rate": 2.8643424198527314e-05,
"loss": 0.7058,
"step": 320
},
{
"epoch": 0.9167842031029619,
"grad_norm": 0.9149603843688965,
"learning_rate": 2.858475689693135e-05,
"loss": 0.6531,
"step": 325
},
{
"epoch": 0.9308885754583921,
"grad_norm": 0.9202451109886169,
"learning_rate": 2.852491043369377e-05,
"loss": 0.6501,
"step": 330
},
{
"epoch": 0.9449929478138223,
"grad_norm": 1.123238205909729,
"learning_rate": 2.8463890003508488e-05,
"loss": 0.6438,
"step": 335
},
{
"epoch": 0.9590973201692524,
"grad_norm": 0.9209488034248352,
"learning_rate": 2.840170090297014e-05,
"loss": 0.6568,
"step": 340
},
{
"epoch": 0.9732016925246827,
"grad_norm": 1.0271188020706177,
"learning_rate": 2.833834853011437e-05,
"loss": 0.6731,
"step": 345
},
{
"epoch": 0.9873060648801129,
"grad_norm": 1.0789600610733032,
"learning_rate": 2.827383838394926e-05,
"loss": 0.6798,
"step": 350
},
{
"epoch": 1.0,
"grad_norm": 1.23028564453125,
"learning_rate": 2.8208176063978018e-05,
"loss": 0.6558,
"step": 355
},
{
"epoch": 1.0141043723554302,
"grad_norm": 0.9986002445220947,
"learning_rate": 2.814136726971294e-05,
"loss": 0.5642,
"step": 360
},
{
"epoch": 1.0282087447108603,
"grad_norm": 1.0259571075439453,
"learning_rate": 2.8073417800180707e-05,
"loss": 0.564,
"step": 365
},
{
"epoch": 1.0423131170662905,
"grad_norm": 0.9642548561096191,
"learning_rate": 2.800433355341898e-05,
"loss": 0.5423,
"step": 370
},
{
"epoch": 1.0564174894217206,
"grad_norm": 0.9903756380081177,
"learning_rate": 2.793412052596451e-05,
"loss": 0.5246,
"step": 375
},
{
"epoch": 1.0705218617771508,
"grad_norm": 0.9968107342720032,
"learning_rate": 2.7862784812332592e-05,
"loss": 0.5371,
"step": 380
},
{
"epoch": 1.0846262341325812,
"grad_norm": 1.2498723268508911,
"learning_rate": 2.779033260448807e-05,
"loss": 0.5538,
"step": 385
},
{
"epoch": 1.0987306064880114,
"grad_norm": 1.061000943183899,
"learning_rate": 2.7716770191307887e-05,
"loss": 0.5467,
"step": 390
},
{
"epoch": 1.1128349788434415,
"grad_norm": 0.9731029272079468,
"learning_rate": 2.7642103958035188e-05,
"loss": 0.4912,
"step": 395
},
{
"epoch": 1.1269393511988717,
"grad_norm": 0.9743561744689941,
"learning_rate": 2.756634038572509e-05,
"loss": 0.5527,
"step": 400
},
{
"epoch": 1.1410437235543018,
"grad_norm": 1.0373893976211548,
"learning_rate": 2.748948605068212e-05,
"loss": 0.5109,
"step": 405
},
{
"epoch": 1.155148095909732,
"grad_norm": 1.0031832456588745,
"learning_rate": 2.7411547623889397e-05,
"loss": 0.5417,
"step": 410
},
{
"epoch": 1.1692524682651622,
"grad_norm": 1.2156072854995728,
"learning_rate": 2.7332531870429574e-05,
"loss": 0.4838,
"step": 415
},
{
"epoch": 1.1833568406205923,
"grad_norm": 0.9628976583480835,
"learning_rate": 2.7252445648897643e-05,
"loss": 0.4965,
"step": 420
},
{
"epoch": 1.1974612129760225,
"grad_norm": 1.100713849067688,
"learning_rate": 2.7171295910805585e-05,
"loss": 0.4919,
"step": 425
},
{
"epoch": 1.2115655853314529,
"grad_norm": 1.0750036239624023,
"learning_rate": 2.708908969997901e-05,
"loss": 0.5031,
"step": 430
},
{
"epoch": 1.225669957686883,
"grad_norm": 1.0613516569137573,
"learning_rate": 2.7005834151945708e-05,
"loss": 0.5087,
"step": 435
},
{
"epoch": 1.2397743300423132,
"grad_norm": 1.2760698795318604,
"learning_rate": 2.6921536493316327e-05,
"loss": 0.5021,
"step": 440
},
{
"epoch": 1.2538787023977433,
"grad_norm": 0.9473477005958557,
"learning_rate": 2.683620404115706e-05,
"loss": 0.5002,
"step": 445
},
{
"epoch": 1.2679830747531735,
"grad_norm": 1.1720986366271973,
"learning_rate": 2.674984420235455e-05,
"loss": 0.5148,
"step": 450
},
{
"epoch": 1.2820874471086037,
"grad_norm": 1.0552853345870972,
"learning_rate": 2.6662464472972958e-05,
"loss": 0.5056,
"step": 455
},
{
"epoch": 1.2961918194640338,
"grad_norm": 1.0545684099197388,
"learning_rate": 2.65740724376033e-05,
"loss": 0.4976,
"step": 460
},
{
"epoch": 1.310296191819464,
"grad_norm": 0.8876795172691345,
"learning_rate": 2.6484675768705102e-05,
"loss": 0.4761,
"step": 465
},
{
"epoch": 1.3244005641748942,
"grad_norm": 0.9161580204963684,
"learning_rate": 2.6394282225940445e-05,
"loss": 0.4589,
"step": 470
},
{
"epoch": 1.3385049365303243,
"grad_norm": 1.0101513862609863,
"learning_rate": 2.63028996555004e-05,
"loss": 0.4852,
"step": 475
},
{
"epoch": 1.3526093088857545,
"grad_norm": 1.0795925855636597,
"learning_rate": 2.6210535989423978e-05,
"loss": 0.4633,
"step": 480
},
{
"epoch": 1.3667136812411846,
"grad_norm": 1.032175064086914,
"learning_rate": 2.6117199244909655e-05,
"loss": 0.4791,
"step": 485
},
{
"epoch": 1.380818053596615,
"grad_norm": 0.9439494013786316,
"learning_rate": 2.6022897523619423e-05,
"loss": 0.4717,
"step": 490
},
{
"epoch": 1.3949224259520452,
"grad_norm": 1.0860165357589722,
"learning_rate": 2.592763901097564e-05,
"loss": 0.4829,
"step": 495
},
{
"epoch": 1.4090267983074753,
"grad_norm": 0.9606081247329712,
"learning_rate": 2.583143197545044e-05,
"loss": 0.4861,
"step": 500
},
{
"epoch": 1.4231311706629055,
"grad_norm": 1.1352707147598267,
"learning_rate": 2.5734284767848108e-05,
"loss": 0.4769,
"step": 505
},
{
"epoch": 1.4372355430183357,
"grad_norm": 1.0786997079849243,
"learning_rate": 2.5636205820580173e-05,
"loss": 0.4504,
"step": 510
},
{
"epoch": 1.4513399153737658,
"grad_norm": 1.1144368648529053,
"learning_rate": 2.553720364693351e-05,
"loss": 0.4326,
"step": 515
},
{
"epoch": 1.465444287729196,
"grad_norm": 1.0776797533035278,
"learning_rate": 2.543728684033135e-05,
"loss": 0.4121,
"step": 520
},
{
"epoch": 1.4795486600846262,
"grad_norm": 1.1324986219406128,
"learning_rate": 2.5336464073587395e-05,
"loss": 0.3999,
"step": 525
},
{
"epoch": 1.4936530324400565,
"grad_norm": 0.951062023639679,
"learning_rate": 2.5234744098153e-05,
"loss": 0.4736,
"step": 530
},
{
"epoch": 1.5077574047954867,
"grad_norm": 1.1760550737380981,
"learning_rate": 2.5132135743357546e-05,
"loss": 0.4411,
"step": 535
},
{
"epoch": 1.5218617771509169,
"grad_norm": 1.1214802265167236,
"learning_rate": 2.502864791564205e-05,
"loss": 0.43,
"step": 540
},
{
"epoch": 1.535966149506347,
"grad_norm": 1.0796761512756348,
"learning_rate": 2.492428959778609e-05,
"loss": 0.455,
"step": 545
},
{
"epoch": 1.5500705218617772,
"grad_norm": 1.148000717163086,
"learning_rate": 2.48190698481281e-05,
"loss": 0.4235,
"step": 550
},
{
"epoch": 1.5641748942172073,
"grad_norm": 1.0369551181793213,
"learning_rate": 2.4712997799779077e-05,
"loss": 0.4066,
"step": 555
},
{
"epoch": 1.5782792665726375,
"grad_norm": 1.153380036354065,
"learning_rate": 2.4606082659829852e-05,
"loss": 0.4079,
"step": 560
},
{
"epoch": 1.5923836389280677,
"grad_norm": 1.2209539413452148,
"learning_rate": 2.4498333708551906e-05,
"loss": 0.4335,
"step": 565
},
{
"epoch": 1.6064880112834978,
"grad_norm": 1.097364068031311,
"learning_rate": 2.4389760298591825e-05,
"loss": 0.4187,
"step": 570
},
{
"epoch": 1.620592383638928,
"grad_norm": 1.0410133600234985,
"learning_rate": 2.4280371854159502e-05,
"loss": 0.4305,
"step": 575
},
{
"epoch": 1.6346967559943582,
"grad_norm": 1.2560479640960693,
"learning_rate": 2.417017787021011e-05,
"loss": 0.3681,
"step": 580
},
{
"epoch": 1.6488011283497883,
"grad_norm": 1.1253535747528076,
"learning_rate": 2.405918791161992e-05,
"loss": 0.416,
"step": 585
},
{
"epoch": 1.6629055007052185,
"grad_norm": 1.1615822315216064,
"learning_rate": 2.3947411612356093e-05,
"loss": 0.4539,
"step": 590
},
{
"epoch": 1.6770098730606486,
"grad_norm": 1.0008732080459595,
"learning_rate": 2.3834858674640434e-05,
"loss": 0.4258,
"step": 595
},
{
"epoch": 1.691114245416079,
"grad_norm": 0.9982715845108032,
"learning_rate": 2.3721538868107226e-05,
"loss": 0.4154,
"step": 600
},
{
"epoch": 1.7052186177715092,
"grad_norm": 1.1458238363265991,
"learning_rate": 2.3607462028955245e-05,
"loss": 0.3803,
"step": 605
},
{
"epoch": 1.7193229901269393,
"grad_norm": 1.0373518466949463,
"learning_rate": 2.3492638059093958e-05,
"loss": 0.3826,
"step": 610
},
{
"epoch": 1.7334273624823695,
"grad_norm": 1.3240524530410767,
"learning_rate": 2.3377076925284037e-05,
"loss": 0.4086,
"step": 615
},
{
"epoch": 1.7475317348377997,
"grad_norm": 1.0962055921554565,
"learning_rate": 2.3260788658272244e-05,
"loss": 0.3553,
"step": 620
},
{
"epoch": 1.76163610719323,
"grad_norm": 1.075670838356018,
"learning_rate": 2.3143783351920753e-05,
"loss": 0.3749,
"step": 625
},
{
"epoch": 1.7757404795486602,
"grad_norm": 0.9956583976745605,
"learning_rate": 2.3026071162331012e-05,
"loss": 0.376,
"step": 630
},
{
"epoch": 1.7898448519040904,
"grad_norm": 1.0385911464691162,
"learning_rate": 2.2907662306962176e-05,
"loss": 0.3529,
"step": 635
},
{
"epoch": 1.8039492242595205,
"grad_norm": 1.147087574005127,
"learning_rate": 2.278856706374422e-05,
"loss": 0.3979,
"step": 640
},
{
"epoch": 1.8180535966149507,
"grad_norm": 1.094008445739746,
"learning_rate": 2.266879577018585e-05,
"loss": 0.3979,
"step": 645
},
{
"epoch": 1.8321579689703809,
"grad_norm": 1.116995930671692,
"learning_rate": 2.2548358822477158e-05,
"loss": 0.3869,
"step": 650
},
{
"epoch": 1.846262341325811,
"grad_norm": 0.991368293762207,
"learning_rate": 2.242726667458726e-05,
"loss": 0.343,
"step": 655
},
{
"epoch": 1.8603667136812412,
"grad_norm": 1.0833584070205688,
"learning_rate": 2.2305529837356857e-05,
"loss": 0.3563,
"step": 660
},
{
"epoch": 1.8744710860366713,
"grad_norm": 1.0229146480560303,
"learning_rate": 2.2183158877585937e-05,
"loss": 0.3327,
"step": 665
},
{
"epoch": 1.8885754583921015,
"grad_norm": 0.9877346754074097,
"learning_rate": 2.206016441711652e-05,
"loss": 0.3832,
"step": 670
},
{
"epoch": 1.9026798307475317,
"grad_norm": 1.1531109809875488,
"learning_rate": 2.1936557131910733e-05,
"loss": 0.3904,
"step": 675
},
{
"epoch": 1.9167842031029618,
"grad_norm": 1.0331673622131348,
"learning_rate": 2.1812347751124074e-05,
"loss": 0.331,
"step": 680
},
{
"epoch": 1.930888575458392,
"grad_norm": 1.2358472347259521,
"learning_rate": 2.1687547056174172e-05,
"loss": 0.3152,
"step": 685
},
{
"epoch": 1.9449929478138221,
"grad_norm": 1.2115956544876099,
"learning_rate": 2.156216587980491e-05,
"loss": 0.3641,
"step": 690
},
{
"epoch": 1.9590973201692523,
"grad_norm": 1.191973328590393,
"learning_rate": 2.1436215105146178e-05,
"loss": 0.3296,
"step": 695
},
{
"epoch": 1.9732016925246827,
"grad_norm": 1.093503475189209,
"learning_rate": 2.1309705664769198e-05,
"loss": 0.3374,
"step": 700
},
{
"epoch": 1.9873060648801129,
"grad_norm": 1.0156272649765015,
"learning_rate": 2.1182648539737547e-05,
"loss": 0.3241,
"step": 705
},
{
"epoch": 2.0,
"grad_norm": 1.5677207708358765,
"learning_rate": 2.1055054758654053e-05,
"loss": 0.3256,
"step": 710
}
],
"logging_steps": 5,
"max_steps": 1775,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.1044507413821522e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}