{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 25,
"global_step": 781,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0064047822374039285,
"grad_norm": 5.6078200340271,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.1562,
"step": 5
},
{
"epoch": 0.012809564474807857,
"grad_norm": 5.326383590698242,
"learning_rate": 2.25e-06,
"loss": 1.0619,
"step": 10
},
{
"epoch": 0.019214346712211786,
"grad_norm": 2.7833762168884277,
"learning_rate": 3.5e-06,
"loss": 1.0511,
"step": 15
},
{
"epoch": 0.025619128949615714,
"grad_norm": 1.1198874711990356,
"learning_rate": 4.75e-06,
"loss": 0.931,
"step": 20
},
{
"epoch": 0.03202391118701964,
"grad_norm": 1.2183799743652344,
"learning_rate": 4.999659159998194e-06,
"loss": 0.8819,
"step": 25
},
{
"epoch": 0.03842869342442357,
"grad_norm": 0.7168175578117371,
"learning_rate": 4.998274656771894e-06,
"loss": 0.845,
"step": 30
},
{
"epoch": 0.0448334756618275,
"grad_norm": 0.5571187734603882,
"learning_rate": 4.995825777227236e-06,
"loss": 0.7982,
"step": 35
},
{
"epoch": 0.05123825789923143,
"grad_norm": 0.5454768538475037,
"learning_rate": 4.992313564696022e-06,
"loss": 0.7583,
"step": 40
},
{
"epoch": 0.057643040136635355,
"grad_norm": 0.4312361776828766,
"learning_rate": 4.9877395155372815e-06,
"loss": 0.7783,
"step": 45
},
{
"epoch": 0.06404782237403928,
"grad_norm": 0.34762468934059143,
"learning_rate": 4.982105578499759e-06,
"loss": 0.7645,
"step": 50
},
{
"epoch": 0.07045260461144322,
"grad_norm": 0.34647414088249207,
"learning_rate": 4.975414153891664e-06,
"loss": 0.7397,
"step": 55
},
{
"epoch": 0.07685738684884715,
"grad_norm": 0.33078256249427795,
"learning_rate": 4.967668092558024e-06,
"loss": 0.7325,
"step": 60
},
{
"epoch": 0.08326216908625107,
"grad_norm": 0.3410351872444153,
"learning_rate": 4.9588706946661066e-06,
"loss": 0.7037,
"step": 65
},
{
"epoch": 0.089666951323655,
"grad_norm": 0.3349353075027466,
"learning_rate": 4.949025708299395e-06,
"loss": 0.6928,
"step": 70
},
{
"epoch": 0.09607173356105893,
"grad_norm": 0.32335689663887024,
"learning_rate": 4.93813732786074e-06,
"loss": 0.6764,
"step": 75
},
{
"epoch": 0.10247651579846286,
"grad_norm": 0.33923929929733276,
"learning_rate": 4.926210192285359e-06,
"loss": 0.7398,
"step": 80
},
{
"epoch": 0.10888129803586678,
"grad_norm": 0.32697415351867676,
"learning_rate": 4.913249383064438e-06,
"loss": 0.6927,
"step": 85
},
{
"epoch": 0.11528608027327071,
"grad_norm": 0.34032362699508667,
"learning_rate": 4.899260422080195e-06,
"loss": 0.6909,
"step": 90
},
{
"epoch": 0.12169086251067464,
"grad_norm": 0.3949951231479645,
"learning_rate": 4.884249269253309e-06,
"loss": 0.6517,
"step": 95
},
{
"epoch": 0.12809564474807855,
"grad_norm": 0.40848004817962646,
"learning_rate": 4.868222320003731e-06,
"loss": 0.6625,
"step": 100
},
{
"epoch": 0.1345004269854825,
"grad_norm": 0.36571186780929565,
"learning_rate": 4.851186402525946e-06,
"loss": 0.6693,
"step": 105
},
{
"epoch": 0.14090520922288644,
"grad_norm": 0.33602604269981384,
"learning_rate": 4.8331487748798636e-06,
"loss": 0.6699,
"step": 110
},
{
"epoch": 0.14730999146029036,
"grad_norm": 0.3456333875656128,
"learning_rate": 4.814117121898554e-06,
"loss": 0.6265,
"step": 115
},
{
"epoch": 0.1537147736976943,
"grad_norm": 0.2806994915008545,
"learning_rate": 4.794099551914173e-06,
"loss": 0.6181,
"step": 120
},
{
"epoch": 0.16011955593509822,
"grad_norm": 0.352857381105423,
"learning_rate": 4.773104593303449e-06,
"loss": 0.6869,
"step": 125
},
{
"epoch": 0.16652433817250215,
"grad_norm": 0.3353271782398224,
"learning_rate": 4.751141190854214e-06,
"loss": 0.6184,
"step": 130
},
{
"epoch": 0.17292912040990607,
"grad_norm": 0.39950162172317505,
"learning_rate": 4.728218701954525e-06,
"loss": 0.6631,
"step": 135
},
{
"epoch": 0.17933390264731,
"grad_norm": 0.3852960169315338,
"learning_rate": 4.704346892606001e-06,
"loss": 0.6077,
"step": 140
},
{
"epoch": 0.18573868488471393,
"grad_norm": 0.3433144688606262,
"learning_rate": 4.6795359332630694e-06,
"loss": 0.6487,
"step": 145
},
{
"epoch": 0.19214346712211786,
"grad_norm": 0.437122106552124,
"learning_rate": 4.653796394499904e-06,
"loss": 0.6284,
"step": 150
},
{
"epoch": 0.19854824935952178,
"grad_norm": 0.3337637484073639,
"learning_rate": 4.627139242506882e-06,
"loss": 0.6177,
"step": 155
},
{
"epoch": 0.2049530315969257,
"grad_norm": 0.3271448016166687,
"learning_rate": 4.599575834418505e-06,
"loss": 0.6604,
"step": 160
},
{
"epoch": 0.21135781383432964,
"grad_norm": 0.4029073715209961,
"learning_rate": 4.571117913474749e-06,
"loss": 0.6151,
"step": 165
},
{
"epoch": 0.21776259607173357,
"grad_norm": 0.35433897376060486,
"learning_rate": 4.541777604017924e-06,
"loss": 0.5941,
"step": 170
},
{
"epoch": 0.2241673783091375,
"grad_norm": 0.40683719515800476,
"learning_rate": 4.511567406327162e-06,
"loss": 0.6196,
"step": 175
},
{
"epoch": 0.23057216054654142,
"grad_norm": 0.41791296005249023,
"learning_rate": 4.480500191292744e-06,
"loss": 0.6104,
"step": 180
},
{
"epoch": 0.23697694278394535,
"grad_norm": 0.3712579011917114,
"learning_rate": 4.448589194932521e-06,
"loss": 0.6091,
"step": 185
},
{
"epoch": 0.24338172502134928,
"grad_norm": 0.3512028157711029,
"learning_rate": 4.415848012752789e-06,
"loss": 0.5894,
"step": 190
},
{
"epoch": 0.2497865072587532,
"grad_norm": 0.3832685649394989,
"learning_rate": 4.38229059395599e-06,
"loss": 0.6143,
"step": 195
},
{
"epoch": 0.2561912894961571,
"grad_norm": 0.3921290338039398,
"learning_rate": 4.347931235497738e-06,
"loss": 0.6161,
"step": 200
},
{
"epoch": 0.26259607173356103,
"grad_norm": 0.40466588735580444,
"learning_rate": 4.312784575995669e-06,
"loss": 0.6008,
"step": 205
},
{
"epoch": 0.269000853970965,
"grad_norm": 0.9247767329216003,
"learning_rate": 4.276865589492747e-06,
"loss": 0.5971,
"step": 210
},
{
"epoch": 0.27540563620836894,
"grad_norm": 0.5003313422203064,
"learning_rate": 4.240189579077649e-06,
"loss": 0.5832,
"step": 215
},
{
"epoch": 0.28181041844577287,
"grad_norm": 0.41465386748313904,
"learning_rate": 4.202772170364969e-06,
"loss": 0.5909,
"step": 220
},
{
"epoch": 0.2882152006831768,
"grad_norm": 0.3520627021789551,
"learning_rate": 4.164629304838012e-06,
"loss": 0.5826,
"step": 225
},
{
"epoch": 0.2946199829205807,
"grad_norm": 0.38733118772506714,
"learning_rate": 4.125777233057007e-06,
"loss": 0.6092,
"step": 230
},
{
"epoch": 0.30102476515798465,
"grad_norm": 0.3622700870037079,
"learning_rate": 4.086232507735648e-06,
"loss": 0.5844,
"step": 235
},
{
"epoch": 0.3074295473953886,
"grad_norm": 0.47380566596984863,
"learning_rate": 4.0460119766889e-06,
"loss": 0.6075,
"step": 240
},
{
"epoch": 0.3138343296327925,
"grad_norm": 0.4540008008480072,
"learning_rate": 4.005132775655076e-06,
"loss": 0.572,
"step": 245
},
{
"epoch": 0.32023911187019644,
"grad_norm": 0.47770172357559204,
"learning_rate": 3.963612320995257e-06,
"loss": 0.6175,
"step": 250
},
{
"epoch": 0.32664389410760036,
"grad_norm": 0.3514467179775238,
"learning_rate": 3.921468302273137e-06,
"loss": 0.5618,
"step": 255
},
{
"epoch": 0.3330486763450043,
"grad_norm": 0.45986905694007874,
"learning_rate": 3.8787186747184826e-06,
"loss": 0.5442,
"step": 260
},
{
"epoch": 0.3394534585824082,
"grad_norm": 0.4583056569099426,
"learning_rate": 3.8353816515774115e-06,
"loss": 0.569,
"step": 265
},
{
"epoch": 0.34585824081981215,
"grad_norm": 0.3687354028224945,
"learning_rate": 3.79147569635273e-06,
"loss": 0.5555,
"step": 270
},
{
"epoch": 0.3522630230572161,
"grad_norm": 0.38063594698905945,
"learning_rate": 3.747019514937663e-06,
"loss": 0.6,
"step": 275
},
{
"epoch": 0.35866780529462,
"grad_norm": 0.430896520614624,
"learning_rate": 3.70203204764631e-06,
"loss": 0.5843,
"step": 280
},
{
"epoch": 0.36507258753202393,
"grad_norm": 0.5169083476066589,
"learning_rate": 3.6565324611442234e-06,
"loss": 0.5914,
"step": 285
},
{
"epoch": 0.37147736976942786,
"grad_norm": 0.36347025632858276,
"learning_rate": 3.6105401402825595e-06,
"loss": 0.5674,
"step": 290
},
{
"epoch": 0.3778821520068318,
"grad_norm": 0.3636574447154999,
"learning_rate": 3.5640746798392657e-06,
"loss": 0.6123,
"step": 295
},
{
"epoch": 0.3842869342442357,
"grad_norm": 0.4619109332561493,
"learning_rate": 3.5171558761708334e-06,
"loss": 0.5708,
"step": 300
},
{
"epoch": 0.39069171648163964,
"grad_norm": 0.447704553604126,
"learning_rate": 3.469803718778166e-06,
"loss": 0.5722,
"step": 305
},
{
"epoch": 0.39709649871904357,
"grad_norm": 0.39746832847595215,
"learning_rate": 3.4220383817901625e-06,
"loss": 0.5772,
"step": 310
},
{
"epoch": 0.4035012809564475,
"grad_norm": 0.4326777160167694,
"learning_rate": 3.3738802153686414e-06,
"loss": 0.5715,
"step": 315
},
{
"epoch": 0.4099060631938514,
"grad_norm": 0.4147851765155792,
"learning_rate": 3.3253497370382605e-06,
"loss": 0.572,
"step": 320
},
{
"epoch": 0.41631084543125535,
"grad_norm": 0.43767350912094116,
"learning_rate": 3.2764676229451397e-06,
"loss": 0.563,
"step": 325
},
{
"epoch": 0.4227156276686593,
"grad_norm": 0.36241263151168823,
"learning_rate": 3.227254699047904e-06,
"loss": 0.5649,
"step": 330
},
{
"epoch": 0.4291204099060632,
"grad_norm": 0.5232857465744019,
"learning_rate": 3.177731932244892e-06,
"loss": 0.5645,
"step": 335
},
{
"epoch": 0.43552519214346713,
"grad_norm": 0.4475226104259491,
"learning_rate": 3.127920421441327e-06,
"loss": 0.5767,
"step": 340
},
{
"epoch": 0.44192997438087106,
"grad_norm": 0.4484921991825104,
"learning_rate": 3.077841388560243e-06,
"loss": 0.591,
"step": 345
},
{
"epoch": 0.448334756618275,
"grad_norm": 0.5250320434570312,
"learning_rate": 3.0275161695009975e-06,
"loss": 0.5814,
"step": 350
},
{
"epoch": 0.4547395388556789,
"grad_norm": 0.47690996527671814,
"learning_rate": 2.9769662050492276e-06,
"loss": 0.5602,
"step": 355
},
{
"epoch": 0.46114432109308284,
"grad_norm": 0.4651663899421692,
"learning_rate": 2.926213031742125e-06,
"loss": 0.5741,
"step": 360
},
{
"epoch": 0.46754910333048677,
"grad_norm": 0.46296215057373047,
"learning_rate": 2.8752782726929045e-06,
"loss": 0.5614,
"step": 365
},
{
"epoch": 0.4739538855678907,
"grad_norm": 0.5162904262542725,
"learning_rate": 2.8241836283784026e-06,
"loss": 0.5483,
"step": 370
},
{
"epoch": 0.4803586678052946,
"grad_norm": 0.3958864212036133,
"learning_rate": 2.7729508673936972e-06,
"loss": 0.5745,
"step": 375
},
{
"epoch": 0.48676345004269855,
"grad_norm": 0.4186757504940033,
"learning_rate": 2.721601817177725e-06,
"loss": 0.5459,
"step": 380
},
{
"epoch": 0.4931682322801025,
"grad_norm": 0.4372413456439972,
"learning_rate": 2.6701583547138165e-06,
"loss": 0.5852,
"step": 385
},
{
"epoch": 0.4995730145175064,
"grad_norm": 0.4488023519515991,
"learning_rate": 2.618642397209126e-06,
"loss": 0.5427,
"step": 390
},
{
"epoch": 0.5059777967549103,
"grad_norm": 0.4278182089328766,
"learning_rate": 2.567075892756924e-06,
"loss": 0.5586,
"step": 395
},
{
"epoch": 0.5123825789923142,
"grad_norm": 0.48016875982284546,
"learning_rate": 2.5154808109857367e-06,
"loss": 0.5405,
"step": 400
},
{
"epoch": 0.5187873612297181,
"grad_norm": 0.5077680945396423,
"learning_rate": 2.4638791336992967e-06,
"loss": 0.5682,
"step": 405
},
{
"epoch": 0.5251921434671221,
"grad_norm": 0.5091099739074707,
"learning_rate": 2.4122928455113233e-06,
"loss": 0.5619,
"step": 410
},
{
"epoch": 0.531596925704526,
"grad_norm": 0.4333205223083496,
"learning_rate": 2.360743924479093e-06,
"loss": 0.5879,
"step": 415
},
{
"epoch": 0.53800170794193,
"grad_norm": 0.4178122282028198,
"learning_rate": 2.3092543327398083e-06,
"loss": 0.5332,
"step": 420
},
{
"epoch": 0.544406490179334,
"grad_norm": 0.4080513119697571,
"learning_rate": 2.2578460071537512e-06,
"loss": 0.5728,
"step": 425
},
{
"epoch": 0.5508112724167379,
"grad_norm": 0.48982349038124084,
"learning_rate": 2.2065408499582e-06,
"loss": 0.575,
"step": 430
},
{
"epoch": 0.5572160546541418,
"grad_norm": 0.4953416883945465,
"learning_rate": 2.155360719436102e-06,
"loss": 0.5404,
"step": 435
},
{
"epoch": 0.5636208368915457,
"grad_norm": 0.4608188271522522,
"learning_rate": 2.1043274206034727e-06,
"loss": 0.5579,
"step": 440
},
{
"epoch": 0.5700256191289497,
"grad_norm": 0.48403236269950867,
"learning_rate": 2.0534626959194816e-06,
"loss": 0.5383,
"step": 445
},
{
"epoch": 0.5764304013663536,
"grad_norm": 0.4532581865787506,
"learning_rate": 2.002788216023203e-06,
"loss": 0.5638,
"step": 450
},
{
"epoch": 0.5828351836037575,
"grad_norm": 0.53521728515625,
"learning_rate": 1.9523255705009558e-06,
"loss": 0.5549,
"step": 455
},
{
"epoch": 0.5892399658411615,
"grad_norm": 0.4711097180843353,
"learning_rate": 1.902096258688174e-06,
"loss": 0.5027,
"step": 460
},
{
"epoch": 0.5956447480785654,
"grad_norm": 0.43662044405937195,
"learning_rate": 1.8521216805097358e-06,
"loss": 0.556,
"step": 465
},
{
"epoch": 0.6020495303159693,
"grad_norm": 0.3957918882369995,
"learning_rate": 1.8024231273626424e-06,
"loss": 0.5596,
"step": 470
},
{
"epoch": 0.6084543125533732,
"grad_norm": 0.5218236446380615,
"learning_rate": 1.7530217730449312e-06,
"loss": 0.5405,
"step": 475
},
{
"epoch": 0.6148590947907772,
"grad_norm": 0.4223135709762573,
"learning_rate": 1.7039386647346975e-06,
"loss": 0.5279,
"step": 480
},
{
"epoch": 0.6212638770281811,
"grad_norm": 0.3835909068584442,
"learning_rate": 1.6551947140230568e-06,
"loss": 0.5747,
"step": 485
},
{
"epoch": 0.627668659265585,
"grad_norm": 0.5082884430885315,
"learning_rate": 1.6068106880048747e-06,
"loss": 0.5518,
"step": 490
},
{
"epoch": 0.6340734415029889,
"grad_norm": 0.4860563278198242,
"learning_rate": 1.5588072004310634e-06,
"loss": 0.5641,
"step": 495
},
{
"epoch": 0.6404782237403929,
"grad_norm": 0.4176677167415619,
"learning_rate": 1.5112047029262e-06,
"loss": 0.5547,
"step": 500
},
{
"epoch": 0.6468830059777968,
"grad_norm": 0.3659776747226715,
"learning_rate": 1.4640234762752248e-06,
"loss": 0.5503,
"step": 505
},
{
"epoch": 0.6532877882152007,
"grad_norm": 0.4908987283706665,
"learning_rate": 1.4172836217829267e-06,
"loss": 0.5549,
"step": 510
},
{
"epoch": 0.6596925704526047,
"grad_norm": 0.44962796568870544,
"learning_rate": 1.3710050527098867e-06,
"loss": 0.573,
"step": 515
},
{
"epoch": 0.6660973526900086,
"grad_norm": 0.4549601376056671,
"learning_rate": 1.3252074857885453e-06,
"loss": 0.5666,
"step": 520
},
{
"epoch": 0.6725021349274125,
"grad_norm": 0.48955774307250977,
"learning_rate": 1.2799104328229928e-06,
"loss": 0.5379,
"step": 525
},
{
"epoch": 0.6789069171648164,
"grad_norm": 0.45902734994888306,
"learning_rate": 1.2351331923760743e-06,
"loss": 0.5345,
"step": 530
},
{
"epoch": 0.6853116994022204,
"grad_norm": 0.49846968054771423,
"learning_rate": 1.1908948415473418e-06,
"loss": 0.5367,
"step": 535
},
{
"epoch": 0.6917164816396243,
"grad_norm": 0.48370206356048584,
"learning_rate": 1.1472142278453582e-06,
"loss": 0.5325,
"step": 540
},
{
"epoch": 0.6981212638770282,
"grad_norm": 0.3830443024635315,
"learning_rate": 1.1041099611578177e-06,
"loss": 0.5585,
"step": 545
},
{
"epoch": 0.7045260461144321,
"grad_norm": 0.47550487518310547,
"learning_rate": 1.0616004058229084e-06,
"loss": 0.5417,
"step": 550
},
{
"epoch": 0.7109308283518361,
"grad_norm": 0.46026965975761414,
"learning_rate": 1.0197036728052847e-06,
"loss": 0.5715,
"step": 555
},
{
"epoch": 0.71733561058924,
"grad_norm": 0.42247724533081055,
"learning_rate": 9.784376119799851e-07,
"loss": 0.5459,
"step": 560
},
{
"epoch": 0.7237403928266439,
"grad_norm": 0.5001282095909119,
"learning_rate": 9.378198045275968e-07,
"loss": 0.5557,
"step": 565
},
{
"epoch": 0.7301451750640479,
"grad_norm": 0.4762704372406006,
"learning_rate": 8.97867555443886e-07,
"loss": 0.5338,
"step": 570
},
{
"epoch": 0.7365499573014518,
"grad_norm": 0.48811063170433044,
"learning_rate": 8.585978861670958e-07,
"loss": 0.5331,
"step": 575
},
{
"epoch": 0.7429547395388557,
"grad_norm": 0.45258718729019165,
"learning_rate": 8.200275273260611e-07,
"loss": 0.5461,
"step": 580
},
{
"epoch": 0.7493595217762596,
"grad_norm": 0.4314691424369812,
"learning_rate": 7.821729116122126e-07,
"loss": 0.558,
"step": 585
},
{
"epoch": 0.7557643040136636,
"grad_norm": 0.4526233673095703,
"learning_rate": 7.450501667785146e-07,
"loss": 0.5455,
"step": 590
},
{
"epoch": 0.7621690862510675,
"grad_norm": 0.4625132977962494,
"learning_rate": 7.086751087683297e-07,
"loss": 0.5514,
"step": 595
},
{
"epoch": 0.7685738684884714,
"grad_norm": 0.4986107349395752,
"learning_rate": 6.730632349771193e-07,
"loss": 0.5566,
"step": 600
},
{
"epoch": 0.7749786507258754,
"grad_norm": 0.5132951140403748,
"learning_rate": 6.3822971764986e-07,
"loss": 0.5363,
"step": 605
},
{
"epoch": 0.7813834329632793,
"grad_norm": 0.48895248770713806,
"learning_rate": 6.041893974169963e-07,
"loss": 0.5382,
"step": 610
},
{
"epoch": 0.7877882152006832,
"grad_norm": 0.48889264464378357,
"learning_rate": 5.709567769716678e-07,
"loss": 0.5511,
"step": 615
},
{
"epoch": 0.7941929974380871,
"grad_norm": 0.4542140066623688,
"learning_rate": 5.385460148909169e-07,
"loss": 0.5227,
"step": 620
},
{
"epoch": 0.8005977796754911,
"grad_norm": 0.48940637707710266,
"learning_rate": 5.069709196035011e-07,
"loss": 0.5519,
"step": 625
},
{
"epoch": 0.807002561912895,
"grad_norm": 0.45722976326942444,
"learning_rate": 4.762449435068914e-07,
"loss": 0.5358,
"step": 630
},
{
"epoch": 0.8134073441502989,
"grad_norm": 0.5042068958282471,
"learning_rate": 4.4638117723595054e-07,
"loss": 0.5686,
"step": 635
},
{
"epoch": 0.8198121263877028,
"grad_norm": 0.4974375069141388,
"learning_rate": 4.173923440857358e-07,
"loss": 0.5528,
"step": 640
},
{
"epoch": 0.8262169086251068,
"grad_norm": 0.4234403669834137,
"learning_rate": 3.892907945908128e-07,
"loss": 0.5305,
"step": 645
},
{
"epoch": 0.8326216908625107,
"grad_norm": 0.5144878029823303,
"learning_rate": 3.6208850126337595e-07,
"loss": 0.5282,
"step": 650
},
{
"epoch": 0.8390264730999146,
"grad_norm": 0.41059333086013794,
"learning_rate": 3.357970534924229e-07,
"loss": 0.5601,
"step": 655
},
{
"epoch": 0.8454312553373186,
"grad_norm": 0.40885528922080994,
"learning_rate": 3.104276526061617e-07,
"loss": 0.536,
"step": 660
},
{
"epoch": 0.8518360375747225,
"grad_norm": 0.462971568107605,
"learning_rate": 2.859911070997437e-07,
"loss": 0.5513,
"step": 665
},
{
"epoch": 0.8582408198121264,
"grad_norm": 0.6165898442268372,
"learning_rate": 2.624978280303628e-07,
"loss": 0.5542,
"step": 670
},
{
"epoch": 0.8646456020495303,
"grad_norm": 0.514519453048706,
"learning_rate": 2.3995782458168276e-07,
"loss": 0.5572,
"step": 675
},
{
"epoch": 0.8710503842869343,
"grad_norm": 0.5139626264572144,
"learning_rate": 2.1838069979947945e-07,
"loss": 0.5372,
"step": 680
},
{
"epoch": 0.8774551665243382,
"grad_norm": 1.6515536308288574,
"learning_rate": 1.9777564650031112e-07,
"loss": 0.5515,
"step": 685
},
{
"epoch": 0.8838599487617421,
"grad_norm": 0.4731055200099945,
"learning_rate": 1.7815144335497524e-07,
"loss": 0.5515,
"step": 690
},
{
"epoch": 0.890264730999146,
"grad_norm": 0.5183550715446472,
"learning_rate": 1.5951645114839875e-07,
"loss": 0.5419,
"step": 695
},
{
"epoch": 0.89666951323655,
"grad_norm": 0.5357317328453064,
"learning_rate": 1.4187860921757252e-07,
"loss": 0.5571,
"step": 700
},
{
"epoch": 0.9030742954739539,
"grad_norm": 0.5177751779556274,
"learning_rate": 1.2524543206904188e-07,
"loss": 0.5607,
"step": 705
},
{
"epoch": 0.9094790777113578,
"grad_norm": 0.4790054261684418,
"learning_rate": 1.0962400617738872e-07,
"loss": 0.581,
"step": 710
},
{
"epoch": 0.9158838599487618,
"grad_norm": 0.5255675911903381,
"learning_rate": 9.502098696608147e-08,
"loss": 0.5449,
"step": 715
},
{
"epoch": 0.9222886421861657,
"grad_norm": 0.38730135560035706,
"learning_rate": 8.144259597196308e-08,
"loss": 0.5518,
"step": 720
},
{
"epoch": 0.9286934244235696,
"grad_norm": 0.42933622002601624,
"learning_rate": 6.889461819460485e-08,
"loss": 0.5365,
"step": 725
},
{
"epoch": 0.9350982066609735,
"grad_norm": 0.50970458984375,
"learning_rate": 5.738239963163472e-08,
"loss": 0.5282,
"step": 730
},
{
"epoch": 0.9415029888983775,
"grad_norm": 0.5318973064422607,
"learning_rate": 4.691084500110521e-08,
"loss": 0.5281,
"step": 735
},
{
"epoch": 0.9479077711357814,
"grad_norm": 0.4877215623855591,
"learning_rate": 3.748441565186583e-08,
"loss": 0.5136,
"step": 740
},
{
"epoch": 0.9543125533731853,
"grad_norm": 0.5620718002319336,
"learning_rate": 2.910712766282908e-08,
"loss": 0.5385,
"step": 745
},
{
"epoch": 0.9607173356105893,
"grad_norm": 0.5282920598983765,
"learning_rate": 2.178255013194075e-08,
"loss": 0.5296,
"step": 750
},
{
"epoch": 0.9671221178479932,
"grad_norm": 0.422025591135025,
"learning_rate": 1.5513803655587966e-08,
"loss": 0.5131,
"step": 755
},
{
"epoch": 0.9735269000853971,
"grad_norm": 0.5139475464820862,
"learning_rate": 1.0303558999082974e-08,
"loss": 0.5625,
"step": 760
},
{
"epoch": 0.979931682322801,
"grad_norm": 0.48410946130752563,
"learning_rate": 6.1540359588005416e-09,
"loss": 0.5286,
"step": 765
},
{
"epoch": 0.986336464560205,
"grad_norm": 0.4075927138328552,
"learning_rate": 3.067002416444198e-09,
"loss": 0.5113,
"step": 770
},
{
"epoch": 0.9927412467976089,
"grad_norm": 0.4521820545196533,
"learning_rate": 1.0437735858506715e-09,
"loss": 0.5399,
"step": 775
},
{
"epoch": 0.9991460290350128,
"grad_norm": 0.41308000683784485,
"learning_rate": 8.521145264978048e-11,
"loss": 0.5787,
"step": 780
},
{
"epoch": 1.0,
"step": 781,
"total_flos": 7.995150581897871e+17,
"train_loss": 0.5977290161287891,
"train_runtime": 6942.3995,
"train_samples_per_second": 1.349,
"train_steps_per_second": 0.112
}
],
"logging_steps": 5,
"max_steps": 781,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.995150581897871e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}