klora_2000_skill / 21 /trainer_state.json
RayDu0010's picture
Upload folder using huggingface_hub
3d91327 verified
raw
history blame
26 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 730,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013717421124828532,
"grad_norm": 1.2567085027694702,
"learning_rate": 1.3043478260869566e-06,
"loss": 1.2424,
"step": 5
},
{
"epoch": 0.027434842249657063,
"grad_norm": 1.181019902229309,
"learning_rate": 2.9347826086956523e-06,
"loss": 1.2733,
"step": 10
},
{
"epoch": 0.0411522633744856,
"grad_norm": 0.6036953926086426,
"learning_rate": 4.565217391304348e-06,
"loss": 1.2413,
"step": 15
},
{
"epoch": 0.05486968449931413,
"grad_norm": 0.8439048528671265,
"learning_rate": 6.195652173913044e-06,
"loss": 1.2214,
"step": 20
},
{
"epoch": 0.06858710562414266,
"grad_norm": 0.5841037631034851,
"learning_rate": 7.826086956521738e-06,
"loss": 1.2187,
"step": 25
},
{
"epoch": 0.0823045267489712,
"grad_norm": 0.6085624694824219,
"learning_rate": 9.456521739130436e-06,
"loss": 1.1609,
"step": 30
},
{
"epoch": 0.09602194787379972,
"grad_norm": 0.6230579018592834,
"learning_rate": 1.108695652173913e-05,
"loss": 1.1653,
"step": 35
},
{
"epoch": 0.10973936899862825,
"grad_norm": 0.5125408172607422,
"learning_rate": 1.2717391304347827e-05,
"loss": 1.1442,
"step": 40
},
{
"epoch": 0.12345679012345678,
"grad_norm": 0.4418923258781433,
"learning_rate": 1.4347826086956522e-05,
"loss": 1.135,
"step": 45
},
{
"epoch": 0.13717421124828533,
"grad_norm": 0.39918941259384155,
"learning_rate": 1.597826086956522e-05,
"loss": 1.15,
"step": 50
},
{
"epoch": 0.15089163237311384,
"grad_norm": 0.4432643949985504,
"learning_rate": 1.7608695652173915e-05,
"loss": 1.1159,
"step": 55
},
{
"epoch": 0.1646090534979424,
"grad_norm": 0.7146240472793579,
"learning_rate": 1.9239130434782607e-05,
"loss": 1.1433,
"step": 60
},
{
"epoch": 0.17832647462277093,
"grad_norm": 0.6695220470428467,
"learning_rate": 2.0869565217391306e-05,
"loss": 1.1032,
"step": 65
},
{
"epoch": 0.19204389574759945,
"grad_norm": 0.489704966545105,
"learning_rate": 2.25e-05,
"loss": 1.0569,
"step": 70
},
{
"epoch": 0.205761316872428,
"grad_norm": 0.4658520817756653,
"learning_rate": 2.4130434782608697e-05,
"loss": 1.0715,
"step": 75
},
{
"epoch": 0.2194787379972565,
"grad_norm": 0.5942860245704651,
"learning_rate": 2.5760869565217392e-05,
"loss": 1.0534,
"step": 80
},
{
"epoch": 0.23319615912208505,
"grad_norm": 0.48524120450019836,
"learning_rate": 2.7391304347826085e-05,
"loss": 1.1362,
"step": 85
},
{
"epoch": 0.24691358024691357,
"grad_norm": 0.41874566674232483,
"learning_rate": 2.9021739130434783e-05,
"loss": 1.0455,
"step": 90
},
{
"epoch": 0.2606310013717421,
"grad_norm": 0.49967625737190247,
"learning_rate": 2.999990141214925e-05,
"loss": 1.0053,
"step": 95
},
{
"epoch": 0.27434842249657065,
"grad_norm": 0.529108464717865,
"learning_rate": 2.999879231371134e-05,
"loss": 1.0228,
"step": 100
},
{
"epoch": 0.2880658436213992,
"grad_norm": 0.5769373774528503,
"learning_rate": 2.9996450973444988e-05,
"loss": 1.0495,
"step": 105
},
{
"epoch": 0.3017832647462277,
"grad_norm": 0.6768115162849426,
"learning_rate": 2.999287758370551e-05,
"loss": 0.9451,
"step": 110
},
{
"epoch": 0.31550068587105623,
"grad_norm": 0.6151679754257202,
"learning_rate": 2.998807243806856e-05,
"loss": 1.0238,
"step": 115
},
{
"epoch": 0.3292181069958848,
"grad_norm": 0.6618012189865112,
"learning_rate": 2.998203593130602e-05,
"loss": 1.0144,
"step": 120
},
{
"epoch": 0.3429355281207133,
"grad_norm": 0.5754362344741821,
"learning_rate": 2.9974768559353564e-05,
"loss": 0.9812,
"step": 125
},
{
"epoch": 0.35665294924554186,
"grad_norm": 0.614632785320282,
"learning_rate": 2.99662709192699e-05,
"loss": 0.9152,
"step": 130
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.5445948839187622,
"learning_rate": 2.995654370918775e-05,
"loss": 1.0159,
"step": 135
},
{
"epoch": 0.3840877914951989,
"grad_norm": 0.592310905456543,
"learning_rate": 2.9945587728256456e-05,
"loss": 0.9158,
"step": 140
},
{
"epoch": 0.39780521262002744,
"grad_norm": 0.6413975954055786,
"learning_rate": 2.9933403876576364e-05,
"loss": 0.938,
"step": 145
},
{
"epoch": 0.411522633744856,
"grad_norm": 0.5976924300193787,
"learning_rate": 2.9919993155124834e-05,
"loss": 0.8984,
"step": 150
},
{
"epoch": 0.4252400548696845,
"grad_norm": 0.6372507810592651,
"learning_rate": 2.990535666567403e-05,
"loss": 0.8719,
"step": 155
},
{
"epoch": 0.438957475994513,
"grad_norm": 0.5930208563804626,
"learning_rate": 2.9889495610700416e-05,
"loss": 0.9289,
"step": 160
},
{
"epoch": 0.45267489711934156,
"grad_norm": 0.6256594061851501,
"learning_rate": 2.9872411293285916e-05,
"loss": 0.852,
"step": 165
},
{
"epoch": 0.4663923182441701,
"grad_norm": 0.6492271423339844,
"learning_rate": 2.985410511701092e-05,
"loss": 0.8581,
"step": 170
},
{
"epoch": 0.48010973936899864,
"grad_norm": 0.7370251417160034,
"learning_rate": 2.9834578585838907e-05,
"loss": 0.9052,
"step": 175
},
{
"epoch": 0.49382716049382713,
"grad_norm": 0.7324991226196289,
"learning_rate": 2.9813833303992948e-05,
"loss": 0.8242,
"step": 180
},
{
"epoch": 0.5075445816186557,
"grad_norm": 0.7410762906074524,
"learning_rate": 2.979187097582386e-05,
"loss": 0.8182,
"step": 185
},
{
"epoch": 0.5212620027434842,
"grad_norm": 0.6918640732765198,
"learning_rate": 2.976869340567021e-05,
"loss": 0.838,
"step": 190
},
{
"epoch": 0.5349794238683128,
"grad_norm": 2.690075159072876,
"learning_rate": 2.9744302497710076e-05,
"loss": 0.8393,
"step": 195
},
{
"epoch": 0.5486968449931413,
"grad_norm": 0.7638638019561768,
"learning_rate": 2.9718700255804588e-05,
"loss": 0.8171,
"step": 200
},
{
"epoch": 0.5624142661179699,
"grad_norm": 0.8737165331840515,
"learning_rate": 2.969188878333332e-05,
"loss": 0.8264,
"step": 205
},
{
"epoch": 0.5761316872427984,
"grad_norm": 0.7183387875556946,
"learning_rate": 2.9663870283021477e-05,
"loss": 0.8421,
"step": 210
},
{
"epoch": 0.5898491083676269,
"grad_norm": 0.8239722847938538,
"learning_rate": 2.9634647056758927e-05,
"loss": 0.7993,
"step": 215
},
{
"epoch": 0.6035665294924554,
"grad_norm": 0.8498063683509827,
"learning_rate": 2.960422150541109e-05,
"loss": 0.8239,
"step": 220
},
{
"epoch": 0.6172839506172839,
"grad_norm": 1.0519356727600098,
"learning_rate": 2.9572596128621683e-05,
"loss": 0.7706,
"step": 225
},
{
"epoch": 0.6310013717421125,
"grad_norm": 0.844680666923523,
"learning_rate": 2.9539773524607373e-05,
"loss": 0.7471,
"step": 230
},
{
"epoch": 0.644718792866941,
"grad_norm": 0.7600374221801758,
"learning_rate": 2.95057563899443e-05,
"loss": 0.7894,
"step": 235
},
{
"epoch": 0.6584362139917695,
"grad_norm": 0.8541322946548462,
"learning_rate": 2.947054751934656e-05,
"loss": 0.7903,
"step": 240
},
{
"epoch": 0.6721536351165981,
"grad_norm": 0.7854955196380615,
"learning_rate": 2.9434149805436586e-05,
"loss": 0.7754,
"step": 245
},
{
"epoch": 0.6858710562414266,
"grad_norm": 0.7242818474769592,
"learning_rate": 2.9396566238507496e-05,
"loss": 0.7455,
"step": 250
},
{
"epoch": 0.6995884773662552,
"grad_norm": 0.9281851053237915,
"learning_rate": 2.935779990627744e-05,
"loss": 0.7562,
"step": 255
},
{
"epoch": 0.7133058984910837,
"grad_norm": 0.8336049318313599,
"learning_rate": 2.931785399363592e-05,
"loss": 0.7557,
"step": 260
},
{
"epoch": 0.7270233196159122,
"grad_norm": 0.967589795589447,
"learning_rate": 2.9276731782382123e-05,
"loss": 0.7062,
"step": 265
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.8542262315750122,
"learning_rate": 2.9234436650955297e-05,
"loss": 0.7184,
"step": 270
},
{
"epoch": 0.7544581618655692,
"grad_norm": 0.9164705276489258,
"learning_rate": 2.9190972074157232e-05,
"loss": 0.6814,
"step": 275
},
{
"epoch": 0.7681755829903978,
"grad_norm": 0.8561894297599792,
"learning_rate": 2.9146341622866716e-05,
"loss": 0.6944,
"step": 280
},
{
"epoch": 0.7818930041152263,
"grad_norm": 0.9264963865280151,
"learning_rate": 2.910054896374623e-05,
"loss": 0.6571,
"step": 285
},
{
"epoch": 0.7956104252400549,
"grad_norm": 0.8652317523956299,
"learning_rate": 2.9053597858940666e-05,
"loss": 0.7355,
"step": 290
},
{
"epoch": 0.8093278463648834,
"grad_norm": 0.9738150835037231,
"learning_rate": 2.9005492165768278e-05,
"loss": 0.6453,
"step": 295
},
{
"epoch": 0.823045267489712,
"grad_norm": 1.012303352355957,
"learning_rate": 2.895623583640375e-05,
"loss": 0.6413,
"step": 300
},
{
"epoch": 0.8367626886145405,
"grad_norm": 0.8940839767456055,
"learning_rate": 2.890583291755351e-05,
"loss": 0.7145,
"step": 305
},
{
"epoch": 0.850480109739369,
"grad_norm": 0.8137922883033752,
"learning_rate": 2.8854287550123278e-05,
"loss": 0.6835,
"step": 310
},
{
"epoch": 0.8641975308641975,
"grad_norm": 0.8530572056770325,
"learning_rate": 2.880160396887787e-05,
"loss": 0.6702,
"step": 315
},
{
"epoch": 0.877914951989026,
"grad_norm": 0.9513605237007141,
"learning_rate": 2.8747786502093258e-05,
"loss": 0.7024,
"step": 320
},
{
"epoch": 0.8916323731138546,
"grad_norm": 0.899515688419342,
"learning_rate": 2.8692839571201e-05,
"loss": 0.6845,
"step": 325
},
{
"epoch": 0.9053497942386831,
"grad_norm": 0.9002428650856018,
"learning_rate": 2.863676769042498e-05,
"loss": 0.6447,
"step": 330
},
{
"epoch": 0.9190672153635117,
"grad_norm": 0.8433024883270264,
"learning_rate": 2.8579575466410566e-05,
"loss": 0.6325,
"step": 335
},
{
"epoch": 0.9327846364883402,
"grad_norm": 0.9360433220863342,
"learning_rate": 2.8521267597846094e-05,
"loss": 0.5826,
"step": 340
},
{
"epoch": 0.9465020576131687,
"grad_norm": 0.9888073205947876,
"learning_rate": 2.8461848875076884e-05,
"loss": 0.6349,
"step": 345
},
{
"epoch": 0.9602194787379973,
"grad_norm": 0.914045512676239,
"learning_rate": 2.8401324179711678e-05,
"loss": 0.5755,
"step": 350
},
{
"epoch": 0.9739368998628258,
"grad_norm": 0.9859684705734253,
"learning_rate": 2.8339698484221574e-05,
"loss": 0.6325,
"step": 355
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.9974327683448792,
"learning_rate": 2.827697685153151e-05,
"loss": 0.5818,
"step": 360
},
{
"epoch": 1.0,
"grad_norm": 1.362316370010376,
"learning_rate": 2.8213164434604316e-05,
"loss": 0.5783,
"step": 365
},
{
"epoch": 1.0137174211248285,
"grad_norm": 1.4332367181777954,
"learning_rate": 2.814826647601738e-05,
"loss": 0.5402,
"step": 370
},
{
"epoch": 1.027434842249657,
"grad_norm": 1.2112072706222534,
"learning_rate": 2.8082288307531914e-05,
"loss": 0.5368,
"step": 375
},
{
"epoch": 1.0411522633744856,
"grad_norm": 1.2344884872436523,
"learning_rate": 2.8015235349654938e-05,
"loss": 0.5097,
"step": 380
},
{
"epoch": 1.0548696844993142,
"grad_norm": 0.9244922995567322,
"learning_rate": 2.7947113111193936e-05,
"loss": 0.5583,
"step": 385
},
{
"epoch": 1.0685871056241427,
"grad_norm": 0.980315089225769,
"learning_rate": 2.7877927188804288e-05,
"loss": 0.51,
"step": 390
},
{
"epoch": 1.0823045267489713,
"grad_norm": 0.9603582620620728,
"learning_rate": 2.7807683266529466e-05,
"loss": 0.5517,
"step": 395
},
{
"epoch": 1.0960219478737998,
"grad_norm": 1.0928096771240234,
"learning_rate": 2.773638711533405e-05,
"loss": 0.5018,
"step": 400
},
{
"epoch": 1.1097393689986284,
"grad_norm": 1.0439014434814453,
"learning_rate": 2.7664044592629615e-05,
"loss": 0.5139,
"step": 405
},
{
"epoch": 1.123456790123457,
"grad_norm": 0.9458956718444824,
"learning_rate": 2.7590661641793513e-05,
"loss": 0.5583,
"step": 410
},
{
"epoch": 1.1371742112482854,
"grad_norm": 0.9447291493415833,
"learning_rate": 2.7516244291680565e-05,
"loss": 0.4616,
"step": 415
},
{
"epoch": 1.1508916323731138,
"grad_norm": 1.076687216758728,
"learning_rate": 2.7440798656127792e-05,
"loss": 0.4695,
"step": 420
},
{
"epoch": 1.1646090534979423,
"grad_norm": 1.0236420631408691,
"learning_rate": 2.7364330933452094e-05,
"loss": 0.5455,
"step": 425
},
{
"epoch": 1.1783264746227708,
"grad_norm": 1.0733507871627808,
"learning_rate": 2.7286847405941024e-05,
"loss": 0.4956,
"step": 430
},
{
"epoch": 1.1920438957475994,
"grad_norm": 0.9793803095817566,
"learning_rate": 2.720835443933669e-05,
"loss": 0.5202,
"step": 435
},
{
"epoch": 1.205761316872428,
"grad_norm": 1.0080183744430542,
"learning_rate": 2.712885848231273e-05,
"loss": 0.4865,
"step": 440
},
{
"epoch": 1.2194787379972565,
"grad_norm": 0.9846312999725342,
"learning_rate": 2.7048366065944538e-05,
"loss": 0.4843,
"step": 445
},
{
"epoch": 1.233196159122085,
"grad_norm": 1.0748393535614014,
"learning_rate": 2.6966883803172698e-05,
"loss": 0.4714,
"step": 450
},
{
"epoch": 1.2469135802469136,
"grad_norm": 1.0638792514801025,
"learning_rate": 2.6884418388259675e-05,
"loss": 0.5295,
"step": 455
},
{
"epoch": 1.260631001371742,
"grad_norm": 1.0708755254745483,
"learning_rate": 2.6800976596239855e-05,
"loss": 0.4713,
"step": 460
},
{
"epoch": 1.2743484224965707,
"grad_norm": 0.9797709584236145,
"learning_rate": 2.6716565282362928e-05,
"loss": 0.4489,
"step": 465
},
{
"epoch": 1.2880658436213992,
"grad_norm": 1.017024278640747,
"learning_rate": 2.663119138153069e-05,
"loss": 0.4699,
"step": 470
},
{
"epoch": 1.3017832647462277,
"grad_norm": 1.1177973747253418,
"learning_rate": 2.654486190772729e-05,
"loss": 0.4354,
"step": 475
},
{
"epoch": 1.3155006858710563,
"grad_norm": 0.9993549585342407,
"learning_rate": 2.6457583953443022e-05,
"loss": 0.4882,
"step": 480
},
{
"epoch": 1.3292181069958848,
"grad_norm": 1.1262377500534058,
"learning_rate": 2.636936468909158e-05,
"loss": 0.41,
"step": 485
},
{
"epoch": 1.3429355281207134,
"grad_norm": 0.964245080947876,
"learning_rate": 2.628021136242101e-05,
"loss": 0.4364,
"step": 490
},
{
"epoch": 1.356652949245542,
"grad_norm": 1.0607110261917114,
"learning_rate": 2.619013129791823e-05,
"loss": 0.4891,
"step": 495
},
{
"epoch": 1.3703703703703702,
"grad_norm": 1.0203349590301514,
"learning_rate": 2.6099131896207327e-05,
"loss": 0.4341,
"step": 500
},
{
"epoch": 1.3840877914951988,
"grad_norm": 0.9529250860214233,
"learning_rate": 2.6007220633441486e-05,
"loss": 0.4266,
"step": 505
},
{
"epoch": 1.3978052126200273,
"grad_norm": 0.9978516697883606,
"learning_rate": 2.591440506068883e-05,
"loss": 0.434,
"step": 510
},
{
"epoch": 1.4115226337448559,
"grad_norm": 1.03233003616333,
"learning_rate": 2.582069280331204e-05,
"loss": 0.3978,
"step": 515
},
{
"epoch": 1.4252400548696844,
"grad_norm": 1.2834852933883667,
"learning_rate": 2.5726091560341873e-05,
"loss": 0.4496,
"step": 520
},
{
"epoch": 1.438957475994513,
"grad_norm": 1.0178697109222412,
"learning_rate": 2.5630609103844646e-05,
"loss": 0.433,
"step": 525
},
{
"epoch": 1.4526748971193415,
"grad_norm": 1.0839297771453857,
"learning_rate": 2.5534253278283725e-05,
"loss": 0.4494,
"step": 530
},
{
"epoch": 1.46639231824417,
"grad_norm": 0.9672828316688538,
"learning_rate": 2.5437031999875047e-05,
"loss": 0.4438,
"step": 535
},
{
"epoch": 1.4801097393689986,
"grad_norm": 0.9954227805137634,
"learning_rate": 2.533895325593674e-05,
"loss": 0.4253,
"step": 540
},
{
"epoch": 1.4938271604938271,
"grad_norm": 0.9949136972427368,
"learning_rate": 2.5240025104232938e-05,
"loss": 0.4565,
"step": 545
},
{
"epoch": 1.5075445816186557,
"grad_norm": 1.2475636005401611,
"learning_rate": 2.514025567231178e-05,
"loss": 0.397,
"step": 550
},
{
"epoch": 1.5212620027434842,
"grad_norm": 1.0037999153137207,
"learning_rate": 2.5039653156837686e-05,
"loss": 0.3955,
"step": 555
},
{
"epoch": 1.5349794238683128,
"grad_norm": 1.2002379894256592,
"learning_rate": 2.4938225822917932e-05,
"loss": 0.3541,
"step": 560
},
{
"epoch": 1.5486968449931413,
"grad_norm": 1.0180314779281616,
"learning_rate": 2.4835982003423654e-05,
"loss": 0.403,
"step": 565
},
{
"epoch": 1.5624142661179699,
"grad_norm": 1.017683982849121,
"learning_rate": 2.473293009830522e-05,
"loss": 0.4082,
"step": 570
},
{
"epoch": 1.5761316872427984,
"grad_norm": 1.055148720741272,
"learning_rate": 2.4629078573902136e-05,
"loss": 0.4118,
"step": 575
},
{
"epoch": 1.589849108367627,
"grad_norm": 1.082885980606079,
"learning_rate": 2.45244359622475e-05,
"loss": 0.3646,
"step": 580
},
{
"epoch": 1.6035665294924555,
"grad_norm": 0.9864196181297302,
"learning_rate": 2.4419010860367013e-05,
"loss": 0.3726,
"step": 585
},
{
"epoch": 1.617283950617284,
"grad_norm": 1.0728785991668701,
"learning_rate": 2.431281192957271e-05,
"loss": 0.3665,
"step": 590
},
{
"epoch": 1.6310013717421126,
"grad_norm": 1.0340756177902222,
"learning_rate": 2.4205847894751358e-05,
"loss": 0.402,
"step": 595
},
{
"epoch": 1.6447187928669411,
"grad_norm": 0.9732591509819031,
"learning_rate": 2.409812754364768e-05,
"loss": 0.3913,
"step": 600
},
{
"epoch": 1.6584362139917697,
"grad_norm": 0.9975690245628357,
"learning_rate": 2.398965972614235e-05,
"loss": 0.356,
"step": 605
},
{
"epoch": 1.6721536351165982,
"grad_norm": 1.0766946077346802,
"learning_rate": 2.3880453353524963e-05,
"loss": 0.3921,
"step": 610
},
{
"epoch": 1.6858710562414267,
"grad_norm": 1.0128309726715088,
"learning_rate": 2.377051739776189e-05,
"loss": 0.4264,
"step": 615
},
{
"epoch": 1.6995884773662553,
"grad_norm": 1.0055245161056519,
"learning_rate": 2.3659860890759184e-05,
"loss": 0.3296,
"step": 620
},
{
"epoch": 1.7133058984910838,
"grad_norm": 1.058605432510376,
"learning_rate": 2.3548492923620567e-05,
"loss": 0.3617,
"step": 625
},
{
"epoch": 1.7270233196159122,
"grad_norm": 1.0829980373382568,
"learning_rate": 2.343642264590051e-05,
"loss": 0.3801,
"step": 630
},
{
"epoch": 1.7407407407407407,
"grad_norm": 1.2426400184631348,
"learning_rate": 2.3323659264852586e-05,
"loss": 0.3491,
"step": 635
},
{
"epoch": 1.7544581618655692,
"grad_norm": 1.0566037893295288,
"learning_rate": 2.3210212044672995e-05,
"loss": 0.3547,
"step": 640
},
{
"epoch": 1.7681755829903978,
"grad_norm": 0.9220610857009888,
"learning_rate": 2.3096090305739476e-05,
"loss": 0.3682,
"step": 645
},
{
"epoch": 1.7818930041152263,
"grad_norm": 1.124056100845337,
"learning_rate": 2.298130342384559e-05,
"loss": 0.3236,
"step": 650
},
{
"epoch": 1.7956104252400549,
"grad_norm": 1.10678231716156,
"learning_rate": 2.2865860829430405e-05,
"loss": 0.359,
"step": 655
},
{
"epoch": 1.8093278463648834,
"grad_norm": 1.1028835773468018,
"learning_rate": 2.2749772006803782e-05,
"loss": 0.3276,
"step": 660
},
{
"epoch": 1.823045267489712,
"grad_norm": 0.9391087889671326,
"learning_rate": 2.2633046493367128e-05,
"loss": 0.3589,
"step": 665
},
{
"epoch": 1.8367626886145405,
"grad_norm": 1.010254979133606,
"learning_rate": 2.2515693878829872e-05,
"loss": 0.3517,
"step": 670
},
{
"epoch": 1.850480109739369,
"grad_norm": 1.0423787832260132,
"learning_rate": 2.2397723804421613e-05,
"loss": 0.32,
"step": 675
},
{
"epoch": 1.8641975308641974,
"grad_norm": 1.1941534280776978,
"learning_rate": 2.227914596210002e-05,
"loss": 0.3128,
"step": 680
},
{
"epoch": 1.877914951989026,
"grad_norm": 0.9325462579727173,
"learning_rate": 2.2159970093754583e-05,
"loss": 0.3595,
"step": 685
},
{
"epoch": 1.8916323731138545,
"grad_norm": 1.2271301746368408,
"learning_rate": 2.2040205990406257e-05,
"loss": 0.3118,
"step": 690
},
{
"epoch": 1.905349794238683,
"grad_norm": 0.9768636226654053,
"learning_rate": 2.1919863491403083e-05,
"loss": 0.3858,
"step": 695
},
{
"epoch": 1.9190672153635115,
"grad_norm": 1.3226031064987183,
"learning_rate": 2.1798952483611812e-05,
"loss": 0.3268,
"step": 700
},
{
"epoch": 1.93278463648834,
"grad_norm": 0.8924455046653748,
"learning_rate": 2.167748290060564e-05,
"loss": 0.2887,
"step": 705
},
{
"epoch": 1.9465020576131686,
"grad_norm": 1.220363974571228,
"learning_rate": 2.1555464721848107e-05,
"loss": 0.3174,
"step": 710
},
{
"epoch": 1.9602194787379972,
"grad_norm": 1.1989498138427734,
"learning_rate": 2.1432907971873225e-05,
"loss": 0.3026,
"step": 715
},
{
"epoch": 1.9739368998628257,
"grad_norm": 1.065783977508545,
"learning_rate": 2.1309822719461905e-05,
"loss": 0.3121,
"step": 720
},
{
"epoch": 1.9876543209876543,
"grad_norm": 1.019674301147461,
"learning_rate": 2.118621907681474e-05,
"loss": 0.2984,
"step": 725
},
{
"epoch": 2.0,
"grad_norm": 1.3716942071914673,
"learning_rate": 2.106210719872121e-05,
"loss": 0.2699,
"step": 730
}
],
"logging_steps": 5,
"max_steps": 1825,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.1214531076208722e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}