llama-8b-instruct-rsimpo-full / trainer_state.json
Sean13's picture
Model save
63810dc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9959925193694897,
"eval_steps": 400,
"global_step": 233,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"agreement_weights/mean": 0.9893633127212524,
"agreement_weights/std": 0.0038108511362224817,
"epoch": 0.004274646005877639,
"eta/annotator_0": 0.9899773001670837,
"grad_norm": 9.13903924052661,
"learning_rate": 2.083333333333333e-08,
"loss": 1.6004,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.68994140625,
"rewards/margins": 0.0044460296630859375,
"rewards/rejected": -0.69384765625,
"step": 1
},
{
"agreement_weights/mean": 0.9890749454498291,
"agreement_weights/std": 0.00438307598233223,
"epoch": 0.02137323002938819,
"eta/annotator_0": 0.9897143840789795,
"grad_norm": 7.102513518402141,
"learning_rate": 1.0416666666666667e-07,
"loss": 1.5937,
"rewards/accuracies": 0.46484375,
"rewards/chosen": -0.6795654296875,
"rewards/margins": 0.01406717300415039,
"rewards/rejected": -0.6934814453125,
"step": 5
},
{
"agreement_weights/mean": 0.989261269569397,
"agreement_weights/std": 0.004110876005142927,
"epoch": 0.04274646005877638,
"eta/annotator_0": 0.9895066022872925,
"grad_norm": 15.885381584580474,
"learning_rate": 2.0833333333333333e-07,
"loss": 1.5727,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.672436535358429,
"rewards/margins": 0.042784880846738815,
"rewards/rejected": -0.715039074420929,
"step": 10
},
{
"agreement_weights/mean": 0.9888086318969727,
"agreement_weights/std": 0.004452961962670088,
"epoch": 0.06411969008816458,
"eta/annotator_0": 0.9891055822372437,
"grad_norm": 12.714553836314233,
"learning_rate": 3.1249999999999997e-07,
"loss": 1.5824,
"rewards/accuracies": 0.508593738079071,
"rewards/chosen": -0.6779540777206421,
"rewards/margins": 0.028568649664521217,
"rewards/rejected": -0.7066894769668579,
"step": 15
},
{
"agreement_weights/mean": 0.9885651469230652,
"agreement_weights/std": 0.004792415536940098,
"epoch": 0.08549292011755276,
"eta/annotator_0": 0.9884439706802368,
"grad_norm": 7.472966386913895,
"learning_rate": 4.1666666666666667e-07,
"loss": 1.5768,
"rewards/accuracies": 0.5257812738418579,
"rewards/chosen": -0.667187511920929,
"rewards/margins": 0.03665924072265625,
"rewards/rejected": -0.7039550542831421,
"step": 20
},
{
"agreement_weights/mean": 0.9881379008293152,
"agreement_weights/std": 0.004852100275456905,
"epoch": 0.10686615014694095,
"eta/annotator_0": 0.987867534160614,
"grad_norm": 9.598415267317163,
"learning_rate": 4.999717571181741e-07,
"loss": 1.5823,
"rewards/accuracies": 0.5140625238418579,
"rewards/chosen": -0.681835949420929,
"rewards/margins": 0.030136490240693092,
"rewards/rejected": -0.711962878704071,
"step": 25
},
{
"agreement_weights/mean": 0.9881780743598938,
"agreement_weights/std": 0.004625464789569378,
"epoch": 0.12823938017632916,
"eta/annotator_0": 0.9875534772872925,
"grad_norm": 7.783063478858838,
"learning_rate": 4.98983926127519e-07,
"loss": 1.5677,
"rewards/accuracies": 0.5257812738418579,
"rewards/chosen": -0.6817871332168579,
"rewards/margins": 0.051012419164180756,
"rewards/rejected": -0.7325683832168579,
"step": 30
},
{
"agreement_weights/mean": 0.9877825975418091,
"agreement_weights/std": 0.00548733863979578,
"epoch": 0.14961261020571734,
"eta/annotator_0": 0.9871212244033813,
"grad_norm": 6.975340706147195,
"learning_rate": 4.965903258506806e-07,
"loss": 1.5649,
"rewards/accuracies": 0.5132812261581421,
"rewards/chosen": -0.7159668207168579,
"rewards/margins": 0.058887481689453125,
"rewards/rejected": -0.774951159954071,
"step": 35
},
{
"agreement_weights/mean": 0.9874189496040344,
"agreement_weights/std": 0.006337934639304876,
"epoch": 0.17098584023510552,
"eta/annotator_0": 0.9867643117904663,
"grad_norm": 9.52825785684312,
"learning_rate": 4.928044706128802e-07,
"loss": 1.5521,
"rewards/accuracies": 0.5296875238418579,
"rewards/chosen": -0.718823254108429,
"rewards/margins": 0.08782501518726349,
"rewards/rejected": -0.806445300579071,
"step": 40
},
{
"agreement_weights/mean": 0.9850943684577942,
"agreement_weights/std": 0.011619331315159798,
"epoch": 0.19235907026449373,
"eta/annotator_0": 0.9836000204086304,
"grad_norm": 8.176852848379246,
"learning_rate": 4.876477354446189e-07,
"loss": 1.5612,
"rewards/accuracies": 0.5023437738418579,
"rewards/chosen": -0.735595703125,
"rewards/margins": 0.07326431572437286,
"rewards/rejected": -0.80859375,
"step": 45
},
{
"agreement_weights/mean": 0.9840047955513,
"agreement_weights/std": 0.010932808741927147,
"epoch": 0.2137323002938819,
"eta/annotator_0": 0.9824264645576477,
"grad_norm": 12.12547075711657,
"learning_rate": 4.811492353977365e-07,
"loss": 1.5727,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.770312488079071,
"rewards/margins": 0.052251435816287994,
"rewards/rejected": -0.822558581829071,
"step": 50
},
{
"agreement_weights/mean": 0.9834893345832825,
"agreement_weights/std": 0.010059957392513752,
"epoch": 0.2351055303232701,
"eta/annotator_0": 0.9817886352539062,
"grad_norm": 8.645788658599258,
"learning_rate": 4.7334566116112327e-07,
"loss": 1.5544,
"rewards/accuracies": 0.514843761920929,
"rewards/chosen": -0.7423095703125,
"rewards/margins": 0.09745025634765625,
"rewards/rejected": -0.8397461175918579,
"step": 55
},
{
"agreement_weights/mean": 0.9825822710990906,
"agreement_weights/std": 0.012372071854770184,
"epoch": 0.2564787603526583,
"eta/annotator_0": 0.9805394411087036,
"grad_norm": 8.8650266511209,
"learning_rate": 4.6428107190419983e-07,
"loss": 1.5354,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.77099609375,
"rewards/margins": 0.13408812880516052,
"rewards/rejected": -0.905029296875,
"step": 60
},
{
"agreement_weights/mean": 0.9812002182006836,
"agreement_weights/std": 0.014747394248843193,
"epoch": 0.2778519903820465,
"eta/annotator_0": 0.9782701730728149,
"grad_norm": 9.151952067919474,
"learning_rate": 4.540066465177783e-07,
"loss": 1.5263,
"rewards/accuracies": 0.535937488079071,
"rewards/chosen": -0.785595715045929,
"rewards/margins": 0.14189758896827698,
"rewards/rejected": -0.927734375,
"step": 65
},
{
"agreement_weights/mean": 0.9765976071357727,
"agreement_weights/std": 0.02648126147687435,
"epoch": 0.2992252204114347,
"eta/annotator_0": 0.9731000065803528,
"grad_norm": 8.708292437307195,
"learning_rate": 4.425803946568032e-07,
"loss": 1.5369,
"rewards/accuracies": 0.5570312738418579,
"rewards/chosen": -0.8238281011581421,
"rewards/margins": 0.10733337700366974,
"rewards/rejected": -0.931347668170929,
"step": 70
},
{
"agreement_weights/mean": 0.9765526056289673,
"agreement_weights/std": 0.020670022815465927,
"epoch": 0.32059845044082286,
"eta/annotator_0": 0.9643263816833496,
"grad_norm": 9.587633854940714,
"learning_rate": 4.300668292164329e-07,
"loss": 1.5067,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.80029296875,
"rewards/margins": 0.2055046111345291,
"rewards/rejected": -1.0055663585662842,
"step": 75
},
{
"agreement_weights/mean": 0.9720351099967957,
"agreement_weights/std": 0.02838682010769844,
"epoch": 0.34197168047021104,
"eta/annotator_0": 0.958343505859375,
"grad_norm": 9.873051661870614,
"learning_rate": 4.165366020906683e-07,
"loss": 1.5141,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": -0.85693359375,
"rewards/margins": 0.20672722160816193,
"rewards/rejected": -1.063623070716858,
"step": 80
},
{
"agreement_weights/mean": 0.9665838479995728,
"agreement_weights/std": 0.03906597942113876,
"epoch": 0.36334491049959927,
"eta/annotator_0": 0.9532757997512817,
"grad_norm": 10.098422274405984,
"learning_rate": 4.0206610527004607e-07,
"loss": 1.4912,
"rewards/accuracies": 0.5796874761581421,
"rewards/chosen": -0.904833972454071,
"rewards/margins": 0.2045547515153885,
"rewards/rejected": -1.1090819835662842,
"step": 85
},
{
"agreement_weights/mean": 0.9634488224983215,
"agreement_weights/std": 0.041894152760505676,
"epoch": 0.38471814052898745,
"eta/annotator_0": 0.9423317909240723,
"grad_norm": 9.104208697747195,
"learning_rate": 3.867370395306068e-07,
"loss": 1.4817,
"rewards/accuracies": 0.55859375,
"rewards/chosen": -0.89501953125,
"rewards/margins": 0.223399356007576,
"rewards/rejected": -1.118554711341858,
"step": 90
},
{
"agreement_weights/mean": 0.9626390337944031,
"agreement_weights/std": 0.03493572026491165,
"epoch": 0.40609137055837563,
"eta/annotator_0": 0.9396722912788391,
"grad_norm": 7.126343696816854,
"learning_rate": 3.7063595314933156e-07,
"loss": 1.4836,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.925732433795929,
"rewards/margins": 0.2864089906215668,
"rewards/rejected": -1.2123534679412842,
"step": 95
},
{
"agreement_weights/mean": 0.9595847129821777,
"agreement_weights/std": 0.04063795506954193,
"epoch": 0.4274646005877638,
"eta/annotator_0": 0.9406528472900391,
"grad_norm": 10.548775383749875,
"learning_rate": 3.5385375325047163e-07,
"loss": 1.4949,
"rewards/accuracies": 0.5804687738418579,
"rewards/chosen": -0.927539050579071,
"rewards/margins": 0.225819393992424,
"rewards/rejected": -1.153662085533142,
"step": 100
},
{
"agreement_weights/mean": 0.9436739087104797,
"agreement_weights/std": 0.06930957734584808,
"epoch": 0.448837830617152,
"eta/annotator_0": 0.929207444190979,
"grad_norm": 9.347532066530137,
"learning_rate": 3.36485192541719e-07,
"loss": 1.4894,
"rewards/accuracies": 0.561718761920929,
"rewards/chosen": -1.030908226966858,
"rewards/margins": 0.16647644340991974,
"rewards/rejected": -1.1977050304412842,
"step": 105
},
{
"agreement_weights/mean": 0.9477313756942749,
"agreement_weights/std": 0.049039699137210846,
"epoch": 0.4702110606465402,
"eta/annotator_0": 0.9266969561576843,
"grad_norm": 9.688059674033342,
"learning_rate": 3.186283343381213e-07,
"loss": 1.453,
"rewards/accuracies": 0.6109374761581421,
"rewards/chosen": -1.014257788658142,
"rewards/margins": 0.3441413938999176,
"rewards/rejected": -1.3579590320587158,
"step": 110
},
{
"agreement_weights/mean": 0.9420230984687805,
"agreement_weights/std": 0.05487104505300522,
"epoch": 0.4915842906759284,
"eta/annotator_0": 0.9168604612350464,
"grad_norm": 9.736883695942245,
"learning_rate": 3.003839988942255e-07,
"loss": 1.4668,
"rewards/accuracies": 0.59765625,
"rewards/chosen": -1.000146508216858,
"rewards/margins": 0.23876723647117615,
"rewards/rejected": -1.238916039466858,
"step": 115
},
{
"agreement_weights/mean": 0.9381793737411499,
"agreement_weights/std": 0.06204790621995926,
"epoch": 0.5129575207053166,
"eta/annotator_0": 0.9129531979560852,
"grad_norm": 9.73732055546487,
"learning_rate": 2.8185519417047623e-07,
"loss": 1.4459,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -1.0601074695587158,
"rewards/margins": 0.3048393130302429,
"rewards/rejected": -1.365087866783142,
"step": 120
},
{
"agreement_weights/mean": 0.9374347925186157,
"agreement_weights/std": 0.06295043975114822,
"epoch": 0.5343307507347048,
"eta/annotator_0": 0.9159477353096008,
"grad_norm": 8.827071222761129,
"learning_rate": 2.631465342477719e-07,
"loss": 1.425,
"rewards/accuracies": 0.629687488079071,
"rewards/chosen": -1.0723145008087158,
"rewards/margins": 0.35406264662742615,
"rewards/rejected": -1.4267089366912842,
"step": 125
},
{
"agreement_weights/mean": 0.9301049113273621,
"agreement_weights/std": 0.07405496388673782,
"epoch": 0.555703980764093,
"eta/annotator_0": 0.9193568229675293,
"grad_norm": 10.235426456133796,
"learning_rate": 2.44363648673827e-07,
"loss": 1.4406,
"rewards/accuracies": 0.633593738079071,
"rewards/chosen": -1.1417968273162842,
"rewards/margins": 0.286337286233902,
"rewards/rejected": -1.427587866783142,
"step": 130
},
{
"agreement_weights/mean": 0.9224993586540222,
"agreement_weights/std": 0.08192013949155807,
"epoch": 0.5770772107934812,
"eta/annotator_0": 0.9185341000556946,
"grad_norm": 13.055371078114435,
"learning_rate": 2.2561258607618294e-07,
"loss": 1.4315,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -1.1554687023162842,
"rewards/margins": 0.29844361543655396,
"rewards/rejected": -1.4543945789337158,
"step": 135
},
{
"agreement_weights/mean": 0.9188439249992371,
"agreement_weights/std": 0.08069366961717606,
"epoch": 0.5984504408228694,
"eta/annotator_0": 0.9095417857170105,
"grad_norm": 13.367593154679895,
"learning_rate": 2.069992154090854e-07,
"loss": 1.4244,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": -1.1845214366912842,
"rewards/margins": 0.3037376403808594,
"rewards/rejected": -1.4882323741912842,
"step": 140
},
{
"agreement_weights/mean": 0.9211521148681641,
"agreement_weights/std": 0.07038389146327972,
"epoch": 0.6198236708522575,
"eta/annotator_0": 0.9060202836990356,
"grad_norm": 9.782018935027837,
"learning_rate": 1.886286282148002e-07,
"loss": 1.4255,
"rewards/accuracies": 0.64453125,
"rewards/chosen": -1.1845703125,
"rewards/margins": 0.3395950198173523,
"rewards/rejected": -1.5246093273162842,
"step": 145
},
{
"agreement_weights/mean": 0.9100608825683594,
"agreement_weights/std": 0.0978657454252243,
"epoch": 0.6411969008816457,
"eta/annotator_0": 0.9040514826774597,
"grad_norm": 8.821271102986696,
"learning_rate": 1.7060454527421686e-07,
"loss": 1.3959,
"rewards/accuracies": 0.6421874761581421,
"rewards/chosen": -1.296484351158142,
"rewards/margins": 0.3692916929721832,
"rewards/rejected": -1.665771484375,
"step": 150
},
{
"agreement_weights/mean": 0.9100178480148315,
"agreement_weights/std": 0.09003014117479324,
"epoch": 0.6625701309110339,
"eta/annotator_0": 0.8944258689880371,
"grad_norm": 12.000776056381852,
"learning_rate": 1.5302873099680374e-07,
"loss": 1.3975,
"rewards/accuracies": 0.6390625238418579,
"rewards/chosen": -1.254052758216858,
"rewards/margins": 0.3678039610385895,
"rewards/rejected": -1.6216309070587158,
"step": 155
},
{
"agreement_weights/mean": 0.9026163220405579,
"agreement_weights/std": 0.10090925544500351,
"epoch": 0.6839433609404221,
"eta/annotator_0": 0.875512957572937,
"grad_norm": 13.811663032880974,
"learning_rate": 1.360004188562841e-07,
"loss": 1.4053,
"rewards/accuracies": 0.6429687738418579,
"rewards/chosen": -1.293554663658142,
"rewards/margins": 0.36749571561813354,
"rewards/rejected": -1.6610839366912842,
"step": 160
},
{
"agreement_weights/mean": 0.9087193608283997,
"agreement_weights/std": 0.08467105031013489,
"epoch": 0.7053165909698104,
"eta/annotator_0": 0.8710571527481079,
"grad_norm": 8.565292902683819,
"learning_rate": 1.1961575111603586e-07,
"loss": 1.3804,
"rewards/accuracies": 0.660937488079071,
"rewards/chosen": -1.29052734375,
"rewards/margins": 0.430908203125,
"rewards/rejected": -1.7209961414337158,
"step": 165
},
{
"agreement_weights/mean": 0.9024698138237,
"agreement_weights/std": 0.09167172759771347,
"epoch": 0.7266898209991985,
"eta/annotator_0": 0.8774527311325073,
"grad_norm": 12.636141779987188,
"learning_rate": 1.0396723600754143e-07,
"loss": 1.4046,
"rewards/accuracies": 0.657031238079071,
"rewards/chosen": -1.3505370616912842,
"rewards/margins": 0.3924667239189148,
"rewards/rejected": -1.7434570789337158,
"step": 170
},
{
"agreement_weights/mean": 0.9095037579536438,
"agreement_weights/std": 0.08033014833927155,
"epoch": 0.7480630510285867,
"eta/annotator_0": 0.883401095867157,
"grad_norm": 16.247214462681676,
"learning_rate": 8.914322542666822e-08,
"loss": 1.3835,
"rewards/accuracies": 0.671875,
"rewards/chosen": -1.332275390625,
"rewards/margins": 0.44039231538772583,
"rewards/rejected": -1.7732422351837158,
"step": 175
},
{
"agreement_weights/mean": 0.9053813815116882,
"agreement_weights/std": 0.08672865480184555,
"epoch": 0.7694362810579749,
"eta/annotator_0": 0.8866379857063293,
"grad_norm": 10.38117956645124,
"learning_rate": 7.522741609672193e-08,
"loss": 1.3894,
"rewards/accuracies": 0.6695312261581421,
"rewards/chosen": -1.363916039466858,
"rewards/margins": 0.42420655488967896,
"rewards/rejected": -1.787695288658142,
"step": 180
},
{
"agreement_weights/mean": 0.9022833108901978,
"agreement_weights/std": 0.0886184424161911,
"epoch": 0.7908095110873631,
"eta/annotator_0": 0.8881160020828247,
"grad_norm": 11.639888229803354,
"learning_rate": 6.229837701471644e-08,
"loss": 1.3881,
"rewards/accuracies": 0.632031261920929,
"rewards/chosen": -1.4340331554412842,
"rewards/margins": 0.4479431211948395,
"rewards/rejected": -1.8821289539337158,
"step": 185
},
{
"agreement_weights/mean": 0.9009215235710144,
"agreement_weights/std": 0.1035546064376831,
"epoch": 0.8121827411167513,
"eta/annotator_0": 0.8845187425613403,
"grad_norm": 10.334279908094931,
"learning_rate": 5.0429105848910996e-08,
"loss": 1.3478,
"rewards/accuracies": 0.671875,
"rewards/chosen": -1.462890625,
"rewards/margins": 0.4393371641635895,
"rewards/rejected": -1.902441382408142,
"step": 190
},
{
"agreement_weights/mean": 0.9054125547409058,
"agreement_weights/std": 0.09317369014024734,
"epoch": 0.8335559711461394,
"eta/annotator_0": 0.8970395922660828,
"grad_norm": 13.529308896096568,
"learning_rate": 3.968661679220467e-08,
"loss": 1.3466,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.447851538658142,
"rewards/margins": 0.5635604858398438,
"rewards/rejected": -2.010498046875,
"step": 195
},
{
"agreement_weights/mean": 0.909978985786438,
"agreement_weights/std": 0.08680907636880875,
"epoch": 0.8549292011755276,
"eta/annotator_0": 0.9059289693832397,
"grad_norm": 14.8750987464841,
"learning_rate": 3.013156219837776e-08,
"loss": 1.3394,
"rewards/accuracies": 0.686718761920929,
"rewards/chosen": -1.44580078125,
"rewards/margins": 0.5717681646347046,
"rewards/rejected": -2.017578125,
"step": 200
},
{
"agreement_weights/mean": 0.9009490013122559,
"agreement_weights/std": 0.09950422495603561,
"epoch": 0.8763024312049158,
"eta/annotator_0": 0.9060670137405396,
"grad_norm": 11.03690850563146,
"learning_rate": 2.1817890137430932e-08,
"loss": 1.373,
"rewards/accuracies": 0.6585937738418579,
"rewards/chosen": -1.499365210533142,
"rewards/margins": 0.44548338651657104,
"rewards/rejected": -1.9451172351837158,
"step": 205
},
{
"agreement_weights/mean": 0.8993496894836426,
"agreement_weights/std": 0.10450420528650284,
"epoch": 0.897675661234304,
"eta/annotator_0": 0.9054271578788757,
"grad_norm": 10.926435820427173,
"learning_rate": 1.479253980347392e-08,
"loss": 1.3494,
"rewards/accuracies": 0.6703125238418579,
"rewards/chosen": -1.4269530773162842,
"rewards/margins": 0.4932968020439148,
"rewards/rejected": -1.920312523841858,
"step": 210
},
{
"agreement_weights/mean": 0.8937209844589233,
"agreement_weights/std": 0.10731947422027588,
"epoch": 0.9190488912636923,
"eta/annotator_0": 0.9135538339614868,
"grad_norm": 13.805011448701395,
"learning_rate": 9.095176494896661e-09,
"loss": 1.3761,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -1.497216820716858,
"rewards/margins": 0.4012207090854645,
"rewards/rejected": -1.899999976158142,
"step": 215
},
{
"agreement_weights/mean": 0.898374080657959,
"agreement_weights/std": 0.09499609470367432,
"epoch": 0.9404221212930804,
"eta/annotator_0": 0.9047737121582031,
"grad_norm": 15.308557834289246,
"learning_rate": 4.757967663132689e-09,
"loss": 1.3681,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -1.476318359375,
"rewards/margins": 0.48259276151657104,
"rewards/rejected": -1.959130883216858,
"step": 220
},
{
"agreement_weights/mean": 0.8990669250488281,
"agreement_weights/std": 0.09686337411403656,
"epoch": 0.9617953513224686,
"eta/annotator_0": 0.8999508619308472,
"grad_norm": 10.411971079104832,
"learning_rate": 1.8054012944479224e-09,
"loss": 1.3562,
"rewards/accuracies": 0.6859375238418579,
"rewards/chosen": -1.455712914466858,
"rewards/margins": 0.5232818722724915,
"rewards/rejected": -1.979394555091858,
"step": 225
},
{
"agreement_weights/mean": 0.9076964259147644,
"agreement_weights/std": 0.08395025134086609,
"epoch": 0.9831685813518568,
"eta/annotator_0": 0.9021452069282532,
"grad_norm": 9.775521792655194,
"learning_rate": 2.541476501764228e-10,
"loss": 1.3527,
"rewards/accuracies": 0.69140625,
"rewards/chosen": -1.46875,
"rewards/margins": 0.5552108883857727,
"rewards/rejected": -2.024218797683716,
"step": 230
},
{
"epoch": 0.9959925193694897,
"step": 233,
"total_flos": 0.0,
"train_loss": 1.4599583829421343,
"train_runtime": 7117.4583,
"train_samples_per_second": 8.413,
"train_steps_per_second": 0.033
}
],
"logging_steps": 5,
"max_steps": 233,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}