gemma-7b-softplus-basic-5e-5-v4 / trainer_state.json
silviasapora's picture
Model save
d855855 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9765925925925925,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 1114.319091796875,
"learning_rate": 7.8125e-06,
"log_odds_chosen": -1.007889747619629,
"log_odds_ratio": -10.178816795349121,
"logps/chosen": -21.696243286132812,
"logps/rejected": -20.688587188720703,
"loss": 547.5699,
"nll_loss": 9.67590618133545,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -10.848121643066406,
"rewards/margins": -0.503828227519989,
"rewards/rejected": -10.344293594360352,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 923.0885620117188,
"learning_rate": 1.5625e-05,
"log_odds_chosen": -2.801370143890381,
"log_odds_ratio": -10.164667129516602,
"logps/chosen": -20.971328735351562,
"logps/rejected": -18.17032241821289,
"loss": 553.2887,
"nll_loss": 8.679121971130371,
"rewards/accuracies": 0.4468750059604645,
"rewards/chosen": -10.485664367675781,
"rewards/margins": -1.4005038738250732,
"rewards/rejected": -9.085161209106445,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 460.3068542480469,
"learning_rate": 2.34375e-05,
"log_odds_chosen": -1.2739719152450562,
"log_odds_ratio": -10.46790599822998,
"logps/chosen": -20.88275909423828,
"logps/rejected": -19.607295989990234,
"loss": 521.1988,
"nll_loss": 8.076498031616211,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -10.44137954711914,
"rewards/margins": -0.6377328038215637,
"rewards/rejected": -9.803647994995117,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 4484.80712890625,
"learning_rate": 3.125e-05,
"log_odds_chosen": -3.9785079956054688,
"log_odds_ratio": -8.86056900024414,
"logps/chosen": -16.492053985595703,
"logps/rejected": -12.51595401763916,
"loss": 449.7023,
"nll_loss": 6.58371639251709,
"rewards/accuracies": 0.4375,
"rewards/chosen": -8.246026992797852,
"rewards/margins": -1.9880508184432983,
"rewards/rejected": -6.25797700881958,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 703.4686279296875,
"learning_rate": 3.90625e-05,
"log_odds_chosen": -0.08368232101202011,
"log_odds_ratio": -1.9139995574951172,
"logps/chosen": -4.356338977813721,
"logps/rejected": -4.261002540588379,
"loss": 110.5851,
"nll_loss": 3.422860622406006,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -2.1781694889068604,
"rewards/margins": -0.0476679690182209,
"rewards/rejected": -2.1305012702941895,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 436.39642333984375,
"learning_rate": 4.6875e-05,
"log_odds_chosen": -0.037819117307662964,
"log_odds_ratio": -1.0239157676696777,
"logps/chosen": -2.210418462753296,
"logps/rejected": -2.175981044769287,
"loss": 55.4786,
"nll_loss": 2.731612205505371,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -1.105209231376648,
"rewards/margins": -0.017218593508005142,
"rewards/rejected": -1.0879905223846436,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 202.19808959960938,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.19444182515144348,
"log_odds_ratio": -0.8455008268356323,
"logps/chosen": -1.7893317937850952,
"logps/rejected": -1.9684311151504517,
"loss": 41.4203,
"nll_loss": 2.4831597805023193,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.8946658968925476,
"rewards/margins": 0.08954959362745285,
"rewards/rejected": -0.9842155575752258,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 168.00384521484375,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.29362189769744873,
"log_odds_ratio": -0.7308410406112671,
"logps/chosen": -1.5209834575653076,
"logps/rejected": -1.7699060440063477,
"loss": 33.351,
"nll_loss": 2.3416779041290283,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": -0.7604917287826538,
"rewards/margins": 0.12446129322052002,
"rewards/rejected": -0.8849530220031738,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 193.2564239501953,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 0.298764705657959,
"log_odds_ratio": -0.7091392278671265,
"logps/chosen": -1.45993971824646,
"logps/rejected": -1.725881576538086,
"loss": 31.5672,
"nll_loss": 2.40507173538208,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.72996985912323,
"rewards/margins": 0.1329708844423294,
"rewards/rejected": -0.862940788269043,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 265.47442626953125,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 0.3887616693973541,
"log_odds_ratio": -0.6871553063392639,
"logps/chosen": -1.3525534868240356,
"logps/rejected": -1.6923545598983765,
"loss": 27.2478,
"nll_loss": 2.172515392303467,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.6762767434120178,
"rewards/margins": 0.16990050673484802,
"rewards/rejected": -0.8461772799491882,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 976.0902709960938,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 1.4601755142211914,
"log_odds_ratio": -0.7356270551681519,
"logps/chosen": -1.7127513885498047,
"logps/rejected": -3.102419137954712,
"loss": 21.1464,
"nll_loss": 2.375514268875122,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -0.8563756942749023,
"rewards/margins": 0.6948337554931641,
"rewards/rejected": -1.551209568977356,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 19477.037109375,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 9.887539863586426,
"log_odds_ratio": -2.272916316986084,
"logps/chosen": -5.333150863647461,
"logps/rejected": -15.112531661987305,
"loss": -21.8917,
"nll_loss": 5.349360466003418,
"rewards/accuracies": 0.703125,
"rewards/chosen": -2.6665754318237305,
"rewards/margins": 4.889690399169922,
"rewards/rejected": -7.556265830993652,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 63504.7578125,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 6.005472183227539,
"log_odds_ratio": -5.739134788513184,
"logps/chosen": -10.86668872833252,
"logps/rejected": -16.817562103271484,
"loss": 161.1276,
"nll_loss": 9.975183486938477,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -5.43334436416626,
"rewards/margins": 2.975436210632324,
"rewards/rejected": -8.408781051635742,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 3978.453857421875,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 17.832883834838867,
"log_odds_ratio": -5.360915184020996,
"logps/chosen": -8.222649574279785,
"logps/rejected": -25.95966148376465,
"loss": -106.2073,
"nll_loss": 7.058934211730957,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -4.111324787139893,
"rewards/margins": 8.868505477905273,
"rewards/rejected": -12.979830741882324,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 16509.505859375,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 20.426956176757812,
"log_odds_ratio": -9.292292594909668,
"logps/chosen": -15.002415657043457,
"logps/rejected": -35.350364685058594,
"loss": 6.2434,
"nll_loss": 11.920351028442383,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -7.5012078285217285,
"rewards/margins": 10.173975944519043,
"rewards/rejected": -17.675182342529297,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 3993.2314453125,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 21.786739349365234,
"log_odds_ratio": -9.150012969970703,
"logps/chosen": -12.506328582763672,
"logps/rejected": -34.199073791503906,
"loss": -92.8843,
"nll_loss": 10.512908935546875,
"rewards/accuracies": 0.65625,
"rewards/chosen": -6.253164291381836,
"rewards/margins": 10.8463716506958,
"rewards/rejected": -17.099536895751953,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 3684.40771484375,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 21.78746795654297,
"log_odds_ratio": -7.730111122131348,
"logps/chosen": -8.821676254272461,
"logps/rejected": -30.544620513916016,
"loss": -188.5252,
"nll_loss": 7.775321006774902,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -4.4108381271362305,
"rewards/margins": 10.861473083496094,
"rewards/rejected": -15.272310256958008,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 4620.634765625,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 28.149227142333984,
"log_odds_ratio": -15.921751022338867,
"logps/chosen": -24.887939453125,
"logps/rejected": -52.94123077392578,
"loss": 93.227,
"nll_loss": 20.595924377441406,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -12.4439697265625,
"rewards/margins": 14.026643753051758,
"rewards/rejected": -26.47061538696289,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 2822.56982421875,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 48.35707473754883,
"log_odds_ratio": -11.363774299621582,
"logps/chosen": -19.054187774658203,
"logps/rejected": -67.31429290771484,
"loss": -343.8322,
"nll_loss": 13.582868576049805,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -9.527093887329102,
"rewards/margins": 24.130054473876953,
"rewards/rejected": -33.65714645385742,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 11888.7197265625,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 37.94886016845703,
"log_odds_ratio": -13.369104385375977,
"logps/chosen": -25.14851951599121,
"logps/rejected": -62.99921798706055,
"loss": -14.4122,
"nll_loss": 19.4227294921875,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -12.574259757995605,
"rewards/margins": 18.925350189208984,
"rewards/rejected": -31.499608993530273,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 17781.978515625,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 32.50678253173828,
"log_odds_ratio": -13.40135669708252,
"logps/chosen": -26.233570098876953,
"logps/rejected": -58.676490783691406,
"loss": 106.4254,
"nll_loss": 19.13003158569336,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -13.116785049438477,
"rewards/margins": 16.22146224975586,
"rewards/rejected": -29.338245391845703,
"step": 105
},
{
"epoch": 1.037925925925926,
"grad_norm": 8946.7998046875,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 11.558371543884277,
"log_odds_ratio": -7.9565300941467285,
"logps/chosen": -13.197209358215332,
"logps/rejected": -24.688472747802734,
"loss": 99.8268,
"nll_loss": 10.583297729492188,
"rewards/accuracies": 0.618881106376648,
"rewards/chosen": -6.598604679107666,
"rewards/margins": 5.745632171630859,
"rewards/rejected": -12.344236373901367,
"step": 110
},
{
"epoch": 1.0853333333333333,
"grad_norm": 28189.90625,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 23.58847427368164,
"log_odds_ratio": -8.089736938476562,
"logps/chosen": -9.830774307250977,
"logps/rejected": -33.307220458984375,
"loss": -189.9675,
"nll_loss": 7.8513503074646,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -4.915387153625488,
"rewards/margins": 11.7382230758667,
"rewards/rejected": -16.653610229492188,
"step": 115
},
{
"epoch": 1.1327407407407408,
"grad_norm": 4763.630859375,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 37.111053466796875,
"log_odds_ratio": -8.056583404541016,
"logps/chosen": -9.871607780456543,
"logps/rejected": -46.8720588684082,
"loss": -404.5778,
"nll_loss": 8.503942489624023,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -4.9358038902282715,
"rewards/margins": 18.500225067138672,
"rewards/rejected": -23.4360294342041,
"step": 120
},
{
"epoch": 1.1801481481481482,
"grad_norm": 5928.169921875,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 35.93215560913086,
"log_odds_ratio": -9.4933443069458,
"logps/chosen": -11.483396530151367,
"logps/rejected": -47.30681610107422,
"loss": -357.2632,
"nll_loss": 10.28132438659668,
"rewards/accuracies": 0.671875,
"rewards/chosen": -5.741698265075684,
"rewards/margins": 17.911710739135742,
"rewards/rejected": -23.65340805053711,
"step": 125
},
{
"epoch": 1.2275555555555555,
"grad_norm": 11281.740234375,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 40.6527214050293,
"log_odds_ratio": -11.877891540527344,
"logps/chosen": -16.65973472595215,
"logps/rejected": -57.17784881591797,
"loss": -304.9178,
"nll_loss": 12.847620964050293,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -8.329867362976074,
"rewards/margins": 20.259056091308594,
"rewards/rejected": -28.588924407958984,
"step": 130
},
{
"epoch": 1.274962962962963,
"grad_norm": 7086.1142578125,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 11.272117614746094,
"log_odds_ratio": -6.918790340423584,
"logps/chosen": -7.792928218841553,
"logps/rejected": -18.96177101135254,
"loss": -39.8691,
"nll_loss": 6.798020362854004,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -3.8964641094207764,
"rewards/margins": 5.584420204162598,
"rewards/rejected": -9.48088550567627,
"step": 135
},
{
"epoch": 1.3223703703703704,
"grad_norm": 137599.171875,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 21.463497161865234,
"log_odds_ratio": -2.791048049926758,
"logps/chosen": -5.230650424957275,
"logps/rejected": -26.6016902923584,
"loss": -218.9189,
"nll_loss": 4.977573394775391,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -2.6153252124786377,
"rewards/margins": 10.68552017211914,
"rewards/rejected": -13.3008451461792,
"step": 140
},
{
"epoch": 1.3697777777777778,
"grad_norm": 20666.33203125,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 40.393245697021484,
"log_odds_ratio": -13.250892639160156,
"logps/chosen": -19.773197174072266,
"logps/rejected": -60.044952392578125,
"loss": -223.2017,
"nll_loss": 16.38017463684082,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -9.886598587036133,
"rewards/margins": 20.135875701904297,
"rewards/rejected": -30.022476196289062,
"step": 145
},
{
"epoch": 1.417185185185185,
"grad_norm": 19448.947265625,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 45.739837646484375,
"log_odds_ratio": -10.681756973266602,
"logps/chosen": -15.583732604980469,
"logps/rejected": -61.16387176513672,
"loss": -401.1902,
"nll_loss": 12.756240844726562,
"rewards/accuracies": 0.703125,
"rewards/chosen": -7.791866302490234,
"rewards/margins": 22.790069580078125,
"rewards/rejected": -30.58193588256836,
"step": 150
},
{
"epoch": 1.4645925925925927,
"grad_norm": 7016.2177734375,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 26.974849700927734,
"log_odds_ratio": -9.331676483154297,
"logps/chosen": -11.568083763122559,
"logps/rejected": -38.42928695678711,
"loss": -208.4774,
"nll_loss": 10.451626777648926,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -5.784041881561279,
"rewards/margins": 13.430601119995117,
"rewards/rejected": -19.214643478393555,
"step": 155
},
{
"epoch": 1.512,
"grad_norm": 2596.490234375,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 17.883655548095703,
"log_odds_ratio": -1.8087437152862549,
"logps/chosen": -3.404132843017578,
"logps/rejected": -21.178518295288086,
"loss": -204.2252,
"nll_loss": 3.781926393508911,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -1.702066421508789,
"rewards/margins": 8.88719367980957,
"rewards/rejected": -10.589259147644043,
"step": 160
},
{
"epoch": 1.5594074074074074,
"grad_norm": 15875.6162109375,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 17.571182250976562,
"log_odds_ratio": -6.643049716949463,
"logps/chosen": -9.500158309936523,
"logps/rejected": -26.94256591796875,
"loss": -81.0908,
"nll_loss": 8.642666816711426,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -4.750079154968262,
"rewards/margins": 8.72120475769043,
"rewards/rejected": -13.471282958984375,
"step": 165
},
{
"epoch": 1.6068148148148147,
"grad_norm": 6882.3740234375,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 37.06153106689453,
"log_odds_ratio": -7.4608635902404785,
"logps/chosen": -15.66374683380127,
"logps/rejected": -52.575416564941406,
"loss": -208.487,
"nll_loss": 13.647031784057617,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -7.831873416900635,
"rewards/margins": 18.45583152770996,
"rewards/rejected": -26.287708282470703,
"step": 170
},
{
"epoch": 1.6542222222222223,
"grad_norm": 6623.11572265625,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 21.147552490234375,
"log_odds_ratio": -7.808111667633057,
"logps/chosen": -9.466218948364258,
"logps/rejected": -30.470600128173828,
"loss": -157.8435,
"nll_loss": 7.549225807189941,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -4.733109474182129,
"rewards/margins": 10.502188682556152,
"rewards/rejected": -15.235300064086914,
"step": 175
},
{
"epoch": 1.7016296296296296,
"grad_norm": 5010.78564453125,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 24.123126983642578,
"log_odds_ratio": -1.9697444438934326,
"logps/chosen": -2.780689001083374,
"logps/rejected": -26.779048919677734,
"loss": -326.2735,
"nll_loss": 3.6041767597198486,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -1.390344500541687,
"rewards/margins": 11.99917984008789,
"rewards/rejected": -13.389524459838867,
"step": 180
},
{
"epoch": 1.749037037037037,
"grad_norm": 6793.22412109375,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 38.25811004638672,
"log_odds_ratio": -7.5069379806518555,
"logps/chosen": -9.548910140991211,
"logps/rejected": -47.6781120300293,
"loss": -424.2517,
"nll_loss": 7.210787296295166,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -4.7744550704956055,
"rewards/margins": 19.06460189819336,
"rewards/rejected": -23.83905601501465,
"step": 185
},
{
"epoch": 1.7964444444444445,
"grad_norm": 68079.1953125,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 41.46270751953125,
"log_odds_ratio": -9.237896919250488,
"logps/chosen": -11.56389331817627,
"logps/rejected": -52.86220169067383,
"loss": -438.3831,
"nll_loss": 10.06884765625,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -5.781946659088135,
"rewards/margins": 20.649154663085938,
"rewards/rejected": -26.431100845336914,
"step": 190
},
{
"epoch": 1.8438518518518519,
"grad_norm": 19936.81640625,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 43.99555206298828,
"log_odds_ratio": -7.328845977783203,
"logps/chosen": -9.099332809448242,
"logps/rejected": -52.933860778808594,
"loss": -527.35,
"nll_loss": 8.21430778503418,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -4.549666404724121,
"rewards/margins": 21.91726303100586,
"rewards/rejected": -26.466930389404297,
"step": 195
},
{
"epoch": 1.8912592592592592,
"grad_norm": 291176.21875,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 32.236446380615234,
"log_odds_ratio": -11.109225273132324,
"logps/chosen": -14.257405281066895,
"logps/rejected": -46.36336898803711,
"loss": -234.8545,
"nll_loss": 11.66010856628418,
"rewards/accuracies": 0.71875,
"rewards/chosen": -7.128702640533447,
"rewards/margins": 16.052982330322266,
"rewards/rejected": -23.181684494018555,
"step": 200
},
{
"epoch": 1.9386666666666668,
"grad_norm": 3509.22021484375,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 30.14800453186035,
"log_odds_ratio": -12.824560165405273,
"logps/chosen": -15.561635971069336,
"logps/rejected": -45.55414581298828,
"loss": -186.7682,
"nll_loss": 12.363306045532227,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -7.780817985534668,
"rewards/margins": 14.996258735656738,
"rewards/rejected": -22.77707290649414,
"step": 205
},
{
"epoch": 1.986074074074074,
"grad_norm": 2383.834228515625,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 41.85120391845703,
"log_odds_ratio": -13.449541091918945,
"logps/chosen": -15.545976638793945,
"logps/rejected": -57.27630615234375,
"loss": -384.8493,
"nll_loss": 11.846510887145996,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -7.772988319396973,
"rewards/margins": 20.86516761779785,
"rewards/rejected": -28.638153076171875,
"step": 210
},
{
"epoch": 2.0284444444444443,
"grad_norm": 24200.37109375,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 33.1331787109375,
"log_odds_ratio": -8.699053764343262,
"logps/chosen": -9.583888053894043,
"logps/rejected": -42.54216384887695,
"loss": -321.3827,
"nll_loss": 8.21044921875,
"rewards/accuracies": 0.7272727489471436,
"rewards/chosen": -4.7919440269470215,
"rewards/margins": 16.479137420654297,
"rewards/rejected": -21.271081924438477,
"step": 215
},
{
"epoch": 2.075851851851852,
"grad_norm": 6451.2578125,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 19.57032585144043,
"log_odds_ratio": -11.807035446166992,
"logps/chosen": -13.269747734069824,
"logps/rejected": -32.721336364746094,
"loss": -75.1553,
"nll_loss": 10.17901611328125,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -6.634873867034912,
"rewards/margins": 9.725793838500977,
"rewards/rejected": -16.360668182373047,
"step": 220
},
{
"epoch": 2.1232592592592594,
"grad_norm": 3175.251708984375,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 44.743412017822266,
"log_odds_ratio": -6.601927280426025,
"logps/chosen": -7.938143730163574,
"logps/rejected": -52.494384765625,
"loss": -564.2982,
"nll_loss": 7.033768653869629,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -3.969071865081787,
"rewards/margins": 22.278118133544922,
"rewards/rejected": -26.2471923828125,
"step": 225
},
{
"epoch": 2.1706666666666665,
"grad_norm": 10925.5673828125,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 37.528343200683594,
"log_odds_ratio": -7.298922061920166,
"logps/chosen": -8.199933052062988,
"logps/rejected": -45.548763275146484,
"loss": -451.7604,
"nll_loss": 6.6627702713012695,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -4.099966526031494,
"rewards/margins": 18.674419403076172,
"rewards/rejected": -22.774381637573242,
"step": 230
},
{
"epoch": 2.218074074074074,
"grad_norm": 5350.28662109375,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 37.19614791870117,
"log_odds_ratio": -10.001937866210938,
"logps/chosen": -12.3423490524292,
"logps/rejected": -49.37834930419922,
"loss": -357.3079,
"nll_loss": 9.177508354187012,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -6.1711745262146,
"rewards/margins": 18.518001556396484,
"rewards/rejected": -24.68917465209961,
"step": 235
},
{
"epoch": 2.2654814814814817,
"grad_norm": 10187.1015625,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 42.718074798583984,
"log_odds_ratio": -11.093441009521484,
"logps/chosen": -14.150568008422852,
"logps/rejected": -56.68629837036133,
"loss": -405.1544,
"nll_loss": 11.543497085571289,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -7.075284004211426,
"rewards/margins": 21.267864227294922,
"rewards/rejected": -28.343149185180664,
"step": 240
},
{
"epoch": 2.3128888888888888,
"grad_norm": 5219.4873046875,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 44.08445358276367,
"log_odds_ratio": -11.732454299926758,
"logps/chosen": -13.956028938293457,
"logps/rejected": -57.879173278808594,
"loss": -443.6736,
"nll_loss": 10.992512702941895,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -6.9780144691467285,
"rewards/margins": 21.96157455444336,
"rewards/rejected": -28.939586639404297,
"step": 245
},
{
"epoch": 2.3602962962962963,
"grad_norm": 11797.4462890625,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 35.4526252746582,
"log_odds_ratio": -8.070879936218262,
"logps/chosen": -9.394768714904785,
"logps/rejected": -44.66648483276367,
"loss": -392.5384,
"nll_loss": 7.098433017730713,
"rewards/accuracies": 0.6875,
"rewards/chosen": -4.697384357452393,
"rewards/margins": 17.6358585357666,
"rewards/rejected": -22.333242416381836,
"step": 250
},
{
"epoch": 2.407703703703704,
"grad_norm": 11236.10546875,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 37.37392807006836,
"log_odds_ratio": -9.121585845947266,
"logps/chosen": -12.584083557128906,
"logps/rejected": -49.78114318847656,
"loss": -338.2161,
"nll_loss": 9.931722640991211,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -6.292041778564453,
"rewards/margins": 18.598529815673828,
"rewards/rejected": -24.89057159423828,
"step": 255
},
{
"epoch": 2.455111111111111,
"grad_norm": 15344.7255859375,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 34.853729248046875,
"log_odds_ratio": -6.747254371643066,
"logps/chosen": -9.025346755981445,
"logps/rejected": -43.715370178222656,
"loss": -374.0381,
"nll_loss": 8.037846565246582,
"rewards/accuracies": 0.71875,
"rewards/chosen": -4.512673377990723,
"rewards/margins": 17.345016479492188,
"rewards/rejected": -21.857685089111328,
"step": 260
},
{
"epoch": 2.5025185185185186,
"grad_norm": 8925.0234375,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 52.23662567138672,
"log_odds_ratio": -5.923990726470947,
"logps/chosen": -7.497511386871338,
"logps/rejected": -59.578468322753906,
"loss": -687.7863,
"nll_loss": 6.113022327423096,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -3.748755693435669,
"rewards/margins": 26.040477752685547,
"rewards/rejected": -29.789234161376953,
"step": 265
},
{
"epoch": 2.549925925925926,
"grad_norm": 3612.017333984375,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 44.58516311645508,
"log_odds_ratio": -5.719125747680664,
"logps/chosen": -10.139649391174316,
"logps/rejected": -54.54728317260742,
"loss": -477.472,
"nll_loss": 8.543855667114258,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -5.069824695587158,
"rewards/margins": 22.203815460205078,
"rewards/rejected": -27.27364158630371,
"step": 270
},
{
"epoch": 2.5973333333333333,
"grad_norm": 9717.0439453125,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 38.22019577026367,
"log_odds_ratio": -7.6910576820373535,
"logps/chosen": -10.25065803527832,
"logps/rejected": -48.30152130126953,
"loss": -403.615,
"nll_loss": 8.413914680480957,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -5.12532901763916,
"rewards/margins": 19.025432586669922,
"rewards/rejected": -24.150760650634766,
"step": 275
},
{
"epoch": 2.644740740740741,
"grad_norm": 7232.400390625,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 48.507225036621094,
"log_odds_ratio": -8.356045722961426,
"logps/chosen": -10.951539993286133,
"logps/rejected": -59.291465759277344,
"loss": -556.3883,
"nll_loss": 8.764814376831055,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -5.475769996643066,
"rewards/margins": 24.169963836669922,
"rewards/rejected": -29.645732879638672,
"step": 280
},
{
"epoch": 2.6921481481481484,
"grad_norm": 8219.6298828125,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 41.27231979370117,
"log_odds_ratio": -6.389247417449951,
"logps/chosen": -11.282487869262695,
"logps/rejected": -52.3975830078125,
"loss": -398.777,
"nll_loss": 9.017220497131348,
"rewards/accuracies": 0.75,
"rewards/chosen": -5.641243934631348,
"rewards/margins": 20.55754852294922,
"rewards/rejected": -26.19879150390625,
"step": 285
},
{
"epoch": 2.7395555555555555,
"grad_norm": 9359.0595703125,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 24.906938552856445,
"log_odds_ratio": -18.328092575073242,
"logps/chosen": -23.42938804626465,
"logps/rejected": -48.18115997314453,
"loss": 60.6635,
"nll_loss": 17.129901885986328,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -11.714694023132324,
"rewards/margins": 12.375885963439941,
"rewards/rejected": -24.090579986572266,
"step": 290
},
{
"epoch": 2.786962962962963,
"grad_norm": 4202.4697265625,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 37.57846450805664,
"log_odds_ratio": -9.300204277038574,
"logps/chosen": -13.682713508605957,
"logps/rejected": -51.1092414855957,
"loss": -309.3759,
"nll_loss": 10.95567512512207,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -6.8413567543029785,
"rewards/margins": 18.713260650634766,
"rewards/rejected": -25.55462074279785,
"step": 295
},
{
"epoch": 2.83437037037037,
"grad_norm": 29844.5703125,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 43.591487884521484,
"log_odds_ratio": -10.49733829498291,
"logps/chosen": -14.3878755569458,
"logps/rejected": -57.82908248901367,
"loss": -402.1579,
"nll_loss": 10.638388633728027,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -7.1939377784729,
"rewards/margins": 21.720603942871094,
"rewards/rejected": -28.914541244506836,
"step": 300
},
{
"epoch": 2.8817777777777778,
"grad_norm": 16960.9296875,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 41.2000732421875,
"log_odds_ratio": -11.432271957397461,
"logps/chosen": -13.90955924987793,
"logps/rejected": -54.91774368286133,
"loss": -393.7107,
"nll_loss": 10.265416145324707,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -6.954779624938965,
"rewards/margins": 20.504093170166016,
"rewards/rejected": -27.458871841430664,
"step": 305
},
{
"epoch": 2.9291851851851853,
"grad_norm": 9071.34375,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 41.77397918701172,
"log_odds_ratio": -10.353241920471191,
"logps/chosen": -12.447257995605469,
"logps/rejected": -54.037437438964844,
"loss": -432.6522,
"nll_loss": 10.377059936523438,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -6.223628997802734,
"rewards/margins": 20.795089721679688,
"rewards/rejected": -27.018718719482422,
"step": 310
},
{
"epoch": 2.9765925925925925,
"grad_norm": 8651.380859375,
"learning_rate": 0.0,
"log_odds_chosen": 53.18922805786133,
"log_odds_ratio": -10.290987014770508,
"logps/chosen": -12.473676681518555,
"logps/rejected": -65.4810791015625,
"loss": -613.3333,
"nll_loss": 10.335878372192383,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -6.236838340759277,
"rewards/margins": 26.50370216369629,
"rewards/rejected": -32.74053955078125,
"step": 315
},
{
"epoch": 2.9765925925925925,
"step": 315,
"total_flos": 0.0,
"train_loss": -186.8132874746171,
"train_runtime": 9331.4928,
"train_samples_per_second": 2.17,
"train_steps_per_second": 0.034
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}