{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9765925925925925, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 1114.319091796875, "learning_rate": 7.8125e-06, "log_odds_chosen": -1.007889747619629, "log_odds_ratio": -10.178816795349121, "logps/chosen": -21.696243286132812, "logps/rejected": -20.688587188720703, "loss": 547.5699, "nll_loss": 9.67590618133545, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -10.848121643066406, "rewards/margins": -0.503828227519989, "rewards/rejected": -10.344293594360352, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 923.0885620117188, "learning_rate": 1.5625e-05, "log_odds_chosen": -2.801370143890381, "log_odds_ratio": -10.164667129516602, "logps/chosen": -20.971328735351562, "logps/rejected": -18.17032241821289, "loss": 553.2887, "nll_loss": 8.679121971130371, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": -10.485664367675781, "rewards/margins": -1.4005038738250732, "rewards/rejected": -9.085161209106445, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 460.3068542480469, "learning_rate": 2.34375e-05, "log_odds_chosen": -1.2739719152450562, "log_odds_ratio": -10.46790599822998, "logps/chosen": -20.88275909423828, "logps/rejected": -19.607295989990234, "loss": 521.1988, "nll_loss": 8.076498031616211, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -10.44137954711914, "rewards/margins": -0.6377328038215637, "rewards/rejected": -9.803647994995117, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 4484.80712890625, "learning_rate": 3.125e-05, "log_odds_chosen": -3.9785079956054688, "log_odds_ratio": -8.86056900024414, "logps/chosen": -16.492053985595703, "logps/rejected": -12.51595401763916, "loss": 449.7023, "nll_loss": 6.58371639251709, "rewards/accuracies": 0.4375, "rewards/chosen": -8.246026992797852, "rewards/margins": -1.9880508184432983, "rewards/rejected": -6.25797700881958, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 703.4686279296875, "learning_rate": 3.90625e-05, "log_odds_chosen": -0.08368232101202011, "log_odds_ratio": -1.9139995574951172, "logps/chosen": -4.356338977813721, "logps/rejected": -4.261002540588379, "loss": 110.5851, "nll_loss": 3.422860622406006, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1781694889068604, "rewards/margins": -0.0476679690182209, "rewards/rejected": -2.1305012702941895, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 436.39642333984375, "learning_rate": 4.6875e-05, "log_odds_chosen": -0.037819117307662964, "log_odds_ratio": -1.0239157676696777, "logps/chosen": -2.210418462753296, "logps/rejected": -2.175981044769287, "loss": 55.4786, "nll_loss": 2.731612205505371, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -1.105209231376648, "rewards/margins": -0.017218593508005142, "rewards/rejected": -1.0879905223846436, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 202.19808959960938, "learning_rate": 4.998613757348784e-05, "log_odds_chosen": 0.19444182515144348, "log_odds_ratio": -0.8455008268356323, "logps/chosen": -1.7893317937850952, "logps/rejected": -1.9684311151504517, "loss": 41.4203, "nll_loss": 2.4831597805023193, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.8946658968925476, "rewards/margins": 0.08954959362745285, "rewards/rejected": -0.9842155575752258, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 168.00384521484375, "learning_rate": 4.990147841143462e-05, "log_odds_chosen": 0.29362189769744873, "log_odds_ratio": -0.7308410406112671, "logps/chosen": -1.5209834575653076, "logps/rejected": -1.7699060440063477, "loss": 33.351, "nll_loss": 2.3416779041290283, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.7604917287826538, "rewards/margins": 0.12446129322052002, "rewards/rejected": -0.8849530220031738, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 193.2564239501953, "learning_rate": 4.97401218720448e-05, "log_odds_chosen": 0.298764705657959, "log_odds_ratio": -0.7091392278671265, "logps/chosen": -1.45993971824646, "logps/rejected": -1.725881576538086, "loss": 31.5672, "nll_loss": 2.40507173538208, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.72996985912323, "rewards/margins": 0.1329708844423294, "rewards/rejected": -0.862940788269043, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 265.47442626953125, "learning_rate": 4.9502564938797946e-05, "log_odds_chosen": 0.3887616693973541, "log_odds_ratio": -0.6871553063392639, "logps/chosen": -1.3525534868240356, "logps/rejected": -1.6923545598983765, "loss": 27.2478, "nll_loss": 2.172515392303467, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6762767434120178, "rewards/margins": 0.16990050673484802, "rewards/rejected": -0.8461772799491882, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 976.0902709960938, "learning_rate": 4.918953929490768e-05, "log_odds_chosen": 1.4601755142211914, "log_odds_ratio": -0.7356270551681519, "logps/chosen": -1.7127513885498047, "logps/rejected": -3.102419137954712, "loss": 21.1464, "nll_loss": 2.375514268875122, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.8563756942749023, "rewards/margins": 0.6948337554931641, "rewards/rejected": -1.551209568977356, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 19477.037109375, "learning_rate": 4.88020090697132e-05, "log_odds_chosen": 9.887539863586426, "log_odds_ratio": -2.272916316986084, "logps/chosen": -5.333150863647461, "logps/rejected": -15.112531661987305, "loss": -21.8917, "nll_loss": 5.349360466003418, "rewards/accuracies": 0.703125, "rewards/chosen": -2.6665754318237305, "rewards/margins": 4.889690399169922, "rewards/rejected": -7.556265830993652, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 63504.7578125, "learning_rate": 4.834116786912897e-05, "log_odds_chosen": 6.005472183227539, "log_odds_ratio": -5.739134788513184, "logps/chosen": -10.86668872833252, "logps/rejected": -16.817562103271484, "loss": 161.1276, "nll_loss": 9.975183486938477, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -5.43334436416626, "rewards/margins": 2.975436210632324, "rewards/rejected": -8.408781051635742, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 3978.453857421875, "learning_rate": 4.7808435099299045e-05, "log_odds_chosen": 17.832883834838867, "log_odds_ratio": -5.360915184020996, "logps/chosen": -8.222649574279785, "logps/rejected": -25.95966148376465, "loss": -106.2073, "nll_loss": 7.058934211730957, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -4.111324787139893, "rewards/margins": 8.868505477905273, "rewards/rejected": -12.979830741882324, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 16509.505859375, "learning_rate": 4.720545159477922e-05, "log_odds_chosen": 20.426956176757812, "log_odds_ratio": -9.292292594909668, "logps/chosen": -15.002415657043457, "logps/rejected": -35.350364685058594, "loss": 6.2434, "nll_loss": 11.920351028442383, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -7.5012078285217285, "rewards/margins": 10.173975944519043, "rewards/rejected": -17.675182342529297, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 3993.2314453125, "learning_rate": 4.653407456471222e-05, "log_odds_chosen": 21.786739349365234, "log_odds_ratio": -9.150012969970703, "logps/chosen": -12.506328582763672, "logps/rejected": -34.199073791503906, "loss": -92.8843, "nll_loss": 10.512908935546875, "rewards/accuracies": 0.65625, "rewards/chosen": -6.253164291381836, "rewards/margins": 10.8463716506958, "rewards/rejected": -17.099536895751953, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 3684.40771484375, "learning_rate": 4.579637187256222e-05, "log_odds_chosen": 21.78746795654297, "log_odds_ratio": -7.730111122131348, "logps/chosen": -8.821676254272461, "logps/rejected": -30.544620513916016, "loss": -188.5252, "nll_loss": 7.775321006774902, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -4.4108381271362305, "rewards/margins": 10.861473083496094, "rewards/rejected": -15.272310256958008, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 4620.634765625, "learning_rate": 4.499461566702685e-05, "log_odds_chosen": 28.149227142333984, "log_odds_ratio": -15.921751022338867, "logps/chosen": -24.887939453125, "logps/rejected": -52.94123077392578, "loss": 93.227, "nll_loss": 20.595924377441406, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -12.4439697265625, "rewards/margins": 14.026643753051758, "rewards/rejected": -26.47061538696289, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 2822.56982421875, "learning_rate": 4.413127538374411e-05, "log_odds_chosen": 48.35707473754883, "log_odds_ratio": -11.363774299621582, "logps/chosen": -19.054187774658203, "logps/rejected": -67.31429290771484, "loss": -343.8322, "nll_loss": 13.582868576049805, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -9.527093887329102, "rewards/margins": 24.130054473876953, "rewards/rejected": -33.65714645385742, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 11888.7197265625, "learning_rate": 4.320901013934887e-05, "log_odds_chosen": 37.94886016845703, "log_odds_ratio": -13.369104385375977, "logps/chosen": -25.14851951599121, "logps/rejected": -62.99921798706055, "loss": -14.4122, "nll_loss": 19.4227294921875, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -12.574259757995605, "rewards/margins": 18.925350189208984, "rewards/rejected": -31.499608993530273, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 17781.978515625, "learning_rate": 4.223066054130568e-05, "log_odds_chosen": 32.50678253173828, "log_odds_ratio": -13.40135669708252, "logps/chosen": -26.233570098876953, "logps/rejected": -58.676490783691406, "loss": 106.4254, "nll_loss": 19.13003158569336, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -13.116785049438477, "rewards/margins": 16.22146224975586, "rewards/rejected": -29.338245391845703, "step": 105 }, { "epoch": 1.037925925925926, "grad_norm": 8946.7998046875, "learning_rate": 4.1199239938743797e-05, "log_odds_chosen": 11.558371543884277, "log_odds_ratio": -7.9565300941467285, "logps/chosen": -13.197209358215332, "logps/rejected": -24.688472747802734, "loss": 99.8268, "nll_loss": 10.583297729492188, "rewards/accuracies": 0.618881106376648, "rewards/chosen": -6.598604679107666, "rewards/margins": 5.745632171630859, "rewards/rejected": -12.344236373901367, "step": 110 }, { "epoch": 1.0853333333333333, "grad_norm": 28189.90625, "learning_rate": 4.0117925141242174e-05, "log_odds_chosen": 23.58847427368164, "log_odds_ratio": -8.089736938476562, "logps/chosen": -9.830774307250977, "logps/rejected": -33.307220458984375, "loss": -189.9675, "nll_loss": 7.8513503074646, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -4.915387153625488, "rewards/margins": 11.7382230758667, "rewards/rejected": -16.653610229492188, "step": 115 }, { "epoch": 1.1327407407407408, "grad_norm": 4763.630859375, "learning_rate": 3.899004663415084e-05, "log_odds_chosen": 37.111053466796875, "log_odds_ratio": -8.056583404541016, "logps/chosen": -9.871607780456543, "logps/rejected": -46.8720588684082, "loss": -404.5778, "nll_loss": 8.503942489624023, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -4.9358038902282715, "rewards/margins": 18.500225067138672, "rewards/rejected": -23.4360294342041, "step": 120 }, { "epoch": 1.1801481481481482, "grad_norm": 5928.169921875, "learning_rate": 3.781907832058587e-05, "log_odds_chosen": 35.93215560913086, "log_odds_ratio": -9.4933443069458, "logps/chosen": -11.483396530151367, "logps/rejected": -47.30681610107422, "loss": -357.2632, "nll_loss": 10.28132438659668, "rewards/accuracies": 0.671875, "rewards/chosen": -5.741698265075684, "rewards/margins": 17.911710739135742, "rewards/rejected": -23.65340805053711, "step": 125 }, { "epoch": 1.2275555555555555, "grad_norm": 11281.740234375, "learning_rate": 3.660862682169282e-05, "log_odds_chosen": 40.6527214050293, "log_odds_ratio": -11.877891540527344, "logps/chosen": -16.65973472595215, "logps/rejected": -57.17784881591797, "loss": -304.9178, "nll_loss": 12.847620964050293, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -8.329867362976074, "rewards/margins": 20.259056091308594, "rewards/rejected": -28.588924407958984, "step": 130 }, { "epoch": 1.274962962962963, "grad_norm": 7086.1142578125, "learning_rate": 3.5362420368134356e-05, "log_odds_chosen": 11.272117614746094, "log_odds_ratio": -6.918790340423584, "logps/chosen": -7.792928218841553, "logps/rejected": -18.96177101135254, "loss": -39.8691, "nll_loss": 6.798020362854004, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -3.8964641094207764, "rewards/margins": 5.584420204162598, "rewards/rejected": -9.48088550567627, "step": 135 }, { "epoch": 1.3223703703703704, "grad_norm": 137599.171875, "learning_rate": 3.408429731701635e-05, "log_odds_chosen": 21.463497161865234, "log_odds_ratio": -2.791048049926758, "logps/chosen": -5.230650424957275, "logps/rejected": -26.6016902923584, "loss": -218.9189, "nll_loss": 4.977573394775391, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -2.6153252124786377, "rewards/margins": 10.68552017211914, "rewards/rejected": -13.3008451461792, "step": 140 }, { "epoch": 1.3697777777777778, "grad_norm": 20666.33203125, "learning_rate": 3.2778194329621104e-05, "log_odds_chosen": 40.393245697021484, "log_odds_ratio": -13.250892639160156, "logps/chosen": -19.773197174072266, "logps/rejected": -60.044952392578125, "loss": -223.2017, "nll_loss": 16.38017463684082, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -9.886598587036133, "rewards/margins": 20.135875701904297, "rewards/rejected": -30.022476196289062, "step": 145 }, { "epoch": 1.417185185185185, "grad_norm": 19448.947265625, "learning_rate": 3.144813424636031e-05, "log_odds_chosen": 45.739837646484375, "log_odds_ratio": -10.681756973266602, "logps/chosen": -15.583732604980469, "logps/rejected": -61.16387176513672, "loss": -401.1902, "nll_loss": 12.756240844726562, "rewards/accuracies": 0.703125, "rewards/chosen": -7.791866302490234, "rewards/margins": 22.790069580078125, "rewards/rejected": -30.58193588256836, "step": 150 }, { "epoch": 1.4645925925925927, "grad_norm": 7016.2177734375, "learning_rate": 3.0098213696293542e-05, "log_odds_chosen": 26.974849700927734, "log_odds_ratio": -9.331676483154297, "logps/chosen": -11.568083763122559, "logps/rejected": -38.42928695678711, "loss": -208.4774, "nll_loss": 10.451626777648926, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -5.784041881561279, "rewards/margins": 13.430601119995117, "rewards/rejected": -19.214643478393555, "step": 155 }, { "epoch": 1.512, "grad_norm": 2596.490234375, "learning_rate": 2.8732590479375165e-05, "log_odds_chosen": 17.883655548095703, "log_odds_ratio": -1.8087437152862549, "logps/chosen": -3.404132843017578, "logps/rejected": -21.178518295288086, "loss": -204.2252, "nll_loss": 3.781926393508911, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.702066421508789, "rewards/margins": 8.88719367980957, "rewards/rejected": -10.589259147644043, "step": 160 }, { "epoch": 1.5594074074074074, "grad_norm": 15875.6162109375, "learning_rate": 2.7355470760292956e-05, "log_odds_chosen": 17.571182250976562, "log_odds_ratio": -6.643049716949463, "logps/chosen": -9.500158309936523, "logps/rejected": -26.94256591796875, "loss": -81.0908, "nll_loss": 8.642666816711426, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -4.750079154968262, "rewards/margins": 8.72120475769043, "rewards/rejected": -13.471282958984375, "step": 165 }, { "epoch": 1.6068148148148147, "grad_norm": 6882.3740234375, "learning_rate": 2.597109611334169e-05, "log_odds_chosen": 37.06153106689453, "log_odds_ratio": -7.4608635902404785, "logps/chosen": -15.66374683380127, "logps/rejected": -52.575416564941406, "loss": -208.487, "nll_loss": 13.647031784057617, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -7.831873416900635, "rewards/margins": 18.45583152770996, "rewards/rejected": -26.287708282470703, "step": 170 }, { "epoch": 1.6542222222222223, "grad_norm": 6623.11572265625, "learning_rate": 2.458373045823404e-05, "log_odds_chosen": 21.147552490234375, "log_odds_ratio": -7.808111667633057, "logps/chosen": -9.466218948364258, "logps/rejected": -30.470600128173828, "loss": -157.8435, "nll_loss": 7.549225807189941, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -4.733109474182129, "rewards/margins": 10.502188682556152, "rewards/rejected": -15.235300064086914, "step": 175 }, { "epoch": 1.7016296296296296, "grad_norm": 5010.78564453125, "learning_rate": 2.3197646927086697e-05, "log_odds_chosen": 24.123126983642578, "log_odds_ratio": -1.9697444438934326, "logps/chosen": -2.780689001083374, "logps/rejected": -26.779048919677734, "loss": -326.2735, "nll_loss": 3.6041767597198486, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -1.390344500541687, "rewards/margins": 11.99917984008789, "rewards/rejected": -13.389524459838867, "step": 180 }, { "epoch": 1.749037037037037, "grad_norm": 6793.22412109375, "learning_rate": 2.1817114703032176e-05, "log_odds_chosen": 38.25811004638672, "log_odds_ratio": -7.5069379806518555, "logps/chosen": -9.548910140991211, "logps/rejected": -47.6781120300293, "loss": -424.2517, "nll_loss": 7.210787296295166, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -4.7744550704956055, "rewards/margins": 19.06460189819336, "rewards/rejected": -23.83905601501465, "step": 185 }, { "epoch": 1.7964444444444445, "grad_norm": 68079.1953125, "learning_rate": 2.0446385870993467e-05, "log_odds_chosen": 41.46270751953125, "log_odds_ratio": -9.237896919250488, "logps/chosen": -11.56389331817627, "logps/rejected": -52.86220169067383, "loss": -438.3831, "nll_loss": 10.06884765625, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -5.781946659088135, "rewards/margins": 20.649154663085938, "rewards/rejected": -26.431100845336914, "step": 190 }, { "epoch": 1.8438518518518519, "grad_norm": 19936.81640625, "learning_rate": 1.9089682321121834e-05, "log_odds_chosen": 43.99555206298828, "log_odds_ratio": -7.328845977783203, "logps/chosen": -9.099332809448242, "logps/rejected": -52.933860778808594, "loss": -527.35, "nll_loss": 8.21430778503418, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -4.549666404724121, "rewards/margins": 21.91726303100586, "rewards/rejected": -26.466930389404297, "step": 195 }, { "epoch": 1.8912592592592592, "grad_norm": 291176.21875, "learning_rate": 1.775118274523545e-05, "log_odds_chosen": 32.236446380615234, "log_odds_ratio": -11.109225273132324, "logps/chosen": -14.257405281066895, "logps/rejected": -46.36336898803711, "loss": -234.8545, "nll_loss": 11.66010856628418, "rewards/accuracies": 0.71875, "rewards/chosen": -7.128702640533447, "rewards/margins": 16.052982330322266, "rewards/rejected": -23.181684494018555, "step": 200 }, { "epoch": 1.9386666666666668, "grad_norm": 3509.22021484375, "learning_rate": 1.643500976631037e-05, "log_odds_chosen": 30.14800453186035, "log_odds_ratio": -12.824560165405273, "logps/chosen": -15.561635971069336, "logps/rejected": -45.55414581298828, "loss": -186.7682, "nll_loss": 12.363306045532227, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -7.780817985534668, "rewards/margins": 14.996258735656738, "rewards/rejected": -22.77707290649414, "step": 205 }, { "epoch": 1.986074074074074, "grad_norm": 2383.834228515625, "learning_rate": 1.514521724066537e-05, "log_odds_chosen": 41.85120391845703, "log_odds_ratio": -13.449541091918945, "logps/chosen": -15.545976638793945, "logps/rejected": -57.27630615234375, "loss": -384.8493, "nll_loss": 11.846510887145996, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -7.772988319396973, "rewards/margins": 20.86516761779785, "rewards/rejected": -28.638153076171875, "step": 210 }, { "epoch": 2.0284444444444443, "grad_norm": 24200.37109375, "learning_rate": 1.3885777771950348e-05, "log_odds_chosen": 33.1331787109375, "log_odds_ratio": -8.699053764343262, "logps/chosen": -9.583888053894043, "logps/rejected": -42.54216384887695, "loss": -321.3827, "nll_loss": 8.21044921875, "rewards/accuracies": 0.7272727489471436, "rewards/chosen": -4.7919440269470215, "rewards/margins": 16.479137420654297, "rewards/rejected": -21.271081924438477, "step": 215 }, { "epoch": 2.075851851851852, "grad_norm": 6451.2578125, "learning_rate": 1.2660570475395683e-05, "log_odds_chosen": 19.57032585144043, "log_odds_ratio": -11.807035446166992, "logps/chosen": -13.269747734069824, "logps/rejected": -32.721336364746094, "loss": -75.1553, "nll_loss": 10.17901611328125, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -6.634873867034912, "rewards/margins": 9.725793838500977, "rewards/rejected": -16.360668182373047, "step": 220 }, { "epoch": 2.1232592592592594, "grad_norm": 3175.251708984375, "learning_rate": 1.1473369030008974e-05, "log_odds_chosen": 44.743412017822266, "log_odds_ratio": -6.601927280426025, "logps/chosen": -7.938143730163574, "logps/rejected": -52.494384765625, "loss": -564.2982, "nll_loss": 7.033768653869629, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -3.969071865081787, "rewards/margins": 22.278118133544922, "rewards/rejected": -26.2471923828125, "step": 225 }, { "epoch": 2.1706666666666665, "grad_norm": 10925.5673828125, "learning_rate": 1.0327830055518842e-05, "log_odds_chosen": 37.528343200683594, "log_odds_ratio": -7.298922061920166, "logps/chosen": -8.199933052062988, "logps/rejected": -45.548763275146484, "loss": -451.7604, "nll_loss": 6.6627702713012695, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -4.099966526031494, "rewards/margins": 18.674419403076172, "rewards/rejected": -22.774381637573242, "step": 230 }, { "epoch": 2.218074074074074, "grad_norm": 5350.28662109375, "learning_rate": 9.227481849865235e-06, "log_odds_chosen": 37.19614791870117, "log_odds_ratio": -10.001937866210938, "logps/chosen": -12.3423490524292, "logps/rejected": -49.37834930419922, "loss": -357.3079, "nll_loss": 9.177508354187012, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -6.1711745262146, "rewards/margins": 18.518001556396484, "rewards/rejected": -24.68917465209961, "step": 235 }, { "epoch": 2.2654814814814817, "grad_norm": 10187.1015625, "learning_rate": 8.175713521924978e-06, "log_odds_chosen": 42.718074798583984, "log_odds_ratio": -11.093441009521484, "logps/chosen": -14.150568008422852, "logps/rejected": -56.68629837036133, "loss": -405.1544, "nll_loss": 11.543497085571289, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -7.075284004211426, "rewards/margins": 21.267864227294922, "rewards/rejected": -28.343149185180664, "step": 240 }, { "epoch": 2.3128888888888888, "grad_norm": 5219.4873046875, "learning_rate": 7.1757645529443665e-06, "log_odds_chosen": 44.08445358276367, "log_odds_ratio": -11.732454299926758, "logps/chosen": -13.956028938293457, "logps/rejected": -57.879173278808594, "loss": -443.6736, "nll_loss": 10.992512702941895, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -6.9780144691467285, "rewards/margins": 21.96157455444336, "rewards/rejected": -28.939586639404297, "step": 245 }, { "epoch": 2.3602962962962963, "grad_norm": 11797.4462890625, "learning_rate": 6.230714818829733e-06, "log_odds_chosen": 35.4526252746582, "log_odds_ratio": -8.070879936218262, "logps/chosen": -9.394768714904785, "logps/rejected": -44.66648483276367, "loss": -392.5384, "nll_loss": 7.098433017730713, "rewards/accuracies": 0.6875, "rewards/chosen": -4.697384357452393, "rewards/margins": 17.6358585357666, "rewards/rejected": -22.333242416381836, "step": 250 }, { "epoch": 2.407703703703704, "grad_norm": 11236.10546875, "learning_rate": 5.343475104027743e-06, "log_odds_chosen": 37.37392807006836, "log_odds_ratio": -9.121585845947266, "logps/chosen": -12.584083557128906, "logps/rejected": -49.78114318847656, "loss": -338.2161, "nll_loss": 9.931722640991211, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -6.292041778564453, "rewards/margins": 18.598529815673828, "rewards/rejected": -24.89057159423828, "step": 255 }, { "epoch": 2.455111111111111, "grad_norm": 15344.7255859375, "learning_rate": 4.516778136213037e-06, "log_odds_chosen": 34.853729248046875, "log_odds_ratio": -6.747254371643066, "logps/chosen": -9.025346755981445, "logps/rejected": -43.715370178222656, "loss": -374.0381, "nll_loss": 8.037846565246582, "rewards/accuracies": 0.71875, "rewards/chosen": -4.512673377990723, "rewards/margins": 17.345016479492188, "rewards/rejected": -21.857685089111328, "step": 260 }, { "epoch": 2.5025185185185186, "grad_norm": 8925.0234375, "learning_rate": 3.7531701693965554e-06, "log_odds_chosen": 52.23662567138672, "log_odds_ratio": -5.923990726470947, "logps/chosen": -7.497511386871338, "logps/rejected": -59.578468322753906, "loss": -687.7863, "nll_loss": 6.113022327423096, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -3.748755693435669, "rewards/margins": 26.040477752685547, "rewards/rejected": -29.789234161376953, "step": 265 }, { "epoch": 2.549925925925926, "grad_norm": 3612.017333984375, "learning_rate": 3.055003141378948e-06, "log_odds_chosen": 44.58516311645508, "log_odds_ratio": -5.719125747680664, "logps/chosen": -10.139649391174316, "logps/rejected": -54.54728317260742, "loss": -477.472, "nll_loss": 8.543855667114258, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -5.069824695587158, "rewards/margins": 22.203815460205078, "rewards/rejected": -27.27364158630371, "step": 270 }, { "epoch": 2.5973333333333333, "grad_norm": 9717.0439453125, "learning_rate": 2.424427429704365e-06, "log_odds_chosen": 38.22019577026367, "log_odds_ratio": -7.6910576820373535, "logps/chosen": -10.25065803527832, "logps/rejected": -48.30152130126953, "loss": -403.615, "nll_loss": 8.413914680480957, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -5.12532901763916, "rewards/margins": 19.025432586669922, "rewards/rejected": -24.150760650634766, "step": 275 }, { "epoch": 2.644740740740741, "grad_norm": 7232.400390625, "learning_rate": 1.8633852284264508e-06, "log_odds_chosen": 48.507225036621094, "log_odds_ratio": -8.356045722961426, "logps/chosen": -10.951539993286133, "logps/rejected": -59.291465759277344, "loss": -556.3883, "nll_loss": 8.764814376831055, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.475769996643066, "rewards/margins": 24.169963836669922, "rewards/rejected": -29.645732879638672, "step": 280 }, { "epoch": 2.6921481481481484, "grad_norm": 8219.6298828125, "learning_rate": 1.3736045660864034e-06, "log_odds_chosen": 41.27231979370117, "log_odds_ratio": -6.389247417449951, "logps/chosen": -11.282487869262695, "logps/rejected": -52.3975830078125, "loss": -398.777, "nll_loss": 9.017220497131348, "rewards/accuracies": 0.75, "rewards/chosen": -5.641243934631348, "rewards/margins": 20.55754852294922, "rewards/rejected": -26.19879150390625, "step": 285 }, { "epoch": 2.7395555555555555, "grad_norm": 9359.0595703125, "learning_rate": 9.565939833279192e-07, "log_odds_chosen": 24.906938552856445, "log_odds_ratio": -18.328092575073242, "logps/chosen": -23.42938804626465, "logps/rejected": -48.18115997314453, "loss": 60.6635, "nll_loss": 17.129901885986328, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -11.714694023132324, "rewards/margins": 12.375885963439941, "rewards/rejected": -24.090579986572266, "step": 290 }, { "epoch": 2.786962962962963, "grad_norm": 4202.4697265625, "learning_rate": 6.136378865420872e-07, "log_odds_chosen": 37.57846450805664, "log_odds_ratio": -9.300204277038574, "logps/chosen": -13.682713508605957, "logps/rejected": -51.1092414855957, "loss": -309.3759, "nll_loss": 10.95567512512207, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -6.8413567543029785, "rewards/margins": 18.713260650634766, "rewards/rejected": -25.55462074279785, "step": 295 }, { "epoch": 2.83437037037037, "grad_norm": 29844.5703125, "learning_rate": 3.45792591853214e-07, "log_odds_chosen": 43.591487884521484, "log_odds_ratio": -10.49733829498291, "logps/chosen": -14.3878755569458, "logps/rejected": -57.82908248901367, "loss": -402.1579, "nll_loss": 10.638388633728027, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -7.1939377784729, "rewards/margins": 21.720603942871094, "rewards/rejected": -28.914541244506836, "step": 300 }, { "epoch": 2.8817777777777778, "grad_norm": 16960.9296875, "learning_rate": 1.538830716302092e-07, "log_odds_chosen": 41.2000732421875, "log_odds_ratio": -11.432271957397461, "logps/chosen": -13.90955924987793, "logps/rejected": -54.91774368286133, "loss": -393.7107, "nll_loss": 10.265416145324707, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -6.954779624938965, "rewards/margins": 20.504093170166016, "rewards/rejected": -27.458871841430664, "step": 305 }, { "epoch": 2.9291851851851853, "grad_norm": 9071.34375, "learning_rate": 3.8500413544415025e-08, "log_odds_chosen": 41.77397918701172, "log_odds_ratio": -10.353241920471191, "logps/chosen": -12.447257995605469, "logps/rejected": -54.037437438964844, "loss": -432.6522, "nll_loss": 10.377059936523438, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -6.223628997802734, "rewards/margins": 20.795089721679688, "rewards/rejected": -27.018718719482422, "step": 310 }, { "epoch": 2.9765925925925925, "grad_norm": 8651.380859375, "learning_rate": 0.0, "log_odds_chosen": 53.18922805786133, "log_odds_ratio": -10.290987014770508, "logps/chosen": -12.473676681518555, "logps/rejected": -65.4810791015625, "loss": -613.3333, "nll_loss": 10.335878372192383, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -6.236838340759277, "rewards/margins": 26.50370216369629, "rewards/rejected": -32.74053955078125, "step": 315 }, { "epoch": 2.9765925925925925, "step": 315, "total_flos": 0.0, "train_loss": -186.8132874746171, "train_runtime": 9331.4928, "train_samples_per_second": 2.17, "train_steps_per_second": 0.034 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }