{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008,
"grad_norm": 234.60278509074556,
"learning_rate": 0.0,
"logits/chosen": 0.107421875,
"logits/rejected": 0.08984375,
"logps/chosen": -262.0,
"logps/rejected": -342.0,
"loss": 0.6914,
"nll_loss": 1.015625,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.008,
"grad_norm": 282.5477340162511,
"learning_rate": 3.6e-08,
"logits/chosen": -0.1501736044883728,
"logits/rejected": 0.009562174789607525,
"logps/chosen": -338.22222900390625,
"logps/rejected": -378.0,
"loss": 0.6885,
"nll_loss": 0.9717881679534912,
"rewards/accuracies": 0.3194444477558136,
"rewards/chosen": 0.015223185531795025,
"rewards/margins": 0.0276963971555233,
"rewards/rejected": -0.01256646029651165,
"step": 10
},
{
"epoch": 0.016,
"grad_norm": 206.12921184995773,
"learning_rate": 7.599999999999999e-08,
"logits/chosen": 0.1673583984375,
"logits/rejected": 0.0367431640625,
"logps/chosen": -207.0500030517578,
"logps/rejected": -415.20001220703125,
"loss": 0.6115,
"nll_loss": 0.9085937738418579,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.02422180213034153,
"rewards/margins": 0.18081054091453552,
"rewards/rejected": -0.20512695610523224,
"step": 20
},
{
"epoch": 0.024,
"grad_norm": 158.12897373074685,
"learning_rate": 1.16e-07,
"logits/chosen": -0.01387939415872097,
"logits/rejected": 0.06098632887005806,
"logps/chosen": -323.70001220703125,
"logps/rejected": -389.6000061035156,
"loss": 0.4236,
"nll_loss": 0.9488281011581421,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.1749267578125,
"rewards/margins": 0.7095702886581421,
"rewards/rejected": -0.8843749761581421,
"step": 30
},
{
"epoch": 0.032,
"grad_norm": 97.36760160402432,
"learning_rate": 1.56e-07,
"logits/chosen": -0.011962890625,
"logits/rejected": 0.02890625037252903,
"logps/chosen": -329.5,
"logps/rejected": -412.3999938964844,
"loss": 0.2188,
"nll_loss": 0.99609375,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.4403320252895355,
"rewards/margins": 1.7859375476837158,
"rewards/rejected": -2.2265625,
"step": 40
},
{
"epoch": 0.04,
"grad_norm": 67.3942566184986,
"learning_rate": 1.96e-07,
"logits/chosen": 0.07175292819738388,
"logits/rejected": -0.018310546875,
"logps/chosen": -261.70001220703125,
"logps/rejected": -420.20001220703125,
"loss": 0.1252,
"nll_loss": 1.0148437023162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.517773449420929,
"rewards/margins": 2.8515625,
"rewards/rejected": -3.3734374046325684,
"step": 50
},
{
"epoch": 0.048,
"grad_norm": 25.358406316482952,
"learning_rate": 2.3599999999999997e-07,
"logits/chosen": 0.17060546576976776,
"logits/rejected": 0.15923461318016052,
"logps/chosen": -264.1000061035156,
"logps/rejected": -445.6000061035156,
"loss": 0.0376,
"nll_loss": 0.9632812738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8160156011581421,
"rewards/margins": 4.489062309265137,
"rewards/rejected": -5.306250095367432,
"step": 60
},
{
"epoch": 0.056,
"grad_norm": 83.05887552576917,
"learning_rate": 2.7600000000000004e-07,
"logits/chosen": 0.147216796875,
"logits/rejected": 0.18815918266773224,
"logps/chosen": -271.70001220703125,
"logps/rejected": -455.20001220703125,
"loss": 0.0631,
"nll_loss": 0.940625011920929,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.316015601158142,
"rewards/margins": 6.240624904632568,
"rewards/rejected": -7.556250095367432,
"step": 70
},
{
"epoch": 0.064,
"grad_norm": 25.278258079670092,
"learning_rate": 3.1599999999999997e-07,
"logits/chosen": 0.17365722358226776,
"logits/rejected": 0.2812866270542145,
"logps/chosen": -328.8999938964844,
"logps/rejected": -457.0,
"loss": 0.0121,
"nll_loss": 1.033203125,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.478515625,
"rewards/margins": 7.271874904632568,
"rewards/rejected": -8.743749618530273,
"step": 80
},
{
"epoch": 0.072,
"grad_norm": 8.036414090919388,
"learning_rate": 3.5599999999999996e-07,
"logits/chosen": 0.3252929747104645,
"logits/rejected": 0.3529296815395355,
"logps/chosen": -282.1000061035156,
"logps/rejected": -486.0,
"loss": 0.0349,
"nll_loss": 1.041406273841858,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -2.042187452316284,
"rewards/margins": 8.703125,
"rewards/rejected": -10.743749618530273,
"step": 90
},
{
"epoch": 0.08,
"grad_norm": 0.06277898862179868,
"learning_rate": 3.96e-07,
"logits/chosen": 0.11843261867761612,
"logits/rejected": 0.28974610567092896,
"logps/chosen": -338.29998779296875,
"logps/rejected": -518.5999755859375,
"loss": 0.0113,
"nll_loss": 1.068750023841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.958593726158142,
"rewards/margins": 10.34375,
"rewards/rejected": -12.306249618530273,
"step": 100
},
{
"epoch": 0.088,
"grad_norm": 186.15959679077883,
"learning_rate": 4.36e-07,
"logits/chosen": 0.16660156846046448,
"logits/rejected": 0.23691406846046448,
"logps/chosen": -336.79998779296875,
"logps/rejected": -498.3999938964844,
"loss": 0.0368,
"nll_loss": 1.010156273841858,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.2421875,
"rewards/margins": 10.175000190734863,
"rewards/rejected": -12.431249618530273,
"step": 110
},
{
"epoch": 0.096,
"grad_norm": 1.030662736090751,
"learning_rate": 4.76e-07,
"logits/chosen": 0.3314208984375,
"logits/rejected": 0.39873045682907104,
"logps/chosen": -291.20001220703125,
"logps/rejected": -559.7999877929688,
"loss": 0.0055,
"nll_loss": 0.977734386920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.3921875953674316,
"rewards/margins": 13.274999618530273,
"rewards/rejected": -15.681249618530273,
"step": 120
},
{
"epoch": 0.104,
"grad_norm": 10.790384157037435,
"learning_rate": 4.982222222222223e-07,
"logits/chosen": 0.33642578125,
"logits/rejected": 0.3980468809604645,
"logps/chosen": -316.8999938964844,
"logps/rejected": -563.7999877929688,
"loss": 0.0057,
"nll_loss": 1.100000023841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.5250000953674316,
"rewards/margins": 14.125,
"rewards/rejected": -16.65625,
"step": 130
},
{
"epoch": 0.112,
"grad_norm": 6.132805095404835,
"learning_rate": 4.937777777777777e-07,
"logits/chosen": 0.42326658964157104,
"logits/rejected": 0.41484373807907104,
"logps/chosen": -286.6000061035156,
"logps/rejected": -567.0,
"loss": 0.0025,
"nll_loss": 1.1179687976837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.112499952316284,
"rewards/margins": 14.568750381469727,
"rewards/rejected": -17.6875,
"step": 140
},
{
"epoch": 0.12,
"grad_norm": 0.021958637023240073,
"learning_rate": 4.893333333333333e-07,
"logits/chosen": 0.45771485567092896,
"logits/rejected": 0.517138659954071,
"logps/chosen": -292.1000061035156,
"logps/rejected": -587.5999755859375,
"loss": 0.0195,
"nll_loss": 1.0183594226837158,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -3.3921875953674316,
"rewards/margins": 15.631250381469727,
"rewards/rejected": -19.018749237060547,
"step": 150
},
{
"epoch": 0.128,
"grad_norm": 0.13947826744502106,
"learning_rate": 4.848888888888888e-07,
"logits/chosen": 0.2855468690395355,
"logits/rejected": 0.37548828125,
"logps/chosen": -297.95001220703125,
"logps/rejected": -582.4000244140625,
"loss": 0.0012,
"nll_loss": 1.080078125,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.78515625,
"rewards/margins": 15.831250190734863,
"rewards/rejected": -18.618749618530273,
"step": 160
},
{
"epoch": 0.136,
"grad_norm": 0.16318898226871553,
"learning_rate": 4.804444444444444e-07,
"logits/chosen": 0.34228515625,
"logits/rejected": 0.42265623807907104,
"logps/chosen": -280.8999938964844,
"logps/rejected": -588.0,
"loss": 0.0029,
"nll_loss": 1.0382812023162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.028125047683716,
"rewards/margins": 16.481250762939453,
"rewards/rejected": -19.512500762939453,
"step": 170
},
{
"epoch": 0.144,
"grad_norm": 16.913628536041927,
"learning_rate": 4.76e-07,
"logits/chosen": 0.29877930879592896,
"logits/rejected": 0.38134765625,
"logps/chosen": -337.3999938964844,
"logps/rejected": -592.4000244140625,
"loss": 0.0117,
"nll_loss": 1.0945312976837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.65234375,
"rewards/margins": 17.700000762939453,
"rewards/rejected": -20.362499237060547,
"step": 180
},
{
"epoch": 0.152,
"grad_norm": 9.054585013706896,
"learning_rate": 4.7155555555555556e-07,
"logits/chosen": 0.4610839784145355,
"logits/rejected": 0.5546875,
"logps/chosen": -301.6000061035156,
"logps/rejected": -574.4000244140625,
"loss": 0.0198,
"nll_loss": 1.0695312023162842,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.5296874046325684,
"rewards/margins": 16.774999618530273,
"rewards/rejected": -19.318750381469727,
"step": 190
},
{
"epoch": 0.16,
"grad_norm": 0.025795673574277502,
"learning_rate": 4.6711111111111104e-07,
"logits/chosen": 0.42558592557907104,
"logits/rejected": 0.5215820074081421,
"logps/chosen": -290.5,
"logps/rejected": -604.4000244140625,
"loss": 0.0011,
"nll_loss": 1.0128905773162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.750781297683716,
"rewards/margins": 18.137500762939453,
"rewards/rejected": -20.887500762939453,
"step": 200
},
{
"epoch": 0.168,
"grad_norm": 0.049230968184915055,
"learning_rate": 4.6266666666666663e-07,
"logits/chosen": 0.4349609315395355,
"logits/rejected": 0.5816406011581421,
"logps/chosen": -299.20001220703125,
"logps/rejected": -581.4000244140625,
"loss": 0.0012,
"nll_loss": 1.058984398841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.5531249046325684,
"rewards/margins": 17.506250381469727,
"rewards/rejected": -20.075000762939453,
"step": 210
},
{
"epoch": 0.176,
"grad_norm": 0.12051591908423682,
"learning_rate": 4.5822222222222216e-07,
"logits/chosen": 0.33723145723342896,
"logits/rejected": 0.4976562559604645,
"logps/chosen": -331.29998779296875,
"logps/rejected": -598.7999877929688,
"loss": 0.0014,
"nll_loss": 1.0636718273162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.875,
"rewards/margins": 17.549999237060547,
"rewards/rejected": -20.412500381469727,
"step": 220
},
{
"epoch": 0.184,
"grad_norm": 0.02082872725280439,
"learning_rate": 4.5377777777777775e-07,
"logits/chosen": 0.44482421875,
"logits/rejected": 0.587109386920929,
"logps/chosen": -266.6000061035156,
"logps/rejected": -608.0,
"loss": 0.0113,
"nll_loss": 0.9273437261581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.444531202316284,
"rewards/margins": 18.037500381469727,
"rewards/rejected": -20.487499237060547,
"step": 230
},
{
"epoch": 0.192,
"grad_norm": 1.2819745634114876,
"learning_rate": 4.493333333333333e-07,
"logits/chosen": 0.3896484375,
"logits/rejected": 0.533886730670929,
"logps/chosen": -330.5,
"logps/rejected": -569.7999877929688,
"loss": 0.0097,
"nll_loss": 0.998828113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.526562452316284,
"rewards/margins": 17.625,
"rewards/rejected": -20.162500381469727,
"step": 240
},
{
"epoch": 0.2,
"grad_norm": 0.6312660403152253,
"learning_rate": 4.4488888888888887e-07,
"logits/chosen": 0.39438170194625854,
"logits/rejected": 0.45518797636032104,
"logps/chosen": -317.6000061035156,
"logps/rejected": -540.5999755859375,
"loss": 0.0351,
"nll_loss": 1.03515625,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.47265625,
"rewards/margins": 15.524999618530273,
"rewards/rejected": -18.0,
"step": 250
},
{
"epoch": 0.208,
"grad_norm": 0.013098055970262888,
"learning_rate": 4.4044444444444445e-07,
"logits/chosen": 0.3513244688510895,
"logits/rejected": 0.47832030057907104,
"logps/chosen": -316.3999938964844,
"logps/rejected": -599.5999755859375,
"loss": 0.0237,
"nll_loss": 1.089453101158142,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.32421875,
"rewards/margins": 17.71875,
"rewards/rejected": -20.075000762939453,
"step": 260
},
{
"epoch": 0.216,
"grad_norm": 0.02660915986368852,
"learning_rate": 4.36e-07,
"logits/chosen": 0.4693359434604645,
"logits/rejected": 0.5892578363418579,
"logps/chosen": -300.79998779296875,
"logps/rejected": -594.2000122070312,
"loss": 0.0108,
"nll_loss": 1.070703148841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.1343750953674316,
"rewards/margins": 17.587499618530273,
"rewards/rejected": -20.737499237060547,
"step": 270
},
{
"epoch": 0.224,
"grad_norm": 0.026490947430251415,
"learning_rate": 4.3155555555555557e-07,
"logits/chosen": 0.40800780057907104,
"logits/rejected": 0.58984375,
"logps/chosen": -319.79998779296875,
"logps/rejected": -617.4000244140625,
"loss": 0.0016,
"nll_loss": 1.0261719226837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.5062499046325684,
"rewards/margins": 19.556249618530273,
"rewards/rejected": -22.087499618530273,
"step": 280
},
{
"epoch": 0.232,
"grad_norm": 0.026710360631259922,
"learning_rate": 4.271111111111111e-07,
"logits/chosen": 0.554211437702179,
"logits/rejected": 0.658398449420929,
"logps/chosen": -281.5,
"logps/rejected": -627.2000122070312,
"loss": 0.0065,
"nll_loss": 0.9703124761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.4585938453674316,
"rewards/margins": 21.037500381469727,
"rewards/rejected": -23.5,
"step": 290
},
{
"epoch": 0.24,
"grad_norm": 0.0629472840725987,
"learning_rate": 4.226666666666667e-07,
"logits/chosen": 0.492919921875,
"logits/rejected": 0.6646484136581421,
"logps/chosen": -280.3999938964844,
"logps/rejected": -636.4000244140625,
"loss": 0.0011,
"nll_loss": 1.0402343273162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.484375,
"rewards/margins": 21.662500381469727,
"rewards/rejected": -24.125,
"step": 300
},
{
"epoch": 0.248,
"grad_norm": 0.015481044961707728,
"learning_rate": 4.1822222222222217e-07,
"logits/chosen": 0.518505871295929,
"logits/rejected": 0.6767578125,
"logps/chosen": -301.6000061035156,
"logps/rejected": -672.4000244140625,
"loss": 0.001,
"nll_loss": 1.0242187976837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.29296875,
"rewards/margins": 22.825000762939453,
"rewards/rejected": -25.087499618530273,
"step": 310
},
{
"epoch": 0.256,
"grad_norm": 0.011465936087268223,
"learning_rate": 4.1377777777777776e-07,
"logits/chosen": 0.39887696504592896,
"logits/rejected": 0.503710925579071,
"logps/chosen": -398.6000061035156,
"logps/rejected": -589.7999877929688,
"loss": 0.0023,
"nll_loss": 1.108984351158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.241406202316284,
"rewards/margins": 18.181249618530273,
"rewards/rejected": -20.412500381469727,
"step": 320
},
{
"epoch": 0.264,
"grad_norm": 0.009443129326879707,
"learning_rate": 4.093333333333333e-07,
"logits/chosen": 0.42631834745407104,
"logits/rejected": 0.555468738079071,
"logps/chosen": -311.70001220703125,
"logps/rejected": -587.2000122070312,
"loss": 0.0022,
"nll_loss": 0.985546886920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.837499976158142,
"rewards/margins": 18.837499618530273,
"rewards/rejected": -20.649999618530273,
"step": 330
},
{
"epoch": 0.272,
"grad_norm": 0.0101159188120434,
"learning_rate": 4.048888888888889e-07,
"logits/chosen": 0.3521057069301605,
"logits/rejected": 0.47089844942092896,
"logps/chosen": -259.6000061035156,
"logps/rejected": -614.2000122070312,
"loss": 0.001,
"nll_loss": 0.975390613079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.9140625,
"rewards/margins": 20.387500762939453,
"rewards/rejected": -22.318750381469727,
"step": 340
},
{
"epoch": 0.28,
"grad_norm": 0.01605643719718742,
"learning_rate": 4.004444444444444e-07,
"logits/chosen": 0.3182617127895355,
"logits/rejected": 0.4351562559604645,
"logps/chosen": -269.79998779296875,
"logps/rejected": -607.2000122070312,
"loss": 0.0078,
"nll_loss": 0.967968761920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.5499999523162842,
"rewards/margins": 20.274999618530273,
"rewards/rejected": -21.799999237060547,
"step": 350
},
{
"epoch": 0.288,
"grad_norm": 0.01795986015889223,
"learning_rate": 3.96e-07,
"logits/chosen": 0.4128173887729645,
"logits/rejected": 0.5787109136581421,
"logps/chosen": -280.20001220703125,
"logps/rejected": -583.5999755859375,
"loss": 0.0054,
"nll_loss": 1.0378906726837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6953125,
"rewards/margins": 19.5,
"rewards/rejected": -21.212499618530273,
"step": 360
},
{
"epoch": 0.296,
"grad_norm": 0.032900120617305545,
"learning_rate": 3.9155555555555553e-07,
"logits/chosen": 0.3366943299770355,
"logits/rejected": 0.56396484375,
"logps/chosen": -314.6000061035156,
"logps/rejected": -623.2000122070312,
"loss": 0.0012,
"nll_loss": 1.031640648841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.967187523841858,
"rewards/margins": 22.0,
"rewards/rejected": -23.962499618530273,
"step": 370
},
{
"epoch": 0.304,
"grad_norm": 0.03368133834851561,
"learning_rate": 3.871111111111111e-07,
"logits/chosen": 0.443115234375,
"logits/rejected": 0.6058593988418579,
"logps/chosen": -303.5,
"logps/rejected": -611.2000122070312,
"loss": 0.0033,
"nll_loss": 1.0988280773162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.264843702316284,
"rewards/margins": 20.337499618530273,
"rewards/rejected": -22.612499237060547,
"step": 380
},
{
"epoch": 0.312,
"grad_norm": 8.91531946245202,
"learning_rate": 3.8266666666666665e-07,
"logits/chosen": 0.37250977754592896,
"logits/rejected": 0.570019543170929,
"logps/chosen": -355.79998779296875,
"logps/rejected": -587.0,
"loss": 0.0075,
"nll_loss": 1.019140601158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7722656726837158,
"rewards/margins": 18.850000381469727,
"rewards/rejected": -20.612499237060547,
"step": 390
},
{
"epoch": 0.32,
"grad_norm": 0.014188316050120144,
"learning_rate": 3.7822222222222224e-07,
"logits/chosen": 0.28227537870407104,
"logits/rejected": 0.44189453125,
"logps/chosen": -302.5,
"logps/rejected": -622.2000122070312,
"loss": 0.0018,
"nll_loss": 1.0828125476837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.759374976158142,
"rewards/margins": 20.856250762939453,
"rewards/rejected": -22.625,
"step": 400
},
{
"epoch": 0.328,
"grad_norm": 0.01629482028046429,
"learning_rate": 3.7377777777777777e-07,
"logits/chosen": 0.4126953184604645,
"logits/rejected": 0.501953125,
"logps/chosen": -356.8999938964844,
"logps/rejected": -628.0,
"loss": 0.001,
"nll_loss": 1.040624976158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.0132813453674316,
"rewards/margins": 21.137500762939453,
"rewards/rejected": -23.149999618530273,
"step": 410
},
{
"epoch": 0.336,
"grad_norm": 0.017142773967819217,
"learning_rate": 3.693333333333333e-07,
"logits/chosen": 0.3507751524448395,
"logits/rejected": 0.4869628846645355,
"logps/chosen": -333.95001220703125,
"logps/rejected": -605.2000122070312,
"loss": 0.001,
"nll_loss": 0.9984375238418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.606054663658142,
"rewards/margins": 20.237499237060547,
"rewards/rejected": -21.862499237060547,
"step": 420
},
{
"epoch": 0.344,
"grad_norm": 0.18744587283856406,
"learning_rate": 3.6488888888888884e-07,
"logits/chosen": 0.4670043885707855,
"logits/rejected": 0.5835937261581421,
"logps/chosen": -272.5,
"logps/rejected": -609.2000122070312,
"loss": 0.001,
"nll_loss": 0.9847656488418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.394140601158142,
"rewards/margins": 20.149999618530273,
"rewards/rejected": -21.5625,
"step": 430
},
{
"epoch": 0.352,
"grad_norm": 0.7115134446778305,
"learning_rate": 3.604444444444444e-07,
"logits/chosen": 0.32639771699905396,
"logits/rejected": 0.49003905057907104,
"logps/chosen": -271.1000061035156,
"logps/rejected": -617.0,
"loss": 0.001,
"nll_loss": 0.901562511920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.182031273841858,
"rewards/margins": 21.112499237060547,
"rewards/rejected": -22.3125,
"step": 440
},
{
"epoch": 0.36,
"grad_norm": 0.08581115615312221,
"learning_rate": 3.5599999999999996e-07,
"logits/chosen": 0.4197753965854645,
"logits/rejected": 0.5601562261581421,
"logps/chosen": -299.20001220703125,
"logps/rejected": -558.4000244140625,
"loss": 0.0051,
"nll_loss": 0.979296863079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.885546863079071,
"rewards/margins": 18.625,
"rewards/rejected": -19.512500762939453,
"step": 450
},
{
"epoch": 0.368,
"grad_norm": 0.050537690816069084,
"learning_rate": 3.5155555555555554e-07,
"logits/chosen": 0.36616212129592896,
"logits/rejected": 0.5220702886581421,
"logps/chosen": -293.20001220703125,
"logps/rejected": -601.4000244140625,
"loss": 0.0011,
"nll_loss": 1.007421851158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.21875,
"rewards/margins": 20.475000381469727,
"rewards/rejected": -21.6875,
"step": 460
},
{
"epoch": 0.376,
"grad_norm": 0.02175945703204624,
"learning_rate": 3.471111111111111e-07,
"logits/chosen": 0.4150390625,
"logits/rejected": 0.5416015386581421,
"logps/chosen": -276.8999938964844,
"logps/rejected": -617.7999877929688,
"loss": 0.0011,
"nll_loss": 1.1179687976837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.5373046398162842,
"rewards/margins": 20.962499618530273,
"rewards/rejected": -22.5,
"step": 470
},
{
"epoch": 0.384,
"grad_norm": 0.026063826437843437,
"learning_rate": 3.4266666666666666e-07,
"logits/chosen": 0.45039063692092896,
"logits/rejected": 0.612500011920929,
"logps/chosen": -272.3999938964844,
"logps/rejected": -599.7999877929688,
"loss": 0.0011,
"nll_loss": 0.9125000238418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.2761719226837158,
"rewards/margins": 21.975000381469727,
"rewards/rejected": -23.25,
"step": 480
},
{
"epoch": 0.392,
"grad_norm": 0.014336581093924161,
"learning_rate": 3.382222222222222e-07,
"logits/chosen": 0.38768309354782104,
"logits/rejected": 0.51953125,
"logps/chosen": -373.20001220703125,
"logps/rejected": -591.5999755859375,
"loss": 0.0011,
"nll_loss": 1.019921898841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.96875,
"rewards/margins": 20.899999618530273,
"rewards/rejected": -22.862499237060547,
"step": 490
},
{
"epoch": 0.4,
"grad_norm": 0.015681965151029324,
"learning_rate": 3.337777777777778e-07,
"logits/chosen": 0.28288573026657104,
"logits/rejected": 0.4932617247104645,
"logps/chosen": -301.70001220703125,
"logps/rejected": -696.0,
"loss": 0.0011,
"nll_loss": 1.058203101158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.958593726158142,
"rewards/margins": 25.9375,
"rewards/rejected": -27.912500381469727,
"step": 500
},
{
"epoch": 0.408,
"grad_norm": 0.029148974279792687,
"learning_rate": 3.293333333333333e-07,
"logits/chosen": 0.31782227754592896,
"logits/rejected": 0.4458984434604645,
"logps/chosen": -302.6000061035156,
"logps/rejected": -636.0,
"loss": 0.0136,
"nll_loss": 0.9886718988418579,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.5339844226837158,
"rewards/margins": 22.774999618530273,
"rewards/rejected": -24.287500381469727,
"step": 510
},
{
"epoch": 0.416,
"grad_norm": 0.019209526445581045,
"learning_rate": 3.248888888888889e-07,
"logits/chosen": 0.3396972715854645,
"logits/rejected": 0.4786132872104645,
"logps/chosen": -296.29998779296875,
"logps/rejected": -648.0,
"loss": 0.0011,
"nll_loss": 1.05078125,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.097070336341858,
"rewards/margins": 23.450000762939453,
"rewards/rejected": -24.575000762939453,
"step": 520
},
{
"epoch": 0.424,
"grad_norm": 0.016375124643676898,
"learning_rate": 3.204444444444444e-07,
"logits/chosen": 0.2938476502895355,
"logits/rejected": 0.45917969942092896,
"logps/chosen": -328.5,
"logps/rejected": -683.2000122070312,
"loss": 0.0011,
"nll_loss": 1.078515648841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.935742199420929,
"rewards/margins": 24.362499237060547,
"rewards/rejected": -25.274999618530273,
"step": 530
},
{
"epoch": 0.432,
"grad_norm": 0.03023805422018604,
"learning_rate": 3.1599999999999997e-07,
"logits/chosen": 0.3899902403354645,
"logits/rejected": 0.4580078125,
"logps/chosen": -258.04998779296875,
"logps/rejected": -600.0,
"loss": 0.0009,
"nll_loss": 0.878125011920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.675488293170929,
"rewards/margins": 21.325000762939453,
"rewards/rejected": -22.0,
"step": 540
},
{
"epoch": 0.44,
"grad_norm": 0.01610635441474965,
"learning_rate": 3.115555555555555e-07,
"logits/chosen": 0.3402954041957855,
"logits/rejected": 0.46113282442092896,
"logps/chosen": -274.29998779296875,
"logps/rejected": -666.4000244140625,
"loss": 0.001,
"nll_loss": 0.967968761920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.11225585639476776,
"rewards/margins": 23.512500762939453,
"rewards/rejected": -23.625,
"step": 550
},
{
"epoch": 0.448,
"grad_norm": 0.09940501062248701,
"learning_rate": 3.071111111111111e-07,
"logits/chosen": 0.13032226264476776,
"logits/rejected": 0.25639647245407104,
"logps/chosen": -349.79998779296875,
"logps/rejected": -608.5999755859375,
"loss": 0.0056,
"nll_loss": 1.128515601158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.763476550579071,
"rewards/margins": 20.899999618530273,
"rewards/rejected": -21.637500762939453,
"step": 560
},
{
"epoch": 0.456,
"grad_norm": 0.015044416279848708,
"learning_rate": 3.026666666666666e-07,
"logits/chosen": 0.2685302793979645,
"logits/rejected": 0.44746094942092896,
"logps/chosen": -277.29998779296875,
"logps/rejected": -619.2000122070312,
"loss": 0.0032,
"nll_loss": 0.977343738079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5107055902481079,
"rewards/margins": 22.412500381469727,
"rewards/rejected": -22.912500381469727,
"step": 570
},
{
"epoch": 0.464,
"grad_norm": 0.010492314365964279,
"learning_rate": 2.982222222222222e-07,
"logits/chosen": 0.24605712294578552,
"logits/rejected": 0.38178712129592896,
"logps/chosen": -282.45001220703125,
"logps/rejected": -596.4000244140625,
"loss": 0.001,
"nll_loss": 0.9585937261581421,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.3153625428676605,
"rewards/margins": 21.8125,
"rewards/rejected": -22.125,
"step": 580
},
{
"epoch": 0.472,
"grad_norm": 0.011244630233026583,
"learning_rate": 2.937777777777778e-07,
"logits/chosen": 0.185791015625,
"logits/rejected": 0.36054688692092896,
"logps/chosen": -298.29998779296875,
"logps/rejected": -561.4000244140625,
"loss": 0.001,
"nll_loss": 0.9632812738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.29365235567092896,
"rewards/margins": 20.21875,
"rewards/rejected": -20.512500762939453,
"step": 590
},
{
"epoch": 0.48,
"grad_norm": 0.014891293094420017,
"learning_rate": 2.8933333333333333e-07,
"logits/chosen": 0.3611816465854645,
"logits/rejected": 0.46757811307907104,
"logps/chosen": -321.70001220703125,
"logps/rejected": -618.0,
"loss": 0.0012,
"nll_loss": 1.0792968273162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.0291016101837158,
"rewards/margins": 22.037500381469727,
"rewards/rejected": -23.049999237060547,
"step": 600
},
{
"epoch": 0.488,
"grad_norm": 0.016715039332789686,
"learning_rate": 2.848888888888889e-07,
"logits/chosen": 0.3804687559604645,
"logits/rejected": 0.546875,
"logps/chosen": -266.79998779296875,
"logps/rejected": -629.5999755859375,
"loss": 0.0009,
"nll_loss": 0.9234374761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6703125238418579,
"rewards/margins": 22.493749618530273,
"rewards/rejected": -23.149999618530273,
"step": 610
},
{
"epoch": 0.496,
"grad_norm": 0.007446183758849815,
"learning_rate": 2.8044444444444445e-07,
"logits/chosen": 0.38258057832717896,
"logits/rejected": 0.46435546875,
"logps/chosen": -264.04998779296875,
"logps/rejected": -692.4000244140625,
"loss": 0.001,
"nll_loss": 0.966015636920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.674023449420929,
"rewards/margins": 25.799999237060547,
"rewards/rejected": -26.475000381469727,
"step": 620
},
{
"epoch": 0.504,
"grad_norm": 0.06960779594217158,
"learning_rate": 2.7600000000000004e-07,
"logits/chosen": 0.22910156846046448,
"logits/rejected": 0.3974609375,
"logps/chosen": -257.70001220703125,
"logps/rejected": -636.4000244140625,
"loss": 0.0136,
"nll_loss": 0.969531238079071,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.108984351158142,
"rewards/margins": 23.912500381469727,
"rewards/rejected": -25.012500762939453,
"step": 630
},
{
"epoch": 0.512,
"grad_norm": 0.018519025389240995,
"learning_rate": 2.715555555555555e-07,
"logits/chosen": 0.5155273675918579,
"logits/rejected": 0.7171875238418579,
"logps/chosen": -312.0,
"logps/rejected": -652.0,
"loss": 0.0041,
"nll_loss": 0.944531261920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.06640625,
"rewards/margins": 25.774999618530273,
"rewards/rejected": -26.850000381469727,
"step": 640
},
{
"epoch": 0.52,
"grad_norm": 0.02373063016084682,
"learning_rate": 2.671111111111111e-07,
"logits/chosen": 0.4817748963832855,
"logits/rejected": 0.6402343511581421,
"logps/chosen": -285.20001220703125,
"logps/rejected": -635.0,
"loss": 0.0023,
"nll_loss": 1.037500023841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.0257811546325684,
"rewards/margins": 25.0,
"rewards/rejected": -27.0,
"step": 650
},
{
"epoch": 0.528,
"grad_norm": 0.04312745328739261,
"learning_rate": 2.6266666666666664e-07,
"logits/chosen": 0.45927733182907104,
"logits/rejected": 0.6669921875,
"logps/chosen": -315.5,
"logps/rejected": -683.5999755859375,
"loss": 0.0011,
"nll_loss": 1.0671875476837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6691405773162842,
"rewards/margins": 27.862499237060547,
"rewards/rejected": -29.524999618530273,
"step": 660
},
{
"epoch": 0.536,
"grad_norm": 8.406219083200051,
"learning_rate": 2.582222222222222e-07,
"logits/chosen": 0.51904296875,
"logits/rejected": 0.666015625,
"logps/chosen": -280.0,
"logps/rejected": -706.7999877929688,
"loss": 0.0013,
"nll_loss": 1.0515625476837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.578515648841858,
"rewards/margins": 28.737499237060547,
"rewards/rejected": -30.299999237060547,
"step": 670
},
{
"epoch": 0.544,
"grad_norm": 0.00797165057348372,
"learning_rate": 2.5377777777777776e-07,
"logits/chosen": 0.425048828125,
"logits/rejected": 0.6025390625,
"logps/chosen": -304.20001220703125,
"logps/rejected": -616.2000122070312,
"loss": 0.0073,
"nll_loss": 0.931640625,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.0339844226837158,
"rewards/margins": 23.674999237060547,
"rewards/rejected": -24.6875,
"step": 680
},
{
"epoch": 0.552,
"grad_norm": 0.02117016630770859,
"learning_rate": 2.493333333333333e-07,
"logits/chosen": 0.43408203125,
"logits/rejected": 0.5884765386581421,
"logps/chosen": -271.20001220703125,
"logps/rejected": -651.2000122070312,
"loss": 0.001,
"nll_loss": 1.0207030773162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.912792980670929,
"rewards/margins": 25.412500381469727,
"rewards/rejected": -26.325000762939453,
"step": 690
},
{
"epoch": 0.56,
"grad_norm": 0.05013906609321948,
"learning_rate": 2.448888888888889e-07,
"logits/chosen": 0.5001465082168579,
"logits/rejected": 0.587890625,
"logps/chosen": -291.0,
"logps/rejected": -642.5999755859375,
"loss": 0.0011,
"nll_loss": 1.056249976158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5542968511581421,
"rewards/margins": 24.600000381469727,
"rewards/rejected": -25.149999618530273,
"step": 700
},
{
"epoch": 0.568,
"grad_norm": 0.010508494344882753,
"learning_rate": 2.404444444444444e-07,
"logits/chosen": 0.4154296815395355,
"logits/rejected": 0.53466796875,
"logps/chosen": -281.8999938964844,
"logps/rejected": -616.5999755859375,
"loss": 0.001,
"nll_loss": 1.004296898841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6047607660293579,
"rewards/margins": 23.875,
"rewards/rejected": -24.487499237060547,
"step": 710
},
{
"epoch": 0.576,
"grad_norm": 0.02085599761375334,
"learning_rate": 2.3599999999999997e-07,
"logits/chosen": 0.40234375,
"logits/rejected": 0.593945324420929,
"logps/chosen": -295.20001220703125,
"logps/rejected": -631.2000122070312,
"loss": 0.0011,
"nll_loss": 1.082421898841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5250488519668579,
"rewards/margins": 23.5625,
"rewards/rejected": -24.112499237060547,
"step": 720
},
{
"epoch": 0.584,
"grad_norm": 0.022872642095837056,
"learning_rate": 2.3155555555555553e-07,
"logits/chosen": 0.3960937559604645,
"logits/rejected": 0.5274413824081421,
"logps/chosen": -269.79998779296875,
"logps/rejected": -597.2000122070312,
"loss": 0.0009,
"nll_loss": 0.9195312261581421,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.25908201932907104,
"rewards/margins": 22.393749237060547,
"rewards/rejected": -22.643749237060547,
"step": 730
},
{
"epoch": 0.592,
"grad_norm": 0.012548738999103248,
"learning_rate": 2.2711111111111112e-07,
"logits/chosen": 0.3612304627895355,
"logits/rejected": 0.47871094942092896,
"logps/chosen": -264.20001220703125,
"logps/rejected": -630.0,
"loss": 0.0009,
"nll_loss": 0.899609386920929,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.07817383110523224,
"rewards/margins": 23.512500762939453,
"rewards/rejected": -23.649999618530273,
"step": 740
},
{
"epoch": 0.6,
"grad_norm": 0.009873598939126866,
"learning_rate": 2.2266666666666668e-07,
"logits/chosen": 0.3773437440395355,
"logits/rejected": 0.5074218511581421,
"logps/chosen": -303.3999938964844,
"logps/rejected": -563.2000122070312,
"loss": 0.0009,
"nll_loss": 0.873828113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.1182861328125,
"rewards/margins": 20.112499237060547,
"rewards/rejected": -19.987499237060547,
"step": 750
},
{
"epoch": 0.608,
"grad_norm": 2.1722284009630792,
"learning_rate": 2.1822222222222224e-07,
"logits/chosen": 0.45292967557907104,
"logits/rejected": 0.45878905057907104,
"logps/chosen": -267.79998779296875,
"logps/rejected": -575.7999877929688,
"loss": 0.0013,
"nll_loss": 0.91796875,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.02753906324505806,
"rewards/margins": 20.549999237060547,
"rewards/rejected": -20.587499618530273,
"step": 760
},
{
"epoch": 0.616,
"grad_norm": 0.015566357128601925,
"learning_rate": 2.1377777777777777e-07,
"logits/chosen": 0.3982177674770355,
"logits/rejected": 0.540234386920929,
"logps/chosen": -265.5,
"logps/rejected": -687.5999755859375,
"loss": 0.0096,
"nll_loss": 0.9742187261581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.7779296636581421,
"rewards/margins": 28.487499237060547,
"rewards/rejected": -29.274999618530273,
"step": 770
},
{
"epoch": 0.624,
"grad_norm": 0.0852360643714337,
"learning_rate": 2.0933333333333333e-07,
"logits/chosen": 0.3617187440395355,
"logits/rejected": 0.48701173067092896,
"logps/chosen": -265.8500061035156,
"logps/rejected": -620.0,
"loss": 0.0009,
"nll_loss": 0.9300781488418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.07557983696460724,
"rewards/margins": 23.362499237060547,
"rewards/rejected": -23.274999618530273,
"step": 780
},
{
"epoch": 0.632,
"grad_norm": 0.00755420050240308,
"learning_rate": 2.048888888888889e-07,
"logits/chosen": 0.28996580839157104,
"logits/rejected": 0.535351574420929,
"logps/chosen": -256.5,
"logps/rejected": -633.2000122070312,
"loss": 0.007,
"nll_loss": 0.967578113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.40595704317092896,
"rewards/margins": 24.274999618530273,
"rewards/rejected": -24.700000762939453,
"step": 790
},
{
"epoch": 0.64,
"grad_norm": 0.01713698594594714,
"learning_rate": 2.0044444444444445e-07,
"logits/chosen": 0.24697265028953552,
"logits/rejected": 0.4248046875,
"logps/chosen": -283.8999938964844,
"logps/rejected": -630.0,
"loss": 0.001,
"nll_loss": 0.9644531011581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.10643310844898224,
"rewards/margins": 23.575000762939453,
"rewards/rejected": -23.487499237060547,
"step": 800
},
{
"epoch": 0.648,
"grad_norm": 0.008040661363419143,
"learning_rate": 1.96e-07,
"logits/chosen": 0.31492918729782104,
"logits/rejected": 0.41838377714157104,
"logps/chosen": -306.79998779296875,
"logps/rejected": -615.2000122070312,
"loss": 0.0014,
"nll_loss": 0.9761718511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2574706971645355,
"rewards/margins": 22.5,
"rewards/rejected": -22.762500762939453,
"step": 810
},
{
"epoch": 0.656,
"grad_norm": 0.011050522622669116,
"learning_rate": 1.9155555555555554e-07,
"logits/chosen": 0.36284178495407104,
"logits/rejected": 0.5755859613418579,
"logps/chosen": -284.6000061035156,
"logps/rejected": -627.2000122070312,
"loss": 0.001,
"nll_loss": 1.021875023841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15849609673023224,
"rewards/margins": 24.737499237060547,
"rewards/rejected": -24.912500381469727,
"step": 820
},
{
"epoch": 0.664,
"grad_norm": 1.3243299715147776,
"learning_rate": 1.871111111111111e-07,
"logits/chosen": 0.40766602754592896,
"logits/rejected": 0.55078125,
"logps/chosen": -256.0,
"logps/rejected": -638.7999877929688,
"loss": 0.001,
"nll_loss": 0.9371093511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.19802245497703552,
"rewards/margins": 25.137500762939453,
"rewards/rejected": -24.962499618530273,
"step": 830
},
{
"epoch": 0.672,
"grad_norm": 0.14410920057877055,
"learning_rate": 1.8266666666666666e-07,
"logits/chosen": 0.37744140625,
"logits/rejected": 0.558398425579071,
"logps/chosen": -298.3999938964844,
"logps/rejected": -634.0,
"loss": 0.0016,
"nll_loss": 0.9925781488418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5572754144668579,
"rewards/margins": 24.287500381469727,
"rewards/rejected": -24.862499237060547,
"step": 840
},
{
"epoch": 0.68,
"grad_norm": 0.008316643842253967,
"learning_rate": 1.7822222222222222e-07,
"logits/chosen": 0.3272949159145355,
"logits/rejected": 0.524121105670929,
"logps/chosen": -298.6000061035156,
"logps/rejected": -672.7999877929688,
"loss": 0.0127,
"nll_loss": 0.9996093511581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.3311523497104645,
"rewards/margins": 25.774999618530273,
"rewards/rejected": -26.100000381469727,
"step": 850
},
{
"epoch": 0.688,
"grad_norm": 0.008261030762950254,
"learning_rate": 1.7377777777777778e-07,
"logits/chosen": 0.4465576112270355,
"logits/rejected": 0.6617187261581421,
"logps/chosen": -281.1000061035156,
"logps/rejected": -612.0,
"loss": 0.0011,
"nll_loss": 0.98046875,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.07645263522863388,
"rewards/margins": 22.987499237060547,
"rewards/rejected": -23.0625,
"step": 860
},
{
"epoch": 0.696,
"grad_norm": 0.012271312104445061,
"learning_rate": 1.6933333333333334e-07,
"logits/chosen": 0.4715820252895355,
"logits/rejected": 0.6121581792831421,
"logps/chosen": -285.6000061035156,
"logps/rejected": -622.4000244140625,
"loss": 0.0011,
"nll_loss": 0.889453113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.10859374701976776,
"rewards/margins": 24.6875,
"rewards/rejected": -24.587499618530273,
"step": 870
},
{
"epoch": 0.704,
"grad_norm": 0.008446600990786186,
"learning_rate": 1.6488888888888887e-07,
"logits/chosen": 0.4478515684604645,
"logits/rejected": 0.648632824420929,
"logps/chosen": -293.20001220703125,
"logps/rejected": -619.2000122070312,
"loss": 0.0008,
"nll_loss": 0.8179687261581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.14560547471046448,
"rewards/margins": 24.024999618530273,
"rewards/rejected": -23.875,
"step": 880
},
{
"epoch": 0.712,
"grad_norm": 0.007390463100531587,
"learning_rate": 1.6044444444444443e-07,
"logits/chosen": 0.47856444120407104,
"logits/rejected": 0.5884765386581421,
"logps/chosen": -263.3999938964844,
"logps/rejected": -658.0,
"loss": 0.0009,
"nll_loss": 0.9476562738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.11513672024011612,
"rewards/margins": 26.512500762939453,
"rewards/rejected": -26.375,
"step": 890
},
{
"epoch": 0.72,
"grad_norm": 0.00835958049715363,
"learning_rate": 1.56e-07,
"logits/chosen": 0.24870605766773224,
"logits/rejected": 0.455322265625,
"logps/chosen": -257.79998779296875,
"logps/rejected": -668.0,
"loss": 0.0009,
"nll_loss": 0.9390624761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.04535522311925888,
"rewards/margins": 26.987499237060547,
"rewards/rejected": -27.037500381469727,
"step": 900
},
{
"epoch": 0.728,
"grad_norm": 0.009631942998860495,
"learning_rate": 1.5155555555555555e-07,
"logits/chosen": 0.4524902403354645,
"logits/rejected": 0.631640613079071,
"logps/chosen": -226.6999969482422,
"logps/rejected": -674.4000244140625,
"loss": 0.0009,
"nll_loss": 0.9175781011581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.06406249850988388,
"rewards/margins": 27.8125,
"rewards/rejected": -27.75,
"step": 910
},
{
"epoch": 0.736,
"grad_norm": 0.0089109719938143,
"learning_rate": 1.4711111111111111e-07,
"logits/chosen": 0.3174072206020355,
"logits/rejected": 0.40791016817092896,
"logps/chosen": -313.5,
"logps/rejected": -609.5999755859375,
"loss": 0.001,
"nll_loss": 0.943359375,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3261352479457855,
"rewards/margins": 23.612499237060547,
"rewards/rejected": -23.287500381469727,
"step": 920
},
{
"epoch": 0.744,
"grad_norm": 0.017708759105074332,
"learning_rate": 1.4266666666666665e-07,
"logits/chosen": 0.30195313692092896,
"logits/rejected": 0.4756835997104645,
"logps/chosen": -248.89999389648438,
"logps/rejected": -651.5999755859375,
"loss": 0.0009,
"nll_loss": 0.919921875,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.13002929091453552,
"rewards/margins": 26.625,
"rewards/rejected": -26.487499237060547,
"step": 930
},
{
"epoch": 0.752,
"grad_norm": 0.02847716886316666,
"learning_rate": 1.382222222222222e-07,
"logits/chosen": 0.3302246034145355,
"logits/rejected": 0.5365234613418579,
"logps/chosen": -278.1000061035156,
"logps/rejected": -616.4000244140625,
"loss": 0.021,
"nll_loss": 1.017187476158142,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.10097656399011612,
"rewards/margins": 23.318750381469727,
"rewards/rejected": -23.237499237060547,
"step": 940
},
{
"epoch": 0.76,
"grad_norm": 0.0045187999902578015,
"learning_rate": 1.3377777777777777e-07,
"logits/chosen": 0.31437987089157104,
"logits/rejected": 0.5342773199081421,
"logps/chosen": -319.70001220703125,
"logps/rejected": -623.0,
"loss": 0.001,
"nll_loss": 0.9664062261581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3951171934604645,
"rewards/margins": 24.412500381469727,
"rewards/rejected": -24.0,
"step": 950
},
{
"epoch": 0.768,
"grad_norm": 0.07958922138660834,
"learning_rate": 1.2933333333333333e-07,
"logits/chosen": 0.3418945372104645,
"logits/rejected": 0.6171875,
"logps/chosen": -278.20001220703125,
"logps/rejected": -639.2000122070312,
"loss": 0.0009,
"nll_loss": 0.899609386920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.687060534954071,
"rewards/margins": 24.649999618530273,
"rewards/rejected": -24.0,
"step": 960
},
{
"epoch": 0.776,
"grad_norm": 0.008878025257232514,
"learning_rate": 1.2488888888888889e-07,
"logits/chosen": 0.36860352754592896,
"logits/rejected": 0.5000976324081421,
"logps/chosen": -252.1999969482422,
"logps/rejected": -642.4000244140625,
"loss": 0.0008,
"nll_loss": 0.837890625,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.6498047113418579,
"rewards/margins": 24.712499618530273,
"rewards/rejected": -24.075000762939453,
"step": 970
},
{
"epoch": 0.784,
"grad_norm": 0.00886428654417825,
"learning_rate": 1.2044444444444445e-07,
"logits/chosen": 0.3182617127895355,
"logits/rejected": 0.526562511920929,
"logps/chosen": -296.20001220703125,
"logps/rejected": -644.4000244140625,
"loss": 0.0009,
"nll_loss": 0.914843738079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.883837878704071,
"rewards/margins": 24.725000381469727,
"rewards/rejected": -23.837499618530273,
"step": 980
},
{
"epoch": 0.792,
"grad_norm": 0.010048774551857776,
"learning_rate": 1.16e-07,
"logits/chosen": 0.20156249403953552,
"logits/rejected": 0.45976561307907104,
"logps/chosen": -333.5,
"logps/rejected": -593.0,
"loss": 0.0011,
"nll_loss": 1.0207030773162842,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.47138673067092896,
"rewards/margins": 22.174999237060547,
"rewards/rejected": -21.6875,
"step": 990
},
{
"epoch": 0.8,
"grad_norm": 5.74291381581626,
"learning_rate": 1.1155555555555555e-07,
"logits/chosen": 0.3219238221645355,
"logits/rejected": 0.49858397245407104,
"logps/chosen": -278.75,
"logps/rejected": -644.2000122070312,
"loss": 0.0019,
"nll_loss": 1.019140601158142,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.7679687738418579,
"rewards/margins": 25.0,
"rewards/rejected": -24.200000762939453,
"step": 1000
},
{
"epoch": 0.808,
"grad_norm": 0.02192293336950942,
"learning_rate": 1.0711111111111111e-07,
"logits/chosen": 0.569140613079071,
"logits/rejected": 0.702343761920929,
"logps/chosen": -254.3000030517578,
"logps/rejected": -685.5999755859375,
"loss": 0.0009,
"nll_loss": 0.868359386920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.591113269329071,
"rewards/margins": 27.024999618530273,
"rewards/rejected": -26.424999237060547,
"step": 1010
},
{
"epoch": 0.816,
"grad_norm": 0.009300952984474926,
"learning_rate": 1.0266666666666666e-07,
"logits/chosen": 0.533111572265625,
"logits/rejected": 0.6361328363418579,
"logps/chosen": -233.25,
"logps/rejected": -630.4000244140625,
"loss": 0.0009,
"nll_loss": 0.8453124761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.5562499761581421,
"rewards/margins": 24.325000762939453,
"rewards/rejected": -23.75,
"step": 1020
},
{
"epoch": 0.824,
"grad_norm": 0.046196100888199323,
"learning_rate": 9.822222222222222e-08,
"logits/chosen": 0.42668455839157104,
"logits/rejected": 0.6005859375,
"logps/chosen": -274.5,
"logps/rejected": -626.4000244140625,
"loss": 0.001,
"nll_loss": 0.940625011920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.694628894329071,
"rewards/margins": 24.924999237060547,
"rewards/rejected": -24.25,
"step": 1030
},
{
"epoch": 0.832,
"grad_norm": 0.005292906779736119,
"learning_rate": 9.377777777777778e-08,
"logits/chosen": 0.4056640565395355,
"logits/rejected": 0.615234375,
"logps/chosen": -284.70001220703125,
"logps/rejected": -645.5999755859375,
"loss": 0.0041,
"nll_loss": 0.9710937738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.47343748807907104,
"rewards/margins": 25.4375,
"rewards/rejected": -24.962499618530273,
"step": 1040
},
{
"epoch": 0.84,
"grad_norm": 0.07957225270899694,
"learning_rate": 8.933333333333333e-08,
"logits/chosen": 0.4527343809604645,
"logits/rejected": 0.659960925579071,
"logps/chosen": -297.8999938964844,
"logps/rejected": -642.2000122070312,
"loss": 0.0011,
"nll_loss": 1.062890648841858,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.14885254204273224,
"rewards/margins": 25.674999237060547,
"rewards/rejected": -25.549999237060547,
"step": 1050
},
{
"epoch": 0.848,
"grad_norm": 0.02087149426239816,
"learning_rate": 8.488888888888889e-08,
"logits/chosen": 0.45966798067092896,
"logits/rejected": 0.640820324420929,
"logps/chosen": -267.29998779296875,
"logps/rejected": -657.5999755859375,
"loss": 0.0048,
"nll_loss": 0.8785156011581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.26695555448532104,
"rewards/margins": 27.325000762939453,
"rewards/rejected": -27.0625,
"step": 1060
},
{
"epoch": 0.856,
"grad_norm": 0.013941417548488667,
"learning_rate": 8.044444444444445e-08,
"logits/chosen": 0.39580076932907104,
"logits/rejected": 0.616015613079071,
"logps/chosen": -279.3999938964844,
"logps/rejected": -689.5999755859375,
"loss": 0.001,
"nll_loss": 0.974609375,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3509277403354645,
"rewards/margins": 29.162500381469727,
"rewards/rejected": -28.825000762939453,
"step": 1070
},
{
"epoch": 0.864,
"grad_norm": 0.05356592295983735,
"learning_rate": 7.599999999999999e-08,
"logits/chosen": 0.35834962129592896,
"logits/rejected": 0.5601562261581421,
"logps/chosen": -264.6000061035156,
"logps/rejected": -654.4000244140625,
"loss": 0.001,
"nll_loss": 0.9996093511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.139892578125,
"rewards/margins": 26.475000381469727,
"rewards/rejected": -26.337499618530273,
"step": 1080
},
{
"epoch": 0.872,
"grad_norm": 0.04051423211552107,
"learning_rate": 7.155555555555555e-08,
"logits/chosen": 0.4306640625,
"logits/rejected": 0.589648425579071,
"logps/chosen": -277.3999938964844,
"logps/rejected": -653.5999755859375,
"loss": 0.0045,
"nll_loss": 0.9339843988418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3447265625,
"rewards/margins": 26.325000762939453,
"rewards/rejected": -25.975000381469727,
"step": 1090
},
{
"epoch": 0.88,
"grad_norm": 0.02104507845831199,
"learning_rate": 6.71111111111111e-08,
"logits/chosen": 0.269287109375,
"logits/rejected": 0.5531250238418579,
"logps/chosen": -333.29998779296875,
"logps/rejected": -603.0,
"loss": 0.001,
"nll_loss": 0.9921875,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.49541014432907104,
"rewards/margins": 23.712499618530273,
"rewards/rejected": -23.212499618530273,
"step": 1100
},
{
"epoch": 0.888,
"grad_norm": 0.026162991645433887,
"learning_rate": 6.266666666666666e-08,
"logits/chosen": 0.5933593511581421,
"logits/rejected": 0.702343761920929,
"logps/chosen": -242.4499969482422,
"logps/rejected": -657.5999755859375,
"loss": 0.0009,
"nll_loss": 0.883984386920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.2967773377895355,
"rewards/margins": 26.862499237060547,
"rewards/rejected": -26.575000762939453,
"step": 1110
},
{
"epoch": 0.896,
"grad_norm": 0.03066308474144947,
"learning_rate": 5.822222222222222e-08,
"logits/chosen": 0.4716796875,
"logits/rejected": 0.6839843988418579,
"logps/chosen": -220.10000610351562,
"logps/rejected": -684.7999877929688,
"loss": 0.0009,
"nll_loss": 0.9468749761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.584765613079071,
"rewards/margins": 28.125,
"rewards/rejected": -27.549999237060547,
"step": 1120
},
{
"epoch": 0.904,
"grad_norm": 0.023039708522050593,
"learning_rate": 5.377777777777778e-08,
"logits/chosen": 0.3741699159145355,
"logits/rejected": 0.5889648199081421,
"logps/chosen": -277.29998779296875,
"logps/rejected": -665.7999877929688,
"loss": 0.0014,
"nll_loss": 0.9859374761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.4515624940395355,
"rewards/margins": 26.174999237060547,
"rewards/rejected": -25.725000381469727,
"step": 1130
},
{
"epoch": 0.912,
"grad_norm": 0.04174862262602521,
"learning_rate": 4.933333333333333e-08,
"logits/chosen": 0.3539062440395355,
"logits/rejected": 0.5293945074081421,
"logps/chosen": -331.8999938964844,
"logps/rejected": -592.0,
"loss": 0.0064,
"nll_loss": 0.887890636920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.621826171875,
"rewards/margins": 22.587499618530273,
"rewards/rejected": -21.975000381469727,
"step": 1140
},
{
"epoch": 0.92,
"grad_norm": 0.36396437493512307,
"learning_rate": 4.4888888888888885e-08,
"logits/chosen": 0.39692384004592896,
"logits/rejected": 0.5400390625,
"logps/chosen": -262.8999938964844,
"logps/rejected": -643.7999877929688,
"loss": 0.001,
"nll_loss": 0.9195312261581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.6341797113418579,
"rewards/margins": 25.431249618530273,
"rewards/rejected": -24.799999237060547,
"step": 1150
},
{
"epoch": 0.928,
"grad_norm": 0.01230667079616308,
"learning_rate": 4.044444444444444e-08,
"logits/chosen": 0.29730224609375,
"logits/rejected": 0.5694335699081421,
"logps/chosen": -283.8999938964844,
"logps/rejected": -612.5999755859375,
"loss": 0.0009,
"nll_loss": 0.8515625,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.666796863079071,
"rewards/margins": 24.575000762939453,
"rewards/rejected": -23.899999618530273,
"step": 1160
},
{
"epoch": 0.936,
"grad_norm": 0.014453975200438642,
"learning_rate": 3.6e-08,
"logits/chosen": 0.3432373106479645,
"logits/rejected": 0.5855468511581421,
"logps/chosen": -291.3999938964844,
"logps/rejected": -665.2000122070312,
"loss": 0.001,
"nll_loss": 1.019921898841858,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.6011718511581421,
"rewards/margins": 26.850000381469727,
"rewards/rejected": -26.262500762939453,
"step": 1170
},
{
"epoch": 0.944,
"grad_norm": 0.01768958135813815,
"learning_rate": 3.155555555555556e-08,
"logits/chosen": 0.31098634004592896,
"logits/rejected": 0.5472656488418579,
"logps/chosen": -295.70001220703125,
"logps/rejected": -587.2000122070312,
"loss": 0.0055,
"nll_loss": 0.8902343511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.17841796576976776,
"rewards/margins": 22.399999618530273,
"rewards/rejected": -22.225000381469727,
"step": 1180
},
{
"epoch": 0.952,
"grad_norm": 0.009303330717789412,
"learning_rate": 2.7111111111111108e-08,
"logits/chosen": 0.263427734375,
"logits/rejected": 0.49003905057907104,
"logps/chosen": -262.6000061035156,
"logps/rejected": -649.2000122070312,
"loss": 0.0009,
"nll_loss": 0.8871093988418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3846679627895355,
"rewards/margins": 25.137500762939453,
"rewards/rejected": -24.762500762939453,
"step": 1190
},
{
"epoch": 0.96,
"grad_norm": 0.033005829470572054,
"learning_rate": 2.2666666666666668e-08,
"logits/chosen": 0.3676391541957855,
"logits/rejected": 0.5830078125,
"logps/chosen": -295.70001220703125,
"logps/rejected": -625.2000122070312,
"loss": 0.001,
"nll_loss": 0.9925781488418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.7079833745956421,
"rewards/margins": 25.0625,
"rewards/rejected": -24.3125,
"step": 1200
},
{
"epoch": 0.968,
"grad_norm": 0.040751069146410926,
"learning_rate": 1.822222222222222e-08,
"logits/chosen": 0.3670410215854645,
"logits/rejected": 0.5015624761581421,
"logps/chosen": -256.29998779296875,
"logps/rejected": -645.4000244140625,
"loss": 0.0049,
"nll_loss": 0.907031238079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.8086913824081421,
"rewards/margins": 26.362499237060547,
"rewards/rejected": -25.549999237060547,
"step": 1210
},
{
"epoch": 0.976,
"grad_norm": 0.28250040304556245,
"learning_rate": 1.3777777777777778e-08,
"logits/chosen": 0.431640625,
"logits/rejected": 0.626171886920929,
"logps/chosen": -267.3500061035156,
"logps/rejected": -647.5999755859375,
"loss": 0.0021,
"nll_loss": 0.9437500238418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.539379894733429,
"rewards/margins": 26.012500762939453,
"rewards/rejected": -25.462499618530273,
"step": 1220
},
{
"epoch": 0.984,
"grad_norm": 0.039993394141365025,
"learning_rate": 9.333333333333334e-09,
"logits/chosen": 0.45268553495407104,
"logits/rejected": 0.6796875,
"logps/chosen": -277.79998779296875,
"logps/rejected": -639.5999755859375,
"loss": 0.0008,
"nll_loss": 0.813281238079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.44189453125,
"rewards/margins": 26.350000381469727,
"rewards/rejected": -25.924999237060547,
"step": 1230
},
{
"epoch": 0.992,
"grad_norm": 0.010994332832260341,
"learning_rate": 4.888888888888888e-09,
"logits/chosen": 0.42723387479782104,
"logits/rejected": 0.5927734375,
"logps/chosen": -252.60000610351562,
"logps/rejected": -644.7999877929688,
"loss": 0.0012,
"nll_loss": 0.8550781011581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.605175793170929,
"rewards/margins": 25.524999618530273,
"rewards/rejected": -24.924999237060547,
"step": 1240
},
{
"epoch": 1.0,
"grad_norm": 0.02065652761694791,
"learning_rate": 4.4444444444444443e-10,
"logits/chosen": 0.35319823026657104,
"logits/rejected": 0.5824218988418579,
"logps/chosen": -258.6000061035156,
"logps/rejected": -651.2000122070312,
"loss": 0.0134,
"nll_loss": 0.932421863079071,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.7752929925918579,
"rewards/margins": 26.612499237060547,
"rewards/rejected": -25.837499618530273,
"step": 1250
},
{
"epoch": 1.0,
"eval_logits/chosen": 0.22201773524284363,
"eval_logits/rejected": 0.42946213483810425,
"eval_logps/chosen": -328.9230651855469,
"eval_logps/rejected": -597.076904296875,
"eval_loss": 0.012361373752355576,
"eval_nll_loss": 0.9699519276618958,
"eval_rewards/accuracies": 0.9903846383094788,
"eval_rewards/chosen": 0.4366079568862915,
"eval_rewards/margins": 22.413461685180664,
"eval_rewards/rejected": -21.975961685180664,
"eval_runtime": 8.634,
"eval_samples_per_second": 11.582,
"eval_steps_per_second": 1.506,
"step": 1250
},
{
"epoch": 1.0,
"step": 1250,
"total_flos": 0.0,
"train_loss": 0.02150259389877319,
"train_runtime": 2425.829,
"train_samples_per_second": 4.122,
"train_steps_per_second": 0.515
}
],
"logging_steps": 10,
"max_steps": 1250,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}