AmberYifan's picture
Model save
c127657 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1309,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007639419404125286,
"grad_norm": 170.45702027292876,
"learning_rate": 0.0,
"logits/chosen": -0.3203125,
"logits/rejected": -0.380859375,
"logps/chosen": -480.0,
"logps/rejected": -448.0,
"loss": 0.6914,
"nll_loss": 1.015625,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.007639419404125287,
"grad_norm": 178.20144137294952,
"learning_rate": 3.435114503816794e-08,
"logits/chosen": -0.1680501252412796,
"logits/rejected": -0.4995659589767456,
"logps/chosen": -491.5555419921875,
"logps/rejected": -436.4444580078125,
"loss": 0.7147,
"nll_loss": 0.9696180820465088,
"rewards/accuracies": 0.1527777761220932,
"rewards/chosen": -0.02501763217151165,
"rewards/margins": -0.0271742083132267,
"rewards/rejected": 0.0020887586288154125,
"step": 10
},
{
"epoch": 0.015278838808250574,
"grad_norm": 180.13025532672927,
"learning_rate": 7.251908396946564e-08,
"logits/chosen": -0.3433593809604645,
"logits/rejected": -0.49980467557907104,
"logps/chosen": -485.0,
"logps/rejected": -437.3999938964844,
"loss": 0.6991,
"nll_loss": 0.95703125,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": 0.0037719726096838713,
"rewards/margins": 0.012493896298110485,
"rewards/rejected": -0.00875244103372097,
"step": 20
},
{
"epoch": 0.02291825821237586,
"grad_norm": 169.32677311123774,
"learning_rate": 1.1068702290076336e-07,
"logits/chosen": -0.31000977754592896,
"logits/rejected": -0.4932617247104645,
"logps/chosen": -589.7999877929688,
"logps/rejected": -451.20001220703125,
"loss": 0.6083,
"nll_loss": 0.965624988079071,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.05126953125,
"rewards/margins": 0.21274414658546448,
"rewards/rejected": -0.26396483182907104,
"step": 30
},
{
"epoch": 0.030557677616501147,
"grad_norm": 113.32771627574577,
"learning_rate": 1.4885496183206107e-07,
"logits/chosen": -0.33909910917282104,
"logits/rejected": -0.608105480670929,
"logps/chosen": -462.3999938964844,
"logps/rejected": -450.20001220703125,
"loss": 0.4859,
"nll_loss": 1.0949218273162842,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.30421751737594604,
"rewards/margins": 0.539471447467804,
"rewards/rejected": -0.8431640863418579,
"step": 40
},
{
"epoch": 0.03819709702062643,
"grad_norm": 84.47866594960938,
"learning_rate": 1.8702290076335877e-07,
"logits/chosen": -0.13973388075828552,
"logits/rejected": -0.29816895723342896,
"logps/chosen": -527.2000122070312,
"logps/rejected": -477.20001220703125,
"loss": 0.342,
"nll_loss": 0.8960937261581421,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.676953136920929,
"rewards/margins": 1.025781273841858,
"rewards/rejected": -1.7023437023162842,
"step": 50
},
{
"epoch": 0.04583651642475172,
"grad_norm": 107.60021345550501,
"learning_rate": 2.2519083969465648e-07,
"logits/chosen": -0.22696533799171448,
"logits/rejected": -0.53271484375,
"logps/chosen": -635.0,
"logps/rejected": -527.5999755859375,
"loss": 0.254,
"nll_loss": 1.017968773841858,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.8374999761581421,
"rewards/margins": 1.696874976158142,
"rewards/rejected": -2.53515625,
"step": 60
},
{
"epoch": 0.053475935828877004,
"grad_norm": 28.927358056320834,
"learning_rate": 2.633587786259542e-07,
"logits/chosen": -0.37421876192092896,
"logits/rejected": -0.861621081829071,
"logps/chosen": -514.0,
"logps/rejected": -477.0,
"loss": 0.1214,
"nll_loss": 1.064453125,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -1.0789062976837158,
"rewards/margins": 2.9828124046325684,
"rewards/rejected": -4.064062595367432,
"step": 70
},
{
"epoch": 0.061115355233002294,
"grad_norm": 49.8426089072564,
"learning_rate": 3.015267175572519e-07,
"logits/chosen": -0.3656372129917145,
"logits/rejected": -0.7126709222793579,
"logps/chosen": -464.6000061035156,
"logps/rejected": -484.3999938964844,
"loss": 0.096,
"nll_loss": 0.94921875,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.1742186546325684,
"rewards/margins": 3.567187547683716,
"rewards/rejected": -5.737500190734863,
"step": 80
},
{
"epoch": 0.06875477463712758,
"grad_norm": 5.605991342526607,
"learning_rate": 3.396946564885496e-07,
"logits/chosen": -0.3612304627895355,
"logits/rejected": -0.696093738079071,
"logps/chosen": -688.7999877929688,
"logps/rejected": -585.0,
"loss": 0.0882,
"nll_loss": 1.0398437976837158,
"rewards/accuracies": 0.9375,
"rewards/chosen": -2.598437547683716,
"rewards/margins": 4.298437595367432,
"rewards/rejected": -6.896874904632568,
"step": 90
},
{
"epoch": 0.07639419404125286,
"grad_norm": 28.200619850609755,
"learning_rate": 3.7786259541984735e-07,
"logits/chosen": -0.7310546636581421,
"logits/rejected": -0.8763672113418579,
"logps/chosen": -570.4000244140625,
"logps/rejected": -548.7999877929688,
"loss": 0.0508,
"nll_loss": 1.110937476158142,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -3.4749999046325684,
"rewards/margins": 5.371874809265137,
"rewards/rejected": -8.850000381469727,
"step": 100
},
{
"epoch": 0.08403361344537816,
"grad_norm": 6.3873299119979645,
"learning_rate": 4.1603053435114506e-07,
"logits/chosen": -0.636962890625,
"logits/rejected": -0.724414050579071,
"logps/chosen": -592.7999877929688,
"logps/rejected": -585.5999755859375,
"loss": 0.0221,
"nll_loss": 0.946093738079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.209374904632568,
"rewards/margins": 6.618750095367432,
"rewards/rejected": -10.824999809265137,
"step": 110
},
{
"epoch": 0.09167303284950344,
"grad_norm": 31.832402884669897,
"learning_rate": 4.541984732824427e-07,
"logits/chosen": -0.7198730707168579,
"logits/rejected": -0.973437488079071,
"logps/chosen": -609.5999755859375,
"logps/rejected": -571.7999877929688,
"loss": 0.0651,
"nll_loss": 1.049218773841858,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -5.90625,
"rewards/margins": 7.715624809265137,
"rewards/rejected": -13.612500190734863,
"step": 120
},
{
"epoch": 0.09931245225362872,
"grad_norm": 3.881434643874199,
"learning_rate": 4.923664122137405e-07,
"logits/chosen": -0.7142578363418579,
"logits/rejected": -0.9664062261581421,
"logps/chosen": -641.0,
"logps/rejected": -595.5999755859375,
"loss": 0.0132,
"nll_loss": 1.142968773841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.037499904632568,
"rewards/margins": 8.584375381469727,
"rewards/rejected": -15.618749618530273,
"step": 130
},
{
"epoch": 0.10695187165775401,
"grad_norm": 8.414178904055158,
"learning_rate": 4.966044142614601e-07,
"logits/chosen": -1.041015625,
"logits/rejected": -1.1277344226837158,
"logps/chosen": -511.0,
"logps/rejected": -616.7999877929688,
"loss": 0.0167,
"nll_loss": 1.2097656726837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.787499904632568,
"rewards/margins": 9.756250381469727,
"rewards/rejected": -16.53125,
"step": 140
},
{
"epoch": 0.11459129106187929,
"grad_norm": 0.032452792842087574,
"learning_rate": 4.923599320882851e-07,
"logits/chosen": -0.950976550579071,
"logits/rejected": -1.0867187976837158,
"logps/chosen": -611.5999755859375,
"logps/rejected": -621.5999755859375,
"loss": 0.0094,
"nll_loss": 1.13671875,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.931250095367432,
"rewards/margins": 10.837499618530273,
"rewards/rejected": -17.768749237060547,
"step": 150
},
{
"epoch": 0.12223071046600459,
"grad_norm": 2.0648500557312355,
"learning_rate": 4.881154499151103e-07,
"logits/chosen": -0.81689453125,
"logits/rejected": -1.2109375,
"logps/chosen": -565.4000244140625,
"logps/rejected": -601.2000122070312,
"loss": 0.0546,
"nll_loss": 1.2062499523162842,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -6.356249809265137,
"rewards/margins": 10.96875,
"rewards/rejected": -17.34375,
"step": 160
},
{
"epoch": 0.12987012987012986,
"grad_norm": 156.0698805984043,
"learning_rate": 4.838709677419355e-07,
"logits/chosen": -0.837597668170929,
"logits/rejected": -1.15234375,
"logps/chosen": -627.2000122070312,
"logps/rejected": -588.4000244140625,
"loss": 0.0343,
"nll_loss": 1.1160156726837158,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -7.525000095367432,
"rewards/margins": 10.606249809265137,
"rewards/rejected": -18.137500762939453,
"step": 170
},
{
"epoch": 0.13750954927425516,
"grad_norm": 5.873948116015113,
"learning_rate": 4.796264855687606e-07,
"logits/chosen": -0.7525390386581421,
"logits/rejected": -1.002343773841858,
"logps/chosen": -559.4000244140625,
"logps/rejected": -559.0,
"loss": 0.1111,
"nll_loss": 1.126562476158142,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -5.265625,
"rewards/margins": 10.53125,
"rewards/rejected": -15.774999618530273,
"step": 180
},
{
"epoch": 0.14514896867838045,
"grad_norm": 3.7990208299387955,
"learning_rate": 4.753820033955857e-07,
"logits/chosen": -0.87109375,
"logits/rejected": -1.031640648841858,
"logps/chosen": -582.7999877929688,
"logps/rejected": -604.2000122070312,
"loss": 0.0453,
"nll_loss": 1.1710937023162842,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -4.251562595367432,
"rewards/margins": 11.637499809265137,
"rewards/rejected": -15.887499809265137,
"step": 190
},
{
"epoch": 0.15278838808250572,
"grad_norm": 0.017438679531302244,
"learning_rate": 4.7113752122241087e-07,
"logits/chosen": -0.7158203125,
"logits/rejected": -0.7515624761581421,
"logps/chosen": -643.0,
"logps/rejected": -667.0,
"loss": 0.0052,
"nll_loss": 0.990234375,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.699999809265137,
"rewards/margins": 12.118749618530273,
"rewards/rejected": -16.825000762939453,
"step": 200
},
{
"epoch": 0.16042780748663102,
"grad_norm": 0.9343571136429083,
"learning_rate": 4.66893039049236e-07,
"logits/chosen": -0.68115234375,
"logits/rejected": -0.9722656011581421,
"logps/chosen": -562.4000244140625,
"logps/rejected": -658.2000122070312,
"loss": 0.0294,
"nll_loss": 1.154296875,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -5.481249809265137,
"rewards/margins": 12.212499618530273,
"rewards/rejected": -17.681249618530273,
"step": 210
},
{
"epoch": 0.16806722689075632,
"grad_norm": 13.184210820946406,
"learning_rate": 4.6264855687606106e-07,
"logits/chosen": -0.8500000238418579,
"logits/rejected": -0.9549804925918579,
"logps/chosen": -510.79998779296875,
"logps/rejected": -650.2000122070312,
"loss": 0.0139,
"nll_loss": 1.064062476158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.331250190734863,
"rewards/margins": 11.449999809265137,
"rewards/rejected": -17.793750762939453,
"step": 220
},
{
"epoch": 0.17570664629488159,
"grad_norm": 0.1965917564106709,
"learning_rate": 4.5840407470288624e-07,
"logits/chosen": -0.682421863079071,
"logits/rejected": -0.934374988079071,
"logps/chosen": -543.7999877929688,
"logps/rejected": -630.0,
"loss": 0.0022,
"nll_loss": 1.0968749523162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.925000190734863,
"rewards/margins": 12.75,
"rewards/rejected": -18.662500381469727,
"step": 230
},
{
"epoch": 0.18334606569900688,
"grad_norm": 0.07593077705850547,
"learning_rate": 4.5415959252971136e-07,
"logits/chosen": -0.6851562261581421,
"logits/rejected": -1.002343773841858,
"logps/chosen": -518.4000244140625,
"logps/rejected": -575.0,
"loss": 0.0236,
"nll_loss": 1.168359398841858,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -6.784375190734863,
"rewards/margins": 12.274999618530273,
"rewards/rejected": -19.0625,
"step": 240
},
{
"epoch": 0.19098548510313215,
"grad_norm": 12.098498040730545,
"learning_rate": 4.499151103565365e-07,
"logits/chosen": -0.5663086175918579,
"logits/rejected": -0.6595703363418579,
"logps/chosen": -686.4000244140625,
"logps/rejected": -734.0,
"loss": 0.0089,
"nll_loss": 1.1203124523162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.21875,
"rewards/margins": 11.8125,
"rewards/rejected": -19.037500381469727,
"step": 250
},
{
"epoch": 0.19862490450725745,
"grad_norm": 0.06575954655043179,
"learning_rate": 4.456706281833616e-07,
"logits/chosen": -0.6996093988418579,
"logits/rejected": -1.0099608898162842,
"logps/chosen": -555.2000122070312,
"logps/rejected": -585.5999755859375,
"loss": 0.0365,
"nll_loss": 1.1339843273162842,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -6.209374904632568,
"rewards/margins": 13.375,
"rewards/rejected": -19.575000762939453,
"step": 260
},
{
"epoch": 0.20626432391138275,
"grad_norm": 0.09027759120640111,
"learning_rate": 4.4142614601018673e-07,
"logits/chosen": -0.6943359375,
"logits/rejected": -1.0158202648162842,
"logps/chosen": -602.7999877929688,
"logps/rejected": -640.0,
"loss": 0.0016,
"nll_loss": 1.111718773841858,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.479687690734863,
"rewards/margins": 14.71875,
"rewards/rejected": -20.225000381469727,
"step": 270
},
{
"epoch": 0.21390374331550802,
"grad_norm": 0.17134559287010234,
"learning_rate": 4.3718166383701186e-07,
"logits/chosen": -0.658984363079071,
"logits/rejected": -1.2468750476837158,
"logps/chosen": -639.2000122070312,
"logps/rejected": -695.2000122070312,
"loss": 0.0025,
"nll_loss": 1.1902344226837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.2578125,
"rewards/margins": 13.84375,
"rewards/rejected": -20.112499237060547,
"step": 280
},
{
"epoch": 0.2215431627196333,
"grad_norm": 0.1098174270847581,
"learning_rate": 4.32937181663837e-07,
"logits/chosen": -0.48955076932907104,
"logits/rejected": -0.794726550579071,
"logps/chosen": -671.4000244140625,
"logps/rejected": -693.5999755859375,
"loss": 0.0079,
"nll_loss": 1.026953101158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.5078125,
"rewards/margins": 14.581250190734863,
"rewards/rejected": -21.075000762939453,
"step": 290
},
{
"epoch": 0.22918258212375858,
"grad_norm": 0.28994386032232317,
"learning_rate": 4.286926994906621e-07,
"logits/chosen": -0.778124988079071,
"logits/rejected": -1.2712891101837158,
"logps/chosen": -580.2000122070312,
"logps/rejected": -701.5999755859375,
"loss": 0.0178,
"nll_loss": 1.097265601158142,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -5.809374809265137,
"rewards/margins": 16.912500381469727,
"rewards/rejected": -22.75,
"step": 300
},
{
"epoch": 0.23682200152788388,
"grad_norm": 2.0198779669945854,
"learning_rate": 4.244482173174873e-07,
"logits/chosen": -0.6753906011581421,
"logits/rejected": -0.8785156011581421,
"logps/chosen": -503.3999938964844,
"logps/rejected": -644.4000244140625,
"loss": 0.0178,
"nll_loss": 1.0109374523162842,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -5.831250190734863,
"rewards/margins": 15.168749809265137,
"rewards/rejected": -21.0,
"step": 310
},
{
"epoch": 0.24446142093200918,
"grad_norm": 4.494678945589362,
"learning_rate": 4.202037351443124e-07,
"logits/chosen": -0.7886718511581421,
"logits/rejected": -1.163671851158142,
"logps/chosen": -533.5999755859375,
"logps/rejected": -610.2000122070312,
"loss": 0.0019,
"nll_loss": 1.0773437023162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.510937690734863,
"rewards/margins": 16.59375,
"rewards/rejected": -21.125,
"step": 320
},
{
"epoch": 0.25210084033613445,
"grad_norm": 138.82132160510554,
"learning_rate": 4.159592529711375e-07,
"logits/chosen": -0.6773437261581421,
"logits/rejected": -1.036523461341858,
"logps/chosen": -586.4000244140625,
"logps/rejected": -664.7999877929688,
"loss": 0.031,
"nll_loss": 1.092187523841858,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -4.827343940734863,
"rewards/margins": 16.943750381469727,
"rewards/rejected": -21.762500762939453,
"step": 330
},
{
"epoch": 0.2597402597402597,
"grad_norm": 0.025227316427022712,
"learning_rate": 4.1171477079796265e-07,
"logits/chosen": -0.734814465045929,
"logits/rejected": -0.9996093511581421,
"logps/chosen": -568.4000244140625,
"logps/rejected": -720.7999877929688,
"loss": 0.0084,
"nll_loss": 1.066015601158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.956250190734863,
"rewards/margins": 18.075000762939453,
"rewards/rejected": -23.037500381469727,
"step": 340
},
{
"epoch": 0.26737967914438504,
"grad_norm": 0.01517317320009414,
"learning_rate": 4.074702886247878e-07,
"logits/chosen": -0.7974609136581421,
"logits/rejected": -1.169531226158142,
"logps/chosen": -553.7999877929688,
"logps/rejected": -682.4000244140625,
"loss": 0.0211,
"nll_loss": 1.1316406726837158,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -5.162499904632568,
"rewards/margins": 16.975000381469727,
"rewards/rejected": -22.162500381469727,
"step": 350
},
{
"epoch": 0.2750190985485103,
"grad_norm": 0.041343364887589294,
"learning_rate": 4.0322580645161285e-07,
"logits/chosen": -0.662109375,
"logits/rejected": -0.91796875,
"logps/chosen": -620.4000244140625,
"logps/rejected": -662.4000244140625,
"loss": 0.0023,
"nll_loss": 1.0871093273162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.9710936546325684,
"rewards/margins": 16.931249618530273,
"rewards/rejected": -20.924999237060547,
"step": 360
},
{
"epoch": 0.2826585179526356,
"grad_norm": 0.11405518122478978,
"learning_rate": 3.98981324278438e-07,
"logits/chosen": -0.982617199420929,
"logits/rejected": -1.167089819908142,
"logps/chosen": -529.5999755859375,
"logps/rejected": -669.5999755859375,
"loss": 0.0031,
"nll_loss": 1.079687476158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.2236328125,
"rewards/margins": 17.143749237060547,
"rewards/rejected": -19.350000381469727,
"step": 370
},
{
"epoch": 0.2902979373567609,
"grad_norm": 0.04601729303454066,
"learning_rate": 3.9473684210526315e-07,
"logits/chosen": -0.7412109375,
"logits/rejected": -1.019921898841858,
"logps/chosen": -605.2000122070312,
"logps/rejected": -632.0,
"loss": 0.0223,
"nll_loss": 1.0417969226837158,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.555468797683716,
"rewards/margins": 14.84375,
"rewards/rejected": -17.387500762939453,
"step": 380
},
{
"epoch": 0.2979373567608862,
"grad_norm": 1.1114016283135653,
"learning_rate": 3.9049235993208827e-07,
"logits/chosen": -0.7603515386581421,
"logits/rejected": -1.0732421875,
"logps/chosen": -573.4000244140625,
"logps/rejected": -604.4000244140625,
"loss": 0.0016,
"nll_loss": 1.0964844226837158,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.951171875,
"rewards/margins": 16.706249237060547,
"rewards/rejected": -17.637500762939453,
"step": 390
},
{
"epoch": 0.30557677616501144,
"grad_norm": 0.012006618077367264,
"learning_rate": 3.862478777589134e-07,
"logits/chosen": -0.7928711175918579,
"logits/rejected": -1.187109351158142,
"logps/chosen": -566.0,
"logps/rejected": -637.5999755859375,
"loss": 0.0013,
"nll_loss": 1.038671851158142,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5223633050918579,
"rewards/margins": 18.431249618530273,
"rewards/rejected": -18.956249237060547,
"step": 400
},
{
"epoch": 0.31321619556913677,
"grad_norm": 0.02301493074282767,
"learning_rate": 3.820033955857385e-07,
"logits/chosen": -0.776562511920929,
"logits/rejected": -1.072265625,
"logps/chosen": -542.2000122070312,
"logps/rejected": -660.7999877929688,
"loss": 0.0011,
"nll_loss": 0.9789062738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.2033202648162842,
"rewards/margins": 16.987499237060547,
"rewards/rejected": -18.1875,
"step": 410
},
{
"epoch": 0.32085561497326204,
"grad_norm": 0.019580609038995344,
"learning_rate": 3.7775891341256364e-07,
"logits/chosen": -0.573535144329071,
"logits/rejected": -1.023046851158142,
"logps/chosen": -610.7999877929688,
"logps/rejected": -669.2000122070312,
"loss": 0.0016,
"nll_loss": 1.0207030773162842,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8062499761581421,
"rewards/margins": 17.950000762939453,
"rewards/rejected": -18.768749237060547,
"step": 420
},
{
"epoch": 0.3284950343773873,
"grad_norm": 0.04377587946128281,
"learning_rate": 3.735144312393888e-07,
"logits/chosen": -0.741406261920929,
"logits/rejected": -1.0378906726837158,
"logps/chosen": -615.7999877929688,
"logps/rejected": -727.4000244140625,
"loss": 0.0011,
"nll_loss": 0.981249988079071,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4393310546875,
"rewards/margins": 18.5,
"rewards/rejected": -18.918750762939453,
"step": 430
},
{
"epoch": 0.33613445378151263,
"grad_norm": 0.03635815986206019,
"learning_rate": 3.692699490662139e-07,
"logits/chosen": -0.5271972417831421,
"logits/rejected": -0.888427734375,
"logps/chosen": -523.2000122070312,
"logps/rejected": -679.5999755859375,
"loss": 0.0012,
"nll_loss": 0.9750000238418579,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.815673828125,
"rewards/margins": 18.475000381469727,
"rewards/rejected": -19.287500381469727,
"step": 440
},
{
"epoch": 0.3437738731856379,
"grad_norm": 1.1649563263655982,
"learning_rate": 3.65025466893039e-07,
"logits/chosen": -0.6383301019668579,
"logits/rejected": -1.141015648841858,
"logps/chosen": -571.5999755859375,
"logps/rejected": -628.0,
"loss": 0.0375,
"nll_loss": 1.175390601158142,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.02695312537252903,
"rewards/margins": 15.956250190734863,
"rewards/rejected": -15.975000381469727,
"step": 450
},
{
"epoch": 0.35141329258976317,
"grad_norm": 2.473516604965399,
"learning_rate": 3.607809847198642e-07,
"logits/chosen": -0.904296875,
"logits/rejected": -1.099609375,
"logps/chosen": -502.0,
"logps/rejected": -662.7999877929688,
"loss": 0.0022,
"nll_loss": 0.987109363079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.136083960533142,
"rewards/margins": 18.356250762939453,
"rewards/rejected": -17.225000381469727,
"step": 460
},
{
"epoch": 0.35905271199388844,
"grad_norm": 0.019754385722868507,
"learning_rate": 3.5653650254668926e-07,
"logits/chosen": -0.616748034954071,
"logits/rejected": -0.873828113079071,
"logps/chosen": -512.7999877929688,
"logps/rejected": -639.4000244140625,
"loss": 0.001,
"nll_loss": 1.00390625,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.5836913585662842,
"rewards/margins": 18.700000762939453,
"rewards/rejected": -17.118749618530273,
"step": 470
},
{
"epoch": 0.36669213139801377,
"grad_norm": 0.012995476138882066,
"learning_rate": 3.5229202037351443e-07,
"logits/chosen": -0.879833996295929,
"logits/rejected": -1.171484351158142,
"logps/chosen": -481.20001220703125,
"logps/rejected": -624.0,
"loss": 0.0013,
"nll_loss": 0.9457031488418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.20126953721046448,
"rewards/margins": 17.8125,
"rewards/rejected": -17.600000381469727,
"step": 480
},
{
"epoch": 0.37433155080213903,
"grad_norm": 0.0300203308477904,
"learning_rate": 3.4804753820033956e-07,
"logits/chosen": -0.627734363079071,
"logits/rejected": -1.034765601158142,
"logps/chosen": -575.7999877929688,
"logps/rejected": -640.0,
"loss": 0.002,
"nll_loss": 1.0011718273162842,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.12880858778953552,
"rewards/margins": 18.268749237060547,
"rewards/rejected": -18.125,
"step": 490
},
{
"epoch": 0.3819709702062643,
"grad_norm": 0.011247181105488207,
"learning_rate": 3.438030560271647e-07,
"logits/chosen": -0.7381836175918579,
"logits/rejected": -0.9857422113418579,
"logps/chosen": -526.7999877929688,
"logps/rejected": -615.5999755859375,
"loss": 0.0012,
"nll_loss": 1.021484375,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.921679675579071,
"rewards/margins": 18.206249237060547,
"rewards/rejected": -17.293750762939453,
"step": 500
},
{
"epoch": 0.38961038961038963,
"grad_norm": 0.013542548861045974,
"learning_rate": 3.395585738539898e-07,
"logits/chosen": -0.645312488079071,
"logits/rejected": -0.8863281011581421,
"logps/chosen": -615.0,
"logps/rejected": -656.4000244140625,
"loss": 0.0014,
"nll_loss": 0.9214843511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.16367188096046448,
"rewards/margins": 18.606250762939453,
"rewards/rejected": -18.46875,
"step": 510
},
{
"epoch": 0.3972498090145149,
"grad_norm": 0.023268796810063094,
"learning_rate": 3.3531409168081493e-07,
"logits/chosen": -0.6841796636581421,
"logits/rejected": -0.912109375,
"logps/chosen": -529.2000122070312,
"logps/rejected": -675.5999755859375,
"loss": 0.0056,
"nll_loss": 0.888671875,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.271484375,
"rewards/margins": 18.581249237060547,
"rewards/rejected": -17.331249237060547,
"step": 520
},
{
"epoch": 0.40488922841864017,
"grad_norm": 0.008467585302493998,
"learning_rate": 3.3106960950764005e-07,
"logits/chosen": -0.848437488079071,
"logits/rejected": -1.3093750476837158,
"logps/chosen": -609.2000122070312,
"logps/rejected": -703.5999755859375,
"loss": 0.0028,
"nll_loss": 1.0167968273162842,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.343554735183716,
"rewards/margins": 20.337499618530273,
"rewards/rejected": -17.981250762939453,
"step": 530
},
{
"epoch": 0.4125286478227655,
"grad_norm": 3.5149140081773647,
"learning_rate": 3.268251273344652e-07,
"logits/chosen": -0.753710925579071,
"logits/rejected": -1.161718726158142,
"logps/chosen": -523.4000244140625,
"logps/rejected": -566.5999755859375,
"loss": 0.0212,
"nll_loss": 1.001953125,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 2.139843702316284,
"rewards/margins": 18.256250381469727,
"rewards/rejected": -16.106250762939453,
"step": 540
},
{
"epoch": 0.42016806722689076,
"grad_norm": 0.01515961183908316,
"learning_rate": 3.225806451612903e-07,
"logits/chosen": -0.751171886920929,
"logits/rejected": -1.060644507408142,
"logps/chosen": -495.20001220703125,
"logps/rejected": -612.4000244140625,
"loss": 0.0018,
"nll_loss": 0.955078125,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.314208984375,
"rewards/margins": 19.225000381469727,
"rewards/rejected": -17.887500762939453,
"step": 550
},
{
"epoch": 0.42780748663101603,
"grad_norm": 0.010042277083548832,
"learning_rate": 3.183361629881154e-07,
"logits/chosen": -0.8096679449081421,
"logits/rejected": -1.017187476158142,
"logps/chosen": -509.79998779296875,
"logps/rejected": -657.5999755859375,
"loss": 0.0096,
"nll_loss": 0.9886718988418579,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 1.565820336341858,
"rewards/margins": 18.668750762939453,
"rewards/rejected": -17.09375,
"step": 560
},
{
"epoch": 0.43544690603514136,
"grad_norm": 357.30801484169297,
"learning_rate": 3.140916808149406e-07,
"logits/chosen": -0.5929931402206421,
"logits/rejected": -0.9761718511581421,
"logps/chosen": -537.7999877929688,
"logps/rejected": -641.5999755859375,
"loss": 0.1376,
"nll_loss": 0.9449218511581421,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.69873046875,
"rewards/margins": 19.549999237060547,
"rewards/rejected": -18.875,
"step": 570
},
{
"epoch": 0.4430863254392666,
"grad_norm": 0.012075462997198727,
"learning_rate": 3.0984719864176567e-07,
"logits/chosen": -0.578125,
"logits/rejected": -0.950390636920929,
"logps/chosen": -502.0,
"logps/rejected": -646.7999877929688,
"loss": 0.001,
"nll_loss": 0.899218738079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.9572265148162842,
"rewards/margins": 20.924999237060547,
"rewards/rejected": -18.987499237060547,
"step": 580
},
{
"epoch": 0.4507257448433919,
"grad_norm": 0.008925296396445518,
"learning_rate": 3.056027164685908e-07,
"logits/chosen": -0.655078113079071,
"logits/rejected": -1.072265625,
"logps/chosen": -509.6000061035156,
"logps/rejected": -565.0,
"loss": 0.0113,
"nll_loss": 1.0390625,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 2.5933594703674316,
"rewards/margins": 19.112499237060547,
"rewards/rejected": -16.537500381469727,
"step": 590
},
{
"epoch": 0.45836516424751717,
"grad_norm": 0.017036225092415577,
"learning_rate": 3.0135823429541597e-07,
"logits/chosen": -0.8695312738418579,
"logits/rejected": -1.1437499523162842,
"logps/chosen": -499.79998779296875,
"logps/rejected": -588.4000244140625,
"loss": 0.0009,
"nll_loss": 0.923828125,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.021191358566284,
"rewards/margins": 19.168750762939453,
"rewards/rejected": -16.149999618530273,
"step": 600
},
{
"epoch": 0.4660045836516425,
"grad_norm": 3.1773486559043835,
"learning_rate": 2.9711375212224104e-07,
"logits/chosen": -0.520703136920929,
"logits/rejected": -0.919140636920929,
"logps/chosen": -497.20001220703125,
"logps/rejected": -629.2000122070312,
"loss": 0.0016,
"nll_loss": 0.955859363079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.760937452316284,
"rewards/margins": 18.887500762939453,
"rewards/rejected": -15.15625,
"step": 610
},
{
"epoch": 0.47364400305576776,
"grad_norm": 3.105337083291931,
"learning_rate": 2.928692699490662e-07,
"logits/chosen": -0.666015625,
"logits/rejected": -1.2062499523162842,
"logps/chosen": -525.5999755859375,
"logps/rejected": -617.0,
"loss": 0.0017,
"nll_loss": 0.96484375,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.859375,
"rewards/margins": 19.674999237060547,
"rewards/rejected": -16.793750762939453,
"step": 620
},
{
"epoch": 0.48128342245989303,
"grad_norm": 0.008823604800824746,
"learning_rate": 2.8862478777589134e-07,
"logits/chosen": -0.7459961175918579,
"logits/rejected": -1.0949218273162842,
"logps/chosen": -612.0,
"logps/rejected": -654.4000244140625,
"loss": 0.0099,
"nll_loss": 0.9632812738418579,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 2.2660155296325684,
"rewards/margins": 20.5,
"rewards/rejected": -18.21875,
"step": 630
},
{
"epoch": 0.48892284186401835,
"grad_norm": 0.032537468902626224,
"learning_rate": 2.8438030560271646e-07,
"logits/chosen": -0.6953125,
"logits/rejected": -1.015234351158142,
"logps/chosen": -602.7999877929688,
"logps/rejected": -617.0,
"loss": 0.003,
"nll_loss": 1.107421875,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.46875,
"rewards/margins": 21.0,
"rewards/rejected": -17.524999618530273,
"step": 640
},
{
"epoch": 0.4965622612681436,
"grad_norm": 0.018661326082527362,
"learning_rate": 2.801358234295416e-07,
"logits/chosen": -0.6830078363418579,
"logits/rejected": -1.1179687976837158,
"logps/chosen": -403.79998779296875,
"logps/rejected": -591.0,
"loss": 0.001,
"nll_loss": 0.951953113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8648924827575684,
"rewards/margins": 19.831249237060547,
"rewards/rejected": -16.962499618530273,
"step": 650
},
{
"epoch": 0.5042016806722689,
"grad_norm": 0.15005148372984548,
"learning_rate": 2.758913412563667e-07,
"logits/chosen": -0.636279284954071,
"logits/rejected": -1.015234351158142,
"logps/chosen": -487.79998779296875,
"logps/rejected": -609.5999755859375,
"loss": 0.0063,
"nll_loss": 0.90625,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2437500953674316,
"rewards/margins": 18.637500762939453,
"rewards/rejected": -15.399999618530273,
"step": 660
},
{
"epoch": 0.5118411000763942,
"grad_norm": 0.009676166350461137,
"learning_rate": 2.7164685908319183e-07,
"logits/chosen": -0.519238293170929,
"logits/rejected": -0.81494140625,
"logps/chosen": -523.0,
"logps/rejected": -656.7999877929688,
"loss": 0.0045,
"nll_loss": 0.9761718511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0859375,
"rewards/margins": 19.78125,
"rewards/rejected": -16.65625,
"step": 670
},
{
"epoch": 0.5194805194805194,
"grad_norm": 0.01124980367552591,
"learning_rate": 2.67402376910017e-07,
"logits/chosen": -0.5941406488418579,
"logits/rejected": -1.1257812976837158,
"logps/chosen": -451.79998779296875,
"logps/rejected": -609.5999755859375,
"loss": 0.001,
"nll_loss": 0.935546875,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.285937309265137,
"rewards/margins": 20.774999618530273,
"rewards/rejected": -16.5,
"step": 680
},
{
"epoch": 0.5271199388846448,
"grad_norm": 1.031627498209902,
"learning_rate": 2.631578947368421e-07,
"logits/chosen": -0.6713622808456421,
"logits/rejected": -0.9341796636581421,
"logps/chosen": -528.7999877929688,
"logps/rejected": -627.5999755859375,
"loss": 0.0011,
"nll_loss": 0.9410156011581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.165722846984863,
"rewards/margins": 20.112499237060547,
"rewards/rejected": -15.949999809265137,
"step": 690
},
{
"epoch": 0.5347593582887701,
"grad_norm": 0.008612481920354584,
"learning_rate": 2.589134125636672e-07,
"logits/chosen": -0.626953125,
"logits/rejected": -1.13671875,
"logps/chosen": -568.4000244140625,
"logps/rejected": -658.5999755859375,
"loss": 0.0153,
"nll_loss": 0.9609375,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.57421875,
"rewards/margins": 17.993749618530273,
"rewards/rejected": -14.399999618530273,
"step": 700
},
{
"epoch": 0.5423987776928954,
"grad_norm": 0.3960498135346062,
"learning_rate": 2.546689303904924e-07,
"logits/chosen": -0.4974121153354645,
"logits/rejected": -0.73291015625,
"logps/chosen": -462.20001220703125,
"logps/rejected": -560.5999755859375,
"loss": 0.0009,
"nll_loss": 0.8374999761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.01953125,
"rewards/margins": 19.4375,
"rewards/rejected": -14.418749809265137,
"step": 710
},
{
"epoch": 0.5500381970970206,
"grad_norm": 0.010307871330397535,
"learning_rate": 2.5042444821731745e-07,
"logits/chosen": -0.713671863079071,
"logits/rejected": -0.9332031011581421,
"logps/chosen": -481.0,
"logps/rejected": -631.4000244140625,
"loss": 0.0021,
"nll_loss": 0.879687488079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.451367139816284,
"rewards/margins": 19.412500381469727,
"rewards/rejected": -15.962499618530273,
"step": 720
},
{
"epoch": 0.5576776165011459,
"grad_norm": 0.012937044581846035,
"learning_rate": 2.4617996604414257e-07,
"logits/chosen": -0.8158203363418579,
"logits/rejected": -0.971972644329071,
"logps/chosen": -485.79998779296875,
"logps/rejected": -641.5999755859375,
"loss": 0.001,
"nll_loss": 0.8539062738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.935937404632568,
"rewards/margins": 19.575000762939453,
"rewards/rejected": -14.625,
"step": 730
},
{
"epoch": 0.5653170359052712,
"grad_norm": 0.00943278466797185,
"learning_rate": 2.4193548387096775e-07,
"logits/chosen": -0.7265625,
"logits/rejected": -1.0984375476837158,
"logps/chosen": -467.3999938964844,
"logps/rejected": -628.4000244140625,
"loss": 0.0009,
"nll_loss": 0.907421886920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.9765625,
"rewards/margins": 20.825000762939453,
"rewards/rejected": -15.850000381469727,
"step": 740
},
{
"epoch": 0.5729564553093965,
"grad_norm": 0.012206838530460923,
"learning_rate": 2.3769100169779285e-07,
"logits/chosen": -0.4423828125,
"logits/rejected": -0.7054198980331421,
"logps/chosen": -529.4000244140625,
"logps/rejected": -661.2000122070312,
"loss": 0.0244,
"nll_loss": 0.9312499761581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 4.828125,
"rewards/margins": 18.549999237060547,
"rewards/rejected": -13.71875,
"step": 750
},
{
"epoch": 0.5805958747135218,
"grad_norm": 0.02778687692676317,
"learning_rate": 2.33446519524618e-07,
"logits/chosen": -0.5909179449081421,
"logits/rejected": -0.9839843511581421,
"logps/chosen": -538.7999877929688,
"logps/rejected": -577.2000122070312,
"loss": 0.006,
"nll_loss": 0.9312499761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.465624809265137,
"rewards/margins": 19.181249618530273,
"rewards/rejected": -13.706250190734863,
"step": 760
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.005731559164767884,
"learning_rate": 2.2920203735144312e-07,
"logits/chosen": -0.5236572027206421,
"logits/rejected": -0.7826172113418579,
"logps/chosen": -469.20001220703125,
"logps/rejected": -575.7999877929688,
"loss": 0.0009,
"nll_loss": 0.8277343511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.328125,
"rewards/margins": 19.4375,
"rewards/rejected": -14.106249809265137,
"step": 770
},
{
"epoch": 0.5958747135217723,
"grad_norm": 2.1424567307120626,
"learning_rate": 2.2495755517826824e-07,
"logits/chosen": -0.641796886920929,
"logits/rejected": -0.885937511920929,
"logps/chosen": -497.20001220703125,
"logps/rejected": -620.0,
"loss": 0.0011,
"nll_loss": 0.9375,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.8671875,
"rewards/margins": 19.137500762939453,
"rewards/rejected": -14.243749618530273,
"step": 780
},
{
"epoch": 0.6035141329258976,
"grad_norm": 0.578328922678163,
"learning_rate": 2.2071307300509337e-07,
"logits/chosen": -0.666796863079071,
"logits/rejected": -1.081640601158142,
"logps/chosen": -468.20001220703125,
"logps/rejected": -602.7999877929688,
"loss": 0.0009,
"nll_loss": 0.861328125,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.762499809265137,
"rewards/margins": 21.625,
"rewards/rejected": -15.862500190734863,
"step": 790
},
{
"epoch": 0.6111535523300229,
"grad_norm": 71.3110272488238,
"learning_rate": 2.164685908319185e-07,
"logits/chosen": -0.59033203125,
"logits/rejected": -1.054101586341858,
"logps/chosen": -490.6000061035156,
"logps/rejected": -576.5999755859375,
"loss": 0.0253,
"nll_loss": 0.8355468511581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.728125095367432,
"rewards/margins": 20.274999618530273,
"rewards/rejected": -14.543749809265137,
"step": 800
},
{
"epoch": 0.6187929717341482,
"grad_norm": 139.17739524623934,
"learning_rate": 2.1222410865874364e-07,
"logits/chosen": -0.5176025629043579,
"logits/rejected": -0.8919922113418579,
"logps/chosen": -384.3999938964844,
"logps/rejected": -602.7999877929688,
"loss": 0.0771,
"nll_loss": 0.791796863079071,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 5.825781345367432,
"rewards/margins": 19.893749237060547,
"rewards/rejected": -14.0625,
"step": 810
},
{
"epoch": 0.6264323911382735,
"grad_norm": 0.009397467485646585,
"learning_rate": 2.0797962648556874e-07,
"logits/chosen": -0.4286132752895355,
"logits/rejected": -0.8472656011581421,
"logps/chosen": -455.0,
"logps/rejected": -579.0,
"loss": 0.0008,
"nll_loss": 0.780468761920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.40625,
"rewards/margins": 19.087499618530273,
"rewards/rejected": -13.706250190734863,
"step": 820
},
{
"epoch": 0.6340718105423988,
"grad_norm": 0.01440039833737066,
"learning_rate": 2.037351443123939e-07,
"logits/chosen": -0.5261474847793579,
"logits/rejected": -0.8597656488418579,
"logps/chosen": -419.3999938964844,
"logps/rejected": -557.4000244140625,
"loss": 0.0201,
"nll_loss": 0.774218738079071,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.792187690734863,
"rewards/margins": 18.5625,
"rewards/rejected": -12.774999618530273,
"step": 830
},
{
"epoch": 0.6417112299465241,
"grad_norm": 0.01637511729658493,
"learning_rate": 1.99490662139219e-07,
"logits/chosen": -0.7147461175918579,
"logits/rejected": -1.2234375476837158,
"logps/chosen": -453.79998779296875,
"logps/rejected": -580.2000122070312,
"loss": 0.0009,
"nll_loss": 0.876953125,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.037499904632568,
"rewards/margins": 19.987499237060547,
"rewards/rejected": -12.949999809265137,
"step": 840
},
{
"epoch": 0.6493506493506493,
"grad_norm": 0.011789748248626662,
"learning_rate": 1.9524617996604413e-07,
"logits/chosen": -0.48930662870407104,
"logits/rejected": -0.76318359375,
"logps/chosen": -497.20001220703125,
"logps/rejected": -608.2000122070312,
"loss": 0.0032,
"nll_loss": 0.862500011920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.518750190734863,
"rewards/margins": 19.756250381469727,
"rewards/rejected": -13.25,
"step": 850
},
{
"epoch": 0.6569900687547746,
"grad_norm": 0.008438444203134894,
"learning_rate": 1.9100169779286926e-07,
"logits/chosen": -0.7650390863418579,
"logits/rejected": -1.181640625,
"logps/chosen": -470.0,
"logps/rejected": -614.0,
"loss": 0.0062,
"nll_loss": 0.888671875,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.34521484375,
"rewards/margins": 20.0625,
"rewards/rejected": -15.731249809265137,
"step": 860
},
{
"epoch": 0.6646294881588999,
"grad_norm": 0.009088437756158809,
"learning_rate": 1.867572156196944e-07,
"logits/chosen": -0.8885742425918579,
"logits/rejected": -1.2628905773162842,
"logps/chosen": -426.20001220703125,
"logps/rejected": -576.7999877929688,
"loss": 0.0173,
"nll_loss": 0.8238281011581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.240624904632568,
"rewards/margins": 20.837499618530273,
"rewards/rejected": -15.59375,
"step": 870
},
{
"epoch": 0.6722689075630253,
"grad_norm": 0.01123214371256042,
"learning_rate": 1.825127334465195e-07,
"logits/chosen": -0.37451171875,
"logits/rejected": -0.9312499761581421,
"logps/chosen": -491.20001220703125,
"logps/rejected": -562.4000244140625,
"loss": 0.0012,
"nll_loss": 0.788281261920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.337500095367432,
"rewards/margins": 21.512500762939453,
"rewards/rejected": -16.168750762939453,
"step": 880
},
{
"epoch": 0.6799083269671505,
"grad_norm": 0.011919242875290744,
"learning_rate": 1.7826825127334463e-07,
"logits/chosen": -0.776611328125,
"logits/rejected": -1.2238280773162842,
"logps/chosen": -501.3999938964844,
"logps/rejected": -579.2000122070312,
"loss": 0.001,
"nll_loss": 0.9183593988418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.053124904632568,
"rewards/margins": 21.612499237060547,
"rewards/rejected": -15.568750381469727,
"step": 890
},
{
"epoch": 0.6875477463712758,
"grad_norm": 0.5307999135503628,
"learning_rate": 1.7402376910016978e-07,
"logits/chosen": -0.46171873807907104,
"logits/rejected": -1.008203148841858,
"logps/chosen": -419.6000061035156,
"logps/rejected": -630.7999877929688,
"loss": 0.0011,
"nll_loss": 0.93359375,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.2734375,
"rewards/margins": 20.600000381469727,
"rewards/rejected": -15.318750381469727,
"step": 900
},
{
"epoch": 0.6951871657754011,
"grad_norm": 0.008377959097262699,
"learning_rate": 1.697792869269949e-07,
"logits/chosen": -0.581738293170929,
"logits/rejected": -0.8985351324081421,
"logps/chosen": -690.5999755859375,
"logps/rejected": -672.0,
"loss": 0.0009,
"nll_loss": 0.889453113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.235937595367432,
"rewards/margins": 21.450000762939453,
"rewards/rejected": -16.1875,
"step": 910
},
{
"epoch": 0.7028265851795263,
"grad_norm": 3.821622931707774,
"learning_rate": 1.6553480475382003e-07,
"logits/chosen": -0.772656261920929,
"logits/rejected": -1.0529296398162842,
"logps/chosen": -438.79998779296875,
"logps/rejected": -667.2000122070312,
"loss": 0.002,
"nll_loss": 0.907421886920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.903124809265137,
"rewards/margins": 19.887500762939453,
"rewards/rejected": -14.981249809265137,
"step": 920
},
{
"epoch": 0.7104660045836516,
"grad_norm": 0.007254526001241224,
"learning_rate": 1.6129032258064515e-07,
"logits/chosen": -0.530517578125,
"logits/rejected": -0.891406238079071,
"logps/chosen": -515.0,
"logps/rejected": -594.7999877929688,
"loss": 0.0163,
"nll_loss": 0.7972656488418579,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.315625190734863,
"rewards/margins": 20.762500762939453,
"rewards/rejected": -15.443750381469727,
"step": 930
},
{
"epoch": 0.7181054239877769,
"grad_norm": 0.6325222750613556,
"learning_rate": 1.570458404074703e-07,
"logits/chosen": -0.598095715045929,
"logits/rejected": -1.2218749523162842,
"logps/chosen": -448.20001220703125,
"logps/rejected": -602.2000122070312,
"loss": 0.0011,
"nll_loss": 0.846484363079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.682812690734863,
"rewards/margins": 21.262500762939453,
"rewards/rejected": -15.550000190734863,
"step": 940
},
{
"epoch": 0.7257448433919023,
"grad_norm": 72.89567177903392,
"learning_rate": 1.528013582342954e-07,
"logits/chosen": -0.4874267578125,
"logits/rejected": -0.8919922113418579,
"logps/chosen": -536.4000244140625,
"logps/rejected": -637.7999877929688,
"loss": 0.0072,
"nll_loss": 0.93359375,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.042187690734863,
"rewards/margins": 21.524999618530273,
"rewards/rejected": -16.462499618530273,
"step": 950
},
{
"epoch": 0.7333842627960275,
"grad_norm": 0.008456676433411622,
"learning_rate": 1.4855687606112052e-07,
"logits/chosen": -0.766406238079071,
"logits/rejected": -0.9742187261581421,
"logps/chosen": -489.79998779296875,
"logps/rejected": -652.5999755859375,
"loss": 0.0014,
"nll_loss": 0.858203113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.606249809265137,
"rewards/margins": 21.575000762939453,
"rewards/rejected": -16.96875,
"step": 960
},
{
"epoch": 0.7410236822001528,
"grad_norm": 0.007962632127454504,
"learning_rate": 1.4431239388794567e-07,
"logits/chosen": -0.66693115234375,
"logits/rejected": -1.0675780773162842,
"logps/chosen": -438.6000061035156,
"logps/rejected": -614.0,
"loss": 0.001,
"nll_loss": 0.880859375,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.057812690734863,
"rewards/margins": 20.262500762939453,
"rewards/rejected": -15.206250190734863,
"step": 970
},
{
"epoch": 0.7486631016042781,
"grad_norm": 0.02441213499286302,
"learning_rate": 1.400679117147708e-07,
"logits/chosen": -0.687695324420929,
"logits/rejected": -1.129296898841858,
"logps/chosen": -447.0,
"logps/rejected": -597.0,
"loss": 0.0009,
"nll_loss": 0.920703113079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.3046875,
"rewards/margins": 19.575000762939453,
"rewards/rejected": -14.293749809265137,
"step": 980
},
{
"epoch": 0.7563025210084033,
"grad_norm": 0.014320563932426218,
"learning_rate": 1.3582342954159592e-07,
"logits/chosen": -0.742382824420929,
"logits/rejected": -1.034570336341858,
"logps/chosen": -528.7999877929688,
"logps/rejected": -701.2000122070312,
"loss": 0.0108,
"nll_loss": 0.867968738079071,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.854687690734863,
"rewards/margins": 22.125,
"rewards/rejected": -16.243749618530273,
"step": 990
},
{
"epoch": 0.7639419404125286,
"grad_norm": 0.008404334531270792,
"learning_rate": 1.3157894736842104e-07,
"logits/chosen": -0.5796874761581421,
"logits/rejected": -1.0949218273162842,
"logps/chosen": -478.79998779296875,
"logps/rejected": -641.2000122070312,
"loss": 0.0016,
"nll_loss": 0.8394531011581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.021874904632568,
"rewards/margins": 22.325000762939453,
"rewards/rejected": -16.3125,
"step": 1000
},
{
"epoch": 0.771581359816654,
"grad_norm": 0.011247353406659095,
"learning_rate": 1.273344651952462e-07,
"logits/chosen": -0.642773449420929,
"logits/rejected": -1.0187499523162842,
"logps/chosen": -458.79998779296875,
"logps/rejected": -693.4000244140625,
"loss": 0.0009,
"nll_loss": 0.797656238079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.579687595367432,
"rewards/margins": 22.825000762939453,
"rewards/rejected": -17.274999618530273,
"step": 1010
},
{
"epoch": 0.7792207792207793,
"grad_norm": 0.011990054903778509,
"learning_rate": 1.2308998302207129e-07,
"logits/chosen": -0.7708984613418579,
"logits/rejected": -1.10546875,
"logps/chosen": -528.2000122070312,
"logps/rejected": -659.2000122070312,
"loss": 0.011,
"nll_loss": 0.966796875,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.373437404632568,
"rewards/margins": 20.850000381469727,
"rewards/rejected": -15.475000381469727,
"step": 1020
},
{
"epoch": 0.7868601986249045,
"grad_norm": 0.013096305405330763,
"learning_rate": 1.1884550084889642e-07,
"logits/chosen": -0.7157226800918579,
"logits/rejected": -0.9664062261581421,
"logps/chosen": -532.4000244140625,
"logps/rejected": -603.7999877929688,
"loss": 0.001,
"nll_loss": 0.969531238079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.923437595367432,
"rewards/margins": 22.424999237060547,
"rewards/rejected": -16.493749618530273,
"step": 1030
},
{
"epoch": 0.7944996180290298,
"grad_norm": 0.008024100644779104,
"learning_rate": 1.1460101867572156e-07,
"logits/chosen": -0.8031250238418579,
"logits/rejected": -1.0529296398162842,
"logps/chosen": -563.4000244140625,
"logps/rejected": -657.4000244140625,
"loss": 0.0008,
"nll_loss": 0.835156261920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.645312309265137,
"rewards/margins": 21.875,
"rewards/rejected": -16.237499237060547,
"step": 1040
},
{
"epoch": 0.8021390374331551,
"grad_norm": 0.013602335841313738,
"learning_rate": 1.1035653650254668e-07,
"logits/chosen": -0.711621105670929,
"logits/rejected": -1.1335937976837158,
"logps/chosen": -425.6000061035156,
"logps/rejected": -589.2000122070312,
"loss": 0.0011,
"nll_loss": 0.768750011920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.151562690734863,
"rewards/margins": 21.924999237060547,
"rewards/rejected": -16.756250381469727,
"step": 1050
},
{
"epoch": 0.8097784568372803,
"grad_norm": 0.011969353381424996,
"learning_rate": 1.0611205432937182e-07,
"logits/chosen": -0.632275402545929,
"logits/rejected": -0.9330078363418579,
"logps/chosen": -544.2000122070312,
"logps/rejected": -695.5999755859375,
"loss": 0.0009,
"nll_loss": 0.856640636920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.390625,
"rewards/margins": 22.424999237060547,
"rewards/rejected": -17.037500381469727,
"step": 1060
},
{
"epoch": 0.8174178762414056,
"grad_norm": 60.818967384865154,
"learning_rate": 1.0186757215619694e-07,
"logits/chosen": -0.6527343988418579,
"logits/rejected": -1.0265624523162842,
"logps/chosen": -494.6000061035156,
"logps/rejected": -675.2000122070312,
"loss": 0.0154,
"nll_loss": 0.754687488079071,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.512499809265137,
"rewards/margins": 23.350000381469727,
"rewards/rejected": -17.831249237060547,
"step": 1070
},
{
"epoch": 0.825057295645531,
"grad_norm": 0.011992048240526737,
"learning_rate": 9.762308998302207e-08,
"logits/chosen": -0.7574218511581421,
"logits/rejected": -1.2898437976837158,
"logps/chosen": -418.79998779296875,
"logps/rejected": -582.0,
"loss": 0.0012,
"nll_loss": 0.856640636920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.678906440734863,
"rewards/margins": 21.8125,
"rewards/rejected": -16.125,
"step": 1080
},
{
"epoch": 0.8326967150496563,
"grad_norm": 0.008473889793906824,
"learning_rate": 9.33786078098472e-08,
"logits/chosen": -0.682324230670929,
"logits/rejected": -1.134765625,
"logps/chosen": -449.0,
"logps/rejected": -582.7999877929688,
"loss": 0.0021,
"nll_loss": 0.912109375,
"rewards/accuracies": 1.0,
"rewards/chosen": 4.525000095367432,
"rewards/margins": 21.087499618530273,
"rewards/rejected": -16.543750762939453,
"step": 1090
},
{
"epoch": 0.8403361344537815,
"grad_norm": 4.444711961785813,
"learning_rate": 8.913412563667231e-08,
"logits/chosen": -0.8392578363418579,
"logits/rejected": -1.182031273841858,
"logps/chosen": -433.79998779296875,
"logps/rejected": -678.7999877929688,
"loss": 0.0013,
"nll_loss": 0.8675781488418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.346875190734863,
"rewards/margins": 22.712499618530273,
"rewards/rejected": -16.387500762939453,
"step": 1100
},
{
"epoch": 0.8479755538579068,
"grad_norm": 0.00814358465753462,
"learning_rate": 8.488964346349745e-08,
"logits/chosen": -0.4921875,
"logits/rejected": -1.0751953125,
"logps/chosen": -438.20001220703125,
"logps/rejected": -626.0,
"loss": 0.0008,
"nll_loss": 0.7953125238418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.388281345367432,
"rewards/margins": 21.575000762939453,
"rewards/rejected": -15.162500381469727,
"step": 1110
},
{
"epoch": 0.8556149732620321,
"grad_norm": 0.03463987081566878,
"learning_rate": 8.064516129032257e-08,
"logits/chosen": -0.847460925579071,
"logits/rejected": -1.1613280773162842,
"logps/chosen": -450.6000061035156,
"logps/rejected": -595.0,
"loss": 0.0036,
"nll_loss": 0.8121093511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.856249809265137,
"rewards/margins": 19.899999618530273,
"rewards/rejected": -14.037500381469727,
"step": 1120
},
{
"epoch": 0.8632543926661573,
"grad_norm": 0.007682054439671777,
"learning_rate": 7.64006791171477e-08,
"logits/chosen": -0.606127917766571,
"logits/rejected": -0.908007800579071,
"logps/chosen": -438.0,
"logps/rejected": -600.5999755859375,
"loss": 0.0008,
"nll_loss": 0.760937511920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.009375095367432,
"rewards/margins": 21.5,
"rewards/rejected": -15.481249809265137,
"step": 1130
},
{
"epoch": 0.8708938120702827,
"grad_norm": 0.11056210671905395,
"learning_rate": 7.215619694397283e-08,
"logits/chosen": -0.5245116949081421,
"logits/rejected": -1.024023413658142,
"logps/chosen": -468.6000061035156,
"logps/rejected": -541.4000244140625,
"loss": 0.0086,
"nll_loss": 0.802734375,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.4765625,
"rewards/margins": 20.725000381469727,
"rewards/rejected": -15.231249809265137,
"step": 1140
},
{
"epoch": 0.878533231474408,
"grad_norm": 0.011222005761275277,
"learning_rate": 6.791171477079796e-08,
"logits/chosen": -0.7525390386581421,
"logits/rejected": -0.9857422113418579,
"logps/chosen": -463.6000061035156,
"logps/rejected": -689.7999877929688,
"loss": 0.0009,
"nll_loss": 0.9242187738418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.189062595367432,
"rewards/margins": 21.350000381469727,
"rewards/rejected": -15.15625,
"step": 1150
},
{
"epoch": 0.8861726508785333,
"grad_norm": 0.011114824226909615,
"learning_rate": 6.36672325976231e-08,
"logits/chosen": -0.73583984375,
"logits/rejected": -1.1023437976837158,
"logps/chosen": -468.6000061035156,
"logps/rejected": -638.0,
"loss": 0.0009,
"nll_loss": 0.862500011920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.5078125,
"rewards/margins": 21.381250381469727,
"rewards/rejected": -15.856249809265137,
"step": 1160
},
{
"epoch": 0.8938120702826585,
"grad_norm": 0.2933378882242121,
"learning_rate": 5.942275042444821e-08,
"logits/chosen": -0.630859375,
"logits/rejected": -1.0300781726837158,
"logps/chosen": -426.79998779296875,
"logps/rejected": -549.5999755859375,
"loss": 0.0008,
"nll_loss": 0.800000011920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.603125095367432,
"rewards/margins": 21.325000762939453,
"rewards/rejected": -14.731249809265137,
"step": 1170
},
{
"epoch": 0.9014514896867838,
"grad_norm": 0.019123808537423514,
"learning_rate": 5.517826825127334e-08,
"logits/chosen": -0.660082995891571,
"logits/rejected": -1.091406226158142,
"logps/chosen": -484.20001220703125,
"logps/rejected": -687.2000122070312,
"loss": 0.0158,
"nll_loss": 0.8843749761581421,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 5.125781059265137,
"rewards/margins": 19.993749618530273,
"rewards/rejected": -14.868749618530273,
"step": 1180
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.007638493845027638,
"learning_rate": 5.093378607809847e-08,
"logits/chosen": -0.6869140863418579,
"logits/rejected": -1.1027343273162842,
"logps/chosen": -446.6000061035156,
"logps/rejected": -580.5999755859375,
"loss": 0.0011,
"nll_loss": 0.8980468511581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.057812690734863,
"rewards/margins": 20.299999237060547,
"rewards/rejected": -14.21875,
"step": 1190
},
{
"epoch": 0.9167303284950343,
"grad_norm": 0.008513929557654372,
"learning_rate": 4.66893039049236e-08,
"logits/chosen": -0.590624988079071,
"logits/rejected": -0.91796875,
"logps/chosen": -457.3999938964844,
"logps/rejected": -603.0,
"loss": 0.0008,
"nll_loss": 0.7699218988418579,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.3125,
"rewards/margins": 21.287500381469727,
"rewards/rejected": -14.987500190734863,
"step": 1200
},
{
"epoch": 0.9243697478991597,
"grad_norm": 0.5109154797836508,
"learning_rate": 4.2444821731748725e-08,
"logits/chosen": -0.728015124797821,
"logits/rejected": -1.0966796875,
"logps/chosen": -475.79998779296875,
"logps/rejected": -594.5999755859375,
"loss": 0.0009,
"nll_loss": 0.8374999761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.025000095367432,
"rewards/margins": 21.950000762939453,
"rewards/rejected": -14.925000190734863,
"step": 1210
},
{
"epoch": 0.932009167303285,
"grad_norm": 0.00821641172434573,
"learning_rate": 3.820033955857385e-08,
"logits/chosen": -0.6214843988418579,
"logits/rejected": -1.1667969226837158,
"logps/chosen": -622.2000122070312,
"logps/rejected": -649.0,
"loss": 0.0048,
"nll_loss": 0.9234374761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.256249904632568,
"rewards/margins": 19.975000381469727,
"rewards/rejected": -13.6875,
"step": 1220
},
{
"epoch": 0.9396485867074102,
"grad_norm": 0.019088814928989045,
"learning_rate": 3.395585738539898e-08,
"logits/chosen": -0.7289062738418579,
"logits/rejected": -1.05078125,
"logps/chosen": -475.20001220703125,
"logps/rejected": -590.2000122070312,
"loss": 0.0028,
"nll_loss": 0.856249988079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.353125095367432,
"rewards/margins": 20.362499237060547,
"rewards/rejected": -14.006250381469727,
"step": 1230
},
{
"epoch": 0.9472880061115355,
"grad_norm": 0.04074144502311008,
"learning_rate": 2.9711375212224106e-08,
"logits/chosen": -0.9458984136581421,
"logits/rejected": -1.208593726158142,
"logps/chosen": -498.20001220703125,
"logps/rejected": -555.0,
"loss": 0.0013,
"nll_loss": 0.936718761920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.618750095367432,
"rewards/margins": 19.981250762939453,
"rewards/rejected": -14.387499809265137,
"step": 1240
},
{
"epoch": 0.9549274255156608,
"grad_norm": 0.013648349371693833,
"learning_rate": 2.5466893039049236e-08,
"logits/chosen": -0.591796875,
"logits/rejected": -0.834765613079071,
"logps/chosen": -443.3999938964844,
"logps/rejected": -564.7999877929688,
"loss": 0.0009,
"nll_loss": 0.8203125,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.153124809265137,
"rewards/margins": 20.524999618530273,
"rewards/rejected": -14.368749618530273,
"step": 1250
},
{
"epoch": 0.9625668449197861,
"grad_norm": 0.027600337620728884,
"learning_rate": 2.1222410865874363e-08,
"logits/chosen": -0.802734375,
"logits/rejected": -1.180078148841858,
"logps/chosen": -452.6000061035156,
"logps/rejected": -605.2000122070312,
"loss": 0.0009,
"nll_loss": 0.866015613079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.921875,
"rewards/margins": 21.4375,
"rewards/rejected": -14.493749618530273,
"step": 1260
},
{
"epoch": 0.9702062643239114,
"grad_norm": 0.00753937380269379,
"learning_rate": 1.697792869269949e-08,
"logits/chosen": -0.6973632574081421,
"logits/rejected": -1.0812499523162842,
"logps/chosen": -451.20001220703125,
"logps/rejected": -592.2000122070312,
"loss": 0.0028,
"nll_loss": 0.729296863079071,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.060937404632568,
"rewards/margins": 22.299999237060547,
"rewards/rejected": -15.225000381469727,
"step": 1270
},
{
"epoch": 0.9778456837280367,
"grad_norm": 0.009276587437573345,
"learning_rate": 1.2733446519524618e-08,
"logits/chosen": -0.73193359375,
"logits/rejected": -0.993847668170929,
"logps/chosen": -461.6000061035156,
"logps/rejected": -583.0,
"loss": 0.0009,
"nll_loss": 0.8374999761581421,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.074999809265137,
"rewards/margins": 20.087499618530273,
"rewards/rejected": -14.024999618530273,
"step": 1280
},
{
"epoch": 0.985485103132162,
"grad_norm": 0.009811636634928695,
"learning_rate": 8.488964346349745e-09,
"logits/chosen": -0.7230468988418579,
"logits/rejected": -1.204687476158142,
"logps/chosen": -441.79998779296875,
"logps/rejected": -617.0,
"loss": 0.0019,
"nll_loss": 0.755859375,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.2421875,
"rewards/margins": 22.043750762939453,
"rewards/rejected": -14.800000190734863,
"step": 1290
},
{
"epoch": 0.9931245225362872,
"grad_norm": 0.016745625389395048,
"learning_rate": 4.244482173174872e-09,
"logits/chosen": -0.4999023377895355,
"logits/rejected": -0.810351550579071,
"logps/chosen": -444.79998779296875,
"logps/rejected": -611.5999755859375,
"loss": 0.0009,
"nll_loss": 0.786328136920929,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.178124904632568,
"rewards/margins": 21.649999618530273,
"rewards/rejected": -15.481249809265137,
"step": 1300
},
{
"epoch": 1.0,
"eval_logits/chosen": -0.5260667204856873,
"eval_logits/rejected": -0.9918870329856873,
"eval_logps/chosen": -428.30767822265625,
"eval_logps/rejected": -523.2307739257812,
"eval_loss": 0.0017267990624532104,
"eval_nll_loss": 0.8383413553237915,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 7.533653736114502,
"eval_rewards/margins": 22.846153259277344,
"eval_rewards/rejected": -15.331730842590332,
"eval_runtime": 16.743,
"eval_samples_per_second": 5.973,
"eval_steps_per_second": 0.776,
"step": 1309
},
{
"epoch": 1.0,
"step": 1309,
"total_flos": 0.0,
"train_loss": 0.03593526968435079,
"train_runtime": 4678.8034,
"train_samples_per_second": 2.237,
"train_steps_per_second": 0.28
}
],
"logging_steps": 10,
"max_steps": 1309,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}