| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1309, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007639419404125286, |
| "grad_norm": 170.45702027292876, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.3203125, |
| "logits/rejected": -0.380859375, |
| "logps/chosen": -480.0, |
| "logps/rejected": -448.0, |
| "loss": 0.6914, |
| "nll_loss": 1.015625, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.007639419404125287, |
| "grad_norm": 178.20144137294952, |
| "learning_rate": 3.435114503816794e-08, |
| "logits/chosen": -0.1680501252412796, |
| "logits/rejected": -0.4995659589767456, |
| "logps/chosen": -491.5555419921875, |
| "logps/rejected": -436.4444580078125, |
| "loss": 0.7147, |
| "nll_loss": 0.9696180820465088, |
| "rewards/accuracies": 0.1527777761220932, |
| "rewards/chosen": -0.02501763217151165, |
| "rewards/margins": -0.0271742083132267, |
| "rewards/rejected": 0.0020887586288154125, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.015278838808250574, |
| "grad_norm": 180.13025532672927, |
| "learning_rate": 7.251908396946564e-08, |
| "logits/chosen": -0.3433593809604645, |
| "logits/rejected": -0.49980467557907104, |
| "logps/chosen": -485.0, |
| "logps/rejected": -437.3999938964844, |
| "loss": 0.6991, |
| "nll_loss": 0.95703125, |
| "rewards/accuracies": 0.32499998807907104, |
| "rewards/chosen": 0.0037719726096838713, |
| "rewards/margins": 0.012493896298110485, |
| "rewards/rejected": -0.00875244103372097, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02291825821237586, |
| "grad_norm": 169.32677311123774, |
| "learning_rate": 1.1068702290076336e-07, |
| "logits/chosen": -0.31000977754592896, |
| "logits/rejected": -0.4932617247104645, |
| "logps/chosen": -589.7999877929688, |
| "logps/rejected": -451.20001220703125, |
| "loss": 0.6083, |
| "nll_loss": 0.965624988079071, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.05126953125, |
| "rewards/margins": 0.21274414658546448, |
| "rewards/rejected": -0.26396483182907104, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.030557677616501147, |
| "grad_norm": 113.32771627574577, |
| "learning_rate": 1.4885496183206107e-07, |
| "logits/chosen": -0.33909910917282104, |
| "logits/rejected": -0.608105480670929, |
| "logps/chosen": -462.3999938964844, |
| "logps/rejected": -450.20001220703125, |
| "loss": 0.4859, |
| "nll_loss": 1.0949218273162842, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.30421751737594604, |
| "rewards/margins": 0.539471447467804, |
| "rewards/rejected": -0.8431640863418579, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03819709702062643, |
| "grad_norm": 84.47866594960938, |
| "learning_rate": 1.8702290076335877e-07, |
| "logits/chosen": -0.13973388075828552, |
| "logits/rejected": -0.29816895723342896, |
| "logps/chosen": -527.2000122070312, |
| "logps/rejected": -477.20001220703125, |
| "loss": 0.342, |
| "nll_loss": 0.8960937261581421, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -0.676953136920929, |
| "rewards/margins": 1.025781273841858, |
| "rewards/rejected": -1.7023437023162842, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04583651642475172, |
| "grad_norm": 107.60021345550501, |
| "learning_rate": 2.2519083969465648e-07, |
| "logits/chosen": -0.22696533799171448, |
| "logits/rejected": -0.53271484375, |
| "logps/chosen": -635.0, |
| "logps/rejected": -527.5999755859375, |
| "loss": 0.254, |
| "nll_loss": 1.017968773841858, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.8374999761581421, |
| "rewards/margins": 1.696874976158142, |
| "rewards/rejected": -2.53515625, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.053475935828877004, |
| "grad_norm": 28.927358056320834, |
| "learning_rate": 2.633587786259542e-07, |
| "logits/chosen": -0.37421876192092896, |
| "logits/rejected": -0.861621081829071, |
| "logps/chosen": -514.0, |
| "logps/rejected": -477.0, |
| "loss": 0.1214, |
| "nll_loss": 1.064453125, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -1.0789062976837158, |
| "rewards/margins": 2.9828124046325684, |
| "rewards/rejected": -4.064062595367432, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.061115355233002294, |
| "grad_norm": 49.8426089072564, |
| "learning_rate": 3.015267175572519e-07, |
| "logits/chosen": -0.3656372129917145, |
| "logits/rejected": -0.7126709222793579, |
| "logps/chosen": -464.6000061035156, |
| "logps/rejected": -484.3999938964844, |
| "loss": 0.096, |
| "nll_loss": 0.94921875, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.1742186546325684, |
| "rewards/margins": 3.567187547683716, |
| "rewards/rejected": -5.737500190734863, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06875477463712758, |
| "grad_norm": 5.605991342526607, |
| "learning_rate": 3.396946564885496e-07, |
| "logits/chosen": -0.3612304627895355, |
| "logits/rejected": -0.696093738079071, |
| "logps/chosen": -688.7999877929688, |
| "logps/rejected": -585.0, |
| "loss": 0.0882, |
| "nll_loss": 1.0398437976837158, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -2.598437547683716, |
| "rewards/margins": 4.298437595367432, |
| "rewards/rejected": -6.896874904632568, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07639419404125286, |
| "grad_norm": 28.200619850609755, |
| "learning_rate": 3.7786259541984735e-07, |
| "logits/chosen": -0.7310546636581421, |
| "logits/rejected": -0.8763672113418579, |
| "logps/chosen": -570.4000244140625, |
| "logps/rejected": -548.7999877929688, |
| "loss": 0.0508, |
| "nll_loss": 1.110937476158142, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.4749999046325684, |
| "rewards/margins": 5.371874809265137, |
| "rewards/rejected": -8.850000381469727, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08403361344537816, |
| "grad_norm": 6.3873299119979645, |
| "learning_rate": 4.1603053435114506e-07, |
| "logits/chosen": -0.636962890625, |
| "logits/rejected": -0.724414050579071, |
| "logps/chosen": -592.7999877929688, |
| "logps/rejected": -585.5999755859375, |
| "loss": 0.0221, |
| "nll_loss": 0.946093738079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.209374904632568, |
| "rewards/margins": 6.618750095367432, |
| "rewards/rejected": -10.824999809265137, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09167303284950344, |
| "grad_norm": 31.832402884669897, |
| "learning_rate": 4.541984732824427e-07, |
| "logits/chosen": -0.7198730707168579, |
| "logits/rejected": -0.973437488079071, |
| "logps/chosen": -609.5999755859375, |
| "logps/rejected": -571.7999877929688, |
| "loss": 0.0651, |
| "nll_loss": 1.049218773841858, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -5.90625, |
| "rewards/margins": 7.715624809265137, |
| "rewards/rejected": -13.612500190734863, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09931245225362872, |
| "grad_norm": 3.881434643874199, |
| "learning_rate": 4.923664122137405e-07, |
| "logits/chosen": -0.7142578363418579, |
| "logits/rejected": -0.9664062261581421, |
| "logps/chosen": -641.0, |
| "logps/rejected": -595.5999755859375, |
| "loss": 0.0132, |
| "nll_loss": 1.142968773841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.037499904632568, |
| "rewards/margins": 8.584375381469727, |
| "rewards/rejected": -15.618749618530273, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10695187165775401, |
| "grad_norm": 8.414178904055158, |
| "learning_rate": 4.966044142614601e-07, |
| "logits/chosen": -1.041015625, |
| "logits/rejected": -1.1277344226837158, |
| "logps/chosen": -511.0, |
| "logps/rejected": -616.7999877929688, |
| "loss": 0.0167, |
| "nll_loss": 1.2097656726837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.787499904632568, |
| "rewards/margins": 9.756250381469727, |
| "rewards/rejected": -16.53125, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11459129106187929, |
| "grad_norm": 0.032452792842087574, |
| "learning_rate": 4.923599320882851e-07, |
| "logits/chosen": -0.950976550579071, |
| "logits/rejected": -1.0867187976837158, |
| "logps/chosen": -611.5999755859375, |
| "logps/rejected": -621.5999755859375, |
| "loss": 0.0094, |
| "nll_loss": 1.13671875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.931250095367432, |
| "rewards/margins": 10.837499618530273, |
| "rewards/rejected": -17.768749237060547, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12223071046600459, |
| "grad_norm": 2.0648500557312355, |
| "learning_rate": 4.881154499151103e-07, |
| "logits/chosen": -0.81689453125, |
| "logits/rejected": -1.2109375, |
| "logps/chosen": -565.4000244140625, |
| "logps/rejected": -601.2000122070312, |
| "loss": 0.0546, |
| "nll_loss": 1.2062499523162842, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -6.356249809265137, |
| "rewards/margins": 10.96875, |
| "rewards/rejected": -17.34375, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12987012987012986, |
| "grad_norm": 156.0698805984043, |
| "learning_rate": 4.838709677419355e-07, |
| "logits/chosen": -0.837597668170929, |
| "logits/rejected": -1.15234375, |
| "logps/chosen": -627.2000122070312, |
| "logps/rejected": -588.4000244140625, |
| "loss": 0.0343, |
| "nll_loss": 1.1160156726837158, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -7.525000095367432, |
| "rewards/margins": 10.606249809265137, |
| "rewards/rejected": -18.137500762939453, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.13750954927425516, |
| "grad_norm": 5.873948116015113, |
| "learning_rate": 4.796264855687606e-07, |
| "logits/chosen": -0.7525390386581421, |
| "logits/rejected": -1.002343773841858, |
| "logps/chosen": -559.4000244140625, |
| "logps/rejected": -559.0, |
| "loss": 0.1111, |
| "nll_loss": 1.126562476158142, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -5.265625, |
| "rewards/margins": 10.53125, |
| "rewards/rejected": -15.774999618530273, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.14514896867838045, |
| "grad_norm": 3.7990208299387955, |
| "learning_rate": 4.753820033955857e-07, |
| "logits/chosen": -0.87109375, |
| "logits/rejected": -1.031640648841858, |
| "logps/chosen": -582.7999877929688, |
| "logps/rejected": -604.2000122070312, |
| "loss": 0.0453, |
| "nll_loss": 1.1710937023162842, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -4.251562595367432, |
| "rewards/margins": 11.637499809265137, |
| "rewards/rejected": -15.887499809265137, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.15278838808250572, |
| "grad_norm": 0.017438679531302244, |
| "learning_rate": 4.7113752122241087e-07, |
| "logits/chosen": -0.7158203125, |
| "logits/rejected": -0.7515624761581421, |
| "logps/chosen": -643.0, |
| "logps/rejected": -667.0, |
| "loss": 0.0052, |
| "nll_loss": 0.990234375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.699999809265137, |
| "rewards/margins": 12.118749618530273, |
| "rewards/rejected": -16.825000762939453, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.16042780748663102, |
| "grad_norm": 0.9343571136429083, |
| "learning_rate": 4.66893039049236e-07, |
| "logits/chosen": -0.68115234375, |
| "logits/rejected": -0.9722656011581421, |
| "logps/chosen": -562.4000244140625, |
| "logps/rejected": -658.2000122070312, |
| "loss": 0.0294, |
| "nll_loss": 1.154296875, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.481249809265137, |
| "rewards/margins": 12.212499618530273, |
| "rewards/rejected": -17.681249618530273, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.16806722689075632, |
| "grad_norm": 13.184210820946406, |
| "learning_rate": 4.6264855687606106e-07, |
| "logits/chosen": -0.8500000238418579, |
| "logits/rejected": -0.9549804925918579, |
| "logps/chosen": -510.79998779296875, |
| "logps/rejected": -650.2000122070312, |
| "loss": 0.0139, |
| "nll_loss": 1.064062476158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.331250190734863, |
| "rewards/margins": 11.449999809265137, |
| "rewards/rejected": -17.793750762939453, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.17570664629488159, |
| "grad_norm": 0.1965917564106709, |
| "learning_rate": 4.5840407470288624e-07, |
| "logits/chosen": -0.682421863079071, |
| "logits/rejected": -0.934374988079071, |
| "logps/chosen": -543.7999877929688, |
| "logps/rejected": -630.0, |
| "loss": 0.0022, |
| "nll_loss": 1.0968749523162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.925000190734863, |
| "rewards/margins": 12.75, |
| "rewards/rejected": -18.662500381469727, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.18334606569900688, |
| "grad_norm": 0.07593077705850547, |
| "learning_rate": 4.5415959252971136e-07, |
| "logits/chosen": -0.6851562261581421, |
| "logits/rejected": -1.002343773841858, |
| "logps/chosen": -518.4000244140625, |
| "logps/rejected": -575.0, |
| "loss": 0.0236, |
| "nll_loss": 1.168359398841858, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -6.784375190734863, |
| "rewards/margins": 12.274999618530273, |
| "rewards/rejected": -19.0625, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.19098548510313215, |
| "grad_norm": 12.098498040730545, |
| "learning_rate": 4.499151103565365e-07, |
| "logits/chosen": -0.5663086175918579, |
| "logits/rejected": -0.6595703363418579, |
| "logps/chosen": -686.4000244140625, |
| "logps/rejected": -734.0, |
| "loss": 0.0089, |
| "nll_loss": 1.1203124523162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.21875, |
| "rewards/margins": 11.8125, |
| "rewards/rejected": -19.037500381469727, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19862490450725745, |
| "grad_norm": 0.06575954655043179, |
| "learning_rate": 4.456706281833616e-07, |
| "logits/chosen": -0.6996093988418579, |
| "logits/rejected": -1.0099608898162842, |
| "logps/chosen": -555.2000122070312, |
| "logps/rejected": -585.5999755859375, |
| "loss": 0.0365, |
| "nll_loss": 1.1339843273162842, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -6.209374904632568, |
| "rewards/margins": 13.375, |
| "rewards/rejected": -19.575000762939453, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.20626432391138275, |
| "grad_norm": 0.09027759120640111, |
| "learning_rate": 4.4142614601018673e-07, |
| "logits/chosen": -0.6943359375, |
| "logits/rejected": -1.0158202648162842, |
| "logps/chosen": -602.7999877929688, |
| "logps/rejected": -640.0, |
| "loss": 0.0016, |
| "nll_loss": 1.111718773841858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.479687690734863, |
| "rewards/margins": 14.71875, |
| "rewards/rejected": -20.225000381469727, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "grad_norm": 0.17134559287010234, |
| "learning_rate": 4.3718166383701186e-07, |
| "logits/chosen": -0.658984363079071, |
| "logits/rejected": -1.2468750476837158, |
| "logps/chosen": -639.2000122070312, |
| "logps/rejected": -695.2000122070312, |
| "loss": 0.0025, |
| "nll_loss": 1.1902344226837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.2578125, |
| "rewards/margins": 13.84375, |
| "rewards/rejected": -20.112499237060547, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2215431627196333, |
| "grad_norm": 0.1098174270847581, |
| "learning_rate": 4.32937181663837e-07, |
| "logits/chosen": -0.48955076932907104, |
| "logits/rejected": -0.794726550579071, |
| "logps/chosen": -671.4000244140625, |
| "logps/rejected": -693.5999755859375, |
| "loss": 0.0079, |
| "nll_loss": 1.026953101158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.5078125, |
| "rewards/margins": 14.581250190734863, |
| "rewards/rejected": -21.075000762939453, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.22918258212375858, |
| "grad_norm": 0.28994386032232317, |
| "learning_rate": 4.286926994906621e-07, |
| "logits/chosen": -0.778124988079071, |
| "logits/rejected": -1.2712891101837158, |
| "logps/chosen": -580.2000122070312, |
| "logps/rejected": -701.5999755859375, |
| "loss": 0.0178, |
| "nll_loss": 1.097265601158142, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.809374809265137, |
| "rewards/margins": 16.912500381469727, |
| "rewards/rejected": -22.75, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.23682200152788388, |
| "grad_norm": 2.0198779669945854, |
| "learning_rate": 4.244482173174873e-07, |
| "logits/chosen": -0.6753906011581421, |
| "logits/rejected": -0.8785156011581421, |
| "logps/chosen": -503.3999938964844, |
| "logps/rejected": -644.4000244140625, |
| "loss": 0.0178, |
| "nll_loss": 1.0109374523162842, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.831250190734863, |
| "rewards/margins": 15.168749809265137, |
| "rewards/rejected": -21.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.24446142093200918, |
| "grad_norm": 4.494678945589362, |
| "learning_rate": 4.202037351443124e-07, |
| "logits/chosen": -0.7886718511581421, |
| "logits/rejected": -1.163671851158142, |
| "logps/chosen": -533.5999755859375, |
| "logps/rejected": -610.2000122070312, |
| "loss": 0.0019, |
| "nll_loss": 1.0773437023162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.510937690734863, |
| "rewards/margins": 16.59375, |
| "rewards/rejected": -21.125, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.25210084033613445, |
| "grad_norm": 138.82132160510554, |
| "learning_rate": 4.159592529711375e-07, |
| "logits/chosen": -0.6773437261581421, |
| "logits/rejected": -1.036523461341858, |
| "logps/chosen": -586.4000244140625, |
| "logps/rejected": -664.7999877929688, |
| "loss": 0.031, |
| "nll_loss": 1.092187523841858, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -4.827343940734863, |
| "rewards/margins": 16.943750381469727, |
| "rewards/rejected": -21.762500762939453, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 0.025227316427022712, |
| "learning_rate": 4.1171477079796265e-07, |
| "logits/chosen": -0.734814465045929, |
| "logits/rejected": -0.9996093511581421, |
| "logps/chosen": -568.4000244140625, |
| "logps/rejected": -720.7999877929688, |
| "loss": 0.0084, |
| "nll_loss": 1.066015601158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.956250190734863, |
| "rewards/margins": 18.075000762939453, |
| "rewards/rejected": -23.037500381469727, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.26737967914438504, |
| "grad_norm": 0.01517317320009414, |
| "learning_rate": 4.074702886247878e-07, |
| "logits/chosen": -0.7974609136581421, |
| "logits/rejected": -1.169531226158142, |
| "logps/chosen": -553.7999877929688, |
| "logps/rejected": -682.4000244140625, |
| "loss": 0.0211, |
| "nll_loss": 1.1316406726837158, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -5.162499904632568, |
| "rewards/margins": 16.975000381469727, |
| "rewards/rejected": -22.162500381469727, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2750190985485103, |
| "grad_norm": 0.041343364887589294, |
| "learning_rate": 4.0322580645161285e-07, |
| "logits/chosen": -0.662109375, |
| "logits/rejected": -0.91796875, |
| "logps/chosen": -620.4000244140625, |
| "logps/rejected": -662.4000244140625, |
| "loss": 0.0023, |
| "nll_loss": 1.0871093273162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.9710936546325684, |
| "rewards/margins": 16.931249618530273, |
| "rewards/rejected": -20.924999237060547, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2826585179526356, |
| "grad_norm": 0.11405518122478978, |
| "learning_rate": 3.98981324278438e-07, |
| "logits/chosen": -0.982617199420929, |
| "logits/rejected": -1.167089819908142, |
| "logps/chosen": -529.5999755859375, |
| "logps/rejected": -669.5999755859375, |
| "loss": 0.0031, |
| "nll_loss": 1.079687476158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2236328125, |
| "rewards/margins": 17.143749237060547, |
| "rewards/rejected": -19.350000381469727, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2902979373567609, |
| "grad_norm": 0.04601729303454066, |
| "learning_rate": 3.9473684210526315e-07, |
| "logits/chosen": -0.7412109375, |
| "logits/rejected": -1.019921898841858, |
| "logps/chosen": -605.2000122070312, |
| "logps/rejected": -632.0, |
| "loss": 0.0223, |
| "nll_loss": 1.0417969226837158, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.555468797683716, |
| "rewards/margins": 14.84375, |
| "rewards/rejected": -17.387500762939453, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2979373567608862, |
| "grad_norm": 1.1114016283135653, |
| "learning_rate": 3.9049235993208827e-07, |
| "logits/chosen": -0.7603515386581421, |
| "logits/rejected": -1.0732421875, |
| "logps/chosen": -573.4000244140625, |
| "logps/rejected": -604.4000244140625, |
| "loss": 0.0016, |
| "nll_loss": 1.0964844226837158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.951171875, |
| "rewards/margins": 16.706249237060547, |
| "rewards/rejected": -17.637500762939453, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.30557677616501144, |
| "grad_norm": 0.012006618077367264, |
| "learning_rate": 3.862478777589134e-07, |
| "logits/chosen": -0.7928711175918579, |
| "logits/rejected": -1.187109351158142, |
| "logps/chosen": -566.0, |
| "logps/rejected": -637.5999755859375, |
| "loss": 0.0013, |
| "nll_loss": 1.038671851158142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5223633050918579, |
| "rewards/margins": 18.431249618530273, |
| "rewards/rejected": -18.956249237060547, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.31321619556913677, |
| "grad_norm": 0.02301493074282767, |
| "learning_rate": 3.820033955857385e-07, |
| "logits/chosen": -0.776562511920929, |
| "logits/rejected": -1.072265625, |
| "logps/chosen": -542.2000122070312, |
| "logps/rejected": -660.7999877929688, |
| "loss": 0.0011, |
| "nll_loss": 0.9789062738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2033202648162842, |
| "rewards/margins": 16.987499237060547, |
| "rewards/rejected": -18.1875, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 0.019580609038995344, |
| "learning_rate": 3.7775891341256364e-07, |
| "logits/chosen": -0.573535144329071, |
| "logits/rejected": -1.023046851158142, |
| "logps/chosen": -610.7999877929688, |
| "logps/rejected": -669.2000122070312, |
| "loss": 0.0016, |
| "nll_loss": 1.0207030773162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8062499761581421, |
| "rewards/margins": 17.950000762939453, |
| "rewards/rejected": -18.768749237060547, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3284950343773873, |
| "grad_norm": 0.04377587946128281, |
| "learning_rate": 3.735144312393888e-07, |
| "logits/chosen": -0.741406261920929, |
| "logits/rejected": -1.0378906726837158, |
| "logps/chosen": -615.7999877929688, |
| "logps/rejected": -727.4000244140625, |
| "loss": 0.0011, |
| "nll_loss": 0.981249988079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4393310546875, |
| "rewards/margins": 18.5, |
| "rewards/rejected": -18.918750762939453, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 0.03635815986206019, |
| "learning_rate": 3.692699490662139e-07, |
| "logits/chosen": -0.5271972417831421, |
| "logits/rejected": -0.888427734375, |
| "logps/chosen": -523.2000122070312, |
| "logps/rejected": -679.5999755859375, |
| "loss": 0.0012, |
| "nll_loss": 0.9750000238418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.815673828125, |
| "rewards/margins": 18.475000381469727, |
| "rewards/rejected": -19.287500381469727, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3437738731856379, |
| "grad_norm": 1.1649563263655982, |
| "learning_rate": 3.65025466893039e-07, |
| "logits/chosen": -0.6383301019668579, |
| "logits/rejected": -1.141015648841858, |
| "logps/chosen": -571.5999755859375, |
| "logps/rejected": -628.0, |
| "loss": 0.0375, |
| "nll_loss": 1.175390601158142, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -0.02695312537252903, |
| "rewards/margins": 15.956250190734863, |
| "rewards/rejected": -15.975000381469727, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.35141329258976317, |
| "grad_norm": 2.473516604965399, |
| "learning_rate": 3.607809847198642e-07, |
| "logits/chosen": -0.904296875, |
| "logits/rejected": -1.099609375, |
| "logps/chosen": -502.0, |
| "logps/rejected": -662.7999877929688, |
| "loss": 0.0022, |
| "nll_loss": 0.987109363079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.136083960533142, |
| "rewards/margins": 18.356250762939453, |
| "rewards/rejected": -17.225000381469727, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.35905271199388844, |
| "grad_norm": 0.019754385722868507, |
| "learning_rate": 3.5653650254668926e-07, |
| "logits/chosen": -0.616748034954071, |
| "logits/rejected": -0.873828113079071, |
| "logps/chosen": -512.7999877929688, |
| "logps/rejected": -639.4000244140625, |
| "loss": 0.001, |
| "nll_loss": 1.00390625, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5836913585662842, |
| "rewards/margins": 18.700000762939453, |
| "rewards/rejected": -17.118749618530273, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.36669213139801377, |
| "grad_norm": 0.012995476138882066, |
| "learning_rate": 3.5229202037351443e-07, |
| "logits/chosen": -0.879833996295929, |
| "logits/rejected": -1.171484351158142, |
| "logps/chosen": -481.20001220703125, |
| "logps/rejected": -624.0, |
| "loss": 0.0013, |
| "nll_loss": 0.9457031488418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20126953721046448, |
| "rewards/margins": 17.8125, |
| "rewards/rejected": -17.600000381469727, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.37433155080213903, |
| "grad_norm": 0.0300203308477904, |
| "learning_rate": 3.4804753820033956e-07, |
| "logits/chosen": -0.627734363079071, |
| "logits/rejected": -1.034765601158142, |
| "logps/chosen": -575.7999877929688, |
| "logps/rejected": -640.0, |
| "loss": 0.002, |
| "nll_loss": 1.0011718273162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12880858778953552, |
| "rewards/margins": 18.268749237060547, |
| "rewards/rejected": -18.125, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3819709702062643, |
| "grad_norm": 0.011247181105488207, |
| "learning_rate": 3.438030560271647e-07, |
| "logits/chosen": -0.7381836175918579, |
| "logits/rejected": -0.9857422113418579, |
| "logps/chosen": -526.7999877929688, |
| "logps/rejected": -615.5999755859375, |
| "loss": 0.0012, |
| "nll_loss": 1.021484375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.921679675579071, |
| "rewards/margins": 18.206249237060547, |
| "rewards/rejected": -17.293750762939453, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.38961038961038963, |
| "grad_norm": 0.013542548861045974, |
| "learning_rate": 3.395585738539898e-07, |
| "logits/chosen": -0.645312488079071, |
| "logits/rejected": -0.8863281011581421, |
| "logps/chosen": -615.0, |
| "logps/rejected": -656.4000244140625, |
| "loss": 0.0014, |
| "nll_loss": 0.9214843511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16367188096046448, |
| "rewards/margins": 18.606250762939453, |
| "rewards/rejected": -18.46875, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3972498090145149, |
| "grad_norm": 0.023268796810063094, |
| "learning_rate": 3.3531409168081493e-07, |
| "logits/chosen": -0.6841796636581421, |
| "logits/rejected": -0.912109375, |
| "logps/chosen": -529.2000122070312, |
| "logps/rejected": -675.5999755859375, |
| "loss": 0.0056, |
| "nll_loss": 0.888671875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.271484375, |
| "rewards/margins": 18.581249237060547, |
| "rewards/rejected": -17.331249237060547, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.40488922841864017, |
| "grad_norm": 0.008467585302493998, |
| "learning_rate": 3.3106960950764005e-07, |
| "logits/chosen": -0.848437488079071, |
| "logits/rejected": -1.3093750476837158, |
| "logps/chosen": -609.2000122070312, |
| "logps/rejected": -703.5999755859375, |
| "loss": 0.0028, |
| "nll_loss": 1.0167968273162842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.343554735183716, |
| "rewards/margins": 20.337499618530273, |
| "rewards/rejected": -17.981250762939453, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4125286478227655, |
| "grad_norm": 3.5149140081773647, |
| "learning_rate": 3.268251273344652e-07, |
| "logits/chosen": -0.753710925579071, |
| "logits/rejected": -1.161718726158142, |
| "logps/chosen": -523.4000244140625, |
| "logps/rejected": -566.5999755859375, |
| "loss": 0.0212, |
| "nll_loss": 1.001953125, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.139843702316284, |
| "rewards/margins": 18.256250381469727, |
| "rewards/rejected": -16.106250762939453, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 0.01515961183908316, |
| "learning_rate": 3.225806451612903e-07, |
| "logits/chosen": -0.751171886920929, |
| "logits/rejected": -1.060644507408142, |
| "logps/chosen": -495.20001220703125, |
| "logps/rejected": -612.4000244140625, |
| "loss": 0.0018, |
| "nll_loss": 0.955078125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.314208984375, |
| "rewards/margins": 19.225000381469727, |
| "rewards/rejected": -17.887500762939453, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "grad_norm": 0.010042277083548832, |
| "learning_rate": 3.183361629881154e-07, |
| "logits/chosen": -0.8096679449081421, |
| "logits/rejected": -1.017187476158142, |
| "logps/chosen": -509.79998779296875, |
| "logps/rejected": -657.5999755859375, |
| "loss": 0.0096, |
| "nll_loss": 0.9886718988418579, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 1.565820336341858, |
| "rewards/margins": 18.668750762939453, |
| "rewards/rejected": -17.09375, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.43544690603514136, |
| "grad_norm": 357.30801484169297, |
| "learning_rate": 3.140916808149406e-07, |
| "logits/chosen": -0.5929931402206421, |
| "logits/rejected": -0.9761718511581421, |
| "logps/chosen": -537.7999877929688, |
| "logps/rejected": -641.5999755859375, |
| "loss": 0.1376, |
| "nll_loss": 0.9449218511581421, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 0.69873046875, |
| "rewards/margins": 19.549999237060547, |
| "rewards/rejected": -18.875, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4430863254392666, |
| "grad_norm": 0.012075462997198727, |
| "learning_rate": 3.0984719864176567e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.950390636920929, |
| "logps/chosen": -502.0, |
| "logps/rejected": -646.7999877929688, |
| "loss": 0.001, |
| "nll_loss": 0.899218738079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9572265148162842, |
| "rewards/margins": 20.924999237060547, |
| "rewards/rejected": -18.987499237060547, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4507257448433919, |
| "grad_norm": 0.008925296396445518, |
| "learning_rate": 3.056027164685908e-07, |
| "logits/chosen": -0.655078113079071, |
| "logits/rejected": -1.072265625, |
| "logps/chosen": -509.6000061035156, |
| "logps/rejected": -565.0, |
| "loss": 0.0113, |
| "nll_loss": 1.0390625, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.5933594703674316, |
| "rewards/margins": 19.112499237060547, |
| "rewards/rejected": -16.537500381469727, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.45836516424751717, |
| "grad_norm": 0.017036225092415577, |
| "learning_rate": 3.0135823429541597e-07, |
| "logits/chosen": -0.8695312738418579, |
| "logits/rejected": -1.1437499523162842, |
| "logps/chosen": -499.79998779296875, |
| "logps/rejected": -588.4000244140625, |
| "loss": 0.0009, |
| "nll_loss": 0.923828125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.021191358566284, |
| "rewards/margins": 19.168750762939453, |
| "rewards/rejected": -16.149999618530273, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4660045836516425, |
| "grad_norm": 3.1773486559043835, |
| "learning_rate": 2.9711375212224104e-07, |
| "logits/chosen": -0.520703136920929, |
| "logits/rejected": -0.919140636920929, |
| "logps/chosen": -497.20001220703125, |
| "logps/rejected": -629.2000122070312, |
| "loss": 0.0016, |
| "nll_loss": 0.955859363079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.760937452316284, |
| "rewards/margins": 18.887500762939453, |
| "rewards/rejected": -15.15625, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.47364400305576776, |
| "grad_norm": 3.105337083291931, |
| "learning_rate": 2.928692699490662e-07, |
| "logits/chosen": -0.666015625, |
| "logits/rejected": -1.2062499523162842, |
| "logps/chosen": -525.5999755859375, |
| "logps/rejected": -617.0, |
| "loss": 0.0017, |
| "nll_loss": 0.96484375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.859375, |
| "rewards/margins": 19.674999237060547, |
| "rewards/rejected": -16.793750762939453, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.48128342245989303, |
| "grad_norm": 0.008823604800824746, |
| "learning_rate": 2.8862478777589134e-07, |
| "logits/chosen": -0.7459961175918579, |
| "logits/rejected": -1.0949218273162842, |
| "logps/chosen": -612.0, |
| "logps/rejected": -654.4000244140625, |
| "loss": 0.0099, |
| "nll_loss": 0.9632812738418579, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 2.2660155296325684, |
| "rewards/margins": 20.5, |
| "rewards/rejected": -18.21875, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.48892284186401835, |
| "grad_norm": 0.032537468902626224, |
| "learning_rate": 2.8438030560271646e-07, |
| "logits/chosen": -0.6953125, |
| "logits/rejected": -1.015234351158142, |
| "logps/chosen": -602.7999877929688, |
| "logps/rejected": -617.0, |
| "loss": 0.003, |
| "nll_loss": 1.107421875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.46875, |
| "rewards/margins": 21.0, |
| "rewards/rejected": -17.524999618530273, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4965622612681436, |
| "grad_norm": 0.018661326082527362, |
| "learning_rate": 2.801358234295416e-07, |
| "logits/chosen": -0.6830078363418579, |
| "logits/rejected": -1.1179687976837158, |
| "logps/chosen": -403.79998779296875, |
| "logps/rejected": -591.0, |
| "loss": 0.001, |
| "nll_loss": 0.951953113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8648924827575684, |
| "rewards/margins": 19.831249237060547, |
| "rewards/rejected": -16.962499618530273, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5042016806722689, |
| "grad_norm": 0.15005148372984548, |
| "learning_rate": 2.758913412563667e-07, |
| "logits/chosen": -0.636279284954071, |
| "logits/rejected": -1.015234351158142, |
| "logps/chosen": -487.79998779296875, |
| "logps/rejected": -609.5999755859375, |
| "loss": 0.0063, |
| "nll_loss": 0.90625, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2437500953674316, |
| "rewards/margins": 18.637500762939453, |
| "rewards/rejected": -15.399999618530273, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5118411000763942, |
| "grad_norm": 0.009676166350461137, |
| "learning_rate": 2.7164685908319183e-07, |
| "logits/chosen": -0.519238293170929, |
| "logits/rejected": -0.81494140625, |
| "logps/chosen": -523.0, |
| "logps/rejected": -656.7999877929688, |
| "loss": 0.0045, |
| "nll_loss": 0.9761718511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0859375, |
| "rewards/margins": 19.78125, |
| "rewards/rejected": -16.65625, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 0.01124980367552591, |
| "learning_rate": 2.67402376910017e-07, |
| "logits/chosen": -0.5941406488418579, |
| "logits/rejected": -1.1257812976837158, |
| "logps/chosen": -451.79998779296875, |
| "logps/rejected": -609.5999755859375, |
| "loss": 0.001, |
| "nll_loss": 0.935546875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.285937309265137, |
| "rewards/margins": 20.774999618530273, |
| "rewards/rejected": -16.5, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5271199388846448, |
| "grad_norm": 1.031627498209902, |
| "learning_rate": 2.631578947368421e-07, |
| "logits/chosen": -0.6713622808456421, |
| "logits/rejected": -0.9341796636581421, |
| "logps/chosen": -528.7999877929688, |
| "logps/rejected": -627.5999755859375, |
| "loss": 0.0011, |
| "nll_loss": 0.9410156011581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.165722846984863, |
| "rewards/margins": 20.112499237060547, |
| "rewards/rejected": -15.949999809265137, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "grad_norm": 0.008612481920354584, |
| "learning_rate": 2.589134125636672e-07, |
| "logits/chosen": -0.626953125, |
| "logits/rejected": -1.13671875, |
| "logps/chosen": -568.4000244140625, |
| "logps/rejected": -658.5999755859375, |
| "loss": 0.0153, |
| "nll_loss": 0.9609375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.57421875, |
| "rewards/margins": 17.993749618530273, |
| "rewards/rejected": -14.399999618530273, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5423987776928954, |
| "grad_norm": 0.3960498135346062, |
| "learning_rate": 2.546689303904924e-07, |
| "logits/chosen": -0.4974121153354645, |
| "logits/rejected": -0.73291015625, |
| "logps/chosen": -462.20001220703125, |
| "logps/rejected": -560.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.8374999761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.01953125, |
| "rewards/margins": 19.4375, |
| "rewards/rejected": -14.418749809265137, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5500381970970206, |
| "grad_norm": 0.010307871330397535, |
| "learning_rate": 2.5042444821731745e-07, |
| "logits/chosen": -0.713671863079071, |
| "logits/rejected": -0.9332031011581421, |
| "logps/chosen": -481.0, |
| "logps/rejected": -631.4000244140625, |
| "loss": 0.0021, |
| "nll_loss": 0.879687488079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.451367139816284, |
| "rewards/margins": 19.412500381469727, |
| "rewards/rejected": -15.962499618530273, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5576776165011459, |
| "grad_norm": 0.012937044581846035, |
| "learning_rate": 2.4617996604414257e-07, |
| "logits/chosen": -0.8158203363418579, |
| "logits/rejected": -0.971972644329071, |
| "logps/chosen": -485.79998779296875, |
| "logps/rejected": -641.5999755859375, |
| "loss": 0.001, |
| "nll_loss": 0.8539062738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.935937404632568, |
| "rewards/margins": 19.575000762939453, |
| "rewards/rejected": -14.625, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5653170359052712, |
| "grad_norm": 0.00943278466797185, |
| "learning_rate": 2.4193548387096775e-07, |
| "logits/chosen": -0.7265625, |
| "logits/rejected": -1.0984375476837158, |
| "logps/chosen": -467.3999938964844, |
| "logps/rejected": -628.4000244140625, |
| "loss": 0.0009, |
| "nll_loss": 0.907421886920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.9765625, |
| "rewards/margins": 20.825000762939453, |
| "rewards/rejected": -15.850000381469727, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5729564553093965, |
| "grad_norm": 0.012206838530460923, |
| "learning_rate": 2.3769100169779285e-07, |
| "logits/chosen": -0.4423828125, |
| "logits/rejected": -0.7054198980331421, |
| "logps/chosen": -529.4000244140625, |
| "logps/rejected": -661.2000122070312, |
| "loss": 0.0244, |
| "nll_loss": 0.9312499761581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 4.828125, |
| "rewards/margins": 18.549999237060547, |
| "rewards/rejected": -13.71875, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5805958747135218, |
| "grad_norm": 0.02778687692676317, |
| "learning_rate": 2.33446519524618e-07, |
| "logits/chosen": -0.5909179449081421, |
| "logits/rejected": -0.9839843511581421, |
| "logps/chosen": -538.7999877929688, |
| "logps/rejected": -577.2000122070312, |
| "loss": 0.006, |
| "nll_loss": 0.9312499761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.465624809265137, |
| "rewards/margins": 19.181249618530273, |
| "rewards/rejected": -13.706250190734863, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.005731559164767884, |
| "learning_rate": 2.2920203735144312e-07, |
| "logits/chosen": -0.5236572027206421, |
| "logits/rejected": -0.7826172113418579, |
| "logps/chosen": -469.20001220703125, |
| "logps/rejected": -575.7999877929688, |
| "loss": 0.0009, |
| "nll_loss": 0.8277343511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.328125, |
| "rewards/margins": 19.4375, |
| "rewards/rejected": -14.106249809265137, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5958747135217723, |
| "grad_norm": 2.1424567307120626, |
| "learning_rate": 2.2495755517826824e-07, |
| "logits/chosen": -0.641796886920929, |
| "logits/rejected": -0.885937511920929, |
| "logps/chosen": -497.20001220703125, |
| "logps/rejected": -620.0, |
| "loss": 0.0011, |
| "nll_loss": 0.9375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.8671875, |
| "rewards/margins": 19.137500762939453, |
| "rewards/rejected": -14.243749618530273, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6035141329258976, |
| "grad_norm": 0.578328922678163, |
| "learning_rate": 2.2071307300509337e-07, |
| "logits/chosen": -0.666796863079071, |
| "logits/rejected": -1.081640601158142, |
| "logps/chosen": -468.20001220703125, |
| "logps/rejected": -602.7999877929688, |
| "loss": 0.0009, |
| "nll_loss": 0.861328125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.762499809265137, |
| "rewards/margins": 21.625, |
| "rewards/rejected": -15.862500190734863, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6111535523300229, |
| "grad_norm": 71.3110272488238, |
| "learning_rate": 2.164685908319185e-07, |
| "logits/chosen": -0.59033203125, |
| "logits/rejected": -1.054101586341858, |
| "logps/chosen": -490.6000061035156, |
| "logps/rejected": -576.5999755859375, |
| "loss": 0.0253, |
| "nll_loss": 0.8355468511581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.728125095367432, |
| "rewards/margins": 20.274999618530273, |
| "rewards/rejected": -14.543749809265137, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6187929717341482, |
| "grad_norm": 139.17739524623934, |
| "learning_rate": 2.1222410865874364e-07, |
| "logits/chosen": -0.5176025629043579, |
| "logits/rejected": -0.8919922113418579, |
| "logps/chosen": -384.3999938964844, |
| "logps/rejected": -602.7999877929688, |
| "loss": 0.0771, |
| "nll_loss": 0.791796863079071, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 5.825781345367432, |
| "rewards/margins": 19.893749237060547, |
| "rewards/rejected": -14.0625, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6264323911382735, |
| "grad_norm": 0.009397467485646585, |
| "learning_rate": 2.0797962648556874e-07, |
| "logits/chosen": -0.4286132752895355, |
| "logits/rejected": -0.8472656011581421, |
| "logps/chosen": -455.0, |
| "logps/rejected": -579.0, |
| "loss": 0.0008, |
| "nll_loss": 0.780468761920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.40625, |
| "rewards/margins": 19.087499618530273, |
| "rewards/rejected": -13.706250190734863, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6340718105423988, |
| "grad_norm": 0.01440039833737066, |
| "learning_rate": 2.037351443123939e-07, |
| "logits/chosen": -0.5261474847793579, |
| "logits/rejected": -0.8597656488418579, |
| "logps/chosen": -419.3999938964844, |
| "logps/rejected": -557.4000244140625, |
| "loss": 0.0201, |
| "nll_loss": 0.774218738079071, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.792187690734863, |
| "rewards/margins": 18.5625, |
| "rewards/rejected": -12.774999618530273, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 0.01637511729658493, |
| "learning_rate": 1.99490662139219e-07, |
| "logits/chosen": -0.7147461175918579, |
| "logits/rejected": -1.2234375476837158, |
| "logps/chosen": -453.79998779296875, |
| "logps/rejected": -580.2000122070312, |
| "loss": 0.0009, |
| "nll_loss": 0.876953125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.037499904632568, |
| "rewards/margins": 19.987499237060547, |
| "rewards/rejected": -12.949999809265137, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "grad_norm": 0.011789748248626662, |
| "learning_rate": 1.9524617996604413e-07, |
| "logits/chosen": -0.48930662870407104, |
| "logits/rejected": -0.76318359375, |
| "logps/chosen": -497.20001220703125, |
| "logps/rejected": -608.2000122070312, |
| "loss": 0.0032, |
| "nll_loss": 0.862500011920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.518750190734863, |
| "rewards/margins": 19.756250381469727, |
| "rewards/rejected": -13.25, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6569900687547746, |
| "grad_norm": 0.008438444203134894, |
| "learning_rate": 1.9100169779286926e-07, |
| "logits/chosen": -0.7650390863418579, |
| "logits/rejected": -1.181640625, |
| "logps/chosen": -470.0, |
| "logps/rejected": -614.0, |
| "loss": 0.0062, |
| "nll_loss": 0.888671875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.34521484375, |
| "rewards/margins": 20.0625, |
| "rewards/rejected": -15.731249809265137, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6646294881588999, |
| "grad_norm": 0.009088437756158809, |
| "learning_rate": 1.867572156196944e-07, |
| "logits/chosen": -0.8885742425918579, |
| "logits/rejected": -1.2628905773162842, |
| "logps/chosen": -426.20001220703125, |
| "logps/rejected": -576.7999877929688, |
| "loss": 0.0173, |
| "nll_loss": 0.8238281011581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.240624904632568, |
| "rewards/margins": 20.837499618530273, |
| "rewards/rejected": -15.59375, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 0.01123214371256042, |
| "learning_rate": 1.825127334465195e-07, |
| "logits/chosen": -0.37451171875, |
| "logits/rejected": -0.9312499761581421, |
| "logps/chosen": -491.20001220703125, |
| "logps/rejected": -562.4000244140625, |
| "loss": 0.0012, |
| "nll_loss": 0.788281261920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.337500095367432, |
| "rewards/margins": 21.512500762939453, |
| "rewards/rejected": -16.168750762939453, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6799083269671505, |
| "grad_norm": 0.011919242875290744, |
| "learning_rate": 1.7826825127334463e-07, |
| "logits/chosen": -0.776611328125, |
| "logits/rejected": -1.2238280773162842, |
| "logps/chosen": -501.3999938964844, |
| "logps/rejected": -579.2000122070312, |
| "loss": 0.001, |
| "nll_loss": 0.9183593988418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.053124904632568, |
| "rewards/margins": 21.612499237060547, |
| "rewards/rejected": -15.568750381469727, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6875477463712758, |
| "grad_norm": 0.5307999135503628, |
| "learning_rate": 1.7402376910016978e-07, |
| "logits/chosen": -0.46171873807907104, |
| "logits/rejected": -1.008203148841858, |
| "logps/chosen": -419.6000061035156, |
| "logps/rejected": -630.7999877929688, |
| "loss": 0.0011, |
| "nll_loss": 0.93359375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.2734375, |
| "rewards/margins": 20.600000381469727, |
| "rewards/rejected": -15.318750381469727, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6951871657754011, |
| "grad_norm": 0.008377959097262699, |
| "learning_rate": 1.697792869269949e-07, |
| "logits/chosen": -0.581738293170929, |
| "logits/rejected": -0.8985351324081421, |
| "logps/chosen": -690.5999755859375, |
| "logps/rejected": -672.0, |
| "loss": 0.0009, |
| "nll_loss": 0.889453113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.235937595367432, |
| "rewards/margins": 21.450000762939453, |
| "rewards/rejected": -16.1875, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7028265851795263, |
| "grad_norm": 3.821622931707774, |
| "learning_rate": 1.6553480475382003e-07, |
| "logits/chosen": -0.772656261920929, |
| "logits/rejected": -1.0529296398162842, |
| "logps/chosen": -438.79998779296875, |
| "logps/rejected": -667.2000122070312, |
| "loss": 0.002, |
| "nll_loss": 0.907421886920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.903124809265137, |
| "rewards/margins": 19.887500762939453, |
| "rewards/rejected": -14.981249809265137, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7104660045836516, |
| "grad_norm": 0.007254526001241224, |
| "learning_rate": 1.6129032258064515e-07, |
| "logits/chosen": -0.530517578125, |
| "logits/rejected": -0.891406238079071, |
| "logps/chosen": -515.0, |
| "logps/rejected": -594.7999877929688, |
| "loss": 0.0163, |
| "nll_loss": 0.7972656488418579, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.315625190734863, |
| "rewards/margins": 20.762500762939453, |
| "rewards/rejected": -15.443750381469727, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7181054239877769, |
| "grad_norm": 0.6325222750613556, |
| "learning_rate": 1.570458404074703e-07, |
| "logits/chosen": -0.598095715045929, |
| "logits/rejected": -1.2218749523162842, |
| "logps/chosen": -448.20001220703125, |
| "logps/rejected": -602.2000122070312, |
| "loss": 0.0011, |
| "nll_loss": 0.846484363079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.682812690734863, |
| "rewards/margins": 21.262500762939453, |
| "rewards/rejected": -15.550000190734863, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7257448433919023, |
| "grad_norm": 72.89567177903392, |
| "learning_rate": 1.528013582342954e-07, |
| "logits/chosen": -0.4874267578125, |
| "logits/rejected": -0.8919922113418579, |
| "logps/chosen": -536.4000244140625, |
| "logps/rejected": -637.7999877929688, |
| "loss": 0.0072, |
| "nll_loss": 0.93359375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.042187690734863, |
| "rewards/margins": 21.524999618530273, |
| "rewards/rejected": -16.462499618530273, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7333842627960275, |
| "grad_norm": 0.008456676433411622, |
| "learning_rate": 1.4855687606112052e-07, |
| "logits/chosen": -0.766406238079071, |
| "logits/rejected": -0.9742187261581421, |
| "logps/chosen": -489.79998779296875, |
| "logps/rejected": -652.5999755859375, |
| "loss": 0.0014, |
| "nll_loss": 0.858203113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.606249809265137, |
| "rewards/margins": 21.575000762939453, |
| "rewards/rejected": -16.96875, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7410236822001528, |
| "grad_norm": 0.007962632127454504, |
| "learning_rate": 1.4431239388794567e-07, |
| "logits/chosen": -0.66693115234375, |
| "logits/rejected": -1.0675780773162842, |
| "logps/chosen": -438.6000061035156, |
| "logps/rejected": -614.0, |
| "loss": 0.001, |
| "nll_loss": 0.880859375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.057812690734863, |
| "rewards/margins": 20.262500762939453, |
| "rewards/rejected": -15.206250190734863, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "grad_norm": 0.02441213499286302, |
| "learning_rate": 1.400679117147708e-07, |
| "logits/chosen": -0.687695324420929, |
| "logits/rejected": -1.129296898841858, |
| "logps/chosen": -447.0, |
| "logps/rejected": -597.0, |
| "loss": 0.0009, |
| "nll_loss": 0.920703113079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.3046875, |
| "rewards/margins": 19.575000762939453, |
| "rewards/rejected": -14.293749809265137, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7563025210084033, |
| "grad_norm": 0.014320563932426218, |
| "learning_rate": 1.3582342954159592e-07, |
| "logits/chosen": -0.742382824420929, |
| "logits/rejected": -1.034570336341858, |
| "logps/chosen": -528.7999877929688, |
| "logps/rejected": -701.2000122070312, |
| "loss": 0.0108, |
| "nll_loss": 0.867968738079071, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.854687690734863, |
| "rewards/margins": 22.125, |
| "rewards/rejected": -16.243749618530273, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7639419404125286, |
| "grad_norm": 0.008404334531270792, |
| "learning_rate": 1.3157894736842104e-07, |
| "logits/chosen": -0.5796874761581421, |
| "logits/rejected": -1.0949218273162842, |
| "logps/chosen": -478.79998779296875, |
| "logps/rejected": -641.2000122070312, |
| "loss": 0.0016, |
| "nll_loss": 0.8394531011581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.021874904632568, |
| "rewards/margins": 22.325000762939453, |
| "rewards/rejected": -16.3125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.771581359816654, |
| "grad_norm": 0.011247353406659095, |
| "learning_rate": 1.273344651952462e-07, |
| "logits/chosen": -0.642773449420929, |
| "logits/rejected": -1.0187499523162842, |
| "logps/chosen": -458.79998779296875, |
| "logps/rejected": -693.4000244140625, |
| "loss": 0.0009, |
| "nll_loss": 0.797656238079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.579687595367432, |
| "rewards/margins": 22.825000762939453, |
| "rewards/rejected": -17.274999618530273, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 0.011990054903778509, |
| "learning_rate": 1.2308998302207129e-07, |
| "logits/chosen": -0.7708984613418579, |
| "logits/rejected": -1.10546875, |
| "logps/chosen": -528.2000122070312, |
| "logps/rejected": -659.2000122070312, |
| "loss": 0.011, |
| "nll_loss": 0.966796875, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.373437404632568, |
| "rewards/margins": 20.850000381469727, |
| "rewards/rejected": -15.475000381469727, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7868601986249045, |
| "grad_norm": 0.013096305405330763, |
| "learning_rate": 1.1884550084889642e-07, |
| "logits/chosen": -0.7157226800918579, |
| "logits/rejected": -0.9664062261581421, |
| "logps/chosen": -532.4000244140625, |
| "logps/rejected": -603.7999877929688, |
| "loss": 0.001, |
| "nll_loss": 0.969531238079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.923437595367432, |
| "rewards/margins": 22.424999237060547, |
| "rewards/rejected": -16.493749618530273, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7944996180290298, |
| "grad_norm": 0.008024100644779104, |
| "learning_rate": 1.1460101867572156e-07, |
| "logits/chosen": -0.8031250238418579, |
| "logits/rejected": -1.0529296398162842, |
| "logps/chosen": -563.4000244140625, |
| "logps/rejected": -657.4000244140625, |
| "loss": 0.0008, |
| "nll_loss": 0.835156261920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.645312309265137, |
| "rewards/margins": 21.875, |
| "rewards/rejected": -16.237499237060547, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8021390374331551, |
| "grad_norm": 0.013602335841313738, |
| "learning_rate": 1.1035653650254668e-07, |
| "logits/chosen": -0.711621105670929, |
| "logits/rejected": -1.1335937976837158, |
| "logps/chosen": -425.6000061035156, |
| "logps/rejected": -589.2000122070312, |
| "loss": 0.0011, |
| "nll_loss": 0.768750011920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.151562690734863, |
| "rewards/margins": 21.924999237060547, |
| "rewards/rejected": -16.756250381469727, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8097784568372803, |
| "grad_norm": 0.011969353381424996, |
| "learning_rate": 1.0611205432937182e-07, |
| "logits/chosen": -0.632275402545929, |
| "logits/rejected": -0.9330078363418579, |
| "logps/chosen": -544.2000122070312, |
| "logps/rejected": -695.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.856640636920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.390625, |
| "rewards/margins": 22.424999237060547, |
| "rewards/rejected": -17.037500381469727, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8174178762414056, |
| "grad_norm": 60.818967384865154, |
| "learning_rate": 1.0186757215619694e-07, |
| "logits/chosen": -0.6527343988418579, |
| "logits/rejected": -1.0265624523162842, |
| "logps/chosen": -494.6000061035156, |
| "logps/rejected": -675.2000122070312, |
| "loss": 0.0154, |
| "nll_loss": 0.754687488079071, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.512499809265137, |
| "rewards/margins": 23.350000381469727, |
| "rewards/rejected": -17.831249237060547, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.825057295645531, |
| "grad_norm": 0.011992048240526737, |
| "learning_rate": 9.762308998302207e-08, |
| "logits/chosen": -0.7574218511581421, |
| "logits/rejected": -1.2898437976837158, |
| "logps/chosen": -418.79998779296875, |
| "logps/rejected": -582.0, |
| "loss": 0.0012, |
| "nll_loss": 0.856640636920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.678906440734863, |
| "rewards/margins": 21.8125, |
| "rewards/rejected": -16.125, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8326967150496563, |
| "grad_norm": 0.008473889793906824, |
| "learning_rate": 9.33786078098472e-08, |
| "logits/chosen": -0.682324230670929, |
| "logits/rejected": -1.134765625, |
| "logps/chosen": -449.0, |
| "logps/rejected": -582.7999877929688, |
| "loss": 0.0021, |
| "nll_loss": 0.912109375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.525000095367432, |
| "rewards/margins": 21.087499618530273, |
| "rewards/rejected": -16.543750762939453, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 4.444711961785813, |
| "learning_rate": 8.913412563667231e-08, |
| "logits/chosen": -0.8392578363418579, |
| "logits/rejected": -1.182031273841858, |
| "logps/chosen": -433.79998779296875, |
| "logps/rejected": -678.7999877929688, |
| "loss": 0.0013, |
| "nll_loss": 0.8675781488418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.346875190734863, |
| "rewards/margins": 22.712499618530273, |
| "rewards/rejected": -16.387500762939453, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8479755538579068, |
| "grad_norm": 0.00814358465753462, |
| "learning_rate": 8.488964346349745e-08, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": -1.0751953125, |
| "logps/chosen": -438.20001220703125, |
| "logps/rejected": -626.0, |
| "loss": 0.0008, |
| "nll_loss": 0.7953125238418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.388281345367432, |
| "rewards/margins": 21.575000762939453, |
| "rewards/rejected": -15.162500381469727, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "grad_norm": 0.03463987081566878, |
| "learning_rate": 8.064516129032257e-08, |
| "logits/chosen": -0.847460925579071, |
| "logits/rejected": -1.1613280773162842, |
| "logps/chosen": -450.6000061035156, |
| "logps/rejected": -595.0, |
| "loss": 0.0036, |
| "nll_loss": 0.8121093511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.856249809265137, |
| "rewards/margins": 19.899999618530273, |
| "rewards/rejected": -14.037500381469727, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8632543926661573, |
| "grad_norm": 0.007682054439671777, |
| "learning_rate": 7.64006791171477e-08, |
| "logits/chosen": -0.606127917766571, |
| "logits/rejected": -0.908007800579071, |
| "logps/chosen": -438.0, |
| "logps/rejected": -600.5999755859375, |
| "loss": 0.0008, |
| "nll_loss": 0.760937511920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.009375095367432, |
| "rewards/margins": 21.5, |
| "rewards/rejected": -15.481249809265137, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8708938120702827, |
| "grad_norm": 0.11056210671905395, |
| "learning_rate": 7.215619694397283e-08, |
| "logits/chosen": -0.5245116949081421, |
| "logits/rejected": -1.024023413658142, |
| "logps/chosen": -468.6000061035156, |
| "logps/rejected": -541.4000244140625, |
| "loss": 0.0086, |
| "nll_loss": 0.802734375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.4765625, |
| "rewards/margins": 20.725000381469727, |
| "rewards/rejected": -15.231249809265137, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.878533231474408, |
| "grad_norm": 0.011222005761275277, |
| "learning_rate": 6.791171477079796e-08, |
| "logits/chosen": -0.7525390386581421, |
| "logits/rejected": -0.9857422113418579, |
| "logps/chosen": -463.6000061035156, |
| "logps/rejected": -689.7999877929688, |
| "loss": 0.0009, |
| "nll_loss": 0.9242187738418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.189062595367432, |
| "rewards/margins": 21.350000381469727, |
| "rewards/rejected": -15.15625, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8861726508785333, |
| "grad_norm": 0.011114824226909615, |
| "learning_rate": 6.36672325976231e-08, |
| "logits/chosen": -0.73583984375, |
| "logits/rejected": -1.1023437976837158, |
| "logps/chosen": -468.6000061035156, |
| "logps/rejected": -638.0, |
| "loss": 0.0009, |
| "nll_loss": 0.862500011920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.5078125, |
| "rewards/margins": 21.381250381469727, |
| "rewards/rejected": -15.856249809265137, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8938120702826585, |
| "grad_norm": 0.2933378882242121, |
| "learning_rate": 5.942275042444821e-08, |
| "logits/chosen": -0.630859375, |
| "logits/rejected": -1.0300781726837158, |
| "logps/chosen": -426.79998779296875, |
| "logps/rejected": -549.5999755859375, |
| "loss": 0.0008, |
| "nll_loss": 0.800000011920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.603125095367432, |
| "rewards/margins": 21.325000762939453, |
| "rewards/rejected": -14.731249809265137, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9014514896867838, |
| "grad_norm": 0.019123808537423514, |
| "learning_rate": 5.517826825127334e-08, |
| "logits/chosen": -0.660082995891571, |
| "logits/rejected": -1.091406226158142, |
| "logps/chosen": -484.20001220703125, |
| "logps/rejected": -687.2000122070312, |
| "loss": 0.0158, |
| "nll_loss": 0.8843749761581421, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": 5.125781059265137, |
| "rewards/margins": 19.993749618530273, |
| "rewards/rejected": -14.868749618530273, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.007638493845027638, |
| "learning_rate": 5.093378607809847e-08, |
| "logits/chosen": -0.6869140863418579, |
| "logits/rejected": -1.1027343273162842, |
| "logps/chosen": -446.6000061035156, |
| "logps/rejected": -580.5999755859375, |
| "loss": 0.0011, |
| "nll_loss": 0.8980468511581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.057812690734863, |
| "rewards/margins": 20.299999237060547, |
| "rewards/rejected": -14.21875, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9167303284950343, |
| "grad_norm": 0.008513929557654372, |
| "learning_rate": 4.66893039049236e-08, |
| "logits/chosen": -0.590624988079071, |
| "logits/rejected": -0.91796875, |
| "logps/chosen": -457.3999938964844, |
| "logps/rejected": -603.0, |
| "loss": 0.0008, |
| "nll_loss": 0.7699218988418579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.3125, |
| "rewards/margins": 21.287500381469727, |
| "rewards/rejected": -14.987500190734863, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9243697478991597, |
| "grad_norm": 0.5109154797836508, |
| "learning_rate": 4.2444821731748725e-08, |
| "logits/chosen": -0.728015124797821, |
| "logits/rejected": -1.0966796875, |
| "logps/chosen": -475.79998779296875, |
| "logps/rejected": -594.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.8374999761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.025000095367432, |
| "rewards/margins": 21.950000762939453, |
| "rewards/rejected": -14.925000190734863, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.932009167303285, |
| "grad_norm": 0.00821641172434573, |
| "learning_rate": 3.820033955857385e-08, |
| "logits/chosen": -0.6214843988418579, |
| "logits/rejected": -1.1667969226837158, |
| "logps/chosen": -622.2000122070312, |
| "logps/rejected": -649.0, |
| "loss": 0.0048, |
| "nll_loss": 0.9234374761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.256249904632568, |
| "rewards/margins": 19.975000381469727, |
| "rewards/rejected": -13.6875, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9396485867074102, |
| "grad_norm": 0.019088814928989045, |
| "learning_rate": 3.395585738539898e-08, |
| "logits/chosen": -0.7289062738418579, |
| "logits/rejected": -1.05078125, |
| "logps/chosen": -475.20001220703125, |
| "logps/rejected": -590.2000122070312, |
| "loss": 0.0028, |
| "nll_loss": 0.856249988079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.353125095367432, |
| "rewards/margins": 20.362499237060547, |
| "rewards/rejected": -14.006250381469727, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9472880061115355, |
| "grad_norm": 0.04074144502311008, |
| "learning_rate": 2.9711375212224106e-08, |
| "logits/chosen": -0.9458984136581421, |
| "logits/rejected": -1.208593726158142, |
| "logps/chosen": -498.20001220703125, |
| "logps/rejected": -555.0, |
| "loss": 0.0013, |
| "nll_loss": 0.936718761920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.618750095367432, |
| "rewards/margins": 19.981250762939453, |
| "rewards/rejected": -14.387499809265137, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9549274255156608, |
| "grad_norm": 0.013648349371693833, |
| "learning_rate": 2.5466893039049236e-08, |
| "logits/chosen": -0.591796875, |
| "logits/rejected": -0.834765613079071, |
| "logps/chosen": -443.3999938964844, |
| "logps/rejected": -564.7999877929688, |
| "loss": 0.0009, |
| "nll_loss": 0.8203125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.153124809265137, |
| "rewards/margins": 20.524999618530273, |
| "rewards/rejected": -14.368749618530273, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 0.027600337620728884, |
| "learning_rate": 2.1222410865874363e-08, |
| "logits/chosen": -0.802734375, |
| "logits/rejected": -1.180078148841858, |
| "logps/chosen": -452.6000061035156, |
| "logps/rejected": -605.2000122070312, |
| "loss": 0.0009, |
| "nll_loss": 0.866015613079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.921875, |
| "rewards/margins": 21.4375, |
| "rewards/rejected": -14.493749618530273, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9702062643239114, |
| "grad_norm": 0.00753937380269379, |
| "learning_rate": 1.697792869269949e-08, |
| "logits/chosen": -0.6973632574081421, |
| "logits/rejected": -1.0812499523162842, |
| "logps/chosen": -451.20001220703125, |
| "logps/rejected": -592.2000122070312, |
| "loss": 0.0028, |
| "nll_loss": 0.729296863079071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.060937404632568, |
| "rewards/margins": 22.299999237060547, |
| "rewards/rejected": -15.225000381469727, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9778456837280367, |
| "grad_norm": 0.009276587437573345, |
| "learning_rate": 1.2733446519524618e-08, |
| "logits/chosen": -0.73193359375, |
| "logits/rejected": -0.993847668170929, |
| "logps/chosen": -461.6000061035156, |
| "logps/rejected": -583.0, |
| "loss": 0.0009, |
| "nll_loss": 0.8374999761581421, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.074999809265137, |
| "rewards/margins": 20.087499618530273, |
| "rewards/rejected": -14.024999618530273, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.985485103132162, |
| "grad_norm": 0.009811636634928695, |
| "learning_rate": 8.488964346349745e-09, |
| "logits/chosen": -0.7230468988418579, |
| "logits/rejected": -1.204687476158142, |
| "logps/chosen": -441.79998779296875, |
| "logps/rejected": -617.0, |
| "loss": 0.0019, |
| "nll_loss": 0.755859375, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.2421875, |
| "rewards/margins": 22.043750762939453, |
| "rewards/rejected": -14.800000190734863, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9931245225362872, |
| "grad_norm": 0.016745625389395048, |
| "learning_rate": 4.244482173174872e-09, |
| "logits/chosen": -0.4999023377895355, |
| "logits/rejected": -0.810351550579071, |
| "logps/chosen": -444.79998779296875, |
| "logps/rejected": -611.5999755859375, |
| "loss": 0.0009, |
| "nll_loss": 0.786328136920929, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.178124904632568, |
| "rewards/margins": 21.649999618530273, |
| "rewards/rejected": -15.481249809265137, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_logits/chosen": -0.5260667204856873, |
| "eval_logits/rejected": -0.9918870329856873, |
| "eval_logps/chosen": -428.30767822265625, |
| "eval_logps/rejected": -523.2307739257812, |
| "eval_loss": 0.0017267990624532104, |
| "eval_nll_loss": 0.8383413553237915, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 7.533653736114502, |
| "eval_rewards/margins": 22.846153259277344, |
| "eval_rewards/rejected": -15.331730842590332, |
| "eval_runtime": 16.743, |
| "eval_samples_per_second": 5.973, |
| "eval_steps_per_second": 0.776, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1309, |
| "total_flos": 0.0, |
| "train_loss": 0.03593526968435079, |
| "train_runtime": 4678.8034, |
| "train_samples_per_second": 2.237, |
| "train_steps_per_second": 0.28 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1309, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|