| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9798657718120807, |
| "eval_steps": 0, |
| "global_step": 222, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013422818791946308, |
| "grad_norm": 316.97791395060926, |
| "learning_rate": 4.347826086956521e-08, |
| "logits/chosen": 0.86328125, |
| "logits/rejected": 1.09375, |
| "logps/chosen": -127.5, |
| "logps/rejected": -150.0, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.026845637583892617, |
| "grad_norm": 320.21238167334917, |
| "learning_rate": 8.695652173913042e-08, |
| "logits/chosen": 1.515625, |
| "logits/rejected": 2.109375, |
| "logps/chosen": -115.5, |
| "logps/rejected": -178.0, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.040268456375838924, |
| "grad_norm": 296.99175855416775, |
| "learning_rate": 1.3043478260869563e-07, |
| "logits/chosen": 1.2265625, |
| "logits/rejected": 0.8046875, |
| "logps/chosen": -180.0, |
| "logps/rejected": -130.0, |
| "loss": 0.71, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.125, |
| "rewards/margins": -0.125, |
| "rewards/rejected": 0.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.053691275167785234, |
| "grad_norm": 307.2302419398515, |
| "learning_rate": 1.7391304347826085e-07, |
| "logits/chosen": 0.81640625, |
| "logits/rejected": 1.171875, |
| "logps/chosen": -137.0, |
| "logps/rejected": -193.0, |
| "loss": 0.7095, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.125, |
| "rewards/margins": -0.25, |
| "rewards/rejected": 0.125, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.06711409395973154, |
| "grad_norm": 281.7519292418279, |
| "learning_rate": 2.1739130434782607e-07, |
| "logits/chosen": -0.146484375, |
| "logits/rejected": -0.07275390625, |
| "logps/chosen": -138.0, |
| "logps/rejected": -138.0, |
| "loss": 0.7065, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.25, |
| "rewards/margins": 0.0625, |
| "rewards/rejected": 0.1875, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08053691275167785, |
| "grad_norm": 334.9778297964166, |
| "learning_rate": 2.6086956521739126e-07, |
| "logits/chosen": 0.44140625, |
| "logits/rejected": 0.51953125, |
| "logps/chosen": -122.0, |
| "logps/rejected": -109.0, |
| "loss": 0.7487, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.25, |
| "rewards/margins": 0.03125, |
| "rewards/rejected": 0.21875, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.09395973154362416, |
| "grad_norm": 310.7570200892832, |
| "learning_rate": 3.043478260869565e-07, |
| "logits/chosen": 0.1708984375, |
| "logits/rejected": 0.6953125, |
| "logps/chosen": -115.0, |
| "logps/rejected": -178.0, |
| "loss": 0.7487, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.046875, |
| "rewards/margins": 0.015625, |
| "rewards/rejected": -0.0625, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.10738255033557047, |
| "grad_norm": 262.65343185083736, |
| "learning_rate": 3.478260869565217e-07, |
| "logits/chosen": 1.3203125, |
| "logits/rejected": 0.9296875, |
| "logps/chosen": -80.0, |
| "logps/rejected": -88.5, |
| "loss": 0.5881, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.28125, |
| "rewards/margins": 0.3125, |
| "rewards/rejected": -0.03125, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.12080536912751678, |
| "grad_norm": 360.8428876216846, |
| "learning_rate": 3.9130434782608694e-07, |
| "logits/chosen": 1.359375, |
| "logits/rejected": 1.09375, |
| "logps/chosen": -125.0, |
| "logps/rejected": -127.0, |
| "loss": 0.8363, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.125, |
| "rewards/margins": -0.28125, |
| "rewards/rejected": 0.15625, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.1342281879194631, |
| "grad_norm": 255.9449987869854, |
| "learning_rate": 4.3478260869565214e-07, |
| "logits/chosen": 1.1953125, |
| "logits/rejected": 1.203125, |
| "logps/chosen": -154.0, |
| "logps/rejected": -121.0, |
| "loss": 0.571, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.3125, |
| "rewards/margins": 0.5, |
| "rewards/rejected": -0.1875, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1476510067114094, |
| "grad_norm": 346.20438885823984, |
| "learning_rate": 4.782608695652174e-07, |
| "logits/chosen": 1.0, |
| "logits/rejected": 0.7890625, |
| "logps/chosen": -214.0, |
| "logps/rejected": -219.0, |
| "loss": 0.7852, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.078125, |
| "rewards/margins": -0.359375, |
| "rewards/rejected": 0.4375, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1610738255033557, |
| "grad_norm": 269.1956711192441, |
| "learning_rate": 5.217391304347825e-07, |
| "logits/chosen": 0.32421875, |
| "logits/rejected": 0.09716796875, |
| "logps/chosen": -167.0, |
| "logps/rejected": -137.0, |
| "loss": 0.637, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.375, |
| "rewards/margins": 0.625, |
| "rewards/rejected": -0.25, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.174496644295302, |
| "grad_norm": 282.7338298830031, |
| "learning_rate": 5.652173913043477e-07, |
| "logits/chosen": 1.359375, |
| "logits/rejected": 1.015625, |
| "logps/chosen": -172.0, |
| "logps/rejected": -130.0, |
| "loss": 0.6496, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.5625, |
| "rewards/margins": 0.4375, |
| "rewards/rejected": 0.125, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.18791946308724833, |
| "grad_norm": 309.4997861737965, |
| "learning_rate": 6.08695652173913e-07, |
| "logits/chosen": 0.04052734375, |
| "logits/rejected": 0.00390625, |
| "logps/chosen": -140.0, |
| "logps/rejected": -129.0, |
| "loss": 0.7821, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0625, |
| "rewards/margins": -0.15625, |
| "rewards/rejected": 0.21875, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.20134228187919462, |
| "grad_norm": 312.46545912340383, |
| "learning_rate": 6.521739130434782e-07, |
| "logits/chosen": 0.9453125, |
| "logits/rejected": 0.96484375, |
| "logps/chosen": -121.0, |
| "logps/rejected": -120.0, |
| "loss": 0.7118, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.25, |
| "rewards/margins": 0.0625, |
| "rewards/rejected": -0.3125, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.21476510067114093, |
| "grad_norm": 303.0405196118898, |
| "learning_rate": 6.956521739130434e-07, |
| "logits/chosen": -0.03515625, |
| "logits/rejected": 1.171875, |
| "logps/chosen": -101.0, |
| "logps/rejected": -97.5, |
| "loss": 0.6458, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.03125, |
| "rewards/margins": 0.03125, |
| "rewards/rejected": 0.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.22818791946308725, |
| "grad_norm": 298.356070519565, |
| "learning_rate": 7.391304347826086e-07, |
| "logits/chosen": 0.466796875, |
| "logits/rejected": 0.84375, |
| "logps/chosen": -100.5, |
| "logps/rejected": -136.0, |
| "loss": 0.6341, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.125, |
| "rewards/margins": 0.125, |
| "rewards/rejected": 0.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.24161073825503357, |
| "grad_norm": 282.77957696018876, |
| "learning_rate": 7.826086956521739e-07, |
| "logits/chosen": 0.8046875, |
| "logits/rejected": 0.625, |
| "logps/chosen": -99.5, |
| "logps/rejected": -136.0, |
| "loss": 0.7319, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.25, |
| "rewards/margins": -0.1875, |
| "rewards/rejected": -0.0625, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.2550335570469799, |
| "grad_norm": 258.05534960333887, |
| "learning_rate": 8.260869565217391e-07, |
| "logits/chosen": 1.59375, |
| "logits/rejected": 1.5703125, |
| "logps/chosen": -156.0, |
| "logps/rejected": -133.0, |
| "loss": 0.5654, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.125, |
| "rewards/margins": 0.3125, |
| "rewards/rejected": -0.4375, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.2684563758389262, |
| "grad_norm": 333.8352033041929, |
| "learning_rate": 8.695652173913043e-07, |
| "logits/chosen": 1.046875, |
| "logits/rejected": 1.3125, |
| "logps/chosen": -129.0, |
| "logps/rejected": -136.0, |
| "loss": 0.7298, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.125, |
| "rewards/margins": 0.1875, |
| "rewards/rejected": -0.0625, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.28187919463087246, |
| "grad_norm": 305.6684446174624, |
| "learning_rate": 9.130434782608695e-07, |
| "logits/chosen": 0.6328125, |
| "logits/rejected": 0.9375, |
| "logps/chosen": -117.0, |
| "logps/rejected": -180.0, |
| "loss": 0.7222, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.125, |
| "rewards/margins": -0.125, |
| "rewards/rejected": 0.25, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.2953020134228188, |
| "grad_norm": 260.1289506856149, |
| "learning_rate": 9.565217391304349e-07, |
| "logits/chosen": 0.70703125, |
| "logits/rejected": 1.1328125, |
| "logps/chosen": -35.0, |
| "logps/rejected": -46.25, |
| "loss": 0.6589, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.05859375, |
| "rewards/margins": 0.16796875, |
| "rewards/rejected": -0.109375, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3087248322147651, |
| "grad_norm": 278.34995613754035, |
| "learning_rate": 1e-06, |
| "logits/chosen": 0.75390625, |
| "logits/rejected": 0.73046875, |
| "logps/chosen": -156.0, |
| "logps/rejected": -152.0, |
| "loss": 0.6354, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.40625, |
| "rewards/margins": 0.84375, |
| "rewards/rejected": -0.4375, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.3221476510067114, |
| "grad_norm": 233.06948826427978, |
| "learning_rate": 9.999376947588285e-07, |
| "logits/chosen": -0.04638671875, |
| "logits/rejected": 0.1689453125, |
| "logps/chosen": -92.0, |
| "logps/rejected": -136.0, |
| "loss": 0.5417, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.4375, |
| "rewards/rejected": -0.4375, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.33557046979865773, |
| "grad_norm": 278.50973158832136, |
| "learning_rate": 9.99750794563087e-07, |
| "logits/chosen": 0.8046875, |
| "logits/rejected": 1.421875, |
| "logps/chosen": -127.5, |
| "logps/rejected": -162.0, |
| "loss": 0.6742, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0625, |
| "rewards/margins": 0.1875, |
| "rewards/rejected": -0.125, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.348993288590604, |
| "grad_norm": 269.19679796957723, |
| "learning_rate": 9.994393459922216e-07, |
| "logits/chosen": 0.578125, |
| "logits/rejected": 0.37890625, |
| "logps/chosen": -95.0, |
| "logps/rejected": -110.0, |
| "loss": 0.5146, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1875, |
| "rewards/margins": 0.875, |
| "rewards/rejected": -0.6875, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.3624161073825503, |
| "grad_norm": 196.4517902957254, |
| "learning_rate": 9.990034266657467e-07, |
| "logits/chosen": 1.15625, |
| "logits/rejected": 1.09375, |
| "logps/chosen": -141.0, |
| "logps/rejected": -149.0, |
| "loss": 0.4653, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.09375, |
| "rewards/margins": 0.5625, |
| "rewards/rejected": -0.65625, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.37583892617449666, |
| "grad_norm": 210.82144225997905, |
| "learning_rate": 9.984431452238966e-07, |
| "logits/chosen": 0.4453125, |
| "logits/rejected": 0.453125, |
| "logps/chosen": -112.0, |
| "logps/rejected": -136.0, |
| "loss": 0.4992, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.125, |
| "rewards/margins": 0.1875, |
| "rewards/rejected": -0.3125, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.38926174496644295, |
| "grad_norm": 229.4387268433427, |
| "learning_rate": 9.97758641300553e-07, |
| "logits/chosen": 0.0400390625, |
| "logits/rejected": 0.234375, |
| "logps/chosen": -77.0, |
| "logps/rejected": -85.0, |
| "loss": 0.4958, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.15625, |
| "rewards/margins": 0.25, |
| "rewards/rejected": -0.40625, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.40268456375838924, |
| "grad_norm": 224.2234125086321, |
| "learning_rate": 9.96950085488444e-07, |
| "logits/chosen": 0.0966796875, |
| "logits/rejected": 0.12158203125, |
| "logps/chosen": -233.0, |
| "logps/rejected": -192.0, |
| "loss": 0.4652, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.5, |
| "rewards/margins": 0.625, |
| "rewards/rejected": -0.125, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.4161073825503356, |
| "grad_norm": 218.79146408518088, |
| "learning_rate": 9.960176792966288e-07, |
| "logits/chosen": 0.376953125, |
| "logits/rejected": 0.76953125, |
| "logps/chosen": -151.0, |
| "logps/rejected": -186.0, |
| "loss": 0.4307, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.78125, |
| "rewards/margins": 1.96875, |
| "rewards/rejected": -1.1875, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.42953020134228187, |
| "grad_norm": 234.76035113582137, |
| "learning_rate": 9.949616551002785e-07, |
| "logits/chosen": 1.484375, |
| "logits/rejected": 2.125, |
| "logps/chosen": -115.5, |
| "logps/rejected": -128.0, |
| "loss": 0.5723, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.1875, |
| "rewards/margins": 0.6875, |
| "rewards/rejected": -0.875, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.4429530201342282, |
| "grad_norm": 265.78924153069073, |
| "learning_rate": 9.937822760827619e-07, |
| "logits/chosen": 1.34375, |
| "logits/rejected": 1.875, |
| "logps/chosen": -96.0, |
| "logps/rejected": -131.0, |
| "loss": 0.5628, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.09375, |
| "rewards/margins": 0.125, |
| "rewards/rejected": -0.21875, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.4563758389261745, |
| "grad_norm": 188.15510343729397, |
| "learning_rate": 9.924798361700554e-07, |
| "logits/chosen": 1.390625, |
| "logits/rejected": 1.40625, |
| "logps/chosen": -160.0, |
| "logps/rejected": -155.0, |
| "loss": 0.4393, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.3125, |
| "rewards/margins": 0.1875, |
| "rewards/rejected": -0.5, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.4697986577181208, |
| "grad_norm": 184.26566167062134, |
| "learning_rate": 9.910546599574902e-07, |
| "logits/chosen": 0.953125, |
| "logits/rejected": 0.95703125, |
| "logps/chosen": -85.0, |
| "logps/rejected": -122.5, |
| "loss": 0.4133, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.171875, |
| "rewards/margins": 0.453125, |
| "rewards/rejected": -0.28125, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.48322147651006714, |
| "grad_norm": 175.4002282124261, |
| "learning_rate": 9.895071026288573e-07, |
| "logits/chosen": 0.00640869140625, |
| "logits/rejected": 0.4453125, |
| "logps/chosen": -152.0, |
| "logps/rejected": -196.0, |
| "loss": 0.2941, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6875, |
| "rewards/margins": 2.4375, |
| "rewards/rejected": -1.75, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.4966442953020134, |
| "grad_norm": 224.60918154752224, |
| "learning_rate": 9.878375498678867e-07, |
| "logits/chosen": -0.1337890625, |
| "logits/rejected": 0.443359375, |
| "logps/chosen": -166.0, |
| "logps/rejected": -216.0, |
| "loss": 0.3926, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.5, |
| "rewards/margins": 1.375, |
| "rewards/rejected": -0.875, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5100671140939598, |
| "grad_norm": 246.2589461213588, |
| "learning_rate": 9.860464177621284e-07, |
| "logits/chosen": 0.578125, |
| "logits/rejected": 0.3203125, |
| "logps/chosen": -126.0, |
| "logps/rejected": -109.5, |
| "loss": 0.443, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.09375, |
| "rewards/margins": 1.375, |
| "rewards/rejected": -1.28125, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5234899328859061, |
| "grad_norm": 166.84341628834525, |
| "learning_rate": 9.841341526992535e-07, |
| "logits/chosen": 0.236328125, |
| "logits/rejected": 0.8203125, |
| "logps/chosen": -106.0, |
| "logps/rejected": -133.0, |
| "loss": 0.3483, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.3125, |
| "rewards/margins": 0.5625, |
| "rewards/rejected": -0.875, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.5369127516778524, |
| "grad_norm": 200.39914415959888, |
| "learning_rate": 9.821012312558059e-07, |
| "logits/chosen": 0.96875, |
| "logits/rejected": 0.59765625, |
| "logps/chosen": -120.0, |
| "logps/rejected": -106.0, |
| "loss": 0.4129, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.1875, |
| "rewards/margins": 0.34375, |
| "rewards/rejected": -0.15625, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5503355704697986, |
| "grad_norm": 152.05567773094523, |
| "learning_rate": 9.799481600784286e-07, |
| "logits/chosen": 1.6875, |
| "logits/rejected": 2.09375, |
| "logps/chosen": -128.0, |
| "logps/rejected": -258.0, |
| "loss": 0.2243, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.375, |
| "rewards/margins": 3.0625, |
| "rewards/rejected": -2.6875, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.5637583892617449, |
| "grad_norm": 199.2294955589925, |
| "learning_rate": 9.776754757575973e-07, |
| "logits/chosen": 0.9609375, |
| "logits/rejected": 1.1796875, |
| "logps/chosen": -150.0, |
| "logps/rejected": -166.0, |
| "loss": 0.3205, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.140625, |
| "rewards/margins": 0.734375, |
| "rewards/rejected": -0.875, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.5771812080536913, |
| "grad_norm": 157.50000350180815, |
| "learning_rate": 9.752837446938914e-07, |
| "logits/chosen": 0.6640625, |
| "logits/rejected": 0.57421875, |
| "logps/chosen": -108.0, |
| "logps/rejected": -132.0, |
| "loss": 0.3333, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.125, |
| "rewards/margins": 0.8125, |
| "rewards/rejected": -0.9375, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.5906040268456376, |
| "grad_norm": 196.50929153843916, |
| "learning_rate": 9.727735629568335e-07, |
| "logits/chosen": 0.33203125, |
| "logits/rejected": 0.396484375, |
| "logps/chosen": -146.0, |
| "logps/rejected": -141.0, |
| "loss": 0.45, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.8125, |
| "rewards/margins": 1.25, |
| "rewards/rejected": -0.4375, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6040268456375839, |
| "grad_norm": 137.27969044382448, |
| "learning_rate": 9.701455561363377e-07, |
| "logits/chosen": 0.0810546875, |
| "logits/rejected": 0.1806640625, |
| "logps/chosen": -132.0, |
| "logps/rejected": -156.0, |
| "loss": 0.247, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5625, |
| "rewards/margins": 2.6875, |
| "rewards/rejected": -1.125, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6174496644295302, |
| "grad_norm": 133.9292244204905, |
| "learning_rate": 9.67400379186799e-07, |
| "logits/chosen": -0.921875, |
| "logits/rejected": -0.79296875, |
| "logps/chosen": -74.0, |
| "logps/rejected": -82.0, |
| "loss": 0.1977, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.515625, |
| "rewards/margins": 2.28125, |
| "rewards/rejected": -0.765625, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.6308724832214765, |
| "grad_norm": 189.9282156190453, |
| "learning_rate": 9.645387162638652e-07, |
| "logits/chosen": 0.6953125, |
| "logits/rejected": 0.8359375, |
| "logps/chosen": -178.0, |
| "logps/rejected": -160.0, |
| "loss": 0.2958, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.625, |
| "rewards/margins": 1.84375, |
| "rewards/rejected": -1.21875, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.6442953020134228, |
| "grad_norm": 145.8536278840751, |
| "learning_rate": 9.615612805539303e-07, |
| "logits/chosen": 1.15625, |
| "logits/rejected": 1.8125, |
| "logps/chosen": -82.5, |
| "logps/rejected": -121.0, |
| "loss": 0.2335, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5859375, |
| "rewards/margins": 1.6640625, |
| "rewards/rejected": -1.078125, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.6577181208053692, |
| "grad_norm": 168.2368559847673, |
| "learning_rate": 9.584688140963944e-07, |
| "logits/chosen": 0.029296875, |
| "logits/rejected": 0.6640625, |
| "logps/chosen": -152.0, |
| "logps/rejected": -176.0, |
| "loss": 0.2925, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6875, |
| "rewards/margins": 5.0, |
| "rewards/rejected": -2.3125, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.6711409395973155, |
| "grad_norm": 90.26685617771182, |
| "learning_rate": 9.552620875987312e-07, |
| "logits/chosen": 0.7109375, |
| "logits/rejected": 1.0859375, |
| "logps/chosen": -155.0, |
| "logps/rejected": -189.0, |
| "loss": 0.1759, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 2.8125, |
| "rewards/margins": 2.875, |
| "rewards/rejected": -0.0625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6845637583892618, |
| "grad_norm": 161.68678516400212, |
| "learning_rate": 9.519419002444118e-07, |
| "logits/chosen": 0.765625, |
| "logits/rejected": 1.5, |
| "logps/chosen": -105.0, |
| "logps/rejected": -186.0, |
| "loss": 0.2773, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0625, |
| "rewards/margins": 3.25, |
| "rewards/rejected": -1.1875, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.697986577181208, |
| "grad_norm": 116.30317809994341, |
| "learning_rate": 9.485090794937317e-07, |
| "logits/chosen": 0.369140625, |
| "logits/rejected": 1.015625, |
| "logps/chosen": -147.0, |
| "logps/rejected": -158.0, |
| "loss": 0.1838, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4375, |
| "rewards/margins": 4.875, |
| "rewards/rejected": -1.4375, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7114093959731543, |
| "grad_norm": 115.5453240211095, |
| "learning_rate": 9.4496448087759e-07, |
| "logits/chosen": 1.0859375, |
| "logits/rejected": 1.4296875, |
| "logps/chosen": -158.0, |
| "logps/rejected": -206.0, |
| "loss": 0.1532, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3125, |
| "rewards/margins": 4.4375, |
| "rewards/rejected": -4.125, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.7248322147651006, |
| "grad_norm": 151.00137937073285, |
| "learning_rate": 9.413089877842735e-07, |
| "logits/chosen": 0.8984375, |
| "logits/rejected": 0.67578125, |
| "logps/chosen": -156.0, |
| "logps/rejected": -166.0, |
| "loss": 0.2604, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5, |
| "rewards/margins": 1.9375, |
| "rewards/rejected": -2.4375, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.738255033557047, |
| "grad_norm": 121.8761212766047, |
| "learning_rate": 9.375435112392969e-07, |
| "logits/chosen": -0.58984375, |
| "logits/rejected": -0.1171875, |
| "logps/chosen": -123.0, |
| "logps/rejected": -236.0, |
| "loss": 0.1429, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.25, |
| "rewards/margins": 7.5, |
| "rewards/rejected": -6.25, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.7516778523489933, |
| "grad_norm": 189.03161073826737, |
| "learning_rate": 9.336689896783572e-07, |
| "logits/chosen": -0.6328125, |
| "logits/rejected": -0.7109375, |
| "logps/chosen": -197.0, |
| "logps/rejected": -187.0, |
| "loss": 0.2165, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 2.25, |
| "rewards/margins": 2.71875, |
| "rewards/rejected": -0.46875, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.7651006711409396, |
| "grad_norm": 107.72885924741587, |
| "learning_rate": 9.29686388713456e-07, |
| "logits/chosen": -0.244140625, |
| "logits/rejected": 0.26171875, |
| "logps/chosen": -116.0, |
| "logps/rejected": -168.0, |
| "loss": 0.1597, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.5, |
| "rewards/margins": 3.125, |
| "rewards/rejected": -1.625, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.7785234899328859, |
| "grad_norm": 80.02513282586197, |
| "learning_rate": 9.255967008922473e-07, |
| "logits/chosen": 0.025390625, |
| "logits/rejected": -0.0322265625, |
| "logps/chosen": -260.0, |
| "logps/rejected": -272.0, |
| "loss": 0.1172, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.5, |
| "rewards/margins": 11.875, |
| "rewards/rejected": -5.375, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.7919463087248322, |
| "grad_norm": 61.82855219508814, |
| "learning_rate": 9.214009454506752e-07, |
| "logits/chosen": 0.359375, |
| "logits/rejected": 0.55078125, |
| "logps/chosen": -134.0, |
| "logps/rejected": -162.0, |
| "loss": 0.0776, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4375, |
| "rewards/margins": 5.875, |
| "rewards/rejected": -2.4375, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.8053691275167785, |
| "grad_norm": 87.5310679148203, |
| "learning_rate": 9.171001680589587e-07, |
| "logits/chosen": 1.515625, |
| "logits/rejected": 1.359375, |
| "logps/chosen": -144.0, |
| "logps/rejected": -170.0, |
| "loss": 0.086, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8125, |
| "rewards/margins": 6.0625, |
| "rewards/rejected": -5.25, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8187919463087249, |
| "grad_norm": 94.98485933507177, |
| "learning_rate": 9.126954405609882e-07, |
| "logits/chosen": -1.328125, |
| "logits/rejected": -1.6640625, |
| "logps/chosen": -115.0, |
| "logps/rejected": -89.0, |
| "loss": 0.1292, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.59375, |
| "rewards/margins": 4.6875, |
| "rewards/rejected": -5.3125, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.8322147651006712, |
| "grad_norm": 84.08521076849105, |
| "learning_rate": 9.081878607071995e-07, |
| "logits/chosen": 0.8828125, |
| "logits/rejected": 1.6171875, |
| "logps/chosen": -87.0, |
| "logps/rejected": -130.0, |
| "loss": 0.108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3125, |
| "rewards/margins": 3.0625, |
| "rewards/rejected": -2.75, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.8456375838926175, |
| "grad_norm": 71.02125606408353, |
| "learning_rate": 9.035785518809926e-07, |
| "logits/chosen": 0.154296875, |
| "logits/rejected": 0.578125, |
| "logps/chosen": -118.0, |
| "logps/rejected": -149.0, |
| "loss": 0.078, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.21875, |
| "rewards/margins": 2.40625, |
| "rewards/rejected": -2.1875, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.8590604026845637, |
| "grad_norm": 94.51081771355827, |
| "learning_rate": 8.988686628187596e-07, |
| "logits/chosen": -0.37890625, |
| "logits/rejected": 0.072265625, |
| "logps/chosen": -98.0, |
| "logps/rejected": -118.5, |
| "loss": 0.0861, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.5, |
| "rewards/margins": 3.09375, |
| "rewards/rejected": -2.59375, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.87248322147651, |
| "grad_norm": 166.0543842640925, |
| "learning_rate": 8.940593673235961e-07, |
| "logits/chosen": 1.1796875, |
| "logits/rejected": 1.28125, |
| "logps/chosen": -61.0, |
| "logps/rejected": -74.0, |
| "loss": 0.2046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.90625, |
| "rewards/margins": 2.15625, |
| "rewards/rejected": -3.0625, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.8859060402684564, |
| "grad_norm": 55.221935410851465, |
| "learning_rate": 8.891518639727649e-07, |
| "logits/chosen": 0.2421875, |
| "logits/rejected": 0.1103515625, |
| "logps/chosen": -237.0, |
| "logps/rejected": -234.0, |
| "loss": 0.0529, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.5625, |
| "rewards/margins": 7.0, |
| "rewards/rejected": -2.4375, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.8993288590604027, |
| "grad_norm": 136.84606095636403, |
| "learning_rate": 8.841473758189852e-07, |
| "logits/chosen": 1.078125, |
| "logits/rejected": 1.3828125, |
| "logps/chosen": -96.0, |
| "logps/rejected": -127.5, |
| "loss": 0.0831, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.125, |
| "rewards/margins": 5.09375, |
| "rewards/rejected": -2.96875, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.912751677852349, |
| "grad_norm": 72.8783809894428, |
| "learning_rate": 8.790471500856227e-07, |
| "logits/chosen": -0.30078125, |
| "logits/rejected": 0.2578125, |
| "logps/chosen": -106.0, |
| "logps/rejected": -152.0, |
| "loss": 0.0813, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.625, |
| "rewards/margins": 6.0625, |
| "rewards/rejected": -2.4375, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.9261744966442953, |
| "grad_norm": 149.40438009877843, |
| "learning_rate": 8.738524578558546e-07, |
| "logits/chosen": 0.30859375, |
| "logits/rejected": 0.8359375, |
| "logps/chosen": -118.5, |
| "logps/rejected": -167.0, |
| "loss": 0.1764, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 2.875, |
| "rewards/margins": 9.8125, |
| "rewards/rejected": -6.9375, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.9395973154362416, |
| "grad_norm": 110.9947650787701, |
| "learning_rate": 8.685645937558894e-07, |
| "logits/chosen": 1.5, |
| "logits/rejected": 2.1875, |
| "logps/chosen": -200.0, |
| "logps/rejected": -210.0, |
| "loss": 0.1196, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.25, |
| "rewards/margins": 5.375, |
| "rewards/rejected": -4.125, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9530201342281879, |
| "grad_norm": 67.29974977847421, |
| "learning_rate": 8.631848756323197e-07, |
| "logits/chosen": -0.119140625, |
| "logits/rejected": 0.61328125, |
| "logps/chosen": -83.0, |
| "logps/rejected": -134.0, |
| "loss": 0.0889, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.34375, |
| "rewards/margins": 4.09375, |
| "rewards/rejected": -4.4375, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.9664429530201343, |
| "grad_norm": 67.29089440200656, |
| "learning_rate": 8.577146442236856e-07, |
| "logits/chosen": 0.12109375, |
| "logits/rejected": 0.20703125, |
| "logps/chosen": -99.0, |
| "logps/rejected": -122.0, |
| "loss": 0.0882, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.71875, |
| "rewards/margins": 3.59375, |
| "rewards/rejected": -1.875, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.9798657718120806, |
| "grad_norm": 30.621291076198204, |
| "learning_rate": 8.521552628263361e-07, |
| "logits/chosen": 1.1015625, |
| "logits/rejected": 1.0625, |
| "logps/chosen": -121.0, |
| "logps/rejected": -166.0, |
| "loss": 0.039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5, |
| "rewards/margins": 4.0, |
| "rewards/rejected": -2.5, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.9932885906040269, |
| "grad_norm": 188.921461152021, |
| "learning_rate": 8.465081169546658e-07, |
| "logits/chosen": 0.921875, |
| "logits/rejected": 0.9609375, |
| "logps/chosen": -87.5, |
| "logps/rejected": -100.5, |
| "loss": 0.0981, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8125, |
| "rewards/margins": 4.59375, |
| "rewards/rejected": -2.78125, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.0067114093959733, |
| "grad_norm": 16.342940458429116, |
| "learning_rate": 8.407746139958168e-07, |
| "logits/chosen": 0.71484375, |
| "logits/rejected": 1.2109375, |
| "logps/chosen": -98.0, |
| "logps/rejected": -92.0, |
| "loss": 0.0157, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.03125, |
| "rewards/margins": 4.8125, |
| "rewards/rejected": -3.78125, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.0201342281879195, |
| "grad_norm": 7.695753089891094, |
| "learning_rate": 8.349561828589275e-07, |
| "logits/chosen": 0.7421875, |
| "logits/rejected": 0.96484375, |
| "logps/chosen": -114.5, |
| "logps/rejected": -143.0, |
| "loss": 0.0086, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.03125, |
| "rewards/margins": 7.40625, |
| "rewards/rejected": -5.375, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.0335570469798658, |
| "grad_norm": 3.97920779662753, |
| "learning_rate": 8.290542736190188e-07, |
| "logits/chosen": 0.044921875, |
| "logits/rejected": -0.158203125, |
| "logps/chosen": -89.0, |
| "logps/rejected": -124.0, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.53125, |
| "rewards/margins": 10.125, |
| "rewards/rejected": -4.5625, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.0469798657718121, |
| "grad_norm": 5.178156118093246, |
| "learning_rate": 8.230703571556048e-07, |
| "logits/chosen": 0.578125, |
| "logits/rejected": -0.1552734375, |
| "logps/chosen": -136.0, |
| "logps/rejected": -185.0, |
| "loss": 0.0056, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0625, |
| "rewards/margins": 8.875, |
| "rewards/rejected": -6.8125, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.0604026845637584, |
| "grad_norm": 5.363603356053278, |
| "learning_rate": 8.170059247861193e-07, |
| "logits/chosen": 0.44140625, |
| "logits/rejected": 0.53515625, |
| "logps/chosen": -108.0, |
| "logps/rejected": -127.5, |
| "loss": 0.0055, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.125, |
| "rewards/margins": 8.5, |
| "rewards/rejected": -5.375, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.0738255033557047, |
| "grad_norm": 1.1742657635295901, |
| "learning_rate": 8.108624878942476e-07, |
| "logits/chosen": 1.1875, |
| "logits/rejected": 1.4765625, |
| "logps/chosen": -81.5, |
| "logps/rejected": -118.5, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0625, |
| "rewards/margins": 8.875, |
| "rewards/rejected": -6.8125, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.087248322147651, |
| "grad_norm": 2.2188166194810006, |
| "learning_rate": 8.046415775532584e-07, |
| "logits/chosen": -0.21875, |
| "logits/rejected": -0.111328125, |
| "logps/chosen": -105.0, |
| "logps/rejected": -140.0, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.515625, |
| "rewards/margins": 10.25, |
| "rewards/rejected": -6.6875, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.1006711409395973, |
| "grad_norm": 18.55303933040324, |
| "learning_rate": 7.98344744144428e-07, |
| "logits/chosen": 0.318359375, |
| "logits/rejected": -0.24609375, |
| "logps/chosen": -156.0, |
| "logps/rejected": -145.0, |
| "loss": 0.0161, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.125, |
| "rewards/margins": 9.8125, |
| "rewards/rejected": -4.6875, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.1140939597315436, |
| "grad_norm": 2.5907801972535665, |
| "learning_rate": 7.919735569706532e-07, |
| "logits/chosen": -0.443359375, |
| "logits/rejected": 0.02734375, |
| "logps/chosen": -93.5, |
| "logps/rejected": -132.0, |
| "loss": 0.0028, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.375, |
| "rewards/margins": 21.125, |
| "rewards/rejected": -11.8125, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.1275167785234899, |
| "grad_norm": 1.474856541528767, |
| "learning_rate": 7.855296038653473e-07, |
| "logits/chosen": 0.3515625, |
| "logits/rejected": 0.703125, |
| "logps/chosen": -111.0, |
| "logps/rejected": -143.0, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.0, |
| "rewards/margins": 11.0, |
| "rewards/rejected": -7.03125, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.1409395973154361, |
| "grad_norm": 1.252650574794432, |
| "learning_rate": 7.7901449079672e-07, |
| "logits/chosen": 0.37890625, |
| "logits/rejected": 0.71875, |
| "logps/chosen": -101.0, |
| "logps/rejected": -126.0, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.75, |
| "rewards/margins": 9.25, |
| "rewards/rejected": -7.53125, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.1543624161073827, |
| "grad_norm": 4.532807290474358, |
| "learning_rate": 7.724298414675352e-07, |
| "logits/chosen": 0.90234375, |
| "logits/rejected": 1.6328125, |
| "logps/chosen": -154.0, |
| "logps/rejected": -164.0, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.1875, |
| "rewards/margins": 9.75, |
| "rewards/rejected": -5.5625, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.167785234899329, |
| "grad_norm": 12.257533233326445, |
| "learning_rate": 7.657772969104507e-07, |
| "logits/chosen": -0.025390625, |
| "logits/rejected": 0.126953125, |
| "logps/chosen": -241.0, |
| "logps/rejected": -282.0, |
| "loss": 0.0062, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.125, |
| "rewards/margins": 16.25, |
| "rewards/rejected": -13.125, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.1812080536912752, |
| "grad_norm": 3.2594066581738734, |
| "learning_rate": 7.590585150790387e-07, |
| "logits/chosen": -0.06689453125, |
| "logits/rejected": 0.0859375, |
| "logps/chosen": -128.0, |
| "logps/rejected": -126.5, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.96875, |
| "rewards/margins": 11.125, |
| "rewards/rejected": -8.125, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.1946308724832215, |
| "grad_norm": 1.689678600897324, |
| "learning_rate": 7.522751704345887e-07, |
| "logits/chosen": 1.1328125, |
| "logits/rejected": 0.359375, |
| "logps/chosen": -117.0, |
| "logps/rejected": -112.0, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.6875, |
| "rewards/margins": 11.8125, |
| "rewards/rejected": -7.15625, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.2080536912751678, |
| "grad_norm": 12.90463794364264, |
| "learning_rate": 7.454289535287967e-07, |
| "logits/chosen": 0.032958984375, |
| "logits/rejected": 0.7265625, |
| "logps/chosen": -130.0, |
| "logps/rejected": -206.0, |
| "loss": 0.0091, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.90625, |
| "rewards/margins": 11.5, |
| "rewards/rejected": -6.5625, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.221476510067114, |
| "grad_norm": 0.7368569495398003, |
| "learning_rate": 7.385215705824448e-07, |
| "logits/chosen": 1.0078125, |
| "logits/rejected": 1.3359375, |
| "logps/chosen": -103.0, |
| "logps/rejected": -175.0, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.78125, |
| "rewards/margins": 8.625, |
| "rewards/rejected": -7.875, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.2348993288590604, |
| "grad_norm": 10.607931466543848, |
| "learning_rate": 7.315547430601738e-07, |
| "logits/chosen": 1.09375, |
| "logits/rejected": 1.7734375, |
| "logps/chosen": -147.0, |
| "logps/rejected": -205.0, |
| "loss": 0.007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5, |
| "rewards/margins": 6.625, |
| "rewards/rejected": -5.125, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.2483221476510067, |
| "grad_norm": 4.33218010786588, |
| "learning_rate": 7.245302072414601e-07, |
| "logits/chosen": 0.44921875, |
| "logits/rejected": 0.7734375, |
| "logps/chosen": -192.0, |
| "logps/rejected": -202.0, |
| "loss": 0.0041, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.21875, |
| "rewards/margins": 13.125, |
| "rewards/rejected": -9.9375, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.261744966442953, |
| "grad_norm": 2.9637993733005548, |
| "learning_rate": 7.174497137878965e-07, |
| "logits/chosen": 0.5703125, |
| "logits/rejected": 0.431640625, |
| "logps/chosen": -160.0, |
| "logps/rejected": -161.0, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1875, |
| "rewards/margins": 8.625, |
| "rewards/rejected": -8.8125, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.2751677852348993, |
| "grad_norm": 19.555003982399544, |
| "learning_rate": 7.103150273068921e-07, |
| "logits/chosen": -1.0, |
| "logits/rejected": -0.3046875, |
| "logps/chosen": -92.5, |
| "logps/rejected": -170.0, |
| "loss": 0.0109, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.53125, |
| "rewards/margins": 11.1875, |
| "rewards/rejected": -11.75, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.2885906040268456, |
| "grad_norm": 5.018942211058499, |
| "learning_rate": 7.031279259118946e-07, |
| "logits/chosen": -1.0078125, |
| "logits/rejected": -1.40625, |
| "logps/chosen": -128.0, |
| "logps/rejected": -155.0, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.375, |
| "rewards/margins": 18.0, |
| "rewards/rejected": -19.5, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.302013422818792, |
| "grad_norm": 13.668642092116187, |
| "learning_rate": 6.958902007792465e-07, |
| "logits/chosen": 1.4609375, |
| "logits/rejected": 1.015625, |
| "logps/chosen": -140.0, |
| "logps/rejected": -156.0, |
| "loss": 0.0087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6875, |
| "rewards/margins": 9.5, |
| "rewards/rejected": -6.8125, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.3154362416107381, |
| "grad_norm": 7.957898712794316, |
| "learning_rate": 6.886036557017881e-07, |
| "logits/chosen": 0.50390625, |
| "logits/rejected": 1.0, |
| "logps/chosen": -142.0, |
| "logps/rejected": -182.0, |
| "loss": 0.0065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6875, |
| "rewards/margins": 9.375, |
| "rewards/rejected": -7.6875, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.3288590604026846, |
| "grad_norm": 35.374516908422066, |
| "learning_rate": 6.812701066393123e-07, |
| "logits/chosen": -0.64453125, |
| "logits/rejected": 0.169921875, |
| "logps/chosen": -109.5, |
| "logps/rejected": -128.0, |
| "loss": 0.03, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.375, |
| "rewards/margins": 12.25, |
| "rewards/rejected": -9.875, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.342281879194631, |
| "grad_norm": 8.489290840205843, |
| "learning_rate": 6.738913812659912e-07, |
| "logits/chosen": -0.1982421875, |
| "logits/rejected": -0.29296875, |
| "logps/chosen": -157.0, |
| "logps/rejected": -186.0, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.484375, |
| "rewards/margins": 9.5625, |
| "rewards/rejected": -7.09375, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.3557046979865772, |
| "grad_norm": 1.273570981721026, |
| "learning_rate": 6.664693185148806e-07, |
| "logits/chosen": -0.9375, |
| "logits/rejected": -0.6171875, |
| "logps/chosen": -71.0, |
| "logps/rejected": -99.0, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.328125, |
| "rewards/margins": 11.1875, |
| "rewards/rejected": -14.5, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.3691275167785235, |
| "grad_norm": 12.691638946077195, |
| "learning_rate": 6.590057681196191e-07, |
| "logits/chosen": -0.59765625, |
| "logits/rejected": -0.0966796875, |
| "logps/chosen": -148.0, |
| "logps/rejected": -167.0, |
| "loss": 0.0087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.9375, |
| "rewards/margins": 13.8125, |
| "rewards/rejected": -9.875, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.3825503355704698, |
| "grad_norm": 1.194548914112796, |
| "learning_rate": 6.515025901534363e-07, |
| "logits/chosen": -0.6953125, |
| "logits/rejected": -0.671875, |
| "logps/chosen": -89.0, |
| "logps/rejected": -136.0, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.46875, |
| "rewards/margins": 15.375, |
| "rewards/rejected": -10.875, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.395973154362416, |
| "grad_norm": 4.105125773983479, |
| "learning_rate": 6.439616545655833e-07, |
| "logits/chosen": -0.5625, |
| "logits/rejected": -0.078125, |
| "logps/chosen": -100.5, |
| "logps/rejected": -170.0, |
| "loss": 0.0028, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3125, |
| "rewards/margins": 18.75, |
| "rewards/rejected": -16.5, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.4093959731543624, |
| "grad_norm": 4.804881345080172, |
| "learning_rate": 6.363848407153017e-07, |
| "logits/chosen": 0.5234375, |
| "logits/rejected": 1.0546875, |
| "logps/chosen": -115.0, |
| "logps/rejected": -118.0, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5625, |
| "rewards/margins": 9.625, |
| "rewards/rejected": -6.0625, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.4228187919463087, |
| "grad_norm": 0.9972861709658724, |
| "learning_rate": 6.287740369034485e-07, |
| "logits/chosen": -0.095703125, |
| "logits/rejected": 0.15625, |
| "logps/chosen": -117.5, |
| "logps/rejected": -174.0, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5, |
| "rewards/margins": 19.25, |
| "rewards/rejected": -15.625, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.436241610738255, |
| "grad_norm": 7.783849954903146, |
| "learning_rate": 6.211311399018916e-07, |
| "logits/chosen": -0.8515625, |
| "logits/rejected": -0.51953125, |
| "logps/chosen": -142.0, |
| "logps/rejected": -186.0, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.9375, |
| "rewards/margins": 15.25, |
| "rewards/rejected": -10.375, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.4496644295302015, |
| "grad_norm": 0.2393104816166644, |
| "learning_rate": 6.13458054480795e-07, |
| "logits/chosen": -0.3828125, |
| "logits/rejected": -0.267578125, |
| "logps/chosen": -160.0, |
| "logps/rejected": -184.0, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.5625, |
| "rewards/margins": 15.5, |
| "rewards/rejected": -10.9375, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.4630872483221475, |
| "grad_norm": 0.84206416329658, |
| "learning_rate": 6.057566929339095e-07, |
| "logits/chosen": -0.17578125, |
| "logits/rejected": -0.24609375, |
| "logps/chosen": -106.5, |
| "logps/rejected": -150.0, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.59375, |
| "rewards/margins": 9.25, |
| "rewards/rejected": -7.625, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.476510067114094, |
| "grad_norm": 1.2327719606392833, |
| "learning_rate": 5.980289746019891e-07, |
| "logits/chosen": -0.98046875, |
| "logits/rejected": -1.2109375, |
| "logps/chosen": -100.0, |
| "logps/rejected": -113.0, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.0625, |
| "rewards/margins": 20.375, |
| "rewards/rejected": -9.375, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.4899328859060403, |
| "grad_norm": 0.14191543825131345, |
| "learning_rate": 5.902768253944511e-07, |
| "logits/chosen": -0.380859375, |
| "logits/rejected": 0.0791015625, |
| "logps/chosen": -144.0, |
| "logps/rejected": -154.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.125, |
| "rewards/margins": 9.625, |
| "rewards/rejected": -4.53125, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.5033557046979866, |
| "grad_norm": 2.434514405857578, |
| "learning_rate": 5.825021773093996e-07, |
| "logits/chosen": -0.734375, |
| "logits/rejected": 0.03515625, |
| "logps/chosen": -80.5, |
| "logps/rejected": -246.0, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5625, |
| "rewards/margins": 16.5, |
| "rewards/rejected": -12.875, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.516778523489933, |
| "grad_norm": 0.7697614239216926, |
| "learning_rate": 5.747069679521305e-07, |
| "logits/chosen": 1.453125, |
| "logits/rejected": 1.296875, |
| "logps/chosen": -130.0, |
| "logps/rejected": -166.0, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.53125, |
| "rewards/margins": 11.5, |
| "rewards/rejected": -8.9375, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.5302013422818792, |
| "grad_norm": 15.870268453862796, |
| "learning_rate": 5.668931400522395e-07, |
| "logits/chosen": -0.03515625, |
| "logits/rejected": -0.65625, |
| "logps/chosen": -148.0, |
| "logps/rejected": -158.0, |
| "loss": 0.0084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.8125, |
| "rewards/margins": 23.625, |
| "rewards/rejected": -15.875, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.5436241610738255, |
| "grad_norm": 0.7970654918372324, |
| "learning_rate": 5.59062640979454e-07, |
| "logits/chosen": -0.35546875, |
| "logits/rejected": 0.099609375, |
| "logps/chosen": -138.0, |
| "logps/rejected": -167.0, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0625, |
| "rewards/margins": 13.1875, |
| "rewards/rejected": -11.125, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.5570469798657718, |
| "grad_norm": 1.3955582466387715, |
| "learning_rate": 5.512174222583066e-07, |
| "logits/chosen": 0.58203125, |
| "logits/rejected": -0.02392578125, |
| "logps/chosen": -235.0, |
| "logps/rejected": -127.0, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.5, |
| "rewards/margins": 14.875, |
| "rewards/rejected": -9.375, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.570469798657718, |
| "grad_norm": 1.3089153115059224, |
| "learning_rate": 5.433594390817755e-07, |
| "logits/chosen": 0.765625, |
| "logits/rejected": 1.578125, |
| "logps/chosen": -156.0, |
| "logps/rejected": -206.0, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5, |
| "rewards/margins": 12.625, |
| "rewards/rejected": -9.125, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.5838926174496644, |
| "grad_norm": 5.1209862474975605, |
| "learning_rate": 5.354906498240079e-07, |
| "logits/chosen": 1.4609375, |
| "logits/rejected": 1.0234375, |
| "logps/chosen": -98.5, |
| "logps/rejected": -98.0, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.125, |
| "rewards/margins": 9.0, |
| "rewards/rejected": -6.90625, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.5973154362416109, |
| "grad_norm": 2.0143010331470426, |
| "learning_rate": 5.27613015552254e-07, |
| "logits/chosen": -0.2451171875, |
| "logits/rejected": -0.361328125, |
| "logps/chosen": -111.0, |
| "logps/rejected": -110.0, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.03125, |
| "rewards/margins": 11.75, |
| "rewards/rejected": -7.6875, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.610738255033557, |
| "grad_norm": 0.8804392446661593, |
| "learning_rate": 5.197284995381264e-07, |
| "logits/chosen": 0.0245361328125, |
| "logits/rejected": 0.43359375, |
| "logps/chosen": -176.0, |
| "logps/rejected": -210.0, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.0, |
| "rewards/margins": 19.625, |
| "rewards/rejected": -12.625, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.6241610738255035, |
| "grad_norm": 1.117425903818996, |
| "learning_rate": 5.118390667683119e-07, |
| "logits/chosen": -0.3515625, |
| "logits/rejected": -0.17578125, |
| "logps/chosen": -146.0, |
| "logps/rejected": -192.0, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.90625, |
| "rewards/margins": 23.125, |
| "rewards/rejected": -17.25, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.6375838926174495, |
| "grad_norm": 1.1187984471032872, |
| "learning_rate": 5.039466834548567e-07, |
| "logits/chosen": -0.87109375, |
| "logits/rejected": -0.5625, |
| "logps/chosen": -105.0, |
| "logps/rejected": -149.0, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.375, |
| "rewards/margins": 13.25, |
| "rewards/rejected": -7.875, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.651006711409396, |
| "grad_norm": 0.33656915007850763, |
| "learning_rate": 4.960533165451435e-07, |
| "logits/chosen": -0.25, |
| "logits/rejected": -0.0830078125, |
| "logps/chosen": -128.0, |
| "logps/rejected": -144.0, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.9375, |
| "rewards/margins": 12.3125, |
| "rewards/rejected": -8.375, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.6644295302013423, |
| "grad_norm": 0.5334984974014568, |
| "learning_rate": 4.881609332316881e-07, |
| "logits/chosen": 0.369140625, |
| "logits/rejected": 0.54296875, |
| "logps/chosen": -152.0, |
| "logps/rejected": -182.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.71875, |
| "rewards/margins": 13.375, |
| "rewards/rejected": -10.625, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.6778523489932886, |
| "grad_norm": 7.835269891896578, |
| "learning_rate": 4.802715004618737e-07, |
| "logits/chosen": -0.2265625, |
| "logits/rejected": 0.44921875, |
| "logps/chosen": -72.5, |
| "logps/rejected": -150.0, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7109375, |
| "rewards/margins": 11.25, |
| "rewards/rejected": -9.5, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.691275167785235, |
| "grad_norm": 3.783250644515536, |
| "learning_rate": 4.7238698444774593e-07, |
| "logits/chosen": -1.46875, |
| "logits/rejected": -1.0, |
| "logps/chosen": -62.75, |
| "logps/rejected": -104.0, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.546875, |
| "rewards/margins": 16.0, |
| "rewards/rejected": -13.375, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.7046979865771812, |
| "grad_norm": 4.79724903560345, |
| "learning_rate": 4.6450935017599195e-07, |
| "logits/chosen": -0.29296875, |
| "logits/rejected": 0.078125, |
| "logps/chosen": -110.0, |
| "logps/rejected": -182.0, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.28125, |
| "rewards/margins": 12.5, |
| "rewards/rejected": -10.25, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.7181208053691275, |
| "grad_norm": 8.223743623974363, |
| "learning_rate": 4.5664056091822465e-07, |
| "logits/chosen": -0.52734375, |
| "logits/rejected": -0.48046875, |
| "logps/chosen": -89.0, |
| "logps/rejected": -112.0, |
| "loss": 0.008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.265625, |
| "rewards/margins": 18.0, |
| "rewards/rejected": -14.75, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.7315436241610738, |
| "grad_norm": 5.21471157795929, |
| "learning_rate": 4.4878257774169345e-07, |
| "logits/chosen": 1.1015625, |
| "logits/rejected": 1.859375, |
| "logps/chosen": -161.0, |
| "logps/rejected": -268.0, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1875, |
| "rewards/margins": 9.0625, |
| "rewards/rejected": -8.875, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.7449664429530203, |
| "grad_norm": 0.6772448197318001, |
| "learning_rate": 4.4093735902054603e-07, |
| "logits/chosen": 0.1962890625, |
| "logits/rejected": 0.85546875, |
| "logps/chosen": -120.0, |
| "logps/rejected": -168.0, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0, |
| "rewards/margins": 11.6875, |
| "rewards/rejected": -9.6875, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7583892617449663, |
| "grad_norm": 2.3798718473775407, |
| "learning_rate": 4.331068599477605e-07, |
| "logits/chosen": -0.306640625, |
| "logits/rejected": -0.431640625, |
| "logps/chosen": -84.5, |
| "logps/rejected": -115.0, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.203125, |
| "rewards/margins": 10.9375, |
| "rewards/rejected": -9.75, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.7718120805369129, |
| "grad_norm": 0.19696843032685835, |
| "learning_rate": 4.252930320478695e-07, |
| "logits/chosen": 0.55078125, |
| "logits/rejected": 1.09375, |
| "logps/chosen": -106.0, |
| "logps/rejected": -200.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9375, |
| "rewards/margins": 14.4375, |
| "rewards/rejected": -11.5, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.785234899328859, |
| "grad_norm": 0.6762022924163246, |
| "learning_rate": 4.1749782269060043e-07, |
| "logits/chosen": -0.11865234375, |
| "logits/rejected": 0.1240234375, |
| "logps/chosen": -84.0, |
| "logps/rejected": -126.0, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.515625, |
| "rewards/margins": 11.6875, |
| "rewards/rejected": -8.1875, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.7986577181208054, |
| "grad_norm": 2.143182334362571, |
| "learning_rate": 4.09723174605549e-07, |
| "logits/chosen": 0.82421875, |
| "logits/rejected": 1.0625, |
| "logps/chosen": -111.0, |
| "logps/rejected": -221.0, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.65625, |
| "rewards/margins": 16.0, |
| "rewards/rejected": -13.3125, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.8120805369127517, |
| "grad_norm": 28.90404860779291, |
| "learning_rate": 4.01971025398011e-07, |
| "logits/chosen": -0.9296875, |
| "logits/rejected": -0.423828125, |
| "logps/chosen": -144.0, |
| "logps/rejected": -194.0, |
| "loss": 0.0199, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.875, |
| "rewards/margins": 20.5, |
| "rewards/rejected": -12.625, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.825503355704698, |
| "grad_norm": 3.275655892122567, |
| "learning_rate": 3.942433070660905e-07, |
| "logits/chosen": -1.0703125, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -96.0, |
| "logps/rejected": -145.0, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.78125, |
| "rewards/margins": 14.0, |
| "rewards/rejected": -14.75, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.8389261744966443, |
| "grad_norm": 0.4614476645335935, |
| "learning_rate": 3.865419455192048e-07, |
| "logits/chosen": 0.0400390625, |
| "logits/rejected": 0.228515625, |
| "logps/chosen": -140.0, |
| "logps/rejected": -174.0, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.6875, |
| "rewards/margins": 15.1875, |
| "rewards/rejected": -9.5, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.8523489932885906, |
| "grad_norm": 0.6025210241714362, |
| "learning_rate": 3.788688600981085e-07, |
| "logits/chosen": -0.26953125, |
| "logits/rejected": -0.47265625, |
| "logps/chosen": -155.0, |
| "logps/rejected": -124.0, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.40625, |
| "rewards/margins": 11.0, |
| "rewards/rejected": -8.5625, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.8657718120805369, |
| "grad_norm": 40.4801745827274, |
| "learning_rate": 3.7122596309655174e-07, |
| "logits/chosen": -0.546875, |
| "logits/rejected": -0.6015625, |
| "logps/chosen": -114.5, |
| "logps/rejected": -170.0, |
| "loss": 0.0242, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.625, |
| "rewards/margins": 14.25, |
| "rewards/rejected": -11.625, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.8791946308724832, |
| "grad_norm": 0.36856314373131915, |
| "learning_rate": 3.6361515928469845e-07, |
| "logits/chosen": 0.380859375, |
| "logits/rejected": -0.16796875, |
| "logps/chosen": -110.5, |
| "logps/rejected": -111.5, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.375, |
| "rewards/margins": 11.875, |
| "rewards/rejected": -11.5, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.8926174496644297, |
| "grad_norm": 37.29857910882844, |
| "learning_rate": 3.560383454344168e-07, |
| "logits/chosen": -0.265625, |
| "logits/rejected": 0.15234375, |
| "logps/chosen": -121.0, |
| "logps/rejected": -128.0, |
| "loss": 0.0203, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.75, |
| "rewards/margins": 9.125, |
| "rewards/rejected": -6.40625, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.9060402684563758, |
| "grad_norm": 0.6134596336148415, |
| "learning_rate": 3.484974098465636e-07, |
| "logits/chosen": -1.1171875, |
| "logits/rejected": -0.9921875, |
| "logps/chosen": -120.0, |
| "logps/rejected": -158.0, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4375, |
| "rewards/margins": 11.6875, |
| "rewards/rejected": -12.125, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.9194630872483223, |
| "grad_norm": 3.6457141240616147, |
| "learning_rate": 3.409942318803809e-07, |
| "logits/chosen": 0.24609375, |
| "logits/rejected": 0.5625, |
| "logps/chosen": -115.5, |
| "logps/rejected": -158.0, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1875, |
| "rewards/margins": 11.9375, |
| "rewards/rejected": -8.75, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.9328859060402683, |
| "grad_norm": 0.7732701048562175, |
| "learning_rate": 3.335306814851195e-07, |
| "logits/chosen": -0.158203125, |
| "logits/rejected": 0.125, |
| "logps/chosen": -96.0, |
| "logps/rejected": -134.0, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.78125, |
| "rewards/margins": 12.5625, |
| "rewards/rejected": -8.75, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.9463087248322148, |
| "grad_norm": 0.32377376860458673, |
| "learning_rate": 3.261086187340088e-07, |
| "logits/chosen": 0.34765625, |
| "logits/rejected": 0.466796875, |
| "logps/chosen": -214.0, |
| "logps/rejected": -243.0, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.5, |
| "rewards/margins": 18.25, |
| "rewards/rejected": -11.75, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.959731543624161, |
| "grad_norm": 0.33931684180319055, |
| "learning_rate": 3.187298933606878e-07, |
| "logits/chosen": 0.24609375, |
| "logits/rejected": 0.166015625, |
| "logps/chosen": -158.0, |
| "logps/rejected": -147.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8125, |
| "rewards/margins": 13.5625, |
| "rewards/rejected": -12.75, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.9731543624161074, |
| "grad_norm": 0.036679139127851305, |
| "learning_rate": 3.1139634429821195e-07, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -0.142578125, |
| "logps/chosen": -75.0, |
| "logps/rejected": -146.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.4375, |
| "rewards/margins": 17.0, |
| "rewards/rejected": -10.625, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.9865771812080537, |
| "grad_norm": 3.0122112521033113, |
| "learning_rate": 3.041097992207534e-07, |
| "logits/chosen": -0.36328125, |
| "logits/rejected": 0.21875, |
| "logps/chosen": -140.0, |
| "logps/rejected": -164.0, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0625, |
| "rewards/margins": 14.125, |
| "rewards/rejected": -11.0, |
| "step": 148 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.06581830858065343, |
| "learning_rate": 2.9687207408810555e-07, |
| "logits/chosen": 0.310546875, |
| "logits/rejected": 0.765625, |
| "logps/chosen": -153.0, |
| "logps/rejected": -168.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.78125, |
| "rewards/margins": 11.75, |
| "rewards/rejected": -12.5625, |
| "step": 149 |
| }, |
| { |
| "epoch": 2.0134228187919465, |
| "grad_norm": 0.1894473762562904, |
| "learning_rate": 2.8968497269310797e-07, |
| "logits/chosen": -0.70703125, |
| "logits/rejected": -1.015625, |
| "logps/chosen": -156.0, |
| "logps/rejected": -142.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.65625, |
| "rewards/margins": 16.0, |
| "rewards/rejected": -11.3125, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.0268456375838926, |
| "grad_norm": 0.3166445418481177, |
| "learning_rate": 2.8255028621210354e-07, |
| "logits/chosen": -1.15625, |
| "logits/rejected": -0.8671875, |
| "logps/chosen": -67.0, |
| "logps/rejected": -118.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.65625, |
| "rewards/margins": 15.5, |
| "rewards/rejected": -12.875, |
| "step": 151 |
| }, |
| { |
| "epoch": 2.040268456375839, |
| "grad_norm": 0.10060281063304236, |
| "learning_rate": 2.7546979275853987e-07, |
| "logits/chosen": -1.046875, |
| "logits/rejected": -0.921875, |
| "logps/chosen": -118.0, |
| "logps/rejected": -242.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.875, |
| "rewards/margins": 35.5, |
| "rewards/rejected": -29.5, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.053691275167785, |
| "grad_norm": 0.029261474444326915, |
| "learning_rate": 2.684452569398261e-07, |
| "logits/chosen": -0.09765625, |
| "logits/rejected": -0.0078125, |
| "logps/chosen": -120.0, |
| "logps/rejected": -127.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.96875, |
| "rewards/margins": 12.625, |
| "rewards/rejected": -8.6875, |
| "step": 153 |
| }, |
| { |
| "epoch": 2.0671140939597317, |
| "grad_norm": 0.07377090871392322, |
| "learning_rate": 2.614784294175554e-07, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": 0.17578125, |
| "logps/chosen": -76.0, |
| "logps/rejected": -189.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6875, |
| "rewards/margins": 22.125, |
| "rewards/rejected": -21.5, |
| "step": 154 |
| }, |
| { |
| "epoch": 2.0805369127516777, |
| "grad_norm": 0.13600847657907822, |
| "learning_rate": 2.545710464712032e-07, |
| "logits/chosen": -0.1796875, |
| "logits/rejected": 0.08203125, |
| "logps/chosen": -95.0, |
| "logps/rejected": -154.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.96875, |
| "rewards/margins": 15.5625, |
| "rewards/rejected": -11.5625, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.0939597315436242, |
| "grad_norm": 0.06348564739132218, |
| "learning_rate": 2.477248295654113e-07, |
| "logits/chosen": -0.458984375, |
| "logits/rejected": 0.2890625, |
| "logps/chosen": -128.0, |
| "logps/rejected": -183.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0, |
| "rewards/margins": 14.9375, |
| "rewards/rejected": -13.9375, |
| "step": 156 |
| }, |
| { |
| "epoch": 2.1073825503355703, |
| "grad_norm": 0.09396441188908901, |
| "learning_rate": 2.409414849209612e-07, |
| "logits/chosen": -0.1884765625, |
| "logits/rejected": 1.6171875, |
| "logps/chosen": -81.0, |
| "logps/rejected": -188.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3125, |
| "rewards/margins": 12.25, |
| "rewards/rejected": -9.9375, |
| "step": 157 |
| }, |
| { |
| "epoch": 2.120805369127517, |
| "grad_norm": 0.4686669453110811, |
| "learning_rate": 2.3422270308954933e-07, |
| "logits/chosen": -1.21875, |
| "logits/rejected": -0.392578125, |
| "logps/chosen": -53.0, |
| "logps/rejected": -74.5, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.40625, |
| "rewards/margins": 15.0, |
| "rewards/rejected": -14.5, |
| "step": 158 |
| }, |
| { |
| "epoch": 2.134228187919463, |
| "grad_norm": 0.27303818497026094, |
| "learning_rate": 2.275701585324649e-07, |
| "logits/chosen": -0.54296875, |
| "logits/rejected": -0.76171875, |
| "logps/chosen": -123.0, |
| "logps/rejected": -133.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.546875, |
| "rewards/margins": 17.75, |
| "rewards/rejected": -15.25, |
| "step": 159 |
| }, |
| { |
| "epoch": 2.1476510067114094, |
| "grad_norm": 0.2517214530806255, |
| "learning_rate": 2.2098550920327995e-07, |
| "logits/chosen": 0.03125, |
| "logits/rejected": 0.31640625, |
| "logps/chosen": -87.0, |
| "logps/rejected": -146.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.75, |
| "rewards/margins": 14.3125, |
| "rewards/rejected": -13.5625, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.1610738255033555, |
| "grad_norm": 0.03752622378950569, |
| "learning_rate": 2.144703961346526e-07, |
| "logits/chosen": 0.609375, |
| "logits/rejected": 0.55078125, |
| "logps/chosen": -174.0, |
| "logps/rejected": -186.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.25, |
| "rewards/margins": 13.4375, |
| "rewards/rejected": -9.1875, |
| "step": 161 |
| }, |
| { |
| "epoch": 2.174496644295302, |
| "grad_norm": 0.05988803532422006, |
| "learning_rate": 2.080264430293468e-07, |
| "logits/chosen": 0.30078125, |
| "logits/rejected": 0.234375, |
| "logps/chosen": -152.0, |
| "logps/rejected": -168.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.125, |
| "rewards/margins": 16.25, |
| "rewards/rejected": -10.1875, |
| "step": 162 |
| }, |
| { |
| "epoch": 2.1879194630872485, |
| "grad_norm": 0.20922924966030673, |
| "learning_rate": 2.0165525585557203e-07, |
| "logits/chosen": -0.322265625, |
| "logits/rejected": 0.140625, |
| "logps/chosen": -95.5, |
| "logps/rejected": -165.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.328125, |
| "rewards/margins": 13.4375, |
| "rewards/rejected": -12.125, |
| "step": 163 |
| }, |
| { |
| "epoch": 2.2013422818791946, |
| "grad_norm": 0.08964239202695976, |
| "learning_rate": 1.953584224467418e-07, |
| "logits/chosen": -0.9609375, |
| "logits/rejected": -0.7578125, |
| "logps/chosen": -164.0, |
| "logps/rejected": -207.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6875, |
| "rewards/margins": 14.75, |
| "rewards/rejected": -11.0625, |
| "step": 164 |
| }, |
| { |
| "epoch": 2.214765100671141, |
| "grad_norm": 0.1394991195345149, |
| "learning_rate": 1.8913751210575247e-07, |
| "logits/chosen": -0.765625, |
| "logits/rejected": -0.38671875, |
| "logps/chosen": -146.0, |
| "logps/rejected": -174.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.78125, |
| "rewards/margins": 15.8125, |
| "rewards/rejected": -14.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.228187919463087, |
| "grad_norm": 0.06224467844986688, |
| "learning_rate": 1.8299407521388065e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -0.4609375, |
| "logps/chosen": -80.0, |
| "logps/rejected": -172.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4375, |
| "rewards/margins": 20.0, |
| "rewards/rejected": -19.5, |
| "step": 166 |
| }, |
| { |
| "epoch": 2.2416107382550337, |
| "grad_norm": 0.017121698135529603, |
| "learning_rate": 1.7692964284439506e-07, |
| "logits/chosen": -0.703125, |
| "logits/rejected": -0.69921875, |
| "logps/chosen": -122.5, |
| "logps/rejected": -194.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.59375, |
| "rewards/margins": 22.25, |
| "rewards/rejected": -19.75, |
| "step": 167 |
| }, |
| { |
| "epoch": 2.2550335570469797, |
| "grad_norm": 0.14929743942170393, |
| "learning_rate": 1.709457263809812e-07, |
| "logits/chosen": 0.12451171875, |
| "logits/rejected": 0.8828125, |
| "logps/chosen": -96.0, |
| "logps/rejected": -178.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.65625, |
| "rewards/margins": 14.9375, |
| "rewards/rejected": -13.3125, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.2684563758389262, |
| "grad_norm": 0.11892827820734325, |
| "learning_rate": 1.6504381714107252e-07, |
| "logits/chosen": 0.578125, |
| "logits/rejected": 0.69921875, |
| "logps/chosen": -135.0, |
| "logps/rejected": -136.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.5, |
| "rewards/margins": 11.125, |
| "rewards/rejected": -6.625, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.2818791946308723, |
| "grad_norm": 0.03271406609147028, |
| "learning_rate": 1.5922538600418317e-07, |
| "logits/chosen": 0.984375, |
| "logits/rejected": 0.6015625, |
| "logps/chosen": -112.5, |
| "logps/rejected": -133.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.90625, |
| "rewards/margins": 15.125, |
| "rewards/rejected": -11.1875, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.295302013422819, |
| "grad_norm": 0.4554889059510281, |
| "learning_rate": 1.534918830453341e-07, |
| "logits/chosen": -0.40625, |
| "logits/rejected": 0.369140625, |
| "logps/chosen": -111.5, |
| "logps/rejected": -220.0, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.34375, |
| "rewards/margins": 22.25, |
| "rewards/rejected": -16.0, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.3087248322147653, |
| "grad_norm": 0.006581780362424091, |
| "learning_rate": 1.4784473717366387e-07, |
| "logits/chosen": -1.3828125, |
| "logits/rejected": 0.337890625, |
| "logps/chosen": -109.0, |
| "logps/rejected": -268.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1875, |
| "rewards/margins": 23.0, |
| "rewards/rejected": -23.25, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.3221476510067114, |
| "grad_norm": 0.012619009500990765, |
| "learning_rate": 1.422853557763144e-07, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -0.375, |
| "logps/chosen": -66.0, |
| "logps/rejected": -122.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.390625, |
| "rewards/margins": 14.5, |
| "rewards/rejected": -14.125, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.335570469798658, |
| "grad_norm": 0.01796264226604086, |
| "learning_rate": 1.3681512436768046e-07, |
| "logits/chosen": -1.578125, |
| "logits/rejected": -0.97265625, |
| "logps/chosen": -63.0, |
| "logps/rejected": -124.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.40625, |
| "rewards/margins": 20.25, |
| "rewards/rejected": -18.0, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.348993288590604, |
| "grad_norm": 0.07568692984212666, |
| "learning_rate": 1.3143540624411058e-07, |
| "logits/chosen": -0.59765625, |
| "logits/rejected": -0.1630859375, |
| "logps/chosen": -96.0, |
| "logps/rejected": -174.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4375, |
| "rewards/margins": 16.0, |
| "rewards/rejected": -13.5, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.3624161073825505, |
| "grad_norm": 0.06708516383348302, |
| "learning_rate": 1.2614754214414548e-07, |
| "logits/chosen": 0.012939453125, |
| "logits/rejected": 0.421875, |
| "logps/chosen": -146.0, |
| "logps/rejected": -208.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9375, |
| "rewards/margins": 16.875, |
| "rewards/rejected": -10.9375, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.3758389261744965, |
| "grad_norm": 0.10761380684324387, |
| "learning_rate": 1.2095284991437733e-07, |
| "logits/chosen": -0.384765625, |
| "logits/rejected": 0.033203125, |
| "logps/chosen": -119.5, |
| "logps/rejected": -139.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5, |
| "rewards/margins": 18.5, |
| "rewards/rejected": -14.9375, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.389261744966443, |
| "grad_norm": 0.028119404041323803, |
| "learning_rate": 1.1585262418101466e-07, |
| "logits/chosen": -0.7734375, |
| "logits/rejected": -0.8125, |
| "logps/chosen": -107.0, |
| "logps/rejected": -192.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8125, |
| "rewards/margins": 23.75, |
| "rewards/rejected": -20.0, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.402684563758389, |
| "grad_norm": 0.06427772073604729, |
| "learning_rate": 1.1084813602723514e-07, |
| "logits/chosen": 0.162109375, |
| "logits/rejected": 0.267578125, |
| "logps/chosen": -127.0, |
| "logps/rejected": -149.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.125, |
| "rewards/margins": 10.8125, |
| "rewards/rejected": -10.9375, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.4161073825503356, |
| "grad_norm": 0.031365944388578905, |
| "learning_rate": 1.0594063267640385e-07, |
| "logits/chosen": -0.8046875, |
| "logits/rejected": -0.67578125, |
| "logps/chosen": -128.0, |
| "logps/rejected": -214.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.1875, |
| "rewards/margins": 20.75, |
| "rewards/rejected": -13.625, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.4295302013422817, |
| "grad_norm": 0.052700817810413206, |
| "learning_rate": 1.0113133718124034e-07, |
| "logits/chosen": -0.453125, |
| "logits/rejected": -0.0556640625, |
| "logps/chosen": -122.5, |
| "logps/rejected": -156.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.875, |
| "rewards/margins": 10.1875, |
| "rewards/rejected": -8.3125, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.442953020134228, |
| "grad_norm": 0.042796528316408955, |
| "learning_rate": 9.642144811900737e-08, |
| "logits/chosen": -0.63671875, |
| "logits/rejected": 0.1494140625, |
| "logps/chosen": -124.5, |
| "logps/rejected": -189.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.0, |
| "rewards/margins": 17.0, |
| "rewards/rejected": -12.0625, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.4563758389261743, |
| "grad_norm": 0.05890047207159928, |
| "learning_rate": 9.181213929280046e-08, |
| "logits/chosen": -1.0390625, |
| "logits/rejected": -0.5390625, |
| "logps/chosen": -132.0, |
| "logps/rejected": -132.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.84375, |
| "rewards/margins": 10.625, |
| "rewards/rejected": -11.4375, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.469798657718121, |
| "grad_norm": 0.08060615031100707, |
| "learning_rate": 8.730455943901199e-08, |
| "logits/chosen": 0.37109375, |
| "logits/rejected": 0.8515625, |
| "logps/chosen": -96.0, |
| "logps/rejected": -164.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3125, |
| "rewards/margins": 13.0, |
| "rewards/rejected": -10.6875, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.4832214765100673, |
| "grad_norm": 0.5557104575302182, |
| "learning_rate": 8.289983194104127e-08, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -0.8046875, |
| "logps/chosen": -114.5, |
| "logps/rejected": -144.0, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.1875, |
| "rewards/margins": 12.0, |
| "rewards/rejected": -16.25, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.4966442953020134, |
| "grad_norm": 0.05401238664527387, |
| "learning_rate": 7.85990545493247e-08, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": -0.03515625, |
| "logps/chosen": -102.0, |
| "logps/rejected": -202.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.5, |
| "rewards/margins": 30.75, |
| "rewards/rejected": -21.375, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.51006711409396, |
| "grad_norm": 0.04674015967818173, |
| "learning_rate": 7.440329910775272e-08, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.353515625, |
| "logps/chosen": -77.0, |
| "logps/rejected": -140.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.8125, |
| "rewards/margins": 19.25, |
| "rewards/rejected": -13.5, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.523489932885906, |
| "grad_norm": 0.03178944530558772, |
| "learning_rate": 7.0313611286544e-08, |
| "logits/chosen": -0.259765625, |
| "logits/rejected": -0.67578125, |
| "logps/chosen": -153.0, |
| "logps/rejected": -137.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1875, |
| "rewards/margins": 14.0625, |
| "rewards/rejected": -13.875, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.5369127516778525, |
| "grad_norm": 0.0227179843800343, |
| "learning_rate": 6.633101032164273e-08, |
| "logits/chosen": -1.0703125, |
| "logits/rejected": -0.98046875, |
| "logps/chosen": -90.5, |
| "logps/rejected": -144.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.015625, |
| "rewards/margins": 19.75, |
| "rewards/rejected": -17.75, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.5503355704697985, |
| "grad_norm": 0.03328439296626924, |
| "learning_rate": 6.24564887607032e-08, |
| "logits/chosen": -0.421875, |
| "logits/rejected": -0.42578125, |
| "logps/chosen": -100.5, |
| "logps/rejected": -124.5, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.4375, |
| "rewards/margins": 25.25, |
| "rewards/rejected": -14.8125, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.563758389261745, |
| "grad_norm": 0.047336260336703316, |
| "learning_rate": 5.869101221572653e-08, |
| "logits/chosen": -0.97265625, |
| "logits/rejected": -0.8125, |
| "logps/chosen": -246.0, |
| "logps/rejected": -352.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.125, |
| "rewards/margins": 20.125, |
| "rewards/rejected": -15.0, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.577181208053691, |
| "grad_norm": 0.1370090812674146, |
| "learning_rate": 5.503551912240989e-08, |
| "logits/chosen": 0.14453125, |
| "logits/rejected": 0.3125, |
| "logps/chosen": -212.0, |
| "logps/rejected": -200.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.25, |
| "rewards/margins": 23.5, |
| "rewards/rejected": -13.25, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.5906040268456376, |
| "grad_norm": 0.027265765751800227, |
| "learning_rate": 5.1490920506268246e-08, |
| "logits/chosen": -0.5625, |
| "logits/rejected": -0.0283203125, |
| "logps/chosen": -116.5, |
| "logps/rejected": -177.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.375, |
| "rewards/margins": 18.5, |
| "rewards/rejected": -13.1875, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.604026845637584, |
| "grad_norm": 0.15268686054307481, |
| "learning_rate": 4.805809975558828e-08, |
| "logits/chosen": -1.484375, |
| "logits/rejected": -0.81640625, |
| "logps/chosen": -67.5, |
| "logps/rejected": -125.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.375, |
| "rewards/margins": 17.75, |
| "rewards/rejected": -20.0, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.61744966442953, |
| "grad_norm": 0.0271020117795592, |
| "learning_rate": 4.4737912401268894e-08, |
| "logits/chosen": -0.345703125, |
| "logits/rejected": -0.5546875, |
| "logps/chosen": -173.0, |
| "logps/rejected": -204.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.8125, |
| "rewards/margins": 17.0, |
| "rewards/rejected": -8.1875, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.6308724832214763, |
| "grad_norm": 0.02768965257686107, |
| "learning_rate": 4.15311859036056e-08, |
| "logits/chosen": -0.51953125, |
| "logits/rejected": 0.11181640625, |
| "logps/chosen": -110.0, |
| "logps/rejected": -150.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.25, |
| "rewards/margins": 16.25, |
| "rewards/rejected": -15.0, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.6442953020134228, |
| "grad_norm": 0.030583666420774456, |
| "learning_rate": 3.843871944606969e-08, |
| "logits/chosen": -1.046875, |
| "logits/rejected": -0.55859375, |
| "logps/chosen": -104.0, |
| "logps/rejected": -134.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.84375, |
| "rewards/margins": 20.0, |
| "rewards/rejected": -15.125, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.6577181208053693, |
| "grad_norm": 0.01983459256439879, |
| "learning_rate": 3.546128373613472e-08, |
| "logits/chosen": -0.08984375, |
| "logits/rejected": -0.01171875, |
| "logps/chosen": -148.0, |
| "logps/rejected": -152.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.25, |
| "rewards/margins": 10.875, |
| "rewards/rejected": -9.625, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.6711409395973154, |
| "grad_norm": 0.06308533652339048, |
| "learning_rate": 3.2599620813200835e-08, |
| "logits/chosen": -1.75, |
| "logits/rejected": -1.375, |
| "logps/chosen": -126.0, |
| "logps/rejected": -159.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.6875, |
| "rewards/margins": 21.25, |
| "rewards/rejected": -14.5625, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.684563758389262, |
| "grad_norm": 0.02445058345278849, |
| "learning_rate": 2.985444386366226e-08, |
| "logits/chosen": -0.265625, |
| "logits/rejected": -0.3203125, |
| "logps/chosen": -109.5, |
| "logps/rejected": -155.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.40625, |
| "rewards/margins": 21.0, |
| "rewards/rejected": -14.5625, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.697986577181208, |
| "grad_norm": 0.023961536504966064, |
| "learning_rate": 2.7226437043166518e-08, |
| "logits/chosen": -0.2080078125, |
| "logits/rejected": 0.09375, |
| "logps/chosen": -87.5, |
| "logps/rejected": -140.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.09375, |
| "rewards/margins": 15.125, |
| "rewards/rejected": -13.0, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.7114093959731544, |
| "grad_norm": 0.16636675194104117, |
| "learning_rate": 2.47162553061086e-08, |
| "logits/chosen": 0.0, |
| "logits/rejected": -0.10546875, |
| "logps/chosen": -170.0, |
| "logps/rejected": -181.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.15625, |
| "rewards/margins": 11.6875, |
| "rewards/rejected": -8.5, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.7248322147651005, |
| "grad_norm": 0.1382165261046544, |
| "learning_rate": 2.232452424240261e-08, |
| "logits/chosen": -1.671875, |
| "logits/rejected": -1.203125, |
| "logps/chosen": -88.5, |
| "logps/rejected": -142.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5, |
| "rewards/margins": 17.25, |
| "rewards/rejected": -16.625, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.738255033557047, |
| "grad_norm": 0.15799222178098044, |
| "learning_rate": 2.0051839921571444e-08, |
| "logits/chosen": -0.625, |
| "logits/rejected": -0.2333984375, |
| "logps/chosen": -114.0, |
| "logps/rejected": -154.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5, |
| "rewards/margins": 10.9375, |
| "rewards/rejected": -10.4375, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.751677852348993, |
| "grad_norm": 0.2806855827386244, |
| "learning_rate": 1.789876874419416e-08, |
| "logits/chosen": -0.06640625, |
| "logits/rejected": 0.107421875, |
| "logps/chosen": -121.5, |
| "logps/rejected": -115.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8125, |
| "rewards/margins": 8.75, |
| "rewards/rejected": -10.625, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.7651006711409396, |
| "grad_norm": 0.01678193593288143, |
| "learning_rate": 1.5865847300746415e-08, |
| "logits/chosen": -0.046875, |
| "logits/rejected": 0.0546875, |
| "logps/chosen": -109.0, |
| "logps/rejected": -146.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.125, |
| "rewards/margins": 20.875, |
| "rewards/rejected": -18.75, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.778523489932886, |
| "grad_norm": 0.08731970382784059, |
| "learning_rate": 1.395358223787152e-08, |
| "logits/chosen": -1.0625, |
| "logits/rejected": -0.859375, |
| "logps/chosen": -160.0, |
| "logps/rejected": -209.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.90625, |
| "rewards/margins": 19.625, |
| "rewards/rejected": -14.6875, |
| "step": 207 |
| }, |
| { |
| "epoch": 2.791946308724832, |
| "grad_norm": 0.05918517232146335, |
| "learning_rate": 1.21624501321132e-08, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": 0.0, |
| "logps/chosen": -85.5, |
| "logps/rejected": -103.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.421875, |
| "rewards/margins": 9.8125, |
| "rewards/rejected": -8.375, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.8053691275167782, |
| "grad_norm": 0.08699969705520388, |
| "learning_rate": 1.0492897371142728e-08, |
| "logits/chosen": -0.83984375, |
| "logits/rejected": -0.75390625, |
| "logps/chosen": -137.0, |
| "logps/rejected": -168.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.03125, |
| "rewards/margins": 13.6875, |
| "rewards/rejected": -13.6875, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.8187919463087248, |
| "grad_norm": 0.07365183308008798, |
| "learning_rate": 8.945340042509797e-09, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.26953125, |
| "logps/chosen": -88.0, |
| "logps/rejected": -115.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8125, |
| "rewards/margins": 8.9375, |
| "rewards/rejected": -6.125, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.8322147651006713, |
| "grad_norm": 0.038963738208368105, |
| "learning_rate": 7.520163829944803e-09, |
| "logits/chosen": -0.029296875, |
| "logits/rejected": 0.28515625, |
| "logps/chosen": -148.0, |
| "logps/rejected": -180.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.15625, |
| "rewards/margins": 20.75, |
| "rewards/rejected": -16.625, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.8456375838926173, |
| "grad_norm": 0.05147316584374017, |
| "learning_rate": 6.217723917238127e-09, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -0.93359375, |
| "logps/chosen": -49.75, |
| "logps/rejected": -85.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4140625, |
| "rewards/margins": 11.1875, |
| "rewards/rejected": -9.8125, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.859060402684564, |
| "grad_norm": 0.023022328378360397, |
| "learning_rate": 5.038344899721436e-09, |
| "logits/chosen": -0.055419921875, |
| "logits/rejected": 0.1142578125, |
| "logps/chosen": -122.0, |
| "logps/rejected": -165.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.84375, |
| "rewards/margins": 14.125, |
| "rewards/rejected": -12.3125, |
| "step": 213 |
| }, |
| { |
| "epoch": 2.87248322147651, |
| "grad_norm": 0.06406190550280132, |
| "learning_rate": 3.982320703371067e-09, |
| "logits/chosen": -2.03125, |
| "logits/rejected": -1.640625, |
| "logps/chosen": -73.5, |
| "logps/rejected": -113.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.15625, |
| "rewards/margins": 17.0, |
| "rewards/rejected": -14.875, |
| "step": 214 |
| }, |
| { |
| "epoch": 2.8859060402684564, |
| "grad_norm": 0.08226436699918951, |
| "learning_rate": 3.0499145115561177e-09, |
| "logits/chosen": -0.2890625, |
| "logits/rejected": -0.2734375, |
| "logps/chosen": -127.0, |
| "logps/rejected": -150.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.125, |
| "rewards/margins": 13.5, |
| "rewards/rejected": -11.375, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.899328859060403, |
| "grad_norm": 0.06701429938679973, |
| "learning_rate": 2.2413586994470825e-09, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.4140625, |
| "logps/chosen": -116.0, |
| "logps/rejected": -126.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.40625, |
| "rewards/margins": 21.75, |
| "rewards/rejected": -14.375, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.912751677852349, |
| "grad_norm": 0.15959257076282526, |
| "learning_rate": 1.5568547761034001e-09, |
| "logits/chosen": 0.078125, |
| "logits/rejected": 0.1376953125, |
| "logps/chosen": -162.0, |
| "logps/rejected": -162.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.75, |
| "rewards/margins": 10.1875, |
| "rewards/rejected": -9.4375, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.926174496644295, |
| "grad_norm": 0.06860454706791937, |
| "learning_rate": 9.965733342532923e-10, |
| "logits/chosen": -0.68359375, |
| "logits/rejected": -0.453125, |
| "logps/chosen": -127.0, |
| "logps/rejected": -209.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.6875, |
| "rewards/margins": 28.0, |
| "rewards/rejected": -16.25, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.9395973154362416, |
| "grad_norm": 0.8332909450330214, |
| "learning_rate": 5.606540077782162e-10, |
| "logits/chosen": 0.58984375, |
| "logits/rejected": 0.318359375, |
| "logps/chosen": -136.0, |
| "logps/rejected": -165.0, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.625, |
| "rewards/margins": 9.8125, |
| "rewards/rejected": -10.4375, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.953020134228188, |
| "grad_norm": 0.035230135449407174, |
| "learning_rate": 2.4920543691309137e-10, |
| "logits/chosen": 1.0, |
| "logits/rejected": 0.7109375, |
| "logps/chosen": -102.0, |
| "logps/rejected": -145.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.71875, |
| "rewards/margins": 14.9375, |
| "rewards/rejected": -12.1875, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.966442953020134, |
| "grad_norm": 0.155120664961761, |
| "learning_rate": 6.230524117134539e-11, |
| "logits/chosen": 0.080078125, |
| "logits/rejected": 0.6015625, |
| "logps/chosen": -122.0, |
| "logps/rejected": -198.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5625, |
| "rewards/margins": 14.0625, |
| "rewards/rejected": -11.5, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.9798657718120807, |
| "grad_norm": 0.09475005748408788, |
| "learning_rate": 0.0, |
| "logits/chosen": 0.16015625, |
| "logits/rejected": 0.345703125, |
| "logps/chosen": -121.0, |
| "logps/rejected": -154.0, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.125, |
| "rewards/margins": 15.625, |
| "rewards/rejected": -12.5, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.9798657718120807, |
| "step": 222, |
| "total_flos": 0.0, |
| "train_loss": 0.1353412357209354, |
| "train_runtime": 2791.2284, |
| "train_samples_per_second": 1.913, |
| "train_steps_per_second": 0.08 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 222, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|