| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1446, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002074688796680498, |
| "grad_norm": 8.200224942309267, |
| "learning_rate": 3.4482758620689654e-09, |
| "logits/chosen": -0.7109375, |
| "logits/rejected": -0.94140625, |
| "logps/chosen": -336.0, |
| "logps/rejected": -288.0, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02074688796680498, |
| "grad_norm": 8.985547005814288, |
| "learning_rate": 3.448275862068965e-08, |
| "logits/chosen": -1.40625, |
| "logits/rejected": -1.46875, |
| "logps/chosen": -380.0, |
| "logps/rejected": -326.0, |
| "loss": 0.692, |
| "rewards/accuracies": 0.1111111119389534, |
| "rewards/chosen": -0.0011138916015625, |
| "rewards/margins": -0.00055694580078125, |
| "rewards/rejected": -0.00055694580078125, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04149377593360996, |
| "grad_norm": 7.537541071519504, |
| "learning_rate": 6.89655172413793e-08, |
| "logits/chosen": -1.4296875, |
| "logits/rejected": -1.4453125, |
| "logps/chosen": -328.0, |
| "logps/rejected": -366.0, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": 0.000751495361328125, |
| "rewards/margins": -0.00225830078125, |
| "rewards/rejected": 0.0030059814453125, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06224066390041494, |
| "grad_norm": 8.46410953809254, |
| "learning_rate": 1.0344827586206897e-07, |
| "logits/chosen": -1.375, |
| "logits/rejected": -1.46875, |
| "logps/chosen": -420.0, |
| "logps/rejected": -374.0, |
| "loss": 0.692, |
| "rewards/accuracies": 0.20000000298023224, |
| "rewards/chosen": 0.00238037109375, |
| "rewards/margins": 0.001129150390625, |
| "rewards/rejected": 0.001251220703125, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08298755186721991, |
| "grad_norm": 8.157147362592257, |
| "learning_rate": 1.379310344827586e-07, |
| "logits/chosen": -1.453125, |
| "logits/rejected": -1.5078125, |
| "logps/chosen": -432.0, |
| "logps/rejected": -388.0, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": 0.00250244140625, |
| "rewards/margins": 0.0052490234375, |
| "rewards/rejected": -0.00274658203125, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1037344398340249, |
| "grad_norm": 8.08592590477226, |
| "learning_rate": 1.7241379310344828e-07, |
| "logits/chosen": -1.40625, |
| "logits/rejected": -1.4296875, |
| "logps/chosen": -340.0, |
| "logps/rejected": -300.0, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.3499999940395355, |
| "rewards/chosen": 0.0054931640625, |
| "rewards/margins": 0.00150299072265625, |
| "rewards/rejected": 0.003997802734375, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12448132780082988, |
| "grad_norm": 8.524351634398997, |
| "learning_rate": 2.0689655172413793e-07, |
| "logits/chosen": -1.4375, |
| "logits/rejected": -1.4375, |
| "logps/chosen": -506.0, |
| "logps/rejected": -478.0, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": 0.02001953125, |
| "rewards/margins": 0.0030059814453125, |
| "rewards/rejected": 0.0169677734375, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.14522821576763487, |
| "grad_norm": 7.5678704019111365, |
| "learning_rate": 2.413793103448276e-07, |
| "logits/chosen": -1.4296875, |
| "logits/rejected": -1.46875, |
| "logps/chosen": -386.0, |
| "logps/rejected": -262.0, |
| "loss": 0.6877, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.028076171875, |
| "rewards/margins": 0.020263671875, |
| "rewards/rejected": 0.00775146484375, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16597510373443983, |
| "grad_norm": 7.513277000409331, |
| "learning_rate": 2.758620689655172e-07, |
| "logits/chosen": -1.3515625, |
| "logits/rejected": -1.375, |
| "logps/chosen": -320.0, |
| "logps/rejected": -312.0, |
| "loss": 0.6847, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": 0.029052734375, |
| "rewards/margins": 0.001739501953125, |
| "rewards/rejected": 0.0272216796875, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18672199170124482, |
| "grad_norm": 7.275100713194908, |
| "learning_rate": 3.103448275862069e-07, |
| "logits/chosen": -1.40625, |
| "logits/rejected": -1.4140625, |
| "logps/chosen": -452.0, |
| "logps/rejected": -404.0, |
| "loss": 0.6811, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.062255859375, |
| "rewards/margins": 0.04150390625, |
| "rewards/rejected": 0.020751953125, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2074688796680498, |
| "grad_norm": 7.279491499537065, |
| "learning_rate": 3.4482758620689656e-07, |
| "logits/chosen": -1.46875, |
| "logits/rejected": -1.4921875, |
| "logps/chosen": -488.0, |
| "logps/rejected": -470.0, |
| "loss": 0.6749, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.08154296875, |
| "rewards/margins": 0.04296875, |
| "rewards/rejected": 0.038818359375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22821576763485477, |
| "grad_norm": 7.0878060086303085, |
| "learning_rate": 3.793103448275862e-07, |
| "logits/chosen": -1.4375, |
| "logits/rejected": -1.40625, |
| "logps/chosen": -346.0, |
| "logps/rejected": -350.0, |
| "loss": 0.6672, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.078125, |
| "rewards/margins": 0.041748046875, |
| "rewards/rejected": 0.03662109375, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.24896265560165975, |
| "grad_norm": 6.836599327469131, |
| "learning_rate": 4.1379310344827586e-07, |
| "logits/chosen": -1.453125, |
| "logits/rejected": -1.453125, |
| "logps/chosen": -436.0, |
| "logps/rejected": -328.0, |
| "loss": 0.6642, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.12109375, |
| "rewards/margins": 0.09521484375, |
| "rewards/rejected": 0.0260009765625, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2697095435684647, |
| "grad_norm": 7.277406568232138, |
| "learning_rate": 4.482758620689655e-07, |
| "logits/chosen": -1.5078125, |
| "logits/rejected": -1.421875, |
| "logps/chosen": -368.0, |
| "logps/rejected": -350.0, |
| "loss": 0.6436, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.0615234375, |
| "rewards/margins": 0.0908203125, |
| "rewards/rejected": -0.029052734375, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.29045643153526973, |
| "grad_norm": 7.649612249719239, |
| "learning_rate": 4.827586206896552e-07, |
| "logits/chosen": -1.3984375, |
| "logits/rejected": -1.390625, |
| "logps/chosen": -362.0, |
| "logps/rejected": -322.0, |
| "loss": 0.6473, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.046875, |
| "rewards/margins": 0.150390625, |
| "rewards/rejected": -0.103515625, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3112033195020747, |
| "grad_norm": 7.208353024534284, |
| "learning_rate": 4.99981778257793e-07, |
| "logits/chosen": -1.453125, |
| "logits/rejected": -1.4375, |
| "logps/chosen": -436.0, |
| "logps/rejected": -432.0, |
| "loss": 0.6378, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.05517578125, |
| "rewards/margins": 0.0341796875, |
| "rewards/rejected": -0.0888671875, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.33195020746887965, |
| "grad_norm": 8.231246609192565, |
| "learning_rate": 4.998360202572815e-07, |
| "logits/chosen": -1.3359375, |
| "logits/rejected": -1.3984375, |
| "logps/chosen": -372.0, |
| "logps/rejected": -376.0, |
| "loss": 0.6266, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.003570556640625, |
| "rewards/margins": 0.1337890625, |
| "rewards/rejected": -0.1298828125, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.35269709543568467, |
| "grad_norm": 7.711663165069283, |
| "learning_rate": 4.995445892440316e-07, |
| "logits/chosen": -1.40625, |
| "logits/rejected": -1.2578125, |
| "logps/chosen": -388.0, |
| "logps/rejected": -434.0, |
| "loss": 0.631, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.1064453125, |
| "rewards/margins": 0.083984375, |
| "rewards/rejected": -0.1904296875, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.37344398340248963, |
| "grad_norm": 8.152087874731409, |
| "learning_rate": 4.991076551440359e-07, |
| "logits/chosen": -1.2890625, |
| "logits/rejected": -1.2421875, |
| "logps/chosen": -450.0, |
| "logps/rejected": -448.0, |
| "loss": 0.6192, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.021484375, |
| "rewards/margins": 0.1630859375, |
| "rewards/rejected": -0.1416015625, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3941908713692946, |
| "grad_norm": 8.294099174165812, |
| "learning_rate": 4.985254727224266e-07, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.21875, |
| "logps/chosen": -436.0, |
| "logps/rejected": -402.0, |
| "loss": 0.6019, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.146484375, |
| "rewards/margins": 0.232421875, |
| "rewards/rejected": -0.08642578125, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4149377593360996, |
| "grad_norm": 8.447421779277066, |
| "learning_rate": 4.977983814349285e-07, |
| "logits/chosen": -1.3125, |
| "logits/rejected": -1.359375, |
| "logps/chosen": -468.0, |
| "logps/rejected": -396.0, |
| "loss": 0.608, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.220703125, |
| "rewards/margins": 0.0311279296875, |
| "rewards/rejected": -0.251953125, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.43568464730290457, |
| "grad_norm": 9.723114067333197, |
| "learning_rate": 4.969268052299307e-07, |
| "logits/chosen": -1.1796875, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -356.0, |
| "logps/rejected": -366.0, |
| "loss": 0.5878, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.1279296875, |
| "rewards/margins": 0.29296875, |
| "rewards/rejected": -0.421875, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.45643153526970953, |
| "grad_norm": 10.716738710931661, |
| "learning_rate": 4.959112523012938e-07, |
| "logits/chosen": -1.328125, |
| "logits/rejected": -1.359375, |
| "logps/chosen": -480.0, |
| "logps/rejected": -460.0, |
| "loss": 0.5766, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.328125, |
| "rewards/margins": 0.3671875, |
| "rewards/rejected": -0.6953125, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.47717842323651455, |
| "grad_norm": 9.819110125640536, |
| "learning_rate": 4.947523147920345e-07, |
| "logits/chosen": -1.3203125, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -548.0, |
| "logps/rejected": -450.0, |
| "loss": 0.5689, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.68359375, |
| "rewards/margins": 0.330078125, |
| "rewards/rejected": -1.015625, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4979253112033195, |
| "grad_norm": 11.406864477616395, |
| "learning_rate": 4.934506684490621e-07, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -1.1953125, |
| "logps/chosen": -448.0, |
| "logps/rejected": -442.0, |
| "loss": 0.5737, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6796875, |
| "rewards/margins": 0.375, |
| "rewards/rejected": -1.0546875, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5186721991701245, |
| "grad_norm": 12.451204475119791, |
| "learning_rate": 4.920070722291682e-07, |
| "logits/chosen": -1.3515625, |
| "logits/rejected": -1.4140625, |
| "logps/chosen": -520.0, |
| "logps/rejected": -552.0, |
| "loss": 0.5527, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.8515625, |
| "rewards/margins": 0.7890625, |
| "rewards/rejected": -1.640625, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5394190871369294, |
| "grad_norm": 14.291210178551019, |
| "learning_rate": 4.904223678564975e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -1.1015625, |
| "logps/chosen": -498.0, |
| "logps/rejected": -450.0, |
| "loss": 0.5554, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.953125, |
| "rewards/margins": 0.32421875, |
| "rewards/rejected": -1.2734375, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5601659751037344, |
| "grad_norm": 13.863578096384135, |
| "learning_rate": 4.886974793317607e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -516.0, |
| "logps/rejected": -532.0, |
| "loss": 0.5048, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8203125, |
| "rewards/margins": 0.875, |
| "rewards/rejected": -1.6875, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5809128630705395, |
| "grad_norm": 12.53026425282876, |
| "learning_rate": 4.86833412393473e-07, |
| "logits/chosen": -1.0703125, |
| "logits/rejected": -1.1328125, |
| "logps/chosen": -432.0, |
| "logps/rejected": -452.0, |
| "loss": 0.5557, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.68359375, |
| "rewards/margins": 0.80078125, |
| "rewards/rejected": -1.4921875, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6016597510373444, |
| "grad_norm": 14.022892671657644, |
| "learning_rate": 4.848312539315334e-07, |
| "logits/chosen": -1.375, |
| "logits/rejected": -1.359375, |
| "logps/chosen": -488.0, |
| "logps/rejected": -454.0, |
| "loss": 0.5069, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7890625, |
| "rewards/margins": 0.55859375, |
| "rewards/rejected": -1.34375, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6224066390041494, |
| "grad_norm": 16.89545464924121, |
| "learning_rate": 4.826921713534873e-07, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -520.0, |
| "logps/rejected": -548.0, |
| "loss": 0.5104, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.67578125, |
| "rewards/margins": 0.74609375, |
| "rewards/rejected": -1.421875, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6431535269709544, |
| "grad_norm": 14.047474903550272, |
| "learning_rate": 4.804174119038404e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -1.15625, |
| "logps/chosen": -472.0, |
| "logps/rejected": -498.0, |
| "loss": 0.5325, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.640625, |
| "rewards/margins": 0.69140625, |
| "rewards/rejected": -1.328125, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6639004149377593, |
| "grad_norm": 11.967189628642249, |
| "learning_rate": 4.78008301936823e-07, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.1875, |
| "logps/chosen": -458.0, |
| "logps/rejected": -504.0, |
| "loss": 0.514, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.59375, |
| "rewards/margins": 0.734375, |
| "rewards/rejected": -1.328125, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6846473029045643, |
| "grad_norm": 13.820746186375771, |
| "learning_rate": 4.754662461430258e-07, |
| "logits/chosen": -1.3203125, |
| "logits/rejected": -1.328125, |
| "logps/chosen": -612.0, |
| "logps/rejected": -556.0, |
| "loss": 0.5133, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.15625, |
| "rewards/margins": 0.69140625, |
| "rewards/rejected": -1.84375, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7053941908713693, |
| "grad_norm": 15.47060039648899, |
| "learning_rate": 4.727927267303612e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -1.09375, |
| "logps/chosen": -452.0, |
| "logps/rejected": -454.0, |
| "loss": 0.5407, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -1.265625, |
| "rewards/margins": 0.5234375, |
| "rewards/rejected": -1.7890625, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7261410788381742, |
| "grad_norm": 11.835480559229415, |
| "learning_rate": 4.699893025598255e-07, |
| "logits/chosen": -1.1875, |
| "logits/rejected": -1.1953125, |
| "logps/chosen": -484.0, |
| "logps/rejected": -490.0, |
| "loss": 0.5124, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1796875, |
| "rewards/margins": 0.384765625, |
| "rewards/rejected": -1.5703125, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7468879668049793, |
| "grad_norm": 17.32570297851737, |
| "learning_rate": 4.67057608236567e-07, |
| "logits/chosen": -1.0703125, |
| "logits/rejected": -1.0, |
| "logps/chosen": -436.0, |
| "logps/rejected": -468.0, |
| "loss": 0.4606, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.234375, |
| "rewards/margins": 0.796875, |
| "rewards/rejected": -2.03125, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7676348547717843, |
| "grad_norm": 14.712913827549949, |
| "learning_rate": 4.6399935315678893e-07, |
| "logits/chosen": -1.0703125, |
| "logits/rejected": -1.0859375, |
| "logps/chosen": -552.0, |
| "logps/rejected": -498.0, |
| "loss": 0.4847, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.78125, |
| "rewards/margins": 0.6328125, |
| "rewards/rejected": -2.40625, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7883817427385892, |
| "grad_norm": 15.253980446488892, |
| "learning_rate": 4.608163205110447e-07, |
| "logits/chosen": -1.3125, |
| "logits/rejected": -1.359375, |
| "logps/chosen": -544.0, |
| "logps/rejected": -572.0, |
| "loss": 0.4847, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.890625, |
| "rewards/margins": 0.765625, |
| "rewards/rejected": -1.65625, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8091286307053942, |
| "grad_norm": 14.996759932569487, |
| "learning_rate": 4.5751036624450445e-07, |
| "logits/chosen": -1.4140625, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -552.0, |
| "logps/rejected": -540.0, |
| "loss": 0.4827, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.046875, |
| "rewards/margins": 0.375, |
| "rewards/rejected": -1.421875, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8298755186721992, |
| "grad_norm": 16.766050665345595, |
| "learning_rate": 4.540834179748012e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -1.25, |
| "logps/chosen": -420.0, |
| "logps/rejected": -528.0, |
| "loss": 0.4556, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.21875, |
| "rewards/margins": 0.60546875, |
| "rewards/rejected": -1.8203125, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8506224066390041, |
| "grad_norm": 19.558534780127147, |
| "learning_rate": 4.5053747386808564e-07, |
| "logits/chosen": -1.1953125, |
| "logits/rejected": -1.3203125, |
| "logps/chosen": -508.0, |
| "logps/rejected": -688.0, |
| "loss": 0.4707, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.3671875, |
| "rewards/margins": 0.9296875, |
| "rewards/rejected": -2.296875, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8713692946058091, |
| "grad_norm": 14.183474677606634, |
| "learning_rate": 4.4687460147394706e-07, |
| "logits/chosen": -1.3125, |
| "logits/rejected": -1.3515625, |
| "logps/chosen": -532.0, |
| "logps/rejected": -588.0, |
| "loss": 0.4869, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1796875, |
| "rewards/margins": 0.73046875, |
| "rewards/rejected": -1.9140625, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8921161825726142, |
| "grad_norm": 13.143331743152638, |
| "learning_rate": 4.4309693651987726e-07, |
| "logits/chosen": -1.328125, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -624.0, |
| "logps/rejected": -600.0, |
| "loss": 0.4787, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.1484375, |
| "rewards/margins": 1.0390625, |
| "rewards/rejected": -2.1875, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9128630705394191, |
| "grad_norm": 16.10745504932835, |
| "learning_rate": 4.3920668166598273e-07, |
| "logits/chosen": -1.3671875, |
| "logits/rejected": -1.40625, |
| "logps/chosen": -476.0, |
| "logps/rejected": -520.0, |
| "loss": 0.423, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.0703125, |
| "rewards/margins": 0.88671875, |
| "rewards/rejected": -1.9609375, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9336099585062241, |
| "grad_norm": 17.6229119736944, |
| "learning_rate": 4.352061052206695e-07, |
| "logits/chosen": -1.28125, |
| "logits/rejected": -1.2578125, |
| "logps/chosen": -580.0, |
| "logps/rejected": -644.0, |
| "loss": 0.4367, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.140625, |
| "rewards/margins": 0.88671875, |
| "rewards/rejected": -3.03125, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9543568464730291, |
| "grad_norm": 27.688196316137688, |
| "learning_rate": 4.3109753981805045e-07, |
| "logits/chosen": -1.3515625, |
| "logits/rejected": -1.3828125, |
| "logps/chosen": -696.0, |
| "logps/rejected": -692.0, |
| "loss": 0.4332, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.890625, |
| "rewards/margins": 1.3125, |
| "rewards/rejected": -3.1875, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.975103734439834, |
| "grad_norm": 19.37927464187824, |
| "learning_rate": 4.2688338105784584e-07, |
| "logits/chosen": -1.203125, |
| "logits/rejected": -1.2890625, |
| "logps/chosen": -556.0, |
| "logps/rejected": -568.0, |
| "loss": 0.4428, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2890625, |
| "rewards/margins": 1.09375, |
| "rewards/rejected": -2.390625, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.995850622406639, |
| "grad_norm": 16.81355148557565, |
| "learning_rate": 4.2256608610857014e-07, |
| "logits/chosen": -1.3984375, |
| "logits/rejected": -1.390625, |
| "logps/chosen": -604.0, |
| "logps/rejected": -576.0, |
| "loss": 0.4534, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.3828125, |
| "rewards/margins": 1.0390625, |
| "rewards/rejected": -2.421875, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.016597510373444, |
| "grad_norm": 22.885104807982884, |
| "learning_rate": 4.181481722748197e-07, |
| "logits/chosen": -1.3671875, |
| "logits/rejected": -1.328125, |
| "logps/chosen": -446.0, |
| "logps/rejected": -548.0, |
| "loss": 0.3546, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.4375, |
| "rewards/margins": 1.0546875, |
| "rewards/rejected": -2.5, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.037344398340249, |
| "grad_norm": 28.388093085355763, |
| "learning_rate": 4.136322155294968e-07, |
| "logits/chosen": -1.1875, |
| "logits/rejected": -1.3046875, |
| "logps/chosen": -656.0, |
| "logps/rejected": -740.0, |
| "loss": 0.3066, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.25, |
| "rewards/margins": 1.2890625, |
| "rewards/rejected": -3.53125, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.058091286307054, |
| "grad_norm": 18.000547586432397, |
| "learning_rate": 4.090208490118253e-07, |
| "logits/chosen": -1.21875, |
| "logits/rejected": -1.3671875, |
| "logps/chosen": -792.0, |
| "logps/rejected": -820.0, |
| "loss": 0.3002, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.234375, |
| "rewards/margins": 1.640625, |
| "rewards/rejected": -3.859375, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.0788381742738589, |
| "grad_norm": 26.116853952186087, |
| "learning_rate": 4.0431676149203457e-07, |
| "logits/chosen": -1.25, |
| "logits/rejected": -1.2421875, |
| "logps/chosen": -544.0, |
| "logps/rejected": -660.0, |
| "loss": 0.305, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.8828125, |
| "rewards/margins": 1.9453125, |
| "rewards/rejected": -3.828125, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.099585062240664, |
| "grad_norm": 20.665661382654836, |
| "learning_rate": 3.995226958036058e-07, |
| "logits/chosen": -1.0625, |
| "logits/rejected": -1.125, |
| "logps/chosen": -628.0, |
| "logps/rejected": -728.0, |
| "loss": 0.282, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.53125, |
| "rewards/margins": 1.625, |
| "rewards/rejected": -4.15625, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.120331950207469, |
| "grad_norm": 20.874625813560073, |
| "learning_rate": 3.9464144724399605e-07, |
| "logits/chosen": -1.1328125, |
| "logits/rejected": -1.109375, |
| "logps/chosen": -656.0, |
| "logps/rejected": -780.0, |
| "loss": 0.2842, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.9140625, |
| "rewards/margins": 2.1875, |
| "rewards/rejected": -4.09375, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1410788381742738, |
| "grad_norm": 15.838820640241945, |
| "learning_rate": 3.896758619447714e-07, |
| "logits/chosen": -1.1796875, |
| "logits/rejected": -1.21875, |
| "logps/chosen": -608.0, |
| "logps/rejected": -804.0, |
| "loss": 0.2805, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.484375, |
| "rewards/margins": 2.078125, |
| "rewards/rejected": -4.5625, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.161825726141079, |
| "grad_norm": 16.332425845632645, |
| "learning_rate": 3.846288352121003e-07, |
| "logits/chosen": -1.3203125, |
| "logits/rejected": -1.265625, |
| "logps/chosen": -696.0, |
| "logps/rejected": -712.0, |
| "loss": 0.3048, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.765625, |
| "rewards/margins": 1.0234375, |
| "rewards/rejected": -3.78125, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1825726141078838, |
| "grad_norm": 17.929954538214847, |
| "learning_rate": 3.795033098385744e-07, |
| "logits/chosen": -1.28125, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -620.0, |
| "logps/rejected": -832.0, |
| "loss": 0.2894, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.609375, |
| "rewards/margins": 1.71875, |
| "rewards/rejected": -4.34375, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2033195020746887, |
| "grad_norm": 32.79066665207477, |
| "learning_rate": 3.7430227438734086e-07, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.1875, |
| "logps/chosen": -656.0, |
| "logps/rejected": -764.0, |
| "loss": 0.2766, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.640625, |
| "rewards/margins": 1.609375, |
| "rewards/rejected": -4.25, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.2240663900414939, |
| "grad_norm": 18.82509345910388, |
| "learning_rate": 3.690287614495481e-07, |
| "logits/chosen": -1.328125, |
| "logits/rejected": -1.3203125, |
| "logps/chosen": -740.0, |
| "logps/rejected": -920.0, |
| "loss": 0.2188, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.75, |
| "rewards/margins": 2.296875, |
| "rewards/rejected": -5.0625, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2448132780082988, |
| "grad_norm": 22.18730206624369, |
| "learning_rate": 3.6368584587611854e-07, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.3203125, |
| "logps/chosen": -692.0, |
| "logps/rejected": -920.0, |
| "loss": 0.2944, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -3.125, |
| "rewards/margins": 2.28125, |
| "rewards/rejected": -5.40625, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2655601659751037, |
| "grad_norm": 16.250841109003115, |
| "learning_rate": 3.582766429848818e-07, |
| "logits/chosen": -1.34375, |
| "logits/rejected": -1.375, |
| "logps/chosen": -652.0, |
| "logps/rejected": -756.0, |
| "loss": 0.2651, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.59375, |
| "rewards/margins": 1.75, |
| "rewards/rejected": -4.34375, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.2863070539419086, |
| "grad_norm": 32.2790592968402, |
| "learning_rate": 3.528043067441123e-07, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -516.0, |
| "logps/rejected": -724.0, |
| "loss": 0.2655, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.5, |
| "rewards/margins": 1.7734375, |
| "rewards/rejected": -4.28125, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.3070539419087137, |
| "grad_norm": 21.008974688174074, |
| "learning_rate": 3.472720279335305e-07, |
| "logits/chosen": -1.3515625, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -716.0, |
| "logps/rejected": -856.0, |
| "loss": 0.2614, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -3.0, |
| "rewards/margins": 1.8828125, |
| "rewards/rejected": -4.875, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.3278008298755186, |
| "grad_norm": 24.817331119096536, |
| "learning_rate": 3.4168303228384097e-07, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -744.0, |
| "logps/rejected": -880.0, |
| "loss": 0.2602, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.984375, |
| "rewards/margins": 1.375, |
| "rewards/rejected": -4.375, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3485477178423237, |
| "grad_norm": 16.172899710422996, |
| "learning_rate": 3.36040578595891e-07, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -704.0, |
| "logps/rejected": -928.0, |
| "loss": 0.2566, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.8125, |
| "rewards/margins": 2.09375, |
| "rewards/rejected": -4.90625, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.3692946058091287, |
| "grad_norm": 28.674908041856455, |
| "learning_rate": 3.303479568405467e-07, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -692.0, |
| "logps/rejected": -712.0, |
| "loss": 0.248, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.59375, |
| "rewards/margins": 1.40625, |
| "rewards/rejected": -4.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.3900414937759336, |
| "grad_norm": 18.929031877783473, |
| "learning_rate": 3.246084862403949e-07, |
| "logits/chosen": -1.0546875, |
| "logits/rejected": -1.1875, |
| "logps/chosen": -724.0, |
| "logps/rejected": -856.0, |
| "loss": 0.2285, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.609375, |
| "rewards/margins": 2.390625, |
| "rewards/rejected": -5.0, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.4107883817427385, |
| "grad_norm": 27.863022449197903, |
| "learning_rate": 3.188255133343896e-07, |
| "logits/chosen": -1.3359375, |
| "logits/rejected": -1.390625, |
| "logps/chosen": -744.0, |
| "logps/rejected": -1012.0, |
| "loss": 0.2616, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.328125, |
| "rewards/margins": 2.359375, |
| "rewards/rejected": -5.6875, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.4315352697095436, |
| "grad_norm": 20.6381381654102, |
| "learning_rate": 3.1300241002656964e-07, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -744.0, |
| "logps/rejected": -932.0, |
| "loss": 0.2343, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -3.046875, |
| "rewards/margins": 2.515625, |
| "rewards/rejected": -5.5625, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4522821576763485, |
| "grad_norm": 21.416162109964308, |
| "learning_rate": 3.071425716199882e-07, |
| "logits/chosen": -1.265625, |
| "logits/rejected": -1.3203125, |
| "logps/chosen": -708.0, |
| "logps/rejected": -940.0, |
| "loss": 0.2137, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.765625, |
| "rewards/margins": 2.734375, |
| "rewards/rejected": -5.5, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4730290456431536, |
| "grad_norm": 27.537624866222753, |
| "learning_rate": 3.0124941483699753e-07, |
| "logits/chosen": -1.34375, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -892.0, |
| "logps/rejected": -1064.0, |
| "loss": 0.2099, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -4.34375, |
| "rewards/margins": 1.875, |
| "rewards/rejected": -6.21875, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.4937759336099585, |
| "grad_norm": 17.582919177615466, |
| "learning_rate": 2.953263758270459e-07, |
| "logits/chosen": -1.21875, |
| "logits/rejected": -1.328125, |
| "logps/chosen": -612.0, |
| "logps/rejected": -720.0, |
| "loss": 0.236, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.390625, |
| "rewards/margins": 1.9453125, |
| "rewards/rejected": -4.34375, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.5145228215767634, |
| "grad_norm": 33.007588469499844, |
| "learning_rate": 2.8937690816314577e-07, |
| "logits/chosen": -1.328125, |
| "logits/rejected": -1.390625, |
| "logps/chosen": -744.0, |
| "logps/rejected": -980.0, |
| "loss": 0.2203, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.515625, |
| "rewards/margins": 2.5625, |
| "rewards/rejected": -6.0625, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.5352697095435683, |
| "grad_norm": 24.645168136425823, |
| "learning_rate": 2.834044808281841e-07, |
| "logits/chosen": -1.3046875, |
| "logits/rejected": -1.4375, |
| "logps/chosen": -664.0, |
| "logps/rejected": -868.0, |
| "loss": 0.229, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.6875, |
| "rewards/margins": 2.75, |
| "rewards/rejected": -5.4375, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.5560165975103735, |
| "grad_norm": 19.904894364928772, |
| "learning_rate": 2.774125761922463e-07, |
| "logits/chosen": -1.1953125, |
| "logits/rejected": -1.21875, |
| "logps/chosen": -580.0, |
| "logps/rejected": -884.0, |
| "loss": 0.2046, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.453125, |
| "rewards/margins": 2.90625, |
| "rewards/rejected": -5.34375, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.5767634854771784, |
| "grad_norm": 30.15619672097981, |
| "learning_rate": 2.714046879821358e-07, |
| "logits/chosen": -1.1875, |
| "logits/rejected": -1.328125, |
| "logps/chosen": -716.0, |
| "logps/rejected": -992.0, |
| "loss": 0.2351, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.484375, |
| "rewards/margins": 2.9375, |
| "rewards/rejected": -6.40625, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.5975103734439835, |
| "grad_norm": 27.783614402532425, |
| "learning_rate": 2.653843192442699e-07, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -724.0, |
| "logps/rejected": -860.0, |
| "loss": 0.2573, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.203125, |
| "rewards/margins": 2.046875, |
| "rewards/rejected": -5.25, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.6182572614107884, |
| "grad_norm": 19.560666870988193, |
| "learning_rate": 2.5935498030214397e-07, |
| "logits/chosen": -1.3515625, |
| "logits/rejected": -1.40625, |
| "logps/chosen": -700.0, |
| "logps/rejected": -860.0, |
| "loss": 0.2086, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -3.0, |
| "rewards/margins": 1.8828125, |
| "rewards/rejected": -4.875, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.6390041493775933, |
| "grad_norm": 21.195930951645135, |
| "learning_rate": 2.533201867095504e-07, |
| "logits/chosen": -1.375, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -756.0, |
| "logps/rejected": -1048.0, |
| "loss": 0.2734, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.53125, |
| "rewards/margins": 3.640625, |
| "rewards/rejected": -7.1875, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.6597510373443982, |
| "grad_norm": 20.462891895112204, |
| "learning_rate": 2.472834572007493e-07, |
| "logits/chosen": -1.4140625, |
| "logits/rejected": -1.4765625, |
| "logps/chosen": -728.0, |
| "logps/rejected": -872.0, |
| "loss": 0.2306, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.421875, |
| "rewards/margins": 2.84375, |
| "rewards/rejected": -5.28125, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.6804979253112033, |
| "grad_norm": 23.905476075586677, |
| "learning_rate": 2.4124831163878427e-07, |
| "logits/chosen": -1.234375, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -700.0, |
| "logps/rejected": -936.0, |
| "loss": 0.2081, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3125, |
| "rewards/margins": 2.609375, |
| "rewards/rejected": -5.9375, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.7012448132780082, |
| "grad_norm": 23.605115040621836, |
| "learning_rate": 2.3521826896313965e-07, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.4140625, |
| "logps/chosen": -704.0, |
| "logps/rejected": -1104.0, |
| "loss": 0.2039, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.59375, |
| "rewards/margins": 4.0, |
| "rewards/rejected": -7.59375, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.7219917012448134, |
| "grad_norm": 25.173345988155088, |
| "learning_rate": 2.2919684513793704e-07, |
| "logits/chosen": -1.2421875, |
| "logits/rejected": -1.4296875, |
| "logps/chosen": -736.0, |
| "logps/rejected": -972.0, |
| "loss": 0.2168, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.609375, |
| "rewards/margins": 2.84375, |
| "rewards/rejected": -6.4375, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.7427385892116183, |
| "grad_norm": 18.48915801624125, |
| "learning_rate": 2.2318755110186602e-07, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.3671875, |
| "logps/chosen": -688.0, |
| "logps/rejected": -828.0, |
| "loss": 0.2236, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4375, |
| "rewards/margins": 2.546875, |
| "rewards/rejected": -5.0, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.7634854771784232, |
| "grad_norm": 18.093111350178464, |
| "learning_rate": 2.171938907210457e-07, |
| "logits/chosen": -1.2421875, |
| "logits/rejected": -1.2109375, |
| "logps/chosen": -632.0, |
| "logps/rejected": -904.0, |
| "loss": 0.203, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.890625, |
| "rewards/margins": 2.390625, |
| "rewards/rejected": -5.28125, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.784232365145228, |
| "grad_norm": 19.412483996631583, |
| "learning_rate": 2.1121935874600914e-07, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.3203125, |
| "logps/chosen": -704.0, |
| "logps/rejected": -944.0, |
| "loss": 0.2065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.25, |
| "rewards/margins": 2.71875, |
| "rewards/rejected": -5.96875, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.8049792531120332, |
| "grad_norm": 27.072030223868076, |
| "learning_rate": 2.052674387740039e-07, |
| "logits/chosen": -1.34375, |
| "logits/rejected": -1.3046875, |
| "logps/chosen": -736.0, |
| "logps/rejected": -1008.0, |
| "loss": 0.2191, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.5, |
| "rewards/margins": 2.875, |
| "rewards/rejected": -6.375, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.8257261410788381, |
| "grad_norm": 23.115028154031677, |
| "learning_rate": 1.9934160121779511e-07, |
| "logits/chosen": -1.140625, |
| "logits/rejected": -1.25, |
| "logps/chosen": -812.0, |
| "logps/rejected": -1000.0, |
| "loss": 0.2042, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.765625, |
| "rewards/margins": 2.296875, |
| "rewards/rejected": -6.0625, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.8464730290456433, |
| "grad_norm": 21.41388111371229, |
| "learning_rate": 1.9344530128215644e-07, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.3515625, |
| "logps/chosen": -752.0, |
| "logps/rejected": -852.0, |
| "loss": 0.2198, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.59375, |
| "rewards/margins": 2.453125, |
| "rewards/rejected": -5.03125, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.8672199170124482, |
| "grad_norm": 21.253233874014462, |
| "learning_rate": 1.8758197694922812e-07, |
| "logits/chosen": -1.25, |
| "logits/rejected": -1.3671875, |
| "logps/chosen": -740.0, |
| "logps/rejected": -944.0, |
| "loss": 0.2285, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -2.953125, |
| "rewards/margins": 2.078125, |
| "rewards/rejected": -5.03125, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.887966804979253, |
| "grad_norm": 20.368884562679153, |
| "learning_rate": 1.8175504697391728e-07, |
| "logits/chosen": -1.265625, |
| "logits/rejected": -1.296875, |
| "logps/chosen": -852.0, |
| "logps/rejected": -968.0, |
| "loss": 0.1723, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -3.953125, |
| "rewards/margins": 2.015625, |
| "rewards/rejected": -5.96875, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.908713692946058, |
| "grad_norm": 16.864214805797634, |
| "learning_rate": 1.7596790889050907e-07, |
| "logits/chosen": -1.34375, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -720.0, |
| "logps/rejected": -940.0, |
| "loss": 0.1957, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.0625, |
| "rewards/margins": 2.78125, |
| "rewards/rejected": -5.84375, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.929460580912863, |
| "grad_norm": 16.073322912809243, |
| "learning_rate": 1.702239370316515e-07, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -792.0, |
| "logps/rejected": -1056.0, |
| "loss": 0.1968, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5, |
| "rewards/margins": 3.4375, |
| "rewards/rejected": -6.9375, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.950207468879668, |
| "grad_norm": 22.36544108559203, |
| "learning_rate": 1.645264805608674e-07, |
| "logits/chosen": -1.3046875, |
| "logits/rejected": -1.2890625, |
| "logps/chosen": -816.0, |
| "logps/rejected": -1032.0, |
| "loss": 0.1829, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -3.75, |
| "rewards/margins": 2.296875, |
| "rewards/rejected": -6.0625, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.9709543568464731, |
| "grad_norm": 27.387195026328936, |
| "learning_rate": 1.58878861519743e-07, |
| "logits/chosen": -1.21875, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -624.0, |
| "logps/rejected": -928.0, |
| "loss": 0.2129, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.125, |
| "rewards/margins": 2.9375, |
| "rewards/rejected": -6.0625, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.991701244813278, |
| "grad_norm": 36.05924674346635, |
| "learning_rate": 1.5328437289093015e-07, |
| "logits/chosen": -1.3203125, |
| "logits/rejected": -1.3515625, |
| "logps/chosen": -712.0, |
| "logps/rejected": -1020.0, |
| "loss": 0.1837, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -3.65625, |
| "rewards/margins": 2.78125, |
| "rewards/rejected": -6.4375, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.012448132780083, |
| "grad_norm": 9.806579172400033, |
| "learning_rate": 1.4774627667809223e-07, |
| "logits/chosen": -1.3046875, |
| "logits/rejected": -1.3515625, |
| "logps/chosen": -700.0, |
| "logps/rejected": -1008.0, |
| "loss": 0.1358, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.171875, |
| "rewards/margins": 3.421875, |
| "rewards/rejected": -6.5625, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.033195020746888, |
| "grad_norm": 11.092351348103373, |
| "learning_rate": 1.4226780200391267e-07, |
| "logits/chosen": -0.91796875, |
| "logits/rejected": -1.0, |
| "logps/chosen": -756.0, |
| "logps/rejected": -1160.0, |
| "loss": 0.0784, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.0625, |
| "rewards/margins": 3.78125, |
| "rewards/rejected": -7.84375, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.0539419087136928, |
| "grad_norm": 10.816346221719781, |
| "learning_rate": 1.3685214322727596e-07, |
| "logits/chosen": -1.046875, |
| "logits/rejected": -1.203125, |
| "logps/chosen": -880.0, |
| "logps/rejected": -1272.0, |
| "loss": 0.0719, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.59375, |
| "rewards/margins": 4.875, |
| "rewards/rejected": -9.4375, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.074688796680498, |
| "grad_norm": 10.729589636729887, |
| "learning_rate": 1.3150245808071854e-07, |
| "logits/chosen": -1.265625, |
| "logits/rejected": -1.265625, |
| "logps/chosen": -1020.0, |
| "logps/rejected": -1288.0, |
| "loss": 0.0733, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.21875, |
| "rewards/margins": 3.71875, |
| "rewards/rejected": -8.9375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.095435684647303, |
| "grad_norm": 7.237413191543059, |
| "learning_rate": 1.2622186582923566e-07, |
| "logits/chosen": -1.203125, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -996.0, |
| "logps/rejected": -1264.0, |
| "loss": 0.0756, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.53125, |
| "rewards/margins": 3.25, |
| "rewards/rejected": -7.78125, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.116182572614108, |
| "grad_norm": 14.390311697685057, |
| "learning_rate": 1.2101344545151713e-07, |
| "logits/chosen": -1.234375, |
| "logits/rejected": -1.2890625, |
| "logps/chosen": -856.0, |
| "logps/rejected": -1168.0, |
| "loss": 0.0688, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.71875, |
| "rewards/margins": 3.578125, |
| "rewards/rejected": -8.3125, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.136929460580913, |
| "grad_norm": 9.371742649196001, |
| "learning_rate": 1.1588023384467335e-07, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.34375, |
| "logps/chosen": -848.0, |
| "logps/rejected": -1240.0, |
| "loss": 0.0873, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.78125, |
| "rewards/margins": 4.53125, |
| "rewards/rejected": -9.3125, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.1576763485477177, |
| "grad_norm": 13.411003854877661, |
| "learning_rate": 1.1082522405349834e-07, |
| "logits/chosen": -1.21875, |
| "logits/rejected": -1.2734375, |
| "logps/chosen": -744.0, |
| "logps/rejected": -1144.0, |
| "loss": 0.0694, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.546875, |
| "rewards/margins": 4.875, |
| "rewards/rejected": -8.4375, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.1784232365145226, |
| "grad_norm": 10.33738561356746, |
| "learning_rate": 1.0585136352530172e-07, |
| "logits/chosen": -1.4296875, |
| "logits/rejected": -1.484375, |
| "logps/chosen": -876.0, |
| "logps/rejected": -1184.0, |
| "loss": 0.0812, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -4.5625, |
| "rewards/margins": 3.015625, |
| "rewards/rejected": -7.59375, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.199170124481328, |
| "grad_norm": 15.189705649161667, |
| "learning_rate": 1.0096155239132675e-07, |
| "logits/chosen": -1.296875, |
| "logits/rejected": -1.328125, |
| "logps/chosen": -672.0, |
| "logps/rejected": -924.0, |
| "loss": 0.0763, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.625, |
| "rewards/margins": 3.21875, |
| "rewards/rejected": -6.84375, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.219917012448133, |
| "grad_norm": 9.763294237607763, |
| "learning_rate": 9.615864177575836e-08, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -1112.0, |
| "logps/rejected": -1456.0, |
| "loss": 0.0735, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.53125, |
| "rewards/margins": 5.21875, |
| "rewards/rejected": -10.75, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.240663900414938, |
| "grad_norm": 12.16820558108209, |
| "learning_rate": 9.144543213330493e-08, |
| "logits/chosen": -1.390625, |
| "logits/rejected": -1.390625, |
| "logps/chosen": -848.0, |
| "logps/rejected": -1296.0, |
| "loss": 0.0737, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.3125, |
| "rewards/margins": 4.9375, |
| "rewards/rejected": -9.25, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.2614107883817427, |
| "grad_norm": 13.349946012287942, |
| "learning_rate": 8.682467161632508e-08, |
| "logits/chosen": -1.3359375, |
| "logits/rejected": -1.421875, |
| "logps/chosen": -764.0, |
| "logps/rejected": -1216.0, |
| "loss": 0.0706, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.25, |
| "rewards/margins": 4.34375, |
| "rewards/rejected": -8.5625, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.2821576763485476, |
| "grad_norm": 20.08687451523014, |
| "learning_rate": 8.229905447244942e-08, |
| "logits/chosen": -1.328125, |
| "logits/rejected": -1.3828125, |
| "logps/chosen": -908.0, |
| "logps/rejected": -1176.0, |
| "loss": 0.0642, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.4375, |
| "rewards/margins": 3.53125, |
| "rewards/rejected": -8.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.3029045643153525, |
| "grad_norm": 16.153630097446893, |
| "learning_rate": 7.787121947363393e-08, |
| "logits/chosen": -1.1171875, |
| "logits/rejected": -1.1953125, |
| "logps/chosen": -964.0, |
| "logps/rejected": -1360.0, |
| "loss": 0.0691, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.21875, |
| "rewards/margins": 4.34375, |
| "rewards/rejected": -9.5625, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.323651452282158, |
| "grad_norm": 14.370101853953887, |
| "learning_rate": 7.354374837755919e-08, |
| "logits/chosen": -1.1328125, |
| "logits/rejected": -1.21875, |
| "logps/chosen": -892.0, |
| "logps/rejected": -1272.0, |
| "loss": 0.0749, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.8125, |
| "rewards/margins": 4.625, |
| "rewards/rejected": -9.4375, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.3443983402489628, |
| "grad_norm": 15.871062321763814, |
| "learning_rate": 6.931916442227335e-08, |
| "logits/chosen": -1.28125, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -808.0, |
| "logps/rejected": -1184.0, |
| "loss": 0.068, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.34375, |
| "rewards/margins": 4.1875, |
| "rewards/rejected": -8.5, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.3651452282157677, |
| "grad_norm": 8.312067272107306, |
| "learning_rate": 6.519993085495622e-08, |
| "logits/chosen": -1.1640625, |
| "logits/rejected": -1.3046875, |
| "logps/chosen": -928.0, |
| "logps/rejected": -1440.0, |
| "loss": 0.0656, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.96875, |
| "rewards/margins": 5.375, |
| "rewards/rejected": -10.375, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.3858921161825726, |
| "grad_norm": 10.430795094415418, |
| "learning_rate": 6.118844949566293e-08, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.3515625, |
| "logps/chosen": -900.0, |
| "logps/rejected": -1384.0, |
| "loss": 0.0564, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.875, |
| "rewards/margins": 5.71875, |
| "rewards/rejected": -10.5625, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.4066390041493775, |
| "grad_norm": 22.177489465859505, |
| "learning_rate": 5.728705933688349e-08, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.2890625, |
| "logps/chosen": -908.0, |
| "logps/rejected": -1344.0, |
| "loss": 0.0803, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.875, |
| "rewards/margins": 4.28125, |
| "rewards/rejected": -9.125, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.4273858921161824, |
| "grad_norm": 6.857181586976422, |
| "learning_rate": 5.3498035179736475e-08, |
| "logits/chosen": -1.21875, |
| "logits/rejected": -1.3203125, |
| "logps/chosen": -768.0, |
| "logps/rejected": -1144.0, |
| "loss": 0.0637, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.65625, |
| "rewards/margins": 3.875, |
| "rewards/rejected": -8.5625, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.4481327800829877, |
| "grad_norm": 18.641976214083737, |
| "learning_rate": 4.98235863075899e-08, |
| "logits/chosen": -1.203125, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -844.0, |
| "logps/rejected": -1304.0, |
| "loss": 0.0823, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -4.75, |
| "rewards/margins": 4.75, |
| "rewards/rejected": -9.5, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.4688796680497926, |
| "grad_norm": 11.12170347340474, |
| "learning_rate": 4.626585519788476e-08, |
| "logits/chosen": -1.28125, |
| "logits/rejected": -1.28125, |
| "logps/chosen": -776.0, |
| "logps/rejected": -1272.0, |
| "loss": 0.0621, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.859375, |
| "rewards/margins": 5.21875, |
| "rewards/rejected": -9.0625, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.4896265560165975, |
| "grad_norm": 10.533918362287494, |
| "learning_rate": 4.2826916272911154e-08, |
| "logits/chosen": -1.1640625, |
| "logits/rejected": -1.2890625, |
| "logps/chosen": -916.0, |
| "logps/rejected": -1256.0, |
| "loss": 0.0647, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.96875, |
| "rewards/margins": 4.09375, |
| "rewards/rejected": -9.0625, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.5103734439834025, |
| "grad_norm": 11.17720015590518, |
| "learning_rate": 3.950877469026523e-08, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.3046875, |
| "logps/chosen": -1128.0, |
| "logps/rejected": -1528.0, |
| "loss": 0.0642, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.625, |
| "rewards/margins": 5.1875, |
| "rewards/rejected": -10.8125, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.5311203319502074, |
| "grad_norm": 19.957831888824806, |
| "learning_rate": 3.631336517369313e-08, |
| "logits/chosen": -1.1953125, |
| "logits/rejected": -1.328125, |
| "logps/chosen": -768.0, |
| "logps/rejected": -1160.0, |
| "loss": 0.0622, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.28125, |
| "rewards/margins": 4.125, |
| "rewards/rejected": -8.4375, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.5518672199170123, |
| "grad_norm": 17.996461125043027, |
| "learning_rate": 3.3242550885002805e-08, |
| "logits/chosen": -1.1953125, |
| "logits/rejected": -1.2421875, |
| "logps/chosen": -1008.0, |
| "logps/rejected": -1400.0, |
| "loss": 0.0673, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.0, |
| "rewards/margins": 5.03125, |
| "rewards/rejected": -10.0, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.572614107883817, |
| "grad_norm": 12.137212294540161, |
| "learning_rate": 3.029812233770215e-08, |
| "logits/chosen": -1.2421875, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -776.0, |
| "logps/rejected": -1112.0, |
| "loss": 0.0709, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.03125, |
| "rewards/margins": 3.65625, |
| "rewards/rejected": -7.6875, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.5933609958506225, |
| "grad_norm": 6.03151820091559, |
| "learning_rate": 2.74817963529958e-08, |
| "logits/chosen": -1.1171875, |
| "logits/rejected": -1.171875, |
| "logps/chosen": -672.0, |
| "logps/rejected": -1160.0, |
| "loss": 0.0584, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.765625, |
| "rewards/margins": 4.8125, |
| "rewards/rejected": -8.5625, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.6141078838174274, |
| "grad_norm": 25.243724218800992, |
| "learning_rate": 2.479521505875079e-08, |
| "logits/chosen": -1.265625, |
| "logits/rejected": -1.2578125, |
| "logps/chosen": -976.0, |
| "logps/rejected": -1352.0, |
| "loss": 0.0643, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.53125, |
| "rewards/margins": 4.8125, |
| "rewards/rejected": -10.375, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.6348547717842323, |
| "grad_norm": 9.357215641019858, |
| "learning_rate": 2.223994493201342e-08, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -888.0, |
| "logps/rejected": -1272.0, |
| "loss": 0.0645, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.84375, |
| "rewards/margins": 4.15625, |
| "rewards/rejected": -9.0, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.6556016597510372, |
| "grad_norm": 11.426915475943359, |
| "learning_rate": 1.9817475885636868e-08, |
| "logits/chosen": -1.109375, |
| "logits/rejected": -1.265625, |
| "logps/chosen": -924.0, |
| "logps/rejected": -1344.0, |
| "loss": 0.0652, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.8125, |
| "rewards/margins": 4.90625, |
| "rewards/rejected": -9.6875, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.6763485477178426, |
| "grad_norm": 13.561447852898182, |
| "learning_rate": 1.7529220399550376e-08, |
| "logits/chosen": -1.140625, |
| "logits/rejected": -1.25, |
| "logps/chosen": -988.0, |
| "logps/rejected": -1472.0, |
| "loss": 0.0567, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.625, |
| "rewards/margins": 5.28125, |
| "rewards/rejected": -10.875, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.6970954356846475, |
| "grad_norm": 8.09337248601455, |
| "learning_rate": 1.5376512697178713e-08, |
| "logits/chosen": -1.234375, |
| "logits/rejected": -1.2265625, |
| "logps/chosen": -824.0, |
| "logps/rejected": -1224.0, |
| "loss": 0.063, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.5625, |
| "rewards/margins": 4.71875, |
| "rewards/rejected": -9.25, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.7178423236514524, |
| "grad_norm": 16.861726238832322, |
| "learning_rate": 1.3360607967490307e-08, |
| "logits/chosen": -1.1640625, |
| "logits/rejected": -1.15625, |
| "logps/chosen": -1048.0, |
| "logps/rejected": -1400.0, |
| "loss": 0.0647, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.78125, |
| "rewards/margins": 4.34375, |
| "rewards/rejected": -10.125, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.7385892116182573, |
| "grad_norm": 20.632985201134645, |
| "learning_rate": 1.1482681633128738e-08, |
| "logits/chosen": -1.3046875, |
| "logits/rejected": -1.3515625, |
| "logps/chosen": -920.0, |
| "logps/rejected": -1200.0, |
| "loss": 0.0816, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.40625, |
| "rewards/margins": 3.65625, |
| "rewards/rejected": -8.0625, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.759336099585062, |
| "grad_norm": 9.84758156918914, |
| "learning_rate": 9.743828665053466e-09, |
| "logits/chosen": -1.1796875, |
| "logits/rejected": -1.203125, |
| "logps/chosen": -868.0, |
| "logps/rejected": -1296.0, |
| "loss": 0.05, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.625, |
| "rewards/margins": 5.0, |
| "rewards/rejected": -9.625, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.780082987551867, |
| "grad_norm": 11.328737169791557, |
| "learning_rate": 8.145062944090425e-09, |
| "logits/chosen": -1.2578125, |
| "logits/rejected": -1.1953125, |
| "logps/chosen": -928.0, |
| "logps/rejected": -1368.0, |
| "loss": 0.0804, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.0, |
| "rewards/margins": 4.25, |
| "rewards/rejected": -9.25, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.800829875518672, |
| "grad_norm": 24.41038502693378, |
| "learning_rate": 6.687316669763937e-09, |
| "logits/chosen": -1.2265625, |
| "logits/rejected": -1.3125, |
| "logps/chosen": -808.0, |
| "logps/rejected": -1200.0, |
| "loss": 0.0622, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.46875, |
| "rewards/margins": 4.28125, |
| "rewards/rejected": -8.75, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.821576763485477, |
| "grad_norm": 15.889369514063155, |
| "learning_rate": 5.371439816754892e-09, |
| "logits/chosen": -1.1640625, |
| "logits/rejected": -1.234375, |
| "logps/chosen": -856.0, |
| "logps/rejected": -1136.0, |
| "loss": 0.0684, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -4.96875, |
| "rewards/margins": 3.5, |
| "rewards/rejected": -8.4375, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.8423236514522823, |
| "grad_norm": 13.23599572717501, |
| "learning_rate": 4.198199639302152e-09, |
| "logits/chosen": -1.1796875, |
| "logits/rejected": -1.21875, |
| "logps/chosen": -800.0, |
| "logps/rejected": -1280.0, |
| "loss": 0.0608, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.59375, |
| "rewards/margins": 4.71875, |
| "rewards/rejected": -9.3125, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.863070539419087, |
| "grad_norm": 9.4244826040974, |
| "learning_rate": 3.1682802238362506e-09, |
| "logits/chosen": -1.1953125, |
| "logits/rejected": -1.2578125, |
| "logps/chosen": -820.0, |
| "logps/rejected": -1280.0, |
| "loss": 0.0549, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.40625, |
| "rewards/margins": 4.59375, |
| "rewards/rejected": -9.0, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.883817427385892, |
| "grad_norm": 9.572805205888423, |
| "learning_rate": 2.2822820901060025e-09, |
| "logits/chosen": -1.3046875, |
| "logits/rejected": -1.2265625, |
| "logps/chosen": -920.0, |
| "logps/rejected": -1168.0, |
| "loss": 0.0583, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.5625, |
| "rewards/margins": 4.0625, |
| "rewards/rejected": -8.625, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.904564315352697, |
| "grad_norm": 5.8710007402955835, |
| "learning_rate": 1.5407218410307398e-09, |
| "logits/chosen": -1.3125, |
| "logits/rejected": -1.3828125, |
| "logps/chosen": -824.0, |
| "logps/rejected": -1168.0, |
| "loss": 0.0627, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -5.09375, |
| "rewards/margins": 3.34375, |
| "rewards/rejected": -8.4375, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.9253112033195023, |
| "grad_norm": 8.81585615187788, |
| "learning_rate": 9.440318614823417e-10, |
| "logits/chosen": -1.234375, |
| "logits/rejected": -1.2421875, |
| "logps/chosen": -788.0, |
| "logps/rejected": -1128.0, |
| "loss": 0.0704, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.34375, |
| "rewards/margins": 3.9375, |
| "rewards/rejected": -8.3125, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.9460580912863072, |
| "grad_norm": 8.442496137212597, |
| "learning_rate": 4.925600661726537e-10, |
| "logits/chosen": -1.2421875, |
| "logits/rejected": -1.3359375, |
| "logps/chosen": -916.0, |
| "logps/rejected": -1320.0, |
| "loss": 0.0613, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.46875, |
| "rewards/margins": 4.84375, |
| "rewards/rejected": -9.3125, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.966804979253112, |
| "grad_norm": 14.014913089264976, |
| "learning_rate": 1.8656969679323176e-10, |
| "logits/chosen": -1.2734375, |
| "logits/rejected": -1.2890625, |
| "logps/chosen": -936.0, |
| "logps/rejected": -1256.0, |
| "loss": 0.0667, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.0, |
| "rewards/margins": 4.40625, |
| "rewards/rejected": -9.375, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.987551867219917, |
| "grad_norm": 8.079064512428207, |
| "learning_rate": 2.6239168525898915e-11, |
| "logits/chosen": -1.25, |
| "logits/rejected": -1.1328125, |
| "logps/chosen": -860.0, |
| "logps/rejected": -1152.0, |
| "loss": 0.0733, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -4.65625, |
| "rewards/margins": 3.859375, |
| "rewards/rejected": -8.5, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1446, |
| "total_flos": 0.0, |
| "train_loss": 0.29230043576466097, |
| "train_runtime": 30177.164, |
| "train_samples_per_second": 3.062, |
| "train_steps_per_second": 0.048 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1446, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|